summaryrefslogtreecommitdiffstats
path: root/yql/essentials/udfs
diff options
context:
space:
mode:
authorimunkin <[email protected]>2024-11-08 10:00:23 +0300
committerimunkin <[email protected]>2024-11-08 10:12:13 +0300
commita784a2f943d6e15caa6241e2e96d80aac6dbf375 (patch)
tree05f1e5366c916b988a8afb75bdab8ddeee0f6e6d /yql/essentials/udfs
parentd70137a7b530ccaa52834274913bbb5a3d1ca06e (diff)
Move yql/udfs/common/ to /yql/essentials YQL-19206
Except the following directories: * clickhouse/client * datetime * knn * roaring commit_hash:c7da95636144d28db109d6b17ddc762e9bacb59f
Diffstat (limited to 'yql/essentials/udfs')
-rw-r--r--yql/essentials/udfs/common/compress_base/compress_udf.cpp17
-rw-r--r--yql/essentials/udfs/common/compress_base/lib/compress_base_udf.cpp1
-rw-r--r--yql/essentials/udfs/common/compress_base/lib/compress_base_udf.h218
-rw-r--r--yql/essentials/udfs/common/compress_base/lib/ya.make23
-rw-r--r--yql/essentials/udfs/common/compress_base/test/canondata/result.json12
-rw-r--r--yql/essentials/udfs/common/compress_base/test/canondata/test.test_RoundTrip_/results.txt124
-rw-r--r--yql/essentials/udfs/common/compress_base/test/canondata/test.test_TryDecompress_/results.txt188
-rw-r--r--yql/essentials/udfs/common/compress_base/test/cases/RoundTrip.sql12
-rw-r--r--yql/essentials/udfs/common/compress_base/test/cases/TryDecompress.sql19
-rw-r--r--yql/essentials/udfs/common/compress_base/test/cases/default.in3
-rw-r--r--yql/essentials/udfs/common/compress_base/test/ya.make11
-rw-r--r--yql/essentials/udfs/common/compress_base/ya.make22
-rw-r--r--yql/essentials/udfs/common/datetime2/datetime_udf.cpp2396
-rw-r--r--yql/essentials/udfs/common/datetime2/test/canondata/result.json137
-rw-r--r--yql/essentials/udfs/common/datetime2/test/canondata/test.test_BlockFrom_/results.txt206
-rw-r--r--yql/essentials/udfs/common/datetime2/test/canondata/test.test_BlockGet_/results.txt188
-rw-r--r--yql/essentials/udfs/common/datetime2/test/canondata/test.test_BlockSplitMake_/results.txt76
-rw-r--r--yql/essentials/udfs/common/datetime2/test/canondata/test.test_BlockStartOf_/results.txt314
-rw-r--r--yql/essentials/udfs/common/datetime2/test/canondata/test.test_BlockTmGet_/results.txt628
-rw-r--r--yql/essentials/udfs/common/datetime2/test/canondata/test.test_BlockTo_/results.txt356
-rw-r--r--yql/essentials/udfs/common/datetime2/test/canondata/test.test_EndOf_/results.txt295
-rw-r--r--yql/essentials/udfs/common/datetime2/test/canondata/test.test_FormatMicroseconds_/results.txt98
-rw-r--r--yql/essentials/udfs/common/datetime2/test/canondata/test.test_Format_/results.txt48
-rw-r--r--yql/essentials/udfs/common/datetime2/test/canondata/test.test_From_/results.txt148
-rw-r--r--yql/essentials/udfs/common/datetime2/test/canondata/test.test_Get_/results.txt628
-rw-r--r--yql/essentials/udfs/common/datetime2/test/canondata/test.test_ImplicitSplit_/results.txt98
-rw-r--r--yql/essentials/udfs/common/datetime2/test/canondata/test.test_MultirowBlockTo_/results.txt90
-rw-r--r--yql/essentials/udfs/common/datetime2/test/canondata/test.test_ParseIso8601_/results.txt53
-rw-r--r--yql/essentials/udfs/common/datetime2/test/canondata/test.test_ParseLim_/results.txt238
-rw-r--r--yql/essentials/udfs/common/datetime2/test/canondata/test.test_Parse_/results.txt242
-rw-r--r--yql/essentials/udfs/common/datetime2/test/canondata/test.test_Repr_/results.txt46
-rw-r--r--yql/essentials/udfs/common/datetime2/test/canondata/test.test_Shift_/results.txt459
-rw-r--r--yql/essentials/udfs/common/datetime2/test/canondata/test.test_SplitMake1969_/results.txt88
-rw-r--r--yql/essentials/udfs/common/datetime2/test/canondata/test.test_SplitMake_/results.txt160
-rw-r--r--yql/essentials/udfs/common/datetime2/test/canondata/test.test_StartOf1969_/results.txt151
-rw-r--r--yql/essentials/udfs/common/datetime2/test/canondata/test.test_StartOf_/results.txt314
-rw-r--r--yql/essentials/udfs/common/datetime2/test/canondata/test.test_To_/results.txt345
-rw-r--r--yql/essentials/udfs/common/datetime2/test/canondata/test.test_TzToDate_/results.txt52
-rw-r--r--yql/essentials/udfs/common/datetime2/test/canondata/test.test_UpdateTz_/results.txt64
-rw-r--r--yql/essentials/udfs/common/datetime2/test/canondata/test.test_Update_/results.txt236
-rw-r--r--yql/essentials/udfs/common/datetime2/test/canondata/test.test_yql-14977_/results.txt33
-rw-r--r--yql/essentials/udfs/common/datetime2/test/cases/BlockFrom.in30
-rw-r--r--yql/essentials/udfs/common/datetime2/test/cases/BlockFrom.in.attr89
-rw-r--r--yql/essentials/udfs/common/datetime2/test/cases/BlockFrom.sql19
-rw-r--r--yql/essentials/udfs/common/datetime2/test/cases/BlockGet.in6
-rw-r--r--yql/essentials/udfs/common/datetime2/test/cases/BlockGet.in.attr31
-rw-r--r--yql/essentials/udfs/common/datetime2/test/cases/BlockGet.sql36
-rw-r--r--yql/essentials/udfs/common/datetime2/test/cases/BlockSplitMake.in16
-rw-r--r--yql/essentials/udfs/common/datetime2/test/cases/BlockSplitMake.in.attr52
-rw-r--r--yql/essentials/udfs/common/datetime2/test/cases/BlockSplitMake.sql21
-rw-r--r--yql/essentials/udfs/common/datetime2/test/cases/BlockStartOf.attr0
-rw-r--r--yql/essentials/udfs/common/datetime2/test/cases/BlockStartOf.in15
-rw-r--r--yql/essentials/udfs/common/datetime2/test/cases/BlockStartOf.in.attr17
-rw-r--r--yql/essentials/udfs/common/datetime2/test/cases/BlockStartOf.sql30
-rw-r--r--yql/essentials/udfs/common/datetime2/test/cases/BlockTmGet.in10
-rw-r--r--yql/essentials/udfs/common/datetime2/test/cases/BlockTmGet.in.attr17
-rw-r--r--yql/essentials/udfs/common/datetime2/test/cases/BlockTmGet.sql27
-rw-r--r--yql/essentials/udfs/common/datetime2/test/cases/BlockTo.in11
-rw-r--r--yql/essentials/udfs/common/datetime2/test/cases/BlockTo.in.attr66
-rw-r--r--yql/essentials/udfs/common/datetime2/test/cases/BlockTo.sql51
-rw-r--r--yql/essentials/udfs/common/datetime2/test/cases/EndOf.sql34
-rw-r--r--yql/essentials/udfs/common/datetime2/test/cases/Format.in4
-rw-r--r--yql/essentials/udfs/common/datetime2/test/cases/Format.in.attr17
-rw-r--r--yql/essentials/udfs/common/datetime2/test/cases/Format.sql6
-rw-r--r--yql/essentials/udfs/common/datetime2/test/cases/FormatMicroseconds.sql15
-rw-r--r--yql/essentials/udfs/common/datetime2/test/cases/From.in12
-rw-r--r--yql/essentials/udfs/common/datetime2/test/cases/From.in.attr73
-rw-r--r--yql/essentials/udfs/common/datetime2/test/cases/From.sql14
-rw-r--r--yql/essentials/udfs/common/datetime2/test/cases/Get.in10
-rw-r--r--yql/essentials/udfs/common/datetime2/test/cases/Get.in.attr17
-rw-r--r--yql/essentials/udfs/common/datetime2/test/cases/Get.sql23
-rw-r--r--yql/essentials/udfs/common/datetime2/test/cases/ImplicitSplit.in9
-rw-r--r--yql/essentials/udfs/common/datetime2/test/cases/ImplicitSplit.in.attr52
-rw-r--r--yql/essentials/udfs/common/datetime2/test/cases/ImplicitSplit.sql20
-rw-r--r--yql/essentials/udfs/common/datetime2/test/cases/MultirowBlockTo.in24
-rw-r--r--yql/essentials/udfs/common/datetime2/test/cases/MultirowBlockTo.in.attr49
-rw-r--r--yql/essentials/udfs/common/datetime2/test/cases/MultirowBlockTo.sql20
-rw-r--r--yql/essentials/udfs/common/datetime2/test/cases/Parse.in55
-rw-r--r--yql/essentials/udfs/common/datetime2/test/cases/Parse.in.attr66
-rw-r--r--yql/essentials/udfs/common/datetime2/test/cases/Parse.sql25
-rw-r--r--yql/essentials/udfs/common/datetime2/test/cases/ParseIso8601.sql1
-rw-r--r--yql/essentials/udfs/common/datetime2/test/cases/ParseLim.sql19
-rw-r--r--yql/essentials/udfs/common/datetime2/test/cases/Repr.in0
-rw-r--r--yql/essentials/udfs/common/datetime2/test/cases/Repr.sql4
-rw-r--r--yql/essentials/udfs/common/datetime2/test/cases/Shift.in8
-rw-r--r--yql/essentials/udfs/common/datetime2/test/cases/Shift.in.attr17
-rw-r--r--yql/essentials/udfs/common/datetime2/test/cases/Shift.sql22
-rw-r--r--yql/essentials/udfs/common/datetime2/test/cases/SplitMake.in49
-rw-r--r--yql/essentials/udfs/common/datetime2/test/cases/SplitMake.in.attr52
-rw-r--r--yql/essentials/udfs/common/datetime2/test/cases/SplitMake.sql18
-rw-r--r--yql/essentials/udfs/common/datetime2/test/cases/SplitMake1969.cfg1
-rw-r--r--yql/essentials/udfs/common/datetime2/test/cases/SplitMake1969.in8
-rw-r--r--yql/essentials/udfs/common/datetime2/test/cases/SplitMake1969.in.attr52
-rw-r--r--yql/essentials/udfs/common/datetime2/test/cases/SplitMake1969.sql18
-rw-r--r--yql/essentials/udfs/common/datetime2/test/cases/StartOf.in15
-rw-r--r--yql/essentials/udfs/common/datetime2/test/cases/StartOf.in.attr17
-rw-r--r--yql/essentials/udfs/common/datetime2/test/cases/StartOf.sql21
-rw-r--r--yql/essentials/udfs/common/datetime2/test/cases/StartOf1969.cfg1
-rw-r--r--yql/essentials/udfs/common/datetime2/test/cases/StartOf1969.in3
-rw-r--r--yql/essentials/udfs/common/datetime2/test/cases/StartOf1969.in.attr17
-rw-r--r--yql/essentials/udfs/common/datetime2/test/cases/StartOf1969.sql20
-rw-r--r--yql/essentials/udfs/common/datetime2/test/cases/To.in11
-rw-r--r--yql/essentials/udfs/common/datetime2/test/cases/To.in.attr66
-rw-r--r--yql/essentials/udfs/common/datetime2/test/cases/To.sql44
-rw-r--r--yql/essentials/udfs/common/datetime2/test/cases/TzToDate.sql7
-rw-r--r--yql/essentials/udfs/common/datetime2/test/cases/Update.in1
-rw-r--r--yql/essentials/udfs/common/datetime2/test/cases/Update.in.attr17
-rw-r--r--yql/essentials/udfs/common/datetime2/test/cases/Update.sql25
-rw-r--r--yql/essentials/udfs/common/datetime2/test/cases/UpdateTz.sql9
-rw-r--r--yql/essentials/udfs/common/datetime2/test/cases/yql-14977.sql6
-rw-r--r--yql/essentials/udfs/common/datetime2/test/ya.make13
-rw-r--r--yql/essentials/udfs/common/datetime2/test_bigdates/canondata/result.json7
-rw-r--r--yql/essentials/udfs/common/datetime2/test_bigdates/canondata/test.test_SplitMake_/results.txt486
-rw-r--r--yql/essentials/udfs/common/datetime2/test_bigdates/cases/SplitMake.sql27
-rw-r--r--yql/essentials/udfs/common/datetime2/test_bigdates/cases/default.cfg4
-rw-r--r--yql/essentials/udfs/common/datetime2/test_bigdates/cases/input.txt35
-rw-r--r--yql/essentials/udfs/common/datetime2/test_bigdates/cases/input.txt.attr31
-rw-r--r--yql/essentials/udfs/common/datetime2/test_bigdates/cases/input_narrow.txt15
-rw-r--r--yql/essentials/udfs/common/datetime2/test_bigdates/cases/input_narrow.txt.attr31
-rw-r--r--yql/essentials/udfs/common/datetime2/test_bigdates/cases/input_tz.txt48
-rw-r--r--yql/essentials/udfs/common/datetime2/test_bigdates/cases/input_tz.txt.attr37
-rw-r--r--yql/essentials/udfs/common/datetime2/test_bigdates/cases/tz.txt15
-rw-r--r--yql/essentials/udfs/common/datetime2/test_bigdates/cases/tz.txt.attr16
-rw-r--r--yql/essentials/udfs/common/datetime2/test_bigdates/ya.make13
-rw-r--r--yql/essentials/udfs/common/datetime2/ya.make30
-rw-r--r--yql/essentials/udfs/common/digest/digest_udf.cpp410
-rw-r--r--yql/essentials/udfs/common/digest/test/canondata/result.json7
-rw-r--r--yql/essentials/udfs/common/digest/test/canondata/test.test_Basic_/results.txt506
-rw-r--r--yql/essentials/udfs/common/digest/test/cases/Basic.in4
-rw-r--r--yql/essentials/udfs/common/digest/test/cases/Basic.sql35
-rw-r--r--yql/essentials/udfs/common/digest/test/ya.make13
-rw-r--r--yql/essentials/udfs/common/digest/ya.make42
-rw-r--r--yql/essentials/udfs/common/file/file_udf.cpp623
-rw-r--r--yql/essentials/udfs/common/file/ya.make17
-rw-r--r--yql/essentials/udfs/common/histogram/histogram_udf.cpp1018
-rw-r--r--yql/essentials/udfs/common/histogram/test/canondata/result.json57
-rw-r--r--yql/essentials/udfs/common/histogram/test/canondata/test.test_Algorithms_/results.txt476
-rw-r--r--yql/essentials/udfs/common/histogram/test/canondata/test.test_Basic_/results.txt338
-rw-r--r--yql/essentials/udfs/common/histogram/test/canondata/test.test_Distinct_/results.txt139
-rw-r--r--yql/essentials/udfs/common/histogram/test/canondata/test.test_Intervals_/results.txt290
-rw-r--r--yql/essentials/udfs/common/histogram/test/canondata/test.test_Linear_/results.txt330
-rw-r--r--yql/essentials/udfs/common/histogram/test/canondata/test.test_Logarithmic_/results.txt310
-rw-r--r--yql/essentials/udfs/common/histogram/test/canondata/test.test_Methods_/results.txt85
-rw-r--r--yql/essentials/udfs/common/histogram/test/canondata/test.test_Normalize_/results.txt338
-rw-r--r--yql/essentials/udfs/common/histogram/test/canondata/test.test_Print_/results.txt59
-rw-r--r--yql/essentials/udfs/common/histogram/test/canondata/test.test_ToCumulativeDistributionFunction_/results.txt444
-rw-r--r--yql/essentials/udfs/common/histogram/test/canondata/test.test_Weights_/results.txt221
-rw-r--r--yql/essentials/udfs/common/histogram/test/cases/Algorithms.sql16
-rw-r--r--yql/essentials/udfs/common/histogram/test/cases/Basic.sql12
-rw-r--r--yql/essentials/udfs/common/histogram/test/cases/Distinct.sql9
-rw-r--r--yql/essentials/udfs/common/histogram/test/cases/Intervals.sql12
-rw-r--r--yql/essentials/udfs/common/histogram/test/cases/Linear.in19
-rw-r--r--yql/essentials/udfs/common/histogram/test/cases/Linear.sql10
-rw-r--r--yql/essentials/udfs/common/histogram/test/cases/Logarithmic.in19
-rw-r--r--yql/essentials/udfs/common/histogram/test/cases/Logarithmic.sql10
-rw-r--r--yql/essentials/udfs/common/histogram/test/cases/Methods.sql12
-rw-r--r--yql/essentials/udfs/common/histogram/test/cases/Normalize.sql12
-rw-r--r--yql/essentials/udfs/common/histogram/test/cases/Print.sql12
-rw-r--r--yql/essentials/udfs/common/histogram/test/cases/ToCumulativeDistributionFunction.sql13
-rw-r--r--yql/essentials/udfs/common/histogram/test/cases/Weights.in16
-rw-r--r--yql/essentials/udfs/common/histogram/test/cases/Weights.sql12
-rw-r--r--yql/essentials/udfs/common/histogram/test/cases/default.in16
-rw-r--r--yql/essentials/udfs/common/histogram/test/ya.make13
-rw-r--r--yql/essentials/udfs/common/histogram/ya.make32
-rw-r--r--yql/essentials/udfs/common/hyperloglog/hyperloglog_udf.cpp423
-rw-r--r--yql/essentials/udfs/common/hyperloglog/test/canondata/result.json7
-rw-r--r--yql/essentials/udfs/common/hyperloglog/test/canondata/test.test_Basic_/results.txt59
-rw-r--r--yql/essentials/udfs/common/hyperloglog/test/cases/Basic.in6000
-rw-r--r--yql/essentials/udfs/common/hyperloglog/test/cases/Basic.sql7
-rw-r--r--yql/essentials/udfs/common/hyperloglog/test/ya.make16
-rw-r--r--yql/essentials/udfs/common/hyperloglog/ya.make32
-rw-r--r--yql/essentials/udfs/common/hyperscan/hyperscan_udf.cpp477
-rw-r--r--yql/essentials/udfs/common/hyperscan/test/canondata/result.json17
-rw-r--r--yql/essentials/udfs/common/hyperscan/test/canondata/test.test_Basic_/results.txt441
-rw-r--r--yql/essentials/udfs/common/hyperscan/test/canondata/test.test_CharacterClasses_/results.txt59
-rw-r--r--yql/essentials/udfs/common/hyperscan/test/canondata/test.test_Error_/extracted8
-rw-r--r--yql/essentials/udfs/common/hyperscan/test/cases/Basic.in8
-rw-r--r--yql/essentials/udfs/common/hyperscan/test/cases/Basic.sql33
-rw-r--r--yql/essentials/udfs/common/hyperscan/test/cases/CharacterClasses.in4
-rw-r--r--yql/essentials/udfs/common/hyperscan/test/cases/CharacterClasses.sql9
-rw-r--r--yql/essentials/udfs/common/hyperscan/test/cases/Error.cfg2
-rw-r--r--yql/essentials/udfs/common/hyperscan/test/cases/Error.sql4
-rw-r--r--yql/essentials/udfs/common/hyperscan/test/ya.make17
-rw-r--r--yql/essentials/udfs/common/hyperscan/ya.make42
-rw-r--r--yql/essentials/udfs/common/ip_base/ip_base.cpp7
-rw-r--r--yql/essentials/udfs/common/ip_base/lib/ip_base_udf.cpp1
-rw-r--r--yql/essentials/udfs/common/ip_base/lib/ip_base_udf.h358
-rw-r--r--yql/essentials/udfs/common/ip_base/lib/ya.make19
-rw-r--r--yql/essentials/udfs/common/ip_base/test/canondata/result.json12
-rw-r--r--yql/essentials/udfs/common/ip_base/test/canondata/test.test_Basic_/results.txt374
-rw-r--r--yql/essentials/udfs/common/ip_base/test/canondata/test.test_Subnets_/results.txt184
-rw-r--r--yql/essentials/udfs/common/ip_base/test/cases/Basic.in9
-rw-r--r--yql/essentials/udfs/common/ip_base/test/cases/Basic.sql16
-rw-r--r--yql/essentials/udfs/common/ip_base/test/cases/Subnets.in5
-rw-r--r--yql/essentials/udfs/common/ip_base/test/cases/Subnets.in.attr1
-rw-r--r--yql/essentials/udfs/common/ip_base/test/cases/Subnets.sql16
-rw-r--r--yql/essentials/udfs/common/ip_base/test/ya.make13
-rw-r--r--yql/essentials/udfs/common/ip_base/ya.make34
-rw-r--r--yql/essentials/udfs/common/json/json_udf.cpp120
-rw-r--r--yql/essentials/udfs/common/json/test/canondata/result.json7
-rw-r--r--yql/essentials/udfs/common/json/test/canondata/test.test_Basic_/results.txt57
-rw-r--r--yql/essentials/udfs/common/json/test/cases/Basic.sql12
-rw-r--r--yql/essentials/udfs/common/json/test/ya.make13
-rw-r--r--yql/essentials/udfs/common/json/ya.make29
-rw-r--r--yql/essentials/udfs/common/json2/as_json_node.h115
-rw-r--r--yql/essentials/udfs/common/json2/compile_path.h70
-rw-r--r--yql/essentials/udfs/common/json2/json2_udf.cpp43
-rw-r--r--yql/essentials/udfs/common/json2/parse.h66
-rw-r--r--yql/essentials/udfs/common/json2/resource.h17
-rw-r--r--yql/essentials/udfs/common/json2/serialize.h89
-rw-r--r--yql/essentials/udfs/common/json2/sql_exists.h135
-rw-r--r--yql/essentials/udfs/common/json2/sql_query.h184
-rw-r--r--yql/essentials/udfs/common/json2/sql_value.h296
-rw-r--r--yql/essentials/udfs/common/json2/test/canondata/result.json42
-rw-r--r--yql/essentials/udfs/common/json2/test/canondata/test.test_AsJsonNode_/results.txt84
-rw-r--r--yql/essentials/udfs/common/json2/test/canondata/test.test_SerializeParse_/results.txt102
-rw-r--r--yql/essentials/udfs/common/json2/test/canondata/test.test_SqlExists_/results.txt195
-rw-r--r--yql/essentials/udfs/common/json2/test/canondata/test.test_SqlQueryError_/extracted10
-rw-r--r--yql/essentials/udfs/common/json2/test/canondata/test.test_SqlQuery_/results.txt400
-rw-r--r--yql/essentials/udfs/common/json2/test/canondata/test.test_SqlTryExistsError_/extracted10
-rw-r--r--yql/essentials/udfs/common/json2/test/canondata/test.test_SqlTryExists_/results.txt83
-rw-r--r--yql/essentials/udfs/common/json2/test/canondata/test.test_SqlValue_/results.txt1663
-rw-r--r--yql/essentials/udfs/common/json2/test/cases/AsJsonNode.sql9
-rw-r--r--yql/essentials/udfs/common/json2/test/cases/SerializeParse.sql15
-rw-r--r--yql/essentials/udfs/common/json2/test/cases/SqlExists.sql25
-rw-r--r--yql/essentials/udfs/common/json2/test/cases/SqlQuery.sql52
-rw-r--r--yql/essentials/udfs/common/json2/test/cases/SqlQueryError.cfg1
-rw-r--r--yql/essentials/udfs/common/json2/test/cases/SqlQueryError.sql7
-rw-r--r--yql/essentials/udfs/common/json2/test/cases/SqlTryExists.sql11
-rw-r--r--yql/essentials/udfs/common/json2/test/cases/SqlTryExistsError.cfg1
-rw-r--r--yql/essentials/udfs/common/json2/test/cases/SqlTryExistsError.sql7
-rw-r--r--yql/essentials/udfs/common/json2/test/cases/SqlValue.sql95
-rw-r--r--yql/essentials/udfs/common/json2/test/ya.make13
-rw-r--r--yql/essentials/udfs/common/json2/ya.make33
-rw-r--r--yql/essentials/udfs/common/math/lib/erfinv.cpp114
-rw-r--r--yql/essentials/udfs/common/math/lib/erfinv.h7
-rw-r--r--yql/essentials/udfs/common/math/lib/round.h77
-rw-r--r--yql/essentials/udfs/common/math/lib/round_ut.cpp70
-rw-r--r--yql/essentials/udfs/common/math/lib/ut/ya.make11
-rw-r--r--yql/essentials/udfs/common/math/lib/ya.make11
-rw-r--r--yql/essentials/udfs/common/math/math_ir.cpp5
-rw-r--r--yql/essentials/udfs/common/math/math_ir.h150
-rw-r--r--yql/essentials/udfs/common/math/math_udf.cpp99
-rw-r--r--yql/essentials/udfs/common/math/test/canondata/result.json35
-rw-r--r--yql/essentials/udfs/common/math/test/canondata/test.test_NearbyInt_/results.txt238
-rw-r--r--yql/essentials/udfs/common/math/test/cases/ErfInv.sql24
-rw-r--r--yql/essentials/udfs/common/math/test/cases/ErfInvNoLLVM.sql25
-rw-r--r--yql/essentials/udfs/common/math/test/cases/IR.in4
-rw-r--r--yql/essentials/udfs/common/math/test/cases/IR.sql26
-rw-r--r--yql/essentials/udfs/common/math/test/cases/IR_LLVM_OFF.in4
-rw-r--r--yql/essentials/udfs/common/math/test/cases/IR_LLVM_OFF.sql27
-rw-r--r--yql/essentials/udfs/common/math/test/cases/NearbyInt.in0
-rw-r--r--yql/essentials/udfs/common/math/test/cases/NearbyInt.sql24
-rw-r--r--yql/essentials/udfs/common/math/test/ya.make13
-rw-r--r--yql/essentials/udfs/common/math/ya.make92
-rw-r--r--yql/essentials/udfs/common/pire/pire_udf.cpp358
-rw-r--r--yql/essentials/udfs/common/pire/test/canondata/result.json12
-rw-r--r--yql/essentials/udfs/common/pire/test/canondata/test.test_Basic_/results.txt508
-rw-r--r--yql/essentials/udfs/common/pire/test/canondata/test.test_CharacterClasses_/results.txt59
-rw-r--r--yql/essentials/udfs/common/pire/test/cases/Basic.in8
-rw-r--r--yql/essentials/udfs/common/pire/test/cases/Basic.sql32
-rw-r--r--yql/essentials/udfs/common/pire/test/cases/CharacterClasses.in4
-rw-r--r--yql/essentials/udfs/common/pire/test/cases/CharacterClasses.sql9
-rw-r--r--yql/essentials/udfs/common/pire/test/ya.make13
-rw-r--r--yql/essentials/udfs/common/pire/ya.make29
-rw-r--r--yql/essentials/udfs/common/protobuf/protobuf_udf.cpp143
-rw-r--r--yql/essentials/udfs/common/protobuf/test/canondata/result.json126
-rw-r--r--yql/essentials/udfs/common/protobuf/test/canondata/test.test_recursion_fail_/extracted11
-rw-r--r--yql/essentials/udfs/common/protobuf/test/cases/CRYPTR-627.in3
-rw-r--r--yql/essentials/udfs/common/protobuf/test/cases/CRYPTR-627.in.attr1
-rw-r--r--yql/essentials/udfs/common/protobuf/test/cases/CRYPTR-627.sql11
-rw-r--r--yql/essentials/udfs/common/protobuf/test/cases/YQL-16111.in0
-rw-r--r--yql/essentials/udfs/common/protobuf/test/cases/YQL-16111.sql64
-rw-r--r--yql/essentials/udfs/common/protobuf/test/cases/YQL-3381.in3
-rw-r--r--yql/essentials/udfs/common/protobuf/test/cases/YQL-3381.in.attr1
-rw-r--r--yql/essentials/udfs/common/protobuf/test/cases/YQL-3381.sql7
-rw-r--r--yql/essentials/udfs/common/protobuf/test/cases/YQL-6706.in2
-rw-r--r--yql/essentials/udfs/common/protobuf/test/cases/YQL-6706.in.attr1
-rw-r--r--yql/essentials/udfs/common/protobuf/test/cases/YQL-6706.sql17
-rw-r--r--yql/essentials/udfs/common/protobuf/test/cases/YQL-8307.in4
-rw-r--r--yql/essentials/udfs/common/protobuf/test/cases/YQL-8307.in.attr1
-rw-r--r--yql/essentials/udfs/common/protobuf/test/cases/YQL-8307.sql10
-rw-r--r--yql/essentials/udfs/common/protobuf/test/cases/recursion_bytes.in1
-rw-r--r--yql/essentials/udfs/common/protobuf/test/cases/recursion_bytes.in.attr1
-rw-r--r--yql/essentials/udfs/common/protobuf/test/cases/recursion_bytes.sql35
-rw-r--r--yql/essentials/udfs/common/protobuf/test/cases/recursion_fail.cfg2
-rw-r--r--yql/essentials/udfs/common/protobuf/test/cases/recursion_fail.in1
-rw-r--r--yql/essentials/udfs/common/protobuf/test/cases/recursion_fail.in.attr1
-rw-r--r--yql/essentials/udfs/common/protobuf/test/cases/recursion_fail.sql32
-rw-r--r--yql/essentials/udfs/common/protobuf/test/cases/recursion_ignore.in1
-rw-r--r--yql/essentials/udfs/common/protobuf/test/cases/recursion_ignore.in.attr1
-rw-r--r--yql/essentials/udfs/common/protobuf/test/cases/recursion_ignore.sql35
-rw-r--r--yql/essentials/udfs/common/protobuf/test/cases/syntax_aware_empty_nested_message.in4
-rw-r--r--yql/essentials/udfs/common/protobuf/test/cases/syntax_aware_empty_nested_message.in.attr1
-rw-r--r--yql/essentials/udfs/common/protobuf/test/cases/syntax_aware_empty_nested_message.sql23
-rw-r--r--yql/essentials/udfs/common/protobuf/test/cases/syntax_aware_nested_enum_string_value.in4
-rw-r--r--yql/essentials/udfs/common/protobuf/test/cases/syntax_aware_nested_enum_string_value.in.attr1
-rw-r--r--yql/essentials/udfs/common/protobuf/test/cases/syntax_aware_nested_enum_string_value.sql24
-rw-r--r--yql/essentials/udfs/common/protobuf/test/cases/yt_mode_any.in0
-rw-r--r--yql/essentials/udfs/common/protobuf/test/cases/yt_mode_any.sql43
-rw-r--r--yql/essentials/udfs/common/protobuf/test/cases/yt_mode_enum.in0
-rw-r--r--yql/essentials/udfs/common/protobuf/test/cases/yt_mode_enum.sql50
-rw-r--r--yql/essentials/udfs/common/protobuf/test/cases/yt_mode_map.in1
-rw-r--r--yql/essentials/udfs/common/protobuf/test/cases/yt_mode_map.in.attr1
-rw-r--r--yql/essentials/udfs/common/protobuf/test/cases/yt_mode_map.sql47
-rw-r--r--yql/essentials/udfs/common/protobuf/test/cases/yt_mode_no_ser.in1
-rw-r--r--yql/essentials/udfs/common/protobuf/test/cases/yt_mode_no_ser.in.attr1
-rw-r--r--yql/essentials/udfs/common/protobuf/test/cases/yt_mode_no_ser.sql36
-rw-r--r--yql/essentials/udfs/common/protobuf/test/cases/yt_mode_plain.in1
-rw-r--r--yql/essentials/udfs/common/protobuf/test/cases/yt_mode_plain.in.attr1
-rw-r--r--yql/essentials/udfs/common/protobuf/test/cases/yt_mode_plain.sql113
-rw-r--r--yql/essentials/udfs/common/protobuf/test/cases/yt_mode_ser_pb.in1
-rw-r--r--yql/essentials/udfs/common/protobuf/test/cases/yt_mode_ser_pb.in.attr1
-rw-r--r--yql/essentials/udfs/common/protobuf/test/cases/yt_mode_ser_pb.sql40
-rw-r--r--yql/essentials/udfs/common/protobuf/test/cases/yt_mode_ser_yt.in1
-rw-r--r--yql/essentials/udfs/common/protobuf/test/cases/yt_mode_ser_yt.in.attr1
-rw-r--r--yql/essentials/udfs/common/protobuf/test/cases/yt_mode_ser_yt.sql39
-rw-r--r--yql/essentials/udfs/common/protobuf/test/cases/yt_mode_variant.in4
-rw-r--r--yql/essentials/udfs/common/protobuf/test/cases/yt_mode_variant.in.attr1
-rw-r--r--yql/essentials/udfs/common/protobuf/test/cases/yt_mode_variant.sql82
-rw-r--r--yql/essentials/udfs/common/protobuf/test/ya.make13
-rw-r--r--yql/essentials/udfs/common/protobuf/ya.make23
-rw-r--r--yql/essentials/udfs/common/python/bindings/py27_backports.c91
-rw-r--r--yql/essentials/udfs/common/python/bindings/py27_backports.h26
-rw-r--r--yql/essentials/udfs/common/python/bindings/py_callable.cpp423
-rw-r--r--yql/essentials/udfs/common/python/bindings/py_callable.h22
-rw-r--r--yql/essentials/udfs/common/python/bindings/py_callable_ut.cpp87
-rw-r--r--yql/essentials/udfs/common/python/bindings/py_cast.cpp955
-rw-r--r--yql/essentials/udfs/common/python/bindings/py_cast.h45
-rw-r--r--yql/essentials/udfs/common/python/bindings/py_cast_ut.cpp90
-rw-r--r--yql/essentials/udfs/common/python/bindings/py_ctx.h120
-rw-r--r--yql/essentials/udfs/common/python/bindings/py_decimal.cpp59
-rw-r--r--yql/essentials/udfs/common/python/bindings/py_decimal.h12
-rw-r--r--yql/essentials/udfs/common/python/bindings/py_decimal_ut.cpp122
-rw-r--r--yql/essentials/udfs/common/python/bindings/py_dict.cpp683
-rw-r--r--yql/essentials/udfs/common/python/bindings/py_dict.h50
-rw-r--r--yql/essentials/udfs/common/python/bindings/py_dict_ut.cpp722
-rw-r--r--yql/essentials/udfs/common/python/bindings/py_errors.cpp72
-rw-r--r--yql/essentials/udfs/common/python/bindings/py_errors.h24
-rw-r--r--yql/essentials/udfs/common/python/bindings/py_gil.h37
-rw-r--r--yql/essentials/udfs/common/python/bindings/py_iterator.cpp280
-rw-r--r--yql/essentials/udfs/common/python/bindings/py_iterator.h23
-rw-r--r--yql/essentials/udfs/common/python/bindings/py_lazy_mkql_dict.cpp705
-rw-r--r--yql/essentials/udfs/common/python/bindings/py_lazy_mkql_list.cpp382
-rw-r--r--yql/essentials/udfs/common/python/bindings/py_list.cpp1116
-rw-r--r--yql/essentials/udfs/common/python/bindings/py_list.h33
-rw-r--r--yql/essentials/udfs/common/python/bindings/py_list_ut.cpp1025
-rw-r--r--yql/essentials/udfs/common/python/bindings/py_number_ut.cpp359
-rw-r--r--yql/essentials/udfs/common/python/bindings/py_optional_ut.cpp56
-rw-r--r--yql/essentials/udfs/common/python/bindings/py_ptr.h69
-rw-r--r--yql/essentials/udfs/common/python/bindings/py_resource.cpp116
-rw-r--r--yql/essentials/udfs/common/python/bindings/py_resource.h20
-rw-r--r--yql/essentials/udfs/common/python/bindings/py_resource_ut.cpp81
-rw-r--r--yql/essentials/udfs/common/python/bindings/py_stream.cpp343
-rw-r--r--yql/essentials/udfs/common/python/bindings/py_stream.h24
-rw-r--r--yql/essentials/udfs/common/python/bindings/py_stream_ut.cpp208
-rw-r--r--yql/essentials/udfs/common/python/bindings/py_string_ut.cpp98
-rw-r--r--yql/essentials/udfs/common/python/bindings/py_struct.cpp188
-rw-r--r--yql/essentials/udfs/common/python/bindings/py_struct.h17
-rw-r--r--yql/essentials/udfs/common/python/bindings/py_struct_ut.cpp307
-rw-r--r--yql/essentials/udfs/common/python/bindings/py_tuple.cpp61
-rw-r--r--yql/essentials/udfs/common/python/bindings/py_tuple.h17
-rw-r--r--yql/essentials/udfs/common/python/bindings/py_tuple_ut.cpp108
-rw-r--r--yql/essentials/udfs/common/python/bindings/py_tzdate_ut.cpp85
-rw-r--r--yql/essentials/udfs/common/python/bindings/py_utils.cpp89
-rw-r--r--yql/essentials/udfs/common/python/bindings/py_utils.h28
-rw-r--r--yql/essentials/udfs/common/python/bindings/py_utils_ut.cpp37
-rw-r--r--yql/essentials/udfs/common/python/bindings/py_variant.cpp97
-rw-r--r--yql/essentials/udfs/common/python/bindings/py_variant.h17
-rw-r--r--yql/essentials/udfs/common/python/bindings/py_variant_ut.cpp101
-rw-r--r--yql/essentials/udfs/common/python/bindings/py_void.cpp117
-rw-r--r--yql/essentials/udfs/common/python/bindings/py_void.h21
-rw-r--r--yql/essentials/udfs/common/python/bindings/py_void_ut.cpp37
-rw-r--r--yql/essentials/udfs/common/python/bindings/py_yql_module.cpp251
-rw-r--r--yql/essentials/udfs/common/python/bindings/py_yql_module.h11
-rw-r--r--yql/essentials/udfs/common/python/bindings/typing.py188
-rw-r--r--yql/essentials/udfs/common/python/bindings/ut3/py_test_engine.h227
-rw-r--r--yql/essentials/udfs/common/python/bindings/ut3/ya.make37
-rw-r--r--yql/essentials/udfs/common/python/bindings/ya.make54
-rw-r--r--yql/essentials/udfs/common/python/main_py3/__main__.pyx50
-rw-r--r--yql/essentials/udfs/common/python/main_py3/include/main.h12
-rw-r--r--yql/essentials/udfs/common/python/main_py3/main.cpp9
-rw-r--r--yql/essentials/udfs/common/python/main_py3/ya.make13
-rw-r--r--yql/essentials/udfs/common/python/python3_small/test/canondata/result.json61
-rw-r--r--yql/essentials/udfs/common/python/python3_small/test/canondata/test.test_Excepthook_/extracted15
-rw-r--r--yql/essentials/udfs/common/python/python3_small/test/canondata/test.test_OptionalNested_/extracted14
-rw-r--r--yql/essentials/udfs/common/python/python3_small/test/cases/Annotations.in0
-rw-r--r--yql/essentials/udfs/common/python/python3_small/test/cases/Annotations.sql67
-rw-r--r--yql/essentials/udfs/common/python/python3_small/test/cases/BytesDecodeModeStrict.in0
-rw-r--r--yql/essentials/udfs/common/python/python3_small/test/cases/BytesDecodeModeStrict.sql11
-rw-r--r--yql/essentials/udfs/common/python/python3_small/test/cases/Cleanup.in1
-rw-r--r--yql/essentials/udfs/common/python/python3_small/test/cases/Cleanup.sql12
-rw-r--r--yql/essentials/udfs/common/python/python3_small/test/cases/CustomYsonConverter.in0
-rw-r--r--yql/essentials/udfs/common/python/python3_small/test/cases/CustomYsonConverter.sql20
-rw-r--r--yql/essentials/udfs/common/python/python3_small/test/cases/Data.in0
-rw-r--r--yql/essentials/udfs/common/python/python3_small/test/cases/Data.sql61
-rw-r--r--yql/essentials/udfs/common/python/python3_small/test/cases/Excepthook.cfg1
-rw-r--r--yql/essentials/udfs/common/python/python3_small/test/cases/Excepthook.sql23
-rw-r--r--yql/essentials/udfs/common/python/python3_small/test/cases/GreedyInputContainers.in0
-rw-r--r--yql/essentials/udfs/common/python/python3_small/test/cases/GreedyInputContainers.sql19
-rw-r--r--yql/essentials/udfs/common/python/python3_small/test/cases/OptionalNested.cfg1
-rw-r--r--yql/essentials/udfs/common/python/python3_small/test/cases/OptionalNested.sql7
-rw-r--r--yql/essentials/udfs/common/python/python3_small/test/cases/Switch.in0
-rw-r--r--yql/essentials/udfs/common/python/python3_small/test/cases/Switch.sql92
-rw-r--r--yql/essentials/udfs/common/python/python3_small/test/ya.make10
-rw-r--r--yql/essentials/udfs/common/python/python3_small/ya.make16
-rw-r--r--yql/essentials/udfs/common/python/python_udf/python_function_factory.h111
-rw-r--r--yql/essentials/udfs/common/python/python_udf/python_udf.cpp232
-rw-r--r--yql/essentials/udfs/common/python/python_udf/python_udf.h26
-rw-r--r--yql/essentials/udfs/common/python/python_udf/python_udfs_exports.exports5
-rw-r--r--yql/essentials/udfs/common/python/python_udf/ya.make20
-rw-r--r--yql/essentials/udfs/common/python/system_python/README.MD7
-rwxr-xr-xyql/essentials/udfs/common/python/system_python/build_system_python_udfs.sh7
-rw-r--r--yql/essentials/udfs/common/python/system_python/python3_10/ya.make16
-rw-r--r--yql/essentials/udfs/common/python/system_python/python3_11/ya.make16
-rw-r--r--yql/essentials/udfs/common/python/system_python/python3_12/ya.make16
-rw-r--r--yql/essentials/udfs/common/python/system_python/python3_8/ya.make16
-rw-r--r--yql/essentials/udfs/common/python/system_python/python3_9/ya.make16
-rw-r--r--yql/essentials/udfs/common/python/system_python/ya.make7
-rw-r--r--yql/essentials/udfs/common/python/ya.make10
-rw-r--r--yql/essentials/udfs/common/re2/re2_udf.cpp536
-rw-r--r--yql/essentials/udfs/common/re2/test/canondata/result.json52
-rw-r--r--yql/essentials/udfs/common/re2/test/canondata/test.test_BackslashInLike_/results.txt28
-rw-r--r--yql/essentials/udfs/common/re2/test/canondata/test.test_BasicOptions_/results.txt278
-rw-r--r--yql/essentials/udfs/common/re2/test/canondata/test.test_Basic_/results.txt257
-rw-r--r--yql/essentials/udfs/common/re2/test/canondata/test.test_DefOptions_/results.txt266
-rw-r--r--yql/essentials/udfs/common/re2/test/canondata/test.test_FindAndConsumeEmpty_/results.txt66
-rw-r--r--yql/essentials/udfs/common/re2/test/canondata/test.test_LikeEscape_/results.txt76
-rw-r--r--yql/essentials/udfs/common/re2/test/canondata/test.test_MultipleCaptureGroups_/extracted14
-rw-r--r--yql/essentials/udfs/common/re2/test/canondata/test.test_MutableLambda_/results.txt52
-rw-r--r--yql/essentials/udfs/common/re2/test/canondata/test.test_SkipGroup_/results.txt105
-rw-r--r--yql/essentials/udfs/common/re2/test/canondata/test.test_Space_/results.txt28
-rw-r--r--yql/essentials/udfs/common/re2/test/cases/BackslashInLike.sql1
-rw-r--r--yql/essentials/udfs/common/re2/test/cases/Basic.in7
-rw-r--r--yql/essentials/udfs/common/re2/test/cases/Basic.sql21
-rw-r--r--yql/essentials/udfs/common/re2/test/cases/BasicOptions.in8
-rw-r--r--yql/essentials/udfs/common/re2/test/cases/BasicOptions.sql22
-rw-r--r--yql/essentials/udfs/common/re2/test/cases/DefOptions.sql19
-rw-r--r--yql/essentials/udfs/common/re2/test/cases/FindAndConsumeEmpty.sql6
-rw-r--r--yql/essentials/udfs/common/re2/test/cases/LikeEscape.sql7
-rw-r--r--yql/essentials/udfs/common/re2/test/cases/MultipleCaptureGroups.cfg1
-rw-r--r--yql/essentials/udfs/common/re2/test/cases/MultipleCaptureGroups.sql4
-rw-r--r--yql/essentials/udfs/common/re2/test/cases/MutableLambda.in0
-rw-r--r--yql/essentials/udfs/common/re2/test/cases/MutableLambda.sql24
-rw-r--r--yql/essentials/udfs/common/re2/test/cases/SkipGroup.sql10
-rw-r--r--yql/essentials/udfs/common/re2/test/cases/Space.sql2
-rw-r--r--yql/essentials/udfs/common/re2/test/ya.make13
-rw-r--r--yql/essentials/udfs/common/re2/ya.make30
-rw-r--r--yql/essentials/udfs/common/set/set_udf.cpp576
-rw-r--r--yql/essentials/udfs/common/set/test/canondata/result.json47
-rw-r--r--yql/essentials/udfs/common/set/test/canondata/test.test_ListDistinctDictInDict_/results.txt79
-rw-r--r--yql/essentials/udfs/common/set/test/canondata/test.test_ListDistinctDict_/results.txt103
-rw-r--r--yql/essentials/udfs/common/set/test/canondata/test.test_ListDistinctLazyList_/results.txt85
-rw-r--r--yql/essentials/udfs/common/set/test/canondata/test.test_ListDistinctLimit_/results.txt57
-rw-r--r--yql/essentials/udfs/common/set/test/canondata/test.test_ListDistinctSingular_/results.txt115
-rw-r--r--yql/essentials/udfs/common/set/test/canondata/test.test_ListDistinctStructInDict_/results.txt71
-rw-r--r--yql/essentials/udfs/common/set/test/canondata/test.test_ListDistinctTuple_/results.txt115
-rw-r--r--yql/essentials/udfs/common/set/test/canondata/test.test_ListDistinctVariant_/results.txt108
-rw-r--r--yql/essentials/udfs/common/set/test/canondata/test.test_ListDistinct_/results.txt60
-rw-r--r--yql/essentials/udfs/common/set/test/cases/ListDistinct.in15
-rw-r--r--yql/essentials/udfs/common/set/test/cases/ListDistinct.in.attr30
-rw-r--r--yql/essentials/udfs/common/set/test/cases/ListDistinct.sql9
-rw-r--r--yql/essentials/udfs/common/set/test/cases/ListDistinctDict.sql8
-rw-r--r--yql/essentials/udfs/common/set/test/cases/ListDistinctDictInDict.sql4
-rw-r--r--yql/essentials/udfs/common/set/test/cases/ListDistinctLazyList.sql9
-rw-r--r--yql/essentials/udfs/common/set/test/cases/ListDistinctLimit.in15
-rw-r--r--yql/essentials/udfs/common/set/test/cases/ListDistinctLimit.in.attr30
-rw-r--r--yql/essentials/udfs/common/set/test/cases/ListDistinctLimit.sql9
-rw-r--r--yql/essentials/udfs/common/set/test/cases/ListDistinctSingular.sql16
-rw-r--r--yql/essentials/udfs/common/set/test/cases/ListDistinctStructInDict.sql4
-rw-r--r--yql/essentials/udfs/common/set/test/cases/ListDistinctTuple.in15
-rw-r--r--yql/essentials/udfs/common/set/test/cases/ListDistinctTuple.in.attr30
-rw-r--r--yql/essentials/udfs/common/set/test/cases/ListDistinctTuple.sql9
-rw-r--r--yql/essentials/udfs/common/set/test/cases/ListDistinctVariant.sql11
-rw-r--r--yql/essentials/udfs/common/set/test/ya.make13
-rw-r--r--yql/essentials/udfs/common/set/ya.make25
-rw-r--r--yql/essentials/udfs/common/stat/stat_udf.cpp3
-rw-r--r--yql/essentials/udfs/common/stat/stat_udf_ut.cpp363
-rw-r--r--yql/essentials/udfs/common/stat/static/stat_udf.h75
-rw-r--r--yql/essentials/udfs/common/stat/static/static_udf.cpp10
-rw-r--r--yql/essentials/udfs/common/stat/static/ya.make19
-rw-r--r--yql/essentials/udfs/common/stat/ut/ya.make19
-rw-r--r--yql/essentials/udfs/common/stat/ya.make30
-rw-r--r--yql/essentials/udfs/common/streaming/streaming_udf.cpp829
-rw-r--r--yql/essentials/udfs/common/streaming/test/canondata/result.json44
-rw-r--r--yql/essentials/udfs/common/streaming/test/cases/Big.sql48
-rw-r--r--yql/essentials/udfs/common/streaming/test/cases/Empty.in100
-rw-r--r--yql/essentials/udfs/common/streaming/test/cases/Empty.sql3
-rw-r--r--yql/essentials/udfs/common/streaming/test/cases/File.in200
-rw-r--r--yql/essentials/udfs/common/streaming/test/cases/File.sql24
-rw-r--r--yql/essentials/udfs/common/streaming/test/cases/Simple.sql116
-rw-r--r--yql/essentials/udfs/common/streaming/test/cases/Yield.sql44
-rw-r--r--yql/essentials/udfs/common/streaming/test/cases/YieldSwitchEmpty.sql44
-rw-r--r--yql/essentials/udfs/common/streaming/test/ya.make16
-rw-r--r--yql/essentials/udfs/common/streaming/ya.make21
-rw-r--r--yql/essentials/udfs/common/string/string_udf.cpp926
-rw-r--r--yql/essentials/udfs/common/string/test/canondata/result.json112
-rw-r--r--yql/essentials/udfs/common/string/test/canondata/test.test_AsciiChecks_/results.txt124
-rw-r--r--yql/essentials/udfs/common/string/test/canondata/test.test_Base32Decode_/results.txt79
-rw-r--r--yql/essentials/udfs/common/string/test/canondata/test.test_Base32Encode_/results.txt44
-rw-r--r--yql/essentials/udfs/common/string/test/canondata/test.test_BlockAsciiChecks_/results.txt124
-rw-r--r--yql/essentials/udfs/common/string/test/canondata/test.test_BlockFind_/results.txt69
-rw-r--r--yql/essentials/udfs/common/string/test/canondata/test.test_BlockRemove_/results.txt173
-rw-r--r--yql/essentials/udfs/common/string/test/canondata/test.test_BlockReplace_/results.txt134
-rw-r--r--yql/essentials/udfs/common/string/test/canondata/test.test_BlockStreamFormat_/results.txt208
-rw-r--r--yql/essentials/udfs/common/string/test/canondata/test.test_BlockStringUDF_/results.txt169
-rw-r--r--yql/essentials/udfs/common/string/test/canondata/test.test_BlockStringUnsafeUDF_/results.txt158
-rw-r--r--yql/essentials/udfs/common/string/test/canondata/test.test_BlockTo_/results.txt88
-rw-r--r--yql/essentials/udfs/common/string/test/canondata/test.test_ExtendAndTake_/results.txt60
-rw-r--r--yql/essentials/udfs/common/string/test/canondata/test.test_Find_/results.txt147
-rw-r--r--yql/essentials/udfs/common/string/test/canondata/test.test_List_/results.txt265
-rw-r--r--yql/essentials/udfs/common/string/test/canondata/test.test_List_v0_/results.txt125
-rw-r--r--yql/essentials/udfs/common/string/test/canondata/test.test_Remove_/results.txt173
-rw-r--r--yql/essentials/udfs/common/string/test/canondata/test.test_ReplaceFirstLast_/results.txt84
-rw-r--r--yql/essentials/udfs/common/string/test/canondata/test.test_Replace_/results.txt134
-rw-r--r--yql/essentials/udfs/common/string/test/canondata/test.test_StreamFormat_/results.txt208
-rw-r--r--yql/essentials/udfs/common/string/test/canondata/test.test_StringUDF_/results.txt169
-rw-r--r--yql/essentials/udfs/common/string/test/canondata/test.test_StringUnsafeUDF_/results.txt158
-rw-r--r--yql/essentials/udfs/common/string/test/canondata/test.test_To_/results.txt294
-rw-r--r--yql/essentials/udfs/common/string/test/cases/AsciiChecks.in5
-rw-r--r--yql/essentials/udfs/common/string/test/cases/AsciiChecks.sql10
-rw-r--r--yql/essentials/udfs/common/string/test/cases/Base32Decode.in4
-rw-r--r--yql/essentials/udfs/common/string/test/cases/Base32Decode.sql6
-rw-r--r--yql/essentials/udfs/common/string/test/cases/Base32Encode.in3
-rw-r--r--yql/essentials/udfs/common/string/test/cases/Base32Encode.sql5
-rw-r--r--yql/essentials/udfs/common/string/test/cases/BlockAsciiChecks.in5
-rw-r--r--yql/essentials/udfs/common/string/test/cases/BlockAsciiChecks.sql13
-rw-r--r--yql/essentials/udfs/common/string/test/cases/BlockFind.sql7
-rw-r--r--yql/essentials/udfs/common/string/test/cases/BlockRemove.sql16
-rw-r--r--yql/essentials/udfs/common/string/test/cases/BlockReplace.sql13
-rw-r--r--yql/essentials/udfs/common/string/test/cases/BlockStreamFormat.in5
-rw-r--r--yql/essentials/udfs/common/string/test/cases/BlockStreamFormat.in.attr9
-rw-r--r--yql/essentials/udfs/common/string/test/cases/BlockStreamFormat.sql20
-rw-r--r--yql/essentials/udfs/common/string/test/cases/BlockStringUDF.in4
-rw-r--r--yql/essentials/udfs/common/string/test/cases/BlockStringUDF.sql18
-rw-r--r--yql/essentials/udfs/common/string/test/cases/BlockStringUnsafeUDF.in6
-rw-r--r--yql/essentials/udfs/common/string/test/cases/BlockStringUnsafeUDF.sql10
-rw-r--r--yql/essentials/udfs/common/string/test/cases/BlockTo.in7
-rw-r--r--yql/essentials/udfs/common/string/test/cases/BlockTo.sql9
-rw-r--r--yql/essentials/udfs/common/string/test/cases/ExtendAndTake.in3
-rw-r--r--yql/essentials/udfs/common/string/test/cases/ExtendAndTake.sql10
-rw-r--r--yql/essentials/udfs/common/string/test/cases/Find.sql12
-rw-r--r--yql/essentials/udfs/common/string/test/cases/List.in6
-rw-r--r--yql/essentials/udfs/common/string/test/cases/List.sql12
-rw-r--r--yql/essentials/udfs/common/string/test/cases/List_v0.in6
-rw-r--r--yql/essentials/udfs/common/string/test/cases/List_v0.sql27
-rw-r--r--yql/essentials/udfs/common/string/test/cases/Remove.sql14
-rw-r--r--yql/essentials/udfs/common/string/test/cases/Replace.sql11
-rw-r--r--yql/essentials/udfs/common/string/test/cases/ReplaceFirstLast.sql10
-rw-r--r--yql/essentials/udfs/common/string/test/cases/StreamFormat.in5
-rw-r--r--yql/essentials/udfs/common/string/test/cases/StreamFormat.in.attr9
-rw-r--r--yql/essentials/udfs/common/string/test/cases/StreamFormat.sql19
-rw-r--r--yql/essentials/udfs/common/string/test/cases/StringUDF.in4
-rw-r--r--yql/essentials/udfs/common/string/test/cases/StringUDF.sql15
-rw-r--r--yql/essentials/udfs/common/string/test/cases/StringUnsafeUDF.in6
-rw-r--r--yql/essentials/udfs/common/string/test/cases/StringUnsafeUDF.sql7
-rw-r--r--yql/essentials/udfs/common/string/test/cases/To.in7
-rw-r--r--yql/essentials/udfs/common/string/test/cases/To.sql14
-rw-r--r--yql/essentials/udfs/common/string/test/cases/default.in6
-rw-r--r--yql/essentials/udfs/common/string/test/ya.make13
-rw-r--r--yql/essentials/udfs/common/string/ya.make38
-rw-r--r--yql/essentials/udfs/common/top/test/canondata/result.json47
-rw-r--r--yql/essentials/udfs/common/top/test/canondata/test.test_BottomByTuple_/results.txt119
-rw-r--r--yql/essentials/udfs/common/top/test/canondata/test.test_BottomBy_/results.txt61
-rw-r--r--yql/essentials/udfs/common/top/test/canondata/test.test_Bottom_/results.txt59
-rw-r--r--yql/essentials/udfs/common/top/test/canondata/test.test_TopBy_/results.txt63
-rw-r--r--yql/essentials/udfs/common/top/test/canondata/test.test_TopList_/results.txt57
-rw-r--r--yql/essentials/udfs/common/top/test/canondata/test.test_TopTuple_/results.txt103
-rw-r--r--yql/essentials/udfs/common/top/test/canondata/test.test_TopVariant_/results.txt56
-rw-r--r--yql/essentials/udfs/common/top/test/canondata/test.test_Top_/results.txt57
-rw-r--r--yql/essentials/udfs/common/top/test/canondata/test.test_Window_/results.txt1030
-rw-r--r--yql/essentials/udfs/common/top/test/cases/Bottom.in15
-rw-r--r--yql/essentials/udfs/common/top/test/cases/Bottom.in.attr30
-rw-r--r--yql/essentials/udfs/common/top/test/cases/Bottom.sql9
-rw-r--r--yql/essentials/udfs/common/top/test/cases/BottomBy.in15
-rw-r--r--yql/essentials/udfs/common/top/test/cases/BottomBy.in.attr30
-rw-r--r--yql/essentials/udfs/common/top/test/cases/BottomBy.sql9
-rw-r--r--yql/essentials/udfs/common/top/test/cases/BottomByTuple.in15
-rw-r--r--yql/essentials/udfs/common/top/test/cases/BottomByTuple.in.attr30
-rw-r--r--yql/essentials/udfs/common/top/test/cases/BottomByTuple.sql9
-rw-r--r--yql/essentials/udfs/common/top/test/cases/Top.in15
-rw-r--r--yql/essentials/udfs/common/top/test/cases/Top.in.attr30
-rw-r--r--yql/essentials/udfs/common/top/test/cases/Top.sql9
-rw-r--r--yql/essentials/udfs/common/top/test/cases/TopBy.in15
-rw-r--r--yql/essentials/udfs/common/top/test/cases/TopBy.in.attr30
-rw-r--r--yql/essentials/udfs/common/top/test/cases/TopBy.sql9
-rw-r--r--yql/essentials/udfs/common/top/test/cases/TopList.sql4
-rw-r--r--yql/essentials/udfs/common/top/test/cases/TopTuple.in15
-rw-r--r--yql/essentials/udfs/common/top/test/cases/TopTuple.in.attr30
-rw-r--r--yql/essentials/udfs/common/top/test/cases/TopTuple.sql9
-rw-r--r--yql/essentials/udfs/common/top/test/cases/TopVariant.sql5
-rw-r--r--yql/essentials/udfs/common/top/test/cases/Window.sql30
-rw-r--r--yql/essentials/udfs/common/top/test/ya.make13
-rw-r--r--yql/essentials/udfs/common/top/top_udf.cpp954
-rw-r--r--yql/essentials/udfs/common/top/ya.make30
-rw-r--r--yql/essentials/udfs/common/topfreq/static/static_udf.cpp10
-rw-r--r--yql/essentials/udfs/common/topfreq/static/topfreq.cpp213
-rw-r--r--yql/essentials/udfs/common/topfreq/static/topfreq.h97
-rw-r--r--yql/essentials/udfs/common/topfreq/static/topfreq_udf.h393
-rw-r--r--yql/essentials/udfs/common/topfreq/static/ya.make18
-rw-r--r--yql/essentials/udfs/common/topfreq/test/canondata/result.json27
-rw-r--r--yql/essentials/udfs/common/topfreq/test/canondata/test.test_Floats_/results.txt55
-rw-r--r--yql/essentials/udfs/common/topfreq/test/canondata/test.test_Mode_/results.txt68
-rw-r--r--yql/essentials/udfs/common/topfreq/test/canondata/test.test_TopFreqStruct_/results.txt103
-rw-r--r--yql/essentials/udfs/common/topfreq/test/canondata/test.test_TopFreqTuple_/results.txt97
-rw-r--r--yql/essentials/udfs/common/topfreq/test/canondata/test.test_TopFreq_/results.txt83
-rw-r--r--yql/essentials/udfs/common/topfreq/test/cases/Floats.in0
-rw-r--r--yql/essentials/udfs/common/topfreq/test/cases/Floats.sql10
-rw-r--r--yql/essentials/udfs/common/topfreq/test/cases/Mode.in8
-rw-r--r--yql/essentials/udfs/common/topfreq/test/cases/Mode.in.attr30
-rw-r--r--yql/essentials/udfs/common/topfreq/test/cases/Mode.sql14
-rw-r--r--yql/essentials/udfs/common/topfreq/test/cases/TopFreq.in16
-rw-r--r--yql/essentials/udfs/common/topfreq/test/cases/TopFreq.in.attr30
-rw-r--r--yql/essentials/udfs/common/topfreq/test/cases/TopFreq.sql14
-rw-r--r--yql/essentials/udfs/common/topfreq/test/cases/TopFreqStruct.in16
-rw-r--r--yql/essentials/udfs/common/topfreq/test/cases/TopFreqStruct.in.attr30
-rw-r--r--yql/essentials/udfs/common/topfreq/test/cases/TopFreqStruct.sql8
-rw-r--r--yql/essentials/udfs/common/topfreq/test/cases/TopFreqTuple.in16
-rw-r--r--yql/essentials/udfs/common/topfreq/test/cases/TopFreqTuple.in.attr30
-rw-r--r--yql/essentials/udfs/common/topfreq/test/cases/TopFreqTuple.sql8
-rw-r--r--yql/essentials/udfs/common/topfreq/test/ya.make13
-rw-r--r--yql/essentials/udfs/common/topfreq/topfreq_udf.cpp3
-rw-r--r--yql/essentials/udfs/common/topfreq/topfreq_udf_ut.cpp451
-rw-r--r--yql/essentials/udfs/common/topfreq/ut/ya.make15
-rw-r--r--yql/essentials/udfs/common/topfreq/ya.make32
-rw-r--r--yql/essentials/udfs/common/unicode_base/lib/unicode_base_udf.cpp1
-rw-r--r--yql/essentials/udfs/common/unicode_base/lib/unicode_base_udf.h534
-rw-r--r--yql/essentials/udfs/common/unicode_base/lib/ya.make22
-rw-r--r--yql/essentials/udfs/common/unicode_base/test/canondata/result.json67
-rw-r--r--yql/essentials/udfs/common/unicode_base/test/canondata/test.test_Find_/results.txt86
-rw-r--r--yql/essentials/udfs/common/unicode_base/test/canondata/test.test_IsCategory_/results.txt164
-rw-r--r--yql/essentials/udfs/common/unicode_base/test/canondata/test.test_List_/results.txt265
-rw-r--r--yql/essentials/udfs/common/unicode_base/test/canondata/test.test_Remove_/results.txt178
-rw-r--r--yql/essentials/udfs/common/unicode_base/test/canondata/test.test_Replace_/results.txt228
-rw-r--r--yql/essentials/udfs/common/unicode_base/test/canondata/test.test_Strip_/results.txt76
-rw-r--r--yql/essentials/udfs/common/unicode_base/test/canondata/test.test_ToUint64F0_/extracted8
-rw-r--r--yql/essentials/udfs/common/unicode_base/test/canondata/test.test_ToUint64F1_/extracted8
-rw-r--r--yql/essentials/udfs/common/unicode_base/test/canondata/test.test_ToUint64F2_/extracted8
-rw-r--r--yql/essentials/udfs/common/unicode_base/test/canondata/test.test_ToUint64_/results.txt76
-rw-r--r--yql/essentials/udfs/common/unicode_base/test/canondata/test.test_To_/results.txt102
-rw-r--r--yql/essentials/udfs/common/unicode_base/test/canondata/test.test_TryToUint64_/results.txt198
-rw-r--r--yql/essentials/udfs/common/unicode_base/test/canondata/test.test_Unicode_/results.txt509
-rw-r--r--yql/essentials/udfs/common/unicode_base/test/cases/Find.sql13
-rw-r--r--yql/essentials/udfs/common/unicode_base/test/cases/IsCategory.sql21
-rw-r--r--yql/essentials/udfs/common/unicode_base/test/cases/List.in6
-rw-r--r--yql/essentials/udfs/common/unicode_base/test/cases/List.in.attr12
-rw-r--r--yql/essentials/udfs/common/unicode_base/test/cases/List.sql12
-rw-r--r--yql/essentials/udfs/common/unicode_base/test/cases/Remove.sql9
-rw-r--r--yql/essentials/udfs/common/unicode_base/test/cases/Replace.sql11
-rw-r--r--yql/essentials/udfs/common/unicode_base/test/cases/Strip.sql9
-rw-r--r--yql/essentials/udfs/common/unicode_base/test/cases/To.in8
-rw-r--r--yql/essentials/udfs/common/unicode_base/test/cases/To.in.attr12
-rw-r--r--yql/essentials/udfs/common/unicode_base/test/cases/To.sql9
-rw-r--r--yql/essentials/udfs/common/unicode_base/test/cases/ToUint64.sql9
-rw-r--r--yql/essentials/udfs/common/unicode_base/test/cases/ToUint64F0.cfg2
-rw-r--r--yql/essentials/udfs/common/unicode_base/test/cases/ToUint64F0.sql3
-rw-r--r--yql/essentials/udfs/common/unicode_base/test/cases/ToUint64F1.cfg2
-rw-r--r--yql/essentials/udfs/common/unicode_base/test/cases/ToUint64F1.sql3
-rw-r--r--yql/essentials/udfs/common/unicode_base/test/cases/ToUint64F2.cfg2
-rw-r--r--yql/essentials/udfs/common/unicode_base/test/cases/ToUint64F2.sql3
-rw-r--r--yql/essentials/udfs/common/unicode_base/test/cases/TryToUint64.sql17
-rw-r--r--yql/essentials/udfs/common/unicode_base/test/cases/Unicode.in7
-rw-r--r--yql/essentials/udfs/common/unicode_base/test/cases/Unicode.sql19
-rw-r--r--yql/essentials/udfs/common/unicode_base/test/cases/default.in5
-rw-r--r--yql/essentials/udfs/common/unicode_base/test/ya.make13
-rw-r--r--yql/essentials/udfs/common/unicode_base/unicode_base.cpp4
-rw-r--r--yql/essentials/udfs/common/unicode_base/ya.make30
-rw-r--r--yql/essentials/udfs/common/url_base/lib/url_base_udf.cpp1
-rw-r--r--yql/essentials/udfs/common/url_base/lib/url_base_udf.h586
-rw-r--r--yql/essentials/udfs/common/url_base/lib/url_parse.cpp53
-rw-r--r--yql/essentials/udfs/common/url_base/lib/url_parse.h59
-rw-r--r--yql/essentials/udfs/common/url_base/lib/url_query.cpp243
-rw-r--r--yql/essentials/udfs/common/url_base/lib/url_query.h134
-rw-r--r--yql/essentials/udfs/common/url_base/lib/ya.make27
-rw-r--r--yql/essentials/udfs/common/url_base/test/canondata/result.json47
-rw-r--r--yql/essentials/udfs/common/url_base/test/canondata/test.test_BlockPunycode_/results.txt106
-rw-r--r--yql/essentials/udfs/common/url_base/test/canondata/test.test_BlockTld_/results.txt59
-rw-r--r--yql/essentials/udfs/common/url_base/test/canondata/test.test_BlockUrl_/results.txt1212
-rw-r--r--yql/essentials/udfs/common/url_base/test/canondata/test.test_Punycode_/results.txt106
-rw-r--r--yql/essentials/udfs/common/url_base/test/canondata/test.test_Tld_/results.txt59
-rw-r--r--yql/essentials/udfs/common/url_base/test/canondata/test.test_UrlQueryMaxFieldsErr_/extracted8
-rw-r--r--yql/essentials/udfs/common/url_base/test/canondata/test.test_UrlQueryStrictErr_/extracted8
-rw-r--r--yql/essentials/udfs/common/url_base/test/canondata/test.test_UrlQuery_/results.txt1112
-rw-r--r--yql/essentials/udfs/common/url_base/test/canondata/test.test_Url_/results.txt1640
-rw-r--r--yql/essentials/udfs/common/url_base/test/cases/BlockPunycode.in4
-rw-r--r--yql/essentials/udfs/common/url_base/test/cases/BlockPunycode.sql10
-rw-r--r--yql/essentials/udfs/common/url_base/test/cases/BlockTld.in4
-rw-r--r--yql/essentials/udfs/common/url_base/test/cases/BlockTld.sql7
-rw-r--r--yql/essentials/udfs/common/url_base/test/cases/BlockUrl.in18
-rw-r--r--yql/essentials/udfs/common/url_base/test/cases/BlockUrl.sql27
-rw-r--r--yql/essentials/udfs/common/url_base/test/cases/Punycode.in4
-rw-r--r--yql/essentials/udfs/common/url_base/test/cases/Punycode.sql9
-rw-r--r--yql/essentials/udfs/common/url_base/test/cases/Tld.in4
-rw-r--r--yql/essentials/udfs/common/url_base/test/cases/Tld.sql6
-rw-r--r--yql/essentials/udfs/common/url_base/test/cases/Url.in18
-rw-r--r--yql/essentials/udfs/common/url_base/test/cases/Url.sql29
-rw-r--r--yql/essentials/udfs/common/url_base/test/cases/UrlQuery.in20
-rw-r--r--yql/essentials/udfs/common/url_base/test/cases/UrlQuery.sql15
-rw-r--r--yql/essentials/udfs/common/url_base/test/cases/UrlQueryMaxFieldsErr.cfg1
-rw-r--r--yql/essentials/udfs/common/url_base/test/cases/UrlQueryMaxFieldsErr.sql2
-rw-r--r--yql/essentials/udfs/common/url_base/test/cases/UrlQueryStrictErr.cfg1
-rw-r--r--yql/essentials/udfs/common/url_base/test/cases/UrlQueryStrictErr.sql2
-rw-r--r--yql/essentials/udfs/common/url_base/test/ya.make17
-rw-r--r--yql/essentials/udfs/common/url_base/url_base.cpp7
-rw-r--r--yql/essentials/udfs/common/url_base/ya.make32
-rw-r--r--yql/essentials/udfs/common/ya.make31
-rw-r--r--yql/essentials/udfs/common/yson2/test/canondata/result.json172
-rw-r--r--yql/essentials/udfs/common/yson2/test/canondata/test.test_AccessJson_/results.txt126
-rw-r--r--yql/essentials/udfs/common/yson2/test/canondata/test.test_Access_/results.txt33
-rw-r--r--yql/essentials/udfs/common/yson2/test/canondata/test.test_Attrs_/results.txt568
-rw-r--r--yql/essentials/udfs/common/yson2/test/canondata/test.test_AutoConvertTo_/results.txt556
-rw-r--r--yql/essentials/udfs/common/yson2/test/canondata/test.test_Contains_/results.txt122
-rw-r--r--yql/essentials/udfs/common/yson2/test/canondata/test.test_ConvertTo_/results.txt245
-rw-r--r--yql/essentials/udfs/common/yson2/test/canondata/test.test_Dicts_/results.txt178
-rw-r--r--yql/essentials/udfs/common/yson2/test/canondata/test.test_EmptyDicts_/results.txt103
-rw-r--r--yql/essentials/udfs/common/yson2/test/canondata/test.test_EmptyLists_/results.txt50
-rw-r--r--yql/essentials/udfs/common/yson2/test/canondata/test.test_Equals_/results.txt124
-rw-r--r--yql/essentials/udfs/common/yson2/test/canondata/test.test_From_/results.txt188
-rw-r--r--yql/essentials/udfs/common/yson2/test/canondata/test.test_GenericConvertToEmptyStruct_/results.txt34
-rw-r--r--yql/essentials/udfs/common/yson2/test/canondata/test.test_GenericConvertToWithAutoConvert_/results.txt338
-rw-r--r--yql/essentials/udfs/common/yson2/test/canondata/test.test_GenericConvertToWithNoStrict_/results.txt287
-rw-r--r--yql/essentials/udfs/common/yson2/test/canondata/test.test_GenericConvertTo_/results.txt379
-rw-r--r--yql/essentials/udfs/common/yson2/test/canondata/test.test_GenericFrom_/results.txt345
-rw-r--r--yql/essentials/udfs/common/yson2/test/canondata/test.test_GetHash_/results.txt116
-rw-r--r--yql/essentials/udfs/common/yson2/test/canondata/test.test_Get_/results.txt57
-rw-r--r--yql/essentials/udfs/common/yson2/test/canondata/test.test_GoodForYsonBadForJson_/results.txt82
-rw-r--r--yql/essentials/udfs/common/yson2/test/canondata/test.test_ImplicitFromRes_/results.txt41
-rw-r--r--yql/essentials/udfs/common/yson2/test/canondata/test.test_IsType_/results.txt154
-rw-r--r--yql/essentials/udfs/common/yson2/test/canondata/test.test_JsonSerializeSkipMapEntity_/results.txt124
-rw-r--r--yql/essentials/udfs/common/yson2/test/canondata/test.test_JsonWithNanAsString_/results.txt59
-rw-r--r--yql/essentials/udfs/common/yson2/test/canondata/test.test_JsonWithUtf8_/results.txt67
-rw-r--r--yql/essentials/udfs/common/yson2/test/canondata/test.test_Lists_/results.txt142
-rw-r--r--yql/essentials/udfs/common/yson2/test/canondata/test.test_Lookup_/results.txt225
-rw-r--r--yql/essentials/udfs/common/yson2/test/canondata/test.test_NegativeArrayIndex_/results.txt133
-rw-r--r--yql/essentials/udfs/common/yson2/test/canondata/test.test_ParseString_/results.txt128
-rw-r--r--yql/essentials/udfs/common/yson2/test/canondata/test.test_Scalars_/results.txt462
-rw-r--r--yql/essentials/udfs/common/yson2/test/canondata/test.test_SerializeDouble_/results.txt66
-rw-r--r--yql/essentials/udfs/common/yson2/test/canondata/test.test_Serialize_/results.txt70
-rw-r--r--yql/essentials/udfs/common/yson2/test/canondata/test.test_WeakYsonRest_/results.txt53
-rw-r--r--yql/essentials/udfs/common/yson2/test/canondata/test.test_WithAttrs_/results.txt91
-rw-r--r--yql/essentials/udfs/common/yson2/test/canondata/test.test_YPath_/results.txt112
-rw-r--r--yql/essentials/udfs/common/yson2/test/cases/Access.sql4
-rw-r--r--yql/essentials/udfs/common/yson2/test/cases/AccessJson.sql12
-rw-r--r--yql/essentials/udfs/common/yson2/test/cases/Attrs.sql56
-rw-r--r--yql/essentials/udfs/common/yson2/test/cases/AutoConvertTo.sql53
-rw-r--r--yql/essentials/udfs/common/yson2/test/cases/Contains.sql11
-rw-r--r--yql/essentials/udfs/common/yson2/test/cases/ConvertTo.sql34
-rw-r--r--yql/essentials/udfs/common/yson2/test/cases/Dicts.sql13
-rw-r--r--yql/essentials/udfs/common/yson2/test/cases/EmptyDicts.sql9
-rw-r--r--yql/essentials/udfs/common/yson2/test/cases/EmptyLists.sql5
-rw-r--r--yql/essentials/udfs/common/yson2/test/cases/Equals.sql26
-rw-r--r--yql/essentials/udfs/common/yson2/test/cases/From.sql21
-rw-r--r--yql/essentials/udfs/common/yson2/test/cases/GenericConvertTo.sql19
-rw-r--r--yql/essentials/udfs/common/yson2/test/cases/GenericConvertToEmptyStruct.sql2
-rw-r--r--yql/essentials/udfs/common/yson2/test/cases/GenericConvertToWithAutoConvert.sql15
-rw-r--r--yql/essentials/udfs/common/yson2/test/cases/GenericConvertToWithNoStrict.sql15
-rw-r--r--yql/essentials/udfs/common/yson2/test/cases/GenericFrom.sql21
-rw-r--r--yql/essentials/udfs/common/yson2/test/cases/Get.sql8
-rw-r--r--yql/essentials/udfs/common/yson2/test/cases/GetHash.sql27
-rw-r--r--yql/essentials/udfs/common/yson2/test/cases/GoodForYsonBadForJson.sql7
-rw-r--r--yql/essentials/udfs/common/yson2/test/cases/ImplicitFromRes.sql4
-rw-r--r--yql/essentials/udfs/common/yson2/test/cases/IsType.sql12
-rw-r--r--yql/essentials/udfs/common/yson2/test/cases/JsonSerializeSkipMapEntity.sql18
-rw-r--r--yql/essentials/udfs/common/yson2/test/cases/JsonWithNanAsString.sql8
-rw-r--r--yql/essentials/udfs/common/yson2/test/cases/JsonWithUtf8.sql5
-rw-r--r--yql/essentials/udfs/common/yson2/test/cases/Lists.sql10
-rw-r--r--yql/essentials/udfs/common/yson2/test/cases/Lookup.sql29
-rw-r--r--yql/essentials/udfs/common/yson2/test/cases/NegativeArrayIndex.sql13
-rw-r--r--yql/essentials/udfs/common/yson2/test/cases/ParseString.sql11
-rw-r--r--yql/essentials/udfs/common/yson2/test/cases/Scalars.sql46
-rw-r--r--yql/essentials/udfs/common/yson2/test/cases/Serialize.sql3
-rw-r--r--yql/essentials/udfs/common/yson2/test/cases/SerializeDouble.sql13
-rw-r--r--yql/essentials/udfs/common/yson2/test/cases/WeakYsonRest.in5
-rw-r--r--yql/essentials/udfs/common/yson2/test/cases/WeakYsonRest.in.attr12
-rw-r--r--yql/essentials/udfs/common/yson2/test/cases/WeakYsonRest.sql7
-rw-r--r--yql/essentials/udfs/common/yson2/test/cases/WithAttrs.sql7
-rw-r--r--yql/essentials/udfs/common/yson2/test/cases/YPath.sql13
-rw-r--r--yql/essentials/udfs/common/yson2/test/ya.make12
-rw-r--r--yql/essentials/udfs/common/yson2/ya.make32
-rw-r--r--yql/essentials/udfs/common/yson2/yson2_udf.cpp1203
-rw-r--r--yql/essentials/udfs/ya.make1
781 files changed, 76503 insertions, 0 deletions
diff --git a/yql/essentials/udfs/common/compress_base/compress_udf.cpp b/yql/essentials/udfs/common/compress_base/compress_udf.cpp
new file mode 100644
index 00000000000..efd2d0b3c54
--- /dev/null
+++ b/yql/essentials/udfs/common/compress_base/compress_udf.cpp
@@ -0,0 +1,17 @@
+#include "lib/compress_base_udf.h"
+
+using namespace NYql::NUdf;
+
+namespace NCompress { // Holds TCompressModule, which is passed to REGISTER_MODULES at the end of this file.
+ SIMPLE_MODULE(TCompressModule, EXPORTED_COMPRESS_BASE_UDF); // EXPORTED_COMPRESS_BASE_UDF is not defined in this file; presumably the UDF list from lib/compress_base_udf.h (confirm).
+} // namespace NCompress
+
+namespace NDecompress { // Holds TDecompressModule, which is passed to REGISTER_MODULES at the end of this file.
+ SIMPLE_MODULE(TDecompressModule, EXPORTED_DECOMPRESS_BASE_UDF); // EXPORTED_DECOMPRESS_BASE_UDF is not defined in this file; presumably the UDF list from lib/compress_base_udf.h (confirm).
+} // namespace NDecompress
+
+namespace NTryDecompress { // Holds TTryDecompressModule, which is passed to REGISTER_MODULES at the end of this file.
+ SIMPLE_MODULE(TTryDecompressModule, EXPORTED_TRY_DECOMPRESS_BASE_UDF); // EXPORTED_TRY_DECOMPRESS_BASE_UDF is not defined in this file; presumably the UDF list from lib/compress_base_udf.h (confirm).
+} // namespace NTryDecompress
+
+REGISTER_MODULES(NCompress::TCompressModule, NDecompress::TDecompressModule, NTryDecompress::TTryDecompressModule); // Hands the three module types declared above to REGISTER_MODULES; the macro comes from outside this file (presumably the UDF helpers header -- confirm).
diff --git a/yql/essentials/udfs/common/compress_base/lib/compress_base_udf.cpp b/yql/essentials/udfs/common/compress_base/lib/compress_base_udf.cpp
new file mode 100644
index 00000000000..237abe271eb
--- /dev/null
+++ b/yql/essentials/udfs/common/compress_base/lib/compress_base_udf.cpp
@@ -0,0 +1 @@
+#include "compress_base_udf.h"
\ No newline at end of file
diff --git a/yql/essentials/udfs/common/compress_base/lib/compress_base_udf.h b/yql/essentials/udfs/common/compress_base/lib/compress_base_udf.h
new file mode 100644
index 00000000000..58709134d6a
--- /dev/null
+++ b/yql/essentials/udfs/common/compress_base/lib/compress_base_udf.h
@@ -0,0 +1,218 @@
+#pragma once
+
+#include <yql/essentials/public/udf/udf_helpers.h>
+
+#include <library/cpp/streams/brotli/brotli.h>
+#include <library/cpp/streams/bzip2/bzip2.h>
+#include <library/cpp/streams/zstd/zstd.h>
+#include <library/cpp/streams/lzma/lzma.h>
+#include <library/cpp/streams/xz/decompress.h>
+
+#include <util/stream/mem.h>
+#include <util/stream/zlib.h>
+
+#include <contrib/libs/snappy/snappy.h>
+
+using namespace NYql::NUdf;
+
+namespace NCompress {
+    // Feeds `data` through a streaming compressor of type TCompressor and returns
+    // the compressed bytes. `ctorArgs` are passed to the compressor constructor
+    // right after the output stream pointer (stream type and/or compression level).
+    template <typename TCompressor, typename... TCtorArgs>
+    TString CompressToString(const TStringRef& data, TCtorArgs... ctorArgs) {
+        TString compressed;
+        {
+            TStringOutput sink(compressed);
+            TCompressor compressor(&sink, ctorArgs...);
+            compressor.Write(data);
+            compressor.Finish();
+        }
+        return compressed;
+    }
+
+    // Compress::Gzip(data, level): zlib stream with the GZip container.
+    SIMPLE_UDF(TGzip, char*(TAutoMap<char*>, ui8)) {
+        const auto level = args[1].Get<ui8>();
+        return valueBuilder->NewString(CompressToString<TZLibCompress>(args[0].AsStringRef(), ZLib::GZip, level));
+    }
+
+    // Compress::Zlib(data, level): zlib stream with the ZLib container.
+    SIMPLE_UDF(TZlib, char*(TAutoMap<char*>, ui8)) {
+        const auto level = args[1].Get<ui8>();
+        return valueBuilder->NewString(CompressToString<TZLibCompress>(args[0].AsStringRef(), ZLib::ZLib, level));
+    }
+
+    // Compress::Brotli(data, level).
+    SIMPLE_UDF(TBrotli, char*(TAutoMap<char*>, ui8)) {
+        const auto level = args[1].Get<ui8>();
+        return valueBuilder->NewString(CompressToString<TBrotliCompress>(args[0].AsStringRef(), level));
+    }
+
+    // Compress::Lzma(data, level).
+    SIMPLE_UDF(TLzma, char*(TAutoMap<char*>, ui8)) {
+        const auto level = args[1].Get<ui8>();
+        return valueBuilder->NewString(CompressToString<TLzmaCompress>(args[0].AsStringRef(), level));
+    }
+
+    // Compress::BZip2(data, level).
+    SIMPLE_UDF(TBZip2, char*(TAutoMap<char*>, ui8)) {
+        const auto level = args[1].Get<ui8>();
+        return valueBuilder->NewString(CompressToString<TBZipCompress>(args[0].AsStringRef(), level));
+    }
+
+    // Compress::Snappy(data): one-shot snappy call, no level argument.
+    SIMPLE_UDF(TSnappy, char*(TAutoMap<char*>)) {
+        const TStringRef& source = args[0].AsStringRef();
+        TString compressed;
+        snappy::Compress(source.Data(), source.Size(), &compressed);
+        return valueBuilder->NewString(compressed);
+    }
+
+    // Compress::Zstd(data, level).
+    SIMPLE_UDF(TZstd, char*(TAutoMap<char*>, ui8)) {
+        const auto level = args[1].Get<ui8>();
+        return valueBuilder->NewString(CompressToString<TZstdCompress>(args[0].AsStringRef(), level));
+    }
+}
+
+namespace NDecompress {
+    // Reads the whole of `compressed` through a streaming decompressor of type
+    // TDecompressor and returns the decoded bytes. Errors raised by the
+    // decompressor propagate to the caller.
+    template <typename TDecompressor>
+    TString DecompressToString(const TStringRef& compressed) {
+        TMemoryInput source(compressed.Data(), compressed.Size());
+        TDecompressor decompressor(&source);
+        return decompressor.ReadAll();
+    }
+
+    // Decompress::Gzip(data).
+    SIMPLE_UDF(TGzip, char*(TAutoMap<char*>)) {
+        return valueBuilder->NewString(DecompressToString<TZLibDecompress>(args->AsStringRef()));
+    }
+
+    // Decompress::Zlib(data): same decompressor class as Gzip.
+    SIMPLE_UDF(TZlib, char*(TAutoMap<char*>)) {
+        return valueBuilder->NewString(DecompressToString<TZLibDecompress>(args->AsStringRef()));
+    }
+
+    // Decompress::Brotli(data).
+    SIMPLE_UDF(TBrotli, char*(TAutoMap<char*>)) {
+        return valueBuilder->NewString(DecompressToString<TBrotliDecompress>(args->AsStringRef()));
+    }
+
+    // Decompress::Lzma(data).
+    SIMPLE_UDF(TLzma, char*(TAutoMap<char*>)) {
+        return valueBuilder->NewString(DecompressToString<TLzmaDecompress>(args->AsStringRef()));
+    }
+
+    // Decompress::BZip2(data).
+    SIMPLE_UDF(TBZip2, char*(TAutoMap<char*>)) {
+        return valueBuilder->NewString(DecompressToString<TBZipDecompress>(args->AsStringRef()));
+    }
+
+    // Decompress::Snappy(data): throws yexception when snappy rejects the input.
+    SIMPLE_UDF(TSnappy, char*(TAutoMap<char*>)) {
+        const auto& packed = args->AsStringRef();
+        TString unpacked;
+        if (!snappy::Uncompress(packed.Data(), packed.Size(), &unpacked)) {
+            ythrow yexception() << "failed to decompress message with snappy";
+        }
+        return valueBuilder->NewString(unpacked);
+    }
+
+    // Decompress::Zstd(data).
+    SIMPLE_UDF(TZstd, char*(TAutoMap<char*>)) {
+        return valueBuilder->NewString(DecompressToString<TZstdDecompress>(args->AsStringRef()));
+    }
+
+    // Decompress::Xz(data).
+    SIMPLE_UDF(TXz, char*(TAutoMap<char*>)) {
+        return valueBuilder->NewString(DecompressToString<TXzDecompress>(args->AsStringRef()));
+    }
+}
+
+namespace NTryDecompress {
+    // Decompresses the first argument with a streaming decompressor of type
+    // TDecompressor. Any std::exception raised on the way is swallowed and an
+    // empty (null) value is returned instead, which is what makes these UDFs
+    // "Try" variants with an optional result.
+    template <typename TDecompressor>
+    TUnboxedValue TryDecompressToValue(const IValueBuilder* valueBuilder, const TUnboxedValuePod* args) {
+        try {
+            const auto& packed = args->AsStringRef();
+            TMemoryInput source(packed.Data(), packed.Size());
+            TDecompressor decompressor(&source);
+            return valueBuilder->NewString(decompressor.ReadAll());
+        } catch (const std::exception&) {
+            return TUnboxedValuePod();
+        }
+    }
+
+    // TryDecompress::Gzip(data).
+    SIMPLE_UDF(TGzip, TOptional<char*>(TAutoMap<char*>)) {
+        return TryDecompressToValue<TZLibDecompress>(valueBuilder, args);
+    }
+
+    // TryDecompress::Zlib(data): same decompressor class as Gzip.
+    SIMPLE_UDF(TZlib, TOptional<char*>(TAutoMap<char*>)) {
+        return TryDecompressToValue<TZLibDecompress>(valueBuilder, args);
+    }
+
+    // TryDecompress::Brotli(data).
+    SIMPLE_UDF(TBrotli, TOptional<char*>(TAutoMap<char*>)) {
+        return TryDecompressToValue<TBrotliDecompress>(valueBuilder, args);
+    }
+
+    // TryDecompress::Lzma(data).
+    SIMPLE_UDF(TLzma, TOptional<char*>(TAutoMap<char*>)) {
+        return TryDecompressToValue<TLzmaDecompress>(valueBuilder, args);
+    }
+
+    // TryDecompress::BZip2(data).
+    SIMPLE_UDF(TBZip2, TOptional<char*>(TAutoMap<char*>)) {
+        return TryDecompressToValue<TBZipDecompress>(valueBuilder, args);
+    }
+
+    // TryDecompress::Snappy(data): snappy reports failure through its return
+    // value, so no exception handling is involved here.
+    SIMPLE_UDF(TSnappy, TOptional<char*>(TAutoMap<char*>)) {
+        const auto& packed = args->AsStringRef();
+        TString unpacked;
+        if (!snappy::Uncompress(packed.Data(), packed.Size(), &unpacked)) {
+            return TUnboxedValuePod();
+        }
+        return valueBuilder->NewString(unpacked);
+    }
+
+    // TryDecompress::Zstd(data).
+    SIMPLE_UDF(TZstd, TOptional<char*>(TAutoMap<char*>)) {
+        return TryDecompressToValue<TZstdDecompress>(valueBuilder, args);
+    }
+
+    // TryDecompress::Xz(data).
+    SIMPLE_UDF(TXz, TOptional<char*>(TAutoMap<char*>)) {
+        return TryDecompressToValue<TXzDecompress>(valueBuilder, args);
+    }
+}
+
+// Lists of UDF class names for module declarations. The names are unqualified
+// on purpose: each list must be expanded inside the namespace (NCompress,
+// NDecompress, NTryDecompress) whose classes it refers to. Note that TXz exists
+// only on the decompression side.
+#define EXPORTED_COMPRESS_BASE_UDF TGzip, TZlib, TBrotli, TLzma, TBZip2, TSnappy, TZstd
+#define EXPORTED_DECOMPRESS_BASE_UDF TGzip, TZlib, TBrotli, TLzma, TBZip2, TSnappy, TZstd, TXz
+#define EXPORTED_TRY_DECOMPRESS_BASE_UDF TGzip, TZlib, TBrotli, TLzma, TBZip2, TSnappy, TZstd, TXz
diff --git a/yql/essentials/udfs/common/compress_base/lib/ya.make b/yql/essentials/udfs/common/compress_base/lib/ya.make
new file mode 100644
index 00000000000..ca606d244a0
--- /dev/null
+++ b/yql/essentials/udfs/common/compress_base/lib/ya.make
@@ -0,0 +1,23 @@
+LIBRARY()
+
+YQL_ABI_VERSION(
+ 2
+ 23
+ 0
+)
+
+SRCS(
+ compress_base_udf.cpp
+)
+
+PEERDIR(
+ yql/essentials/public/udf
+ contrib/libs/snappy
+ library/cpp/streams/brotli
+ library/cpp/streams/bzip2
+ library/cpp/streams/lzma
+ library/cpp/streams/xz
+ library/cpp/streams/zstd
+)
+
+END()
diff --git a/yql/essentials/udfs/common/compress_base/test/canondata/result.json b/yql/essentials/udfs/common/compress_base/test/canondata/result.json
new file mode 100644
index 00000000000..5323168bebe
--- /dev/null
+++ b/yql/essentials/udfs/common/compress_base/test/canondata/result.json
@@ -0,0 +1,12 @@
+{
+ "test.test[RoundTrip]": [
+ {
+ "uri": "file://test.test_RoundTrip_/results.txt"
+ }
+ ],
+ "test.test[TryDecompress]": [
+ {
+ "uri": "file://test.test_TryDecompress_/results.txt"
+ }
+ ]
+}
diff --git a/yql/essentials/udfs/common/compress_base/test/canondata/test.test_RoundTrip_/results.txt b/yql/essentials/udfs/common/compress_base/test/canondata/test.test_RoundTrip_/results.txt
new file mode 100644
index 00000000000..2c0cefa419d
--- /dev/null
+++ b/yql/essentials/udfs/common/compress_base/test/canondata/test.test_RoundTrip_/results.txt
@@ -0,0 +1,124 @@
+[
+ {
+ "Write" = [
+ {
+ "Type" = [
+ "ListType";
+ [
+ "StructType";
+ [
+ [
+ "gzip";
+ [
+ "DataType";
+ "String"
+ ]
+ ];
+ [
+ "zlib";
+ [
+ "DataType";
+ "String"
+ ]
+ ];
+ [
+ "brotli";
+ [
+ "DataType";
+ "String"
+ ]
+ ];
+ [
+ "lzma";
+ [
+ "DataType";
+ "String"
+ ]
+ ];
+ [
+ "bzip2";
+ [
+ "DataType";
+ "String"
+ ]
+ ];
+ [
+ "zstd";
+ [
+ "DataType";
+ "String"
+ ]
+ ];
+ [
+ "snappy";
+ [
+ "DataType";
+ "String"
+ ]
+ ]
+ ]
+ ]
+ ];
+ "Data" = [
+ [
+ [
+ "H4sIAAAAAAAAAwMAAAAAAAAAAAA="
+ ];
+ "x^\3\0\0\0\0\1";
+ "k\0\3";
+ [
+ "XQAAAAEAg//7///AAAAA"
+ ];
+ [
+ "QlpoNRdyRThQkAAAAAA="
+ ];
+ [
+ "KLUv/SAAAQAA"
+ ];
+ "\0"
+ ];
+ [
+ [
+ "H4sIAAAAAAAAAzMEALfv3IMBAAAA"
+ ];
+ "x^3\4\0\0002\0002";
+ [
+ "CwCAMQM="
+ ];
+ [
+ "XQAAAAEAGMH7////4AAAAA=="
+ ];
+ [
+ "QlpoNTFBWSZTWWEEMGwAAAAIACAAIAAhGEaC7kinChIMIIYNgA=="
+ ];
+ [
+ "KLUv/QBYCQAAMQ=="
+ ];
+ "\1\0001"
+ ];
+ [
+ [
+ "H4sIAAAAAAAAAzM0MjYxNTO3sDQAAOWuHSYKAAAA"
+ ];
+ [
+ "eF4zNDI2MTUzt7A0AAALLAIO"
+ ];
+ [
+ "iwSAMTIzNDU2Nzg5MAM="
+ ];
+ [
+ "XQAAAAEAGIyCtsQRNFxO4dpOCbf//KPgAA=="
+ ];
+ [
+ "QlpoNTFBWSZTWVBoU7YAAACIAH/gIAAiAaaYQAwVXmjj6Yu5IpwoSCg0KdsA"
+ ];
+ [
+ "KLUv/QBYUQAAMTIzNDU2Nzg5MA=="
+ ];
+ "\n$1234567890"
+ ]
+ ]
+ }
+ ]
+ }
+] \ No newline at end of file
diff --git a/yql/essentials/udfs/common/compress_base/test/canondata/test.test_TryDecompress_/results.txt b/yql/essentials/udfs/common/compress_base/test/canondata/test.test_TryDecompress_/results.txt
new file mode 100644
index 00000000000..649a6670a93
--- /dev/null
+++ b/yql/essentials/udfs/common/compress_base/test/canondata/test.test_TryDecompress_/results.txt
@@ -0,0 +1,188 @@
+[
+ {
+ "Write" = [
+ {
+ "Type" = [
+ "ListType";
+ [
+ "StructType";
+ [
+ [
+ "ok_Gzip";
+ [
+ "OptionalType";
+ [
+ "DataType";
+ "Bool"
+ ]
+ ]
+ ];
+ [
+ "bad_Gzip";
+ [
+ "OptionalType";
+ [
+ "DataType";
+ "String"
+ ]
+ ]
+ ];
+ [
+ "ok_Zlib";
+ [
+ "OptionalType";
+ [
+ "DataType";
+ "Bool"
+ ]
+ ]
+ ];
+ [
+ "bad_Zlib";
+ [
+ "OptionalType";
+ [
+ "DataType";
+ "String"
+ ]
+ ]
+ ];
+ [
+ "ok_Brotli";
+ [
+ "OptionalType";
+ [
+ "DataType";
+ "Bool"
+ ]
+ ]
+ ];
+ [
+ "bad_Brotli";
+ [
+ "OptionalType";
+ [
+ "DataType";
+ "String"
+ ]
+ ]
+ ];
+ [
+ "ok_Lzma";
+ [
+ "OptionalType";
+ [
+ "DataType";
+ "Bool"
+ ]
+ ]
+ ];
+ [
+ "bad_Lzma";
+ [
+ "OptionalType";
+ [
+ "DataType";
+ "String"
+ ]
+ ]
+ ];
+ [
+ "ok_BZip2";
+ [
+ "OptionalType";
+ [
+ "DataType";
+ "Bool"
+ ]
+ ]
+ ];
+ [
+ "bad_BZip2";
+ [
+ "OptionalType";
+ [
+ "DataType";
+ "String"
+ ]
+ ]
+ ];
+ [
+ "ok_Snappy";
+ [
+ "OptionalType";
+ [
+ "DataType";
+ "Bool"
+ ]
+ ]
+ ];
+ [
+ "bad_Snappy";
+ [
+ "OptionalType";
+ [
+ "DataType";
+ "String"
+ ]
+ ]
+ ];
+ [
+ "ok_Zstd";
+ [
+ "OptionalType";
+ [
+ "DataType";
+ "Bool"
+ ]
+ ]
+ ];
+ [
+ "bad_Zstd";
+ [
+ "OptionalType";
+ [
+ "DataType";
+ "String"
+ ]
+ ]
+ ]
+ ]
+ ]
+ ];
+ "Data" = [
+ [
+ [
+ %true
+ ];
+ #;
+ [
+ %true
+ ];
+ #;
+ [
+ %true
+ ];
+ #;
+ [
+ %true
+ ];
+ #;
+ [
+ %true
+ ];
+ #;
+ [
+ %true
+ ];
+ #;
+ [
+ %true
+ ];
+ #
+ ]
+ ]
+ }
+ ]
+ }
+] \ No newline at end of file
diff --git a/yql/essentials/udfs/common/compress_base/test/cases/RoundTrip.sql b/yql/essentials/udfs/common/compress_base/test/cases/RoundTrip.sql
new file mode 100644
index 00000000000..4c8eba4aab3
--- /dev/null
+++ b/yql/essentials/udfs/common/compress_base/test/cases/RoundTrip.sql
@@ -0,0 +1,12 @@
+/* syntax version 1 */
+$level = 5;
+
+SELECT
+ Ensure(Compress::Gzip(value, $level), Decompress::Gzip(Compress::Gzip(value, $level)) == value, "gzip failed at: " || value) AS gzip,
+ Ensure(Compress::Zlib(value, $level), Decompress::Zlib(Compress::Zlib(value, $level)) == value, "zlib failed at: " || value) AS zlib,
+ Ensure(Compress::Brotli(value, $level), Decompress::Brotli(Compress::Brotli(value, $level)) == value, "brotli failed at: " || value) AS brotli,
+ Ensure(Compress::Lzma(value, $level), Decompress::Lzma(Compress::Lzma(value, $level)) == value, "lzma failed at: " || value) AS lzma,
+ Ensure(Compress::BZip2(value, $level), Decompress::BZip2(Compress::BZip2(value, $level)) == value, "bzip2 failed at: " || value) AS bzip2,
+ Ensure(Compress::Zstd(value, $level), Decompress::Zstd(Compress::Zstd(value, $level)) == value, "zstd failed at: " || value) AS zstd,
+ Ensure(Compress::Snappy(value), Decompress::Snappy(Compress::Snappy(value)) == value, "Snappy failed at: " || value) AS snappy,
+FROM Input;
diff --git a/yql/essentials/udfs/common/compress_base/test/cases/TryDecompress.sql b/yql/essentials/udfs/common/compress_base/test/cases/TryDecompress.sql
new file mode 100644
index 00000000000..a3e612ab6d5
--- /dev/null
+++ b/yql/essentials/udfs/common/compress_base/test/cases/TryDecompress.sql
@@ -0,0 +1,19 @@
+/* syntax version 1 */
+$bad = "Is not compressed!";
+
+SELECT
+ TryDecompress::Gzip(Compress::Gzip($bad, 3)) = $bad AS ok_Gzip,
+ TryDecompress::Gzip($bad) AS bad_Gzip,
+ TryDecompress::Zlib(Compress::Zlib($bad, 3)) = $bad AS ok_Zlib,
+ TryDecompress::Zlib($bad) AS bad_Zlib,
+ TryDecompress::Brotli(Compress::Brotli($bad, 3)) = $bad AS ok_Brotli,
+ TryDecompress::Brotli($bad) AS bad_Brotli,
+ TryDecompress::Lzma(Compress::Lzma($bad, 3)) = $bad AS ok_Lzma,
+ TryDecompress::Lzma($bad) AS bad_Lzma,
+ TryDecompress::BZip2(Compress::BZip2($bad, 3)) = $bad AS ok_BZip2,
+ TryDecompress::BZip2($bad) AS bad_BZip2,
+ TryDecompress::Snappy(Compress::Snappy($bad)) = $bad AS ok_Snappy,
+ TryDecompress::Snappy($bad) AS bad_Snappy,
+ TryDecompress::Zstd(Compress::Zstd($bad, 3)) = $bad AS ok_Zstd,
+ TryDecompress::Zstd($bad) AS bad_Zstd;
+
diff --git a/yql/essentials/udfs/common/compress_base/test/cases/default.in b/yql/essentials/udfs/common/compress_base/test/cases/default.in
new file mode 100644
index 00000000000..8fee3ddb782
--- /dev/null
+++ b/yql/essentials/udfs/common/compress_base/test/cases/default.in
@@ -0,0 +1,3 @@
+{"key"="1";"subkey"="2";"value"=""};
+{"key"="2";"subkey"="2";"value"="1"};
+{"key"="3";"subkey"="3";"value"="1234567890"};
diff --git a/yql/essentials/udfs/common/compress_base/test/ya.make b/yql/essentials/udfs/common/compress_base/test/ya.make
new file mode 100644
index 00000000000..7bc954ca08c
--- /dev/null
+++ b/yql/essentials/udfs/common/compress_base/test/ya.make
@@ -0,0 +1,11 @@
+YQL_UDF_TEST_CONTRIB()
+
+DEPENDS(yql/essentials/udfs/common/compress_base)
+
+IF (SANITIZER_TYPE == "memory")
+ TAG(ya:not_autocheck) # YQL-15385
+ENDIF()
+
+SIZE(MEDIUM)
+
+END()
diff --git a/yql/essentials/udfs/common/compress_base/ya.make b/yql/essentials/udfs/common/compress_base/ya.make
new file mode 100644
index 00000000000..4859a4e53cd
--- /dev/null
+++ b/yql/essentials/udfs/common/compress_base/ya.make
@@ -0,0 +1,22 @@
+YQL_UDF_CONTRIB(compress_udf)
+
+YQL_ABI_VERSION(
+ 2
+ 23
+ 0
+)
+
+SRCS(
+ compress_udf.cpp
+)
+
+PEERDIR(
+ yql/essentials/public/udf
+ yql/essentials/udfs/common/compress_base/lib
+)
+
+END()
+
+RECURSE_FOR_TESTS(
+ test
+)
diff --git a/yql/essentials/udfs/common/datetime2/datetime_udf.cpp b/yql/essentials/udfs/common/datetime2/datetime_udf.cpp
new file mode 100644
index 00000000000..139890c9bd3
--- /dev/null
+++ b/yql/essentials/udfs/common/datetime2/datetime_udf.cpp
@@ -0,0 +1,2396 @@
+#include <yql/essentials/minikql/mkql_type_ops.h>
+#include <yql/essentials/public/udf/tz/udf_tz.h>
+#include <yql/essentials/public/udf/udf_helpers.h>
+#include <yql/essentials/minikql/datetime/datetime.h>
+#include <yql/essentials/minikql/datetime/datetime64.h>
+
+#include <yql/essentials/public/udf/arrow/udf_arrow_helpers.h>
+
+#include <util/datetime/base.h>
+
+using namespace NKikimr;
+using namespace NUdf;
+using namespace NYql::DateTime;
+
+// UDF function names. They are defined as `extern const char[]` objects rather
+// than plain literals; presumably so they can be passed as `const char*`
+// non-type template arguments (see the TFuncName parameter of the templates in
+// this file) -- the instantiations themselves are outside this excerpt.
+extern const char SplitName[] = "Split";
+extern const char ToSecondsName[] = "ToSeconds";
+extern const char ToMillisecondsName[] = "ToMilliseconds";
+extern const char ToMicrosecondsName[] = "ToMicroseconds";
+extern const char GetHourName[] = "GetHour";
+extern const char GetMinuteName[] = "GetMinute";
+extern const char GetSecondName[] = "GetSecond";
+extern const char GetMillisecondOfSecondName[] = "GetMillisecondOfSecond";
+extern const char GetMicrosecondOfSecondName[] = "GetMicrosecondOfSecond";
+
+// Tags of the resource types handled by this module.
+extern const char TMResourceName[] = "DateTime2.TM";
+extern const char TM64ResourceName[] = "DateTime2.TM64";
+
+// Number of microseconds in a day, hour, minute, second and millisecond.
+const auto UsecondsInDay = 86400000000ll;
+const auto UsecondsInHour = 3600000000ll;
+const auto UsecondsInMinute = 60000000ll;
+const auto UsecondsInSecond = 1000000ll;
+const auto UsecondsInMilliseconds = 1000ll;
+
+// Type-aware UDF converting a Date / Datetime / Timestamp value (plain or
+// timezone-aware) or an Interval into a count of time units.
+//
+// Template parameters:
+//   TFuncName         - name under which the function is exposed;
+//   TResult           - unsigned result type; Interval inputs use its signed
+//                       counterpart (TSignedResult);
+//   ScaleAfterSeconds - number of result units per second.
+template <const char* TFuncName, typename TResult, ui32 ScaleAfterSeconds>
+class TToUnits {
+public:
+    typedef bool TTypeAwareMarker;
+    using TSignedResult = typename std::make_signed<TResult>::type;
+
+    // Date input: the value is multiplied by 86400 (seconds per day) and then
+    // scaled to the result units.
+    static TResult DateCore(ui16 value) {
+        return value * ui32(86400) * TResult(ScaleAfterSeconds);
+    }
+
+    // Block-item entry points for timezone-aware types: each specialization reads
+    // the underlying numeric value from the block item and reuses the matching
+    // scalar core.
+    template<typename TTzDate>
+    static TResult TzBlockCore(TBlockItem tzDate);
+
+    template<>
+    static TResult TzBlockCore<TTzDate>(TBlockItem tzDate) {
+        return DateCore(tzDate.Get<ui16>());
+    }
+
+    template<>
+    static TResult TzBlockCore<TTzDatetime>(TBlockItem tzDate) {
+        return DatetimeCore(tzDate.Get<ui32>());
+    }
+
+    template<>
+    static TResult TzBlockCore<TTzTimestamp>(TBlockItem tzDate) {
+        return TimestampCore(tzDate.Get<ui64>());
+    }
+
+    // Datetime input: the value is scaled from seconds to the result units.
+    static TResult DatetimeCore(ui32 value) {
+        return value * TResult(ScaleAfterSeconds);
+    }
+
+    // Timestamp input: the value is divided by (1000000 / ScaleAfterSeconds).
+    static TResult TimestampCore(ui64 value) {
+        return TResult(value / (1000000u / ScaleAfterSeconds));
+    }
+
+    // Interval input: same division as TimestampCore, but signed.
+    static TSignedResult IntervalCore(i64 value) {
+        return TSignedResult(value / (1000000u / ScaleAfterSeconds));
+    }
+
+    static const TStringRef& Name() {
+        static auto name = TStringRef(TFuncName, std::strlen(TFuncName));
+        return name;
+    }
+
+    // Block kernel for a timezone-aware input type, built on TzBlockCore.
+    template<typename TTzDate, typename TOutput>
+    static auto MakeTzBlockExec() {
+        using TReader = TTzDateBlockReader<TTzDate, /*Nullable*/ false>;
+        return UnaryPreallocatedReaderExecImpl<TReader, TOutput, TzBlockCore<TTzDate>>;
+    }
+
+    // Declares the signature and (unless typesOnly) the implementation matching
+    // the argument type found in userType.
+    //
+    // Returns false when `name` is not this function, true otherwise. Problems
+    // are reported through builder.SetError(); exceptions are converted into
+    // errors as well.
+    static bool DeclareSignature(
+        const TStringRef& name,
+        TType* userType,
+        IFunctionTypeInfoBuilder& builder,
+        bool typesOnly)
+    {
+        if (Name() != name) {
+            return false;
+        }
+
+        try {
+            auto typeInfoHelper = builder.TypeInfoHelper();
+            TTupleTypeInspector tuple(*typeInfoHelper, userType);
+            Y_ENSURE(tuple);
+            Y_ENSURE(tuple.GetElementsCount() > 0);
+            TTupleTypeInspector argsTuple(*typeInfoHelper, tuple.GetElementType(0));
+            Y_ENSURE(argsTuple);
+            if (argsTuple.GetElementsCount() != 1) {
+                builder.SetError("Expected one argument");
+                return true;
+            }
+
+            auto argType = argsTuple.GetElementType(0);
+            TVector<const TType*> argBlockTypes;
+            argBlockTypes.push_back(argType);
+
+            // Unwrap Block<...> and Optional<...> to reach the data type.
+            TBlockTypeInspector block(*typeInfoHelper, argType);
+            if (block) {
+                Y_ENSURE(!block.IsScalar());
+                argType = block.GetItemType();
+            }
+
+            bool isOptional = false;
+            if (auto opt = TOptionalTypeInspector(*typeInfoHelper, argType)) {
+                argType = opt.GetItemType();
+                isOptional = true;
+            }
+
+            TDataTypeInspector data(*typeInfoHelper, argType);
+            if (!data) {
+                builder.SetError("Expected data type");
+                return true;
+            }
+
+            auto typeId = data.GetTypeId();
+            if (!(typeId == TDataType<TDate>::Id || typeId == TDataType<TTzDate>::Id ||
+                typeId == TDataType<TDatetime>::Id || typeId == TDataType<TTzDatetime>::Id ||
+                typeId == TDataType<TTimestamp>::Id || typeId == TDataType<TTzTimestamp>::Id ||
+                typeId == TDataType<TInterval>::Id)) {
+                builder.SetError(TStringBuilder() << "Type " << GetDataTypeInfo(GetDataSlot(typeId)).Name << " is not supported");
+                // Stop here like the other error paths: do not go on declaring
+                // a signature for a type no implementation exists for.
+                return true;
+            }
+
+            builder.Args()->Add(argsTuple.GetElementType(0)).Done();
+            // Intervals are signed, every other supported input is unsigned.
+            const TType* retType;
+            if (typeId != TDataType<TInterval>::Id) {
+                retType = builder.SimpleType<TResult>();
+            } else {
+                retType = builder.SimpleType<TSignedResult>();
+            }
+
+            if (isOptional) {
+                retType = builder.Optional()->Item(retType).Build();
+            }
+
+            // outputType is the item type; retType additionally carries the
+            // block wrapper when the argument is a block.
+            auto outputType = retType;
+            if (block) {
+                retType = builder.Block(block.IsScalar())->Item(retType).Build();
+            }
+
+            builder.Returns(retType);
+            builder.SupportsBlocks();
+            builder.IsStrict();
+
+            builder.UserType(userType);
+            if (!typesOnly) {
+                if (typeId == TDataType<TDate>::Id || typeId == TDataType<TTzDate>::Id) {
+                    if (block) {
+                        const auto exec = (typeId == TDataType<TTzDate>::Id)
+                            ? MakeTzBlockExec<TTzDate, TResult>()
+                            : UnaryPreallocatedExecImpl<ui16, TResult, DateCore>;
+
+                        builder.Implementation(new TSimpleArrowUdfImpl(argBlockTypes, outputType, block.IsScalar(),
+                            exec, builder, TString(name), arrow::compute::NullHandling::INTERSECTION));
+                    } else {
+                        builder.Implementation(new TUnaryOverOptionalImpl<ui16, TResult, DateCore>());
+                    }
+                }
+
+                if (typeId == TDataType<TDatetime>::Id || typeId == TDataType<TTzDatetime>::Id) {
+                    if (block) {
+                        const auto exec = (typeId == TDataType<TTzDatetime>::Id)
+                            ? MakeTzBlockExec<TTzDatetime, TResult>()
+                            : UnaryPreallocatedExecImpl<ui32, TResult, DatetimeCore>;
+
+                        builder.Implementation(new TSimpleArrowUdfImpl(argBlockTypes, outputType, block.IsScalar(),
+                            exec, builder, TString(name), arrow::compute::NullHandling::INTERSECTION));
+                    } else {
+                        builder.Implementation(new TUnaryOverOptionalImpl<ui32, TResult, DatetimeCore>());
+                    }
+                }
+
+                if (typeId == TDataType<TTimestamp>::Id || typeId == TDataType<TTzTimestamp>::Id) {
+                    if (block) {
+                        const auto exec = (typeId == TDataType<TTzTimestamp>::Id)
+                            ? MakeTzBlockExec<TTzTimestamp, TResult>()
+                            : UnaryPreallocatedExecImpl<ui64, TResult, TimestampCore>;
+
+                        builder.Implementation(new TSimpleArrowUdfImpl(argBlockTypes, outputType, block.IsScalar(),
+                            exec, builder, TString(name), arrow::compute::NullHandling::INTERSECTION));
+                    } else {
+                        builder.Implementation(new TUnaryOverOptionalImpl<ui64, TResult, TimestampCore>());
+                    }
+                }
+
+                if (typeId == TDataType<TInterval>::Id) {
+                    if (block) {
+                        builder.Implementation(new TSimpleArrowUdfImpl(argBlockTypes, outputType, block.IsScalar(),
+                            UnaryPreallocatedExecImpl<i64, TSignedResult, IntervalCore>, builder, TString(name), arrow::compute::NullHandling::INTERSECTION));
+                    } else {
+                        builder.Implementation(new TUnaryOverOptionalImpl<i64, TSignedResult, IntervalCore>());
+                    }
+                }
+            }
+        } catch (const std::exception& e) {
+            builder.SetError(TStringBuf(e.what()));
+        }
+
+        return true;
+    }
+};
+
+// Type-aware UDF extracting one time-of-day component (TFieldStorage) either
+// directly from a Date / Datetime / Timestamp value or from a
+// TMResourceName resource.
+//
+// Template parameters, as used by the code below:
+//   FieldFunc - reads the raw field from a resource value (resource path);
+//   Divisor   - the resource path returns FieldFunc(value) / Divisor;
+//   Scale, Limit - the data-type path returns (value / Scale) % Limit;
+//   Fractional - whether the component is a sub-second one.
+template <const char* TFuncName, typename TFieldStorage, TFieldStorage (*FieldFunc)(const TUnboxedValuePod&), ui32 Divisor, ui32 Scale, ui32 Limit, bool Fractional>
+struct TGetTimeComponent {
+ typedef bool TTypeAwareMarker;
+
+ // Computes the component straight from a numeric date value.
+ //   AlwaysZero      - the input carries no time of day at all (used for Date);
+ //   InputFractional - the input has sub-second precision (used for Timestamp;
+ //                     the 1000000u divisor below implies microseconds).
+ template <typename TInput, bool AlwaysZero, bool InputFractional>
+ static TFieldStorage Core(TInput val) {
+ if constexpr (AlwaysZero) {
+ return 0;
+ }
+
+ if constexpr (InputFractional) {
+ if constexpr (Fractional) {
+ return (val / Scale) % Limit;
+ } else {
+ // Drop the sub-second part first, then extract the component.
+ return (val / 1000000u / Scale) % Limit;
+ }
+ } else {
+ if constexpr (Fractional) {
+ // Whole-second input has no sub-second component.
+ return 0;
+ } else {
+ return (val / Scale) % Limit;
+ }
+ }
+ }
+
+ // Resource-based implementation: empty input yields an empty result.
+ class TImpl : public TBoxedValue {
+ public:
+ TUnboxedValue Run(const IValueBuilder* valueBuilder, const TUnboxedValuePod* args) const final {
+ Y_UNUSED(valueBuilder);
+ if (!args[0]) {
+ return {};
+ }
+
+ return TUnboxedValuePod(TFieldStorage((FieldFunc(args[0])) / Divisor));
+ }
+ };
+
+ static const TStringRef& Name() {
+ static auto name = TStringRef(TFuncName, std::strlen(TFuncName));
+ return name;
+ }
+
+ // Declares the signature and (unless typesOnly) the implementation.
+ // Returns false when `name` is not this function, true otherwise; problems
+ // are reported through builder.SetError().
+ //
+ // Date / Datetime / Timestamp arguments (optionally wrapped in Optional
+ // and/or Block) get a dedicated implementation based on Core(). Everything
+ // else -- a non-tuple userType, a TMResourceName resource, or any other data
+ // type -- falls through to the default resource-based signature at the end.
+ static bool DeclareSignature(
+ const TStringRef& name,
+ TType* userType,
+ IFunctionTypeInfoBuilder& builder,
+ bool typesOnly)
+ {
+ if (Name() != name) {
+ return false;
+ }
+
+ try {
+ auto typeInfoHelper = builder.TypeInfoHelper();
+ TTupleTypeInspector tuple(*typeInfoHelper, userType);
+ if (tuple) {
+ Y_ENSURE(tuple.GetElementsCount() > 0);
+ TTupleTypeInspector argsTuple(*typeInfoHelper, tuple.GetElementType(0));
+ Y_ENSURE(argsTuple);
+ if (argsTuple.GetElementsCount() != 1) {
+ builder.SetError("Expected one argument");
+ return true;
+ }
+
+
+ auto argType = argsTuple.GetElementType(0);
+ TVector<const TType*> argBlockTypes;
+ argBlockTypes.push_back(argType);
+
+ // Unwrap Block<...> and Optional<...> to reach the underlying type.
+ TBlockTypeInspector block(*typeInfoHelper, argType);
+ if (block) {
+ Y_ENSURE(!block.IsScalar());
+ argType = block.GetItemType();
+ }
+
+ bool isOptional = false;
+ if (auto opt = TOptionalTypeInspector(*typeInfoHelper, argType)) {
+ argType = opt.GetItemType();
+ isOptional = true;
+ }
+
+ TResourceTypeInspector res(*typeInfoHelper, argType);
+ if (!res) {
+ TDataTypeInspector data(*typeInfoHelper, argType);
+ if (!data) {
+ builder.SetError("Expected data type");
+ return true;
+ }
+
+ auto typeId = data.GetTypeId();
+ if (typeId == TDataType<TDate>::Id ||
+ typeId == TDataType<TDatetime>::Id ||
+ typeId == TDataType<TTimestamp>::Id) {
+
+ builder.Args()->Add(argsTuple.GetElementType(0)).Done();
+ const TType* retType = builder.SimpleType<TFieldStorage>();
+
+ if (isOptional) {
+ retType = builder.Optional()->Item(retType).Build();
+ }
+
+ // outputType is the item type; retType additionally carries the
+ // block wrapper when the argument is a block.
+ auto outputType = retType;
+ if (block) {
+ retType = builder.Block(block.IsScalar())->Item(retType).Build();
+ }
+
+ builder.Returns(retType);
+ builder.SupportsBlocks();
+ builder.IsStrict();
+
+ builder.UserType(userType);
+ if (!typesOnly) {
+ // Date: AlwaysZero, so every component is 0.
+ if (typeId == TDataType<TDate>::Id) {
+ if (block) {
+ builder.Implementation(new TSimpleArrowUdfImpl(argBlockTypes, outputType, block.IsScalar(),
+ UnaryPreallocatedExecImpl<ui16, TFieldStorage, Core<ui16, true, false>>, builder, TString(name), arrow::compute::NullHandling::INTERSECTION));
+ } else {
+ builder.Implementation(new TUnaryOverOptionalImpl<ui16, TFieldStorage, Core<ui16, true, false>>());
+ }
+ }
+
+ // Datetime: whole-second input.
+ if (typeId == TDataType<TDatetime>::Id) {
+ if (block) {
+ builder.Implementation(new TSimpleArrowUdfImpl(argBlockTypes, outputType, block.IsScalar(),
+ UnaryPreallocatedExecImpl<ui32, TFieldStorage, Core<ui32, false, false>>, builder, TString(name), arrow::compute::NullHandling::INTERSECTION));
+ } else {
+ builder.Implementation(new TUnaryOverOptionalImpl<ui32, TFieldStorage, Core<ui32, false, false>>());
+ }
+ }
+
+ // Timestamp: fractional input.
+ if (typeId == TDataType<TTimestamp>::Id) {
+ if (block) {
+ builder.Implementation(new TSimpleArrowUdfImpl(argBlockTypes, outputType, block.IsScalar(),
+ UnaryPreallocatedExecImpl<ui64, TFieldStorage, Core<ui64, false, true>>, builder, TString(name), arrow::compute::NullHandling::INTERSECTION));
+ } else {
+ builder.Implementation(new TUnaryOverOptionalImpl<ui64, TFieldStorage, Core<ui64, false, true>>());
+ }
+ }
+ }
+
+ return true;
+ }
+ // Any other data type: no return here, control continues to the
+ // default implementation below.
+ } else {
+ // Resources are not supported in block form and must carry the
+ // TMResourceName tag.
+ Y_ENSURE(!block);
+ if (res.GetTag() != TStringRef::Of(TMResourceName)) {
+ builder.SetError("Unexpected resource tag");
+ return true;
+ }
+ }
+ }
+
+ // default implementation
+ builder.Args()->Add<TResource<TMResourceName>>().Flags(ICallablePayload::TArgumentFlags::AutoMap).Done();
+ builder.Returns<TFieldStorage>();
+ builder.IsStrict();
+ if (!typesOnly) {
+ builder.Implementation(new TImpl());
+ }
+ } catch (const std::exception& e) {
+ builder.SetError(TStringBuf(e.what()));
+ }
+
+ return true;
+ }
+};
+
+namespace {
+
+// Reference(): reinterprets the raw payload of a value as a TTMStorage.
+// Const and mutable overloads exist for both unboxed values and block items.
+
+// Read-only view of an unboxed value.
+const TTMStorage& Reference(const NUdf::TUnboxedValuePod& value) {
+    const auto* raw = value.GetRawPtr();
+    return *reinterpret_cast<const TTMStorage*>(raw);
+}
+
+// Mutable view of an unboxed value.
+TTMStorage& Reference(NUdf::TUnboxedValuePod& value) {
+    auto* raw = value.GetRawPtr();
+    return *reinterpret_cast<TTMStorage*>(raw);
+}
+
+// Read-only view of a block item.
+const TTMStorage& Reference(const TBlockItem& value) {
+    const auto* raw = value.GetRawPtr();
+    return *reinterpret_cast<const TTMStorage*>(raw);
+}
+
+// Mutable view of a block item.
+Y_DECLARE_UNUSED TTMStorage& Reference(TBlockItem& value) {
+    auto* raw = value.GetRawPtr();
+    return *reinterpret_cast<TTMStorage*>(raw);
+}
+
+// Reference64(): reinterprets the raw payload of an unboxed value as a
+// TTM64Storage (read-only overload).
+const TTM64Storage& Reference64(const NUdf::TUnboxedValuePod& value) {
+    const auto* raw = value.GetRawPtr();
+    return *reinterpret_cast<const TTM64Storage*>(raw);
+}
+
+// Mutable overload of Reference64().
+TTM64Storage& Reference64(NUdf::TUnboxedValuePod& value) {
+    auto* raw = value.GetRawPtr();
+    return *reinterpret_cast<TTM64Storage*>(raw);
+}
+
+// Returns a copy of `date` shifted by `months`; a default-constructed TValue
+// is returned when NYql::DateTime::DoAddMonths reports failure.
+template<typename TValue>
+TValue DoAddMonths(const TValue& date, i64 months, const NUdf::IDateBuilder& builder) {
+    TValue shifted = date;
+    if (NYql::DateTime::DoAddMonths(Reference(shifted), months, builder)) {
+        return shifted;
+    }
+    return TValue{};
+}
+
+// Shifts `date` by whole quarters, expressed as a shift of three months each.
+template<typename TValue>
+TValue DoAddQuarters(const TValue& date, i64 quarters, const NUdf::IDateBuilder& builder) {
+    const auto months = quarters * 3ll;
+    return DoAddMonths(date, months, builder);
+}
+
+// Returns a copy of `date` shifted by `years`; a default-constructed TValue
+// is returned when NYql::DateTime::DoAddYears reports failure.
+template<typename TValue>
+TValue DoAddYears(const TValue& date, i64 years, const NUdf::IDateBuilder& builder) {
+    TValue shifted = date;
+    if (NYql::DateTime::DoAddYears(Reference(shifted), years, builder)) {
+        return shifted;
+    }
+    return TValue{};
+}
+
+// ACCESSORS(field, type) generates two function templates for a TTMStorage
+// field: Get<field>(tm), which returns the field cast to `type`, and
+// Set<field>(tm, value), which assigns it. Both reach the storage through
+// Reference(), so they work for every value type Reference() is overloaded for.
+// The macro is local to this list and is undefined right after it.
+#define ACCESSORS(field, type) \
+ template<typename TValue> \
+ inline type Get##field(const TValue& tm) { \
+ return (type)Reference(tm).field; \
+ } \
+ template<typename TValue> \
+ Y_DECLARE_UNUSED inline void Set##field(TValue& tm, type value) { \
+ Reference(tm).field = value; \
+ }
+
+ ACCESSORS(Year, ui16)
+ ACCESSORS(DayOfYear, ui16)
+ ACCESSORS(WeekOfYear, ui8)
+ ACCESSORS(WeekOfYearIso8601, ui8)
+ ACCESSORS(DayOfWeek, ui8)
+ ACCESSORS(Month, ui8)
+ ACCESSORS(Day, ui8)
+ ACCESSORS(Hour, ui8)
+ ACCESSORS(Minute, ui8)
+ ACCESSORS(Second, ui8)
+ ACCESSORS(Microsecond, ui32)
+ ACCESSORS(TimezoneId, ui16)
+
+#undef ACCESSORS
+
+    // Returns true when `year` lies within [MIN_YEAR - 1, MAX_YEAR + 1], i.e. the
+    // supported range widened by one year on each side.
+    // Both bounds must hold: the previous `||` made the check true for every
+    // value, so no year was ever rejected.
+    inline bool ValidateYear(ui16 year) {
+        return year >= NUdf::MIN_YEAR - 1 && year <= NUdf::MAX_YEAR + 1;
+    }
+
+    // Range checks for the individual calendar and clock fields.
+
+    // Month number: 1..12.
+    inline bool ValidateMonth(ui8 month) {
+        return 1 <= month && month <= 12;
+    }
+
+    // Day of month: 1..31 (not checked against the actual month length).
+    inline bool ValidateDay(ui8 day) {
+        return 1 <= day && day <= 31;
+    }
+
+    // Hour: 0..23.
+    inline bool ValidateHour(ui8 hour) {
+        return hour <= 23;
+    }
+
+    // Minute: 0..59.
+    inline bool ValidateMinute(ui8 minute) {
+        return minute <= 59;
+    }
+
+    // Second: 0..59.
+    inline bool ValidateSecond(ui8 second) {
+        return second <= 59;
+    }
+
+    // Microsecond within a second: 0..999999.
+    inline bool ValidateMicrosecond(ui32 microsecond) {
+        return microsecond <= 999999;
+    }
+
+    // A timezone id is valid when it indexes into the timezone table and the
+    // entry at that index is non-empty.
+    inline bool ValidateTimezoneId(ui16 timezoneId) {
+        const auto& knownZones = NUdf::GetTimezones();
+        if (timezoneId >= knownZones.size()) {
+            return false;
+        }
+        return !knownZones[timezoneId].empty();
+    }
+
+    // Looks up a three-letter English month abbreviation, ignoring case.
+    // On a match stores the month number (1..12) into `month` and returns true;
+    // otherwise returns false and leaves `month` untouched.
+    inline bool ValidateMonthShortName(const std::string_view& monthName, ui8& month) {
+        // Ordered by month number: entry i holds month i + 1.
+        static constexpr std::string_view shortNames[] = {
+            "jan", "feb", "mar", "apr", "may", "jun",
+            "jul", "aug", "sep", "oct", "nov", "dec"
+        };
+
+        ui8 number = 0;
+        for (const auto& candidate : shortNames) {
+            ++number;
+            // The whole name must match: equal length and equal characters
+            // up to case.
+            if (candidate.size() != monthName.size()) {
+                continue;
+            }
+            if (strnicmp(candidate.data(), monthName.data(), candidate.size()) == 0) {
+                month = number;
+                return true;
+            }
+        }
+        return false;
+    }
+
+    // Looks up a full English month name, ignoring case.
+    // On a match stores the month number (1..12) into `month` and returns true;
+    // otherwise returns false and leaves `month` untouched.
+    inline bool ValidateMonthFullName(const std::string_view& monthName, ui8& month) {
+        // Ordered by month number: entry i holds month i + 1.
+        static constexpr std::string_view fullNames[] = {
+            "january", "february", "march", "april", "may", "june",
+            "july", "august", "september", "october", "november", "december"
+        };
+
+        ui8 number = 0;
+        for (const auto& candidate : fullNames) {
+            ++number;
+            // The whole name must match: equal length and equal characters
+            // up to case.
+            if (candidate.size() != monthName.size()) {
+                continue;
+            }
+            if (strnicmp(candidate.data(), monthName.data(), candidate.size()) == 0) {
+                month = number;
+                return true;
+            }
+        }
+        return false;
+    }
+
+    // Upper-bound checks for raw values; the MAX_* constants are exclusive limits.
+
+    inline bool ValidateDatetime(ui32 datetime) {
+        return MAX_DATETIME > datetime;
+    }
+
+    inline bool ValidateTimestamp(ui64 timestamp) {
+        return MAX_TIMESTAMP > timestamp;
+    }
+
+    // An interval must lie strictly between -MAX_TIMESTAMP and MAX_TIMESTAMP.
+    inline bool ValidateInterval(i64 interval) {
+        const i64 bound = i64(MAX_TIMESTAMP);
+        return -bound < interval && interval < bound;
+    }
+
+ // Split
+
+ // Block reader type for the Split argument: timezone-aware types (those for
+ // which TTzDataType<...>::Result is true) are read with TTzDateBlockReader,
+ // all other types with a fixed-size reader over the type's layout.
+ template<typename TUserDataType, bool Nullable>
+ using TSplitArgReader = std::conditional_t<TTzDataType<TUserDataType>::Result,
+ TTzDateBlockReader<TUserDataType, Nullable>,
+ TFixedSizeBlockReader<typename TDataType<TUserDataType>::TLayout, Nullable>>;
+
+ // Block kernel for Split. Split() is specialized per data type; Process() runs it
+ // for one item and terminates the UDF with the exception text on failure.
+ template<typename TUserDataType>
+ struct TSplitKernelExec : TUnaryKernelExec<TSplitKernelExec<TUserDataType>, TSplitArgReader<TUserDataType, false>, TResourceArrayBuilder<false>> {
+     static void Split(TBlockItem arg, TTMStorage& storage, const IValueBuilder& valueBuilder);
+
+     template<typename TSink>
+     static void Process(const IValueBuilder* valueBuilder, TBlockItem arg, const TSink& sink) {
+         try {
+             TBlockItem splitResult {0};
+             auto& fields = Reference(splitResult);
+             Split(arg, fields, *valueBuilder);
+             sink(splitResult);
+         } catch (const std::exception& error) {
+             UdfTerminate((TStringBuilder() << error.what()).data());
+         }
+     }
+ };
+
+ // Boxed implementation of Split for one concrete date/time type.
+ // Run() is specialized per type; DeclareSignature() registers either the block
+ // kernel or the scalar implementation, depending on whether the argument is a block.
+ template <typename TUserDataType>
+ class TSplit : public TBoxedValue {
+     const TSourcePosition Pos_;
+
+ public:
+     explicit TSplit(TSourcePosition pos)
+         : Pos_(pos)
+     {}
+
+     TUnboxedValue Run(
+         const IValueBuilder* valueBuilder,
+         const TUnboxedValuePod* args) const override;
+
+     // Always returns true; an argument-count mismatch is reported via builder.SetError().
+     static bool DeclareSignature(
+         TStringRef name,
+         TType* userType,
+         IFunctionTypeInfoBuilder& builder,
+         bool typesOnly)
+     {
+         const auto helper = builder.TypeInfoHelper();
+
+         // The user type is a tuple whose first element is the tuple of call arguments.
+         TTupleTypeInspector userTuple(*helper, userType);
+         Y_ENSURE(userTuple);
+         Y_ENSURE(userTuple.GetElementsCount() > 0);
+         TTupleTypeInspector callArgs(*helper, userTuple.GetElementType(0));
+         Y_ENSURE(callArgs);
+
+         if (callArgs.GetElementsCount() != 1) {
+             builder.SetError("Expected one argument");
+             return true;
+         }
+         auto firstArgType = callArgs.GetElementType(0);
+
+         builder.UserType(userType);
+         builder.SupportsBlocks();
+         builder.IsStrict();
+
+         TBlockTypeInspector blockInspector(*helper, firstArgType);
+         if (blockInspector) {
+             // NOTE(review): the block signature always uses TMResourceName, while the scalar
+             // branch switches to TM64ResourceName for BigDateType inputs - confirm this is intended.
+             const auto* blockInputType = builder.Block(false)->Item<TUserDataType>().Build();
+             builder.Args()->Add(blockInputType).Flags(ICallablePayload::TArgumentFlags::AutoMap);
+             const auto* resourceType = builder.Resource(TMResourceName);
+             const auto* blockOutputType = builder.Block(false)->Item(resourceType).Build();
+             builder.Returns(blockOutputType);
+
+             if (!typesOnly) {
+                 builder.Implementation(new TSimpleArrowUdfImpl({blockInputType}, resourceType, blockInspector.IsScalar(),
+                     TSplitKernelExec<TUserDataType>::Do, builder, TString(name), arrow::compute::NullHandling::COMPUTED_NO_PREALLOCATE));
+             }
+             return true;
+         }
+
+         builder.Args()->Add<TUserDataType>().Flags(ICallablePayload::TArgumentFlags::AutoMap);
+         if constexpr (NUdf::TDataType<TUserDataType>::Features & NYql::NUdf::BigDateType) {
+             builder.Returns(builder.Resource(TM64ResourceName));
+         } else {
+             builder.Returns(builder.Resource(TMResourceName));
+         }
+
+         if (!typesOnly) {
+             builder.Implementation(new TSplit<TUserDataType>(builder.GetSourcePosition()));
+         }
+         return true;
+     }
+ };
+
+ // Block Split for Date: fills `storage` via FromDate from the ui16 payload.
+ template <>
+ void TSplitKernelExec<TDate>::Split(TBlockItem arg, TTMStorage &storage, const IValueBuilder& builder) {
+     auto& dateBuilder = builder.GetDateBuilder();
+     storage.FromDate(dateBuilder, arg.Get<ui16>());
+ }
+
+ // Block Split for Datetime: fills `storage` via FromDatetime from the ui32 payload.
+ template <>
+ void TSplitKernelExec<TDatetime>::Split(TBlockItem arg, TTMStorage &storage, const IValueBuilder& builder) {
+     auto& dateBuilder = builder.GetDateBuilder();
+     storage.FromDatetime(dateBuilder, arg.Get<ui32>());
+ }
+
+ // Block Split for Timestamp: fills `storage` via FromTimestamp from the ui64 payload.
+ template <>
+ void TSplitKernelExec<TTimestamp>::Split(TBlockItem arg, TTMStorage &storage, const IValueBuilder& builder) {
+     auto& dateBuilder = builder.GetDateBuilder();
+     storage.FromTimestamp(dateBuilder, arg.Get<ui64>());
+ }
+
+ // Block Split for TzDate: same as Date, but also passes the item's timezone id.
+ template <>
+ void TSplitKernelExec<TTzDate>::Split(TBlockItem arg, TTMStorage &storage, const IValueBuilder& builder) {
+     auto& dateBuilder = builder.GetDateBuilder();
+     storage.FromDate(dateBuilder, arg.Get<ui16>(), arg.GetTimezoneId());
+ }
+
+ // Block Split for TzDatetime: same as Datetime, but also passes the item's timezone id.
+ template <>
+ void TSplitKernelExec<TTzDatetime>::Split(TBlockItem arg, TTMStorage &storage, const IValueBuilder& builder) {
+     auto& dateBuilder = builder.GetDateBuilder();
+     storage.FromDatetime(dateBuilder, arg.Get<ui32>(), arg.GetTimezoneId());
+ }
+
+ // Block Split for TzTimestamp: same as Timestamp, but also passes the item's timezone id.
+ template <>
+ void TSplitKernelExec<TTzTimestamp>::Split(TBlockItem arg, TTMStorage &storage, const IValueBuilder& builder) {
+     auto& dateBuilder = builder.GetDateBuilder();
+     storage.FromTimestamp(dateBuilder, arg.Get<ui64>(), arg.GetTimezoneId());
+ }
+
+ template <>
+ void TSplitKernelExec<TDate32>::Split(TBlockItem, TTMStorage&, const IValueBuilder&) { // block-mode Split for Date32: always throws
+ ythrow yexception() << "Not implemented"; // TSplitKernelExec::Process turns this into UdfTerminate
+ }
+
+ template <>
+ void TSplitKernelExec<TDatetime64>::Split(TBlockItem, TTMStorage&, const IValueBuilder&) { // block-mode Split for Datetime64: always throws
+ ythrow yexception() << "Not implemented"; // TSplitKernelExec::Process turns this into UdfTerminate
+ }
+
+ template <>
+ void TSplitKernelExec<TTimestamp64>::Split(TBlockItem, TTMStorage&, const IValueBuilder&) { // block-mode Split for Timestamp64: always throws
+ ythrow yexception() << "Not implemented"; // TSplitKernelExec::Process turns this into UdfTerminate
+ }
+
+ // Scalar Split for Date. EMPTY_RESULT_ON_EMPTY_ARG (defined elsewhere) handles the
+ // empty-argument case; any std::exception terminates the UDF with the source position.
+ template <>
+ TUnboxedValue TSplit<TDate>::Run(
+     const IValueBuilder* valueBuilder,
+     const TUnboxedValuePod* args) const
+ {
+     try {
+         EMPTY_RESULT_ON_EMPTY_ARG(0);
+
+         TUnboxedValuePod resource(0);
+         Reference(resource).FromDate(valueBuilder->GetDateBuilder(), args[0].Get<ui16>());
+         return resource;
+     } catch (const std::exception& error) {
+         UdfTerminate((TStringBuilder() << Pos_ << " " << error.what()).data());
+     }
+ }
+
+ // Scalar Split for Date32: fills the 64-bit resource via FromDate32.
+ // Any std::exception terminates the UDF with the source position.
+ template <>
+ TUnboxedValue TSplit<TDate32>::Run(
+     const IValueBuilder* valueBuilder,
+     const TUnboxedValuePod* args) const
+ {
+     try {
+         EMPTY_RESULT_ON_EMPTY_ARG(0);
+
+         auto& dateBuilder = valueBuilder->GetDateBuilder();
+         TUnboxedValuePod resource(0);
+         Reference64(resource).FromDate32(dateBuilder, args[0].Get<i32>());
+         return resource;
+     } catch (const std::exception& error) {
+         UdfTerminate((TStringBuilder() << Pos_ << " " << error.what()).data());
+     }
+ }
+
+ // Scalar Split for Datetime: fills the resource via FromDatetime.
+ // Any std::exception terminates the UDF with the source position.
+ template <>
+ TUnboxedValue TSplit<TDatetime>::Run(
+     const IValueBuilder* valueBuilder,
+     const TUnboxedValuePod* args) const
+ {
+     try {
+         EMPTY_RESULT_ON_EMPTY_ARG(0);
+
+         TUnboxedValuePod resource(0);
+         Reference(resource).FromDatetime(valueBuilder->GetDateBuilder(), args[0].Get<ui32>());
+         return resource;
+     } catch (const std::exception& error) {
+         UdfTerminate((TStringBuilder() << Pos_ << " " << error.what()).data());
+     }
+ }
+
+ // Scalar Split for Datetime64: fills the 64-bit resource via FromDatetime64.
+ // Any std::exception terminates the UDF with the source position.
+ template <>
+ TUnboxedValue TSplit<TDatetime64>::Run(
+     const IValueBuilder* valueBuilder,
+     const TUnboxedValuePod* args) const
+ {
+     try {
+         EMPTY_RESULT_ON_EMPTY_ARG(0);
+
+         auto& dateBuilder = valueBuilder->GetDateBuilder();
+         TUnboxedValuePod resource(0);
+         Reference64(resource).FromDatetime64(dateBuilder, args[0].Get<i64>());
+         return resource;
+     } catch (const std::exception& error) {
+         UdfTerminate((TStringBuilder() << Pos_ << " " << error.what()).data());
+     }
+ }
+
+ // Scalar Split for Timestamp: fills the resource via FromTimestamp.
+ // Any std::exception terminates the UDF with the source position.
+ template <>
+ TUnboxedValue TSplit<TTimestamp>::Run(
+     const IValueBuilder* valueBuilder,
+     const TUnboxedValuePod* args) const
+ {
+     try {
+         EMPTY_RESULT_ON_EMPTY_ARG(0);
+
+         TUnboxedValuePod resource(0);
+         Reference(resource).FromTimestamp(valueBuilder->GetDateBuilder(), args[0].Get<ui64>());
+         return resource;
+     } catch (const std::exception& error) {
+         UdfTerminate((TStringBuilder() << Pos_ << " " << error.what()).data());
+     }
+ }
+
+ // Scalar Split for Timestamp64: fills the 64-bit resource via FromTimestamp64.
+ // Any std::exception terminates the UDF with the source position.
+ template <>
+ TUnboxedValue TSplit<TTimestamp64>::Run(
+     const IValueBuilder* valueBuilder,
+     const TUnboxedValuePod* args) const
+ {
+     try {
+         EMPTY_RESULT_ON_EMPTY_ARG(0);
+
+         auto& dateBuilder = valueBuilder->GetDateBuilder();
+         TUnboxedValuePod resource(0);
+         Reference64(resource).FromTimestamp64(dateBuilder, args[0].Get<i64>());
+         return resource;
+     } catch (const std::exception& error) {
+         UdfTerminate((TStringBuilder() << Pos_ << " " << error.what()).data());
+     }
+ }
+
+ // Scalar Split for TzDate: like Date, but the argument's timezone id is passed along.
+ // Any std::exception terminates the UDF with the source position.
+ template <>
+ TUnboxedValue TSplit<TTzDate>::Run(
+     const IValueBuilder* valueBuilder,
+     const TUnboxedValuePod* args) const
+ {
+     try {
+         EMPTY_RESULT_ON_EMPTY_ARG(0);
+
+         const auto& input = args[0];
+         TUnboxedValuePod resource(0);
+         Reference(resource).FromDate(valueBuilder->GetDateBuilder(), input.Get<ui16>(), input.GetTimezoneId());
+         return resource;
+     } catch (const std::exception& error) {
+         UdfTerminate((TStringBuilder() << Pos_ << " " << error.what()).data());
+     }
+ }
+
+ // Scalar Split for TzDatetime: like Datetime, but the argument's timezone id is passed along.
+ // Any std::exception terminates the UDF with the source position.
+ template <>
+ TUnboxedValue TSplit<TTzDatetime>::Run(
+     const IValueBuilder* valueBuilder,
+     const TUnboxedValuePod* args) const
+ {
+     try {
+         EMPTY_RESULT_ON_EMPTY_ARG(0);
+
+         const auto& input = args[0];
+         TUnboxedValuePod resource(0);
+         Reference(resource).FromDatetime(valueBuilder->GetDateBuilder(), input.Get<ui32>(), input.GetTimezoneId());
+         return resource;
+     } catch (const std::exception& error) {
+         UdfTerminate((TStringBuilder() << Pos_ << " " << error.what()).data());
+     }
+ }
+
+ // Scalar Split for TzTimestamp: like Timestamp, but the argument's timezone id is passed along.
+ // Any std::exception terminates the UDF with the source position.
+ template <>
+ TUnboxedValue TSplit<TTzTimestamp>::Run(
+     const IValueBuilder* valueBuilder,
+     const TUnboxedValuePod* args) const
+ {
+     try {
+         EMPTY_RESULT_ON_EMPTY_ARG(0);
+
+         const auto& input = args[0];
+         TUnboxedValuePod resource(0);
+         Reference(resource).FromTimestamp(valueBuilder->GetDateBuilder(), input.Get<ui64>(), input.GetTimezoneId());
+         return resource;
+     } catch (const std::exception& error) {
+         UdfTerminate((TStringBuilder() << Pos_ << " " << error.what()).data());
+     }
+ }
+
+ // Make*
+
+ // Array builder for Make* results: timezone-aware types use TTzDateArrayBuilder,
+ // every other type uses a fixed-size builder of its layout type.
+ template<typename TUserDataType, bool Nullable>
+ using TMakeResBuilder = typename std::conditional<
+     TTzDataType<TUserDataType>::Result,
+     TTzDateArrayBuilder<TUserDataType, Nullable>,
+     TFixedSizeArrayBuilder<typename TDataType<TUserDataType>::TLayout, Nullable>
+ >::type;
+
+ // Block kernel shared by the Make* UDFs. Make() is specialized per target type;
+ // Process() feeds it the storage referenced by the block item and forwards the result.
+ template<typename TUserDataType>
+ struct TMakeDateKernelExec : TUnaryKernelExec<TMakeDateKernelExec<TUserDataType>, TReaderTraits::TResource<false>, TMakeResBuilder<TUserDataType, false>> {
+     static TBlockItem Make(TTMStorage& storage, const IValueBuilder& valueBuilder);
+
+     template<typename TSink>
+     static void Process(const IValueBuilder* valueBuilder, TBlockItem item, const TSink& sink) {
+         sink(TBlockItem(Make(Reference(item), *valueBuilder)));
+     }
+ };
+
+ // Block MakeDate: wraps storage.ToDate(..., local = false).
+ template<> TBlockItem TMakeDateKernelExec<TDate>::Make(TTMStorage& storage, const IValueBuilder& valueBuilder) {
+     auto& dateBuilder = valueBuilder.GetDateBuilder();
+     return TBlockItem(storage.ToDate(dateBuilder, /*local*/ false));
+ }
+
+ // Block MakeDatetime: wraps storage.ToDatetime().
+ template<> TBlockItem TMakeDateKernelExec<TDatetime>::Make(TTMStorage& storage, const IValueBuilder& valueBuilder) {
+     auto& dateBuilder = valueBuilder.GetDateBuilder();
+     return TBlockItem(storage.ToDatetime(dateBuilder));
+ }
+
+ // Block MakeTimestamp: wraps storage.ToTimestamp().
+ template<> TBlockItem TMakeDateKernelExec<TTimestamp>::Make(TTMStorage& storage, const IValueBuilder& valueBuilder) {
+     auto& dateBuilder = valueBuilder.GetDateBuilder();
+     return TBlockItem(storage.ToTimestamp(dateBuilder));
+ }
+
+ // Block MakeTzDate: storage.ToDate(..., local = true) tagged with the storage's timezone id.
+ template<> TBlockItem TMakeDateKernelExec<TTzDate>::Make(TTMStorage& storage, const IValueBuilder& valueBuilder) {
+     auto& dateBuilder = valueBuilder.GetDateBuilder();
+     TBlockItem tzItem(storage.ToDate(dateBuilder, /*local*/ true));
+     tzItem.SetTimezoneId(storage.TimezoneId);
+     return tzItem;
+ }
+
+ // Block MakeTzDatetime: storage.ToDatetime() tagged with the storage's timezone id.
+ template<> TBlockItem TMakeDateKernelExec<TTzDatetime>::Make(TTMStorage& storage, const IValueBuilder& valueBuilder) {
+     auto& dateBuilder = valueBuilder.GetDateBuilder();
+     TBlockItem tzItem(storage.ToDatetime(dateBuilder));
+     tzItem.SetTimezoneId(storage.TimezoneId);
+     return tzItem;
+ }
+
+ // Block MakeTzTimestamp: storage.ToTimestamp() tagged with the storage's timezone id.
+ template<> TBlockItem TMakeDateKernelExec<TTzTimestamp>::Make(TTMStorage& storage, const IValueBuilder& valueBuilder) {
+     auto& dateBuilder = valueBuilder.GetDateBuilder();
+     TBlockItem tzItem(storage.ToTimestamp(dateBuilder));
+     tzItem.SetTimezoneId(storage.TimezoneId);
+     return tzItem;
+ }
+
+ // MakeDate: converts a TM resource to Date via ToDate(..., local = false).
+ BEGIN_SIMPLE_STRICT_ARROW_UDF(TMakeDate, TDate(TAutoMap<TResource<TMResourceName>>)) {
+     auto& fields = Reference(args[0]);
+     return TUnboxedValuePod(fields.ToDate(valueBuilder->GetDateBuilder(), false));
+ }
+ END_SIMPLE_ARROW_UDF(TMakeDate, TMakeDateKernelExec<TDate>::Do);
+
+ // MakeDatetime: converts a TM resource to Datetime via ToDatetime().
+ BEGIN_SIMPLE_STRICT_ARROW_UDF(TMakeDatetime, TDatetime(TAutoMap<TResource<TMResourceName>>)) {
+     auto& fields = Reference(args[0]);
+     return TUnboxedValuePod(fields.ToDatetime(valueBuilder->GetDateBuilder()));
+ }
+ END_SIMPLE_ARROW_UDF(TMakeDatetime, TMakeDateKernelExec<TDatetime>::Do);
+
+ // MakeTimestamp: converts a TM resource to Timestamp via ToTimestamp().
+ BEGIN_SIMPLE_STRICT_ARROW_UDF(TMakeTimestamp, TTimestamp(TAutoMap<TResource<TMResourceName>>)) {
+     auto& fields = Reference(args[0]);
+     return TUnboxedValuePod(fields.ToTimestamp(valueBuilder->GetDateBuilder()));
+ }
+ END_SIMPLE_ARROW_UDF(TMakeTimestamp, TMakeDateKernelExec<TTimestamp>::Do);
+
+ // MakeTzDate: converts a TM resource to TzDate via ToDate(..., local = true) and tags the
+ // result with the storage's timezone id. If the conversion throws, the UDF is terminated
+ // with the source position and the textual form of the offending storage.
+ BEGIN_SIMPLE_STRICT_ARROW_UDF(TMakeTzDate, TTzDate(TAutoMap<TResource<TMResourceName>>)) {
+     auto& builder = valueBuilder->GetDateBuilder();
+     auto& storage = Reference(args[0]);
+     try {
+         TUnboxedValuePod result(storage.ToDate(builder, true));
+         result.SetTimezoneId(storage.TimezoneId);
+         return result;
+     } catch (const std::exception&) {
+         // A space separates the position from the message, as at the other UdfTerminate sites.
+         UdfTerminate((TStringBuilder() << Pos_ << " Timestamp "
+             << storage.ToString()
+             << " cannot be casted to TzDate"
+         ).data());
+     }
+ }
+ END_SIMPLE_ARROW_UDF(TMakeTzDate, TMakeDateKernelExec<TTzDate>::Do);
+
+ // MakeTzDatetime: ToDatetime() result tagged with the storage's timezone id.
+ BEGIN_SIMPLE_STRICT_ARROW_UDF(TMakeTzDatetime, TTzDatetime(TAutoMap<TResource<TMResourceName>>)) {
+     auto& fields = Reference(args[0]);
+     TUnboxedValuePod tzValue(fields.ToDatetime(valueBuilder->GetDateBuilder()));
+     tzValue.SetTimezoneId(fields.TimezoneId);
+     return tzValue;
+ }
+ END_SIMPLE_ARROW_UDF(TMakeTzDatetime, TMakeDateKernelExec<TTzDatetime>::Do);
+
+ // MakeTzTimestamp: ToTimestamp() result tagged with the storage's timezone id.
+ BEGIN_SIMPLE_STRICT_ARROW_UDF(TMakeTzTimestamp, TTzTimestamp(TAutoMap<TResource<TMResourceName>>)) {
+     auto& fields = Reference(args[0]);
+     TUnboxedValuePod tzValue(fields.ToTimestamp(valueBuilder->GetDateBuilder()));
+     tzValue.SetTimezoneId(fields.TimezoneId);
+     return tzValue;
+ }
+ END_SIMPLE_ARROW_UDF(TMakeTzTimestamp, TMakeDateKernelExec<TTzTimestamp>::Do);
+
+
+ // Convert: builds a TM64 resource from a TM resource via From().
+ SIMPLE_STRICT_UDF(TConvert, TResource<TM64ResourceName>(TAutoMap<TResource<TMResourceName>>)) {
+     Y_UNUSED(valueBuilder);
+     TUnboxedValuePod wide(0);
+     auto& narrowFields = Reference(args[0]);
+     Reference64(wide).From(narrowFields);
+     return wide;
+ }
+
+ // MakeDate32: converts a TM64 resource via ToDate32().
+ SIMPLE_STRICT_UDF(TMakeDate32, TDate32(TAutoMap<TResource<TM64ResourceName>>)) {
+     auto& dateBuilder = valueBuilder->GetDateBuilder();
+     return TUnboxedValuePod(Reference64(args[0]).ToDate32(dateBuilder));
+ }
+
+ // MakeDatetime64: converts a TM64 resource via ToDatetime64().
+ SIMPLE_STRICT_UDF(TMakeDatetime64, TDatetime64(TAutoMap<TResource<TM64ResourceName>>)) {
+     auto& dateBuilder = valueBuilder->GetDateBuilder();
+     return TUnboxedValuePod(Reference64(args[0]).ToDatetime64(dateBuilder));
+ }
+
+ // MakeTimestamp64: converts a TM64 resource via ToTimestamp64().
+ SIMPLE_STRICT_UDF(TMakeTimestamp64, TTimestamp64(TAutoMap<TResource<TM64ResourceName>>)) {
+     auto& dateBuilder = valueBuilder->GetDateBuilder();
+     return TUnboxedValuePod(Reference64(args[0]).ToTimestamp64(dateBuilder));
+ }
+
+ // Get*
+
+// Declares a strict scalar UDF TGet<field> that returns Get<field>() of the
+// TM resource argument, with `type` as the declared result type.
+#define GET_METHOD(field, type) \
+ SIMPLE_STRICT_UDF(TGet##field, type(TAutoMap<TResource<TMResourceName>>)) { \
+     Y_UNUSED(valueBuilder); \
+     const auto fieldValue = Get##field(args[0]); \
+     return TUnboxedValuePod(fieldValue); \
+ }
+
+// #define GET_METHOD(field, type) \
+// struct TGet##field##KernelExec : TUnaryKernelExec<TGet##field##KernelExec, TReaderTraits::TResource<false>, TFixedSizeArrayBuilder<type, false>> { \
+// template<typename TSink> \
+// static void Process(TBlockItem item, const IValueBuilder& valueBuilder, const TSink& sink) { \
+// Y_UNUSED(valueBuilder); \
+// sink(TBlockItem(Get##field(item))); \
+// } \
+// }; \
+// BEGIN_SIMPLE_STRICT_ARROW_UDF(TGet##field, type(TAutoMap<TResource<TMResourceName>>)) { \
+// Y_UNUSED(valueBuilder); \
+// return TUnboxedValuePod(Get##field(args[0])); \
+// } \
+// END_SIMPLE_ARROW_UDF_WITH_NULL_HANDLING(TGet##field, TGet##field##KernelExec::Do, arrow::compute::NullHandling::INTERSECTION);
+
+ GET_METHOD(Year, ui16) // defines TGetYear, returning GetYear(resource) as ui16
+ GET_METHOD(DayOfYear, ui16) // defines TGetDayOfYear, returning GetDayOfYear(resource) as ui16
+ GET_METHOD(Month, ui8) // defines TGetMonth, returning GetMonth(resource) as ui8
+
+ // template<typename TValue>
+ // TValue GetMonthNameValue(size_t idx) {
+ // static const std::array<TValue, 12U> monthNames = {{
+ // TValue::Embedded(TStringRef::Of("January")),
+ // TValue::Embedded(TStringRef::Of("February")),
+ // TValue::Embedded(TStringRef::Of("March")),
+ // TValue::Embedded(TStringRef::Of("April")),
+ // TValue::Embedded(TStringRef::Of("May")),
+ // TValue::Embedded(TStringRef::Of("June")),
+ // TValue::Embedded(TStringRef::Of("July")),
+ // TValue::Embedded(TStringRef::Of("August")),
+ // TValue::Embedded(TStringRef::Of("September")),
+ // TValue::Embedded(TStringRef::Of("October")),
+ // TValue::Embedded(TStringRef::Of("November")),
+ // TValue::Embedded(TStringRef::Of("December"))
+ // }};
+ // return monthNames.at(idx);
+ // }
+
+ // struct TGetMonthNameKernelExec : TUnaryKernelExec<TGetMonthNameKernelExec, TReaderTraits::TResource<true>, TStringArrayBuilder<arrow::StringType, false>> {
+ // template<typename TSink>
+ // static void Process(const IValueBuilder* valueBuilder, TBlockItem item, const TSink& sink) {
+ // Y_UNUSED(valueBuilder);
+ // sink(GetMonthNameValue<TBlockItem>(GetMonth(item) - 1U));
+ // }
+ // };
+
+ // BEGIN_SIMPLE_STRICT_ARROW_UDF(TGetMonthName, char*(TAutoMap<TResource<TMResourceName>>)) {
+ // Y_UNUSED(valueBuilder);
+ // return GetMonthNameValue<TUnboxedValue>(GetMonth(*args) - 1U);
+ // }
+ // END_SIMPLE_ARROW_UDF_WITH_NULL_HANDLING(TGetMonthName, TGetMonthNameKernelExec::Do, arrow::compute::NullHandling::INTERSECTION);
+
+ // GetMonthName: maps GetMonth() (1-based) to the English month name.
+ // std::array::at() throws if the index is outside 0..11.
+ SIMPLE_STRICT_UDF(TGetMonthName, char*(TAutoMap<TResource<TMResourceName>>)) {
+     Y_UNUSED(valueBuilder);
+     static const std::array<TUnboxedValue, 12U> englishMonths = {{
+         TUnboxedValuePod::Embedded(TStringRef::Of("January")),
+         TUnboxedValuePod::Embedded(TStringRef::Of("February")),
+         TUnboxedValuePod::Embedded(TStringRef::Of("March")),
+         TUnboxedValuePod::Embedded(TStringRef::Of("April")),
+         TUnboxedValuePod::Embedded(TStringRef::Of("May")),
+         TUnboxedValuePod::Embedded(TStringRef::Of("June")),
+         TUnboxedValuePod::Embedded(TStringRef::Of("July")),
+         TUnboxedValuePod::Embedded(TStringRef::Of("August")),
+         TUnboxedValuePod::Embedded(TStringRef::Of("September")),
+         TUnboxedValuePod::Embedded(TStringRef::Of("October")),
+         TUnboxedValuePod::Embedded(TStringRef::Of("November")),
+         TUnboxedValuePod::Embedded(TStringRef::Of("December"))
+     }};
+     const auto zeroBasedMonth = GetMonth(args[0]) - 1U;
+     return englishMonths.at(zeroBasedMonth);
+ }
+
+ GET_METHOD(WeekOfYear, ui8) // defines TGetWeekOfYear, returning GetWeekOfYear(resource) as ui8
+ GET_METHOD(WeekOfYearIso8601, ui8) // defines TGetWeekOfYearIso8601, returning GetWeekOfYearIso8601(resource) as ui8
+
+ // struct TGetDayOfMonthKernelExec : TUnaryKernelExec<TGetMonthNameKernelExec, TReaderTraits::TResource<false>, TFixedSizeArrayBuilder<ui8, false>> {
+ // template<typename TSink>
+ // static void Process(TBlockItem item, const TSink& sink) {
+ // sink(GetDay(item));
+ // }
+ // };
+
+ // BEGIN_SIMPLE_STRICT_ARROW_UDF(TGetDayOfMonth, ui8(TAutoMap<TResource<TMResourceName>>)) {
+ // Y_UNUSED(valueBuilder);
+ // return TUnboxedValuePod(GetDay(args[0]));
+ // }
+ // END_SIMPLE_ARROW_UDF_WITH_NULL_HANDLING(TGetDayOfMonth, TGetDayOfMonthKernelExec::Do, arrow::compute::NullHandling::INTERSECTION);
+
+ // GetDayOfMonth: returns GetDay() of the TM resource (the accessor is named Day, not DayOfMonth).
+ SIMPLE_STRICT_UDF(TGetDayOfMonth, ui8(TAutoMap<TResource<TMResourceName>>)) {
+     Y_UNUSED(valueBuilder);
+     const auto dayOfMonth = GetDay(args[0]);
+     return TUnboxedValuePod(dayOfMonth);
+ }
+
+ GET_METHOD(DayOfWeek, ui8) // defines TGetDayOfWeek, returning GetDayOfWeek(resource) as ui8
+
+ // Returns the English weekday name for a zero-based index (0 = Monday ... 6 = Sunday)
+ // as an embedded string value of type TValue. std::array::at() throws for idx > 6.
+ template<typename TValue>
+ TValue GetDayNameValue(size_t idx) {
+     static const std::array<TValue, 7U> weekdayNames = {{
+         TValue::Embedded(TStringRef::Of("Monday")),
+         TValue::Embedded(TStringRef::Of("Tuesday")),
+         TValue::Embedded(TStringRef::Of("Wednesday")),
+         TValue::Embedded(TStringRef::Of("Thursday")),
+         TValue::Embedded(TStringRef::Of("Friday")),
+         TValue::Embedded(TStringRef::Of("Saturday")),
+         TValue::Embedded(TStringRef::Of("Sunday"))
+     }};
+     const TValue& weekday = weekdayNames.at(idx);
+     return weekday;
+ }
+
+ // GetDayOfWeekName: maps GetDayOfWeek() (1-based) to the English weekday name.
+ SIMPLE_STRICT_UDF(TGetDayOfWeekName, char*(TAutoMap<TResource<TMResourceName>>)) {
+     Y_UNUSED(valueBuilder);
+     const auto zeroBasedDay = GetDayOfWeek(args[0]) - 1U;
+     return GetDayNameValue<TUnboxedValuePod>(zeroBasedDay);
+ }
+
+ // struct TGetDayOfWeekNameKernelExec : TUnaryKernelExec<TGetDayOfWeekNameKernelExec, TReaderTraits::TResource<true>, TStringArrayBuilder<arrow::StringType, false>> {
+ // template<typename TSink>
+ // static void Process(const IValueBuilder* valueBuilder, TBlockItem item, const TSink& sink) {
+ // Y_UNUSED(valueBuilder);
+ // sink(GetDayNameValue<TBlockItem>(GetDayOfWeek(item) - 1U));
+ // }
+ // };
+
+ // BEGIN_SIMPLE_STRICT_ARROW_UDF(TGetDayOfWeekName, char*(TAutoMap<TResource<TMResourceName>>)) {
+ // Y_UNUSED(valueBuilder);
+ // return GetDayNameValue<TUnboxedValuePod>(GetDayOfWeek(*args) - 1U);
+ // }
+ // END_SIMPLE_ARROW_UDF_WITH_NULL_HANDLING(TGetDayOfWeekName, TGetDayOfWeekNameKernelExec::Do, arrow::compute::NullHandling::INTERSECTION);
+
+ GET_METHOD(TimezoneId, ui16) // defines TGetTimezoneId, returning GetTimezoneId(resource) as ui16
+
+ // Block kernel for GetTimezoneName: emits the name from NUdf::GetTimezones() for the
+ // item's timezone id, or an empty item when the id is outside the table.
+ struct TTGetTimezoneNameKernelExec : TUnaryKernelExec<TTGetTimezoneNameKernelExec, TReaderTraits::TResource<false>, TStringArrayBuilder<arrow::BinaryType, false>> {
+     template<typename TSink>
+     static void Process(const IValueBuilder* valueBuilder, TBlockItem item, const TSink& sink) {
+         Y_UNUSED(valueBuilder);
+         const auto& zoneNames = NUdf::GetTimezones();
+         const auto zoneId = GetTimezoneId(item);
+         if (zoneId < zoneNames.size()) {
+             sink(TBlockItem{zoneNames[zoneId]});
+         } else {
+             sink(TBlockItem{});
+         }
+     }
+ };
+
+ // GetTimezoneName: returns the name from NUdf::GetTimezones() for the resource's
+ // timezone id, or an empty value when the id is outside the table.
+ BEGIN_SIMPLE_STRICT_ARROW_UDF(TGetTimezoneName, char*(TAutoMap<TResource<TMResourceName>>)) {
+     const auto& zoneNames = NUdf::GetTimezones();
+     const auto zoneId = GetTimezoneId(args[0]);
+     if (zoneId < zoneNames.size()) {
+         return valueBuilder->NewString(zoneNames[zoneId]);
+     }
+     return TUnboxedValuePod();
+ }
+ END_SIMPLE_ARROW_UDF(TGetTimezoneName, TTGetTimezoneNameKernelExec::Do);
+
+ // Update
+
+ // Update(resource, [Year, Month, Day, Hour, Minute, Second, Microsecond, TimezoneId]):
+ // overrides the supplied components of a TM resource. Returns an empty value when any
+ // supplied component fails its Validate* check or the combined storage fails Validate().
+ class TUpdate : public TBoxedValue {
+     const TSourcePosition Pos_;
+ public:
+     explicit TUpdate(TSourcePosition pos)
+         : Pos_(pos)
+     {}
+
+     // args[0] is the resource; args[1..8] are the optional overrides in signature order.
+     // Any std::exception terminates the UDF with the source position prepended.
+     TUnboxedValue Run(
+         const IValueBuilder* valueBuilder,
+         const TUnboxedValuePod* args) const override
+     {
+         try {
+             EMPTY_RESULT_ON_EMPTY_ARG(0);
+             auto updated = args[0];
+
+             if (args[1]) {
+                 auto newYear = args[1].Get<ui16>();
+                 if (!ValidateYear(newYear)) {
+                     return TUnboxedValuePod();
+                 }
+                 SetYear(updated, newYear);
+             }
+             if (args[2]) {
+                 auto newMonth = args[2].Get<ui8>();
+                 if (!ValidateMonth(newMonth)) {
+                     return TUnboxedValuePod();
+                 }
+                 SetMonth(updated, newMonth);
+             }
+             if (args[3]) {
+                 auto newDay = args[3].Get<ui8>();
+                 if (!ValidateDay(newDay)) {
+                     return TUnboxedValuePod();
+                 }
+                 SetDay(updated, newDay);
+             }
+             if (args[4]) {
+                 auto newHour = args[4].Get<ui8>();
+                 if (!ValidateHour(newHour)) {
+                     return TUnboxedValuePod();
+                 }
+                 SetHour(updated, newHour);
+             }
+             if (args[5]) {
+                 auto newMinute = args[5].Get<ui8>();
+                 if (!ValidateMinute(newMinute)) {
+                     return TUnboxedValuePod();
+                 }
+                 SetMinute(updated, newMinute);
+             }
+             if (args[6]) {
+                 auto newSecond = args[6].Get<ui8>();
+                 if (!ValidateSecond(newSecond)) {
+                     return TUnboxedValuePod();
+                 }
+                 SetSecond(updated, newSecond);
+             }
+             if (args[7]) {
+                 auto newMicrosecond = args[7].Get<ui32>();
+                 if (!ValidateMicrosecond(newMicrosecond)) {
+                     return TUnboxedValuePod();
+                 }
+                 SetMicrosecond(updated, newMicrosecond);
+             }
+             if (args[8]) {
+                 auto newTimezoneId = args[8].Get<ui16>();
+                 if (!ValidateTimezoneId(newTimezoneId)) {
+                     return TUnboxedValuePod();
+                 }
+                 SetTimezoneId(updated, newTimezoneId);
+             }
+
+             // Individually valid components may still form an invalid whole; re-check it.
+             auto& dateBuilder = valueBuilder->GetDateBuilder();
+             auto& fields = Reference(updated);
+             if (fields.Validate(dateBuilder)) {
+                 return updated;
+             }
+             return TUnboxedValuePod();
+         } catch (const std::exception& error) {
+             UdfTerminate((TStringBuilder() << Pos_ << " " << error.what()).data());
+         }
+     }
+
+     static const TStringRef& Name() {
+         static auto udfName = TStringRef::Of("Update");
+         return udfName;
+     }
+
+     // Registers the signature when `name` equals Name(); returns false for any other name.
+     static bool DeclareSignature(
+         const TStringRef& name,
+         TType*,
+         IFunctionTypeInfoBuilder& builder,
+         bool typesOnly)
+     {
+         if (Name() != name) {
+             return false;
+         }
+
+         auto tmResource = builder.Resource(TMResourceName);
+         auto optionalTmResource = builder.Optional()->Item(tmResource).Build();
+
+         // One mandatory resource argument followed by eight optional named overrides.
+         builder.OptionalArgs(8).Args()->Add(tmResource).Flags(ICallablePayload::TArgumentFlags::AutoMap)
+             .Add(builder.Optional()->Item<ui16>().Build()).Name("Year")
+             .Add(builder.Optional()->Item<ui8>().Build()).Name("Month")
+             .Add(builder.Optional()->Item<ui8>().Build()).Name("Day")
+             .Add(builder.Optional()->Item<ui8>().Build()).Name("Hour")
+             .Add(builder.Optional()->Item<ui8>().Build()).Name("Minute")
+             .Add(builder.Optional()->Item<ui8>().Build()).Name("Second")
+             .Add(builder.Optional()->Item<ui32>().Build()).Name("Microsecond")
+             .Add(builder.Optional()->Item<ui16>().Build()).Name("TimezoneId");
+
+         builder.Returns(optionalTmResource);
+
+         if (!typesOnly) {
+             builder.Implementation(new TUpdate(builder.GetSourcePosition()));
+         }
+
+         builder.IsStrict();
+         return true;
+     }
+ };
+
+ // From*
+
+ // FromSeconds: seconds -> Timestamp (microseconds); empty when ValidateDatetime() rejects the input.
+ BEGIN_SIMPLE_STRICT_ARROW_UDF(TFromSeconds, TOptional<TTimestamp>(TAutoMap<ui32>)) {
+     Y_UNUSED(valueBuilder);
+     const ui32 seconds = args[0].Get<ui32>();
+     if (ValidateDatetime(seconds)) {
+         return TUnboxedValuePod((ui64)(seconds * 1000000ull));
+     }
+     return TUnboxedValuePod();
+ }
+
+ // Block counterpart: the pair is (converted value, validity flag).
+ using TFromSecondsKernel = TUnaryUnsafeFixedSizeFilterKernel<ui32, ui64,
+     [] (ui32 sec) { return std::make_pair(ui64(sec * 1000000ull), ValidateDatetime(sec)); }>;
+ END_SIMPLE_ARROW_UDF(TFromSeconds, TFromSecondsKernel::Do);
+
+ // FromMilliseconds: milliseconds -> Timestamp (microseconds); empty when the input is
+ // not below MAX_TIMESTAMP / 1000.
+ BEGIN_SIMPLE_STRICT_ARROW_UDF(TFromMilliseconds, TOptional<TTimestamp>(TAutoMap<ui64>)) {
+     Y_UNUSED(valueBuilder);
+     const ui64 milliseconds = args[0].Get<ui64>();
+     if (milliseconds < MAX_TIMESTAMP / 1000u) {
+         return TUnboxedValuePod(milliseconds * 1000u);
+     }
+     return TUnboxedValuePod();
+ }
+
+ // Block counterpart: the pair is (converted value, validity flag).
+ using TFromMillisecondsKernel = TUnaryUnsafeFixedSizeFilterKernel<ui64, ui64,
+     [] (ui64 ms) { return std::make_pair(ui64(ms * 1000u), ms < MAX_TIMESTAMP / 1000u); }>;
+ END_SIMPLE_ARROW_UDF(TFromMilliseconds, TFromMillisecondsKernel::Do);
+
+ // FromMicroseconds: passes the value through as a Timestamp; empty when
+ // ValidateTimestamp() rejects it.
+ BEGIN_SIMPLE_STRICT_ARROW_UDF(TFromMicroseconds, TOptional<TTimestamp>(TAutoMap<ui64>)) {
+     Y_UNUSED(valueBuilder);
+     const ui64 microseconds = args[0].Get<ui64>();
+     if (ValidateTimestamp(microseconds)) {
+         return TUnboxedValuePod(microseconds);
+     }
+     return TUnboxedValuePod();
+ }
+
+ // Block counterpart: the pair is (value, validity flag).
+ using TFromMicrosecondsKernel = TUnaryUnsafeFixedSizeFilterKernel<ui64, ui64,
+     [] (ui64 usec) { return std::make_pair(usec, ValidateTimestamp(usec)); }>;
+ END_SIMPLE_ARROW_UDF(TFromMicroseconds, TFromMicrosecondsKernel::Do);
+
+ // Block kernel shared by the IntervalFrom* UDFs: scales `interval` by `Multiplier`
+ // (microseconds per input unit) and reports whether the scaled value is a valid Interval.
+ //
+ // The range check is done on the input against a pre-divided bound, so the multiplication
+ // is only performed when the product lies strictly inside (-MAX_TIMESTAMP, MAX_TIMESTAMP).
+ // This validates the scaled result rather than the raw input, and it cannot overflow i64.
+ template <typename TInput, i64 Multiplier>
+ using TIntervalFromKernel = TUnaryUnsafeFixedSizeFilterKernel<TInput, i64,
+     [] (TInput interval) {
+         static_assert(Multiplier > 0, "Multiplier must be positive");
+         // |units * Multiplier| <= MAX_TIMESTAMP - 1  <=>  |units| <= (MAX_TIMESTAMP - 1) / Multiplier
+         const i64 maxUnits = (i64(MAX_TIMESTAMP) - 1) / Multiplier;
+         const i64 units = i64(interval);
+         if (units < -maxUnits || units > maxUnits) {
+             return std::make_pair(i64(0), false);
+         }
+         return std::make_pair(i64(units * Multiplier), true);
+     }>;
+
+ // IntervalFromDays: days -> Interval (microseconds); empty when the result would fall
+ // outside (-MAX_TIMESTAMP, MAX_TIMESTAMP).
+ // The bound is checked before multiplying: a large i32 day count times UsecondsInDay
+ // does not fit into i64 and could otherwise wrap into the valid range.
+ BEGIN_SIMPLE_STRICT_ARROW_UDF(TIntervalFromDays, TOptional<TInterval>(TAutoMap<i32>)) {
+     Y_UNUSED(valueBuilder);
+     const i64 usecPerDay = i64(UsecondsInDay);
+     const i64 maxDays = (i64(MAX_TIMESTAMP) - 1) / usecPerDay;
+     const i64 days = args[0].Get<i32>();
+     if (days < -maxDays || days > maxDays) {
+         return TUnboxedValuePod();
+     }
+     return TUnboxedValuePod(i64(days * usecPerDay));
+ }
+ END_SIMPLE_ARROW_UDF(TIntervalFromDays, (TIntervalFromKernel<i32, UsecondsInDay>::Do));
+
+ // IntervalFromHours: hours -> Interval (microseconds); empty when ValidateInterval() rejects the result.
+ BEGIN_SIMPLE_STRICT_ARROW_UDF(TIntervalFromHours, TOptional<TInterval>(TAutoMap<i32>)) {
+     Y_UNUSED(valueBuilder);
+     const i64 microseconds = i64(args[0].Get<i32>()) * UsecondsInHour;
+     if (!ValidateInterval(microseconds)) {
+         return TUnboxedValuePod();
+     }
+     return TUnboxedValuePod(microseconds);
+ }
+ END_SIMPLE_ARROW_UDF(TIntervalFromHours, (TIntervalFromKernel<i32, UsecondsInHour>::Do));
+
+ // IntervalFromMinutes: minutes -> Interval (microseconds); empty when ValidateInterval() rejects the result.
+ BEGIN_SIMPLE_STRICT_ARROW_UDF(TIntervalFromMinutes, TOptional<TInterval>(TAutoMap<i32>)) {
+     Y_UNUSED(valueBuilder);
+     const i64 microseconds = i64(args[0].Get<i32>()) * UsecondsInMinute;
+     if (!ValidateInterval(microseconds)) {
+         return TUnboxedValuePod();
+     }
+     return TUnboxedValuePod(microseconds);
+ }
+ END_SIMPLE_ARROW_UDF(TIntervalFromMinutes, (TIntervalFromKernel<i32, UsecondsInMinute>::Do));
+
+ // IntervalFromSeconds: seconds -> Interval (microseconds); empty when ValidateInterval() rejects the result.
+ BEGIN_SIMPLE_STRICT_ARROW_UDF(TIntervalFromSeconds, TOptional<TInterval>(TAutoMap<i32>)) {
+     Y_UNUSED(valueBuilder);
+     const i64 microseconds = i64(args[0].Get<i32>()) * UsecondsInSecond;
+     if (!ValidateInterval(microseconds)) {
+         return TUnboxedValuePod();
+     }
+     return TUnboxedValuePod(microseconds);
+ }
+ END_SIMPLE_ARROW_UDF(TIntervalFromSeconds, (TIntervalFromKernel<i32, UsecondsInSecond>::Do));
+
+ // IntervalFromMilliseconds: milliseconds -> Interval (microseconds); empty when the result
+ // would fall outside (-MAX_TIMESTAMP, MAX_TIMESTAMP).
+ // The bound is checked before multiplying: an arbitrary i64 times UsecondsInMilliseconds
+ // can overflow and wrap into the valid range (e.g. 18446744073709552 ms would wrap to 384 us).
+ BEGIN_SIMPLE_STRICT_ARROW_UDF(TIntervalFromMilliseconds, TOptional<TInterval>(TAutoMap<i64>)) {
+     Y_UNUSED(valueBuilder);
+     const i64 usecPerMillisecond = i64(UsecondsInMilliseconds);
+     const i64 maxMilliseconds = (i64(MAX_TIMESTAMP) - 1) / usecPerMillisecond;
+     const i64 milliseconds = args[0].Get<i64>();
+     if (milliseconds < -maxMilliseconds || milliseconds > maxMilliseconds) {
+         return TUnboxedValuePod();
+     }
+     return TUnboxedValuePod(i64(milliseconds * usecPerMillisecond));
+ }
+ END_SIMPLE_ARROW_UDF(TIntervalFromMilliseconds, (TIntervalFromKernel<i64, UsecondsInMilliseconds>::Do));
+
+ // IntervalFromMicroseconds: passes the value through; empty when ValidateInterval() rejects it.
+ BEGIN_SIMPLE_STRICT_ARROW_UDF(TIntervalFromMicroseconds, TOptional<TInterval>(TAutoMap<i64>)) {
+     Y_UNUSED(valueBuilder);
+     const i64 microseconds = args[0].Get<i64>();
+     if (!ValidateInterval(microseconds)) {
+         return TUnboxedValuePod();
+     }
+     return TUnboxedValuePod(microseconds);
+ }
+ END_SIMPLE_ARROW_UDF(TIntervalFromMicroseconds, (TIntervalFromKernel<i64, 1>::Do));
+
+ // To*
+
+ // ToDays: Interval (microseconds) -> whole days as i32, using integer division.
+ BEGIN_SIMPLE_STRICT_ARROW_UDF(TToDays, i32(TAutoMap<TInterval>)) {
+     Y_UNUSED(valueBuilder);
+     const auto microseconds = args[0].Get<i64>();
+     return TUnboxedValuePod(i32(microseconds / UsecondsInDay));
+ }
+ END_SIMPLE_ARROW_UDF_WITH_NULL_HANDLING(TToDays,
+     (UnaryPreallocatedExecImpl<i64, i32, [] (i64 usec) { return i32(usec / UsecondsInDay); }>),
+     arrow::compute::NullHandling::INTERSECTION);
+
+ // ToHours: Interval (microseconds) -> whole hours as i32, using integer division.
+ BEGIN_SIMPLE_STRICT_ARROW_UDF(TToHours, i32(TAutoMap<TInterval>)) {
+     Y_UNUSED(valueBuilder);
+     const auto microseconds = args[0].Get<i64>();
+     return TUnboxedValuePod(i32(microseconds / UsecondsInHour));
+ }
+ END_SIMPLE_ARROW_UDF_WITH_NULL_HANDLING(TToHours,
+     (UnaryPreallocatedExecImpl<i64, i32, [] (i64 usec) { return i32(usec / UsecondsInHour); }>),
+     arrow::compute::NullHandling::INTERSECTION);
+
+ // ToMinutes: Interval (microseconds) -> whole minutes as i32, using integer division.
+ BEGIN_SIMPLE_STRICT_ARROW_UDF(TToMinutes, i32(TAutoMap<TInterval>)) {
+     Y_UNUSED(valueBuilder);
+     const auto microseconds = args[0].Get<i64>();
+     return TUnboxedValuePod(i32(microseconds / UsecondsInMinute));
+ }
+ END_SIMPLE_ARROW_UDF_WITH_NULL_HANDLING(TToMinutes,
+     (UnaryPreallocatedExecImpl<i64, i32, [] (i64 usec) { return i32(usec / UsecondsInMinute); }>),
+     arrow::compute::NullHandling::INTERSECTION);
+
+ // StartOf*
+
+ // Block kernel shared by the StartOf*/EndOf* UDFs. `Core` maps a TTMStorage to
+ // TMaybe<TTMStorage>; an empty result is emitted as an empty block item.
+ template<auto Core>
+ struct TStartOfKernelExec : TUnaryKernelExec<TStartOfKernelExec<Core>, TResourceBlockReader<false>, TResourceArrayBuilder<true>> {
+     template<typename TSink>
+     static void Process(const IValueBuilder* valueBuilder, TBlockItem item, const TSink& sink) {
+         auto& fields = Reference(item);
+         auto adjusted = Core(fields, *valueBuilder);
+         if (!adjusted) {
+             sink(TBlockItem{});
+             return;
+         }
+         fields = adjusted.GetRef();
+         sink(item);
+     }
+ };
+
+ // Resets the storage to January 1st, 00:00:00.000000 of its year.
+ // Returns an empty TMaybe when the adjusted storage fails Validate().
+ TMaybe<TTMStorage> StartOfYear(TTMStorage storage, const IValueBuilder& valueBuilder) {
+     storage.Microsecond = 0;
+     storage.Second = 0;
+     storage.Minute = 0;
+     storage.Hour = 0;
+     storage.Day = 1;
+     storage.Month = 1;
+     if (storage.Validate(valueBuilder.GetDateBuilder())) {
+         return storage;
+     }
+     return {};
+ }
+ // StartOfYear UDF: applies StartOfYear() to a copy of the argument; empty on failure.
+ BEGIN_SIMPLE_STRICT_ARROW_UDF(TStartOfYear, TOptional<TResource<TMResourceName>>(TAutoMap<TResource<TMResourceName>>)) {
+     auto updated = args[0];
+     auto& fields = Reference(updated);
+     auto adjusted = StartOfYear(fields, *valueBuilder);
+     if (!adjusted) {
+         return TUnboxedValuePod{};
+     }
+     fields = adjusted.GetRef();
+     return updated;
+ }
+ END_SIMPLE_ARROW_UDF(TStartOfYear, TStartOfKernelExec<StartOfYear>::Do);
+
+ // Resets the storage to the first day of its quarter (months 1, 4, 7 or 10) at 00:00:00.000000.
+ // Returns an empty TMaybe when the adjusted storage fails Validate().
+ TMaybe<TTMStorage> StartOfQuarter(TTMStorage storage, const IValueBuilder& valueBuilder) {
+     const auto zeroBasedQuarter = (storage.Month - 1) / 3;
+     storage.Month = zeroBasedQuarter * 3 + 1;
+     storage.Day = 1;
+     storage.Microsecond = 0;
+     storage.Second = 0;
+     storage.Minute = 0;
+     storage.Hour = 0;
+     if (storage.Validate(valueBuilder.GetDateBuilder())) {
+         return storage;
+     }
+     return {};
+ }
+ // StartOfQuarter UDF: applies StartOfQuarter() to a copy of the argument; empty on failure.
+ BEGIN_SIMPLE_STRICT_ARROW_UDF(TStartOfQuarter, TOptional<TResource<TMResourceName>>(TAutoMap<TResource<TMResourceName>>)) {
+     auto updated = args[0];
+     auto& fields = Reference(updated);
+     auto adjusted = StartOfQuarter(fields, *valueBuilder);
+     if (!adjusted) {
+         return TUnboxedValuePod{};
+     }
+     fields = adjusted.GetRef();
+     return updated;
+ }
+ END_SIMPLE_ARROW_UDF(TStartOfQuarter, TStartOfKernelExec<StartOfQuarter>::Do);
+
+ // Resets the storage to the first day of its month at 00:00:00.000000.
+ // Returns an empty TMaybe when the adjusted storage fails Validate().
+ TMaybe<TTMStorage> StartOfMonth(TTMStorage storage, const IValueBuilder& valueBuilder) {
+     storage.Microsecond = 0;
+     storage.Second = 0;
+     storage.Minute = 0;
+     storage.Hour = 0;
+     storage.Day = 1;
+     if (storage.Validate(valueBuilder.GetDateBuilder())) {
+         return storage;
+     }
+     return {};
+ }
+ // StartOfMonth UDF: applies StartOfMonth() to a copy of the argument; empty on failure.
+ BEGIN_SIMPLE_STRICT_ARROW_UDF(TStartOfMonth, TOptional<TResource<TMResourceName>>(TAutoMap<TResource<TMResourceName>>)) {
+     auto updated = args[0];
+     auto& fields = Reference(updated);
+     auto adjusted = StartOfMonth(fields, *valueBuilder);
+     if (!adjusted) {
+         return TUnboxedValuePod{};
+     }
+     fields = adjusted.GetRef();
+     return updated;
+ }
+ END_SIMPLE_ARROW_UDF(TStartOfMonth, TStartOfKernelExec<StartOfMonth>::Do);
+
+ // Moves the storage to the last day of its month (leap years taken into account)
+ // and zeroes the time of day, i.e. the result is 00:00:00.000000 of that day.
+ // Returns an empty TMaybe when the adjusted storage fails Validate().
+ TMaybe<TTMStorage> EndOfMonth(TTMStorage storage, const IValueBuilder& valueBuilder) {
+     const auto leapYear = NMiniKQL::IsLeapYear(storage.Year);
+     storage.Day = NMiniKQL::GetMonthLength(storage.Month, leapYear);
+     storage.Microsecond = 0;
+     storage.Second = 0;
+     storage.Minute = 0;
+     storage.Hour = 0;
+
+     if (storage.Validate(valueBuilder.GetDateBuilder())) {
+         return storage;
+     }
+     return {};
+ }
+
+ // EndOfMonth UDF: applies EndOfMonth() to a copy of the argument; empty on failure.
+ BEGIN_SIMPLE_STRICT_ARROW_UDF(TEndOfMonth, TOptional<TResource<TMResourceName>>(TAutoMap<TResource<TMResourceName>>)) {
+     auto updated = args[0];
+     auto& fields = Reference(updated);
+     auto adjusted = EndOfMonth(fields, *valueBuilder);
+     if (!adjusted) {
+         return TUnboxedValuePod{};
+     }
+     fields = adjusted.GetRef();
+     return updated;
+ }
+ END_SIMPLE_ARROW_UDF(TEndOfMonth, TStartOfKernelExec<EndOfMonth>::Do);
+
+ // Moves a split date back by (DayOfWeek - 1) whole days and resets the time
+ // of day to 00:00:00.000000. Yields an empty TMaybe when the shift would be
+ // larger than the datetime value itself.
+ TMaybe<TTMStorage> StartOfWeek(TTMStorage storage, const IValueBuilder& valueBuilder) {
+     const auto& dateBuilder = valueBuilder.GetDateBuilder();
+     // 86400 seconds per day; DayOfWeek == 1 means no shift at all.
+     const ui32 shift = 86400u * (storage.DayOfWeek - 1u);
+     const auto datetime = storage.ToDatetime(dateBuilder);
+     if (shift > datetime) {
+         return {};
+     }
+
+     storage.FromDatetime(dateBuilder, datetime - shift, storage.TimezoneId);
+     storage.Microsecond = 0;
+     storage.Second = 0;
+     storage.Minute = 0;
+     storage.Hour = 0;
+     return storage;
+ }
+
+ // Scalar entry point: applies StartOfWeek to the resource in args[0] and
+ // returns an empty optional when the helper yields no value.
+ BEGIN_SIMPLE_STRICT_ARROW_UDF(TStartOfWeek, TOptional<TResource<TMResourceName>>(TAutoMap<TResource<TMResourceName>>)) {
+     auto value = args[0];
+     const auto truncated = StartOfWeek(Reference(value), *valueBuilder);
+     if (!truncated) {
+         return TUnboxedValuePod{};
+     }
+     // Write the adjusted fields back into the copied resource value.
+     Reference(value) = truncated.GetRef();
+     return value;
+ }
+ END_SIMPLE_ARROW_UDF(TStartOfWeek, TStartOfKernelExec<StartOfWeek>::Do);
+
+ // Resets the time of day of a split date to 00:00:00.000000. Yields an empty
+ // TMaybe when the result is rejected by storage.Validate().
+ TMaybe<TTMStorage> StartOfDay(TTMStorage storage, const IValueBuilder& valueBuilder) {
+     storage.Microsecond = 0;
+     storage.Second = 0;
+     storage.Minute = 0;
+     storage.Hour = 0;
+
+     if (storage.Validate(valueBuilder.GetDateBuilder())) {
+         return storage;
+     }
+     return {};
+ }
+
+ // Scalar entry point: applies StartOfDay to the resource in args[0] and
+ // returns an empty optional when the helper yields no value.
+ BEGIN_SIMPLE_STRICT_ARROW_UDF(TStartOfDay, TOptional<TResource<TMResourceName>>(TAutoMap<TResource<TMResourceName>>)) {
+     auto value = args[0];
+     const auto truncated = StartOfDay(Reference(value), *valueBuilder);
+     if (!truncated) {
+         return TUnboxedValuePod{};
+     }
+     // Write the adjusted fields back into the copied resource value.
+     Reference(value) = truncated.GetRef();
+     return value;
+ }
+ END_SIMPLE_ARROW_UDF(TStartOfDay, TStartOfKernelExec<StartOfDay>::Do);
+
+ // Rounds the time of day of a split date down to a multiple of `interval`.
+ // An interval of 86400000000 or more is treated like StartOfDay. `interval`
+ // must be non-zero (the visible callers filter out zero before calling).
+ // Yields an empty TMaybe when the result is rejected by storage.Validate().
+ TMaybe<TTMStorage> StartOf(TTMStorage storage, ui64 interval, const IValueBuilder& valueBuilder) {
+     const bool wholeDay = interval >= 86400000000ull;
+     if (!wholeDay) {
+         const auto timeOfDay = storage.ToTimeOfDay();
+         // Subtracting the remainder equals (timeOfDay / interval) * interval.
+         storage.FromTimeOfDay(timeOfDay - timeOfDay % interval);
+     } else {
+         // treat as StartOfDay
+         storage.Microsecond = 0;
+         storage.Second = 0;
+         storage.Minute = 0;
+         storage.Hour = 0;
+     }
+
+     if (storage.Validate(valueBuilder.GetDateBuilder())) {
+         return storage;
+     }
+     return {};
+ }
+
+ // Block kernel for the StartOf UDF: arg1 is the resource item, arg2 the
+ // interval. A zero interval passes the item through unchanged; an empty
+ // result from StartOf is emitted as an empty block item.
+ struct TStartOfBinaryKernelExec : TBinaryKernelExec<TStartOfBinaryKernelExec> {
+     template<typename TSink>
+     static void Process(const IValueBuilder* valueBuilder, TBlockItem arg1, TBlockItem arg2, const TSink& sink) {
+         // The sign of the interval is ignored.
+         const ui64 step = std::abs(arg2.Get<i64>());
+         if (step == 0) {
+             sink(arg1);
+             return;
+         }
+
+         auto& fields = Reference(arg1);
+         const auto rounded = StartOf(fields, step, *valueBuilder);
+         if (!rounded) {
+             sink(TBlockItem{});
+             return;
+         }
+         fields = rounded.GetRef();
+         sink(arg1);
+     }
+ };
+
+ // Scalar entry point for StartOf: args[0] is the resource, args[1] the
+ // interval (its sign is ignored). A zero interval returns the input as is;
+ // an empty result from StartOf is returned as an empty optional.
+ BEGIN_SIMPLE_STRICT_ARROW_UDF(TStartOf, TOptional<TResource<TMResourceName>>(TAutoMap<TResource<TMResourceName>>, TAutoMap<TInterval>)) {
+     auto value = args[0];
+     const ui64 step = std::abs(args[1].Get<i64>());
+     if (step != 0) {
+         const auto rounded = StartOf(Reference(value), step, *valueBuilder);
+         if (!rounded) {
+             return TUnboxedValuePod{};
+         }
+         Reference(value) = rounded.GetRef();
+     }
+     return value;
+ }
+ END_SIMPLE_ARROW_UDF(TStartOf, TStartOfBinaryKernelExec::Do);
+
+ // Block kernel for TimeOfDay: emits storage.ToTimeOfDay() of each resource
+ // item converted to the Interval layout type.
+ struct TTimeOfDayKernelExec : TUnaryKernelExec<TTimeOfDayKernelExec, TReaderTraits::TResource<false>, TFixedSizeArrayBuilder<TDataType<TInterval>::TLayout, false>> {
+     template<typename TSink>
+     static void Process(const IValueBuilder* valueBuilder, TBlockItem item, const TSink& sink) {
+         Y_UNUSED(valueBuilder);
+         const auto timeOfDay = Reference(item).ToTimeOfDay();
+         sink(TBlockItem{static_cast<TDataType<TInterval>::TLayout>(timeOfDay)});
+     }
+ };
+
+ // Named alias for the block kernel, passed to END_SIMPLE_ARROW_UDF below.
+ const auto timeOfDayKernelExecDo = TTimeOfDayKernelExec::Do;
+ // Scalar entry point for TimeOfDay: returns storage.ToTimeOfDay() of the
+ // resource in args[0] as a signed 64-bit value.
+ BEGIN_SIMPLE_STRICT_ARROW_UDF(TTimeOfDay, TInterval(TAutoMap<TResource<TMResourceName>>)) {
+     Y_UNUSED(valueBuilder);
+     const auto timeOfDay = Reference(args[0]).ToTimeOfDay();
+     return TUnboxedValuePod(static_cast<i64>(timeOfDay));
+ }
+ END_SIMPLE_ARROW_UDF(TTimeOfDay, timeOfDayKernelExecDo);
+
+
+ // Add ...
+
+ // Generic block kernel for the Shift* UDFs: forwards the date item, the i32
+ // shift amount and the date builder to `Core` and emits whatever it returns.
+ template<auto Core>
+ struct TAddKernelExec : TBinaryKernelExec<TAddKernelExec<Core>> {
+     template<typename TSink>
+     static void Process(const IValueBuilder* valueBuilder, TBlockItem date, TBlockItem arg, const TSink& sink) {
+         const auto amount = arg.Get<i32>();
+         const auto& dateBuilder = valueBuilder->GetDateBuilder();
+         sink(Core(date, amount, dateBuilder));
+     }
+ };
+
+ // Scalar entry point for ShiftYears: delegates to DoAddYears with the i32
+ // amount taken from args[1].
+ BEGIN_SIMPLE_STRICT_ARROW_UDF(TShiftYears, TOptional<TResource<TMResourceName>>(TAutoMap<TResource<TMResourceName>>, i32)) {
+     const auto years = args[1].Get<i32>();
+     const auto& dateBuilder = valueBuilder->GetDateBuilder();
+     return DoAddYears(args[0], years, dateBuilder);
+ }
+ END_SIMPLE_ARROW_UDF(TShiftYears, TAddKernelExec<DoAddYears<TBlockItem>>::Do);
+
+ // Scalar entry point for ShiftQuarters: delegates to DoAddQuarters with the
+ // i32 amount taken from args[1].
+ BEGIN_SIMPLE_STRICT_ARROW_UDF(TShiftQuarters, TOptional<TResource<TMResourceName>>(TAutoMap<TResource<TMResourceName>>, i32)) {
+     const auto quarters = args[1].Get<i32>();
+     const auto& dateBuilder = valueBuilder->GetDateBuilder();
+     return DoAddQuarters(args[0], quarters, dateBuilder);
+ }
+ END_SIMPLE_ARROW_UDF(TShiftQuarters, TAddKernelExec<DoAddQuarters<TBlockItem>>::Do);
+
+ // Scalar entry point for ShiftMonths: delegates to DoAddMonths with the i32
+ // amount taken from args[1].
+ BEGIN_SIMPLE_STRICT_ARROW_UDF(TShiftMonths, TOptional<TResource<TMResourceName>>(TAutoMap<TResource<TMResourceName>>, i32)) {
+     const auto months = args[1].Get<i32>();
+     const auto& dateBuilder = valueBuilder->GetDateBuilder();
+     return DoAddMonths(args[0], months, dateBuilder);
+ }
+ END_SIMPLE_ARROW_UDF(TShiftMonths, TAddKernelExec<DoAddMonths<TBlockItem>>::Do);
+
+ // Compile-time unrolled decimal printer.
+ // PrintNDigits<N, true>::Do writes exactly N digits of (in mod 10^N), zero
+ // padded on the left. With Exacly == false, output stops as soon as the
+ // remaining lower digits are all zero, so trailing zeros are dropped.
+ // Do() returns the number of characters written; no terminator is appended.
+ template<size_t Digits, bool Exacly = true>
+ struct PrintNDigits;
+
+ // Recursion terminator: zero digits left, nothing to write.
+ template<bool Exacly>
+ struct PrintNDigits<0U, Exacly> {
+     static constexpr ui32 Miltiplier = 1U;
+
+     template <typename T>
+     static constexpr size_t Do(T, char*) { return 0U; }
+ };
+
+ template<size_t Digits, bool Exacly>
+ struct PrintNDigits {
+     using TNextPrint = PrintNDigits<Digits - 1U, Exacly>;
+     // 10^Digits; the value is reduced modulo this before printing.
+     static constexpr ui32 Miltiplier = TNextPrint::Miltiplier * 10U;
+
+     template <typename T>
+     static constexpr size_t Do(T in, char* out) {
+         in %= Miltiplier;
+         if (!Exacly && !in) {
+             // Non-exact mode: only zeros remain, stop here.
+             return 0U;
+         }
+         // Emit the most significant remaining digit, then recurse on the rest.
+         *out++ = "0123456789"[in / TNextPrint::Miltiplier];
+         return TNextPrint::Do(in, out) + 1U;
+     }
+ };
+
+ // Format
+
+ // Implements the "Format" UDF.
+ //
+ // TFormat::Run receives the format string (args[0]) and an optional
+ // AlwaysWriteFractionalSeconds flag (args[1]) and returns a callable (TImpl)
+ // that renders a resource value into a string.
+ //
+ // Supported specifiers (anything else makes the TImpl constructor throw):
+ //   %%  literal '%'
+ //   %Y  year, 4 digits          %m  month, 2 digits     %d  day, 2 digits
+ //   %H  hour, 2 digits          %M  minute, 2 digits
+ //   %S  second, 2 digits; followed by '.' and the microseconds when they are
+ //       non-zero or when AlwaysWriteFractionalSeconds is set (6 digits when
+ //       the flag is set, otherwise trailing zeros are dropped)
+ //   %z  timezone shift as sign + 4 digits (shift / 60, shift % 60)
+ //   %Z  timezone name
+ //   %b  3-letter English month abbreviation   %B  full English month name
+ class TFormat : public TBoxedValue {
+ public:
+     explicit TFormat(TSourcePosition pos)
+         : Pos_(pos)
+     {}
+
+     static const TStringRef& Name() {
+         static auto name = TStringRef::Of("Format");
+         return name;
+     }
+
+     // Declares Format(String, [AlwaysWriteFractionalSeconds: Bool?]) ->
+     // Callable<(Resource /* AutoMap */) -> String>. Returns false when `name`
+     // is not this function's name.
+     static bool DeclareSignature(
+         const TStringRef& name,
+         TType*,
+         IFunctionTypeInfoBuilder& builder,
+         bool typesOnly)
+     {
+         if (Name() != name) {
+             return false;
+         }
+
+         auto resourceType = builder.Resource(TMResourceName);
+
+         auto stringType = builder.SimpleType<char*>();
+
+         auto boolType = builder.SimpleType<bool>();
+         auto optionalBoolType = builder.Optional()->Item(boolType).Build();
+
+         auto args = builder.Args();
+         args->Add(stringType);
+         args->Add(optionalBoolType).Name("AlwaysWriteFractionalSeconds");
+         args->Done();
+         builder.OptionalArgs(1);
+         builder.Returns(
+             builder.Callable(1)
+                 ->Returns(stringType)
+                 .Arg(resourceType)
+                 .Flags(ICallablePayload::TArgumentFlags::AutoMap)
+                 .Build()
+         );
+
+         if (!typesOnly) {
+             builder.Implementation(new TFormat(builder.GetSourcePosition()));
+         }
+
+         builder.IsStrict();
+
+         return true;
+     }
+
+ private:
+     // Each printer writes its piece at `out` and returns the number of
+     // characters written.
+     using TPrintersList = std::vector<std::function<size_t(char*, const TUnboxedValuePod&, const IDateBuilder&)>>;
+
+     // Printer for a literal run of the format string.
+     struct TDataPrinter {
+         const std::string_view Data;
+
+         size_t operator()(char* out, const TUnboxedValuePod&, const IDateBuilder&) const {
+             std::memcpy(out, Data.data(), Data.size());
+             return Data.size();
+         }
+     };
+
+     // Builds the formatter callable; exceptions from parsing the format
+     // string terminate the UDF with the source position attached.
+     TUnboxedValue Run(const IValueBuilder*, const TUnboxedValuePod* args) const final try {
+         bool alwaysWriteFractionalSeconds = false;
+         if (auto val = args[1]) {
+             alwaysWriteFractionalSeconds = val.Get<bool>();
+         }
+
+         return TUnboxedValuePod(new TImpl(Pos_, args[0], alwaysWriteFractionalSeconds));
+     } catch (const std::exception& e) {
+         UdfTerminate((TStringBuilder() << Pos_ << " " << e.what()).data());
+     }
+
+     class TImpl : public TBoxedValue {
+     public:
+         // Renders args[0] by running every printer in order into a buffer of
+         // ReservedSize_ bytes, then trims the buffer to the size actually used.
+         TUnboxedValue Run(
+             const IValueBuilder* valueBuilder,
+             const TUnboxedValuePod* args) const override
+         {
+             try {
+                 EMPTY_RESULT_ON_EMPTY_ARG(0);
+                 const auto value = args[0];
+
+                 auto& builder = valueBuilder->GetDateBuilder();
+
+                 auto result = valueBuilder->NewStringNotFilled(ReservedSize_);
+                 auto pos = result.AsStringRef().Data();
+                 ui32 size = 0U;
+
+                 for (const auto& printer : Printers_) {
+                     if (const auto plus = printer(pos, value, builder)) {
+                         size += plus;
+                         pos += plus;
+                     }
+                 }
+
+                 if (size < ReservedSize_) {
+                     result = valueBuilder->SubString(result.Release(), 0U, size);
+                 }
+
+                 return result;
+             } catch (const std::exception& e) {
+                 UdfTerminate((TStringBuilder() << Pos_ << " " << e.what()).data());
+             }
+         }
+
+         // Parses `format` once into a list of printers and accumulates in
+         // ReservedSize_ the maximum number of bytes those printers may write.
+         // Throws yexception on a trailing '%' or an unknown specifier.
+         TImpl(TSourcePosition pos, TUnboxedValue format, bool alwaysWriteFractionalSeconds)
+             : Pos_(pos)
+             , Format_(format)
+         {
+             // The literal printers keep views into Format_, which is why the
+             // format value is stored as a member.
+             const std::string_view formatView(Format_.AsStringRef());
+             auto dataStart = formatView.begin();
+             size_t dataSize = 0U;
+
+             for (auto ptr = formatView.begin(); formatView.end() != ptr; ++ptr) {
+                 if (*ptr != '%') {
+                     ++dataSize;
+                     continue;
+                 }
+
+                 // Flush the literal run collected before this '%'.
+                 if (dataSize) {
+                     Printers_.emplace_back(TDataPrinter{std::string_view(&*dataStart, dataSize)});
+                     ReservedSize_ += dataSize;
+                     dataSize = 0U;
+                 }
+
+                 if (formatView.end() == ++ptr) {
+                     ythrow yexception() << "format string ends with single %%";
+                 }
+
+                 switch (*ptr) {
+                 case '%': {
+                     static constexpr size_t size = 1;
+                     Printers_.emplace_back([](char* out, const TUnboxedValuePod&, const IDateBuilder&) {
+                         *out = '%';
+                         return size;
+                     });
+                     ReservedSize_ += size;
+                     break;
+                 }
+                 case 'Y': {
+                     static constexpr size_t size = 4;
+                     Printers_.emplace_back([](char* out, const TUnboxedValuePod& value, const IDateBuilder&) {
+                         return PrintNDigits<size>::Do(GetYear(value), out);
+                     });
+                     ReservedSize_ += size;
+                     break;
+                 }
+                 case 'm': {
+                     static constexpr size_t size = 2;
+                     Printers_.emplace_back([](char* out, const TUnboxedValuePod& value, const IDateBuilder&) {
+                         return PrintNDigits<size>::Do(GetMonth(value), out);
+                     });
+                     ReservedSize_ += size;
+                     break;
+                 }
+                 case 'd': {
+                     static constexpr size_t size = 2;
+                     Printers_.emplace_back([](char* out, const TUnboxedValuePod& value, const IDateBuilder&) {
+                         return PrintNDigits<size>::Do(GetDay(value), out);
+                     });
+                     ReservedSize_ += size;
+                     break;
+                 }
+                 case 'H': {
+                     static constexpr size_t size = 2;
+                     Printers_.emplace_back([](char* out, const TUnboxedValuePod& value, const IDateBuilder&) {
+                         return PrintNDigits<size>::Do(GetHour(value), out);
+                     });
+                     ReservedSize_ += size;
+                     break;
+                 }
+                 case 'M': {
+                     static constexpr size_t size = 2;
+                     Printers_.emplace_back([](char* out, const TUnboxedValuePod& value, const IDateBuilder&) {
+                         return PrintNDigits<size>::Do(GetMinute(value), out);
+                     });
+                     ReservedSize_ += size;
+                     break;
+                 }
+                 case 'S':
+                     Printers_.emplace_back([alwaysWriteFractionalSeconds](char* out, const TUnboxedValuePod& value, const IDateBuilder&) {
+                         constexpr size_t size = 2;
+                         if (const auto microsecond = GetMicrosecond(value); microsecond || alwaysWriteFractionalSeconds) {
+                             out += PrintNDigits<size>::Do(GetSecond(value), out);
+                             *out++ = '.';
+                             constexpr size_t msize = 6;
+                             auto addSz = alwaysWriteFractionalSeconds ?
+                                 PrintNDigits<msize, true>::Do(microsecond, out) :
+                                 PrintNDigits<msize, false>::Do(microsecond, out);
+                             return size + 1U + addSz;
+                         }
+                         return PrintNDigits<size>::Do(GetSecond(value), out);
+                     });
+                     // Worst case is "SS.ffffff": 2 + 1 + 6 characters.
+                     ReservedSize_ += 9;
+                     break;
+
+                 case 'z': {
+                     static constexpr size_t size = 5;
+                     Printers_.emplace_back([](char* out, const TUnboxedValuePod& value, const IDateBuilder& builder) {
+                         auto timezoneId = GetTimezoneId(value);
+                         if (TTMStorage::IsUniversal(timezoneId)) {
+                             std::memcpy(out, "+0000", size);
+                             return size;
+                         }
+                         i32 shift;
+                         if (!builder.GetTimezoneShift(GetYear(value), GetMonth(value), GetDay(value),
+                             GetHour(value), GetMinute(value), GetSecond(value), timezoneId, shift))
+                         {
+                             std::memcpy(out, "+0000", size);
+                             return size;
+                         }
+
+                         // A zero shift is printed as "+0000", the same as the
+                         // universal and fallback branches above; only a
+                         // negative shift gets '-'.
+                         *out++ = shift < 0 ? '-' : '+';
+                         shift = std::abs(shift);
+                         out += PrintNDigits<2U>::Do(shift / 60U, out);
+                         out += PrintNDigits<2U>::Do(shift % 60U, out);
+                         return size;
+                     });
+                     ReservedSize_ += size;
+                     break;
+                 }
+                 case 'Z':
+                     Printers_.emplace_back([](char* out, const TUnboxedValuePod& value, const IDateBuilder&) {
+                         const auto timezoneId = GetTimezoneId(value);
+                         const auto tzName = NUdf::GetTimezones()[timezoneId];
+                         // Report exactly the number of bytes copied: only
+                         // MAX_TIMEZONE_NAME_LEN bytes are reserved for this
+                         // printer, so returning the unclamped length would
+                         // move the write cursor past the reserved space.
+                         const auto copied = std::min(tzName.size(), MAX_TIMEZONE_NAME_LEN);
+                         std::memcpy(out, tzName.data(), copied);
+                         return copied;
+                     });
+                     ReservedSize_ += MAX_TIMEZONE_NAME_LEN;
+                     break;
+                 case 'b': {
+                     static constexpr size_t size = 3;
+                     Printers_.emplace_back([](char* out, const TUnboxedValuePod& value, const IDateBuilder&) {
+                         static constexpr std::string_view mp[] {
+                             "Jan",
+                             "Feb",
+                             "Mar",
+                             "Apr",
+                             "May",
+                             "Jun",
+                             "Jul",
+                             "Aug",
+                             "Sep",
+                             "Oct",
+                             "Nov",
+                             "Dec"
+                         };
+                         auto month = GetMonth(value);
+                         Y_ENSURE(month > 0 && month <= sizeof(mp) / sizeof(mp[0]), "Invalid month value");
+                         std::memcpy(out, mp[month - 1].data(), size);
+                         return size;
+                     });
+                     ReservedSize_ += size;
+                     break;
+                 }
+                 case 'B': {
+                     Printers_.emplace_back([](char* out, const TUnboxedValuePod& value, const IDateBuilder&) {
+                         static constexpr std::string_view mp[] {
+                             "January",
+                             "February",
+                             "March",
+                             "April",
+                             "May",
+                             "June",
+                             "July",
+                             "August",
+                             "September",
+                             "October",
+                             "November",
+                             "December"
+                         };
+                         auto month = GetMonth(value);
+                         Y_ENSURE(month > 0 && month <= sizeof(mp) / sizeof(mp[0]), "Invalid month value");
+                         const std::string_view monthFullName = mp[month - 1];
+                         std::memcpy(out, monthFullName.data(), monthFullName.size());
+                         return monthFullName.size();
+                     });
+                     ReservedSize_ += 9U; // MAX_MONTH_FULL_NAME_LEN
+                     break;
+                 }
+                 default:
+                     ythrow yexception() << "invalid format character: " << *ptr;
+                 }
+
+                 // The next literal run starts right after the specifier.
+                 dataStart = ptr + 1U;
+             }
+
+             // Flush the literal tail, built the same way as the in-loop runs.
+             if (dataSize) {
+                 Printers_.emplace_back(TDataPrinter{std::string_view(&*dataStart, dataSize)});
+                 ReservedSize_ += dataSize;
+             }
+         }
+
+     private:
+         const TSourcePosition Pos_;
+
+         TUnboxedValue Format_;
+         TPrintersList Printers_{};
+         size_t ReservedSize_ = 0;
+     };
+
+     const TSourcePosition Pos_;
+ };
+
+ // Compile-time unrolled parser for exactly `Digits` decimal digits.
+ // Do() appends the digits read at `it` to `out` (out = out * 10 + digit) and
+ // advances `it` past each accepted digit. It returns false at the first
+ // non-digit, leaving `it` on that character and `out` with the partial value.
+ // No bounds check is done here: the caller must make sure at least `Digits`
+ // characters are readable at `it`.
+ template<size_t Digits>
+ struct ParseExaclyNDigits;
+
+ // Recursion terminator: nothing left to read.
+ template<>
+ struct ParseExaclyNDigits<0U> {
+     template <typename T>
+     static constexpr bool Do(std::string_view::const_iterator&, T&) {
+         return true;
+     }
+ };
+
+ template<size_t Digits>
+ struct ParseExaclyNDigits {
+     template <typename T>
+     static constexpr bool Do(std::string_view::const_iterator& it, T& out) {
+         // std::isdigit requires a value representable as unsigned char;
+         // passing a plain char with the high bit set is undefined behaviour.
+         const auto d = static_cast<unsigned char>(*it);
+         if (!std::isdigit(d)) {
+             return false;
+         }
+         out *= 10U;
+         out += d - '0';
+         return ParseExaclyNDigits<Digits - 1U>::Do(++it, out);
+     }
+ };
+
+ // Parse
+
+ // Implements the "Parse" UDF.
+ //
+ // The format string is the run config: TFactory::Run builds a TParse from
+ // it, and TParse::Run then parses input strings against the precompiled
+ // list of scanners.
+ //
+ // Supported specifiers (anything else makes the constructor throw):
+ //   %%  literal '%'
+ //   %Y  4 digits   %m %d %H %M  2 digits each
+ //   %S  2 digits, optionally followed by '.' and fractional digits (the
+ //       first 6 are used as microseconds, any further digits are skipped)
+ //   %Z  timezone name, resolved with IDateBuilder::FindTimezoneId
+ //   %b  3-letter month name   %B  full month name
+ class TParse : public TBoxedValue {
+ public:
+     class TFactory : public TBoxedValue {
+     public:
+         explicit TFactory(TSourcePosition pos)
+             : Pos_(pos)
+         {}
+
+     private:
+         // Builds the parser for the format string in args[0]; exceptions
+         // terminate the UDF with the source position attached.
+         TUnboxedValue Run(const IValueBuilder*, const TUnboxedValuePod* args) const final try {
+             return TUnboxedValuePod(new TParse(args[0], Pos_));
+         } catch (const std::exception& e) {
+             UdfTerminate((TStringBuilder() << Pos_ << " " << e.what()).data());
+         }
+
+         const TSourcePosition Pos_;
+     };
+
+     static const TStringRef& Name() {
+         static auto name = TStringRef::Of("Parse");
+         return name;
+     }
+
+     // Declares the signature (String /* AutoMap */, [Uint16?]) ->
+     // Resource? with a String run config. Returns false when `name` is not
+     // this function's name.
+     static bool DeclareSignature(
+         const TStringRef& name,
+         TType*,
+         IFunctionTypeInfoBuilder& builder,
+         bool typesOnly)
+     {
+         if (Name() != name) {
+             return false;
+         }
+
+         auto resourceType = builder.Resource(TMResourceName);
+         auto optionalResourceType = builder.Optional()->Item(resourceType).Build();
+
+         builder.Args()->Add<char*>().Flags(ICallablePayload::TArgumentFlags::AutoMap)
+             .Add(builder.Optional()->Item<ui16>())
+             .Done()
+             .OptionalArgs(1);
+         builder.RunConfig<char*>().Returns(optionalResourceType);
+
+         if (!typesOnly) {
+             builder.Implementation(new TParse::TFactory(builder.GetSourcePosition()));
+         }
+
+         return true;
+     }
+
+ private:
+     const TSourcePosition Pos_;
+     // Kept as a member because the literal scanners hold views into it.
+     const TUnboxedValue Format_;
+
+     // Each scanner consumes its piece starting at `it` (at most `limit`
+     // characters are available), advances `it`, and returns false on mismatch.
+     std::vector<std::function<bool(std::string_view::const_iterator& it, size_t, TUnboxedValuePod&, const IDateBuilder&)>> Scanners_;
+
+     // Scanner for a literal run of the format string.
+     struct TDataScanner {
+         const std::string_view Data_;
+
+         bool operator()(std::string_view::const_iterator& it, size_t limit, TUnboxedValuePod&, const IDateBuilder&) const {
+             if (limit < Data_.size() || !std::equal(Data_.begin(), Data_.end(), it)) {
+                 return false;
+             }
+             std::advance(it, Data_.size());
+             return true;
+         }
+     };
+
+     // Parses args[0]. Returns an empty value when any scanner fails, when
+     // input is left over after the last scanner, or when the assembled
+     // fields are rejected by storage.Validate().
+     TUnboxedValue Run(
+         const IValueBuilder* valueBuilder,
+         const TUnboxedValuePod* args) const override
+     {
+         try {
+             EMPTY_RESULT_ON_EMPTY_ARG(0);
+
+             const std::string_view buffer = args[0].AsStringRef();
+
+             TUnboxedValuePod result(0);
+             auto& storage = Reference(result);
+             storage.MakeDefault();
+
+             auto& builder = valueBuilder->GetDateBuilder();
+
+             auto it = buffer.begin();
+             for (const auto& scanner : Scanners_) {
+                 if (!scanner(it, std::distance(it, buffer.end()), result, builder)) {
+                     return TUnboxedValuePod();
+                 }
+             }
+
+             if (buffer.end() != it || !storage.Validate(builder)) {
+                 return TUnboxedValuePod();
+             }
+             return result;
+         } catch (const std::exception& e) {
+             UdfTerminate((TStringBuilder() << Pos_ << " " << e.what()).data());
+         }
+     }
+
+     // Compiles the format string into Scanners_. Throws yexception on a
+     // trailing '%' or an unknown specifier.
+     //
+     // All <cctype> classification calls below receive the character cast to
+     // unsigned char: passing a plain char with the high bit set (any
+     // non-ASCII input byte) to std::isdigit/isalnum/isalpha is undefined
+     // behaviour.
+     TParse(const TUnboxedValuePod& runConfig, TSourcePosition pos)
+         : Pos_(pos)
+         , Format_(runConfig)
+     {
+         const std::string_view formatView(Format_.AsStringRef());
+         auto dataStart = formatView.begin();
+         size_t dataSize = 0U;
+
+         for (auto ptr = formatView.begin(); formatView.end() != ptr; ++ptr) {
+             if (*ptr != '%') {
+                 ++dataSize;
+                 continue;
+             }
+
+             // Flush the literal run collected before this '%'.
+             if (dataSize) {
+                 Scanners_.emplace_back(TDataScanner{std::string_view(&*dataStart, dataSize)});
+                 dataSize = 0;
+             }
+
+             if (++ptr == formatView.end()) {
+                 ythrow yexception() << "format string ends with single %%";
+             }
+
+             switch (*ptr) {
+             case '%':
+                 Scanners_.emplace_back([](std::string_view::const_iterator& it, size_t limit, TUnboxedValuePod&, const IDateBuilder&) {
+                     return limit > 0U && *it++ == '%';
+                 });
+                 break;
+
+             case 'Y': {
+                 static constexpr size_t size = 4;
+                 Scanners_.emplace_back([](std::string_view::const_iterator& it, size_t limit, TUnboxedValuePod& result, const IDateBuilder&) {
+                     ui32 year = 0U;
+                     if (limit < size || !ParseExaclyNDigits<size>::Do(it, year) || !ValidateYear(year)) {
+                         return false;
+                     }
+                     SetYear(result, year);
+                     return true;
+                 });
+                 break;
+             }
+             case 'm': {
+                 static constexpr size_t size = 2;
+                 Scanners_.emplace_back([](std::string_view::const_iterator& it, size_t limit, TUnboxedValuePod& result, const IDateBuilder&) {
+                     ui32 month = 0U;
+                     if (limit < size || !ParseExaclyNDigits<size>::Do(it, month) || !ValidateMonth(month)) {
+                         return false;
+                     }
+                     SetMonth(result, month);
+                     return true;
+                 });
+                 break;
+             }
+             case 'd': {
+                 static constexpr size_t size = 2;
+                 Scanners_.emplace_back([](std::string_view::const_iterator& it, size_t limit, TUnboxedValuePod& result, const IDateBuilder&) {
+                     ui32 day = 0U;
+                     if (limit < size || !ParseExaclyNDigits<size>::Do(it, day) || !ValidateDay(day)) {
+                         return false;
+                     }
+                     SetDay(result, day);
+                     return true;
+                 });
+                 break;
+             }
+             case 'H': {
+                 static constexpr size_t size = 2;
+                 Scanners_.emplace_back([](std::string_view::const_iterator& it, size_t limit, TUnboxedValuePod& result, const IDateBuilder&) {
+                     ui32 hour = 0U;
+                     if (limit < size || !ParseExaclyNDigits<size>::Do(it, hour) || !ValidateHour(hour)) {
+                         return false;
+                     }
+                     SetHour(result, hour);
+                     return true;
+                 });
+                 break;
+             }
+             case 'M': {
+                 static constexpr size_t size = 2;
+                 Scanners_.emplace_back([](std::string_view::const_iterator& it, size_t limit, TUnboxedValuePod& result, const IDateBuilder&) {
+                     ui32 minute = 0U;
+                     if (limit < size || !ParseExaclyNDigits<size>::Do(it, minute) || !ValidateMinute(minute)) {
+                         return false;
+                     }
+                     SetMinute(result, minute);
+                     return true;
+                 });
+                 break;
+             }
+             case 'S': {
+                 static constexpr size_t size = 2;
+                 Scanners_.emplace_back([](std::string_view::const_iterator& it, size_t limit, TUnboxedValuePod& result, const IDateBuilder&) {
+                     ui32 second = 0U;
+                     if (limit < size || !ParseExaclyNDigits<size>::Do(it, second) || !ValidateSecond(second)) {
+                         return false;
+                     }
+                     SetSecond(result, second);
+                     limit -= size;
+
+                     // The fractional part is optional.
+                     if (!limit || *it != '.') {
+                         return true;
+                     }
+
+                     ++it;
+                     --limit;
+                     ui32 usec = 0U;
+
+                     // Take up to 6 fractional digits as microseconds.
+                     size_t digits = 6U;
+                     for (; limit; --limit) {
+                         const auto c = *it;
+                         if (!digits || !std::isdigit(static_cast<unsigned char>(c))) {
+                             break;
+                         }
+                         usec *= 10U;
+                         usec += c - '0';
+                         ++it;
+                         --digits;
+                     }
+                     // Skip digits beyond microsecond precision.
+                     for (; !digits && limit && std::isdigit(static_cast<unsigned char>(*it)); --limit, ++it);
+                     // Scale up when fewer than 6 digits were given.
+                     while (digits--) {
+                         usec *= 10U;
+                     }
+                     SetMicrosecond(result, usec);
+                     return true;
+                 });
+                 break;
+             }
+             case 'Z':
+                 Scanners_.emplace_back([](std::string_view::const_iterator& it, size_t limit, TUnboxedValuePod& result, const IDateBuilder& builder) {
+                     const auto start = it;
+                     // Timezone names consist of alphanumerics and "/_-+".
+                     while (limit > 0 && (std::isalnum(static_cast<unsigned char>(*it)) || *it == '/' || *it == '_' || *it == '-' || *it == '+')) {
+                         ++it;
+                         --limit;
+                     }
+                     const auto size = std::distance(start, it);
+
+                     ui32 timezoneId;
+                     if (!builder.FindTimezoneId(TStringRef(&*start, size), timezoneId)) {
+                         return false;
+                     }
+                     SetTimezoneId(result, timezoneId);
+                     return true;
+                 });
+                 break;
+             case 'b': {
+                 static constexpr size_t size = 3;
+                 Scanners_.emplace_back([](std::string_view::const_iterator& it, size_t limit, TUnboxedValuePod& result, const IDateBuilder&) {
+                     const auto start = it;
+                     size_t cnt = 0U;
+                     while (limit > 0 && cnt < size && std::isalpha(static_cast<unsigned char>(*it))) {
+                         ++it;
+                         ++cnt;
+                         --limit;
+                     }
+                     const std::string_view monthName{start, cnt};
+                     ui8 month = 0U;
+                     if (cnt < size || !ValidateMonthShortName(monthName, month)) {
+                         return false;
+                     }
+                     SetMonth(result, month);
+                     return true;
+                 });
+                 break;
+             }
+             case 'B': {
+                 Scanners_.emplace_back([](std::string_view::const_iterator& it, size_t limit, TUnboxedValuePod& result, const IDateBuilder&) {
+                     const auto start = it;
+                     size_t cnt = 0U;
+                     while (limit > 0 && std::isalpha(static_cast<unsigned char>(*it))) {
+                         ++it;
+                         ++cnt;
+                         --limit;
+                     }
+
+                     const std::string_view monthName{start, cnt};
+                     ui8 month = 0U;
+                     if (!ValidateMonthFullName(monthName, month)) {
+                         return false;
+                     }
+                     SetMonth(result, month);
+                     return true;
+                 });
+                 break;
+             }
+             default:
+                 ythrow yexception() << "invalid format character: " << *ptr;
+             }
+
+             // The next literal run starts right after the specifier.
+             dataStart = ptr + 1U;
+         }
+
+         // Flush the literal tail of the format string, if any.
+         if (dataSize) {
+             Scanners_.emplace_back(TDataScanner{std::string_view(&*dataStart, dataSize)});
+         }
+     }
+ };
+
+#define PARSE_SPECIFIC_FORMAT(format) /* Defines UDF TParse<format>: parses args[0] with TInstant::TryParse<format>; returns an empty value when parsing fails or instant.Seconds() >= NUdf::MAX_DATETIME, otherwise a resource filled via FromTimestamp(instant.MicroSeconds()). */ \
+ SIMPLE_STRICT_UDF(TParse##format, TOptional<TResource<TMResourceName>>(TAutoMap<char*>)) { \
+ auto str = args[0].AsStringRef(); \
+ TInstant instant; \
+ if (!TInstant::TryParse##format(TStringBuf(str.Data(), str.Size()), instant) || instant.Seconds() >= NUdf::MAX_DATETIME) { \
+ return TUnboxedValuePod(); \
+ } \
+ auto& builder = valueBuilder->GetDateBuilder(); \
+ TUnboxedValuePod result(0); \
+ auto& storage = Reference(result); \
+ storage.FromTimestamp(builder, instant.MicroSeconds()); \
+ return result; \
+ }
+
+ PARSE_SPECIFIC_FORMAT(Rfc822);
+ PARSE_SPECIFIC_FORMAT(Iso8601);
+ PARSE_SPECIFIC_FORMAT(Http);
+ PARSE_SPECIFIC_FORMAT(X509);
+
+ SIMPLE_MODULE(TDateTime2Module, // Lists the UDF classes that make up TDateTime2Module (the module passed to REGISTER_MODULES).
+ TUserDataTypeFuncFactory<true, true, SplitName, TSplit,
+ TDate,
+ TDatetime,
+ TTimestamp,
+ TTzDate,
+ TTzDatetime,
+ TTzTimestamp,
+ TDate32,
+ TDatetime64,
+ TTimestamp64>,
+
+ TMakeDate,
+ TMakeDatetime,
+ TMakeTimestamp,
+ TMakeTzDate,
+ TMakeTzDatetime,
+ TMakeTzTimestamp,
+
+ TConvert,
+
+ TMakeDate32,
+ TMakeDatetime64,
+ TMakeTimestamp64,
+
+ TGetYear,
+ TGetDayOfYear,
+ TGetMonth,
+ TGetMonthName,
+ TGetWeekOfYear,
+ TGetWeekOfYearIso8601,
+ TGetDayOfMonth,
+ TGetDayOfWeek,
+ TGetDayOfWeekName,
+ TGetTimeComponent<GetHourName, ui8, GetHour, 1u, 3600u, 24u, false>,
+ TGetTimeComponent<GetMinuteName, ui8, GetMinute, 1u, 60u, 60u, false>,
+ TGetTimeComponent<GetSecondName, ui8, GetSecond, 1u, 1u, 60u, false>,
+ TGetTimeComponent<GetMillisecondOfSecondName, ui32, GetMicrosecond, 1000u, 1000u, 1000u, true>,
+ TGetTimeComponent<GetMicrosecondOfSecondName, ui32, GetMicrosecond, 1u, 1u, 1000000u, true>,
+ TGetTimezoneId,
+ TGetTimezoneName,
+
+ TUpdate,
+
+ TFromSeconds,
+ TFromMilliseconds,
+ TFromMicroseconds,
+
+ TIntervalFromDays,
+ TIntervalFromHours,
+ TIntervalFromMinutes,
+ TIntervalFromSeconds,
+ TIntervalFromMilliseconds,
+ TIntervalFromMicroseconds,
+
+ TToDays,
+ TToHours,
+ TToMinutes,
+
+ TStartOfYear,
+ TStartOfQuarter,
+ TStartOfMonth,
+ TStartOfWeek,
+ TStartOfDay,
+ TStartOf,
+ TTimeOfDay,
+
+ TShiftYears,
+ TShiftQuarters,
+ TShiftMonths,
+
+ TEndOfMonth,
+
+ TToUnits<ToSecondsName, ui32, 1>,
+ TToUnits<ToMillisecondsName, ui64, 1000>,
+ TToUnits<ToMicrosecondsName, ui64, 1000000>,
+
+ TFormat,
+ TParse,
+
+ TParseRfc822,
+ TParseIso8601,
+ TParseHttp,
+ TParseX509
+ )
+}
+
+REGISTER_MODULES(TDateTime2Module)
diff --git a/yql/essentials/udfs/common/datetime2/test/canondata/result.json b/yql/essentials/udfs/common/datetime2/test/canondata/result.json
new file mode 100644
index 00000000000..6e475365ea6
--- /dev/null
+++ b/yql/essentials/udfs/common/datetime2/test/canondata/result.json
@@ -0,0 +1,137 @@
+{
+ "test.test[BlockFrom]": [
+ {
+ "uri": "file://test.test_BlockFrom_/results.txt"
+ }
+ ],
+ "test.test[BlockGet]": [
+ {
+ "uri": "file://test.test_BlockGet_/results.txt"
+ }
+ ],
+ "test.test[BlockSplitMake]": [
+ {
+ "uri": "file://test.test_BlockSplitMake_/results.txt"
+ }
+ ],
+ "test.test[BlockStartOf]": [
+ {
+ "uri": "file://test.test_BlockStartOf_/results.txt"
+ }
+ ],
+ "test.test[BlockTmGet]": [
+ {
+ "uri": "file://test.test_BlockTmGet_/results.txt"
+ }
+ ],
+ "test.test[BlockTo]": [
+ {
+ "uri": "file://test.test_BlockTo_/results.txt"
+ }
+ ],
+ "test.test[EndOf]": [
+ {
+ "uri": "file://test.test_EndOf_/results.txt"
+ }
+ ],
+ "test.test[FormatMicroseconds]": [
+ {
+ "uri": "file://test.test_FormatMicroseconds_/results.txt"
+ }
+ ],
+ "test.test[Format]": [
+ {
+ "uri": "file://test.test_Format_/results.txt"
+ }
+ ],
+ "test.test[From]": [
+ {
+ "uri": "file://test.test_From_/results.txt"
+ }
+ ],
+ "test.test[Get]": [
+ {
+ "uri": "file://test.test_Get_/results.txt"
+ }
+ ],
+ "test.test[ImplicitSplit]": [
+ {
+ "uri": "file://test.test_ImplicitSplit_/results.txt"
+ }
+ ],
+ "test.test[MultirowBlockTo]": [
+ {
+ "uri": "file://test.test_MultirowBlockTo_/results.txt"
+ }
+ ],
+ "test.test[ParseIso8601]": [
+ {
+ "uri": "file://test.test_ParseIso8601_/results.txt"
+ }
+ ],
+ "test.test[ParseLim]": [
+ {
+ "uri": "file://test.test_ParseLim_/results.txt"
+ }
+ ],
+ "test.test[Parse]": [
+ {
+ "uri": "file://test.test_Parse_/results.txt"
+ }
+ ],
+ "test.test[Repr]": [
+ {
+ "uri": "file://test.test_Repr_/results.txt"
+ }
+ ],
+ "test.test[Shift]": [
+ {
+ "uri": "file://test.test_Shift_/results.txt"
+ }
+ ],
+ "test.test[SplitMake1969]": [
+ {
+ "uri": "file://test.test_SplitMake1969_/results.txt"
+ }
+ ],
+ "test.test[SplitMake]": [
+ {
+ "uri": "file://test.test_SplitMake_/results.txt"
+ }
+ ],
+ "test.test[StartOf1969]": [
+ {
+ "uri": "file://test.test_StartOf1969_/results.txt"
+ }
+ ],
+ "test.test[StartOf]": [
+ {
+ "uri": "file://test.test_StartOf_/results.txt"
+ }
+ ],
+ "test.test[To]": [
+ {
+ "uri": "file://test.test_To_/results.txt"
+ }
+ ],
+ "test.test[TzToDate]": [
+ {
+ "uri": "file://test.test_TzToDate_/results.txt"
+ }
+ ],
+ "test.test[UpdateTz]": [
+ {
+ "uri": "file://test.test_UpdateTz_/results.txt"
+ }
+ ],
+ "test.test[Update]": [
+ {
+ "uri": "file://test.test_Update_/results.txt"
+ }
+ ],
+ "test.test[yql-14977]": [
+ {
+ "uri": "file://test.test_yql-14977_/results.txt"
+ }
+ ]
+}
diff --git a/yql/essentials/udfs/common/datetime2/test/canondata/test.test_BlockFrom_/results.txt b/yql/essentials/udfs/common/datetime2/test/canondata/test.test_BlockFrom_/results.txt
new file mode 100644
index 00000000000..b053c8139f1
--- /dev/null
+++ b/yql/essentials/udfs/common/datetime2/test/canondata/test.test_BlockFrom_/results.txt
@@ -0,0 +1,206 @@
+[
+ {
+ "Write" = [
+ {
+ "Type" = [
+ "ListType";
+ [
+ "StructType";
+ [
+ [
+ "ts_seconds";
+ [
+ "OptionalType";
+ [
+ "DataType";
+ "Timestamp"
+ ]
+ ]
+ ];
+ [
+ "ts_msec";
+ [
+ "OptionalType";
+ [
+ "DataType";
+ "Timestamp"
+ ]
+ ]
+ ];
+ [
+ "ts_usec";
+ [
+ "OptionalType";
+ [
+ "DataType";
+ "Timestamp"
+ ]
+ ]
+ ];
+ [
+ "ts_empty";
+ [
+ "OptionalType";
+ [
+ "DataType";
+ "Timestamp"
+ ]
+ ]
+ ];
+ [
+ "interval_days";
+ [
+ "OptionalType";
+ [
+ "DataType";
+ "Interval"
+ ]
+ ]
+ ];
+ [
+ "interval_hours";
+ [
+ "OptionalType";
+ [
+ "DataType";
+ "Interval"
+ ]
+ ]
+ ];
+ [
+ "interval_minutes";
+ [
+ "OptionalType";
+ [
+ "DataType";
+ "Interval"
+ ]
+ ]
+ ];
+ [
+ "interval_seconds";
+ [
+ "OptionalType";
+ [
+ "DataType";
+ "Interval"
+ ]
+ ]
+ ];
+ [
+ "interval_msec";
+ [
+ "OptionalType";
+ [
+ "DataType";
+ "Interval"
+ ]
+ ]
+ ];
+ [
+ "interval_usec";
+ [
+ "OptionalType";
+ [
+ "DataType";
+ "Interval"
+ ]
+ ]
+ ];
+ [
+ "interval_days_overflow";
+ [
+ "OptionalType";
+ [
+ "DataType";
+ "Interval"
+ ]
+ ]
+ ];
+ [
+ "interval_null";
+ [
+ "OptionalType";
+ [
+ "DataType";
+ "Interval"
+ ]
+ ]
+ ]
+ ]
+ ]
+ ];
+ "Data" = [
+ [
+ [
+ "3875345000000"
+ ];
+ [
+ "3875345000000"
+ ];
+ [
+ "3875345000000"
+ ];
+ #;
+ [
+ "604800000000"
+ ];
+ [
+ "604800000000"
+ ];
+ [
+ "604800000000"
+ ];
+ [
+ "604800000000"
+ ];
+ [
+ "604800000000"
+ ];
+ [
+ "-604800000000"
+ ];
+ [
+ "8640000000000000"
+ ];
+ #
+ ];
+ [
+ [
+ "3875345000000"
+ ];
+ [
+ "3875345000000"
+ ];
+ [
+ "3875345000000"
+ ];
+ #;
+ [
+ "604800000000"
+ ];
+ [
+ "604800000000"
+ ];
+ [
+ "604800000000"
+ ];
+ [
+ "604800000000"
+ ];
+ [
+ "604800000000"
+ ];
+ [
+ "-604800000000"
+ ];
+ [
+ "8640000000000000"
+ ];
+ #
+ ]
+ ]
+ }
+ ]
+ }
+] \ No newline at end of file
diff --git a/yql/essentials/udfs/common/datetime2/test/canondata/test.test_BlockGet_/results.txt b/yql/essentials/udfs/common/datetime2/test/canondata/test.test_BlockGet_/results.txt
new file mode 100644
index 00000000000..9937150f592
--- /dev/null
+++ b/yql/essentials/udfs/common/datetime2/test/canondata/test.test_BlockGet_/results.txt
@@ -0,0 +1,188 @@
+[
+ {
+ "Write" = [
+ {
+ "Type" = [
+ "ListType";
+ [
+ "StructType";
+ [
+ [
+ "date_hour";
+ [
+ "DataType";
+ "Uint8"
+ ]
+ ];
+ [
+ "date_minute";
+ [
+ "DataType";
+ "Uint8"
+ ]
+ ];
+ [
+ "date_second";
+ [
+ "DataType";
+ "Uint8"
+ ]
+ ];
+ [
+ "date_msec";
+ [
+ "DataType";
+ "Uint32"
+ ]
+ ];
+ [
+ "date_usec";
+ [
+ "DataType";
+ "Uint32"
+ ]
+ ];
+ [
+ "date_tz";
+ [
+ "DataType";
+ "Uint16"
+ ]
+ ];
+ [
+ "date_tzname";
+ [
+ "DataType";
+ "String"
+ ]
+ ];
+ [
+ "datetime_hour";
+ [
+ "DataType";
+ "Uint8"
+ ]
+ ];
+ [
+ "datetime_minute";
+ [
+ "DataType";
+ "Uint8"
+ ]
+ ];
+ [
+ "datetime_second";
+ [
+ "DataType";
+ "Uint8"
+ ]
+ ];
+ [
+ "datetime_msec";
+ [
+ "DataType";
+ "Uint32"
+ ]
+ ];
+ [
+ "datetime_usec";
+ [
+ "DataType";
+ "Uint32"
+ ]
+ ];
+ [
+ "datetime_tz";
+ [
+ "DataType";
+ "Uint16"
+ ]
+ ];
+ [
+ "datetime_tzname";
+ [
+ "DataType";
+ "String"
+ ]
+ ];
+ [
+ "timestamp_hour";
+ [
+ "DataType";
+ "Uint8"
+ ]
+ ];
+ [
+ "timestamp_minute";
+ [
+ "DataType";
+ "Uint8"
+ ]
+ ];
+ [
+ "timestamp_second";
+ [
+ "DataType";
+ "Uint8"
+ ]
+ ];
+ [
+ "timestamp_msec";
+ [
+ "DataType";
+ "Uint32"
+ ]
+ ];
+ [
+ "timestamp_usec";
+ [
+ "DataType";
+ "Uint32"
+ ]
+ ];
+ [
+ "timestamp_tz";
+ [
+ "DataType";
+ "Uint16"
+ ]
+ ];
+ [
+ "timestamp_tzname";
+ [
+ "DataType";
+ "String"
+ ]
+ ]
+ ]
+ ]
+ ];
+ "Data" = [
+ [
+ "0";
+ "0";
+ "0";
+ "0";
+ "0";
+ "0";
+ "GMT";
+ "21";
+ "20";
+ "19";
+ "0";
+ "0";
+ "0";
+ "GMT";
+ "21";
+ "20";
+ "19";
+ "345";
+ "345678";
+ "0";
+ "GMT"
+ ]
+ ]
+ }
+ ]
+ }
+] \ No newline at end of file
diff --git a/yql/essentials/udfs/common/datetime2/test/canondata/test.test_BlockSplitMake_/results.txt b/yql/essentials/udfs/common/datetime2/test/canondata/test.test_BlockSplitMake_/results.txt
new file mode 100644
index 00000000000..f60b4dd0263
--- /dev/null
+++ b/yql/essentials/udfs/common/datetime2/test/canondata/test.test_BlockSplitMake_/results.txt
@@ -0,0 +1,76 @@
+[
+ {
+ "Write" = [
+ {
+ "Type" = [
+ "ListType";
+ [
+ "StructType";
+ [
+ [
+ "rdate";
+ [
+ "DataType";
+ "Date"
+ ]
+ ];
+ [
+ "rdatetime";
+ [
+ "DataType";
+ "Datetime"
+ ]
+ ];
+ [
+ "rtimestamp";
+ [
+ "DataType";
+ "Timestamp"
+ ]
+ ];
+ [
+ "rtzdate";
+ [
+ "DataType";
+ "TzDate"
+ ]
+ ];
+ [
+ "rtzdatetime";
+ [
+ "DataType";
+ "TzDatetime"
+ ]
+ ];
+ [
+ "rtztimestamp";
+ [
+ "DataType";
+ "TzTimestamp"
+ ]
+ ]
+ ]
+ ]
+ ];
+ "Data" = [
+ [
+ "17880";
+ "1544835723";
+ "1544835723456789";
+ "2018-12-15,Europe/Moscow";
+ "2018-12-15T01:02:03,Europe/Moscow";
+ "2018-12-15T01:02:03.456789,Europe/Moscow"
+ ];
+ [
+ "13148";
+ "1136073599";
+ "1136073599999999";
+ "2005-12-31,Canada/Central";
+ "2005-12-31T16:00:00,Canada/Central";
+ "2005-12-31T23:00:00,Canada/Central"
+ ]
+ ]
+ }
+ ]
+ }
+] \ No newline at end of file
diff --git a/yql/essentials/udfs/common/datetime2/test/canondata/test.test_BlockStartOf_/results.txt b/yql/essentials/udfs/common/datetime2/test/canondata/test.test_BlockStartOf_/results.txt
new file mode 100644
index 00000000000..20890003833
--- /dev/null
+++ b/yql/essentials/udfs/common/datetime2/test/canondata/test.test_BlockStartOf_/results.txt
@@ -0,0 +1,314 @@
+[
+ {
+ "Write" = [
+ {
+ "Type" = [
+ "ListType";
+ [
+ "StructType";
+ [
+ [
+ "column0";
+ [
+ "OptionalType";
+ [
+ "DataType";
+ "TzTimestamp"
+ ]
+ ]
+ ];
+ [
+ "column1";
+ [
+ "OptionalType";
+ [
+ "DataType";
+ "TzTimestamp"
+ ]
+ ]
+ ];
+ [
+ "column2";
+ [
+ "OptionalType";
+ [
+ "DataType";
+ "TzTimestamp"
+ ]
+ ]
+ ];
+ [
+ "column3";
+ [
+ "OptionalType";
+ [
+ "DataType";
+ "TzTimestamp"
+ ]
+ ]
+ ];
+ [
+ "column4";
+ [
+ "OptionalType";
+ [
+ "DataType";
+ "TzTimestamp"
+ ]
+ ]
+ ];
+ [
+ "column5";
+ [
+ "OptionalType";
+ [
+ "DataType";
+ "TzTimestamp"
+ ]
+ ]
+ ];
+ [
+ "column6";
+ [
+ "OptionalType";
+ [
+ "DataType";
+ "TzTimestamp"
+ ]
+ ]
+ ];
+ [
+ "column7";
+ [
+ "OptionalType";
+ [
+ "DataType";
+ "TzTimestamp"
+ ]
+ ]
+ ];
+ [
+ "column8";
+ [
+ "OptionalType";
+ [
+ "DataType";
+ "TzTimestamp"
+ ]
+ ]
+ ];
+ [
+ "column9";
+ [
+ "OptionalType";
+ [
+ "DataType";
+ "TzTimestamp"
+ ]
+ ]
+ ];
+ [
+ "column10";
+ [
+ "OptionalType";
+ [
+ "DataType";
+ "Interval"
+ ]
+ ]
+ ];
+ [
+ "column11";
+ [
+ "OptionalType";
+ [
+ "DataType";
+ "TzTimestamp"
+ ]
+ ]
+ ]
+ ]
+ ]
+ ];
+ "Data" = [
+ [
+ #;
+ #;
+ #;
+ #;
+ #;
+ #;
+ [
+ "1970-01-01T04:00:00,Europe/Moscow"
+ ];
+ [
+ "1970-01-01T05:00:00,Europe/Moscow"
+ ];
+ [
+ "1970-01-01T05:00:00,Europe/Moscow"
+ ];
+ [
+ "1970-01-01T04:59:57,Europe/Moscow"
+ ];
+ [
+ "18000000000"
+ ];
+ [
+ "1970-01-31T00:00:00,Europe/Moscow"
+ ]
+ ];
+ [
+ [
+ "2018-01-01T00:00:00,Europe/Moscow"
+ ];
+ [
+ "2018-10-01T00:00:00,Europe/Moscow"
+ ];
+ [
+ "2018-12-01T00:00:00,Europe/Moscow"
+ ];
+ [
+ "2018-12-10T00:00:00,Europe/Moscow"
+ ];
+ [
+ "2018-12-15T00:00:00,Europe/Moscow"
+ ];
+ [
+ "2018-12-15T00:00:00,Europe/Moscow"
+ ];
+ [
+ "2018-12-15T00:00:00,Europe/Moscow"
+ ];
+ [
+ "2018-12-15T01:00:00,Europe/Moscow"
+ ];
+ [
+ "2018-12-15T01:02:00,Europe/Moscow"
+ ];
+ [
+ "2018-12-15T01:01:57,Europe/Moscow"
+ ];
+ [
+ "3723456789"
+ ];
+ [
+ "2018-12-31T00:00:00,Europe/Moscow"
+ ]
+ ];
+ [
+ [
+ "2105-01-01T00:00:00,GMT"
+ ];
+ [
+ "2105-10-01T00:00:00,GMT"
+ ];
+ [
+ "2105-12-01T00:00:00,GMT"
+ ];
+ [
+ "2105-12-28T00:00:00,GMT"
+ ];
+ [
+ "2105-12-31T00:00:00,GMT"
+ ];
+ [
+ "2105-12-31T13:00:00,GMT"
+ ];
+ [
+ "2105-12-31T16:00:00,GMT"
+ ];
+ [
+ "2105-12-31T16:15:00,GMT"
+ ];
+ [
+ "2105-12-31T16:23:40,GMT"
+ ];
+ [
+ "2105-12-31T16:23:44,GMT"
+ ];
+ [
+ "59025000000"
+ ];
+ [
+ "2105-12-31T00:00:00,GMT"
+ ]
+ ];
+ [
+ [
+ "2106-01-01T00:00:00,Europe/Moscow"
+ ];
+ [
+ "2106-01-01T00:00:00,Europe/Moscow"
+ ];
+ [
+ "2106-01-01T00:00:00,Europe/Moscow"
+ ];
+ [
+ "2105-12-28T00:00:00,Europe/Moscow"
+ ];
+ [
+ "2106-01-01T00:00:00,Europe/Moscow"
+ ];
+ [
+ "2106-01-01T00:00:00,Europe/Moscow"
+ ];
+ [
+ "2106-01-01T00:00:00,Europe/Moscow"
+ ];
+ [
+ "2106-01-01T01:00:00,Europe/Moscow"
+ ];
+ [
+ "2106-01-01T01:00:00,Europe/Moscow"
+ ];
+ [
+ "2106-01-01T00:59:58,Europe/Moscow"
+ ];
+ [
+ "3600000000"
+ ];
+ #
+ ];
+ [
+ [
+ "2019-01-01T00:00:00,Europe/Moscow"
+ ];
+ [
+ "2019-07-01T00:00:00,Europe/Moscow"
+ ];
+ [
+ "2019-07-01T00:00:00,Europe/Moscow"
+ ];
+ [
+ "2019-07-22T00:00:00,Europe/Moscow"
+ ];
+ [
+ "2019-07-24T00:00:00,Europe/Moscow"
+ ];
+ [
+ "2019-07-24T00:00:00,Europe/Moscow"
+ ];
+ [
+ "2019-07-24T12:00:00,Europe/Moscow"
+ ];
+ [
+ "2019-07-24T12:00:00,Europe/Moscow"
+ ];
+ [
+ "2019-07-24T12:00:00,Europe/Moscow"
+ ];
+ [
+ "2019-07-24T11:59:57,Europe/Moscow"
+ ];
+ [
+ "43200000000"
+ ];
+ [
+ "2019-07-31T00:00:00,Europe/Moscow"
+ ]
+ ]
+ ]
+ }
+ ]
+ }
+] \ No newline at end of file
diff --git a/yql/essentials/udfs/common/datetime2/test/canondata/test.test_BlockTmGet_/results.txt b/yql/essentials/udfs/common/datetime2/test/canondata/test.test_BlockTmGet_/results.txt
new file mode 100644
index 00000000000..262c45b5971
--- /dev/null
+++ b/yql/essentials/udfs/common/datetime2/test/canondata/test.test_BlockTmGet_/results.txt
@@ -0,0 +1,628 @@
+[
+ {
+ "Write" = [
+ {
+ "Type" = [
+ "ListType";
+ [
+ "StructType";
+ [
+ [
+ "ryear";
+ [
+ "OptionalType";
+ [
+ "DataType";
+ "Uint16"
+ ]
+ ]
+ ];
+ [
+ "rdayofyear";
+ [
+ "OptionalType";
+ [
+ "DataType";
+ "Uint16"
+ ]
+ ]
+ ];
+ [
+ "rmonth";
+ [
+ "OptionalType";
+ [
+ "DataType";
+ "Uint8"
+ ]
+ ]
+ ];
+ [
+ "rmonthname";
+ [
+ "OptionalType";
+ [
+ "DataType";
+ "String"
+ ]
+ ]
+ ];
+ [
+ "rweekofyear";
+ [
+ "OptionalType";
+ [
+ "DataType";
+ "Uint8"
+ ]
+ ]
+ ];
+ [
+ "rweekofyeariso8601";
+ [
+ "OptionalType";
+ [
+ "DataType";
+ "Uint8"
+ ]
+ ]
+ ];
+ [
+ "rdayofmonth";
+ [
+ "OptionalType";
+ [
+ "DataType";
+ "Uint8"
+ ]
+ ]
+ ];
+ [
+ "rdayofweek";
+ [
+ "OptionalType";
+ [
+ "DataType";
+ "Uint8"
+ ]
+ ]
+ ];
+ [
+ "rdayofweekname";
+ [
+ "OptionalType";
+ [
+ "DataType";
+ "String"
+ ]
+ ]
+ ];
+ [
+ "rhour";
+ [
+ "OptionalType";
+ [
+ "DataType";
+ "Uint8"
+ ]
+ ]
+ ];
+ [
+ "rminute";
+ [
+ "OptionalType";
+ [
+ "DataType";
+ "Uint8"
+ ]
+ ]
+ ];
+ [
+ "rsecond";
+ [
+ "OptionalType";
+ [
+ "DataType";
+ "Uint8"
+ ]
+ ]
+ ];
+ [
+ "rmsec";
+ [
+ "OptionalType";
+ [
+ "DataType";
+ "Uint32"
+ ]
+ ]
+ ];
+ [
+ "rusec";
+ [
+ "OptionalType";
+ [
+ "DataType";
+ "Uint32"
+ ]
+ ]
+ ];
+ [
+ "rtz";
+ [
+ "OptionalType";
+ [
+ "DataType";
+ "Uint16"
+ ]
+ ]
+ ];
+ [
+ "rtzname";
+ [
+ "OptionalType";
+ [
+ "DataType";
+ "String"
+ ]
+ ]
+ ]
+ ]
+ ]
+ ];
+ "Data" = [
+ [
+ [
+ "1970"
+ ];
+ [
+ "1"
+ ];
+ [
+ "1"
+ ];
+ [
+ "January"
+ ];
+ [
+ "1"
+ ];
+ [
+ "1"
+ ];
+ [
+ "1"
+ ];
+ [
+ "4"
+ ];
+ [
+ "Thursday"
+ ];
+ [
+ "11"
+ ];
+ [
+ "14"
+ ];
+ [
+ "0"
+ ];
+ [
+ "0"
+ ];
+ [
+ "0"
+ ];
+ [
+ "0"
+ ];
+ [
+ "GMT"
+ ]
+ ];
+ [
+ [
+ "1970"
+ ];
+ [
+ "2"
+ ];
+ [
+ "1"
+ ];
+ [
+ "January"
+ ];
+ [
+ "1"
+ ];
+ [
+ "1"
+ ];
+ [
+ "2"
+ ];
+ [
+ "5"
+ ];
+ [
+ "Friday"
+ ];
+ [
+ "14"
+ ];
+ [
+ "8"
+ ];
+ [
+ "0"
+ ];
+ [
+ "0"
+ ];
+ [
+ "0"
+ ];
+ [
+ "0"
+ ];
+ [
+ "GMT"
+ ]
+ ];
+ [
+ [
+ "1970"
+ ];
+ [
+ "32"
+ ];
+ [
+ "2"
+ ];
+ [
+ "February"
+ ];
+ [
+ "5"
+ ];
+ [
+ "5"
+ ];
+ [
+ "1"
+ ];
+ [
+ "7"
+ ];
+ [
+ "Sunday"
+ ];
+ [
+ "17"
+ ];
+ [
+ "3"
+ ];
+ [
+ "0"
+ ];
+ [
+ "0"
+ ];
+ [
+ "0"
+ ];
+ [
+ "0"
+ ];
+ [
+ "GMT"
+ ]
+ ];
+ [
+ [
+ "1970"
+ ];
+ [
+ "246"
+ ];
+ [
+ "9"
+ ];
+ [
+ "September"
+ ];
+ [
+ "36"
+ ];
+ [
+ "36"
+ ];
+ [
+ "3"
+ ];
+ [
+ "4"
+ ];
+ [
+ "Thursday"
+ ];
+ [
+ "7"
+ ];
+ [
+ "22"
+ ];
+ [
+ "0"
+ ];
+ [
+ "0"
+ ];
+ [
+ "0"
+ ];
+ [
+ "0"
+ ];
+ [
+ "GMT"
+ ]
+ ];
+ [
+ [
+ "1970"
+ ];
+ [
+ "365"
+ ];
+ [
+ "12"
+ ];
+ [
+ "December"
+ ];
+ [
+ "53"
+ ];
+ [
+ "53"
+ ];
+ [
+ "31"
+ ];
+ [
+ "4"
+ ];
+ [
+ "Thursday"
+ ];
+ [
+ "23"
+ ];
+ [
+ "59"
+ ];
+ [
+ "59"
+ ];
+ [
+ "999"
+ ];
+ [
+ "999999"
+ ];
+ [
+ "0"
+ ];
+ [
+ "GMT"
+ ]
+ ];
+ [
+ [
+ "1971"
+ ];
+ [
+ "1"
+ ];
+ [
+ "1"
+ ];
+ [
+ "January"
+ ];
+ [
+ "1"
+ ];
+ [
+ "53"
+ ];
+ [
+ "1"
+ ];
+ [
+ "5"
+ ];
+ [
+ "Friday"
+ ];
+ [
+ "0"
+ ];
+ [
+ "0"
+ ];
+ [
+ "0"
+ ];
+ [
+ "0"
+ ];
+ [
+ "0"
+ ];
+ [
+ "0"
+ ];
+ [
+ "GMT"
+ ]
+ ];
+ [
+ [
+ "1971"
+ ];
+ [
+ "14"
+ ];
+ [
+ "1"
+ ];
+ [
+ "January"
+ ];
+ [
+ "3"
+ ];
+ [
+ "2"
+ ];
+ [
+ "14"
+ ];
+ [
+ "4"
+ ];
+ [
+ "Thursday"
+ ];
+ [
+ "0"
+ ];
+ [
+ "0"
+ ];
+ [
+ "0"
+ ];
+ [
+ "0"
+ ];
+ [
+ "0"
+ ];
+ [
+ "0"
+ ];
+ [
+ "GMT"
+ ]
+ ];
+ [
+ [
+ "1978"
+ ];
+ [
+ "25"
+ ];
+ [
+ "1"
+ ];
+ [
+ "January"
+ ];
+ [
+ "5"
+ ];
+ [
+ "4"
+ ];
+ [
+ "25"
+ ];
+ [
+ "3"
+ ];
+ [
+ "Wednesday"
+ ];
+ [
+ "16"
+ ];
+ [
+ "15"
+ ];
+ [
+ "0"
+ ];
+ [
+ "0"
+ ];
+ [
+ "0"
+ ];
+ [
+ "477"
+ ];
+ [
+ "Europe/Uzhgorod"
+ ]
+ ];
+ [
+ [
+ "2018"
+ ];
+ [
+ "335"
+ ];
+ [
+ "12"
+ ];
+ [
+ "December"
+ ];
+ [
+ "48"
+ ];
+ [
+ "48"
+ ];
+ [
+ "1"
+ ];
+ [
+ "6"
+ ];
+ [
+ "Saturday"
+ ];
+ [
+ "1"
+ ];
+ [
+ "2"
+ ];
+ [
+ "3"
+ ];
+ [
+ "456"
+ ];
+ [
+ "456789"
+ ];
+ [
+ "1"
+ ];
+ [
+ "Europe/Moscow"
+ ]
+ ]
+ ]
+ }
+ ]
+ }
+] \ No newline at end of file
diff --git a/yql/essentials/udfs/common/datetime2/test/canondata/test.test_BlockTo_/results.txt b/yql/essentials/udfs/common/datetime2/test/canondata/test.test_BlockTo_/results.txt
new file mode 100644
index 00000000000..1936b498e58
--- /dev/null
+++ b/yql/essentials/udfs/common/datetime2/test/canondata/test.test_BlockTo_/results.txt
@@ -0,0 +1,356 @@
+[
+ {
+ "Write" = [
+ {
+ "Type" = [
+ "ListType";
+ [
+ "StructType";
+ [
+ [
+ "interval_to_days";
+ [
+ "OptionalType";
+ [
+ "DataType";
+ "Int32"
+ ]
+ ]
+ ];
+ [
+ "interval_to_hours";
+ [
+ "OptionalType";
+ [
+ "DataType";
+ "Int32"
+ ]
+ ]
+ ];
+ [
+ "interval_to_minutes";
+ [
+ "OptionalType";
+ [
+ "DataType";
+ "Int32"
+ ]
+ ]
+ ];
+ [
+ "interval_to_seconds";
+ [
+ "OptionalType";
+ [
+ "DataType";
+ "Int32"
+ ]
+ ]
+ ];
+ [
+ "interval_to_msec";
+ [
+ "OptionalType";
+ [
+ "DataType";
+ "Int64"
+ ]
+ ]
+ ];
+ [
+ "interval_to_usec";
+ [
+ "OptionalType";
+ [
+ "DataType";
+ "Int64"
+ ]
+ ]
+ ];
+ [
+ "date_to_seconds";
+ [
+ "OptionalType";
+ [
+ "DataType";
+ "Uint32"
+ ]
+ ]
+ ];
+ [
+ "datetime_to_seconds";
+ [
+ "OptionalType";
+ [
+ "DataType";
+ "Uint32"
+ ]
+ ]
+ ];
+ [
+ "timestamp_to_seconds";
+ [
+ "OptionalType";
+ [
+ "DataType";
+ "Uint32"
+ ]
+ ]
+ ];
+ [
+ "tzdate_to_seconds";
+ [
+ "OptionalType";
+ [
+ "DataType";
+ "Uint32"
+ ]
+ ]
+ ];
+ [
+ "tzdatetime_to_seconds";
+ [
+ "OptionalType";
+ [
+ "DataType";
+ "Uint32"
+ ]
+ ]
+ ];
+ [
+ "tztimestamp_to_seconds";
+ [
+ "OptionalType";
+ [
+ "DataType";
+ "Uint32"
+ ]
+ ]
+ ];
+ [
+ "date_to_msec";
+ [
+ "OptionalType";
+ [
+ "DataType";
+ "Uint64"
+ ]
+ ]
+ ];
+ [
+ "datetime_to_msec";
+ [
+ "OptionalType";
+ [
+ "DataType";
+ "Uint64"
+ ]
+ ]
+ ];
+ [
+ "timestamp_to_msec";
+ [
+ "OptionalType";
+ [
+ "DataType";
+ "Uint64"
+ ]
+ ]
+ ];
+ [
+ "tzdate_to_msec";
+ [
+ "OptionalType";
+ [
+ "DataType";
+ "Uint64"
+ ]
+ ]
+ ];
+ [
+ "tzdatetime_to_msec";
+ [
+ "OptionalType";
+ [
+ "DataType";
+ "Uint64"
+ ]
+ ]
+ ];
+ [
+ "tztimestamp_to_msec";
+ [
+ "OptionalType";
+ [
+ "DataType";
+ "Uint64"
+ ]
+ ]
+ ];
+ [
+ "date_to_usec";
+ [
+ "OptionalType";
+ [
+ "DataType";
+ "Uint64"
+ ]
+ ]
+ ];
+ [
+ "datetime_to_usec";
+ [
+ "OptionalType";
+ [
+ "DataType";
+ "Uint64"
+ ]
+ ]
+ ];
+ [
+ "timestamp_to_usec";
+ [
+ "OptionalType";
+ [
+ "DataType";
+ "Uint64"
+ ]
+ ]
+ ];
+ [
+ "tzdate_to_usec";
+ [
+ "OptionalType";
+ [
+ "DataType";
+ "Uint64"
+ ]
+ ]
+ ];
+ [
+ "tzdatetime_to_usec";
+ [
+ "OptionalType";
+ [
+ "DataType";
+ "Uint64"
+ ]
+ ]
+ ];
+ [
+ "tztimestamp_to_usec";
+ [
+ "OptionalType";
+ [
+ "DataType";
+ "Uint64"
+ ]
+ ]
+ ];
+ [
+ "interval_null";
+ [
+ "OptionalType";
+ [
+ "DataType";
+ "Int32"
+ ]
+ ]
+ ];
+ [
+ "negative_1d";
+ [
+ "OptionalType";
+ [
+ "DataType";
+ "Int32"
+ ]
+ ]
+ ]
+ ]
+ ]
+ ];
+ "Data" = [
+ [
+ [
+ "2"
+ ];
+ [
+ "58"
+ ];
+ [
+ "3480"
+ ];
+ [
+ "208800"
+ ];
+ [
+ "208800000"
+ ];
+ [
+ "208800000000"
+ ];
+ [
+ "1542844800"
+ ];
+ [
+ "1542921619"
+ ];
+ [
+ "1542921619"
+ ];
+ [
+ "1542758400"
+ ];
+ [
+ "1542910819"
+ ];
+ [
+ "1542910819"
+ ];
+ [
+ "1542844800000"
+ ];
+ [
+ "1542921619000"
+ ];
+ [
+ "1542921619345"
+ ];
+ [
+ "1542758400000"
+ ];
+ [
+ "1542910819000"
+ ];
+ [
+ "1542910819345"
+ ];
+ [
+ "1542844800000000"
+ ];
+ [
+ "1542921619000000"
+ ];
+ [
+ "1542921619345678"
+ ];
+ [
+ "1542758400000000"
+ ];
+ [
+ "1542910819000000"
+ ];
+ [
+ "1542910819345678"
+ ];
+ #;
+ [
+ "-1"
+ ]
+ ]
+ ]
+ }
+ ]
+ }
+] \ No newline at end of file
diff --git a/yql/essentials/udfs/common/datetime2/test/canondata/test.test_EndOf_/results.txt b/yql/essentials/udfs/common/datetime2/test/canondata/test.test_EndOf_/results.txt
new file mode 100644
index 00000000000..508cd4438ed
--- /dev/null
+++ b/yql/essentials/udfs/common/datetime2/test/canondata/test.test_EndOf_/results.txt
@@ -0,0 +1,295 @@
+[
+ {
+ "Label" = "Normal cases";
+ "Write" = [
+ {
+ "Type" = [
+ "ListType";
+ [
+ "StructType";
+ [
+ [
+ "column0";
+ [
+ "OptionalType";
+ [
+ "DataType";
+ "String"
+ ]
+ ]
+ ];
+ [
+ "column1";
+ [
+ "OptionalType";
+ [
+ "DataType";
+ "String"
+ ]
+ ]
+ ];
+ [
+ "column2";
+ [
+ "OptionalType";
+ [
+ "DataType";
+ "String"
+ ]
+ ]
+ ];
+ [
+ "column3";
+ [
+ "OptionalType";
+ [
+ "DataType";
+ "String"
+ ]
+ ]
+ ];
+ [
+ "column4";
+ [
+ "OptionalType";
+ [
+ "DataType";
+ "String"
+ ]
+ ]
+ ]
+ ]
+ ]
+ ];
+ "Data" = [
+ [
+ [
+ "2023-07-31 00:00:00 Europe/Moscow"
+ ];
+ [
+ "2023-08-31 00:00:00 GMT"
+ ];
+ [
+ "2023-09-30 00:00:00 GMT"
+ ];
+ [
+ "2023-02-28 00:00:00 GMT"
+ ];
+ [
+ "2024-02-29 00:00:00 GMT"
+ ]
+ ]
+ ]
+ }
+ ]
+ };
+ {
+ "Label" = "Minimal timestamp value";
+ "Write" = [
+ {
+ "Type" = [
+ "ListType";
+ [
+ "StructType";
+ [
+ [
+ "column0";
+ [
+ "OptionalType";
+ [
+ "DataType";
+ "String"
+ ]
+ ]
+ ];
+ [
+ "column1";
+ [
+ "OptionalType";
+ [
+ "DataType";
+ "String"
+ ]
+ ]
+ ];
+ [
+ "column2";
+ [
+ "OptionalType";
+ [
+ "DataType";
+ "String"
+ ]
+ ]
+ ]
+ ]
+ ]
+ ];
+ "Data" = [
+ [
+ [
+ "1970-01-01 00:00:00 GMT"
+ ];
+ [
+ "1970-01-31 00:00:00 GMT"
+ ];
+ #
+ ]
+ ]
+ }
+ ]
+ };
+ {
+ "Label" = "Maximum timestamp value";
+ "Write" = [
+ {
+ "Type" = [
+ "ListType";
+ [
+ "StructType";
+ [
+ [
+ "column0";
+ [
+ "OptionalType";
+ [
+ "DataType";
+ "String"
+ ]
+ ]
+ ];
+ [
+ "column1";
+ [
+ "OptionalType";
+ [
+ "DataType";
+ "String"
+ ]
+ ]
+ ];
+ [
+ "column2";
+ [
+ "OptionalType";
+ [
+ "DataType";
+ "String"
+ ]
+ ]
+ ];
+ [
+ "column3";
+ [
+ "OptionalType";
+ [
+ "DataType";
+ "String"
+ ]
+ ]
+ ]
+ ]
+ ]
+ ];
+ "Data" = [
+ [
+ [
+ "2105-12-31 23:59:59.999999 GMT"
+ ];
+ [
+ "2105-12-31 00:00:00 GMT"
+ ];
+ [
+ "2105-12-31 00:00:00 GMT"
+ ];
+ #
+ ]
+ ]
+ }
+ ]
+ };
+ {
+ "Label" = "Timestamp below minimum";
+ "Write" = [
+ {
+ "Type" = [
+ "ListType";
+ [
+ "StructType";
+ [
+ [
+ "column0";
+ [
+ "OptionalType";
+ [
+ "DataType";
+ "String"
+ ]
+ ]
+ ];
+ [
+ "column1";
+ [
+ "OptionalType";
+ [
+ "DataType";
+ "String"
+ ]
+ ]
+ ]
+ ]
+ ]
+ ];
+ "Data" = [
+ [
+ [
+ "1969-12-31 23:59:59.999999 Atlantic/Azores"
+ ];
+ #
+ ]
+ ]
+ }
+ ]
+ };
+ {
+ "Label" = "Timestamp above maximum";
+ "Write" = [
+ {
+ "Type" = [
+ "ListType";
+ [
+ "StructType";
+ [
+ [
+ "column0";
+ [
+ "OptionalType";
+ [
+ "DataType";
+ "String"
+ ]
+ ]
+ ];
+ [
+ "column1";
+ [
+ "OptionalType";
+ [
+ "DataType";
+ "String"
+ ]
+ ]
+ ]
+ ]
+ ]
+ ];
+ "Data" = [
+ [
+ #;
+ #
+ ]
+ ]
+ }
+ ]
+ }
+] \ No newline at end of file
diff --git a/yql/essentials/udfs/common/datetime2/test/canondata/test.test_FormatMicroseconds_/results.txt b/yql/essentials/udfs/common/datetime2/test/canondata/test.test_FormatMicroseconds_/results.txt
new file mode 100644
index 00000000000..5d1dfe80a94
--- /dev/null
+++ b/yql/essentials/udfs/common/datetime2/test/canondata/test.test_FormatMicroseconds_/results.txt
@@ -0,0 +1,98 @@
+[
+ {
+ "Write" = [
+ {
+ "Type" = [
+ "ListType";
+ [
+ "StructType";
+ [
+ [
+ "column0";
+ [
+ "OptionalType";
+ [
+ "DataType";
+ "String"
+ ]
+ ]
+ ];
+ [
+ "column1";
+ [
+ "OptionalType";
+ [
+ "DataType";
+ "String"
+ ]
+ ]
+ ];
+ [
+ "column2";
+ [
+ "OptionalType";
+ [
+ "DataType";
+ "String"
+ ]
+ ]
+ ];
+ [
+ "column3";
+ [
+ "OptionalType";
+ [
+ "DataType";
+ "String"
+ ]
+ ]
+ ];
+ [
+ "column4";
+ [
+ "OptionalType";
+ [
+ "DataType";
+ "String"
+ ]
+ ]
+ ];
+ [
+ "column5";
+ [
+ "OptionalType";
+ [
+ "DataType";
+ "String"
+ ]
+ ]
+ ]
+ ]
+ ]
+ ];
+ "Data" = [
+ [
+ [
+ "2024-01-01 00:00:00"
+ ];
+ [
+ "2024-01-01 00:00:00.000000"
+ ];
+ [
+ "2024-01-01 00:00:00.000001"
+ ];
+ [
+ "2024-01-01 00:00:00.000001"
+ ];
+ [
+ "2024-01-01 00:00:00.05"
+ ];
+ [
+ "2024-01-01 00:00:00.050000"
+ ]
+ ]
+ ]
+ }
+ ]
+ }
+] \ No newline at end of file
diff --git a/yql/essentials/udfs/common/datetime2/test/canondata/test.test_Format_/results.txt b/yql/essentials/udfs/common/datetime2/test/canondata/test.test_Format_/results.txt
new file mode 100644
index 00000000000..31b8439bc94
--- /dev/null
+++ b/yql/essentials/udfs/common/datetime2/test/canondata/test.test_Format_/results.txt
@@ -0,0 +1,48 @@
+[
+ {
+ "Write" = [
+ {
+ "Type" = [
+ "ListType";
+ [
+ "StructType";
+ [
+ [
+ "column0";
+ [
+ "OptionalType";
+ [
+ "DataType";
+ "String"
+ ]
+ ]
+ ]
+ ]
+ ]
+ ];
+ "Data" = [
+ [
+ [
+ "% year 1970 monthFullName January monthShortName Jan month 01 day 01 hours 00 minutes 00 seconds 00 tz +0000 tzname GMT text"
+ ]
+ ];
+ [
+ [
+ "% year 2018 monthFullName December monthShortName Dec month 12 day 01 hours 01 minutes 02 seconds 03.456789 tz +0300 tzname Europe/Moscow text"
+ ]
+ ];
+ [
+ [
+ "% year 2011 monthFullName March monthShortName Mar month 03 day 13 hours 03 minutes 15 seconds 00 tz -0700 tzname America/Los_Angeles text"
+ ]
+ ];
+ [
+ [
+ "% year 2011 monthFullName November monthShortName Nov month 11 day 06 hours 01 minutes 15 seconds 00 tz -0700 tzname America/Los_Angeles text"
+ ]
+ ]
+ ]
+ }
+ ]
+ }
+] \ No newline at end of file
diff --git a/yql/essentials/udfs/common/datetime2/test/canondata/test.test_From_/results.txt b/yql/essentials/udfs/common/datetime2/test/canondata/test.test_From_/results.txt
new file mode 100644
index 00000000000..2c503563e1c
--- /dev/null
+++ b/yql/essentials/udfs/common/datetime2/test/canondata/test.test_From_/results.txt
@@ -0,0 +1,148 @@
+[
+ {
+ "Write" = [
+ {
+ "Type" = [
+ "ListType";
+ [
+ "StructType";
+ [
+ [
+ "ts_seconds";
+ [
+ "OptionalType";
+ [
+ "DataType";
+ "Timestamp"
+ ]
+ ]
+ ];
+ [
+ "ts_msec";
+ [
+ "OptionalType";
+ [
+ "DataType";
+ "Timestamp"
+ ]
+ ]
+ ];
+ [
+ "ts_usec";
+ [
+ "OptionalType";
+ [
+ "DataType";
+ "Timestamp"
+ ]
+ ]
+ ];
+ [
+ "ts_empty";
+ [
+ "OptionalType";
+ [
+ "DataType";
+ "Timestamp"
+ ]
+ ]
+ ];
+ [
+ "interval_days";
+ [
+ "OptionalType";
+ [
+ "DataType";
+ "Interval"
+ ]
+ ]
+ ];
+ [
+ "interval_hours";
+ [
+ "OptionalType";
+ [
+ "DataType";
+ "Interval"
+ ]
+ ]
+ ];
+ [
+ "interval_minutes";
+ [
+ "OptionalType";
+ [
+ "DataType";
+ "Interval"
+ ]
+ ]
+ ];
+ [
+ "interval_seconds";
+ [
+ "OptionalType";
+ [
+ "DataType";
+ "Interval"
+ ]
+ ]
+ ];
+ [
+ "interval_msec";
+ [
+ "OptionalType";
+ [
+ "DataType";
+ "Interval"
+ ]
+ ]
+ ];
+ [
+ "interval_usec";
+ [
+ "OptionalType";
+ [
+ "DataType";
+ "Interval"
+ ]
+ ]
+ ]
+ ]
+ ]
+ ];
+ "Data" = [
+ [
+ [
+ "3875345000000"
+ ];
+ [
+ "3875345000000"
+ ];
+ [
+ "3875345000000"
+ ];
+ #;
+ [
+ "604800000000"
+ ];
+ [
+ "604800000000"
+ ];
+ [
+ "604800000000"
+ ];
+ [
+ "604800000000"
+ ];
+ [
+ "604800000000"
+ ];
+ [
+ "-604800000000"
+ ]
+ ]
+ ]
+ }
+ ]
+ }
+] \ No newline at end of file
diff --git a/yql/essentials/udfs/common/datetime2/test/canondata/test.test_Get_/results.txt b/yql/essentials/udfs/common/datetime2/test/canondata/test.test_Get_/results.txt
new file mode 100644
index 00000000000..262c45b5971
--- /dev/null
+++ b/yql/essentials/udfs/common/datetime2/test/canondata/test.test_Get_/results.txt
@@ -0,0 +1,628 @@
+[
+ {
+ "Write" = [
+ {
+ "Type" = [
+ "ListType";
+ [
+ "StructType";
+ [
+ [
+ "ryear";
+ [
+ "OptionalType";
+ [
+ "DataType";
+ "Uint16"
+ ]
+ ]
+ ];
+ [
+ "rdayofyear";
+ [
+ "OptionalType";
+ [
+ "DataType";
+ "Uint16"
+ ]
+ ]
+ ];
+ [
+ "rmonth";
+ [
+ "OptionalType";
+ [
+ "DataType";
+ "Uint8"
+ ]
+ ]
+ ];
+ [
+ "rmonthname";
+ [
+ "OptionalType";
+ [
+ "DataType";
+ "String"
+ ]
+ ]
+ ];
+ [
+ "rweekofyear";
+ [
+ "OptionalType";
+ [
+ "DataType";
+ "Uint8"
+ ]
+ ]
+ ];
+ [
+ "rweekofyeariso8601";
+ [
+ "OptionalType";
+ [
+ "DataType";
+ "Uint8"
+ ]
+ ]
+ ];
+ [
+ "rdayofmonth";
+ [
+ "OptionalType";
+ [
+ "DataType";
+ "Uint8"
+ ]
+ ]
+ ];
+ [
+ "rdayofweek";
+ [
+ "OptionalType";
+ [
+ "DataType";
+ "Uint8"
+ ]
+ ]
+ ];
+ [
+ "rdayofweekname";
+ [
+ "OptionalType";
+ [
+ "DataType";
+ "String"
+ ]
+ ]
+ ];
+ [
+ "rhour";
+ [
+ "OptionalType";
+ [
+ "DataType";
+ "Uint8"
+ ]
+ ]
+ ];
+ [
+ "rminute";
+ [
+ "OptionalType";
+ [
+ "DataType";
+ "Uint8"
+ ]
+ ]
+ ];
+ [
+ "rsecond";
+ [
+ "OptionalType";
+ [
+ "DataType";
+ "Uint8"
+ ]
+ ]
+ ];
+ [
+ "rmsec";
+ [
+ "OptionalType";
+ [
+ "DataType";
+ "Uint32"
+ ]
+ ]
+ ];
+ [
+ "rusec";
+ [
+ "OptionalType";
+ [
+ "DataType";
+ "Uint32"
+ ]
+ ]
+ ];
+ [
+ "rtz";
+ [
+ "OptionalType";
+ [
+ "DataType";
+ "Uint16"
+ ]
+ ]
+ ];
+ [
+ "rtzname";
+ [
+ "OptionalType";
+ [
+ "DataType";
+ "String"
+ ]
+ ]
+ ]
+ ]
+ ]
+ ];
+ "Data" = [
+ [
+ [
+ "1970"
+ ];
+ [
+ "1"
+ ];
+ [
+ "1"
+ ];
+ [
+ "January"
+ ];
+ [
+ "1"
+ ];
+ [
+ "1"
+ ];
+ [
+ "1"
+ ];
+ [
+ "4"
+ ];
+ [
+ "Thursday"
+ ];
+ [
+ "11"
+ ];
+ [
+ "14"
+ ];
+ [
+ "0"
+ ];
+ [
+ "0"
+ ];
+ [
+ "0"
+ ];
+ [
+ "0"
+ ];
+ [
+ "GMT"
+ ]
+ ];
+ [
+ [
+ "1970"
+ ];
+ [
+ "2"
+ ];
+ [
+ "1"
+ ];
+ [
+ "January"
+ ];
+ [
+ "1"
+ ];
+ [
+ "1"
+ ];
+ [
+ "2"
+ ];
+ [
+ "5"
+ ];
+ [
+ "Friday"
+ ];
+ [
+ "14"
+ ];
+ [
+ "8"
+ ];
+ [
+ "0"
+ ];
+ [
+ "0"
+ ];
+ [
+ "0"
+ ];
+ [
+ "0"
+ ];
+ [
+ "GMT"
+ ]
+ ];
+ [
+ [
+ "1970"
+ ];
+ [
+ "32"
+ ];
+ [
+ "2"
+ ];
+ [
+ "February"
+ ];
+ [
+ "5"
+ ];
+ [
+ "5"
+ ];
+ [
+ "1"
+ ];
+ [
+ "7"
+ ];
+ [
+ "Sunday"
+ ];
+ [
+ "17"
+ ];
+ [
+ "3"
+ ];
+ [
+ "0"
+ ];
+ [
+ "0"
+ ];
+ [
+ "0"
+ ];
+ [
+ "0"
+ ];
+ [
+ "GMT"
+ ]
+ ];
+ [
+ [
+ "1970"
+ ];
+ [
+ "246"
+ ];
+ [
+ "9"
+ ];
+ [
+ "September"
+ ];
+ [
+ "36"
+ ];
+ [
+ "36"
+ ];
+ [
+ "3"
+ ];
+ [
+ "4"
+ ];
+ [
+ "Thursday"
+ ];
+ [
+ "7"
+ ];
+ [
+ "22"
+ ];
+ [
+ "0"
+ ];
+ [
+ "0"
+ ];
+ [
+ "0"
+ ];
+ [
+ "0"
+ ];
+ [
+ "GMT"
+ ]
+ ];
+ [
+ [
+ "1970"
+ ];
+ [
+ "365"
+ ];
+ [
+ "12"
+ ];
+ [
+ "December"
+ ];
+ [
+ "53"
+ ];
+ [
+ "53"
+ ];
+ [
+ "31"
+ ];
+ [
+ "4"
+ ];
+ [
+ "Thursday"
+ ];
+ [
+ "23"
+ ];
+ [
+ "59"
+ ];
+ [
+ "59"
+ ];
+ [
+ "999"
+ ];
+ [
+ "999999"
+ ];
+ [
+ "0"
+ ];
+ [
+ "GMT"
+ ]
+ ];
+ [
+ [
+ "1971"
+ ];
+ [
+ "1"
+ ];
+ [
+ "1"
+ ];
+ [
+ "January"
+ ];
+ [
+ "1"
+ ];
+ [
+ "53"
+ ];
+ [
+ "1"
+ ];
+ [
+ "5"
+ ];
+ [
+ "Friday"
+ ];
+ [
+ "0"
+ ];
+ [
+ "0"
+ ];
+ [
+ "0"
+ ];
+ [
+ "0"
+ ];
+ [
+ "0"
+ ];
+ [
+ "0"
+ ];
+ [
+ "GMT"
+ ]
+ ];
+ [
+ [
+ "1971"
+ ];
+ [
+ "14"
+ ];
+ [
+ "1"
+ ];
+ [
+ "January"
+ ];
+ [
+ "3"
+ ];
+ [
+ "2"
+ ];
+ [
+ "14"
+ ];
+ [
+ "4"
+ ];
+ [
+ "Thursday"
+ ];
+ [
+ "0"
+ ];
+ [
+ "0"
+ ];
+ [
+ "0"
+ ];
+ [
+ "0"
+ ];
+ [
+ "0"
+ ];
+ [
+ "0"
+ ];
+ [
+ "GMT"
+ ]
+ ];
+ [
+ [
+ "1978"
+ ];
+ [
+ "25"
+ ];
+ [
+ "1"
+ ];
+ [
+ "January"
+ ];
+ [
+ "5"
+ ];
+ [
+ "4"
+ ];
+ [
+ "25"
+ ];
+ [
+ "3"
+ ];
+ [
+ "Wednesday"
+ ];
+ [
+ "16"
+ ];
+ [
+ "15"
+ ];
+ [
+ "0"
+ ];
+ [
+ "0"
+ ];
+ [
+ "0"
+ ];
+ [
+ "477"
+ ];
+ [
+ "Europe/Uzhgorod"
+ ]
+ ];
+ [
+ [
+ "2018"
+ ];
+ [
+ "335"
+ ];
+ [
+ "12"
+ ];
+ [
+ "December"
+ ];
+ [
+ "48"
+ ];
+ [
+ "48"
+ ];
+ [
+ "1"
+ ];
+ [
+ "6"
+ ];
+ [
+ "Saturday"
+ ];
+ [
+ "1"
+ ];
+ [
+ "2"
+ ];
+ [
+ "3"
+ ];
+ [
+ "456"
+ ];
+ [
+ "456789"
+ ];
+ [
+ "1"
+ ];
+ [
+ "Europe/Moscow"
+ ]
+ ]
+ ]
+ }
+ ]
+ }
+] \ No newline at end of file
diff --git a/yql/essentials/udfs/common/datetime2/test/canondata/test.test_ImplicitSplit_/results.txt b/yql/essentials/udfs/common/datetime2/test/canondata/test.test_ImplicitSplit_/results.txt
new file mode 100644
index 00000000000..ff03b0fa5df
--- /dev/null
+++ b/yql/essentials/udfs/common/datetime2/test/canondata/test.test_ImplicitSplit_/results.txt
@@ -0,0 +1,98 @@
+[
+ {
+ "Write" = [
+ {
+ "Type" = [
+ "ListType";
+ [
+ "StructType";
+ [
+ [
+ "column0";
+ [
+ "OptionalType";
+ [
+ "DataType";
+ "String"
+ ]
+ ]
+ ];
+ [
+ "column1";
+ [
+ "OptionalType";
+ [
+ "DataType";
+ "String"
+ ]
+ ]
+ ];
+ [
+ "column2";
+ [
+ "OptionalType";
+ [
+ "DataType";
+ "String"
+ ]
+ ]
+ ];
+ [
+ "column3";
+ [
+ "OptionalType";
+ [
+ "DataType";
+ "String"
+ ]
+ ]
+ ];
+ [
+ "column4";
+ [
+ "OptionalType";
+ [
+ "DataType";
+ "String"
+ ]
+ ]
+ ];
+ [
+ "column5";
+ [
+ "OptionalType";
+ [
+ "DataType";
+ "String"
+ ]
+ ]
+ ]
+ ]
+ ]
+ ];
+ "Data" = [
+ [
+ [
+ "20181215 000000 GMT"
+ ];
+ [
+ "20181215 010203 GMT"
+ ];
+ [
+ "20181215 010203.456789 GMT"
+ ];
+ [
+ "20181215 000000 Europe/Moscow"
+ ];
+ [
+ "20181215 010203 Europe/Moscow"
+ ];
+ [
+ "20181215 010203.456789 Europe/Moscow"
+ ]
+ ]
+ ]
+ }
+ ]
+ }
+] \ No newline at end of file
diff --git a/yql/essentials/udfs/common/datetime2/test/canondata/test.test_MultirowBlockTo_/results.txt b/yql/essentials/udfs/common/datetime2/test/canondata/test.test_MultirowBlockTo_/results.txt
new file mode 100644
index 00000000000..4675d67fd0a
--- /dev/null
+++ b/yql/essentials/udfs/common/datetime2/test/canondata/test.test_MultirowBlockTo_/results.txt
@@ -0,0 +1,90 @@
+[
+ {
+ "Write" = [
+ {
+ "Type" = [
+ "ListType";
+ [
+ "StructType";
+ [
+ [
+ "interval1";
+ [
+ "OptionalType";
+ [
+ "DataType";
+ "Int32"
+ ]
+ ]
+ ];
+ [
+ "interval2";
+ [
+ "OptionalType";
+ [
+ "DataType";
+ "Int32"
+ ]
+ ]
+ ];
+ [
+ "interval3";
+ [
+ "OptionalType";
+ [
+ "DataType";
+ "Int32"
+ ]
+ ]
+ ];
+ [
+ "interval4";
+ [
+ "OptionalType";
+ [
+ "DataType";
+ "Int32"
+ ]
+ ]
+ ]
+ ]
+ ]
+ ];
+ "Data" = [
+ [
+ #;
+ #;
+ [
+ "4"
+ ];
+ #
+ ];
+ [
+ #;
+ [
+ "0"
+ ];
+ #;
+ #
+ ];
+ [
+ #;
+ [
+ "0"
+ ];
+ #;
+ #
+ ];
+ [
+ #;
+ #;
+ [
+ "13"
+ ];
+ #
+ ]
+ ]
+ }
+ ]
+ }
+] \ No newline at end of file
diff --git a/yql/essentials/udfs/common/datetime2/test/canondata/test.test_ParseIso8601_/results.txt b/yql/essentials/udfs/common/datetime2/test/canondata/test.test_ParseIso8601_/results.txt
new file mode 100644
index 00000000000..5f312e8bd8a
--- /dev/null
+++ b/yql/essentials/udfs/common/datetime2/test/canondata/test.test_ParseIso8601_/results.txt
@@ -0,0 +1,53 @@
+[
+ {
+ "Write" = [
+ {
+ "Type" = [
+ "ListType";
+ [
+ "StructType";
+ [
+ [
+ "column0";
+ [
+ "OptionalType";
+ [
+ "DataType";
+ "TzTimestamp"
+ ]
+ ]
+ ];
+ [
+ "column1";
+ [
+ "OptionalType";
+ [
+ "DataType";
+ "TzTimestamp"
+ ]
+ ]
+ ];
+ [
+ "column2";
+ [
+ "OptionalType";
+ [
+ "DataType";
+ "TzTimestamp"
+ ]
+ ]
+ ]
+ ]
+ ]
+ ];
+ "Data" = [
+ [
+ #;
+ #;
+ #
+ ]
+ ]
+ }
+ ]
+ }
+] \ No newline at end of file
diff --git a/yql/essentials/udfs/common/datetime2/test/canondata/test.test_ParseLim_/results.txt b/yql/essentials/udfs/common/datetime2/test/canondata/test.test_ParseLim_/results.txt
new file mode 100644
index 00000000000..a4687cfa884
--- /dev/null
+++ b/yql/essentials/udfs/common/datetime2/test/canondata/test.test_ParseLim_/results.txt
@@ -0,0 +1,238 @@
+[
+ {
+ "Write" = [
+ {
+ "Type" = [
+ "ListType";
+ [
+ "StructType";
+ [
+ [
+ "column0";
+ [
+ "OptionalType";
+ [
+ "DataType";
+ "TzTimestamp"
+ ]
+ ]
+ ];
+ [
+ "column1";
+ [
+ "OptionalType";
+ [
+ "DataType";
+ "TzTimestamp"
+ ]
+ ]
+ ];
+ [
+ "column2";
+ [
+ "OptionalType";
+ [
+ "DataType";
+ "TzTimestamp"
+ ]
+ ]
+ ];
+ [
+ "column3";
+ [
+ "OptionalType";
+ [
+ "DataType";
+ "TzTimestamp"
+ ]
+ ]
+ ];
+ [
+ "column4";
+ [
+ "OptionalType";
+ [
+ "DataType";
+ "TzTimestamp"
+ ]
+ ]
+ ];
+ [
+ "column5";
+ [
+ "OptionalType";
+ [
+ "DataType";
+ "TzTimestamp"
+ ]
+ ]
+ ];
+ [
+ "column6";
+ [
+ "OptionalType";
+ [
+ "DataType";
+ "TzTimestamp"
+ ]
+ ]
+ ];
+ [
+ "column7";
+ [
+ "OptionalType";
+ [
+ "DataType";
+ "TzTimestamp"
+ ]
+ ]
+ ];
+ [
+ "column8";
+ [
+ "OptionalType";
+ [
+ "DataType";
+ "TzTimestamp"
+ ]
+ ]
+ ];
+ [
+ "column9";
+ [
+ "OptionalType";
+ [
+ "DataType";
+ "TzTimestamp"
+ ]
+ ]
+ ];
+ [
+ "column10";
+ [
+ "OptionalType";
+ [
+ "DataType";
+ "TzTimestamp"
+ ]
+ ]
+ ];
+ [
+ "column11";
+ [
+ "OptionalType";
+ [
+ "DataType";
+ "TzTimestamp"
+ ]
+ ]
+ ];
+ [
+ "column12";
+ [
+ "OptionalType";
+ [
+ "DataType";
+ "TzTimestamp"
+ ]
+ ]
+ ];
+ [
+ "column13";
+ [
+ "OptionalType";
+ [
+ "DataType";
+ "TzTimestamp"
+ ]
+ ]
+ ];
+ [
+ "column14";
+ [
+ "OptionalType";
+ [
+ "DataType";
+ "TzTimestamp"
+ ]
+ ]
+ ];
+ [
+ "column15";
+ [
+ "OptionalType";
+ [
+ "DataType";
+ "TzTimestamp"
+ ]
+ ]
+ ];
+ [
+ "column16";
+ [
+ "OptionalType";
+ [
+ "DataType";
+ "TzTimestamp"
+ ]
+ ]
+ ];
+ [
+ "column17";
+ [
+ "OptionalType";
+ [
+ "DataType";
+ "TzTimestamp"
+ ]
+ ]
+ ]
+ ]
+ ]
+ ];
+ "Data" = [
+ [
+ [
+ "2105-12-31T00:00:00,GMT"
+ ];
+ #;
+ [
+ "2105-12-31T23:59:59,GMT"
+ ];
+ #;
+ [
+ "2105-12-31T23:59:59.999999,GMT"
+ ];
+ #;
+ [
+ "2105-12-31T00:00:00,Etc/GMT+11"
+ ];
+ [
+ "2106-01-01T00:00:00,Etc/GMT-1"
+ ];
+ #;
+ [
+ "2105-12-31T22:59:59.999999,Etc/GMT+1"
+ ];
+ [
+ "1970-01-01T00:00:00,GMT"
+ ];
+ #;
+ [
+ "1970-01-01T00:00:00,GMT"
+ ];
+ #;
+ #;
+ [
+ "1970-01-01T00:00:00,GMT"
+ ];
+ [
+ "1969-12-31T23:00:00,Etc/GMT+1"
+ ];
+ #
+ ]
+ ]
+ }
+ ]
+ }
+] \ No newline at end of file
diff --git a/yql/essentials/udfs/common/datetime2/test/canondata/test.test_Parse_/results.txt b/yql/essentials/udfs/common/datetime2/test/canondata/test.test_Parse_/results.txt
new file mode 100644
index 00000000000..14c088137b1
--- /dev/null
+++ b/yql/essentials/udfs/common/datetime2/test/canondata/test.test_Parse_/results.txt
@@ -0,0 +1,242 @@
+[
+ {
+ "Write" = [
+ {
+ "Type" = [
+ "ListType";
+ [
+ "StructType";
+ [
+ [
+ "column0";
+ [
+ "OptionalType";
+ [
+ "DataType";
+ "String"
+ ]
+ ]
+ ];
+ [
+ "column1";
+ [
+ "OptionalType";
+ [
+ "DataType";
+ "String"
+ ]
+ ]
+ ];
+ [
+ "column2";
+ [
+ "OptionalType";
+ [
+ "DataType";
+ "String"
+ ]
+ ]
+ ];
+ [
+ "column3";
+ [
+ "OptionalType";
+ [
+ "DataType";
+ "String"
+ ]
+ ]
+ ];
+ [
+ "column4";
+ [
+ "OptionalType";
+ [
+ "DataType";
+ "String"
+ ]
+ ]
+ ];
+ [
+ "column5";
+ [
+ "OptionalType";
+ [
+ "DataType";
+ "String"
+ ]
+ ]
+ ];
+ [
+ "column6";
+ [
+ "OptionalType";
+ [
+ "DataType";
+ "String"
+ ]
+ ]
+ ];
+ [
+ "column7";
+ [
+ "OptionalType";
+ [
+ "DataType";
+ "String"
+ ]
+ ]
+ ];
+ [
+ "column8";
+ [
+ "OptionalType";
+ [
+ "DataType";
+ "String"
+ ]
+ ]
+ ];
+ [
+ "column9";
+ [
+ "OptionalType";
+ [
+ "DataType";
+ "String"
+ ]
+ ]
+ ]
+ ]
+ ]
+ ];
+ "Data" = [
+ [
+ [
+ "2011-03-08T01:02:03,Europe/Moscow"
+ ];
+ [
+ "20110308 010203 +0300"
+ ];
+ [
+ "2022-02-23T12:00:00,GMT"
+ ];
+ [
+ "20220223 120000 +0000"
+ ];
+ [
+ "20110308"
+ ];
+ [
+ "20110308"
+ ];
+ [
+ "2005-03-05T00:34:45,GMT"
+ ];
+ [
+ "2009-02-13T23:31:30,GMT"
+ ];
+ [
+ "1994-11-06T08:49:37,GMT"
+ ];
+ [
+ "2009-10-14T16:55:33,GMT"
+ ]
+ ];
+ [
+ [
+ "2011-03-08T01:02:03.22,Europe/Moscow"
+ ];
+ [
+ "20110308 010203.22 +0300"
+ ];
+ [
+ "2022-02-23T12:00:00.666666,GMT"
+ ];
+ [
+ "20220223 120000.666666 +0000"
+ ];
+ [
+ "20111108"
+ ];
+ [
+ "20111108"
+ ];
+ [
+ "2005-03-04T23:04:00,GMT"
+ ];
+ [
+ "2009-09-18T23:37:03.012331,GMT"
+ ];
+ [
+ "1994-11-06T08:49:37,GMT"
+ ];
+ [
+ "1999-01-04T07:42:12,GMT"
+ ]
+ ];
+ [
+ #;
+ #;
+ [
+ "2022-02-23T12:00:00.999999,GMT"
+ ];
+ [
+ "20220223 120000.999999 +0000"
+ ];
+ [
+ "20110108"
+ ];
+ [
+ "20110108"
+ ];
+ #;
+ #;
+ #;
+ #
+ ];
+ [
+ #;
+ #;
+ [
+ "2022-02-23T12:00:00.42,GMT"
+ ];
+ [
+ "20220223 120000.42 +0000"
+ ];
+ [
+ "20110108"
+ ];
+ [
+ "20110108"
+ ];
+ #;
+ #;
+ #;
+ #
+ ];
+ [
+ #;
+ #;
+ [
+ "2022-02-23T12:00:00.823874,GMT"
+ ];
+ [
+ "20220223 120000.823874 +0000"
+ ];
+ [
+ "20110108"
+ ];
+ [
+ "20110208"
+ ];
+ #;
+ #;
+ #;
+ #
+ ]
+ ]
+ }
+ ]
+ }
+] \ No newline at end of file
diff --git a/yql/essentials/udfs/common/datetime2/test/canondata/test.test_Repr_/results.txt b/yql/essentials/udfs/common/datetime2/test/canondata/test.test_Repr_/results.txt
new file mode 100644
index 00000000000..f4ff733c507
--- /dev/null
+++ b/yql/essentials/udfs/common/datetime2/test/canondata/test.test_Repr_/results.txt
@@ -0,0 +1,46 @@
+[
+ {
+ "Write" = [
+ {
+ "Type" = [
+ "ListType";
+ [
+ "StructType";
+ [
+ [
+ "column0";
+ [
+ "OptionalType";
+ [
+ "DataType";
+ "TzTimestamp"
+ ]
+ ]
+ ];
+ [
+ "column1";
+ [
+ "OptionalType";
+ [
+ "DataType";
+ "TzTimestamp"
+ ]
+ ]
+ ]
+ ]
+ ]
+ ];
+ "Data" = [
+ [
+ [
+ "2016-08-15T00:00:00,GMT"
+ ];
+ [
+ "2017-01-01T13:00:00,Europe/Moscow"
+ ]
+ ]
+ ]
+ }
+ ]
+ }
+] \ No newline at end of file
diff --git a/yql/essentials/udfs/common/datetime2/test/canondata/test.test_Shift_/results.txt b/yql/essentials/udfs/common/datetime2/test/canondata/test.test_Shift_/results.txt
new file mode 100644
index 00000000000..a7d9edbd5a5
--- /dev/null
+++ b/yql/essentials/udfs/common/datetime2/test/canondata/test.test_Shift_/results.txt
@@ -0,0 +1,459 @@
+[
+ {
+ "Write" = [
+ {
+ "Type" = [
+ "ListType";
+ [
+ "StructType";
+ [
+ [
+ "column0";
+ [
+ "OptionalType";
+ [
+ "DataType";
+ "String"
+ ]
+ ]
+ ];
+ [
+ "column1";
+ [
+ "OptionalType";
+ [
+ "DataType";
+ "String"
+ ]
+ ]
+ ];
+ [
+ "column2";
+ [
+ "OptionalType";
+ [
+ "DataType";
+ "String"
+ ]
+ ]
+ ];
+ [
+ "column3";
+ [
+ "OptionalType";
+ [
+ "DataType";
+ "String"
+ ]
+ ]
+ ];
+ [
+ "column4";
+ [
+ "OptionalType";
+ [
+ "DataType";
+ "String"
+ ]
+ ]
+ ];
+ [
+ "column5";
+ [
+ "OptionalType";
+ [
+ "DataType";
+ "String"
+ ]
+ ]
+ ];
+ [
+ "column6";
+ [
+ "OptionalType";
+ [
+ "DataType";
+ "String"
+ ]
+ ]
+ ];
+ [
+ "column7";
+ [
+ "OptionalType";
+ [
+ "DataType";
+ "String"
+ ]
+ ]
+ ];
+ [
+ "column8";
+ [
+ "OptionalType";
+ [
+ "DataType";
+ "String"
+ ]
+ ]
+ ];
+ [
+ "column9";
+ [
+ "OptionalType";
+ [
+ "DataType";
+ "String"
+ ]
+ ]
+ ];
+ [
+ "column10";
+ [
+ "OptionalType";
+ [
+ "DataType";
+ "String"
+ ]
+ ]
+ ];
+ [
+ "column11";
+ [
+ "OptionalType";
+ [
+ "DataType";
+ "String"
+ ]
+ ]
+ ];
+ [
+ "column12";
+ [
+ "OptionalType";
+ [
+ "DataType";
+ "String"
+ ]
+ ]
+ ];
+ [
+ "column13";
+ [
+ "OptionalType";
+ [
+ "DataType";
+ "String"
+ ]
+ ]
+ ];
+ [
+ "column14";
+ [
+ "OptionalType";
+ [
+ "DataType";
+ "String"
+ ]
+ ]
+ ]
+ ]
+ ]
+ ];
+ "Data" = [
+ [
+ [
+ "2011-11-17T21:20:19.345678,GMT"
+ ];
+ [
+ "2011-11-17T21:20:19.345678,GMT"
+ ];
+ [
+ "2005-11-17T21:20:19.345678,GMT"
+ ];
+ [
+ "1997-11-17T21:20:19.345678,GMT"
+ ];
+ [
+ "2001-11-17T21:20:19.345678,GMT"
+ ];
+ [
+ "2001-12-17T21:20:19.345678,GMT"
+ ];
+ [
+ "2002-02-17T21:20:19.345678,GMT"
+ ];
+ [
+ "2002-10-17T21:20:19.345678,GMT"
+ ];
+ [
+ "2002-11-17T21:20:19.345678,GMT"
+ ];
+ [
+ "2012-02-17T21:20:19.345678,GMT"
+ ];
+ [
+ "2001-10-17T21:20:19.345678,GMT"
+ ];
+ [
+ "2001-08-17T21:20:19.345678,GMT"
+ ];
+ [
+ "2000-12-17T21:20:19.345678,GMT"
+ ];
+ [
+ "2000-11-17T21:20:19.345678,GMT"
+ ];
+ [
+ "1991-08-17T21:20:19.345678,GMT"
+ ]
+ ];
+ [
+ [
+ "1980-01-01T11:14:00,GMT"
+ ];
+ [
+ "1980-01-01T11:14:00,GMT"
+ ];
+ [
+ "1974-01-01T11:14:00,GMT"
+ ];
+ #;
+ [
+ "1970-01-01T11:14:00,GMT"
+ ];
+ [
+ "1970-02-01T11:14:00,GMT"
+ ];
+ [
+ "1970-04-01T11:14:00,GMT"
+ ];
+ [
+ "1970-12-01T11:14:00,GMT"
+ ];
+ [
+ "1971-01-01T11:14:00,GMT"
+ ];
+ [
+ "1980-04-01T11:14:00,GMT"
+ ];
+ #;
+ #;
+ #;
+ #;
+ #
+ ];
+ [
+ #;
+ #;
+ #;
+ [
+ "2101-12-01T01:08:00,Europe/Moscow"
+ ];
+ [
+ "2105-12-01T01:08:00,Europe/Moscow"
+ ];
+ [
+ "2106-01-01T01:08:00,Europe/Moscow"
+ ];
+ #;
+ #;
+ #;
+ #;
+ [
+ "2105-11-01T01:08:00,Europe/Moscow"
+ ];
+ [
+ "2105-09-01T01:08:00,Europe/Moscow"
+ ];
+ [
+ "2105-01-01T01:08:00,Europe/Moscow"
+ ];
+ [
+ "2104-12-01T01:08:00,Europe/Moscow"
+ ];
+ [
+ "2095-09-01T01:08:00,Europe/Moscow"
+ ]
+ ];
+ [
+ [
+ "2059-06-13T00:00:00,GMT"
+ ];
+ [
+ "2059-06-13T00:00:00,GMT"
+ ];
+ [
+ "2053-06-13T00:00:00,GMT"
+ ];
+ [
+ "2045-06-13T00:00:00,GMT"
+ ];
+ [
+ "2049-06-13T00:00:00,GMT"
+ ];
+ [
+ "2049-07-13T00:00:00,GMT"
+ ];
+ [
+ "2049-09-13T00:00:00,GMT"
+ ];
+ [
+ "2050-05-13T00:00:00,GMT"
+ ];
+ [
+ "2050-06-13T00:00:00,GMT"
+ ];
+ [
+ "2059-09-13T00:00:00,GMT"
+ ];
+ [
+ "2049-05-13T00:00:00,GMT"
+ ];
+ [
+ "2049-03-13T00:00:00,GMT"
+ ];
+ [
+ "2048-07-13T00:00:00,GMT"
+ ];
+ [
+ "2048-06-13T00:00:00,GMT"
+ ];
+ [
+ "2039-03-13T00:00:00,GMT"
+ ]
+ ];
+ [
+ [
+ "2010-01-31T16:15:00,Europe/Uzhgorod"
+ ];
+ [
+ "2010-01-31T16:15:00,Europe/Uzhgorod"
+ ];
+ [
+ "2004-01-31T16:15:00,Europe/Uzhgorod"
+ ];
+ [
+ "1996-01-31T16:15:00,Europe/Uzhgorod"
+ ];
+ [
+ "2000-01-31T16:15:00,Europe/Uzhgorod"
+ ];
+ [
+ "2000-02-29T16:15:00,Europe/Uzhgorod"
+ ];
+ [
+ "2000-04-30T16:15:00,Europe/Uzhgorod"
+ ];
+ [
+ "2000-12-31T16:15:00,Europe/Uzhgorod"
+ ];
+ [
+ "2001-01-31T16:15:00,Europe/Uzhgorod"
+ ];
+ [
+ "2010-04-30T16:15:00,Europe/Uzhgorod"
+ ];
+ [
+ "1999-12-31T16:15:00,Europe/Uzhgorod"
+ ];
+ [
+ "1999-10-31T16:15:00,Europe/Uzhgorod"
+ ];
+ [
+ "1999-02-28T16:15:00,Europe/Uzhgorod"
+ ];
+ [
+ "1999-01-31T16:15:00,Europe/Uzhgorod"
+ ];
+ [
+ "1989-10-31T16:15:00,Europe/Uzhgorod"
+ ]
+ ];
+ [
+ [
+ "2034-02-28T01:02:03.456789,Europe/Moscow"
+ ];
+ [
+ "2034-02-28T01:02:03.456789,Europe/Moscow"
+ ];
+ [
+ "2028-02-29T01:02:03.456789,Europe/Moscow"
+ ];
+ [
+ "2020-02-29T01:02:03.456789,Europe/Moscow"
+ ];
+ [
+ "2024-02-29T01:02:03.456789,Europe/Moscow"
+ ];
+ [
+ "2024-03-29T01:02:03.456789,Europe/Moscow"
+ ];
+ [
+ "2024-05-29T01:02:03.456789,Europe/Moscow"
+ ];
+ [
+ "2025-01-29T01:02:03.456789,Europe/Moscow"
+ ];
+ [
+ "2025-02-28T01:02:03.456789,Europe/Moscow"
+ ];
+ [
+ "2034-05-29T01:02:03.456789,Europe/Moscow"
+ ];
+ [
+ "2024-01-29T01:02:03.456789,Europe/Moscow"
+ ];
+ [
+ "2023-11-29T01:02:03.456789,Europe/Moscow"
+ ];
+ [
+ "2023-03-29T01:02:03.456789,Europe/Moscow"
+ ];
+ [
+ "2023-02-28T01:02:03.456789,Europe/Moscow"
+ ];
+ [
+ "2013-11-29T01:02:03.456789,Europe/Moscow"
+ ]
+ ];
+ [
+ [
+ "1980-02-01T02:00:00.444123,Europe/Moscow"
+ ];
+ [
+ "1980-02-01T02:00:00.444123,Europe/Moscow"
+ ];
+ [
+ "1974-02-01T02:00:00.444123,Europe/Moscow"
+ ];
+ #;
+ [
+ "1970-02-01T02:00:00.444123,Europe/Moscow"
+ ];
+ [
+ "1970-03-01T02:00:00.444123,Europe/Moscow"
+ ];
+ [
+ "1970-05-01T02:00:00.444123,Europe/Moscow"
+ ];
+ [
+ "1971-01-01T02:00:00.444123,Europe/Moscow"
+ ];
+ [
+ "1971-02-01T02:00:00.444123,Europe/Moscow"
+ ];
+ [
+ "1980-05-01T02:00:00.444123,Europe/Moscow"
+ ];
+ #;
+ #;
+ #;
+ #;
+ #
+ ]
+ ]
+ }
+ ]
+ }
+] \ No newline at end of file
diff --git a/yql/essentials/udfs/common/datetime2/test/canondata/test.test_SplitMake1969_/results.txt b/yql/essentials/udfs/common/datetime2/test/canondata/test.test_SplitMake1969_/results.txt
new file mode 100644
index 00000000000..eb32dc98f66
--- /dev/null
+++ b/yql/essentials/udfs/common/datetime2/test/canondata/test.test_SplitMake1969_/results.txt
@@ -0,0 +1,88 @@
+[
+ {
+ "Write" = [
+ {
+ "Type" = [
+ "ListType";
+ [
+ "StructType";
+ [
+ [
+ "rdate";
+ [
+ "OptionalType";
+ [
+ "DataType";
+ "Date"
+ ]
+ ]
+ ];
+ [
+ "rdatetime";
+ [
+ "OptionalType";
+ [
+ "DataType";
+ "Datetime"
+ ]
+ ]
+ ];
+ [
+ "rtimestamp";
+ [
+ "OptionalType";
+ [
+ "DataType";
+ "Timestamp"
+ ]
+ ]
+ ];
+ [
+ "rtzdate";
+ [
+ "OptionalType";
+ [
+ "DataType";
+ "TzDate"
+ ]
+ ]
+ ];
+ [
+ "rtzdatetime";
+ [
+ "OptionalType";
+ [
+ "DataType";
+ "TzDatetime"
+ ]
+ ]
+ ];
+ [
+ "rtztimestamp";
+ [
+ "OptionalType";
+ [
+ "DataType";
+ "TzTimestamp"
+ ]
+ ]
+ ]
+ ]
+ ]
+ ];
+ "Data" = [
+ [
+ #;
+ #;
+ #;
+ #;
+ #;
+ [
+ "1969-12-31T23:00:00,Canada/Central"
+ ]
+ ]
+ ]
+ }
+ ]
+ }
+] \ No newline at end of file
diff --git a/yql/essentials/udfs/common/datetime2/test/canondata/test.test_SplitMake_/results.txt b/yql/essentials/udfs/common/datetime2/test/canondata/test.test_SplitMake_/results.txt
new file mode 100644
index 00000000000..01b159b33d6
--- /dev/null
+++ b/yql/essentials/udfs/common/datetime2/test/canondata/test.test_SplitMake_/results.txt
@@ -0,0 +1,160 @@
+[
+ {
+ "Write" = [
+ {
+ "Type" = [
+ "ListType";
+ [
+ "StructType";
+ [
+ [
+ "rdate";
+ [
+ "OptionalType";
+ [
+ "DataType";
+ "Date"
+ ]
+ ]
+ ];
+ [
+ "rdatetime";
+ [
+ "OptionalType";
+ [
+ "DataType";
+ "Datetime"
+ ]
+ ]
+ ];
+ [
+ "rtimestamp";
+ [
+ "OptionalType";
+ [
+ "DataType";
+ "Timestamp"
+ ]
+ ]
+ ];
+ [
+ "rtzdate";
+ [
+ "OptionalType";
+ [
+ "DataType";
+ "TzDate"
+ ]
+ ]
+ ];
+ [
+ "rtzdatetime";
+ [
+ "OptionalType";
+ [
+ "DataType";
+ "TzDatetime"
+ ]
+ ]
+ ];
+ [
+ "rtztimestamp";
+ [
+ "OptionalType";
+ [
+ "DataType";
+ "TzTimestamp"
+ ]
+ ]
+ ]
+ ]
+ ]
+ ];
+ "Data" = [
+ [
+ #;
+ #;
+ #;
+ #;
+ #;
+ #
+ ];
+ [
+ [
+ "0"
+ ];
+ [
+ "1800"
+ ];
+ [
+ "1800000000"
+ ];
+ #;
+ #;
+ [
+ "1970-01-01T05:00:00,Europe/Moscow"
+ ]
+ ];
+ [
+ [
+ "17880"
+ ];
+ [
+ "1544835723"
+ ];
+ [
+ "1544835723456789"
+ ];
+ [
+ "2018-12-15,Europe/Moscow"
+ ];
+ [
+ "2018-12-15T01:02:03,Europe/Moscow"
+ ];
+ [
+ "2018-12-15T01:02:03.456789,Europe/Moscow"
+ ]
+ ];
+ [
+ [
+ "49672"
+ ];
+ [
+ "4291747199"
+ ];
+ [
+ "4291747199999999"
+ ];
+ [
+ "2105-12-31,Canada/Central"
+ ];
+ [
+ "2105-12-31T16:00:00,Canada/Central"
+ ];
+ #
+ ];
+ [
+ #;
+ #;
+ #;
+ [
+ "2106-01-01,Europe/Moscow"
+ ];
+ [
+ "2106-01-01T01:00:00,Europe/Moscow"
+ ];
+ #
+ ];
+ [
+ #;
+ #;
+ #;
+ #;
+ #;
+ #
+ ]
+ ]
+ }
+ ]
+ }
+] \ No newline at end of file
diff --git a/yql/essentials/udfs/common/datetime2/test/canondata/test.test_StartOf1969_/results.txt b/yql/essentials/udfs/common/datetime2/test/canondata/test.test_StartOf1969_/results.txt
new file mode 100644
index 00000000000..54614afb16b
--- /dev/null
+++ b/yql/essentials/udfs/common/datetime2/test/canondata/test.test_StartOf1969_/results.txt
@@ -0,0 +1,151 @@
+[
+ {
+ "Write" = [
+ {
+ "Type" = [
+ "ListType";
+ [
+ "StructType";
+ [
+ [
+ "column0";
+ [
+ "OptionalType";
+ [
+ "DataType";
+ "String"
+ ]
+ ]
+ ];
+ [
+ "column1";
+ [
+ "OptionalType";
+ [
+ "DataType";
+ "String"
+ ]
+ ]
+ ];
+ [
+ "column2";
+ [
+ "OptionalType";
+ [
+ "DataType";
+ "String"
+ ]
+ ]
+ ];
+ [
+ "column3";
+ [
+ "OptionalType";
+ [
+ "DataType";
+ "String"
+ ]
+ ]
+ ];
+ [
+ "column4";
+ [
+ "OptionalType";
+ [
+ "DataType";
+ "String"
+ ]
+ ]
+ ];
+ [
+ "column5";
+ [
+ "OptionalType";
+ [
+ "DataType";
+ "String"
+ ]
+ ]
+ ];
+ [
+ "column6";
+ [
+ "OptionalType";
+ [
+ "DataType";
+ "String"
+ ]
+ ]
+ ];
+ [
+ "column7";
+ [
+ "OptionalType";
+ [
+ "DataType";
+ "String"
+ ]
+ ]
+ ];
+ [
+ "column8";
+ [
+ "OptionalType";
+ [
+ "DataType";
+ "String"
+ ]
+ ]
+ ];
+ [
+ "column9";
+ [
+ "OptionalType";
+ [
+ "DataType";
+ "String"
+ ]
+ ]
+ ];
+ [
+ "column10";
+ [
+ "OptionalType";
+ [
+ "DataType";
+ "Interval"
+ ]
+ ]
+ ]
+ ]
+ ]
+ ];
+ "Data" = [
+ [
+ #;
+ #;
+ #;
+ #;
+ #;
+ #;
+ [
+ "1969-12-31 20:00:00 Canada/Central"
+ ];
+ [
+ "1969-12-31 23:00:00 Canada/Central"
+ ];
+ [
+ "1969-12-31 23:00:00 Canada/Central"
+ ];
+ [
+ "1969-12-31 22:59:56 Canada/Central"
+ ];
+ [
+ "82800000000"
+ ]
+ ]
+ ]
+ }
+ ]
+ }
+] \ No newline at end of file
diff --git a/yql/essentials/udfs/common/datetime2/test/canondata/test.test_StartOf_/results.txt b/yql/essentials/udfs/common/datetime2/test/canondata/test.test_StartOf_/results.txt
new file mode 100644
index 00000000000..b548b00a7c7
--- /dev/null
+++ b/yql/essentials/udfs/common/datetime2/test/canondata/test.test_StartOf_/results.txt
@@ -0,0 +1,314 @@
+[
+ {
+ "Write" = [
+ {
+ "Type" = [
+ "ListType";
+ [
+ "StructType";
+ [
+ [
+ "column0";
+ [
+ "OptionalType";
+ [
+ "DataType";
+ "String"
+ ]
+ ]
+ ];
+ [
+ "column1";
+ [
+ "OptionalType";
+ [
+ "DataType";
+ "String"
+ ]
+ ]
+ ];
+ [
+ "column2";
+ [
+ "OptionalType";
+ [
+ "DataType";
+ "String"
+ ]
+ ]
+ ];
+ [
+ "column3";
+ [
+ "OptionalType";
+ [
+ "DataType";
+ "String"
+ ]
+ ]
+ ];
+ [
+ "column4";
+ [
+ "OptionalType";
+ [
+ "DataType";
+ "String"
+ ]
+ ]
+ ];
+ [
+ "column5";
+ [
+ "OptionalType";
+ [
+ "DataType";
+ "String"
+ ]
+ ]
+ ];
+ [
+ "column6";
+ [
+ "OptionalType";
+ [
+ "DataType";
+ "String"
+ ]
+ ]
+ ];
+ [
+ "column7";
+ [
+ "OptionalType";
+ [
+ "DataType";
+ "String"
+ ]
+ ]
+ ];
+ [
+ "column8";
+ [
+ "OptionalType";
+ [
+ "DataType";
+ "String"
+ ]
+ ]
+ ];
+ [
+ "column9";
+ [
+ "OptionalType";
+ [
+ "DataType";
+ "String"
+ ]
+ ]
+ ];
+ [
+ "column10";
+ [
+ "OptionalType";
+ [
+ "DataType";
+ "Interval"
+ ]
+ ]
+ ];
+ [
+ "column11";
+ [
+ "OptionalType";
+ [
+ "DataType";
+ "String"
+ ]
+ ]
+ ]
+ ]
+ ]
+ ];
+ "Data" = [
+ [
+ #;
+ #;
+ #;
+ #;
+ #;
+ #;
+ [
+ "1970-01-01 04:00:00 Europe/Moscow"
+ ];
+ [
+ "1970-01-01 05:00:00 Europe/Moscow"
+ ];
+ [
+ "1970-01-01 05:00:00 Europe/Moscow"
+ ];
+ [
+ "1970-01-01 04:59:57 Europe/Moscow"
+ ];
+ [
+ "18000000000"
+ ];
+ [
+ "1970-01-31 00:00:00 Europe/Moscow"
+ ]
+ ];
+ [
+ [
+ "2018-01-01 00:00:00 Europe/Moscow"
+ ];
+ [
+ "2018-10-01 00:00:00 Europe/Moscow"
+ ];
+ [
+ "2018-12-01 00:00:00 Europe/Moscow"
+ ];
+ [
+ "2018-12-10 00:00:00 Europe/Moscow"
+ ];
+ [
+ "2018-12-15 00:00:00 Europe/Moscow"
+ ];
+ [
+ "2018-12-15 00:00:00 Europe/Moscow"
+ ];
+ [
+ "2018-12-15 00:00:00 Europe/Moscow"
+ ];
+ [
+ "2018-12-15 01:00:00 Europe/Moscow"
+ ];
+ [
+ "2018-12-15 01:02:00 Europe/Moscow"
+ ];
+ [
+ "2018-12-15 01:01:57 Europe/Moscow"
+ ];
+ [
+ "3723456789"
+ ];
+ [
+ "2018-12-31 00:00:00 Europe/Moscow"
+ ]
+ ];
+ [
+ [
+ "2105-01-01 00:00:00 GMT"
+ ];
+ [
+ "2105-10-01 00:00:00 GMT"
+ ];
+ [
+ "2105-12-01 00:00:00 GMT"
+ ];
+ [
+ "2105-12-28 00:00:00 GMT"
+ ];
+ [
+ "2105-12-31 00:00:00 GMT"
+ ];
+ [
+ "2105-12-31 13:00:00 GMT"
+ ];
+ [
+ "2105-12-31 16:00:00 GMT"
+ ];
+ [
+ "2105-12-31 16:15:00 GMT"
+ ];
+ [
+ "2105-12-31 16:23:40 GMT"
+ ];
+ [
+ "2105-12-31 16:23:44 GMT"
+ ];
+ [
+ "59025000000"
+ ];
+ [
+ "2105-12-31 00:00:00 GMT"
+ ]
+ ];
+ [
+ [
+ "2106-01-01 00:00:00 Europe/Moscow"
+ ];
+ [
+ "2106-01-01 00:00:00 Europe/Moscow"
+ ];
+ [
+ "2106-01-01 00:00:00 Europe/Moscow"
+ ];
+ [
+ "2105-12-28 00:00:00 Europe/Moscow"
+ ];
+ [
+ "2106-01-01 00:00:00 Europe/Moscow"
+ ];
+ [
+ "2106-01-01 00:00:00 Europe/Moscow"
+ ];
+ [
+ "2106-01-01 00:00:00 Europe/Moscow"
+ ];
+ [
+ "2106-01-01 01:00:00 Europe/Moscow"
+ ];
+ [
+ "2106-01-01 01:00:00 Europe/Moscow"
+ ];
+ [
+ "2106-01-01 00:59:58 Europe/Moscow"
+ ];
+ [
+ "3600000000"
+ ];
+ #
+ ];
+ [
+ [
+ "2019-01-01 00:00:00 Europe/Moscow"
+ ];
+ [
+ "2019-07-01 00:00:00 Europe/Moscow"
+ ];
+ [
+ "2019-07-01 00:00:00 Europe/Moscow"
+ ];
+ [
+ "2019-07-22 00:00:00 Europe/Moscow"
+ ];
+ [
+ "2019-07-24 00:00:00 Europe/Moscow"
+ ];
+ [
+ "2019-07-24 00:00:00 Europe/Moscow"
+ ];
+ [
+ "2019-07-24 12:00:00 Europe/Moscow"
+ ];
+ [
+ "2019-07-24 12:00:00 Europe/Moscow"
+ ];
+ [
+ "2019-07-24 12:00:00 Europe/Moscow"
+ ];
+ [
+ "2019-07-24 11:59:57 Europe/Moscow"
+ ];
+ [
+ "43200000000"
+ ];
+ [
+ "2019-07-31 00:00:00 Europe/Moscow"
+ ]
+ ]
+ ]
+ }
+ ]
+ }
+] \ No newline at end of file
diff --git a/yql/essentials/udfs/common/datetime2/test/canondata/test.test_To_/results.txt b/yql/essentials/udfs/common/datetime2/test/canondata/test.test_To_/results.txt
new file mode 100644
index 00000000000..c7d4e10a864
--- /dev/null
+++ b/yql/essentials/udfs/common/datetime2/test/canondata/test.test_To_/results.txt
@@ -0,0 +1,345 @@
+[
+ {
+ "Write" = [
+ {
+ "Type" = [
+ "ListType";
+ [
+ "StructType";
+ [
+ [
+ "interval_to_days";
+ [
+ "OptionalType";
+ [
+ "DataType";
+ "Int32"
+ ]
+ ]
+ ];
+ [
+ "interval_to_hours";
+ [
+ "OptionalType";
+ [
+ "DataType";
+ "Int32"
+ ]
+ ]
+ ];
+ [
+ "interval_to_minutes";
+ [
+ "OptionalType";
+ [
+ "DataType";
+ "Int32"
+ ]
+ ]
+ ];
+ [
+ "interval_to_seconds";
+ [
+ "OptionalType";
+ [
+ "DataType";
+ "Int32"
+ ]
+ ]
+ ];
+ [
+ "interval_to_msec";
+ [
+ "OptionalType";
+ [
+ "DataType";
+ "Int64"
+ ]
+ ]
+ ];
+ [
+ "interval_to_usec";
+ [
+ "OptionalType";
+ [
+ "DataType";
+ "Int64"
+ ]
+ ]
+ ];
+ [
+ "date_to_seconds";
+ [
+ "OptionalType";
+ [
+ "DataType";
+ "Uint32"
+ ]
+ ]
+ ];
+ [
+ "datetime_to_seconds";
+ [
+ "OptionalType";
+ [
+ "DataType";
+ "Uint32"
+ ]
+ ]
+ ];
+ [
+ "timestamp_to_seconds";
+ [
+ "OptionalType";
+ [
+ "DataType";
+ "Uint32"
+ ]
+ ]
+ ];
+ [
+ "tzdate_to_seconds";
+ [
+ "OptionalType";
+ [
+ "DataType";
+ "Uint32"
+ ]
+ ]
+ ];
+ [
+ "tzdatetime_to_seconds";
+ [
+ "OptionalType";
+ [
+ "DataType";
+ "Uint32"
+ ]
+ ]
+ ];
+ [
+ "tztimestamp_to_seconds";
+ [
+ "OptionalType";
+ [
+ "DataType";
+ "Uint32"
+ ]
+ ]
+ ];
+ [
+ "date_to_msec";
+ [
+ "OptionalType";
+ [
+ "DataType";
+ "Uint64"
+ ]
+ ]
+ ];
+ [
+ "datetime_to_msec";
+ [
+ "OptionalType";
+ [
+ "DataType";
+ "Uint64"
+ ]
+ ]
+ ];
+ [
+ "timestamp_to_msec";
+ [
+ "OptionalType";
+ [
+ "DataType";
+ "Uint64"
+ ]
+ ]
+ ];
+ [
+ "tzdate_to_msec";
+ [
+ "OptionalType";
+ [
+ "DataType";
+ "Uint64"
+ ]
+ ]
+ ];
+ [
+ "tzdatetime_to_msec";
+ [
+ "OptionalType";
+ [
+ "DataType";
+ "Uint64"
+ ]
+ ]
+ ];
+ [
+ "tztimestamp_to_msec";
+ [
+ "OptionalType";
+ [
+ "DataType";
+ "Uint64"
+ ]
+ ]
+ ];
+ [
+ "date_to_usec";
+ [
+ "OptionalType";
+ [
+ "DataType";
+ "Uint64"
+ ]
+ ]
+ ];
+ [
+ "datetime_to_usec";
+ [
+ "OptionalType";
+ [
+ "DataType";
+ "Uint64"
+ ]
+ ]
+ ];
+ [
+ "timestamp_to_usec";
+ [
+ "OptionalType";
+ [
+ "DataType";
+ "Uint64"
+ ]
+ ]
+ ];
+ [
+ "tzdate_to_usec";
+ [
+ "OptionalType";
+ [
+ "DataType";
+ "Uint64"
+ ]
+ ]
+ ];
+ [
+ "tzdatetime_to_usec";
+ [
+ "OptionalType";
+ [
+ "DataType";
+ "Uint64"
+ ]
+ ]
+ ];
+ [
+ "tztimestamp_to_usec";
+ [
+ "OptionalType";
+ [
+ "DataType";
+ "Uint64"
+ ]
+ ]
+ ];
+ [
+ "negative_1d";
+ [
+ "OptionalType";
+ [
+ "DataType";
+ "Int32"
+ ]
+ ]
+ ]
+ ]
+ ]
+ ];
+ "Data" = [
+ [
+ [
+ "2"
+ ];
+ [
+ "58"
+ ];
+ [
+ "3480"
+ ];
+ [
+ "208800"
+ ];
+ [
+ "208800000"
+ ];
+ [
+ "208800000000"
+ ];
+ [
+ "1542844800"
+ ];
+ [
+ "1542921619"
+ ];
+ [
+ "1542921619"
+ ];
+ [
+ "1542758400"
+ ];
+ [
+ "1542910819"
+ ];
+ [
+ "1542910819"
+ ];
+ [
+ "1542844800000"
+ ];
+ [
+ "1542921619000"
+ ];
+ [
+ "1542921619345"
+ ];
+ [
+ "1542758400000"
+ ];
+ [
+ "1542910819000"
+ ];
+ [
+ "1542910819345"
+ ];
+ [
+ "1542844800000000"
+ ];
+ [
+ "1542921619000000"
+ ];
+ [
+ "1542921619345678"
+ ];
+ [
+ "1542758400000000"
+ ];
+ [
+ "1542910819000000"
+ ];
+ [
+ "1542910819345678"
+ ];
+ [
+ "-1"
+ ]
+ ]
+ ]
+ }
+ ]
+ }
+] \ No newline at end of file
diff --git a/yql/essentials/udfs/common/datetime2/test/canondata/test.test_TzToDate_/results.txt b/yql/essentials/udfs/common/datetime2/test/canondata/test.test_TzToDate_/results.txt
new file mode 100644
index 00000000000..c6fd6ea8b99
--- /dev/null
+++ b/yql/essentials/udfs/common/datetime2/test/canondata/test.test_TzToDate_/results.txt
@@ -0,0 +1,52 @@
+[
+ {
+ "Write" = [
+ {
+ "Type" = [
+ "ListType";
+ [
+ "StructType";
+ [
+ [
+ "column0";
+ [
+ "DataType";
+ "String"
+ ]
+ ];
+ [
+ "column1";
+ [
+ "DataType";
+ "String"
+ ]
+ ];
+ [
+ "column2";
+ [
+ "DataType";
+ "String"
+ ]
+ ];
+ [
+ "column3";
+ [
+ "DataType";
+ "String"
+ ]
+ ]
+ ]
+ ]
+ ];
+ "Data" = [
+ [
+ "2000-01-01";
+ "2000-01-01,Europe/Moscow";
+ "1999-12-31";
+ "2000-01-01,Europe/Moscow"
+ ]
+ ]
+ }
+ ]
+ }
+] \ No newline at end of file
diff --git a/yql/essentials/udfs/common/datetime2/test/canondata/test.test_UpdateTz_/results.txt b/yql/essentials/udfs/common/datetime2/test/canondata/test.test_UpdateTz_/results.txt
new file mode 100644
index 00000000000..9ebcadb565c
--- /dev/null
+++ b/yql/essentials/udfs/common/datetime2/test/canondata/test.test_UpdateTz_/results.txt
@@ -0,0 +1,64 @@
+[
+ {
+ "Write" = [
+ {
+ "Type" = [
+ "ListType";
+ [
+ "StructType";
+ [
+ [
+ "column0";
+ [
+ "OptionalType";
+ [
+ "DataType";
+ "String"
+ ]
+ ]
+ ]
+ ]
+ ]
+ ];
+ "Data" = [
+ [
+ [
+ "1999-12-31T21:00:00Z"
+ ]
+ ]
+ ]
+ }
+ ]
+ };
+ {
+ "Write" = [
+ {
+ "Type" = [
+ "ListType";
+ [
+ "StructType";
+ [
+ [
+ "column0";
+ [
+ "OptionalType";
+ [
+ "DataType";
+ "String"
+ ]
+ ]
+ ]
+ ]
+ ]
+ ];
+ "Data" = [
+ [
+ [
+ "1999-12-31T21:00:00Z"
+ ]
+ ]
+ ]
+ }
+ ]
+ }
+] \ No newline at end of file
diff --git a/yql/essentials/udfs/common/datetime2/test/canondata/test.test_Update_/results.txt b/yql/essentials/udfs/common/datetime2/test/canondata/test.test_Update_/results.txt
new file mode 100644
index 00000000000..75dfcd0b39a
--- /dev/null
+++ b/yql/essentials/udfs/common/datetime2/test/canondata/test.test_Update_/results.txt
@@ -0,0 +1,236 @@
+[
+ {
+ "Write" = [
+ {
+ "Type" = [
+ "ListType";
+ [
+ "StructType";
+ [
+ [
+ "column0";
+ [
+ "OptionalType";
+ [
+ "DataType";
+ "String"
+ ]
+ ]
+ ];
+ [
+ "column1";
+ [
+ "OptionalType";
+ [
+ "DataType";
+ "String"
+ ]
+ ]
+ ];
+ [
+ "column2";
+ [
+ "OptionalType";
+ [
+ "DataType";
+ "String"
+ ]
+ ]
+ ];
+ [
+ "column3";
+ [
+ "OptionalType";
+ [
+ "DataType";
+ "String"
+ ]
+ ]
+ ];
+ [
+ "column4";
+ [
+ "OptionalType";
+ [
+ "DataType";
+ "String"
+ ]
+ ]
+ ];
+ [
+ "column5";
+ [
+ "OptionalType";
+ [
+ "DataType";
+ "String"
+ ]
+ ]
+ ];
+ [
+ "column6";
+ [
+ "OptionalType";
+ [
+ "DataType";
+ "String"
+ ]
+ ]
+ ];
+ [
+ "column7";
+ [
+ "OptionalType";
+ [
+ "DataType";
+ "String"
+ ]
+ ]
+ ];
+ [
+ "column8";
+ [
+ "OptionalType";
+ [
+ "DataType";
+ "String"
+ ]
+ ]
+ ];
+ [
+ "column9";
+ [
+ "OptionalType";
+ [
+ "DataType";
+ "String"
+ ]
+ ]
+ ];
+ [
+ "column10";
+ [
+ "OptionalType";
+ [
+ "DataType";
+ "String"
+ ]
+ ]
+ ];
+ [
+ "column11";
+ [
+ "OptionalType";
+ [
+ "DataType";
+ "String"
+ ]
+ ]
+ ];
+ [
+ "column12";
+ [
+ "OptionalType";
+ [
+ "DataType";
+ "String"
+ ]
+ ]
+ ];
+ [
+ "column13";
+ [
+ "OptionalType";
+ [
+ "DataType";
+ "String"
+ ]
+ ]
+ ];
+ [
+ "column14";
+ [
+ "OptionalType";
+ [
+ "DataType";
+ "String"
+ ]
+ ]
+ ];
+ [
+ "column15";
+ [
+ "OptionalType";
+ [
+ "DataType";
+ "String"
+ ]
+ ]
+ ];
+ [
+ "column16";
+ [
+ "OptionalType";
+ [
+ "DataType";
+ "String"
+ ]
+ ]
+ ];
+ [
+ "column17";
+ [
+ "OptionalType";
+ [
+ "DataType";
+ "String"
+ ]
+ ]
+ ]
+ ]
+ ]
+ ];
+ "Data" = [
+ [
+ [
+ "2005-12-01T21:20:19.345678,GMT"
+ ];
+ #;
+ [
+ "2001-07-01T21:20:19.345678,GMT"
+ ];
+ #;
+ [
+ "2001-12-20T21:20:19.345678,GMT"
+ ];
+ #;
+ #;
+ [
+ "2001-12-01T11:10:09.345678,GMT"
+ ];
+ [
+ "2001-12-01T11:20:19.345678,GMT"
+ ];
+ #;
+ [
+ "2001-12-01T21:10:19.345678,GMT"
+ ];
+ #;
+ [
+ "2001-12-01T21:20:09.345678,GMT"
+ ];
+ #;
+ [
+ "2001-12-01T21:20:19.123456,GMT"
+ ];
+ #;
+ [
+ "2001-12-01T21:20:19.345678,America/Creston"
+ ];
+ #
+ ]
+ ]
+ }
+ ]
+ }
+] \ No newline at end of file
diff --git a/yql/essentials/udfs/common/datetime2/test/canondata/test.test_yql-14977_/results.txt b/yql/essentials/udfs/common/datetime2/test/canondata/test.test_yql-14977_/results.txt
new file mode 100644
index 00000000000..c2ee1b5e2e5
--- /dev/null
+++ b/yql/essentials/udfs/common/datetime2/test/canondata/test.test_yql-14977_/results.txt
@@ -0,0 +1,33 @@
+[
+ {
+ "Write" = [
+ {
+ "Type" = [
+ "ListType";
+ [
+ "StructType";
+ [
+ [
+ "column0";
+ [
+ "OptionalType";
+ [
+ "DataType";
+ "String"
+ ]
+ ]
+ ]
+ ]
+ ]
+ ];
+ "Data" = [
+ [
+ [
+ "May/15/2022"
+ ]
+ ]
+ ]
+ }
+ ]
+ }
+] \ No newline at end of file
diff --git a/yql/essentials/udfs/common/datetime2/test/cases/BlockFrom.in b/yql/essentials/udfs/common/datetime2/test/cases/BlockFrom.in
new file mode 100644
index 00000000000..96ebafbe3cc
--- /dev/null
+++ b/yql/essentials/udfs/common/datetime2/test/cases/BlockFrom.in
@@ -0,0 +1,30 @@
+{
+ "fts_seconds" = 3875345u;
+ "fts_msec" = 3875345000u;
+ "fts_usec" = 3875345000000u;
+
+ "fdays" = 7;
+ "fhours" = 168;
+ "fminutes" = 10080;
+ "fseconds" = 604800;
+ "fmsec" = 604800000;
+ "fusec" = -604800000000;
+
+ "fdays_overflow" = 100000;
+ "fdays_null" = #;
+};
+{
+ "fts_seconds" = 3875345u;
+ "fts_msec" = 3875345000u;
+ "fts_usec" = 3875345000000u;
+
+ "fdays" = 7;
+ "fhours" = 168;
+ "fminutes" = 10080;
+ "fseconds" = 604800;
+ "fmsec" = 604800000;
+ "fusec" = -604800000000;
+
+ "fdays_overflow" = 100000;
+ "fdays_null" = #;
+};
diff --git a/yql/essentials/udfs/common/datetime2/test/cases/BlockFrom.in.attr b/yql/essentials/udfs/common/datetime2/test/cases/BlockFrom.in.attr
new file mode 100644
index 00000000000..6f1c2afd899
--- /dev/null
+++ b/yql/essentials/udfs/common/datetime2/test/cases/BlockFrom.in.attr
@@ -0,0 +1,89 @@
+{
+ "_yql_row_spec" = {
+ "Type" = [
+ "StructType";
+ [
+ [
+ "fts_seconds";
+ [
+ "DataType";
+ "Uint32"
+ ]
+ ];
+ [
+ "fts_msec";
+ [
+ "DataType";
+ "Uint64"
+ ]
+ ];
+ [
+ "fts_usec";
+ [
+ "DataType";
+ "Uint64"
+ ]
+ ];
+ [
+ "fdays";
+ [
+ "DataType";
+ "Int16"
+ ]
+ ];
+ [
+ "fdays_overflow";
+ [
+ "DataType";
+ "Int32"
+ ]
+ ];
+ [
+ "fdays_null";
+ [
+ "OptionalType";
+ [
+ "DataType";
+ "Int16"
+ ]
+ ]
+ ];
+ [
+ "fhours";
+ [
+ "DataType";
+ "Int32"
+ ]
+ ];
+ [
+ "fminutes";
+ [
+ "DataType";
+ "Int32"
+ ]
+ ];
+ [
+ "fseconds";
+ [
+ "DataType";
+ "Int32"
+ ]
+ ];
+ [
+ "fmsec";
+ [
+ "DataType";
+ "Int64"
+ ]
+ ];
+ [
+ "fusec";
+ [
+ "DataType";
+ "Int64"
+ ]
+ ]
+ ]
+ ]
+ }
+}
diff --git a/yql/essentials/udfs/common/datetime2/test/cases/BlockFrom.sql b/yql/essentials/udfs/common/datetime2/test/cases/BlockFrom.sql
new file mode 100644
index 00000000000..fd23e1cfccb
--- /dev/null
+++ b/yql/essentials/udfs/common/datetime2/test/cases/BlockFrom.sql
@@ -0,0 +1,19 @@
+/* syntax version 1 */
+pragma UseBlocks;
+
+select
+ DateTime::FromSeconds(fts_seconds) as ts_seconds,
+ DateTime::FromMilliseconds(fts_msec) as ts_msec,
+ DateTime::FromMicroseconds(fts_usec) as ts_usec,
+ DateTime::FromMicroseconds(fts_msec * fts_msec) as ts_empty,
+
+ DateTime::IntervalFromDays(fdays) as interval_days,
+ DateTime::IntervalFromHours(fhours) as interval_hours,
+ DateTime::IntervalFromMinutes(fminutes) as interval_minutes,
+ DateTime::IntervalFromSeconds(fseconds) as interval_seconds,
+ DateTime::IntervalFromMilliseconds(fmsec) as interval_msec,
+ DateTime::IntervalFromMicroseconds(fusec) as interval_usec,
+
+ DateTime::IntervalFromDays(fdays_overflow) as interval_days_overflow,
+ DateTime::IntervalFromDays(fdays_null) as interval_null,
+from Input
diff --git a/yql/essentials/udfs/common/datetime2/test/cases/BlockGet.in b/yql/essentials/udfs/common/datetime2/test/cases/BlockGet.in
new file mode 100644
index 00000000000..bd5a96985b8
--- /dev/null
+++ b/yql/essentials/udfs/common/datetime2/test/cases/BlockGet.in
@@ -0,0 +1,6 @@
+{
+ "fdate"="2018-11-22";
+ "fdatetime"="2018-11-22T21:20:19Z";
+ "ftimestamp"="2018-11-22T21:20:19.345678Z";
+};
+
diff --git a/yql/essentials/udfs/common/datetime2/test/cases/BlockGet.in.attr b/yql/essentials/udfs/common/datetime2/test/cases/BlockGet.in.attr
new file mode 100644
index 00000000000..d9a16bcd3d8
--- /dev/null
+++ b/yql/essentials/udfs/common/datetime2/test/cases/BlockGet.in.attr
@@ -0,0 +1,31 @@
+{
+ "_yql_row_spec" = {
+ "Type" = [
+ "StructType";
+ [
+ [
+ "fdate";
+ [
+ "DataType";
+ "String"
+ ]
+ ];
+ [
+ "fdatetime";
+ [
+ "DataType";
+ "String"
+ ]
+ ];
+ [
+ "ftimestamp";
+ [
+ "DataType";
+ "String"
+ ]
+ ];
+ ]
+ ]
+ }
+}
+
diff --git a/yql/essentials/udfs/common/datetime2/test/cases/BlockGet.sql b/yql/essentials/udfs/common/datetime2/test/cases/BlockGet.sql
new file mode 100644
index 00000000000..3a21ac8c81e
--- /dev/null
+++ b/yql/essentials/udfs/common/datetime2/test/cases/BlockGet.sql
@@ -0,0 +1,36 @@
+/* syntax version 1 */
+pragma UseBlocks;
+insert into @t
+ select
+ Unwrap(cast(fdate as Date)) as `date`,
+ Unwrap(cast(fdatetime as Datetime)) as `datetime`,
+ Unwrap(cast(ftimestamp as Timestamp)) as `timestamp`,
+ from Input;
+commit;
+
+SELECT
+ DateTime::GetHour(`date`) as date_hour,
+ DateTime::GetMinute(`date`) as date_minute,
+ DateTime::GetSecond(`date`) as date_second,
+ DateTime::GetMillisecondOfSecond(`date`) as date_msec,
+ DateTime::GetMicrosecondOfSecond(`date`) as date_usec,
+ DateTime::GetTimezoneId(`date`) as date_tz,
+ DateTime::GetTimezoneName(`date`) as date_tzname,
+
+ DateTime::GetHour(`datetime`) as datetime_hour,
+ DateTime::GetMinute(`datetime`) as datetime_minute,
+ DateTime::GetSecond(`datetime`) as datetime_second,
+ DateTime::GetMillisecondOfSecond(`datetime`) as datetime_msec,
+ DateTime::GetMicrosecondOfSecond(`datetime`) as datetime_usec,
+ DateTime::GetTimezoneId(`datetime`) as datetime_tz,
+ DateTime::GetTimezoneName(`datetime`) as datetime_tzname,
+
+ DateTime::GetHour(`timestamp`) as timestamp_hour,
+ DateTime::GetMinute(`timestamp`) as timestamp_minute,
+ DateTime::GetSecond(`timestamp`) as timestamp_second,
+ DateTime::GetMillisecondOfSecond(`timestamp`) as timestamp_msec,
+ DateTime::GetMicrosecondOfSecond(`timestamp`) as timestamp_usec,
+ DateTime::GetTimezoneId(`timestamp`) as timestamp_tz,
+ DateTime::GetTimezoneName(`timestamp`) as timestamp_tzname,
+FROM @t;
+
diff --git a/yql/essentials/udfs/common/datetime2/test/cases/BlockSplitMake.in b/yql/essentials/udfs/common/datetime2/test/cases/BlockSplitMake.in
new file mode 100644
index 00000000000..61aafc6f44d
--- /dev/null
+++ b/yql/essentials/udfs/common/datetime2/test/cases/BlockSplitMake.in
@@ -0,0 +1,16 @@
+{
+ "fdate"="2018-12-15";
+ "fdatetime"="2018-12-15T01:02:03Z";
+ "ftimestamp"="2018-12-15T01:02:03.456789Z";
+ "ftzdate"="2018-12-15,Europe/Moscow";
+ "ftzdatetime"="2018-12-15T01:02:03,Europe/Moscow";
+ "ftztimestamp"="2018-12-15T01:02:03.456789,Europe/Moscow"
+};
+{
+ "fdate"="2005-12-31";
+ "fdatetime"="2005-12-31T23:59:59Z";
+ "ftimestamp"="2005-12-31T23:59:59.999999Z";
+ "ftzdate"="2005-12-31,Canada/Central";
+ "ftzdatetime"="2005-12-31T16:00:00,Canada/Central";
+ "ftztimestamp"="2005-12-31T23:00:00.000000,Canada/Central"
+};
diff --git a/yql/essentials/udfs/common/datetime2/test/cases/BlockSplitMake.in.attr b/yql/essentials/udfs/common/datetime2/test/cases/BlockSplitMake.in.attr
new file mode 100644
index 00000000000..876e4f8a19d
--- /dev/null
+++ b/yql/essentials/udfs/common/datetime2/test/cases/BlockSplitMake.in.attr
@@ -0,0 +1,52 @@
+{
+ "_yql_row_spec" = {
+ "Type" = [
+ "StructType";
+ [
+ [
+ "fdate";
+ [
+ "DataType";
+ "String"
+ ]
+ ];
+ [
+ "fdatetime";
+ [
+ "DataType";
+ "String"
+ ]
+ ];
+ [
+ "ftimestamp";
+ [
+ "DataType";
+ "String"
+ ]
+ ];
+ [
+ "ftzdate";
+ [
+ "DataType";
+ "String"
+ ]
+ ];
+ [
+ "ftzdatetime";
+ [
+ "DataType";
+ "String"
+ ]
+ ];
+ [
+ "ftztimestamp";
+ [
+ "DataType";
+ "String"
+ ]
+ ]
+ ]
+ ]
+ }
+}
+
diff --git a/yql/essentials/udfs/common/datetime2/test/cases/BlockSplitMake.sql b/yql/essentials/udfs/common/datetime2/test/cases/BlockSplitMake.sql
new file mode 100644
index 00000000000..a6c0d65a83c
--- /dev/null
+++ b/yql/essentials/udfs/common/datetime2/test/cases/BlockSplitMake.sql
@@ -0,0 +1,21 @@
+/* syntax version 1 */
+pragma UseBlocks;
+insert into @t
+ select
+ Unwrap(cast(fdate as Date)) as `date`,
+ Unwrap(cast(fdatetime as Datetime)) as `datetime`,
+ Unwrap(cast(ftimestamp as Timestamp)) as `timestamp`,
+ Unwrap(cast(ftzdate as TzDate)) as `tzdate`,
+ Unwrap(cast(ftzdatetime as TzDatetime)) as `tzdatetime`,
+ Unwrap(cast(ftztimestamp as TzTimestamp)) as `tztimestamp`
+ from Input;
+commit;
+
+select
+ DateTime::MakeDate(`date`) as rdate,
+ DateTime::MakeDatetime(`datetime`) as rdatetime,
+ DateTime::MakeTimestamp(`timestamp`) as rtimestamp,
+ DateTime::MakeTzDate(`tzdate`) as rtzdate,
+ DateTime::MakeTzDatetime(`tzdatetime`) as rtzdatetime,
+ DateTime::MakeTzTimestamp(`tztimestamp`) as rtztimestamp
+from @t; \ No newline at end of file
diff --git a/yql/essentials/udfs/common/datetime2/test/cases/BlockStartOf.attr b/yql/essentials/udfs/common/datetime2/test/cases/BlockStartOf.attr
new file mode 100644
index 00000000000..e69de29bb2d
--- /dev/null
+++ b/yql/essentials/udfs/common/datetime2/test/cases/BlockStartOf.attr
diff --git a/yql/essentials/udfs/common/datetime2/test/cases/BlockStartOf.in b/yql/essentials/udfs/common/datetime2/test/cases/BlockStartOf.in
new file mode 100644
index 00000000000..f482585e720
--- /dev/null
+++ b/yql/essentials/udfs/common/datetime2/test/cases/BlockStartOf.in
@@ -0,0 +1,15 @@
+{
+ "ftztimestamp"="1970-01-01T05:00:00.000000,Europe/Moscow"
+};
+{
+ "ftztimestamp"="2018-12-15T01:02:03.456789,Europe/Moscow"
+};
+{
+ "ftztimestamp"="2105-12-31T16:23:45.000000,GMT"
+};
+{
+ "ftztimestamp"="2106-01-01T01:00:00.000000,Europe/Moscow"
+};
+{
+ "ftztimestamp"="2019-07-24T12:00:00,Europe/Moscow"
+};
diff --git a/yql/essentials/udfs/common/datetime2/test/cases/BlockStartOf.in.attr b/yql/essentials/udfs/common/datetime2/test/cases/BlockStartOf.in.attr
new file mode 100644
index 00000000000..3915337be3c
--- /dev/null
+++ b/yql/essentials/udfs/common/datetime2/test/cases/BlockStartOf.in.attr
@@ -0,0 +1,17 @@
+{
+ "_yql_row_spec" = {
+ "Type" = [
+ "StructType";
+ [
+ [
+ "ftztimestamp";
+ [
+ "DataType";
+ "String"
+ ]
+ ];
+ ]
+ ]
+ }
+}
+
diff --git a/yql/essentials/udfs/common/datetime2/test/cases/BlockStartOf.sql b/yql/essentials/udfs/common/datetime2/test/cases/BlockStartOf.sql
new file mode 100644
index 00000000000..e531d6f1c8d
--- /dev/null
+++ b/yql/essentials/udfs/common/datetime2/test/cases/BlockStartOf.sql
@@ -0,0 +1,30 @@
+/* syntax version 1 */
+pragma UseBlocks;
+insert into @t
+ select
+ cast(ftztimestamp as TzTimestamp) as `tztimestamp`,
+ from Input;
+
+commit;
+
+select
+ DateTime::StartOfYear(`tztimestamp`),
+
+ DateTime::StartOfQuarter(`tztimestamp`),
+
+ DateTime::StartOfMonth(`tztimestamp`),
+
+ DateTime::StartOfWeek(`tztimestamp`),
+
+ DateTime::StartOfDay(`tztimestamp`),
+
+ DateTime::StartOf(`tztimestamp`, Interval("PT13H")),
+
+ DateTime::StartOf(`tztimestamp`, Interval("PT4H")),
+ DateTime::StartOf(`tztimestamp`, Interval("PT15M")),
+ DateTime::StartOf(`tztimestamp`, Interval("PT20S")),
+ DateTime::StartOf(`tztimestamp`, Interval("PT7S")),
+ DateTime::TimeOfDay(`tztimestamp`),
+
+ DateTime::EndOfMonth(`tztimestamp`),
+from @t;
diff --git a/yql/essentials/udfs/common/datetime2/test/cases/BlockTmGet.in b/yql/essentials/udfs/common/datetime2/test/cases/BlockTmGet.in
new file mode 100644
index 00000000000..06d60295808
--- /dev/null
+++ b/yql/essentials/udfs/common/datetime2/test/cases/BlockTmGet.in
@@ -0,0 +1,10 @@
+{"ftztimestamp"="1970-01-01T11:14:00.000000,GMT"};
+{"ftztimestamp"="1970-01-02T14:08:00.000000,GMT"};
+{"ftztimestamp"="1970-02-01T17:03:00.000000,GMT"};
+{"ftztimestamp"="1970-09-03T07:22:00.000000,GMT"};
+{"ftztimestamp"="1970-12-31T23:59:59.999999,GMT"};
+{"ftztimestamp"="1971-01-01T00:00:00.000000,GMT"};
+{"ftztimestamp"="1971-01-14T00:00:00.000000,GMT"};
+{"ftztimestamp"="1978-01-25T16:15:00.000000,Europe/Uzhgorod"};
+{"ftztimestamp"="2018-12-01T01:02:03.456789,Europe/Moscow"};
+
diff --git a/yql/essentials/udfs/common/datetime2/test/cases/BlockTmGet.in.attr b/yql/essentials/udfs/common/datetime2/test/cases/BlockTmGet.in.attr
new file mode 100644
index 00000000000..3915337be3c
--- /dev/null
+++ b/yql/essentials/udfs/common/datetime2/test/cases/BlockTmGet.in.attr
@@ -0,0 +1,17 @@
+{
+ "_yql_row_spec" = {
+ "Type" = [
+ "StructType";
+ [
+ [
+ "ftztimestamp";
+ [
+ "DataType";
+ "String"
+ ]
+ ];
+ ]
+ ]
+ }
+}
+
diff --git a/yql/essentials/udfs/common/datetime2/test/cases/BlockTmGet.sql b/yql/essentials/udfs/common/datetime2/test/cases/BlockTmGet.sql
new file mode 100644
index 00000000000..3087d4e78d4
--- /dev/null
+++ b/yql/essentials/udfs/common/datetime2/test/cases/BlockTmGet.sql
@@ -0,0 +1,27 @@
+/* syntax version 1 */
+pragma UseBlocks;
+insert into @t
+ select
+ cast(ftztimestamp as TzTimestamp) as `tm`,
+ from Input;
+
+commit;
+
+SELECT
+ DateTime::GetYear(tm) as ryear,
+ DateTime::GetDayOfYear(tm) as rdayofyear,
+ DateTime::GetMonth(tm) as rmonth,
+ DateTime::GetMonthName(tm) as rmonthname,
+ DateTime::GetWeekOfYear(tm) as rweekofyear,
+ DateTime::GetWeekOfYearIso8601(tm) as rweekofyeariso8601,
+ DateTime::GetDayOfMonth(tm) as rdayofmonth,
+ DateTime::GetDayOfWeek(tm) as rdayofweek,
+ DateTime::GetDayOfWeekName(tm) as rdayofweekname,
+ DateTime::GetHour(tm) as rhour,
+ DateTime::GetMinute(tm) as rminute,
+ DateTime::GetSecond(tm) as rsecond,
+ DateTime::GetMillisecondOfSecond(tm) as rmsec,
+ DateTime::GetMicrosecondOfSecond(tm) as rusec,
+ DateTime::GetTimezoneId(tm) as rtz,
+ DateTime::GetTimezoneName(tm) as rtzname
+FROM @t;
diff --git a/yql/essentials/udfs/common/datetime2/test/cases/BlockTo.in b/yql/essentials/udfs/common/datetime2/test/cases/BlockTo.in
new file mode 100644
index 00000000000..feb9044f382
--- /dev/null
+++ b/yql/essentials/udfs/common/datetime2/test/cases/BlockTo.in
@@ -0,0 +1,11 @@
+{
+ "fdate"="2018-11-22";
+ "fdatetime"="2018-11-22T21:20:19Z";
+ "ftimestamp"="2018-11-22T21:20:19.345678Z";
+ "finterval"="P2DT10H";
+ "ftzdate"="2018-11-22,Europe/Moscow";
+ "ftzdatetime"="2018-11-22T21:20:19,Europe/Moscow";
+ "ftztimestamp"="2018-11-22T21:20:19.345678,Europe/Moscow";
+ "finterval_1day"="P1D"
+};
+
diff --git a/yql/essentials/udfs/common/datetime2/test/cases/BlockTo.in.attr b/yql/essentials/udfs/common/datetime2/test/cases/BlockTo.in.attr
new file mode 100644
index 00000000000..cb97c1895c5
--- /dev/null
+++ b/yql/essentials/udfs/common/datetime2/test/cases/BlockTo.in.attr
@@ -0,0 +1,66 @@
+{
+ "_yql_row_spec" = {
+ "Type" = [
+ "StructType";
+ [
+ [
+ "fdate";
+ [
+ "DataType";
+ "String"
+ ]
+ ];
+ [
+ "fdatetime";
+ [
+ "DataType";
+ "String"
+ ]
+ ];
+ [
+ "ftimestamp";
+ [
+ "DataType";
+ "String"
+ ]
+ ];
+ [
+ "finterval";
+ [
+ "DataType";
+ "String"
+ ]
+ ];
+ [
+ "ftzdate";
+ [
+ "DataType";
+ "String"
+ ]
+ ];
+ [
+ "ftzdatetime";
+ [
+ "DataType";
+ "String"
+ ]
+ ];
+ [
+ "ftztimestamp";
+ [
+ "DataType";
+ "String"
+ ]
+ ];
+ [
+ "finterval_1day";
+ [
+ "DataType";
+ "String"
+ ]
+ ]
+ ]
+ ]
+ }
+}
+
diff --git a/yql/essentials/udfs/common/datetime2/test/cases/BlockTo.sql b/yql/essentials/udfs/common/datetime2/test/cases/BlockTo.sql
new file mode 100644
index 00000000000..5f5e68aa80f
--- /dev/null
+++ b/yql/essentials/udfs/common/datetime2/test/cases/BlockTo.sql
@@ -0,0 +1,51 @@
+/* syntax version 1 */
+pragma UseBlocks;
+insert into @t
+ select
+ cast(fdate as Date) as `date`,
+ cast(fdatetime as Datetime) as `datetime`,
+ cast(ftimestamp as Timestamp) as `timestamp`,
+ cast(finterval as Interval) as `interval`,
+ cast(ftzdate as TzDate) as `tzdate`,
+ cast(ftzdatetime as TzDatetime) as `tzdatetime`,
+ cast(ftztimestamp as TzTimestamp) as `tztimestamp`,
+ cast(null as Interval) as `interval_null`,
+ -cast(finterval_1day as Interval) as `negative_1d`,
+ from Input;
+
+commit;
+select
+ DateTime::ToDays(`interval`) as interval_to_days,
+ DateTime::ToHours(`interval`) as interval_to_hours,
+ DateTime::ToMinutes(`interval`) as interval_to_minutes,
+ DateTime::ToSeconds(`interval`) as interval_to_seconds,
+ DateTime::ToMilliseconds(`interval`) as interval_to_msec,
+ DateTime::ToMicroseconds(`interval`) as interval_to_usec,
+
+ DateTime::ToSeconds(`date`) as date_to_seconds,
+ DateTime::ToSeconds(`datetime`) as datetime_to_seconds,
+ DateTime::ToSeconds(`timestamp`) as timestamp_to_seconds,
+ DateTime::ToSeconds(`tzdate`) as tzdate_to_seconds,
+ DateTime::ToSeconds(`tzdatetime`) as tzdatetime_to_seconds,
+ DateTime::ToSeconds(`tztimestamp`) as tztimestamp_to_seconds,
+
+ DateTime::ToMilliseconds(`date`) as date_to_msec,
+ DateTime::ToMilliseconds(`datetime`) as datetime_to_msec,
+ DateTime::ToMilliseconds(`timestamp`) as timestamp_to_msec,
+ DateTime::ToMilliseconds(`tzdate`) as tzdate_to_msec,
+ DateTime::ToMilliseconds(`tzdatetime`) as tzdatetime_to_msec,
+ DateTime::ToMilliseconds(`tztimestamp`) as tztimestamp_to_msec,
+
+ DateTime::ToMicroseconds(`date`) as date_to_usec,
+ DateTime::ToMicroseconds(`datetime`) as datetime_to_usec,
+ DateTime::ToMicroseconds(`timestamp`) as timestamp_to_usec,
+ DateTime::ToMicroseconds(`tzdate`) as tzdate_to_usec,
+ DateTime::ToMicroseconds(`tzdatetime`) as tzdatetime_to_usec,
+ DateTime::ToMicroseconds(`tztimestamp`) as tztimestamp_to_usec,
+
+ DateTime::ToDays(`interval_null`) as interval_null,
+
+ /* Overflow test */
+ DateTime::ToDays(`negative_1d`) as negative_1d,
+from @t;
+
diff --git a/yql/essentials/udfs/common/datetime2/test/cases/EndOf.sql b/yql/essentials/udfs/common/datetime2/test/cases/EndOf.sql
new file mode 100644
index 00000000000..61b4a29e536
--- /dev/null
+++ b/yql/essentials/udfs/common/datetime2/test/cases/EndOf.sql
@@ -0,0 +1,34 @@
+/* syntax version 1 */
+$format = DateTime::Format("%Y-%m-%d %H:%M:%S %Z");
+
+select
+ $format(DateTime::EndOfMonth(TzDateTime('2023-07-07T01:02:03,Europe/Moscow'))),
+ $format(DateTime::EndOfMonth(Date('2023-08-08'))),
+ $format(DateTime::EndOfMonth(Date('2023-09-09'))),
+ $format(DateTime::EndOfMonth(Date('2023-02-02'))),
+ $format(DateTime::EndOfMonth(Date('2024-02-02')))
+into result `Normal cases`;
+
+$tsMin = '1970-01-01T00:00:00.000000';
+$tsMax = '2105-12-31T23:59:59.999999';
+$tsBelow = '1969-12-31T23:59:59.999999';
+$tsAbove = '2106-01-01T00:00:00.000000';
+
+select $format(cast($tsMin || 'Z' as Timestamp))
+ , $format(DateTime::EndOfMonth(cast($tsMin || 'Z' as Timestamp)))
+ , $format(DateTime::EndOfMonth(cast($tsMin || ',Atlantic/Madeira' as Timestamp)))
+into result `Minimal timestamp value`;
+
+select $format(cast($tsMax || 'Z' as Timestamp))
+ , $format(DateTime::EndOfMonth(cast($tsMax || 'Z' as Timestamp)))
+ , $format(DateTime::EndOfMonth(cast('2105-12-12T00:00:00Z' as Timestamp)))
+ , $format(DateTime::EndOfMonth(cast($tsMax || ',Atlantic/Azores' as Timestamp)))
+into result `Maximum timestamp value`;
+
+select $format(cast($tsBelow || ',Atlantic/Azores' as TzTimestamp))
+ , $format(DateTime::EndOfMonth(cast($tsBelow || ',Atlantic/Azores' as TzTimestamp)))
+into result `Timestamp below minimum`;
+
+select $format(cast($tsAbove || ',Atlantic/Madeira' as TzTimestamp))
+ , $format(DateTime::EndOfMonth(cast($tsAbove || ',Atlantic/Madeira' as TzTimestamp)))
+into result `Timestamp above maximum`;
diff --git a/yql/essentials/udfs/common/datetime2/test/cases/Format.in b/yql/essentials/udfs/common/datetime2/test/cases/Format.in
new file mode 100644
index 00000000000..f9390e7c2be
--- /dev/null
+++ b/yql/essentials/udfs/common/datetime2/test/cases/Format.in
@@ -0,0 +1,4 @@
+{"ftztimestamp" = "1970-01-01T00:00:00,GMT"};
+{"ftztimestamp" = "2018-12-01T01:02:03.456789,Europe/Moscow"};
+{"ftztimestamp" = "2011-03-13T02:15:00,America/Los_Angeles"};
+{"ftztimestamp" = "2011-11-06T01:15:00,America/Los_Angeles"};
diff --git a/yql/essentials/udfs/common/datetime2/test/cases/Format.in.attr b/yql/essentials/udfs/common/datetime2/test/cases/Format.in.attr
new file mode 100644
index 00000000000..2cc4f8c0d68
--- /dev/null
+++ b/yql/essentials/udfs/common/datetime2/test/cases/Format.in.attr
@@ -0,0 +1,17 @@
+{
+ "_yql_row_spec" = {
+ "Type" = [
+ "StructType";
+ [
+ [
+ "ftztimestamp";
+ [
+ "DataType";
+ "String"
+ ]
+ ]
+ ]
+ ]
+ }
+}
+
diff --git a/yql/essentials/udfs/common/datetime2/test/cases/Format.sql b/yql/essentials/udfs/common/datetime2/test/cases/Format.sql
new file mode 100644
index 00000000000..25daf1105dc
--- /dev/null
+++ b/yql/essentials/udfs/common/datetime2/test/cases/Format.sql
@@ -0,0 +1,6 @@
+/* syntax version 1 */
+$format = DateTime::Format("%% year %Y monthFullName %B monthShortName %b month %m day %d hours %H minutes %M seconds %S tz %z tzname %Z text");
+
+select
+ $format(DateTime::Split(cast(ftztimestamp as TzTimestamp)))
+from Input
diff --git a/yql/essentials/udfs/common/datetime2/test/cases/FormatMicroseconds.sql b/yql/essentials/udfs/common/datetime2/test/cases/FormatMicroseconds.sql
new file mode 100644
index 00000000000..3517da3bf35
--- /dev/null
+++ b/yql/essentials/udfs/common/datetime2/test/cases/FormatMicroseconds.sql
@@ -0,0 +1,15 @@
+/* syntax version 1 */
+$parse = DateTime::Parse("%Y-%m-%d %H:%M:%S");
+
+$dt0 = $parse("2024-01-01 00:00:00");
+$dt1 = $parse("2024-01-01 00:00:00.000001");
+$dt2 = $parse("2024-01-01 00:00:00.05");
+
+$format = DateTime::Format("%Y-%m-%d %H:%M:%S");
+$format_ms = DateTime::Format("%Y-%m-%d %H:%M:%S", True as AlwaysWriteFractionalSeconds);
+
+SELECT
+ $format($dt0), $format_ms($dt0),
+ $format($dt1), $format_ms($dt1),
+ $format($dt2), $format_ms($dt2)
+;
diff --git a/yql/essentials/udfs/common/datetime2/test/cases/From.in b/yql/essentials/udfs/common/datetime2/test/cases/From.in
new file mode 100644
index 00000000000..dd293eaaa7d
--- /dev/null
+++ b/yql/essentials/udfs/common/datetime2/test/cases/From.in
@@ -0,0 +1,12 @@
+{
+ "fts_seconds" = 3875345u;
+ "fts_msec" = 3875345000u;
+ "fts_usec" = 3875345000000u;
+
+ "fdays" = 7;
+ "fhours" = 168;
+ "fminutes" = 10080;
+ "fseconds" = 604800;
+ "fmsec" = 604800000;
+ "fusec" = -604800000000;
+};
diff --git a/yql/essentials/udfs/common/datetime2/test/cases/From.in.attr b/yql/essentials/udfs/common/datetime2/test/cases/From.in.attr
new file mode 100644
index 00000000000..538f83ab720
--- /dev/null
+++ b/yql/essentials/udfs/common/datetime2/test/cases/From.in.attr
@@ -0,0 +1,73 @@
+{
+ "_yql_row_spec" = {
+ "Type" = [
+ "StructType";
+ [
+ [
+ "fts_seconds";
+ [
+ "DataType";
+ "Uint32"
+ ]
+ ];
+ [
+ "fts_msec";
+ [
+ "DataType";
+ "Uint64"
+ ]
+ ];
+ [
+ "fts_usec";
+ [
+ "DataType";
+ "Uint64"
+ ]
+ ];
+ [
+ "fdays";
+ [
+ "DataType";
+ "Int16"
+ ]
+ ];
+ [
+ "fhours";
+ [
+ "DataType";
+ "Int32"
+ ]
+ ];
+ [
+ "fminutes";
+ [
+ "DataType";
+ "Int32"
+ ]
+ ];
+ [
+ "fseconds";
+ [
+ "DataType";
+ "Int32"
+ ]
+ ];
+ [
+ "fmsec";
+ [
+ "DataType";
+ "Int64"
+ ]
+ ];
+ [
+ "fusec";
+ [
+ "DataType";
+ "Int64"
+ ]
+ ]
+ ]
+ ]
+ }
+}
+
diff --git a/yql/essentials/udfs/common/datetime2/test/cases/From.sql b/yql/essentials/udfs/common/datetime2/test/cases/From.sql
new file mode 100644
index 00000000000..c596e33f586
--- /dev/null
+++ b/yql/essentials/udfs/common/datetime2/test/cases/From.sql
@@ -0,0 +1,14 @@
+/* syntax version 1 */
+select
+ DateTime::FromSeconds(fts_seconds) as ts_seconds,
+ DateTime::FromMilliseconds(fts_msec) as ts_msec,
+ DateTime::FromMicroseconds(fts_usec) as ts_usec,
+ DateTime::FromMicroseconds(fts_msec * fts_msec) as ts_empty,
+
+ DateTime::IntervalFromDays(fdays) as interval_days,
+ DateTime::IntervalFromHours(fhours) as interval_hours,
+ DateTime::IntervalFromMinutes(fminutes) as interval_minutes,
+ DateTime::IntervalFromSeconds(fseconds) as interval_seconds,
+ DateTime::IntervalFromMilliseconds(fmsec) as interval_msec,
+ DateTime::IntervalFromMicroseconds(fusec) as interval_usec
+from Input
diff --git a/yql/essentials/udfs/common/datetime2/test/cases/Get.in b/yql/essentials/udfs/common/datetime2/test/cases/Get.in
new file mode 100644
index 00000000000..06d60295808
--- /dev/null
+++ b/yql/essentials/udfs/common/datetime2/test/cases/Get.in
@@ -0,0 +1,10 @@
+{"ftztimestamp"="1970-01-01T11:14:00.000000,GMT"};
+{"ftztimestamp"="1970-01-02T14:08:00.000000,GMT"};
+{"ftztimestamp"="1970-02-01T17:03:00.000000,GMT"};
+{"ftztimestamp"="1970-09-03T07:22:00.000000,GMT"};
+{"ftztimestamp"="1970-12-31T23:59:59.999999,GMT"};
+{"ftztimestamp"="1971-01-01T00:00:00.000000,GMT"};
+{"ftztimestamp"="1971-01-14T00:00:00.000000,GMT"};
+{"ftztimestamp"="1978-01-25T16:15:00.000000,Europe/Uzhgorod"};
+{"ftztimestamp"="2018-12-01T01:02:03.456789,Europe/Moscow"};
+
diff --git a/yql/essentials/udfs/common/datetime2/test/cases/Get.in.attr b/yql/essentials/udfs/common/datetime2/test/cases/Get.in.attr
new file mode 100644
index 00000000000..2cc4f8c0d68
--- /dev/null
+++ b/yql/essentials/udfs/common/datetime2/test/cases/Get.in.attr
@@ -0,0 +1,17 @@
+{
+ "_yql_row_spec" = {
+ "Type" = [
+ "StructType";
+ [
+ [
+ "ftztimestamp";
+ [
+ "DataType";
+ "String"
+ ]
+ ]
+ ]
+ ]
+ }
+}
+
diff --git a/yql/essentials/udfs/common/datetime2/test/cases/Get.sql b/yql/essentials/udfs/common/datetime2/test/cases/Get.sql
new file mode 100644
index 00000000000..99ec9528fcb
--- /dev/null
+++ b/yql/essentials/udfs/common/datetime2/test/cases/Get.sql
@@ -0,0 +1,23 @@
+/* syntax version 1 */
+SELECT
+ DateTime::GetYear(tm) as ryear,
+ DateTime::GetDayOfYear(tm) as rdayofyear,
+ DateTime::GetMonth(tm) as rmonth,
+ DateTime::GetMonthName(tm) as rmonthname,
+ DateTime::GetWeekOfYear(tm) as rweekofyear,
+ DateTime::GetWeekOfYearIso8601(tm) as rweekofyeariso8601,
+ DateTime::GetDayOfMonth(tm) as rdayofmonth,
+ DateTime::GetDayOfWeek(tm) as rdayofweek,
+ DateTime::GetDayOfWeekName(tm) as rdayofweekname,
+ DateTime::GetHour(tm) as rhour,
+ DateTime::GetMinute(tm) as rminute,
+ DateTime::GetSecond(tm) as rsecond,
+ DateTime::GetMillisecondOfSecond(tm) as rmsec,
+ DateTime::GetMicrosecondOfSecond(tm) as rusec,
+ DateTime::GetTimezoneId(tm) as rtz,
+ DateTime::GetTimezoneName(tm) as rtzname
+FROM (
+ SELECT
+ DateTime::Split(CAST(ftztimestamp as TzTimestamp)) as tm
+ FROM Input
+);
diff --git a/yql/essentials/udfs/common/datetime2/test/cases/ImplicitSplit.in b/yql/essentials/udfs/common/datetime2/test/cases/ImplicitSplit.in
new file mode 100644
index 00000000000..26f8d006790
--- /dev/null
+++ b/yql/essentials/udfs/common/datetime2/test/cases/ImplicitSplit.in
@@ -0,0 +1,9 @@
+{
+ "fdate"="2018-12-15";
+ "fdatetime"="2018-12-15T01:02:03Z";
+ "ftimestamp"="2018-12-15T01:02:03.456789Z";
+ "ftzdate"="2018-12-15,Europe/Moscow";
+ "ftzdatetime"="2018-12-15T01:02:03,Europe/Moscow";
+ "ftztimestamp"="2018-12-15T01:02:03.456789,Europe/Moscow"
+};
+
diff --git a/yql/essentials/udfs/common/datetime2/test/cases/ImplicitSplit.in.attr b/yql/essentials/udfs/common/datetime2/test/cases/ImplicitSplit.in.attr
new file mode 100644
index 00000000000..876e4f8a19d
--- /dev/null
+++ b/yql/essentials/udfs/common/datetime2/test/cases/ImplicitSplit.in.attr
@@ -0,0 +1,52 @@
+{
+ "_yql_row_spec" = {
+ "Type" = [
+ "StructType";
+ [
+ [
+ "fdate";
+ [
+ "DataType";
+ "String"
+ ]
+ ];
+ [
+ "fdatetime";
+ [
+ "DataType";
+ "String"
+ ]
+ ];
+ [
+ "ftimestamp";
+ [
+ "DataType";
+ "String"
+ ]
+ ];
+ [
+ "ftzdate";
+ [
+ "DataType";
+ "String"
+ ]
+ ];
+ [
+ "ftzdatetime";
+ [
+ "DataType";
+ "String"
+ ]
+ ];
+ [
+ "ftztimestamp";
+ [
+ "DataType";
+ "String"
+ ]
+ ]
+ ]
+ ]
+ }
+}
+
diff --git a/yql/essentials/udfs/common/datetime2/test/cases/ImplicitSplit.sql b/yql/essentials/udfs/common/datetime2/test/cases/ImplicitSplit.sql
new file mode 100644
index 00000000000..df19228e828
--- /dev/null
+++ b/yql/essentials/udfs/common/datetime2/test/cases/ImplicitSplit.sql
@@ -0,0 +1,20 @@
+/* syntax version 1 */
+$format = DateTime::Format("%Y%m%d %H%M%S %Z");
+
+select
+ $format(`date`),
+ $format(`datetime`),
+ $format(`timestamp`),
+ $format(`tzdate`),
+ $format(`tzdatetime`),
+ $format(`tztimestamp`)
+from (
+ select
+ cast(fdate as Date) as `date`,
+ cast(fdatetime as Datetime) as `datetime`,
+ cast(ftimestamp as Timestamp) as `timestamp`,
+ cast(ftzdate as TzDate) as `tzdate`,
+ cast(ftzdatetime as TzDatetime) as `tzdatetime`,
+ cast(ftztimestamp as TzTimestamp) as `tztimestamp`
+ from Input
+);
diff --git a/yql/essentials/udfs/common/datetime2/test/cases/MultirowBlockTo.in b/yql/essentials/udfs/common/datetime2/test/cases/MultirowBlockTo.in
new file mode 100644
index 00000000000..c6d52b77b32
--- /dev/null
+++ b/yql/essentials/udfs/common/datetime2/test/cases/MultirowBlockTo.in
@@ -0,0 +1,24 @@
+{
+ "finterval1"="P0Y2M";
+ "finterval2"=null;
+ "finterval3"="P4DT5H";
+ "finterval4"="P6Y7M"
+};
+{
+ "finterval1"="P20000Y4M";
+ "finterval2"="PT6H12M";
+ "finterval3"=null;
+ "finterval4"="P9Y10M"
+};
+{
+ "finterval1"=null;
+ "finterval2"="PT9H18M";
+ "finterval3"="P100000D";
+ "finterval4"="P12Y14M"
+};
+{
+ "finterval1"="P4Y8M";
+ "finterval2"=null;
+ "finterval3"="P13DT14H";
+ "finterval4"=null
+};
diff --git a/yql/essentials/udfs/common/datetime2/test/cases/MultirowBlockTo.in.attr b/yql/essentials/udfs/common/datetime2/test/cases/MultirowBlockTo.in.attr
new file mode 100644
index 00000000000..c8fb13b9d6e
--- /dev/null
+++ b/yql/essentials/udfs/common/datetime2/test/cases/MultirowBlockTo.in.attr
@@ -0,0 +1,49 @@
+{
+ "_yql_row_spec" = {
+ "Type" = [
+ "StructType";
+ [
+ [
+ "finterval1";
+ [
+ "OptionalType";
+ [
+ "DataType";
+ "String"
+ ]
+ ]
+ ];
+ [
+ "finterval2";
+ [
+ "OptionalType";
+ [
+ "DataType";
+ "String"
+ ]
+ ]
+ ];
+ [
+ "finterval3";
+ [
+ "OptionalType";
+ [
+ "DataType";
+ "String"
+ ]
+ ]
+ ];
+ [
+ "finterval4";
+ [
+ "OptionalType";
+ [
+ "DataType";
+ "String"
+ ]
+ ]
+ ];
+ ]
+ ]
+ }
+}
diff --git a/yql/essentials/udfs/common/datetime2/test/cases/MultirowBlockTo.sql b/yql/essentials/udfs/common/datetime2/test/cases/MultirowBlockTo.sql
new file mode 100644
index 00000000000..1670be24513
--- /dev/null
+++ b/yql/essentials/udfs/common/datetime2/test/cases/MultirowBlockTo.sql
@@ -0,0 +1,20 @@
+/* syntax version 1 */
+
+pragma UseBlocks;
+insert into @t
+ select
+ cast(finterval1 as Interval) as `interval1`,
+ cast(finterval2 as Interval) as `interval2`,
+ cast(finterval3 as Interval) as `interval3`,
+ cast(finterval4 as Interval) as `interval4`
+from Input;
+
+commit;
+
+select
+ DateTime::ToDays(`interval1`) as `interval1`,
+ DateTime::ToDays(`interval2`) as `interval2`,
+ DateTime::ToDays(`interval3`) as `interval3`,
+ DateTime::ToDays(`interval4`) as `interval4`
+from @t;
+
diff --git a/yql/essentials/udfs/common/datetime2/test/cases/Parse.in b/yql/essentials/udfs/common/datetime2/test/cases/Parse.in
new file mode 100644
index 00000000000..fa560d488cd
--- /dev/null
+++ b/yql/essentials/udfs/common/datetime2/test/cases/Parse.in
@@ -0,0 +1,55 @@
+{
+ "fdatetime1"="2011 03 08 010203 Europe/Moscow text";
+ "fdatetime2"="%text% 02/23/2022 12:00:00.";
+ "fdatetime3"="march/08/2011";
+ "fdatetime4"="mar/08/2011";
+
+ "frfc822"="Fri, 4 Mar 2005 19:34:45 EST";
+ "fiso8601"="2009-02-14T02:31:30+0300";
+ "fhttp"="Sunday, 06-Nov-94 08:49:37 GMT";
+ "fx509"="20091014165533Z";
+};
+{
+ "fdatetime1"="2011 03 08 010203.22 Europe/Moscow text";
+ "fdatetime2"="%text% 02/23/2022 12:00:00.666666";
+ "fdatetime3"="November/08/2011";
+ "fdatetime4"="Nov/08/2011";
+
+ "frfc822"="4 Mar 05 19:34 -0330";
+ "fiso8601"="2009-09-19 03:37:03.012331+04:00";
+ "fhttp"="Sun Nov 6 08:49:37 1994";
+ "fx509"="990104074212Z";
+};
+{
+ "fdatetime1"="2011 03 08 010203 Europe/Moscow bar";
+ "fdatetime2"="%text% 02/23/2022 12:00:00.999999999";
+ "fdatetime3"="JanUAry/08/2011";
+ "fdatetime4"="jAN/08/2011";
+
+ "frfc822"="17 Nov 2008 19:34:45";
+ "fiso8601"="1990-03-151Y15:16:17.18";
+ "fhttp"="1990-03-151Y15:16:17.18";
+ "fx509"="500101000000Z";
+};
+{
+ "fdatetime1"="2011 03 08 010203 Europe/Moscow bar";
+ "fdatetime2"="%text% 02/23/2022 12:00:00.42";
+ "fdatetime3"="JanUArY/08/2011";
+ "fdatetime4"="JAN/08/2011";
+
+ "frfc822"="17 Nov 2008 19:34:45";
+ "fiso8601"="1990-03-151Y15:16:17.182";
+ "fhttp"="1990-03-151Y15:16:17.182";
+ "fx509"="500101000000Z";
+};
+{
+ "fdatetime1"="2011 03 08 010203 Europe/Moscow bar";
+ "fdatetime2"="%text% 02/23/2022 12:00:00.82387468293473839939483932923";
+ "fdatetime3"="JanUArY/08/2011";
+ "fdatetime4"="feb/08/2011";
+
+ "frfc822"="17 Nov 2008 19:34:45";
+ "fiso8601"="1990-03-151Y15:16:17.182";
+ "fhttp"="1990-03-151Y15:16:17.182";
+ "fx509"="500101000000Z";
+};
diff --git a/yql/essentials/udfs/common/datetime2/test/cases/Parse.in.attr b/yql/essentials/udfs/common/datetime2/test/cases/Parse.in.attr
new file mode 100644
index 00000000000..935646e0df7
--- /dev/null
+++ b/yql/essentials/udfs/common/datetime2/test/cases/Parse.in.attr
@@ -0,0 +1,66 @@
+{
+ "_yql_row_spec" = {
+ "Type" = [
+ "StructType";
+ [
+ [
+ "fdatetime1";
+ [
+ "DataType";
+ "String"
+ ]
+ ];
+ [
+ "fdatetime2";
+ [
+ "DataType";
+ "String"
+ ]
+ ];
+ [
+ "fdatetime3";
+ [
+ "DataType";
+ "String"
+ ]
+ ];
+ [
+ "fdatetime4";
+ [
+ "DataType";
+ "String"
+ ]
+ ];
+ [
+ "frfc822";
+ [
+ "DataType";
+ "String"
+ ]
+ ];
+ [
+ "fiso8601";
+ [
+ "DataType";
+ "String"
+ ]
+ ];
+ [
+ "fhttp";
+ [
+ "DataType";
+ "String"
+ ]
+ ];
+ [
+ "fx509";
+ [
+ "DataType";
+ "String"
+ ]
+ ]
+ ]
+ ]
+ }
+}
+
diff --git a/yql/essentials/udfs/common/datetime2/test/cases/Parse.sql b/yql/essentials/udfs/common/datetime2/test/cases/Parse.sql
new file mode 100644
index 00000000000..b39fc6c2448
--- /dev/null
+++ b/yql/essentials/udfs/common/datetime2/test/cases/Parse.sql
@@ -0,0 +1,25 @@
+/* syntax version 1 */
+$parse1 = DateTime::Parse("%Y %m %d %H%M%S %Z text");
+$parse2 = DateTime::Parse("%%text%% %m/%d/%Y %H:%M:%S");
+$parse3 = DateTime::Parse("%B/%d/%Y");
+$parse4 = DateTime::Parse("%b/%d/%Y");
+
+
+$format1 = DateTime::Format("%Y-%m-%dT%H:%M:%S,%Z");
+$format2 = DateTime::Format("%Y%m%d %H%M%S %z");
+$format3 = DateTime::Format("%Y%m%d");
+
+select
+ $format1($parse1(fdatetime1)),
+ $format2($parse1(fdatetime1)),
+ $format1($parse2(fdatetime2)),
+ $format2($parse2(fdatetime2)),
+
+ $format3($parse3(fdatetime3)),
+ $format3($parse4(fdatetime4)),
+
+ $format1(DateTime::ParseRfc822(frfc822)),
+ $format1(DateTime::ParseIso8601(fiso8601)),
+ $format1(DateTime::ParseHttp(fhttp)),
+ $format1(DateTime::ParseX509(fx509))
+from Input
diff --git a/yql/essentials/udfs/common/datetime2/test/cases/ParseIso8601.sql b/yql/essentials/udfs/common/datetime2/test/cases/ParseIso8601.sql
new file mode 100644
index 00000000000..c79bdbf5f11
--- /dev/null
+++ b/yql/essentials/udfs/common/datetime2/test/cases/ParseIso8601.sql
@@ -0,0 +1 @@
+select DateTime::ParseIso8601("2106-01-01T00:00:00"), DateTime::ParseIso8601("2200-01-01T00:00:00"), DateTime::ParseIso8601("2106-02-01T00:00:00"); \ No newline at end of file
diff --git a/yql/essentials/udfs/common/datetime2/test/cases/ParseLim.sql b/yql/essentials/udfs/common/datetime2/test/cases/ParseLim.sql
new file mode 100644
index 00000000000..2ecd70fcbba
--- /dev/null
+++ b/yql/essentials/udfs/common/datetime2/test/cases/ParseLim.sql
@@ -0,0 +1,19 @@
+$dt_parser1 = DateTime::Parse('%Y-%m-%d');
+$dt_parser2 = DateTime::Parse('%Y-%m-%d %H:%M:%S');
+$dt_parser1z = DateTime::Parse('%Y-%m-%d %Z');
+$dt_parser2z = DateTime::Parse('%Y-%m-%d %H:%M:%S %Z');
+
+SELECT
+ $dt_parser1("2105-12-31"), $dt_parser1("2106-01-01"),
+ $dt_parser2("2105-12-31 23:59:59"), $dt_parser2("2106-01-01 00:00:00"),
+ $dt_parser2("2105-12-31 23:59:59.999999"), $dt_parser2("2106-01-01 00:00:00.000000"),
+ $dt_parser1z("2105-12-31 Etc/GMT+11"),
+ $dt_parser1z("2106-01-01 Etc/GMT-1"),
+ $dt_parser2z("2105-12-31 23:00:00 Etc/GMT+1"),
+ $dt_parser2z("2105-12-31 22:59:59.999999 Etc/GMT+1"),
+ $dt_parser1("1970-01-01"), $dt_parser1("1969-12-31"),
+ $dt_parser2("1970-01-01 00:00:00"), $dt_parser2("1969-12-31 23:59:59"),
+ $dt_parser2("1969-12-31 23:59:59.999999"), $dt_parser2("1970-01-01 00:00:00.000000"),
+ $dt_parser2z("1969-12-31 23:00:00 Etc/GMT+1"),
+ $dt_parser2z("1969-12-31 22:59:59.999999 Etc/GMT+1");
+
diff --git a/yql/essentials/udfs/common/datetime2/test/cases/Repr.in b/yql/essentials/udfs/common/datetime2/test/cases/Repr.in
new file mode 100644
index 00000000000..e69de29bb2d
--- /dev/null
+++ b/yql/essentials/udfs/common/datetime2/test/cases/Repr.in
diff --git a/yql/essentials/udfs/common/datetime2/test/cases/Repr.sql b/yql/essentials/udfs/common/datetime2/test/cases/Repr.sql
new file mode 100644
index 00000000000..d92b0ade4e1
--- /dev/null
+++ b/yql/essentials/udfs/common/datetime2/test/cases/Repr.sql
@@ -0,0 +1,4 @@
+/* syntax version 1 */
+select
+ DateTime::Parse("%Y.%m.%d")("2016.08.15"),
+ DateTime::Split(AddTimezone(DateTime("2017-01-01T10:00:00Z"),"Europe/Moscow"))
diff --git a/yql/essentials/udfs/common/datetime2/test/cases/Shift.in b/yql/essentials/udfs/common/datetime2/test/cases/Shift.in
new file mode 100644
index 00000000000..7f81c5d0746
--- /dev/null
+++ b/yql/essentials/udfs/common/datetime2/test/cases/Shift.in
@@ -0,0 +1,8 @@
+{"ftztimestamp"="2001-11-17T21:20:19.345678,GMT"; };
+{"ftztimestamp"="1970-01-01T11:14:00.000000,GMT"};
+{"ftztimestamp"="2105-12-01T01:08:00.000000,Europe/Moscow"};
+{"ftztimestamp"="2049-06-13T00:00:00.000000,GMT"};
+{"ftztimestamp"="2000-01-31T16:15:00.000000,Europe/Uzhgorod"};
+{"ftztimestamp"="2024-02-29T01:02:03.456789,Europe/Moscow"};
+{"ftztimestamp"="1970-02-01T02:00:00.444123,Europe/Moscow"};
+
diff --git a/yql/essentials/udfs/common/datetime2/test/cases/Shift.in.attr b/yql/essentials/udfs/common/datetime2/test/cases/Shift.in.attr
new file mode 100644
index 00000000000..3915337be3c
--- /dev/null
+++ b/yql/essentials/udfs/common/datetime2/test/cases/Shift.in.attr
@@ -0,0 +1,17 @@
+{
+ "_yql_row_spec" = {
+ "Type" = [
+ "StructType";
+ [
+ [
+ "ftztimestamp";
+ [
+ "DataType";
+ "String"
+ ]
+ ];
+ ]
+ ]
+ }
+}
+
diff --git a/yql/essentials/udfs/common/datetime2/test/cases/Shift.sql b/yql/essentials/udfs/common/datetime2/test/cases/Shift.sql
new file mode 100644
index 00000000000..b421c558683
--- /dev/null
+++ b/yql/essentials/udfs/common/datetime2/test/cases/Shift.sql
@@ -0,0 +1,22 @@
+SELECT
+ cast(DateTime::MakeTzTimestamp(DateTime::ShiftYears(tm, 10)) as String),
+ cast(DateTime::MakeTzTimestamp(DateTime::ShiftYears(tm, 10)) as String),
+ cast(DateTime::MakeTzTimestamp(DateTime::ShiftQuarters(tm, 16)) as String),
+ cast(DateTime::MakeTzTimestamp(DateTime::ShiftQuarters(tm, -16)) as String),
+ cast(DateTime::MakeTzTimestamp(DateTime::ShiftMonths(tm, 0)) as String),
+ cast(DateTime::MakeTzTimestamp(DateTime::ShiftMonths(tm, 1)) as String),
+ cast(DateTime::MakeTzTimestamp(DateTime::ShiftMonths(tm, 3)) as String),
+ cast(DateTime::MakeTzTimestamp(DateTime::ShiftMonths(tm, 11)) as String),
+ cast(DateTime::MakeTzTimestamp(DateTime::ShiftMonths(tm, 12)) as String),
+ cast(DateTime::MakeTzTimestamp(DateTime::ShiftMonths(tm, 123)) as String),
+ cast(DateTime::MakeTzTimestamp(DateTime::ShiftMonths(tm, -1)) as String),
+ cast(DateTime::MakeTzTimestamp(DateTime::ShiftMonths(tm, -3)) as String),
+ cast(DateTime::MakeTzTimestamp(DateTime::ShiftMonths(tm, -11)) as String),
+ cast(DateTime::MakeTzTimestamp(DateTime::ShiftMonths(tm, -12)) as String),
+ cast(DateTime::MakeTzTimestamp(DateTime::ShiftMonths(tm, -123)) as String)
+from (
+ select
+ cast(ftztimestamp as TzTimestamp) as tm
+ from Input
+);
+
diff --git a/yql/essentials/udfs/common/datetime2/test/cases/SplitMake.in b/yql/essentials/udfs/common/datetime2/test/cases/SplitMake.in
new file mode 100644
index 00000000000..580acf3f863
--- /dev/null
+++ b/yql/essentials/udfs/common/datetime2/test/cases/SplitMake.in
@@ -0,0 +1,49 @@
+{
+ "fdate"="1945-05-09";
+ "fdatetime"="1945-05-09T00:00:00Z";
+ "ftimestamp"="1945-05-09T00:00:00.000000Z";
+ "ftzdate"="1945-05-09,Europe/Moscow";
+ "ftzdatetime"="1945-05-09T00:00:00,Europe/Moscow";
+ "ftztimestamp"="1945-05-09T00:00:00.000000,Europe/Moscow"
+};
+{
+ "fdate"="1970-01-01";
+ "fdatetime"="1970-01-01T00:30:00Z";
+ "ftimestamp"="1970-01-01T00:30:00.000000Z";
+ "ftzdate"="1970-01-01,Europe/Moscow";
+ "ftzdatetime"="1970-01-01T01:00:00,Europe/Moscow";
+ "ftztimestamp"="1970-01-01T05:00:00.000000,Europe/Moscow"
+};
+{
+ "fdate"="2018-12-15";
+ "fdatetime"="2018-12-15T01:02:03Z";
+ "ftimestamp"="2018-12-15T01:02:03.456789Z";
+ "ftzdate"="2018-12-15,Europe/Moscow";
+ "ftzdatetime"="2018-12-15T01:02:03,Europe/Moscow";
+ "ftztimestamp"="2018-12-15T01:02:03.456789,Europe/Moscow"
+};
+{
+ "fdate"="2105-12-31";
+ "fdatetime"="2105-12-31T23:59:59Z";
+ "ftimestamp"="2105-12-31T23:59:59.999999Z";
+ "ftzdate"="2105-12-31,Canada/Central";
+ "ftzdatetime"="2105-12-31T16:00:00,Canada/Central";
+ "ftztimestamp"="2105-12-31T23:00:00.000000,Canada/Central"
+};
+{
+ "fdate"="2106-01-01";
+ "fdatetime"="2106-01-01T00:00:00Z";
+ "ftimestamp"="2106-01-01T00:00:00.000000Z";
+ "ftzdate"="2106-01-01,Europe/Moscow";
+ "ftzdatetime"="2106-01-01T01:00:00,Europe/Moscow";
+ "ftztimestamp"="2106-01-01T05:00:00.000000,Europe/Moscow"
+};
+{
+ "fdate"="2117-11-07";
+ "fdatetime"="2117-11-07T00:00:00Z";
+ "ftimestamp"="2117-11-07T00:00:00.000000Z";
+ "ftzdate"="2117-11-07,Europe/Moscow";
+ "ftzdatetime"="2117-11-07T00:00:00,Europe/Moscow";
+ "ftztimestamp"="2117-11-07T00:00:00.000000,Europe/Moscow"
+};
+
diff --git a/yql/essentials/udfs/common/datetime2/test/cases/SplitMake.in.attr b/yql/essentials/udfs/common/datetime2/test/cases/SplitMake.in.attr
new file mode 100644
index 00000000000..876e4f8a19d
--- /dev/null
+++ b/yql/essentials/udfs/common/datetime2/test/cases/SplitMake.in.attr
@@ -0,0 +1,52 @@
+{
+ "_yql_row_spec" = {
+ "Type" = [
+ "StructType";
+ [
+ [
+ "fdate";
+ [
+ "DataType";
+ "String"
+ ]
+ ];
+ [
+ "fdatetime";
+ [
+ "DataType";
+ "String"
+ ]
+ ];
+ [
+ "ftimestamp";
+ [
+ "DataType";
+ "String"
+ ]
+ ];
+ [
+ "ftzdate";
+ [
+ "DataType";
+ "String"
+ ]
+ ];
+ [
+ "ftzdatetime";
+ [
+ "DataType";
+ "String"
+ ]
+ ];
+ [
+ "ftztimestamp";
+ [
+ "DataType";
+ "String"
+ ]
+ ]
+ ]
+ ]
+ }
+}
+
diff --git a/yql/essentials/udfs/common/datetime2/test/cases/SplitMake.sql b/yql/essentials/udfs/common/datetime2/test/cases/SplitMake.sql
new file mode 100644
index 00000000000..9a8c08a8db1
--- /dev/null
+++ b/yql/essentials/udfs/common/datetime2/test/cases/SplitMake.sql
@@ -0,0 +1,18 @@
+/* syntax version 1 */
+select -- each value is decomposed by DateTime::Split and rebuilt by the Make* function of the same type
+ DateTime::MakeDate(DateTime::Split(`date`)) as rdate,
+ DateTime::MakeDatetime(DateTime::Split(`datetime`)) as rdatetime,
+ DateTime::MakeTimestamp(DateTime::Split(`timestamp`)) as rtimestamp,
+ DateTime::MakeTzDate(DateTime::Split(`tzdate`)) as rtzdate,
+ DateTime::MakeTzDatetime(DateTime::Split(`tzdatetime`)) as rtzdatetime,
+ DateTime::MakeTzTimestamp(DateTime::Split(`tztimestamp`)) as rtztimestamp
+from ( -- inner query: cast the f* columns of Input to the six date/time types under test
+ select
+ cast(fdate as Date) as `date`,
+ cast(fdatetime as Datetime) as `datetime`,
+ cast(ftimestamp as Timestamp) as `timestamp`,
+ cast(ftzdate as TzDate) as `tzdate`,
+ cast(ftzdatetime as TzDatetime) as `tzdatetime`,
+ cast(ftztimestamp as TzTimestamp) as `tztimestamp`
+ from Input
+);
diff --git a/yql/essentials/udfs/common/datetime2/test/cases/SplitMake1969.cfg b/yql/essentials/udfs/common/datetime2/test/cases/SplitMake1969.cfg
new file mode 100644
index 00000000000..b5a6eac7ad4
--- /dev/null
+++ b/yql/essentials/udfs/common/datetime2/test/cases/SplitMake1969.cfg
@@ -0,0 +1 @@
+in plato.Input SplitMake1969.in
diff --git a/yql/essentials/udfs/common/datetime2/test/cases/SplitMake1969.in b/yql/essentials/udfs/common/datetime2/test/cases/SplitMake1969.in
new file mode 100644
index 00000000000..70a53282f38
--- /dev/null
+++ b/yql/essentials/udfs/common/datetime2/test/cases/SplitMake1969.in
@@ -0,0 +1,8 @@
+{
+ "fdate"="1969-12-31";
+ "fdatetime"="1969-12-31T23:00:00Z";
+ "ftimestamp"="1969-12-31T23:00:00.000000Z";
+ "ftzdate"="1969-12-31,Canada/Central";
+ "ftzdatetime"="1969-12-31T16:00:00,Canada/Central";
+ "ftztimestamp"="1969-12-31T23:00:00.000000,Canada/Central"
+}; \ No newline at end of file
diff --git a/yql/essentials/udfs/common/datetime2/test/cases/SplitMake1969.in.attr b/yql/essentials/udfs/common/datetime2/test/cases/SplitMake1969.in.attr
new file mode 100644
index 00000000000..876e4f8a19d
--- /dev/null
+++ b/yql/essentials/udfs/common/datetime2/test/cases/SplitMake1969.in.attr
@@ -0,0 +1,52 @@
+{
+ "_yql_row_spec" = {
+ "Type" = [
+ "StructType";
+ [
+ [
+ "fdate";
+ [
+ "DataType";
+ "String"
+ ]
+ ];
+ [
+ "fdatetime";
+ [
+ "DataType";
+ "String"
+ ]
+ ];
+ [
+ "ftimestamp";
+ [
+ "DataType";
+ "String"
+ ]
+ ];
+ [
+ "ftzdate";
+ [
+ "DataType";
+ "String"
+ ]
+ ];
+ [
+ "ftzdatetime";
+ [
+ "DataType";
+ "String"
+ ]
+ ];
+ [
+ "ftztimestamp";
+ [
+ "DataType";
+ "String"
+ ]
+ ]
+ ]
+ ]
+ }
+}
+
diff --git a/yql/essentials/udfs/common/datetime2/test/cases/SplitMake1969.sql b/yql/essentials/udfs/common/datetime2/test/cases/SplitMake1969.sql
new file mode 100644
index 00000000000..9a8c08a8db1
--- /dev/null
+++ b/yql/essentials/udfs/common/datetime2/test/cases/SplitMake1969.sql
@@ -0,0 +1,18 @@
+/* syntax version 1 */
+select -- Split followed by the matching Make* for every date/time type; r* aliases name the rebuilt values
+ DateTime::MakeDate(DateTime::Split(`date`)) as rdate,
+ DateTime::MakeDatetime(DateTime::Split(`datetime`)) as rdatetime,
+ DateTime::MakeTimestamp(DateTime::Split(`timestamp`)) as rtimestamp,
+ DateTime::MakeTzDate(DateTime::Split(`tzdate`)) as rtzdate,
+ DateTime::MakeTzDatetime(DateTime::Split(`tzdatetime`)) as rtzdatetime,
+ DateTime::MakeTzTimestamp(DateTime::Split(`tztimestamp`)) as rtztimestamp
+from ( -- string columns are cast to typed values before being split
+ select
+ cast(fdate as Date) as `date`,
+ cast(fdatetime as Datetime) as `datetime`,
+ cast(ftimestamp as Timestamp) as `timestamp`,
+ cast(ftzdate as TzDate) as `tzdate`,
+ cast(ftzdatetime as TzDatetime) as `tzdatetime`,
+ cast(ftztimestamp as TzTimestamp) as `tztimestamp`
+ from Input -- SplitMake1969.cfg binds plato.Input to SplitMake1969.in (a single 1969-12-31 row)
+);
diff --git a/yql/essentials/udfs/common/datetime2/test/cases/StartOf.in b/yql/essentials/udfs/common/datetime2/test/cases/StartOf.in
new file mode 100644
index 00000000000..f482585e720
--- /dev/null
+++ b/yql/essentials/udfs/common/datetime2/test/cases/StartOf.in
@@ -0,0 +1,15 @@
+{
+ "ftztimestamp"="1970-01-01T05:00:00.000000,Europe/Moscow"
+};
+{
+ "ftztimestamp"="2018-12-15T01:02:03.456789,Europe/Moscow"
+};
+{
+ "ftztimestamp"="2105-12-31T16:23:45.000000,GMT"
+};
+{
+ "ftztimestamp"="2106-01-01T01:00:00.000000,Europe/Moscow"
+};
+{
+ "ftztimestamp"="2019-07-24T12:00:00,Europe/Moscow"
+};
diff --git a/yql/essentials/udfs/common/datetime2/test/cases/StartOf.in.attr b/yql/essentials/udfs/common/datetime2/test/cases/StartOf.in.attr
new file mode 100644
index 00000000000..2cc4f8c0d68
--- /dev/null
+++ b/yql/essentials/udfs/common/datetime2/test/cases/StartOf.in.attr
@@ -0,0 +1,17 @@
+{
+ "_yql_row_spec" = {
+ "Type" = [
+ "StructType";
+ [
+ [
+ "ftztimestamp";
+ [
+ "DataType";
+ "String"
+ ]
+ ]
+ ]
+ ]
+ }
+}
+
diff --git a/yql/essentials/udfs/common/datetime2/test/cases/StartOf.sql b/yql/essentials/udfs/common/datetime2/test/cases/StartOf.sql
new file mode 100644
index 00000000000..201db382300
--- /dev/null
+++ b/yql/essentials/udfs/common/datetime2/test/cases/StartOf.sql
@@ -0,0 +1,21 @@
+/* syntax version 1 */
+$format = DateTime::Format("%Y-%m-%d %H:%M:%S %Z"); -- formatter applied to every result below except TimeOfDay
+
+select -- columns carry no aliases; keep their order unchanged
+ $format(DateTime::StartOfYear(`tztimestamp`)),
+ $format(DateTime::StartOfQuarter(`tztimestamp`)),
+ $format(DateTime::StartOfMonth(`tztimestamp`)),
+ $format(DateTime::StartOfWeek(`tztimestamp`)),
+ $format(DateTime::StartOfDay(`tztimestamp`)),
+ $format(DateTime::StartOf(`tztimestamp`, Interval("PT13H"))), -- 13 hours: not a divisor of 24 hours
+ $format(DateTime::StartOf(`tztimestamp`, Interval("PT4H"))), -- 4 hours
+ $format(DateTime::StartOf(`tztimestamp`, Interval("PT15M"))), -- 15 minutes
+ $format(DateTime::StartOf(`tztimestamp`, Interval("PT20S"))), -- 20 seconds
+ $format(DateTime::StartOf(`tztimestamp`, Interval("PT7S"))), -- 7 seconds: not a divisor of 60 seconds
+ DateTime::TimeOfDay(`tztimestamp`), -- returned as-is, not passed through $format
+ $format(DateTime::EndOfMonth(`tztimestamp`)),
+from ( -- inner query: cast the string column of Input to TzTimestamp
+ select
+ cast(ftztimestamp as TzTimestamp) as `tztimestamp`
+ from Input
+);
diff --git a/yql/essentials/udfs/common/datetime2/test/cases/StartOf1969.cfg b/yql/essentials/udfs/common/datetime2/test/cases/StartOf1969.cfg
new file mode 100644
index 00000000000..d012f94fa22
--- /dev/null
+++ b/yql/essentials/udfs/common/datetime2/test/cases/StartOf1969.cfg
@@ -0,0 +1 @@
+in plato.Input StartOf1969.in
diff --git a/yql/essentials/udfs/common/datetime2/test/cases/StartOf1969.in b/yql/essentials/udfs/common/datetime2/test/cases/StartOf1969.in
new file mode 100644
index 00000000000..1711aa38134
--- /dev/null
+++ b/yql/essentials/udfs/common/datetime2/test/cases/StartOf1969.in
@@ -0,0 +1,3 @@
+{
+ "ftztimestamp"="1969-12-31T23:00:00.000000,Canada/Central"
+}; \ No newline at end of file
diff --git a/yql/essentials/udfs/common/datetime2/test/cases/StartOf1969.in.attr b/yql/essentials/udfs/common/datetime2/test/cases/StartOf1969.in.attr
new file mode 100644
index 00000000000..2cc4f8c0d68
--- /dev/null
+++ b/yql/essentials/udfs/common/datetime2/test/cases/StartOf1969.in.attr
@@ -0,0 +1,17 @@
+{
+ "_yql_row_spec" = {
+ "Type" = [
+ "StructType";
+ [
+ [
+ "ftztimestamp";
+ [
+ "DataType";
+ "String"
+ ]
+ ]
+ ]
+ ]
+ }
+}
+
diff --git a/yql/essentials/udfs/common/datetime2/test/cases/StartOf1969.sql b/yql/essentials/udfs/common/datetime2/test/cases/StartOf1969.sql
new file mode 100644
index 00000000000..81fad126328
--- /dev/null
+++ b/yql/essentials/udfs/common/datetime2/test/cases/StartOf1969.sql
@@ -0,0 +1,20 @@
+/* syntax version 1 */
+$format = DateTime::Format("%Y-%m-%d %H:%M:%S %Z"); -- shared formatter for the StartOf* results
+
+select -- columns carry no aliases; keep their order unchanged
+ $format(DateTime::StartOfYear(`tztimestamp`)),
+ $format(DateTime::StartOfQuarter(`tztimestamp`)),
+ $format(DateTime::StartOfMonth(`tztimestamp`)),
+ $format(DateTime::StartOfWeek(`tztimestamp`)),
+ $format(DateTime::StartOfDay(`tztimestamp`)),
+ $format(DateTime::StartOf(`tztimestamp`, Interval("PT13H"))), -- 13-hour interval
+ $format(DateTime::StartOf(`tztimestamp`, Interval("PT4H"))), -- 4-hour interval
+ $format(DateTime::StartOf(`tztimestamp`, Interval("PT15M"))), -- 15-minute interval
+ $format(DateTime::StartOf(`tztimestamp`, Interval("PT20S"))), -- 20-second interval
+ $format(DateTime::StartOf(`tztimestamp`, Interval("PT7S"))), -- 7-second interval
+ DateTime::TimeOfDay(`tztimestamp`) -- not formatted; NOTE(review): unlike StartOf.sql, EndOfMonth is not exercised here — confirm this is intentional
+from ( -- StartOf1969.cfg binds plato.Input to StartOf1969.in
+ select
+ cast(ftztimestamp as TzTimestamp) as `tztimestamp`
+ from Input
+);
diff --git a/yql/essentials/udfs/common/datetime2/test/cases/To.in b/yql/essentials/udfs/common/datetime2/test/cases/To.in
new file mode 100644
index 00000000000..03be7f7e67e
--- /dev/null
+++ b/yql/essentials/udfs/common/datetime2/test/cases/To.in
@@ -0,0 +1,11 @@
+{
+ "fdate"="2018-11-22";
+ "fdatetime"="2018-11-22T21:20:19Z";
+ "ftimestamp"="2018-11-22T21:20:19.345678Z";
+ "finterval"="P2DT10H";
+ "ftzdate"="2018-11-22,Europe/Moscow";
+ "ftzdatetime"="2018-11-22T21:20:19,Europe/Moscow";
+ "ftztimestamp"="2018-11-22T21:20:19.345678,Europe/Moscow";
+ "finterval_1day"="P1D";
+};
+
diff --git a/yql/essentials/udfs/common/datetime2/test/cases/To.in.attr b/yql/essentials/udfs/common/datetime2/test/cases/To.in.attr
new file mode 100644
index 00000000000..cb97c1895c5
--- /dev/null
+++ b/yql/essentials/udfs/common/datetime2/test/cases/To.in.attr
@@ -0,0 +1,66 @@
+{
+ "_yql_row_spec" = {
+ "Type" = [
+ "StructType";
+ [
+ [
+ "fdate";
+ [
+ "DataType";
+ "String"
+ ]
+ ];
+ [
+ "fdatetime";
+ [
+ "DataType";
+ "String"
+ ]
+ ];
+ [
+ "ftimestamp";
+ [
+ "DataType";
+ "String"
+ ]
+ ];
+ [
+ "finterval";
+ [
+ "DataType";
+ "String"
+ ]
+ ];
+ [
+ "ftzdate";
+ [
+ "DataType";
+ "String"
+ ]
+ ];
+ [
+ "ftzdatetime";
+ [
+ "DataType";
+ "String"
+ ]
+ ];
+ [
+ "ftztimestamp";
+ [
+ "DataType";
+ "String"
+ ]
+ ];
+ [
+ "finterval_1day";
+ [
+ "DataType";
+ "String"
+ ]
+ ]
+ ]
+ ]
+ }
+}
+
diff --git a/yql/essentials/udfs/common/datetime2/test/cases/To.sql b/yql/essentials/udfs/common/datetime2/test/cases/To.sql
new file mode 100644
index 00000000000..53a1289b60c
--- /dev/null
+++ b/yql/essentials/udfs/common/datetime2/test/cases/To.sql
@@ -0,0 +1,44 @@
+/* syntax version 1 */
+select -- unit conversions via DateTime::To*; the inputs are the typed columns built by the inner query
+ DateTime::ToDays(`interval`) as interval_to_days, -- Interval expressed in each supported unit
+ DateTime::ToHours(`interval`) as interval_to_hours,
+ DateTime::ToMinutes(`interval`) as interval_to_minutes,
+ DateTime::ToSeconds(`interval`) as interval_to_seconds,
+ DateTime::ToMilliseconds(`interval`) as interval_to_msec,
+ DateTime::ToMicroseconds(`interval`) as interval_to_usec,
+
+ DateTime::ToSeconds(`date`) as date_to_seconds, -- the six date/time types converted to seconds
+ DateTime::ToSeconds(`datetime`) as datetime_to_seconds,
+ DateTime::ToSeconds(`timestamp`) as timestamp_to_seconds,
+ DateTime::ToSeconds(`tzdate`) as tzdate_to_seconds,
+ DateTime::ToSeconds(`tzdatetime`) as tzdatetime_to_seconds,
+ DateTime::ToSeconds(`tztimestamp`) as tztimestamp_to_seconds,
+
+ DateTime::ToMilliseconds(`date`) as date_to_msec, -- the same six types converted to milliseconds
+ DateTime::ToMilliseconds(`datetime`) as datetime_to_msec,
+ DateTime::ToMilliseconds(`timestamp`) as timestamp_to_msec,
+ DateTime::ToMilliseconds(`tzdate`) as tzdate_to_msec,
+ DateTime::ToMilliseconds(`tzdatetime`) as tzdatetime_to_msec,
+ DateTime::ToMilliseconds(`tztimestamp`) as tztimestamp_to_msec,
+
+ DateTime::ToMicroseconds(`date`) as date_to_usec, -- the same six types converted to microseconds
+ DateTime::ToMicroseconds(`datetime`) as datetime_to_usec,
+ DateTime::ToMicroseconds(`timestamp`) as timestamp_to_usec,
+ DateTime::ToMicroseconds(`tzdate`) as tzdate_to_usec,
+ DateTime::ToMicroseconds(`tzdatetime`) as tzdatetime_to_usec,
+ DateTime::ToMicroseconds(`tztimestamp`) as tztimestamp_to_usec,
+
+ /* Overflow test */
+ DateTime::ToDays(`negative_1d`) as negative_1d, -- ToDays applied to a negative interval
+from ( -- inner query: cast the string columns of Input to typed values
+ select
+ cast(fdate as Date) as `date`,
+ cast(fdatetime as Datetime) as `datetime`,
+ cast(ftimestamp as Timestamp) as `timestamp`,
+ cast(finterval as Interval) as `interval`,
+ cast(ftzdate as TzDate) as `tzdate`,
+ cast(ftzdatetime as TzDatetime) as `tzdatetime`,
+ cast(ftztimestamp as TzTimestamp) as `tztimestamp`,
+ -cast(finterval_1day as Interval) as `negative_1d`, -- unary minus negates the interval read from finterval_1day
+ from Input
+);
diff --git a/yql/essentials/udfs/common/datetime2/test/cases/TzToDate.sql b/yql/essentials/udfs/common/datetime2/test/cases/TzToDate.sql
new file mode 100644
index 00000000000..383e2d831e9
--- /dev/null
+++ b/yql/essentials/udfs/common/datetime2/test/cases/TzToDate.sql
@@ -0,0 +1,7 @@
+/* syntax version 1 */
+select -- MakeDate vs MakeTzDate on the same TzDatetime literal, each rendered as String
+cast(DateTime::MakeDate(TzDatetime("2000-01-01T12:00:00,Europe/Moscow") ) as String), -- literal at 12:00:00 local time
+cast(DateTime::MakeTzDate(TzDatetime("2000-01-01T12:00:00,Europe/Moscow") ) as String),
+
+cast(DateTime::MakeDate(TzDatetime("2000-01-01T00:00:00,Europe/Moscow") ) as String), -- literal at 00:00:00 local time; NOTE(review): presumably chosen because the UTC calendar date differs here — confirm
+cast(DateTime::MakeTzDate(TzDatetime("2000-01-01T00:00:00,Europe/Moscow") ) as String);
diff --git a/yql/essentials/udfs/common/datetime2/test/cases/Update.in b/yql/essentials/udfs/common/datetime2/test/cases/Update.in
new file mode 100644
index 00000000000..07ac5350517
--- /dev/null
+++ b/yql/essentials/udfs/common/datetime2/test/cases/Update.in
@@ -0,0 +1 @@
+{ "ftztimestamp"="2001-12-01T21:20:19.345678,GMT"; };
diff --git a/yql/essentials/udfs/common/datetime2/test/cases/Update.in.attr b/yql/essentials/udfs/common/datetime2/test/cases/Update.in.attr
new file mode 100644
index 00000000000..3915337be3c
--- /dev/null
+++ b/yql/essentials/udfs/common/datetime2/test/cases/Update.in.attr
@@ -0,0 +1,17 @@
+{
+ "_yql_row_spec" = {
+ "Type" = [
+ "StructType";
+ [
+ [
+ "ftztimestamp";
+ [
+ "DataType";
+ "String"
+ ]
+ ];
+ ]
+ ]
+ }
+}
+
diff --git a/yql/essentials/udfs/common/datetime2/test/cases/Update.sql b/yql/essentials/udfs/common/datetime2/test/cases/Update.sql
new file mode 100644
index 00000000000..59221221b37
--- /dev/null
+++ b/yql/essentials/udfs/common/datetime2/test/cases/Update.sql
@@ -0,0 +1,25 @@
+/* syntax version 1 */
+SELECT -- each column applies DateTime::Update to tm, rebuilds a TzTimestamp and casts it to String
+ cast(DateTime::MakeTzTimestamp(DateTime::Update(tm, 2005)) as String), -- NOTE(review): positional arguments appear to follow Year, Month, Day, Hour, Minute, Second (compare the named forms below) — confirm against the UDF signature
+ cast(DateTime::MakeTzTimestamp(DateTime::Update(tm, 2200 as Year)) as String), -- NOTE(review): presumably beyond the representable TzTimestamp range — confirm
+ cast(DateTime::MakeTzTimestamp(DateTime::Update(tm, NULL, 7)) as String), -- NULL leaves the preceding positional argument unspecified
+ cast(DateTime::MakeTzTimestamp(DateTime::Update(tm, 13 as Month)) as String), -- 13 is outside 1..12
+ cast(DateTime::MakeTzTimestamp(DateTime::Update(tm, NULL, NULL, 20)) as String),
+ cast(DateTime::MakeTzTimestamp(DateTime::Update(tm, 32 as Day)) as String), -- no calendar month has 32 days
+ cast(DateTime::MakeTzTimestamp(DateTime::Update(tm, 2018, 2, 30)) as String), -- if the order is year, month, day this is 30 February, which does not exist
+ cast(DateTime::MakeTzTimestamp(DateTime::Update(tm, NULL, NULL, NULL, 11, 10, 9)) as String),
+ cast(DateTime::MakeTzTimestamp(DateTime::Update(tm, 11 as Hour)) as String),
+ cast(DateTime::MakeTzTimestamp(DateTime::Update(tm, 24 as Hour)) as String), -- one past 23
+ cast(DateTime::MakeTzTimestamp(DateTime::Update(tm, 10 as Minute)) as String),
+ cast(DateTime::MakeTzTimestamp(DateTime::Update(tm, 60 as Minute)) as String), -- one past 59
+ cast(DateTime::MakeTzTimestamp(DateTime::Update(tm, 9 as Second)) as String),
+ cast(DateTime::MakeTzTimestamp(DateTime::Update(tm, 60 as Second)) as String), -- one past 59
+ cast(DateTime::MakeTzTimestamp(DateTime::Update(tm, 123456 as Microsecond)) as String),
+ cast(DateTime::MakeTzTimestamp(DateTime::Update(tm, 2000000 as Microsecond)) as String), -- exceeds 999999
+ cast(DateTime::MakeTzTimestamp(DateTime::Update(tm, 100 as TimezoneId)) as String),
+ cast(DateTime::MakeTzTimestamp(DateTime::Update(tm, 1000 as TimezoneId)) as String) -- NOTE(review): presumably an unknown timezone id — confirm
+from ( -- inner query: cast the string column of Input to TzTimestamp
+ select
+ cast(ftztimestamp as TzTimestamp) as tm
+ from Input
+);
diff --git a/yql/essentials/udfs/common/datetime2/test/cases/UpdateTz.sql b/yql/essentials/udfs/common/datetime2/test/cases/UpdateTz.sql
new file mode 100644
index 00000000000..b756270ef7d
--- /dev/null
+++ b/yql/essentials/udfs/common/datetime2/test/cases/UpdateTz.sql
@@ -0,0 +1,9 @@
+/* syntax version 1 */
+pragma warning("disable","4510"); -- NOTE(review): presumably silences the warning raised by the Yql::TimezoneId call below — confirm what 4510 denotes
+select cast(DateTime::MakeDatetime( -- first query: timezone passed as a numeric id via the TimezoneId named argument
+ DateTime::Update(Datetime("2000-01-01T00:00:00Z"), Yql::TimezoneId("Europe/Moscow") as TimezoneId)
+) as string);
+
+select cast(DateTime::MakeDatetime( -- second query: same timezone passed by name via the Timezone named argument
+ DateTime::Update(Datetime("2000-01-01T00:00:00Z"), "Europe/Moscow" as Timezone)
+) as string);
diff --git a/yql/essentials/udfs/common/datetime2/test/cases/yql-14977.sql b/yql/essentials/udfs/common/datetime2/test/cases/yql-14977.sql
new file mode 100644
index 00000000000..92d2660425b
--- /dev/null
+++ b/yql/essentials/udfs/common/datetime2/test/cases/yql-14977.sql
@@ -0,0 +1,6 @@
+/* syntax version 1 */
+$parse = DateTime::Parse("%B/%d/%Y"); -- parser pattern uses %B for the month
+$format = DateTime::Format("%b/%d/%Y"); -- formatter pattern uses %b for the month
+
+select $format($parse("mAy/15/2022")); -- the month name in the input is written in mixed case
+
diff --git a/yql/essentials/udfs/common/datetime2/test/ya.make b/yql/essentials/udfs/common/datetime2/test/ya.make
new file mode 100644
index 00000000000..78f345b118c
--- /dev/null
+++ b/yql/essentials/udfs/common/datetime2/test/ya.make
@@ -0,0 +1,13 @@
+YQL_UDF_TEST_CONTRIB()
+
+DEPENDS(yql/essentials/udfs/common/datetime2) # the UDF module exercised by the cases in this directory
+
+TIMEOUT(300) # NOTE(review): presumably seconds — confirm
+
+SIZE(MEDIUM)
+
+IF (SANITIZER_TYPE == "memory") # applies only when SANITIZER_TYPE is "memory"
+ TAG(ya:not_autocheck) # YQL-15385
+ENDIF()
+
+END()
diff --git a/yql/essentials/udfs/common/datetime2/test_bigdates/canondata/result.json b/yql/essentials/udfs/common/datetime2/test_bigdates/canondata/result.json
new file mode 100644
index 00000000000..f83bc10798d
--- /dev/null
+++ b/yql/essentials/udfs/common/datetime2/test_bigdates/canondata/result.json
@@ -0,0 +1,7 @@
+{
+ "test.test[SplitMake]": [
+ {
+ "uri": "file://test.test_SplitMake_/results.txt"
+ }
+ ]
+}
diff --git a/yql/essentials/udfs/common/datetime2/test_bigdates/canondata/test.test_SplitMake_/results.txt b/yql/essentials/udfs/common/datetime2/test_bigdates/canondata/test.test_SplitMake_/results.txt
new file mode 100644
index 00000000000..1adffe191a5
--- /dev/null
+++ b/yql/essentials/udfs/common/datetime2/test_bigdates/canondata/test.test_SplitMake_/results.txt
@@ -0,0 +1,486 @@
+[
+ {
+ "Write" = [
+ {
+ "Type" = [
+ "ListType";
+ [
+ "StructType";
+ [
+ [
+ "dd";
+ [
+ "DataType";
+ "Date32"
+ ]
+ ];
+ [
+ "sdd";
+ [
+ "DataType";
+ "Date32"
+ ]
+ ];
+ [
+ "ddt";
+ [
+ "DataType";
+ "Date32"
+ ]
+ ];
+ [
+ "sddt";
+ [
+ "DataType";
+ "Date32"
+ ]
+ ];
+ [
+ "dts";
+ [
+ "DataType";
+ "Date32"
+ ]
+ ];
+ [
+ "sdts";
+ [
+ "DataType";
+ "Date32"
+ ]
+ ];
+ [
+ "dtd";
+ [
+ "DataType";
+ "Datetime64"
+ ]
+ ];
+ [
+ "sdtd";
+ [
+ "DataType";
+ "Datetime64"
+ ]
+ ];
+ [
+ "dtdt";
+ [
+ "DataType";
+ "Datetime64"
+ ]
+ ];
+ [
+ "sdtdt";
+ [
+ "DataType";
+ "Datetime64"
+ ]
+ ];
+ [
+ "dtts";
+ [
+ "DataType";
+ "Datetime64"
+ ]
+ ];
+ [
+ "sdtts";
+ [
+ "DataType";
+ "Datetime64"
+ ]
+ ];
+ [
+ "tsd";
+ [
+ "DataType";
+ "Timestamp64"
+ ]
+ ];
+ [
+ "stsd";
+ [
+ "DataType";
+ "Timestamp64"
+ ]
+ ];
+ [
+ "tsdt";
+ [
+ "DataType";
+ "Timestamp64"
+ ]
+ ];
+ [
+ "stsdt";
+ [
+ "DataType";
+ "Timestamp64"
+ ]
+ ];
+ [
+ "tsts";
+ [
+ "DataType";
+ "Timestamp64"
+ ]
+ ];
+ [
+ "ststs";
+ [
+ "DataType";
+ "Timestamp64"
+ ]
+ ]
+ ]
+ ]
+ ];
+ "Data" = [
+ [
+ "-53375809";
+ "-53375809";
+ "-53375809";
+ "-53375809";
+ "-53375809";
+ "-53375809";
+ "-4611669897600";
+ "-4611669897600";
+ "-4611669897600";
+ "-4611669897600";
+ "-4611669897600";
+ "-4611669897600";
+ "-4611669897600000000";
+ "-4611669897600000000";
+ "-4611669897600000000";
+ "-4611669897600000000";
+ "-4611669897600000000";
+ "-4611669897600000000"
+ ];
+ [
+ "-719163";
+ "-719163";
+ "-719163";
+ "-719163";
+ "-719163";
+ "-719163";
+ "-62135683200";
+ "-62135683200";
+ "-62135596801";
+ "-62135596801";
+ "-62135596801";
+ "-62135596801";
+ "-62135683200000000";
+ "-62135683200000000";
+ "-62135596801000000";
+ "-62135596801000000";
+ "-62135596800000001";
+ "-62135596800000001"
+ ];
+ [
+ "-719162";
+ "-719162";
+ "-719162";
+ "-719162";
+ "-719162";
+ "-719162";
+ "-62135596800";
+ "-62135596800";
+ "-62135596800";
+ "-62135596800";
+ "-62135596800";
+ "-62135596800";
+ "-62135596800000000";
+ "-62135596800000000";
+ "-62135596800000000";
+ "-62135596800000000";
+ "-62135596800000000";
+ "-62135596800000000"
+ ];
+ [
+ "-1";
+ "-1";
+ "-1";
+ "-1";
+ "-1";
+ "-1";
+ "-86400";
+ "-86400";
+ "-1";
+ "-1";
+ "-1";
+ "-1";
+ "-86400000000";
+ "-86400000000";
+ "-1000000";
+ "-1000000";
+ "-1";
+ "-1"
+ ];
+ [
+ "0";
+ "0";
+ "0";
+ "0";
+ "0";
+ "0";
+ "0";
+ "0";
+ "0";
+ "0";
+ "0";
+ "0";
+ "0";
+ "0";
+ "0";
+ "0";
+ "0";
+ "0"
+ ];
+ [
+ "1";
+ "1";
+ "0";
+ "0";
+ "0";
+ "0";
+ "86400";
+ "86400";
+ "1";
+ "1";
+ "0";
+ "0";
+ "86400000000";
+ "86400000000";
+ "1000000";
+ "1000000";
+ "1";
+ "1"
+ ];
+ [
+ "53375807";
+ "53375807";
+ "53375807";
+ "53375807";
+ "53375807";
+ "53375807";
+ "4611669724800";
+ "4611669724800";
+ "4611669811199";
+ "4611669811199";
+ "4611669811199";
+ "4611669811199";
+ "4611669724800000000";
+ "4611669724800000000";
+ "4611669811199000000";
+ "4611669811199000000";
+ "4611669811199999999";
+ "4611669811199999999"
+ ]
+ ]
+ }
+ ]
+ };
+ {
+ "Write" = [
+ {
+ "Type" = [
+ "ListType";
+ [
+ "StructType";
+ [
+ [
+ "dd";
+ [
+ "DataType";
+ "Date32"
+ ]
+ ];
+ [
+ "sdd";
+ [
+ "DataType";
+ "Date32"
+ ]
+ ];
+ [
+ "ddt";
+ [
+ "DataType";
+ "Date32"
+ ]
+ ];
+ [
+ "sddt";
+ [
+ "DataType";
+ "Date32"
+ ]
+ ];
+ [
+ "dts";
+ [
+ "DataType";
+ "Date32"
+ ]
+ ];
+ [
+ "sdts";
+ [
+ "DataType";
+ "Date32"
+ ]
+ ];
+ [
+ "dtd";
+ [
+ "DataType";
+ "Datetime64"
+ ]
+ ];
+ [
+ "sdtd";
+ [
+ "DataType";
+ "Datetime64"
+ ]
+ ];
+ [
+ "dtdt";
+ [
+ "DataType";
+ "Datetime64"
+ ]
+ ];
+ [
+ "sdtdt";
+ [
+ "DataType";
+ "Datetime64"
+ ]
+ ];
+ [
+ "dtts";
+ [
+ "DataType";
+ "Datetime64"
+ ]
+ ];
+ [
+ "sdtts";
+ [
+ "DataType";
+ "Datetime64"
+ ]
+ ];
+ [
+ "tsd";
+ [
+ "DataType";
+ "Timestamp64"
+ ]
+ ];
+ [
+ "stsd";
+ [
+ "DataType";
+ "Timestamp64"
+ ]
+ ];
+ [
+ "tsdt";
+ [
+ "DataType";
+ "Timestamp64"
+ ]
+ ];
+ [
+ "stsdt";
+ [
+ "DataType";
+ "Timestamp64"
+ ]
+ ];
+ [
+ "tsts";
+ [
+ "DataType";
+ "Timestamp64"
+ ]
+ ];
+ [
+ "ststs";
+ [
+ "DataType";
+ "Timestamp64"
+ ]
+ ]
+ ]
+ ]
+ ];
+ "Data" = [
+ [
+ "0";
+ "0";
+ "0";
+ "0";
+ "0";
+ "0";
+ "0";
+ "0";
+ "0";
+ "0";
+ "0";
+ "0";
+ "0";
+ "0";
+ "0";
+ "0";
+ "0";
+ "0"
+ ];
+ [
+ "1";
+ "1";
+ "0";
+ "0";
+ "0";
+ "0";
+ "86400";
+ "86400";
+ "1";
+ "1";
+ "0";
+ "0";
+ "86400000000";
+ "86400000000";
+ "1000000";
+ "1000000";
+ "1";
+ "1"
+ ];
+ [
+ "49672";
+ "49672";
+ "49672";
+ "49672";
+ "49672";
+ "49672";
+ "4291660800";
+ "4291660800";
+ "4291747199";
+ "4291747199";
+ "4291747199";
+ "4291747199";
+ "4291660800000000";
+ "4291660800000000";
+ "4291747199000000";
+ "4291747199000000";
+ "4291747199999999";
+ "4291747199999999"
+ ]
+ ]
+ }
+ ]
+ }
+] \ No newline at end of file
diff --git a/yql/essentials/udfs/common/datetime2/test_bigdates/cases/SplitMake.sql b/yql/essentials/udfs/common/datetime2/test_bigdates/cases/SplitMake.sql
new file mode 100644
index 00000000000..4ae3a8962cf
--- /dev/null
+++ b/yql/essentials/udfs/common/datetime2/test_bigdates/cases/SplitMake.sql
@@ -0,0 +1,27 @@
+/* syntax version 1 */
+
+select -- each line pairs a direct Make* call with the same Make* applied after DateTime::Split (alias prefixed with "s")
+ DateTime::MakeDate32(d32) as dd, DateTime::MakeDate32(DateTime::Split(d32)) as sdd,
+ DateTime::MakeDate32(dt64) as ddt, DateTime::MakeDate32(DateTime::Split(dt64)) as sddt,
+ DateTime::MakeDate32(ts64) as dts, DateTime::MakeDate32(DateTime::Split(ts64)) as sdts,
+ DateTime::MakeDatetime64(d32) as dtd, DateTime::MakeDatetime64(DateTime::Split(d32)) as sdtd,
+ DateTime::MakeDatetime64(dt64) as dtdt, DateTime::MakeDatetime64(DateTime::Split(dt64)) as sdtdt,
+ DateTime::MakeDatetime64(ts64) as dtts, DateTime::MakeDatetime64(DateTime::Split(ts64)) as sdtts,
+ DateTime::MakeTimestamp64(d32) as tsd, DateTime::MakeTimestamp64(DateTime::Split(d32)) as stsd,
+ DateTime::MakeTimestamp64(dt64) as tsdt, DateTime::MakeTimestamp64(DateTime::Split(dt64)) as stsdt,
+ DateTime::MakeTimestamp64(ts64) as tsts, DateTime::MakeTimestamp64(DateTime::Split(ts64)) as ststs
+from Input -- default.cfg binds plato.Input to input.txt (columns d32, dt64, ts64)
+order by d32;
+
+select -- same column layout as the first query, fed from the d, dt, ts columns
+ DateTime::MakeDate32(d) as dd, DateTime::MakeDate32(DateTime::Split(d)) as sdd,
+ DateTime::MakeDate32(dt) as ddt, DateTime::MakeDate32(DateTime::Split(dt)) as sddt,
+ DateTime::MakeDate32(ts) as dts, DateTime::MakeDate32(DateTime::Split(ts)) as sdts,
+ DateTime::MakeDatetime64(d) as dtd, DateTime::MakeDatetime64(DateTime::Split(d)) as sdtd,
+ DateTime::MakeDatetime64(dt) as dtdt, DateTime::MakeDatetime64(DateTime::Split(dt)) as sdtdt,
+ DateTime::MakeDatetime64(ts) as dtts, DateTime::MakeDatetime64(DateTime::Split(ts)) as sdtts,
+ DateTime::MakeTimestamp64(d) as tsd, DateTime::MakeTimestamp64(DateTime::Split(d)) as stsd,
+ DateTime::MakeTimestamp64(dt) as tsdt, DateTime::MakeTimestamp64(DateTime::Split(dt)) as stsdt,
+ DateTime::MakeTimestamp64(ts) as tsts, DateTime::MakeTimestamp64(DateTime::Split(ts)) as ststs
+from InputNarrow -- default.cfg binds plato.InputNarrow to input_narrow.txt
+order by d;
diff --git a/yql/essentials/udfs/common/datetime2/test_bigdates/cases/default.cfg b/yql/essentials/udfs/common/datetime2/test_bigdates/cases/default.cfg
new file mode 100644
index 00000000000..864fb2ddcb4
--- /dev/null
+++ b/yql/essentials/udfs/common/datetime2/test_bigdates/cases/default.cfg
@@ -0,0 +1,4 @@
+in plato.Input input.txt
+in plato.InputTz input_tz.txt
+in plato.InputNarrow input_narrow.txt
+in plato.Tz tz.txt
diff --git a/yql/essentials/udfs/common/datetime2/test_bigdates/cases/input.txt b/yql/essentials/udfs/common/datetime2/test_bigdates/cases/input.txt
new file mode 100644
index 00000000000..f24562bbc6d
--- /dev/null
+++ b/yql/essentials/udfs/common/datetime2/test_bigdates/cases/input.txt
@@ -0,0 +1,35 @@
+{
+ "d32"=-53375809;
+ "dt64"=-4611669897600;
+ "ts64"=-4611669897600000000;
+};
+{
+ "d32"=-719163;
+ "dt64"=-62135596801;
+ "ts64"=-62135596800000001;
+};
+{
+ "d32"=-719162;
+ "dt64"=-62135596800;
+ "ts64"=-62135596800000000;
+};
+{
+ "d32"=-1;
+ "dt64"=-1;
+ "ts64"=-1;
+};
+{
+ "d32"=0;
+ "dt64"=0;
+ "ts64"=0;
+};
+{
+ "d32"=1;
+ "dt64"=1;
+ "ts64"=1;
+};
+{
+ "d32"=53375807;
+ "dt64"=4611669811199;
+ "ts64"=4611669811199999999;
+};
diff --git a/yql/essentials/udfs/common/datetime2/test_bigdates/cases/input.txt.attr b/yql/essentials/udfs/common/datetime2/test_bigdates/cases/input.txt.attr
new file mode 100644
index 00000000000..773be61fbea
--- /dev/null
+++ b/yql/essentials/udfs/common/datetime2/test_bigdates/cases/input.txt.attr
@@ -0,0 +1,31 @@
+{
+ "_yql_row_spec" = {
+ "Type" = [
+ "StructType";
+ [
+ [
+ "d32";
+ [
+ "DataType";
+ "Date32"
+ ]
+ ];
+ [
+ "dt64";
+ [
+ "DataType";
+ "Datetime64"
+ ]
+ ];
+ [
+ "ts64";
+ [
+ "DataType";
+ "Timestamp64"
+ ]
+ ]
+ ]
+ ]
+ }
+}
+
diff --git a/yql/essentials/udfs/common/datetime2/test_bigdates/cases/input_narrow.txt b/yql/essentials/udfs/common/datetime2/test_bigdates/cases/input_narrow.txt
new file mode 100644
index 00000000000..d14b15511ad
--- /dev/null
+++ b/yql/essentials/udfs/common/datetime2/test_bigdates/cases/input_narrow.txt
@@ -0,0 +1,15 @@
+{
+ "d"=0u;
+ "dt"=0u;
+ "ts"=0u;
+};
+{
+ "d"=1u;
+ "dt"=1u;
+ "ts"=1u;
+};
+{
+ "d"=49672u;
+ "dt"=4291747199u;
+ "ts"=4291747199999999u;
+};
diff --git a/yql/essentials/udfs/common/datetime2/test_bigdates/cases/input_narrow.txt.attr b/yql/essentials/udfs/common/datetime2/test_bigdates/cases/input_narrow.txt.attr
new file mode 100644
index 00000000000..ba95961d80b
--- /dev/null
+++ b/yql/essentials/udfs/common/datetime2/test_bigdates/cases/input_narrow.txt.attr
@@ -0,0 +1,31 @@
+{
+ "_yql_row_spec" = {
+ "Type" = [
+ "StructType";
+ [
+ [
+ "d";
+ [
+ "DataType";
+ "Date"
+ ]
+ ];
+ [
+ "dt";
+ [
+ "DataType";
+ "Datetime"
+ ]
+ ];
+ [
+ "ts";
+ [
+ "DataType";
+ "Timestamp"
+ ]
+ ]
+ ]
+ ]
+ }
+}
+
diff --git a/yql/essentials/udfs/common/datetime2/test_bigdates/cases/input_tz.txt b/yql/essentials/udfs/common/datetime2/test_bigdates/cases/input_tz.txt
new file mode 100644
index 00000000000..e113d3564aa
--- /dev/null
+++ b/yql/essentials/udfs/common/datetime2/test_bigdates/cases/input_tz.txt
@@ -0,0 +1,48 @@
+{
+ "rn"=-144169;
+ "d32"="-144169-1-1";
+ "dt64"="-144169-1-1T0:0:0";
+ "ts64"="-144169-1-1T0:0:0";
+};
+{
+ "rn"=-1;
+ "d32"="-1-1-1";
+ "dt64"="-1-1-1T23:59:59";
+ "ts64"="-1-1-1T23:59:59.999999";
+};
+{
+ "rn"=1;
+ "d32"="1-1-1";
+ "dt64"="1-1-1T0:0:0";
+ "ts64"="1-1-1T0:0:0";
+};
+{
+ "rn"=1969;
+ "d32"="1969-12-31";
+ "dt64"="1969-12-31T23:59:59";
+ "ts64"="1969-12-31T23:59:59.999999";
+};
+{
+ "rn"=1970;
+ "d32"="1970-1-1";
+ "dt64"="1970-1-1T0:0:0";
+ "ts64"="1970-1-1T0:0:0";
+};
+{
+ "rn"=2024;
+ "d32"="2024-7-1";
+ "dt64"="2024-7-1T0:0:0";
+ "ts64"="2024-7-1T0:0:0";
+};
+{
+ "rn"=2106;
+ "d32"="2106-1-1";
+ "dt64"="2106-1-1T0:0:0";
+ "ts64"="2106-1-1T0:0:0";
+};
+{
+ "rn"=148107;
+ "d32"="148107-12-31";
+ "dt64"="148107-12-31T23:59:59";
+ "ts64"="148107-12-31T23:59:59.999999";
+};
diff --git a/yql/essentials/udfs/common/datetime2/test_bigdates/cases/input_tz.txt.attr b/yql/essentials/udfs/common/datetime2/test_bigdates/cases/input_tz.txt.attr
new file mode 100644
index 00000000000..59fc7869772
--- /dev/null
+++ b/yql/essentials/udfs/common/datetime2/test_bigdates/cases/input_tz.txt.attr
@@ -0,0 +1,37 @@
+{
+ "_yql_row_spec" = {
+ "Type" = [
+ "StructType";
+ [
+ [
+ "rn";
+ [
+ "DataType";
+ "Int32"
+ ]
+ ];
+ [
+ "d32";
+ [
+ "DataType";
+ "String"
+ ]
+ ];
+ [
+ "dt64";
+ [
+ "DataType";
+ "String"
+ ]
+ ];
+ [
+ "ts64";
+ [
+ "DataType";
+ "String"
+ ]
+ ]
+ ]
+ ]
+ }
+}
diff --git a/yql/essentials/udfs/common/datetime2/test_bigdates/cases/tz.txt b/yql/essentials/udfs/common/datetime2/test_bigdates/cases/tz.txt
new file mode 100644
index 00000000000..77d1cf3174f
--- /dev/null
+++ b/yql/essentials/udfs/common/datetime2/test_bigdates/cases/tz.txt
@@ -0,0 +1,15 @@
+{
+ "tz"="GMT";
+};
+{
+ "tz"="UTC";
+};
+{
+ "tz"="Europe/London";
+};
+{
+ "tz"="Europe/Moscow";
+};
+{
+ "tz"="Atlantic/Azores";
+};
diff --git a/yql/essentials/udfs/common/datetime2/test_bigdates/cases/tz.txt.attr b/yql/essentials/udfs/common/datetime2/test_bigdates/cases/tz.txt.attr
new file mode 100644
index 00000000000..847643ec33f
--- /dev/null
+++ b/yql/essentials/udfs/common/datetime2/test_bigdates/cases/tz.txt.attr
@@ -0,0 +1,16 @@
+{
+ "_yql_row_spec" = {
+ "Type" = [
+ "StructType";
+ [
+ [
+ "tz";
+ [
+ "DataType";
+ "String"
+ ]
+ ]
+ ]
+ ]
+ }
+}
diff --git a/yql/essentials/udfs/common/datetime2/test_bigdates/ya.make b/yql/essentials/udfs/common/datetime2/test_bigdates/ya.make
new file mode 100644
index 00000000000..78f345b118c
--- /dev/null
+++ b/yql/essentials/udfs/common/datetime2/test_bigdates/ya.make
@@ -0,0 +1,13 @@
+YQL_UDF_TEST_CONTRIB()
+
+DEPENDS(yql/essentials/udfs/common/datetime2) # same UDF module as the sibling test directory
+
+TIMEOUT(300) # NOTE(review): unit is presumably seconds — confirm
+
+SIZE(MEDIUM)
+
+IF (SANITIZER_TYPE == "memory") # branch taken only when SANITIZER_TYPE equals "memory"
+ TAG(ya:not_autocheck) # YQL-15385
+ENDIF()
+
+END()
diff --git a/yql/essentials/udfs/common/datetime2/ya.make b/yql/essentials/udfs/common/datetime2/ya.make
new file mode 100644
index 00000000000..cc8b450369f
--- /dev/null
+++ b/yql/essentials/udfs/common/datetime2/ya.make
@@ -0,0 +1,30 @@
+IF (YQL_PACKAGED) # packaged mode: libdatetime2_udf.so comes from a sandbox resource instead of SRCS
+ PACKAGE()
+
+ FROM_SANDBOX(FILE 7319895543 OUT_NOAUTO libdatetime2_udf.so) # 7319895543 is the sandbox resource id
+
+ END()
+ELSE() # otherwise the UDF module is declared from datetime_udf.cpp
+YQL_UDF_CONTRIB(datetime2_udf)
+ YQL_ABI_VERSION( # NOTE(review): the three numbers are presumably major, minor, patch — confirm
+ 2
+ 40
+ 0
+ )
+ SRCS(
+ datetime_udf.cpp
+ )
+ PEERDIR(
+ util/draft
+ yql/essentials/public/udf/arrow
+ yql/essentials/minikql
+ yql/essentials/minikql/datetime
+ yql/essentials/public/udf/tz
+ )
+ END()
+ENDIF()
+
+RECURSE_FOR_TESTS( # test directories that accompany this module
+ test
+ test_bigdates
+)
diff --git a/yql/essentials/udfs/common/digest/digest_udf.cpp b/yql/essentials/udfs/common/digest/digest_udf.cpp
new file mode 100644
index 00000000000..491fe7a66ca
--- /dev/null
+++ b/yql/essentials/udfs/common/digest/digest_udf.cpp
@@ -0,0 +1,410 @@
+#include <yql/essentials/public/udf/udf_helpers.h>
+#include <yql/essentials/public/udf/udf_value_builder.h>
+
+#include <util/digest/murmur.h>
+#include <util/digest/city.h>
+#include <util/digest/numeric.h>
+#include <util/digest/fnv.h>
+
+#include <library/cpp/digest/argonish/argon2.h>
+#include <library/cpp/digest/argonish/blake2b.h>
+#include <library/cpp/digest/crc32c/crc32c.h>
+#include <library/cpp/digest/md5/md5.h>
+#include <library/cpp/digest/murmur/murmur.h>
+#include <library/cpp/digest/old_crc/crc.h>
+#include <library/cpp/digest/sfh/sfh.h>
+
+#include <contrib/libs/highwayhash/highwayhash/c_bindings.h>
+#include <contrib/libs/highwayhash/highwayhash/sip_hash.h>
+
+#include <contrib/libs/farmhash/farmhash.h>
+#include <contrib/libs/xxhash/xxhash.h>
+
+#include <openssl/sha.h>
+
+using namespace NKikimr;
+using namespace NUdf;
+
+namespace {
+ // Crc32c: CRC32C checksum over the bytes of the input string, returned as ui32.
+ SIMPLE_STRICT_UDF(TCrc32c, ui32(TAutoMap<char*>)) {
+     Y_UNUSED(valueBuilder);
+     const TStringRef data = args[0].AsStringRef();
+     const ui32 checksum = Crc32c(data.Data(), data.Size());
+     return TUnboxedValuePod(checksum);
+ }
+
+ // Crc64: crc64() checksum over the bytes of the input string, returned as ui64.
+ SIMPLE_STRICT_UDF(TCrc64, ui64(TAutoMap<char*>)) {
+     Y_UNUSED(valueBuilder);
+     const TStringRef data = args[0].AsStringRef();
+     const ui64 checksum = crc64(data.Data(), data.Size());
+     return TUnboxedValuePod(checksum);
+ }
+
+ // Fnv32: 32-bit FnvHash of the input string bytes.
+ SIMPLE_STRICT_UDF(TFnv32, ui32(TAutoMap<char*>)) {
+     Y_UNUSED(valueBuilder);
+     const TStringRef data = args[0].AsStringRef();
+     const ui32 digest = FnvHash<ui32>(data.Data(), data.Size());
+     return TUnboxedValuePod(digest);
+ }
+
+ // Fnv64: 64-bit FnvHash of the input string bytes.
+ SIMPLE_STRICT_UDF(TFnv64, ui64(TAutoMap<char*>)) {
+     Y_UNUSED(valueBuilder);
+     const TStringRef data = args[0].AsStringRef();
+     const ui64 digest = FnvHash<ui64>(data.Data(), data.Size());
+     return TUnboxedValuePod(digest);
+ }
+
+ // MurMurHash: 64-bit MurmurHash of the input string bytes.
+ SIMPLE_STRICT_UDF(TMurMurHash, ui64(TAutoMap<char*>)) {
+     Y_UNUSED(valueBuilder);
+     const TStringRef data = args[0].AsStringRef();
+     const ui64 digest = MurmurHash<ui64>(data.Data(), data.Size());
+     return TUnboxedValuePod(digest);
+ }
+
+ // MurMurHash32: 32-bit MurmurHash of the input string bytes.
+ SIMPLE_STRICT_UDF(TMurMurHash32, ui32(TAutoMap<char*>)) {
+     Y_UNUSED(valueBuilder);
+     const TStringRef data = args[0].AsStringRef();
+     const ui32 digest = MurmurHash<ui32>(data.Data(), data.Size());
+     return TUnboxedValuePod(digest);
+ }
+
+ // MurMurHash2A: feeds the whole input string into a fresh TMurmurHash2A<ui64>
+ // state and returns its final value.
+ SIMPLE_STRICT_UDF(TMurMurHash2A, ui64(TAutoMap<char*>)) {
+     Y_UNUSED(valueBuilder);
+     const TStringRef data = args[0].AsStringRef();
+     TMurmurHash2A<ui64> hasher{};
+     const ui64 digest = hasher.Update(data.Data(), data.Size()).Value();
+     return TUnboxedValuePod(digest);
+ }
+
+ // MurMurHash2A32: feeds the whole input string into a fresh TMurmurHash2A<ui32>
+ // state and returns its final value.
+ SIMPLE_STRICT_UDF(TMurMurHash2A32, ui32(TAutoMap<char*>)) {
+     Y_UNUSED(valueBuilder);
+     const TStringRef data = args[0].AsStringRef();
+     TMurmurHash2A<ui32> hasher{};
+     const ui32 digest = hasher.Update(data.Data(), data.Size()).Value();
+     return TUnboxedValuePod(digest);
+ }
+
+ // CityHash: 64-bit CityHash of the input string. When the optional second
+ // argument is present it is used as the seed (CityHash64WithSeed), otherwise
+ // the unseeded CityHash64 is used.
+ SIMPLE_STRICT_UDF_WITH_OPTIONAL_ARGS(TCityHash, ui64(TAutoMap<char*>, TOptional<ui64>), 1) {
+     Y_UNUSED(valueBuilder);
+     const TStringRef data = args[0].AsStringRef();
+     const ui64 digest = args[1]
+         ? CityHash64WithSeed(data.Data(), data.Size(), args[1].Get<ui64>())
+         : CityHash64(data.Data(), data.Size());
+     return TUnboxedValuePod(digest);
+ }
+
+ using TUi64Pair = NUdf::TTuple<ui64, ui64>;
+
+ // UDF "CityHash128": CityHash128 of the input string, returned as a
+ // two-element tuple (first, second) of ui64.
+ class TCityHash128: public TBoxedValue {
+ public:
+     static TStringRef Name() {
+         static auto name = TStringRef::Of("CityHash128");
+         return name;
+     }
+
+     // Registers the signature Tuple<ui64, ui64>(AutoMap<String>) when `name`
+     // matches; returns false for any other function name.
+     static bool DeclareSignature(
+         const TStringRef& name,
+         TType* userType,
+         IFunctionTypeInfoBuilder& builder,
+         bool typesOnly) {
+         Y_UNUSED(userType);
+         if (!(Name() == name)) {
+             return false;
+         }
+
+         auto resultType = builder.Tuple(2)->Add<ui64>().Add<ui64>().Build();
+         builder.Args(1)->Add<TAutoMap<char*>>();
+         builder.Returns(resultType);
+         if (!typesOnly) {
+             builder.Implementation(new TCityHash128);
+         }
+         builder.IsStrict();
+         return true;
+     }
+
+ private:
+     TUnboxedValue Run(
+         const IValueBuilder* valueBuilder,
+         const TUnboxedValuePod* args) const override {
+         const TStringRef data = args[0].AsStringRef();
+         const uint128 digest = CityHash128(data.Data(), data.Size());
+
+         TUnboxedValue* slots = nullptr;
+         auto tuple = valueBuilder->NewArray(2U, slots);
+         slots[0] = TUnboxedValuePod(digest.first);
+         slots[1] = TUnboxedValuePod(digest.second);
+         return tuple;
+     }
+ };
+
+ // NumericHash: NumericHash() of a ui64 argument, widened to ui64.
+ SIMPLE_STRICT_UDF(TNumericHash, ui64(TAutoMap<ui64>)) {
+     Y_UNUSED(valueBuilder);
+     const ui64 number = args[0].Get<ui64>();
+     const ui64 digest = static_cast<ui64>(NumericHash(number));
+     return TUnboxedValuePod(digest);
+ }
+
+ // Md5Hex: result of MD5::Calc on the input string, returned as a new string.
+ SIMPLE_STRICT_UDF(TMd5Hex, char*(TAutoMap<char*>)) {
+     const TStringRef data = args[0].AsStringRef();
+     MD5 hasher;
+     const TString hex = hasher.Calc(data);
+     return valueBuilder->NewString(hex);
+ }
+
+ // Md5Raw: result of MD5::CalcRaw on the input string, returned as a new string.
+ SIMPLE_STRICT_UDF(TMd5Raw, char*(TAutoMap<char*>)) {
+     const TStringRef data = args[0].AsStringRef();
+     MD5 hasher;
+     const TString raw = hasher.CalcRaw(data);
+     return valueBuilder->NewString(raw);
+ }
+
+ // Md5HalfMix: MD5::CalcHalfMix of the input string.
+ SIMPLE_STRICT_UDF(TMd5HalfMix, ui64(TAutoMap<char*>)) {
+     Y_UNUSED(valueBuilder);
+     const auto mixed = MD5::CalcHalfMix(args[0].AsStringRef());
+     return TUnboxedValuePod(mixed);
+ }
+
+ // Argon2: hashes (input, salt) into a 32-byte string using an Argon2d
+ // instance created once via TArgon2Factory::Create(Argon2d, 1, 32, 1) and
+ // shared by all calls.
+ SIMPLE_STRICT_UDF(TArgon2, char*(TAutoMap<char*>, TAutoMap<char*>)) {
+     const static ui32 outSize = 32;
+     const static NArgonish::TArgon2Factory afactory;
+     const static THolder<NArgonish::IArgon2Base> argon2 = afactory.Create(
+         NArgonish::EArgon2Type::Argon2d, 1, 32, 1);
+
+     const TStringRef password = args[0].AsStringRef();
+     const TStringRef salt = args[1].AsStringRef();
+     const auto* passwordBytes = reinterpret_cast<const ui8*>(password.Data());
+     const auto* saltBytes = reinterpret_cast<const ui8*>(salt.Data());
+
+     ui8 digest[outSize];
+     argon2->Hash(passwordBytes, password.Size(), saltBytes, salt.Size(), digest, outSize);
+     return valueBuilder->NewString(TStringRef(reinterpret_cast<char*>(digest), outSize));
+ }
+
+ // Blake2B: 32-byte BLAKE2b digest of the input string. The optional second
+ // argument is a key; a missing or empty key selects the unkeyed variant.
+ SIMPLE_STRICT_UDF_WITH_OPTIONAL_ARGS(TBlake2B, char*(TAutoMap<char*>, TOptional<char*>), 1) {
+     const static ui32 outSize = 32;
+     const static NArgonish::TBlake2BFactory bfactory;
+     const TStringRef data = args[0].AsStringRef();
+
+     // Only a present, non-empty key switches to keyed hashing.
+     TStringRef key;
+     bool keyed = false;
+     if (args[1]) {
+         key = args[1].AsStringRef();
+         keyed = key.Size() != 0;
+     }
+
+     THolder<NArgonish::IBlake2Base> hasher;
+     if (keyed) {
+         hasher = bfactory.Create(outSize, reinterpret_cast<const ui8*>(key.Data()), key.Size());
+     } else {
+         hasher = bfactory.Create(outSize);
+     }
+
+     ui8 digest[outSize];
+     hasher->Update(data.Data(), data.Size());
+     hasher->Final(digest, outSize);
+     return valueBuilder->NewString(TStringRef(reinterpret_cast<char*>(digest), outSize));
+ }
+
+ // SipHash: highwayhash::SipHash of the third argument (string), keyed by the
+ // first two ui64 arguments.
+ SIMPLE_STRICT_UDF(TSipHash, ui64(ui64, ui64, TAutoMap<char*>)) {
+     using namespace highwayhash;
+     Y_UNUSED(valueBuilder);
+     const HH_U64 key[2] = {args[0].Get<ui64>(), args[1].Get<ui64>()};
+     const TStringRef data = args[2].AsStringRef();
+     const ui64 digest = SipHash(key, data.Data(), data.Size());
+     return TUnboxedValuePod(digest);
+ }
+
+ // HighwayHash: HighwayHash64 of the fifth argument (string), keyed by the
+ // first four ui64 arguments.
+ SIMPLE_STRICT_UDF(THighwayHash, ui64(ui64, ui64, ui64, ui64, TAutoMap<char*>)) {
+     using namespace highwayhash;
+     Y_UNUSED(valueBuilder);
+     const uint64_t key[4] = {
+         args[0].Get<ui64>(),
+         args[1].Get<ui64>(),
+         args[2].Get<ui64>(),
+         args[3].Get<ui64>()};
+     const TStringRef data = args[4].AsStringRef();
+     const ui64 digest = HighwayHash64(key, data.Data(), data.Size());
+     return TUnboxedValuePod(digest);
+ }
+
+ // FarmHashFingerprint: util::Fingerprint of a single ui64 value.
+ SIMPLE_STRICT_UDF(TFarmHashFingerprint, ui64(TAutoMap<ui64>)) {
+     Y_UNUSED(valueBuilder);
+     const ui64 number = args[0].Get<ui64>();
+     const ui64 fingerprint = util::Fingerprint(number);
+     return TUnboxedValuePod(fingerprint);
+ }
+
+ // FarmHashFingerprint2: util::Fingerprint of the 128-bit value built by
+ // util::Uint128(first argument, second argument).
+ SIMPLE_STRICT_UDF(TFarmHashFingerprint2, ui64(TAutoMap<ui64>, TAutoMap<ui64>)) {
+     Y_UNUSED(valueBuilder);
+     const ui64 lowPart = args[0].Get<ui64>();
+     const ui64 highPart = args[1].Get<ui64>();
+     const ui64 fingerprint = util::Fingerprint(util::Uint128(lowPart, highPart));
+     return TUnboxedValuePod(fingerprint);
+ }
+
+ // FarmHashFingerprint32: util::Fingerprint32 of the input string bytes.
+ SIMPLE_STRICT_UDF(TFarmHashFingerprint32, ui32(TAutoMap<char*>)) {
+     Y_UNUSED(valueBuilder);
+     const TStringRef data = args[0].AsStringRef();
+     const ui32 fingerprint = ui32(util::Fingerprint32(data.Data(), data.Size()));
+     return TUnboxedValuePod(fingerprint);
+ }
+
+ // FarmHashFingerprint64: util::Fingerprint64 of the input string bytes.
+ SIMPLE_STRICT_UDF(TFarmHashFingerprint64, ui64(TAutoMap<char*>)) {
+     Y_UNUSED(valueBuilder);
+     const TStringRef data = args[0].AsStringRef();
+     const ui64 fingerprint = ui64(util::Fingerprint64(data.Data(), data.Size()));
+     return TUnboxedValuePod(fingerprint);
+ }
+
+ // UDF "FarmHashFingerprint128": util::Fingerprint128 of the input string,
+ // returned as a two-element tuple (first, second) of ui64.
+ class TFarmHashFingerprint128: public TBoxedValue {
+ public:
+     static TStringRef Name() {
+         static auto name = TStringRef::Of("FarmHashFingerprint128");
+         return name;
+     }
+
+     // Registers the signature Tuple<ui64, ui64>(AutoMap<String>) when `name`
+     // matches; returns false for any other function name.
+     static bool DeclareSignature(
+         const TStringRef& name,
+         TType* userType,
+         IFunctionTypeInfoBuilder& builder,
+         bool typesOnly) {
+         Y_UNUSED(userType);
+         if (!(Name() == name)) {
+             return false;
+         }
+
+         auto resultType = builder.Tuple(2)->Add<ui64>().Add<ui64>().Build();
+         builder.Args(1)->Add<TAutoMap<char*>>();
+         builder.Returns(resultType);
+         if (!typesOnly) {
+             builder.Implementation(new TFarmHashFingerprint128);
+         }
+         builder.IsStrict();
+         return true;
+     }
+
+ private:
+     TUnboxedValue Run(
+         const IValueBuilder* valueBuilder,
+         const TUnboxedValuePod* args) const override {
+         const TStringRef data = args[0].AsStringRef();
+         const auto fingerprint = util::Fingerprint128(data.Data(), data.Size());
+
+         TUnboxedValue* slots = nullptr;
+         auto tuple = valueBuilder->NewArray(2U, slots);
+         slots[0] = TUnboxedValuePod(static_cast<ui64>(fingerprint.first));
+         slots[1] = TUnboxedValuePod(static_cast<ui64>(fingerprint.second));
+         return tuple;
+     }
+ };
+
+ // SuperFastHash: SuperFastHash of the input string bytes, returned as ui32.
+ SIMPLE_STRICT_UDF(TSuperFastHash, ui32(TAutoMap<char*>)) {
+     Y_UNUSED(valueBuilder);
+     const TStringRef data = args[0].AsStringRef();
+     const ui32 digest = SuperFastHash(data.Data(), data.Size());
+     return TUnboxedValuePod(digest);
+ }
+
+ // Sha1: raw SHA-1 digest (SHA_DIGEST_LENGTH bytes) of the input string,
+ // computed with the OpenSSL SHA1_* API.
+ SIMPLE_STRICT_UDF(TSha1, char*(TAutoMap<char*>)) {
+     const TStringRef data = args[0].AsStringRef();
+     unsigned char digest[SHA_DIGEST_LENGTH];
+
+     SHA_CTX ctx;
+     SHA1_Init(&ctx);
+     SHA1_Update(&ctx, data.Data(), data.Size());
+     SHA1_Final(digest, &ctx);
+
+     return valueBuilder->NewString(TStringRef(reinterpret_cast<char*>(digest), sizeof(digest)));
+ }
+
+ // Sha256: raw SHA-256 digest (SHA256_DIGEST_LENGTH bytes) of the input
+ // string, computed with the OpenSSL SHA256_* API.
+ SIMPLE_STRICT_UDF(TSha256, char*(TAutoMap<char*>)) {
+     const TStringRef data = args[0].AsStringRef();
+     unsigned char digest[SHA256_DIGEST_LENGTH];
+
+     SHA256_CTX ctx;
+     SHA256_Init(&ctx);
+     SHA256_Update(&ctx, data.Data(), data.Size());
+     SHA256_Final(digest, &ctx);
+
+     return valueBuilder->NewString(TStringRef(reinterpret_cast<char*>(digest), sizeof(digest)));
+ }
+
+ // IntHash64: mixes a ui64 by XOR-ing it with a fixed constant and then
+ // applying three xor-shift(33) rounds with two multiplications in between
+ // (the multipliers and shifts are those of the MurmurHash3 64-bit finalizer).
+ SIMPLE_STRICT_UDF(TIntHash64, ui64(TAutoMap<ui64>)) {
+     Y_UNUSED(valueBuilder);
+     ui64 mixed = args[0].Get<ui64>() ^ 0x4CF2D2BAAE6DA887ULL;
+     mixed = (mixed ^ (mixed >> 33)) * 0xff51afd7ed558ccdULL;
+     mixed = (mixed ^ (mixed >> 33)) * 0xc4ceb9fe1a85ec53ULL;
+     mixed ^= mixed >> 33;
+     return TUnboxedValuePod(mixed);
+ }
+
+ // XXH3: XXH3_64bits of the input string bytes.
+ SIMPLE_STRICT_UDF(TXXH3, ui64(TAutoMap<char*>)) {
+     Y_UNUSED(valueBuilder);
+     const TStringRef data = args[0].AsStringRef();
+     const ui64 digest = XXH3_64bits(data.Data(), data.Size());
+     return TUnboxedValuePod(digest);
+ }
+
+ // UDF "XXH3_128": XXH3_128bits of the input string, returned as a
+ // two-element tuple (low64, high64) of ui64.
+ class TXXH3_128: public TBoxedValue {
+ public:
+     static TStringRef Name() {
+         static auto name = TStringRef::Of("XXH3_128");
+         return name;
+     }
+
+     // Registers the signature Tuple<ui64, ui64>(AutoMap<String>) when `name`
+     // matches; returns false for any other function name.
+     static bool DeclareSignature(const TStringRef& name, TType*, IFunctionTypeInfoBuilder& builder, bool typesOnly) {
+         if (!(Name() == name)) {
+             return false;
+         }
+
+         const auto resultType = builder.Tuple(2)->Add<ui64>().Add<ui64>().Build();
+         builder.Args(1)->Add<TAutoMap<char*>>();
+         builder.Returns(resultType);
+         if (!typesOnly) {
+             builder.Implementation(new TXXH3_128);
+         }
+         builder.IsStrict();
+         return true;
+     }
+
+ private:
+     TUnboxedValue Run(const IValueBuilder* valueBuilder, const TUnboxedValuePod* args) const final {
+         const TStringRef data = args[0].AsStringRef();
+         const auto digest = XXH3_128bits(data.Data(), data.Size());
+
+         TUnboxedValue* slots = nullptr;
+         auto tuple = valueBuilder->NewArray(2U, slots);
+         slots[0] = TUnboxedValuePod(ui64(digest.low64));
+         slots[1] = TUnboxedValuePod(ui64(digest.high64));
+         return tuple;
+     }
+ };
+
+ SIMPLE_MODULE(TDigestModule,
+ TCrc32c,
+ TCrc64,
+ TFnv32,
+ TFnv64,
+ TMurMurHash,
+ TMurMurHash32,
+ TMurMurHash2A,
+ TMurMurHash2A32,
+ TCityHash,
+ TCityHash128,
+ TNumericHash,
+ TMd5Hex,
+ TMd5Raw,
+ TMd5HalfMix,
+ TArgon2,
+ TBlake2B,
+ TSipHash,
+ THighwayHash,
+ TFarmHashFingerprint,
+ TFarmHashFingerprint2,
+ TFarmHashFingerprint32,
+ TFarmHashFingerprint64,
+ TFarmHashFingerprint128,
+ TSuperFastHash,
+ TSha1,
+ TSha256,
+ TIntHash64,
+ TXXH3,
+ TXXH3_128
+ )
+
+}
+
+REGISTER_MODULES(TDigestModule)
diff --git a/yql/essentials/udfs/common/digest/test/canondata/result.json b/yql/essentials/udfs/common/digest/test/canondata/result.json
new file mode 100644
index 00000000000..fb6112fc5bc
--- /dev/null
+++ b/yql/essentials/udfs/common/digest/test/canondata/result.json
@@ -0,0 +1,7 @@
+{
+ "test.test[Basic]": [
+ {
+ "uri": "file://test.test_Basic_/results.txt"
+ }
+ ]
+}
diff --git a/yql/essentials/udfs/common/digest/test/canondata/test.test_Basic_/results.txt b/yql/essentials/udfs/common/digest/test/canondata/test.test_Basic_/results.txt
new file mode 100644
index 00000000000..f5b7b0fe785
--- /dev/null
+++ b/yql/essentials/udfs/common/digest/test/canondata/test.test_Basic_/results.txt
@@ -0,0 +1,506 @@
+[
+ {
+ "Write" = [
+ {
+ "Type" = [
+ "ListType";
+ [
+ "StructType";
+ [
+ [
+ "crc32c";
+ [
+ "DataType";
+ "Uint32"
+ ]
+ ];
+ [
+ "crc64";
+ [
+ "DataType";
+ "Uint64"
+ ]
+ ];
+ [
+ "fnv32";
+ [
+ "DataType";
+ "Uint32"
+ ]
+ ];
+ [
+ "fnv64";
+ [
+ "DataType";
+ "Uint64"
+ ]
+ ];
+ [
+ "murmur";
+ [
+ "DataType";
+ "Uint64"
+ ]
+ ];
+ [
+ "murmur32";
+ [
+ "DataType";
+ "Uint32"
+ ]
+ ];
+ [
+ "murmur2a";
+ [
+ "DataType";
+ "Uint64"
+ ]
+ ];
+ [
+ "murmur2a32";
+ [
+ "DataType";
+ "Uint32"
+ ]
+ ];
+ [
+ "city";
+ [
+ "DataType";
+ "Uint64"
+ ]
+ ];
+ [
+ "cityWithSeed";
+ [
+ "DataType";
+ "Uint64"
+ ]
+ ];
+ [
+ "city128";
+ [
+ "TupleType";
+ [
+ [
+ "DataType";
+ "Uint64"
+ ];
+ [
+ "DataType";
+ "Uint64"
+ ]
+ ]
+ ]
+ ];
+ [
+ "numeric";
+ [
+ "DataType";
+ "Uint64"
+ ]
+ ];
+ [
+ "md5hex";
+ [
+ "DataType";
+ "String"
+ ]
+ ];
+ [
+ "md5raw";
+ [
+ "DataType";
+ "String"
+ ]
+ ];
+ [
+ "md5halfmix";
+ [
+ "DataType";
+ "Uint64"
+ ]
+ ];
+ [
+ "argon2";
+ [
+ "DataType";
+ "String"
+ ]
+ ];
+ [
+ "blake2b";
+ [
+ "DataType";
+ "String"
+ ]
+ ];
+ [
+ "blake2bunkeyed";
+ [
+ "DataType";
+ "String"
+ ]
+ ];
+ [
+ "blake2bkeyed";
+ [
+ "DataType";
+ "String"
+ ]
+ ];
+ [
+ "sip";
+ [
+ "DataType";
+ "Uint64"
+ ]
+ ];
+ [
+ "highway";
+ [
+ "DataType";
+ "Uint64"
+ ]
+ ];
+ [
+ "farmfing";
+ [
+ "DataType";
+ "Uint64"
+ ]
+ ];
+ [
+ "farmfing2";
+ [
+ "OptionalType";
+ [
+ "DataType";
+ "Uint64"
+ ]
+ ]
+ ];
+ [
+ "farmfing32";
+ [
+ "DataType";
+ "Uint32"
+ ]
+ ];
+ [
+ "farmfing64";
+ [
+ "DataType";
+ "Uint64"
+ ]
+ ];
+ [
+ "farmfing128";
+ [
+ "TupleType";
+ [
+ [
+ "DataType";
+ "Uint64"
+ ];
+ [
+ "DataType";
+ "Uint64"
+ ]
+ ]
+ ]
+ ];
+ [
+ "sfh";
+ [
+ "DataType";
+ "Uint32"
+ ]
+ ];
+ [
+ "sha1";
+ [
+ "DataType";
+ "String"
+ ]
+ ];
+ [
+ "sha256";
+ [
+ "DataType";
+ "String"
+ ]
+ ];
+ [
+ "inthash64";
+ [
+ "DataType";
+ "Uint64"
+ ]
+ ];
+ [
+ "xxhash";
+ [
+ "DataType";
+ "Uint64"
+ ]
+ ];
+ [
+ "xxhash128";
+ [
+ "TupleType";
+ [
+ [
+ "DataType";
+ "Uint64"
+ ];
+ [
+ "DataType";
+ "Uint64"
+ ]
+ ]
+ ]
+ ]
+ ]
+ ]
+ ];
+ "Data" = [
+ [
+ "2432014819";
+ "17728638330159804320";
+ "84696366";
+ "12638153115695167470";
+ "746762829127501960";
+ "1228156847";
+ "5654386555365545660";
+ "1466639702";
+ "11413460447292444913";
+ "684814019408231284";
+ [
+ "125830901799957853";
+ "7569582475522398857"
+ ];
+ "2320827452992767577";
+ "c4ca4238a0b923820dcc509a6f75849b";
+ [
+ "xMpCOKC5I4INzFCab3WEmw=="
+ ];
+ "14973526590288695970";
+ [
+ "vjqkEHgWMA9RlnF/Dlkiqoxsc4vOI5/ULLMuB4GuMME="
+ ];
+ [
+ "ks31eMRwhaWZIlbw3Pl9Cxnx8cneTV/jDDrOYZG25ds="
+ ];
+ [
+ "ks31eMRwhaWZIlbw3Pl9Cxnx8cneTV/jDDrOYZG25ds="
+ ];
+ [
+ "j04DEIdeE9LFquAFu4i7Q2YAyca0FDA3J8r1atHQF58="
+ ];
+ "1602385837086584249";
+ "5348322356177288628";
+ "10105606910506535461";
+ [
+ "2871837063151915165"
+ ];
+ "2552028077";
+ "9304157803607034849";
+ [
+ "5308794677172709703";
+ "4629402678501957187"
+ ];
+ "3927678806";
+ [
+ "NWoZK3kTsExUV00Ywo1G5jlUKKs="
+ ];
+ [
+ "a4ayc/80/OGda4BO/1o/V0etpOqiLx1JwB5S3beHW0s="
+ ];
+ "10577349846663553072";
+ "7335560060985733464";
+ [
+ "7335560060985733464";
+ "16085986526811796301"
+ ]
+ ];
+ [
+ "2208655895";
+ "4363576337578352290";
+ "84696365";
+ "12638153115695167469";
+ "602994839685422785";
+ "772897149";
+ "16472888669357673283";
+ "2351653828";
+ "17472595041006102391";
+ "8016373356242392939";
+ [
+ "13426016195983081906";
+ "17051066397148972982"
+ ];
+ "6174653592142994962";
+ "c81e728d9d4c2f636f067f89cc14862c";
+ [
+ "yB5yjZ1ML2NvBn+JzBSGLA=="
+ ];
+ "5861621074593582340";
+ [
+ "ixHhYBlEBiZ446+zgg0hd5Eocp+xgMpVyaLfhjJqxV8="
+ ];
+ [
+ "MSN823muHfp/+4fN5+qKgDUtMA7lrHWKbN3RnWcZJew="
+ ];
+ [
+ "MSN823muHfp/+4fN5+qKgDUtMA7lrHWKbN3RnWcZJew="
+ ];
+ [
+ "NBaDJdCQRJye6B+WJdI/OX/mIxQk3AgEHm4hM9qmLu8="
+ ];
+ "8789615690042391357";
+ "17360383380415224727";
+ "970024650806116628";
+ [
+ "5310514165246837948"
+ ];
+ "3946386795";
+ "6920640749119438759";
+ [
+ "2374933113219823160";
+ "4520448414947048260"
+ ];
+ "2190005025";
+ [
+ "2kuSN7rMzfGcB2DKt67EqDWQELA="
+ ];
+ [
+ "1HNeOiZeFu7gP1lxi5tdAwGcB9i2xR+Q2jpmbuwTqzU="
+ ];
+ "18198135717204167749";
+ "18128579709034668820";
+ [
+ "18128579709034668820";
+ "14642767882163838550"
+ ]
+ ];
+ [
+ "1909385492";
+ "15694391695266948643";
+ "84696364";
+ "12638153115695167468";
+ "15180167692696242062";
+ "2292183779";
+ "6734453432295282525";
+ "2128480519";
+ "11275350073939794026";
+ "1669883546352889947";
+ [
+ "15168680716710346397";
+ "13490672353767795293"
+ ];
+ "13529992206878991808";
+ "eccbc87e4b5ce2fe28308fd9f2a7baf3";
+ [
+ "7MvIfktc4v4oMI/Z8qe68w=="
+ ];
+ "13401401932333664167";
+ [
+ "kRpsyYine3lH5Es1XuUlgXRBY6HLMD001QCPKqdjG7w="
+ ];
+ [
+ "WBNIM3sPPhSGIBc9qqX5TQDYgXBdy/Cqg+/aumHS7eE="
+ ];
+ [
+ "WBNIM3sPPhSGIBc9qqX5TQDYgXBdy/Cqg+/aumHS7eE="
+ ];
+ [
+ "BQK9GdKOJxlDH7wMrQ1gHOPyviB18JDuIg2i0JFwkL4="
+ ];
+ "2874396847657928730";
+ "6919389025651885183";
+ "14522245769643814311";
+ [
+ "9221007817131939736"
+ ];
+ "1678875853";
+ "11991475895402502921";
+ [
+ "9295019677823677360";
+ "3668607519738437716"
+ ];
+ "2634537178";
+ [
+ "d95o2uzYI7q7tY7bHI4U1xBug7s="
+ ];
+ [
+ "TgdAhWK+24tgzgXB3s/jrRa3IjCWfeAfZAt+Rym0n84="
+ ];
+ "9624464864560415994";
+ "8296998437054084336";
+ [
+ "8296998437054084336";
+ "6903416366538802245"
+ ]
+ ];
+ [
+ "0";
+ "18446744073709551615";
+ "2166136261";
+ "14695981039346656037";
+ "0";
+ "0";
+ "0";
+ "0";
+ "11160318154034397263";
+ "12607432989128692740";
+ [
+ "18085479540095642321";
+ "11079402499652051579"
+ ];
+ "7654268697807496793";
+ "d41d8cd98f00b204e9800998ecf8427e";
+ [
+ "1B2M2Y8AsgTpgAmY7PhCfg=="
+ ];
+ "7203772011789518145";
+ [
+ "sW8qMzZE+95eqaAsJqn4Ne3l7QwOAklHIexxpYMRsPo="
+ ];
+ [
+ "DldRwCblQ7Loqy6wYJnaodHl30d3j3eH+qtFzfEv46g="
+ ];
+ [
+ "DldRwCblQ7Loqy6wYJnaodHl30d3j3eH+qtFzfEv46g="
+ ];
+ [
+ "gtMZwEiUXpGivpA1k/ith+ZulxJ3iI6tC6aVNsDsnk0="
+ ];
+ "16558958598623574096";
+ "9185752494698444901";
+ "0";
+ #;
+ "3696677242";
+ "11160318154034397263";
+ [
+ "4463240938071824939";
+ "4374473821787594281"
+ ];
+ "0";
+ [
+ "2jmj7l5rSw0yVb/vlWAYkK/YBwk="
+ ];
+ [
+ "47DEQpj8HBSa+/TImW+5JCeuQeRkm5NMpJWZG3hSuFU="
+ ];
+ "4761183170873013810";
+ "3244421341483603138";
+ [
+ "6918025063187695999";
+ "11072670137173121240"
+ ]
+ ]
+ ]
+ }
+ ]
+ }
+] \ No newline at end of file
diff --git a/yql/essentials/udfs/common/digest/test/cases/Basic.in b/yql/essentials/udfs/common/digest/test/cases/Basic.in
new file mode 100644
index 00000000000..c90696db42e
--- /dev/null
+++ b/yql/essentials/udfs/common/digest/test/cases/Basic.in
@@ -0,0 +1,4 @@
+{"key"="1";"subkey"="1";"value"=""};
+{"key"="2";"subkey"="2";"value"=""};
+{"key"="3";"subkey"="3";"value"=""};
+{"key"="";"subkey"="";"value"=""};
diff --git a/yql/essentials/udfs/common/digest/test/cases/Basic.sql b/yql/essentials/udfs/common/digest/test/cases/Basic.sql
new file mode 100644
index 00000000000..fbf6f218fba
--- /dev/null
+++ b/yql/essentials/udfs/common/digest/test/cases/Basic.sql
@@ -0,0 +1,35 @@
+/* syntax version 1 */
+SELECT
+ Digest::Crc32c(key) AS crc32c,
+ Digest::Crc64(key) AS crc64,
+ Digest::Fnv32(key) AS fnv32,
+ Digest::Fnv64(key) AS fnv64,
+ Digest::MurMurHash(key) AS murmur,
+ Digest::MurMurHash32(key) AS murmur32,
+ Digest::MurMurHash2A(key) AS murmur2a,
+ Digest::MurMurHash2A32(key) AS murmur2a32,
+ Digest::CityHash(key) AS city,
+ Digest::CityHash(key, 111) AS cityWithSeed,
+ Digest::CityHash128(key) AS city128,
+ Digest::NumericHash(COALESCE(CAST(key AS Uint64), 0)) AS numeric,
+ Digest::Md5Hex(key) AS md5hex,
+ Digest::Md5Raw(key) AS md5raw,
+ Digest::Md5HalfMix(key) AS md5halfmix,
+ Digest::Argon2(key, "12345678") AS argon2,
+ Digest::Blake2B(key) AS blake2b,
+ Digest::Blake2B(key, "") AS blake2bunkeyed,
+ Digest::Blake2B(key, "12345678") AS blake2bkeyed,
+ Digest::SipHash(111, 222, key) AS sip,
+ Digest::HighwayHash(111, 222, 333, 444, key) AS highway,
+ Digest::FarmHashFingerprint(COALESCE(CAST(key AS Uint64), 0u)) AS farmfing,
+ Digest::FarmHashFingerprint2(123ul, CAST(key AS Uint64)) AS farmfing2,
+ Digest::FarmHashFingerprint32(key) AS farmfing32,
+ Digest::FarmHashFingerprint64(key) AS farmfing64,
+ Digest::FarmHashFingerprint128(key) AS farmfing128,
+ Digest::SuperFastHash(key) AS sfh,
+ Digest::Sha1(key) as sha1,
+ Digest::Sha256(key) as sha256,
+ Digest::IntHash64(COALESCE(CAST(key AS Uint64), 0)) AS inthash64,
+ Digest::XXH3(key) AS xxhash,
+ Digest::XXH3_128(key) AS xxhash128
+FROM Input;
diff --git a/yql/essentials/udfs/common/digest/test/ya.make b/yql/essentials/udfs/common/digest/test/ya.make
new file mode 100644
index 00000000000..6c3cce54db0
--- /dev/null
+++ b/yql/essentials/udfs/common/digest/test/ya.make
@@ -0,0 +1,13 @@
+YQL_UDF_TEST_CONTRIB()
+
+DEPENDS(yql/essentials/udfs/common/digest)
+
+TIMEOUT(300)
+
+SIZE(MEDIUM)
+
+IF (SANITIZER_TYPE == "memory")
+ TAG(ya:not_autocheck) # YQL-15385
+ENDIF()
+
+END()
diff --git a/yql/essentials/udfs/common/digest/ya.make b/yql/essentials/udfs/common/digest/ya.make
new file mode 100644
index 00000000000..90ee1b02f36
--- /dev/null
+++ b/yql/essentials/udfs/common/digest/ya.make
@@ -0,0 +1,42 @@
+IF (YQL_PACKAGED)
+ PACKAGE()
+
+ FROM_SANDBOX(
+ FILE 7319896345 OUT_NOAUTO libdigest_udf.so
+ )
+
+ END()
+ELSE()
+YQL_UDF_CONTRIB(digest_udf)
+
+ YQL_ABI_VERSION(
+ 2
+ 28
+ 0
+ )
+
+ SRCS(
+ digest_udf.cpp
+ )
+
+ PEERDIR(
+ contrib/libs/farmhash
+ contrib/libs/highwayhash
+ contrib/libs/openssl
+ contrib/libs/xxhash
+ library/cpp/digest/argonish
+ library/cpp/digest/crc32c
+ library/cpp/digest/md5
+ library/cpp/digest/old_crc
+ library/cpp/digest/sfh
+ )
+
+ ADDINCL(contrib/libs/highwayhash)
+
+ END()
+
+ENDIF()
+
+RECURSE_FOR_TESTS(
+ test
+)
diff --git a/yql/essentials/udfs/common/file/file_udf.cpp b/yql/essentials/udfs/common/file/file_udf.cpp
new file mode 100644
index 00000000000..57db826591c
--- /dev/null
+++ b/yql/essentials/udfs/common/file/file_udf.cpp
@@ -0,0 +1,623 @@
+#include <yql/essentials/public/udf/udf_helpers.h>
+
+#include <util/generic/yexception.h>
+#include <util/stream/buffered.h>
+#include <util/stream/file.h>
+#include <util/string/cast.h>
+#include <util/ysaveload.h>
+
+#include <functional>
+
+using namespace NKikimr;
+using namespace NUdf;
+
+extern const char ByLineFuncName[];
+const char ByLineFuncName[] = "ByLines";
+
+namespace {
+ namespace Helper {
+ // Converts one text line into an unboxed value of TUserType.
+ // Generic case: parse with TryFromString; returns false (leaving `result`
+ // untouched) when the line cannot be parsed.
+ template <class TUserType>
+ inline bool ConvertToUnboxed(const IValueBuilder& valueBuilder, const TString& curLine, TUnboxedValue& result) {
+     Y_UNUSED(valueBuilder);
+     TUserType parsed;
+     if (TryFromString<TUserType>(curLine, parsed)) {
+         result = TUnboxedValuePod(parsed);
+         return true;
+     }
+     return false;
+ }
+
+ // String-like types (String, Utf8, Yson, Json) take the line as-is: it is
+ // copied into a new string value without any parsing and never fails.
+ template <>
+ inline bool ConvertToUnboxed<const char*>(const IValueBuilder& valueBuilder, const TString& line, TUnboxedValue& out) {
+     out = valueBuilder.NewString(line);
+     return true;
+ }
+
+ template <>
+ inline bool ConvertToUnboxed<TUtf8>(const IValueBuilder& valueBuilder, const TString& line, TUnboxedValue& out) {
+     out = valueBuilder.NewString(line);
+     return true;
+ }
+
+ template <>
+ inline bool ConvertToUnboxed<TYson>(const IValueBuilder& valueBuilder, const TString& line, TUnboxedValue& out) {
+     out = valueBuilder.NewString(line);
+     return true;
+ }
+
+ template <>
+ inline bool ConvertToUnboxed<TJson>(const IValueBuilder& valueBuilder, const TString& line, TUnboxedValue& out) {
+     out = valueBuilder.NewString(line);
+     return true;
+ }
+
+ template <typename T>
+ struct TypeToTypeName {
+ static const char* Name() {
+ return "Unknown";
+ }
+ };
+ template <>
+ struct TypeToTypeName<bool> {
+ static constexpr const char* Name() {
+ return "Bool";
+ }
+ };
+ template <>
+ struct TypeToTypeName<i8> {
+ static constexpr const char* Name() {
+ return "Int8";
+ }
+ };
+ template <>
+ struct TypeToTypeName<ui8> {
+ static constexpr const char* Name() {
+ return "Uint8";
+ }
+ };
+ template <>
+ struct TypeToTypeName<i16> {
+ static constexpr const char* Name() {
+ return "Int16";
+ }
+ };
+ template <>
+ struct TypeToTypeName<ui16> {
+ static constexpr const char* Name() {
+ return "Uint16";
+ }
+ };
+ template <>
+ struct TypeToTypeName<ui32> {
+ static constexpr const char* Name() {
+ return "Uint32";
+ }
+ };
+ template <>
+ struct TypeToTypeName<ui64> {
+ static constexpr const char* Name() {
+ return "Uint64";
+ }
+ };
+ template <>
+ struct TypeToTypeName<i32> {
+ static constexpr const char* Name() {
+ return "Int32";
+ }
+ };
+ template <>
+ struct TypeToTypeName<i64> {
+ static constexpr const char* Name() {
+ return "Int64";
+ }
+ };
+ template <>
+ struct TypeToTypeName<float> {
+ static constexpr const char* Name() {
+ return "Float";
+ }
+ };
+ template <>
+ struct TypeToTypeName<double> {
+ static constexpr const char* Name() {
+ return "Double";
+ }
+ };
+ template <>
+ struct TypeToTypeName<const char*> {
+ static constexpr const char* Name() {
+ return "String";
+ }
+ };
+ template <>
+ struct TypeToTypeName<TUtf8> {
+ static constexpr const char* Name() {
+ return "Utf8";
+ }
+ };
+ template <>
+ struct TypeToTypeName<TYson> {
+ static constexpr const char* Name() {
+ return "Yson";
+ }
+ };
+ template <>
+ struct TypeToTypeName<TJson> {
+ static constexpr const char* Name() {
+ return "Json";
+ }
+ };
+ }
+
+ static const ui64 TAKE_UNLIM = -1;
+
+ // Advances `iter` by `skip` elements. Returns false as soon as the iterator
+ // refuses to skip (it ran out of elements), true once all were skipped.
+ bool SkipElements(IBoxedValue& iter, ui64 skip) {
+     while (skip > 0) {
+         if (!TBoxedValueAccessor::Skip(iter)) {
+             return false;
+         }
+         --skip;
+     }
+     return true;
+ }
+
+ typedef std::function<void(const TString& message)> TTerminateFunc;
+
+ // Shared per-file state: the path, an eagerly opened single-use stream and
+ // the line count once it has been discovered.
+ class TStreamMeta: public TThrRefBase {
+ public:
+     typedef TBuffered<TUnbufferedFileInput> TStream;
+     typedef TIntrusivePtr<TStreamMeta> TPtr;
+
+     TStreamMeta(TString filePath)
+         : FilePath(filePath)
+     {
+         // Open the file right away so that open errors surface on creation.
+         Cached = DoCreateStream();
+     }
+
+     // Hands out the stream opened in the constructor. It can be taken only
+     // once; any later call reports the problem through terminateFunc.
+     std::unique_ptr<TStream> CreateStream(TTerminateFunc terminateFunc) {
+         if (!Cached) {
+             terminateFunc("The file iterator was already created. To scan file data multiple times please use ListCollect either over ParseFile or over some lazy function over it, e.g. ListMap");
+             Y_ABORT("Terminate unstoppable!");
+         }
+         return std::move(Cached);
+     }
+
+     // Stores the known line count into `count`; returns false (and leaves
+     // `count` untouched) while the count is still unknown.
+     bool GetLinesCount(ui64& count) const {
+         if (LinesCount != Unknown) {
+             count = LinesCount;
+             return true;
+         }
+         return false;
+     }
+
+     // Remembers the line count; only the first value is kept.
+     void SetLinesCount(ui64 count) {
+         Y_DEBUG_ABORT_UNLESS(LinesCount == Unknown || count == LinesCount, "Set another value of count lines");
+         if (LinesCount != Unknown) {
+             return;
+         }
+         LinesCount = count;
+     }
+
+     const TString& GetFilePath() const {
+         return FilePath;
+     }
+
+ private:
+     // Opens FilePath for sequential reading behind a 4 KiB buffer and records
+     // the file size on first use.
+     std::unique_ptr<TStream> DoCreateStream() {
+         static const auto bufferSize = 1 << 12;
+         TFile input(FilePath, OpenExisting | RdOnly | Seq);
+         if (FileSize == Unknown) {
+             FileSize = input.GetLength();
+         }
+         return std::make_unique<TStream>(bufferSize, input);
+     }
+
+     TString FilePath;
+     static const ui64 Unknown = -1;
+     ui64 FileSize = Unknown;
+     ui64 LinesCount = Unknown;
+     std::unique_ptr<TStream> Cached;
+ };
+
+ // Iterator that never yields an element: both Skip and Next report the end
+ // immediately.
+ class TEmptyIter: public TBoxedValue {
+ public:
+     TEmptyIter(TTerminateFunc terminateFunc)
+         : TerminateFunc(terminateFunc)
+     {
+     }
+
+ private:
+     bool Skip() override {
+         return false;
+     }
+
+     bool Next(TUnboxedValue&) override {
+         return false;
+     }
+
+     const TTerminateFunc TerminateFunc;
+ };
+
+ // Splits a character stream into lines. A line ends at "\n", "\r\n" or a
+ // lone "\r"; the terminator is never included in the returned line.
+ class TLineSplitter {
+ public:
+     TLineSplitter(IInputStream& stream)
+         : Stream_(stream)
+     {
+     }
+
+     // Reads the next line into `st` (which is cleared first).
+     // Returns the number of characters consumed for this line, terminator
+     // included; 0 means the stream is exhausted and no line was read.
+     size_t Next(TString& st) {
+         st.clear();
+         size_t ret = 0;
+         char c;
+
+         while (ReadNextChar(c)) {
+             ++ret;
+             if (c == '\n') {
+                 break;
+             }
+             if (c == '\r') {
+                 // Look one character ahead to tell "\r\n" from a lone "\r".
+                 // The pending slot is empty here: ReadNextChar has just
+                 // drained it if it was in use.
+                 char next;
+                 if (Stream_.ReadChar(next)) {
+                     if (next == '\n') {
+                         ++ret;
+                     } else {
+                         // Belongs to the following line; it is counted when
+                         // that line consumes it.
+                         PendingLineChar_ = next;
+                         HasPendingLineChar_ = true;
+                     }
+                 }
+                 break;
+             }
+             st.push_back(c);
+         }
+
+         return ret;
+     }
+
+ private:
+     // Returns the stashed lookahead character first, then falls back to the
+     // stream. Routing the stashed character through the same path as regular
+     // input makes sure a stashed '\r' is treated as a line terminator rather
+     // than being appended to the next line.
+     bool ReadNextChar(char& c) {
+         if (HasPendingLineChar_) {
+             c = PendingLineChar_;
+             HasPendingLineChar_ = false;
+             return true;
+         }
+         return Stream_.ReadChar(c);
+     }
+
+     IInputStream& Stream_;
+     bool HasPendingLineChar_ = false;
+     char PendingLineChar_ = 0;
+ };
+
+ // Iterator that reads a file stream line by line and converts every line to
+ // TUserType. An optional limit caps the number of lines it will yield.
+ template <class TUserType>
+ class TLineByLineBoxedValueIterator: public TBoxedValue {
+ public:
+     TLineByLineBoxedValueIterator(TStreamMeta::TPtr metaPtr, std::unique_ptr<TStreamMeta::TStream>&& stream, const IValueBuilder& valueBuilder, TTerminateFunc terminateFunc)
+         : MetaPtr(metaPtr)
+         , ValueBuilder(valueBuilder)
+         , Stream(std::move(stream))
+         , Splitter(*Stream)
+         , TerminateFunc(terminateFunc)
+     {
+     }
+
+     void SetLimit(ui64 limit = TAKE_UNLIM) {
+         Limit = limit;
+     }
+
+ private:
+     // Consumes one unit of the remaining limit. Returns false once the limit
+     // is used up; an unlimited iterator always returns true.
+     bool SkipLimit() {
+         if (Limit == TAKE_UNLIM) {
+             return true;
+         }
+         if (Limit == 0) {
+             return false;
+         }
+         --Limit;
+         return true;
+     }
+
+     // Reads the next line into CurLine; false at end of input or when the
+     // limit has been reached.
+     bool Skip() final {
+         ++CurLineNum;
+         if (!Splitter.Next(CurLine)) {
+             return false;
+         }
+         return SkipLimit();
+     }
+
+     // Reads and converts the next line. A line that cannot be converted
+     // terminates the UDF with a message naming the file and the line number.
+     bool Next(TUnboxedValue& value) override {
+         if (!Skip()) {
+             return false;
+         }
+         if (Helper::ConvertToUnboxed<TUserType>(ValueBuilder, CurLine, value)) {
+             return true;
+         }
+
+         TStringBuilder message;
+         message << "File::ByLines failed to cast string '" << CurLine << "' to " << Helper::TypeToTypeName<TUserType>::Name() << Endl;
+         message << "- path: " << MetaPtr->GetFilePath() << Endl;
+         message << "- line: " << CurLineNum << Endl;
+         TerminateFunc(message);
+         Y_ABORT("Terminate unstoppable!");
+     }
+
+     TStreamMeta::TPtr MetaPtr;
+     const IValueBuilder& ValueBuilder;
+
+     // Stream must be declared before Splitter: the splitter keeps a reference
+     // to the stream object.
+     std::unique_ptr<TStreamMeta::TStream> Stream;
+     TLineSplitter Splitter;
+     TTerminateFunc TerminateFunc;
+     TString CurLine;
+     ui64 CurLineNum = 0;
+     ui64 Limit = TAKE_UNLIM;
+     TUnboxedValue Result;
+ };
+
+ // Lazy list over the lines of a file, each converted to TUserType.
+ // Skip/Take describe the window of lines exposed by this list; SkipListImpl
+ // and TakeListImpl produce new lists over the same file with a narrower window.
+ template <class TUserType>
+ class TListByLineBoxedValue: public TBoxedValue {
+ public:
+     TListByLineBoxedValue(TStreamMeta::TPtr metaPtr, const IValueBuilder& valueBuilder, TTerminateFunc terminateFunc, ui64 skip = 0ULL, ui64 take = TAKE_UNLIM)
+         : MetaPtr(metaPtr)
+         , ValueBuilder(valueBuilder)
+         , TerminateFunc(terminateFunc)
+         , Skip(skip)
+         , Take(take)
+     {}
+ private:
+     // The length is cheap only when the file's line count is already cached.
+     bool HasFastListLength() const override {
+         ui64 tmp;
+         return MetaPtr->GetLinesCount(tmp);
+     }
+
+     // Number of elements in the window. Without a cached line count the file
+     // is scanned through an iterator, which uses up the single-use stream.
+     ui64 GetListLength() const override {
+         ui64 length;
+         if (!MetaPtr->GetLinesCount(length)) {
+             length = Skip;
+             for (const auto iter = GetListIterator(); iter.Skip(); ++length)
+                 continue;
+             // Cache the total only when it is known to be exact. If nothing
+             // was yielded after a non-zero Skip, the file may hold fewer than
+             // Skip lines, and `length` (== Skip) would overstate the count.
+             if (Take == TAKE_UNLIM && (Skip == 0 || length > Skip)) {
+                 MetaPtr->SetLinesCount(length);
+             }
+         }
+         if (length <= Skip) {
+             return 0;
+         }
+         return Min(length - Skip, Take);
+     }
+     ui64 GetEstimatedListLength() const override {
+         /// \todo some optimisation?
+         return GetListLength();
+     }
+
+     // Creates the iterator positioned after the first Skip lines and limited
+     // to Take lines. Returns an empty iterator when Take is 0 or the file
+     // ends before Skip lines were skipped. Any std::exception is reported
+     // through TerminateFunc.
+     TUnboxedValue GetListIterator() const override {
+         try {
+             auto stream = MetaPtr->CreateStream(TerminateFunc);
+             IBoxedValuePtr iter(new TLineByLineBoxedValueIterator<TUserType>(MetaPtr, std::move(stream), ValueBuilder, TerminateFunc));
+             if (!Take || !SkipElements(*iter, Skip)) {
+                 return TUnboxedValuePod(new TEmptyIter(TerminateFunc));
+             }
+             static_cast<TLineByLineBoxedValueIterator<TUserType>*>(iter.Get())->SetLimit(Take);
+             return TUnboxedValuePod(std::move(iter));
+         } catch (const std::exception&) {
+             TerminateFunc(CurrentExceptionMessage());
+             Y_ABORT("Terminate unstoppable!");
+         }
+     }
+
+     // Skipping shifts the window start and shrinks a finite Take accordingly.
+     IBoxedValuePtr SkipListImpl(const IValueBuilder& builder, ui64 count) const override {
+         return new TListByLineBoxedValue(MetaPtr, builder, TerminateFunc, Skip + count, Take == TAKE_UNLIM ? TAKE_UNLIM : Take - std::min(Take, count));
+     }
+     // Taking can only narrow the window.
+     IBoxedValuePtr TakeListImpl(const IValueBuilder& builder, ui64 count) const override {
+         return new TListByLineBoxedValue(MetaPtr, builder, TerminateFunc, Skip, std::min(Take, count));
+     }
+
+     bool HasListItems() const override {
+         return true;
+     }
+
+     TStreamMeta::TPtr MetaPtr;
+     const IValueBuilder& ValueBuilder;
+     TTerminateFunc TerminateFunc;
+     ui64 Skip = 0ULL;
+     ui64 Take = TAKE_UNLIM;
+ };
+
+ // UDF implementation: takes a file path and returns a lazy list of the
+ // file's lines with item type TUserType.
+ template <class TUserType>
+ class TByLinesFunc: public TBoxedValue {
+ private:
+     TSourcePosition Pos_;
+
+     TByLinesFunc(TSourcePosition pos)
+         : Pos_(pos)
+     {}
+
+     // args[0] is the file path. Errors raised while building the list, and
+     // later while reading it, are reported with the call-site position.
+     TUnboxedValue Run(const IValueBuilder* valueBuilder, const TUnboxedValuePod* args) const override {
+         try {
+             TString path(args[0].AsStringRef());
+             TStreamMeta::TPtr meta(new TStreamMeta(path));
+             // The callback may outlive this call, so it owns a copy of the position.
+             auto onError = [where = Pos_](const TString& message) {
+                 UdfTerminate((TStringBuilder() << where << " " << message).data());
+             };
+             return TUnboxedValuePod(new TListByLineBoxedValue<TUserType>(meta, *valueBuilder, onError));
+         } catch (const std::exception& e) {
+             UdfTerminate((TStringBuilder() << Pos_ << " " << e.what()).data());
+         }
+     }
+
+ public:
+     // Registers the signature List<TUserType>(String) and, unless only the
+     // types are requested, the implementation object.
+     static void DeclareSignature(
+         TStringRef name,
+         TType* userType,
+         IFunctionTypeInfoBuilder& builder,
+         bool typesOnly)
+     {
+         Y_UNUSED(name);
+         builder.UserType(userType);
+         builder.SimpleSignature<TListType<TUserType>(char*)>();
+         if (typesOnly) {
+             return;
+         }
+         builder.Implementation(new TByLinesFunc<TUserType>(builder.GetSourcePosition()));
+     }
+ };
+
+ // UDF "FolderListFromFile": takes a file path and returns a lazy list of
+ // structs {Path, Type, Attributes} decoded from that file.
+ class TFolderListFromFile: public TBoxedValue {
+ private:
+ // Iterator that decodes one (type, path, attrs) record per Next() call
+ // from the input file stream.
+ class TIterator : public TBoxedValue {
+ public:
+ // indexP/indexT/indexA are the positions of the Path/Type/Attributes
+ // members inside the result struct.
+ TIterator(ui32 indexP, ui32 indexT, ui32 indexA, const IValueBuilder& valueBuilder, const TSourcePosition& pos, TString filePath)
+ : IndexP_(indexP)
+ , IndexT_(indexT)
+ , IndexA_(indexA)
+ , ValueBuilder_(valueBuilder)
+ , Pos_(pos)
+ , Input_(filePath)
+ {
+ }
+
+ private:
+ // Reads the next record. Records are stored as three consecutive
+ // strings in the order type, path, attrs; an empty type string ends
+ // the iteration. Any std::exception terminates the UDF with the
+ // call-site position prepended to the message.
+ bool Next(NUdf::TUnboxedValue& value) override {
+ try {
+ TString type;
+ TString path;
+ TString attrs;
+ ::Load(&Input_, type);
+ if (!type) {
+ return false;
+ }
+ ::Load(&Input_, path);
+ ::Load(&Input_, attrs);
+
+ NUdf::TUnboxedValue* items = nullptr;
+ value = ValueBuilder_.NewArray(3, items);
+ items[IndexT_] = ValueBuilder_.NewString(type);
+ items[IndexP_] = ValueBuilder_.NewString(path);
+ items[IndexA_] = ValueBuilder_.NewString(attrs);
+ }
+ catch (const std::exception& e) {
+ UdfTerminate((TStringBuilder() << Pos_ << " " << e.what()).data());
+ }
+ return true;
+ }
+
+ private:
+ const ui32 IndexP_;
+ const ui32 IndexT_;
+ const ui32 IndexA_;
+ const IValueBuilder& ValueBuilder_;
+ const TSourcePosition Pos_;
+ TIFStream Input_;
+ };
+
+ // List value backed by the file; every GetListIterator() call creates a
+ // new TIterator that opens the file again.
+ class TList: public TBoxedValue {
+ public:
+ TList(ui32 indexP, ui32 indexT, ui32 indexA, const IValueBuilder& valueBuilder, const TSourcePosition& pos, TString filePath)
+ : IndexP_(indexP)
+ , IndexT_(indexT)
+ , IndexA_(indexA)
+ , ValueBuilder_(valueBuilder)
+ , Pos_(pos)
+ , FilePath_(std::move(filePath))
+ {
+ }
+
+ protected:
+ NUdf::TUnboxedValue GetListIterator() const override {
+ return NUdf::TUnboxedValuePod(new TIterator(IndexP_, IndexT_, IndexA_, ValueBuilder_, Pos_, FilePath_));
+ }
+
+ // The length is "fast" only after it has been computed and cached once.
+ bool HasFastListLength() const override {
+ return bool(Length);
+ }
+
+ // Counts the records with a full pass over the file on the first call
+ // and caches the result in Length.
+ ui64 GetListLength() const override {
+ if (!Length) {
+ ui64 length = 0ULL;
+ for (const auto it = GetListIterator(); it.Skip();) {
+ ++length;
+ }
+
+ Length = length;
+ }
+
+ return *Length;
+ }
+
+ // No cheaper estimate is available: delegates to the exact length.
+ ui64 GetEstimatedListLength() const override {
+ return GetListLength();
+ }
+
+ // Answers from the cached flag or cached length when possible;
+ // otherwise probes the file for a first record and caches the answer.
+ bool HasListItems() const override {
+ if (HasItems) {
+ return *HasItems;
+ }
+
+ if (Length) {
+ HasItems = (*Length != 0);
+ return *HasItems;
+ }
+
+ auto iter = GetListIterator();
+ HasItems = iter.Skip();
+ return *HasItems;
+ }
+
+ protected:
+ const ui32 IndexP_;
+ const ui32 IndexT_;
+ const ui32 IndexA_;
+ const IValueBuilder& ValueBuilder_;
+ const TSourcePosition Pos_;
+ const TString FilePath_;
+ // Lazily filled caches; mutable because the list interface is const.
+ mutable TMaybe<ui64> Length;
+ mutable TMaybe<bool> HasItems;
+ };
+
+ public:
+ TFolderListFromFile(ui32 indexP, ui32 indexT, ui32 indexA, const TSourcePosition& pos)
+ : IndexP_(indexP)
+ , IndexT_(indexT)
+ , IndexA_(indexA)
+ , Pos_(pos)
+ {
+ }
+
+ // Name under which the function is looked up in DeclareSignature().
+ static const ::NYql::NUdf::TStringRef& Name() {
+ static auto name = ::NYql::NUdf::TStringRef::Of("FolderListFromFile");
+ return name;
+ }
+
+ // args[0] is the file path; the file itself is opened lazily by the
+ // iterators of the returned list.
+ TUnboxedValue Run(const IValueBuilder* valueBuilder, const TUnboxedValuePod* args) const override {
+ try {
+ TString filePath(args[0].AsStringRef());
+ return TUnboxedValuePod(new TList(IndexP_, IndexT_, IndexA_, *valueBuilder, Pos_, filePath));
+ } catch (const std::exception& e) {
+ UdfTerminate((TStringBuilder() << Pos_ << " " << e.what()).data());
+ }
+ }
+
+ // Declares the signature List<Struct<Path, Type, Attributes>>(String).
+ // Returns false only when `name` is not this function's name.
+ static bool DeclareSignature(const TStringRef& name, TType* userType, IFunctionTypeInfoBuilder& builder, bool typesOnly) {
+ if (Name() != name) {
+ // the only case when we return false
+ return false;
+ }
+
+ builder.UserType(userType);
+
+ // The builder reports the position it assigned to every struct member.
+ ui32 indexP, indexT, indexA;
+ auto itemType = builder.Struct()
+ ->AddField<const char*>("Path", &indexP)
+ .AddField<const char*>("Type", &indexT)
+ .AddField<TYson>("Attributes", &indexA)
+ .Build();
+ auto resultType = builder.List()->Item(itemType).Build();
+
+ builder.Args()->Add<const char*>().Done().Returns(resultType);
+ if (!typesOnly) {
+ builder.Implementation(new TFolderListFromFile(indexP, indexT, indexA, builder.GetSourcePosition()));
+ }
+ return true;
+ }
+
+ private:
+ const ui32 IndexP_;
+ const ui32 IndexT_;
+ const ui32 IndexA_;
+ const TSourcePosition Pos_;
+ };
+
+ // Module definition: the by-lines function family (TByLinesFunc, one entry
+ // per listed item type) plus FolderListFromFile.
+ SIMPLE_MODULE(TFileModule,
+ TUserDataTypeFuncFactory<false, false, ByLineFuncName, TByLinesFunc, const char*, TUtf8, TYson, TJson, i8, ui8, i16, ui16, ui32, ui64, i32, i64, float, double, bool>,
+ TFolderListFromFile
+ )
+
+}
+
+REGISTER_MODULES(TFileModule)
diff --git a/yql/essentials/udfs/common/file/ya.make b/yql/essentials/udfs/common/file/ya.make
new file mode 100644
index 00000000000..250f0722d8e
--- /dev/null
+++ b/yql/essentials/udfs/common/file/ya.make
@@ -0,0 +1,17 @@
+YQL_UDF_CONTRIB(file_udf)
+
+YQL_ABI_VERSION(
+ 2
+ 27
+ 0
+)
+
+SRCS(
+ file_udf.cpp
+)
+
+PEERDIR(
+ yql/essentials/core
+)
+
+END()
diff --git a/yql/essentials/udfs/common/histogram/histogram_udf.cpp b/yql/essentials/udfs/common/histogram/histogram_udf.cpp
new file mode 100644
index 00000000000..3dcb2ca98ec
--- /dev/null
+++ b/yql/essentials/udfs/common/histogram/histogram_udf.cpp
@@ -0,0 +1,1018 @@
+#include <yql/essentials/public/udf/udf_helpers.h>
+
+#include <library/cpp/histogram/adaptive/adaptive_histogram.h>
+#include <library/cpp/histogram/adaptive/block_histogram.h>
+
+#include <util/string/printf.h>
+#include <util/stream/format.h>
+
+#include <cmath>
+
+using namespace NKikimr;
+using namespace NUdf;
+using namespace NKiwiAggr;
+
+namespace {
+ // Expands to the class name T<name> followed by a comma.
+ #define REGISTER_METHOD_UDF(name) \
+ T##name,
+
+ // X-macro list: applies XX to every listed method name (one double argument).
+ #define HISTOGRAM_ONE_DOUBLE_ARG_METHODS_MAP(XX) \
+ XX(GetSumAboveBound) \
+ XX(GetSumBelowBound) \
+ XX(CalcUpperBound) \
+ XX(CalcLowerBound) \
+ XX(CalcUpperBoundSafe) \
+ XX(CalcLowerBoundSafe)
+
+ // X-macro list: applies XX to every listed method name (two double arguments).
+ #define HISTOGRAM_TWO_DOUBLE_ARG_METHODS_MAP(XX) \
+ XX(GetSumInRange)
+
+ // X-macro list of histogram algorithm names.
+ #define HISTOGRAM_ALGORITHMS_MAP(XX) \
+ XX(AdaptiveDistance) \
+ XX(AdaptiveWeight) \
+ XX(AdaptiveWard) \
+ XX(BlockWeight) \
+ XX(BlockWard)
+
+ // X-macro list of per-histogram function names; `arg` is forwarded to XX.
+ #define HISTOGRAM_FUNCTION_MAP(XX, arg) \
+ XX(Create, arg) \
+ XX(AddValue, arg) \
+ XX(GetResult, arg) \
+ XX(Serialize, arg) \
+ XX(Deserialize, arg) \
+ XX(Merge, arg)
+
+ // Defines <name>HistogramResourceName = "Histogram.<name>". The 10-character
+ // "Histogram." prefix is what the substr(10)/Skip(10) calls below strip off.
+ #define DECLARE_HISTOGRAM_RESOURCE_NAME(name) extern const char name##HistogramResourceName[] = "Histogram." #name;
+ HISTOGRAM_ALGORITHMS_MAP(DECLARE_HISTOGRAM_RESOURCE_NAME)
+ DECLARE_HISTOGRAM_RESOURCE_NAME(Linear)
+ DECLARE_HISTOGRAM_RESOURCE_NAME(Logarithmic)
+
+ // Histogram whose points are snapped to multiples of Step and clamped to
+ // [Begin, End]; storage is a TAdaptiveWardHistogram created with 2^24 as
+ // its constructor argument.
+ class TLinearHistogram: public TAdaptiveWardHistogram {
+ public:
+     TLinearHistogram(double step, double begin, double end)
+         : TAdaptiveWardHistogram(1ULL << 24)
+         , Step(step)
+         , Begin(begin)
+         , End(end)
+     {
+     }
+
+     // Out-of-range values collapse onto the nearest bound; in-range values
+     // are rounded to the nearest multiple of Step before being stored.
+     void Add(double value, double weight) override {
+         double bucket;
+         if (value < Begin) {
+             bucket = Begin;
+         } else if (value > End) {
+             bucket = End;
+         } else {
+             bucket = std::floor(value / Step + 0.5) * Step;
+         }
+         TAdaptiveWardHistogram::Add(bucket, weight);
+     }
+
+     // Adding a pre-built record is not supported for this histogram kind.
+     void Add(const THistoRec&) override {
+         Y_ABORT("Not implemented");
+     }
+
+ protected:
+     double Step;
+     double Begin;
+     double End;
+ };
+
+ // Histogram whose points are snapped to the nearest integer power of Step
+ // and then clamped to [Begin, End].
+ class TLogarithmicHistogram: public TLinearHistogram {
+ public:
+     TLogarithmicHistogram(double step, double begin, double end)
+         : TLinearHistogram(step, begin, end)
+     {
+     }
+
+     // Picks the closer of the two neighbouring powers of Step (ties go to the
+     // upper one), clamps it, and stores it unless the result is NaN.
+     void Add(double value, double weight) override {
+         const double exponent = std::log(value) / std::log(Step);
+         const double lower = std::pow(Step, std::floor(exponent));
+         const double upper = std::pow(Step, std::ceil(exponent));
+         double snapped = (std::abs(value - upper) > std::abs(value - lower)) ? lower : upper;
+
+         if (snapped < Begin) {
+             snapped = Begin;
+         } else if (snapped > End) {
+             snapped = End;
+         }
+
+         if (std::isnan(snapped)) {
+             return;
+         }
+         TAdaptiveWardHistogram::Add(snapped, weight);
+     }
+
+     // Adding a pre-built record is not supported for this histogram kind.
+     void Add(const THistoRec&) override {
+         Y_ABORT("Not implemented");
+     }
+ };
+
+ // UDF "<Kind>Histogram_Create"(value, weight, ui32): builds a new histogram
+ // resource from the ui32 argument and adds the first weighted value to it.
+ template <typename THistogramType, const char* ResourceName>
+ class THistogram_Create: public TBoxedValue {
+ public:
+     using THistogramResource = TBoxedResource<THistogramType, ResourceName>;
+
+     THistogram_Create(TSourcePosition pos)
+         : Pos_(pos)
+     {}
+
+     // Function name: the resource name without its 10-character
+     // "Histogram." prefix, followed by "Histogram_Create".
+     static const TStringRef& Name() {
+         static auto udfName = TString(ResourceName).substr(10) + "Histogram_Create";
+         static auto udfNameRef = TStringRef(udfName);
+         return udfNameRef;
+     }
+
+ private:
+     TUnboxedValue Run(
+         const IValueBuilder* valueBuilder,
+         const TUnboxedValuePod* args) const override {
+         try {
+             Y_UNUSED(valueBuilder);
+             THolder<THistogramResource> created(new THistogramResource(args[2].Get<ui32>()));
+             const double value = args[0].Get<double>();
+             const double weight = args[1].Get<double>();
+             created->Get()->Add(value, weight);
+             return TUnboxedValuePod(created.Release());
+         } catch (const std::exception& e) {
+             UdfTerminate((TStringBuilder() << Pos_ << " " << e.what()).data());
+         }
+     }
+
+ public:
+     // Claims the call only when `name` matches Name().
+     static bool DeclareSignature(
+         const TStringRef& name,
+         TType* userType,
+         IFunctionTypeInfoBuilder& builder,
+         bool typesOnly) {
+         Y_UNUSED(userType);
+         if (!(Name() == name)) {
+             return false;
+         }
+         builder.SimpleSignature<TResource<ResourceName>(double, double, ui32)>();
+         if (!typesOnly) {
+             builder.Implementation(new THistogram_Create<THistogramType, ResourceName>(builder.GetSourcePosition()));
+         }
+         return true;
+     }
+
+ private:
+     TSourcePosition Pos_;
+ };
+
+ // UDF "<Kind>Histogram_AddValue"(resource, value, weight): adds a weighted
+ // value to an existing histogram resource and returns that same resource.
+ template <typename THistogramType, const char* ResourceName>
+ class THistogram_AddValue: public TBoxedValue {
+ public:
+     using THistogramResource = TBoxedResource<THistogramType, ResourceName>;
+
+     THistogram_AddValue(TSourcePosition pos)
+         : Pos_(pos)
+     {}
+
+     // Function name: the resource name without its 10-character
+     // "Histogram." prefix, followed by "Histogram_AddValue".
+     static const TStringRef& Name() {
+         static auto udfName = TString(ResourceName).substr(10) + "Histogram_AddValue";
+         static auto udfNameRef = TStringRef(udfName);
+         return udfNameRef;
+     }
+
+ private:
+     TUnboxedValue Run(
+         const IValueBuilder* valueBuilder,
+         const TUnboxedValuePod* args) const override {
+         try {
+             Y_UNUSED(valueBuilder);
+             THistogramResource* target = static_cast<THistogramResource*>(args[0].AsBoxed().Get());
+             const double value = args[1].Get<double>();
+             const double weight = args[2].Get<double>();
+             target->Get()->Add(value, weight);
+             return TUnboxedValuePod(args[0]);
+         } catch (const std::exception& e) {
+             UdfTerminate((TStringBuilder() << Pos_ << " " << e.what()).data());
+         }
+     }
+
+ public:
+     // Claims the call only when `name` matches Name().
+     static bool DeclareSignature(
+         const TStringRef& name,
+         TType* userType,
+         IFunctionTypeInfoBuilder& builder,
+         bool typesOnly) {
+         Y_UNUSED(userType);
+         if (!(Name() == name)) {
+             return false;
+         }
+         builder.SimpleSignature<TResource<ResourceName>(TResource<ResourceName>, double, double)>();
+         if (!typesOnly) {
+             builder.Implementation(new THistogram_AddValue<THistogramType, ResourceName>(builder.GetSourcePosition()));
+         }
+         return true;
+     }
+
+ private:
+     TSourcePosition Pos_;
+ };
+
+ // UDF "<Kind>Histogram_Serialize"(resource): converts the histogram to its
+ // THistogram protobuf message and returns the serialized bytes as a string.
+ template <typename THistogramType, const char* ResourceName>
+ class THistogram_Serialize: public TBoxedValue {
+ public:
+     using THistogramResource = TBoxedResource<THistogramType, ResourceName>;
+
+     THistogram_Serialize(TSourcePosition pos)
+         : Pos_(pos)
+     {}
+
+     // Function name: the resource name without its 10-character
+     // "Histogram." prefix, followed by "Histogram_Serialize".
+     static const TStringRef& Name() {
+         static auto udfName = TString(ResourceName).substr(10) + "Histogram_Serialize";
+         static auto udfNameRef = TStringRef(udfName);
+         return udfNameRef;
+     }
+
+ private:
+     TUnboxedValue Run(
+         const IValueBuilder* valueBuilder,
+         const TUnboxedValuePod* args) const override {
+         try {
+             THistogram message;
+             TString bytes;
+             auto* const source = static_cast<THistogramResource*>(args[0].AsBoxed().Get());
+             source->Get()->ToProto(message);
+             // The boolean result of serialization is deliberately ignored.
+             Y_PROTOBUF_SUPPRESS_NODISCARD message.SerializeToString(&bytes);
+             return valueBuilder->NewString(bytes);
+         } catch (const std::exception& e) {
+             UdfTerminate((TStringBuilder() << Pos_ << " " << e.what()).data());
+         }
+     }
+
+ public:
+     // Claims the call only when `name` matches Name().
+     static bool DeclareSignature(
+         const TStringRef& name,
+         TType* userType,
+         IFunctionTypeInfoBuilder& builder,
+         bool typesOnly) {
+         Y_UNUSED(userType);
+         if (!(Name() == name)) {
+             return false;
+         }
+         builder.SimpleSignature<char*(TResource<ResourceName>)>();
+         if (!typesOnly) {
+             builder.Implementation(new THistogram_Serialize<THistogramType, ResourceName>(builder.GetSourcePosition()));
+         }
+         return true;
+     }
+
+ private:
+     TSourcePosition Pos_;
+ };
+
+ // UDF "<Kind>Histogram_Deserialize"(bytes, ui32): parses a serialized
+ // THistogram message and loads it into a new histogram resource built from
+ // the ui32 argument.
+ template <typename THistogramType, const char* ResourceName>
+ class THistogram_Deserialize: public TBoxedValue {
+ public:
+     using THistogramResource = TBoxedResource<THistogramType, ResourceName>;
+
+     THistogram_Deserialize(TSourcePosition pos)
+         : Pos_(pos)
+     {}
+
+     // Function name: the resource name without its 10-character
+     // "Histogram." prefix, followed by "Histogram_Deserialize".
+     static const TStringRef& Name() {
+         static auto udfName = TString(ResourceName).substr(10) + "Histogram_Deserialize";
+         static auto udfNameRef = TStringRef(udfName);
+         return udfNameRef;
+     }
+
+ private:
+     TUnboxedValue Run(
+         const IValueBuilder* valueBuilder,
+         const TUnboxedValuePod* args) const override {
+         try {
+             Y_UNUSED(valueBuilder);
+             THistogram message;
+             // The boolean result of parsing is deliberately ignored.
+             Y_PROTOBUF_SUPPRESS_NODISCARD message.ParseFromString(TString(args[0].AsStringRef()));
+             THolder<THistogramResource> restored(new THistogramResource(args[1].Get<ui32>()));
+             restored->Get()->FromProto(message);
+             return TUnboxedValuePod(restored.Release());
+         } catch (const std::exception& e) {
+             UdfTerminate((TStringBuilder() << Pos_ << " " << e.what()).data());
+         }
+     }
+
+ public:
+     // Claims the call only when `name` matches Name().
+     static bool DeclareSignature(
+         const TStringRef& name,
+         TType* userType,
+         IFunctionTypeInfoBuilder& builder,
+         bool typesOnly) {
+         Y_UNUSED(userType);
+         if (!(Name() == name)) {
+             return false;
+         }
+         builder.SimpleSignature<TResource<ResourceName>(char*, ui32)>();
+         if (!typesOnly) {
+             builder.Implementation(new THistogram_Deserialize<THistogramType, ResourceName>(builder.GetSourcePosition()));
+         }
+         return true;
+     }
+
+ private:
+     TSourcePosition Pos_;
+ };
+
+ // UDF "<Kind>Histogram_Merge"(a, b): merges histogram `a` into histogram
+ // `b` with multiplier 1.0 and returns `b`.
+ template <typename THistogramType, const char* ResourceName>
+ class THistogram_Merge: public TBoxedValue {
+ public:
+     using THistogramResource = TBoxedResource<THistogramType, ResourceName>;
+
+     THistogram_Merge(TSourcePosition pos)
+         : Pos_(pos)
+     {}
+
+     // Function name: the resource name without its 10-character
+     // "Histogram." prefix, followed by "Histogram_Merge".
+     static const TStringRef& Name() {
+         static auto udfName = TString(ResourceName).substr(10) + "Histogram_Merge";
+         static auto udfNameRef = TStringRef(udfName);
+         return udfNameRef;
+     }
+
+ private:
+     TUnboxedValue Run(
+         const IValueBuilder* valueBuilder,
+         const TUnboxedValuePod* args) const override {
+         try {
+             Y_UNUSED(valueBuilder);
+             // The first argument is exported to protobuf form and that
+             // message is merged into the second argument.
+             THistogram exported;
+             auto* const source = static_cast<THistogramResource*>(args[0].AsBoxed().Get());
+             source->Get()->ToProto(exported);
+             auto* const target = static_cast<THistogramResource*>(args[1].AsBoxed().Get());
+             target->Get()->Merge(exported, 1.0);
+             return TUnboxedValuePod(args[1]);
+         } catch (const std::exception& e) {
+             UdfTerminate((TStringBuilder() << Pos_ << " " << e.what()).data());
+         }
+     }
+
+ public:
+     // Claims the call only when `name` matches Name().
+     static bool DeclareSignature(
+         const TStringRef& name,
+         TType* userType,
+         IFunctionTypeInfoBuilder& builder,
+         bool typesOnly) {
+         Y_UNUSED(userType);
+         if (!(Name() == name)) {
+             return false;
+         }
+         builder.SimpleSignature<TResource<ResourceName>(TResource<ResourceName>, TResource<ResourceName>)>();
+         if (!typesOnly) {
+             builder.Implementation(new THistogram_Merge<THistogramType, ResourceName>(builder.GetSourcePosition()));
+         }
+         return true;
+     }
+
+ private:
+     TSourcePosition Pos_;
+ };
+
+ // Describes the struct type used for histogram results and remembers the
+ // member positions assigned by the type builder.
+ // Result struct: {Kind: String, Min, Max, WeightsSum: Double,
+ // Bins: List<Struct<Position: Double, Frequency: Double>>}.
+ struct THistogramIndexes {
+ static constexpr ui32 BinFieldsCount = 2U;
+ static constexpr ui32 ResultFieldsCount = 5U;
+
+ // Builds the bin struct, the list of bins and the result struct; the
+ // builder writes each member's position into the matching ui32 field.
+ THistogramIndexes(IFunctionTypeInfoBuilder& builder) {
+ const auto binStructType = builder.Struct(BinFieldsCount)->AddField<double>("Position", &Position).AddField<double>("Frequency", &Frequency).Build();
+ const auto binsList = builder.List()->Item(binStructType).Build();
+ ResultStructType = builder.Struct(ResultFieldsCount)->AddField<char*>("Kind", &Kind).AddField<double>("Min", &Min).AddField<double>("Max", &Max).AddField<double>("WeightsSum", &WeightsSum).AddField("Bins", binsList, &Bins).Build();
+ }
+
+ // Member positions inside the result struct.
+ ui32 Kind;
+ ui32 Min;
+ ui32 Max;
+ ui32 WeightsSum;
+ ui32 Bins;
+
+ // Member positions inside a single bin struct.
+ ui32 Position;
+ ui32 Frequency;
+
+ TType* ResultStructType;
+ };
+
+ // UDF "<Kind>Histogram_GetResult"(resource): converts a histogram resource
+ // into the result struct described by THistogramIndexes
+ // ({Kind, Min, Max, WeightsSum, Bins}).
+ template <typename THistogramType, const char* ResourceName>
+ class THistogram_GetResult: public TBoxedValue {
+ public:
+     typedef TBoxedResource<THistogramType, ResourceName> THistogramResource;
+
+     THistogram_GetResult(const THistogramIndexes& histogramIndexes, TSourcePosition pos)
+         : HistogramIndexes(histogramIndexes)
+         , Pos_(pos)
+     {
+     }
+
+     // Function name: the resource name without its 10-character
+     // "Histogram." prefix, followed by "Histogram_GetResult".
+     static const TStringRef& Name() {
+         static auto name = TString(ResourceName).substr(10) + "Histogram_GetResult";
+         static auto nameRef = TStringRef(name);
+         return nameRef;
+     }
+
+ private:
+     // Exports the histogram to protobuf form and copies it into the result
+     // struct. An empty histogram yields an empty Bins list and zeroed
+     // Min/Max/WeightsSum. As in the sibling UDFs of this file, any
+     // std::exception is turned into UdfTerminate with the call-site position
+     // instead of escaping from Run.
+     TUnboxedValue Run(
+         const IValueBuilder* valueBuilder,
+         const TUnboxedValuePod* args) const override {
+         try {
+             THistogram proto;
+             auto histogram = static_cast<THistogramResource*>(args[0].AsBoxed().Get())->Get();
+             histogram->ToProto(proto);
+
+             auto size = proto.FreqSize();
+             TUnboxedValue* fields = nullptr;
+             auto result = valueBuilder->NewArray(HistogramIndexes.ResultFieldsCount, fields);
+             // Kind is the resource name without the "Histogram." prefix.
+             fields[HistogramIndexes.Kind] = valueBuilder->NewString(TStringBuf(ResourceName).Skip(10));
+             if (size) {
+                 TUnboxedValue* items = nullptr;
+                 fields[HistogramIndexes.Bins] = valueBuilder->NewArray(size, items);
+                 fields[HistogramIndexes.Min] = TUnboxedValuePod(static_cast<double>(histogram->GetMinValue()));
+                 fields[HistogramIndexes.Max] = TUnboxedValuePod(static_cast<double>(histogram->GetMaxValue()));
+                 fields[HistogramIndexes.WeightsSum] = TUnboxedValuePod(static_cast<double>(histogram->GetSum()));
+                 for (ui64 i = 0; i < size; ++i) {
+                     TUnboxedValue* binFields = nullptr;
+                     *items++ = valueBuilder->NewArray(HistogramIndexes.BinFieldsCount, binFields);
+                     binFields[HistogramIndexes.Frequency] = TUnboxedValuePod(static_cast<double>(proto.GetFreq(i)));
+                     binFields[HistogramIndexes.Position] = TUnboxedValuePod(static_cast<double>(proto.GetPosition(i)));
+                 }
+             } else {
+                 fields[HistogramIndexes.Bins] = valueBuilder->NewEmptyList();
+                 fields[HistogramIndexes.Min] = TUnboxedValuePod(0.0);
+                 fields[HistogramIndexes.Max] = TUnboxedValuePod(0.0);
+                 fields[HistogramIndexes.WeightsSum] = TUnboxedValuePod(0.0);
+             }
+
+             return result;
+         } catch (const std::exception& e) {
+             UdfTerminate((TStringBuilder() << Pos_ << " " << e.what()).data());
+         }
+     }
+
+ public:
+     // Declares <result struct>(Resource<ResourceName>); claims the call only
+     // when `name` matches Name().
+     static bool DeclareSignature(
+         const TStringRef& name,
+         TType* userType,
+         IFunctionTypeInfoBuilder& builder,
+         bool typesOnly) {
+         Y_UNUSED(userType);
+         if (Name() == name) {
+             auto resource = builder.Resource(TStringRef(ResourceName, std::strlen(ResourceName)));
+
+             THistogramIndexes histogramIndexes(builder);
+
+             builder.Args()->Add(resource).Done().Returns(histogramIndexes.ResultStructType);
+
+             if (!typesOnly) {
+                 builder.Implementation(new THistogram_GetResult<THistogramType, ResourceName>(histogramIndexes, builder.GetSourcePosition()));
+             }
+             return true;
+         } else {
+             return false;
+         }
+     }
+
+ private:
+     const THistogramIndexes HistogramIndexes;
+     TSourcePosition Pos_;
+ };
+
+ // Linear histogram creation: args are (value, step, begin, end); the first
+ // value is added with weight 1.0.
+ template <>
+ TUnboxedValue THistogram_Create<TLinearHistogram, LinearHistogramResourceName>::Run(
+     const IValueBuilder* valueBuilder,
+     const TUnboxedValuePod* args) const {
+     using TBoxed = THistogram_Create<TLinearHistogram, LinearHistogramResourceName>::THistogramResource;
+     try {
+         Y_UNUSED(valueBuilder);
+         THolder<TBoxed> created(new TBoxed(
+             args[1].Get<double>(), args[2].Get<double>(), args[3].Get<double>()));
+         const double firstValue = args[0].Get<double>();
+         created->Get()->Add(firstValue, 1.0);
+         return TUnboxedValuePod(created.Release());
+     } catch (const std::exception& e) {
+         UdfTerminate((TStringBuilder() << Pos_ << " " << e.what()).data());
+     }
+ }
+
+ // Signature for linear creation: Resource(Double, Double, Double, Double).
+ template <>
+ bool THistogram_Create<TLinearHistogram, LinearHistogramResourceName>::DeclareSignature(
+     const TStringRef& name,
+     TType* userType,
+     IFunctionTypeInfoBuilder& builder,
+     bool typesOnly) {
+     Y_UNUSED(userType);
+     if (!(Name() == name)) {
+         return false;
+     }
+     builder.SimpleSignature<TResource<LinearHistogramResourceName>(double, double, double, double)>();
+     if (!typesOnly) {
+         builder.Implementation(new THistogram_Create<TLinearHistogram, LinearHistogramResourceName>(builder.GetSourcePosition()));
+     }
+     return true;
+ }
+
+ // Linear histogram deserialization: args are (bytes, step, begin, end).
+ template <>
+ TUnboxedValue THistogram_Deserialize<TLinearHistogram, LinearHistogramResourceName>::Run(
+     const IValueBuilder* valueBuilder,
+     const TUnboxedValuePod* args) const {
+     using TBoxed = THistogram_Deserialize<TLinearHistogram, LinearHistogramResourceName>::THistogramResource;
+     try {
+         Y_UNUSED(valueBuilder);
+         THistogram message;
+         // The boolean result of parsing is deliberately ignored.
+         Y_PROTOBUF_SUPPRESS_NODISCARD message.ParseFromString(TString(args[0].AsStringRef()));
+         THolder<TBoxed> restored(
+             new TBoxed(args[1].Get<double>(), args[2].Get<double>(), args[3].Get<double>()));
+         restored->Get()->FromProto(message);
+         return TUnboxedValuePod(restored.Release());
+     } catch (const std::exception& e) {
+         UdfTerminate((TStringBuilder() << Pos_ << " " << e.what()).data());
+     }
+ }
+
+ // Signature for linear deserialization: Resource(String, Double, Double, Double).
+ template <>
+ bool THistogram_Deserialize<TLinearHistogram, LinearHistogramResourceName>::DeclareSignature(
+     const TStringRef& name,
+     TType* userType,
+     IFunctionTypeInfoBuilder& builder,
+     bool typesOnly) {
+     Y_UNUSED(userType);
+     if (!(Name() == name)) {
+         return false;
+     }
+     builder.SimpleSignature<TResource<LinearHistogramResourceName>(char*, double, double, double)>();
+     if (!typesOnly) {
+         builder.Implementation(new THistogram_Deserialize<TLinearHistogram, LinearHistogramResourceName>(builder.GetSourcePosition()));
+     }
+     return true;
+ }
+
+ // Logarithmic histogram creation: args are (value, step, begin, end); the
+ // first value is added with weight 1.0.
+ template <>
+ TUnboxedValue THistogram_Create<TLogarithmicHistogram, LogarithmicHistogramResourceName>::Run(
+     const IValueBuilder* valueBuilder,
+     const TUnboxedValuePod* args) const {
+     using TBoxed = THistogram_Create<TLogarithmicHistogram, LogarithmicHistogramResourceName>::THistogramResource;
+     try {
+         Y_UNUSED(valueBuilder);
+         THolder<TBoxed> created(new TBoxed(
+             args[1].Get<double>(), args[2].Get<double>(), args[3].Get<double>()));
+         const double firstValue = args[0].Get<double>();
+         created->Get()->Add(firstValue, 1.0);
+         return TUnboxedValuePod(created.Release());
+     } catch (const std::exception& e) {
+         UdfTerminate((TStringBuilder() << Pos_ << " " << e.what()).data());
+     }
+ }
+
+ // Signature for logarithmic creation: Resource(Double, Double, Double, Double).
+ template <>
+ bool THistogram_Create<TLogarithmicHistogram, LogarithmicHistogramResourceName>::DeclareSignature(
+     const TStringRef& name,
+     TType* userType,
+     IFunctionTypeInfoBuilder& builder,
+     bool typesOnly) {
+     Y_UNUSED(userType);
+     if (!(Name() == name)) {
+         return false;
+     }
+     builder.SimpleSignature<TResource<LogarithmicHistogramResourceName>(double, double, double, double)>();
+     if (!typesOnly) {
+         builder.Implementation(new THistogram_Create<TLogarithmicHistogram, LogarithmicHistogramResourceName>(builder.GetSourcePosition()));
+     }
+     return true;
+ }
+
+ // Logarithmic histogram deserialization: args are (bytes, step, begin, end).
+ template <>
+ TUnboxedValue THistogram_Deserialize<TLogarithmicHistogram, LogarithmicHistogramResourceName>::Run(
+     const IValueBuilder* valueBuilder,
+     const TUnboxedValuePod* args) const {
+     using TBoxed = THistogram_Deserialize<TLogarithmicHistogram, LogarithmicHistogramResourceName>::THistogramResource;
+     try {
+         Y_UNUSED(valueBuilder);
+         THistogram message;
+         // The boolean result of parsing is deliberately ignored.
+         Y_PROTOBUF_SUPPRESS_NODISCARD message.ParseFromString(TString(args[0].AsStringRef()));
+         THolder<TBoxed> restored(
+             new TBoxed(args[1].Get<double>(), args[2].Get<double>(), args[3].Get<double>()));
+         restored->Get()->FromProto(message);
+         return TUnboxedValuePod(restored.Release());
+     } catch (const std::exception& e) {
+         UdfTerminate((TStringBuilder() << Pos_ << " " << e.what()).data());
+     }
+ }
+
+ // Signature for logarithmic deserialization: Resource(String, Double, Double, Double).
+ template <>
+ bool THistogram_Deserialize<TLogarithmicHistogram, LogarithmicHistogramResourceName>::DeclareSignature(
+     const TStringRef& name,
+     TType* userType,
+     IFunctionTypeInfoBuilder& builder,
+     bool typesOnly) {
+     Y_UNUSED(userType);
+     if (!(Name() == name)) {
+         return false;
+     }
+     builder.SimpleSignature<TResource<LogarithmicHistogramResourceName>(char*, double, double, double)>();
+     if (!typesOnly) {
+         builder.Implementation(new THistogram_Deserialize<TLogarithmicHistogram, LogarithmicHistogramResourceName>(builder.GetSourcePosition()));
+     }
+     return true;
+ }
+
+ // UDF "Print"(histogram result struct [, bars: ui8?]): renders a histogram
+ // result as multi-line text, one line per bin, optionally prefixed with a
+ // bar scaled to the largest bin frequency.
+ class THistogramPrint: public TBoxedValue {
+ public:
+ THistogramPrint(const THistogramIndexes& histogramIndexes)
+ : HistogramIndexes(histogramIndexes)
+ {
+ }
+
+ static const TStringRef& Name() {
+ static auto name = TStringRef::Of("Print");
+ return name;
+ }
+
+ // args[0] is the histogram result struct; args[1] is the optional bar
+ // width in characters (25 when absent, 0 disables the bars).
+ TUnboxedValue Run(
+ const IValueBuilder* valueBuilder,
+ const TUnboxedValuePod* args) const override {
+ auto kind = args[0].GetElement(HistogramIndexes.Kind);
+ auto bins = args[0].GetElement(HistogramIndexes.Bins);
+ double min = args[0].GetElement(HistogramIndexes.Min).Get<double>();
+ double max = args[0].GetElement(HistogramIndexes.Max).Get<double>();
+ double weightsSum = args[0].GetElement(HistogramIndexes.WeightsSum).Get<double>();
+ auto binsIterator = bins.GetListIterator();
+
+ // Header line: kind, number of bins and the summary statistics.
+ TStringBuilder result;
+ result << "Kind: " << (TStringBuf)kind.AsStringRef() << ' ';
+ result << Sprintf("Bins: %" PRIu64 " WeightsSum: %.3f Min: %.3f Max: %.3f",
+ bins.GetListLength(), weightsSum, min, max);
+ double maxFrequency = 0.0;
+ size_t maxPositionLength = 0;
+ size_t maxFrequencyLength = 0;
+ const ui8 bars = args[1].GetOrDefault<ui8>(25);
+
+ // First pass: find the largest frequency (bar scale) and the widest
+ // formatted position/frequency (column widths).
+ for (TUnboxedValue current; binsIterator.Next(current);) {
+ if (bars) {
+ double frequency = current.GetElement(HistogramIndexes.Frequency).Get<double>();
+ if (frequency > maxFrequency) {
+ maxFrequency = frequency;
+ }
+ }
+ size_t positionLength = Sprintf("%.3f", current.GetElement(HistogramIndexes.Position).Get<double>()).length();
+ size_t frequencyLength = Sprintf("%.3f", current.GetElement(HistogramIndexes.Frequency).Get<double>()).length();
+
+ if (positionLength > maxPositionLength) {
+ maxPositionLength = positionLength;
+ }
+ if (frequencyLength > maxFrequencyLength) {
+ maxFrequencyLength = frequencyLength;
+ }
+ }
+
+ // Second pass: emit one line per bin.
+ binsIterator = bins.GetListIterator();
+ for (TUnboxedValue current; binsIterator.Next(current);) {
+ double position = current.GetElement(HistogramIndexes.Position).Get<double>();
+ double frequency = current.GetElement(HistogramIndexes.Frequency).Get<double>();
+ result << "\n";
+ // Bars are drawn only when enabled and some frequency is positive.
+ if (bars && maxFrequency > 0) {
+ ui8 filledBars = static_cast<ui8>(bars * frequency / maxFrequency);
+ for (ui8 i = 0; i < bars; ++i) {
+ if (i < filledBars) {
+ result << "█";
+ } else {
+ result << "░";
+ }
+ }
+ }
+ result << " P: " << LeftPad(Sprintf("%.3f", position), maxPositionLength);
+ result << " F: " << LeftPad(Sprintf("%.3f", frequency), maxFrequencyLength);
+ }
+
+ return valueBuilder->NewString(result);
+ }
+
+ // Declares String(<result struct> with AutoMap, ui8?) with one optional
+ // argument and marks the function strict; claims the call only when
+ // `name` matches Name().
+ static bool DeclareSignature(
+ const TStringRef& name,
+ TType* userType,
+ IFunctionTypeInfoBuilder& builder,
+ bool typesOnly) {
+ Y_UNUSED(userType);
+ if (Name() == name) {
+ THistogramIndexes histogramIndexes(builder);
+ auto optionalUi8 = builder.Optional()->Item<ui8>().Build();
+
+ builder.Args()->Add(histogramIndexes.ResultStructType).Flags(ICallablePayload::TArgumentFlags::AutoMap).Add(optionalUi8).Done().OptionalArgs(1).Returns<char*>();
+
+ if (!typesOnly) {
+ builder.Implementation(new THistogramPrint(histogramIndexes));
+ }
+ builder.IsStrict();
+ return true;
+ } else {
+ return false;
+ }
+ }
+
+ private:
+ const THistogramIndexes HistogramIndexes;
+ };
+
+ // UDF "ToCumulativeDistributionFunction"(histogram result struct): returns
+ // a result struct of the same shape in which every bin's Frequency is the
+ // running total of the input frequencies and Kind gets a "Cdf" suffix.
+ class THistogramToCumulativeDistributionFunction: public TBoxedValue {
+ public:
+     THistogramToCumulativeDistributionFunction(const THistogramIndexes& histogramIndexes)
+         : HistogramIndexes(histogramIndexes)
+     {
+     }
+
+     static const TStringRef& Name() {
+         static auto udfName = TStringRef::Of("ToCumulativeDistributionFunction");
+         return udfName;
+     }
+
+     // Min and Max are copied through unchanged; WeightsSum of the output is
+     // the sum of the cumulative frequencies written to the output bins.
+     TUnboxedValue Run(
+         const IValueBuilder* valueBuilder,
+         const TUnboxedValuePod* args) const override {
+         TUnboxedValue* outFields = nullptr;
+         auto output = valueBuilder->NewArray(HistogramIndexes.ResultFieldsCount, outFields);
+         auto bins = args[0].GetElement(HistogramIndexes.Bins);
+         double minValue = args[0].GetElement(HistogramIndexes.Min).Get<double>();
+         double maxValue = args[0].GetElement(HistogramIndexes.Max).Get<double>();
+
+         double runningTotal = 0.0;
+         double totalOfTotals = 0.0;
+         std::vector<TUnboxedValue> cdfBins;
+         if (bins.HasFastListLength()) {
+             cdfBins.reserve(bins.GetListLength());
+         }
+
+         const auto binsIterator = bins.GetListIterator();
+         for (TUnboxedValue bin; binsIterator.Next(bin);) {
+             TUnboxedValue* outBinFields = nullptr;
+             auto outBin = valueBuilder->NewArray(HistogramIndexes.BinFieldsCount, outBinFields);
+             const auto frequency = bin.GetElement(HistogramIndexes.Frequency).Get<double>();
+             runningTotal += frequency;
+             totalOfTotals += runningTotal;
+             outBinFields[HistogramIndexes.Frequency] = TUnboxedValuePod(runningTotal);
+             outBinFields[HistogramIndexes.Position] = bin.GetElement(HistogramIndexes.Position);
+             cdfBins.emplace_back(std::move(outBin));
+         }
+
+         auto kind = args[0].GetElement(HistogramIndexes.Kind);
+         outFields[HistogramIndexes.Kind] = valueBuilder->AppendString(kind, "Cdf");
+         outFields[HistogramIndexes.Bins] = valueBuilder->NewList(cdfBins.data(), cdfBins.size());
+         outFields[HistogramIndexes.Max] = TUnboxedValuePod(maxValue);
+         outFields[HistogramIndexes.Min] = TUnboxedValuePod(minValue);
+         outFields[HistogramIndexes.WeightsSum] = TUnboxedValuePod(totalOfTotals);
+         return output;
+     }
+
+     // Declares <result struct>(<result struct> with AutoMap) and marks the
+     // function strict; claims the call only when `name` matches Name().
+     static bool DeclareSignature(
+         const TStringRef& name,
+         TType* userType,
+         IFunctionTypeInfoBuilder& builder,
+         bool typesOnly) {
+         Y_UNUSED(userType);
+         if (!(Name() == name)) {
+             return false;
+         }
+         THistogramIndexes histogramIndexes(builder);
+
+         builder.Args()->Add(histogramIndexes.ResultStructType).Flags(ICallablePayload::TArgumentFlags::AutoMap).Done().Returns(histogramIndexes.ResultStructType);
+
+         if (!typesOnly) {
+             builder.Implementation(new THistogramToCumulativeDistributionFunction(histogramIndexes));
+         }
+         builder.IsStrict();
+         return true;
+     }
+
+ private:
+     const THistogramIndexes HistogramIndexes;
+ };
+
+ // UDF "Normalize": rescales the bin frequencies of a histogram struct.
+ //
+ // Arguments: the histogram struct (AutoMap), an optional Double area
+ // (100.0 when absent) and an optional Bool CDF flag (false when absent).
+ // Without the flag each frequency becomes area * frequency / (sum of all
+ // frequencies); with it the divisor is the frequency of the last bin and
+ // "Cdf" is appended to Kind. Positions, Min and Max are copied unchanged;
+ // WeightsSum is the sum of the rescaled frequencies.
+ class THistogramNormalize: public TBoxedValue {
+ public:
+     THistogramNormalize(const THistogramIndexes& histogramIndexes)
+         : HistogramIndexes(histogramIndexes)
+     {
+     }
+
+     // Name that DeclareSignature() matches the requested function against.
+     static const TStringRef& Name() {
+         static auto udfName = TStringRef::Of("Normalize");
+         return udfName;
+     }
+
+     TUnboxedValue Run(
+         const IValueBuilder* valueBuilder,
+         const TUnboxedValuePod* args) const override {
+         const THistogramIndexes& idx = HistogramIndexes;
+
+         TUnboxedValue* fields = nullptr;
+         auto result = valueBuilder->NewArray(idx.ResultFieldsCount, fields);
+
+         auto bins = args[0].GetElement(idx.Bins);
+         const double minValue = args[0].GetElement(idx.Min).Get<double>();
+         const double maxValue = args[0].GetElement(idx.Max).Get<double>();
+         const double area = args[1].GetOrDefault<double>(100.0);
+         const bool cdfNormalization = args[2].GetOrDefault<bool>(false);
+
+         // First pass: total of all frequencies and the frequency of the last bin.
+         double sum = 0.0;
+         double lastBinFrequency = 0.0;
+         {
+             auto totalsIterator = bins.GetListIterator();
+             for (TUnboxedValue bin; totalsIterator.Next(bin);) {
+                 const double binFrequency = bin.GetElement(idx.Frequency).Get<double>();
+                 sum += binFrequency;
+                 lastBinFrequency = binFrequency;
+             }
+         }
+
+         std::vector<TUnboxedValue> resultBins;
+         if (bins.HasFastListLength()) {
+             resultBins.reserve(bins.GetListLength());
+         }
+
+         // The divisor does not depend on the bin, so it is selected once.
+         const double divisor = cdfNormalization ? lastBinFrequency : sum;
+
+         // Second pass: emit one rescaled bin per input bin.
+         double weightsSum = 0.0;
+         auto binsIterator = bins.GetListIterator();
+         for (TUnboxedValue current; binsIterator.Next(current);) {
+             TUnboxedValue* binFields = nullptr;
+             auto resultCurrent = valueBuilder->NewArray(idx.BinFieldsCount, binFields);
+             const double frequency = area * current.GetElement(idx.Frequency).Get<double>() / divisor;
+             weightsSum += frequency;
+             binFields[idx.Frequency] = TUnboxedValuePod(frequency);
+             binFields[idx.Position] = current.GetElement(idx.Position);
+             resultBins.emplace_back(std::move(resultCurrent));
+         }
+
+         TUnboxedValue kind = args[0].GetElement(idx.Kind);
+         if (cdfNormalization) {
+             kind = valueBuilder->AppendString(kind, "Cdf");
+         }
+
+         fields[idx.Kind] = kind;
+         fields[idx.Bins] = valueBuilder->NewList(resultBins.data(), resultBins.size());
+         fields[idx.Min] = TUnboxedValuePod(minValue);
+         fields[idx.Max] = TUnboxedValuePod(maxValue);
+         fields[idx.WeightsSum] = TUnboxedValuePod(weightsSum);
+         return result;
+     }
+
+     // Returns false when `name` differs from Name(). Otherwise declares
+     // (histogram struct with AutoMap, Optional<Double>, Optional<Bool>) ->
+     // histogram struct, installs the implementation unless only types were
+     // requested, marks the function via IsStrict(), and returns true.
+     static bool DeclareSignature(
+         const TStringRef& name,
+         TType* userType,
+         IFunctionTypeInfoBuilder& builder,
+         bool typesOnly) {
+         Y_UNUSED(userType);
+
+         if (!(Name() == name)) {
+             return false;
+         }
+
+         THistogramIndexes indexes(builder);
+         auto optionalDouble = builder.Optional()->Item<double>().Build();
+         auto optionalCdfNormalization = builder.Optional()->Item<bool>().Build();
+         builder.Args()
+             ->Add(indexes.ResultStructType)
+             .Flags(ICallablePayload::TArgumentFlags::AutoMap)
+             .Add(optionalDouble)
+             .Add(optionalCdfNormalization)
+             .Done()
+             .Returns(indexes.ResultStructType);
+         // NOTE(review): both calls are kept exactly as written; the second
+         // presumably supersedes the first - confirm against the builder API.
+         builder.OptionalArgs(1);
+         builder.OptionalArgs(2);
+
+         if (!typesOnly) {
+             builder.Implementation(new THistogramNormalize(indexes));
+         }
+         builder.IsStrict();
+         return true;
+     }
+
+ private:
+     const THistogramIndexes HistogramIndexes;
+ };
+
+ // Common base of the histogram method UDFs generated by the
+ // DECLARE_ONE_DOUBLE_ARG_METHOD_UDF / DECLARE_TWO_DOUBLE_ARG_METHOD_UDF macros.
+ //
+ // Run() rebuilds a THistogram of type HT_ADAPTIVE_HISTOGRAM from the histogram
+ // struct in args[0] and hands it to GetResult(). Any std::exception raised on
+ // the way is reported through UdfTerminate, prefixed with the source position.
+ // `twoArgs` selects whether the declared signature takes two Double arguments
+ // after the histogram or only one.
+ template <bool twoArgs>
+ class THistogramMethodBase: public TBoxedValue {
+ public:
+     THistogramMethodBase(const THistogramIndexes& histogramIndexes, TSourcePosition pos)
+         : HistogramIndexes(histogramIndexes)
+         , Pos_(pos)
+     {
+     }
+
+     // Computes the UDF result from the rebuilt histogram and the raw arguments.
+     virtual TUnboxedValue GetResult(
+         const THistogram& input,
+         const TUnboxedValuePod* args) const = 0;
+
+     TUnboxedValue Run(
+         const IValueBuilder*,
+         const TUnboxedValuePod* args) const override {
+         try {
+             THistogram rebuilt;
+             FillHistogram(args[0], rebuilt);
+             return GetResult(rebuilt, args);
+         } catch (const std::exception& e) {
+             UdfTerminate((TStringBuilder() << Pos_ << " " << e.what()).data());
+         }
+     }
+
+     // Declares (histogram struct with AutoMap, Double[, Double]) -> Double on
+     // `builder` and returns the THistogramIndexes built from it.
+     static THistogramIndexes DeclareSignatureBase(IFunctionTypeInfoBuilder& builder) {
+         THistogramIndexes indexes(builder);
+
+         if (twoArgs) {
+             builder.Args()
+                 ->Add(indexes.ResultStructType)
+                 .Flags(ICallablePayload::TArgumentFlags::AutoMap)
+                 .Add<double>()
+                 .Add<double>()
+                 .Done()
+                 .Returns<double>();
+         } else {
+             builder.Args()
+                 ->Add(indexes.ResultStructType)
+                 .Flags(ICallablePayload::TArgumentFlags::AutoMap)
+                 .Add<double>()
+                 .Done()
+                 .Returns<double>();
+         }
+         return indexes;
+     }
+
+ private:
+     // Copies Min, Max and every bin (frequency, then position) of the
+     // histogram struct `source` into `target`.
+     void FillHistogram(const TUnboxedValuePod& source, THistogram& target) const {
+         auto bins = source.GetElement(HistogramIndexes.Bins);
+         const double minValue = source.GetElement(HistogramIndexes.Min).template Get<double>();
+         const double maxValue = source.GetElement(HistogramIndexes.Max).template Get<double>();
+         auto binsIterator = bins.GetListIterator();
+
+         target.SetType(HT_ADAPTIVE_HISTOGRAM);
+         target.SetMinValue(minValue);
+         target.SetMaxValue(maxValue);
+         for (TUnboxedValue bin; binsIterator.Next(bin);) {
+             const double frequency = bin.GetElement(HistogramIndexes.Frequency).template Get<double>();
+             const double position = bin.GetElement(HistogramIndexes.Position).template Get<double>();
+             target.AddFreq(frequency);
+             target.AddPosition(position);
+         }
+     }
+
+ protected:
+     const THistogramIndexes HistogramIndexes;
+     TSourcePosition Pos_;
+ };
+
+// Generates class T<name>, a THistogramMethodBase<false> UDF whose Name() is
+// the stringized macro argument. Its DeclareSignature() returns false for any
+// other requested name; otherwise it declares the signature through
+// DeclareSignatureBase() and installs the implementation unless only types
+// were requested. GetResult() wraps the rebuilt histogram in a
+// TAdaptiveWardHistogram and returns <name>(args[1]) as a Double.
+// Documentation lives here because `//` comments cannot be placed on the
+// backslash-continued lines of the macro body.
+#define DECLARE_ONE_DOUBLE_ARG_METHOD_UDF(name) \
+ class T##name: public THistogramMethodBase<false> { \
+ public: \
+     T##name(const THistogramIndexes& histogramIndexes, TSourcePosition pos) \
+         : THistogramMethodBase<false>(histogramIndexes, pos) { \
+     } \
+     static const TStringRef& Name() { \
+         static auto udfName = TStringRef::Of(#name); \
+         return udfName; \
+     } \
+     static bool DeclareSignature( \
+         const TStringRef& name, \
+         TType* userType, \
+         IFunctionTypeInfoBuilder& builder, \
+         bool typesOnly) { \
+         Y_UNUSED(userType); \
+         if (!(Name() == name)) { \
+             return false; \
+         } \
+         const THistogramIndexes indexes = DeclareSignatureBase(builder); \
+         if (!typesOnly) { \
+             builder.Implementation(new T##name(indexes, \
+                                                builder.GetSourcePosition())); \
+         } \
+         return true; \
+     } \
+     TUnboxedValue GetResult( \
+         const THistogram& input, \
+         const TUnboxedValuePod* args) const override { \
+         TAdaptiveWardHistogram wardHistogram(input, input.FreqSize()); \
+         const double value = wardHistogram.name(args[1].Get<double>()); \
+         return TUnboxedValuePod(value); \
+     } \
+ };
+
+// Generates class T<name>, a THistogramMethodBase<true> UDF whose Name() is
+// the stringized macro argument. Its DeclareSignature() returns false for any
+// other requested name; otherwise it declares the signature through
+// DeclareSignatureBase() and installs the implementation unless only types
+// were requested. GetResult() wraps the rebuilt histogram in a
+// TAdaptiveWardHistogram and returns <name>(args[1], args[2]) as a Double.
+// Documentation lives here because `//` comments cannot be placed on the
+// backslash-continued lines of the macro body.
+#define DECLARE_TWO_DOUBLE_ARG_METHOD_UDF(name) \
+ class T##name: public THistogramMethodBase<true> { \
+ public: \
+     T##name(const THistogramIndexes& histogramIndexes, TSourcePosition pos) \
+         : THistogramMethodBase<true>(histogramIndexes, pos) { \
+     } \
+     static const TStringRef& Name() { \
+         static auto udfName = TStringRef::Of(#name); \
+         return udfName; \
+     } \
+     static bool DeclareSignature( \
+         const TStringRef& name, \
+         TType* userType, \
+         IFunctionTypeInfoBuilder& builder, \
+         bool typesOnly) { \
+         Y_UNUSED(userType); \
+         if (!(Name() == name)) { \
+             return false; \
+         } \
+         const THistogramIndexes indexes = DeclareSignatureBase(builder); \
+         if (!typesOnly) { \
+             builder.Implementation(new T##name(indexes, \
+                                                builder.GetSourcePosition())); \
+         } \
+         return true; \
+     } \
+     TUnboxedValue GetResult( \
+         const THistogram& input, \
+         const TUnboxedValuePod* args) const override { \
+         TAdaptiveWardHistogram wardHistogram(input, input.FreqSize()); \
+         const double value = wardHistogram.name(args[1].Get<double>(), args[2].Get<double>()); \
+         return TUnboxedValuePod(value); \
+     } \
+ };
+
+#define DECLARE_HISTOGRAM_UDF(functionName, histogramName) \
+ THistogram_##functionName<T##histogramName##Histogram, histogramName##HistogramResourceName>, // expands to one THistogram_<functionName><...> entry, trailing comma included, for use inside the SIMPLE_MODULE list
+
+#define DECLARE_HISTOGRAM_UDFS(name) \
+ HISTOGRAM_FUNCTION_MAP(DECLARE_HISTOGRAM_UDF, name) // feeds DECLARE_HISTOGRAM_UDF and `name` to HISTOGRAM_FUNCTION_MAP (defined outside this view; presumably one entry per histogram function)
+
+ HISTOGRAM_ONE_DOUBLE_ARG_METHODS_MAP(DECLARE_ONE_DOUBLE_ARG_METHOD_UDF) // passes the one-double-argument class generator to the method-list macro (defined outside this view)
+ HISTOGRAM_TWO_DOUBLE_ARG_METHODS_MAP(DECLARE_TWO_DOUBLE_ARG_METHOD_UDF) // passes the two-double-argument class generator to the method-list macro (defined outside this view)
+
+ SIMPLE_MODULE(THistogramModule, // declares THistogramModule from the UDF classes listed below
+ HISTOGRAM_ALGORITHMS_MAP(DECLARE_HISTOGRAM_UDFS) // DECLARE_HISTOGRAM_UDFS applied through the algorithm-list macro (defined outside this view)
+ HISTOGRAM_ONE_DOUBLE_ARG_METHODS_MAP(REGISTER_METHOD_UDF) // REGISTER_METHOD_UDF is defined outside this view; presumably yields one list entry per generated method class
+ HISTOGRAM_TWO_DOUBLE_ARG_METHODS_MAP(REGISTER_METHOD_UDF)
+ DECLARE_HISTOGRAM_UDFS(Linear) // THistogram_* entries for the Linear histogram
+ DECLARE_HISTOGRAM_UDFS(Logarithmic) // THistogram_* entries for the Logarithmic histogram
+ THistogramPrint,
+ THistogramNormalize,
+ THistogramToCumulativeDistributionFunction)
+}
+
+REGISTER_MODULES(THistogramModule) // registers the THistogramModule declared by SIMPLE_MODULE above
diff --git a/yql/essentials/udfs/common/histogram/test/canondata/result.json b/yql/essentials/udfs/common/histogram/test/canondata/result.json
new file mode 100644
index 00000000000..06f9e726a92
--- /dev/null
+++ b/yql/essentials/udfs/common/histogram/test/canondata/result.json
@@ -0,0 +1,57 @@
+{
+ "test.test[Algorithms]": [
+ {
+ "uri": "file://test.test_Algorithms_/results.txt"
+ }
+ ],
+ "test.test[Basic]": [
+ {
+ "uri": "file://test.test_Basic_/results.txt"
+ }
+ ],
+ "test.test[Distinct]": [
+ {
+ "uri": "file://test.test_Distinct_/results.txt"
+ }
+ ],
+ "test.test[Intervals]": [
+ {
+ "uri": "file://test.test_Intervals_/results.txt"
+ }
+ ],
+ "test.test[Linear]": [
+ {
+ "uri": "file://test.test_Linear_/results.txt"
+ }
+ ],
+ "test.test[Logarithmic]": [
+ {
+ "uri": "file://test.test_Logarithmic_/results.txt"
+ }
+ ],
+ "test.test[Methods]": [
+ {
+ "uri": "file://test.test_Methods_/results.txt"
+ }
+ ],
+ "test.test[Normalize]": [
+ {
+ "uri": "file://test.test_Normalize_/results.txt"
+ }
+ ],
+ "test.test[Print]": [
+ {
+ "uri": "file://test.test_Print_/results.txt"
+ }
+ ],
+ "test.test[ToCumulativeDistributionFunction]": [
+ {
+ "uri": "file://test.test_ToCumulativeDistributionFunction_/results.txt"
+ }
+ ],
+ "test.test[Weights]": [
+ {
+ "uri": "file://test.test_Weights_/results.txt"
+ }
+ ]
+}
diff --git a/yql/essentials/udfs/common/histogram/test/canondata/test.test_Algorithms_/results.txt b/yql/essentials/udfs/common/histogram/test/canondata/test.test_Algorithms_/results.txt
new file mode 100644
index 00000000000..37e9d6a36a2
--- /dev/null
+++ b/yql/essentials/udfs/common/histogram/test/canondata/test.test_Algorithms_/results.txt
@@ -0,0 +1,476 @@
+[
+ {
+ "Write" = [
+ {
+ "Type" = [
+ "ListType";
+ [
+ "StructType";
+ [
+ [
+ "adaptive_distance";
+ [
+ "OptionalType";
+ [
+ "StructType";
+ [
+ [
+ "Bins";
+ [
+ "ListType";
+ [
+ "StructType";
+ [
+ [
+ "Frequency";
+ [
+ "DataType";
+ "Double"
+ ]
+ ];
+ [
+ "Position";
+ [
+ "DataType";
+ "Double"
+ ]
+ ]
+ ]
+ ]
+ ]
+ ];
+ [
+ "Kind";
+ [
+ "DataType";
+ "String"
+ ]
+ ];
+ [
+ "Max";
+ [
+ "DataType";
+ "Double"
+ ]
+ ];
+ [
+ "Min";
+ [
+ "DataType";
+ "Double"
+ ]
+ ];
+ [
+ "WeightsSum";
+ [
+ "DataType";
+ "Double"
+ ]
+ ]
+ ]
+ ]
+ ]
+ ];
+ [
+ "adaptive_weight";
+ [
+ "OptionalType";
+ [
+ "StructType";
+ [
+ [
+ "Bins";
+ [
+ "ListType";
+ [
+ "StructType";
+ [
+ [
+ "Frequency";
+ [
+ "DataType";
+ "Double"
+ ]
+ ];
+ [
+ "Position";
+ [
+ "DataType";
+ "Double"
+ ]
+ ]
+ ]
+ ]
+ ]
+ ];
+ [
+ "Kind";
+ [
+ "DataType";
+ "String"
+ ]
+ ];
+ [
+ "Max";
+ [
+ "DataType";
+ "Double"
+ ]
+ ];
+ [
+ "Min";
+ [
+ "DataType";
+ "Double"
+ ]
+ ];
+ [
+ "WeightsSum";
+ [
+ "DataType";
+ "Double"
+ ]
+ ]
+ ]
+ ]
+ ]
+ ];
+ [
+ "adaptive_ward";
+ [
+ "OptionalType";
+ [
+ "StructType";
+ [
+ [
+ "Bins";
+ [
+ "ListType";
+ [
+ "StructType";
+ [
+ [
+ "Frequency";
+ [
+ "DataType";
+ "Double"
+ ]
+ ];
+ [
+ "Position";
+ [
+ "DataType";
+ "Double"
+ ]
+ ]
+ ]
+ ]
+ ]
+ ];
+ [
+ "Kind";
+ [
+ "DataType";
+ "String"
+ ]
+ ];
+ [
+ "Max";
+ [
+ "DataType";
+ "Double"
+ ]
+ ];
+ [
+ "Min";
+ [
+ "DataType";
+ "Double"
+ ]
+ ];
+ [
+ "WeightsSum";
+ [
+ "DataType";
+ "Double"
+ ]
+ ]
+ ]
+ ]
+ ]
+ ];
+ [
+ "block_weight";
+ [
+ "OptionalType";
+ [
+ "StructType";
+ [
+ [
+ "Bins";
+ [
+ "ListType";
+ [
+ "StructType";
+ [
+ [
+ "Frequency";
+ [
+ "DataType";
+ "Double"
+ ]
+ ];
+ [
+ "Position";
+ [
+ "DataType";
+ "Double"
+ ]
+ ]
+ ]
+ ]
+ ]
+ ];
+ [
+ "Kind";
+ [
+ "DataType";
+ "String"
+ ]
+ ];
+ [
+ "Max";
+ [
+ "DataType";
+ "Double"
+ ]
+ ];
+ [
+ "Min";
+ [
+ "DataType";
+ "Double"
+ ]
+ ];
+ [
+ "WeightsSum";
+ [
+ "DataType";
+ "Double"
+ ]
+ ]
+ ]
+ ]
+ ]
+ ];
+ [
+ "block_ward";
+ [
+ "OptionalType";
+ [
+ "StructType";
+ [
+ [
+ "Bins";
+ [
+ "ListType";
+ [
+ "StructType";
+ [
+ [
+ "Frequency";
+ [
+ "DataType";
+ "Double"
+ ]
+ ];
+ [
+ "Position";
+ [
+ "DataType";
+ "Double"
+ ]
+ ]
+ ]
+ ]
+ ]
+ ];
+ [
+ "Kind";
+ [
+ "DataType";
+ "String"
+ ]
+ ];
+ [
+ "Max";
+ [
+ "DataType";
+ "Double"
+ ]
+ ];
+ [
+ "Min";
+ [
+ "DataType";
+ "Double"
+ ]
+ ];
+ [
+ "WeightsSum";
+ [
+ "DataType";
+ "Double"
+ ]
+ ]
+ ]
+ ]
+ ]
+ ];
+ [
+ "algo_equality_check";
+ [
+ "OptionalType";
+ [
+ "DataType";
+ "Bool"
+ ]
+ ]
+ ];
+ [
+ "quality_equality_check";
+ [
+ "OptionalType";
+ [
+ "DataType";
+ "Bool"
+ ]
+ ]
+ ]
+ ]
+ ]
+ ];
+ "Data" = [
+ [
+ [
+ [
+ [
+ [
+ "8";
+ "2"
+ ];
+ [
+ "3";
+ "5"
+ ];
+ [
+ "5";
+ "7"
+ ]
+ ];
+ "AdaptiveDistance";
+ "7";
+ "0";
+ "16"
+ ]
+ ];
+ [
+ [
+ [
+ [
+ "6";
+ "2"
+ ];
+ [
+ "5";
+ "3.799999952316284"
+ ];
+ [
+ "5";
+ "7"
+ ]
+ ];
+ "AdaptiveWeight";
+ "7";
+ "0";
+ "16"
+ ]
+ ];
+ [
+ [
+ [
+ [
+ "2";
+ "0.5"
+ ];
+ [
+ "6";
+ "2.5"
+ ];
+ [
+ "8";
+ "6.25"
+ ]
+ ];
+ "AdaptiveWard";
+ "7";
+ "0";
+ "16"
+ ]
+ ];
+ [
+ [
+ [
+ [
+ "5";
+ "1.399999976158142"
+ ];
+ [
+ "6";
+ "4"
+ ];
+ [
+ "5";
+ "7"
+ ]
+ ];
+ "BlockWeight";
+ "7";
+ "0";
+ "16"
+ ]
+ ];
+ [
+ [
+ [
+ [
+ "8";
+ "2"
+ ];
+ [
+ "3";
+ "5"
+ ];
+ [
+ "5";
+ "7"
+ ]
+ ];
+ "BlockWard";
+ "7";
+ "0";
+ "16"
+ ]
+ ];
+ [
+ %true
+ ];
+ [
+ %true
+ ]
+ ]
+ ]
+ }
+ ]
+ }
+] \ No newline at end of file
diff --git a/yql/essentials/udfs/common/histogram/test/canondata/test.test_Basic_/results.txt b/yql/essentials/udfs/common/histogram/test/canondata/test.test_Basic_/results.txt
new file mode 100644
index 00000000000..75ce4f96b2a
--- /dev/null
+++ b/yql/essentials/udfs/common/histogram/test/canondata/test.test_Basic_/results.txt
@@ -0,0 +1,338 @@
+[
+ {
+ "Write" = [
+ {
+ "Type" = [
+ "ListType";
+ [
+ "StructType";
+ [
+ [
+ "key_histogram";
+ [
+ "OptionalType";
+ [
+ "StructType";
+ [
+ [
+ "Bins";
+ [
+ "ListType";
+ [
+ "StructType";
+ [
+ [
+ "Frequency";
+ [
+ "DataType";
+ "Double"
+ ]
+ ];
+ [
+ "Position";
+ [
+ "DataType";
+ "Double"
+ ]
+ ]
+ ]
+ ]
+ ]
+ ];
+ [
+ "Kind";
+ [
+ "DataType";
+ "String"
+ ]
+ ];
+ [
+ "Max";
+ [
+ "DataType";
+ "Double"
+ ]
+ ];
+ [
+ "Min";
+ [
+ "DataType";
+ "Double"
+ ]
+ ];
+ [
+ "WeightsSum";
+ [
+ "DataType";
+ "Double"
+ ]
+ ]
+ ]
+ ]
+ ]
+ ];
+ [
+ "subkey_histogram";
+ [
+ "OptionalType";
+ [
+ "StructType";
+ [
+ [
+ "Bins";
+ [
+ "ListType";
+ [
+ "StructType";
+ [
+ [
+ "Frequency";
+ [
+ "DataType";
+ "Double"
+ ]
+ ];
+ [
+ "Position";
+ [
+ "DataType";
+ "Double"
+ ]
+ ]
+ ]
+ ]
+ ]
+ ];
+ [
+ "Kind";
+ [
+ "DataType";
+ "String"
+ ]
+ ];
+ [
+ "Max";
+ [
+ "DataType";
+ "Double"
+ ]
+ ];
+ [
+ "Min";
+ [
+ "DataType";
+ "Double"
+ ]
+ ];
+ [
+ "WeightsSum";
+ [
+ "DataType";
+ "Double"
+ ]
+ ]
+ ]
+ ]
+ ]
+ ];
+ [
+ "value_histogram";
+ [
+ "OptionalType";
+ [
+ "StructType";
+ [
+ [
+ "Bins";
+ [
+ "ListType";
+ [
+ "StructType";
+ [
+ [
+ "Frequency";
+ [
+ "DataType";
+ "Double"
+ ]
+ ];
+ [
+ "Position";
+ [
+ "DataType";
+ "Double"
+ ]
+ ]
+ ]
+ ]
+ ]
+ ];
+ [
+ "Kind";
+ [
+ "DataType";
+ "String"
+ ]
+ ];
+ [
+ "Max";
+ [
+ "DataType";
+ "Double"
+ ]
+ ];
+ [
+ "Min";
+ [
+ "DataType";
+ "Double"
+ ]
+ ];
+ [
+ "WeightsSum";
+ [
+ "DataType";
+ "Double"
+ ]
+ ]
+ ]
+ ]
+ ]
+ ]
+ ]
+ ]
+ ];
+ "Data" = [
+ [
+ [
+ [
+ [
+ [
+ "1";
+ "0"
+ ];
+ [
+ "1";
+ "1"
+ ];
+ [
+ "3";
+ "2"
+ ];
+ [
+ "3";
+ "3"
+ ];
+ [
+ "1";
+ "4"
+ ];
+ [
+ "1";
+ "5"
+ ];
+ [
+ "1";
+ "6"
+ ];
+ [
+ "5";
+ "7"
+ ]
+ ];
+ "AdaptiveWard";
+ "7";
+ "0";
+ "16"
+ ]
+ ];
+ [
+ [
+ [
+ [
+ "1";
+ "0"
+ ];
+ [
+ "1";
+ "1"
+ ];
+ [
+ "3";
+ "2"
+ ];
+ [
+ "3";
+ "4"
+ ];
+ [
+ "1";
+ "8"
+ ];
+ [
+ "1";
+ "16"
+ ];
+ [
+ "1";
+ "32"
+ ];
+ [
+ "5";
+ "64"
+ ]
+ ];
+ "AdaptiveWard";
+ "64";
+ "0";
+ "16"
+ ]
+ ];
+ [
+ [
+ [
+ [
+ "1";
+ "-1"
+ ];
+ [
+ "1";
+ "1"
+ ];
+ [
+ "1";
+ "2"
+ ];
+ [
+ "1";
+ "8"
+ ];
+ [
+ "1";
+ "32"
+ ];
+ [
+ "1";
+ "128"
+ ];
+ [
+ "1";
+ "512"
+ ];
+ [
+ "9";
+ "2048"
+ ]
+ ];
+ "AdaptiveWard";
+ "2048";
+ "-1";
+ "16"
+ ]
+ ]
+ ]
+ ]
+ }
+ ]
+ }
+] \ No newline at end of file
diff --git a/yql/essentials/udfs/common/histogram/test/canondata/test.test_Distinct_/results.txt b/yql/essentials/udfs/common/histogram/test/canondata/test.test_Distinct_/results.txt
new file mode 100644
index 00000000000..721c3a7f669
--- /dev/null
+++ b/yql/essentials/udfs/common/histogram/test/canondata/test.test_Distinct_/results.txt
@@ -0,0 +1,139 @@
+[
+ {
+ "Write" = [
+ {
+ "Type" = [
+ "ListType";
+ [
+ "StructType";
+ [
+ [
+ "key_histogram";
+ [
+ "OptionalType";
+ [
+ "StructType";
+ [
+ [
+ "Bins";
+ [
+ "ListType";
+ [
+ "StructType";
+ [
+ [
+ "Frequency";
+ [
+ "DataType";
+ "Double"
+ ]
+ ];
+ [
+ "Position";
+ [
+ "DataType";
+ "Double"
+ ]
+ ]
+ ]
+ ]
+ ]
+ ];
+ [
+ "Kind";
+ [
+ "DataType";
+ "String"
+ ]
+ ];
+ [
+ "Max";
+ [
+ "DataType";
+ "Double"
+ ]
+ ];
+ [
+ "Min";
+ [
+ "DataType";
+ "Double"
+ ]
+ ];
+ [
+ "WeightsSum";
+ [
+ "DataType";
+ "Double"
+ ]
+ ]
+ ]
+ ]
+ ]
+ ];
+ [
+ "is_different";
+ [
+ "OptionalType";
+ [
+ "DataType";
+ "Bool"
+ ]
+ ]
+ ]
+ ]
+ ]
+ ];
+ "Data" = [
+ [
+ [
+ [
+ [
+ [
+ "1";
+ "0"
+ ];
+ [
+ "1";
+ "1"
+ ];
+ [
+ "1";
+ "2"
+ ];
+ [
+ "1";
+ "3"
+ ];
+ [
+ "1";
+ "4"
+ ];
+ [
+ "1";
+ "5"
+ ];
+ [
+ "1";
+ "6"
+ ];
+ [
+ "1";
+ "7"
+ ]
+ ];
+ "AdaptiveWard";
+ "7";
+ "0";
+ "8"
+ ]
+ ];
+ [
+ %true
+ ]
+ ]
+ ]
+ }
+ ]
+ }
+] \ No newline at end of file
diff --git a/yql/essentials/udfs/common/histogram/test/canondata/test.test_Intervals_/results.txt b/yql/essentials/udfs/common/histogram/test/canondata/test.test_Intervals_/results.txt
new file mode 100644
index 00000000000..b19e1b600c5
--- /dev/null
+++ b/yql/essentials/udfs/common/histogram/test/canondata/test.test_Intervals_/results.txt
@@ -0,0 +1,290 @@
+[
+ {
+ "Write" = [
+ {
+ "Type" = [
+ "ListType";
+ [
+ "StructType";
+ [
+ [
+ "key_histogram";
+ [
+ "OptionalType";
+ [
+ "StructType";
+ [
+ [
+ "Bins";
+ [
+ "ListType";
+ [
+ "StructType";
+ [
+ [
+ "Frequency";
+ [
+ "DataType";
+ "Double"
+ ]
+ ];
+ [
+ "Position";
+ [
+ "DataType";
+ "Double"
+ ]
+ ]
+ ]
+ ]
+ ]
+ ];
+ [
+ "Kind";
+ [
+ "DataType";
+ "String"
+ ]
+ ];
+ [
+ "Max";
+ [
+ "DataType";
+ "Double"
+ ]
+ ];
+ [
+ "Min";
+ [
+ "DataType";
+ "Double"
+ ]
+ ];
+ [
+ "WeightsSum";
+ [
+ "DataType";
+ "Double"
+ ]
+ ]
+ ]
+ ]
+ ]
+ ];
+ [
+ "subkey_histogram";
+ [
+ "OptionalType";
+ [
+ "StructType";
+ [
+ [
+ "Bins";
+ [
+ "ListType";
+ [
+ "StructType";
+ [
+ [
+ "Frequency";
+ [
+ "DataType";
+ "Double"
+ ]
+ ];
+ [
+ "Position";
+ [
+ "DataType";
+ "Double"
+ ]
+ ]
+ ]
+ ]
+ ]
+ ];
+ [
+ "Kind";
+ [
+ "DataType";
+ "String"
+ ]
+ ];
+ [
+ "Max";
+ [
+ "DataType";
+ "Double"
+ ]
+ ];
+ [
+ "Min";
+ [
+ "DataType";
+ "Double"
+ ]
+ ];
+ [
+ "WeightsSum";
+ [
+ "DataType";
+ "Double"
+ ]
+ ]
+ ]
+ ]
+ ]
+ ];
+ [
+ "value_histogram";
+ [
+ "OptionalType";
+ [
+ "StructType";
+ [
+ [
+ "Bins";
+ [
+ "ListType";
+ [
+ "StructType";
+ [
+ [
+ "Frequency";
+ [
+ "DataType";
+ "Double"
+ ]
+ ];
+ [
+ "Position";
+ [
+ "DataType";
+ "Double"
+ ]
+ ]
+ ]
+ ]
+ ]
+ ];
+ [
+ "Kind";
+ [
+ "DataType";
+ "String"
+ ]
+ ];
+ [
+ "Max";
+ [
+ "DataType";
+ "Double"
+ ]
+ ];
+ [
+ "Min";
+ [
+ "DataType";
+ "Double"
+ ]
+ ];
+ [
+ "WeightsSum";
+ [
+ "DataType";
+ "Double"
+ ]
+ ]
+ ]
+ ]
+ ]
+ ]
+ ]
+ ]
+ ];
+ "Data" = [
+ [
+ [
+ [
+ [
+ [
+ "16";
+ "4.125"
+ ]
+ ];
+ "AdaptiveWard";
+ "4.124999999999999";
+ "4.124999999999999";
+ "16"
+ ]
+ ];
+ [
+ [
+ [
+ [
+ "9";
+ "3"
+ ];
+ [
+ "2";
+ "24"
+ ];
+ [
+ "5";
+ "64"
+ ]
+ ];
+ "AdaptiveWard";
+ "64";
+ "0";
+ "16"
+ ]
+ ];
+ [
+ [
+ [
+ [
+ "1";
+ "-1"
+ ];
+ [
+ "1";
+ "1"
+ ];
+ [
+ "1";
+ "2"
+ ];
+ [
+ "1";
+ "8"
+ ];
+ [
+ "1";
+ "32"
+ ];
+ [
+ "1";
+ "128"
+ ];
+ [
+ "1";
+ "512"
+ ];
+ [
+ "9";
+ "2048"
+ ]
+ ];
+ "AdaptiveWard";
+ "2048";
+ "-1";
+ "16"
+ ]
+ ]
+ ]
+ ]
+ }
+ ]
+ }
+] \ No newline at end of file
diff --git a/yql/essentials/udfs/common/histogram/test/canondata/test.test_Linear_/results.txt b/yql/essentials/udfs/common/histogram/test/canondata/test.test_Linear_/results.txt
new file mode 100644
index 00000000000..4ec10ae8d39
--- /dev/null
+++ b/yql/essentials/udfs/common/histogram/test/canondata/test.test_Linear_/results.txt
@@ -0,0 +1,330 @@
+[
+ {
+ "Write" = [
+ {
+ "Type" = [
+ "ListType";
+ [
+ "StructType";
+ [
+ [
+ "default";
+ [
+ "OptionalType";
+ [
+ "StructType";
+ [
+ [
+ "Bins";
+ [
+ "ListType";
+ [
+ "StructType";
+ [
+ [
+ "Frequency";
+ [
+ "DataType";
+ "Double"
+ ]
+ ];
+ [
+ "Position";
+ [
+ "DataType";
+ "Double"
+ ]
+ ]
+ ]
+ ]
+ ]
+ ];
+ [
+ "Kind";
+ [
+ "DataType";
+ "String"
+ ]
+ ];
+ [
+ "Max";
+ [
+ "DataType";
+ "Double"
+ ]
+ ];
+ [
+ "Min";
+ [
+ "DataType";
+ "Double"
+ ]
+ ];
+ [
+ "WeightsSum";
+ [
+ "DataType";
+ "Double"
+ ]
+ ]
+ ]
+ ]
+ ]
+ ];
+ [
+ "linear_size";
+ [
+ "OptionalType";
+ [
+ "StructType";
+ [
+ [
+ "Bins";
+ [
+ "ListType";
+ [
+ "StructType";
+ [
+ [
+ "Frequency";
+ [
+ "DataType";
+ "Double"
+ ]
+ ];
+ [
+ "Position";
+ [
+ "DataType";
+ "Double"
+ ]
+ ]
+ ]
+ ]
+ ]
+ ];
+ [
+ "Kind";
+ [
+ "DataType";
+ "String"
+ ]
+ ];
+ [
+ "Max";
+ [
+ "DataType";
+ "Double"
+ ]
+ ];
+ [
+ "Min";
+ [
+ "DataType";
+ "Double"
+ ]
+ ];
+ [
+ "WeightsSum";
+ [
+ "DataType";
+ "Double"
+ ]
+ ]
+ ]
+ ]
+ ]
+ ];
+ [
+ "linear_min_max";
+ [
+ "OptionalType";
+ [
+ "StructType";
+ [
+ [
+ "Bins";
+ [
+ "ListType";
+ [
+ "StructType";
+ [
+ [
+ "Frequency";
+ [
+ "DataType";
+ "Double"
+ ]
+ ];
+ [
+ "Position";
+ [
+ "DataType";
+ "Double"
+ ]
+ ]
+ ]
+ ]
+ ]
+ ];
+ [
+ "Kind";
+ [
+ "DataType";
+ "String"
+ ]
+ ];
+ [
+ "Max";
+ [
+ "DataType";
+ "Double"
+ ]
+ ];
+ [
+ "Min";
+ [
+ "DataType";
+ "Double"
+ ]
+ ];
+ [
+ "WeightsSum";
+ [
+ "DataType";
+ "Double"
+ ]
+ ]
+ ]
+ ]
+ ]
+ ]
+ ]
+ ]
+ ];
+ "Data" = [
+ [
+ [
+ [
+ [
+ [
+ "1";
+ "-2150"
+ ];
+ [
+ "1";
+ "-2050"
+ ];
+ [
+ "1";
+ "-50"
+ ];
+ [
+ "2";
+ "0"
+ ];
+ [
+ "1";
+ "10"
+ ];
+ [
+ "1";
+ "30"
+ ];
+ [
+ "1";
+ "90"
+ ];
+ [
+ "1";
+ "130"
+ ];
+ [
+ "1";
+ "510"
+ ];
+ [
+ "9";
+ "2050"
+ ]
+ ];
+ "Linear";
+ "2050";
+ "-2150";
+ "19"
+ ]
+ ];
+ [
+ [
+ [
+ [
+ "1";
+ "-2145"
+ ];
+ [
+ "1";
+ "-2046"
+ ];
+ [
+ "1";
+ "-33"
+ ];
+ [
+ "3";
+ "0"
+ ];
+ [
+ "1";
+ "33"
+ ];
+ [
+ "1";
+ "99"
+ ];
+ [
+ "1";
+ "132"
+ ];
+ [
+ "1";
+ "528"
+ ];
+ [
+ "9";
+ "2046"
+ ]
+ ];
+ "Linear";
+ "2046";
+ "-2145";
+ "19"
+ ]
+ ];
+ [
+ [
+ [
+ [
+ "9";
+ "100"
+ ];
+ [
+ "1";
+ "500"
+ ];
+ [
+ "9";
+ "1000"
+ ]
+ ];
+ "Linear";
+ "1000";
+ "100";
+ "19"
+ ]
+ ]
+ ]
+ ]
+ }
+ ]
+ }
+] \ No newline at end of file
diff --git a/yql/essentials/udfs/common/histogram/test/canondata/test.test_Logarithmic_/results.txt b/yql/essentials/udfs/common/histogram/test/canondata/test.test_Logarithmic_/results.txt
new file mode 100644
index 00000000000..f465dcea214
--- /dev/null
+++ b/yql/essentials/udfs/common/histogram/test/canondata/test.test_Logarithmic_/results.txt
@@ -0,0 +1,310 @@
+[
+ {
+ "Write" = [
+ {
+ "Type" = [
+ "ListType";
+ [
+ "StructType";
+ [
+ [
+ "default";
+ [
+ "OptionalType";
+ [
+ "StructType";
+ [
+ [
+ "Bins";
+ [
+ "ListType";
+ [
+ "StructType";
+ [
+ [
+ "Frequency";
+ [
+ "DataType";
+ "Double"
+ ]
+ ];
+ [
+ "Position";
+ [
+ "DataType";
+ "Double"
+ ]
+ ]
+ ]
+ ]
+ ]
+ ];
+ [
+ "Kind";
+ [
+ "DataType";
+ "String"
+ ]
+ ];
+ [
+ "Max";
+ [
+ "DataType";
+ "Double"
+ ]
+ ];
+ [
+ "Min";
+ [
+ "DataType";
+ "Double"
+ ]
+ ];
+ [
+ "WeightsSum";
+ [
+ "DataType";
+ "Double"
+ ]
+ ]
+ ]
+ ]
+ ]
+ ];
+ [
+ "log_size";
+ [
+ "OptionalType";
+ [
+ "StructType";
+ [
+ [
+ "Bins";
+ [
+ "ListType";
+ [
+ "StructType";
+ [
+ [
+ "Frequency";
+ [
+ "DataType";
+ "Double"
+ ]
+ ];
+ [
+ "Position";
+ [
+ "DataType";
+ "Double"
+ ]
+ ]
+ ]
+ ]
+ ]
+ ];
+ [
+ "Kind";
+ [
+ "DataType";
+ "String"
+ ]
+ ];
+ [
+ "Max";
+ [
+ "DataType";
+ "Double"
+ ]
+ ];
+ [
+ "Min";
+ [
+ "DataType";
+ "Double"
+ ]
+ ];
+ [
+ "WeightsSum";
+ [
+ "DataType";
+ "Double"
+ ]
+ ]
+ ]
+ ]
+ ]
+ ];
+ [
+ "log_min_max";
+ [
+ "OptionalType";
+ [
+ "StructType";
+ [
+ [
+ "Bins";
+ [
+ "ListType";
+ [
+ "StructType";
+ [
+ [
+ "Frequency";
+ [
+ "DataType";
+ "Double"
+ ]
+ ];
+ [
+ "Position";
+ [
+ "DataType";
+ "Double"
+ ]
+ ]
+ ]
+ ]
+ ]
+ ];
+ [
+ "Kind";
+ [
+ "DataType";
+ "String"
+ ]
+ ];
+ [
+ "Max";
+ [
+ "DataType";
+ "Double"
+ ]
+ ];
+ [
+ "Min";
+ [
+ "DataType";
+ "Double"
+ ]
+ ];
+ [
+ "WeightsSum";
+ [
+ "DataType";
+ "Double"
+ ]
+ ]
+ ]
+ ]
+ ]
+ ]
+ ]
+ ]
+ ];
+ "Data" = [
+ [
+ [
+ [
+ [
+ [
+ "1";
+ "1"
+ ];
+ [
+ "2";
+ "10"
+ ];
+ [
+ "3";
+ "100"
+ ];
+ [
+ "9";
+ "1000"
+ ]
+ ];
+ "Logarithmic";
+ "1000";
+ "1";
+ "15"
+ ]
+ ];
+ [
+ [
+ [
+ [
+ "1";
+ "2"
+ ];
+ [
+ "1";
+ "8"
+ ];
+ [
+ "1";
+ "32"
+ ];
+ [
+ "1";
+ "64"
+ ];
+ [
+ "1";
+ "128"
+ ];
+ [
+ "1";
+ "512"
+ ];
+ [
+ "9";
+ "2048"
+ ]
+ ];
+ "Logarithmic";
+ "2048";
+ "2";
+ "15"
+ ]
+ ];
+ [
+ [
+ [
+ [
+ "2";
+ "10"
+ ];
+ [
+ "1";
+ "32"
+ ];
+ [
+ "1";
+ "64"
+ ];
+ [
+ "1";
+ "128"
+ ];
+ [
+ "1";
+ "512"
+ ];
+ [
+ "9";
+ "2048"
+ ]
+ ];
+ "Logarithmic";
+ "2048";
+ "10";
+ "15"
+ ]
+ ]
+ ]
+ ]
+ }
+ ]
+ }
+] \ No newline at end of file
diff --git a/yql/essentials/udfs/common/histogram/test/canondata/test.test_Methods_/results.txt b/yql/essentials/udfs/common/histogram/test/canondata/test.test_Methods_/results.txt
new file mode 100644
index 00000000000..0f0b131b12d
--- /dev/null
+++ b/yql/essentials/udfs/common/histogram/test/canondata/test.test_Methods_/results.txt
@@ -0,0 +1,85 @@
+[
+ {
+ "Write" = [
+ {
+ "Type" = [
+ "ListType";
+ [
+ "StructType";
+ [
+ [
+ "GetSumAboveBound";
+ [
+ "OptionalType";
+ [
+ "DataType";
+ "Double"
+ ]
+ ]
+ ];
+ [
+ "GetSumBelowBound";
+ [
+ "OptionalType";
+ [
+ "DataType";
+ "Double"
+ ]
+ ]
+ ];
+ [
+ "CalcUpperBound";
+ [
+ "OptionalType";
+ [
+ "DataType";
+ "Double"
+ ]
+ ]
+ ];
+ [
+ "CalcLowerBound";
+ [
+ "OptionalType";
+ [
+ "DataType";
+ "Double"
+ ]
+ ]
+ ];
+ [
+ "GetSumInRange";
+ [
+ "OptionalType";
+ [
+ "DataType";
+ "Double"
+ ]
+ ]
+ ]
+ ]
+ ]
+ ];
+ "Data" = [
+ [
+ [
+ "6.5"
+ ];
+ [
+ "9.5"
+ ];
+ [
+ "2.5"
+ ];
+ [
+ "6.166666666666667"
+ ];
+ [
+ "6.5"
+ ]
+ ]
+ ]
+ }
+ ]
+ }
+] \ No newline at end of file
diff --git a/yql/essentials/udfs/common/histogram/test/canondata/test.test_Normalize_/results.txt b/yql/essentials/udfs/common/histogram/test/canondata/test.test_Normalize_/results.txt
new file mode 100644
index 00000000000..c81b76c18a1
--- /dev/null
+++ b/yql/essentials/udfs/common/histogram/test/canondata/test.test_Normalize_/results.txt
@@ -0,0 +1,338 @@
+[
+ {
+ "Write" = [
+ {
+ "Type" = [
+ "ListType";
+ [
+ "StructType";
+ [
+ [
+ "key_histogram";
+ [
+ "OptionalType";
+ [
+ "StructType";
+ [
+ [
+ "Bins";
+ [
+ "ListType";
+ [
+ "StructType";
+ [
+ [
+ "Frequency";
+ [
+ "DataType";
+ "Double"
+ ]
+ ];
+ [
+ "Position";
+ [
+ "DataType";
+ "Double"
+ ]
+ ]
+ ]
+ ]
+ ]
+ ];
+ [
+ "Kind";
+ [
+ "DataType";
+ "String"
+ ]
+ ];
+ [
+ "Max";
+ [
+ "DataType";
+ "Double"
+ ]
+ ];
+ [
+ "Min";
+ [
+ "DataType";
+ "Double"
+ ]
+ ];
+ [
+ "WeightsSum";
+ [
+ "DataType";
+ "Double"
+ ]
+ ]
+ ]
+ ]
+ ]
+ ];
+ [
+ "subkey_histogram";
+ [
+ "OptionalType";
+ [
+ "StructType";
+ [
+ [
+ "Bins";
+ [
+ "ListType";
+ [
+ "StructType";
+ [
+ [
+ "Frequency";
+ [
+ "DataType";
+ "Double"
+ ]
+ ];
+ [
+ "Position";
+ [
+ "DataType";
+ "Double"
+ ]
+ ]
+ ]
+ ]
+ ]
+ ];
+ [
+ "Kind";
+ [
+ "DataType";
+ "String"
+ ]
+ ];
+ [
+ "Max";
+ [
+ "DataType";
+ "Double"
+ ]
+ ];
+ [
+ "Min";
+ [
+ "DataType";
+ "Double"
+ ]
+ ];
+ [
+ "WeightsSum";
+ [
+ "DataType";
+ "Double"
+ ]
+ ]
+ ]
+ ]
+ ]
+ ];
+ [
+ "value_histogram";
+ [
+ "OptionalType";
+ [
+ "StructType";
+ [
+ [
+ "Bins";
+ [
+ "ListType";
+ [
+ "StructType";
+ [
+ [
+ "Frequency";
+ [
+ "DataType";
+ "Double"
+ ]
+ ];
+ [
+ "Position";
+ [
+ "DataType";
+ "Double"
+ ]
+ ]
+ ]
+ ]
+ ]
+ ];
+ [
+ "Kind";
+ [
+ "DataType";
+ "String"
+ ]
+ ];
+ [
+ "Max";
+ [
+ "DataType";
+ "Double"
+ ]
+ ];
+ [
+ "Min";
+ [
+ "DataType";
+ "Double"
+ ]
+ ];
+ [
+ "WeightsSum";
+ [
+ "DataType";
+ "Double"
+ ]
+ ]
+ ]
+ ]
+ ]
+ ]
+ ]
+ ]
+ ];
+ "Data" = [
+ [
+ [
+ [
+ [
+ [
+ "6.25";
+ "0"
+ ];
+ [
+ "6.25";
+ "1"
+ ];
+ [
+ "18.75";
+ "2"
+ ];
+ [
+ "18.75";
+ "3"
+ ];
+ [
+ "6.25";
+ "4"
+ ];
+ [
+ "6.25";
+ "5"
+ ];
+ [
+ "6.25";
+ "6"
+ ];
+ [
+ "31.25";
+ "7"
+ ]
+ ];
+ "AdaptiveWard";
+ "7";
+ "0";
+ "100"
+ ]
+ ];
+ [
+ [
+ [
+ [
+ "0.0625";
+ "0"
+ ];
+ [
+ "0.0625";
+ "1"
+ ];
+ [
+ "0.1875";
+ "2"
+ ];
+ [
+ "0.1875";
+ "4"
+ ];
+ [
+ "0.0625";
+ "8"
+ ];
+ [
+ "0.0625";
+ "16"
+ ];
+ [
+ "0.0625";
+ "32"
+ ];
+ [
+ "0.3125";
+ "64"
+ ]
+ ];
+ "AdaptiveWard";
+ "64";
+ "0";
+ "1"
+ ]
+ ];
+ [
+ [
+ [
+ [
+ "-0.0625";
+ "-1"
+ ];
+ [
+ "-0.0625";
+ "1"
+ ];
+ [
+ "-0.0625";
+ "2"
+ ];
+ [
+ "-0.0625";
+ "8"
+ ];
+ [
+ "-0.0625";
+ "32"
+ ];
+ [
+ "-0.0625";
+ "128"
+ ];
+ [
+ "-0.0625";
+ "512"
+ ];
+ [
+ "-0.5625";
+ "2048"
+ ]
+ ];
+ "AdaptiveWard";
+ "2048";
+ "-1";
+ "-1"
+ ]
+ ]
+ ]
+ ]
+ }
+ ]
+ }
+] \ No newline at end of file
diff --git a/yql/essentials/udfs/common/histogram/test/canondata/test.test_Print_/results.txt b/yql/essentials/udfs/common/histogram/test/canondata/test.test_Print_/results.txt
new file mode 100644
index 00000000000..bab67bf3e8c
--- /dev/null
+++ b/yql/essentials/udfs/common/histogram/test/canondata/test.test_Print_/results.txt
@@ -0,0 +1,59 @@
+[
+ {
+ "Write" = [
+ {
+ "Type" = [
+ "ListType";
+ [
+ "StructType";
+ [
+ [
+ "key_histogram";
+ [
+ "OptionalType";
+ [
+ "DataType";
+ "String"
+ ]
+ ]
+ ];
+ [
+ "subkey_histogram";
+ [
+ "OptionalType";
+ [
+ "DataType";
+ "String"
+ ]
+ ]
+ ];
+ [
+ "value_histogram";
+ [
+ "OptionalType";
+ [
+ "DataType";
+ "String"
+ ]
+ ]
+ ]
+ ]
+ ]
+ ];
+ "Data" = [
+ [
+ [
+ "Kind: AdaptiveWard Bins: 8 WeightsSum: 16.000 Min: 0.000 Max: 7.000\n\xE2\x96\x88\xE2\x96\x88\xE2\x96\x88\xE2\x96\x88\xE2\x96\x88\xE2\x96\x91\xE2\x96\x91\xE2\x96\x91\xE2\x96\x91\xE2\x96\x91\xE2\x96\x91\xE2\x96\x91\xE2\x96\x91\xE2\x96\x91\xE2\x96\x91\xE2\x96\x91\xE2\x96\x91\xE2\x96\x91\xE2\x96\x91\xE2\x96\x91\xE2\x96\x91\xE2\x96\x91\xE2\x96\x91\xE2\x96\x91\xE2\x96\x91 P: 0.000 F: 1.000\n\xE2\x96\x88\xE2\x96\x88\xE2\x96\x88\xE2\x96\x88\xE2\x96\x88\xE2\x96\x91\xE2\x96\x91\xE2\x96\x91\xE2\x96\x91\xE2\x96\x91\xE2\x96\x91\xE2\x96\x91\xE2\x96\x91\xE2\x96\x91\xE2\x96\x91\xE2\x96\x91\xE2\x96\x91\xE2\x96\x91\xE2\x96\x91\xE2\x96\x91\xE2\x96\x91\xE2\x96\x91\xE2\x96\x91\xE2\x96\x91\xE2\x96\x91 P: 1.000 F: 1.000\n\xE2\x96\x88\xE2\x96\x88\xE2\x96\x88\xE2\x96\x88\xE2\x96\x88\xE2\x96\x88\xE2\x96\x88\xE2\x96\x88\xE2\x96\x88\xE2\x96\x88\xE2\x96\x88\xE2\x96\x88\xE2\x96\x88\xE2\x96\x88\xE2\x96\x88\xE2\x96\x91\xE2\x96\x91\xE2\x96\x91\xE2\x96\x91\xE2\x96\x91\xE2\x96\x91\xE2\x96\x91\xE2\x96\x91\xE2\x96\x91\xE2\x96\x91 P: 2.000 F: 3.000\n\xE2\x96\x88\xE2\x96\x88\xE2\x96\x88\xE2\x96\x88\xE2\x96\x88\xE2\x96\x88\xE2\x96\x88\xE2\x96\x88\xE2\x96\x88\xE2\x96\x88\xE2\x96\x88\xE2\x96\x88\xE2\x96\x88\xE2\x96\x88\xE2\x96\x88\xE2\x96\x91\xE2\x96\x91\xE2\x96\x91\xE2\x96\x91\xE2\x96\x91\xE2\x96\x91\xE2\x96\x91\xE2\x96\x91\xE2\x96\x91\xE2\x96\x91 P: 3.000 F: 3.000\n\xE2\x96\x88\xE2\x96\x88\xE2\x96\x88\xE2\x96\x88\xE2\x96\x88\xE2\x96\x91\xE2\x96\x91\xE2\x96\x91\xE2\x96\x91\xE2\x96\x91\xE2\x96\x91\xE2\x96\x91\xE2\x96\x91\xE2\x96\x91\xE2\x96\x91\xE2\x96\x91\xE2\x96\x91\xE2\x96\x91\xE2\x96\x91\xE2\x96\x91\xE2\x96\x91\xE2\x96\x91\xE2\x96\x91\xE2\x96\x91\xE2\x96\x91 P: 4.000 F: 1.000\n\xE2\x96\x88\xE2\x96\x88\xE2\x96\x88\xE2\x96\x88\xE2\x96\x88\xE2\x96\x91\xE2\x96\x91\xE2\x96\x91\xE2\x96\x91\xE2\x96\x91\xE2\x96\x91\xE2\x96\x91\xE2\x96\x91\xE2\x96\x91\xE2\x96\x91\xE2\x96\x91\xE2\x96\x91\xE2\x96\x91\xE2\x96\x91\xE2\x96\x91\xE2\x96\x91\xE2\x96\x91\xE2\x96\x91\xE2\x96\x91\xE2\x96\x91 P: 5.000 F: 
1.000\n\xE2\x96\x88\xE2\x96\x88\xE2\x96\x88\xE2\x96\x88\xE2\x96\x88\xE2\x96\x91\xE2\x96\x91\xE2\x96\x91\xE2\x96\x91\xE2\x96\x91\xE2\x96\x91\xE2\x96\x91\xE2\x96\x91\xE2\x96\x91\xE2\x96\x91\xE2\x96\x91\xE2\x96\x91\xE2\x96\x91\xE2\x96\x91\xE2\x96\x91\xE2\x96\x91\xE2\x96\x91\xE2\x96\x91\xE2\x96\x91\xE2\x96\x91 P: 6.000 F: 1.000\n\xE2\x96\x88\xE2\x96\x88\xE2\x96\x88\xE2\x96\x88\xE2\x96\x88\xE2\x96\x88\xE2\x96\x88\xE2\x96\x88\xE2\x96\x88\xE2\x96\x88\xE2\x96\x88\xE2\x96\x88\xE2\x96\x88\xE2\x96\x88\xE2\x96\x88\xE2\x96\x88\xE2\x96\x88\xE2\x96\x88\xE2\x96\x88\xE2\x96\x88\xE2\x96\x88\xE2\x96\x88\xE2\x96\x88\xE2\x96\x88\xE2\x96\x88 P: 7.000 F: 5.000"
+ ];
+ [
+ "Kind: AdaptiveWard Bins: 8 WeightsSum: 16.000 Min: 0.000 Max: 64.000\n\xE2\x96\x88\xE2\x96\x88\xE2\x96\x88\xE2\x96\x88\xE2\x96\x88\xE2\x96\x91\xE2\x96\x91\xE2\x96\x91\xE2\x96\x91\xE2\x96\x91\xE2\x96\x91\xE2\x96\x91\xE2\x96\x91\xE2\x96\x91\xE2\x96\x91\xE2\x96\x91\xE2\x96\x91\xE2\x96\x91\xE2\x96\x91\xE2\x96\x91\xE2\x96\x91\xE2\x96\x91\xE2\x96\x91\xE2\x96\x91\xE2\x96\x91 P: 0.000 F: 1.000\n\xE2\x96\x88\xE2\x96\x88\xE2\x96\x88\xE2\x96\x88\xE2\x96\x88\xE2\x96\x91\xE2\x96\x91\xE2\x96\x91\xE2\x96\x91\xE2\x96\x91\xE2\x96\x91\xE2\x96\x91\xE2\x96\x91\xE2\x96\x91\xE2\x96\x91\xE2\x96\x91\xE2\x96\x91\xE2\x96\x91\xE2\x96\x91\xE2\x96\x91\xE2\x96\x91\xE2\x96\x91\xE2\x96\x91\xE2\x96\x91\xE2\x96\x91 P: 1.000 F: 1.000\n\xE2\x96\x88\xE2\x96\x88\xE2\x96\x88\xE2\x96\x88\xE2\x96\x88\xE2\x96\x88\xE2\x96\x88\xE2\x96\x88\xE2\x96\x88\xE2\x96\x88\xE2\x96\x88\xE2\x96\x88\xE2\x96\x88\xE2\x96\x88\xE2\x96\x88\xE2\x96\x91\xE2\x96\x91\xE2\x96\x91\xE2\x96\x91\xE2\x96\x91\xE2\x96\x91\xE2\x96\x91\xE2\x96\x91\xE2\x96\x91\xE2\x96\x91 P: 2.000 F: 3.000\n\xE2\x96\x88\xE2\x96\x88\xE2\x96\x88\xE2\x96\x88\xE2\x96\x88\xE2\x96\x88\xE2\x96\x88\xE2\x96\x88\xE2\x96\x88\xE2\x96\x88\xE2\x96\x88\xE2\x96\x88\xE2\x96\x88\xE2\x96\x88\xE2\x96\x88\xE2\x96\x91\xE2\x96\x91\xE2\x96\x91\xE2\x96\x91\xE2\x96\x91\xE2\x96\x91\xE2\x96\x91\xE2\x96\x91\xE2\x96\x91\xE2\x96\x91 P: 4.000 F: 3.000\n\xE2\x96\x88\xE2\x96\x88\xE2\x96\x88\xE2\x96\x88\xE2\x96\x88\xE2\x96\x91\xE2\x96\x91\xE2\x96\x91\xE2\x96\x91\xE2\x96\x91\xE2\x96\x91\xE2\x96\x91\xE2\x96\x91\xE2\x96\x91\xE2\x96\x91\xE2\x96\x91\xE2\x96\x91\xE2\x96\x91\xE2\x96\x91\xE2\x96\x91\xE2\x96\x91\xE2\x96\x91\xE2\x96\x91\xE2\x96\x91\xE2\x96\x91 P: 8.000 F: 1.000\n\xE2\x96\x88\xE2\x96\x88\xE2\x96\x88\xE2\x96\x88\xE2\x96\x88\xE2\x96\x91\xE2\x96\x91\xE2\x96\x91\xE2\x96\x91\xE2\x96\x91\xE2\x96\x91\xE2\x96\x91\xE2\x96\x91\xE2\x96\x91\xE2\x96\x91\xE2\x96\x91\xE2\x96\x91\xE2\x96\x91\xE2\x96\x91\xE2\x96\x91\xE2\x96\x91\xE2\x96\x91\xE2\x96\x91\xE2\x96\x91\xE2\x96\x91 P: 16.000 F: 
1.000\n\xE2\x96\x88\xE2\x96\x88\xE2\x96\x88\xE2\x96\x88\xE2\x96\x88\xE2\x96\x91\xE2\x96\x91\xE2\x96\x91\xE2\x96\x91\xE2\x96\x91\xE2\x96\x91\xE2\x96\x91\xE2\x96\x91\xE2\x96\x91\xE2\x96\x91\xE2\x96\x91\xE2\x96\x91\xE2\x96\x91\xE2\x96\x91\xE2\x96\x91\xE2\x96\x91\xE2\x96\x91\xE2\x96\x91\xE2\x96\x91\xE2\x96\x91 P: 32.000 F: 1.000\n\xE2\x96\x88\xE2\x96\x88\xE2\x96\x88\xE2\x96\x88\xE2\x96\x88\xE2\x96\x88\xE2\x96\x88\xE2\x96\x88\xE2\x96\x88\xE2\x96\x88\xE2\x96\x88\xE2\x96\x88\xE2\x96\x88\xE2\x96\x88\xE2\x96\x88\xE2\x96\x88\xE2\x96\x88\xE2\x96\x88\xE2\x96\x88\xE2\x96\x88\xE2\x96\x88\xE2\x96\x88\xE2\x96\x88\xE2\x96\x88\xE2\x96\x88 P: 64.000 F: 5.000"
+ ];
+ [
+ "Kind: AdaptiveWard Bins: 8 WeightsSum: 16.000 Min: -1.000 Max: 2048.000\n\xE2\x96\x88\xE2\x96\x88\xE2\x96\x88\xE2\x96\x88\xE2\x96\x88\xE2\x96\x91\xE2\x96\x91\xE2\x96\x91\xE2\x96\x91\xE2\x96\x91\xE2\x96\x91\xE2\x96\x91\xE2\x96\x91\xE2\x96\x91\xE2\x96\x91\xE2\x96\x91\xE2\x96\x91\xE2\x96\x91\xE2\x96\x91\xE2\x96\x91\xE2\x96\x91\xE2\x96\x91\xE2\x96\x91\xE2\x96\x91\xE2\x96\x91\xE2\x96\x91\xE2\x96\x91\xE2\x96\x91\xE2\x96\x91\xE2\x96\x91\xE2\x96\x91\xE2\x96\x91\xE2\x96\x91\xE2\x96\x91\xE2\x96\x91\xE2\x96\x91\xE2\x96\x91\xE2\x96\x91\xE2\x96\x91\xE2\x96\x91\xE2\x96\x91\xE2\x96\x91\xE2\x96\x91\xE2\x96\x91\xE2\x96\x91\xE2\x96\x91\xE2\x96\x91\xE2\x96\x91\xE2\x96\x91\xE2\x96\x91 P: -1.000 F: 1.000\n\xE2\x96\x88\xE2\x96\x88\xE2\x96\x88\xE2\x96\x88\xE2\x96\x88\xE2\x96\x91\xE2\x96\x91\xE2\x96\x91\xE2\x96\x91\xE2\x96\x91\xE2\x96\x91\xE2\x96\x91\xE2\x96\x91\xE2\x96\x91\xE2\x96\x91\xE2\x96\x91\xE2\x96\x91\xE2\x96\x91\xE2\x96\x91\xE2\x96\x91\xE2\x96\x91\xE2\x96\x91\xE2\x96\x91\xE2\x96\x91\xE2\x96\x91\xE2\x96\x91\xE2\x96\x91\xE2\x96\x91\xE2\x96\x91\xE2\x96\x91\xE2\x96\x91\xE2\x96\x91\xE2\x96\x91\xE2\x96\x91\xE2\x96\x91\xE2\x96\x91\xE2\x96\x91\xE2\x96\x91\xE2\x96\x91\xE2\x96\x91\xE2\x96\x91\xE2\x96\x91\xE2\x96\x91\xE2\x96\x91\xE2\x96\x91\xE2\x96\x91\xE2\x96\x91\xE2\x96\x91\xE2\x96\x91\xE2\x96\x91 P: 1.000 F: 1.000\n\xE2\x96\x88\xE2\x96\x88\xE2\x96\x88\xE2\x96\x88\xE2\x96\x88\xE2\x96\x91\xE2\x96\x91\xE2\x96\x91\xE2\x96\x91\xE2\x96\x91\xE2\x96\x91\xE2\x96\x91\xE2\x96\x91\xE2\x96\x91\xE2\x96\x91\xE2\x96\x91\xE2\x96\x91\xE2\x96\x91\xE2\x96\x91\xE2\x96\x91\xE2\x96\x91\xE2\x96\x91\xE2\x96\x91\xE2\x96\x91\xE2\x96\x91\xE2\x96\x91\xE2\x96\x91\xE2\x96\x91\xE2\x96\x91\xE2\x96\x91\xE2\x96\x91\xE2\x96\x91\xE2\x96\x91\xE2\x96\x91\xE2\x96\x91\xE2\x96\x91\xE2\x96\x91\xE2\x96\x91\xE2\x96\x91\xE2\x96\x91\xE2\x96\x91\xE2\x96\x91\xE2\x96\x91\xE2\x96\x91\xE2\x96\x91\xE2\x96\x91\xE2\x96\x91\xE2\x96\x91\xE2\x96\x91\xE2\x96\x91 P: 2.000 F: 
1.000\n\xE2\x96\x88\xE2\x96\x88\xE2\x96\x88\xE2\x96\x88\xE2\x96\x88\xE2\x96\x91\xE2\x96\x91\xE2\x96\x91\xE2\x96\x91\xE2\x96\x91\xE2\x96\x91\xE2\x96\x91\xE2\x96\x91\xE2\x96\x91\xE2\x96\x91\xE2\x96\x91\xE2\x96\x91\xE2\x96\x91\xE2\x96\x91\xE2\x96\x91\xE2\x96\x91\xE2\x96\x91\xE2\x96\x91\xE2\x96\x91\xE2\x96\x91\xE2\x96\x91\xE2\x96\x91\xE2\x96\x91\xE2\x96\x91\xE2\x96\x91\xE2\x96\x91\xE2\x96\x91\xE2\x96\x91\xE2\x96\x91\xE2\x96\x91\xE2\x96\x91\xE2\x96\x91\xE2\x96\x91\xE2\x96\x91\xE2\x96\x91\xE2\x96\x91\xE2\x96\x91\xE2\x96\x91\xE2\x96\x91\xE2\x96\x91\xE2\x96\x91\xE2\x96\x91\xE2\x96\x91\xE2\x96\x91\xE2\x96\x91 P: 8.000 F: 1.000\n\xE2\x96\x88\xE2\x96\x88\xE2\x96\x88\xE2\x96\x88\xE2\x96\x88\xE2\x96\x91\xE2\x96\x91\xE2\x96\x91\xE2\x96\x91\xE2\x96\x91\xE2\x96\x91\xE2\x96\x91\xE2\x96\x91\xE2\x96\x91\xE2\x96\x91\xE2\x96\x91\xE2\x96\x91\xE2\x96\x91\xE2\x96\x91\xE2\x96\x91\xE2\x96\x91\xE2\x96\x91\xE2\x96\x91\xE2\x96\x91\xE2\x96\x91\xE2\x96\x91\xE2\x96\x91\xE2\x96\x91\xE2\x96\x91\xE2\x96\x91\xE2\x96\x91\xE2\x96\x91\xE2\x96\x91\xE2\x96\x91\xE2\x96\x91\xE2\x96\x91\xE2\x96\x91\xE2\x96\x91\xE2\x96\x91\xE2\x96\x91\xE2\x96\x91\xE2\x96\x91\xE2\x96\x91\xE2\x96\x91\xE2\x96\x91\xE2\x96\x91\xE2\x96\x91\xE2\x96\x91\xE2\x96\x91\xE2\x96\x91 P: 32.000 F: 1.000\n\xE2\x96\x88\xE2\x96\x88\xE2\x96\x88\xE2\x96\x88\xE2\x96\x88\xE2\x96\x91\xE2\x96\x91\xE2\x96\x91\xE2\x96\x91\xE2\x96\x91\xE2\x96\x91\xE2\x96\x91\xE2\x96\x91\xE2\x96\x91\xE2\x96\x91\xE2\x96\x91\xE2\x96\x91\xE2\x96\x91\xE2\x96\x91\xE2\x96\x91\xE2\x96\x91\xE2\x96\x91\xE2\x96\x91\xE2\x96\x91\xE2\x96\x91\xE2\x96\x91\xE2\x96\x91\xE2\x96\x91\xE2\x96\x91\xE2\x96\x91\xE2\x96\x91\xE2\x96\x91\xE2\x96\x91\xE2\x96\x91\xE2\x96\x91\xE2\x96\x91\xE2\x96\x91\xE2\x96\x91\xE2\x96\x91\xE2\x96\x91\xE2\x96\x91\xE2\x96\x91\xE2\x96\x91\xE2\x96\x91\xE2\x96\x91\xE2\x96\x91\xE2\x96\x91\xE2\x96\x91\xE2\x96\x91\xE2\x96\x91 P: 128.000 F: 
1.000\n\xE2\x96\x88\xE2\x96\x88\xE2\x96\x88\xE2\x96\x88\xE2\x96\x88\xE2\x96\x91\xE2\x96\x91\xE2\x96\x91\xE2\x96\x91\xE2\x96\x91\xE2\x96\x91\xE2\x96\x91\xE2\x96\x91\xE2\x96\x91\xE2\x96\x91\xE2\x96\x91\xE2\x96\x91\xE2\x96\x91\xE2\x96\x91\xE2\x96\x91\xE2\x96\x91\xE2\x96\x91\xE2\x96\x91\xE2\x96\x91\xE2\x96\x91\xE2\x96\x91\xE2\x96\x91\xE2\x96\x91\xE2\x96\x91\xE2\x96\x91\xE2\x96\x91\xE2\x96\x91\xE2\x96\x91\xE2\x96\x91\xE2\x96\x91\xE2\x96\x91\xE2\x96\x91\xE2\x96\x91\xE2\x96\x91\xE2\x96\x91\xE2\x96\x91\xE2\x96\x91\xE2\x96\x91\xE2\x96\x91\xE2\x96\x91\xE2\x96\x91\xE2\x96\x91\xE2\x96\x91\xE2\x96\x91\xE2\x96\x91 P: 512.000 F: 1.000\n\xE2\x96\x88\xE2\x96\x88\xE2\x96\x88\xE2\x96\x88\xE2\x96\x88\xE2\x96\x88\xE2\x96\x88\xE2\x96\x88\xE2\x96\x88\xE2\x96\x88\xE2\x96\x88\xE2\x96\x88\xE2\x96\x88\xE2\x96\x88\xE2\x96\x88\xE2\x96\x88\xE2\x96\x88\xE2\x96\x88\xE2\x96\x88\xE2\x96\x88\xE2\x96\x88\xE2\x96\x88\xE2\x96\x88\xE2\x96\x88\xE2\x96\x88\xE2\x96\x88\xE2\x96\x88\xE2\x96\x88\xE2\x96\x88\xE2\x96\x88\xE2\x96\x88\xE2\x96\x88\xE2\x96\x88\xE2\x96\x88\xE2\x96\x88\xE2\x96\x88\xE2\x96\x88\xE2\x96\x88\xE2\x96\x88\xE2\x96\x88\xE2\x96\x88\xE2\x96\x88\xE2\x96\x88\xE2\x96\x88\xE2\x96\x88\xE2\x96\x88\xE2\x96\x88\xE2\x96\x88\xE2\x96\x88\xE2\x96\x88 P: 2048.000 F: 9.000"
+ ]
+ ]
+ ]
+ }
+ ]
+ }
+] \ No newline at end of file
diff --git a/yql/essentials/udfs/common/histogram/test/canondata/test.test_ToCumulativeDistributionFunction_/results.txt b/yql/essentials/udfs/common/histogram/test/canondata/test.test_ToCumulativeDistributionFunction_/results.txt
new file mode 100644
index 00000000000..9e2b2a96892
--- /dev/null
+++ b/yql/essentials/udfs/common/histogram/test/canondata/test.test_ToCumulativeDistributionFunction_/results.txt
@@ -0,0 +1,444 @@
+[
+ {
+ "Write" = [
+ {
+ "Type" = [
+ "ListType";
+ [
+ "StructType";
+ [
+ [
+ "key_histogram";
+ [
+ "OptionalType";
+ [
+ "StructType";
+ [
+ [
+ "Bins";
+ [
+ "ListType";
+ [
+ "StructType";
+ [
+ [
+ "Frequency";
+ [
+ "DataType";
+ "Double"
+ ]
+ ];
+ [
+ "Position";
+ [
+ "DataType";
+ "Double"
+ ]
+ ]
+ ]
+ ]
+ ]
+ ];
+ [
+ "Kind";
+ [
+ "DataType";
+ "String"
+ ]
+ ];
+ [
+ "Max";
+ [
+ "DataType";
+ "Double"
+ ]
+ ];
+ [
+ "Min";
+ [
+ "DataType";
+ "Double"
+ ]
+ ];
+ [
+ "WeightsSum";
+ [
+ "DataType";
+ "Double"
+ ]
+ ]
+ ]
+ ]
+ ]
+ ];
+ [
+ "subkey_histogram";
+ [
+ "OptionalType";
+ [
+ "StructType";
+ [
+ [
+ "Bins";
+ [
+ "ListType";
+ [
+ "StructType";
+ [
+ [
+ "Frequency";
+ [
+ "DataType";
+ "Double"
+ ]
+ ];
+ [
+ "Position";
+ [
+ "DataType";
+ "Double"
+ ]
+ ]
+ ]
+ ]
+ ]
+ ];
+ [
+ "Kind";
+ [
+ "DataType";
+ "String"
+ ]
+ ];
+ [
+ "Max";
+ [
+ "DataType";
+ "Double"
+ ]
+ ];
+ [
+ "Min";
+ [
+ "DataType";
+ "Double"
+ ]
+ ];
+ [
+ "WeightsSum";
+ [
+ "DataType";
+ "Double"
+ ]
+ ]
+ ]
+ ]
+ ]
+ ];
+ [
+ "value_histogram";
+ [
+ "OptionalType";
+ [
+ "StructType";
+ [
+ [
+ "Bins";
+ [
+ "ListType";
+ [
+ "StructType";
+ [
+ [
+ "Frequency";
+ [
+ "DataType";
+ "Double"
+ ]
+ ];
+ [
+ "Position";
+ [
+ "DataType";
+ "Double"
+ ]
+ ]
+ ]
+ ]
+ ]
+ ];
+ [
+ "Kind";
+ [
+ "DataType";
+ "String"
+ ]
+ ];
+ [
+ "Max";
+ [
+ "DataType";
+ "Double"
+ ]
+ ];
+ [
+ "Min";
+ [
+ "DataType";
+ "Double"
+ ]
+ ];
+ [
+ "WeightsSum";
+ [
+ "DataType";
+ "Double"
+ ]
+ ]
+ ]
+ ]
+ ]
+ ];
+ [
+ "subkey_norm_cdf_histogram";
+ [
+ "OptionalType";
+ [
+ "StructType";
+ [
+ [
+ "Bins";
+ [
+ "ListType";
+ [
+ "StructType";
+ [
+ [
+ "Frequency";
+ [
+ "DataType";
+ "Double"
+ ]
+ ];
+ [
+ "Position";
+ [
+ "DataType";
+ "Double"
+ ]
+ ]
+ ]
+ ]
+ ]
+ ];
+ [
+ "Kind";
+ [
+ "DataType";
+ "String"
+ ]
+ ];
+ [
+ "Max";
+ [
+ "DataType";
+ "Double"
+ ]
+ ];
+ [
+ "Min";
+ [
+ "DataType";
+ "Double"
+ ]
+ ];
+ [
+ "WeightsSum";
+ [
+ "DataType";
+ "Double"
+ ]
+ ]
+ ]
+ ]
+ ]
+ ]
+ ]
+ ]
+ ];
+ "Data" = [
+ [
+ [
+ [
+ [
+ [
+ "1";
+ "0"
+ ];
+ [
+ "2";
+ "1"
+ ];
+ [
+ "5";
+ "2"
+ ];
+ [
+ "8";
+ "3"
+ ];
+ [
+ "9";
+ "4"
+ ];
+ [
+ "10";
+ "5"
+ ];
+ [
+ "11";
+ "6"
+ ];
+ [
+ "16";
+ "7"
+ ]
+ ];
+ "AdaptiveWardCdf";
+ "7";
+ "0";
+ "62"
+ ]
+ ];
+ [
+ [
+ [
+ [
+ "1";
+ "0"
+ ];
+ [
+ "2";
+ "1"
+ ];
+ [
+ "5";
+ "2"
+ ];
+ [
+ "8";
+ "4"
+ ];
+ [
+ "9";
+ "8"
+ ];
+ [
+ "10";
+ "16"
+ ];
+ [
+ "11";
+ "32"
+ ];
+ [
+ "16";
+ "64"
+ ]
+ ];
+ "AdaptiveWardCdf";
+ "64";
+ "0";
+ "62"
+ ]
+ ];
+ [
+ [
+ [
+ [
+ "1";
+ "-1"
+ ];
+ [
+ "2";
+ "1"
+ ];
+ [
+ "3";
+ "2"
+ ];
+ [
+ "4";
+ "8"
+ ];
+ [
+ "5";
+ "32"
+ ];
+ [
+ "6";
+ "128"
+ ];
+ [
+ "7";
+ "512"
+ ];
+ [
+ "16";
+ "2048"
+ ]
+ ];
+ "AdaptiveWardCdf";
+ "2048";
+ "-1";
+ "44"
+ ]
+ ];
+ [
+ [
+ [
+ [
+ "6.25";
+ "0"
+ ];
+ [
+ "12.5";
+ "1"
+ ];
+ [
+ "31.25";
+ "2"
+ ];
+ [
+ "50";
+ "4"
+ ];
+ [
+ "56.25";
+ "8"
+ ];
+ [
+ "62.5";
+ "16"
+ ];
+ [
+ "68.75";
+ "32"
+ ];
+ [
+ "100";
+ "64"
+ ]
+ ];
+ "AdaptiveWardCdfCdf";
+ "64";
+ "0";
+ "387.5"
+ ]
+ ]
+ ]
+ ]
+ }
+ ]
+ }
+] \ No newline at end of file
diff --git a/yql/essentials/udfs/common/histogram/test/canondata/test.test_Weights_/results.txt b/yql/essentials/udfs/common/histogram/test/canondata/test.test_Weights_/results.txt
new file mode 100644
index 00000000000..6b9aac15ec2
--- /dev/null
+++ b/yql/essentials/udfs/common/histogram/test/canondata/test.test_Weights_/results.txt
@@ -0,0 +1,221 @@
+[
+ {
+ "Write" = [
+ {
+ "Type" = [
+ "ListType";
+ [
+ "StructType";
+ [
+ [
+ "basic_weight";
+ [
+ "OptionalType";
+ [
+ "StructType";
+ [
+ [
+ "Bins";
+ [
+ "ListType";
+ [
+ "StructType";
+ [
+ [
+ "Frequency";
+ [
+ "DataType";
+ "Double"
+ ]
+ ];
+ [
+ "Position";
+ [
+ "DataType";
+ "Double"
+ ]
+ ]
+ ]
+ ]
+ ]
+ ];
+ [
+ "Kind";
+ [
+ "DataType";
+ "String"
+ ]
+ ];
+ [
+ "Max";
+ [
+ "DataType";
+ "Double"
+ ]
+ ];
+ [
+ "Min";
+ [
+ "DataType";
+ "Double"
+ ]
+ ];
+ [
+ "WeightsSum";
+ [
+ "DataType";
+ "Double"
+ ]
+ ]
+ ]
+ ]
+ ]
+ ];
+ [
+ "weight_and_bins";
+ [
+ "OptionalType";
+ [
+ "StructType";
+ [
+ [
+ "Bins";
+ [
+ "ListType";
+ [
+ "StructType";
+ [
+ [
+ "Frequency";
+ [
+ "DataType";
+ "Double"
+ ]
+ ];
+ [
+ "Position";
+ [
+ "DataType";
+ "Double"
+ ]
+ ]
+ ]
+ ]
+ ]
+ ];
+ [
+ "Kind";
+ [
+ "DataType";
+ "String"
+ ]
+ ];
+ [
+ "Max";
+ [
+ "DataType";
+ "Double"
+ ]
+ ];
+ [
+ "Min";
+ [
+ "DataType";
+ "Double"
+ ]
+ ];
+ [
+ "WeightsSum";
+ [
+ "DataType";
+ "Double"
+ ]
+ ]
+ ]
+ ]
+ ]
+ ];
+ [
+ "equality_check";
+ [
+ "OptionalType";
+ [
+ "DataType";
+ "Bool"
+ ]
+ ]
+ ]
+ ]
+ ]
+ ];
+ "Data" = [
+ [
+ [
+ [
+ [
+ [
+ "1";
+ "1"
+ ];
+ [
+ "2049";
+ "2"
+ ];
+ [
+ "1026";
+ "3"
+ ];
+ [
+ "4";
+ "4"
+ ];
+ [
+ "8";
+ "5"
+ ];
+ [
+ "16";
+ "6"
+ ];
+ [
+ "160";
+ "7"
+ ]
+ ];
+ "AdaptiveWard";
+ "7";
+ "1";
+ "3264"
+ ]
+ ];
+ [
+ [
+ [
+ [
+ "8192";
+ "2.5"
+ ];
+ [
+ "683";
+ "5.664714336395264"
+ ];
+ [
+ "10240";
+ "7"
+ ]
+ ];
+ "AdaptiveWard";
+ "7";
+ "1";
+ "19115"
+ ]
+ ];
+ [
+ %true
+ ]
+ ]
+ ]
+ }
+ ]
+ }
+] \ No newline at end of file
diff --git a/yql/essentials/udfs/common/histogram/test/cases/Algorithms.sql b/yql/essentials/udfs/common/histogram/test/cases/Algorithms.sql
new file mode 100644
index 00000000000..02b2bf65fc6
--- /dev/null
+++ b/yql/essentials/udfs/common/histogram/test/cases/Algorithms.sql
@@ -0,0 +1,16 @@
+/* syntax version 1 */
+SELECT -- histograms of key built by five different histogram functions, each called with second argument 3
+ ADAPTIVE_DISTANCE_HISTOGRAM(key, 3) AS adaptive_distance,
+ ADAPTIVE_WEIGHT_HISTOGRAM(key, 3) AS adaptive_weight,
+ ADAPTIVE_WARD_HISTOGRAM(key, 3) AS adaptive_ward,
+ BLOCK_WEIGHT_HISTOGRAM(key, 3) AS block_weight,
+ BLOCK_WARD_HISTOGRAM(key, 3) AS block_ward,
+ Histogram::Print(ADAPTIVE_WEIGHT_HISTOGRAM(key, 3)) <> Histogram::Print(BLOCK_WEIGHT_HISTOGRAM(key, 3)) AS algo_equality_check, -- compares printed forms with <>: true when adaptive and block variants differ
+ Histogram::Print(ADAPTIVE_WEIGHT_HISTOGRAM(key, 3)) <> Histogram::Print(ADAPTIVE_WARD_HISTOGRAM(key, 3)) AS quality_equality_check -- likewise an inequality test, weight vs ward variant
+FROM ( -- subquery casts all three Input columns to Double; only key is used by the outer SELECT
+ SELECT
+ CAST(key AS Double) AS key,
+ CAST(subkey AS Double) AS subkey,
+ CAST(value AS Double) AS value
+ FROM Input
+);
diff --git a/yql/essentials/udfs/common/histogram/test/cases/Basic.sql b/yql/essentials/udfs/common/histogram/test/cases/Basic.sql
new file mode 100644
index 00000000000..2e080bd2ba7
--- /dev/null
+++ b/yql/essentials/udfs/common/histogram/test/cases/Basic.sql
@@ -0,0 +1,12 @@
+/* syntax version 1 */
+SELECT -- one HISTOGRAM per column, called with the column as its only argument
+ HISTOGRAM(key) AS key_histogram,
+ HISTOGRAM(subkey) AS subkey_histogram,
+ HISTOGRAM(value) AS value_histogram
+FROM ( -- Input columns are cast to Double before aggregation
+ SELECT
+ CAST(key AS Double) AS key,
+ CAST(subkey AS Double) AS subkey,
+ CAST(value AS Double) AS value
+ FROM Input
+);
diff --git a/yql/essentials/udfs/common/histogram/test/cases/Distinct.sql b/yql/essentials/udfs/common/histogram/test/cases/Distinct.sql
new file mode 100644
index 00000000000..347ffe76f26
--- /dev/null
+++ b/yql/essentials/udfs/common/histogram/test/cases/Distinct.sql
@@ -0,0 +1,9 @@
+/* syntax version 1 */
+SELECT -- HISTOGRAM over DISTINCT key values, plus a comparison against the non-DISTINCT histogram
+ HISTOGRAM(DISTINCT key) AS key_histogram,
+ Histogram::Print(HISTOGRAM(key)) <> Histogram::Print(HISTOGRAM(DISTINCT key)) AS is_different -- true when the two printed histograms are not equal
+FROM ( -- key is cast to Double before aggregation
+ SELECT
+ CAST(key AS Double) AS key
+ FROM Input
+);
diff --git a/yql/essentials/udfs/common/histogram/test/cases/Intervals.sql b/yql/essentials/udfs/common/histogram/test/cases/Intervals.sql
new file mode 100644
index 00000000000..eaceab05780
--- /dev/null
+++ b/yql/essentials/udfs/common/histogram/test/cases/Intervals.sql
@@ -0,0 +1,12 @@
+/* syntax version 1 */
+SELECT -- HISTOGRAM with an explicit numeric second argument: 1, 3 and 1000000 (presumably the interval/bin limit, going by the file name - verify against the UDF docs)
+ HISTOGRAM(key, 1) AS key_histogram,
+ HISTOGRAM(subkey, 3) AS subkey_histogram,
+ HISTOGRAM(value, 1000000) AS value_histogram
+FROM ( -- Input columns are cast to Double before aggregation
+ SELECT
+ CAST(key AS Double) AS key,
+ CAST(subkey AS Double) AS subkey,
+ CAST(value AS Double) AS value
+ FROM Input
+);
diff --git a/yql/essentials/udfs/common/histogram/test/cases/Linear.in b/yql/essentials/udfs/common/histogram/test/cases/Linear.in
new file mode 100644
index 00000000000..22a04e936f1
--- /dev/null
+++ b/yql/essentials/udfs/common/histogram/test/cases/Linear.in
@@ -0,0 +1,19 @@
+{"key"="0";"subkey"="0";"value"="-3"};
+{"key"="0";"subkey"="0";"value"="-49"};
+{"key"="2";"subkey"="2";"value"="2"};
+{"key"="3";"subkey"="4";"value"="8"};
+{"key"="4";"subkey"="8";"value"="32"};
+{"key"="5";"subkey"="16";"value"="88"};
+{"key"="5";"subkey"="16";"value"="128"};
+{"key"="6";"subkey"="32";"value"="512"};
+{"key"="7";"subkey"="64";"value"="2048"};
+{"key"="7";"subkey"="64";"value"="2048"};
+{"key"="7";"subkey"="64";"value"="2048"};
+{"key"="7";"subkey"="64";"value"="2048"};
+{"key"="7";"subkey"="64";"value"="2048"};
+{"key"="2";"subkey"="2";"value"="2048"};
+{"key"="3";"subkey"="4";"value"="2048"};
+{"key"="2";"subkey"="2";"value"="2048"};
+{"key"="3";"subkey"="4";"value"="2048"};
+{"key"="0";"subkey"="0";"value"="-2049"};
+{"key"="0";"subkey"="0";"value"="-2149"};
diff --git a/yql/essentials/udfs/common/histogram/test/cases/Linear.sql b/yql/essentials/udfs/common/histogram/test/cases/Linear.sql
new file mode 100644
index 00000000000..08af7fc3019
--- /dev/null
+++ b/yql/essentials/udfs/common/histogram/test/cases/Linear.sql
@@ -0,0 +1,10 @@
+/* syntax version 1 */
+SELECT -- LinearHistogram over value with one, two and four arguments
+ LinearHistogram(value) AS default,
+ LinearHistogram(value, 33) AS linear_size,
+ LinearHistogram(value, 100, 100, 1000) AS linear_min_max -- going by the alias, the last two arguments are presumably min and max - verify
+FROM ( -- only the value column is read, cast to Double
+ SELECT
+ CAST(value AS Double) AS value
+ FROM Input
+);
diff --git a/yql/essentials/udfs/common/histogram/test/cases/Logarithmic.in b/yql/essentials/udfs/common/histogram/test/cases/Logarithmic.in
new file mode 100644
index 00000000000..22a04e936f1
--- /dev/null
+++ b/yql/essentials/udfs/common/histogram/test/cases/Logarithmic.in
@@ -0,0 +1,19 @@
+{"key"="0";"subkey"="0";"value"="-3"};
+{"key"="0";"subkey"="0";"value"="-49"};
+{"key"="2";"subkey"="2";"value"="2"};
+{"key"="3";"subkey"="4";"value"="8"};
+{"key"="4";"subkey"="8";"value"="32"};
+{"key"="5";"subkey"="16";"value"="88"};
+{"key"="5";"subkey"="16";"value"="128"};
+{"key"="6";"subkey"="32";"value"="512"};
+{"key"="7";"subkey"="64";"value"="2048"};
+{"key"="7";"subkey"="64";"value"="2048"};
+{"key"="7";"subkey"="64";"value"="2048"};
+{"key"="7";"subkey"="64";"value"="2048"};
+{"key"="7";"subkey"="64";"value"="2048"};
+{"key"="2";"subkey"="2";"value"="2048"};
+{"key"="3";"subkey"="4";"value"="2048"};
+{"key"="2";"subkey"="2";"value"="2048"};
+{"key"="3";"subkey"="4";"value"="2048"};
+{"key"="0";"subkey"="0";"value"="-2049"};
+{"key"="0";"subkey"="0";"value"="-2149"};
diff --git a/yql/essentials/udfs/common/histogram/test/cases/Logarithmic.sql b/yql/essentials/udfs/common/histogram/test/cases/Logarithmic.sql
new file mode 100644
index 00000000000..216488e91ec
--- /dev/null
+++ b/yql/essentials/udfs/common/histogram/test/cases/Logarithmic.sql
@@ -0,0 +1,10 @@
+/* syntax version 1 */
+SELECT -- exercises both spellings used in this file: LogarithmicHistogram and LogHistogram
+ LogarithmicHistogram(value) AS default,
+ LogHistogram(value, 2) AS log_size,
+ LogHistogram(value, 0.5, 10, 10000) AS log_min_max -- going by the alias, the last two arguments are presumably min and max - verify
+FROM ( -- only the value column is read, cast to Double
+ SELECT
+ CAST(value AS Double) AS value
+ FROM Input
+);
diff --git a/yql/essentials/udfs/common/histogram/test/cases/Methods.sql b/yql/essentials/udfs/common/histogram/test/cases/Methods.sql
new file mode 100644
index 00000000000..8c351f80708
--- /dev/null
+++ b/yql/essentials/udfs/common/histogram/test/cases/Methods.sql
@@ -0,0 +1,12 @@
+/* syntax version 1 */
+SELECT -- calls five Histogram:: functions on the same histogram; each column alias equals the function name
+ Histogram::GetSumAboveBound(histo, 5.0) AS GetSumAboveBound,
+ Histogram::GetSumBelowBound(histo, 5.0) AS GetSumBelowBound,
+ Histogram::CalcUpperBound(histo, 5.0) AS CalcUpperBound,
+ Histogram::CalcLowerBound(histo, 5.0) AS CalcLowerBound,
+ Histogram::GetSumInRange(histo, 5.0, 20.0) AS GetSumInRange
+FROM ( -- histo is HISTOGRAM over key cast to Double
+ SELECT
+ HISTOGRAM(CAST(key AS Double)) AS histo
+ FROM Input
+);
diff --git a/yql/essentials/udfs/common/histogram/test/cases/Normalize.sql b/yql/essentials/udfs/common/histogram/test/cases/Normalize.sql
new file mode 100644
index 00000000000..3dd4a12621a
--- /dev/null
+++ b/yql/essentials/udfs/common/histogram/test/cases/Normalize.sql
@@ -0,0 +1,12 @@
+/* syntax version 1 */
+SELECT -- Histogram::Normalize with no second argument, with 1.0 and with -1.0
+ Histogram::Normalize(HISTOGRAM(key)) AS key_histogram,
+ Histogram::Normalize(HISTOGRAM(subkey), 1.0) AS subkey_histogram,
+ Histogram::Normalize(HISTOGRAM(value), -1.0) AS value_histogram
+FROM ( -- Input columns are cast to Double before aggregation
+ SELECT
+ CAST(key AS Double) AS key,
+ CAST(subkey AS Double) AS subkey,
+ CAST(value AS Double) AS value
+ FROM Input
+);
diff --git a/yql/essentials/udfs/common/histogram/test/cases/Print.sql b/yql/essentials/udfs/common/histogram/test/cases/Print.sql
new file mode 100644
index 00000000000..5434e8dea31
--- /dev/null
+++ b/yql/essentials/udfs/common/histogram/test/cases/Print.sql
@@ -0,0 +1,12 @@
+/* syntax version 1 */
+SELECT -- Histogram::Print of each column's histogram; only the value column passes a second argument (50)
+ Histogram::Print(HISTOGRAM(key)) AS key_histogram,
+ Histogram::Print(HISTOGRAM(subkey)) AS subkey_histogram,
+ Histogram::Print(HISTOGRAM(value), 50) AS value_histogram
+FROM ( -- Input columns are cast to Double before aggregation
+ SELECT
+ CAST(key AS Double) AS key,
+ CAST(subkey AS Double) AS subkey,
+ CAST(value AS Double) AS value
+ FROM Input
+);
diff --git a/yql/essentials/udfs/common/histogram/test/cases/ToCumulativeDistributionFunction.sql b/yql/essentials/udfs/common/histogram/test/cases/ToCumulativeDistributionFunction.sql
new file mode 100644
index 00000000000..e71db5c68f8
--- /dev/null
+++ b/yql/essentials/udfs/common/histogram/test/cases/ToCumulativeDistributionFunction.sql
@@ -0,0 +1,13 @@
+/* syntax version 1 */
+SELECT -- Histogram::ToCumulativeDistributionFunction applied to each column's histogram
+ Histogram::ToCumulativeDistributionFunction(HISTOGRAM(key)) AS key_histogram,
+ Histogram::ToCumulativeDistributionFunction(HISTOGRAM(subkey)) AS subkey_histogram,
+ Histogram::ToCumulativeDistributionFunction(HISTOGRAM(value)) AS value_histogram,
+ Histogram::Normalize(Histogram::ToCumulativeDistributionFunction(HISTOGRAM(subkey)), 100, True) AS subkey_norm_cdf_histogram -- the subkey CDF passed on to Normalize with arguments (100, True)
+FROM ( -- Input columns are cast to Double before aggregation
+ SELECT
+ CAST(key AS Double) AS key,
+ CAST(subkey AS Double) AS subkey,
+ CAST(value AS Double) AS value
+ FROM Input
+);
diff --git a/yql/essentials/udfs/common/histogram/test/cases/Weights.in b/yql/essentials/udfs/common/histogram/test/cases/Weights.in
new file mode 100644
index 00000000000..0f50051d065
--- /dev/null
+++ b/yql/essentials/udfs/common/histogram/test/cases/Weights.in
@@ -0,0 +1,16 @@
+{"key"="0";"subkey"="1000";"value"="-1"};
+{"key"="1";"subkey"="1";"value"="1"};
+{"key"="2";"subkey"="2";"value"="2"};
+{"key"="3";"subkey"="4";"value"="8"};
+{"key"="4";"subkey"="8";"value"="32"};
+{"key"="5";"subkey"="16";"value"="128"};
+{"key"="6";"subkey"="32";"value"="512"};
+{"key"="7";"subkey"="64";"value"="2048"};
+{"key"="7";"subkey"="64";"value"="2048"};
+{"key"="7";"subkey"="64";"value"="2048"};
+{"key"="7";"subkey"="64";"value"="2048"};
+{"key"="7";"subkey"="64";"value"="2048"};
+{"key"="2";"subkey"="2";"value"="2048"};
+{"key"="3";"subkey"="4";"value"="2048"};
+{"key"="2";"subkey"="2";"value"="2048"};
+{"key"="3";"subkey"="4";"value"="2048"};
diff --git a/yql/essentials/udfs/common/histogram/test/cases/Weights.sql b/yql/essentials/udfs/common/histogram/test/cases/Weights.sql
new file mode 100644
index 00000000000..77229008f6e
--- /dev/null
+++ b/yql/essentials/udfs/common/histogram/test/cases/Weights.sql
@@ -0,0 +1,12 @@
+/* syntax version 1 */
+SELECT -- HISTOGRAM called with a second, per-row expression argument (and a third numeric one)
+ HISTOGRAM(key, value / subkey) AS basic_weight,
+ HISTOGRAM(key, value, 3) AS weight_and_bins,
+ Histogram::Print(HISTOGRAM(key)) <> Histogram::Print(HISTOGRAM(key, value)) AS equality_check -- compares with <>: true when the two printed histograms differ
+FROM ( -- subkey and value fall back to 1.0 when the cast to Double yields NULL
+ SELECT
+ CAST(key AS Double) AS key,
+ COALESCE(CAST(subkey AS Double), 1.0) AS subkey,
+ COALESCE(CAST(value AS Double), 1.0) AS value
+ FROM Input
+);
diff --git a/yql/essentials/udfs/common/histogram/test/cases/default.in b/yql/essentials/udfs/common/histogram/test/cases/default.in
new file mode 100644
index 00000000000..494ea1402e8
--- /dev/null
+++ b/yql/essentials/udfs/common/histogram/test/cases/default.in
@@ -0,0 +1,16 @@
+{"key"="0";"subkey"="0";"value"="-1"};
+{"key"="1";"subkey"="1";"value"="1"};
+{"key"="2";"subkey"="2";"value"="2"};
+{"key"="3";"subkey"="4";"value"="8"};
+{"key"="4";"subkey"="8";"value"="32"};
+{"key"="5";"subkey"="16";"value"="128"};
+{"key"="6";"subkey"="32";"value"="512"};
+{"key"="7";"subkey"="64";"value"="2048"};
+{"key"="7";"subkey"="64";"value"="2048"};
+{"key"="7";"subkey"="64";"value"="2048"};
+{"key"="7";"subkey"="64";"value"="2048"};
+{"key"="7";"subkey"="64";"value"="2048"};
+{"key"="2";"subkey"="2";"value"="2048"};
+{"key"="3";"subkey"="4";"value"="2048"};
+{"key"="2";"subkey"="2";"value"="2048"};
+{"key"="3";"subkey"="4";"value"="2048"};
diff --git a/yql/essentials/udfs/common/histogram/test/ya.make b/yql/essentials/udfs/common/histogram/test/ya.make
new file mode 100644
index 00000000000..e03673fbdfe
--- /dev/null
+++ b/yql/essentials/udfs/common/histogram/test/ya.make
@@ -0,0 +1,13 @@
+YQL_UDF_TEST_CONTRIB() # test module for the histogram UDF (see DEPENDS below)
+
+TIMEOUT(600)
+
+SIZE(MEDIUM)
+
+IF (SANITIZER_TYPE == "memory") # only when SANITIZER_TYPE is "memory": tag the test ya:not_autocheck
+ TAG(ya:not_autocheck) # YQL-15385
+ENDIF()
+
+DEPENDS(yql/essentials/udfs/common/histogram) # the UDF directory these test cases belong to
+
+END()
diff --git a/yql/essentials/udfs/common/histogram/ya.make b/yql/essentials/udfs/common/histogram/ya.make
new file mode 100644
index 00000000000..937a3c68356
--- /dev/null
+++ b/yql/essentials/udfs/common/histogram/ya.make
@@ -0,0 +1,32 @@
+IF (YQL_PACKAGED) # packaged mode: take libhistogram_udf.so from a sandbox resource
+ PACKAGE()
+
+ FROM_SANDBOX(
+ FILE 7319896927 OUT_NOAUTO libhistogram_udf.so
+ )
+
+ END()
+ELSE() # otherwise declare the UDF from histogram_udf.cpp
+YQL_UDF_CONTRIB(histogram_udf)
+
+ YQL_ABI_VERSION(
+ 2
+ 28
+ 0
+ )
+
+ SRCS(
+ histogram_udf.cpp
+ )
+
+ PEERDIR(
+ library/cpp/histogram/adaptive
+ )
+
+ END()
+
+ENDIF()
+
+RECURSE_FOR_TESTS(
+ test
+) \ No newline at end of file
diff --git a/yql/essentials/udfs/common/hyperloglog/hyperloglog_udf.cpp b/yql/essentials/udfs/common/hyperloglog/hyperloglog_udf.cpp
new file mode 100644
index 00000000000..348fd69b9dc
--- /dev/null
+++ b/yql/essentials/udfs/common/hyperloglog/hyperloglog_udf.cpp
@@ -0,0 +1,423 @@
+#include <yql/essentials/public/udf/udf_helpers.h>
+
+#include <library/cpp/hyperloglog/hyperloglog.h>
+
+#include <util/generic/hash_set.h>
+
+#include <variant>
+
+using namespace NKikimr;
+using namespace NUdf;
+
+namespace {
+    // Distinct-value counter with two internal representations: an exact set of
+    // hashes (the initial state) and a HyperLogLog sketch. The set is replaced by
+    // a sketch as soon as it reaches SizeLimit elements.
+    class THybridHyperLogLog {
+    private:
+        using THybridSet = THashSet<ui64, std::hash<ui64>, std::equal_to<ui64>, TStdAllocatorForUdf<ui64>>;
+        using THybridHll = THyperLogLogWithAlloc<TStdAllocatorForUdf<ui8>>;
+
+        // Starts in set mode; the conversion threshold is (2^precision) / 8 elements.
+        explicit THybridHyperLogLog(unsigned precision)
+            : Var(THybridSet())
+            , SizeLimit((1u << precision) / 8)
+            , Precision(precision)
+        {
+        }
+
+        // True while the exact set (variant alternative #1) is the active state.
+        bool IsSet() const {
+            return std::holds_alternative<THybridSet>(Var);
+        }
+
+        const THybridSet& GetSetRef() const {
+            return std::get<THybridSet>(Var);
+        }
+
+        THybridSet& GetMutableSetRef() {
+            return std::get<THybridSet>(Var);
+        }
+
+        const THybridHll& GetHllRef() const {
+            return std::get<THybridHll>(Var);
+        }
+
+        THybridHll& GetMutableHllRef() {
+            return std::get<THybridHll>(Var);
+        }
+
+        // Builds a sketch of the stored precision and feeds it every hash of the set.
+        THybridHll ConvertToHyperLogLog() const {
+            auto sketch = THybridHll::Create(Precision);
+            for (const ui64 item : GetSetRef()) {
+                sketch.Update(item);
+            }
+            return sketch;
+        }
+
+        // Set mode only: replaces the set with a sketch once it holds SizeLimit items.
+        void PromoteIfFull() {
+            if (GetSetRef().size() >= SizeLimit) {
+                Var = ConvertToHyperLogLog();
+            }
+        }
+
+    public:
+        THybridHyperLogLog(THybridHyperLogLog&&) = default;
+
+        THybridHyperLogLog& operator=(THybridHyperLogLog&&) = default;
+
+        // Adds one hash to whichever representation is active.
+        void Update(ui64 hash) {
+            if (!IsSet()) {
+                GetMutableHllRef().Update(hash);
+                return;
+            }
+            GetMutableSetRef().insert(hash);
+            PromoteIfFull();
+        }
+
+        // Folds rh into this object. Two sets are united (and promoted if the
+        // union is large enough); in every other combination the result is a sketch.
+        void Merge(const THybridHyperLogLog& rh) {
+            if (IsSet() && rh.IsSet()) {
+                const THybridSet& incoming = rh.GetSetRef();
+                GetMutableSetRef().insert(incoming.begin(), incoming.end());
+                PromoteIfFull();
+                return;
+            }
+
+            if (IsSet()) {
+                Var = ConvertToHyperLogLog();
+            }
+
+            THybridHll& mine = GetMutableHllRef();
+            if (!rh.IsSet()) {
+                mine.Merge(rh.GetHllRef());
+            } else {
+                mine.Merge(rh.ConvertToHyperLogLog());
+            }
+        }
+
+        // Layout: one byte with the variant index, one byte with the precision,
+        // then the payload of the active representation.
+        void Save(IOutputStream& out) const {
+            out.Write(static_cast<char>(Var.index()));
+            out.Write(static_cast<char>(Precision));
+            if (!IsSet()) {
+                GetHllRef().Save(out);
+                return;
+            }
+            ::Save(&out, GetSetRef());
+        }
+
+        // Exact element count in set mode, the sketch estimate otherwise.
+        ui64 Estimate() const {
+            if (!IsSet()) {
+                return GetHllRef().Estimate();
+            }
+            return GetSetRef().size();
+        }
+
+        // Makes an empty counter; the precision must lie within the THyperLogLog limits.
+        static THybridHyperLogLog Create(unsigned precision) {
+            Y_ENSURE(precision >= THyperLogLog::PRECISION_MIN && precision <= THyperLogLog::PRECISION_MAX);
+            return THybridHyperLogLog(precision);
+        }
+
+        // Reads back what Save() wrote: a zero first byte selects the sketch
+        // payload, any other value selects the set payload.
+        static THybridHyperLogLog Load(IInputStream& in) {
+            char type;
+            Y_ENSURE(in.ReadChar(type));
+            char precision;
+            Y_ENSURE(in.ReadChar(precision));
+
+            THybridHyperLogLog res = Create(precision);
+            if (type == 0) {
+                res.Var = THybridHll::Load(in);
+            } else {
+                ::Load(&in, res.GetMutableSetRef());
+            }
+            return res;
+        }
+
+    private:
+        std::variant<THybridHll, THybridSet> Var;
+
+        size_t SizeLimit;
+
+        unsigned Precision;
+    };
+
+ extern const char HyperLogLogResourceName[] = "HyperLogLog.State"; // resource tag used by TResource<> in the UDF signatures and by THyperLogLogResource
+
+ using THyperLogLogResource = TBoxedResource<THybridHyperLogLog, HyperLogLogResourceName>; // boxed resource wrapping one THybridHyperLogLog
+
+    // UDF "Create": signature (ui64, ui32) -> HyperLogLog.State resource.
+    // Creates a counter with precision args[1] and feeds it the hash args[0].
+    class THyperLogLog_Create: public TBoxedValue {
+    public:
+        THyperLogLog_Create(TSourcePosition pos)
+            : Pos_(pos)
+        {
+        }
+
+        static const TStringRef& Name() {
+            static auto udfName = TStringRef::Of("Create");
+            return udfName;
+        }
+
+        // Declares the signature (and, unless typesOnly, the implementation)
+        // when the requested name is ours; returns false for any other name.
+        static bool DeclareSignature(
+            const TStringRef& name,
+            TType* userType,
+            IFunctionTypeInfoBuilder& builder,
+            bool typesOnly) {
+            Y_UNUSED(userType);
+            if (!(Name() == name)) {
+                return false;
+            }
+
+            builder.SimpleSignature<TResource<HyperLogLogResourceName>(ui64, ui32)>();
+            if (!typesOnly) {
+                builder.Implementation(new THyperLogLog_Create(builder.GetSourcePosition()));
+            }
+            return true;
+        }
+
+    private:
+        // Any std::exception is reported through UdfTerminate together with the source position.
+        TUnboxedValue Run(
+            const IValueBuilder*,
+            const TUnboxedValuePod* args) const override {
+            try {
+                THolder<THyperLogLogResource> state(new THyperLogLogResource(THybridHyperLogLog::Create(args[1].Get<ui32>())));
+                state->Get()->Update(args[0].Get<ui64>());
+                return TUnboxedValuePod(state.Release());
+            } catch (const std::exception& e) {
+                UdfTerminate((TStringBuilder() << Pos_ << " " << e.what()).data());
+            }
+        }
+
+    private:
+        TSourcePosition Pos_;
+    };
+
+    // UDF "AddValue": signature (HyperLogLog.State, ui64) -> HyperLogLog.State.
+    // Updates the counter held in args[0] in place with the hash args[1] and
+    // returns that same resource.
+    class THyperLogLog_AddValue: public TBoxedValue {
+    public:
+        THyperLogLog_AddValue(TSourcePosition pos)
+            : Pos_(pos)
+        {
+        }
+
+        static const TStringRef& Name() {
+            static auto udfName = TStringRef::Of("AddValue");
+            return udfName;
+        }
+
+        // Declares the signature, the implementation (unless typesOnly) and the
+        // strict flag when the requested name is ours; false for any other name.
+        static bool DeclareSignature(
+            const TStringRef& name,
+            TType* userType,
+            IFunctionTypeInfoBuilder& builder,
+            bool typesOnly) {
+            Y_UNUSED(userType);
+            if (!(Name() == name)) {
+                return false;
+            }
+
+            builder.SimpleSignature<TResource<HyperLogLogResourceName>(TResource<HyperLogLogResourceName>, ui64)>();
+            if (!typesOnly) {
+                builder.Implementation(new THyperLogLog_AddValue(builder.GetSourcePosition()));
+            }
+            builder.IsStrict();
+            return true;
+        }
+
+    private:
+        // Any std::exception is reported through UdfTerminate together with the source position.
+        TUnboxedValue Run(
+            const IValueBuilder*,
+            const TUnboxedValuePod* args) const override {
+            try {
+                auto* state = static_cast<THyperLogLogResource*>(args[0].AsBoxed().Get());
+                state->Get()->Update(args[1].Get<ui64>());
+                return TUnboxedValuePod(args[0]);
+            } catch (const std::exception& e) {
+                UdfTerminate((TStringBuilder() << Pos_ << " " << e.what()).data());
+            }
+        }
+
+    private:
+        TSourcePosition Pos_;
+    };
+
+    // UDF "Serialize": signature (HyperLogLog.State) -> string.
+    // Writes the counter with THybridHyperLogLog::Save and returns the bytes.
+    class THyperLogLog_Serialize: public TBoxedValue {
+    public:
+        THyperLogLog_Serialize(TSourcePosition pos)
+            : Pos_(pos)
+        {
+        }
+
+        static const TStringRef& Name() {
+            static auto udfName = TStringRef::Of("Serialize");
+            return udfName;
+        }
+
+        // Declares the signature (and, unless typesOnly, the implementation)
+        // when the requested name is ours; returns false for any other name.
+        static bool DeclareSignature(
+            const TStringRef& name,
+            TType* userType,
+            IFunctionTypeInfoBuilder& builder,
+            bool typesOnly) {
+            Y_UNUSED(userType);
+            if (!(Name() == name)) {
+                return false;
+            }
+
+            builder.SimpleSignature<char*(TResource<HyperLogLogResourceName>)>();
+            if (!typesOnly) {
+                builder.Implementation(new THyperLogLog_Serialize(builder.GetSourcePosition()));
+            }
+            return true;
+        }
+
+    private:
+        // Any std::exception is reported through UdfTerminate together with the source position.
+        TUnboxedValue Run(
+            const IValueBuilder* valueBuilder,
+            const TUnboxedValuePod* args) const override {
+            try {
+                auto* state = static_cast<THyperLogLogResource*>(args[0].AsBoxed().Get());
+                TStringStream buffer;
+                state->Get()->Save(buffer);
+                return valueBuilder->NewString(buffer.Str());
+            } catch (const std::exception& e) {
+                UdfTerminate((TStringBuilder() << Pos_ << " " << e.what()).data());
+            }
+        }
+
+    private:
+        TSourcePosition Pos_;
+    };
+
+    // UDF "Deserialize": signature (string) -> HyperLogLog.State.
+    // Rebuilds a counter from the string with THybridHyperLogLog::Load.
+    class THyperLogLog_Deserialize: public TBoxedValue {
+    public:
+        THyperLogLog_Deserialize(TSourcePosition pos)
+            : Pos_(pos)
+        {
+        }
+
+        static const TStringRef& Name() {
+            static auto udfName = TStringRef::Of("Deserialize");
+            return udfName;
+        }
+
+        // Declares the signature (and, unless typesOnly, the implementation)
+        // when the requested name is ours; returns false for any other name.
+        static bool DeclareSignature(
+            const TStringRef& name,
+            TType* userType,
+            IFunctionTypeInfoBuilder& builder,
+            bool typesOnly) {
+            Y_UNUSED(userType);
+            if (!(Name() == name)) {
+                return false;
+            }
+
+            builder.SimpleSignature<TResource<HyperLogLogResourceName>(char*)>();
+            if (!typesOnly) {
+                builder.Implementation(new THyperLogLog_Deserialize(builder.GetSourcePosition()));
+            }
+            return true;
+        }
+
+    private:
+        // Any std::exception (including a failed read in Load) is reported
+        // through UdfTerminate together with the source position.
+        TUnboxedValue Run(
+            const IValueBuilder*,
+            const TUnboxedValuePod* args) const override {
+            try {
+                const TString serialized(args[0].AsStringRef());
+                TStringInput stream(serialized);
+                THolder<THyperLogLogResource> state(new THyperLogLogResource(THybridHyperLogLog::Load(stream)));
+                return TUnboxedValuePod(state.Release());
+            } catch (const std::exception& e) {
+                UdfTerminate((TStringBuilder() << Pos_ << " " << e.what()).data());
+            }
+        }
+
+    private:
+        TSourcePosition Pos_;
+    };
+
+    // UDF "Merge": signature (HyperLogLog.State, HyperLogLog.State) -> HyperLogLog.State.
+    // Folds the counter from args[0] into the counter from args[1] (which is
+    // modified in place) and returns args[1].
+    class THyperLogLog_Merge: public TBoxedValue {
+    public:
+        THyperLogLog_Merge(TSourcePosition pos)
+            : Pos_(pos)
+        {
+        }
+
+        static const TStringRef& Name() {
+            static auto udfName = TStringRef::Of("Merge");
+            return udfName;
+        }
+
+        // Declares the signature, the implementation (unless typesOnly) and the
+        // strict flag when the requested name is ours; false for any other name.
+        static bool DeclareSignature(
+            const TStringRef& name,
+            TType* userType,
+            IFunctionTypeInfoBuilder& builder,
+            bool typesOnly) {
+            Y_UNUSED(userType);
+            if (!(Name() == name)) {
+                return false;
+            }
+
+            builder.SimpleSignature<TResource<HyperLogLogResourceName>(TResource<HyperLogLogResourceName>, TResource<HyperLogLogResourceName>)>();
+            if (!typesOnly) {
+                builder.Implementation(new THyperLogLog_Merge(builder.GetSourcePosition()));
+            }
+            builder.IsStrict();
+            return true;
+        }
+
+    private:
+        // Any std::exception is reported through UdfTerminate together with the source position.
+        TUnboxedValue Run(
+            const IValueBuilder*,
+            const TUnboxedValuePod* args) const override {
+            try {
+                auto source = static_cast<THyperLogLogResource*>(args[0].AsBoxed().Get())->Get();
+                auto target = static_cast<THyperLogLogResource*>(args[1].AsBoxed().Get())->Get();
+                target->Merge(*source);
+                return TUnboxedValuePod(args[1]);
+            } catch (const std::exception& e) {
+                UdfTerminate((TStringBuilder() << Pos_ << " " << e.what()).data());
+            }
+        }
+
+    private:
+        TSourcePosition Pos_;
+    };
+
+    // UDF "GetResult": takes a HyperLogLog.State resource and returns ui64,
+    // the value of THybridHyperLogLog::Estimate() for the counter in args[0].
+    class THyperLogLog_GetResult: public TBoxedValue {
+    public:
+        THyperLogLog_GetResult(TSourcePosition pos)
+            : Pos_(pos)
+        {}
+
+        static const TStringRef& Name() {
+            static auto nameRef = TStringRef::Of("GetResult");
+            return nameRef;
+        }
+
+    private:
+        // Uses the same error handling as the other UDFs in this file: any
+        // std::exception is turned into UdfTerminate with the source position
+        // so that it cannot propagate out of Run().
+        TUnboxedValue Run(
+            const IValueBuilder* valueBuilder,
+            const TUnboxedValuePod* args) const override {
+            try {
+                Y_UNUSED(valueBuilder);
+                auto hll = static_cast<THyperLogLogResource*>(args[0].AsBoxed().Get())->Get();
+                return TUnboxedValuePod(hll->Estimate());
+            } catch (const std::exception& e) {
+                UdfTerminate((TStringBuilder() << Pos_ << " " << e.what()).data());
+            }
+        }
+
+    public:
+        // Declares the signature, the implementation (unless typesOnly) and the
+        // strict flag when the requested name is ours; false for any other name.
+        static bool DeclareSignature(
+            const TStringRef& name,
+            TType* userType,
+            IFunctionTypeInfoBuilder& builder,
+            bool typesOnly) {
+            Y_UNUSED(userType);
+            if (Name() == name) {
+                auto resource = builder.Resource(HyperLogLogResourceName);
+                builder.Args()->Add(resource).Done().Returns<ui64>();
+
+                if (!typesOnly) {
+                    builder.Implementation(new THyperLogLog_GetResult(builder.GetSourcePosition()));
+                }
+                builder.IsStrict();
+                return true;
+            } else {
+                return false;
+            }
+        }
+
+    private:
+        TSourcePosition Pos_;
+    };
+
+ SIMPLE_MODULE(THyperLogLogModule, // module made of the six UDF classes defined above
+ THyperLogLog_Create,
+ THyperLogLog_AddValue,
+ THyperLogLog_Serialize,
+ THyperLogLog_Deserialize,
+ THyperLogLog_Merge,
+ THyperLogLog_GetResult)
+}
+
+REGISTER_MODULES(THyperLogLogModule) // the only module this file passes to REGISTER_MODULES
diff --git a/yql/essentials/udfs/common/hyperloglog/test/canondata/result.json b/yql/essentials/udfs/common/hyperloglog/test/canondata/result.json
new file mode 100644
index 00000000000..fb6112fc5bc
--- /dev/null
+++ b/yql/essentials/udfs/common/hyperloglog/test/canondata/result.json
@@ -0,0 +1,7 @@
+{
+ "test.test[Basic]": [
+ {
+ "uri": "file://test.test_Basic_/results.txt"
+ }
+ ]
+}
diff --git a/yql/essentials/udfs/common/hyperloglog/test/canondata/test.test_Basic_/results.txt b/yql/essentials/udfs/common/hyperloglog/test/canondata/test.test_Basic_/results.txt
new file mode 100644
index 00000000000..8a7a259e2de
--- /dev/null
+++ b/yql/essentials/udfs/common/hyperloglog/test/canondata/test.test_Basic_/results.txt
@@ -0,0 +1,59 @@
+[
+ {
+ "Write" = [
+ {
+ "Type" = [
+ "ListType";
+ [
+ "StructType";
+ [
+ [
+ "str";
+ [
+ "OptionalType";
+ [
+ "DataType";
+ "Uint64"
+ ]
+ ]
+ ];
+ [
+ "double";
+ [
+ "OptionalType";
+ [
+ "DataType";
+ "Uint64"
+ ]
+ ]
+ ];
+ [
+ "int";
+ [
+ "OptionalType";
+ [
+ "DataType";
+ "Uint64"
+ ]
+ ]
+ ]
+ ]
+ ]
+ ];
+ "Data" = [
+ [
+ [
+ "5972"
+ ];
+ [
+ "1200"
+ ];
+ [
+ "5988"
+ ]
+ ]
+ ]
+ }
+ ]
+ }
+] \ No newline at end of file
diff --git a/yql/essentials/udfs/common/hyperloglog/test/cases/Basic.in b/yql/essentials/udfs/common/hyperloglog/test/cases/Basic.in
new file mode 100644
index 00000000000..d212651343d
--- /dev/null
+++ b/yql/essentials/udfs/common/hyperloglog/test/cases/Basic.in
@@ -0,0 +1,6000 @@
+{"key"="-1000";"subkey"="-200";"value"="-11512"};
+{"key"="-999";"subkey"="-200";"value"="-11501"};
+{"key"="-998";"subkey"="-200";"value"="-11489"};
+{"key"="-997";"subkey"="-200";"value"="-11478"};
+{"key"="-996";"subkey"="-200";"value"="-11466"};
+{"key"="-995";"subkey"="-199";"value"="-11455"};
+{"key"="-994";"subkey"="-199";"value"="-11443"};
+{"key"="-993";"subkey"="-199";"value"="-11432"};
+{"key"="-992";"subkey"="-199";"value"="-11420"};
+{"key"="-991";"subkey"="-199";"value"="-11409"};
+{"key"="-990";"subkey"="-198";"value"="-11397"};
+{"key"="-989";"subkey"="-198";"value"="-11386"};
+{"key"="-988";"subkey"="-198";"value"="-11374"};
+{"key"="-987";"subkey"="-198";"value"="-11363"};
+{"key"="-986";"subkey"="-198";"value"="-11351"};
+{"key"="-985";"subkey"="-197";"value"="-11340"};
+{"key"="-984";"subkey"="-197";"value"="-11328"};
+{"key"="-983";"subkey"="-197";"value"="-11317"};
+{"key"="-982";"subkey"="-197";"value"="-11305"};
+{"key"="-981";"subkey"="-197";"value"="-11294"};
+{"key"="-980";"subkey"="-196";"value"="-11282"};
+{"key"="-979";"subkey"="-196";"value"="-11271"};
+{"key"="-978";"subkey"="-196";"value"="-11259"};
+{"key"="-977";"subkey"="-196";"value"="-11248"};
+{"key"="-976";"subkey"="-196";"value"="-11236"};
+{"key"="-975";"subkey"="-195";"value"="-11225"};
+{"key"="-974";"subkey"="-195";"value"="-11213"};
+{"key"="-973";"subkey"="-195";"value"="-11202"};
+{"key"="-972";"subkey"="-195";"value"="-11190"};
+{"key"="-971";"subkey"="-195";"value"="-11179"};
+{"key"="-970";"subkey"="-194";"value"="-11167"};
+{"key"="-969";"subkey"="-194";"value"="-11156"};
+{"key"="-968";"subkey"="-194";"value"="-11144"};
+{"key"="-967";"subkey"="-194";"value"="-11132"};
+{"key"="-966";"subkey"="-194";"value"="-11121"};
+{"key"="-965";"subkey"="-193";"value"="-11109"};
+{"key"="-964";"subkey"="-193";"value"="-11098"};
+{"key"="-963";"subkey"="-193";"value"="-11086"};
+{"key"="-962";"subkey"="-193";"value"="-11075"};
+{"key"="-961";"subkey"="-193";"value"="-11063"};
+{"key"="-960";"subkey"="-192";"value"="-11052"};
+{"key"="-959";"subkey"="-192";"value"="-11040"};
+{"key"="-958";"subkey"="-192";"value"="-11029"};
+{"key"="-957";"subkey"="-192";"value"="-11017"};
+{"key"="-956";"subkey"="-192";"value"="-11006"};
+{"key"="-955";"subkey"="-191";"value"="-10994"};
+{"key"="-954";"subkey"="-191";"value"="-10983"};
+{"key"="-953";"subkey"="-191";"value"="-10971"};
+{"key"="-952";"subkey"="-191";"value"="-10960"};
+{"key"="-951";"subkey"="-191";"value"="-10948"};
+{"key"="-950";"subkey"="-190";"value"="-10937"};
+{"key"="-949";"subkey"="-190";"value"="-10925"};
+{"key"="-948";"subkey"="-190";"value"="-10914"};
+{"key"="-947";"subkey"="-190";"value"="-10902"};
+{"key"="-946";"subkey"="-190";"value"="-10891"};
+{"key"="-945";"subkey"="-189";"value"="-10879"};
+{"key"="-944";"subkey"="-189";"value"="-10868"};
+{"key"="-943";"subkey"="-189";"value"="-10856"};
+{"key"="-942";"subkey"="-189";"value"="-10845"};
+{"key"="-941";"subkey"="-189";"value"="-10833"};
+{"key"="-940";"subkey"="-188";"value"="-10822"};
+{"key"="-939";"subkey"="-188";"value"="-10810"};
+{"key"="-938";"subkey"="-188";"value"="-10799"};
+{"key"="-937";"subkey"="-188";"value"="-10787"};
+{"key"="-936";"subkey"="-188";"value"="-10776"};
+{"key"="-935";"subkey"="-187";"value"="-10764"};
+{"key"="-934";"subkey"="-187";"value"="-10753"};
+{"key"="-933";"subkey"="-187";"value"="-10741"};
+{"key"="-932";"subkey"="-187";"value"="-10730"};
+{"key"="-931";"subkey"="-187";"value"="-10718"};
+{"key"="-930";"subkey"="-186";"value"="-10707"};
+{"key"="-929";"subkey"="-186";"value"="-10695"};
+{"key"="-928";"subkey"="-186";"value"="-10683"};
+{"key"="-927";"subkey"="-186";"value"="-10672"};
+{"key"="-926";"subkey"="-186";"value"="-10660"};
+{"key"="-925";"subkey"="-185";"value"="-10649"};
+{"key"="-924";"subkey"="-185";"value"="-10637"};
+{"key"="-923";"subkey"="-185";"value"="-10626"};
+{"key"="-922";"subkey"="-185";"value"="-10614"};
+{"key"="-921";"subkey"="-185";"value"="-10603"};
+{"key"="-920";"subkey"="-184";"value"="-10591"};
+{"key"="-919";"subkey"="-184";"value"="-10580"};
+{"key"="-918";"subkey"="-184";"value"="-10568"};
+{"key"="-917";"subkey"="-184";"value"="-10557"};
+{"key"="-916";"subkey"="-184";"value"="-10545"};
+{"key"="-915";"subkey"="-183";"value"="-10534"};
+{"key"="-914";"subkey"="-183";"value"="-10522"};
+{"key"="-913";"subkey"="-183";"value"="-10511"};
+{"key"="-912";"subkey"="-183";"value"="-10499"};
+{"key"="-911";"subkey"="-183";"value"="-10488"};
+{"key"="-910";"subkey"="-182";"value"="-10476"};
+{"key"="-909";"subkey"="-182";"value"="-10465"};
+{"key"="-908";"subkey"="-182";"value"="-10453"};
+{"key"="-907";"subkey"="-182";"value"="-10442"};
+{"key"="-906";"subkey"="-182";"value"="-10430"};
+{"key"="-905";"subkey"="-181";"value"="-10419"};
+{"key"="-904";"subkey"="-181";"value"="-10407"};
+{"key"="-903";"subkey"="-181";"value"="-10396"};
+{"key"="-902";"subkey"="-181";"value"="-10384"};
+{"key"="-901";"subkey"="-181";"value"="-10373"};
+{"key"="-900";"subkey"="-180";"value"="-10361"};
+{"key"="-899";"subkey"="-180";"value"="-10350"};
+{"key"="-898";"subkey"="-180";"value"="-10338"};
+{"key"="-897";"subkey"="-180";"value"="-10327"};
+{"key"="-896";"subkey"="-180";"value"="-10315"};
+{"key"="-895";"subkey"="-179";"value"="-10304"};
+{"key"="-894";"subkey"="-179";"value"="-10292"};
+{"key"="-893";"subkey"="-179";"value"="-10281"};
+{"key"="-892";"subkey"="-179";"value"="-10269"};
+{"key"="-891";"subkey"="-179";"value"="-10258"};
+{"key"="-890";"subkey"="-178";"value"="-10246"};
+{"key"="-889";"subkey"="-178";"value"="-10234"};
+{"key"="-888";"subkey"="-178";"value"="-10223"};
+{"key"="-887";"subkey"="-178";"value"="-10211"};
+{"key"="-886";"subkey"="-178";"value"="-10200"};
+{"key"="-885";"subkey"="-177";"value"="-10188"};
+{"key"="-884";"subkey"="-177";"value"="-10177"};
+{"key"="-883";"subkey"="-177";"value"="-10165"};
+{"key"="-882";"subkey"="-177";"value"="-10154"};
+{"key"="-881";"subkey"="-177";"value"="-10142"};
+{"key"="-880";"subkey"="-176";"value"="-10131"};
+{"key"="-879";"subkey"="-176";"value"="-10119"};
+{"key"="-878";"subkey"="-176";"value"="-10108"};
+{"key"="-877";"subkey"="-176";"value"="-10096"};
+{"key"="-876";"subkey"="-176";"value"="-10085"};
+{"key"="-875";"subkey"="-175";"value"="-10073"};
+{"key"="-874";"subkey"="-175";"value"="-10062"};
+{"key"="-873";"subkey"="-175";"value"="-10050"};
+{"key"="-872";"subkey"="-175";"value"="-10039"};
+{"key"="-871";"subkey"="-175";"value"="-10027"};
+{"key"="-870";"subkey"="-174";"value"="-10016"};
+{"key"="-869";"subkey"="-174";"value"="-10004"};
+{"key"="-868";"subkey"="-174";"value"="-9993"};
+{"key"="-867";"subkey"="-174";"value"="-9981"};
+{"key"="-866";"subkey"="-174";"value"="-9970"};
+{"key"="-865";"subkey"="-173";"value"="-9958"};
+{"key"="-864";"subkey"="-173";"value"="-9947"};
+{"key"="-863";"subkey"="-173";"value"="-9935"};
+{"key"="-862";"subkey"="-173";"value"="-9924"};
+{"key"="-861";"subkey"="-173";"value"="-9912"};
+{"key"="-860";"subkey"="-172";"value"="-9901"};
+{"key"="-859";"subkey"="-172";"value"="-9889"};
+{"key"="-858";"subkey"="-172";"value"="-9878"};
+{"key"="-857";"subkey"="-172";"value"="-9866"};
+{"key"="-856";"subkey"="-172";"value"="-9855"};
+{"key"="-855";"subkey"="-171";"value"="-9843"};
+{"key"="-854";"subkey"="-171";"value"="-9832"};
+{"key"="-853";"subkey"="-171";"value"="-9820"};
+{"key"="-852";"subkey"="-171";"value"="-9809"};
+{"key"="-851";"subkey"="-171";"value"="-9797"};
+{"key"="-850";"subkey"="-170";"value"="-9785"};
+{"key"="-849";"subkey"="-170";"value"="-9774"};
+{"key"="-848";"subkey"="-170";"value"="-9762"};
+{"key"="-847";"subkey"="-170";"value"="-9751"};
+{"key"="-846";"subkey"="-170";"value"="-9739"};
+{"key"="-845";"subkey"="-169";"value"="-9728"};
+{"key"="-844";"subkey"="-169";"value"="-9716"};
+{"key"="-843";"subkey"="-169";"value"="-9705"};
+{"key"="-842";"subkey"="-169";"value"="-9693"};
+{"key"="-841";"subkey"="-169";"value"="-9682"};
+{"key"="-840";"subkey"="-168";"value"="-9670"};
+{"key"="-839";"subkey"="-168";"value"="-9659"};
+{"key"="-838";"subkey"="-168";"value"="-9647"};
+{"key"="-837";"subkey"="-168";"value"="-9636"};
+{"key"="-836";"subkey"="-168";"value"="-9624"};
+{"key"="-835";"subkey"="-167";"value"="-9613"};
+{"key"="-834";"subkey"="-167";"value"="-9601"};
+{"key"="-833";"subkey"="-167";"value"="-9590"};
+{"key"="-832";"subkey"="-167";"value"="-9578"};
+{"key"="-831";"subkey"="-167";"value"="-9567"};
+{"key"="-830";"subkey"="-166";"value"="-9555"};
+{"key"="-829";"subkey"="-166";"value"="-9544"};
+{"key"="-828";"subkey"="-166";"value"="-9532"};
+{"key"="-827";"subkey"="-166";"value"="-9521"};
+{"key"="-826";"subkey"="-166";"value"="-9509"};
+{"key"="-825";"subkey"="-165";"value"="-9498"};
+{"key"="-824";"subkey"="-165";"value"="-9486"};
+{"key"="-823";"subkey"="-165";"value"="-9475"};
+{"key"="-822";"subkey"="-165";"value"="-9463"};
+{"key"="-821";"subkey"="-165";"value"="-9452"};
+{"key"="-820";"subkey"="-164";"value"="-9440"};
+{"key"="-819";"subkey"="-164";"value"="-9429"};
+{"key"="-818";"subkey"="-164";"value"="-9417"};
+{"key"="-817";"subkey"="-164";"value"="-9406"};
+{"key"="-816";"subkey"="-164";"value"="-9394"};
+{"key"="-815";"subkey"="-163";"value"="-9383"};
+{"key"="-814";"subkey"="-163";"value"="-9371"};
+{"key"="-813";"subkey"="-163";"value"="-9360"};
+{"key"="-812";"subkey"="-163";"value"="-9348"};
+{"key"="-811";"subkey"="-163";"value"="-9336"};
+{"key"="-810";"subkey"="-162";"value"="-9325"};
+{"key"="-809";"subkey"="-162";"value"="-9313"};
+{"key"="-808";"subkey"="-162";"value"="-9302"};
+{"key"="-807";"subkey"="-162";"value"="-9290"};
+{"key"="-806";"subkey"="-162";"value"="-9279"};
+{"key"="-805";"subkey"="-161";"value"="-9267"};
+{"key"="-804";"subkey"="-161";"value"="-9256"};
+{"key"="-803";"subkey"="-161";"value"="-9244"};
+{"key"="-802";"subkey"="-161";"value"="-9233"};
+{"key"="-801";"subkey"="-161";"value"="-9221"};
+{"key"="-800";"subkey"="-160";"value"="-9210"};
+{"key"="-799";"subkey"="-160";"value"="-9198"};
+{"key"="-798";"subkey"="-160";"value"="-9187"};
+{"key"="-797";"subkey"="-160";"value"="-9175"};
+{"key"="-796";"subkey"="-160";"value"="-9164"};
+{"key"="-795";"subkey"="-159";"value"="-9152"};
+{"key"="-794";"subkey"="-159";"value"="-9141"};
+{"key"="-793";"subkey"="-159";"value"="-9129"};
+{"key"="-792";"subkey"="-159";"value"="-9118"};
+{"key"="-791";"subkey"="-159";"value"="-9106"};
+{"key"="-790";"subkey"="-158";"value"="-9095"};
+{"key"="-789";"subkey"="-158";"value"="-9083"};
+{"key"="-788";"subkey"="-158";"value"="-9072"};
+{"key"="-787";"subkey"="-158";"value"="-9060"};
+{"key"="-786";"subkey"="-158";"value"="-9049"};
+{"key"="-785";"subkey"="-157";"value"="-9037"};
+{"key"="-784";"subkey"="-157";"value"="-9026"};
+{"key"="-783";"subkey"="-157";"value"="-9014"};
+{"key"="-782";"subkey"="-157";"value"="-9003"};
+{"key"="-781";"subkey"="-157";"value"="-8991"};
+{"key"="-780";"subkey"="-156";"value"="-8980"};
+{"key"="-779";"subkey"="-156";"value"="-8968"};
+{"key"="-778";"subkey"="-156";"value"="-8957"};
+{"key"="-777";"subkey"="-156";"value"="-8945"};
+{"key"="-776";"subkey"="-156";"value"="-8934"};
+{"key"="-775";"subkey"="-155";"value"="-8922"};
+{"key"="-774";"subkey"="-155";"value"="-8911"};
+{"key"="-773";"subkey"="-155";"value"="-8899"};
+{"key"="-772";"subkey"="-155";"value"="-8887"};
+{"key"="-771";"subkey"="-155";"value"="-8876"};
+{"key"="-770";"subkey"="-154";"value"="-8864"};
+{"key"="-769";"subkey"="-154";"value"="-8853"};
+{"key"="-768";"subkey"="-154";"value"="-8841"};
+{"key"="-767";"subkey"="-154";"value"="-8830"};
+{"key"="-766";"subkey"="-154";"value"="-8818"};
+{"key"="-765";"subkey"="-153";"value"="-8807"};
+{"key"="-764";"subkey"="-153";"value"="-8795"};
+{"key"="-763";"subkey"="-153";"value"="-8784"};
+{"key"="-762";"subkey"="-153";"value"="-8772"};
+{"key"="-761";"subkey"="-153";"value"="-8761"};
+{"key"="-760";"subkey"="-152";"value"="-8749"};
+{"key"="-759";"subkey"="-152";"value"="-8738"};
+{"key"="-758";"subkey"="-152";"value"="-8726"};
+{"key"="-757";"subkey"="-152";"value"="-8715"};
+{"key"="-756";"subkey"="-152";"value"="-8703"};
+{"key"="-755";"subkey"="-151";"value"="-8692"};
+{"key"="-754";"subkey"="-151";"value"="-8680"};
+{"key"="-753";"subkey"="-151";"value"="-8669"};
+{"key"="-752";"subkey"="-151";"value"="-8657"};
+{"key"="-751";"subkey"="-151";"value"="-8646"};
+{"key"="-750";"subkey"="-150";"value"="-8634"};
+{"key"="-749";"subkey"="-150";"value"="-8623"};
+{"key"="-748";"subkey"="-150";"value"="-8611"};
+{"key"="-747";"subkey"="-150";"value"="-8600"};
+{"key"="-746";"subkey"="-150";"value"="-8588"};
+{"key"="-745";"subkey"="-149";"value"="-8577"};
+{"key"="-744";"subkey"="-149";"value"="-8565"};
+{"key"="-743";"subkey"="-149";"value"="-8554"};
+{"key"="-742";"subkey"="-149";"value"="-8542"};
+{"key"="-741";"subkey"="-149";"value"="-8531"};
+{"key"="-740";"subkey"="-148";"value"="-8519"};
+{"key"="-739";"subkey"="-148";"value"="-8508"};
+{"key"="-738";"subkey"="-148";"value"="-8496"};
+{"key"="-737";"subkey"="-148";"value"="-8485"};
+{"key"="-736";"subkey"="-148";"value"="-8473"};
+{"key"="-735";"subkey"="-147";"value"="-8462"};
+{"key"="-734";"subkey"="-147";"value"="-8450"};
+{"key"="-733";"subkey"="-147";"value"="-8438"};
+{"key"="-732";"subkey"="-147";"value"="-8427"};
+{"key"="-731";"subkey"="-147";"value"="-8415"};
+{"key"="-730";"subkey"="-146";"value"="-8404"};
+{"key"="-729";"subkey"="-146";"value"="-8392"};
+{"key"="-728";"subkey"="-146";"value"="-8381"};
+{"key"="-727";"subkey"="-146";"value"="-8369"};
+{"key"="-726";"subkey"="-146";"value"="-8358"};
+{"key"="-725";"subkey"="-145";"value"="-8346"};
+{"key"="-724";"subkey"="-145";"value"="-8335"};
+{"key"="-723";"subkey"="-145";"value"="-8323"};
+{"key"="-722";"subkey"="-145";"value"="-8312"};
+{"key"="-721";"subkey"="-145";"value"="-8300"};
+{"key"="-720";"subkey"="-144";"value"="-8289"};
+{"key"="-719";"subkey"="-144";"value"="-8277"};
+{"key"="-718";"subkey"="-144";"value"="-8266"};
+{"key"="-717";"subkey"="-144";"value"="-8254"};
+{"key"="-716";"subkey"="-144";"value"="-8243"};
+{"key"="-715";"subkey"="-143";"value"="-8231"};
+{"key"="-714";"subkey"="-143";"value"="-8220"};
+{"key"="-713";"subkey"="-143";"value"="-8208"};
+{"key"="-712";"subkey"="-143";"value"="-8197"};
+{"key"="-711";"subkey"="-143";"value"="-8185"};
+{"key"="-710";"subkey"="-142";"value"="-8174"};
+{"key"="-709";"subkey"="-142";"value"="-8162"};
+{"key"="-708";"subkey"="-142";"value"="-8151"};
+{"key"="-707";"subkey"="-142";"value"="-8139"};
+{"key"="-706";"subkey"="-142";"value"="-8128"};
+{"key"="-705";"subkey"="-141";"value"="-8116"};
+{"key"="-704";"subkey"="-141";"value"="-8105"};
+{"key"="-703";"subkey"="-141";"value"="-8093"};
+{"key"="-702";"subkey"="-141";"value"="-8082"};
+{"key"="-701";"subkey"="-141";"value"="-8070"};
+{"key"="-700";"subkey"="-140";"value"="-8059"};
+{"key"="-699";"subkey"="-140";"value"="-8047"};
+{"key"="-698";"subkey"="-140";"value"="-8036"};
+{"key"="-697";"subkey"="-140";"value"="-8024"};
+{"key"="-696";"subkey"="-140";"value"="-8012"};
+{"key"="-695";"subkey"="-139";"value"="-8001"};
+{"key"="-694";"subkey"="-139";"value"="-7989"};
+{"key"="-693";"subkey"="-139";"value"="-7978"};
+{"key"="-692";"subkey"="-139";"value"="-7966"};
+{"key"="-691";"subkey"="-139";"value"="-7955"};
+{"key"="-690";"subkey"="-138";"value"="-7943"};
+{"key"="-689";"subkey"="-138";"value"="-7932"};
+{"key"="-688";"subkey"="-138";"value"="-7920"};
+{"key"="-687";"subkey"="-138";"value"="-7909"};
+{"key"="-686";"subkey"="-138";"value"="-7897"};
+{"key"="-685";"subkey"="-137";"value"="-7886"};
+{"key"="-684";"subkey"="-137";"value"="-7874"};
+{"key"="-683";"subkey"="-137";"value"="-7863"};
+{"key"="-682";"subkey"="-137";"value"="-7851"};
+{"key"="-681";"subkey"="-137";"value"="-7840"};
+{"key"="-680";"subkey"="-136";"value"="-7828"};
+{"key"="-679";"subkey"="-136";"value"="-7817"};
+{"key"="-678";"subkey"="-136";"value"="-7805"};
+{"key"="-677";"subkey"="-136";"value"="-7794"};
+{"key"="-676";"subkey"="-136";"value"="-7782"};
+{"key"="-675";"subkey"="-135";"value"="-7771"};
+{"key"="-674";"subkey"="-135";"value"="-7759"};
+{"key"="-673";"subkey"="-135";"value"="-7748"};
+{"key"="-672";"subkey"="-135";"value"="-7736"};
+{"key"="-671";"subkey"="-135";"value"="-7725"};
+{"key"="-670";"subkey"="-134";"value"="-7713"};
+{"key"="-669";"subkey"="-134";"value"="-7702"};
+{"key"="-668";"subkey"="-134";"value"="-7690"};
+{"key"="-667";"subkey"="-134";"value"="-7679"};
+{"key"="-666";"subkey"="-134";"value"="-7667"};
+{"key"="-665";"subkey"="-133";"value"="-7656"};
+{"key"="-664";"subkey"="-133";"value"="-7644"};
+{"key"="-663";"subkey"="-133";"value"="-7633"};
+{"key"="-662";"subkey"="-133";"value"="-7621"};
+{"key"="-661";"subkey"="-133";"value"="-7610"};
+{"key"="-660";"subkey"="-132";"value"="-7598"};
+{"key"="-659";"subkey"="-132";"value"="-7587"};
+{"key"="-658";"subkey"="-132";"value"="-7575"};
+{"key"="-657";"subkey"="-132";"value"="-7563"};
+{"key"="-656";"subkey"="-132";"value"="-7552"};
+{"key"="-655";"subkey"="-131";"value"="-7540"};
+{"key"="-654";"subkey"="-131";"value"="-7529"};
+{"key"="-653";"subkey"="-131";"value"="-7517"};
+{"key"="-652";"subkey"="-131";"value"="-7506"};
+{"key"="-651";"subkey"="-131";"value"="-7494"};
+{"key"="-650";"subkey"="-130";"value"="-7483"};
+{"key"="-649";"subkey"="-130";"value"="-7471"};
+{"key"="-648";"subkey"="-130";"value"="-7460"};
+{"key"="-647";"subkey"="-130";"value"="-7448"};
+{"key"="-646";"subkey"="-130";"value"="-7437"};
+{"key"="-645";"subkey"="-129";"value"="-7425"};
+{"key"="-644";"subkey"="-129";"value"="-7414"};
+{"key"="-643";"subkey"="-129";"value"="-7402"};
+{"key"="-642";"subkey"="-129";"value"="-7391"};
+{"key"="-641";"subkey"="-129";"value"="-7379"};
+{"key"="-640";"subkey"="-128";"value"="-7368"};
+{"key"="-639";"subkey"="-128";"value"="-7356"};
+{"key"="-638";"subkey"="-128";"value"="-7345"};
+{"key"="-637";"subkey"="-128";"value"="-7333"};
+{"key"="-636";"subkey"="-128";"value"="-7322"};
+{"key"="-635";"subkey"="-127";"value"="-7310"};
+{"key"="-634";"subkey"="-127";"value"="-7299"};
+{"key"="-633";"subkey"="-127";"value"="-7287"};
+{"key"="-632";"subkey"="-127";"value"="-7276"};
+{"key"="-631";"subkey"="-127";"value"="-7264"};
+{"key"="-630";"subkey"="-126";"value"="-7253"};
+{"key"="-629";"subkey"="-126";"value"="-7241"};
+{"key"="-628";"subkey"="-126";"value"="-7230"};
+{"key"="-627";"subkey"="-126";"value"="-7218"};
+{"key"="-626";"subkey"="-126";"value"="-7207"};
+{"key"="-625";"subkey"="-125";"value"="-7195"};
+{"key"="-624";"subkey"="-125";"value"="-7184"};
+{"key"="-623";"subkey"="-125";"value"="-7172"};
+{"key"="-622";"subkey"="-125";"value"="-7161"};
+{"key"="-621";"subkey"="-125";"value"="-7149"};
+{"key"="-620";"subkey"="-124";"value"="-7138"};
+{"key"="-619";"subkey"="-124";"value"="-7126"};
+{"key"="-618";"subkey"="-124";"value"="-7114"};
+{"key"="-617";"subkey"="-124";"value"="-7103"};
+{"key"="-616";"subkey"="-124";"value"="-7091"};
+{"key"="-615";"subkey"="-123";"value"="-7080"};
+{"key"="-614";"subkey"="-123";"value"="-7068"};
+{"key"="-613";"subkey"="-123";"value"="-7057"};
+{"key"="-612";"subkey"="-123";"value"="-7045"};
+{"key"="-611";"subkey"="-123";"value"="-7034"};
+{"key"="-610";"subkey"="-122";"value"="-7022"};
+{"key"="-609";"subkey"="-122";"value"="-7011"};
+{"key"="-608";"subkey"="-122";"value"="-6999"};
+{"key"="-607";"subkey"="-122";"value"="-6988"};
+{"key"="-606";"subkey"="-122";"value"="-6976"};
+{"key"="-605";"subkey"="-121";"value"="-6965"};
+{"key"="-604";"subkey"="-121";"value"="-6953"};
+{"key"="-603";"subkey"="-121";"value"="-6942"};
+{"key"="-602";"subkey"="-121";"value"="-6930"};
+{"key"="-601";"subkey"="-121";"value"="-6919"};
+{"key"="-600";"subkey"="-120";"value"="-6907"};
+{"key"="-599";"subkey"="-120";"value"="-6896"};
+{"key"="-598";"subkey"="-120";"value"="-6884"};
+{"key"="-597";"subkey"="-120";"value"="-6873"};
+{"key"="-596";"subkey"="-120";"value"="-6861"};
+{"key"="-595";"subkey"="-119";"value"="-6850"};
+{"key"="-594";"subkey"="-119";"value"="-6838"};
+{"key"="-593";"subkey"="-119";"value"="-6827"};
+{"key"="-592";"subkey"="-119";"value"="-6815"};
+{"key"="-591";"subkey"="-119";"value"="-6804"};
+{"key"="-590";"subkey"="-118";"value"="-6792"};
+{"key"="-589";"subkey"="-118";"value"="-6781"};
+{"key"="-588";"subkey"="-118";"value"="-6769"};
+{"key"="-587";"subkey"="-118";"value"="-6758"};
+{"key"="-586";"subkey"="-118";"value"="-6746"};
+{"key"="-585";"subkey"="-117";"value"="-6735"};
+{"key"="-584";"subkey"="-117";"value"="-6723"};
+{"key"="-583";"subkey"="-117";"value"="-6712"};
+{"key"="-582";"subkey"="-117";"value"="-6700"};
+{"key"="-581";"subkey"="-117";"value"="-6689"};
+{"key"="-580";"subkey"="-116";"value"="-6677"};
+{"key"="-579";"subkey"="-116";"value"="-6665"};
+{"key"="-578";"subkey"="-116";"value"="-6654"};
+{"key"="-577";"subkey"="-116";"value"="-6642"};
+{"key"="-576";"subkey"="-116";"value"="-6631"};
+{"key"="-575";"subkey"="-115";"value"="-6619"};
+{"key"="-574";"subkey"="-115";"value"="-6608"};
+{"key"="-573";"subkey"="-115";"value"="-6596"};
+{"key"="-572";"subkey"="-115";"value"="-6585"};
+{"key"="-571";"subkey"="-115";"value"="-6573"};
+{"key"="-570";"subkey"="-114";"value"="-6562"};
+{"key"="-569";"subkey"="-114";"value"="-6550"};
+{"key"="-568";"subkey"="-114";"value"="-6539"};
+{"key"="-567";"subkey"="-114";"value"="-6527"};
+{"key"="-566";"subkey"="-114";"value"="-6516"};
+{"key"="-565";"subkey"="-113";"value"="-6504"};
+{"key"="-564";"subkey"="-113";"value"="-6493"};
+{"key"="-563";"subkey"="-113";"value"="-6481"};
+{"key"="-562";"subkey"="-113";"value"="-6470"};
+{"key"="-561";"subkey"="-113";"value"="-6458"};
+{"key"="-560";"subkey"="-112";"value"="-6447"};
+{"key"="-559";"subkey"="-112";"value"="-6435"};
+{"key"="-558";"subkey"="-112";"value"="-6424"};
+{"key"="-557";"subkey"="-112";"value"="-6412"};
+{"key"="-556";"subkey"="-112";"value"="-6401"};
+{"key"="-555";"subkey"="-111";"value"="-6389"};
+{"key"="-554";"subkey"="-111";"value"="-6378"};
+{"key"="-553";"subkey"="-111";"value"="-6366"};
+{"key"="-552";"subkey"="-111";"value"="-6355"};
+{"key"="-551";"subkey"="-111";"value"="-6343"};
+{"key"="-550";"subkey"="-110";"value"="-6332"};
+{"key"="-549";"subkey"="-110";"value"="-6320"};
+{"key"="-548";"subkey"="-110";"value"="-6309"};
+{"key"="-547";"subkey"="-110";"value"="-6297"};
+{"key"="-546";"subkey"="-110";"value"="-6286"};
+{"key"="-545";"subkey"="-109";"value"="-6274"};
+{"key"="-544";"subkey"="-109";"value"="-6263"};
+{"key"="-543";"subkey"="-109";"value"="-6251"};
+{"key"="-542";"subkey"="-109";"value"="-6240"};
+{"key"="-541";"subkey"="-109";"value"="-6228"};
+{"key"="-540";"subkey"="-108";"value"="-6216"};
+{"key"="-539";"subkey"="-108";"value"="-6205"};
+{"key"="-538";"subkey"="-108";"value"="-6193"};
+{"key"="-537";"subkey"="-108";"value"="-6182"};
+{"key"="-536";"subkey"="-108";"value"="-6170"};
+{"key"="-535";"subkey"="-107";"value"="-6159"};
+{"key"="-534";"subkey"="-107";"value"="-6147"};
+{"key"="-533";"subkey"="-107";"value"="-6136"};
+{"key"="-532";"subkey"="-107";"value"="-6124"};
+{"key"="-531";"subkey"="-107";"value"="-6113"};
+{"key"="-530";"subkey"="-106";"value"="-6101"};
+{"key"="-529";"subkey"="-106";"value"="-6090"};
+{"key"="-528";"subkey"="-106";"value"="-6078"};
+{"key"="-527";"subkey"="-106";"value"="-6067"};
+{"key"="-526";"subkey"="-106";"value"="-6055"};
+{"key"="-525";"subkey"="-105";"value"="-6044"};
+{"key"="-524";"subkey"="-105";"value"="-6032"};
+{"key"="-523";"subkey"="-105";"value"="-6021"};
+{"key"="-522";"subkey"="-105";"value"="-6009"};
+{"key"="-521";"subkey"="-105";"value"="-5998"};
+{"key"="-520";"subkey"="-104";"value"="-5986"};
+{"key"="-519";"subkey"="-104";"value"="-5975"};
+{"key"="-518";"subkey"="-104";"value"="-5963"};
+{"key"="-517";"subkey"="-104";"value"="-5952"};
+{"key"="-516";"subkey"="-104";"value"="-5940"};
+{"key"="-515";"subkey"="-103";"value"="-5929"};
+{"key"="-514";"subkey"="-103";"value"="-5917"};
+{"key"="-513";"subkey"="-103";"value"="-5906"};
+{"key"="-512";"subkey"="-103";"value"="-5894"};
+{"key"="-511";"subkey"="-103";"value"="-5883"};
+{"key"="-510";"subkey"="-102";"value"="-5871"};
+{"key"="-509";"subkey"="-102";"value"="-5860"};
+{"key"="-508";"subkey"="-102";"value"="-5848"};
+{"key"="-507";"subkey"="-102";"value"="-5837"};
+{"key"="-506";"subkey"="-102";"value"="-5825"};
+{"key"="-505";"subkey"="-101";"value"="-5814"};
+{"key"="-504";"subkey"="-101";"value"="-5802"};
+{"key"="-503";"subkey"="-101";"value"="-5791"};
+{"key"="-502";"subkey"="-101";"value"="-5779"};
+{"key"="-501";"subkey"="-101";"value"="-5767"};
+{"key"="-500";"subkey"="-100";"value"="-5756"};
+{"key"="-499";"subkey"="-100";"value"="-5744"};
+{"key"="-498";"subkey"="-100";"value"="-5733"};
+{"key"="-497";"subkey"="-100";"value"="-5721"};
+{"key"="-496";"subkey"="-100";"value"="-5710"};
+{"key"="-495";"subkey"="-99";"value"="-5698"};
+{"key"="-494";"subkey"="-99";"value"="-5687"};
+{"key"="-493";"subkey"="-99";"value"="-5675"};
+{"key"="-492";"subkey"="-99";"value"="-5664"};
+{"key"="-491";"subkey"="-99";"value"="-5652"};
+{"key"="-490";"subkey"="-98";"value"="-5641"};
+{"key"="-489";"subkey"="-98";"value"="-5629"};
+{"key"="-488";"subkey"="-98";"value"="-5618"};
+{"key"="-487";"subkey"="-98";"value"="-5606"};
+{"key"="-486";"subkey"="-98";"value"="-5595"};
+{"key"="-485";"subkey"="-97";"value"="-5583"};
+{"key"="-484";"subkey"="-97";"value"="-5572"};
+{"key"="-483";"subkey"="-97";"value"="-5560"};
+{"key"="-482";"subkey"="-97";"value"="-5549"};
+{"key"="-481";"subkey"="-97";"value"="-5537"};
+{"key"="-480";"subkey"="-96";"value"="-5526"};
+{"key"="-479";"subkey"="-96";"value"="-5514"};
+{"key"="-478";"subkey"="-96";"value"="-5503"};
+{"key"="-477";"subkey"="-96";"value"="-5491"};
+{"key"="-476";"subkey"="-96";"value"="-5480"};
+{"key"="-475";"subkey"="-95";"value"="-5468"};
+{"key"="-474";"subkey"="-95";"value"="-5457"};
+{"key"="-473";"subkey"="-95";"value"="-5445"};
+{"key"="-472";"subkey"="-95";"value"="-5434"};
+{"key"="-471";"subkey"="-95";"value"="-5422"};
+{"key"="-470";"subkey"="-94";"value"="-5411"};
+{"key"="-469";"subkey"="-94";"value"="-5399"};
+{"key"="-468";"subkey"="-94";"value"="-5388"};
+{"key"="-467";"subkey"="-94";"value"="-5376"};
+{"key"="-466";"subkey"="-94";"value"="-5365"};
+{"key"="-465";"subkey"="-93";"value"="-5353"};
+{"key"="-464";"subkey"="-93";"value"="-5341"};
+{"key"="-463";"subkey"="-93";"value"="-5330"};
+{"key"="-462";"subkey"="-93";"value"="-5318"};
+{"key"="-461";"subkey"="-93";"value"="-5307"};
+{"key"="-460";"subkey"="-92";"value"="-5295"};
+{"key"="-459";"subkey"="-92";"value"="-5284"};
+{"key"="-458";"subkey"="-92";"value"="-5272"};
+{"key"="-457";"subkey"="-92";"value"="-5261"};
+{"key"="-456";"subkey"="-92";"value"="-5249"};
+{"key"="-455";"subkey"="-91";"value"="-5238"};
+{"key"="-454";"subkey"="-91";"value"="-5226"};
+{"key"="-453";"subkey"="-91";"value"="-5215"};
+{"key"="-452";"subkey"="-91";"value"="-5203"};
+{"key"="-451";"subkey"="-91";"value"="-5192"};
+{"key"="-450";"subkey"="-90";"value"="-5180"};
+{"key"="-449";"subkey"="-90";"value"="-5169"};
+{"key"="-448";"subkey"="-90";"value"="-5157"};
+{"key"="-447";"subkey"="-90";"value"="-5146"};
+{"key"="-446";"subkey"="-90";"value"="-5134"};
+{"key"="-445";"subkey"="-89";"value"="-5123"};
+{"key"="-444";"subkey"="-89";"value"="-5111"};
+{"key"="-443";"subkey"="-89";"value"="-5100"};
+{"key"="-442";"subkey"="-89";"value"="-5088"};
+{"key"="-441";"subkey"="-89";"value"="-5077"};
+{"key"="-440";"subkey"="-88";"value"="-5065"};
+{"key"="-439";"subkey"="-88";"value"="-5054"};
+{"key"="-438";"subkey"="-88";"value"="-5042"};
+{"key"="-437";"subkey"="-88";"value"="-5031"};
+{"key"="-436";"subkey"="-88";"value"="-5019"};
+{"key"="-435";"subkey"="-87";"value"="-5008"};
+{"key"="-434";"subkey"="-87";"value"="-4996"};
+{"key"="-433";"subkey"="-87";"value"="-4985"};
+{"key"="-432";"subkey"="-87";"value"="-4973"};
+{"key"="-431";"subkey"="-87";"value"="-4962"};
+{"key"="-430";"subkey"="-86";"value"="-4950"};
+{"key"="-429";"subkey"="-86";"value"="-4939"};
+{"key"="-428";"subkey"="-86";"value"="-4927"};
+{"key"="-427";"subkey"="-86";"value"="-4916"};
+{"key"="-426";"subkey"="-86";"value"="-4904"};
+{"key"="-425";"subkey"="-85";"value"="-4892"};
+{"key"="-424";"subkey"="-85";"value"="-4881"};
+{"key"="-423";"subkey"="-85";"value"="-4869"};
+{"key"="-422";"subkey"="-85";"value"="-4858"};
+{"key"="-421";"subkey"="-85";"value"="-4846"};
+{"key"="-420";"subkey"="-84";"value"="-4835"};
+{"key"="-419";"subkey"="-84";"value"="-4823"};
+{"key"="-418";"subkey"="-84";"value"="-4812"};
+{"key"="-417";"subkey"="-84";"value"="-4800"};
+{"key"="-416";"subkey"="-84";"value"="-4789"};
+{"key"="-415";"subkey"="-83";"value"="-4777"};
+{"key"="-414";"subkey"="-83";"value"="-4766"};
+{"key"="-413";"subkey"="-83";"value"="-4754"};
+{"key"="-412";"subkey"="-83";"value"="-4743"};
+{"key"="-411";"subkey"="-83";"value"="-4731"};
+{"key"="-410";"subkey"="-82";"value"="-4720"};
+{"key"="-409";"subkey"="-82";"value"="-4708"};
+{"key"="-408";"subkey"="-82";"value"="-4697"};
+{"key"="-407";"subkey"="-82";"value"="-4685"};
+{"key"="-406";"subkey"="-82";"value"="-4674"};
+{"key"="-405";"subkey"="-81";"value"="-4662"};
+{"key"="-404";"subkey"="-81";"value"="-4651"};
+{"key"="-403";"subkey"="-81";"value"="-4639"};
+{"key"="-402";"subkey"="-81";"value"="-4628"};
+{"key"="-401";"subkey"="-81";"value"="-4616"};
+{"key"="-400";"subkey"="-80";"value"="-4605"};
+{"key"="-399";"subkey"="-80";"value"="-4593"};
+{"key"="-398";"subkey"="-80";"value"="-4582"};
+{"key"="-397";"subkey"="-80";"value"="-4570"};
+{"key"="-396";"subkey"="-80";"value"="-4559"};
+{"key"="-395";"subkey"="-79";"value"="-4547"};
+{"key"="-394";"subkey"="-79";"value"="-4536"};
+{"key"="-393";"subkey"="-79";"value"="-4524"};
+{"key"="-392";"subkey"="-79";"value"="-4513"};
+{"key"="-391";"subkey"="-79";"value"="-4501"};
+{"key"="-390";"subkey"="-78";"value"="-4490"};
+{"key"="-389";"subkey"="-78";"value"="-4478"};
+{"key"="-388";"subkey"="-78";"value"="-4467"};
+{"key"="-387";"subkey"="-78";"value"="-4455"};
+{"key"="-386";"subkey"="-78";"value"="-4443"};
+{"key"="-385";"subkey"="-77";"value"="-4432"};
+{"key"="-384";"subkey"="-77";"value"="-4420"};
+{"key"="-383";"subkey"="-77";"value"="-4409"};
+{"key"="-382";"subkey"="-77";"value"="-4397"};
+{"key"="-381";"subkey"="-77";"value"="-4386"};
+{"key"="-380";"subkey"="-76";"value"="-4374"};
+{"key"="-379";"subkey"="-76";"value"="-4363"};
+{"key"="-378";"subkey"="-76";"value"="-4351"};
+{"key"="-377";"subkey"="-76";"value"="-4340"};
+{"key"="-376";"subkey"="-76";"value"="-4328"};
+{"key"="-375";"subkey"="-75";"value"="-4317"};
+{"key"="-374";"subkey"="-75";"value"="-4305"};
+{"key"="-373";"subkey"="-75";"value"="-4294"};
+{"key"="-372";"subkey"="-75";"value"="-4282"};
+{"key"="-371";"subkey"="-75";"value"="-4271"};
+{"key"="-370";"subkey"="-74";"value"="-4259"};
+{"key"="-369";"subkey"="-74";"value"="-4248"};
+{"key"="-368";"subkey"="-74";"value"="-4236"};
+{"key"="-367";"subkey"="-74";"value"="-4225"};
+{"key"="-366";"subkey"="-74";"value"="-4213"};
+{"key"="-365";"subkey"="-73";"value"="-4202"};
+{"key"="-364";"subkey"="-73";"value"="-4190"};
+{"key"="-363";"subkey"="-73";"value"="-4179"};
+{"key"="-362";"subkey"="-73";"value"="-4167"};
+{"key"="-361";"subkey"="-73";"value"="-4156"};
+{"key"="-360";"subkey"="-72";"value"="-4144"};
+{"key"="-359";"subkey"="-72";"value"="-4133"};
+{"key"="-358";"subkey"="-72";"value"="-4121"};
+{"key"="-357";"subkey"="-72";"value"="-4110"};
+{"key"="-356";"subkey"="-72";"value"="-4098"};
+{"key"="-355";"subkey"="-71";"value"="-4087"};
+{"key"="-354";"subkey"="-71";"value"="-4075"};
+{"key"="-353";"subkey"="-71";"value"="-4064"};
+{"key"="-352";"subkey"="-71";"value"="-4052"};
+{"key"="-351";"subkey"="-71";"value"="-4041"};
+{"key"="-350";"subkey"="-70";"value"="-4029"};
+{"key"="-349";"subkey"="-70";"value"="-4018"};
+{"key"="-348";"subkey"="-70";"value"="-4006"};
+{"key"="-347";"subkey"="-70";"value"="-3994"};
+{"key"="-346";"subkey"="-70";"value"="-3983"};
+{"key"="-345";"subkey"="-69";"value"="-3971"};
+{"key"="-344";"subkey"="-69";"value"="-3960"};
+{"key"="-343";"subkey"="-69";"value"="-3948"};
+{"key"="-342";"subkey"="-69";"value"="-3937"};
+{"key"="-341";"subkey"="-69";"value"="-3925"};
+{"key"="-340";"subkey"="-68";"value"="-3914"};
+{"key"="-339";"subkey"="-68";"value"="-3902"};
+{"key"="-338";"subkey"="-68";"value"="-3891"};
+{"key"="-337";"subkey"="-68";"value"="-3879"};
+{"key"="-336";"subkey"="-68";"value"="-3868"};
+{"key"="-335";"subkey"="-67";"value"="-3856"};
+{"key"="-334";"subkey"="-67";"value"="-3845"};
+{"key"="-333";"subkey"="-67";"value"="-3833"};
+{"key"="-332";"subkey"="-67";"value"="-3822"};
+{"key"="-331";"subkey"="-67";"value"="-3810"};
+{"key"="-330";"subkey"="-66";"value"="-3799"};
+{"key"="-329";"subkey"="-66";"value"="-3787"};
+{"key"="-328";"subkey"="-66";"value"="-3776"};
+{"key"="-327";"subkey"="-66";"value"="-3764"};
+{"key"="-326";"subkey"="-66";"value"="-3753"};
+{"key"="-325";"subkey"="-65";"value"="-3741"};
+{"key"="-324";"subkey"="-65";"value"="-3730"};
+{"key"="-323";"subkey"="-65";"value"="-3718"};
+{"key"="-322";"subkey"="-65";"value"="-3707"};
+{"key"="-321";"subkey"="-65";"value"="-3695"};
+{"key"="-320";"subkey"="-64";"value"="-3684"};
+{"key"="-319";"subkey"="-64";"value"="-3672"};
+{"key"="-318";"subkey"="-64";"value"="-3661"};
+{"key"="-317";"subkey"="-64";"value"="-3649"};
+{"key"="-316";"subkey"="-64";"value"="-3638"};
+{"key"="-315";"subkey"="-63";"value"="-3626"};
+{"key"="-314";"subkey"="-63";"value"="-3615"};
+{"key"="-313";"subkey"="-63";"value"="-3603"};
+{"key"="-312";"subkey"="-63";"value"="-3592"};
+{"key"="-311";"subkey"="-63";"value"="-3580"};
+{"key"="-310";"subkey"="-62";"value"="-3569"};
+{"key"="-309";"subkey"="-62";"value"="-3557"};
+{"key"="-308";"subkey"="-62";"value"="-3545"};
+{"key"="-307";"subkey"="-62";"value"="-3534"};
+{"key"="-306";"subkey"="-62";"value"="-3522"};
+{"key"="-305";"subkey"="-61";"value"="-3511"};
+{"key"="-304";"subkey"="-61";"value"="-3499"};
+{"key"="-303";"subkey"="-61";"value"="-3488"};
+{"key"="-302";"subkey"="-61";"value"="-3476"};
+{"key"="-301";"subkey"="-61";"value"="-3465"};
+{"key"="-300";"subkey"="-60";"value"="-3453"};
+{"key"="-299";"subkey"="-60";"value"="-3442"};
+{"key"="-298";"subkey"="-60";"value"="-3430"};
+{"key"="-297";"subkey"="-60";"value"="-3419"};
+{"key"="-296";"subkey"="-60";"value"="-3407"};
+{"key"="-295";"subkey"="-59";"value"="-3396"};
+{"key"="-294";"subkey"="-59";"value"="-3384"};
+{"key"="-293";"subkey"="-59";"value"="-3373"};
+{"key"="-292";"subkey"="-59";"value"="-3361"};
+{"key"="-291";"subkey"="-59";"value"="-3350"};
+{"key"="-290";"subkey"="-58";"value"="-3338"};
+{"key"="-289";"subkey"="-58";"value"="-3327"};
+{"key"="-288";"subkey"="-58";"value"="-3315"};
+{"key"="-287";"subkey"="-58";"value"="-3304"};
+{"key"="-286";"subkey"="-58";"value"="-3292"};
+{"key"="-285";"subkey"="-57";"value"="-3281"};
+{"key"="-284";"subkey"="-57";"value"="-3269"};
+{"key"="-283";"subkey"="-57";"value"="-3258"};
+{"key"="-282";"subkey"="-57";"value"="-3246"};
+{"key"="-281";"subkey"="-57";"value"="-3235"};
+{"key"="-280";"subkey"="-56";"value"="-3223"};
+{"key"="-279";"subkey"="-56";"value"="-3212"};
+{"key"="-278";"subkey"="-56";"value"="-3200"};
+{"key"="-277";"subkey"="-56";"value"="-3189"};
+{"key"="-276";"subkey"="-56";"value"="-3177"};
+{"key"="-275";"subkey"="-55";"value"="-3166"};
+{"key"="-274";"subkey"="-55";"value"="-3154"};
+{"key"="-273";"subkey"="-55";"value"="-3143"};
+{"key"="-272";"subkey"="-55";"value"="-3131"};
+{"key"="-271";"subkey"="-55";"value"="-3120"};
+{"key"="-270";"subkey"="-54";"value"="-3108"};
+{"key"="-269";"subkey"="-54";"value"="-3096"};
+{"key"="-268";"subkey"="-54";"value"="-3085"};
+{"key"="-267";"subkey"="-54";"value"="-3073"};
+{"key"="-266";"subkey"="-54";"value"="-3062"};
+{"key"="-265";"subkey"="-53";"value"="-3050"};
+{"key"="-264";"subkey"="-53";"value"="-3039"};
+{"key"="-263";"subkey"="-53";"value"="-3027"};
+{"key"="-262";"subkey"="-53";"value"="-3016"};
+{"key"="-261";"subkey"="-53";"value"="-3004"};
+{"key"="-260";"subkey"="-52";"value"="-2993"};
+{"key"="-259";"subkey"="-52";"value"="-2981"};
+{"key"="-258";"subkey"="-52";"value"="-2970"};
+{"key"="-257";"subkey"="-52";"value"="-2958"};
+{"key"="-256";"subkey"="-52";"value"="-2947"};
+{"key"="-255";"subkey"="-51";"value"="-2935"};
+{"key"="-254";"subkey"="-51";"value"="-2924"};
+{"key"="-253";"subkey"="-51";"value"="-2912"};
+{"key"="-252";"subkey"="-51";"value"="-2901"};
+{"key"="-251";"subkey"="-51";"value"="-2889"};
+{"key"="-250";"subkey"="-50";"value"="-2878"};
+{"key"="-249";"subkey"="-50";"value"="-2866"};
+{"key"="-248";"subkey"="-50";"value"="-2855"};
+{"key"="-247";"subkey"="-50";"value"="-2843"};
+{"key"="-246";"subkey"="-50";"value"="-2832"};
+{"key"="-245";"subkey"="-49";"value"="-2820"};
+{"key"="-244";"subkey"="-49";"value"="-2809"};
+{"key"="-243";"subkey"="-49";"value"="-2797"};
+{"key"="-242";"subkey"="-49";"value"="-2786"};
+{"key"="-241";"subkey"="-49";"value"="-2774"};
+{"key"="-240";"subkey"="-48";"value"="-2763"};
+{"key"="-239";"subkey"="-48";"value"="-2751"};
+{"key"="-238";"subkey"="-48";"value"="-2740"};
+{"key"="-237";"subkey"="-48";"value"="-2728"};
+{"key"="-236";"subkey"="-48";"value"="-2717"};
+{"key"="-235";"subkey"="-47";"value"="-2705"};
+{"key"="-234";"subkey"="-47";"value"="-2694"};
+{"key"="-233";"subkey"="-47";"value"="-2682"};
+{"key"="-232";"subkey"="-47";"value"="-2670"};
+{"key"="-231";"subkey"="-47";"value"="-2659"};
+{"key"="-230";"subkey"="-46";"value"="-2647"};
+{"key"="-229";"subkey"="-46";"value"="-2636"};
+{"key"="-228";"subkey"="-46";"value"="-2624"};
+{"key"="-227";"subkey"="-46";"value"="-2613"};
+{"key"="-226";"subkey"="-46";"value"="-2601"};
+{"key"="-225";"subkey"="-45";"value"="-2590"};
+{"key"="-224";"subkey"="-45";"value"="-2578"};
+{"key"="-223";"subkey"="-45";"value"="-2567"};
+{"key"="-222";"subkey"="-45";"value"="-2555"};
+{"key"="-221";"subkey"="-45";"value"="-2544"};
+{"key"="-220";"subkey"="-44";"value"="-2532"};
+{"key"="-219";"subkey"="-44";"value"="-2521"};
+{"key"="-218";"subkey"="-44";"value"="-2509"};
+{"key"="-217";"subkey"="-44";"value"="-2498"};
+{"key"="-216";"subkey"="-44";"value"="-2486"};
+{"key"="-215";"subkey"="-43";"value"="-2475"};
+{"key"="-214";"subkey"="-43";"value"="-2463"};
+{"key"="-213";"subkey"="-43";"value"="-2452"};
+{"key"="-212";"subkey"="-43";"value"="-2440"};
+{"key"="-211";"subkey"="-43";"value"="-2429"};
+{"key"="-210";"subkey"="-42";"value"="-2417"};
+{"key"="-209";"subkey"="-42";"value"="-2406"};
+{"key"="-208";"subkey"="-42";"value"="-2394"};
+{"key"="-207";"subkey"="-42";"value"="-2383"};
+{"key"="-206";"subkey"="-42";"value"="-2371"};
+{"key"="-205";"subkey"="-41";"value"="-2360"};
+{"key"="-204";"subkey"="-41";"value"="-2348"};
+{"key"="-203";"subkey"="-41";"value"="-2337"};
+{"key"="-202";"subkey"="-41";"value"="-2325"};
+{"key"="-201";"subkey"="-41";"value"="-2314"};
+{"key"="-200";"subkey"="-40";"value"="-2302"};
+{"key"="-199";"subkey"="-40";"value"="-2291"};
+{"key"="-198";"subkey"="-40";"value"="-2279"};
+{"key"="-197";"subkey"="-40";"value"="-2268"};
+{"key"="-196";"subkey"="-40";"value"="-2256"};
+{"key"="-195";"subkey"="-39";"value"="-2245"};
+{"key"="-194";"subkey"="-39";"value"="-2233"};
+{"key"="-193";"subkey"="-39";"value"="-2221"};
+{"key"="-192";"subkey"="-39";"value"="-2210"};
+{"key"="-191";"subkey"="-39";"value"="-2198"};
+{"key"="-190";"subkey"="-38";"value"="-2187"};
+{"key"="-189";"subkey"="-38";"value"="-2175"};
+{"key"="-188";"subkey"="-38";"value"="-2164"};
+{"key"="-187";"subkey"="-38";"value"="-2152"};
+{"key"="-186";"subkey"="-38";"value"="-2141"};
+{"key"="-185";"subkey"="-37";"value"="-2129"};
+{"key"="-184";"subkey"="-37";"value"="-2118"};
+{"key"="-183";"subkey"="-37";"value"="-2106"};
+{"key"="-182";"subkey"="-37";"value"="-2095"};
+{"key"="-181";"subkey"="-37";"value"="-2083"};
+{"key"="-180";"subkey"="-36";"value"="-2072"};
+{"key"="-179";"subkey"="-36";"value"="-2060"};
+{"key"="-178";"subkey"="-36";"value"="-2049"};
+{"key"="-177";"subkey"="-36";"value"="-2037"};
+{"key"="-176";"subkey"="-36";"value"="-2026"};
+{"key"="-175";"subkey"="-35";"value"="-2014"};
+{"key"="-174";"subkey"="-35";"value"="-2003"};
+{"key"="-173";"subkey"="-35";"value"="-1991"};
+{"key"="-172";"subkey"="-35";"value"="-1980"};
+{"key"="-171";"subkey"="-35";"value"="-1968"};
+{"key"="-170";"subkey"="-34";"value"="-1957"};
+{"key"="-169";"subkey"="-34";"value"="-1945"};
+{"key"="-168";"subkey"="-34";"value"="-1934"};
+{"key"="-167";"subkey"="-34";"value"="-1922"};
+{"key"="-166";"subkey"="-34";"value"="-1911"};
+{"key"="-165";"subkey"="-33";"value"="-1899"};
+{"key"="-164";"subkey"="-33";"value"="-1888"};
+{"key"="-163";"subkey"="-33";"value"="-1876"};
+{"key"="-162";"subkey"="-33";"value"="-1865"};
+{"key"="-161";"subkey"="-33";"value"="-1853"};
+{"key"="-160";"subkey"="-32";"value"="-1842"};
+{"key"="-159";"subkey"="-32";"value"="-1830"};
+{"key"="-158";"subkey"="-32";"value"="-1819"};
+{"key"="-157";"subkey"="-32";"value"="-1807"};
+{"key"="-156";"subkey"="-32";"value"="-1796"};
+{"key"="-155";"subkey"="-31";"value"="-1784"};
+{"key"="-154";"subkey"="-31";"value"="-1772"};
+{"key"="-153";"subkey"="-31";"value"="-1761"};
+{"key"="-152";"subkey"="-31";"value"="-1749"};
+{"key"="-151";"subkey"="-31";"value"="-1738"};
+{"key"="-150";"subkey"="-30";"value"="-1726"};
+{"key"="-149";"subkey"="-30";"value"="-1715"};
+{"key"="-148";"subkey"="-30";"value"="-1703"};
+{"key"="-147";"subkey"="-30";"value"="-1692"};
+{"key"="-146";"subkey"="-30";"value"="-1680"};
+{"key"="-145";"subkey"="-29";"value"="-1669"};
+{"key"="-144";"subkey"="-29";"value"="-1657"};
+{"key"="-143";"subkey"="-29";"value"="-1646"};
+{"key"="-142";"subkey"="-29";"value"="-1634"};
+{"key"="-141";"subkey"="-29";"value"="-1623"};
+{"key"="-140";"subkey"="-28";"value"="-1611"};
+{"key"="-139";"subkey"="-28";"value"="-1600"};
+{"key"="-138";"subkey"="-28";"value"="-1588"};
+{"key"="-137";"subkey"="-28";"value"="-1577"};
+{"key"="-136";"subkey"="-28";"value"="-1565"};
+{"key"="-135";"subkey"="-27";"value"="-1554"};
+{"key"="-134";"subkey"="-27";"value"="-1542"};
+{"key"="-133";"subkey"="-27";"value"="-1531"};
+{"key"="-132";"subkey"="-27";"value"="-1519"};
+{"key"="-131";"subkey"="-27";"value"="-1508"};
+{"key"="-130";"subkey"="-26";"value"="-1496"};
+{"key"="-129";"subkey"="-26";"value"="-1485"};
+{"key"="-128";"subkey"="-26";"value"="-1473"};
+{"key"="-127";"subkey"="-26";"value"="-1462"};
+{"key"="-126";"subkey"="-26";"value"="-1450"};
+{"key"="-125";"subkey"="-25";"value"="-1439"};
+{"key"="-124";"subkey"="-25";"value"="-1427"};
+{"key"="-123";"subkey"="-25";"value"="-1416"};
+{"key"="-122";"subkey"="-25";"value"="-1404"};
+{"key"="-121";"subkey"="-25";"value"="-1393"};
+{"key"="-120";"subkey"="-24";"value"="-1381"};
+{"key"="-119";"subkey"="-24";"value"="-1370"};
+{"key"="-118";"subkey"="-24";"value"="-1358"};
+{"key"="-117";"subkey"="-24";"value"="-1347"};
+{"key"="-116";"subkey"="-24";"value"="-1335"};
+{"key"="-115";"subkey"="-23";"value"="-1323"};
+{"key"="-114";"subkey"="-23";"value"="-1312"};
+{"key"="-113";"subkey"="-23";"value"="-1300"};
+{"key"="-112";"subkey"="-23";"value"="-1289"};
+{"key"="-111";"subkey"="-23";"value"="-1277"};
+{"key"="-110";"subkey"="-22";"value"="-1266"};
+{"key"="-109";"subkey"="-22";"value"="-1254"};
+{"key"="-108";"subkey"="-22";"value"="-1243"};
+{"key"="-107";"subkey"="-22";"value"="-1231"};
+{"key"="-106";"subkey"="-22";"value"="-1220"};
+{"key"="-105";"subkey"="-21";"value"="-1208"};
+{"key"="-104";"subkey"="-21";"value"="-1197"};
+{"key"="-103";"subkey"="-21";"value"="-1185"};
+{"key"="-102";"subkey"="-21";"value"="-1174"};
+{"key"="-101";"subkey"="-21";"value"="-1162"};
+{"key"="-100";"subkey"="-20";"value"="-1151"};
+{"key"="-99";"subkey"="-20";"value"="-1139"};
+{"key"="-98";"subkey"="-20";"value"="-1128"};
+{"key"="-97";"subkey"="-20";"value"="-1116"};
+{"key"="-96";"subkey"="-20";"value"="-1105"};
+{"key"="-95";"subkey"="-19";"value"="-1093"};
+{"key"="-94";"subkey"="-19";"value"="-1082"};
+{"key"="-93";"subkey"="-19";"value"="-1070"};
+{"key"="-92";"subkey"="-19";"value"="-1059"};
+{"key"="-91";"subkey"="-19";"value"="-1047"};
+{"key"="-90";"subkey"="-18";"value"="-1036"};
+{"key"="-89";"subkey"="-18";"value"="-1024"};
+{"key"="-88";"subkey"="-18";"value"="-1013"};
+{"key"="-87";"subkey"="-18";"value"="-1001"};
+{"key"="-86";"subkey"="-18";"value"="-990"};
+{"key"="-85";"subkey"="-17";"value"="-978"};
+{"key"="-84";"subkey"="-17";"value"="-967"};
+{"key"="-83";"subkey"="-17";"value"="-955"};
+{"key"="-82";"subkey"="-17";"value"="-944"};
+{"key"="-81";"subkey"="-17";"value"="-932"};
+{"key"="-80";"subkey"="-16";"value"="-921"};
+{"key"="-79";"subkey"="-16";"value"="-909"};
+{"key"="-78";"subkey"="-16";"value"="-898"};
+{"key"="-77";"subkey"="-16";"value"="-886"};
+{"key"="-76";"subkey"="-16";"value"="-874"};
+{"key"="-75";"subkey"="-15";"value"="-863"};
+{"key"="-74";"subkey"="-15";"value"="-851"};
+{"key"="-73";"subkey"="-15";"value"="-840"};
+{"key"="-72";"subkey"="-15";"value"="-828"};
+{"key"="-71";"subkey"="-15";"value"="-817"};
+{"key"="-70";"subkey"="-14";"value"="-805"};
+{"key"="-69";"subkey"="-14";"value"="-794"};
+{"key"="-68";"subkey"="-14";"value"="-782"};
+{"key"="-67";"subkey"="-14";"value"="-771"};
+{"key"="-66";"subkey"="-14";"value"="-759"};
+{"key"="-65";"subkey"="-13";"value"="-748"};
+{"key"="-64";"subkey"="-13";"value"="-736"};
+{"key"="-63";"subkey"="-13";"value"="-725"};
+{"key"="-62";"subkey"="-13";"value"="-713"};
+{"key"="-61";"subkey"="-13";"value"="-702"};
+{"key"="-60";"subkey"="-12";"value"="-690"};
+{"key"="-59";"subkey"="-12";"value"="-679"};
+{"key"="-58";"subkey"="-12";"value"="-667"};
+{"key"="-57";"subkey"="-12";"value"="-656"};
+{"key"="-56";"subkey"="-12";"value"="-644"};
+{"key"="-55";"subkey"="-11";"value"="-633"};
+{"key"="-54";"subkey"="-11";"value"="-621"};
+{"key"="-53";"subkey"="-11";"value"="-610"};
+{"key"="-52";"subkey"="-11";"value"="-598"};
+{"key"="-51";"subkey"="-11";"value"="-587"};
+{"key"="-50";"subkey"="-10";"value"="-575"};
+{"key"="-49";"subkey"="-10";"value"="-564"};
+{"key"="-48";"subkey"="-10";"value"="-552"};
+{"key"="-47";"subkey"="-10";"value"="-541"};
+{"key"="-46";"subkey"="-10";"value"="-529"};
+{"key"="-45";"subkey"="-9";"value"="-518"};
+{"key"="-44";"subkey"="-9";"value"="-506"};
+{"key"="-43";"subkey"="-9";"value"="-495"};
+{"key"="-42";"subkey"="-9";"value"="-483"};
+{"key"="-41";"subkey"="-9";"value"="-472"};
+{"key"="-40";"subkey"="-8";"value"="-460"};
+{"key"="-39";"subkey"="-8";"value"="-449"};
+{"key"="-38";"subkey"="-8";"value"="-437"};
+{"key"="-37";"subkey"="-8";"value"="-425"};
+{"key"="-36";"subkey"="-8";"value"="-414"};
+{"key"="-35";"subkey"="-7";"value"="-402"};
+{"key"="-34";"subkey"="-7";"value"="-391"};
+{"key"="-33";"subkey"="-7";"value"="-379"};
+{"key"="-32";"subkey"="-7";"value"="-368"};
+{"key"="-31";"subkey"="-7";"value"="-356"};
+{"key"="-30";"subkey"="-6";"value"="-345"};
+{"key"="-29";"subkey"="-6";"value"="-333"};
+{"key"="-28";"subkey"="-6";"value"="-322"};
+{"key"="-27";"subkey"="-6";"value"="-310"};
+{"key"="-26";"subkey"="-6";"value"="-299"};
+{"key"="-25";"subkey"="-5";"value"="-287"};
+{"key"="-24";"subkey"="-5";"value"="-276"};
+{"key"="-23";"subkey"="-5";"value"="-264"};
+{"key"="-22";"subkey"="-5";"value"="-253"};
+{"key"="-21";"subkey"="-5";"value"="-241"};
+{"key"="-20";"subkey"="-4";"value"="-230"};
+{"key"="-19";"subkey"="-4";"value"="-218"};
+{"key"="-18";"subkey"="-4";"value"="-207"};
+{"key"="-17";"subkey"="-4";"value"="-195"};
+{"key"="-16";"subkey"="-4";"value"="-184"};
+{"key"="-15";"subkey"="-3";"value"="-172"};
+{"key"="-14";"subkey"="-3";"value"="-161"};
+{"key"="-13";"subkey"="-3";"value"="-149"};
+{"key"="-12";"subkey"="-3";"value"="-138"};
+{"key"="-11";"subkey"="-3";"value"="-126"};
+{"key"="-10";"subkey"="-2";"value"="-115"};
+{"key"="-9";"subkey"="-2";"value"="-103"};
+{"key"="-8";"subkey"="-2";"value"="-92"};
+{"key"="-7";"subkey"="-2";"value"="-80"};
+{"key"="-6";"subkey"="-2";"value"="-69"};
+{"key"="-5";"subkey"="-1";"value"="-57"};
+{"key"="-4";"subkey"="-1";"value"="-46"};
+{"key"="-3";"subkey"="-1";"value"="-34"};
+{"key"="-2";"subkey"="-1";"value"="-23"};
+{"key"="-1";"subkey"="-1";"value"="-11"};
+{"key"="0";"subkey"="0";"value"="0"};
+{"key"="1";"subkey"="0";"value"="11"};
+{"key"="2";"subkey"="0";"value"="23"};
+{"key"="3";"subkey"="0";"value"="34"};
+{"key"="4";"subkey"="0";"value"="46"};
+{"key"="5";"subkey"="1";"value"="57"};
+{"key"="6";"subkey"="1";"value"="69"};
+{"key"="7";"subkey"="1";"value"="80"};
+{"key"="8";"subkey"="1";"value"="92"};
+{"key"="9";"subkey"="1";"value"="103"};
+{"key"="10";"subkey"="2";"value"="115"};
+{"key"="11";"subkey"="2";"value"="126"};
+{"key"="12";"subkey"="2";"value"="138"};
+{"key"="13";"subkey"="2";"value"="149"};
+{"key"="14";"subkey"="2";"value"="161"};
+{"key"="15";"subkey"="3";"value"="172"};
+{"key"="16";"subkey"="3";"value"="184"};
+{"key"="17";"subkey"="3";"value"="195"};
+{"key"="18";"subkey"="3";"value"="207"};
+{"key"="19";"subkey"="3";"value"="218"};
+{"key"="20";"subkey"="4";"value"="230"};
+{"key"="21";"subkey"="4";"value"="241"};
+{"key"="22";"subkey"="4";"value"="253"};
+{"key"="23";"subkey"="4";"value"="264"};
+{"key"="24";"subkey"="4";"value"="276"};
+{"key"="25";"subkey"="5";"value"="287"};
+{"key"="26";"subkey"="5";"value"="299"};
+{"key"="27";"subkey"="5";"value"="310"};
+{"key"="28";"subkey"="5";"value"="322"};
+{"key"="29";"subkey"="5";"value"="333"};
+{"key"="30";"subkey"="6";"value"="345"};
+{"key"="31";"subkey"="6";"value"="356"};
+{"key"="32";"subkey"="6";"value"="368"};
+{"key"="33";"subkey"="6";"value"="379"};
+{"key"="34";"subkey"="6";"value"="391"};
+{"key"="35";"subkey"="7";"value"="402"};
+{"key"="36";"subkey"="7";"value"="414"};
+{"key"="37";"subkey"="7";"value"="425"};
+{"key"="38";"subkey"="7";"value"="437"};
+{"key"="39";"subkey"="7";"value"="449"};
+{"key"="40";"subkey"="8";"value"="460"};
+{"key"="41";"subkey"="8";"value"="472"};
+{"key"="42";"subkey"="8";"value"="483"};
+{"key"="43";"subkey"="8";"value"="495"};
+{"key"="44";"subkey"="8";"value"="506"};
+{"key"="45";"subkey"="9";"value"="518"};
+{"key"="46";"subkey"="9";"value"="529"};
+{"key"="47";"subkey"="9";"value"="541"};
+{"key"="48";"subkey"="9";"value"="552"};
+{"key"="49";"subkey"="9";"value"="564"};
+{"key"="50";"subkey"="10";"value"="575"};
+{"key"="51";"subkey"="10";"value"="587"};
+{"key"="52";"subkey"="10";"value"="598"};
+{"key"="53";"subkey"="10";"value"="610"};
+{"key"="54";"subkey"="10";"value"="621"};
+{"key"="55";"subkey"="11";"value"="633"};
+{"key"="56";"subkey"="11";"value"="644"};
+{"key"="57";"subkey"="11";"value"="656"};
+{"key"="58";"subkey"="11";"value"="667"};
+{"key"="59";"subkey"="11";"value"="679"};
+{"key"="60";"subkey"="12";"value"="690"};
+{"key"="61";"subkey"="12";"value"="702"};
+{"key"="62";"subkey"="12";"value"="713"};
+{"key"="63";"subkey"="12";"value"="725"};
+{"key"="64";"subkey"="12";"value"="736"};
+{"key"="65";"subkey"="13";"value"="748"};
+{"key"="66";"subkey"="13";"value"="759"};
+{"key"="67";"subkey"="13";"value"="771"};
+{"key"="68";"subkey"="13";"value"="782"};
+{"key"="69";"subkey"="13";"value"="794"};
+{"key"="70";"subkey"="14";"value"="805"};
+{"key"="71";"subkey"="14";"value"="817"};
+{"key"="72";"subkey"="14";"value"="828"};
+{"key"="73";"subkey"="14";"value"="840"};
+{"key"="74";"subkey"="14";"value"="851"};
+{"key"="75";"subkey"="15";"value"="863"};
+{"key"="76";"subkey"="15";"value"="874"};
+{"key"="77";"subkey"="15";"value"="886"};
+{"key"="78";"subkey"="15";"value"="898"};
+{"key"="79";"subkey"="15";"value"="909"};
+{"key"="80";"subkey"="16";"value"="921"};
+{"key"="81";"subkey"="16";"value"="932"};
+{"key"="82";"subkey"="16";"value"="944"};
+{"key"="83";"subkey"="16";"value"="955"};
+{"key"="84";"subkey"="16";"value"="967"};
+{"key"="85";"subkey"="17";"value"="978"};
+{"key"="86";"subkey"="17";"value"="990"};
+{"key"="87";"subkey"="17";"value"="1001"};
+{"key"="88";"subkey"="17";"value"="1013"};
+{"key"="89";"subkey"="17";"value"="1024"};
+{"key"="90";"subkey"="18";"value"="1036"};
+{"key"="91";"subkey"="18";"value"="1047"};
+{"key"="92";"subkey"="18";"value"="1059"};
+{"key"="93";"subkey"="18";"value"="1070"};
+{"key"="94";"subkey"="18";"value"="1082"};
+{"key"="95";"subkey"="19";"value"="1093"};
+{"key"="96";"subkey"="19";"value"="1105"};
+{"key"="97";"subkey"="19";"value"="1116"};
+{"key"="98";"subkey"="19";"value"="1128"};
+{"key"="99";"subkey"="19";"value"="1139"};
+{"key"="100";"subkey"="20";"value"="460"};
+{"key"="101";"subkey"="20";"value"="466"};
+{"key"="102";"subkey"="20";"value"="471"};
+{"key"="103";"subkey"="20";"value"="477"};
+{"key"="104";"subkey"="20";"value"="483"};
+{"key"="105";"subkey"="21";"value"="488"};
+{"key"="106";"subkey"="21";"value"="494"};
+{"key"="107";"subkey"="21";"value"="499"};
+{"key"="108";"subkey"="21";"value"="505"};
+{"key"="109";"subkey"="21";"value"="511"};
+{"key"="110";"subkey"="22";"value"="517"};
+{"key"="111";"subkey"="22";"value"="522"};
+{"key"="112";"subkey"="22";"value"="528"};
+{"key"="113";"subkey"="22";"value"="534"};
+{"key"="114";"subkey"="22";"value"="539"};
+{"key"="115";"subkey"="23";"value"="545"};
+{"key"="116";"subkey"="23";"value"="551"};
+{"key"="117";"subkey"="23";"value"="557"};
+{"key"="118";"subkey"="23";"value"="562"};
+{"key"="119";"subkey"="23";"value"="568"};
+{"key"="120";"subkey"="24";"value"="574"};
+{"key"="121";"subkey"="24";"value"="580"};
+{"key"="122";"subkey"="24";"value"="586"};
+{"key"="123";"subkey"="24";"value"="591"};
+{"key"="124";"subkey"="24";"value"="597"};
+{"key"="125";"subkey"="25";"value"="603"};
+{"key"="126";"subkey"="25";"value"="609"};
+{"key"="127";"subkey"="25";"value"="615"};
+{"key"="128";"subkey"="25";"value"="621"};
+{"key"="129";"subkey"="25";"value"="626"};
+{"key"="130";"subkey"="26";"value"="632"};
+{"key"="131";"subkey"="26";"value"="638"};
+{"key"="132";"subkey"="26";"value"="644"};
+{"key"="133";"subkey"="26";"value"="650"};
+{"key"="134";"subkey"="26";"value"="656"};
+{"key"="135";"subkey"="27";"value"="662"};
+{"key"="136";"subkey"="27";"value"="668"};
+{"key"="137";"subkey"="27";"value"="674"};
+{"key"="138";"subkey"="27";"value"="679"};
+{"key"="139";"subkey"="27";"value"="685"};
+{"key"="140";"subkey"="28";"value"="691"};
+{"key"="141";"subkey"="28";"value"="697"};
+{"key"="142";"subkey"="28";"value"="703"};
+{"key"="143";"subkey"="28";"value"="709"};
+{"key"="144";"subkey"="28";"value"="715"};
+{"key"="145";"subkey"="29";"value"="721"};
+{"key"="146";"subkey"="29";"value"="727"};
+{"key"="147";"subkey"="29";"value"="733"};
+{"key"="148";"subkey"="29";"value"="739"};
+{"key"="149";"subkey"="29";"value"="745"};
+{"key"="150";"subkey"="30";"value"="751"};
+{"key"="151";"subkey"="30";"value"="757"};
+{"key"="152";"subkey"="30";"value"="763"};
+{"key"="153";"subkey"="30";"value"="769"};
+{"key"="154";"subkey"="30";"value"="775"};
+{"key"="155";"subkey"="31";"value"="781"};
+{"key"="156";"subkey"="31";"value"="787"};
+{"key"="157";"subkey"="31";"value"="793"};
+{"key"="158";"subkey"="31";"value"="799"};
+{"key"="159";"subkey"="31";"value"="805"};
+{"key"="160";"subkey"="32";"value"="812"};
+{"key"="161";"subkey"="32";"value"="818"};
+{"key"="162";"subkey"="32";"value"="824"};
+{"key"="163";"subkey"="32";"value"="830"};
+{"key"="164";"subkey"="32";"value"="836"};
+{"key"="165";"subkey"="33";"value"="842"};
+{"key"="166";"subkey"="33";"value"="848"};
+{"key"="167";"subkey"="33";"value"="854"};
+{"key"="168";"subkey"="33";"value"="860"};
+{"key"="169";"subkey"="33";"value"="866"};
+{"key"="170";"subkey"="34";"value"="873"};
+{"key"="171";"subkey"="34";"value"="879"};
+{"key"="172";"subkey"="34";"value"="885"};
+{"key"="173";"subkey"="34";"value"="891"};
+{"key"="174";"subkey"="34";"value"="897"};
+{"key"="175";"subkey"="35";"value"="903"};
+{"key"="176";"subkey"="35";"value"="910"};
+{"key"="177";"subkey"="35";"value"="916"};
+{"key"="178";"subkey"="35";"value"="922"};
+{"key"="179";"subkey"="35";"value"="928"};
+{"key"="180";"subkey"="36";"value"="934"};
+{"key"="181";"subkey"="36";"value"="940"};
+{"key"="182";"subkey"="36";"value"="947"};
+{"key"="183";"subkey"="36";"value"="953"};
+{"key"="184";"subkey"="36";"value"="959"};
+{"key"="185";"subkey"="37";"value"="965"};
+{"key"="186";"subkey"="37";"value"="971"};
+{"key"="187";"subkey"="37";"value"="978"};
+{"key"="188";"subkey"="37";"value"="984"};
+{"key"="189";"subkey"="37";"value"="990"};
+{"key"="190";"subkey"="38";"value"="996"};
+{"key"="191";"subkey"="38";"value"="1003"};
+{"key"="192";"subkey"="38";"value"="1009"};
+{"key"="193";"subkey"="38";"value"="1015"};
+{"key"="194";"subkey"="38";"value"="1021"};
+{"key"="195";"subkey"="39";"value"="1028"};
+{"key"="196";"subkey"="39";"value"="1034"};
+{"key"="197";"subkey"="39";"value"="1040"};
+{"key"="198";"subkey"="39";"value"="1047"};
+{"key"="199";"subkey"="39";"value"="1053"};
+{"key"="200";"subkey"="40";"value"="1059"};
+{"key"="201";"subkey"="40";"value"="1065"};
+{"key"="202";"subkey"="40";"value"="1072"};
+{"key"="203";"subkey"="40";"value"="1078"};
+{"key"="204";"subkey"="40";"value"="1084"};
+{"key"="205";"subkey"="41";"value"="1091"};
+{"key"="206";"subkey"="41";"value"="1097"};
+{"key"="207";"subkey"="41";"value"="1103"};
+{"key"="208";"subkey"="41";"value"="1110"};
+{"key"="209";"subkey"="41";"value"="1116"};
+{"key"="210";"subkey"="42";"value"="1122"};
+{"key"="211";"subkey"="42";"value"="1129"};
+{"key"="212";"subkey"="42";"value"="1135"};
+{"key"="213";"subkey"="42";"value"="1141"};
+{"key"="214";"subkey"="42";"value"="1148"};
+{"key"="215";"subkey"="43";"value"="1154"};
+{"key"="216";"subkey"="43";"value"="1161"};
+{"key"="217";"subkey"="43";"value"="1167"};
+{"key"="218";"subkey"="43";"value"="1173"};
+{"key"="219";"subkey"="43";"value"="1180"};
+{"key"="220";"subkey"="44";"value"="1186"};
+{"key"="221";"subkey"="44";"value"="1192"};
+{"key"="222";"subkey"="44";"value"="1199"};
+{"key"="223";"subkey"="44";"value"="1205"};
+{"key"="224";"subkey"="44";"value"="1212"};
+{"key"="225";"subkey"="45";"value"="1218"};
+{"key"="226";"subkey"="45";"value"="1225"};
+{"key"="227";"subkey"="45";"value"="1231"};
+{"key"="228";"subkey"="45";"value"="1237"};
+{"key"="229";"subkey"="45";"value"="1244"};
+{"key"="230";"subkey"="46";"value"="1250"};
+{"key"="231";"subkey"="46";"value"="1257"};
+{"key"="232";"subkey"="46";"value"="1263"};
+{"key"="233";"subkey"="46";"value"="1270"};
+{"key"="234";"subkey"="46";"value"="1276"};
+{"key"="235";"subkey"="47";"value"="1283"};
+{"key"="236";"subkey"="47";"value"="1289"};
+{"key"="237";"subkey"="47";"value"="1295"};
+{"key"="238";"subkey"="47";"value"="1302"};
+{"key"="239";"subkey"="47";"value"="1308"};
+{"key"="240";"subkey"="48";"value"="1315"};
+{"key"="241";"subkey"="48";"value"="1321"};
+{"key"="242";"subkey"="48";"value"="1328"};
+{"key"="243";"subkey"="48";"value"="1334"};
+{"key"="244";"subkey"="48";"value"="1341"};
+{"key"="245";"subkey"="49";"value"="1347"};
+{"key"="246";"subkey"="49";"value"="1354"};
+{"key"="247";"subkey"="49";"value"="1360"};
+{"key"="248";"subkey"="49";"value"="1367"};
+{"key"="249";"subkey"="49";"value"="1373"};
+{"key"="250";"subkey"="50";"value"="1380"};
+{"key"="251";"subkey"="50";"value"="1386"};
+{"key"="252";"subkey"="50";"value"="1393"};
+{"key"="253";"subkey"="50";"value"="1399"};
+{"key"="254";"subkey"="50";"value"="1406"};
+{"key"="255";"subkey"="51";"value"="1413"};
+{"key"="256";"subkey"="51";"value"="1419"};
+{"key"="257";"subkey"="51";"value"="1426"};
+{"key"="258";"subkey"="51";"value"="1432"};
+{"key"="259";"subkey"="51";"value"="1439"};
+{"key"="260";"subkey"="52";"value"="1445"};
+{"key"="261";"subkey"="52";"value"="1452"};
+{"key"="262";"subkey"="52";"value"="1458"};
+{"key"="263";"subkey"="52";"value"="1465"};
+{"key"="264";"subkey"="52";"value"="1472"};
+{"key"="265";"subkey"="53";"value"="1478"};
+{"key"="266";"subkey"="53";"value"="1485"};
+{"key"="267";"subkey"="53";"value"="1491"};
+{"key"="268";"subkey"="53";"value"="1498"};
+{"key"="269";"subkey"="53";"value"="1504"};
+{"key"="270";"subkey"="54";"value"="1511"};
+{"key"="271";"subkey"="54";"value"="1518"};
+{"key"="272";"subkey"="54";"value"="1524"};
+{"key"="273";"subkey"="54";"value"="1531"};
+{"key"="274";"subkey"="54";"value"="1537"};
+{"key"="275";"subkey"="55";"value"="1544"};
+{"key"="276";"subkey"="55";"value"="1551"};
+{"key"="277";"subkey"="55";"value"="1557"};
+{"key"="278";"subkey"="55";"value"="1564"};
+{"key"="279";"subkey"="55";"value"="1571"};
+{"key"="280";"subkey"="56";"value"="1577"};
+{"key"="281";"subkey"="56";"value"="1584"};
+{"key"="282";"subkey"="56";"value"="1591"};
+{"key"="283";"subkey"="56";"value"="1597"};
+{"key"="284";"subkey"="56";"value"="1604"};
+{"key"="285";"subkey"="57";"value"="1610"};
+{"key"="286";"subkey"="57";"value"="1617"};
+{"key"="287";"subkey"="57";"value"="1624"};
+{"key"="288";"subkey"="57";"value"="1630"};
+{"key"="289";"subkey"="57";"value"="1637"};
+{"key"="290";"subkey"="58";"value"="1644"};
+{"key"="291";"subkey"="58";"value"="1650"};
+{"key"="292";"subkey"="58";"value"="1657"};
+{"key"="293";"subkey"="58";"value"="1664"};
+{"key"="294";"subkey"="58";"value"="1670"};
+{"key"="295";"subkey"="59";"value"="1677"};
+{"key"="296";"subkey"="59";"value"="1684"};
+{"key"="297";"subkey"="59";"value"="1691"};
+{"key"="298";"subkey"="59";"value"="1697"};
+{"key"="299";"subkey"="59";"value"="1704"};
+{"key"="300";"subkey"="60";"value"="1711"};
+{"key"="301";"subkey"="60";"value"="1717"};
+{"key"="302";"subkey"="60";"value"="1724"};
+{"key"="303";"subkey"="60";"value"="1731"};
+{"key"="304";"subkey"="60";"value"="1737"};
+{"key"="305";"subkey"="61";"value"="1744"};
+{"key"="306";"subkey"="61";"value"="1751"};
+{"key"="307";"subkey"="61";"value"="1758"};
+{"key"="308";"subkey"="61";"value"="1764"};
+{"key"="309";"subkey"="61";"value"="1771"};
+{"key"="310";"subkey"="62";"value"="1778"};
+{"key"="311";"subkey"="62";"value"="1785"};
+{"key"="312";"subkey"="62";"value"="1791"};
+{"key"="313";"subkey"="62";"value"="1798"};
+{"key"="314";"subkey"="62";"value"="1805"};
+{"key"="315";"subkey"="63";"value"="1812"};
+{"key"="316";"subkey"="63";"value"="1818"};
+{"key"="317";"subkey"="63";"value"="1825"};
+{"key"="318";"subkey"="63";"value"="1832"};
+{"key"="319";"subkey"="63";"value"="1839"};
+{"key"="320";"subkey"="64";"value"="1845"};
+{"key"="321";"subkey"="64";"value"="1852"};
+{"key"="322";"subkey"="64";"value"="1859"};
+{"key"="323";"subkey"="64";"value"="1866"};
+{"key"="324";"subkey"="64";"value"="1872"};
+{"key"="325";"subkey"="65";"value"="1879"};
+{"key"="326";"subkey"="65";"value"="1886"};
+{"key"="327";"subkey"="65";"value"="1893"};
+{"key"="328";"subkey"="65";"value"="1900"};
+{"key"="329";"subkey"="65";"value"="1906"};
+{"key"="330";"subkey"="66";"value"="1913"};
+{"key"="331";"subkey"="66";"value"="1920"};
+{"key"="332";"subkey"="66";"value"="1927"};
+{"key"="333";"subkey"="66";"value"="1934"};
+{"key"="334";"subkey"="66";"value"="1940"};
+{"key"="335";"subkey"="67";"value"="1947"};
+{"key"="336";"subkey"="67";"value"="1954"};
+{"key"="337";"subkey"="67";"value"="1961"};
+{"key"="338";"subkey"="67";"value"="1968"};
+{"key"="339";"subkey"="67";"value"="1975"};
+{"key"="340";"subkey"="68";"value"="1981"};
+{"key"="341";"subkey"="68";"value"="1988"};
+{"key"="342";"subkey"="68";"value"="1995"};
+{"key"="343";"subkey"="68";"value"="2002"};
+{"key"="344";"subkey"="68";"value"="2009"};
+{"key"="345";"subkey"="69";"value"="2016"};
+{"key"="346";"subkey"="69";"value"="2022"};
+{"key"="347";"subkey"="69";"value"="2029"};
+{"key"="348";"subkey"="69";"value"="2036"};
+{"key"="349";"subkey"="69";"value"="2043"};
+{"key"="350";"subkey"="70";"value"="2050"};
+{"key"="351";"subkey"="70";"value"="2057"};
+{"key"="352";"subkey"="70";"value"="2063"};
+{"key"="353";"subkey"="70";"value"="2070"};
+{"key"="354";"subkey"="70";"value"="2077"};
+{"key"="355";"subkey"="71";"value"="2084"};
+{"key"="356";"subkey"="71";"value"="2091"};
+{"key"="357";"subkey"="71";"value"="2098"};
+{"key"="358";"subkey"="71";"value"="2105"};
+{"key"="359";"subkey"="71";"value"="2112"};
+{"key"="360";"subkey"="72";"value"="2118"};
+{"key"="361";"subkey"="72";"value"="2125"};
+{"key"="362";"subkey"="72";"value"="2132"};
+{"key"="363";"subkey"="72";"value"="2139"};
+{"key"="364";"subkey"="72";"value"="2146"};
+{"key"="365";"subkey"="73";"value"="2153"};
+{"key"="366";"subkey"="73";"value"="2160"};
+{"key"="367";"subkey"="73";"value"="2167"};
+{"key"="368";"subkey"="73";"value"="2174"};
+{"key"="369";"subkey"="73";"value"="2181"};
+{"key"="370";"subkey"="74";"value"="2187"};
+{"key"="371";"subkey"="74";"value"="2194"};
+{"key"="372";"subkey"="74";"value"="2201"};
+{"key"="373";"subkey"="74";"value"="2208"};
+{"key"="374";"subkey"="74";"value"="2215"};
+{"key"="375";"subkey"="75";"value"="2222"};
+{"key"="376";"subkey"="75";"value"="2229"};
+{"key"="377";"subkey"="75";"value"="2236"};
+{"key"="378";"subkey"="75";"value"="2243"};
+{"key"="379";"subkey"="75";"value"="2250"};
+{"key"="380";"subkey"="76";"value"="2257"};
+{"key"="381";"subkey"="76";"value"="2264"};
+{"key"="382";"subkey"="76";"value"="2271"};
+{"key"="383";"subkey"="76";"value"="2278"};
+{"key"="384";"subkey"="76";"value"="2285"};
+{"key"="385";"subkey"="77";"value"="2291"};
+{"key"="386";"subkey"="77";"value"="2298"};
+{"key"="387";"subkey"="77";"value"="2305"};
+{"key"="388";"subkey"="77";"value"="2312"};
+{"key"="389";"subkey"="77";"value"="2319"};
+{"key"="390";"subkey"="78";"value"="2326"};
+{"key"="391";"subkey"="78";"value"="2333"};
+{"key"="392";"subkey"="78";"value"="2340"};
+{"key"="393";"subkey"="78";"value"="2347"};
+{"key"="394";"subkey"="78";"value"="2354"};
+{"key"="395";"subkey"="79";"value"="2361"};
+{"key"="396";"subkey"="79";"value"="2368"};
+{"key"="397";"subkey"="79";"value"="2375"};
+{"key"="398";"subkey"="79";"value"="2382"};
+{"key"="399";"subkey"="79";"value"="2389"};
+{"key"="400";"subkey"="80";"value"="2396"};
+{"key"="401";"subkey"="80";"value"="2403"};
+{"key"="402";"subkey"="80";"value"="2410"};
+{"key"="403";"subkey"="80";"value"="2417"};
+{"key"="404";"subkey"="80";"value"="2424"};
+{"key"="405";"subkey"="81";"value"="2431"};
+{"key"="406";"subkey"="81";"value"="2438"};
+{"key"="407";"subkey"="81";"value"="2445"};
+{"key"="408";"subkey"="81";"value"="2452"};
+{"key"="409";"subkey"="81";"value"="2459"};
+{"key"="410";"subkey"="82";"value"="2466"};
+{"key"="411";"subkey"="82";"value"="2473"};
+{"key"="412";"subkey"="82";"value"="2480"};
+{"key"="413";"subkey"="82";"value"="2487"};
+{"key"="414";"subkey"="82";"value"="2494"};
+{"key"="415";"subkey"="83";"value"="2501"};
+{"key"="416";"subkey"="83";"value"="2508"};
+{"key"="417";"subkey"="83";"value"="2515"};
+{"key"="418";"subkey"="83";"value"="2522"};
+{"key"="419";"subkey"="83";"value"="2529"};
+{"key"="420";"subkey"="84";"value"="2536"};
+{"key"="421";"subkey"="84";"value"="2543"};
+{"key"="422";"subkey"="84";"value"="2550"};
+{"key"="423";"subkey"="84";"value"="2558"};
+{"key"="424";"subkey"="84";"value"="2565"};
+{"key"="425";"subkey"="85";"value"="2572"};
+{"key"="426";"subkey"="85";"value"="2579"};
+{"key"="427";"subkey"="85";"value"="2586"};
+{"key"="428";"subkey"="85";"value"="2593"};
+{"key"="429";"subkey"="85";"value"="2600"};
+{"key"="430";"subkey"="86";"value"="2607"};
+{"key"="431";"subkey"="86";"value"="2614"};
+{"key"="432";"subkey"="86";"value"="2621"};
+{"key"="433";"subkey"="86";"value"="2628"};
+{"key"="434";"subkey"="86";"value"="2635"};
+{"key"="435";"subkey"="87";"value"="2642"};
+{"key"="436";"subkey"="87";"value"="2649"};
+{"key"="437";"subkey"="87";"value"="2656"};
+{"key"="438";"subkey"="87";"value"="2664"};
+{"key"="439";"subkey"="87";"value"="2671"};
+{"key"="440";"subkey"="88";"value"="2678"};
+{"key"="441";"subkey"="88";"value"="2685"};
+{"key"="442";"subkey"="88";"value"="2692"};
+{"key"="443";"subkey"="88";"value"="2699"};
+{"key"="444";"subkey"="88";"value"="2706"};
+{"key"="445";"subkey"="89";"value"="2713"};
+{"key"="446";"subkey"="89";"value"="2720"};
+{"key"="447";"subkey"="89";"value"="2727"};
+{"key"="448";"subkey"="89";"value"="2734"};
+{"key"="449";"subkey"="89";"value"="2742"};
+{"key"="450";"subkey"="90";"value"="2749"};
+{"key"="451";"subkey"="90";"value"="2756"};
+{"key"="452";"subkey"="90";"value"="2763"};
+{"key"="453";"subkey"="90";"value"="2770"};
+{"key"="454";"subkey"="90";"value"="2777"};
+{"key"="455";"subkey"="91";"value"="2784"};
+{"key"="456";"subkey"="91";"value"="2791"};
+{"key"="457";"subkey"="91";"value"="2798"};
+{"key"="458";"subkey"="91";"value"="2806"};
+{"key"="459";"subkey"="91";"value"="2813"};
+{"key"="460";"subkey"="92";"value"="2820"};
+{"key"="461";"subkey"="92";"value"="2827"};
+{"key"="462";"subkey"="92";"value"="2834"};
+{"key"="463";"subkey"="92";"value"="2841"};
+{"key"="464";"subkey"="92";"value"="2848"};
+{"key"="465";"subkey"="93";"value"="2856"};
+{"key"="466";"subkey"="93";"value"="2863"};
+{"key"="467";"subkey"="93";"value"="2870"};
+{"key"="468";"subkey"="93";"value"="2877"};
+{"key"="469";"subkey"="93";"value"="2884"};
+{"key"="470";"subkey"="94";"value"="2891"};
+{"key"="471";"subkey"="94";"value"="2898"};
+{"key"="472";"subkey"="94";"value"="2906"};
+{"key"="473";"subkey"="94";"value"="2913"};
+{"key"="474";"subkey"="94";"value"="2920"};
+{"key"="475";"subkey"="95";"value"="2927"};
+{"key"="476";"subkey"="95";"value"="2934"};
+{"key"="477";"subkey"="95";"value"="2941"};
+{"key"="478";"subkey"="95";"value"="2949"};
+{"key"="479";"subkey"="95";"value"="2956"};
+{"key"="480";"subkey"="96";"value"="2963"};
+{"key"="481";"subkey"="96";"value"="2970"};
+{"key"="482";"subkey"="96";"value"="2977"};
+{"key"="483";"subkey"="96";"value"="2984"};
+{"key"="484";"subkey"="96";"value"="2992"};
+{"key"="485";"subkey"="97";"value"="2999"};
+{"key"="486";"subkey"="97";"value"="3006"};
+{"key"="487";"subkey"="97";"value"="3013"};
+{"key"="488";"subkey"="97";"value"="3020"};
+{"key"="489";"subkey"="97";"value"="3028"};
+{"key"="490";"subkey"="98";"value"="3035"};
+{"key"="491";"subkey"="98";"value"="3042"};
+{"key"="492";"subkey"="98";"value"="3049"};
+{"key"="493";"subkey"="98";"value"="3056"};
+{"key"="494";"subkey"="98";"value"="3064"};
+{"key"="495";"subkey"="99";"value"="3071"};
+{"key"="496";"subkey"="99";"value"="3078"};
+{"key"="497";"subkey"="99";"value"="3085"};
+{"key"="498";"subkey"="99";"value"="3092"};
+{"key"="499";"subkey"="99";"value"="3100"};
+{"key"="500";"subkey"="100";"value"="3107"};
+{"key"="501";"subkey"="100";"value"="3114"};
+{"key"="502";"subkey"="100";"value"="3121"};
+{"key"="503";"subkey"="100";"value"="3128"};
+{"key"="504";"subkey"="100";"value"="3136"};
+{"key"="505";"subkey"="101";"value"="3143"};
+{"key"="506";"subkey"="101";"value"="3150"};
+{"key"="507";"subkey"="101";"value"="3157"};
+{"key"="508";"subkey"="101";"value"="3165"};
+{"key"="509";"subkey"="101";"value"="3172"};
+{"key"="510";"subkey"="102";"value"="3179"};
+{"key"="511";"subkey"="102";"value"="3186"};
+{"key"="512";"subkey"="102";"value"="3194"};
+{"key"="513";"subkey"="102";"value"="3201"};
+{"key"="514";"subkey"="102";"value"="3208"};
+{"key"="515";"subkey"="103";"value"="3215"};
+{"key"="516";"subkey"="103";"value"="3222"};
+{"key"="517";"subkey"="103";"value"="3230"};
+{"key"="518";"subkey"="103";"value"="3237"};
+{"key"="519";"subkey"="103";"value"="3244"};
+{"key"="520";"subkey"="104";"value"="3251"};
+{"key"="521";"subkey"="104";"value"="3259"};
+{"key"="522";"subkey"="104";"value"="3266"};
+{"key"="523";"subkey"="104";"value"="3273"};
+{"key"="524";"subkey"="104";"value"="3281"};
+{"key"="525";"subkey"="105";"value"="3288"};
+{"key"="526";"subkey"="105";"value"="3295"};
+{"key"="527";"subkey"="105";"value"="3302"};
+{"key"="528";"subkey"="105";"value"="3310"};
+{"key"="529";"subkey"="105";"value"="3317"};
+{"key"="530";"subkey"="106";"value"="3324"};
+{"key"="531";"subkey"="106";"value"="3331"};
+{"key"="532";"subkey"="106";"value"="3339"};
+{"key"="533";"subkey"="106";"value"="3346"};
+{"key"="534";"subkey"="106";"value"="3353"};
+{"key"="535";"subkey"="107";"value"="3361"};
+{"key"="536";"subkey"="107";"value"="3368"};
+{"key"="537";"subkey"="107";"value"="3375"};
+{"key"="538";"subkey"="107";"value"="3382"};
+{"key"="539";"subkey"="107";"value"="3390"};
+{"key"="540";"subkey"="108";"value"="3397"};
+{"key"="541";"subkey"="108";"value"="3404"};
+{"key"="542";"subkey"="108";"value"="3412"};
+{"key"="543";"subkey"="108";"value"="3419"};
+{"key"="544";"subkey"="108";"value"="3426"};
+{"key"="545";"subkey"="109";"value"="3433"};
+{"key"="546";"subkey"="109";"value"="3441"};
+{"key"="547";"subkey"="109";"value"="3448"};
+{"key"="548";"subkey"="109";"value"="3455"};
+{"key"="549";"subkey"="109";"value"="3463"};
+{"key"="550";"subkey"="110";"value"="3470"};
+{"key"="551";"subkey"="110";"value"="3477"};
+{"key"="552";"subkey"="110";"value"="3485"};
+{"key"="553";"subkey"="110";"value"="3492"};
+{"key"="554";"subkey"="110";"value"="3499"};
+{"key"="555";"subkey"="111";"value"="3507"};
+{"key"="556";"subkey"="111";"value"="3514"};
+{"key"="557";"subkey"="111";"value"="3521"};
+{"key"="558";"subkey"="111";"value"="3528"};
+{"key"="559";"subkey"="111";"value"="3536"};
+{"key"="560";"subkey"="112";"value"="3543"};
+{"key"="561";"subkey"="112";"value"="3550"};
+{"key"="562";"subkey"="112";"value"="3558"};
+{"key"="563";"subkey"="112";"value"="3565"};
+{"key"="564";"subkey"="112";"value"="3572"};
+{"key"="565";"subkey"="113";"value"="3580"};
+{"key"="566";"subkey"="113";"value"="3587"};
+{"key"="567";"subkey"="113";"value"="3594"};
+{"key"="568";"subkey"="113";"value"="3602"};
+{"key"="569";"subkey"="113";"value"="3609"};
+{"key"="570";"subkey"="114";"value"="3617"};
+{"key"="571";"subkey"="114";"value"="3624"};
+{"key"="572";"subkey"="114";"value"="3631"};
+{"key"="573";"subkey"="114";"value"="3639"};
+{"key"="574";"subkey"="114";"value"="3646"};
+{"key"="575";"subkey"="115";"value"="3653"};
+{"key"="576";"subkey"="115";"value"="3661"};
+{"key"="577";"subkey"="115";"value"="3668"};
+{"key"="578";"subkey"="115";"value"="3675"};
+{"key"="579";"subkey"="115";"value"="3683"};
+{"key"="580";"subkey"="116";"value"="3690"};
+{"key"="581";"subkey"="116";"value"="3697"};
+{"key"="582";"subkey"="116";"value"="3705"};
+{"key"="583";"subkey"="116";"value"="3712"};
+{"key"="584";"subkey"="116";"value"="3720"};
+{"key"="585";"subkey"="117";"value"="3727"};
+{"key"="586";"subkey"="117";"value"="3734"};
+{"key"="587";"subkey"="117";"value"="3742"};
+{"key"="588";"subkey"="117";"value"="3749"};
+{"key"="589";"subkey"="117";"value"="3756"};
+{"key"="590";"subkey"="118";"value"="3764"};
+{"key"="591";"subkey"="118";"value"="3771"};
+{"key"="592";"subkey"="118";"value"="3779"};
+{"key"="593";"subkey"="118";"value"="3786"};
+{"key"="594";"subkey"="118";"value"="3793"};
+{"key"="595";"subkey"="119";"value"="3801"};
+{"key"="596";"subkey"="119";"value"="3808"};
+{"key"="597";"subkey"="119";"value"="3815"};
+{"key"="598";"subkey"="119";"value"="3823"};
+{"key"="599";"subkey"="119";"value"="3830"};
+{"key"="600";"subkey"="120";"value"="3838"};
+{"key"="601";"subkey"="120";"value"="3845"};
+{"key"="602";"subkey"="120";"value"="3852"};
+{"key"="603";"subkey"="120";"value"="3860"};
+{"key"="604";"subkey"="120";"value"="3867"};
+{"key"="605";"subkey"="121";"value"="3875"};
+{"key"="606";"subkey"="121";"value"="3882"};
+{"key"="607";"subkey"="121";"value"="3889"};
+{"key"="608";"subkey"="121";"value"="3897"};
+{"key"="609";"subkey"="121";"value"="3904"};
+{"key"="610";"subkey"="122";"value"="3912"};
+{"key"="611";"subkey"="122";"value"="3919"};
+{"key"="612";"subkey"="122";"value"="3927"};
+{"key"="613";"subkey"="122";"value"="3934"};
+{"key"="614";"subkey"="122";"value"="3941"};
+{"key"="615";"subkey"="123";"value"="3949"};
+{"key"="616";"subkey"="123";"value"="3956"};
+{"key"="617";"subkey"="123";"value"="3964"};
+{"key"="618";"subkey"="123";"value"="3971"};
+{"key"="619";"subkey"="123";"value"="3978"};
+{"key"="620";"subkey"="124";"value"="3986"};
+{"key"="621";"subkey"="124";"value"="3993"};
+{"key"="622";"subkey"="124";"value"="4001"};
+{"key"="623";"subkey"="124";"value"="4008"};
+{"key"="624";"subkey"="124";"value"="4016"};
+{"key"="625";"subkey"="125";"value"="4023"};
+{"key"="626";"subkey"="125";"value"="4031"};
+{"key"="627";"subkey"="125";"value"="4038"};
+{"key"="628";"subkey"="125";"value"="4045"};
+{"key"="629";"subkey"="125";"value"="4053"};
+{"key"="630";"subkey"="126";"value"="4060"};
+{"key"="631";"subkey"="126";"value"="4068"};
+{"key"="632";"subkey"="126";"value"="4075"};
+{"key"="633";"subkey"="126";"value"="4083"};
+{"key"="634";"subkey"="126";"value"="4090"};
+{"key"="635";"subkey"="127";"value"="4098"};
+{"key"="636";"subkey"="127";"value"="4105"};
+{"key"="637";"subkey"="127";"value"="4112"};
+{"key"="638";"subkey"="127";"value"="4120"};
+{"key"="639";"subkey"="127";"value"="4127"};
+{"key"="640";"subkey"="128";"value"="4135"};
+{"key"="641";"subkey"="128";"value"="4142"};
+{"key"="642";"subkey"="128";"value"="4150"};
+{"key"="643";"subkey"="128";"value"="4157"};
+{"key"="644";"subkey"="128";"value"="4165"};
+{"key"="645";"subkey"="129";"value"="4172"};
+{"key"="646";"subkey"="129";"value"="4180"};
+{"key"="647";"subkey"="129";"value"="4187"};
+{"key"="648";"subkey"="129";"value"="4195"};
+{"key"="649";"subkey"="129";"value"="4202"};
+{"key"="650";"subkey"="130";"value"="4210"};
+{"key"="651";"subkey"="130";"value"="4217"};
+{"key"="652";"subkey"="130";"value"="4224"};
+{"key"="653";"subkey"="130";"value"="4232"};
+{"key"="654";"subkey"="130";"value"="4239"};
+{"key"="655";"subkey"="131";"value"="4247"};
+{"key"="656";"subkey"="131";"value"="4254"};
+{"key"="657";"subkey"="131";"value"="4262"};
+{"key"="658";"subkey"="131";"value"="4269"};
+{"key"="659";"subkey"="131";"value"="4277"};
+{"key"="660";"subkey"="132";"value"="4284"};
+{"key"="661";"subkey"="132";"value"="4292"};
+{"key"="662";"subkey"="132";"value"="4299"};
+{"key"="663";"subkey"="132";"value"="4307"};
+{"key"="664";"subkey"="132";"value"="4314"};
+{"key"="665";"subkey"="133";"value"="4322"};
+{"key"="666";"subkey"="133";"value"="4329"};
+{"key"="667";"subkey"="133";"value"="4337"};
+{"key"="668";"subkey"="133";"value"="4344"};
+{"key"="669";"subkey"="133";"value"="4352"};
+{"key"="670";"subkey"="134";"value"="4359"};
+{"key"="671";"subkey"="134";"value"="4367"};
+{"key"="672";"subkey"="134";"value"="4374"};
+{"key"="673";"subkey"="134";"value"="4382"};
+{"key"="674";"subkey"="134";"value"="4389"};
+{"key"="675";"subkey"="135";"value"="4397"};
+{"key"="676";"subkey"="135";"value"="4404"};
+{"key"="677";"subkey"="135";"value"="4412"};
+{"key"="678";"subkey"="135";"value"="4419"};
+{"key"="679";"subkey"="135";"value"="4427"};
+{"key"="680";"subkey"="136";"value"="4435"};
+{"key"="681";"subkey"="136";"value"="4442"};
+{"key"="682";"subkey"="136";"value"="4450"};
+{"key"="683";"subkey"="136";"value"="4457"};
+{"key"="684";"subkey"="136";"value"="4465"};
+{"key"="685";"subkey"="137";"value"="4472"};
+{"key"="686";"subkey"="137";"value"="4480"};
+{"key"="687";"subkey"="137";"value"="4487"};
+{"key"="688";"subkey"="137";"value"="4495"};
+{"key"="689";"subkey"="137";"value"="4502"};
+{"key"="690";"subkey"="138";"value"="4510"};
+{"key"="691";"subkey"="138";"value"="4517"};
+{"key"="692";"subkey"="138";"value"="4525"};
+{"key"="693";"subkey"="138";"value"="4532"};
+{"key"="694";"subkey"="138";"value"="4540"};
+{"key"="695";"subkey"="139";"value"="4548"};
+{"key"="696";"subkey"="139";"value"="4555"};
+{"key"="697";"subkey"="139";"value"="4563"};
+{"key"="698";"subkey"="139";"value"="4570"};
+{"key"="699";"subkey"="139";"value"="4578"};
+{"key"="700";"subkey"="140";"value"="4585"};
+{"key"="701";"subkey"="140";"value"="4593"};
+{"key"="702";"subkey"="140";"value"="4600"};
+{"key"="703";"subkey"="140";"value"="4608"};
+{"key"="704";"subkey"="140";"value"="4615"};
+{"key"="705";"subkey"="141";"value"="4623"};
+{"key"="706";"subkey"="141";"value"="4631"};
+{"key"="707";"subkey"="141";"value"="4638"};
+{"key"="708";"subkey"="141";"value"="4646"};
+{"key"="709";"subkey"="141";"value"="4653"};
+{"key"="710";"subkey"="142";"value"="4661"};
+{"key"="711";"subkey"="142";"value"="4668"};
+{"key"="712";"subkey"="142";"value"="4676"};
+{"key"="713";"subkey"="142";"value"="4684"};
+{"key"="714";"subkey"="142";"value"="4691"};
+{"key"="715";"subkey"="143";"value"="4699"};
+{"key"="716";"subkey"="143";"value"="4706"};
+{"key"="717";"subkey"="143";"value"="4714"};
+{"key"="718";"subkey"="143";"value"="4721"};
+{"key"="719";"subkey"="143";"value"="4729"};
+{"key"="720";"subkey"="144";"value"="4737"};
+{"key"="721";"subkey"="144";"value"="4744"};
+{"key"="722";"subkey"="144";"value"="4752"};
+{"key"="723";"subkey"="144";"value"="4759"};
+{"key"="724";"subkey"="144";"value"="4767"};
+{"key"="725";"subkey"="145";"value"="4774"};
+{"key"="726";"subkey"="145";"value"="4782"};
+{"key"="727";"subkey"="145";"value"="4790"};
+{"key"="728";"subkey"="145";"value"="4797"};
+{"key"="729";"subkey"="145";"value"="4805"};
+{"key"="730";"subkey"="146";"value"="4812"};
+{"key"="731";"subkey"="146";"value"="4820"};
+{"key"="732";"subkey"="146";"value"="4828"};
+{"key"="733";"subkey"="146";"value"="4835"};
+{"key"="734";"subkey"="146";"value"="4843"};
+{"key"="735";"subkey"="147";"value"="4850"};
+{"key"="736";"subkey"="147";"value"="4858"};
+{"key"="737";"subkey"="147";"value"="4866"};
+{"key"="738";"subkey"="147";"value"="4873"};
+{"key"="739";"subkey"="147";"value"="4881"};
+{"key"="740";"subkey"="148";"value"="4888"};
+{"key"="741";"subkey"="148";"value"="4896"};
+{"key"="742";"subkey"="148";"value"="4904"};
+{"key"="743";"subkey"="148";"value"="4911"};
+{"key"="744";"subkey"="148";"value"="4919"};
+{"key"="745";"subkey"="149";"value"="4926"};
+{"key"="746";"subkey"="149";"value"="4934"};
+{"key"="747";"subkey"="149";"value"="4942"};
+{"key"="748";"subkey"="149";"value"="4949"};
+{"key"="749";"subkey"="149";"value"="4957"};
+{"key"="750";"subkey"="150";"value"="4965"};
+{"key"="751";"subkey"="150";"value"="4972"};
+{"key"="752";"subkey"="150";"value"="4980"};
+{"key"="753";"subkey"="150";"value"="4987"};
+{"key"="754";"subkey"="150";"value"="4995"};
+{"key"="755";"subkey"="151";"value"="5003"};
+{"key"="756";"subkey"="151";"value"="5010"};
+{"key"="757";"subkey"="151";"value"="5018"};
+{"key"="758";"subkey"="151";"value"="5026"};
+{"key"="759";"subkey"="151";"value"="5033"};
+{"key"="760";"subkey"="152";"value"="5041"};
+{"key"="761";"subkey"="152";"value"="5048"};
+{"key"="762";"subkey"="152";"value"="5056"};
+{"key"="763";"subkey"="152";"value"="5064"};
+{"key"="764";"subkey"="152";"value"="5071"};
+{"key"="765";"subkey"="153";"value"="5079"};
+{"key"="766";"subkey"="153";"value"="5087"};
+{"key"="767";"subkey"="153";"value"="5094"};
+{"key"="768";"subkey"="153";"value"="5102"};
+{"key"="769";"subkey"="153";"value"="5110"};
+{"key"="770";"subkey"="154";"value"="5117"};
+{"key"="771";"subkey"="154";"value"="5125"};
+{"key"="772";"subkey"="154";"value"="5133"};
+{"key"="773";"subkey"="154";"value"="5140"};
+{"key"="774";"subkey"="154";"value"="5148"};
+{"key"="775";"subkey"="155";"value"="5155"};
+{"key"="776";"subkey"="155";"value"="5163"};
+{"key"="777";"subkey"="155";"value"="5171"};
+{"key"="778";"subkey"="155";"value"="5178"};
+{"key"="779";"subkey"="155";"value"="5186"};
+{"key"="780";"subkey"="156";"value"="5194"};
+{"key"="781";"subkey"="156";"value"="5201"};
+{"key"="782";"subkey"="156";"value"="5209"};
+{"key"="783";"subkey"="156";"value"="5217"};
+{"key"="784";"subkey"="156";"value"="5224"};
+{"key"="785";"subkey"="157";"value"="5232"};
+{"key"="786";"subkey"="157";"value"="5240"};
+{"key"="787";"subkey"="157";"value"="5247"};
+{"key"="788";"subkey"="157";"value"="5255"};
+{"key"="789";"subkey"="157";"value"="5263"};
+{"key"="790";"subkey"="158";"value"="5270"};
+{"key"="791";"subkey"="158";"value"="5278"};
+{"key"="792";"subkey"="158";"value"="5286"};
+{"key"="793";"subkey"="158";"value"="5293"};
+{"key"="794";"subkey"="158";"value"="5301"};
+{"key"="795";"subkey"="159";"value"="5309"};
+{"key"="796";"subkey"="159";"value"="5316"};
+{"key"="797";"subkey"="159";"value"="5324"};
+{"key"="798";"subkey"="159";"value"="5332"};
+{"key"="799";"subkey"="159";"value"="5340"};
+{"key"="800";"subkey"="160";"value"="5347"};
+{"key"="801";"subkey"="160";"value"="5355"};
+{"key"="802";"subkey"="160";"value"="5363"};
+{"key"="803";"subkey"="160";"value"="5370"};
+{"key"="804";"subkey"="160";"value"="5378"};
+{"key"="805";"subkey"="161";"value"="5386"};
+{"key"="806";"subkey"="161";"value"="5393"};
+{"key"="807";"subkey"="161";"value"="5401"};
+{"key"="808";"subkey"="161";"value"="5409"};
+{"key"="809";"subkey"="161";"value"="5416"};
+{"key"="810";"subkey"="162";"value"="5424"};
+{"key"="811";"subkey"="162";"value"="5432"};
+{"key"="812";"subkey"="162";"value"="5439"};
+{"key"="813";"subkey"="162";"value"="5447"};
+{"key"="814";"subkey"="162";"value"="5455"};
+{"key"="815";"subkey"="163";"value"="5463"};
+{"key"="816";"subkey"="163";"value"="5470"};
+{"key"="817";"subkey"="163";"value"="5478"};
+{"key"="818";"subkey"="163";"value"="5486"};
+{"key"="819";"subkey"="163";"value"="5493"};
+{"key"="820";"subkey"="164";"value"="5501"};
+{"key"="821";"subkey"="164";"value"="5509"};
+{"key"="822";"subkey"="164";"value"="5517"};
+{"key"="823";"subkey"="164";"value"="5524"};
+{"key"="824";"subkey"="164";"value"="5532"};
+{"key"="825";"subkey"="165";"value"="5540"};
+{"key"="826";"subkey"="165";"value"="5547"};
+{"key"="827";"subkey"="165";"value"="5555"};
+{"key"="828";"subkey"="165";"value"="5563"};
+{"key"="829";"subkey"="165";"value"="5571"};
+{"key"="830";"subkey"="166";"value"="5578"};
+{"key"="831";"subkey"="166";"value"="5586"};
+{"key"="832";"subkey"="166";"value"="5594"};
+{"key"="833";"subkey"="166";"value"="5601"};
+{"key"="834";"subkey"="166";"value"="5609"};
+{"key"="835";"subkey"="167";"value"="5617"};
+{"key"="836";"subkey"="167";"value"="5625"};
+{"key"="837";"subkey"="167";"value"="5632"};
+{"key"="838";"subkey"="167";"value"="5640"};
+{"key"="839";"subkey"="167";"value"="5648"};
+{"key"="840";"subkey"="168";"value"="5656"};
+{"key"="841";"subkey"="168";"value"="5663"};
+{"key"="842";"subkey"="168";"value"="5671"};
+{"key"="843";"subkey"="168";"value"="5679"};
+{"key"="844";"subkey"="168";"value"="5687"};
+{"key"="845";"subkey"="169";"value"="5694"};
+{"key"="846";"subkey"="169";"value"="5702"};
+{"key"="847";"subkey"="169";"value"="5710"};
+{"key"="848";"subkey"="169";"value"="5717"};
+{"key"="849";"subkey"="169";"value"="5725"};
+{"key"="850";"subkey"="170";"value"="5733"};
+{"key"="851";"subkey"="170";"value"="5741"};
+{"key"="852";"subkey"="170";"value"="5748"};
+{"key"="853";"subkey"="170";"value"="5756"};
+{"key"="854";"subkey"="170";"value"="5764"};
+{"key"="855";"subkey"="171";"value"="5772"};
+{"key"="856";"subkey"="171";"value"="5779"};
+{"key"="857";"subkey"="171";"value"="5787"};
+{"key"="858";"subkey"="171";"value"="5795"};
+{"key"="859";"subkey"="171";"value"="5803"};
+{"key"="860";"subkey"="172";"value"="5810"};
+{"key"="861";"subkey"="172";"value"="5818"};
+{"key"="862";"subkey"="172";"value"="5826"};
+{"key"="863";"subkey"="172";"value"="5834"};
+{"key"="864";"subkey"="172";"value"="5841"};
+{"key"="865";"subkey"="173";"value"="5849"};
+{"key"="866";"subkey"="173";"value"="5857"};
+{"key"="867";"subkey"="173";"value"="5865"};
+{"key"="868";"subkey"="173";"value"="5873"};
+{"key"="869";"subkey"="173";"value"="5880"};
+{"key"="870";"subkey"="174";"value"="5888"};
+{"key"="871";"subkey"="174";"value"="5896"};
+{"key"="872";"subkey"="174";"value"="5904"};
+{"key"="873";"subkey"="174";"value"="5911"};
+{"key"="874";"subkey"="174";"value"="5919"};
+{"key"="875";"subkey"="175";"value"="5927"};
+{"key"="876";"subkey"="175";"value"="5935"};
+{"key"="877";"subkey"="175";"value"="5942"};
+{"key"="878";"subkey"="175";"value"="5950"};
+{"key"="879";"subkey"="175";"value"="5958"};
+{"key"="880";"subkey"="176";"value"="5966"};
+{"key"="881";"subkey"="176";"value"="5974"};
+{"key"="882";"subkey"="176";"value"="5981"};
+{"key"="883";"subkey"="176";"value"="5989"};
+{"key"="884";"subkey"="176";"value"="5997"};
+{"key"="885";"subkey"="177";"value"="6005"};
+{"key"="886";"subkey"="177";"value"="6013"};
+{"key"="887";"subkey"="177";"value"="6020"};
+{"key"="888";"subkey"="177";"value"="6028"};
+{"key"="889";"subkey"="177";"value"="6036"};
+{"key"="890";"subkey"="178";"value"="6044"};
+{"key"="891";"subkey"="178";"value"="6051"};
+{"key"="892";"subkey"="178";"value"="6059"};
+{"key"="893";"subkey"="178";"value"="6067"};
+{"key"="894";"subkey"="178";"value"="6075"};
+{"key"="895";"subkey"="179";"value"="6083"};
+{"key"="896";"subkey"="179";"value"="6090"};
+{"key"="897";"subkey"="179";"value"="6098"};
+{"key"="898";"subkey"="179";"value"="6106"};
+{"key"="899";"subkey"="179";"value"="6114"};
+{"key"="900";"subkey"="180";"value"="6122"};
+{"key"="901";"subkey"="180";"value"="6129"};
+{"key"="902";"subkey"="180";"value"="6137"};
+{"key"="903";"subkey"="180";"value"="6145"};
+{"key"="904";"subkey"="180";"value"="6153"};
+{"key"="905";"subkey"="181";"value"="6161"};
+{"key"="906";"subkey"="181";"value"="6168"};
+{"key"="907";"subkey"="181";"value"="6176"};
+{"key"="908";"subkey"="181";"value"="6184"};
+{"key"="909";"subkey"="181";"value"="6192"};
+{"key"="910";"subkey"="182";"value"="6200"};
+{"key"="911";"subkey"="182";"value"="6208"};
+{"key"="912";"subkey"="182";"value"="6215"};
+{"key"="913";"subkey"="182";"value"="6223"};
+{"key"="914";"subkey"="182";"value"="6231"};
+{"key"="915";"subkey"="183";"value"="6239"};
+{"key"="916";"subkey"="183";"value"="6247"};
+{"key"="917";"subkey"="183";"value"="6254"};
+{"key"="918";"subkey"="183";"value"="6262"};
+{"key"="919";"subkey"="183";"value"="6270"};
+{"key"="920";"subkey"="184";"value"="6278"};
+{"key"="921";"subkey"="184";"value"="6286"};
+{"key"="922";"subkey"="184";"value"="6294"};
+{"key"="923";"subkey"="184";"value"="6301"};
+{"key"="924";"subkey"="184";"value"="6309"};
+{"key"="925";"subkey"="185";"value"="6317"};
+{"key"="926";"subkey"="185";"value"="6325"};
+{"key"="927";"subkey"="185";"value"="6333"};
+{"key"="928";"subkey"="185";"value"="6341"};
+{"key"="929";"subkey"="185";"value"="6348"};
+{"key"="930";"subkey"="186";"value"="6356"};
+{"key"="931";"subkey"="186";"value"="6364"};
+{"key"="932";"subkey"="186";"value"="6372"};
+{"key"="933";"subkey"="186";"value"="6380"};
+{"key"="934";"subkey"="186";"value"="6388"};
+{"key"="935";"subkey"="187";"value"="6395"};
+{"key"="936";"subkey"="187";"value"="6403"};
+{"key"="937";"subkey"="187";"value"="6411"};
+{"key"="938";"subkey"="187";"value"="6419"};
+{"key"="939";"subkey"="187";"value"="6427"};
+{"key"="940";"subkey"="188";"value"="6435"};
+{"key"="941";"subkey"="188";"value"="6442"};
+{"key"="942";"subkey"="188";"value"="6450"};
+{"key"="943";"subkey"="188";"value"="6458"};
+{"key"="944";"subkey"="188";"value"="6466"};
+{"key"="945";"subkey"="189";"value"="6474"};
+{"key"="946";"subkey"="189";"value"="6482"};
+{"key"="947";"subkey"="189";"value"="6490"};
+{"key"="948";"subkey"="189";"value"="6497"};
+{"key"="949";"subkey"="189";"value"="6505"};
+{"key"="950";"subkey"="190";"value"="6513"};
+{"key"="951";"subkey"="190";"value"="6521"};
+{"key"="952";"subkey"="190";"value"="6529"};
+{"key"="953";"subkey"="190";"value"="6537"};
+{"key"="954";"subkey"="190";"value"="6545"};
+{"key"="955";"subkey"="191";"value"="6552"};
+{"key"="956";"subkey"="191";"value"="6560"};
+{"key"="957";"subkey"="191";"value"="6568"};
+{"key"="958";"subkey"="191";"value"="6576"};
+{"key"="959";"subkey"="191";"value"="6584"};
+{"key"="960";"subkey"="192";"value"="6592"};
+{"key"="961";"subkey"="192";"value"="6600"};
+{"key"="962";"subkey"="192";"value"="6607"};
+{"key"="963";"subkey"="192";"value"="6615"};
+{"key"="964";"subkey"="192";"value"="6623"};
+{"key"="965";"subkey"="193";"value"="6631"};
+{"key"="966";"subkey"="193";"value"="6639"};
+{"key"="967";"subkey"="193";"value"="6647"};
+{"key"="968";"subkey"="193";"value"="6655"};
+{"key"="969";"subkey"="193";"value"="6663"};
+{"key"="970";"subkey"="194";"value"="6670"};
+{"key"="971";"subkey"="194";"value"="6678"};
+{"key"="972";"subkey"="194";"value"="6686"};
+{"key"="973";"subkey"="194";"value"="6694"};
+{"key"="974";"subkey"="194";"value"="6702"};
+{"key"="975";"subkey"="195";"value"="6710"};
+{"key"="976";"subkey"="195";"value"="6718"};
+{"key"="977";"subkey"="195";"value"="6726"};
+{"key"="978";"subkey"="195";"value"="6734"};
+{"key"="979";"subkey"="195";"value"="6741"};
+{"key"="980";"subkey"="196";"value"="6749"};
+{"key"="981";"subkey"="196";"value"="6757"};
+{"key"="982";"subkey"="196";"value"="6765"};
+{"key"="983";"subkey"="196";"value"="6773"};
+{"key"="984";"subkey"="196";"value"="6781"};
+{"key"="985";"subkey"="197";"value"="6789"};
+{"key"="986";"subkey"="197";"value"="6797"};
+{"key"="987";"subkey"="197";"value"="6805"};
+{"key"="988";"subkey"="197";"value"="6812"};
+{"key"="989";"subkey"="197";"value"="6820"};
+{"key"="990";"subkey"="198";"value"="6828"};
+{"key"="991";"subkey"="198";"value"="6836"};
+{"key"="992";"subkey"="198";"value"="6844"};
+{"key"="993";"subkey"="198";"value"="6852"};
+{"key"="994";"subkey"="198";"value"="6860"};
+{"key"="995";"subkey"="199";"value"="6868"};
+{"key"="996";"subkey"="199";"value"="6876"};
+{"key"="997";"subkey"="199";"value"="6884"};
+{"key"="998";"subkey"="199";"value"="6891"};
+{"key"="999";"subkey"="199";"value"="6899"};
+{"key"="1000";"subkey"="200";"value"="6907"};
+{"key"="1001";"subkey"="200";"value"="6915"};
+{"key"="1002";"subkey"="200";"value"="6923"};
+{"key"="1003";"subkey"="200";"value"="6931"};
+{"key"="1004";"subkey"="200";"value"="6939"};
+{"key"="1005";"subkey"="201";"value"="6947"};
+{"key"="1006";"subkey"="201";"value"="6955"};
+{"key"="1007";"subkey"="201";"value"="6963"};
+{"key"="1008";"subkey"="201";"value"="6971"};
+{"key"="1009";"subkey"="201";"value"="6978"};
+{"key"="1010";"subkey"="202";"value"="6986"};
+{"key"="1011";"subkey"="202";"value"="6994"};
+{"key"="1012";"subkey"="202";"value"="7002"};
+{"key"="1013";"subkey"="202";"value"="7010"};
+{"key"="1014";"subkey"="202";"value"="7018"};
+{"key"="1015";"subkey"="203";"value"="7026"};
+{"key"="1016";"subkey"="203";"value"="7034"};
+{"key"="1017";"subkey"="203";"value"="7042"};
+{"key"="1018";"subkey"="203";"value"="7050"};
+{"key"="1019";"subkey"="203";"value"="7058"};
+{"key"="1020";"subkey"="204";"value"="7066"};
+{"key"="1021";"subkey"="204";"value"="7074"};
+{"key"="1022";"subkey"="204";"value"="7081"};
+{"key"="1023";"subkey"="204";"value"="7089"};
+{"key"="1024";"subkey"="204";"value"="7097"};
+{"key"="1025";"subkey"="205";"value"="7105"};
+{"key"="1026";"subkey"="205";"value"="7113"};
+{"key"="1027";"subkey"="205";"value"="7121"};
+{"key"="1028";"subkey"="205";"value"="7129"};
+{"key"="1029";"subkey"="205";"value"="7137"};
+{"key"="1030";"subkey"="206";"value"="7145"};
+{"key"="1031";"subkey"="206";"value"="7153"};
+{"key"="1032";"subkey"="206";"value"="7161"};
+{"key"="1033";"subkey"="206";"value"="7169"};
+{"key"="1034";"subkey"="206";"value"="7177"};
+{"key"="1035";"subkey"="207";"value"="7185"};
+{"key"="1036";"subkey"="207";"value"="7193"};
+{"key"="1037";"subkey"="207";"value"="7201"};
+{"key"="1038";"subkey"="207";"value"="7208"};
+{"key"="1039";"subkey"="207";"value"="7216"};
+{"key"="1040";"subkey"="208";"value"="7224"};
+{"key"="1041";"subkey"="208";"value"="7232"};
+{"key"="1042";"subkey"="208";"value"="7240"};
+{"key"="1043";"subkey"="208";"value"="7248"};
+{"key"="1044";"subkey"="208";"value"="7256"};
+{"key"="1045";"subkey"="209";"value"="7264"};
+{"key"="1046";"subkey"="209";"value"="7272"};
+{"key"="1047";"subkey"="209";"value"="7280"};
+{"key"="1048";"subkey"="209";"value"="7288"};
+{"key"="1049";"subkey"="209";"value"="7296"};
+{"key"="1050";"subkey"="210";"value"="7304"};
+{"key"="1051";"subkey"="210";"value"="7312"};
+{"key"="1052";"subkey"="210";"value"="7320"};
+{"key"="1053";"subkey"="210";"value"="7328"};
+{"key"="1054";"subkey"="210";"value"="7336"};
+{"key"="1055";"subkey"="211";"value"="7344"};
+{"key"="1056";"subkey"="211";"value"="7352"};
+{"key"="1057";"subkey"="211";"value"="7360"};
+{"key"="1058";"subkey"="211";"value"="7368"};
+{"key"="1059";"subkey"="211";"value"="7376"};
+{"key"="1060";"subkey"="212";"value"="7383"};
+{"key"="1061";"subkey"="212";"value"="7391"};
+{"key"="1062";"subkey"="212";"value"="7399"};
+{"key"="1063";"subkey"="212";"value"="7407"};
+{"key"="1064";"subkey"="212";"value"="7415"};
+{"key"="1065";"subkey"="213";"value"="7423"};
+{"key"="1066";"subkey"="213";"value"="7431"};
+{"key"="1067";"subkey"="213";"value"="7439"};
+{"key"="1068";"subkey"="213";"value"="7447"};
+{"key"="1069";"subkey"="213";"value"="7455"};
+{"key"="1070";"subkey"="214";"value"="7463"};
+{"key"="1071";"subkey"="214";"value"="7471"};
+{"key"="1072";"subkey"="214";"value"="7479"};
+{"key"="1073";"subkey"="214";"value"="7487"};
+{"key"="1074";"subkey"="214";"value"="7495"};
+{"key"="1075";"subkey"="215";"value"="7503"};
+{"key"="1076";"subkey"="215";"value"="7511"};
+{"key"="1077";"subkey"="215";"value"="7519"};
+{"key"="1078";"subkey"="215";"value"="7527"};
+{"key"="1079";"subkey"="215";"value"="7535"};
+{"key"="1080";"subkey"="216";"value"="7543"};
+{"key"="1081";"subkey"="216";"value"="7551"};
+{"key"="1082";"subkey"="216";"value"="7559"};
+{"key"="1083";"subkey"="216";"value"="7567"};
+{"key"="1084";"subkey"="216";"value"="7575"};
+{"key"="1085";"subkey"="217";"value"="7583"};
+{"key"="1086";"subkey"="217";"value"="7591"};
+{"key"="1087";"subkey"="217";"value"="7599"};
+{"key"="1088";"subkey"="217";"value"="7607"};
+{"key"="1089";"subkey"="217";"value"="7615"};
+{"key"="1090";"subkey"="218";"value"="7623"};
+{"key"="1091";"subkey"="218";"value"="7631"};
+{"key"="1092";"subkey"="218";"value"="7639"};
+{"key"="1093";"subkey"="218";"value"="7647"};
+{"key"="1094";"subkey"="218";"value"="7655"};
+{"key"="1095";"subkey"="219";"value"="7663"};
+{"key"="1096";"subkey"="219";"value"="7671"};
+{"key"="1097";"subkey"="219";"value"="7679"};
+{"key"="1098";"subkey"="219";"value"="7687"};
+{"key"="1099";"subkey"="219";"value"="7695"};
+{"key"="1100";"subkey"="220";"value"="7703"};
+{"key"="1101";"subkey"="220";"value"="7711"};
+{"key"="1102";"subkey"="220";"value"="7719"};
+{"key"="1103";"subkey"="220";"value"="7727"};
+{"key"="1104";"subkey"="220";"value"="7735"};
+{"key"="1105";"subkey"="221";"value"="7743"};
+{"key"="1106";"subkey"="221";"value"="7751"};
+{"key"="1107";"subkey"="221";"value"="7759"};
+{"key"="1108";"subkey"="221";"value"="7767"};
+{"key"="1109";"subkey"="221";"value"="7775"};
+{"key"="1110";"subkey"="222";"value"="7783"};
+{"key"="1111";"subkey"="222";"value"="7791"};
+{"key"="1112";"subkey"="222";"value"="7799"};
+{"key"="1113";"subkey"="222";"value"="7807"};
+{"key"="1114";"subkey"="222";"value"="7815"};
+{"key"="1115";"subkey"="223";"value"="7823"};
+{"key"="1116";"subkey"="223";"value"="7831"};
+{"key"="1117";"subkey"="223";"value"="7839"};
+{"key"="1118";"subkey"="223";"value"="7847"};
+{"key"="1119";"subkey"="223";"value"="7855"};
+{"key"="1120";"subkey"="224";"value"="7863"};
+{"key"="1121";"subkey"="224";"value"="7871"};
+{"key"="1122";"subkey"="224";"value"="7879"};
+{"key"="1123";"subkey"="224";"value"="7887"};
+{"key"="1124";"subkey"="224";"value"="7895"};
+{"key"="1125";"subkey"="225";"value"="7903"};
+{"key"="1126";"subkey"="225";"value"="7911"};
+{"key"="1127";"subkey"="225";"value"="7919"};
+{"key"="1128";"subkey"="225";"value"="7927"};
+{"key"="1129";"subkey"="225";"value"="7935"};
+{"key"="1130";"subkey"="226";"value"="7943"};
+{"key"="1131";"subkey"="226";"value"="7951"};
+{"key"="1132";"subkey"="226";"value"="7959"};
+{"key"="1133";"subkey"="226";"value"="7967"};
+{"key"="1134";"subkey"="226";"value"="7975"};
+{"key"="1135";"subkey"="227";"value"="7984"};
+{"key"="1136";"subkey"="227";"value"="7992"};
+{"key"="1137";"subkey"="227";"value"="8000"};
+{"key"="1138";"subkey"="227";"value"="8008"};
+{"key"="1139";"subkey"="227";"value"="8016"};
+{"key"="1140";"subkey"="228";"value"="8024"};
+{"key"="1141";"subkey"="228";"value"="8032"};
+{"key"="1142";"subkey"="228";"value"="8040"};
+{"key"="1143";"subkey"="228";"value"="8048"};
+{"key"="1144";"subkey"="228";"value"="8056"};
+{"key"="1145";"subkey"="229";"value"="8064"};
+{"key"="1146";"subkey"="229";"value"="8072"};
+{"key"="1147";"subkey"="229";"value"="8080"};
+{"key"="1148";"subkey"="229";"value"="8088"};
+{"key"="1149";"subkey"="229";"value"="8096"};
+{"key"="1150";"subkey"="230";"value"="8104"};
+{"key"="1151";"subkey"="230";"value"="8112"};
+{"key"="1152";"subkey"="230";"value"="8120"};
+{"key"="1153";"subkey"="230";"value"="8128"};
+{"key"="1154";"subkey"="230";"value"="8136"};
+{"key"="1155";"subkey"="231";"value"="8144"};
+{"key"="1156";"subkey"="231";"value"="8152"};
+{"key"="1157";"subkey"="231";"value"="8160"};
+{"key"="1158";"subkey"="231";"value"="8169"};
+{"key"="1159";"subkey"="231";"value"="8177"};
+{"key"="1160";"subkey"="232";"value"="8185"};
+{"key"="1161";"subkey"="232";"value"="8193"};
+{"key"="1162";"subkey"="232";"value"="8201"};
+{"key"="1163";"subkey"="232";"value"="8209"};
+{"key"="1164";"subkey"="232";"value"="8217"};
+{"key"="1165";"subkey"="233";"value"="8225"};
+{"key"="1166";"subkey"="233";"value"="8233"};
+{"key"="1167";"subkey"="233";"value"="8241"};
+{"key"="1168";"subkey"="233";"value"="8249"};
+{"key"="1169";"subkey"="233";"value"="8257"};
+{"key"="1170";"subkey"="234";"value"="8265"};
+{"key"="1171";"subkey"="234";"value"="8273"};
+{"key"="1172";"subkey"="234";"value"="8281"};
+{"key"="1173";"subkey"="234";"value"="8289"};
+{"key"="1174";"subkey"="234";"value"="8298"};
+{"key"="1175";"subkey"="235";"value"="8306"};
+{"key"="1176";"subkey"="235";"value"="8314"};
+{"key"="1177";"subkey"="235";"value"="8322"};
+{"key"="1178";"subkey"="235";"value"="8330"};
+{"key"="1179";"subkey"="235";"value"="8338"};
+{"key"="1180";"subkey"="236";"value"="8346"};
+{"key"="1181";"subkey"="236";"value"="8354"};
+{"key"="1182";"subkey"="236";"value"="8362"};
+{"key"="1183";"subkey"="236";"value"="8370"};
+{"key"="1184";"subkey"="236";"value"="8378"};
+{"key"="1185";"subkey"="237";"value"="8386"};
+{"key"="1186";"subkey"="237";"value"="8394"};
+{"key"="1187";"subkey"="237";"value"="8402"};
+{"key"="1188";"subkey"="237";"value"="8411"};
+{"key"="1189";"subkey"="237";"value"="8419"};
+{"key"="1190";"subkey"="238";"value"="8427"};
+{"key"="1191";"subkey"="238";"value"="8435"};
+{"key"="1192";"subkey"="238";"value"="8443"};
+{"key"="1193";"subkey"="238";"value"="8451"};
+{"key"="1194";"subkey"="238";"value"="8459"};
+{"key"="1195";"subkey"="239";"value"="8467"};
+{"key"="1196";"subkey"="239";"value"="8475"};
+{"key"="1197";"subkey"="239";"value"="8483"};
+{"key"="1198";"subkey"="239";"value"="8491"};
+{"key"="1199";"subkey"="239";"value"="8500"};
+{"key"="1200";"subkey"="240";"value"="8508"};
+{"key"="1201";"subkey"="240";"value"="8516"};
+{"key"="1202";"subkey"="240";"value"="8524"};
+{"key"="1203";"subkey"="240";"value"="8532"};
+{"key"="1204";"subkey"="240";"value"="8540"};
+{"key"="1205";"subkey"="241";"value"="8548"};
+{"key"="1206";"subkey"="241";"value"="8556"};
+{"key"="1207";"subkey"="241";"value"="8564"};
+{"key"="1208";"subkey"="241";"value"="8572"};
+{"key"="1209";"subkey"="241";"value"="8580"};
+{"key"="1210";"subkey"="242";"value"="8589"};
+{"key"="1211";"subkey"="242";"value"="8597"};
+{"key"="1212";"subkey"="242";"value"="8605"};
+{"key"="1213";"subkey"="242";"value"="8613"};
+{"key"="1214";"subkey"="242";"value"="8621"};
+{"key"="1215";"subkey"="243";"value"="8629"};
+{"key"="1216";"subkey"="243";"value"="8637"};
+{"key"="1217";"subkey"="243";"value"="8645"};
+{"key"="1218";"subkey"="243";"value"="8653"};
+{"key"="1219";"subkey"="243";"value"="8661"};
+{"key"="1220";"subkey"="244";"value"="8670"};
+{"key"="1221";"subkey"="244";"value"="8678"};
+{"key"="1222";"subkey"="244";"value"="8686"};
+{"key"="1223";"subkey"="244";"value"="8694"};
+{"key"="1224";"subkey"="244";"value"="8702"};
+{"key"="1225";"subkey"="245";"value"="8710"};
+{"key"="1226";"subkey"="245";"value"="8718"};
+{"key"="1227";"subkey"="245";"value"="8726"};
+{"key"="1228";"subkey"="245";"value"="8734"};
+{"key"="1229";"subkey"="245";"value"="8743"};
+{"key"="1230";"subkey"="246";"value"="8751"};
+{"key"="1231";"subkey"="246";"value"="8759"};
+{"key"="1232";"subkey"="246";"value"="8767"};
+{"key"="1233";"subkey"="246";"value"="8775"};
+{"key"="1234";"subkey"="246";"value"="8783"};
+{"key"="1235";"subkey"="247";"value"="8791"};
+{"key"="1236";"subkey"="247";"value"="8799"};
+{"key"="1237";"subkey"="247";"value"="8807"};
+{"key"="1238";"subkey"="247";"value"="8816"};
+{"key"="1239";"subkey"="247";"value"="8824"};
+{"key"="1240";"subkey"="248";"value"="8832"};
+{"key"="1241";"subkey"="248";"value"="8840"};
+{"key"="1242";"subkey"="248";"value"="8848"};
+{"key"="1243";"subkey"="248";"value"="8856"};
+{"key"="1244";"subkey"="248";"value"="8864"};
+{"key"="1245";"subkey"="249";"value"="8872"};
+{"key"="1246";"subkey"="249";"value"="8881"};
+{"key"="1247";"subkey"="249";"value"="8889"};
+{"key"="1248";"subkey"="249";"value"="8897"};
+{"key"="1249";"subkey"="249";"value"="8905"};
+{"key"="1250";"subkey"="250";"value"="8913"};
+{"key"="1251";"subkey"="250";"value"="8921"};
+{"key"="1252";"subkey"="250";"value"="8929"};
+{"key"="1253";"subkey"="250";"value"="8938"};
+{"key"="1254";"subkey"="250";"value"="8946"};
+{"key"="1255";"subkey"="251";"value"="8954"};
+{"key"="1256";"subkey"="251";"value"="8962"};
+{"key"="1257";"subkey"="251";"value"="8970"};
+{"key"="1258";"subkey"="251";"value"="8978"};
+{"key"="1259";"subkey"="251";"value"="8986"};
+{"key"="1260";"subkey"="252";"value"="8994"};
+{"key"="1261";"subkey"="252";"value"="9003"};
+{"key"="1262";"subkey"="252";"value"="9011"};
+{"key"="1263";"subkey"="252";"value"="9019"};
+{"key"="1264";"subkey"="252";"value"="9027"};
+{"key"="1265";"subkey"="253";"value"="9035"};
+{"key"="1266";"subkey"="253";"value"="9043"};
+{"key"="1267";"subkey"="253";"value"="9051"};
+{"key"="1268";"subkey"="253";"value"="9060"};
+{"key"="1269";"subkey"="253";"value"="9068"};
+{"key"="1270";"subkey"="254";"value"="9076"};
+{"key"="1271";"subkey"="254";"value"="9084"};
+{"key"="1272";"subkey"="254";"value"="9092"};
+{"key"="1273";"subkey"="254";"value"="9100"};
+{"key"="1274";"subkey"="254";"value"="9108"};
+{"key"="1275";"subkey"="255";"value"="9117"};
+{"key"="1276";"subkey"="255";"value"="9125"};
+{"key"="1277";"subkey"="255";"value"="9133"};
+{"key"="1278";"subkey"="255";"value"="9141"};
+{"key"="1279";"subkey"="255";"value"="9149"};
+{"key"="1280";"subkey"="256";"value"="9157"};
+{"key"="1281";"subkey"="256";"value"="9166"};
+{"key"="1282";"subkey"="256";"value"="9174"};
+{"key"="1283";"subkey"="256";"value"="9182"};
+{"key"="1284";"subkey"="256";"value"="9190"};
+{"key"="1285";"subkey"="257";"value"="9198"};
+{"key"="1286";"subkey"="257";"value"="9206"};
+{"key"="1287";"subkey"="257";"value"="9215"};
+{"key"="1288";"subkey"="257";"value"="9223"};
+{"key"="1289";"subkey"="257";"value"="9231"};
+{"key"="1290";"subkey"="258";"value"="9239"};
+{"key"="1291";"subkey"="258";"value"="9247"};
+{"key"="1292";"subkey"="258";"value"="9255"};
+{"key"="1293";"subkey"="258";"value"="9263"};
+{"key"="1294";"subkey"="258";"value"="9272"};
+{"key"="1295";"subkey"="259";"value"="9280"};
+{"key"="1296";"subkey"="259";"value"="9288"};
+{"key"="1297";"subkey"="259";"value"="9296"};
+{"key"="1298";"subkey"="259";"value"="9304"};
+{"key"="1299";"subkey"="259";"value"="9312"};
+{"key"="1300";"subkey"="260";"value"="9321"};
+{"key"="1301";"subkey"="260";"value"="9329"};
+{"key"="1302";"subkey"="260";"value"="9337"};
+{"key"="1303";"subkey"="260";"value"="9345"};
+{"key"="1304";"subkey"="260";"value"="9353"};
+{"key"="1305";"subkey"="261";"value"="9362"};
+{"key"="1306";"subkey"="261";"value"="9370"};
+{"key"="1307";"subkey"="261";"value"="9378"};
+{"key"="1308";"subkey"="261";"value"="9386"};
+{"key"="1309";"subkey"="261";"value"="9394"};
+{"key"="1310";"subkey"="262";"value"="9402"};
+{"key"="1311";"subkey"="262";"value"="9411"};
+{"key"="1312";"subkey"="262";"value"="9419"};
+{"key"="1313";"subkey"="262";"value"="9427"};
+{"key"="1314";"subkey"="262";"value"="9435"};
+{"key"="1315";"subkey"="263";"value"="9443"};
+{"key"="1316";"subkey"="263";"value"="9451"};
+{"key"="1317";"subkey"="263";"value"="9460"};
+{"key"="1318";"subkey"="263";"value"="9468"};
+{"key"="1319";"subkey"="263";"value"="9476"};
+{"key"="1320";"subkey"="264";"value"="9484"};
+{"key"="1321";"subkey"="264";"value"="9492"};
+{"key"="1322";"subkey"="264";"value"="9501"};
+{"key"="1323";"subkey"="264";"value"="9509"};
+{"key"="1324";"subkey"="264";"value"="9517"};
+{"key"="1325";"subkey"="265";"value"="9525"};
+{"key"="1326";"subkey"="265";"value"="9533"};
+{"key"="1327";"subkey"="265";"value"="9542"};
+{"key"="1328";"subkey"="265";"value"="9550"};
+{"key"="1329";"subkey"="265";"value"="9558"};
+{"key"="1330";"subkey"="266";"value"="9566"};
+{"key"="1331";"subkey"="266";"value"="9574"};
+{"key"="1332";"subkey"="266";"value"="9582"};
+{"key"="1333";"subkey"="266";"value"="9591"};
+{"key"="1334";"subkey"="266";"value"="9599"};
+{"key"="1335";"subkey"="267";"value"="9607"};
+{"key"="1336";"subkey"="267";"value"="9615"};
+{"key"="1337";"subkey"="267";"value"="9623"};
+{"key"="1338";"subkey"="267";"value"="9632"};
+{"key"="1339";"subkey"="267";"value"="9640"};
+{"key"="1340";"subkey"="268";"value"="9648"};
+{"key"="1341";"subkey"="268";"value"="9656"};
+{"key"="1342";"subkey"="268";"value"="9664"};
+{"key"="1343";"subkey"="268";"value"="9673"};
+{"key"="1344";"subkey"="268";"value"="9681"};
+{"key"="1345";"subkey"="269";"value"="9689"};
+{"key"="1346";"subkey"="269";"value"="9697"};
+{"key"="1347";"subkey"="269";"value"="9705"};
+{"key"="1348";"subkey"="269";"value"="9714"};
+{"key"="1349";"subkey"="269";"value"="9722"};
+{"key"="1350";"subkey"="270";"value"="9730"};
+{"key"="1351";"subkey"="270";"value"="9738"};
+{"key"="1352";"subkey"="270";"value"="9747"};
+{"key"="1353";"subkey"="270";"value"="9755"};
+{"key"="1354";"subkey"="270";"value"="9763"};
+{"key"="1355";"subkey"="271";"value"="9771"};
+{"key"="1356";"subkey"="271";"value"="9779"};
+{"key"="1357";"subkey"="271";"value"="9788"};
+{"key"="1358";"subkey"="271";"value"="9796"};
+{"key"="1359";"subkey"="271";"value"="9804"};
+{"key"="1360";"subkey"="272";"value"="9812"};
+{"key"="1361";"subkey"="272";"value"="9820"};
+{"key"="1362";"subkey"="272";"value"="9829"};
+{"key"="1363";"subkey"="272";"value"="9837"};
+{"key"="1364";"subkey"="272";"value"="9845"};
+{"key"="1365";"subkey"="273";"value"="9853"};
+{"key"="1366";"subkey"="273";"value"="9862"};
+{"key"="1367";"subkey"="273";"value"="9870"};
+{"key"="1368";"subkey"="273";"value"="9878"};
+{"key"="1369";"subkey"="273";"value"="9886"};
+{"key"="1370";"subkey"="274";"value"="9894"};
+{"key"="1371";"subkey"="274";"value"="9903"};
+{"key"="1372";"subkey"="274";"value"="9911"};
+{"key"="1373";"subkey"="274";"value"="9919"};
+{"key"="1374";"subkey"="274";"value"="9927"};
+{"key"="1375";"subkey"="275";"value"="9936"};
+{"key"="1376";"subkey"="275";"value"="9944"};
+{"key"="1377";"subkey"="275";"value"="9952"};
+{"key"="1378";"subkey"="275";"value"="9960"};
+{"key"="1379";"subkey"="275";"value"="9968"};
+{"key"="1380";"subkey"="276";"value"="9977"};
+{"key"="1381";"subkey"="276";"value"="9985"};
+{"key"="1382";"subkey"="276";"value"="9993"};
+{"key"="1383";"subkey"="276";"value"="10001"};
+{"key"="1384";"subkey"="276";"value"="10010"};
+{"key"="1385";"subkey"="277";"value"="10018"};
+{"key"="1386";"subkey"="277";"value"="10026"};
+{"key"="1387";"subkey"="277";"value"="10034"};
+{"key"="1388";"subkey"="277";"value"="10043"};
+{"key"="1389";"subkey"="277";"value"="10051"};
+{"key"="1390";"subkey"="278";"value"="10059"};
+{"key"="1391";"subkey"="278";"value"="10067"};
+{"key"="1392";"subkey"="278";"value"="10075"};
+{"key"="1393";"subkey"="278";"value"="10084"};
+{"key"="1394";"subkey"="278";"value"="10092"};
+{"key"="1395";"subkey"="279";"value"="10100"};
+{"key"="1396";"subkey"="279";"value"="10108"};
+{"key"="1397";"subkey"="279";"value"="10117"};
+{"key"="1398";"subkey"="279";"value"="10125"};
+{"key"="1399";"subkey"="279";"value"="10133"};
+{"key"="1400";"subkey"="280";"value"="10141"};
+{"key"="1401";"subkey"="280";"value"="10150"};
+{"key"="1402";"subkey"="280";"value"="10158"};
+{"key"="1403";"subkey"="280";"value"="10166"};
+{"key"="1404";"subkey"="280";"value"="10174"};
+{"key"="1405";"subkey"="281";"value"="10183"};
+{"key"="1406";"subkey"="281";"value"="10191"};
+{"key"="1407";"subkey"="281";"value"="10199"};
+{"key"="1408";"subkey"="281";"value"="10207"};
+{"key"="1409";"subkey"="281";"value"="10216"};
+{"key"="1410";"subkey"="282";"value"="10224"};
+{"key"="1411";"subkey"="282";"value"="10232"};
+{"key"="1412";"subkey"="282";"value"="10240"};
+{"key"="1413";"subkey"="282";"value"="10249"};
+{"key"="1414";"subkey"="282";"value"="10257"};
+{"key"="1415";"subkey"="283";"value"="10265"};
+{"key"="1416";"subkey"="283";"value"="10273"};
+{"key"="1417";"subkey"="283";"value"="10282"};
+{"key"="1418";"subkey"="283";"value"="10290"};
+{"key"="1419";"subkey"="283";"value"="10298"};
+{"key"="1420";"subkey"="284";"value"="10306"};
+{"key"="1421";"subkey"="284";"value"="10315"};
+{"key"="1422";"subkey"="284";"value"="10323"};
+{"key"="1423";"subkey"="284";"value"="10331"};
+{"key"="1424";"subkey"="284";"value"="10339"};
+{"key"="1425";"subkey"="285";"value"="10348"};
+{"key"="1426";"subkey"="285";"value"="10356"};
+{"key"="1427";"subkey"="285";"value"="10364"};
+{"key"="1428";"subkey"="285";"value"="10373"};
+{"key"="1429";"subkey"="285";"value"="10381"};
+{"key"="1430";"subkey"="286";"value"="10389"};
+{"key"="1431";"subkey"="286";"value"="10397"};
+{"key"="1432";"subkey"="286";"value"="10406"};
+{"key"="1433";"subkey"="286";"value"="10414"};
+{"key"="1434";"subkey"="286";"value"="10422"};
+{"key"="1435";"subkey"="287";"value"="10430"};
+{"key"="1436";"subkey"="287";"value"="10439"};
+{"key"="1437";"subkey"="287";"value"="10447"};
+{"key"="1438";"subkey"="287";"value"="10455"};
+{"key"="1439";"subkey"="287";"value"="10463"};
+{"key"="1440";"subkey"="288";"value"="10472"};
+{"key"="1441";"subkey"="288";"value"="10480"};
+{"key"="1442";"subkey"="288";"value"="10488"};
+{"key"="1443";"subkey"="288";"value"="10497"};
+{"key"="1444";"subkey"="288";"value"="10505"};
+{"key"="1445";"subkey"="289";"value"="10513"};
+{"key"="1446";"subkey"="289";"value"="10521"};
+{"key"="1447";"subkey"="289";"value"="10530"};
+{"key"="1448";"subkey"="289";"value"="10538"};
+{"key"="1449";"subkey"="289";"value"="10546"};
+{"key"="1450";"subkey"="290";"value"="10555"};
+{"key"="1451";"subkey"="290";"value"="10563"};
+{"key"="1452";"subkey"="290";"value"="10571"};
+{"key"="1453";"subkey"="290";"value"="10579"};
+{"key"="1454";"subkey"="290";"value"="10588"};
+{"key"="1455";"subkey"="291";"value"="10596"};
+{"key"="1456";"subkey"="291";"value"="10604"};
+{"key"="1457";"subkey"="291";"value"="10612"};
+{"key"="1458";"subkey"="291";"value"="10621"};
+{"key"="1459";"subkey"="291";"value"="10629"};
+{"key"="1460";"subkey"="292";"value"="10637"};
+{"key"="1461";"subkey"="292";"value"="10646"};
+{"key"="1462";"subkey"="292";"value"="10654"};
+{"key"="1463";"subkey"="292";"value"="10662"};
+{"key"="1464";"subkey"="292";"value"="10670"};
+{"key"="1465";"subkey"="293";"value"="10679"};
+{"key"="1466";"subkey"="293";"value"="10687"};
+{"key"="1467";"subkey"="293";"value"="10695"};
+{"key"="1468";"subkey"="293";"value"="10704"};
+{"key"="1469";"subkey"="293";"value"="10712"};
+{"key"="1470";"subkey"="294";"value"="10720"};
+{"key"="1471";"subkey"="294";"value"="10729"};
+{"key"="1472";"subkey"="294";"value"="10737"};
+{"key"="1473";"subkey"="294";"value"="10745"};
+{"key"="1474";"subkey"="294";"value"="10753"};
+{"key"="1475";"subkey"="295";"value"="10762"};
+{"key"="1476";"subkey"="295";"value"="10770"};
+{"key"="1477";"subkey"="295";"value"="10778"};
+{"key"="1478";"subkey"="295";"value"="10787"};
+{"key"="1479";"subkey"="295";"value"="10795"};
+{"key"="1480";"subkey"="296";"value"="10803"};
+{"key"="1481";"subkey"="296";"value"="10812"};
+{"key"="1482";"subkey"="296";"value"="10820"};
+{"key"="1483";"subkey"="296";"value"="10828"};
+{"key"="1484";"subkey"="296";"value"="10836"};
+{"key"="1485";"subkey"="297";"value"="10845"};
+{"key"="1486";"subkey"="297";"value"="10853"};
+{"key"="1487";"subkey"="297";"value"="10861"};
+{"key"="1488";"subkey"="297";"value"="10870"};
+{"key"="1489";"subkey"="297";"value"="10878"};
+{"key"="1490";"subkey"="298";"value"="10886"};
+{"key"="1491";"subkey"="298";"value"="10895"};
+{"key"="1492";"subkey"="298";"value"="10903"};
+{"key"="1493";"subkey"="298";"value"="10911"};
+{"key"="1494";"subkey"="298";"value"="10919"};
+{"key"="1495";"subkey"="299";"value"="10928"};
+{"key"="1496";"subkey"="299";"value"="10936"};
+{"key"="1497";"subkey"="299";"value"="10944"};
+{"key"="1498";"subkey"="299";"value"="10953"};
+{"key"="1499";"subkey"="299";"value"="10961"};
+{"key"="1500";"subkey"="300";"value"="10969"};
+{"key"="1501";"subkey"="300";"value"="10978"};
+{"key"="1502";"subkey"="300";"value"="10986"};
+{"key"="1503";"subkey"="300";"value"="10994"};
+{"key"="1504";"subkey"="300";"value"="11003"};
+{"key"="1505";"subkey"="301";"value"="11011"};
+{"key"="1506";"subkey"="301";"value"="11019"};
+{"key"="1507";"subkey"="301";"value"="11028"};
+{"key"="1508";"subkey"="301";"value"="11036"};
+{"key"="1509";"subkey"="301";"value"="11044"};
+{"key"="1510";"subkey"="302";"value"="11052"};
+{"key"="1511";"subkey"="302";"value"="11061"};
+{"key"="1512";"subkey"="302";"value"="11069"};
+{"key"="1513";"subkey"="302";"value"="11077"};
+{"key"="1514";"subkey"="302";"value"="11086"};
+{"key"="1515";"subkey"="303";"value"="11094"};
+{"key"="1516";"subkey"="303";"value"="11102"};
+{"key"="1517";"subkey"="303";"value"="11111"};
+{"key"="1518";"subkey"="303";"value"="11119"};
+{"key"="1519";"subkey"="303";"value"="11127"};
+{"key"="1520";"subkey"="304";"value"="11136"};
+{"key"="1521";"subkey"="304";"value"="11144"};
+{"key"="1522";"subkey"="304";"value"="11152"};
+{"key"="1523";"subkey"="304";"value"="11161"};
+{"key"="1524";"subkey"="304";"value"="11169"};
+{"key"="1525";"subkey"="305";"value"="11177"};
+{"key"="1526";"subkey"="305";"value"="11186"};
+{"key"="1527";"subkey"="305";"value"="11194"};
+{"key"="1528";"subkey"="305";"value"="11202"};
+{"key"="1529";"subkey"="305";"value"="11211"};
+{"key"="1530";"subkey"="306";"value"="11219"};
+{"key"="1531";"subkey"="306";"value"="11227"};
+{"key"="1532";"subkey"="306";"value"="11236"};
+{"key"="1533";"subkey"="306";"value"="11244"};
+{"key"="1534";"subkey"="306";"value"="11252"};
+{"key"="1535";"subkey"="307";"value"="11261"};
+{"key"="1536";"subkey"="307";"value"="11269"};
+{"key"="1537";"subkey"="307";"value"="11277"};
+{"key"="1538";"subkey"="307";"value"="11286"};
+{"key"="1539";"subkey"="307";"value"="11294"};
+{"key"="1540";"subkey"="308";"value"="11302"};
+{"key"="1541";"subkey"="308";"value"="11311"};
+{"key"="1542";"subkey"="308";"value"="11319"};
+{"key"="1543";"subkey"="308";"value"="11327"};
+{"key"="1544";"subkey"="308";"value"="11336"};
+{"key"="1545";"subkey"="309";"value"="11344"};
+{"key"="1546";"subkey"="309";"value"="11352"};
+{"key"="1547";"subkey"="309";"value"="11361"};
+{"key"="1548";"subkey"="309";"value"="11369"};
+{"key"="1549";"subkey"="309";"value"="11377"};
+{"key"="1550";"subkey"="310";"value"="11386"};
+{"key"="1551";"subkey"="310";"value"="11394"};
+{"key"="1552";"subkey"="310";"value"="11403"};
+{"key"="1553";"subkey"="310";"value"="11411"};
+{"key"="1554";"subkey"="310";"value"="11419"};
+{"key"="1555";"subkey"="311";"value"="11428"};
+{"key"="1556";"subkey"="311";"value"="11436"};
+{"key"="1557";"subkey"="311";"value"="11444"};
+{"key"="1558";"subkey"="311";"value"="11453"};
+{"key"="1559";"subkey"="311";"value"="11461"};
+{"key"="1560";"subkey"="312";"value"="11469"};
+{"key"="1561";"subkey"="312";"value"="11478"};
+{"key"="1562";"subkey"="312";"value"="11486"};
+{"key"="1563";"subkey"="312";"value"="11494"};
+{"key"="1564";"subkey"="312";"value"="11503"};
+{"key"="1565";"subkey"="313";"value"="11511"};
+{"key"="1566";"subkey"="313";"value"="11519"};
+{"key"="1567";"subkey"="313";"value"="11528"};
+{"key"="1568";"subkey"="313";"value"="11536"};
+{"key"="1569";"subkey"="313";"value"="11545"};
+{"key"="1570";"subkey"="314";"value"="11553"};
+{"key"="1571";"subkey"="314";"value"="11561"};
+{"key"="1572";"subkey"="314";"value"="11570"};
+{"key"="1573";"subkey"="314";"value"="11578"};
+{"key"="1574";"subkey"="314";"value"="11586"};
+{"key"="1575";"subkey"="315";"value"="11595"};
+{"key"="1576";"subkey"="315";"value"="11603"};
+{"key"="1577";"subkey"="315";"value"="11611"};
+{"key"="1578";"subkey"="315";"value"="11620"};
+{"key"="1579";"subkey"="315";"value"="11628"};
+{"key"="1580";"subkey"="316";"value"="11636"};
+{"key"="1581";"subkey"="316";"value"="11645"};
+{"key"="1582";"subkey"="316";"value"="11653"};
+{"key"="1583";"subkey"="316";"value"="11662"};
+{"key"="1584";"subkey"="316";"value"="11670"};
+{"key"="1585";"subkey"="317";"value"="11678"};
+{"key"="1586";"subkey"="317";"value"="11687"};
+{"key"="1587";"subkey"="317";"value"="11695"};
+{"key"="1588";"subkey"="317";"value"="11703"};
+{"key"="1589";"subkey"="317";"value"="11712"};
+{"key"="1590";"subkey"="318";"value"="11720"};
+{"key"="1591";"subkey"="318";"value"="11729"};
+{"key"="1592";"subkey"="318";"value"="11737"};
+{"key"="1593";"subkey"="318";"value"="11745"};
+{"key"="1594";"subkey"="318";"value"="11754"};
+{"key"="1595";"subkey"="319";"value"="11762"};
+{"key"="1596";"subkey"="319";"value"="11770"};
+{"key"="1597";"subkey"="319";"value"="11779"};
+{"key"="1598";"subkey"="319";"value"="11787"};
+{"key"="1599";"subkey"="319";"value"="11796"};
+{"key"="1600";"subkey"="320";"value"="11804"};
+{"key"="1601";"subkey"="320";"value"="11812"};
+{"key"="1602";"subkey"="320";"value"="11821"};
+{"key"="1603";"subkey"="320";"value"="11829"};
+{"key"="1604";"subkey"="320";"value"="11837"};
+{"key"="1605";"subkey"="321";"value"="11846"};
+{"key"="1606";"subkey"="321";"value"="11854"};
+{"key"="1607";"subkey"="321";"value"="11863"};
+{"key"="1608";"subkey"="321";"value"="11871"};
+{"key"="1609";"subkey"="321";"value"="11879"};
+{"key"="1610";"subkey"="322";"value"="11888"};
+{"key"="1611";"subkey"="322";"value"="11896"};
+{"key"="1612";"subkey"="322";"value"="11904"};
+{"key"="1613";"subkey"="322";"value"="11913"};
+{"key"="1614";"subkey"="322";"value"="11921"};
+{"key"="1615";"subkey"="323";"value"="11930"};
+{"key"="1616";"subkey"="323";"value"="11938"};
+{"key"="1617";"subkey"="323";"value"="11946"};
+{"key"="1618";"subkey"="323";"value"="11955"};
+{"key"="1619";"subkey"="323";"value"="11963"};
+{"key"="1620";"subkey"="324";"value"="11972"};
+{"key"="1621";"subkey"="324";"value"="11980"};
+{"key"="1622";"subkey"="324";"value"="11988"};
+{"key"="1623";"subkey"="324";"value"="11997"};
+{"key"="1624";"subkey"="324";"value"="12005"};
+{"key"="1625";"subkey"="325";"value"="12014"};
+{"key"="1626";"subkey"="325";"value"="12022"};
+{"key"="1627";"subkey"="325";"value"="12030"};
+{"key"="1628";"subkey"="325";"value"="12039"};
+{"key"="1629";"subkey"="325";"value"="12047"};
+{"key"="1630";"subkey"="326";"value"="12056"};
+{"key"="1631";"subkey"="326";"value"="12064"};
+{"key"="1632";"subkey"="326";"value"="12072"};
+{"key"="1633";"subkey"="326";"value"="12081"};
+{"key"="1634";"subkey"="326";"value"="12089"};
+{"key"="1635";"subkey"="327";"value"="12098"};
+{"key"="1636";"subkey"="327";"value"="12106"};
+{"key"="1637";"subkey"="327";"value"="12114"};
+{"key"="1638";"subkey"="327";"value"="12123"};
+{"key"="1639";"subkey"="327";"value"="12131"};
+{"key"="1640";"subkey"="328";"value"="12140"};
+{"key"="1641";"subkey"="328";"value"="12148"};
+{"key"="1642";"subkey"="328";"value"="12156"};
+{"key"="1643";"subkey"="328";"value"="12165"};
+{"key"="1644";"subkey"="328";"value"="12173"};
+{"key"="1645";"subkey"="329";"value"="12182"};
+{"key"="1646";"subkey"="329";"value"="12190"};
+{"key"="1647";"subkey"="329";"value"="12198"};
+{"key"="1648";"subkey"="329";"value"="12207"};
+{"key"="1649";"subkey"="329";"value"="12215"};
+{"key"="1650";"subkey"="330";"value"="12224"};
+{"key"="1651";"subkey"="330";"value"="12232"};
+{"key"="1652";"subkey"="330";"value"="12240"};
+{"key"="1653";"subkey"="330";"value"="12249"};
+{"key"="1654";"subkey"="330";"value"="12257"};
+{"key"="1655";"subkey"="331";"value"="12266"};
+{"key"="1656";"subkey"="331";"value"="12274"};
+{"key"="1657";"subkey"="331";"value"="12282"};
+{"key"="1658";"subkey"="331";"value"="12291"};
+{"key"="1659";"subkey"="331";"value"="12299"};
+{"key"="1660";"subkey"="332";"value"="12308"};
+{"key"="1661";"subkey"="332";"value"="12316"};
+{"key"="1662";"subkey"="332";"value"="12325"};
+{"key"="1663";"subkey"="332";"value"="12333"};
+{"key"="1664";"subkey"="332";"value"="12341"};
+{"key"="1665";"subkey"="333";"value"="12350"};
+{"key"="1666";"subkey"="333";"value"="12358"};
+{"key"="1667";"subkey"="333";"value"="12367"};
+{"key"="1668";"subkey"="333";"value"="12375"};
+{"key"="1669";"subkey"="333";"value"="12383"};
+{"key"="1670";"subkey"="334";"value"="12392"};
+{"key"="1671";"subkey"="334";"value"="12400"};
+{"key"="1672";"subkey"="334";"value"="12409"};
+{"key"="1673";"subkey"="334";"value"="12417"};
+{"key"="1674";"subkey"="334";"value"="12426"};
+{"key"="1675";"subkey"="335";"value"="12434"};
+{"key"="1676";"subkey"="335";"value"="12442"};
+{"key"="1677";"subkey"="335";"value"="12451"};
+{"key"="1678";"subkey"="335";"value"="12459"};
+{"key"="1679";"subkey"="335";"value"="12468"};
+{"key"="1680";"subkey"="336";"value"="12476"};
+{"key"="1681";"subkey"="336";"value"="12485"};
+{"key"="1682";"subkey"="336";"value"="12493"};
+{"key"="1683";"subkey"="336";"value"="12501"};
+{"key"="1684";"subkey"="336";"value"="12510"};
+{"key"="1685";"subkey"="337";"value"="12518"};
+{"key"="1686";"subkey"="337";"value"="12527"};
+{"key"="1687";"subkey"="337";"value"="12535"};
+{"key"="1688";"subkey"="337";"value"="12544"};
+{"key"="1689";"subkey"="337";"value"="12552"};
+{"key"="1690";"subkey"="338";"value"="12560"};
+{"key"="1691";"subkey"="338";"value"="12569"};
+{"key"="1692";"subkey"="338";"value"="12577"};
+{"key"="1693";"subkey"="338";"value"="12586"};
+{"key"="1694";"subkey"="338";"value"="12594"};
+{"key"="1695";"subkey"="339";"value"="12603"};
+{"key"="1696";"subkey"="339";"value"="12611"};
+{"key"="1697";"subkey"="339";"value"="12619"};
+{"key"="1698";"subkey"="339";"value"="12628"};
+{"key"="1699";"subkey"="339";"value"="12636"};
+{"key"="1700";"subkey"="340";"value"="12645"};
+{"key"="1701";"subkey"="340";"value"="12653"};
+{"key"="1702";"subkey"="340";"value"="12662"};
+{"key"="1703";"subkey"="340";"value"="12670"};
+{"key"="1704";"subkey"="340";"value"="12679"};
+{"key"="1705";"subkey"="341";"value"="12687"};
+{"key"="1706";"subkey"="341";"value"="12695"};
+{"key"="1707";"subkey"="341";"value"="12704"};
+{"key"="1708";"subkey"="341";"value"="12712"};
+{"key"="1709";"subkey"="341";"value"="12721"};
+{"key"="1710";"subkey"="342";"value"="12729"};
+{"key"="1711";"subkey"="342";"value"="12738"};
+{"key"="1712";"subkey"="342";"value"="12746"};
+{"key"="1713";"subkey"="342";"value"="12755"};
+{"key"="1714";"subkey"="342";"value"="12763"};
+{"key"="1715";"subkey"="343";"value"="12771"};
+{"key"="1716";"subkey"="343";"value"="12780"};
+{"key"="1717";"subkey"="343";"value"="12788"};
+{"key"="1718";"subkey"="343";"value"="12797"};
+{"key"="1719";"subkey"="343";"value"="12805"};
+{"key"="1720";"subkey"="344";"value"="12814"};
+{"key"="1721";"subkey"="344";"value"="12822"};
+{"key"="1722";"subkey"="344";"value"="12831"};
+{"key"="1723";"subkey"="344";"value"="12839"};
+{"key"="1724";"subkey"="344";"value"="12847"};
+{"key"="1725";"subkey"="345";"value"="12856"};
+{"key"="1726";"subkey"="345";"value"="12864"};
+{"key"="1727";"subkey"="345";"value"="12873"};
+{"key"="1728";"subkey"="345";"value"="12881"};
+{"key"="1729";"subkey"="345";"value"="12890"};
+{"key"="1730";"subkey"="346";"value"="12898"};
+{"key"="1731";"subkey"="346";"value"="12907"};
+{"key"="1732";"subkey"="346";"value"="12915"};
+{"key"="1733";"subkey"="346";"value"="12924"};
+{"key"="1734";"subkey"="346";"value"="12932"};
+{"key"="1735";"subkey"="347";"value"="12940"};
+{"key"="1736";"subkey"="347";"value"="12949"};
+{"key"="1737";"subkey"="347";"value"="12957"};
+{"key"="1738";"subkey"="347";"value"="12966"};
+{"key"="1739";"subkey"="347";"value"="12974"};
+{"key"="1740";"subkey"="348";"value"="12983"};
+{"key"="1741";"subkey"="348";"value"="12991"};
+{"key"="1742";"subkey"="348";"value"="13000"};
+{"key"="1743";"subkey"="348";"value"="13008"};
+{"key"="1744";"subkey"="348";"value"="13017"};
+{"key"="1745";"subkey"="349";"value"="13025"};
+{"key"="1746";"subkey"="349";"value"="13034"};
+{"key"="1747";"subkey"="349";"value"="13042"};
+{"key"="1748";"subkey"="349";"value"="13050"};
+{"key"="1749";"subkey"="349";"value"="13059"};
+{"key"="1750";"subkey"="350";"value"="13067"};
+{"key"="1751";"subkey"="350";"value"="13076"};
+{"key"="1752";"subkey"="350";"value"="13084"};
+{"key"="1753";"subkey"="350";"value"="13093"};
+{"key"="1754";"subkey"="350";"value"="13101"};
+{"key"="1755";"subkey"="351";"value"="13110"};
+{"key"="1756";"subkey"="351";"value"="13118"};
+{"key"="1757";"subkey"="351";"value"="13127"};
+{"key"="1758";"subkey"="351";"value"="13135"};
+{"key"="1759";"subkey"="351";"value"="13144"};
+{"key"="1760";"subkey"="352";"value"="13152"};
+{"key"="1761";"subkey"="352";"value"="13161"};
+{"key"="1762";"subkey"="352";"value"="13169"};
+{"key"="1763";"subkey"="352";"value"="13178"};
+{"key"="1764";"subkey"="352";"value"="13186"};
+{"key"="1765";"subkey"="353";"value"="13194"};
+{"key"="1766";"subkey"="353";"value"="13203"};
+{"key"="1767";"subkey"="353";"value"="13211"};
+{"key"="1768";"subkey"="353";"value"="13220"};
+{"key"="1769";"subkey"="353";"value"="13228"};
+{"key"="1770";"subkey"="354";"value"="13237"};
+{"key"="1771";"subkey"="354";"value"="13245"};
+{"key"="1772";"subkey"="354";"value"="13254"};
+{"key"="1773";"subkey"="354";"value"="13262"};
+{"key"="1774";"subkey"="354";"value"="13271"};
+{"key"="1775";"subkey"="355";"value"="13279"};
+{"key"="1776";"subkey"="355";"value"="13288"};
+{"key"="1777";"subkey"="355";"value"="13296"};
+{"key"="1778";"subkey"="355";"value"="13305"};
+{"key"="1779";"subkey"="355";"value"="13313"};
+{"key"="1780";"subkey"="356";"value"="13322"};
+{"key"="1781";"subkey"="356";"value"="13330"};
+{"key"="1782";"subkey"="356";"value"="13339"};
+{"key"="1783";"subkey"="356";"value"="13347"};
+{"key"="1784";"subkey"="356";"value"="13356"};
+{"key"="1785";"subkey"="357";"value"="13364"};
+{"key"="1786";"subkey"="357";"value"="13373"};
+{"key"="1787";"subkey"="357";"value"="13381"};
+{"key"="1788";"subkey"="357";"value"="13390"};
+{"key"="1789";"subkey"="357";"value"="13398"};
+{"key"="1790";"subkey"="358";"value"="13407"};
+{"key"="1791";"subkey"="358";"value"="13415"};
+{"key"="1792";"subkey"="358";"value"="13424"};
+{"key"="1793";"subkey"="358";"value"="13432"};
+{"key"="1794";"subkey"="358";"value"="13441"};
+{"key"="1795";"subkey"="359";"value"="13449"};
+{"key"="1796";"subkey"="359";"value"="13457"};
+{"key"="1797";"subkey"="359";"value"="13466"};
+{"key"="1798";"subkey"="359";"value"="13474"};
+{"key"="1799";"subkey"="359";"value"="13483"};
+{"key"="1800";"subkey"="360";"value"="13491"};
+{"key"="1801";"subkey"="360";"value"="13500"};
+{"key"="1802";"subkey"="360";"value"="13508"};
+{"key"="1803";"subkey"="360";"value"="13517"};
+{"key"="1804";"subkey"="360";"value"="13525"};
+{"key"="1805";"subkey"="361";"value"="13534"};
+{"key"="1806";"subkey"="361";"value"="13542"};
+{"key"="1807";"subkey"="361";"value"="13551"};
+{"key"="1808";"subkey"="361";"value"="13559"};
+{"key"="1809";"subkey"="361";"value"="13568"};
+{"key"="1810";"subkey"="362";"value"="13576"};
+{"key"="1811";"subkey"="362";"value"="13585"};
+{"key"="1812";"subkey"="362";"value"="13593"};
+{"key"="1813";"subkey"="362";"value"="13602"};
+{"key"="1814";"subkey"="362";"value"="13610"};
+{"key"="1815";"subkey"="363";"value"="13619"};
+{"key"="1816";"subkey"="363";"value"="13627"};
+{"key"="1817";"subkey"="363";"value"="13636"};
+{"key"="1818";"subkey"="363";"value"="13644"};
+{"key"="1819";"subkey"="363";"value"="13653"};
+{"key"="1820";"subkey"="364";"value"="13661"};
+{"key"="1821";"subkey"="364";"value"="13670"};
+{"key"="1822";"subkey"="364";"value"="13679"};
+{"key"="1823";"subkey"="364";"value"="13687"};
+{"key"="1824";"subkey"="364";"value"="13696"};
+{"key"="1825";"subkey"="365";"value"="13704"};
+{"key"="1826";"subkey"="365";"value"="13713"};
+{"key"="1827";"subkey"="365";"value"="13721"};
+{"key"="1828";"subkey"="365";"value"="13730"};
+{"key"="1829";"subkey"="365";"value"="13738"};
+{"key"="1830";"subkey"="366";"value"="13747"};
+{"key"="1831";"subkey"="366";"value"="13755"};
+{"key"="1832";"subkey"="366";"value"="13764"};
+{"key"="1833";"subkey"="366";"value"="13772"};
+{"key"="1834";"subkey"="366";"value"="13781"};
+{"key"="1835";"subkey"="367";"value"="13789"};
+{"key"="1836";"subkey"="367";"value"="13798"};
+{"key"="1837";"subkey"="367";"value"="13806"};
+{"key"="1838";"subkey"="367";"value"="13815"};
+{"key"="1839";"subkey"="367";"value"="13823"};
+{"key"="1840";"subkey"="368";"value"="13832"};
+{"key"="1841";"subkey"="368";"value"="13840"};
+{"key"="1842";"subkey"="368";"value"="13849"};
+{"key"="1843";"subkey"="368";"value"="13857"};
+{"key"="1844";"subkey"="368";"value"="13866"};
+{"key"="1845";"subkey"="369";"value"="13874"};
+{"key"="1846";"subkey"="369";"value"="13883"};
+{"key"="1847";"subkey"="369";"value"="13891"};
+{"key"="1848";"subkey"="369";"value"="13900"};
+{"key"="1849";"subkey"="369";"value"="13908"};
+{"key"="1850";"subkey"="370";"value"="13917"};
+{"key"="1851";"subkey"="370";"value"="13925"};
+{"key"="1852";"subkey"="370";"value"="13934"};
+{"key"="1853";"subkey"="370";"value"="13943"};
+{"key"="1854";"subkey"="370";"value"="13951"};
+{"key"="1855";"subkey"="371";"value"="13960"};
+{"key"="1856";"subkey"="371";"value"="13968"};
+{"key"="1857";"subkey"="371";"value"="13977"};
+{"key"="1858";"subkey"="371";"value"="13985"};
+{"key"="1859";"subkey"="371";"value"="13994"};
+{"key"="1860";"subkey"="372";"value"="14002"};
+{"key"="1861";"subkey"="372";"value"="14011"};
+{"key"="1862";"subkey"="372";"value"="14019"};
+{"key"="1863";"subkey"="372";"value"="14028"};
+{"key"="1864";"subkey"="372";"value"="14036"};
+{"key"="1865";"subkey"="373";"value"="14045"};
+{"key"="1866";"subkey"="373";"value"="14053"};
+{"key"="1867";"subkey"="373";"value"="14062"};
+{"key"="1868";"subkey"="373";"value"="14070"};
+{"key"="1869";"subkey"="373";"value"="14079"};
+{"key"="1870";"subkey"="374";"value"="14088"};
+{"key"="1871";"subkey"="374";"value"="14096"};
+{"key"="1872";"subkey"="374";"value"="14105"};
+{"key"="1873";"subkey"="374";"value"="14113"};
+{"key"="1874";"subkey"="374";"value"="14122"};
+{"key"="1875";"subkey"="375";"value"="14130"};
+{"key"="1876";"subkey"="375";"value"="14139"};
+{"key"="1877";"subkey"="375";"value"="14147"};
+{"key"="1878";"subkey"="375";"value"="14156"};
+{"key"="1879";"subkey"="375";"value"="14164"};
+{"key"="1880";"subkey"="376";"value"="14173"};
+{"key"="1881";"subkey"="376";"value"="14181"};
+{"key"="1882";"subkey"="376";"value"="14190"};
+{"key"="1883";"subkey"="376";"value"="14198"};
+{"key"="1884";"subkey"="376";"value"="14207"};
+{"key"="1885";"subkey"="377";"value"="14216"};
+{"key"="1886";"subkey"="377";"value"="14224"};
+{"key"="1887";"subkey"="377";"value"="14233"};
+{"key"="1888";"subkey"="377";"value"="14241"};
+{"key"="1889";"subkey"="377";"value"="14250"};
+{"key"="1890";"subkey"="378";"value"="14258"};
+{"key"="1891";"subkey"="378";"value"="14267"};
+{"key"="1892";"subkey"="378";"value"="14275"};
+{"key"="1893";"subkey"="378";"value"="14284"};
+{"key"="1894";"subkey"="378";"value"="14292"};
+{"key"="1895";"subkey"="379";"value"="14301"};
+{"key"="1896";"subkey"="379";"value"="14310"};
+{"key"="1897";"subkey"="379";"value"="14318"};
+{"key"="1898";"subkey"="379";"value"="14327"};
+{"key"="1899";"subkey"="379";"value"="14335"};
+{"key"="1900";"subkey"="380";"value"="14344"};
+{"key"="1901";"subkey"="380";"value"="14352"};
+{"key"="1902";"subkey"="380";"value"="14361"};
+{"key"="1903";"subkey"="380";"value"="14369"};
+{"key"="1904";"subkey"="380";"value"="14378"};
+{"key"="1905";"subkey"="381";"value"="14387"};
+{"key"="1906";"subkey"="381";"value"="14395"};
+{"key"="1907";"subkey"="381";"value"="14404"};
+{"key"="1908";"subkey"="381";"value"="14412"};
+{"key"="1909";"subkey"="381";"value"="14421"};
+{"key"="1910";"subkey"="382";"value"="14429"};
+{"key"="1911";"subkey"="382";"value"="14438"};
+{"key"="1912";"subkey"="382";"value"="14446"};
+{"key"="1913";"subkey"="382";"value"="14455"};
+{"key"="1914";"subkey"="382";"value"="14464"};
+{"key"="1915";"subkey"="383";"value"="14472"};
+{"key"="1916";"subkey"="383";"value"="14481"};
+{"key"="1917";"subkey"="383";"value"="14489"};
+{"key"="1918";"subkey"="383";"value"="14498"};
+{"key"="1919";"subkey"="383";"value"="14506"};
+{"key"="1920";"subkey"="384";"value"="14515"};
+{"key"="1921";"subkey"="384";"value"="14523"};
+{"key"="1922";"subkey"="384";"value"="14532"};
+{"key"="1923";"subkey"="384";"value"="14541"};
+{"key"="1924";"subkey"="384";"value"="14549"};
+{"key"="1925";"subkey"="385";"value"="14558"};
+{"key"="1926";"subkey"="385";"value"="14566"};
+{"key"="1927";"subkey"="385";"value"="14575"};
+{"key"="1928";"subkey"="385";"value"="14583"};
+{"key"="1929";"subkey"="385";"value"="14592"};
+{"key"="1930";"subkey"="386";"value"="14600"};
+{"key"="1931";"subkey"="386";"value"="14609"};
+{"key"="1932";"subkey"="386";"value"="14618"};
+{"key"="1933";"subkey"="386";"value"="14626"};
+{"key"="1934";"subkey"="386";"value"="14635"};
+{"key"="1935";"subkey"="387";"value"="14643"};
+{"key"="1936";"subkey"="387";"value"="14652"};
+{"key"="1937";"subkey"="387";"value"="14660"};
+{"key"="1938";"subkey"="387";"value"="14669"};
+{"key"="1939";"subkey"="387";"value"="14678"};
+{"key"="1940";"subkey"="388";"value"="14686"};
+{"key"="1941";"subkey"="388";"value"="14695"};
+{"key"="1942";"subkey"="388";"value"="14703"};
+{"key"="1943";"subkey"="388";"value"="14712"};
+{"key"="1944";"subkey"="388";"value"="14720"};
+{"key"="1945";"subkey"="389";"value"="14729"};
+{"key"="1946";"subkey"="389";"value"="14738"};
+{"key"="1947";"subkey"="389";"value"="14746"};
+{"key"="1948";"subkey"="389";"value"="14755"};
+{"key"="1949";"subkey"="389";"value"="14763"};
+{"key"="1950";"subkey"="390";"value"="14772"};
+{"key"="1951";"subkey"="390";"value"="14780"};
+{"key"="1952";"subkey"="390";"value"="14789"};
+{"key"="1953";"subkey"="390";"value"="14798"};
+{"key"="1954";"subkey"="390";"value"="14806"};
+{"key"="1955";"subkey"="391";"value"="14815"};
+{"key"="1956";"subkey"="391";"value"="14823"};
+{"key"="1957";"subkey"="391";"value"="14832"};
+{"key"="1958";"subkey"="391";"value"="14841"};
+{"key"="1959";"subkey"="391";"value"="14849"};
+{"key"="1960";"subkey"="392";"value"="14858"};
+{"key"="1961";"subkey"="392";"value"="14866"};
+{"key"="1962";"subkey"="392";"value"="14875"};
+{"key"="1963";"subkey"="392";"value"="14883"};
+{"key"="1964";"subkey"="392";"value"="14892"};
+{"key"="1965";"subkey"="393";"value"="14901"};
+{"key"="1966";"subkey"="393";"value"="14909"};
+{"key"="1967";"subkey"="393";"value"="14918"};
+{"key"="1968";"subkey"="393";"value"="14926"};
+{"key"="1969";"subkey"="393";"value"="14935"};
+{"key"="1970";"subkey"="394";"value"="14944"};
+{"key"="1971";"subkey"="394";"value"="14952"};
+{"key"="1972";"subkey"="394";"value"="14961"};
+{"key"="1973";"subkey"="394";"value"="14969"};
+{"key"="1974";"subkey"="394";"value"="14978"};
+{"key"="1975";"subkey"="395";"value"="14986"};
+{"key"="1976";"subkey"="395";"value"="14995"};
+{"key"="1977";"subkey"="395";"value"="15004"};
+{"key"="1978";"subkey"="395";"value"="15012"};
+{"key"="1979";"subkey"="395";"value"="15021"};
+{"key"="1980";"subkey"="396";"value"="15029"};
+{"key"="1981";"subkey"="396";"value"="15038"};
+{"key"="1982";"subkey"="396";"value"="15047"};
+{"key"="1983";"subkey"="396";"value"="15055"};
+{"key"="1984";"subkey"="396";"value"="15064"};
+{"key"="1985";"subkey"="397";"value"="15072"};
+{"key"="1986";"subkey"="397";"value"="15081"};
+{"key"="1987";"subkey"="397";"value"="15090"};
+{"key"="1988";"subkey"="397";"value"="15098"};
+{"key"="1989";"subkey"="397";"value"="15107"};
+{"key"="1990";"subkey"="398";"value"="15115"};
+{"key"="1991";"subkey"="398";"value"="15124"};
+{"key"="1992";"subkey"="398";"value"="15133"};
+{"key"="1993";"subkey"="398";"value"="15141"};
+{"key"="1994";"subkey"="398";"value"="15150"};
+{"key"="1995";"subkey"="399";"value"="15158"};
+{"key"="1996";"subkey"="399";"value"="15167"};
+{"key"="1997";"subkey"="399";"value"="15176"};
+{"key"="1998";"subkey"="399";"value"="15184"};
+{"key"="1999";"subkey"="399";"value"="15193"};
+{"key"="2000";"subkey"="400";"value"="15201"};
+{"key"="2001";"subkey"="400";"value"="15210"};
+{"key"="2002";"subkey"="400";"value"="15219"};
+{"key"="2003";"subkey"="400";"value"="15227"};
+{"key"="2004";"subkey"="400";"value"="15236"};
+{"key"="2005";"subkey"="401";"value"="15244"};
+{"key"="2006";"subkey"="401";"value"="15253"};
+{"key"="2007";"subkey"="401";"value"="15262"};
+{"key"="2008";"subkey"="401";"value"="15270"};
+{"key"="2009";"subkey"="401";"value"="15279"};
+{"key"="2010";"subkey"="402";"value"="15287"};
+{"key"="2011";"subkey"="402";"value"="15296"};
+{"key"="2012";"subkey"="402";"value"="15305"};
+{"key"="2013";"subkey"="402";"value"="15313"};
+{"key"="2014";"subkey"="402";"value"="15322"};
+{"key"="2015";"subkey"="403";"value"="15330"};
+{"key"="2016";"subkey"="403";"value"="15339"};
+{"key"="2017";"subkey"="403";"value"="15348"};
+{"key"="2018";"subkey"="403";"value"="15356"};
+{"key"="2019";"subkey"="403";"value"="15365"};
+{"key"="2020";"subkey"="404";"value"="15373"};
+{"key"="2021";"subkey"="404";"value"="15382"};
+{"key"="2022";"subkey"="404";"value"="15391"};
+{"key"="2023";"subkey"="404";"value"="15399"};
+{"key"="2024";"subkey"="404";"value"="15408"};
+{"key"="2025";"subkey"="405";"value"="15416"};
+{"key"="2026";"subkey"="405";"value"="15425"};
+{"key"="2027";"subkey"="405";"value"="15434"};
+{"key"="2028";"subkey"="405";"value"="15442"};
+{"key"="2029";"subkey"="405";"value"="15451"};
+{"key"="2030";"subkey"="406";"value"="15460"};
+{"key"="2031";"subkey"="406";"value"="15468"};
+{"key"="2032";"subkey"="406";"value"="15477"};
+{"key"="2033";"subkey"="406";"value"="15485"};
+{"key"="2034";"subkey"="406";"value"="15494"};
+{"key"="2035";"subkey"="407";"value"="15503"};
+{"key"="2036";"subkey"="407";"value"="15511"};
+{"key"="2037";"subkey"="407";"value"="15520"};
+{"key"="2038";"subkey"="407";"value"="15528"};
+{"key"="2039";"subkey"="407";"value"="15537"};
+{"key"="2040";"subkey"="408";"value"="15546"};
+{"key"="2041";"subkey"="408";"value"="15554"};
+{"key"="2042";"subkey"="408";"value"="15563"};
+{"key"="2043";"subkey"="408";"value"="15572"};
+{"key"="2044";"subkey"="408";"value"="15580"};
+{"key"="2045";"subkey"="409";"value"="15589"};
+{"key"="2046";"subkey"="409";"value"="15597"};
+{"key"="2047";"subkey"="409";"value"="15606"};
+{"key"="2048";"subkey"="409";"value"="15615"};
+{"key"="2049";"subkey"="409";"value"="15623"};
+{"key"="2050";"subkey"="410";"value"="15632"};
+{"key"="2051";"subkey"="410";"value"="15641"};
+{"key"="2052";"subkey"="410";"value"="15649"};
+{"key"="2053";"subkey"="410";"value"="15658"};
+{"key"="2054";"subkey"="410";"value"="15666"};
+{"key"="2055";"subkey"="411";"value"="15675"};
+{"key"="2056";"subkey"="411";"value"="15684"};
+{"key"="2057";"subkey"="411";"value"="15692"};
+{"key"="2058";"subkey"="411";"value"="15701"};
+{"key"="2059";"subkey"="411";"value"="15710"};
+{"key"="2060";"subkey"="412";"value"="15718"};
+{"key"="2061";"subkey"="412";"value"="15727"};
+{"key"="2062";"subkey"="412";"value"="15736"};
+{"key"="2063";"subkey"="412";"value"="15744"};
+{"key"="2064";"subkey"="412";"value"="15753"};
+{"key"="2065";"subkey"="413";"value"="15761"};
+{"key"="2066";"subkey"="413";"value"="15770"};
+{"key"="2067";"subkey"="413";"value"="15779"};
+{"key"="2068";"subkey"="413";"value"="15787"};
+{"key"="2069";"subkey"="413";"value"="15796"};
+{"key"="2070";"subkey"="414";"value"="15805"};
+{"key"="2071";"subkey"="414";"value"="15813"};
+{"key"="2072";"subkey"="414";"value"="15822"};
+{"key"="2073";"subkey"="414";"value"="15830"};
+{"key"="2074";"subkey"="414";"value"="15839"};
+{"key"="2075";"subkey"="415";"value"="15848"};
+{"key"="2076";"subkey"="415";"value"="15856"};
+{"key"="2077";"subkey"="415";"value"="15865"};
+{"key"="2078";"subkey"="415";"value"="15874"};
+{"key"="2079";"subkey"="415";"value"="15882"};
+{"key"="2080";"subkey"="416";"value"="15891"};
+{"key"="2081";"subkey"="416";"value"="15900"};
+{"key"="2082";"subkey"="416";"value"="15908"};
+{"key"="2083";"subkey"="416";"value"="15917"};
+{"key"="2084";"subkey"="416";"value"="15926"};
+{"key"="2085";"subkey"="417";"value"="15934"};
+{"key"="2086";"subkey"="417";"value"="15943"};
+{"key"="2087";"subkey"="417";"value"="15951"};
+{"key"="2088";"subkey"="417";"value"="15960"};
+{"key"="2089";"subkey"="417";"value"="15969"};
+{"key"="2090";"subkey"="418";"value"="15977"};
+{"key"="2091";"subkey"="418";"value"="15986"};
+{"key"="2092";"subkey"="418";"value"="15995"};
+{"key"="2093";"subkey"="418";"value"="16003"};
+{"key"="2094";"subkey"="418";"value"="16012"};
+{"key"="2095";"subkey"="419";"value"="16021"};
+{"key"="2096";"subkey"="419";"value"="16029"};
+{"key"="2097";"subkey"="419";"value"="16038"};
+{"key"="2098";"subkey"="419";"value"="16047"};
+{"key"="2099";"subkey"="419";"value"="16055"};
+{"key"="2100";"subkey"="420";"value"="16064"};
+{"key"="2101";"subkey"="420";"value"="16073"};
+{"key"="2102";"subkey"="420";"value"="16081"};
+{"key"="2103";"subkey"="420";"value"="16090"};
+{"key"="2104";"subkey"="420";"value"="16098"};
+{"key"="2105";"subkey"="421";"value"="16107"};
+{"key"="2106";"subkey"="421";"value"="16116"};
+{"key"="2107";"subkey"="421";"value"="16124"};
+{"key"="2108";"subkey"="421";"value"="16133"};
+{"key"="2109";"subkey"="421";"value"="16142"};
+{"key"="2110";"subkey"="422";"value"="16150"};
+{"key"="2111";"subkey"="422";"value"="16159"};
+{"key"="2112";"subkey"="422";"value"="16168"};
+{"key"="2113";"subkey"="422";"value"="16176"};
+{"key"="2114";"subkey"="422";"value"="16185"};
+{"key"="2115";"subkey"="423";"value"="16194"};
+{"key"="2116";"subkey"="423";"value"="16202"};
+{"key"="2117";"subkey"="423";"value"="16211"};
+{"key"="2118";"subkey"="423";"value"="16220"};
+{"key"="2119";"subkey"="423";"value"="16228"};
+{"key"="2120";"subkey"="424";"value"="16237"};
+{"key"="2121";"subkey"="424";"value"="16246"};
+{"key"="2122";"subkey"="424";"value"="16254"};
+{"key"="2123";"subkey"="424";"value"="16263"};
+{"key"="2124";"subkey"="424";"value"="16272"};
+{"key"="2125";"subkey"="425";"value"="16280"};
+{"key"="2126";"subkey"="425";"value"="16289"};
+{"key"="2127";"subkey"="425";"value"="16298"};
+{"key"="2128";"subkey"="425";"value"="16306"};
+{"key"="2129";"subkey"="425";"value"="16315"};
+{"key"="2130";"subkey"="426";"value"="16324"};
+{"key"="2131";"subkey"="426";"value"="16332"};
+{"key"="2132";"subkey"="426";"value"="16341"};
+{"key"="2133";"subkey"="426";"value"="16350"};
+{"key"="2134";"subkey"="426";"value"="16358"};
+{"key"="2135";"subkey"="427";"value"="16367"};
+{"key"="2136";"subkey"="427";"value"="16376"};
+{"key"="2137";"subkey"="427";"value"="16384"};
+{"key"="2138";"subkey"="427";"value"="16393"};
+{"key"="2139";"subkey"="427";"value"="16402"};
+{"key"="2140";"subkey"="428";"value"="16410"};
+{"key"="2141";"subkey"="428";"value"="16419"};
+{"key"="2142";"subkey"="428";"value"="16428"};
+{"key"="2143";"subkey"="428";"value"="16436"};
+{"key"="2144";"subkey"="428";"value"="16445"};
+{"key"="2145";"subkey"="429";"value"="16454"};
+{"key"="2146";"subkey"="429";"value"="16462"};
+{"key"="2147";"subkey"="429";"value"="16471"};
+{"key"="2148";"subkey"="429";"value"="16480"};
+{"key"="2149";"subkey"="429";"value"="16488"};
+{"key"="2150";"subkey"="430";"value"="16497"};
+{"key"="2151";"subkey"="430";"value"="16506"};
+{"key"="2152";"subkey"="430";"value"="16514"};
+{"key"="2153";"subkey"="430";"value"="16523"};
+{"key"="2154";"subkey"="430";"value"="16532"};
+{"key"="2155";"subkey"="431";"value"="16540"};
+{"key"="2156";"subkey"="431";"value"="16549"};
+{"key"="2157";"subkey"="431";"value"="16558"};
+{"key"="2158";"subkey"="431";"value"="16566"};
+{"key"="2159";"subkey"="431";"value"="16575"};
+{"key"="2160";"subkey"="432";"value"="16584"};
+{"key"="2161";"subkey"="432";"value"="16592"};
+{"key"="2162";"subkey"="432";"value"="16601"};
+{"key"="2163";"subkey"="432";"value"="16610"};
+{"key"="2164";"subkey"="432";"value"="16618"};
+{"key"="2165";"subkey"="433";"value"="16627"};
+{"key"="2166";"subkey"="433";"value"="16636"};
+{"key"="2167";"subkey"="433";"value"="16644"};
+{"key"="2168";"subkey"="433";"value"="16653"};
+{"key"="2169";"subkey"="433";"value"="16662"};
+{"key"="2170";"subkey"="434";"value"="16670"};
+{"key"="2171";"subkey"="434";"value"="16679"};
+{"key"="2172";"subkey"="434";"value"="16688"};
+{"key"="2173";"subkey"="434";"value"="16697"};
+{"key"="2174";"subkey"="434";"value"="16705"};
+{"key"="2175";"subkey"="435";"value"="16714"};
+{"key"="2176";"subkey"="435";"value"="16723"};
+{"key"="2177";"subkey"="435";"value"="16731"};
+{"key"="2178";"subkey"="435";"value"="16740"};
+{"key"="2179";"subkey"="435";"value"="16749"};
+{"key"="2180";"subkey"="436";"value"="16757"};
+{"key"="2181";"subkey"="436";"value"="16766"};
+{"key"="2182";"subkey"="436";"value"="16775"};
+{"key"="2183";"subkey"="436";"value"="16783"};
+{"key"="2184";"subkey"="436";"value"="16792"};
+{"key"="2185";"subkey"="437";"value"="16801"};
+{"key"="2186";"subkey"="437";"value"="16809"};
+{"key"="2187";"subkey"="437";"value"="16818"};
+{"key"="2188";"subkey"="437";"value"="16827"};
+{"key"="2189";"subkey"="437";"value"="16836"};
+{"key"="2190";"subkey"="438";"value"="16844"};
+{"key"="2191";"subkey"="438";"value"="16853"};
+{"key"="2192";"subkey"="438";"value"="16862"};
+{"key"="2193";"subkey"="438";"value"="16870"};
+{"key"="2194";"subkey"="438";"value"="16879"};
+{"key"="2195";"subkey"="439";"value"="16888"};
+{"key"="2196";"subkey"="439";"value"="16896"};
+{"key"="2197";"subkey"="439";"value"="16905"};
+{"key"="2198";"subkey"="439";"value"="16914"};
+{"key"="2199";"subkey"="439";"value"="16922"};
+{"key"="2200";"subkey"="440";"value"="16931"};
+{"key"="2201";"subkey"="440";"value"="16940"};
+{"key"="2202";"subkey"="440";"value"="16949"};
+{"key"="2203";"subkey"="440";"value"="16957"};
+{"key"="2204";"subkey"="440";"value"="16966"};
+{"key"="2205";"subkey"="441";"value"="16975"};
+{"key"="2206";"subkey"="441";"value"="16983"};
+{"key"="2207";"subkey"="441";"value"="16992"};
+{"key"="2208";"subkey"="441";"value"="17001"};
+{"key"="2209";"subkey"="441";"value"="17009"};
+{"key"="2210";"subkey"="442";"value"="17018"};
+{"key"="2211";"subkey"="442";"value"="17027"};
+{"key"="2212";"subkey"="442";"value"="17036"};
+{"key"="2213";"subkey"="442";"value"="17044"};
+{"key"="2214";"subkey"="442";"value"="17053"};
+{"key"="2215";"subkey"="443";"value"="17062"};
+{"key"="2216";"subkey"="443";"value"="17070"};
+{"key"="2217";"subkey"="443";"value"="17079"};
+{"key"="2218";"subkey"="443";"value"="17088"};
+{"key"="2219";"subkey"="443";"value"="17096"};
+{"key"="2220";"subkey"="444";"value"="17105"};
+{"key"="2221";"subkey"="444";"value"="17114"};
+{"key"="2222";"subkey"="444";"value"="17123"};
+{"key"="2223";"subkey"="444";"value"="17131"};
+{"key"="2224";"subkey"="444";"value"="17140"};
+{"key"="2225";"subkey"="445";"value"="17149"};
+{"key"="2226";"subkey"="445";"value"="17157"};
+{"key"="2227";"subkey"="445";"value"="17166"};
+{"key"="2228";"subkey"="445";"value"="17175"};
+{"key"="2229";"subkey"="445";"value"="17184"};
+{"key"="2230";"subkey"="446";"value"="17192"};
+{"key"="2231";"subkey"="446";"value"="17201"};
+{"key"="2232";"subkey"="446";"value"="17210"};
+{"key"="2233";"subkey"="446";"value"="17218"};
+{"key"="2234";"subkey"="446";"value"="17227"};
+{"key"="2235";"subkey"="447";"value"="17236"};
+{"key"="2236";"subkey"="447";"value"="17245"};
+{"key"="2237";"subkey"="447";"value"="17253"};
+{"key"="2238";"subkey"="447";"value"="17262"};
+{"key"="2239";"subkey"="447";"value"="17271"};
+{"key"="2240";"subkey"="448";"value"="17279"};
+{"key"="2241";"subkey"="448";"value"="17288"};
+{"key"="2242";"subkey"="448";"value"="17297"};
+{"key"="2243";"subkey"="448";"value"="17306"};
+{"key"="2244";"subkey"="448";"value"="17314"};
+{"key"="2245";"subkey"="449";"value"="17323"};
+{"key"="2246";"subkey"="449";"value"="17332"};
+{"key"="2247";"subkey"="449";"value"="17340"};
+{"key"="2248";"subkey"="449";"value"="17349"};
+{"key"="2249";"subkey"="449";"value"="17358"};
+{"key"="2250";"subkey"="450";"value"="17367"};
+{"key"="2251";"subkey"="450";"value"="17375"};
+{"key"="2252";"subkey"="450";"value"="17384"};
+{"key"="2253";"subkey"="450";"value"="17393"};
+{"key"="2254";"subkey"="450";"value"="17401"};
+{"key"="2255";"subkey"="451";"value"="17410"};
+{"key"="2256";"subkey"="451";"value"="17419"};
+{"key"="2257";"subkey"="451";"value"="17428"};
+{"key"="2258";"subkey"="451";"value"="17436"};
+{"key"="2259";"subkey"="451";"value"="17445"};
+{"key"="2260";"subkey"="452";"value"="17454"};
+{"key"="2261";"subkey"="452";"value"="17462"};
+{"key"="2262";"subkey"="452";"value"="17471"};
+{"key"="2263";"subkey"="452";"value"="17480"};
+{"key"="2264";"subkey"="452";"value"="17489"};
+{"key"="2265";"subkey"="453";"value"="17497"};
+{"key"="2266";"subkey"="453";"value"="17506"};
+{"key"="2267";"subkey"="453";"value"="17515"};
+{"key"="2268";"subkey"="453";"value"="17524"};
+{"key"="2269";"subkey"="453";"value"="17532"};
+{"key"="2270";"subkey"="454";"value"="17541"};
+{"key"="2271";"subkey"="454";"value"="17550"};
+{"key"="2272";"subkey"="454";"value"="17558"};
+{"key"="2273";"subkey"="454";"value"="17567"};
+{"key"="2274";"subkey"="454";"value"="17576"};
+{"key"="2275";"subkey"="455";"value"="17585"};
+{"key"="2276";"subkey"="455";"value"="17593"};
+{"key"="2277";"subkey"="455";"value"="17602"};
+{"key"="2278";"subkey"="455";"value"="17611"};
+{"key"="2279";"subkey"="455";"value"="17620"};
+{"key"="2280";"subkey"="456";"value"="17628"};
+{"key"="2281";"subkey"="456";"value"="17637"};
+{"key"="2282";"subkey"="456";"value"="17646"};
+{"key"="2283";"subkey"="456";"value"="17654"};
+{"key"="2284";"subkey"="456";"value"="17663"};
+{"key"="2285";"subkey"="457";"value"="17672"};
+{"key"="2286";"subkey"="457";"value"="17681"};
+{"key"="2287";"subkey"="457";"value"="17689"};
+{"key"="2288";"subkey"="457";"value"="17698"};
+{"key"="2289";"subkey"="457";"value"="17707"};
+{"key"="2290";"subkey"="458";"value"="17716"};
+{"key"="2291";"subkey"="458";"value"="17724"};
+{"key"="2292";"subkey"="458";"value"="17733"};
+{"key"="2293";"subkey"="458";"value"="17742"};
+{"key"="2294";"subkey"="458";"value"="17751"};
+{"key"="2295";"subkey"="459";"value"="17759"};
+{"key"="2296";"subkey"="459";"value"="17768"};
+{"key"="2297";"subkey"="459";"value"="17777"};
+{"key"="2298";"subkey"="459";"value"="17786"};
+{"key"="2299";"subkey"="459";"value"="17794"};
+{"key"="2300";"subkey"="460";"value"="17803"};
+{"key"="2301";"subkey"="460";"value"="17812"};
+{"key"="2302";"subkey"="460";"value"="17821"};
+{"key"="2303";"subkey"="460";"value"="17829"};
+{"key"="2304";"subkey"="460";"value"="17838"};
+{"key"="2305";"subkey"="461";"value"="17847"};
+{"key"="2306";"subkey"="461";"value"="17855"};
+{"key"="2307";"subkey"="461";"value"="17864"};
+{"key"="2308";"subkey"="461";"value"="17873"};
+{"key"="2309";"subkey"="461";"value"="17882"};
+{"key"="2310";"subkey"="462";"value"="17890"};
+{"key"="2311";"subkey"="462";"value"="17899"};
+{"key"="2312";"subkey"="462";"value"="17908"};
+{"key"="2313";"subkey"="462";"value"="17917"};
+{"key"="2314";"subkey"="462";"value"="17925"};
+{"key"="2315";"subkey"="463";"value"="17934"};
+{"key"="2316";"subkey"="463";"value"="17943"};
+{"key"="2317";"subkey"="463";"value"="17952"};
+{"key"="2318";"subkey"="463";"value"="17960"};
+{"key"="2319";"subkey"="463";"value"="17969"};
+{"key"="2320";"subkey"="464";"value"="17978"};
+{"key"="2321";"subkey"="464";"value"="17987"};
+{"key"="2322";"subkey"="464";"value"="17995"};
+{"key"="2323";"subkey"="464";"value"="18004"};
+{"key"="2324";"subkey"="464";"value"="18013"};
+{"key"="2325";"subkey"="465";"value"="18022"};
+{"key"="2326";"subkey"="465";"value"="18030"};
+{"key"="2327";"subkey"="465";"value"="18039"};
+{"key"="2328";"subkey"="465";"value"="18048"};
+{"key"="2329";"subkey"="465";"value"="18057"};
+{"key"="2330";"subkey"="466";"value"="18065"};
+{"key"="2331";"subkey"="466";"value"="18074"};
+{"key"="2332";"subkey"="466";"value"="18083"};
+{"key"="2333";"subkey"="466";"value"="18092"};
+{"key"="2334";"subkey"="466";"value"="18100"};
+{"key"="2335";"subkey"="467";"value"="18109"};
+{"key"="2336";"subkey"="467";"value"="18118"};
+{"key"="2337";"subkey"="467";"value"="18127"};
+{"key"="2338";"subkey"="467";"value"="18135"};
+{"key"="2339";"subkey"="467";"value"="18144"};
+{"key"="2340";"subkey"="468";"value"="18153"};
+{"key"="2341";"subkey"="468";"value"="18162"};
+{"key"="2342";"subkey"="468";"value"="18171"};
+{"key"="2343";"subkey"="468";"value"="18179"};
+{"key"="2344";"subkey"="468";"value"="18188"};
+{"key"="2345";"subkey"="469";"value"="18197"};
+{"key"="2346";"subkey"="469";"value"="18206"};
+{"key"="2347";"subkey"="469";"value"="18214"};
+{"key"="2348";"subkey"="469";"value"="18223"};
+{"key"="2349";"subkey"="469";"value"="18232"};
+{"key"="2350";"subkey"="470";"value"="18241"};
+{"key"="2351";"subkey"="470";"value"="18249"};
+{"key"="2352";"subkey"="470";"value"="18258"};
+{"key"="2353";"subkey"="470";"value"="18267"};
+{"key"="2354";"subkey"="470";"value"="18276"};
+{"key"="2355";"subkey"="471";"value"="18284"};
+{"key"="2356";"subkey"="471";"value"="18293"};
+{"key"="2357";"subkey"="471";"value"="18302"};
+{"key"="2358";"subkey"="471";"value"="18311"};
+{"key"="2359";"subkey"="471";"value"="18319"};
+{"key"="2360";"subkey"="472";"value"="18328"};
+{"key"="2361";"subkey"="472";"value"="18337"};
+{"key"="2362";"subkey"="472";"value"="18346"};
+{"key"="2363";"subkey"="472";"value"="18355"};
+{"key"="2364";"subkey"="472";"value"="18363"};
+{"key"="2365";"subkey"="473";"value"="18372"};
+{"key"="2366";"subkey"="473";"value"="18381"};
+{"key"="2367";"subkey"="473";"value"="18390"};
+{"key"="2368";"subkey"="473";"value"="18398"};
+{"key"="2369";"subkey"="473";"value"="18407"};
+{"key"="2370";"subkey"="474";"value"="18416"};
+{"key"="2371";"subkey"="474";"value"="18425"};
+{"key"="2372";"subkey"="474";"value"="18433"};
+{"key"="2373";"subkey"="474";"value"="18442"};
+{"key"="2374";"subkey"="474";"value"="18451"};
+{"key"="2375";"subkey"="475";"value"="18460"};
+{"key"="2376";"subkey"="475";"value"="18469"};
+{"key"="2377";"subkey"="475";"value"="18477"};
+{"key"="2378";"subkey"="475";"value"="18486"};
+{"key"="2379";"subkey"="475";"value"="18495"};
+{"key"="2380";"subkey"="476";"value"="18504"};
+{"key"="2381";"subkey"="476";"value"="18512"};
+{"key"="2382";"subkey"="476";"value"="18521"};
+{"key"="2383";"subkey"="476";"value"="18530"};
+{"key"="2384";"subkey"="476";"value"="18539"};
+{"key"="2385";"subkey"="477";"value"="18548"};
+{"key"="2386";"subkey"="477";"value"="18556"};
+{"key"="2387";"subkey"="477";"value"="18565"};
+{"key"="2388";"subkey"="477";"value"="18574"};
+{"key"="2389";"subkey"="477";"value"="18583"};
+{"key"="2390";"subkey"="478";"value"="18591"};
+{"key"="2391";"subkey"="478";"value"="18600"};
+{"key"="2392";"subkey"="478";"value"="18609"};
+{"key"="2393";"subkey"="478";"value"="18618"};
+{"key"="2394";"subkey"="478";"value"="18627"};
+{"key"="2395";"subkey"="479";"value"="18635"};
+{"key"="2396";"subkey"="479";"value"="18644"};
+{"key"="2397";"subkey"="479";"value"="18653"};
+{"key"="2398";"subkey"="479";"value"="18662"};
+{"key"="2399";"subkey"="479";"value"="18670"};
+{"key"="2400";"subkey"="480";"value"="18679"};
+{"key"="2401";"subkey"="480";"value"="18688"};
+{"key"="2402";"subkey"="480";"value"="18697"};
+{"key"="2403";"subkey"="480";"value"="18706"};
+{"key"="2404";"subkey"="480";"value"="18714"};
+{"key"="2405";"subkey"="481";"value"="18723"};
+{"key"="2406";"subkey"="481";"value"="18732"};
+{"key"="2407";"subkey"="481";"value"="18741"};
+{"key"="2408";"subkey"="481";"value"="18750"};
+{"key"="2409";"subkey"="481";"value"="18758"};
+{"key"="2410";"subkey"="482";"value"="18767"};
+{"key"="2411";"subkey"="482";"value"="18776"};
+{"key"="2412";"subkey"="482";"value"="18785"};
+{"key"="2413";"subkey"="482";"value"="18793"};
+{"key"="2414";"subkey"="482";"value"="18802"};
+{"key"="2415";"subkey"="483";"value"="18811"};
+{"key"="2416";"subkey"="483";"value"="18820"};
+{"key"="2417";"subkey"="483";"value"="18829"};
+{"key"="2418";"subkey"="483";"value"="18837"};
+{"key"="2419";"subkey"="483";"value"="18846"};
+{"key"="2420";"subkey"="484";"value"="18855"};
+{"key"="2421";"subkey"="484";"value"="18864"};
+{"key"="2422";"subkey"="484";"value"="18873"};
+{"key"="2423";"subkey"="484";"value"="18881"};
+{"key"="2424";"subkey"="484";"value"="18890"};
+{"key"="2425";"subkey"="485";"value"="18899"};
+{"key"="2426";"subkey"="485";"value"="18908"};
+{"key"="2427";"subkey"="485";"value"="18917"};
+{"key"="2428";"subkey"="485";"value"="18925"};
+{"key"="2429";"subkey"="485";"value"="18934"};
+{"key"="2430";"subkey"="486";"value"="18943"};
+{"key"="2431";"subkey"="486";"value"="18952"};
+{"key"="2432";"subkey"="486";"value"="18961"};
+{"key"="2433";"subkey"="486";"value"="18969"};
+{"key"="2434";"subkey"="486";"value"="18978"};
+{"key"="2435";"subkey"="487";"value"="18987"};
+{"key"="2436";"subkey"="487";"value"="18996"};
+{"key"="2437";"subkey"="487";"value"="19005"};
+{"key"="2438";"subkey"="487";"value"="19013"};
+{"key"="2439";"subkey"="487";"value"="19022"};
+{"key"="2440";"subkey"="488";"value"="19031"};
+{"key"="2441";"subkey"="488";"value"="19040"};
+{"key"="2442";"subkey"="488";"value"="19048"};
+{"key"="2443";"subkey"="488";"value"="19057"};
+{"key"="2444";"subkey"="488";"value"="19066"};
+{"key"="2445";"subkey"="489";"value"="19075"};
+{"key"="2446";"subkey"="489";"value"="19084"};
+{"key"="2447";"subkey"="489";"value"="19093"};
+{"key"="2448";"subkey"="489";"value"="19101"};
+{"key"="2449";"subkey"="489";"value"="19110"};
+{"key"="2450";"subkey"="490";"value"="19119"};
+{"key"="2451";"subkey"="490";"value"="19128"};
+{"key"="2452";"subkey"="490";"value"="19137"};
+{"key"="2453";"subkey"="490";"value"="19145"};
+{"key"="2454";"subkey"="490";"value"="19154"};
+{"key"="2455";"subkey"="491";"value"="19163"};
+{"key"="2456";"subkey"="491";"value"="19172"};
+{"key"="2457";"subkey"="491";"value"="19181"};
+{"key"="2458";"subkey"="491";"value"="19189"};
+{"key"="2459";"subkey"="491";"value"="19198"};
+{"key"="2460";"subkey"="492";"value"="19207"};
+{"key"="2461";"subkey"="492";"value"="19216"};
+{"key"="2462";"subkey"="492";"value"="19225"};
+{"key"="2463";"subkey"="492";"value"="19233"};
+{"key"="2464";"subkey"="492";"value"="19242"};
+{"key"="2465";"subkey"="493";"value"="19251"};
+{"key"="2466";"subkey"="493";"value"="19260"};
+{"key"="2467";"subkey"="493";"value"="19269"};
+{"key"="2468";"subkey"="493";"value"="19277"};
+{"key"="2469";"subkey"="493";"value"="19286"};
+{"key"="2470";"subkey"="494";"value"="19295"};
+{"key"="2471";"subkey"="494";"value"="19304"};
+{"key"="2472";"subkey"="494";"value"="19313"};
+{"key"="2473";"subkey"="494";"value"="19322"};
+{"key"="2474";"subkey"="494";"value"="19330"};
+{"key"="2475";"subkey"="495";"value"="19339"};
+{"key"="2476";"subkey"="495";"value"="19348"};
+{"key"="2477";"subkey"="495";"value"="19357"};
+{"key"="2478";"subkey"="495";"value"="19366"};
+{"key"="2479";"subkey"="495";"value"="19374"};
+{"key"="2480";"subkey"="496";"value"="19383"};
+{"key"="2481";"subkey"="496";"value"="19392"};
+{"key"="2482";"subkey"="496";"value"="19401"};
+{"key"="2483";"subkey"="496";"value"="19410"};
+{"key"="2484";"subkey"="496";"value"="19418"};
+{"key"="2485";"subkey"="497";"value"="19427"};
+{"key"="2486";"subkey"="497";"value"="19436"};
+{"key"="2487";"subkey"="497";"value"="19445"};
+{"key"="2488";"subkey"="497";"value"="19454"};
+{"key"="2489";"subkey"="497";"value"="19463"};
+{"key"="2490";"subkey"="498";"value"="19471"};
+{"key"="2491";"subkey"="498";"value"="19480"};
+{"key"="2492";"subkey"="498";"value"="19489"};
+{"key"="2493";"subkey"="498";"value"="19498"};
+{"key"="2494";"subkey"="498";"value"="19507"};
+{"key"="2495";"subkey"="499";"value"="19515"};
+{"key"="2496";"subkey"="499";"value"="19524"};
+{"key"="2497";"subkey"="499";"value"="19533"};
+{"key"="2498";"subkey"="499";"value"="19542"};
+{"key"="2499";"subkey"="499";"value"="19551"};
+{"key"="2500";"subkey"="500";"value"="19560"};
+{"key"="2501";"subkey"="500";"value"="19568"};
+{"key"="2502";"subkey"="500";"value"="19577"};
+{"key"="2503";"subkey"="500";"value"="19586"};
+{"key"="2504";"subkey"="500";"value"="19595"};
+{"key"="2505";"subkey"="501";"value"="19604"};
+{"key"="2506";"subkey"="501";"value"="19613"};
+{"key"="2507";"subkey"="501";"value"="19621"};
+{"key"="2508";"subkey"="501";"value"="19630"};
+{"key"="2509";"subkey"="501";"value"="19639"};
+{"key"="2510";"subkey"="502";"value"="19648"};
+{"key"="2511";"subkey"="502";"value"="19657"};
+{"key"="2512";"subkey"="502";"value"="19666"};
+{"key"="2513";"subkey"="502";"value"="19674"};
+{"key"="2514";"subkey"="502";"value"="19683"};
+{"key"="2515";"subkey"="503";"value"="19692"};
+{"key"="2516";"subkey"="503";"value"="19701"};
+{"key"="2517";"subkey"="503";"value"="19710"};
+{"key"="2518";"subkey"="503";"value"="19719"};
+{"key"="2519";"subkey"="503";"value"="19727"};
+{"key"="2520";"subkey"="504";"value"="19736"};
+{"key"="2521";"subkey"="504";"value"="19745"};
+{"key"="2522";"subkey"="504";"value"="19754"};
+{"key"="2523";"subkey"="504";"value"="19763"};
+{"key"="2524";"subkey"="504";"value"="19772"};
+{"key"="2525";"subkey"="505";"value"="19780"};
+{"key"="2526";"subkey"="505";"value"="19789"};
+{"key"="2527";"subkey"="505";"value"="19798"};
+{"key"="2528";"subkey"="505";"value"="19807"};
+{"key"="2529";"subkey"="505";"value"="19816"};
+{"key"="2530";"subkey"="506";"value"="19825"};
+{"key"="2531";"subkey"="506";"value"="19833"};
+{"key"="2532";"subkey"="506";"value"="19842"};
+{"key"="2533";"subkey"="506";"value"="19851"};
+{"key"="2534";"subkey"="506";"value"="19860"};
+{"key"="2535";"subkey"="507";"value"="19869"};
+{"key"="2536";"subkey"="507";"value"="19878"};
+{"key"="2537";"subkey"="507";"value"="19886"};
+{"key"="2538";"subkey"="507";"value"="19895"};
+{"key"="2539";"subkey"="507";"value"="19904"};
+{"key"="2540";"subkey"="508";"value"="19913"};
+{"key"="2541";"subkey"="508";"value"="19922"};
+{"key"="2542";"subkey"="508";"value"="19931"};
+{"key"="2543";"subkey"="508";"value"="19939"};
+{"key"="2544";"subkey"="508";"value"="19948"};
+{"key"="2545";"subkey"="509";"value"="19957"};
+{"key"="2546";"subkey"="509";"value"="19966"};
+{"key"="2547";"subkey"="509";"value"="19975"};
+{"key"="2548";"subkey"="509";"value"="19984"};
+{"key"="2549";"subkey"="509";"value"="19992"};
+{"key"="2550";"subkey"="510";"value"="20001"};
+{"key"="2551";"subkey"="510";"value"="20010"};
+{"key"="2552";"subkey"="510";"value"="20019"};
+{"key"="2553";"subkey"="510";"value"="20028"};
+{"key"="2554";"subkey"="510";"value"="20037"};
+{"key"="2555";"subkey"="511";"value"="20046"};
+{"key"="2556";"subkey"="511";"value"="20054"};
+{"key"="2557";"subkey"="511";"value"="20063"};
+{"key"="2558";"subkey"="511";"value"="20072"};
+{"key"="2559";"subkey"="511";"value"="20081"};
+{"key"="2560";"subkey"="512";"value"="20090"};
+{"key"="2561";"subkey"="512";"value"="20099"};
+{"key"="2562";"subkey"="512";"value"="20107"};
+{"key"="2563";"subkey"="512";"value"="20116"};
+{"key"="2564";"subkey"="512";"value"="20125"};
+{"key"="2565";"subkey"="513";"value"="20134"};
+{"key"="2566";"subkey"="513";"value"="20143"};
+{"key"="2567";"subkey"="513";"value"="20152"};
+{"key"="2568";"subkey"="513";"value"="20161"};
+{"key"="2569";"subkey"="513";"value"="20169"};
+{"key"="2570";"subkey"="514";"value"="20178"};
+{"key"="2571";"subkey"="514";"value"="20187"};
+{"key"="2572";"subkey"="514";"value"="20196"};
+{"key"="2573";"subkey"="514";"value"="20205"};
+{"key"="2574";"subkey"="514";"value"="20214"};
+{"key"="2575";"subkey"="515";"value"="20223"};
+{"key"="2576";"subkey"="515";"value"="20231"};
+{"key"="2577";"subkey"="515";"value"="20240"};
+{"key"="2578";"subkey"="515";"value"="20249"};
+{"key"="2579";"subkey"="515";"value"="20258"};
+{"key"="2580";"subkey"="516";"value"="20267"};
+{"key"="2581";"subkey"="516";"value"="20276"};
+{"key"="2582";"subkey"="516";"value"="20285"};
+{"key"="2583";"subkey"="516";"value"="20293"};
+{"key"="2584";"subkey"="516";"value"="20302"};
+{"key"="2585";"subkey"="517";"value"="20311"};
+{"key"="2586";"subkey"="517";"value"="20320"};
+{"key"="2587";"subkey"="517";"value"="20329"};
+{"key"="2588";"subkey"="517";"value"="20338"};
+{"key"="2589";"subkey"="517";"value"="20347"};
+{"key"="2590";"subkey"="518";"value"="20355"};
+{"key"="2591";"subkey"="518";"value"="20364"};
+{"key"="2592";"subkey"="518";"value"="20373"};
+{"key"="2593";"subkey"="518";"value"="20382"};
+{"key"="2594";"subkey"="518";"value"="20391"};
+{"key"="2595";"subkey"="519";"value"="20400"};
+{"key"="2596";"subkey"="519";"value"="20409"};
+{"key"="2597";"subkey"="519";"value"="20417"};
+{"key"="2598";"subkey"="519";"value"="20426"};
+{"key"="2599";"subkey"="519";"value"="20435"};
+{"key"="2600";"subkey"="520";"value"="20444"};
+{"key"="2601";"subkey"="520";"value"="20453"};
+{"key"="2602";"subkey"="520";"value"="20462"};
+{"key"="2603";"subkey"="520";"value"="20471"};
+{"key"="2604";"subkey"="520";"value"="20479"};
+{"key"="2605";"subkey"="521";"value"="20488"};
+{"key"="2606";"subkey"="521";"value"="20497"};
+{"key"="2607";"subkey"="521";"value"="20506"};
+{"key"="2608";"subkey"="521";"value"="20515"};
+{"key"="2609";"subkey"="521";"value"="20524"};
+{"key"="2610";"subkey"="522";"value"="20533"};
+{"key"="2611";"subkey"="522";"value"="20542"};
+{"key"="2612";"subkey"="522";"value"="20550"};
+{"key"="2613";"subkey"="522";"value"="20559"};
+{"key"="2614";"subkey"="522";"value"="20568"};
+{"key"="2615";"subkey"="523";"value"="20577"};
+{"key"="2616";"subkey"="523";"value"="20586"};
+{"key"="2617";"subkey"="523";"value"="20595"};
+{"key"="2618";"subkey"="523";"value"="20604"};
+{"key"="2619";"subkey"="523";"value"="20612"};
+{"key"="2620";"subkey"="524";"value"="20621"};
+{"key"="2621";"subkey"="524";"value"="20630"};
+{"key"="2622";"subkey"="524";"value"="20639"};
+{"key"="2623";"subkey"="524";"value"="20648"};
+{"key"="2624";"subkey"="524";"value"="20657"};
+{"key"="2625";"subkey"="525";"value"="20666"};
+{"key"="2626";"subkey"="525";"value"="20675"};
+{"key"="2627";"subkey"="525";"value"="20683"};
+{"key"="2628";"subkey"="525";"value"="20692"};
+{"key"="2629";"subkey"="525";"value"="20701"};
+{"key"="2630";"subkey"="526";"value"="20710"};
+{"key"="2631";"subkey"="526";"value"="20719"};
+{"key"="2632";"subkey"="526";"value"="20728"};
+{"key"="2633";"subkey"="526";"value"="20737"};
+{"key"="2634";"subkey"="526";"value"="20746"};
+{"key"="2635";"subkey"="527";"value"="20754"};
+{"key"="2636";"subkey"="527";"value"="20763"};
+{"key"="2637";"subkey"="527";"value"="20772"};
+{"key"="2638";"subkey"="527";"value"="20781"};
+{"key"="2639";"subkey"="527";"value"="20790"};
+{"key"="2640";"subkey"="528";"value"="20799"};
+{"key"="2641";"subkey"="528";"value"="20808"};
+{"key"="2642";"subkey"="528";"value"="20817"};
+{"key"="2643";"subkey"="528";"value"="20825"};
+{"key"="2644";"subkey"="528";"value"="20834"};
+{"key"="2645";"subkey"="529";"value"="20843"};
+{"key"="2646";"subkey"="529";"value"="20852"};
+{"key"="2647";"subkey"="529";"value"="20861"};
+{"key"="2648";"subkey"="529";"value"="20870"};
+{"key"="2649";"subkey"="529";"value"="20879"};
+{"key"="2650";"subkey"="530";"value"="20888"};
+{"key"="2651";"subkey"="530";"value"="20897"};
+{"key"="2652";"subkey"="530";"value"="20905"};
+{"key"="2653";"subkey"="530";"value"="20914"};
+{"key"="2654";"subkey"="530";"value"="20923"};
+{"key"="2655";"subkey"="531";"value"="20932"};
+{"key"="2656";"subkey"="531";"value"="20941"};
+{"key"="2657";"subkey"="531";"value"="20950"};
+{"key"="2658";"subkey"="531";"value"="20959"};
+{"key"="2659";"subkey"="531";"value"="20968"};
+{"key"="2660";"subkey"="532";"value"="20976"};
+{"key"="2661";"subkey"="532";"value"="20985"};
+{"key"="2662";"subkey"="532";"value"="20994"};
+{"key"="2663";"subkey"="532";"value"="21003"};
+{"key"="2664";"subkey"="532";"value"="21012"};
+{"key"="2665";"subkey"="533";"value"="21021"};
+{"key"="2666";"subkey"="533";"value"="21030"};
+{"key"="2667";"subkey"="533";"value"="21039"};
+{"key"="2668";"subkey"="533";"value"="21048"};
+{"key"="2669";"subkey"="533";"value"="21056"};
+{"key"="2670";"subkey"="534";"value"="21065"};
+{"key"="2671";"subkey"="534";"value"="21074"};
+{"key"="2672";"subkey"="534";"value"="21083"};
+{"key"="2673";"subkey"="534";"value"="21092"};
+{"key"="2674";"subkey"="534";"value"="21101"};
+{"key"="2675";"subkey"="535";"value"="21110"};
+{"key"="2676";"subkey"="535";"value"="21119"};
+{"key"="2677";"subkey"="535";"value"="21128"};
+{"key"="2678";"subkey"="535";"value"="21136"};
+{"key"="2679";"subkey"="535";"value"="21145"};
+{"key"="2680";"subkey"="536";"value"="21154"};
+{"key"="2681";"subkey"="536";"value"="21163"};
+{"key"="2682";"subkey"="536";"value"="21172"};
+{"key"="2683";"subkey"="536";"value"="21181"};
+{"key"="2684";"subkey"="536";"value"="21190"};
+{"key"="2685";"subkey"="537";"value"="21199"};
+{"key"="2686";"subkey"="537";"value"="21208"};
+{"key"="2687";"subkey"="537";"value"="21217"};
+{"key"="2688";"subkey"="537";"value"="21225"};
+{"key"="2689";"subkey"="537";"value"="21234"};
+{"key"="2690";"subkey"="538";"value"="21243"};
+{"key"="2691";"subkey"="538";"value"="21252"};
+{"key"="2692";"subkey"="538";"value"="21261"};
+{"key"="2693";"subkey"="538";"value"="21270"};
+{"key"="2694";"subkey"="538";"value"="21279"};
+{"key"="2695";"subkey"="539";"value"="21288"};
+{"key"="2696";"subkey"="539";"value"="21297"};
+{"key"="2697";"subkey"="539";"value"="21306"};
+{"key"="2698";"subkey"="539";"value"="21314"};
+{"key"="2699";"subkey"="539";"value"="21323"};
+{"key"="2700";"subkey"="540";"value"="21332"};
+{"key"="2701";"subkey"="540";"value"="21341"};
+{"key"="2702";"subkey"="540";"value"="21350"};
+{"key"="2703";"subkey"="540";"value"="21359"};
+{"key"="2704";"subkey"="540";"value"="21368"};
+{"key"="2705";"subkey"="541";"value"="21377"};
+{"key"="2706";"subkey"="541";"value"="21386"};
+{"key"="2707";"subkey"="541";"value"="21395"};
+{"key"="2708";"subkey"="541";"value"="21403"};
+{"key"="2709";"subkey"="541";"value"="21412"};
+{"key"="2710";"subkey"="542";"value"="21421"};
+{"key"="2711";"subkey"="542";"value"="21430"};
+{"key"="2712";"subkey"="542";"value"="21439"};
+{"key"="2713";"subkey"="542";"value"="21448"};
+{"key"="2714";"subkey"="542";"value"="21457"};
+{"key"="2715";"subkey"="543";"value"="21466"};
+{"key"="2716";"subkey"="543";"value"="21475"};
+{"key"="2717";"subkey"="543";"value"="21484"};
+{"key"="2718";"subkey"="543";"value"="21492"};
+{"key"="2719";"subkey"="543";"value"="21501"};
+{"key"="2720";"subkey"="544";"value"="21510"};
+{"key"="2721";"subkey"="544";"value"="21519"};
+{"key"="2722";"subkey"="544";"value"="21528"};
+{"key"="2723";"subkey"="544";"value"="21537"};
+{"key"="2724";"subkey"="544";"value"="21546"};
+{"key"="2725";"subkey"="545";"value"="21555"};
+{"key"="2726";"subkey"="545";"value"="21564"};
+{"key"="2727";"subkey"="545";"value"="21573"};
+{"key"="2728";"subkey"="545";"value"="21582"};
+{"key"="2729";"subkey"="545";"value"="21591"};
+{"key"="2730";"subkey"="546";"value"="21599"};
+{"key"="2731";"subkey"="546";"value"="21608"};
+{"key"="2732";"subkey"="546";"value"="21617"};
+{"key"="2733";"subkey"="546";"value"="21626"};
+{"key"="2734";"subkey"="546";"value"="21635"};
+{"key"="2735";"subkey"="547";"value"="21644"};
+{"key"="2736";"subkey"="547";"value"="21653"};
+{"key"="2737";"subkey"="547";"value"="21662"};
+{"key"="2738";"subkey"="547";"value"="21671"};
+{"key"="2739";"subkey"="547";"value"="21680"};
+{"key"="2740";"subkey"="548";"value"="21689"};
+{"key"="2741";"subkey"="548";"value"="21697"};
+{"key"="2742";"subkey"="548";"value"="21706"};
+{"key"="2743";"subkey"="548";"value"="21715"};
+{"key"="2744";"subkey"="548";"value"="21724"};
+{"key"="2745";"subkey"="549";"value"="21733"};
+{"key"="2746";"subkey"="549";"value"="21742"};
+{"key"="2747";"subkey"="549";"value"="21751"};
+{"key"="2748";"subkey"="549";"value"="21760"};
+{"key"="2749";"subkey"="549";"value"="21769"};
+{"key"="2750";"subkey"="550";"value"="21778"};
+{"key"="2751";"subkey"="550";"value"="21787"};
+{"key"="2752";"subkey"="550";"value"="21796"};
+{"key"="2753";"subkey"="550";"value"="21804"};
+{"key"="2754";"subkey"="550";"value"="21813"};
+{"key"="2755";"subkey"="551";"value"="21822"};
+{"key"="2756";"subkey"="551";"value"="21831"};
+{"key"="2757";"subkey"="551";"value"="21840"};
+{"key"="2758";"subkey"="551";"value"="21849"};
+{"key"="2759";"subkey"="551";"value"="21858"};
+{"key"="2760";"subkey"="552";"value"="21867"};
+{"key"="2761";"subkey"="552";"value"="21876"};
+{"key"="2762";"subkey"="552";"value"="21885"};
+{"key"="2763";"subkey"="552";"value"="21894"};
+{"key"="2764";"subkey"="552";"value"="21903"};
+{"key"="2765";"subkey"="553";"value"="21912"};
+{"key"="2766";"subkey"="553";"value"="21920"};
+{"key"="2767";"subkey"="553";"value"="21929"};
+{"key"="2768";"subkey"="553";"value"="21938"};
+{"key"="2769";"subkey"="553";"value"="21947"};
+{"key"="2770";"subkey"="554";"value"="21956"};
+{"key"="2771";"subkey"="554";"value"="21965"};
+{"key"="2772";"subkey"="554";"value"="21974"};
+{"key"="2773";"subkey"="554";"value"="21983"};
+{"key"="2774";"subkey"="554";"value"="21992"};
+{"key"="2775";"subkey"="555";"value"="22001"};
+{"key"="2776";"subkey"="555";"value"="22010"};
+{"key"="2777";"subkey"="555";"value"="22019"};
+{"key"="2778";"subkey"="555";"value"="22028"};
+{"key"="2779";"subkey"="555";"value"="22037"};
+{"key"="2780";"subkey"="556";"value"="22045"};
+{"key"="2781";"subkey"="556";"value"="22054"};
+{"key"="2782";"subkey"="556";"value"="22063"};
+{"key"="2783";"subkey"="556";"value"="22072"};
+{"key"="2784";"subkey"="556";"value"="22081"};
+{"key"="2785";"subkey"="557";"value"="22090"};
+{"key"="2786";"subkey"="557";"value"="22099"};
+{"key"="2787";"subkey"="557";"value"="22108"};
+{"key"="2788";"subkey"="557";"value"="22117"};
+{"key"="2789";"subkey"="557";"value"="22126"};
+{"key"="2790";"subkey"="558";"value"="22135"};
+{"key"="2791";"subkey"="558";"value"="22144"};
+{"key"="2792";"subkey"="558";"value"="22153"};
+{"key"="2793";"subkey"="558";"value"="22162"};
+{"key"="2794";"subkey"="558";"value"="22171"};
+{"key"="2795";"subkey"="559";"value"="22179"};
+{"key"="2796";"subkey"="559";"value"="22188"};
+{"key"="2797";"subkey"="559";"value"="22197"};
+{"key"="2798";"subkey"="559";"value"="22206"};
+{"key"="2799";"subkey"="559";"value"="22215"};
+{"key"="2800";"subkey"="560";"value"="22224"};
+{"key"="2801";"subkey"="560";"value"="22233"};
+{"key"="2802";"subkey"="560";"value"="22242"};
+{"key"="2803";"subkey"="560";"value"="22251"};
+{"key"="2804";"subkey"="560";"value"="22260"};
+{"key"="2805";"subkey"="561";"value"="22269"};
+{"key"="2806";"subkey"="561";"value"="22278"};
+{"key"="2807";"subkey"="561";"value"="22287"};
+{"key"="2808";"subkey"="561";"value"="22296"};
+{"key"="2809";"subkey"="561";"value"="22305"};
+{"key"="2810";"subkey"="562";"value"="22314"};
+{"key"="2811";"subkey"="562";"value"="22322"};
+{"key"="2812";"subkey"="562";"value"="22331"};
+{"key"="2813";"subkey"="562";"value"="22340"};
+{"key"="2814";"subkey"="562";"value"="22349"};
+{"key"="2815";"subkey"="563";"value"="22358"};
+{"key"="2816";"subkey"="563";"value"="22367"};
+{"key"="2817";"subkey"="563";"value"="22376"};
+{"key"="2818";"subkey"="563";"value"="22385"};
+{"key"="2819";"subkey"="563";"value"="22394"};
+{"key"="2820";"subkey"="564";"value"="22403"};
+{"key"="2821";"subkey"="564";"value"="22412"};
+{"key"="2822";"subkey"="564";"value"="22421"};
+{"key"="2823";"subkey"="564";"value"="22430"};
+{"key"="2824";"subkey"="564";"value"="22439"};
+{"key"="2825";"subkey"="565";"value"="22448"};
+{"key"="2826";"subkey"="565";"value"="22457"};
+{"key"="2827";"subkey"="565";"value"="22466"};
+{"key"="2828";"subkey"="565";"value"="22475"};
+{"key"="2829";"subkey"="565";"value"="22483"};
+{"key"="2830";"subkey"="566";"value"="22492"};
+{"key"="2831";"subkey"="566";"value"="22501"};
+{"key"="2832";"subkey"="566";"value"="22510"};
+{"key"="2833";"subkey"="566";"value"="22519"};
+{"key"="2834";"subkey"="566";"value"="22528"};
+{"key"="2835";"subkey"="567";"value"="22537"};
+{"key"="2836";"subkey"="567";"value"="22546"};
+{"key"="2837";"subkey"="567";"value"="22555"};
+{"key"="2838";"subkey"="567";"value"="22564"};
+{"key"="2839";"subkey"="567";"value"="22573"};
+{"key"="2840";"subkey"="568";"value"="22582"};
+{"key"="2841";"subkey"="568";"value"="22591"};
+{"key"="2842";"subkey"="568";"value"="22600"};
+{"key"="2843";"subkey"="568";"value"="22609"};
+{"key"="2844";"subkey"="568";"value"="22618"};
+{"key"="2845";"subkey"="569";"value"="22627"};
+{"key"="2846";"subkey"="569";"value"="22636"};
+{"key"="2847";"subkey"="569";"value"="22645"};
+{"key"="2848";"subkey"="569";"value"="22654"};
+{"key"="2849";"subkey"="569";"value"="22663"};
+{"key"="2850";"subkey"="570";"value"="22671"};
+{"key"="2851";"subkey"="570";"value"="22680"};
+{"key"="2852";"subkey"="570";"value"="22689"};
+{"key"="2853";"subkey"="570";"value"="22698"};
+{"key"="2854";"subkey"="570";"value"="22707"};
+{"key"="2855";"subkey"="571";"value"="22716"};
+{"key"="2856";"subkey"="571";"value"="22725"};
+{"key"="2857";"subkey"="571";"value"="22734"};
+{"key"="2858";"subkey"="571";"value"="22743"};
+{"key"="2859";"subkey"="571";"value"="22752"};
+{"key"="2860";"subkey"="572";"value"="22761"};
+{"key"="2861";"subkey"="572";"value"="22770"};
+{"key"="2862";"subkey"="572";"value"="22779"};
+{"key"="2863";"subkey"="572";"value"="22788"};
+{"key"="2864";"subkey"="572";"value"="22797"};
+{"key"="2865";"subkey"="573";"value"="22806"};
+{"key"="2866";"subkey"="573";"value"="22815"};
+{"key"="2867";"subkey"="573";"value"="22824"};
+{"key"="2868";"subkey"="573";"value"="22833"};
+{"key"="2869";"subkey"="573";"value"="22842"};
+{"key"="2870";"subkey"="574";"value"="22851"};
+{"key"="2871";"subkey"="574";"value"="22860"};
+{"key"="2872";"subkey"="574";"value"="22869"};
+{"key"="2873";"subkey"="574";"value"="22878"};
+{"key"="2874";"subkey"="574";"value"="22886"};
+{"key"="2875";"subkey"="575";"value"="22895"};
+{"key"="2876";"subkey"="575";"value"="22904"};
+{"key"="2877";"subkey"="575";"value"="22913"};
+{"key"="2878";"subkey"="575";"value"="22922"};
+{"key"="2879";"subkey"="575";"value"="22931"};
+{"key"="2880";"subkey"="576";"value"="22940"};
+{"key"="2881";"subkey"="576";"value"="22949"};
+{"key"="2882";"subkey"="576";"value"="22958"};
+{"key"="2883";"subkey"="576";"value"="22967"};
+{"key"="2884";"subkey"="576";"value"="22976"};
+{"key"="2885";"subkey"="577";"value"="22985"};
+{"key"="2886";"subkey"="577";"value"="22994"};
+{"key"="2887";"subkey"="577";"value"="23003"};
+{"key"="2888";"subkey"="577";"value"="23012"};
+{"key"="2889";"subkey"="577";"value"="23021"};
+{"key"="2890";"subkey"="578";"value"="23030"};
+{"key"="2891";"subkey"="578";"value"="23039"};
+{"key"="2892";"subkey"="578";"value"="23048"};
+{"key"="2893";"subkey"="578";"value"="23057"};
+{"key"="2894";"subkey"="578";"value"="23066"};
+{"key"="2895";"subkey"="579";"value"="23075"};
+{"key"="2896";"subkey"="579";"value"="23084"};
+{"key"="2897";"subkey"="579";"value"="23093"};
+{"key"="2898";"subkey"="579";"value"="23102"};
+{"key"="2899";"subkey"="579";"value"="23111"};
+{"key"="2900";"subkey"="580";"value"="23120"};
+{"key"="2901";"subkey"="580";"value"="23129"};
+{"key"="2902";"subkey"="580";"value"="23138"};
+{"key"="2903";"subkey"="580";"value"="23147"};
+{"key"="2904";"subkey"="580";"value"="23156"};
+{"key"="2905";"subkey"="581";"value"="23165"};
+{"key"="2906";"subkey"="581";"value"="23173"};
+{"key"="2907";"subkey"="581";"value"="23182"};
+{"key"="2908";"subkey"="581";"value"="23191"};
+{"key"="2909";"subkey"="581";"value"="23200"};
+{"key"="2910";"subkey"="582";"value"="23209"};
+{"key"="2911";"subkey"="582";"value"="23218"};
+{"key"="2912";"subkey"="582";"value"="23227"};
+{"key"="2913";"subkey"="582";"value"="23236"};
+{"key"="2914";"subkey"="582";"value"="23245"};
+{"key"="2915";"subkey"="583";"value"="23254"};
+{"key"="2916";"subkey"="583";"value"="23263"};
+{"key"="2917";"subkey"="583";"value"="23272"};
+{"key"="2918";"subkey"="583";"value"="23281"};
+{"key"="2919";"subkey"="583";"value"="23290"};
+{"key"="2920";"subkey"="584";"value"="23299"};
+{"key"="2921";"subkey"="584";"value"="23308"};
+{"key"="2922";"subkey"="584";"value"="23317"};
+{"key"="2923";"subkey"="584";"value"="23326"};
+{"key"="2924";"subkey"="584";"value"="23335"};
+{"key"="2925";"subkey"="585";"value"="23344"};
+{"key"="2926";"subkey"="585";"value"="23353"};
+{"key"="2927";"subkey"="585";"value"="23362"};
+{"key"="2928";"subkey"="585";"value"="23371"};
+{"key"="2929";"subkey"="585";"value"="23380"};
+{"key"="2930";"subkey"="586";"value"="23389"};
+{"key"="2931";"subkey"="586";"value"="23398"};
+{"key"="2932";"subkey"="586";"value"="23407"};
+{"key"="2933";"subkey"="586";"value"="23416"};
+{"key"="2934";"subkey"="586";"value"="23425"};
+{"key"="2935";"subkey"="587";"value"="23434"};
+{"key"="2936";"subkey"="587";"value"="23443"};
+{"key"="2937";"subkey"="587";"value"="23452"};
+{"key"="2938";"subkey"="587";"value"="23461"};
+{"key"="2939";"subkey"="587";"value"="23470"};
+{"key"="2940";"subkey"="588";"value"="23479"};
+{"key"="2941";"subkey"="588";"value"="23488"};
+{"key"="2942";"subkey"="588";"value"="23497"};
+{"key"="2943";"subkey"="588";"value"="23506"};
+{"key"="2944";"subkey"="588";"value"="23515"};
+{"key"="2945";"subkey"="589";"value"="23524"};
+{"key"="2946";"subkey"="589";"value"="23533"};
+{"key"="2947";"subkey"="589";"value"="23542"};
+{"key"="2948";"subkey"="589";"value"="23551"};
+{"key"="2949";"subkey"="589";"value"="23560"};
+{"key"="2950";"subkey"="590";"value"="23569"};
+{"key"="2951";"subkey"="590";"value"="23578"};
+{"key"="2952";"subkey"="590";"value"="23587"};
+{"key"="2953";"subkey"="590";"value"="23596"};
+{"key"="2954";"subkey"="590";"value"="23605"};
+{"key"="2955";"subkey"="591";"value"="23614"};
+{"key"="2956";"subkey"="591";"value"="23623"};
+{"key"="2957";"subkey"="591";"value"="23632"};
+{"key"="2958";"subkey"="591";"value"="23641"};
+{"key"="2959";"subkey"="591";"value"="23650"};
+{"key"="2960";"subkey"="592";"value"="23659"};
+{"key"="2961";"subkey"="592";"value"="23668"};
+{"key"="2962";"subkey"="592";"value"="23677"};
+{"key"="2963";"subkey"="592";"value"="23686"};
+{"key"="2964";"subkey"="592";"value"="23695"};
+{"key"="2965";"subkey"="593";"value"="23704"};
+{"key"="2966";"subkey"="593";"value"="23713"};
+{"key"="2967";"subkey"="593";"value"="23722"};
+{"key"="2968";"subkey"="593";"value"="23731"};
+{"key"="2969";"subkey"="593";"value"="23740"};
+{"key"="2970";"subkey"="594";"value"="23749"};
+{"key"="2971";"subkey"="594";"value"="23758"};
+{"key"="2972";"subkey"="594";"value"="23767"};
+{"key"="2973";"subkey"="594";"value"="23776"};
+{"key"="2974";"subkey"="594";"value"="23785"};
+{"key"="2975";"subkey"="595";"value"="23794"};
+{"key"="2976";"subkey"="595";"value"="23803"};
+{"key"="2977";"subkey"="595";"value"="23812"};
+{"key"="2978";"subkey"="595";"value"="23821"};
+{"key"="2979";"subkey"="595";"value"="23830"};
+{"key"="2980";"subkey"="596";"value"="23839"};
+{"key"="2981";"subkey"="596";"value"="23848"};
+{"key"="2982";"subkey"="596";"value"="23857"};
+{"key"="2983";"subkey"="596";"value"="23866"};
+{"key"="2984";"subkey"="596";"value"="23875"};
+{"key"="2985";"subkey"="597";"value"="23884"};
+{"key"="2986";"subkey"="597";"value"="23893"};
+{"key"="2987";"subkey"="597";"value"="23902"};
+{"key"="2988";"subkey"="597";"value"="23911"};
+{"key"="2989";"subkey"="597";"value"="23920"};
+{"key"="2990";"subkey"="598";"value"="23929"};
+{"key"="2991";"subkey"="598";"value"="23938"};
+{"key"="2992";"subkey"="598";"value"="23947"};
+{"key"="2993";"subkey"="598";"value"="23956"};
+{"key"="2994";"subkey"="598";"value"="23965"};
+{"key"="2995";"subkey"="599";"value"="23974"};
+{"key"="2996";"subkey"="599";"value"="23983"};
+{"key"="2997";"subkey"="599";"value"="23992"};
+{"key"="2998";"subkey"="599";"value"="24001"};
+{"key"="2999";"subkey"="599";"value"="24010"};
+{"key"="3000";"subkey"="600";"value"="24019"};
+{"key"="3001";"subkey"="600";"value"="24028"};
+{"key"="3002";"subkey"="600";"value"="24037"};
+{"key"="3003";"subkey"="600";"value"="24046"};
+{"key"="3004";"subkey"="600";"value"="24055"};
+{"key"="3005";"subkey"="601";"value"="24064"};
+{"key"="3006";"subkey"="601";"value"="24073"};
+{"key"="3007";"subkey"="601";"value"="24082"};
+{"key"="3008";"subkey"="601";"value"="24091"};
+{"key"="3009";"subkey"="601";"value"="24100"};
+{"key"="3010";"subkey"="602";"value"="24109"};
+{"key"="3011";"subkey"="602";"value"="24118"};
+{"key"="3012";"subkey"="602";"value"="24127"};
+{"key"="3013";"subkey"="602";"value"="24136"};
+{"key"="3014";"subkey"="602";"value"="24145"};
+{"key"="3015";"subkey"="603";"value"="24154"};
+{"key"="3016";"subkey"="603";"value"="24163"};
+{"key"="3017";"subkey"="603";"value"="24172"};
+{"key"="3018";"subkey"="603";"value"="24181"};
+{"key"="3019";"subkey"="603";"value"="24190"};
+{"key"="3020";"subkey"="604";"value"="24199"};
+{"key"="3021";"subkey"="604";"value"="24208"};
+{"key"="3022";"subkey"="604";"value"="24217"};
+{"key"="3023";"subkey"="604";"value"="24226"};
+{"key"="3024";"subkey"="604";"value"="24235"};
+{"key"="3025";"subkey"="605";"value"="24244"};
+{"key"="3026";"subkey"="605";"value"="24253"};
+{"key"="3027";"subkey"="605";"value"="24262"};
+{"key"="3028";"subkey"="605";"value"="24271"};
+{"key"="3029";"subkey"="605";"value"="24280"};
+{"key"="3030";"subkey"="606";"value"="24289"};
+{"key"="3031";"subkey"="606";"value"="24298"};
+{"key"="3032";"subkey"="606";"value"="24307"};
+{"key"="3033";"subkey"="606";"value"="24316"};
+{"key"="3034";"subkey"="606";"value"="24325"};
+{"key"="3035";"subkey"="607";"value"="24334"};
+{"key"="3036";"subkey"="607";"value"="24343"};
+{"key"="3037";"subkey"="607";"value"="24352"};
+{"key"="3038";"subkey"="607";"value"="24361"};
+{"key"="3039";"subkey"="607";"value"="24370"};
+{"key"="3040";"subkey"="608";"value"="24379"};
+{"key"="3041";"subkey"="608";"value"="24388"};
+{"key"="3042";"subkey"="608";"value"="24397"};
+{"key"="3043";"subkey"="608";"value"="24406"};
+{"key"="3044";"subkey"="608";"value"="24415"};
+{"key"="3045";"subkey"="609";"value"="24424"};
+{"key"="3046";"subkey"="609";"value"="24433"};
+{"key"="3047";"subkey"="609";"value"="24442"};
+{"key"="3048";"subkey"="609";"value"="24451"};
+{"key"="3049";"subkey"="609";"value"="24460"};
+{"key"="3050";"subkey"="610";"value"="24469"};
+{"key"="3051";"subkey"="610";"value"="24478"};
+{"key"="3052";"subkey"="610";"value"="24487"};
+{"key"="3053";"subkey"="610";"value"="24496"};
+{"key"="3054";"subkey"="610";"value"="24505"};
+{"key"="3055";"subkey"="611";"value"="24514"};
+{"key"="3056";"subkey"="611";"value"="24523"};
+{"key"="3057";"subkey"="611";"value"="24533"};
+{"key"="3058";"subkey"="611";"value"="24542"};
+{"key"="3059";"subkey"="611";"value"="24551"};
+{"key"="3060";"subkey"="612";"value"="24560"};
+{"key"="3061";"subkey"="612";"value"="24569"};
+{"key"="3062";"subkey"="612";"value"="24578"};
+{"key"="3063";"subkey"="612";"value"="24587"};
+{"key"="3064";"subkey"="612";"value"="24596"};
+{"key"="3065";"subkey"="613";"value"="24605"};
+{"key"="3066";"subkey"="613";"value"="24614"};
+{"key"="3067";"subkey"="613";"value"="24623"};
+{"key"="3068";"subkey"="613";"value"="24632"};
+{"key"="3069";"subkey"="613";"value"="24641"};
+{"key"="3070";"subkey"="614";"value"="24650"};
+{"key"="3071";"subkey"="614";"value"="24659"};
+{"key"="3072";"subkey"="614";"value"="24668"};
+{"key"="3073";"subkey"="614";"value"="24677"};
+{"key"="3074";"subkey"="614";"value"="24686"};
+{"key"="3075";"subkey"="615";"value"="24695"};
+{"key"="3076";"subkey"="615";"value"="24704"};
+{"key"="3077";"subkey"="615";"value"="24713"};
+{"key"="3078";"subkey"="615";"value"="24722"};
+{"key"="3079";"subkey"="615";"value"="24731"};
+{"key"="3080";"subkey"="616";"value"="24740"};
+{"key"="3081";"subkey"="616";"value"="24749"};
+{"key"="3082";"subkey"="616";"value"="24758"};
+{"key"="3083";"subkey"="616";"value"="24767"};
+{"key"="3084";"subkey"="616";"value"="24776"};
+{"key"="3085";"subkey"="617";"value"="24785"};
+{"key"="3086";"subkey"="617";"value"="24794"};
+{"key"="3087";"subkey"="617";"value"="24803"};
+{"key"="3088";"subkey"="617";"value"="24812"};
+{"key"="3089";"subkey"="617";"value"="24821"};
+{"key"="3090";"subkey"="618";"value"="24831"};
+{"key"="3091";"subkey"="618";"value"="24840"};
+{"key"="3092";"subkey"="618";"value"="24849"};
+{"key"="3093";"subkey"="618";"value"="24858"};
+{"key"="3094";"subkey"="618";"value"="24867"};
+{"key"="3095";"subkey"="619";"value"="24876"};
+{"key"="3096";"subkey"="619";"value"="24885"};
+{"key"="3097";"subkey"="619";"value"="24894"};
+{"key"="3098";"subkey"="619";"value"="24903"};
+{"key"="3099";"subkey"="619";"value"="24912"};
+{"key"="3100";"subkey"="620";"value"="24921"};
+{"key"="3101";"subkey"="620";"value"="24930"};
+{"key"="3102";"subkey"="620";"value"="24939"};
+{"key"="3103";"subkey"="620";"value"="24948"};
+{"key"="3104";"subkey"="620";"value"="24957"};
+{"key"="3105";"subkey"="621";"value"="24966"};
+{"key"="3106";"subkey"="621";"value"="24975"};
+{"key"="3107";"subkey"="621";"value"="24984"};
+{"key"="3108";"subkey"="621";"value"="24993"};
+{"key"="3109";"subkey"="621";"value"="25002"};
+{"key"="3110";"subkey"="622";"value"="25011"};
+{"key"="3111";"subkey"="622";"value"="25020"};
+{"key"="3112";"subkey"="622";"value"="25029"};
+{"key"="3113";"subkey"="622";"value"="25038"};
+{"key"="3114";"subkey"="622";"value"="25047"};
+{"key"="3115";"subkey"="623";"value"="25057"};
+{"key"="3116";"subkey"="623";"value"="25066"};
+{"key"="3117";"subkey"="623";"value"="25075"};
+{"key"="3118";"subkey"="623";"value"="25084"};
+{"key"="3119";"subkey"="623";"value"="25093"};
+{"key"="3120";"subkey"="624";"value"="25102"};
+{"key"="3121";"subkey"="624";"value"="25111"};
+{"key"="3122";"subkey"="624";"value"="25120"};
+{"key"="3123";"subkey"="624";"value"="25129"};
+{"key"="3124";"subkey"="624";"value"="25138"};
+{"key"="3125";"subkey"="625";"value"="25147"};
+{"key"="3126";"subkey"="625";"value"="25156"};
+{"key"="3127";"subkey"="625";"value"="25165"};
+{"key"="3128";"subkey"="625";"value"="25174"};
+{"key"="3129";"subkey"="625";"value"="25183"};
+{"key"="3130";"subkey"="626";"value"="25192"};
+{"key"="3131";"subkey"="626";"value"="25201"};
+{"key"="3132";"subkey"="626";"value"="25210"};
+{"key"="3133";"subkey"="626";"value"="25219"};
+{"key"="3134";"subkey"="626";"value"="25228"};
+{"key"="3135";"subkey"="627";"value"="25237"};
+{"key"="3136";"subkey"="627";"value"="25247"};
+{"key"="3137";"subkey"="627";"value"="25256"};
+{"key"="3138";"subkey"="627";"value"="25265"};
+{"key"="3139";"subkey"="627";"value"="25274"};
+{"key"="3140";"subkey"="628";"value"="25283"};
+{"key"="3141";"subkey"="628";"value"="25292"};
+{"key"="3142";"subkey"="628";"value"="25301"};
+{"key"="3143";"subkey"="628";"value"="25310"};
+{"key"="3144";"subkey"="628";"value"="25319"};
+{"key"="3145";"subkey"="629";"value"="25328"};
+{"key"="3146";"subkey"="629";"value"="25337"};
+{"key"="3147";"subkey"="629";"value"="25346"};
+{"key"="3148";"subkey"="629";"value"="25355"};
+{"key"="3149";"subkey"="629";"value"="25364"};
+{"key"="3150";"subkey"="630";"value"="25373"};
+{"key"="3151";"subkey"="630";"value"="25382"};
+{"key"="3152";"subkey"="630";"value"="25391"};
+{"key"="3153";"subkey"="630";"value"="25400"};
+{"key"="3154";"subkey"="630";"value"="25409"};
+{"key"="3155";"subkey"="631";"value"="25419"};
+{"key"="3156";"subkey"="631";"value"="25428"};
+{"key"="3157";"subkey"="631";"value"="25437"};
+{"key"="3158";"subkey"="631";"value"="25446"};
+{"key"="3159";"subkey"="631";"value"="25455"};
+{"key"="3160";"subkey"="632";"value"="25464"};
+{"key"="3161";"subkey"="632";"value"="25473"};
+{"key"="3162";"subkey"="632";"value"="25482"};
+{"key"="3163";"subkey"="632";"value"="25491"};
+{"key"="3164";"subkey"="632";"value"="25500"};
+{"key"="3165";"subkey"="633";"value"="25509"};
+{"key"="3166";"subkey"="633";"value"="25518"};
+{"key"="3167";"subkey"="633";"value"="25527"};
+{"key"="3168";"subkey"="633";"value"="25536"};
+{"key"="3169";"subkey"="633";"value"="25545"};
+{"key"="3170";"subkey"="634";"value"="25554"};
+{"key"="3171";"subkey"="634";"value"="25563"};
+{"key"="3172";"subkey"="634";"value"="25573"};
+{"key"="3173";"subkey"="634";"value"="25582"};
+{"key"="3174";"subkey"="634";"value"="25591"};
+{"key"="3175";"subkey"="635";"value"="25600"};
+{"key"="3176";"subkey"="635";"value"="25609"};
+{"key"="3177";"subkey"="635";"value"="25618"};
+{"key"="3178";"subkey"="635";"value"="25627"};
+{"key"="3179";"subkey"="635";"value"="25636"};
+{"key"="3180";"subkey"="636";"value"="25645"};
+{"key"="3181";"subkey"="636";"value"="25654"};
+{"key"="3182";"subkey"="636";"value"="25663"};
+{"key"="3183";"subkey"="636";"value"="25672"};
+{"key"="3184";"subkey"="636";"value"="25681"};
+{"key"="3185";"subkey"="637";"value"="25690"};
+{"key"="3186";"subkey"="637";"value"="25699"};
+{"key"="3187";"subkey"="637";"value"="25709"};
+{"key"="3188";"subkey"="637";"value"="25718"};
+{"key"="3189";"subkey"="637";"value"="25727"};
+{"key"="3190";"subkey"="638";"value"="25736"};
+{"key"="3191";"subkey"="638";"value"="25745"};
+{"key"="3192";"subkey"="638";"value"="25754"};
+{"key"="3193";"subkey"="638";"value"="25763"};
+{"key"="3194";"subkey"="638";"value"="25772"};
+{"key"="3195";"subkey"="639";"value"="25781"};
+{"key"="3196";"subkey"="639";"value"="25790"};
+{"key"="3197";"subkey"="639";"value"="25799"};
+{"key"="3198";"subkey"="639";"value"="25808"};
+{"key"="3199";"subkey"="639";"value"="25817"};
+{"key"="3200";"subkey"="640";"value"="25826"};
+{"key"="3201";"subkey"="640";"value"="25835"};
+{"key"="3202";"subkey"="640";"value"="25845"};
+{"key"="3203";"subkey"="640";"value"="25854"};
+{"key"="3204";"subkey"="640";"value"="25863"};
+{"key"="3205";"subkey"="641";"value"="25872"};
+{"key"="3206";"subkey"="641";"value"="25881"};
+{"key"="3207";"subkey"="641";"value"="25890"};
+{"key"="3208";"subkey"="641";"value"="25899"};
+{"key"="3209";"subkey"="641";"value"="25908"};
+{"key"="3210";"subkey"="642";"value"="25917"};
+{"key"="3211";"subkey"="642";"value"="25926"};
+{"key"="3212";"subkey"="642";"value"="25935"};
+{"key"="3213";"subkey"="642";"value"="25944"};
+{"key"="3214";"subkey"="642";"value"="25953"};
+{"key"="3215";"subkey"="643";"value"="25962"};
+{"key"="3216";"subkey"="643";"value"="25972"};
+{"key"="3217";"subkey"="643";"value"="25981"};
+{"key"="3218";"subkey"="643";"value"="25990"};
+{"key"="3219";"subkey"="643";"value"="25999"};
+{"key"="3220";"subkey"="644";"value"="26008"};
+{"key"="3221";"subkey"="644";"value"="26017"};
+{"key"="3222";"subkey"="644";"value"="26026"};
+{"key"="3223";"subkey"="644";"value"="26035"};
+{"key"="3224";"subkey"="644";"value"="26044"};
+{"key"="3225";"subkey"="645";"value"="26053"};
+{"key"="3226";"subkey"="645";"value"="26062"};
+{"key"="3227";"subkey"="645";"value"="26071"};
+{"key"="3228";"subkey"="645";"value"="26081"};
+{"key"="3229";"subkey"="645";"value"="26090"};
+{"key"="3230";"subkey"="646";"value"="26099"};
+{"key"="3231";"subkey"="646";"value"="26108"};
+{"key"="3232";"subkey"="646";"value"="26117"};
+{"key"="3233";"subkey"="646";"value"="26126"};
+{"key"="3234";"subkey"="646";"value"="26135"};
+{"key"="3235";"subkey"="647";"value"="26144"};
+{"key"="3236";"subkey"="647";"value"="26153"};
+{"key"="3237";"subkey"="647";"value"="26162"};
+{"key"="3238";"subkey"="647";"value"="26171"};
+{"key"="3239";"subkey"="647";"value"="26180"};
+{"key"="3240";"subkey"="648";"value"="26189"};
+{"key"="3241";"subkey"="648";"value"="26199"};
+{"key"="3242";"subkey"="648";"value"="26208"};
+{"key"="3243";"subkey"="648";"value"="26217"};
+{"key"="3244";"subkey"="648";"value"="26226"};
+{"key"="3245";"subkey"="649";"value"="26235"};
+{"key"="3246";"subkey"="649";"value"="26244"};
+{"key"="3247";"subkey"="649";"value"="26253"};
+{"key"="3248";"subkey"="649";"value"="26262"};
+{"key"="3249";"subkey"="649";"value"="26271"};
+{"key"="3250";"subkey"="650";"value"="26280"};
+{"key"="3251";"subkey"="650";"value"="26289"};
+{"key"="3252";"subkey"="650";"value"="26299"};
+{"key"="3253";"subkey"="650";"value"="26308"};
+{"key"="3254";"subkey"="650";"value"="26317"};
+{"key"="3255";"subkey"="651";"value"="26326"};
+{"key"="3256";"subkey"="651";"value"="26335"};
+{"key"="3257";"subkey"="651";"value"="26344"};
+{"key"="3258";"subkey"="651";"value"="26353"};
+{"key"="3259";"subkey"="651";"value"="26362"};
+{"key"="3260";"subkey"="652";"value"="26371"};
+{"key"="3261";"subkey"="652";"value"="26380"};
+{"key"="3262";"subkey"="652";"value"="26389"};
+{"key"="3263";"subkey"="652";"value"="26398"};
+{"key"="3264";"subkey"="652";"value"="26408"};
+{"key"="3265";"subkey"="653";"value"="26417"};
+{"key"="3266";"subkey"="653";"value"="26426"};
+{"key"="3267";"subkey"="653";"value"="26435"};
+{"key"="3268";"subkey"="653";"value"="26444"};
+{"key"="3269";"subkey"="653";"value"="26453"};
+{"key"="3270";"subkey"="654";"value"="26462"};
+{"key"="3271";"subkey"="654";"value"="26471"};
+{"key"="3272";"subkey"="654";"value"="26480"};
+{"key"="3273";"subkey"="654";"value"="26489"};
+{"key"="3274";"subkey"="654";"value"="26498"};
+{"key"="3275";"subkey"="655";"value"="26508"};
+{"key"="3276";"subkey"="655";"value"="26517"};
+{"key"="3277";"subkey"="655";"value"="26526"};
+{"key"="3278";"subkey"="655";"value"="26535"};
+{"key"="3279";"subkey"="655";"value"="26544"};
+{"key"="3280";"subkey"="656";"value"="26553"};
+{"key"="3281";"subkey"="656";"value"="26562"};
+{"key"="3282";"subkey"="656";"value"="26571"};
+{"key"="3283";"subkey"="656";"value"="26580"};
+{"key"="3284";"subkey"="656";"value"="26589"};
+{"key"="3285";"subkey"="657";"value"="26599"};
+{"key"="3286";"subkey"="657";"value"="26608"};
+{"key"="3287";"subkey"="657";"value"="26617"};
+{"key"="3288";"subkey"="657";"value"="26626"};
+{"key"="3289";"subkey"="657";"value"="26635"};
+{"key"="3290";"subkey"="658";"value"="26644"};
+{"key"="3291";"subkey"="658";"value"="26653"};
+{"key"="3292";"subkey"="658";"value"="26662"};
+{"key"="3293";"subkey"="658";"value"="26671"};
+{"key"="3294";"subkey"="658";"value"="26680"};
+{"key"="3295";"subkey"="659";"value"="26690"};
+{"key"="3296";"subkey"="659";"value"="26699"};
+{"key"="3297";"subkey"="659";"value"="26708"};
+{"key"="3298";"subkey"="659";"value"="26717"};
+{"key"="3299";"subkey"="659";"value"="26726"};
+{"key"="3300";"subkey"="660";"value"="26735"};
+{"key"="3301";"subkey"="660";"value"="26744"};
+{"key"="3302";"subkey"="660";"value"="26753"};
+{"key"="3303";"subkey"="660";"value"="26762"};
+{"key"="3304";"subkey"="660";"value"="26771"};
+{"key"="3305";"subkey"="661";"value"="26781"};
+{"key"="3306";"subkey"="661";"value"="26790"};
+{"key"="3307";"subkey"="661";"value"="26799"};
+{"key"="3308";"subkey"="661";"value"="26808"};
+{"key"="3309";"subkey"="661";"value"="26817"};
+{"key"="3310";"subkey"="662";"value"="26826"};
+{"key"="3311";"subkey"="662";"value"="26835"};
+{"key"="3312";"subkey"="662";"value"="26844"};
+{"key"="3313";"subkey"="662";"value"="26853"};
+{"key"="3314";"subkey"="662";"value"="26862"};
+{"key"="3315";"subkey"="663";"value"="26872"};
+{"key"="3316";"subkey"="663";"value"="26881"};
+{"key"="3317";"subkey"="663";"value"="26890"};
+{"key"="3318";"subkey"="663";"value"="26899"};
+{"key"="3319";"subkey"="663";"value"="26908"};
+{"key"="3320";"subkey"="664";"value"="26917"};
+{"key"="3321";"subkey"="664";"value"="26926"};
+{"key"="3322";"subkey"="664";"value"="26935"};
+{"key"="3323";"subkey"="664";"value"="26944"};
+{"key"="3324";"subkey"="664";"value"="26954"};
+{"key"="3325";"subkey"="665";"value"="26963"};
+{"key"="3326";"subkey"="665";"value"="26972"};
+{"key"="3327";"subkey"="665";"value"="26981"};
+{"key"="3328";"subkey"="665";"value"="26990"};
+{"key"="3329";"subkey"="665";"value"="26999"};
+{"key"="3330";"subkey"="666";"value"="27008"};
+{"key"="3331";"subkey"="666";"value"="27017"};
+{"key"="3332";"subkey"="666";"value"="27026"};
+{"key"="3333";"subkey"="666";"value"="27036"};
+{"key"="3334";"subkey"="666";"value"="27045"};
+{"key"="3335";"subkey"="667";"value"="27054"};
+{"key"="3336";"subkey"="667";"value"="27063"};
+{"key"="3337";"subkey"="667";"value"="27072"};
+{"key"="3338";"subkey"="667";"value"="27081"};
+{"key"="3339";"subkey"="667";"value"="27090"};
+{"key"="3340";"subkey"="668";"value"="27099"};
+{"key"="3341";"subkey"="668";"value"="27108"};
+{"key"="3342";"subkey"="668";"value"="27118"};
+{"key"="3343";"subkey"="668";"value"="27127"};
+{"key"="3344";"subkey"="668";"value"="27136"};
+{"key"="3345";"subkey"="669";"value"="27145"};
+{"key"="3346";"subkey"="669";"value"="27154"};
+{"key"="3347";"subkey"="669";"value"="27163"};
+{"key"="3348";"subkey"="669";"value"="27172"};
+{"key"="3349";"subkey"="669";"value"="27181"};
+{"key"="3350";"subkey"="670";"value"="27190"};
+{"key"="3351";"subkey"="670";"value"="27200"};
+{"key"="3352";"subkey"="670";"value"="27209"};
+{"key"="3353";"subkey"="670";"value"="27218"};
+{"key"="3354";"subkey"="670";"value"="27227"};
+{"key"="3355";"subkey"="671";"value"="27236"};
+{"key"="3356";"subkey"="671";"value"="27245"};
+{"key"="3357";"subkey"="671";"value"="27254"};
+{"key"="3358";"subkey"="671";"value"="27263"};
+{"key"="3359";"subkey"="671";"value"="27273"};
+{"key"="3360";"subkey"="672";"value"="27282"};
+{"key"="3361";"subkey"="672";"value"="27291"};
+{"key"="3362";"subkey"="672";"value"="27300"};
+{"key"="3363";"subkey"="672";"value"="27309"};
+{"key"="3364";"subkey"="672";"value"="27318"};
+{"key"="3365";"subkey"="673";"value"="27327"};
+{"key"="3366";"subkey"="673";"value"="27336"};
+{"key"="3367";"subkey"="673";"value"="27346"};
+{"key"="3368";"subkey"="673";"value"="27355"};
+{"key"="3369";"subkey"="673";"value"="27364"};
+{"key"="3370";"subkey"="674";"value"="27373"};
+{"key"="3371";"subkey"="674";"value"="27382"};
+{"key"="3372";"subkey"="674";"value"="27391"};
+{"key"="3373";"subkey"="674";"value"="27400"};
+{"key"="3374";"subkey"="674";"value"="27409"};
+{"key"="3375";"subkey"="675";"value"="27419"};
+{"key"="3376";"subkey"="675";"value"="27428"};
+{"key"="3377";"subkey"="675";"value"="27437"};
+{"key"="3378";"subkey"="675";"value"="27446"};
+{"key"="3379";"subkey"="675";"value"="27455"};
+{"key"="3380";"subkey"="676";"value"="27464"};
+{"key"="3381";"subkey"="676";"value"="27473"};
+{"key"="3382";"subkey"="676";"value"="27482"};
+{"key"="3383";"subkey"="676";"value"="27492"};
+{"key"="3384";"subkey"="676";"value"="27501"};
+{"key"="3385";"subkey"="677";"value"="27510"};
+{"key"="3386";"subkey"="677";"value"="27519"};
+{"key"="3387";"subkey"="677";"value"="27528"};
+{"key"="3388";"subkey"="677";"value"="27537"};
+{"key"="3389";"subkey"="677";"value"="27546"};
+{"key"="3390";"subkey"="678";"value"="27555"};
+{"key"="3391";"subkey"="678";"value"="27565"};
+{"key"="3392";"subkey"="678";"value"="27574"};
+{"key"="3393";"subkey"="678";"value"="27583"};
+{"key"="3394";"subkey"="678";"value"="27592"};
+{"key"="3395";"subkey"="679";"value"="27601"};
+{"key"="3396";"subkey"="679";"value"="27610"};
+{"key"="3397";"subkey"="679";"value"="27619"};
+{"key"="3398";"subkey"="679";"value"="27628"};
+{"key"="3399";"subkey"="679";"value"="27638"};
+{"key"="3400";"subkey"="680";"value"="27647"};
+{"key"="3401";"subkey"="680";"value"="27656"};
+{"key"="3402";"subkey"="680";"value"="27665"};
+{"key"="3403";"subkey"="680";"value"="27674"};
+{"key"="3404";"subkey"="680";"value"="27683"};
+{"key"="3405";"subkey"="681";"value"="27692"};
+{"key"="3406";"subkey"="681";"value"="27701"};
+{"key"="3407";"subkey"="681";"value"="27711"};
+{"key"="3408";"subkey"="681";"value"="27720"};
+{"key"="3409";"subkey"="681";"value"="27729"};
+{"key"="3410";"subkey"="682";"value"="27738"};
+{"key"="3411";"subkey"="682";"value"="27747"};
+{"key"="3412";"subkey"="682";"value"="27756"};
+{"key"="3413";"subkey"="682";"value"="27765"};
+{"key"="3414";"subkey"="682";"value"="27775"};
+{"key"="3415";"subkey"="683";"value"="27784"};
+{"key"="3416";"subkey"="683";"value"="27793"};
+{"key"="3417";"subkey"="683";"value"="27802"};
+{"key"="3418";"subkey"="683";"value"="27811"};
+{"key"="3419";"subkey"="683";"value"="27820"};
+{"key"="3420";"subkey"="684";"value"="27829"};
+{"key"="3421";"subkey"="684";"value"="27839"};
+{"key"="3422";"subkey"="684";"value"="27848"};
+{"key"="3423";"subkey"="684";"value"="27857"};
+{"key"="3424";"subkey"="684";"value"="27866"};
+{"key"="3425";"subkey"="685";"value"="27875"};
+{"key"="3426";"subkey"="685";"value"="27884"};
+{"key"="3427";"subkey"="685";"value"="27893"};
+{"key"="3428";"subkey"="685";"value"="27903"};
+{"key"="3429";"subkey"="685";"value"="27912"};
+{"key"="3430";"subkey"="686";"value"="27921"};
+{"key"="3431";"subkey"="686";"value"="27930"};
+{"key"="3432";"subkey"="686";"value"="27939"};
+{"key"="3433";"subkey"="686";"value"="27948"};
+{"key"="3434";"subkey"="686";"value"="27957"};
+{"key"="3435";"subkey"="687";"value"="27966"};
+{"key"="3436";"subkey"="687";"value"="27976"};
+{"key"="3437";"subkey"="687";"value"="27985"};
+{"key"="3438";"subkey"="687";"value"="27994"};
+{"key"="3439";"subkey"="687";"value"="28003"};
+{"key"="3440";"subkey"="688";"value"="28012"};
+{"key"="3441";"subkey"="688";"value"="28021"};
+{"key"="3442";"subkey"="688";"value"="28030"};
+{"key"="3443";"subkey"="688";"value"="28040"};
+{"key"="3444";"subkey"="688";"value"="28049"};
+{"key"="3445";"subkey"="689";"value"="28058"};
+{"key"="3446";"subkey"="689";"value"="28067"};
+{"key"="3447";"subkey"="689";"value"="28076"};
+{"key"="3448";"subkey"="689";"value"="28085"};
+{"key"="3449";"subkey"="689";"value"="28095"};
+{"key"="3450";"subkey"="690";"value"="28104"};
+{"key"="3451";"subkey"="690";"value"="28113"};
+{"key"="3452";"subkey"="690";"value"="28122"};
+{"key"="3453";"subkey"="690";"value"="28131"};
+{"key"="3454";"subkey"="690";"value"="28140"};
+{"key"="3455";"subkey"="691";"value"="28149"};
+{"key"="3456";"subkey"="691";"value"="28159"};
+{"key"="3457";"subkey"="691";"value"="28168"};
+{"key"="3458";"subkey"="691";"value"="28177"};
+{"key"="3459";"subkey"="691";"value"="28186"};
+{"key"="3460";"subkey"="692";"value"="28195"};
+{"key"="3461";"subkey"="692";"value"="28204"};
+{"key"="3462";"subkey"="692";"value"="28213"};
+{"key"="3463";"subkey"="692";"value"="28223"};
+{"key"="3464";"subkey"="692";"value"="28232"};
+{"key"="3465";"subkey"="693";"value"="28241"};
+{"key"="3466";"subkey"="693";"value"="28250"};
+{"key"="3467";"subkey"="693";"value"="28259"};
+{"key"="3468";"subkey"="693";"value"="28268"};
+{"key"="3469";"subkey"="693";"value"="28277"};
+{"key"="3470";"subkey"="694";"value"="28287"};
+{"key"="3471";"subkey"="694";"value"="28296"};
+{"key"="3472";"subkey"="694";"value"="28305"};
+{"key"="3473";"subkey"="694";"value"="28314"};
+{"key"="3474";"subkey"="694";"value"="28323"};
+{"key"="3475";"subkey"="695";"value"="28332"};
+{"key"="3476";"subkey"="695";"value"="28342"};
+{"key"="3477";"subkey"="695";"value"="28351"};
+{"key"="3478";"subkey"="695";"value"="28360"};
+{"key"="3479";"subkey"="695";"value"="28369"};
+{"key"="3480";"subkey"="696";"value"="28378"};
+{"key"="3481";"subkey"="696";"value"="28387"};
+{"key"="3482";"subkey"="696";"value"="28396"};
+{"key"="3483";"subkey"="696";"value"="28406"};
+{"key"="3484";"subkey"="696";"value"="28415"};
+{"key"="3485";"subkey"="697";"value"="28424"};
+{"key"="3486";"subkey"="697";"value"="28433"};
+{"key"="3487";"subkey"="697";"value"="28442"};
+{"key"="3488";"subkey"="697";"value"="28451"};
+{"key"="3489";"subkey"="697";"value"="28461"};
+{"key"="3490";"subkey"="698";"value"="28470"};
+{"key"="3491";"subkey"="698";"value"="28479"};
+{"key"="3492";"subkey"="698";"value"="28488"};
+{"key"="3493";"subkey"="698";"value"="28497"};
+{"key"="3494";"subkey"="698";"value"="28506"};
+{"key"="3495";"subkey"="699";"value"="28516"};
+{"key"="3496";"subkey"="699";"value"="28525"};
+{"key"="3497";"subkey"="699";"value"="28534"};
+{"key"="3498";"subkey"="699";"value"="28543"};
+{"key"="3499";"subkey"="699";"value"="28552"};
+{"key"="3500";"subkey"="700";"value"="28561"};
+{"key"="3501";"subkey"="700";"value"="28570"};
+{"key"="3502";"subkey"="700";"value"="28580"};
+{"key"="3503";"subkey"="700";"value"="28589"};
+{"key"="3504";"subkey"="700";"value"="28598"};
+{"key"="3505";"subkey"="701";"value"="28607"};
+{"key"="3506";"subkey"="701";"value"="28616"};
+{"key"="3507";"subkey"="701";"value"="28625"};
+{"key"="3508";"subkey"="701";"value"="28635"};
+{"key"="3509";"subkey"="701";"value"="28644"};
+{"key"="3510";"subkey"="702";"value"="28653"};
+{"key"="3511";"subkey"="702";"value"="28662"};
+{"key"="3512";"subkey"="702";"value"="28671"};
+{"key"="3513";"subkey"="702";"value"="28680"};
+{"key"="3514";"subkey"="702";"value"="28690"};
+{"key"="3515";"subkey"="703";"value"="28699"};
+{"key"="3516";"subkey"="703";"value"="28708"};
+{"key"="3517";"subkey"="703";"value"="28717"};
+{"key"="3518";"subkey"="703";"value"="28726"};
+{"key"="3519";"subkey"="703";"value"="28735"};
+{"key"="3520";"subkey"="704";"value"="28745"};
+{"key"="3521";"subkey"="704";"value"="28754"};
+{"key"="3522";"subkey"="704";"value"="28763"};
+{"key"="3523";"subkey"="704";"value"="28772"};
+{"key"="3524";"subkey"="704";"value"="28781"};
+{"key"="3525";"subkey"="705";"value"="28790"};
+{"key"="3526";"subkey"="705";"value"="28800"};
+{"key"="3527";"subkey"="705";"value"="28809"};
+{"key"="3528";"subkey"="705";"value"="28818"};
+{"key"="3529";"subkey"="705";"value"="28827"};
+{"key"="3530";"subkey"="706";"value"="28836"};
+{"key"="3531";"subkey"="706";"value"="28845"};
+{"key"="3532";"subkey"="706";"value"="28855"};
+{"key"="3533";"subkey"="706";"value"="28864"};
+{"key"="3534";"subkey"="706";"value"="28873"};
+{"key"="3535";"subkey"="707";"value"="28882"};
+{"key"="3536";"subkey"="707";"value"="28891"};
+{"key"="3537";"subkey"="707";"value"="28900"};
+{"key"="3538";"subkey"="707";"value"="28910"};
+{"key"="3539";"subkey"="707";"value"="28919"};
+{"key"="3540";"subkey"="708";"value"="28928"};
+{"key"="3541";"subkey"="708";"value"="28937"};
+{"key"="3542";"subkey"="708";"value"="28946"};
+{"key"="3543";"subkey"="708";"value"="28955"};
+{"key"="3544";"subkey"="708";"value"="28965"};
+{"key"="3545";"subkey"="709";"value"="28974"};
+{"key"="3546";"subkey"="709";"value"="28983"};
+{"key"="3547";"subkey"="709";"value"="28992"};
+{"key"="3548";"subkey"="709";"value"="29001"};
+{"key"="3549";"subkey"="709";"value"="29011"};
+{"key"="3550";"subkey"="710";"value"="29020"};
+{"key"="3551";"subkey"="710";"value"="29029"};
+{"key"="3552";"subkey"="710";"value"="29038"};
+{"key"="3553";"subkey"="710";"value"="29047"};
+{"key"="3554";"subkey"="710";"value"="29056"};
+{"key"="3555";"subkey"="711";"value"="29066"};
+{"key"="3556";"subkey"="711";"value"="29075"};
+{"key"="3557";"subkey"="711";"value"="29084"};
+{"key"="3558";"subkey"="711";"value"="29093"};
+{"key"="3559";"subkey"="711";"value"="29102"};
+{"key"="3560";"subkey"="712";"value"="29111"};
+{"key"="3561";"subkey"="712";"value"="29121"};
+{"key"="3562";"subkey"="712";"value"="29130"};
+{"key"="3563";"subkey"="712";"value"="29139"};
+{"key"="3564";"subkey"="712";"value"="29148"};
+{"key"="3565";"subkey"="713";"value"="29157"};
+{"key"="3566";"subkey"="713";"value"="29167"};
+{"key"="3567";"subkey"="713";"value"="29176"};
+{"key"="3568";"subkey"="713";"value"="29185"};
+{"key"="3569";"subkey"="713";"value"="29194"};
+{"key"="3570";"subkey"="714";"value"="29203"};
+{"key"="3571";"subkey"="714";"value"="29212"};
+{"key"="3572";"subkey"="714";"value"="29222"};
+{"key"="3573";"subkey"="714";"value"="29231"};
+{"key"="3574";"subkey"="714";"value"="29240"};
+{"key"="3575";"subkey"="715";"value"="29249"};
+{"key"="3576";"subkey"="715";"value"="29258"};
+{"key"="3577";"subkey"="715";"value"="29268"};
+{"key"="3578";"subkey"="715";"value"="29277"};
+{"key"="3579";"subkey"="715";"value"="29286"};
+{"key"="3580";"subkey"="716";"value"="29295"};
+{"key"="3581";"subkey"="716";"value"="29304"};
+{"key"="3582";"subkey"="716";"value"="29313"};
+{"key"="3583";"subkey"="716";"value"="29323"};
+{"key"="3584";"subkey"="716";"value"="29332"};
+{"key"="3585";"subkey"="717";"value"="29341"};
+{"key"="3586";"subkey"="717";"value"="29350"};
+{"key"="3587";"subkey"="717";"value"="29359"};
+{"key"="3588";"subkey"="717";"value"="29369"};
+{"key"="3589";"subkey"="717";"value"="29378"};
+{"key"="3590";"subkey"="718";"value"="29387"};
+{"key"="3591";"subkey"="718";"value"="29396"};
+{"key"="3592";"subkey"="718";"value"="29405"};
+{"key"="3593";"subkey"="718";"value"="29414"};
+{"key"="3594";"subkey"="718";"value"="29424"};
+{"key"="3595";"subkey"="719";"value"="29433"};
+{"key"="3596";"subkey"="719";"value"="29442"};
+{"key"="3597";"subkey"="719";"value"="29451"};
+{"key"="3598";"subkey"="719";"value"="29460"};
+{"key"="3599";"subkey"="719";"value"="29470"};
+{"key"="3600";"subkey"="720";"value"="29479"};
+{"key"="3601";"subkey"="720";"value"="29488"};
+{"key"="3602";"subkey"="720";"value"="29497"};
+{"key"="3603";"subkey"="720";"value"="29506"};
+{"key"="3604";"subkey"="720";"value"="29516"};
+{"key"="3605";"subkey"="721";"value"="29525"};
+{"key"="3606";"subkey"="721";"value"="29534"};
+{"key"="3607";"subkey"="721";"value"="29543"};
+{"key"="3608";"subkey"="721";"value"="29552"};
+{"key"="3609";"subkey"="721";"value"="29561"};
+{"key"="3610";"subkey"="722";"value"="29571"};
+{"key"="3611";"subkey"="722";"value"="29580"};
+{"key"="3612";"subkey"="722";"value"="29589"};
+{"key"="3613";"subkey"="722";"value"="29598"};
+{"key"="3614";"subkey"="722";"value"="29607"};
+{"key"="3615";"subkey"="723";"value"="29617"};
+{"key"="3616";"subkey"="723";"value"="29626"};
+{"key"="3617";"subkey"="723";"value"="29635"};
+{"key"="3618";"subkey"="723";"value"="29644"};
+{"key"="3619";"subkey"="723";"value"="29653"};
+{"key"="3620";"subkey"="724";"value"="29663"};
+{"key"="3621";"subkey"="724";"value"="29672"};
+{"key"="3622";"subkey"="724";"value"="29681"};
+{"key"="3623";"subkey"="724";"value"="29690"};
+{"key"="3624";"subkey"="724";"value"="29699"};
+{"key"="3625";"subkey"="725";"value"="29709"};
+{"key"="3626";"subkey"="725";"value"="29718"};
+{"key"="3627";"subkey"="725";"value"="29727"};
+{"key"="3628";"subkey"="725";"value"="29736"};
+{"key"="3629";"subkey"="725";"value"="29745"};
+{"key"="3630";"subkey"="726";"value"="29755"};
+{"key"="3631";"subkey"="726";"value"="29764"};
+{"key"="3632";"subkey"="726";"value"="29773"};
+{"key"="3633";"subkey"="726";"value"="29782"};
+{"key"="3634";"subkey"="726";"value"="29791"};
+{"key"="3635";"subkey"="727";"value"="29801"};
+{"key"="3636";"subkey"="727";"value"="29810"};
+{"key"="3637";"subkey"="727";"value"="29819"};
+{"key"="3638";"subkey"="727";"value"="29828"};
+{"key"="3639";"subkey"="727";"value"="29837"};
+{"key"="3640";"subkey"="728";"value"="29847"};
+{"key"="3641";"subkey"="728";"value"="29856"};
+{"key"="3642";"subkey"="728";"value"="29865"};
+{"key"="3643";"subkey"="728";"value"="29874"};
+{"key"="3644";"subkey"="728";"value"="29883"};
+{"key"="3645";"subkey"="729";"value"="29893"};
+{"key"="3646";"subkey"="729";"value"="29902"};
+{"key"="3647";"subkey"="729";"value"="29911"};
+{"key"="3648";"subkey"="729";"value"="29920"};
+{"key"="3649";"subkey"="729";"value"="29929"};
+{"key"="3650";"subkey"="730";"value"="29939"};
+{"key"="3651";"subkey"="730";"value"="29948"};
+{"key"="3652";"subkey"="730";"value"="29957"};
+{"key"="3653";"subkey"="730";"value"="29966"};
+{"key"="3654";"subkey"="730";"value"="29975"};
+{"key"="3655";"subkey"="731";"value"="29985"};
+{"key"="3656";"subkey"="731";"value"="29994"};
+{"key"="3657";"subkey"="731";"value"="30003"};
+{"key"="3658";"subkey"="731";"value"="30012"};
+{"key"="3659";"subkey"="731";"value"="30021"};
+{"key"="3660";"subkey"="732";"value"="30031"};
+{"key"="3661";"subkey"="732";"value"="30040"};
+{"key"="3662";"subkey"="732";"value"="30049"};
+{"key"="3663";"subkey"="732";"value"="30058"};
+{"key"="3664";"subkey"="732";"value"="30067"};
+{"key"="3665";"subkey"="733";"value"="30077"};
+{"key"="3666";"subkey"="733";"value"="30086"};
+{"key"="3667";"subkey"="733";"value"="30095"};
+{"key"="3668";"subkey"="733";"value"="30104"};
+{"key"="3669";"subkey"="733";"value"="30113"};
+{"key"="3670";"subkey"="734";"value"="30123"};
+{"key"="3671";"subkey"="734";"value"="30132"};
+{"key"="3672";"subkey"="734";"value"="30141"};
+{"key"="3673";"subkey"="734";"value"="30150"};
+{"key"="3674";"subkey"="734";"value"="30159"};
+{"key"="3675";"subkey"="735";"value"="30169"};
+{"key"="3676";"subkey"="735";"value"="30178"};
+{"key"="3677";"subkey"="735";"value"="30187"};
+{"key"="3678";"subkey"="735";"value"="30196"};
+{"key"="3679";"subkey"="735";"value"="30206"};
+{"key"="3680";"subkey"="736";"value"="30215"};
+{"key"="3681";"subkey"="736";"value"="30224"};
+{"key"="3682";"subkey"="736";"value"="30233"};
+{"key"="3683";"subkey"="736";"value"="30242"};
+{"key"="3684";"subkey"="736";"value"="30252"};
+{"key"="3685";"subkey"="737";"value"="30261"};
+{"key"="3686";"subkey"="737";"value"="30270"};
+{"key"="3687";"subkey"="737";"value"="30279"};
+{"key"="3688";"subkey"="737";"value"="30288"};
+{"key"="3689";"subkey"="737";"value"="30298"};
+{"key"="3690";"subkey"="738";"value"="30307"};
+{"key"="3691";"subkey"="738";"value"="30316"};
+{"key"="3692";"subkey"="738";"value"="30325"};
+{"key"="3693";"subkey"="738";"value"="30335"};
+{"key"="3694";"subkey"="738";"value"="30344"};
+{"key"="3695";"subkey"="739";"value"="30353"};
+{"key"="3696";"subkey"="739";"value"="30362"};
+{"key"="3697";"subkey"="739";"value"="30371"};
+{"key"="3698";"subkey"="739";"value"="30381"};
+{"key"="3699";"subkey"="739";"value"="30390"};
+{"key"="3700";"subkey"="740";"value"="30399"};
+{"key"="3701";"subkey"="740";"value"="30408"};
+{"key"="3702";"subkey"="740";"value"="30417"};
+{"key"="3703";"subkey"="740";"value"="30427"};
+{"key"="3704";"subkey"="740";"value"="30436"};
+{"key"="3705";"subkey"="741";"value"="30445"};
+{"key"="3706";"subkey"="741";"value"="30454"};
+{"key"="3707";"subkey"="741";"value"="30464"};
+{"key"="3708";"subkey"="741";"value"="30473"};
+{"key"="3709";"subkey"="741";"value"="30482"};
+{"key"="3710";"subkey"="742";"value"="30491"};
+{"key"="3711";"subkey"="742";"value"="30500"};
+{"key"="3712";"subkey"="742";"value"="30510"};
+{"key"="3713";"subkey"="742";"value"="30519"};
+{"key"="3714";"subkey"="742";"value"="30528"};
+{"key"="3715";"subkey"="743";"value"="30537"};
+{"key"="3716";"subkey"="743";"value"="30547"};
+{"key"="3717";"subkey"="743";"value"="30556"};
+{"key"="3718";"subkey"="743";"value"="30565"};
+{"key"="3719";"subkey"="743";"value"="30574"};
+{"key"="3720";"subkey"="744";"value"="30583"};
+{"key"="3721";"subkey"="744";"value"="30593"};
+{"key"="3722";"subkey"="744";"value"="30602"};
+{"key"="3723";"subkey"="744";"value"="30611"};
+{"key"="3724";"subkey"="744";"value"="30620"};
+{"key"="3725";"subkey"="745";"value"="30630"};
+{"key"="3726";"subkey"="745";"value"="30639"};
+{"key"="3727";"subkey"="745";"value"="30648"};
+{"key"="3728";"subkey"="745";"value"="30657"};
+{"key"="3729";"subkey"="745";"value"="30666"};
+{"key"="3730";"subkey"="746";"value"="30676"};
+{"key"="3731";"subkey"="746";"value"="30685"};
+{"key"="3732";"subkey"="746";"value"="30694"};
+{"key"="3733";"subkey"="746";"value"="30703"};
+{"key"="3734";"subkey"="746";"value"="30713"};
+{"key"="3735";"subkey"="747";"value"="30722"};
+{"key"="3736";"subkey"="747";"value"="30731"};
+{"key"="3737";"subkey"="747";"value"="30740"};
+{"key"="3738";"subkey"="747";"value"="30749"};
+{"key"="3739";"subkey"="747";"value"="30759"};
+{"key"="3740";"subkey"="748";"value"="30768"};
+{"key"="3741";"subkey"="748";"value"="30777"};
+{"key"="3742";"subkey"="748";"value"="30786"};
+{"key"="3743";"subkey"="748";"value"="30796"};
+{"key"="3744";"subkey"="748";"value"="30805"};
+{"key"="3745";"subkey"="749";"value"="30814"};
+{"key"="3746";"subkey"="749";"value"="30823"};
+{"key"="3747";"subkey"="749";"value"="30832"};
+{"key"="3748";"subkey"="749";"value"="30842"};
+{"key"="3749";"subkey"="749";"value"="30851"};
+{"key"="3750";"subkey"="750";"value"="30860"};
+{"key"="3751";"subkey"="750";"value"="30869"};
+{"key"="3752";"subkey"="750";"value"="30879"};
+{"key"="3753";"subkey"="750";"value"="30888"};
+{"key"="3754";"subkey"="750";"value"="30897"};
+{"key"="3755";"subkey"="751";"value"="30906"};
+{"key"="3756";"subkey"="751";"value"="30916"};
+{"key"="3757";"subkey"="751";"value"="30925"};
+{"key"="3758";"subkey"="751";"value"="30934"};
+{"key"="3759";"subkey"="751";"value"="30943"};
+{"key"="3760";"subkey"="752";"value"="30952"};
+{"key"="3761";"subkey"="752";"value"="30962"};
+{"key"="3762";"subkey"="752";"value"="30971"};
+{"key"="3763";"subkey"="752";"value"="30980"};
+{"key"="3764";"subkey"="752";"value"="30989"};
+{"key"="3765";"subkey"="753";"value"="30999"};
+{"key"="3766";"subkey"="753";"value"="31008"};
+{"key"="3767";"subkey"="753";"value"="31017"};
+{"key"="3768";"subkey"="753";"value"="31026"};
+{"key"="3769";"subkey"="753";"value"="31036"};
+{"key"="3770";"subkey"="754";"value"="31045"};
+{"key"="3771";"subkey"="754";"value"="31054"};
+{"key"="3772";"subkey"="754";"value"="31063"};
+{"key"="3773";"subkey"="754";"value"="31073"};
+{"key"="3774";"subkey"="754";"value"="31082"};
+{"key"="3775";"subkey"="755";"value"="31091"};
+{"key"="3776";"subkey"="755";"value"="31100"};
+{"key"="3777";"subkey"="755";"value"="31109"};
+{"key"="3778";"subkey"="755";"value"="31119"};
+{"key"="3779";"subkey"="755";"value"="31128"};
+{"key"="3780";"subkey"="756";"value"="31137"};
+{"key"="3781";"subkey"="756";"value"="31146"};
+{"key"="3782";"subkey"="756";"value"="31156"};
+{"key"="3783";"subkey"="756";"value"="31165"};
+{"key"="3784";"subkey"="756";"value"="31174"};
+{"key"="3785";"subkey"="757";"value"="31183"};
+{"key"="3786";"subkey"="757";"value"="31193"};
+{"key"="3787";"subkey"="757";"value"="31202"};
+{"key"="3788";"subkey"="757";"value"="31211"};
+{"key"="3789";"subkey"="757";"value"="31220"};
+{"key"="3790";"subkey"="758";"value"="31230"};
+{"key"="3791";"subkey"="758";"value"="31239"};
+{"key"="3792";"subkey"="758";"value"="31248"};
+{"key"="3793";"subkey"="758";"value"="31257"};
+{"key"="3794";"subkey"="758";"value"="31267"};
+{"key"="3795";"subkey"="759";"value"="31276"};
+{"key"="3796";"subkey"="759";"value"="31285"};
+{"key"="3797";"subkey"="759";"value"="31294"};
+{"key"="3798";"subkey"="759";"value"="31303"};
+{"key"="3799";"subkey"="759";"value"="31313"};
+{"key"="3800";"subkey"="760";"value"="31322"};
+{"key"="3801";"subkey"="760";"value"="31331"};
+{"key"="3802";"subkey"="760";"value"="31340"};
+{"key"="3803";"subkey"="760";"value"="31350"};
+{"key"="3804";"subkey"="760";"value"="31359"};
+{"key"="3805";"subkey"="761";"value"="31368"};
+{"key"="3806";"subkey"="761";"value"="31377"};
+{"key"="3807";"subkey"="761";"value"="31387"};
+{"key"="3808";"subkey"="761";"value"="31396"};
+{"key"="3809";"subkey"="761";"value"="31405"};
+{"key"="3810";"subkey"="762";"value"="31414"};
+{"key"="3811";"subkey"="762";"value"="31424"};
+{"key"="3812";"subkey"="762";"value"="31433"};
+{"key"="3813";"subkey"="762";"value"="31442"};
+{"key"="3814";"subkey"="762";"value"="31451"};
+{"key"="3815";"subkey"="763";"value"="31461"};
+{"key"="3816";"subkey"="763";"value"="31470"};
+{"key"="3817";"subkey"="763";"value"="31479"};
+{"key"="3818";"subkey"="763";"value"="31488"};
+{"key"="3819";"subkey"="763";"value"="31498"};
+{"key"="3820";"subkey"="764";"value"="31507"};
+{"key"="3821";"subkey"="764";"value"="31516"};
+{"key"="3822";"subkey"="764";"value"="31525"};
+{"key"="3823";"subkey"="764";"value"="31535"};
+{"key"="3824";"subkey"="764";"value"="31544"};
+{"key"="3825";"subkey"="765";"value"="31553"};
+{"key"="3826";"subkey"="765";"value"="31562"};
+{"key"="3827";"subkey"="765";"value"="31572"};
+{"key"="3828";"subkey"="765";"value"="31581"};
+{"key"="3829";"subkey"="765";"value"="31590"};
+{"key"="3830";"subkey"="766";"value"="31599"};
+{"key"="3831";"subkey"="766";"value"="31609"};
+{"key"="3832";"subkey"="766";"value"="31618"};
+{"key"="3833";"subkey"="766";"value"="31627"};
+{"key"="3834";"subkey"="766";"value"="31636"};
+{"key"="3835";"subkey"="767";"value"="31646"};
+{"key"="3836";"subkey"="767";"value"="31655"};
+{"key"="3837";"subkey"="767";"value"="31664"};
+{"key"="3838";"subkey"="767";"value"="31673"};
+{"key"="3839";"subkey"="767";"value"="31683"};
+{"key"="3840";"subkey"="768";"value"="31692"};
+{"key"="3841";"subkey"="768";"value"="31701"};
+{"key"="3842";"subkey"="768";"value"="31710"};
+{"key"="3843";"subkey"="768";"value"="31720"};
+{"key"="3844";"subkey"="768";"value"="31729"};
+{"key"="3845";"subkey"="769";"value"="31738"};
+{"key"="3846";"subkey"="769";"value"="31747"};
+{"key"="3847";"subkey"="769";"value"="31757"};
+{"key"="3848";"subkey"="769";"value"="31766"};
+{"key"="3849";"subkey"="769";"value"="31775"};
+{"key"="3850";"subkey"="770";"value"="31784"};
+{"key"="3851";"subkey"="770";"value"="31794"};
+{"key"="3852";"subkey"="770";"value"="31803"};
+{"key"="3853";"subkey"="770";"value"="31812"};
+{"key"="3854";"subkey"="770";"value"="31821"};
+{"key"="3855";"subkey"="771";"value"="31831"};
+{"key"="3856";"subkey"="771";"value"="31840"};
+{"key"="3857";"subkey"="771";"value"="31849"};
+{"key"="3858";"subkey"="771";"value"="31858"};
+{"key"="3859";"subkey"="771";"value"="31868"};
+{"key"="3860";"subkey"="772";"value"="31877"};
+{"key"="3861";"subkey"="772";"value"="31886"};
+{"key"="3862";"subkey"="772";"value"="31896"};
+{"key"="3863";"subkey"="772";"value"="31905"};
+{"key"="3864";"subkey"="772";"value"="31914"};
+{"key"="3865";"subkey"="773";"value"="31923"};
+{"key"="3866";"subkey"="773";"value"="31933"};
+{"key"="3867";"subkey"="773";"value"="31942"};
+{"key"="3868";"subkey"="773";"value"="31951"};
+{"key"="3869";"subkey"="773";"value"="31960"};
+{"key"="3870";"subkey"="774";"value"="31970"};
+{"key"="3871";"subkey"="774";"value"="31979"};
+{"key"="3872";"subkey"="774";"value"="31988"};
+{"key"="3873";"subkey"="774";"value"="31997"};
+{"key"="3874";"subkey"="774";"value"="32007"};
+{"key"="3875";"subkey"="775";"value"="32016"};
+{"key"="3876";"subkey"="775";"value"="32025"};
+{"key"="3877";"subkey"="775";"value"="32034"};
+{"key"="3878";"subkey"="775";"value"="32044"};
+{"key"="3879";"subkey"="775";"value"="32053"};
+{"key"="3880";"subkey"="776";"value"="32062"};
+{"key"="3881";"subkey"="776";"value"="32071"};
+{"key"="3882";"subkey"="776";"value"="32081"};
+{"key"="3883";"subkey"="776";"value"="32090"};
+{"key"="3884";"subkey"="776";"value"="32099"};
+{"key"="3885";"subkey"="777";"value"="32109"};
+{"key"="3886";"subkey"="777";"value"="32118"};
+{"key"="3887";"subkey"="777";"value"="32127"};
+{"key"="3888";"subkey"="777";"value"="32136"};
+{"key"="3889";"subkey"="777";"value"="32146"};
+{"key"="3890";"subkey"="778";"value"="32155"};
+{"key"="3891";"subkey"="778";"value"="32164"};
+{"key"="3892";"subkey"="778";"value"="32173"};
+{"key"="3893";"subkey"="778";"value"="32183"};
+{"key"="3894";"subkey"="778";"value"="32192"};
+{"key"="3895";"subkey"="779";"value"="32201"};
+{"key"="3896";"subkey"="779";"value"="32210"};
+{"key"="3897";"subkey"="779";"value"="32220"};
+{"key"="3898";"subkey"="779";"value"="32229"};
+{"key"="3899";"subkey"="779";"value"="32238"};
+{"key"="3900";"subkey"="780";"value"="32248"};
+{"key"="3901";"subkey"="780";"value"="32257"};
+{"key"="3902";"subkey"="780";"value"="32266"};
+{"key"="3903";"subkey"="780";"value"="32275"};
+{"key"="3904";"subkey"="780";"value"="32285"};
+{"key"="3905";"subkey"="781";"value"="32294"};
+{"key"="3906";"subkey"="781";"value"="32303"};
+{"key"="3907";"subkey"="781";"value"="32312"};
+{"key"="3908";"subkey"="781";"value"="32322"};
+{"key"="3909";"subkey"="781";"value"="32331"};
+{"key"="3910";"subkey"="782";"value"="32340"};
+{"key"="3911";"subkey"="782";"value"="32350"};
+{"key"="3912";"subkey"="782";"value"="32359"};
+{"key"="3913";"subkey"="782";"value"="32368"};
+{"key"="3914";"subkey"="782";"value"="32377"};
+{"key"="3915";"subkey"="783";"value"="32387"};
+{"key"="3916";"subkey"="783";"value"="32396"};
+{"key"="3917";"subkey"="783";"value"="32405"};
+{"key"="3918";"subkey"="783";"value"="32414"};
+{"key"="3919";"subkey"="783";"value"="32424"};
+{"key"="3920";"subkey"="784";"value"="32433"};
+{"key"="3921";"subkey"="784";"value"="32442"};
+{"key"="3922";"subkey"="784";"value"="32452"};
+{"key"="3923";"subkey"="784";"value"="32461"};
+{"key"="3924";"subkey"="784";"value"="32470"};
+{"key"="3925";"subkey"="785";"value"="32479"};
+{"key"="3926";"subkey"="785";"value"="32489"};
+{"key"="3927";"subkey"="785";"value"="32498"};
+{"key"="3928";"subkey"="785";"value"="32507"};
+{"key"="3929";"subkey"="785";"value"="32516"};
+{"key"="3930";"subkey"="786";"value"="32526"};
+{"key"="3931";"subkey"="786";"value"="32535"};
+{"key"="3932";"subkey"="786";"value"="32544"};
+{"key"="3933";"subkey"="786";"value"="32554"};
+{"key"="3934";"subkey"="786";"value"="32563"};
+{"key"="3935";"subkey"="787";"value"="32572"};
+{"key"="3936";"subkey"="787";"value"="32581"};
+{"key"="3937";"subkey"="787";"value"="32591"};
+{"key"="3938";"subkey"="787";"value"="32600"};
+{"key"="3939";"subkey"="787";"value"="32609"};
+{"key"="3940";"subkey"="788";"value"="32619"};
+{"key"="3941";"subkey"="788";"value"="32628"};
+{"key"="3942";"subkey"="788";"value"="32637"};
+{"key"="3943";"subkey"="788";"value"="32646"};
+{"key"="3944";"subkey"="788";"value"="32656"};
+{"key"="3945";"subkey"="789";"value"="32665"};
+{"key"="3946";"subkey"="789";"value"="32674"};
+{"key"="3947";"subkey"="789";"value"="32683"};
+{"key"="3948";"subkey"="789";"value"="32693"};
+{"key"="3949";"subkey"="789";"value"="32702"};
+{"key"="3950";"subkey"="790";"value"="32711"};
+{"key"="3951";"subkey"="790";"value"="32721"};
+{"key"="3952";"subkey"="790";"value"="32730"};
+{"key"="3953";"subkey"="790";"value"="32739"};
+{"key"="3954";"subkey"="790";"value"="32748"};
+{"key"="3955";"subkey"="791";"value"="32758"};
+{"key"="3956";"subkey"="791";"value"="32767"};
+{"key"="3957";"subkey"="791";"value"="32776"};
+{"key"="3958";"subkey"="791";"value"="32786"};
+{"key"="3959";"subkey"="791";"value"="32795"};
+{"key"="3960";"subkey"="792";"value"="32804"};
+{"key"="3961";"subkey"="792";"value"="32813"};
+{"key"="3962";"subkey"="792";"value"="32823"};
+{"key"="3963";"subkey"="792";"value"="32832"};
+{"key"="3964";"subkey"="792";"value"="32841"};
+{"key"="3965";"subkey"="793";"value"="32851"};
+{"key"="3966";"subkey"="793";"value"="32860"};
+{"key"="3967";"subkey"="793";"value"="32869"};
+{"key"="3968";"subkey"="793";"value"="32878"};
+{"key"="3969";"subkey"="793";"value"="32888"};
+{"key"="3970";"subkey"="794";"value"="32897"};
+{"key"="3971";"subkey"="794";"value"="32906"};
+{"key"="3972";"subkey"="794";"value"="32916"};
+{"key"="3973";"subkey"="794";"value"="32925"};
+{"key"="3974";"subkey"="794";"value"="32934"};
+{"key"="3975";"subkey"="795";"value"="32943"};
+{"key"="3976";"subkey"="795";"value"="32953"};
+{"key"="3977";"subkey"="795";"value"="32962"};
+{"key"="3978";"subkey"="795";"value"="32971"};
+{"key"="3979";"subkey"="795";"value"="32981"};
+{"key"="3980";"subkey"="796";"value"="32990"};
+{"key"="3981";"subkey"="796";"value"="32999"};
+{"key"="3982";"subkey"="796";"value"="33008"};
+{"key"="3983";"subkey"="796";"value"="33018"};
+{"key"="3984";"subkey"="796";"value"="33027"};
+{"key"="3985";"subkey"="797";"value"="33036"};
+{"key"="3986";"subkey"="797";"value"="33046"};
+{"key"="3987";"subkey"="797";"value"="33055"};
+{"key"="3988";"subkey"="797";"value"="33064"};
+{"key"="3989";"subkey"="797";"value"="33073"};
+{"key"="3990";"subkey"="798";"value"="33083"};
+{"key"="3991";"subkey"="798";"value"="33092"};
+{"key"="3992";"subkey"="798";"value"="33101"};
+{"key"="3993";"subkey"="798";"value"="33111"};
+{"key"="3994";"subkey"="798";"value"="33120"};
+{"key"="3995";"subkey"="799";"value"="33129"};
+{"key"="3996";"subkey"="799";"value"="33139"};
+{"key"="3997";"subkey"="799";"value"="33148"};
+{"key"="3998";"subkey"="799";"value"="33157"};
+{"key"="3999";"subkey"="799";"value"="33166"};
+{"key"="4000";"subkey"="800";"value"="33176"};
+{"key"="4001";"subkey"="800";"value"="33185"};
+{"key"="4002";"subkey"="800";"value"="33194"};
+{"key"="4003";"subkey"="800";"value"="33204"};
+{"key"="4004";"subkey"="800";"value"="33213"};
+{"key"="4005";"subkey"="801";"value"="33222"};
+{"key"="4006";"subkey"="801";"value"="33231"};
+{"key"="4007";"subkey"="801";"value"="33241"};
+{"key"="4008";"subkey"="801";"value"="33250"};
+{"key"="4009";"subkey"="801";"value"="33259"};
+{"key"="4010";"subkey"="802";"value"="33269"};
+{"key"="4011";"subkey"="802";"value"="33278"};
+{"key"="4012";"subkey"="802";"value"="33287"};
+{"key"="4013";"subkey"="802";"value"="33297"};
+{"key"="4014";"subkey"="802";"value"="33306"};
+{"key"="4015";"subkey"="803";"value"="33315"};
+{"key"="4016";"subkey"="803";"value"="33324"};
+{"key"="4017";"subkey"="803";"value"="33334"};
+{"key"="4018";"subkey"="803";"value"="33343"};
+{"key"="4019";"subkey"="803";"value"="33352"};
+{"key"="4020";"subkey"="804";"value"="33362"};
+{"key"="4021";"subkey"="804";"value"="33371"};
+{"key"="4022";"subkey"="804";"value"="33380"};
+{"key"="4023";"subkey"="804";"value"="33390"};
+{"key"="4024";"subkey"="804";"value"="33399"};
+{"key"="4025";"subkey"="805";"value"="33408"};
+{"key"="4026";"subkey"="805";"value"="33417"};
+{"key"="4027";"subkey"="805";"value"="33427"};
+{"key"="4028";"subkey"="805";"value"="33436"};
+{"key"="4029";"subkey"="805";"value"="33445"};
+{"key"="4030";"subkey"="806";"value"="33455"};
+{"key"="4031";"subkey"="806";"value"="33464"};
+{"key"="4032";"subkey"="806";"value"="33473"};
+{"key"="4033";"subkey"="806";"value"="33483"};
+{"key"="4034";"subkey"="806";"value"="33492"};
+{"key"="4035";"subkey"="807";"value"="33501"};
+{"key"="4036";"subkey"="807";"value"="33510"};
+{"key"="4037";"subkey"="807";"value"="33520"};
+{"key"="4038";"subkey"="807";"value"="33529"};
+{"key"="4039";"subkey"="807";"value"="33538"};
+{"key"="4040";"subkey"="808";"value"="33548"};
+{"key"="4041";"subkey"="808";"value"="33557"};
+{"key"="4042";"subkey"="808";"value"="33566"};
+{"key"="4043";"subkey"="808";"value"="33576"};
+{"key"="4044";"subkey"="808";"value"="33585"};
+{"key"="4045";"subkey"="809";"value"="33594"};
+{"key"="4046";"subkey"="809";"value"="33603"};
+{"key"="4047";"subkey"="809";"value"="33613"};
+{"key"="4048";"subkey"="809";"value"="33622"};
+{"key"="4049";"subkey"="809";"value"="33631"};
+{"key"="4050";"subkey"="810";"value"="33641"};
+{"key"="4051";"subkey"="810";"value"="33650"};
+{"key"="4052";"subkey"="810";"value"="33659"};
+{"key"="4053";"subkey"="810";"value"="33669"};
+{"key"="4054";"subkey"="810";"value"="33678"};
+{"key"="4055";"subkey"="811";"value"="33687"};
+{"key"="4056";"subkey"="811";"value"="33697"};
+{"key"="4057";"subkey"="811";"value"="33706"};
+{"key"="4058";"subkey"="811";"value"="33715"};
+{"key"="4059";"subkey"="811";"value"="33724"};
+{"key"="4060";"subkey"="812";"value"="33734"};
+{"key"="4061";"subkey"="812";"value"="33743"};
+{"key"="4062";"subkey"="812";"value"="33752"};
+{"key"="4063";"subkey"="812";"value"="33762"};
+{"key"="4064";"subkey"="812";"value"="33771"};
+{"key"="4065";"subkey"="813";"value"="33780"};
+{"key"="4066";"subkey"="813";"value"="33790"};
+{"key"="4067";"subkey"="813";"value"="33799"};
+{"key"="4068";"subkey"="813";"value"="33808"};
+{"key"="4069";"subkey"="813";"value"="33818"};
+{"key"="4070";"subkey"="814";"value"="33827"};
+{"key"="4071";"subkey"="814";"value"="33836"};
+{"key"="4072";"subkey"="814";"value"="33846"};
+{"key"="4073";"subkey"="814";"value"="33855"};
+{"key"="4074";"subkey"="814";"value"="33864"};
+{"key"="4075";"subkey"="815";"value"="33873"};
+{"key"="4076";"subkey"="815";"value"="33883"};
+{"key"="4077";"subkey"="815";"value"="33892"};
+{"key"="4078";"subkey"="815";"value"="33901"};
+{"key"="4079";"subkey"="815";"value"="33911"};
+{"key"="4080";"subkey"="816";"value"="33920"};
+{"key"="4081";"subkey"="816";"value"="33929"};
+{"key"="4082";"subkey"="816";"value"="33939"};
+{"key"="4083";"subkey"="816";"value"="33948"};
+{"key"="4084";"subkey"="816";"value"="33957"};
+{"key"="4085";"subkey"="817";"value"="33967"};
+{"key"="4086";"subkey"="817";"value"="33976"};
+{"key"="4087";"subkey"="817";"value"="33985"};
+{"key"="4088";"subkey"="817";"value"="33995"};
+{"key"="4089";"subkey"="817";"value"="34004"};
+{"key"="4090";"subkey"="818";"value"="34013"};
+{"key"="4091";"subkey"="818";"value"="34022"};
+{"key"="4092";"subkey"="818";"value"="34032"};
+{"key"="4093";"subkey"="818";"value"="34041"};
+{"key"="4094";"subkey"="818";"value"="34050"};
+{"key"="4095";"subkey"="819";"value"="34060"};
+{"key"="4096";"subkey"="819";"value"="34069"};
+{"key"="4097";"subkey"="819";"value"="34078"};
+{"key"="4098";"subkey"="819";"value"="34088"};
+{"key"="4099";"subkey"="819";"value"="34097"};
+{"key"="4100";"subkey"="820";"value"="34106"};
+{"key"="4101";"subkey"="820";"value"="34116"};
+{"key"="4102";"subkey"="820";"value"="34125"};
+{"key"="4103";"subkey"="820";"value"="34134"};
+{"key"="4104";"subkey"="820";"value"="34144"};
+{"key"="4105";"subkey"="821";"value"="34153"};
+{"key"="4106";"subkey"="821";"value"="34162"};
+{"key"="4107";"subkey"="821";"value"="34172"};
+{"key"="4108";"subkey"="821";"value"="34181"};
+{"key"="4109";"subkey"="821";"value"="34190"};
+{"key"="4110";"subkey"="822";"value"="34200"};
+{"key"="4111";"subkey"="822";"value"="34209"};
+{"key"="4112";"subkey"="822";"value"="34218"};
+{"key"="4113";"subkey"="822";"value"="34228"};
+{"key"="4114";"subkey"="822";"value"="34237"};
+{"key"="4115";"subkey"="823";"value"="34246"};
+{"key"="4116";"subkey"="823";"value"="34255"};
+{"key"="4117";"subkey"="823";"value"="34265"};
+{"key"="4118";"subkey"="823";"value"="34274"};
+{"key"="4119";"subkey"="823";"value"="34283"};
+{"key"="4120";"subkey"="824";"value"="34293"};
+{"key"="4121";"subkey"="824";"value"="34302"};
+{"key"="4122";"subkey"="824";"value"="34311"};
+{"key"="4123";"subkey"="824";"value"="34321"};
+{"key"="4124";"subkey"="824";"value"="34330"};
+{"key"="4125";"subkey"="825";"value"="34339"};
+{"key"="4126";"subkey"="825";"value"="34349"};
+{"key"="4127";"subkey"="825";"value"="34358"};
+{"key"="4128";"subkey"="825";"value"="34367"};
+{"key"="4129";"subkey"="825";"value"="34377"};
+{"key"="4130";"subkey"="826";"value"="34386"};
+{"key"="4131";"subkey"="826";"value"="34395"};
+{"key"="4132";"subkey"="826";"value"="34405"};
+{"key"="4133";"subkey"="826";"value"="34414"};
+{"key"="4134";"subkey"="826";"value"="34423"};
+{"key"="4135";"subkey"="827";"value"="34433"};
+{"key"="4136";"subkey"="827";"value"="34442"};
+{"key"="4137";"subkey"="827";"value"="34451"};
+{"key"="4138";"subkey"="827";"value"="34461"};
+{"key"="4139";"subkey"="827";"value"="34470"};
+{"key"="4140";"subkey"="828";"value"="34479"};
+{"key"="4141";"subkey"="828";"value"="34489"};
+{"key"="4142";"subkey"="828";"value"="34498"};
+{"key"="4143";"subkey"="828";"value"="34507"};
+{"key"="4144";"subkey"="828";"value"="34517"};
+{"key"="4145";"subkey"="829";"value"="34526"};
+{"key"="4146";"subkey"="829";"value"="34535"};
+{"key"="4147";"subkey"="829";"value"="34545"};
+{"key"="4148";"subkey"="829";"value"="34554"};
+{"key"="4149";"subkey"="829";"value"="34563"};
+{"key"="4150";"subkey"="830";"value"="34573"};
+{"key"="4151";"subkey"="830";"value"="34582"};
+{"key"="4152";"subkey"="830";"value"="34591"};
+{"key"="4153";"subkey"="830";"value"="34601"};
+{"key"="4154";"subkey"="830";"value"="34610"};
+{"key"="4155";"subkey"="831";"value"="34619"};
+{"key"="4156";"subkey"="831";"value"="34629"};
+{"key"="4157";"subkey"="831";"value"="34638"};
+{"key"="4158";"subkey"="831";"value"="34647"};
+{"key"="4159";"subkey"="831";"value"="34657"};
+{"key"="4160";"subkey"="832";"value"="34666"};
+{"key"="4161";"subkey"="832";"value"="34675"};
+{"key"="4162";"subkey"="832";"value"="34685"};
+{"key"="4163";"subkey"="832";"value"="34694"};
+{"key"="4164";"subkey"="832";"value"="34703"};
+{"key"="4165";"subkey"="833";"value"="34713"};
+{"key"="4166";"subkey"="833";"value"="34722"};
+{"key"="4167";"subkey"="833";"value"="34731"};
+{"key"="4168";"subkey"="833";"value"="34741"};
+{"key"="4169";"subkey"="833";"value"="34750"};
+{"key"="4170";"subkey"="834";"value"="34759"};
+{"key"="4171";"subkey"="834";"value"="34769"};
+{"key"="4172";"subkey"="834";"value"="34778"};
+{"key"="4173";"subkey"="834";"value"="34787"};
+{"key"="4174";"subkey"="834";"value"="34797"};
+{"key"="4175";"subkey"="835";"value"="34806"};
+{"key"="4176";"subkey"="835";"value"="34815"};
+{"key"="4177";"subkey"="835";"value"="34825"};
+{"key"="4178";"subkey"="835";"value"="34834"};
+{"key"="4179";"subkey"="835";"value"="34843"};
+{"key"="4180";"subkey"="836";"value"="34853"};
+{"key"="4181";"subkey"="836";"value"="34862"};
+{"key"="4182";"subkey"="836";"value"="34871"};
+{"key"="4183";"subkey"="836";"value"="34881"};
+{"key"="4184";"subkey"="836";"value"="34890"};
+{"key"="4185";"subkey"="837";"value"="34899"};
+{"key"="4186";"subkey"="837";"value"="34909"};
+{"key"="4187";"subkey"="837";"value"="34918"};
+{"key"="4188";"subkey"="837";"value"="34927"};
+{"key"="4189";"subkey"="837";"value"="34937"};
+{"key"="4190";"subkey"="838";"value"="34946"};
+{"key"="4191";"subkey"="838";"value"="34955"};
+{"key"="4192";"subkey"="838";"value"="34965"};
+{"key"="4193";"subkey"="838";"value"="34974"};
+{"key"="4194";"subkey"="838";"value"="34983"};
+{"key"="4195";"subkey"="839";"value"="34993"};
+{"key"="4196";"subkey"="839";"value"="35002"};
+{"key"="4197";"subkey"="839";"value"="35011"};
+{"key"="4198";"subkey"="839";"value"="35021"};
+{"key"="4199";"subkey"="839";"value"="35030"};
+{"key"="4200";"subkey"="840";"value"="35039"};
+{"key"="4201";"subkey"="840";"value"="35049"};
+{"key"="4202";"subkey"="840";"value"="35058"};
+{"key"="4203";"subkey"="840";"value"="35067"};
+{"key"="4204";"subkey"="840";"value"="35077"};
+{"key"="4205";"subkey"="841";"value"="35086"};
+{"key"="4206";"subkey"="841";"value"="35095"};
+{"key"="4207";"subkey"="841";"value"="35105"};
+{"key"="4208";"subkey"="841";"value"="35114"};
+{"key"="4209";"subkey"="841";"value"="35124"};
+{"key"="4210";"subkey"="842";"value"="35133"};
+{"key"="4211";"subkey"="842";"value"="35142"};
+{"key"="4212";"subkey"="842";"value"="35152"};
+{"key"="4213";"subkey"="842";"value"="35161"};
+{"key"="4214";"subkey"="842";"value"="35170"};
+{"key"="4215";"subkey"="843";"value"="35180"};
+{"key"="4216";"subkey"="843";"value"="35189"};
+{"key"="4217";"subkey"="843";"value"="35198"};
+{"key"="4218";"subkey"="843";"value"="35208"};
+{"key"="4219";"subkey"="843";"value"="35217"};
+{"key"="4220";"subkey"="844";"value"="35226"};
+{"key"="4221";"subkey"="844";"value"="35236"};
+{"key"="4222";"subkey"="844";"value"="35245"};
+{"key"="4223";"subkey"="844";"value"="35254"};
+{"key"="4224";"subkey"="844";"value"="35264"};
+{"key"="4225";"subkey"="845";"value"="35273"};
+{"key"="4226";"subkey"="845";"value"="35282"};
+{"key"="4227";"subkey"="845";"value"="35292"};
+{"key"="4228";"subkey"="845";"value"="35301"};
+{"key"="4229";"subkey"="845";"value"="35310"};
+{"key"="4230";"subkey"="846";"value"="35320"};
+{"key"="4231";"subkey"="846";"value"="35329"};
+{"key"="4232";"subkey"="846";"value"="35339"};
+{"key"="4233";"subkey"="846";"value"="35348"};
+{"key"="4234";"subkey"="846";"value"="35357"};
+{"key"="4235";"subkey"="847";"value"="35367"};
+{"key"="4236";"subkey"="847";"value"="35376"};
+{"key"="4237";"subkey"="847";"value"="35385"};
+{"key"="4238";"subkey"="847";"value"="35395"};
+{"key"="4239";"subkey"="847";"value"="35404"};
+{"key"="4240";"subkey"="848";"value"="35413"};
+{"key"="4241";"subkey"="848";"value"="35423"};
+{"key"="4242";"subkey"="848";"value"="35432"};
+{"key"="4243";"subkey"="848";"value"="35441"};
+{"key"="4244";"subkey"="848";"value"="35451"};
+{"key"="4245";"subkey"="849";"value"="35460"};
+{"key"="4246";"subkey"="849";"value"="35469"};
+{"key"="4247";"subkey"="849";"value"="35479"};
+{"key"="4248";"subkey"="849";"value"="35488"};
+{"key"="4249";"subkey"="849";"value"="35498"};
+{"key"="4250";"subkey"="850";"value"="35507"};
+{"key"="4251";"subkey"="850";"value"="35516"};
+{"key"="4252";"subkey"="850";"value"="35526"};
+{"key"="4253";"subkey"="850";"value"="35535"};
+{"key"="4254";"subkey"="850";"value"="35544"};
+{"key"="4255";"subkey"="851";"value"="35554"};
+{"key"="4256";"subkey"="851";"value"="35563"};
+{"key"="4257";"subkey"="851";"value"="35572"};
+{"key"="4258";"subkey"="851";"value"="35582"};
+{"key"="4259";"subkey"="851";"value"="35591"};
+{"key"="4260";"subkey"="852";"value"="35600"};
+{"key"="4261";"subkey"="852";"value"="35610"};
+{"key"="4262";"subkey"="852";"value"="35619"};
+{"key"="4263";"subkey"="852";"value"="35628"};
+{"key"="4264";"subkey"="852";"value"="35638"};
+{"key"="4265";"subkey"="853";"value"="35647"};
+{"key"="4266";"subkey"="853";"value"="35657"};
+{"key"="4267";"subkey"="853";"value"="35666"};
+{"key"="4268";"subkey"="853";"value"="35675"};
+{"key"="4269";"subkey"="853";"value"="35685"};
+{"key"="4270";"subkey"="854";"value"="35694"};
+{"key"="4271";"subkey"="854";"value"="35703"};
+{"key"="4272";"subkey"="854";"value"="35713"};
+{"key"="4273";"subkey"="854";"value"="35722"};
+{"key"="4274";"subkey"="854";"value"="35731"};
+{"key"="4275";"subkey"="855";"value"="35741"};
+{"key"="4276";"subkey"="855";"value"="35750"};
+{"key"="4277";"subkey"="855";"value"="35760"};
+{"key"="4278";"subkey"="855";"value"="35769"};
+{"key"="4279";"subkey"="855";"value"="35778"};
+{"key"="4280";"subkey"="856";"value"="35788"};
+{"key"="4281";"subkey"="856";"value"="35797"};
+{"key"="4282";"subkey"="856";"value"="35806"};
+{"key"="4283";"subkey"="856";"value"="35816"};
+{"key"="4284";"subkey"="856";"value"="35825"};
+{"key"="4285";"subkey"="857";"value"="35834"};
+{"key"="4286";"subkey"="857";"value"="35844"};
+{"key"="4287";"subkey"="857";"value"="35853"};
+{"key"="4288";"subkey"="857";"value"="35863"};
+{"key"="4289";"subkey"="857";"value"="35872"};
+{"key"="4290";"subkey"="858";"value"="35881"};
+{"key"="4291";"subkey"="858";"value"="35891"};
+{"key"="4292";"subkey"="858";"value"="35900"};
+{"key"="4293";"subkey"="858";"value"="35909"};
+{"key"="4294";"subkey"="858";"value"="35919"};
+{"key"="4295";"subkey"="859";"value"="35928"};
+{"key"="4296";"subkey"="859";"value"="35937"};
+{"key"="4297";"subkey"="859";"value"="35947"};
+{"key"="4298";"subkey"="859";"value"="35956"};
+{"key"="4299";"subkey"="859";"value"="35966"};
+{"key"="4300";"subkey"="860";"value"="35975"};
+{"key"="4301";"subkey"="860";"value"="35984"};
+{"key"="4302";"subkey"="860";"value"="35994"};
+{"key"="4303";"subkey"="860";"value"="36003"};
+{"key"="4304";"subkey"="860";"value"="36012"};
+{"key"="4305";"subkey"="861";"value"="36022"};
+{"key"="4306";"subkey"="861";"value"="36031"};
+{"key"="4307";"subkey"="861";"value"="36040"};
+{"key"="4308";"subkey"="861";"value"="36050"};
+{"key"="4309";"subkey"="861";"value"="36059"};
+{"key"="4310";"subkey"="862";"value"="36069"};
+{"key"="4311";"subkey"="862";"value"="36078"};
+{"key"="4312";"subkey"="862";"value"="36087"};
+{"key"="4313";"subkey"="862";"value"="36097"};
+{"key"="4314";"subkey"="862";"value"="36106"};
+{"key"="4315";"subkey"="863";"value"="36115"};
+{"key"="4316";"subkey"="863";"value"="36125"};
+{"key"="4317";"subkey"="863";"value"="36134"};
+{"key"="4318";"subkey"="863";"value"="36144"};
+{"key"="4319";"subkey"="863";"value"="36153"};
+{"key"="4320";"subkey"="864";"value"="36162"};
+{"key"="4321";"subkey"="864";"value"="36172"};
+{"key"="4322";"subkey"="864";"value"="36181"};
+{"key"="4323";"subkey"="864";"value"="36190"};
+{"key"="4324";"subkey"="864";"value"="36200"};
+{"key"="4325";"subkey"="865";"value"="36209"};
+{"key"="4326";"subkey"="865";"value"="36218"};
+{"key"="4327";"subkey"="865";"value"="36228"};
+{"key"="4328";"subkey"="865";"value"="36237"};
+{"key"="4329";"subkey"="865";"value"="36247"};
+{"key"="4330";"subkey"="866";"value"="36256"};
+{"key"="4331";"subkey"="866";"value"="36265"};
+{"key"="4332";"subkey"="866";"value"="36275"};
+{"key"="4333";"subkey"="866";"value"="36284"};
+{"key"="4334";"subkey"="866";"value"="36293"};
+{"key"="4335";"subkey"="867";"value"="36303"};
+{"key"="4336";"subkey"="867";"value"="36312"};
+{"key"="4337";"subkey"="867";"value"="36322"};
+{"key"="4338";"subkey"="867";"value"="36331"};
+{"key"="4339";"subkey"="867";"value"="36340"};
+{"key"="4340";"subkey"="868";"value"="36350"};
+{"key"="4341";"subkey"="868";"value"="36359"};
+{"key"="4342";"subkey"="868";"value"="36368"};
+{"key"="4343";"subkey"="868";"value"="36378"};
+{"key"="4344";"subkey"="868";"value"="36387"};
+{"key"="4345";"subkey"="869";"value"="36397"};
+{"key"="4346";"subkey"="869";"value"="36406"};
+{"key"="4347";"subkey"="869";"value"="36415"};
+{"key"="4348";"subkey"="869";"value"="36425"};
+{"key"="4349";"subkey"="869";"value"="36434"};
+{"key"="4350";"subkey"="870";"value"="36444"};
+{"key"="4351";"subkey"="870";"value"="36453"};
+{"key"="4352";"subkey"="870";"value"="36462"};
+{"key"="4353";"subkey"="870";"value"="36472"};
+{"key"="4354";"subkey"="870";"value"="36481"};
+{"key"="4355";"subkey"="871";"value"="36490"};
+{"key"="4356";"subkey"="871";"value"="36500"};
+{"key"="4357";"subkey"="871";"value"="36509"};
+{"key"="4358";"subkey"="871";"value"="36519"};
+{"key"="4359";"subkey"="871";"value"="36528"};
+{"key"="4360";"subkey"="872";"value"="36537"};
+{"key"="4361";"subkey"="872";"value"="36547"};
+{"key"="4362";"subkey"="872";"value"="36556"};
+{"key"="4363";"subkey"="872";"value"="36565"};
+{"key"="4364";"subkey"="872";"value"="36575"};
+{"key"="4365";"subkey"="873";"value"="36584"};
+{"key"="4366";"subkey"="873";"value"="36594"};
+{"key"="4367";"subkey"="873";"value"="36603"};
+{"key"="4368";"subkey"="873";"value"="36612"};
+{"key"="4369";"subkey"="873";"value"="36622"};
+{"key"="4370";"subkey"="874";"value"="36631"};
+{"key"="4371";"subkey"="874";"value"="36640"};
+{"key"="4372";"subkey"="874";"value"="36650"};
+{"key"="4373";"subkey"="874";"value"="36659"};
+{"key"="4374";"subkey"="874";"value"="36669"};
+{"key"="4375";"subkey"="875";"value"="36678"};
+{"key"="4376";"subkey"="875";"value"="36687"};
+{"key"="4377";"subkey"="875";"value"="36697"};
+{"key"="4378";"subkey"="875";"value"="36706"};
+{"key"="4379";"subkey"="875";"value"="36716"};
+{"key"="4380";"subkey"="876";"value"="36725"};
+{"key"="4381";"subkey"="876";"value"="36734"};
+{"key"="4382";"subkey"="876";"value"="36744"};
+{"key"="4383";"subkey"="876";"value"="36753"};
+{"key"="4384";"subkey"="876";"value"="36762"};
+{"key"="4385";"subkey"="877";"value"="36772"};
+{"key"="4386";"subkey"="877";"value"="36781"};
+{"key"="4387";"subkey"="877";"value"="36791"};
+{"key"="4388";"subkey"="877";"value"="36800"};
+{"key"="4389";"subkey"="877";"value"="36809"};
+{"key"="4390";"subkey"="878";"value"="36819"};
+{"key"="4391";"subkey"="878";"value"="36828"};
+{"key"="4392";"subkey"="878";"value"="36838"};
+{"key"="4393";"subkey"="878";"value"="36847"};
+{"key"="4394";"subkey"="878";"value"="36856"};
+{"key"="4395";"subkey"="879";"value"="36866"};
+{"key"="4396";"subkey"="879";"value"="36875"};
+{"key"="4397";"subkey"="879";"value"="36885"};
+{"key"="4398";"subkey"="879";"value"="36894"};
+{"key"="4399";"subkey"="879";"value"="36903"};
+{"key"="4400";"subkey"="880";"value"="36913"};
+{"key"="4401";"subkey"="880";"value"="36922"};
+{"key"="4402";"subkey"="880";"value"="36931"};
+{"key"="4403";"subkey"="880";"value"="36941"};
+{"key"="4404";"subkey"="880";"value"="36950"};
+{"key"="4405";"subkey"="881";"value"="36960"};
+{"key"="4406";"subkey"="881";"value"="36969"};
+{"key"="4407";"subkey"="881";"value"="36978"};
+{"key"="4408";"subkey"="881";"value"="36988"};
+{"key"="4409";"subkey"="881";"value"="36997"};
+{"key"="4410";"subkey"="882";"value"="37007"};
+{"key"="4411";"subkey"="882";"value"="37016"};
+{"key"="4412";"subkey"="882";"value"="37025"};
+{"key"="4413";"subkey"="882";"value"="37035"};
+{"key"="4414";"subkey"="882";"value"="37044"};
+{"key"="4415";"subkey"="883";"value"="37054"};
+{"key"="4416";"subkey"="883";"value"="37063"};
+{"key"="4417";"subkey"="883";"value"="37072"};
+{"key"="4418";"subkey"="883";"value"="37082"};
+{"key"="4419";"subkey"="883";"value"="37091"};
+{"key"="4420";"subkey"="884";"value"="37101"};
+{"key"="4421";"subkey"="884";"value"="37110"};
+{"key"="4422";"subkey"="884";"value"="37119"};
+{"key"="4423";"subkey"="884";"value"="37129"};
+{"key"="4424";"subkey"="884";"value"="37138"};
+{"key"="4425";"subkey"="885";"value"="37147"};
+{"key"="4426";"subkey"="885";"value"="37157"};
+{"key"="4427";"subkey"="885";"value"="37166"};
+{"key"="4428";"subkey"="885";"value"="37176"};
+{"key"="4429";"subkey"="885";"value"="37185"};
+{"key"="4430";"subkey"="886";"value"="37194"};
+{"key"="4431";"subkey"="886";"value"="37204"};
+{"key"="4432";"subkey"="886";"value"="37213"};
+{"key"="4433";"subkey"="886";"value"="37223"};
+{"key"="4434";"subkey"="886";"value"="37232"};
+{"key"="4435";"subkey"="887";"value"="37241"};
+{"key"="4436";"subkey"="887";"value"="37251"};
+{"key"="4437";"subkey"="887";"value"="37260"};
+{"key"="4438";"subkey"="887";"value"="37270"};
+{"key"="4439";"subkey"="887";"value"="37279"};
+{"key"="4440";"subkey"="888";"value"="37288"};
+{"key"="4441";"subkey"="888";"value"="37298"};
+{"key"="4442";"subkey"="888";"value"="37307"};
+{"key"="4443";"subkey"="888";"value"="37317"};
+{"key"="4444";"subkey"="888";"value"="37326"};
+{"key"="4445";"subkey"="889";"value"="37335"};
+{"key"="4446";"subkey"="889";"value"="37345"};
+{"key"="4447";"subkey"="889";"value"="37354"};
+{"key"="4448";"subkey"="889";"value"="37364"};
+{"key"="4449";"subkey"="889";"value"="37373"};
+{"key"="4450";"subkey"="890";"value"="37382"};
+{"key"="4451";"subkey"="890";"value"="37392"};
+{"key"="4452";"subkey"="890";"value"="37401"};
+{"key"="4453";"subkey"="890";"value"="37411"};
+{"key"="4454";"subkey"="890";"value"="37420"};
+{"key"="4455";"subkey"="891";"value"="37429"};
+{"key"="4456";"subkey"="891";"value"="37439"};
+{"key"="4457";"subkey"="891";"value"="37448"};
+{"key"="4458";"subkey"="891";"value"="37458"};
+{"key"="4459";"subkey"="891";"value"="37467"};
+{"key"="4460";"subkey"="892";"value"="37476"};
+{"key"="4461";"subkey"="892";"value"="37486"};
+{"key"="4462";"subkey"="892";"value"="37495"};
+{"key"="4463";"subkey"="892";"value"="37505"};
+{"key"="4464";"subkey"="892";"value"="37514"};
+{"key"="4465";"subkey"="893";"value"="37523"};
+{"key"="4466";"subkey"="893";"value"="37533"};
+{"key"="4467";"subkey"="893";"value"="37542"};
+{"key"="4468";"subkey"="893";"value"="37552"};
+{"key"="4469";"subkey"="893";"value"="37561"};
+{"key"="4470";"subkey"="894";"value"="37570"};
+{"key"="4471";"subkey"="894";"value"="37580"};
+{"key"="4472";"subkey"="894";"value"="37589"};
+{"key"="4473";"subkey"="894";"value"="37599"};
+{"key"="4474";"subkey"="894";"value"="37608"};
+{"key"="4475";"subkey"="895";"value"="37618"};
+{"key"="4476";"subkey"="895";"value"="37627"};
+{"key"="4477";"subkey"="895";"value"="37636"};
+{"key"="4478";"subkey"="895";"value"="37646"};
+{"key"="4479";"subkey"="895";"value"="37655"};
+{"key"="4480";"subkey"="896";"value"="37665"};
+{"key"="4481";"subkey"="896";"value"="37674"};
+{"key"="4482";"subkey"="896";"value"="37683"};
+{"key"="4483";"subkey"="896";"value"="37693"};
+{"key"="4484";"subkey"="896";"value"="37702"};
+{"key"="4485";"subkey"="897";"value"="37712"};
+{"key"="4486";"subkey"="897";"value"="37721"};
+{"key"="4487";"subkey"="897";"value"="37730"};
+{"key"="4488";"subkey"="897";"value"="37740"};
+{"key"="4489";"subkey"="897";"value"="37749"};
+{"key"="4490";"subkey"="898";"value"="37759"};
+{"key"="4491";"subkey"="898";"value"="37768"};
+{"key"="4492";"subkey"="898";"value"="37777"};
+{"key"="4493";"subkey"="898";"value"="37787"};
+{"key"="4494";"subkey"="898";"value"="37796"};
+{"key"="4495";"subkey"="899";"value"="37806"};
+{"key"="4496";"subkey"="899";"value"="37815"};
+{"key"="4497";"subkey"="899";"value"="37825"};
+{"key"="4498";"subkey"="899";"value"="37834"};
+{"key"="4499";"subkey"="899";"value"="37843"};
+{"key"="4500";"subkey"="900";"value"="37853"};
+{"key"="4501";"subkey"="900";"value"="37862"};
+{"key"="4502";"subkey"="900";"value"="37872"};
+{"key"="4503";"subkey"="900";"value"="37881"};
+{"key"="4504";"subkey"="900";"value"="37890"};
+{"key"="4505";"subkey"="901";"value"="37900"};
+{"key"="4506";"subkey"="901";"value"="37909"};
+{"key"="4507";"subkey"="901";"value"="37919"};
+{"key"="4508";"subkey"="901";"value"="37928"};
+{"key"="4509";"subkey"="901";"value"="37937"};
+{"key"="4510";"subkey"="902";"value"="37947"};
+{"key"="4511";"subkey"="902";"value"="37956"};
+{"key"="4512";"subkey"="902";"value"="37966"};
+{"key"="4513";"subkey"="902";"value"="37975"};
+{"key"="4514";"subkey"="902";"value"="37985"};
+{"key"="4515";"subkey"="903";"value"="37994"};
+{"key"="4516";"subkey"="903";"value"="38003"};
+{"key"="4517";"subkey"="903";"value"="38013"};
+{"key"="4518";"subkey"="903";"value"="38022"};
+{"key"="4519";"subkey"="903";"value"="38032"};
+{"key"="4520";"subkey"="904";"value"="38041"};
+{"key"="4521";"subkey"="904";"value"="38050"};
+{"key"="4522";"subkey"="904";"value"="38060"};
+{"key"="4523";"subkey"="904";"value"="38069"};
+{"key"="4524";"subkey"="904";"value"="38079"};
+{"key"="4525";"subkey"="905";"value"="38088"};
+{"key"="4526";"subkey"="905";"value"="38098"};
+{"key"="4527";"subkey"="905";"value"="38107"};
+{"key"="4528";"subkey"="905";"value"="38116"};
+{"key"="4529";"subkey"="905";"value"="38126"};
+{"key"="4530";"subkey"="906";"value"="38135"};
+{"key"="4531";"subkey"="906";"value"="38145"};
+{"key"="4532";"subkey"="906";"value"="38154"};
+{"key"="4533";"subkey"="906";"value"="38163"};
+{"key"="4534";"subkey"="906";"value"="38173"};
+{"key"="4535";"subkey"="907";"value"="38182"};
+{"key"="4536";"subkey"="907";"value"="38192"};
+{"key"="4537";"subkey"="907";"value"="38201"};
+{"key"="4538";"subkey"="907";"value"="38211"};
+{"key"="4539";"subkey"="907";"value"="38220"};
+{"key"="4540";"subkey"="908";"value"="38229"};
+{"key"="4541";"subkey"="908";"value"="38239"};
+{"key"="4542";"subkey"="908";"value"="38248"};
+{"key"="4543";"subkey"="908";"value"="38258"};
+{"key"="4544";"subkey"="908";"value"="38267"};
+{"key"="4545";"subkey"="909";"value"="38277"};
+{"key"="4546";"subkey"="909";"value"="38286"};
+{"key"="4547";"subkey"="909";"value"="38295"};
+{"key"="4548";"subkey"="909";"value"="38305"};
+{"key"="4549";"subkey"="909";"value"="38314"};
+{"key"="4550";"subkey"="910";"value"="38324"};
+{"key"="4551";"subkey"="910";"value"="38333"};
+{"key"="4552";"subkey"="910";"value"="38342"};
+{"key"="4553";"subkey"="910";"value"="38352"};
+{"key"="4554";"subkey"="910";"value"="38361"};
+{"key"="4555";"subkey"="911";"value"="38371"};
+{"key"="4556";"subkey"="911";"value"="38380"};
+{"key"="4557";"subkey"="911";"value"="38390"};
+{"key"="4558";"subkey"="911";"value"="38399"};
+{"key"="4559";"subkey"="911";"value"="38408"};
+{"key"="4560";"subkey"="912";"value"="38418"};
+{"key"="4561";"subkey"="912";"value"="38427"};
+{"key"="4562";"subkey"="912";"value"="38437"};
+{"key"="4563";"subkey"="912";"value"="38446"};
+{"key"="4564";"subkey"="912";"value"="38456"};
+{"key"="4565";"subkey"="913";"value"="38465"};
+{"key"="4566";"subkey"="913";"value"="38474"};
+{"key"="4567";"subkey"="913";"value"="38484"};
+{"key"="4568";"subkey"="913";"value"="38493"};
+{"key"="4569";"subkey"="913";"value"="38503"};
+{"key"="4570";"subkey"="914";"value"="38512"};
+{"key"="4571";"subkey"="914";"value"="38522"};
+{"key"="4572";"subkey"="914";"value"="38531"};
+{"key"="4573";"subkey"="914";"value"="38540"};
+{"key"="4574";"subkey"="914";"value"="38550"};
+{"key"="4575";"subkey"="915";"value"="38559"};
+{"key"="4576";"subkey"="915";"value"="38569"};
+{"key"="4577";"subkey"="915";"value"="38578"};
+{"key"="4578";"subkey"="915";"value"="38588"};
+{"key"="4579";"subkey"="915";"value"="38597"};
+{"key"="4580";"subkey"="916";"value"="38606"};
+{"key"="4581";"subkey"="916";"value"="38616"};
+{"key"="4582";"subkey"="916";"value"="38625"};
+{"key"="4583";"subkey"="916";"value"="38635"};
+{"key"="4584";"subkey"="916";"value"="38644"};
+{"key"="4585";"subkey"="917";"value"="38654"};
+{"key"="4586";"subkey"="917";"value"="38663"};
+{"key"="4587";"subkey"="917";"value"="38672"};
+{"key"="4588";"subkey"="917";"value"="38682"};
+{"key"="4589";"subkey"="917";"value"="38691"};
+{"key"="4590";"subkey"="918";"value"="38701"};
+{"key"="4591";"subkey"="918";"value"="38710"};
+{"key"="4592";"subkey"="918";"value"="38720"};
+{"key"="4593";"subkey"="918";"value"="38729"};
+{"key"="4594";"subkey"="918";"value"="38738"};
+{"key"="4595";"subkey"="919";"value"="38748"};
+{"key"="4596";"subkey"="919";"value"="38757"};
+{"key"="4597";"subkey"="919";"value"="38767"};
+{"key"="4598";"subkey"="919";"value"="38776"};
+{"key"="4599";"subkey"="919";"value"="38786"};
+{"key"="4600";"subkey"="920";"value"="38795"};
+{"key"="4601";"subkey"="920";"value"="38804"};
+{"key"="4602";"subkey"="920";"value"="38814"};
+{"key"="4603";"subkey"="920";"value"="38823"};
+{"key"="4604";"subkey"="920";"value"="38833"};
+{"key"="4605";"subkey"="921";"value"="38842"};
+{"key"="4606";"subkey"="921";"value"="38852"};
+{"key"="4607";"subkey"="921";"value"="38861"};
+{"key"="4608";"subkey"="921";"value"="38871"};
+{"key"="4609";"subkey"="921";"value"="38880"};
+{"key"="4610";"subkey"="922";"value"="38889"};
+{"key"="4611";"subkey"="922";"value"="38899"};
+{"key"="4612";"subkey"="922";"value"="38908"};
+{"key"="4613";"subkey"="922";"value"="38918"};
+{"key"="4614";"subkey"="922";"value"="38927"};
+{"key"="4615";"subkey"="923";"value"="38937"};
+{"key"="4616";"subkey"="923";"value"="38946"};
+{"key"="4617";"subkey"="923";"value"="38955"};
+{"key"="4618";"subkey"="923";"value"="38965"};
+{"key"="4619";"subkey"="923";"value"="38974"};
+{"key"="4620";"subkey"="924";"value"="38984"};
+{"key"="4621";"subkey"="924";"value"="38993"};
+{"key"="4622";"subkey"="924";"value"="39003"};
+{"key"="4623";"subkey"="924";"value"="39012"};
+{"key"="4624";"subkey"="924";"value"="39022"};
+{"key"="4625";"subkey"="925";"value"="39031"};
+{"key"="4626";"subkey"="925";"value"="39040"};
+{"key"="4627";"subkey"="925";"value"="39050"};
+{"key"="4628";"subkey"="925";"value"="39059"};
+{"key"="4629";"subkey"="925";"value"="39069"};
+{"key"="4630";"subkey"="926";"value"="39078"};
+{"key"="4631";"subkey"="926";"value"="39088"};
+{"key"="4632";"subkey"="926";"value"="39097"};
+{"key"="4633";"subkey"="926";"value"="39106"};
+{"key"="4634";"subkey"="926";"value"="39116"};
+{"key"="4635";"subkey"="927";"value"="39125"};
+{"key"="4636";"subkey"="927";"value"="39135"};
+{"key"="4637";"subkey"="927";"value"="39144"};
+{"key"="4638";"subkey"="927";"value"="39154"};
+{"key"="4639";"subkey"="927";"value"="39163"};
+{"key"="4640";"subkey"="928";"value"="39173"};
+{"key"="4641";"subkey"="928";"value"="39182"};
+{"key"="4642";"subkey"="928";"value"="39191"};
+{"key"="4643";"subkey"="928";"value"="39201"};
+{"key"="4644";"subkey"="928";"value"="39210"};
+{"key"="4645";"subkey"="929";"value"="39220"};
+{"key"="4646";"subkey"="929";"value"="39229"};
+{"key"="4647";"subkey"="929";"value"="39239"};
+{"key"="4648";"subkey"="929";"value"="39248"};
+{"key"="4649";"subkey"="929";"value"="39258"};
+{"key"="4650";"subkey"="930";"value"="39267"};
+{"key"="4651";"subkey"="930";"value"="39276"};
+{"key"="4652";"subkey"="930";"value"="39286"};
+{"key"="4653";"subkey"="930";"value"="39295"};
+{"key"="4654";"subkey"="930";"value"="39305"};
+{"key"="4655";"subkey"="931";"value"="39314"};
+{"key"="4656";"subkey"="931";"value"="39324"};
+{"key"="4657";"subkey"="931";"value"="39333"};
+{"key"="4658";"subkey"="931";"value"="39343"};
+{"key"="4659";"subkey"="931";"value"="39352"};
+{"key"="4660";"subkey"="932";"value"="39361"};
+{"key"="4661";"subkey"="932";"value"="39371"};
+{"key"="4662";"subkey"="932";"value"="39380"};
+{"key"="4663";"subkey"="932";"value"="39390"};
+{"key"="4664";"subkey"="932";"value"="39399"};
+{"key"="4665";"subkey"="933";"value"="39409"};
+{"key"="4666";"subkey"="933";"value"="39418"};
+{"key"="4667";"subkey"="933";"value"="39428"};
+{"key"="4668";"subkey"="933";"value"="39437"};
+{"key"="4669";"subkey"="933";"value"="39446"};
+{"key"="4670";"subkey"="934";"value"="39456"};
+{"key"="4671";"subkey"="934";"value"="39465"};
+{"key"="4672";"subkey"="934";"value"="39475"};
+{"key"="4673";"subkey"="934";"value"="39484"};
+{"key"="4674";"subkey"="934";"value"="39494"};
+{"key"="4675";"subkey"="935";"value"="39503"};
+{"key"="4676";"subkey"="935";"value"="39513"};
+{"key"="4677";"subkey"="935";"value"="39522"};
+{"key"="4678";"subkey"="935";"value"="39532"};
+{"key"="4679";"subkey"="935";"value"="39541"};
+{"key"="4680";"subkey"="936";"value"="39550"};
+{"key"="4681";"subkey"="936";"value"="39560"};
+{"key"="4682";"subkey"="936";"value"="39569"};
+{"key"="4683";"subkey"="936";"value"="39579"};
+{"key"="4684";"subkey"="936";"value"="39588"};
+{"key"="4685";"subkey"="937";"value"="39598"};
+{"key"="4686";"subkey"="937";"value"="39607"};
+{"key"="4687";"subkey"="937";"value"="39617"};
+{"key"="4688";"subkey"="937";"value"="39626"};
+{"key"="4689";"subkey"="937";"value"="39635"};
+{"key"="4690";"subkey"="938";"value"="39645"};
+{"key"="4691";"subkey"="938";"value"="39654"};
+{"key"="4692";"subkey"="938";"value"="39664"};
+{"key"="4693";"subkey"="938";"value"="39673"};
+{"key"="4694";"subkey"="938";"value"="39683"};
+{"key"="4695";"subkey"="939";"value"="39692"};
+{"key"="4696";"subkey"="939";"value"="39702"};
+{"key"="4697";"subkey"="939";"value"="39711"};
+{"key"="4698";"subkey"="939";"value"="39721"};
+{"key"="4699";"subkey"="939";"value"="39730"};
+{"key"="4700";"subkey"="940";"value"="39739"};
+{"key"="4701";"subkey"="940";"value"="39749"};
+{"key"="4702";"subkey"="940";"value"="39758"};
+{"key"="4703";"subkey"="940";"value"="39768"};
+{"key"="4704";"subkey"="940";"value"="39777"};
+{"key"="4705";"subkey"="941";"value"="39787"};
+{"key"="4706";"subkey"="941";"value"="39796"};
+{"key"="4707";"subkey"="941";"value"="39806"};
+{"key"="4708";"subkey"="941";"value"="39815"};
+{"key"="4709";"subkey"="941";"value"="39825"};
+{"key"="4710";"subkey"="942";"value"="39834"};
+{"key"="4711";"subkey"="942";"value"="39844"};
+{"key"="4712";"subkey"="942";"value"="39853"};
+{"key"="4713";"subkey"="942";"value"="39862"};
+{"key"="4714";"subkey"="942";"value"="39872"};
+{"key"="4715";"subkey"="943";"value"="39881"};
+{"key"="4716";"subkey"="943";"value"="39891"};
+{"key"="4717";"subkey"="943";"value"="39900"};
+{"key"="4718";"subkey"="943";"value"="39910"};
+{"key"="4719";"subkey"="943";"value"="39919"};
+{"key"="4720";"subkey"="944";"value"="39929"};
+{"key"="4721";"subkey"="944";"value"="39938"};
+{"key"="4722";"subkey"="944";"value"="39948"};
+{"key"="4723";"subkey"="944";"value"="39957"};
+{"key"="4724";"subkey"="944";"value"="39966"};
+{"key"="4725";"subkey"="945";"value"="39976"};
+{"key"="4726";"subkey"="945";"value"="39985"};
+{"key"="4727";"subkey"="945";"value"="39995"};
+{"key"="4728";"subkey"="945";"value"="40004"};
+{"key"="4729";"subkey"="945";"value"="40014"};
+{"key"="4730";"subkey"="946";"value"="40023"};
+{"key"="4731";"subkey"="946";"value"="40033"};
+{"key"="4732";"subkey"="946";"value"="40042"};
+{"key"="4733";"subkey"="946";"value"="40052"};
+{"key"="4734";"subkey"="946";"value"="40061"};
+{"key"="4735";"subkey"="947";"value"="40071"};
+{"key"="4736";"subkey"="947";"value"="40080"};
+{"key"="4737";"subkey"="947";"value"="40089"};
+{"key"="4738";"subkey"="947";"value"="40099"};
+{"key"="4739";"subkey"="947";"value"="40108"};
+{"key"="4740";"subkey"="948";"value"="40118"};
+{"key"="4741";"subkey"="948";"value"="40127"};
+{"key"="4742";"subkey"="948";"value"="40137"};
+{"key"="4743";"subkey"="948";"value"="40146"};
+{"key"="4744";"subkey"="948";"value"="40156"};
+{"key"="4745";"subkey"="949";"value"="40165"};
+{"key"="4746";"subkey"="949";"value"="40175"};
+{"key"="4747";"subkey"="949";"value"="40184"};
+{"key"="4748";"subkey"="949";"value"="40194"};
+{"key"="4749";"subkey"="949";"value"="40203"};
+{"key"="4750";"subkey"="950";"value"="40213"};
+{"key"="4751";"subkey"="950";"value"="40222"};
+{"key"="4752";"subkey"="950";"value"="40231"};
+{"key"="4753";"subkey"="950";"value"="40241"};
+{"key"="4754";"subkey"="950";"value"="40250"};
+{"key"="4755";"subkey"="951";"value"="40260"};
+{"key"="4756";"subkey"="951";"value"="40269"};
+{"key"="4757";"subkey"="951";"value"="40279"};
+{"key"="4758";"subkey"="951";"value"="40288"};
+{"key"="4759";"subkey"="951";"value"="40298"};
+{"key"="4760";"subkey"="952";"value"="40307"};
+{"key"="4761";"subkey"="952";"value"="40317"};
+{"key"="4762";"subkey"="952";"value"="40326"};
+{"key"="4763";"subkey"="952";"value"="40336"};
+{"key"="4764";"subkey"="952";"value"="40345"};
+{"key"="4765";"subkey"="953";"value"="40355"};
+{"key"="4766";"subkey"="953";"value"="40364"};
+{"key"="4767";"subkey"="953";"value"="40373"};
+{"key"="4768";"subkey"="953";"value"="40383"};
+{"key"="4769";"subkey"="953";"value"="40392"};
+{"key"="4770";"subkey"="954";"value"="40402"};
+{"key"="4771";"subkey"="954";"value"="40411"};
+{"key"="4772";"subkey"="954";"value"="40421"};
+{"key"="4773";"subkey"="954";"value"="40430"};
+{"key"="4774";"subkey"="954";"value"="40440"};
+{"key"="4775";"subkey"="955";"value"="40449"};
+{"key"="4776";"subkey"="955";"value"="40459"};
+{"key"="4777";"subkey"="955";"value"="40468"};
+{"key"="4778";"subkey"="955";"value"="40478"};
+{"key"="4779";"subkey"="955";"value"="40487"};
+{"key"="4780";"subkey"="956";"value"="40497"};
+{"key"="4781";"subkey"="956";"value"="40506"};
+{"key"="4782";"subkey"="956";"value"="40516"};
+{"key"="4783";"subkey"="956";"value"="40525"};
+{"key"="4784";"subkey"="956";"value"="40534"};
+{"key"="4785";"subkey"="957";"value"="40544"};
+{"key"="4786";"subkey"="957";"value"="40553"};
+{"key"="4787";"subkey"="957";"value"="40563"};
+{"key"="4788";"subkey"="957";"value"="40572"};
+{"key"="4789";"subkey"="957";"value"="40582"};
+{"key"="4790";"subkey"="958";"value"="40591"};
+{"key"="4791";"subkey"="958";"value"="40601"};
+{"key"="4792";"subkey"="958";"value"="40610"};
+{"key"="4793";"subkey"="958";"value"="40620"};
+{"key"="4794";"subkey"="958";"value"="40629"};
+{"key"="4795";"subkey"="959";"value"="40639"};
+{"key"="4796";"subkey"="959";"value"="40648"};
+{"key"="4797";"subkey"="959";"value"="40658"};
+{"key"="4798";"subkey"="959";"value"="40667"};
+{"key"="4799";"subkey"="959";"value"="40677"};
+{"key"="4800";"subkey"="960";"value"="40686"};
+{"key"="4801";"subkey"="960";"value"="40696"};
+{"key"="4802";"subkey"="960";"value"="40705"};
+{"key"="4803";"subkey"="960";"value"="40715"};
+{"key"="4804";"subkey"="960";"value"="40724"};
+{"key"="4805";"subkey"="961";"value"="40733"};
+{"key"="4806";"subkey"="961";"value"="40743"};
+{"key"="4807";"subkey"="961";"value"="40752"};
+{"key"="4808";"subkey"="961";"value"="40762"};
+{"key"="4809";"subkey"="961";"value"="40771"};
+{"key"="4810";"subkey"="962";"value"="40781"};
+{"key"="4811";"subkey"="962";"value"="40790"};
+{"key"="4812";"subkey"="962";"value"="40800"};
+{"key"="4813";"subkey"="962";"value"="40809"};
+{"key"="4814";"subkey"="962";"value"="40819"};
+{"key"="4815";"subkey"="963";"value"="40828"};
+{"key"="4816";"subkey"="963";"value"="40838"};
+{"key"="4817";"subkey"="963";"value"="40847"};
+{"key"="4818";"subkey"="963";"value"="40857"};
+{"key"="4819";"subkey"="963";"value"="40866"};
+{"key"="4820";"subkey"="964";"value"="40876"};
+{"key"="4821";"subkey"="964";"value"="40885"};
+{"key"="4822";"subkey"="964";"value"="40895"};
+{"key"="4823";"subkey"="964";"value"="40904"};
+{"key"="4824";"subkey"="964";"value"="40914"};
+{"key"="4825";"subkey"="965";"value"="40923"};
+{"key"="4826";"subkey"="965";"value"="40933"};
+{"key"="4827";"subkey"="965";"value"="40942"};
+{"key"="4828";"subkey"="965";"value"="40952"};
+{"key"="4829";"subkey"="965";"value"="40961"};
+{"key"="4830";"subkey"="966";"value"="40970"};
+{"key"="4831";"subkey"="966";"value"="40980"};
+{"key"="4832";"subkey"="966";"value"="40989"};
+{"key"="4833";"subkey"="966";"value"="40999"};
+{"key"="4834";"subkey"="966";"value"="41008"};
+{"key"="4835";"subkey"="967";"value"="41018"};
+{"key"="4836";"subkey"="967";"value"="41027"};
+{"key"="4837";"subkey"="967";"value"="41037"};
+{"key"="4838";"subkey"="967";"value"="41046"};
+{"key"="4839";"subkey"="967";"value"="41056"};
+{"key"="4840";"subkey"="968";"value"="41065"};
+{"key"="4841";"subkey"="968";"value"="41075"};
+{"key"="4842";"subkey"="968";"value"="41084"};
+{"key"="4843";"subkey"="968";"value"="41094"};
+{"key"="4844";"subkey"="968";"value"="41103"};
+{"key"="4845";"subkey"="969";"value"="41113"};
+{"key"="4846";"subkey"="969";"value"="41122"};
+{"key"="4847";"subkey"="969";"value"="41132"};
+{"key"="4848";"subkey"="969";"value"="41141"};
+{"key"="4849";"subkey"="969";"value"="41151"};
+{"key"="4850";"subkey"="970";"value"="41160"};
+{"key"="4851";"subkey"="970";"value"="41170"};
+{"key"="4852";"subkey"="970";"value"="41179"};
+{"key"="4853";"subkey"="970";"value"="41189"};
+{"key"="4854";"subkey"="970";"value"="41198"};
+{"key"="4855";"subkey"="971";"value"="41208"};
+{"key"="4856";"subkey"="971";"value"="41217"};
+{"key"="4857";"subkey"="971";"value"="41227"};
+{"key"="4858";"subkey"="971";"value"="41236"};
+{"key"="4859";"subkey"="971";"value"="41246"};
+{"key"="4860";"subkey"="972";"value"="41255"};
+{"key"="4861";"subkey"="972";"value"="41265"};
+{"key"="4862";"subkey"="972";"value"="41274"};
+{"key"="4863";"subkey"="972";"value"="41284"};
+{"key"="4864";"subkey"="972";"value"="41293"};
+{"key"="4865";"subkey"="973";"value"="41302"};
+{"key"="4866";"subkey"="973";"value"="41312"};
+{"key"="4867";"subkey"="973";"value"="41321"};
+{"key"="4868";"subkey"="973";"value"="41331"};
+{"key"="4869";"subkey"="973";"value"="41340"};
+{"key"="4870";"subkey"="974";"value"="41350"};
+{"key"="4871";"subkey"="974";"value"="41359"};
+{"key"="4872";"subkey"="974";"value"="41369"};
+{"key"="4873";"subkey"="974";"value"="41378"};
+{"key"="4874";"subkey"="974";"value"="41388"};
+{"key"="4875";"subkey"="975";"value"="41397"};
+{"key"="4876";"subkey"="975";"value"="41407"};
+{"key"="4877";"subkey"="975";"value"="41416"};
+{"key"="4878";"subkey"="975";"value"="41426"};
+{"key"="4879";"subkey"="975";"value"="41435"};
+{"key"="4880";"subkey"="976";"value"="41445"};
+{"key"="4881";"subkey"="976";"value"="41454"};
+{"key"="4882";"subkey"="976";"value"="41464"};
+{"key"="4883";"subkey"="976";"value"="41473"};
+{"key"="4884";"subkey"="976";"value"="41483"};
+{"key"="4885";"subkey"="977";"value"="41492"};
+{"key"="4886";"subkey"="977";"value"="41502"};
+{"key"="4887";"subkey"="977";"value"="41511"};
+{"key"="4888";"subkey"="977";"value"="41521"};
+{"key"="4889";"subkey"="977";"value"="41530"};
+{"key"="4890";"subkey"="978";"value"="41540"};
+{"key"="4891";"subkey"="978";"value"="41549"};
+{"key"="4892";"subkey"="978";"value"="41559"};
+{"key"="4893";"subkey"="978";"value"="41568"};
+{"key"="4894";"subkey"="978";"value"="41578"};
+{"key"="4895";"subkey"="979";"value"="41587"};
+{"key"="4896";"subkey"="979";"value"="41597"};
+{"key"="4897";"subkey"="979";"value"="41606"};
+{"key"="4898";"subkey"="979";"value"="41616"};
+{"key"="4899";"subkey"="979";"value"="41625"};
+{"key"="4900";"subkey"="980";"value"="41635"};
+{"key"="4901";"subkey"="980";"value"="41644"};
+{"key"="4902";"subkey"="980";"value"="41654"};
+{"key"="4903";"subkey"="980";"value"="41663"};
+{"key"="4904";"subkey"="980";"value"="41673"};
+{"key"="4905";"subkey"="981";"value"="41682"};
+{"key"="4906";"subkey"="981";"value"="41692"};
+{"key"="4907";"subkey"="981";"value"="41701"};
+{"key"="4908";"subkey"="981";"value"="41711"};
+{"key"="4909";"subkey"="981";"value"="41720"};
+{"key"="4910";"subkey"="982";"value"="41730"};
+{"key"="4911";"subkey"="982";"value"="41739"};
+{"key"="4912";"subkey"="982";"value"="41749"};
+{"key"="4913";"subkey"="982";"value"="41758"};
+{"key"="4914";"subkey"="982";"value"="41768"};
+{"key"="4915";"subkey"="983";"value"="41777"};
+{"key"="4916";"subkey"="983";"value"="41787"};
+{"key"="4917";"subkey"="983";"value"="41796"};
+{"key"="4918";"subkey"="983";"value"="41806"};
+{"key"="4919";"subkey"="983";"value"="41815"};
+{"key"="4920";"subkey"="984";"value"="41825"};
+{"key"="4921";"subkey"="984";"value"="41834"};
+{"key"="4922";"subkey"="984";"value"="41844"};
+{"key"="4923";"subkey"="984";"value"="41853"};
+{"key"="4924";"subkey"="984";"value"="41863"};
+{"key"="4925";"subkey"="985";"value"="41872"};
+{"key"="4926";"subkey"="985";"value"="41882"};
+{"key"="4927";"subkey"="985";"value"="41891"};
+{"key"="4928";"subkey"="985";"value"="41901"};
+{"key"="4929";"subkey"="985";"value"="41910"};
+{"key"="4930";"subkey"="986";"value"="41920"};
+{"key"="4931";"subkey"="986";"value"="41929"};
+{"key"="4932";"subkey"="986";"value"="41939"};
+{"key"="4933";"subkey"="986";"value"="41948"};
+{"key"="4934";"subkey"="986";"value"="41958"};
+{"key"="4935";"subkey"="987";"value"="41967"};
+{"key"="4936";"subkey"="987";"value"="41977"};
+{"key"="4937";"subkey"="987";"value"="41986"};
+{"key"="4938";"subkey"="987";"value"="41996"};
+{"key"="4939";"subkey"="987";"value"="42005"};
+{"key"="4940";"subkey"="988";"value"="42015"};
+{"key"="4941";"subkey"="988";"value"="42024"};
+{"key"="4942";"subkey"="988";"value"="42034"};
+{"key"="4943";"subkey"="988";"value"="42043"};
+{"key"="4944";"subkey"="988";"value"="42053"};
+{"key"="4945";"subkey"="989";"value"="42062"};
+{"key"="4946";"subkey"="989";"value"="42072"};
+{"key"="4947";"subkey"="989";"value"="42081"};
+{"key"="4948";"subkey"="989";"value"="42091"};
+{"key"="4949";"subkey"="989";"value"="42100"};
+{"key"="4950";"subkey"="990";"value"="42110"};
+{"key"="4951";"subkey"="990";"value"="42119"};
+{"key"="4952";"subkey"="990";"value"="42129"};
+{"key"="4953";"subkey"="990";"value"="42138"};
+{"key"="4954";"subkey"="990";"value"="42148"};
+{"key"="4955";"subkey"="991";"value"="42157"};
+{"key"="4956";"subkey"="991";"value"="42167"};
+{"key"="4957";"subkey"="991";"value"="42176"};
+{"key"="4958";"subkey"="991";"value"="42186"};
+{"key"="4959";"subkey"="991";"value"="42195"};
+{"key"="4960";"subkey"="992";"value"="42205"};
+{"key"="4961";"subkey"="992";"value"="42214"};
+{"key"="4962";"subkey"="992";"value"="42224"};
+{"key"="4963";"subkey"="992";"value"="42233"};
+{"key"="4964";"subkey"="992";"value"="42243"};
+{"key"="4965";"subkey"="993";"value"="42252"};
+{"key"="4966";"subkey"="993";"value"="42262"};
+{"key"="4967";"subkey"="993";"value"="42272"};
+{"key"="4968";"subkey"="993";"value"="42281"};
+{"key"="4969";"subkey"="993";"value"="42291"};
+{"key"="4970";"subkey"="994";"value"="42300"};
+{"key"="4971";"subkey"="994";"value"="42310"};
+{"key"="4972";"subkey"="994";"value"="42319"};
+{"key"="4973";"subkey"="994";"value"="42329"};
+{"key"="4974";"subkey"="994";"value"="42338"};
+{"key"="4975";"subkey"="995";"value"="42348"};
+{"key"="4976";"subkey"="995";"value"="42357"};
+{"key"="4977";"subkey"="995";"value"="42367"};
+{"key"="4978";"subkey"="995";"value"="42376"};
+{"key"="4979";"subkey"="995";"value"="42386"};
+{"key"="4980";"subkey"="996";"value"="42395"};
+{"key"="4981";"subkey"="996";"value"="42405"};
+{"key"="4982";"subkey"="996";"value"="42414"};
+{"key"="4983";"subkey"="996";"value"="42424"};
+{"key"="4984";"subkey"="996";"value"="42433"};
+{"key"="4985";"subkey"="997";"value"="42443"};
+{"key"="4986";"subkey"="997";"value"="42452"};
+{"key"="4987";"subkey"="997";"value"="42462"};
+{"key"="4988";"subkey"="997";"value"="42471"};
+{"key"="4989";"subkey"="997";"value"="42481"};
+{"key"="4990";"subkey"="998";"value"="42490"};
+{"key"="4991";"subkey"="998";"value"="42500"};
+{"key"="4992";"subkey"="998";"value"="42509"};
+{"key"="4993";"subkey"="998";"value"="42519"};
+{"key"="4994";"subkey"="998";"value"="42528"};
+{"key"="4995";"subkey"="999";"value"="42538"};
+{"key"="4996";"subkey"="999";"value"="42547"};
+{"key"="4997";"subkey"="999";"value"="42557"};
+{"key"="4998";"subkey"="999";"value"="42566"};
+{"key"="4999";"subkey"="999";"value"="42576"};
diff --git a/yql/essentials/udfs/common/hyperloglog/test/cases/Basic.sql b/yql/essentials/udfs/common/hyperloglog/test/cases/Basic.sql
new file mode 100644
index 00000000000..b092c781271
--- /dev/null
+++ b/yql/essentials/udfs/common/hyperloglog/test/cases/Basic.sql
@@ -0,0 +1,7 @@
+/* syntax version 1 */
+SELECT -- no GROUP BY: the whole Input table is aggregated into a single row
+ HyperLogLog(key) AS str, -- estimate over key as stored (string-valued in the input rows)
+ CountDistinctEstimate(CAST(subkey AS Double)) AS `double`, -- same estimate, fed a Double
+ HLL(CAST(value AS Int64), 18) AS `int` -- fed an Int64; 18 is presumably the precision argument — confirm against the YQL aggregate docs
+FROM Input;
+
diff --git a/yql/essentials/udfs/common/hyperloglog/test/ya.make b/yql/essentials/udfs/common/hyperloglog/test/ya.make
new file mode 100644
index 00000000000..5eac077dcfa
--- /dev/null
+++ b/yql/essentials/udfs/common/hyperloglog/test/ya.make
@@ -0,0 +1,16 @@
+YQL_UDF_TEST_CONTRIB()
+
+DEPENDS(
+ yql/essentials/udfs/common/hyperloglog
+ yql/essentials/udfs/common/digest
+) # NOTE(review): Basic.sql as shown calls no Digest function — confirm the digest dependency is still needed
+
+TIMEOUT(300)
+
+SIZE(MEDIUM)
+
+IF (SANITIZER_TYPE == "memory") # applies to memory-sanitizer builds only
+ TAG(ya:not_autocheck) # YQL-15385
+ENDIF()
+
+END()
diff --git a/yql/essentials/udfs/common/hyperloglog/ya.make b/yql/essentials/udfs/common/hyperloglog/ya.make
new file mode 100644
index 00000000000..b89a154d66b
--- /dev/null
+++ b/yql/essentials/udfs/common/hyperloglog/ya.make
@@ -0,0 +1,32 @@
+IF (YQL_PACKAGED) # packaged mode: take a prebuilt library instead of compiling the sources
+ PACKAGE()
+
+ FROM_SANDBOX(
+ FILE 7319897411 OUT_NOAUTO libhyperloglog_udf.so
+ ) # resource 7319897411 provides libhyperloglog_udf.so
+
+ END()
+ELSE() # regular mode: build the UDF from hyperloglog_udf.cpp
+YQL_UDF_CONTRIB(hyperloglog_udf)
+
+ YQL_ABI_VERSION(
+ 2
+ 28
+ 0
+ ) # presumably major / minor / patch of the UDF ABI — confirm
+
+ SRCS(
+ hyperloglog_udf.cpp
+ )
+
+ PEERDIR(
+ library/cpp/hyperloglog
+ )
+
+ END()
+
+ENDIF()
+
+RECURSE_FOR_TESTS(
+ test
+) \ No newline at end of file
diff --git a/yql/essentials/udfs/common/hyperscan/hyperscan_udf.cpp b/yql/essentials/udfs/common/hyperscan/hyperscan_udf.cpp
new file mode 100644
index 00000000000..6559e4a8425
--- /dev/null
+++ b/yql/essentials/udfs/common/hyperscan/hyperscan_udf.cpp
@@ -0,0 +1,477 @@
+#include <yql/essentials/public/udf/udf_value.h>
+#include <yql/essentials/public/udf/udf_type_builder.h>
+#include <yql/essentials/public/udf/udf_registrator.h>
+#include <yql/essentials/public/udf/udf_value_builder.h>
+#include <yql/essentials/public/udf/udf_terminator.h>
+
+#include <library/cpp/regex/hyperscan/hyperscan.h>
+#include <library/cpp/regex/pcre/regexp.h>
+
+#include <util/charset/utf8.h>
+#include <util/string/split.h>
+#include <util/string/builder.h>
+#include <util/system/cpu_id.h>
+
+using namespace NHyperscan;
+using namespace NKikimr;
+using namespace NUdf;
+
+namespace {
+ // Flag word that the HS_FLAG_* bits below are OR-ed into before a pattern is compiled.
+ using TOptions = ui32;
+ // Common base of every callable in this module. It only contributes the
+ // "(?i)" prefix constant and SetCommonOptions(); subclasses implement Run().
+ class THyperscanUdfBase: public TBoxedValue {
+ protected:
+ // Leading marker in a pattern that requests case-insensitive matching.
+ constexpr static const char* IGNORE_CASE_PREFIX = "(?i)";
+ // Updates `regex` and `options` in place:
+ //  * always sets HS_FLAG_ALLOWEMPTY;
+ //  * if `regex` starts with "(?i)", sets HS_FLAG_CASELESS and strips the prefix;
+ //  * sets HS_FLAG_UTF8 when UTF8Detect() classifies the (stripped) pattern as UTF8;
+ //  * ORs in HS_CPU_FEATURES_AVX2 when the CPU reports AVX2.
+ static void SetCommonOptions(TString& regex, TOptions& options) {
+ options |= HS_FLAG_ALLOWEMPTY;
+ if (regex.StartsWith(IGNORE_CASE_PREFIX)) {
+ options |= HS_FLAG_CASELESS;
+ // 4 is the length of IGNORE_CASE_PREFIX ("(?i)").
+ regex = regex.substr(4);
+ }
+ if (UTF8Detect(regex) == UTF8) {
+ options |= HS_FLAG_UTF8;
+ }
+ // NOTE(review): HS_CPU_FEATURES_AVX2 looks like a platform/CPU-feature bit
+ // rather than a per-pattern compile flag; OR-ing it into the pattern flags
+ // may alias an unrelated HS_FLAG_* bit and make results depend on the host
+ // CPU - confirm against the Hyperscan headers before relying on it.
+ if (NX86::HaveAVX2()) {
+ options |= HS_CPU_FEATURES_AVX2;
+ }
+ }
+ };
+
+ // Callable behind Match/Grep, BacktrackingMatch/BacktrackingGrep and
+ // MultiMatch/MultiGrep. The pattern is taken from the run config; the single
+ // call argument is the optional input string.
+ class THyperscanMatch: public THyperscanUdfBase {
+ public:
+ // Matching strategy selected by the registered function name (see Name()).
+ enum class EMode {
+ // Single pattern compiled by Hyperscan only.
+ NORMAL,
+ // Single pattern; if the plain compile fails, Hyperscan runs in prefilter
+ // mode and positive answers are re-checked with TRegExMatch.
+ BACKTRACKING,
+ // Several newline-separated patterns; the result is a tuple of bools.
+ MULTI
+ };
+
+ // Run-config stage: receives the pattern as args[0] and produces the
+ // THyperscanMatch instance that does the per-row work.
+ class TFactory: public THyperscanUdfBase {
+ public:
+ TFactory(
+ TSourcePosition pos,
+ bool surroundMode,
+ THyperscanMatch::EMode mode,
+ size_t regexpsCount = 0)
+ : Pos_(pos)
+ , SurroundMode(surroundMode)
+ , Mode(mode)
+ , RegexpsCount(regexpsCount)
+ {
+ }
+
+ private:
+ // No try/catch here: the THyperscanMatch constructor reports its own
+ // failures through UdfTerminate.
+ TUnboxedValue Run(
+ const IValueBuilder* valueBuilder,
+ const TUnboxedValuePod* args) const override {
+ return TUnboxedValuePod(
+ new THyperscanMatch(
+ valueBuilder,
+ args[0],
+ SurroundMode,
+ Mode,
+ Pos_,
+ RegexpsCount));
+ }
+
+ TSourcePosition Pos_;
+ bool SurroundMode;
+ THyperscanMatch::EMode Mode;
+ size_t RegexpsCount;
+ };
+
+ // Maps (isGrep, mode) to the registered function name:
+ // Grep/BacktrackingGrep/MultiGrep when isGrep is true, otherwise
+ // Match/BacktrackingMatch/MultiMatch. Aborts on an unknown mode.
+ static const TStringRef& Name(bool isGrep, THyperscanMatch::EMode mode) {
+ static auto match = TStringRef::Of("Match");
+ static auto grep = TStringRef::Of("Grep");
+ static auto backtrackingMatch = TStringRef::Of("BacktrackingMatch");
+ static auto backtrackingGrep = TStringRef::Of("BacktrackingGrep");
+ static auto multiMatch = TStringRef::Of("MultiMatch");
+ static auto multiGrep = TStringRef::Of("MultiGrep");
+ if (isGrep) {
+ switch (mode) {
+ case THyperscanMatch::EMode::NORMAL:
+ return grep;
+ case THyperscanMatch::EMode::BACKTRACKING:
+ return backtrackingGrep;
+ case THyperscanMatch::EMode::MULTI:
+ return multiGrep;
+ }
+ } else {
+ switch (mode) {
+ case THyperscanMatch::EMode::NORMAL:
+ return match;
+ case THyperscanMatch::EMode::BACKTRACKING:
+ return backtrackingMatch;
+ case THyperscanMatch::EMode::MULTI:
+ return multiMatch;
+ }
+ }
+
+ Y_ABORT("Unexpected");
+ }
+
+ // Compiles the pattern(s) from `runConfig` and allocates scratch space.
+ // When `surroundMode` is false every pattern is wrapped as ^pattern$.
+ // Any std::exception raised while compiling is turned into UdfTerminate
+ // with the source position prepended.
+ THyperscanMatch(
+ const IValueBuilder*,
+ const TUnboxedValuePod& runConfig,
+ bool surroundMode,
+ THyperscanMatch::EMode mode,
+ TSourcePosition pos,
+ size_t regexpsCount)
+ : Regex_(runConfig.AsStringRef())
+ , Mode(mode)
+ , Pos_(pos)
+ , RegexpsCount(regexpsCount)
+ {
+ try {
+ TOptions options = 0;
+ int pcreOptions = REG_EXTENDED;
+ // SetCommonOptions strips "(?i)" from the copy below, so case
+ // insensitivity for the fallback matcher is recorded here first.
+ if (Mode == THyperscanMatch::EMode::BACKTRACKING && Regex_.StartsWith(IGNORE_CASE_PREFIX)) {
+ pcreOptions |= REG_ICASE;
+ }
+ auto regex = Regex_;
+ SetCommonOptions(regex, options);
+ switch (mode) {
+ case THyperscanMatch::EMode::NORMAL: {
+ if (!surroundMode) {
+ regex = TStringBuilder() << '^' << regex << '$';
+ }
+ Database_ = Compile(regex, options);
+ break;
+ }
+ case THyperscanMatch::EMode::BACKTRACKING: {
+ if (!surroundMode) {
+ regex = TStringBuilder() << '^' << regex << '$';
+ }
+ try {
+ // Pattern compiled as is: downgrade to NORMAL so Run() skips
+ // the fallback check.
+ Database_ = Compile(regex, options);
+ Mode = THyperscanMatch::EMode::NORMAL;
+ } catch (const TCompileException&) {
+ // Plain compile failed: retry with HS_FLAG_PREFILTER and keep a
+ // TRegExMatch that Run() uses to confirm positive results.
+ options |= HS_FLAG_PREFILTER;
+ Database_ = Compile(regex, options);
+ Fallback_ = TRegExMatch(regex, pcreOptions);
+ }
+ break;
+ }
+ case THyperscanMatch::EMode::MULTI: {
+ std::vector<TString> regexes;
+ TVector<const char*> cregexes;
+ TVector<TOptions> flags;
+ TVector<TOptions> ids;
+
+ // Each '\n'-separated line is an independent pattern with its own
+ // option word (the `options` computed above is not used here).
+ const auto func = [&regexes, &flags, surroundMode](const std::string_view& token) {
+ TString regex(token);
+
+ TOptions opt = 0;
+ SetCommonOptions(regex, opt);
+
+ if (!surroundMode) {
+ regex = TStringBuilder() << '^' << regex << '$';
+ }
+
+ regexes.emplace_back(std::move(regex));
+ flags.emplace_back(opt);
+ };
+ StringSplitter(Regex_).Split('\n').Consume(func);
+
+ // cregexes borrows the buffers owned by `regexes`; ids are 0..N-1
+ // in input order, which Run() uses as tuple positions.
+ std::transform(regexes.cbegin(), regexes.cend(), std::back_inserter(cregexes), std::bind(&TString::c_str, std::placeholders::_1));
+ ids.resize(regexes.size());
+ std::iota(ids.begin(), ids.end(), 0);
+
+ Database_ = CompileMulti(cregexes, flags, ids);
+ break;
+ }
+ }
+ Scratch_ = MakeScratch(Database_);
+ } catch (const std::exception& e) {
+ UdfTerminate((TStringBuilder() << Pos_ << " " << e.what()).data());
+ }
+ }
+
+ private:
+ // Per-row entry point. args[0] is the optional input string.
+ //  * MULTI: returns an array of RegexpsCount bools, all false initially,
+ //    with position `id` set to true for every pattern id reported by Scan.
+ //  * otherwise: returns a bool; in BACKTRACKING mode a positive Hyperscan
+ //    answer is additionally confirmed by Fallback_.
+ // An empty (null) input yields the all-false array or false.
+ // NOTE(review): items[id] presumably relies on RegexpsCount matching the
+ // number of lines in Regex_ - confirm that both come from the same string.
+ TUnboxedValue Run(
+ const IValueBuilder* valueBuilder,
+ const TUnboxedValuePod* args) const final try {
+ TUnboxedValue* items = nullptr;
+ TUnboxedValue tuple;
+ size_t i = 0;
+
+ if (Mode == THyperscanMatch::EMode::MULTI) {
+ tuple = valueBuilder->NewArray(RegexpsCount, items);
+ for (i = 0; i < RegexpsCount; ++i) {
+ items[i] = TUnboxedValuePod(false);
+ }
+ }
+
+ if (args[0]) {
+ // XXX: StringRef data might not be a NTBS, though the function
+ // <TRegExMatch::Match> expects ASCIIZ string. Explicitly copy
+ // the given argument string and append the NUL terminator to it.
+ const TString input(args[0].AsStringRef());
+ if (Y_UNLIKELY(Mode == THyperscanMatch::EMode::MULTI)) {
+ auto callback = [items] (TOptions id, ui64 /* from */, ui64 /* to */) {
+ items[id] = TUnboxedValuePod(true);
+ };
+ Scan(Database_, Scratch_, input, callback);
+ return tuple;
+ } else {
+ bool matches = Matches(Database_, Scratch_, input);
+ if (matches && Mode == THyperscanMatch::EMode::BACKTRACKING) {
+ matches = Fallback_.Match(input.data());
+ }
+ return TUnboxedValuePod(matches);
+ }
+
+ } else {
+ return Mode == THyperscanMatch::EMode::MULTI ? tuple : TUnboxedValue(TUnboxedValuePod(false));
+ }
+ } catch (const std::exception& e) {
+ UdfTerminate((TStringBuilder() << Pos_ << " " << e.what()).data());
+ }
+
+ private:
+ // Pattern text exactly as received from the run config.
+ const TString Regex_;
+ // Effective mode; BACKTRACKING is replaced by NORMAL when no fallback is needed.
+ THyperscanMatch::EMode Mode;
+ const TSourcePosition Pos_;
+ // Number of tuple elements produced in MULTI mode.
+ const size_t RegexpsCount;
+ TDatabase Database_;
+ TScratch Scratch_;
+ // Only assigned in BACKTRACKING mode when the plain compile failed.
+ TRegExMatch Fallback_;
+ };
+
+ // Callable behind Hyperscan::Capture. The pattern comes from the run config;
+ // the call returns the first match reported by Hyperscan as a substring of
+ // the input, or an empty optional when the input is null or nothing matched.
+ class THyperscanCapture: public THyperscanUdfBase {
+ public:
+     // Run-config stage: builds a THyperscanCapture from the pattern in args[0].
+     // Compile errors thrown by the constructor are reported via UdfTerminate.
+     class TFactory: public THyperscanUdfBase {
+     public:
+         TFactory(TSourcePosition pos)
+             : Pos_(pos)
+         {}
+
+     private:
+         TUnboxedValue Run(const IValueBuilder*,
+                           const TUnboxedValuePod* args) const final try {
+             return TUnboxedValuePod(new THyperscanCapture(args[0], Pos_));
+         } catch (const std::exception& e) {
+             UdfTerminate((TStringBuilder() << Pos_ << " " << e.what()).data());
+         }
+
+     private:
+         TSourcePosition Pos_;
+     };
+
+     // Registered function name.
+     static const TStringRef& Name() {
+         static auto name = TStringRef::Of("Capture");
+         return name;
+     }
+
+     // Compiles the pattern with HS_FLAG_SOM_LEFTMOST (so the callback receives
+     // a start offset) plus the common options, and allocates scratch space.
+     // May throw; the factory converts that into UdfTerminate.
+     THyperscanCapture(const TUnboxedValuePod& runConfig, TSourcePosition pos)
+         : Pos_(pos)
+     {
+         Regex_ = runConfig.AsStringRef();
+         TOptions options = HS_FLAG_SOM_LEFTMOST;
+
+         SetCommonOptions(Regex_, options);
+
+         Database_ = Compile(Regex_, options);
+         Scratch_ = MakeScratch(Database_);
+     }
+
+ private:
+     // args[0] is the optional input string. Only the first reported match is
+     // kept; later callbacks are ignored once `result` holds a value.
+     TUnboxedValue Run(
+         const IValueBuilder* valueBuilder,
+         const TUnboxedValuePod* args) const final try {
+         if (const auto arg = args[0]) {
+             TUnboxedValue result;
+             auto callback = [valueBuilder, arg, &result] (TOptions id, ui64 from, ui64 to) {
+                 Y_UNUSED(id);
+                 if (!result) {
+                     // [from, to) are match offsets, while SubString expects
+                     // (offset, size): pass the match length, not the end
+                     // offset, otherwise a match that does not start at 0
+                     // yields a too-long (possibly out-of-range) slice.
+                     result = valueBuilder->SubString(arg, from, to - from);
+                 }
+             };
+             Scan(Database_, Scratch_, arg.AsStringRef(), callback);
+             return result;
+         }
+
+         return TUnboxedValue();
+     } catch (const std::exception& e) {
+         UdfTerminate((TStringBuilder() << Pos_ << " " << e.what()).data());
+     }
+
+     TSourcePosition Pos_;
+     TString Regex_;
+     TDatabase Database_;
+     TScratch Scratch_;
+ };
+
+ // Callable behind Hyperscan::Replace. The pattern comes from the run config;
+ // the call takes (optional input, replacement) and returns the input with
+ // matched spans substituted, or an empty optional for a null input.
+ class THyperscanReplace: public THyperscanUdfBase {
+ public:
+     // Run-config stage: builds a THyperscanReplace from the pattern in args[0].
+     // Compile errors thrown by the constructor are reported via UdfTerminate.
+     class TFactory: public THyperscanUdfBase {
+     public:
+         TFactory(TSourcePosition pos)
+             : Pos_(pos)
+         {}
+
+     private:
+         TUnboxedValue Run(const IValueBuilder*,
+                           const TUnboxedValuePod* args) const final try {
+             return TUnboxedValuePod(new THyperscanReplace(args[0], Pos_));
+         } catch (const std::exception& e) {
+             UdfTerminate((TStringBuilder() << Pos_ << " " << e.what()).data());
+         }
+
+     private:
+         TSourcePosition Pos_;
+     };
+
+     // Registered function name.
+     static const TStringRef& Name() {
+         static auto name = TStringRef::Of("Replace");
+         return name;
+     }
+
+     // Compiles the pattern with HS_FLAG_SOM_LEFTMOST (so the callback receives
+     // a start offset) plus the common options, and allocates scratch space.
+     // May throw; the factory converts that into UdfTerminate.
+     THyperscanReplace(const TUnboxedValuePod& runConfig, TSourcePosition pos)
+         : Pos_(pos)
+     {
+         Regex_ = runConfig.AsStringRef();
+         TOptions options = HS_FLAG_SOM_LEFTMOST;
+
+         SetCommonOptions(Regex_, options);
+
+         Database_ = Compile(Regex_, options);
+         Scratch_ = MakeScratch(Database_);
+     }
+
+ private:
+     // args[0] is the optional input, args[1] the replacement text.
+     // `index` tracks the end of the last replaced span; text between spans is
+     // copied through unchanged. If `index` is still 0 after the scan the
+     // original value is returned as is.
+     TUnboxedValue Run(
+         const IValueBuilder* valueBuilder,
+         const TUnboxedValuePod* args) const final try {
+         if (args[0]) {
+             const std::string_view input(args[0].AsStringRef());
+             const std::string_view replacement(args[1].AsStringRef());
+
+             ui64 index = 0;
+             TStringBuilder result;
+             auto callback = [input, replacement, &index, &result] (TOptions id, ui64 from, ui64 to) {
+                 Y_UNUSED(id);
+                 // A match that starts before the end of the previously
+                 // replaced span overlaps it. Skip it: otherwise `from - index`
+                 // wraps around as unsigned and the tail of the input gets
+                 // appended repeatedly.
+                 if (from < index) {
+                     return;
+                 }
+                 if (index != from) {
+                     result << input.substr(index, from - index);
+                 }
+                 result << replacement;
+                 index = to;
+             };
+             Scan(Database_, Scratch_, input, callback);
+
+             if (!index) {
+                 return args[0];
+             }
+
+             result << input.substr(index);
+             return valueBuilder->NewString(result);
+         }
+
+         return TUnboxedValue();
+     } catch (const std::exception& e) {
+         UdfTerminate((TStringBuilder() << Pos_ << " " << e.what()).data());
+     }
+
+     TSourcePosition Pos_;
+     TString Regex_;
+     TDatabase Database_;
+     TScratch Scratch_;
+ };
+
+ // UDF module "Hyperscan": lists the exported functions and builds their
+ // signatures and implementations.
+ class THyperscanModule: public IUdfModule {
+ public:
+ // Module name used at registration.
+ TStringRef Name() const {
+ return TStringRef::Of("Hyperscan");
+ }
+
+ // Nothing to release on termination.
+ void CleanupOnTerminate() const final {
+ }
+
+ // Announces all eight functions; the two Multi* variants are registered
+ // with SetTypeAwareness() because their return tuple depends on the pattern list.
+ void GetAllFunctions(IFunctionsSink& sink) const final {
+ sink.Add(THyperscanMatch::Name(true, THyperscanMatch::EMode::NORMAL));
+ sink.Add(THyperscanMatch::Name(false, THyperscanMatch::EMode::NORMAL));
+ sink.Add(THyperscanMatch::Name(true, THyperscanMatch::EMode::BACKTRACKING));
+ sink.Add(THyperscanMatch::Name(false, THyperscanMatch::EMode::BACKTRACKING));
+ sink.Add(THyperscanMatch::Name(true, THyperscanMatch::EMode::MULTI))->SetTypeAwareness();
+ sink.Add(THyperscanMatch::Name(false, THyperscanMatch::EMode::MULTI))->SetTypeAwareness();
+ sink.Add(THyperscanCapture::Name());
+ sink.Add(THyperscanReplace::Name());
+ }
+
+ // Declares the signature for `name` and, unless only types were requested
+ // (TFlags::TypesOnly), installs the matching factory. Unknown names are left
+ // untouched. Exceptions are reported through builder.SetError().
+ void BuildFunctionTypeInfo(
+ const TStringRef& name,
+ TType* userType,
+ const TStringRef& typeConfig,
+ ui32 flags,
+ IFunctionTypeInfoBuilder& builder) const final {
+ try {
+ Y_UNUSED(userType);
+
+ bool typesOnly = (flags & TFlags::TypesOnly);
+ bool isMatch = (THyperscanMatch::Name(false, THyperscanMatch::EMode::NORMAL) == name);
+ bool isGrep = (THyperscanMatch::Name(true, THyperscanMatch::EMode::NORMAL) == name);
+ bool isBacktrackingMatch = (THyperscanMatch::Name(false, THyperscanMatch::EMode::BACKTRACKING) == name);
+ bool isBacktrackingGrep = (THyperscanMatch::Name(true, THyperscanMatch::EMode::BACKTRACKING) == name);
+ bool isMultiMatch = (THyperscanMatch::Name(false, THyperscanMatch::EMode::MULTI) == name);
+ bool isMultiGrep = (THyperscanMatch::Name(true, THyperscanMatch::EMode::MULTI) == name);
+
+ if (isMatch || isGrep) {
+ // bool(String?) with the pattern as run config; the *Grep flag is
+ // passed to the factory as surroundMode (no ^...$ anchoring).
+ builder.SimpleSignature<bool(TOptional<char*>)>()
+ .RunConfig<const char*>();
+
+ if (!typesOnly) {
+ builder.Implementation(new THyperscanMatch::TFactory(builder.GetSourcePosition(), isGrep, THyperscanMatch::EMode::NORMAL));
+ }
+ } else if (isBacktrackingMatch || isBacktrackingGrep) {
+ builder.SimpleSignature<bool(TOptional<char*>)>()
+ .RunConfig<const char*>();
+
+ if (!typesOnly) {
+ builder.Implementation(new THyperscanMatch::TFactory(builder.GetSourcePosition(), isBacktrackingGrep, THyperscanMatch::EMode::BACKTRACKING));
+ }
+ } else if (isMultiMatch || isMultiGrep) {
+ // Return type is a tuple with one Bool per pattern; the pattern count
+ // is the number of '\n' characters in typeConfig plus one.
+ auto boolType = builder.SimpleType<bool>();
+ auto optionalStringType = builder.Optional()->Item<char*>().Build();
+ const std::string_view regexp(typeConfig);
+ size_t regexpCount = std::count(regexp.begin(), regexp.end(), '\n') + 1;
+ auto tuple = builder.Tuple();
+ for (size_t i = 0; i < regexpCount; ++i) {
+ tuple->Add(boolType);
+ }
+ auto tupleType = tuple->Build();
+ builder.Args(1)->Add(optionalStringType).Done().Returns(tupleType).RunConfig<char*>();
+
+ if (!typesOnly) {
+ builder.Implementation(new THyperscanMatch::TFactory(builder.GetSourcePosition(), isMultiGrep, THyperscanMatch::EMode::MULTI, regexpCount));
+ }
+ } else if (THyperscanCapture::Name() == name) {
+ // String?(String?) with the pattern as run config.
+ builder.SimpleSignature<TOptional<char*>(TOptional<char*>)>()
+ .RunConfig<char*>();
+
+ if (!typesOnly) {
+ builder.Implementation(new THyperscanCapture::TFactory(builder.GetSourcePosition()));
+ }
+ } else if (THyperscanReplace::Name() == name) {
+ // String?(String?, String) with the pattern as run config.
+ builder.SimpleSignature<TOptional<char*>(TOptional<char*>, char*)>()
+ .RunConfig<char*>();
+
+ if (!typesOnly) {
+ builder.Implementation(new THyperscanReplace::TFactory(builder.GetSourcePosition()));
+ }
+ }
+ } catch (const std::exception& e) {
+ // `e` itself is not used; the message is taken from the in-flight exception.
+ builder.SetError(CurrentExceptionMessage());
+ }
+ }
+ };
+
+ // Same function set as THyperscanModule, exposed under the module name "Pcre".
+ // Only Name() differs; everything else is inherited.
+ class TPcreModule : public THyperscanModule {
+ public:
+ // Module name used at registration; hides THyperscanModule::Name().
+ TStringRef Name() const {
+ return TStringRef::Of("Pcre");
+ }
+ };
+}
+
+// Export both module classes (named "Hyperscan" and "Pcre") from this UDF library.
+REGISTER_MODULES(THyperscanModule, TPcreModule)
diff --git a/yql/essentials/udfs/common/hyperscan/test/canondata/result.json b/yql/essentials/udfs/common/hyperscan/test/canondata/result.json
new file mode 100644
index 00000000000..93e6411aec8
--- /dev/null
+++ b/yql/essentials/udfs/common/hyperscan/test/canondata/result.json
@@ -0,0 +1,17 @@
+{
+ "test.test[Basic]": [
+ {
+ "uri": "file://test.test_Basic_/results.txt"
+ }
+ ],
+ "test.test[CharacterClasses]": [
+ {
+ "uri": "file://test.test_CharacterClasses_/results.txt"
+ }
+ ],
+ "test.test[Error]": [
+ {
+ "uri": "file://test.test_Error_/extracted"
+ }
+ ]
+}
diff --git a/yql/essentials/udfs/common/hyperscan/test/canondata/test.test_Basic_/results.txt b/yql/essentials/udfs/common/hyperscan/test/canondata/test.test_Basic_/results.txt
new file mode 100644
index 00000000000..7e4dd70678c
--- /dev/null
+++ b/yql/essentials/udfs/common/hyperscan/test/canondata/test.test_Basic_/results.txt
@@ -0,0 +1,441 @@
+[
+ {
+ "Write" = [
+ {
+ "Type" = [
+ "ListType";
+ [
+ "StructType";
+ [
+ [
+ "value";
+ [
+ "DataType";
+ "String"
+ ]
+ ];
+ [
+ "match";
+ [
+ "DataType";
+ "Bool"
+ ]
+ ];
+ [
+ "grep";
+ [
+ "DataType";
+ "Bool"
+ ]
+ ];
+ [
+ "insensitive_grep";
+ [
+ "DataType";
+ "Bool"
+ ]
+ ];
+ [
+ "multi_match";
+ [
+ "TupleType";
+ [
+ [
+ "DataType";
+ "Bool"
+ ];
+ [
+ "DataType";
+ "Bool"
+ ];
+ [
+ "DataType";
+ "Bool"
+ ];
+ [
+ "DataType";
+ "Bool"
+ ];
+ [
+ "DataType";
+ "Bool"
+ ];
+ [
+ "DataType";
+ "Bool"
+ ];
+ [
+ "DataType";
+ "Bool"
+ ]
+ ]
+ ]
+ ];
+ [
+ "some_multi_match";
+ [
+ "DataType";
+ "Bool"
+ ]
+ ];
+ [
+ "multi_match2";
+ [
+ "TupleType";
+ [
+ [
+ "DataType";
+ "Bool"
+ ];
+ [
+ "DataType";
+ "Bool"
+ ];
+ [
+ "DataType";
+ "Bool"
+ ];
+ [
+ "DataType";
+ "Bool"
+ ];
+ [
+ "DataType";
+ "Bool"
+ ]
+ ]
+ ]
+ ];
+ [
+ "some_multi_match2a";
+ [
+ "DataType";
+ "Bool"
+ ]
+ ];
+ [
+ "some_multi_match2b";
+ [
+ "DataType";
+ "Bool"
+ ]
+ ];
+ [
+ "some_multi_match2c";
+ [
+ "DataType";
+ "Bool"
+ ]
+ ];
+ [
+ "capture";
+ [
+ "OptionalType";
+ [
+ "DataType";
+ "String"
+ ]
+ ]
+ ];
+ [
+ "capture_many";
+ [
+ "OptionalType";
+ [
+ "DataType";
+ "String"
+ ]
+ ]
+ ];
+ [
+ "replace";
+ [
+ "OptionalType";
+ [
+ "DataType";
+ "String"
+ ]
+ ]
+ ];
+ [
+ "backtracking";
+ [
+ "DataType";
+ "Bool"
+ ]
+ ]
+ ]
+ ]
+ ];
+ "Data" = [
+ [
+ "";
+ %false;
+ %false;
+ %false;
+ [
+ %false;
+ %true;
+ %false;
+ %true;
+ %false;
+ %true;
+ %false
+ ];
+ %false;
+ [
+ %false;
+ %true;
+ %false;
+ %true;
+ %false
+ ];
+ %false;
+ %true;
+ %false;
+ #;
+ #;
+ [
+ ""
+ ];
+ %false
+ ];
+ [
+ "a";
+ %true;
+ %false;
+ %false;
+ [
+ %true;
+ %false;
+ %true;
+ %false;
+ %true;
+ %false;
+ %false
+ ];
+ %true;
+ [
+ %false;
+ %false;
+ %false;
+ %false;
+ %false
+ ];
+ %false;
+ %false;
+ %false;
+ #;
+ #;
+ [
+ "a"
+ ];
+ %false
+ ];
+ [
+ "aax";
+ %true;
+ %false;
+ %false;
+ [
+ %true;
+ %false;
+ %true;
+ %false;
+ %false;
+ %false;
+ %false
+ ];
+ %true;
+ [
+ %false;
+ %false;
+ %false;
+ %false;
+ %false
+ ];
+ %false;
+ %false;
+ %false;
+ [
+ "aa"
+ ];
+ #;
+ [
+ "aax"
+ ];
+ %true
+ ];
+ [
+ "xaax";
+ %false;
+ %false;
+ %false;
+ [
+ %false;
+ %false;
+ %true;
+ %false;
+ %false;
+ %false;
+ %false
+ ];
+ %false;
+ [
+ %false;
+ %false;
+ %false;
+ %false;
+ %false
+ ];
+ %false;
+ %false;
+ %false;
+ [
+ "xaa"
+ ];
+ [
+ "xa"
+ ];
+ [
+ "bax"
+ ];
+ %false
+ ];
+ [
+ "xaaxaaxaa";
+ %false;
+ %true;
+ %true;
+ [
+ %false;
+ %false;
+ %true;
+ %false;
+ %true;
+ %false;
+ %true
+ ];
+ %false;
+ [
+ %false;
+ %false;
+ %false;
+ %false;
+ %false
+ ];
+ %false;
+ %false;
+ %false;
+ [
+ "xaa"
+ ];
+ [
+ "xa"
+ ];
+ [
+ "bababa"
+ ];
+ %false
+ ];
+ [
+ "XAXA";
+ %false;
+ %false;
+ %true;
+ [
+ %false;
+ %false;
+ %false;
+ %false;
+ %false;
+ %false;
+ %false
+ ];
+ %false;
+ [
+ %false;
+ %false;
+ %false;
+ %false;
+ %false
+ ];
+ %false;
+ %false;
+ %false;
+ #;
+ #;
+ [
+ "XAXA"
+ ];
+ %false
+ ];
+ [
+ "7";
+ %false;
+ %false;
+ %false;
+ [
+ %false;
+ %false;
+ %false;
+ %false;
+ %false;
+ %false;
+ %false
+ ];
+ %false;
+ [
+ %false;
+ %false;
+ %false;
+ %false;
+ %false
+ ];
+ %false;
+ %false;
+ %false;
+ #;
+ #;
+ [
+ "7"
+ ];
+ %false
+ ];
+ [
+ "QC transfer task JAVA";
+ %false;
+ %false;
+ %false;
+ [
+ %false;
+ %false;
+ %true;
+ %false;
+ %false;
+ %false;
+ %false
+ ];
+ %false;
+ [
+ %false;
+ %false;
+ %true;
+ %false;
+ %true
+ ];
+ %false;
+ %false;
+ %true;
+ #;
+ #;
+ [
+ "QC transfer task JAVA"
+ ];
+ %false
+ ]
+ ]
+ }
+ ]
+ }
+] \ No newline at end of file
diff --git a/yql/essentials/udfs/common/hyperscan/test/canondata/test.test_CharacterClasses_/results.txt b/yql/essentials/udfs/common/hyperscan/test/canondata/test.test_CharacterClasses_/results.txt
new file mode 100644
index 00000000000..7fe80ff82a7
--- /dev/null
+++ b/yql/essentials/udfs/common/hyperscan/test/canondata/test.test_CharacterClasses_/results.txt
@@ -0,0 +1,59 @@
+[
+ {
+ "Write" = [
+ {
+ "Type" = [
+ "ListType";
+ [
+ "StructType";
+ [
+ [
+ "value";
+ [
+ "DataType";
+ "String"
+ ]
+ ];
+ [
+ "digits";
+ [
+ "DataType";
+ "Bool"
+ ]
+ ];
+ [
+ "spaces";
+ [
+ "DataType";
+ "Bool"
+ ]
+ ]
+ ]
+ ]
+ ];
+ "Data" = [
+ [
+ "xx000xx";
+ %true;
+ %false
+ ];
+ [
+ "lLlLl";
+ %false;
+ %false
+ ];
+ [
+ "a1 b2 c3";
+ %true;
+ %true
+ ];
+ [
+ "xxx yyy";
+ %false;
+ %true
+ ]
+ ]
+ }
+ ]
+ }
+] \ No newline at end of file
diff --git a/yql/essentials/udfs/common/hyperscan/test/canondata/test.test_Error_/extracted b/yql/essentials/udfs/common/hyperscan/test/canondata/test.test_Error_/extracted
new file mode 100644
index 00000000000..4d090620be0
--- /dev/null
+++ b/yql/essentials/udfs/common/hyperscan/test/canondata/test.test_Error_/extracted
@@ -0,0 +1,8 @@
+<tmp_path>/program.sql:<main>: Fatal: Execution
+
+ <tmp_path>/program.sql:<main>:8:1: Fatal: Execution of node: YtMap!
+ SELECT $match(value) AS match FROM Input;
+ ^
+ <tmp_path>/program.sql:<main>:6:21: Fatal: library/cpp/regex/hyperscan/hyperscan.cpp:102: Failed to compile regex: ^*$. Error message (hyperscan): Invalid repeat at index 1.
+ $match = Hyperscan::Match("*");
+ ^ \ No newline at end of file
diff --git a/yql/essentials/udfs/common/hyperscan/test/cases/Basic.in b/yql/essentials/udfs/common/hyperscan/test/cases/Basic.in
new file mode 100644
index 00000000000..ddc62722474
--- /dev/null
+++ b/yql/essentials/udfs/common/hyperscan/test/cases/Basic.in
@@ -0,0 +1,8 @@
+{"key"="1";"subkey"="1";"value"=""};
+{"key"="2";"subkey"="2";"value"="a"};
+{"key"="3";"subkey"="3";"value"="aax"};
+{"key"="4";"subkey"="4";"value"="xaax"};
+{"key"="5";"subkey"="5";"value"="xaaxaaxaa"};
+{"key"="6";"subkey"="6";"value"="XAXA"};
+{"key"="7";"subkey"="7";"value"="7"};
+{"key"="8";"subkey"="8";"value"="QC transfer task JAVA"};
diff --git a/yql/essentials/udfs/common/hyperscan/test/cases/Basic.sql b/yql/essentials/udfs/common/hyperscan/test/cases/Basic.sql
new file mode 100644
index 00000000000..4df22e6603a
--- /dev/null
+++ b/yql/essentials/udfs/common/hyperscan/test/cases/Basic.sql
@@ -0,0 +1,33 @@
+/* syntax version 1 */
+$match = Hyperscan::Match("a.*");
+$grep = Hyperscan::Grep("axa");
+$insensitive_grep = Hyperscan::Grep("(?i)axa");
+$multi_match = Hyperscan::MultiMatch(@@a.*
+.*a.*
+.*a
+.*axa.*@@);
+$multi_match2 = Hyperscan::MultiMatch(@@YQL.*
+QC.*
+.*transfer task.*@@);
+
+$capture = Hyperscan::Capture(".*a{2}.*");
+$capture_many = Hyperscan::Capture(".*x(a+).*");
+$replace = Hyperscan::Replace("xa");
+$backtracking_grep = Hyperscan::BacktrackingGrep("(?<!xa)ax");
+
+SELECT
+ value,
+ $match(value) AS match,
+ $grep(value) AS grep,
+ $insensitive_grep(value) AS insensitive_grep,
+ $multi_match(value) AS multi_match,
+ $multi_match(value).0 AS some_multi_match,
+ $multi_match2(value) AS multi_match2,
+ $multi_match2(value).0 AS some_multi_match2a,
+ $multi_match2(value).1 AS some_multi_match2b,
+ $multi_match2(value).2 AS some_multi_match2c,
+ $capture(value) AS capture,
+ $capture_many(value) AS capture_many,
+ $replace(value, "b") AS replace,
+ $backtracking_grep(value) as backtracking
+FROM Input;
diff --git a/yql/essentials/udfs/common/hyperscan/test/cases/CharacterClasses.in b/yql/essentials/udfs/common/hyperscan/test/cases/CharacterClasses.in
new file mode 100644
index 00000000000..e2737f40a1e
--- /dev/null
+++ b/yql/essentials/udfs/common/hyperscan/test/cases/CharacterClasses.in
@@ -0,0 +1,4 @@
+{"key"="1";"subkey"="1";"value"="xx000xx"};
+{"key"="2";"subkey"="2";"value"="lLlLl"};
+{"key"="3";"subkey"="3";"value"="a1 b2 c3"};
+{"key"="4";"subkey"="4";"value"="xxx yyy"};
diff --git a/yql/essentials/udfs/common/hyperscan/test/cases/CharacterClasses.sql b/yql/essentials/udfs/common/hyperscan/test/cases/CharacterClasses.sql
new file mode 100644
index 00000000000..4f19373b653
--- /dev/null
+++ b/yql/essentials/udfs/common/hyperscan/test/cases/CharacterClasses.sql
@@ -0,0 +1,9 @@
+/* syntax version 1 */
+$digits = Hyperscan::Grep("\\d+");
+$spaces = Hyperscan::Grep("\\s+");
+
+SELECT
+ value,
+ $digits(value) AS digits,
+ $spaces(value) AS spaces
+FROM Input;
diff --git a/yql/essentials/udfs/common/hyperscan/test/cases/Error.cfg b/yql/essentials/udfs/common/hyperscan/test/cases/Error.cfg
new file mode 100644
index 00000000000..7f181f61d6a
--- /dev/null
+++ b/yql/essentials/udfs/common/hyperscan/test/cases/Error.cfg
@@ -0,0 +1,2 @@
+in yt.plato.Input Basic.in
+xfail
diff --git a/yql/essentials/udfs/common/hyperscan/test/cases/Error.sql b/yql/essentials/udfs/common/hyperscan/test/cases/Error.sql
new file mode 100644
index 00000000000..26ece06ca0b
--- /dev/null
+++ b/yql/essentials/udfs/common/hyperscan/test/cases/Error.sql
@@ -0,0 +1,4 @@
+/* syntax version 1 */
+PRAGMA config.flags("LLVM","OFF"); -- TODO: fix error handling with LLVM
+$match = Hyperscan::Match("*");
+SELECT $match(value) AS match FROM Input;
diff --git a/yql/essentials/udfs/common/hyperscan/test/ya.make b/yql/essentials/udfs/common/hyperscan/test/ya.make
new file mode 100644
index 00000000000..2aa229b0b69
--- /dev/null
+++ b/yql/essentials/udfs/common/hyperscan/test/ya.make
@@ -0,0 +1,17 @@
+IF (OS_LINUX AND CLANG)
+
+YQL_UDF_TEST_CONTRIB()
+
+DEPENDS(yql/essentials/udfs/common/hyperscan)
+
+TIMEOUT(300)
+
+SIZE(MEDIUM)
+
+IF (SANITIZER_TYPE == "memory")
+ TAG(ya:not_autocheck) # YQL-15385
+ENDIF()
+
+END()
+
+ENDIF()
diff --git a/yql/essentials/udfs/common/hyperscan/ya.make b/yql/essentials/udfs/common/hyperscan/ya.make
new file mode 100644
index 00000000000..49c95d67c26
--- /dev/null
+++ b/yql/essentials/udfs/common/hyperscan/ya.make
@@ -0,0 +1,42 @@
+IF (YQL_PACKAGED)
+ PACKAGE()
+
+ FROM_SANDBOX(
+ FILE 7319899245 OUT_NOAUTO libhyperscan_udf.so
+ )
+
+ END()
+ELSE()
+
+ # NO_BUILD_IF does not like logical expressions by now
+ # see DEVTOOLSSUPPORT-44378
+ IF (NOT OS_LINUX OR NOT CLANG)
+ SET(DISABLE_HYPERSCAN_BUILD)
+ ENDIF()
+
+ NO_BUILD_IF(DISABLE_HYPERSCAN_BUILD)
+
+YQL_UDF_CONTRIB(hyperscan_udf)
+
+ YQL_ABI_VERSION(
+ 2
+ 27
+ 0
+ )
+
+ SRCS(
+ hyperscan_udf.cpp
+ )
+
+ PEERDIR(
+ library/cpp/regex/hyperscan
+ library/cpp/regex/pcre
+ )
+
+ END()
+
+ENDIF()
+
+RECURSE_FOR_TESTS(
+ test
+) \ No newline at end of file
diff --git a/yql/essentials/udfs/common/ip_base/ip_base.cpp b/yql/essentials/udfs/common/ip_base/ip_base.cpp
new file mode 100644
index 00000000000..1c017e2a5d2
--- /dev/null
+++ b/yql/essentials/udfs/common/ip_base/ip_base.cpp
@@ -0,0 +1,7 @@
+#include <yql/essentials/public/udf/udf_helpers.h>
+
+#include "lib/ip_base_udf.h"
+
+// Define TIpModule from the function list in EXPORTED_IP_BASE_UDF
+// (expected to come from lib/ip_base_udf.h) and register it.
+SIMPLE_MODULE(TIpModule, EXPORTED_IP_BASE_UDF)
+REGISTER_MODULES(TIpModule)
+
diff --git a/yql/essentials/udfs/common/ip_base/lib/ip_base_udf.cpp b/yql/essentials/udfs/common/ip_base/lib/ip_base_udf.cpp
new file mode 100644
index 00000000000..a0617e77283
--- /dev/null
+++ b/yql/essentials/udfs/common/ip_base/lib/ip_base_udf.cpp
@@ -0,0 +1 @@
+#include "ip_base_udf.h" \ No newline at end of file
diff --git a/yql/essentials/udfs/common/ip_base/lib/ip_base_udf.h b/yql/essentials/udfs/common/ip_base/lib/ip_base_udf.h
new file mode 100644
index 00000000000..dfb9cc29c0c
--- /dev/null
+++ b/yql/essentials/udfs/common/ip_base/lib/ip_base_udf.h
@@ -0,0 +1,358 @@
+#pragma once
+
+#include <yql/essentials/public/udf/udf_helpers.h>
+
+#include <library/cpp/ipv6_address/ipv6_address.h>
+#include <library/cpp/ipmath/ipmath.h>
+#include <util/generic/buffer.h>
+
+namespace {
+ // Short local aliases for the NKikimr::NUdf types used in the UDF signatures
+ // and bodies below.
+ using TAutoMapString = NKikimr::NUdf::TAutoMap<char*>;
+ using TOptionalString = NKikimr::NUdf::TOptional<char*>;
+ using TOptionalByte = NKikimr::NUdf::TOptional<ui8>;
+ using TStringRef = NKikimr::NUdf::TStringRef;
+ using TUnboxedValue = NKikimr::NUdf::TUnboxedValue;
+ using TUnboxedValuePod = NKikimr::NUdf::TUnboxedValuePod;
+
+ // Derives a prefix length from an address range:
+ //  * 0 when the range contains both the lowest and the highest IPv6 address;
+ //  * the full address width (32 for IPv4, 128 otherwise) when Size() is 0;
+ //  * otherwise the address width minus MostSignificantBit(Size()).
+ ui8 GetAddressRangePrefix(const TIpAddressRange& range) {
+     const TIpv6Address lowestV6(ui128(0), TIpv6Address::Ipv6);
+     const TIpv6Address highestV6(ui128(-1), TIpv6Address::Ipv6);
+     if (range.Contains(lowestV6) && range.Contains(highestV6)) {
+         return 0;
+     }
+
+     const ui8 addressBits = range.Type() == TIpv6Address::Ipv4 ? 32 : 128;
+     const ui128 size = range.Size();
+     if (size == 0) {
+         return addressBits;
+     }
+
+     const size_t sizeLog = MostSignificantBit(size);
+     return ui8(addressBits - sizeLog);
+ }
+
+ // Four address bytes, most significant first. Instances are copied
+ // byte-for-byte to and from the binary string form used by the UDFs below.
+ struct TRawIp4 {
+     ui8 a, b, c, d;
+
+     // Splits the low 32 bits of the address value into bytes.
+     static TRawIp4 FromIpAddress(const TIpv6Address& addr) {
+         const ui128 bits = addr;
+         TRawIp4 raw;
+         raw.a = ui8(bits >> 24 & 0xff);
+         raw.b = ui8(bits >> 16 & 0xff);
+         raw.c = ui8(bits >> 8 & 0xff);
+         raw.d = ui8(bits & 0xff);
+         return raw;
+     }
+
+     // Builds the netmask with the top `prefix` of 32 bits set.
+     static TRawIp4 MaskFromPrefix(ui8 prefix) {
+         const ui128 low32 = ui128(ui32(-1));
+         ui128 maskBits = ui128(-1) << int(32 - prefix);
+         maskBits &= low32;
+         return FromIpAddress({maskBits, TIpv6Address::Ipv4});
+     }
+
+     // Rebuilds the address object from the four bytes.
+     TIpv6Address ToIpAddress() const {
+         return {a, b, c, d};
+     }
+
+     // Returns {this AND mask, this OR NOT mask}: the first and last address
+     // of the subnet described by `mask`.
+     std::pair<TRawIp4, TRawIp4> ApplyMask(const TRawIp4& mask) const {
+         const TRawIp4 lower{
+             ui8(a & mask.a),
+             ui8(b & mask.b),
+             ui8(c & mask.c),
+             ui8(d & mask.d)
+         };
+         const TRawIp4 upper{
+             ui8(a | ~mask.a),
+             ui8(b | ~mask.b),
+             ui8(c | ~mask.c),
+             ui8(d | ~mask.d)
+         };
+         return {lower, upper};
+     }
+ };
+
+ // Binary form of an IPv4 subnet: base address followed by the netmask.
+ struct TRawIp4Subnet {
+     TRawIp4 base, mask;
+
+     // Takes the first address of the range as base and derives the mask from
+     // the range's prefix length.
+     static TRawIp4Subnet FromIpRange(const TIpAddressRange& range) {
+         const TRawIp4 first = TRawIp4::FromIpAddress(*range.Begin());
+         const TRawIp4 netmask = TRawIp4::MaskFromPrefix(GetAddressRangePrefix(range));
+         return {first, netmask};
+     }
+
+     // Expands base/mask back into a [first, last] address range.
+     TIpAddressRange ToIpRange() const {
+         const auto bounds = base.ApplyMask(mask);
+         return {bounds.first.ToIpAddress(), bounds.second.ToIpAddress()};
+     }
+ };
+
+ // Sixteen address bytes, most significant first: each of the eight 16-bit
+ // groups a..h is stored as a high byte (suffix 1) followed by a low byte
+ // (suffix 0). Instances are copied byte-for-byte to and from the binary
+ // string form used by the UDFs below.
+ struct TRawIp6 {
+ ui8 a1, a0, b1, b0, c1, c0, d1, d0, e1, e0, f1, f0, g1, g0, h1, h0;
+
+ // Splits the 128-bit address value into bytes, highest byte first.
+ static TRawIp6 FromIpAddress(const TIpv6Address& addr) {
+ ui128 x = addr;
+ return {
+ ui8(x >> 120 & 0xff), ui8(x >> 112 & 0xff),
+ ui8(x >> 104 & 0xff), ui8(x >> 96 & 0xff),
+ ui8(x >> 88 & 0xff), ui8(x >> 80 & 0xff),
+ ui8(x >> 72 & 0xff), ui8(x >> 64 & 0xff),
+ ui8(x >> 56 & 0xff), ui8(x >> 48 & 0xff),
+ ui8(x >> 40 & 0xff), ui8(x >> 32 & 0xff),
+ ui8(x >> 24 & 0xff), ui8(x >> 16 & 0xff),
+ ui8(x >> 8 & 0xff), ui8(x & 0xff)
+ };
+ }
+
+ // Builds the netmask with the top `prefix` of 128 bits set. Prefix 0 is
+ // special-cased so that no shift by the full 128 bits is performed.
+ static TRawIp6 MaskFromPrefix(ui8 prefix) {
+ ui128 x = prefix == 0 ? ui128(0) : ui128(-1) << int(128 - prefix);
+ return FromIpAddress({x, TIpv6Address::Ipv6});
+ }
+
+ // Recombines each high/low byte pair into a 16-bit group and rebuilds the
+ // address object from the eight groups.
+ TIpv6Address ToIpAddress() const {
+ return {ui16(ui32(a1) << ui32(8) | ui32(a0)),
+ ui16(ui32(b1) << ui32(8) | ui32(b0)),
+ ui16(ui32(c1) << ui32(8) | ui32(c0)),
+ ui16(ui32(d1) << ui32(8) | ui32(d0)),
+ ui16(ui32(e1) << ui32(8) | ui32(e0)),
+ ui16(ui32(f1) << ui32(8) | ui32(f0)),
+ ui16(ui32(g1) << ui32(8) | ui32(g0)),
+ ui16(ui32(h1) << ui32(8) | ui32(h0)),
+ };
+ }
+
+ // Returns {this AND mask, this OR NOT mask}: the first and last address of
+ // the subnet described by `mask`.
+ std::pair<TRawIp6, TRawIp6> ApplyMask(const TRawIp6& mask) const {
+ return { {
+ ui8(a1 & mask.a1),
+ ui8(a0 & mask.a0),
+ ui8(b1 & mask.b1),
+ ui8(b0 & mask.b0),
+ ui8(c1 & mask.c1),
+ ui8(c0 & mask.c0),
+ ui8(d1 & mask.d1),
+ ui8(d0 & mask.d0),
+ ui8(e1 & mask.e1),
+ ui8(e0 & mask.e0),
+ ui8(f1 & mask.f1),
+ ui8(f0 & mask.f0),
+ ui8(g1 & mask.g1),
+ ui8(g0 & mask.g0),
+ ui8(h1 & mask.h1),
+ ui8(h0 & mask.h0)
+ }, {
+ ui8(a1 | ~mask.a1),
+ ui8(a0 | ~mask.a0),
+ ui8(b1 | ~mask.b1),
+ ui8(b0 | ~mask.b0),
+ ui8(c1 | ~mask.c1),
+ ui8(c0 | ~mask.c0),
+ ui8(d1 | ~mask.d1),
+ ui8(d0 | ~mask.d0),
+ ui8(e1 | ~mask.e1),
+ ui8(e0 | ~mask.e0),
+ ui8(f1 | ~mask.f1),
+ ui8(f0 | ~mask.f0),
+ ui8(g1 | ~mask.g1),
+ ui8(g0 | ~mask.g0),
+ ui8(h1 | ~mask.h1),
+ ui8(h0 | ~mask.h0)
+ }};
+ }
+ };
+
+ // Binary form of an IPv6 subnet: base address followed by the netmask.
+ struct TRawIp6Subnet {
+     TRawIp6 base, mask;
+
+     // Takes the first address of the range as base and derives the mask from
+     // the range's prefix length.
+     static TRawIp6Subnet FromIpRange(const TIpAddressRange& range) {
+         const TRawIp6 first = TRawIp6::FromIpAddress(*range.Begin());
+         const TRawIp6 netmask = TRawIp6::MaskFromPrefix(GetAddressRangePrefix(range));
+         return {first, netmask};
+     }
+
+     // Expands base/mask back into a [first, last] address range.
+     TIpAddressRange ToIpRange() const {
+         const auto bounds = base.ApplyMask(mask);
+         return {bounds.first.ToIpAddress(), bounds.second.ToIpAddress()};
+     }
+ };
+
+ // Decodes the binary address form: 4 bytes are read as TRawIp4, 16 bytes as
+ // TRawIp6. Any other length throws yexception.
+ TIpv6Address DeserializeAddress(const TStringRef& str) {
+     if (str.Size() == 4) {
+         TRawIp4 raw4;
+         memcpy(&raw4, str.Data(), sizeof raw4);
+         return raw4.ToIpAddress();
+     }
+     if (str.Size() == 16) {
+         TRawIp6 raw6;
+         memcpy(&raw6, str.Data(), sizeof raw6);
+         return raw6.ToIpAddress();
+     }
+     ythrow yexception() << "Incorrect size of input, expected "
+                         << "4 or 16, got " << str.Size();
+ }
+
+ // Decodes the binary subnet form: the length selects TRawIp4Subnet or
+ // TRawIp6Subnet. Any other length throws yexception.
+ TIpAddressRange DeserializeSubnet(const TStringRef& str) {
+     if (str.Size() == sizeof(TRawIp4Subnet)) {
+         TRawIp4Subnet raw4;
+         memcpy(&raw4, str.Data(), sizeof raw4);
+         return raw4.ToIpRange();
+     }
+     if (str.Size() == sizeof(TRawIp6Subnet)) {
+         TRawIp6Subnet raw6;
+         memcpy(&raw6, str.Data(), sizeof raw6);
+         return raw6.ToIpRange();
+     }
+     ythrow yexception() << "Invalid binary representation";
+ }
+
+ // Encodes an address as the raw bytes of TRawIp4 (IPv4) or TRawIp6 (IPv6).
+ // Fails the Y_ENSURE check for any other address type.
+ TString SerializeAddress(const TIpv6Address& addr) {
+     Y_ENSURE(addr.Type() == TIpv6Address::Ipv4 || addr.Type() == TIpv6Address::Ipv6);
+     switch (addr.Type()) {
+         case TIpv6Address::Ipv4: {
+             const auto raw4 = TRawIp4::FromIpAddress(addr);
+             return TString(reinterpret_cast<const char *>(&raw4), sizeof raw4);
+         }
+         case TIpv6Address::Ipv6: {
+             const auto raw6 = TRawIp6::FromIpAddress(addr);
+             return TString(reinterpret_cast<const char *>(&raw6), sizeof raw6);
+         }
+         default:
+             return TString();
+     }
+ }
+
+ // Encodes a range as the raw bytes of TRawIp4Subnet or TRawIp6Subnet.
+ // Returns an empty string when the range type is neither IPv4 nor IPv6.
+ TString SerializeSubnet(const TIpAddressRange& range) {
+     switch (range.Type()) {
+         case TIpv6Address::Ipv4: {
+             const auto raw4 = TRawIp4Subnet::FromIpRange(range);
+             return TString(reinterpret_cast<const char *>(&raw4), sizeof raw4);
+         }
+         case TIpv6Address::Ipv6: {
+             const auto raw6 = TRawIp6Subnet::FromIpRange(range);
+             return TString(reinterpret_cast<const char *>(&raw6), sizeof raw6);
+         }
+         default:
+             return TString();
+     }
+ }
+
+ // Parses a textual address and returns its binary form, or an empty optional
+ // when the parsed address is neither IPv4 nor IPv6.
+ SIMPLE_STRICT_UDF(TFromString, TOptionalString(TAutoMapString)) {
+     const TIpv6Address parsed = TIpv6Address::FromString(args[0].AsStringRef());
+     const bool isKnownType = parsed.Type() == TIpv6Address::Ipv4 || parsed.Type() == TIpv6Address::Ipv6;
+     if (!isKnownType) {
+         return TUnboxedValue();
+     }
+     return valueBuilder->NewString(SerializeAddress(parsed));
+ }
+
+ // Parses a compact subnet string and returns its binary representation.
+ // Yields an empty optional when SerializeSubnet produces an empty string.
+ SIMPLE_STRICT_UDF(TSubnetFromString, TOptionalString(TAutoMapString)) {
+ const TIpAddressRange parsed = TIpAddressRange::FromCompactString(args[0].AsStringRef());
+ const TString packed = SerializeSubnet(parsed);
+ if (!packed) {
+ return TUnboxedValue(TUnboxedValuePod());
+ }
+ return valueBuilder->NewString(packed);
+ }
+
+ // Converts a binary address back to text via TIpv6Address::ToString(false).
+ // Not strict: DeserializeAddress throws on input of an unexpected size.
+ SIMPLE_UDF(TToString, char*(TAutoMapString)) {
+ auto addr = DeserializeAddress(args[0].AsStringRef());
+ return valueBuilder->NewString(addr.ToString(false));
+ }
+
+ // Renders a binary subnet as "<first address>/<prefix length>".
+ SIMPLE_UDF(TSubnetToString, char*(TAutoMapString)) {
+ auto range = DeserializeSubnet(args[0].AsStringRef());
+ TStringBuilder text;
+ text << (*range.Begin()).ToString(false) << '/' << ToString(GetAddressRangePrefix(range));
+ return valueBuilder->NewString(text);
+ }
+
+ // Checks whether the subnet in the first argument contains the second
+ // argument. The second argument is read as a single address when its size
+ // equals sizeof(TRawIp4) or sizeof(TRawIp6), and as a subnet otherwise.
+ SIMPLE_UDF(TSubnetMatch, bool(TAutoMapString, TAutoMapString)) {
+ Y_UNUSED(valueBuilder);
+ auto outer = DeserializeSubnet(args[0].AsStringRef());
+ const auto rhs = args[1].AsStringRef();
+ const bool rhsIsAddress = rhs.Size() == sizeof(TRawIp4) || rhs.Size() == sizeof(TRawIp6);
+ if (!rhsIsAddress) {
+ // second argument is a whole subnet, not a single address
+ auto inner = DeserializeSubnet(rhs);
+ return TUnboxedValuePod(outer.Contains(inner));
+ }
+ auto member = DeserializeAddress(rhs);
+ return TUnboxedValuePod(outer.Contains(member));
+ }
+
+ // True when the argument is present and exactly 4 bytes long.
+ SIMPLE_STRICT_UDF(TIsIPv4, bool(TOptionalString)) {
+ Y_UNUSED(valueBuilder);
+ const bool isV4 = args[0] && args[0].AsStringRef().Size() == 4;
+ return TUnboxedValuePod(isV4);
+ }
+
+ // True when the argument is present and exactly 16 bytes long.
+ SIMPLE_STRICT_UDF(TIsIPv6, bool(TOptionalString)) {
+ Y_UNUSED(valueBuilder);
+ const bool isV6 = args[0] && args[0].AsStringRef().Size() == 16;
+ return TUnboxedValuePod(isV6);
+ }
+
+ // True when the argument is a 16-byte address for which
+ // TIpv6Address::Isv4MappedTov6() holds; false for empty or other-sized input.
+ SIMPLE_STRICT_UDF(TIsEmbeddedIPv4, bool(TOptionalString)) {
+ Y_UNUSED(valueBuilder);
+ if (!args[0]) {
+ return TUnboxedValuePod(false);
+ }
+ const auto ref = args[0].AsStringRef();
+ if (ref.Size() != 16) {
+ return TUnboxedValuePod(false);
+ }
+ const bool mapped = DeserializeAddress(ref).Isv4MappedTov6();
+ return TUnboxedValuePod(mapped);
+ }
+
+ // Returns a 16-byte address for any valid input: 16-byte input is passed
+ // through unchanged, 4-byte input is OR-ed with 0xFFFF << 32 and tagged as
+ // Ipv6. Any other input size raises yexception.
+ SIMPLE_UDF(TConvertToIPv6, char*(TAutoMapString)) {
+ const auto& ref = args[0].AsStringRef();
+ if (ref.Size() == 4) {
+ TIpv6Address v4 = DeserializeAddress(ref);
+ auto mapped = TIpv6Address(ui128(v4) | ui128(0xFFFF) << 32, TIpv6Address::Ipv6);
+ return valueBuilder->NewString(SerializeAddress(mapped));
+ }
+ if (ref.Size() == 16) {
+ return valueBuilder->NewString(ref);
+ }
+ ythrow yexception() << "Incorrect size of input, expected "
+ << "4 or 16, got " << ref.Size();
+ }
+
+ // Returns LowerBoundForPrefix(address, prefix) in binary form.
+ // The optional second argument is the prefix length. When it is absent or
+ // zero, the default is 24 for 4-byte input and 64 for 16-byte input; values
+ // above 32 / 128 respectively are clamped to that maximum.
+ SIMPLE_UDF_WITH_OPTIONAL_ARGS(TGetSubnet, char*(TAutoMapString, TOptionalByte), 1) {
+ const auto ref = args[0].AsStringRef();
+ const ui8 requested = args[1].GetOrDefault<ui8>(0);
+ TIpv6Address addr = DeserializeAddress(ref);
+ ui8 fallbackPrefix = 0;
+ ui8 widestPrefix = 0;
+ if (ref.Size() == 4) {
+ fallbackPrefix = 24;
+ widestPrefix = 32;
+ } else if (ref.Size() == 16) {
+ fallbackPrefix = 64;
+ widestPrefix = 128;
+ } else {
+ ythrow yexception() << "Incorrect size of input, expected "
+ << "4 or 16, got " << ref.Size();
+ }
+ ui8 subnetSize = requested ? requested : fallbackPrefix;
+ if (subnetSize > widestPrefix) {
+ subnetSize = widestPrefix;
+ }
+ return valueBuilder->NewString(SerializeAddress(LowerBoundForPrefix(addr, subnetSize)));
+ }
+
+ // Applies a bitwise AND of an address and a mask given in the same binary
+ // form. Throws when the two deserialized addresses have different types.
+ SIMPLE_UDF(TGetSubnetByMask, char*(TAutoMapString, TAutoMapString)) {
+ TIpv6Address base = DeserializeAddress(args[0].AsStringRef());
+ TIpv6Address mask = DeserializeAddress(args[1].AsStringRef());
+ if (base.Type() != mask.Type()) {
+ ythrow yexception() << "Base and mask differ in length";
+ }
+ const TIpv6Address masked(ui128(base) & ui128(mask), base.Type());
+ return valueBuilder->NewString(SerializeAddress(masked));
+ }
+
+// Comma-separated list of every UDF class defined in this file.
+// NOTE(review): presumably expanded inside a SIMPLE_MODULE(...) invocation by
+// the module source that includes this header - confirm against the caller.
+#define EXPORTED_IP_BASE_UDF \
+ TFromString, \
+ TSubnetFromString, \
+ TToString, \
+ TSubnetToString, \
+ TIsIPv4, \
+ TIsIPv6, \
+ TIsEmbeddedIPv4, \
+ TConvertToIPv6, \
+ TGetSubnet, \
+ TSubnetMatch, \
+ TGetSubnetByMask
+}
diff --git a/yql/essentials/udfs/common/ip_base/lib/ya.make b/yql/essentials/udfs/common/ip_base/lib/ya.make
new file mode 100644
index 00000000000..72633514771
--- /dev/null
+++ b/yql/essentials/udfs/common/ip_base/lib/ya.make
@@ -0,0 +1,19 @@
+LIBRARY()
+
+YQL_ABI_VERSION(
+ 2
+ 28
+ 0
+)
+
+SRCS(
+ ip_base_udf.cpp
+)
+
+PEERDIR(
+ yql/essentials/public/udf
+ library/cpp/ipmath
+ library/cpp/ipv6_address
+)
+
+END()
diff --git a/yql/essentials/udfs/common/ip_base/test/canondata/result.json b/yql/essentials/udfs/common/ip_base/test/canondata/result.json
new file mode 100644
index 00000000000..a9602f6bf0c
--- /dev/null
+++ b/yql/essentials/udfs/common/ip_base/test/canondata/result.json
@@ -0,0 +1,12 @@
+{
+ "test.test[Basic]": [
+ {
+ "uri": "file://test.test_Basic_/results.txt"
+ }
+ ],
+ "test.test[Subnets]": [
+ {
+ "uri": "file://test.test_Subnets_/results.txt"
+ }
+ ]
+}
diff --git a/yql/essentials/udfs/common/ip_base/test/canondata/test.test_Basic_/results.txt b/yql/essentials/udfs/common/ip_base/test/canondata/test.test_Basic_/results.txt
new file mode 100644
index 00000000000..c62c9cbd35f
--- /dev/null
+++ b/yql/essentials/udfs/common/ip_base/test/canondata/test.test_Basic_/results.txt
@@ -0,0 +1,374 @@
+[
+ {
+ "Write" = [
+ {
+ "Type" = [
+ "ListType";
+ [
+ "StructType";
+ [
+ [
+ "internal_representation";
+ [
+ "OptionalType";
+ [
+ "DataType";
+ "String"
+ ]
+ ]
+ ];
+ [
+ "round_trip";
+ [
+ "OptionalType";
+ [
+ "DataType";
+ "String"
+ ]
+ ]
+ ];
+ [
+ "is_ipv4";
+ [
+ "DataType";
+ "Bool"
+ ]
+ ];
+ [
+ "is_ipv6";
+ [
+ "DataType";
+ "Bool"
+ ]
+ ];
+ [
+ "is_embedded_ipv4";
+ [
+ "DataType";
+ "Bool"
+ ]
+ ];
+ [
+ "all_ipv6";
+ [
+ "OptionalType";
+ [
+ "DataType";
+ "String"
+ ]
+ ]
+ ];
+ [
+ "default_subnet";
+ [
+ "OptionalType";
+ [
+ "DataType";
+ "String"
+ ]
+ ]
+ ];
+ [
+ "small_subnet";
+ [
+ "OptionalType";
+ [
+ "DataType";
+ "String"
+ ]
+ ]
+ ];
+ [
+ "large_subnet";
+ [
+ "OptionalType";
+ [
+ "DataType";
+ "String"
+ ]
+ ]
+ ];
+ [
+ "single_subnet4";
+ [
+ "OptionalType";
+ [
+ "DataType";
+ "String"
+ ]
+ ]
+ ];
+ [
+ "single_subnet6";
+ [
+ "OptionalType";
+ [
+ "DataType";
+ "String"
+ ]
+ ]
+ ]
+ ]
+ ]
+ ];
+ "Data" = [
+ [
+ [
+ "\x7F\0\0\1"
+ ];
+ [
+ "127.0.0.1"
+ ];
+ %true;
+ %false;
+ %false;
+ [
+ "::ffff:127.0.0.1"
+ ];
+ [
+ "127.0.0.0"
+ ];
+ [
+ "127.0.0.1"
+ ];
+ [
+ "127.0.0.0"
+ ];
+ [
+ "127.0.0.1"
+ ];
+ [
+ "127.0.0.1"
+ ]
+ ];
+ [
+ [
+ "\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\1"
+ ];
+ [
+ "::1"
+ ];
+ %false;
+ %true;
+ %false;
+ [
+ "::1"
+ ];
+ [
+ "::"
+ ];
+ [
+ "::"
+ ];
+ [
+ "::"
+ ];
+ [
+ "::"
+ ];
+ [
+ "::1"
+ ]
+ ];
+ [
+ [
+ [
+ "1bTBAw=="
+ ]
+ ];
+ [
+ "213.180.193.3"
+ ];
+ %true;
+ %false;
+ %false;
+ [
+ "::ffff:213.180.193.3"
+ ];
+ [
+ "213.180.193.0"
+ ];
+ [
+ "213.180.193.3"
+ ];
+ [
+ "213.180.0.0"
+ ];
+ [
+ "213.180.193.3"
+ ];
+ [
+ "213.180.193.3"
+ ]
+ ];
+ [
+ [
+ [
+ "KgIGuAAAAAAAAAAAAAAAAw=="
+ ]
+ ];
+ [
+ "2a02:6b8::3"
+ ];
+ %false;
+ %true;
+ %false;
+ [
+ "2a02:6b8::3"
+ ];
+ [
+ "2a02:6b8::"
+ ];
+ [
+ "2a02:6b8::"
+ ];
+ [
+ "2a02::"
+ ];
+ [
+ "2a02:6b8::"
+ ];
+ [
+ "2a02:6b8::3"
+ ]
+ ];
+ [
+ [
+ [
+ "JADLACBIAAEAAAAAaBwbZQ=="
+ ]
+ ];
+ [
+ "2400:cb00:2048:1::681c:1b65"
+ ];
+ %false;
+ %true;
+ %false;
+ [
+ "2400:cb00:2048:1::681c:1b65"
+ ];
+ [
+ "2400:cb00:2048:1::"
+ ];
+ [
+ "2400:cb00:2048:1::681c:1b60"
+ ];
+ [
+ "2400::"
+ ];
+ [
+ "2400:cb00::"
+ ];
+ [
+ "2400:cb00:2048:1::681c:1b65"
+ ]
+ ];
+ [
+ [
+ [
+ "/oAAAAAAAAACFbL//qlnzg=="
+ ]
+ ];
+ [
+ "fe80::215:b2ff:fea9:67ce"
+ ];
+ %false;
+ %true;
+ %false;
+ [
+ "fe80::215:b2ff:fea9:67ce"
+ ];
+ [
+ "fe80::"
+ ];
+ [
+ "fe80::215:b2ff:fea9:67c8"
+ ];
+ [
+ "fe80::"
+ ];
+ [
+ "fe80::"
+ ];
+ [
+ "fe80::215:b2ff:fea9:67ce"
+ ]
+ ];
+ [
+ [
+ [
+ "AAAAAAAAAAAAAP//TUubAw=="
+ ]
+ ];
+ [
+ "::ffff:77.75.155.3"
+ ];
+ %false;
+ %true;
+ %true;
+ [
+ "::ffff:77.75.155.3"
+ ];
+ [
+ "::"
+ ];
+ [
+ "::ffff:77.75.155.0"
+ ];
+ [
+ "::"
+ ];
+ [
+ "::"
+ ];
+ [
+ "::ffff:77.75.155.3"
+ ]
+ ];
+ [
+ #;
+ #;
+ %false;
+ %false;
+ %false;
+ #;
+ #;
+ #;
+ #;
+ #;
+ #
+ ];
+ [
+ [
+ "\0\0\0\0"
+ ];
+ [
+ "0.0.0.0"
+ ];
+ %true;
+ %false;
+ %false;
+ [
+ "::ffff:0.0.0.0"
+ ];
+ [
+ "0.0.0.0"
+ ];
+ [
+ "0.0.0.0"
+ ];
+ [
+ "0.0.0.0"
+ ];
+ [
+ "0.0.0.0"
+ ];
+ [
+ "0.0.0.0"
+ ]
+ ]
+ ]
+ }
+ ]
+ }
+] \ No newline at end of file
diff --git a/yql/essentials/udfs/common/ip_base/test/canondata/test.test_Subnets_/results.txt b/yql/essentials/udfs/common/ip_base/test/canondata/test.test_Subnets_/results.txt
new file mode 100644
index 00000000000..c6f8ac61364
--- /dev/null
+++ b/yql/essentials/udfs/common/ip_base/test/canondata/test.test_Subnets_/results.txt
@@ -0,0 +1,184 @@
+[
+ {
+ "Write" = [
+ {
+ "Type" = [
+ "ListType";
+ [
+ "StructType";
+ [
+ [
+ "internal1";
+ [
+ "OptionalType";
+ [
+ "DataType";
+ "String"
+ ]
+ ]
+ ];
+ [
+ "string1";
+ [
+ "OptionalType";
+ [
+ "DataType";
+ "String"
+ ]
+ ]
+ ];
+ [
+ "subnet1_subnet2_match";
+ [
+ "OptionalType";
+ [
+ "DataType";
+ "Bool"
+ ]
+ ]
+ ];
+ [
+ "subnet1_ip1_match";
+ [
+ "OptionalType";
+ [
+ "DataType";
+ "Bool"
+ ]
+ ]
+ ];
+ [
+ "subnet2_ip1_match";
+ [
+ "OptionalType";
+ [
+ "DataType";
+ "Bool"
+ ]
+ ]
+ ];
+ [
+ "ip1_ip2_mask_subnet";
+ [
+ "OptionalType";
+ [
+ "DataType";
+ "String"
+ ]
+ ]
+ ]
+ ]
+ ]
+ ];
+ "Data" = [
+ [
+ [
+ [
+ "wKgAAP///wA="
+ ]
+ ];
+ [
+ "192.168.0.0/24"
+ ];
+ [
+ %true
+ ];
+ [
+ %true
+ ];
+ [
+ %false
+ ];
+ [
+ "192.0.0.0"
+ ]
+ ];
+ [
+ [
+ [
+ "CgAAAP//AAA="
+ ]
+ ];
+ [
+ "10.0.0.0/16"
+ ];
+ [
+ %false
+ ];
+ [
+ %true
+ ];
+ [
+ %false
+ ];
+ [
+ "10.0.0.0"
+ ]
+ ];
+ [
+ [
+ "\0\0\0\0\0\0\0\0"
+ ];
+ [
+ "0.0.0.0/0"
+ ];
+ [
+ %true
+ ];
+ [
+ %true
+ ];
+ [
+ %true
+ ];
+ [
+ "0.0.0.0"
+ ]
+ ];
+ [
+ [
+ [
+ "KgIGuAweShgAAAaWAAAAAP///////////////wAAAAA="
+ ]
+ ];
+ [
+ "2a02:6b8:c1e:4a18:0:696::/96"
+ ];
+ [
+ %false
+ ];
+ [
+ %true
+ ];
+ [
+ %true
+ ];
+ [
+ "::696:0:0"
+ ]
+ ];
+ [
+ [
+ "\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0"
+ ];
+ [
+ "::/0"
+ ];
+ [
+ %true
+ ];
+ [
+ %true
+ ];
+ [
+ %true
+ ];
+ [
+ "::ffff:192.168.0.2"
+ ]
+ ]
+ ]
+ }
+ ]
+ }
+] \ No newline at end of file
diff --git a/yql/essentials/udfs/common/ip_base/test/cases/Basic.in b/yql/essentials/udfs/common/ip_base/test/cases/Basic.in
new file mode 100644
index 00000000000..4aa20599141
--- /dev/null
+++ b/yql/essentials/udfs/common/ip_base/test/cases/Basic.in
@@ -0,0 +1,9 @@
+{"key"="127.0.0.1";"subkey"="";"value"=""};
+{"key"="::1";"subkey"="";"value"=""};
+{"key"="213.180.193.3";"subkey"="";"value"=""};
+{"key"="2a02:6b8::3";"subkey"="";"value"=""};
+{"key"="2400:cb00:2048:1::681c:1b65";"subkey"="";"value"=""};
+{"key"="fe80::215:b2ff:fea9:67ce";"subkey"="";"value"=""};
+{"key"="::ffff:77.75.155.3";"subkey"="";"value"=""};
+{"key"="sdfsdfsdf";"subkey"="";"value"=""};
+{"key"="0.0.0.0";"subkey"="";value=""}; \ No newline at end of file
diff --git a/yql/essentials/udfs/common/ip_base/test/cases/Basic.sql b/yql/essentials/udfs/common/ip_base/test/cases/Basic.sql
new file mode 100644
index 00000000000..1b875bc7313
--- /dev/null
+++ b/yql/essentials/udfs/common/ip_base/test/cases/Basic.sql
@@ -0,0 +1,16 @@
+/* syntax version 1 */
+SELECT
+ internal_representation AS internal_representation,
+ Ip::ToString(internal_representation) AS round_trip,
+ Ip::IsIPv4(internal_representation) AS is_ipv4,
+ Ip::IsIPv6(internal_representation) AS is_ipv6,
+ Ip::IsEmbeddedIPv4(internal_representation) AS is_embedded_ipv4,
+ Ip::ToString(Ip::ConvertToIPv6(internal_representation)) AS all_ipv6,
+ Ip::ToString(Ip::GetSubnet(internal_representation)) AS default_subnet,
+ Ip::ToString(Ip::GetSubnet(internal_representation, 125)) AS small_subnet,
+ Ip::ToString(Ip::GetSubnet(internal_representation, 16)) AS large_subnet,
+ Ip::ToString(Ip::GetSubnet(internal_representation, 32)) AS single_subnet4,
+ Ip::ToString(Ip::GetSubnet(internal_representation, 128)) AS single_subnet6
+FROM (
+ SELECT Ip::FromString(key) AS internal_representation FROM Input
+);
diff --git a/yql/essentials/udfs/common/ip_base/test/cases/Subnets.in b/yql/essentials/udfs/common/ip_base/test/cases/Subnets.in
new file mode 100644
index 00000000000..b2e2a1d02c1
--- /dev/null
+++ b/yql/essentials/udfs/common/ip_base/test/cases/Subnets.in
@@ -0,0 +1,5 @@
+{"subnet1"="192.168.0.1/24";"subnet2"="192.168.0.1/28";"ip1"="192.168.0.32";"ip2"="255.0.0.0"};
+{"subnet1"="10.0.0.1/16";"subnet2"="127.0.0.1/16";"ip1"="10.0.10.128";"ip2"="255.0.240.0"};
+{"subnet1"="0.0.0.0/0";"subnet2"="1.1.1.1/32";"ip1"="1.1.1.1";"ip2"="0.0.0.0"};
+{"subnet1"="2a02:6b8:c1e:4a18:0:696:ec65:0/96";"subnet2"="2a02:6b8:c1e:4a18::/12";"ip1"="2a02:6b8:c1e:4a18:0:696:ec65:0";"ip2"="::ffff:ffff:0:0"};
+{"subnet1"="::/0";"subnet2"="::ffff:192.168.0.1/96";"ip1"="::ffff:192.168.0.2";"ip2"="ffff:ffff:ffff:ffff:ffff:ffff:ffff:ffff"}; \ No newline at end of file
diff --git a/yql/essentials/udfs/common/ip_base/test/cases/Subnets.in.attr b/yql/essentials/udfs/common/ip_base/test/cases/Subnets.in.attr
new file mode 100644
index 00000000000..01b3c2afea5
--- /dev/null
+++ b/yql/essentials/udfs/common/ip_base/test/cases/Subnets.in.attr
@@ -0,0 +1 @@
+{schema=[{name=subnet1;type=string};{name=subnet2;type=string};{name=ip1;type=string};{name=ip2;type=string}]} \ No newline at end of file
diff --git a/yql/essentials/udfs/common/ip_base/test/cases/Subnets.sql b/yql/essentials/udfs/common/ip_base/test/cases/Subnets.sql
new file mode 100644
index 00000000000..43a7b143872
--- /dev/null
+++ b/yql/essentials/udfs/common/ip_base/test/cases/Subnets.sql
@@ -0,0 +1,16 @@
+/* syntax version 1 */
+SELECT
+ subnet1 AS internal1,
+ Ip::SubnetToString(subnet1) AS string1,
+ Ip::SubnetMatch(subnet1, subnet2) AS subnet1_subnet2_match,
+ Ip::SubnetMatch(subnet1, ip1) AS subnet1_ip1_match,
+ Ip::SubnetMatch(subnet2, ip1) AS subnet2_ip1_match,
+ Ip::ToString(Ip::GetSubnetByMask(ip1, ip2)) AS ip1_ip2_mask_subnet
+FROM (
+ SELECT
+ Ip::SubnetFromString(subnet1) AS subnet1,
+ Ip::SubnetFromString(subnet2) AS subnet2,
+ Ip::FromString(ip1) AS ip1,
+ Ip::FromString(ip2) AS ip2
+ FROM Input
+); \ No newline at end of file
diff --git a/yql/essentials/udfs/common/ip_base/test/ya.make b/yql/essentials/udfs/common/ip_base/test/ya.make
new file mode 100644
index 00000000000..883a487b013
--- /dev/null
+++ b/yql/essentials/udfs/common/ip_base/test/ya.make
@@ -0,0 +1,13 @@
+YQL_UDF_TEST_CONTRIB()
+
+TIMEOUT(300)
+
+SIZE(MEDIUM)
+
+IF (SANITIZER_TYPE == "memory")
+ TAG(ya:not_autocheck) # YQL-15385
+ENDIF()
+
+DEPENDS(yql/essentials/udfs/common/ip_base)
+
+END()
diff --git a/yql/essentials/udfs/common/ip_base/ya.make b/yql/essentials/udfs/common/ip_base/ya.make
new file mode 100644
index 00000000000..b43780285cd
--- /dev/null
+++ b/yql/essentials/udfs/common/ip_base/ya.make
@@ -0,0 +1,34 @@
+IF (YQL_PACKAGED)
+ PACKAGE()
+
+ FROM_SANDBOX(
+ FILE 7319899828 OUT_NOAUTO libip_udf.so
+ )
+
+ END()
+
+ELSE()
+
+YQL_UDF_CONTRIB(ip_udf)
+
+ YQL_ABI_VERSION(
+ 2
+ 28
+ 0
+ )
+
+ SRCS(
+ ip_base.cpp
+ )
+
+ PEERDIR(
+ yql/essentials/udfs/common/ip_base/lib
+ )
+
+ END()
+
+ENDIF()
+
+RECURSE_FOR_TESTS(
+ test
+) \ No newline at end of file
diff --git a/yql/essentials/udfs/common/json/json_udf.cpp b/yql/essentials/udfs/common/json/json_udf.cpp
new file mode 100644
index 00000000000..3a7916bed74
--- /dev/null
+++ b/yql/essentials/udfs/common/json/json_udf.cpp
@@ -0,0 +1,120 @@
+#include <yql/essentials/public/udf/udf_helpers.h>
+
+#include <library/cpp/json/easy_parse/json_easy_parser.h>
+
+using namespace NKikimr;
+using namespace NUdf;
+
+namespace {
+ // UDF "GetField": collects the values found at a given field path in a JSON
+ // text and returns them as a list of strings. The path is handed verbatim to
+ // NJson::TJsonParser::AddField, which defines its syntax.
+ class TGetField: public TBoxedValue {
+ public:
+ // NOTE(review): presumably tells the UDF framework that DeclareSignature
+ // wants to receive the user type - confirm against udf_helpers.h.
+ typedef bool TTypeAwareMarker;
+
+ public:
+ static TStringRef Name() {
+ return TStringRef::Of("GetField");
+ }
+
+ // args[0] - JSON text (may be empty/null), args[1] - field path.
+ // Returns an empty list when the JSON argument is absent or the path is
+ // an empty string; otherwise one list item per string produced by the parser.
+ TUnboxedValue Run(
+ const IValueBuilder* valueBuilder,
+ const TUnboxedValuePod* args) const override {
+ if (!args[0]) {
+ return valueBuilder->NewEmptyList();
+ }
+
+ const TString json(args[0].AsStringRef());
+ const TString field(args[1].AsStringRef());
+
+ if (field.empty()) {
+ return valueBuilder->NewEmptyList();
+ }
+
+ NJson::TJsonParser parser;
+ parser.AddField(field, false);
+
+ TVector<TString> result;
+ // The return value of Parse is not inspected; whatever ended up in
+ // `result` is returned.
+ parser.Parse(json, &result);
+
+ TUnboxedValue* items = nullptr;
+ const auto list = valueBuilder->NewArray(result.size(), items);
+ for (const TString& item : result) {
+ *items++ = valueBuilder->NewString(item);
+ }
+
+ return list;
+ }
+
+ // Declares the function signature. Returns false when `name` is not
+ // "GetField", true otherwise (including when an error was reported through
+ // builder.SetError). Without a user type the signature is
+ // (String?, String) -> List<String>; with a user type whose first argument
+ // is Json or Json?, that type is used for the first argument instead.
+ static bool DeclareSignature(
+ const TStringRef& name,
+ TType* userType,
+ IFunctionTypeInfoBuilder& builder,
+ bool typesOnly) {
+ if (Name() == name) {
+ bool useString = true;
+ bool isOptional = true;
+ if (userType) {
+ // support of an overload with Json/Json? input type
+ auto typeHelper = builder.TypeInfoHelper();
+ auto userTypeInspector = TTupleTypeInspector(*typeHelper, userType);
+ if (!userTypeInspector || userTypeInspector.GetElementsCount() < 1) {
+ builder.SetError("Missing or invalid user type.");
+ return true;
+ }
+
+ // Element 0 of the user type is itself a tuple of argument types.
+ auto argsTypeTuple = userTypeInspector.GetElementType(0);
+ auto argsTypeInspector = TTupleTypeInspector(*typeHelper, argsTypeTuple);
+ if (!argsTypeInspector) {
+ builder.SetError("Invalid user type - expected tuple.");
+ return true;
+ }
+
+ if (argsTypeInspector.GetElementsCount() != 2) {
+ builder.SetError("Invalid user type - expected two arguments.");
+ return true;
+ }
+
+ // Strip one level of Optional from the first argument, remembering
+ // whether it was there.
+ auto inputType = argsTypeInspector.GetElementType(0);
+ auto optInspector = TOptionalTypeInspector(*typeHelper, inputType);
+ auto dataType = inputType;
+ if (optInspector) {
+ dataType = optInspector.GetItemType();
+ } else {
+ isOptional = false;
+ }
+
+ // Only a Json data type switches to the Json overload; any other
+ // type keeps the default String? signature.
+ auto dataInspector = TDataTypeInspector(*typeHelper, dataType);
+ if (dataInspector && dataInspector.GetTypeId() == TDataType<TJson>::Id) {
+ useString = false;
+ builder.UserType(userType);
+ }
+ }
+
+ auto retType = builder.List()->Item<char*>().Build();
+ if (useString) {
+ builder.Args()->Add(builder.Optional()->Item<char*>().Build()).Add<char*>().Done().Returns(retType);
+ } else {
+ auto type = builder.SimpleType<TJson>();
+ if (isOptional) {
+ builder.Args()->Add(builder.Optional()->Item(type).Build()).Add<char*>().Done().Returns(retType);
+ } else {
+ builder.Args()->Add(type).Add<char*>().Done().Returns(retType);
+ }
+ }
+
+ if (!typesOnly) {
+ builder.Implementation(new TGetField);
+ }
+
+ builder.IsStrict();
+ return true;
+ } else {
+ return false;
+ }
+ }
+ };
+}
+
+SIMPLE_MODULE(TJsonModule,
+ TGetField)
+
+REGISTER_MODULES(TJsonModule)
diff --git a/yql/essentials/udfs/common/json/test/canondata/result.json b/yql/essentials/udfs/common/json/test/canondata/result.json
new file mode 100644
index 00000000000..fb6112fc5bc
--- /dev/null
+++ b/yql/essentials/udfs/common/json/test/canondata/result.json
@@ -0,0 +1,7 @@
+{
+ "test.test[Basic]": [
+ {
+ "uri": "file://test.test_Basic_/results.txt"
+ }
+ ]
+}
diff --git a/yql/essentials/udfs/common/json/test/canondata/test.test_Basic_/results.txt b/yql/essentials/udfs/common/json/test/canondata/test.test_Basic_/results.txt
new file mode 100644
index 00000000000..8cd3200dab4
--- /dev/null
+++ b/yql/essentials/udfs/common/json/test/canondata/test.test_Basic_/results.txt
@@ -0,0 +1,57 @@
+[
+ {
+ "Write" = [
+ {
+ "Type" = [
+ "ListType";
+ [
+ "StructType";
+ [
+ [
+ "column0";
+ [
+ "ListType";
+ [
+ "DataType";
+ "String"
+ ]
+ ]
+ ];
+ [
+ "column1";
+ [
+ "ListType";
+ [
+ "DataType";
+ "String"
+ ]
+ ]
+ ];
+ [
+ "column2";
+ [
+ "ListType";
+ [
+ "DataType";
+ "String"
+ ]
+ ]
+ ]
+ ]
+ ]
+ ];
+ "Data" = [
+ [
+ [
+ "11"
+ ];
+ [
+ ""
+ ];
+ []
+ ]
+ ]
+ }
+ ]
+ }
+] \ No newline at end of file
diff --git a/yql/essentials/udfs/common/json/test/cases/Basic.sql b/yql/essentials/udfs/common/json/test/cases/Basic.sql
new file mode 100644
index 00000000000..512246d7668
--- /dev/null
+++ b/yql/essentials/udfs/common/json/test/cases/Basic.sql
@@ -0,0 +1,12 @@
+/* syntax version 0 */
+$json1 = @@{
+ "x": {
+ "y": ["15", "11", "17"],
+ "z": 1
+ }
+}@@;
+
+SELECT
+ Json::GetField($json1, "/x/y/[1]"),
+ Json::GetField("[]", "/"),
+ Json::GetField($json1, "///");
diff --git a/yql/essentials/udfs/common/json/test/ya.make b/yql/essentials/udfs/common/json/test/ya.make
new file mode 100644
index 00000000000..d0260816188
--- /dev/null
+++ b/yql/essentials/udfs/common/json/test/ya.make
@@ -0,0 +1,13 @@
+YQL_UDF_TEST_CONTRIB()
+
+DEPENDS(yql/essentials/udfs/common/json)
+
+TIMEOUT(300)
+
+SIZE(MEDIUM)
+
+IF (SANITIZER_TYPE == "memory")
+ TAG(ya:not_autocheck) # YQL-15385
+ENDIF()
+
+END()
diff --git a/yql/essentials/udfs/common/json/ya.make b/yql/essentials/udfs/common/json/ya.make
new file mode 100644
index 00000000000..ac0dbd375d6
--- /dev/null
+++ b/yql/essentials/udfs/common/json/ya.make
@@ -0,0 +1,29 @@
+IF (YQL_PACKAGED)
+ PACKAGE()
+ FROM_SANDBOX(FILE 7319900360 OUT_NOAUTO libjson_udf.so
+ )
+ END()
+ELSE ()
+YQL_UDF_CONTRIB(json_udf)
+
+ YQL_ABI_VERSION(
+ 2
+ 28
+ 0
+ )
+
+ SRCS(
+ json_udf.cpp
+ )
+
+ PEERDIR(
+ library/cpp/json/easy_parse
+ )
+
+ END()
+ENDIF ()
+
+
+RECURSE_FOR_TESTS(
+ test
+)
diff --git a/yql/essentials/udfs/common/json2/as_json_node.h b/yql/essentials/udfs/common/json2/as_json_node.h
new file mode 100644
index 00000000000..c7463fffa66
--- /dev/null
+++ b/yql/essentials/udfs/common/json2/as_json_node.h
@@ -0,0 +1,115 @@
+#pragma once
+
+#include "resource.h"
+
+#include <yql/essentials/public/udf/udf_value.h>
+#include <yql/essentials/public/udf/udf_helpers.h>
+#include <yql/essentials/minikql/dom/node.h>
+#include <yql/essentials/minikql/dom/json.h>
+
+namespace NJson2Udf {
+ using namespace NKikimr;
+ using namespace NUdf;
+ using namespace NYql;
+ using namespace NDom;
+
+ // UDF that wraps an optional value of type TSource into a JsonNode resource.
+ // Name() and Interpret() are supplied per source type by the explicit
+ // specializations that follow the class.
+ template <typename TSource>
+ class TAsJsonNode: public TBoxedValue {
+ public:
+ explicit TAsJsonNode(TSourcePosition pos)
+ : Pos_(pos)
+ {
+ }
+
+ static TStringRef Name();
+
+ // Declares the signature (Optional<TSource>) -> Resource<JsonNode> when
+ // `name` matches Name(); returns false for any other name.
+ static bool DeclareSignature(
+ const TStringRef& name,
+ TType* userType,
+ IFunctionTypeInfoBuilder& builder,
+ bool typesOnly) {
+ Y_UNUSED(userType);
+ if (name != Name()) {
+ return false;
+ }
+
+ auto optionalSourceType = builder.Optional()->Item<TSource>().Build();
+ auto resourceType = builder.Resource(JSON_NODE_RESOURCE_NAME);
+ builder.Args()
+ ->Add(optionalSourceType)
+ .Done()
+ .Returns(resourceType);
+
+ if (!typesOnly) {
+ builder.Implementation(new TAsJsonNode<TSource>(builder.GetSourcePosition()));
+ }
+
+ builder.IsStrict();
+ return true;
+ }
+
+ private:
+ // Converts a non-empty source value into a JSON DOM node.
+ static TUnboxedValue Interpret(const TUnboxedValue& sourceValue, const IValueBuilder* valueBuilder);
+
+ // Returns MakeEntity() for an empty argument, otherwise Interpret(arg).
+ // Any std::exception is reported through UdfTerminate together with the
+ // source position of the call.
+ TUnboxedValue Run(
+ const IValueBuilder* valueBuilder,
+ const TUnboxedValuePod* args) const final {
+ try {
+ if (!args[0].HasValue()) {
+ return MakeEntity();
+ }
+ return Interpret(args[0], valueBuilder);
+ } catch (const std::exception& e) {
+ UdfTerminate((TStringBuilder() << Pos_ << " " << e.what()).data());
+ }
+ }
+
+ TSourcePosition Pos_;
+ };
+
+ // The explicit specializations below are ordinary functions defined in a
+ // header, so they are marked inline to stay ODR-safe if the header is ever
+ // included from more than one translation unit.
+
+ template <>
+ inline TStringRef TAsJsonNode<TUtf8>::Name() {
+ return TStringRef::Of("Utf8AsJsonNode");
+ }
+
+ template <>
+ inline TUnboxedValue TAsJsonNode<TUtf8>::Interpret(const TUnboxedValue& sourceValue, const IValueBuilder* valueBuilder) {
+ return MakeString(sourceValue.AsStringRef(), valueBuilder);
+ }
+
+ template <>
+ inline TStringRef TAsJsonNode<double>::Name() {
+ return TStringRef::Of("DoubleAsJsonNode");
+ }
+
+ template <>
+ inline TUnboxedValue TAsJsonNode<double>::Interpret(const TUnboxedValue& sourceValue, const IValueBuilder* valueBuilder) {
+ Y_UNUSED(valueBuilder);
+ return MakeDouble(sourceValue.Get<double>());
+ }
+
+ template <>
+ inline TStringRef TAsJsonNode<bool>::Name() {
+ return TStringRef::Of("BoolAsJsonNode");
+ }
+
+ template <>
+ inline TUnboxedValue TAsJsonNode<bool>::Interpret(const TUnboxedValue& sourceValue, const IValueBuilder* valueBuilder) {
+ Y_UNUSED(valueBuilder);
+ return MakeBool(sourceValue.Get<bool>());
+ }
+
+ template <>
+ inline TStringRef TAsJsonNode<TJson>::Name() {
+ return TStringRef::Of("JsonAsJsonNode");
+ }
+
+ template <>
+ inline TUnboxedValue TAsJsonNode<TJson>::Interpret(const TUnboxedValue& sourceValue, const IValueBuilder* valueBuilder) {
+ return TryParseJsonDom(sourceValue.AsStringRef(), valueBuilder);
+ }
+}
+
diff --git a/yql/essentials/udfs/common/json2/compile_path.h b/yql/essentials/udfs/common/json2/compile_path.h
new file mode 100644
index 00000000000..8239cfc1eee
--- /dev/null
+++ b/yql/essentials/udfs/common/json2/compile_path.h
@@ -0,0 +1,70 @@
+#pragma once
+
+#include "resource.h"
+
+#include <yql/essentials/public/udf/udf_value.h>
+#include <yql/essentials/public/udf/udf_helpers.h>
+
+namespace NJson2Udf {
+ using namespace NKikimr;
+ using namespace NUdf;
+ using namespace NYql;
+
+ // UDF "CompilePath": parses a JsonPath expression given as Utf8 and returns
+ // it wrapped in a JsonPath resource.
+ class TCompilePath: public TBoxedValue {
+ public:
+ explicit TCompilePath(TSourcePosition pos)
+ : Pos_(pos)
+ {
+ }
+
+ static const TStringRef& Name() {
+ static auto name = TStringRef::Of("CompilePath");
+ return name;
+ }
+
+ // Declares the signature (Utf8) -> Resource<JsonPath> when `name` matches
+ // Name(); returns false for any other name. The function is not marked
+ // strict: Run terminates on a parse error.
+ static bool DeclareSignature(
+ const TStringRef& name,
+ TType* userType,
+ IFunctionTypeInfoBuilder& builder,
+ bool typesOnly) {
+ Y_UNUSED(userType);
+ if (name != Name()) {
+ return false;
+ }
+
+ auto resourceType = builder.Resource(JSONPATH_RESOURCE_NAME);
+ builder.Args()
+ ->Add<NUdf::TUtf8>()
+ .Done()
+ .Returns(resourceType);
+
+ if (!typesOnly) {
+ builder.Implementation(new TCompilePath(builder.GetSourcePosition()));
+ }
+ return true;
+ }
+
+ private:
+ // Limit passed to ParseJsonPath as its max-errors argument. A class-level
+ // constant: it is the same for every instance.
+ static constexpr size_t MaxParseErrors = 10;
+
+ // Parses args[0] as a JsonPath. If the parser reports any issues, or any
+ // std::exception is thrown, the UDF terminates with the source position
+ // and the error text.
+ TUnboxedValue Run(
+ const IValueBuilder* valueBuilder,
+ const TUnboxedValuePod* args) const final {
+ Y_UNUSED(valueBuilder);
+ try {
+ TIssues issues;
+ const auto jsonPath = NJsonPath::ParseJsonPath(args[0].AsStringRef(), issues, MaxParseErrors);
+ if (!issues.Empty()) {
+ ythrow yexception() << "Error parsing jsonpath:" << Endl << issues.ToString();
+ }
+
+ return TUnboxedValuePod(new TJsonPathResource(jsonPath));
+ } catch (const std::exception& e) {
+ UdfTerminate((TStringBuilder() << Pos_ << " " << e.what()).data());
+ }
+ }
+
+ TSourcePosition Pos_;
+ };
+}
+
diff --git a/yql/essentials/udfs/common/json2/json2_udf.cpp b/yql/essentials/udfs/common/json2/json2_udf.cpp
new file mode 100644
index 00000000000..96ef6ccf00b
--- /dev/null
+++ b/yql/essentials/udfs/common/json2/json2_udf.cpp
@@ -0,0 +1,43 @@
+#include "as_json_node.h"
+#include "compile_path.h"
+#include "parse.h"
+#include "serialize.h"
+#include "sql_exists.h"
+#include "sql_query.h"
+#include "sql_value.h"
+
+#include <yql/essentials/public/udf/udf_helpers.h>
+
+namespace NJson2Udf {
+ // Module definition: lists every UDF class (and every template
+ // instantiation of it) that the Json2 module exposes. A function that is
+ // not listed here is not part of the module.
+ SIMPLE_MODULE(TJson2Module,
+ TParse,
+ TSerialize<EDataSlot::Json>,
+ TSerialize<EDataSlot::JsonDocument>,
+ TCompilePath,
+ TSqlValue<EDataSlot::Json, TUtf8>,
+ TSqlValue<EDataSlot::Json, TUtf8, true>,
+ TSqlValue<EDataSlot::Json, i64>,
+ TSqlValue<EDataSlot::Json, double>,
+ TSqlValue<EDataSlot::Json, bool>,
+ TSqlValue<EDataSlot::JsonDocument, TUtf8>,
+ TSqlValue<EDataSlot::JsonDocument, TUtf8, true>,
+ TSqlValue<EDataSlot::JsonDocument, i64>,
+ TSqlValue<EDataSlot::JsonDocument, double>,
+ TSqlValue<EDataSlot::JsonDocument, bool>,
+ TSqlExists<EDataSlot::Json, false>,
+ TSqlExists<EDataSlot::Json, true>,
+ TSqlExists<EDataSlot::JsonDocument, false>,
+ TSqlExists<EDataSlot::JsonDocument, true>,
+ TSqlQuery<EDataSlot::Json, EJsonQueryWrap::NoWrap>,
+ TSqlQuery<EDataSlot::Json, EJsonQueryWrap::Wrap>,
+ TSqlQuery<EDataSlot::Json, EJsonQueryWrap::ConditionalWrap>,
+ TSqlQuery<EDataSlot::JsonDocument, EJsonQueryWrap::NoWrap>,
+ TSqlQuery<EDataSlot::JsonDocument, EJsonQueryWrap::Wrap>,
+ TSqlQuery<EDataSlot::JsonDocument, EJsonQueryWrap::ConditionalWrap>,
+ TAsJsonNode<TUtf8>,
+ TAsJsonNode<double>,
+ TAsJsonNode<bool>,
+ TAsJsonNode<TJson>)
+}
+
+REGISTER_MODULES(NJson2Udf::TJson2Module)
diff --git a/yql/essentials/udfs/common/json2/parse.h b/yql/essentials/udfs/common/json2/parse.h
new file mode 100644
index 00000000000..0020c164c2b
--- /dev/null
+++ b/yql/essentials/udfs/common/json2/parse.h
@@ -0,0 +1,66 @@
+#pragma once
+
+#include "resource.h"
+
+#include <yql/essentials/public/udf/udf_value.h>
+#include <yql/essentials/public/udf/udf_helpers.h>
+#include <yql/essentials/minikql/dom/json.h>
+
+#include <library/cpp/json/json_reader.h>
+
+namespace NJson2Udf {
+ using namespace NKikimr;
+ using namespace NUdf;
+ using namespace NYql;
+ using namespace NDom;
+
+ // UDF "Parse": turns a Json value into a JsonNode resource by calling
+ // TryParseJsonDom.
+ class TParse: public TBoxedValue {
+ public:
+ explicit TParse(TSourcePosition pos)
+ : Pos_(pos)
+ {
+ }
+
+ static const TStringRef& Name() {
+ static auto name = TStringRef::Of("Parse");
+ return name;
+ }
+
+ // Declares the signature (AutoMap<Json>) -> Resource<JsonNode> when `name`
+ // matches Name(); returns false for any other name.
+ static bool DeclareSignature(
+ const TStringRef& name,
+ TType* userType,
+ IFunctionTypeInfoBuilder& builder,
+ bool typesOnly) {
+ Y_UNUSED(userType);
+ if (name != Name()) {
+ return false;
+ }
+
+ builder.Args()
+ ->Add<TAutoMap<TJson>>()
+ .Done()
+ .Returns<TJsonNodeResource>();
+
+ if (!typesOnly) {
+ builder.Implementation(new TParse(builder.GetSourcePosition()));
+ }
+ return true;
+ }
+
+ private:
+ // Parses args[0]; any std::exception is reported through UdfTerminate
+ // together with the source position of the call.
+ TUnboxedValue Run(
+ const IValueBuilder* valueBuilder,
+ const TUnboxedValuePod* args) const final {
+ try {
+ const auto json = args[0].AsStringRef();
+ return TryParseJsonDom(json, valueBuilder);
+ } catch (const std::exception& e) {
+ UdfTerminate((TStringBuilder() << Pos_ << " " << e.what()).data());
+ }
+ }
+
+ TSourcePosition Pos_;
+ };
+}
+
diff --git a/yql/essentials/udfs/common/json2/resource.h b/yql/essentials/udfs/common/json2/resource.h
new file mode 100644
index 00000000000..aa65b14818d
--- /dev/null
+++ b/yql/essentials/udfs/common/json2/resource.h
@@ -0,0 +1,17 @@
+#pragma once
+
+#include <yql/essentials/public/udf/udf_value.h>
+#include <yql/essentials/minikql/jsonpath/jsonpath.h>
+
+namespace NJson2Udf {
+ using namespace NKikimr;
+ using namespace NUdf;
+ using namespace NYql;
+
+ // Resource tags used as template arguments of the resource types below.
+ // Declared inline constexpr (C++17) so that this header can be included
+ // from several translation units without producing duplicate definitions.
+ inline constexpr char JSONPATH_RESOURCE_NAME[] = "JsonPath";
+ using TJsonPathResource = TBoxedResource<NJsonPath::TJsonPathPtr, JSONPATH_RESOURCE_NAME>;
+
+ inline constexpr char JSON_NODE_RESOURCE_NAME[] = "JsonNode";
+ using TJsonNodeResource = TResource<JSON_NODE_RESOURCE_NAME>;
+}
+
diff --git a/yql/essentials/udfs/common/json2/serialize.h b/yql/essentials/udfs/common/json2/serialize.h
new file mode 100644
index 00000000000..a7077cb6e6d
--- /dev/null
+++ b/yql/essentials/udfs/common/json2/serialize.h
@@ -0,0 +1,89 @@
+#pragma once
+
+#include "resource.h"
+
+#include <yql/essentials/public/udf/udf_value.h>
+#include <yql/essentials/public/udf/udf_helpers.h>
+#include <yql/essentials/minikql/dom/json.h>
+
+#include <yql/essentials/types/binary_json/write.h>
+
+namespace NJson2Udf {
+ using namespace NKikimr;
+ using namespace NUdf;
+ using namespace NYql;
+ using namespace NDom;
+ using namespace NBinaryJson;
+
+ // UDF that serializes a JsonNode resource either to Json text
+ // (ResultType == EDataSlot::Json) or to the binary JsonDocument form
+ // (any other ResultType). The function name is supplied by the explicit
+ // specializations of Name() that follow the class.
+ template <EDataSlot ResultType>
+ class TSerialize : public TBoxedValue {
+ public:
+ explicit TSerialize(TSourcePosition pos)
+ : Pos_(pos)
+ {
+ }
+
+ static const TStringRef& Name();
+
+ // Declares the signature (AutoMap<Resource<JsonNode>>) -> Json or
+ // JsonDocument when `name` matches Name(); returns false otherwise.
+ static bool DeclareSignature(
+ const TStringRef& name,
+ TType* userType,
+ IFunctionTypeInfoBuilder& builder,
+ bool typesOnly) {
+ Y_UNUSED(userType);
+ if (name != Name()) {
+ return false;
+ }
+
+ TType* resultType = nullptr;
+ if constexpr (ResultType == EDataSlot::Json) {
+ resultType = builder.SimpleType<TJson>();
+ } else {
+ resultType = builder.SimpleType<TJsonDocument>();
+ }
+
+ builder.Args()
+ ->Add<TAutoMap<TJsonNodeResource>>()
+ .Done()
+ .Returns(resultType);
+
+ if (!typesOnly) {
+ builder.Implementation(new TSerialize(builder.GetSourcePosition()));
+ }
+ return true;
+ }
+
+ private:
+ // Serializes args[0]; any std::exception is reported through UdfTerminate
+ // together with the source position of the call.
+ TUnboxedValue Run(
+ const IValueBuilder* valueBuilder,
+ const TUnboxedValuePod* args) const final {
+ try {
+ const TUnboxedValue& jsonDom = args[0];
+
+ if constexpr (ResultType == EDataSlot::Json) {
+ return valueBuilder->NewString(SerializeJsonDom(jsonDom));
+ } else {
+ const auto binaryJson = SerializeToBinaryJson(jsonDom);
+ return valueBuilder->NewString(TStringBuf(binaryJson.Data(), binaryJson.Size()));
+ }
+ } catch (const std::exception& e) {
+ UdfTerminate((TStringBuilder() << Pos_ << " " << e.what()).data());
+ }
+ }
+
+ TSourcePosition Pos_;
+ };
+
+ // The explicit specializations below are ordinary functions defined in a
+ // header, so they are marked inline to stay ODR-safe if the header is ever
+ // included from more than one translation unit.
+
+ template <>
+ inline const TStringRef& TSerialize<EDataSlot::Json>::Name() {
+ static auto name = TStringRef::Of("Serialize");
+ return name;
+ }
+
+ template <>
+ inline const TStringRef& TSerialize<EDataSlot::JsonDocument>::Name() {
+ static auto name = TStringRef::Of("SerializeToJsonDocument");
+ return name;
+ }
+}
+
diff --git a/yql/essentials/udfs/common/json2/sql_exists.h b/yql/essentials/udfs/common/json2/sql_exists.h
new file mode 100644
index 00000000000..8a049b49d42
--- /dev/null
+++ b/yql/essentials/udfs/common/json2/sql_exists.h
@@ -0,0 +1,135 @@
+#pragma once
+
+#include "resource.h"
+#include "compile_path.h"
+
+#include <yql/essentials/public/udf/udf_type_builder.h>
+#include <yql/essentials/public/udf/udf_value.h>
+#include <yql/essentials/public/udf/udf_helpers.h>
+
+#include <util/generic/yexception.h>
+
+namespace NJson2Udf {
+ using namespace NKikimr;
+ using namespace NUdf;
+ using namespace NYql;
+ using namespace NJsonPath;
+
+ // UDF that executes a compiled jsonpath against a JSON value and reports whether
+ // the result contains at least one node.
+ //
+ // InputType selects how the input argument is read: EDataSlot::JsonDocument means a
+ // binary JsonDocument string, anything else means a Resource<JsonNode> DOM value.
+ // ThrowException selects the behaviour on a jsonpath execution error: true raises
+ // an exception (which terminates the UDF), false returns the caller-supplied
+ // default passed as an extra 4th argument.
+ template <EDataSlot InputType, bool ThrowException>
+ class TSqlExists: public TBoxedValue {
+ public:
+     // pos is prepended to the message if Run has to terminate on an exception.
+     explicit TSqlExists(TSourcePosition pos)
+         : Pos_(pos)
+     {
+     }
+
+     // Name this instantiation answers to in DeclareSignature; provided by
+     // explicit specializations.
+     static TStringRef Name();
+
+     // Declares the signature when `name` matches Name(), otherwise returns false.
+     // Arguments:
+     //   0. Resource<JsonNode>? or JsonDocument?. Input json
+     //   1. Resource<JsonPath>. Jsonpath to execute on json
+     //   2. Dict<Utf8, Resource<JsonNode>>. Variables to pass into jsonpath
+     //   3. Bool?. Value to return on error (present only when ThrowException is false)
+     // Returns Bool?. Only the non-throwing variant is marked strict.
+     static bool DeclareSignature(
+         const TStringRef& name,
+         TType* userType,
+         IFunctionTypeInfoBuilder& builder,
+         bool typesOnly) {
+         Y_UNUSED(userType);
+         if (name != Name()) {
+             return false;
+         }
+
+         auto jsonType = builder.Resource(JSON_NODE_RESOURCE_NAME);
+         TType* inputType = nullptr;
+         if constexpr (InputType == EDataSlot::JsonDocument) {
+             inputType = builder.SimpleType<TJsonDocument>();
+         } else {
+             inputType = jsonType;
+         }
+         auto inputOptionalType = builder.Optional()->Item(inputType).Build();
+         auto jsonPathType = builder.Resource(JSONPATH_RESOURCE_NAME);
+         auto dictType = builder.Dict()->Key<TUtf8>().Value(jsonType).Build();
+         auto optionalBoolType = builder.Optional()->Item<bool>().Build();
+
+         if constexpr (ThrowException) {
+             builder.Args()
+                 ->Add(inputOptionalType)
+                 .Add(jsonPathType)
+                 .Add(dictType)
+                 .Done()
+                 .Returns(optionalBoolType);
+         } else {
+             builder.Args()
+                 ->Add(inputOptionalType)
+                 .Add(jsonPathType)
+                 .Add(dictType)
+                 .Add(optionalBoolType)
+                 .Done()
+                 .Returns(optionalBoolType);
+         }
+
+         if (!typesOnly) {
+             builder.Implementation(new TSqlExists(builder.GetSourcePosition()));
+         }
+         if constexpr (!ThrowException) {
+             builder.IsStrict();
+         }
+         return true;
+     }
+
+ private:
+     // Returns an empty optional for an empty input, otherwise true/false depending
+     // on whether the jsonpath produced any nodes. On a jsonpath error either throws
+     // (ThrowException) or returns args[3].
+     TUnboxedValue Run(
+         const IValueBuilder* valueBuilder,
+         const TUnboxedValuePod* args) const final {
+         try {
+             if (!args[0].HasValue()) {
+                 return TUnboxedValuePod();
+             }
+
+             TValue jsonDom;
+             if constexpr (InputType == EDataSlot::JsonDocument) {
+                 jsonDom = TValue(NBinaryJson::TBinaryJsonReader::Make(args[0].AsStringRef())->GetRootCursor());
+             } else {
+                 jsonDom = TValue(args[0]);
+             }
+
+             auto* jsonPathResource = static_cast<TJsonPathResource*>(args[1].AsBoxed().Get());
+             const auto& jsonPath = *jsonPathResource->Get();
+             const auto variables = DictToVariables(args[2]);
+
+             const auto result = ExecuteJsonPath(jsonPath, jsonDom, variables, valueBuilder);
+             if (result.IsError()) {
+                 if constexpr (ThrowException) {
+                     ythrow yexception() << "Error executing jsonpath:" << Endl << result.GetError() << Endl;
+                 } else {
+                     return args[3];
+                 }
+             }
+
+             return TUnboxedValuePod(!result.GetNodes().empty());
+         } catch (const std::exception& e) {
+             UdfTerminate((TStringBuilder() << Pos_ << " " << e.what()).data());
+         }
+     }
+
+     TSourcePosition Pos_;
+ };
+
+ // Names of the TSqlExists instantiations. An explicit specialization is an ordinary
+ // function, so each one is marked inline: without it, including this header from a
+ // second translation unit would produce multiple-definition link errors.
+ template <>
+ inline TStringRef TSqlExists<EDataSlot::Json, false>::Name() {
+     return "SqlExists";
+ }
+
+ template <>
+ inline TStringRef TSqlExists<EDataSlot::Json, true>::Name() {
+     return "SqlTryExists";
+ }
+
+ template <>
+ inline TStringRef TSqlExists<EDataSlot::JsonDocument, false>::Name() {
+     return "JsonDocumentSqlExists";
+ }
+
+ template <>
+ inline TStringRef TSqlExists<EDataSlot::JsonDocument, true>::Name() {
+     return "JsonDocumentSqlTryExists";
+ }
+}
+
diff --git a/yql/essentials/udfs/common/json2/sql_query.h b/yql/essentials/udfs/common/json2/sql_query.h
new file mode 100644
index 00000000000..cb3bafd3b0b
--- /dev/null
+++ b/yql/essentials/udfs/common/json2/sql_query.h
@@ -0,0 +1,184 @@
+#pragma once
+
+#include "resource.h"
+#include "compile_path.h"
+
+#include <yql/essentials/core/sql_types/yql_atom_enums.h>
+#include <yql/essentials/public/udf/udf_type_builder.h>
+#include <yql/essentials/public/udf/udf_value.h>
+#include <yql/essentials/public/udf/udf_helpers.h>
+#include <yql/essentials/minikql/dom/node.h>
+
+#include <util/generic/yexception.h>
+
+namespace NJson2Udf {
+ using namespace NKikimr;
+ using namespace NUdf;
+ using namespace NYql;
+ using namespace NDom;
+ using namespace NJsonPath;
+
+ // UDF that executes a compiled jsonpath against a JSON value and returns the
+ // resulting array/object as a JSON DOM node.
+ //
+ // InputType selects how the input argument is read: EDataSlot::JsonDocument means a
+ // binary JsonDocument string, anything else means a Resource<JsonNode> DOM value.
+ // Mode selects the wrapping policy: Wrap always packs the result nodes into a list,
+ // ConditionalWrap does so unless the result is exactly one array or object, and
+ // NoWrap never wraps.
+ template <EDataSlot InputType, EJsonQueryWrap Mode>
+ class TSqlQuery: public TBoxedValue {
+ public:
+     // pos is prepended to the message if Run has to terminate on an exception.
+     explicit TSqlQuery(TSourcePosition pos)
+         : Pos_(pos)
+     {
+     }
+
+     // Name this instantiation answers to in DeclareSignature; provided by
+     // explicit specializations.
+     static TStringRef Name();
+
+     // Declares the signature when `name` matches Name(), otherwise returns false.
+     static bool DeclareSignature(
+         const TStringRef& name,
+         TType* userType,
+         IFunctionTypeInfoBuilder& builder,
+         bool typesOnly) {
+         Y_UNUSED(userType);
+         if (name != Name()) {
+             return false;
+         }
+
+         auto jsonType = builder.Resource(JSON_NODE_RESOURCE_NAME);
+         auto optionalJsonType = builder.Optional()->Item(jsonType).Build();
+         TType* inputType = nullptr;
+         if constexpr (InputType == EDataSlot::JsonDocument) {
+             inputType = builder.SimpleType<TJsonDocument>();
+         } else {
+             inputType = jsonType;
+         }
+         auto inputOptionalType = builder.Optional()->Item(inputType).Build();
+         auto jsonPathType = builder.Resource(JSONPATH_RESOURCE_NAME);
+         auto dictType = builder.Dict()->Key<TUtf8>().Value(jsonType).Build();
+
+         /*
+         Arguments (numbered from zero, matching the args[] indices used in Run):
+         0. Resource<JsonNode>? or JsonDocument?. Input json
+         1. Resource<JsonPath>. Jsonpath to execute on json
+         2. Dict<TUtf8, Resource<JsonNode>>. Variables to pass into jsonpath
+         3. Bool. True - throw on empty result, false otherwise
+         4. Resource<JsonNode>?. Default value to return on empty result. Ignored if 3rd argument is true
+         5. Bool. True - throw on error, false - otherwise
+         6. Resource<JsonNode>?. Default value to return on error. Ignored if 5th argument is true
+         */
+         // we can't mark TSqlQuery as strict due to runtime throw policy setting
+         // TODO: optimizer can mark SqlQuery as strict if 3rd/5th arguments are literal booleans
+         builder.Args()
+             ->Add(inputOptionalType)
+             .Add(jsonPathType)
+             .Add(dictType)
+             .Add<bool>()
+             .Add(optionalJsonType)
+             .Add<bool>()
+             .Add(optionalJsonType)
+             .Done()
+             .Returns(optionalJsonType);
+
+         if (!typesOnly) {
+             builder.Implementation(new TSqlQuery(builder.GetSourcePosition()));
+         }
+         return true;
+     }
+
+ private:
+     // Returns an empty optional for an empty input. Otherwise executes the jsonpath
+     // and returns the (possibly wrapped) result, or applies the "on empty" / "on
+     // error" policy: throw when the corresponding flag is set, else return the
+     // corresponding default value.
+     TUnboxedValue Run(
+         const IValueBuilder* valueBuilder,
+         const TUnboxedValuePod* args) const final {
+         try {
+             if (!args[0].HasValue()) {
+                 return TUnboxedValuePod();
+             }
+
+             TValue jsonDom;
+             if constexpr (InputType == EDataSlot::JsonDocument) {
+                 jsonDom = TValue(NBinaryJson::TBinaryJsonReader::Make(args[0].AsStringRef())->GetRootCursor());
+             } else {
+                 jsonDom = TValue(args[0]);
+             }
+
+             auto* jsonPathResource = static_cast<TJsonPathResource*>(args[1].AsBoxed().Get());
+             const auto& jsonPath = *jsonPathResource->Get();
+
+             const bool throwOnEmpty = args[3].Get<bool>();
+             const TUnboxedValuePod& emptyDefault = args[4];
+             const bool throwOnError = args[5].Get<bool>();
+             const TUnboxedValuePod& errorDefault = args[6];
+             const auto variables = DictToVariables(args[2]);
+
+             auto result = ExecuteJsonPath(jsonPath, jsonDom, variables, valueBuilder);
+
+             // Either throws `message` or yields the default for this case.
+             // The result is built as an owning TUnboxedValue directly from the lvalue
+             // argument, the same way TSqlExists returns `args[3]`, instead of from a
+             // temporary TUnboxedValuePod copy.
+             // NOTE(review): a TUnboxedValue constructed from an rvalue pod is understood
+             // to adopt it without taking a reference - confirm against udf_value.h.
+             const auto handleCase = [](TStringBuf message, bool throws, const TUnboxedValuePod& caseDefault) -> TUnboxedValue {
+                 if (throws) {
+                     ythrow yexception() << message;
+                 }
+                 return caseDefault;
+             };
+
+             if (result.IsError()) {
+                 return handleCase(TStringBuilder() << "Error executing jsonpath:" << Endl << result.GetError() << Endl, throwOnError, errorDefault);
+             }
+
+             auto& nodes = result.GetNodes();
+             const bool isSingleStruct = nodes.size() == 1 && (nodes[0].Is(EValueType::Array) || nodes[0].Is(EValueType::Object));
+             if (Mode == EJsonQueryWrap::Wrap || (Mode == EJsonQueryWrap::ConditionalWrap && !isSingleStruct)) {
+                 TVector<TUnboxedValue> converted;
+                 converted.reserve(nodes.size());
+                 for (auto& node : nodes) {
+                     converted.push_back(node.ConvertToUnboxedValue(valueBuilder));
+                 }
+                 return MakeList(converted.data(), converted.size(), valueBuilder);
+             }
+
+             if (nodes.empty()) {
+                 return handleCase("Empty result", throwOnEmpty, emptyDefault);
+             }
+
+             // No wrapping is applicable and result is not empty. Result must be a single object or array
+             if (nodes.size() > 1) {
+                 return handleCase("Result consists of multiple items", throwOnError, errorDefault);
+             }
+
+             if (!nodes[0].Is(EValueType::Array) && !nodes[0].Is(EValueType::Object)) {
+                 return handleCase("Result is neither object nor array", throwOnError, errorDefault);
+             }
+
+             return nodes[0].ConvertToUnboxedValue(valueBuilder);
+         } catch (const std::exception& e) {
+             UdfTerminate((TStringBuilder() << Pos_ << " " << e.what()).data());
+         }
+     }
+
+     TSourcePosition Pos_;
+ };
+
+ // Names of the TSqlQuery instantiations. An explicit specialization is an ordinary
+ // function, so each one is marked inline: without it, including this header from a
+ // second translation unit would produce multiple-definition link errors.
+ template <>
+ inline TStringRef TSqlQuery<EDataSlot::Json, EJsonQueryWrap::NoWrap>::Name() {
+     return "SqlQuery";
+ }
+
+ template <>
+ inline TStringRef TSqlQuery<EDataSlot::Json, EJsonQueryWrap::Wrap>::Name() {
+     return "SqlQueryWrap";
+ }
+
+ template <>
+ inline TStringRef TSqlQuery<EDataSlot::Json, EJsonQueryWrap::ConditionalWrap>::Name() {
+     return "SqlQueryConditionalWrap";
+ }
+
+ template <>
+ inline TStringRef TSqlQuery<EDataSlot::JsonDocument, EJsonQueryWrap::NoWrap>::Name() {
+     return "JsonDocumentSqlQuery";
+ }
+
+ template <>
+ inline TStringRef TSqlQuery<EDataSlot::JsonDocument, EJsonQueryWrap::Wrap>::Name() {
+     return "JsonDocumentSqlQueryWrap";
+ }
+
+ template <>
+ inline TStringRef TSqlQuery<EDataSlot::JsonDocument, EJsonQueryWrap::ConditionalWrap>::Name() {
+     return "JsonDocumentSqlQueryConditionalWrap";
+ }
+}
+
diff --git a/yql/essentials/udfs/common/json2/sql_value.h b/yql/essentials/udfs/common/json2/sql_value.h
new file mode 100644
index 00000000000..8d3318a8c54
--- /dev/null
+++ b/yql/essentials/udfs/common/json2/sql_value.h
@@ -0,0 +1,296 @@
+#pragma once
+
+#include "resource.h"
+#include "compile_path.h"
+
+#include <yql/essentials/public/udf/udf_type_builder.h>
+#include <yql/essentials/public/udf/udf_value.h>
+#include <yql/essentials/public/udf/udf_helpers.h>
+#include <yql/essentials/minikql/dom/node.h>
+
+#include <yql/essentials/types/binary_json/read.h>
+
+#include <util/generic/yexception.h>
+#include <util/generic/ylimits.h>
+#include <util/string/cast.h>
+
+namespace NJson2Udf {
+ using namespace NKikimr;
+ using namespace NUdf;
+ using namespace NYql;
+ using namespace NDom;
+ using namespace NJsonPath;
+
+ namespace {
+ // Primary template: reached only for a target type that has no explicit
+ // specialization, in which case it aborts with "Unsupported type".
+ template <class TValueType, bool ForceConvert = false>
+ TUnboxedValue TryConvertJson(const IValueBuilder* /* valueBuilder */, const TUnboxedValue& /* source */) {
+     Y_ABORT("Unsupported type");
+ }
+
+ // Strict Utf8 conversion: only String nodes are accepted and passed through as-is;
+ // every other node type yields an empty value.
+ template <>
+ TUnboxedValue TryConvertJson<TUtf8>(const IValueBuilder* valueBuilder, const TUnboxedValue& source) {
+     Y_UNUSED(valueBuilder);
+     if (!IsNodeType(source, ENodeType::String)) {
+         return {};
+     }
+     return source;
+ }
+
+ // Forced Utf8 conversion: String nodes pass through, numeric nodes are rendered
+ // with ToString, Bool becomes "true"/"false" and Entity becomes "null".
+ // List, Dict and Attr nodes cannot be converted and yield an empty value.
+ template <>
+ TUnboxedValue TryConvertJson<TUtf8, true>(const IValueBuilder* valueBuilder, const TUnboxedValue& source) {
+     switch (GetNodeType(source)) {
+         case ENodeType::String:
+             return source;
+         case ENodeType::Uint64:
+             return valueBuilder->NewString(ToString(source.Get<ui64>())).Release();
+         case ENodeType::Int64:
+             return valueBuilder->NewString(ToString(source.Get<i64>())).Release();
+         case ENodeType::Bool:
+             return source.Get<bool>() ? TUnboxedValuePod::Embedded("true") : TUnboxedValuePod::Embedded("false");
+         case ENodeType::Double:
+             return valueBuilder->NewString(ToString(source.Get<double>())).Release();
+         case ENodeType::Entity:
+             return TUnboxedValuePod::Embedded("null");
+         case ENodeType::List:
+         case ENodeType::Dict:
+         case ENodeType::Attr:
+             return {};
+     }
+     // The switch deliberately has no default so the compiler can flag unhandled
+     // enumerators. This return keeps the function from flowing off its end
+     // (undefined behaviour) if the node type ever holds an unexpected value.
+     return {};
+ }
+
+ // Int64 conversion. Accepts Int64 nodes, Uint64 nodes that fit into i64 and Double
+ // nodes that hold an integral value inside the i64 range. Anything else (including
+ // non-embedded values) yields an empty value.
+ template <>
+ TUnboxedValue TryConvertJson<i64>(const IValueBuilder* valueBuilder, const TUnboxedValue& source) {
+     Y_UNUSED(valueBuilder);
+     if (!source.IsEmbedded()) {
+         return {};
+     }
+
+     if (IsNodeType(source, ENodeType::Int64)) {
+         return TUnboxedValuePod(source.Get<i64>());
+     }
+
+     if (IsNodeType(source, ENodeType::Uint64)) {
+         const ui64 unsignedValue = source.Get<ui64>();
+         // Max<i64>() itself is representable as i64, so the bound is inclusive.
+         if (unsignedValue <= static_cast<ui64>(Max<i64>())) {
+             return TUnboxedValuePod(static_cast<i64>(unsignedValue));
+         }
+     }
+
+     if (IsNodeType(source, ENodeType::Double)) {
+         const double doubleValue = source.Get<double>();
+         // 2^63 is the first double past the i64 range; -2^63 is the lowest i64 value.
+         constexpr double i64RangeEnd = 9223372036854775808.0;
+         // Check the range before casting: converting an out-of-range or NaN double
+         // to an integer is undefined behaviour. NaN fails both comparisons.
+         const bool inRange = doubleValue >= -i64RangeEnd && doubleValue < i64RangeEnd;
+         if (inRange && static_cast<double>(static_cast<i64>(doubleValue)) == doubleValue) {
+             return TUnboxedValuePod(static_cast<i64>(doubleValue));
+         }
+     }
+
+     return {};
+ }
+
+ // Double conversion. Accepts Double nodes as-is and casts Int64/Uint64 nodes to
+ // double. Anything else (including non-embedded values) yields an empty value.
+ template <>
+ TUnboxedValue TryConvertJson<double>(const IValueBuilder* valueBuilder, const TUnboxedValue& source) {
+     Y_UNUSED(valueBuilder);
+     if (source.IsEmbedded()) {
+         if (IsNodeType(source, ENodeType::Double)) {
+             return TUnboxedValuePod(source.Get<double>());
+         }
+         if (IsNodeType(source, ENodeType::Int64)) {
+             return TUnboxedValuePod(static_cast<double>(source.Get<i64>()));
+         }
+         if (IsNodeType(source, ENodeType::Uint64)) {
+             return TUnboxedValuePod(static_cast<double>(source.Get<ui64>()));
+         }
+     }
+
+     return {};
+ }
+
+ // Bool conversion. Only embedded Bool nodes are accepted; everything else yields
+ // an empty value.
+ template <>
+ TUnboxedValue TryConvertJson<bool>(const IValueBuilder* valueBuilder, const TUnboxedValue& source) {
+     Y_UNUSED(valueBuilder);
+     if (source.IsEmbedded() && IsNodeType(source, ENodeType::Bool)) {
+         return {TUnboxedValuePod(source.Get<bool>())};
+     }
+     return {};
+ }
+ }
+
+ // UDF that executes a compiled jsonpath against a JSON value and extracts a single
+ // scalar of type TValueType.
+ //
+ // InputType selects how the input argument is read: EDataSlot::Json means a
+ // Resource<JsonNode> DOM value, anything else means a binary JsonDocument string.
+ // ForceConvert is forwarded to TryConvertJson to pick the conversion flavour.
+ //
+ // The function never throws on jsonpath problems. It returns a variant instead:
+ // alternative 0 is an error tuple (TErrorCode as ui8, message string) and
+ // alternative 1 is the optional extracted value.
+ template <EDataSlot InputType, class TValueType, bool ForceConvert = false>
+ class TSqlValue: public TBoxedValue {
+ public:
+     // Error kind stored in the first element of the error tuple.
+     enum class TErrorCode : ui8 {
+         Empty = 0,
+         Error = 1
+     };
+
+     // pos is prepended to the message if Run has to terminate on an exception.
+     // explicit: matches the sibling UDF classes and prevents accidental
+     // implicit conversion from TSourcePosition.
+     explicit TSqlValue(TSourcePosition pos)
+         : Pos_(pos)
+     {
+     }
+
+     // Name this instantiation answers to in DeclareSignature.
+     static TStringRef Name();
+
+     // Declares the signature when `name` matches Name(), otherwise returns false.
+     // Arguments:
+     //   0. Resource<JsonNode>? or JsonDocument?. Input json
+     //   1. Resource<JsonPath>. Jsonpath to execute on json
+     //   2. Dict<Utf8, Resource<JsonNode>>. Variables to pass into jsonpath
+     // Returns Variant<Tuple<Uint8, String>, TValueType?>. The function is marked strict.
+     static bool DeclareSignature(
+         const TStringRef& name,
+         TType* userType,
+         IFunctionTypeInfoBuilder& builder,
+         bool typesOnly) {
+         Y_UNUSED(userType);
+         if (name != Name()) {
+             return false;
+         }
+
+         auto optionalValueType = builder.Optional()->Item<TValueType>().Build();
+         auto errorTupleType = builder.Tuple(2)->Add<ui8>().Add<char*>().Build();
+         auto returnTypeTuple = builder.Tuple(2)
+             ->Add(errorTupleType)
+             .Add(optionalValueType)
+             .Build();
+         auto returnType = builder.Variant()->Over(returnTypeTuple).Build();
+
+         TType* jsonType = nullptr;
+         if constexpr (InputType == EDataSlot::Json) {
+             jsonType = builder.Resource(JSON_NODE_RESOURCE_NAME);
+         } else {
+             jsonType = builder.SimpleType<TJsonDocument>();
+         }
+         auto optionalJsonType = builder.Optional()->Item(jsonType).Build();
+         auto jsonPathType = builder.Resource(JSONPATH_RESOURCE_NAME);
+         auto dictType = builder.Dict()->Key<TUtf8>().Value(builder.Resource(JSON_NODE_RESOURCE_NAME)).Build();
+
+         builder.Args()
+             ->Add(optionalJsonType)
+             .Add(jsonPathType)
+             .Add(dictType)
+             .Done()
+             .Returns(returnType);
+
+         builder.IsStrict();
+
+         if (!typesOnly) {
+             builder.Implementation(new TSqlValue(builder.GetSourcePosition()));
+         }
+         return true;
+     }
+
+ private:
+     // Builds variant alternative 0: the tuple (code, message).
+     TUnboxedValue BuildErrorResult(const IValueBuilder* valueBuilder, TErrorCode code, const TStringBuf message) const {
+         TUnboxedValue* items = nullptr;
+         auto errorTuple = valueBuilder->NewArray(2, items);
+         items[0] = TUnboxedValuePod(static_cast<ui8>(code));
+         items[1] = valueBuilder->NewString(message);
+         return valueBuilder->NewVariant(0, std::move(errorTuple));
+     }
+
+     // Builds variant alternative 1: the (possibly empty) extracted value.
+     TUnboxedValue BuildSuccessfulResult(const IValueBuilder* valueBuilder, TUnboxedValue&& value) const {
+         return valueBuilder->NewVariant(1, std::move(value));
+     }
+
+     // Executes the jsonpath and maps the outcome onto the result variant:
+     // empty input or JSON null -> success with an empty value; jsonpath error,
+     // multiple items, array/object item or failed conversion -> Error;
+     // no items -> Empty; otherwise success with the converted scalar.
+     TUnboxedValue Run(
+         const IValueBuilder* valueBuilder,
+         const TUnboxedValuePod* args) const final {
+         try {
+             if (!args[0].HasValue()) {
+                 return BuildSuccessfulResult(valueBuilder, TUnboxedValuePod());
+             }
+
+             TValue jsonDom;
+             if constexpr (InputType == EDataSlot::JsonDocument) {
+                 jsonDom = TValue(NBinaryJson::TBinaryJsonReader::Make(args[0].AsStringRef())->GetRootCursor());
+             } else {
+                 jsonDom = TValue(args[0]);
+             }
+
+             auto* jsonPathResource = static_cast<TJsonPathResource*>(args[1].AsBoxed().Get());
+             const auto& jsonPath = *jsonPathResource->Get();
+             const auto variables = DictToVariables(args[2]);
+
+             const auto result = ExecuteJsonPath(jsonPath, jsonDom, variables, valueBuilder);
+
+             if (result.IsError()) {
+                 return BuildErrorResult(valueBuilder, TErrorCode::Error, TStringBuilder() << "Error executing jsonpath:" << Endl << result.GetError() << Endl);
+             }
+
+             const auto& nodes = result.GetNodes();
+             if (nodes.empty()) {
+                 return BuildErrorResult(valueBuilder, TErrorCode::Empty, "Result is empty");
+             }
+
+             if (nodes.size() > 1) {
+                 return BuildErrorResult(valueBuilder, TErrorCode::Error, "Result consists of multiple items");
+             }
+
+             const auto& value = nodes[0];
+             if (value.Is(EValueType::Array) || value.Is(EValueType::Object)) {
+                 // SqlValue can return only scalar values
+                 return BuildErrorResult(valueBuilder, TErrorCode::Error, "Extracted JSON value is either object or array");
+             }
+
+             if (value.Is(EValueType::Null)) {
+                 // JSON nulls must be converted to SQL nulls
+                 return BuildSuccessfulResult(valueBuilder, TUnboxedValuePod());
+             }
+
+             const auto source = value.ConvertToUnboxedValue(valueBuilder);
+             TUnboxedValue convertedValue = TryConvertJson<TValueType, ForceConvert>(valueBuilder, source);
+             if (!convertedValue) {
+                 // error while converting JSON value type to TValueType
+                 return BuildErrorResult(valueBuilder, TErrorCode::Error, "Cannot convert extracted JSON value to target type");
+             }
+
+             return BuildSuccessfulResult(valueBuilder, std::move(convertedValue));
+         } catch (const std::exception& e) {
+             UdfTerminate((TStringBuilder() << Pos_ << " " << e.what()).data());
+         }
+     }
+
+     TSourcePosition Pos_;
+ };
+
+ // Fallback for instantiations without a registered name: aborts with "Unknown name".
+ template <EDataSlot InputType, class TValueType, bool ForceConvert>
+ TStringRef TSqlValue<InputType, TValueType, ForceConvert>::Name() {
+     Y_ABORT("Unknown name");
+ }
+
+ // Names of the supported TSqlValue instantiations. An explicit specialization is an
+ // ordinary function, so each one is marked inline: without it, including this header
+ // from a second translation unit would produce multiple-definition link errors.
+ template <>
+ inline TStringRef TSqlValue<EDataSlot::Json, TUtf8, true>::Name() {
+     return TStringRef::Of("SqlValueConvertToUtf8");
+ }
+
+ template <>
+ inline TStringRef TSqlValue<EDataSlot::Json, TUtf8>::Name() {
+     return TStringRef::Of("SqlValueUtf8");
+ }
+
+ template <>
+ inline TStringRef TSqlValue<EDataSlot::Json, i64>::Name() {
+     return TStringRef::Of("SqlValueInt64");
+ }
+
+ template <>
+ inline TStringRef TSqlValue<EDataSlot::Json, double>::Name() {
+     return TStringRef::Of("SqlValueNumber");
+ }
+
+ template <>
+ inline TStringRef TSqlValue<EDataSlot::Json, bool>::Name() {
+     return TStringRef::Of("SqlValueBool");
+ }
+
+ template <>
+ inline TStringRef TSqlValue<EDataSlot::JsonDocument, TUtf8, true>::Name() {
+     return TStringRef::Of("JsonDocumentSqlValueConvertToUtf8");
+ }
+
+ template <>
+ inline TStringRef TSqlValue<EDataSlot::JsonDocument, TUtf8>::Name() {
+     return TStringRef::Of("JsonDocumentSqlValueUtf8");
+ }
+
+ template <>
+ inline TStringRef TSqlValue<EDataSlot::JsonDocument, i64>::Name() {
+     return TStringRef::Of("JsonDocumentSqlValueInt64");
+ }
+
+ template <>
+ inline TStringRef TSqlValue<EDataSlot::JsonDocument, double>::Name() {
+     return TStringRef::Of("JsonDocumentSqlValueNumber");
+ }
+
+ template <>
+ inline TStringRef TSqlValue<EDataSlot::JsonDocument, bool>::Name() {
+     return TStringRef::Of("JsonDocumentSqlValueBool");
+ }
+
+}
diff --git a/yql/essentials/udfs/common/json2/test/canondata/result.json b/yql/essentials/udfs/common/json2/test/canondata/result.json
new file mode 100644
index 00000000000..086f5e77ead
--- /dev/null
+++ b/yql/essentials/udfs/common/json2/test/canondata/result.json
@@ -0,0 +1,42 @@
+{
+ "test.test[AsJsonNode]": [
+ {
+ "uri": "file://test.test_AsJsonNode_/results.txt"
+ }
+ ],
+ "test.test[SerializeParse]": [
+ {
+ "uri": "file://test.test_SerializeParse_/results.txt"
+ }
+ ],
+ "test.test[SqlExists]": [
+ {
+ "uri": "file://test.test_SqlExists_/results.txt"
+ }
+ ],
+ "test.test[SqlQueryError]": [
+ {
+ "uri": "file://test.test_SqlQueryError_/extracted"
+ }
+ ],
+ "test.test[SqlQuery]": [
+ {
+ "uri": "file://test.test_SqlQuery_/results.txt"
+ }
+ ],
+ "test.test[SqlTryExistsError]": [
+ {
+ "uri": "file://test.test_SqlTryExistsError_/extracted"
+ }
+ ],
+ "test.test[SqlTryExists]": [
+ {
+ "uri": "file://test.test_SqlTryExists_/results.txt"
+ }
+ ],
+ "test.test[SqlValue]": [
+ {
+ "uri": "file://test.test_SqlValue_/results.txt"
+ }
+ ]
+}
diff --git a/yql/essentials/udfs/common/json2/test/canondata/test.test_AsJsonNode_/results.txt b/yql/essentials/udfs/common/json2/test/canondata/test.test_AsJsonNode_/results.txt
new file mode 100644
index 00000000000..fd6bba35bca
--- /dev/null
+++ b/yql/essentials/udfs/common/json2/test/canondata/test.test_AsJsonNode_/results.txt
@@ -0,0 +1,84 @@
+[
+ {
+ "Write" = [
+ {
+ "Type" = [
+ "ListType";
+ [
+ "StructType";
+ [
+ [
+ "column0";
+ [
+ "DataType";
+ "Json"
+ ]
+ ];
+ [
+ "column1";
+ [
+ "DataType";
+ "Json"
+ ]
+ ];
+ [
+ "column2";
+ [
+ "DataType";
+ "Json"
+ ]
+ ];
+ [
+ "column3";
+ [
+ "DataType";
+ "Json"
+ ]
+ ];
+ [
+ "column4";
+ [
+ "DataType";
+ "Json"
+ ]
+ ];
+ [
+ "column5";
+ [
+ "DataType";
+ "Json"
+ ]
+ ];
+ [
+ "column6";
+ [
+ "DataType";
+ "Json"
+ ]
+ ];
+ [
+ "column7";
+ [
+ "DataType";
+ "Json"
+ ]
+ ]
+ ]
+ ]
+ ];
+ "Data" = [
+ [
+ "\"string\"";
+ "null";
+ "1.2345";
+ "null";
+ "true";
+ "null";
+ "{\"key\":28}";
+ "null"
+ ]
+ ]
+ }
+ ]
+ }
+] \ No newline at end of file
diff --git a/yql/essentials/udfs/common/json2/test/canondata/test.test_SerializeParse_/results.txt b/yql/essentials/udfs/common/json2/test/canondata/test.test_SerializeParse_/results.txt
new file mode 100644
index 00000000000..58a867b34e4
--- /dev/null
+++ b/yql/essentials/udfs/common/json2/test/canondata/test.test_SerializeParse_/results.txt
@@ -0,0 +1,102 @@
+[
+ {
+ "Write" = [
+ {
+ "Type" = [
+ "ListType";
+ [
+ "StructType";
+ [
+ [
+ "column0";
+ [
+ "DataType";
+ "Json"
+ ]
+ ];
+ [
+ "column1";
+ [
+ "DataType";
+ "Json"
+ ]
+ ];
+ [
+ "column2";
+ [
+ "DataType";
+ "Json"
+ ]
+ ];
+ [
+ "column3";
+ [
+ "DataType";
+ "Json"
+ ]
+ ]
+ ]
+ ]
+ ];
+ "Data" = [
+ [
+ "[]";
+ "{}";
+ "[1,3,4,5,6]";
+ "{\"x\":1234}"
+ ]
+ ]
+ }
+ ]
+ };
+ {
+ "Write" = [
+ {
+ "Type" = [
+ "ListType";
+ [
+ "StructType";
+ [
+ [
+ "column0";
+ [
+ "DataType";
+ "JsonDocument"
+ ]
+ ];
+ [
+ "column1";
+ [
+ "DataType";
+ "JsonDocument"
+ ]
+ ];
+ [
+ "column2";
+ [
+ "DataType";
+ "JsonDocument"
+ ]
+ ];
+ [
+ "column3";
+ [
+ "DataType";
+ "JsonDocument"
+ ]
+ ]
+ ]
+ ]
+ ];
+ "Data" = [
+ [
+ "[]";
+ "{}";
+ "[1,3,4,5,6]";
+ "{\"x\":1234}"
+ ]
+ ]
+ }
+ ]
+ }
+] \ No newline at end of file
diff --git a/yql/essentials/udfs/common/json2/test/canondata/test.test_SqlExists_/results.txt b/yql/essentials/udfs/common/json2/test/canondata/test.test_SqlExists_/results.txt
new file mode 100644
index 00000000000..1b74c43a71d
--- /dev/null
+++ b/yql/essentials/udfs/common/json2/test/canondata/test.test_SqlExists_/results.txt
@@ -0,0 +1,195 @@
+[
+ {
+ "Write" = [
+ {
+ "Type" = [
+ "ListType";
+ [
+ "StructType";
+ [
+ [
+ "column0";
+ [
+ "OptionalType";
+ [
+ "DataType";
+ "Bool"
+ ]
+ ]
+ ];
+ [
+ "column1";
+ [
+ "OptionalType";
+ [
+ "DataType";
+ "Bool"
+ ]
+ ]
+ ];
+ [
+ "column2";
+ [
+ "OptionalType";
+ [
+ "DataType";
+ "Bool"
+ ]
+ ]
+ ];
+ [
+ "column3";
+ [
+ "OptionalType";
+ [
+ "DataType";
+ "Bool"
+ ]
+ ]
+ ]
+ ]
+ ]
+ ];
+ "Data" = [
+ [
+ [
+ %true
+ ];
+ [
+ %true
+ ];
+ [
+ %true
+ ];
+ [
+ %true
+ ]
+ ]
+ ]
+ }
+ ]
+ };
+ {
+ "Write" = [
+ {
+ "Type" = [
+ "ListType";
+ [
+ "StructType";
+ [
+ [
+ "column0";
+ [
+ "OptionalType";
+ [
+ "DataType";
+ "Bool"
+ ]
+ ]
+ ];
+ [
+ "column1";
+ [
+ "OptionalType";
+ [
+ "DataType";
+ "Bool"
+ ]
+ ]
+ ];
+ [
+ "column2";
+ [
+ "OptionalType";
+ [
+ "DataType";
+ "Bool"
+ ]
+ ]
+ ];
+ [
+ "column3";
+ [
+ "OptionalType";
+ [
+ "DataType";
+ "Bool"
+ ]
+ ]
+ ]
+ ]
+ ]
+ ];
+ "Data" = [
+ [
+ #;
+ [
+ %false
+ ];
+ [
+ %false
+ ];
+ [
+ %false
+ ]
+ ]
+ ]
+ }
+ ]
+ };
+ {
+ "Write" = [
+ {
+ "Type" = [
+ "ListType";
+ [
+ "StructType";
+ [
+ [
+ "column0";
+ [
+ "OptionalType";
+ [
+ "DataType";
+ "Bool"
+ ]
+ ]
+ ];
+ [
+ "column1";
+ [
+ "OptionalType";
+ [
+ "DataType";
+ "Bool"
+ ]
+ ]
+ ];
+ [
+ "column2";
+ [
+ "OptionalType";
+ [
+ "DataType";
+ "Bool"
+ ]
+ ]
+ ]
+ ]
+ ]
+ ];
+ "Data" = [
+ [
+ [
+ %false
+ ];
+ [
+ %true
+ ];
+ #
+ ]
+ ]
+ }
+ ]
+ }
+] \ No newline at end of file
diff --git a/yql/essentials/udfs/common/json2/test/canondata/test.test_SqlQueryError_/extracted b/yql/essentials/udfs/common/json2/test/canondata/test.test_SqlQueryError_/extracted
new file mode 100644
index 00000000000..12f3e1927de
--- /dev/null
+++ b/yql/essentials/udfs/common/json2/test/canondata/test.test_SqlQueryError_/extracted
@@ -0,0 +1,10 @@
+<tmp_path>/program.sql:<main>: Fatal: Execution
+
+ <tmp_path>/program.sql:<main>:12:1: Fatal: Execution of node: Result
+ SELECT
+ ^
+ <tmp_path>/program.sql:<main>:14:12: Fatal: yql/essentials/udfs/common/json2/sql_query.h:xxx: Error executing jsonpath:
+jsonpath:1:8: Error: Member not found, code: 4702
+
+ Json2::SqlQuery($jsonpath_error, $path, AsDict(), false, NULL, true, NULL);
+ ^ \ No newline at end of file
diff --git a/yql/essentials/udfs/common/json2/test/canondata/test.test_SqlQuery_/results.txt b/yql/essentials/udfs/common/json2/test/canondata/test.test_SqlQuery_/results.txt
new file mode 100644
index 00000000000..0773abf2be8
--- /dev/null
+++ b/yql/essentials/udfs/common/json2/test/canondata/test.test_SqlQuery_/results.txt
@@ -0,0 +1,400 @@
+[
+ {
+ "Write" = [
+ {
+ "Type" = [
+ "ListType";
+ [
+ "StructType";
+ [
+ [
+ "column0";
+ [
+ "OptionalType";
+ [
+ "DataType";
+ "Json"
+ ]
+ ]
+ ];
+ [
+ "column1";
+ [
+ "OptionalType";
+ [
+ "DataType";
+ "Json"
+ ]
+ ]
+ ]
+ ]
+ ]
+ ];
+ "Data" = [
+ [
+ [
+ "{\"y\":123}"
+ ];
+ [
+ "[123,456]"
+ ]
+ ]
+ ]
+ }
+ ]
+ };
+ {
+ "Write" = [
+ {
+ "Type" = [
+ "ListType";
+ [
+ "StructType";
+ [
+ [
+ "column0";
+ [
+ "OptionalType";
+ [
+ "DataType";
+ "Json"
+ ]
+ ]
+ ]
+ ]
+ ]
+ ];
+ "Data" = [
+ [
+ #
+ ]
+ ]
+ }
+ ]
+ };
+ {
+ "Write" = [
+ {
+ "Type" = [
+ "ListType";
+ [
+ "StructType";
+ [
+ [
+ "column0";
+ [
+ "OptionalType";
+ [
+ "DataType";
+ "Json"
+ ]
+ ]
+ ];
+ [
+ "column1";
+ [
+ "OptionalType";
+ [
+ "DataType";
+ "Json"
+ ]
+ ]
+ ];
+ [
+ "column2";
+ [
+ "OptionalType";
+ [
+ "DataType";
+ "Json"
+ ]
+ ]
+ ]
+ ]
+ ]
+ ];
+ "Data" = [
+ [
+ #;
+ [
+ "{}"
+ ];
+ [
+ "[]"
+ ]
+ ]
+ ]
+ }
+ ]
+ };
+ {
+ "Write" = [
+ {
+ "Type" = [
+ "ListType";
+ [
+ "StructType";
+ [
+ [
+ "column0";
+ [
+ "OptionalType";
+ [
+ "DataType";
+ "Json"
+ ]
+ ]
+ ];
+ [
+ "column1";
+ [
+ "OptionalType";
+ [
+ "DataType";
+ "Json"
+ ]
+ ]
+ ];
+ [
+ "column2";
+ [
+ "OptionalType";
+ [
+ "DataType";
+ "Json"
+ ]
+ ]
+ ]
+ ]
+ ]
+ ];
+ "Data" = [
+ [
+ #;
+ [
+ "{}"
+ ];
+ [
+ "[]"
+ ]
+ ]
+ ]
+ }
+ ]
+ };
+ {
+ "Write" = [
+ {
+ "Type" = [
+ "ListType";
+ [
+ "StructType";
+ [
+ [
+ "column0";
+ [
+ "OptionalType";
+ [
+ "DataType";
+ "Json"
+ ]
+ ]
+ ];
+ [
+ "column1";
+ [
+ "OptionalType";
+ [
+ "DataType";
+ "Json"
+ ]
+ ]
+ ]
+ ]
+ ]
+ ];
+ "Data" = [
+ [
+ [
+ "[123]"
+ ];
+ [
+ "[123]"
+ ]
+ ]
+ ]
+ }
+ ]
+ };
+ {
+ "Write" = [
+ {
+ "Type" = [
+ "ListType";
+ [
+ "StructType";
+ [
+ [
+ "column0";
+ [
+ "OptionalType";
+ [
+ "DataType";
+ "Json"
+ ]
+ ]
+ ];
+ [
+ "column1";
+ [
+ "OptionalType";
+ [
+ "DataType";
+ "Json"
+ ]
+ ]
+ ];
+ [
+ "column2";
+ [
+ "OptionalType";
+ [
+ "DataType";
+ "Json"
+ ]
+ ]
+ ]
+ ]
+ ]
+ ];
+ "Data" = [
+ [
+ [
+ "{\"y\":123}"
+ ];
+ [
+ "[{\"y\":123}]"
+ ];
+ [
+ "{\"y\":123}"
+ ]
+ ]
+ ]
+ }
+ ]
+ };
+ {
+ "Write" = [
+ {
+ "Type" = [
+ "ListType";
+ [
+ "StructType";
+ [
+ [
+ "column0";
+ [
+ "OptionalType";
+ [
+ "DataType";
+ "Json"
+ ]
+ ]
+ ];
+ [
+ "column1";
+ [
+ "OptionalType";
+ [
+ "DataType";
+ "Json"
+ ]
+ ]
+ ];
+ [
+ "column2";
+ [
+ "OptionalType";
+ [
+ "DataType";
+ "Json"
+ ]
+ ]
+ ]
+ ]
+ ]
+ ];
+ "Data" = [
+ [
+ [
+ "[123,456]"
+ ];
+ [
+ "[[123,456]]"
+ ];
+ [
+ "[123,456]"
+ ]
+ ]
+ ]
+ }
+ ]
+ };
+ {
+ "Write" = [
+ {
+ "Type" = [
+ "ListType";
+ [
+ "StructType";
+ [
+ [
+ "column0";
+ [
+ "OptionalType";
+ [
+ "DataType";
+ "Json"
+ ]
+ ]
+ ];
+ [
+ "column1";
+ [
+ "OptionalType";
+ [
+ "DataType";
+ "Json"
+ ]
+ ]
+ ];
+ [
+ "column2";
+ [
+ "OptionalType";
+ [
+ "DataType";
+ "Json"
+ ]
+ ]
+ ]
+ ]
+ ]
+ ];
+ "Data" = [
+ [
+ [
+ "{}"
+ ];
+ [
+ "[]"
+ ];
+ [
+ "[]"
+ ]
+ ]
+ ]
+ }
+ ]
+ }
+] \ No newline at end of file
diff --git a/yql/essentials/udfs/common/json2/test/canondata/test.test_SqlTryExistsError_/extracted b/yql/essentials/udfs/common/json2/test/canondata/test.test_SqlTryExistsError_/extracted
new file mode 100644
index 00000000000..7761bc9a04e
--- /dev/null
+++ b/yql/essentials/udfs/common/json2/test/canondata/test.test_SqlTryExistsError_/extracted
@@ -0,0 +1,10 @@
+<tmp_path>/program.sql:<main>: Fatal: Execution
+
+ <tmp_path>/program.sql:<main>:12:1: Fatal: Execution of node: Result
+ SELECT
+ ^
+ <tmp_path>/program.sql:<main>:14:12: Fatal: yql/essentials/udfs/common/json2/sql_exists.h:xxx: Error executing jsonpath:
+jsonpath:1:8: Error: Expected object, code: 4701
+
+ Json2::SqlTryExists($json, $path, AsDict());
+ ^ \ No newline at end of file
diff --git a/yql/essentials/udfs/common/json2/test/canondata/test.test_SqlTryExists_/results.txt b/yql/essentials/udfs/common/json2/test/canondata/test.test_SqlTryExists_/results.txt
new file mode 100644
index 00000000000..4a5f62bc86b
--- /dev/null
+++ b/yql/essentials/udfs/common/json2/test/canondata/test.test_SqlTryExists_/results.txt
@@ -0,0 +1,83 @@
+[
+ {
+ "Write" = [
+ {
+ "Type" = [
+ "ListType";
+ [
+ "StructType";
+ [
+ [
+ "column0";
+ [
+ "OptionalType";
+ [
+ "DataType";
+ "Bool"
+ ]
+ ]
+ ];
+ [
+ "column1";
+ [
+ "OptionalType";
+ [
+ "DataType";
+ "Bool"
+ ]
+ ]
+ ];
+ [
+ "column2";
+ [
+ "OptionalType";
+ [
+ "DataType";
+ "Bool"
+ ]
+ ]
+ ];
+ [
+ "column3";
+ [
+ "OptionalType";
+ [
+ "DataType";
+ "Bool"
+ ]
+ ]
+ ];
+ [
+ "column4";
+ [
+ "OptionalType";
+ [
+ "DataType";
+ "Bool"
+ ]
+ ]
+ ]
+ ]
+ ]
+ ];
+ "Data" = [
+ [
+ #;
+ [
+ %true
+ ];
+ [
+ %true
+ ];
+ [
+ %true
+ ];
+ [
+ %true
+ ]
+ ]
+ ]
+ }
+ ]
+ }
+] \ No newline at end of file
diff --git a/yql/essentials/udfs/common/json2/test/canondata/test.test_SqlValue_/results.txt b/yql/essentials/udfs/common/json2/test/canondata/test.test_SqlValue_/results.txt
new file mode 100644
index 00000000000..b5aeb82c9ab
--- /dev/null
+++ b/yql/essentials/udfs/common/json2/test/canondata/test.test_SqlValue_/results.txt
@@ -0,0 +1,1663 @@
+[
+ {
+ "Write" = [
+ {
+ "Type" = [
+ "ListType";
+ [
+ "StructType";
+ [
+ [
+ "column0";
+ [
+ "VariantType";
+ [
+ "TupleType";
+ [
+ [
+ "TupleType";
+ [
+ [
+ "DataType";
+ "Uint8"
+ ];
+ [
+ "DataType";
+ "String"
+ ]
+ ]
+ ];
+ [
+ "OptionalType";
+ [
+ "DataType";
+ "Utf8"
+ ]
+ ]
+ ]
+ ]
+ ]
+ ];
+ [
+ "column1";
+ [
+ "VariantType";
+ [
+ "TupleType";
+ [
+ [
+ "TupleType";
+ [
+ [
+ "DataType";
+ "Uint8"
+ ];
+ [
+ "DataType";
+ "String"
+ ]
+ ]
+ ];
+ [
+ "OptionalType";
+ [
+ "DataType";
+ "Utf8"
+ ]
+ ]
+ ]
+ ]
+ ]
+ ];
+ [
+ "column2";
+ [
+ "VariantType";
+ [
+ "TupleType";
+ [
+ [
+ "TupleType";
+ [
+ [
+ "DataType";
+ "Uint8"
+ ];
+ [
+ "DataType";
+ "String"
+ ]
+ ]
+ ];
+ [
+ "OptionalType";
+ [
+ "DataType";
+ "Utf8"
+ ]
+ ]
+ ]
+ ]
+ ]
+ ];
+ [
+ "column3";
+ [
+ "VariantType";
+ [
+ "TupleType";
+ [
+ [
+ "TupleType";
+ [
+ [
+ "DataType";
+ "Uint8"
+ ];
+ [
+ "DataType";
+ "String"
+ ]
+ ]
+ ];
+ [
+ "OptionalType";
+ [
+ "DataType";
+ "Utf8"
+ ]
+ ]
+ ]
+ ]
+ ]
+ ];
+ [
+ "column4";
+ [
+ "VariantType";
+ [
+ "TupleType";
+ [
+ [
+ "TupleType";
+ [
+ [
+ "DataType";
+ "Uint8"
+ ];
+ [
+ "DataType";
+ "String"
+ ]
+ ]
+ ];
+ [
+ "OptionalType";
+ [
+ "DataType";
+ "Utf8"
+ ]
+ ]
+ ]
+ ]
+ ]
+ ]
+ ]
+ ]
+ ];
+ "Data" = [
+ [
+ [
+ "1";
+ [
+ "some string value"
+ ]
+ ];
+ [
+ "0";
+ [
+ "1";
+ "Error executing jsonpath:\njsonpath:1:8: Error: Member not found, code: 4702\n"
+ ]
+ ];
+ [
+ "0";
+ [
+ "1";
+ "Error executing jsonpath:\njsonpath:1:8: Error: Expected object, code: 4701\n"
+ ]
+ ];
+ [
+ "1";
+ #
+ ];
+ [
+ "1";
+ #
+ ]
+ ]
+ ]
+ }
+ ]
+ };
+ {
+ "Write" = [
+ {
+ "Type" = [
+ "ListType";
+ [
+ "StructType";
+ [
+ [
+ "column0";
+ [
+ "VariantType";
+ [
+ "TupleType";
+ [
+ [
+ "TupleType";
+ [
+ [
+ "DataType";
+ "Uint8"
+ ];
+ [
+ "DataType";
+ "String"
+ ]
+ ]
+ ];
+ [
+ "OptionalType";
+ [
+ "DataType";
+ "Int64"
+ ]
+ ]
+ ]
+ ]
+ ]
+ ];
+ [
+ "column1";
+ [
+ "VariantType";
+ [
+ "TupleType";
+ [
+ [
+ "TupleType";
+ [
+ [
+ "DataType";
+ "Uint8"
+ ];
+ [
+ "DataType";
+ "String"
+ ]
+ ]
+ ];
+ [
+ "OptionalType";
+ [
+ "DataType";
+ "Int64"
+ ]
+ ]
+ ]
+ ]
+ ]
+ ];
+ [
+ "column2";
+ [
+ "VariantType";
+ [
+ "TupleType";
+ [
+ [
+ "TupleType";
+ [
+ [
+ "DataType";
+ "Uint8"
+ ];
+ [
+ "DataType";
+ "String"
+ ]
+ ]
+ ];
+ [
+ "OptionalType";
+ [
+ "DataType";
+ "Int64"
+ ]
+ ]
+ ]
+ ]
+ ]
+ ];
+ [
+ "column3";
+ [
+ "VariantType";
+ [
+ "TupleType";
+ [
+ [
+ "TupleType";
+ [
+ [
+ "DataType";
+ "Uint8"
+ ];
+ [
+ "DataType";
+ "String"
+ ]
+ ]
+ ];
+ [
+ "OptionalType";
+ [
+ "DataType";
+ "Int64"
+ ]
+ ]
+ ]
+ ]
+ ]
+ ];
+ [
+ "column4";
+ [
+ "VariantType";
+ [
+ "TupleType";
+ [
+ [
+ "TupleType";
+ [
+ [
+ "DataType";
+ "Uint8"
+ ];
+ [
+ "DataType";
+ "String"
+ ]
+ ]
+ ];
+ [
+ "OptionalType";
+ [
+ "DataType";
+ "Int64"
+ ]
+ ]
+ ]
+ ]
+ ]
+ ]
+ ]
+ ]
+ ];
+ "Data" = [
+ [
+ [
+ "1";
+ [
+ "2856"
+ ]
+ ];
+ [
+ "0";
+ [
+ "1";
+ "Error executing jsonpath:\njsonpath:1:8: Error: Member not found, code: 4702\n"
+ ]
+ ];
+ [
+ "0";
+ [
+ "1";
+ "Error executing jsonpath:\njsonpath:1:8: Error: Expected object, code: 4701\n"
+ ]
+ ];
+ [
+ "1";
+ #
+ ];
+ [
+ "1";
+ #
+ ]
+ ]
+ ]
+ }
+ ]
+ };
+ {
+ "Write" = [
+ {
+ "Type" = [
+ "ListType";
+ [
+ "StructType";
+ [
+ [
+ "column0";
+ [
+ "VariantType";
+ [
+ "TupleType";
+ [
+ [
+ "TupleType";
+ [
+ [
+ "DataType";
+ "Uint8"
+ ];
+ [
+ "DataType";
+ "String"
+ ]
+ ]
+ ];
+ [
+ "OptionalType";
+ [
+ "DataType";
+ "Double"
+ ]
+ ]
+ ]
+ ]
+ ]
+ ];
+ [
+ "column1";
+ [
+ "VariantType";
+ [
+ "TupleType";
+ [
+ [
+ "TupleType";
+ [
+ [
+ "DataType";
+ "Uint8"
+ ];
+ [
+ "DataType";
+ "String"
+ ]
+ ]
+ ];
+ [
+ "OptionalType";
+ [
+ "DataType";
+ "Double"
+ ]
+ ]
+ ]
+ ]
+ ]
+ ];
+ [
+ "column2";
+ [
+ "VariantType";
+ [
+ "TupleType";
+ [
+ [
+ "TupleType";
+ [
+ [
+ "DataType";
+ "Uint8"
+ ];
+ [
+ "DataType";
+ "String"
+ ]
+ ]
+ ];
+ [
+ "OptionalType";
+ [
+ "DataType";
+ "Double"
+ ]
+ ]
+ ]
+ ]
+ ]
+ ];
+ [
+ "column3";
+ [
+ "VariantType";
+ [
+ "TupleType";
+ [
+ [
+ "TupleType";
+ [
+ [
+ "DataType";
+ "Uint8"
+ ];
+ [
+ "DataType";
+ "String"
+ ]
+ ]
+ ];
+ [
+ "OptionalType";
+ [
+ "DataType";
+ "Double"
+ ]
+ ]
+ ]
+ ]
+ ]
+ ];
+ [
+ "column4";
+ [
+ "VariantType";
+ [
+ "TupleType";
+ [
+ [
+ "TupleType";
+ [
+ [
+ "DataType";
+ "Uint8"
+ ];
+ [
+ "DataType";
+ "String"
+ ]
+ ]
+ ];
+ [
+ "OptionalType";
+ [
+ "DataType";
+ "Double"
+ ]
+ ]
+ ]
+ ]
+ ]
+ ]
+ ]
+ ]
+ ];
+ "Data" = [
+ [
+ [
+ "1";
+ [
+ "2.71828"
+ ]
+ ];
+ [
+ "0";
+ [
+ "1";
+ "Error executing jsonpath:\njsonpath:1:8: Error: Member not found, code: 4702\n"
+ ]
+ ];
+ [
+ "0";
+ [
+ "1";
+ "Error executing jsonpath:\njsonpath:1:8: Error: Expected object, code: 4701\n"
+ ]
+ ];
+ [
+ "1";
+ #
+ ];
+ [
+ "1";
+ #
+ ]
+ ]
+ ]
+ }
+ ]
+ };
+ {
+ "Write" = [
+ {
+ "Type" = [
+ "ListType";
+ [
+ "StructType";
+ [
+ [
+ "column0";
+ [
+ "VariantType";
+ [
+ "TupleType";
+ [
+ [
+ "TupleType";
+ [
+ [
+ "DataType";
+ "Uint8"
+ ];
+ [
+ "DataType";
+ "String"
+ ]
+ ]
+ ];
+ [
+ "OptionalType";
+ [
+ "DataType";
+ "Bool"
+ ]
+ ]
+ ]
+ ]
+ ]
+ ];
+ [
+ "column1";
+ [
+ "VariantType";
+ [
+ "TupleType";
+ [
+ [
+ "TupleType";
+ [
+ [
+ "DataType";
+ "Uint8"
+ ];
+ [
+ "DataType";
+ "String"
+ ]
+ ]
+ ];
+ [
+ "OptionalType";
+ [
+ "DataType";
+ "Bool"
+ ]
+ ]
+ ]
+ ]
+ ]
+ ];
+ [
+ "column2";
+ [
+ "VariantType";
+ [
+ "TupleType";
+ [
+ [
+ "TupleType";
+ [
+ [
+ "DataType";
+ "Uint8"
+ ];
+ [
+ "DataType";
+ "String"
+ ]
+ ]
+ ];
+ [
+ "OptionalType";
+ [
+ "DataType";
+ "Bool"
+ ]
+ ]
+ ]
+ ]
+ ]
+ ];
+ [
+ "column3";
+ [
+ "VariantType";
+ [
+ "TupleType";
+ [
+ [
+ "TupleType";
+ [
+ [
+ "DataType";
+ "Uint8"
+ ];
+ [
+ "DataType";
+ "String"
+ ]
+ ]
+ ];
+ [
+ "OptionalType";
+ [
+ "DataType";
+ "Bool"
+ ]
+ ]
+ ]
+ ]
+ ]
+ ];
+ [
+ "column4";
+ [
+ "VariantType";
+ [
+ "TupleType";
+ [
+ [
+ "TupleType";
+ [
+ [
+ "DataType";
+ "Uint8"
+ ];
+ [
+ "DataType";
+ "String"
+ ]
+ ]
+ ];
+ [
+ "OptionalType";
+ [
+ "DataType";
+ "Bool"
+ ]
+ ]
+ ]
+ ]
+ ]
+ ]
+ ]
+ ]
+ ];
+ "Data" = [
+ [
+ [
+ "1";
+ [
+ %true
+ ]
+ ];
+ [
+ "0";
+ [
+ "1";
+ "Error executing jsonpath:\njsonpath:1:8: Error: Member not found, code: 4702\n"
+ ]
+ ];
+ [
+ "0";
+ [
+ "1";
+ "Error executing jsonpath:\njsonpath:1:8: Error: Expected object, code: 4701\n"
+ ]
+ ];
+ [
+ "1";
+ #
+ ];
+ [
+ "1";
+ #
+ ]
+ ]
+ ]
+ }
+ ]
+ };
+ {
+ "Write" = [
+ {
+ "Type" = [
+ "ListType";
+ [
+ "StructType";
+ [
+ [
+ "column0";
+ [
+ "VariantType";
+ [
+ "TupleType";
+ [
+ [
+ "TupleType";
+ [
+ [
+ "DataType";
+ "Uint8"
+ ];
+ [
+ "DataType";
+ "String"
+ ]
+ ]
+ ];
+ [
+ "OptionalType";
+ [
+ "DataType";
+ "Utf8"
+ ]
+ ]
+ ]
+ ]
+ ]
+ ];
+ [
+ "column1";
+ [
+ "VariantType";
+ [
+ "TupleType";
+ [
+ [
+ "TupleType";
+ [
+ [
+ "DataType";
+ "Uint8"
+ ];
+ [
+ "DataType";
+ "String"
+ ]
+ ]
+ ];
+ [
+ "OptionalType";
+ [
+ "DataType";
+ "Utf8"
+ ]
+ ]
+ ]
+ ]
+ ]
+ ];
+ [
+ "column2";
+ [
+ "VariantType";
+ [
+ "TupleType";
+ [
+ [
+ "TupleType";
+ [
+ [
+ "DataType";
+ "Uint8"
+ ];
+ [
+ "DataType";
+ "String"
+ ]
+ ]
+ ];
+ [
+ "OptionalType";
+ [
+ "DataType";
+ "Utf8"
+ ]
+ ]
+ ]
+ ]
+ ]
+ ];
+ [
+ "column3";
+ [
+ "VariantType";
+ [
+ "TupleType";
+ [
+ [
+ "TupleType";
+ [
+ [
+ "DataType";
+ "Uint8"
+ ];
+ [
+ "DataType";
+ "String"
+ ]
+ ]
+ ];
+ [
+ "OptionalType";
+ [
+ "DataType";
+ "Utf8"
+ ]
+ ]
+ ]
+ ]
+ ]
+ ];
+ [
+ "column4";
+ [
+ "VariantType";
+ [
+ "TupleType";
+ [
+ [
+ "TupleType";
+ [
+ [
+ "DataType";
+ "Uint8"
+ ];
+ [
+ "DataType";
+ "String"
+ ]
+ ]
+ ];
+ [
+ "OptionalType";
+ [
+ "DataType";
+ "Utf8"
+ ]
+ ]
+ ]
+ ]
+ ]
+ ];
+ [
+ "column5";
+ [
+ "VariantType";
+ [
+ "TupleType";
+ [
+ [
+ "TupleType";
+ [
+ [
+ "DataType";
+ "Uint8"
+ ];
+ [
+ "DataType";
+ "String"
+ ]
+ ]
+ ];
+ [
+ "OptionalType";
+ [
+ "DataType";
+ "Utf8"
+ ]
+ ]
+ ]
+ ]
+ ]
+ ]
+ ]
+ ]
+ ];
+ "Data" = [
+ [
+ [
+ "1";
+ [
+ "some string value"
+ ]
+ ];
+ [
+ "1";
+ [
+ "2856"
+ ]
+ ];
+ [
+ "1";
+ [
+ "2.71828"
+ ]
+ ];
+ [
+ "1";
+ [
+ "true"
+ ]
+ ];
+ [
+ "1";
+ #
+ ];
+ [
+ "1";
+ #
+ ]
+ ]
+ ]
+ }
+ ]
+ };
+ {
+ "Write" = [
+ {
+ "Type" = [
+ "ListType";
+ [
+ "StructType";
+ [
+ [
+ "column0";
+ [
+ "VariantType";
+ [
+ "TupleType";
+ [
+ [
+ "TupleType";
+ [
+ [
+ "DataType";
+ "Uint8"
+ ];
+ [
+ "DataType";
+ "String"
+ ]
+ ]
+ ];
+ [
+ "OptionalType";
+ [
+ "DataType";
+ "Utf8"
+ ]
+ ]
+ ]
+ ]
+ ]
+ ];
+ [
+ "column1";
+ [
+ "VariantType";
+ [
+ "TupleType";
+ [
+ [
+ "TupleType";
+ [
+ [
+ "DataType";
+ "Uint8"
+ ];
+ [
+ "DataType";
+ "String"
+ ]
+ ]
+ ];
+ [
+ "OptionalType";
+ [
+ "DataType";
+ "Utf8"
+ ]
+ ]
+ ]
+ ]
+ ]
+ ];
+ [
+ "column2";
+ [
+ "VariantType";
+ [
+ "TupleType";
+ [
+ [
+ "TupleType";
+ [
+ [
+ "DataType";
+ "Uint8"
+ ];
+ [
+ "DataType";
+ "String"
+ ]
+ ]
+ ];
+ [
+ "OptionalType";
+ [
+ "DataType";
+ "Utf8"
+ ]
+ ]
+ ]
+ ]
+ ]
+ ]
+ ]
+ ]
+ ];
+ "Data" = [
+ [
+ [
+ "0";
+ [
+ "1";
+ "Cannot convert extracted JSON value to target type"
+ ]
+ ];
+ [
+ "0";
+ [
+ "1";
+ "Cannot convert extracted JSON value to target type"
+ ]
+ ];
+ [
+ "0";
+ [
+ "1";
+ "Cannot convert extracted JSON value to target type"
+ ]
+ ]
+ ]
+ ]
+ }
+ ]
+ };
+ {
+ "Write" = [
+ {
+ "Type" = [
+ "ListType";
+ [
+ "StructType";
+ [
+ [
+ "column0";
+ [
+ "VariantType";
+ [
+ "TupleType";
+ [
+ [
+ "TupleType";
+ [
+ [
+ "DataType";
+ "Uint8"
+ ];
+ [
+ "DataType";
+ "String"
+ ]
+ ]
+ ];
+ [
+ "OptionalType";
+ [
+ "DataType";
+ "Int64"
+ ]
+ ]
+ ]
+ ]
+ ]
+ ];
+ [
+ "column1";
+ [
+ "VariantType";
+ [
+ "TupleType";
+ [
+ [
+ "TupleType";
+ [
+ [
+ "DataType";
+ "Uint8"
+ ];
+ [
+ "DataType";
+ "String"
+ ]
+ ]
+ ];
+ [
+ "OptionalType";
+ [
+ "DataType";
+ "Int64"
+ ]
+ ]
+ ]
+ ]
+ ]
+ ];
+ [
+ "column2";
+ [
+ "VariantType";
+ [
+ "TupleType";
+ [
+ [
+ "TupleType";
+ [
+ [
+ "DataType";
+ "Uint8"
+ ];
+ [
+ "DataType";
+ "String"
+ ]
+ ]
+ ];
+ [
+ "OptionalType";
+ [
+ "DataType";
+ "Int64"
+ ]
+ ]
+ ]
+ ]
+ ]
+ ]
+ ]
+ ]
+ ];
+ "Data" = [
+ [
+ [
+ "0";
+ [
+ "1";
+ "Cannot convert extracted JSON value to target type"
+ ]
+ ];
+ [
+ "0";
+ [
+ "1";
+ "Cannot convert extracted JSON value to target type"
+ ]
+ ];
+ [
+ "0";
+ [
+ "1";
+ "Cannot convert extracted JSON value to target type"
+ ]
+ ]
+ ]
+ ]
+ }
+ ]
+ };
+ {
+ "Write" = [
+ {
+ "Type" = [
+ "ListType";
+ [
+ "StructType";
+ [
+ [
+ "column0";
+ [
+ "VariantType";
+ [
+ "TupleType";
+ [
+ [
+ "TupleType";
+ [
+ [
+ "DataType";
+ "Uint8"
+ ];
+ [
+ "DataType";
+ "String"
+ ]
+ ]
+ ];
+ [
+ "OptionalType";
+ [
+ "DataType";
+ "Double"
+ ]
+ ]
+ ]
+ ]
+ ]
+ ];
+ [
+ "column1";
+ [
+ "VariantType";
+ [
+ "TupleType";
+ [
+ [
+ "TupleType";
+ [
+ [
+ "DataType";
+ "Uint8"
+ ];
+ [
+ "DataType";
+ "String"
+ ]
+ ]
+ ];
+ [
+ "OptionalType";
+ [
+ "DataType";
+ "Double"
+ ]
+ ]
+ ]
+ ]
+ ]
+ ];
+ [
+ "column2";
+ [
+ "VariantType";
+ [
+ "TupleType";
+ [
+ [
+ "TupleType";
+ [
+ [
+ "DataType";
+ "Uint8"
+ ];
+ [
+ "DataType";
+ "String"
+ ]
+ ]
+ ];
+ [
+ "OptionalType";
+ [
+ "DataType";
+ "Double"
+ ]
+ ]
+ ]
+ ]
+ ]
+ ]
+ ]
+ ]
+ ];
+ "Data" = [
+ [
+ [
+ "0";
+ [
+ "1";
+ "Cannot convert extracted JSON value to target type"
+ ]
+ ];
+ [
+ "1";
+ [
+ "2856"
+ ]
+ ];
+ [
+ "0";
+ [
+ "1";
+ "Cannot convert extracted JSON value to target type"
+ ]
+ ]
+ ]
+ ]
+ }
+ ]
+ };
+ {
+ "Write" = [
+ {
+ "Type" = [
+ "ListType";
+ [
+ "StructType";
+ [
+ [
+ "column0";
+ [
+ "VariantType";
+ [
+ "TupleType";
+ [
+ [
+ "TupleType";
+ [
+ [
+ "DataType";
+ "Uint8"
+ ];
+ [
+ "DataType";
+ "String"
+ ]
+ ]
+ ];
+ [
+ "OptionalType";
+ [
+ "DataType";
+ "Bool"
+ ]
+ ]
+ ]
+ ]
+ ]
+ ];
+ [
+ "column1";
+ [
+ "VariantType";
+ [
+ "TupleType";
+ [
+ [
+ "TupleType";
+ [
+ [
+ "DataType";
+ "Uint8"
+ ];
+ [
+ "DataType";
+ "String"
+ ]
+ ]
+ ];
+ [
+ "OptionalType";
+ [
+ "DataType";
+ "Bool"
+ ]
+ ]
+ ]
+ ]
+ ]
+ ];
+ [
+ "column2";
+ [
+ "VariantType";
+ [
+ "TupleType";
+ [
+ [
+ "TupleType";
+ [
+ [
+ "DataType";
+ "Uint8"
+ ];
+ [
+ "DataType";
+ "String"
+ ]
+ ]
+ ];
+ [
+ "OptionalType";
+ [
+ "DataType";
+ "Bool"
+ ]
+ ]
+ ]
+ ]
+ ]
+ ]
+ ]
+ ]
+ ];
+ "Data" = [
+ [
+ [
+ "0";
+ [
+ "1";
+ "Cannot convert extracted JSON value to target type"
+ ]
+ ];
+ [
+ "0";
+ [
+ "1";
+ "Cannot convert extracted JSON value to target type"
+ ]
+ ];
+ [
+ "0";
+ [
+ "1";
+ "Cannot convert extracted JSON value to target type"
+ ]
+ ]
+ ]
+ ]
+ }
+ ]
+ };
+ {
+ "Write" = [
+ {
+ "Type" = [
+ "ListType";
+ [
+ "StructType";
+ [
+ [
+ "column0";
+ [
+ "VariantType";
+ [
+ "TupleType";
+ [
+ [
+ "TupleType";
+ [
+ [
+ "DataType";
+ "Uint8"
+ ];
+ [
+ "DataType";
+ "String"
+ ]
+ ]
+ ];
+ [
+ "OptionalType";
+ [
+ "DataType";
+ "Bool"
+ ]
+ ]
+ ]
+ ]
+ ]
+ ];
+ [
+ "column1";
+ [
+ "VariantType";
+ [
+ "TupleType";
+ [
+ [
+ "TupleType";
+ [
+ [
+ "DataType";
+ "Uint8"
+ ];
+ [
+ "DataType";
+ "String"
+ ]
+ ]
+ ];
+ [
+ "OptionalType";
+ [
+ "DataType";
+ "Bool"
+ ]
+ ]
+ ]
+ ]
+ ]
+ ]
+ ]
+ ]
+ ];
+ "Data" = [
+ [
+ [
+ "0";
+ [
+ "1";
+ "Extracted JSON value is either object or array"
+ ]
+ ];
+ [
+ "0";
+ [
+ "1";
+ "Extracted JSON value is either object or array"
+ ]
+ ]
+ ]
+ ]
+ }
+ ]
+ }
+] \ No newline at end of file
diff --git a/yql/essentials/udfs/common/json2/test/cases/AsJsonNode.sql b/yql/essentials/udfs/common/json2/test/cases/AsJsonNode.sql
new file mode 100644
index 00000000000..2d85d5576ce
--- /dev/null
+++ b/yql/essentials/udfs/common/json2/test/cases/AsJsonNode.sql
@@ -0,0 +1,9 @@
+SELECT
+ Json2::Utf8AsJsonNode(CAST("string" as Utf8)),
+ Json2::Utf8AsJsonNode(NULL),
+ Json2::DoubleAsJsonNode(1.2345),
+ Json2::DoubleAsJsonNode(NULL),
+ Json2::BoolAsJsonNode(true),
+ Json2::BoolAsJsonNode(NULL),
+ Json2::JsonAsJsonNode(CAST(@@{"key": 28}@@ as Json)),
+ Json2::JsonAsJsonNode(NULL);
diff --git a/yql/essentials/udfs/common/json2/test/cases/SerializeParse.sql b/yql/essentials/udfs/common/json2/test/cases/SerializeParse.sql
new file mode 100644
index 00000000000..1d5eb42d0cf
--- /dev/null
+++ b/yql/essentials/udfs/common/json2/test/cases/SerializeParse.sql
@@ -0,0 +1,15 @@
+$id = ($json) -> { RETURN Json2::Serialize(Json2::Parse($json)); };
+
+SELECT
+ $id("[]"),
+ $id("{}"),
+ $id("[1, 3, 4, 5, 6]"),
+ $id(@@{"x": 1234}@@);
+
+$id_jd = ($json) -> { RETURN Json2::SerializeToJsonDocument(Json2::Parse($json)); };
+
+SELECT
+ $id_jd("[]"),
+ $id_jd("{}"),
+ $id_jd("[1, 3, 4, 5, 6]"),
+ $id_jd(@@{"x": 1234}@@);
diff --git a/yql/essentials/udfs/common/json2/test/cases/SqlExists.sql b/yql/essentials/udfs/common/json2/test/cases/SqlExists.sql
new file mode 100644
index 00000000000..34f475fe5a8
--- /dev/null
+++ b/yql/essentials/udfs/common/json2/test/cases/SqlExists.sql
@@ -0,0 +1,25 @@
+/* syntax version 1 */
+
+$path = Json2::CompilePath("strict $.x");
+
+-- Key exists
+SELECT
+ Json2::SqlExists(CAST(@@{"x": 123}@@ as Json), $path, AsDict(), false),
+ Json2::SqlExists(CAST(@@{"x": {"key": "value"}}@@ as Json), $path, AsDict(), false),
+ Json2::SqlExists(CAST(@@{"x": [1, 2, 3]}@@ as Json), $path, AsDict(), false),
+ Json2::SqlExists(CAST(@@{"x": null}@@ as Json), $path, AsDict(), false);
+
+-- Key is missing
+SELECT
+ Json2::SqlExists(NULL, $path, AsDict(), false),
+ Json2::SqlExists(CAST(@@{"not_x": 123}@@ as Json), $path, AsDict(), false),
+ Json2::SqlExists(CAST("{}" as Json), $path, AsDict(), false),
+ Json2::SqlExists(CAST("[]" as Json), $path, AsDict(), false);
+
+-- Error handling
+$json = CAST("[]" as Json);
+
+SELECT
+ Json2::SqlExists($json, $path, AsDict(), false),
+ Json2::SqlExists($json, $path, AsDict(), true),
+ Json2::SqlExists($json, $path, AsDict(), NULL); \ No newline at end of file
diff --git a/yql/essentials/udfs/common/json2/test/cases/SqlQuery.sql b/yql/essentials/udfs/common/json2/test/cases/SqlQuery.sql
new file mode 100644
index 00000000000..38750aec512
--- /dev/null
+++ b/yql/essentials/udfs/common/json2/test/cases/SqlQuery.sql
@@ -0,0 +1,52 @@
+/* syntax version 1 */
+
+$path = Json2::CompilePath("strict $.x");
+$array = CAST("[]" as Json);
+$object = CAST("{}" as Json);
+
+-- Valid cases
+$nested_object = CAST(@@{"x": {"y": 123}}@@ as Json);
+$nested_array = CAST(@@{"x": [123, 456]}@@ as Json);
+SELECT
+ Json2::SqlQuery($nested_object, $path, AsDict(), false, $array, false, $object),
+ Json2::SqlQuery($nested_array, $path, AsDict(), false, $array, false, $object);
+
+-- Null handling
+SELECT
+ Json2::SqlQuery(NULL, $path, AsDict(), false, $array, false, $object);
+
+-- Errors
+$jsonpath_error = CAST(@@{"y": []}@@ as Json);
+SELECT
+ Json2::SqlQuery($jsonpath_error, $path, AsDict(), false, $array, false, NULL),
+ Json2::SqlQuery($jsonpath_error, $path, AsDict(), false, $array, false, $object),
+ Json2::SqlQuery($jsonpath_error, $path, AsDict(), false, $object, false, $array);
+
+$mismatch_error = CAST(@@{"x": 123}@@ as Json);
+SELECT
+ Json2::SqlQuery($mismatch_error, $path, AsDict(), false, $array, false, NULL),
+ Json2::SqlQuery($mismatch_error, $path, AsDict(), false, $array, false, $object),
+ Json2::SqlQuery($mismatch_error, $path, AsDict(), false, $object, false, $array);
+
+-- Wrap
+$nested_value = CAST(@@{"x": 123}@@ as Json);
+SELECT
+ Json2::SqlQueryWrap($nested_value, $path, AsDict(), false, $object, false, $array),
+ Json2::SqlQueryConditionalWrap($nested_value, $path, AsDict(), false, $object, false, $array);
+
+SELECT
+ Json2::SqlQuery($nested_object, $path, AsDict(), false, $object, false, $array),
+ Json2::SqlQueryWrap($nested_object, $path, AsDict(), false, $object, false, $array),
+ Json2::SqlQueryConditionalWrap($nested_object, $path, AsDict(), false, $object, false, $array);
+
+SELECT
+ Json2::SqlQuery($nested_array, $path, AsDict(), false, $object, false, $array),
+ Json2::SqlQueryWrap($nested_array, $path, AsDict(), false, $object, false, $array),
+ Json2::SqlQueryConditionalWrap($nested_array, $path, AsDict(), false, $object, false, $array);
+
+-- Wrap empty result
+$path_lax = Json2::CompilePath("lax $.x");
+SELECT
+ Json2::SqlQuery($object, $path_lax, AsDict(), false, $object, false, $object),
+ Json2::SqlQueryWrap($object, $path_lax, AsDict(), false, $object, false, $object),
+ Json2::SqlQueryConditionalWrap($object, $path_lax, AsDict(), false, $object, false, $object); \ No newline at end of file
diff --git a/yql/essentials/udfs/common/json2/test/cases/SqlQueryError.cfg b/yql/essentials/udfs/common/json2/test/cases/SqlQueryError.cfg
new file mode 100644
index 00000000000..eb2e5315d1e
--- /dev/null
+++ b/yql/essentials/udfs/common/json2/test/cases/SqlQueryError.cfg
@@ -0,0 +1 @@
+xfail \ No newline at end of file
diff --git a/yql/essentials/udfs/common/json2/test/cases/SqlQueryError.sql b/yql/essentials/udfs/common/json2/test/cases/SqlQueryError.sql
new file mode 100644
index 00000000000..4aaa329fc06
--- /dev/null
+++ b/yql/essentials/udfs/common/json2/test/cases/SqlQueryError.sql
@@ -0,0 +1,7 @@
+/* syntax version 1 */
+
+$path = Json2::CompilePath("strict $.x");
+
+$jsonpath_error = CAST(@@{"y": []}@@ as Json);
+SELECT
+ Json2::SqlQuery($jsonpath_error, $path, AsDict(), false, NULL, true, NULL); \ No newline at end of file
diff --git a/yql/essentials/udfs/common/json2/test/cases/SqlTryExists.sql b/yql/essentials/udfs/common/json2/test/cases/SqlTryExists.sql
new file mode 100644
index 00000000000..f42bd5628db
--- /dev/null
+++ b/yql/essentials/udfs/common/json2/test/cases/SqlTryExists.sql
@@ -0,0 +1,11 @@
+/* syntax version 1 */
+
+$path = Json2::CompilePath("strict $.x");
+
+-- Key exists
+SELECT
+ Json2::SqlTryExists(NULL, $path, AsDict()),
+ Json2::SqlTryExists(CAST(@@{"x": 123}@@ as Json), $path, AsDict()),
+ Json2::SqlTryExists(CAST(@@{"x": {"key": "value"}}@@ as Json), $path, AsDict()),
+ Json2::SqlTryExists(CAST(@@{"x": [1, 2, 3]}@@ as Json), $path, AsDict()),
+ Json2::SqlTryExists(CAST(@@{"x": null}@@ as Json), $path, AsDict());
diff --git a/yql/essentials/udfs/common/json2/test/cases/SqlTryExistsError.cfg b/yql/essentials/udfs/common/json2/test/cases/SqlTryExistsError.cfg
new file mode 100644
index 00000000000..eb2e5315d1e
--- /dev/null
+++ b/yql/essentials/udfs/common/json2/test/cases/SqlTryExistsError.cfg
@@ -0,0 +1 @@
+xfail \ No newline at end of file
diff --git a/yql/essentials/udfs/common/json2/test/cases/SqlTryExistsError.sql b/yql/essentials/udfs/common/json2/test/cases/SqlTryExistsError.sql
new file mode 100644
index 00000000000..3d0440b3cad
--- /dev/null
+++ b/yql/essentials/udfs/common/json2/test/cases/SqlTryExistsError.sql
@@ -0,0 +1,7 @@
+/* syntax version 1 */
+
+$path = Json2::CompilePath("strict $.x");
+$json = CAST("[]" as Json);
+
+SELECT
+ Json2::SqlTryExists($json, $path, AsDict()); \ No newline at end of file
diff --git a/yql/essentials/udfs/common/json2/test/cases/SqlValue.sql b/yql/essentials/udfs/common/json2/test/cases/SqlValue.sql
new file mode 100644
index 00000000000..8f86edee795
--- /dev/null
+++ b/yql/essentials/udfs/common/json2/test/cases/SqlValue.sql
@@ -0,0 +1,95 @@
+/* syntax version 1 */
+
+-- Plain cases
+$path = Json2::CompilePath("strict $.x");
+$empty_object = CAST("{}" as Json);
+$empty_array = CAST("[]" as Json);
+$null_key = CAST(@@{
+ "x": null
+}@@ as Json);
+
+$string_json = CAST(@@{
+ "x": "some string value"
+}@@ as Json);
+SELECT
+ Json2::SqlValueUtf8($string_json, $path, AsDict()),
+ Json2::SqlValueUtf8($empty_object, $path, AsDict()),
+ Json2::SqlValueUtf8($empty_array, $path, AsDict()),
+ Json2::SqlValueUtf8($null_key, $path, AsDict()),
+ Json2::SqlValueUtf8(NULL, $path, AsDict());
+
+$int64_json = CAST(@@{
+ "x": 2856
+}@@ as Json);
+SELECT
+ Json2::SqlValueInt64($int64_json, $path, AsDict()),
+ Json2::SqlValueInt64($empty_object, $path, AsDict()),
+ Json2::SqlValueInt64($empty_array, $path, AsDict()),
+ Json2::SqlValueInt64($null_key, $path, AsDict()),
+ Json2::SqlValueInt64(NULL, $path, AsDict());
+
+$double_json = CAST(@@{
+ "x": 2.71828
+}@@ as Json);
+SELECT
+ Json2::SqlValueNumber($double_json, $path, AsDict()),
+ Json2::SqlValueNumber($empty_object, $path, AsDict()),
+ Json2::SqlValueNumber($empty_array, $path, AsDict()),
+ Json2::SqlValueNumber($null_key, $path, AsDict()),
+ Json2::SqlValueNumber(NULL, $path, AsDict());
+
+$bool_json = CAST(@@{
+ "x": true
+}@@ as Json);
+SELECT
+ Json2::SqlValueBool($bool_json, $path, AsDict()),
+ Json2::SqlValueBool($empty_object, $path, AsDict()),
+ Json2::SqlValueBool($empty_array, $path, AsDict()),
+ Json2::SqlValueBool($null_key, $path, AsDict()),
+ Json2::SqlValueBool(NULL, $path, AsDict());
+
+-- Convert cases
+SELECT
+ Json2::SqlValueConvertToUtf8($string_json, $path, AsDict()),
+ Json2::SqlValueConvertToUtf8($int64_json, $path, AsDict()),
+ Json2::SqlValueConvertToUtf8($double_json, $path, AsDict()),
+ Json2::SqlValueConvertToUtf8($bool_json, $path, AsDict()),
+ -- NOTE: Here SQL null must be returned, not "null" string
+ Json2::SqlValueConvertToUtf8($null_key, $path, AsDict()),
+ Json2::SqlValueConvertToUtf8(NULL, $path, AsDict());
+
+-- Error cases
+SELECT
+ Json2::SqlValueUtf8($int64_json, $path, AsDict()),
+ Json2::SqlValueUtf8($double_json, $path, AsDict()),
+ Json2::SqlValueUtf8($bool_json, $path, AsDict());
+
+SELECT
+ Json2::SqlValueInt64($string_json, $path, AsDict()),
+ Json2::SqlValueInt64($double_json, $path, AsDict()),
+ Json2::SqlValueInt64($bool_json, $path, AsDict());
+
+SELECT
+ Json2::SqlValueNumber($string_json, $path, AsDict()),
+ -- NOTE: Here int64 is automatically converted to double as it is possible without precision loss
+ Json2::SqlValueNumber($int64_json, $path, AsDict()),
+ Json2::SqlValueNumber($bool_json, $path, AsDict());
+
+SELECT
+ Json2::SqlValueBool($string_json, $path, AsDict()),
+ Json2::SqlValueBool($int64_json, $path, AsDict()),
+ Json2::SqlValueBool($double_json, $path, AsDict());
+
+$nested_object_json = CAST(@@{
+ "x": {
+ "a": 1
+ }
+}@@ as Json);
+
+$nested_array_json = CAST(@@{
+ "x": [29, 32, "some string"]
+}@@ as Json);
+
+SELECT
+ Json2::SqlValueBool($nested_object_json, $path, AsDict()),
+ Json2::SqlValueBool($nested_array_json, $path, AsDict()); \ No newline at end of file
diff --git a/yql/essentials/udfs/common/json2/test/ya.make b/yql/essentials/udfs/common/json2/test/ya.make
new file mode 100644
index 00000000000..c7079c1bd86
--- /dev/null
+++ b/yql/essentials/udfs/common/json2/test/ya.make
@@ -0,0 +1,13 @@
+YQL_UDF_TEST_CONTRIB()
+
+DEPENDS(yql/essentials/udfs/common/json2)
+
+TIMEOUT(300)
+
+SIZE(MEDIUM)
+
+IF (SANITIZER_TYPE == "memory")
+ TAG(ya:not_autocheck) # YQL-15385
+ENDIF()
+
+END()
diff --git a/yql/essentials/udfs/common/json2/ya.make b/yql/essentials/udfs/common/json2/ya.make
new file mode 100644
index 00000000000..202b4aee9ca
--- /dev/null
+++ b/yql/essentials/udfs/common/json2/ya.make
@@ -0,0 +1,33 @@
+IF (YQL_PACKAGED)
+ PACKAGE()
+ FROM_SANDBOX(FILE 7319901430 OUT_NOAUTO libjson2_udf.so
+ )
+ END()
+ELSE ()
+YQL_UDF_CONTRIB(json2_udf)
+
+ YQL_ABI_VERSION(
+ 2
+ 28
+ 0
+ )
+
+ SRCS(
+ json2_udf.cpp
+ )
+
+ PEERDIR(
+ yql/essentials/core/sql_types
+ yql/essentials/types/binary_json
+ yql/essentials/minikql/dom
+ yql/essentials/minikql/jsonpath
+ )
+
+ END()
+ENDIF ()
+
+
+RECURSE_FOR_TESTS(
+ test
+)
+
diff --git a/yql/essentials/udfs/common/math/lib/erfinv.cpp b/yql/essentials/udfs/common/math/lib/erfinv.cpp
new file mode 100644
index 00000000000..b762ec28070
--- /dev/null
+++ b/yql/essentials/udfs/common/math/lib/erfinv.cpp
@@ -0,0 +1,114 @@
+#include <cmath>
+#include <array>
+#include <numeric>
+
+#include "erfinv.h"
+
+template <size_t N>
+static double polEval(double x, const std::array<double, N>& coef) {
+ static_assert(N > 0, "Array coef[] should not be empty.");
+ // Horner's scheme: coef[i] multiplies x^i, so fold from the highest power down.
+ double acc = coef[N - 1];
+ for (size_t i = N - 1; i > 0; --i) { acc = acc * x + coef[i - 1]; }
+ return acc;
+}
+
+namespace NMathUdf {
+
+// Inverse error function. Reference: https://www.jstor.org/stable/2347330 (coefficients look like AS 241 rescaled for erf - NOTE(review): confirm).
+double ErfInv(double x) {
+ static constexpr std::array<double, 8> a = { // numerator, central region |x| <= 0.85
+ 1.1975323115670912564578e0,
+ 4.7072688112383978012285e1,
+ 6.9706266534389598238465e2,
+ 4.8548868893843886794648e3,
+ 1.6235862515167575384252e4,
+ 2.3782041382114385731252e4,
+ 1.1819493347062294404278e4,
+ 8.8709406962545514830200e2,
+ };
+ static constexpr std::array<double, 8> b = { // denominator, central region
+ 1.,
+ 4.2313330701600911252e1,
+ 6.8718700749205790830e2,
+ 5.3941960214247511077e3,
+ 2.1213794301586595867e4,
+ 3.9307895800092710610e4,
+ 2.8729085735721942674e4,
+ 5.2264952788528545610e3,
+ };
+ static constexpr std::array<double, 8> c = { // numerator, intermediate tail (r <= 3.4)
+ 1.42343711074968357734e0,
+ 4.63033784615654529590e0,
+ 5.76949722146069140550e0,
+ 3.64784832476320460504e0,
+ 1.27045825245236838258e0,
+ 2.41780725177450611770e-1,
+ 2.27238449892691845833e-2,
+ 7.74545014278341407640e-4,
+ };
+ static constexpr std::array<double, 8> d = { // denominator, intermediate tail
+ 1.4142135623730950488016887e0,
+ 2.9036514445419946173133295e0,
+ 2.3707661626024532365971225e0,
+ 9.7547832001787427186894837e-1,
+ 2.0945065210512749128288442e-1,
+ 2.1494160384252876777097297e-2,
+ 7.7441459065157709165577218e-4,
+ 1.4859850019840355905497876e-9,
+ };
+ static constexpr std::array<double, 8> e = { // numerator, far tail (r > 3.4)
+ 6.65790464350110377720e0,
+ 5.46378491116411436990e0,
+ 1.78482653991729133580e0,
+ 2.96560571828504891230e-1,
+ 2.65321895265761230930e-2,
+ 1.24266094738807843860e-3,
+ 2.71155556874348757815e-5,
+ 2.01033439929228813265e-7,
+ };
+ static constexpr std::array<double, 8> f = { // denominator, far tail
+ 1.414213562373095048801689e0,
+ 8.482908416595164588112026e-1,
+ 1.936480946950659106176712e-1,
+ 2.103693768272068968719679e-2,
+ 1.112800997078859844711555e-3,
+ 2.611088405080593625138020e-5,
+ 2.010321207683943062279931e-7,
+ 2.891024605872965461538222e-15,
+ };
+
+ if (std::isnan(x) || x <= -1. || x >= 1.) { // outside the open interval (-1, 1)
+ if (x == 1.) {
+ return std::numeric_limits<double>::infinity();
+ }
+ if (x == -1.) {
+ return -std::numeric_limits<double>::infinity();
+ }
+ return std::numeric_limits<double>::quiet_NaN(); // NaN input or |x| > 1
+ }
+
+ double sign = (x > 0) - (x < 0); // the function is odd: work on |x| and restore the sign at the end
+ x = std::abs(x); // std:: is required: unqualified abs may bind to int abs(int) and truncate to 0
+ if (x < 1e-7) {
+ return sign * x / M_2_SQRTPI; // leading series term: erfinv(x) ~ x * sqrt(pi) / 2
+ }
+
+ double ans;
+ if (x <= 0.85) {
+ double r = 0.180625 - 0.25 * x * x;
+ ans = x * polEval(r, a) / polEval(r, b);
+ } else {
+ double r = std::sqrt(M_LN2 - std::log(1. - x)) - 1.6; // sqrt(-ln((1 - x) / 2)), shifted by 1.6
+ if (r <= 3.4) {
+ ans = polEval(r, c) / polEval(r, d);
+ } else {
+ r -= 3.4;
+ ans = polEval(r, e) / polEval(r, f);
+ }
+ }
+
+ return ans * sign;
+}
+
+}
diff --git a/yql/essentials/udfs/common/math/lib/erfinv.h b/yql/essentials/udfs/common/math/lib/erfinv.h
new file mode 100644
index 00000000000..1ced5a07e65
--- /dev/null
+++ b/yql/essentials/udfs/common/math/lib/erfinv.h
@@ -0,0 +1,7 @@
+#pragma once
+
+namespace NMathUdf {
+
+double ErfInv(double x); // Inverse error function, defined in erfinv.cpp: +/-infinity at x = +/-1, NaN for NaN input or |x| > 1.
+
+}
diff --git a/yql/essentials/udfs/common/math/lib/round.h b/yql/essentials/udfs/common/math/lib/round.h
new file mode 100644
index 00000000000..f59700da88f
--- /dev/null
+++ b/yql/essentials/udfs/common/math/lib/round.h
@@ -0,0 +1,77 @@
+#pragma once
+
+#include <util/system/types.h>
+#include <cmath>
+#include <optional>
+#include <fenv.h>
+
+namespace NMathUdf {
+
+template <class T>
+inline T RoundToDecimal(T v, int decShift) {
+ const T scale = std::pow(T(10), decShift); // rounding step 10^decShift; a negative shift keeps fractional digits
+ return std::floor(v / scale + T(0.5)) * scale; // round half up in units of the step, then scale back
+}
+
+inline std::optional<i64> Mod(i64 value, i64 m) { // modulo with the divisor's sign; empty optional when m == 0
+ if (!m) {
+ return {};
+ }
+ // |m| == 1 always yields 0; answered without dividing because min(i64) % -1 overflows (undefined behaviour).
+ const i64 result = (m == 1 || m == -1) ? 0 : value % m;
+ if ((result < 0 && m > 0) || (result > 0 && m < 0)) {
+ return result + m; // `%` follows the dividend's sign; shift the result into the divisor's sign
+ }
+ return result;
+}
+
+inline std::optional<i64> Rem(i64 value, i64 m) {
+ // Truncated remainder of value / m: the result is zero or carries the sign of `value`.
+ // Returns an empty optional when m == 0 (division by zero).
+ if (!m) {
+ return {};
+ }
+
+ // min(i64) % -1 overflows (undefined behaviour, typically a trap on x86) although the
+ // remainder modulo +/-1 is always 0, so answer that case without dividing.
+ if (m == 1 || m == -1) {
+ return i64(0);
+ }
+ // Since C++11 `%` truncates toward zero, so no sign fix-up is needed afterwards.
+ return value % m;
+}
+
+inline std::optional<i64> NearbyIntImpl(double value, decltype(FE_DOWNWARD) mode) {
+ if (!::isfinite(value)) {
+ return {}; // NaN and infinities have no integer counterpart
+ }
+ // Round under the requested mode, then put the previous rounding mode back.
+ const auto savedMode = ::fegetround();
+ ::fesetround(mode);
+ const auto rounded = ::nearbyint(value);
+ ::fesetround(savedMode);
+ // cast to i64 gives wrong sign above 9223372036854774784
+ // lower bound is adjusted to -9223372036854774784 as well
+ const bool tooLow = rounded < double(std::numeric_limits<i64>::min() + 513);
+ const bool tooHigh = rounded > double(std::numeric_limits<i64>::max() - 512);
+ if (tooLow || tooHigh) { return {}; }
+
+ return static_cast<i64>(rounded);
+}
+
+inline std::optional<i64> NearbyInt(double value, ui32 mode) {
+ // Translate the numeric mode code into the matching <fenv.h> rounding mode.
+ if (mode == 0) {
+ return NearbyIntImpl(value, FE_DOWNWARD);
+ } else if (mode == 1) {
+ return NearbyIntImpl(value, FE_TONEAREST);
+ } else if (mode == 2) {
+ return NearbyIntImpl(value, FE_TOWARDZERO);
+ } else if (mode == 3) {
+ return NearbyIntImpl(value, FE_UPWARD);
+ }
+ // Any other code is not a known rounding mode.
+ return {};
+}
+
+}
diff --git a/yql/essentials/udfs/common/math/lib/round_ut.cpp b/yql/essentials/udfs/common/math/lib/round_ut.cpp
new file mode 100644
index 00000000000..4d0e96e4dc3
--- /dev/null
+++ b/yql/essentials/udfs/common/math/lib/round_ut.cpp
@@ -0,0 +1,70 @@
+#include "round.h"
+
+#include <library/cpp/testing/unittest/registar.h>
+
+#include <util/string/hex.h>
+
+using namespace NMathUdf;
+
+Y_UNIT_TEST_SUITE(TRound) { // Unit tests for the helpers declared in round.h
+ Y_UNIT_TEST(Basic) { // RoundToDecimal with shift -3 (three fractional digits)
+ double value = 1930.0 / 3361.0;
+ double result = RoundToDecimal<long double>(value, -3); // computed in long double, stored back as double
+ double answer = 0.574;
+ UNIT_ASSERT_VALUES_EQUAL( // compares the encoded bytes of both doubles rather than the numeric values
+ HexEncode(&result, sizeof(double)),
+ HexEncode(&answer, sizeof(double)));
+ }
+
+ Y_UNIT_TEST(Mod) { // Mod: result is zero or has the sign of the divisor; empty for divisor 0
+ UNIT_ASSERT_VALUES_EQUAL(*Mod(-1, 7), 6);
+ UNIT_ASSERT_VALUES_EQUAL(*Mod(1, 7), 1);
+ UNIT_ASSERT_VALUES_EQUAL(*Mod(0, 7), 0);
+
+ UNIT_ASSERT_VALUES_EQUAL(*Mod(-1, -7), -1);
+ UNIT_ASSERT_VALUES_EQUAL(*Mod(1, -7), -6);
+ UNIT_ASSERT_VALUES_EQUAL(*Mod(0, -7), 0);
+
+ UNIT_ASSERT_VALUES_EQUAL(*Mod(-15, 7), 6);
+ UNIT_ASSERT_VALUES_EQUAL(*Mod(15, 7), 1);
+ UNIT_ASSERT_VALUES_EQUAL(*Mod(14, 7), 0);
+ UNIT_ASSERT_VALUES_EQUAL(*Mod(-14, 7), 0);
+
+ UNIT_ASSERT_VALUES_EQUAL(*Mod(-15, -7), -1);
+ UNIT_ASSERT_VALUES_EQUAL(*Mod(15, -7), -6);
+ UNIT_ASSERT_VALUES_EQUAL(*Mod(14, -7), 0);
+ UNIT_ASSERT_VALUES_EQUAL(*Mod(-14, -7), 0);
+
+ UNIT_ASSERT(!Mod(-14, 0));
+ }
+
+ Y_UNIT_TEST(Rem) { // Rem: result is zero or has the sign of the dividend; empty for divisor 0
+ UNIT_ASSERT_VALUES_EQUAL(*Rem(-1, 7), -1);
+ UNIT_ASSERT_VALUES_EQUAL(*Rem(1, 7), 1);
+ UNIT_ASSERT_VALUES_EQUAL(*Rem(0, 7), 0);
+
+ UNIT_ASSERT_VALUES_EQUAL(*Rem(-1, -7), -1);
+ UNIT_ASSERT_VALUES_EQUAL(*Rem(1, -7), 1);
+ UNIT_ASSERT_VALUES_EQUAL(*Rem(0, -7), 0);
+
+ UNIT_ASSERT_VALUES_EQUAL(*Rem(-15, 7), -1);
+ UNIT_ASSERT_VALUES_EQUAL(*Rem(15, 7), 1);
+ UNIT_ASSERT_VALUES_EQUAL(*Rem(14, 7), 0);
+ UNIT_ASSERT_VALUES_EQUAL(*Rem(-14, 7), 0);
+
+ UNIT_ASSERT_VALUES_EQUAL(*Rem(-15, -7), -1);
+ UNIT_ASSERT_VALUES_EQUAL(*Rem(15, -7), 1);
+ UNIT_ASSERT_VALUES_EQUAL(*Rem(14, -7), 0);
+ UNIT_ASSERT_VALUES_EQUAL(*Rem(-14, -7), 0);
+ UNIT_ASSERT(!Rem(-14, 0));
+ }
+
+ Y_UNIT_TEST(NearbyInt) { // NOTE: checks the i64 <-> double round-trip at the bounds quoted in NearbyIntImpl; NearbyInt itself is not called
+ const i64 maxV = 9223372036854774784ll;
+ const i64 minV = -9223372036854774784ll;
+ UNIT_ASSERT_VALUES_EQUAL((i64)(double)(maxV), maxV); // both bounds survive the round-trip unchanged
+ UNIT_ASSERT_VALUES_EQUAL((i64)(double)(minV), minV);
+
+ UNIT_ASSERT_VALUES_UNEQUAL((i64)(double)(maxV + 1), maxV + 1); // one past the bound no longer round-trips
+ }
+}
diff --git a/yql/essentials/udfs/common/math/lib/ut/ya.make b/yql/essentials/udfs/common/math/lib/ut/ya.make
new file mode 100644
index 00000000000..c1efcde3b47
--- /dev/null
+++ b/yql/essentials/udfs/common/math/lib/ut/ya.make
@@ -0,0 +1,11 @@
+IF (OS_LINUX)
+IF (NOT WITH_VALGRIND)
+ UNITTEST_FOR(yql/essentials/udfs/common/math/lib)
+
+ SRCS(
+ round_ut.cpp
+ )
+
+ END()
+ENDIF()
+ENDIF()
diff --git a/yql/essentials/udfs/common/math/lib/ya.make b/yql/essentials/udfs/common/math/lib/ya.make
new file mode 100644
index 00000000000..54b882a8438
--- /dev/null
+++ b/yql/essentials/udfs/common/math/lib/ya.make
@@ -0,0 +1,11 @@
+LIBRARY()
+
+SRCS(
+ erfinv.cpp
+)
+
+END()
+
+RECURSE_FOR_TESTS(
+ ut
+)
diff --git a/yql/essentials/udfs/common/math/math_ir.cpp b/yql/essentials/udfs/common/math/math_ir.cpp
new file mode 100644
index 00000000000..a63968dbe00
--- /dev/null
+++ b/yql/essentials/udfs/common/math/math_ir.cpp
@@ -0,0 +1,5 @@
+#define LLVM_BC
+
+#include "math_ir.h"
+
+#include <util/generic/ymath.cpp>
diff --git a/yql/essentials/udfs/common/math/math_ir.h b/yql/essentials/udfs/common/math/math_ir.h
new file mode 100644
index 00000000000..ee788ee90ba
--- /dev/null
+++ b/yql/essentials/udfs/common/math/math_ir.h
@@ -0,0 +1,150 @@
+#pragma once
+
+#include <yql/essentials/udfs/common/math/lib/round.h>
+#include <yql/essentials/udfs/common/math/lib/erfinv.h>
+#include <yql/essentials/public/udf/udf_value.h>
+
+#include <util/generic/ymath.h>
+#include <util/system/compiler.h>
+
+#include <math.h>
+
+namespace NYql {
+namespace NUdf {
+
+// X-macro table of zero-argument functions: XX(<UDF name>, <constant expression>).
+// CONST_IMPL turns each row into a <name>IR function that stores the expression
+// into *result. The Round* rows are the plain integers 0..3; math_udf.cpp exposes
+// them with the "MathRoundingMode" tag, which is the mode argument of NearbyInt.
+// NOTE(review): the meaning of each number is decided by NMathUdf::NearbyInt,
+// which is not visible here — keep the two in sync.
+#define CONST_FUNCS(XX) \
+ XX(Pi, M_PI) \
+ XX(E, M_E) \
+ XX(Eps, std::numeric_limits<double>::epsilon()) \
+ XX(RoundDownward, 0) \
+ XX(RoundToNearest, 1) \
+ XX(RoundTowardZero, 2) \
+ XX(RoundUpward, 3)
+
+// X-macro table of single-argument functions: XX(<UDF name>, <callable>).
+// SINGLE_ARG_IMPL turns each row into <name>IR, which applies <callable> to
+// args[0] read as a double. Abs, Erf, Exp2 and LogGamma are not <math.h> names;
+// presumably they come from util/generic/ymath.h — TODO confirm.
+// Asinh must map to asinh (inverse hyperbolic sine); mapping it to asin would
+// return the arcsine and yield NaN for any |x| > 1.
+#define SINGLE_ARG_FUNCS(XX) \
+    XX(Abs, Abs) \
+    XX(Acos, acos) \
+    XX(Asin, asin) \
+    XX(Asinh, asinh) \
+    XX(Atan, atan) \
+    XX(Cbrt, cbrt) \
+    XX(Ceil, ceil) \
+    XX(Cos, cos) \
+    XX(Cosh, cosh) \
+    XX(Erf, Erf) \
+    XX(Exp, exp) \
+    XX(Exp2, Exp2) \
+    XX(Fabs, fabs) \
+    XX(Floor, std::floor) \
+    XX(Lgamma, LogGamma) \
+    XX(Rint, rint) \
+    XX(Sin, sin) \
+    XX(Sinh, sinh) \
+    XX(Sqrt, sqrt) \
+    XX(Tan, tan) \
+    XX(Tanh, tanh) \
+    XX(Tgamma, tgamma) \
+    XX(Trunc, trunc) \
+    XX(IsFinite, std::isfinite) \
+    XX(IsInf, std::isinf) \
+    XX(IsNaN, std::isnan)
+
+// X-macro table of two-argument functions:
+// XX(<UDF name>, <callable>, <C++ type of the second argument>).
+// The first argument is always read as a double (see TWO_ARGS_IMPL); only
+// Ldexp takes an int as its second argument.
+#define TWO_ARGS_FUNCS(XX) \
+ XX(Atan2, atan2, double) \
+ XX(Fmod, fmod, double) \
+ XX(Hypot, hypot, double) \
+ XX(Remainder, remainder, double) \
+ XX(Pow, pow, double) \
+ XX(Ldexp, ldexp, int)
+
+// X-macro table of logarithms: XX(<UDF name>, <callable>).
+// These rows go through POSITIVE_SINGLE_ARG_IMPL, which calls <callable> only
+// for inputs strictly greater than zero and returns NaN otherwise.
+#define POSITIVE_SINGLE_ARG_FUNCS(XX) \
+ XX(Log, log) \
+ XX(Log2, Log2) \
+ XX(Log10, log10)
+
+
+// Defines `extern "C" void <name>IR(...)` that ignores its arguments and
+// stores the constant expression `cnst` into *result.
+#define CONST_IMPL(name, cnst) \
+ extern "C" UDF_ALWAYS_INLINE \
+ void name##IR(const IBoxedValue* /*pThis*/, TUnboxedValuePod* result, const IValueBuilder* /*valueBuilder*/, const TUnboxedValuePod* /*args*/) {\
+ *result = TUnboxedValuePod(cnst); \
+ }
+
+// Defines `extern "C" void <name>IR(...)` that reads args[0] as a double,
+// applies `func` to it and stores the returned value into *result.
+#define SINGLE_ARG_IMPL(name, func) \
+ extern "C" UDF_ALWAYS_INLINE \
+ void name##IR(const IBoxedValue* /*pThis*/, TUnboxedValuePod* result, const IValueBuilder* /*valueBuilder*/, const TUnboxedValuePod* args) { \
+ *result = TUnboxedValuePod(func(args[0].Get<double>())); \
+ }
+
+// Defines `extern "C" void <name>IR(...)` that reads args[0] as a double and
+// args[1] as `secondType`, calls `func` on the pair and stores the returned
+// value into *result.
+#define TWO_ARGS_IMPL(name, func, secondType) \
+ extern "C" UDF_ALWAYS_INLINE \
+ void name##IR(const IBoxedValue* /*pThis*/, TUnboxedValuePod* result, const IValueBuilder* /*valueBuilder*/, const TUnboxedValuePod* args) { \
+ *result = TUnboxedValuePod(func(args[0].Get<double>(), args[1].Get<secondType>())); \
+ }
+
+// Defines `extern "C" void <name>IR(...)` for functions that are evaluated only
+// on strictly positive input. `func` is called when args[0] > 0; every other
+// input — zero, negative values and NaN (for which `input > 0` is false) —
+// produces NaN. Note that zero therefore yields NaN rather than -inf.
+#define POSITIVE_SINGLE_ARG_IMPL(name, func) \
+ extern "C" UDF_ALWAYS_INLINE \
+ void name##IR(const IBoxedValue* /*pThis*/, TUnboxedValuePod* result, const IValueBuilder* /*valueBuilder*/, const TUnboxedValuePod* args) { \
+ double input = args[0].Get<double>(); \
+ if (input > 0) { \
+ *result = TUnboxedValuePod(func(input)); \
+ } else { \
+ *result = TUnboxedValuePod(static_cast<double>(NAN)); \
+ } \
+ }
+
+// Expand every table above into the corresponding <name>IR function definitions.
+CONST_FUNCS(CONST_IMPL)
+SINGLE_ARG_FUNCS(SINGLE_ARG_IMPL)
+TWO_ARGS_FUNCS(TWO_ARGS_IMPL)
+POSITIVE_SINGLE_ARG_FUNCS(POSITIVE_SINGLE_ARG_IMPL)
+
+// Logistic sigmoid of args[0]: 1 / (1 + e^(-x)).
+extern "C" UDF_ALWAYS_INLINE
+void SigmoidIR(const IBoxedValue* /*pThis*/, TUnboxedValuePod* result, const IValueBuilder* /*valueBuilder*/, const TUnboxedValuePod* args) {
+    const double x = args[0].Get<double>();
+    const double denominator = 1. + exp(-x);
+    *result = TUnboxedValuePod(1. / denominator);
+}
+
+// Approximate equality of args[0] and args[1]. When the optional third
+// argument is present it is passed to FuzzyEquals as the tolerance; when it is
+// empty the two-argument overload of FuzzyEquals is used.
+extern "C" UDF_ALWAYS_INLINE
+void FuzzyEqualsIR(const IBoxedValue* /*pThis*/, TUnboxedValuePod* result, const IValueBuilder* /*valueBuilder*/, const TUnboxedValuePod* args) {
+    const double lhs = args[0].Get<double>();
+    const double rhs = args[1].Get<double>();
+    if (args[2]) {
+        const double tolerance = args[2].Get<double>();
+        *result = TUnboxedValuePod(FuzzyEquals(lhs, rhs, tolerance));
+        return;
+    }
+    *result = TUnboxedValuePod(FuzzyEquals(lhs, rhs));
+}
+
+// Rounds args[0] via NMathUdf::RoundToDecimal<long double>; the second
+// argument is optional and falls back to 0 when it is empty.
+extern "C" UDF_ALWAYS_INLINE
+void RoundIR(const IBoxedValue* /*pThis*/, TUnboxedValuePod* result, const IValueBuilder* /*valueBuilder*/, const TUnboxedValuePod* args) {
+    const double source = args[0].Get<double>();
+    const int precision = args[1].GetOrDefault<int>(0);
+    // The computation runs in long double and is narrowed back to double here.
+    const double rounded = NMathUdf::RoundToDecimal<long double>(source, precision);
+    *result = TUnboxedValuePod(rounded);
+}
+
+// Stores NMathUdf::ErfInv(args[0]) into *result.
+extern "C" UDF_ALWAYS_INLINE
+void ErfInvIR(const IBoxedValue* /*pThis*/, TUnboxedValuePod* result, const IValueBuilder* /*valueBuilder*/, const TUnboxedValuePod* args) {
+    const double x = args[0].Get<double>();
+    *result = TUnboxedValuePod(NMathUdf::ErfInv(x));
+}
+
+extern "C" UDF_ALWAYS_INLINE
+void ErfcInvIR(const IBoxedValue* /*pThis*/, TUnboxedValuePod* result, const IValueBuilder* /*valueBuilder*/, const TUnboxedValuePod* args) {
+ *result = TUnboxedValuePod(NMathUdf::ErfInv(1. - args[0].Get<double>()));
+}
+
+// Stores NMathUdf::Mod(args[0], args[1]) into *result, or an empty value when
+// Mod reports no result.
+extern "C" UDF_ALWAYS_INLINE
+void ModIR(const IBoxedValue* /*pThis*/, TUnboxedValuePod* result, const IValueBuilder* /*valueBuilder*/, const TUnboxedValuePod* args) {
+    const auto modulo = NMathUdf::Mod(args[0].Get<i64>(), args[1].Get<i64>());
+    if (modulo) {
+        *result = TUnboxedValuePod(*modulo);
+    } else {
+        *result = TUnboxedValuePod();
+    }
+}
+
+// Stores NMathUdf::Rem(args[0], args[1]) into *result, or an empty value when
+// Rem reports no result (the unit test shows this for a zero divisor).
+extern "C" UDF_ALWAYS_INLINE
+void RemIR(const IBoxedValue* /*pThis*/, TUnboxedValuePod* result, const IValueBuilder* /*valueBuilder*/, const TUnboxedValuePod* args) {
+    const auto remainder = NMathUdf::Rem(args[0].Get<i64>(), args[1].Get<i64>());
+    if (remainder) {
+        *result = TUnboxedValuePod(*remainder);
+    } else {
+        *result = TUnboxedValuePod();
+    }
+}
+
+// Stores NMathUdf::NearbyInt(args[0], mode) into *result, where mode is
+// args[1] read as ui32; an empty value is stored when NearbyInt reports no
+// result.
+extern "C" UDF_ALWAYS_INLINE
+void NearbyIntIR(const IBoxedValue* /*pThis*/, TUnboxedValuePod* result, const IValueBuilder* /*valueBuilder*/, const TUnboxedValuePod* args) {
+    const auto rounded = NMathUdf::NearbyInt(args[0].Get<double>(), args[1].Get<ui32>());
+    if (rounded) {
+        *result = TUnboxedValuePod(*rounded);
+    } else {
+        *result = TUnboxedValuePod();
+    }
+}
+
+} // NUdf
+} // NYql
diff --git a/yql/essentials/udfs/common/math/math_udf.cpp b/yql/essentials/udfs/common/math/math_udf.cpp
new file mode 100644
index 00000000000..1c2652320bb
--- /dev/null
+++ b/yql/essentials/udfs/common/math/math_udf.cpp
@@ -0,0 +1,99 @@
+#include "math_ir.h"
+
+
+#include <yql/essentials/public/udf/udf_helpers.h>
+
+// Tag name attached to rounding-mode values. The array is declared `extern`
+// so that it has external linkage and can be used as a template argument.
+extern const char TagRoundingMode[] = "MathRoundingMode";
+// ui32 carrying the "MathRoundingMode" tag: the type returned by the Round*
+// constant functions and accepted as the second argument of NearbyInt.
+using TTaggedRoundingMode = NYql::NUdf::TTagged<ui32, TagRoundingMode>;
+
+// Table of every UDF that has an LLVM IR implementation:
+// XX(<name>, <signature>, <number of optional arguments>).
+// The final row uses XXL instead of XX so that a caller can expand the last
+// entry differently (the module registration below uses this to omit the
+// trailing comma). Each <name> needs a matching <name>IR function in math_ir.h.
+#define MATH_UDF_MAP(XX, XXL) \
+ XX(Pi, double(), 0) \
+ XX(E, double(), 0) \
+ XX(Eps, double(), 0) \
+ XX(RoundDownward, TTaggedRoundingMode(), 0) \
+ XX(RoundToNearest, TTaggedRoundingMode(), 0) \
+ XX(RoundTowardZero, TTaggedRoundingMode(), 0) \
+ XX(RoundUpward, TTaggedRoundingMode(), 0) \
+ XX(Abs, double(TAutoMap<double>), 0) \
+ XX(Acos, double(TAutoMap<double>), 0) \
+ XX(Asin, double(TAutoMap<double>), 0) \
+ XX(Asinh, double(TAutoMap<double>), 0) \
+ XX(Atan, double(TAutoMap<double>), 0) \
+ XX(Cbrt, double(TAutoMap<double>), 0) \
+ XX(Ceil, double(TAutoMap<double>), 0) \
+ XX(Cos, double(TAutoMap<double>), 0) \
+ XX(Cosh, double(TAutoMap<double>), 0) \
+ XX(Erf, double(TAutoMap<double>), 0) \
+ XX(ErfInv, double(TAutoMap<double>), 0) \
+ XX(ErfcInv, double(TAutoMap<double>), 0) \
+ XX(Exp, double(TAutoMap<double>), 0) \
+ XX(Exp2, double(TAutoMap<double>), 0) \
+ XX(Fabs, double(TAutoMap<double>), 0) \
+ XX(Floor, double(TAutoMap<double>), 0) \
+ XX(Lgamma, double(TAutoMap<double>), 0) \
+ XX(Rint, double(TAutoMap<double>), 0) \
+ XX(Sin, double(TAutoMap<double>), 0) \
+ XX(Sinh, double(TAutoMap<double>), 0) \
+ XX(Sqrt, double(TAutoMap<double>), 0) \
+ XX(Tan, double(TAutoMap<double>), 0) \
+ XX(Tanh, double(TAutoMap<double>), 0) \
+ XX(Tgamma, double(TAutoMap<double>), 0) \
+ XX(Trunc, double(TAutoMap<double>), 0) \
+ XX(Log, double(TAutoMap<double>), 0) \
+ XX(Log2, double(TAutoMap<double>), 0) \
+ XX(Log10, double(TAutoMap<double>), 0) \
+ XX(Atan2, double(TAutoMap<double>, TAutoMap<double>), 0) \
+ XX(Fmod, double(TAutoMap<double>, TAutoMap<double>), 0) \
+ XX(Hypot, double(TAutoMap<double>, TAutoMap<double>), 0) \
+ XX(Remainder, double(TAutoMap<double>, TAutoMap<double>), 0) \
+ XX(Pow, double(TAutoMap<double>, TAutoMap<double>), 0) \
+ XX(Ldexp, double(TAutoMap<double>, TAutoMap<int>), 0) \
+ XX(IsFinite, bool(TAutoMap<double>), 0) \
+ XX(IsInf, bool(TAutoMap<double>), 0) \
+ XX(IsNaN, bool(TAutoMap<double>), 0) \
+ XX(Sigmoid, double(TAutoMap<double>), 0) \
+ XX(FuzzyEquals, bool(TAutoMap<double>, TAutoMap<double>, TEpsilon), 1) \
+ XX(Mod, TOptional<i64>(TAutoMap<i64>, i64), 0) \
+ XX(Rem, TOptional<i64>(TAutoMap<i64>, i64), 0) \
+ XXL(Round, double(TAutoMap<double>, TPrecision), 1)
+
+// Table of UDFs that are registered without an LLVM bitcode symbol; same row
+// format as MATH_UDF_MAP. NearbyInt still runs through NearbyIntIR, but only
+// as an ordinary C++ call (see MATH_STRICT_UDF_WITHOUT_IR).
+#define MATH_UDF_MAP_WITHOUT_IR(XX) \
+ XX(NearbyInt, TOptional<i64>(TAutoMap<double>, TTaggedRoundingMode), 0)
+
+// Declares UDF class T<name> through SIMPLE_STRICT_UDF_WITH_IR, passing
+// "/llvm_bc/Math" and "<name>IR" as the IR resource and symbol names. The C++
+// body forwards to the same <name>IR function and returns what it wrote.
+#define MATH_STRICT_UDF(name, signature, optionalArgsCount) \
+ SIMPLE_STRICT_UDF_WITH_IR(T##name, signature, optionalArgsCount, "/llvm_bc/Math", #name "IR") { \
+ TUnboxedValuePod res; \
+ name##IR(this, &res, valueBuilder, args); \
+ return res; \
+ }
+
+// Declares UDF class T<name> through SIMPLE_STRICT_UDF_WITH_OPTIONAL_ARGS,
+// i.e. without naming an IR resource or symbol. The body is identical to
+// MATH_STRICT_UDF: it forwards to <name>IR and returns what it wrote.
+#define MATH_STRICT_UDF_WITHOUT_IR(name, signature, optionalArgsCount) \
+ SIMPLE_STRICT_UDF_WITH_OPTIONAL_ARGS(T##name, signature, optionalArgsCount) { \
+ TUnboxedValuePod res; \
+ name##IR(this, &res, valueBuilder, args); \
+ return res; \
+ }
+
+// Row expanders that build the comma-separated class list for SIMPLE_MODULE:
+// every row emits "T<name>," except the last one, which emits "T<name>" so the
+// list does not end with a trailing comma.
+#define REGISTER_MATH_UDF(udfName, ...) T##udfName,
+#define REGISTER_MATH_UDF_LAST(udfName, ...) T##udfName
+
+using namespace NKikimr;
+using namespace NUdf;
+
+namespace {
+ // Named optional argument "Epsilon" (double) used in the FuzzyEquals signature.
+ extern const char epsilon[] = "Epsilon";
+ using TEpsilon = TNamedArg<double, epsilon>;
+
+ // Named optional argument "Precision" (int) used in the Round signature.
+ extern const char precision[] = "Precision";
+ using TPrecision = TNamedArg<int, precision>;
+
+ // Define one UDF class per row; the last row needs no special treatment
+ // here, so the same expander is passed for both XX and XXL.
+ MATH_UDF_MAP(MATH_STRICT_UDF, MATH_STRICT_UDF)
+
+ MATH_UDF_MAP_WITHOUT_IR(MATH_STRICT_UDF_WITHOUT_IR)
+
+ // The IR-less table is listed first because each of its rows expands with a
+ // trailing comma; MATH_UDF_MAP then closes the list with its comma-free
+ // last row.
+ SIMPLE_MODULE(TMathModule,
+ MATH_UDF_MAP_WITHOUT_IR(REGISTER_MATH_UDF)
+ MATH_UDF_MAP(REGISTER_MATH_UDF, REGISTER_MATH_UDF_LAST))
+}
+
+REGISTER_MODULES(TMathModule)
diff --git a/yql/essentials/udfs/common/math/test/canondata/result.json b/yql/essentials/udfs/common/math/test/canondata/result.json
new file mode 100644
index 00000000000..1471c26d599
--- /dev/null
+++ b/yql/essentials/udfs/common/math/test/canondata/result.json
@@ -0,0 +1,35 @@
+{
+ "test.test[ErfInvNoLLVM]": [
+ {
+ "checksum": "be26c6ffe8018b2afe5c6bd554d4468d",
+ "size": 6789,
+ "uri": "https://storage.yandex-team.ru/get-devtools/1937429/e098bd35e45d8b2d18b6958e5e9b0a875d6a03f1/resource.tar.gz#test.test_ErfInvNoLLVM_/results.txt"
+ }
+ ],
+ "test.test[ErfInv]": [
+ {
+ "checksum": "be26c6ffe8018b2afe5c6bd554d4468d",
+ "size": 6789,
+ "uri": "https://storage.yandex-team.ru/get-devtools/1937429/e098bd35e45d8b2d18b6958e5e9b0a875d6a03f1/resource.tar.gz#test.test_ErfInv_/results.txt"
+ }
+ ],
+ "test.test[IR]": [
+ {
+ "checksum": "83061f69c401182342478cbc8fd11b63",
+ "size": 10168,
+ "uri": "https://storage.yandex-team.ru/get-devtools/1937429/e098bd35e45d8b2d18b6958e5e9b0a875d6a03f1/resource.tar.gz#test.test_IR_/results.txt"
+ }
+ ],
+ "test.test[IR_LLVM_OFF]": [
+ {
+ "checksum": "83061f69c401182342478cbc8fd11b63",
+ "size": 10168,
+ "uri": "https://storage.yandex-team.ru/get-devtools/1937429/e098bd35e45d8b2d18b6958e5e9b0a875d6a03f1/resource.tar.gz#test.test_IR_LLVM_OFF_/results.txt"
+ }
+ ],
+ "test.test[NearbyInt]": [
+ {
+ "uri": "file://test.test_NearbyInt_/results.txt"
+ }
+ ]
+}
diff --git a/yql/essentials/udfs/common/math/test/canondata/test.test_NearbyInt_/results.txt b/yql/essentials/udfs/common/math/test/canondata/test.test_NearbyInt_/results.txt
new file mode 100644
index 00000000000..1149fd999dd
--- /dev/null
+++ b/yql/essentials/udfs/common/math/test/canondata/test.test_NearbyInt_/results.txt
@@ -0,0 +1,238 @@
+[
+ {
+ "Write" = [
+ {
+ "Type" = [
+ "ListType";
+ [
+ "StructType";
+ [
+ [
+ "column0";
+ [
+ "TupleType";
+ [
+ [
+ "OptionalType";
+ [
+ "DataType";
+ "Int64"
+ ]
+ ];
+ [
+ "OptionalType";
+ [
+ "DataType";
+ "Int64"
+ ]
+ ];
+ [
+ "OptionalType";
+ [
+ "DataType";
+ "Int64"
+ ]
+ ];
+ [
+ "OptionalType";
+ [
+ "DataType";
+ "Int64"
+ ]
+ ];
+ [
+ "OptionalType";
+ [
+ "DataType";
+ "Int64"
+ ]
+ ];
+ [
+ "OptionalType";
+ [
+ "DataType";
+ "Int64"
+ ]
+ ];
+ [
+ "OptionalType";
+ [
+ "DataType";
+ "Int64"
+ ]
+ ];
+ [
+ "OptionalType";
+ [
+ "DataType";
+ "Int64"
+ ]
+ ];
+ [
+ "OptionalType";
+ [
+ "DataType";
+ "Int64"
+ ]
+ ];
+ [
+ "OptionalType";
+ [
+ "DataType";
+ "Int64"
+ ]
+ ];
+ [
+ "OptionalType";
+ [
+ "DataType";
+ "Int64"
+ ]
+ ]
+ ]
+ ]
+ ];
+ [
+ "x";
+ [
+ "DataType";
+ "Int32"
+ ]
+ ]
+ ]
+ ]
+ ];
+ "Data" = [
+ [
+ [
+ #;
+ #;
+ [
+ "2"
+ ];
+ [
+ "2"
+ ];
+ [
+ "2"
+ ];
+ [
+ "3"
+ ];
+ #;
+ [
+ "-3"
+ ];
+ [
+ "-3"
+ ];
+ [
+ "-3"
+ ];
+ [
+ "-4"
+ ]
+ ];
+ "0"
+ ];
+ [
+ [
+ #;
+ #;
+ [
+ "2"
+ ];
+ [
+ "2"
+ ];
+ [
+ "3"
+ ];
+ [
+ "4"
+ ];
+ #;
+ [
+ "-2"
+ ];
+ [
+ "-2"
+ ];
+ [
+ "-3"
+ ];
+ [
+ "-4"
+ ]
+ ];
+ "1"
+ ];
+ [
+ [
+ #;
+ #;
+ [
+ "2"
+ ];
+ [
+ "2"
+ ];
+ [
+ "2"
+ ];
+ [
+ "3"
+ ];
+ #;
+ [
+ "-2"
+ ];
+ [
+ "-2"
+ ];
+ [
+ "-2"
+ ];
+ [
+ "-3"
+ ]
+ ];
+ "2"
+ ];
+ [
+ [
+ #;
+ #;
+ [
+ "3"
+ ];
+ [
+ "3"
+ ];
+ [
+ "3"
+ ];
+ [
+ "4"
+ ];
+ #;
+ [
+ "-2"
+ ];
+ [
+ "-2"
+ ];
+ [
+ "-2"
+ ];
+ [
+ "-3"
+ ]
+ ];
+ "3"
+ ]
+ ]
+ }
+ ]
+ }
+] \ No newline at end of file
diff --git a/yql/essentials/udfs/common/math/test/cases/ErfInv.sql b/yql/essentials/udfs/common/math/test/cases/ErfInv.sql
new file mode 100644
index 00000000000..798e0090ad8
--- /dev/null
+++ b/yql/essentials/udfs/common/math/test/cases/ErfInv.sql
@@ -0,0 +1,24 @@
+pragma config.flags("ValidateUdf", "None");
+
+SELECT
+ Math::ErfInv(1e-8),
+ Math::ErfInv(1e-4),
+ Math::ErfInv(0.1),
+ Math::ErfInv(0.25),
+ Math::ErfInv(0.5),
+ Math::ErfInv(0.75),
+ Math::ErfInv(0.9),
+ Math::ErfInv(0.99),
+ Math::ErfInv(0.9999999),
+ Math::ErfInv(0.99999999),
+ Math::ErfInv(0.999999999),
+ Math::ErfInv(0.9999999999),
+ Math::ErfInv(0),
+ Math::ErfInv(1 + Math::Eps()),
+ Math::ErfInv(-1 - Math::Eps()),
+ Math::ErfInv(1),
+ Math::ErfInv(-1),
+ Math::ErfcInv(2),
+ Math::ErfcInv(0),
+ Math::ErfcInv(2 + 2 * Math::Eps()),
+ Math::ErfcInv(-Math::Eps());
diff --git a/yql/essentials/udfs/common/math/test/cases/ErfInvNoLLVM.sql b/yql/essentials/udfs/common/math/test/cases/ErfInvNoLLVM.sql
new file mode 100644
index 00000000000..55e7c60db14
--- /dev/null
+++ b/yql/essentials/udfs/common/math/test/cases/ErfInvNoLLVM.sql
@@ -0,0 +1,25 @@
+pragma config.flags("ValidateUdf", "None");
+pragma config.flags("LLVM_OFF");
+
+SELECT
+ Math::ErfInv(1e-8),
+ Math::ErfInv(1e-4),
+ Math::ErfInv(0.1),
+ Math::ErfInv(0.25),
+ Math::ErfInv(0.5),
+ Math::ErfInv(0.75),
+ Math::ErfInv(0.9),
+ Math::ErfInv(0.99),
+ Math::ErfInv(0.9999999),
+ Math::ErfInv(0.99999999),
+ Math::ErfInv(0.999999999),
+ Math::ErfInv(0.9999999999),
+ Math::ErfInv(0),
+ Math::ErfInv(1 + Math::Eps()),
+ Math::ErfInv(-1 - Math::Eps()),
+ Math::ErfInv(1),
+ Math::ErfInv(-1),
+ Math::ErfcInv(2),
+ Math::ErfcInv(0),
+ Math::ErfcInv(2 + 2 * Math::Eps()),
+ Math::ErfcInv(-Math::Eps());
diff --git a/yql/essentials/udfs/common/math/test/cases/IR.in b/yql/essentials/udfs/common/math/test/cases/IR.in
new file mode 100644
index 00000000000..2a8e728cae8
--- /dev/null
+++ b/yql/essentials/udfs/common/math/test/cases/IR.in
@@ -0,0 +1,4 @@
+{"key"="023";"subkey"="3";"value"="aaa"};
+{"key"="037";"subkey"="5";"value"="ddd"};
+{"key"="075";"subkey"="1";"value"="abc"};
+{"key"="150";"subkey"="1";"value"="aaa"};
diff --git a/yql/essentials/udfs/common/math/test/cases/IR.sql b/yql/essentials/udfs/common/math/test/cases/IR.sql
new file mode 100644
index 00000000000..7dbea3a2dc3
--- /dev/null
+++ b/yql/essentials/udfs/common/math/test/cases/IR.sql
@@ -0,0 +1,26 @@
+/* syntax version 1 */
+pragma config.flags("ValidateUdf", "None");
+
+select
+ Math::Pi(),
+ Math::E(),
+ Math::Eps(),
+ Math::Abs(-2.34),
+ Math::Cos(0.234),
+ Math::IsFinite(0.0),
+ Math::IsNaN(0.0/0.0),
+ Math::Pow(2.0, 3.0),
+ Math::Log(4.0),
+ Math::Log(-4.0),
+ Math::Sigmoid(0.5),
+ Math::FuzzyEquals(1 + 0.0, 1 + 1.0e-200),
+ Math::FuzzyEquals(1.0001, 1.00012, 0.01 as Epsilon),
+ Math::Round(34.4564, -2 as Precision),
+ Math::Exp2(3.4),
+ Math::Exp(3.4),
+ Math::Erf(0.4),
+ Math::Mod(-1, 7),
+ Math::Mod(-1, 0),
+ Math::Rem(-1, 7),
+ Math::Rem(-1, 0)
+from Input;
diff --git a/yql/essentials/udfs/common/math/test/cases/IR_LLVM_OFF.in b/yql/essentials/udfs/common/math/test/cases/IR_LLVM_OFF.in
new file mode 100644
index 00000000000..2a8e728cae8
--- /dev/null
+++ b/yql/essentials/udfs/common/math/test/cases/IR_LLVM_OFF.in
@@ -0,0 +1,4 @@
+{"key"="023";"subkey"="3";"value"="aaa"};
+{"key"="037";"subkey"="5";"value"="ddd"};
+{"key"="075";"subkey"="1";"value"="abc"};
+{"key"="150";"subkey"="1";"value"="aaa"};
diff --git a/yql/essentials/udfs/common/math/test/cases/IR_LLVM_OFF.sql b/yql/essentials/udfs/common/math/test/cases/IR_LLVM_OFF.sql
new file mode 100644
index 00000000000..a8c14225ba9
--- /dev/null
+++ b/yql/essentials/udfs/common/math/test/cases/IR_LLVM_OFF.sql
@@ -0,0 +1,27 @@
+/* syntax version 1 */
+pragma config.flags("ValidateUdf", "None");
+pragma config.flags("LLVM_OFF");
+
+select
+ Math::Pi(),
+ Math::E(),
+ Math::Eps(),
+ Math::Abs(-2.34),
+ Math::Cos(0.234),
+ Math::IsFinite(0.0),
+ Math::IsNaN(0.0/0.0),
+ Math::Pow(2.0, 3.0),
+ Math::Log(4.0),
+ Math::Log(-4.0),
+ Math::Sigmoid(0.5),
+ Math::FuzzyEquals(1 + 0.0, 1 + 1.0e-200),
+ Math::FuzzyEquals(1.0001, 1.00012, 0.01 as Epsilon),
+ Math::Round(34.4564, -2 as Precision),
+ Math::Exp2(3.4),
+ Math::Exp(3.4),
+ Math::Erf(0.4),
+ Math::Mod(-1, 7),
+ Math::Mod(-1, 0),
+ Math::Rem(-1, 7),
+ Math::Rem(-1, 0)
+from Input;
diff --git a/yql/essentials/udfs/common/math/test/cases/NearbyInt.in b/yql/essentials/udfs/common/math/test/cases/NearbyInt.in
new file mode 100644
index 00000000000..e69de29bb2d
--- /dev/null
+++ b/yql/essentials/udfs/common/math/test/cases/NearbyInt.in
diff --git a/yql/essentials/udfs/common/math/test/cases/NearbyInt.sql b/yql/essentials/udfs/common/math/test/cases/NearbyInt.sql
new file mode 100644
index 00000000000..f27a93d7274
--- /dev/null
+++ b/yql/essentials/udfs/common/math/test/cases/NearbyInt.sql
@@ -0,0 +1,24 @@
+$f = ($mode)->{
+ return (
+ Math::NearbyInt(Double("NaN"),$mode),
+ Math::NearbyInt(1e100,$mode),
+ Math::NearbyInt(2.3,$mode),
+ Math::NearbyInt(2.5,$mode),
+ Math::NearbyInt(2.7,$mode),
+ Math::NearbyInt(3.5,$mode),
+ Math::NearbyInt(-1e100,$mode),
+ Math::NearbyInt(-2.3,$mode),
+ Math::NearbyInt(-2.5,$mode),
+ Math::NearbyInt(-2.7,$mode),
+ Math::NearbyInt(-3.5,$mode)
+ )
+};
+
+select $f(Math::RoundDownward()), 0 as x
+union all
+select $f(Math::RoundToNearest()), 1 as x
+union all
+select $f(Math::RoundTowardZero()), 2 as x
+union all
+select $f(Math::RoundUpward()), 3 as x
+order by x;
diff --git a/yql/essentials/udfs/common/math/test/ya.make b/yql/essentials/udfs/common/math/test/ya.make
new file mode 100644
index 00000000000..2ebe3a7123c
--- /dev/null
+++ b/yql/essentials/udfs/common/math/test/ya.make
@@ -0,0 +1,13 @@
+YQL_UDF_TEST_CONTRIB()
+
+DEPENDS(yql/essentials/udfs/common/math)
+
+TIMEOUT(300)
+
+SIZE(MEDIUM)
+
+IF (SANITIZER_TYPE == "memory")
+ TAG(ya:not_autocheck) # YQL-15385
+ENDIF()
+
+END()
diff --git a/yql/essentials/udfs/common/math/ya.make b/yql/essentials/udfs/common/math/ya.make
new file mode 100644
index 00000000000..01fa2f1d89b
--- /dev/null
+++ b/yql/essentials/udfs/common/math/ya.make
@@ -0,0 +1,92 @@
+IF (YQL_PACKAGED)
+ PACKAGE()
+ FROM_SANDBOX(FILE 7319902006 OUT_NOAUTO libmath_udf.so
+ )
+ END()
+ELSE ()
+YQL_UDF_CONTRIB(math_udf)
+
+ YQL_ABI_VERSION(
+ 2
+ 28
+ 0
+ )
+
+ SRCS(
+ math_udf.cpp
+ )
+
+ USE_LLVM_BC14()
+
+ LLVM_BC(
+ math_ir.cpp
+ lib/erfinv.cpp
+ NAME Math
+ SYMBOLS
+ PiIR
+ EIR
+ EpsIR
+ RoundDownwardIR
+ RoundToNearestIR
+ RoundTowardZeroIR
+ RoundUpwardIR
+ AbsIR
+ AcosIR
+ AsinIR
+ AsinhIR
+ AtanIR
+ CbrtIR
+ CeilIR
+ CosIR
+ CoshIR
+ ErfIR
+ ErfInvIR
+ ErfcInvIR
+ ExpIR
+ Exp2IR
+ FabsIR
+ FloorIR
+ LgammaIR
+ RintIR
+ SinIR
+ SinhIR
+ SqrtIR
+ TanIR
+ TanhIR
+ TgammaIR
+ TruncIR
+ IsFiniteIR
+ IsInfIR
+ IsNaNIR
+ Atan2IR
+ FmodIR
+ HypotIR
+ RemainderIR
+ PowIR
+ LdexpIR
+ LogIR
+ Log2IR
+ Log10IR
+ SigmoidIR
+ FuzzyEqualsIR
+ RoundIR
+ ModIR
+ RemIR
+ )
+
+ PEERDIR(
+ yql/essentials/udfs/common/math/lib
+ )
+
+ END()
+ENDIF ()
+
+RECURSE(
+ lib
+)
+
+RECURSE_FOR_TESTS(
+ test
+)
+
+
diff --git a/yql/essentials/udfs/common/pire/pire_udf.cpp b/yql/essentials/udfs/common/pire/pire_udf.cpp
new file mode 100644
index 00000000000..0f9ffc5c213
--- /dev/null
+++ b/yql/essentials/udfs/common/pire/pire_udf.cpp
@@ -0,0 +1,358 @@
+#include <yql/essentials/public/udf/udf_value.h>
+#include <yql/essentials/public/udf/udf_type_builder.h>
+#include <yql/essentials/public/udf/udf_registrator.h>
+#include <yql/essentials/public/udf/udf_value_builder.h>
+#include <yql/essentials/public/udf/udf_terminator.h>
+
+#include <library/cpp/regex/pire/regexp.h>
+#include <library/cpp/regex/pire/pcre2pire.h>
+
+#include <util/string/builder.h>
+
+using namespace NRegExp;
+using namespace NKikimr;
+using namespace NUdf;
+
+namespace {
+    // Common base of all Pire callables: remembers the source position used in
+    // error messages and provides the option handling shared by every function.
+    class TPireUdfBase: public TBoxedValue {
+    protected:
+        TPireUdfBase(TSourcePosition pos)
+            : Pos_(pos)
+        {}
+
+        // Strips a leading "(?i)" from `regex` (turning on case-insensitive
+        // matching) and switches the charset to UTF-8 when the remaining
+        // pattern is detected as UTF-8.
+        void SetCommonOptions(std::string_view& regex, TFsm::TOptions& options) {
+            constexpr std::string_view caseInsensitiveFlag = "(?i)";
+            const bool hasFlag = regex.substr(0U, caseInsensitiveFlag.size()) == caseInsensitiveFlag;
+            if (hasFlag) {
+                options.SetCaseInsensitive(true);
+                regex.remove_prefix(caseInsensitiveFlag.size());
+            }
+            const bool isUtf8 = UTF8Detect(regex) == UTF8;
+            if (isUtf8) {
+                options.SetCharset(CODES_UTF8);
+            }
+        }
+
+        TSourcePosition Pos_;
+    };
+
+    // Callable behind Pire::Match, Pire::Grep, Pire::MultiMatch and
+    // Pire::MultiGrep. The pattern is compiled once from the run config in the
+    // constructor; Run() only scans the input string.
+    //  - surroundMode selects Grep (true) versus Match (false) behaviour;
+    //  - multiMode treats the run config as a '\n'-separated list of patterns
+    //    and returns a tuple of `regexpsCount` booleans instead of one boolean.
+    class TPireMatch: public TPireUdfBase {
+    public:
+        // First-stage callable: receives the run config (pattern text) as
+        // args[0] and returns a TPireMatch bound to it.
+        class TFactory: public TPireUdfBase {
+        public:
+            TFactory(
+                bool surroundMode,
+                bool multiMode,
+                TSourcePosition pos,
+                size_t regexpsCount = 0)
+                : TPireUdfBase(pos)
+                , SurroundMode(surroundMode)
+                , MultiMode(multiMode)
+                , RegexpsCount(regexpsCount)
+            {
+            }
+
+        private:
+            TUnboxedValue Run(
+                const IValueBuilder* valueBuilder,
+                const TUnboxedValuePod* args) const final {
+                return TUnboxedValuePod(
+                    new TPireMatch(
+                        valueBuilder,
+                        args[0],
+                        SurroundMode,
+                        MultiMode,
+                        Pos_,
+                        RegexpsCount));
+            }
+
+            bool SurroundMode;
+            bool MultiMode;
+            size_t RegexpsCount;
+        };
+
+        // Function name for the given combination of modes.
+        static const TStringRef& Name(bool surroundMode, bool multiMode) {
+            static auto match = TStringRef::Of("Match");
+            static auto grep = TStringRef::Of("Grep");
+            static auto multiMatch = TStringRef::Of("MultiMatch");
+            static auto multiGrep = TStringRef::Of("MultiGrep");
+            if (surroundMode) {
+                return multiMode ? multiGrep : grep;
+            } else {
+                return multiMode ? multiMatch : match;
+            }
+        }
+
+        // Compiles the pattern(s) from `runConfig`. Any std::exception raised
+        // while compiling terminates the UDF with the source position prepended.
+        TPireMatch(
+            const IValueBuilder* valueBuilder,
+            const TUnboxedValuePod& runConfig,
+            bool surroundMode,
+            bool multiMode,
+            TSourcePosition pos,
+            size_t regexpsCount)
+            : TPireUdfBase(pos)
+            , MultiMode(multiMode)
+            , RegexpsCount(regexpsCount)
+            , SurroundMode(surroundMode)
+        {
+            Y_UNUSED(valueBuilder);
+            try {
+                std::string_view regex(runConfig.AsStringRef());
+                TFsm::TOptions options;
+                options.SetSurround(surroundMode);
+                SetCommonOptions(regex, options);
+                if (!multiMode) {
+                    Fsm_.Reset(new TFsm(TString(regex), options));
+                    return;
+                }
+
+                std::vector<std::string_view> parts;
+                StringSplitter(regex).Split('\n').AddTo(&parts);
+                for (const auto& part : parts) {
+                    // Empty lines contribute no pattern.
+                    if (part.empty()) {
+                        continue;
+                    }
+                    // The first non-empty line seeds the machine ...
+                    if (!Fsm_) {
+                        Fsm_.Reset(new TFsm(TString(part), options));
+                        continue;
+                    }
+                    // ... every following one is glued to it with operator|.
+                    try {
+                        *Fsm_ = *Fsm_ | TFsm(TString(part), options);
+                    } catch (const yexception&) {
+                        UdfTerminate((TStringBuilder() << Pos_ << " Failed to glue up regexes, probably the finite state machine appeared to be too large").data());
+                    }
+                }
+                // NOTE: Fsm_ stays empty when the config holds no non-empty
+                // line; Run() handles that case explicitly.
+            } catch (const std::exception& e) {
+                UdfTerminate((TStringBuilder() << Pos_ << " " << e.what()).data());
+            }
+        }
+
+    private:
+        // Matches args[0] against the compiled pattern(s).
+        // Single mode: returns a boolean; an empty input yields false.
+        // Multi mode: returns a tuple of RegexpsCount booleans, all false for
+        // an empty input or when no pattern was compiled.
+        TUnboxedValue Run(
+            const IValueBuilder* valueBuilder,
+            const TUnboxedValuePod* args) const final try {
+            TUnboxedValue* items = nullptr;
+            TUnboxedValue tuple;
+
+            if (MultiMode) {
+                tuple = valueBuilder->NewArray(RegexpsCount, items);
+
+                for (size_t i = 0; i < RegexpsCount; ++i) {
+                    items[i] = TUnboxedValuePod(false);
+                }
+            }
+
+            // Nothing to scan, or nothing to scan with: the latter used to
+            // dereference an empty Fsm_ when the multi-mode config contained
+            // only empty lines.
+            if (!args[0] || !Fsm_) {
+                return MultiMode ? tuple : TUnboxedValue(TUnboxedValuePod(false));
+            }
+
+            const auto input = args[0].AsStringRef();
+            TMatcher matcher(*Fsm_);
+            const bool isMatch = matcher.Match(input.Data(), input.Size(), SurroundMode, SurroundMode).Final();
+            if (!MultiMode) {
+                return TUnboxedValuePod(isMatch);
+            }
+
+            if (isMatch) {
+                const auto& matchedRegexps = matcher.MatchedRegexps();
+                const size_t matchesCount = matchedRegexps.second - matchedRegexps.first;
+
+                for (size_t i = 0; i < matchesCount; ++i) {
+                    const size_t regexpIndex = matchedRegexps.first[i];
+                    // RegexpsCount is derived from the type config while the
+                    // indices come from the run config; never write past the
+                    // tuple if the two disagree.
+                    if (regexpIndex < RegexpsCount) {
+                        items[regexpIndex] = TUnboxedValuePod(true);
+                    }
+                }
+            }
+            return tuple;
+        } catch (const std::exception& e) {
+            UdfTerminate((TStringBuilder() << Pos_ << " " << e.what()).data());
+        }
+
+    private:
+        TUniquePtr<TFsm> Fsm_;
+        bool MultiMode;
+        size_t RegexpsCount;
+        bool SurroundMode;
+    };
+
+    // Callable behind Pire::Capture: returns the captured part of the input as
+    // a substring of it, or an empty optional when the input is empty or
+    // nothing was captured.
+    class TPireCapture: public TPireUdfBase {
+    public:
+        // First-stage callable: builds a TPireCapture from the run config
+        // (args[0]); a std::exception raised during compilation terminates the UDF.
+        class TFactory: public TPireUdfBase {
+        public:
+            TFactory(TSourcePosition pos)
+                : TPireUdfBase(pos)
+            {}
+
+        private:
+            TUnboxedValue Run(const IValueBuilder*, const TUnboxedValuePod* args) const final try {
+                return TUnboxedValuePod(new TPireCapture(args[0], Pos_));
+            } catch (const std::exception& e) {
+                UdfTerminate((TStringBuilder() << Pos_ << " " << e.what()).data());
+            }
+        };
+
+        static const TStringRef& Name() {
+            static auto name = TStringRef::Of("Capture");
+            return name;
+        }
+
+        TPireCapture(const TUnboxedValuePod& runConfig, TSourcePosition pos)
+            : TPireUdfBase(pos)
+        {
+            std::string_view pattern(runConfig.AsStringRef());
+            TFsm::TOptions fsmOptions;
+            SetCommonOptions(pattern, fsmOptions);
+            Fsm_.Reset(new TSlowCapturingFsm(TString(pattern), fsmOptions));
+        }
+
+    private:
+        TUnboxedValue Run(
+            const IValueBuilder* valueBuilder,
+            const TUnboxedValuePod* args) const final try {
+            if (!args[0]) {
+                return TUnboxedValue();
+            }
+
+            const std::string_view input = args[0].AsStringRef();
+
+            TSlowSearcher searcher(*Fsm_);
+            searcher.Search(input.data(), input.size());
+            if (!searcher.Captured()) {
+                return TUnboxedValue();
+            }
+
+            // The capture points into `input`, so it is returned as a
+            // substring of the original argument.
+            const auto& captured = searcher.GetCaptured();
+            return valueBuilder->SubString(args[0], std::distance(input.begin(), captured.begin()), captured.length());
+        } catch (const std::exception& e) {
+            UdfTerminate((TStringBuilder() << Pos_ << " " << e.what()).data());
+        }
+
+        TUniquePtr<TSlowCapturingFsm> Fsm_;
+    };
+
+    // Callable behind Pire::Replace: substitutes the captured part of args[0]
+    // with args[1]. An empty input yields an empty optional; an input without
+    // a capture is returned unchanged.
+    class TPireReplace: public TPireUdfBase {
+    public:
+        // First-stage callable: builds a TPireReplace from the run config
+        // (args[0]); a std::exception raised during compilation terminates the UDF.
+        class TFactory: public TPireUdfBase {
+        public:
+            TFactory(TSourcePosition pos)
+                : TPireUdfBase(pos)
+            {}
+
+        private:
+            TUnboxedValue Run(const IValueBuilder*, const TUnboxedValuePod* args) const final try {
+                return TUnboxedValuePod(new TPireReplace(args[0], Pos_));
+            } catch (const std::exception& e) {
+                UdfTerminate((TStringBuilder() << Pos_ << " " << e.what()).data());
+            }
+        };
+
+        static const TStringRef& Name() {
+            static auto name = TStringRef::Of("Replace");
+            return name;
+        }
+
+        TPireReplace(const TUnboxedValuePod& runConfig, TSourcePosition pos)
+            : TPireUdfBase(pos)
+        {
+            std::string_view pattern(runConfig.AsStringRef());
+            TFsm::TOptions fsmOptions;
+            SetCommonOptions(pattern, fsmOptions);
+            Fsm_.Reset(new TSlowCapturingFsm(TString(pattern), fsmOptions));
+        }
+
+    private:
+        TUnboxedValue Run(
+            const IValueBuilder* valueBuilder,
+            const TUnboxedValuePod* args) const final try {
+            if (!args[0]) {
+                return TUnboxedValue();
+            }
+
+            const std::string_view input(args[0].AsStringRef());
+
+            TSlowSearcher searcher(*Fsm_);
+            searcher.Search(input.data(), input.size());
+            if (!searcher.Captured()) {
+                return TUnboxedValue(args[0]);
+            }
+
+            // Replace the captured range inside a private copy of the input.
+            const auto& captured = searcher.GetCaptured();
+            const TString replacement(args[1].AsStringRef());
+            TString replaced(args[0].AsStringRef());
+            replaced.replace(std::distance(input.begin(), captured.begin()), captured.length(), replacement);
+            return valueBuilder->NewString(replaced);
+        } catch (const std::exception& e) {
+            UdfTerminate((TStringBuilder() << Pos_ << " " << e.what()).data());
+        }
+
+        TUniquePtr<TSlowCapturingFsm> Fsm_;
+    };
+
+    // The "Pire" UDF module: lists the six functions and builds the type
+    // information and implementation for each of them.
+    class TPireModule: public IUdfModule {
+    public:
+        TStringRef Name() const {
+            return TStringRef::Of("Pire");
+        }
+
+        void CleanupOnTerminate() const final {
+        }
+
+        // Only the two Multi* functions are type-aware: the size of their
+        // result tuple depends on the type config.
+        void GetAllFunctions(IFunctionsSink& sink) const final {
+            sink.Add(TPireMatch::Name(true, true))->SetTypeAwareness();
+            sink.Add(TPireMatch::Name(false, true))->SetTypeAwareness();
+            sink.Add(TPireMatch::Name(true, false));
+            sink.Add(TPireMatch::Name(false, false));
+            sink.Add(TPireCapture::Name());
+            sink.Add(TPireReplace::Name());
+        }
+
+        // Declares the signature of `name` and, unless only types were
+        // requested, installs its factory. Unknown names are left untouched;
+        // any std::exception is reported through builder.SetError.
+        void BuildFunctionTypeInfo(
+            const TStringRef& name,
+            TType*,
+            const TStringRef& typeConfig,
+            ui32 flags,
+            IFunctionTypeInfoBuilder& builder) const final try {
+            const bool typesOnly = (flags & TFlags::TypesOnly);
+            const bool isMatch = (TPireMatch::Name(false, false) == name);
+            const bool isGrep = (TPireMatch::Name(true, false) == name);
+            const bool isMultiMatch = (TPireMatch::Name(false, true) == name);
+            const bool isMultiGrep = (TPireMatch::Name(true, true) == name);
+
+            if (isMatch || isGrep) {
+                builder.SimpleSignature<bool(TOptional<char*>)>()
+                    .RunConfig<const char*>();
+
+                if (!typesOnly) {
+                    builder.Implementation(new TPireMatch::TFactory(isGrep, false, builder.GetSourcePosition()));
+                }
+                return;
+            }
+
+            if (isMultiMatch || isMultiGrep) {
+                const auto boolType = builder.SimpleType<bool>();
+                const auto optionalStringType = builder.Optional()->Item<char*>().Build();
+                // One boolean per line of the type config.
+                const std::string_view patterns(typeConfig);
+                const size_t patternCount = std::count(patterns.begin(), patterns.end(), '\n') + 1;
+                const auto tupleBuilder = builder.Tuple();
+                for (size_t n = 0; n < patternCount; ++n) {
+                    tupleBuilder->Add(boolType);
+                }
+                const auto tupleType = tupleBuilder->Build();
+                builder.Args(1)->Add(optionalStringType).Done().Returns(tupleType).RunConfig<char*>();
+
+                if (!typesOnly) {
+                    builder.Implementation(new TPireMatch::TFactory(isMultiGrep, true, builder.GetSourcePosition(), patternCount));
+                }
+                return;
+            }
+
+            if (TPireCapture::Name() == name) {
+                builder.SimpleSignature<TOptional<char*>(TOptional<char*>)>()
+                    .RunConfig<char*>();
+
+                if (!typesOnly) {
+                    builder.Implementation(new TPireCapture::TFactory(builder.GetSourcePosition()));
+                }
+                return;
+            }
+
+            if (TPireReplace::Name() == name) {
+                builder.SimpleSignature<TOptional<char*>(TOptional<char*>, char*)>()
+                    .RunConfig<char*>();
+
+                if (!typesOnly) {
+                    builder.Implementation(new TPireReplace::TFactory(builder.GetSourcePosition()));
+                }
+            }
+        } catch (const std::exception&) {
+            builder.SetError(CurrentExceptionMessage());
+        }
+    };
+
+}
+
+REGISTER_MODULES(TPireModule)
diff --git a/yql/essentials/udfs/common/pire/test/canondata/result.json b/yql/essentials/udfs/common/pire/test/canondata/result.json
new file mode 100644
index 00000000000..2ec1b95e9a0
--- /dev/null
+++ b/yql/essentials/udfs/common/pire/test/canondata/result.json
@@ -0,0 +1,12 @@
+{
+ "test.test[Basic]": [
+ {
+ "uri": "file://test.test_Basic_/results.txt"
+ }
+ ],
+ "test.test[CharacterClasses]": [
+ {
+ "uri": "file://test.test_CharacterClasses_/results.txt"
+ }
+ ]
+}
diff --git a/yql/essentials/udfs/common/pire/test/canondata/test.test_Basic_/results.txt b/yql/essentials/udfs/common/pire/test/canondata/test.test_Basic_/results.txt
new file mode 100644
index 00000000000..6b3d19d9a17
--- /dev/null
+++ b/yql/essentials/udfs/common/pire/test/canondata/test.test_Basic_/results.txt
@@ -0,0 +1,508 @@
+[
+ {
+ "Write" = [
+ {
+ "Type" = [
+ "ListType";
+ [
+ "StructType";
+ [
+ [
+ "value";
+ [
+ "DataType";
+ "String"
+ ]
+ ];
+ [
+ "match";
+ [
+ "DataType";
+ "Bool"
+ ]
+ ];
+ [
+ "grep";
+ [
+ "DataType";
+ "Bool"
+ ]
+ ];
+ [
+ "insensitive_grep";
+ [
+ "DataType";
+ "Bool"
+ ]
+ ];
+ [
+ "multi_match";
+ [
+ "TupleType";
+ [
+ [
+ "DataType";
+ "Bool"
+ ];
+ [
+ "DataType";
+ "Bool"
+ ];
+ [
+ "DataType";
+ "Bool"
+ ];
+ [
+ "DataType";
+ "Bool"
+ ];
+ [
+ "DataType";
+ "Bool"
+ ];
+ [
+ "DataType";
+ "Bool"
+ ];
+ [
+ "DataType";
+ "Bool"
+ ]
+ ]
+ ]
+ ];
+ [
+ "some_multi_match";
+ [
+ "DataType";
+ "Bool"
+ ]
+ ];
+ [
+ "multi_match2";
+ [
+ "TupleType";
+ [
+ [
+ "DataType";
+ "Bool"
+ ];
+ [
+ "DataType";
+ "Bool"
+ ];
+ [
+ "DataType";
+ "Bool"
+ ];
+ [
+ "DataType";
+ "Bool"
+ ];
+ [
+ "DataType";
+ "Bool"
+ ]
+ ]
+ ]
+ ];
+ [
+ "some_multi_match2a";
+ [
+ "DataType";
+ "Bool"
+ ]
+ ];
+ [
+ "some_multi_match2b";
+ [
+ "DataType";
+ "Bool"
+ ]
+ ];
+ [
+ "some_multi_match2c";
+ [
+ "DataType";
+ "Bool"
+ ]
+ ];
+ [
+ "capture";
+ [
+ "OptionalType";
+ [
+ "DataType";
+ "String"
+ ]
+ ]
+ ];
+ [
+ "capture_many";
+ [
+ "OptionalType";
+ [
+ "DataType";
+ "String"
+ ]
+ ]
+ ];
+ [
+ "replace";
+ [
+ "OptionalType";
+ [
+ "DataType";
+ "String"
+ ]
+ ]
+ ];
+ [
+ "column13";
+ [
+ "TupleType";
+ [
+ [
+ "DataType";
+ "Bool"
+ ];
+ [
+ "DataType";
+ "Bool"
+ ];
+ [
+ "DataType";
+ "Bool"
+ ];
+ [
+ "DataType";
+ "Bool"
+ ];
+ [
+ "DataType";
+ "Bool"
+ ]
+ ]
+ ]
+ ]
+ ]
+ ]
+ ];
+ "Data" = [
+ [
+ "";
+ %false;
+ %false;
+ %false;
+ [
+ %false;
+ %false;
+ %false;
+ %false;
+ %false;
+ %false;
+ %false
+ ];
+ %false;
+ [
+ %false;
+ %false;
+ %false;
+ %false;
+ %false
+ ];
+ %false;
+ %false;
+ %false;
+ #;
+ #;
+ [
+ ""
+ ];
+ [
+ %false;
+ %false;
+ %false;
+ %false;
+ %false
+ ]
+ ];
+ [
+ "a";
+ %true;
+ %false;
+ %false;
+ [
+ %true;
+ %true;
+ %true;
+ %false;
+ %false;
+ %false;
+ %false
+ ];
+ %true;
+ [
+ %false;
+ %false;
+ %false;
+ %false;
+ %false
+ ];
+ %false;
+ %false;
+ %false;
+ #;
+ #;
+ [
+ "a"
+ ];
+ [
+ %false;
+ %false;
+ %false;
+ %false;
+ %false
+ ]
+ ];
+ [
+ "aax";
+ %true;
+ %false;
+ %false;
+ [
+ %true;
+ %true;
+ %false;
+ %false;
+ %false;
+ %false;
+ %false
+ ];
+ %true;
+ [
+ %false;
+ %false;
+ %false;
+ %false;
+ %false
+ ];
+ %false;
+ %false;
+ %false;
+ #;
+ #;
+ [
+ "aax"
+ ];
+ [
+ %false;
+ %false;
+ %false;
+ %false;
+ %false
+ ]
+ ];
+ [
+ "xaax";
+ %false;
+ %false;
+ %false;
+ [
+ %false;
+ %true;
+ %false;
+ %false;
+ %false;
+ %false;
+ %false
+ ];
+ %false;
+ [
+ %false;
+ %false;
+ %false;
+ %false;
+ %false
+ ];
+ %false;
+ %false;
+ %false;
+ [
+ "a"
+ ];
+ [
+ "aa"
+ ];
+ [
+ "xbax"
+ ];
+ [
+ %false;
+ %false;
+ %false;
+ %false;
+ %false
+ ]
+ ];
+ [
+ "xaaxaaxaa";
+ %false;
+ %true;
+ %true;
+ [
+ %false;
+ %true;
+ %true;
+ %true;
+ %false;
+ %false;
+ %false
+ ];
+ %false;
+ [
+ %false;
+ %false;
+ %false;
+ %false;
+ %false
+ ];
+ %false;
+ %false;
+ %false;
+ [
+ "a"
+ ];
+ [
+ "aa"
+ ];
+ [
+ "xaaxaaxba"
+ ];
+ [
+ %false;
+ %false;
+ %false;
+ %false;
+ %false
+ ]
+ ];
+ [
+ "XAXA";
+ %false;
+ %false;
+ %true;
+ [
+ %false;
+ %false;
+ %false;
+ %false;
+ %false;
+ %false;
+ %false
+ ];
+ %false;
+ [
+ %false;
+ %false;
+ %false;
+ %false;
+ %false
+ ];
+ %false;
+ %false;
+ %false;
+ #;
+ #;
+ [
+ "XAXA"
+ ];
+ [
+ %false;
+ %false;
+ %false;
+ %false;
+ %false
+ ]
+ ];
+ [
+ "7";
+ %false;
+ %false;
+ %false;
+ [
+ %false;
+ %false;
+ %false;
+ %false;
+ %false;
+ %false;
+ %false
+ ];
+ %false;
+ [
+ %false;
+ %false;
+ %false;
+ %false;
+ %false
+ ];
+ %false;
+ %false;
+ %false;
+ #;
+ #;
+ [
+ "7"
+ ];
+ [
+ %false;
+ %false;
+ %false;
+ %false;
+ %false
+ ]
+ ];
+ [
+ "QC transfer task JAVA";
+ %false;
+ %false;
+ %false;
+ [
+ %false;
+ %true;
+ %false;
+ %false;
+ %false;
+ %false;
+ %false
+ ];
+ %false;
+ [
+ %false;
+ %true;
+ %true;
+ %false;
+ %false
+ ];
+ %false;
+ %true;
+ %true;
+ #;
+ #;
+ [
+ "QC transfer task JAVA"
+ ];
+ [
+ %false;
+ %false;
+ %false;
+ %false;
+ %false
+ ]
+ ]
+ ]
+ }
+ ]
+ }
+] \ No newline at end of file
diff --git a/yql/essentials/udfs/common/pire/test/canondata/test.test_CharacterClasses_/results.txt b/yql/essentials/udfs/common/pire/test/canondata/test.test_CharacterClasses_/results.txt
new file mode 100644
index 00000000000..7fe80ff82a7
--- /dev/null
+++ b/yql/essentials/udfs/common/pire/test/canondata/test.test_CharacterClasses_/results.txt
@@ -0,0 +1,59 @@
+[
+ {
+ "Write" = [
+ {
+ "Type" = [
+ "ListType";
+ [
+ "StructType";
+ [
+ [
+ "value";
+ [
+ "DataType";
+ "String"
+ ]
+ ];
+ [
+ "digits";
+ [
+ "DataType";
+ "Bool"
+ ]
+ ];
+ [
+ "spaces";
+ [
+ "DataType";
+ "Bool"
+ ]
+ ]
+ ]
+ ]
+ ];
+ "Data" = [
+ [
+ "xx000xx";
+ %true;
+ %false
+ ];
+ [
+ "lLlLl";
+ %false;
+ %false
+ ];
+ [
+ "a1 b2 c3";
+ %true;
+ %true
+ ];
+ [
+ "xxx yyy";
+ %false;
+ %true
+ ]
+ ]
+ }
+ ]
+ }
+] \ No newline at end of file
diff --git a/yql/essentials/udfs/common/pire/test/cases/Basic.in b/yql/essentials/udfs/common/pire/test/cases/Basic.in
new file mode 100644
index 00000000000..ddc62722474
--- /dev/null
+++ b/yql/essentials/udfs/common/pire/test/cases/Basic.in
@@ -0,0 +1,8 @@
+{"key"="1";"subkey"="1";"value"=""};
+{"key"="2";"subkey"="2";"value"="a"};
+{"key"="3";"subkey"="3";"value"="aax"};
+{"key"="4";"subkey"="4";"value"="xaax"};
+{"key"="5";"subkey"="5";"value"="xaaxaaxaa"};
+{"key"="6";"subkey"="6";"value"="XAXA"};
+{"key"="7";"subkey"="7";"value"="7"};
+{"key"="8";"subkey"="8";"value"="QC transfer task JAVA"};
diff --git a/yql/essentials/udfs/common/pire/test/cases/Basic.sql b/yql/essentials/udfs/common/pire/test/cases/Basic.sql
new file mode 100644
index 00000000000..b13078f3232
--- /dev/null
+++ b/yql/essentials/udfs/common/pire/test/cases/Basic.sql
@@ -0,0 +1,32 @@
+/* syntax version 1 */
+$match = Pire::Match("a.*");
+$grep = Pire::Grep("axa");
+$insensitive_grep = Pire::Grep("(?i)axa");
+$multi_match = Pire::MultiMatch(@@a.*
+.*a.*
+.*a
+.*axa.*@@);
+$multi_match2 = Pire::MultiMatch(@@YQL.*
+QC.*
+.*transfer task.*@@);
+
+$capture = Pire::Capture(".*x(a).*");
+$capture_many = Pire::Capture(".*x(a+).*");
+$replace = Pire::Replace(".*x(a).*");
+
+SELECT
+ value,
+ $match(value) AS match,
+ $grep(value) AS grep,
+ $insensitive_grep(value) AS insensitive_grep,
+ $multi_match(value) AS multi_match,
+ $multi_match(value).0 AS some_multi_match,
+ $multi_match2(value) AS multi_match2,
+ $multi_match2(value).0 AS some_multi_match2a,
+ $multi_match2(value).1 AS some_multi_match2b,
+ $multi_match2(value).2 AS some_multi_match2c,
+ $capture(value) AS capture,
+ $capture_many(value) AS capture_many,
+ $replace(value, "b") AS replace,
+ $multi_match2(Nothing(String?))
+FROM Input;
diff --git a/yql/essentials/udfs/common/pire/test/cases/CharacterClasses.in b/yql/essentials/udfs/common/pire/test/cases/CharacterClasses.in
new file mode 100644
index 00000000000..e2737f40a1e
--- /dev/null
+++ b/yql/essentials/udfs/common/pire/test/cases/CharacterClasses.in
@@ -0,0 +1,4 @@
+{"key"="1";"subkey"="1";"value"="xx000xx"};
+{"key"="2";"subkey"="2";"value"="lLlLl"};
+{"key"="3";"subkey"="3";"value"="a1 b2 c3"};
+{"key"="4";"subkey"="4";"value"="xxx yyy"};
diff --git a/yql/essentials/udfs/common/pire/test/cases/CharacterClasses.sql b/yql/essentials/udfs/common/pire/test/cases/CharacterClasses.sql
new file mode 100644
index 00000000000..be8ab6c294a
--- /dev/null
+++ b/yql/essentials/udfs/common/pire/test/cases/CharacterClasses.sql
@@ -0,0 +1,9 @@
+/* syntax version 1 */
+$digits = Pire::Grep("\\d+");
+$spaces = Pire::Grep("\\s+");
+
+SELECT
+ value,
+ $digits(value) AS digits,
+ $spaces(value) AS spaces
+FROM Input;
diff --git a/yql/essentials/udfs/common/pire/test/ya.make b/yql/essentials/udfs/common/pire/test/ya.make
new file mode 100644
index 00000000000..08952c2a431
--- /dev/null
+++ b/yql/essentials/udfs/common/pire/test/ya.make
@@ -0,0 +1,13 @@
+YQL_UDF_TEST_CONTRIB()
+
+DEPENDS(yql/essentials/udfs/common/pire)
+
+TIMEOUT(300)
+
+SIZE(MEDIUM)
+
+IF (SANITIZER_TYPE == "memory")
+ TAG(ya:not_autocheck) # YQL-15385
+ENDIF()
+
+END()
diff --git a/yql/essentials/udfs/common/pire/ya.make b/yql/essentials/udfs/common/pire/ya.make
new file mode 100644
index 00000000000..e7a9fabf607
--- /dev/null
+++ b/yql/essentials/udfs/common/pire/ya.make
@@ -0,0 +1,29 @@
+IF (YQL_PACKAGED)
+ PACKAGE()
+ FROM_SANDBOX(FILE 7319902628 OUT_NOAUTO libpire_udf.so
+ )
+ END()
+ELSE ()
+YQL_UDF_CONTRIB(pire_udf)
+
+ YQL_ABI_VERSION(
+ 2
+ 27
+ 0
+ )
+
+ SRCS(
+ pire_udf.cpp
+ )
+
+ PEERDIR(
+ library/cpp/regex/pire
+ )
+
+ END()
+ENDIF ()
+
+
+RECURSE_FOR_TESTS(
+ test
+)
diff --git a/yql/essentials/udfs/common/protobuf/protobuf_udf.cpp b/yql/essentials/udfs/common/protobuf/protobuf_udf.cpp
new file mode 100644
index 00000000000..4b7df61c28e
--- /dev/null
+++ b/yql/essentials/udfs/common/protobuf/protobuf_udf.cpp
@@ -0,0 +1,143 @@
+#include <yql/essentials/minikql/protobuf_udf/type_builder.h>
+#include <yql/essentials/minikql/protobuf_udf/value_builder.h>
+#include <yql/essentials/public/udf/udf_value.h>
+#include <yql/essentials/public/udf/udf_registrator.h>
+
+#include <library/cpp/protobuf/yql/descriptor.h>
+
+using namespace NKikimr::NUdf;
+using namespace NProtoBuf;
+
+namespace {
+ class TDynamicProtoValue: public TProtobufValue { // Parse callable: hands the input buffer to the TDynamicInfo parser
+ public:
+ TDynamicProtoValue(const TProtoInfo& info, TDynamicInfoRef dyn) // info is passed to the TProtobufValue base; dyn is retained for Parse()
+ : TProtobufValue(info)
+ , Dynamic_(dyn)
+ {
+ Y_ASSERT(Dynamic_ != nullptr); // a null dynamic-info reference is treated as a programming error
+ }
+
+ TAutoPtr<Message> Parse(const TStringBuf& data) const override { // forwards to Dynamic_; nothing is caught here, so parser exceptions propagate
+ return Dynamic_->Parse(data);
+ }
+
+ private:
+ TDynamicInfoRef Dynamic_; // dynamic message info that performs the actual parsing
+ };
+
+ class TDynamicProtoSerialize: public TProtobufSerialize { // Serialize callable: message creation and encoding are both delegated to TDynamicInfo
+ public:
+ TDynamicProtoSerialize(const TProtoInfo& info, TDynamicInfoRef dyn) // info is passed to the TProtobufSerialize base; dyn is retained for the overrides below
+ : TProtobufSerialize(info)
+ , Dynamic_(dyn)
+ {
+ Y_ASSERT(Dynamic_ != nullptr); // a null dynamic-info reference is treated as a programming error
+ }
+
+ TMaybe<TString> Serialize(const Message& proto) const override { // returns whatever Dynamic_->Serialize yields for the given message
+ return Dynamic_->Serialize(proto);
+ }
+
+ TAutoPtr<Message> MakeProto() const override { // obtains a message instance from Dynamic_ (presumably an empty one for the base class to fill - confirm)
+ return Dynamic_->MakeProto();
+ }
+ private:
+ TDynamicInfoRef Dynamic_; // dynamic message info backing Serialize() and MakeProto()
+ };
+
+ // Variant of TDynamicProtoValue whose Parse() never lets a std::exception
+ // escape: a failed parse yields a null message instead.
+ class TDynamicProtoValueSafe: public TDynamicProtoValue {
+ public:
+     TDynamicProtoValueSafe(const TProtoInfo& info, TDynamicInfoRef dyn)
+         : TDynamicProtoValue(info, dyn) {}
+
+     // Returns the parsed message, or nullptr when the underlying parser
+     // throws something derived from std::exception. Other exception types
+     // still propagate to the caller.
+     // NOTE(review): presumably the TProtobufValue base turns nullptr into an
+     // empty optional result - confirm against its implementation.
+     TAutoPtr<Message> Parse(const TStringBuf& data) const override {
+         try {
+             return TDynamicProtoValue::Parse(data);
+         } catch (const std::exception&) {
+             // Deliberate best-effort: the failure reason is dropped.
+             return nullptr;
+         }
+     }
+ };
+
+ // UDF module "Protobuf": exposes Parse, TryParse and Serialize. All three
+ // derive their YQL types from the message description given as type config.
+ class TProtobufModule: public IUdfModule {
+ public:
+     TStringRef Name() const {
+         return TStringRef("Protobuf");
+     }
+
+     void CleanupOnTerminate() const final {
+     }
+
+     // Announces the three functions; each one is flagged as type-aware.
+     void GetAllFunctions(IFunctionsSink& sink) const final {
+         sink.Add(TStringRef::Of("Parse"))->SetTypeAwareness();
+         sink.Add(TStringRef::Of("TryParse"))->SetTypeAwareness();
+         sink.Add(TStringRef::Of("Serialize"))->SetTypeAwareness();
+     }
+
+     // Builds the signature of the function called `name` and, unless only
+     // types were requested, attaches its implementation.
+     //   name       - function to describe
+     //   userType   - unused
+     //   typeConfig - message description consumed by TDynamicInfo::Create
+     //   flags      - TFlags bit mask; TypesOnly suppresses implementation creation
+     //   builder    - receives the signature, the implementation or an error
+     // Any std::exception raised on the way is reported via builder.SetError().
+     void BuildFunctionTypeInfo(
+         const TStringRef& name,
+         TType* userType,
+         const TStringRef& typeConfig,
+         ui32 flags,
+         IFunctionTypeInfoBuilder& builder) const final {
+         Y_UNUSED(userType);
+
+         try {
+             // Evaluate the request once instead of re-testing it in every branch.
+             const bool needImplementation = (flags & TFlags::TypesOnly) == 0;
+             const bool isSerialize = TStringRef::Of("Serialize") == name;
+             const bool isTryParse = TStringRef::Of("TryParse") == name;
+
+             auto dyn = TDynamicInfo::Create(TStringBuf(typeConfig.Data(), typeConfig.Size()));
+
+             TProtoInfo typeInfo;
+             ProtoTypeBuild(dyn->Descriptor(),
+                            dyn->GetEnumFormat(),
+                            dyn->GetRecursionTraits(),
+                            dyn->GetOptionalLists(),
+                            builder, &typeInfo,
+                            EProtoStringYqlType::Bytes,
+                            dyn->GetSyntaxAware(),
+                            false,
+                            dyn->GetYtMode());
+
+             auto stringType = builder.SimpleType<char*>();
+             auto structType = typeInfo.StructType;
+             auto optionalStructType = builder.Optional()->Item(structType).Build();
+
+             if (isSerialize) {
+                 // function signature:
+                 // String Serialize(Protobuf value)
+                 builder.Returns(stringType)
+                     .Args()
+                     ->Add(structType)
+                     .Flags(ICallablePayload::TArgumentFlags::AutoMap)
+                     .Done();
+                 if (needImplementation) {
+                     builder.Implementation(new TDynamicProtoSerialize(typeInfo, dyn));
+                 }
+             } else {
+                 // function signature:
+                 // Protobuf Parse(String value)
+                 // Protobuf? TryParse(String value)
+                 // NOTE(review): every name other than "Serialize" receives the
+                 // Parse-style signature, but only "Parse" and "TryParse" get an
+                 // implementation - confirm this is intended for unknown names.
+                 builder.Returns(isTryParse ? optionalStructType : structType)
+                     .Args()
+                     ->Add(stringType)
+                     .Flags(ICallablePayload::TArgumentFlags::AutoMap)
+                     .Done();
+
+                 if (needImplementation) {
+                     if (isTryParse) {
+                         builder.Implementation(new TDynamicProtoValueSafe(typeInfo, dyn));
+                     } else if (TStringRef::Of("Parse") == name) {
+                         builder.Implementation(new TDynamicProtoValue(typeInfo, dyn));
+                     }
+                 }
+             }
+         } catch (const std::exception&) {
+             builder.SetError(CurrentExceptionMessage());
+         }
+     }
+ };
+
+}
+
+REGISTER_MODULES(TProtobufModule);
diff --git a/yql/essentials/udfs/common/protobuf/test/canondata/result.json b/yql/essentials/udfs/common/protobuf/test/canondata/result.json
new file mode 100644
index 00000000000..ac534cee58f
--- /dev/null
+++ b/yql/essentials/udfs/common/protobuf/test/canondata/result.json
@@ -0,0 +1,126 @@
+{
+ "test.test[CRYPTR-627]": [
+ {
+ "checksum": "fce4dcff82967863048135d17d52f31f",
+ "size": 90043,
+ "uri": "https://storage.yandex-team.ru/get-devtools/212715/870509fa5b3fad596ac1effbd15336199d7c4166/resource.tar.gz#test.test_CRYPTR-627_/results.txt"
+ }
+ ],
+ "test.test[YQL-16111]": [
+ {
+ "checksum": "35d6f2291f27eb3ec28a96739412f276",
+ "size": 5675,
+ "uri": "https://storage.yandex-team.ru/get-devtools/1781765/c2a453956382e0fdfc958f3c4e32ed7740f03d4b/resource.tar.gz#test.test_YQL-16111_/results.txt"
+ }
+ ],
+ "test.test[YQL-3381]": [
+ {
+ "checksum": "c098af301d5dc8d85071a47455f0f592",
+ "size": 97359,
+ "uri": "https://storage.yandex-team.ru/get-devtools/212715/6d6e638efd56a0c6037196ae663d58f279ee467a/resource.tar.gz#test.test_YQL-3381_/results.txt"
+ }
+ ],
+ "test.test[YQL-6706]": [
+ {
+ "checksum": "17fbe1395ae7573288532aa42c5525c4",
+ "size": 87479,
+ "uri": "https://storage.yandex-team.ru/get-devtools/212715/b614019f8638cf007ad9da4a361791da4a66b156/resource.tar.gz#test.test_YQL-6706_/results.txt"
+ }
+ ],
+ "test.test[YQL-8307]": [
+ {
+ "checksum": "e3045a4ed9fe70bc12c8fd0de7bff29c",
+ "size": 3840,
+ "uri": "https://storage.yandex-team.ru/get-devtools/1923547/edc65c62e064cae64c748b355e12a480cac0c768/resource.tar.gz#test.test_YQL-8307_/results.txt"
+ }
+ ],
+ "test.test[recursion_bytes]": [
+ {
+ "checksum": "eeb1ca24bdc09529bd0a320965b3123d",
+ "size": 1241,
+ "uri": "https://storage.yandex-team.ru/get-devtools/1937492/9579ab43fa70f08e465aa7673b27a5da5460a75e/resource.tar.gz#test.test_recursion_bytes_/results.txt"
+ }
+ ],
+ "test.test[recursion_fail]": [
+ {
+ "uri": "file://test.test_recursion_fail_/extracted"
+ }
+ ],
+ "test.test[recursion_ignore]": [
+ {
+ "checksum": "eeb1ca24bdc09529bd0a320965b3123d",
+ "size": 1241,
+ "uri": "https://storage.yandex-team.ru/get-devtools/1937492/9579ab43fa70f08e465aa7673b27a5da5460a75e/resource.tar.gz#test.test_recursion_ignore_/results.txt"
+ }
+ ],
+ "test.test[syntax_aware_empty_nested_message]": [
+ {
+ "checksum": "2f7d8896e8d637d9698c94f99b5e586f",
+ "size": 4344,
+ "uri": "https://storage.yandex-team.ru/get-devtools/1899731/4317be1fdb14f46c8bb94f5975cce5af8f89f78e/resource.tar.gz#test.test_syntax_aware_empty_nested_message_/results.txt"
+ }
+ ],
+ "test.test[syntax_aware_nested_enum_string_value]": [
+ {
+ "checksum": "630a0e1ef04aada546e39ad71771c4d9",
+ "size": 4152,
+ "uri": "https://storage.yandex-team.ru/get-devtools/1899731/4317be1fdb14f46c8bb94f5975cce5af8f89f78e/resource.tar.gz#test.test_syntax_aware_nested_enum_string_value_/results.txt"
+ }
+ ],
+ "test.test[yt_mode_any]": [
+ {
+ "checksum": "c640b3b07686099261511c45d21388f7",
+ "size": 3808,
+ "uri": "https://storage.yandex-team.ru/get-devtools/1946324/c1d8ae91dcc463381c8fa29e568a09926e9a2225/resource.tar.gz#test.test_yt_mode_any_/results.txt"
+ }
+ ],
+ "test.test[yt_mode_enum]": [
+ {
+ "checksum": "2c32becbf155f2dce66d7c00db46a2a4",
+ "size": 2965,
+ "uri": "https://storage.yandex-team.ru/get-devtools/1781765/c2a453956382e0fdfc958f3c4e32ed7740f03d4b/resource.tar.gz#test.test_yt_mode_enum_/results.txt"
+ }
+ ],
+ "test.test[yt_mode_map]": [
+ {
+ "checksum": "67b8ad859a21d7f0c8203daa08194cf3",
+ "size": 21224,
+ "uri": "https://storage.yandex-team.ru/get-devtools/1936947/d770c9a45c5df4db61873fc48e1cb5961f774af6/resource.tar.gz#test.test_yt_mode_map_/results.txt"
+ }
+ ],
+ "test.test[yt_mode_no_ser]": [
+ {
+ "checksum": "eeb1ca24bdc09529bd0a320965b3123d",
+ "size": 1241,
+ "uri": "https://storage.yandex-team.ru/get-devtools/1937492/9579ab43fa70f08e465aa7673b27a5da5460a75e/resource.tar.gz#test.test_yt_mode_no_ser_/results.txt"
+ }
+ ],
+ "test.test[yt_mode_plain]": [
+ {
+ "checksum": "7321635f17c1f8fdb75d96bb2a3ce4d1",
+ "size": 1796,
+ "uri": "https://storage.yandex-team.ru/get-devtools/1937492/9579ab43fa70f08e465aa7673b27a5da5460a75e/resource.tar.gz#test.test_yt_mode_plain_/results.txt"
+ }
+ ],
+ "test.test[yt_mode_ser_pb]": [
+ {
+ "checksum": "eeb1ca24bdc09529bd0a320965b3123d",
+ "size": 1241,
+ "uri": "https://storage.yandex-team.ru/get-devtools/1937492/9579ab43fa70f08e465aa7673b27a5da5460a75e/resource.tar.gz#test.test_yt_mode_ser_pb_/results.txt"
+ }
+ ],
+ "test.test[yt_mode_ser_yt]": [
+ {
+ "checksum": "eeb1ca24bdc09529bd0a320965b3123d",
+ "size": 1241,
+ "uri": "https://storage.yandex-team.ru/get-devtools/1937492/9579ab43fa70f08e465aa7673b27a5da5460a75e/resource.tar.gz#test.test_yt_mode_ser_yt_/results.txt"
+ }
+ ],
+ "test.test[yt_mode_variant]": [
+ {
+ "checksum": "de1dd8aeb5c695707a114866a6124ed8",
+ "size": 12323,
+ "uri": "https://storage.yandex-team.ru/get-devtools/1937492/9579ab43fa70f08e465aa7673b27a5da5460a75e/resource.tar.gz#test.test_yt_mode_variant_/results.txt"
+ }
+ ]
+}
diff --git a/yql/essentials/udfs/common/protobuf/test/canondata/test.test_recursion_fail_/extracted b/yql/essentials/udfs/common/protobuf/test/canondata/test.test_recursion_fail_/extracted
new file mode 100644
index 00000000000..0b282bc72d6
--- /dev/null
+++ b/yql/essentials/udfs/common/protobuf/test/canondata/test.test_recursion_fail_/extracted
@@ -0,0 +1,11 @@
+<tmp_path>/program.sql:<main>: Error: Type annotation
+
+ <tmp_path>/program.sql:<main>:64:1: Error: At function: RemovePrefixMembers, At function: Unordered, At function: PersistableRepr, At function: OrderedSqlProject, At function: SqlProjectItem
+ SELECT $udf(TestField) FROM plato.Input;
+ ^
+ <tmp_path>/program.sql:<main>:64:8: Error: At function: NamedApply, At function: Udf, At Protobuf.Parse
+ SELECT $udf(TestField) FROM plato.Input;
+ ^
+ <tmp_path>/program.sql:<main>:64:8: Error: Failed to find UDF function: Protobuf.Parse, reason: Error: Module: Protobuf, function: Parse, error: (yexception) yql/essentials/minikql/protobuf_udf/type_builder.cpp:xxx: can't handle recursive types: Test
+ SELECT $udf(TestField) FROM plato.Input;
+ ^ \ No newline at end of file
diff --git a/yql/essentials/udfs/common/protobuf/test/cases/CRYPTR-627.in b/yql/essentials/udfs/common/protobuf/test/cases/CRYPTR-627.in
new file mode 100644
index 00000000000..4aef303a9ca
--- /dev/null
+++ b/yql/essentials/udfs/common/protobuf/test/cases/CRYPTR-627.in
@@ -0,0 +1,3 @@
+{"ProfileDump"="\n\x0F\x08\x8B\3\x15\xC6\x16[Z\x1A\5\x98\xA7\xA5\xD4\nj-\x08\xDC\xC6\xF5\x92\xDE\xAC\xD2\xFF\xD5\1\x10\x98\xA7\xA5\xD4\n\x1D\xC6\x16[Z%\xC6\x16[Z-\xC6\x16[Z1\0\0\0\0\0\0\xF0?@\2H\2";};
+{"ProfileDump"="\n\x0F\x08\xEB\1\x15\xED\xE8^Z\x1A\5\x98BAD_ROW_PROTO";};
+{"ProfileDump"="\n\x0F\x08\x8B\3\x15\3222\x94Z\x1A\5\xB6\xF2\xDB\xA5\2\n\x11\x08\x94\3\x15\3222\x94ZB\7Swift 2\n\x12\x08\x93\3\x15\3222\x94ZB\x08Wileyfox\n\x0B\x08\x94\4\x15\3222\x94Z\x1A\1\2j,\x08\x84\xCD\xAF\xF9\x9B\xC0\xF8\xFEv\x10\xB6\xF2\xDB\xA5\2\x1D\3222\x94Z%\3222\x94Z-\3222\x94Z1\0\0\0\0\0\0\x10@@\2H\2";}; \ No newline at end of file
diff --git a/yql/essentials/udfs/common/protobuf/test/cases/CRYPTR-627.in.attr b/yql/essentials/udfs/common/protobuf/test/cases/CRYPTR-627.in.attr
new file mode 100644
index 00000000000..6f11c914086
--- /dev/null
+++ b/yql/essentials/udfs/common/protobuf/test/cases/CRYPTR-627.in.attr
@@ -0,0 +1 @@
+{schema=[{name=ProfileDump;type=string}]}
diff --git a/yql/essentials/udfs/common/protobuf/test/cases/CRYPTR-627.sql b/yql/essentials/udfs/common/protobuf/test/cases/CRYPTR-627.sql
new file mode 100644
index 00000000000..d4f8a432d44
--- /dev/null
+++ b/yql/essentials/udfs/common/protobuf/test/cases/CRYPTR-627.sql
@@ -0,0 +1,11 @@
+/* syntax version 1 */
+/*
+ Checks that TryParse works correctly for bad protobuf input data
+*/
+
+$config = @@{"name":"yabs.proto.Profile","meta":"eNqNVt1y21QQRrJs2Ws7ceS0UVwKoQyDgWla0h+GXHTGScw0M00T0hQuNYp0bGuiP86RkvoxeAMegeE5uOcF+ga8AHtWUiynypQr+6y+/Tm73+4eeG/C/bl9Lh7FPEqiR6lg3MK/E89n2yQyQH7O/j/4ywT9JPtqbEPdS1ggTGWrNmzvfL69wG3nmOL3EHHGC1hz7IRNIz4vPAhTJd0vq3T3c/DCXyu2hYgjngizRnr3qvRepufk7yE0nSgNE8aFqd0O388wxmNoRJOJBNcJvFUFPpaIIqBvQJul58JsfDyWb0H/LWXcwxvrhN6sQv+MkLnxHejndhjKSJqEHVRh9whifAWaG8TCbBFyowp5EMTGDrT9wJowO0k5xgCEvl+FfhX8lKGMH2GFs2nq29xyooi7wmyT2hdVamenGXRfIo0D6Nup6yUR99LAEmwasBCL1tlSUP/rSv3RNf5NDjeeQ8eOY99D1nhRKMzu7XU5Gy2AA8zJie1xowv1icdFggRVh5qxAg3BnCh0kXR4HjwF7Yx70Udg8nMy87iLlJNae9D5lXnTWcLct4dhUqF9RZ9z7T6009hFJluJFzC0oQz1wenCxv8ItGSzVmVTI5t/KLB6s2Puwsp5YF03neeSg67Rg6YnOc9EFmbX+BTWC4m1HK861GV8YhZdyTaSYIzHwWRfyE6RZwOAXWLFMpUGqRigBbYXItuVYXNXS3jKBu8V6Cw1ECrmHVrEphnr0KE2RIkVuM8WabQdWVzr3KPuV27Jg7EBbcF85mA085hhhMqwu6vu/HAdUWMR0YM3sDIis3todRymAUbUOBifjQ5f9T4xdKiNDg56itGB5snb0/2Xozfjniohp+Oj41/GvRoaXTkd7x8fHY1fH4zODo9f97TBPyq0y3MPFS7Y/Ar7YlGBG6GrlDMMPcUqWJe2n7JsxGl7Kvp/CO0YeVJ8yIaZWTlvJZ0QniCxC3j9djjx/wWsX+VstMr+G7e321IHlPXLYeof16dwseIC4w2nmSIOPWXYuS5Xq0Sg3xXQi4mKJMThm2VUydqUuIS5VD5MMPWdFDqc3SDMcnmIL8Yd6ErjkkEZ4Rrko4rUI9CLDfIhnbvGKtTQOq25rJZrUM+uKcurSNHgCFqLqVtlpA96PrpLhjZu3rA21Mnc3wo08u2wBq1slSz6q5J5/eWmqVESMKOMLq/RUQ4T2ZOpn3g4axmnXKnYHBq3wwtKkSo9xjNuCyY96pQ1nDZy99H8aS7lsVRbvE7TE5YIbJ7gepLy+sT2BRv8W4N6thjLhpTcEGSShL3LKt+S8cyKGdEtjy7lxujKz2IeOsyl8GmUzaKUCyuwxUUe/3UemsXUKeeqRXr3oO9EvpRG+Hyyk1lGGyANzFyCKRK+zDrJ2yRHlsWcuZ4jO8dJOO3H7g1x7ODek2ITemnovVsazytVtF4lIaYBB//3Zo+Us9OOuVY6PTGN0ump2S+dnpnrpdNz806RrSC0bNc17+J5jYptczn5sSIbdKdNWEtsPmWJ5UayxjTDTfq0AatTJL7l+KnI6b1ZVFFg0h1KjjmQssEZ1OS7BT3i86YoeMXclFddh+b1E0O2QZf6oyCZVmrWGLrLDxWkFFbFS1KXUX/I/mz5UTjNRCqJKmcJ4vB1JDcSBqfl5Mk81ksej6Ff9bTpgebbtPCLdkYJVv2y1ODr0JL+RGIHceZ0V3k8+BOXaPm1I6PDHMu5PbPFbNEYDnee7GQytWpd1vLcdbwQXfh+eSYi12R4lty5lzm+Tl8+g7u5MIhCpPmEM+zA0JlTAylVI3JPPVH+AxssP0w=", "lists": {"optional": false}}@@;
+
+$udf = Udf(Protobuf::TryParse, $config as TypeConfig);
+
+SELECT $udf(ProfileDump) AS Profile
+FROM Input;
diff --git a/yql/essentials/udfs/common/protobuf/test/cases/YQL-16111.in b/yql/essentials/udfs/common/protobuf/test/cases/YQL-16111.in
new file mode 100644
index 00000000000..e69de29bb2d
--- /dev/null
+++ b/yql/essentials/udfs/common/protobuf/test/cases/YQL-16111.in
diff --git a/yql/essentials/udfs/common/protobuf/test/cases/YQL-16111.sql b/yql/essentials/udfs/common/protobuf/test/cases/YQL-16111.sql
new file mode 100644
index 00000000000..6c35fc403c2
--- /dev/null
+++ b/yql/essentials/udfs/common/protobuf/test/cases/YQL-16111.sql
@@ -0,0 +1,64 @@
+/*
+syntax='proto3';
+
+import "yt/yt_proto/yt/formats/extension.proto";
+
+message Test {
+ option (NYT.default_field_flags) = SERIALIZATION_YT;
+
+ message InnerSubProto {
+ option (NYT.default_field_flags) = SERIALIZATION_YT;
+ int64 x = 1;
+ string y = 2;
+
+ message TSubField {
+ repeated string List = 1;
+ }
+
+ TSubField SubField = 3 [(NYT.flags) = SERIALIZATION_PROTOBUF];
+ }
+
+ map<string, InnerSubProto> dict = 10 [(NYT.flags) = MAP_AS_DICT];
+}
+*/
+
+$config = @@{
+ "name": "Test",
+ "format": "json",
+ "skip": 0,
+ "lists": {
+ "optional": false
+ },
+ "meta": "eNrFWs1vG8cVLz9FPkrUaiXbjBzHCZNYshNTgfPl0m0SilzJdPjVJZlEBoLFihxSa5O7zO7SNo2iKNBTj0VvbVEUvaToH1Cg6KVBgR4LFGiDHhqgaAu0f0KPfTOzu9wlKYsJkCQHhfvm9z7mzW9m3swY/nALnu0bRn9A9kamYRvH495el1gdUxvZhpljMnGdI3IuIluFjQNtQEoesEls8SZEeyjMhJ6N7KZuvJCbUcoFNRpULDON7L+jsLmgVRQhqqtDajG0m5TZbzEDKyO1c1/tk0yYid1P8RmALhkRvUv0ziQTwSiSsk8ivgQbo/HxQOsoPhggLCYLvKE0Be/A+kOi3vdDUwyapmIfsAirQ2JZGIBiT0YkE2W9f3au97M9TzlaLVQSC5Ak+njILcROyZ+EiFkrCarmmFixiPlA65BMnBnYmTPQ5O2zNlw97EqSPLKJbmmGnllhRl5cMIpk0J01MdUT34AVY2TjLyuTwPFJ3Xh6IRHqHCO7YLEMgmWMzQ5ROkaXKJreMzJJZuDyfEcYsIi4MsLktBX4Fs9D3Jrotvoos8oY4nxlfxuH9WUodgtiPdpLJNjnyAHXCSYx/gWTWICUTiybdDkjIktyCrjSPKWiX4hSH8C6F5Jiqnrf5ebeWZHkJFdPpmpymgS+xRKAoROjh9OrM0CeLM5SnULmsmRwaWcgfnNKtZVTmFLlk2yObW1Im4TyHlPMe5ZkQeTO7JnsqPGOrZn+T/F58AQKoxWwVWjVFdZQtv0Y0sH0iFsQs2zVtBkLYzL/EAWI4CLDVrmYTH+K70w7HGEdvjI/ogHLs/3efhPWAh1Y1nX2u3BuoWkkydZY13SbmCOTUMZyV5n/rJzCubYfza3Im+N54bVk4r8rwvfxv3D2kzhsLZozC6cvTn9k8DExWZJisvOFMyI2UI/JAGdDaDd946WlZmWuQlVkrim+BVFniaYWri1ngc4lmemJFyFJ/8+5EWcxJ6iA8kLchgSbJl3ibm3eNyVWl/TU8cBWHqiDMWGER2I5wveoTLwMKT6rNNR5xFbPmMwnWplKqPt7Fs5lh5rMBRUw92/OLtyXFndvbi7hVskQrzpDrw4yG2ggIae5uO5Is78JQ5QtLOuQah01JKVUb+9XJCEkpgGY4KBSL7SEsPddrrXeeE2IeAptLoj6Aa/eEGJI2FVuoPyBVEJEPChBzIq4Bkkm2a/XK0LCs9lsyeXaoZD0bB7K9XZDAM9CVWo2C4eSkPIQ+0ctqSmsBsJCF2ueC6nWrgppcQPWuAs3iPUZEUYqTAPhVjYCAkSI2SLEGA2R7ulKYV+qKPVGq1yvFSqYO08mS99pl2WphPnzyRpSoYWySLYDW4sW1IVTyMeF8ClcYLZmuZD9Vxg2F2wqC528DTHOZb7NXl24OzFmz221TM9fakROKTWoiTnCfji3+PP98Y1l9kcm+3ybQGzBJnALNuYMLb0Y/yAEmdOSc8aSGA4sibdmM/jc6YMwN9Yfh+D84pJyYQxvQXxI7BPDLauuLNisafPsYDta/t0+clpdyKOZi/SHYTi30PjCQC8BaPpobPPSia/ESSZhixddZce21x5h7cBFDHBzGmiUBfrMKT2dI+YrIHQGGtFtxbJNog41vc+2mkQ+1lMHFpHXeXPTbaUajECmTyMe0ODNnkb2R0lI+Qpw8TlYvac+UBX3UMUzkaKyhnOwegW2GAT7iI46A9WyWNISDCrStjptKrot4uuwyTSGuDdpowFR6DHPYluOF9kGRVQdAI3IwrLwElPrE52Yqk0U8tEYsYqqd5UT1TrJbFED++FMSH6KAg8dnMRgBb17G0FiHs4zK5gR7LDSOSGd+8rY7t3MXPT7ZxE2GaZIIW1EiE1YpYMx1B5jzIbJ9tD0gqXJl8Fc3VGo4vkjH2s2JKkkp1wrB4ZJCdU3vASnOKH6hpteTFanw/uMZ1PnMGZlhECyOp1DDnA4buF8ODdNll9xY66Xs6rocTSZV
xQDHkeTWbU3YWt0MprXu+bXExEyq/giO5mbpINj1c1c8MN9DWIO6d9RiK4eI2NUE39YmcsMHLXNMZ4iOh2JNRZYm3gNNozjex3OSAXN9LRHmRdYetdpA+Njg4nFq2jbOlHNEVuSLRwMknmRQ7m85orpjLAeaj3btbjDZwSTOdZ2QaCZCDjeZbA0yv1+cTOgyKnTq7xwQ+HU42twnoJwoVO7qq360C8zNE171WkMxGmOjycesa7zOKnMpdaXVpxn87Dq572YBM58LEiwCCrWS7R8uSthLYJlVKXckhS5XWuVq5IQ8RX2d6KJK8IOrRrSwZOa+C244F6rWMRWHmomm5BDlW+OHn+2HFST2O8j5oBBxApc1g1cAHDhUM2uMr3QUtQOEtIy+EboWXlaN5oOeLpDFBzoDH0jp9EXq+uhOkL+2uaE1ecJOYECiX5/JcckzGZUiOHfmBDHv3FhBf8mhCT+TQqQ/WcEVv0VPD0QddgeFmKr3PNPrPdzRbq55eO8XJa5Ji0sKP0IL08SsvMlHkL8nsVsx5ntF55s+06TGU/eaSq1ulwtVGRHXXwKogP18SS4DTLRssOCFuiVXXDzYaIvcXrsQYzlSwRwMiZ8Q0xAtFiX6RTBOcGlSqMsFXGWZF+HOE8CnT5eGlCJfzo2Qm5ru7ovyUJ4bvCzFs5LX2X+1RzPfx+ClK/SpiWSOhgYDxV1oKmWQw1gogKVLDt0X9GkwemS/XkIhNlSdybM0NcZZvZnIUgH69uZ8J77WsP7RxjWAlXtstF9BBtalwxHhk2v05UBeUAGmSxbNPaeXDfnylO9ClXLb5ZLUrVRb0m14pHSrr1bq79fkwVtBvYlTvsGCLNBiRdgUVg4szdhvVbHXRK3SungQCq2mvwmxEO3AhM8+5MIbC6IBJdxfobhx6rry0Sfo1VEAw+XzpEHqyPMkm5rPQ0rfH4q5web9amcXzK9DOLIsDRbe0Av6d3rKHrQicqC21LWbQ+tk746g6aLeUQW3BYPjRVN1xjT6o/j6N4RklNc5kGcun56D7aKxRmTccgOrKv9vkmNu4b4SSXtiRlw+w4k3DzQzZtmAospdvwO06sx3W1Ep5qlTK/1w9iekFOa5V2JZj/GEib4LIGnmcTAQJJTDf4mtnvGS0au4uBlT3P7TyFIuGLcbqMj1T5h5mL7YSEks28qx5pQZxRw5PSbjuuAqF12DDKGQxxJyx1XR150xPR1zDZVbRDARhlWcBs8cB6ecu12sS7FI1Z3qhRn1x0XHEDJaXd1s38OwYZ7cOt6yaoCqLpu2P50zVN5Ti9X8JRkn4HtIcC05dS04T7lvDmxh0t+1Acuoic8eiFzTPqa7twk8w/3QibqXcjsfw+PcMZwNtx9Yea6wbodunvdAfWNgar3c4bZnz680orH8j2/jo7/Fwr9Mhw5bOz/Orx9yBUbbjJk0huQDu0gfLIGVyb23oSeWbAVf+zxYtna81jrvOVGaket7TOffLN/D8OG9L6pjkbEZKXawUDtZ/8YhijdJcUViBRqR7iIbcBavXVbkpVivdKu1ugStgoJev9J72ax/F/H0oB+ORVMBHMqNCW5XKiU7xboJaZy1BKi4jacD0obcr1V328fCDHqw73bVCrlZkuIM7fOFSgXreAkvVQtNJRCkwmU+gF12cZVValIh4XikZCgThZDhCSN02krlYstAcQMbDkCzxNrSbEOYv1VKuFxZ1V8GjK8g++WG+7qrrxXqLSlprAmnsM00tbiban4ritOZ+/Dpptd5+DD8tty0vsS7JSkhiwV6d2t0qzLLeWgLFVKTRpO2UkOiioSDsFluOhH7B/xX26RGMp+ezqUrDZkrnYdV7gLNaVGQUZPjgXsUwpW3ivgaOAIruVtyNDpobgPEPyNoYdGLPGJj7uZv/yOvlymb1zMIelyczHwq9RzPfbyz2x7TdacV/a6upTXvy706pF43qvXZOUfw3bAq3sGXcbvp47fSwG/vuHlni/4PPsar
bwMMe7myQ8vp2R1pn/cVP4dSHWMwXjI33zOskwzx5ZArkM3x/whrN8nE+VzWPnUsbKGesWpIR02FzHorDfb5Ui00Z0jkM+fnztn+luKPq4/H3UIrAXZcqanJQnj/vsS7uZD95HvNK74j3/LJY8/EXLzVRAfqKam6m7WThnxgBeXN4KjyrJCB33/6t0dNTdR6evjdZuow5w53lu8UcHfwpCY2NzD9pK7Wfan9EmRWDZWOtGu1rHZg3vqxnqOCnMllLDrl/3oLz67CDKDbP84BGtlHQuJ5viYvwKsQugRO95F5NAj+jVxaoHQRHwbEohjPXLeHS5x4wETuZYLYq5isqe0fRmSXiN9b6hols0qkaTMfuejv/rsYnS7AkkvXFph4ORx7uLpT+yf91hGY9hcEIPzLJYP3wxxm8dx/gD7f1qjBpI=",
+ "view": {
+ "recursion": "bytes",
+ "enum": "number",
+ "yt_mode": true
+ }
+}
+@@;
+
+$udfParse = Udf(Protobuf::Parse, $config as TypeConfig);
+$udfSerialize = Udf(Protobuf::Serialize, $config as TypeConfig);
+
+$data = @@
+{
+ "dict": [
+ {
+ "key": "key2",
+ "value": {
+ "x": 23,
+ "y": "yy",
+ "SubField": {"List": ["s1"]}
+ }
+ }
+ ]
+}
+@@;
+
+SELECT
+ $data,
+ $udfParse($data),
+ $udfSerialize($udfParse($data)),
+ Ensure("Success", StablePickle($udfParse($data)) == StablePickle($udfParse($udfSerialize($udfParse($data)))), "Fail")
+;
diff --git a/yql/essentials/udfs/common/protobuf/test/cases/YQL-3381.in b/yql/essentials/udfs/common/protobuf/test/cases/YQL-3381.in
new file mode 100644
index 00000000000..f70c4566f9c
--- /dev/null
+++ b/yql/essentials/udfs/common/protobuf/test/cases/YQL-3381.in
@@ -0,0 +1,3 @@
+{"ProfileDump"="\n\x0F\x08\x8B\3\x15\xC6\x16[Z\x1A\5\x98\xA7\xA5\xD4\nj-\x08\xDC\xC6\xF5\x92\xDE\xAC\xD2\xFF\xD5\1\x10\x98\xA7\xA5\xD4\n\x1D\xC6\x16[Z%\xC6\x16[Z-\xC6\x16[Z1\0\0\0\0\0\0\xF0?@\2H\2";};
+{"ProfileDump"="\n\x0F\x08\xEB\1\x15\xED\xE8^Z\x1A\5\x99\xE5\xEB\x82\x0F\nH\x08\xDD\2\x15\xE0\xC3\xAAZ\"\7\x08\x99\1\x10\xE0\xC5\x08\"\7\x08\xA2P\x10\xE0\xC5\x08\"\7\x08\x9B\1\x10\xE0\xC5\x08\"\7\x08\x9A\1\x10\xE0\xC5\x08\"\7\x08\x9D\1\x10\xE0\xC5\x08\"\7\x08\x9E\1\x10\xE0\xC5\x08\"\x08\x08\xEE\x9C\7\x10\xE0\xC5\x08\n\x0F\x08\xEB\1\x15\x0BN_Z\x1A\5\xB7\xD8\xE6\xFF\x0E\n\x0B\x08\xA6\2\025fG_Z\x1A\1\7\n\x0F\x08\xEB\1\x15= _Z\x1A\5\xE8\xFD\xBE\xFF\x0E\n\x0B\x08\xA5\3\x15\xE0\xC3\xAAZ\x1A\1F\n\x0F\x08\xEB\1\025b#\xA7Z\x1A\5\xEA\xA5\x8F\xBB\7J\x0C\x08\4\x15\x8E\x83\xA8Z\x1A\3\xD9\xA1\x17";};
+{"ProfileDump"="\n\x0F\x08\x8B\3\x15\3222\x94Z\x1A\5\xB6\xF2\xDB\xA5\2\n\x11\x08\x94\3\x15\3222\x94ZB\7Swift 2\n\x12\x08\x93\3\x15\3222\x94ZB\x08Wileyfox\n\x0B\x08\x94\4\x15\3222\x94Z\x1A\1\2j,\x08\x84\xCD\xAF\xF9\x9B\xC0\xF8\xFEv\x10\xB6\xF2\xDB\xA5\2\x1D\3222\x94Z%\3222\x94Z-\3222\x94Z1\0\0\0\0\0\0\x10@@\2H\2";}; \ No newline at end of file
diff --git a/yql/essentials/udfs/common/protobuf/test/cases/YQL-3381.in.attr b/yql/essentials/udfs/common/protobuf/test/cases/YQL-3381.in.attr
new file mode 100644
index 00000000000..6f11c914086
--- /dev/null
+++ b/yql/essentials/udfs/common/protobuf/test/cases/YQL-3381.in.attr
@@ -0,0 +1 @@
+{schema=[{name=ProfileDump;type=string}]}
diff --git a/yql/essentials/udfs/common/protobuf/test/cases/YQL-3381.sql b/yql/essentials/udfs/common/protobuf/test/cases/YQL-3381.sql
new file mode 100644
index 00000000000..d8ee0b931f2
--- /dev/null
+++ b/yql/essentials/udfs/common/protobuf/test/cases/YQL-3381.sql
@@ -0,0 +1,7 @@
+/* syntax version 1 */
+$config = @@{"name":"yabs.proto.Profile","meta":"eNqNVt1y21QQRrJs2Ws7ceS0UVwKoQyDgWla0h+GXHTGScw0M00T0hQuNYp0bGuiP86RkvoxeAMegeE5uOcF+ga8AHtWUiynypQr+6y+/Tm73+4eeG/C/bl9Lh7FPEqiR6lg3MK/E89n2yQyQH7O/j/4ywT9JPtqbEPdS1ggTGWrNmzvfL69wG3nmOL3EHHGC1hz7IRNIz4vPAhTJd0vq3T3c/DCXyu2hYgjngizRnr3qvRepufk7yE0nSgNE8aFqd0O388wxmNoRJOJBNcJvFUFPpaIIqBvQJul58JsfDyWb0H/LWXcwxvrhN6sQv+MkLnxHejndhjKSJqEHVRh9whifAWaG8TCbBFyowp5EMTGDrT9wJowO0k5xgCEvl+FfhX8lKGMH2GFs2nq29xyooi7wmyT2hdVamenGXRfIo0D6Nup6yUR99LAEmwasBCL1tlSUP/rSv3RNf5NDjeeQ8eOY99D1nhRKMzu7XU5Gy2AA8zJie1xowv1icdFggRVh5qxAg3BnCh0kXR4HjwF7Yx70Udg8nMy87iLlJNae9D5lXnTWcLct4dhUqF9RZ9z7T6009hFJluJFzC0oQz1wenCxv8ItGSzVmVTI5t/KLB6s2Puwsp5YF03neeSg67Rg6YnOc9EFmbX+BTWC4m1HK861GV8YhZdyTaSYIzHwWRfyE6RZwOAXWLFMpUGqRigBbYXItuVYXNXS3jKBu8V6Cw1ECrmHVrEphnr0KE2RIkVuM8WabQdWVzr3KPuV27Jg7EBbcF85mA085hhhMqwu6vu/HAdUWMR0YM3sDIis3todRymAUbUOBifjQ5f9T4xdKiNDg56itGB5snb0/2Xozfjniohp+Oj41/GvRoaXTkd7x8fHY1fH4zODo9f97TBPyq0y3MPFS7Y/Ar7YlGBG6GrlDMMPcUqWJe2n7JsxGl7Kvp/CO0YeVJ8yIaZWTlvJZ0QniCxC3j9djjx/wWsX+VstMr+G7e321IHlPXLYeof16dwseIC4w2nmSIOPWXYuS5Xq0Sg3xXQi4mKJMThm2VUydqUuIS5VD5MMPWdFDqc3SDMcnmIL8Yd6ErjkkEZ4Rrko4rUI9CLDfIhnbvGKtTQOq25rJZrUM+uKcurSNHgCFqLqVtlpA96PrpLhjZu3rA21Mnc3wo08u2wBq1slSz6q5J5/eWmqVESMKOMLq/RUQ4T2ZOpn3g4axmnXKnYHBq3wwtKkSo9xjNuCyY96pQ1nDZy99H8aS7lsVRbvE7TE5YIbJ7gepLy+sT2BRv8W4N6thjLhpTcEGSShL3LKt+S8cyKGdEtjy7lxujKz2IeOsyl8GmUzaKUCyuwxUUe/3UemsXUKeeqRXr3oO9EvpRG+Hyyk1lGGyANzFyCKRK+zDrJ2yRHlsWcuZ4jO8dJOO3H7g1x7ODek2ITemnovVsazytVtF4lIaYBB//3Zo+Us9OOuVY6PTGN0ump2S+dnpnrpdNz806RrSC0bNc17+J5jYptczn5sSIbdKdNWEtsPmWJ5UayxjTDTfq0AatTJL7l+KnI6b1ZVFFg0h1KjjmQssEZ1OS7BT3i86YoeMXclFddh+b1E0O2QZf6oyCZVmrWGLrLDxWkFFbFS1KXUX/I/mz5UTjNRCqJKmcJ4vB1JDcSBqfl5Mk81ksej6Ff9bTpgebbtPCLdkYJVv2y1ODr0JL+RGIHceZ0V3k8+BOXaPm1I6PDHMu5PbPFbNEYDnee7GQytWpd1vLcdbwQXfh+eSYi12R4lty5lzm+Tl8+g7u5MIhCpPmEM+zA0JlTAylVI3JPPVH+AxssP0w=", "lists": {"optional": false}}@@;
+
+$udf = Udf(Protobuf::Parse, $config as TypeConfig);
+
+SELECT $udf(ProfileDump) AS Profile
+FROM Input;
diff --git a/yql/essentials/udfs/common/protobuf/test/cases/YQL-6706.in b/yql/essentials/udfs/common/protobuf/test/cases/YQL-6706.in
new file mode 100644
index 00000000000..17ca40800d5
--- /dev/null
+++ b/yql/essentials/udfs/common/protobuf/test/cases/YQL-6706.in
@@ -0,0 +1,2 @@
+{"doc"="\n\x0Fhttp://agbz.ru/\x10\2\x18\x84\xFB\x89\xC6\5\"\4fake";};
+{"doc"="\nSO REALLY BAD BAD BAD PROTO ROW\n\xBF\2";};
diff --git a/yql/essentials/udfs/common/protobuf/test/cases/YQL-6706.in.attr b/yql/essentials/udfs/common/protobuf/test/cases/YQL-6706.in.attr
new file mode 100644
index 00000000000..8838f04a448
--- /dev/null
+++ b/yql/essentials/udfs/common/protobuf/test/cases/YQL-6706.in.attr
@@ -0,0 +1 @@
+{schema=[{name=doc;type=string}]}
diff --git a/yql/essentials/udfs/common/protobuf/test/cases/YQL-6706.sql b/yql/essentials/udfs/common/protobuf/test/cases/YQL-6706.sql
new file mode 100644
index 00000000000..1989d5b03f8
--- /dev/null
+++ b/yql/essentials/udfs/common/protobuf/test/cases/YQL-6706.sql
@@ -0,0 +1,17 @@
+/* syntax version 1 */
+/*
+ Should check is TryParse work correct for proto with required fields
+ Can't parse message of type "NNewsStorageProtocol.TRawNewsDoc" because it is missing required fields: Url, Status, FetchTime, FetchHost
+*/
+
+$config = @@{
+ "skip": 0,
+ "meta": "eNqtWFlzG0UQRrc0vpRxjiUER9kcOCbIwVBFkgoB31ZhOWYtAlRRRa20Y3nJale1Rxy9kkfyL/gjVPHIC3+BKn4I3T07q5Us2Q/hTX1P93zT3Sv2Ns+WBqeiveqK02C173uht+pFoWO7L4M6kVw7OABZwz32jkLPN7viENkdz9EdVm49j8J9UOZVlvvOd7RMLbtcMfAnv8qKR17kd4SWJWZM8eusfCTcULggydUyy3NGQktZtwdkoBVqObBKaP2fLKuocAFqbppOp2X3BAUFL4rmNTZzFHU6IghInKUgaRZq7Ji2I6yWb4sgPkaaxR+xAgXS8nCMmTW9Pq0KdXUmQxqg7y0RdHy7H9qeC2lkII00i3/OrqTITc89ti0qRxF0s8ZkIV9iLKVaItUUh99j8zuR46R0yqQzxsXK7fheb0cIS6uARtlIaPbHLXZ3HA6BzJdYP1vtGBWXqR5jtbh+AZb0b9lCqyks22ydRL22C/WeDJw9YXdPwvjmYopfZoXvbSs8ia9LEvoBm5UuDx1zIPx39vdvhs1Jh1AzQGU4wSOUsAkwag36El0AU0WnouUmR8unonGNlTbs0DdDQUDJGorkN1hlxzd7gmQSGEMGnmArgl+IsJJ8RIrmt1mlEWyJYzNyQsJA+Unh2HQCYQz5+l8ZNiPT3PW9qM8fs0IjFL0A0kS8365Put/6SGEMacGfsKKsPdUieSuTbaWmEVvwbcYSKOA7xNh3z7NPtI2Uof4my4pS/i6JPGZFKkYAiaDtrfNsSdOIDf6nPPhTVpKFGes759ZSmeh/ZqBDbr8G/CBI11ieAIq4nV9bmuxoGzRRyyBdBfXsEOpj3Sx3tps9Svo8Int+rTY9ktRLJgE8iUYPdOIeKQn97wzUct01nUFod7AkKg10vjylHol+fXvb7dquSmieZRtW/EbhF77OQxNeURBnElP6T6wUG3LGij+ariVeV9/D37ue13VENQO1md23X4kGQMZ3RVjNorSJNxhVc3yGlQyz13aEX81DVEYXFHS8vqgW9N8pJetAhKee/5J/OZLS/WkpKf36dvxjck76DisrjamHr7DCurXjvYZTw0nXLQMy8eHYoLPp26Hwqnn9bQbm+Xqv3xShyZ+xSlLT+CnVLqq9MTThXzOWJKDeU+2iTI2Ujf5mATqUYZ6iyZbXmdCGnwLyQjOMAgLs/NqdKciLPUhdI7ahBivCzgntCDlaIYaMRLrnBSEAG2MOGTRrbT8Ihw7epyY8xuX7jEms031fp/ue0lW2h4pPKvIOjSAwUvb8AbuEU/rQF6+GgesU+KwAR4QhcHjDnL8i57yipQwQEIJMVzJJ40g6jNpbaiTNGYrEXvDct7tKqsmtKcXCHWXDtJTCEnlOcTDuehe2kAEAuCjHlqIxLibRsAKYaDmMG5PYJ1p26MiFBvoEEfwOm8PQNkBOSm+RdJQ53r8qZ/vXQ7aoTNKay6Q5ScQ5vF/xOtQYqdBvvsKqShX3LZJfJfkZPmYKWy3uktoHpKJIrM++6XYjbIrX5GKhaLoXc3BqOo62RoVVJDU1C3u+NhM3NaKQD88Z+Q8kX1KI7ecurGUCRR+SaMiA7lRRMyTQZunh3pzycJWeMbSAsVOg3qfdoGXgxnkDzJCqtNoCPLoebeNztPinOFRwsxto8ySh35RcFJ54vrYQJ0cUgmVXuL7QqhIsRODTaZr9vrBirwOE2SWC2VkB19msZO4KDxU5KY7wcImXdBM+KiA12VXg+IukPFnI64yPCPBdB9plMpkgoUaDL1r4L2DCIy5vxo1mhIsnxnuAavqiAx9PNarUCA99IfyO7J7tmND0B9od+YEwyuXLsKTj20kp3iXFcfbYh0xKf+XMh0zKakWeo2kHPROTCLR7mP9GtpoxxiRwbzJoSvmjRHlcBKtIOmTK5n5iM1kBc6aKyova9CyhfUyFHmfzr1hZfYRqnxDEb1/8bRgYiRFAoIp92Lc70G0bQRDB+VaT8
52RQVKleChrDyne0rQxKrUMpY6gaEV+24vXXO1TehEjPHx64OEXAAgC/TP59IYc/RmrtuL5ue915AcGTOJvxIAm8ayBP7E3KXziKM4bitR/zcJtxw622nSsDWhysSvyMbN2b0pGY4GNxI6GhRMFJ8KigNAMYxKb25HddeG+/GSsJ4zUypB/15WhML4ypAZnkWSKXPkBNrR40cbd0BAOcC3gwJY2y8q09yKVQeoFfKd7SGWRWo8sm6gcdMH5lm+6gUMlQF5+5QvGhos1n4PW7XYcL4BUpWscofikpOu9sEcTqJpdabD50STR+MDzezDqvA4YL7KFpulGMF8GW8IRcF7wAcyY2BjIwQ2ufoNPydT2go6S/QUcwb6JcfeiNjgAGRTFsTsoo130CLbPntmH/GDfo7uIGXncuKkYilPgZZZfD71etcivscVdDzYWF/8cGoYr/QdunI1N",
+ "name": "TRawNewsDoc",
+ "format": "protobin"
+}@@;
+
+$udf = Udf(Protobuf::TryParse, $config as TypeConfig);
+
+SELECT $udf(doc) AS parsed
+FROM Input;
diff --git a/yql/essentials/udfs/common/protobuf/test/cases/YQL-8307.in b/yql/essentials/udfs/common/protobuf/test/cases/YQL-8307.in
new file mode 100644
index 00000000000..6c7f0be49c9
--- /dev/null
+++ b/yql/essentials/udfs/common/protobuf/test/cases/YQL-8307.in
@@ -0,0 +1,4 @@
+{"TestField"=""};
+{"TestField"="a: 1 b: 2 c: \"hello\""};
+{"TestField"="a: 1 c: \"hello\""};
+{"TestField"="d: [1, 2, 3, 4]"};
diff --git a/yql/essentials/udfs/common/protobuf/test/cases/YQL-8307.in.attr b/yql/essentials/udfs/common/protobuf/test/cases/YQL-8307.in.attr
new file mode 100644
index 00000000000..f10d440a236
--- /dev/null
+++ b/yql/essentials/udfs/common/protobuf/test/cases/YQL-8307.in.attr
@@ -0,0 +1 @@
+{schema=[{name=TestField;type=string}]}
diff --git a/yql/essentials/udfs/common/protobuf/test/cases/YQL-8307.sql b/yql/essentials/udfs/common/protobuf/test/cases/YQL-8307.sql
new file mode 100644
index 00000000000..80604f1ad3c
--- /dev/null
+++ b/yql/essentials/udfs/common/protobuf/test/cases/YQL-8307.sql
@@ -0,0 +1,10 @@
+/* syntax version 1 */
+
+$config = @@{"name":"Test","syntax":{"aware":true},"lists":{"optional":false},"format":"prototext","meta":"H4sIAAAAAAAAA+NK5FLOKs3L1i/TzU3MNjYu0y8oyi/Jjy9JLS7RBxF6YL6SERdLCJAnxMnFmCjBqMCowQpiJkkwAZnMIGayBDOQyQlipkiwKDBrsCaxgbUaAwBc3r8mYwAAAA=="}@@;
+
+$udf = Udf(Protobuf::Parse, $config as TypeConfig);
+$udf2 = Udf(Protobuf::Serialize, $config as TypeConfig);
+
+SELECT $udf($udf2($udf(TestField))) AS Profile
+FROM plato.Input;
+
diff --git a/yql/essentials/udfs/common/protobuf/test/cases/recursion_bytes.in b/yql/essentials/udfs/common/protobuf/test/cases/recursion_bytes.in
new file mode 100644
index 00000000000..b6dd409ad4e
--- /dev/null
+++ b/yql/essentials/udfs/common/protobuf/test/cases/recursion_bytes.in
@@ -0,0 +1 @@
+{"TestField"="{\"inner\":{\"i\":{\"a\":\"hello\"}},\"test\":{\"inner\":{\"i\":{\"a\":\"bye\"}}}}"};
diff --git a/yql/essentials/udfs/common/protobuf/test/cases/recursion_bytes.in.attr b/yql/essentials/udfs/common/protobuf/test/cases/recursion_bytes.in.attr
new file mode 100644
index 00000000000..f10d440a236
--- /dev/null
+++ b/yql/essentials/udfs/common/protobuf/test/cases/recursion_bytes.in.attr
@@ -0,0 +1 @@
+{schema=[{name=TestField;type=string}]}
diff --git a/yql/essentials/udfs/common/protobuf/test/cases/recursion_bytes.sql b/yql/essentials/udfs/common/protobuf/test/cases/recursion_bytes.sql
new file mode 100644
index 00000000000..3ee0813dc44
--- /dev/null
+++ b/yql/essentials/udfs/common/protobuf/test/cases/recursion_bytes.sql
@@ -0,0 +1,35 @@
+/*
+syntax='proto3';
+
+message Test {
+ message InnerInner {
+ string a = 1;
+ }
+ message Inner {
+ InnerInner i = 1;
+ }
+ Inner inner = 1;
+ Test test = 2;
+}
+*/
+
+$config = @@{
+ "name": "Test",
+ "format": "json",
+ "skip": 0,
+ "lists": {
+ "optional": false
+ },
+ "meta": "eNrjWsjIxV2UmlxapFdQlF+Sr9TJyMUSklpcIqTIxZqZl5daJMGowKjBbcStBxLV8wQJBUFkhCS5WEqAghJMYBWsYBVBYCEpKS4usFIwIcTDxZgINocziDFRSp2LFSIsx8WYCTVeAMl4iB2MmUlsYCcZAwC/Qiqb",
+ "view": {
+ "recursion": "bytes",
+ "enum": "number"
+ }
+}@@;
+
+$udfPar = Udf(Protobuf::Parse, $config as TypeConfig);
+$udfSer = Udf(Protobuf::Serialize, $config as TypeConfig);
+
+SELECT TestField, Ensure("Success", $udfPar(TestField) == $udfPar($udfSer($udfPar(TestField))), "Fail")
+FROM plato.Input;
+
diff --git a/yql/essentials/udfs/common/protobuf/test/cases/recursion_fail.cfg b/yql/essentials/udfs/common/protobuf/test/cases/recursion_fail.cfg
new file mode 100644
index 00000000000..d7d756c8260
--- /dev/null
+++ b/yql/essentials/udfs/common/protobuf/test/cases/recursion_fail.cfg
@@ -0,0 +1,2 @@
+xfail
+in plato.Input recursion_fail.in
diff --git a/yql/essentials/udfs/common/protobuf/test/cases/recursion_fail.in b/yql/essentials/udfs/common/protobuf/test/cases/recursion_fail.in
new file mode 100644
index 00000000000..b6dd409ad4e
--- /dev/null
+++ b/yql/essentials/udfs/common/protobuf/test/cases/recursion_fail.in
@@ -0,0 +1 @@
+{"TestField"="{\"inner\":{\"i\":{\"a\":\"hello\"}},\"test\":{\"inner\":{\"i\":{\"a\":\"bye\"}}}}"};
diff --git a/yql/essentials/udfs/common/protobuf/test/cases/recursion_fail.in.attr b/yql/essentials/udfs/common/protobuf/test/cases/recursion_fail.in.attr
new file mode 100644
index 00000000000..f10d440a236
--- /dev/null
+++ b/yql/essentials/udfs/common/protobuf/test/cases/recursion_fail.in.attr
@@ -0,0 +1 @@
+{schema=[{name=TestField;type=string}]}
diff --git a/yql/essentials/udfs/common/protobuf/test/cases/recursion_fail.sql b/yql/essentials/udfs/common/protobuf/test/cases/recursion_fail.sql
new file mode 100644
index 00000000000..e274c92e010
--- /dev/null
+++ b/yql/essentials/udfs/common/protobuf/test/cases/recursion_fail.sql
@@ -0,0 +1,32 @@
+/*
+syntax='proto3';
+
+message Test {
+ message InnerInner {
+ string a = 1;
+ }
+ message Inner {
+ InnerInner i = 1;
+ }
+ Inner inner = 1;
+ Test test = 2;
+}
+*/
+
+$config = @@{
+ "name": "Test",
+ "format": "json",
+ "skip": 0,
+ "lists": {
+ "optional": false
+ },
+ "meta": "eNrjWsjIxV2UmlxapFdQlF+Sr9TJyMUSklpcIqTIxZqZl5daJMGowKjBbcStBxLV8wQJBUFkhCS5WEqAghJMYBWsYBVBYCEpKS4usFIwIcTDxZgINocziDFRSp2LFSIsx8WYCTVeAMl4iB2MmUlsYCcZAwC/Qiqb",
+ "view": {
+ "recursion": "fail",
+ "enum": "number"
+ }
+}@@;
+
+$udf = Udf(Protobuf::Parse, $config as TypeConfig);
+
+SELECT $udf(TestField) FROM plato.Input;
diff --git a/yql/essentials/udfs/common/protobuf/test/cases/recursion_ignore.in b/yql/essentials/udfs/common/protobuf/test/cases/recursion_ignore.in
new file mode 100644
index 00000000000..b6dd409ad4e
--- /dev/null
+++ b/yql/essentials/udfs/common/protobuf/test/cases/recursion_ignore.in
@@ -0,0 +1 @@
+{"TestField"="{\"inner\":{\"i\":{\"a\":\"hello\"}},\"test\":{\"inner\":{\"i\":{\"a\":\"bye\"}}}}"};
diff --git a/yql/essentials/udfs/common/protobuf/test/cases/recursion_ignore.in.attr b/yql/essentials/udfs/common/protobuf/test/cases/recursion_ignore.in.attr
new file mode 100644
index 00000000000..f10d440a236
--- /dev/null
+++ b/yql/essentials/udfs/common/protobuf/test/cases/recursion_ignore.in.attr
@@ -0,0 +1 @@
+{schema=[{name=TestField;type=string}]}
diff --git a/yql/essentials/udfs/common/protobuf/test/cases/recursion_ignore.sql b/yql/essentials/udfs/common/protobuf/test/cases/recursion_ignore.sql
new file mode 100644
index 00000000000..2f57eb18d19
--- /dev/null
+++ b/yql/essentials/udfs/common/protobuf/test/cases/recursion_ignore.sql
@@ -0,0 +1,35 @@
+/*
+syntax='proto3';
+
+message Test {
+ message InnerInner {
+ string a = 1;
+ }
+ message Inner {
+ InnerInner i = 1;
+ }
+ Inner inner = 1;
+ Test test = 2;
+}
+*/
+
+$config = @@{
+ "name": "Test",
+ "format": "json",
+ "skip": 0,
+ "lists": {
+ "optional": false
+ },
+ "meta": "eNrjWsjIxV2UmlxapFdQlF+Sr9TJyMUSklpcIqTIxZqZl5daJMGowKjBbcStBxLV8wQJBUFkhCS5WEqAghJMYBWsYBVBYCEpKS4usFIwIcTDxZgINocziDFRSp2LFSIsx8WYCTVeAMl4iB2MmUlsYCcZAwC/Qiqb",
+ "view": {
+ "recursion": "ignore",
+ "enum": "number"
+ }
+}@@;
+
+$udfPar = Udf(Protobuf::Parse, $config as TypeConfig);
+$udfSer = Udf(Protobuf::Serialize, $config as TypeConfig);
+
+SELECT TestField, Ensure("Success", $udfPar(TestField) == $udfPar($udfSer($udfPar(TestField))), "Fail")
+FROM plato.Input;
+
diff --git a/yql/essentials/udfs/common/protobuf/test/cases/syntax_aware_empty_nested_message.in b/yql/essentials/udfs/common/protobuf/test/cases/syntax_aware_empty_nested_message.in
new file mode 100644
index 00000000000..1c32f6a1423
--- /dev/null
+++ b/yql/essentials/udfs/common/protobuf/test/cases/syntax_aware_empty_nested_message.in
@@ -0,0 +1,4 @@
+{"TestField"="{}"};
+{"TestField"="{\"inner\":{}}"};
+{"TestField"="{\"inner\":{\"i\":{}}"};
+{"TestField"="{\"inner\":{\"i\":{\"a\":\"\"}}"};
diff --git a/yql/essentials/udfs/common/protobuf/test/cases/syntax_aware_empty_nested_message.in.attr b/yql/essentials/udfs/common/protobuf/test/cases/syntax_aware_empty_nested_message.in.attr
new file mode 100644
index 00000000000..f10d440a236
--- /dev/null
+++ b/yql/essentials/udfs/common/protobuf/test/cases/syntax_aware_empty_nested_message.in.attr
@@ -0,0 +1 @@
+{schema=[{name=TestField;type=string}]}
diff --git a/yql/essentials/udfs/common/protobuf/test/cases/syntax_aware_empty_nested_message.sql b/yql/essentials/udfs/common/protobuf/test/cases/syntax_aware_empty_nested_message.sql
new file mode 100644
index 00000000000..d24aa384d96
--- /dev/null
+++ b/yql/essentials/udfs/common/protobuf/test/cases/syntax_aware_empty_nested_message.sql
@@ -0,0 +1,23 @@
+/* syntax version 1 */
+
+/*
+syntax='proto3';
+
+message Test {
+ message InnerInner {
+ string a = 1;
+ }
+ message Inner {
+ InnerInner i = 1;
+ }
+ Inner inner = 1;
+}
+*/
+
+$config = @@{"name":"Test","syntax":{"aware":true},"lists":{"optional":false},"format":"json","meta":"H4sIAAAAAAAAA+PqZuSSyyrNy9Yv081NzDY2LtMvKMovydcvSS0u0QMzlRK5WEKAPCEpLtbMvLzUIglGBUYNbiNuPZConidISEqciwvMABNCnFyMiWBVnFIqXKwQMWkuxkyoTgEknWAiiQ1skzEAZSMFuY4AAAA="}@@;
+
+$udf = Udf(Protobuf::Parse, $config as TypeConfig);
+
+SELECT $udf(TestField) AS Profile
+FROM plato.Input;
+
diff --git a/yql/essentials/udfs/common/protobuf/test/cases/syntax_aware_nested_enum_string_value.in b/yql/essentials/udfs/common/protobuf/test/cases/syntax_aware_nested_enum_string_value.in
new file mode 100644
index 00000000000..2487eb8da31
--- /dev/null
+++ b/yql/essentials/udfs/common/protobuf/test/cases/syntax_aware_nested_enum_string_value.in
@@ -0,0 +1,4 @@
+{"TestField"="{}"};
+{"TestField"="{\"inner\": {\"l\": 25}}"};
+{"TestField"="{\"inner\": {\"l\": \"B\"}}"};
+{"TestField"="{\"inner\": {\"alphabet\": [0, 1, 25]}"};
diff --git a/yql/essentials/udfs/common/protobuf/test/cases/syntax_aware_nested_enum_string_value.in.attr b/yql/essentials/udfs/common/protobuf/test/cases/syntax_aware_nested_enum_string_value.in.attr
new file mode 100644
index 00000000000..f10d440a236
--- /dev/null
+++ b/yql/essentials/udfs/common/protobuf/test/cases/syntax_aware_nested_enum_string_value.in.attr
@@ -0,0 +1 @@
+{schema=[{name=TestField;type=string}]}
diff --git a/yql/essentials/udfs/common/protobuf/test/cases/syntax_aware_nested_enum_string_value.sql b/yql/essentials/udfs/common/protobuf/test/cases/syntax_aware_nested_enum_string_value.sql
new file mode 100644
index 00000000000..f869d0a3dd8
--- /dev/null
+++ b/yql/essentials/udfs/common/protobuf/test/cases/syntax_aware_nested_enum_string_value.sql
@@ -0,0 +1,24 @@
+/* syntax version 1 */
+
+/*
+message Test {
+ enum Letters {
+ A = 0;
+ B = 1;
+ Z = 25;
+ }
+ message Inner {
+ repeated Letters alphabet = 1;
+ Letters l = 2;
+ }
+ Inner inner = 1;
+}
+*/
+
+$config = @@{"name":"Test","view":{"enum":"full_name"},"syntax":{"aware":true},"lists":{"optional":false},"format":"json","meta":"H4sIAAAAAAAAA+PayMgll1Wal61fppubmG1sXKZfUJRfkq9fklpcogdmKrUxcrGEALlCUlysmXl5qUUSjAqMGtxG3HogUT1PkJCUExcrmCEkz8WRmFOQkZiUWgJUx6zBZ8QLUeeTWlKSWlQsJMHFmCPBBDQBXUZJjosdpoiVi9FRgAFEOQkwgqgoAckkNrB7jAFNZK4ztAAAAA=="}@@;
+
+$udf = Udf(Protobuf::Parse, $config as TypeConfig);
+
+SELECT $udf(TestField) AS Profile
+FROM plato.Input;
+
diff --git a/yql/essentials/udfs/common/protobuf/test/cases/yt_mode_any.in b/yql/essentials/udfs/common/protobuf/test/cases/yt_mode_any.in
new file mode 100644
index 00000000000..e69de29bb2d
--- /dev/null
+++ b/yql/essentials/udfs/common/protobuf/test/cases/yt_mode_any.in
diff --git a/yql/essentials/udfs/common/protobuf/test/cases/yt_mode_any.sql b/yql/essentials/udfs/common/protobuf/test/cases/yt_mode_any.sql
new file mode 100644
index 00000000000..ccc9af6f304
--- /dev/null
+++ b/yql/essentials/udfs/common/protobuf/test/cases/yt_mode_any.sql
@@ -0,0 +1,43 @@
+/*
+syntax = 'proto3';
+
+import "yt/yt_proto/yt/formats/extension.proto";
+
+message Test {
+ bytes Any = 1 [(NYT.flags) = ANY];
+ string x = 2;
+}
+*/
+
+$config = @@{
+ "name": "Test",
+ "format": "json",
+ "skip": 0,
+ "lists": {
+ "optional": false
+ },
+ "meta": "eNrFWltvG8cVDq8iDyVquZJtRo7rhEks2YmpwLlWbtPwspLp8NYlmUQGgsWKHFFrk7vM7tI2jaIo0Kc+9rUoiqIvAfoDChR9aVCgjwUKtHlqgaIt0P6EPvbMzO5ylxeLCZAkDwr3zLnNmW/mnDNj+MNteL5vGP0B2R+Zhm2cjE/3e8TqmtrINsw8o4mbnCPvcuRqkDnUBqTsMbaILb4D0VMkZkPPR/ZSt17KzwjlgxJNSpaZRO7fUdhaMCqKENXVIdUY2kvK7LeYhbWR2n2g9kk2zMjup/gtgB4ZEb1H9O4kG0EvkrKPIr4CmdH4ZKB1FR8bIFtMFvhAecq8C5uPiPrAz5pirGlK9jGWYH1ILAsdUOzJiGSjbPbPz81+duYpR6qNQmIBkkQfD7mG2JL4ScgxqyVBxRwVaxYxH2pdko0zBbtzClp8fFaHK4dTSZLHNtEtzdCza0zJywtWkQx6syqmcuJbsGaMbPxlZRO4Pqlbzy0EQoPzyC6zWAHBMsZmlyhdo0cUTT81skmm4Or8RBhjCfkqyCanrcC3eBHi1kS31cfZdYYQ5yv32zhsrgKx2xA7pbNEgH2BGHCZYBDjXzKIBUjpxLJJjyMisiKmgAvNQyr6pSD1EWx6LimmqvddbO6f50lecuVkKianSeBbLAMYOjFOcXt1B4iTxVFqUJa5KBmc2h2I355CbW0JUmp8k82hrQNpk1DcY4j5zJLMify5M5MdMT6xDdP/Kb4IHkFhsAJ2Cq27xDrSdp5AOhgecRtilq2aNkNhTOYfogARPGTYKReT6U/xvemEI2zC1+ZXNKB5dt47b8NGYAKrms79AC4sVI0g2R7rmm4Tc2QSilhuKvuftSWY6/i5uRZ5azxPvJFM/HdN+BH+F859FoftRXtm4fbF7Y8IPiEmC1JMdr5wR8QG6gkZ4G4I7aVvvbLSrsxXqYjMJcV3Ieoc0VTDjdU00L0kMznxMiTp/zk24sznBCVQXIg7kGDbpEfc1OZ9U2D1yKk6HtjKQ3UwJgzwCCyH+AGliVchxXeVhjKP2ekZk/lGq1AKNX/fwr3sQJOZoARm/u3Zg/vK4unN7SVMlYzjdWfp1UE2gwoScpqTGw4195swRNnBsgmp9nFTUsqNTrEqCSExDcAIh9VGoS2Eve9Kvf3WG0LEE+hwQtTP8PotIYaAXecKKh9JZeSIBynIsyZuQJJRio1GVUh4OlttuVI/EpKeziO50WkK4GmoSa1W4UgSUh5H8bgttYT1gFtoYsMzIdU7NSEtZmCDm3Cd2JwhoafC1BGuJRMgIIeYK0GMwRDhnq4WilJVaTTblUa9UMXYeTRZ+n6nIktljJ+P1pQKbaRFcl3YXnSgLtxCPiyEl2CB6ZrFQu5fYdhakFQWGvkexDiWeZq9vjA7MWTPpVom5y81IktKDapiDrAfzx3+PD++tUp+ZLQvlgRiC5LAbcjMKVr5MP5xCLLLgnPOkRgOHIm3ZyP4wvJFmFvrT0NwcXFJudCHdyE+JPaZ4ZZV1xYkazo8u9iOlD/bR5bVhdybOU9/EoYLC5UvdPQKgKaPxjYvnfhJnGQUdnjRU3Zse+MRNg6cxBjemToaZY5+a8lM54D5GgjdgUZ0W7Fsk6hDTe+zVJM4iJ2qA4vIm3y45Y5SCQYg0ycRD0jwYU8i99MkpHwFuPgCrN9XH6qK21TxSKQorek0Vq/BNmPBOaKh7kC1LBa0BGMV6ViDDpXcEfFN2GISQ8xN2mhAFNrmWSzleJ5lKEfNYaAeWVgWXmFifaITU7WJQj4ZI6+i6j3lTLXOsttUQTGcDcnPUsYjh09ibAW9dweZxAO4yLRgRHDCSveMdB8oY/v0nexlv33mYYvxlChLBznEFqzTxRhqT9Bnw2Q5NL3gaPJFMN9wBGrYfxzEWk1JKsspV8uhYVJA9Q0vwCkOqL7hhheD1e3yOWNv6jRjVlYIBKvbPeIMDsYt3A8XpsHyC2bmZjkrihZHk3lBM
WBxNJkVexu2R2ejebkbfjkRWWYFX2aduUm6uFa97CU/u29AzCP8uwrR1RNEjGriDyt7lTFHbXOMXUS3K7HBAhsTb0DGOLnf5YhUUM2p9jj7EgvvJh1geGwysngddVtnqjliR7KFi0GyL3NWTq+7ZLojrEfaqe1q3OU7gtEcbXsg0EgEDO8xtjTS/XYxGVDOqdHrvHBD4tTiG3CRMuFBp/ZUW/Vxv8q4adhrzmDAT3N8MvGAdZP7SWkutL6y4jx3AOt+3ItJ4MjHggSLoFKjTMuXexLWIlhGVSttSZE79XalJgkRX2F/N5q4JuzSqiEd7NTE78Al91rFIrbySDPZhhyqPDl6+Nl2uFrE/hB5DhmLWIWruoEHAB4cqtlTphdaitpFQFoGT4Selud0o+UwTzNEwWGdgW9kGXyxuh6qI8SvbU5YfZ6QE0iQ6PfX0iZhNKNCDP/GhDj+jQtr+DchJPFvUoDcPyOw7q/gaUPUZTksxE65F59a7+dLNLkdxHm5LHNJWlhQ+BFeniRk50s8gvh9i+mOM90vPV333RZTnrzbUuoNuVaoyo64+CxEB+qTSTANMtKqy4Ia6JVdMPkw0le4PfYhxuIlAjgRE54RExAtNWS6RXBPcKrSrEgl3CW5NyHOg0C3jxcGFOKfjo6QO9qpFSVZCM8tfs7CfemrzL+e9vz3IUj5Km1aIqmDgfFIUQeaajnQAEYqUMqqS/c1bRrcLrlfhECYLXVn3Ax9k27mfh6CdLC+nXHvhW/UvX+EYSNQ1a7q3SeQ0XpkODJsep2uDMhDMsjm2KGx//S6OV+ZylWp2MFWpSzVmo22VC8dK536+/XGh3VZ0GbYvsJt3wRh1inxEixyC3f2FmzWG5glMVVKh4dSqd3iNyEedzuwwXM/i8DWAk/wGOc9DG+rbq7ifZ5WEU1sLp2WB6sjjJJua6caVvi8K+eNzeaUzi+ZXgVxZFiarT2kl/TudRRtdKKy4I5UdNvj1klfneGmh3lEFtwRjxsrmp4xptUf56O5IySnOM1jcer66T3YOhZnjMZZdmFT7fdNqtxVxDuVtEdmjDt3IeHGgSZvGgksplj7HaZXY7o7iEY1S5le64dxPCGnNMu7Es19iiVM8FkCu5nEwECQUwn+JrZ3zktGvurwy57kzp9CkHDJmG6jI9U+Y+pixbAQktk3pWNNqDMIOHT6Tdd1QNQea4OM4RBX0nLX1aGXHDJ9HbNNVRsEeKOMV3AHPOYDeNbV28O6FFus3lQozq47LjkMZWfclc39OQQZt3HrecGqAai6btj+cM1DeU4uX/CEZJ+CnSHAdGRp2DBPOW9O7OGSt/rASbTDoxcyJ6Sv6c5NMv9wL2Si3oVM8YfYwhnDWXeLwsx1g3UndO+mw9Q3Bqrezxtmf/rwSisey/f8Ojr5Xyj0q3DkqFn8dXjniAs23WDI5HRAunSC8NkGXJvY+xPas+Ao/tjnxbK176HWecuN1I/bO+c++eb+FoaM9KGpjkbEZKXa4UDt5/4YhijNkuIaRAr1YzzEMrDRaN+RZKXUqHZqdXqErUOC3n/Su1ks/zexNKBfTgUTwZgKLUmuFKqVewV6iakct4WouAMXg9Sm3Gg3ip1DIUZtuHebSrXSagtxZta5AuWkNdykV2qFplJoMYLSOKQmO3iqKlXpqFA6FhLUyGIWIUn9dMbKlVJbADEL2w7Bs8RGUmyCWH+Vy9jurIvPQZZP8P1K0z3dlQ8K1Y7UEjbECxhGOlq6I5Xed8np3APYcqPrND4svm0nvK/AbllqylKJ3t0qrYbcVg4rUrXcou5UnOAgqSrhElyFy36O4jH/5RaJodx3p0vJakNmas8xhVmoJTULMlpyNOCcUrD2QQFXA1dw48CGLN0eivsAwd8YTlGJJT71cTf7l9/Rl8v0rct5BF1+zgd+lXrhlL38M93ekDVnlb2urmT1rwuteiCet+oNWQdPYCdg1e1BV7H7uWP3SsCub3m55Us+y75B60CGG
Dfz9IeXJVGdmR9XdfAepLrGYDzkbz7naaaRY0cgl6HJ8eAINh+QifIFtHzuaNlAudJUkQ5bixB03pvtaiDK9OYA5LPnx8659laCj2vPBx0CG0G0nGtpRcC4/76Em/nYfeRbhhV/+7da8PgTIVdfA/Ghamqq7kZtyYoHrLi4ERxRFhW66MXr93bV/ESlr483baIO8+Z4f3GigjNITGxuYGfFZJZ7FaJtYtmY4CMFfcI6tPVi9Jd/v/yMTAl4UoceO2k99Pgkzt8j/w99CcIy",
+ "view": {
+ "recursion": "fail",
+ "enum": "number",
+ "yt_mode": true
+ }
+}
+@@;
+
+$udfParse = Udf(Protobuf::Parse, $config as TypeConfig);
+$udfSerialize = Udf(Protobuf::Serialize, $config as TypeConfig);
+
+$data = @@
+{
+ "Any": "<x=y>{a=1;b=c}",
+ "x": "aaa"
+}
+@@;
+
+SELECT
+ $data,
+ $udfParse($data),
+ $udfSerialize($udfParse($data)),
+ Ensure("Success", StablePickle($udfParse($data)) == StablePickle($udfParse($udfSerialize($udfParse($data)))), "Fail")
+;
diff --git a/yql/essentials/udfs/common/protobuf/test/cases/yt_mode_enum.in b/yql/essentials/udfs/common/protobuf/test/cases/yt_mode_enum.in
new file mode 100644
index 00000000000..e69de29bb2d
--- /dev/null
+++ b/yql/essentials/udfs/common/protobuf/test/cases/yt_mode_enum.in
diff --git a/yql/essentials/udfs/common/protobuf/test/cases/yt_mode_enum.sql b/yql/essentials/udfs/common/protobuf/test/cases/yt_mode_enum.sql
new file mode 100644
index 00000000000..f3b2935cc64
--- /dev/null
+++ b/yql/essentials/udfs/common/protobuf/test/cases/yt_mode_enum.sql
@@ -0,0 +1,50 @@
+/*
+import "yt/yt_proto/yt/formats/extension.proto";
+
+message Test {
+ enum Color
+ {
+ WHITE = 0;
+ BLUE = 1;
+ RED = -1;
+ }
+
+ required Color ColorYtIntField = 1 [(NYT.flags) = ENUM_INT];
+ required Color ColorYtStringField = 2 [(NYT.flags) = ENUM_STRING];
+ required Color ColorField = 3;
+}
+*/
+
+$config = @@{
+ "name": "Test",
+ "format": "json",
+ "skip": 0,
+ "lists": {
+ "optional": false
+ },
+ "meta": "eNrFWt2PG1cVx59rH+/a49lN4m4a0rptdpM23ir9ZAOl/pjdOPUXY7vpRqpGs/Zdx4k9486Mk2yEEBJPPPKGEEKIl0r8AUiIFyokHpGQoE8gIYoEfwJvcO69M+MZe5x1K7Xtw9Zzvu+5v3vPufcG/nATnhvo+mBE9iaGbunH05O9PjF7xnBi6UaB0cQMlyg4Evk6ZA+GI1JxBdvEEt+G6AkSc6HnIrupGy8W5pQKfo0WJctMI/+vKGwGcEURopo6phZDu0mZ/RZzsDZRew/UAcmFGdn5FL8J0CcTovWJ1jvNRTCKpOyhiC9DdjI9Hg17ikcMUCwmC5xRmQnvQOYRUR94RVNMNE3JHsEyrI+JaWIAinU6IbkoG/1zC6OfH3nK1uqgkliEJNGmY24htiR/EkrMW0lQNdvEmkmMh8MeycWZgZ0FA23On7fh6OFQkuSxRTRzqGu5NWbkpYBZJKP+vImZnvgmrOkTC3+ZuQTOT+rGs4FAaHIZ2REWqyCY+tToEaWn94ky1E70XJIZuLw4ECZYRrkqislp0/ctnoe4eapZ6uPcOkOI/ZX/bRwyq0DsJsRO6CgRYJ8jB1zHn8T4F0xiEVIaMS3S54iIrIgp4EqLkIp+IUh9ABk3JMVQtYGDzb2zIilIjp5M1eQ08X2LFQBdI/oJLq/eCHESnKUmFVnIks6pvZH4rRnU1pYgpc4X2QLaupA2CMU9ppiPLMmCKJw5MtlW4wPbMLyf4gvgEhQGK2C70LpDbCBt+wmk/ekRtyBmWqphMRTGZP4hChDBTYbtcjGZ/hTfnQ04wgZ8ZXFGfZbnx739Fmz4BrCq6/z34VygaQTJ1lQbahYxJgahiOWucv9eW4K5rleaW5E3p4vEa8nEf9aEH+J/4fwncdgKWjOByxeXPyL4mBgsSTHZ/sIVERupx2SEqyG0m77x8kqrslCjKjLXFN+BqL1FUwvXVrNA15LM9MSLkKT/59iIs5gTlEBxIW5Dgi2TPnFKm/tNgdUnJ+p0ZCkP1dGUMMAjsGzi+5QmXoYUX1VD1HnMds+YzBdalVKo+/smrmUbmswFJTD3b81v3JeCh7ewlrBUMonX7KlXR7ksGkjIaU5u2tT8b8IQZRtLBlKdo5akVJrdUk0SQmIagBEOas1iRwi739VG583XhYir0OWEqFfgtRtCDAG7zg1UP5AqKBH3U1BmTdyAJKOUms2akHBttjtytXEoJF2bh3Kz2xLAtVCX2u3ioSSkXInSUUdqC+u+sNDFhutCanTrQlrMwgZ34QSRmSNhpMIsEG4l6yOghJgvQ4zBEOGerhVLUk1ptjrVZqNYw9y5NFn6XrcqSxXMn4fWkoodpEXyPdgK2lADl5AHC+ElWGC25rGQ/ywMmwFFJdDJdyHGsczL7NXA6sSQvVBqmZ631YgsaTWoiQXAfriw+fP6+OYq9ZHRPl8RiAUUgZuQXTC08mb8oxDkliXnjC0x7NsSb85n8Pnlk7Aw1x+H4HxwSxkYwzsQHxPrnu60VVcCijVlz0+2reWt9pFlfSGPZiHSH4fhXKDxwEAvAQy1ydTirRPfiZOMwjYvustOLZcfYXzgJCbw9izQKAv0m0tGugDMV0HojYZEsxTTMog6HmoDVmoS+7ETdWQSOcPZbYdLNRiADI9G3KfB2a5G/idJSHkacPF5WL+vPlQV51DFM5GitJZ9sHoVtpgIjhEd9UaqabKkJZioSHlNyio7HPEN2GQaY6xNw8mIKPSYZ7KS40aWpRJ1W4BGZGJbeImpDYhGDNUiCvloirKKqvWVe6p5L7dFDZTCuZD8DBU8tOUkJlbU+rdQSNyH88wKZgQHrPTukd4DZWqdvJ276PXPImwzmTIV6aKE2IZ1Ohnj4ROMWTdYDU0HbE2eDBaatkIdzx/7sXZLkipyyrFyoBsUUAPdTXCKA2qgO+nFZPV6fMx4NrUPY2ZO8CWr1zvkAjbGTVwP52bJ8ipmF0Y5r4oeJ6eLiqLP4
+R0Xu0t2JrcmyzqXfPqiSgyr/gSO5kbpIdz1c9d8Ip7GGIB4d9TiKYeI2JUA3+YuctMOGoZUzxF9HoSYxYZT7wGWf34fo8jUkEzJ8PHuRdZejOUwfDYYmTxKto276nGhG3JJk4Gyb3ERTm94ZDpijAfDU8sx+IOXxGMZlvbBYFmwud4l4mlke71i8WASs6cXuWNGxJnHl+H81QINzq1r1qqR/oVJk3TXreZvjiN6fGpC6zrPE5Kc6D1pTXn+X1Y9+JeTAJHPjYk2ASVmxXavtyVsBfBNqpW7UiK3G10qnVJiHga+9vRxBVhh3YNaf9JTfw2XHCuVUxiKY+GBluQY5UXRxc/W7ZUm1h3UOaAiYg1uKzpuAHgxqEafWV2oaWoPQSkqfNC6Fp5VtPbtvCsQhRt0Tn4RpbBF7vrsTpB/FrGKevPE3ICCRL9/kqOSZjNqBDDvzEhjn/jwhr+TQhJ/JsUIP/PCKx7O3h6IOqxGhZiu9wLT+33C2Va3PbjvF2WuSZtLCj8CG9PErL9JR5C/L7JbMeZ7Refbvt2mxlP3m4rjaZcL9ZkW118BqIj9cmpvwwy0qrTghbolZ2/+DDSl7g89iDG8iUC2BkTviEmIFpuynSJ4JrgVKVVlcq4SvJvQJwngS4fNw2oxD9tGyGH262XJFkIL0x+3sR16enMv5rj+e9DkPJ02rRFUkcj/ZGijoaqaUMDGKlIKatO3Ve0aHC55H8RAmG+1Z0LM/R1hpn/eQjS/v52Lrznv9bw/hGGDV9Xu2p0H0F22CfjiW7R63RlRB6SUS7PNo29p/fNhepMr0bV9jerFaneanakRvlI6TbeazTvNGRhOCf2JS77FgjzQYkXICgsXNmbkGk0sUpiqZQODqRyp81vQlzpjm+B538Wgc2ASHAb52cYfqy6vkr0BdpFtPBwaR95sDvCLGnW8GSIHT4/lfODTWZG55dMr4A40c2hNXxIL+md6yh60InKgsOpapYrrZGBOidNN/OILDgcVxo7mr4+pd0fl6O1IySnOM0Vsfv62T3YOjZnjMZFdiCjDgYGNe4Y4ieVtEtmgtu3IeHkgRZvmglsptjxO0yvxjSHiU6HpjK71g8jPyGnhqZ7JZr/GFsY/7MEnmYSIx1BTjX4m9juGS8ZhZotL7ua238KQcIhY7mNTlTrHjMXK4WFkMy+KR17Qo1BwKbTbzqvI6L22TFIH49xJk1nXm162SbT1zHLUIcjn2yUyQoOwxXeh2ccu33sS/GI1Z8pxdl1xwVboGLzHd38n0OQdQ5ufTdZdQBV03TLm65FKC/oFYqukuwxsD0GmHGWpg3rlP3mxB4u+VEfOIme8OiFzDEZDDX7Jpl/OBcyUfdCpvQDPMLp4/lwS8LcdYN5K3T3ui000EeqNijoxmD28Eo7HtPz/Do5/m8o9Ktw5LBV+nV4+5ArtpxkyORkRHp0gPDJBlw5tfZO6ZkFufhjjzfL5p6LWvstN9I46myf+eSb/1sYstIdQ51MiMFatYOROsj/MQxRWiXFNYgUG0e4iWVho9m5JclKuVnr1ht0C1uHBL3/pHez2P5nsDWgX3YHE8GcCm1JrhZr1btFeompHHWEqLgN5/3UltzsNEvdAyFGfTh3m0qt2u4IcebWvgLlpDVcpJfqxZZSbDOC0jygLru4qyo16bBYPhIS1EmwiJCkcdq8SrXcEUDMwZZNcD0xTooNEPuvSgWPO+vis5DjA3yv2nJ2d+X9Yq0rtYUN8RymkXLLt6Tyew45nX8Am0527YMPy2/HTu/LsFORWrJUpne3Srspd5SDqlSrtGk4VTs5SKpJOAWX4aJXonTEfzlNYij/ndlUst6Qudq1XWEVakutooyebAs4phSsvV/E2cAZ3Ni3IEeXh+I8QPA3hhM0YopPfdzN/eV39OUyfeNiAUFXWIiBX6WeO2Ev/8y2yzIXvLLX1ZW8/jXQqwviRa8uy9x/Ats+r84ZdBW/n9p+L/n8eqaXe77g8exhmvsyx
Libpz+8LMnq3Pi4qf13IdXTR9Mxf/M5yzLNHNsCuQ4tjvuHkHlATpXPYeVT28oG6pVnhjTYDELQWW+2q4Eo218AkMefFztn+lsJPo4/D3QIbPjRcqanFQHj/PsS7uZD55FvGVa8x7/VksefCLn5OogPVWOoak7Wlsy4z4uDG8FWZVmhk166endHLZyq9PXxukXUccGY7gUXKvhpGBKnFvewvWI1y38WgmiHmJZ4EzKINd04sqq2f9bFpW+kCpRfYMxS9Jd/vxiW5yWxgxZtEr8PPrD/9UegfkQOEMYdGxiVq0YWVGUPO1+AGPuit2Z3blU7Er8cKGFxYJcDEfqK9z/nv9D/AR5f/qs=",
+ "view": {
+ "recursion": "fail",
+ "enum": "number",
+ "yt_mode": true
+ }
+}
+@@;
+
+$udfParse = Udf(Protobuf::Parse, $config as TypeConfig);
+$udfSerialize = Udf(Protobuf::Serialize, $config as TypeConfig);
+
+$data = @@
+{
+ "ColorYtIntField": 1,
+ "ColorYtStringField": "RED",
+ "ColorField": 0
+}
+@@;
+
+SELECT
+ $data,
+ $udfParse($data),
+ $udfSerialize($udfParse($data)),
+ Ensure("Success", StablePickle($udfParse($data)) == StablePickle($udfParse($udfSerialize($udfParse($data)))), "Fail")
+;
diff --git a/yql/essentials/udfs/common/protobuf/test/cases/yt_mode_map.in b/yql/essentials/udfs/common/protobuf/test/cases/yt_mode_map.in
new file mode 100644
index 00000000000..db7b187bc30
--- /dev/null
+++ b/yql/essentials/udfs/common/protobuf/test/cases/yt_mode_map.in
@@ -0,0 +1 @@
+{"TestField"="{\"dict1\":[{\"key\":\"k1\",\"value\":{\"a\":\"1\"}}],\"dict2\":[{\"key\":\"k2\",\"value\":{\"a\":\"2\"}}],\"dict3\":[{\"key\":\"k3\",\"value\":{\"a\":\"3\"}}],\"dict4\":[{\"key\":\"k4\",\"value\":{\"a\":\"4\"}}],\"dict5\":[{\"key\":\"k5\",\"value\":\"v5\"}],\"dict6\":[{\"key\":\"k6\",\"value\":\"v6\"}],\"dict7\":[{\"key\":\"k7\",\"value\":\"v7\"}],\"dict8\":[{\"key\":\"k8\",\"value\":\"v8\"}]}"};
diff --git a/yql/essentials/udfs/common/protobuf/test/cases/yt_mode_map.in.attr b/yql/essentials/udfs/common/protobuf/test/cases/yt_mode_map.in.attr
new file mode 100644
index 00000000000..f10d440a236
--- /dev/null
+++ b/yql/essentials/udfs/common/protobuf/test/cases/yt_mode_map.in.attr
@@ -0,0 +1 @@
+{schema=[{name=TestField;type=string}]}
diff --git a/yql/essentials/udfs/common/protobuf/test/cases/yt_mode_map.sql b/yql/essentials/udfs/common/protobuf/test/cases/yt_mode_map.sql
new file mode 100644
index 00000000000..b3c7377390d
--- /dev/null
+++ b/yql/essentials/udfs/common/protobuf/test/cases/yt_mode_map.sql
@@ -0,0 +1,47 @@
+/*
+syntax='proto3';
+
+import "yt/yt_proto/yt/formats/extension.proto";
+
+message Test {
+ option (NYT.default_field_flags) = SERIALIZATION_YT;
+
+ message Inner {
+ string a = 1;
+ }
+ map<string, Inner> dict1 = 1 [(NYT.flags) = MAP_AS_DICT];
+ map<string, Inner> dict2 = 2 [(NYT.flags) = MAP_AS_OPTIONAL_DICT];
+ map<string, Inner> dict3 = 3 [(NYT.flags) = MAP_AS_LIST_OF_STRUCTS_LEGACY];
+ map<string, Inner> dict4 = 4 [(NYT.flags) = MAP_AS_LIST_OF_STRUCTS];
+ map<string, string> dict5 = 5 [(NYT.flags) = MAP_AS_DICT];
+ map<string, string> dict6 = 6 [(NYT.flags) = MAP_AS_OPTIONAL_DICT];
+ map<string, string> dict7 = 7 [(NYT.flags) = MAP_AS_LIST_OF_STRUCTS_LEGACY];
+ map<string, string> dict8 = 8 [(NYT.flags) = MAP_AS_LIST_OF_STRUCTS];
+}
+*/
+
+$config = @@{
+ "name": "Test",
+ "format": "json",
+ "skip": 0,
+ "lists": {
+ "optional": false
+ },
+ "meta": "eNrFWktvG9cVLp8iDyVqNJJtRo7rhHnYcWIqlV+q3KahyJFCl68OqSQyEAxGwyuKNjnDzAxty+iiQFddFVkVCIqi6CZFf0CBopsW3RcI0GZTBCjaAu1P6LLnPmY4fFm0m0cWDufc7zzuuefec869gj/dhhfaltXuko2+bbnW4eBoo0Ucw+70XcvOMZq8zBE5D5GtwMpup0uKPrBBXHkLokdIzIReiFxObb6cG2PKjXLUKVllHNl/RWF1yqgsQ9TUe1Ri6HJSZb/lDCz0deO+3iaZMCN7n/I3AVqkT8wWMY2TTAStSKoBivw6rPQHh92OoQVggLCYKvGB4hB8CZYfEv1+EJpi0DQlB4AFWOwRx0EDNPekTzJRNvsXJmY/PvOU4Goik5yHJDEHPS4hNsN/CiLGpSQomxCx4BD7QccgmTgTcGlCQIOPj8vw+HAqSfLIJabTsczMAhPyypRVJN3WuIghn3wTFqy+i7+cTALXJ7X5/NRAqHGM6oHlEkiONbANohlWi2gd88jKJJmAi5MTYcAC4koIU9POyLd8FuLOienqjzKLLELEV/Z3cVieJ8RuQ+yIzhID7Cl8wHlGnRh/RifmIWUSxyUtHhGROWMKONNkSEWfKaTeh2XfJM3WzbYXmxunWZJTPD6VsqlpMvItFwEsk1hHuL2MLsbJdC/VKGTCSxanGl3528NQW5gRKRW+ySaibR/SNqFxjy7mM0syI3KnzkwVbHxiS3bwU34JfILGwgrYKbToEatIW38M6VH3yGsQc1zddlkUxlT+IUsQwUOGnXIxlf6U3x5OOMIm/Orkio5IHp/3+i1YGpnAvKqzP4QzU0VjkKwNzI7pErtvExqxXFXm3wszYm4/iOZS1NXBJPFKMvGfBelH+F84+8c4rE3bM1O3L25/jOBDYjMnxVTxhTsi1tUPSRd3Q+hyevP1uXZlrkxZVM4pvwVRcURTCVfmk0D3ksr45POQpP/nsRFnNicogcaFvA4Jtk1axEtt/jcNrBY50gddV3ugdweEBTwGliC+S2nyRUjxXdVBnkfs9IypfKOVKIWqv+fgXhahyVRQAlN/a/zgvjB9ehN7CVMlQ1wTS693MysoIKGmObkmqNnfhiHKDpZlSDUP6opWrO3vlBUpJKcBGGG3XMs3pbD/Xao2b16XIj7DPidEg4Brm1IMA3aRCyi9rxQRER+lIGZBXoIko+zUamUp4ctsNNVSdU9K+jL31Np+XQJfQkVpNPJ7ipTyETsHTaUhLY6YhSqWfBVKdb8ipeUVWOIqPCOWx0hoqTQ0hEtZGSEgQs4WIMbCEMM9Xc7vKGWtVm+WatV8GX3n01TlB/slVSmi/wK0upJvIi2SNWBt2oE6dQsFYiE8IxaYrPFYyP4zDKtTkspUJd+DGI9lnmZfm5qdWGRPpFrGFyw1IjNKDSpiImA/mDj8eX68OU9+ZLSnSwKxKUngNqxMCJr7MP5xCDKznHPKkRgeORJvj3vwxdmLMLHWn4Tg7PSScqoNb0G8R9xjyyurXp2SrOnw+GILrmC2j8yqC7k1E5b+JAxnpgqfaugFgI7ZH7i8dOIncZJR2OFFT9mB649H2DhwEgNsDQ2NMkO/OWOmE4H5JkhGt0NMV3Ncm+i9jtlmqSaxHTvSuw5Rl/lwwxulHCyA7ABHfISDD/sc2Y+SkAoU4PKLsHhPf6BrXlPFPZGitLporN6ENQbBOaIio6s7DnNagkFlOlajQwVvRL4Bq4yjh7mp0+8SjbZ5Dks5vmUrFFERAGqRg2XhBcbWJiaxdZdo5MMBYjXdbGnHunOcWaMCdsKZkPocBe4JnMJgebP1DoLkbTjLpKBHcMKacUyM+9rAPdrKnA/qZxY2GKZAIfuIkBuwSBej13mMNls2y6HpKUdTwIO5mmCoYP+xHWvUFaWopjwpu5ZNA6pt+Q5O8YBqW5570VmGweeMvaloxpyMNOIsw9jjABHjDu6HM0NnBRlXJmY5zooa+yeTj
PKIxv7JONstWOsf9yf5rgT5ZISMM77COnObGLhWrcy5IDwwIOcw/A2NmPohRoxu4w8nc5GBo649wC7CMBQ2mGdj8hVYsQ7vGTwiNRRz1HmUeZm5d5kOsHisM7L8Gsp2jnW7z45kBxeDZF7hUE6vemS6I5yHnSPXk3iJ7whGE9Iug0Q9MaL4MoOlkR7Ui8mAIodKX+OFGxKHGq/DWQrCg05v6a4eQL/B0NTtFTE4Yqc9ODzxA+sqt5PSvND60orz7DYsBuNeTgKPfCxIsAgq1Iq0fLmrYC2CZVS51FQ0db/aLFUUKRIo7O9EE69Kl7KfhiE92qnJ34Fz3rWKQ1ztYcdmG7Kn8+Tox8+aQDWI+x5idhlELsNF08IDAA8O3W5pwwstTTcwIB2LJ0JfyvOm1RDgYYbIC+hY+EZmhS9W1z29j/Hr2iesPk+oCSQo9PsraZPQmwkpif8mJcj+IwKLwXqdtj8Gy1ghdqa99MTqPlegqWw7zotjlXPSMoIGG+HFSEIVX/IexO85THacyX75ybLvNJjw5J2GVq2plXxZFezycxDt6o9PRpMeI827CCiBXtCNphpG+hI3wwbEmL9kAOEx6RtyAqKFmko3BO4ATtXqJaWAeyJ7A+LcCXSz+G5AJv4pZIS80f3KjqJK4dGljkqxrIO7MFCHfzXN+B9CkArU1bQg0rtd66Gmdzu6I0IDGClPKfMu3Ve0RWJSPPuLEEjjhe2YmaGv08zsz0OQHq1mx8x78Ws17+9hWBqpYee17kNY6bRIr2+59PJc65IHpJvJskNj48lVcq405CtTtu3VUlGp1GtNpVo40Par36/W3quqUmcM9iVu+zpI40bJ52CaWbizV2G5WsOciIlR2d1VCs0Gv/fw0c2RDZ79OAKrUyzBY5x3LLyJujqP9TlaM9SxlRQNDtZC6CXT7Rx1sJ7nPThvY5aHdH6l9AbIfcvpuJ0H9Ereu3yibU1UlbyRkun6aJO09TE0PcwjquSN+GisX1rWgNZ6HEdzR0hNcZoPEVX88NZrEUsxRuOQS7Cst9s2Fe4J4n1J2icz4PodSHh+oKmaegJLJ9Zsh+lFmOkNotKOow0v8cM4nlBTHce/AM1+ggXL6CME9i6JroVBTjn4C9jlU94tcmWBV33O9T+HIOGRMd1G+7p7zMTFdsJSSGXflI4VoMlCQNDpN13XLtFbrOmxej1cScdbV0EvCDJ9C3NtvdMdwUYZVvIGfPA2POfJbWEVig1Va8gUZ5cb5wSgKMY93uynIVjx2rSW76wKgG6alht012QoT/Dl8j6TGhCw3gMYjsx0G+Yp8cLEnil5Yw+cRPs5ev1ySNodU9wb8w/v+iXqX7/s/DSEHZvVG7d3Rxq7XXDeCd19q91xjweHOcRvtK2ubraH76zsh3EV+6mrbSvw6np7+PO/odCvwpG9+s5vwut7XF3dc49KjrrEoFOGj5fgKpaaNmkNDLJx4m6wM+AIuwSuxNnww1k86UaqB831U19+aaCvKO/Zer9PbFbD7Xb1dvajMERp+pQXIJKvHuDptgJLteY7iqoVauX9SpWebYuQoNeg9IoWu4BlrBnolyhtIuhsqaGopXy5dDdP7zK1g6YUldfh7Ci1rtaatZ39XSlGdXhXnFq51GhKcaZW3IRy0gLu3guVfF3LNxhBq+1Slft43GplZS9fOJASVMl0iJSkdoqxYqnQlEDOwJog+JrYSIpNEAuzYhG7nsXsfVj1HCVaGeaqpvDU63CpqNRVpUBvY7VGTW1quyWlXGxQySUxTySVFfTmRTgfROwc8F9eIRjKfne4Kqz+Y6ouC1WYaRpKPa+iJiFBWpRTsPBuHh2Li7G07UKGbgHNe1LgrwZHKMSRn/hcm/nL7+lbZHrzfA7jJzdhA78cPXPE3vKZbH/ImdDK3kvn0vrXqVr9eJzU6g85249hfUSr11XOo/czoffCiN7A8nLN5wKaA4POtgoxrubJTykzvDo2Py5q+21IGVZ30OOvO
KdJpp5jxxznoQlwew+W75MT7SmkfCakLCFfYSjIhNVpEXTaK+x8QbTSmgiggL5g7Jyqb67w8fQFQofA0mi0nKppzoDx/mKEq/nAe7abFSvBFm8+5/FHPy6+AvID3e7opue1GSs+osWLG0mwMq/QRd95825Oz53o9D3xqkv0Xs4ebDwx9cDfFiBx4nJF60+XprI/W4BokzgulpaxVsdwvyWKBSlHqbkiJbELlp3oLz8/DyoHeehNUSUH0JtDdIqjNz30NfF3FgH0tSE6wdHXPPR18WgUQF8fopMcfd1D3xB/ORFA35iw+4aHvin+cCSAvjlh900PfUv8rU4AfWvC7lseekv8uUUAvTVh99b6GYiVTCy/MMuFdPEoENLXFYChz2lphCeCGKU/Mfv6b3r05SPFlTBJ4tVuO7wV8sRsfjFirn0xYq7/n2K2uJgbs8SsBcUkp3DefGbOW8/MufXUnNvRX39+PnoY54/7/wMXV6Lr",
+ "view": {
+ "recursion": "bytes",
+ "enum": "number",
+ "yt_mode": true
+ }
+}
+@@;
+
+$udfParse = Udf(Protobuf::Parse, $config as TypeConfig);
+$udfSerialize = Udf(Protobuf::Serialize, $config as TypeConfig);
+
+SELECT
+ TestField,
+ $udfParse(TestField),
+ $udfSerialize($udfParse(TestField)),
+ Ensure("Success", StablePickle($udfParse(TestField)) == StablePickle($udfParse($udfSerialize($udfParse(TestField)))), "Fail"),
+FROM plato.Input;
diff --git a/yql/essentials/udfs/common/protobuf/test/cases/yt_mode_no_ser.in b/yql/essentials/udfs/common/protobuf/test/cases/yt_mode_no_ser.in
new file mode 100644
index 00000000000..b6dd409ad4e
--- /dev/null
+++ b/yql/essentials/udfs/common/protobuf/test/cases/yt_mode_no_ser.in
@@ -0,0 +1 @@
+{"TestField"="{\"inner\":{\"i\":{\"a\":\"hello\"}},\"test\":{\"inner\":{\"i\":{\"a\":\"bye\"}}}}"};
diff --git a/yql/essentials/udfs/common/protobuf/test/cases/yt_mode_no_ser.in.attr b/yql/essentials/udfs/common/protobuf/test/cases/yt_mode_no_ser.in.attr
new file mode 100644
index 00000000000..f10d440a236
--- /dev/null
+++ b/yql/essentials/udfs/common/protobuf/test/cases/yt_mode_no_ser.in.attr
@@ -0,0 +1 @@
+{schema=[{name=TestField;type=string}]}
diff --git a/yql/essentials/udfs/common/protobuf/test/cases/yt_mode_no_ser.sql b/yql/essentials/udfs/common/protobuf/test/cases/yt_mode_no_ser.sql
new file mode 100644
index 00000000000..d75aa4af2fe
--- /dev/null
+++ b/yql/essentials/udfs/common/protobuf/test/cases/yt_mode_no_ser.sql
@@ -0,0 +1,36 @@
+/*
+syntax='proto3';
+
+message Test {
+ message InnerInner {
+ string a = 1;
+ }
+ message Inner {
+ InnerInner i = 1;
+ }
+ Inner inner = 1;
+ Test test = 2;
+}
+*/
+
+$config = @@{
+ "name": "Test",
+ "format": "json",
+ "skip": 0,
+ "lists": {
+ "optional": false
+ },
+ "meta": "eNrjWsjIxV2UmlxapFdQlF+Sr9TJyMUSklpcIqTIxZqZl5daJMGowKjBbcStBxLV8wQJBUFkhCS5WEqAghJMYBWsYBVBYCEpKS4usFIwIcTDxZgINocziDFRSp2LFSIsx8WYCTVeAMl4iB2MmUlsYCcZAwC/Qiqb",
+ "view": {
+ "recursion": "bytes",
+ "enum": "number",
+ "yt_mode": true
+ }
+}@@;
+
+$udfPar = Udf(Protobuf::Parse, $config as TypeConfig);
+$udfSer = Udf(Protobuf::Serialize, $config as TypeConfig);
+
+SELECT TestField, Ensure("Success", $udfPar(TestField) == $udfPar($udfSer($udfPar(TestField))), "Fail")
+FROM plato.Input;
+
diff --git a/yql/essentials/udfs/common/protobuf/test/cases/yt_mode_plain.in b/yql/essentials/udfs/common/protobuf/test/cases/yt_mode_plain.in
new file mode 100644
index 00000000000..f397ecd77cf
--- /dev/null
+++ b/yql/essentials/udfs/common/protobuf/test/cases/yt_mode_plain.in
@@ -0,0 +1 @@
+{"TestField"="{\"inner\":{\"i\":{\"a\":\"hello\"}}}"};
diff --git a/yql/essentials/udfs/common/protobuf/test/cases/yt_mode_plain.in.attr b/yql/essentials/udfs/common/protobuf/test/cases/yt_mode_plain.in.attr
new file mode 100644
index 00000000000..f10d440a236
--- /dev/null
+++ b/yql/essentials/udfs/common/protobuf/test/cases/yt_mode_plain.in.attr
@@ -0,0 +1 @@
+{schema=[{name=TestField;type=string}]}
diff --git a/yql/essentials/udfs/common/protobuf/test/cases/yt_mode_plain.sql b/yql/essentials/udfs/common/protobuf/test/cases/yt_mode_plain.sql
new file mode 100644
index 00000000000..87e9fc6bd26
--- /dev/null
+++ b/yql/essentials/udfs/common/protobuf/test/cases/yt_mode_plain.sql
@@ -0,0 +1,113 @@
+/*
+syntax='proto3';
+
+message Test {
+ message InnerInner {
+ string a = 1;
+ }
+ message Inner {
+ InnerInner i = 1;
+ }
+ Inner inner = 1;
+}
+*/
+
+$configNO = @@{
+ "name": "Test",
+ "format": "json",
+ "skip": 0,
+ "lists": {
+ "optional": false
+ },
+ "meta": "eNrjamXk4i7ISczM0ysoyi/JV8rjYglJLS4RUuRizczLSy2SYFRg1OA24tYDiep5goSCIDJSUlxcYD6YEOLhYkwEK+YMYkyUUudihQjLcTFmQs0QQDIDYhBjZhIb2FpjAIm1I9Q=",
+ "view": {
+ "recursion": "bytes",
+ "enum": "number",
+ "yt_mode": true
+ }
+}
+@@;
+
+/*
+syntax='proto3';
+
+import "yt/yt_proto/yt/formats/extension.proto";
+
+message Test {
+ option (NYT.default_field_flags) = SERIALIZATION_YT;
+ message InnerInner {
+ option (NYT.default_field_flags) = SERIALIZATION_YT;
+ string a = 1;
+ }
+ message Inner {
+ option (NYT.default_field_flags) = SERIALIZATION_YT;
+ InnerInner i = 1;
+ }
+ Inner inner = 1;
+}
+*/
+
+$configYT = @@{
+ "name": "Test",
+ "format": "json",
+ "skip": 0,
+ "lists": {
+ "optional": false
+ },
+ "meta": "eNrFWs2P28YVrz5XetJqKe7aVtZxbCsf3jixNnA+u27TaCXuRq6+SmmTrIGA4EojmTZFKiRle42iCNBTj7kGRQ+9pOgfUKDopUUvPRUo0OZWoGgLtH9Cj30zQ1KkPrxygCQ5bMQ372ve/GbeezOGP96CK0PTHOpkd2yZjnkyGez2id2ztLFjWiVGEzc4R8njKDYgf6DppOozdogjvgPxARILkSuxnczNF0ozQqWwRJuSZSZR/HccNheMiiLEDXVENUZ20jL7LRZgbaz27qtDUogysvcpPgfQJ2Ni9InROy3E0Iu0HKCIr0B+PDnRtZ4SYANkS8gCH6hOma/BxkOi3g+yZhhrjpIDjBXIjohtowOKczomhTib/ZW52c/OPONKdVFILEOaGJMR15BYEj8JOWa1pKiYq2LNJtYDrUcKSabg2pyCDh+f1eHJ4VTS5JFDDFszjcIaU/LiglUken9WxVROfAvWzLGDv+xCCtcnc/PZhUBocR7ZYxZrINjmxOoRpWf2iaIZA7OQZgouz0+EMVaQr4Zscs4OfYvnIWmfGo76qJBlCHG/ir9NwsYqELsFiQGdJQLsKWLAZcJBTH7FIJYhYxDbIX2OiNiKmAIuNA+p+FeC1Eew4bukWKox9LC5e5YnJcmTk6mYnCOhb7EKYBrEHOD26umIk8VRalGWuSiZnNrTxe9Ooba2BCkNvsnm0HYEOYtQ3GOI+czSzInSmTOTXTE+sXUr+Ck+Dz5BYbACdgplPWITaduPIRcOj7gFCdtRLYehMCHzD1GAGB4y7JRLyPSn+N50wjE24ZfmVzSkeXbe22/DemgCq5ou/hjOLVSNINmaGJrhEGtsEYpYbqrwn7UlmDsKcnMt8uZknng9nfrvmvAp/hct/iEJW4v2zMLti9sfEXxCLBakhOx+4Y5I6OoJ0XE3RHZyN19ZaVeW6lRE5pLiuxB3j2iq4fpqGuhekpmceBHS9P8cG0nmc4oSKC7EbUixbdInXmrzvymw+mSgTnRHeaDqE8IAj8ByiR9QmngZMnxXaSjziJ2eCZlvtBqlUPP3bNzLLjSZCUpg5t+ePbgvLZ7e3F7CVMk4XneXXtULeVSQknOc3HKpxd9EIc4Olg3IdI/bklJtHe3XJSEi5gAY4aDeKneFqP9da3bfekOI+QJHnBAPMrx+U0ggYLNcQe0jqYocyTAFedbEdUgzyn6rVRdSvs5OV641D4W0r/NQbh21BfA1NKROp3woCRmfY/+4K3WEbMgtNLHum5CaRw0hJ+ZhnZvwnNiYIaGnwtQRriUfIiCHWKxAgsEQ4Z6rl/elutJqd2utZrmOsfNpsvSjo5osVTF+AVpbKneRFiv2YGvRgbpwCwWwEF2CBaZrFgvFf0Vhc0FSWWjkB5DgWOZp9uWF2Ykhey7VMrlgqRFbUmpQFXOA/Xju8Of58a1V8iOjPV0SSCxIArcgP6do5cP4pxEoLAvOGUdiNHQk3pqN4NXlizC31l9E4PziknKhD+9CckScu6ZXVr20IFnT4dnFdqWC2T62rC7k3sx5+rMonFuofKGjlwA0YzxxeOnET+I0o7DDi56yE8cfj7Fx4CTG8M7U0Thz9LklM50D5msg9HSNGI5iOxZRR5oxZKkmtZcYqLpN5A0+3PFGqQQDkBWQSIYk+LAvUfwsDZlAAS5ehew99YGqeE0Vj0SG0tpuY/UabDEWnCMa6umqbbOgpRirSMdadKjijYhvwiaTGGFu0sY6UWibZ7OU43uWpxwNl4F6ZGNZeImJDYlBLNUhCvlkgryKavSVu6p9t7BFFexHCxH5Gcp46PJJjK1s9N9HJnEPzjMtGBGcsNK7S3r3lYkzeKdwMWifedhhPBXKcoQcYgeydDFG2mP02bRYDs0tOJoCESy1XIEG9h97iU5bkqpyxtNyYFoUUEPTD3CGA2poeuHFYPV6fM7Ym7rNmF0QQsHq9Q45g4txG/fDuWmwgoL5uVnOiqLF8em8oBiyO
D6dFXsbtsZ3x/Ny14NyIrLMCr7IOnOL9HCt+oULQfbAgFhC+PcUYqgniBjVwh924TJjjjvWBLuIXk9ig2U2Jl6HvHlyr8cRqaCagfao8AIL7wYdYHhsM7L4Muq276rWmB3JNi4GKbzIWTm96ZHpjrAfagPH03iN7whGc7XtgEAjETK8w9hySA/axWRAOadGX+aFGxKnFt+A85QJDzq1rzpqgPtVxk3D3nAHQ35ak5NTH1g3uJ+U5kHrayvOi3uQDeJeTANHPhYkWARVWlVavtyRsBbBMqpe60qKfNTs1hqSEAsU9rfjqZeEa7RqyIU7NfF7cMG7VrGJozzULLYhRypPjj5+tlyuDnE+RJ4DxiLW4bJh4gGAB4dq9ZXphZai9hCQtskToa/lWcPsuMzTDFF2WWfgG1sGX6yuR+oY8etYp6w+T8kpJEj0+xtpkzCacSGBfxNCEv8mhTX8mxLS+DctQPGfMcgGK3jaEPVYDouwU+75J9b7pQpNbntJXi7LXJIWFhR+hJcnKdn9Eg8hec9mupNM9wtP1n27w5Snb3eUZktulOuyKy4+A3FdfXwaToOMtOqyoAZ6ZRdOPoz0NW6PXUiweIkAbsSE74gpiFdaMt0iuCc4VWnXpArukuKbkORBoNvHDwMK8U9XR8QbPWrsS7IQnVv8oo37MlCZfzPt+e8jkAlU2rREUnXdfKiouqbaLjSAkcqUsurSfUObBrdL8RcREGZL3Rk3I9+mm8WfRyAXrm9n3Lv6rbr3jyish6raVb37BPJan4zGpkOv0xWdPCB6ocgOjd0n182l2lSuTsX2NmtVqdFudaVm5Vg5av6w2fqwKQvaDNvXuO3bIMw6JV6ARW7hzt6EjWYLsySmSungQKp0O/wmxOfuhjZ48fMYbC7wBI9x3sPwturGKt6XaBXRxubSbXmwOsIoGY420LDC5105b2w2pnR+yfQqiGPT1hztAb2k966jaKMTlwVvpGY4PrdBhuoMNz3MY7LgjfjcWNH0zQmt/jgfzR0ROcNpPotb10/vwbJYnDEaZ7kGG+pwaFHlniLeqeR8MmPcvg0pLw40edNIYDHF2u8ovRozvEE0qtnK9Fo/iuMpOaPZ/pVo8QssYcLPEtjNpHQTQU4l+JvYzhkvGaW6yy/7ktt/jkDKI2O6jY9V5y5Tl9iPChGZfVM61oQGg4BLp990XXWi9lkbZI5GuJK2t64uveKS6euYY6maHuKNM17BG/CZ9+AZT28f61JssfpToSS77rjgMlTdcU+2+JcI5L3Gre8HqwGgGobpBMM1D+U5uVLZF5IDCrZHANORpWHDPOW+ObGHS97qAyfRDo9eyJyQoWa4N8n8w7uQifsXMvs/wRbOHM26uy/MXDfY70fu3HCZhqauGsOSaQ2nD6+04rEDz6/jk/9FIr+Mxg7b+7+Obh9ywbYXDJkMdNKjE4TP1+EGlpoW6U96ZPfU2WU7foBdAldu7/rgdZ90Y83j7vaZL78U1nnpQ0sdj4nFKrYDXR0WP4tCnCZLcQ1i5eYxnmV5WG9135dkpdKqHzWa9CTLQopeg9IrWuwCNrBCoF9uIRPD0AodSa6V67U7ZXqXqRx3hbi4DefD1Lbc6rb2jw6EBLXhXXEq9VqnKySZWfcmlJPWcK9eapTbSrnDCErrgJo8wsNVqUuH5cqxkKJGFrMIaeqnO1atVboCiAXYcgm+JTaSYRPEMqxaxa4nW7wPm16g3FaGharrRuoVuFaV2rJUobexSqcld5WDmlSvdqjmmjtPJNUljOZluBjk2D/mv7yyL1L8/nRVWLXHTO24pjCvdKR2WUZLrgYhK2Zg7YMyBhYXY33PgQIFvOI9KfBXgwEqscUnPtcW/vo7+haZu3mxhPgpzfnAL0fPDdhbPtPtD9lzVtl76UpW/7bQqo/Heav+kL33GLZDVr2uchW7X7p2L4XsBpaXW74QsBwYtPdkSHAzT35KWRLVmflxVXvvQaZn6pMRf8U5SzONHDvUu
AxNd3uHsHGfnCpPoeVLV8s6ylWmigzYXISgs15hVwNRvj8HoIC9IHbOtLcSfDx7AegQWA+j5UxLKwLG+xcj3MzH3rPdMqwEG7rVgscf/bj6BogPVEtTDS9qS1Y8ZMXDjeCKsqjQRd9/7U5JLZ2q9D3xhkPUUcma7D4x9cCfIpAd66pmnHJj20+XqoqfRiDeJTYtwxKagQUA68QyNzMlSi3VKEnmI9tXsHymP9gfPKEjqnuhHVH34r/6+8X49g1I8MHnIKK5moSAJq4uonF2/vckyV8y/w/fjdbu",
+ "view": {
+ "recursion": "bytes",
+ "enum": "number",
+ "yt_mode": true
+ }
+}
+@@;
+
+/*
+syntax='proto3';
+
+import "yt/yt_proto/yt/formats/extension.proto";
+
+message Test {
+ option (NYT.default_field_flags) = SERIALIZATION_PROTOBUF;
+ message InnerInner {
+ option (NYT.default_field_flags) = SERIALIZATION_PROTOBUF;
+ string a = 1;
+ }
+ message Inner {
+ option (NYT.default_field_flags) = SERIALIZATION_PROTOBUF;
+ InnerInner i = 1;
+ }
+ Inner inner = 1;
+}
+*/
+
+$configPB = @@{
+ "name": "Test",
+ "format": "json",
+ "skip": 0,
+ "lists": {
+ "optional": false
+ },
+ "meta": "eNrFWs2P28YVrz5XetJqKe7aVtZxbCsf3jixNnA+u27TaCXuRq6+SmmTrIGA4EojmTZFKiRle42iCNBTj7kGRQ+9pOgfUKDopUUvPRUo0OZWoGgLtH9Cj30zQ1KkPrxygCQ5bMQ372ve/GbeezOGP96CK0PTHOpkd2yZjnkyGez2id2ztLFjWiVGEzc4R8njKDYgf6DppOozdogjvgPxARILkSuxnczNF0ozQqWwRJuSZSZR/HccNheMiiLEDXVENUZ20jL7LRZgbaz27qtDUogysvcpPgfQJ2Ni9InROy3E0Iu0HKCIr0B+PDnRtZ4SYANkS8gCH6hOma/BxkOi3g+yZhhrjpIDjBXIjohtowOKczomhTib/ZW52c/OPONKdVFILEOaGJMR15BYEj8JOWa1pKiYq2LNJtYDrUcKSabg2pyCDh+f1eHJ4VTS5JFDDFszjcIaU/LiglUken9WxVROfAvWzLGDv+xCCtcnc/PZhUBocR7ZYxZrINjmxOoRpWf2iaIZA7OQZgouz0+EMVaQr4Zscs4OfYvnIWmfGo76qJBlCHG/ir9NwsYqELsFiQGdJQLsKWLAZcJBTH7FIJYhYxDbIX2OiNiKmAIuNA+p+FeC1Eew4bukWKox9LC5e5YnJcmTk6mYnCOhb7EKYBrEHOD26umIk8VRalGWuSiZnNrTxe9Ooba2BCkNvsnm0HYEOYtQ3GOI+czSzInSmTOTXTE+sXUr+Ck+Dz5BYbACdgplPWITaduPIRcOj7gFCdtRLYehMCHzD1GAGB4y7JRLyPSn+N50wjE24ZfmVzSkeXbe22/DemgCq5ou/hjOLVSNINmaGJrhEGtsEYpYbqrwn7UlmDsKcnMt8uZknng9nfrvmvAp/hct/iEJW4v2zMLti9sfEXxCLBakhOx+4Y5I6OoJ0XE3RHZyN19ZaVeW6lRE5pLiuxB3j2iq4fpqGuhekpmceBHS9P8cG0nmc4oSKC7EbUixbdInXmrzvymw+mSgTnRHeaDqE8IAj8ByiR9QmngZMnxXaSjziJ2eCZlvtBqlUPP3bNzLLjSZCUpg5t+ePbgvLZ7e3F7CVMk4XneXXtULeVSQknOc3HKpxd9EIc4Olg3IdI/bklJtHe3XJSEi5gAY4aDeKneFqP9da3bfekOI+QJHnBAPMrx+U0ggYLNcQe0jqYocyTAFedbEdUgzyn6rVRdSvs5OV641D4W0r/NQbh21BfA1NKROp3woCRmfY/+4K3WEbMgtNLHum5CaRw0hJ+ZhnZvwnNiYIaGnwtQRriUfIiCHWKxAgsEQ4Z6rl/elutJqd2utZrmOsfNpsvSjo5osVTF+AVpbKneRFiv2YGvRgbpwCwWwEF2CBaZrFgvFf0Vhc0FSWWjkB5DgWOZp9uWF2Ykhey7VMrlgqRFbUmpQFXOA/Xju8Of58a1V8iOjPV0SSCxIArcgP6do5cP4pxEoLAvOGUdiNHQk3pqN4NXlizC31l9E4PziknKhD+9CckScu6ZXVr20IFnT4dnFdqWC2T62rC7k3sx5+rMonFuofKGjlwA0YzxxeOnET+I0o7DDi56yE8cfj7Fx4CTG8M7U0Thz9LklM50D5msg9HSNGI5iOxZRR5oxZKkmtZcYqLpN5A0+3PFGqQQDkBWQSIYk+LAvUfwsDZlAAS5ehew99YGqeE0Vj0SG0tpuY/UabDEWnCMa6umqbbOgpRirSMdadKjijYhvwiaTGGFu0sY6UWibZ7OU43uWpxwNl4F6ZGNZeImJDYlBLNUhCvlkgryKavSVu6p9t7BFFexHCxH5Gcp46PJJjK1s9N9HJnEPzjMtGBGcsNK7S3r3lYkzeKdwMWifedhhPBXKcoQcYgeydDFG2mP02bRYDs0tOJoCESy1XIEG9h97iU5bkqpyxtNyYFoUUEPTD3CGA2poeuHFYPV6fM7Ym7rNmF0QQsHq9Q45g4txG/fDuWmwgoL5uVnOiqLF8em8oBiyO
D6dFXsbtsZ3x/Ny14NyIrLMCr7IOnOL9HCt+oULQfbAgFhC+PcUYqgniBjVwh924TJjjjvWBLuIXk9ig2U2Jl6HvHlyr8cRqaCagfao8AIL7wYdYHhsM7L4Muq276rWmB3JNi4GKbzIWTm96ZHpjrAfagPH03iN7whGc7XtgEAjETK8w9hySA/axWRAOadGX+aFGxKnFt+A85QJDzq1rzpqgPtVxk3D3nAHQ35ak5NTH1g3uJ+U5kHrayvOi3uQDeJeTANHPhYkWARVWlVavtyRsBbBMqpe60qKfNTs1hqSEAsU9rfjqZeEa7RqyIU7NfF7cMG7VrGJozzULLYhRypPjj5+tlyuDnE+RJ4DxiLW4bJh4gGAB4dq9ZXphZai9hCQtskToa/lWcPsuMzTDFF2WWfgG1sGX6yuR+oY8etYp6w+T8kpJEj0+xtpkzCacSGBfxNCEv8mhTX8mxLS+DctQPGfMcgGK3jaEPVYDouwU+75J9b7pQpNbntJXi7LXJIWFhR+hJcnKdn9Eg8hec9mupNM9wtP1n27w5Snb3eUZktulOuyKy4+A3FdfXwaToOMtOqyoAZ6ZRdOPoz0NW6PXUiweIkAbsSE74gpiFdaMt0iuCc4VWnXpArukuKbkORBoNvHDwMK8U9XR8QbPWrsS7IQnVv8oo37MlCZfzPt+e8jkAlU2rREUnXdfKiouqbaLjSAkcqUsurSfUObBrdL8RcREGZL3Rk3I9+mm8WfRyAXrm9n3Lv6rbr3jyish6raVb37BPJan4zGpkOv0xWdPCB6ocgOjd0n182l2lSuTsX2NmtVqdFudaVm5Vg5av6w2fqwKQvaDNvXuO3bIMw6JV6ARW7hzt6EjWYLsySmSungQKp0O/wmxOfuhjZ48fMYbC7wBI9x3sPwturGKt6XaBXRxubSbXmwOsIoGY420LDC5105b2w2pnR+yfQqiGPT1hztAb2k966jaKMTlwVvpGY4PrdBhuoMNz3MY7LgjfjcWNH0zQmt/jgfzR0ROcNpPotb10/vwbJYnDEaZ7kGG+pwaFHlniLeqeR8MmPcvg0pLw40edNIYDHF2u8ovRozvEE0qtnK9Fo/iuMpOaPZ/pVo8QssYcLPEtjNpHQTQU4l+JvYzhkvGaW6yy/7ktt/jkDKI2O6jY9V5y5Tl9iPChGZfVM61oQGg4BLp990XXWi9lkbZI5GuJK2t64uveKS6euYY6maHuKNM17BG/CZ9+AZT28f61JssfpToSS77rjgMlTdcU+2+JcI5L3Gre8HqwGgGobpBMM1D+U5uVLZF5IDCrZHANORpWHDPOW+ObGHS97qAyfRDo9eyJyQoWa4N8n8w7uQifsXMvs/wRbOHM26uy/MXDfY70fu3HCZhqauGsOSaQ2nD6+04rEDz6/jk/9FIr+Mxg7b+7+Obh9ywbYXDJkMdNKjE4TP1+EGlpoW6U96ZPfU2WU7foBdAldu7/rgdZ90Y83j7vaZL78U1nnpQ0sdj4nFKrYDXR0WP4tCnCZLcQ1i5eYxnmV5WG9135dkpdKqHzWa9CTLQopeg9IrWuwCNrBCoF9uIRPD0AodSa6V67U7ZXqXqRx3hbi4DefD1Lbc6rb2jw6EBLXhXXEq9VqnKySZWfcmlJPWcK9eapTbSrnDCErrgJo8wsNVqUuH5cqxkKJGFrMIaeqnO1atVboCiAXYcgm+JTaSYRPEMqxaxa4nW7wPm16g3FaGharrRuoVuFaV2rJUobexSqcld5WDmlSvdqjmmjtPJNUljOZluBjk2D/mv7yyL1L8/nRVWLXHTO24pjCvdKR2WUZLrgYhK2Zg7YMyBhYXY33PgQIFvOI9KfBXgwEqscUnPtcW/vo7+haZu3mxhPgpzfnAL0fPDdhbPtPtD9lzVtl76UpW/7bQqo/Heav+kL33GLZDVr2uchW7X7p2L4XsBpaXW74QsBwYtPdkSHAzT35KWRLVmflxVXvvQaZn6pMRf8U5SzONHDvUu
AxNd3uHsHGfnCpPoeVLV8s6ylWmigzYXISgs15hVwNRvj8HoIC9IHbOtLcSfDx7AegQWA+j5UxLKwLG+xcj3MzH3rPdMqwEG7rVgscf/bj6BogPVEtTDS9qS1Y8ZMXDjeCKsqjQRd9/7U5JLZ2q9D3xhkPUUcma7D4x9cCfIpAd66pmjLmx7adLVcVPIxDvEpuWYQnNwAKAdWKZm5kSpZZqlCTzke0rWD7TH+wPntAR1b3Qjqh78V/9/WJi+wYk+OBzENFcTUJAE1cX0Tg7/3uS5C+Z/wfZGtbo",
+ "view": {
+ "recursion": "bytes",
+ "enum": "number",
+ "yt_mode": true
+ }
+}
+@@;
+
+$udfParNO = Udf(Protobuf::Parse, $configNO as TypeConfig);
+$udfSerNO = Udf(Protobuf::Serialize, $configNO as TypeConfig);
+$udfParYT = Udf(Protobuf::Parse, $configYT as TypeConfig);
+$udfSerYT = Udf(Protobuf::Serialize, $configYT as TypeConfig);
+$udfParPB = Udf(Protobuf::Parse, $configPB as TypeConfig);
+$udfSerPB = Udf(Protobuf::Serialize, $configPB as TypeConfig);
+
+SELECT TestField,
+ Ensure("Success", $udfParNO(TestField) == $udfParNO($udfSerNO($udfParNO(TestField))), "Fail"),
+ Ensure("Success", $udfParYT(TestField) == $udfParYT($udfSerYT($udfParYT(TestField))), "Fail"),
+ Ensure("Success", $udfParPB(TestField) == $udfParPB($udfSerPB($udfParPB(TestField))), "Fail")
+FROM plato.Input;
+
diff --git a/yql/essentials/udfs/common/protobuf/test/cases/yt_mode_ser_pb.in b/yql/essentials/udfs/common/protobuf/test/cases/yt_mode_ser_pb.in
new file mode 100644
index 00000000000..b6dd409ad4e
--- /dev/null
+++ b/yql/essentials/udfs/common/protobuf/test/cases/yt_mode_ser_pb.in
@@ -0,0 +1 @@
+{"TestField"="{\"inner\":{\"i\":{\"a\":\"hello\"}},\"test\":{\"inner\":{\"i\":{\"a\":\"bye\"}}}}"};
diff --git a/yql/essentials/udfs/common/protobuf/test/cases/yt_mode_ser_pb.in.attr b/yql/essentials/udfs/common/protobuf/test/cases/yt_mode_ser_pb.in.attr
new file mode 100644
index 00000000000..f10d440a236
--- /dev/null
+++ b/yql/essentials/udfs/common/protobuf/test/cases/yt_mode_ser_pb.in.attr
@@ -0,0 +1 @@
+{schema=[{name=TestField;type=string}]}
diff --git a/yql/essentials/udfs/common/protobuf/test/cases/yt_mode_ser_pb.sql b/yql/essentials/udfs/common/protobuf/test/cases/yt_mode_ser_pb.sql
new file mode 100644
index 00000000000..35945ffe7a2
--- /dev/null
+++ b/yql/essentials/udfs/common/protobuf/test/cases/yt_mode_ser_pb.sql
@@ -0,0 +1,40 @@
+/*
+syntax='proto3';
+
+import "yt/yt_proto/yt/formats/extension.proto";
+
+message Test {
+ option (NYT.default_field_flags) = SERIALIZATION_PROTOBUF;
+ message InnerInner {
+ string a = 1;
+ }
+ message Inner {
+ InnerInner i = 1;
+ }
+ Inner inner = 1;
+ Test test = 2;
+}
+*/
+
+
+$config = @@{
+ "name": "Test",
+ "format": "json",
+ "skip": 0,
+ "lists": {
+ "optional": false
+ },
+ "meta": "eNrFWkuPG8cRDp9LFrnc4exKoinLsunHrmWLa8jPrBLHXHJ2TYWvDEnbK8AYzJJNaiRyhp4ZSlohCALklFPgqxHkkIuD/IAAQS4Jcg8QIHFOAYIkQPITckx198xwhg8tZcC2D2tO9VdV3dVfd1d1C/54E54dGsZwRPYnpmEbp9PBfp9YPVOb2IZZZDJxiyOKLqJQh+yRNiIVD9gmtvgORAcozIWejeylbrxQnFMqBjVaVCwzjcK/o7C9pFUUIaqrY2oxtJeU2W8xBxsTtXdPHZJcmIndT/EZgD6ZEL1P9N5ZLoK9SMo+ifgKZCfT05HWU3wwQFhMFnhDZQbeha0HRL3nh6YYNEPFPmAZ0mNiWdgBxT6bkFyUjf7ZhdHPjzzlaHVQSSxBkujTMbcQWxE/CRHzVhJUzTGxYRHzvtYjuTgzsLtgoM3b5224ejiUJHloE93SDD23wYy8uGQWyag/b2KmJ74FG8bExl9WLoHzk7rx9FIiNDlGdsFiFQTLmJo9ovSMPlE0fWDkkszA1cWBMGAZcVWEyRkr8C1ehLh1ptvqw1yaMcT5Kvw2DlvrUOwmxAZ0lEiwJ4gB1wkGMf4lg1iClE4sm/Q5IyJrcgq40iKlol+KUh/BltclxVT1ocvN/fN6UpRcPZmqyRkS+BYrAIZOjAEur94IebI8Sk0KWYiSwaW9kfjtGdU2VjClzhfZAtu6kDEJ5T2GmI8syTpRPHdksqPGB7Zp+j/F58ETKIxWwHahtCtsoCz/CDLB8Ig7ELNs1bQZC2My/xAFiOAmw3a5mEx/iu/NBhxhA35pcUYDlufHnX8bNgMDWNd14YdwYalpJMnOVNd0m5gTk1DGcle5/2ys4FzXj+ZW5O3povBaMvHfDeHH+F+48Ic47CxbM0uXLy5/ZPApMVmQYrLzhSsiNlJPyQhXQ2gvc+OVtVZlsUZVZK4pvgtRZ4umFq6tZ4GuJZnpiZchSf/PuRFnfU5QAeWFmIcEWyZ94h5t3jclVp8M1OnIVu6roylhhEdiOcIPqEy8Cim+qjTUech2z5jMF1qVSqj7uxauZYeazAUVMPdvz2/cV5YPb2Et4VHJEK87U6+Oclk0kJAzXNx0pIXfhCHKNpYtSHVOWpJSaXYPa5IQEjMATHBUa5Y6Qtj7rjY6b70hRDyFLhdE/YDXbwgxJGyaG6h+JFUQEQ9KELMhbkKSSQ6bzZqQ8Gy2O3K1cSwkPZvHcrPbEsCzUJfa7dKxJKQ8xOFJR2oL6UC30MWm50JqdOtCRszCJnfhdmJrToQ9FWYd4VayAQEixEIZYoyGSPdMrXQo1ZRmq1NtNko1jJ0nk6UfdKuyVMH4+WQtqdRBWaTQg51lG+rSJeTjQngFF5iteS4U/hWG7SWHylIn34MY5zI/Zl9eejoxZi8ctUzPn2pEVqQa1MQCYT9e2Pz5+fjWOucjkz3ZIRBbcgjchOyCobU345+EILcqOOdsieHAlnhzPoLPrZ6Ehbn+PAQXl6eUS/vwLsTHxL5juGnVS0sOa9o8P9mOlv+0j6zKC3lvFnr60zBcWGp8aUevAGj6ZGrz1InvxEkmYZsX3WWnttceYe3ARQzwzqyjUdbRZ1aMdIGYr4HQG2lEtxXLNok61vQhO2oSB7GBOrKIvMWb224r1WAEMn0a8YAGb/Y0Cp8mIeVLwMXnIH1Xva8qblHFI5GispZTWL0GOwyCY0RHvZFqWSxoCQYVaVuTNpXdFvFN2GYaYzybtMmIKLTMs9iR4/UsSxF1B0B7ZGFaeIWpDYlOTNUmCvlkilhF1fvKHdW6k9uhBg7DuZD8FAUeOziJwUp6/30EiQdwkVnBiOCAld4d0runTO3BO7nLfv+sh22GKVNIFxFiG9J0MsbaI+yzYbIzNLNka/JFsNh0FOpYfxzE2i1Jqsgp18qRYVJCDQ0vwClOqKHhhheD1evxMWNt6hRjVk4IBKvXO+YAh+MWrocLs2D5FbMLo5xXRY+Ts0VFM
eBxcjav9jbsTO5MFvWu+fVEhMwrvsgqc5P0cK76uUt+uK9BLCL9ewrR1VNkjGriDyt3lYGjtjnFKqLXk1hjibWJ1yBrnN7tcUYqaGagPcy9wMK7RRsYH1tMLL6Mtq07qjlhW7KFk0FyL3IolzdcMV0R1gNtYLsWd/mKYDLH2h4INBIBx3sMlkG53y8eBhQ5c/oyT9xQOPP4BlykINzo1L5qqz70qwxNw153GgP9NKenZx6xrvN+UplLra8sOS8cQNrPezEJnPmYkGASVG5WaPpyW8JcBNOoWrUjKXK30anWJSHiS+xvRRMvCbs0a8gEKzXxO3DJvVaxiK080Ey2IMcqPxw9/uw4qDaxP0TMEYOINbiqG7gB4Mahmn1ldqGlqD0kpGXwg9Cz8rRutB3w7IQoOdA5+kZW0Rez67E6Qf7a5hnLzxNyAgUS/f5ayiSMZlSI4d+YEMe/cWED/yaEJP5NClD4ZwTS/gyeFkQ9doaF2C73/GPz/WKZHm4HcZ4uy1yTJhaUfoSnJwnZ+RKPIX7XYrbjzPYLj7d9q82MJ2+1lUZTrpdqsqMuPgXRkfroLHgMMtG604IW6JVd8PBhoq9weexDjMVLBHAiJnxLTEC03JTpEsE1waVKqyqVcZUU3oQ4DwJdPl4YUIl/OjZCbmu3fijJQnhh8gsWrktfZv71lOe/D0HKl2nTFEkdjYwHijrSVMuhBjBRiUrWnbqvadHgcin8IgTCfKo7183QN9nNws9DkAnmt3Pde+4b7d4/wrAZyGrX7d0nkNX6ZDwxbHqdrozIfTLKFdimsf/4vLlYnenVqNrBdrUi1VvNjtQonyjdxvcbzQ8bsqDNwb7CZd8CYb5T4iVY1i1c2duw1WjiKYlHpXR0JJU7bX4T4qE7gQVe+CwC20t6gts4r2F4WXV9nd4XaRbRwuLSKXkwO8Io6bY20DDD51U5L2y2ZnJ+yfQqiBPD0mztPr2kd6+jaKETlQW3parbHlonQ3UOTTfziCy4LR4aM5q+MaXZH8fRsyMkp7jMgzh5/eweLI3JGZNxyC5sqcOhSY27hnilkvHEDJi/BQk3DvTwppHAZIqV32F6Naa7jehUs5TZtX4Y2xNySrO8K9HC55jCBJ8lsJpJjAwkOdXgb2J757xkFGsOXvY0838KQcIV43Ebnaj2HWYudhgWQjL7pnLMCXVGAUdOv+m8jojaZ2WQMR7jTFruvDrysiOmr2O2qWqjADbKsILb4IEP4CnXbh/zUiyx+jOlOLvuuOQAKk67q1v4cwiybuHW94JVB1B13bD94Vqk8oJeseQpyT4D+THArGVl2PCcct6c2MMlL/WBi2iFRy9kTslQ052bZP7hXshEvQuZwx9hCWeM57t7KMxdN1jvh25fd0BDY6Tqw6JhDmcPrzTjsXzPr5PT/4VCvwxHjluHvw7nj7liyw2GTAYj0qMDhM824TqmmibpT3tk/8zeZyt+gFUCN27te+R1nnQjjZNO/tyXX0rrrPShqU4mxGQZ29FIHRY+DUOUHpbiBkRKjRPcy7Kw2ey8L8lKuVnr1ht0J0tDgl6D0itarAK2MEOgX04iE8HQCm1JrpZq1dslepepnHSEqJiHi0FpS252mofdIyFGfbhXnEqt2u4IcebWuQnlog1cq1fqpZZSajOB0jyiLru4uSo16bhUPhES1MlyiJCk/XTaKtVyRwAxBzuOwPPEWlJsgJiGVSpY9aQL92DbDZRTyrBQdZxIvQK7FaklS2V6G6u0m3JHOapKtUqbWq4640RRTcJoXoXLfsThCf/lpn2hwndns8KyPeZqz3GF50pbapVk9ORYENJiCjY+KGFgcTI2D2zIUcIr7pMCfzUYoBFLfOxzbe4vv6NvkZkbl4vIn+JCH/jl6IUBe8tntr0ma8Erey9dy+tfl3r1+Ljo1WuyDh5BPuDVrSrX8fuF4/dKwK9vernnSz7PvkbrQIYYd/P4p5QVUZ0bHzd18B6kesZoOuavOOdZppFjm
xrXocfdwTFs3SNnyhNY+cKxsol65ZkhHbaXMei8V9j1SJTtLxDI58/PnXP9rUUf15+POgQ2g2w519OahHH/xQh387H7bLeKK/6Cbr3g8Uc/br4O4n3V1FTdjdqKGQ94cXkjOKosKnTSD1+7XVSLZyp9T7xuE3VcNKf7jz164G8hSGO2PzUn3Fn+yY6qws9CEO0Qi6ZhMU3HBIBVYqkbqSKVFqtUJPMWWuzbKHRerWIMITNRPo+ZNcWwP7h5h1Tnrjuk5nchxsXPQEhzzAs+89xHSDuI/urvl2Oncf6w+X9iYdnD",
+ "view": {
+ "recursion": "bytes",
+ "enum": "number",
+ "yt_mode": true
+ }
+}@@;
+
+$udfPar = Udf(Protobuf::Parse, $config as TypeConfig);
+$udfSer = Udf(Protobuf::Serialize, $config as TypeConfig);
+
+SELECT TestField, Ensure("Success", $udfPar(TestField) == $udfPar($udfSer($udfPar(TestField))), "Fail")
+FROM plato.Input;
+
diff --git a/yql/essentials/udfs/common/protobuf/test/cases/yt_mode_ser_yt.in b/yql/essentials/udfs/common/protobuf/test/cases/yt_mode_ser_yt.in
new file mode 100644
index 00000000000..b6dd409ad4e
--- /dev/null
+++ b/yql/essentials/udfs/common/protobuf/test/cases/yt_mode_ser_yt.in
@@ -0,0 +1 @@
+{"TestField"="{\"inner\":{\"i\":{\"a\":\"hello\"}},\"test\":{\"inner\":{\"i\":{\"a\":\"bye\"}}}}"};
diff --git a/yql/essentials/udfs/common/protobuf/test/cases/yt_mode_ser_yt.in.attr b/yql/essentials/udfs/common/protobuf/test/cases/yt_mode_ser_yt.in.attr
new file mode 100644
index 00000000000..f10d440a236
--- /dev/null
+++ b/yql/essentials/udfs/common/protobuf/test/cases/yt_mode_ser_yt.in.attr
@@ -0,0 +1 @@
+{schema=[{name=TestField;type=string}]}
diff --git a/yql/essentials/udfs/common/protobuf/test/cases/yt_mode_ser_yt.sql b/yql/essentials/udfs/common/protobuf/test/cases/yt_mode_ser_yt.sql
new file mode 100644
index 00000000000..42312716e68
--- /dev/null
+++ b/yql/essentials/udfs/common/protobuf/test/cases/yt_mode_ser_yt.sql
@@ -0,0 +1,39 @@
+/*
+syntax='proto3';
+
+import "yt/yt_proto/yt/formats/extension.proto";
+
+message Test {
+ option (NYT.default_field_flags) = SERIALIZATION_YT;
+ message InnerInner {
+ string a = 1;
+ }
+ message Inner {
+ InnerInner i = 1;
+ }
+ Inner inner = 1;
+ Test test = 2;
+}
+*/
+
+$config = @@{
+ "name": "Test",
+ "format": "json",
+ "skip": 0,
+ "lists": {
+ "optional": false
+ },
+ "meta": "eNrFWkuPG8cRDp9LFrnc4exKoinLsunHrmWLa8jPrBLHXHJ2TYWvDEnbK8AYzJJNaiRyhp4ZSlohCALklFPgqxHkkIuD/IAAQS4Jcg8QIHFOAYIkQPITckx198xwhg8tZcC2D2tO9VdV3dVfd1d1C/54E54dGsZwRPYnpmEbp9PBfp9YPVOb2IZZZDJxiyOKLqJQh+yRNiIVD9gmtvgORAcozIWejeylbrxQnFMqBjVaVCwzjcK/o7C9pFUUIaqrY2oxtJeU2W8xBxsTtXdPHZJcmIndT/EZgD6ZEL1P9N5ZLoK9SMo+ifgKZCfT05HWU3wwQFhMFnhDZQbeha0HRL3nh6YYNEPFPmAZ0mNiWdgBxT6bkFyUjf7ZhdHPjzzlaHVQSSxBkujTMbcQWxE/CRHzVhJUzTGxYRHzvtYjuTgzsLtgoM3b5224ejiUJHloE93SDD23wYy8uGQWyag/b2KmJ74FG8bExl9WLoHzk7rx9FIiNDlGdsFiFQTLmJo9ovSMPlE0fWDkkszA1cWBMGAZcVWEyRkr8C1ehLh1ptvqw1yaMcT5Kvw2DlvrUOwmxAZ0lEiwJ4gB1wkGMf4lg1iClE4sm/Q5IyJrcgq40iKlol+KUh/BltclxVT1ocvN/fN6UpRcPZmqyRkS+BYrAIZOjAEur94IebI8Sk0KWYiSwaW9kfjtGdU2VjClzhfZAtu6kDEJ5T2GmI8syTpRPHdksqPGB7Zp+j/F58ETKIxWwHahtCtsoCz/CDLB8Ig7ELNs1bQZC2My/xAFiOAmw3a5mEx/iu/NBhxhA35pcUYDlufHnX8bNgMDWNd14YdwYalpJMnOVNd0m5gTk1DGcle5/2ys4FzXj+ZW5O3povBaMvHfDeHH+F+48Ic47CxbM0uXLy5/ZPApMVmQYrLzhSsiNlJPyQhXQ2gvc+OVtVZlsUZVZK4pvgtRZ4umFq6tZ4GuJZnpiZchSf/PuRFnfU5QAeWFmIcEWyZ94h5t3jclVp8M1OnIVu6roylhhEdiOcIPqEy8Cim+qjTUech2z5jMF1qVSqj7uxauZYeazAUVMPdvz2/cV5YPb2Et4VHJEK87U6+Oclk0kJAzXNx0pIXfhCHKNpYtSHVOWpJSaXYPa5IQEjMATHBUa5Y6Qtj7rjY6b70hRDyFLhdE/YDXbwgxJGyaG6h+JFUQEQ9KELMhbkKSSQ6bzZqQ8Gy2O3K1cSwkPZvHcrPbEsCzUJfa7dKxJKQ8xOFJR2oL6UC30MWm50JqdOtCRszCJnfhdmJrToQ9FWYd4VayAQEixEIZYoyGSPdMrXQo1ZRmq1NtNko1jJ0nk6UfdKuyVMH4+WQtqdRBWaTQg51lG+rSJeTjQngFF5iteS4U/hWG7SWHylIn34MY5zI/Zl9eejoxZi8ctUzPn2pEVqQa1MQCYT9e2Pz5+fjWOucjkz3ZIRBbcgjchOyCobU345+EILcqOOdsieHAlnhzPoLPrZ6Ehbn+PAQXl6eUS/vwLsTHxL5juGnVS0sOa9o8P9mOlv+0j6zKC3lvFnr60zBcWGp8aUevAGj6ZGrz1InvxEkmYZsX3WWnttceYe3ARQzwzqyjUdbRZ1aMdIGYr4HQG2lEtxXLNok61vQhO2oSB7GBOrKIvMWb224r1WAEMn0a8YAGb/Y0Cp8mIeVLwMXnIH1Xva8qblHFI5GispZTWL0GOwyCY0RHvZFqWSxoCQYVaVuTNpXdFvFN2GYaYzybtMmIKLTMs9iR4/UsSxF1B0B7ZGFaeIWpDYlOTNUmCvlkilhF1fvKHdW6k9uhBg7DuZD8FAUeOziJwUp6/30EiQdwkVnBiOCAld4d0runTO3BO7nLfv+sh22GKVNIFxFiG9J0MsbaI+yzYbIzNLNka/JFsNh0FOpYfxzE2i1Jqsgp18qRYVJCDQ0vwClOqKHhhheD1evxMWNt6hRjVk4IBKvXO+YAh+MWrocLs2D5FbMLo5xXRY+Ts0VFM
eBxcjav9jbsTO5MFvWu+fVEhMwrvsgqc5P0cK76uUt+uK9BLCL9ewrR1VNkjGriDyt3lYGjtjnFKqLXk1hjibWJ1yBrnN7tcUYqaGagPcy9wMK7RRsYH1tMLL6Mtq07qjlhW7KFk0FyL3IolzdcMV0R1gNtYLsWd/mKYDLH2h4INBIBx3sMlkG53y8eBhQ5c/oyT9xQOPP4BlykINzo1L5qqz70qwxNw153GgP9NKenZx6xrvN+UplLra8sOS8cQNrPezEJnPmYkGASVG5WaPpyW8JcBNOoWrUjKXK30anWJSHiS+xvRRMvCbs0a8gEKzXxO3DJvVaxiK080Ey2IMcqPxw9/uw4qDaxP0TMEYOINbiqG7gB4Mahmn1ldqGlqD0kpGXwg9Cz8rRutB3w7IQoOdA5+kZW0Rez67E6Qf7a5hnLzxNyAgUS/f5ayiSMZlSI4d+YEMe/cWED/yaEJP5NClD4ZwTS/gyeFkQ9doaF2C73/GPz/WKZHm4HcZ4uy1yTJhaUfoSnJwnZ+RKPIX7XYrbjzPYLj7d9q82MJ2+1lUZTrpdqsqMuPgXRkfroLHgMMtG604IW6JVd8PBhoq9weexDjMVLBHAiJnxLTEC03JTpEsE1waVKqyqVcZUU3oQ4DwJdPl4YUIl/OjZCbmu3fijJQnhh8gsWrktfZv71lOe/D0HKl2nTFEkdjYwHijrSVMuhBjBRiUrWnbqvadHgcin8IgTCfKo7183QN9nNws9DkAnmt3Pde+4b7d4/wrAZyGrX7d0nkNX6ZDwxbHqdrozIfTLKFdimsf/4vLlYnenVqNrBdrUi1VvNjtQonyjdxvcbzQ8bsqDNwb7CZd8CYb5T4iVY1i1c2duw1WjiKYlHpXR0JJU7bX4T4qE7gQVe+CwC20t6gts4r2F4WXV9nd4XaRbRwuLSKXkwO8Io6bY20DDD51U5L2y2ZnJ+yfQqiBPD0mztPr2kd6+jaKETlQW3parbHlonQ3UOTTfziCy4LR4aM5q+MaXZH8fRsyMkp7jMgzh5/eweLI3JGZNxyC5sqcOhSY27hnilkvHEDJi/BQk3DvTwppHAZIqV32F6Naa7jehUs5TZtX4Y2xNySrO8K9HC55jCBJ8lsJpJjAwkOdXgb2J757xkFGsOXvY0838KQcIV43Ebnaj2HWYudhgWQjL7pnLMCXVGAUdOv+m8jojaZ2WQMR7jTFruvDrysiOmr2O2qWqjADbKsILb4IEP4CnXbh/zUiyx+jOlOLvuuOQAKk67q1v4cwiybuHW94JVB1B13bD94Vqk8oJeseQpyT4D+THArGVl2PCcct6c2MMlL/WBi2iFRy9kTslQ052bZP7hXshEvQuZwx9hCWeM57t7KMxdN1jvh25fd0BDY6Tqw6JhDmcPrzTjsXzPr5PT/4VCvwxHjluHvw7nj7liyw2GTAYj0qMDhM824TqmmibpT3tk/8zeZyt+gFUCN27te+R1nnQjjZNO/tyXX0rrrPShqU4mxGQZ29FIHRY+DUOUHpbiBkRKjRPcy7Kw2ey8L8lKuVnr1ht0J0tDgl6D0itarAK2MEOgX04iE8HQCm1JrpZq1dslepepnHSEqJiHi0FpS252mofdIyFGfbhXnEqt2u4IcebWuQnlog1cq1fqpZZSajOB0jyiLru4uSo16bhUPhES1MlyiJCk/XTaKtVyRwAxBzuOwPPEWlJsgJiGVSpY9aQL92DbDZRTyrBQdZxIvQK7FaklS2V6G6u0m3JHOapKtUqbWq4640RRTcJoXoXLfsThCf/lpn2hwndns8KyPeZqz3GF50pbapVk9ORYENJiCjY+KGFgcTI2D2zIUcIr7pMCfzUYoBFLfOxzbe4vv6NvkZkbl4vIn+JCH/jl6IUBe8tntr0ma8Erey9dy+tfl3r1+Ljo1WuyDh5BPuDVrSrX8fuF4/dKwK9vernnSz7PvkbrQIYYd/P4p5QVUZ0bHzd18B6kesZoOuavOOdZppFjm
xrXocfdwTFs3SNnyhNY+cKxsol65ZkhHbaXMei8V9j1SJTtLxDI58/PnXP9rUUf15+POgQ2g2w519OahHH/xQh387H7bLeKK/6Cbr3g8Uc/br4O4n3V1FTdjdqKGQ94cXkjOKosKnTSD1+7XVSLZyp9T7xuE3VcNKf7jz164G8hSGO2PzXPuLP8kx1VhZ+FINohFk3DYpqOCQCrxFI3UkUqLVapSOYttNi3Uei8WsUYQmaifB4za4phf3DzDqnOXXdIze9CjIufgZDmmBd85rmPkHYQ/dXfL0dP4/xh8/9pqNnL",
+ "view": {
+ "recursion": "bytes",
+ "enum": "number",
+ "yt_mode": true
+ }
+}@@;
+
+$udfPar = Udf(Protobuf::Parse, $config as TypeConfig);
+$udfSer = Udf(Protobuf::Serialize, $config as TypeConfig);
+
+SELECT TestField, Ensure("Success", $udfPar(TestField) == $udfPar($udfSer($udfPar(TestField))), "Fail")
+FROM plato.Input;
+
diff --git a/yql/essentials/udfs/common/protobuf/test/cases/yt_mode_variant.in b/yql/essentials/udfs/common/protobuf/test/cases/yt_mode_variant.in
new file mode 100644
index 00000000000..6ab446801f2
--- /dev/null
+++ b/yql/essentials/udfs/common/protobuf/test/cases/yt_mode_variant.in
@@ -0,0 +1,4 @@
+{"TestField"="{\"Name\":\"n1\"}"};
+{"TestField"="{\"Name\":\"n1\",\"a\":\"a1\"}"};
+{"TestField"="{\"Name\":\"n1\",\"test\":{\"Name\":\"n2\",\"a\":\"a2\"}}"};
+{"TestField"="{\"Name\":\"n1\",\"test\":{\"Name\":\"n2\",\"test\":{\"Name\":\"n3\"}}}"};
diff --git a/yql/essentials/udfs/common/protobuf/test/cases/yt_mode_variant.in.attr b/yql/essentials/udfs/common/protobuf/test/cases/yt_mode_variant.in.attr
new file mode 100644
index 00000000000..f10d440a236
--- /dev/null
+++ b/yql/essentials/udfs/common/protobuf/test/cases/yt_mode_variant.in.attr
@@ -0,0 +1 @@
+{schema=[{name=TestField;type=string}]}
diff --git a/yql/essentials/udfs/common/protobuf/test/cases/yt_mode_variant.sql b/yql/essentials/udfs/common/protobuf/test/cases/yt_mode_variant.sql
new file mode 100644
index 00000000000..7c75b1cea5e
--- /dev/null
+++ b/yql/essentials/udfs/common/protobuf/test/cases/yt_mode_variant.sql
@@ -0,0 +1,82 @@
+/*
+syntax='proto3';
+
+import "yt/yt_proto/yt/formats/extension.proto";
+
+message Test {
+ option (NYT.default_field_flags) = SERIALIZATION_YT;
+ option (NYT.default_oneof_flags) = VARIANT;
+
+ oneof Var {
+ Test test = 1 [(NYT.column_name) = "_test"];
+ string a = 2;
+ }
+ string Name = 3 [(NYT.column_name) = "name"];;
+}
+*/
+
+$configVar = @@{
+ "name": "Test",
+ "format": "json",
+ "skip": 0,
+ "lists": {
+ "optional": false
+ },
+ "meta": "eNrFWs2P28YVjz5XetrVUty1razjOFESe+PE2sD57LpNow/uWq6+SmmTrIGA4FIjLW2JVEjK9ho9FOippyLXoCiKXlLk1FOBopcWvRcI0OYWoGgLtH9Cj30zQ1KkpPXKAZL4YItv3te8+c2892YMf7kJzw1MczAkO2PLdMyjSX+nR2zN0seOaRUZTVznHEWPo9CA3J4+JFWfsUMc8R2I95GYjzwX287ceLE4I1QMS7QpWWYShX/HYWPBqChC3FBHVGNkOy2z32IeVsaqdk8dkHyUkb1P8VmAHhkTo0cM7SQfQy/ScoAivgK58eRoqGtKgA2QLSELfKA6Zb4K6w+Iei/ImmGsWUoOMFZgdURsGx1QnJMxycfZ7J+bm/3szDOuVBeFxBKkiTEZcQ2JU+InIceslhQVc1Ws2MS6r2skn2QKrs4p6PDxWR2eHE4lTR46xLB108ivMCUvLVhFMuzNqpjKiW/Bijl28JedT+H6ZG48sxAILc4je8xiDQTbnFgaUTSzRxTd6Jv5NFNweX4ijLGCfDVkk7N26Fs8D0n7xHDUh/lVhhD3q/CHJKwvA7GbkOjTWSLAniAGXCYcxOTXDGIJMgaxHdLjiIgtiSngQvOQin8tSH0I675LiqUaAw+bO2d5UpQ8OZmKyVkS+harAKZBzD5uL22IOFkcpRZlmYuSyanaUPzeFGorpyClwTfZHNoOIGsRinsMMZ9ZmjlRPHNmsivGJ7ZmBT/FF8AnKAxWwE6hVY/YRNrWI8iGwyNuQsJ2VMthKEzI/EMUIIaHDDvlEjL9Kb43nXCMTfjK/IqGNM/Oe+ttWAtNYFnThZ/AuYWqESSbE0M3HGKNLUIRy03l/7NyCuYOgtxci7wxmSdeS6f+uyL8FP9EC39OwuaiPbNw++L2RwQfEYsFKSG7X7gjEkP1iAxxN0S2szdeWWpXFutUROaS4rsQd49oquHachroXpKZnHgR0vRfjo0k8zlFCRQX4hak2DbpES+1+d8UWD3SVydDR7mvDieEAR6B5RLfpzTxMmT4rtJR5iE7PRMy32g1SqHm79q4l11oMhOUwMy/PXtwX1o8vbm9hKmScbzuLr06zOdQQUrOcnLLpRZ+H4U4O1jWIdM9bEtKtXVQrktCRMwCMMJevVXqClH/u9bsvvWGEPMFDjghHmR4/YaQQMCucgW1D6UqciTDFORZEdcgzSjlVqsupHydna5ca+4LaV/nvtw6aAvga2hInU5pXxIyPkf5sCt1hNWQW2hizTchNQ8aQlbMwRo34TmxPkNCT4WpI1xLLkRADrFQgQSDIcI9Wy+VpbrSandrrWapjrHzabL044OaLFUxfgFaWyp1kRYraLC56EBduIUCWIieggWmaxYLhX9FYWNBUllo5IeQ4FjmafblhdmJIXsu1TK5YKkRO6XUoCrmAPvR3OHP8+Nby+RHRnuyJJBYkARuQm5O0dKH8c8ikD8tOGccidHQkXhzNoLPn74Ic2v9WQTOLy4pF/rwLiRHxDk2vbLqyoJkTYdnF9uVCmb72Gl1IfdmztOfR+HcQuULHb0EoBvjicNLJ34SpxmFHV70lJ04/niMjQMnMYZ3po7GmaPPnjLTOWC+BoI21InhKLZjEXWkGwOWalK7ib46tIm8zoc73iiVYACyAhLJkAQf9iUKn6QhEyjAxedh9a56X1W8popHIkNpbbexeg02GQvOEQ1pQ9W2WdBSjFWkYy06VPFGxDdhg0mMMDfp4yFRaJtns5Tje5ajHA2XgXpkY1l4iYkNiEEs1SEK+XiCvIpq9JRj1T7Ob1IF5Wg+Ij9NGfddPomxlYzeLWQSd+E804IRwQkr2jHR7ikTp/9O/mLQPvOww3gqlOUAOcQOrNLFGOmP0GfTYjk0u+BoCkSw2HIFGth/7CY6bUmqyhlPy55pUUANTD/AGQ6ogemFF4OlaXzO2Ju6zZidF0LB0rR9zuBi3Mb9cG4arKBgbm6Ws6JocXwyLyiGL
I5PZsXehs3x8Xhe7lpQTkSWWcGXWGduEQ3Xqpe/EGQPDIhFhL+mEEM9QsSoFv6w85cZc9yxJthFaJrEBktsTLwGOfPorsYRqaCavv4w/yIL7zodYHhsM7L4Muq2j1VrzI5kGxeD5F/irJze9Mh0R9gP9L7jabzKdwSjudq2QaCRCBneZmxZpAftYjKgnFOjL/PCDYlTi2/AecqEB53aUx01wP0q46Zhb7iDIT+tydGJD6zr3E9K86D1jRXnhV1YDeJeTANHPhYkWARVWlVavtyRsBbBMqpe60qKfNDs1hqSEAsU9rfjqSvC1cIXUciGOzXx+3DBu1axiaM80C22IUcqT44+fjZdrg5xPkCePcYi1uGyYeIBgAeHavWU6YWWomoISNvkidDX8oxhdlzmaYYouawz8I2dBl+srkfqGPHrWCesPk/JKSRI9PtbaZMwmikhjX+nBSj8MwarwXqdtj8ay1gRdqa98NjqvlihqWw3yYtjmUvSMoKCjfBiJCW7X+I+JO/aTHeS6X7x8bpvd5jy9O2O0mzJjVJddsXFpyE+VB+dhJMeIy27CKiBXtCFUw0jfYObYQcSLF4igBsx4SkxBfFKS6YbAncApyrtmlTBPVF4E5I8CHSz+GFAIf7p6oh4oweNsiQL0fBSx4VEwcZdGKjDv51m/E8RyATqaloQqcOh+UBRh7pqu9AARipRyrJL9y1tkYSQLPwqAsJsYTvjZuS7dLPwywhkw9XsjHvPf6fu/SMKa6EadlnvPoac3iOjsenQy3NlSO6TYb7ADo2dx1fJxdpUrk7FdjdqVanRbnWlZuVQOWj+qNn6oCkL+gzbN7jt2yDMOiVegEVu4c7egPVmC3MiJkZpb0+qdDv83sPn7oY2eOHTGGws8ASPcd6x8Cbq+jLeF2nN0MZW0m1wsBbCKBmO3texnuc9OG9j1qd0fqX0Kohj09Yd/T69kvcun2hbE5cFb6RmOD63QQbqDDc9zGOy4I343Fi/9MwJrfU4H80dETnDaT6LW8VPb71WsRRjNM5yFdbVwcCiyj1FvC/J+mTGuHUbUl4caKqmkcDSiTXbUXoRZniDaFS3leklfhTHU3JGt/0L0MJnWLCEHyGwd0kNTQQ5leAvYNtnvFsU6y6/7Etu/TUCKY+M6TY+Vp1jpi5RjgoRmX1TOlaABoOAS6ffdF2HRO2xpsccjXAlbW9dXXrFJdO3MMdS9WGIN854BW/AZ96Fpz29PaxCsaHqTYWS7HLjgstQdcc92cIXEch5bVrPD1YDQDUM0wmGax7Kc3LFki8kBxRsjQCmI6eGDfOU+8LEnil5Yw+cRPs5ev1yRAa64d4b8w/v+iXuX7+UfxHBjs0czfpbFmZuF+xbkTvvDnTneHJURP6dgTlUjcH0nZX90K5jP3V9YAZeXW9Of/4vEvlNNLbfLn8e3drn5tpeeGTSHxKNThk+XYPrWGpapDfRyM6Js8POgD52CdyIvePD2X3SjTUPu1tnvvxSoOekDyx1PCYWq+H2huqg8EkU4jR9iisQKzUP8XTLwVqre0uSlUqrftBo0rNtFVL0GpRe0WIXsI41A/1yS5sYBlvoSHKtVK/dKdG7TOWwK8TFLTgfprblVrdVPtgTEtSGd8Wp1GudrpBkZt2bUE5awd17qVFqK6UOIyitPWryAI9bpS7tlyqHQooaWcwipKmf7li1VukKIOZh0yX4lthIhk0QC7NqFbue1cI92PAC5bYyLFRdN1KvwNWq1JalCr2NVTotuavs1aR6tUM119x5IqkuYTQvw8UgR/mQ//IKwUjhB9NVYfUfM7XtmsJM05HaJRktuRqEVTEDK++XMLC4GGu7DuTpFlC8JwX+atBHJbb42Ofa/N/+SN8iszcuFhE/xTkf+OXouT57y2e6/SF7zip7L13K6t8XWvXxOG/VH7J3H8FWyKrXVS5j90vX7qWQ3cDycssXApYDg/auDAlu5vFPKadEdWZ+XNXue5DRzOFkxF9xztJMI
8eOOS5DE+DuPqzfIyfKE2j50tWyhnKVqSIDNhYh6KxX2OVAlOvNAShgL4idM+0tBR/PXgA6BNbCaDnT0pKA8f7HCDfzkfdsdxpWgi3ecsHjj35cfQPE+6qlq4YXtVNWPGTFw43girKo0EUvv3anqBZPVPqeeN0h6qhoTXYem3rgd1jTnDjc0NaTpanCCOJdYjviFYg7+C/ryjI3EkVKLKc//+piQqH0W0/JbBwL6ojKMzuSIqr4DMSp17wMKqdQgJXAMqPupn6L37/+6uJaOQGx91XrKMlfLP8PjbXUZg==",
+ "view": {
+ "recursion": "bytes",
+ "enum": "number",
+ "yt_mode": true
+ }
+}
+@@;
+
+/*
+syntax='proto3';
+
+import "yt/yt_proto/yt/formats/extension.proto";
+
+message Test {
+ option (NYT.default_field_flags) = SERIALIZATION_YT;
+ option (NYT.default_oneof_flags) = SEPARATE_FIELDS;
+
+ oneof Var {
+ Test test = 1 [(NYT.column_name) = "_test"];
+ string a = 2;
+ }
+ string Name = 3 [(NYT.column_name) = "name"];;
+}
+*/
+
+$configSeparate = @@{
+ "name": "Test",
+ "format": "json",
+ "skip": 0,
+ "lists": {
+ "optional": false
+ },
+ "meta": "eNrFWs2P28YVjz5XetrVUty1razjOFESe+PE2sD57LpNow/uWq6+SmmTrIGA4FIjLW2JVEjK9ho9FOippyLXoCiKXlLk1FOBopcWvRcI0OYWoGgLtH9Cj30zQ1KkpPXKAZL4YItv3te8+c2892YMf7kJzw1MczAkO2PLdMyjSX+nR2zN0seOaRUZTVznHEWPo9CA3J4+JFWfsUMc8R2I95GYjzwX287ceLE4I1QMS7QpWWYShX/HYWPBqChC3FBHVGNkOy2z32IeVsaqdk8dkHyUkb1P8VmAHhkTo0cM7SQfQy/ScoAivgK58eRoqGtKgA2QLSELfKA6Zb4K6w+Iei/ImmGsWUoOMFZgdURsGx1QnJMxycfZ7J+bm/3szDOuVBeFxBKkiTEZcQ2JU+InIceslhQVc1Ws2MS6r2skn2QKrs4p6PDxWR2eHE4lTR46xLB108ivMCUvLVhFMuzNqpjKiW/Bijl28JedT+H6ZG48sxAILc4je8xiDQTbnFgaUTSzRxTd6Jv5NFNweX4ijLGCfDVkk7N26Fs8D0n7xHDUh/lVhhD3q/CHJKwvA7GbkOjTWSLAniAGXCYcxOTXDGIJMgaxHdLjiIgtiSngQvOQin8tSH0I675LiqUaAw+bO2d5UpQ8OZmKyVkS+harAKZBzD5uL22IOFkcpRZlmYuSyanaUPzeFGorpyClwTfZHNoOIGsRinsMMZ9ZmjlRPHNmsivGJ7ZmBT/FF8AnKAxWwE6hVY/YRNrWI8iGwyNuQsJ2VMthKEzI/EMUIIaHDDvlEjL9Kb43nXCMTfjK/IqGNM/Oe+ttWAtNYFnThZ/AuYWqESSbE0M3HGKNLUIRy03l/7NyCuYOgtxci7wxmSdeS6f+uyL8FP9EC39OwuaiPbNw++L2RwQfEYsFKSG7X7gjEkP1iAxxN0S2szdeWWpXFutUROaS4rsQd49oquHachroXpKZnHgR0vRfjo0k8zlFCRQX4hak2DbpES+1+d8UWD3SVydDR7mvDieEAR6B5RLfpzTxMmT4rtJR5iE7PRMy32g1SqHm79q4l11oMhOUwMy/PXtwX1o8vbm9hKmScbzuLr06zOdQQUrOcnLLpRZ+H4U4O1jWIdM9bEtKtXVQrktCRMwCMMJevVXqClH/u9bsvvWGEPMFDjghHmR4/YaQQMCucgW1D6UqciTDFORZEdcgzSjlVqsupHydna5ca+4LaV/nvtw6aAvga2hInU5pXxIyPkf5sCt1hNWQW2hizTchNQ8aQlbMwRo34TmxPkNCT4WpI1xLLkRADrFQgQSDIcI9Wy+VpbrSandrrWapjrHzabL044OaLFUxfgFaWyp1kRYraLC56EBduIUCWIieggWmaxYLhX9FYWNBUllo5IeQ4FjmafblhdmJIXsu1TK5YKkRO6XUoCrmAPvR3OHP8+Nby+RHRnuyJJBYkARuQm5O0dKH8c8ikD8tOGccidHQkXhzNoLPn74Ic2v9WQTOLy4pF/rwLiRHxDk2vbLqyoJkTYdnF9uVCmb72Gl1IfdmztOfR+HcQuULHb0EoBvjicNLJ34SpxmFHV70lJ04/niMjQMnMYZ3po7GmaPPnjLTOWC+BoI21InhKLZjEXWkGwOWalK7ib46tIm8zoc73iiVYACyAhLJkAQf9iUKn6QhEyjAxedh9a56X1W8popHIkNpbbexeg02GQvOEQ1pQ9W2WdBSjFWkYy06VPFGxDdhg0mMMDfp4yFRaJtns5Tje5ajHA2XgXpkY1l4iYkNiEEs1SEK+XiCvIpq9JRj1T7Ob1IF5Wg+Ij9NGfddPomxlYzeLWQSd+E804IRwQkr2jHR7ikTp/9O/mLQPvOww3gqlOUAOcQOrNLFGOmP0GfTYjk0u+BoCkSw2HIFGth/7CY6bUmqyhlPy55pUUANTD/AGQ6ogemFF4OlaXzO2Ju6zZidF0LB0rR9zuBi3Mb9cG4arKBgbm6Ws6JocXwyLyiGL
I5PZsXehs3x8Xhe7lpQTkSWWcGXWGduEQ3Xqpe/EGQPDIhFhL+mEEM9QsSoFv6w85cZc9yxJthFaJrEBktsTLwGOfPorsYRqaCavv4w/yIL7zodYHhsM7L4Muq2j1VrzI5kGxeD5F/irJze9Mh0R9gP9L7jabzKdwSjudq2QaCRCBneZmxZpAftYjKgnFOjL/PCDYlTi2/AecqEB53aUx01wP0q46Zhb7iDIT+tydGJD6zr3E9K86D1jRXnhV1YDeJeTANHPhYkWARVWlVavtyRsBbBMqpe60qKfNDs1hqSEAsU9rfjqSvC1cIXUciGOzXx+3DBu1axiaM80C22IUcqT44+fjZdrg5xPkCePcYi1uGyYeIBgAeHavWU6YWWomoISNvkidDX8oxhdlzmaYYouawz8I2dBl+srkfqGPHrWCesPk/JKSRI9PtbaZMwmikhjX+nBSj8MwarwXqdtj8ay1gRdqa98NjqvlihqWw3yYtjmUvSMoKCjfBiJCW7X+I+JO/aTHeS6X7x8bpvd5jy9O2O0mzJjVJddsXFpyE+VB+dhJMeIy27CKiBXtCFUw0jfYObYQcSLF4igBsx4SkxBfFKS6YbAncApyrtmlTBPVF4E5I8CHSz+GFAIf7p6oh4oweNsiQL0fBSx4VEwcZdGKjDv51m/E8RyATqaloQqcOh+UBRh7pqu9AARipRyrJL9y1tkYSQLPwqAsJsYTvjZuS7dLPwywhkw9XsjHvPf6fu/SMKa6EadlnvPoac3iOjsenQy3NlSO6TYb7ADo2dx1fJxdpUrk7FdjdqVanRbnWlZuVQOWj+qNn6oCkL+gzbN7jt2yDMOiVegEVu4c7egPVmC3MiJkZpb0+qdDv83sPn7oY2eOHTGGws8ASPcd6x8Cbq+jLeF2nN0MZW0m1wsBbCKBmO3texnuc9OG9j1qd0fqX0Kohj09Yd/T69kvcun2hbE5cFb6RmOD63QQbqDDc9zGOy4I343Fi/9MwJrfU4H80dETnDaT6LW8VPb71WsRRjNM5yFdbVwcCiyj1FvC/J+mTGuHUbUl4caKqmkcDSiTXbUXoRZniDaFS3leklfhTHU3JGt/0L0MJnWLCEHyGwd0kNTQQ5leAvYNtnvFsU6y6/7Etu/TUCKY+M6TY+Vp1jpi5RjgoRmX1TOlaABoOAS6ffdF2HRO2xpsccjXAlbW9dXXrFJdO3MMdS9WGIN854BW/AZ96Fpz29PaxCsaHqTYWS7HLjgstQdcc92cIXEch5bVrPD1YDQDUM0wmGax7Kc3LFki8kBxRsjQCmI6eGDfOU+8LEnil5Yw+cRPs5ev1yRAa64d4b8w/v+iXuX7+UfxHBjs0czfpbFmZuF+xbkTvvDnTneHJURP6dgTlUjcH0nZX90K5jP3V9YAZeXW9Of/4vEvlNNLbfLn8e3drn5tpeeGTSHxKNThk+XYPrWGpapDfRyM6Js8POgD52CdyIvePD2X3SjTUPu1tnvvxSoOekDyx1PCYWq+H2huqg8EkU4jR9iisQKzUP8XTLwVqre0uSlUqrftBo0rNtFVL0GpRe0WIXsI41A/1yS5sYBlvoSHKtVK/dKdG7TOWwK8TFLTgfprblVrdVPtgTEtSGd8Wp1GudrpBkZt2bUE5awd17qVFqK6UOIyitPWryAI9bpS7tlyqHQooaWcwipKmf7li1VukKIOZh0yX4lthIhk0QC7NqFbue1cI92PAC5bYyLFRdN1KvwNWq1JalCr2NVTotuavs1aR6tUM119x5IqkuYTQvw8UgR/mQ//IKwUjhB9NVYfUfM7XtmsJM05HaJRktuRqEVTEDK++XMLC4GGu7DuTpFlC8JwX+atBHJbb42Ofa/N/+SN8iszcuFhE/xTkf+OXouT57y2e6/SF7zip7L13K6t8XWvXxOG/VH7J3H8FWyKrXVS5j90vX7qWQ3cDycssXApYDg/auDAlu5vFPKadEdWZ+XNXue5DRzOFkxF9xztJMI
8eOOS5DE+DuPqzfIyfKE2j50tWyhnKVqSIDNhYh6KxX2OVAlOvNAShgL4idM+0tBR/PXgA6BNbCaDnT0pKA8f7HCDfzkfdsdxpWgi3ecsHjj35cfQPE+6qlq4YXtVNWPGTFw43girKo0EUvv3anqBZPVPqeeN0h6qhoTXYem3rgd1jTnDjc0NaTpanCCOJdYjviFYg7+C/ryjI3EkVKLKc//+piQqH0W0/JbBwL6ojKMzuSIqr4DMSp17wMKqdQgJXAMqPupn6L37/+6uJqOQGx91XrKMlfLP8PjaXUZQ==",
+ "view": {
+ "recursion": "bytes",
+ "enum": "number",
+ "yt_mode": true
+ }
+}
+@@;
+
+-- Bind Protobuf::Parse / Protobuf::Serialize to each of the two type configs.
+-- $configSeparate is the config literal defined just above; $configVar is
+-- expected to be defined earlier in this file (not repeated here).
+$udfParseVar = Udf(Protobuf::Parse, $configVar as TypeConfig);
+$udfSerializeVar = Udf(Protobuf::Serialize, $configVar as TypeConfig);
+
+$udfParseSep = Udf(Protobuf::Parse, $configSeparate as TypeConfig);
+$udfSerializeSep = Udf(Protobuf::Serialize, $configSeparate as TypeConfig);
+
+-- For every input row and for each config: output the parsed value, the
+-- re-serialized value, and a round-trip check. The check compares
+-- StablePickle(parse(x)) with StablePickle(parse(serialize(parse(x)))) and
+-- yields "Success" when they are equal; otherwise Ensure reports "Fail".
+SELECT
+ TestField,
+ $udfParseVar(TestField),
+ $udfSerializeVar($udfParseVar(TestField)),
+ Ensure("Success", StablePickle($udfParseVar(TestField)) == StablePickle($udfParseVar($udfSerializeVar($udfParseVar(TestField)))), "Fail"),
+ $udfParseSep(TestField),
+ $udfSerializeSep($udfParseSep(TestField)),
+ Ensure("Success", StablePickle($udfParseSep(TestField)) == StablePickle($udfParseSep($udfSerializeSep($udfParseSep(TestField)))), "Fail"),
+FROM plato.Input;
+
diff --git a/yql/essentials/udfs/common/protobuf/test/ya.make b/yql/essentials/udfs/common/protobuf/test/ya.make
new file mode 100644
index 00000000000..e44cb5458c9
--- /dev/null
+++ b/yql/essentials/udfs/common/protobuf/test/ya.make
@@ -0,0 +1,13 @@
+YQL_UDF_TEST_CONTRIB()
+
+DEPENDS(yql/essentials/udfs/common/protobuf)
+
+TIMEOUT(300)
+
+SIZE(MEDIUM)
+
+IF (SANITIZER_TYPE == "memory")
+ TAG(ya:not_autocheck) # YQL-15385
+ENDIF()
+
+END()
diff --git a/yql/essentials/udfs/common/protobuf/ya.make b/yql/essentials/udfs/common/protobuf/ya.make
new file mode 100644
index 00000000000..714ad77137f
--- /dev/null
+++ b/yql/essentials/udfs/common/protobuf/ya.make
@@ -0,0 +1,23 @@
+YQL_UDF_CONTRIB(protobuf_udf)
+
+YQL_ABI_VERSION(
+ 2
+ 9
+ 0
+)
+
+SRCS(
+ protobuf_udf.cpp
+)
+
+PEERDIR(
+ library/cpp/protobuf/yql
+ yql/essentials/minikql/protobuf_udf
+ yql/essentials/public/udf
+)
+
+END()
+
+RECURSE_FOR_TESTS(
+ test
+)
diff --git a/yql/essentials/udfs/common/python/bindings/py27_backports.c b/yql/essentials/udfs/common/python/bindings/py27_backports.c
new file mode 100644
index 00000000000..cf21a97cef0
--- /dev/null
+++ b/yql/essentials/udfs/common/python/bindings/py27_backports.c
@@ -0,0 +1,91 @@
+#include "py27_backports.h"
+
+
+// Provide implementations from python 2.7.15 as backports
+
+/*
+ * Extracts start, stop and step from a slice object without knowing the
+ * length of the sequence being sliced.
+ *
+ * _r    - the slice object; it is cast to PySliceObject without a type check.
+ * start - receives the start index; when the slice start is None it is
+ *         PY_SSIZE_T_MAX for a negative step and 0 otherwise.
+ * stop  - receives the stop index; when the slice stop is None it is
+ *         PY_SSIZE_T_MIN for a negative step and PY_SSIZE_T_MAX otherwise.
+ * step  - receives the step; 1 when the slice step is None, never 0 and
+ *         never below -PY_SSIZE_T_MAX.
+ *
+ * Returns 0 on success. Returns -1 when the step is zero (ValueError is set)
+ * or when _PyEval_SliceIndex fails to convert one of the slice members.
+ */
+int
+_PySlice_Unpack(PyObject *_r,
+ Py_ssize_t *start, Py_ssize_t *stop, Py_ssize_t *step)
+{
+ PySliceObject *r = (PySliceObject *)_r;
+ /* this is harder to get right than you might think */
+
+ assert(PY_SSIZE_T_MIN + 1 <= -PY_SSIZE_T_MAX);
+
+ if (r->step == Py_None) {
+ *step = 1;
+ }
+ else {
+ if (!_PyEval_SliceIndex(r->step, step)) return -1;
+ if (*step == 0) {
+ PyErr_SetString(PyExc_ValueError,
+ "slice step cannot be zero");
+ return -1;
+ }
+ /* Here *step might be -PY_SSIZE_T_MAX-1; in this case we replace it
+ * with -PY_SSIZE_T_MAX. This doesn't affect the semantics, and it
+ * guards against later undefined behaviour resulting from code that
+ * does "step = -step" as part of a slice reversal.
+ */
+ if (*step < -PY_SSIZE_T_MAX)
+ *step = -PY_SSIZE_T_MAX;
+ }
+
+ /* The defaults below depend on the direction of iteration, so the step
+ * has to be resolved first. */
+ if (r->start == Py_None) {
+ *start = *step < 0 ? PY_SSIZE_T_MAX : 0;
+ }
+ else {
+ if (!_PyEval_SliceIndex(r->start, start)) return -1;
+ }
+
+ if (r->stop == Py_None) {
+ *stop = *step < 0 ? PY_SSIZE_T_MIN : PY_SSIZE_T_MAX;
+ }
+ else {
+ if (!_PyEval_SliceIndex(r->stop, stop)) return -1;
+ }
+
+ return 0;
+}
+
+/*
+ * Clips *start and *stop to a sequence of the given length and returns the
+ * number of items the slice selects.
+ *
+ * length - length of the sequence being sliced.
+ * start  - in/out: negative values are offset by length; the result is
+ *          clipped to [0, length] for a positive step and to
+ *          [-1, length - 1] for a negative step.
+ * stop   - in/out: adjusted by the same rules as start.
+ * step   - must be non-zero and not below -PY_SSIZE_T_MAX (asserted).
+ *
+ * Returns the slice length, or 0 when the adjusted range is empty.
+ */
+Py_ssize_t
+_PySlice_AdjustIndices(Py_ssize_t length,
+ Py_ssize_t *start, Py_ssize_t *stop, Py_ssize_t step)
+{
+ /* this is harder to get right than you might think */
+
+ assert(step != 0);
+ assert(step >= -PY_SSIZE_T_MAX);
+
+ if (*start < 0) {
+ *start += length;
+ if (*start < 0) {
+ *start = (step < 0) ? -1 : 0;
+ }
+ }
+ else if (*start >= length) {
+ *start = (step < 0) ? length - 1 : length;
+ }
+
+ if (*stop < 0) {
+ *stop += length;
+ if (*stop < 0) {
+ *stop = (step < 0) ? -1 : 0;
+ }
+ }
+ else if (*stop >= length) {
+ *stop = (step < 0) ? length - 1 : length;
+ }
+
+ /* Count the visited indices: ceil(distance / |step|), written so that it
+ * only uses integer division on positive operands. */
+ if (step < 0) {
+ if (*stop < *start) {
+ return (*start - *stop - 1) / (-step) + 1;
+ }
+ }
+ else {
+ if (*start < *stop) {
+ return (*stop - *start - 1) / step + 1;
+ }
+ }
+ return 0;
+}
diff --git a/yql/essentials/udfs/common/python/bindings/py27_backports.h b/yql/essentials/udfs/common/python/bindings/py27_backports.h
new file mode 100644
index 00000000000..766af6a76fa
--- /dev/null
+++ b/yql/essentials/udfs/common/python/bindings/py27_backports.h
@@ -0,0 +1,26 @@
+#pragma once
+
+#include "Python.h"
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+// Declare functions which are to be backported
+// (see details about need for backports in ya.make)
+
+// Extracts start/stop/step from a slice object, substituting defaults for
+// None members. Returns 0 on success and -1 on failure.
+int _PySlice_Unpack(PyObject *slice,
+ Py_ssize_t *start, Py_ssize_t *stop, Py_ssize_t *step);
+
+// Clips *start and *stop to a sequence of the given length and returns the
+// number of items selected by the slice. step must be non-zero.
+Py_ssize_t _PySlice_AdjustIndices(Py_ssize_t length,
+ Py_ssize_t *start, Py_ssize_t *stop,
+ Py_ssize_t step);
+
+// Declare py23 compatible names
+
+#define PySlice_Unpack _PySlice_Unpack
+#define PySlice_AdjustIndices _PySlice_AdjustIndices
+
+#ifdef __cplusplus
+}
+#endif
diff --git a/yql/essentials/udfs/common/python/bindings/py_callable.cpp b/yql/essentials/udfs/common/python/bindings/py_callable.cpp
new file mode 100644
index 00000000000..c60403bdca2
--- /dev/null
+++ b/yql/essentials/udfs/common/python/bindings/py_callable.cpp
@@ -0,0 +1,423 @@
+#include "py_callable.h"
+#include "py_cast.h"
+#include "py_errors.h"
+#include "py_gil.h"
+#include "py_stream.h"
+#include "py_utils.h"
+
+#include <yql/essentials/public/udf/udf_value.h>
+#include <yql/essentials/public/udf/udf_value_builder.h>
+#include <yql/essentials/public/udf/udf_type_inspection.h>
+#include <yql/essentials/public/udf/udf_terminator.h>
+
+#include <library/cpp/containers/stack_vector/stack_vec.h>
+
+#include <util/string/builder.h>
+
+using namespace NKikimr;
+
+namespace NPython {
+namespace {
+
+//////////////////////////////////////////////////////////////////////////////
+// TPyCallableObject
+//////////////////////////////////////////////////////////////////////////////
+// Python object that exposes a boxed callable UDF value to Python code.
+// Instances are created with `new` in ToPyCallable and destroyed with
+// `delete` in CallableDealloc. PyObject_HEAD is the first member because
+// the object is reinterpret_cast to and from PyObject*.
+struct TPyCallableObject
+{
+ PyObject_HEAD;
+ // Cast context used to convert arguments and results.
+ TPyCastContext::TPtr CastCtx;
+ // Callable type of the wrapped value.
+ const NUdf::TType* Type;
+ // The wrapped boxed value; assigned after construction in ToPyCallable.
+ TPyCleanupListItem<NUdf::IBoxedValuePtr> Value;
+ // Inspector over Type, built once in the constructor.
+ NUdf::TCallableTypeInspector Inspector;
+
+ TPyCallableObject(const TPyCastContext::TPtr& castCtx, const NUdf::TType* type)
+ : CastCtx(castCtx)
+ , Type(type)
+ , Inspector(*castCtx->PyCtx->TypeInfoHelper, type)
+ {}
+};
+
+// Views a generic Python object pointer as the callable wrapper struct.
+// No type check is performed; the caller guarantees the dynamic type.
+inline TPyCallableObject* CastToCallable(PyObject* o)
+{
+    auto* const wrapper = reinterpret_cast<TPyCallableObject*>(o);
+    return wrapper;
+}
+
+// tp_dealloc slot: destroys the wrapper that ToPyCallable allocated with `new`.
+void CallableDealloc(PyObject* self)
+{
+    TPyCallableObject* const wrapper = CastToCallable(self);
+    delete wrapper;
+}
+
+// tp_repr slot: returns a fixed textual representation of the wrapper.
+PyObject* CallableRepr(PyObject*)
+{
+    // TODO: print callable signature
+    auto repr = PyRepr("<yql.TCallable>");
+    return repr.Release();
+}
+
+// tp_call slot: invokes the wrapped boxed callable with positional arguments
+// converted from Python and returns the result converted back to Python.
+// Keyword arguments are not used. Errors are translated by PY_TRY/PY_CATCH,
+// in which case nullptr is returned.
+PyObject* CallableCall(PyObject *self, PyObject *args, PyObject *kwargs)
+{
+    Y_UNUSED(kwargs);
+
+    PY_TRY {
+        TPyCallableObject* const target = CastToCallable(self);
+        const auto& signature = target->Inspector;
+        auto builder = target->CastCtx->ValueBuilder;
+
+        // Convert the Python argument tuple into unboxed values.
+        TSmallVec<NUdf::TUnboxedValue> nativeArgs;
+        nativeArgs.resize(signature.GetArgsCount());
+        FromPyArgs(target->CastCtx, target->Type, args, nativeArgs.data(), signature);
+
+        // The GIL unlocker is scoped to the wrapped call only.
+        NUdf::TUnboxedValue outcome;
+        {
+            TPyGilUnlocker unlock;
+            outcome = NUdf::TBoxedValueAccessor::Run(*target->Value.Get(), builder, nativeArgs.data());
+        }
+
+        return ToPyObject(target->CastCtx, signature.GetReturnType(), outcome).Release();
+    } PY_CATCH(nullptr)
+}
+
+}
+
+// Python type object for "yql.TCallable".
+// Only tp_name, tp_basicsize, tp_dealloc, tp_repr, tp_call and tp_doc are
+// populated; every other slot is null or zero. tp_new is null, and instances
+// are created from C++ in ToPyCallable. The preprocessor guards select the
+// slots that exist in the Python version being compiled against.
+PyTypeObject PyCallableType = {
+ PyVarObject_HEAD_INIT(&PyType_Type, 0)
+ INIT_MEMBER(tp_name , "yql.TCallable"),
+ INIT_MEMBER(tp_basicsize , sizeof(TPyCallableObject)),
+ INIT_MEMBER(tp_itemsize , 0),
+ INIT_MEMBER(tp_dealloc , CallableDealloc),
+#if PY_VERSION_HEX < 0x030800b4
+ INIT_MEMBER(tp_print , nullptr),
+#else
+ INIT_MEMBER(tp_vectorcall_offset, 0),
+#endif
+ INIT_MEMBER(tp_getattr , nullptr),
+ INIT_MEMBER(tp_setattr , nullptr),
+#if PY_MAJOR_VERSION >= 3
+ INIT_MEMBER(tp_as_async , nullptr),
+#else
+ INIT_MEMBER(tp_compare , nullptr),
+#endif
+ INIT_MEMBER(tp_repr , CallableRepr),
+ INIT_MEMBER(tp_as_number , nullptr),
+ INIT_MEMBER(tp_as_sequence , nullptr),
+ INIT_MEMBER(tp_as_mapping , nullptr),
+ INIT_MEMBER(tp_hash , nullptr),
+ INIT_MEMBER(tp_call , CallableCall),
+ INIT_MEMBER(tp_str , nullptr),
+ INIT_MEMBER(tp_getattro , nullptr),
+ INIT_MEMBER(tp_setattro , nullptr),
+ INIT_MEMBER(tp_as_buffer , nullptr),
+ INIT_MEMBER(tp_flags , 0),
+ INIT_MEMBER(tp_doc , "yql.TCallable object"),
+ INIT_MEMBER(tp_traverse , nullptr),
+ INIT_MEMBER(tp_clear , nullptr),
+ INIT_MEMBER(tp_richcompare , nullptr),
+ INIT_MEMBER(tp_weaklistoffset , 0),
+ INIT_MEMBER(tp_iter , nullptr),
+ INIT_MEMBER(tp_iternext , nullptr),
+ INIT_MEMBER(tp_methods , nullptr),
+ INIT_MEMBER(tp_members , nullptr),
+ INIT_MEMBER(tp_getset , nullptr),
+ INIT_MEMBER(tp_base , nullptr),
+ INIT_MEMBER(tp_dict , nullptr),
+ INIT_MEMBER(tp_descr_get , nullptr),
+ INIT_MEMBER(tp_descr_set , nullptr),
+ INIT_MEMBER(tp_dictoffset , 0),
+ INIT_MEMBER(tp_init , nullptr),
+ INIT_MEMBER(tp_alloc , nullptr),
+ INIT_MEMBER(tp_new , nullptr),
+ INIT_MEMBER(tp_free , nullptr),
+ INIT_MEMBER(tp_is_gc , nullptr),
+ INIT_MEMBER(tp_bases , nullptr),
+ INIT_MEMBER(tp_mro , nullptr),
+ INIT_MEMBER(tp_cache , nullptr),
+ INIT_MEMBER(tp_subclasses , nullptr),
+ INIT_MEMBER(tp_weaklist , nullptr),
+ INIT_MEMBER(tp_del , nullptr),
+ INIT_MEMBER(tp_version_tag , 0),
+#if PY_MAJOR_VERSION >= 3
+ INIT_MEMBER(tp_finalize , nullptr),
+#endif
+#if PY_VERSION_HEX >= 0x030800b1
+ INIT_MEMBER(tp_vectorcall , nullptr),
+#endif
+#if PY_VERSION_HEX >= 0x030800b4 && PY_VERSION_HEX < 0x03090000
+ INIT_MEMBER(tp_print , nullptr),
+#endif
+};
+
+//////////////////////////////////////////////////////////////////////////////
+// TPyCallable
+//////////////////////////////////////////////////////////////////////////////
+// Boxed UDF value that forwards Run() to a Python callable.
+// Holds references to the function (and to its closure, if it is a plain
+// Python function that has one) together with the cast context and a type
+// inspector for the callable type.
+class TPyCallable: public NUdf::TBoxedValue
+{
+public:
+ TPyCallable(
+ PyObject* function,
+ const NUdf::TType* functionType,
+ const TPyCastContext::TPtr& castCtx)
+ : Function_(function, TPyObjectPtr::ADD_REF)
+ , FunctionType_(functionType)
+ , CastCtx_(castCtx)
+ , Inspector_(*castCtx->PyCtx->TypeInfoHelper, functionType)
+ {
+ // keep ownership of function closure if any
+ if (PyFunction_Check(function)) {
+ PyObject* closure = PyFunction_GetClosure(function);
+ if (closure) {
+ Closure_ = TPyObjectPtr(closure, TPyObjectPtr::ADD_REF);
+ }
+ }
+ }
+
+ // The members are reset explicitly while `lock` is alive: implicit member
+ // destruction would happen only after the destructor body, i.e. after the
+ // lock object has already been destroyed.
+ ~TPyCallable() {
+ TPyGilLocker lock;
+ Closure_.Reset();
+ Function_.Reset();
+ CastCtx_.Reset();
+ }
+
+private:
+ // Converts args to a Python argument object, calls the function and
+ // converts the result back according to the callable's return type.
+ // Terminates the UDF when the call fails or when a conversion throws
+ // yexception.
+ NUdf::TUnboxedValue Run(
+ const NUdf::IValueBuilder*,
+ const NUdf::TUnboxedValuePod* args) const final
+ {
+ TPyGilLocker lock;
+ try {
+ TPyObjectPtr pyArgs = ToPyArgs(CastCtx_, FunctionType_, args, Inspector_);
+ TPyObjectPtr resultObj =
+ PyObject_CallObject(Function_.Get(), pyArgs.Get());
+ if (!resultObj) {
+ UdfTerminate((TStringBuilder() << CastCtx_->PyCtx->Pos << "Failed to execute:\n" << GetLastErrorAsString()).data());
+ }
+
+ auto returnType = Inspector_.GetReturnType();
+ // Stream results additionally receive the function, its closure and
+ // the arguments (presumably to keep them alive while the stream is
+ // consumed - confirm in FromPyStream).
+ if (CastCtx_->PyCtx->TypeInfoHelper->GetTypeKind(returnType) == NUdf::ETypeKind::Stream) {
+ return FromPyStream(CastCtx_, returnType, resultObj, Function_, Closure_, pyArgs);
+ }
+
+ return FromPyObject(CastCtx_, returnType, resultObj.Get());
+ } catch (const yexception& e) {
+ UdfTerminate((TStringBuilder() << CastCtx_->PyCtx->Pos << "Failed to cast arguments or result\n" << e.what()).data());
+ }
+ }
+
+ TPyObjectPtr Function_;
+ TPyObjectPtr Closure_;
+ const NUdf::TType* FunctionType_;
+ TPyCastContext::TPtr CastCtx_;
+ NUdf::TCallableTypeInspector Inspector_;
+};
+
+
+// Wraps a boxed callable value into a new Python object of PyCallableType.
+// The wrapper is allocated with `new` and released by CallableDealloc.
+TPyObjectPtr ToPyCallable(
+    const TPyCastContext::TPtr& castCtx,
+    const NUdf::TType* type,
+    const NUdf::TUnboxedValuePod& value)
+{
+    auto* const wrapper = new TPyCallableObject(castCtx, type);
+    PyObject_INIT(wrapper, &PyCallableType);
+    wrapper->Value.Set(castCtx->PyCtx, value.AsBoxed());
+
+    PyObject* const asPyObject = reinterpret_cast<PyObject*>(wrapper);
+    return asPyObject;
+}
+
+// Wraps a Python callable into a boxed UDF value (TPyCallable) of the given
+// callable type. The TPyCallable takes its own reference to `value`.
+NUdf::TUnboxedValue FromPyCallable(
+ const TPyCastContext::TPtr& castCtx,
+ const NUdf::TType* type,
+ PyObject* value)
+{
+ return NUdf::TUnboxedValuePod(new TPyCallable(value, type, castCtx));
+}
+
+// Looks up an attribute that is allowed to be absent.
+// Returns the attribute when present and Nothing() when the lookup fails with
+// AttributeError (the Python error is cleared). Any other lookup failure is
+// rethrown as yexception carrying the Python error text.
+TMaybe<TPyObjectPtr> GetOptionalAttribute(PyObject* value, const char* attrName) {
+    TPyObjectPtr found = PyObject_GetAttrString(value, attrName);
+    if (found) {
+        return found;
+    }
+
+    if (!PyErr_ExceptionMatches(PyExc_AttributeError)) {
+        throw yexception() << "Cannot get attribute '" << attrName << "', error: " << GetLastErrorAsString();
+    }
+
+    PyErr_Clear();
+    return Nothing();
+}
+
+
+// Python object behind the `_yql_secure_param` attribute installed by
+// SetupCallableSettings. PyObject_HEAD is the first member because the object
+// is reinterpret_cast to and from PyObject*.
+struct TPySecureParam
+{
+ PyObject_HEAD;
+ // Cast context; its ValueBuilder is used by SecureParamCall.
+ TPyCastContext::TPtr CastCtx;
+
+ TPySecureParam(const TPyCastContext::TPtr& castCtx) : CastCtx(castCtx) {}
+};
+
+// Views a generic Python object pointer as the secure-param wrapper struct.
+// No type check is performed; the caller guarantees the dynamic type.
+inline TPySecureParam* CastToSecureParam(PyObject* o)
+{
+    auto* const param = reinterpret_cast<TPySecureParam*>(o);
+    return param;
+}
+
+// tp_dealloc slot: destroys the wrapper that ToPySecureParam allocated with `new`.
+void SecureParamDealloc(PyObject* self)
+{
+    TPySecureParam* const param = CastToSecureParam(self);
+    delete param;
+}
+
+// tp_repr slot: returns a fixed textual representation of the wrapper.
+PyObject* SecureParamRepr(PyObject*)
+{
+    auto repr = PyRepr("<yql.TSecureParam>");
+    return repr.Release();
+}
+
+// tp_call slot of yql.TSecureParam: takes one string/buffer argument (the
+// key) and returns the value obtained from the value builder's
+// GetSecureParam. Returns nullptr when the argument cannot be parsed, or when
+// PY_TRY/PY_CATCH translates an error (including a failed lookup).
+// Keyword arguments are not used.
+PyObject* SecureParamCall(PyObject* self, PyObject* args, PyObject* kwargs)
+{
+ Y_UNUSED(kwargs);
+
+ struct PyBufDeleter {
+ void operator() (Py_buffer* view) { PyBuffer_Release(view); }
+ };
+ Py_buffer input;
+ if (!PyArg_ParseTuple(args, "s*", &input)) {
+ return nullptr;
+ }
+ // Releases the buffer on every exit path below; `input` itself lives on
+ // the stack, only PyBuffer_Release is invoked on it.
+ std::unique_ptr<Py_buffer, PyBufDeleter> bufPtr(&input);
+ auto valueBuilder = CastToSecureParam(self)->CastCtx->ValueBuilder;
+ NUdf::TStringRef key(static_cast<const char*>(input.buf), input.len);
+ PY_TRY {
+ // `key` is passed as both arguments: presumably the lookup key and the
+ // output reference that receives the value - confirm against
+ // IValueBuilder::GetSecureParam.
+ if (!valueBuilder->GetSecureParam(key, key)) {
+ throw yexception() << "Cannot get secure parameter for key: " << key;
+ }
+ return PyRepr(TStringBuf(key.Data(), key.Size())).Release();
+ } PY_CATCH(nullptr)
+}
+
+// Python type object for "yql.TSecureParam".
+// Only tp_name, tp_basicsize, tp_dealloc, tp_repr, tp_call and tp_doc are
+// populated; every other slot is null or zero. tp_new is null, and instances
+// are created from C++ in ToPySecureParam. The preprocessor guards select the
+// slots that exist in the Python version being compiled against.
+static PyTypeObject PySecureParamType = {
+ PyVarObject_HEAD_INIT(&PyType_Type, 0)
+ INIT_MEMBER(tp_name , "yql.TSecureParam"),
+ INIT_MEMBER(tp_basicsize , sizeof(TPySecureParam)),
+ INIT_MEMBER(tp_itemsize , 0),
+ INIT_MEMBER(tp_dealloc , SecureParamDealloc),
+#if PY_VERSION_HEX < 0x030800b4
+ INIT_MEMBER(tp_print , nullptr),
+#else
+ INIT_MEMBER(tp_vectorcall_offset, 0),
+#endif
+ INIT_MEMBER(tp_getattr , nullptr),
+ INIT_MEMBER(tp_setattr , nullptr),
+#if PY_MAJOR_VERSION >= 3
+ INIT_MEMBER(tp_as_async , nullptr),
+#else
+ INIT_MEMBER(tp_compare , nullptr),
+#endif
+ INIT_MEMBER(tp_repr , SecureParamRepr),
+ INIT_MEMBER(tp_as_number , nullptr),
+ INIT_MEMBER(tp_as_sequence , nullptr),
+ INIT_MEMBER(tp_as_mapping , nullptr),
+ INIT_MEMBER(tp_hash , nullptr),
+ INIT_MEMBER(tp_call , SecureParamCall),
+ INIT_MEMBER(tp_str , nullptr),
+ INIT_MEMBER(tp_getattro , nullptr),
+ INIT_MEMBER(tp_setattro , nullptr),
+ INIT_MEMBER(tp_as_buffer , nullptr),
+ INIT_MEMBER(tp_flags , 0),
+ INIT_MEMBER(tp_doc , "yql.TSecureParam object"),
+ INIT_MEMBER(tp_traverse , nullptr),
+ INIT_MEMBER(tp_clear , nullptr),
+ INIT_MEMBER(tp_richcompare , nullptr),
+ INIT_MEMBER(tp_weaklistoffset , 0),
+ INIT_MEMBER(tp_iter , nullptr),
+ INIT_MEMBER(tp_iternext , nullptr),
+ INIT_MEMBER(tp_methods , nullptr),
+ INIT_MEMBER(tp_members , nullptr),
+ INIT_MEMBER(tp_getset , nullptr),
+ INIT_MEMBER(tp_base , nullptr),
+ INIT_MEMBER(tp_dict , nullptr),
+ INIT_MEMBER(tp_descr_get , nullptr),
+ INIT_MEMBER(tp_descr_set , nullptr),
+ INIT_MEMBER(tp_dictoffset , 0),
+ INIT_MEMBER(tp_init , nullptr),
+ INIT_MEMBER(tp_alloc , nullptr),
+ INIT_MEMBER(tp_new , nullptr),
+ INIT_MEMBER(tp_free , nullptr),
+ INIT_MEMBER(tp_is_gc , nullptr),
+ INIT_MEMBER(tp_bases , nullptr),
+ INIT_MEMBER(tp_mro , nullptr),
+ INIT_MEMBER(tp_cache , nullptr),
+ INIT_MEMBER(tp_subclasses , nullptr),
+ INIT_MEMBER(tp_weaklist , nullptr),
+ INIT_MEMBER(tp_del , nullptr),
+ INIT_MEMBER(tp_version_tag , 0),
+#if PY_MAJOR_VERSION >= 3
+ INIT_MEMBER(tp_finalize , nullptr),
+#endif
+#if PY_VERSION_HEX >= 0x030800b1
+ INIT_MEMBER(tp_vectorcall , nullptr),
+#endif
+#if PY_VERSION_HEX >= 0x030800b4 && PY_VERSION_HEX < 0x03090000
+ INIT_MEMBER(tp_print , nullptr),
+#endif
+};
+
+// Creates a new Python object of PySecureParamType bound to the cast context.
+// The wrapper is allocated with `new` and released by SecureParamDealloc.
+TPyObjectPtr ToPySecureParam(const TPyCastContext::TPtr& castCtx)
+{
+    auto* const param = new TPySecureParam(castCtx);
+    PyObject_INIT(param, &PySecureParamType);
+
+    PyObject* const asPyObject = reinterpret_cast<PyObject*>(param);
+    return asPyObject;
+}
+
+
+// Applies per-function settings stored as attributes on the Python object
+// `value` to the cast context, then installs the `_yql_secure_param` helper
+// on `value`.
+//
+// Optional attributes (a missing attribute leaves the context untouched):
+//   _yql_lazy_input        - converted to bool -> castCtx->LazyInputObjects
+//   _yql_convert_yson      - tuple of exactly two callables
+//                            -> castCtx->YsonConverterIn / YsonConverterOut
+//   _yql_bytes_decode_mode - bytes or unicode, "never" or "strict"
+//                            -> castCtx->BytesDecodeMode
+//
+// Throws yexception when an attribute is present but malformed, or when
+// `_yql_secure_param` cannot be set.
+void SetupCallableSettings(const TPyCastContext::TPtr& castCtx, PyObject* value) {
+    if (const auto lazyInput = GetOptionalAttribute(value, "_yql_lazy_input")) try {
+        castCtx->LazyInputObjects = PyCast<bool>(lazyInput->Get());
+    } catch (const yexception& e) {
+        throw yexception() << "Cannot parse attribute '_yql_lazy_input', error: " << e.what();
+    }
+
+    if (const auto convertYson = GetOptionalAttribute(value, "_yql_convert_yson")) try {
+        PyObject* const converters = convertYson->Get();
+        // PyTuple_GET_SIZE / PyTuple_GET_ITEM do no type checking, so the
+        // object must be verified to be a tuple before they are used.
+        if (!PyTuple_Check(converters) || PyTuple_GET_SIZE(converters) != 2) {
+            throw yexception() << "Expected tuple of 2 callables";
+        }
+
+        PyObject* const converterIn = PyTuple_GET_ITEM(converters, 0);
+        PyObject* const converterOut = PyTuple_GET_ITEM(converters, 1);
+        // Validate before storing so that a rejected attribute never leaves
+        // non-callable objects behind in the context.
+        if (!PyCallable_Check(converterIn) || !PyCallable_Check(converterOut)) {
+            throw yexception() << "Expected tuple of 2 callables";
+        }
+
+        castCtx->YsonConverterIn.ResetAddRef(converterIn);
+        castCtx->YsonConverterOut.ResetAddRef(converterOut);
+    } catch (const yexception& e) {
+        throw yexception() << "Cannot parse attribute '_yql_convert_yson', error: " << e.what();
+    }
+
+    if (const auto bytesDecodeMode = GetOptionalAttribute(value, "_yql_bytes_decode_mode")) try {
+        PyObject* const mode = bytesDecodeMode->Get();
+        const bool isBytes = PyBytes_Check(mode);
+        if (!isBytes && !PyUnicode_Check(mode)) {
+            throw yexception() << "Expected bytes or unicode";
+        }
+
+        // Both conversions return a new reference; the smart pointer owns it,
+        // so it is released on every exit path, including the throws below.
+        const TPyObjectPtr bytesValue = isBytes
+            ? PyObject_Bytes(mode)
+            : PyUnicode_AsUTF8String(mode);
+        if (!bytesValue) {
+            PyErr_Clear();
+            throw yexception() << "Failed to convert to bytes";
+        }
+
+        const TStringBuf view(PyBytes_AS_STRING(bytesValue.Get()));
+        if (view == "never") {
+            castCtx->BytesDecodeMode = EBytesDecodeMode::Never;
+        } else if (view == "strict") {
+            castCtx->BytesDecodeMode = EBytesDecodeMode::Strict;
+        } else {
+            throw yexception() << "Expected values 'never' or 'strict'";
+        }
+    } catch (const yexception& e) {
+        throw yexception() << "Cannot parse attribute '_yql_bytes_decode_mode', error: " << e.what();
+    }
+
+    if (PyObject_SetAttrString(value, "_yql_secure_param", ToPySecureParam(castCtx).Get()) != 0) {
+        // Report the underlying Python error, as GetOptionalAttribute does.
+        throw yexception() << "Cannot set attribute '_yql_secure_param', error: " << GetLastErrorAsString();
+    }
+}
+
+} // namespace NPython
diff --git a/yql/essentials/udfs/common/python/bindings/py_callable.h b/yql/essentials/udfs/common/python/bindings/py_callable.h
new file mode 100644
index 00000000000..4ce79e1d7f4
--- /dev/null
+++ b/yql/essentials/udfs/common/python/bindings/py_callable.h
@@ -0,0 +1,22 @@
+#pragma once
+
+#include "py_ptr.h"
+#include "py_ctx.h"
+
+namespace NPython {
+
+// Python type object of the wrapper created by ToPyCallable ("yql.TCallable").
+extern PyTypeObject PyCallableType;
+
+// Wraps a boxed callable UDF value of the given callable type into a Python
+// object that can be called from Python code.
+TPyObjectPtr ToPyCallable(
+ const TPyCastContext::TPtr& castCtx,
+ const NKikimr::NUdf::TType* type,
+ const NKikimr::NUdf::TUnboxedValuePod& value);
+
+// Wraps a Python callable into a boxed UDF value of the given callable type.
+NKikimr::NUdf::TUnboxedValue FromPyCallable(
+ const TPyCastContext::TPtr& castCtx,
+ const NKikimr::NUdf::TType* type,
+ PyObject* value);
+
+// Reads the optional `_yql_lazy_input`, `_yql_convert_yson` and
+// `_yql_bytes_decode_mode` attributes of `value` into the cast context and
+// sets the `_yql_secure_param` attribute on `value`. Throws yexception when
+// an attribute is malformed.
+void SetupCallableSettings(const TPyCastContext::TPtr& castCtx, PyObject* value);
+
+} // namespace NPython
diff --git a/yql/essentials/udfs/common/python/bindings/py_callable_ut.cpp b/yql/essentials/udfs/common/python/bindings/py_callable_ut.cpp
new file mode 100644
index 00000000000..1c58d7b3714
--- /dev/null
+++ b/yql/essentials/udfs/common/python/bindings/py_callable_ut.cpp
@@ -0,0 +1,87 @@
+#include "ut3/py_test_engine.h"
+
+#include <library/cpp/testing/unittest/registar.h>
+
+
+using namespace NPython;
+
+// Tests for converting callables between MiniKQL boxed values and Python.
+Y_UNIT_TEST_SUITE(TPyCallableTest) {
+ // Native callable used by the ToPython test: returns its first argument
+ // (read as ui32) plus 42.
+ struct TTestCallable: public NUdf::TBoxedValue {
+ NUdf::TUnboxedValue Run(
+ const NUdf::IValueBuilder* valueBuilder,
+ const NUdf::TUnboxedValuePod* args) const override
+ {
+ Y_UNUSED(valueBuilder);
+ return NUdf::TUnboxedValuePod(args[0].Get<ui32>() + 42);
+ }
+ };
+
+ // A Python function returned from Test() is converted to a boxed value
+ // and invoked from C++ with a string and a count.
+ Y_UNIT_TEST(FromPyFunction) {
+ TPythonTestEngine engine;
+ const NUdf::IValueBuilder* vb = &engine.GetValueBuilder();
+
+ engine.ToMiniKQL<char* (*)(char*, ui32)>(
+ "def Test():\n"
+ " def test(str, count):\n"
+ " return str * count\n"
+ " return test",
+ [vb](const NUdf::TUnboxedValuePod& value) {
+ UNIT_ASSERT(value);
+ UNIT_ASSERT(value.IsBoxed());
+ NUdf::TUnboxedValue args[2];
+ args[0] = vb->NewString("j");
+ args[1] = NUdf::TUnboxedValuePod((ui32) 5);
+ auto result = value.Run(vb, args);
+
+ UNIT_ASSERT(result);
+ UNIT_ASSERT(5 == result.AsStringRef().Size());
+ UNIT_ASSERT_STRINGS_EQUAL(result.AsStringRef(), "jjjjj");
+ });
+ }
+
+ // A native callable is exposed to Python; the script checks the wrapper's
+ // type name and the results of calling it.
+ Y_UNIT_TEST(ToPython) {
+ TPythonTestEngine engine;
+ engine.ToPython<i32 (*)(i32)>(
+ [](const TType* type, const NUdf::IValueBuilder& vb) {
+ Y_UNUSED(type); Y_UNUSED(vb);
+ return NUdf::TUnboxedValuePod(new TTestCallable);
+ },
+ "def Test(value):\n"
+ " assert type(value).__name__ == 'TCallable'\n"
+ " assert value.__call__ != None\n"
+ " assert value(-2) == 40\n"
+ " assert value(-1) == 41\n"
+ " assert value(0) == 42\n"
+ " assert value(1) == 43\n"
+ " assert value(2) == 44\n");
+ }
+
+ // A native callable passes through Python unchanged (identity function)
+ // and is then invoked from C++ again.
+ Y_UNIT_TEST(ToPythonAndBack) {
+ // Local definition that shadows the suite-level TTestCallable; the
+ // body is the same.
+ struct TTestCallable: public NUdf::TBoxedValue {
+ NUdf::TUnboxedValue Run(
+ const NUdf::IValueBuilder* valueBuilder,
+ const NUdf::TUnboxedValuePod* args) const override
+ {
+ Y_UNUSED(valueBuilder);
+ return NUdf::TUnboxedValuePod(args[0].Get<ui32>() + 42);
+ }
+ };
+
+ TPythonTestEngine engine;
+ engine.ToPythonAndBack<i32 (*)(i32)>(
+ [](const TType* type, const NUdf::IValueBuilder& vb) {
+ Y_UNUSED(type); Y_UNUSED(vb);
+ return NUdf::TUnboxedValuePod(new TTestCallable);
+ },
+ "def Test(value): return value",
+ [](const NUdf::TUnboxedValuePod& value) {
+ UNIT_ASSERT(value);
+ UNIT_ASSERT(value.IsBoxed());
+ NUdf::TUnboxedValue arg = NUdf::TUnboxedValuePod((ui32) 5);
+ const auto result = value.Run(nullptr, &arg);
+
+ UNIT_ASSERT(result);
+ UNIT_ASSERT_VALUES_EQUAL(47, result.Get<ui32>());
+ });
+ }
+}
diff --git a/yql/essentials/udfs/common/python/bindings/py_cast.cpp b/yql/essentials/udfs/common/python/bindings/py_cast.cpp
new file mode 100644
index 00000000000..3aa5537b21b
--- /dev/null
+++ b/yql/essentials/udfs/common/python/bindings/py_cast.cpp
@@ -0,0 +1,955 @@
+#include "py_cast.h"
+#include "py_ptr.h"
+#include "py_errors.h"
+#include "py_callable.h"
+#include "py_dict.h"
+#include "py_list.h"
+#include "py_gil.h"
+#include "py_utils.h"
+#include "py_void.h"
+#include "py_resource.h"
+#include "py_stream.h"
+#include "py_struct.h"
+#include "py_tuple.h"
+#include "py_variant.h"
+#include "py_decimal.h"
+
+#include <yql/essentials/public/udf/udf_value_builder.h>
+#include <yql/essentials/public/udf/udf_type_inspection.h>
+#include <yql/essentials/public/udf/udf_type_printer.h>
+#include <yql/essentials/public/udf/udf_terminator.h>
+#include <yql/essentials/utils/utf8.h>
+
+#include <library/cpp/containers/stack_vector/stack_vec.h>
+
+#include <util/string/join.h>
+#include <util/string/builder.h>
+
+// Aliases for the CPython integer conversion routines: the `long long`
+// variants when HAVE_LONG_LONG is defined, the plain `long` variants
+// otherwise. The YQL_PyLong_As<BigType> names are token-pasted by the
+// *_LONG casting macros below.
+#ifdef HAVE_LONG_LONG
+# define YQL_PyLong_AsUnsignedMask PyLong_AsUnsignedLongLongMask
+# define YQL_PyLong_Asi64 PyLong_AsLongLong
+# define YQL_PyLong_Asui64 PyLong_AsUnsignedLongLong
+#else
+# define YQL_PyLong_AsUnsignedMask PyLong_AsUnsignedLongMask
+# define YQL_PyLong_Asi64 PyLong_AsLong
+# define YQL_PyLong_Asui64 PyLong_AsUnsignedLong
+#endif
+
+// Defines the PyCast<Type>(Type) specialization that converts a native value
+// to a Python object by passing it to Py_BuildValue with the format string
+// `Format`.
+#define TO_PYTHON(Format, Type) \
+ template <> \
+ ::NPython::TPyObjectPtr PyCast<Type>(Type value) { \
+ return Py_BuildValue(Format, value); \
+ }
+
+// Defines the PyCast<Type>(const Type&) specialization for string-like types:
+// the value is viewed as a TStringBuf and copied into a Python bytes object.
+// A view whose data pointer is null is mapped to Python None.
+#define TO_PYTHON_BYTES(Type) \
+ template <> \
+ ::NPython::TPyObjectPtr PyCast<Type>(const Type& val) { \
+ TStringBuf value = val; \
+ if (value.data() == nullptr) \
+ Py_RETURN_NONE; \
+ const Py_ssize_t size = static_cast<Py_ssize_t>(value.size()); \
+ return PyBytes_FromStringAndSize(value.data(), size); \
+ }
+
+// Defines the ToPyUnicode<Type>(const Type&) specialization for string-like
+// types: the value is viewed as a TStringBuf and passed to
+// PyUnicode_FromStringAndSize. A view whose data pointer is null is mapped to
+// Python None. The returned pointer is whatever PyUnicode_FromStringAndSize
+// produced; callers in this file test it for null to detect a failed decode.
+#define TO_PYTHON_UNICODE(Type) \
+ template <> \
+ ::NPython::TPyObjectPtr ToPyUnicode<Type>(const Type& val) { \
+ TStringBuf value = val; \
+ if (value.data() == nullptr) \
+ Py_RETURN_NONE; \
+ Py_ssize_t size = static_cast<Py_ssize_t>(value.size()); \
+ return PyUnicode_FromStringAndSize(value.data(), size); \
+ }
+
+// Throws yexception unless Py<Type>_Check(Value) succeeds. The message is
+// `Message`, followed by the stringified type name and the repr of the
+// offending object.
+#define PY_ENSURE_TYPE(Type, Value, Message) \
+ do { \
+ if (!Py##Type##_Check(Value)) { \
+ throw yexception() << Message << " " #Type "; Object repr: " \
+ << PyObjectRepr(Value); \
+ } \
+ } while (0)
+
+// Defines the throwing PyCast<Type>(PyObject*) specialization for floating
+// point types. The object is converted with PyFloat_AsDouble; a result of
+// -1.0 together with a pending Python error is treated as failure and
+// reported through ThrowCastException. Otherwise the double is narrowed to
+// Type with static_cast.
+// NOTE(review): PyErr_Clear() runs before ThrowCastException(), which appends
+// GetLastErrorAsString(); the Python error text may therefore be lost in the
+// message - confirm against GetLastErrorAsString().
+#define FROM_PYTHON_FLOAT(Type) \
+ template <> \
+ Type PyCast<Type>(PyObject* value) { \
+ double result = PyFloat_AsDouble(value); \
+ if (result == -1.0 && PyErr_Occurred()) { \
+ PyErr_Clear(); \
+ ThrowCastException(value, "Float"); \
+ } \
+ return static_cast<Type>(result); \
+ }
+
+// Defines the throwing PyCast<Type>(PyObject*) specialization for integral
+// types (Python 3 flavour: only `int`/PyLong objects are accepted).
+//   Type    - destination native integer type.
+//   BigType - i64 or ui64; selects YQL_PyLong_As<BigType> for the conversion.
+// Throws yexception when the object is not a PyLong, when the conversion
+// itself fails, or when the converted value does not fit into Type.
+//
+// The failure sentinel is compared in the type actually returned by the
+// conversion routine. Comparing against static_cast<Type>(-1L) (as was done
+// before) never matches for narrow unsigned types such as ui8/ui16/ui32, so a
+// failed conversion was misreported as "out of range" with the Python error
+// indicator left set.
+// NOTE(review): PyErr_Clear() runs before ThrowCastException(), which appends
+// GetLastErrorAsString(); the Python error text may be lost - confirm.
+#define FROM_PYTHON_LONG(Type, BigType) \
+    template <> \
+    Type PyCast<Type>(PyObject* value) { \
+        if (PyLong_Check(value)) { \
+            auto result = YQL_PyLong_As##BigType(value); \
+            if (result == static_cast<decltype(result)>(-1) && PyErr_Occurred()) { \
+                PyErr_Clear(); \
+                ThrowCastException(value, "Long"); \
+            } \
+            if (result < Min<Type>() || result > Max<Type>()) { \
+                throw yexception() << "Python object " << PyObjectRepr(value) \
+                                   << " is out of range for " << #Type; \
+            } \
+            return static_cast<Type>(result); \
+        } \
+        ThrowCastTypeException(value, "Long"); \
+    }
+
+// Defines the throwing PyCast<Type>(PyObject*) specialization for integral
+// types (Python 2 flavour: both `int`/PyInt and `long`/PyLong are accepted).
+//   Type    - destination native integer type.
+//   BigType - i64 or ui64; selects YQL_PyLong_As<BigType> for PyLong objects.
+// Throws yexception when the object is neither PyInt nor PyLong, when the
+// conversion fails, or when the value does not fit into Type.
+//
+// In the PyInt branch each bound is only checked when `long` is actually
+// wider than Type on that side, so that static_cast<long>(Min/Max<Type>())
+// cannot truncate.
+// In the PyLong branch the failure sentinel is compared in the type returned
+// by the conversion routine; comparing against static_cast<Type>(-1L) never
+// matches for narrow unsigned types and left the Python error pending.
+#define FROM_PYTHON_INT_OR_LONG(Type, BigType) \
+    template <> \
+    Type PyCast<Type>(PyObject* value) { \
+        if (PyInt_Check(value)) { \
+            long result = PyInt_AsLong(value); \
+            if (result == -1L && PyErr_Occurred()) { \
+                PyErr_Clear(); \
+                ThrowCastException(value, "Long"); \
+            } \
+            if ( \
+                (static_cast<i64>(Min<long>()) < static_cast<i64>(Min<Type>()) && result < static_cast<long>(Min<Type>())) || \
+                (static_cast<ui64>(Max<long>()) > static_cast<ui64>(Max<Type>()) && result > static_cast<long>(Max<Type>())) \
+            ) { \
+                throw yexception() << "Python object " << PyObjectRepr(value) \
+                                   << " is out of range for " << #Type; \
+            } \
+            return static_cast<Type>(result); \
+        } else if (PyLong_Check(value)) { \
+            auto result = YQL_PyLong_As##BigType(value); \
+            if (result == static_cast<decltype(result)>(-1) && PyErr_Occurred()) { \
+                PyErr_Clear(); \
+                ThrowCastException(value, "Long"); \
+            } \
+            if (result < Min<Type>() || result > Max<Type>()) { \
+                throw yexception() << "Python object " << PyObjectRepr(value) \
+                                   << " is out of range for " << #Type; \
+            } \
+            return static_cast<Type>(result); \
+        } \
+        ThrowCastTypeException(value, "Long"); \
+    }
+
+// Defines the throwing PyCast<Type>(PyObject*) specialization for string-like
+// types (Python 3 flavour). A `str` object is exposed through
+// PyUnicode_AsUTF8AndSize, a `bytes` object through PyBytes_AsStringAndSize;
+// anything else, or a failed conversion, throws via ThrowCastTypeException.
+// Type is constructed from (pointer, size) of the buffer returned by CPython.
+// NOTE(review): for non-owning Type arguments the result presumably refers to
+// memory owned by `value` - confirm callers keep `value` alive.
+//
+// The Python error indicator is cleared before throwing so that a failed
+// conversion does not leave a stale Python exception behind, matching the
+// numeric cast macros.
+#define FROM_PYTHON_BYTES_OR_UTF(Type) \
+    template <> \
+    Type PyCast<Type>(PyObject* value) { \
+        if (PyUnicode_Check(value)) { \
+            Py_ssize_t size = 0; \
+            const auto str = PyUnicode_AsUTF8AndSize(value, &size); \
+            if (!str || size < 0) { \
+                PyErr_Clear(); \
+                ThrowCastTypeException(value, "String"); \
+            } \
+            return Type(str, size_t(size)); \
+        } else if (PyBytes_Check(value)) { \
+            Py_ssize_t size = 0; \
+            char* str = nullptr; \
+            const auto rc = PyBytes_AsStringAndSize(value, &str, &size); \
+            if (rc == -1 || size < 0) { \
+                PyErr_Clear(); \
+                ThrowCastTypeException(value, "String"); \
+            } \
+            return Type(str, size_t(size)); \
+        } \
+        ThrowCastTypeException(value, "String"); \
+    }
+
+// Defines the throwing PyCast<Type>(PyObject*) specialization for string-like
+// types that accepts only objects passing PyBytes_Check (enforced by
+// PY_ENSURE_TYPE). Type is constructed from the (pointer, size) pair returned
+// by PyBytes_AsStringAndSize; a failed call throws via
+// ThrowCastTypeException.
+#define FROM_PYTHON_BYTES(Type) \
+ template <> \
+ Type PyCast<Type>(PyObject* value) { \
+ PY_ENSURE_TYPE(Bytes, value, "Expected"); \
+ char* str = nullptr; \
+ Py_ssize_t size = 0; \
+ const auto rc = PyBytes_AsStringAndSize(value, &str, &size); \
+ if (rc == -1 || size < 0) { \
+ ThrowCastTypeException(value, "String"); \
+ } \
+ return Type(str, size_t(size)); \
+ }
+
+// Defines the non-throwing TryPyCast<Type>(PyObject*, Type&) specialization
+// for floating point types. Returns false (after clearing the Python error)
+// when PyFloat_AsDouble fails; otherwise stores the value narrowed to Type in
+// `result` and returns true.
+#define TRY_FROM_PYTHON_FLOAT(Type) \
+ template <> \
+ bool TryPyCast<Type>(PyObject* value, Type& result) { \
+ double v = PyFloat_AsDouble(value); \
+ if (v == -1.0 && PyErr_Occurred()) { \
+ PyErr_Clear(); \
+ return false; \
+ } \
+ result = static_cast<Type>(v); \
+ return true; \
+ }
+
+// Defines the non-throwing TryPyCast<Type>(PyObject*, Type&) specialization
+// for integral types (Python 3 flavour: only PyLong objects are accepted).
+//   Type    - destination native integer type.
+//   BigType - i64 or ui64; selects YQL_PyLong_As<BigType> for the conversion.
+// Returns true and stores the value in `res` on success. Returns false,
+// leaving `res` untouched, when the object is not a PyLong, the conversion
+// fails, or the value does not fit into Type.
+//
+// The failure sentinel is compared in the type returned by the conversion
+// routine. Comparing against static_cast<Type>(-1L) never matches for narrow
+// unsigned types (ui8/ui16/ui32), which made the function return false while
+// leaving the Python exception pending.
+#define TRY_FROM_PYTHON_LONG(Type, BigType) \
+    template <> \
+    bool TryPyCast<Type>(PyObject* value, Type& res) { \
+        if (PyLong_Check(value)) { \
+            auto result = YQL_PyLong_As##BigType(value); \
+            if (result == static_cast<decltype(result)>(-1) && PyErr_Occurred()) { \
+                PyErr_Clear(); \
+                return false; \
+            } \
+            if (result < Min<Type>() || result > Max<Type>()) { \
+                return false; \
+            } \
+            res = static_cast<Type>(result); \
+            return true; \
+        } \
+        return false; \
+    }
+
+// Defines the non-throwing TryPyCast<Type>(PyObject*, Type&) specialization
+// for integral types (Python 2 flavour: PyInt and PyLong are accepted).
+//   Type    - destination native integer type.
+//   BigType - i64 or ui64; selects YQL_PyLong_As<BigType> for PyLong objects.
+// Returns true and stores the value in `res` on success. Returns false,
+// leaving `res` untouched, when the object has another type, the conversion
+// fails, or the value does not fit into Type.
+//
+// Fixes relative to the previous revision:
+//  * `res` is written only after the range check has passed;
+//  * the PyInt lower bound is checked only when `long` reaches below
+//    Min<Type>(), mirroring FROM_PYTHON_INT_OR_LONG, so that
+//    static_cast<long>(Min<Type>()) cannot truncate when `long` is narrower
+//    than Type;
+//  * the PyLong failure sentinel is compared in the type returned by the
+//    conversion routine, so errors for narrow unsigned types are detected
+//    and cleared.
+#define TRY_FROM_PYTHON_INT_OR_LONG(Type, BigType) \
+    template <> \
+    bool TryPyCast<Type>(PyObject* value, Type& res) { \
+        if (PyInt_Check(value)) { \
+            long result = PyInt_AsLong(value); \
+            if (result == -1L && PyErr_Occurred()) { \
+                PyErr_Clear(); \
+                return false; \
+            } \
+            if ( \
+                (static_cast<i64>(Min<long>()) < static_cast<i64>(Min<Type>()) && result < static_cast<long>(Min<Type>())) || \
+                (static_cast<ui64>(Max<long>()) > static_cast<ui64>(Max<Type>()) && result > static_cast<long>(Max<Type>())) \
+            ) { \
+                return false; \
+            } \
+            res = static_cast<Type>(result); \
+            return true; \
+        } else if (PyLong_Check(value)) { \
+            auto result = YQL_PyLong_As##BigType(value); \
+            if (result == static_cast<decltype(result)>(-1) && PyErr_Occurred()) { \
+                PyErr_Clear(); \
+                return false; \
+            } \
+            if (result < Min<Type>() || result > Max<Type>()) { \
+                return false; \
+            } \
+            res = static_cast<Type>(result); \
+            return true; \
+        } \
+        return false; \
+    }
+
+// Defines the non-throwing TryPyCast(PyObject*, Type&) specialization for
+// string-like types (Python 3 flavour). A `str` object is exposed through
+// PyUnicode_AsUTF8AndSize, a `bytes` object through PyBytes_AsStringAndSize.
+// Returns true and assigns Type(pointer, size) to `result` on success;
+// returns false for any other object type or when the conversion fails.
+//
+// Every failure path returns false with the Python error indicator cleared.
+// The previous revision threw from the bytes branch and left the error
+// pending in the unicode branch, which contradicts the "Try" contract
+// followed by the other TryPyCast specializations.
+#define TRY_FROM_PYTHON_BYTES_OR_UTF(Type) \
+    template <> \
+    bool TryPyCast(PyObject* value, Type& result) { \
+        if (PyUnicode_Check(value)) { \
+            Py_ssize_t size = 0; \
+            const auto str = PyUnicode_AsUTF8AndSize(value, &size); \
+            if (!str || size < 0) { \
+                PyErr_Clear(); \
+                return false; \
+            } \
+            result = Type(str, size_t(size)); \
+            return true; \
+        } else if (PyBytes_Check(value)) { \
+            Py_ssize_t size = 0; \
+            char* str = nullptr; \
+            const auto rc = PyBytes_AsStringAndSize(value, &str, &size); \
+            if (rc == -1 || size < 0) { \
+                PyErr_Clear(); \
+                return false; \
+            } \
+            result = Type(str, size_t(size)); \
+            return true; \
+        } \
+        return false; \
+    }
+
+// Defines the non-throwing TryPyCast(PyObject*, Type&) specialization for
+// string-like types (Python 2 flavour). A `unicode` object is first encoded
+// to a temporary UTF-8 bytes object; a bytes/str object is read directly.
+// Returns true and assigns Type(pointer, size) to `result` on success,
+// false otherwise (with the Python error indicator cleared).
+//
+// The result of PyUnicode_AsUTF8String is now checked for null before it is
+// passed on; previously a failed encode handed a null object to
+// PyBytes_AsStringAndSize.
+// NOTE(review): in the unicode branch `str` points into the temporary `utf8`
+// object, which is released when the branch exits. For a Type that does not
+// copy its input the stored result would then dangle - confirm which Type
+// arguments are non-owning and whether callers rely on this branch.
+#define TRY_FROM_PYTHON_STR_OR_UTF(Type) \
+    template <> \
+    bool TryPyCast(PyObject* value, Type& result) { \
+        if (PyUnicode_Check(value)) { \
+            const TPyObjectPtr utf8(PyUnicode_AsUTF8String(value)); \
+            if (!utf8) { \
+                PyErr_Clear(); \
+                return false; \
+            } \
+            char* str = nullptr; \
+            Py_ssize_t size = 0; \
+            int rc = PyBytes_AsStringAndSize(utf8.Get(), &str, &size); \
+            if (rc == -1 || size < 0) { \
+                PyErr_Clear(); \
+                return false; \
+            } \
+            result = Type(str, size_t(size)); \
+            return true; \
+        } else if (PyBytes_Check(value)) { \
+            char* str = nullptr; \
+            Py_ssize_t size = 0; \
+            int rc = PyBytes_AsStringAndSize(value, &str, &size); \
+            if (rc == -1 || size < 0) { \
+                PyErr_Clear(); \
+                return false; \
+            } \
+            result = Type(str, size_t(size)); \
+            return true; \
+        } else { \
+            return false; \
+        } \
+    }
+
+namespace NPython {
+
+using namespace NKikimr;
+
+// Throws yexception reporting that `value` has a Python type that cannot be
+// converted to `toType`; the message carries the Python type name and the
+// object's repr. Never returns, which is declared explicitly because the cast
+// macros call it as the last statement of value-returning functions.
+[[noreturn]] inline void ThrowCastTypeException(PyObject* value, TStringBuf toType) {
+    throw yexception() << "Can't cast object '" << Py_TYPE(value)->tp_name << "' to " << toType
+                       << "; Object repr: " << PyObjectRepr(value);
+}
+
+// Throws yexception reporting that converting `value` to `toType` failed;
+// the message carries the object's repr followed by the text returned by
+// GetLastErrorAsString(). Never returns.
+[[noreturn]] inline void ThrowCastException(PyObject* value, TStringBuf toType) {
+    throw yexception() << "Cast error object " << PyObjectRepr(value) << " to " << toType << ": "
+                       << GetLastErrorAsString();
+}
+
+
+// Non-throwing conversion of an arbitrary Python object to bool using Python
+// truthiness (PyObject_IsTrue). Returns false, leaving `result` untouched,
+// when evaluating truthiness raised; the Python error is cleared in that case
+// so that, like the other TryPyCast specializations, a failed attempt leaves
+// no pending exception behind.
+template <>
+bool TryPyCast<bool>(PyObject* value, bool& result)
+{
+    const int isTrue = PyObject_IsTrue(value);
+    if (isTrue == -1) {
+        PyErr_Clear();
+        return false;
+    }
+    result = (isTrue == 1);
+    return true;
+}
+
+// Instantiate the non-throwing TryPyCast specializations. Signed types are
+// converted through i64 and unsigned types through ui64; the Python major
+// version selects which integer and string macros are used.
+#if PY_MAJOR_VERSION >= 3
+TRY_FROM_PYTHON_LONG(i8, i64)
+TRY_FROM_PYTHON_LONG(ui8, ui64)
+TRY_FROM_PYTHON_LONG(i16, i64)
+TRY_FROM_PYTHON_LONG(ui16, ui64)
+TRY_FROM_PYTHON_LONG(i32, i64)
+TRY_FROM_PYTHON_LONG(ui32, ui64)
+TRY_FROM_PYTHON_LONG(i64, i64)
+TRY_FROM_PYTHON_LONG(ui64, ui64)
+TRY_FROM_PYTHON_BYTES_OR_UTF(TString)
+TRY_FROM_PYTHON_BYTES_OR_UTF(NUdf::TStringRef)
+#else
+TRY_FROM_PYTHON_INT_OR_LONG(i8, i64)
+TRY_FROM_PYTHON_INT_OR_LONG(ui8, ui64)
+TRY_FROM_PYTHON_INT_OR_LONG(i16, i64)
+TRY_FROM_PYTHON_INT_OR_LONG(ui16, ui64)
+TRY_FROM_PYTHON_INT_OR_LONG(i32, i64)
+TRY_FROM_PYTHON_INT_OR_LONG(ui32, ui64)
+TRY_FROM_PYTHON_INT_OR_LONG(i64, i64)
+TRY_FROM_PYTHON_INT_OR_LONG(ui64, ui64)
+TRY_FROM_PYTHON_STR_OR_UTF(TString)
+TRY_FROM_PYTHON_STR_OR_UTF(NUdf::TStringRef)
+#endif
+
+TRY_FROM_PYTHON_FLOAT(float)
+TRY_FROM_PYTHON_FLOAT(double)
+
+// Throwing conversion of an arbitrary Python object to bool based on Python
+// truthiness. PyObject_IsTrue yields 1 (truthy), 0 (falsy) or -1 (evaluation
+// raised); the last case is reported as yexception carrying the Python type
+// name and the text from GetLastErrorAsString().
+template <>
+bool PyCast<bool>(PyObject* value)
+{
+    const int truth = PyObject_IsTrue(value);
+    if (truth != -1) {
+        return truth == 1;
+    }
+    throw yexception() << "Can't cast object '" << Py_TYPE(value)->tp_name << "' to bool. "
+                       << GetLastErrorAsString();
+}
+
+// Instantiate the throwing PyCast<T>(PyObject*) specializations. Signed
+// types are converted through i64 and unsigned types through ui64; the Python
+// major version selects which integer and string macros are used.
+#if PY_MAJOR_VERSION >= 3
+FROM_PYTHON_LONG(i8, i64)
+FROM_PYTHON_LONG(ui8, ui64)
+FROM_PYTHON_LONG(i16, i64)
+FROM_PYTHON_LONG(ui16, ui64)
+FROM_PYTHON_LONG(i32, i64)
+FROM_PYTHON_LONG(ui32, ui64)
+FROM_PYTHON_LONG(i64, i64)
+FROM_PYTHON_LONG(ui64, ui64)
+FROM_PYTHON_BYTES_OR_UTF(TString)
+FROM_PYTHON_BYTES_OR_UTF(TStringBuf)
+FROM_PYTHON_BYTES_OR_UTF(NUdf::TStringRef)
+#else
+FROM_PYTHON_INT_OR_LONG(i8, i64)
+FROM_PYTHON_INT_OR_LONG(ui8, ui64)
+FROM_PYTHON_INT_OR_LONG(i16, i64)
+FROM_PYTHON_INT_OR_LONG(ui16, ui64)
+FROM_PYTHON_INT_OR_LONG(i32, i64)
+FROM_PYTHON_INT_OR_LONG(ui32, ui64)
+FROM_PYTHON_INT_OR_LONG(i64, i64)
+FROM_PYTHON_INT_OR_LONG(ui64, ui64)
+FROM_PYTHON_BYTES(TString)
+FROM_PYTHON_BYTES(TStringBuf)
+FROM_PYTHON_BYTES(NUdf::TStringRef)
+#endif
+
+FROM_PYTHON_FLOAT(float)
+FROM_PYTHON_FLOAT(double)
+
+// Converts a native bool to the matching Python singleton (Py_True or
+// Py_False). The pointer is wrapped with ADD_REF so the returned holder owns
+// its own reference to the singleton.
+template <>
+TPyObjectPtr PyCast<bool>(bool value)
+{
+    return TPyObjectPtr(value ? Py_True : Py_False, TPyObjectPtr::ADD_REF);
+}
+
+// Instantiate the native-to-Python conversions. Integers go through
+// Py_BuildValue with the format string given per type; the 64-bit formats
+// depend on HAVE_LONG_LONG. String-like types get both a bytes conversion
+// (PyCast) and a unicode conversion (ToPyUnicode).
+TO_PYTHON("b", i8)
+TO_PYTHON("B", ui8)
+TO_PYTHON("h", i16)
+TO_PYTHON("H", ui16)
+TO_PYTHON("i", i32)
+TO_PYTHON("I", ui32)
+#ifdef HAVE_LONG_LONG
+TO_PYTHON("L", i64)
+TO_PYTHON("K", ui64)
+#else
+TO_PYTHON("l", i64)
+TO_PYTHON("k", ui64)
+#endif
+
+TO_PYTHON_BYTES(TString)
+TO_PYTHON_BYTES(TStringBuf)
+TO_PYTHON_BYTES(NUdf::TStringRef)
+TO_PYTHON_UNICODE(TString)
+TO_PYTHON_UNICODE(TStringBuf)
+TO_PYTHON_UNICODE(NUdf::TStringRef)
+
+// Converts a Python 2-tuple (number, timezone name) into a timezone-aware
+// NUdf value.
+//   value    - Python object; must be a tuple with exactly two elements.
+//   limit    - exclusive upper bound for the numeric part.
+//   typeName - type name used in the out-of-range error message.
+//   ctx      - cast context; its value builder resolves the timezone name.
+// Throws yexception when the numeric part is not below `limit` or the
+// timezone name is unknown; PY_ENSURE guards the tuple shape.
+template <typename T>
+NUdf::TUnboxedValuePod FromPyTz(PyObject* value, T limit, TStringBuf typeName, const TPyCastContext::TPtr& ctx) {
+    PY_ENSURE(PyTuple_Check(value),
+        "Expected to get Tuple, but got " << Py_TYPE(value)->tp_name);
+
+    const Py_ssize_t tupleSize = PyTuple_GET_SIZE(value);
+    PY_ENSURE(tupleSize == 2,
+        "Expected to get Tuple with 2 elements, but got "
+        << tupleSize << " elements");
+
+    PyObject* const pyNumber = PyTuple_GET_ITEM(value, 0);
+    PyObject* const pyTzName = PyTuple_GET_ITEM(value, 1);
+
+    const auto num = PyCast<T>(pyNumber);
+    if (num >= limit) {
+        throw yexception() << "Python object " << PyObjectRepr(pyNumber)
+                           << " is out of range for " << typeName;
+    }
+
+    const auto name = PyCast<NUdf::TStringRef>(pyTzName);
+    ui32 tzId;
+    if (!ctx->ValueBuilder->GetDateBuilder().FindTimezoneId(name, tzId)) {
+        throw yexception() << "Unknown timezone: " << TStringBuf(name);
+    }
+
+    auto ret = NUdf::TUnboxedValuePod(num);
+    ret.SetTimezoneId(tzId);
+    return ret;
+}
+
+// Native floating point values are converted with Py_BuildValue using the
+// "f" and "d" format strings.
+TO_PYTHON("f", float)
+TO_PYTHON("d", double)
+
+namespace {
+
+// Converts an NUdf value of a Data (or Decimal) type to a Python object.
+//   ctx   - cast context (type helper, decode mode, Yson converter, timezone
+//           name cache).
+//   type  - NUdf type of `value`; its data type id selects the conversion.
+//   value - value to convert.
+// Numeric types go through PyCast; string-like types become bytes or unicode
+// depending on the type and ctx->BytesDecodeMode; timezone-aware types become
+// a 2-tuple (number, timezone name). Throws yexception for a type id that no
+// case handles.
+TPyObjectPtr ToPyData(const TPyCastContext::TPtr& ctx,
+ const NUdf::TType* type, const NUdf::TUnboxedValuePod& value)
+{
+ const NUdf::TDataAndDecimalTypeInspector inspector(*ctx->PyCtx->TypeInfoHelper, type);
+ const auto typeId = inspector.GetTypeId();
+
+ switch (typeId) {
+ case NUdf::TDataType<i8>::Id: return PyCast<i8>(value.Get<i8>());
+ case NUdf::TDataType<ui8>::Id: return PyCast<ui8>(value.Get<ui8>());
+ case NUdf::TDataType<i16>::Id: return PyCast<i16>(value.Get<i16>());
+ case NUdf::TDataType<ui16>::Id: return PyCast<ui16>(value.Get<ui16>());
+ case NUdf::TDataType<i32>::Id: return PyCast<i32>(value.Get<i32>());
+ case NUdf::TDataType<ui32>::Id: return PyCast<ui32>(value.Get<ui32>());
+ case NUdf::TDataType<i64>::Id: return PyCast<i64>(value.Get<i64>());
+ case NUdf::TDataType<ui64>::Id: return PyCast<ui64>(value.Get<ui64>());
+ case NUdf::TDataType<bool>::Id: return PyCast<bool>(value.Get<bool>());
+ case NUdf::TDataType<float>::Id: return PyCast<float>(value.Get<float>());
+ case NUdf::TDataType<double>::Id: return PyCast<double>(value.Get<double>());
+ case NUdf::TDataType<NUdf::TDecimal>::Id: return ToPyDecimal(ctx, value, inspector.GetPrecision(), inspector.GetScale());
+ case NUdf::TDataType<const char*>::Id: {
+ // Plain strings stay bytes unless the context asks for decoding; a
+ // failed decode terminates the UDF instead of throwing.
+ if (ctx->BytesDecodeMode == EBytesDecodeMode::Never) {
+ return PyCast<NUdf::TStringRef>(value.AsStringRef());
+ } else {
+ auto pyObj = ToPyUnicode<NUdf::TStringRef>(value.AsStringRef());
+ if (!pyObj) {
+ UdfTerminate((TStringBuilder() << ctx->PyCtx->Pos <<
+ "Failed to convert to unicode with _yql_bytes_decode_mode='strict':\n" <<
+ GetLastErrorAsString()).data()
+ );
+ }
+ return pyObj;
+ }
+ }
+ case NUdf::TDataType<NUdf::TYson>::Id: {
+ // Yson is passed as bytes; when an input converter is configured it
+ // is called with those bytes and its result is returned instead.
+ auto pyObj = PyCast<NUdf::TStringRef>(value.AsStringRef());
+ if (ctx->YsonConverterIn) {
+ TPyObjectPtr pyArgs(PyTuple_New(1));
+ PyTuple_SET_ITEM(pyArgs.Get(), 0, pyObj.Release());
+ pyObj = PyObject_CallObject(ctx->YsonConverterIn.Get(), pyArgs.Get());
+ if (!pyObj) {
+ UdfTerminate((TStringBuilder() << ctx->PyCtx->Pos << "Failed to execute:\n" << GetLastErrorAsString()).data());
+ }
+ }
+
+ return pyObj;
+ }
+ case NUdf::TDataType<NUdf::TUuid>::Id:
+ return PyCast<NUdf::TStringRef>(value.AsStringRef());
+ case NUdf::TDataType<NUdf::TJson>::Id:
+ case NUdf::TDataType<NUdf::TUtf8>::Id:
+ return ToPyUnicode<NUdf::TStringRef>(value.AsStringRef());
+ case NUdf::TDataType<NUdf::TDate>::Id: return PyCast<ui16>(value.Get<ui16>());
+ case NUdf::TDataType<NUdf::TDatetime>::Id: return PyCast<ui32>(value.Get<ui32>());
+ case NUdf::TDataType<NUdf::TTimestamp>::Id: return PyCast<ui64>(value.Get<ui64>());
+ case NUdf::TDataType<NUdf::TInterval>::Id: return PyCast<i64>(value.Get<i64>());
+ // Timezone-aware values: (number, timezone name) tuple; the name object
+ // comes from the context's GetTimezoneName().
+ case NUdf::TDataType<NUdf::TTzDate>::Id: {
+ TPyObjectPtr pyValue = PyCast<ui16>(value.Get<ui16>());
+ auto tzId = value.GetTimezoneId();
+ auto tzName = ctx->GetTimezoneName(tzId);
+ return PyTuple_Pack(2, pyValue.Get(), tzName.Get());
+ }
+ case NUdf::TDataType<NUdf::TTzDatetime>::Id: {
+ TPyObjectPtr pyValue = PyCast<ui32>(value.Get<ui32>());
+ auto tzId = value.GetTimezoneId();
+ auto tzName = ctx->GetTimezoneName(tzId);
+ return PyTuple_Pack(2, pyValue.Get(), tzName.Get());
+ }
+ case NUdf::TDataType<NUdf::TTzTimestamp>::Id: {
+ TPyObjectPtr pyValue = PyCast<ui64>(value.Get<ui64>());
+ auto tzId = value.GetTimezoneId();
+ auto tzName = ctx->GetTimezoneName(tzId);
+ return PyTuple_Pack(2, pyValue.Get(), tzName.Get());
+ }
+ }
+
+ throw yexception()
+ << "Unsupported type " << typeId;
+}
+
+// Converts a Python object to an NUdf value of a Data (or Decimal) type.
+//   ctx   - cast context (type helper, value builder, Yson converter).
+//   type  - destination NUdf type; its data type id selects the conversion.
+//   value - Python object to convert.
+// Throws yexception when the object cannot be represented in the destination
+// type (wrong Python type, out of range, invalid Uuid length, invalid UTF-8,
+// unknown timezone) or when no case handles the type id.
+//
+// The result of PyUnicode_AsUTF8String is checked before use: a failed encode
+// returns null, which was previously passed straight on to PyCast.
+NUdf::TUnboxedValue FromPyData(
+    const TPyCastContext::TPtr& ctx,
+    const NUdf::TType* type, PyObject* value)
+{
+    const NUdf::TDataAndDecimalTypeInspector inspector(*ctx->PyCtx->TypeInfoHelper, type);
+    const auto typeId = inspector.GetTypeId();
+
+    switch (typeId) {
+    case NUdf::TDataType<i8>::Id: return NUdf::TUnboxedValuePod(PyCast<i8>(value));
+    case NUdf::TDataType<ui8>::Id: return NUdf::TUnboxedValuePod(PyCast<ui8>(value));
+    case NUdf::TDataType<i16>::Id: return NUdf::TUnboxedValuePod(PyCast<i16>(value));
+    case NUdf::TDataType<ui16>::Id: return NUdf::TUnboxedValuePod(PyCast<ui16>(value));
+    case NUdf::TDataType<i32>::Id: return NUdf::TUnboxedValuePod(PyCast<i32>(value));
+    case NUdf::TDataType<ui32>::Id: return NUdf::TUnboxedValuePod(PyCast<ui32>(value));
+    case NUdf::TDataType<i64>::Id: return NUdf::TUnboxedValuePod(PyCast<i64>(value));
+    case NUdf::TDataType<ui64>::Id: return NUdf::TUnboxedValuePod(PyCast<ui64>(value));
+    case NUdf::TDataType<bool>::Id: return NUdf::TUnboxedValuePod(PyCast<bool>(value));
+    case NUdf::TDataType<float>::Id: return NUdf::TUnboxedValuePod(PyCast<float>(value));
+    case NUdf::TDataType<double>::Id: return NUdf::TUnboxedValuePod(PyCast<double>(value));
+    case NUdf::TDataType<NUdf::TDecimal>::Id: return FromPyDecimal(ctx, value, inspector.GetPrecision(), inspector.GetScale());
+    case NUdf::TDataType<NUdf::TYson>::Id: {
+        if (ctx->YsonConverterOut) {
+            TPyObjectPtr input(value, TPyObjectPtr::ADD_REF);
+            TPyObjectPtr pyArgs(PyTuple_New(1));
+            // PyTuple_SET_ITEM steals reference, so pass ownership to it
+            PyTuple_SET_ITEM(pyArgs.Get(), 0, input.Release());
+            input.ResetSteal(PyObject_CallObject(ctx->YsonConverterOut.Get(), pyArgs.Get()));
+            if (!input) {
+                UdfTerminate((TStringBuilder() << ctx->PyCtx->Pos << "Failed to execute:\n" << GetLastErrorAsString()).data());
+            }
+            return ctx->ValueBuilder->NewString(PyCast<NUdf::TStringRef>(input.Get()));
+        }
+    }
+    // Without an output converter Yson is handled by the plain string case
+    // that follows.
+    [[fallthrough]];
+#if PY_MAJOR_VERSION >= 3
+    case NUdf::TDataType<const char*>::Id:
+        return ctx->ValueBuilder->NewString(PyCast<NUdf::TStringRef>(value));
+    case NUdf::TDataType<NUdf::TUtf8>::Id:
+    case NUdf::TDataType<NUdf::TJson>::Id:
+        if (PyUnicode_Check(value)) {
+            const TPyObjectPtr utf8(PyUnicode_AsUTF8String(value));
+            if (!utf8) {
+                // Collect the Python error first so that no Python API is
+                // called for the repr while the error is still pending.
+                const auto error = GetLastErrorAsString();
+                throw yexception() << "Python object " << PyObjectRepr(value)
+                                   << " can't be encoded as UTF-8: " << error;
+            }
+            return ctx->ValueBuilder->NewString(PyCast<NUdf::TStringRef>(utf8.Get()));
+        }
+        throw yexception() << "Python object " << PyObjectRepr(value) << " has invalid value for unicode";
+#else
+    case NUdf::TDataType<const char*>::Id:
+    case NUdf::TDataType<NUdf::TJson>::Id:
+    case NUdf::TDataType<NUdf::TUtf8>::Id: {
+        if (PyUnicode_Check(value)) {
+            const TPyObjectPtr utf8(PyUnicode_AsUTF8String(value));
+            if (!utf8) {
+                // Collect the Python error first so that no Python API is
+                // called for the repr while the error is still pending.
+                const auto error = GetLastErrorAsString();
+                throw yexception() << "Python object " << PyObjectRepr(value)
+                                   << " can't be encoded as UTF-8: " << error;
+            }
+            return ctx->ValueBuilder->NewString(PyCast<NUdf::TStringRef>(utf8.Get()));
+        }
+
+        // Byte strings destined for Utf8/Json must already be valid UTF-8.
+        if ((typeId == NUdf::TDataType<NUdf::TUtf8>::Id || typeId == NUdf::TDataType<NUdf::TJson>::Id) &&
+            PyBytes_Check(value) && !NYql::IsUtf8(std::string_view(PyBytes_AS_STRING(value), static_cast<size_t>(PyBytes_GET_SIZE(value))))) {
+            throw yexception() << "Python string " << PyObjectRepr(value) << " is invalid for Utf8/Json";
+        }
+
+        return ctx->ValueBuilder->NewString(PyCast<NUdf::TStringRef>(value));
+    }
+#endif
+    case NUdf::TDataType<NUdf::TUuid>::Id: {
+        const auto& ret = ctx->ValueBuilder->NewString(PyCast<NUdf::TStringRef>(value));
+        // Only 16-byte strings are accepted as Uuid.
+        if (ret.AsStringRef().Size() != 16) {
+            throw yexception() << "Python object " << PyObjectRepr(value)
+                               << " has invalid value for Uuid";
+        }
+
+        return ret;
+    }
+    case NUdf::TDataType<NUdf::TDate>::Id: {
+        auto num = PyCast<ui16>(value);
+        if (num >= NUdf::MAX_DATE) {
+            throw yexception() << "Python object " << PyObjectRepr(value)
+                               << " is out of range for Date";
+        }
+
+        return NUdf::TUnboxedValuePod(num);
+    }
+
+    case NUdf::TDataType<NUdf::TDatetime>::Id: {
+        auto num = PyCast<ui32>(value);
+        if (num >= NUdf::MAX_DATETIME) {
+            throw yexception() << "Python object " << PyObjectRepr(value)
+                               << " is out of range for Datetime";
+        }
+
+        return NUdf::TUnboxedValuePod(num);
+    }
+
+    case NUdf::TDataType<NUdf::TTimestamp>::Id: {
+        auto num = PyCast<ui64>(value);
+        if (num >= NUdf::MAX_TIMESTAMP) {
+            throw yexception() << "Python object " << PyObjectRepr(value)
+                               << " is out of range for Timestamp";
+        }
+
+        return NUdf::TUnboxedValuePod(num);
+    }
+
+    case NUdf::TDataType<NUdf::TInterval>::Id: {
+        auto num = PyCast<i64>(value);
+        // Interval must lie strictly between -MAX_TIMESTAMP and MAX_TIMESTAMP.
+        if (num <= -(i64)NUdf::MAX_TIMESTAMP || num >= (i64)NUdf::MAX_TIMESTAMP) {
+            throw yexception() << "Python object " << PyObjectRepr(value)
+                               << " is out of range for Interval";
+        }
+
+        return NUdf::TUnboxedValuePod(num);
+    }
+
+    case NUdf::TDataType<NUdf::TTzDate>::Id:
+        return FromPyTz<ui16>(value, NUdf::MAX_DATE, TStringBuf("TzDate"), ctx);
+    case NUdf::TDataType<NUdf::TTzDatetime>::Id:
+        return FromPyTz<ui32>(value, NUdf::MAX_DATETIME, TStringBuf("TzDatetime"), ctx);
+    case NUdf::TDataType<NUdf::TTzTimestamp>::Id:
+        return FromPyTz<ui64>(value, NUdf::MAX_TIMESTAMP, TStringBuf("TzTimestamp"), ctx);
+    }
+
+    throw yexception()
+        << "Unsupported type " << typeId;
+}
+
+// Converts an NUdf list value to a Python object.
+//   ctx   - cast context; when ctx->LazyInputObjects is set a lazy list
+//           wrapper is returned instead of an eagerly built list.
+//   type  - NUdf list type of `value`; supplies the item type.
+//   value - list value to convert.
+// Throws yexception when the Python list cannot be created or an item cannot
+// be appended; the message carries the text from GetLastErrorAsString().
+TPyObjectPtr ToPyList(
+    const TPyCastContext::TPtr& ctx,
+    const NUdf::TType* type,
+    const NUdf::TUnboxedValuePod& value)
+{
+    const NUdf::TListTypeInspector inspector(*ctx->PyCtx->TypeInfoHelper, type);
+    const auto itemType = inspector.GetItemType();
+
+    if (ctx->LazyInputObjects) {
+        return ToPyLazyList(ctx, itemType, value);
+    }
+
+    TPyObjectPtr list(PyList_New(0));
+    if (!list) {
+        // PyList_New returns null on failure; appending to it would crash.
+        throw yexception() << "Can't create list: " << GetLastErrorAsString();
+    }
+
+    const auto iterator = value.GetListIterator();
+    for (NUdf::TUnboxedValue item; iterator.Next(item);) {
+        auto pyItem = ToPyObject(ctx, itemType, item);
+        if (PyList_Append(list.Get(), pyItem.Get()) < 0) {
+            throw yexception() << "Can't append item to list: "
+                               << GetLastErrorAsString();
+        }
+    }
+
+    return list;
+}
+
+// Converts a Python object to an NUdf list value.
+//   ctx   - cast context (type helper, value builder).
+//   type  - destination NUdf list type; supplies the item type.
+//   value - Python object to convert.
+// Python lists and tuples are converted eagerly, element by element. The
+// remaining accepted kinds - generators, iterators, callables, and
+// sequences or objects with `__iter__` - are handed to the FromPyLazy*
+// helpers. The checks run in the order written, so an object matching
+// several of them is handled by the first one. Throws yexception for
+// anything else.
+NUdf::TUnboxedValue FromPyList(
+ const TPyCastContext::TPtr& ctx,
+ const NUdf::TType* type, PyObject* value)
+{
+ const NUdf::TListTypeInspector inspector(*ctx->PyCtx->TypeInfoHelper, type);
+
+ if (PyList_Check(value)) {
+ // eager list to list conversion
+ auto itemType = inspector.GetItemType();
+ Py_ssize_t cnt = PyList_GET_SIZE(value);
+ NUdf::TUnboxedValue *items = nullptr;
+ const auto list = ctx->ValueBuilder->NewArray(cnt, items);
+ for (Py_ssize_t i = 0; i < cnt; ++i) {
+ PyObject *item = PyList_GET_ITEM(value, i);
+ *items++ = FromPyObject(ctx, itemType, item);
+ }
+ return list;
+ }
+
+ if (PyTuple_Check(value)) {
+ // eager tuple to list conversion
+ auto itemType = inspector.GetItemType();
+ Py_ssize_t cnt = PyTuple_GET_SIZE(value);
+ NUdf::TUnboxedValue *items = nullptr;
+ const auto list = ctx->ValueBuilder->NewArray(cnt, items);
+ for (Py_ssize_t i = 0; i < cnt; ++i) {
+ PyObject *item = PyTuple_GET_ITEM(value, i);
+ *items++ = FromPyObject(ctx, itemType, item);
+ }
+ return list;
+ }
+
+ // a generator object: wrap the iterator obtained from it
+ if (PyGen_Check(value)) {
+ TPyObjectPtr valuePtr(PyObject_GetIter(value));
+ return FromPyLazyIterator(ctx, type, std::move(valuePtr));
+ }
+
+ if (PyIter_Check(value)
+#if PY_MAJOR_VERSION < 3
+ // python 2 iterators must also implement "next" method
+ && 1 == PyObject_HasAttrString(value, "next")
+#endif
+ ) {
+ TPyObjectPtr valuePtr(value, TPyObjectPtr::ADD_REF);
+ return FromPyLazyIterator(ctx, type, std::move(valuePtr));
+ }
+
+ // assume that this callable returns a generator
+ if (PyCallable_Check(value)) {
+ TPyObjectPtr valuePtr(value, TPyObjectPtr::ADD_REF);
+ return FromPyLazyGenerator(ctx, type, std::move(valuePtr));
+ }
+
+ if (PySequence_Check(value) || PyObject_HasAttrString(value, "__iter__")) {
+ TPyObjectPtr valuePtr(value, TPyObjectPtr::ADD_REF);
+ return FromPyLazyIterable(ctx, type, std::move(valuePtr));
+ }
+
+ throw yexception() << "Expected list, tuple, generator, generator factory, "
+ "iterator or iterable object, but got: " << PyObjectRepr(value);
+}
+
+// Converts an NUdf optional value to Python: an empty value becomes None
+// (with its own reference), a filled one is converted according to the
+// optional's item type.
+TPyObjectPtr ToPyOptional(
+    const TPyCastContext::TPtr& ctx,
+    const NUdf::TType* type,
+    const NUdf::TUnboxedValuePod& value)
+{
+    if (value) {
+        const NUdf::TOptionalTypeInspector inspector(*ctx->PyCtx->TypeInfoHelper, type);
+        return ToPyObject(ctx, inspector.GetItemType(), value);
+    }
+
+    return TPyObjectPtr(Py_None, TPyObjectPtr::ADD_REF);
+}
+
+// Converts a Python object to an NUdf optional value: None becomes an empty
+// value, anything else is converted according to the optional's item type
+// and then wrapped with MakeOptional().
+NUdf::TUnboxedValue FromPyOptional(
+    const TPyCastContext::TPtr& ctx,
+    const NUdf::TType* type, PyObject* value)
+{
+    if (value != Py_None) {
+        const NUdf::TOptionalTypeInspector inspector(*ctx->PyCtx->TypeInfoHelper, type);
+        return FromPyObject(ctx, inspector.GetItemType(), value).Release().MakeOptional();
+    }
+
+    return NUdf::TUnboxedValue();
+}
+
+// Converts an NUdf dict value to a Python object.
+//   ctx   - cast context; ctx->LazyInputObjects selects lazy wrappers over
+//           eagerly built containers.
+//   type  - NUdf dict type of `value`; supplies key and value types.
+//   value - dict value to convert.
+// A dict whose value type is Void is exported as a set of its keys (a
+// frozenset when built eagerly); any other dict is exported as a Python dict.
+// Throws yexception when an element cannot be added to the container.
+TPyObjectPtr ToPyDict(
+ const TPyCastContext::TPtr& ctx,
+ const NUdf::TType* type,
+ const NUdf::TUnboxedValuePod& value)
+{
+ const NUdf::TDictTypeInspector inspector(*ctx->PyCtx->TypeInfoHelper, type);
+ const auto keyType = inspector.GetKeyType();
+ const auto valueType = inspector.GetValueType();
+
+ if (NUdf::ETypeKind::Void == ctx->PyCtx->TypeInfoHelper->GetTypeKind(valueType)) {
+ if (ctx->LazyInputObjects) { // TODO
+ return ToPyLazySet(ctx, keyType, value);
+ }
+
+ const TPyObjectPtr set(PyFrozenSet_New(nullptr));
+ const auto iterator = value.GetKeysIterator();
+ for (NUdf::TUnboxedValue key; iterator.Next(key);) {
+ auto pyKey = ToPyObject(ctx, keyType, key);
+ if (PySet_Add(set.Get(), pyKey.Get()) < 0) {
+ throw yexception() << "Can't add item to set" << GetLastErrorAsString();
+ }
+ }
+
+ return set;
+ } else {
+ if (ctx->LazyInputObjects) {
+ return ToPyLazyDict(ctx, keyType, valueType, value);
+ }
+
+ const TPyObjectPtr dict(PyDict_New());
+ const auto iterator = value.GetDictIterator();
+ for (NUdf::TUnboxedValue key, valueObj; iterator.NextPair(key, valueObj);) {
+ auto pyKey = ToPyObject(ctx, keyType, key);
+ auto pyValue = ToPyObject(ctx, valueType, valueObj);
+ if (PyDict_SetItem(dict.Get(), pyKey.Get(), pyValue.Get()) < 0) {
+ throw yexception() << "Can't add item to dict" << GetLastErrorAsString();
+ }
+ }
+
+ return dict;
+ }
+}
+
+// Converts a Python object to an NUdf dict value.
+//   ctx   - cast context (type helper, value builder).
+//   type  - destination NUdf dict type; supplies key and value types.
+//   value - Python object to convert.
+// Dispatch, in the order written:
+//  * list/tuple/thin-list/lazy-list with a Data key type: converted as a
+//    sequence if the key type is integral (otherwise falls to the throw);
+//  * Void value type: Python sets, or objects providing sq_contains;
+//  * Python dict;
+//  * any other mapping.
+// Throws yexception when no branch produces a result.
+NUdf::TUnboxedValue FromPyDict(
+ const TPyCastContext::TPtr& ctx,
+ const NUdf::TType* type, PyObject* value)
+{
+ const NUdf::TDictTypeInspector inspector(*ctx->PyCtx->TypeInfoHelper, type);
+ const auto keyType = inspector.GetKeyType();
+ const auto valueType = inspector.GetValueType();
+
+ if ((PyList_Check(value) || PyTuple_Check(value) || value->ob_type == &PyThinListType || value->ob_type == &PyLazyListType)
+ && ctx->PyCtx->TypeInfoHelper->GetTypeKind(keyType) == NUdf::ETypeKind::Data) {
+ const NUdf::TDataTypeInspector keiIns(*ctx->PyCtx->TypeInfoHelper, keyType);
+ if (NUdf::GetDataTypeInfo(NUdf::GetDataSlot(keiIns.GetTypeId())).Features & NUdf::EDataTypeFeatures::IntegralType) {
+ return FromPySequence(ctx, valueType, keiIns.GetTypeId(), value);
+ }
+ } else if (NUdf::ETypeKind::Void == ctx->PyCtx->TypeInfoHelper->GetTypeKind(valueType)) {
+ if (PyAnySet_Check(value)) {
+ return FromPySet(ctx, keyType, value);
+ } else if (value->ob_type->tp_as_sequence && value->ob_type->tp_as_sequence->sq_contains) {
+ return FromPySequence(ctx, keyType, value);
+ }
+ } else if (PyDict_Check(value)) {
+ return FromPyDict(ctx, keyType, valueType, value);
+ } else if (PyMapping_Check(value)) {
+ return FromPyMapping(ctx, keyType, valueType, value);
+ }
+
+ throw yexception() << "Can't cast "<< PyObjectRepr(value) << " to dict.";
+}
+
+} // namespace
+
+// Converts an NUdf value of arbitrary type to a Python object by dispatching
+// on the kind of `type` to the matching ToPy* converter.
+//   ctx   - cast context shared by all converters.
+//   type  - NUdf type describing `value`.
+//   value - value to convert.
+// Throws yexception ("Failed to export: <printed type>") for a type kind
+// that has no converter.
+TPyObjectPtr ToPyObject(
+ const TPyCastContext::TPtr& ctx,
+ const NUdf::TType* type, const NUdf::TUnboxedValuePod& value)
+{
+ switch (ctx->PyCtx->TypeInfoHelper->GetTypeKind(type)) {
+ case NUdf::ETypeKind::Data: return ToPyData(ctx, type, value);
+ case NUdf::ETypeKind::Tuple: return ToPyTuple(ctx, type, value);
+ case NUdf::ETypeKind::Struct: return ToPyStruct(ctx, type, value);
+ case NUdf::ETypeKind::List: return ToPyList(ctx, type, value);
+ case NUdf::ETypeKind::Optional: return ToPyOptional(ctx, type, value);
+ case NUdf::ETypeKind::Dict: return ToPyDict(ctx, type, value);
+ case NUdf::ETypeKind::Callable: return ToPyCallable(ctx, type, value);
+ case NUdf::ETypeKind::Resource: return ToPyResource(ctx, type, value);
+ case NUdf::ETypeKind::Void: return ToPyVoid(ctx, type, value);
+ case NUdf::ETypeKind::Stream: return ToPyStream(ctx, type, value);
+ case NUdf::ETypeKind::Variant: return ToPyVariant(ctx, type, value);
+ default: {
+ ::TStringBuilder sb;
+ sb << "Failed to export: ";
+ NUdf::TTypePrinter(*ctx->PyCtx->TypeInfoHelper, type).Out(sb.Out);
+ throw yexception() << sb;
+ }
+ }
+}
+
+// Converts a Python object to an NUdf value of the requested type by
+// dispatching on the kind of `type` to the matching FromPy* converter.
+//   ctx   - cast context shared by all converters.
+//   type  - destination NUdf type.
+//   value - Python object to convert.
+// Throws yexception ("Failed to import: <printed type>") for a type kind
+// that has no converter.
+NUdf::TUnboxedValue FromPyObject(
+ const TPyCastContext::TPtr& ctx,
+ const NUdf::TType* type, PyObject* value)
+{
+ switch (ctx->PyCtx->TypeInfoHelper->GetTypeKind(type)) {
+ case NUdf::ETypeKind::Data: return FromPyData(ctx, type, value);
+ case NUdf::ETypeKind::Tuple: return FromPyTuple(ctx, type, value);
+ case NUdf::ETypeKind::Struct: return FromPyStruct(ctx, type, value);
+ case NUdf::ETypeKind::List: return FromPyList(ctx, type, value);
+ case NUdf::ETypeKind::Optional: return FromPyOptional(ctx, type, value);
+ case NUdf::ETypeKind::Dict: return FromPyDict(ctx, type, value);
+ case NUdf::ETypeKind::Callable: return FromPyCallable(ctx, type, value);
+ case NUdf::ETypeKind::Resource: return FromPyResource(ctx, type, value);
+ case NUdf::ETypeKind::Void: return FromPyVoid(ctx, type, value);
+ // The stream converter takes an owning pointer, so a new reference to
+ // `value` is added here.
+ case NUdf::ETypeKind::Stream: return FromPyStream(ctx, type, TPyObjectPtr(value, TPyObjectPtr::ADD_REF), nullptr, nullptr, nullptr);
+ case NUdf::ETypeKind::Variant: return FromPyVariant(ctx, type, value);
+ default: {
+ ::TStringBuilder sb;
+ sb << "Failed to import: ";
+ NUdf::TTypePrinter(*ctx->PyCtx->TypeInfoHelper, type).Out(sb.Out);
+ throw yexception() << sb;
+ }
+ }
+}
+
+// Builds the Python argument tuple for a callable from its native arguments.
+//   ctx       - cast context used to convert each argument.
+//   type      - callable type (not used by this function).
+//   args      - native arguments; inspector.GetArgsCount() entries are read.
+//   inspector - supplies the argument count and per-argument types.
+// Returns a tuple holding the converted arguments in order.
+TPyObjectPtr ToPyArgs(
+    const TPyCastContext::TPtr& ctx,
+    const NUdf::TType* type,
+    const NUdf::TUnboxedValuePod* args,
+    const NUdf::TCallableTypeInspector& inspector)
+{
+    const auto argsCount = inspector.GetArgsCount();
+    TPyObjectPtr tuple(PyTuple_New(argsCount));
+
+    for (ui32 idx = 0; idx < argsCount; ++idx) {
+        auto converted = ToPyObject(ctx, inspector.GetArgType(idx), args[idx]);
+        // PyTuple_SET_ITEM steals the reference, so ownership is released.
+        PyTuple_SET_ITEM(tuple.Get(), idx, converted.Release());
+    }
+
+    return tuple;
+}
+
+// Converts a Python argument tuple to native callable arguments.
+//   ctx       - cast context used to convert each argument.
+//   type      - callable type (not used by this function).
+//   pyArgs    - Python object; must be a tuple.
+//   cArgs     - output array with room for inspector.GetArgsCount() values.
+//   inspector - supplies total/optional argument counts and argument types.
+// The tuple may omit trailing optional arguments; the omitted slots are
+// filled with empty values. A tuple of any other size is rejected by
+// PY_ENSURE.
+void FromPyArgs(
+    const TPyCastContext::TPtr& ctx,
+    const NUdf::TType* type,
+    PyObject* pyArgs,
+    NUdf::TUnboxedValue* cArgs,
+    const NUdf::TCallableTypeInspector& inspector)
+{
+    PY_ENSURE_TYPE(Tuple, pyArgs, "Expected");
+
+    const auto argsCount = inspector.GetArgsCount();
+    const auto optArgsCount = inspector.GetOptionalArgsCount();
+    const ui32 pyArgsCount = static_cast<ui32>(PyTuple_GET_SIZE(pyArgs));
+
+    PY_ENSURE(argsCount - optArgsCount <= pyArgsCount && pyArgsCount <= argsCount,
+        "arguments count missmatch: "
+        "min " << (argsCount - optArgsCount) << ", max " << argsCount
+        << ", got " << pyArgsCount);
+
+    ui32 idx = 0;
+
+    // Arguments actually supplied by the caller.
+    for (; idx < pyArgsCount; ++idx) {
+        cArgs[idx] = FromPyObject(ctx, inspector.GetArgType(idx), PyTuple_GET_ITEM(pyArgs, idx));
+    }
+
+    // Omitted optional arguments become empty values.
+    for (; idx < argsCount; ++idx) {
+        cArgs[idx] = NUdf::TUnboxedValuePod();
+    }
+}
+
+// IMemoryLock implementation whose Acquire/Release do nothing; installed by
+// TPyCastContext when the caller supplies no lock.
+class TDummyMemoryLock : public IMemoryLock {
+public:
+ void Acquire() override {}
+ void Release() override {}
+};
+
+// Creates a cast context.
+//   builder    - value builder stored as ValueBuilder (not owned here).
+//   pyCtx      - shared Python context.
+//   memoryLock - optional lock; when null a TDummyMemoryLock is substituted
+//                so MemoryLock is never null after construction.
+TPyCastContext::TPyCastContext(
+ const NKikimr::NUdf::IValueBuilder* builder,
+ TPyContext::TPtr pyCtx,
+ THolder<IMemoryLock> memoryLock)
+ : ValueBuilder(builder)
+ , PyCtx(std::move(pyCtx))
+ , MemoryLock(std::move(memoryLock))
+{
+ if (!MemoryLock) {
+ MemoryLock = MakeHolder<TDummyMemoryLock>();
+ }
+}
+
+// Clears StructTypes, the Yson converters and TimezoneNames explicitly while
+// a TPyGilLocker is held.
+// NOTE(review): presumably this is so the Python references they hold are
+// dropped under the GIL rather than in the implicit member destructors -
+// confirm.
+TPyCastContext::~TPyCastContext() {
+ TPyGilLocker locker;
+ StructTypes.clear();
+ YsonConverterIn.Reset();
+ YsonConverterOut.Reset();
+ TimezoneNames.clear();
+}
+
+// Returns the Python object holding the name of timezone `id`, creating and
+// caching it in TimezoneNames on first use. Throws yexception when the date
+// builder does not know the id.
+const TPyObjectPtr& TPyCastContext::GetTimezoneName(ui32 id) {
+    auto& cached = TimezoneNames[id];
+    if (cached) {
+        return cached;
+    }
+
+    NKikimr::NUdf::TStringRef ref;
+    if (!ValueBuilder->GetDateBuilder().FindTimezoneName(id, ref)) {
+        throw yexception() << "Unknown timezone id: " << id;
+    }
+
+    cached = PyRepr(ref);
+    return cached;
+}
+
+} // namespace NPython
diff --git a/yql/essentials/udfs/common/python/bindings/py_cast.h b/yql/essentials/udfs/common/python/bindings/py_cast.h
new file mode 100644
index 00000000000..e6850c74040
--- /dev/null
+++ b/yql/essentials/udfs/common/python/bindings/py_cast.h
@@ -0,0 +1,45 @@
+#pragma once
+
+#include "py_ptr.h"
+#include "py_ctx.h"
+
+#include <util/generic/typetraits.h>
+
+namespace NPython {
+
+template <typename T>
+TPyObjectPtr PyCast(typename TTypeTraits<T>::TFuncParam value); // C++ value of type T -> Python object; only declared here, definitions live elsewhere
+
+template <typename T>
+T PyCast(PyObject* value); // Python object -> C++ value of type T; NOTE(review): failure behaviour is decided by the definitions — confirm there
+
+template <typename T>
+bool TryPyCast(PyObject* value, T& result); // conversion that reports its outcome through the bool result and delivers the value through `result`
+
+template <typename T>
+TPyObjectPtr ToPyUnicode(const T& value); // presumably builds a Python unicode object from `value` — confirm against the definition
+
+TPyObjectPtr ToPyObject( // converts a UDF value of the given `type` into a Python object
+ const TPyCastContext::TPtr& ctx,
+ const NKikimr::NUdf::TType* type,
+ const NKikimr::NUdf::TUnboxedValuePod& value);
+
+NKikimr::NUdf::TUnboxedValue FromPyObject( // converts a Python object into a UDF value of the given `type`
+ const TPyCastContext::TPtr& ctx,
+ const NKikimr::NUdf::TType* type,
+ PyObject* value);
+
+TPyObjectPtr ToPyArgs( // counterpart of FromPyArgs: turns an array of UDF argument values into a Python object
+ const TPyCastContext::TPtr& ctx,
+ const NKikimr::NUdf::TType* type,
+ const NKikimr::NUdf::TUnboxedValuePod* args,
+ const NKikimr::NUdf::TCallableTypeInspector& inspector);
+
+void FromPyArgs( // fills `cArgs` from the Python tuple `pyArgs`, using the argument types reported by `inspector`
+ const TPyCastContext::TPtr& ctx,
+ const NKikimr::NUdf::TType* type,
+ PyObject* pyArgs,
+ NKikimr::NUdf::TUnboxedValue* cArgs,
+ const NKikimr::NUdf::TCallableTypeInspector& inspector);
+
+} // namespace NPython
diff --git a/yql/essentials/udfs/common/python/bindings/py_cast_ut.cpp b/yql/essentials/udfs/common/python/bindings/py_cast_ut.cpp
new file mode 100644
index 00000000000..47f65ab6fab
--- /dev/null
+++ b/yql/essentials/udfs/common/python/bindings/py_cast_ut.cpp
@@ -0,0 +1,90 @@
+#include "ut3/py_test_engine.h"
+
+#include <library/cpp/testing/unittest/registar.h>
+
+using namespace NPython;
+
+Y_UNIT_TEST_SUITE(TPyCastTest) { // failure cases of converting Python return values to MiniKQL values
+ Y_UNIT_TEST(FromPyStrToInt) { // a str returned where i32 is expected must throw yexception mentioning "str"
+ TPythonTestEngine engine;
+ UNIT_ASSERT_EXCEPTION_CONTAINS(
+ engine.ToMiniKQL<i32>(
+ "def Test():\n"
+ " return '123a'",
+ [](const NUdf::TUnboxedValuePod& value) {
+ Y_UNUSED(value);
+ }),
+ yexception, "str");
+ }
+
+ Y_UNIT_TEST(FromPyTupleToLong) { // a tuple returned where ui64 is expected must throw yexception mentioning "tuple"
+ TPythonTestEngine engine;
+ UNIT_ASSERT_EXCEPTION_CONTAINS(
+ engine.ToMiniKQL<ui64>(
+ "def Test():\n"
+ " return 1, 1",
+ [](const NUdf::TUnboxedValuePod& value) {
+ Y_UNUSED(value);
+ }),
+ yexception, "tuple");
+ }
+
+ Y_UNIT_TEST(FromPyFuncToString) { // a function object returned where char* is expected must throw yexception mentioning "function"
+ TPythonTestEngine engine;
+ UNIT_ASSERT_EXCEPTION_CONTAINS(
+ engine.ToMiniKQL<char*>(
+ "def f():\n"
+ " return 42\n"
+ "def Test():\n"
+ " return f",
+ [](const NUdf::TUnboxedValuePod& value) {
+ Y_UNUSED(value);
+ }),
+ yexception, "function");
+ }
+
+ Y_UNIT_TEST(FromPyNoneToString) { // None returned where char* is expected must throw yexception mentioning "None"
+ TPythonTestEngine engine;
+ UNIT_ASSERT_EXCEPTION_CONTAINS(
+ engine.ToMiniKQL<char*>(
+ "def Test():\n"
+ " return None",
+ [](const NUdf::TUnboxedValuePod& value) {
+ Y_UNUSED(value);
+ }),
+ yexception, "None");
+ }
+
+ Y_UNIT_TEST(BadFromPythonFloat) { // a non-numeric string for float must fail with the exact cast-error text; the value callback is marked unreachable
+ TPythonTestEngine engine;
+ UNIT_ASSERT_EXCEPTION_CONTAINS(
+ engine.ToMiniKQL<float>(
+ "def Test():\n"
+ " return '3 <dot> 1415926'",
+ [](const NUdf::TUnboxedValuePod& value) {
+ Y_UNUSED(value);
+ Y_UNREACHABLE();
+ }),
+ yexception, "Cast error object '3 <dot> 1415926' to Float");
+ }
+
+#if PY_MAJOR_VERSION >= 3 // RETVAL: the negative Python literal used by BadFromPythonLong, spelled per Python major version
+# define RETVAL "-1"
+#else
+# define RETVAL "-18446744073709551616L"
+#endif
+
+ Y_UNIT_TEST(BadFromPythonLong) { // a negative integer for ui64 must fail with a cast-error text that quotes RETVAL
+ TPythonTestEngine engine;
+ UNIT_ASSERT_EXCEPTION_CONTAINS(
+ engine.ToMiniKQL<ui64>(
+ "def Test():\n"
+ " return " RETVAL,
+ [](const NUdf::TUnboxedValuePod& value) {
+ Y_UNUSED(value);
+ Y_UNREACHABLE();
+ }),
+ yexception, "Cast error object " RETVAL " to Long");
+ }
+
+}
diff --git a/yql/essentials/udfs/common/python/bindings/py_ctx.h b/yql/essentials/udfs/common/python/bindings/py_ctx.h
new file mode 100644
index 00000000000..9e86042908f
--- /dev/null
+++ b/yql/essentials/udfs/common/python/bindings/py_ctx.h
@@ -0,0 +1,120 @@
+#pragma once
+
+#include "py_ptr.h"
+
+#include <yql/essentials/public/udf/udf_types.h>
+#include <yql/essentials/public/udf/udf_type_builder.h>
+#include <yql/essentials/public/udf/udf_type_inspection.h>
+#include <yql/essentials/public/udf/udf_value_builder.h>
+#include <yql/essentials/public/udf/udf_string.h>
+
+#include <util/generic/ptr.h>
+#include <util/generic/intrlist.h>
+
+#include <unordered_map>
+
+namespace NPython {
+
+enum class EBytesDecodeMode { // mode stored in TPyCastContext::BytesDecodeMode; NOTE(review): presumably controls decoding of byte strings, the consumers are not in this header — confirm there
+ Never, // default value of TPyCastContext::BytesDecodeMode
+ Strict,
+};
+
+class IMemoryLock { // abstract acquire/release pair; TPyCastContext owns one instance through THolder
+public:
+ virtual ~IMemoryLock() = default; // virtual so implementations can be destroyed through the interface pointer
+ virtual void Acquire() = 0;
+ virtual void Release() = 0;
+};
+
+struct TPyCleanupListItemBase: public TIntrusiveListItem<TPyCleanupListItemBase> { // node type of TPyContext::CleanupList
+ virtual ~TPyCleanupListItemBase() = default;
+ virtual void Cleanup() = 0; // called for every linked item by TPyContext::Cleanup()
+};
+
+// Holder for a value of type TValueType that can be emptied on demand through
+// the owning context's CleanupList. Once emptied (or while never set) Get()
+// throws instead of handing out the value.
+template <typename TValueType>
+class TPyCleanupListItem: public TPyCleanupListItemBase {
+public:
+    TPyCleanupListItem() = default;
+
+    // Detaches the item from the intrusive list it may be linked into.
+    ~TPyCleanupListItem() override {
+        Unlink();
+    }
+
+    // Replaces the stored value with a default-constructed one.
+    void Cleanup() override {
+        Value = {};
+    }
+
+    // Stores `val` and appends this item to ctx->CleanupList.
+    template <typename TCtx>
+    void Set(const TIntrusivePtr<TCtx>& ctx, TValueType val) {
+        Value = std::move(val);
+        ctx->CleanupList.PushBack(this);
+    }
+
+    // True while the stored value is non-empty.
+    bool IsSet() const {
+        const bool empty = !Value;
+        return !empty;
+    }
+
+    // Returns the stored value; throws yexception when it is empty.
+    const TValueType& Get() const {
+        const bool empty = !Value;
+        if (empty) {
+            throw yexception() << "Trying to use python wrap object with destroyed yql value";
+        }
+
+        return Value;
+    }
+
+private:
+    TValueType Value;
+};
+
+// Ref-counted Python-side context: the type info helper, resource tag and
+// source position it was created with, plus the intrusive list of wrapper
+// items that Cleanup() empties.
+struct TPyContext: public TSimpleRefCount<TPyContext> {
+    const NKikimr::NUdf::ITypeInfoHelper::TPtr TypeInfoHelper;
+    const NKikimr::NUdf::TStringRef ResourceTag;
+    const NKikimr::NUdf::TSourcePosition Pos;
+    TIntrusiveList<TPyCleanupListItemBase> CleanupList;
+
+    TPyContext(NKikimr::NUdf::ITypeInfoHelper::TPtr helper, const NKikimr::NUdf::TStringRef& tag, const NKikimr::NUdf::TSourcePosition& pos)
+        : TypeInfoHelper(std::move(helper))
+        , ResourceTag(tag)
+        , Pos(pos)
+    {
+    }
+
+    // Calls Cleanup() on every linked item, then unlinks all of them.
+    void Cleanup() {
+        for (TPyCleanupListItemBase& item : CleanupList) {
+            item.Cleanup();
+        }
+
+        CleanupList.Clear();
+    }
+
+    ~TPyContext() = default;
+
+    using TPtr = TIntrusivePtr<TPyContext>;
+};
+
+struct TPyCastContext: public TSimpleRefCount<TPyCastContext> { // ref-counted state handed to the Python <-> UDF value conversion functions
+ const NKikimr::NUdf::IValueBuilder *const ValueBuilder; // raw pointer taken from the constructor argument
+ const TPyContext::TPtr PyCtx;
+ std::unordered_map<const NKikimr::NUdf::TType*, TPyObjectPtr> StructTypes; // Python objects keyed by UDF type pointer; NOTE(review): presumably cached per-struct Python types — confirm at the use site
+ bool LazyInputObjects = true;
+ TPyObjectPtr YsonConverterIn;
+ TPyObjectPtr YsonConverterOut;
+ EBytesDecodeMode BytesDecodeMode = EBytesDecodeMode::Never;
+ TPyObjectPtr Decimal; // cache used by GetDecimal()
+ std::unordered_map<ui32, TPyObjectPtr> TimezoneNames; // cache used by GetTimezoneName(), keyed by timezone id
+ THolder<IMemoryLock> MemoryLock;
+
+ TPyCastContext(
+ const NKikimr::NUdf::IValueBuilder* builder,
+ TPyContext::TPtr pyCtx,
+ THolder<IMemoryLock> memoryLock = {}); // the lock is optional and defaults to an empty holder
+
+ ~TPyCastContext();
+
+ const TPyObjectPtr& GetTimezoneName(ui32 id);
+ const TPyObjectPtr& GetDecimal();
+
+ using TPtr = TIntrusivePtr<TPyCastContext>;
+};
+
+using TPyCastContextPtr = TPyCastContext::TPtr; // namespace-level alias for the intrusive pointer type
+
+} // namespace NPython
diff --git a/yql/essentials/udfs/common/python/bindings/py_decimal.cpp b/yql/essentials/udfs/common/python/bindings/py_decimal.cpp
new file mode 100644
index 00000000000..0070e3420f1
--- /dev/null
+++ b/yql/essentials/udfs/common/python/bindings/py_decimal.cpp
@@ -0,0 +1,59 @@
+#include "py_decimal.h"
+#include "py_errors.h"
+#include "py_utils.h"
+#include "py_cast.h"
+
+#include <util/stream/str.h>
+
+#include <yql/essentials/public/udf/udf_value.h>
+
+using namespace NKikimr;
+
+namespace NPython {
+
+// Builds a Python Decimal from a UDF decimal value: the 128-bit integer is
+// rendered as text for the given precision/scale and that text is passed to
+// the callable returned by ctx->GetDecimal().
+// Every failing step is reported through PY_ENSURE.
+TPyObjectPtr ToPyDecimal(const TPyCastContext::TPtr& ctx, const NKikimr::NUdf::TUnboxedValuePod& value, ui8 precision, ui8 scale)
+{
+    const auto text = NYql::NDecimal::ToString(value.GetInt128(), precision, scale);
+    PY_ENSURE(text, "Bad decimal value.");
+
+    const TPyObjectPtr pyText = PyRepr(text);
+
+    // Single-element argument tuple for the Decimal(text) call.
+    const TPyObjectPtr callArgs = PyTuple_Pack(1, pyText.Get());
+    PY_ENSURE(callArgs, "Can't pack args.");
+
+    const TPyObjectPtr& decimalType = ctx->GetDecimal();
+    const TPyObjectPtr result = PyObject_CallObject(decimalType.Get(), callArgs.Get());
+    PY_ENSURE(result, "Can't create Decimal.");
+
+    return result;
+}
+
+// Converts a Python object to a UDF decimal value by taking its str() form
+// and parsing that text with the requested precision and scale.
+// Every failing step is reported through PY_ENSURE.
+NKikimr::NUdf::TUnboxedValue FromPyDecimal(const TPyCastContext::TPtr& ctx, PyObject* value, ui8 precision, ui8 scale)
+{
+    const TPyObjectPtr printed = PyObject_Str(value);
+    PY_ENSURE(printed, "Can't print decimal.");
+
+    TString text;
+    PY_ENSURE(TryPyCast<TString>(printed.Get(), text), "Can't get decimal string.");
+
+    // Drop the last five characters so that a trailing "Infinity" becomes "Inf".
+    const bool spelledOut = text.EndsWith("Infinity");
+    if (spelledOut) {
+        text.resize(text.size() - 5U);
+    }
+
+    const auto parsed = NYql::NDecimal::FromStringEx(text.c_str(), precision, scale);
+    PY_ENSURE(!NYql::NDecimal::IsError(parsed), "Can't make Decimal from string.");
+
+    return NKikimr::NUdf::TUnboxedValuePod(parsed);
+}
+
+// Returns the `Decimal` attribute of the Python `decimal` module.
+// The module is imported and the attribute cached in the `Decimal` member on
+// the first call; later calls return the cached object.
+const TPyObjectPtr& TPyCastContext::GetDecimal() {
+    if (!Decimal) {
+        const TPyObjectPtr decimalModule = PyImport_ImportModule("decimal");
+        PY_ENSURE(decimalModule, "Can't import decimal.");
+
+        PyObject* const attribute = PyObject_GetAttrString(decimalModule.Get(), "Decimal");
+        Decimal.ResetSteal(attribute);
+        PY_ENSURE(Decimal, "Can't get Decimal.");
+    }
+
+    return Decimal;
+}
+
+} // namespace NPython
diff --git a/yql/essentials/udfs/common/python/bindings/py_decimal.h b/yql/essentials/udfs/common/python/bindings/py_decimal.h
new file mode 100644
index 00000000000..5764fe4fa85
--- /dev/null
+++ b/yql/essentials/udfs/common/python/bindings/py_decimal.h
@@ -0,0 +1,12 @@
+#pragma once
+
+#include "py_ptr.h"
+#include "py_ctx.h"
+
+namespace NPython {
+
+TPyObjectPtr ToPyDecimal(const TPyCastContext::TPtr& castCtx, const NKikimr::NUdf::TUnboxedValuePod& value, ui8 precision, ui8 scale); // UDF decimal value with the given precision/scale -> Python object
+
+NKikimr::NUdf::TUnboxedValue FromPyDecimal(const TPyCastContext::TPtr& castCtx, PyObject* value, ui8 precision, ui8 scale); // Python object -> UDF decimal value with the given precision/scale
+
+} // namespace NPython
diff --git a/yql/essentials/udfs/common/python/bindings/py_decimal_ut.cpp b/yql/essentials/udfs/common/python/bindings/py_decimal_ut.cpp
new file mode 100644
index 00000000000..8388c110f32
--- /dev/null
+++ b/yql/essentials/udfs/common/python/bindings/py_decimal_ut.cpp
@@ -0,0 +1,122 @@
+#include "ut3/py_test_engine.h"
+
+#include <library/cpp/testing/unittest/registar.h>
+
+using namespace NPython;
+
+Y_UNIT_TEST_SUITE(TPyDecimalTest) { // round-trip checks between Python Decimal objects and UDF decimal values
+ Y_UNIT_TEST(FromPyZero) { // Decimal() must arrive as a non-empty value whose Int128 is zero
+ TPythonTestEngine engine;
+ engine.ToMiniKQL<NUdf::TDecimalDataType<12,5>>(
+ R"(
+from decimal import Decimal
+def Test(): return Decimal()
+ )",
+ [](const NUdf::TUnboxedValuePod& value) {
+ UNIT_ASSERT(value);
+ UNIT_ASSERT(!value.GetInt128());
+ });
+ }
+
+ Y_UNIT_TEST(FromPyPi) { // 18 fractional digits at scale 18 must map to the integer 3141592653589793238
+ TPythonTestEngine engine;
+ engine.ToMiniKQL<NUdf::TDecimalDataType<28,18>>(
+ R"(
+from decimal import Decimal
+def Test(): return Decimal('3.141592653589793238')
+ )",
+ [](const NUdf::TUnboxedValuePod& value) {
+ UNIT_ASSERT(value);
+ UNIT_ASSERT(value.GetInt128() == 3141592653589793238LL);
+ });
+ }
+
+ Y_UNIT_TEST(FromPyTini) { // -1e-35 at scale 35 must map to the integer -1
+ TPythonTestEngine engine;
+ engine.ToMiniKQL<NUdf::TDecimalDataType<35,35>>(
+ R"(
+from decimal import Decimal
+def Test(): return Decimal('-.00000000000000000000000000000000001')
+ )",
+ [](const NUdf::TUnboxedValuePod& value) {
+ UNIT_ASSERT(value);
+ UNIT_ASSERT(value.GetInt128() == -1);
+ });
+ }
+
+ Y_UNIT_TEST(FromPyNan) { // Decimal('NaN') must map to NYql::NDecimal::Nan()
+ TPythonTestEngine engine;
+ engine.ToMiniKQL<NUdf::TDecimalDataType<35,34>>(
+ R"(
+from decimal import Decimal
+def Test(): return Decimal('NaN')
+ )",
+ [](const NUdf::TUnboxedValuePod& value) {
+ UNIT_ASSERT(value);
+ UNIT_ASSERT(value.GetInt128() == NYql::NDecimal::Nan());
+ });
+ }
+
+ Y_UNIT_TEST(FromPyInf) { // Decimal('-inf') must map to -NYql::NDecimal::Inf()
+ TPythonTestEngine engine;
+ engine.ToMiniKQL<NUdf::TDecimalDataType<35,34>>(
+ R"(
+from decimal import Decimal
+def Test(): return Decimal('-inf')
+ )",
+ [](const NUdf::TUnboxedValuePod& value) {
+ UNIT_ASSERT(value);
+ UNIT_ASSERT(value.GetInt128() == -NYql::NDecimal::Inf());
+ });
+ }
+
+ Y_UNIT_TEST(ToPyZero) { // a zero UDF value must be seen by Python as a Decimal with is_zero()
+ TPythonTestEngine engine;
+ engine.ToPython<NUdf::TDecimalDataType<7,7>>(
+ [](const TType*, const NUdf::IValueBuilder&) {
+ return NUdf::TUnboxedValuePod::Zero();
+ },
+ "def Test(value): assert value.is_zero()"
+ );
+ }
+
+ Y_UNIT_TEST(ToPyPi) { // integer 3141592653589793238 at scale 18 must print as '3.141592653589793238'
+ TPythonTestEngine engine;
+ engine.ToPython<NUdf::TDecimalDataType<20,18>>(
+ [](const TType*, const NUdf::IValueBuilder&) {
+ return NUdf::TUnboxedValuePod(NYql::NDecimal::TInt128(3141592653589793238LL));
+ },
+ "def Test(value): assert str(value) == '3.141592653589793238'"
+ );
+ }
+
+ Y_UNIT_TEST(ToPyTini) { // integer -1 at scale 35 must format as -1e-35 with 35 fractional digits
+ TPythonTestEngine engine;
+ engine.ToPython<NUdf::TDecimalDataType<35,35>>(
+ [](const TType*, const NUdf::IValueBuilder&) {
+ return NUdf::TUnboxedValuePod(NYql::NDecimal::TInt128(-1));
+ },
+ "def Test(value): assert format(value, '.35f') == '-0.00000000000000000000000000000000001'"
+ );
+ }
+
+ Y_UNIT_TEST(ToPyNan) { // NYql::NDecimal::Nan() must be seen by Python as a Decimal with is_nan()
+ TPythonTestEngine engine;
+ engine.ToPython<NUdf::TDecimalDataType<2,2>>(
+ [](const TType*, const NUdf::IValueBuilder&) {
+ return NUdf::TUnboxedValuePod(NYql::NDecimal::Nan());
+ },
+ "def Test(value): assert value.is_nan()"
+ );
+ }
+
+ Y_UNIT_TEST(ToPyInf) { // -NYql::NDecimal::Inf() must be seen by Python as an infinite, signed Decimal
+ TPythonTestEngine engine;
+ engine.ToPython<NUdf::TDecimalDataType<30,0>>(
+ [](const TType*, const NUdf::IValueBuilder&) {
+ return NUdf::TUnboxedValuePod(-NYql::NDecimal::Inf());
+ },
+ "def Test(value): assert value.is_infinite() and value.is_signed()"
+ );
+ }
+}
diff --git a/yql/essentials/udfs/common/python/bindings/py_dict.cpp b/yql/essentials/udfs/common/python/bindings/py_dict.cpp
new file mode 100644
index 00000000000..f2bd0669eda
--- /dev/null
+++ b/yql/essentials/udfs/common/python/bindings/py_dict.cpp
@@ -0,0 +1,683 @@
+#include "py_dict.h"
+#include "py_iterator.h"
+#include "py_cast.h"
+#include "py_errors.h"
+#include "py_utils.h"
+
+#include <yql/essentials/public/udf/udf_value.h>
+#include <yql/essentials/public/udf/udf_value_builder.h>
+#include <yql/essentials/public/udf/udf_type_inspection.h>
+
+
+using namespace NKikimr;
+
+namespace NPython {
+
+//////////////////////////////////////////////////////////////////////////////
+// TPyLazyDict interface
+//////////////////////////////////////////////////////////////////////////////
+struct TPyLazyDict // Python object (type "yql.TDict") that answers dict operations from a boxed UDF value held in `Value`
+{
+ using TPtr = NUdf::TRefCountedPtr<TPyLazyDict, TPyPtrOps<TPyLazyDict>>;
+
+ PyObject_HEAD; // CPython object header; kept as the first member so the struct can be reinterpret_cast to and from PyObject*
+ TPyCastContext::TPtr CastCtx;
+ const NUdf::TType* KeyType; // when null, Subscript/Contains/Get treat keys as indexes: PyIndex_Check, then lookup by ui64
+ const NUdf::TType* PayloadType;
+ TPyCleanupListItem<NUdf::IBoxedValuePtr> Value; // filled by New(), which also links it into the PyCtx cleanup list
+
+ inline static TPyLazyDict* Cast(PyObject* o) { // unchecked cast: the caller must pass an object of this type
+ return reinterpret_cast<TPyLazyDict*>(o);
+ }
+
+ inline static void Dealloc(PyObject* self) { // tp_dealloc slot; pairs with the `new` in New()
+ delete Cast(self);
+ }
+
+ static PyObject* New(
+ const TPyCastContext::TPtr& castCtx,
+ const NUdf::TType* keyType,
+ const NUdf::TType* payloadType,
+ NUdf::IBoxedValuePtr&& value);
+
+ static int Bool(PyObject* self);
+ static PyObject* Repr(PyObject* self);
+ static Py_ssize_t Len(PyObject* self);
+ static PyObject* Subscript(PyObject* self, PyObject* key);
+ static int Contains(PyObject* self, PyObject* key);
+ static PyObject* Get(PyObject* self, PyObject* args);
+
+ static PyObject* Iter(PyObject* self) { return Keys(self, nullptr); } // iterating the object iterates its keys
+ static PyObject* Keys(PyObject* self, PyObject* /* args */);
+ static PyObject* Items(PyObject* self, PyObject* /* args */);
+ static PyObject* Values(PyObject* self, PyObject* /* args */);
+};
+
+PyMappingMethods LazyDictMapping = { // mapping slots of yql.TDict: length and d[key]; item assignment is left unset
+ INIT_MEMBER(mp_length, TPyLazyDict::Len),
+ INIT_MEMBER(mp_subscript, TPyLazyDict::Subscript),
+ INIT_MEMBER(mp_ass_subscript, nullptr),
+};
+
+PySequenceMethods LazyDictSequence = { // sequence slots of yql.TDict: only length and containment are provided
+ INIT_MEMBER(sq_length , TPyLazyDict::Len),
+ INIT_MEMBER(sq_concat , nullptr),
+ INIT_MEMBER(sq_repeat , nullptr),
+ INIT_MEMBER(sq_item , nullptr),
+#if PY_MAJOR_VERSION >= 3 // the slice slot has a different member name on Python 3
+ INIT_MEMBER(was_sq_slice , nullptr),
+#else
+ INIT_MEMBER(sq_slice , nullptr),
+#endif
+ INIT_MEMBER(sq_ass_item , nullptr),
+#if PY_MAJOR_VERSION >= 3
+ INIT_MEMBER(was_sq_ass_slice , nullptr),
+#else
+ INIT_MEMBER(sq_ass_slice , nullptr),
+#endif
+ INIT_MEMBER(sq_contains , TPyLazyDict::Contains),
+ INIT_MEMBER(sq_inplace_concat , nullptr),
+ INIT_MEMBER(sq_inplace_repeat , nullptr),
+};
+
+PyNumberMethods LazyDictNumbering = { // number slots of yql.TDict: only the truth-value slot is set, every arithmetic slot is null
+ INIT_MEMBER(nb_add, nullptr),
+ INIT_MEMBER(nb_subtract, nullptr),
+ INIT_MEMBER(nb_multiply, nullptr),
+#if PY_MAJOR_VERSION < 3
+ INIT_MEMBER(nb_divide, nullptr),
+#endif
+ INIT_MEMBER(nb_remainder, nullptr),
+ INIT_MEMBER(nb_divmod, nullptr),
+ INIT_MEMBER(nb_power, nullptr),
+ INIT_MEMBER(nb_negative, nullptr),
+ INIT_MEMBER(nb_positive, nullptr),
+ INIT_MEMBER(nb_absolute, nullptr),
+#if PY_MAJOR_VERSION >= 3 // the truth-value slot is nb_bool on Python 3 and nb_nonzero on Python 2
+ INIT_MEMBER(nb_bool, TPyLazyDict::Bool),
+#else
+ INIT_MEMBER(nb_nonzero, TPyLazyDict::Bool),
+#endif
+ INIT_MEMBER(nb_invert, nullptr),
+ INIT_MEMBER(nb_lshift, nullptr),
+ INIT_MEMBER(nb_rshift, nullptr),
+ INIT_MEMBER(nb_and, nullptr),
+ INIT_MEMBER(nb_xor, nullptr),
+ INIT_MEMBER(nb_or, nullptr),
+#if PY_MAJOR_VERSION < 3
+ INIT_MEMBER(nb_coerce, nullptr),
+#endif
+ INIT_MEMBER(nb_int, nullptr),
+#if PY_MAJOR_VERSION >= 3
+ INIT_MEMBER(nb_reserved, nullptr),
+#else
+ INIT_MEMBER(nb_long, nullptr),
+#endif
+ INIT_MEMBER(nb_float, nullptr),
+#if PY_MAJOR_VERSION < 3
+ INIT_MEMBER(nb_oct, nullptr),
+ INIT_MEMBER(nb_hex, nullptr),
+#endif
+
+ INIT_MEMBER(nb_inplace_add, nullptr),
+ INIT_MEMBER(nb_inplace_subtract, nullptr),
+ INIT_MEMBER(nb_inplace_multiply, nullptr),
+ INIT_MEMBER(nb_inplace_remainder, nullptr),
+ INIT_MEMBER(nb_inplace_power, nullptr),
+ INIT_MEMBER(nb_inplace_lshift, nullptr),
+ INIT_MEMBER(nb_inplace_rshift, nullptr),
+ INIT_MEMBER(nb_inplace_and, nullptr),
+ INIT_MEMBER(nb_inplace_xor, nullptr),
+ INIT_MEMBER(nb_inplace_or, nullptr),
+
+ INIT_MEMBER(nb_floor_divide, nullptr),
+ INIT_MEMBER(nb_true_divide, nullptr),
+ INIT_MEMBER(nb_inplace_floor_divide, nullptr),
+ INIT_MEMBER(nb_inplace_true_divide, nullptr),
+
+ INIT_MEMBER(nb_index, nullptr),
+#if PY_MAJOR_VERSION >= 3
+ INIT_MEMBER(nb_matrix_multiply, nullptr),
+ INIT_MEMBER(nb_inplace_matrix_multiply, nullptr),
+#endif
+};
+
+
+#if PY_MAJOR_VERSION >= 3 // on Python 3 builds both flags are defined as 0 so the tp_flags expressions in this file can still name them
+#define Py_TPFLAGS_HAVE_ITER 0
+#define Py_TPFLAGS_HAVE_SEQUENCE_IN 0
+#endif
+
+PyDoc_STRVAR(get__doc__,
+ "D.get(k[,d]) -> D[k] if k in D, else d. d defaults to None."); // docstring of the "get" entry in LazyDictMethods
+PyDoc_STRVAR(keys__doc__,
+ "D.keys() -> an iterator over the keys of D"); // docstring of the "keys" entry
+PyDoc_STRVAR(values__doc__,
+ "D.values() -> an iterator over the values of D"); // docstring of the "values" entry
+PyDoc_STRVAR(items__doc__,
+ "D.items() -> an iterator over the (key, value) items of D"); // docstring of the "items" entry
+#if PY_MAJOR_VERSION < 3 // docstrings of the Python 2 only iter* entries
+PyDoc_STRVAR(iterkeys__doc__,
+ "D.iterkeys() -> an iterator over the keys of D");
+PyDoc_STRVAR(itervalues__doc__,
+ "D.itervalues() -> an iterator over the values of D");
+PyDoc_STRVAR(iteritems__doc__,
+ "D.iteritems() -> an iterator over the (key, value) items of D");
+#endif
+
+static PyMethodDef LazyDictMethods[] = { // Python-visible methods of yql.TDict
+ { "get", TPyLazyDict::Get, METH_VARARGS, get__doc__ },
+ { "keys", TPyLazyDict::Keys, METH_NOARGS, keys__doc__ },
+ { "items", TPyLazyDict::Items, METH_NOARGS, items__doc__ },
+ { "values", TPyLazyDict::Values, METH_NOARGS, values__doc__ },
+#if PY_MAJOR_VERSION < 3 // on Python 2 the iter* names are bound to the same functions as keys/items/values
+ { "iterkeys", TPyLazyDict::Keys, METH_NOARGS, iterkeys__doc__ },
+ { "iteritems", TPyLazyDict::Items, METH_NOARGS, iteritems__doc__ },
+ { "itervalues", TPyLazyDict::Values, METH_NOARGS, itervalues__doc__ },
+#endif
+ { nullptr, nullptr, 0, nullptr } /* sentinel */
+};
+
+PyTypeObject PyLazyDictType = { // type object of yql.TDict; slot order follows PyTypeObject and differs between Python versions, hence the #if blocks
+ PyVarObject_HEAD_INIT(&PyType_Type, 0)
+ INIT_MEMBER(tp_name , "yql.TDict"),
+ INIT_MEMBER(tp_basicsize , sizeof(TPyLazyDict)),
+ INIT_MEMBER(tp_itemsize , 0),
+ INIT_MEMBER(tp_dealloc , TPyLazyDict::Dealloc),
+#if PY_VERSION_HEX < 0x030800b4 // this position holds tp_print before 3.8.0b4 and tp_vectorcall_offset from then on
+ INIT_MEMBER(tp_print , nullptr),
+#else
+ INIT_MEMBER(tp_vectorcall_offset, 0),
+#endif
+ INIT_MEMBER(tp_getattr , nullptr),
+ INIT_MEMBER(tp_setattr , nullptr),
+#if PY_MAJOR_VERSION >= 3
+ INIT_MEMBER(tp_as_async , nullptr),
+#else
+ INIT_MEMBER(tp_compare , nullptr),
+#endif
+ INIT_MEMBER(tp_repr , TPyLazyDict::Repr),
+ INIT_MEMBER(tp_as_number , &LazyDictNumbering),
+ INIT_MEMBER(tp_as_sequence , &LazyDictSequence),
+ INIT_MEMBER(tp_as_mapping , &LazyDictMapping),
+ INIT_MEMBER(tp_hash , nullptr),
+ INIT_MEMBER(tp_call , nullptr),
+ INIT_MEMBER(tp_str , nullptr),
+ INIT_MEMBER(tp_getattro , nullptr),
+ INIT_MEMBER(tp_setattro , nullptr),
+ INIT_MEMBER(tp_as_buffer , nullptr),
+ INIT_MEMBER(tp_flags , Py_TPFLAGS_DEFAULT | Py_TPFLAGS_HAVE_ITER | Py_TPFLAGS_HAVE_SEQUENCE_IN),
+ INIT_MEMBER(tp_doc , "yql.TDict object"),
+ INIT_MEMBER(tp_traverse , nullptr),
+ INIT_MEMBER(tp_clear , nullptr),
+ INIT_MEMBER(tp_richcompare , nullptr),
+ INIT_MEMBER(tp_weaklistoffset , 0),
+ INIT_MEMBER(tp_iter , &TPyLazyDict::Iter), // iteration yields keys, see TPyLazyDict::Iter
+ INIT_MEMBER(tp_iternext , nullptr),
+ INIT_MEMBER(tp_methods , LazyDictMethods),
+ INIT_MEMBER(tp_members , nullptr),
+ INIT_MEMBER(tp_getset , nullptr),
+ INIT_MEMBER(tp_base , nullptr),
+ INIT_MEMBER(tp_dict , nullptr),
+ INIT_MEMBER(tp_descr_get , nullptr),
+ INIT_MEMBER(tp_descr_set , nullptr),
+ INIT_MEMBER(tp_dictoffset , 0),
+ INIT_MEMBER(tp_init , nullptr),
+ INIT_MEMBER(tp_alloc , nullptr),
+ INIT_MEMBER(tp_new , nullptr), // no tp_new: instances are created from C++ through TPyLazyDict::New
+ INIT_MEMBER(tp_free , nullptr),
+ INIT_MEMBER(tp_is_gc , nullptr),
+ INIT_MEMBER(tp_bases , nullptr),
+ INIT_MEMBER(tp_mro , nullptr),
+ INIT_MEMBER(tp_cache , nullptr),
+ INIT_MEMBER(tp_subclasses , nullptr),
+ INIT_MEMBER(tp_weaklist , nullptr),
+ INIT_MEMBER(tp_del , nullptr),
+ INIT_MEMBER(tp_version_tag , 0),
+#if PY_MAJOR_VERSION >= 3
+ INIT_MEMBER(tp_finalize , nullptr),
+#endif
+#if PY_VERSION_HEX >= 0x030800b1
+ INIT_MEMBER(tp_vectorcall , nullptr),
+#endif
+#if PY_VERSION_HEX >= 0x030800b4 && PY_VERSION_HEX < 0x03090000 // for this version range only, a trailing tp_print member is initialized as well
+ INIT_MEMBER(tp_print , nullptr),
+#endif
+};
+
+//////////////////////////////////////////////////////////////////////////////
+// TPyLazySet interface
+//////////////////////////////////////////////////////////////////////////////
+struct TPyLazySet // Python object (type "yql.TSet") that answers set operations from a boxed UDF value held in `Value`
+{
+ using TPtr = NUdf::TRefCountedPtr<TPyLazySet, TPyPtrOps<TPyLazySet>>;
+
+ PyObject_HEAD; // CPython object header; kept as the first member so the struct can be reinterpret_cast to and from PyObject*
+ TPyCastContext::TPtr CastCtx;
+ const NUdf::TType* ItemType;
+ TPyCleanupListItem<NUdf::IBoxedValuePtr> Value; // filled by New(), which also links it into the PyCtx cleanup list
+
+ inline static TPyLazySet* Cast(PyObject* o) { // unchecked cast: the caller must pass an object of this type
+ return reinterpret_cast<TPyLazySet*>(o);
+ }
+
+ inline static void Dealloc(PyObject* self) { // tp_dealloc slot; pairs with the `new` in New()
+ delete Cast(self);
+ }
+
+ static PyObject* New(
+ const TPyCastContext::TPtr& castCtx,
+ const NUdf::TType* itemType,
+ NUdf::IBoxedValuePtr&& value);
+
+ static int Bool(PyObject* self);
+ static PyObject* Repr(PyObject* self);
+ static Py_ssize_t Len(PyObject* self);
+ static int Contains(PyObject* self, PyObject* key);
+ static PyObject* Get(PyObject* self, PyObject* args); // NOTE(review): no definition or use of TPyLazySet::Get is visible in this file — confirm whether the declaration is still needed
+
+ static PyObject* Iter(PyObject* self);
+};
+
+PySequenceMethods LazySetSequence = { // sequence slots of yql.TSet: only length and containment are provided
+ INIT_MEMBER(sq_length , TPyLazySet::Len),
+ INIT_MEMBER(sq_concat , nullptr),
+ INIT_MEMBER(sq_repeat , nullptr),
+ INIT_MEMBER(sq_item , nullptr),
+#if PY_MAJOR_VERSION >= 3 // the slice slot has a different member name on Python 3
+ INIT_MEMBER(was_sq_slice , nullptr),
+#else
+ INIT_MEMBER(sq_slice , nullptr),
+#endif
+ INIT_MEMBER(sq_ass_item , nullptr),
+#if PY_MAJOR_VERSION >= 3
+ INIT_MEMBER(was_sq_ass_slice , nullptr),
+#else
+ INIT_MEMBER(sq_ass_slice , nullptr),
+#endif
+ INIT_MEMBER(sq_contains , TPyLazySet::Contains),
+ INIT_MEMBER(sq_inplace_concat , nullptr),
+ INIT_MEMBER(sq_inplace_repeat , nullptr),
+};
+
+PyNumberMethods LazySetNumbering = { // number slots of yql.TSet: only the truth-value slot is set, every arithmetic slot is null
+ INIT_MEMBER(nb_add, nullptr),
+ INIT_MEMBER(nb_subtract, nullptr),
+ INIT_MEMBER(nb_multiply, nullptr),
+#if PY_MAJOR_VERSION < 3
+ INIT_MEMBER(nb_divide, nullptr),
+#endif
+ INIT_MEMBER(nb_remainder, nullptr),
+ INIT_MEMBER(nb_divmod, nullptr),
+ INIT_MEMBER(nb_power, nullptr),
+ INIT_MEMBER(nb_negative, nullptr),
+ INIT_MEMBER(nb_positive, nullptr),
+ INIT_MEMBER(nb_absolute, nullptr),
+#if PY_MAJOR_VERSION >= 3 // the truth-value slot is nb_bool on Python 3 and nb_nonzero on Python 2
+ INIT_MEMBER(nb_bool, TPyLazySet::Bool),
+#else
+ INIT_MEMBER(nb_nonzero, TPyLazySet::Bool),
+#endif
+ INIT_MEMBER(nb_invert, nullptr),
+ INIT_MEMBER(nb_lshift, nullptr),
+ INIT_MEMBER(nb_rshift, nullptr),
+ INIT_MEMBER(nb_and, nullptr),
+ INIT_MEMBER(nb_xor, nullptr),
+ INIT_MEMBER(nb_or, nullptr),
+#if PY_MAJOR_VERSION < 3
+ INIT_MEMBER(nb_coerce, nullptr),
+#endif
+ INIT_MEMBER(nb_int, nullptr),
+#if PY_MAJOR_VERSION >= 3
+ INIT_MEMBER(nb_reserved, nullptr),
+#else
+ INIT_MEMBER(nb_long, nullptr),
+#endif
+ INIT_MEMBER(nb_float, nullptr),
+#if PY_MAJOR_VERSION < 3
+ INIT_MEMBER(nb_oct, nullptr),
+ INIT_MEMBER(nb_hex, nullptr),
+#endif
+
+ INIT_MEMBER(nb_inplace_add, nullptr),
+ INIT_MEMBER(nb_inplace_subtract, nullptr),
+ INIT_MEMBER(nb_inplace_multiply, nullptr),
+ INIT_MEMBER(nb_inplace_remainder, nullptr),
+ INIT_MEMBER(nb_inplace_power, nullptr),
+ INIT_MEMBER(nb_inplace_lshift, nullptr),
+ INIT_MEMBER(nb_inplace_rshift, nullptr),
+ INIT_MEMBER(nb_inplace_and, nullptr),
+ INIT_MEMBER(nb_inplace_xor, nullptr),
+ INIT_MEMBER(nb_inplace_or, nullptr),
+
+ INIT_MEMBER(nb_floor_divide, nullptr),
+ INIT_MEMBER(nb_true_divide, nullptr),
+ INIT_MEMBER(nb_inplace_floor_divide, nullptr),
+ INIT_MEMBER(nb_inplace_true_divide, nullptr),
+
+ INIT_MEMBER(nb_index, nullptr),
+#if PY_MAJOR_VERSION >= 3
+ INIT_MEMBER(nb_matrix_multiply, nullptr),
+ INIT_MEMBER(nb_inplace_matrix_multiply, nullptr),
+#endif
+};
+
+PyTypeObject PyLazySetType = { // type object of yql.TSet; slot order follows PyTypeObject and differs between Python versions, hence the #if blocks
+ PyVarObject_HEAD_INIT(&PyType_Type, 0)
+ INIT_MEMBER(tp_name , "yql.TSet"),
+ INIT_MEMBER(tp_basicsize , sizeof(TPyLazySet)),
+ INIT_MEMBER(tp_itemsize , 0),
+ INIT_MEMBER(tp_dealloc , TPyLazySet::Dealloc),
+#if PY_VERSION_HEX < 0x030800b4 // this position holds tp_print before 3.8.0b4 and tp_vectorcall_offset from then on
+ INIT_MEMBER(tp_print , nullptr),
+#else
+ INIT_MEMBER(tp_vectorcall_offset, 0),
+#endif
+ INIT_MEMBER(tp_getattr , nullptr),
+ INIT_MEMBER(tp_setattr , nullptr),
+#if PY_MAJOR_VERSION >= 3
+ INIT_MEMBER(tp_as_async , nullptr),
+#else
+ INIT_MEMBER(tp_compare , nullptr),
+#endif
+ INIT_MEMBER(tp_repr , TPyLazySet::Repr),
+ INIT_MEMBER(tp_as_number , &LazySetNumbering),
+ INIT_MEMBER(tp_as_sequence , &LazySetSequence),
+ INIT_MEMBER(tp_as_mapping , nullptr), // unlike yql.TDict, the set type has no mapping slots
+ INIT_MEMBER(tp_hash , nullptr),
+ INIT_MEMBER(tp_call , nullptr),
+ INIT_MEMBER(tp_str , nullptr),
+ INIT_MEMBER(tp_getattro , nullptr),
+ INIT_MEMBER(tp_setattro , nullptr),
+ INIT_MEMBER(tp_as_buffer , nullptr),
+ INIT_MEMBER(tp_flags , Py_TPFLAGS_DEFAULT | Py_TPFLAGS_HAVE_ITER | Py_TPFLAGS_HAVE_SEQUENCE_IN),
+ INIT_MEMBER(tp_doc , "yql.TSet object"),
+ INIT_MEMBER(tp_traverse , nullptr),
+ INIT_MEMBER(tp_clear , nullptr),
+ INIT_MEMBER(tp_richcompare , nullptr),
+ INIT_MEMBER(tp_weaklistoffset , 0),
+ INIT_MEMBER(tp_iter , &TPyLazySet::Iter),
+ INIT_MEMBER(tp_iternext , nullptr),
+ INIT_MEMBER(tp_methods , nullptr), // no named Python methods are exposed on the set type
+ INIT_MEMBER(tp_members , nullptr),
+ INIT_MEMBER(tp_getset , nullptr),
+ INIT_MEMBER(tp_base , nullptr),
+ INIT_MEMBER(tp_dict , nullptr),
+ INIT_MEMBER(tp_descr_get , nullptr),
+ INIT_MEMBER(tp_descr_set , nullptr),
+ INIT_MEMBER(tp_dictoffset , 0),
+ INIT_MEMBER(tp_init , nullptr),
+ INIT_MEMBER(tp_alloc , nullptr),
+ INIT_MEMBER(tp_new , nullptr), // no tp_new: instances are created from C++ through TPyLazySet::New
+ INIT_MEMBER(tp_free , nullptr),
+ INIT_MEMBER(tp_is_gc , nullptr),
+ INIT_MEMBER(tp_bases , nullptr),
+ INIT_MEMBER(tp_mro , nullptr),
+ INIT_MEMBER(tp_cache , nullptr),
+ INIT_MEMBER(tp_subclasses , nullptr),
+ INIT_MEMBER(tp_weaklist , nullptr),
+ INIT_MEMBER(tp_del , nullptr),
+ INIT_MEMBER(tp_version_tag , 0),
+#if PY_MAJOR_VERSION >= 3
+ INIT_MEMBER(tp_finalize , nullptr),
+#endif
+#if PY_VERSION_HEX >= 0x030800b1
+ INIT_MEMBER(tp_vectorcall , nullptr),
+#endif
+#if PY_VERSION_HEX >= 0x030800b4 && PY_VERSION_HEX < 0x03090000 // for this version range only, a trailing tp_print member is initialized as well
+ INIT_MEMBER(tp_print , nullptr),
+#endif
+};
+
+//////////////////////////////////////////////////////////////////////////////
+// TPyLazyDict implementation
+//////////////////////////////////////////////////////////////////////////////
+// Truth-value slot (nb_bool / nb_nonzero): 1 when the wrapped dictionary has
+// items, 0 when it has none. PY_CATCH(-1) supplies the error result.
+int TPyLazyDict::Bool(PyObject* self)
+{
+    PY_TRY {
+        const TPyLazyDict* dict = Cast(self);
+        const bool nonEmpty = NUdf::TBoxedValueAccessor::HasDictItems(*dict->Value.Get());
+        return nonEmpty ? 1 : 0;
+    } PY_CATCH(-1)
+}
+
+// tp_repr slot: every instance is rendered as the fixed text "<yql.TDict>".
+PyObject* TPyLazyDict::Repr(PyObject*)
+{
+    auto text = PyRepr("<yql.TDict>");
+    return text.Release();
+}
+
+// Length slot (mp_length / sq_length): number of entries in the wrapped
+// dictionary. PY_CATCH(-1) supplies the error result.
+Py_ssize_t TPyLazyDict::Len(PyObject* self)
+{
+    PY_TRY {
+        const TPyLazyDict* dict = Cast(self);
+        const auto length = NUdf::TBoxedValueAccessor::GetDictLength(*dict->Value.Get());
+        return static_cast<Py_ssize_t>(length);
+    } PY_CATCH(-1)
+}
+
+// mp_subscript slot: implements `d[key]`.
+//
+// With a KeyType the key is converted to a UDF value and looked up; a miss
+// raises KeyError(repr(key)). Without a KeyType the object is index-keyed:
+// the key must satisfy PyIndex_Check (TypeError otherwise), it is looked up
+// as ui64, and a miss raises IndexError(repr(key)).
+//
+// Returns the payload converted to a Python object, or nullptr with a Python
+// exception set.
+PyObject* TPyLazyDict::Subscript(PyObject* self, PyObject* key)
+{
+    PY_TRY {
+        TPyLazyDict* dict = Cast(self);
+
+        if (dict->KeyType) {
+            const auto mkqlKey = FromPyObject(dict->CastCtx, dict->KeyType, key);
+            if (auto value = NUdf::TBoxedValueAccessor::Lookup(*dict->Value.Get(), mkqlKey)) {
+                return ToPyObject(dict->CastCtx, dict->PayloadType, value.Release().GetOptionalValue()).Release();
+            }
+
+            const TPyObjectPtr repr = PyObject_Repr(key);
+            PyErr_SetObject(PyExc_KeyError, repr.Get());
+            return nullptr;
+        } else {
+            if (!PyIndex_Check(key)) {
+                const TPyObjectPtr type = PyObject_Type(key);
+                const TPyObjectPtr repr = PyObject_Repr(type.Get());
+                const TPyObjectPtr error = PyUnicode_FromFormat("Unsupported index object type: %R", repr.Get());
+                PyErr_SetObject(PyExc_TypeError, error.Get());
+                return nullptr;
+            }
+
+            const Py_ssize_t index = PyNumber_AsSsize_t(key, PyExc_IndexError);
+            if (index < 0) {
+                // A negative result is either the -1 error marker (the
+                // exception is already set by PyNumber_AsSsize_t) or a
+                // genuinely negative index. The second case used to return
+                // nullptr without any exception set; raise IndexError for it,
+                // the same way an absent index is reported below.
+                if (!PyErr_Occurred()) {
+                    const TPyObjectPtr repr = PyObject_Repr(key);
+                    PyErr_SetObject(PyExc_IndexError, repr.Get());
+                }
+                return nullptr;
+            }
+
+            if (auto value = NUdf::TBoxedValueAccessor::Lookup(*dict->Value.Get(), NUdf::TUnboxedValuePod(ui64(index)))) {
+                return ToPyObject(dict->CastCtx, dict->PayloadType, value.Release().GetOptionalValue()).Release();
+            }
+
+            const TPyObjectPtr repr = PyObject_Repr(key);
+            PyErr_SetObject(PyExc_IndexError, repr.Get());
+            return nullptr;
+        }
+
+    } PY_CATCH(nullptr)
+}
+
+// sq_contains slot: implements `key in d`.
+//
+// Result:
+//  -1 error (a Python exception is set)
+//   0 not found
+//   1 found
+//
+// With a KeyType the key is converted to a UDF value; without one the key
+// must satisfy PyIndex_Check (TypeError otherwise) and is looked up as ui64.
+int TPyLazyDict::Contains(PyObject* self, PyObject* key)
+{
+    PY_TRY {
+        TPyLazyDict* dict = Cast(self);
+        NUdf::TUnboxedValue mkqlKey;
+
+        if (dict->KeyType) {
+            mkqlKey = FromPyObject(dict->CastCtx, dict->KeyType, key);
+        } else {
+            if (!PyIndex_Check(key)) {
+                const TPyObjectPtr type = PyObject_Type(key);
+                const TPyObjectPtr repr = PyObject_Repr(type.Get());
+                const TPyObjectPtr error = PyUnicode_FromFormat("Unsupported index object type: %R", repr.Get());
+                PyErr_SetObject(PyExc_TypeError, error.Get());
+                return -1;
+            }
+
+            const Py_ssize_t index = PyNumber_AsSsize_t(key, PyExc_IndexError);
+            if (index < 0) {
+                // -1 with a pending exception means the conversion itself
+                // failed: report the error instead of answering "not found"
+                // while the exception is still set. A genuinely negative
+                // index is never present.
+                return PyErr_Occurred() ? -1 : 0;
+            }
+            mkqlKey = NUdf::TUnboxedValuePod(ui64(index));
+        }
+
+        return NUdf::TBoxedValueAccessor::Contains(*dict->Value.Get(), mkqlKey) ? 1 : 0;
+    } PY_CATCH(-1)
+}
+
+// Implements `D.get(k[, d])`: returns D[k] when the key is present, otherwise
+// the default `d` (None when omitted).
+//
+// With a KeyType the key is converted to a UDF value; without one the key
+// must satisfy PyIndex_Check (TypeError otherwise) and is looked up as ui64.
+//
+// Returns a new reference, or nullptr with a Python exception set.
+PyObject* TPyLazyDict::Get(PyObject* self, PyObject* args)
+{
+    PY_TRY {
+        PyObject* key = nullptr;
+        PyObject* failobj = Py_None;
+
+        if (!PyArg_UnpackTuple(args, "get", 1, 2, &key, &failobj))
+            return nullptr;
+
+        TPyLazyDict* dict = Cast(self);
+        if (dict->KeyType) {
+            const auto mkqlKey = FromPyObject(dict->CastCtx, dict->KeyType, key);
+            if (auto value = NUdf::TBoxedValueAccessor::Lookup(*dict->Value.Get(), mkqlKey)) {
+                return ToPyObject(dict->CastCtx, dict->PayloadType, value.Release().GetOptionalValue()).Release();
+            }
+        } else {
+            if (!PyIndex_Check(key)) {
+                const TPyObjectPtr type = PyObject_Type(key);
+                const TPyObjectPtr repr = PyObject_Repr(type.Get());
+                const TPyObjectPtr error = PyUnicode_FromFormat("Unsupported index object type: %R", repr.Get());
+                PyErr_SetObject(PyExc_TypeError, error.Get());
+                return nullptr;
+            }
+
+            const Py_ssize_t index = PyNumber_AsSsize_t(key, PyExc_IndexError);
+            if (index < 0) {
+                // Only a pending exception is an error. A genuinely negative
+                // index used to return nullptr without any exception set; it
+                // is simply absent, so fall through to the default.
+                if (PyErr_Occurred()) {
+                    return nullptr;
+                }
+            } else if (auto value = NUdf::TBoxedValueAccessor::Lookup(*dict->Value.Get(), NUdf::TUnboxedValuePod(ui64(index)))) {
+                return ToPyObject(dict->CastCtx, dict->PayloadType, value.Release().GetOptionalValue()).Release();
+            }
+        }
+
+        Py_INCREF(failobj);
+        return failobj;
+    } PY_CATCH(nullptr)
+}
+
+// Implements `D.keys()` (and `iter(D)`): a Python iterator over the keys of
+// the wrapped dictionary, converted with KeyType.
+PyObject* TPyLazyDict::Keys(PyObject* self, PyObject* /* args */)
+{
+    PY_TRY {
+        const TPyLazyDict* dict = Cast(self);
+        auto& boxed = *dict->Value.Get();
+
+        return ToPyIterator(
+            dict->CastCtx,
+            dict->KeyType,
+            NUdf::TBoxedValueAccessor::GetKeysIterator(boxed)).Release();
+    } PY_CATCH(nullptr)
+}
+
+// Implements `D.items()`: a Python iterator over the (key, value) pairs of
+// the wrapped dictionary, converted with KeyType and PayloadType.
+PyObject* TPyLazyDict::Items(PyObject* self, PyObject* /* args */)
+{
+    PY_TRY {
+        const TPyLazyDict* dict = Cast(self);
+        auto& boxed = *dict->Value.Get();
+
+        return ToPyIterator(
+            dict->CastCtx,
+            dict->KeyType,
+            dict->PayloadType,
+            NUdf::TBoxedValueAccessor::GetDictIterator(boxed)).Release();
+    } PY_CATCH(nullptr)
+}
+
+// Implements `D.values()`: a Python iterator over the payloads of the wrapped
+// dictionary, converted with PayloadType.
+PyObject* TPyLazyDict::Values(PyObject* self, PyObject* /* args */)
+{
+    PY_TRY {
+        const TPyLazyDict* dict = Cast(self);
+        auto& boxed = *dict->Value.Get();
+
+        return ToPyIterator(
+            dict->CastCtx,
+            dict->PayloadType,
+            NUdf::TBoxedValueAccessor::GetPayloadsIterator(boxed)).Release();
+    } PY_CATCH(nullptr)
+}
+
+// Allocates a yql.TDict wrapper around `value` and returns it as PyObject*.
+//
+// The boxed value is stored through TPyCleanupListItem::Set, which also links
+// the wrapper into castCtx->PyCtx->CleanupList.
+//
+// `value` is an rvalue reference, but as a named parameter it is an lvalue
+// inside the function. It used to be passed on as such, which copied the
+// pointer into Set's by-value parameter; it is now moved instead.
+PyObject* TPyLazyDict::New(
+        const TPyCastContext::TPtr& castCtx,
+        const NUdf::TType* keyType,
+        const NUdf::TType* payloadType,
+        NUdf::IBoxedValuePtr&& value)
+{
+    TPyLazyDict* dict = new TPyLazyDict;
+    PyObject_INIT(dict, &PyLazyDictType);
+
+    dict->CastCtx = castCtx;
+    dict->KeyType = keyType;
+    dict->PayloadType = payloadType;
+    dict->Value.Set(castCtx->PyCtx, std::move(value));
+    return reinterpret_cast<PyObject*>(dict);
+}
+
+//////////////////////////////////////////////////////////////////////////////
+// TPyLazySet implementation
+//////////////////////////////////////////////////////////////////////////////
+// Truth-value slot (nb_bool / nb_nonzero): 1 when the wrapped set has items,
+// 0 when it has none. PY_CATCH(-1) supplies the error result.
+int TPyLazySet::Bool(PyObject* self)
+{
+    PY_TRY {
+        const TPyLazySet* set = Cast(self);
+        const bool nonEmpty = NUdf::TBoxedValueAccessor::HasDictItems(*set->Value.Get());
+        return nonEmpty ? 1 : 0;
+    } PY_CATCH(-1)
+}
+
+// tp_repr slot: every instance is rendered as the fixed text "<yql.TSet>".
+PyObject* TPyLazySet::Repr(PyObject*)
+{
+    auto text = PyRepr("<yql.TSet>");
+    return text.Release();
+}
+
+// sq_length slot: number of items in the wrapped set.
+// PY_CATCH(-1) supplies the error result.
+Py_ssize_t TPyLazySet::Len(PyObject* self)
+{
+    PY_TRY {
+        const TPyLazySet* set = Cast(self);
+        const auto length = NUdf::TBoxedValueAccessor::GetDictLength(*set->Value.Get());
+        return static_cast<Py_ssize_t>(length);
+    } PY_CATCH(-1)
+}
+
+// sq_contains slot: implements `key in s`.
+//
+// Result:
+//  -1 error
+//   0 not found
+//   1 found
+int TPyLazySet::Contains(PyObject* self, PyObject* key)
+{
+    PY_TRY {
+        const TPyLazySet* set = Cast(self);
+        const auto mkqlKey = FromPyObject(set->CastCtx, set->ItemType, key);
+        const bool found = NUdf::TBoxedValueAccessor::Contains(*set->Value.Get(), mkqlKey);
+        return found ? 1 : 0;
+    } PY_CATCH(-1)
+}
+
+// tp_iter slot: a Python iterator over the items of the wrapped set,
+// converted with ItemType.
+PyObject* TPyLazySet::Iter(PyObject* self)
+{
+    PY_TRY {
+        const TPyLazySet* set = Cast(self);
+        auto& boxed = *set->Value.Get();
+
+        return ToPyIterator(
+            set->CastCtx,
+            set->ItemType,
+            NUdf::TBoxedValueAccessor::GetKeysIterator(boxed)).Release();
+    } PY_CATCH(nullptr)
+}
+
+// Allocates a yql.TSet wrapper around `value` and returns it as PyObject*.
+//
+// The boxed value is stored through TPyCleanupListItem::Set, which also links
+// the wrapper into castCtx->PyCtx->CleanupList.
+//
+// `value` is an rvalue reference, but as a named parameter it is an lvalue
+// inside the function. It used to be passed on as such, which copied the
+// pointer into Set's by-value parameter; it is now moved instead. The local
+// is named `set` rather than `dict`, since it points to a TPyLazySet.
+PyObject* TPyLazySet::New(
+        const TPyCastContext::TPtr& castCtx,
+        const NUdf::TType* itemType,
+        NUdf::IBoxedValuePtr&& value)
+{
+    TPyLazySet* set = new TPyLazySet;
+    PyObject_INIT(set, &PyLazySetType);
+
+    set->CastCtx = castCtx;
+    set->ItemType = itemType;
+    set->Value.Set(castCtx->PyCtx, std::move(value));
+    return reinterpret_cast<PyObject*>(set);
+}
+
+//////////////////////////////////////////////////////////////////////////////
+
+// Wraps the boxed part of `value` into a new yql.TDict object that uses the
+// given key and payload types.
+TPyObjectPtr ToPyLazyDict(
+        const TPyCastContext::TPtr& castCtx,
+        const NUdf::TType* keyType,
+        const NUdf::TType* payloadType,
+        const NUdf::TUnboxedValuePod& value)
+{
+    PyObject* const wrapper = TPyLazyDict::New(castCtx, keyType, payloadType, value.AsBoxed());
+    return wrapper;
+}
+
+// Wraps the boxed part of `value` into a new yql.TSet object that uses the
+// given item type.
+TPyObjectPtr ToPyLazySet(
+        const TPyCastContext::TPtr& castCtx,
+        const NUdf::TType* itemType,
+        const NUdf::TUnboxedValuePod& value)
+{
+    PyObject* const wrapper = TPyLazySet::New(castCtx, itemType, value.AsBoxed());
+    return wrapper;
+}
+
+} // namespace NPython
diff --git a/yql/essentials/udfs/common/python/bindings/py_dict.h b/yql/essentials/udfs/common/python/bindings/py_dict.h
new file mode 100644
index 00000000000..538ca69a127
--- /dev/null
+++ b/yql/essentials/udfs/common/python/bindings/py_dict.h
@@ -0,0 +1,50 @@
+#pragma once
+
+#include "py_ptr.h"
+#include "py_ctx.h"
+
+namespace NPython {
+
+extern PyTypeObject PyLazyDictType; // Python type object for lazy dict wrappers
+extern PyTypeObject PyLazySetType; // Python type object for lazy set wrappers
+
+TPyObjectPtr ToPyLazyDict( // Exposes the boxed dict in `value` to Python as a lazy dict object.
+ const TPyCastContext::TPtr& castCtx,
+ const NKikimr::NUdf::TType* keyType,
+ const NKikimr::NUdf::TType* payloadType,
+ const NKikimr::NUdf::TUnboxedValuePod& value);
+
+TPyObjectPtr ToPyLazySet( // Exposes the boxed dict in `value` to Python as a lazy set object.
+ const TPyCastContext::TPtr& castCtx,
+ const NKikimr::NUdf::TType* itemType,
+ const NKikimr::NUdf::TUnboxedValuePod& value);
+
+NKikimr::NUdf::TUnboxedValue FromPyMapping( // NOTE(review): presumably converts any Python mapping object to a UDF dict value — confirm in py_dict.cpp
+ const TPyCastContext::TPtr& castCtx,
+ const NKikimr::NUdf::TType* keyType,
+ const NKikimr::NUdf::TType* payType,
+ PyObject* map);
+
+NKikimr::NUdf::TUnboxedValue FromPyDict( // NOTE(review): presumably converts a Python dict to a UDF dict value — confirm in py_dict.cpp
+ const TPyCastContext::TPtr& castCtx,
+ const NKikimr::NUdf::TType* keyType,
+ const NKikimr::NUdf::TType* payType,
+ PyObject* dict);
+
+NKikimr::NUdf::TUnboxedValue FromPySet( // NOTE(review): presumably converts a Python set to a UDF dict value with void payloads — confirm in py_dict.cpp
+ const TPyCastContext::TPtr& castCtx,
+ const NKikimr::NUdf::TType* itemType,
+ PyObject* set);
+
+NKikimr::NUdf::TUnboxedValue FromPySequence( // NOTE(review): overload taking only a key type; exact semantics are not visible here — confirm in py_dict.cpp
+ const TPyCastContext::TPtr& castCtx,
+ const NKikimr::NUdf::TType* keyType,
+ PyObject* sequence);
+
+NKikimr::NUdf::TUnboxedValue FromPySequence( // NOTE(review): overload taking an item type plus a data type id for the key; presumably keys are element positions — confirm in py_dict.cpp
+ const TPyCastContext::TPtr& castCtx,
+ const NKikimr::NUdf::TType* itemType,
+ const NKikimr::NUdf::TDataTypeId keyType,
+ PyObject* sequence);
+
+} // namespace NPython
diff --git a/yql/essentials/udfs/common/python/bindings/py_dict_ut.cpp b/yql/essentials/udfs/common/python/bindings/py_dict_ut.cpp
new file mode 100644
index 00000000000..9ac9627ebba
--- /dev/null
+++ b/yql/essentials/udfs/common/python/bindings/py_dict_ut.cpp
@@ -0,0 +1,722 @@
+#include "ut3/py_test_engine.h"
+
+#include <yql/essentials/public/udf/udf_ut_helpers.h>
+
+#include <library/cpp/testing/unittest/registar.h>
+
+using namespace NPython;
+
+Y_UNIT_TEST_SUITE(TPyDictTest) {
+ Y_UNIT_TEST(FromPyEmptyDict) { // An empty Python dict must arrive as a boxed dict value with no items.
+ TPythonTestEngine engine;
+ engine.ToMiniKQL<NUdf::TDict<ui32, char*>>(
+ "def Test(): return {}",
+ [](const NUdf::TUnboxedValuePod& value) {
+ UNIT_ASSERT(value);
+ UNIT_ASSERT(value.IsBoxed());
+ UNIT_ASSERT(!value.HasDictItems());
+ UNIT_ASSERT_EQUAL(value.GetDictLength(), 0);
+ });
+ }
+
+ Y_UNIT_TEST(FromPyDict_Length) { // A three-entry Python dict must arrive as a non-sorted dict of length 3.
+ TPythonTestEngine engine;
+ engine.ToMiniKQL<NUdf::TDict<ui32, char*>>(
+ "def Test(): return {1: 'one', 3: 'three', 2: 'two'}",
+ [](const NUdf::TUnboxedValuePod& value) {
+ UNIT_ASSERT(value);
+ UNIT_ASSERT(value.IsBoxed());
+ UNIT_ASSERT(value.HasDictItems());
+ UNIT_ASSERT(!value.IsSortedDict());
+ UNIT_ASSERT_EQUAL(value.GetDictLength(), 3);
+ });
+ }
+
+ Y_UNIT_TEST(FromPyDict_Lookup) { // Lookup must return the payload for present keys and an empty value for absent ones.
+ TPythonTestEngine engine;
+ engine.ToMiniKQL<NUdf::TDict<ui32, char*>>(
+ "def Test(): return {1: 'one', 3: 'three', 2: 'two'}",
+ [](const NUdf::TUnboxedValuePod& value) {
+ const auto v1 = value.Lookup(NUdf::TUnboxedValuePod(ui32(1)));
+ UNIT_ASSERT_EQUAL(v1.AsStringRef(), "one");
+ const auto v2 = value.Lookup(NUdf::TUnboxedValuePod(ui32(2)));
+ UNIT_ASSERT_EQUAL(v2.AsStringRef(), "two");
+ const auto v3 = value.Lookup(NUdf::TUnboxedValuePod(ui32(3)));
+ UNIT_ASSERT_EQUAL(v3.AsStringRef(), "three");
+
+ UNIT_ASSERT(!value.Lookup(NUdf::TUnboxedValuePod(ui32(0)))); // keys 0 and 4 are not in the dict
+ UNIT_ASSERT(!value.Lookup(NUdf::TUnboxedValuePod(ui32(4))));
+ });
+ }
+
+ Y_UNIT_TEST(FromPyDict_Contains) { // Contains must be true exactly for the keys 1, 2 and 3.
+ TPythonTestEngine engine;
+ engine.ToMiniKQL<NUdf::TDict<ui32, char*>>(
+ "def Test(): return {1: 'one', 3: 'three', 2: 'two'}",
+ [](const NUdf::TUnboxedValuePod& value) {
+ UNIT_ASSERT(!value.Contains(NUdf::TUnboxedValuePod(ui32(0))));
+ UNIT_ASSERT(value.Contains(NUdf::TUnboxedValuePod(ui32(1))));
+ UNIT_ASSERT(value.Contains(NUdf::TUnboxedValuePod(ui32(2))));
+ UNIT_ASSERT(value.Contains(NUdf::TUnboxedValuePod(ui32(3))));
+ UNIT_ASSERT(!value.Contains(NUdf::TUnboxedValuePod(ui32(4))));
+ });
+ }
+
+ Y_UNIT_TEST(FromPyDict_Items) { // The dict iterator must yield every (key, payload) pair of the Python dict.
+ TPythonTestEngine engine;
+ engine.ToMiniKQL<NUdf::TDict<ui32, char*>>(
+ "def Test(): return {1: 'one', 3: 'three', 2: 'two'}",
+ [](const NUdf::TUnboxedValuePod& value) {
+ std::map<ui32, TString> items; // collected into a map, so the check does not depend on iteration order
+ const auto it = value.GetDictIterator();
+ for (NUdf::TUnboxedValue key, payload; it.NextPair(key, payload);) {
+ items.emplace(key.Get<ui32>(), payload.AsStringRef());
+ }
+
+ UNIT_ASSERT_EQUAL(items.size(), 3);
+ UNIT_ASSERT_EQUAL(items[1], "one");
+ UNIT_ASSERT_EQUAL(items[2], "two");
+ UNIT_ASSERT_EQUAL(items[3], "three");
+ });
+ }
+
+ Y_UNIT_TEST(FromPyDict_Keys) { // The keys iterator must yield exactly the keys 1, 2 and 3.
+ TPythonTestEngine engine;
+ engine.ToMiniKQL<NUdf::TDict<ui32, char*>>(
+ "def Test(): return {1: 'one', 3: 'three', 2: 'two'}",
+ [](const NUdf::TUnboxedValuePod& value) {
+ std::vector<ui32> items;
+ const auto it = value.GetKeysIterator();
+ for (NUdf::TUnboxedValue key; it.Next(key);) {
+ items.emplace_back(key.Get<ui32>());
+ }
+
+ UNIT_ASSERT_EQUAL(items.size(), 3);
+
+ std::sort(items.begin(), items.end()); // sort first so the check does not depend on iteration order
+ UNIT_ASSERT_EQUAL(items[0], 1U);
+ UNIT_ASSERT_EQUAL(items[1], 2U);
+ UNIT_ASSERT_EQUAL(items[2], 3U);
+ });
+ }
+
+ Y_UNIT_TEST(FromPyDict_Values) { // The payloads iterator must yield exactly the three string payloads.
+ TPythonTestEngine engine;
+ engine.ToMiniKQL<NUdf::TDict<ui32, char*>>(
+ "def Test(): return {1: 'one', 3: 'three', 2: 'two'}",
+ [](const NUdf::TUnboxedValuePod& value) {
+ std::vector<TString> items;
+ const auto it = value.GetPayloadsIterator();
+ for (NUdf::TUnboxedValue payload; it.Next(payload);) {
+ items.emplace_back(payload.AsStringRef());
+ }
+
+ UNIT_ASSERT_EQUAL(items.size(), 3);
+
+ std::sort(items.begin(), items.end()); // lexicographic order: "one" < "three" < "two"
+ UNIT_ASSERT_EQUAL(items[0], "one");
+ UNIT_ASSERT_EQUAL(items[1], "three");
+ UNIT_ASSERT_EQUAL(items[2], "two");
+ });
+ }
+
+ Y_UNIT_TEST(FromPyList_Length) { // A Python list returned where a dict is expected must arrive as a sorted dict of length 3.
+ TPythonTestEngine engine;
+ engine.ToMiniKQL<NUdf::TDict<ui32, char*>>(
+ "def Test(): return ['one', 'two', 'three']",
+ [](const NUdf::TUnboxedValuePod& value) {
+ UNIT_ASSERT(value);
+ UNIT_ASSERT(value.IsBoxed());
+ UNIT_ASSERT(value.HasDictItems());
+ UNIT_ASSERT(value.IsSortedDict());
+ UNIT_ASSERT_EQUAL(value.GetDictLength(), 3);
+ });
+ }
+
+ Y_UNIT_TEST(FromPyTuple_Lookup) { // A tuple viewed as a dict is keyed by element position; negative keys count from the end.
+ TPythonTestEngine engine;
+ engine.ToMiniKQL<NUdf::TDict<i32, char*>>(
+ "def Test(): return ('one', 'two', 'three')",
+ [](const NUdf::TUnboxedValuePod& value) {
+ const auto v1 = value.Lookup(NUdf::TUnboxedValuePod(i32(0)));
+ UNIT_ASSERT_EQUAL(v1.AsStringRef(), "one");
+ const auto v2 = value.Lookup(NUdf::TUnboxedValuePod(i32(1)));
+ UNIT_ASSERT_EQUAL(v2.AsStringRef(), "two");
+ const auto v3 = value.Lookup(NUdf::TUnboxedValuePod(i32(2)));
+ UNIT_ASSERT_EQUAL(v3.AsStringRef(), "three");
+ const auto v4 = value.Lookup(NUdf::TUnboxedValuePod(i32(-1))); // -1 addresses the last element
+ UNIT_ASSERT_EQUAL(v4.AsStringRef(), "three");
+ const auto v5 = value.Lookup(NUdf::TUnboxedValuePod(i32(-2)));
+ UNIT_ASSERT_EQUAL(v5.AsStringRef(), "two");
+ const auto v6 = value.Lookup(NUdf::TUnboxedValuePod(i32(-3)));
+ UNIT_ASSERT_EQUAL(v6.AsStringRef(), "one");
+
+ UNIT_ASSERT(!value.Lookup(NUdf::TUnboxedValuePod(i32(3)))); // out of range in either direction yields an empty value
+ UNIT_ASSERT(!value.Lookup(NUdf::TUnboxedValuePod(i32(-4))));
+ });
+ }
+
+ Y_UNIT_TEST(FromPyList_Contains) { // For a list viewed as a dict, Contains is true for positions -3..2 and false outside that range.
+ TPythonTestEngine engine;
+ engine.ToMiniKQL<NUdf::TDict<i16, char*>>(
+ "def Test(): return ['one', 'two', 'three']",
+ [](const NUdf::TUnboxedValuePod& value) {
+ UNIT_ASSERT(value.Contains(NUdf::TUnboxedValuePod(i16(0))));
+ UNIT_ASSERT(value.Contains(NUdf::TUnboxedValuePod(i16(1))));
+ UNIT_ASSERT(value.Contains(NUdf::TUnboxedValuePod(i16(2))));
+ UNIT_ASSERT(!value.Contains(NUdf::TUnboxedValuePod(i16(3))));
+ UNIT_ASSERT(value.Contains(NUdf::TUnboxedValuePod(i16(-1))));
+ UNIT_ASSERT(value.Contains(NUdf::TUnboxedValuePod(i16(-2))));
+ UNIT_ASSERT(value.Contains(NUdf::TUnboxedValuePod(i16(-3))));
+ UNIT_ASSERT(!value.Contains(NUdf::TUnboxedValuePod(i16(-4))));
+ });
+ }
+
+ Y_UNIT_TEST(FromPyTuple_Items) { // Iterating a tuple viewed as a dict must yield (position, element) pairs in positional order.
+ TPythonTestEngine engine;
+ engine.ToMiniKQL<NUdf::TDict<ui16, char*>>(
+ "def Test(): return ('one', 'two', 'three')",
+ [](const NUdf::TUnboxedValuePod& value) {
+ std::vector<std::pair<ui16, TString>> items; // a vector keeps the iteration order, which is asserted below
+ const auto it = value.GetDictIterator();
+ for (NUdf::TUnboxedValue key, payload; it.NextPair(key, payload);) {
+ items.emplace_back(key.Get<ui16>(), payload.AsStringRef());
+ }
+
+ UNIT_ASSERT_EQUAL(items.size(), 3U);
+ UNIT_ASSERT_EQUAL(items[0].first, 0);
+ UNIT_ASSERT_EQUAL(items[1].first, 1);
+ UNIT_ASSERT_EQUAL(items[2].first, 2);
+ UNIT_ASSERT_EQUAL(items[0].second, "one");
+ UNIT_ASSERT_EQUAL(items[1].second, "two");
+ UNIT_ASSERT_EQUAL(items[2].second, "three");
+ });
+ }
+
+ Y_UNIT_TEST(FromPyList_Keys) { // The keys of a list viewed as a dict must be 0, 1, 2 in that order.
+ TPythonTestEngine engine;
+ engine.ToMiniKQL<NUdf::TDict<i64, char*>>(
+ "def Test(): return ['one', 'two', 'three']",
+ [](const NUdf::TUnboxedValuePod& value) {
+ std::vector<i64> items;
+ const auto it = value.GetKeysIterator();
+ for (NUdf::TUnboxedValue key; it.Next(key);) {
+ items.emplace_back(key.Get<i64>());
+ }
+
+ UNIT_ASSERT_EQUAL(items.size(), 3);
+ UNIT_ASSERT_EQUAL(items[0], 0);
+ UNIT_ASSERT_EQUAL(items[1], 1);
+ UNIT_ASSERT_EQUAL(items[2], 2);
+ });
+ }
+
+ Y_UNIT_TEST(FromPyTuple_Values) { // The payloads of a tuple viewed as a dict must be its elements in positional order.
+ TPythonTestEngine engine;
+ engine.ToMiniKQL<NUdf::TDict<ui64, char*>>(
+ "def Test(): return ('one', 'two', 'three')",
+ [](const NUdf::TUnboxedValuePod& value) {
+ std::vector<TString> items;
+ const auto it = value.GetPayloadsIterator();
+ for (NUdf::TUnboxedValue payload; it.Next(payload);) {
+ items.emplace_back(payload.AsStringRef());
+ }
+
+ UNIT_ASSERT_EQUAL(items.size(), 3);
+ UNIT_ASSERT_EQUAL(items[0], "one");
+ UNIT_ASSERT_EQUAL(items[1], "two");
+ UNIT_ASSERT_EQUAL(items[2], "three");
+ });
+ }
+
+ // An empty hashed dict built on the C++ side must reach Python as a falsy
+ // container whose len() is 0.
+ Y_UNIT_TEST(ToPyEmptyDict) {
+     TPythonTestEngine engine;
+     engine.ToPython<NUdf::TDict<ui8, ui32>>(
+         [](const TType* type, const NUdf::IValueBuilder& vb) {
+             // `type` is the dict type handed to NewDict; nothing is added before Build().
+             return vb.NewDict(type, NUdf::TDictFlags::Hashed)->Build();
+         },
+         "def Test(value):\n"
+         " assert not value\n"
+         " assert len(value) == 0\n"
+     );
+ }
+
+ Y_UNIT_TEST(ToPyDict) { // A hashed dict {1: 0.1, 2: 0.2, 3: 0.3} must support the read-only Python dict operations exercised below.
+ TPythonTestEngine engine;
+ engine.ToPython<NUdf::TDict<int, double>>(
+ [](const TType* type, const NUdf::IValueBuilder& vb) {
+ return vb.NewDict(type, NUdf::TDictFlags::Hashed)->
+ Add(NUdf::TUnboxedValuePod((int) 1), NUdf::TUnboxedValuePod((double) 0.1))
+ .Add(NUdf::TUnboxedValuePod((int) 2), NUdf::TUnboxedValuePod((double) 0.2))
+ .Add(NUdf::TUnboxedValuePod((int) 3), NUdf::TUnboxedValuePod((double) 0.3))
+ .Build();
+ },
+ "def Test(value):\n"
+ " assert value\n"
+ " assert len(value) == 3\n"
+ " assert iter(value) is not None\n"
+ " assert 2 in value\n"
+ " assert 0 not in value\n"
+ " assert set(iter(value)) == set([1, 2, 3])\n"
+ " assert value[2] == 0.2\n"
+ " assert value.get(0, 0.7) == 0.7\n"
+ " assert value.get(3, 0.7) == 0.3\n"
+ " assert sorted(value.keys()) == [1, 2, 3]\n"
+ " assert sorted(value.items()) == [(1, 0.1), (2, 0.2), (3, 0.3)]\n"
+ " assert sorted(value.values()) == [0.1, 0.2, 0.3]\n" // the iter* methods below are only checked when building against Python 2
+#if PY_MAJOR_VERSION < 3
+ " assert all(isinstance(k, int) for k in value.iterkeys())\n"
+ " assert all(isinstance(v, float) for v in value.itervalues())\n"
+ " assert all(isinstance(k, int) and isinstance(v, float) for k,v in value.iteritems())\n"
+#endif
+ );
+ }
+
+ Y_UNIT_TEST(ToPyDictWrongKey) { // Subscripting with an absent key must raise KeyError on the Python side.
+ TPythonTestEngine engine;
+ engine.ToPython<NUdf::TDict<int, double>>(
+ [](const TType* type, const NUdf::IValueBuilder& vb) {
+ return vb.NewDict(type, NUdf::TDictFlags::Hashed)->
+ Add(NUdf::TUnboxedValuePod((int) 1), NUdf::TUnboxedValuePod((double) 0.1))
+ .Add(NUdf::TUnboxedValuePod((int) 2), NUdf::TUnboxedValuePod((double) 0.2))
+ .Add(NUdf::TUnboxedValuePod((int) 3), NUdf::TUnboxedValuePod((double) 0.3))
+ .Build();
+ },
+ "def Test(value):\n"
+ " try:\n"
+ " print(value[0])\n" // key 0 was never added above
+ " except KeyError:\n"
+ " pass\n"
+ " else:\n"
+ " assert False\n"
+ );
+ }
+
+ Y_UNIT_TEST(FromPyEmptySet) { // An empty Python set must arrive as a boxed dict (void payload) with no items.
+ TPythonTestEngine engine;
+
+ engine.ToMiniKQL<NUdf::TDict<ui32, void>>(
+ "def Test(): return set([])",
+ [](const NUdf::TUnboxedValuePod& value) {
+ UNIT_ASSERT(value);
+ UNIT_ASSERT(value.IsBoxed());
+ UNIT_ASSERT(!value.HasDictItems());
+ UNIT_ASSERT_EQUAL(value.GetDictLength(), 0);
+ });
+
+ }
+
+ Y_UNIT_TEST(FromPySet) { // A Python set of three strings must arrive as a non-sorted dict whose keys are those strings.
+ TPythonTestEngine engine;
+
+ engine.ToMiniKQL<NUdf::TDict<char*, void>>(
+ "def Test(): return set(['one', 'two', 'three'])",
+ [](const NUdf::TUnboxedValuePod& value) {
+ UNIT_ASSERT(value);
+ UNIT_ASSERT(value.IsBoxed());
+ UNIT_ASSERT(value.HasDictItems());
+ UNIT_ASSERT(!value.IsSortedDict());
+ UNIT_ASSERT_EQUAL(value.GetDictLength(), 3);
+
+ std::set<TString> set; // collected into a std::set, so the check does not depend on iteration order
+ const auto it = value.GetKeysIterator();
+ for (NUdf::TUnboxedValue key; it.Next(key);) {
+ set.emplace(key.AsStringRef());
+ }
+
+ UNIT_ASSERT_EQUAL(set.size(), 3);
+ UNIT_ASSERT(set.count("one"));
+ UNIT_ASSERT(set.count("two"));
+ UNIT_ASSERT(set.count("three"));
+ });
+
+ }
+
+ Y_UNIT_TEST(FromPySet_Contains) { // Contains on a converted set of byte strings must be true for members only.
+ TPythonTestEngine engine;
+
+ engine.ToMiniKQL<NUdf::TDict<char*, void>>(
+ "def Test(): return {b'one', b'two', b'three'}",
+ [](const NUdf::TUnboxedValuePod& value) {
+ UNIT_ASSERT(value.Contains(NUdf::TUnboxedValuePod::Embedded("one")));
+ UNIT_ASSERT(value.Contains(NUdf::TUnboxedValuePod::Embedded("two")));
+ UNIT_ASSERT(value.Contains(NUdf::TUnboxedValuePod::Embedded("three")));
+ UNIT_ASSERT(!value.Contains(NUdf::TUnboxedValuePod::Embedded("zero"))); // "zero" is not in the set
+ });
+
+ }
+
+ // An empty hashed dict with void payload (a set) built on the C++ side must
+ // reach Python as a falsy container whose len() is 0.
+ Y_UNIT_TEST(ToPyEmptySet) {
+     TPythonTestEngine engine;
+
+     engine.ToPython<NUdf::TDict<ui8, void>>(
+         [](const TType* type, const NUdf::IValueBuilder& vb) {
+             // `type` is the dict type handed to NewDict; nothing is added before Build().
+             return vb.NewDict(type, NUdf::TDictFlags::Hashed)->Build();
+         },
+         "def Test(value):\n"
+         " assert not value\n"
+         " assert len(value) == 0\n"
+     );
+
+ }
+
+ Y_UNIT_TEST(ToPySet) { // A hashed dict with void payloads must support len(), iteration and `in` on the Python side.
+ TPythonTestEngine engine;
+
+ engine.ToPython<NUdf::TDict<ui8, void>>(
+ [](const TType* type, const NUdf::IValueBuilder& vb) {
+ return vb.NewDict(type, NUdf::TDictFlags::Hashed)->
+ Add(NUdf::TUnboxedValuePod((ui8) 1), NUdf::TUnboxedValuePod::Void())
+ .Add(NUdf::TUnboxedValuePod((ui8) 2), NUdf::TUnboxedValuePod::Void())
+ .Add(NUdf::TUnboxedValuePod((ui8) 3), NUdf::TUnboxedValuePod::Void())
+ .Build();
+
+ },
+ "def Test(value):\n"
+ " assert len(value) == 3\n"
+ " assert all(isinstance(k, int) for k in iter(value))\n"
+ " assert all(i in value for i in [1, 2, 3])\n");
+ }
+
+ Y_UNIT_TEST(FromPyMultiDict) { // A Python dict of lists must arrive as a dict whose payloads are lists with the same elements in order.
+ TPythonTestEngine engine;
+
+ engine.ToMiniKQL<NUdf::TDict<ui32, NUdf::TListType<char*>>>(
+ "def Test(): return {1: ['one', 'two'], 3: ['three']}",
+ [](const NUdf::TUnboxedValuePod& value) {
+ UNIT_ASSERT(value);
+ UNIT_ASSERT(value.IsBoxed());
+ UNIT_ASSERT_EQUAL(value.GetDictLength(), 2);
+
+ std::unordered_map<ui32, std::vector<TString>> map; // key -> list elements in iteration order
+ const auto dictIt = value.GetDictIterator();
+ for (NUdf::TUnboxedValue key, payload; dictIt.NextPair(key, payload);) {
+ auto& val = map[key.Get<ui32>()];
+ const auto listIt = payload.GetListIterator();
+ for (NUdf::TUnboxedValue listItem; listIt.Next(listItem);) {
+ val.emplace_back(listItem.AsStringRef());
+ }
+ }
+
+ UNIT_ASSERT_EQUAL(map.size(), 2);
+ auto it = map.find(1);
+ UNIT_ASSERT(it != map.end());
+ UNIT_ASSERT_EQUAL(it->second.size(), 2);
+ UNIT_ASSERT_EQUAL(it->second[0], "one");
+ UNIT_ASSERT_EQUAL(it->second[1], "two");
+ it = map.find(3);
+ UNIT_ASSERT(it != map.end());
+ UNIT_ASSERT_EQUAL(it->second.size(), 1);
+ UNIT_ASSERT_EQUAL(it->second[0], "three");
+ });
+
+ }
+
+ Y_UNIT_TEST(ToPyMultiDict) { // A multi-dict with two payloads under key 1 must show up in Python as two keys, key 1 mapping to a two-element list.
+ TPythonTestEngine engine;
+
+ engine.ToPython<NUdf::TDict<ui8, NUdf::TListType<NUdf::TUtf8>>>(
+ [](const TType* type, const NUdf::IValueBuilder& vb) {
+ ui32 flags = NUdf::TDictFlags::Hashed | NUdf::TDictFlags::Multi;
+ return vb.NewDict(type, flags)->
+ Add(NUdf::TUnboxedValuePod((ui8) 1), vb.NewString("one"))
+ .Add(NUdf::TUnboxedValuePod((ui8) 1), vb.NewString("two")) // second payload for the same key 1
+ .Add(NUdf::TUnboxedValuePod((ui8) 3), vb.NewString("three"))
+ .Build();
+
+ },
+ "def Test(value):\n"
+ " assert len(value) == 2\n"
+ " assert 1 in value\n"
+ " assert 3 in value\n"
+ " assert len(value[1]) == 2\n"
+ " assert 'one' in value[1]\n"
+ " assert 'two' in value[1]\n"
+ " assert list(value[3]) == ['three']\n");
+ }
+
+ Y_UNIT_TEST(ToPyAndBackDictAsIs) { // A sorted dict passed through an identity Python function must come back with the same content and order.
+ TPythonTestEngine engine;
+ engine.ToPythonAndBack<NUdf::TDict<i32, double>>(
+ [](const TType* type, const NUdf::IValueBuilder& vb) {
+ return vb.NewDict(type, NUdf::TDictFlags::Sorted)->
+ Add(NUdf::TUnboxedValuePod((i32) 1), NUdf::TUnboxedValuePod((double) 0.1))
+ .Add(NUdf::TUnboxedValuePod((i32) 2), NUdf::TUnboxedValuePod((double) 0.2))
+ .Add(NUdf::TUnboxedValuePod((i32) 3), NUdf::TUnboxedValuePod((double) 0.3))
+ .Build();
+ },
+ "def Test(value): return value",
+ [](const NUdf::TUnboxedValuePod& value) {
+ UNIT_ASSERT(value.HasDictItems());
+ UNIT_ASSERT_EQUAL(value.GetDictLength(), 3);
+ UNIT_ASSERT(!value.Contains(NUdf::TUnboxedValuePod((i32) 0)));
+ UNIT_ASSERT(value.Contains(NUdf::TUnboxedValuePod((i32) 3)));
+ UNIT_ASSERT_EQUAL(value.Lookup(NUdf::TUnboxedValuePod((i32) 2)).Get<double>(), 0.2);
+ UNIT_ASSERT(!value.Lookup(NUdf::TUnboxedValuePod((i32) 4)));
+
+ std::vector<std::pair<i32, double>> items; // pairs in iteration order; ascending key order is asserted below
+ const auto it = value.GetDictIterator();
+ for (NUdf::TUnboxedValue key, payload; it.NextPair(key, payload);) {
+ items.emplace_back(key.Get<i32>(), payload.Get<double>());
+ }
+ UNIT_ASSERT_EQUAL(items.size(), 3);
+ UNIT_ASSERT_EQUAL(items[0].first, 1);
+ UNIT_ASSERT_EQUAL(items[1].first, 2);
+ UNIT_ASSERT_EQUAL(items[2].first, 3);
+ UNIT_ASSERT_EQUAL(items[0].second, 0.1);
+ UNIT_ASSERT_EQUAL(items[1].second, 0.2);
+ UNIT_ASSERT_EQUAL(items[2].second, 0.3);
+
+ std::vector<i32> keys; // keys alone, same order expected
+ const auto kit = value.GetKeysIterator();
+ for (NUdf::TUnboxedValue key; kit.Next(key);) {
+ keys.emplace_back(key.Get<i32>());
+ }
+
+ UNIT_ASSERT_EQUAL(keys.size(), 3);
+ UNIT_ASSERT_EQUAL(keys[0], 1);
+ UNIT_ASSERT_EQUAL(keys[1], 2);
+ UNIT_ASSERT_EQUAL(keys[2], 3);
+
+ std::vector<double> values; // payloads alone, same order expected
+ const auto pit = value.GetPayloadsIterator();
+ for (NUdf::TUnboxedValue payload; pit.Next(payload);) {
+ values.emplace_back(payload.Get<double>());
+ }
+
+ UNIT_ASSERT_EQUAL(values.size(), 3);
+ UNIT_ASSERT_EQUAL(values[0], 0.1);
+ UNIT_ASSERT_EQUAL(values[1], 0.2);
+ UNIT_ASSERT_EQUAL(values[2], 0.3);
+ }
+ );
+ }
+
+ Y_UNIT_TEST(PyInvertDict) { // Python swaps keys and payloads; the result must be a Dict<double, i32> with the inverted pairs.
+ TPythonTestEngine engine;
+ engine.ToPythonAndBack<NUdf::TDict<i32, double>, NUdf::TDict<double, i32>>(
+ [](const TType* type, const NUdf::IValueBuilder& vb) {
+ return vb.NewDict(type, NUdf::TDictFlags::Hashed)->
+ Add(NUdf::TUnboxedValuePod((i32) 1), NUdf::TUnboxedValuePod((double) 0.1))
+ .Add(NUdf::TUnboxedValuePod((i32) 2), NUdf::TUnboxedValuePod((double) 0.2))
+ .Add(NUdf::TUnboxedValuePod((i32) 3), NUdf::TUnboxedValuePod((double) 0.3))
+ .Build();
+ },
+ "def Test(value): return { v: k for k, v in value.items() }",
+ [](const NUdf::TUnboxedValuePod& value) {
+ UNIT_ASSERT(value.HasDictItems());
+ UNIT_ASSERT_EQUAL(value.GetDictLength(), 3);
+ UNIT_ASSERT(value.Contains(NUdf::TUnboxedValuePod((double) 0.1)));
+ UNIT_ASSERT(!value.Contains(NUdf::TUnboxedValuePod((double) 0.0)));
+ UNIT_ASSERT(!value.Lookup(NUdf::TUnboxedValuePod((double) 0.4)));
+ UNIT_ASSERT_EQUAL(value.Lookup(NUdf::TUnboxedValuePod((double) 0.2)).Get<i32>(), 2);
+
+ std::map<double, i32> items; // collected into a map, so the check does not depend on iteration order
+ const auto it = value.GetDictIterator();
+ for (NUdf::TUnboxedValue key, payload; it.NextPair(key, payload);) {
+ items.emplace(key.Get<double>(), payload.Get<i32>());
+ }
+ UNIT_ASSERT_EQUAL(items.size(), 3);
+ UNIT_ASSERT_EQUAL(items[0.1], 1);
+ UNIT_ASSERT_EQUAL(items[0.2], 2);
+ UNIT_ASSERT_EQUAL(items[0.3], 3);
+ }
+ );
+ }
+
+ Y_UNIT_TEST(FromPyOrderedDict) { // An OrderedDict must convert like a dict; its insertion order (2, 1, 3) is only asserted for Python 3 builds.
+ TPythonTestEngine engine;
+ engine.ToMiniKQL<NUdf::TDict<ui32, char*>>(
+ "from collections import OrderedDict\n"
+ "def Test(): return OrderedDict([(2, 'two'), (1, 'one'), (3, 'three')])\n",
+ [](const NUdf::TUnboxedValuePod& value) {
+ UNIT_ASSERT(value);
+ UNIT_ASSERT(value.IsBoxed());
+ UNIT_ASSERT(value.HasDictItems());
+ UNIT_ASSERT_EQUAL(value.GetDictLength(), 3);
+
+ UNIT_ASSERT(value.Contains(NUdf::TUnboxedValuePod(ui32(1))));
+ UNIT_ASSERT(!value.Contains(NUdf::TUnboxedValuePod(ui32(0))));
+ const auto v = value.Lookup(NUdf::TUnboxedValuePod(ui32(1)));
+ UNIT_ASSERT_EQUAL(v.AsStringRef(), "one");
+ UNIT_ASSERT(!value.Lookup(NUdf::TUnboxedValuePod((ui32(4)))));
+
+#if PY_MAJOR_VERSION >= 3
+ std::vector<std::pair<ui32, TString>> items; // pairs in iteration order
+ const auto it = value.GetDictIterator();
+ for (NUdf::TUnboxedValue key, payload; it.NextPair(key, payload);) {
+ items.emplace_back(key.Get<ui32>(), payload.AsStringRef());
+ }
+
+ UNIT_ASSERT_EQUAL(items.size(), 3);
+ UNIT_ASSERT_EQUAL(items[0].first, 2);
+ UNIT_ASSERT_EQUAL(items[1].first, 1);
+ UNIT_ASSERT_EQUAL(items[2].first, 3);
+ UNIT_ASSERT_EQUAL(items[0].second, "two");
+ UNIT_ASSERT_EQUAL(items[1].second, "one");
+ UNIT_ASSERT_EQUAL(items[2].second, "three");
+
+ std::vector<ui32> keys; // keys alone, same order expected
+ const auto kit = value.GetKeysIterator();
+ for (NUdf::TUnboxedValue key; kit.Next(key);) {
+ keys.emplace_back(key.Get<ui32>());
+ }
+
+ UNIT_ASSERT_EQUAL(keys.size(), 3);
+ UNIT_ASSERT_EQUAL(keys[0], 2);
+ UNIT_ASSERT_EQUAL(keys[1], 1);
+ UNIT_ASSERT_EQUAL(keys[2], 3);
+
+ std::vector<TString> values; // payloads alone, same order expected
+ const auto pit = value.GetPayloadsIterator();
+ for (NUdf::TUnboxedValue payload; pit.Next(payload);) {
+ values.emplace_back(payload.AsStringRef());
+ }
+
+ UNIT_ASSERT_EQUAL(values.size(), 3);
+ UNIT_ASSERT_EQUAL(values[0], "two");
+ UNIT_ASSERT_EQUAL(values[1], "one");
+ UNIT_ASSERT_EQUAL(values[2], "three");
+#endif
+ });
+ }
+
+ Y_UNIT_TEST(ToPyAndBackSetAsIs) { // A sorted set of floats passed through an identity Python function must come back with the same keys and order.
+ TPythonTestEngine engine;
+ engine.ToPythonAndBack<NUdf::TDict<float, void>>(
+ [](const TType* type, const NUdf::IValueBuilder& vb) {
+ return vb.NewDict(type, NUdf::TDictFlags::Sorted)->
+ Add(NUdf::TUnboxedValuePod(0.1f), NUdf::TUnboxedValuePod::Void())
+ .Add(NUdf::TUnboxedValuePod(0.2f), NUdf::TUnboxedValuePod::Void())
+ .Add(NUdf::TUnboxedValuePod(0.3f), NUdf::TUnboxedValuePod::Void())
+ .Build();
+ },
+ "def Test(value): return value",
+ [](const NUdf::TUnboxedValuePod& value) {
+ UNIT_ASSERT(value.HasDictItems());
+ UNIT_ASSERT_EQUAL(value.GetDictLength(), 3);
+ UNIT_ASSERT(!value.Contains(NUdf::TUnboxedValuePod(0.0f)));
+ UNIT_ASSERT(value.Contains(NUdf::TUnboxedValuePod(0.3f)));
+ UNIT_ASSERT(value.Lookup(NUdf::TUnboxedValuePod(0.2f))); // for a present key the looked-up value must be non-empty
+ UNIT_ASSERT(!value.Lookup(NUdf::TUnboxedValuePod(0.4f)));
+
+ std::vector<float> keys;
+ const auto kit = value.GetKeysIterator();
+ for (NUdf::TUnboxedValue key; kit.Next(key);) {
+ keys.emplace_back(key.Get<float>());
+ }
+
+ UNIT_ASSERT_EQUAL(keys.size(), 3);
+ UNIT_ASSERT_EQUAL(keys[0], 0.1f);
+ UNIT_ASSERT_EQUAL(keys[1], 0.2f);
+ UNIT_ASSERT_EQUAL(keys[2], 0.3f);
+ }
+ );
+ }
+
+ Y_UNIT_TEST(ToPyAsThinList_FromPyAsDict) { // An array-backed list of 9 floats goes to Python unchanged and must come back as a dict keyed by position 0..8.
+ TPythonTestEngine engine;
+ engine.ToPythonAndBack<NUdf::TListType<float>, NUdf::TDict<i8, float>>(
+ [](const TType*, const NUdf::IValueBuilder& vb) {
+ NUdf::TUnboxedValue *items = nullptr;
+ const auto a = vb.NewArray(9U, items); // NewArray hands back the element storage through `items`
+ const float f[] = { 0.1f, 0.2f, 0.3f, 0.4f, 0.5f, 0.6f, 0.7f, 0.8f, 0.9f };
+ std::transform(f, f + 9U, items, [](float v){ return NUdf::TUnboxedValuePod(v); });
+ return a;
+ },
+ "def Test(value): return value",
+ [](const NUdf::TUnboxedValuePod& value) {
+ UNIT_ASSERT(value.HasDictItems());
+ UNIT_ASSERT_EQUAL(value.GetDictLength(), 9U);
+ UNIT_ASSERT(value.Contains(NUdf::TUnboxedValuePod(i8(0))));
+ UNIT_ASSERT(!value.Contains(NUdf::TUnboxedValuePod(i8(10))));
+ UNIT_ASSERT_EQUAL(value.Lookup(NUdf::TUnboxedValuePod(i8(5))).Get<float>(), 0.6f); // position 5 holds the sixth element
+ UNIT_ASSERT(!value.Lookup(NUdf::TUnboxedValuePod(i8(13))));
+
+ std::vector<std::pair<i8, float>> items;
+ const auto it = value.GetDictIterator();
+ for (NUdf::TUnboxedValue key, payload; it.NextPair(key, payload);) {
+ items.emplace_back(key.Get<i8>(), payload.Get<float>());
+ }
+
+ UNIT_ASSERT_EQUAL(items.size(), 9U);
+ UNIT_ASSERT_EQUAL(items.front().first, 0);
+ UNIT_ASSERT_EQUAL(items.back().first, 8);
+ UNIT_ASSERT_EQUAL(items.front().second, 0.1f);
+ UNIT_ASSERT_EQUAL(items.back().second, 0.9f);
+
+ std::vector<i8> keys;
+ const auto kit = value.GetKeysIterator();
+ for (NUdf::TUnboxedValue key; kit.Next(key);) {
+ keys.emplace_back(key.Get<i8>());
+ }
+
+ UNIT_ASSERT_EQUAL(keys.size(), 9U);
+ UNIT_ASSERT_EQUAL(keys.front(), 0);
+ UNIT_ASSERT_EQUAL(keys.back(), 8);
+
+ std::vector<float> values;
+ const auto pit = value.GetPayloadsIterator();
+ for (NUdf::TUnboxedValue payload; pit.Next(payload);) {
+ values.emplace_back(payload.Get<float>());
+ }
+
+ UNIT_ASSERT_EQUAL(values.size(), 9U);
+ UNIT_ASSERT_EQUAL(values.front(), 0.1f);
+ UNIT_ASSERT_EQUAL(values.back(), 0.9f);
+ }
+ );
+ }
+
+ Y_UNIT_TEST(ToPyAsLazyList_FromPyAsDict) { // A lazy list goes to Python unchanged and must come back as a dict keyed by position 0..8.
+ TPythonTestEngine engine;
+ engine.ToPythonAndBack<NUdf::TListType<i32>, NUdf::TDict<ui8, i32>>(
+ [](const TType*, const NUdf::IValueBuilder&) {
+ return NUdf::TUnboxedValuePod(new NUdf::TLazyList<false>(1, 10)); // per the assertions below this list holds the 9 values 1..9
+ },
+ "def Test(value): return value",
+ [](const NUdf::TUnboxedValuePod& value) {
+ UNIT_ASSERT(value.HasDictItems());
+ UNIT_ASSERT_EQUAL(value.GetDictLength(), 9U);
+ UNIT_ASSERT(value.Contains(NUdf::TUnboxedValuePod(ui8(0))));
+ UNIT_ASSERT(!value.Contains(NUdf::TUnboxedValuePod(ui8(10))));
+ UNIT_ASSERT_EQUAL(value.Lookup(NUdf::TUnboxedValuePod(ui8(5))).Get<i32>(), 6); // position 5 holds the sixth element
+ UNIT_ASSERT(!value.Lookup(NUdf::TUnboxedValuePod(ui8(13))));
+
+ std::vector<std::pair<ui8, i32>> items;
+ const auto it = value.GetDictIterator();
+ for (NUdf::TUnboxedValue key, payload; it.NextPair(key, payload);) {
+ items.emplace_back(key.Get<ui8>(), payload.Get<i32>());
+ }
+
+ UNIT_ASSERT_EQUAL(items.size(), 9U);
+ UNIT_ASSERT_EQUAL(items.front().first, 0);
+ UNIT_ASSERT_EQUAL(items.back().first, 8);
+ UNIT_ASSERT_EQUAL(items.front().second, 1);
+ UNIT_ASSERT_EQUAL(items.back().second, 9);
+
+ std::vector<ui8> keys;
+ const auto kit = value.GetKeysIterator();
+ for (NUdf::TUnboxedValue key; kit.Next(key);) {
+ keys.emplace_back(key.Get<ui8>());
+ }
+
+ UNIT_ASSERT_EQUAL(keys.size(), 9U);
+ UNIT_ASSERT_EQUAL(keys.front(), 0);
+ UNIT_ASSERT_EQUAL(keys.back(), 8);
+
+ std::vector<i32> values;
+ const auto pit = value.GetPayloadsIterator();
+ for (NUdf::TUnboxedValue payload; pit.Next(payload);) {
+ values.emplace_back(payload.Get<i32>());
+ }
+
+ UNIT_ASSERT_EQUAL(values.size(), 9U);
+ UNIT_ASSERT_EQUAL(values.front(), 1);
+ UNIT_ASSERT_EQUAL(values.back(), 9);
+ }
+ );
+ }
+}
diff --git a/yql/essentials/udfs/common/python/bindings/py_errors.cpp b/yql/essentials/udfs/common/python/bindings/py_errors.cpp
new file mode 100644
index 00000000000..5741978d543
--- /dev/null
+++ b/yql/essentials/udfs/common/python/bindings/py_errors.cpp
@@ -0,0 +1,72 @@
+#include "py_errors.h"
+#include "py_ptr.h"
+#include "py_cast.h"
+#include "py_utils.h"
+
+#include <util/generic/string.h>
+#include <util/stream/output.h>
+
+namespace NPython {
+
+// this function in conjunction with code after Py_Initialize
+// does approximately the following:
+//
+// sys.stderr = StderrProxy(sys.stderr)
+//
+// ...
+//
+// sys.stderr._toggle_real_mode()
+// sys.excepthook(
+// sys.last_type,
+// sys.last_value,
+// sys.last_traceback)
+// sys.stderr._get_value()
+// sys.stderr._toggle_real_mode()
+//
+// where _toggle_real_mode, _get_value & all calls to stderr not in real mode
+// are handled in a thread-safe way
+//
+TString GetLastErrorAsString() // Returns the text printed for the pending Python error, or an empty string (see the early returns below).
+{
+ PyObject* etype;
+ PyObject* evalue;
+ PyObject* etraceback;
+
+ PyErr_Fetch(&etype, &evalue, &etraceback); // take the pending error (if any) out of the interpreter state
+
+ if (!etype) { // no error is pending: nothing to report
+ return {};
+ }
+
+ TPyObjectPtr etypePtr {etype, TPyObjectPtr::ADD_REF}; // NOTE(review): ADD_REF presumably takes an extra reference, balancing the one PyErr_Restore consumes below — confirm
+ TPyObjectPtr evaluePtr {evalue, TPyObjectPtr::ADD_REF};
+ TPyObjectPtr etracebackPtr {etraceback, TPyObjectPtr::ADD_REF};
+
+ TPyObjectPtr stderrObject {PySys_GetObject("stderr"), TPyObjectPtr::ADD_REF};
+ if (!stderrObject) { // NOTE(review): on this early return the fetched error is neither restored nor printed
+ return {};
+ }
+
+ TPyObjectPtr unused = PyObject_CallMethod(stderrObject.Get(), "_toggle_real_mode", nullptr); // first toggle (see header comment); the call result is not inspected
+
+ PyErr_Restore(etypePtr.Get(), evaluePtr.Get(), etracebackPtr.Get()); // put the error back so PyErr_Print can report it
+ // in unusual situations there may be low-level write to stderr
+ // (by direct C FILE* write), but that's OK
+ PyErr_Print();
+
+ TPyObjectPtr error = PyObject_CallMethod(stderrObject.Get(), "_get_value", nullptr); // fetch what the stderr proxy captured
+ if (!error) { // NOTE(review): this return skips the second _toggle_real_mode call below — confirm that is intended
+ return {};
+ }
+ unused.ResetSteal( // second toggle, pairing with the first one above
+ PyObject_CallMethod(stderrObject.Get(), "_toggle_real_mode", nullptr)
+ );
+
+ TString errorValue;
+ if (!TryPyCast(error.Get(), errorValue)) { // captured value is not convertible to TString: report its repr instead
+ errorValue = TString("can't get error string from: ") += PyObjectRepr(error.Get());
+ }
+ return errorValue;
+}
+
+} // namespace NPython
diff --git a/yql/essentials/udfs/common/python/bindings/py_errors.h b/yql/essentials/udfs/common/python/bindings/py_errors.h
new file mode 100644
index 00000000000..2306b47bb95
--- /dev/null
+++ b/yql/essentials/udfs/common/python/bindings/py_errors.h
@@ -0,0 +1,24 @@
+#pragma once
+
+#include <util/generic/fwd.h>
+
+namespace NPython {
+
+TString GetLastErrorAsString(); // Text of the pending Python error; an empty string when no error is pending.
+
+#define PY_TRY try // Opens the guarded region that must be closed with PY_CATCH(ErrorValue).
+
+#define PY_CATCH(ErrorValue) \
+ catch (const yexception& e) { \
+ PyErr_SetString(PyExc_RuntimeError, e.what()); \
+ return ErrorValue; \
+ } // PY_CATCH: turns a yexception into a Python RuntimeError and returns ErrorValue; no other exception type is caught here.
+
+#define PY_ENSURE(condition, message) \
+ do { \
+ if (Y_UNLIKELY(!(condition))) { \
+ throw yexception() << message; \
+ } \
+ } while (0) // PY_ENSURE: throws a yexception carrying `message` when `condition` is false.
+
+} // namespace NPython
diff --git a/yql/essentials/udfs/common/python/bindings/py_gil.h b/yql/essentials/udfs/common/python/bindings/py_gil.h
new file mode 100644
index 00000000000..70e9bf3e91d
--- /dev/null
+++ b/yql/essentials/udfs/common/python/bindings/py_gil.h
@@ -0,0 +1,37 @@
+#pragma once
+
+#include <Python.h>
+
+
+namespace NPython {
+
+// Scope guard around PyGILState_Ensure / PyGILState_Release: the constructor
+// calls PyGILState_Ensure and keeps the returned state, the destructor passes
+// that state to PyGILState_Release.
+//
+// Copying is disabled: a copy would hold the same PyGILState_STATE and call
+// PyGILState_Release a second time for a single PyGILState_Ensure.
+struct TPyGilLocker
+{
+    TPyGilLocker()
+        : Gil(PyGILState_Ensure())
+    {
+    }
+
+    TPyGilLocker(const TPyGilLocker&) = delete;
+    TPyGilLocker& operator=(const TPyGilLocker&) = delete;
+
+    ~TPyGilLocker() {
+        PyGILState_Release(Gil);
+    }
+
+private:
+    // State token returned by PyGILState_Ensure; must be released exactly once.
+    PyGILState_STATE Gil;
+};
+
+// Scope guard around PyEval_SaveThread / PyEval_RestoreThread: the constructor
+// calls PyEval_SaveThread and keeps the returned thread state, the destructor
+// passes it to PyEval_RestoreThread.
+//
+// Copying is disabled: a copy would hold the same PyThreadState* and call
+// PyEval_RestoreThread a second time for a single PyEval_SaveThread.
+struct TPyGilUnlocker {
+    TPyGilUnlocker()
+        : ThreadState(PyEval_SaveThread())
+    {
+    }
+
+    TPyGilUnlocker(const TPyGilUnlocker&) = delete;
+    TPyGilUnlocker& operator=(const TPyGilUnlocker&) = delete;
+
+    ~TPyGilUnlocker() {
+        PyEval_RestoreThread(ThreadState);
+    }
+
+private:
+    // Thread state returned by PyEval_SaveThread; must be restored exactly once.
+    PyThreadState* ThreadState;
+};
+
+} // namespace NPython
diff --git a/yql/essentials/udfs/common/python/bindings/py_iterator.cpp b/yql/essentials/udfs/common/python/bindings/py_iterator.cpp
new file mode 100644
index 00000000000..090211be2c1
--- /dev/null
+++ b/yql/essentials/udfs/common/python/bindings/py_iterator.cpp
@@ -0,0 +1,280 @@
+#include "py_iterator.h"
+#include "py_cast.h"
+#include "py_errors.h"
+#include "py_utils.h"
+
+#include <yql/essentials/public/udf/udf_value.h>
+#include <yql/essentials/public/udf/udf_value_builder.h>
+
+using namespace NKikimr;
+
+namespace NPython {
+
+//////////////////////////////////////////////////////////////////////////////
+// TPyIterator interface
+//////////////////////////////////////////////////////////////////////////////
+// Python object that exposes a boxed UDF iterator of single items.
+// PyObject_HEAD has to stay the first member so the struct can be addressed
+// through a PyObject*.
+struct TPyIterator
+{
+    PyObject_HEAD;
+    TPyCastContext::TPtr CastCtx;
+    const NUdf::TType* ItemType;
+    TPyCleanupListItem<NUdf::IBoxedValuePtr> Iterator;
+
+    // Views a generic Python object pointer as this wrapper type (no checks).
+    static TPyIterator* Cast(PyObject* o) {
+        return reinterpret_cast<TPyIterator*>(o);
+    }
+
+    // tp_dealloc slot: instances are created with `new` in New(), so they are
+    // destroyed with `delete`.
+    static void Dealloc(PyObject* self) {
+        TPyIterator* const wrapper = Cast(self);
+        delete wrapper;
+    }
+
+    // tp_repr slot: the text is a constant and does not depend on the instance.
+    static PyObject* Repr(PyObject* /*self*/) {
+        auto text = PyRepr("<yql.TDictKeysIterator>");
+        return text.Release();
+    }
+
+    static PyObject* New(const TPyCastContext::TPtr& ctx, const NUdf::TType* itemType, NUdf::IBoxedValuePtr&& iterator);
+    static PyObject* Next(PyObject* self);
+};
+
+// On Python 3 builds the flag is defined here as 0, so the tp_flags
+// initializers below evaluate to 0 there.
+#if PY_MAJOR_VERSION >= 3
+#define Py_TPFLAGS_HAVE_ITER 0
+#endif
+
+// Static Python type object for TPyIterator.
+// Only a few slots are populated: tp_dealloc and tp_repr point at the
+// TPyIterator helpers, tp_iter returns the object itself (PyObject_SelfIter)
+// and tp_iternext is TPyIterator::Next. All remaining slots are null/zero.
+// The #if blocks select the slot names that exist in the Python headers being
+// compiled against, so the entry order must not be changed.
+PyTypeObject PyIteratorType = {
+ PyVarObject_HEAD_INIT(&PyType_Type, 0)
+ INIT_MEMBER(tp_name , "yql.TIterator"),
+ INIT_MEMBER(tp_basicsize , sizeof(TPyIterator)),
+ INIT_MEMBER(tp_itemsize , 0),
+ INIT_MEMBER(tp_dealloc , TPyIterator::Dealloc),
+ // The slot after tp_dealloc is tp_print before 0x030800b4 and
+ // tp_vectorcall_offset from that version on.
+#if PY_VERSION_HEX < 0x030800b4
+ INIT_MEMBER(tp_print , nullptr),
+#else
+ INIT_MEMBER(tp_vectorcall_offset, 0),
+#endif
+ INIT_MEMBER(tp_getattr , nullptr),
+ INIT_MEMBER(tp_setattr , nullptr),
+#if PY_MAJOR_VERSION >= 3
+ INIT_MEMBER(tp_as_async , nullptr),
+#else
+ INIT_MEMBER(tp_compare , nullptr),
+#endif
+ INIT_MEMBER(tp_repr , TPyIterator::Repr),
+ INIT_MEMBER(tp_as_number , nullptr),
+ INIT_MEMBER(tp_as_sequence , nullptr),
+ INIT_MEMBER(tp_as_mapping , nullptr),
+ INIT_MEMBER(tp_hash , nullptr),
+ INIT_MEMBER(tp_call , nullptr),
+ INIT_MEMBER(tp_str , nullptr),
+ INIT_MEMBER(tp_getattro , nullptr),
+ INIT_MEMBER(tp_setattro , nullptr),
+ INIT_MEMBER(tp_as_buffer , nullptr),
+ INIT_MEMBER(tp_flags , Py_TPFLAGS_HAVE_ITER),
+ INIT_MEMBER(tp_doc , "yql.TDictKeysIterator object"),
+ INIT_MEMBER(tp_traverse , nullptr),
+ INIT_MEMBER(tp_clear , nullptr),
+ INIT_MEMBER(tp_richcompare , nullptr),
+ INIT_MEMBER(tp_weaklistoffset , 0),
+ // Iterator protocol: iter(obj) is obj, next(obj) calls TPyIterator::Next.
+ INIT_MEMBER(tp_iter , PyObject_SelfIter),
+ INIT_MEMBER(tp_iternext , TPyIterator::Next),
+ INIT_MEMBER(tp_methods , nullptr),
+ INIT_MEMBER(tp_members , nullptr),
+ INIT_MEMBER(tp_getset , nullptr),
+ INIT_MEMBER(tp_base , nullptr),
+ INIT_MEMBER(tp_dict , nullptr),
+ INIT_MEMBER(tp_descr_get , nullptr),
+ INIT_MEMBER(tp_descr_set , nullptr),
+ INIT_MEMBER(tp_dictoffset , 0),
+ INIT_MEMBER(tp_init , nullptr),
+ INIT_MEMBER(tp_alloc , nullptr),
+ INIT_MEMBER(tp_new , nullptr),
+ INIT_MEMBER(tp_free , nullptr),
+ INIT_MEMBER(tp_is_gc , nullptr),
+ INIT_MEMBER(tp_bases , nullptr),
+ INIT_MEMBER(tp_mro , nullptr),
+ INIT_MEMBER(tp_cache , nullptr),
+ INIT_MEMBER(tp_subclasses , nullptr),
+ INIT_MEMBER(tp_weaklist , nullptr),
+ INIT_MEMBER(tp_del , nullptr),
+ INIT_MEMBER(tp_version_tag , 0),
+#if PY_MAJOR_VERSION >= 3
+ INIT_MEMBER(tp_finalize , nullptr),
+#endif
+#if PY_VERSION_HEX >= 0x030800b1
+ INIT_MEMBER(tp_vectorcall , nullptr),
+#endif
+ // Between 0x030800b4 and 0x03090000 a trailing tp_print slot is initialized as well.
+#if PY_VERSION_HEX >= 0x030800b4 && PY_VERSION_HEX < 0x03090000
+ INIT_MEMBER(tp_print , nullptr),
+#endif
+};
+
+//////////////////////////////////////////////////////////////////////////////
+// TPyPairIterator interface
+//////////////////////////////////////////////////////////////////////////////
+// Python object that exposes a boxed UDF iterator of (key, payload) pairs.
+// PyObject_HEAD has to stay the first member so the struct can be addressed
+// through a PyObject*.
+struct TPyPairIterator
+{
+    PyObject_HEAD;
+    TPyCastContext::TPtr CastCtx;
+    const NUdf::TType* KeyType;
+    const NUdf::TType* PayloadType;
+    TPyCleanupListItem<NUdf::IBoxedValuePtr> Iterator;
+
+    // Views a generic Python object pointer as this wrapper type (no checks).
+    static TPyPairIterator* Cast(PyObject* o) {
+        return reinterpret_cast<TPyPairIterator*>(o);
+    }
+
+    // tp_dealloc slot: instances are created with `new` in New(), so they are
+    // destroyed with `delete`.
+    static void Dealloc(PyObject* self) {
+        TPyPairIterator* const wrapper = Cast(self);
+        delete wrapper;
+    }
+
+    // tp_repr slot: the text is a constant and does not depend on the instance.
+    static PyObject* Repr(PyObject* /*self*/) {
+        auto text = PyRepr("<yql.TDictIterator>");
+        return text.Release();
+    }
+
+    static PyObject* New(const TPyCastContext::TPtr& ctx, const NUdf::TType* keyType, const NUdf::TType* payloadType, NUdf::IBoxedValuePtr&& iterator);
+    static PyObject* Next(PyObject* self);
+};
+
+// Static Python type object for TPyPairIterator.
+// Only a few slots are populated: tp_dealloc and tp_repr point at the
+// TPyPairIterator helpers, tp_iter returns the object itself
+// (PyObject_SelfIter) and tp_iternext is TPyPairIterator::Next. All remaining
+// slots are null/zero. The #if blocks select the slot names that exist in the
+// Python headers being compiled against, so the entry order must not be changed.
+PyTypeObject PyPairIteratorType = {
+ PyVarObject_HEAD_INIT(&PyType_Type, 0)
+ INIT_MEMBER(tp_name , "yql.TDictIterator"),
+ INIT_MEMBER(tp_basicsize , sizeof(TPyPairIterator)),
+ INIT_MEMBER(tp_itemsize , 0),
+ INIT_MEMBER(tp_dealloc , TPyPairIterator::Dealloc),
+ // The slot after tp_dealloc is tp_print before 0x030800b4 and
+ // tp_vectorcall_offset from that version on.
+#if PY_VERSION_HEX < 0x030800b4
+ INIT_MEMBER(tp_print , nullptr),
+#else
+ INIT_MEMBER(tp_vectorcall_offset, 0),
+#endif
+ INIT_MEMBER(tp_getattr , nullptr),
+ INIT_MEMBER(tp_setattr , nullptr),
+#if PY_MAJOR_VERSION >= 3
+ INIT_MEMBER(tp_as_async , nullptr),
+#else
+ INIT_MEMBER(tp_compare , nullptr),
+#endif
+ INIT_MEMBER(tp_repr , TPyPairIterator::Repr),
+ INIT_MEMBER(tp_as_number , nullptr),
+ INIT_MEMBER(tp_as_sequence , nullptr),
+ INIT_MEMBER(tp_as_mapping , nullptr),
+ INIT_MEMBER(tp_hash , nullptr),
+ INIT_MEMBER(tp_call , nullptr),
+ INIT_MEMBER(tp_str , nullptr),
+ INIT_MEMBER(tp_getattro , nullptr),
+ INIT_MEMBER(tp_setattro , nullptr),
+ INIT_MEMBER(tp_as_buffer , nullptr),
+ INIT_MEMBER(tp_flags , Py_TPFLAGS_HAVE_ITER),
+ INIT_MEMBER(tp_doc , "yql.TPairIterator object"),
+ INIT_MEMBER(tp_traverse , nullptr),
+ INIT_MEMBER(tp_clear , nullptr),
+ INIT_MEMBER(tp_richcompare , nullptr),
+ INIT_MEMBER(tp_weaklistoffset , 0),
+ // Iterator protocol: iter(obj) is obj, next(obj) calls TPyPairIterator::Next.
+ INIT_MEMBER(tp_iter , PyObject_SelfIter),
+ INIT_MEMBER(tp_iternext , TPyPairIterator::Next),
+ INIT_MEMBER(tp_methods , nullptr),
+ INIT_MEMBER(tp_members , nullptr),
+ INIT_MEMBER(tp_getset , nullptr),
+ INIT_MEMBER(tp_base , nullptr),
+ INIT_MEMBER(tp_dict , nullptr),
+ INIT_MEMBER(tp_descr_get , nullptr),
+ INIT_MEMBER(tp_descr_set , nullptr),
+ INIT_MEMBER(tp_dictoffset , 0),
+ INIT_MEMBER(tp_init , nullptr),
+ INIT_MEMBER(tp_alloc , nullptr),
+ INIT_MEMBER(tp_new , nullptr),
+ INIT_MEMBER(tp_free , nullptr),
+ INIT_MEMBER(tp_is_gc , nullptr),
+ INIT_MEMBER(tp_bases , nullptr),
+ INIT_MEMBER(tp_mro , nullptr),
+ INIT_MEMBER(tp_cache , nullptr),
+ INIT_MEMBER(tp_subclasses , nullptr),
+ INIT_MEMBER(tp_weaklist , nullptr),
+ INIT_MEMBER(tp_del , nullptr),
+ INIT_MEMBER(tp_version_tag , 0),
+#if PY_MAJOR_VERSION >= 3
+ INIT_MEMBER(tp_finalize , nullptr),
+#endif
+#if PY_VERSION_HEX >= 0x030800b1
+ INIT_MEMBER(tp_vectorcall , nullptr),
+#endif
+ // Between 0x030800b4 and 0x03090000 a trailing tp_print slot is initialized as well.
+#if PY_VERSION_HEX >= 0x030800b4 && PY_VERSION_HEX < 0x03090000
+ INIT_MEMBER(tp_print , nullptr),
+#endif
+};
+
+//////////////////////////////////////////////////////////////////////////////
+// TPyIterator implementation
+//////////////////////////////////////////////////////////////////////////////
+// Allocates a TPyIterator, initializes its Python header with PyIteratorType
+// and stores the cast context, the item type and the boxed iterator in it.
+// The result is returned as a plain PyObject*.
+PyObject* TPyIterator::New(const TPyCastContext::TPtr& ctx, const NUdf::TType* itemType, NUdf::IBoxedValuePtr&& iterator)
+{
+    auto* const wrapper = new TPyIterator;
+    PyObject_INIT(wrapper, &PyIteratorType);
+
+    wrapper->CastCtx = ctx;
+    wrapper->ItemType = itemType;
+    wrapper->Iterator.Set(ctx->PyCtx, iterator);
+
+    return reinterpret_cast<PyObject*>(wrapper);
+}
+
+// tp_iternext slot: pulls the next value from the wrapped UDF iterator and
+// converts it to a Python object. When ItemType is null the value is read as
+// ui64 and converted with PyCast<ui64>. Returns nullptr once the iterator has
+// no more values; a yexception is turned into a Python error by PY_CATCH.
+PyObject* TPyIterator::Next(PyObject* self)
+{
+    PY_TRY {
+        TPyIterator* const wrapper = Cast(self);
+        NUdf::TUnboxedValue current;
+        const bool hasItem = NUdf::TBoxedValueAccessor::Next(*wrapper->Iterator.Get(), current);
+        if (!hasItem) {
+            return nullptr;
+        }
+
+        auto converted = wrapper->ItemType
+            ? ToPyObject(wrapper->CastCtx, wrapper->ItemType, current)
+            : PyCast<ui64>(current.Get<ui64>());
+        return converted.Release();
+    } PY_CATCH(nullptr)
+}
+
+//////////////////////////////////////////////////////////////////////////////
+// TPyPairIterator implementation
+//////////////////////////////////////////////////////////////////////////////
+// Allocates a TPyPairIterator, initializes its Python header with
+// PyPairIteratorType and stores the cast context, key/payload types and the
+// boxed iterator in it. The result is returned as a plain PyObject*.
+PyObject* TPyPairIterator::New(const TPyCastContext::TPtr& ctx, const NUdf::TType* keyType, const NUdf::TType* payloadType, NUdf::IBoxedValuePtr&& iterator)
+{
+    auto* const wrapper = new TPyPairIterator;
+    PyObject_INIT(wrapper, &PyPairIteratorType);
+
+    wrapper->CastCtx = ctx;
+    wrapper->KeyType = keyType;
+    wrapper->PayloadType = payloadType;
+    wrapper->Iterator.Set(ctx->PyCtx, iterator);
+
+    return reinterpret_cast<PyObject*>(wrapper);
+}
+
+// tp_iternext slot: pulls the next (key, payload) pair from the wrapped UDF
+// iterator and returns it as a 2-tuple built with PyTuple_Pack. When KeyType
+// is null the key is read as ui64 and converted with PyCast<ui64>. Returns
+// nullptr once the iterator has no more pairs; a yexception is turned into a
+// Python error by PY_CATCH.
+PyObject* TPyPairIterator::Next(PyObject* self)
+{
+    PY_TRY {
+        TPyPairIterator* const wrapper = Cast(self);
+        NUdf::TUnboxedValue rawKey, rawPayload;
+        if (!NUdf::TBoxedValueAccessor::NextPair(*wrapper->Iterator.Get(), rawKey, rawPayload)) {
+            return nullptr;
+        }
+
+        const TPyObjectPtr pyKey = wrapper->KeyType
+            ? ToPyObject(wrapper->CastCtx, wrapper->KeyType, rawKey)
+            : PyCast<ui64>(rawKey.Get<ui64>());
+        const TPyObjectPtr pyPayload = ToPyObject(wrapper->CastCtx, wrapper->PayloadType, rawPayload);
+        return PyTuple_Pack(2, pyKey.Get(), pyPayload.Get());
+    } PY_CATCH(nullptr)
+}
+
+//////////////////////////////////////////////////////////////////////////////
+
+// Wraps the boxed value held by `value` into a new TPyIterator Python object
+// whose items are converted using `itemType`.
+TPyObjectPtr ToPyIterator(
+ const TPyCastContext::TPtr& castCtx,
+ const NUdf::TType* itemType,
+ const NUdf::TUnboxedValuePod& value)
+{
+ return TPyIterator::New(castCtx, itemType, value.AsBoxed());
+}
+
+// Wraps the boxed value held by `value` into a new TPyPairIterator Python
+// object that yields (key, payload) tuples converted using `keyType` and
+// `payloadType`.
+TPyObjectPtr ToPyIterator(
+ const TPyCastContext::TPtr& castCtx,
+ const NUdf::TType* keyType,
+ const NUdf::TType* payloadType,
+ const NUdf::TUnboxedValuePod& value)
+{
+ return TPyPairIterator::New(castCtx, keyType, payloadType, value.AsBoxed());
+}
+
+} // namespace NPython
diff --git a/yql/essentials/udfs/common/python/bindings/py_iterator.h b/yql/essentials/udfs/common/python/bindings/py_iterator.h
new file mode 100644
index 00000000000..5c5de27b0bc
--- /dev/null
+++ b/yql/essentials/udfs/common/python/bindings/py_iterator.h
@@ -0,0 +1,23 @@
+#pragma once
+
+#include "py_ptr.h"
+#include "py_ctx.h"
+
+namespace NPython {
+
+// Python type objects for the single-item and the pair iterator wrappers;
+// both are defined in py_iterator.cpp.
+extern PyTypeObject PyIteratorType;
+extern PyTypeObject PyPairIteratorType;
+
+// Creates a Python iterator object over the boxed value in `value`; each item
+// is converted according to `itemType`.
+TPyObjectPtr ToPyIterator(
+ const TPyCastContext::TPtr& castCtx,
+ const NKikimr::NUdf::TType* itemType,
+ const NKikimr::NUdf::TUnboxedValuePod& value);
+
+// Creates a Python iterator object over the boxed value in `value` that yields
+// (key, payload) pairs converted according to `keyType` and `payloadType`.
+TPyObjectPtr ToPyIterator(
+ const TPyCastContext::TPtr& castCtx,
+ const NKikimr::NUdf::TType* keyType,
+ const NKikimr::NUdf::TType* payloadType,
+ const NKikimr::NUdf::TUnboxedValuePod& value);
+
+
+} // namespace NPython
diff --git a/yql/essentials/udfs/common/python/bindings/py_lazy_mkql_dict.cpp b/yql/essentials/udfs/common/python/bindings/py_lazy_mkql_dict.cpp
new file mode 100644
index 00000000000..ffaa2fe4ec0
--- /dev/null
+++ b/yql/essentials/udfs/common/python/bindings/py_lazy_mkql_dict.cpp
@@ -0,0 +1,705 @@
+#include "py_cast.h"
+#include "py_errors.h"
+#include "py_gil.h"
+#include "py_utils.h"
+
+#include <yql/essentials/public/udf/udf_value.h>
+#include <yql/essentials/public/udf/udf_value_builder.h>
+#include <yql/essentials/public/udf/udf_type_inspection.h>
+#include <yql/essentials/public/udf/udf_terminator.h>
+
+#include <util/generic/maybe.h>
+#include <util/string/builder.h>
+
+using namespace NKikimr;
+
+namespace NPython {
+namespace {
+//////////////////////////////////////////////////////////////////////////////
+// TLazyDictBase
+//////////////////////////////////////////////////////////////////////////////
+// Common base for UDF dict values that are backed by a live Python object.
+// It owns one reference to that object, implements HasDictItems() through
+// PyObject_IsTrue() and provides two iterator adapters that derived classes
+// hand out. Every access to Python state is done under the GIL, and both
+// Python errors and yexceptions are reported through UdfTerminate() prefixed
+// with the source position stored in the cast context.
+class TLazyDictBase: public NUdf::TBoxedValue
+{
+protected:
+    // Adapts a Python iterator of single items to the UDF iterator interface.
+    class TIterator: public NUdf::TBoxedValue {
+    public:
+        TIterator(const TPyCastContext::TPtr& ctx, const NUdf::TType* type, TPyObjectPtr&& pyIter)
+            : CastCtx_(ctx), ItemType_(type), PyIter_(std::move(pyIter))
+        {}
+
+        ~TIterator() {
+            // The Python reference has to be dropped while holding the GIL.
+            const TPyGilLocker lock;
+            PyIter_.Reset();
+        }
+
+    private:
+        // Advances the Python iterator and discards the produced item.
+        bool Skip() override try {
+            const TPyGilLocker lock;
+            const TPyObjectPtr next(PyIter_Next(PyIter_.Get()));
+            if (next) {
+                return true;
+            }
+
+            // A null result means either exhaustion or a raised Python error.
+            if (PyErr_Occurred()) {
+                UdfTerminate((TStringBuilder() << CastCtx_->PyCtx->Pos << GetLastErrorAsString()).data());
+            }
+
+            return false;
+        } catch (const yexception& e) {
+            UdfTerminate((TStringBuilder() << CastCtx_->PyCtx->Pos << e.what()).data());
+        }
+
+        // Advances the Python iterator and converts the item to ItemType_.
+        bool Next(NUdf::TUnboxedValue& value) override try {
+            const TPyGilLocker lock;
+            const TPyObjectPtr next(PyIter_Next(PyIter_.Get()));
+            if (next) {
+                value = FromPyObject(CastCtx_, ItemType_, next.Get());
+                return true;
+            }
+
+            if (PyErr_Occurred()) {
+                UdfTerminate((TStringBuilder() << CastCtx_->PyCtx->Pos << GetLastErrorAsString()).data());
+            }
+
+            return false;
+        } catch (const yexception& e) {
+            UdfTerminate((TStringBuilder() << CastCtx_->PyCtx->Pos << e.what()).data());
+        }
+
+        // Pair form used when the container has no payloads: the payload is Void.
+        bool NextPair(NUdf::TUnboxedValue& key, NUdf::TUnboxedValue& payload) override {
+            payload = NUdf::TUnboxedValuePod::Void();
+            return Next(key);
+        }
+
+    private:
+        const TPyCastContext::TPtr CastCtx_;
+        const NUdf::TType* ItemType_;
+        TPyObjectPtr PyIter_;
+    };
+
+    // Adapts a Python iterator of (key, value) tuples to the UDF pair iterator
+    // interface.
+    class TPairIterator: public NUdf::TBoxedValue {
+    public:
+        TPairIterator(const TPyCastContext::TPtr& ctx, const NUdf::TType* keyType, const NUdf::TType* payType, TPyObjectPtr&& pyIter)
+            : CastCtx_(ctx), KeyType_(keyType), PayType_(payType), PyIter_(std::move(pyIter))
+        {}
+
+        ~TPairIterator() {
+            // The Python reference has to be dropped while holding the GIL.
+            const TPyGilLocker lock;
+            PyIter_.Reset();
+        }
+
+    private:
+        // Advances the Python iterator and discards the produced item.
+        bool Skip() override try {
+            const TPyGilLocker lock;
+            const TPyObjectPtr next(PyIter_Next(PyIter_.Get()));
+            if (next) {
+                return true;
+            }
+
+            if (PyErr_Occurred()) {
+                UdfTerminate((TStringBuilder() << CastCtx_->PyCtx->Pos << GetLastErrorAsString()).data());
+            }
+
+            return false;
+        } catch (const yexception& e) {
+            UdfTerminate((TStringBuilder() << CastCtx_->PyCtx->Pos << e.what()).data());
+        }
+
+        // Advances the Python iterator and converts the produced 2-tuple into
+        // a key of KeyType_ and a payload of PayType_.
+        bool NextPair(NUdf::TUnboxedValue& key, NUdf::TUnboxedValue& pay) override try {
+            const TPyGilLocker lock;
+            const TPyObjectPtr next(PyIter_Next(PyIter_.Get()));
+            if (next) {
+                // PyTuple_GET_ITEM performs no type or bounds checking, and the
+                // items may originate from a user-defined mapping, so validate
+                // the shape before touching the elements. The failure is
+                // reported through the yexception handler below.
+                PY_ENSURE(PyTuple_Check(next.Get()) && PyTuple_GET_SIZE(next.Get()) == 2,
+                    "Expected dict item to be a tuple of (key, value)");
+                key = FromPyObject(CastCtx_, KeyType_, PyTuple_GET_ITEM(next.Get(), 0));
+                pay = FromPyObject(CastCtx_, PayType_, PyTuple_GET_ITEM(next.Get(), 1));
+                return true;
+            }
+
+            if (PyErr_Occurred()) {
+                UdfTerminate((TStringBuilder() << CastCtx_->PyCtx->Pos << GetLastErrorAsString()).data());
+            }
+
+            return false;
+        } catch (const yexception& e) {
+            UdfTerminate((TStringBuilder() << CastCtx_->PyCtx->Pos << e.what()).data());
+        }
+
+    private:
+        const TPyCastContext::TPtr CastCtx_;
+        const NUdf::TType* KeyType_;
+        const NUdf::TType* PayType_;
+        TPyObjectPtr PyIter_;
+    };
+
+    // Takes an additional reference to `pyObject` (TPyObjectPtr::AddRef).
+    TLazyDictBase(const TPyCastContext::TPtr& castCtx, const NUdf::TType* itemType, PyObject* pyObject)
+        : CastCtx_(castCtx), ItemType_(itemType), PyObject_(pyObject, TPyObjectPtr::AddRef())
+    {}
+
+    ~TLazyDictBase() {
+        // The Python reference has to be dropped while holding the GIL.
+        const TPyGilLocker lock;
+        PyObject_.Reset();
+    }
+
+    // True when the Python object is truthy; a negative PyObject_IsTrue()
+    // result signals a Python error and terminates the UDF.
+    bool HasDictItems() const override try {
+        const TPyGilLocker lock;
+        const auto has = PyObject_IsTrue(PyObject_.Get());
+        if (has < 0) {
+            UdfTerminate((TStringBuilder() << CastCtx_->PyCtx->Pos << GetLastErrorAsString()).data());
+        }
+        return bool(has);
+    } catch (const yexception& e) {
+        UdfTerminate((TStringBuilder() << CastCtx_->PyCtx->Pos << e.what()).data());
+    }
+
+    const TPyCastContext::TPtr CastCtx_;
+    const NUdf::TType* ItemType_;
+    TPyObjectPtr PyObject_;
+};
+
+//////////////////////////////////////////////////////////////////////////////
+// TLazyMapping
+//////////////////////////////////////////////////////////////////////////////
+// UDF dict value backed by an arbitrary Python mapping object. Keys are
+// converted with ItemType_ (inherited) and payloads with PayType_. All Python
+// calls run under the GIL; Python errors and yexceptions terminate the UDF
+// with a message prefixed by the source position from the cast context.
+class TLazyMapping: public TLazyDictBase
+{
+public:
+    TLazyMapping(const TPyCastContext::TPtr& ctx, const NUdf::TType* keyType, const NUdf::TType* payType, PyObject* dict)
+        : TLazyDictBase(ctx, keyType, dict), PayType_(payType)
+    {}
+
+private:
+    bool IsSortedDict() const override { return false; }
+
+    // Number of entries as reported by PyMapping_Size().
+    ui64 GetDictLength() const override try {
+        const TPyGilLocker lock;
+        const auto len = PyMapping_Size(PyObject_.Get());
+        if (len < 0) {
+            UdfTerminate((TStringBuilder() << CastCtx_->PyCtx->Pos << GetLastErrorAsString()).data());
+        }
+        return ui64(len);
+    } catch (const yexception& e) {
+        UdfTerminate((TStringBuilder() << CastCtx_->PyCtx->Pos << e.what()).data());
+    }
+
+    // Iterator over the list returned by PyMapping_Keys().
+    NUdf::TUnboxedValue GetKeysIterator() const override try {
+        const TPyGilLocker lock;
+        if (const TPyObjectPtr pyList = PyMapping_Keys(PyObject_.Get())) {
+            if (TPyObjectPtr pyIter = PyObject_GetIter(pyList.Get())) {
+                return NUdf::TUnboxedValuePod(new TIterator(CastCtx_, ItemType_, std::move(pyIter)));
+            }
+        }
+        UdfTerminate((TStringBuilder() << CastCtx_->PyCtx->Pos << GetLastErrorAsString()).data());
+    } catch (const yexception& e) {
+        UdfTerminate((TStringBuilder() << CastCtx_->PyCtx->Pos << e.what()).data());
+    }
+
+    // Iterator over the list returned by PyMapping_Values().
+    NUdf::TUnboxedValue GetPayloadsIterator() const override try {
+        const TPyGilLocker lock;
+        if (const TPyObjectPtr pyList = PyMapping_Values(PyObject_.Get())) {
+            if (TPyObjectPtr pyIter = PyObject_GetIter(pyList.Get())) {
+                return NUdf::TUnboxedValuePod(new TIterator(CastCtx_, PayType_, std::move(pyIter)));
+            }
+        }
+        UdfTerminate((TStringBuilder() << CastCtx_->PyCtx->Pos << GetLastErrorAsString()).data());
+    } catch (const yexception& e) {
+        UdfTerminate((TStringBuilder() << CastCtx_->PyCtx->Pos << e.what()).data());
+    }
+
+    // Pair iterator over the list returned by PyMapping_Items().
+    NUdf::TUnboxedValue GetDictIterator() const override try {
+        const TPyGilLocker lock;
+        if (const TPyObjectPtr pyList = PyMapping_Items(PyObject_.Get())) {
+            if (TPyObjectPtr pyIter = PyObject_GetIter(pyList.Get())) {
+                return NUdf::TUnboxedValuePod(new TPairIterator(CastCtx_, ItemType_, PayType_, std::move(pyIter)));
+            }
+        }
+        UdfTerminate((TStringBuilder() << CastCtx_->PyCtx->Pos << GetLastErrorAsString()).data());
+    } catch (const yexception& e) {
+        UdfTerminate((TStringBuilder() << CastCtx_->PyCtx->Pos << e.what()).data());
+    }
+
+    // Returns the payload for `key` wrapped as an optional, or an empty value
+    // when the mapping has no such item.
+    NUdf::TUnboxedValue Lookup(const NUdf::TUnboxedValuePod& key) const override try {
+        const TPyGilLocker lock;
+        if (const TPyObjectPtr pyKey = ToPyObject(CastCtx_, ItemType_, key)) {
+            // PyObject_GetItem() returns a NEW reference, so it is held in a
+            // TPyObjectPtr and released after conversion; keeping it in a raw
+            // pointer leaked one reference per successful lookup.
+            if (const TPyObjectPtr item = PyObject_GetItem(PyObject_.Get(), pyKey.Get())) {
+                return FromPyObject(CastCtx_, PayType_, item.Get()).Release().MakeOptional();
+            }
+
+            // A failed item access is treated as "key not found": the pending
+            // Python error is cleared and an empty value is returned.
+            if (PyErr_Occurred()) {
+                PyErr_Clear();
+            }
+
+            return NUdf::TUnboxedValue();
+        }
+        UdfTerminate((TStringBuilder() << CastCtx_->PyCtx->Pos << GetLastErrorAsString()).data());
+    } catch (const yexception& e) {
+        UdfTerminate((TStringBuilder() << CastCtx_->PyCtx->Pos << e.what()).data());
+    }
+
+    // Membership test: uses the type's sq_contains slot when it exists and
+    // falls back to PyMapping_HasKey() otherwise.
+    bool Contains(const NUdf::TUnboxedValuePod& key) const override try {
+        const TPyGilLocker lock;
+        if (const TPyObjectPtr pyKey = ToPyObject(CastCtx_, ItemType_, key)) {
+            const auto map = PyObject_.Get();
+            const auto has = map->ob_type->tp_as_sequence && map->ob_type->tp_as_sequence->sq_contains ?
+                (map->ob_type->tp_as_sequence->sq_contains)(map, pyKey.Get()) :
+                PyMapping_HasKey(map, pyKey.Get());
+
+            if (has >= 0) {
+                return bool(has);
+            }
+        }
+        UdfTerminate((TStringBuilder() << CastCtx_->PyCtx->Pos << GetLastErrorAsString()).data());
+    } catch (const yexception& e) {
+        UdfTerminate((TStringBuilder() << CastCtx_->PyCtx->Pos << e.what()).data());
+    }
+
+private:
+    const NUdf::TType* PayType_;
+};
+
+//////////////////////////////////////////////////////////////////////////////
+// TLazyDict
+//////////////////////////////////////////////////////////////////////////////
+// UDF dict value backed by a Python dict and accessed through the PyDict_*
+// API. Keys are converted with ItemType_ (inherited) and payloads with
+// PayType_. Each method takes the GIL; Python errors and yexceptions are
+// reported through UdfTerminate() prefixed with the source position.
+class TLazyDict: public TLazyDictBase
+{
+public:
+ TLazyDict(const TPyCastContext::TPtr& ctx, const NUdf::TType* keyType, const NUdf::TType* payType, PyObject* dict)
+ : TLazyDictBase(ctx, keyType, dict), PayType_(payType)
+ {}
+
+private:
+ bool IsSortedDict() const override { return false; }
+
+ // Number of entries as reported by PyDict_Size(); a negative result is an error.
+ ui64 GetDictLength() const override try {
+ const TPyGilLocker lock;
+ const auto len = PyDict_Size(PyObject_.Get());
+ if (len < 0) {
+ UdfTerminate((TStringBuilder() << CastCtx_->PyCtx->Pos << GetLastErrorAsString()).data());
+ }
+ return ui64(len);
+ } catch (const yexception& e) {
+ UdfTerminate((TStringBuilder() << CastCtx_->PyCtx->Pos << e.what()).data());
+ }
+
+ // Iterator over the list returned by PyDict_Keys().
+ NUdf::TUnboxedValue GetKeysIterator() const override try {
+ const TPyGilLocker lock;
+ if (const TPyObjectPtr pyList = PyDict_Keys(PyObject_.Get())) {
+ if (TPyObjectPtr pyIter = PyObject_GetIter(pyList.Get())) {
+ return NUdf::TUnboxedValuePod(new TIterator(CastCtx_, ItemType_, std::move(pyIter)));
+ }
+ }
+ UdfTerminate((TStringBuilder() << CastCtx_->PyCtx->Pos << GetLastErrorAsString()).data());
+ } catch (const yexception& e) {
+ UdfTerminate((TStringBuilder() << CastCtx_->PyCtx->Pos << e.what()).data());
+ }
+
+ // Iterator over the list returned by PyDict_Values().
+ NUdf::TUnboxedValue GetPayloadsIterator() const override try {
+ const TPyGilLocker lock;
+ if (const TPyObjectPtr pyList = PyDict_Values(PyObject_.Get())) {
+ if (TPyObjectPtr pyIter = PyObject_GetIter(pyList.Get())) {
+ return NUdf::TUnboxedValuePod(new TIterator(CastCtx_, PayType_, std::move(pyIter)));
+ }
+ }
+ UdfTerminate((TStringBuilder() << CastCtx_->PyCtx->Pos << GetLastErrorAsString()).data());
+ } catch (const yexception& e) {
+ UdfTerminate((TStringBuilder() << CastCtx_->PyCtx->Pos << e.what()).data());
+ }
+
+ // Pair iterator over the list returned by PyDict_Items().
+ NUdf::TUnboxedValue GetDictIterator() const override try {
+ const TPyGilLocker lock;
+ if (const TPyObjectPtr pyList = PyDict_Items(PyObject_.Get())) {
+ if (TPyObjectPtr pyIter = PyObject_GetIter(pyList.Get())) {
+ return NUdf::TUnboxedValuePod(new TPairIterator(CastCtx_, ItemType_, PayType_, std::move(pyIter)));
+ }
+ }
+ UdfTerminate((TStringBuilder() << CastCtx_->PyCtx->Pos << GetLastErrorAsString()).data());
+ } catch (const yexception& e) {
+ UdfTerminate((TStringBuilder() << CastCtx_->PyCtx->Pos << e.what()).data());
+ }
+
+ // Returns the payload for `key` wrapped as an optional, or an empty value
+ // when the key is absent and no Python error is pending.
+ NUdf::TUnboxedValue Lookup(const NUdf::TUnboxedValuePod& key) const override try {
+ const TPyGilLocker lock;
+ if (const TPyObjectPtr pyKey = ToPyObject(CastCtx_, ItemType_, key)) {
+ // PyDict_GetItem() returns a borrowed reference, so the raw pointer
+ // needs no release here.
+ if (const auto item = PyDict_GetItem(PyObject_.Get(), pyKey.Get())) {
+ return FromPyObject(CastCtx_, PayType_, item).Release().MakeOptional();
+ } else if (!PyErr_Occurred()) {
+ return NUdf::TUnboxedValue();
+ }
+ }
+ UdfTerminate((TStringBuilder() << CastCtx_->PyCtx->Pos << GetLastErrorAsString()).data());
+ } catch (const yexception& e) {
+ UdfTerminate((TStringBuilder() << CastCtx_->PyCtx->Pos << e.what()).data());
+ }
+
+ // Membership test through PyDict_Contains(); a negative result is an error.
+ bool Contains(const NUdf::TUnboxedValuePod& key) const override try {
+ const TPyGilLocker lock;
+ if (const TPyObjectPtr pyKey = ToPyObject(CastCtx_, ItemType_, key)) {
+ const auto has = PyDict_Contains(PyObject_.Get(), pyKey.Get());
+ if (has >= 0) {
+ return bool(has);
+ }
+ }
+ UdfTerminate((TStringBuilder() << CastCtx_->PyCtx->Pos << GetLastErrorAsString()).data());
+ } catch (const yexception& e) {
+ UdfTerminate((TStringBuilder() << CastCtx_->PyCtx->Pos << e.what()).data());
+ }
+
+private:
+ const NUdf::TType* PayType_;
+};
+
+//////////////////////////////////////////////////////////////////////////////
+// TLazySet
+//////////////////////////////////////////////////////////////////////////////
+// UDF dict value backed by a Python set. A set has no payloads, so Lookup()
+// answers with Void for present keys and every iterator flavour is the keys
+// iterator. The list-style accessors forward to the dict-style ones.
+class TLazySet: public TLazyDictBase
+{
+public:
+    TLazySet(const TPyCastContext::TPtr& ctx, const NUdf::TType* itemType, PyObject* set)
+        : TLazyDictBase(ctx, itemType, set)
+    {}
+
+private:
+    bool IsSortedDict() const override { return false; }
+
+    // Number of elements as reported by PySet_Size(); negative means error.
+    ui64 GetDictLength() const override try {
+        const TPyGilLocker lock;
+        const auto count = PySet_Size(PyObject_.Get());
+        if (count >= 0) {
+            return ui64(count);
+        }
+        UdfTerminate((TStringBuilder() << CastCtx_->PyCtx->Pos << GetLastErrorAsString()).data());
+    } catch (const yexception& e) {
+        UdfTerminate((TStringBuilder() << CastCtx_->PyCtx->Pos << e.what()).data());
+    }
+
+    // Void when the key is a member, an empty value otherwise.
+    NUdf::TUnboxedValue Lookup(const NUdf::TUnboxedValuePod& key) const override {
+        if (Contains(key)) {
+            return NUdf::TUnboxedValuePod::Void();
+        }
+        return NUdf::TUnboxedValuePod();
+    }
+
+    // Membership test through PySet_Contains(); negative means error.
+    bool Contains(const NUdf::TUnboxedValuePod& key) const override try {
+        const TPyGilLocker lock;
+        if (const TPyObjectPtr pyKey = ToPyObject(CastCtx_, ItemType_, key)) {
+            const auto rc = PySet_Contains(PyObject_.Get(), pyKey.Get());
+            if (rc > 0) {
+                return true;
+            }
+            if (rc == 0) {
+                return false;
+            }
+        }
+        UdfTerminate((TStringBuilder() << CastCtx_->PyCtx->Pos << GetLastErrorAsString()).data());
+    } catch (const yexception& e) {
+        UdfTerminate((TStringBuilder() << CastCtx_->PyCtx->Pos << e.what()).data());
+    }
+
+    // Iterator obtained directly from the set with PyObject_GetIter().
+    NUdf::TUnboxedValue GetKeysIterator() const override try {
+        const TPyGilLocker lock;
+        TPyObjectPtr pyIter = PyObject_GetIter(PyObject_.Get());
+        if (!pyIter) {
+            UdfTerminate((TStringBuilder() << CastCtx_->PyCtx->Pos << GetLastErrorAsString()).data());
+        }
+        return NUdf::TUnboxedValuePod(new TIterator(CastCtx_, ItemType_, std::move(pyIter)));
+    } catch (const yexception& e) {
+        UdfTerminate((TStringBuilder() << CastCtx_->PyCtx->Pos << e.what()).data());
+    }
+
+    NUdf::TUnboxedValue GetPayloadsIterator() const override { return GetKeysIterator(); }
+
+    NUdf::TUnboxedValue GetDictIterator() const override { return GetKeysIterator(); }
+
+    NUdf::TUnboxedValue GetListIterator() const override { return GetKeysIterator(); }
+
+    ui64 GetListLength() const override { return GetDictLength(); }
+
+    bool HasListItems() const override { return HasDictItems(); }
+
+    bool HasFastListLength() const override { return true; }
+};
+
+//////////////////////////////////////////////////////////////////////////////
+// TLazySequenceAsSet
+//////////////////////////////////////////////////////////////////////////////
+// UDF dict value that presents a Python sequence as a set of its elements.
+// There are no payloads, so Lookup() answers with Void for present keys and
+// every iterator flavour is the keys iterator. The list-style accessors
+// forward to the dict-style ones.
+class TLazySequenceAsSet: public TLazyDictBase
+{
+public:
+    TLazySequenceAsSet(const TPyCastContext::TPtr& ctx, const NUdf::TType* keyType, PyObject* sequence)
+        : TLazyDictBase(ctx, keyType, sequence)
+    {}
+
+private:
+    bool IsSortedDict() const override { return false; }
+
+    // Number of elements as reported by PySequence_Size(); negative means error.
+    ui64 GetDictLength() const override try {
+        const TPyGilLocker lock;
+        const auto count = PySequence_Size(PyObject_.Get());
+        if (count >= 0) {
+            return ui64(count);
+        }
+        UdfTerminate((TStringBuilder() << CastCtx_->PyCtx->Pos << GetLastErrorAsString()).data());
+    } catch (const yexception& e) {
+        UdfTerminate((TStringBuilder() << CastCtx_->PyCtx->Pos << e.what()).data());
+    }
+
+    // Void when the key is an element, an empty value otherwise.
+    NUdf::TUnboxedValue Lookup(const NUdf::TUnboxedValuePod& key) const override {
+        if (Contains(key)) {
+            return NUdf::TUnboxedValuePod::Void();
+        }
+        return NUdf::TUnboxedValuePod();
+    }
+
+    // Membership test through PySequence_Contains(); negative means error.
+    bool Contains(const NUdf::TUnboxedValuePod& key) const override try {
+        const TPyGilLocker lock;
+        if (const TPyObjectPtr pyKey = ToPyObject(CastCtx_, ItemType_, key)) {
+            const auto rc = PySequence_Contains(PyObject_.Get(), pyKey.Get());
+            if (rc > 0) {
+                return true;
+            }
+            if (rc == 0) {
+                return false;
+            }
+        }
+        UdfTerminate((TStringBuilder() << CastCtx_->PyCtx->Pos << GetLastErrorAsString()).data());
+    } catch (const yexception& e) {
+        UdfTerminate((TStringBuilder() << CastCtx_->PyCtx->Pos << e.what()).data());
+    }
+
+    // Iterator obtained directly from the sequence with PyObject_GetIter().
+    NUdf::TUnboxedValue GetKeysIterator() const override try {
+        const TPyGilLocker lock;
+        TPyObjectPtr pyIter = PyObject_GetIter(PyObject_.Get());
+        if (!pyIter) {
+            UdfTerminate((TStringBuilder() << CastCtx_->PyCtx->Pos << GetLastErrorAsString()).data());
+        }
+        return NUdf::TUnboxedValuePod(new TIterator(CastCtx_, ItemType_, std::move(pyIter)));
+    } catch (const yexception& e) {
+        UdfTerminate((TStringBuilder() << CastCtx_->PyCtx->Pos << e.what()).data());
+    }
+
+    NUdf::TUnboxedValue GetPayloadsIterator() const override { return GetKeysIterator(); }
+
+    NUdf::TUnboxedValue GetDictIterator() const override { return GetKeysIterator(); }
+
+    NUdf::TUnboxedValue GetListIterator() const override { return GetKeysIterator(); }
+
+    ui64 GetListLength() const override { return GetDictLength(); }
+
+    bool HasListItems() const override { return HasDictItems(); }
+
+    bool HasFastListLength() const override { return true; }
+};
+
+//////////////////////////////////////////////////////////////////////////////
+// TLazySequenceAsDict
+//////////////////////////////////////////////////////////////////////////////
+// UDF dict value that presents a "fast" Python sequence (the result of
+// PySequence_Fast) as a dict from integer positions to elements.
+// KeyType is the integral type of the dict keys. The element count is captured
+// at construction and reused by all accessors. Lookup() and Contains() accept
+// indexes in [-Size, Size); a negative index addresses elements from the end.
+template<typename KeyType>
+class TLazySequenceAsDict: public NUdf::TBoxedValue
+{
+private:
+    // Yields the keys 0 .. Size-1 without touching Python state.
+    class TKeyIterator: public NUdf::TBoxedValue {
+    public:
+        TKeyIterator(Py_ssize_t size)
+            : Size(size), Index(0)
+        {}
+
+    private:
+        bool Skip() override {
+            if (Index >= Size)
+                return false;
+
+            ++Index;
+            return true;
+        }
+
+        bool Next(NUdf::TUnboxedValue& value) override {
+            if (Index >= Size)
+                return false;
+
+            value = NUdf::TUnboxedValuePod(KeyType(Index++));
+            return true;
+        }
+
+    private:
+        const Py_ssize_t Size;
+        Py_ssize_t Index;
+    };
+
+    // Yields the elements (Next) or (index, element) pairs (NextPair).
+    class TIterator: public NUdf::TBoxedValue {
+    public:
+        // Copies `pySeq`, i.e. takes an additional Python reference; the
+        // caller is expected to hold the GIL.
+        TIterator(const TPyCastContext::TPtr& ctx, const NUdf::TType* itemType, Py_ssize_t size, const TPyObjectPtr& pySeq)
+            : CastCtx_(ctx), ItemType_(itemType), PySeq_(pySeq), Size(size), Index(0)
+        {}
+
+        ~TIterator() {
+            // The Python reference has to be dropped while holding the GIL.
+            const TPyGilLocker lock;
+            PySeq_.Reset();
+        }
+
+    private:
+        bool Skip() override {
+            if (Index >= Size)
+                return false;
+
+            ++Index;
+            return true;
+        }
+
+        bool Next(NUdf::TUnboxedValue& value) override try {
+            if (Index >= Size)
+                return false;
+
+            const TPyGilLocker lock;
+            value = FromPyObject(CastCtx_, ItemType_, PySequence_Fast_GET_ITEM(PySeq_.Get(), Index++));
+            return true;
+        } catch (const yexception& e) {
+            UdfTerminate((TStringBuilder() << CastCtx_->PyCtx->Pos << e.what()).data());
+        }
+
+        bool NextPair(NUdf::TUnboxedValue& key, NUdf::TUnboxedValue& pay) override try {
+            if (Index >= Size)
+                return false;
+
+            const TPyGilLocker lock;
+            key = NUdf::TUnboxedValuePod(KeyType(Index));
+            pay = FromPyObject(CastCtx_, ItemType_, PySequence_Fast_GET_ITEM(PySeq_.Get(), Index++));
+            return true;
+        } catch (const yexception& e) {
+            UdfTerminate((TStringBuilder() << CastCtx_->PyCtx->Pos << e.what()).data());
+        }
+
+    private:
+        const TPyCastContext::TPtr CastCtx_;
+        const NUdf::TType* ItemType_;
+        TPyObjectPtr PySeq_;
+        const Py_ssize_t Size;
+        Py_ssize_t Index;
+    };
+
+public:
+    // Takes over the reference held by `sequence`; `size` is its element count.
+    TLazySequenceAsDict(const TPyCastContext::TPtr& ctx, const NUdf::TType* itemType, TPyObjectPtr&& sequence, Py_ssize_t size)
+        : CastCtx_(ctx), ItemType_(itemType), Size(size), PySeq_(std::move(sequence))
+    {}
+
+    ~TLazySequenceAsDict()
+    {
+        // The Python reference has to be dropped while holding the GIL.
+        const TPyGilLocker lock;
+        PySeq_.Reset();
+    }
+
+private:
+    bool IsSortedDict() const override { return true; }
+
+    bool HasDictItems() const override {
+        return Size > 0;
+    }
+
+    ui64 GetDictLength() const override {
+        return Size;
+    }
+
+    // Returns the element at `key` wrapped as an optional, or an empty value
+    // when the index is outside [-Size, Size).
+    NUdf::TUnboxedValue Lookup(const NUdf::TUnboxedValuePod& key) const override {
+        const Py_ssize_t index = key.Get<KeyType>();
+        if (index >= -Size && index < Size) try {
+            const TPyGilLocker lock;
+            if (const auto item = PySequence_Fast_GET_ITEM(PySeq_.Get(), index >= 0 ? index : Size + index)) {
+                return FromPyObject(CastCtx_, ItemType_, item).Release().MakeOptional();
+            } else if (PyErr_Occurred()) {
+                UdfTerminate((TStringBuilder() << CastCtx_->PyCtx->Pos << GetLastErrorAsString()).data());
+            }
+        } catch (const yexception& e) {
+            UdfTerminate((TStringBuilder() << CastCtx_->PyCtx->Pos << e.what()).data());
+        }
+        return NUdf::TUnboxedValue();
+    }
+
+    bool Contains(const NUdf::TUnboxedValuePod& key) const override {
+        const Py_ssize_t index = key.Get<KeyType>();
+        return index >= -Size && index < Size;
+    }
+
+    NUdf::TUnboxedValue GetKeysIterator() const override {
+        return NUdf::TUnboxedValuePod(new TKeyIterator(Size));
+    }
+
+    NUdf::TUnboxedValue GetPayloadsIterator() const override {
+        // TIterator copies PySeq_, which changes the Python reference count;
+        // that must happen under the GIL, like every other refcount change in
+        // this file. PyGILState_Ensure() may be called when the GIL is already held.
+        const TPyGilLocker lock;
+        return NUdf::TUnboxedValuePod(new TIterator(CastCtx_, ItemType_, Size, PySeq_));
+    }
+
+    NUdf::TUnboxedValue GetDictIterator() const override {
+        // See GetPayloadsIterator(): the reference copy needs the GIL.
+        const TPyGilLocker lock;
+        return NUdf::TUnboxedValuePod(new TIterator(CastCtx_, ItemType_, Size, PySeq_));
+    }
+
+    const TPyCastContext::TPtr CastCtx_;
+    const NUdf::TType* ItemType_;
+    const Py_ssize_t Size;
+    TPyObjectPtr PySeq_;
+};
+
+} // namespace
+
+// Wraps the Python object `dict` into a lazily evaluated UDF dict value
+// (TLazyDict) with the given key and payload types. No items are converted here.
+NUdf::TUnboxedValue FromPyDict(
+ const TPyCastContext::TPtr& castCtx,
+ const NUdf::TType* keyType,
+ const NUdf::TType* payType,
+ PyObject* dict)
+{
+ return NUdf::TUnboxedValuePod(new TLazyDict(castCtx, keyType, payType, dict));
+}
+
+// Wraps the Python object `map` into a lazily evaluated UDF dict value
+// (TLazyMapping) with the given key and payload types. No items are converted here.
+NUdf::TUnboxedValue FromPyMapping(
+ const TPyCastContext::TPtr& castCtx,
+ const NUdf::TType* keyType,
+ const NUdf::TType* payType,
+ PyObject* map)
+{
+ return NUdf::TUnboxedValuePod(new TLazyMapping(castCtx, keyType, payType, map));
+}
+
+// Wraps the Python object `set` into a lazily evaluated UDF value (TLazySet)
+// whose keys have type `itemType`. No items are converted here.
+NUdf::TUnboxedValue FromPySet(
+ const TPyCastContext::TPtr& castCtx,
+ const NUdf::TType* itemType,
+ PyObject* set)
+{
+ return NUdf::TUnboxedValuePod(new TLazySet(castCtx, itemType, set));
+}
+
+// Wraps a Python sequence (passed in the parameter named `set`) into a lazily
+// evaluated UDF value (TLazySequenceAsSet) whose keys are the sequence
+// elements of type `keyType`. No items are converted here.
+NUdf::TUnboxedValue FromPySequence(
+ const TPyCastContext::TPtr& castCtx,
+ const NUdf::TType* keyType,
+ PyObject* set)
+{
+ return NUdf::TUnboxedValuePod(new TLazySequenceAsSet(castCtx, keyType, set));
+}
+
+// Wraps a Python sequence into a lazily evaluated UDF dict value that maps
+// integer positions to elements of type `itemType`.
+// `keyType` selects the integral key type; one `case` per type is generated
+// from INTEGRAL_VALUE_TYPES, and any other id aborts with "Invalid key type.".
+// If PySequence_Fast() fails, the UDF is terminated with the Python error text.
+NUdf::TUnboxedValue FromPySequence(
+ const TPyCastContext::TPtr& castCtx,
+ const NUdf::TType* itemType,
+ const NUdf::TDataTypeId keyType,
+ PyObject* sequence)
+{
+ if (TPyObjectPtr fast = PySequence_Fast(sequence, "Can't get fast sequence.")) {
+ const auto size = PySequence_Fast_GET_SIZE(fast.Get());
+ if (size >= 0) {
+ switch (keyType) {
+ // Expands to `case <id of type>: return new TLazySequenceAsDict<type>(...)`;
+ // ownership of `fast` moves into the created value.
+#define MAKE_PRIMITIVE_TYPE_SIZE(type) \
+ case NUdf::TDataType<type>::Id: \
+ return NUdf::TUnboxedValuePod(new TLazySequenceAsDict<type>(castCtx, itemType, std::move(fast), size));
+ INTEGRAL_VALUE_TYPES(MAKE_PRIMITIVE_TYPE_SIZE)
+#undef MAKE_PRIMITIVE_TYPE_SIZE
+ }
+ Y_ABORT("Invalid key type.");
+ }
+ }
+ UdfTerminate((TStringBuilder() << castCtx->PyCtx->Pos << GetLastErrorAsString()).data());
+}
+
+} // namespace NPython
diff --git a/yql/essentials/udfs/common/python/bindings/py_lazy_mkql_list.cpp b/yql/essentials/udfs/common/python/bindings/py_lazy_mkql_list.cpp
new file mode 100644
index 00000000000..fe3b8892e66
--- /dev/null
+++ b/yql/essentials/udfs/common/python/bindings/py_lazy_mkql_list.cpp
@@ -0,0 +1,382 @@
+#include "py_cast.h"
+#include "py_errors.h"
+#include "py_gil.h"
+#include "py_utils.h"
+
+#include <yql/essentials/public/udf/udf_value.h>
+#include <yql/essentials/public/udf/udf_value_builder.h>
+#include <yql/essentials/public/udf/udf_type_inspection.h>
+#include <yql/essentials/public/udf/udf_terminator.h>
+
+#include <util/generic/maybe.h>
+#include <util/string/builder.h>
+
+
+using namespace NKikimr;
+
+namespace NPython {
+namespace {
+
+// Counts the items remaining in the Python iterator `iter` by exhausting it.
+// Every yielded item is released right away. If a Python error is pending
+// once iteration stops, the UDF is terminated with the current position and
+// the error text.
+static ui64 CalculateIteratorLength(PyObject* iter, const TPyCastContext::TPtr& castCtx)
+{
+    ui64 count = 0;
+    for (PyObject* current = PyIter_Next(iter); current != nullptr; current = PyIter_Next(iter)) {
+        ++count;
+        Py_DECREF(current);
+    }
+
+    if (PyErr_Occurred()) {
+        UdfTerminate((TStringBuilder() << castCtx->PyCtx->Pos << GetLastErrorAsString()).data());
+    }
+
+    return count;
+}
+
+// Pulls a single item from the Python iterator `iter` and reports whether one
+// was produced. The pulled item is released and therefore consumed. If no item
+// was produced and a Python error is pending, the UDF is terminated with the
+// current position and the error text.
+static bool IsIteratorHasItems(PyObject* iter, const TPyCastContext::TPtr& castCtx)
+{
+    {
+        const TPyObjectPtr first = PyIter_Next(iter);
+        if (first) {
+            return true;
+        }
+    }
+
+    if (PyErr_Occurred()) {
+        UdfTerminate((TStringBuilder() << castCtx->PyCtx->Pos << GetLastErrorAsString()).data());
+    }
+
+    return false;
+}
+
+//////////////////////////////////////////////////////////////////////////////
+// TBaseLazyList
+//////////////////////////////////////////////////////////////////////////////
+// CRTP base for boxed list values backed by a Python object.
+//
+// TDerived must provide `TPyObjectPtr GetIteratorImpl() const`; it is invoked
+// through a static_cast in GetIterator(). The length is computed on demand by
+// exhausting an iterator and is cached in Length_. Reverse/Skip/Take/
+// ToIndexDict implementations return nullptr, and no opaque list
+// representation is exposed.
+template<typename TDerived>
+class TBaseLazyList: public NUdf::TBoxedValue
+{
+ using TListSelf = TBaseLazyList<TDerived>;
+
+ // Boxed iterator that pulls items from a Python iterator and converts each
+ // one with FromPyObject using the list's item type.
+ class TIterator: public NUdf::TBoxedValue {
+ public:
+ TIterator(const TPyCastContext::TPtr& ctx, const NUdf::TType* type, TPyObjectPtr&& pyIter)
+ : CastCtx_(ctx)
+ , PyIter_(std::move(pyIter))
+ , ItemType_(type)
+ {}
+
+ // The Python reference is dropped while the GIL locker is alive.
+ ~TIterator() {
+ const TPyGilLocker lock;
+ PyIter_.Reset();
+ }
+
+ private:
+ // Advances by one item without converting it. Returns false when the
+ // iterator is exhausted; terminates the UDF on a pending Python error.
+ bool Skip() override try {
+ const TPyGilLocker lock;
+ const TPyObjectPtr next(PyIter_Next(PyIter_.Get()));
+ if (next) {
+ return true;
+ }
+
+ if (PyErr_Occurred()) {
+ UdfTerminate((TStringBuilder() << CastCtx_->PyCtx->Pos << GetLastErrorAsString()).data());
+ }
+
+ return false;
+ } catch (const yexception& e) {
+ UdfTerminate((TStringBuilder() << CastCtx_->PyCtx->Pos << e.what()).data());
+ }
+
+ // Fetches the next item into `value`. Returns false when the iterator is
+ // exhausted; terminates the UDF on a pending Python error.
+ bool Next(NUdf::TUnboxedValue& value) override try {
+ const TPyGilLocker lock;
+ const TPyObjectPtr next(PyIter_Next(PyIter_.Get()));
+ if (next) {
+ value = FromPyObject(CastCtx_, ItemType_, next.Get());
+ return true;
+ }
+
+ if (PyErr_Occurred()) {
+ UdfTerminate((TStringBuilder() << CastCtx_->PyCtx->Pos << GetLastErrorAsString()).data());
+ }
+
+ return false;
+ } catch (const yexception& e) {
+ UdfTerminate((TStringBuilder() << CastCtx_->PyCtx->Pos << e.what()).data());
+ }
+
+ private:
+ const TPyCastContext::TPtr CastCtx_;
+ TPyObjectPtr PyIter_;
+ const NUdf::TType* ItemType_;
+ };
+
+public:
+ // `type` is the list type; the item type is extracted from it with
+ // TListTypeInspector.
+ TBaseLazyList(
+ const TPyCastContext::TPtr& castCtx,
+ TPyObjectPtr&& pyObject,
+ const NUdf::TType* type)
+ : CastCtx_(castCtx)
+ , PyObject_(std::move(pyObject))
+ , ItemType_(NUdf::TListTypeInspector(*CastCtx_->PyCtx->TypeInfoHelper, type).GetItemType())
+ {
+ }
+
+ // The Python reference is dropped while the GIL locker is alive.
+ ~TBaseLazyList() {
+ TPyGilLocker lock;
+ PyObject_.Reset();
+ }
+
+private:
+ // Obtains a Python iterator from the derived class; a yexception thrown by
+ // the derived implementation terminates the UDF.
+ TPyObjectPtr GetIterator() const try {
+ return static_cast<const TDerived*>(this)->GetIteratorImpl();
+ }
+ catch (const yexception& e) {
+ UdfTerminate((TStringBuilder() << CastCtx_->PyCtx->Pos << e.what()).data());
+ }
+
+ // The length counts as "fast" only once it has been cached.
+ bool HasFastListLength() const override {
+ return Length_.Defined();
+ }
+
+ ui64 GetEstimatedListLength() const override {
+ return GetListLength();
+ }
+
+ // Returns the cached length, computing it on first use by exhausting an
+ // iterator obtained from GetIterator().
+ ui64 GetListLength() const override try {
+ if (!Length_.Defined()) {
+ const TPyGilLocker lock;
+ TPyObjectPtr iter = GetIterator();
+ Length_ = CalculateIteratorLength(iter.Get(), CastCtx_);
+ }
+
+ return *Length_;
+ } catch (const yexception& e) {
+ UdfTerminate((TStringBuilder() << CastCtx_->PyCtx->Pos << e.what()).data());
+ }
+
+ // Uses the cached length when available; otherwise obtains an iterator and
+ // pulls one item from it. An empty result is cached as length 0.
+ bool HasListItems() const override try {
+ if (Length_.Defined())
+ return *Length_ > 0;
+
+ const TPyGilLocker lock;
+ TPyObjectPtr iter = GetIterator();
+ const bool hasItems = IsIteratorHasItems(iter.Get(), CastCtx_);
+ if (!hasItems) {
+ Length_ = 0;
+ }
+ return hasItems;
+ }
+ catch (const yexception& e) {
+ UdfTerminate((TStringBuilder() << CastCtx_->PyCtx->Pos << e.what()).data());
+ }
+
+ // Wraps an iterator obtained from GetIterator() into a boxed TIterator.
+ NUdf::TUnboxedValue GetListIterator() const override try {
+ const TPyGilLocker lock;
+ TPyObjectPtr pyIter = GetIterator();
+ auto* self = const_cast<TListSelf*>(this);
+ return NUdf::TUnboxedValuePod(new TIterator(self->CastCtx_, self->ItemType_, std::move(pyIter)));
+ } catch (const yexception& e) {
+ UdfTerminate((TStringBuilder() << CastCtx_->PyCtx->Pos << e.what()).data());
+ }
+
+ const NUdf::TOpaqueListRepresentation* GetListRepresentation() const override {
+ return nullptr;
+ }
+
+ // The four *Impl hooks below provide no specialised implementation and
+ // return nullptr.
+ NUdf::IBoxedValuePtr ReverseListImpl(
+ const NUdf::IValueBuilder& builder) const override
+ {
+ Y_UNUSED(builder);
+ return nullptr;
+ }
+
+ NUdf::IBoxedValuePtr SkipListImpl(
+ const NUdf::IValueBuilder& builder, ui64 count) const override
+ {
+ Y_UNUSED(builder);
+ Y_UNUSED(count);
+ return nullptr;
+ }
+
+ NUdf::IBoxedValuePtr TakeListImpl(
+ const NUdf::IValueBuilder& builder, ui64 count) const override
+ {
+ Y_UNUSED(builder);
+ Y_UNUSED(count);
+ return nullptr;
+ }
+
+ NUdf::IBoxedValuePtr ToIndexDictImpl(
+ const NUdf::IValueBuilder& builder) const override
+ {
+ Y_UNUSED(builder);
+ return nullptr;
+ }
+
+protected:
+ const TPyCastContext::TPtr CastCtx_;
+ TPyObjectPtr PyObject_;
+ const NUdf::TType* ItemType_;
+ // Cached list length; mutable because it is filled in from const accessors.
+ mutable TMaybe<ui64> Length_;
+};
+
+//////////////////////////////////////////////////////////////////////////////
+// TLazyIterable
+//////////////////////////////////////////////////////////////////////////////
+// Lazy list over a Python iterable: every traversal requests a fresh iterator
+// with PyObject_GetIter. Length and emptiness are first asked from the object
+// itself (PyObject_Size / PyObject_IsTrue) and fall back to iteration when
+// the object cannot answer.
+class TLazyIterable: public TBaseLazyList<TLazyIterable>
+{
+    using TBase = TBaseLazyList<TLazyIterable>;
+public:
+    TLazyIterable(
+        const TPyCastContext::TPtr& castCtx,
+        TPyObjectPtr&& pyObject,
+        const NUdf::TType* type)
+        : TBase(castCtx, std::move(pyObject), type)
+    {}
+
+    // Returns a new Python iterator over the wrapped object; terminates the
+    // UDF with the object's repr and the Python error text on failure.
+    TPyObjectPtr GetIteratorImpl() const {
+        if (const TPyObjectPtr ret = PyObject_GetIter(PyObject_.Get())) {
+            return ret;
+        }
+
+        UdfTerminate((TStringBuilder() << CastCtx_->PyCtx->Pos
+            << "Cannot get iterator from object: "
+            << PyObjectRepr(PyObject_.Get()) << ", error: "
+            << GetLastErrorAsString()).data());
+    }
+
+private:
+    bool HasFastListLength() const override {
+        return Length_.Defined();
+    }
+
+    // Returns the cached length, computing it on first use from
+    // PyObject_Size, or by exhausting a fresh iterator when the object does
+    // not report a size.
+    ui64 GetListLength() const override try {
+        if (!Length_.Defined()) {
+            const TPyGilLocker lock;
+            const auto len = PyObject_Size(PyObject_.Get());
+            if (len >= 0) {
+                Length_ = len;
+            } else {
+                // PyObject_Size left an exception pending. Drop it before the
+                // fallback: otherwise the iteration runs with an error set and
+                // CalculateIteratorLength reports the stale error as its own.
+                PyErr_Clear();
+                Length_ = CalculateIteratorLength(GetIteratorImpl().Get(), CastCtx_);
+            }
+        }
+        return *Length_;
+    }
+    catch (const yexception& e) {
+        UdfTerminate((TStringBuilder() << CastCtx_->PyCtx->Pos << e.what()).data());
+    }
+
+    // Reports whether the iterable is non-empty using PyObject_IsTrue, or by
+    // pulling one item from a fresh iterator when the truth test fails.
+    // An empty result is cached as length 0.
+    bool HasListItems() const override try {
+        const TPyGilLocker lock;
+        bool hasItems = false;
+        const auto isTrue = PyObject_IsTrue(PyObject_.Get());
+        if (isTrue != -1) {
+            hasItems = static_cast<bool>(isTrue);
+        } else {
+            // The failed truth test left an exception pending; clear it so
+            // the fallback below is not affected by it.
+            PyErr_Clear();
+            TPyObjectPtr iter = GetIteratorImpl();
+            hasItems = IsIteratorHasItems(iter.Get(), CastCtx_);
+        }
+        if (!hasItems) {
+            Length_ = 0;
+        }
+        return hasItems;
+    }
+    catch (const yexception& e) {
+        UdfTerminate((TStringBuilder() << CastCtx_->PyCtx->Pos << e.what()).data());
+    }
+};
+
+//////////////////////////////////////////////////////////////////////////////
+// TLazyIterator
+//////////////////////////////////////////////////////////////////////////////
+// Lazy list over an already created Python iterator. The iterator can be
+// handed out exactly once; a second request terminates the UDF.
+class TLazyIterator: public TBaseLazyList<TLazyIterator>
+{
+    using TBase = TBaseLazyList<TLazyIterator>;
+public:
+    TLazyIterator(
+        const TPyCastContext::TPtr& castCtx,
+        TPyObjectPtr&& pyObject,
+        const NUdf::TType* type)
+        : TBase(castCtx, std::move(pyObject), type)
+        , IteratorDrained_(false)
+    {}
+
+    // Returns the wrapped iterator on the first call and marks it as used;
+    // any later call terminates the UDF.
+    TPyObjectPtr GetIteratorImpl() const {
+        if (!IteratorDrained_) {
+            IteratorDrained_ = true;
+            return PyObject_;
+        }
+
+        UdfTerminate((TStringBuilder() << CastCtx_->PyCtx->Pos <<
+            "Lazy list was build under python iterator. "
+            "Iterator was already used.").data());
+    }
+
+private:
+    // Set once the iterator has been handed out; mutable because
+    // GetIteratorImpl() is const.
+    mutable bool IteratorDrained_;
+};
+
+//////////////////////////////////////////////////////////////////////////////
+// TLazyGenerator
+//////////////////////////////////////////////////////////////////////////////
+// Lazy list over a Python callable that produces a generator: every traversal
+// calls the callable without arguments and iterates over the result.
+class TLazyGenerator: public TBaseLazyList<TLazyGenerator>
+{
+    using TBase = TBaseLazyList<TLazyGenerator>;
+public:
+    TLazyGenerator(
+        const TPyCastContext::TPtr& castCtx,
+        TPyObjectPtr&& pyObject,
+        const NUdf::TType* type)
+        : TBase(castCtx, std::move(pyObject), type)
+    {
+        // keep ownership of function closure if any
+        PyObject* const callable = PyObject_.Get();
+        if (!PyFunction_Check(callable)) {
+            return;
+        }
+        if (PyObject* const cells = PyFunction_GetClosure(callable)) {
+            Closure_ = TPyObjectPtr(cells, TPyObjectPtr::ADD_REF);
+        }
+    }
+
+    // The closure reference is dropped while the GIL locker is alive.
+    ~TLazyGenerator() {
+        const TPyGilLocker lock;
+        Closure_.Reset();
+    }
+
+    // Calls the wrapped callable with no arguments and returns an iterator
+    // over the result. Terminates the UDF when the call fails or the result
+    // does not pass PyGen_Check.
+    TPyObjectPtr GetIteratorImpl() const {
+        TPyObjectPtr produced = PyObject_CallObject(PyObject_.Get(), nullptr);
+        const bool isGenerator = produced && PyGen_Check(produced.Get());
+        if (!isGenerator) {
+            UdfTerminate((TStringBuilder() << CastCtx_->PyCtx->Pos << "Expected generator as a result of function call").data());
+        }
+        return PyObject_GetIter(produced.Get());
+    }
+
+private:
+    TPyObjectPtr Closure_;
+};
+
+} // namespace
+
+
+// Returns an unboxed value wrapping a newly allocated TLazyGenerator that
+// takes over the reference held by `callableObj`.
+NUdf::TUnboxedValue FromPyLazyGenerator(
+    const TPyCastContext::TPtr& castCtx,
+    const NUdf::TType* type,
+    TPyObjectPtr callableObj)
+{
+    auto* const lazyList = new TLazyGenerator(castCtx, std::move(callableObj), type);
+    return NUdf::TUnboxedValuePod(lazyList);
+}
+
+// Returns an unboxed value wrapping a newly allocated TLazyIterable that
+// takes over the reference held by `iterableObj`.
+NUdf::TUnboxedValue FromPyLazyIterable(
+    const TPyCastContext::TPtr& castCtx,
+    const NUdf::TType* type,
+    TPyObjectPtr iterableObj)
+{
+    auto* const lazyList = new TLazyIterable(castCtx, std::move(iterableObj), type);
+    return NUdf::TUnboxedValuePod(lazyList);
+}
+
+// Returns an unboxed value wrapping a newly allocated TLazyIterator that
+// takes over the reference held by `iteratorObj`.
+NUdf::TUnboxedValue FromPyLazyIterator(
+    const TPyCastContext::TPtr& castCtx,
+    const NUdf::TType* type,
+    TPyObjectPtr iteratorObj)
+{
+    auto* const lazyList = new TLazyIterator(castCtx, std::move(iteratorObj), type);
+    return NUdf::TUnboxedValuePod(lazyList);
+}
+
+} // namespace NPython
diff --git a/yql/essentials/udfs/common/python/bindings/py_list.cpp b/yql/essentials/udfs/common/python/bindings/py_list.cpp
new file mode 100644
index 00000000000..376a1ca124a
--- /dev/null
+++ b/yql/essentials/udfs/common/python/bindings/py_list.cpp
@@ -0,0 +1,1116 @@
+#include "py_list.h"
+#include "py_dict.h"
+#include "py_cast.h"
+#include "py_errors.h"
+#include "py_utils.h"
+
+#include <yql/essentials/public/udf/udf_value.h>
+#include <yql/essentials/public/udf/udf_value_builder.h>
+
+using namespace NKikimr;
+
+#if PY_MAJOR_VERSION >= 3
+#define SLICEOBJ(obj) obj
+#else
+#define SLICEOBJ(obj) (reinterpret_cast<PySliceObject*>(obj))
+// See details about need for backports in ya.make
+#include "py27_backports.h"
+#endif
+
+namespace NPython {
+
+namespace {
+// Converts `key` to a Py_ssize_t count for the method called `name`.
+// Returns the converted value when it is >= 0. Otherwise (the object does not
+// support the index protocol, the conversion failed, or the value is
+// negative) sets an IndexError mentioning `name` and returns the negative
+// value.
+inline Py_ssize_t CastIndex(PyObject* key, const char* name)
+{
+    const Py_ssize_t parsed = PyIndex_Check(key)
+        ? PyNumber_AsSsize_t(key, PyExc_IndexError)
+        : Py_ssize_t(-1);
+    if (parsed >= 0) {
+        return parsed;
+    }
+
+    const TPyObjectPtr message = PyUnicode_FromFormat("argument of %s must be positive integer or long", name);
+    PyErr_SetObject(PyExc_IndexError, message.Get());
+    return parsed;
+}
+}
+
+//////////////////////////////////////////////////////////////////////////////
+// TPyLazyList interface
+//////////////////////////////////////////////////////////////////////////////
+// Python object ("yql.TList") exposing a boxed UDF list to Python code.
+// Instances are allocated with `new` in New() and released with `delete` in
+// Dealloc().
+struct TPyLazyList
+{
+ using TPtr = NUdf::TRefCountedPtr<TPyLazyList, TPyPtrOps<TPyLazyList>>;
+
+ PyObject_HEAD;
+ TPyCastContext::TPtr CastCtx;
+ // Type of a single list item, used when converting items to Python.
+ const NUdf::TType* ItemType;
+ // The underlying boxed list.
+ TPyCleanupListItem<NUdf::IBoxedValuePtr> Value;
+ // Index dictionary over Value; built on first use by Subscript/ToIndexDict.
+ TPyCleanupListItem<NUdf::IBoxedValuePtr> Dict;
+ // Stride over the underlying list: Len() reports ceil(CachedLength / Step)
+ // and the iterator skips Step - 1 underlying items after each yielded one.
+ Py_ssize_t Step;
+ // Length of the underlying list, or -1 while it is not known yet.
+ Py_ssize_t CachedLength;
+
+ inline static TPyLazyList* Cast(PyObject* o) {
+ return reinterpret_cast<TPyLazyList*>(o);
+ }
+
+ inline static void Dealloc(PyObject* self) {
+ delete Cast(self);
+ }
+
+ // Creates a new list object; `size` == -1 means the length is unknown.
+ static PyObject* New(
+ const TPyCastContext::TPtr& castCtx,
+ const NUdf::TType* itemType,
+ NUdf::IBoxedValuePtr value,
+ Py_ssize_t step = 1,
+ Py_ssize_t size = -1);
+
+ static int Bool(PyObject* self);
+ static PyObject* Repr(PyObject* self);
+ static PyObject* Iter(PyObject* self);
+ static Py_ssize_t Len(PyObject* self);
+ static PyObject* Subscript(PyObject* self, PyObject* slice);
+ static PyObject* ToIndexDict(PyObject* self, PyObject* /* arg */);
+ static PyObject* Reversed(PyObject* self, PyObject* /* arg */);
+ static PyObject* Take(PyObject* self, PyObject* arg);
+ static PyObject* Skip(PyObject* self, PyObject* arg);
+ static PyObject* HasFastLen(PyObject* self, PyObject* /* arg */);
+ static PyObject* HasItems(PyObject* self, PyObject* /* arg */);
+};
+
+// Mapping protocol slots of yql.TList: len() and read-only subscription
+// (item assignment is not provided).
+PyMappingMethods LazyListMapping = {
+ INIT_MEMBER(mp_length, TPyLazyList::Len),
+ INIT_MEMBER(mp_subscript, TPyLazyList::Subscript),
+ INIT_MEMBER(mp_ass_subscript, nullptr),
+};
+
+// Number protocol slots of yql.TList. Only the truth-value slot (nb_bool on
+// Python 3, nb_nonzero on Python 2) is populated; every other slot is nullptr.
+// The preprocessor branches select the slots that exist in each major Python
+// version.
+PyNumberMethods LazyListNumbering = {
+ INIT_MEMBER(nb_add, nullptr),
+ INIT_MEMBER(nb_subtract, nullptr),
+ INIT_MEMBER(nb_multiply, nullptr),
+#if PY_MAJOR_VERSION < 3
+ INIT_MEMBER(nb_divide, nullptr),
+#endif
+ INIT_MEMBER(nb_remainder, nullptr),
+ INIT_MEMBER(nb_divmod, nullptr),
+ INIT_MEMBER(nb_power, nullptr),
+ INIT_MEMBER(nb_negative, nullptr),
+ INIT_MEMBER(nb_positive, nullptr),
+ INIT_MEMBER(nb_absolute, nullptr),
+#if PY_MAJOR_VERSION >= 3
+ INIT_MEMBER(nb_bool, TPyLazyList::Bool),
+#else
+ INIT_MEMBER(nb_nonzero, TPyLazyList::Bool),
+#endif
+ INIT_MEMBER(nb_invert, nullptr),
+ INIT_MEMBER(nb_lshift, nullptr),
+ INIT_MEMBER(nb_rshift, nullptr),
+ INIT_MEMBER(nb_and, nullptr),
+ INIT_MEMBER(nb_xor, nullptr),
+ INIT_MEMBER(nb_or, nullptr),
+#if PY_MAJOR_VERSION < 3
+ INIT_MEMBER(nb_coerce, nullptr),
+#endif
+ INIT_MEMBER(nb_int, nullptr),
+#if PY_MAJOR_VERSION >= 3
+ INIT_MEMBER(nb_reserved, nullptr),
+#else
+ INIT_MEMBER(nb_long, nullptr),
+#endif
+ INIT_MEMBER(nb_float, nullptr),
+#if PY_MAJOR_VERSION < 3
+ INIT_MEMBER(nb_oct, nullptr),
+ INIT_MEMBER(nb_hex, nullptr),
+#endif
+
+ INIT_MEMBER(nb_inplace_add, nullptr),
+ INIT_MEMBER(nb_inplace_subtract, nullptr),
+ INIT_MEMBER(nb_inplace_multiply, nullptr),
+ INIT_MEMBER(nb_inplace_remainder, nullptr),
+ INIT_MEMBER(nb_inplace_power, nullptr),
+ INIT_MEMBER(nb_inplace_lshift, nullptr),
+ INIT_MEMBER(nb_inplace_rshift, nullptr),
+ INIT_MEMBER(nb_inplace_and, nullptr),
+ INIT_MEMBER(nb_inplace_xor, nullptr),
+ INIT_MEMBER(nb_inplace_or, nullptr),
+
+ INIT_MEMBER(nb_floor_divide, nullptr),
+ INIT_MEMBER(nb_true_divide, nullptr),
+ INIT_MEMBER(nb_inplace_floor_divide, nullptr),
+ INIT_MEMBER(nb_inplace_true_divide, nullptr),
+
+ INIT_MEMBER(nb_index, nullptr),
+#if PY_MAJOR_VERSION >= 3
+ INIT_MEMBER(nb_matrix_multiply, nullptr),
+ INIT_MEMBER(nb_inplace_matrix_multiply, nullptr),
+#endif
+};
+
+// Docstrings for the deprecated list methods; they are referenced from both
+// the lazy-list and the thin-list method tables in this file.
+PyDoc_STRVAR(reversed__doc__, "DEPRECATED: use reversed(list) or list[::-1] instead.");
+PyDoc_STRVAR(take__doc__, "DEPRECATED: use slice list[:n] instead.");
+PyDoc_STRVAR(skip__doc__, "DEPRECATED: use slice list[n:] instead.");
+PyDoc_STRVAR(to_index_dict__doc__, "DEPRECATED: use list[n] instead.");
+PyDoc_STRVAR(has_fast_len__doc__, "DEPRECATED: do not use.");
+PyDoc_STRVAR(has_items__doc__, "DEPRECATED: test list as bool instead.");
+
+// Method table of yql.TList. "__reversed__" and "reversed" share the same
+// implementation; "take" and "skip" receive exactly one argument (METH_O),
+// the remaining methods take none.
+static PyMethodDef TPyLazyListMethods[] = {
+ { "__reversed__", TPyLazyList::Reversed, METH_NOARGS, nullptr },
+ { "to_index_dict", TPyLazyList::ToIndexDict, METH_NOARGS, to_index_dict__doc__ },
+ { "reversed", TPyLazyList::Reversed, METH_NOARGS, reversed__doc__ },
+ { "take", TPyLazyList::Take, METH_O, take__doc__ },
+ { "skip", TPyLazyList::Skip, METH_O, skip__doc__ },
+ { "has_fast_len", TPyLazyList::HasFastLen, METH_NOARGS, has_fast_len__doc__ },
+ { "has_items", TPyLazyList::HasItems, METH_NOARGS, has_items__doc__ },
+ { nullptr, nullptr, 0, nullptr } /* sentinel */
+};
+
+#if PY_MAJOR_VERSION >= 3
+#define Py_TPFLAGS_HAVE_ITER 0
+#endif
+
+// Type object for yql.TList. Behaviour is supplied through the number
+// protocol (truth value), the mapping protocol (len and subscription),
+// tp_iter, tp_repr and the method table; the sequence protocol is not used.
+// The preprocessor branches follow the PyTypeObject layout of the targeted
+// Python versions.
+PyTypeObject PyLazyListType = {
+ PyVarObject_HEAD_INIT(&PyType_Type, 0)
+ INIT_MEMBER(tp_name , "yql.TList"),
+ INIT_MEMBER(tp_basicsize , sizeof(TPyLazyList)),
+ INIT_MEMBER(tp_itemsize , 0),
+ INIT_MEMBER(tp_dealloc , TPyLazyList::Dealloc),
+#if PY_VERSION_HEX < 0x030800b4
+ INIT_MEMBER(tp_print , nullptr),
+#else
+ INIT_MEMBER(tp_vectorcall_offset, 0),
+#endif
+ INIT_MEMBER(tp_getattr , nullptr),
+ INIT_MEMBER(tp_setattr , nullptr),
+#if PY_MAJOR_VERSION >= 3
+ INIT_MEMBER(tp_as_async , nullptr),
+#else
+ INIT_MEMBER(tp_compare , nullptr),
+#endif
+ INIT_MEMBER(tp_repr , TPyLazyList::Repr),
+ INIT_MEMBER(tp_as_number , &LazyListNumbering),
+ INIT_MEMBER(tp_as_sequence , nullptr),
+ INIT_MEMBER(tp_as_mapping , &LazyListMapping),
+ INIT_MEMBER(tp_hash , nullptr),
+ INIT_MEMBER(tp_call , nullptr),
+ INIT_MEMBER(tp_str , nullptr),
+ INIT_MEMBER(tp_getattro , nullptr),
+ INIT_MEMBER(tp_setattro , nullptr),
+ INIT_MEMBER(tp_as_buffer , nullptr),
+ // On Python 3 this file defines Py_TPFLAGS_HAVE_ITER as 0 just above.
+ INIT_MEMBER(tp_flags , Py_TPFLAGS_HAVE_ITER),
+ INIT_MEMBER(tp_doc , "yql.TList object"),
+ INIT_MEMBER(tp_traverse , nullptr),
+ INIT_MEMBER(tp_clear , nullptr),
+ INIT_MEMBER(tp_richcompare , nullptr),
+ INIT_MEMBER(tp_weaklistoffset , 0),
+ INIT_MEMBER(tp_iter , TPyLazyList::Iter),
+ INIT_MEMBER(tp_iternext , nullptr),
+ INIT_MEMBER(tp_methods , TPyLazyListMethods),
+ INIT_MEMBER(tp_members , nullptr),
+ INIT_MEMBER(tp_getset , nullptr),
+ INIT_MEMBER(tp_base , nullptr),
+ INIT_MEMBER(tp_dict , nullptr),
+ INIT_MEMBER(tp_descr_get , nullptr),
+ INIT_MEMBER(tp_descr_set , nullptr),
+ INIT_MEMBER(tp_dictoffset , 0),
+ INIT_MEMBER(tp_init , nullptr),
+ INIT_MEMBER(tp_alloc , nullptr),
+ INIT_MEMBER(tp_new , nullptr),
+ INIT_MEMBER(tp_free , nullptr),
+ INIT_MEMBER(tp_is_gc , nullptr),
+ INIT_MEMBER(tp_bases , nullptr),
+ INIT_MEMBER(tp_mro , nullptr),
+ INIT_MEMBER(tp_cache , nullptr),
+ INIT_MEMBER(tp_subclasses , nullptr),
+ INIT_MEMBER(tp_weaklist , nullptr),
+ INIT_MEMBER(tp_del , nullptr),
+ INIT_MEMBER(tp_version_tag , 0),
+#if PY_MAJOR_VERSION >= 3
+ INIT_MEMBER(tp_finalize , nullptr),
+#endif
+#if PY_VERSION_HEX >= 0x030800b1
+ INIT_MEMBER(tp_vectorcall , nullptr),
+#endif
+#if PY_VERSION_HEX >= 0x030800b4 && PY_VERSION_HEX < 0x03090000
+ INIT_MEMBER(tp_print , nullptr),
+#endif
+};
+
+//////////////////////////////////////////////////////////////////////////////
+// TPyLazyListIterator interface
+//////////////////////////////////////////////////////////////////////////////
+// Python iterator object ("yql.TListIterator") over a TPyLazyList.
+struct TPyLazyListIterator
+{
+ PyObject_HEAD;
+ // The list being iterated.
+ TPyLazyList::TPtr List;
+ // Boxed iterator obtained from the list's underlying value.
+ TPyCleanupListItem<NUdf::TUnboxedValue> Iterator;
+ // Number of underlying items consumed so far (yielded and skipped).
+ Py_ssize_t Length;
+ TPyCastContext::TPtr CastCtx;
+
+ inline static TPyLazyListIterator* Cast(PyObject* o) {
+ return reinterpret_cast<TPyLazyListIterator*>(o);
+ }
+
+ // Deletes the iterator between MemoryLock->Acquire() and ->Release().
+ // The context pointer is copied first so it is still usable after `obj`
+ // has been deleted.
+ inline static void Dealloc(PyObject* self) {
+ auto obj = Cast(self);
+ auto ctx = obj->CastCtx;
+ ctx->MemoryLock->Acquire();
+ delete obj;
+ ctx->MemoryLock->Release();
+ }
+
+ inline static PyObject* Repr(PyObject* self) {
+ Y_UNUSED(self);
+ return PyRepr("<yql.TListIterator>").Release();
+ }
+
+ static PyObject* New(TPyLazyList* list);
+ static PyObject* Next(PyObject* self);
+};
+
+// Type object for yql.TListIterator: an iterator that returns itself from
+// tp_iter and produces items through TPyLazyListIterator::Next.
+// NOTE(review): tp_doc says "yql.ListIterator" while tp_name is
+// "yql.TListIterator" - confirm whether the mismatch is intended.
+PyTypeObject PyLazyListIteratorType = {
+ PyVarObject_HEAD_INIT(&PyType_Type, 0)
+ INIT_MEMBER(tp_name , "yql.TListIterator"),
+ INIT_MEMBER(tp_basicsize , sizeof(TPyLazyListIterator)),
+ INIT_MEMBER(tp_itemsize , 0),
+ INIT_MEMBER(tp_dealloc , TPyLazyListIterator::Dealloc),
+#if PY_VERSION_HEX < 0x030800b4
+ INIT_MEMBER(tp_print , nullptr),
+#else
+ INIT_MEMBER(tp_vectorcall_offset, 0),
+#endif
+ INIT_MEMBER(tp_getattr , nullptr),
+ INIT_MEMBER(tp_setattr , nullptr),
+#if PY_MAJOR_VERSION >= 3
+ INIT_MEMBER(tp_as_async , nullptr),
+#else
+ INIT_MEMBER(tp_compare , nullptr),
+#endif
+ INIT_MEMBER(tp_repr , TPyLazyListIterator::Repr),
+ INIT_MEMBER(tp_as_number , nullptr),
+ INIT_MEMBER(tp_as_sequence , nullptr),
+ INIT_MEMBER(tp_as_mapping , nullptr),
+ INIT_MEMBER(tp_hash , nullptr),
+ INIT_MEMBER(tp_call , nullptr),
+ INIT_MEMBER(tp_str , nullptr),
+ INIT_MEMBER(tp_getattro , nullptr),
+ INIT_MEMBER(tp_setattro , nullptr),
+ INIT_MEMBER(tp_as_buffer , nullptr),
+ INIT_MEMBER(tp_flags , Py_TPFLAGS_HAVE_ITER),
+ INIT_MEMBER(tp_doc , "yql.ListIterator object"),
+ INIT_MEMBER(tp_traverse , nullptr),
+ INIT_MEMBER(tp_clear , nullptr),
+ INIT_MEMBER(tp_richcompare , nullptr),
+ INIT_MEMBER(tp_weaklistoffset , 0),
+ INIT_MEMBER(tp_iter , PyObject_SelfIter),
+ INIT_MEMBER(tp_iternext , TPyLazyListIterator::Next),
+ INIT_MEMBER(tp_methods , nullptr),
+ INIT_MEMBER(tp_members , nullptr),
+ INIT_MEMBER(tp_getset , nullptr),
+ INIT_MEMBER(tp_base , nullptr),
+ INIT_MEMBER(tp_dict , nullptr),
+ INIT_MEMBER(tp_descr_get , nullptr),
+ INIT_MEMBER(tp_descr_set , nullptr),
+ INIT_MEMBER(tp_dictoffset , 0),
+ INIT_MEMBER(tp_init , nullptr),
+ INIT_MEMBER(tp_alloc , nullptr),
+ INIT_MEMBER(tp_new , nullptr),
+ INIT_MEMBER(tp_free , nullptr),
+ INIT_MEMBER(tp_is_gc , nullptr),
+ INIT_MEMBER(tp_bases , nullptr),
+ INIT_MEMBER(tp_mro , nullptr),
+ INIT_MEMBER(tp_cache , nullptr),
+ INIT_MEMBER(tp_subclasses , nullptr),
+ INIT_MEMBER(tp_weaklist , nullptr),
+ INIT_MEMBER(tp_del , nullptr),
+ INIT_MEMBER(tp_version_tag , 0),
+#if PY_MAJOR_VERSION >= 3
+ INIT_MEMBER(tp_finalize , nullptr),
+#endif
+#if PY_VERSION_HEX >= 0x030800b1
+ INIT_MEMBER(tp_vectorcall , nullptr),
+#endif
+#if PY_VERSION_HEX >= 0x030800b4 && PY_VERSION_HEX < 0x03090000
+ INIT_MEMBER(tp_print , nullptr),
+#endif
+};
+
+//////////////////////////////////////////////////////////////////////////////
+// TPyLazyList implementation
+//////////////////////////////////////////////////////////////////////////////
+// Allocates a yql.TList object and fills in its fields from the arguments.
+// `value` is registered with the Python context through Value.Set().
+PyObject* TPyLazyList::New(
+    const TPyCastContext::TPtr& castCtx,
+    const NUdf::TType* itemType,
+    NUdf::IBoxedValuePtr value,
+    Py_ssize_t step,
+    Py_ssize_t size)
+{
+    auto* const created = new TPyLazyList;
+    PyObject_INIT(created, &PyLazyListType);
+
+    // Plain fields first, then the members that hold references.
+    created->Step = step;
+    created->CachedLength = size;
+    created->ItemType = itemType;
+    created->CastCtx = castCtx;
+    created->Value.Set(castCtx->PyCtx, value);
+
+    return reinterpret_cast<PyObject*>(created);
+}
+
+// repr() of a yql.TList: a fixed string that does not depend on the contents.
+PyObject* TPyLazyList::Repr(PyObject*)
+{
+    auto text = PyRepr("<yql.TList>");
+    return text.Release();
+}
+
+// iter() of a yql.TList: creates a new yql.TListIterator over this list.
+// Returns nullptr when PY_CATCH intercepts an exception.
+PyObject* TPyLazyList::Iter(PyObject* self)
+{
+    PY_TRY {
+        return TPyLazyListIterator::New(Cast(self));
+    } PY_CATCH(nullptr)
+}
+
+// len() of a yql.TList. The length of the underlying list is fetched once and
+// cached; the reported value is that length divided by Step, rounded up.
+// Returns -1 when PY_CATCH intercepts an exception.
+Py_ssize_t TPyLazyList::Len(PyObject* self)
+{
+    PY_TRY {
+        TPyLazyList* const lazy = Cast(self);
+        Py_ssize_t& underlying = lazy->CachedLength;
+        if (underlying == -1) {
+            underlying = static_cast<Py_ssize_t>(NUdf::TBoxedValueAccessor::GetListLength(*lazy->Value.Get()));
+        }
+        const Py_ssize_t stride = lazy->Step;
+        return (underlying + stride - 1) / stride;
+    } PY_CATCH(-1)
+}
+
+// Implements list[key] for yql.TList.
+//
+// * Integer-like key: looks the item up in the index dictionary over the
+//   underlying list (built on first use). Negative keys count from the end.
+//   Sets IndexError and returns nullptr when the position is out of bounds.
+// * Slice key: returns a new yql.TList built with SkipList / TakeList /
+//   ReverseList over the underlying value, carrying the combined step.
+// * Any other key: sets TypeError and returns nullptr.
+//
+// Returns nullptr when PY_CATCH intercepts an exception.
+PyObject* TPyLazyList::Subscript(PyObject* self, PyObject* slice)
+{
+    PY_TRY {
+        TPyLazyList* list = Cast(self);
+        const auto vb = list->CastCtx->ValueBuilder;
+
+        if (PyIndex_Check(slice)) {
+            Py_ssize_t index = PyNumber_AsSsize_t(slice, PyExc_IndexError);
+            // -1 is also the failure result of PyNumber_AsSsize_t; propagate
+            // the pending exception instead of treating it as index -1.
+            if (index == -1 && PyErr_Occurred()) {
+                return nullptr;
+            }
+
+            if (!list->Dict.IsSet()) {
+                list->Dict.Set(list->CastCtx->PyCtx, vb->ToIndexDict(NUdf::TUnboxedValuePod(list->Value.Get().Get())).AsBoxed());
+            }
+
+            if (index < 0) {
+                if (list->CachedLength == -1) {
+                    list->CachedLength = static_cast<Py_ssize_t>(NUdf::TBoxedValueAccessor::GetDictLength(*list->Dict.Get()));
+                }
+
+                // Maps key -k to underlying position
+                // CachedLength - 1 - (k - 1) * Step.
+                ++index *= list->Step;
+                --index += list->CachedLength;
+            } else {
+                index *= list->Step;
+            }
+
+            if (index < 0 || (list->CachedLength != -1 && index >= list->CachedLength)) {
+                const TPyObjectPtr error = PyUnicode_FromFormat("index %zd out of bounds, list size: %zd", index, list->CachedLength);
+                PyErr_SetObject(PyExc_IndexError, error.Get());
+                return nullptr;
+            }
+
+            if (const auto item = NUdf::TBoxedValueAccessor::Lookup(*list->Dict.Get(), NUdf::TUnboxedValuePod(ui64(index)))) {
+                return ToPyObject(list->CastCtx, list->ItemType, item.GetOptionalValue()).Release();
+            }
+
+            const TPyObjectPtr error = PyUnicode_FromFormat("index %zd out of bounds", index);
+            PyErr_SetObject(PyExc_IndexError, error.Get());
+            return nullptr;
+        }
+
+        if (PySlice_Check(slice)) {
+            Py_ssize_t start, stop, step, size;
+
+            if (list->CachedLength >= 0) {
+                if (PySlice_GetIndicesEx(SLICEOBJ(slice), (list->CachedLength + list->Step - 1) / list->Step, &start, &stop, &step, &size) < 0) {
+                    return nullptr;
+                }
+            } else {
+                if (PySlice_Unpack(slice, &start, &stop, &step) < 0) {
+                    return nullptr;
+                }
+
+                // The real length is fetched only when the slice needs it:
+                // a step other than -1/0/1 or a negative bound. Otherwise
+                // the indices are adjusted against PY_SSIZE_T_MAX and the
+                // length stays unknown.
+                if (step < -1 || step > 1 || (start < 0 && start > PY_SSIZE_T_MIN) || (stop < 0 && stop > PY_SSIZE_T_MIN)) {
+                    list->CachedLength = static_cast<Py_ssize_t>(NUdf::TBoxedValueAccessor::GetListLength(*list->Value.Get()));
+                    size = PySlice_AdjustIndices((list->CachedLength + list->Step - 1) / list->Step, &start, &stop, step);
+                } else {
+                    size = PySlice_AdjustIndices(PY_SSIZE_T_MAX, &start, &stop, step);
+                }
+            }
+
+            if (!step) {
+                PyErr_SetString(PyExc_ValueError, "slice step cannot be zero");
+                return nullptr;
+            }
+
+            // Combine the slice step with this list's own step, saturating at
+            // the Py_ssize_t limits instead of overflowing.
+            const Py_ssize_t hi = PY_SSIZE_T_MAX / list->Step;
+            const Py_ssize_t lo = PY_SSIZE_T_MIN / list->Step;
+            step = step > lo && step < hi ? step * list->Step : (step > 0 ? PY_SSIZE_T_MAX : PY_SSIZE_T_MIN);
+
+            NUdf::TUnboxedValue newList;
+            if (size > 0) {
+                // Convert the number of selected items into the number of
+                // underlying items spanned: (size - 1) * |step| + 1, saturated.
+                size = step > 0 ?
+                    (size < PY_SSIZE_T_MAX / step ? --size * step + 1 : PY_SSIZE_T_MAX):
+                    (size < PY_SSIZE_T_MAX / -step ? --size * -step + 1 : PY_SSIZE_T_MAX);
+
+                start = start < hi ? start * list->Step : PY_SSIZE_T_MAX;
+                // For a negative step the span ends at `start`, so it begins
+                // size - 1 underlying items earlier.
+                const Py_ssize_t skip = step > 0 ? start : start - size + 1;
+
+                newList = NUdf::TUnboxedValuePod(list->Value.Get().Get());
+                if (skip > 0) {
+                    newList = vb->SkipList(newList, skip);
+                }
+
+                if (size < PY_SSIZE_T_MAX && (list->CachedLength == -1 || list->CachedLength - skip > size)) {
+                    newList = vb->TakeList(newList, size);
+                }
+
+                if (step < 0) {
+                    step = -step;
+                    newList = vb->ReverseList(newList);
+                }
+            } else {
+                newList = vb->NewEmptyList();
+            }
+
+            return New(list->CastCtx, list->ItemType, newList.AsBoxed(), step, size);
+        }
+
+        const TPyObjectPtr type = PyObject_Type(slice);
+        const TPyObjectPtr repr = PyObject_Repr(type.Get());
+        const TPyObjectPtr error = PyUnicode_FromFormat("Unsupported slice object type: %R", repr.Get());
+        PyErr_SetObject(PyExc_TypeError, error.Get());
+        return nullptr;
+    } PY_CATCH(nullptr)
+}
+
+// to_index_dict(): returns a lazy Python dict over the index dictionary of
+// the underlying list. The dictionary is built on first use and kept in Dict.
+// Returns nullptr when PY_CATCH intercepts an exception.
+PyObject* TPyLazyList::ToIndexDict(PyObject* self, PyObject* /* arg */)
+{
+    PY_TRY {
+        TPyLazyList* const lazy = Cast(self);
+        const auto& castCtx = lazy->CastCtx;
+        if (!lazy->Dict.IsSet()) {
+            lazy->Dict.Set(castCtx->PyCtx, castCtx->ValueBuilder->ToIndexDict(NUdf::TUnboxedValuePod(lazy->Value.Get().Get())).AsBoxed());
+        }
+
+        return ToPyLazyDict(castCtx, nullptr, lazy->ItemType, NUdf::TUnboxedValuePod(lazy->Dict.Get().Get())).Release();
+    } PY_CATCH(nullptr)
+}
+
+// reversed() / __reversed__(): returns a new yql.TList over the reversed
+// underlying list, keeping the same Step. The new list's length is unknown.
+// Returns nullptr when PY_CATCH intercepts an exception.
+PyObject* TPyLazyList::Reversed(PyObject* self, PyObject* /* arg */)
+{
+    PY_TRY {
+        TPyLazyList* const lazy = Cast(self);
+        const auto vb = lazy->CastCtx->ValueBuilder;
+        const auto flipped = vb->ReverseList(NUdf::TUnboxedValuePod(lazy->Value.Get().Get()));
+        return New(lazy->CastCtx, lazy->ItemType, flipped.AsBoxed(), lazy->Step);
+    } PY_CATCH(nullptr)
+}
+
+// take(n): returns a new yql.TList limited to the first n * Step underlying
+// items, keeping the same Step. Returns nullptr with IndexError set when
+// `arg` is not a non-negative index, or when PY_CATCH intercepts an exception.
+PyObject* TPyLazyList::Take(PyObject* self, PyObject* arg)
+{
+    PY_TRY {
+        const Py_ssize_t requested = CastIndex(arg, "take");
+        if (requested < 0) {
+            return nullptr;
+        }
+
+        TPyLazyList* const lazy = Cast(self);
+        const Py_ssize_t underlying = requested * lazy->Step;
+
+        NUdf::TUnboxedValue source(NUdf::TUnboxedValuePod(lazy->Value.Get().Get()));
+        const auto taken = lazy->CastCtx->ValueBuilder->TakeList(source, static_cast<ui64>(underlying));
+        return New(lazy->CastCtx, lazy->ItemType, taken.AsBoxed(), lazy->Step);
+    } PY_CATCH(nullptr)
+}
+
+// skip(n): returns a new yql.TList without the first n * Step underlying
+// items, keeping the same Step. Returns nullptr with IndexError set when
+// `arg` is not a non-negative index, or when PY_CATCH intercepts an exception.
+PyObject* TPyLazyList::Skip(PyObject* self, PyObject* arg)
+{
+    PY_TRY {
+        const Py_ssize_t requested = CastIndex(arg, "skip");
+        if (requested < 0) {
+            return nullptr;
+        }
+
+        TPyLazyList* const lazy = Cast(self);
+        const Py_ssize_t underlying = requested * lazy->Step;
+
+        NUdf::TUnboxedValue source(NUdf::TUnboxedValuePod(lazy->Value.Get().Get()));
+        const auto remaining = lazy->CastCtx->ValueBuilder->SkipList(source, static_cast<ui64>(underlying));
+        return New(lazy->CastCtx, lazy->ItemType, remaining.AsBoxed(), lazy->Step);
+    } PY_CATCH(nullptr)
+}
+
+// has_fast_len(): returns True or False according to HasFastListLength of the
+// underlying list. Returns nullptr when PY_CATCH intercepts an exception.
+PyObject* TPyLazyList::HasFastLen(PyObject* self, PyObject* /* arg */)
+{
+    PY_TRY {
+        const TPyLazyList* const lazy = Cast(self);
+        const bool fast = NUdf::TBoxedValueAccessor::HasFastListLength(*lazy->Value.Get());
+        if (!fast) {
+            Py_RETURN_FALSE;
+        }
+        Py_RETURN_TRUE;
+    } PY_CATCH(nullptr)
+}
+
+// has_items(): returns True or False according to HasListItems of the
+// underlying list. Returns nullptr when PY_CATCH intercepts an exception.
+PyObject* TPyLazyList::HasItems(PyObject* self, PyObject* /* arg */)
+{
+    PY_TRY {
+        const TPyLazyList* const lazy = Cast(self);
+        const bool nonEmpty = NUdf::TBoxedValueAccessor::HasListItems(*lazy->Value.Get());
+        if (!nonEmpty) {
+            Py_RETURN_FALSE;
+        }
+        Py_RETURN_TRUE;
+    } PY_CATCH(nullptr)
+}
+
+// Truth value of a yql.TList: uses the cached length when it is known,
+// otherwise asks the underlying list whether it has items.
+// Returns 1 or 0, or -1 when PY_CATCH intercepts an exception.
+int TPyLazyList::Bool(PyObject* self)
+{
+    PY_TRY {
+        const TPyLazyList* const lazy = Cast(self);
+        if (lazy->CachedLength != -1) {
+            return lazy->CachedLength > 0 ? 1 : 0;
+        }
+        return NUdf::TBoxedValueAccessor::HasListItems(*lazy->Value.Get()) ? 1 : 0;
+    } PY_CATCH(-1)
+}
+
+//////////////////////////////////////////////////////////////////////////////
+// TPyLazyListIterator implementation
+//////////////////////////////////////////////////////////////////////////////
+// Allocates a yql.TListIterator bound to `list`. The iterator takes a boxed
+// list iterator from the list's underlying value and starts with a consumed
+// count of zero.
+PyObject* TPyLazyListIterator::New(TPyLazyList* list)
+{
+    auto* const created = new TPyLazyListIterator;
+    PyObject_INIT(created, &PyLazyListIteratorType);
+
+    const auto& castCtx = list->CastCtx;
+    created->List.Reset(list);
+    created->Iterator.Set(castCtx->PyCtx, NUdf::TBoxedValueAccessor::GetListIterator(*list->Value.Get()));
+    created->Length = 0;
+    created->CastCtx = castCtx;
+
+    return reinterpret_cast<PyObject*>(created);
+}
+
+// tp_iternext of yql.TListIterator. Yields the next item converted to Python,
+// then skips up to Step - 1 underlying items so the following call lands on
+// the next stride position. Every consumed underlying item is counted in
+// Length. Returns nullptr at the end of the list (storing the counted length
+// in the list if it was unknown) or when PY_CATCH intercepts an exception.
+PyObject* TPyLazyListIterator::Next(PyObject* self)
+{
+    PY_TRY {
+        TPyLazyListIterator* const cursor = Cast(self);
+        TPyLazyList* const owner = cursor->List.Get();
+
+        NUdf::TUnboxedValue current;
+        if (!cursor->Iterator.Get().Next(current)) {
+            // store calculated list length after traverse over whole list
+            if (owner->CachedLength == -1) {
+                owner->CachedLength = cursor->Length;
+            }
+
+            return nullptr;
+        }
+
+        ++cursor->Length;
+
+        auto remaining = owner->Step;
+        while (--remaining && cursor->Iterator.Get().Skip()) {
+            ++cursor->Length;
+        }
+
+        return ToPyObject(owner->CastCtx, owner->ItemType, current).Release();
+    } PY_CATCH(nullptr)
+}
+
+//////////////////////////////////////////////////////////////////////////////
+// TPyThinList interface
+//////////////////////////////////////////////////////////////////////////////
+// Python object for a list variant that carries a direct pointer to its
+// elements together with an explicit length and step. Instances are released
+// with `delete` in Dealloc().
+// NOTE(review): the implementation is outside this view; the field notes
+// below are read from the declarations only.
+struct TPyThinList
+{
+ using TPtr = NUdf::TRefCountedPtr<TPyThinList, TPyPtrOps<TPyThinList>>;
+
+ PyObject_HEAD;
+ TPyCastContext::TPtr CastCtx;
+ const NUdf::TType* ItemType;
+ // Boxed value associated with the list; New() defaults it to an empty pointer.
+ TPyCleanupListItem<NUdf::IBoxedValuePtr> Value;
+ // Pointer to the element array; New() defaults it to nullptr.
+ const NUdf::TUnboxedValue* Elements;
+ Py_ssize_t Length;
+ Py_ssize_t Step;
+
+ inline static TPyThinList* Cast(PyObject* o) {
+ return reinterpret_cast<TPyThinList*>(o);
+ }
+
+ inline static void Dealloc(PyObject* self) {
+ delete Cast(self);
+ }
+
+ static PyObject* New(
+ const TPyCastContext::TPtr& castCtx,
+ const NUdf::TType* itemType,
+ NUdf::IBoxedValuePtr value = NUdf::IBoxedValuePtr(),
+ const NUdf::TUnboxedValue* elements = nullptr,
+ Py_ssize_t length = 0,
+ Py_ssize_t step = 1);
+
+ static int Bool(PyObject* self);
+ static PyObject* Repr(PyObject* self);
+ static PyObject* Iter(PyObject* self);
+ static Py_ssize_t Len(PyObject* self);
+ static PyObject* Subscript(PyObject* self, PyObject* slice);
+ static PyObject* ToIndexDict(PyObject* self, PyObject* /* arg */);
+ static PyObject* Reversed(PyObject* self, PyObject* /* arg */);
+ static PyObject* Take(PyObject* self, PyObject* arg);
+ static PyObject* Skip(PyObject* self, PyObject* arg);
+ static PyObject* HasFastLen(PyObject* self, PyObject* /* arg */);
+ static PyObject* HasItems(PyObject* self, PyObject* /* arg */);
+};
+
+// Mapping protocol slots of the thin list: len() and read-only subscription
+// (item assignment is not provided).
+PyMappingMethods ThinListMapping = {
+ INIT_MEMBER(mp_length, TPyThinList::Len),
+ INIT_MEMBER(mp_subscript, TPyThinList::Subscript),
+ INIT_MEMBER(mp_ass_subscript, nullptr),
+};
+
+// Number protocol slots of the thin list. Only the truth-value slot (nb_bool
+// on Python 3, nb_nonzero on Python 2) is populated; every other slot is
+// nullptr. The preprocessor branches select the slots that exist in each
+// major Python version.
+PyNumberMethods ThinListNumbering = {
+ INIT_MEMBER(nb_add, nullptr),
+ INIT_MEMBER(nb_subtract, nullptr),
+ INIT_MEMBER(nb_multiply, nullptr),
+#if PY_MAJOR_VERSION < 3
+ INIT_MEMBER(nb_divide, nullptr),
+#endif
+ INIT_MEMBER(nb_remainder, nullptr),
+ INIT_MEMBER(nb_divmod, nullptr),
+ INIT_MEMBER(nb_power, nullptr),
+ INIT_MEMBER(nb_negative, nullptr),
+ INIT_MEMBER(nb_positive, nullptr),
+ INIT_MEMBER(nb_absolute, nullptr),
+#if PY_MAJOR_VERSION >= 3
+ INIT_MEMBER(nb_bool, TPyThinList::Bool),
+#else
+ INIT_MEMBER(nb_nonzero, TPyThinList::Bool),
+#endif
+ INIT_MEMBER(nb_invert, nullptr),
+ INIT_MEMBER(nb_lshift, nullptr),
+ INIT_MEMBER(nb_rshift, nullptr),
+ INIT_MEMBER(nb_and, nullptr),
+ INIT_MEMBER(nb_xor, nullptr),
+ INIT_MEMBER(nb_or, nullptr),
+#if PY_MAJOR_VERSION < 3
+ INIT_MEMBER(nb_coerce, nullptr),
+#endif
+ INIT_MEMBER(nb_int, nullptr),
+#if PY_MAJOR_VERSION >= 3
+ INIT_MEMBER(nb_reserved, nullptr),
+#else
+ INIT_MEMBER(nb_long, nullptr),
+#endif
+ INIT_MEMBER(nb_float, nullptr),
+#if PY_MAJOR_VERSION < 3
+ INIT_MEMBER(nb_oct, nullptr),
+ INIT_MEMBER(nb_hex, nullptr),
+#endif
+
+ INIT_MEMBER(nb_inplace_add, nullptr),
+ INIT_MEMBER(nb_inplace_subtract, nullptr),
+ INIT_MEMBER(nb_inplace_multiply, nullptr),
+ INIT_MEMBER(nb_inplace_remainder, nullptr),
+ INIT_MEMBER(nb_inplace_power, nullptr),
+ INIT_MEMBER(nb_inplace_lshift, nullptr),
+ INIT_MEMBER(nb_inplace_rshift, nullptr),
+ INIT_MEMBER(nb_inplace_and, nullptr),
+ INIT_MEMBER(nb_inplace_xor, nullptr),
+ INIT_MEMBER(nb_inplace_or, nullptr),
+
+ INIT_MEMBER(nb_floor_divide, nullptr),
+ INIT_MEMBER(nb_true_divide, nullptr),
+ INIT_MEMBER(nb_inplace_floor_divide, nullptr),
+ INIT_MEMBER(nb_inplace_true_divide, nullptr),
+
+ INIT_MEMBER(nb_index, nullptr),
+#if PY_MAJOR_VERSION >= 3
+ INIT_MEMBER(nb_matrix_multiply, nullptr),
+ INIT_MEMBER(nb_inplace_matrix_multiply, nullptr),
+#endif
+};
+
+static PyMethodDef TPyThinListMethods[] = { // Python-visible methods of yql.TList; installed through tp_methods of PyThinListType.
+ { "__reversed__", TPyThinList::Reversed, METH_NOARGS, nullptr }, // same handler as "reversed" below, registered without a docstring
+ { "to_index_dict", TPyThinList::ToIndexDict, METH_NOARGS, to_index_dict__doc__ },
+ { "reversed", TPyThinList::Reversed, METH_NOARGS, reversed__doc__ },
+ { "take", TPyThinList::Take, METH_O, take__doc__ }, // METH_O: exactly one positional argument
+ { "skip", TPyThinList::Skip, METH_O, skip__doc__ }, // METH_O: exactly one positional argument
+ { "has_fast_len", TPyThinList::HasFastLen, METH_NOARGS, has_fast_len__doc__ },
+ { "has_items", TPyThinList::HasItems, METH_NOARGS, has_items__doc__ },
+ { nullptr, nullptr, 0, nullptr } /* sentinel */
+};
+
+#if PY_MAJOR_VERSION >= 3 // compatibility shim so the type tables below can name this flag on both major versions
+#define Py_TPFLAGS_HAVE_ITER 0 // on Python 3 the flag contributes no bits to tp_flags
+#endif
+
+PyTypeObject PyThinListType = { // Static type object for yql.TList, the Python wrapper implemented by TPyThinList.
+ PyVarObject_HEAD_INIT(&PyType_Type, 0)
+ INIT_MEMBER(tp_name , "yql.TList"),
+ INIT_MEMBER(tp_basicsize , sizeof(TPyThinList)),
+ INIT_MEMBER(tp_itemsize , 0),
+ INIT_MEMBER(tp_dealloc , TPyThinList::Dealloc),
+#if PY_VERSION_HEX < 0x030800b4 // this position holds tp_print before 3.8.0b4 and tp_vectorcall_offset from then on
+ INIT_MEMBER(tp_print , nullptr),
+#else
+ INIT_MEMBER(tp_vectorcall_offset, 0),
+#endif
+ INIT_MEMBER(tp_getattr , nullptr),
+ INIT_MEMBER(tp_setattr , nullptr),
+#if PY_MAJOR_VERSION >= 3
+ INIT_MEMBER(tp_as_async , nullptr),
+#else
+ INIT_MEMBER(tp_compare , nullptr),
+#endif
+ INIT_MEMBER(tp_repr , TPyThinList::Repr),
+ INIT_MEMBER(tp_as_number , &ThinListNumbering), // supplies truth-value testing only
+ INIT_MEMBER(tp_as_sequence , nullptr),
+ INIT_MEMBER(tp_as_mapping , &ThinListMapping), // len() and [] go through the mapping protocol
+ INIT_MEMBER(tp_hash , nullptr),
+ INIT_MEMBER(tp_call , nullptr),
+ INIT_MEMBER(tp_str , nullptr),
+ INIT_MEMBER(tp_getattro , nullptr),
+ INIT_MEMBER(tp_setattro , nullptr),
+ INIT_MEMBER(tp_as_buffer , nullptr),
+ INIT_MEMBER(tp_flags , Py_TPFLAGS_HAVE_ITER), // evaluates to 0 on Python 3 (see the #define above). NOTE(review): Py_TPFLAGS_DEFAULT is not included - confirm this is intended
+ INIT_MEMBER(tp_doc , "yql.TList object"),
+ INIT_MEMBER(tp_traverse , nullptr),
+ INIT_MEMBER(tp_clear , nullptr),
+ INIT_MEMBER(tp_richcompare , nullptr),
+ INIT_MEMBER(tp_weaklistoffset , 0),
+ INIT_MEMBER(tp_iter , TPyThinList::Iter), // iter(obj) creates a TPyThinListIterator
+ INIT_MEMBER(tp_iternext , nullptr), // the list itself is not an iterator
+ INIT_MEMBER(tp_methods , TPyThinListMethods),
+ INIT_MEMBER(tp_members , nullptr),
+ INIT_MEMBER(tp_getset , nullptr),
+ INIT_MEMBER(tp_base , nullptr),
+ INIT_MEMBER(tp_dict , nullptr),
+ INIT_MEMBER(tp_descr_get , nullptr),
+ INIT_MEMBER(tp_descr_set , nullptr),
+ INIT_MEMBER(tp_dictoffset , 0),
+ INIT_MEMBER(tp_init , nullptr),
+ INIT_MEMBER(tp_alloc , nullptr),
+ INIT_MEMBER(tp_new , nullptr), // no tp_new: instances are created from C++ via TPyThinList::New
+ INIT_MEMBER(tp_free , nullptr),
+ INIT_MEMBER(tp_is_gc , nullptr),
+ INIT_MEMBER(tp_bases , nullptr),
+ INIT_MEMBER(tp_mro , nullptr),
+ INIT_MEMBER(tp_cache , nullptr),
+ INIT_MEMBER(tp_subclasses , nullptr),
+ INIT_MEMBER(tp_weaklist , nullptr),
+ INIT_MEMBER(tp_del , nullptr),
+ INIT_MEMBER(tp_version_tag , 0),
+#if PY_MAJOR_VERSION >= 3
+ INIT_MEMBER(tp_finalize , nullptr),
+#endif
+#if PY_VERSION_HEX >= 0x030800b1
+ INIT_MEMBER(tp_vectorcall , nullptr),
+#endif
+#if PY_VERSION_HEX >= 0x030800b4 && PY_VERSION_HEX < 0x03090000 // in this version window tp_print is listed at the end of the struct
+ INIT_MEMBER(tp_print , nullptr),
+#endif
+};
+
+//////////////////////////////////////////////////////////////////////////////
+// TPyThinListIterator interface
+//////////////////////////////////////////////////////////////////////////////
+// Python iterator object (yql.TListIterator) over a TPyThinList view.
+// List keeps the iterated list referenced, Elements is the cursor into its
+// backing array and Count is the number of items still to be produced.
+struct TPyThinListIterator
+{
+    PyObject_HEAD;
+    TPyThinList::TPtr List;
+    const NUdf::TUnboxedValue* Elements;
+    Py_ssize_t Count;
+
+    // Reinterprets a generic Python object pointer as an iterator object (unchecked).
+    inline static TPyThinListIterator* Cast(PyObject* o) {
+        TPyThinListIterator* const iter = reinterpret_cast<TPyThinListIterator*>(o);
+        return iter;
+    }
+
+    // tp_dealloc handler: destroys the C++ object behind `self` with `delete`.
+    inline static void Dealloc(PyObject* self) {
+        TPyThinListIterator* const iter = Cast(self);
+        delete iter;
+    }
+
+    // tp_repr handler: always yields the fixed text "<yql.TListIterator>".
+    inline static PyObject* Repr(PyObject* /* self */) {
+        return PyRepr("<yql.TListIterator>").Release();
+    }
+
+    static PyObject* New(TPyThinList* list);
+    static PyObject* Next(PyObject* self);
+};
+
+PyTypeObject PyThinListIteratorType = { // Static type object for yql.TListIterator, the iterator implemented by TPyThinListIterator.
+ PyVarObject_HEAD_INIT(&PyType_Type, 0)
+ INIT_MEMBER(tp_name , "yql.TListIterator"),
+ INIT_MEMBER(tp_basicsize , sizeof(TPyThinListIterator)),
+ INIT_MEMBER(tp_itemsize , 0),
+ INIT_MEMBER(tp_dealloc , TPyThinListIterator::Dealloc),
+#if PY_VERSION_HEX < 0x030800b4 // this position holds tp_print before 3.8.0b4 and tp_vectorcall_offset from then on
+ INIT_MEMBER(tp_print , nullptr),
+#else
+ INIT_MEMBER(tp_vectorcall_offset, 0),
+#endif
+ INIT_MEMBER(tp_getattr , nullptr),
+ INIT_MEMBER(tp_setattr , nullptr),
+#if PY_MAJOR_VERSION >= 3
+ INIT_MEMBER(tp_as_async , nullptr),
+#else
+ INIT_MEMBER(tp_compare , nullptr),
+#endif
+ INIT_MEMBER(tp_repr , TPyThinListIterator::Repr),
+ INIT_MEMBER(tp_as_number , nullptr),
+ INIT_MEMBER(tp_as_sequence , nullptr),
+ INIT_MEMBER(tp_as_mapping , nullptr),
+ INIT_MEMBER(tp_hash , nullptr),
+ INIT_MEMBER(tp_call , nullptr),
+ INIT_MEMBER(tp_str , nullptr),
+ INIT_MEMBER(tp_getattro , nullptr),
+ INIT_MEMBER(tp_setattro , nullptr),
+ INIT_MEMBER(tp_as_buffer , nullptr),
+ INIT_MEMBER(tp_flags , Py_TPFLAGS_HAVE_ITER), // evaluates to 0 on Python 3 (see the #define earlier in this file)
+ INIT_MEMBER(tp_doc , "yql.ListIterator object"), // NOTE(review): tp_name says "yql.TListIterator" - confirm whether the missing "T" is intentional
+ INIT_MEMBER(tp_traverse , nullptr),
+ INIT_MEMBER(tp_clear , nullptr),
+ INIT_MEMBER(tp_richcompare , nullptr),
+ INIT_MEMBER(tp_weaklistoffset , 0),
+ INIT_MEMBER(tp_iter , PyObject_SelfIter), // iter(it) returns the iterator itself
+ INIT_MEMBER(tp_iternext , TPyThinListIterator::Next),
+ INIT_MEMBER(tp_methods , nullptr),
+ INIT_MEMBER(tp_members , nullptr),
+ INIT_MEMBER(tp_getset , nullptr),
+ INIT_MEMBER(tp_base , nullptr),
+ INIT_MEMBER(tp_dict , nullptr),
+ INIT_MEMBER(tp_descr_get , nullptr),
+ INIT_MEMBER(tp_descr_set , nullptr),
+ INIT_MEMBER(tp_dictoffset , 0),
+ INIT_MEMBER(tp_init , nullptr),
+ INIT_MEMBER(tp_alloc , nullptr),
+ INIT_MEMBER(tp_new , nullptr), // no tp_new: instances are created from C++ via TPyThinListIterator::New
+ INIT_MEMBER(tp_free , nullptr),
+ INIT_MEMBER(tp_is_gc , nullptr),
+ INIT_MEMBER(tp_bases , nullptr),
+ INIT_MEMBER(tp_mro , nullptr),
+ INIT_MEMBER(tp_cache , nullptr),
+ INIT_MEMBER(tp_subclasses , nullptr),
+ INIT_MEMBER(tp_weaklist , nullptr),
+ INIT_MEMBER(tp_del , nullptr),
+ INIT_MEMBER(tp_version_tag , 0),
+#if PY_MAJOR_VERSION >= 3
+ INIT_MEMBER(tp_finalize , nullptr),
+#endif
+#if PY_VERSION_HEX >= 0x030800b1
+ INIT_MEMBER(tp_vectorcall , nullptr),
+#endif
+#if PY_VERSION_HEX >= 0x030800b4 && PY_VERSION_HEX < 0x03090000 // in this version window tp_print is listed at the end of the struct
+ INIT_MEMBER(tp_print , nullptr),
+#endif
+};
+
+//////////////////////////////////////////////////////////////////////////////
+// TPyThinList implementation
+//////////////////////////////////////////////////////////////////////////////
+// Allocates a yql.TList object and fills in every field of the view.
+//
+// castCtx / itemType - conversion context and item type stored for later item conversion;
+// value              - boxed value stored in the list's Value holder;
+// elements / length / step - geometry of the view over the backing array.
+//
+// Returns the new object as a PyObject*.
+PyObject* TPyThinList::New(
+    const TPyCastContext::TPtr& castCtx,
+    const NUdf::TType* itemType,
+    NUdf::IBoxedValuePtr value,
+    const NUdf::TUnboxedValue* elements,
+    Py_ssize_t length,
+    Py_ssize_t step)
+{
+    TPyThinList* self = new TPyThinList;
+    PyObject_INIT(self, &PyThinListType);
+
+    // Conversion context and backing storage.
+    self->CastCtx = castCtx;
+    self->ItemType = itemType;
+    self->Value.Set(castCtx->PyCtx, value);
+
+    // View geometry.
+    self->Elements = elements;
+    self->Length = length;
+    self->Step = step;
+
+    return reinterpret_cast<PyObject*>(self);
+}
+
+// tp_repr handler: always yields the fixed text "<yql.TList>", regardless of content.
+PyObject* TPyThinList::Repr(PyObject* self)
+{
+    Y_UNUSED(self);
+    return PyRepr("<yql.TList>").Release();
+}
+
+// tp_iter handler: creates a fresh TPyThinListIterator over this list.
+// PY_TRY / PY_CATCH(nullptr) wrap the body; nullptr is the value handed to PY_CATCH.
+PyObject* TPyThinList::Iter(PyObject* self)
+{
+    PY_TRY {
+        return TPyThinListIterator::New(Cast(self));
+    } PY_CATCH(nullptr)
+}
+
+// mp_length handler: reports the stored number of items in the view.
+Py_ssize_t TPyThinList::Len(PyObject* self)
+{
+    TPyThinList* list = Cast(self);
+    return list->Length;
+}
+
+// mp_subscript handler: implements `self[index]` and `self[start:stop:step]`.
+//
+// A thin list is a strided view over a contiguous array of unboxed values:
+//   * Step > 0: logical item i is Elements[i * Step];
+//   * Step < 0: logical item i is Elements[(Length - 1 - i) * -Step], i.e.
+//     Elements always points at the lowest-addressed item of the view.
+//
+// An integer index returns the converted item and raises IndexError when it is
+// out of range. A slice returns a new thin list over the same backing value.
+// Any other key raises TypeError. On failure nullptr is returned with a Python
+// error set.
+PyObject* TPyThinList::Subscript(PyObject* self, PyObject* slice)
+{
+    PY_TRY {
+        TPyThinList* list = Cast(self);
+
+        if (PyIndex_Check(slice)) {
+            Py_ssize_t index = PyNumber_AsSsize_t(slice, PyExc_IndexError);
+
+            // -1 doubles as the failure marker of PyNumber_AsSsize_t. Without
+            // this check a failed conversion would be served as "last item"
+            // while the Python error indicator is still set.
+            if (index == -1 && PyErr_Occurred()) {
+                return nullptr;
+            }
+
+            if (index < 0) {
+                index += list->Length;
+            }
+
+            if (index < 0 || index >= list->Length) {
+                const TPyObjectPtr error = PyUnicode_FromFormat("index %zd out of bounds, list size: %zd", index, list->Length);
+                PyErr_SetObject(PyExc_IndexError, error.Get());
+                return nullptr;
+            }
+
+            // Translate the logical index into an offset from Elements.
+            if (list->Step > 0) {
+                index *= list->Step;
+            } else {
+                index = list->Length - 1 - index;
+                index *= -list->Step;
+            }
+
+            return ToPyObject(list->CastCtx, list->ItemType, list->Elements[index]).Release();
+        }
+
+        if (PySlice_Check(slice)) {
+            Py_ssize_t start, stop, step, size;
+
+            if (PySlice_GetIndicesEx(SLICEOBJ(slice), list->Length, &start, &stop, &step, &size) < 0) {
+                return nullptr;
+            }
+
+            if (!step) {
+                PyErr_SetString(PyExc_ValueError, "slice step cannot be zero");
+                return nullptr;
+            }
+
+            if (size > 0) {
+                // Logical positions (within this view) of the first and the
+                // last item selected by the slice.
+                const Py_ssize_t first = start;
+                const Py_ssize_t last = start + step * (size - 1);
+
+                // The new view must be anchored at the selected item with the
+                // lowest address. That is `first` when the slice walks in the
+                // same direction as this view and `last` otherwise. Using
+                // `stop` here is wrong whenever the slice step does not land
+                // exactly on stop - 1 (e.g. reversed(v)[::2] on 10 items).
+                const bool sameDirection = (step > 0) == (list->Step > 0);
+                const Py_ssize_t anchor = sameDirection ? first : last;
+
+                const Py_ssize_t skip = list->Step > 0
+                    ? anchor * list->Step
+                    : (anchor + 1 - list->Length) * list->Step;
+
+                return New(list->CastCtx, list->ItemType, list->Value.Get(), list->Elements + skip, size, step * list->Step);
+            } else {
+                return New(list->CastCtx, list->ItemType, list->Value.Get());
+            }
+        }
+
+        const TPyObjectPtr type = PyObject_Type(slice);
+        const TPyObjectPtr repr = PyObject_Repr(type.Get());
+        const TPyObjectPtr error = PyUnicode_FromFormat("Unsupported slice object type: %R", repr.Get());
+        PyErr_SetObject(PyExc_TypeError, error.Get());
+        return nullptr;
+    } PY_CATCH(nullptr)
+}
+
+#undef SLICEOBJ
+
+// Python method `to_index_dict()`: asks the value builder for an index dict of
+// the stored boxed value and wraps the result as a lazy Python dict.
+// NOTE(review): the dict is built from list->Value, not from the
+// Elements/Length/Step view - confirm the result is as intended for sliced or
+// reversed lists.
+PyObject* TPyThinList::ToIndexDict(PyObject* self, PyObject* /* arg */)
+{
+    PY_TRY {
+        TPyThinList* list = Cast(self);
+        const auto vb = list->CastCtx->ValueBuilder;
+        const auto dict = vb->ToIndexDict(NUdf::TUnboxedValuePod(list->Value.Get().Get()));
+        return ToPyLazyDict(list->CastCtx, nullptr, list->ItemType, dict).Release();
+    } PY_CATCH(nullptr)
+}
+
+// Python methods `reversed()` / `__reversed__()`: returns a new thin list over
+// the same elements with the sign of Step flipped; no items are copied.
+PyObject* TPyThinList::Reversed(PyObject* self, PyObject* /* arg */)
+{
+    PY_TRY {
+        TPyThinList* list = Cast(self);
+        const Py_ssize_t flippedStep = -list->Step;
+        return New(
+            list->CastCtx,
+            list->ItemType,
+            list->Value.Get(),
+            list->Elements,
+            list->Length,
+            flippedStep);
+    } PY_CATCH(nullptr)
+}
+
+// Python method `take(n)`: returns a thin list with at most the first `n`
+// logical items of this view; the backing value is shared, nothing is copied.
+//
+// A negative result from CastIndex makes the method return nullptr.
+// NOTE(review): CastIndex is defined elsewhere - presumably it sets the
+// Python error in that case.
+PyObject* TPyThinList::Take(PyObject* self, PyObject* arg)
+{
+    PY_TRY {
+        TPyThinList* list = Cast(self);
+        const Py_ssize_t count = CastIndex(arg, "take");
+        if (count < 0) {
+            return nullptr;
+        }
+
+        if (const auto size = std::min(count, list->Length)) {
+            // For a backward view (Step < 0) the first logical items sit at the
+            // highest addresses, so the new view starts (Length - size) strides
+            // above Elements. The stride is -Step elements wide; the previous
+            // `Length + size * Step` offset was only right for Step == -1.
+            const NUdf::TUnboxedValue* elements = list->Step > 0
+                ? list->Elements
+                : list->Elements - (list->Length - size) * list->Step;
+
+            return New(list->CastCtx, list->ItemType, list->Value.Get(), elements, size, list->Step);
+        } else {
+            return New(list->CastCtx, list->ItemType, list->Value.Get());
+        }
+    } PY_CATCH(nullptr)
+}
+
+// Python method `skip(n)`: returns a thin list without the first `n` logical
+// items of this view; the backing value is shared, nothing is copied.
+//
+// A negative result from CastIndex makes the method return nullptr.
+// NOTE(review): CastIndex is defined elsewhere - presumably it sets the
+// Python error in that case.
+PyObject* TPyThinList::Skip(PyObject* self, PyObject* arg)
+{
+    PY_TRY {
+        TPyThinList* list = Cast(self);
+        const Py_ssize_t count = CastIndex(arg, "skip");
+        if (count < 0) {
+            return nullptr;
+        }
+
+        if (const auto size = std::max(list->Length - count, Py_ssize_t(0))) {
+            // Forward view: drop items from the low-address end by advancing
+            // Elements. Backward view: the skipped items are at the high end,
+            // so shrinking Length alone is enough.
+            const NUdf::TUnboxedValue* elements = list->Step > 0
+                ? list->Elements + count * list->Step
+                : list->Elements;
+
+            return New(list->CastCtx, list->ItemType, list->Value.Get(), elements, size, list->Step);
+        } else {
+            // Keep the backing value on the empty result as well, as Take and
+            // Subscript do, so that ToIndexDict never sees a null boxed pointer.
+            return New(list->CastCtx, list->ItemType, list->Value.Get());
+        }
+    } PY_CATCH(nullptr)
+}
+
+// Python method `has_fast_len()`: unconditionally returns True for a thin list.
+PyObject* TPyThinList::HasFastLen(PyObject* /* self */, PyObject* /* arg */)
+{
+    Py_RETURN_TRUE;
+}
+
+// Python method `has_items()`: True when the view holds at least one item.
+PyObject* TPyThinList::HasItems(PyObject* self, PyObject* /* arg */)
+{
+    const bool isEmpty = !(Cast(self)->Length > 0);
+    if (isEmpty) {
+        Py_RETURN_FALSE;
+    }
+    Py_RETURN_TRUE;
+}
+
+// Truth-value slot (nb_bool / nb_nonzero): 1 for a non-empty view, 0 otherwise.
+int TPyThinList::Bool(PyObject* self)
+{
+    if (Cast(self)->Length > 0) {
+        return 1;
+    }
+    return 0;
+}
+
+//////////////////////////////////////////////////////////////////////////////
+// TPyThinListIterator implementation
+//////////////////////////////////////////////////////////////////////////////
+// Creates an iterator over `list`.
+// The cursor is parked one step *before* the first logical item, because Next
+// advances it by list->Step before reading. For a backward view that position
+// is Length strides above Elements.
+PyObject* TPyThinListIterator::New(TPyThinList* list)
+{
+    TPyThinListIterator* iter = new TPyThinListIterator;
+    PyObject_INIT(iter, &PyThinListIteratorType);
+
+    iter->List.Reset(list);
+    iter->Count = list->Length;
+
+    if (list->Step > 0) {
+        iter->Elements = list->Elements - list->Step;
+    } else {
+        iter->Elements = list->Elements - list->Length * list->Step;
+    }
+
+    return reinterpret_cast<PyObject*>(iter);
+}
+
+// tp_iternext handler: advances the cursor by the list's Step and returns the
+// converted item; returns nullptr once Count items have been produced.
+PyObject* TPyThinListIterator::Next(PyObject* self)
+{
+    PY_TRY {
+        TPyThinListIterator* iter = Cast(self);
+
+        // Exhausted: nullptr is returned without setting a Python error here.
+        if (!iter->Count) {
+            return nullptr;
+        }
+
+        --iter->Count;
+        TPyThinList* list = iter->List.Get();
+        iter->Elements += list->Step;
+        return ToPyObject(list->CastCtx, list->ItemType, *iter->Elements).Release();
+    } PY_CATCH(nullptr)
+}
+
+// Wraps a list value as a Python object.
+// When the value reports a non-null elements pointer a TPyThinList view over
+// those elements is created; otherwise the value is wrapped in a TPyLazyList.
+TPyObjectPtr ToPyLazyList(
+    const TPyCastContext::TPtr& castCtx,
+    const NUdf::TType* itemType,
+    const NUdf::TUnboxedValuePod& value)
+{
+    const auto elements = value.GetElements();
+    if (!elements) {
+        return TPyLazyList::New(castCtx, itemType, value.AsBoxed());
+    }
+
+    return TPyThinList::New(castCtx, itemType, value.AsBoxed(), elements, value.GetListLength());
+}
+
+} // namespace NPython
diff --git a/yql/essentials/udfs/common/python/bindings/py_list.h b/yql/essentials/udfs/common/python/bindings/py_list.h
new file mode 100644
index 00000000000..9db170a7954
--- /dev/null
+++ b/yql/essentials/udfs/common/python/bindings/py_list.h
@@ -0,0 +1,33 @@
+#pragma once
+
+#include "py_ptr.h"
+#include "py_ctx.h"
+
+namespace NPython { // Public interface of the Python list bindings: type objects plus list conversion helpers.
+
+extern PyTypeObject PyLazyListIteratorType; // type objects for the lazy list wrapper and its iterator (definitions are not in this header)
+extern PyTypeObject PyLazyListType;
+extern PyTypeObject PyThinListIteratorType; // type objects for the thin list wrapper and its iterator (definitions are not in this header)
+extern PyTypeObject PyThinListType;
+
+TPyObjectPtr ToPyLazyList( // wraps a list value with the given item type as a Python object
+ const TPyCastContext::TPtr& castCtx,
+ const NKikimr::NUdf::TType* itemType,
+ const NKikimr::NUdf::TUnboxedValuePod& value);
+
+NKikimr::NUdf::TUnboxedValue FromPyLazyGenerator( // NOTE(review): presumably builds a value of `type` from a Python callable - confirm against the definition
+ const TPyCastContext::TPtr& castCtx,
+ const NKikimr::NUdf::TType* type,
+ TPyObjectPtr callableObj);
+
+NKikimr::NUdf::TUnboxedValue FromPyLazyIterable( // NOTE(review): presumably builds a value of `type` from a Python iterable - confirm against the definition
+ const TPyCastContext::TPtr& castCtx,
+ const NKikimr::NUdf::TType* type,
+ TPyObjectPtr iterableObj);
+
+NKikimr::NUdf::TUnboxedValue FromPyLazyIterator( // NOTE(review): presumably builds a value of `type` from a Python iterator - confirm against the definition
+ const TPyCastContext::TPtr& castCtx,
+ const NKikimr::NUdf::TType* type,
+ TPyObjectPtr iteratorObj);
+
+} // namespace NPython
diff --git a/yql/essentials/udfs/common/python/bindings/py_list_ut.cpp b/yql/essentials/udfs/common/python/bindings/py_list_ut.cpp
new file mode 100644
index 00000000000..f16165fc54b
--- /dev/null
+++ b/yql/essentials/udfs/common/python/bindings/py_list_ut.cpp
@@ -0,0 +1,1025 @@
+#include "ut3/py_test_engine.h"
+
+#include <yql/essentials/public/udf/udf_ut_helpers.h>
+
+#include <library/cpp/testing/unittest/registar.h>
+
+
+using namespace NPython;
+
+Y_UNIT_TEST_SUITE(TPyListTest) {
+ Y_UNIT_TEST(FromPyEmptyList) { // The value obtained for a Python `[]` must be set, boxed and have list length 0.
+ TPythonTestEngine engine;
+ engine.ToMiniKQL<NUdf::TListType<ui32>>(
+ "def Test(): return []",
+ [](const NUdf::TUnboxedValuePod& value) {
+ UNIT_ASSERT(value);
+ UNIT_ASSERT(value.IsBoxed());
+ UNIT_ASSERT_EQUAL(value.GetListLength(), 0);
+ });
+ }
+
+ Y_UNIT_TEST(FromPyList) { // The value obtained for a Python `[1, 2, 3, 4]` must iterate as 1, 2, 3, 4 and then stop.
+ TPythonTestEngine engine;
+ engine.ToMiniKQL<NUdf::TListType<ui32>>(
+ "def Test(): return [1, 2, 3, 4]",
+ [](const NUdf::TUnboxedValuePod& value) {
+ UNIT_ASSERT(value);
+ UNIT_ASSERT(value.IsBoxed());
+ UNIT_ASSERT_EQUAL(value.GetListLength(), 4);
+ const auto it = value.GetListIterator();
+ NUdf::TUnboxedValue item;
+
+ UNIT_ASSERT(it.Next(item));
+ UNIT_ASSERT_EQUAL(item.Get<ui32>(), 1);
+ UNIT_ASSERT(it.Next(item));
+ UNIT_ASSERT_EQUAL(item.Get<ui32>(), 2);
+ UNIT_ASSERT(it.Next(item));
+ UNIT_ASSERT_EQUAL(item.Get<ui32>(), 3);
+ UNIT_ASSERT(it.Next(item));
+ UNIT_ASSERT_EQUAL(item.Get<ui32>(), 4);
+ UNIT_ASSERT(false == it.Next(item)); // a fifth Next() must report exhaustion
+ });
+ }
+
+ Y_UNIT_TEST(ToPyEmptyList) { // An empty list from the value builder must report has_fast_len() and len() == 0 in Python.
+ TPythonTestEngine engine;
+ engine.ToPython<NUdf::TListType<char*>>(
+ [](const TType* type, const NUdf::IValueBuilder& vb) {
+ Y_UNUSED(type);
+ return vb.NewEmptyList();
+ },
+ "def Test(value):\n"
+ " assert value.has_fast_len()\n"
+ " assert len(value) == 0\n");
+ }
+
+ Y_UNIT_TEST(ToPyList) { // A three-item list of doubles must show up in Python with fast len, float items and the same values.
+ TPythonTestEngine engine;
+ engine.ToPython<NUdf::TListType<double>>(
+ [](const TType* type, const NUdf::IValueBuilder& vb) {
+ Y_UNUSED(type);
+ std::array<NUdf::TUnboxedValue, 3U> list = {{
+ NUdf::TUnboxedValuePod(0.1),
+ NUdf::TUnboxedValuePod(0.2),
+ NUdf::TUnboxedValuePod(0.3)
+ }};
+ return vb.NewList(list.data(), list.size());
+ },
+ "def Test(value):\n"
+ " assert value.has_fast_len()\n"
+ " assert len(value) == 3\n"
+ " assert all(isinstance(v, float) for v in value)\n"
+ " assert list(value) == [0.1, 0.2, 0.3]\n");
+ }
+
+ // The value obtained for a Python tuple `(1, 2, 3)` must be a boxed list of
+ // length 3 whose iteration yields 1, 2, 3 in order.
+ Y_UNIT_TEST(FromPyTuple) {
+     TPythonTestEngine engine;
+     engine.ToMiniKQL<NUdf::TListType<ui32>>(
+         "def Test(): return (1, 2, 3)",
+         [](const NUdf::TUnboxedValuePod& value) {
+             UNIT_ASSERT(value);
+             UNIT_ASSERT(value.IsBoxed());
+             UNIT_ASSERT_EQUAL(value.GetListLength(), 3);
+
+             ui32 expected = 1;
+             auto it = value.GetListIterator();
+             for (NUdf::TUnboxedValue item; it.Next(item);) {
+                 ui32 actual = item.Get<ui32>();
+                 UNIT_ASSERT_EQUAL(actual, expected);
+                 expected++;
+             }
+
+             // Without this check an iterator that yields nothing (or stops
+             // early) would pass the loop above unnoticed.
+             UNIT_ASSERT_EQUAL(expected, 4);
+         });
+ }
+
+ Y_UNIT_TEST(ThinListIteration) { // iter() over a list built with NewList must yield each item once and then raise StopIteration.
+ TPythonTestEngine engine;
+ engine.ToPython<NUdf::TListType<double>>(
+ [](const TType* type, const NUdf::IValueBuilder& vb) {
+ Y_UNUSED(type);
+ std::array<NUdf::TUnboxedValue, 3U> list = {{
+ NUdf::TUnboxedValuePod(0.1),
+ NUdf::TUnboxedValuePod(0.2),
+ NUdf::TUnboxedValuePod(0.3)
+ }};
+ return vb.NewList(list.data(), list.size());
+ },
+ "def Test(value):\n"
+ " assert '__iter__' in dir(value)\n"
+ " it = iter(value)\n"
+ " assert next(it) == 0.1\n"
+ " assert next(it) == 0.2\n"
+ " assert next(it) == 0.3\n"
+ " try:\n"
+ " next(it)\n"
+ " except StopIteration:\n"
+ " pass\n"
+ " else:\n"
+ " assert False\n"
+ );
+ }
+
+ Y_UNIT_TEST(ThinListReversed) { // reversed() over a ten-item NewList list must agree pairwise with reversed(range(10)).
+ TPythonTestEngine engine;
+ engine.ToPython<NUdf::TListType<i32>>(
+ [](const TType* type, const NUdf::IValueBuilder& vb) {
+ Y_UNUSED(type);
+ std::array<NUdf::TUnboxedValue, 10U> list = {{
+ NUdf::TUnboxedValuePod(0U),
+ NUdf::TUnboxedValuePod(1U),
+ NUdf::TUnboxedValuePod(2U),
+ NUdf::TUnboxedValuePod(3U),
+ NUdf::TUnboxedValuePod(4U),
+ NUdf::TUnboxedValuePod(5U),
+ NUdf::TUnboxedValuePod(6U),
+ NUdf::TUnboxedValuePod(7U),
+ NUdf::TUnboxedValuePod(8U),
+ NUdf::TUnboxedValuePod(9U)
+ }};
+ return vb.NewList(list.data(), list.size());
+ },
+ "def Test(v):\n"
+ " e = list(range(0, 10))\n"
+ " assert '__reversed__' in dir(v)\n"
+ " assert all(one == two for one, two in zip(reversed(v), reversed(e)))\n"
+ );
+ }
+
+ Y_UNIT_TEST(LazyListReversed) { // reversed() over TLazyList<false>(0, 3) must yield 2, 1, 0 and then raise StopIteration.
+ TPythonTestEngine engine;
+ engine.ToPython<NUdf::TListType<i32>>(
+ [](const TType* type, const NUdf::IValueBuilder& vb) {
+ Y_UNUSED(type); Y_UNUSED(vb);
+ return NUdf::TUnboxedValuePod(new NUdf::TLazyList<false>(0, 3)); // the script below expects this list to hold 0, 1, 2
+ },
+ "def Test(v):\n"
+ " assert '__reversed__' in dir(v)\n"
+ " it = iter(reversed(v))\n"
+ " assert next(it) == 2\n"
+ " assert next(it) == 1\n"
+ " assert next(it) == 0\n"
+ " try:\n"
+ " next(it)\n"
+ " except StopIteration:\n"
+ " pass\n"
+ " else:\n"
+ " assert False\n"
+ );
+ }
+
+ Y_UNIT_TEST(LazyListIteration) { // iter() over TLazyList<false>(0, 3) must yield 0, 1, 2 and then raise StopIteration.
+ TPythonTestEngine engine;
+ engine.ToPython<NUdf::TListType<i32>>(
+ [](const TType* type, const NUdf::IValueBuilder& vb) {
+ Y_UNUSED(type); Y_UNUSED(vb);
+ return NUdf::TUnboxedValuePod(new NUdf::TLazyList<false>(0, 3));
+ },
+ "def Test(value):\n"
+ " assert '__iter__' in dir(value)\n"
+ " it = iter(value)\n"
+ " assert next(it) == 0\n"
+ " assert next(it) == 1\n"
+ " assert next(it) == 2\n"
+ " try:\n"
+ " next(it)\n"
+ " except StopIteration:\n"
+ " pass\n"
+ " else:\n"
+ " assert False\n"
+ );
+ }
+
+ Y_UNIT_TEST(LazyListInvalidIndexType) { // Subscripting a TLazyList-backed list with a dict key must raise TypeError.
+ TPythonTestEngine engine;
+ engine.ToPython<NUdf::TListType<i32>>(
+ [](const TType* type, const NUdf::IValueBuilder& vb) {
+ Y_UNUSED(type); Y_UNUSED(vb);
+ return NUdf::TUnboxedValuePod(new NUdf::TLazyList<false>(0, 3));
+ },
+ "def Test(v):\n"
+ " try:\n"
+ " print(v[{}])\n"
+ " except TypeError:\n"
+ " pass\n"
+ " else:\n"
+ " assert False\n"
+ );
+ }
+
+ Y_UNIT_TEST(ThinListInvalidIndexType) { // Subscripting a NewList-built list with a dict key must raise TypeError.
+ TPythonTestEngine engine;
+ engine.ToPython<NUdf::TListType<double>>(
+ [](const TType* type, const NUdf::IValueBuilder& vb) {
+ Y_UNUSED(type);
+ std::array<NUdf::TUnboxedValue, 3U> list = {{
+ NUdf::TUnboxedValuePod(0.1),
+ NUdf::TUnboxedValuePod(0.2),
+ NUdf::TUnboxedValuePod(0.3)
+ }};
+ return vb.NewList(list.data(), list.size());
+ },
+ "def Test(v):\n"
+ " try:\n"
+ " print(v[{}])\n"
+ " except TypeError:\n"
+ " pass\n"
+ " else:\n"
+ " assert False\n"
+ );
+ }
+
+ Y_UNIT_TEST(LazyListZeroSliceStep) { // Slicing a TLazyList-backed list with step 0 must raise ValueError.
+ TPythonTestEngine engine;
+ engine.ToPython<NUdf::TListType<i32>>(
+ [](const TType* type, const NUdf::IValueBuilder& vb) {
+ Y_UNUSED(type); Y_UNUSED(vb);
+ return NUdf::TUnboxedValuePod(new NUdf::TLazyList<false>(0, 3));
+ },
+ "def Test(v):\n"
+ " try:\n"
+ " print(v[::0])\n"
+ " except ValueError:\n"
+ " pass\n"
+ " else:\n"
+ " assert False\n"
+ );
+ }
+
+ Y_UNIT_TEST(ThinListZeroSliceStep) { // Slicing a NewList-built list with step 0 must raise ValueError.
+ TPythonTestEngine engine;
+ engine.ToPython<NUdf::TListType<double>>(
+ [](const TType* type, const NUdf::IValueBuilder& vb) {
+ Y_UNUSED(type);
+ std::array<NUdf::TUnboxedValue, 3U> list = {{
+ NUdf::TUnboxedValuePod(0.1),
+ NUdf::TUnboxedValuePod(0.2),
+ NUdf::TUnboxedValuePod(0.3)
+ }};
+ return vb.NewList(list.data(), list.size());
+ },
+ "def Test(v):\n"
+ " try:\n"
+ " print(v[::0])\n"
+ " except ValueError:\n"
+ " pass\n"
+ " else:\n"
+ " assert False\n"
+ );
+ }
+
+ Y_UNIT_TEST(ThinListSlice) { // Unit-step slices (step 1 / -1) of a ten-item NewList list must match the same slices of list(range(10)).
+ TPythonTestEngine engine;
+ engine.ToPython<NUdf::TListType<i32>>(
+ [](const TType* type, const NUdf::IValueBuilder& vb) {
+ Y_UNUSED(type);
+ std::array<NUdf::TUnboxedValue, 10U> list = {{
+ NUdf::TUnboxedValuePod(0U),
+ NUdf::TUnboxedValuePod(1U),
+ NUdf::TUnboxedValuePod(2U),
+ NUdf::TUnboxedValuePod(3U),
+ NUdf::TUnboxedValuePod(4U),
+ NUdf::TUnboxedValuePod(5U),
+ NUdf::TUnboxedValuePod(6U),
+ NUdf::TUnboxedValuePod(7U),
+ NUdf::TUnboxedValuePod(8U),
+ NUdf::TUnboxedValuePod(9U)
+ }};
+ return vb.NewList(list.data(), list.size());
+ },
+ "def Test(v):\n" // v is the list under test, e is the reference Python list
+ " e = list(range(0, 10))\n"
+ " assert '__len__' in dir(v)\n"
+ " assert list(v[::1]) == e[::1]\n"
+ " assert list(v[::-1]) == e[::-1]\n"
+ " assert list(v[1::1]) == e[1::1]\n"
+ " assert list(v[2::1]) == e[2::1]\n"
+ " assert list(v[3::1]) == e[3::1]\n"
+ " assert list(v[:-1:1]) == e[:-1:1]\n"
+ " assert list(v[:-2:1]) == e[:-2:1]\n"
+ " assert list(v[:-3:1]) == e[:-3:1]\n"
+ " assert list(v[1::-1]) == e[1::-1]\n"
+ " assert list(v[2::-1]) == e[2::-1]\n"
+ " assert list(v[3::-1]) == e[3::-1]\n"
+ " assert list(v[:-1:-1]) == e[:-1:-1]\n"
+ " assert list(v[:-2:-1]) == e[:-2:-1]\n"
+ " assert list(v[:-3:-1]) == e[:-3:-1]\n"
+ " assert list(v[:-2:-1]) == e[:-2:-1]\n" // NOTE(review): repeats the [:-2:-1] check from two lines above
+ " assert list(v[-12:-1:1]) == e[-12:-1:1]\n"
+ " assert list(v[-12:-1:-1]) == e[-12:-1:-1]\n"
+ " assert list(v[-5:-3:1]) == e[-5:-3:1]\n"
+ " assert list(v[-7:-2:-1]) == e[-7:-2:-1]\n"
+ " assert list(v[:7:1]) == e[:7:1]\n"
+ " assert list(v[-1:4]) == e[-1:4]\n"
+ " assert list(v[5:11]) == e[5:11]\n"
+ " assert list(v[4:1]) == e[4:1]\n"
+ " assert list(v[5:-2]) == e[5:-2]\n"
+ );
+ }
+
+ Y_UNIT_TEST(ThinListSliceOverReversed) { // Same unit-step slice checks as ThinListSlice, applied to reversed(x) against a reversed reference list.
+ TPythonTestEngine engine;
+ engine.ToPython<NUdf::TListType<i32>>(
+ [](const TType* type, const NUdf::IValueBuilder& vb) {
+ Y_UNUSED(type);
+ std::array<NUdf::TUnboxedValue, 10U> list = {{
+ NUdf::TUnboxedValuePod(0U),
+ NUdf::TUnboxedValuePod(1U),
+ NUdf::TUnboxedValuePod(2U),
+ NUdf::TUnboxedValuePod(3U),
+ NUdf::TUnboxedValuePod(4U),
+ NUdf::TUnboxedValuePod(5U),
+ NUdf::TUnboxedValuePod(6U),
+ NUdf::TUnboxedValuePod(7U),
+ NUdf::TUnboxedValuePod(8U),
+ NUdf::TUnboxedValuePod(9U)
+ }};
+ return vb.NewList(list.data(), list.size());
+ },
+ "def Test(x):\n" // only slice steps of 1 and -1 are exercised on the reversed view here
+ " e = list(reversed(range(0, 10)))\n"
+ " v = reversed(x)\n"
+ " assert list(v[::1]) == e[::1]\n"
+ " assert list(v[::-1]) == e[::-1]\n"
+ " assert list(v[1::1]) == e[1::1]\n"
+ " assert list(v[2::1]) == e[2::1]\n"
+ " assert list(v[3::1]) == e[3::1]\n"
+ " assert list(v[:-1:1]) == e[:-1:1]\n"
+ " assert list(v[:-2:1]) == e[:-2:1]\n"
+ " assert list(v[:-3:1]) == e[:-3:1]\n"
+ " assert list(v[1::-1]) == e[1::-1]\n"
+ " assert list(v[2::-1]) == e[2::-1]\n"
+ " assert list(v[3::-1]) == e[3::-1]\n"
+ " assert list(v[:-1:-1]) == e[:-1:-1]\n"
+ " assert list(v[:-2:-1]) == e[:-2:-1]\n"
+ " assert list(v[:-3:-1]) == e[:-3:-1]\n"
+ " assert list(v[:-2:-1]) == e[:-2:-1]\n" // NOTE(review): repeats the [:-2:-1] check from two lines above
+ " assert list(v[-12:-1:1]) == e[-12:-1:1]\n"
+ " assert list(v[-12:-1:-1]) == e[-12:-1:-1]\n"
+ " assert list(v[-5:-3:1]) == e[-5:-3:1]\n"
+ " assert list(v[-7:-2:-1]) == e[-7:-2:-1]\n"
+ " assert list(v[:7:1]) == e[:7:1]\n"
+ " assert list(v[-1:4]) == e[-1:4]\n"
+ " assert list(v[5:11]) == e[5:11]\n"
+ " assert list(v[4:1]) == e[4:1]\n"
+ " assert list(v[5:-2]) == e[5:-2]\n"
+ );
+ }
+
+ Y_UNIT_TEST(LazyListSlice) { // Slices of TLazyList<true>(0, 10) must match the same slices of list(range(10)).
+ TPythonTestEngine engine;
+ engine.ToPython<NUdf::TListType<i32>>(
+ [](const TType* type, const NUdf::IValueBuilder& vb) {
+ Y_UNUSED(type); Y_UNUSED(vb);
+ return NUdf::TUnboxedValuePod(new NUdf::TLazyList<true>(0, 10)); // the script below expects this list to equal range(0, 10)
+ },
+ "def Test(v):\n"
+ " e = list(range(0, 10))\n"
+ " assert '__len__' in dir(v)\n"
+ " assert len(v) == len(e)\n"
+ " assert list(v[::1]) == e[::1]\n"
+ " assert list(v[::-1]) == e[::-1]\n"
+ " assert list(v[3:]) == e[3:]\n"
+ " assert list(v[-2:]) == e[-2:]\n"
+ " assert list(v[2::-1]) == e[2::-1]\n"
+ " assert list(v[:-2:-1]) == e[:-2:-1]\n"
+ " assert list(v[-12:-1:1]) == e[-12:-1:1]\n"
+ " assert list(v[-12:-1:-1]) == e[-12:-1:-1]\n"
+ " assert list(v[-5:-3:1]) == e[-5:-3:1]\n"
+ " assert list(v[-7:-2:-1]) == e[-7:-2:-1]\n"
+ " assert list(v[:7:1]) == e[:7:1]\n"
+ " assert list(v[-1:4]) == e[-1:4]\n"
+ " assert list(v[5:11]) == e[5:11]\n"
+ " assert list(v[4:1]) == e[4:1]\n"
+ " assert list(v[5:-2]) == e[5:-2]\n"
+ );
+ }
+
+ Y_UNIT_TEST(ThinListIterateSliceWithStep) { // Iterating stepped (and nested) slices of a twenty-item NewList list must agree pairwise with list(range(20)).
+ TPythonTestEngine engine;
+ engine.ToPython<NUdf::TListType<i32>>(
+ [](const TType* type, const NUdf::IValueBuilder& vb) {
+ Y_UNUSED(type);
+ std::array<NUdf::TUnboxedValue, 20U> list = {{
+ NUdf::TUnboxedValuePod(0U),
+ NUdf::TUnboxedValuePod(1U),
+ NUdf::TUnboxedValuePod(2U),
+ NUdf::TUnboxedValuePod(3U),
+ NUdf::TUnboxedValuePod(4U),
+ NUdf::TUnboxedValuePod(5U),
+ NUdf::TUnboxedValuePod(6U),
+ NUdf::TUnboxedValuePod(7U),
+ NUdf::TUnboxedValuePod(8U),
+ NUdf::TUnboxedValuePod(9U),
+ NUdf::TUnboxedValuePod(10U),
+ NUdf::TUnboxedValuePod(11U),
+ NUdf::TUnboxedValuePod(12U),
+ NUdf::TUnboxedValuePod(13U),
+ NUdf::TUnboxedValuePod(14U),
+ NUdf::TUnboxedValuePod(15U),
+ NUdf::TUnboxedValuePod(16U),
+ NUdf::TUnboxedValuePod(17U),
+ NUdf::TUnboxedValuePod(18U),
+ NUdf::TUnboxedValuePod(19U)
+ }};
+ return vb.NewList(list.data(), list.size());
+ },
+ "def Test(v):\n" // zip() stops at the shorter input, so these checks compare only the common prefix of each pair
+ " e = list(range(0, 20))\n"
+ " assert all(one == two for one, two in zip(iter(v[::2]), e[::2]))\n"
+ " assert all(one == two for one, two in zip(iter(v[3:8:2]), e[3:8:2]))\n"
+ " assert all(one == two for one, two in zip(iter(v[::-2]), e[::-2]))\n"
+ " assert all(one == two for one, two in zip(iter(v[::-3]), e[::-3]))\n"
+ " assert all(one == two for one, two in zip(iter(v[:3:-3]), e[:3:-3]))\n"
+ " assert all(one == two for one, two in zip(iter(v[-7::-3]), e[-7::-3]))\n"
+ " assert all(one == two for one, two in zip(iter(v[-6::-3]), e[-6::-3]))\n"
+ " assert all(one == two for one, two in zip(iter(v[-5::-3]), e[-5::-3]))\n"
+ " assert all(one == two for one, two in zip(iter(v[:-2:-2]), e[:-2:-2]))\n"
+ " assert all(one == two for one, two in zip(iter(v[-2:-6:-2]), e[-2:-6:-2]))\n"
+ " assert all(one == two for one, two in zip(iter(v[2:-6:-2][::2]), e[2:-6:-2][::2]))\n"
+ " assert all(one == two for one, two in zip(iter(v[2:6:-2][:-2:-2]), e[2:6:-2][:-2:-2]))\n"
+ " assert all(one == two for one, two in zip(iter(v[:-2:-2][:2:3]), e[:-2:-2][:2:3]))\n"
+ " assert all(one == two for one, two in zip(iter(v[:-2:-2][:2:-3]), e[:-2:-2][:2:-3]))\n"
+ " assert all(one == two for one, two in zip(iter(v[:-2:2][:2:3]), e[:-2:2][:2:3]))\n"
+ );
+ }
+
+ Y_UNIT_TEST(LazyListIterateSliceWithStep) { // Iterating stepped (and nested) slices of TLazyList<true>(0, 20) must agree pairwise with list(range(20)).
+ TPythonTestEngine engine;
+ engine.ToPython<NUdf::TListType<i32>>(
+ [](const TType* type, const NUdf::IValueBuilder& vb) {
+ Y_UNUSED(type); Y_UNUSED(vb);
+ return NUdf::TUnboxedValuePod(new NUdf::TLazyList<true>(0, 20));
+ },
+ "def Test(v):\n" // zip() stops at the shorter input, so these checks compare only the common prefix of each pair
+ " e = list(range(0, 20))\n"
+ " assert all(one == two for one, two in zip(iter(v[::2]), e[::2]))\n"
+ " assert all(one == two for one, two in zip(iter(v[::-3]), e[::-3]))\n"
+ " assert all(one == two for one, two in zip(iter(v[:3:-3]), e[:3:-3]))\n"
+ " assert all(one == two for one, two in zip(iter(v[3:4:2]), e[3:4:2]))\n"
+ " assert all(one == two for one, two in zip(iter(v[-7::-3]), e[-7::-3]))\n"
+ " assert all(one == two for one, two in zip(iter(v[-6::-3]), e[-6::-3]))\n"
+ " assert all(one == two for one, two in zip(iter(v[-5::-3]), e[-5::-3]))\n"
+ " assert all(one == two for one, two in zip(iter(v[:-2:-2]), e[:-2:-2]))\n"
+ " assert all(one == two for one, two in zip(iter(v[-2:-6:-2]), e[-2:-6:-2]))\n"
+ " assert all(one == two for one, two in zip(iter(v[2:-6:-2][::2]), e[2:-6:-2][::2]))\n"
+ " assert all(one == two for one, two in zip(iter(v[2:6:-2][:-2:-2]), e[2:6:-2][:-2:-2]))\n"
+ " assert all(one == two for one, two in zip(iter(v[:-2:2][:2:3]), e[:-2:2][:2:3]))\n"
+ " assert all(one == two for one, two in zip(iter(v[:-2:-2][:2:3]), e[:-2:-2][:2:3]))\n"
+ " assert all(one == two for one, two in zip(iter(v[:-2:-2][:2:-3]), e[:-2:-2][:2:-3]))\n"
+ );
+ }
+
+ Y_UNIT_TEST(ThinListGetByIndexSliceWithStep) { // Integer indexing into stepped slices of a twenty-item NewList list must match list(range(20)).
+ TPythonTestEngine engine;
+ engine.ToPython<NUdf::TListType<i32>>(
+ [](const TType* type, const NUdf::IValueBuilder& vb) {
+ Y_UNUSED(type);
+ std::array<NUdf::TUnboxedValue, 20U> list = {{
+ NUdf::TUnboxedValuePod(0U),
+ NUdf::TUnboxedValuePod(1U),
+ NUdf::TUnboxedValuePod(2U),
+ NUdf::TUnboxedValuePod(3U),
+ NUdf::TUnboxedValuePod(4U),
+ NUdf::TUnboxedValuePod(5U),
+ NUdf::TUnboxedValuePod(6U),
+ NUdf::TUnboxedValuePod(7U),
+ NUdf::TUnboxedValuePod(8U),
+ NUdf::TUnboxedValuePod(9U),
+ NUdf::TUnboxedValuePod(10U),
+ NUdf::TUnboxedValuePod(11U),
+ NUdf::TUnboxedValuePod(12U),
+ NUdf::TUnboxedValuePod(13U),
+ NUdf::TUnboxedValuePod(14U),
+ NUdf::TUnboxedValuePod(15U),
+ NUdf::TUnboxedValuePod(16U),
+ NUdf::TUnboxedValuePod(17U),
+ NUdf::TUnboxedValuePod(18U),
+ NUdf::TUnboxedValuePod(19U)
+ }};
+ return vb.NewList(list.data(), list.size());
+ },
+ "def Test(v):\n" // both positive and negative indices are probed on each slice
+ " e = list(range(0, 20))\n"
+ " assert v[::2][3] == e[::2][3]\n"
+ " assert v[::2][5] == e[::2][5]\n"
+ " assert v[::2][-3] == e[::2][-3]\n"
+ " assert v[::2][-7] == e[::2][-7]\n"
+ " assert v[2::2][4] == e[2::2][4]\n"
+ " assert v[2::2][5] == e[2::2][5]\n"
+ " assert v[2::2][-7] == e[2::2][-7]\n"
+ " assert v[2::2][-2] == e[2::2][-2]\n"
+ " assert v[:-3:2][2] == e[:-3:2][2]\n"
+ " assert v[:-3:2][4] == e[:-3:2][4]\n"
+ " assert v[:-3:2][-1] == e[:-3:2][-1]\n"
+ " assert v[:-3:2][-2] == e[:-3:2][-2]\n"
+ " assert v[:-4:3][2] == e[:-4:3][2]\n"
+ " assert v[:-4:3][4] == e[:-4:3][4]\n"
+ " assert v[:-4:3][-3] == e[:-4:3][-3]\n"
+ " assert v[:-4:3][-2] == e[:-4:3][-2]\n"
+ " assert v[-6::-3][1] == e[-6::-3][1]\n"
+ " assert v[-6::-3][3] == e[-6::-3][3]\n"
+ " assert v[-6::-3][-4] == e[-6::-3][-4]\n"
+ " assert v[-6::-3][-1] == e[-6::-3][-1]\n"
+ );
+ }
+
+ Y_UNIT_TEST(LazyListGetByIndexSliceWithStep) { // Integer indexing into stepped slices of TLazyList<true>(0, 20) must match list(range(20)).
+ TPythonTestEngine engine;
+ engine.ToPython<NUdf::TListType<i32>>(
+ [](const TType* type, const NUdf::IValueBuilder& vb) {
+ Y_UNUSED(type); Y_UNUSED(vb);
+ return NUdf::TUnboxedValuePod(new NUdf::TLazyList<true>(0, 20));
+ },
+ "def Test(v):\n" // both positive and negative indices are probed on each slice
+ " e = list(range(0, 20))\n"
+ " assert v[::2][3] == e[::2][3]\n"
+ " assert v[::2][5] == e[::2][5]\n"
+ " assert v[::2][-3] == e[::2][-3]\n"
+ " assert v[::2][-7] == e[::2][-7]\n"
+ " assert v[2::2][4] == e[2::2][4]\n"
+ " assert v[2::2][5] == e[2::2][5]\n"
+ " assert v[2::2][-7] == e[2::2][-7]\n"
+ " assert v[2::2][-2] == e[2::2][-2]\n"
+ " assert v[:-3:2][2] == e[:-3:2][2]\n"
+ " assert v[:-3:2][4] == e[:-3:2][4]\n"
+ " assert v[:-3:2][-1] == e[:-3:2][-1]\n"
+ " assert v[:-3:2][-2] == e[:-3:2][-2]\n"
+ " assert v[:-4:3][2] == e[:-4:3][2]\n"
+ " assert v[:-4:3][4] == e[:-4:3][4]\n"
+ " assert v[:-4:3][-3] == e[:-4:3][-3]\n"
+ " assert v[:-4:3][-2] == e[:-4:3][-2]\n"
+ " assert v[-6::-3][1] == e[-6::-3][1]\n"
+ " assert v[-6::-3][3] == e[-6::-3][3]\n"
+ " assert v[-6::-3][-4] == e[-6::-3][-4]\n"
+ " assert v[-6::-3][-1] == e[-6::-3][-1]\n"
+ );
+ }
+
+ // Builds a 10-element list with IValueBuilder::NewList and checks in Python
+ // that it exposes __getitem__ and that positive and negative indices
+ // (including both ends, 9 and -10) match list(range(0, 10)).
+ Y_UNIT_TEST(ThinListByIndex) {
+ TPythonTestEngine engine;
+ engine.ToPython<NUdf::TListType<i32>>(
+ [](const TType* type, const NUdf::IValueBuilder& vb) {
+ Y_UNUSED(type);
+ std::array<NUdf::TUnboxedValue, 10U> list = {{
+ NUdf::TUnboxedValuePod(0U),
+ NUdf::TUnboxedValuePod(1U),
+ NUdf::TUnboxedValuePod(2U),
+ NUdf::TUnboxedValuePod(3U),
+ NUdf::TUnboxedValuePod(4U),
+ NUdf::TUnboxedValuePod(5U),
+ NUdf::TUnboxedValuePod(6U),
+ NUdf::TUnboxedValuePod(7U),
+ NUdf::TUnboxedValuePod(8U),
+ NUdf::TUnboxedValuePod(9U)
+ }};
+ return vb.NewList(list.data(), list.size());
+ },
+ "def Test(v):\n"
+ " e = list(range(0, 10))\n"
+ " assert '__getitem__' in dir(v)\n"
+ " assert v[0] == e[0]\n"
+ " assert v[3] == e[3]\n"
+ " assert v[5] == e[5]\n"
+ " assert v[9] == e[9]\n"
+ " assert v[-1] == e[-1]\n"
+ " assert v[-4] == e[-4]\n"
+ " assert v[-9] == e[-9]\n"
+ " assert v[-10] == e[-10]\n"
+ );
+ }
+
+ // Same index checks as for the builder-made list, but the value handed to
+ // Python is a TLazyList<false>(0, 10).
+ Y_UNIT_TEST(LazyListByIndex) {
+ TPythonTestEngine engine;
+ engine.ToPython<NUdf::TListType<i32>>(
+ [](const TType* type, const NUdf::IValueBuilder& vb) {
+ Y_UNUSED(type); Y_UNUSED(vb);
+ return NUdf::TUnboxedValuePod(new NUdf::TLazyList<false>(0, 10));
+ },
+ "def Test(v):\n"
+ " e = list(range(0, 10))\n"
+ " assert '__getitem__' in dir(v)\n"
+ " assert v[0] == e[0]\n"
+ " assert v[3] == e[3]\n"
+ " assert v[5] == e[5]\n"
+ " assert v[9] == e[9]\n"
+ " assert v[-1] == e[-1]\n"
+ " assert v[-4] == e[-4]\n"
+ " assert v[-9] == e[-9]\n"
+ " assert v[-10] == e[-10]\n"
+ );
+ }
+
+ // Indexing one position past either end (3 and -4) of a 3-element list
+ // built with IValueBuilder::NewList must raise IndexError in Python.
+ Y_UNIT_TEST(ThinListIndexOutOfBounds) {
+     TPythonTestEngine engine;
+     engine.ToPython<NUdf::TListType<i32>>(
+         [](const TType* type, const NUdf::IValueBuilder& vb) {
+             Y_UNUSED(type);
+             std::array<NUdf::TUnboxedValue, 3U> list = {{
+                 NUdf::TUnboxedValuePod(0U),
+                 NUdf::TUnboxedValuePod(1U),
+                 NUdf::TUnboxedValuePod(2U)
+             }};
+             return vb.NewList(list.data(), list.size());
+         },
+         // try/except/else bodies are indented one level deeper than their
+         // headers; without that the script is not valid Python.
+         "def Test(v):\n"
+         "    try:\n"
+         "        print(v[3])\n"
+         "    except IndexError:\n"
+         "        pass\n"
+         "    else:\n"
+         "        assert False\n"
+         "    try:\n"
+         "        print(v[-4])\n"
+         "    except IndexError:\n"
+         "        pass\n"
+         "    else:\n"
+         "        assert False\n"
+     );
+ }
+
+ // Indexing one position past either end (3 and -4) of TLazyList<false>(0, 3)
+ // must raise IndexError in Python.
+ Y_UNIT_TEST(LazyListIndexOutOfBounds) {
+     TPythonTestEngine engine;
+     engine.ToPython<NUdf::TListType<i32>>(
+         [](const TType* type, const NUdf::IValueBuilder& vb) {
+             Y_UNUSED(type); Y_UNUSED(vb);
+             return NUdf::TUnboxedValuePod(new NUdf::TLazyList<false>(0, 3));
+         },
+         // try/except/else bodies are indented one level deeper than their
+         // headers; without that the script is not valid Python.
+         "def Test(v):\n"
+         "    try:\n"
+         "        print(v[3])\n"
+         "    except IndexError:\n"
+         "        pass\n"
+         "    else:\n"
+         "        assert False\n"
+         "    try:\n"
+         "        print(v[-4])\n"
+         "    except IndexError:\n"
+         "        pass\n"
+         "    else:\n"
+         "        assert False\n"
+     );
+ }
+
+ // Slices TLazyList<false>(0, 10) in Python with various start/stop/step
+ // combinations and compares the iterated elements pairwise with the same
+ // slice of range(0, 10).
+ // zip() stops at the shorter input, so each assertion only compares the
+ // common prefix; a slice of the wrong length would not be detected here.
+ // NOTE(review): the v[1::-1] assertion appears twice; the second copy may
+ // have been meant to cover a different slice - confirm.
+ Y_UNIT_TEST(LazyListWithoutLenghNormalSlice) {
+ TPythonTestEngine engine;
+ engine.ToPython<NUdf::TListType<i32>>(
+ [](const TType* type, const NUdf::IValueBuilder& vb) {
+ Y_UNUSED(type); Y_UNUSED(vb);
+ return NUdf::TUnboxedValuePod(new NUdf::TLazyList<false>(0, 10));
+ },
+ "def Test(v):\n"
+ " e = range(0, 10)\n"
+ " assert '__len__' in dir(v)\n"
+ " assert all(one == two for one, two in zip(iter(v[::1]), e[::1]))\n"
+ " assert all(one == two for one, two in zip(iter(v[::-1]), e[::-1]))\n"
+ " assert all(one == two for one, two in zip(iter(v[4:]), e[4:]))\n"
+ " assert all(one == two for one, two in zip(iter(v[1::-1]), e[1::-1]))\n"
+ " assert all(one == two for one, two in zip(iter(v[:6:1]), e[:6:1]))\n"
+ " assert all(one == two for one, two in zip(iter(v[1::-1]), e[1::-1]))\n"
+ " assert all(one == two for one, two in zip(iter(v[4:11]), e[4:11]))\n"
+ " assert all(one == two for one, two in zip(iter(v[5:1]), e[5:1]))\n"
+ );
+ }
+
+ // Checks len(), skip(), take() and their chaining on a 10-element list built
+ // with IValueBuilder::NewList, and that skip(-1) raises IndexError.
+ Y_UNIT_TEST(ThinListTakeSkip) {
+     TPythonTestEngine engine;
+     engine.ToPython<NUdf::TListType<i32>>(
+         [](const TType* type, const NUdf::IValueBuilder& vb) {
+             Y_UNUSED(type);
+             std::array<NUdf::TUnboxedValue, 10U> list = {{
+                 NUdf::TUnboxedValuePod(0U),
+                 NUdf::TUnboxedValuePod(1U),
+                 NUdf::TUnboxedValuePod(2U),
+                 NUdf::TUnboxedValuePod(3U),
+                 NUdf::TUnboxedValuePod(4U),
+                 NUdf::TUnboxedValuePod(5U),
+                 NUdf::TUnboxedValuePod(6U),
+                 NUdf::TUnboxedValuePod(7U),
+                 NUdf::TUnboxedValuePod(8U),
+                 NUdf::TUnboxedValuePod(9U)
+             }};
+             return vb.NewList(list.data(), list.size());
+         },
+         // try/except/else bodies are indented one level deeper than their
+         // headers; without that the script is not valid Python.
+         "def Test(v):\n"
+         "    e = list(range(0, 10))\n"
+         "    assert len(v) == len(e)\n"
+         "    assert list(v.skip(5)) == e[5:]\n"
+         "    assert list(v.take(5)) == e[0:5]\n"
+         "    assert list(v.skip(4).take(5)) == e[4:][:5]\n"
+         "    try:\n"
+         "        print(list(v.skip(-1)))\n"
+         "    except IndexError:\n"
+         "        pass\n"
+         "    else:\n"
+         "        assert False\n"
+     );
+ }
+
+ // Checks skip(), take() and their chaining on TLazyList<true>(0, 10), and
+ // that skip(-1) raises IndexError.
+ Y_UNIT_TEST(LazyListTakeSkip) {
+     TPythonTestEngine engine;
+     engine.ToPython<NUdf::TListType<i32>>(
+         [](const TType* type, const NUdf::IValueBuilder& vb) {
+             Y_UNUSED(type); Y_UNUSED(vb);
+             return NUdf::TUnboxedValuePod(new NUdf::TLazyList<true>(0, 10));
+         },
+         // try/except/else bodies are indented one level deeper than their
+         // headers; without that the script is not valid Python.
+         "def Test(v):\n"
+         "    e = list(range(0, 10))\n"
+         "    assert list(v.skip(5)) == e[5:]\n"
+         "    assert list(v.take(5)) == e[0:5]\n"
+         "    assert list(v.skip(4).take(5)) == e[4:][:5]\n"
+         "    try:\n"
+         "        print(list(v.skip(-1)))\n"
+         "    except IndexError:\n"
+         "        pass\n"
+         "    else:\n"
+         "        assert False\n"
+     );
+ }
+
+ // to_index_dict() on TLazyList<false>(3, 6) must give a 3-entry mapping
+ // from position (0, 1, 2) to element (3, 4, 5); position 3 is absent.
+ Y_UNIT_TEST(LazyListToIndexDict) {
+ TPythonTestEngine engine;
+ engine.ToPython<NUdf::TListType<i32>>(
+ [](const TType* type, const NUdf::IValueBuilder& vb) {
+ Y_UNUSED(type); Y_UNUSED(vb);
+ return NUdf::TUnboxedValuePod(new NUdf::TLazyList<false>(3, 6));
+ },
+ "def Test(value):\n"
+ " d = value.to_index_dict()\n"
+ " assert len(d) == 3\n"
+ " assert d[0] == 3\n"
+ " assert d[1] == 4\n"
+ " assert d[2] == 5\n"
+ " assert 3 not in d");
+ }
+
+ // Truthiness of list values seen from Python. The Lazy* tests pass a
+ // TLazyList, the Thin* tests pass a value made by the IValueBuilder, so each
+ // test name matches the fixture it actually exercises.
+
+ // A non-empty TLazyList must be truthy.
+ Y_UNIT_TEST(LazyListTrue) {
+     TPythonTestEngine engine;
+     engine.ToPython<NUdf::TListType<i32>>(
+         [](const TType* type, const NUdf::IValueBuilder& vb) {
+             Y_UNUSED(type); Y_UNUSED(vb);
+             return NUdf::TUnboxedValuePod(new NUdf::TLazyList<false>(3, 6));
+         },
+         "def Test(value):\n"
+         " assert value\n"
+     );
+ }
+
+ // An empty TLazyList must be falsy.
+ Y_UNIT_TEST(LazyListFalse) {
+     TPythonTestEngine engine;
+     engine.ToPython<NUdf::TListType<i32>>(
+         [](const TType* type, const NUdf::IValueBuilder& vb) {
+             Y_UNUSED(type); Y_UNUSED(vb);
+             return NUdf::TUnboxedValuePod(new NUdf::TLazyList<false>(0, 0));
+         },
+         "def Test(value):\n"
+         " assert not value\n"
+     );
+ }
+
+ // A one-element array made by the value builder must be truthy.
+ Y_UNIT_TEST(ThinListTrue) {
+     TPythonTestEngine engine;
+     engine.ToPython<NUdf::TListType<i32>>(
+         [](const TType* type, const NUdf::IValueBuilder& vb) {
+             Y_UNUSED(type);
+             // The single item is never assigned: only the truthiness of the
+             // list itself is checked.
+             NUdf::TUnboxedValue* items = nullptr;
+             return vb.NewArray(1U, items);
+         },
+         "def Test(value):\n"
+         " assert value\n"
+     );
+ }
+
+ // The empty list returned by IValueBuilder::NewEmptyList must be falsy in
+ // Python.
+ Y_UNIT_TEST(ThinListFalse) {
+ TPythonTestEngine engine;
+ engine.ToPython<NUdf::TListType<i32>>(
+ [](const TType* type, const NUdf::IValueBuilder& vb) {
+ Y_UNUSED(type);
+ return vb.NewEmptyList();
+ },
+ "def Test(value):\n"
+ " assert not value\n"
+ );
+ }
+
+ // has_items() on the non-empty TLazyList<false>(3, 6) must be truthy.
+ Y_UNIT_TEST(LazyListHasItems) {
+ TPythonTestEngine engine;
+ engine.ToPython<NUdf::TListType<i32>>(
+ [](const TType* type, const NUdf::IValueBuilder& vb) {
+ Y_UNUSED(type); Y_UNUSED(vb);
+ return NUdf::TUnboxedValuePod(new NUdf::TLazyList<false>(3, 6));
+ },
+ "def Test(value):\n"
+ " b = value.has_items()\n"
+ " assert b\n");
+ }
+
+ // has_items() on the empty TLazyList<false>(0, 0) must be falsy.
+ Y_UNIT_TEST(LazyListEmptyHasItems) {
+ TPythonTestEngine engine;
+ engine.ToPython<NUdf::TListType<i32>>(
+ [](const TType* type, const NUdf::IValueBuilder& vb) {
+ Y_UNUSED(type); Y_UNUSED(vb);
+ return NUdf::TUnboxedValuePod(new NUdf::TLazyList<false>(0, 0));
+ },
+ "def Test(value):\n"
+ " b = value.has_items()\n"
+ " assert not b\n");
+ }
+
+ // Membership on the index dict of TLazyList<false>(3, 6): positions 0..2
+ // are present, while 3 and -1 are not.
+ Y_UNIT_TEST(LazyIndexDictContains) {
+ TPythonTestEngine engine;
+ engine.ToPython<NUdf::TListType<i32>>(
+ [](const TType* type, const NUdf::IValueBuilder& vb) {
+ Y_UNUSED(type); Y_UNUSED(vb);
+ return NUdf::TUnboxedValuePod(new NUdf::TLazyList<false>(3, 6));
+ },
+ "def Test(value):\n"
+ " d = value.to_index_dict()\n"
+ " assert 0 in d\n"
+ " assert 1 in d\n"
+ " assert 2 in d\n"
+ " assert 3 not in d\n"
+ " assert -1 not in d");
+ }
+
+ // Iterating items() of the index dict of TLazyList<false>(3, 6) must yield
+ // (0, 3), (1, 4), (2, 5) in that order, and exactly three pairs.
+ Y_UNIT_TEST(LazyIndexDictIter) {
+     TPythonTestEngine engine;
+     engine.ToPython<NUdf::TListType<i32>>(
+         [](const TType* type, const NUdf::IValueBuilder& vb) {
+             Y_UNUSED(type); Y_UNUSED(vb);
+             return NUdf::TUnboxedValuePod(new NUdf::TLazyList<false>(3, 6));
+         },
+         "def Test(value):\n"
+         "    d = value.to_index_dict()\n"
+         "    i, j = 0, 3\n"
+         "    for k, v in d.items():\n"
+         "        assert i == k\n"
+         "        assert j == v\n"
+         "        i, j = i+1, j+1\n"
+         // Without this check the test would pass even if items() yielded
+         // nothing, because the loop body would never run.
+         "    assert i == 3");
+ }
+
+ // get() on the index dict of TLazyList<false>(3, 5): an existing position
+ // returns its element, a missing one returns None or the supplied default.
+ Y_UNIT_TEST(LazyIndexDictGet) {
+ TPythonTestEngine engine;
+ engine.ToPython<NUdf::TListType<i32>>(
+ [](const TType* type, const NUdf::IValueBuilder& vb) {
+ Y_UNUSED(type); Y_UNUSED(vb);
+ return NUdf::TUnboxedValuePod(new NUdf::TLazyList<false>(3, 5));
+ },
+ "def Test(value):\n"
+ " d = value.to_index_dict()\n"
+ " assert d.get(1) == 4\n"
+ " assert d.get(5) == None\n"
+ " assert d.get(5, 10) == 10\n");
+ }
+
+ // Test() returns the generator function itself (not a generator object).
+ // The resulting list value must be boxed, report no fast length, iterate
+ // as 0, 1, 2, ... and report a length of 10 afterwards.
+ Y_UNIT_TEST(FromPyGeneratorFactory) {
+     TPythonTestEngine engine;
+     engine.ToMiniKQL<NUdf::TListType<ui32>>(
+         // The function and loop bodies are indented one level deeper than
+         // their headers; without that the script is not valid Python.
+         "def first_10():\n"
+         "    num = 0\n"
+         "    while num < 10:\n"
+         "        yield num\n"
+         "        num += 1\n"
+         "def Test():\n"
+         "    return first_10\n",
+         [](const NUdf::TUnboxedValuePod& value) {
+             UNIT_ASSERT(value);
+             UNIT_ASSERT(value.IsBoxed());
+             UNIT_ASSERT(!value.HasFastListLength());
+             UNIT_ASSERT(value.HasListItems());
+
+             const auto it = value.GetListIterator();
+             ui32 expected = 0;
+             for (NUdf::TUnboxedValue item; it.Next(item);) {
+                 ui32 actual = item.Get<ui32>();
+                 UNIT_ASSERT_EQUAL(actual, expected);
+                 expected++;
+             }
+
+             UNIT_ASSERT_EQUAL(value.GetEstimatedListLength(), 10);
+             UNIT_ASSERT_EQUAL(value.GetListLength(), 10);
+         });
+ }
+
+ // Test() returns range(10) (xrange(10) when built against Python 2).
+ // The list value must initially report no fast length, iterate as 0..9,
+ // report length 10, and report a fast length once GetListLength() has
+ // been called.
+ Y_UNIT_TEST(FromPyIterable) {
+ TPythonTestEngine engine;
+ engine.ToMiniKQL<NUdf::TListType<ui32>>(
+ "def Test():\n"
+#if PY_MAJOR_VERSION >= 3
+ " return range(10)\n",
+#else
+ " return xrange(10)\n",
+#endif
+ [](const NUdf::TUnboxedValuePod& value) {
+ UNIT_ASSERT(value);
+ UNIT_ASSERT(value.IsBoxed());
+ UNIT_ASSERT(!value.HasFastListLength());
+ UNIT_ASSERT(value.HasListItems());
+
+ const auto it = value.GetListIterator();
+ ui32 expected = 0U;
+ for (NUdf::TUnboxedValue item; it.Next(item);) {
+ UNIT_ASSERT_EQUAL(item.Get<ui32>(), expected++);
+ }
+
+ UNIT_ASSERT_EQUAL(value.GetEstimatedListLength(), 10);
+ UNIT_ASSERT_EQUAL(value.GetListLength(), 10);
+ UNIT_ASSERT(value.HasFastListLength());
+ });
+ }
+
+ // Test() returns an instance of a user class that implements __len__,
+ // __nonzero__ and __iter__ over [1, 2]. The list value must report length 2,
+ // yield 1 then 2, and then be exhausted.
+ Y_UNIT_TEST(FromPyCustomIterable) {
+     TPythonTestEngine engine;
+     engine.ToMiniKQL<NUdf::TListType<ui32>>(
+         // Method definitions and their bodies are indented below the class
+         // header; without that the script is not valid Python.
+         "class T:\n"
+         "    def __init__(self, l):\n"
+         "        self.l = l\n"
+         "    def __len__(self):\n"
+         "        return len(self.l)\n"
+         "    def __nonzero__(self):\n"
+         "        return bool(self.l)\n"
+         "    def __iter__(self):\n"
+         "        return iter(self.l)\n"
+         "\n"
+         "def Test():\n"
+         "    return T([1, 2])\n",
+         [](const NUdf::TUnboxedValuePod& value) {
+             UNIT_ASSERT(value);
+             UNIT_ASSERT(value.IsBoxed());
+             UNIT_ASSERT(value.HasListItems());
+             UNIT_ASSERT_EQUAL(value.GetListLength(), 2);
+
+             auto it = value.GetListIterator();
+             {
+                 NUdf::TUnboxedValue item;
+                 // Fail here rather than read an item that was never filled.
+                 UNIT_ASSERT(it.Next(item));
+                 ui32 actual = item.Get<ui32>();
+                 UNIT_ASSERT_EQUAL(actual, 1);
+             }
+             {
+                 NUdf::TUnboxedValue item;
+                 UNIT_ASSERT(it.Next(item));
+                 ui32 actual = item.Get<ui32>();
+                 UNIT_ASSERT_EQUAL(actual, 2);
+             }
+
+             UNIT_ASSERT(false == it.Skip());
+         });
+ }
+
+ // Test() returns iter(range(2)). The list value must be boxed, report no
+ // fast length, yield 0 then 1, and then be exhausted.
+ Y_UNIT_TEST(FromPyIterator) {
+     TPythonTestEngine engine;
+     engine.ToMiniKQL<NUdf::TListType<ui32>>(
+         "def Test():\n"
+         " return iter(range(2))\n",
+         [](const NUdf::TUnboxedValuePod& value) {
+             UNIT_ASSERT(value);
+             UNIT_ASSERT(value.IsBoxed());
+             UNIT_ASSERT(false == value.HasFastListLength());
+
+             auto it = value.GetListIterator();
+             {
+                 NUdf::TUnboxedValue item;
+                 // Fail here rather than read an item that was never filled.
+                 UNIT_ASSERT(it.Next(item));
+                 ui32 actual = item.Get<ui32>();
+                 UNIT_ASSERT_EQUAL(actual, 0);
+             }
+             {
+                 NUdf::TUnboxedValue item;
+                 UNIT_ASSERT(it.Next(item));
+                 ui32 actual = item.Get<ui32>();
+                 UNIT_ASSERT_EQUAL(actual, 1);
+             }
+
+             UNIT_ASSERT(false == it.Skip());
+         });
+ }
+
+ // Test() is itself a generator function yielding 0 and 1. The list value
+ // must be boxed, report no fast length, yield 0 then 1, and then be
+ // exhausted.
+ Y_UNIT_TEST(FromPyGenerator) {
+     TPythonTestEngine engine;
+     engine.ToMiniKQL<NUdf::TListType<ui32>>(
+         "def Test():\n"
+         " yield 0\n"
+         " yield 1\n",
+         [](const NUdf::TUnboxedValuePod& value) {
+             UNIT_ASSERT(value);
+             UNIT_ASSERT(value.IsBoxed());
+             UNIT_ASSERT(false == value.HasFastListLength());
+
+             auto it = value.GetListIterator();
+             {
+                 NUdf::TUnboxedValue item;
+                 // Fail here rather than read an item that was never filled.
+                 UNIT_ASSERT(it.Next(item));
+                 ui32 actual = item.Get<ui32>();
+                 UNIT_ASSERT_EQUAL(actual, 0);
+             }
+             {
+                 NUdf::TUnboxedValue item;
+                 UNIT_ASSERT(it.Next(item));
+                 ui32 actual = item.Get<ui32>();
+                 UNIT_ASSERT_EQUAL(actual, 1);
+             }
+
+             UNIT_ASSERT(false == it.Skip());
+         });
+ }
+}
diff --git a/yql/essentials/udfs/common/python/bindings/py_number_ut.cpp b/yql/essentials/udfs/common/python/bindings/py_number_ut.cpp
new file mode 100644
index 00000000000..c55e25891d2
--- /dev/null
+++ b/yql/essentials/udfs/common/python/bindings/py_number_ut.cpp
@@ -0,0 +1,359 @@
+#include "ut3/py_test_engine.h"
+
+#include <library/cpp/testing/unittest/registar.h>
+
+// Generates TPy<Name>Checker: a functor asserting that `pyVal` passes
+// Py<PyType>_Check and that Py<PyType>_As<AsType>(pyVal) equals `expected`.
+// A converted value of -1 is accepted only while no Python error is pending,
+// since the CPython Py*_As* converters return -1 with an error set on failure.
+#define PY_CHECKER(Name, PyType, AsType, Type) \
+    struct TPy##Name##Checker { \
+        void operator()(PyObject* pyVal, Type expected) { \
+            UNIT_ASSERT(Py##PyType##_Check(pyVal)); \
+            Type val = Py##PyType##_As##AsType(pyVal); \
+            UNIT_ASSERT(val != static_cast<Type>(-1) || !PyErr_Occurred()); \
+            UNIT_ASSERT_EQUAL(val, expected); \
+        } \
+    };
+
+#if PY_MAJOR_VERSION >= 3
+PY_CHECKER(Long, Long, Long, long)
+#else
+PY_CHECKER(Int, Int, Long, long)
+#endif
+
+#ifdef HAVE_LONG_LONG
+PY_CHECKER(LLong, Long, LongLong, long long)
+PY_CHECKER(Ulong, Long, UnsignedLongLong, unsigned long long)
+#else
+PY_CHECKER(LLong, Long, Long, long)
+PY_CHECKER(Ulong, Long, UnsignedLong, unsigned long)
+#endif
+
+// PyFloat_AsDouble returns a double; compare as double so that neither the
+// converted value nor the expected one is truncated to an integer first.
+PY_CHECKER(Float, Float, Double, double)
+
+#undef PY_CHECKER
+
+using namespace NPython;
+
+Y_UNIT_TEST_SUITE(TPyNumberTest) {
+ // Round-trips every value of the half-open range [begin, end): converts it
+ // to a Python object with PyCast, validates that object with TPyChecker,
+ // converts it back and expects the original value.
+ template <typename T, typename TPyChecker>
+ void TestCastsInRange(T begin, T end) {
+     TPyChecker check;
+     for (T value = begin; value < end; value++) {
+         TPyObjectPtr asPython = PyCast<T>(value);
+         UNIT_ASSERT(asPython.Get() != nullptr);
+
+         check(asPython.Get(), value);
+
+         T roundTripped = PyCast<T>(asPython.Get());
+         UNIT_ASSERT_EQUAL(roundTripped, value);
+     }
+ }
+
+ // Runs TestCastsInRange over three windows of a signed type: the `range`
+ // values starting at Min<T>(), the values in [-range, range), and the
+ // `range` values just below Max<T>(). The windows are half-open, so
+ // Max<T>() itself is not exercised.
+ template <typename T, typename TPyChecker, int range = 10>
+ void TestSignedCasts() {
+ TPythonTestEngine engine;
+ TestCastsInRange<T, TPyChecker>(Min<T>(), Min<T>() + range);
+ TestCastsInRange<T, TPyChecker>(-range, range);
+ TestCastsInRange<T, TPyChecker>(Max<T>() - range, Max<T>());
+ }
+
+ // Runs TestCastsInRange over the `range` values starting at Min<T>() (checked
+ // with TPyDownChecker) and the `range` values just below Max<T>() (checked
+ // with TPyUpChecker, which defaults to TPyDownChecker). The windows are
+ // half-open, so Max<T>() itself is not exercised.
+ template <typename T, typename TPyDownChecker,
+ typename TPyUpChecker = TPyDownChecker, int range = 10>
+ void TestUnsignedCasts() {
+ TPythonTestEngine engine;
+ TestCastsInRange<T, TPyDownChecker>(Min<T>(), Min<T>() + range);
+ TestCastsInRange<T, TPyUpChecker>(Max<T>() - range, Max<T>());
+ }
+
+ // PyCast<bool>/TryPyCast<bool> on Py_True, Py_False and on a Python list:
+ // an empty list converts to false, a list with one element to true.
+ Y_UNIT_TEST(Bool) {
+ TPythonTestEngine engine;
+ UNIT_ASSERT_EQUAL(PyCast<bool>(Py_True), true);
+ UNIT_ASSERT_EQUAL(PyCast<bool>(Py_False), false);
+
+ TPyObjectPtr list = PyList_New(0);
+ UNIT_ASSERT_EQUAL(PyCast<bool>(list.Get()), false);
+ bool res1;
+ UNIT_ASSERT(TryPyCast<bool>(list.Get(), res1));
+ UNIT_ASSERT_EQUAL(res1, false);
+
+ // The return code of PyList_Append is not checked here.
+ PyList_Append(list.Get(), Py_None);
+ UNIT_ASSERT_EQUAL(PyCast<bool>(list.Get()), true);
+ bool res2;
+ UNIT_ASSERT(TryPyCast<bool>(list.Get(), res2));
+ UNIT_ASSERT_EQUAL(res2, true);
+ }
+
+ // Round-trips float values through Python float objects.
+ Y_UNIT_TEST(Float) {
+ TestSignedCasts<float, TPyFloatChecker>();
+ }
+
+ // Round-trips double values through Python float objects. double is a
+ // signed type, so use the signed helper: it also covers [-range, range),
+ // which the unsigned helper never visits.
+ Y_UNIT_TEST(Double) {
+     TestSignedCasts<double, TPyFloatChecker>();
+ }
+
+ // Round-trips i64 values; the Python object is validated by TPyLLongChecker.
+ Y_UNIT_TEST(I64) {
+ TestSignedCasts<i64, TPyLLongChecker>();
+ }
+
+ // Round-trips ui64 values; the Python object is validated by TPyUlongChecker.
+ Y_UNIT_TEST(Ui64) {
+ TestUnsignedCasts<ui64, TPyUlongChecker>();
+ }
+
+#if PY_MAJOR_VERSION >= 3
+ // Python 3 build: every integer width up to 32 bits is expected to become
+ // a Python int object, validated by TPyLongChecker.
+ Y_UNIT_TEST(I8) {
+ TestSignedCasts<i8, TPyLongChecker>();
+ }
+
+ Y_UNIT_TEST(Ui8) {
+ TestUnsignedCasts<ui8, TPyLongChecker>();
+ }
+
+ Y_UNIT_TEST(I16) {
+ TestSignedCasts<i16, TPyLongChecker>();
+ }
+
+ Y_UNIT_TEST(Ui16) {
+ TestUnsignedCasts<ui16, TPyLongChecker>();
+ }
+
+ Y_UNIT_TEST(I32) {
+ TestSignedCasts<i32, TPyLongChecker>();
+ }
+
+ Y_UNIT_TEST(Ui32) {
+ TestUnsignedCasts<ui32, TPyLongChecker>();
+ }
+ // Python 3 build: creates a Python int with PyLong_FromLong and checks that
+ // it converts to i64, ui64, float and double, while conversion to i32 and
+ // ui32 is rejected.
+ // NOTE(review): when sizeof(long) == 4 the mask reduces `expected` to
+ // (Max<ui32>() + 10) & 0xFFFFFFFF == 9, which fits i32/ui32, so the
+ // "expected overflow" checks at the end look like they would fail on such
+ // platforms - confirm.
+ Y_UNIT_TEST(ImplicitIntCasts) {
+ TPythonTestEngine engine;
+ const ui64 longMask = sizeof(long) == 4 ? Max<ui32>() : Max<ui64>();
+ i64 expected = longMask & (static_cast<i64>(Max<ui32>()) + 10);
+ TPyObjectPtr pyInt = PyLong_FromLong(expected);
+
+ { // signed
+ i64 actual = PyCast<i64>(pyInt.Get());
+ UNIT_ASSERT_EQUAL(actual, expected);
+
+ bool isOk = TryPyCast<i64>(pyInt.Get(), actual);
+ UNIT_ASSERT(isOk);
+ UNIT_ASSERT_EQUAL(actual, expected);
+ }
+
+ { // unsigned
+ ui64 actual = PyCast<ui64>(pyInt.Get());
+ UNIT_ASSERT_EQUAL(actual, static_cast<ui64>(expected));
+
+ bool isOk = TryPyCast<ui64>(pyInt.Get(), actual);
+ UNIT_ASSERT(isOk);
+ UNIT_ASSERT_EQUAL(actual, static_cast<ui64>(expected));
+ }
+
+ { // to float
+ float f = PyCast<float>(pyInt.Get());
+ UNIT_ASSERT_DOUBLES_EQUAL(f, expected, 0.000001);
+
+ bool isOk = TryPyCast<float>(pyInt.Get(), f);
+ UNIT_ASSERT(isOk);
+ UNIT_ASSERT_DOUBLES_EQUAL(f, expected, 0.000001);
+ }
+
+ { // to double
+ double d = PyCast<double>(pyInt.Get());
+ UNIT_ASSERT_DOUBLES_EQUAL(d, expected, 0.000001);
+
+ bool isOk = TryPyCast<double>(pyInt.Get(), d);
+ UNIT_ASSERT(isOk);
+ UNIT_ASSERT_DOUBLES_EQUAL(d, expected, 0.000001);
+ }
+
+ // expected overflow
+ i32 tmp;
+ UNIT_ASSERT(!TryPyCast<i32>(pyInt.Get(), tmp));
+ ui32 tmpu;
+ UNIT_ASSERT(!TryPyCast<ui32>(pyInt.Get(), tmpu));
+ }
+
+#else
+ // Python 2 build: integer widths up to 32 bits are validated with
+ // TPyIntChecker. For ui32 on platforms where long is 4 bytes, the window
+ // below Max<ui32>() is validated with TPyLLongChecker instead.
+ Y_UNIT_TEST(I8) {
+ TestSignedCasts<i8, TPyIntChecker>();
+ }
+
+ Y_UNIT_TEST(Ui8) {
+ TestUnsignedCasts<ui8, TPyIntChecker>();
+ }
+
+ Y_UNIT_TEST(I16) {
+ TestSignedCasts<i16, TPyIntChecker>();
+ }
+
+ Y_UNIT_TEST(Ui16) {
+ TestUnsignedCasts<ui16, TPyIntChecker>();
+ }
+
+ Y_UNIT_TEST(I32) {
+ TestSignedCasts<i32, TPyIntChecker>();
+ }
+
+ Y_UNIT_TEST(Ui32) {
+ if (sizeof(long) == 4) {
+ TestUnsignedCasts<ui32, TPyIntChecker, TPyLLongChecker>();
+ } else {
+ TestUnsignedCasts<ui32, TPyIntChecker>();
+ }
+ }
+
+ // Python 2 build: creates a Python int with PyInt_FromLong and checks that
+ // it converts to i64, ui64, float and double, while conversion to i32 and
+ // ui32 is rejected.
+ // NOTE(review): when sizeof(long) == 4 the mask reduces `expected` to
+ // (Max<ui32>() + 10) & 0xFFFFFFFF == 9, which fits i32/ui32, so the
+ // "expected overflow" checks at the end look like they would fail on such
+ // platforms - confirm.
+ Y_UNIT_TEST(ImplicitIntCasts) {
+ TPythonTestEngine engine;
+ const ui64 longMask = sizeof(long) == 4 ? Max<ui32>() : Max<ui64>();
+ i64 expected = longMask & (static_cast<i64>(Max<ui32>()) + 10);
+ TPyObjectPtr pyInt = PyInt_FromLong(expected);
+
+ { // signed
+ i64 actual = PyCast<i64>(pyInt.Get());
+ UNIT_ASSERT_EQUAL(actual, expected);
+
+ bool isOk = TryPyCast<i64>(pyInt.Get(), actual);
+ UNIT_ASSERT(isOk);
+ UNIT_ASSERT_EQUAL(actual, expected);
+ }
+
+ { // unsigned
+ ui64 actual = PyCast<ui64>(pyInt.Get());
+ UNIT_ASSERT_EQUAL(actual, static_cast<ui64>(expected));
+
+ bool isOk = TryPyCast<ui64>(pyInt.Get(), actual);
+ UNIT_ASSERT(isOk);
+ UNIT_ASSERT_EQUAL(actual, static_cast<ui64>(expected));
+ }
+
+ { // to float
+ float f = PyCast<float>(pyInt.Get());
+ UNIT_ASSERT_DOUBLES_EQUAL(f, expected, 0.000001);
+
+ bool isOk = TryPyCast<float>(pyInt.Get(), f);
+ UNIT_ASSERT(isOk);
+ UNIT_ASSERT_DOUBLES_EQUAL(f, expected, 0.000001);
+ }
+
+ { // to double
+ double d = PyCast<double>(pyInt.Get());
+ UNIT_ASSERT_DOUBLES_EQUAL(d, expected, 0.000001);
+
+ bool isOk = TryPyCast<double>(pyInt.Get(), d);
+ UNIT_ASSERT(isOk);
+ UNIT_ASSERT_DOUBLES_EQUAL(d, expected, 0.000001);
+ }
+
+ // expected overflow
+ i32 tmp;
+ UNIT_ASSERT(!TryPyCast<i32>(pyInt.Get(), tmp));
+ ui32 tmpu;
+ UNIT_ASSERT(!TryPyCast<ui32>(pyInt.Get(), tmpu));
+ }
+#endif
+
+
+ // Creates a Python long holding Max<ui32>() + 10 and checks that it converts
+ // to i64, ui64, float and double, while conversion to i8 is rejected.
+ Y_UNIT_TEST(ImplicitLongCasts) {
+     TPythonTestEngine engine;
+     i64 expected = static_cast<i64>(Max<ui32>()) + 10;
+     TPyObjectPtr pyLong;
+     #ifdef HAVE_LONG_LONG
+     pyLong = PyLong_FromLongLong(expected);
+     #else
+     // The terminating semicolon is required: without it this branch does
+     // not compile when HAVE_LONG_LONG is undefined.
+     pyLong = PyLong_FromLong(expected);
+     #endif
+
+     { // signed
+         i64 actual = PyCast<i64>(pyLong.Get());
+         UNIT_ASSERT_EQUAL(actual, expected);
+
+         bool isOk = TryPyCast<i64>(pyLong.Get(), actual);
+         UNIT_ASSERT(isOk);
+         UNIT_ASSERT_EQUAL(actual, expected);
+     }
+
+     { // unsigned
+         ui64 actual = PyCast<ui64>(pyLong.Get());
+         UNIT_ASSERT_EQUAL(actual, static_cast<ui64>(expected));
+
+         bool isOk = TryPyCast<ui64>(pyLong.Get(), actual);
+         UNIT_ASSERT(isOk);
+         UNIT_ASSERT_EQUAL(actual, static_cast<ui64>(expected));
+     }
+
+     { // to float
+         float f = PyCast<float>(pyLong.Get());
+         UNIT_ASSERT_DOUBLES_EQUAL(f, expected, 0.000001);
+
+         bool isOk = TryPyCast<float>(pyLong.Get(), f);
+         UNIT_ASSERT(isOk);
+         UNIT_ASSERT_DOUBLES_EQUAL(f, expected, 0.000001);
+     }
+
+     { // to double
+         double d = PyCast<double>(pyLong.Get());
+         UNIT_ASSERT_DOUBLES_EQUAL(d, expected, 0.000001);
+
+         bool isOk = TryPyCast<double>(pyLong.Get(), d);
+         UNIT_ASSERT(isOk);
+         UNIT_ASSERT_DOUBLES_EQUAL(d, expected, 0.000001);
+     }
+
+     // expected overflow
+     i8 tmp;
+     UNIT_ASSERT(!TryPyCast<i8>(pyLong.Get(), tmp));
+ }
+
+ // Parses a 17-hex-digit (68-bit) Python long, confirms its width through
+ // bit_length(), and checks that TryPyCast rejects it for every integer
+ // width from 64 down to 8 bits, signed and unsigned.
+ Y_UNIT_TEST(HugeLongOverflow) {
+ TPythonTestEngine engine;
+ TPyObjectPtr pyLong = PyLong_FromString((char*)"0xfffffffffffffffff", nullptr, 0);
+ TPyObjectPtr bitLength = PyObject_CallMethod(pyLong.Get(), (char*)"bit_length", (char*)"()");
+ UNIT_ASSERT_EQUAL(PyCast<ui32>(bitLength.Get()), 68); // 68 bits number
+
+ ui64 resUI64;
+ UNIT_ASSERT(!TryPyCast(pyLong.Get(), resUI64));
+
+ i64 resI64;
+ UNIT_ASSERT(!TryPyCast(pyLong.Get(), resI64));
+
+ ui32 resUI32;
+ UNIT_ASSERT(!TryPyCast(pyLong.Get(), resUI32));
+
+ i32 resI32;
+ UNIT_ASSERT(!TryPyCast(pyLong.Get(), resI32));
+
+ ui16 resUI16;
+ UNIT_ASSERT(!TryPyCast(pyLong.Get(), resUI16));
+
+ i16 resI16;
+ UNIT_ASSERT(!TryPyCast(pyLong.Get(), resI16));
+
+ ui8 resUI8;
+ UNIT_ASSERT(!TryPyCast(pyLong.Get(), resUI8));
+
+ i8 resI8;
+ UNIT_ASSERT(!TryPyCast(pyLong.Get(), resI8));
+ }
+
+ // Creates a Python float from 3.14159 and checks that both PyCast and
+ // TryPyCast return it as float and as double within a 1e-6 tolerance.
+ Y_UNIT_TEST(ImplicitFloatCasts) {
+ TPythonTestEngine engine;
+ double expected = 3.14159;
+ TPyObjectPtr pyFloat = PyFloat_FromDouble(expected);
+
+ { // to float
+ float f = PyCast<float>(pyFloat.Get());
+ UNIT_ASSERT_DOUBLES_EQUAL(f, expected, 0.000001);
+
+ bool isOk = TryPyCast<float>(pyFloat.Get(), f);
+ UNIT_ASSERT(isOk);
+ UNIT_ASSERT_DOUBLES_EQUAL(f, expected, 0.000001);
+ }
+
+ { // to double
+ double d = PyCast<double>(pyFloat.Get());
+ UNIT_ASSERT_DOUBLES_EQUAL(d, expected, 0.000001);
+
+ bool isOk = TryPyCast<double>(pyFloat.Get(), d);
+ UNIT_ASSERT(isOk);
+ UNIT_ASSERT_DOUBLES_EQUAL(d, expected, 0.000001);
+ }
+ }
+
+}
diff --git a/yql/essentials/udfs/common/python/bindings/py_optional_ut.cpp b/yql/essentials/udfs/common/python/bindings/py_optional_ut.cpp
new file mode 100644
index 00000000000..d13ea65da64
--- /dev/null
+++ b/yql/essentials/udfs/common/python/bindings/py_optional_ut.cpp
@@ -0,0 +1,56 @@
+#include "ut3/py_test_engine.h"
+
+#include <library/cpp/testing/unittest/registar.h>
+
+
+using namespace NPython;
+
+// Conversions of optional values between Python and UDF values, in both
+// directions.
+Y_UNIT_TEST_SUITE(FromPyNone) {
+ // Python None converted as Optional<ui32> must be an empty value.
+ Y_UNIT_TEST(FromPyNone) {
+ TPythonTestEngine engine;
+ engine.ToMiniKQL<NUdf::TOptional<ui32>>(
+ "def Test(): return None",
+ [](const NUdf::TUnboxedValuePod& value) {
+ UNIT_ASSERT(!value);
+ });
+ }
+
+ // A Python int converted as Optional<ui32> must be a filled value
+ // holding that number.
+ Y_UNIT_TEST(FromPyObject) {
+ TPythonTestEngine engine;
+ engine.ToMiniKQL<NUdf::TOptional<ui32>>(
+ "def Test(): return 42",
+ [](const NUdf::TUnboxedValuePod& value) {
+ UNIT_ASSERT(value);
+ UNIT_ASSERT_EQUAL(value.Get<ui32>(), 42);
+ });
+ }
+
+ // A default-constructed TUnboxedValuePod passed as Optional<char*> must
+ // compare equal to None in Python.
+ Y_UNIT_TEST(ToPyNone) {
+ TPythonTestEngine engine;
+ engine.ToPython<NUdf::TOptional<char*>>(
+ [](const TType* type, const NUdf::IValueBuilder& vb) {
+ Y_UNUSED(type); Y_UNUSED(vb);
+ return NUdf::TUnboxedValuePod();
+ },
+ "def Test(value):\n"
+ " assert value == None\n");
+ }
+
+ // A filled Optional<Tuple<Utf8, bool>> must arrive in Python as a plain
+ // 2-tuple with the stored string and boolean.
+ Y_UNIT_TEST(ToPyFilledOptional) {
+ TPythonTestEngine engine;
+ engine.ToPython<NUdf::TOptional<NUdf::TTuple<NUdf::TUtf8, bool>>>(
+ [](const TType* type, const NUdf::IValueBuilder& vb) {
+ const TOptionalType* optType =
+ static_cast<const TOptionalType*>(type);
+ NUdf::TUnboxedValue* items = nullptr;
+ // The array size is taken from the tuple type nested in the optional.
+ auto tuple = vb.NewArray(static_cast<const TTupleType*>(optType->GetItemType())->GetElementsCount(), items);
+ items[0] = vb.NewString("test string");
+ items[1] = NUdf::TUnboxedValuePod(false);
+ return NUdf::TUnboxedValue(tuple);
+ },
+ "def Test(value):\n"
+ " assert isinstance(value, tuple)\n"
+ " assert len(value) == 2\n"
+ " assert value == ('test string', False)\n");
+ }
+}
diff --git a/yql/essentials/udfs/common/python/bindings/py_ptr.h b/yql/essentials/udfs/common/python/bindings/py_ptr.h
new file mode 100644
index 00000000000..704629b86b7
--- /dev/null
+++ b/yql/essentials/udfs/common/python/bindings/py_ptr.h
@@ -0,0 +1,69 @@
+#pragma once
+
+#include <Python.h> // PyObject
+
+#include <yql/essentials/public/udf/udf_ptr.h>
+
+namespace NPython {
+
+// Reference-counting policy that maps the operations used by
+// NYql::NUdf::TRefCountedPtr onto CPython's Py_INCREF / Py_DECREF.
+// Every operation asserts that the pointer is non-null.
+template <typename T>
+class TPyPtrOps
+{
+public:
+ static inline void Ref(T* t) {
+ Y_ASSERT(t);
+ Py_INCREF(t);
+ }
+
+ static inline void UnRef(T* t) {
+ Y_ASSERT(t);
+ Py_DECREF(t);
+ }
+
+ // Reads the object's ob_refcnt field directly and returns it as ui32.
+ static inline ui32 RefCount(const T* t) {
+ Y_ASSERT(t);
+ return t->ob_refcnt;
+ }
+};
+
+// Smart pointer to a PyObject built on TRefCountedPtr with TPyPtrOps.
+// Constructing from a raw pointer steals the reference by default; pass the
+// AddRef tag to take an additional reference instead. Plain Reset(PyObject*)
+// is deleted, so callers must choose ResetSteal or ResetAddRef explicitly.
+class TPyObjectPtr:
+ public NYql::NUdf::TRefCountedPtr<PyObject, TPyPtrOps<PyObject>>
+{
+ using TSelf = NYql::NUdf::TRefCountedPtr<PyObject, TPyPtrOps<PyObject>>;
+
+public:
+ inline TPyObjectPtr()
+ {
+ }
+
+ inline TPyObjectPtr(PyObject* p)
+ : TSelf(p, STEAL_REF) // do not increment refcounter by default
+ {
+ }
+
+ inline TPyObjectPtr(PyObject* p, AddRef)
+ : TSelf(p)
+ {
+ }
+
+ // Replaces the held pointer with p via the base Reset in STEAL_REF mode.
+ inline void ResetSteal(PyObject* p) {
+ TSelf::Reset(p, STEAL_REF);
+ }
+
+ // Replaces the held pointer with p via the base Reset without STEAL_REF.
+ inline void ResetAddRef(PyObject* p) {
+ TSelf::Reset(p);
+ }
+
+ inline void Reset() {
+ TSelf::Reset();
+ }
+
+ // Returns the held pointer reinterpreted as T*; no type check is made.
+ template <class T>
+ inline T* GetAs() const {
+ return reinterpret_cast<T*>(Get());
+ }
+
+ // Deleted on purpose: the ownership mode must be spelled out by the caller.
+ void Reset(PyObject* p) = delete;
+};
+
+} // namespace NPython
diff --git a/yql/essentials/udfs/common/python/bindings/py_resource.cpp b/yql/essentials/udfs/common/python/bindings/py_resource.cpp
new file mode 100644
index 00000000000..ebb096029ad
--- /dev/null
+++ b/yql/essentials/udfs/common/python/bindings/py_resource.cpp
@@ -0,0 +1,116 @@
+#include "py_resource.h"
+#include "py_cast.h"
+#include "py_errors.h"
+#include "py_gil.h"
+#include "py_utils.h"
+
+#include <yql/essentials/public/udf/udf_value.h>
+#include <yql/essentials/public/udf/udf_type_inspection.h>
+
+using namespace NKikimr;
+
+namespace NPython {
+namespace {
+
+// Capsule destructor: frees the heap-allocated TUnboxedValue stored in a
+// capsule named ResourceCapsuleName. Does nothing when no pointer is returned.
+void DestroyResourceCapsule(PyObject* obj) {
+    void* const payload = PyCapsule_GetPointer(obj, ResourceCapsuleName);
+    if (!payload) {
+        return;
+    }
+    delete reinterpret_cast<NUdf::TUnboxedValue*>(payload);
+}
+
+/////////////////////////////////////////////////////////////////////////////
+// TResource
+/////////////////////////////////////////////////////////////////////////////
+// Boxed UDF value that wraps a Python object as a resource with a fixed tag.
+// The constructor takes its own reference to the object (ADD_REF).
+class TResource final: public NUdf::TBoxedValue
+{
+public:
+ TResource(PyObject* value, const NUdf::TStringRef& tag)
+ : Value_(value, TPyObjectPtr::ADD_REF), Tag_(tag)
+ {
+ }
+
+ ~TResource() {
+ // Drop the Python reference explicitly while the TPyGilLocker is alive
+ // (presumably so the decref happens with the GIL held - confirm).
+ TPyGilLocker lock;
+ Value_.Reset();
+ }
+
+private:
+ NUdf::TStringRef GetResourceTag() const override {
+ return Tag_;
+ }
+
+ // Returns the wrapped PyObject* as an untyped pointer.
+ void* GetResource() final {
+ return Value_.Get();
+ }
+
+ TPyObjectPtr Value_;
+ const NUdf::TStringRef Tag_;
+};
+
+} // namespace
+
+const char ResourceCapsuleName[] = "YqlResourceCapsule";
+
+// Converts a UDF resource value into a Python object.
+// If the value's resource tag equals ctx->PyCtx->ResourceTag, the resource is
+// itself a PyObject and is returned with an added reference. Otherwise a heap
+// copy of the value is wrapped in a PyCapsule named ResourceCapsuleName,
+// which frees the copy through DestroyResourceCapsule.
+// Returns a null pointer if the capsule cannot be created.
+TPyObjectPtr ToPyResource(
+    const TPyCastContext::TPtr& ctx,
+    const NUdf::TType* type,
+    const NUdf::TUnboxedValuePod& value)
+{
+// TODO NILE-43
+#if false && UDF_ABI_COMPATIBILITY_VERSION_CURRENT >= UDF_ABI_COMPATIBILITY_VERSION(2, 15)
+    NUdf::TResourceTypeInspector inpector(*ctx->PyCtx->TypeInfoHelper, type);
+    auto tag = inpector.GetTag();
+    if (tag == ctx->PyCtx->ResourceTag) {
+        PyObject* p = reinterpret_cast<PyObject*>(value.GetResource());
+        return TPyObjectPtr(p, TPyObjectPtr::ADD_REF);
+    }
+#else
+    Y_UNUSED(type);
+    if (value.GetResourceTag() == ctx->PyCtx->ResourceTag) {
+        PyObject* p = reinterpret_cast<PyObject*>(value.GetResource());
+        return TPyObjectPtr(p, TPyObjectPtr::ADD_REF);
+    }
+#endif
+    auto resource = MakeHolder<NUdf::TUnboxedValue>(value);
+
+    // Keep ownership in the holder until PyCapsule_New has succeeded. If it
+    // fails, no capsule exists to run DestroyResourceCapsule, so releasing
+    // the pointer up front would leak the copy.
+    TPyObjectPtr capsule = PyCapsule_New(resource.Get(), ResourceCapsuleName, &DestroyResourceCapsule);
+    if (capsule.Get() != nullptr) {
+        Y_UNUSED(resource.Release()); // the capsule destructor owns it now
+    }
+    return capsule;
+}
+
+// Converts a Python object into a UDF resource value.
+// Active (#else) branch: a PyCapsule must be a valid capsule named
+// ResourceCapsuleName, otherwise yexception is thrown; the TUnboxedValue it
+// stores is returned by copy. Any other Python object is wrapped in a new
+// TResource tagged with ctx->PyCtx->ResourceTag.
+// The first branch is compiled out by `#if false` (see TODO NILE-43).
+NUdf::TUnboxedValue FromPyResource(
+ const TPyCastContext::TPtr& ctx,
+ const NUdf::TType* type, PyObject* value)
+{
+// TODO NILE-43
+#if false && UDF_ABI_COMPATIBILITY_VERSION_CURRENT >= UDF_ABI_COMPATIBILITY_VERSION(2, 15)
+ NUdf::TResourceTypeInspector inpector(*ctx->PyCtx->TypeInfoHelper, type);
+ auto tag = inpector.GetTag();
+ if (tag == ctx->PyCtx->ResourceTag) {
+ return NUdf::TUnboxedValuePod(new TResource(value, ctx->PyCtx->ResourceTag));
+ }
+
+ if (PyCapsule_IsValid(value, ResourceCapsuleName)) {
+ auto* resource = reinterpret_cast<NUdf::TUnboxedValue*>(PyCapsule_GetPointer(value, ResourceCapsuleName));
+ auto valueTag = resource->GetResourceTag();
+ if (valueTag != tag) {
+ throw yexception() << "Mismatch of resource tag, expected: "
+ << tag << ", got: " << valueTag;
+ }
+
+ return *resource;
+ }
+
+ throw yexception() << "Python object " << PyObjectRepr(value) \
+ << " is not a valid resource with tag " << tag;
+#else
+ Y_UNUSED(type);
+ // Capsules created elsewhere (wrong name) are rejected rather than wrapped.
+ if (PyCapsule_CheckExact(value)) {
+ if (!PyCapsule_IsValid(value, ResourceCapsuleName)) {
+ throw yexception() << "Python object " << PyObjectRepr(value) << " is not a valid resource capsule";
+ }
+ return *reinterpret_cast<NUdf::TUnboxedValue*>(PyCapsule_GetPointer(value, ResourceCapsuleName));
+ }
+ return NUdf::TUnboxedValuePod(new TResource(value, ctx->PyCtx->ResourceTag));
+#endif
+}
+
+} // namespace NPython
diff --git a/yql/essentials/udfs/common/python/bindings/py_resource.h b/yql/essentials/udfs/common/python/bindings/py_resource.h
new file mode 100644
index 00000000000..b46b84c84b1
--- /dev/null
+++ b/yql/essentials/udfs/common/python/bindings/py_resource.h
@@ -0,0 +1,20 @@
+#pragma once
+
+#include "py_ptr.h"
+#include "py_ctx.h"
+
+namespace NPython {
+
+// Name given to the PyCapsule objects that carry UDF resource values.
+extern const char ResourceCapsuleName[];
+
+// Converts a UDF resource value into a Python object: either the wrapped
+// Python object itself or a capsule named ResourceCapsuleName.
+TPyObjectPtr ToPyResource(
+ const TPyCastContext::TPtr& ctx,
+ const NKikimr::NUdf::TType* type,
+ const NKikimr::NUdf::TUnboxedValuePod& value);
+
+// Converts a Python object (a resource capsule or an arbitrary object) into
+// a UDF resource value.
+NKikimr::NUdf::TUnboxedValue FromPyResource(
+ const TPyCastContext::TPtr& ctx,
+ const NKikimr::NUdf::TType* type,
+ PyObject* value);
+
+} // namespace NPython
diff --git a/yql/essentials/udfs/common/python/bindings/py_resource_ut.cpp b/yql/essentials/udfs/common/python/bindings/py_resource_ut.cpp
new file mode 100644
index 00000000000..aaa9899c4f1
--- /dev/null
+++ b/yql/essentials/udfs/common/python/bindings/py_resource_ut.cpp
@@ -0,0 +1,81 @@
+#include "ut3/py_test_engine.h"
+
+#include <library/cpp/testing/unittest/registar.h>
+
+
+using namespace NPython;
+
+extern const char SimpleDataTag[] = "SimpleData";
+extern const char PythonTestTag[] = PYTHON_TEST_TAG;
+
+// Plain payload stored in the test resource: a name and an age.
+struct TSimpleData {
+ TString Name;
+ ui32 Age;
+
+ TSimpleData(const TString& name, ui32 age)
+ : Name(name)
+ , Age(age)
+ {}
+};
+
+using TSimpleDataResource = NUdf::TBoxedResource<TSimpleData, SimpleDataTag>;
+
+Y_UNIT_TEST_SUITE(TPyResourceTest) {
+ // Sends a native TSimpleData resource to Python, where it must show up as
+ // a PyCapsule named YqlResourceCapsule, then sends that capsule back and
+ // checks that the original tag and payload are recovered.
+ Y_UNIT_TEST(MkqlObject) {
+     TPythonTestEngine engine;
+
+     // C++ -> Python: the resource is opaque to Python and arrives as a capsule.
+     TPyObjectPtr pyValue = engine.ToPython<NUdf::TResource<SimpleDataTag>>(
+         [](const TType* type, const NUdf::IValueBuilder& vb) {
+             Y_UNUSED(type); Y_UNUSED(vb);
+             return NUdf::TUnboxedValuePod(new TSimpleDataResource("Jamel", 99));
+         },
+         "import yql\n"
+         "\n"
+         "def Test(value):\n"
+         " assert str(value).startswith('<capsule object \"YqlResourceCapsule\" at ')\n"
+         " assert repr(value).startswith('<capsule object \"YqlResourceCapsule\" at ')\n"
+         " assert type(value).__name__ == 'PyCapsule'\n"
+         " return value\n");
+     UNIT_ASSERT(!!pyValue);
+
+     // Python -> C++: the capsule unwraps to the same resource.
+     engine.ToMiniKQLWithArg<NUdf::TResource<SimpleDataTag>>(
+         pyValue.Get(),
+         "import yql\n"
+         "\n"
+         "def Test(value):\n"
+         " return value\n",
+         [](const NUdf::TUnboxedValuePod& value) {
+             UNIT_ASSERT(value);
+             UNIT_ASSERT(value.IsBoxed());
+             UNIT_ASSERT_STRINGS_EQUAL(value.GetResourceTag(), SimpleDataTag);
+             auto payload = reinterpret_cast<TSimpleData*>(value.GetResource());
+             UNIT_ASSERT_EQUAL(payload->Age, 99);
+             UNIT_ASSERT_STRINGS_EQUAL(payload->Name, "Jamel");
+         });
+ }
+
+ // Converts a Python object into a resource tagged PythonTestTag, then
+ // passes that resource back to Python and checks that the very same
+ // CustomStruct instance, with its fields intact, comes out.
+ Y_UNIT_TEST(PythonObject) {
+     TPythonTestEngine engine;
+     NUdf::TUnboxedValue mkqlValue = engine.FromPython<NUdf::TResource<PythonTestTag>>(
+         // Method and body are indented below the class header; without
+         // that the script is not valid Python.
+         "class CustomStruct:\n"
+         "    def __init__(self, name, age):\n"
+         "        self.name = name\n"
+         "        self.age = age\n"
+         "\n"
+         "def Test():\n"
+         "    return CustomStruct('Jamel', 97)\n");
+     UNIT_ASSERT(mkqlValue);
+     UNIT_ASSERT_STRINGS_EQUAL(mkqlValue.GetResourceTag(), PythonTestTag);
+
+     TPyObjectPtr pyValue = engine.ToPython<NUdf::TResource<PythonTestTag>>(
+         [mkqlValue](const TType* type, const NUdf::IValueBuilder& vb) {
+             Y_UNUSED(type); Y_UNUSED(vb);
+             return mkqlValue;
+         },
+         // `assert x, y` treats y as the failure message and only tests the
+         // truthiness of x, so the field values are compared with `==`.
+         "def Test(value):\n"
+         "    assert isinstance(value, CustomStruct)\n"
+         "    assert value.age == 97\n"
+         "    assert value.name == 'Jamel'\n");
+     UNIT_ASSERT(!!pyValue);
+ }
+}
diff --git a/yql/essentials/udfs/common/python/bindings/py_stream.cpp b/yql/essentials/udfs/common/python/bindings/py_stream.cpp
new file mode 100644
index 00000000000..3d9aecdc00b
--- /dev/null
+++ b/yql/essentials/udfs/common/python/bindings/py_stream.cpp
@@ -0,0 +1,343 @@
+#include "py_stream.h"
+#include "py_cast.h"
+#include "py_errors.h"
+#include "py_gil.h"
+#include "py_utils.h"
+
+#include <yql/essentials/public/udf/udf_value.h>
+#include <yql/essentials/public/udf/udf_value_builder.h>
+#include <yql/essentials/public/udf/udf_type_inspection.h>
+#include <yql/essentials/public/udf/udf_terminator.h>
+
+#include <util/string/builder.h>
+
+using namespace NKikimr;
+
+namespace NPython {
+
+// Python exception object used in this file to signal NUdf::EFetchStatus::Yield in both directions; will be initialized in InitYqlModule()
+PyObject* PyYieldIterationException = nullptr;
+
+//////////////////////////////////////////////////////////////////////////////
+// TPyStream
+//////////////////////////////////////////////////////////////////////////////
+struct TPyStream {
+    // Python object that exposes a boxed stream value as a Python iterator.
+    // The object header must stay first so that PyObject* and TPyStream*
+    // can be converted into each other with reinterpret_cast.
+    PyObject_HEAD;
+    TPyCastContext::TPtr CastCtx;                     // context used to convert fetched items
+    TPyCleanupListItem<NUdf::IBoxedValuePtr> Value;   // the wrapped boxed stream
+    const NUdf::TType* ItemType;                      // type of a single stream item
+
+    // Reinterprets a generic Python object pointer as a stream object.
+    static TPyStream* Cast(PyObject* o) {
+        return reinterpret_cast<TPyStream*>(o);
+    }
+
+    // tp_dealloc slot: objects are created with `new` in New(), so they are
+    // released with `delete` here.
+    static void Dealloc(PyObject* self) {
+        TPyStream* const stream = Cast(self);
+        delete stream;
+    }
+
+    // tp_repr slot: every stream has the same fixed representation.
+    static PyObject* Repr(PyObject* /*self*/) {
+        return PyRepr("<yql.TStream>").Release();
+    }
+
+    // Creates a new stream object wrapping `value`; `type` is the stream type.
+    static PyObject* New(
+            const TPyCastContext::TPtr& castCtx,
+            const NUdf::TType* type,
+            NUdf::IBoxedValuePtr value);
+
+    // tp_iternext slot: fetches the next item from the wrapped stream.
+    static PyObject* Next(PyObject* self);
+};
+
+#if PY_MAJOR_VERSION >= 3
+#define Py_TPFLAGS_HAVE_ITER 0 // on Python 3 the flag is defined here as 0, so the tp_flags entry below evaluates to 0 there
+#endif
+
+PyTypeObject PyStreamType = { // type object of yql.TStream; slots are listed positionally, so their order must not change
+ PyVarObject_HEAD_INIT(&PyType_Type, 0)
+ INIT_MEMBER(tp_name , "yql.TStream"),
+ INIT_MEMBER(tp_basicsize , sizeof(TPyStream)),
+ INIT_MEMBER(tp_itemsize , 0),
+ INIT_MEMBER(tp_dealloc , TPyStream::Dealloc), // releases the object with delete
+#if PY_VERSION_HEX < 0x030800b4
+ INIT_MEMBER(tp_print , nullptr), // this position holds tp_print before 3.8.0b4 ...
+#else
+ INIT_MEMBER(tp_vectorcall_offset, 0), // ... and tp_vectorcall_offset from 3.8.0b4 on
+#endif
+ INIT_MEMBER(tp_getattr , nullptr),
+ INIT_MEMBER(tp_setattr , nullptr),
+#if PY_MAJOR_VERSION >= 3
+ INIT_MEMBER(tp_as_async , nullptr),
+#else
+ INIT_MEMBER(tp_compare , nullptr),
+#endif
+ INIT_MEMBER(tp_repr , TPyStream::Repr), // always "<yql.TStream>"
+ INIT_MEMBER(tp_as_number , nullptr),
+ INIT_MEMBER(tp_as_sequence , nullptr),
+ INIT_MEMBER(tp_as_mapping , nullptr),
+ INIT_MEMBER(tp_hash , nullptr),
+ INIT_MEMBER(tp_call , nullptr),
+ INIT_MEMBER(tp_str , nullptr),
+ INIT_MEMBER(tp_getattro , nullptr),
+ INIT_MEMBER(tp_setattro , nullptr),
+ INIT_MEMBER(tp_as_buffer , nullptr),
+ INIT_MEMBER(tp_flags , Py_TPFLAGS_HAVE_ITER),
+ INIT_MEMBER(tp_doc , "yql.TStream object"),
+ INIT_MEMBER(tp_traverse , nullptr),
+ INIT_MEMBER(tp_clear , nullptr),
+ INIT_MEMBER(tp_richcompare , nullptr),
+ INIT_MEMBER(tp_weaklistoffset , 0),
+ INIT_MEMBER(tp_iter , PyObject_SelfIter), // iter(stream) returns the stream itself
+ INIT_MEMBER(tp_iternext , TPyStream::Next), // next(stream) fetches one item from the wrapped stream
+ INIT_MEMBER(tp_methods , nullptr),
+ INIT_MEMBER(tp_members , nullptr),
+ INIT_MEMBER(tp_getset , nullptr),
+ INIT_MEMBER(tp_base , nullptr),
+ INIT_MEMBER(tp_dict , nullptr),
+ INIT_MEMBER(tp_descr_get , nullptr),
+ INIT_MEMBER(tp_descr_set , nullptr),
+ INIT_MEMBER(tp_dictoffset , 0),
+ INIT_MEMBER(tp_init , nullptr),
+ INIT_MEMBER(tp_alloc , nullptr),
+ INIT_MEMBER(tp_new , nullptr), // instances are only created from C++ via TPyStream::New
+ INIT_MEMBER(tp_free , nullptr),
+ INIT_MEMBER(tp_is_gc , nullptr),
+ INIT_MEMBER(tp_bases , nullptr),
+ INIT_MEMBER(tp_mro , nullptr),
+ INIT_MEMBER(tp_cache , nullptr),
+ INIT_MEMBER(tp_subclasses , nullptr),
+ INIT_MEMBER(tp_weaklist , nullptr),
+ INIT_MEMBER(tp_del , nullptr),
+ INIT_MEMBER(tp_version_tag , 0),
+#if PY_MAJOR_VERSION >= 3
+ INIT_MEMBER(tp_finalize , nullptr),
+#endif
+#if PY_VERSION_HEX >= 0x030800b1
+ INIT_MEMBER(tp_vectorcall , nullptr),
+#endif
+#if PY_VERSION_HEX >= 0x030800b4 && PY_VERSION_HEX < 0x03090000
+ INIT_MEMBER(tp_print , nullptr), // only for versions from 3.8.0b4 up to (not including) 3.9, where tp_print is the last slot
+#endif
+};
+
+PyObject* TPyStream::New(
+        const TPyCastContext::TPtr& castCtx,
+        const NUdf::TType* type,
+        NUdf::IBoxedValuePtr value)
+{
+    // Look up the item type of the stream type up front; it is needed later
+    // to convert every fetched item into a Python object.
+    const NUdf::TStreamTypeInspector streamInspector(*castCtx->PyCtx->TypeInfoHelper, type);
+
+    // The object is allocated with `new` (and freed with `delete` in Dealloc),
+    // so only the Python object header has to be initialized explicitly.
+    TPyStream* self = new TPyStream;
+    PyObject_INIT(self, &PyStreamType);
+
+    self->CastCtx = castCtx;
+    self->Value.Set(castCtx->PyCtx, value);
+    self->ItemType = streamInspector.GetItemType();
+
+    return reinterpret_cast<PyObject*>(self);
+}
+
+PyObject* TPyStream::Next(PyObject* self) {
+    // tp_iternext implementation: maps the fetch status of the wrapped stream
+    // onto the Python iterator protocol.
+    PY_TRY {
+        TPyStream* const stream = Cast(self);
+
+        NUdf::TUnboxedValue fetched;
+        const auto status = NUdf::TBoxedValueAccessor::Fetch(*stream->Value.Get(), fetched);
+
+        if (status == NUdf::EFetchStatus::Ok) {
+            // A value is available: convert it and pass ownership to the caller.
+            TPyObjectPtr pyItem = ToPyObject(stream->CastCtx, stream->ItemType, fetched);
+            return pyItem.Release();
+        }
+        if (status == NUdf::EFetchStatus::Finish) {
+            // Returning null without a pending exception ends the iteration.
+            return nullptr;
+        }
+        if (status == NUdf::EFetchStatus::Yield) {
+            // No data yet: report it to Python with the dedicated exception.
+            PyErr_SetNone(PyYieldIterationException);
+            return nullptr;
+        }
+
+        Y_ABORT("Unknown stream status");
+    } PY_CATCH(nullptr)
+}
+
+//////////////////////////////////////////////////////////////////////////////
+// TStreamOverPyIter
+//////////////////////////////////////////////////////////////////////////////
+class TStreamOverPyIter final: public NUdf::TBoxedValue {
+public:
+    // Boxed stream that pulls its items from a Python iterator.
+    //   pyIter                     - iterator the items are currently read from
+    //   pyIterable                 - optional iterable used to build a fresh
+    //                                iterator after the yield exception
+    //   pyGeneratorCallable / Args - optional callable (and its arguments)
+    //                                called to build a fresh iterator after
+    //                                the yield exception
+    //   pyGeneratorCallableClosure - kept referenced for the lifetime of the
+    //                                stream; not read anywhere in this class
+    TStreamOverPyIter(
+            TPyCastContext::TPtr castCtx,
+            const NUdf::TType* itemType,
+            TPyObjectPtr pyIter,
+            TPyObjectPtr pyIterable,
+            TPyObjectPtr pyGeneratorCallable,
+            TPyObjectPtr pyGeneratorCallableClosure,
+            TPyObjectPtr pyGeneratorCallableArgs)
+        : CastCtx_(std::move(castCtx))
+        , ItemType_(itemType)
+        , PyIter_(std::move(pyIter))
+        , PyIterable_(std::move(pyIterable))
+        , PyGeneratorCallable_(std::move(pyGeneratorCallable))
+        , PyGeneratorCallableClosure_(std::move(pyGeneratorCallableClosure))
+        , PyGeneratorCallableArgs_(std::move(pyGeneratorCallableArgs))
+    {
+    }
+
+    ~TStreamOverPyIter() {
+        // Drop all Python references explicitly while the GIL is held.
+        TPyGilLocker lock;
+        PyIter_.Reset();
+        PyIterable_.Reset();
+        PyGeneratorCallableArgs_.Reset();
+        PyGeneratorCallableClosure_.Reset();
+        PyGeneratorCallable_.Reset();
+    }
+
+private:
+    // Produces the next stream item.
+    //  - Ok:     `result` holds the converted item.
+    //  - Yield:  Python yielded the yield marker itself, or raised it as an
+    //            exception (in which case the iterator is rebuilt when possible).
+    //  - Finish: the iterator is exhausted without a pending Python error.
+    // Any other Python error terminates the UDF.
+    NUdf::EFetchStatus Fetch(NUdf::TUnboxedValue& result) override {
+        try {
+            TPyGilLocker lock;
+            TPyObjectPtr item(PyIter_Next(PyIter_.Get()));
+            if (item) {
+                // The yield marker may be produced as a regular value.
+                if (0 != PyErr_GivenExceptionMatches(item.Get(), PyYieldIterationException)) {
+                    return NUdf::EFetchStatus::Yield;
+                }
+
+                result = FromPyObject(CastCtx_, ItemType_, item.Get());
+                return NUdf::EFetchStatus::Ok;
+            }
+
+            PyObject* const pending = PyErr_Occurred();
+            if (!pending) {
+                return NUdf::EFetchStatus::Finish;
+            }
+
+            if (0 == PyErr_GivenExceptionMatches(pending, PyYieldIterationException)) {
+                UdfTerminate((TStringBuilder() << CastCtx_->PyCtx->Pos << GetLastErrorAsString()).data());
+                return NUdf::EFetchStatus::Finish;
+            }
+
+            // The yield marker was raised: the current iterator is finished, so
+            // a new one has to be created from the stored iterable or callable.
+            PyErr_Clear();
+            if (!PyIterable_ && !PyGeneratorCallable_) {
+                // Nothing to rebuild the iterator from.
+                return NUdf::EFetchStatus::Yield;
+            }
+
+            PyIter_.Reset();
+
+            TPyObjectPtr source;     // object to take a new iterator from
+            TPyObjectPtr restarted;  // the new iterator itself
+            if (PyIterable_) {
+                source = PyIterable_;
+            } else {
+                TPyObjectPtr produced(PyObject_CallObject(PyGeneratorCallable_.Get(), PyGeneratorCallableArgs_.Get()));
+                if (!produced) {
+                    UdfTerminate((TStringBuilder() << CastCtx_->PyCtx->Pos << "Failed to execute:\n" << GetLastErrorAsString()).data());
+                }
+
+                if (PyGen_Check(produced.Get())) {
+                    source = std::move(produced);
+                } else if (PyIter_Check(produced.Get())) {
+                    restarted = std::move(produced);
+                } else {
+                    UdfTerminate((TStringBuilder() << CastCtx_->PyCtx->Pos << "Expected iterator or generator, but got " << PyObjectRepr(produced.Get())).data());
+                }
+            }
+
+            if (!restarted) {
+                restarted.ResetSteal(PyObject_GetIter(source.Get()));
+                if (!restarted) {
+                    UdfTerminate((TStringBuilder() << CastCtx_->PyCtx->Pos << GetLastErrorAsString()).data());
+                }
+            }
+
+            PyIter_.ResetAddRef(restarted.Get());
+            return NUdf::EFetchStatus::Yield;
+        }
+        catch (const yexception& e) {
+            UdfTerminate((TStringBuilder() << CastCtx_->PyCtx->Pos << e.what()).data());
+        }
+    }
+
+private:
+    TPyCastContext::TPtr CastCtx_;
+    const NUdf::TType* ItemType_;
+    TPyObjectPtr PyIter_;
+    TPyObjectPtr PyIterable_;
+    TPyObjectPtr PyGeneratorCallable_;
+    TPyObjectPtr PyGeneratorCallableClosure_;
+    TPyObjectPtr PyGeneratorCallableArgs_;
+};
+
+
+//////////////////////////////////////////////////////////////////////////////
+// public functions
+//////////////////////////////////////////////////////////////////////////////
+TPyObjectPtr ToPyStream(
+        const TPyCastContext::TPtr& castCtx,
+        const NKikimr::NUdf::TType* type,
+        const NKikimr::NUdf::TUnboxedValuePod& value)
+{
+    // Wrap the boxed stream held by `value` into a new yql.TStream object.
+    NUdf::IBoxedValuePtr boxed = value.AsBoxed();
+    PyObject* const stream = TPyStream::New(castCtx, type, std::move(boxed));
+    return stream;
+}
+
+// Converts a Python object into a stream value of the given stream `type`.
+//
+// The object is examined in this order:
+//   1. generator object   - iterated directly;
+//   2. iterator           - used as is;
+//   3. callable           - called without arguments and must return a generator;
+//   4. sequence/iterable  - an iterator is obtained from it.
+// Anything else, as well as any Python error raised on the way, terminates
+// the UDF with a message prefixed by the current position.
+//
+// originalCallable, originalCallableClosure and originalCallableArgs are
+// forwarded to the created stream for cases 1 and 2; for cases 3 and 4 a
+// non-null originalCallable only decides whether `value` itself is kept so
+// the stream can build a fresh iterator after the yield exception.
+NKikimr::NUdf::TUnboxedValue FromPyStream(
+        const TPyCastContext::TPtr& castCtx,
+        const NKikimr::NUdf::TType* type,
+        const TPyObjectPtr& value,
+        const TPyObjectPtr& originalCallable,
+        const TPyObjectPtr& originalCallableClosure,
+        const TPyObjectPtr& originalCallableArgs
+)
+{
+    const NUdf::TStreamTypeInspector inspector(*castCtx->PyCtx->TypeInfoHelper, type);
+    const NUdf::TType* itemType = inspector.GetItemType();
+
+    if (PyGen_Check(value.Get())) {
+        TPyObjectPtr iter(PyObject_GetIter(value.Get()));
+        if (!iter) {
+            UdfTerminate((TStringBuilder() << castCtx->PyCtx->Pos << GetLastErrorAsString()).data());
+        }
+        return NUdf::TUnboxedValuePod(new TStreamOverPyIter(castCtx, itemType, std::move(iter), nullptr,
+            originalCallable, originalCallableClosure, originalCallableArgs));
+    }
+
+    if (PyIter_Check(value.Get())
+#if PY_MAJOR_VERSION < 3
+        // python 2 iterators must also implement "next" method
+        && 1 == PyObject_HasAttrString(value.Get(), "next")
+#endif
+    ) {
+        TPyObjectPtr iter(value.Get(), TPyObjectPtr::ADD_REF);
+        return NUdf::TUnboxedValuePod(new TStreamOverPyIter(castCtx, itemType, std::move(iter), nullptr,
+            originalCallable, originalCallableClosure, originalCallableArgs));
+    }
+
+    // assume that this function will return a generator
+    if (PyCallable_Check(value.Get())) {
+        TPyObjectPtr generator(PyObject_CallObject(value.Get(), nullptr));
+        if (!generator) {
+            // The call itself raised: report the Python error instead of a
+            // misleading "expected generator" message (same wording as used
+            // by TStreamOverPyIter::Fetch for a failed call).
+            UdfTerminate((TStringBuilder() << castCtx->PyCtx->Pos << "Failed to execute:\n" << GetLastErrorAsString()).data());
+        }
+        if (!PyGen_Check(generator.Get())) {
+            UdfTerminate((TStringBuilder() << castCtx->PyCtx->Pos << "Expected generator as a result of function call").data());
+        }
+        TPyObjectPtr iter(PyObject_GetIter(generator.Get()));
+        if (!iter) {
+            UdfTerminate((TStringBuilder() << castCtx->PyCtx->Pos << GetLastErrorAsString()).data());
+        }
+
+        // Keep the closure of a plain Python function referenced together
+        // with the function itself.
+        TPyObjectPtr callableClosure;
+        if (PyFunction_Check(value.Get())) {
+            PyObject* closure = PyFunction_GetClosure(value.Get());
+            if (closure) {
+                callableClosure = TPyObjectPtr(closure, TPyObjectPtr::ADD_REF);
+            }
+        }
+
+        return NUdf::TUnboxedValuePod(new TStreamOverPyIter(castCtx, itemType, std::move(iter), nullptr,
+            originalCallable ? value : nullptr, originalCallable ? callableClosure : nullptr, nullptr));
+    }
+
+    // must be after checking for callable
+    if (PySequence_Check(value.Get()) || PyObject_HasAttrString(value.Get(), "__iter__")) {
+        TPyObjectPtr iter(PyObject_GetIter(value.Get()));
+        if (!iter) {
+            UdfTerminate((TStringBuilder() << castCtx->PyCtx->Pos << GetLastErrorAsString()).data());
+        }
+        return NUdf::TUnboxedValuePod(new TStreamOverPyIter(castCtx, itemType, std::move(iter), originalCallable ? value : nullptr, nullptr, nullptr, nullptr));
+    }
+
+    UdfTerminate((TStringBuilder() << castCtx->PyCtx->Pos << "Expected iterator, generator, generator factory, "
+        "or iterable object, but got " << PyObjectRepr(value.Get())).data());
+}
+
+} // namespace NPython
diff --git a/yql/essentials/udfs/common/python/bindings/py_stream.h b/yql/essentials/udfs/common/python/bindings/py_stream.h
new file mode 100644
index 00000000000..f677e23930d
--- /dev/null
+++ b/yql/essentials/udfs/common/python/bindings/py_stream.h
@@ -0,0 +1,24 @@
+#pragma once
+
+#include "py_ptr.h"
+#include "py_ctx.h"
+
+namespace NPython {
+
+extern PyTypeObject PyStreamType; // Python type object of yql.TStream, defined in py_stream.cpp
+extern PyObject* PyYieldIterationException; // exception object used to signal the Yield fetch status; null until the module initializes it
+
+TPyObjectPtr ToPyStream( // wraps a boxed stream value into a new yql.TStream Python object
+ const TPyCastContext::TPtr& castCtx, // context used to convert stream items to Python objects
+ const NKikimr::NUdf::TType* type, // stream type; its item type is extracted from it
+ const NKikimr::NUdf::TUnboxedValuePod& value); // value whose boxed part is the stream
+
+NKikimr::NUdf::TUnboxedValue FromPyStream( // builds a stream value over a Python generator, iterator, generator factory or iterable
+ const TPyCastContext::TPtr& castCtx, // context used to convert Python items to stream items
+ const NKikimr::NUdf::TType* type, // stream type; its item type is extracted from it
+ const TPyObjectPtr& value, // the Python object to read items from
+ const TPyObjectPtr& originalCallable, // may be null; see py_stream.cpp for how it affects iterator re-creation
+ const TPyObjectPtr& originalCallableClosure, // may be null; only kept referenced by the created stream
+ const TPyObjectPtr& originalCallableArgs); // may be null; arguments used when originalCallable is called again
+
+} // namespace NPython
diff --git a/yql/essentials/udfs/common/python/bindings/py_stream_ut.cpp b/yql/essentials/udfs/common/python/bindings/py_stream_ut.cpp
new file mode 100644
index 00000000000..4a24dd1a138
--- /dev/null
+++ b/yql/essentials/udfs/common/python/bindings/py_stream_ut.cpp
@@ -0,0 +1,208 @@
+#include "ut3/py_test_engine.h"
+
+#include <library/cpp/testing/unittest/registar.h>
+
+
+using namespace NPython;
+
+Y_UNIT_TEST_SUITE(TPyStreamTest) {
+ void Ui32StreamValidator(const NUdf::TUnboxedValuePod& value) {
+     // Checks that `value` is a boxed stream producing exactly 0, 1, ..., 9
+     // and then reporting Finish.
+     UNIT_ASSERT(value);
+     UNIT_ASSERT(value.IsBoxed());
+
+     ui32 expected = 0;
+     NUdf::TUnboxedValue item;
+     NUdf::EFetchStatus status = value.Fetch(item);
+
+     for (; status == NUdf::EFetchStatus::Ok; status = value.Fetch(item)) {
+         const ui32 actual = item.Get<ui32>();
+         UNIT_ASSERT_EQUAL(actual, expected);
+         ++expected;
+     }
+
+     // The loop must have stopped because the stream ended, after ten items.
+     UNIT_ASSERT_EQUAL(status, NUdf::EFetchStatus::Finish);
+     UNIT_ASSERT_EQUAL(expected, 10);
+ }
+
+ struct TTestStream final: NUdf::TBoxedValue {
+     // Test stream producing 0, 1, ..., maxValue - 1. Once the counter reaches
+     // `yieldOn` every fetch reports Yield (the counter does not advance).
+     TTestStream(ui32 maxValue, ui32 yieldOn = Max<ui32>())
+         : Current_(0)
+         , YieldOn_(yieldOn)
+         , MaxValue_(maxValue)
+     {
+     }
+
+ private:
+     NUdf::EFetchStatus Fetch(NUdf::TUnboxedValue& result) override {
+         // The yield check deliberately comes before the end-of-stream check.
+         if (Current_ == YieldOn_) {
+             return NUdf::EFetchStatus::Yield;
+         }
+         if (Current_ >= MaxValue_) {
+             return NUdf::EFetchStatus::Finish;
+         }
+
+         const ui32 produced = Current_;
+         ++Current_;
+         result = NUdf::TUnboxedValuePod(produced);
+         return NUdf::EFetchStatus::Ok;
+     }
+
+     ui32 Current_;
+     ui32 YieldOn_;
+     ui32 MaxValue_;
+ };
+
+ Y_UNIT_TEST(FromGenerator) { // Test() is itself a generator function yielding 0..9
+ TPythonTestEngine engine;
+ engine.ToMiniKQL<NUdf::TStream<ui32>>(
+ "def Test():\n"
+ " num = 0\n"
+ " while num < 10:\n"
+ " yield num\n"
+ " num += 1\n",
+ Ui32StreamValidator); // expects exactly 0..9 followed by Finish
+ }
+
+ Y_UNIT_TEST(FromGeneratorFactory) { // Test() returns the generator function itself, not a generator object
+ TPythonTestEngine engine;
+ engine.ToMiniKQL<NUdf::TStream<ui32>>(
+ "def first_10():\n"
+ " num = 0\n"
+ " while num < 10:\n"
+ " yield num\n"
+ " num += 1\n"
+ "def Test():\n"
+ " return first_10\n",
+ Ui32StreamValidator); // expects exactly 0..9 followed by Finish
+ }
+
+ Y_UNIT_TEST(FromIterator) { // Test() returns an iterator object
+ TPythonTestEngine engine;
+ engine.ToMiniKQL<NUdf::TStream<ui32>>(
+ "def Test():\n"
+ " return iter(range(10))\n",
+ Ui32StreamValidator); // expects exactly 0..9 followed by Finish
+ }
+
+ Y_UNIT_TEST(FromIterable) { // Test() returns a range object (xrange on Python 2)
+ TPythonTestEngine engine;
+ engine.ToMiniKQL<NUdf::TStream<ui32>>(
+ "def Test():\n"
+#if PY_MAJOR_VERSION >= 3
+ " return range(10)\n",
+#else
+ " return xrange(10)\n",
+#endif
+ Ui32StreamValidator); // expects exactly 0..9 followed by Finish
+ }
+
+ Y_UNIT_TEST(FromCustomIterable) { // Test() returns an instance of a user class that defines __iter__ and __len__
+ TPythonTestEngine engine;
+ engine.ToMiniKQL<NUdf::TStream<ui32>>(
+ "class T:\n"
+ " def __init__(self, l):\n"
+ " self.l = l\n"
+ " def __len__(self):\n"
+ " return len(self.l)\n"
+ " def __nonzero__(self):\n"
+ " return bool(self.l)\n"
+ " def __iter__(self):\n"
+ " return iter(self.l)\n"
+ "\n"
+ "def Test():\n"
+ " return T(list(range(10)))\n",
+ Ui32StreamValidator); // expects exactly 0..9 followed by Finish
+ }
+
+ Y_UNIT_TEST(FromList) { // Test() returns a plain Python list
+ TPythonTestEngine engine;
+ engine.ToMiniKQL<NUdf::TStream<ui32>>(
+ "def Test():\n"
+ " return [0, 1, 2, 3, 4, 5, 6, 7, 8, 9]\n",
+ Ui32StreamValidator); // expects exactly 0..9 followed by Finish
+ }
+
+ Y_UNIT_TEST(ToPython) { // a C++ stream is passed to Python and inspected there
+ TPythonTestEngine engine;
+ engine.ToPython<NUdf::TStream<ui32>>(
+ [](const TType* /*type*/, const NUdf::IValueBuilder& /*vb*/) {
+ return NUdf::TUnboxedValuePod(new TTestStream(10)); // produces 0..9 and never yields
+ },
+ "def Test(value):\n"
+ " import yql\n"
+ " assert repr(value) == '<yql.TStream>'\n"
+ " assert type(value).__name__ == 'TStream'\n"
+ " assert list(value) == [0, 1, 2, 3, 4, 5, 6, 7, 8, 9]\n");
+ }
+
+ Y_UNIT_TEST(ToPythonAndBackAsIs) { // a C++ stream goes to Python and is returned unchanged
+ TPythonTestEngine engine;
+ engine.ToPythonAndBack<NUdf::TStream<ui32>>(
+ [](const TType* /*type*/, const NUdf::IValueBuilder& /*vb*/) {
+ return NUdf::TUnboxedValuePod(new TTestStream(10)); // produces 0..9 and never yields
+ },
+ "def Test(value): return value",
+ Ui32StreamValidator // the returned stream must still produce 0..9 and then Finish
+ );
+ }
+
+ Y_UNIT_TEST(YieldingStreamFromPython) { // a Python generator yields the yql.TYieldIteration marker in the middle of the data
+ TPythonTestEngine engine;
+ engine.ToMiniKQL<NUdf::TStream<ui32>>(
+ "import yql\n"
+ "def Test():\n"
+ " yield 0\n"
+ " yield 1\n"
+ " yield yql.TYieldIteration\n"
+ " yield 2\n",
+ [](const NUdf::TUnboxedValuePod& value) {
+ UNIT_ASSERT(value);
+ UNIT_ASSERT(value.IsBoxed());
+
+ NUdf::TUnboxedValue item;
+ ui32 expected = 0;
+ NUdf::EFetchStatus status;
+
+ while ((status = value.Fetch(item)) == NUdf::EFetchStatus::Ok) { // stops at the first non-Ok status
+ ui32 actual = item.Get<ui32>();
+ UNIT_ASSERT_EQUAL(actual, expected);
+ expected++;
+ }
+
+ UNIT_ASSERT_EQUAL(status, NUdf::EFetchStatus::Yield); // the marker is reported as Yield, not as a value
+ UNIT_ASSERT_EQUAL(expected, 2); // only 0 and 1 were received before the marker
+ });
+ }
+
+ Y_UNIT_TEST(YieldingStreamFromCpp) { // a C++ stream reporting Yield must raise yql.TYieldIteration in Python
+ TPythonTestEngine engine;
+ engine.ToPython<NUdf::TStream<ui32>>(
+ [](const TType* /*type*/, const NUdf::IValueBuilder& /*vb*/) {
+ return NUdf::TUnboxedValuePod(new TTestStream(5, 2)); // produces 0 and 1, then reports Yield
+ },
+ "import yql\n"
+ "def Test(value):\n"
+ " assert repr(value) == '<yql.TStream>'\n"
+ " assert type(value).__name__ == 'TStream'\n"
+ " assert next(value) == 0\n"
+ " assert next(value) == 1\n"
+ " try:\n"
+ " next(value)\n"
+ " except yql.TYieldIteration:\n"
+ " pass\n"
+ " else:\n"
+ " assert False, 'Expected yql.TYieldIteration'\n");
+ }
+
+ Y_UNIT_TEST(FromCppListIterator) { // a C++ list goes to Python, and iter() over it comes back as a stream
+ TPythonTestEngine engine;
+ engine.ToPythonAndBack<NUdf::TListType<ui32>, NUdf::TStream<ui32>>(
+ [](const TType*, const NUdf::IValueBuilder& vb) {
+ NUdf::TUnboxedValue *items = nullptr;
+ const auto a = vb.NewArray(10U, items);
+ ui32 i = 0U;
+ std::generate_n(items, 10U, [&i](){ return NUdf::TUnboxedValuePod(i++); }); // fills the array with 0..9
+ return a;
+ },
+ "def Test(value): return iter(value)",
+ Ui32StreamValidator // expects exactly 0..9 followed by Finish
+ );
+ }
+}
diff --git a/yql/essentials/udfs/common/python/bindings/py_string_ut.cpp b/yql/essentials/udfs/common/python/bindings/py_string_ut.cpp
new file mode 100644
index 00000000000..444b7b0c5b0
--- /dev/null
+++ b/yql/essentials/udfs/common/python/bindings/py_string_ut.cpp
@@ -0,0 +1,98 @@
+#include "ut3/py_test_engine.h"
+
+#include <library/cpp/testing/unittest/registar.h>
+
+using namespace NPython;
+
+Y_UNIT_TEST_SUITE(TPyStringTest) {
+ template <typename TStringType>
+ void TestStringCasts() {
+     // Python bytes -> TStringType
+     TStringType source(TStringBuf("test string"));
+     const TStringBuf sourceView = source;
+     TPyObjectPtr pyBytes = PyBytes_FromString(sourceView.data());
+     const auto decoded = PyCast<TStringType>(pyBytes.Get());
+
+     UNIT_ASSERT_STRINGS_EQUAL(decoded, source);
+
+     // TStringType -> Python bytes
+     TStringType other(TStringBuf("another test string"));
+     const TStringBuf otherView = other;
+     TPyObjectPtr encoded = PyCast<TStringType>(other);
+
+     char* rawData = nullptr;
+     Py_ssize_t rawSize = 0;
+     const auto rc = PyBytes_AsStringAndSize(encoded.Get(), &rawData, &rawSize);
+     UNIT_ASSERT(rc >= 0);
+     UNIT_ASSERT(rawData != nullptr);
+     UNIT_ASSERT_EQUAL(static_cast<size_t>(rawSize), otherView.size());
+     UNIT_ASSERT_STRINGS_EQUAL(rawData, other);
+ }
+
+ template <typename TStringType>
+ void TestBinaryStringCasts() {
+     // Same checks as for plain strings, but with bytes outside of ASCII.
+
+     // Python bytes -> TStringType
+     TStringType source(TStringBuf("\xa0\xa1"sv));
+     const TStringBuf sourceView = source;
+     TPyObjectPtr pyBytes = PyBytes_FromString(sourceView.data());
+     const auto decoded = PyCast<TStringType>(pyBytes.Get());
+
+     UNIT_ASSERT_STRINGS_EQUAL(decoded, source);
+
+     // TStringType -> Python bytes
+     TStringType other(TStringBuf("\xf0\x90\x28\xbc"sv));
+     const TStringBuf otherView = other;
+     TPyObjectPtr encoded = PyCast<TStringType>(other);
+
+     char* rawData = nullptr;
+     Py_ssize_t rawSize = 0;
+     const auto rc = PyBytes_AsStringAndSize(encoded.Get(), &rawData, &rawSize);
+     UNIT_ASSERT(rc >= 0);
+     UNIT_ASSERT(rawData != nullptr);
+     UNIT_ASSERT_EQUAL(static_cast<size_t>(rawSize), otherView.size());
+     UNIT_ASSERT_STRINGS_EQUAL(rawData, other);
+ }
+
+ template <typename TStringType>
+ void TestUtf8StringCasts() {
+     // Python unicode -> UTF-8 bytes -> TStringType
+     const TStringType source(TStringBuf("тестовая строка"));
+     const TStringBuf sourceView = source;
+     const TPyObjectPtr pyUnicode = PyUnicode_FromString(sourceView.data());
+     const TPyObjectPtr pyUtf8Bytes = PyUnicode_AsUTF8String(pyUnicode.Get());
+     const auto decoded = PyCast<TStringType>(pyUtf8Bytes.Get());
+     UNIT_ASSERT_STRINGS_EQUAL(decoded, source);
+
+     // TStringType -> Python unicode
+     const TStringType other(TStringBuf("еще одна тестовая строка"));
+     const TStringBuf otherView = other;
+     const auto encoded = ToPyUnicode<TStringType>(other);
+
+     UNIT_ASSERT(PyUnicode_Check(encoded.Get()));
+
+     // Read the UTF-8 representation back; the API differs between Python 2 and 3.
+     Py_ssize_t rawSize = 0;
+#if PY_MAJOR_VERSION >= 3
+     const auto rawData = PyUnicode_AsUTF8AndSize(encoded.Get(), &rawSize);
+#else
+     char* rawData = nullptr;
+     const TPyObjectPtr pyUtf8Str = PyUnicode_AsUTF8String(encoded.Get());
+     const auto rc = PyBytes_AsStringAndSize(pyUtf8Str.Get(), &rawData, &rawSize);
+     UNIT_ASSERT(rc >= 0);
+#endif
+     UNIT_ASSERT(rawData != nullptr);
+     UNIT_ASSERT_EQUAL(static_cast<size_t>(rawSize), otherView.size());
+     UNIT_ASSERT_STRINGS_EQUAL(rawData, other);
+ }
+
+ Y_UNIT_TEST(Simple) { // ASCII bytes round trip for every supported string type
+ TestStringCasts<TString>();
+ TestStringCasts<TStringBuf>();
+ TestStringCasts<NUdf::TStringRef>();
+ }
+
+ Y_UNIT_TEST(Utf8) { // unicode <-> UTF-8 conversions for every supported string type
+ TestUtf8StringCasts<TString>();
+ TestUtf8StringCasts<TStringBuf>();
+ TestUtf8StringCasts<NUdf::TStringRef>();
+ }
+
+ Y_UNIT_TEST(Binary) { // non-ASCII byte sequences for every supported string type
+ TestBinaryStringCasts<TString>();
+ TestBinaryStringCasts<TStringBuf>();
+ TestBinaryStringCasts<NUdf::TStringRef>();
+ }
+}
diff --git a/yql/essentials/udfs/common/python/bindings/py_struct.cpp b/yql/essentials/udfs/common/python/bindings/py_struct.cpp
new file mode 100644
index 00000000000..a4ab99ee32c
--- /dev/null
+++ b/yql/essentials/udfs/common/python/bindings/py_struct.cpp
@@ -0,0 +1,188 @@
+#include "py_struct.h"
+#include "py_cast.h"
+#include "py_errors.h"
+#include "py_gil.h"
+#include "py_utils.h"
+
+#include <yql/essentials/public/udf/udf_value.h>
+#include <yql/essentials/public/udf/udf_value_builder.h>
+#include <yql/essentials/public/udf/udf_type_inspection.h>
+#include <yql/essentials/public/udf/udf_terminator.h>
+
+#include <util/string/cast.h>
+#include <util/string/join.h>
+#include <util/string/builder.h>
+
+using namespace NKikimr;
+
+namespace NPython {
+
+namespace {
+
+// Creates an empty Python object for the given struct type.
+//
+// The Python type is created on first use and cached in ctx->StructTypes,
+// keyed by the struct type pointer:
+//  - Python 3: a struct sequence type named "yql.Struct" whose fields are
+//    the struct members;
+//  - Python 2: an old-style class named "yql.Struct_<N>".
+//
+// Throws yexception when the type or the instance cannot be created. If the
+// type creation fails, the cache entry is removed again: leaving a null type
+// in the cache would make every later call for the same struct type skip
+// creation and instantiate a null type.
+TPyObjectPtr CreateNewStrucInstance(const TPyCastContext::TPtr& ctx, const NKikimr::NUdf::TType* type, const NUdf::TStructTypeInspector& inspector)
+{
+    const auto it = ctx->StructTypes.emplace(type, TPyObjectPtr());
+    if (it.second) {
+#if PY_MAJOR_VERSION >= 3
+        // One extra element: the field list is terminated by a null entry.
+        std::vector<PyStructSequence_Field> fields(inspector.GetMembersCount() + 1U);
+        for (ui32 i = 0U; i < inspector.GetMembersCount(); ++i) {
+            fields[i] = {const_cast<char*>(inspector.GetMemberName(i).Data()), nullptr};
+        }
+        fields.back() = {nullptr, nullptr};
+
+        PyStructSequence_Desc desc = {
+            INIT_MEMBER(name, "yql.Struct"),
+            INIT_MEMBER(doc, nullptr),
+            INIT_MEMBER(fields, fields.data()),
+            INIT_MEMBER(n_in_sequence, int(inspector.GetMembersCount()))
+        };
+
+        const auto typeObject = new PyTypeObject();
+        if (0 > PyStructSequence_InitType2(typeObject, &desc)) {
+            // NOTE(review): typeObject is not freed on this path (as before);
+            // confirm whether a partially initialized type may be deleted.
+            const auto error = GetLastErrorAsString();
+            ctx->StructTypes.erase(it.first);
+            throw yexception() << "can't create struct type: " << error;
+        }
+
+        it.first->second.ResetSteal(reinterpret_cast<PyObject*>(typeObject));
+    }
+
+    const TPyObjectPtr object = PyStructSequence_New(it.first->second.GetAs<PyTypeObject>());
+#else
+        const auto className = TString("yql.Struct_") += ToString(ctx->StructTypes.size());
+        PyObject* metaclass = (PyObject *) &PyClass_Type;
+        const TPyObjectPtr name = PyRepr(TStringBuf(className));
+        const TPyObjectPtr bases = PyTuple_New(0);
+        const TPyObjectPtr dict = PyDict_New();
+
+        TPyObjectPtr newClass = PyObject_CallFunctionObjArgs(
+            metaclass, name.Get(), bases.Get(), dict.Get(),
+            nullptr);
+        if (!newClass) {
+            const auto error = GetLastErrorAsString();
+            ctx->StructTypes.erase(it.first);
+            throw yexception() << "can't create new type: " << error;
+        }
+
+        it.first->second = std::move(newClass);
+    }
+
+    Y_UNUSED(inspector);
+    const TPyObjectPtr object = PyInstance_New(it.first->second.Get(), nullptr, nullptr);
+#endif
+    if (!object) {
+        throw yexception() << "can't struct instance: " << GetLastErrorAsString();
+    }
+    return object;
+}
+
+}
+
+// Converts a struct value into a Python object: every member is converted
+// with ToPyObject and stored by position (Python 3 struct sequence) or as an
+// attribute named after the member (Python 2 instance).
+TPyObjectPtr ToPyStruct(const TPyCastContext::TPtr& ctx, const NUdf::TType* type, const NUdf::TUnboxedValuePod& value)
+{
+    const NUdf::TStructTypeInspector inspector(*ctx->PyCtx->TypeInfoHelper, type);
+    const TPyObjectPtr object = CreateNewStrucInstance(ctx, type, inspector);
+    const auto membersCount = inspector.GetMembersCount();
+
+    // Converts one member and stores it into the Python object.
+    const auto storeMember = [&](Py_ssize_t index, const auto& member) {
+#if PY_MAJOR_VERSION >= 3
+        auto item = ToPyObject(ctx, inspector.GetMemberType(index), member);
+        PyStructSequence_SetItem(object.Get(), index, item.Release());
+#else
+        const TStringBuf name = inspector.GetMemberName(index);
+        const auto item = ToPyObject(ctx, inspector.GetMemberType(index), member);
+        if (0 > PyObject_SetAttrString(object.Get(), name.data(), item.Get())) {
+            throw yexception()
+                << "Can't set attr '" << name << "' to python object: "
+                << GetLastErrorAsString();
+        }
+#endif
+    };
+
+    if (auto ptr = value.GetElements()) {
+        // Direct access to the element array is available.
+        for (Py_ssize_t i = 0; i < membersCount; ++i) {
+            storeMember(i, *ptr++);
+        }
+    } else {
+        // Fall back to fetching the elements one by one.
+        for (Py_ssize_t i = 0; i < membersCount; ++i) {
+            storeMember(i, value.GetElement(i));
+        }
+    }
+
+    return object;
+}
+
+// Converts a Python dict or an arbitrary Python object into a struct value.
+//
+// For a dict every member is looked up by its name as a str key first and as
+// a bytes key second; for any other object the member is read as an attribute.
+// A missing member of an optional type becomes an empty value. All other
+// problems are collected and reported together in a single yexception.
+NUdf::TUnboxedValue FromPyStruct(const TPyCastContext::TPtr& ctx, const NUdf::TType* type, PyObject* value)
+{
+    NUdf::TUnboxedValue* items = nullptr;
+    const NUdf::TStructTypeInspector inspector(*ctx->PyCtx->TypeInfoHelper, type);
+    const auto membersCount = inspector.GetMembersCount();
+    auto mkqlStruct = ctx->ValueBuilder->NewArray(membersCount, items);
+
+    TVector<TString> errors;
+    if (PyDict_Check(value)) {
+        for (ui32 idx = 0; idx < membersCount; ++idx) {
+            const TStringBuf fieldName = inspector.GetMemberName(idx);
+            const auto fieldType = inspector.GetMemberType(idx);
+
+            // borrowed reference - no need to manage ownership
+            PyObject* entry = PyDict_GetItemString(value, fieldName.data());
+            if (!entry) {
+                // The key may also be stored as bytes.
+                TPyObjectPtr bytesKey = PyBytes_FromStringAndSize(fieldName.data(), fieldName.size());
+                entry = PyDict_GetItem(value, bytesKey.Get());
+            }
+
+            if (entry) {
+                try {
+                    items[idx] = FromPyObject(ctx, fieldType, entry);
+                } catch (const yexception& e) {
+                    errors.push_back(TStringBuilder() << "Failed to convert dict item '" << fieldName << "' - " << e.what());
+                }
+                continue;
+            }
+
+            // The key is absent: fine for optional members, an error otherwise.
+            if (ctx->PyCtx->TypeInfoHelper->GetTypeKind(fieldType) == NUdf::ETypeKind::Optional) {
+                items[idx] = NUdf::TUnboxedValue();
+            } else {
+                errors.push_back(TStringBuilder() << "Dict has no item '" << fieldName << "'");
+            }
+        }
+
+        if (!errors.empty()) {
+            throw yexception() << "Failed to convert dict to struct\n" << JoinSeq("\n", errors) << "\nDict repr: " << PyObjectRepr(value);
+        }
+    } else {
+        for (ui32 idx = 0; idx < membersCount; ++idx) {
+            const TStringBuf fieldName = inspector.GetMemberName(idx);
+            const auto fieldType = inspector.GetMemberType(idx);
+
+            TPyObjectPtr attr = PyObject_GetAttrString(value, fieldName.data());
+            if (attr) {
+                try {
+                    items[idx] = FromPyObject(ctx, fieldType, attr.Get());
+                } catch (const yexception& e) {
+                    errors.push_back(TStringBuilder() << "Failed to convert object attr '" << fieldName << "' - " << e.what());
+                }
+                continue;
+            }
+
+            // Only a genuinely missing attribute of an optional member is tolerated.
+            if (ctx->PyCtx->TypeInfoHelper->GetTypeKind(fieldType) == NUdf::ETypeKind::Optional &&
+                PyErr_ExceptionMatches(PyExc_AttributeError)) {
+                PyErr_Clear();
+                items[idx] = NUdf::TUnboxedValue();
+            } else {
+                errors.push_back(TStringBuilder() << "Object has no attr '" << fieldName << "' , error: " << GetLastErrorAsString());
+            }
+        }
+
+        if (!errors.empty()) {
+            throw yexception() << "Failed to convert object to struct\n" << JoinSeq("\n", errors) << "\nObject repr: " << PyObjectRepr(value);
+        }
+    }
+
+    return mkqlStruct;
+}
+
+}
diff --git a/yql/essentials/udfs/common/python/bindings/py_struct.h b/yql/essentials/udfs/common/python/bindings/py_struct.h
new file mode 100644
index 00000000000..79a380283fb
--- /dev/null
+++ b/yql/essentials/udfs/common/python/bindings/py_struct.h
@@ -0,0 +1,17 @@
+#pragma once
+
+#include "py_ptr.h"
+#include "py_ctx.h"
+
+namespace NPython {
+
+TPyObjectPtr ToPyStruct( // converts a struct value into a Python object, member by member
+ const TPyCastContext::TPtr& castCtx, // context used for member conversion; also caches the created Python struct types
+ const NKikimr::NUdf::TType* type, // struct type describing member names and types
+ const NKikimr::NUdf::TUnboxedValuePod& value); // the struct value to convert
+
+NKikimr::NUdf::TUnboxedValue FromPyStruct( // converts a Python dict or object into a struct value; throws yexception listing all failed members
+ const TPyCastContext::TPtr& ctx, // context providing the value builder and type helper
+ const NKikimr::NUdf::TType* type, PyObject* value); // struct type to produce and the Python object to read from
+
+} // namespace NPython
diff --git a/yql/essentials/udfs/common/python/bindings/py_struct_ut.cpp b/yql/essentials/udfs/common/python/bindings/py_struct_ut.cpp
new file mode 100644
index 00000000000..a97507f5499
--- /dev/null
+++ b/yql/essentials/udfs/common/python/bindings/py_struct_ut.cpp
@@ -0,0 +1,307 @@
+#include "ut3/py_test_engine.h"
+
+#include <library/cpp/testing/unittest/registar.h>
+
+
+using namespace NPython;
+
+Y_UNIT_TEST_SUITE(TPyStructTest) {
+ // A plain Python object with `age` and `name` attributes is converted to a two-member struct;
+ // both members are then read back by the indices reported by the type builder.
+ Y_UNIT_TEST(FromPyObject) {
+ TPythonTestEngine engine;
+
+ ui32 ageIdx = 0, nameIdx = 0;
+ auto personType = engine.GetTypeBuilder().Struct()->
+ AddField<int>("age", &ageIdx)
+ .AddField<char*>("name", &nameIdx)
+ .Build();
+
+ engine.ToMiniKQL(personType,
+ "class Person:\n"
+ " def __init__(self, age, name):\n"
+ " self.age = age\n"
+ " self.name = name\n"
+ "\n"
+ "def Test():\n"
+ " return Person(99, 'Jamel')\n",
+ [ageIdx, nameIdx](const NUdf::TUnboxedValuePod& value) {
+ UNIT_ASSERT(value);
+ UNIT_ASSERT(value.IsBoxed());
+ auto name = value.GetElement(nameIdx);
+ UNIT_ASSERT_STRINGS_EQUAL(name.AsStringRef(), "Jamel");
+ auto age = value.GetElement(ageIdx);
+ UNIT_ASSERT_EQUAL(age.Get<ui32>(), 99);
+ });
+ }
+
+ // The Python object has no `name` attribute while the struct declares `name` as Optional:
+ // the conversion must succeed and leave the `name` member empty.
+ Y_UNIT_TEST(FromPyObjectMissingOptionalField) {
+ TPythonTestEngine engine;
+
+ ui32 ageIdx = 0, nameIdx = 0;
+ auto optionalStringType = engine.GetTypeBuilder().Optional()->Item<char*>().Build();
+ auto personType = engine.GetTypeBuilder().Struct()->
+ AddField<int>("age", &ageIdx)
+ .AddField("name", optionalStringType, &nameIdx)
+ .Build();
+
+ engine.ToMiniKQL(personType,
+ "class Person:\n"
+ " def __init__(self, age):\n"
+ " self.age = age\n"
+ "\n"
+ "def Test():\n"
+ " return Person(99)\n",
+ [ageIdx, nameIdx](const NUdf::TUnboxedValuePod& value) {
+ UNIT_ASSERT(value);
+ UNIT_ASSERT(value.IsBoxed());
+ auto name = value.GetElement(nameIdx);
+ UNIT_ASSERT(!name);
+ auto age = value.GetElement(ageIdx);
+ UNIT_ASSERT_EQUAL(age.Get<ui32>(), 99);
+ });
+ }
+
+ // A Python dict keyed by member names is converted to a struct; key order in the dict
+ // ('name' first) differs from the declaration order ('age' first).
+ Y_UNIT_TEST(FromPyDict) {
+ TPythonTestEngine engine;
+
+ ui32 ageIdx = 0, nameIdx = 0;
+ auto personType = engine.GetTypeBuilder().Struct()->
+ AddField<int>("age", &ageIdx)
+ .AddField<char*>("name", &nameIdx)
+ .Build();
+
+ engine.ToMiniKQL(personType,
+ "def Test():\n"
+ " return { 'name': 'Jamel', 'age': 99 }\n",
+ [ageIdx, nameIdx](const NUdf::TUnboxedValuePod& value) {
+ UNIT_ASSERT(value);
+ UNIT_ASSERT(value.IsBoxed());
+ auto name = value.GetElement(nameIdx);
+ UNIT_ASSERT_STRINGS_EQUAL(name.AsStringRef(), "Jamel");
+ auto age = value.GetElement(ageIdx);
+ UNIT_ASSERT_EQUAL(age.Get<ui32>(), 99);
+ });
+ }
+
+ // The dict lacks the 'name' key while the struct declares `name` as Optional:
+ // the conversion must succeed and leave the `name` member empty.
+ Y_UNIT_TEST(FromPyDictMissingOptionalField) {
+ TPythonTestEngine engine;
+
+ ui32 ageIdx = 0, nameIdx = 0;
+ auto optionalStringType = engine.GetTypeBuilder().Optional()->Item<char*>().Build();
+ auto personType = engine.GetTypeBuilder().Struct()->
+ AddField<int>("age", &ageIdx)
+ .AddField("name", optionalStringType, &nameIdx)
+ .Build();
+
+ engine.ToMiniKQL(personType,
+ "def Test():\n"
+ " return { 'age': 99 }\n",
+ [ageIdx, nameIdx](const NUdf::TUnboxedValuePod& value) {
+ UNIT_ASSERT(value);
+ UNIT_ASSERT(value.IsBoxed());
+ auto name = value.GetElement(nameIdx);
+ UNIT_ASSERT(!name);
+ auto age = value.GetElement(ageIdx);
+ UNIT_ASSERT_EQUAL(age.Get<ui32>(), 99);
+ });
+ }
+
+ // The member name contains an embedded NUL character and the dict key is a bytes object
+ // with the same content; the member must still be found and converted.
+ Y_UNIT_TEST(FromPyDictBytesKeyWithNullCharacter) {
+ TPythonTestEngine engine;
+
+ ui32 ageIdx = 0;
+ auto personType = engine.GetTypeBuilder().Struct()->
+ AddField<int>("a\0ge", &ageIdx)
+ .Build();
+
+ engine.ToMiniKQL(personType,
+ "def Test():\n"
+ " return { b'a\\0ge': 99 }\n",
+ [ageIdx](const NUdf::TUnboxedValuePod& value) {
+ UNIT_ASSERT(value);
+ UNIT_ASSERT(value.IsBoxed());
+ auto age = value.GetElement(ageIdx);
+ UNIT_ASSERT_EQUAL(age.Get<ui32>(), 99);
+ });
+ }
+
+ // A collections.namedtuple instance is converted to a struct; the namedtuple field order
+ // ('name age') differs from the struct declaration order ('age', 'name').
+ Y_UNIT_TEST(FromPyNamedTuple) {
+ TPythonTestEngine engine;
+
+ ui32 ageIdx = 0, nameIdx = 0;
+ auto personType = engine.GetTypeBuilder().Struct()->
+ AddField<int>("age", &ageIdx)
+ .AddField<char*>("name", &nameIdx)
+ .Build();
+
+ engine.ToMiniKQL(personType,
+ "from collections import namedtuple\n"
+ "def Test():\n"
+ " Person = namedtuple('Person', 'name age')\n"
+ " return Person(age=13, name='Tony')\n",
+ [ageIdx, nameIdx](const NUdf::TUnboxedValuePod& value) {
+ UNIT_ASSERT(value);
+ UNIT_ASSERT(value.IsBoxed());
+ auto name = value.GetElement(nameIdx);
+ UNIT_ASSERT_STRINGS_EQUAL(name.AsStringRef(), "Tony");
+ auto age = value.GetElement(ageIdx);
+ UNIT_ASSERT_EQUAL(age.Get<ui32>(), 13);
+ });
+ }
+
+ // A namedtuple whose `name` field is None is converted to a struct with an Optional `name`:
+ // the resulting member must be empty.
+ Y_UNIT_TEST(FromPyNamedTupleNoneOptionalField) {
+ TPythonTestEngine engine;
+
+ ui32 ageIdx = 0, nameIdx = 0;
+ auto optionalStringType = engine.GetTypeBuilder().Optional()->Item<char*>().Build();
+ auto personType = engine.GetTypeBuilder().Struct()->
+ AddField<int>("age", &ageIdx)
+ .AddField("name", optionalStringType, &nameIdx)
+ .Build();
+
+ engine.ToMiniKQL(personType,
+ "from collections import namedtuple\n"
+ "def Test():\n"
+ " Pers = namedtuple('Person', 'name age')\n"
+ " return Pers(name=None, age=15)\n",
+ [ageIdx, nameIdx](const NUdf::TUnboxedValuePod& value) {
+ UNIT_ASSERT(value);
+ UNIT_ASSERT(value.IsBoxed());
+ auto name = value.GetElement(nameIdx);
+ UNIT_ASSERT(!name);
+ auto age = value.GetElement(ageIdx);
+ UNIT_ASSERT_EQUAL(age.Get<ui32>(), 15);
+ });
+ }
+
+ // An attribute-less Python object is converted to a struct type with no members.
+ // The checker lambda is empty: the test only requires the conversion itself to complete.
+ Y_UNIT_TEST(FromPyEmptyStruct) {
+ TPythonTestEngine engine;
+ auto emptyStruct = engine.GetTypeBuilder().Struct()->Build();
+
+ engine.ToMiniKQL(emptyStruct,
+ "class Empty: pass\n"
+ "\n"
+ "def Test():\n"
+ " return Empty()\n",
+ [](const NUdf::TUnboxedValuePod&) {});
+ }
+
+ // A struct with a nested struct member is handed to Python; the script checks that every
+ // populated member - including all three members of the nested `address` - is readable
+ // as an attribute and carries the value written on the C++ side.
+ Y_UNIT_TEST(ToPyObject) {
+     TPythonTestEngine engine;
+
+     ui32 ageIdx = 0, nameIdx = 0, addressIdx = 0, cityIdx = 0, streetIdx = 0, buildingIdx = 0;
+     auto addressType = engine.GetTypeBuilder().Struct()->
+         AddField<NUdf::TUtf8>("city", &cityIdx)
+         .AddField<NUdf::TUtf8>("street", &streetIdx)
+         .AddField<ui16>("building", &buildingIdx)
+         .Build();
+
+     auto personType = engine.GetTypeBuilder().Struct()->
+         AddField<ui16>("age", &ageIdx)
+         .AddField<NUdf::TUtf8>("name", &nameIdx)
+         .AddField("address", addressType, &addressIdx)
+         .Build();
+
+
+     engine.ToPython(personType,
+         [=](const TType* type, const NUdf::IValueBuilder& vb) {
+             NUdf::TUnboxedValue* items = nullptr;
+             auto new_struct = vb.NewArray(static_cast<const TStructType*>(type)->GetMembersCount(), items);
+             items[ageIdx] = NUdf::TUnboxedValuePod(ui16(97));
+             items[nameIdx] = vb.NewString("Jamel");
+             // The nested struct gets its own element array, sized from the member type.
+             NUdf::TUnboxedValue* items2 = nullptr;
+             items[addressIdx] = vb.NewArray(static_cast<const TStructType*>(static_cast<const TStructType*>(type)->GetMemberType(addressIdx))->GetMembersCount(), items2);
+             items2[cityIdx] = vb.NewString("Moscow");
+             items2[streetIdx] = vb.NewString("L'va Tolstogo");
+             items2[buildingIdx] = NUdf::TUnboxedValuePod(ui16(16));
+             return new_struct;
+         },
+         "def Test(value):\n"
+         " assert isinstance(value, object)\n"
+         " assert value.name == 'Jamel'\n"
+         " assert value.age == 97\n"
+         " assert value.address.city == 'Moscow'\n"
+         " assert value.address.street == \"L'va Tolstogo\"\n"
+         " assert value.address.building == 16\n"
+     );
+ }
+
+ // Member names that are Python keywords (pass, while, if, not) cannot be written as
+ // `value.<name>` in Python source, so the script reads them through getattr().
+ Y_UNIT_TEST(ToPyObjectKeywordsAsFields) {
+ TPythonTestEngine engine;
+
+ ui32 passIdx = 0, whileIdx = 0, ifIdx = 0, notIdx = 0;
+ auto structType = engine.GetTypeBuilder().Struct()->
+ AddField<NUdf::TUtf8>("pass", &passIdx)
+ .AddField<NUdf::TUtf8>("while", &whileIdx)
+ .AddField<NUdf::TUtf8>("if", &ifIdx)
+ .AddField<NUdf::TUtf8>("not", &notIdx)
+ .Build();
+
+ engine.ToPython(structType,
+ [=](const TType* type, const NUdf::IValueBuilder& vb) {
+ NUdf::TUnboxedValue* items = nullptr;
+ auto new_struct = vb.NewArray(static_cast<const TStructType*>(type)->GetMembersCount(), items);
+ items[ifIdx] = vb.NewString("You");
+ items[whileIdx] = vb.NewString("Shall");
+ items[notIdx] = vb.NewString("Not");
+ items[passIdx] = vb.NewString("Pass");
+ return new_struct;
+ },
+ "def Test(value):\n"
+ " assert getattr(value, 'if') == 'You'\n"
+ " assert getattr(value, 'while') == 'Shall'\n"
+ " assert getattr(value, 'not') == 'Not'\n"
+ " assert getattr(value, 'pass') == 'Pass'\n"
+ );
+ }
+
+ // Python 3 only (see the TODO on the guard): assigning to a member of the exposed struct,
+ // either via setattr() or via plain attribute assignment, must raise AttributeError.
+ #if PY_MAJOR_VERSION >= 3 // TODO: Fix for python 2
+ Y_UNIT_TEST(ToPyObjectTryModify) {
+ TPythonTestEngine engine;
+
+ ui32 field1Idx = 0, field2Idx = 0;
+ auto structType = engine.GetTypeBuilder().Struct()->
+ AddField<NUdf::TUtf8>("field1", &field1Idx)
+ .AddField<NUdf::TUtf8>("field2", &field2Idx)
+ .Build();
+
+ engine.ToPython(structType,
+ [=](const TType* type, const NUdf::IValueBuilder& vb) {
+ NUdf::TUnboxedValue* items = nullptr;
+ auto new_struct = vb.NewArray(static_cast<const TStructType*>(type)->GetMembersCount(), items);
+ items[field1Idx] = NUdf::TUnboxedValuePod::Zero();
+ items[field2Idx] = NUdf::TUnboxedValuePod::Embedded("empty");
+ return new_struct;
+ },
+ "def Test(value):\n"
+ " try:\n"
+ " setattr(value, 'field1', 17)\n"
+ " except AttributeError:\n"
+ " pass\n"
+ " else:\n"
+ " assert False\n"
+ " try:\n"
+ " value.field2 = 18\n"
+ " except AttributeError:\n"
+ " pass\n"
+ " else:\n"
+ " assert False\n"
+ );
+ }
+ #endif
+
+ // A struct type with no members is handed to Python, backed by an empty list value.
+ // The len(value) == 0 check is compiled in for Python 3 only.
+ Y_UNIT_TEST(ToPyObjectEmptyStruct) {
+ TPythonTestEngine engine;
+
+ auto personType = engine.GetTypeBuilder().Struct()->Build();
+
+ engine.ToPython(personType,
+ [](const TType*, const NUdf::IValueBuilder& vb) {
+ return vb.NewEmptyList();
+ },
+ "def Test(value):\n"
+ " assert isinstance(value, object)\n"
+ #if PY_MAJOR_VERSION >= 3
+ " assert len(value) == 0\n"
+ #endif
+ );
+ }
+}
diff --git a/yql/essentials/udfs/common/python/bindings/py_tuple.cpp b/yql/essentials/udfs/common/python/bindings/py_tuple.cpp
new file mode 100644
index 00000000000..6cef25ea47f
--- /dev/null
+++ b/yql/essentials/udfs/common/python/bindings/py_tuple.cpp
@@ -0,0 +1,61 @@
+#include "py_tuple.h"
+#include "py_cast.h"
+#include "py_errors.h"
+#include "py_gil.h"
+#include "py_utils.h"
+
+#include <yql/essentials/public/udf/udf_value.h>
+#include <yql/essentials/public/udf/udf_value_builder.h>
+#include <yql/essentials/public/udf/udf_type_inspection.h>
+#include <yql/essentials/public/udf/udf_terminator.h>
+
+using namespace NKikimr;
+
+namespace NPython {
+
+ // Builds a Python tuple from a tuple value.
+ //
+ // ctx   - cast context; supplies the type info helper and is forwarded to ToPyObject();
+ // type  - tuple type describing `value`;
+ // value - the tuple value to convert.
+ //
+ // Returns a Python tuple with one converted item per element of `type`.
+ // Throws yexception if the Python tuple cannot be allocated.
+ TPyObjectPtr ToPyTuple(const TPyCastContext::TPtr& ctx, const NUdf::TType* type, const NUdf::TUnboxedValuePod& value)
+ {
+     const NUdf::TTupleTypeInspector inspector(*ctx->PyCtx->TypeInfoHelper, type);
+     const auto elementsCount = inspector.GetElementsCount();
+
+     const TPyObjectPtr tuple(PyTuple_New(elementsCount));
+     if (!tuple) {
+         // PyTuple_SET_ITEM() below performs no checks, so a failed allocation
+         // must be reported here instead of being written through.
+         throw yexception() << "Can't create tuple: " << GetLastErrorAsString();
+     }
+
+     if (auto ptr = value.GetElements()) {
+         // Direct access to the element array is available.
+         for (ui32 i = 0U; i < elementsCount; ++i) {
+             auto item = ToPyObject(ctx, inspector.GetElementType(i), *ptr++);
+             // PyTuple_SET_ITEM() steals the reference, hence Release().
+             PyTuple_SET_ITEM(tuple.Get(), i, item.Release());
+         }
+     } else {
+         // No element array: fetch the elements one by one.
+         for (ui32 i = 0U; i < elementsCount; ++i) {
+             auto item = ToPyObject(ctx, inspector.GetElementType(i), value.GetElement(i));
+             PyTuple_SET_ITEM(tuple.Get(), i, item.Release());
+         }
+     }
+
+     return tuple;
+ }
+
+ // Builds a tuple value from a Python sequence or iterable.
+ //
+ // ctx   - cast context; supplies the type info helper and the value builder;
+ // type  - target tuple type;
+ // value - any object accepted by PySequence_Fast() (tuple, list, other iterable).
+ //
+ // Returns an array value with one converted item per element of `type`.
+ // Throws yexception when `value` is not a sequence/iterable or when its length differs
+ // from the number of elements in `type`; exceptions from FromPyObject() propagate.
+ NUdf::TUnboxedValue FromPyTuple(const TPyCastContext::TPtr& ctx, const NUdf::TType* type, PyObject* value)
+ {
+     const NUdf::TTupleTypeInspector inspector(*ctx->PyCtx->TypeInfoHelper, type);
+     const TPyObjectPtr fast = PySequence_Fast(value, "Expected tuple or list.");
+     if (!fast) {
+         // PySequence_Fast() left a Python exception pending. Drop it before calling back
+         // into the interpreter for repr() and before reporting the failure as a C++
+         // exception, so that it cannot leak into unrelated Python calls later on.
+         PyErr_Clear();
+         throw yexception() << "Expected Tuple or Sequence but got: " << PyObjectRepr(value);
+     }
+
+     const Py_ssize_t itemsCount = PySequence_Fast_GET_SIZE(fast.Get());
+     if (itemsCount < 0 || inspector.GetElementsCount() != itemsCount) {
+         throw yexception() << "Tuple elements count mismatch.";
+     }
+
+     NUdf::TUnboxedValue* tuple_items = nullptr;
+     const auto tuple = ctx->ValueBuilder->NewArray(inspector.GetElementsCount(), tuple_items);
+     for (Py_ssize_t i = 0; i < itemsCount; i++) {
+         // Borrowed reference: `fast` keeps the item alive.
+         const auto item = PySequence_Fast_GET_ITEM(fast.Get(), i);
+         *tuple_items++ = FromPyObject(ctx, inspector.GetElementType(i), item);
+     }
+
+     return tuple;
+ }
+
+}
diff --git a/yql/essentials/udfs/common/python/bindings/py_tuple.h b/yql/essentials/udfs/common/python/bindings/py_tuple.h
new file mode 100644
index 00000000000..7d66af9b011
--- /dev/null
+++ b/yql/essentials/udfs/common/python/bindings/py_tuple.h
@@ -0,0 +1,17 @@
+#pragma once
+
+#include "py_ptr.h"
+#include "py_ctx.h"
+
+namespace NPython {
+
+ // Converts the tuple value `value` of tuple type `type` into a Python tuple,
+ // converting every element with ToPyObject().
+ TPyObjectPtr ToPyTuple(
+ const TPyCastContext::TPtr& castCtx,
+ const NKikimr::NUdf::TType* type,
+ const NKikimr::NUdf::TUnboxedValuePod& value);
+
+ // Converts the Python object `value` (anything PySequence_Fast() accepts) into a value of
+ // tuple type `type`. Throws yexception if `value` is not a sequence/iterable or if its
+ // length does not match the number of elements in `type`.
+ NKikimr::NUdf::TUnboxedValue FromPyTuple(
+ const TPyCastContext::TPtr& ctx,
+ const NKikimr::NUdf::TType* type, PyObject* value);
+
+} // namespace NPython
diff --git a/yql/essentials/udfs/common/python/bindings/py_tuple_ut.cpp b/yql/essentials/udfs/common/python/bindings/py_tuple_ut.cpp
new file mode 100644
index 00000000000..a6b9b6cc3e4
--- /dev/null
+++ b/yql/essentials/udfs/common/python/bindings/py_tuple_ut.cpp
@@ -0,0 +1,108 @@
+#include "ut3/py_test_engine.h"
+
+#include <library/cpp/testing/unittest/registar.h>
+
+
+using namespace NPython;
+
+Y_UNIT_TEST_SUITE(TPyTupleTest) {
+ // An empty Python tuple is converted to an empty tuple type. The checker lambda is empty:
+ // the test only requires the conversion itself to complete.
+ Y_UNIT_TEST(FromPyEmptyTuple) {
+ TPythonTestEngine engine;
+ engine.ToMiniKQL<NUdf::TTuple<>>(
+ "def Test(): return ()",
+ [](const NUdf::TUnboxedValuePod&) {});
+ }
+
+ // A Python list is accepted where a tuple type is expected; elements keep their order.
+ Y_UNIT_TEST(FromPyList) {
+ TPythonTestEngine engine;
+ engine.ToMiniKQL<NUdf::TTuple<int, int, int>>(
+ "def Test(): return [1, 2, 3]",
+ [](const NUdf::TUnboxedValuePod& value) {
+ UNIT_ASSERT(value);
+ UNIT_ASSERT(value.IsBoxed());
+ UNIT_ASSERT_EQUAL(value.GetElement(0).Get<int>(), 1);
+ UNIT_ASSERT_EQUAL(value.GetElement(1).Get<int>(), 2);
+ UNIT_ASSERT_EQUAL(value.GetElement(2).Get<int>(), 3);
+ });
+ }
+
+ // A Python iterator is accepted where a tuple type is expected.
+ // The iterator is taken over a list rather than a set: the assertions below depend on
+ // element order, and Python does not guarantee any iteration order for sets.
+ Y_UNIT_TEST(FromPyIter) {
+     TPythonTestEngine engine;
+     engine.ToMiniKQL<NUdf::TTuple<int, int, int>>(
+         "def Test(): return iter([1, 2, 3])",
+         [](const NUdf::TUnboxedValuePod& value) {
+             UNIT_ASSERT(value);
+             UNIT_ASSERT(value.IsBoxed());
+             UNIT_ASSERT_EQUAL(value.GetElement(0).Get<int>(), 1);
+             UNIT_ASSERT_EQUAL(value.GetElement(1).Get<int>(), 2);
+             UNIT_ASSERT_EQUAL(value.GetElement(2).Get<int>(), 3);
+         });
+ }
+
+ // A heterogeneous Python tuple (int, float, str) is converted element by element
+ // into a tuple of int, double and string.
+ Y_UNIT_TEST(FromPyTuple) {
+ TPythonTestEngine engine;
+ engine.ToMiniKQL<NUdf::TTuple<int, double, char*>>(
+ "def Test(): return (1, float(2.3), '4')",
+ [](const NUdf::TUnboxedValuePod& value) {
+ UNIT_ASSERT(value);
+ UNIT_ASSERT(value.IsBoxed());
+ UNIT_ASSERT_EQUAL(value.GetElement(0).Get<int>(), 1);
+ auto second = value.GetElement(1);
+ UNIT_ASSERT_DOUBLES_EQUAL(second.Get<double>(), 2.3, 0.0001);
+ const auto third = value.GetElement(2);
+ UNIT_ASSERT_EQUAL(third.AsStringRef(), "4");
+ });
+ }
+
+ // A Python tuple containing another tuple is converted recursively; the nested tuple
+ // must itself be a boxed value with its own elements.
+ Y_UNIT_TEST(FromPyTupleInTuple) {
+ TPythonTestEngine engine;
+ engine.ToMiniKQL<NUdf::TTuple<ui32, NUdf::TTuple<ui8, float>, char*>>(
+ "def Test(): return (1, (2, float(3.4)), '5')",
+ [](const NUdf::TUnboxedValuePod& value) {
+ UNIT_ASSERT(value);
+ UNIT_ASSERT(value.IsBoxed());
+ UNIT_ASSERT_EQUAL(value.GetElement(0).Get<ui32>(), 1);
+
+ auto second = value.GetElement(1);
+ UNIT_ASSERT(second);
+ UNIT_ASSERT(second.IsBoxed());
+ UNIT_ASSERT_EQUAL(second.GetElement(0).Get<ui8>(), 2);
+ UNIT_ASSERT_DOUBLES_EQUAL(
+ second.GetElement(1).Get<float>(), 3.4, 0.0001);
+
+ const auto third = value.GetElement(2);
+ UNIT_ASSERT_EQUAL(third.AsStringRef(), "5");
+ });
+ }
+
+ // A zero-element tuple value is handed to Python and must arrive as the empty tuple ().
+ Y_UNIT_TEST(ToPyEmptyTuple) {
+ TPythonTestEngine engine;
+ engine.ToPython<NUdf::TTuple<>>(
+ [](const TType* type, const NUdf::IValueBuilder& vb) {
+ NUdf::TUnboxedValue* items = nullptr;
+ return vb.NewArray(static_cast<const TTupleType*>(type)->GetElementsCount(), items);
+ },
+ "def Test(value):\n"
+ " assert isinstance(value, tuple)\n"
+ " assert len(value) == 0\n"
+ " assert value == ()\n");
+ }
+
+ // A four-element tuple value (utf8 string, ui64, ui8, float) is handed to Python and
+ // must compare equal to the corresponding Python tuple.
+ Y_UNIT_TEST(ToPyTuple) {
+ TPythonTestEngine engine;
+ engine.ToPython<NUdf::TTuple<NUdf::TUtf8, ui64, ui8, float>>(
+ [](const TType* type, const NUdf::IValueBuilder& vb) {
+ NUdf::TUnboxedValue* items = nullptr;
+ auto tuple = vb.NewArray(static_cast<const TTupleType*>(type)->GetElementsCount(), items);
+ items[0] = vb.NewString("111");
+ items[1] = NUdf::TUnboxedValuePod((ui64) 2);
+ items[2] = NUdf::TUnboxedValuePod((ui8) 3);
+ items[3] = NUdf::TUnboxedValuePod((float) 4.5);
+ return tuple;
+ },
+ "def Test(value):\n"
+ " assert isinstance(value, tuple)\n"
+ " assert len(value) == 4\n"
+ " assert value == ('111', 2, 3, 4.5)\n");
+ }
+}
diff --git a/yql/essentials/udfs/common/python/bindings/py_tzdate_ut.cpp b/yql/essentials/udfs/common/python/bindings/py_tzdate_ut.cpp
new file mode 100644
index 00000000000..e9f5971c78f
--- /dev/null
+++ b/yql/essentials/udfs/common/python/bindings/py_tzdate_ut.cpp
@@ -0,0 +1,85 @@
+#include "py_variant.h"
+#include "ut3/py_test_engine.h"
+#include <yql/essentials/minikql/mkql_type_ops.h>
+
+#include <library/cpp/testing/unittest/registar.h>
+
+
+using namespace NPython;
+
+ // Round-trip tests for the timezone-aware date types. On the Python side each value is a
+ // (number, timezone name) tuple; on the C++ side it is a numeric value plus a timezone id.
+ Y_UNIT_TEST_SUITE(TPyTzDateTest) {
+ // Python (2, 'Europe/Moscow') -> TTzDate: ui16 payload 2 and the Europe/Moscow timezone id.
+ Y_UNIT_TEST(FromDate) {
+ TPythonTestEngine engine;
+ engine.ToMiniKQL<NUdf::TTzDate>(
+ "def Test():\n"
+ " return (2, 'Europe/Moscow')\n",
+ [](const NUdf::TUnboxedValuePod& value) {
+ UNIT_ASSERT(value);
+ UNIT_ASSERT_VALUES_EQUAL(value.Get<ui16>(), 2);
+ UNIT_ASSERT_VALUES_EQUAL(value.GetTimezoneId(), NKikimr::NMiniKQL::GetTimezoneId("Europe/Moscow"));
+ });
+ }
+
+ // Python (2, 'Europe/Moscow') -> TTzDatetime: ui32 payload 2 and the Europe/Moscow timezone id.
+ Y_UNIT_TEST(FromDatetime) {
+ TPythonTestEngine engine;
+ engine.ToMiniKQL<NUdf::TTzDatetime>(
+ "def Test():\n"
+ " return (2, 'Europe/Moscow')\n",
+ [](const NUdf::TUnboxedValuePod& value) {
+ UNIT_ASSERT(value);
+ UNIT_ASSERT_VALUES_EQUAL(value.Get<ui32>(), 2);
+ UNIT_ASSERT_VALUES_EQUAL(value.GetTimezoneId(), NKikimr::NMiniKQL::GetTimezoneId("Europe/Moscow"));
+ });
+ }
+
+ // Python (2, 'Europe/Moscow') -> TTzTimestamp: ui64 payload 2 and the Europe/Moscow timezone id.
+ Y_UNIT_TEST(FromTimestamp) {
+ TPythonTestEngine engine;
+ engine.ToMiniKQL<NUdf::TTzTimestamp>(
+ "def Test():\n"
+ " return (2, 'Europe/Moscow')\n",
+ [](const NUdf::TUnboxedValuePod& value) {
+ UNIT_ASSERT(value);
+ UNIT_ASSERT_VALUES_EQUAL(value.Get<ui64>(), 2);
+ UNIT_ASSERT_VALUES_EQUAL(value.GetTimezoneId(), NKikimr::NMiniKQL::GetTimezoneId("Europe/Moscow"));
+ });
+ }
+
+ // TTzDate (ui16 payload 2, Europe/Moscow) -> Python tuple (2, 'Europe/Moscow').
+ Y_UNIT_TEST(ToDate) {
+ TPythonTestEngine engine;
+ engine.ToPython<NUdf::TTzDate>(
+ [](const TType* /*type*/, const NUdf::IValueBuilder& /*vb*/) {
+ auto ret = NUdf::TUnboxedValuePod((ui16)2);
+ ret.SetTimezoneId(NKikimr::NMiniKQL::GetTimezoneId("Europe/Moscow"));
+ return ret;
+ },
+ "def Test(value):\n"
+ " assert isinstance(value, tuple)\n"
+ " assert value == (2, 'Europe/Moscow')\n");
+ }
+
+ // TTzDatetime (ui32 payload 2, Europe/Moscow) -> Python tuple (2, 'Europe/Moscow').
+ Y_UNIT_TEST(ToDatetime) {
+ TPythonTestEngine engine;
+ engine.ToPython<NUdf::TTzDatetime>(
+ [](const TType* /*type*/, const NUdf::IValueBuilder& /*vb*/) {
+ auto ret = NUdf::TUnboxedValuePod((ui32)2);
+ ret.SetTimezoneId(NKikimr::NMiniKQL::GetTimezoneId("Europe/Moscow"));
+ return ret;
+ },
+ "def Test(value):\n"
+ " assert isinstance(value, tuple)\n"
+ " assert value == (2, 'Europe/Moscow')\n");
+ }
+
+ // TTzTimestamp (ui64 payload 2, Europe/Moscow) -> Python tuple (2, 'Europe/Moscow').
+ Y_UNIT_TEST(ToTimestamp) {
+ TPythonTestEngine engine;
+ engine.ToPython<NUdf::TTzTimestamp>(
+ [](const TType* /*type*/, const NUdf::IValueBuilder& /*vb*/) {
+ auto ret = NUdf::TUnboxedValuePod((ui64)2);
+ ret.SetTimezoneId(NKikimr::NMiniKQL::GetTimezoneId("Europe/Moscow"));
+ return ret;
+ },
+ "def Test(value):\n"
+ " assert isinstance(value, tuple)\n"
+ " assert value == (2, 'Europe/Moscow')\n");
+ }
+ }
diff --git a/yql/essentials/udfs/common/python/bindings/py_utils.cpp b/yql/essentials/udfs/common/python/bindings/py_utils.cpp
new file mode 100644
index 00000000000..d1e0e8b4846
--- /dev/null
+++ b/yql/essentials/udfs/common/python/bindings/py_utils.cpp
@@ -0,0 +1,89 @@
+#include "py_utils.h"
+#include "py_cast.h"
+#include "py_errors.h"
+#include "py_gil.h"
+
+#include <util/generic/yexception.h>
+#include <util/string/split.h>
+
+#include <regex>
+
+
+namespace NPython {
+
+ // Creates a Python string object (unicode on Python 3, str on Python 2) from an ASCII buffer.
+ //
+ // asciiStr - bytes to copy; the process is aborted if any byte has the high bit set;
+ // intern   - when true, the resulting string is interned before being returned.
+ //
+ // Aborts as well if the Python string cannot be created.
+ TPyObjectPtr PyRepr(TStringBuf asciiStr, bool intern) {
+     const char* const bytes = asciiStr.data();
+     const size_t length = asciiStr.size();
+
+     // Reject anything outside 7-bit ASCII up front.
+     for (size_t pos = 0; pos < length; ++pos) {
+         const char c = bytes[pos];
+         Y_ABORT_UNLESS((c&0x80) == 0, "expected ascii");
+     }
+
+ #if PY_MAJOR_VERSION >= 3
+     TPyObjectPtr pyStr = PyUnicode_FromStringAndSize(bytes, static_cast<Py_ssize_t>(length));
+ #else
+     TPyObjectPtr pyStr = PyString_FromStringAndSize(bytes, static_cast<Py_ssize_t>(length));
+ #endif
+     Y_ABORT_UNLESS(pyStr, "Can't get repr string");
+
+     if (intern) {
+         // The interning call may swap the object behind the pointer, so ownership is
+         // handed over to a raw pointer for the call and re-wrapped afterwards.
+         PyObject* raw = pyStr.Release();
+ #if PY_MAJOR_VERSION >= 3
+         PyUnicode_InternInPlace(&raw);
+ #else
+         PyString_InternInPlace(&raw);
+ #endif
+         return TPyObjectPtr(raw);
+     }
+
+     return pyStr;
+ }
+
+ // Returns repr(value) as a string suitable for diagnostics.
+ //
+ // * if repr() itself fails, returns "repr error: " followed by the last Python error;
+ // * if the repr object cannot be converted to a string, returns a fixed placeholder;
+ // * results longer than 1000 bytes are cut so that, with the "(truncated)" suffix
+ //   appended, the returned string is exactly 1000 bytes long.
+ TString PyObjectRepr(PyObject* value) {
+     static constexpr std::string_view truncSuffix = "(truncated)";
+     static constexpr size_t maxLen = 1000;
+
+     const TPyObjectPtr reprObj(PyObject_Repr(value));
+     if (!reprObj) {
+         return TString("repr error: ") + GetLastErrorAsString();
+     }
+
+     TString text;
+     const bool converted = TryPyCast(reprObj.Get(), text);
+     if (!converted) {
+         text = "can't get repr as string";
+     }
+
+     if (text.size() <= maxLen) {
+         return text;
+     }
+
+     // Leave room for the suffix so the total length stays at maxLen.
+     text.resize(maxLen - truncSuffix.size());
+     text += truncSuffix;
+     return text;
+ }
+
+ // Tells whether `source` declares its encoding with a PEP 263 magic comment.
+ //
+ // source - Python source text; lines may end with "\n" or "\r\n".
+ //
+ // Returns true if the first or the second line is a comment containing
+ // "coding:" or "coding=" followed by an encoding name, false otherwise.
+ bool HasEncodingCookie(const TString& source) {
+     //
+     // To define a source code encoding, a magic comment must be placed
+     // into the source files either as first or second line in the file.
+     //
+     // See https://www.python.org/dev/peps/pep-0263 for more details.
+     //
+
+     // PEP 263 lists space, tab and form feed as allowed leading whitespace;
+     // vertical tab is kept for compatibility with the previous pattern.
+     static const std::regex encodingRe(
+         "^[ \\t\\f\\v]*#.*?coding[:=][ \\t]*[-_.a-zA-Z0-9]+.*");
+
+     int i = 0;
+     for (const auto& it: StringSplitter(source).Split('\n')) {
+         if (i++ == 2) break;
+
+         TStringBuf line = it.Token();
+
+         // With CRLF line endings the token still ends with '\r'. In the ECMAScript
+         // grammar '.' does not match a carriage return, so the trailing ".*" could
+         // never consume it and regex_match() would reject an otherwise valid cookie.
+         auto lineEnd = line.end();
+         if (!line.empty() && line.back() == '\r') {
+             --lineEnd;
+         }
+
+         if (std::regex_match(line.begin(), lineEnd, encodingRe)) {
+             return true;
+         }
+     }
+     return false;
+ }
+
+ // Drops leftover Python error state: clears the pending error indicator and overwrites
+ // sys.last_type, sys.last_value and sys.last_traceback with None.
+ // NOTE(review): TPyGilLocker presumably holds the GIL for the duration of the scope - confirm.
+ void PyCleanup() {
+ TPyGilLocker lock;
+ PyErr_Clear();
+ PySys_SetObject("last_type", Py_None);
+ PySys_SetObject("last_value", Py_None);
+ PySys_SetObject("last_traceback", Py_None);
+ }
+
+ } // namespace NPython
diff --git a/yql/essentials/udfs/common/python/bindings/py_utils.h b/yql/essentials/udfs/common/python/bindings/py_utils.h
new file mode 100644
index 00000000000..0c5ef058f1a
--- /dev/null
+++ b/yql/essentials/udfs/common/python/bindings/py_utils.h
@@ -0,0 +1,28 @@
+#pragma once
+
+#include "py_ptr.h"
+
+#include <util/generic/strbuf.h>
+
+ // INIT_MEMBER(member, value) spells one initializer of an aggregate (see PyVoidType):
+ //  - when _win_ is defined it expands to the bare value, i.e. positional initialization
+ //    (the member name survives only as a trailing comment in the macro body);
+ //  - otherwise it expands to the designated form `.member = (value)`.
+ // Since the positional form ignores the name, initializers must be listed in the
+ // declaration order of the members.
+ #ifdef _win_
+ #define INIT_MEMBER(member, value) value //member
+ #else
+ #define INIT_MEMBER(member, value) .member = (value)
+ #endif
+
+namespace NPython {
+
+ // Creates a Python string from an ASCII-only buffer; `intern` requests interning of the result.
+ TPyObjectPtr PyRepr(TStringBuf asciiStr, bool intern = false);
+
+ // Overload for character arrays such as string literals: the length is taken from the
+ // array size minus one (the terminating NUL) and the resulting string is always interned.
+ template <size_t size>
+ TPyObjectPtr PyRepr(const char(&str)[size]) {
+ return PyRepr(TStringBuf(str, size - 1), true);
+ }
+
+ // Returns repr(value) for diagnostics; long results are truncated and failures are
+ // reported inside the returned string.
+ TString PyObjectRepr(PyObject* value);
+
+ // Returns true if one of the first two lines of `source` carries a PEP 263 encoding comment.
+ bool HasEncodingCookie(const TString& source);
+
+ // Clears the pending Python error and resets sys.last_type / last_value / last_traceback to None.
+ void PyCleanup();
+
+ } // namespace NPython
diff --git a/yql/essentials/udfs/common/python/bindings/py_utils_ut.cpp b/yql/essentials/udfs/common/python/bindings/py_utils_ut.cpp
new file mode 100644
index 00000000000..ce521689b40
--- /dev/null
+++ b/yql/essentials/udfs/common/python/bindings/py_utils_ut.cpp
@@ -0,0 +1,37 @@
+#include "py_utils.h"
+
+#include <library/cpp/testing/unittest/registar.h>
+
+
+using namespace NPython;
+
+ // Tests for the helpers declared in py_utils.h.
+ Y_UNIT_TEST_SUITE(TPyUtilsTest) {
+
+ // HasEncodingCookie() must recognise the PEP 263 magic comment in its common spellings
+ // on the first or second line, and must ignore it anywhere else.
+ Y_UNIT_TEST(EncodingCookie) {
+ UNIT_ASSERT(HasEncodingCookie("# -*- coding: latin-1 -*-"));
+ UNIT_ASSERT(HasEncodingCookie("# -*- coding:latin-1 -*-"));
+ UNIT_ASSERT(HasEncodingCookie("# -*- coding=latin-1 -*-"));
+ UNIT_ASSERT(HasEncodingCookie("# -*- encoding: latin-1 -*-"));
+ UNIT_ASSERT(HasEncodingCookie("# -*- encoding:latin-1 -*-"));
+ UNIT_ASSERT(HasEncodingCookie("# -*- encoding=latin-1 -*-"));
+ UNIT_ASSERT(HasEncodingCookie("# -*- coding: iso-8859-15 -*-"));
+ UNIT_ASSERT(HasEncodingCookie("# -*- coding: ascii -*-"));
+ UNIT_ASSERT(HasEncodingCookie(
+ "# This Python file uses the following encoding: utf-8"));
+
+ // encoding comment on second line
+ UNIT_ASSERT(HasEncodingCookie(
+ "#!/usr/local/bin/python\n"
+ "# -*- coding: iso-8859-15 -*-\n"
+ "print 'hello'"));
+
+ // missing "coding:" prefix
+ UNIT_ASSERT(false == HasEncodingCookie("# latin-1"));
+
+ // encoding comment not on line 1 or 2
+ UNIT_ASSERT(false == HasEncodingCookie(
+ "#!/usr/local/bin/python\n"
+ "#\n"
+ "# -*- coding: latin-1 -*-\n"));
+ }
+ }
diff --git a/yql/essentials/udfs/common/python/bindings/py_variant.cpp b/yql/essentials/udfs/common/python/bindings/py_variant.cpp
new file mode 100644
index 00000000000..ab222b34323
--- /dev/null
+++ b/yql/essentials/udfs/common/python/bindings/py_variant.cpp
@@ -0,0 +1,97 @@
+#include "py_variant.h"
+#include "py_cast.h"
+#include "py_errors.h"
+#include "py_utils.h"
+
+#include <yql/essentials/public/udf/udf_value.h>
+#include <yql/essentials/public/udf/udf_value_builder.h>
+#include <yql/essentials/public/udf/udf_type_inspection.h>
+
+
+using namespace NKikimr;
+
+namespace NPython {
+
+//////////////////////////////////////////////////////////////////////////////
+// public functions
+//////////////////////////////////////////////////////////////////////////////
+ // Converts a variant value into a two-element Python tuple.
+ //
+ // For a variant over a tuple the result is (index, item); for a variant over a struct
+ // it is (member name, item). The item is converted with ToPyObject().
+ // Throws yexception when the underlying type is neither a tuple nor a struct.
+ TPyObjectPtr ToPyVariant(
+     const TPyCastContext::TPtr& castCtx,
+     const NUdf::TType* type,
+     const NUdf::TUnboxedValuePod& value)
+ {
+     auto& typeHelper = *castCtx->PyCtx->TypeInfoHelper;
+     const NUdf::TType* underlying = NUdf::TVariantTypeInspector(typeHelper, type).GetUnderlyingType();
+
+     const ui32 alternative = value.GetVariantIndex();
+     auto payload = value.GetVariantItem();
+
+     // Variant over a tuple: the alternative is identified by its position.
+     if (auto asTuple = NUdf::TTupleTypeInspector(typeHelper, underlying)) {
+         TPyObjectPtr pyTag = PyCast<ui32>(alternative);
+         TPyObjectPtr pyPayload = ToPyObject(castCtx, asTuple.GetElementType(alternative), payload);
+         return PyTuple_Pack(2, pyTag.Get(), pyPayload.Get());
+     }
+
+     // Variant over a struct: the alternative is identified by the member name.
+     if (auto asStruct = NUdf::TStructTypeInspector(typeHelper, underlying)) {
+         TPyObjectPtr pyTag = ToPyUnicode<NUdf::TStringRef>(
+             asStruct.GetMemberName(alternative));
+         TPyObjectPtr pyPayload = ToPyObject(castCtx, asStruct.GetMemberType(alternative), payload);
+         return PyTuple_Pack(2, pyTag.Get(), pyPayload.Get());
+     }
+
+     throw yexception() << "Cannot get Variant item type";
+ }
+
+ // Converts a two-element Python tuple into a variant value.
+ //
+ // The first tuple element selects the alternative: an integer index for a variant over
+ // a tuple, or a member name for a variant over a struct. The second element is the item
+ // and is converted with FromPyObject() using the selected alternative's type.
+ // Fails (PY_ENSURE / yexception) when `value` is not a 2-tuple, when the selector does not
+ // fit the underlying type, or when the index / member name is unknown.
+ NUdf::TUnboxedValue FromPyVariant(
+     const TPyCastContext::TPtr& castCtx,
+     const NUdf::TType* type,
+     PyObject* value)
+ {
+     PY_ENSURE(PyTuple_Check(value),
+         "Expected to get Tuple, but got " << Py_TYPE(value)->tp_name);
+
+     Py_ssize_t tupleSize = PyTuple_GET_SIZE(value);
+     PY_ENSURE(tupleSize == 2,
+         "Expected to get Tuple with 2 elements, but got "
+         << tupleSize << " elements");
+
+     auto& th = *castCtx->PyCtx->TypeInfoHelper;
+     const NUdf::TType* subType = NUdf::TVariantTypeInspector(th, type).GetUnderlyingType();
+
+     // Borrowed references: `value` keeps both elements alive.
+     PyObject* selector = PyTuple_GET_ITEM(value, 0);
+     PyObject* payload = PyTuple_GET_ITEM(value, 1);
+
+     ui32 index;
+     if (TryPyCast(selector, index)) {
+         // Integer selector: only valid for a variant over a tuple.
+         auto tupleInsp = NUdf::TTupleTypeInspector(th, subType);
+         if (!tupleInsp) {
+             throw yexception() << "Cannot convert " << PyObjectRepr(value)
+                 << " underlying Variant type is not a Tuple";
+         }
+         PY_ENSURE(index < tupleInsp.GetElementsCount(),
+             "Index must be < " << tupleInsp.GetElementsCount()
+             << ", but got " << index);
+         return castCtx->ValueBuilder->NewVariant(
+             index, FromPyObject(castCtx, tupleInsp.GetElementType(index), payload));
+     }
+
+     NUdf::TStringRef name;
+     if (TryPyCast(selector, name)) {
+         // String selector: only valid for a variant over a struct.
+         auto structInsp = NUdf::TStructTypeInspector(th, subType);
+         if (!structInsp) {
+             throw yexception() << "Cannot convert " << PyObjectRepr(value)
+                 << " underlying Variant type is not a Struct";
+         }
+         index = structInsp.GetMemberIndex(name);
+         PY_ENSURE(index < structInsp.GetMembersCount(),
+             "Unknown member name: " << TStringBuf(name));
+         return castCtx->ValueBuilder->NewVariant(
+             index, FromPyObject(castCtx, structInsp.GetMemberType(index), payload));
+     }
+
+     throw yexception()
+         << "Expected first Tuple element to either be an int "
+            "or a str, but got " << Py_TYPE(selector)->tp_name;
+ }
+
+ } // namespace NPython
diff --git a/yql/essentials/udfs/common/python/bindings/py_variant.h b/yql/essentials/udfs/common/python/bindings/py_variant.h
new file mode 100644
index 00000000000..ca97123183b
--- /dev/null
+++ b/yql/essentials/udfs/common/python/bindings/py_variant.h
@@ -0,0 +1,17 @@
+#pragma once
+
+#include "py_ctx.h"
+
+namespace NPython {
+
+ // Converts the variant `value` of variant type `type` into a two-element Python tuple:
+ // (index, item) for a variant over a tuple, (member name, item) for a variant over a struct.
+ TPyObjectPtr ToPyVariant(
+ const TPyCastContext::TPtr& castCtx,
+ const NKikimr::NUdf::TType* type,
+ const NKikimr::NUdf::TUnboxedValuePod& value);
+
+ // Converts a two-element Python tuple into a value of variant type `type`. The first
+ // element selects the alternative (integer index or member name), the second is the item.
+ NKikimr::NUdf::TUnboxedValue FromPyVariant(
+ const TPyCastContext::TPtr& castCtx,
+ const NKikimr::NUdf::TType* type,
+ PyObject* value);
+
+ } // namespace NPython
diff --git a/yql/essentials/udfs/common/python/bindings/py_variant_ut.cpp b/yql/essentials/udfs/common/python/bindings/py_variant_ut.cpp
new file mode 100644
index 00000000000..77ab9bc6e8a
--- /dev/null
+++ b/yql/essentials/udfs/common/python/bindings/py_variant_ut.cpp
@@ -0,0 +1,101 @@
+#include "py_variant.h"
+#include "ut3/py_test_engine.h"
+
+#include <library/cpp/testing/unittest/registar.h>
+
+
+using namespace NPython;
+
+ // Conversion tests for variant values. On the Python side a variant is a 2-tuple:
+ // (index, item) for variants over tuples, (member name, item) for variants over structs.
+ Y_UNIT_TEST_SUITE(TPyVariantTest) {
+ // Python (2, 'hello') selects alternative #2 (the string) of Variant<float, ui32, char*>.
+ Y_UNIT_TEST(FromPyWithIndex) {
+ TPythonTestEngine engine;
+ engine.ToMiniKQL<NUdf::TVariant<float, ui32, char*>>(
+ "def Test():\n"
+ " return (2, 'hello')\n",
+ [](const NUdf::TUnboxedValuePod& value) {
+ UNIT_ASSERT(value);
+ UNIT_ASSERT_EQUAL(value.GetVariantIndex(), 2);
+ auto item = value.GetVariantItem();
+ UNIT_ASSERT_STRINGS_EQUAL(item.AsStringRef(), "hello");
+ });
+ }
+
+ // For a variant over a struct the alternative is selected by member name; both members
+ // of the struct are exercised in turn.
+ Y_UNIT_TEST(FromPyWithName) {
+ TPythonTestEngine engine;
+
+ ui32 ageIdx = 0, nameIdx = 0;
+ NUdf::TType* personType = engine.GetTypeBuilder().Struct()->
+ AddField<ui32>("age", &ageIdx)
+ .AddField<char*>("name", &nameIdx)
+ .Build();
+
+ NUdf::TType* variantType = engine.GetTypeBuilder()
+ .Variant()->Over(personType).Build();
+
+ engine.ToMiniKQL(
+ variantType,
+ "def Test():\n"
+ " return ('age', 99)\n",
+ [ageIdx](const NUdf::TUnboxedValuePod& value) {
+ UNIT_ASSERT(value);
+ UNIT_ASSERT_EQUAL(value.GetVariantIndex(), ageIdx);
+ auto item = value.GetVariantItem();
+ UNIT_ASSERT_EQUAL(item.Get<ui32>(), 99);
+ });
+
+ engine.ToMiniKQL(
+ variantType,
+ "def Test():\n"
+ " return ('name', 'Jamel')\n",
+ [nameIdx](const NUdf::TUnboxedValuePod& value) {
+ UNIT_ASSERT(value);
+ UNIT_ASSERT_EQUAL(value.GetVariantIndex(), nameIdx);
+ auto item = value.GetVariantItem();
+ UNIT_ASSERT_STRINGS_EQUAL(item.AsStringRef(), "Jamel");
+ });
+ }
+
+ // Alternative #1 (ui32 42) of a variant over a tuple arrives in Python as (1, 42).
+ Y_UNIT_TEST(ToPyWithIndex) {
+ TPythonTestEngine engine;
+ engine.ToPython<NUdf::TVariant<float, ui32, char*>>(
+ [](const TType* /*type*/, const NUdf::IValueBuilder& vb) {
+ return vb.NewVariant(1, NUdf::TUnboxedValuePod((ui32) 42));
+ },
+ "def Test(value):\n"
+ " assert isinstance(value, tuple)\n"
+ " assert value == (1, 42)\n");
+ }
+
+ // Alternatives of a variant over a struct arrive in Python as (member name, item);
+ // both members of the struct are exercised in turn.
+ Y_UNIT_TEST(ToPyWithName) {
+ TPythonTestEngine engine;
+
+ ui32 ageIdx = 0, nameIdx = 0;
+ NUdf::TType* personType = engine.GetTypeBuilder().Struct()->
+ AddField<ui32>("age", &ageIdx)
+ .AddField<NUdf::TUtf8>("name", &nameIdx)
+ .Build();
+
+ NUdf::TType* variantType = engine.GetTypeBuilder()
+ .Variant()->Over(personType).Build();
+
+ engine.ToPython(
+ variantType,
+ [ageIdx](const TType* /*type*/, const NUdf::IValueBuilder& vb) {
+ return vb.NewVariant(ageIdx, NUdf::TUnboxedValuePod((ui32) 99));
+ },
+ "def Test(value):\n"
+ " assert isinstance(value, tuple)\n"
+ " assert value == ('age', 99)\n"
+ );
+
+ engine.ToPython(
+ variantType,
+ [nameIdx](const TType* /*type*/, const NUdf::IValueBuilder& vb) {
+ return vb.NewVariant(nameIdx, vb.NewString("Jamel"));
+ },
+ "def Test(value):\n"
+ " assert isinstance(value, tuple)\n"
+ " assert value == ('name', 'Jamel')\n"
+ );
+ }
+ }
diff --git a/yql/essentials/udfs/common/python/bindings/py_void.cpp b/yql/essentials/udfs/common/python/bindings/py_void.cpp
new file mode 100644
index 00000000000..ef72c052fbc
--- /dev/null
+++ b/yql/essentials/udfs/common/python/bindings/py_void.cpp
@@ -0,0 +1,117 @@
+#include "py_void.h"
+#include "py_errors.h"
+#include "py_utils.h"
+
+#include <yql/essentials/public/udf/udf_value.h>
+
+using namespace NKikimr;
+
+namespace NPython {
+namespace {
+
+// repr() handler for the yql.Void type: produces the text "yql.Void".
+static PyObject* VoidRepr(PyObject* /*self*/) {
+    auto text = PyRepr("yql.Void");
+    // Hand ownership of the resulting object over to the caller.
+    return text.Release();
+}
+
+// Deallocation handler for the yql.Void type: reaching it is treated as a
+// fatal error.
+static void VoidDealloc(PyObject* /*self*/) {
+    Py_FatalError("Deallocating yql.Void");
+}
+
+} // namespace
+
+// Static Python type object backing `yql.Void`.
+// The slots are listed one per line through INIT_MEMBER, with #if guards for
+// the slots that differ between Python versions (tp_print vs
+// tp_vectorcall_offset, tp_compare vs tp_as_async, tp_finalize, tp_vectorcall).
+// Only tp_name, tp_dealloc, tp_repr and tp_doc carry non-null values; every
+// other slot is nullptr or 0.
+PyTypeObject PyVoidType = {
+ PyVarObject_HEAD_INIT(&PyType_Type, 0)
+ INIT_MEMBER(tp_name , "yql.Void"),
+ INIT_MEMBER(tp_basicsize , 0),
+ INIT_MEMBER(tp_itemsize , 0),
+ INIT_MEMBER(tp_dealloc , VoidDealloc),
+#if PY_VERSION_HEX < 0x030800b4
+ INIT_MEMBER(tp_print , nullptr),
+#else
+ INIT_MEMBER(tp_vectorcall_offset, 0),
+#endif
+ INIT_MEMBER(tp_getattr , nullptr),
+ INIT_MEMBER(tp_setattr , nullptr),
+#if PY_MAJOR_VERSION >= 3
+ INIT_MEMBER(tp_as_async , nullptr),
+#else
+ INIT_MEMBER(tp_compare , nullptr),
+#endif
+ INIT_MEMBER(tp_repr , VoidRepr),
+ INIT_MEMBER(tp_as_number , nullptr),
+ INIT_MEMBER(tp_as_sequence , nullptr),
+ INIT_MEMBER(tp_as_mapping , nullptr),
+ INIT_MEMBER(tp_hash , nullptr),
+ INIT_MEMBER(tp_call , nullptr),
+ INIT_MEMBER(tp_str , nullptr),
+ INIT_MEMBER(tp_getattro , nullptr),
+ INIT_MEMBER(tp_setattro , nullptr),
+ INIT_MEMBER(tp_as_buffer , nullptr),
+ INIT_MEMBER(tp_flags , 0),
+ INIT_MEMBER(tp_doc , "yql.Void object"),
+ INIT_MEMBER(tp_traverse , nullptr),
+ INIT_MEMBER(tp_clear , nullptr),
+ INIT_MEMBER(tp_richcompare , nullptr),
+ INIT_MEMBER(tp_weaklistoffset , 0),
+ INIT_MEMBER(tp_iter , nullptr),
+ INIT_MEMBER(tp_iternext , nullptr),
+ INIT_MEMBER(tp_methods , nullptr),
+ INIT_MEMBER(tp_members , nullptr),
+ INIT_MEMBER(tp_getset , nullptr),
+ INIT_MEMBER(tp_base , nullptr),
+ INIT_MEMBER(tp_dict , nullptr),
+ INIT_MEMBER(tp_descr_get , nullptr),
+ INIT_MEMBER(tp_descr_set , nullptr),
+ INIT_MEMBER(tp_dictoffset , 0),
+ INIT_MEMBER(tp_init , nullptr),
+ INIT_MEMBER(tp_alloc , nullptr),
+ INIT_MEMBER(tp_new , nullptr),
+ INIT_MEMBER(tp_free , nullptr),
+ INIT_MEMBER(tp_is_gc , nullptr),
+ INIT_MEMBER(tp_bases , nullptr),
+ INIT_MEMBER(tp_mro , nullptr),
+ INIT_MEMBER(tp_cache , nullptr),
+ INIT_MEMBER(tp_subclasses , nullptr),
+ INIT_MEMBER(tp_weaklist , nullptr),
+ INIT_MEMBER(tp_del , nullptr),
+ INIT_MEMBER(tp_version_tag , 0),
+#if PY_MAJOR_VERSION >= 3
+ INIT_MEMBER(tp_finalize , nullptr),
+#endif
+#if PY_VERSION_HEX >= 0x030800b1
+ INIT_MEMBER(tp_vectorcall , nullptr),
+#endif
+#if PY_VERSION_HEX >= 0x030800b4 && PY_VERSION_HEX < 0x03090000
+ INIT_MEMBER(tp_print , nullptr),
+#endif
+};
+
+// The single statically allocated `yql.Void` instance.
+// Initialized positionally: an initial reference count of 1 and PyVoidType as
+// its type. FromPyVoid recognizes Void values by comparing against the
+// address of this object.
+PyObject PyVoidObject = {
+ _PyObject_EXTRA_INIT
+ 1, &PyVoidType
+};
+
+// Converts a Void value to Python. None of the arguments are inspected: the
+// result is always a new reference to the shared yql.Void singleton.
+TPyObjectPtr ToPyVoid(
+    const TPyCastContext::TPtr& /*ctx*/,
+    const NUdf::TType* /*type*/,
+    const NUdf::TUnboxedValuePod& /*value*/)
+{
+    PyObject* singleton = &PyVoidObject;
+    return TPyObjectPtr(singleton, TPyObjectPtr::ADD_REF);
+}
+
+// Converts a Python object to a Void value. The only accepted input is the
+// yql.Void singleton itself (identity comparison); anything else fails the
+// PY_ENSURE check. The cast context and the type are not consulted.
+NUdf::TUnboxedValue FromPyVoid(
+    const TPyCastContext::TPtr& /*ctx*/,
+    const NUdf::TType* /*type*/,
+    PyObject* value)
+{
+    PY_ENSURE(value == &PyVoidObject, "Expected object of yql.Void type");
+    return NUdf::TUnboxedValue::Void();
+}
+
+} // namespace NPython
diff --git a/yql/essentials/udfs/common/python/bindings/py_void.h b/yql/essentials/udfs/common/python/bindings/py_void.h
new file mode 100644
index 00000000000..3c8203ab6e8
--- /dev/null
+++ b/yql/essentials/udfs/common/python/bindings/py_void.h
@@ -0,0 +1,21 @@
+#pragma once
+
+#include "py_ptr.h"
+#include "py_ctx.h"
+
+namespace NPython {
+
+// Python type object of `yql.Void` (defined in py_void.cpp).
+extern PyTypeObject PyVoidType;
+// The singleton `yql.Void` instance (defined in py_void.cpp).
+extern PyObject PyVoidObject;
+
+// Converts a Void value to its Python representation.
+// The implementation ignores all three arguments and returns a new reference
+// to PyVoidObject.
+TPyObjectPtr ToPyVoid(
+ const TPyCastContext::TPtr& ctx,
+ const NKikimr::NUdf::TType* type,
+ const NKikimr::NUdf::TUnboxedValuePod& value);
+
+// Converts a Python object to a Void value.
+// The implementation requires `value` to be PyVoidObject itself; `ctx` and
+// `type` are not used.
+NKikimr::NUdf::TUnboxedValue FromPyVoid(
+ const TPyCastContext::TPtr& ctx,
+ const NKikimr::NUdf::TType* type,
+ PyObject* value);
+
+} // namespace NPython
diff --git a/yql/essentials/udfs/common/python/bindings/py_void_ut.cpp b/yql/essentials/udfs/common/python/bindings/py_void_ut.cpp
new file mode 100644
index 00000000000..7fbeca20437
--- /dev/null
+++ b/yql/essentials/udfs/common/python/bindings/py_void_ut.cpp
@@ -0,0 +1,37 @@
+#include "ut3/py_test_engine.h"
+
+#include <library/cpp/testing/unittest/registar.h>
+
+
+using namespace NPython;
+
+Y_UNIT_TEST_SUITE(TPyVoidTest) {
+    // yql.Void returned from a Python function must convert to a value that
+    // is non-empty and not boxed.
+    Y_UNIT_TEST(FromPython) {
+        TPythonTestEngine engine;
+        engine.ToMiniKQL<void>(
+            "import yql\n"
+            "\n"
+            "def Test():\n"
+            " return yql.Void\n",
+            [](const NUdf::TUnboxedValue& value) {
+                UNIT_ASSERT(value);
+                UNIT_ASSERT(false == value.IsBoxed());
+            });
+    }
+
+    // A Void value passed to Python must be the yql.Void singleton, be an
+    // instance of yql.TVoid and print as 'yql.Void'.
+    Y_UNIT_TEST(ToPython) {
+        TPythonTestEngine engine;
+        engine.ToPython<void>(
+            [](const TType* /*type*/, const NUdf::IValueBuilder& /*vb*/) {
+                return NUdf::TUnboxedValue::Void();
+            },
+            "import yql\n"
+            "\n"
+            "def Test(value):\n"
+            " assert str(value) == 'yql.Void'\n"
+            " assert repr(value) == 'yql.Void'\n"
+            " assert isinstance(value, yql.TVoid)\n"
+            " assert value is yql.Void\n");
+    }
+}
diff --git a/yql/essentials/udfs/common/python/bindings/py_yql_module.cpp b/yql/essentials/udfs/common/python/bindings/py_yql_module.cpp
new file mode 100644
index 00000000000..5d1497f7c76
--- /dev/null
+++ b/yql/essentials/udfs/common/python/bindings/py_yql_module.cpp
@@ -0,0 +1,251 @@
+#include "py_yql_module.h"
+
+#include "py_void.h"
+#include "py_iterator.h"
+#include "py_list.h"
+#include "py_dict.h"
+#include "py_stream.h"
+#include "py_utils.h"
+#include "py_callable.h"
+
+#include <library/cpp/resource/resource.h>
+#include <yql/essentials/udfs/common/python/python_udf/python_udf.h>
+
+namespace NPython {
+
+static PyMethodDef ModuleMethods[] = {
+ { nullptr, nullptr, 0, nullptr } /* sentinel */
+};
+
+#define MODULE_NAME "yql"
+
+#if PY_MAJOR_VERSION >= 3
+#define MODULE_NAME_TYPING "yql.typing"
+#endif
+
+#define MODULE_INITIALIZED_ATTRIBUTE "_initialized"
+
+PyDoc_STRVAR(ModuleDoc,
+ "This module provides YQL specific types for Python.");
+
+#if PY_MAJOR_VERSION >= 3
+PyDoc_STRVAR(ModuleDocTyping,
+ "This module provides annotations for YQL types for Python.");
+#endif
+
+PyDoc_STRVAR(StopIterationException_doc,
+ "Can be throwed to yield stream iteration.");
+
+// Finalizes a static type object with PyType_Ready and throws yexception
+// (mentioning Name) if that fails. The type is NOT added to the module.
+#define PREPARE_TYPE(Name, Type) \
+ do { \
+ if (PyType_Ready(Type) < 0) { \
+ throw yexception() << "Can't prepare type: " << (Name); \
+ } \
+ } while (0)
+
+// Finalizes a static type object (see PREPARE_TYPE) and publishes it as an
+// attribute `Name` of the local variable `module`.
+// PyModule_AddObject steals a reference only on success, so the reference
+// taken by Py_INCREF is given back on the failure path before throwing.
+#define REGISTER_TYPE(Name, Type) \
+    do { \
+        PREPARE_TYPE(Name, Type); \
+        Py_INCREF(Type); \
+        if (PyModule_AddObject(module, (Name), (PyObject*) Type) < 0) { \
+            Py_DECREF(Type); \
+            throw yexception() << "Can't add type: " << (Name); \
+        } \
+    } while (0)
+
+// Stores Object under key Name in the local variable `dict` via
+// PyDict_SetItemString; throws yexception (mentioning Name) on failure.
+#define REGISTER_OBJECT(Name, Object) \
+ do { \
+ if (PyDict_SetItemString(dict, (Name), (PyObject *) (Object)) < 0) \
+ throw yexception() << "Can't register object: " << (Name); \
+ } while (0)
+
+// Lazily creates the exception class MODULE_NAME "." Name with docstring Doc
+// and registers it through REGISTER_OBJECT. Nothing happens when Object is
+// already non-null, so both creation and registration run at most once while
+// Object stays set. Throws yexception if the class cannot be created.
+#define REGISTER_EXCEPTION(Name, Object, Doc) \
+ do { \
+ if (!Object) { \
+ Object = PyErr_NewExceptionWithDoc((char*) MODULE_NAME "." Name, Doc, nullptr, nullptr); \
+ if (!Object) { \
+ throw yexception() << "Can't register exception: " << (Name); \
+ } \
+ REGISTER_OBJECT(Name, Object); \
+ } \
+ } while (0)
+
+#if PY_MAJOR_VERSION >= 3
+// Python 3 definition of the `yql` module: uses the (empty) ModuleMethods
+// table and ModuleDoc as the docstring; m_size is -1 and no slots or
+// traverse/clear/free hooks are provided.
+static PyModuleDef ModuleDefinition = {
+ PyModuleDef_HEAD_INIT,
+ INIT_MEMBER(m_name, MODULE_NAME),
+ INIT_MEMBER(m_doc, ModuleDoc),
+ INIT_MEMBER(m_size, -1),
+ INIT_MEMBER(m_methods, ModuleMethods),
+ INIT_MEMBER(m_slots, nullptr),
+ INIT_MEMBER(m_traverse, nullptr),
+ INIT_MEMBER(m_clear, nullptr),
+ INIT_MEMBER(m_free, nullptr),
+};
+
+// Python 3 definition of the `yql.typing` module: no method table at all; its
+// attributes are assigned from Python code (see typing.py). m_size is -1 and
+// no slots or traverse/clear/free hooks are provided.
+static PyModuleDef ModuleDefinitionTyping = {
+ PyModuleDef_HEAD_INIT,
+ INIT_MEMBER(m_name, MODULE_NAME_TYPING),
+ INIT_MEMBER(m_doc, ModuleDocTyping),
+ INIT_MEMBER(m_size, -1),
+ INIT_MEMBER(m_methods, nullptr),
+ INIT_MEMBER(m_slots, nullptr),
+ INIT_MEMBER(m_traverse, nullptr),
+ INIT_MEMBER(m_clear, nullptr),
+ INIT_MEMBER(m_free, nullptr),
+};
+
+// Python 3 init function of the `yql` module.
+// Creates the module and gives it an empty `__path__` tuple.
+// Returns a new reference to the module, or nullptr (with a Python error set)
+// if the module or the tuple cannot be created or attached.
+PyMODINIT_FUNC PyInit_YQL(void)
+{
+    PyObject* mod = PyModule_Create(&ModuleDefinition);
+    if (!mod) {
+        return nullptr;
+    }
+
+    PyObject* path = Py_BuildValue("()");
+    // PyModule_AddObject steals `path` only on success, so release it (and the
+    // half-initialized module) ourselves on any failure.
+    if (!path || PyModule_AddObject(mod, "__path__", path) < 0) {
+        Py_XDECREF(path);
+        Py_DECREF(mod);
+        return nullptr;
+    }
+    return mod;
+}
+
+void go_throw();
+
+// Python 3 init function of the `yql.typing` module: creates the module from
+// ModuleDefinitionTyping and returns it (nullptr if creation failed).
+PyMODINIT_FUNC PyInit_YQLTyping(void)
+{
+    PyObject* typingModule = PyModule_Create(&ModuleDefinitionTyping);
+    return typingModule;
+}
+#else
+// Python 2 init function of the `yql` module: creates it with Py_InitModule3
+// from ModuleMethods and ModuleDoc. The result of Py_InitModule3 is not
+// inspected here.
+PyMODINIT_FUNC PyInit_YQL(void)
+{
+ Py_InitModule3(MODULE_NAME, ModuleMethods, ModuleDoc);
+}
+#endif
+
+// Adds the `yql` module (and, on Python 3, `yql.typing`) to the table of
+// built-in modules. TPyInitializer calls this before Py_Initialize().
+// Throws yexception if the table cannot be extended; previously such a failure
+// was silently ignored.
+void PrepareYqlModule() {
+    if (PyImport_AppendInittab(MODULE_NAME, &PyInit_YQL) < 0) {
+        throw yexception() << "Can't register built-in module " << MODULE_NAME;
+    }
+#if PY_MAJOR_VERSION >= 3
+    if (PyImport_AppendInittab(MODULE_NAME_TYPING, &PyInit_YQLTyping) < 0) {
+        throw yexception() << "Can't register built-in module " << MODULE_NAME_TYPING;
+    }
+#endif
+}
+
+#if PY_MAJOR_VERSION >= 3
+// Inserts an already created module object into sys.modules under `name`.
+// Throws yexception when `module` is not a module object, when sys.modules is
+// unavailable or not an exact dict, or when the insertion itself fails.
+void RegisterRuntimeModule(const char* name, PyObject* module) {
+    const bool isModule = module && PyModule_Check(module);
+    if (!isModule) {
+        throw yexception() << "Invalid object for module " << name;
+    }
+
+    // PyImport_GetModuleDict returns a borrowed reference: no DECREF needed.
+    PyObject* sysModules = PyImport_GetModuleDict();
+    const bool isDict = sysModules && PyDict_CheckExact(sysModules);
+    if (!isDict) {
+        throw yexception() << "Can't get sys.modules dictionary";
+    }
+
+    const int rc = PyDict_SetItemString(sysModules, name, module);
+    if (rc < 0) {
+        throw yexception() << "Can't register module " << name;
+    }
+}
+#endif
+
+// Populates the `yql` Python module with the YQL binding types and objects.
+//
+// pythonFlavor - for EPythonFlavor::Arcadia on Python 3 the embedded typing.py
+//                resource is executed to fill in `yql.typing`.
+// standalone   - when false and the module cannot be imported, the module is
+//                created directly through PyInit_YQL (and registered in
+//                sys.modules on Python 3) before the import is retried.
+//
+// The function is idempotent per module object: once the module carries
+// `_initialized == True` it returns immediately.
+// Throws yexception when the module cannot be obtained, a type/object cannot
+// be registered, or the typing setup fails.
+void InitYqlModule(NYql::NUdf::EPythonFlavor pythonFlavor, bool standalone) {
+    TPyObjectPtr m = PyImport_ImportModule(MODULE_NAME);
+    if (!standalone && !m) {
+        PyErr_Clear();
+#if PY_MAJOR_VERSION >= 3
+        m = PyInit_YQL();
+        RegisterRuntimeModule(MODULE_NAME, m.Get());
+#else
+        PyInit_YQL();
+#endif
+        m = PyImport_ImportModule(MODULE_NAME);
+    }
+
+    // The REGISTER_* macros below refer to the locals `module` and `dict`.
+    PyObject* module = m.Get();
+
+    if (!module) {
+        throw yexception() << "Can't get YQL module.";
+    }
+
+    TPyObjectPtr initialized = PyObject_GetAttrString(module, MODULE_INITIALIZED_ATTRIBUTE);
+    if (!initialized) {
+        // Attribute is absent on the first call; that is not an error.
+        PyErr_Clear();
+    } else if (initialized.Get() == Py_True) {
+        return;
+    }
+
+    PyObject* dict = PyModule_GetDict(module);
+
+    REGISTER_TYPE("TVoid", &PyVoidType);
+    REGISTER_OBJECT("Void", &PyVoidObject);
+
+    PREPARE_TYPE("TIterator", &PyIteratorType);
+    PREPARE_TYPE("TPairIterator", &PyPairIteratorType);
+
+    PREPARE_TYPE("TDict", &PyLazyDictType);
+    PREPARE_TYPE("TSet", &PyLazySetType);
+
+    PREPARE_TYPE("TLazyListIterator", &PyLazyListIteratorType);
+    PREPARE_TYPE("TLazyList", &PyLazyListType);
+    PREPARE_TYPE("TThinListIterator", &PyThinListIteratorType);
+    PREPARE_TYPE("TThinList", &PyThinListType);
+
+    PREPARE_TYPE("TStream", &PyStreamType);
+    PREPARE_TYPE("TCallable", &PyCallableType);
+
+    REGISTER_EXCEPTION("TYieldIteration", PyYieldIterationException, StopIterationException_doc);
+
+#if PY_MAJOR_VERSION >= 3
+    if (pythonFlavor == NYql::NUdf::EPythonFlavor::Arcadia) {
+        if (!standalone) {
+            TPyObjectPtr typingModule = PyImport_ImportModule(MODULE_NAME_TYPING);
+            if (!typingModule) {
+                PyErr_Clear();
+                typingModule = PyInit_YQLTyping();
+                RegisterRuntimeModule(MODULE_NAME_TYPING, typingModule.Get());
+            }
+        }
+
+        const auto typing = NResource::Find(TStringBuf("typing.py"));
+        const auto rc = PyRun_SimpleStringFlags(typing.c_str(), nullptr);
+
+        if (rc < 0) {
+            // Not sure if PyErr_Print() works after PyRun_SimpleStringFlags,
+            // but just in case...
+            PyErr_Print();
+            ythrow yexception() << "Can't parse YQL type annotations module";
+        }
+
+        // Throws if `obj` is null, after echoing the pending Python error
+        // (when it has a value) to Cerr and leaving it set.
+        auto processError = [&] (PyObject* obj, TStringBuf message) {
+            if (obj) {
+                return;
+            }
+            PyObject *ptype, *pvalue, *ptraceback;
+            PyErr_Fetch(&ptype, &pvalue, &ptraceback);
+            if (pvalue) {
+                // PyObject_Str returns a new reference; hold it in a smart
+                // pointer so it is released instead of leaked.
+                TPyObjectPtr pstr = PyObject_Str(pvalue);
+                if (pstr) {
+                    if (auto err_msg = PyUnicode_AsUTF8(pstr.Get())) {
+                        Cerr << err_msg << Endl;
+                    }
+                }
+                PyErr_Restore(ptype, pvalue, ptraceback);
+            }
+            ythrow yexception() << "Can't setup YQL type annotations module: " << message;
+        };
+
+        // Call __main__.main(), which typing.py has just defined. Every
+        // intermediate object is owned by a TPyObjectPtr so that nothing leaks
+        // when processError throws part-way through.
+        TPyObjectPtr main = PyImport_ImportModule("__main__");
+        processError(main.Get(), "PyImport_ImportModule");
+        TPyObjectPtr function = PyObject_GetAttrString(main.Get(), "main");
+        processError(function.Get(), "PyObject_GetAttrString");
+        TPyObjectPtr args = PyTuple_New(0);
+        processError(args.Get(), "PyTuple_New");
+        TPyObjectPtr result = PyObject_CallObject(function.Get(), args.Get());
+        processError(result.Get(), "PyObject_CallObject");
+    }
+#endif
+
+    REGISTER_OBJECT(MODULE_INITIALIZED_ATTRIBUTE, Py_True);
+}
+
+// Forgets the cached TYieldIteration exception class by resetting the global
+// pointer, so that REGISTER_EXCEPTION creates a fresh class the next time it
+// runs with a null pointer.
+// NOTE(review): the pointer is dropped without a Py_DECREF — confirm this is
+// intentional (e.g. because the interpreter is about to be finalized).
+void TermYqlModule() {
+ PyYieldIterationException = nullptr;
+}
+
+} // namespace NPython
diff --git a/yql/essentials/udfs/common/python/bindings/py_yql_module.h b/yql/essentials/udfs/common/python/bindings/py_yql_module.h
new file mode 100644
index 00000000000..970471d029e
--- /dev/null
+++ b/yql/essentials/udfs/common/python/bindings/py_yql_module.h
@@ -0,0 +1,11 @@
+#pragma once
+
+#include <yql/essentials/udfs/common/python/python_udf/python_udf.h>
+
+namespace NPython {
+
+// Adds the `yql` module (and `yql.typing` on Python 3) to the built-in module
+// table; TPyInitializer calls it before Py_Initialize().
+void PrepareYqlModule();
+// Fills the `yql` module with the binding types and objects. With
+// standalone == false the module is created directly when importing it fails.
+void InitYqlModule(NYql::NUdf::EPythonFlavor pythonFlavor, bool standalone = true);
+// Resets the module-level cached exception pointer.
+void TermYqlModule();
+
+} // namespace NPython
diff --git a/yql/essentials/udfs/common/python/bindings/typing.py b/yql/essentials/udfs/common/python/bindings/typing.py
new file mode 100644
index 00000000000..0e53ad1e0a4
--- /dev/null
+++ b/yql/essentials/udfs/common/python/bindings/typing.py
@@ -0,0 +1,188 @@
+def main():
+ import importlib.abc
+ import importlib.machinery
+ import sys
+
+ class Finder(importlib.abc.MetaPathFinder):
+ def find_spec(self, fullname, path, target=None):
+ if fullname in sys.builtin_module_names:
+ return importlib.machinery.ModuleSpec(
+ fullname,
+ importlib.machinery.BuiltinImporter,
+ )
+
+ sys.meta_path.append(Finder())
+
+ try:
+ import yandex.type_info.type_base as ti_base
+ import yandex.type_info.typing as ti_typing
+ import six
+ except ImportError as e:
+ raise ImportError(
+ str(e) + ". Make sure that library/python/type_info is in your PEERDIR list"
+ )
+
+ from yql import typing
+
+ AutoMap = ti_base.make_primitive_type("AutoMap")
+
+ def _format_arg(arg):
+ res = []
+ if arg[0]:
+ res.append("{}:".format(ti_base.quote_string(arg[0])))
+ res.append(str(arg[1]))
+ if arg[2]:
+ res.append("{Flags:")
+ res.append(",".join(str(x) for x in sorted(list(arg[2]))))
+ res.append("}")
+ return "".join(res)
+
+ Stream = ti_typing._SingleArgumentGeneric("Stream")
+
+ # Concrete Resource<tag> type produced by GenericResource.__getitem__.
+ # Requires a "tag" attribute in addition to the base Type attributes.
+ @six.python_2_unicode_compatible
+ class GenericResourceAlias(ti_base.Type):
+ REQUIRED_ATTRS = ti_base.Type.REQUIRED_ATTRS + ["tag"]
+
+ # Text form: <name><<quoted tag>>
+ def __str__(self):
+ return u"{}<{}>".format(self.name, ti_base.quote_string(self.tag))
+
+ # YSON form: a dict with the type name and the tag.
+ def to_yson_type(self):
+ return {"type_name": self.yt_type_name, "tag": self.tag}
+
+ class GenericResource(ti_base.Generic):
+ def __getitem__(self, params):
+ if not isinstance(params, str):
+ raise ValueError("Expected str, but got: {}".format(ti_base._with_type(params)))
+
+ attrs = {
+ "name": self.name,
+ "yt_type_name": self.yt_type_name,
+ "tag": params,
+ }
+
+ return GenericResourceAlias(attrs)
+
+ def from_dict(self):
+ raise NotImplementedError()
+
+ Resource = GenericResource("Resource")
+
+ # Normalizes one Callable argument description into (name, type, flags).
+ # A slice `name:type:flags` supplies all three parts; any other value is
+ # treated as the bare argument type, with an empty name and no flags.
+ # Raises ValueError for a non-str name or for a flag other than AutoMap;
+ # the type is checked with ti_base.validate_type.
+ def _extract_arg_info(param):
+ name = ""
+ arg_type = param
+ flags = set()
+ if isinstance(param, slice):
+ name = param.start
+ # A missing slice start means "unnamed argument".
+ if name is None:
+ name = ""
+ if not isinstance(name, str):
+ raise ValueError("Expected str as argument name but got: {}".format(ti_base._with_type(name)))
+ arg_type = param.stop
+ ti_base.validate_type(arg_type)
+ # The slice step, when present, is iterated as a collection of flags.
+ if param.step is not None:
+ for x in param.step:
+ if x != AutoMap:
+ raise ValueError("Expected AutoMap as parameter flag but got: {}".format(ti_base._with_type(x)))
+ flags.add(x)
+ else:
+ ti_base.validate_type(arg_type)
+ return (name, arg_type, flags)
+
+ # Concrete Callable type produced by GenericCallable.__getitem__.
+ # Reads the attributes `args` (list of (name, type, flags) triples),
+ # `optional_args` (count of trailing optional arguments) and `return`.
+ @six.python_2_unicode_compatible
+ class GenericCallableAlias(ti_base.Type):
+ # Text form: Callable<(required...,[optional...])->return>; the brackets
+ # appear only when there is at least one optional argument.
+ def __str__(self):
+ return ("Callable<(" +
+ ",".join(_format_arg(x) for x in self.args[:len(self.args)-self.optional_args]) +
+ ("," if len(self.args) > self.optional_args and self.optional_args else "") +
+ ("[" if self.optional_args else "") +
+ ",".join(_format_arg(x) for x in self.args[len(self.args)-self.optional_args:]) +
+ ("]" if self.optional_args else "") +
+ ")->" + str(getattr(self, "return")) + ">")
+
+ # YSON form: a dict holding the raw attributes and the type name.
+ def to_yson_type(self):
+ yson_repr = {
+ "optional_args": self.optional_args,
+ "return": getattr(self, "return"),
+ "args": self.args,
+ "type_name": self.yt_type_name,
+ }
+ return yson_repr
+
+
+
+ # Generic used as Callable[optional_count, return_type, arg, arg, ...].
+ # Each arg is either a bare type or a `name:type:flags` slice
+ # (see _extract_arg_info).
+ class GenericCallable(ti_base.Generic):
+ def __getitem__(self, params):
+ if not isinstance(params, tuple) or len(params) < 2 or not isinstance(params[0], int) or not ti_typing.is_valid_type(params[1]):
+ raise ValueError("Expected at least two arguments (integer and type of return value) but got: {}".format(ti_base._with_type(params)))
+ args = []
+ for param in params[2:]:
+ name, arg_type, flags = _extract_arg_info(param)
+ args.append((name, arg_type, flags))
+
+ # The optional-argument count cannot exceed the number of arguments.
+ if params[0] < 0 or params[0] > len(args):
+ raise ValueError("Optional argument count - " + str(params[0]) + " out of range [0.." + str(len(args)) + "]")
+
+ # NOTE(review): name/yt_type_name are "Tagged"/"tagged" rather than
+ # "Callable"/"callable" — confirm this is intentional and not a copy-paste.
+ attrs = {
+ "optional_args": params[0],
+ "return": params[1],
+ "args": args,
+ "name": "Tagged",
+ "yt_type_name": "tagged",
+ }
+
+ return GenericCallableAlias(attrs)
+
+ # Deserialization is not supported for this generic.
+ def from_dict(self):
+ raise NotImplementedError()
+
+ Callable = GenericCallable("Callable")
+
+ def parse_slice_arg(arg):
+ try:
+ return _format_arg(_extract_arg_info(arg))
+ except ValueError:
+ pass
+
+ typing.Type = ti_base.Type
+ typing.is_valid_type = ti_base.is_valid_type
+ typing.parse_slice_arg = parse_slice_arg
+
+ typing.Bool = ti_typing.Bool
+ typing.Int8 = ti_typing.Int8
+ typing.Uint8 = ti_typing.Uint8
+ typing.Int16 = ti_typing.Int16
+ typing.Uint16 = ti_typing.Uint16
+ typing.Int32 = ti_typing.Int32
+ typing.Uint32 = ti_typing.Uint32
+ typing.Int64 = ti_typing.Int64
+ typing.Uint64 = ti_typing.Uint64
+ typing.Float = ti_typing.Float
+ typing.Double = ti_typing.Double
+ typing.String = ti_typing.String
+ typing.Utf8 = ti_typing.Utf8
+ typing.Yson = ti_typing.Yson
+ typing.Json = ti_typing.Json
+ typing.Uuid = ti_typing.Uuid
+ typing.Date = ti_typing.Date
+ typing.Datetime = ti_typing.Datetime
+ typing.Timestamp = ti_typing.Timestamp
+ typing.Interval = ti_typing.Interval
+ typing.TzDate = ti_typing.TzDate
+ typing.TzDatetime = ti_typing.TzDatetime
+ typing.TzTimestamp = ti_typing.TzTimestamp
+ typing.Void = ti_typing.Void
+ typing.Null = ti_typing.Null
+ typing.EmptyTuple = ti_typing.EmptyTuple
+ typing.EmptyStruct = ti_typing.EmptyStruct
+ typing.Optional = ti_typing.Optional
+ typing.List = ti_typing.List
+ typing.Dict = ti_typing.Dict
+ typing.Tuple = ti_typing.Tuple
+ typing.Struct = ti_typing.Struct
+ typing.Variant = ti_typing.Variant
+ typing.Tagged = ti_typing.Tagged
+ typing.Decimal = ti_typing.Decimal
+
+ typing.Stream = Stream
+ typing.Resource = Resource
+ typing.Callable = Callable
+ typing.AutoMap = AutoMap
diff --git a/yql/essentials/udfs/common/python/bindings/ut3/py_test_engine.h b/yql/essentials/udfs/common/python/bindings/ut3/py_test_engine.h
new file mode 100644
index 00000000000..a36e19fa32f
--- /dev/null
+++ b/yql/essentials/udfs/common/python/bindings/ut3/py_test_engine.h
@@ -0,0 +1,227 @@
+#pragma once
+
+#include "py_cast.h"
+#include "py_yql_module.h"
+#include "py_utils.h"
+
+#include <yql/essentials/minikql/computation/mkql_computation_node_holders.h>
+#include <yql/essentials/minikql/mkql_type_builder.h>
+#include <yql/essentials/minikql/computation/mkql_value_builder.h>
+#include <yql/essentials/udfs/common/python/python_udf/python_udf.h>
+
+#include <library/cpp/testing/unittest/registar.h>
+
+#define PYTHON_TEST_TAG "Python2Test"
+
+
+using namespace NKikimr;
+using namespace NMiniKQL;
+
+namespace NPython {
+
+//////////////////////////////////////////////////////////////////////////////
+// TPyInitializer
+//////////////////////////////////////////////////////////////////////////////
+// Brings the embedded Python interpreter up and down for the tests.
+// Construction order: register the yql modules as built-ins, start the
+// interpreter, then populate the yql module (Arcadia flavor, standalone mode).
+// Destruction resets the module state before finalizing the interpreter.
+// TPythonTestEngine obtains it through Singleton<TPyInitializer>().
+struct TPyInitializer {
+ TPyInitializer() {
+ PrepareYqlModule();
+ Py_Initialize();
+ InitYqlModule(NYql::NUdf::EPythonFlavor::Arcadia);
+ }
+ ~TPyInitializer() {
+ TermYqlModule();
+ Py_Finalize();
+ }
+};
+
+//////////////////////////////////////////////////////////////////////////////
+// TPythonTestEngine
+//////////////////////////////////////////////////////////////////////////////
+class TPythonTestEngine {
+public:
+ // Builds the MiniKQL environment (allocator, type environment, type info
+ // helper, function type builder), then the holder factory, value builder
+ // and bind terminator, makes sure the Python interpreter singleton exists,
+ // and finally creates the cast context used by all conversions below.
+ TPythonTestEngine()
+ : MemInfo_("Memory")
+ , Alloc_(__LOCATION__)
+ , Env_(Alloc_)
+ , TypeInfoHelper_(new TTypeInfoHelper)
+ , FunctionInfoBuilder_(Env_, TypeInfoHelper_, "", nullptr, {})
+ {
+ HolderFactory_ = MakeHolder<THolderFactory>(
+ Alloc_.Ref(),
+ MemInfo_,
+ nullptr);
+ ValueBuilder_ = MakeHolder<TDefaultValueBuilder>(*HolderFactory_, NUdf::EValidatePolicy::Exception);
+ BindTerminator_ = MakeHolder<TBindTerminator>(ValueBuilder_.Get());
+ // Starts the interpreter on first use (see TPyInitializer).
+ Singleton<TPyInitializer>();
+ CastCtx_ = MakeIntrusive<TPyCastContext>(&GetValueBuilder(),
+ MakeIntrusive<TPyContext>(TypeInfoHelper_.Get(), NUdf::TStringRef::Of(PYTHON_TEST_TAG), NUdf::TSourcePosition())
+ );
+ }
+
+ ~TPythonTestEngine() {
+ PyCleanup();
+ }
+
+ NUdf::IFunctionTypeInfoBuilder& GetTypeBuilder() {
+ return FunctionInfoBuilder_;
+ }
+
+ const NUdf::IValueBuilder& GetValueBuilder() const {
+ return *ValueBuilder_;
+ }
+
+ // Runs `Test()` from `script`, converts its result to a MiniKQL value of
+ // type `udfType` and passes that value to `checker`.
+ template <typename TChecker>
+ void ToMiniKQL(NUdf::TType* udfType, const TStringBuf& script, TChecker&& checker) {
+     TPyObjectPtr result = RunPythonFunction(script);
+     UNIT_ASSERT_C(!!result, script);
+
+     auto converted = FromPyObject(CastCtx_, static_cast<TType*>(udfType), result.Get());
+     checker(converted);
+ }
+
+ // Convenience overload: derives the MiniKQL type from TExpectedType and
+ // delegates to the overload that takes an explicit type.
+ // `checker` is a forwarding reference, so it is passed on with
+ // std::forward; std::move would fail to compile for an lvalue checker.
+ template <typename TExpectedType, typename TChecker>
+ void ToMiniKQL(const TStringBuf& script, TChecker&& checker) {
+     auto type = GetTypeBuilder().SimpleType<TExpectedType>();
+     ToMiniKQL<TChecker>(type, script, std::forward<TChecker>(checker));
+ }
+
+ // Runs `Test(argValue)` from `script`, converts the result to a MiniKQL
+ // value of type `udfType` and passes it to `checker`. Fails the test
+ // (after printing the Python error) if the call did not succeed.
+ template <typename TChecker>
+ void ToMiniKQLWithArg(
+     NUdf::TType* udfType, PyObject* argValue,
+     const TStringBuf& script, TChecker&& checker)
+ {
+     // One-element argument tuple for the call.
+     TPyObjectPtr callArgs = Py_BuildValue("(O)", argValue);
+
+     auto result = RunPythonFunction(script, callArgs.Get());
+     const bool failed = !result || PyErr_Occurred();
+     if (failed) {
+         PyErr_Print();
+         UNIT_FAIL("function execution error");
+     }
+
+     auto converted = FromPyObject(CastCtx_, static_cast<TType*>(udfType), result.Get());
+     checker(converted);
+ }
+
+ // Convenience overload: derives the MiniKQL type from TExpectedType and
+ // delegates to the overload that takes an explicit type.
+ // `checker` is a forwarding reference, so it is passed on with
+ // std::forward; std::move would fail to compile for an lvalue checker.
+ template <typename TExpectedType, typename TChecker>
+ void ToMiniKQLWithArg(
+     PyObject* argValue,
+     const TStringBuf& script, TChecker&& checker)
+ {
+     auto type = GetTypeBuilder().SimpleType<TExpectedType>();
+     ToMiniKQLWithArg<TChecker>(type, argValue, script, std::forward<TChecker>(checker));
+ }
+
+ // Builds a MiniKQL value with `builder(type, valueBuilder)`, converts it to
+ // a Python object and calls `Test(<object>)` from `script`.
+ // Returns whatever `Test` returned. Any conversion or execution failure is
+ // reported through UNIT_FAIL after printing the Python error or exception.
+ template <typename TMiniKQLValueBuilder>
+ TPyObjectPtr ToPython(
+ NUdf::TType* udfType,
+ TMiniKQLValueBuilder&& builder,
+ const TStringBuf& script)
+ {
+ try {
+ TType* type = static_cast<TType*>(udfType);
+ NUdf::TUnboxedValue value = builder(type, GetValueBuilder());
+ TPyObjectPtr pyValue = ToPyObject(CastCtx_, type, value);
+ if (!pyValue || PyErr_Occurred()) {
+ PyErr_Print();
+ UNIT_FAIL("object execution error");
+ }
+ // One-element argument tuple for the call.
+ TPyObjectPtr args = Py_BuildValue("(O)", pyValue.Get());
+
+ auto result = RunPythonFunction(script, args.Get());
+ if (!result || PyErr_Occurred()) {
+ PyErr_Print();
+ UNIT_FAIL("function execution error");
+ }
+ return result;
+ } catch (const yexception& e) {
+ Cerr << e << Endl;
+ UNIT_FAIL("cast error");
+ }
+
+ // Fallback return after the catch block: hands back Python's None.
+ Py_RETURN_NONE;
+ }
+
+ // Convenience overload: derives the MiniKQL type from TExpectedType and
+ // delegates to the overload that takes an explicit type.
+ // `builder` is a forwarding reference, so it is passed on with
+ // std::forward; std::move would fail to compile for a non-const lvalue.
+ template <typename TExpectedType, typename TMiniKQLValueBuilder>
+ TPyObjectPtr ToPython(TMiniKQLValueBuilder&& builder, const TStringBuf& script) {
+     auto type = GetTypeBuilder().SimpleType<TExpectedType>();
+     return ToPython<TMiniKQLValueBuilder>(type, std::forward<TMiniKQLValueBuilder>(builder), script);
+ }
+
+ // Runs `Test()` from `script` and returns its result converted to a
+ // MiniKQL value of type `udfType`. Fails the test (after printing the
+ // Python error) if the call did not succeed.
+ NUdf::TUnboxedValue FromPython(NUdf::TType* udfType, const TStringBuf& script) {
+     auto result = RunPythonFunction(script);
+     const bool failed = !result || PyErr_Occurred();
+     if (failed) {
+         PyErr_Print();
+         UNIT_FAIL("function execution error");
+     }
+
+     return FromPyObject(CastCtx_, static_cast<TType*>(udfType), result.Get());
+ }
+
+ template <typename TExpectedType>
+ NUdf::TUnboxedValue FromPython(const TStringBuf& script) {
+ auto type = GetTypeBuilder().SimpleType<TExpectedType>();
+ return FromPython(type, script);
+ }
+
+ // Round trip: builds a value of TArgumentType, passes it to `Test` from
+ // `script`, and converts the Python result back to a value of TReturnType
+ // (same as TArgumentType by default).
+ // `builder` is a forwarding reference, so it is passed on with
+ // std::forward; std::move would fail to compile for a non-const lvalue.
+ template <typename TArgumentType, typename TReturnType = TArgumentType, typename TMiniKQLValueBuilder>
+ NUdf::TUnboxedValue ToPythonAndBack(TMiniKQLValueBuilder&& builder, const TStringBuf& script) {
+     const auto aType = GetTypeBuilder().SimpleType<TArgumentType>();
+     const auto result = ToPython<TMiniKQLValueBuilder>(aType, std::forward<TMiniKQLValueBuilder>(builder), script);
+
+     if (!result || PyErr_Occurred()) {
+         PyErr_Print();
+         UNIT_FAIL("function execution error");
+     }
+
+     const auto rType = static_cast<TType*>(GetTypeBuilder().SimpleType<TReturnType>());
+     return FromPyObject(CastCtx_, rType, result.Get());
+ }
+
+ // Round trip with verification: same as the two-argument overload, then
+ // hands the converted result to `checker`.
+ // `builder` is a forwarding reference, so it is passed on with
+ // std::forward; std::move would fail to compile for a non-const lvalue.
+ template <typename TArgumentType, typename TReturnType = TArgumentType, typename TMiniKQLValueBuilder, typename TChecker>
+ void ToPythonAndBack(TMiniKQLValueBuilder&& builder, const TStringBuf& script, TChecker&& checker) {
+     const auto result = ToPythonAndBack<TArgumentType, TReturnType, TMiniKQLValueBuilder>(
+         std::forward<TMiniKQLValueBuilder>(builder), script);
+     checker(result);
+ }
+
+private:
+ // Compiles `script` as a module named "py_cast_ut", looks up its `Test`
+ // function and calls it with `args` (no arguments when args is null).
+ // Returns the call result, which is null if the call raised. Compilation,
+ // module creation and lookup failures are reported through UNIT_FAIL.
+ TPyObjectPtr RunPythonFunction(
+     const TStringBuf& script, PyObject* args = nullptr)
+ {
+     // A TStringBuf is not guaranteed to be NUL-terminated, but
+     // Py_CompileString expects a C string: compile from an owned copy.
+     const TString scriptText(script);
+     TString filename(TStringBuf("embedded:test.py"));
+     TPyObjectPtr code(Py_CompileString(scriptText.c_str(), filename.data(), Py_file_input));
+     if (!code) {
+         PyErr_Print();
+         UNIT_FAIL("can't compile python script");
+     }
+
+     TString moduleName(TStringBuf("py_cast_ut"));
+     TPyObjectPtr module(PyImport_ExecCodeModule(moduleName.begin(), code.Get()));
+     if (!module) {
+         PyErr_Print();
+         UNIT_FAIL("can't create python module");
+     }
+
+     TPyObjectPtr function(PyObject_GetAttrString(module.Get(), "Test"));
+     if (!function) {
+         PyErr_Print();
+         UNIT_FAIL("function 'Test' is not found in module");
+     }
+     return PyObject_CallObject(function.Get(), args);
+ }
+
+private:
+ TMemoryUsageInfo MemInfo_;
+ TScopedAlloc Alloc_;
+ TTypeEnvironment Env_;
+ const NUdf::ITypeInfoHelper::TPtr TypeInfoHelper_;
+ TFunctionTypeInfoBuilder FunctionInfoBuilder_;
+ THolder<THolderFactory> HolderFactory_;
+ THolder<TDefaultValueBuilder> ValueBuilder_;
+ THolder<TBindTerminator> BindTerminator_;
+ TPyCastContext::TPtr CastCtx_;
+};
+
+} // namespace NPython
diff --git a/yql/essentials/udfs/common/python/bindings/ut3/ya.make b/yql/essentials/udfs/common/python/bindings/ut3/ya.make
new file mode 100644
index 00000000000..b9d500938c7
--- /dev/null
+++ b/yql/essentials/udfs/common/python/bindings/ut3/ya.make
@@ -0,0 +1,37 @@
+IF (OS_LINUX)
+ IF (NOT WITH_VALGRIND)
+ UNITTEST_FOR(yql/essentials/udfs/common/python/bindings)
+
+ SRCS(
+ py_callable_ut.cpp
+ py_cast_ut.cpp
+ py_dict_ut.cpp
+ py_list_ut.cpp
+ py_decimal_ut.cpp
+ py_number_ut.cpp
+ py_optional_ut.cpp
+ py_resource_ut.cpp
+ py_stream_ut.cpp
+ py_string_ut.cpp
+ py_struct_ut.cpp
+ py_tuple_ut.cpp
+ py_tzdate_ut.cpp
+ py_utils_ut.cpp
+ py_variant_ut.cpp
+ py_void_ut.cpp
+ )
+
+ USE_PYTHON3()
+
+ PEERDIR(
+ library/python/type_info
+ yql/essentials/minikql/computation/llvm14
+ yql/essentials/public/udf/service/exception_policy
+ yql/essentials/sql/pg_dummy
+ )
+
+ YQL_LAST_ABI_VERSION()
+
+ END()
+ ENDIF()
+ENDIF()
diff --git a/yql/essentials/udfs/common/python/bindings/ya.make b/yql/essentials/udfs/common/python/bindings/ya.make
new file mode 100644
index 00000000000..efb5b475c4f
--- /dev/null
+++ b/yql/essentials/udfs/common/python/bindings/ya.make
@@ -0,0 +1,54 @@
+PY23_NATIVE_LIBRARY()
+
+YQL_ABI_VERSION(2 27 0)
+
+SRCS(
+ py_callable.cpp
+ py_cast.cpp
+ py_decimal.cpp
+ py_errors.cpp
+ py_dict.cpp
+ py_list.cpp
+ py_lazy_mkql_dict.cpp
+ py_lazy_mkql_list.cpp
+ py_iterator.cpp
+ py_resource.cpp
+ py_stream.cpp
+ py_struct.cpp
+ py_tuple.cpp
+ py_utils.cpp
+ py_variant.cpp
+ py_void.cpp
+ py_yql_module.cpp
+)
+
+IF (USE_SYSTEM_PYTHON AND _SYSTEM_PYTHON27)
+ # we should be able to run on python 2.7.X versions
+ # with X ranging from 3 to (at least) 15
+ #
+ # for now bindings already use some functionality from 2.7.15,
+ # which doesn't exist in earlier versions
+ # (the corresponding symbols won't be loaded from system python)
+ #
+ # so we provide backported implementation for this scenario to work as intended
+ SRCS(
+ py27_backports.c
+ )
+ENDIF()
+
+RESOURCE(
+ typing.py typing.py
+)
+
+PEERDIR(
+ yql/essentials/public/udf
+ yql/essentials/utils
+)
+
+NO_COMPILER_WARNINGS()
+
+END()
+
+RECURSE_FOR_TESTS(
+ ut3
+)
diff --git a/yql/essentials/udfs/common/python/main_py3/__main__.pyx b/yql/essentials/udfs/common/python/main_py3/__main__.pyx
new file mode 100644
index 00000000000..6f4ca943584
--- /dev/null
+++ b/yql/essentials/udfs/common/python/main_py3/__main__.pyx
@@ -0,0 +1,50 @@
+import os
+import runpy
+import importlib
+
+import __res
+
+
+cdef env_entry_point = 'Y_PYTHON_ENTRY_POINT'
+
+
+cdef extern from 'main.h':
+ pass
+
+
+def find_pymain():
+ py_main = __res.find('PY_MAIN')
+
+ if isinstance(py_main, bytes):
+ py_main = py_main.decode('utf8')
+
+ if isinstance(py_main, unicode):
+ return py_main
+
+ return None
+
+
+def run_main():
+ entry_point = os.environ.pop(env_entry_point, None)
+
+ if entry_point is None:
+ entry_point = find_pymain()
+
+ if entry_point is None:
+ raise RuntimeError('No entry point found')
+
+ module_name, colon, func_name = entry_point.partition(':')
+
+ if not colon:
+ runpy._run_module_as_main(module_name, alter_argv=False)
+ return
+
+ if not module_name:
+ module_name = 'library.python.runtime_py3.entry_points'
+
+ module = importlib.import_module(module_name)
+ func = getattr(module, func_name)
+ func()
+
+
+run_main()
diff --git a/yql/essentials/udfs/common/python/main_py3/include/main.h b/yql/essentials/udfs/common/python/main_py3/include/main.h
new file mode 100644
index 00000000000..c96402004e3
--- /dev/null
+++ b/yql/essentials/udfs/common/python/main_py3/include/main.h
@@ -0,0 +1,12 @@
+#pragma once
+#include <util/system/compiler.h>
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+// Public C entry point (exported with Y_PUBLIC) that takes the usual
+// argc/argv pair. The implementation in main.cpp forwards to RunPythonImpl
+// and returns its result.
+Y_PUBLIC
+int RunPython(int argc, char** argv);
+#ifdef __cplusplus
+}
+#endif
+
diff --git a/yql/essentials/udfs/common/python/main_py3/main.cpp b/yql/essentials/udfs/common/python/main_py3/main.cpp
new file mode 100644
index 00000000000..edc3c89ca5b
--- /dev/null
+++ b/yql/essentials/udfs/common/python/main_py3/main.cpp
@@ -0,0 +1,9 @@
+#include "main.h"
+
+// Defined outside this file; the sibling ya.make builds __main__.pyx with
+// --embed=RunPythonImpl.
+extern "C"
+int RunPythonImpl(int argc, char** argv);
+
+// Thin exported wrapper: forwards argc/argv to RunPythonImpl and returns its
+// result unchanged.
+extern "C"
+int RunPython(int argc, char** argv) {
+ return RunPythonImpl(argc, argv);
+}
diff --git a/yql/essentials/udfs/common/python/main_py3/ya.make b/yql/essentials/udfs/common/python/main_py3/ya.make
new file mode 100644
index 00000000000..cc13fb77e4c
--- /dev/null
+++ b/yql/essentials/udfs/common/python/main_py3/ya.make
@@ -0,0 +1,13 @@
+LIBRARY()
+
+USE_PYTHON3()
+
+ADDINCL(
+ yql/essentials/udfs/common/python/main_py3/include
+)
+
+SRCS(GLOBAL main.cpp)
+
+BUILDWITH_CYTHON_C(__main__.pyx --embed=RunPythonImpl)
+
+END()
diff --git a/yql/essentials/udfs/common/python/python3_small/test/canondata/result.json b/yql/essentials/udfs/common/python/python3_small/test/canondata/result.json
new file mode 100644
index 00000000000..dd55da78b53
--- /dev/null
+++ b/yql/essentials/udfs/common/python/python3_small/test/canondata/result.json
@@ -0,0 +1,61 @@
+{
+ "test.test[Annotations]": [
+ {
+ "checksum": "19c6d906cb8617cf9d2b5d484e09caf8",
+ "size": 7570,
+ "uri": "https://{canondata_backend}/212715/49b4751c22bd43fa7057cc92ae5cbedb40404f40/resource.tar.gz#test.test_Annotations_/results.txt"
+ }
+ ],
+ "test.test[BytesDecodeModeStrict]": [
+ {
+ "checksum": "f8534cff0843faaf876c41e0875dcf05",
+ "size": 3120,
+ "uri": "https://{canondata_backend}/1775319/4c4fed0942b33bcc70d44f7dd2972a8e05c6db97/resource.tar.gz#test.test_BytesDecodeModeStrict_/results.txt"
+ }
+ ],
+ "test.test[Cleanup]": [
+ {
+ "checksum": "036e77892757e48fa3fb319ed324b019",
+ "size": 954,
+ "uri": "https://{canondata_backend}/1871182/9909e0b25b15bb1f21d5def23fb072d64c82f07e/resource.tar.gz#test.test_Cleanup_/results.txt"
+ }
+ ],
+ "test.test[CustomYsonConverter]": [
+ {
+ "checksum": "7716204e544d2fcb9313412c3919e66d",
+ "size": 1625,
+ "uri": "https://{canondata_backend}/1130705/576535b56a4e74992911431865e5edd0f7d55520/resource.tar.gz#test.test_CustomYsonConverter_/results.txt"
+ }
+ ],
+ "test.test[Data]": [
+ {
+ "checksum": "f40e83806b294be420681fdfbf2133e8",
+ "size": 25268,
+ "uri": "https://{canondata_backend}/1031349/7065a0985fe0cd26a754a5bee7a4c808836a4692/resource.tar.gz#test.test_Data_/results.txt"
+ }
+ ],
+ "test.test[Excepthook]": [
+ {
+ "uri": "file://test.test_Excepthook_/extracted"
+ }
+ ],
+ "test.test[GreedyInputContainers]": [
+ {
+ "checksum": "02a619c86f180e8a4c536087d64bab6d",
+ "size": 1328,
+ "uri": "https://{canondata_backend}/995452/085d43bbd16f44afc51d6cafed42465a3d20215c/resource.tar.gz#test.test_GreedyInputContainers_/results.txt"
+ }
+ ],
+ "test.test[OptionalNested]": [
+ {
+ "uri": "file://test.test_OptionalNested_/extracted"
+ }
+ ],
+ "test.test[Switch]": [
+ {
+ "checksum": "e60320702512bdcecd5c663f387ee939",
+ "size": 9172,
+ "uri": "https://{canondata_backend}/1130705/493ee46b1e8f2e848ab928f97913d332cb4fffc7/resource.tar.gz#test.test_Switch_/results.txt"
+ }
+ ]
+}
diff --git a/yql/essentials/udfs/common/python/python3_small/test/canondata/test.test_Excepthook_/extracted b/yql/essentials/udfs/common/python/python3_small/test/canondata/test.test_Excepthook_/extracted
new file mode 100644
index 00000000000..b260fe7616b
--- /dev/null
+++ b/yql/essentials/udfs/common/python/python3_small/test/canondata/test.test_Excepthook_/extracted
@@ -0,0 +1,15 @@
+<tmp_path>/program.sql:<main>: Fatal: Execution
+
+ <tmp_path>/program.sql:<main>:44:1: Fatal: Execution of node: Result
+ SELECT $udf(@@{"abc":1}@@);
+ ^
+ <tmp_path>/program.sql:<main>:40:17: Fatal: Failed to execute:
+CUSTOM_EXCEPTHOOK
+True
+Traceback (most recent call last):
+ File "embedded:f", line 31, in f
+Exception
+
+
+ $udf = Python3::f(Callable<(String)->String>, $script);
+ ^ \ No newline at end of file
diff --git a/yql/essentials/udfs/common/python/python3_small/test/canondata/test.test_OptionalNested_/extracted b/yql/essentials/udfs/common/python/python3_small/test/canondata/test.test_OptionalNested_/extracted
new file mode 100644
index 00000000000..413eb2f4ec0
--- /dev/null
+++ b/yql/essentials/udfs/common/python/python3_small/test/canondata/test.test_OptionalNested_/extracted
@@ -0,0 +1,14 @@
+<tmp_path>/program.sql:<main>: Error: Type annotation
+
+ <tmp_path>/program.sql:<main>:12:1: Error: At function: RemovePrefixMembers, At function: Unordered, At function: PersistableRepr, At function: OrderedSqlProject, At function: SqlProjectItem
+ SELECT $optOptList("42");
+ ^
+ <tmp_path>/program.sql:<main>:12:8: Error: At function: Apply
+ SELECT $optOptList("42");
+ ^
+ <tmp_path>/program.sql:<main>:2:24: Error: At function: ScriptUdf
+ $optOptList = Python3::opt_opt_list(Callable<(String)->List<String>??>, @@
+ ^
+ <tmp_path>/program.sql:<main>:2:24: Error: Nested optionals are unsupported in script UDF
+ $optOptList = Python3::opt_opt_list(Callable<(String)->List<String>??>, @@
+ ^ \ No newline at end of file
diff --git a/yql/essentials/udfs/common/python/python3_small/test/cases/Annotations.in b/yql/essentials/udfs/common/python/python3_small/test/cases/Annotations.in
new file mode 100644
index 00000000000..e69de29bb2d
--- /dev/null
+++ b/yql/essentials/udfs/common/python/python3_small/test/cases/Annotations.in
diff --git a/yql/essentials/udfs/common/python/python3_small/test/cases/Annotations.sql b/yql/essentials/udfs/common/python/python3_small/test/cases/Annotations.sql
new file mode 100644
index 00000000000..3f845322e20
--- /dev/null
+++ b/yql/essentials/udfs/common/python/python3_small/test/cases/Annotations.sql
@@ -0,0 +1,67 @@
+--sanitizer ignore memory
+$script = @@
+from yql.typing import *
+
+def primitive(a0:Bool,a1:Int8,a2:Uint8,a3:Int16,a4:Uint16,a5:Int32,a6:Uint32,
+ a7:Int64,a8:Uint64,a9:Float,a10:Double,a11:String,a12:Utf8,a13:Yson,a14:Json,
+ a15:Uuid,a16:Date,a17:Datetime,a18:Timestamp,a19:Interval,a20:TzDate,
+ a21:TzDatetime,a22:TzTimestamp)->Decimal(10,3):
+ pass
+
+def singletons(a0:Void,a1:Null,a2:EmptyStruct,a3:EmptyTuple)->Void:
+ pass
+
+def containers(a0:Optional[Int32],a1:List[List[Bool]],a2:Stream[String],a3:Dict[Int32,String],
+ a4:Tuple[Int32,String],a5:Tuple[Int32],a6:Struct["a":Int32,"b":String],a7:Struct["a":Int32],
+ a8:Variant[Int32,String],a9:Variant[Int32],a10:Variant["a":Int32,"b":String],a11:Variant["a":Int32])->List[String]:
+ pass
+
+def special(a0:Resource["Python3"],a1:Tagged[Int32,"foo"])->Void:
+ pass
+
+def c0()->Callable[0,Int32]: pass
+def c1()->Callable[1,Int32,Optional[List[Int32]]]: pass
+def c2()->Callable[1,Int32,Int32,Optional[List[Int32]]]: pass
+def c3()->Callable[0,Int32,"a":Int32:{AutoMap}]: pass
+def c4()->Callable[0,Int32,"":Int32:{AutoMap}]: pass
+def c5()->Callable[0,Int32,"":Int32:{}]: pass
+def c6()->Callable[0,Int32,"foo":Int32]: pass
+
+def f0(x:Optional[Int32]=None,y:Optional[Int32]=None)->Void: pass
+def f1(x:Optional[Int32],y:Optional[Int32]=None)->Void: pass
+def f2(x:Optional[Int32],y:Optional[Int32])->Void: pass
+def f3(x:slice("",Int32,{AutoMap}), y:slice("name",String))->Void: pass
+
+@@;
+
+$t = ($name)->{
+ return FormatType(EvaluateType(
+ ParseTypeHandle(Core::PythonFuncSignature(AsAtom("Python3"), $script, $name))));
+};
+
+-- Primitive & Singletons
+
+select $t("primitive");
+select $t("singletons");
+
+-- Containers & Special
+
+select $t("containers");
+select $t("special");
+
+-- Callable
+select
+ $t("c0") as c0,
+ $t("c1") as c1,
+ $t("c2") as c2,
+ $t("c3") as c3,
+ $t("c4") as c4,
+ $t("c5") as c5,
+ $t("c6") as c6;
+
+-- Top level
+select
+ $t("f0") as f0,
+ $t("f1") as f1,
+ $t("f2") as f2,
+ $t("f3") as f3;
diff --git a/yql/essentials/udfs/common/python/python3_small/test/cases/BytesDecodeModeStrict.in b/yql/essentials/udfs/common/python/python3_small/test/cases/BytesDecodeModeStrict.in
new file mode 100644
index 00000000000..e69de29bb2d
--- /dev/null
+++ b/yql/essentials/udfs/common/python/python3_small/test/cases/BytesDecodeModeStrict.in
diff --git a/yql/essentials/udfs/common/python/python3_small/test/cases/BytesDecodeModeStrict.sql b/yql/essentials/udfs/common/python/python3_small/test/cases/BytesDecodeModeStrict.sql
new file mode 100644
index 00000000000..e540dbf38ab
--- /dev/null
+++ b/yql/essentials/udfs/common/python/python3_small/test/cases/BytesDecodeModeStrict.sql
@@ -0,0 +1,11 @@
+--sanitizer ignore memory
+$script = @@
+def f(string, uuid, yson):
+ return (string, str(type(string)), uuid, str(type(uuid)), yson, str(type(yson)))
+
+f._yql_bytes_decode_mode = 'strict'
+@@;
+
+$udf = Python3::f(Callable<(String?, UUid?, Yson?)->Tuple<String?, String, UUid?, String, Yson?, String>>, $script);
+
+SELECT $udf("string", UUid('1812bc18-5838-4cde-98aa-287302697b90'), cast(@@{"abc"=1}@@ as yson));
diff --git a/yql/essentials/udfs/common/python/python3_small/test/cases/Cleanup.in b/yql/essentials/udfs/common/python/python3_small/test/cases/Cleanup.in
new file mode 100644
index 00000000000..d5ddcb40830
--- /dev/null
+++ b/yql/essentials/udfs/common/python/python3_small/test/cases/Cleanup.in
@@ -0,0 +1 @@
+{"key"="1";"subkey"="2";"value"="3"};
diff --git a/yql/essentials/udfs/common/python/python3_small/test/cases/Cleanup.sql b/yql/essentials/udfs/common/python/python3_small/test/cases/Cleanup.sql
new file mode 100644
index 00000000000..9db98402923
--- /dev/null
+++ b/yql/essentials/udfs/common/python/python3_small/test/cases/Cleanup.sql
@@ -0,0 +1,12 @@
+--sanitizer ignore memory
+$udfScript = @@
+import yql
+def mapper(records):
+ yql.g = records
+ for record in records:
+ yield dict(yid=b"bla", rnd=0.)
+@@;
+
+$udf = Python3::mapper(Callable<(Stream<Struct<key:String, subkey:String, value:String>>)->Stream<Struct<yid:String, rnd:Double>>>, $udfScript);
+
+PROCESS Input using $udf(TableRows());
diff --git a/yql/essentials/udfs/common/python/python3_small/test/cases/CustomYsonConverter.in b/yql/essentials/udfs/common/python/python3_small/test/cases/CustomYsonConverter.in
new file mode 100644
index 00000000000..e69de29bb2d
--- /dev/null
+++ b/yql/essentials/udfs/common/python/python3_small/test/cases/CustomYsonConverter.in
diff --git a/yql/essentials/udfs/common/python/python3_small/test/cases/CustomYsonConverter.sql b/yql/essentials/udfs/common/python/python3_small/test/cases/CustomYsonConverter.sql
new file mode 100644
index 00000000000..43dd00cb3df
--- /dev/null
+++ b/yql/essentials/udfs/common/python/python3_small/test/cases/CustomYsonConverter.sql
@@ -0,0 +1,20 @@
+--sanitizer ignore memory
+/* syntax version 1 */
+$script = @@
+import json
+
+def yloads(z):
+ return json.loads(str(z, 'latin-1').replace("=",":"))
+
+def ydumps(z):
+ return bytes(json.dumps(z).replace(":","="), 'latin-1')
+
+def f(s):
+ return (s.get("abc",0),s)
+
+f._yql_convert_yson = (yloads,ydumps)
+@@;
+
+$udf = Python3::f(Callable<(Yson?)->Tuple<Int64, Yson?>>, $script);
+
+SELECT $udf(cast(@@{"abc"=1}@@ as yson));
diff --git a/yql/essentials/udfs/common/python/python3_small/test/cases/Data.in b/yql/essentials/udfs/common/python/python3_small/test/cases/Data.in
new file mode 100644
index 00000000000..e69de29bb2d
--- /dev/null
+++ b/yql/essentials/udfs/common/python/python3_small/test/cases/Data.in
diff --git a/yql/essentials/udfs/common/python/python3_small/test/cases/Data.sql b/yql/essentials/udfs/common/python/python3_small/test/cases/Data.sql
new file mode 100644
index 00000000000..3f7de07d5c2
--- /dev/null
+++ b/yql/essentials/udfs/common/python/python3_small/test/cases/Data.sql
@@ -0,0 +1,61 @@
+--sanitizer ignore memory
+$data = AsTuple(
+ Bool("true"),
+ Bool("FalsE"),
+ Int8("-128"),
+ Int8("127"),
+ Uint8("0"),
+ Uint8("255"),
+ Int16("-32768"),
+ Int16("32767"),
+ Uint16("0"),
+ Uint16("65535"),
+ Int32("-2147483648"),
+ Int32("2147483647"),
+ Uint32("0"),
+ Uint32("4294967295"),
+ Int64("-9223372036854775808"),
+ Int64("9223372036854775807"),
+ Uint64("0"),
+ Uint64("18446744073709551615"),
+ Float("0"),
+ Float("1"),
+ Float("-1e30"),
+ Float("-inf"),
+ Float("+inf"),
+ Float("nan"),
+ Double("0"),
+ Double("1"),
+ Double("-1e300"),
+ Double("-inf"),
+ Double("+inf"),
+ Double("nan"),
+ String("foo\xffbar"),
+ Utf8("привет"),
+ Yson("<a=1>[3;%false]"),
+ Json(@@{"a":1,"b":null}@@),
+ Date("2000-01-01"),
+ Datetime("2000-01-01T01:02:03Z"),
+ Timestamp("2000-01-01T01:02:03.4Z"),
+ Interval("P1DT12H"),
+ TzDate("2000-01-01,Europe/Moscow"),
+ TzDatetime("2000-01-01T01:02:03,Europe/Moscow"),
+ TzTimestamp("2000-01-01T01:02:03.4,Europe/Moscow"),
+ Uuid('31323334-3536-3738-393a-3b3c3d3e3f40'),
+ Decimal('3.1415926535897932384626433832795029', 35, 34),
+ Decimal('-.00000000000000000000000000000000001', 35, 35),
+ Decimal('NAN', 10, 5),
+ Decimal('-iNf', 1, 0)
+);
+
+$type = CallableType(0,
+ TypeOf($data),
+ TypeOf($data)
+);
+
+$f = Python3::f($type, @@
+def f(x):
+ return x
+@@);
+
+select $data, $f($data);
diff --git a/yql/essentials/udfs/common/python/python3_small/test/cases/Excepthook.cfg b/yql/essentials/udfs/common/python/python3_small/test/cases/Excepthook.cfg
new file mode 100644
index 00000000000..5dae597903c
--- /dev/null
+++ b/yql/essentials/udfs/common/python/python3_small/test/cases/Excepthook.cfg
@@ -0,0 +1 @@
+xfail
diff --git a/yql/essentials/udfs/common/python/python3_small/test/cases/Excepthook.sql b/yql/essentials/udfs/common/python/python3_small/test/cases/Excepthook.sql
new file mode 100644
index 00000000000..100086c9e4e
--- /dev/null
+++ b/yql/essentials/udfs/common/python/python3_small/test/cases/Excepthook.sql
@@ -0,0 +1,23 @@
+--sanitizer ignore memory
+/* syntax version 1 */
+$script = @@
+import sys
+import traceback
+
+
+def excepthook(*args):
+ print('CUSTOM_EXCEPTHOOK', file=sys.stderr)
+ print(all(_ for _ in args), file=sys.stderr)
+ print("".join(traceback.format_exception(*args)), file=sys.stderr)
+
+
+sys.excepthook = excepthook
+
+
+def f(string):
+ raise Exception()
+@@;
+
+$udf = Python3::f(Callable<(String)->String>, $script);
+
+SELECT $udf(@@{"abc":1}@@);
diff --git a/yql/essentials/udfs/common/python/python3_small/test/cases/GreedyInputContainers.in b/yql/essentials/udfs/common/python/python3_small/test/cases/GreedyInputContainers.in
new file mode 100644
index 00000000000..e69de29bb2d
--- /dev/null
+++ b/yql/essentials/udfs/common/python/python3_small/test/cases/GreedyInputContainers.in
diff --git a/yql/essentials/udfs/common/python/python3_small/test/cases/GreedyInputContainers.sql b/yql/essentials/udfs/common/python/python3_small/test/cases/GreedyInputContainers.sql
new file mode 100644
index 00000000000..a43af8791d6
--- /dev/null
+++ b/yql/essentials/udfs/common/python/python3_small/test/cases/GreedyInputContainers.sql
@@ -0,0 +1,19 @@
+--sanitizer ignore memory
+/* syntax version 1 */
+$s = @@
+def list_func(lst):
+ return lst.count(1)
+list_func._yql_lazy_input = False
+@@;
+
+$u = Python3::list_func(Callable<(List<Int32>)->Int32>, $s);
+select $u(AsList(1,2,3));
+
+$s = @@
+def dict_func(dict):
+ return list(dict.values()).count(b"b")
+dict_func._yql_lazy_input = False
+@@;
+
+$v = Python3::dict_func(Callable<(Dict<Int32, String>)->Int32>, $s);
+select $v(AsDict(AsTuple(1,"a"),AsTuple(2,"b")));
diff --git a/yql/essentials/udfs/common/python/python3_small/test/cases/OptionalNested.cfg b/yql/essentials/udfs/common/python/python3_small/test/cases/OptionalNested.cfg
new file mode 100644
index 00000000000..5dae597903c
--- /dev/null
+++ b/yql/essentials/udfs/common/python/python3_small/test/cases/OptionalNested.cfg
@@ -0,0 +1 @@
+xfail
diff --git a/yql/essentials/udfs/common/python/python3_small/test/cases/OptionalNested.sql b/yql/essentials/udfs/common/python/python3_small/test/cases/OptionalNested.sql
new file mode 100644
index 00000000000..33396f036a7
--- /dev/null
+++ b/yql/essentials/udfs/common/python/python3_small/test/cases/OptionalNested.sql
@@ -0,0 +1,7 @@
+--sanitizer ignore memory
+$optOptList = Python3::opt_opt_list(Callable<(String)->List<String>??>, @@
+def opt_opt_list(in_str):
+ return [in_str] if len(in_str) % 2 == 0 else None
+@@);
+
+SELECT $optOptList("42");
diff --git a/yql/essentials/udfs/common/python/python3_small/test/cases/Switch.in b/yql/essentials/udfs/common/python/python3_small/test/cases/Switch.in
new file mode 100644
index 00000000000..e69de29bb2d
--- /dev/null
+++ b/yql/essentials/udfs/common/python/python3_small/test/cases/Switch.in
diff --git a/yql/essentials/udfs/common/python/python3_small/test/cases/Switch.sql b/yql/essentials/udfs/common/python/python3_small/test/cases/Switch.sql
new file mode 100644
index 00000000000..c2576a72e45
--- /dev/null
+++ b/yql/essentials/udfs/common/python/python3_small/test/cases/Switch.sql
@@ -0,0 +1,92 @@
+--sanitizer ignore memory
+/* syntax version 1 */
+$x = AsList(1,2,3);
+
+$s1 = @@
+def f(input):
+ for x in input:
+ yield x
+@@;
+
+$s2 = @@
+class Iter:
+ def __init__(self, input):
+ self.input = input
+
+ def __next__(self):
+ return next(self.input)
+@@;
+
+$s3 = @@
+class CallableIter:
+ def __init__(self, input):
+ self.input = input
+
+ def __call__(self):
+ def f(input):
+ for x in input:
+ yield x
+
+ return f(self.input)
+@@;
+
+$s4 = @@
+class Iterable:
+ def __init__(self, input):
+ self.input = input
+
+ def __iter__(self):
+ return iter(self.input)
+@@;
+
+$f1 = Python3::f(Callable<(Stream<Int32>)->Stream<Int32>>, $s1);
+
+$f2 = Python3::Iter(Callable<(Stream<Int32>)->Stream<Int32>>, $s2);
+
+$f3 = Python3::CallableIter(Callable<(Stream<Int32>)->Stream<Int32>>, $s3);
+
+$f4 = Python3::Iterable(Callable<(Stream<Int32>)->Stream<Int32>>, $s4);
+
+$g = ($stream)->{
+ return $stream;
+};
+
+select Yql::Collect($g(Yql::Iterator($x, Yql::DependsOn("A1"))));
+
+select Yql::Collect($f1(Yql::Iterator($x, Yql::DependsOn("A2"))));
+
+select Yql::Collect($f2(Yql::Iterator($x, Yql::DependsOn("A3"))));
+
+select Yql::Collect($f3(Yql::Iterator($x, Yql::DependsOn("A4"))));
+
+select Yql::Collect($f4(Yql::Iterator($x, Yql::DependsOn("A5"))));
+
+select Yql::Collect(Yql::Switch(
+ Yql::Iterator($x, Yql::DependsOn("B1")),
+ AsAtom('0'),
+ AsTuple(AsAtom('0')),
+ $g));
+
+select Yql::Collect(Yql::Switch(
+ Yql::Iterator($x, Yql::DependsOn("B2")),
+ AsAtom('0'),
+ AsTuple(AsAtom('0')),
+ $f1));
+
+select Yql::Collect(Yql::Switch(
+ Yql::Iterator($x, Yql::DependsOn("B3")),
+ AsAtom('0'),
+ AsTuple(AsAtom('0')),
+ $f2));
+
+select Yql::Collect(Yql::Switch(
+ Yql::Iterator($x, Yql::DependsOn("B4")),
+ AsAtom('0'),
+ AsTuple(AsAtom('0')),
+ $f3));
+
+select Yql::Collect(Yql::Switch(
+ Yql::Iterator($x, Yql::DependsOn("B5")),
+ AsAtom('0'),
+ AsTuple(AsAtom('0')),
+ $f4));
diff --git a/yql/essentials/udfs/common/python/python3_small/test/ya.make b/yql/essentials/udfs/common/python/python3_small/test/ya.make
new file mode 100644
index 00000000000..ac03d946685
--- /dev/null
+++ b/yql/essentials/udfs/common/python/python3_small/test/ya.make
@@ -0,0 +1,10 @@
+YQL_UDF_TEST_CONTRIB()
+
+TIMEOUT(300)
+SIZE(MEDIUM)
+
+DEPENDS(
+ yql/essentials/udfs/common/python/python3_small
+)
+
+END()
diff --git a/yql/essentials/udfs/common/python/python3_small/ya.make b/yql/essentials/udfs/common/python/python3_small/ya.make
new file mode 100644
index 00000000000..f815fa8d757
--- /dev/null
+++ b/yql/essentials/udfs/common/python/python3_small/ya.make
@@ -0,0 +1,16 @@
+YQL_PYTHON3_UDF(python3_udf)
+
+REGISTER_YQL_PYTHON_UDF(
+ NAME Python3
+ RESOURCE_NAME Python3
+)
+
+PEERDIR(
+ yql/essentials/public/udf
+)
+
+END()
+
+RECURSE_FOR_TESTS(
+ test
+)
diff --git a/yql/essentials/udfs/common/python/python_udf/python_function_factory.h b/yql/essentials/udfs/common/python/python_udf/python_function_factory.h
new file mode 100644
index 00000000000..a4e393b4868
--- /dev/null
+++ b/yql/essentials/udfs/common/python/python_udf/python_function_factory.h
@@ -0,0 +1,111 @@
+#pragma once
+
+#include <yql/essentials/public/udf/udf_value.h>
+#include <yql/essentials/public/udf/udf_value_builder.h>
+#include <yql/essentials/public/udf/udf_type_builder.h>
+#include <yql/essentials/public/udf/udf_registrator.h>
+#include <yql/essentials/public/udf/udf_terminator.h>
+#include <yql/essentials/udfs/common/python/bindings/py_ptr.h>
+#include <yql/essentials/udfs/common/python/bindings/py_callable.h>
+#include <yql/essentials/udfs/common/python/bindings/py_cast.h>
+#include <yql/essentials/udfs/common/python/bindings/py_errors.h>
+#include <yql/essentials/udfs/common/python/bindings/py_gil.h>
+#include <yql/essentials/udfs/common/python/bindings/py_utils.h>
+#include <yql/essentials/udfs/common/python/bindings/py_yql_module.h>
+
+#include <util/generic/yexception.h>
+#include <util/stream/str.h>
+#include <util/stream/printf.h>
+#include <util/string/builder.h>
+#include <util/string/cast.h>
+
+using namespace NYql::NUdf;
+using namespace NPython;
+
+//////////////////////////////////////////////////////////////////////////////
+// TPythonFunctionFactory
+//////////////////////////////////////////////////////////////////////////////
+class TPythonFunctionFactory: public TBoxedValue
+{
+public:
+ TPythonFunctionFactory(
+ const TStringRef& name,
+ const TStringRef& tag,
+ const TType* functionType,
+ ITypeInfoHelper::TPtr&& helper,
+ const NYql::NUdf::TSourcePosition& pos)
+ : Ctx(new TPyContext(helper, tag, pos))
+ , FunctionName(name)
+ , FunctionType_(functionType)
+ {
+ }
+
+ ~TPythonFunctionFactory() {
+ Ctx->Cleanup();
+ PyCleanup();
+ }
+
+private:
+ TUnboxedValue Run(
+ const IValueBuilder* valueBuilder,
+ const TUnboxedValuePod* args) const override
+ {
+ TPyCastContext::TPtr castCtx = MakeIntrusive<TPyCastContext>(valueBuilder, Ctx);
+
+ // for get propper c-compatible null-terminating string
+ TString source(args[0].AsStringRef());
+
+ TPyGilLocker lock;
+ TPyObjectPtr module = CompileModule(FunctionName, source);
+ if (!module) {
+ UdfTerminate((TStringBuilder() << Ctx->Pos << "Failed to compile module: " << GetLastErrorAsString()).data());
+ }
+
+ TPyObjectPtr function(PyObject_GetAttrString(module.Get(), FunctionName.data()));
+ if (!function) {
+ UdfTerminate((TStringBuilder() << Ctx->Pos << "Failed to find entry point: " << GetLastErrorAsString()).data());
+ }
+
+ if (!PyCallable_Check(function.Get())) {
+ UdfTerminate((TStringBuilder() << Ctx->Pos << "Entry point is not a callable").data());
+ }
+
+ try {
+ SetupCallableSettings(castCtx, function.Get());
+ } catch (const yexception& e) {
+ UdfTerminate((TStringBuilder() << Ctx->Pos << "Failed to setup callable settings: "
+ << e.what()).data());
+ }
+ return FromPyCallable(castCtx, FunctionType_, function.Release());
+ }
+
+ static TPyObjectPtr CompileModule(const TString& name, const TString& source) {
+ unsigned int moduleNum = AtomicCounter++;
+ TString filename(TStringBuf("embedded:"));
+ filename += name;
+
+ TPyObjectPtr module, code;
+ if (HasEncodingCookie(source)) {
+ code.ResetSteal(Py_CompileString(source.data(), filename.data(), Py_file_input));
+ } else {
+ PyCompilerFlags cflags;
+ cflags.cf_flags = PyCF_SOURCE_IS_UTF8;
+
+ code.ResetSteal(Py_CompileStringFlags(
+ source.data(), filename.data(), Py_file_input, &cflags));
+ }
+
+ if (code) {
+ TString nameWithNum = name + ToString(moduleNum);
+ char* moduleName = const_cast<char*>(nameWithNum.data());
+ module.ResetSteal(PyImport_ExecCodeModule(moduleName, code.Get()));
+ }
+
+ return module;
+ }
+
+ const TPyContext::TPtr Ctx;
+ const TString FunctionName;
+ const TType* FunctionType_;
+ inline static std::atomic_uint AtomicCounter = 0;
+};
diff --git a/yql/essentials/udfs/common/python/python_udf/python_udf.cpp b/yql/essentials/udfs/common/python/python_udf/python_udf.cpp
new file mode 100644
index 00000000000..b1739a1775e
--- /dev/null
+++ b/yql/essentials/udfs/common/python/python_udf/python_udf.cpp
@@ -0,0 +1,232 @@
+#include "python_udf.h"
+#include "python_function_factory.h"
+
+#include <yql/essentials/public/udf/udf_version.h>
+#include <yql/essentials/udfs/common/python/bindings/py_utils.h>
+
+#include <util/generic/vector.h>
+#include <util/system/execpath.h>
+
+namespace {
+
+#if PY_MAJOR_VERSION >= 3
+#define PYTHON_PROGRAMM_NAME L"YQL::Python3"
+#else
+#define PYTHON_PROGRAMM_NAME "YQL::Python2"
+#endif
+
+// Appends every entry of pathVals to Python's sys.path.
+// Returns -1 when sys.path cannot be obtained, the first non-zero
+// PyList_Append status on failure, otherwise the result of PySys_SetObject.
+int AddToPythonPath(const TVector<TStringBuf>& pathVals)
+{
+    // Mutable buffer: PySys_{Get,Set}Object take a non-const char* arg
+    char attrName[] = "path";
+
+    TPyObjectPtr pathList(PySys_GetObject(attrName), TPyObjectPtr::ADD_REF);
+    if (!pathList) {
+        return -1;
+    }
+
+    for (const TStringBuf& entry: pathVals) {
+        TPyObjectPtr pyEntry = PyRepr(entry.data());
+        if (const int status = PyList_Append(pathList.Get(), pyEntry.Get()); status != 0) {
+            return status;
+        }
+    }
+
+    return PySys_SetObject(attrName, pathList.Get());
+}
+
+// Imports the __res module, which hosts the Arcadia static python import hook
+// (the hook modifies sys.meta_path upon import). Aborts if the import fails.
+void InitArcadiaPythonRuntime()
+{
+    TPyObjectPtr resModule(PyImport_ImportModule("__res"));
+    Y_ABORT_UNLESS(resModule, "Can't import arcadia python runtime");
+}
+
+//////////////////////////////////////////////////////////////////////////////
+// TPythonModule
+//////////////////////////////////////////////////////////////////////////////
+/// UDF module backed by an embedded Python interpreter.
+///
+/// In standalone mode the constructor calls Py_Initialize and releases the GIL
+/// (PyEval_SaveThread), and the destructor re-acquires it and calls Py_Finalize.
+/// In non-standalone mode none of these interpreter lifecycle calls are made here.
+class TPythonModule: public IUdfModule
+{
+public:
+    /// @param resourceName  passed as the tag to every TPythonFunctionFactory
+    /// @param pythonFlavor  forwarded to InitYqlModule; Arcadia additionally imports __res
+    /// @param standalone    whether this module owns interpreter startup and shutdown
+    TPythonModule(const TString& resourceName, EPythonFlavor pythonFlavor, bool standalone = true)
+        : ResourceName(resourceName), Standalone(standalone)
+    {
+        if (Standalone) {
+            Py_SetProgramName(PYTHON_PROGRAMM_NAME);
+            PrepareYqlModule();
+            Py_Initialize();
+        }
+
+        InitYqlModule(pythonFlavor, standalone);
+
+        const auto rc = PyRun_SimpleString(R"(
+# numpy on import may find installed openblas library and load it,
+# which in turn causes it to start CPUCOUNT threads
+# with approx. 40Mb memory reserved for each thread;
+#
+# See more detailed explanation here: https://st.yandex-team.ru/STATLIBS-1715#5bfc68ecbbc039001cec572a
+#
+# Thus, we reduce negative effects as much as possible
+import os
+os.environ['OPENBLAS_NUM_THREADS'] = '1'
+
+
+# Following part allows us later to format tracebacks via sys.excepthook
+# in thread-safe manner
+import sys
+import threading
+if sys.version_info >= (3, 0):
+    from io import StringIO, TextIOWrapper as SysStderrType
+else:
+    from cStringIO import StringIO
+    SysStderrType = file
+
+class StderrLocal(threading.local):
+
+    def __init__(self):
+        self.is_real_mode = True
+        self.buffer = StringIO()
+
+
+class StderrProxy(object):
+    def __init__(self, stderr):
+        self._stderr = stderr
+        self._tls = StderrLocal()
+
+    def _toggle_real_mode(self):
+        self._tls.is_real_mode = not self._tls.is_real_mode
+        if not self._tls.is_real_mode:
+            # StringIO objects have no clear() method;
+            # start every capture with a fresh, empty buffer instead
+            self._tls.buffer = StringIO()
+
+    def _get_value(self):
+        assert not self._tls.is_real_mode
+        return self._tls.buffer.getvalue()
+
+    def __getattr__(self, attr):
+        target = self._stderr
+        if not self._tls.is_real_mode:
+            target = self._tls.buffer
+
+        return getattr(target, attr)
+
+if isinstance(sys.stderr, SysStderrType):
+    sys.stderr = StderrProxy(sys.stderr)
+)");
+        Y_ABORT_UNLESS(rc >= 0, "Can't setup module");
+
+        if (pythonFlavor == EPythonFlavor::Arcadia) {
+            InitArcadiaPythonRuntime();
+        }
+
+#ifndef _win_
+        if (Standalone) {
+            TVector<TStringBuf> paths;
+            if (pythonFlavor == EPythonFlavor::System) {
+                // NOTE(review): this path is python2.7-specific even in Python 3
+                // builds — confirm that this is intended.
+                paths.push_back(TStringBuf("/usr/lib/python2.7/dist-packages"));
+            }
+            paths.push_back(TStringBuf("."));
+            const auto r = AddToPythonPath(paths);
+            Y_ABORT_UNLESS(r >= 0, "Can't add dist-packages into sys.path");
+        }
+#endif
+
+        // Point sys.executable at the path of the current executable.
+        char executableVar[] = "executable"; // PySys_{Get,Set}Object take a non-const char* arg
+        TPyObjectPtr pyExecutableStr = PyRepr(GetExecPath().data());
+        Y_ABORT_UNLESS(PySys_SetObject(executableVar, pyExecutableStr.Get()) >= 0, "Can't set sys.executable");
+
+        if (Standalone) {
+            PyEval_InitThreads();
+            // Release the GIL; the saved state is restored in the destructor.
+            MainThreadState_ = PyEval_SaveThread();
+        }
+    }
+
+    ~TPythonModule() {
+        if (Standalone) {
+            PyEval_RestoreThread(MainThreadState_);
+            Py_Finalize();
+        }
+    }
+
+    void CleanupOnTerminate() const final {
+        PyCleanup();
+    }
+
+    void GetAllFunctions(IFunctionsSink&) const final {}
+
+    /// Installs a TPythonFunctionFactory as the implementation of `name`.
+    ///
+    /// Does nothing when only types are requested (TFlags::TypesOnly). Reports an
+    /// error through the builder when userType is not a callable type or when a
+    /// yexception is thrown while building.
+    void BuildFunctionTypeInfo(
+        const TStringRef& name,
+        TType* userType,
+        const TStringRef& typeConfig,
+        ui32 flags,
+        IFunctionTypeInfoBuilder& builder) const final
+    {
+        Y_UNUSED(typeConfig);
+
+        if (flags & TFlags::TypesOnly) {
+            return;
+        }
+
+        try {
+            auto typeHelper = builder.TypeInfoHelper();
+            if (ETypeKind::Callable != typeHelper->GetTypeKind(userType)) {
+                return builder.SetError(TStringRef::Of("Expected callable type"));
+            }
+
+            const auto pos = builder.GetSourcePosition();
+            builder.Implementation(new TPythonFunctionFactory(name, ResourceName, userType, std::move(typeHelper), pos));
+        } catch (const yexception& e) {
+            builder.SetError(TStringBuf(e.what()));
+        }
+    }
+
+private:
+    TString ResourceName;
+    bool Standalone;
+    // Set only in standalone mode; initialized so it never holds an indeterminate value.
+    PyThreadState* MainThreadState_ = nullptr;
+};
+
+//////////////////////////////////////////////////////////////////////////////
+// TStubModule
+//////////////////////////////////////////////////////////////////////////////
+// Placeholder module registered when only types are being loaded:
+// it exposes no functions and has nothing to clean up.
+class TStubModule: public IUdfModule {
+    void CleanupOnTerminate() const final {}
+
+    void GetAllFunctions(IFunctionsSink&) const final {}
+
+    // Builds nothing; in debug builds it checks that the call is a types-only one.
+    void BuildFunctionTypeInfo(
+        const TStringRef& name,
+        TType* userType,
+        const TStringRef& typeConfig,
+        ui32 flags,
+        IFunctionTypeInfoBuilder& builder) const final
+    {
+        Y_UNUSED(name);
+        Y_UNUSED(userType);
+        Y_UNUSED(typeConfig);
+        Y_UNUSED(builder);
+        Y_DEBUG_ABORT_UNLESS(flags & TFlags::TypesOnly,
+            "in stub module this function can be called only for types loading");
+    }
+};
+
+} // namespace
+
+// Adds the Python UDF module to the registrator under moduleName:
+// a stub when only types are requested, a standalone Python module otherwise.
+void NKikimr::NUdf::RegisterYqlPythonUdf(
+    IRegistrator& registrator,
+    ui32 flags,
+    TStringBuf moduleName,
+    TStringBuf resourceName,
+    EPythonFlavor pythonFlavor)
+{
+    const bool typesOnly = (flags & IRegistrator::TFlags::TypesOnly) != 0;
+    if (typesOnly) {
+        registrator.AddModule(moduleName, new TStubModule);
+        return;
+    }
+
+    registrator.AddModule(
+        moduleName,
+        NKikimr::NUdf::GetYqlPythonUdfModule(resourceName, pythonFlavor, /* standalone = */ true)
+    );
+}
+
+// Creates a TPythonModule for the given resource name, flavor and standalone mode.
+TUniquePtr<NKikimr::NUdf::IUdfModule> NKikimr::NUdf::GetYqlPythonUdfModule(
+    TStringBuf resourceName, NKikimr::NUdf::EPythonFlavor pythonFlavor,
+    bool standalone
+) {
+    // The module stores its own copy of the name.
+    const TString ownedResourceName(resourceName);
+    return new TPythonModule(ownedResourceName, pythonFlavor, standalone);
+}
diff --git a/yql/essentials/udfs/common/python/python_udf/python_udf.h b/yql/essentials/udfs/common/python/python_udf/python_udf.h
new file mode 100644
index 00000000000..16d7da096dd
--- /dev/null
+++ b/yql/essentials/udfs/common/python/python_udf/python_udf.h
@@ -0,0 +1,26 @@
+#pragma once
+
+#include <yql/essentials/public/udf/udf_registrator.h>
+
+namespace NYql {
+namespace NUdf {
+ // Selects the Python runtime flavor a Python UDF module is set up for.
+enum class EPythonFlavor {
+ System, // standalone non-Windows modules also append /usr/lib/python2.7/dist-packages to sys.path
+ Arcadia, // the module imports __res, which hosts the Arcadia static python import hook
+};
+ // Adds a module named moduleName to the registrator: a stub when flags contain TypesOnly, a standalone Python module otherwise.
+void RegisterYqlPythonUdf(
+ IRegistrator& registrator,
+ ui32 flags,
+ TStringBuf moduleName,
+ TStringBuf resourceName,
+ EPythonFlavor pythonFlavor);
+ // Creates the Python UDF module; with standalone == true it initializes the interpreter itself and finalizes it on destruction.
+TUniquePtr<IUdfModule> GetYqlPythonUdfModule(
+ TStringBuf resourceName,
+ EPythonFlavor pythonFlavor,
+ bool standalone);
+
+} // namespace NUdf
+} // namespace NYql
diff --git a/yql/essentials/udfs/common/python/python_udf/python_udfs_exports.exports b/yql/essentials/udfs/common/python/python_udf/python_udfs_exports.exports
new file mode 100644
index 00000000000..2ffd6f54b59
--- /dev/null
+++ b/yql/essentials/udfs/common/python/python_udf/python_udfs_exports.exports
@@ -0,0 +1,5 @@
+C Register
+C AbiVersion
+C RunPython
+C BindSymbols
+C SetBackTraceCallback
diff --git a/yql/essentials/udfs/common/python/python_udf/ya.make b/yql/essentials/udfs/common/python/python_udf/ya.make
new file mode 100644
index 00000000000..9a2090665a2
--- /dev/null
+++ b/yql/essentials/udfs/common/python/python_udf/ya.make
@@ -0,0 +1,20 @@
+PY23_NATIVE_LIBRARY()
+
+YQL_ABI_VERSION(2 27 0)
+
+SRCS(
+ python_udf.cpp
+)
+
+PEERDIR(
+ yql/essentials/public/udf
+ yql/essentials/udfs/common/python/bindings
+)
+
+CFLAGS(
+ -DDISABLE_PYDEBUG
+)
+
+NO_COMPILER_WARNINGS()
+
+END()
diff --git a/yql/essentials/udfs/common/python/system_python/README.MD b/yql/essentials/udfs/common/python/system_python/README.MD
new file mode 100644
index 00000000000..16d46fd51d3
--- /dev/null
+++ b/yql/essentials/udfs/common/python/system_python/README.MD
@@ -0,0 +1,7 @@
+python3_N folders here are mirrors of python3_small, adjusted for system python (Name Python3 -> SystemPython3_N, LDFLAGS(-lpython3.N))
+
+They are supposed to be built with local python: `ya make -DUSE_ARCADIA_PYTHON=no -DUSE_LOCAL_PYTHON=yes -DOS_SDK=local -DPYTHON_BIN=python3.N -DPYTHON_CONFIG=python3.N-config python3.N`
+
+One way to get all pythons on the same machine is `sudo add-apt-repository ppa:deadsnakes/ppa` and `sudo apt install python3.N-dev`
+
+Use build_system_python_udfs.sh to build all Python UDFs against the system (local) Python installations.
diff --git a/yql/essentials/udfs/common/python/system_python/build_system_python_udfs.sh b/yql/essentials/udfs/common/python/system_python/build_system_python_udfs.sh
new file mode 100755
index 00000000000..8dd22452304
--- /dev/null
+++ b/yql/essentials/udfs/common/python/system_python/build_system_python_udfs.sh
@@ -0,0 +1,7 @@
+#!/usr/bin/env bash
+set -eux
+ya make -DUSE_ARCADIA_PYTHON=no -DUSE_LOCAL_PYTHON=yes -DOS_SDK=local -DPYTHON_BIN=python3.8 -DPYTHON_CONFIG=python3.8-config python3_8
+ya make -DUSE_ARCADIA_PYTHON=no -DUSE_LOCAL_PYTHON=yes -DOS_SDK=local -DPYTHON_BIN=python3.9 -DPYTHON_CONFIG=python3.9-config python3_9
+ya make -DUSE_ARCADIA_PYTHON=no -DUSE_LOCAL_PYTHON=yes -DOS_SDK=local -DPYTHON_BIN=python3.10 -DPYTHON_CONFIG=python3.10-config python3_10
+ya make -DUSE_ARCADIA_PYTHON=no -DUSE_LOCAL_PYTHON=yes -DOS_SDK=local -DPYTHON_BIN=python3.11 -DPYTHON_CONFIG=python3.11-config python3_11
+ya make -DUSE_ARCADIA_PYTHON=no -DUSE_LOCAL_PYTHON=yes -DOS_SDK=local -DPYTHON_BIN=python3.12 -DPYTHON_CONFIG=python3.12-config python3_12
diff --git a/yql/essentials/udfs/common/python/system_python/python3_10/ya.make b/yql/essentials/udfs/common/python/system_python/python3_10/ya.make
new file mode 100644
index 00000000000..12068a33a1e
--- /dev/null
+++ b/yql/essentials/udfs/common/python/system_python/python3_10/ya.make
@@ -0,0 +1,16 @@
+YQL_PYTHON3_UDF(systempython3_10_udf)
+
+REGISTER_YQL_PYTHON_UDF(
+ NAME SystemPython3_10
+ RESOURCE_NAME SystemPython3_10
+)
+
+IF (USE_LOCAL_PYTHON)
+ LDFLAGS("-lpython3.10")
+ENDIF()
+
+PEERDIR(
+ yql/essentials/public/udf
+)
+
+END()
diff --git a/yql/essentials/udfs/common/python/system_python/python3_11/ya.make b/yql/essentials/udfs/common/python/system_python/python3_11/ya.make
new file mode 100644
index 00000000000..483432b9b90
--- /dev/null
+++ b/yql/essentials/udfs/common/python/system_python/python3_11/ya.make
@@ -0,0 +1,16 @@
+YQL_PYTHON3_UDF(systempython3_11_udf)
+
+REGISTER_YQL_PYTHON_UDF(
+ NAME SystemPython3_11
+ RESOURCE_NAME SystemPython3_11
+)
+
+IF (USE_LOCAL_PYTHON)
+ LDFLAGS("-lpython3.11")
+ENDIF()
+
+PEERDIR(
+ yql/essentials/public/udf
+)
+
+END()
diff --git a/yql/essentials/udfs/common/python/system_python/python3_12/ya.make b/yql/essentials/udfs/common/python/system_python/python3_12/ya.make
new file mode 100644
index 00000000000..8220fda0eac
--- /dev/null
+++ b/yql/essentials/udfs/common/python/system_python/python3_12/ya.make
@@ -0,0 +1,16 @@
+YQL_PYTHON3_UDF(systempython3_12_udf)
+
+REGISTER_YQL_PYTHON_UDF(
+ NAME SystemPython3_12
+ RESOURCE_NAME SystemPython3_12
+)
+
+IF (USE_LOCAL_PYTHON)
+ LDFLAGS("-lpython3.12")
+ENDIF()
+
+PEERDIR(
+ yql/essentials/public/udf
+)
+
+END()
diff --git a/yql/essentials/udfs/common/python/system_python/python3_8/ya.make b/yql/essentials/udfs/common/python/system_python/python3_8/ya.make
new file mode 100644
index 00000000000..df447bacb4d
--- /dev/null
+++ b/yql/essentials/udfs/common/python/system_python/python3_8/ya.make
@@ -0,0 +1,16 @@
+YQL_PYTHON3_UDF(systempython3_8_udf)
+
+REGISTER_YQL_PYTHON_UDF(
+ NAME SystemPython3_8
+ RESOURCE_NAME SystemPython3_8
+)
+
+IF (USE_LOCAL_PYTHON)
+ LDFLAGS("-lpython3.8")
+ENDIF()
+
+PEERDIR(
+ yql/essentials/public/udf
+)
+
+END()
diff --git a/yql/essentials/udfs/common/python/system_python/python3_9/ya.make b/yql/essentials/udfs/common/python/system_python/python3_9/ya.make
new file mode 100644
index 00000000000..ea3e5d849ed
--- /dev/null
+++ b/yql/essentials/udfs/common/python/system_python/python3_9/ya.make
@@ -0,0 +1,16 @@
+YQL_PYTHON3_UDF(systempython3_9_udf)
+
+REGISTER_YQL_PYTHON_UDF(
+ NAME SystemPython3_9
+ RESOURCE_NAME SystemPython3_9
+)
+
+IF (USE_LOCAL_PYTHON)
+ LDFLAGS("-lpython3.9")
+ENDIF()
+
+PEERDIR(
+ yql/essentials/public/udf
+)
+
+END()
diff --git a/yql/essentials/udfs/common/python/system_python/ya.make b/yql/essentials/udfs/common/python/system_python/ya.make
new file mode 100644
index 00000000000..3afc7796bd3
--- /dev/null
+++ b/yql/essentials/udfs/common/python/system_python/ya.make
@@ -0,0 +1,7 @@
+RECURSE(
+ python3_8
+ python3_9
+ python3_10
+ python3_11
+ python3_12
+)
diff --git a/yql/essentials/udfs/common/python/ya.make b/yql/essentials/udfs/common/python/ya.make
new file mode 100644
index 00000000000..bb6a4c8d5b4
--- /dev/null
+++ b/yql/essentials/udfs/common/python/ya.make
@@ -0,0 +1,10 @@
+# This module should not be exported under CMake since it requires Python build
+NO_BUILD_IF(STRICT EXPORT_CMAKE)
+
+RECURSE(
+ bindings
+ main_py3
+ python3_small
+ python_udf
+ system_python
+)
diff --git a/yql/essentials/udfs/common/re2/re2_udf.cpp b/yql/essentials/udfs/common/re2/re2_udf.cpp
new file mode 100644
index 00000000000..5d43ce040ae
--- /dev/null
+++ b/yql/essentials/udfs/common/re2/re2_udf.cpp
@@ -0,0 +1,536 @@
+#include <yql/essentials/public/udf/udf_helpers.h>
+#include <yql/essentials/public/udf/udf_value_builder.h>
+
+#include <contrib/libs/re2/re2/re2.h>
+
+#include <util/charset/utf8.h>
+#include <util/string/cast.h>
+
+using namespace re2;
+using namespace NKikimr;
+using namespace NUdf;
+
+namespace {
+
+ template <typename T> // Identity converter: OPTIONS_MAP names it as the `conv` column for options whose value is passed to the setter unchanged.
+ T Id(T x) {
+ return x;
+ }
+
+ re2::RE2::Options::Encoding EncodingFromBool(bool x) { // Converter for the Utf8 option: true selects EncodingUTF8, false selects EncodingLatin1.
+ return x ? re2::RE2::Options::Encoding::EncodingUTF8 : re2::RE2::Options::Encoding::EncodingLatin1;
+ }
+
+#define OPTIONS_MAP(xx) \
+ xx(Utf8, 0, bool, true, set_encoding, EncodingFromBool) \
+ xx(PosixSyntax, 1, bool, false, set_posix_syntax, Id) \
+ xx(LongestMatch, 2, bool, false, set_longest_match, Id) \
+ xx(LogErrors, 3, bool, true, set_log_errors, Id) \
+ xx(MaxMem, 4, ui64, 8 << 20, set_max_mem, Id) \
+ xx(Literal, 5, bool, false, set_literal, Id) \
+ xx(NeverNl, 6, bool, false, set_never_nl, Id) \
+ xx(DotNl, 7, bool, false, set_dot_nl, Id) \
+ xx(NeverCapture, 8, bool, false, set_never_capture, Id) \
+ xx(CaseSensitive, 9, bool, true, set_case_sensitive, Id) \
+ xx(PerlClasses, 10, bool, false, set_perl_classes, Id) \
+ xx(WordBoundary, 11, bool, false, set_word_boundary, Id) \
+ xx(OneLine, 12, bool, false, set_one_line, Id) // Every xx(...) row of this table is (field name, argument slot index, value type, default value, RE2::Options setter, value converter).
+
+ enum EOptionsField : ui32 { // Enumerators are generated from the OPTIONS_MAP rows via ENUM_VALUE_GEN (defined outside this file), followed by Count.
+ OPTIONS_MAP(ENUM_VALUE_GEN)
+ Count // Used as the size of TOptionsSchema::Indices and of the Options struct and argument list.
+ };
+
+ struct TOptionsSchema { // Type-level description of the Options struct, filled in by MakeOptionsSchema.
+ TType* StructType; // Struct type built from the OPTIONS_MAP fields.
+ ui32 Indices[EOptionsField::Count]; // Indices[i] receives the struct member position of the option whose OPTIONS_MAP index is i.
+ };
+
+ struct TRegexpGroups { // Capture-group layout computed for the Capture function in BuildFunctionTypeInfo.
+ TVector<TString> Names; // Result struct field name of group i: the regexp's own group name, or "_<n>" for unnamed groups.
+ TVector<ui32> Indexes; // Position of group i's field inside the result struct.
+ };
+
+ class TRe2Udf: public TBoxedValue { // Callable bound to one compiled regexp; Mode selects which Re2 function Run implements.
+ public:
+ enum EMode {
+ MATCH, // Match anchored at both ends of the input.
+ GREP, // Unanchored search.
+ CAPTURE, // Struct of captured groups.
+ REPLACE, // RE2::GlobalReplace with a caller-supplied rewrite string.
+ COUNT, // Value returned by RE2::GlobalReplace with an empty rewrite.
+ FIND_AND_CONSUME, // List of substrings produced by repeated RE2::FindAndConsume calls.
+ };
+
+ template <bool posix>
+ class TFactory: public TBoxedValue { // Factory callable: Run receives the run config and returns a TRe2Udf built from it.
+ public:
+ TFactory(
+ EMode mode,
+ const TOptionsSchema& optionsSchema,
+ TSourcePosition pos,
+ const TRegexpGroups& regexpGroups = TRegexpGroups())
+ : Mode(mode)
+ , OptionsSchema(optionsSchema)
+ , Pos_(pos)
+ , RegexpGroups(regexpGroups)
+ {
+ }
+
+ private:
+ TUnboxedValue Run(
+ const IValueBuilder* valueBuilder,
+ const TUnboxedValuePod* args) const override {
+ return TUnboxedValuePod(
+ new TRe2Udf(
+ valueBuilder,
+ args[0], // Run config; the constructor reads element 0 as the pattern and element 1 as the optional options struct.
+ RegexpGroups,
+ Mode,
+ posix,
+ OptionsSchema,
+ Pos_));
+ }
+
+ EMode Mode;
+ const TOptionsSchema OptionsSchema;
+ TSourcePosition Pos_;
+ const TRegexpGroups RegexpGroups;
+ };
+
+ static const TStringRef& Name(EMode mode) { // Maps a mode to the function name the module exposes for it.
+ static auto match = TStringRef::Of("Match");
+ static auto grep = TStringRef::Of("Grep");
+ static auto capture = TStringRef::Of("Capture");
+ static auto replace = TStringRef::Of("Replace");
+ static auto count = TStringRef::Of("Count");
+ static auto findAndconsume = TStringRef::Of("FindAndConsume");
+
+ switch (mode) {
+ case EMode::MATCH:
+ return match;
+ case EMode::GREP:
+ return grep;
+ case EMode::CAPTURE:
+ return capture;
+ case EMode::REPLACE:
+ return replace;
+ case EMode::COUNT:
+ return count;
+ case EMode::FIND_AND_CONSUME:
+ return findAndconsume;
+ }
+ Y_ABORT("Unexpected mode");
+ }
+
+ TRe2Udf( // Compiles the pattern from runConfig; any std::exception is reported through UdfTerminate with the source position.
+ const IValueBuilder*,
+ const TUnboxedValuePod& runConfig,
+ const TRegexpGroups& regexpGroups,
+ EMode mode,
+ bool posix,
+ const TOptionsSchema& optionsSchema,
+ TSourcePosition pos)
+ : RegexpGroups(regexpGroups)
+ , Mode(mode)
+ , Captured()
+ , OptionsSchema(optionsSchema)
+ , Pos_(pos)
+ {
+ try {
+ auto patternValue = runConfig.GetElement(0);
+ auto optionsValue = runConfig.GetElement(1);
+ const std::string_view pattern(patternValue.AsStringRef());
+ RE2::Options options;
+
+ options.set_posix_syntax(posix); // Defaults used when no options struct is given: syntax from the module flavour, encoding detected from the pattern.
+ bool needUtf8 = (UTF8Detect(pattern) == UTF8);
+ options.set_encoding(
+ needUtf8
+ ? RE2::Options::Encoding::EncodingUTF8
+ : RE2::Options::Encoding::EncodingLatin1
+ );
+ if (optionsValue) { // An explicit options struct sets every OPTIONS_MAP field, overriding the two defaults above.
+#define FIELD_HANDLE(name, index, type, defVal, setter, conv) options.setter(conv(optionsValue.GetElement(OptionsSchema.Indices[index]).Get<type>()));
+ OPTIONS_MAP(FIELD_HANDLE)
+#undef FIELD_HANDLE
+ }
+
+ Regexp = std::make_unique<RE2>(StringPiece(pattern.data(), pattern.size()), options);
+
+ if (mode == EMode::CAPTURE) {
+ Captured = std::make_unique<StringPiece[]>(Regexp->NumberOfCapturingGroups() + 1); // One extra slot for group 0 (the whole match).
+ }
+
+ } catch (const std::exception& e) {
+ UdfTerminate((TStringBuilder() << Pos_ << " " << e.what()).data());
+ }
+ }
+
+ private:
+ TUnboxedValue Run( // Applies the regexp to args[0] according to Mode; an empty args[0] yields the mode's "no match" value.
+ const IValueBuilder* valueBuilder,
+ const TUnboxedValuePod* args) const final try {
+ RE2::Anchor anchor = RE2::UNANCHORED;
+ if (args[0]) {
+ const std::string_view input(args[0].AsStringRef());
+ const StringPiece piece(input.data(), input.size());
+
+ switch (Mode) {
+ case MATCH:
+ anchor = RE2::ANCHOR_BOTH; // MATCH differs from GREP only by anchoring at both ends.
+ [[fallthrough]];
+ case GREP:
+ return TUnboxedValuePod(Regexp->Match(piece, 0, input.size(), anchor, nullptr, 0));
+ case CAPTURE: { // NOTE(review): Captured is scratch storage written from this const method — confirm Run is never called concurrently on one instance.
+ const int count = Regexp->NumberOfCapturingGroups() + 1;
+ TUnboxedValue* items = nullptr;
+ const auto result = valueBuilder->NewArray(RegexpGroups.Names.size(), items);
+ if (Regexp->Match(piece, 0, input.size(), anchor, Captured.get(), count)) {
+ for (int i = 0; i < count; ++i) {
+ if (!Captured[i].empty()) { // Groups that captured nothing (or only an empty string) keep the empty value.
+ items[RegexpGroups.Indexes[i]] = valueBuilder->SubString(args[0], std::distance(piece.begin(), Captured[i].begin()), Captured[i].size());
+ }
+ }
+ } else {
+ return BuildEmptyStruct(valueBuilder);
+ }
+ return result;
+ }
+ case REPLACE: {
+ const std::string_view rewriteRef(args[1].AsStringRef());
+ const StringPiece rewrite(rewriteRef.data(), rewriteRef.size());
+ TString rewriteError;
+ if (!Regexp->CheckRewriteString(rewrite, &rewriteError)) {
+ UdfTerminate((TStringBuilder() << Pos_ << " [rewrite error] " << rewriteError).data());
+ }
+ std::string result(input);
+ RE2::GlobalReplace(&result, *Regexp, rewrite);
+ return input == result ? TUnboxedValue(args[0]) : valueBuilder->NewString(result); // Return the original value when nothing changed instead of building a new string.
+ }
+ case COUNT: {
+ std::string inputHolder(input); // GlobalReplace edits its argument in place, so it gets a private copy of the input.
+ const ui32 result = RE2::GlobalReplace(&inputHolder, *Regexp, "");
+ return TUnboxedValuePod(result);
+ }
+ case FIND_AND_CONSUME: {
+ StringPiece text(piece);
+ std::vector<TUnboxedValue> matches;
+ for (StringPiece w; text.begin() < text.end() && RE2::FindAndConsume(&text, *Regexp, &w);) {
+ if (w.size() == 0 && !text.empty()) { // Step past an empty match so the loop advances; skip the step when the match consumed the rest of the input.
+ text.remove_prefix(1);
+ }
+ matches.emplace_back(valueBuilder->SubString(args[0], std::distance(piece.begin(), w.begin()), w.size()));
+ }
+ return valueBuilder->NewList(matches.data(), matches.size());
+ }
+ }
+ Y_ABORT("Unexpected mode");
+ } else {
+ switch (Mode) {
+ case MATCH:
+ case GREP:
+ return TUnboxedValuePod(false);
+ case CAPTURE:
+ return BuildEmptyStruct(valueBuilder);
+ case REPLACE:
+ return TUnboxedValuePod();
+ case COUNT:
+ return TUnboxedValuePod::Zero();
+ case FIND_AND_CONSUME:
+ return valueBuilder->NewEmptyList();
+ }
+ Y_ABORT("Unexpected mode");
+ }
+ } catch (const std::exception& e) {
+ UdfTerminate((TStringBuilder() << Pos_ << " " << e.what()).data());
+ }
+
+ std::unique_ptr<RE2> Regexp;
+ const TRegexpGroups RegexpGroups;
+ EMode Mode;
+ std::unique_ptr<StringPiece[]> Captured; // Allocated only in CAPTURE mode.
+ const TOptionsSchema OptionsSchema;
+ TSourcePosition Pos_;
+
+ TUnboxedValue BuildEmptyStruct(const IValueBuilder* valueBuilder) const { // Result struct with one slot per group and no slot assigned.
+ TUnboxedValue* items = nullptr;
+ return valueBuilder->NewArray(RegexpGroups.Names.size(), items);
+ }
+ };
+
+ SIMPLE_STRICT_UDF(TEscape, char*(char*)) { // Returns RE2::QuoteMeta applied to the argument.
+ const std::string_view input(args[0].AsStringRef());
+ const auto& result = RE2::QuoteMeta(StringPiece(input.data(), input.size()));
+ return input == result ? TUnboxedValue(args[0]) : valueBuilder->NewString(result); // Return the original value when quoting changed nothing.
+ }
+
+ TOptionsSchema MakeOptionsSchema(::NKikimr::NUdf::IFunctionTypeInfoBuilder& builder) { // Builds the Options struct type with one field per OPTIONS_MAP row.
+ TOptionsSchema ret;
+ auto structBuilder = builder.Struct(EOptionsField::Count);
+#define FIELD_HANDLE(name, index, type, ...) structBuilder->AddField<type>(TStringRef::Of(#name), &ret.Indices[index]);
+ OPTIONS_MAP(FIELD_HANDLE) // Each AddField call is given &ret.Indices[index] so the member position is recorded per option.
+#undef FIELD_HANDLE
+
+ ret.StructType = structBuilder->Build();
+ return ret;
+ }
+
+ class TOptions: public TBoxedValue { // Implements the "Options" function: builds the options struct from optional per-field arguments.
+ private:
+ const TOptionsSchema Schema_;
+
+ public:
+ TOptions(const TOptionsSchema& schema)
+ : Schema_(schema)
+ {
+ }
+
+ TUnboxedValue Run(
+ const IValueBuilder* valueBuilder,
+ const TUnboxedValuePod* args) const override { // args[i] is the optional argument for the option whose OPTIONS_MAP index is i.
+ TUnboxedValue* items = nullptr;
+ const auto result = valueBuilder->NewArray(EOptionsField::Count, items);
+#define FIELD_HANDLE(name, index, type, defVal, ...) \
+ { \
+ auto structIndex = Schema_.Indices[index]; \
+ if (!args[index]) { \
+ items[structIndex] = TUnboxedValuePod(static_cast<type>(defVal)); \
+ } else { \
+ items[structIndex] = args[index].GetOptionalValue(); \
+ } \
+ }
+
+ OPTIONS_MAP(FIELD_HANDLE) // Per option: store the OPTIONS_MAP default when the argument is empty, otherwise the unwrapped argument.
+#undef FIELD_HANDLE
+ return result;
+ }
+
+ static const ::NKikimr::NUdf::TStringRef& Name() {
+ static auto name = ::NKikimr::NUdf::TStringRef::Of("Options");
+ return name;
+ }
+
+ static bool DeclareSignature( // Declares the signature when `name` equals Name(); returns whether it did.
+ const ::NKikimr::NUdf::TStringRef& name,
+ ::NKikimr::NUdf::TType* userType,
+ ::NKikimr::NUdf::IFunctionTypeInfoBuilder& builder,
+ bool typesOnly) {
+ Y_UNUSED(userType);
+ if (Name() == name) {
+ builder.IsStrict();
+
+ auto argsBuilder = builder.Args();
+#define FIELD_HANDLE(name, index, type, ...) argsBuilder->Add<TOptional<type>>().Name(TStringRef::Of(#name));
+ OPTIONS_MAP(FIELD_HANDLE) // One named optional argument per option, added in OPTIONS_MAP order.
+#undef FIELD_HANDLE
+ auto optionsSchema = MakeOptionsSchema(builder);
+ builder.Returns(optionsSchema.StructType);
+ builder.OptionalArgs(EOptionsField::Count);
+ if (!typesOnly) { // The implementation object is created only when typesOnly is false.
+ builder.Implementation(new TOptions(optionsSchema));
+ }
+
+ return true;
+ } else {
+ return false;
+ }
+ }
+ };
+
+ SIMPLE_UDF_WITH_OPTIONAL_ARGS(TPatternFromLike, char*(char*, TOptional<char*>), 1) { // Translates a LIKE pattern (args[0]) with an optional one-character escape (args[1]) into an RE2 pattern string.
+ const std::string_view input(args[0].AsStringRef());
+ const bool hasEscape = bool(args[1]);
+ char escape = 0;
+ if (hasEscape) {
+ const std::string_view escapeRef(args[1].AsStringRef());
+ if (escapeRef.size() != 1U) {
+ UdfTerminate((TStringBuilder() << GetPos() << " Escape should be single character").data());
+ }
+ escape = escapeRef.front();
+ }
+ const TString escaped(RE2::QuoteMeta(StringPiece(input.data(), input.size()))); // Quote the whole input first; the loop below rewrites the quoted text character by character.
+
+ TStringBuilder result;
+ result << "(?s)"; // RE2 flag group that makes '.' match newline as well.
+ bool slash = false; // A backslash was read from the quoted text and has not been emitted yet.
+ bool escapeOn = false; // The previous character was an escape character that has not been consumed yet.
+
+ for (const char& c : escaped) {
+ switch (c) {
+ case '\\':
+ if (slash) { // Second backslash in a row: emit one escaped literal backslash.
+ result << "\\\\";
+ }
+ slash = !slash;
+ break;
+ case '%':
+ if (escapeOn) { // Escaped '%' is a literal percent sign; otherwise it is the "any sequence" wildcard.
+ result << "\\%";
+ escapeOn = false;
+ } else {
+ result << ".*";
+ }
+ slash = false; // A pending backslash is dropped here, not emitted.
+ break;
+ case '_':
+ if (escapeOn) { // Escaped '_' is a literal underscore; otherwise it is the "any single character" wildcard.
+ result << "\\_";
+ escapeOn = false;
+ } else {
+ result << '.';
+ }
+ slash = false;
+ break;
+ default: // NOTE(review): the escape character is only recognised in this branch, so '\\', '%' or '_' used as escape never reach it — confirm intended.
+ if (hasEscape && c == escape) {
+ if (escapeOn) { // Escape character preceded by an escape character: emit it once, quoted.
+ result << RE2::QuoteMeta(StringPiece(&c, 1));
+ }
+ escapeOn = !escapeOn;
+ } else {
+ if (slash) // Re-emit the pending backslash in front of the character.
+ result << '\\';
+ result << c;
+ escapeOn = false;
+ }
+ slash = false;
+ break;
+ }
+ }
+ return valueBuilder->NewString(result);
+ }
+
+ TType* MakeRunConfigType(IFunctionTypeInfoBuilder& builder, TType* optOptionsStructType) { // Run config type: a tuple of a char* element and the given (optional) options struct type.
+ return builder.Tuple()->Add<char*>().Add(optOptionsStructType).Build();
+ }
+
+ template <bool posix> // UDF module; the posix flag is forwarded to every TRe2Udf::TFactory it creates.
+ class TRe2Module: public IUdfModule {
+ public:
+ TStringRef Name() const { // "Re2posix" when posix is true, otherwise "Re2".
+ return posix ? TStringRef::Of("Re2posix") : TStringRef::Of("Re2");
+ }
+
+ void CleanupOnTerminate() const final {
+ }
+
+ void GetAllFunctions(IFunctionsSink& sink) const final { // Lists every function name of the module; only Capture is marked type-aware.
+ sink.Add(TRe2Udf::Name(TRe2Udf::EMode::MATCH));
+ sink.Add(TRe2Udf::Name(TRe2Udf::EMode::GREP));
+ sink.Add(TRe2Udf::Name(TRe2Udf::EMode::CAPTURE))->SetTypeAwareness();
+ sink.Add(TRe2Udf::Name(TRe2Udf::EMode::REPLACE));
+ sink.Add(TRe2Udf::Name(TRe2Udf::EMode::COUNT));
+ sink.Add(TRe2Udf::Name(TRe2Udf::EMode::FIND_AND_CONSUME));
+ sink.Add(TEscape::Name());
+ sink.Add(TPatternFromLike::Name());
+ sink.Add(TOptions::Name());
+ }
+
+ void BuildFunctionTypeInfo( // Declares signature, run config and (unless TypesOnly is set) the implementation for the function called `name`.
+ const TStringRef& name,
+ TType* userType,
+ const TStringRef& typeConfig,
+ ui32 flags,
+ IFunctionTypeInfoBuilder& builder) const final try {
+ Y_UNUSED(userType);
+ TOptionsSchema optionsSchema = MakeOptionsSchema(builder);
+ auto optOptionsStructType = builder.Optional()->Item(optionsSchema.StructType).Build();
+
+ bool typesOnly = (flags & TFlags::TypesOnly);
+ bool isMatch = (TRe2Udf::Name(TRe2Udf::EMode::MATCH) == name);
+ bool isGrep = (TRe2Udf::Name(TRe2Udf::EMode::GREP) == name);
+ bool isCapture = (TRe2Udf::Name(TRe2Udf::EMode::CAPTURE) == name);
+ bool isReplace = (TRe2Udf::Name(TRe2Udf::EMode::REPLACE) == name);
+ bool isCount = (TRe2Udf::Name(TRe2Udf::EMode::COUNT) == name);
+ bool isFindAndConsume = (TRe2Udf::Name(TRe2Udf::FIND_AND_CONSUME) == name);
+
+ if (isMatch || isGrep) {
+ builder.SimpleSignature<bool(TOptional<char*>)>()
+ .RunConfig(MakeRunConfigType(builder, optOptionsStructType));
+
+ if (!typesOnly) {
+ const auto mode = isMatch ? TRe2Udf::EMode::MATCH : TRe2Udf::EMode::GREP;
+ builder.Implementation(new TRe2Udf::TFactory<posix>(mode, optionsSchema, builder.GetSourcePosition()));
+ }
+ } else if (isCapture) {
+ TRegexpGroups groups;
+ auto optionalStringType = builder.Optional()->Item<char*>().Build();
+ auto structBuilder = builder.Struct();
+ RE2 regexp(StringPiece(typeConfig.Data(), typeConfig.Size())); // The pattern comes from typeConfig and is compiled with default RE2 options, only to enumerate its groups.
+ const auto& groupNames = regexp.CapturingGroupNames();
+ int groupCount = regexp.NumberOfCapturingGroups();
+ if (groupCount >= 0) { // NOTE(review): RE2 documents -1 only for an invalid regexp, so the regexp.ok() case in the else branch below looks unreachable — confirm.
+ std::unordered_set<std::string_view> groupNamesSet;
+ int unnamedCount = 0;
+ ++groupCount; // Account for group 0 (the whole match).
+ groups.Indexes.resize(groupCount);
+ groups.Names.resize(groupCount);
+ for (int i = 0; i < groupCount; ++i) {
+ TString fieldName;
+ auto it = groupNames.find(i);
+ if (it != groupNames.end()) {
+ if (!groupNamesSet.insert(it->second).second) { // The same group name was already seen: report an error instead of adding a duplicate field.
+ builder.SetError(
+ TStringBuilder() << "Regexp contains duplicate capturing group name: " << it->second);
+ return;
+ }
+ fieldName = it->second;
+ } else {
+ fieldName = "_" + ToString(unnamedCount); // Unnamed groups become _0, _1, ... in order of appearance.
+ ++unnamedCount;
+ }
+ groups.Names[i] = fieldName;
+ structBuilder->AddField(fieldName, optionalStringType, &groups.Indexes[i]);
+ }
+ builder.Args(1)->Add(optionalStringType).Done().Returns(structBuilder->Build()).RunConfig(MakeRunConfigType(builder, optOptionsStructType));
+
+ if (!typesOnly) {
+ builder.Implementation(
+ new TRe2Udf::TFactory<posix>(TRe2Udf::EMode::CAPTURE, optionsSchema, builder.GetSourcePosition(), groups));
+ }
+
+ } else {
+ if (regexp.ok()) {
+ builder.SetError("Regexp contains no capturing groups");
+ } else {
+ builder.SetError(regexp.error());
+ }
+ }
+ } else if (isReplace) {
+ builder.SimpleSignature<TOptional<char*>(TOptional<char*>, char*)>()
+ .RunConfig(MakeRunConfigType(builder, optOptionsStructType));
+
+ if (!typesOnly) {
+ builder.Implementation(new TRe2Udf::TFactory<posix>(TRe2Udf::EMode::REPLACE, optionsSchema, builder.GetSourcePosition()));
+ }
+ } else if (isCount) {
+ builder.SimpleSignature<ui32(TOptional<char*>)>()
+ .RunConfig(MakeRunConfigType(builder, optOptionsStructType));
+
+ if (!typesOnly) {
+ builder.Implementation(new TRe2Udf::TFactory<posix>(TRe2Udf::EMode::COUNT, optionsSchema, builder.GetSourcePosition()));
+ }
+ } else if (isFindAndConsume) {
+ builder.SimpleSignature<TListType<char*>(TOptional<char*>)>()
+ .RunConfig(MakeRunConfigType(builder, optOptionsStructType));
+ if (!typesOnly) {
+ builder.Implementation(new TRe2Udf::TFactory<posix>(TRe2Udf::EMode::FIND_AND_CONSUME, optionsSchema, builder.GetSourcePosition()));
+ }
+ } else if (!( // Remaining names are offered to the helper UDFs in turn; the first whose DeclareSignature returns true handles it.
+ TEscape::DeclareSignature(name, userType, builder, typesOnly) ||
+ TPatternFromLike::DeclareSignature(name, userType, builder, typesOnly) ||
+ TOptions::DeclareSignature(name, userType, builder, typesOnly))) {
+ builder.SetError(
+ TStringBuilder() << "Unknown function name: " << TString(name));
+ }
+ } catch (const std::exception& e) { // Exceptions become a builder error rather than propagating to the caller.
+ builder.SetError(CurrentExceptionMessage());
+ }
+ };
+
+}
+
+REGISTER_MODULES( // Registers both instantiations of TRe2Module: posix = false and posix = true.
+ TRe2Module<false>,
+ TRe2Module<true>)
diff --git a/yql/essentials/udfs/common/re2/test/canondata/result.json b/yql/essentials/udfs/common/re2/test/canondata/result.json
new file mode 100644
index 00000000000..b9a16f32d66
--- /dev/null
+++ b/yql/essentials/udfs/common/re2/test/canondata/result.json
@@ -0,0 +1,52 @@
+{
+ "test.test[BackslashInLike]": [
+ {
+ "uri": "file://test.test_BackslashInLike_/results.txt"
+ }
+ ],
+ "test.test[BasicOptions]": [
+ {
+ "uri": "file://test.test_BasicOptions_/results.txt"
+ }
+ ],
+ "test.test[Basic]": [
+ {
+ "uri": "file://test.test_Basic_/results.txt"
+ }
+ ],
+ "test.test[DefOptions]": [
+ {
+ "uri": "file://test.test_DefOptions_/results.txt"
+ }
+ ],
+ "test.test[FindAndConsumeEmpty]": [
+ {
+ "uri": "file://test.test_FindAndConsumeEmpty_/results.txt"
+ }
+ ],
+ "test.test[LikeEscape]": [
+ {
+ "uri": "file://test.test_LikeEscape_/results.txt"
+ }
+ ],
+ "test.test[MultipleCaptureGroups]": [
+ {
+ "uri": "file://test.test_MultipleCaptureGroups_/extracted"
+ }
+ ],
+ "test.test[MutableLambda]": [
+ {
+ "uri": "file://test.test_MutableLambda_/results.txt"
+ }
+ ],
+ "test.test[SkipGroup]": [
+ {
+ "uri": "file://test.test_SkipGroup_/results.txt"
+ }
+ ],
+ "test.test[Space]": [
+ {
+ "uri": "file://test.test_Space_/results.txt"
+ }
+ ]
+}
diff --git a/yql/essentials/udfs/common/re2/test/canondata/test.test_BackslashInLike_/results.txt b/yql/essentials/udfs/common/re2/test/canondata/test.test_BackslashInLike_/results.txt
new file mode 100644
index 00000000000..cbd3b76ebac
--- /dev/null
+++ b/yql/essentials/udfs/common/re2/test/canondata/test.test_BackslashInLike_/results.txt
@@ -0,0 +1,28 @@
+[
+ {
+ "Write" = [
+ {
+ "Type" = [
+ "ListType";
+ [
+ "StructType";
+ [
+ [
+ "column0";
+ [
+ "DataType";
+ "Bool"
+ ]
+ ]
+ ]
+ ]
+ ];
+ "Data" = [
+ [
+ %false
+ ]
+ ]
+ }
+ ]
+ }
+] \ No newline at end of file
diff --git a/yql/essentials/udfs/common/re2/test/canondata/test.test_BasicOptions_/results.txt b/yql/essentials/udfs/common/re2/test/canondata/test.test_BasicOptions_/results.txt
new file mode 100644
index 00000000000..ba109c8a084
--- /dev/null
+++ b/yql/essentials/udfs/common/re2/test/canondata/test.test_BasicOptions_/results.txt
@@ -0,0 +1,278 @@
+[
+ {
+ "Write" = [
+ {
+ "Type" = [
+ "ListType";
+ [
+ "StructType";
+ [
+ [
+ "value";
+ [
+ "DataType";
+ "String"
+ ]
+ ];
+ [
+ "match";
+ [
+ "DataType";
+ "Bool"
+ ]
+ ];
+ [
+ "grep";
+ [
+ "DataType";
+ "Bool"
+ ]
+ ];
+ [
+ "capture";
+ [
+ "StructType";
+ [
+ [
+ "_0";
+ [
+ "OptionalType";
+ [
+ "DataType";
+ "String"
+ ]
+ ]
+ ];
+ [
+ "_1";
+ [
+ "OptionalType";
+ [
+ "DataType";
+ "String"
+ ]
+ ]
+ ];
+ [
+ "foo";
+ [
+ "OptionalType";
+ [
+ "DataType";
+ "String"
+ ]
+ ]
+ ]
+ ]
+ ]
+ ];
+ [
+ "capture_member";
+ [
+ "OptionalType";
+ [
+ "DataType";
+ "String"
+ ]
+ ]
+ ];
+ [
+ "replace";
+ [
+ "OptionalType";
+ [
+ "DataType";
+ "String"
+ ]
+ ]
+ ];
+ [
+ "count";
+ [
+ "DataType";
+ "Uint32"
+ ]
+ ];
+ [
+ "tokens";
+ [
+ "ListType";
+ [
+ "DataType";
+ "String"
+ ]
+ ]
+ ]
+ ]
+ ]
+ ];
+ "Data" = [
+ [
+ "";
+ %false;
+ %false;
+ [
+ #;
+ #;
+ #
+ ];
+ #;
+ [
+ ""
+ ];
+ "0";
+ []
+ ];
+ [
+ "a";
+ %false;
+ %true;
+ [
+ #;
+ #;
+ #
+ ];
+ #;
+ [
+ "a"
+ ];
+ "1";
+ [
+ "a"
+ ]
+ ];
+ [
+ "aax";
+ %false;
+ %true;
+ [
+ #;
+ #;
+ #
+ ];
+ #;
+ [
+ "aax"
+ ];
+ "2";
+ [
+ "aax"
+ ]
+ ];
+ [
+ "xaax1";
+ %false;
+ %true;
+ [
+ [
+ "xaax1"
+ ];
+ [
+ "aa"
+ ];
+ [
+ "x"
+ ]
+ ];
+ [
+ "aa"
+ ];
+ [
+ "baaz1"
+ ];
+ "2";
+ [
+ "xaax1"
+ ]
+ ];
+ [
+ "xaaxaaxaa";
+ %false;
+ %true;
+ [
+ [
+ "xaaxaaxaa"
+ ];
+ [
+ "aa"
+ ];
+ [
+ "x"
+ ]
+ ];
+ [
+ "aa"
+ ];
+ [
+ "baazaaxaa"
+ ];
+ "6";
+ [
+ "xaaxaaxaa"
+ ]
+ ];
+ [
+ "sup, dude";
+ %false;
+ %false;
+ [
+ #;
+ #;
+ #
+ ];
+ #;
+ [
+ "sup, dude"
+ ];
+ "0";
+ [
+ "sup";
+ "dude"
+ ]
+ ];
+ [
+ "one, two, three.";
+ %false;
+ %false;
+ [
+ #;
+ #;
+ #
+ ];
+ #;
+ [
+ "one, two, three."
+ ];
+ "0";
+ [
+ "one";
+ "two";
+ "three"
+ ]
+ ];
+ [
+ "\xD0\xBF\xD1\x80\xD0\xB8\xD0\xB2\xD0\xB5\xD1\x82 \xD1\x8D\xD1\x82\xD0\xBE \xD1\x80\xD1\x83\xD1\x81\xD1\x81\xD0\xBA\xD0\xB8\xD0\xB5, \xD0\xB1\xD1\x83\xD0\xBA\xD0\xB2\xD1\213111!";
+ %false;
+ %false;
+ [
+ #;
+ #;
+ #
+ ];
+ #;
+ [
+ "\xD0\xBF\xD1\x80\xD0\xB8\xD0\xB2\xD0\xB5\xD1\x82 \xD1\x8D\xD1\x82\xD0\xBE \xD1\x80\xD1\x83\xD1\x81\xD1\x81\xD0\xBA\xD0\xB8\xD0\xB5, \xD0\xB1\xD1\x83\xD0\xBA\xD0\xB2\xD1\213111!"
+ ];
+ "0";
+ [
+ "\xD0\xBF\xD1\x80\xD0\xB8\xD0\xB2\xD0\xB5\xD1\x82";
+ "\xD1\x8D\xD1\x82\xD0\xBE";
+ "\xD1\x80\xD1\x83\xD1\x81\xD1\x81\xD0\xBA\xD0\xB8\xD0\xB5";
+ "\xD0\xB1\xD1\x83\xD0\xBA\xD0\xB2\xD1\213111"
+ ]
+ ]
+ ]
+ }
+ ]
+ }
+] \ No newline at end of file
diff --git a/yql/essentials/udfs/common/re2/test/canondata/test.test_Basic_/results.txt b/yql/essentials/udfs/common/re2/test/canondata/test.test_Basic_/results.txt
new file mode 100644
index 00000000000..d57d92025ef
--- /dev/null
+++ b/yql/essentials/udfs/common/re2/test/canondata/test.test_Basic_/results.txt
@@ -0,0 +1,257 @@
+[
+ {
+ "Write" = [
+ {
+ "Type" = [
+ "ListType";
+ [
+ "StructType";
+ [
+ [
+ "value";
+ [
+ "DataType";
+ "String"
+ ]
+ ];
+ [
+ "match";
+ [
+ "DataType";
+ "Bool"
+ ]
+ ];
+ [
+ "grep";
+ [
+ "DataType";
+ "Bool"
+ ]
+ ];
+ [
+ "capture";
+ [
+ "StructType";
+ [
+ [
+ "_0";
+ [
+ "OptionalType";
+ [
+ "DataType";
+ "String"
+ ]
+ ]
+ ];
+ [
+ "_1";
+ [
+ "OptionalType";
+ [
+ "DataType";
+ "String"
+ ]
+ ]
+ ];
+ [
+ "foo";
+ [
+ "OptionalType";
+ [
+ "DataType";
+ "String"
+ ]
+ ]
+ ]
+ ]
+ ]
+ ];
+ [
+ "capture_member";
+ [
+ "OptionalType";
+ [
+ "DataType";
+ "String"
+ ]
+ ]
+ ];
+ [
+ "replace";
+ [
+ "OptionalType";
+ [
+ "DataType";
+ "String"
+ ]
+ ]
+ ];
+ [
+ "count";
+ [
+ "DataType";
+ "Uint32"
+ ]
+ ];
+ [
+ "tokens";
+ [
+ "ListType";
+ [
+ "DataType";
+ "String"
+ ]
+ ]
+ ]
+ ]
+ ]
+ ];
+ "Data" = [
+ [
+ "";
+ %false;
+ %false;
+ [
+ #;
+ #;
+ #
+ ];
+ #;
+ [
+ ""
+ ];
+ "0";
+ []
+ ];
+ [
+ "a";
+ %false;
+ %true;
+ [
+ #;
+ #;
+ #
+ ];
+ #;
+ [
+ "a"
+ ];
+ "1";
+ [
+ "a"
+ ]
+ ];
+ [
+ "aax";
+ %false;
+ %true;
+ [
+ #;
+ #;
+ #
+ ];
+ #;
+ [
+ "aax"
+ ];
+ "2";
+ [
+ "aax"
+ ]
+ ];
+ [
+ "xaax1";
+ %false;
+ %true;
+ [
+ [
+ "xaax1"
+ ];
+ [
+ "aa"
+ ];
+ [
+ "x"
+ ]
+ ];
+ [
+ "aa"
+ ];
+ [
+ "baaz1"
+ ];
+ "2";
+ [
+ "xaax1"
+ ]
+ ];
+ [
+ "xaaxaaxaa";
+ %false;
+ %true;
+ [
+ [
+ "xaaxaaxaa"
+ ];
+ [
+ "aa"
+ ];
+ [
+ "x"
+ ]
+ ];
+ [
+ "aa"
+ ];
+ [
+ "baazaaxaa"
+ ];
+ "6";
+ [
+ "xaaxaaxaa"
+ ]
+ ];
+ [
+ "sup, dude";
+ %false;
+ %false;
+ [
+ #;
+ #;
+ #
+ ];
+ #;
+ [
+ "sup, dude"
+ ];
+ "0";
+ [
+ "sup";
+ "dude"
+ ]
+ ];
+ [
+ "one, two, three.";
+ %false;
+ %false;
+ [
+ #;
+ #;
+ #
+ ];
+ #;
+ [
+ "one, two, three."
+ ];
+ "0";
+ [
+ "one";
+ "two";
+ "three"
+ ]
+ ]
+ ]
+ }
+ ]
+ }
+] \ No newline at end of file
diff --git a/yql/essentials/udfs/common/re2/test/canondata/test.test_DefOptions_/results.txt b/yql/essentials/udfs/common/re2/test/canondata/test.test_DefOptions_/results.txt
new file mode 100644
index 00000000000..1287d3da2e8
--- /dev/null
+++ b/yql/essentials/udfs/common/re2/test/canondata/test.test_DefOptions_/results.txt
@@ -0,0 +1,266 @@
+[
+ {
+ "Write" = [
+ {
+ "Type" = [
+ "ListType";
+ [
+ "StructType";
+ [
+ [
+ "column0";
+ [
+ "StructType";
+ [
+ [
+ "CaseSensitive";
+ [
+ "DataType";
+ "Bool"
+ ]
+ ];
+ [
+ "DotNl";
+ [
+ "DataType";
+ "Bool"
+ ]
+ ];
+ [
+ "Literal";
+ [
+ "DataType";
+ "Bool"
+ ]
+ ];
+ [
+ "LogErrors";
+ [
+ "DataType";
+ "Bool"
+ ]
+ ];
+ [
+ "LongestMatch";
+ [
+ "DataType";
+ "Bool"
+ ]
+ ];
+ [
+ "MaxMem";
+ [
+ "DataType";
+ "Uint64"
+ ]
+ ];
+ [
+ "NeverCapture";
+ [
+ "DataType";
+ "Bool"
+ ]
+ ];
+ [
+ "NeverNl";
+ [
+ "DataType";
+ "Bool"
+ ]
+ ];
+ [
+ "OneLine";
+ [
+ "DataType";
+ "Bool"
+ ]
+ ];
+ [
+ "PerlClasses";
+ [
+ "DataType";
+ "Bool"
+ ]
+ ];
+ [
+ "PosixSyntax";
+ [
+ "DataType";
+ "Bool"
+ ]
+ ];
+ [
+ "Utf8";
+ [
+ "DataType";
+ "Bool"
+ ]
+ ];
+ [
+ "WordBoundary";
+ [
+ "DataType";
+ "Bool"
+ ]
+ ]
+ ]
+ ]
+ ]
+ ]
+ ]
+ ];
+ "Data" = [
+ [
+ [
+ %true;
+ %false;
+ %false;
+ %true;
+ %false;
+ "8388608";
+ %false;
+ %false;
+ %false;
+ %false;
+ %false;
+ %true;
+ %false
+ ]
+ ]
+ ]
+ }
+ ]
+ };
+ {
+ "Write" = [
+ {
+ "Type" = [
+ "ListType";
+ [
+ "StructType";
+ [
+ [
+ "column0";
+ [
+ "StructType";
+ [
+ [
+ "CaseSensitive";
+ [
+ "DataType";
+ "Bool"
+ ]
+ ];
+ [
+ "DotNl";
+ [
+ "DataType";
+ "Bool"
+ ]
+ ];
+ [
+ "Literal";
+ [
+ "DataType";
+ "Bool"
+ ]
+ ];
+ [
+ "LogErrors";
+ [
+ "DataType";
+ "Bool"
+ ]
+ ];
+ [
+ "LongestMatch";
+ [
+ "DataType";
+ "Bool"
+ ]
+ ];
+ [
+ "MaxMem";
+ [
+ "DataType";
+ "Uint64"
+ ]
+ ];
+ [
+ "NeverCapture";
+ [
+ "DataType";
+ "Bool"
+ ]
+ ];
+ [
+ "NeverNl";
+ [
+ "DataType";
+ "Bool"
+ ]
+ ];
+ [
+ "OneLine";
+ [
+ "DataType";
+ "Bool"
+ ]
+ ];
+ [
+ "PerlClasses";
+ [
+ "DataType";
+ "Bool"
+ ]
+ ];
+ [
+ "PosixSyntax";
+ [
+ "DataType";
+ "Bool"
+ ]
+ ];
+ [
+ "Utf8";
+ [
+ "DataType";
+ "Bool"
+ ]
+ ];
+ [
+ "WordBoundary";
+ [
+ "DataType";
+ "Bool"
+ ]
+ ]
+ ]
+ ]
+ ]
+ ]
+ ]
+ ];
+ "Data" = [
+ [
+ [
+ %true;
+ %false;
+ %false;
+ %true;
+ %false;
+ "8388608";
+ %false;
+ %false;
+ %false;
+ %false;
+ %false;
+ %true;
+ %false
+ ]
+ ]
+ ]
+ }
+ ]
+ }
+] \ No newline at end of file
diff --git a/yql/essentials/udfs/common/re2/test/canondata/test.test_FindAndConsumeEmpty_/results.txt b/yql/essentials/udfs/common/re2/test/canondata/test.test_FindAndConsumeEmpty_/results.txt
new file mode 100644
index 00000000000..55958536293
--- /dev/null
+++ b/yql/essentials/udfs/common/re2/test/canondata/test.test_FindAndConsumeEmpty_/results.txt
@@ -0,0 +1,66 @@
+[
+ {
+ "Write" = [
+ {
+ "Type" = [
+ "ListType";
+ [
+ "StructType";
+ [
+ [
+ "column0";
+ [
+ "ListType";
+ [
+ "DataType";
+ "String"
+ ]
+ ]
+ ]
+ ]
+ ]
+ ];
+ "Data" = [
+ [
+ [
+ "a";
+ "";
+ "aa"
+ ]
+ ]
+ ]
+ }
+ ]
+ };
+ {
+ "Write" = [
+ {
+ "Type" = [
+ "ListType";
+ [
+ "StructType";
+ [
+ [
+ "column0";
+ [
+ "ListType";
+ [
+ "DataType";
+ "String"
+ ]
+ ]
+ ]
+ ]
+ ]
+ ];
+ "Data" = [
+ [
+ [
+ ""
+ ]
+ ]
+ ]
+ }
+ ]
+ }
+] \ No newline at end of file
diff --git a/yql/essentials/udfs/common/re2/test/canondata/test.test_LikeEscape_/results.txt b/yql/essentials/udfs/common/re2/test/canondata/test.test_LikeEscape_/results.txt
new file mode 100644
index 00000000000..b9dbaf1f91f
--- /dev/null
+++ b/yql/essentials/udfs/common/re2/test/canondata/test.test_LikeEscape_/results.txt
@@ -0,0 +1,76 @@
+[
+ {
+ "Write" = [
+ {
+ "Type" = [
+ "ListType";
+ [
+ "StructType";
+ [
+ [
+ "column0";
+ [
+ "DataType";
+ "Bool"
+ ]
+ ];
+ [
+ "column1";
+ [
+ "DataType";
+ "Bool"
+ ]
+ ];
+ [
+ "column2";
+ [
+ "DataType";
+ "Bool"
+ ]
+ ];
+ [
+ "column3";
+ [
+ "DataType";
+ "Bool"
+ ]
+ ];
+ [
+ "column4";
+ [
+ "DataType";
+ "Bool"
+ ]
+ ];
+ [
+ "column5";
+ [
+ "DataType";
+ "Bool"
+ ]
+ ];
+ [
+ "column6";
+ [
+ "DataType";
+ "Bool"
+ ]
+ ]
+ ]
+ ]
+ ];
+ "Data" = [
+ [
+ %true;
+ %true;
+ %true;
+ %true;
+ %true;
+ %true;
+ %false
+ ]
+ ]
+ }
+ ]
+ }
+] \ No newline at end of file
diff --git a/yql/essentials/udfs/common/re2/test/canondata/test.test_MultipleCaptureGroups_/extracted b/yql/essentials/udfs/common/re2/test/canondata/test.test_MultipleCaptureGroups_/extracted
new file mode 100644
index 00000000000..2441849448b
--- /dev/null
+++ b/yql/essentials/udfs/common/re2/test/canondata/test.test_MultipleCaptureGroups_/extracted
@@ -0,0 +1,14 @@
+<tmp_path>/program.sql:<main>: Error: Type annotation
+
+ <tmp_path>/program.sql:<main>:8:1: Error: At function: RemovePrefixMembers, At function: Unordered, At function: PersistableRepr, At function: OrderedSqlProject, At function: SqlProjectItem
+ select $regexp("abc");
+ ^
+ <tmp_path>/program.sql:<main>:8:8: Error: At function: Apply
+ select $regexp("abc");
+ ^
+ <tmp_path>/program.sql:<main>:4:16: Error: At function: Udf, At Re2.Capture
+ $regexp = Re2::Capture("(?P<groupname1>a)(?P<groupname2>b)(?<groupname1>c)");
+ ^
+ <tmp_path>/program.sql:<main>:4:16: Error: Failed to find UDF function: Re2.Capture, reason: Error: Module: Re2, function: Capture, error: Regexp contains duplicate capturing group name: groupname1
+ $regexp = Re2::Capture("(?P<groupname1>a)(?P<groupname2>b)(?<groupname1>c)");
+ ^ \ No newline at end of file
diff --git a/yql/essentials/udfs/common/re2/test/canondata/test.test_MutableLambda_/results.txt b/yql/essentials/udfs/common/re2/test/canondata/test.test_MutableLambda_/results.txt
new file mode 100644
index 00000000000..4e62b7d8ce4
--- /dev/null
+++ b/yql/essentials/udfs/common/re2/test/canondata/test.test_MutableLambda_/results.txt
@@ -0,0 +1,52 @@
+[
+ {
+ "Write" = [
+ {
+ "Type" = [
+ "ListType";
+ [
+ "StructType";
+ [
+ [
+ "x";
+ [
+ "DataType";
+ "String"
+ ]
+ ];
+ [
+ "column1";
+ [
+ "DataType";
+ "Bool"
+ ]
+ ]
+ ]
+ ]
+ ];
+ "Data" = [
+ [
+ "e";
+ %false
+ ];
+ [
+ "aa";
+ %true
+ ];
+ [
+ "et";
+ %false
+ ];
+ [
+ "cb";
+ %false
+ ];
+ [
+ "ba";
+ %true
+ ]
+ ]
+ }
+ ]
+ }
+] \ No newline at end of file
diff --git a/yql/essentials/udfs/common/re2/test/canondata/test.test_SkipGroup_/results.txt b/yql/essentials/udfs/common/re2/test/canondata/test.test_SkipGroup_/results.txt
new file mode 100644
index 00000000000..466ed839507
--- /dev/null
+++ b/yql/essentials/udfs/common/re2/test/canondata/test.test_SkipGroup_/results.txt
@@ -0,0 +1,105 @@
+[
+ {
+ "Write" = [
+ {
+ "Type" = [
+ "ListType";
+ [
+ "StructType";
+ [
+ [
+ "capture";
+ [
+ "StructType";
+ [
+ [
+ "_0";
+ [
+ "OptionalType";
+ [
+ "DataType";
+ "String"
+ ]
+ ]
+ ];
+ [
+ "_1";
+ [
+ "OptionalType";
+ [
+ "DataType";
+ "String"
+ ]
+ ]
+ ];
+ [
+ "major";
+ [
+ "OptionalType";
+ [
+ "DataType";
+ "String"
+ ]
+ ]
+ ];
+ [
+ "minor";
+ [
+ "OptionalType";
+ [
+ "DataType";
+ "String"
+ ]
+ ]
+ ]
+ ]
+ ]
+ ];
+ [
+ "no_groups";
+ [
+ "StructType";
+ [
+ [
+ "_0";
+ [
+ "OptionalType";
+ [
+ "DataType";
+ "String"
+ ]
+ ]
+ ]
+ ]
+ ]
+ ]
+ ]
+ ]
+ ];
+ "Data" = [
+ [
+ [
+ [
+ "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_10_5) AppleWebKit/601.2.7 (KHTML, like Gecko) Version/9.0.1 Safari"
+ ];
+ [
+ "Safari"
+ ];
+ [
+ "5"
+ ];
+ [
+ "0"
+ ]
+ ];
+ [
+ [
+ "Intel Mac"
+ ]
+ ]
+ ]
+ ]
+ }
+ ]
+ }
+] \ No newline at end of file
diff --git a/yql/essentials/udfs/common/re2/test/canondata/test.test_Space_/results.txt b/yql/essentials/udfs/common/re2/test/canondata/test.test_Space_/results.txt
new file mode 100644
index 00000000000..b62998b6977
--- /dev/null
+++ b/yql/essentials/udfs/common/re2/test/canondata/test.test_Space_/results.txt
@@ -0,0 +1,28 @@
+[
+ {
+ "Write" = [
+ {
+ "Type" = [
+ "ListType";
+ [
+ "StructType";
+ [
+ [
+ "column0";
+ [
+ "DataType";
+ "Bool"
+ ]
+ ]
+ ]
+ ]
+ ];
+ "Data" = [
+ [
+ %true
+ ]
+ ]
+ }
+ ]
+ }
+] \ No newline at end of file
diff --git a/yql/essentials/udfs/common/re2/test/cases/BackslashInLike.sql b/yql/essentials/udfs/common/re2/test/cases/BackslashInLike.sql
new file mode 100644
index 00000000000..65973c7d812
--- /dev/null
+++ b/yql/essentials/udfs/common/re2/test/cases/BackslashInLike.sql
@@ -0,0 +1 @@
+select 'utma' like @@%utm\_@@;
diff --git a/yql/essentials/udfs/common/re2/test/cases/Basic.in b/yql/essentials/udfs/common/re2/test/cases/Basic.in
new file mode 100644
index 00000000000..ba0028e8611
--- /dev/null
+++ b/yql/essentials/udfs/common/re2/test/cases/Basic.in
@@ -0,0 +1,7 @@
+{"key"="1";"subkey"="1";"value"=""};
+{"key"="2";"subkey"="2";"value"="a"};
+{"key"="3";"subkey"="3";"value"="aax"};
+{"key"="4";"subkey"="4";"value"="xaax1"};
+{"key"="5";"subkey"="5";"value"="xaaxaaxaa"};
+{"key"="6";"subkey"="6";"value"="sup, dude"};
+{"key"="7";"subkey"="7";"value"="one, two, three."};
diff --git a/yql/essentials/udfs/common/re2/test/cases/Basic.sql b/yql/essentials/udfs/common/re2/test/cases/Basic.sql
new file mode 100644
index 00000000000..7d049f88b2f
--- /dev/null
+++ b/yql/essentials/udfs/common/re2/test/cases/Basic.sql
@@ -0,0 +1,21 @@
+/* syntax version 1 */
+$match = Re2::Match("[ax]+\d");
+$grep = Re2Posix::Grep("a.*");
+$capture = Re2::Capture(".*(?P<foo>xa?)(a{2,}).*");
+$replace = Re2::Replace("x(a+)x");
+$count = Re2::Count("a");
+-- regex to find all tokens consisting of letters and digits
+-- L stands for "Letters", Nd stands for "Number, decimal digit",
+-- see https://en.wikipedia.org/wiki/Unicode_character_property#General_Category
+$find_and_consume = Re2::FindAndConsume('([\\pL\\p{Nd}]+)');
+
+SELECT
+ value,
+ $match(value) AS match,
+ $grep(value) AS grep,
+ $capture(value) AS capture,
+ $capture(value)._1 AS capture_member,
+ $replace(value, "b\\1z") AS replace,
+ $count(value) AS count,
+ $find_and_consume(value) AS tokens
+FROM Input;
diff --git a/yql/essentials/udfs/common/re2/test/cases/BasicOptions.in b/yql/essentials/udfs/common/re2/test/cases/BasicOptions.in
new file mode 100644
index 00000000000..f63986dffef
--- /dev/null
+++ b/yql/essentials/udfs/common/re2/test/cases/BasicOptions.in
@@ -0,0 +1,8 @@
+{"key"="1";"subkey"="1";"value"=""};
+{"key"="2";"subkey"="2";"value"="a"};
+{"key"="3";"subkey"="3";"value"="aax"};
+{"key"="4";"subkey"="4";"value"="xaax1"};
+{"key"="5";"subkey"="5";"value"="xaaxaaxaa"};
+{"key"="6";"subkey"="6";"value"="sup, dude"};
+{"key"="7";"subkey"="7";"value"="one, two, three."};
+{"key"="7";"subkey"="7";"value"="привет это русские, буквы111!"};
diff --git a/yql/essentials/udfs/common/re2/test/cases/BasicOptions.sql b/yql/essentials/udfs/common/re2/test/cases/BasicOptions.sql
new file mode 100644
index 00000000000..a4338c03e18
--- /dev/null
+++ b/yql/essentials/udfs/common/re2/test/cases/BasicOptions.sql
@@ -0,0 +1,22 @@
+/* syntax version 1 */
+$options = Re2::Options(true as Utf8);
+$match = Re2::Match("[ax]+\d",$options);
+$grep = Re2Posix::Grep("a.*",$options);
+$capture = Re2::Capture(".*(?P<foo>xa?)(a{2,}).*",$options);
+$replace = Re2::Replace("x(a+)x",$options);
+$count = Re2::Count("a",$options);
+-- regex to find all tokens consisting of letters and digits
+-- L stands for "Letters", Nd stands for "Number, decimal digit",
+-- see https://en.wikipedia.org/wiki/Unicode_character_property#General_Category
+$find_and_consume = Re2::FindAndConsume('([\\pL\\p{Nd}]+)',$options);
+
+SELECT
+ value,
+ $match(value) AS match,
+ $grep(value) AS grep,
+ $capture(value) AS capture,
+ $capture(value)._1 AS capture_member,
+ $replace(value, "b\\1z") AS replace,
+ $count(value) AS count,
+ $find_and_consume(value) AS tokens
+FROM Input;
diff --git a/yql/essentials/udfs/common/re2/test/cases/DefOptions.sql b/yql/essentials/udfs/common/re2/test/cases/DefOptions.sql
new file mode 100644
index 00000000000..d21a7108a82
--- /dev/null
+++ b/yql/essentials/udfs/common/re2/test/cases/DefOptions.sql
@@ -0,0 +1,19 @@
+/* syntax version 1 */
+select Re2::Options(
+ true as `Utf8`,
+ false as PosixSyntax,
+ false as LongestMatch,
+ true as LogErrors,
+ 8<<20 as MaxMem,
+ false as Literal,
+ false as NeverNl,
+ false as DotNl,
+ false as NeverCapture,
+ true as CaseSensitive,
+ false as PerlClasses,
+ false as WordBoundary,
+ false as OneLine
+);
+
+select Re2::Options(
+);
diff --git a/yql/essentials/udfs/common/re2/test/cases/FindAndConsumeEmpty.sql b/yql/essentials/udfs/common/re2/test/cases/FindAndConsumeEmpty.sql
new file mode 100644
index 00000000000..e40ad0deb98
--- /dev/null
+++ b/yql/essentials/udfs/common/re2/test/cases/FindAndConsumeEmpty.sql
@@ -0,0 +1,6 @@
+/* syntax version 1 */
+$regexp1 = Re2::FindAndConsume("(a*)");
+$regexp2 = Re2::FindAndConsume("a(b*)");
+
+SELECT $regexp1("abaa");
+SELECT $regexp2("a"); \ No newline at end of file
diff --git a/yql/essentials/udfs/common/re2/test/cases/LikeEscape.sql b/yql/essentials/udfs/common/re2/test/cases/LikeEscape.sql
new file mode 100644
index 00000000000..ca488640767
--- /dev/null
+++ b/yql/essentials/udfs/common/re2/test/cases/LikeEscape.sql
@@ -0,0 +1,7 @@
+SELECT '?' LIKE '%??%' ESCAPE '?',
+ 'x_' LIKE '%xxx_' ESCAPE 'x',
+ '[' LIKE '[' ESCAPE '!',
+ '.' LIKE '..' ESCAPE '.',
+ '[' LIKE '[[' ESCAPE '[',
+ 'a%b' LIKE '.a.%.b' ESCAPE '.',
+ 'x' LIKE '..' ESCAPE '.'; \ No newline at end of file
diff --git a/yql/essentials/udfs/common/re2/test/cases/MultipleCaptureGroups.cfg b/yql/essentials/udfs/common/re2/test/cases/MultipleCaptureGroups.cfg
new file mode 100644
index 00000000000..eb2e5315d1e
--- /dev/null
+++ b/yql/essentials/udfs/common/re2/test/cases/MultipleCaptureGroups.cfg
@@ -0,0 +1 @@
+xfail \ No newline at end of file
diff --git a/yql/essentials/udfs/common/re2/test/cases/MultipleCaptureGroups.sql b/yql/essentials/udfs/common/re2/test/cases/MultipleCaptureGroups.sql
new file mode 100644
index 00000000000..49e0da34fd2
--- /dev/null
+++ b/yql/essentials/udfs/common/re2/test/cases/MultipleCaptureGroups.sql
@@ -0,0 +1,4 @@
+/* syntax version 1 */
+$regexp = Re2::Capture("(?P<groupname1>a)(?P<groupname2>b)(?<groupname1>c)");
+
+select $regexp("abc"); \ No newline at end of file
diff --git a/yql/essentials/udfs/common/re2/test/cases/MutableLambda.in b/yql/essentials/udfs/common/re2/test/cases/MutableLambda.in
new file mode 100644
index 00000000000..e69de29bb2d
--- /dev/null
+++ b/yql/essentials/udfs/common/re2/test/cases/MutableLambda.in
diff --git a/yql/essentials/udfs/common/re2/test/cases/MutableLambda.sql b/yql/essentials/udfs/common/re2/test/cases/MutableLambda.sql
new file mode 100644
index 00000000000..5e3f24be0b2
--- /dev/null
+++ b/yql/essentials/udfs/common/re2/test/cases/MutableLambda.sql
@@ -0,0 +1,24 @@
+/* syntax version 1 */
+$regs = AsList("^a","^b");
+
+$input = AsList("e","aa","et","cb","ba");
+
+$table_input = (select * from (select $input as x) flatten by x);
+
+$compiled_regs = ListMap($regs, ($r)->{
+ return Re2::Grep($r);
+});
+
+$f = ($s) -> {
+ $apply_list = ListMap($compiled_regs, ($cr)->{
+ return $cr($s);
+ });
+
+ $filtered = ListFilter($apply_list, ($m)->{
+ return $m;
+ });
+
+ return ListLength(ListTake($filtered,1)) > 0;
+};
+
+select x, $f(x) from $table_input;
diff --git a/yql/essentials/udfs/common/re2/test/cases/SkipGroup.sql b/yql/essentials/udfs/common/re2/test/cases/SkipGroup.sql
new file mode 100644
index 00000000000..5231c72fcbf
--- /dev/null
+++ b/yql/essentials/udfs/common/re2/test/cases/SkipGroup.sql
@@ -0,0 +1,10 @@
+/* syntax version 1 */
+$input = "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_10_5) AppleWebKit/601.2.7 (KHTML, like Gecko) Version/9.0.1 Safari/601.2.7";
+$capture = Re2::Capture(
+ "(?:Mozilla|Opera)/(?P<major>\\d)\\.(?P<minor>\\d).*(Safari)"
+);
+$no_groups = Re2::Capture("(?:Intel) Mac");
+
+SELECT
+ $capture($input) AS capture,
+ $no_groups($input) AS no_groups;
diff --git a/yql/essentials/udfs/common/re2/test/cases/Space.sql b/yql/essentials/udfs/common/re2/test/cases/Space.sql
new file mode 100644
index 00000000000..34f0590ca1f
--- /dev/null
+++ b/yql/essentials/udfs/common/re2/test/cases/Space.sql
@@ -0,0 +1,2 @@
+/* syntax version 1 */
+SELECT 'a b c' LIKE 'a b%';
diff --git a/yql/essentials/udfs/common/re2/test/ya.make b/yql/essentials/udfs/common/re2/test/ya.make
new file mode 100644
index 00000000000..179b2ca19bf
--- /dev/null
+++ b/yql/essentials/udfs/common/re2/test/ya.make
@@ -0,0 +1,13 @@
+YQL_UDF_TEST_CONTRIB()
+
+DEPENDS(yql/essentials/udfs/common/re2)
+
+TIMEOUT(300)
+
+SIZE(MEDIUM)
+
+IF (SANITIZER_TYPE == "memory")
+ TAG(ya:not_autocheck) # YQL-15385
+ENDIF()
+
+END()
diff --git a/yql/essentials/udfs/common/re2/ya.make b/yql/essentials/udfs/common/re2/ya.make
new file mode 100644
index 00000000000..426916222c8
--- /dev/null
+++ b/yql/essentials/udfs/common/re2/ya.make
@@ -0,0 +1,30 @@
+IF (YQL_PACKAGED)
+ PACKAGE()
+ FROM_SANDBOX(FILE 7319903255 OUT_NOAUTO libre2_udf.so
+ )
+ END()
+ELSE ()
+YQL_UDF_CONTRIB(re2_udf)
+
+ YQL_ABI_VERSION(
+ 2
+ 28
+ 0
+ )
+
+ SRCS(
+ re2_udf.cpp
+ )
+
+ PEERDIR(
+ contrib/libs/re2
+ library/cpp/deprecated/enum_codegen
+ )
+
+ END()
+ENDIF ()
+
+
+RECURSE_FOR_TESTS(
+ test
+)
diff --git a/yql/essentials/udfs/common/set/set_udf.cpp b/yql/essentials/udfs/common/set/set_udf.cpp
new file mode 100644
index 00000000000..4a9d050a31a
--- /dev/null
+++ b/yql/essentials/udfs/common/set/set_udf.cpp
@@ -0,0 +1,576 @@
+#include <yql/essentials/public/udf/udf_type_ops.h>
+#include <yql/essentials/public/udf/udf_helpers.h>
+
+#include <unordered_set>
+
+using namespace NKikimr;
+using namespace NUdf;
+
+namespace {
+
+template <typename THash, typename TEquals> // Size-capped hash set of TUnboxedValue; THash/TEquals supply hashing and equality.
+class TSetBase {
+private:
+ std::unordered_set<TUnboxedValue, THash, TEquals, TUnboxedValue::TAllocator> Set; // distinct values collected so far
+ ui32 MaxSize = 0; // element cap: AddValue inserts only while Set.size() < MaxSize
+ bool WasChanged = false; // outcome of the most recent insert attempted by AddValue
+
+protected:
+ TSetBase(THash hash, TEquals equals)
+ : Set(1, hash, equals) // 1 is the initial bucket count handed to std::unordered_set
+ {}
+
+ void Init(const TUnboxedValuePod& value, ui32 maxSize) { // starts a set from its first value
+ MaxSize = maxSize ? maxSize : std::numeric_limits<ui32>::max(); // a zero maxSize is replaced by the largest ui32
+ AddValue(value);
+ }
+
+ void Merge(const TSetBase& left, const TSetBase& right) { // fills this set from two existing sets
+ MaxSize = std::max(left.MaxSize, right.MaxSize); // the larger of the two caps wins
+ for (const auto& item : left.Set) { // left items are inserted first, so they fill the cap first
+ AddValue(item);
+ }
+ for (const auto& item : right.Set) {
+ AddValue(item);
+ }
+ }
+
+ void Deserialize(const TUnboxedValuePod& serialized) { // reads the layout written by Serialize
+ MaxSize = serialized.GetElement(0).Get<ui32>(); // element 0: the cap
+ auto list = serialized.GetElement(1); // element 1: the list of items
+
+ const auto listIter = list.GetListIterator();
+ for (TUnboxedValue current; listIter.Next(current);) {
+ AddValue(current);
+ }
+ }
+
+public:
+ void ResetChanged() { // clears the flag; the AddValue UDF bodies call this before inserting
+ WasChanged = false;
+ }
+
+ bool Changed() const { // returns the flag last written by AddValue or ResetChanged
+ return WasChanged;
+ }
+
+ TUnboxedValue Serialize(const IValueBuilder* builder) { // returns a 2-element array: [0] MaxSize, [1] array of items
+ TUnboxedValue* values = nullptr;
+ auto list = builder->NewArray(Set.size(), values);
+
+ for (const auto& item : Set) { // items are emitted in the unordered_set's iteration order
+ *values++ = item;
+ }
+
+ TUnboxedValue* items = nullptr;
+ auto result = builder->NewArray(2U, items);
+ items[0] = TUnboxedValuePod(MaxSize);
+ items[1] = list;
+
+ return result;
+ }
+
+ TUnboxedValue GetResult(const IValueBuilder* builder) { // returns only the items, without the cap
+ TUnboxedValue* values = nullptr;
+ auto result = builder->NewArray(Set.size(), values);
+
+ for (const auto& item : Set) {
+ *values++ = item;
+ }
+ return result;
+ }
+
+ void AddValue(const TUnboxedValuePod& value) {
+ if (Set.size() < MaxSize) { // once the cap is reached nothing is inserted and WasChanged keeps its previous value
+ WasChanged = Set.insert(TUnboxedValuePod(value)).second; // .second is true only when the value was not already present
+ }
+ }
+};
+
+template <EDataSlot Slot> // TSetBase specialised for a data slot, using TUnboxedValueHash/TUnboxedValueEquals of that slot
+class TSetData
+ : public TSetBase<TUnboxedValueHash<Slot>, TUnboxedValueEquals<Slot>>
+{
+public:
+ using TBase = TSetBase<TUnboxedValueHash<Slot>, TUnboxedValueEquals<Slot>>;
+
+ TSetData(const TUnboxedValuePod& value, ui32 maxSize) // new set holding one value, with the given cap
+ : TBase(TUnboxedValueHash<Slot>(), TUnboxedValueEquals<Slot>())
+ {
+ TBase::Init(value, maxSize);
+ }
+
+ TSetData(const TSetData& left, const TSetData& right) // new set built by merging two sets (see TBase::Merge)
+ : TBase(TUnboxedValueHash<Slot>(), TUnboxedValueEquals<Slot>())
+ {
+ TBase::Merge(left, right);
+ }
+
+ explicit TSetData(const TUnboxedValuePod& serialized) // new set restored from the value produced by Serialize
+ : TBase(TUnboxedValueHash<Slot>(), TUnboxedValueEquals<Slot>())
+ {
+ TBase::Deserialize(serialized);
+ }
+};
+
+struct TGenericHash { // hash functor for std::unordered_set that forwards to an IHash object
+ IHash::TPtr Hash; // created by builder.MakeHash(valueType) in BuildFunctionTypeInfo
+
+ std::size_t operator()(const TUnboxedValuePod& value) const {
+ return Hash->Hash(value);
+ }
+};
+
+struct TGenericEquals { // equality functor for std::unordered_set that forwards to an IEquate object
+ IEquate::TPtr Equate; // created by builder.MakeEquate(valueType) in BuildFunctionTypeInfo
+
+ bool operator()(const TUnboxedValuePod& left, const TUnboxedValuePod& right) const {
+ return Equate->Equals(left, right);
+ }
+};
+
+class TSetGeneric // TSetBase for value types that are not data types; hashing/equality come from runtime IHash/IEquate objects
+ : public TSetBase<TGenericHash, TGenericEquals>
+{
+public:
+ using TBase = TSetBase<TGenericHash, TGenericEquals>;
+
+ TSetGeneric(const TUnboxedValuePod& value, ui32 maxSize, // new set holding one value, with the given cap
+ IHash::TPtr hash, IEquate::TPtr equate)
+ : TBase(TGenericHash{hash}, TGenericEquals{equate})
+ {
+ TBase::Init(value, maxSize);
+ }
+
+ TSetGeneric(const TSetGeneric& left, const TSetGeneric& right, // new set built by merging two sets
+ IHash::TPtr hash, IEquate::TPtr equate)
+ : TBase(TGenericHash{hash}, TGenericEquals{equate})
+ {
+ TBase::Merge(left, right);
+ }
+
+ TSetGeneric(const TUnboxedValuePod& serialized, // new set restored from the value produced by Serialize
+ IHash::TPtr hash, IEquate::TPtr equate)
+ : TBase(TGenericHash{hash}, TGenericEquals{equate})
+ {
+ TBase::Deserialize(serialized);
+ }
+};
+
+extern const char SetResourceNameGeneric[] = "Set.SetResource.Generic"; // resource tag used when the value type is not a data type
+class TSetResource: // boxed resource wrapping a TSetGeneric
+ public TBoxedResource<TSetGeneric, SetResourceNameGeneric>
+{
+public:
+ template <typename... Args>
+ inline TSetResource(Args&&... args) // forwards all arguments to the TBoxedResource base constructor
+ : TBoxedResource(std::forward<Args>(args)...)
+ {}
+};
+
+template <EDataSlot Slot>
+class TSetResourceData;
+
+template <EDataSlot Slot> // Extracts the typed set resource for data slot Slot from a UDF argument.
+TSetResourceData<Slot>* GetSetResourceData(const TUnboxedValuePod& arg) {
+ TSetResourceData<Slot>::Validate(arg); // Validate is inherited from the TBoxedResource base; presumably rejects a foreign resource - TODO confirm
+ return static_cast<TSetResourceData<Slot>*>(arg.AsBoxed().Get()); // non-owning pointer to the boxed object held by arg
+}
+
+TSetResource* GetSetResource(const TUnboxedValuePod& arg) { // Extracts the generic set resource from a UDF argument.
+ TSetResource::Validate(arg); // Validate is inherited from the TBoxedResource base; presumably rejects a foreign resource - TODO confirm
+ return static_cast<TSetResource*>(arg.AsBoxed().Get()); // non-owning pointer to the boxed object held by arg
+}
+
+
+template <EDataSlot Slot> // Implementation of Set.Create for a value type that is the data slot Slot.
+class TSetCreateData: public TBoxedValue {
+private:
+ TUnboxedValue Run(const IValueBuilder*, const TUnboxedValuePod* args) const override { // args[0]: first value, args[1]: ui32 size limit
+ return TUnboxedValuePod(new TSetResourceData<Slot>(args[0], args[1].Get<ui32>()));
+ }
+};
+
+class TSetCreate: public TBoxedValue { // Implementation of Set.Create for non-data value types.
+private:
+ TUnboxedValue Run(const IValueBuilder*, const TUnboxedValuePod* args) const override { // args[0]: first value, args[1]: ui32 size limit
+ return TUnboxedValuePod(new TSetResource(args[0], args[1].Get<ui32>(), Hash_, Equate_));
+ }
+
+public:
+ TSetCreate(IHash::TPtr hash, IEquate::TPtr equate) // keeps the hash/equate objects to pass to every set it creates
+ : Hash_(hash)
+ , Equate_(equate)
+ {}
+
+private:
+ IHash::TPtr Hash_;
+ IEquate::TPtr Equate_;
+};
+
+template <EDataSlot Slot> // Implementation of Set.AddValue for a value type that is the data slot Slot.
+class TSetAddValueData: public TBoxedValue {
+private:
+ TUnboxedValue Run(const IValueBuilder*, const TUnboxedValuePod* args) const override { // args[0]: set resource, args[1]: value to add
+ auto resource = GetSetResourceData<Slot>(args[0]);
+ resource->Get()->ResetChanged(); // so that Changed() afterwards reflects this call only, even if the set is full
+ resource->Get()->AddValue(args[1]);
+ return TUnboxedValuePod(resource); // the same resource object is returned; the set is modified in place
+ }
+};
+
+class TSetAddValue: public TBoxedValue { // Implementation of Set.AddValue for non-data value types.
+private:
+ TUnboxedValue Run(const IValueBuilder*, const TUnboxedValuePod* args) const override { // args[0]: set resource, args[1]: value to add
+ auto resource = GetSetResource(args[0]);
+ resource->Get()->ResetChanged(); // so that Changed() afterwards reflects this call only, even if the set is full
+ resource->Get()->AddValue(args[1]);
+ return TUnboxedValuePod(resource); // the same resource object is returned; the set is modified in place
+ }
+};
+
+template <EDataSlot Slot> // Implementation of Set.WasChanged for a value type that is the data slot Slot.
+class TSetWasChangedData: public TBoxedValue {
+private:
+ TUnboxedValue Run(const IValueBuilder*, const TUnboxedValuePod* args) const override { // args[0]: set resource
+ auto resource = GetSetResourceData<Slot>(args[0]);
+ return TUnboxedValuePod(resource->Get()->Changed()); // bool flag left by the last AddValue/ResetChanged on this set
+ }
+};
+
+class TSetWasChanged: public TBoxedValue { // Implementation of Set.WasChanged for non-data value types.
+private:
+ TUnboxedValue Run(const IValueBuilder*, const TUnboxedValuePod* args) const override { // args[0]: set resource
+ auto resource = GetSetResource(args[0]);
+ return TUnboxedValuePod(resource->Get()->Changed()); // bool flag left by the last AddValue/ResetChanged on this set
+ }
+};
+
+template <EDataSlot Slot> // Implementation of Set.Serialize for a value type that is the data slot Slot.
+class TSetSerializeData: public TBoxedValue {
+private:
+ TUnboxedValue Run(const IValueBuilder* valueBuilder, const TUnboxedValuePod* args) const override { // args[0]: set resource
+ return GetSetResourceData<Slot>(args[0])->Get()->Serialize(valueBuilder); // 2-element array: size limit and item list
+ }
+};
+
+class TSetSerialize: public TBoxedValue { // Implementation of Set.Serialize for non-data value types.
+private:
+ TUnboxedValue Run(const IValueBuilder* valueBuilder, const TUnboxedValuePod* args) const override { // args[0]: set resource
+ return GetSetResource(args[0])->Get()->Serialize(valueBuilder); // 2-element array: size limit and item list
+ }
+};
+
+template <EDataSlot Slot> // Implementation of Set.Deserialize for a value type that is the data slot Slot.
+class TSetDeserializeData: public TBoxedValue {
+private:
+ TUnboxedValue Run(const IValueBuilder*, const TUnboxedValuePod* args) const override { // args[0]: value in the layout produced by Serialize
+ return TUnboxedValuePod(new TSetResourceData<Slot>(args[0]));
+ }
+};
+
+class TSetDeserialize: public TBoxedValue { // Implementation of Set.Deserialize for non-data value types.
+private:
+ TUnboxedValue Run(const IValueBuilder*, const TUnboxedValuePod* args) const override { // args[0]: value in the layout produced by Serialize
+ return TUnboxedValuePod(new TSetResource(args[0], Hash_, Equate_));
+ }
+
+public:
+ TSetDeserialize(IHash::TPtr hash, IEquate::TPtr equate) // keeps the hash/equate objects to pass to every restored set
+ : Hash_(hash)
+ , Equate_(equate)
+ {}
+
+private:
+ IHash::TPtr Hash_;
+ IEquate::TPtr Equate_;
+};
+
+template <EDataSlot Slot> // Implementation of Set.Merge for a value type that is the data slot Slot.
+class TSetMergeData: public TBoxedValue {
+private:
+ TUnboxedValue Run(const IValueBuilder*, const TUnboxedValuePod* args) const override { // args[0], args[1]: the two set resources
+ auto left = GetSetResourceData<Slot>(args[0]);
+ auto right = GetSetResourceData<Slot>(args[1]);
+ return TUnboxedValuePod(new TSetResourceData<Slot>(*left->Get(), *right->Get())); // result is a new resource; inputs are only read
+ }
+};
+
+class TSetMerge: public TBoxedValue { // Implementation of Set.Merge for non-data value types.
+private:
+ TUnboxedValue Run(const IValueBuilder*, const TUnboxedValuePod* args) const override { // args[0], args[1]: the two set resources
+ auto left = GetSetResource(args[0]);
+ auto right = GetSetResource(args[1]);
+ return TUnboxedValuePod(new TSetResource(*left->Get(), *right->Get(), Hash_, Equate_)); // result is a new resource; inputs are only read
+ }
+
+public:
+ TSetMerge(IHash::TPtr hash, IEquate::TPtr equate) // keeps the hash/equate objects to pass to every merged set
+ : Hash_(hash)
+ , Equate_(equate)
+ {}
+
+private:
+ IHash::TPtr Hash_;
+ IEquate::TPtr Equate_;
+};
+
+template <EDataSlot Slot> // Implementation of Set.GetResult for a value type that is the data slot Slot.
+class TSetGetResultData: public TBoxedValue {
+private:
+ TUnboxedValue Run(const IValueBuilder* valueBuilder, const TUnboxedValuePod* args) const override { // args[0]: set resource
+ return GetSetResourceData<Slot>(args[0])->Get()->GetResult(valueBuilder); // array of the collected items
+ }
+};
+
+class TSetGetResult: public TBoxedValue { // Implementation of Set.GetResult for non-data value types.
+private:
+ TUnboxedValue Run(const IValueBuilder* valueBuilder, const TUnboxedValuePod* args) const override { // args[0]: set resource
+ return GetSetResource(args[0])->Get()->GetResult(valueBuilder); // array of the collected items
+ }
+};
+
+
+#define MAKE_RESOURCE(slot, ...) /* defines the resource tag string and the TSetResourceData specialisation for one data slot */ \
+extern const char SetResourceName##slot[] = "Set.SetResource."#slot; \
+template <> \
+class TSetResourceData<EDataSlot::slot>: \
+ public TBoxedResource<TSetData<EDataSlot::slot>, SetResourceName##slot> \
+{ \
+public: \
+ template <typename... Args> \
+ inline TSetResourceData(Args&&... args) \
+ : TBoxedResource(std::forward<Args>(args)...) \
+ {} \
+};
+
+UDF_TYPE_ID_MAP(MAKE_RESOURCE) // UDF_TYPE_ID_MAP is defined outside this file; presumably applies the macro once per data slot - TODO confirm
+
+#define MAKE_IMPL(operation, slot) /* switch case: attach operation<EDataSlot::slot> as the function implementation */ \
+case EDataSlot::slot: \
+ builder.Implementation(new operation<EDataSlot::slot>); \
+ break;
+
+#define MAKE_CREATE(slot, ...) MAKE_IMPL(TSetCreateData, slot) // case for Create
+#define MAKE_ADD_VALUE(slot, ...) MAKE_IMPL(TSetAddValueData, slot) // case for AddValue
+#define MAKE_WAS_CHANGED(slot, ...) MAKE_IMPL(TSetWasChangedData, slot) // case for WasChanged
+#define MAKE_SERIALIZE(slot, ...) MAKE_IMPL(TSetSerializeData, slot) // case for Serialize
+#define MAKE_DESERIALIZE(slot, ...) MAKE_IMPL(TSetDeserializeData, slot) // case for Deserialize
+#define MAKE_MERGE(slot, ...) MAKE_IMPL(TSetMergeData, slot) // case for Merge
+#define MAKE_GET_RESULT(slot, ...) MAKE_IMPL(TSetGetResultData, slot) // case for GetResult
+
+#define MAKE_TYPE(slot, ...) /* switch case: set setType to the resource type tagged SetResourceName<slot> */ \
+case EDataSlot::slot: \
+ setType = builder.Resource(SetResourceName##slot); \
+ break;
+
+
+static const auto CreateName = TStringRef::Of("Create");
+static const auto AddValueName = TStringRef::Of("AddValue");
+static const auto WasChangedName = TStringRef::Of("WasChanged"); // must be used right after AddValue
+static const auto SerializeName = TStringRef::Of("Serialize");
+static const auto DeserializeName = TStringRef::Of("Deserialize");
+static const auto MergeName = TStringRef::Of("Merge");
+static const auto GetResultName = TStringRef::Of("GetResult");
+
+class TSetModule: public IUdfModule { // UDF module "Set": Create, AddValue, WasChanged, Serialize, Deserialize, Merge, GetResult
+public:
+ TStringRef Name() const { // module name
+ return TStringRef::Of("Set");
+ }
+
+ void CleanupOnTerminate() const final { // nothing to clean up
+ }
+
+ void GetAllFunctions(IFunctionsSink& sink) const final { // lists the seven functions, each marked type-aware
+ sink.Add(CreateName)->SetTypeAwareness();
+ sink.Add(AddValueName)->SetTypeAwareness();
+ sink.Add(WasChangedName)->SetTypeAwareness();
+ sink.Add(SerializeName)->SetTypeAwareness();
+ sink.Add(DeserializeName)->SetTypeAwareness();
+ sink.Add(MergeName)->SetTypeAwareness();
+ sink.Add(GetResultName)->SetTypeAwareness();
+ }
+
+ void BuildFunctionTypeInfo( // builds the signature (and, unless typesOnly, the implementation) of the function called name
+ const TStringRef& name,
+ TType* userType,
+ const TStringRef& typeConfig,
+ ui32 flags,
+ IFunctionTypeInfoBuilder& builder) const final
+ {
+ Y_UNUSED(typeConfig);
+
+ try {
+ const bool typesOnly = (flags & TFlags::TypesOnly); // when set, only the signature is built and no implementation is attached
+ builder.UserType(userType);
+
+ auto typeHelper = builder.TypeInfoHelper();
+
+ auto userTypeInspector = TTupleTypeInspector(*typeHelper, userType);
+ if (!userTypeInspector || userTypeInspector.GetElementsCount() != 3) { // the user type must be a tuple of exactly three elements
+ builder.SetError("User type is not a 3-tuple");
+ return;
+ }
+
+ bool isGeneric = false; // true when the value type is not a data type
+ IHash::TPtr hash; // set on the generic path only
+ IEquate::TPtr equate; // set on the generic path only
+ TMaybe<EDataSlot> slot; // set on the data-type path only
+
+ auto valueType = userTypeInspector.GetElementType(2); // the set's element type is the third element of the user type
+ auto valueTypeInspector = TDataTypeInspector(*typeHelper, valueType);
+ if (!valueTypeInspector) { // not a data type: hash/equate objects are requested from the builder
+ isGeneric = true;
+ hash = builder.MakeHash(valueType);
+ equate = builder.MakeEquate(valueType);
+ if (!hash || !equate) { // returns without SetError; presumably the builder has already reported the failure - TODO confirm
+ return;
+ }
+ } else {
+ slot = FindDataSlot(valueTypeInspector.GetTypeId());
+ if (!slot) {
+ builder.SetError("Unknown data type");
+ return;
+ }
+ const auto& info = NUdf::GetDataTypeInfo(*slot);
+ const auto& features = info.Features;
+ if (!(features & NUdf::CanHash) || !(features & NUdf::CanEquate)) { // the data type needs both the CanHash and CanEquate features
+ builder.SetError(TStringBuilder() << "Type " << info.Name << " is not hashable or equatable");
+ return;
+ }
+ }
+
+ auto serializedListType = builder.List()->Item(valueType).Build();
+ auto serializedType = builder.Tuple()->Add<ui32>().Add(serializedListType).Build(); // Tuple<ui32, List<valueType>>, matching TSetBase::Serialize
+
+ TType* setType = nullptr; // resource type of the set; its tag depends on the value type
+ if (isGeneric) {
+ setType = builder.Resource(SetResourceNameGeneric);
+ } else {
+ switch (*slot) {
+ UDF_TYPE_ID_MAP(MAKE_TYPE)
+ }
+ }
+
+ if (name == CreateName) { // Create(value, ui32) -> set
+ builder.IsStrict();
+
+ builder.Args()->Add(valueType).Add<ui32>().Done().Returns(setType);
+
+ if (!typesOnly) {
+ if (isGeneric) {
+ builder.Implementation(new TSetCreate(hash, equate));
+ } else {
+ switch (*slot) {
+ UDF_TYPE_ID_MAP(MAKE_CREATE)
+ }
+ }
+ }
+ }
+
+ if (name == AddValueName) { // AddValue(set, value) -> set
+ builder.IsStrict();
+
+ builder.Args()->Add(setType).Add(valueType).Done().Returns(setType);
+
+ if (!typesOnly) {
+ if (isGeneric) {
+ builder.Implementation(new TSetAddValue);
+ } else {
+ switch (*slot) {
+ UDF_TYPE_ID_MAP(MAKE_ADD_VALUE)
+ }
+ }
+ }
+ }
+
+ if (name == WasChangedName) { // WasChanged(set) -> bool
+ builder.IsStrict();
+
+ builder.Args()->Add(setType).Done().Returns<bool>();
+
+ if (!typesOnly) {
+ if (isGeneric) {
+ builder.Implementation(new TSetWasChanged);
+ } else {
+ switch (*slot) {
+ UDF_TYPE_ID_MAP(MAKE_WAS_CHANGED)
+ }
+ }
+ }
+ }
+
+ if (name == MergeName) { // Merge(set, set) -> set
+ builder.IsStrict();
+
+ builder.Args()->Add(setType).Add(setType).Done().Returns(setType);
+
+ if (!typesOnly) {
+ if (isGeneric) {
+ builder.Implementation(new TSetMerge(hash, equate));
+ } else {
+ switch (*slot) {
+ UDF_TYPE_ID_MAP(MAKE_MERGE)
+ }
+ }
+ }
+ }
+
+ if (name == SerializeName) { // Serialize(set) -> Tuple<ui32, List<value>>
+ builder.IsStrict();
+
+ builder.Args()->Add(setType).Done().Returns(serializedType);
+
+ if (!typesOnly) {
+ if (isGeneric) {
+ builder.Implementation(new TSetSerialize);
+ } else {
+ switch (*slot) {
+ UDF_TYPE_ID_MAP(MAKE_SERIALIZE)
+ }
+ }
+ }
+ }
+
+ if (name == DeserializeName) { // Deserialize(Tuple<ui32, List<value>>) -> set; the only function here without IsStrict()
+ builder.Args()->Add(serializedType).Done().Returns(setType);
+
+ if (!typesOnly) {
+ if (isGeneric) {
+ builder.Implementation(new TSetDeserialize(hash, equate));
+ } else {
+ switch (*slot) {
+ UDF_TYPE_ID_MAP(MAKE_DESERIALIZE)
+ }
+ }
+ }
+ }
+
+ if (name == GetResultName) { // GetResult(set) -> List<value>
+ auto resultType = builder.List()->Item(valueType).Build();
+
+ builder.IsStrict();
+
+ builder.Args()->Add(setType).Done().Returns(resultType);
+
+ if (!typesOnly) {
+ if (isGeneric) {
+ builder.Implementation(new TSetGetResult);
+ } else {
+ switch (*slot) {
+ UDF_TYPE_ID_MAP(MAKE_GET_RESULT)
+ }
+ }
+ }
+ }
+
+ } catch (const std::exception& e) { // e itself is unused; the text comes from CurrentExceptionMessage()
+ builder.SetError(CurrentExceptionMessage());
+ }
+ }
+};
+
+} // namespace
+
+REGISTER_MODULES(TSetModule)
diff --git a/yql/essentials/udfs/common/set/test/canondata/result.json b/yql/essentials/udfs/common/set/test/canondata/result.json
new file mode 100644
index 00000000000..a235fbf9029
--- /dev/null
+++ b/yql/essentials/udfs/common/set/test/canondata/result.json
@@ -0,0 +1,47 @@
+{
+ "test.test[ListDistinctDictInDict]": [
+ {
+ "uri": "file://test.test_ListDistinctDictInDict_/results.txt"
+ }
+ ],
+ "test.test[ListDistinctDict]": [
+ {
+ "uri": "file://test.test_ListDistinctDict_/results.txt"
+ }
+ ],
+ "test.test[ListDistinctLazyList]": [
+ {
+ "uri": "file://test.test_ListDistinctLazyList_/results.txt"
+ }
+ ],
+ "test.test[ListDistinctLimit]": [
+ {
+ "uri": "file://test.test_ListDistinctLimit_/results.txt"
+ }
+ ],
+ "test.test[ListDistinctSingular]": [
+ {
+ "uri": "file://test.test_ListDistinctSingular_/results.txt"
+ }
+ ],
+ "test.test[ListDistinctStructInDict]": [
+ {
+ "uri": "file://test.test_ListDistinctStructInDict_/results.txt"
+ }
+ ],
+ "test.test[ListDistinctTuple]": [
+ {
+ "uri": "file://test.test_ListDistinctTuple_/results.txt"
+ }
+ ],
+ "test.test[ListDistinctVariant]": [
+ {
+ "uri": "file://test.test_ListDistinctVariant_/results.txt"
+ }
+ ],
+ "test.test[ListDistinct]": [
+ {
+ "uri": "file://test.test_ListDistinct_/results.txt"
+ }
+ ]
+}
diff --git a/yql/essentials/udfs/common/set/test/canondata/test.test_ListDistinctDictInDict_/results.txt b/yql/essentials/udfs/common/set/test/canondata/test.test_ListDistinctDictInDict_/results.txt
new file mode 100644
index 00000000000..09714369716
--- /dev/null
+++ b/yql/essentials/udfs/common/set/test/canondata/test.test_ListDistinctDictInDict_/results.txt
@@ -0,0 +1,79 @@
+[
+ {
+ "Write" = [
+ {
+ "Type" = [
+ "ListType";
+ [
+ "StructType";
+ [
+ [
+ "column0";
+ [
+ "ListType";
+ [
+ "DictType";
+ [
+ "DictType";
+ [
+ "DataType";
+ "Int32"
+ ];
+ [
+ "VoidType"
+ ]
+ ];
+ [
+ "VoidType"
+ ]
+ ]
+ ]
+ ]
+ ]
+ ]
+ ];
+ "Data" = [
+ [
+ [
+ [
+ [
+ [
+ [
+ "1";
+ "Void"
+ ]
+ ];
+ "Void"
+ ]
+ ];
+ [
+ [
+ [
+ [
+ "1";
+ "Void"
+ ]
+ ];
+ "Void"
+ ];
+ [
+ [
+ [
+ "2";
+ "Void"
+ ];
+ [
+ "1";
+ "Void"
+ ]
+ ];
+ "Void"
+ ]
+ ]
+ ]
+ ]
+ ]
+ }
+ ]
+ }
+] \ No newline at end of file
diff --git a/yql/essentials/udfs/common/set/test/canondata/test.test_ListDistinctDict_/results.txt b/yql/essentials/udfs/common/set/test/canondata/test.test_ListDistinctDict_/results.txt
new file mode 100644
index 00000000000..c2f9c4e101f
--- /dev/null
+++ b/yql/essentials/udfs/common/set/test/canondata/test.test_ListDistinctDict_/results.txt
@@ -0,0 +1,103 @@
+[
+ {
+ "Write" = [
+ {
+ "Type" = [
+ "ListType";
+ [
+ "StructType";
+ [
+ [
+ "column0";
+ [
+ "ListType";
+ [
+ "DictType";
+ [
+ "DataType";
+ "Int32"
+ ];
+ [
+ "VoidType"
+ ]
+ ]
+ ]
+ ]
+ ]
+ ]
+ ];
+ "Data" = [
+ [
+ [
+ [
+ [
+ "1";
+ "Void"
+ ]
+ ];
+ [
+ [
+ "2";
+ "Void"
+ ];
+ [
+ "1";
+ "Void"
+ ]
+ ]
+ ]
+ ]
+ ]
+ }
+ ]
+ };
+ {
+ "Write" = [
+ {
+ "Type" = [
+ "ListType";
+ [
+ "StructType";
+ [
+ [
+ "column0";
+ [
+ "ListType";
+ [
+ "DictType";
+ [
+ "DataType";
+ "Int32"
+ ];
+ [
+ "DataType";
+ "Int32"
+ ]
+ ]
+ ]
+ ]
+ ]
+ ]
+ ];
+ "Data" = [
+ [
+ [
+ [
+ [
+ "1";
+ "3"
+ ]
+ ];
+ [
+ [
+ "1";
+ "2"
+ ]
+ ]
+ ]
+ ]
+ ]
+ }
+ ]
+ }
+] \ No newline at end of file
diff --git a/yql/essentials/udfs/common/set/test/canondata/test.test_ListDistinctLazyList_/results.txt b/yql/essentials/udfs/common/set/test/canondata/test.test_ListDistinctLazyList_/results.txt
new file mode 100644
index 00000000000..3749a4a3b75
--- /dev/null
+++ b/yql/essentials/udfs/common/set/test/canondata/test.test_ListDistinctLazyList_/results.txt
@@ -0,0 +1,85 @@
+[
+ {
+ "Write" = [
+ {
+ "Type" = [
+ "ListType";
+ [
+ "StructType";
+ [
+ [
+ "column0";
+ [
+ "ListType";
+ [
+ "ListType";
+ [
+ "DataType";
+ "Int32"
+ ]
+ ]
+ ]
+ ]
+ ]
+ ]
+ ];
+ "Data" = [
+ [
+ [
+ [
+ "1";
+ "2";
+ "3"
+ ];
+ [
+ "1";
+ "2"
+ ]
+ ]
+ ]
+ ]
+ }
+ ]
+ };
+ {
+ "Write" = [
+ {
+ "Type" = [
+ "ListType";
+ [
+ "StructType";
+ [
+ [
+ "column0";
+ [
+ "ListType";
+ [
+ "ListType";
+ [
+ "DataType";
+ "Int32"
+ ]
+ ]
+ ]
+ ]
+ ]
+ ]
+ ];
+ "Data" = [
+ [
+ [
+ [
+ "1";
+ "3"
+ ];
+ [
+ "1";
+ "2"
+ ]
+ ]
+ ]
+ ]
+ }
+ ]
+ }
+] \ No newline at end of file
diff --git a/yql/essentials/udfs/common/set/test/canondata/test.test_ListDistinctLimit_/results.txt b/yql/essentials/udfs/common/set/test/canondata/test.test_ListDistinctLimit_/results.txt
new file mode 100644
index 00000000000..dbcfe2be9dc
--- /dev/null
+++ b/yql/essentials/udfs/common/set/test/canondata/test.test_ListDistinctLimit_/results.txt
@@ -0,0 +1,57 @@
+[
+ {
+ "Write" = [
+ {
+ "Type" = [
+ "ListType";
+ [
+ "StructType";
+ [
+ [
+ "key";
+ [
+ "DataType";
+ "String"
+ ]
+ ];
+ [
+ "column1";
+ [
+ "ListType";
+ [
+ "DataType";
+ "String"
+ ]
+ ]
+ ]
+ ]
+ ]
+ ];
+ "Data" = [
+ [
+ "1";
+ [
+ "a";
+ "c";
+ "b"
+ ]
+ ];
+ [
+ "2";
+ [
+ "x";
+ "y";
+ "u"
+ ]
+ ];
+ [
+ "3";
+ [
+ "m"
+ ]
+ ]
+ ]
+ }
+ ]
+ }
+] \ No newline at end of file
diff --git a/yql/essentials/udfs/common/set/test/canondata/test.test_ListDistinctSingular_/results.txt b/yql/essentials/udfs/common/set/test/canondata/test.test_ListDistinctSingular_/results.txt
new file mode 100644
index 00000000000..7fb0c4f89b1
--- /dev/null
+++ b/yql/essentials/udfs/common/set/test/canondata/test.test_ListDistinctSingular_/results.txt
@@ -0,0 +1,115 @@
+[
+ {
+ "Write" = [
+ {
+ "Type" = [
+ "ListType";
+ [
+ "StructType";
+ [
+ [
+ "column0";
+ [
+ "EmptyListType"
+ ]
+ ]
+ ]
+ ]
+ ];
+ "Data" = [
+ [
+ []
+ ]
+ ]
+ }
+ ]
+ };
+ {
+ "Write" = [
+ {
+ "Type" = [
+ "ListType";
+ [
+ "StructType";
+ [
+ [
+ "column0";
+ [
+ "ListType";
+ [
+ "VoidType"
+ ]
+ ]
+ ]
+ ]
+ ]
+ ];
+ "Data" = [
+ [
+ []
+ ]
+ ]
+ }
+ ]
+ };
+ {
+ "Write" = [
+ {
+ "Type" = [
+ "ListType";
+ [
+ "StructType";
+ [
+ [
+ "column0";
+ [
+ "ListType";
+ [
+ "EmptyListType"
+ ]
+ ]
+ ]
+ ]
+ ]
+ ];
+ "Data" = [
+ [
+ [
+ []
+ ]
+ ]
+ ]
+ }
+ ]
+ };
+ {
+ "Write" = [
+ {
+ "Type" = [
+ "ListType";
+ [
+ "StructType";
+ [
+ [
+ "column0";
+ [
+ "ListType";
+ [
+ "EmptyDictType"
+ ]
+ ]
+ ]
+ ]
+ ]
+ ];
+ "Data" = [
+ [
+ [
+ []
+ ]
+ ]
+ ]
+ }
+ ]
+ }
+] \ No newline at end of file
diff --git a/yql/essentials/udfs/common/set/test/canondata/test.test_ListDistinctStructInDict_/results.txt b/yql/essentials/udfs/common/set/test/canondata/test.test_ListDistinctStructInDict_/results.txt
new file mode 100644
index 00000000000..09933a72e3d
--- /dev/null
+++ b/yql/essentials/udfs/common/set/test/canondata/test.test_ListDistinctStructInDict_/results.txt
@@ -0,0 +1,71 @@
+[
+ {
+ "Write" = [
+ {
+ "Type" = [
+ "ListType";
+ [
+ "StructType";
+ [
+ [
+ "column0";
+ [
+ "ListType";
+ [
+ "DictType";
+ [
+ "StructType";
+ [
+ [
+ "a";
+ [
+ "DataType";
+ "Int32"
+ ]
+ ];
+ [
+ "b";
+ [
+ "DataType";
+ "Int32"
+ ]
+ ]
+ ]
+ ];
+ [
+ "VoidType"
+ ]
+ ]
+ ]
+ ]
+ ]
+ ]
+ ];
+ "Data" = [
+ [
+ [
+ [
+ [
+ [
+ "1";
+ "3"
+ ];
+ "Void"
+ ]
+ ];
+ [
+ [
+ [
+ "1";
+ "2"
+ ];
+ "Void"
+ ]
+ ]
+ ]
+ ]
+ ]
+ }
+ ]
+ }
+] \ No newline at end of file
diff --git a/yql/essentials/udfs/common/set/test/canondata/test.test_ListDistinctTuple_/results.txt b/yql/essentials/udfs/common/set/test/canondata/test.test_ListDistinctTuple_/results.txt
new file mode 100644
index 00000000000..510244f4301
--- /dev/null
+++ b/yql/essentials/udfs/common/set/test/canondata/test.test_ListDistinctTuple_/results.txt
@@ -0,0 +1,115 @@
+[
+ {
+ "Write" = [
+ {
+ "Type" = [
+ "ListType";
+ [
+ "StructType";
+ [
+ [
+ "key";
+ [
+ "DataType";
+ "String"
+ ]
+ ];
+ [
+ "column1";
+ [
+ "ListType";
+ [
+ "TupleType";
+ [
+ [
+ "DataType";
+ "String"
+ ];
+ [
+ "DataType";
+ "String"
+ ]
+ ]
+ ]
+ ]
+ ]
+ ]
+ ]
+ ];
+ "Data" = [
+ [
+ "1";
+ [
+ [
+ "1";
+ "a"
+ ];
+ [
+ "1";
+ "e"
+ ];
+ [
+ "1";
+ "b"
+ ];
+ [
+ "1";
+ "c"
+ ];
+ [
+ "2";
+ "a"
+ ]
+ ]
+ ];
+ [
+ "2";
+ [
+ [
+ "2";
+ "x"
+ ];
+ [
+ "3";
+ "y"
+ ];
+ [
+ "4";
+ "x"
+ ];
+ [
+ "5";
+ "u"
+ ];
+ [
+ "6";
+ "v"
+ ];
+ [
+ "8";
+ "x"
+ ];
+ [
+ "7";
+ "w"
+ ];
+ [
+ "9";
+ "w"
+ ]
+ ]
+ ];
+ [
+ "3";
+ [
+ [
+ "0";
+ "m"
+ ]
+ ]
+ ]
+ ]
+ }
+ ]
+ }
+] \ No newline at end of file
diff --git a/yql/essentials/udfs/common/set/test/canondata/test.test_ListDistinctVariant_/results.txt b/yql/essentials/udfs/common/set/test/canondata/test.test_ListDistinctVariant_/results.txt
new file mode 100644
index 00000000000..e0ce566a468
--- /dev/null
+++ b/yql/essentials/udfs/common/set/test/canondata/test.test_ListDistinctVariant_/results.txt
@@ -0,0 +1,108 @@
+[
+ {
+ "Write" = [
+ {
+ "Type" = [
+ "ListType";
+ [
+ "StructType";
+ [
+ [
+ "column0";
+ [
+ "ListType";
+ [
+ "VariantType";
+ [
+ "TupleType";
+ [
+ [
+ "DataType";
+ "Int32"
+ ];
+ [
+ "DataType";
+ "String"
+ ]
+ ]
+ ]
+ ]
+ ]
+ ]
+ ]
+ ]
+ ];
+ "Data" = [
+ [
+ [
+ [
+ "1";
+ "str"
+ ];
+ [
+ "0";
+ "1"
+ ]
+ ]
+ ]
+ ]
+ }
+ ]
+ };
+ {
+ "Write" = [
+ {
+ "Type" = [
+ "ListType";
+ [
+ "StructType";
+ [
+ [
+ "column0";
+ [
+ "ListType";
+ [
+ "VariantType";
+ [
+ "StructType";
+ [
+ [
+ "x";
+ [
+ "DataType";
+ "Int32"
+ ]
+ ];
+ [
+ "y";
+ [
+ "DataType";
+ "String"
+ ]
+ ]
+ ]
+ ]
+ ]
+ ]
+ ]
+ ]
+ ]
+ ];
+ "Data" = [
+ [
+ [
+ [
+ "1";
+ "str"
+ ];
+ [
+ "0";
+ "1"
+ ]
+ ]
+ ]
+ ]
+ }
+ ]
+ }
+] \ No newline at end of file
diff --git a/yql/essentials/udfs/common/set/test/canondata/test.test_ListDistinct_/results.txt b/yql/essentials/udfs/common/set/test/canondata/test.test_ListDistinct_/results.txt
new file mode 100644
index 00000000000..937160a4571
--- /dev/null
+++ b/yql/essentials/udfs/common/set/test/canondata/test.test_ListDistinct_/results.txt
@@ -0,0 +1,60 @@
+[
+ {
+ "Write" = [
+ {
+ "Type" = [
+ "ListType";
+ [
+ "StructType";
+ [
+ [
+ "key";
+ [
+ "DataType";
+ "String"
+ ]
+ ];
+ [
+ "column1";
+ [
+ "ListType";
+ [
+ "DataType";
+ "String"
+ ]
+ ]
+ ]
+ ]
+ ]
+ ];
+ "Data" = [
+ [
+ "1";
+ [
+ "a";
+ "c";
+ "b";
+ "e"
+ ]
+ ];
+ [
+ "2";
+ [
+ "x";
+ "y";
+ "v";
+ "u";
+ "w"
+ ]
+ ];
+ [
+ "3";
+ [
+ "m"
+ ]
+ ]
+ ]
+ }
+ ]
+ }
+] \ No newline at end of file
diff --git a/yql/essentials/udfs/common/set/test/cases/ListDistinct.in b/yql/essentials/udfs/common/set/test/cases/ListDistinct.in
new file mode 100644
index 00000000000..af51412a1e3
--- /dev/null
+++ b/yql/essentials/udfs/common/set/test/cases/ListDistinct.in
@@ -0,0 +1,15 @@
+{"key"="1";"subkey"="";"value"="a"};
+{"key"="1";"subkey"="";"value"="b"};
+{"key"="1";"subkey"="";"value"="c"};
+{"key"="1";"subkey"="";"value"="a"};
+{"key"="1";"subkey"="";"value"="e"};
+{"key"="1";"subkey"="";"value"="b"};
+{"key"="2";"subkey"="";"value"="x"};
+{"key"="2";"subkey"="";"value"="y"};
+{"key"="2";"subkey"="";"value"="x"};
+{"key"="2";"subkey"="";"value"="u"};
+{"key"="2";"subkey"="";"value"="v"};
+{"key"="2";"subkey"="";"value"="w"};
+{"key"="2";"subkey"="";"value"="x"};
+{"key"="2";"subkey"="";"value"="w"};
+{"key"="3";"subkey"="";"value"="m"};
diff --git a/yql/essentials/udfs/common/set/test/cases/ListDistinct.in.attr b/yql/essentials/udfs/common/set/test/cases/ListDistinct.in.attr
new file mode 100644
index 00000000000..2a151e9c475
--- /dev/null
+++ b/yql/essentials/udfs/common/set/test/cases/ListDistinct.in.attr
@@ -0,0 +1,30 @@
+{
+ "_yql_row_spec" = {
+ "Type" = [
+ "StructType";
+ [
+ [
+ "key";
+ [
+ "DataType";
+ "String"
+ ]
+ ];
+ [
+ "subkey";
+ [
+ "DataType";
+ "String"
+ ]
+ ];
+ [
+ "value";
+ [
+ "DataType";
+ "String"
+ ]
+ ]
+ ]
+ ]
+ }
+}
diff --git a/yql/essentials/udfs/common/set/test/cases/ListDistinct.sql b/yql/essentials/udfs/common/set/test/cases/ListDistinct.sql
new file mode 100644
index 00000000000..b13b6c9c715
--- /dev/null
+++ b/yql/essentials/udfs/common/set/test/cases/ListDistinct.sql
@@ -0,0 +1,9 @@
+/* syntax version 1 */
+USE plato;
+
+SELECT
+ key,
+ AGGREGATE_LIST_DISTINCT(value)
+FROM Input
+GROUP BY key
+ORDER BY key
diff --git a/yql/essentials/udfs/common/set/test/cases/ListDistinctDict.sql b/yql/essentials/udfs/common/set/test/cases/ListDistinctDict.sql
new file mode 100644
index 00000000000..2bee58b66e9
--- /dev/null
+++ b/yql/essentials/udfs/common/set/test/cases/ListDistinctDict.sql
@@ -0,0 +1,8 @@
+/* syntax version 1 */
+select AGGREGATE_LIST_DISTINCT(x) from
+(select [{1,2},{1},{1,2}] as x)
+flatten list by x;
+
+select AGGREGATE_LIST_DISTINCT(x) from
+(select [{1:2},{1:3},{1:2}] as x)
+flatten list by x;
diff --git a/yql/essentials/udfs/common/set/test/cases/ListDistinctDictInDict.sql b/yql/essentials/udfs/common/set/test/cases/ListDistinctDictInDict.sql
new file mode 100644
index 00000000000..775b8dc797a
--- /dev/null
+++ b/yql/essentials/udfs/common/set/test/cases/ListDistinctDictInDict.sql
@@ -0,0 +1,4 @@
+/* syntax version 1 */
+select AGGREGATE_LIST_DISTINCT(x) from
+(select [{{1,2},{1}},{{1}},{{1,2},{1}}] as x)
+flatten list by x;
diff --git a/yql/essentials/udfs/common/set/test/cases/ListDistinctLazyList.sql b/yql/essentials/udfs/common/set/test/cases/ListDistinctLazyList.sql
new file mode 100644
index 00000000000..fcaf49e69d6
--- /dev/null
+++ b/yql/essentials/udfs/common/set/test/cases/ListDistinctLazyList.sql
@@ -0,0 +1,9 @@
+/* syntax version 1 */
+select AGGREGATE_LIST_DISTINCT(x) from
+(select [[1,2],[1,2,3],YQL::LazyList([1,2])] as x)
+flatten list by x;
+
+select AGGREGATE_LIST_DISTINCT(x) from
+(select [YQL::LazyList([1,2]),[1,3], YQL::LazyList([1,2])] as x)
+flatten list by x;
+
diff --git a/yql/essentials/udfs/common/set/test/cases/ListDistinctLimit.in b/yql/essentials/udfs/common/set/test/cases/ListDistinctLimit.in
new file mode 100644
index 00000000000..af51412a1e3
--- /dev/null
+++ b/yql/essentials/udfs/common/set/test/cases/ListDistinctLimit.in
@@ -0,0 +1,15 @@
+{"key"="1";"subkey"="";"value"="a"};
+{"key"="1";"subkey"="";"value"="b"};
+{"key"="1";"subkey"="";"value"="c"};
+{"key"="1";"subkey"="";"value"="a"};
+{"key"="1";"subkey"="";"value"="e"};
+{"key"="1";"subkey"="";"value"="b"};
+{"key"="2";"subkey"="";"value"="x"};
+{"key"="2";"subkey"="";"value"="y"};
+{"key"="2";"subkey"="";"value"="x"};
+{"key"="2";"subkey"="";"value"="u"};
+{"key"="2";"subkey"="";"value"="v"};
+{"key"="2";"subkey"="";"value"="w"};
+{"key"="2";"subkey"="";"value"="x"};
+{"key"="2";"subkey"="";"value"="w"};
+{"key"="3";"subkey"="";"value"="m"};
diff --git a/yql/essentials/udfs/common/set/test/cases/ListDistinctLimit.in.attr b/yql/essentials/udfs/common/set/test/cases/ListDistinctLimit.in.attr
new file mode 100644
index 00000000000..2a151e9c475
--- /dev/null
+++ b/yql/essentials/udfs/common/set/test/cases/ListDistinctLimit.in.attr
@@ -0,0 +1,30 @@
+{
+ "_yql_row_spec" = {
+ "Type" = [
+ "StructType";
+ [
+ [
+ "key";
+ [
+ "DataType";
+ "String"
+ ]
+ ];
+ [
+ "subkey";
+ [
+ "DataType";
+ "String"
+ ]
+ ];
+ [
+ "value";
+ [
+ "DataType";
+ "String"
+ ]
+ ]
+ ]
+ ]
+ }
+}
diff --git a/yql/essentials/udfs/common/set/test/cases/ListDistinctLimit.sql b/yql/essentials/udfs/common/set/test/cases/ListDistinctLimit.sql
new file mode 100644
index 00000000000..439f05776ee
--- /dev/null
+++ b/yql/essentials/udfs/common/set/test/cases/ListDistinctLimit.sql
@@ -0,0 +1,9 @@
+/* syntax version 1 */
+USE plato;
+
+SELECT
+ key,
+ AGGREGATE_LIST_DISTINCT(value, 3)
+FROM Input
+GROUP BY key
+ORDER BY key
diff --git a/yql/essentials/udfs/common/set/test/cases/ListDistinctSingular.sql b/yql/essentials/udfs/common/set/test/cases/ListDistinctSingular.sql
new file mode 100644
index 00000000000..1887ec61804
--- /dev/null
+++ b/yql/essentials/udfs/common/set/test/cases/ListDistinctSingular.sql
@@ -0,0 +1,16 @@
+/* syntax version 1 */
+select AGGREGATE_LIST_DISTINCT(x) from
+(select [null, null] as x)
+flatten list by x;
+
+select AGGREGATE_LIST_DISTINCT(x) from
+(select [void(), void()] as x)
+flatten list by x;
+
+select AGGREGATE_LIST_DISTINCT(x) from
+(select [[], []] as x)
+flatten list by x;
+
+select AGGREGATE_LIST_DISTINCT(x) from
+(select [{}, {}] as x)
+flatten list by x;
diff --git a/yql/essentials/udfs/common/set/test/cases/ListDistinctStructInDict.sql b/yql/essentials/udfs/common/set/test/cases/ListDistinctStructInDict.sql
new file mode 100644
index 00000000000..88acaede014
--- /dev/null
+++ b/yql/essentials/udfs/common/set/test/cases/ListDistinctStructInDict.sql
@@ -0,0 +1,4 @@
+/* syntax version 1 */
+select AGGREGATE_LIST_DISTINCT(x) from
+(select [{<|a:1,b:2|>},{<|a:1,b:3|>},{<|a:1,b:2|>}] as x)
+flatten list by x;
diff --git a/yql/essentials/udfs/common/set/test/cases/ListDistinctTuple.in b/yql/essentials/udfs/common/set/test/cases/ListDistinctTuple.in
new file mode 100644
index 00000000000..7c638071c18
--- /dev/null
+++ b/yql/essentials/udfs/common/set/test/cases/ListDistinctTuple.in
@@ -0,0 +1,15 @@
+{"key"="1";"subkey"="1";"value"="a"};
+{"key"="1";"subkey"="1";"value"="b"};
+{"key"="1";"subkey"="1";"value"="c"};
+{"key"="1";"subkey"="2";"value"="a"};
+{"key"="1";"subkey"="1";"value"="e"};
+{"key"="1";"subkey"="1";"value"="b"};
+{"key"="2";"subkey"="2";"value"="x"};
+{"key"="2";"subkey"="3";"value"="y"};
+{"key"="2";"subkey"="4";"value"="x"};
+{"key"="2";"subkey"="5";"value"="u"};
+{"key"="2";"subkey"="6";"value"="v"};
+{"key"="2";"subkey"="7";"value"="w"};
+{"key"="2";"subkey"="8";"value"="x"};
+{"key"="2";"subkey"="9";"value"="w"};
+{"key"="3";"subkey"="0";"value"="m"};
diff --git a/yql/essentials/udfs/common/set/test/cases/ListDistinctTuple.in.attr b/yql/essentials/udfs/common/set/test/cases/ListDistinctTuple.in.attr
new file mode 100644
index 00000000000..2a151e9c475
--- /dev/null
+++ b/yql/essentials/udfs/common/set/test/cases/ListDistinctTuple.in.attr
@@ -0,0 +1,30 @@
+{
+ "_yql_row_spec" = {
+ "Type" = [
+ "StructType";
+ [
+ [
+ "key";
+ [
+ "DataType";
+ "String"
+ ]
+ ];
+ [
+ "subkey";
+ [
+ "DataType";
+ "String"
+ ]
+ ];
+ [
+ "value";
+ [
+ "DataType";
+ "String"
+ ]
+ ]
+ ]
+ ]
+ }
+}
diff --git a/yql/essentials/udfs/common/set/test/cases/ListDistinctTuple.sql b/yql/essentials/udfs/common/set/test/cases/ListDistinctTuple.sql
new file mode 100644
index 00000000000..8c3472bb858
--- /dev/null
+++ b/yql/essentials/udfs/common/set/test/cases/ListDistinctTuple.sql
@@ -0,0 +1,9 @@
+/* syntax version 1 */
+USE plato;
+
+SELECT
+ key,
+ AGGREGATE_LIST_DISTINCT(AsTuple(subkey, value))
+FROM Input
+GROUP BY key
+ORDER BY key
diff --git a/yql/essentials/udfs/common/set/test/cases/ListDistinctVariant.sql b/yql/essentials/udfs/common/set/test/cases/ListDistinctVariant.sql
new file mode 100644
index 00000000000..29469d01b1e
--- /dev/null
+++ b/yql/essentials/udfs/common/set/test/cases/ListDistinctVariant.sql
@@ -0,0 +1,11 @@
+/* syntax version 1 */
+$vt1 = Variant<Int32,String>;
+select AGGREGATE_LIST_DISTINCT(x) from
+(select [Variant(1,"0",$vt1),Variant("str","1",$vt1),Variant(1,"0",$vt1)] as x)
+flatten list by x;
+
+$vt2 = Variant<x:Int32,y:String>;
+select AGGREGATE_LIST_DISTINCT(x) from
+(select [Variant(1,"x",$vt2),Variant("str","y",$vt2),Variant(1,"x",$vt2)] as x)
+flatten list by x;
+
diff --git a/yql/essentials/udfs/common/set/test/ya.make b/yql/essentials/udfs/common/set/test/ya.make
new file mode 100644
index 00000000000..e31115a00d8
--- /dev/null
+++ b/yql/essentials/udfs/common/set/test/ya.make
@@ -0,0 +1,13 @@
+YQL_UDF_TEST_CONTRIB()
+
+DEPENDS(yql/essentials/udfs/common/set)
+
+TIMEOUT(300)
+
+SIZE(MEDIUM)
+
+IF (SANITIZER_TYPE == "memory")
+ TAG(ya:not_autocheck) # YQL-15385
+ENDIF()
+
+END()
diff --git a/yql/essentials/udfs/common/set/ya.make b/yql/essentials/udfs/common/set/ya.make
new file mode 100644
index 00000000000..1303267409d
--- /dev/null
+++ b/yql/essentials/udfs/common/set/ya.make
@@ -0,0 +1,25 @@
+IF (YQL_PACKAGED)
+ PACKAGE()
+ FROM_SANDBOX(FILE 7319903801 OUT_NOAUTO libset_udf.so
+ )
+ END()
+ELSE ()
+YQL_UDF_CONTRIB(set_udf)
+
+ YQL_ABI_VERSION(
+ 2
+ 28
+ 0
+ )
+
+ SRCS(
+ set_udf.cpp
+ )
+
+ END()
+ENDIF ()
+
+
+RECURSE_FOR_TESTS(
+ test
+)
diff --git a/yql/essentials/udfs/common/stat/stat_udf.cpp b/yql/essentials/udfs/common/stat/stat_udf.cpp
new file mode 100644
index 00000000000..64c2bb4a698
--- /dev/null
+++ b/yql/essentials/udfs/common/stat/stat_udf.cpp
@@ -0,0 +1,3 @@
+#include "static/stat_udf.h"
+
+REGISTER_MODULES(TStatModule)
diff --git a/yql/essentials/udfs/common/stat/stat_udf_ut.cpp b/yql/essentials/udfs/common/stat/stat_udf_ut.cpp
new file mode 100644
index 00000000000..2a033ff31e4
--- /dev/null
+++ b/yql/essentials/udfs/common/stat/stat_udf_ut.cpp
@@ -0,0 +1,363 @@
+#include <library/cpp/testing/unittest/registar.h>
+#include <yql/essentials/minikql/mkql_function_registry.h>
+#include <yql/essentials/minikql/mkql_program_builder.h>
+#include <yql/essentials/minikql/computation/mkql_computation_node.h>
+#include <yql/essentials/minikql/comp_nodes/mkql_factories.h>
+#include <yql/essentials/minikql/invoke_builtins/mkql_builtins.h>
+#include <util/random/random.h>
+#include <util/system/sanitizers.h>
+#include <array>
+
+namespace NYql {
+using namespace NKikimr::NMiniKQL;
+
+ namespace NUdf {
+ extern NUdf::TUniquePtr<NUdf::IUdfModule> CreateStatModule(); // factory for the Stat UDF module; defined in static/static_udf.cpp
+ }
+
+ Y_UNIT_TEST_SUITE(TUDFStatTest) {
+ Y_UNIT_TEST(SimplePercentile) {
+     // The digest is seeded with 0.0 and then fed 1..9 one value at a time;
+     // the 0.9 percentile of those ten samples is expected to be 8.5.
+     auto registry = CreateFunctionRegistry(CreateBuiltinRegistry())->Clone();
+     auto rng = CreateDeterministicRandomProvider(1);
+     auto clock = CreateDeterministicTimeProvider(10000000);
+     registry->AddModule("", "Stat", NUdf::CreateStatModule());
+     TScopedAlloc alloc(__LOCATION__);
+     TTypeEnvironment env(alloc);
+     TProgramBuilder builder(env, *registry);
+     auto createUdf = builder.Udf("Stat.TDigest_Create");
+     auto addUdf = builder.Udf("Stat.TDigest_AddValue");
+     auto percentileUdf = builder.Udf("Stat.TDigest_GetPercentile");
+
+     // Program node holding the digest built so far.
+     auto seedNode = builder.NewDataLiteral<double>(0.0);
+     std::array<TRuntimeNode, 1> createArgs = {seedNode};
+     TRuntimeNode digest = builder.Apply(createUdf, createArgs);
+
+     for (int sample = 1; sample <= 9; ++sample) {
+         auto sampleNode = builder.NewDataLiteral(static_cast<double>(sample));
+         std::array<TRuntimeNode, 2> addArgs = {digest, sampleNode};
+         digest = builder.Apply(addUdf, addArgs);
+     }
+
+     auto quantileNode = builder.NewDataLiteral<double>(0.9);
+     std::array<TRuntimeNode, 2> percentileArgs = {digest, quantileNode};
+     TRuntimeNode program = builder.Apply(percentileUdf, percentileArgs);
+
+     // Compile the program into a computation graph and evaluate it.
+     TExploringNodeVisitor explorer;
+     explorer.Walk(program.GetNode(), env);
+     TComputationPatternOpts patternOpts(alloc.Ref(), env, GetBuiltinFactory(), registry.Get(),
+         NUdf::EValidateMode::None, NUdf::EValidatePolicy::Fail, "", EGraphPerProcess::Multi);
+     auto pattern = MakeComputationPattern(explorer, program, {}, patternOpts);
+     auto graph = pattern->Clone(patternOpts.ToComputationOptions(*rng, *clock));
+     const double percentile = graph->GetValue().Get<double>();
+     UNIT_ASSERT_DOUBLES_EQUAL(percentile, 8.5, 0.001);
+ }
+
+ Y_UNIT_TEST(SimplePercentileSpecific) {
+     // Builds a digest from the fixed samples {75, 800, 20, 150} and queries the
+     // median (0.5 percentile).
+     //
+     // This test used to assert nothing: its only check was commented out and
+     // expected 9.0, which lies outside the sample range and looks stale.
+     // The exact interpolated median depends on the digest implementation, so the
+     // result is only bounded here: a percentile estimate must fall between the
+     // smallest and the largest sample that was fed in.
+     auto mutableFunctionRegistry = CreateFunctionRegistry(CreateBuiltinRegistry())->Clone();
+     auto randomProvider = CreateDeterministicRandomProvider(1);
+     auto timeProvider = CreateDeterministicTimeProvider(1);
+     NUdf::TUniquePtr<NUdf::IUdfModule> module = NUdf::CreateStatModule();
+     mutableFunctionRegistry->AddModule("", "Stat", std::move(module));
+     TScopedAlloc alloc(__LOCATION__);
+     TTypeEnvironment env(alloc);
+     TProgramBuilder pgmBuilder(env, *mutableFunctionRegistry);
+     auto udfTDigest_Create = pgmBuilder.Udf("Stat.TDigest_Create");
+     auto udfTDigest_AddValue = pgmBuilder.Udf("Stat.TDigest_AddValue");
+     auto udfTDigest_GetPercentile = pgmBuilder.Udf("Stat.TDigest_GetPercentile");
+
+     const double minSample = 20.0;
+     const double maxSample = 800.0;
+
+     TRuntimeNode pgmDigest;
+     {
+         auto param1 = pgmBuilder.NewDataLiteral<double>(75.0);
+         TVector<TRuntimeNode> params = {param1};
+         pgmDigest = pgmBuilder.Apply(udfTDigest_Create, params);
+     }
+
+     TVector<double> vals = {maxSample, minSample, 150};
+     for (auto val : vals) {
+         auto param2 = pgmBuilder.NewDataLiteral(val);
+         TVector<TRuntimeNode> params = {pgmDigest, param2};
+         pgmDigest = pgmBuilder.Apply(udfTDigest_AddValue, params);
+     }
+
+     TRuntimeNode pgmReturn;
+     {
+         auto param2 = pgmBuilder.NewDataLiteral<double>(0.5);
+         TVector<TRuntimeNode> params = {pgmDigest, param2};
+         pgmReturn = pgmBuilder.Apply(udfTDigest_GetPercentile, params);
+     }
+
+     TExploringNodeVisitor explorer;
+     explorer.Walk(pgmReturn.GetNode(), env);
+     TComputationPatternOpts opts(alloc.Ref(), env, GetBuiltinFactory(), mutableFunctionRegistry.Get(),
+         NUdf::EValidateMode::None, NUdf::EValidatePolicy::Fail, "", EGraphPerProcess::Multi);
+     auto pattern = MakeComputationPattern(explorer, pgmReturn, {}, opts);
+     auto graph = pattern->Clone(opts.ToComputationOptions(*randomProvider, *timeProvider));
+     auto value = graph->GetValue();
+     const double median = value.Get<double>();
+     // Kept as a diagnostic so the actual estimate shows up in the test log.
+     Cerr << median << Endl;
+     // Both comparisons are false for NaN, so these also reject a NaN result.
+     UNIT_ASSERT(median >= minSample);
+     UNIT_ASSERT(median <= maxSample);
+ }
+
+ Y_UNIT_TEST(SerializedPercentile) {
+     // Same samples as SimplePercentile (0.0 followed by 1..9), but the digest
+     // goes through Serialize -> Deserialize before the 0.9 percentile is queried.
+     auto registry = CreateFunctionRegistry(CreateBuiltinRegistry())->Clone();
+     auto rng = CreateDeterministicRandomProvider(1);
+     auto clock = CreateDeterministicTimeProvider(1);
+     registry->AddModule("", "Stat", NUdf::CreateStatModule());
+     TScopedAlloc alloc(__LOCATION__);
+     TTypeEnvironment env(alloc);
+     TProgramBuilder builder(env, *registry);
+     auto createUdf = builder.Udf("Stat.TDigest_Create");
+     auto addUdf = builder.Udf("Stat.TDigest_AddValue");
+     auto percentileUdf = builder.Udf("Stat.TDigest_GetPercentile");
+     auto serializeUdf = builder.Udf("Stat.TDigest_Serialize");
+     auto deserializeUdf = builder.Udf("Stat.TDigest_Deserialize");
+
+     auto seedNode = builder.NewDataLiteral<double>(0.0);
+     std::array<TRuntimeNode, 1> createArgs = {seedNode};
+     TRuntimeNode digest = builder.Apply(createUdf, createArgs);
+
+     for (int sample = 1; sample <= 9; ++sample) {
+         auto sampleNode = builder.NewDataLiteral(static_cast<double>(sample));
+         std::array<TRuntimeNode, 2> addArgs = {digest, sampleNode};
+         digest = builder.Apply(addUdf, addArgs);
+     }
+
+     // Round-trip the digest through its serialized form.
+     std::array<TRuntimeNode, 1> serializeArgs = {digest};
+     TRuntimeNode serialized = builder.Apply(serializeUdf, serializeArgs);
+
+     std::array<TRuntimeNode, 1> deserializeArgs = {serialized};
+     TRuntimeNode restored = builder.Apply(deserializeUdf, deserializeArgs);
+
+     auto quantileNode = builder.NewDataLiteral<double>(0.9);
+     std::array<TRuntimeNode, 2> percentileArgs = {restored, quantileNode};
+     TRuntimeNode program = builder.Apply(percentileUdf, percentileArgs);
+
+     TExploringNodeVisitor explorer;
+     explorer.Walk(program.GetNode(), env);
+     TComputationPatternOpts patternOpts(alloc.Ref(), env, GetBuiltinFactory(), registry.Get(),
+         NUdf::EValidateMode::None, NUdf::EValidatePolicy::Fail, "", EGraphPerProcess::Multi);
+     auto pattern = MakeComputationPattern(explorer, program, {}, patternOpts);
+     auto graph = pattern->Clone(patternOpts.ToComputationOptions(*rng, *clock));
+     const double percentile = graph->GetValue().Get<double>();
+     UNIT_ASSERT_DOUBLES_EQUAL(percentile, 8.5, 0.001);
+ }
+
+ Y_UNIT_TEST(SerializedMergedPercentile) {
+     // Ten partial digests are built, serialized, deserialized and merged pairwise;
+     // the 0.9 percentile of the merged digest is then checked against 8.95.
+     auto registry = CreateFunctionRegistry(CreateBuiltinRegistry())->Clone();
+     auto rng = CreateDeterministicRandomProvider(1);
+     auto clock = CreateDeterministicTimeProvider(1);
+     registry->AddModule("", "Stat", NUdf::CreateStatModule());
+     TScopedAlloc alloc(__LOCATION__);
+     TTypeEnvironment env(alloc);
+     TProgramBuilder builder(env, *registry);
+     auto createUdf = builder.Udf("Stat.TDigest_Create");
+     auto addUdf = builder.Udf("Stat.TDigest_AddValue");
+     auto percentileUdf = builder.Udf("Stat.TDigest_GetPercentile");
+     auto serializeUdf = builder.Udf("Stat.TDigest_Serialize");
+     auto deserializeUdf = builder.Udf("Stat.TDigest_Deserialize");
+     auto mergeUdf = builder.Udf("Stat.TDigest_Merge");
+
+     TVector<TRuntimeNode> serializedDigests;
+
+     // Partial digest for each base in {0, 10, ..., 90}: seeded with base/10 and
+     // fed (base+1)/10 .. (base+9)/10, i.e. 0.0..0.9, 1.0..1.9, and so on.
+     for (int base = 0; base < 100; base += 10) {
+         auto seedNode = builder.NewDataLiteral(double(base) / 10);
+         std::array<TRuntimeNode, 1> createArgs = {seedNode};
+         TRuntimeNode partial = builder.Apply(createUdf, createArgs);
+
+         for (int n = base + 1; n < base + 10; ++n) {
+             auto sampleNode = builder.NewDataLiteral(double(n) / 10);
+             std::array<TRuntimeNode, 2> addArgs = {partial, sampleNode};
+             partial = builder.Apply(addUdf, addArgs);
+         }
+
+         std::array<TRuntimeNode, 1> serializeArgs = {partial};
+         TRuntimeNode serialized = builder.Apply(serializeUdf, serializeArgs);
+         serializedDigests.push_back(serialized);
+     }
+
+     // Left fold: the first restored digest becomes the accumulator, every
+     // following one is merged into it.
+     TRuntimeNode merged;
+     for (const TRuntimeNode& serialized : serializedDigests) {
+         std::array<TRuntimeNode, 1> deserializeArgs = {serialized};
+         TRuntimeNode restored = builder.Apply(deserializeUdf, deserializeArgs);
+         if (!merged) {
+             merged = restored;
+         } else {
+             std::array<TRuntimeNode, 2> mergeArgs = {merged, restored};
+             merged = builder.Apply(mergeUdf, mergeArgs);
+         }
+     }
+
+     auto quantileNode = builder.NewDataLiteral<double>(0.9);
+     std::array<TRuntimeNode, 2> percentileArgs = {merged, quantileNode};
+     TRuntimeNode program = builder.Apply(percentileUdf, percentileArgs);
+
+     TExploringNodeVisitor explorer;
+     explorer.Walk(program.GetNode(), env);
+     TComputationPatternOpts patternOpts(alloc.Ref(), env, GetBuiltinFactory(), registry.Get(),
+         NUdf::EValidateMode::None, NUdf::EValidatePolicy::Fail, "", EGraphPerProcess::Multi);
+     auto pattern = MakeComputationPattern(explorer, program, {}, patternOpts);
+     auto graph = pattern->Clone(patternOpts.ToComputationOptions(*rng, *clock));
+     const double percentile = graph->GetValue().Get<double>();
+     UNIT_ASSERT_DOUBLES_EQUAL(percentile, 8.95, 0.001);
+ }
+
+ // Draws one sample as 1 / u^(1/a), where u comes from RandomNumber<double>().
+ static double GetParetoRandomNumber(double a) {
+     const double uniform = RandomNumber<double>();
+     const double exponent = double(1) / a;
+     return 1 / pow(uniform, exponent);
+ }
+
+ Y_UNIT_TEST(BigPercentile) {
+     // Feeds 100000 samples from GetParetoRandomNumber(10) into a digest and checks
+     // that the reported 0.99 percentile sits at the matching empirical rank of the
+     // sorted samples, within the tolerance below.
+     auto registry = CreateFunctionRegistry(CreateBuiltinRegistry())->Clone();
+     auto rng = CreateDeterministicRandomProvider(1);
+     auto clock = CreateDeterministicTimeProvider(1);
+     registry->AddModule("", "Stat", NUdf::CreateStatModule());
+     TScopedAlloc alloc(__LOCATION__);
+     TTypeEnvironment env(alloc);
+     TProgramBuilder builder(env, *registry);
+     auto createUdf = builder.Udf("Stat.TDigest_Create");
+     auto addUdf = builder.Udf("Stat.TDigest_AddValue");
+     auto percentileUdf = builder.Udf("Stat.TDigest_GetPercentile");
+     const size_t sampleCount = 100000;
+     const double quantile = 0.99;
+     const double tolerance = 0.0004; // at q=0.99 threshold is 4*delta*0.0099
+
+     // Raw samples are kept next to their literal nodes so the empirical rank can
+     // be computed after the graph has run.
+     TVector<double> samples;
+     TVector<TRuntimeNode> sampleNodes;
+     samples.reserve(sampleCount);
+     sampleNodes.reserve(sampleCount);
+     for (size_t i = 0; i != sampleCount; ++i) {
+         const double sample = GetParetoRandomNumber(10);
+         samples.push_back(sample);
+         sampleNodes.push_back(builder.NewDataLiteral(sample));
+     }
+     TRuntimeNode sampleList = builder.AsList(sampleNodes);
+
+     // Fold1: the first item creates the digest, every later item is added to it.
+     const auto makeDigest = [&](TRuntimeNode first) {
+         std::array<TRuntimeNode, 1> createArgs = {first};
+         return builder.Apply(createUdf, createArgs);
+     };
+     const auto feedDigest = [&](TRuntimeNode item, TRuntimeNode state) {
+         std::array<TRuntimeNode, 2> addArgs = {state, item};
+         return builder.Apply(addUdf, addArgs);
+     };
+     auto digest = builder.Fold1(sampleList, makeDigest, feedDigest);
+
+     TRuntimeNode program = builder.Map(digest, [&](TRuntimeNode state) {
+         auto quantileNode = builder.NewDataLiteral(quantile);
+         std::array<TRuntimeNode, 2> percentileArgs = {state, quantileNode};
+         return builder.Apply(percentileUdf, percentileArgs);
+     });
+
+     TExploringNodeVisitor explorer;
+     explorer.Walk(program.GetNode(), env);
+     TComputationPatternOpts patternOpts(alloc.Ref(), env, GetBuiltinFactory(), registry.Get(),
+         NUdf::EValidateMode::None, NUdf::EValidatePolicy::Fail, "", EGraphPerProcess::Multi);
+     auto pattern = MakeComputationPattern(explorer, program, {}, patternOpts);
+     auto graph = pattern->Clone(patternOpts.ToComputationOptions(*rng, *clock));
+     auto value = graph->GetValue();
+     UNIT_ASSERT(value);
+     const double estimate = value.Get<double>();
+
+     std::sort(samples.begin(), samples.end());
+     // 1-based index of the last sample that is <= estimate.
+     const auto rank = std::upper_bound(samples.begin(), samples.end(), estimate) - samples.begin();
+     // See https://en.wikipedia.org/wiki/Percentile#First_Variant.2C
+     const double empiricalQuantile = (rank - 0.5) / double(samples.size());
+     UNIT_ASSERT_DOUBLES_EQUAL(empiricalQuantile, quantile, tolerance);
+ }
+
+ Y_UNIT_TEST(CentroidPrecision) {
+     // 99998 copies of 50.0 followed by a single 1.0 and a single 100.0: the 0.25
+     // percentile must come back as exactly the majority value.
+     auto registry = CreateFunctionRegistry(CreateBuiltinRegistry())->Clone();
+     auto rng = CreateDeterministicRandomProvider(1);
+     auto clock = CreateDeterministicTimeProvider(1);
+     registry->AddModule("", "Stat", NUdf::CreateStatModule());
+     TScopedAlloc alloc(__LOCATION__);
+     TTypeEnvironment env(alloc);
+     TProgramBuilder builder(env, *registry);
+     auto createUdf = builder.Udf("Stat.TDigest_Create");
+     auto addUdf = builder.Udf("Stat.TDigest_AddValue");
+     auto percentileUdf = builder.Udf("Stat.TDigest_GetPercentile");
+     const size_t sampleCount = 100000;
+     const double quantile = 0.25;
+     const double lowOutlier = 1.0;
+     const double highOutlier = 100.0;
+     const double majorityValue = 50.0;
+
+     TVector<TRuntimeNode> sampleNodes;
+     sampleNodes.reserve(sampleCount);
+     // All but the last two slots hold the majority value.
+     for (size_t i = 0; i + 2 < sampleCount; ++i) {
+         sampleNodes.push_back(builder.NewDataLiteral(majorityValue));
+     }
+     sampleNodes.push_back(builder.NewDataLiteral(lowOutlier));
+     sampleNodes.push_back(builder.NewDataLiteral(highOutlier));
+     TRuntimeNode sampleList = builder.AsList(sampleNodes);
+
+     // Fold1: the first item creates the digest, every later item is added to it.
+     const auto makeDigest = [&](TRuntimeNode first) {
+         std::array<TRuntimeNode, 1> createArgs = {first};
+         return builder.Apply(createUdf, createArgs);
+     };
+     const auto feedDigest = [&](TRuntimeNode item, TRuntimeNode state) {
+         std::array<TRuntimeNode, 2> addArgs = {state, item};
+         return builder.Apply(addUdf, addArgs);
+     };
+     auto digest = builder.Fold1(sampleList, makeDigest, feedDigest);
+
+     TRuntimeNode program = builder.Map(digest, [&](TRuntimeNode state) {
+         auto quantileNode = builder.NewDataLiteral(quantile);
+         std::array<TRuntimeNode, 2> percentileArgs = {state, quantileNode};
+         return builder.Apply(percentileUdf, percentileArgs);
+     });
+
+     TExploringNodeVisitor explorer;
+     explorer.Walk(program.GetNode(), env);
+     TComputationPatternOpts patternOpts(alloc.Ref(), env, GetBuiltinFactory(), registry.Get(),
+         NUdf::EValidateMode::None, NUdf::EValidatePolicy::Fail, "", EGraphPerProcess::Multi);
+     auto pattern = MakeComputationPattern(explorer, program, {}, patternOpts);
+     auto graph = pattern->Clone(patternOpts.ToComputationOptions(*rng, *clock));
+     auto value = graph->GetValue();
+     UNIT_ASSERT(value);
+     const double estimate = value.Get<double>();
+     UNIT_ASSERT_EQUAL(estimate, majorityValue);
+ }
+ }
+}
diff --git a/yql/essentials/udfs/common/stat/static/stat_udf.h b/yql/essentials/udfs/common/stat/static/stat_udf.h
new file mode 100644
index 00000000000..f0c11a6812d
--- /dev/null
+++ b/yql/essentials/udfs/common/stat/static/stat_udf.h
@@ -0,0 +1,75 @@
+#pragma once
+
+#include <yql/essentials/public/udf/udf_helpers.h>
+#include <yql/essentials/public/udf/udf_value_builder.h>
+
+#include <library/cpp/tdigest/tdigest.h>
+
+using namespace NYql;
+using namespace NUdf;
+
+namespace {
+ // Resource tag used by every Stat UDF signature that takes or returns a digest.
+ extern const char DigestResourceName[] = "Stat.TDigestResource";
+
+ // TDigest wrapped as a UDF resource under that tag, and a ref-counted pointer to it.
+ using TDigestResource = TBoxedResource<TDigest, DigestResourceName>;
+ using TDigestResourcePtr = TRefCountedPtr<TDigestResource>;
+
+ // Creates a digest resource from the first value.
+ //   args[0] - first value to put into the digest (required)
+ //   args[1] - delta, optional, defaults to 0.01
+ //   args[2] - K, optional, defaults to 25.0
+ // delta, K and the first value are forwarded, in that order, to the TDigestResource
+ // constructor. The UDF terminates with "Invalid combination of delta/K values"
+ // unless delta is non-zero and K / delta >= 1.
+ SIMPLE_UDF_WITH_OPTIONAL_ARGS(TTDigest_Create, TResource<DigestResourceName>(double, TOptional<double>, TOptional<double>), 2) {
+     Y_UNUSED(valueBuilder);
+     const double delta = args[1].GetOrDefault<double>(0.01);
+     const double K = args[2].GetOrDefault<double>(25.0);
+     // Expressed as "not valid" rather than "is invalid" on purpose: every comparison
+     // with NaN is false, so the former `delta == 0 || K / delta < 1` test let a NaN
+     // delta or K slip through. With the positive predicate NaN is rejected too.
+     const bool paramsValid = delta != 0 && K / delta >= 1;
+     if (!paramsValid) {
+         UdfTerminate((TStringBuilder() << GetPos() << " Invalid combination of delta/K values").data());
+     }
+
+     return TUnboxedValuePod(new TDigestResource(delta, K, args[0].Get<double>()));
+ }
+
+ // Adds args[1] to the digest held by args[0] and returns that same resource
+ // object (the digest is updated in place, not copied).
+ SIMPLE_STRICT_UDF(TTDigest_AddValue, TResource<DigestResourceName>(TResource<DigestResourceName>, double)) {
+     Y_UNUSED(valueBuilder);
+     TDigestResource::Validate(args[0]);
+     TDigestResource* digestHolder = static_cast<TDigestResource*>(args[0].AsBoxed().Get());
+     const double sample = args[1].Get<double>();
+     digestHolder->Get()->AddValue(sample);
+     return TUnboxedValuePod(digestHolder);
+ }
+
+ // Returns GetPercentile(args[1]) of the digest held by args[0].
+ SIMPLE_STRICT_UDF(TTDigest_GetPercentile, double(TResource<DigestResourceName>, double)) {
+     Y_UNUSED(valueBuilder);
+     TDigestResource::Validate(args[0]);
+     TDigestResource* digestHolder = static_cast<TDigestResource*>(args[0].AsBoxed().Get());
+     const double quantile = args[1].Get<double>();
+     return TUnboxedValuePod(digestHolder->Get()->GetPercentile(quantile));
+ }
+
+ // Returns the digest held by args[0] in its Serialize() string form.
+ SIMPLE_STRICT_UDF(TTDigest_Serialize, char*(TResource<DigestResourceName>)) {
+     TDigestResource::Validate(args[0]);
+     TDigestResource* digestHolder = static_cast<TDigestResource*>(args[0].AsBoxed().Get());
+     return valueBuilder->NewString(digestHolder->Get()->Serialize());
+ }
+
+ // Builds a new digest resource from the string in args[0]
+ // (presumably the output of TDigest_Serialize; the unit tests use it that way).
+ SIMPLE_UDF(TTDigest_Deserialize, TResource<DigestResourceName>(char*)) {
+     Y_UNUSED(valueBuilder);
+     const auto serialized = args[0].AsStringRef();
+     return TUnboxedValuePod(new TDigestResource(TString(serialized)));
+ }
+
+ // Returns a new digest resource constructed from the two digests held by
+ // args[0] and args[1].
+ SIMPLE_STRICT_UDF(TTDigest_Merge, TResource<DigestResourceName>(TResource<DigestResourceName>, TResource<DigestResourceName>)) {
+     Y_UNUSED(valueBuilder);
+     TDigestResource::Validate(args[0]);
+     TDigestResource::Validate(args[1]);
+     TDigestResource* lhsHolder = static_cast<TDigestResource*>(args[0].AsBoxed().Get());
+     TDigestResource* rhsHolder = static_cast<TDigestResource*>(args[1].AsBoxed().Get());
+     return TUnboxedValuePod(new TDigestResource(lhsHolder->Get(), rhsHolder->Get()));
+ }
+
+ /*
+ *
+ * TODO: Memory tracking
+ *
+ *
+ *
+ */
+
+ SIMPLE_MODULE(TStatModule, // UDF module bundling the TDigest functions below; the unit tests register it as "Stat" and resolve them as Stat.TDigest_*
+ TTDigest_Create,
+ TTDigest_AddValue,
+ TTDigest_GetPercentile,
+ TTDigest_Serialize,
+ TTDigest_Deserialize,
+ TTDigest_Merge)
+
+}
diff --git a/yql/essentials/udfs/common/stat/static/static_udf.cpp b/yql/essentials/udfs/common/stat/static/static_udf.cpp
new file mode 100644
index 00000000000..3cb1d88a1c8
--- /dev/null
+++ b/yql/essentials/udfs/common/stat/static/static_udf.cpp
@@ -0,0 +1,10 @@
+#include "stat_udf.h"
+
+namespace NYql {
+ namespace NUdf {
+     // Factory for the Stat UDF module; ownership of the new TStatModule passes
+     // to the returned TUniquePtr.
+     NUdf::TUniquePtr<NUdf::IUdfModule> CreateStatModule() {
+         NUdf::TUniquePtr<NUdf::IUdfModule> statModule(new TStatModule());
+         return statModule;
+     }
+
+ }
+}
diff --git a/yql/essentials/udfs/common/stat/static/ya.make b/yql/essentials/udfs/common/stat/static/ya.make
new file mode 100644
index 00000000000..f3cc7842eea
--- /dev/null
+++ b/yql/essentials/udfs/common/stat/static/ya.make
@@ -0,0 +1,19 @@
+LIBRARY()
+
+YQL_ABI_VERSION(
+ 2
+ 28
+ 0
+)
+
+SRCS(
+ static_udf.cpp
+ stat_udf.h
+)
+
+PEERDIR(
+ yql/essentials/public/udf
+ library/cpp/tdigest
+)
+
+END()
diff --git a/yql/essentials/udfs/common/stat/ut/ya.make b/yql/essentials/udfs/common/stat/ut/ya.make
new file mode 100644
index 00000000000..e7c32bb7ec4
--- /dev/null
+++ b/yql/essentials/udfs/common/stat/ut/ya.make
@@ -0,0 +1,19 @@
+UNITTEST_FOR(yql/essentials/udfs/common/stat/static) # Unit-test target for the static stat library; its only source lives one directory up.
+
+SRCS(
+ ../stat_udf_ut.cpp
+)
+
+PEERDIR(
+ yql/essentials/minikql/comp_nodes/llvm14
+ yql/essentials/public/udf/service/exception_policy
+ yql/essentials/sql/pg_dummy
+)
+
+YQL_LAST_ABI_VERSION()
+
+TIMEOUT(300)
+
+SIZE(MEDIUM)
+
+END() # Closes the UNITTEST_FOR() module opened above.
diff --git a/yql/essentials/udfs/common/stat/ya.make b/yql/essentials/udfs/common/stat/ya.make
new file mode 100644
index 00000000000..cbc0f71c032
--- /dev/null
+++ b/yql/essentials/udfs/common/stat/ya.make
@@ -0,0 +1,30 @@
+IF (YQL_PACKAGED) # Packaged mode: libstat_udf.so is taken from a sandbox resource (presumably prebuilt - confirm) instead of being compiled here.
+ PACKAGE()
+ FROM_SANDBOX(FILE 7319904307 OUT_NOAUTO libstat_udf.so
+ )
+ END()
+ELSE () # Regular mode: build the stat_udf module from stat_udf.cpp on top of the static library.
+YQL_UDF_CONTRIB(stat_udf)
+
+ YQL_ABI_VERSION(
+ 2
+ 28
+ 0
+ )
+
+ SRCS(
+ stat_udf.cpp
+ )
+
+ PEERDIR(
+ yql/essentials/udfs/common/stat/static
+ )
+
+ END()
+ENDIF ()
+
+
+RECURSE_FOR_TESTS(
+ ut
+)
+
+
diff --git a/yql/essentials/udfs/common/streaming/streaming_udf.cpp b/yql/essentials/udfs/common/streaming/streaming_udf.cpp
new file mode 100644
index 00000000000..bd01935321e
--- /dev/null
+++ b/yql/essentials/udfs/common/streaming/streaming_udf.cpp
@@ -0,0 +1,829 @@
+#include <yql/essentials/public/udf/udf_value.h>
+#include <yql/essentials/public/udf/udf_registrator.h>
+#include <yql/essentials/public/udf/udf_type_builder.h>
+#include <yql/essentials/public/udf/udf_value_builder.h>
+#include <yql/essentials/public/udf/udf_terminator.h>
+
+#include <util/generic/buffer.h>
+#include <util/generic/mem_copy.h>
+#include <util/generic/maybe.h>
+#include <util/generic/ptr.h>
+#include <util/string/builder.h>
+#include <util/stream/mem.h>
+#include <library/cpp/deprecated/kmp/kmp.h>
+#include <util/string/strip.h>
+#include <util/system/condvar.h>
+#include <util/system/shellcommand.h>
+#include <util/system/tempfile.h>
+#include <util/system/sysstat.h>
+
+#include <functional>
+
+using namespace NKikimr;
+using namespace NUdf;
+
+namespace {
+ // Fixed-capacity cyclic (ring) Read-Write buffer.
+ // Stored bytes may wrap around the end of the storage, so reads and writes
+ // are both processed one contiguous region at a time.
+ // Not thread safe, synchronization between reader and writer threads
+ // should be managed externally.
+ class TCyclicRWBuffer {
+ public:
+     // Creates a buffer able to hold `capacity` bytes. Declared `explicit` so
+     // that a plain size_t never converts into a buffer implicitly.
+     // NOTE(review): `capacity` is assumed to be non-zero, because positions
+     // are computed modulo Buffer.Size().
+     explicit TCyclicRWBuffer(size_t capacity)
+         : Buffer(capacity)
+         , Finished(false)
+         , DataStart(0)
+         , DataSize(0)
+     {
+         // Buffer.Size() is used as the ring capacity everywhere below.
+         Buffer.Resize(capacity);
+     }
+
+     // True once Finish() has been called.
+     bool IsFinished() const {
+         return Finished;
+     }
+
+     // Marks the buffer as finished; bytes already stored are left untouched.
+     void Finish() {
+         Finished = true;
+     }
+
+     // True when at least one unread byte is stored.
+     bool HasData() const {
+         return DataSize > 0;
+     }
+
+     // Total number of unread bytes (possibly split over two regions).
+     size_t GetDataSize() const {
+         return DataSize;
+     }
+
+     // Exposes the first contiguous region of unread data: `ptr` is set to its
+     // start and `len` to its size (0 when the buffer is empty). Data that
+     // wraps around becomes visible after CommitRead() consumed this region.
+     void GetData(const char*& ptr, size_t& len) const {
+         size_t readSize = GetDataRegionSize(DataStart, DataSize);
+         ptr = Buffer.Data() + DataStart;
+         len = readSize;
+     }
+
+     // Drops `len` bytes from the front of the stored data. `len` must not
+     // exceed the region size reported by GetData().
+     void CommitRead(size_t len) {
+         Y_DEBUG_ABORT_UNLESS(len <= GetDataRegionSize(DataStart, DataSize));
+
+         DataStart = GetBufferPosition(DataStart + len);
+         DataSize -= len;
+     }
+
+     // True when at least one more byte can be stored.
+     bool CanWrite() const {
+         return WriteSize() > 0;
+     }
+
+     // Number of free bytes.
+     size_t WriteSize() const {
+         return Buffer.Size() - DataSize;
+     }
+
+     // Copies up to min(len, WriteSize()) bytes from `ptr` into the buffer.
+     // `ptr` is advanced and `len` decreased by the amount copied, so the
+     // caller can retry with the remainder. Returns the number of bytes copied.
+     size_t Write(const char*& ptr, size_t& len) {
+         if (!CanWrite()) {
+             return 0;
+         }
+
+         size_t bytesWritten = 0;
+         size_t bytesToWrite = std::min(len, WriteSize());
+         // At most two passes: up to the end of the storage, then from its start.
+         while (bytesToWrite > 0) {
+             size_t writeStart = GetWriteStart();
+             size_t writeSize = GetDataRegionSize(writeStart, bytesToWrite);
+
+             MemCopy(Data(writeStart), ptr, writeSize);
+
+             DataSize += writeSize;
+             bytesWritten += writeSize;
+             bytesToWrite -= writeSize;
+
+             ptr += writeSize;
+             len -= writeSize;
+         }
+
+         return bytesWritten;
+     }
+
+     // Takes one chunk of at most WriteSize() bytes from `input` and stores it.
+     // Returns the number of bytes stored.
+     size_t Write(IZeroCopyInput& input) {
+         const void* ptr;
+         size_t dataLen = input.Next(&ptr, WriteSize());
+         const char* dataPtr = reinterpret_cast<const char*>(ptr);
+         return Write(dataPtr, dataLen);
+     }
+
+ private:
+     // Wraps a linear position into the storage range.
+     size_t GetBufferPosition(size_t pos) const {
+         return pos % Buffer.Size();
+     }
+
+     // Size of the contiguous part of a `size`-byte span beginning at `start`,
+     // i.e. the span clipped at the end of the storage.
+     size_t GetDataRegionSize(size_t start, size_t size) const {
+         Y_DEBUG_ABORT_UNLESS(start < Buffer.Size());
+
+         return std::min(size, Buffer.Size() - start);
+     }
+
+     // Position right after the last stored byte.
+     size_t GetWriteStart() const {
+         return GetBufferPosition(DataStart + DataSize);
+     }
+
+     // Mutable pointer to the storage byte at `pos`.
+     char* Data(size_t pos) {
+         Y_DEBUG_ABORT_UNLESS(pos < Buffer.Size());
+
+         return (Buffer.Data() + pos);
+     }
+
+ private:
+     TBuffer Buffer;
+
+     bool Finished;
+
+     size_t DataStart; // Position of the first unread byte.
+     size_t DataSize;  // Number of unread bytes.
+ };
+
+ // Settings of one streaming invocation: the input stream, the command to
+ // run with its arguments, record delimiters, buffer sizes and poll latency.
+ struct TStreamingParams {
+ public:
+     // Defaults are class-level constants. As per-instance `const` members they
+     // were copied into every object and made the struct non-assignable.
+     static constexpr size_t DefaultProcessPollLatencyMs = 5 * 1000;        // 5 seconds
+     static constexpr size_t DefaultInputBufferSizeBytes = 4 * 1024 * 1024;   // 4MB
+     static constexpr size_t DefaultOutputBufferSizeBytes = 16 * 1024 * 1024; // 16MB
+     static constexpr const char* DefaultInputDelimiter = "\n";
+     static constexpr const char* DefaultOutputDelimiter = "\n";
+
+ public:
+     TUnboxedValue InputStreamObj; // Upstream rows to feed to the process.
+     TString CommandLine;          // Command passed to TShellCommand.
+     TUnboxedValue ArgumentsList;  // List of string arguments for the command.
+     TString InputDelimiter;       // Written after every input row.
+     TString OutputDelimiter;      // Separates records in the process output.
+     size_t InputBufferSizeBytes;
+     size_t OutputBufferSizeBytes;
+     size_t ProcessPollLatencyMs;
+
+     // Starts from the defaults above; InputStreamObj, CommandLine and
+     // ArgumentsList are left for the caller to fill in.
+     TStreamingParams()
+         : InputDelimiter(DefaultInputDelimiter)
+         , OutputDelimiter(DefaultOutputDelimiter)
+         , InputBufferSizeBytes(DefaultInputBufferSizeBytes)
+         , OutputBufferSizeBytes(DefaultOutputBufferSizeBytes)
+         , ProcessPollLatencyMs(DefaultProcessPollLatencyMs)
+     {
+     }
+ };
+
+ struct TThreadSyncData { // Synchronization primitives shared by the Main thread and the Communicate thread.
+ TMutex BuffersMutex; // Held around every access to the input and output cyclic buffers.
+ TCondVar InputBufferCanReadCond; // Signalled by FetchInput and by the iterator destructor; awaited by the input stream's DoRead.
+ TCondVar MainThreadHasWorkCond; // Signalled by DoRead, DoWrite and DoFinish; awaited by FetchNextString.
+ TCondVar OutputBufferCanWriteCond; // Signalled by FetchNextString and by the iterator destructor; awaited by the output stream's DoWrite.
+ };
+
+ class TStringListBufferedInputStream: public IInputStream { // Input side of the child process: upstream rows, each followed by Delimiter, staged through a cyclic buffer.
+ public:
+ TStringListBufferedInputStream(TUnboxedValue rowsStream, const TString& delimiter, size_t bufferSizeBytes,
+ TThreadSyncData& syncData, TSourcePosition pos)
+ : RowsStream(rowsStream)
+ , Delimiter(delimiter)
+ , SyncData(syncData)
+ , Pos_(pos)
+ , DelimiterMatcher(delimiter)
+ , DelimiterInput(delimiter)
+ , Buffer(bufferSizeBytes)
+ , CurReadMode(ReadMode::Start)
+ {
+ }
+
+ TStringListBufferedInputStream(const TStringListBufferedInputStream&) = delete;
+ TStringListBufferedInputStream& operator=(const TStringListBufferedInputStream&) = delete;
+
+ TCyclicRWBuffer& GetBuffer() { // Callers in this file access the buffer under SyncData.BuffersMutex.
+ return Buffer;
+ }
+
+ // Fills the (empty) buffer with rows and delimiters fetched from the upstream stream.
+ // Called from Main thread. Returns Yield if upstream yielded, Ok otherwise (end of input is reported via Buffer.Finish()).
+ EFetchStatus FetchInput() {
+ with_lock (SyncData.BuffersMutex) { // NOTE(review): FetchNextString calls this while already holding BuffersMutex, so this relies on TMutex being re-entrant - confirm.
+ Y_DEBUG_ABORT_UNLESS(!Buffer.HasData());
+ Y_DEBUG_ABORT_UNLESS(Buffer.CanWrite());
+
+ bool receivedYield = false;
+
+ while (Buffer.CanWrite() && CurReadMode != ReadMode::Done && !receivedYield) { // State machine: Start -> String -> Delimiter -> Start ... -> Done.
+ switch (CurReadMode) {
+ case ReadMode::Start: { // Fetch the next row; any status other than Ok or Yield ends the input.
+ auto status = ReadNextString();
+ if (status == EFetchStatus::Yield) {
+ receivedYield = true;
+ break;
+ }
+
+ CurReadMode = (status == EFetchStatus::Ok)
+ ? ReadMode::String
+ : ReadMode::Done;
+
+ break;
+ }
+
+ case ReadMode::String: // Copy the current row into the buffer; may take several passes when the buffer fills up.
+ if (CurStringInput.Exhausted()) {
+ DelimiterInput.Reset(Delimiter.data(), Delimiter.size());
+ CurReadMode = ReadMode::Delimiter;
+ break;
+ }
+
+ Buffer.Write(CurStringInput);
+ break;
+
+ case ReadMode::Delimiter: // Append the delimiter after the row, then go back to fetching.
+ if (DelimiterInput.Exhausted()) {
+ CurReadMode = ReadMode::Start;
+ break;
+ }
+
+ Buffer.Write(DelimiterInput);
+ break;
+
+ default:
+ break;
+ }
+ }
+
+ if (CurReadMode == ReadMode::Done) {
+ Buffer.Finish(); // No more rows: lets DoRead return 0 once the buffer is drained.
+ }
+
+ SyncData.InputBufferCanReadCond.Signal(); // Wake DoRead, which may be waiting for data.
+ return receivedYield ? EFetchStatus::Yield : EFetchStatus::Ok;
+ }
+ }
+
+ private:
+ // Read data to pass into the child process input pipe.
+ // Called from Communicate thread. Blocks until data is available; returns 0 at end of input.
+ size_t DoRead(void* buf, size_t len) override {
+ try {
+ with_lock (SyncData.BuffersMutex) {
+ while (!Buffer.HasData() && !Buffer.IsFinished()) { // Buffer drained: ask the Main thread for a refill and wait for it.
+ SyncData.MainThreadHasWorkCond.Signal();
+ SyncData.InputBufferCanReadCond.WaitI(SyncData.BuffersMutex);
+ }
+
+ if (!Buffer.HasData()) {
+ Y_DEBUG_ABORT_UNLESS(Buffer.IsFinished());
+ return 0; // End of input.
+ }
+
+ const char* dataPtr;
+ size_t dataLen;
+ Buffer.GetData(dataPtr, dataLen);
+
+ size_t bytesRead = std::min(dataLen, len); // Only the first contiguous region is served per call.
+ Y_DEBUG_ABORT_UNLESS(bytesRead > 0);
+ memcpy(buf, dataPtr, bytesRead);
+ Buffer.CommitRead(bytesRead);
+ return bytesRead;
+ }
+
+ ythrow yexception(); // Every path inside with_lock returns; NOTE(review): presumably present only to satisfy return-path analysis.
+ } catch (const std::exception& e) {
+ UdfTerminate((TStringBuilder() << Pos_ << " " << e.what()).data());
+ }
+ }
+
+ EFetchStatus ReadNextString() { // Fetches one upstream row into CurString/CurStringInput; throws if the row contains the delimiter.
+ TUnboxedValue item;
+ EFetchStatus status = RowsStream.Fetch(item);
+ switch (status) {
+ case EFetchStatus::Yield:
+ case EFetchStatus::Finish:
+ return status;
+ default:
+ break;
+ }
+
+ CurString = item.GetElement(0); // Element 0 of the row is the string payload.
+ CurStringInput.Reset(CurString.AsStringRef().Data(), CurString.AsStringRef().Size());
+
+ // Check that input string doesn't contain delimiters
+ const char* match;
+ Y_UNUSED(match);
+ if (DelimiterMatcher.SubStr(
+ CurString.AsStringRef().Data(),
+ CurString.AsStringRef().Data() + CurString.AsStringRef().Size(),
+ match))
+ {
+ ythrow yexception() << "Delimiter found in input string.";
+ }
+
+ return EFetchStatus::Ok;
+ }
+
+ private:
+ enum class ReadMode { // Phase of FetchInput's state machine.
+ Start,
+ String,
+ Delimiter,
+ Done
+ };
+
+ TUnboxedValue RowsStream; // Upstream stream of rows.
+ TString Delimiter;
+ TThreadSyncData& SyncData;
+ TSourcePosition Pos_; // Prefixed to the message passed to UdfTerminate.
+
+ TKMPMatcher DelimiterMatcher; // Searches each row for the delimiter.
+ TUnboxedValue CurString; // Keeps the current row alive while CurStringInput reads from it.
+ TMemoryInput CurStringInput; // Unwritten remainder of the current row.
+ TMemoryInput DelimiterInput; // Unwritten remainder of the delimiter.
+
+ TCyclicRWBuffer Buffer;
+
+ ReadMode CurReadMode;
+ };
+
+ class TStringListBufferedOutputStream: public IOutputStream { // Output side of the child process: buffers its output and splits it into Delimiter-separated records.
+ public:
+ TStringListBufferedOutputStream(const TString& delimiter, size_t stringBufferSizeBytes,
+ TStringListBufferedInputStream& inputStream, TThreadSyncData& syncData)
+ : Delimiter(delimiter)
+ , InputStream(inputStream)
+ , SyncData(syncData)
+ , HasDelimiterMatch(false)
+ , DelimiterMatcherCallback(HasDelimiterMatch)
+ , DelimiterMatcher(delimiter.data(), delimiter.data() + delimiter.size(), &DelimiterMatcherCallback)
+ , Buffer(stringBufferSizeBytes)
+ {
+ }
+
+ TStringListBufferedOutputStream(const TStringListBufferedOutputStream&) = delete;
+ TStringListBufferedOutputStream& operator=(const TStringListBufferedOutputStream&) = delete;
+
+ // Extracts the next record from the buffer into str, refilling the input buffer when the child needs more data.
+ // Called from Main thread. Returns Ok with a record, Yield when upstream yielded, Finish when the output is exhausted.
+ EFetchStatus FetchNextString(TString& str) {
+ while (!HasDelimiterMatch) { // Accumulate bytes in CurrentString until a full delimiter has been seen.
+ with_lock (SyncData.BuffersMutex) {
+ bool inputHasData;
+ bool bufferNeedsData;
+
+ do {
+ inputHasData = InputStream.GetBuffer().HasData() || InputStream.GetBuffer().IsFinished();
+ bufferNeedsData = !Buffer.HasData() && !Buffer.IsFinished();
+
+ if (inputHasData && bufferNeedsData) { // Input is pending (or complete) but there is no output to parse yet: wait for the Communicate thread.
+ SyncData.MainThreadHasWorkCond.WaitI(SyncData.BuffersMutex);
+ }
+ } while (inputHasData && bufferNeedsData);
+
+ if (!inputHasData) { // Input buffer is drained and not finished: refill it from upstream.
+ auto status = InputStream.FetchInput();
+ if (status == EFetchStatus::Yield) {
+ return EFetchStatus::Yield;
+ }
+ }
+
+ if (bufferNeedsData) { // Still nothing to parse; re-evaluate both buffers.
+ continue;
+ }
+
+ if (!Buffer.HasData()) { // Output is finished: flush the trailing record that has no delimiter; an empty remainder means Finish.
+ Y_DEBUG_ABORT_UNLESS(Buffer.IsFinished());
+ str = TString(TStringBuf(CurrentString.Data(), CurrentString.Size()));
+ CurrentString.Clear();
+ return str.empty() ? EFetchStatus::Finish : EFetchStatus::Ok;
+ }
+
+ const char* data;
+ size_t size;
+ Buffer.GetData(data, size);
+
+ size_t read = 0;
+ while (!HasDelimiterMatch && read < size) { // Feed the matcher byte by byte; its callback sets HasDelimiterMatch.
+ DelimiterMatcher.Push(data[read]);
+ ++read;
+ }
+
+ Y_DEBUG_ABORT_UNLESS(read > 0);
+ CurrentString.Append(data, read);
+ bool signalCanWrite = !Buffer.CanWrite(); // Buffer was full, so DoWrite may be blocked waiting for space.
+ Buffer.CommitRead(read);
+
+ if (signalCanWrite) {
+ SyncData.OutputBufferCanWriteCond.Signal();
+ }
+ }
+ }
+
+ Y_DEBUG_ABORT_UNLESS(CurrentString.Size() >= Delimiter.size());
+ str = TString(TStringBuf(CurrentString.Data(), CurrentString.Size() - Delimiter.size())); // Strip the trailing delimiter from the record.
+ CurrentString.Clear();
+ HasDelimiterMatch = false;
+
+ return EFetchStatus::Ok;
+ }
+
+ TCyclicRWBuffer& GetBuffer() { // Callers in this file access the buffer under SyncData.BuffersMutex.
+ return Buffer;
+ }
+
+ private:
+ // Write data from child process output to buffer.
+ // Called from Communicate thread. Blocks while the buffer is full; drops the remaining data once the buffer is finished.
+ void DoWrite(const void* buf, size_t len) override {
+ const char* curStrPos = reinterpret_cast<const char*>(buf);
+ size_t curStrLen = len;
+
+ while (curStrLen > 0) { // Buffer.Write advances curStrPos and decreases curStrLen by the amount stored.
+ with_lock (SyncData.BuffersMutex) {
+ while (!Buffer.CanWrite() && !Buffer.IsFinished()) {
+ SyncData.OutputBufferCanWriteCond.WaitI(SyncData.BuffersMutex);
+ }
+
+ if (Buffer.IsFinished()) { // Buffer already finished (e.g. by the iterator destructor): discard the rest.
+ return;
+ }
+
+ bool signalCanRead = !Buffer.HasData(); // Buffer was empty, so the Main thread may be waiting for output.
+ Buffer.Write(curStrPos, curStrLen);
+
+ if (signalCanRead) {
+ SyncData.MainThreadHasWorkCond.Signal();
+ }
+ }
+ }
+ }
+
+ void DoFinish() override { // Marks the output as complete and wakes the Main thread.
+ IOutputStream::DoFinish();
+
+ with_lock (SyncData.BuffersMutex) {
+ Buffer.Finish();
+ SyncData.MainThreadHasWorkCond.Signal();
+ }
+ }
+
+ private:
+ class MatcherCallback: public TKMPStreamMatcher<char>::ICallback { // Raises the referenced flag whenever the stream matcher reports a match.
+ public:
+ MatcherCallback(bool& hasMatch)
+ : HasMatch(hasMatch)
+ {
+ }
+
+ void OnMatch(const char* begin, const char* end) override {
+ Y_UNUSED(begin);
+ Y_UNUSED(end);
+
+ HasMatch = true;
+ }
+
+ private:
+ bool& HasMatch;
+ };
+
+ private:
+ TString Delimiter;
+ TStringListBufferedInputStream& InputStream; // Refilled from FetchNextString when the child runs out of input.
+ TThreadSyncData& SyncData;
+
+ bool HasDelimiterMatch; // Set by DelimiterMatcherCallback, reset after each record.
+ MatcherCallback DelimiterMatcherCallback;
+ TKMPStreamMatcher<char> DelimiterMatcher;
+
+ TBuffer CurrentString; // Bytes of the record being assembled, delimiter included.
+
+ TCyclicRWBuffer Buffer;
+ };
+
+ class TStreamingOutputListIterator { // Starts the child process on the first Fetch and returns its output records as one-element rows.
+ public:
+ TStreamingOutputListIterator(const TStreamingParams& params, const IValueBuilder* valueBuilder, TSourcePosition pos)
+ : StreamingParams(params)
+ , ValueBuilder(valueBuilder)
+ , Pos_(pos)
+ {
+ }
+
+ TStreamingOutputListIterator(const TStreamingOutputListIterator&) = delete;
+ TStreamingOutputListIterator& operator=(const TStreamingOutputListIterator&) = delete;
+
+ ~TStreamingOutputListIterator() { // Terminates the process (if it was started) and waits for it.
+ if (ShellCommand) {
+ Y_DEBUG_ABORT_UNLESS(InputStream && OutputStream);
+
+ try {
+ ShellCommand->Terminate();
+ } catch (const std::exception& e) {
+ Cerr << CurrentExceptionMessage(); // Best effort: report the failure and continue the teardown.
+ }
+
+ // Let Communicate thread finish.
+ with_lock (ThreadSyncData.BuffersMutex) { // Finishing both buffers releases DoRead and DoWrite if they are blocked.
+ InputStream->GetBuffer().Finish();
+ OutputStream->GetBuffer().Finish();
+ ThreadSyncData.InputBufferCanReadCond.Signal();
+ ThreadSyncData.OutputBufferCanWriteCond.Signal();
+ }
+
+ ShellCommand->Wait();
+ }
+ }
+
+ EFetchStatus Fetch(TUnboxedValue& result) { // Returns the next output record in result; any exception is turned into UdfTerminate.
+ try {
+ EFetchStatus status = EFetchStatus::Ok;
+
+ if (!ProcessStarted()) {
+ StartProcess();
+
+ // Don't try to fetch data if there was a problem starting the process,
+ // this causes infinite wait on Windows system due to incorrect ShellCommand behavior.
+ if (ShellCommand->GetStatus() != TShellCommand::SHELL_RUNNING && ShellCommand->GetStatus() != TShellCommand::SHELL_FINISHED) {
+ status = EFetchStatus::Finish; // Falls through to the status check below, which reports the error.
+ }
+ }
+
+ if (status == EFetchStatus::Ok) {
+ status = OutputStream->FetchNextString(CurrentRecord);
+ }
+
+ if (status == EFetchStatus::Finish) { // End of output: only SHELL_FINISHED is a clean end, every other status throws.
+ switch (ShellCommand->GetStatus()) {
+ case TShellCommand::SHELL_FINISHED:
+ break;
+ case TShellCommand::SHELL_INTERNAL_ERROR:
+ ythrow yexception() << "Internal error running process: " << ShellCommand->GetInternalError();
+ break;
+ case TShellCommand::SHELL_ERROR:
+ ythrow yexception() << "Error running user process: " << ShellCommand->GetError();
+ break;
+ default:
+ ythrow yexception() << "Unexpected shell command status: " << (int)ShellCommand->GetStatus();
+ }
+ return EFetchStatus::Finish;
+ }
+
+ if (status == EFetchStatus::Ok) { // Wrap the record into a one-element array holding the string.
+ TUnboxedValue* items = nullptr;
+ result = ValueBuilder->NewArray(1, items);
+ *items = ValueBuilder->NewString(TStringRef(CurrentRecord.data(), CurrentRecord.size()));
+ }
+
+ return status; // Ok or Yield.
+ } catch (const std::exception& e) {
+ UdfTerminate((TStringBuilder() << Pos_ << " " << e.what()).data());
+ }
+ }
+
+ private:
+ void StartProcess() { // Creates both buffered streams and runs the command asynchronously with them attached.
+ InputStream.Reset(new TStringListBufferedInputStream(
+ StreamingParams.InputStreamObj, StreamingParams.InputDelimiter,
+ StreamingParams.InputBufferSizeBytes, ThreadSyncData, Pos_));
+
+ OutputStream.Reset(new TStringListBufferedOutputStream(
+ StreamingParams.OutputDelimiter, StreamingParams.OutputBufferSizeBytes, *InputStream,
+ ThreadSyncData));
+
+ TShellCommandOptions opt;
+ opt.SetAsync(true).SetUseShell(false).SetLatency(StreamingParams.ProcessPollLatencyMs).SetInputStream(InputStream.Get()).SetOutputStream(OutputStream.Get()).SetCloseStreams(true).SetCloseAllFdsOnExec(true);
+
+ TList<TString> commandArguments;
+ auto argumetsIterator = StreamingParams.ArgumentsList.GetListIterator();
+ for (TUnboxedValue item; argumetsIterator.Next(item);) { // Copy every list element into an owned TString.
+ commandArguments.emplace_back(TStringBuf(item.AsStringRef()));
+ }
+
+ ShellCommand.Reset(new TShellCommand(StreamingParams.CommandLine, commandArguments, opt));
+ ShellCommand->Run();
+ }
+
+ bool ProcessStarted() const { // True once StartProcess has created the command object.
+ return !!ShellCommand;
+ }
+
+ private:
+ TStreamingParams StreamingParams;
+ const IValueBuilder* ValueBuilder;
+ TSourcePosition Pos_; // Prefixed to the message passed to UdfTerminate.
+
+ TThreadSyncData ThreadSyncData; // Shared with both streams by reference.
+
+ THolder<TShellCommand> ShellCommand;
+ THolder<TStringListBufferedInputStream> InputStream;
+ THolder<TStringListBufferedOutputStream> OutputStream;
+
+ TString CurrentRecord; // Reused storage for the record returned by the last Fetch.
+ };
+
+ // Stream value produced by the Streaming UDFs. The process iterator is
+ // created on the first Fetch and released as soon as it reports Finish.
+ class TStreamingOutput: public TBoxedValue {
+ public:
+     TStreamingOutput(const TStreamingParams& params, const IValueBuilder* valueBuilder, TSourcePosition pos)
+         : StreamingParams(params)
+         , ValueBuilder(valueBuilder)
+         , Pos_(pos)
+     {
+     }
+
+     TStreamingOutput(const TStreamingOutput&) = delete;
+     TStreamingOutput& operator=(const TStreamingOutput&) = delete;
+
+ private:
+     // Forwards to the iterator. After the first Finish every further call
+     // returns Finish without touching the process again.
+     EFetchStatus Fetch(TUnboxedValue& result) override {
+         if (IsFinished) {
+             return EFetchStatus::Finish;
+         }
+
+         if (!Iterator) {
+             Iterator.Reset(new TStreamingOutputListIterator(StreamingParams, ValueBuilder, Pos_));
+         }
+
+         const EFetchStatus status = Iterator->Fetch(result);
+         if (status != EFetchStatus::Finish) {
+             return status;
+         }
+
+         // The stream is exhausted: remember that and drop the iterator.
+         IsFinished = true;
+         Iterator.Reset();
+         return EFetchStatus::Finish;
+     }
+
+     TStreamingParams StreamingParams;
+     const IValueBuilder* ValueBuilder;
+     TSourcePosition Pos_;
+     bool IsFinished = false;
+     THolder<TStreamingOutputListIterator> Iterator;
+ };
+
+ // Streaming output whose command is an inline script: the script text is
+ // written to a temporary file that is made executable.
+ class TStreamingScriptOutput: public TStreamingOutput {
+ public:
+     TStreamingScriptOutput(const TStreamingParams& params, const IValueBuilder* valueBuilder,
+                            TSourcePosition pos, const TString& script, const TString& scriptFilename)
+         : TStreamingOutput(params, valueBuilder, pos)
+         , ScriptFileHandle(scriptFilename)
+     {
+         const auto scriptBody = StripBeforeShebang(script);
+         ScriptFileHandle.Write(scriptBody.data(), scriptBody.size());
+         ScriptFileHandle.Close();
+
+         const bool chmodFailed = Chmod(ScriptFileHandle.Name().c_str(), MODE0755) != 0;
+         if (chmodFailed) {
+             ythrow yexception() << "Chmod failed for script file:" << ScriptFileHandle.Name()
+                                 << " with error: " << LastSystemErrorText();
+         }
+     }
+
+ private:
+     // Returns the script without its leading whitespace when that whitespace
+     // is the only thing in front of the first "#!"; otherwise returns the
+     // script unchanged.
+     static TString StripBeforeShebang(const TString& script) {
+         const auto shebangPos = script.find("#!");
+         if (shebangPos == TString::npos) {
+             return script;
+         }
+
+         auto leftStripped = StripStringLeft(script);
+         // Equal sizes mean the stripped text starts exactly at the shebang.
+         if (leftStripped.size() != script.size() - shebangPos) {
+             return script;
+         }
+
+         return leftStripped;
+     }
+
+     TTempFileHandle ScriptFileHandle;
+ };
+
+ // Callable behind "Process": wraps its arguments into TStreamingParams and
+ // returns a TStreamingOutput for them.
+ class TStreamingProcess: public TBoxedValue {
+ public:
+     TStreamingProcess(TSourcePosition pos)
+         : Pos_(pos)
+     {
+     }
+
+     static TStringRef Name() {
+         static auto name = TStringRef::Of("Process");
+         return name;
+     }
+
+ private:
+     // args: [0] input rows, [1] command line, [2] optional argument list,
+     // [3] optional input delimiter, [4] optional output delimiter.
+     TUnboxedValue Run(const IValueBuilder* valueBuilder,
+                       const TUnboxedValuePod* args) const override {
+         auto rowsArg = args[0];
+         auto commandArg = args[1].AsStringRef();
+         auto commandArgsArg = args[2];
+         auto inDelimArg = args[3];
+         auto outDelimArg = args[4];
+
+         Y_DEBUG_ABORT_UNLESS(rowsArg.IsBoxed());
+
+         TStreamingParams params;
+         params.InputStreamObj = TUnboxedValuePod(rowsArg);
+         params.CommandLine = TString(TStringBuf(commandArg));
+
+         // A missing argument list is replaced by an empty one.
+         if (!commandArgsArg) {
+             params.ArgumentsList = valueBuilder->NewEmptyList();
+         } else {
+             params.ArgumentsList = TUnboxedValue(commandArgsArg.GetOptionalValue());
+         }
+
+         // Delimiters keep their defaults unless given explicitly.
+         if (inDelimArg) {
+             params.InputDelimiter = TString(TStringBuf(inDelimArg.AsStringRef()));
+         }
+         if (outDelimArg) {
+             params.OutputDelimiter = TString(TStringBuf(outDelimArg.AsStringRef()));
+         }
+
+         return TUnboxedValuePod(new TStreamingOutput(params, valueBuilder, Pos_));
+     }
+
+ private:
+     TSourcePosition Pos_;
+ };
+
+ // Callable behind "ProcessInline": like "Process", but args[1] is the text
+ // of a script that is run from a temporary file.
+ class TStreamingProcessInline: public TBoxedValue {
+ public:
+     TStreamingProcessInline(TSourcePosition pos)
+         : Pos_(pos)
+     {
+     }
+
+     static TStringRef Name() {
+         static auto name = TStringRef::Of("ProcessInline");
+         return name;
+     }
+
+ private:
+     // args: [0] input rows, [1] script text, [2] optional argument list,
+     // [3] optional input delimiter, [4] optional output delimiter.
+     TUnboxedValue Run(const IValueBuilder* valueBuilder,
+                       const TUnboxedValuePod* args) const override {
+         auto rowsArg = args[0];
+         auto scriptTextArg = args[1].AsStringRef();
+         auto commandArgsArg = args[2];
+         auto inDelimArg = args[3];
+         auto outDelimArg = args[4];
+
+         TString script(scriptTextArg);
+         // The temporary file name doubles as the command line to execute.
+         TString scriptFilename = MakeTempName(".");
+
+         TStreamingParams params;
+         params.InputStreamObj = TUnboxedValuePod(rowsArg);
+         params.CommandLine = scriptFilename;
+
+         // A missing argument list is replaced by an empty one.
+         if (!commandArgsArg) {
+             params.ArgumentsList = valueBuilder->NewEmptyList();
+         } else {
+             params.ArgumentsList = TUnboxedValue(commandArgsArg.GetOptionalValue());
+         }
+
+         // Delimiters keep their defaults unless given explicitly.
+         if (inDelimArg) {
+             params.InputDelimiter = TString(TStringBuf(inDelimArg.AsStringRef()));
+         }
+         if (outDelimArg) {
+             params.OutputDelimiter = TString(TStringBuf(outDelimArg.AsStringRef()));
+         }
+
+         return TUnboxedValuePod(new TStreamingScriptOutput(params, valueBuilder, Pos_, script, scriptFilename));
+     }
+
+ private:
+     TSourcePosition Pos_;
+ };
+
+ // UDF module "Streaming" exposing the Process and ProcessInline functions.
+ class TStreamingModule: public IUdfModule {
+ public:
+     TStringRef Name() const {
+         return TStringRef::Of("Streaming");
+     }
+
+     void CleanupOnTerminate() const final {
+     }
+
+     void GetAllFunctions(IFunctionsSink& sink) const final {
+         sink.Add(TStreamingProcess::Name());
+         sink.Add(TStreamingProcessInline::Name());
+     }
+
+     // Describes the signature of the function called `name` and, unless only
+     // types were requested, attaches its implementation. Exceptions are
+     // reported through builder.SetError().
+     void BuildFunctionTypeInfo(
+         const TStringRef& name,
+         NUdf::TType* userType,
+         const TStringRef& typeConfig,
+         ui32 flags,
+         IFunctionTypeInfoBuilder& builder) const override {
+         try {
+             Y_UNUSED(userType);
+             Y_UNUSED(typeConfig);
+
+             const bool typesOnly = (flags & TFlags::TypesOnly);
+
+             auto optionalStringType = builder.Optional()->Item<char*>().Build();
+             auto rowType = builder.Struct(1)->AddField("Data", TDataType<char*>::Id, nullptr).Build();
+             auto rowsType = builder.Stream()->Item(rowType).Build();
+             auto stringListType = builder.List()->Item(TDataType<char*>::Id).Build();
+             auto optionalStringListType = builder.Optional()->Item(stringListType).Build();
+
+             // Both functions share one signature: rows and a string, followed
+             // by three optional arguments (string list, string, string).
+             const auto declareSignature = [&]() {
+                 builder.Returns(rowsType)
+                     .Args()
+                     ->Add(rowsType)
+                     .Add<char*>()
+                     .Add(optionalStringListType)
+                     .Add(optionalStringType)
+                     .Add(optionalStringType)
+                     .Done()
+                     .OptionalArgs(3);
+             };
+
+             if (TStreamingProcess::Name() == name) {
+                 declareSignature();
+                 if (!typesOnly) {
+                     builder.Implementation(new TStreamingProcess(builder.GetSourcePosition()));
+                 }
+             } else if (TStreamingProcessInline::Name() == name) {
+                 declareSignature();
+                 if (!typesOnly) {
+                     builder.Implementation(new TStreamingProcessInline(builder.GetSourcePosition()));
+                 }
+             }
+         } catch (const std::exception& e) {
+             builder.SetError(CurrentExceptionMessage());
+         }
+     }
+ };
+
+}
+
+REGISTER_MODULES(TStreamingModule)
diff --git a/yql/essentials/udfs/common/streaming/test/canondata/result.json b/yql/essentials/udfs/common/streaming/test/canondata/result.json
new file mode 100644
index 00000000000..311aa7bb59d
--- /dev/null
+++ b/yql/essentials/udfs/common/streaming/test/canondata/result.json
@@ -0,0 +1,44 @@
+{
+ "test.test[Big]": [
+ {
+ "checksum": "6ea4faa20341a15cc8ff132ede9be694",
+ "size": 1909,
+ "uri": "https://{canondata_backend}/1130705/0ba4949d004901679d526dece7802426a6bb3667/resource.tar#test.test_Big_/results.txt"
+ }
+ ],
+ "test.test[Empty]": [
+ {
+ "checksum": "7a81241874ebe3aaa437d8e6abe6af55",
+ "size": 574,
+ "uri": "https://{canondata_backend}/212715/eeb78aadf48e5da34543a0dd89f2554c391a4ad5/resource.tar#test.test_Empty_/results.txt"
+ }
+ ],
+ "test.test[File]": [
+ {
+ "checksum": "a784abbfe20172c03bed177628a71c79",
+ "size": 10454,
+ "uri": "https://{canondata_backend}/212715/eeb78aadf48e5da34543a0dd89f2554c391a4ad5/resource.tar#test.test_File_/results.txt"
+ }
+ ],
+ "test.test[Simple]": [
+ {
+ "checksum": "18a4d9c3e1efd491be7844e09066ebe4",
+ "size": 2331,
+ "uri": "https://{canondata_backend}/1130705/0ba4949d004901679d526dece7802426a6bb3667/resource.tar#test.test_Simple_/results.txt"
+ }
+ ],
+ "test.test[YieldSwitchEmpty]": [
+ {
+ "checksum": "3f1707fb49aabaaadbb283a3aa34ea5b",
+ "size": 2447,
+ "uri": "https://{canondata_backend}/1130705/0ba4949d004901679d526dece7802426a6bb3667/resource.tar#test.test_YieldSwitchEmpty_/results.txt"
+ }
+ ],
+ "test.test[Yield]": [
+ {
+ "checksum": "3a6152c18e813c6be8fe23308ba05fc7",
+ "size": 1085,
+ "uri": "https://{canondata_backend}/212715/eeb78aadf48e5da34543a0dd89f2554c391a4ad5/resource.tar#test.test_Yield_/results.txt"
+ }
+ ]
+}
diff --git a/yql/essentials/udfs/common/streaming/test/cases/Big.sql b/yql/essentials/udfs/common/streaming/test/cases/Big.sql
new file mode 100644
index 00000000000..29a08c74ff7
--- /dev/null
+++ b/yql/essentials/udfs/common/streaming/test/cases/Big.sql
@@ -0,0 +1,48 @@
+/* syntax version 1 */
+SELECT YQL::@@(block '(
+ (let times16 (lambda '(x) (Concat x (Concat x (Concat x (Concat x (Concat x (Concat x (Concat x (Concat x (Concat x (Concat x (Concat x (Concat x (Concat x (Concat x (Concat x x)))))))))))))))))
+ (let s2_8 (Apply times16 (String '"1000000000000007")))
+ (let s2_12 (Apply times16 s2_8))
+ (let s2_16 (Apply times16 s2_12))
+ (let s2_20 (Apply times16 s2_16))
+ (let s2_24 (Apply times16 s2_20))
+
+ (let s2_12_1 (Concat s2_12 (String '"762")))
+ (let vt (VariantType (TupleType (DataType 'Int32) (DataType 'String))))
+ (let inputRows (AsList
+ (Variant s2_24 '1 vt)
+ (Variant s2_8 '1 vt)
+ (Variant s2_12 '1 vt)
+ (Variant s2_12_1 '1 vt)
+ (Variant s2_16 '1 vt)
+ (Variant s2_8 '1 vt)
+ (Variant s2_12_1 '1 vt)
+ (Variant s2_24 '1 vt)
+ (Variant s2_12_1 '1 vt)
+ (Variant s2_24 '1 vt)
+ (Variant s2_24 '1 vt)
+ (Variant s2_12_1 '1 vt)
+ (Variant s2_12_1 '1 vt)
+ (Variant s2_24 '1 vt)
+ (Variant s2_12_1 '1 vt)
+ (Variant s2_12_1 '1 vt)
+ (Variant s2_16 '1 vt)
+ ))
+
+ (let udf (Udf '"Streaming.Process"))
+
+ (let pr (lambda '(x) (block '(
+ (let res (AsStruct '('Data x)))
+ (return res)
+ ))))
+
+ (let tr1 (lambda '(x) (block '(
+ (let y (OrderedMap x pr))
+ (return (Apply udf y (String '"cat"))))
+ )))
+
+ (let hugeResult (Switch (Iterator inputRows (DependsOn (String 'A))) '1 '('1) tr1))
+ (let md5Udf (Udf '"Digest.Md5Hex"))
+ (let shortResult (OrderedMap hugeResult (lambda '(x) (Apply md5Udf (Member x 'Data)))))
+ (return (Collect shortResult))
+))@@;
diff --git a/yql/essentials/udfs/common/streaming/test/cases/Empty.in b/yql/essentials/udfs/common/streaming/test/cases/Empty.in
new file mode 100644
index 00000000000..0f7f64882e5
--- /dev/null
+++ b/yql/essentials/udfs/common/streaming/test/cases/Empty.in
@@ -0,0 +1,100 @@
+{"key"="1";"subkey"="1";"value"="Input line #13"};
+{"key"="1";"subkey"="1";"value"="Input line #35"};
+{"key"="1";"subkey"="1";"value"="Input line #76"};
+{"key"="1";"subkey"="1";"value"="Input line #70"};
+{"key"="1";"subkey"="1";"value"="Input line #9"};
+{"key"="1";"subkey"="1";"value"="Input line #63"};
+{"key"="1";"subkey"="1";"value"="Input line #53"};
+{"key"="1";"subkey"="1";"value"="Input line #89"};
+{"key"="1";"subkey"="1";"value"="Input line #31"};
+{"key"="1";"subkey"="1";"value"="Input line #4"};
+{"key"="1";"subkey"="1";"value"="Input line #65"};
+{"key"="1";"subkey"="1";"value"="Input line #64"};
+{"key"="1";"subkey"="1";"value"="Input line #37"};
+{"key"="1";"subkey"="1";"value"="Input line #79"};
+{"key"="1";"subkey"="1";"value"="Input line #51"};
+{"key"="1";"subkey"="1";"value"="Input line #59"};
+{"key"="1";"subkey"="1";"value"="Input line #67"};
+{"key"="1";"subkey"="1";"value"="Input line #98"};
+{"key"="1";"subkey"="1";"value"="Input line #94"};
+{"key"="1";"subkey"="1";"value"="Input line #55"};
+{"key"="1";"subkey"="1";"value"="Input line #80"};
+{"key"="1";"subkey"="1";"value"="Input line #96"};
+{"key"="1";"subkey"="1";"value"="Input line #27"};
+{"key"="1";"subkey"="1";"value"="Input line #29"};
+{"key"="1";"subkey"="1";"value"="Input line #84"};
+{"key"="1";"subkey"="1";"value"="Input line #77"};
+{"key"="1";"subkey"="1";"value"="Input line #19"};
+{"key"="1";"subkey"="1";"value"="Input line #22"};
+{"key"="1";"subkey"="1";"value"="Input line #21"};
+{"key"="1";"subkey"="1";"value"="Input line #49"};
+{"key"="1";"subkey"="1";"value"="Input line #93"};
+{"key"="1";"subkey"="1";"value"="Input line #61"};
+{"key"="1";"subkey"="1";"value"="Input line #71"};
+{"key"="1";"subkey"="1";"value"="Input line #15"};
+{"key"="1";"subkey"="1";"value"="Input line #92"};
+{"key"="1";"subkey"="1";"value"="Input line #50"};
+{"key"="1";"subkey"="1";"value"="Input line #14"};
+{"key"="1";"subkey"="1";"value"="Input line #99"};
+{"key"="1";"subkey"="1";"value"="Input line #57"};
+{"key"="1";"subkey"="1";"value"="Input line #10"};
+{"key"="1";"subkey"="1";"value"="Input line #73"};
+{"key"="1";"subkey"="1";"value"="Input line #54"};
+{"key"="1";"subkey"="1";"value"="Input line #43"};
+{"key"="1";"subkey"="1";"value"="Input line #17"};
+{"key"="1";"subkey"="1";"value"="Input line #34"};
+{"key"="1";"subkey"="1";"value"="Input line #36"};
+{"key"="1";"subkey"="1";"value"="Input line #45"};
+{"key"="1";"subkey"="1";"value"="Input line #30"};
+{"key"="1";"subkey"="1";"value"="Input line #72"};
+{"key"="1";"subkey"="1";"value"="Input line #90"};
+{"key"="1";"subkey"="1";"value"="Input line #47"};
+{"key"="1";"subkey"="1";"value"="Input line #86"};
+{"key"="1";"subkey"="1";"value"="Input line #56"};
+{"key"="1";"subkey"="1";"value"="Input line #38"};
+{"key"="1";"subkey"="1";"value"="Input line #52"};
+{"key"="1";"subkey"="1";"value"="Input line #42"};
+{"key"="1";"subkey"="1";"value"="Input line #1"};
+{"key"="1";"subkey"="1";"value"="Input line #82"};
+{"key"="1";"subkey"="1";"value"="Input line #48"};
+{"key"="1";"subkey"="1";"value"="Input line #75"};
+{"key"="1";"subkey"="1";"value"="Input line #40"};
+{"key"="1";"subkey"="1";"value"="Input line #85"};
+{"key"="1";"subkey"="1";"value"="Input line #58"};
+{"key"="1";"subkey"="1";"value"="Input line #33"};
+{"key"="1";"subkey"="1";"value"="Input line #12"};
+{"key"="1";"subkey"="1";"value"="Input line #46"};
+{"key"="1";"subkey"="1";"value"="Input line #8"};
+{"key"="1";"subkey"="1";"value"="Input line #44"};
+{"key"="1";"subkey"="1";"value"="Input line #18"};
+{"key"="1";"subkey"="1";"value"="Input line #25"};
+{"key"="1";"subkey"="1";"value"="Input line #11"};
+{"key"="1";"subkey"="1";"value"="Input line #2"};
+{"key"="1";"subkey"="1";"value"="Input line #5"};
+{"key"="1";"subkey"="1";"value"="Input line #3"};
+{"key"="1";"subkey"="1";"value"="Input line #23"};
+{"key"="1";"subkey"="1";"value"="Input line #20"};
+{"key"="1";"subkey"="1";"value"="Input line #83"};
+{"key"="1";"subkey"="1";"value"="Input line #6"};
+{"key"="1";"subkey"="1";"value"="Input line #78"};
+{"key"="1";"subkey"="1";"value"="Input line #95"};
+{"key"="1";"subkey"="1";"value"="Input line #0"};
+{"key"="1";"subkey"="1";"value"="Input line #16"};
+{"key"="1";"subkey"="1";"value"="Input line #88"};
+{"key"="1";"subkey"="1";"value"="Input line #28"};
+{"key"="1";"subkey"="1";"value"="Input line #81"};
+{"key"="1";"subkey"="1";"value"="Input line #60"};
+{"key"="1";"subkey"="1";"value"="Input line #41"};
+{"key"="1";"subkey"="1";"value"="Input line #24"};
+{"key"="1";"subkey"="1";"value"="Input line #87"};
+{"key"="1";"subkey"="1";"value"="Input line #26"};
+{"key"="1";"subkey"="1";"value"="Input line #97"};
+{"key"="1";"subkey"="1";"value"="Input line #91"};
+{"key"="1";"subkey"="1";"value"="Input line #66"};
+{"key"="1";"subkey"="1";"value"="Input line #69"};
+{"key"="1";"subkey"="1";"value"="Input line #74"};
+{"key"="1";"subkey"="1";"value"="Input line #7"};
+{"key"="1";"subkey"="1";"value"="Input line #68"};
+{"key"="1";"subkey"="1";"value"="Input line #39"};
+{"key"="1";"subkey"="1";"value"="Input line #32"};
+{"key"="1";"subkey"="1";"value"="Input line #62"};
diff --git a/yql/essentials/udfs/common/streaming/test/cases/Empty.sql b/yql/essentials/udfs/common/streaming/test/cases/Empty.sql
new file mode 100644
index 00000000000..21ed9da180b
--- /dev/null
+++ b/yql/essentials/udfs/common/streaming/test/cases/Empty.sql
@@ -0,0 +1,3 @@
+/* syntax version 1 */
+-- Exposes the `value` column of Input as `Data`; the PROCESS statement below
+-- feeds these rows to an external `tail -n+101` via Streaming::Process.
+-- NOTE(review): judging by the test name the output is expected to be empty,
+-- i.e. Input presumably holds at most 100 rows - confirm against the input file.
+$in = (SELECT value AS Data FROM Input);
+PROCESS $in USING Streaming::Process(TableRows(), "tail", AsList("-n+101"));
\ No newline at end of file
diff --git a/yql/essentials/udfs/common/streaming/test/cases/File.in b/yql/essentials/udfs/common/streaming/test/cases/File.in
new file mode 100644
index 00000000000..045bc6bd14c
--- /dev/null
+++ b/yql/essentials/udfs/common/streaming/test/cases/File.in
@@ -0,0 +1,200 @@
+{"key"="180";"subkey"="7";"value"="Value #44"};
+{"key"="178";"subkey"="17";"value"="Value #7"};
+{"key"="6";"subkey"="4";"value"="Value #1"};
+{"key"="53";"subkey"="19";"value"="Value #41"};
+{"key"="112";"subkey"="15";"value"="Value #14"};
+{"key"="121";"subkey"="11";"value"="Value #58"};
+{"key"="69";"subkey"="5";"value"="Value #49"};
+{"key"="142";"subkey"="3";"value"="Value #5"};
+{"key"="73";"subkey"="8";"value"="Value #33"};
+{"key"="52";"subkey"="14";"value"="Value #45"};
+{"key"="18";"subkey"="6";"value"="Value #40"};
+{"key"="141";"subkey"="16";"value"="Value #35"};
+{"key"="63";"subkey"="18";"value"="Value #12"};
+{"key"="5";"subkey"="13";"value"="Value #28"};
+{"key"="128";"subkey"="2";"value"="Value #56"};
+{"key"="48";"subkey"="12";"value"="Value #13"};
+{"key"="93";"subkey"="9";"value"="Value #20"};
+{"key"="49";"subkey"="0";"value"="Value #30"};
+{"key"="95";"subkey"="1";"value"="Value #34"};
+{"key"="159";"subkey"="10";"value"="Value #52"};
+{"key"="55";"subkey"="7";"value"="Value #15"};
+{"key"="7";"subkey"="17";"value"="Value #24"};
+{"key"="35";"subkey"="4";"value"="Value #51"};
+{"key"="82";"subkey"="19";"value"="Value #0"};
+{"key"="170";"subkey"="15";"value"="Value #26"};
+{"key"="150";"subkey"="11";"value"="Value #27"};
+{"key"="26";"subkey"="5";"value"="Value #54"};
+{"key"="58";"subkey"="3";"value"="Value #37"};
+{"key"="16";"subkey"="8";"value"="Value #46"};
+{"key"="166";"subkey"="14";"value"="Value #4"};
+{"key"="86";"subkey"="6";"value"="Value #16"};
+{"key"="101";"subkey"="16";"value"="Value #32"};
+{"key"="160";"subkey"="18";"value"="Value #39"};
+{"key"="199";"subkey"="13";"value"="Value #25"};
+{"key"="138";"subkey"="2";"value"="Value #6"};
+{"key"="96";"subkey"="12";"value"="Value #57"};
+{"key"="33";"subkey"="9";"value"="Value #21"};
+{"key"="9";"subkey"="0";"value"="Value #42"};
+{"key"="21";"subkey"="1";"value"="Value #55"};
+{"key"="176";"subkey"="10";"value"="Value #23"};
+{"key"="0";"subkey"="7";"value"="Value #18"};
+{"key"="66";"subkey"="17";"value"="Value #3"};
+{"key"="198";"subkey"="4";"value"="Value #22"};
+{"key"="186";"subkey"="19";"value"="Value #17"};
+{"key"="83";"subkey"="15";"value"="Value #2"};
+{"key"="179";"subkey"="11";"value"="Value #19"};
+{"key"="64";"subkey"="5";"value"="Value #38"};
+{"key"="56";"subkey"="3";"value"="Value #50"};
+{"key"="155";"subkey"="8";"value"="Value #43"};
+{"key"="143";"subkey"="14";"value"="Value #9"};
+{"key"="188";"subkey"="6";"value"="Value #8"};
+{"key"="172";"subkey"="16";"value"="Value #53"};
+{"key"="103";"subkey"="18";"value"="Value #11"};
+{"key"="44";"subkey"="13";"value"="Value #36"};
+{"key"="173";"subkey"="2";"value"="Value #10"};
+{"key"="133";"subkey"="12";"value"="Value #48"};
+{"key"="168";"subkey"="9";"value"="Value #29"};
+{"key"="157";"subkey"="0";"value"="Value #31"};
+{"key"="152";"subkey"="1";"value"="Value #47"};
+{"key"="74";"subkey"="10";"value"="Value #59"};
+{"key"="154";"subkey"="7";"value"="Value #44"};
+{"key"="40";"subkey"="17";"value"="Value #7"};
+{"key"="89";"subkey"="4";"value"="Value #1"};
+{"key"="41";"subkey"="19";"value"="Value #41"};
+{"key"="24";"subkey"="15";"value"="Value #14"};
+{"key"="182";"subkey"="11";"value"="Value #58"};
+{"key"="80";"subkey"="5";"value"="Value #49"};
+{"key"="196";"subkey"="3";"value"="Value #5"};
+{"key"="43";"subkey"="8";"value"="Value #33"};
+{"key"="156";"subkey"="14";"value"="Value #45"};
+{"key"="34";"subkey"="6";"value"="Value #40"};
+{"key"="88";"subkey"="16";"value"="Value #35"};
+{"key"="22";"subkey"="18";"value"="Value #12"};
+{"key"="27";"subkey"="13";"value"="Value #28"};
+{"key"="84";"subkey"="2";"value"="Value #56"};
+{"key"="12";"subkey"="12";"value"="Value #13"};
+{"key"="98";"subkey"="9";"value"="Value #20"};
+{"key"="140";"subkey"="0";"value"="Value #30"};
+{"key"="31";"subkey"="1";"value"="Value #34"};
+{"key"="105";"subkey"="10";"value"="Value #52"};
+{"key"="149";"subkey"="7";"value"="Value #15"};
+{"key"="153";"subkey"="17";"value"="Value #24"};
+{"key"="177";"subkey"="4";"value"="Value #51"};
+{"key"="14";"subkey"="19";"value"="Value #0"};
+{"key"="190";"subkey"="15";"value"="Value #26"};
+{"key"="118";"subkey"="11";"value"="Value #27"};
+{"key"="174";"subkey"="5";"value"="Value #54"};
+{"key"="104";"subkey"="3";"value"="Value #37"};
+{"key"="47";"subkey"="8";"value"="Value #46"};
+{"key"="46";"subkey"="14";"value"="Value #4"};
+{"key"="124";"subkey"="6";"value"="Value #16"};
+{"key"="70";"subkey"="16";"value"="Value #32"};
+{"key"="110";"subkey"="18";"value"="Value #39"};
+{"key"="91";"subkey"="13";"value"="Value #25"};
+{"key"="192";"subkey"="2";"value"="Value #6"};
+{"key"="183";"subkey"="12";"value"="Value #57"};
+{"key"="100";"subkey"="9";"value"="Value #21"};
+{"key"="38";"subkey"="0";"value"="Value #42"};
+{"key"="71";"subkey"="1";"value"="Value #55"};
+{"key"="29";"subkey"="10";"value"="Value #23"};
+{"key"="51";"subkey"="7";"value"="Value #18"};
+{"key"="32";"subkey"="17";"value"="Value #3"};
+{"key"="130";"subkey"="4";"value"="Value #22"};
+{"key"="77";"subkey"="19";"value"="Value #17"};
+{"key"="4";"subkey"="15";"value"="Value #2"};
+{"key"="97";"subkey"="11";"value"="Value #19"};
+{"key"="67";"subkey"="5";"value"="Value #38"};
+{"key"="158";"subkey"="3";"value"="Value #50"};
+{"key"="25";"subkey"="8";"value"="Value #43"};
+{"key"="119";"subkey"="14";"value"="Value #9"};
+{"key"="2";"subkey"="6";"value"="Value #8"};
+{"key"="167";"subkey"="16";"value"="Value #53"};
+{"key"="193";"subkey"="18";"value"="Value #11"};
+{"key"="11";"subkey"="13";"value"="Value #36"};
+{"key"="129";"subkey"="2";"value"="Value #10"};
+{"key"="187";"subkey"="12";"value"="Value #48"};
+{"key"="20";"subkey"="9";"value"="Value #29"};
+{"key"="134";"subkey"="0";"value"="Value #31"};
+{"key"="115";"subkey"="1";"value"="Value #47"};
+{"key"="94";"subkey"="10";"value"="Value #59"};
+{"key"="30";"subkey"="7";"value"="Value #44"};
+{"key"="175";"subkey"="17";"value"="Value #7"};
+{"key"="62";"subkey"="4";"value"="Value #1"};
+{"key"="147";"subkey"="19";"value"="Value #41"};
+{"key"="87";"subkey"="15";"value"="Value #14"};
+{"key"="99";"subkey"="11";"value"="Value #58"};
+{"key"="114";"subkey"="5";"value"="Value #49"};
+{"key"="117";"subkey"="3";"value"="Value #5"};
+{"key"="10";"subkey"="8";"value"="Value #33"};
+{"key"="162";"subkey"="14";"value"="Value #45"};
+{"key"="171";"subkey"="6";"value"="Value #40"};
+{"key"="108";"subkey"="16";"value"="Value #35"};
+{"key"="60";"subkey"="18";"value"="Value #12"};
+{"key"="144";"subkey"="13";"value"="Value #28"};
+{"key"="113";"subkey"="2";"value"="Value #56"};
+{"key"="102";"subkey"="12";"value"="Value #13"};
+{"key"="194";"subkey"="9";"value"="Value #20"};
+{"key"="76";"subkey"="0";"value"="Value #30"};
+{"key"="189";"subkey"="1";"value"="Value #34"};
+{"key"="164";"subkey"="10";"value"="Value #52"};
+{"key"="23";"subkey"="7";"value"="Value #15"};
+{"key"="65";"subkey"="17";"value"="Value #24"};
+{"key"="54";"subkey"="4";"value"="Value #51"};
+{"key"="148";"subkey"="19";"value"="Value #0"};
+{"key"="123";"subkey"="15";"value"="Value #26"};
+{"key"="185";"subkey"="11";"value"="Value #27"};
+{"key"="28";"subkey"="5";"value"="Value #54"};
+{"key"="13";"subkey"="3";"value"="Value #37"};
+{"key"="136";"subkey"="8";"value"="Value #46"};
+{"key"="57";"subkey"="14";"value"="Value #4"};
+{"key"="184";"subkey"="6";"value"="Value #16"};
+{"key"="36";"subkey"="16";"value"="Value #32"};
+{"key"="132";"subkey"="18";"value"="Value #39"};
+{"key"="120";"subkey"="13";"value"="Value #25"};
+{"key"="50";"subkey"="2";"value"="Value #6"};
+{"key"="195";"subkey"="12";"value"="Value #57"};
+{"key"="135";"subkey"="9";"value"="Value #21"};
+{"key"="92";"subkey"="0";"value"="Value #42"};
+{"key"="151";"subkey"="1";"value"="Value #55"};
+{"key"="125";"subkey"="10";"value"="Value #23"};
+{"key"="146";"subkey"="7";"value"="Value #18"};
+{"key"="45";"subkey"="17";"value"="Value #3"};
+{"key"="90";"subkey"="4";"value"="Value #22"};
+{"key"="126";"subkey"="19";"value"="Value #17"};
+{"key"="145";"subkey"="15";"value"="Value #2"};
+{"key"="19";"subkey"="11";"value"="Value #19"};
+{"key"="127";"subkey"="5";"value"="Value #38"};
+{"key"="79";"subkey"="3";"value"="Value #50"};
+{"key"="131";"subkey"="8";"value"="Value #43"};
+{"key"="111";"subkey"="14";"value"="Value #9"};
+{"key"="75";"subkey"="6";"value"="Value #8"};
+{"key"="191";"subkey"="16";"value"="Value #53"};
+{"key"="3";"subkey"="18";"value"="Value #11"};
+{"key"="165";"subkey"="13";"value"="Value #36"};
+{"key"="85";"subkey"="2";"value"="Value #10"};
+{"key"="1";"subkey"="12";"value"="Value #48"};
+{"key"="161";"subkey"="9";"value"="Value #29"};
+{"key"="37";"subkey"="0";"value"="Value #31"};
+{"key"="107";"subkey"="1";"value"="Value #47"};
+{"key"="122";"subkey"="10";"value"="Value #59"};
+{"key"="139";"subkey"="7";"value"="Value #44"};
+{"key"="15";"subkey"="17";"value"="Value #7"};
+{"key"="106";"subkey"="4";"value"="Value #1"};
+{"key"="59";"subkey"="19";"value"="Value #41"};
+{"key"="61";"subkey"="15";"value"="Value #14"};
+{"key"="17";"subkey"="11";"value"="Value #58"};
+{"key"="68";"subkey"="5";"value"="Value #49"};
+{"key"="163";"subkey"="3";"value"="Value #5"};
+{"key"="197";"subkey"="8";"value"="Value #33"};
+{"key"="81";"subkey"="14";"value"="Value #45"};
+{"key"="169";"subkey"="6";"value"="Value #40"};
+{"key"="8";"subkey"="16";"value"="Value #35"};
+{"key"="109";"subkey"="18";"value"="Value #12"};
+{"key"="78";"subkey"="13";"value"="Value #28"};
+{"key"="181";"subkey"="2";"value"="Value #56"};
+{"key"="116";"subkey"="12";"value"="Value #13"};
+{"key"="137";"subkey"="9";"value"="Value #20"};
+{"key"="39";"subkey"="0";"value"="Value #30"};
+{"key"="72";"subkey"="1";"value"="Value #34"};
+{"key"="42";"subkey"="10";"value"="Value #52"};
diff --git a/yql/essentials/udfs/common/streaming/test/cases/File.sql b/yql/essentials/udfs/common/streaming/test/cases/File.sql
new file mode 100644
index 00000000000..a4b0faed03b
--- /dev/null
+++ b/yql/essentials/udfs/common/streaming/test/cases/File.sql
@@ -0,0 +1,24 @@
+/* syntax version 1 */
+-- Reads table "Input" from the "yt"/"plato" data source, turns every row into a
+-- struct with a single member Data = key, subkey and value joined with ","
+-- (String.JoinFromList), and pipes the rows through two chained
+-- Streaming.Process calls inside LMap: `grep [13]` first, then `grep 2`.
+-- The result of the second grep is returned.
+SELECT YQL::@@(block '(
+ (let x (Read! world (DataSource '"yt" '"plato") (Key '('table (String '"Input"))) (Void) '()))
+
+ (let world (Left! x))
+ (let table0 (Right! x))
+
+ (let data (FlatMap table0 (lambda '(row) (block '(
+ (let res (Struct))
+ (let res (AddMember res '"Data" ("Apply" ("Udf" '"String.JoinFromList") ("AsList" (Member row '"key") (Member row '"subkey") (Member row '"value")) (String '","))))
+ (let res (AsList res))
+ (return res)
+ ))))
+ )
+
+ (let udf (Udf '"Streaming.Process"))
+ (let args1 (List (ListType (DataType 'String)) (String '"[13]")))
+ (let res1 (LMap data (lambda '(stream) (Apply udf stream (String '"grep") args1))))
+
+ (let args2 (List (ListType (DataType 'String)) (String '"2")))
+ (let res2 (LMap res1 (lambda '(stream) (Apply udf stream (String '"grep") args2))))
+
+ (return res2)
+))@@;
diff --git a/yql/essentials/udfs/common/streaming/test/cases/Simple.sql b/yql/essentials/udfs/common/streaming/test/cases/Simple.sql
new file mode 100644
index 00000000000..0ea920cc2e1
--- /dev/null
+++ b/yql/essentials/udfs/common/streaming/test/cases/Simple.sql
@@ -0,0 +1,116 @@
+/* syntax version 1 */
+-- Builds a literal list of 100 structs {Data: "Input line #N"} and streams it
+-- through three chained Streaming.Process calls: `grep [123679]`, then
+-- `grep 4`, then `head` (invoked without an argument list). The final stream
+-- is materialized with Collect and returned.
+SELECT YQL::@@(block '(
+ (let inputRows (AsList
+ (AsStruct '('Data (String '"Input line #13")))
+ (AsStruct '('Data (String '"Input line #35")))
+ (AsStruct '('Data (String '"Input line #76")))
+ (AsStruct '('Data (String '"Input line #70")))
+ (AsStruct '('Data (String '"Input line #9")))
+ (AsStruct '('Data (String '"Input line #63")))
+ (AsStruct '('Data (String '"Input line #53")))
+ (AsStruct '('Data (String '"Input line #89")))
+ (AsStruct '('Data (String '"Input line #31")))
+ (AsStruct '('Data (String '"Input line #4")))
+ (AsStruct '('Data (String '"Input line #65")))
+ (AsStruct '('Data (String '"Input line #64")))
+ (AsStruct '('Data (String '"Input line #37")))
+ (AsStruct '('Data (String '"Input line #79")))
+ (AsStruct '('Data (String '"Input line #51")))
+ (AsStruct '('Data (String '"Input line #59")))
+ (AsStruct '('Data (String '"Input line #67")))
+ (AsStruct '('Data (String '"Input line #98")))
+ (AsStruct '('Data (String '"Input line #94")))
+ (AsStruct '('Data (String '"Input line #55")))
+ (AsStruct '('Data (String '"Input line #80")))
+ (AsStruct '('Data (String '"Input line #96")))
+ (AsStruct '('Data (String '"Input line #27")))
+ (AsStruct '('Data (String '"Input line #29")))
+ (AsStruct '('Data (String '"Input line #84")))
+ (AsStruct '('Data (String '"Input line #77")))
+ (AsStruct '('Data (String '"Input line #19")))
+ (AsStruct '('Data (String '"Input line #22")))
+ (AsStruct '('Data (String '"Input line #21")))
+ (AsStruct '('Data (String '"Input line #49")))
+ (AsStruct '('Data (String '"Input line #93")))
+ (AsStruct '('Data (String '"Input line #61")))
+ (AsStruct '('Data (String '"Input line #71")))
+ (AsStruct '('Data (String '"Input line #15")))
+ (AsStruct '('Data (String '"Input line #92")))
+ (AsStruct '('Data (String '"Input line #50")))
+ (AsStruct '('Data (String '"Input line #14")))
+ (AsStruct '('Data (String '"Input line #99")))
+ (AsStruct '('Data (String '"Input line #57")))
+ (AsStruct '('Data (String '"Input line #10")))
+ (AsStruct '('Data (String '"Input line #73")))
+ (AsStruct '('Data (String '"Input line #54")))
+ (AsStruct '('Data (String '"Input line #43")))
+ (AsStruct '('Data (String '"Input line #17")))
+ (AsStruct '('Data (String '"Input line #34")))
+ (AsStruct '('Data (String '"Input line #36")))
+ (AsStruct '('Data (String '"Input line #45")))
+ (AsStruct '('Data (String '"Input line #30")))
+ (AsStruct '('Data (String '"Input line #72")))
+ (AsStruct '('Data (String '"Input line #90")))
+ (AsStruct '('Data (String '"Input line #47")))
+ (AsStruct '('Data (String '"Input line #86")))
+ (AsStruct '('Data (String '"Input line #56")))
+ (AsStruct '('Data (String '"Input line #38")))
+ (AsStruct '('Data (String '"Input line #52")))
+ (AsStruct '('Data (String '"Input line #42")))
+ (AsStruct '('Data (String '"Input line #1")))
+ (AsStruct '('Data (String '"Input line #82")))
+ (AsStruct '('Data (String '"Input line #48")))
+ (AsStruct '('Data (String '"Input line #75")))
+ (AsStruct '('Data (String '"Input line #40")))
+ (AsStruct '('Data (String '"Input line #85")))
+ (AsStruct '('Data (String '"Input line #58")))
+ (AsStruct '('Data (String '"Input line #33")))
+ (AsStruct '('Data (String '"Input line #12")))
+ (AsStruct '('Data (String '"Input line #46")))
+ (AsStruct '('Data (String '"Input line #8")))
+ (AsStruct '('Data (String '"Input line #44")))
+ (AsStruct '('Data (String '"Input line #18")))
+ (AsStruct '('Data (String '"Input line #25")))
+ (AsStruct '('Data (String '"Input line #11")))
+ (AsStruct '('Data (String '"Input line #2")))
+ (AsStruct '('Data (String '"Input line #5")))
+ (AsStruct '('Data (String '"Input line #3")))
+ (AsStruct '('Data (String '"Input line #23")))
+ (AsStruct '('Data (String '"Input line #20")))
+ (AsStruct '('Data (String '"Input line #83")))
+ (AsStruct '('Data (String '"Input line #6")))
+ (AsStruct '('Data (String '"Input line #78")))
+ (AsStruct '('Data (String '"Input line #95")))
+ (AsStruct '('Data (String '"Input line #0")))
+ (AsStruct '('Data (String '"Input line #16")))
+ (AsStruct '('Data (String '"Input line #88")))
+ (AsStruct '('Data (String '"Input line #28")))
+ (AsStruct '('Data (String '"Input line #81")))
+ (AsStruct '('Data (String '"Input line #60")))
+ (AsStruct '('Data (String '"Input line #41")))
+ (AsStruct '('Data (String '"Input line #24")))
+ (AsStruct '('Data (String '"Input line #87")))
+ (AsStruct '('Data (String '"Input line #26")))
+ (AsStruct '('Data (String '"Input line #97")))
+ (AsStruct '('Data (String '"Input line #91")))
+ (AsStruct '('Data (String '"Input line #66")))
+ (AsStruct '('Data (String '"Input line #69")))
+ (AsStruct '('Data (String '"Input line #74")))
+ (AsStruct '('Data (String '"Input line #7")))
+ (AsStruct '('Data (String '"Input line #68")))
+ (AsStruct '('Data (String '"Input line #39")))
+ (AsStruct '('Data (String '"Input line #32")))
+ (AsStruct '('Data (String '"Input line #62")))
+ ))
+
+ (let udf (Udf '"Streaming.Process"))
+ (let args1 (AsList (String '"[123679]")))
+ (let res1 (Apply udf (Iterator inputRows) (String '"grep") args1))
+
+ (let args2 (AsList (String '"4")))
+ (let res2 (Apply udf res1 (String '"grep") args2))
+
+ (let res3 (Apply udf res2 (String '"head")))
+
+ (return (Collect res3))
+))@@;
diff --git a/yql/essentials/udfs/common/streaming/test/cases/Yield.sql b/yql/essentials/udfs/common/streaming/test/cases/Yield.sql
new file mode 100644
index 00000000000..bd8cce86a8f
--- /dev/null
+++ b/yql/essentials/udfs/common/streaming/test/cases/Yield.sql
@@ -0,0 +1,44 @@
+/* syntax version 1 */
+-- Exercises Streaming.Process under Switch. The input is a literal list of
+-- Variant<String, String> items; Switch routes alternative 0 to tr1 and
+-- alternative 1 to tr2. Each branch wraps its items into {Data: x} structs and
+-- pipes them through `grep [ab1]` (tr1) or `grep [rpd]` (tr2).
+-- The collected output is unwrapped back to plain Data strings via Visit and
+-- sorted by the value itself with direction flag (Bool 'true).
+SELECT YQL::@@(block '(
+ (let vt (VariantType (TupleType (DataType 'String) (DataType 'String))))
+ (let inputRows (AsList
+ (Variant (String 'abbbbd111) '1 vt)
+ (Variant (String 'btzzzzzzzzzz) '0 vt)
+ (Variant (String 'kaziiaakkakaka) '1 vt)
+ (Variant (String 'bufffffffff) '0 vt)
+ (Variant (String 'aaaaa11111qqqqd) '1 vt)
+ (Variant (String 'zoppppppppp) '0 vt)
+ (Variant (String 'arrrrrrrr) '0 vt)
+ (Variant (String 'zzzzzzzzzzzzzzz) '0 vt)
+ (Variant (String 'wwwwwwwwwwwwwww1) '0 vt)
+ (Variant (String 'baaaaaaaaaaaaaaa) '1 vt)
+ ))
+
+ (let udf (Udf '"Streaming.Process"))
+ (let args1 (AsList (String '"[ab1]")))
+ (let args2 (AsList (String '"[rpd]")))
+
+ (let pr (lambda '(x) (block '(
+ (let res (AsStruct '('Data x)))
+ (return res)
+ ))))
+
+ (let tr1 (lambda '(x) (block '(
+ (let y (OrderedMap x pr))
+ (return (Apply udf y (String '"grep") args1)))
+ )))
+
+ (let tr2 (lambda '(x) (block '(
+ (let y (OrderedMap x pr))
+ (return (Apply udf y (String '"grep") args2)))
+ )))
+
+ (let id (lambda '(x) x))
+ (let res1 (Switch (Iterator inputRows (DependsOn (String 'A))) '1 '('0) tr1 '('1) tr2))
+ (let pr2 (lambda '(x) (Member x 'Data)))
+ (let pr3 (lambda '(x) (Visit x '0 pr2 '1 pr2)))
+ (let res2 (OrderedMap (Collect res1) pr3))
+ (let res3 (Sort res2 (Bool 'true) id))
+
+ (return res3)
+))@@;
diff --git a/yql/essentials/udfs/common/streaming/test/cases/YieldSwitchEmpty.sql b/yql/essentials/udfs/common/streaming/test/cases/YieldSwitchEmpty.sql
new file mode 100644
index 00000000000..015c82217ec
--- /dev/null
+++ b/yql/essentials/udfs/common/streaming/test/cases/YieldSwitchEmpty.sql
@@ -0,0 +1,44 @@
+/* syntax version 1 */
+-- Switch over a literal list of Variant<String, String> items that alternate
+-- between 'aaaaaa' (alternative 0) and 'bbbbbb' (alternative 1). Both branches
+-- wrap items into {Data: x} structs and run `bash -c "grep missing || true"`
+-- through Streaming.Process. None of the input strings contains "missing", so
+-- grep matches nothing, while `|| true` keeps the shell exit status successful.
+-- The collected Switch output is returned.
+SELECT YQL::@@(block '(
+ (let vt (VariantType (TupleType (DataType 'String) (DataType 'String))))
+ (let inputRows (AsList
+ (Variant (String 'aaaaaa) '0 vt)
+ (Variant (String 'bbbbbb) '1 vt)
+ (Variant (String 'aaaaaa) '0 vt)
+ (Variant (String 'bbbbbb) '1 vt)
+ (Variant (String 'aaaaaa) '0 vt)
+ (Variant (String 'bbbbbb) '1 vt)
+ (Variant (String 'aaaaaa) '0 vt)
+ (Variant (String 'bbbbbb) '1 vt)
+ (Variant (String 'aaaaaa) '0 vt)
+ (Variant (String 'bbbbbb) '1 vt)
+ (Variant (String 'aaaaaa) '0 vt)
+ (Variant (String 'bbbbbb) '1 vt)
+ (Variant (String 'aaaaaa) '0 vt)
+ (Variant (String 'bbbbbb) '1 vt)
+ (Variant (String 'aaaaaa) '0 vt)
+ (Variant (String 'bbbbbb) '1 vt)
+ ))
+
+ (let udf (Udf '"Streaming.Process"))
+ (let args (AsList (String '"-c") (String '"grep missing || true")))
+
+ (let pr (lambda '(x) (block '(
+ (let res (AsStruct '('Data x)))
+ (return res)
+ ))))
+
+ (let tr1 (lambda '(x) (block '(
+ (let y (Map x pr))
+ (return (Apply udf y (String '"bash") args)))
+ )))
+
+ (let tr2 (lambda '(x) (block '(
+ (let y (Map x pr))
+ (return (Apply udf y (String '"bash") args)))
+ )))
+
+ (let input2 (Switch (Iterator inputRows (DependsOn (String 'A))) '1 '('0) tr1 '('1) tr2))
+
+ (return (Collect input2))
+))@@;
diff --git a/yql/essentials/udfs/common/streaming/test/ya.make b/yql/essentials/udfs/common/streaming/test/ya.make
new file mode 100644
index 00000000000..08e2048adc0
--- /dev/null
+++ b/yql/essentials/udfs/common/streaming/test/ya.make
@@ -0,0 +1,16 @@
+# The whole test module is declared only for Linux builds.
+# NOTE(review): presumably because the test cases spawn external tools
+# (grep, tail, head, bash) - confirm.
+IF (OS_LINUX)
+YQL_UDF_TEST_CONTRIB()
+ # UDF modules the test depends on.
+ DEPENDS(
+ yql/essentials/udfs/common/digest
+ yql/essentials/udfs/common/string
+ yql/essentials/udfs/common/streaming
+ )
+ TIMEOUT(300)
+ SIZE(MEDIUM)
+
+ # Under the memory sanitizer the test is tagged ya:not_autocheck.
+ IF (SANITIZER_TYPE == "memory")
+ TAG(ya:not_autocheck) # YQL-15385
+ ENDIF()
+ END()
+
+ENDIF()
diff --git a/yql/essentials/udfs/common/streaming/ya.make b/yql/essentials/udfs/common/streaming/ya.make
new file mode 100644
index 00000000000..9b080a7f86f
--- /dev/null
+++ b/yql/essentials/udfs/common/streaming/ya.make
@@ -0,0 +1,21 @@
+# Build description of the streaming_udf module.
+YQL_UDF_CONTRIB(streaming_udf)
+
+# UDF ABI version the module is built against: 2.27.0.
+YQL_ABI_VERSION(
+ 2
+ 27
+ 0
+)
+
+SRCS(
+ streaming_udf.cpp
+)
+
+PEERDIR(
+ library/cpp/deprecated/kmp
+)
+
+END()
+
+# Tests live in the test/ subdirectory.
+RECURSE_FOR_TESTS(
+ test
+)
diff --git a/yql/essentials/udfs/common/string/string_udf.cpp b/yql/essentials/udfs/common/string/string_udf.cpp
new file mode 100644
index 00000000000..d621e92582d
--- /dev/null
+++ b/yql/essentials/udfs/common/string/string_udf.cpp
@@ -0,0 +1,926 @@
+#include <yql/essentials/public/udf/udf_allocator.h>
+#include <yql/essentials/public/udf/udf_helpers.h>
+#include <yql/essentials/public/udf/udf_value_builder.h>
+
+#include <library/cpp/charset/codepage.h>
+#include <library/cpp/deprecated/split/split_iterator.h>
+#include <library/cpp/html/pcdata/pcdata.h>
+#include <library/cpp/string_utils/base32/base32.h>
+#include <library/cpp/string_utils/base64/base64.h>
+#include <library/cpp/string_utils/levenshtein_diff/levenshtein_diff.h>
+#include <library/cpp/string_utils/quote/quote.h>
+
+#include <yql/essentials/public/udf/arrow/udf_arrow_helpers.h>
+
+#include <util/charset/wide.h>
+#include <util/generic/vector.h>
+#include <util/stream/format.h>
+#include <util/string/ascii.h>
+#include <util/string/escape.h>
+#include <util/string/hex.h>
+#include <util/string/join.h>
+#include <util/string/reverse.h>
+#include <util/string/split.h>
+#include <util/string/strip.h>
+#include <util/string/subst.h>
+#include <util/string/util.h>
+#include <util/string/vector.h>
+
+using namespace NKikimr;
+using namespace NUdf;
+
+namespace {
+
+// Defines a strict UDF T<udfName> of type char*(TAutoMap<char*>) together with
+// its block kernel T<udfName>KernelExec. Both variants copy the argument into
+// a TString, call `function` on it and emit the result as the output string.
+//   udfName  - suffix of the generated names (T##udfName, T##udfName##KernelExec).
+//   function - callable invoked with the TString copy of the argument; its
+//              result must be accepted by NewString() and TBlockItem().
+// The last line deliberately carries no trailing backslash, so the macro ends
+// here and cannot absorb whatever follows it in the file.
+#define STRING_UDF(udfName, function) \
+    BEGIN_SIMPLE_STRICT_ARROW_UDF(T##udfName, char*(TAutoMap<char*>)) { \
+        const TString input(args[0].AsStringRef()); \
+        const auto& result = function(input); \
+        return valueBuilder->NewString(result); \
+    } \
+ \
+    struct T##udfName##KernelExec \
+        : public TUnaryKernelExec<T##udfName##KernelExec> \
+    { \
+        template <typename TSink> \
+        static void Process(const IValueBuilder*, TBlockItem arg1, const TSink& sink) { \
+            const TString input(arg1.AsStringRef()); \
+            const auto& result = function(input); \
+            sink(TBlockItem(result)); \
+        } \
+    }; \
+ \
+    END_SIMPLE_ARROW_UDF(T##udfName, T##udfName##KernelExec::Do)
+
+
+// An 'unsafe' UDF is still strict: a yexception raised while producing the
+// result is swallowed and turned into an empty (null) value.
+// Generates the scalar UDF T<udfName> of type TOptional<char*>(TOptional<char*>)
+// and the block kernel T<udfName>KernelExec. An empty argument gives an empty
+// result; otherwise `function` is applied to a TString copy of the argument.
+#define STRING_UNSAFE_UDF(udfName, function) \
+    BEGIN_SIMPLE_STRICT_ARROW_UDF(T##udfName, TOptional<char*>(TOptional<char*>)) { \
+        EMPTY_RESULT_ON_EMPTY_ARG(0); \
+        const TString source(args[0].AsStringRef()); \
+        try { \
+            const auto converted = function(source); \
+            return valueBuilder->NewString(converted); \
+        } catch (yexception&) { \
+            return TUnboxedValue(); \
+        } \
+    } \
+ \
+    struct T##udfName##KernelExec \
+        : public TUnaryKernelExec<T##udfName##KernelExec> \
+    { \
+        template <typename TSink> \
+        static void Process(const IValueBuilder*, TBlockItem arg1, const TSink& sink) { \
+            if (arg1) { \
+                const TString source(arg1.AsStringRef()); \
+                try { \
+                    const auto converted = function(source); \
+                    sink(TBlockItem(converted)); \
+                } catch (yexception&) { \
+                    sink(TBlockItem()); \
+                } \
+            } else { \
+                sink(TBlockItem()); \
+            } \
+        } \
+    }; \
+ \
+    END_SIMPLE_ARROW_UDF(T##udfName, T##udfName##KernelExec::Do)
+
+// Generates the scalar strict UDF T<udfName> of type
+// TOptional<char*>(TOptional<char*>). The argument is converted from UTF-8 to
+// a TUtf16String, `function` is called on that wide string, and the wide string
+// is converted back to UTF-8 for the result. A yexception thrown on the way
+// yields an empty value.
+#define STROKA_UDF(udfName, function) \
+    SIMPLE_STRICT_UDF(T##udfName, TOptional<char*>(TOptional<char*>)) { \
+        EMPTY_RESULT_ON_EMPTY_ARG(0) \
+        const TString utf8(args[0].AsStringRef()); \
+        try { \
+            TUtf16String utf16 = UTF8ToWide(utf8); \
+            function(utf16); \
+            const auto encoded = WideToUTF8(utf16); \
+            return valueBuilder->NewString(encoded); \
+        } catch (yexception&) { \
+            return TUnboxedValue(); \
+        } \
+    }
+
+// Generates the scalar strict UDF T<udfName> of type
+// TOptional<char*>(TOptional<char*>). The argument is converted from UTF-8 to
+// a TUtf16String, `function` is called with the wide buffer (begin, size), and
+// the wide string is converted back to UTF-8 for the result. A yexception
+// thrown on the way yields an empty value.
+#define STROKA_CASE_UDF(udfName, function) \
+    SIMPLE_STRICT_UDF(T##udfName, TOptional<char*>(TOptional<char*>)) { \
+        EMPTY_RESULT_ON_EMPTY_ARG(0) \
+        const TString utf8(args[0].AsStringRef()); \
+        try { \
+            TUtf16String utf16 = UTF8ToWide(utf8); \
+            function(utf16.begin(), utf16.size()); \
+            const auto encoded = WideToUTF8(utf16); \
+            return valueBuilder->NewString(encoded); \
+        } catch (yexception&) { \
+            return TUnboxedValue(); \
+        } \
+    }
+
+// Generates the strict UDF T<udfName> of type char*(TAutoMap<char*>) and its
+// block kernel. `function` names a TString member that is called on a copy of
+// the argument; when it returns true the modified copy is emitted, otherwise
+// the original argument is passed through unchanged.
+#define STROKA_ASCII_CASE_UDF(udfName, function) \
+    BEGIN_SIMPLE_STRICT_ARROW_UDF(T##udfName, char*(TAutoMap<char*>)) { \
+        TString text(args[0].AsStringRef()); \
+        if (!text.function()) { \
+            return args[0]; \
+        } \
+        return valueBuilder->NewString(text); \
+    } \
+ \
+    struct T##udfName##KernelExec \
+        : public TUnaryKernelExec<T##udfName##KernelExec> \
+    { \
+        template <typename TSink> \
+        static void Process(const IValueBuilder*, TBlockItem arg1, const TSink& sink) { \
+            TString text(arg1.AsStringRef()); \
+            if (!text.function()) { \
+                sink(arg1); \
+                return; \
+            } \
+            sink(TBlockItem(text)); \
+        } \
+    }; \
+ \
+    END_SIMPLE_ARROW_UDF(T##udfName, T##udfName##KernelExec::Do)
+
+
+// Generates the scalar strict UDF T<udfName> of type
+// bool(TOptional<char*>, char*) that returns haystack.function(needle), where
+// haystack is the first argument and needle the second. An empty first
+// argument yields false.
+// Both arguments are only inspected, so they are viewed through TStringBuf
+// rather than copied into temporary TString objects.
+#define STROKA_FIND_UDF(udfName, function) \
+    SIMPLE_STRICT_UDF(T##udfName, bool(TOptional<char*>, char*)) { \
+        Y_UNUSED(valueBuilder); \
+        if (!args[0]) { \
+            return TUnboxedValuePod(false); \
+        } \
+        const TStringBuf haystack(args[0].AsStringRef()); \
+        const TStringBuf needle(args[1].AsStringRef()); \
+        return TUnboxedValuePod(haystack.function(needle)); \
+    }
+
+// Generates the scalar strict UDF T<udfName> of type
+// bool(TOptional<char*>, char*) that returns function(haystack, needle), where
+// haystack is the first argument and needle the second. An empty first
+// argument yields false.
+// Both arguments are only inspected, so they are viewed through TStringBuf
+// rather than copied into temporary TString objects.
+#define STRING_TWO_ARGS_UDF(udfName, function) \
+    SIMPLE_STRICT_UDF(T##udfName, bool(TOptional<char*>, char*)) { \
+        Y_UNUSED(valueBuilder); \
+        if (!args[0]) { \
+            return TUnboxedValuePod(false); \
+        } \
+        const TStringBuf haystack(args[0].AsStringRef()); \
+        const TStringBuf needle(args[1].AsStringRef()); \
+        return TUnboxedValuePod(function(haystack, needle)); \
+    }
+
+// Generates the strict UDF T<function> of type bool(TOptional<char*>) and its
+// block kernel. The result is true when `function` holds for every character
+// of the argument (hence also for an empty string) and false as soon as one
+// character fails the check. An empty (null) argument yields false.
+#define IS_ASCII_UDF(function) \
+    BEGIN_SIMPLE_STRICT_ARROW_UDF(T##function, bool(TOptional<char*>)) { \
+        Y_UNUSED(valueBuilder); \
+        if (!args[0]) { \
+            return TUnboxedValuePod(false); \
+        } \
+        const TStringBuf text(args[0].AsStringRef()); \
+        for (auto symbol : text) { \
+            if (!function(symbol)) { \
+                return TUnboxedValuePod(false); \
+            } \
+        } \
+        return TUnboxedValuePod(true); \
+    } \
+ \
+    struct T##function##KernelExec \
+        : public TUnaryKernelExec<T##function##KernelExec> \
+    { \
+        template <typename TSink> \
+        static void Process(const IValueBuilder*, TBlockItem arg1, const TSink& sink) { \
+            bool verdict = false; \
+            if (arg1) { \
+                verdict = true; \
+                const TStringBuf text(arg1.AsStringRef()); \
+                for (auto symbol : text) { \
+                    if (!function(symbol)) { \
+                        verdict = false; \
+                        break; \
+                    } \
+                } \
+            } \
+            sink(TBlockItem(verdict)); \
+        } \
+    }; \
+ \
+    END_SIMPLE_ARROW_UDF(T##function, T##function##KernelExec::Do)
+
+
+
+// Generates the UDF T<function> of type
+// char*(TAutoMap<char*>, ui64, TOptional<char*>) (declared with one optional
+// argument) and its three-argument block kernel. The result is whatever
+// `function(input, padLen, paddingSymbol)` writes into a TStringStream.
+//   arg 0 - input string;
+//   arg 1 - padding length; values above padLim raise a yexception;
+//   arg 2 - optional padding symbol, ' ' when absent; a string whose size is
+//           not exactly 1 raises a yexception.
+// The padding symbol is validated before the padding length.
+#define STRING_STREAM_PAD_FORMATTER_UDF(function) \
+    BEGIN_SIMPLE_ARROW_UDF_WITH_OPTIONAL_ARGS(T##function, \
+        char*(TAutoMap<char*>, ui64, TOptional<char*>), 1) \
+    { \
+        char filler = ' '; \
+        if (args[2]) { \
+            if (args[2].AsStringRef().Size() != 1) { \
+                ythrow yexception() << "Not 1 symbol in paddingSymbol"; \
+            } \
+            filler = TString(args[2].AsStringRef())[0]; \
+        } \
+        const ui64 width = args[1].Get<ui64>(); \
+        if (width > padLim) { \
+            ythrow yexception() << "Padding length (" << width << ") exceeds maximum: " << padLim; \
+        } \
+        const TStringBuf text(args[0].AsStringRef()); \
+        TStringStream out; \
+        out << function(text, width, filler); \
+        return valueBuilder->NewString(TStringRef(out.Data(), out.Size())); \
+    } \
+ \
+    struct T##function##KernelExec \
+        : public TGenericKernelExec<T##function##KernelExec, 3> \
+    { \
+        template <typename TSink> \
+        static void Process(const IValueBuilder*, TBlockItem args, const TSink& sink) { \
+            char filler = ' '; \
+            if (args.GetElement(2)) { \
+                if (args.GetElement(2).AsStringRef().Size() != 1) { \
+                    ythrow yexception() << "Not 1 symbol in paddingSymbol"; \
+                } \
+                filler = TString(args.GetElement(2).AsStringRef())[0]; \
+            } \
+            const ui64 width = args.GetElement(1).Get<ui64>(); \
+            if (width > padLim) { \
+                ythrow yexception() << "Padding length (" << width \
+                    << ") exceeds maximum: " << padLim; \
+            } \
+            const TStringBuf text(args.GetElement(0).AsStringRef()); \
+            TStringStream out; \
+            out << function(text, width, filler); \
+            sink(TBlockItem(TStringRef(out.Data(), out.Size()))); \
+        } \
+    }; \
+ \
+    END_SIMPLE_ARROW_UDF(T##function, T##function##KernelExec::Do)
+
+// Generates the strict UDF T<function> of type char*(TAutoMap<argType>) and
+// its block kernel. The numeric argument is read as `argType`, passed to
+// `function`, and the text that the result writes into a TStringStream becomes
+// the output string.
+#define STRING_STREAM_NUM_FORMATTER_UDF(function, argType) \
+    BEGIN_SIMPLE_STRICT_ARROW_UDF(T##function, char*(TAutoMap<argType>)) { \
+        TStringStream out; \
+        out << function(args[0].Get<argType>()); \
+        const TStringRef formatted(out.Data(), out.Size()); \
+        return valueBuilder->NewString(formatted); \
+    } \
+ \
+    struct T##function##KernelExec \
+        : public TUnaryKernelExec<T##function##KernelExec> \
+    { \
+        template <typename TSink> \
+        static void Process(const IValueBuilder*, TBlockItem arg1, const TSink& sink) { \
+            TStringStream out; \
+            out << function(arg1.Get<argType>()); \
+            const TStringRef formatted(out.Data(), out.Size()); \
+            sink(TBlockItem(formatted)); \
+        } \
+    }; \
+ \
+    END_SIMPLE_ARROW_UDF(T##function, T##function##KernelExec::Do)
+
+// Generates the strict UDF T<function> of type char*(TAutoMap<char*>) and its
+// block kernel. The argument is viewed as a TStringBuf, passed to `function`,
+// and the text that the result writes into a TStringStream becomes the output
+// string.
+#define STRING_STREAM_TEXT_FORMATTER_UDF(function) \
+    BEGIN_SIMPLE_STRICT_ARROW_UDF(T##function, char*(TAutoMap<char*>)) { \
+        const TStringBuf text(args[0].AsStringRef()); \
+        TStringStream out; \
+        out << function(text); \
+        const TStringRef formatted(out.Data(), out.Size()); \
+        return valueBuilder->NewString(formatted); \
+    } \
+ \
+    struct T##function##KernelExec \
+        : public TUnaryKernelExec<T##function##KernelExec> \
+    { \
+        template <typename TSink> \
+        static void Process(const IValueBuilder*, TBlockItem arg1, const TSink& sink) { \
+            const TStringBuf text(arg1.AsStringRef()); \
+            TStringStream out; \
+            out << function(text); \
+            const TStringRef formatted(out.Data(), out.Size()); \
+            sink(TBlockItem(formatted)); \
+        } \
+    }; \
+ \
+    END_SIMPLE_ARROW_UDF(T##function, T##function##KernelExec::Do)
+
+
+// Generates the strict UDF T<udfName> of type char*(TAutoMap<ui64>) and its
+// block kernel. The ui64 argument is formatted with
+// HumanReadableSize(value, hrSize) through a TStringStream, and the produced
+// text becomes the output string.
+#define STRING_STREAM_HRSZ_FORMATTER_UDF(udfName, hrSize) \
+    BEGIN_SIMPLE_STRICT_ARROW_UDF(T##udfName, char*(TAutoMap<ui64>)) { \
+        TStringStream out; \
+        out << HumanReadableSize(args[0].Get<ui64>(), hrSize); \
+        const TStringRef formatted(out.Data(), out.Size()); \
+        return valueBuilder->NewString(formatted); \
+    } \
+ \
+    struct T##udfName##KernelExec \
+        : public TUnaryKernelExec<T##udfName##KernelExec> \
+    { \
+        template <typename TSink> \
+        static void Process(const IValueBuilder*, TBlockItem arg1, const TSink& sink) { \
+            TStringStream out; \
+            out << HumanReadableSize(arg1.Get<ui64>(), hrSize); \
+            const TStringRef formatted(out.Data(), out.Size()); \
+            sink(TBlockItem(formatted)); \
+        } \
+    }; \
+ \
+    END_SIMPLE_ARROW_UDF(T##udfName, T##udfName##KernelExec::Do)
+
+// X-macro list: each entry is XX(<UDF name suffix>, <function to apply>).
+// NOTE(review): presumably expanded with STRING_UDF - the expansion site is
+// outside this part of the file.
+#define STRING_UDF_MAP(XX) \
+ XX(Base32Encode, Base32Encode) \
+ XX(Base64Encode, Base64Encode) \
+ XX(Base64EncodeUrl, Base64EncodeUrl) \
+ XX(EscapeC, EscapeC) \
+ XX(UnescapeC, UnescapeC) \
+ XX(HexEncode, HexEncode) \
+ XX(EncodeHtml, EncodeHtmlPcdata) \
+ XX(DecodeHtml, DecodeHtmlPcdata) \
+ XX(CgiEscape, CGIEscapeRet) \
+ XX(CgiUnescape, CGIUnescapeRet) \
+ XX(Strip, Strip) \
+ XX(Collapse, Collapse)
+
+// X-macro list of decoders: each entry is
+// XX(<UDF name suffix>, <function to apply>).
+// NOTE(review): presumably expanded with STRING_UNSAFE_UDF - the expansion
+// site is outside this part of the file.
+#define STRING_UNSAFE_UDF_MAP(XX) \
+ XX(Base32Decode, Base32Decode) \
+ XX(Base32StrictDecode, Base32StrictDecode) \
+ XX(Base64Decode, Base64Decode) \
+ XX(Base64StrictDecode, Base64StrictDecode) \
+ XX(HexDecode, HexDecode)
+
+// NOTE: The functions below are marked as deprecated, so block implementation
+// is not required for them. Hence, STROKA_CASE_UDF provides only the scalar
+// one at the moment.
+#define STROKA_CASE_UDF_MAP(XX) \
+ XX(ToLower, ToLower) \
+ XX(ToUpper, ToUpper) \
+ XX(ToTitle, ToTitle)
+
+#define STROKA_ASCII_CASE_UDF_MAP(XX) \
+ XX(AsciiToLower, to_lower) \
+ XX(AsciiToUpper, to_upper) \
+ XX(AsciiToTitle, to_title)
+
+// NOTE: The functions below are marked as deprecated, so block implementation
+// is not required for them. Hence, STROKA_FIND_UDF provides only the scalar
+// one at the moment.
+#define STROKA_FIND_UDF_MAP(XX) \
+ XX(StartsWith, StartsWith) \
+ XX(EndsWith, EndsWith) \
+ XX(HasPrefix, StartsWith) \
+ XX(HasSuffix, EndsWith)
+
+// NOTE: The functions below are marked as deprecated, so block implementation
+// is not required for them. Hence, STRING_TWO_ARGS_UDF provides only the
+// scalar one at the moment.
+#define STRING_TWO_ARGS_UDF_MAP(XX) \
+ XX(StartsWithIgnoreCase, AsciiHasPrefixIgnoreCase) \
+ XX(EndsWithIgnoreCase, AsciiHasSuffixIgnoreCase) \
+ XX(HasPrefixIgnoreCase, AsciiHasPrefixIgnoreCase) \
+ XX(HasSuffixIgnoreCase, AsciiHasSuffixIgnoreCase)
+
+// NOTE: The functions below are marked as deprecated, so block implementation
+// is not required for them. Hence, STROKA_UDF provides only the scalar one at
+// the moment.
+#define STROKA_UDF_MAP(XX) \
+ XX(Reverse, ReverseInPlace)
+
+#define IS_ASCII_UDF_MAP(XX) \
+ XX(IsAscii) \
+ XX(IsAsciiSpace) \
+ XX(IsAsciiUpper) \
+ XX(IsAsciiLower) \
+ XX(IsAsciiDigit) \
+ XX(IsAsciiAlpha) \
+ XX(IsAsciiAlnum) \
+ XX(IsAsciiHex)
+
+#define STRING_STREAM_PAD_FORMATTER_UDF_MAP(XX) \
+ XX(LeftPad) \
+ XX(RightPad)
+
+#define STRING_STREAM_NUM_FORMATTER_UDF_MAP(XX) \
+ XX(Hex, ui64) \
+ XX(SHex, i64) \
+ XX(Bin, ui64) \
+ XX(SBin, i64)
+
+#define STRING_STREAM_TEXT_FORMATTER_UDF_MAP(XX) \
+ XX(HexText) \
+ XX(BinText)
+
+#define STRING_STREAM_HRSZ_FORMATTER_UDF_MAP(XX) \
+ XX(HumanReadableQuantity, SF_QUANTITY) \
+ XX(HumanReadableBytes, SF_BYTES)
+
+
+ // Scalar implementation: copies the input string, lets CollapseText() modify
+ // the copy using the ui64 length limit from the second argument, and returns
+ // the modified copy as a new string value.
+ BEGIN_SIMPLE_STRICT_ARROW_UDF(TCollapseText, char*(TAutoMap<char*>, ui64)) {
+     TString text(args[0].AsStringRef());
+     CollapseText(text, args[1].Get<ui64>());
+     return valueBuilder->NewString(text);
+ }
+
+ // Block kernel: the same transformation applied to a single block item.
+ struct TCollapseTextKernelExec
+     : public TBinaryKernelExec<TCollapseTextKernelExec>
+ {
+     template <typename TSink>
+     static void Process(const IValueBuilder*, TBlockItem arg1, TBlockItem arg2, const TSink& sink) {
+         TString text(arg1.AsStringRef());
+         CollapseText(text, arg2.Get<ui64>());
+         sink(TBlockItem(text));
+     }
+ };
+
+ END_SIMPLE_ARROW_UDF(TCollapseText, TCollapseTextKernelExec::Do);
+
+
+ // Scalar implementation: returns true when the second argument (needle)
+ // occurs inside the first one (haystack). An empty (NULL) haystack yields
+ // false; an empty needle is found in any non-NULL haystack.
+ // Both arguments are searched through non-owning views, so no string copies
+ // are allocated per row.
+ BEGIN_SIMPLE_STRICT_ARROW_UDF(TContains, bool(TOptional<char*>, char*)) {
+     Y_UNUSED(valueBuilder);
+     if (!args[0]) {
+         return TUnboxedValuePod(false);
+     }
+
+     const std::string_view haystack(args[0].AsStringRef());
+     const std::string_view needle(args[1].AsStringRef());
+     return TUnboxedValuePod(haystack.find(needle) != std::string_view::npos);
+ }
+
+ // Block kernel: the same check applied to a single pair of block items.
+ struct TContainsKernelExec : public TBinaryKernelExec<TContainsKernelExec> {
+     template <typename TSink>
+     static void Process(const IValueBuilder*, TBlockItem arg1, TBlockItem arg2, const TSink& sink) {
+         if (!arg1) {
+             return sink(TBlockItem(false));
+         }
+
+         const std::string_view haystack(arg1.AsStringRef());
+         const std::string_view needle(arg2.AsStringRef());
+         sink(TBlockItem(haystack.find(needle) != std::string_view::npos));
+     }
+ };
+
+ END_SIMPLE_ARROW_UDF(TContains, TContainsKernelExec::Do);
+
+
+ // Scalar implementation: runs SubstGlobal(copy, what, with) on a copy of the
+ // first argument. When SubstGlobal reports that nothing was substituted the
+ // original value is returned as is; otherwise the modified copy is returned.
+ BEGIN_SIMPLE_STRICT_ARROW_UDF(TReplaceAll, char*(TAutoMap<char*>, char*, char*)) {
+     TString replaced(args[0].AsStringRef());
+     if (!SubstGlobal(replaced, args[1].AsStringRef(), args[2].AsStringRef())) {
+         // Nothing changed: hand the input back untouched.
+         return args[0];
+     }
+     return valueBuilder->NewString(replaced);
+ }
+
+ // Block kernel: `args` carries the three arguments as elements 0..2.
+ struct TReplaceAllKernelExec
+     : public TGenericKernelExec<TReplaceAllKernelExec, 3>
+ {
+     template <typename TSink>
+     static void Process(const IValueBuilder*, TBlockItem args, const TSink& sink) {
+         const TStringBuf what(args.GetElement(1).AsStringRef());
+         const TStringBuf with(args.GetElement(2).AsStringRef());
+         TString replaced(args.GetElement(0).AsStringRef());
+         if (!SubstGlobal(replaced, what, with)) {
+             // Nothing changed: forward the input item untouched.
+             sink(args.GetElement(0));
+             return;
+         }
+         sink(TBlockItem(replaced));
+     }
+ };
+
+ END_SIMPLE_ARROW_UDF(TReplaceAll, TReplaceAllKernelExec::Do)
+
+
+ // Scalar implementation: replaces the first occurrence of the second
+ // argument (what) inside the first argument with the third argument. When
+ // `what` does not occur, the original value is returned as is.
+ // The search runs on a non-owning view, so the input is copied only when a
+ // replacement actually happens.
+ BEGIN_SIMPLE_STRICT_ARROW_UDF(TReplaceFirst, char*(TAutoMap<char*>, char*, char*)) {
+     const std::string_view input(args[0].AsStringRef());
+     const std::string_view what(args[1].AsStringRef());
+     const auto index = input.find(what);
+     if (index == std::string_view::npos) {
+         return args[0];
+     }
+     std::string result(input);
+     result.replace(index, what.size(), std::string_view(args[2].AsStringRef()));
+     return valueBuilder->NewString(result);
+ }
+
+ // Block kernel: `args` carries the three arguments as elements 0..2.
+ struct TReplaceFirstKernelExec
+     : public TGenericKernelExec<TReplaceFirstKernelExec, 3>
+ {
+     template <typename TSink>
+     static void Process(const IValueBuilder*, TBlockItem args, const TSink& sink) {
+         const std::string_view input(args.GetElement(0).AsStringRef());
+         const std::string_view what(args.GetElement(1).AsStringRef());
+         const auto index = input.find(what);
+         if (index == std::string_view::npos) {
+             // No match: forward the input item without copying it.
+             return sink(args.GetElement(0));
+         }
+         const std::string_view with(args.GetElement(2).AsStringRef());
+         std::string result(input);
+         result.replace(index, what.size(), with);
+         return sink(TBlockItem(result));
+     }
+ };
+
+ END_SIMPLE_ARROW_UDF(TReplaceFirst, TReplaceFirstKernelExec::Do)
+
+
+ // Scalar implementation: replaces the last occurrence of the second argument
+ // (what) inside the first argument with the third argument. When `what` does
+ // not occur, the original value is returned as is.
+ // The search runs on a non-owning view, so the input is copied only when a
+ // replacement actually happens.
+ BEGIN_SIMPLE_STRICT_ARROW_UDF(TReplaceLast, char*(TAutoMap<char*>, char*, char*)) {
+     const std::string_view input(args[0].AsStringRef());
+     const std::string_view what(args[1].AsStringRef());
+     const auto index = input.rfind(what);
+     if (index == std::string_view::npos) {
+         return args[0];
+     }
+     std::string result(input);
+     result.replace(index, what.size(), std::string_view(args[2].AsStringRef()));
+     return valueBuilder->NewString(result);
+ }
+
+ // Block kernel: `args` carries the three arguments as elements 0..2.
+ struct TReplaceLastKernelExec
+     : public TGenericKernelExec<TReplaceLastKernelExec, 3>
+ {
+     template <typename TSink>
+     static void Process(const IValueBuilder*, TBlockItem args, const TSink& sink) {
+         const std::string_view input(args.GetElement(0).AsStringRef());
+         const std::string_view what(args.GetElement(1).AsStringRef());
+         const auto index = input.rfind(what);
+         if (index == std::string_view::npos) {
+             // No match: forward the input item without copying it.
+             return sink(args.GetElement(0));
+         }
+         const std::string_view with(args.GetElement(2).AsStringRef());
+         std::string result(input);
+         result.replace(index, what.size(), with);
+         return sink(TBlockItem(result));
+     }
+ };
+
+ END_SIMPLE_ARROW_UDF(TReplaceLast, TReplaceLastKernelExec::Do)
+
+
+ // Scalar implementation: treats the second argument as a set of bytes and
+ // drops every byte of the first argument that belongs to the set. The
+ // original value is returned as is when nothing was dropped.
+ BEGIN_SIMPLE_STRICT_ARROW_UDF(TRemoveAll, char*(TAutoMap<char*>, char*)) {
+     // Membership table indexed by byte value.
+     std::array<bool, 256> banned{};
+     for (const char c : std::string_view(args[1].AsStringRef())) {
+         banned[static_cast<ui8>(c)] = true;
+     }
+
+     // Compact the surviving bytes towards the beginning of the copy.
+     std::string text(args[0].AsStringRef());
+     size_t kept = 0;
+     for (size_t pos = 0; pos < text.size(); ++pos) {
+         const ui8 symbol = text[pos];
+         if (banned[symbol]) {
+             continue;
+         }
+         text[kept++] = symbol;
+     }
+
+     if (kept == text.size()) {
+         return args[0];
+     }
+     text.resize(kept);
+     return valueBuilder->NewString(text);
+ }
+
+ // Block kernel: the same filtering applied to a single pair of block items.
+ struct TRemoveAllKernelExec
+     : public TBinaryKernelExec<TRemoveAllKernelExec>
+ {
+     template <typename TSink>
+     static void Process(const IValueBuilder*, TBlockItem arg1, TBlockItem arg2, const TSink& sink) {
+         // Membership table indexed by byte value.
+         std::array<bool, 256> banned{};
+         for (const char c : std::string_view(arg2.AsStringRef())) {
+             banned[static_cast<ui8>(c)] = true;
+         }
+
+         // Compact the surviving bytes towards the beginning of the copy.
+         std::string text(arg1.AsStringRef());
+         size_t kept = 0;
+         for (size_t pos = 0; pos < text.size(); ++pos) {
+             const ui8 symbol = text[pos];
+             if (banned[symbol]) {
+                 continue;
+             }
+             text[kept++] = symbol;
+         }
+
+         if (kept == text.size()) {
+             sink(arg1);
+             return;
+         }
+         text.resize(kept);
+         sink(TBlockItem(text));
+     }
+ };
+
+ END_SIMPLE_ARROW_UDF(TRemoveAll, TRemoveAllKernelExec::Do)
+
+
+ // Scalar implementation: treats the second argument as a set of bytes and
+ // erases the first byte of the first argument that belongs to the set. The
+ // original value is returned as is when no byte matches.
+ // The scan runs on a non-owning view, so the input is copied only when a
+ // byte is actually erased.
+ BEGIN_SIMPLE_STRICT_ARROW_UDF(TRemoveFirst, char*(TAutoMap<char*>, char*)) {
+     const std::string_view input(args[0].AsStringRef());
+     const std::string_view remove(args[1].AsStringRef());
+     // Membership table indexed by byte value.
+     std::array<bool, 256> chars{};
+     for (const ui8 c : remove) {
+         chars[c] = true;
+     }
+     for (size_t pos = 0; pos < input.size(); ++pos) {
+         if (chars[static_cast<ui8>(input[pos])]) {
+             std::string result(input);
+             result.erase(pos, 1);
+             return valueBuilder->NewString(result);
+         }
+     }
+     return args[0];
+ }
+
+ // Block kernel: the same operation applied to a single pair of block items.
+ struct TRemoveFirstKernelExec
+     : public TBinaryKernelExec<TRemoveFirstKernelExec>
+ {
+     template <typename TSink>
+     static void Process(const IValueBuilder*, TBlockItem arg1, TBlockItem arg2, const TSink& sink) {
+         const std::string_view input(arg1.AsStringRef());
+         const std::string_view remove(arg2.AsStringRef());
+         // Membership table indexed by byte value.
+         std::array<bool, 256> chars{};
+         for (const ui8 c : remove) {
+             chars[c] = true;
+         }
+         for (size_t pos = 0; pos < input.size(); ++pos) {
+             if (chars[static_cast<ui8>(input[pos])]) {
+                 std::string result(input);
+                 result.erase(pos, 1);
+                 return sink(TBlockItem(result));
+             }
+         }
+         sink(arg1);
+     }
+ };
+
+ END_SIMPLE_ARROW_UDF(TRemoveFirst, TRemoveFirstKernelExec::Do)
+
+
+ // Scalar implementation: treats the second argument as a set of bytes and
+ // erases the last byte of the first argument that belongs to the set. The
+ // original value is returned as is when no byte matches.
+ // The scan runs on a non-owning view, so the input is copied only when a
+ // byte is actually erased.
+ BEGIN_SIMPLE_STRICT_ARROW_UDF(TRemoveLast, char*(TAutoMap<char*>, char*)) {
+     const std::string_view input(args[0].AsStringRef());
+     const std::string_view remove(args[1].AsStringRef());
+     // Membership table indexed by byte value.
+     std::array<bool, 256> chars{};
+     for (const ui8 c : remove) {
+         chars[c] = true;
+     }
+     // Walk the positions from the back; `pos` is a plain index into the input.
+     for (size_t pos = input.size(); pos-- > 0;) {
+         if (chars[static_cast<ui8>(input[pos])]) {
+             std::string result(input);
+             result.erase(pos, 1);
+             return valueBuilder->NewString(result);
+         }
+     }
+     return args[0];
+ }
+
+ // Block kernel: the same operation applied to a single pair of block items.
+ struct TRemoveLastKernelExec
+     : public TBinaryKernelExec<TRemoveLastKernelExec>
+ {
+     template <typename TSink>
+     static void Process(const IValueBuilder*, TBlockItem arg1, TBlockItem arg2, const TSink& sink) {
+         const std::string_view input(arg1.AsStringRef());
+         const std::string_view remove(arg2.AsStringRef());
+         // Membership table indexed by byte value.
+         std::array<bool, 256> chars{};
+         for (const ui8 c : remove) {
+             chars[c] = true;
+         }
+         // Walk the positions from the back; `pos` is a plain index into the input.
+         for (size_t pos = input.size(); pos-- > 0;) {
+             if (chars[static_cast<ui8>(input[pos])]) {
+                 std::string result(input);
+                 result.erase(pos, 1);
+                 return sink(TBlockItem(result));
+             }
+         }
+         sink(arg1);
+     }
+ };
+
+ END_SIMPLE_ARROW_UDF(TRemoveLast, TRemoveLastKernelExec::Do)
+
+
+ // NOTE: String::Find is marked as deprecated, so a block implementation is
+ // not required for it. Hence, only the scalar one is provided.
+ // Searches the first argument for the second one, starting at the optional
+ // third argument (0 when omitted), and returns the raw find() result.
+ SIMPLE_STRICT_UDF_WITH_OPTIONAL_ARGS(TFind, i64(TAutoMap<char*>, char*, TOptional<ui64>), 1) {
+     Y_UNUSED(valueBuilder);
+     const ui64 startPos = args[2].GetOrDefault<ui64>(0);
+     const TString needle(args[1].AsStringRef());
+     const TString haystack(args[0].AsStringRef());
+     const auto found = haystack.find(needle, startPos);
+     return TUnboxedValuePod(found);
+ }
+
+ // NOTE: String::ReverseFind is marked as deprecated, so a block
+ // implementation is not required for it. Hence, only the scalar one is
+ // provided.
+ // Searches the first argument backwards for the second one, starting at the
+ // optional third argument (TString::npos when omitted), and returns the raw
+ // rfind() result.
+ SIMPLE_STRICT_UDF_WITH_OPTIONAL_ARGS(TReverseFind, i64(TAutoMap<char*>, char*, TOptional<ui64>), 1) {
+     Y_UNUSED(valueBuilder);
+     const ui64 startPos = args[2].GetOrDefault<ui64>(TString::npos);
+     const TString needle(args[1].AsStringRef());
+     const TString haystack(args[0].AsStringRef());
+     const auto found = haystack.rfind(needle, startPos);
+     return TUnboxedValuePod(found);
+ }
+
+ // NOTE: String::Substring is marked as deprecated, so a block implementation
+ // is not required for it. Hence, only the scalar one is provided.
+ // Returns input.substr(from, count), where `from` defaults to 0 and `count`
+ // defaults to TString::npos when the optional arguments are omitted.
+ SIMPLE_STRICT_UDF_WITH_OPTIONAL_ARGS(TSubstring, char*(TAutoMap<char*>, TOptional<ui64>, TOptional<ui64>), 1) {
+     const ui64 count = args[2].GetOrDefault<ui64>(TString::npos);
+     const ui64 from = args[1].GetOrDefault<ui64>(0);
+     const TString source(args[0].AsStringRef());
+     return valueBuilder->NewString(source.substr(from, count));
+ }
+
+ // Scratch container for the produced list items; its storage is obtained
+ // through TUnboxedValue::TAllocator.
+ using TTmpVector = TSmallVec<TUnboxedValue, TUnboxedValue::TAllocator>;
+
+ // Appends one item per token yielded by `it`. Every item is created with
+ // valueBuilder->SubString() on `input`, using the token offset relative to
+ // `from` and the distance between the token start and its delimiter.
+ template <typename TIt>
+ static void SplitToListImpl(
+     const IValueBuilder* valueBuilder,
+     const TUnboxedValue& input,
+     const std::string_view::const_iterator from,
+     const TIt& it,
+     TTmpVector& result) {
+     for (const auto& token : it) {
+         const auto offset = std::distance(from, token.TokenStart());
+         const auto length = std::distance(token.TokenStart(), token.TokenDelim());
+         result.emplace_back(valueBuilder->SubString(input, offset, length));
+     }
+ }
+
+ // Forwards to the overload above, wrapping the splitter with SkipEmpty()
+ // first when `skipEmpty` is set.
+ template <typename TIt>
+ static void SplitToListImpl(
+     const IValueBuilder* valueBuilder,
+     const TUnboxedValue& input,
+     const std::string_view::const_iterator from,
+     TIt& it,
+     bool skipEmpty,
+     TTmpVector& result) {
+     if (!skipEmpty) {
+         SplitToListImpl(valueBuilder, input, from, it, result);
+         return;
+     }
+     SplitToListImpl(valueBuilder, input, from, it.SkipEmpty(), result);
+ }
+
+ // Names and types of the optional named arguments accepted by TSplitToList.
+ // NOTE(review): the string literals are presumably the argument names visible
+ // to queries, so the "Delimeter" spelling must stay as is - confirm before
+ // renaming anything here.
+ constexpr char delimeterStringName[] = "DelimeterString";
+ constexpr char skipEmptyName[] = "SkipEmpty";
+ constexpr char limitName[] = "Limit";
+ using TDelimeterStringArg = TNamedArg<bool, delimeterStringName>;
+ using TSkipEmptyArg = TNamedArg<bool, skipEmptyName>;
+ using TLimitArg = TNamedArg<ui64, limitName>;
+
+
+ // Splits the first argument by the second one and returns the pieces as a
+ // list. An empty (NULL) input produces an empty list.
+ // Optional arguments:
+ //  * DelimeterString (default true) - split with SplitByString(); otherwise
+ //    the delimiter is handed to SplitBySet() as a C string;
+ //  * SkipEmpty (default false) - wrap the splitter with SkipEmpty();
+ //  * Limit (default 0) - when non-zero, the splitter is restricted with
+ //    Limit(limit + 1).
+ SIMPLE_STRICT_UDF_WITH_OPTIONAL_ARGS(TSplitToList, TListType<char*>(
+         TOptional<char*>,
+         char*,
+         TDelimeterStringArg,
+         TSkipEmptyArg,
+         TLimitArg
+     ),
+     3) {
+     TTmpVector result;
+     if (!args[0]) {
+         return valueBuilder->NewList(result.data(), result.size());
+     }
+
+     const std::string_view input(args[0].AsStringRef());
+     const std::string_view delimiter(args[1].AsStringRef());
+     const bool byString = args[2].GetOrDefault<bool>(true);
+     const bool skipEmpty = args[3].GetOrDefault<bool>(false);
+     const auto limit = args[4].GetOrDefault<ui64>(0);
+
+     // Every combination yields a different splitter type, hence four branches.
+     if (byString && limit) {
+         auto it = StringSplitter(input).SplitByString(delimiter).Limit(limit + 1);
+         SplitToListImpl(valueBuilder, args[0], input.cbegin(), it, skipEmpty, result);
+     } else if (byString) {
+         auto it = StringSplitter(input).SplitByString(delimiter);
+         SplitToListImpl(valueBuilder, args[0], input.cbegin(), it, skipEmpty, result);
+     } else if (limit) {
+         auto it = StringSplitter(input).SplitBySet(TString(delimiter).c_str()).Limit(limit + 1);
+         SplitToListImpl(valueBuilder, args[0], input.cbegin(), it, skipEmpty, result);
+     } else {
+         auto it = StringSplitter(input).SplitBySet(TString(delimiter).c_str());
+         SplitToListImpl(valueBuilder, args[0], input.cbegin(), it, skipEmpty, result);
+     }
+     return valueBuilder->NewList(result.data(), result.size());
+ }
+
+ // Joins the non-empty (non-NULL) items of the list in the first argument
+ // with JoinSeq(), using the second argument as the delimiter. Empty items
+ // are skipped.
+ SIMPLE_STRICT_UDF(TJoinFromList, char*(TAutoMap<TListType<TOptional<char*>>>, char*)) {
+     auto listIterator = args[0].GetListIterator();
+     const TString separator(args[1].AsStringRef());
+
+     // Each item is copied because `element` is overwritten on every step.
+     TVector<TString> parts;
+     TUnboxedValue element;
+     while (listIterator.Next(element)) {
+         if (!element) {
+             continue;
+         }
+         TString part(element.AsStringRef());
+         parts.push_back(std::move(part));
+     }
+
+     return valueBuilder->NewString(JoinSeq(separator, parts));
+ }
+
+ // Scalar implementation: returns NLevenshtein::Distance() between the two
+ // string arguments as a ui64 value.
+ BEGIN_SIMPLE_STRICT_ARROW_UDF(TLevensteinDistance, ui64(TAutoMap<char*>, TAutoMap<char*>)) {
+     Y_UNUSED(valueBuilder);
+     const TStringBuf lhs(args[0].AsStringRef());
+     const TStringBuf rhs(args[1].AsStringRef());
+     return TUnboxedValuePod(static_cast<ui64>(NLevenshtein::Distance(lhs, rhs)));
+ }
+
+ // Block kernel: the same computation for a single pair of block items.
+ struct TLevensteinDistanceKernelExec : public TBinaryKernelExec<TLevensteinDistanceKernelExec> {
+     template <typename TSink>
+     static void Process(const IValueBuilder*, TBlockItem arg1, TBlockItem arg2, const TSink& sink) {
+         const std::string_view lhs(arg1.AsStringRef());
+         const std::string_view rhs(arg2.AsStringRef());
+         sink(TBlockItem(static_cast<ui64>(NLevenshtein::Distance(lhs, rhs))));
+     }
+ };
+
+ END_SIMPLE_ARROW_UDF(TLevensteinDistance, TLevensteinDistanceKernelExec::Do);
+
+
+
+ // Scalar implementation: interprets the ui64 argument as microseconds
+ // (TDuration::MicroSeconds) and returns the text produced by streaming
+ // HumanReadable(duration).
+ BEGIN_SIMPLE_STRICT_ARROW_UDF(THumanReadableDuration, char*(TAutoMap<ui64>)) {
+     const auto duration = TDuration::MicroSeconds(args[0].Get<ui64>());
+     TStringStream out;
+     out << HumanReadable(duration);
+     return valueBuilder->NewString(TStringRef(out.Data(), out.Size()));
+ }
+
+ // Block kernel: the same formatting for a single block item.
+ struct THumanReadableDurationKernelExec
+     : public TUnaryKernelExec<THumanReadableDurationKernelExec>
+ {
+     template <typename TSink>
+     static void Process(const IValueBuilder*, TBlockItem arg1, const TSink& sink) {
+         const auto duration = TDuration::MicroSeconds(arg1.Get<ui64>());
+         TStringStream out;
+         out << HumanReadable(duration);
+         sink(TBlockItem(TStringRef(out.Data(), out.Size())));
+     }
+ };
+
+ END_SIMPLE_ARROW_UDF(THumanReadableDuration, THumanReadableDurationKernelExec::Do)
+
+
+ // Scalar implementation: returns the text produced by streaming
+ // Prec(value, digits), where `value` is the double argument and `digits` is
+ // the ui64 argument.
+ BEGIN_SIMPLE_STRICT_ARROW_UDF(TPrec, char*(TAutoMap<double>, ui64)) {
+     const double value = args[0].Get<double>();
+     const ui64 digits = args[1].Get<ui64>();
+     TStringStream out;
+     out << Prec(value, digits);
+     return valueBuilder->NewString(TStringRef(out.Data(), out.Size()));
+ }
+
+ // Block kernel: the same formatting for a single pair of block items.
+ struct TPrecKernelExec : public TBinaryKernelExec<TPrecKernelExec> {
+     template <typename TSink>
+     static void Process(const IValueBuilder*, TBlockItem arg1, TBlockItem arg2, const TSink& sink) {
+         const double value = arg1.Get<double>();
+         const ui64 digits = arg2.Get<ui64>();
+         TStringStream out;
+         out << Prec(value, digits);
+         sink(TBlockItem(TStringRef(out.Data(), out.Size())));
+     }
+ };
+
+ END_SIMPLE_ARROW_UDF(TPrec, TPrecKernelExec::Do)
+
+
+ // Converts the string argument into a list with one item per byte; every
+ // byte is stored as an unsigned value.
+ SIMPLE_STRICT_UDF(TToByteList, TListType<ui8>(char*)) {
+     const TStringBuf bytes(args[0].AsStringRef());
+     TUnboxedValue* slots = nullptr;
+     TUnboxedValue list = valueBuilder->NewArray(bytes.size(), slots);
+     for (size_t pos = 0; pos < bytes.size(); ++pos) {
+         slots[pos] = TUnboxedValuePod(static_cast<unsigned char>(bytes[pos]));
+     }
+     return list;
+ }
+
+ // Builds a string from a list of ui8 values, one byte per list item.
+ SIMPLE_STRICT_UDF(TFromByteList, char*(TListType<ui8>)) {
+     auto list = args[0];
+
+     // Fast path: the list exposes its elements directly, so the length is
+     // known and the bytes can be written straight into the new string.
+     if (const auto elems = list.GetElements()) {
+         const auto elemCount = list.GetListLength();
+         TUnboxedValue result = valueBuilder->NewStringNotFilled(elemCount);
+         auto out = result.AsStringRef().Data();
+         for (ui64 i = 0; i != elemCount; ++i) {
+             out[i] = elems[i].Get<ui8>();
+         }
+         return result;
+     }
+
+     // Slow path: collect the bytes through the list iterator first.
+     std::vector<char, NKikimr::NUdf::TStdAllocatorForUdf<char>> buffer;
+     buffer.reserve(TUnboxedValuePod::InternalBufferSize);
+
+     const auto iter = list.GetListIterator();
+     NUdf::TUnboxedValue item;
+     while (iter.Next(item)) {
+         buffer.push_back(item.Get<ui8>());
+     }
+
+     return valueBuilder->NewString(TStringRef(buffer.data(), buffer.size()));
+ }
+
+// Expands one X-macro row to the UDF type name T<udfName> followed by a comma;
+// any further row arguments are ignored.
+#define STRING_REGISTER_UDF(udfName, ...) T##udfName,
+
+ // Expand every X-macro table with its generator macro.
+ STRING_UDF_MAP(STRING_UDF)
+ STRING_UNSAFE_UDF_MAP(STRING_UNSAFE_UDF)
+ STROKA_UDF_MAP(STROKA_UDF)
+ STROKA_CASE_UDF_MAP(STROKA_CASE_UDF)
+ STROKA_ASCII_CASE_UDF_MAP(STROKA_ASCII_CASE_UDF)
+ STROKA_FIND_UDF_MAP(STROKA_FIND_UDF)
+ STRING_TWO_ARGS_UDF_MAP(STRING_TWO_ARGS_UDF)
+ IS_ASCII_UDF_MAP(IS_ASCII_UDF)
+
+ // NOTE(review): padLim is presumably the limit consulted by the pad
+ // formatter UDFs generated right below - confirm in
+ // STRING_STREAM_PAD_FORMATTER_UDF.
+ static constexpr ui64 padLim = 1000000;
+ STRING_STREAM_PAD_FORMATTER_UDF_MAP(STRING_STREAM_PAD_FORMATTER_UDF)
+ STRING_STREAM_NUM_FORMATTER_UDF_MAP(STRING_STREAM_NUM_FORMATTER_UDF)
+ STRING_STREAM_TEXT_FORMATTER_UDF_MAP(STRING_STREAM_TEXT_FORMATTER_UDF)
+ STRING_STREAM_HRSZ_FORMATTER_UDF_MAP(STRING_STREAM_HRSZ_FORMATTER_UDF)
+
+ // Declares TStringModule with the full UDF list: the table-driven UDFs come
+ // first (each table expanded with STRING_REGISTER_UDF into "T<name>,"),
+ // followed by the individually written ones.
+ SIMPLE_MODULE(TStringModule,
+ STRING_UDF_MAP(STRING_REGISTER_UDF)
+ STRING_UNSAFE_UDF_MAP(STRING_REGISTER_UDF)
+ STROKA_UDF_MAP(STRING_REGISTER_UDF)
+ STROKA_CASE_UDF_MAP(STRING_REGISTER_UDF)
+ STROKA_ASCII_CASE_UDF_MAP(STRING_REGISTER_UDF)
+ STROKA_FIND_UDF_MAP(STRING_REGISTER_UDF)
+ STRING_TWO_ARGS_UDF_MAP(STRING_REGISTER_UDF)
+ IS_ASCII_UDF_MAP(STRING_REGISTER_UDF)
+ STRING_STREAM_PAD_FORMATTER_UDF_MAP(STRING_REGISTER_UDF)
+ STRING_STREAM_NUM_FORMATTER_UDF_MAP(STRING_REGISTER_UDF)
+ STRING_STREAM_TEXT_FORMATTER_UDF_MAP(STRING_REGISTER_UDF)
+ STRING_STREAM_HRSZ_FORMATTER_UDF_MAP(STRING_REGISTER_UDF)
+ TCollapseText,
+ TReplaceAll,
+ TReplaceFirst,
+ TReplaceLast,
+ TRemoveAll,
+ TRemoveFirst,
+ TRemoveLast,
+ TContains,
+ TFind,
+ TReverseFind,
+ TSubstring,
+ TSplitToList,
+ TJoinFromList,
+ TLevensteinDistance,
+ THumanReadableDuration,
+ TPrec,
+ TToByteList,
+ TFromByteList)
+}
+
+// Passes TStringModule to the REGISTER_MODULES macro.
+REGISTER_MODULES(TStringModule)
diff --git a/yql/essentials/udfs/common/string/test/canondata/result.json b/yql/essentials/udfs/common/string/test/canondata/result.json
new file mode 100644
index 00000000000..f9e3a670c2c
--- /dev/null
+++ b/yql/essentials/udfs/common/string/test/canondata/result.json
@@ -0,0 +1,112 @@
+{
+ "test.test[AsciiChecks]": [
+ {
+ "uri": "file://test.test_AsciiChecks_/results.txt"
+ }
+ ],
+ "test.test[Base32Decode]": [
+ {
+ "uri": "file://test.test_Base32Decode_/results.txt"
+ }
+ ],
+ "test.test[Base32Encode]": [
+ {
+ "uri": "file://test.test_Base32Encode_/results.txt"
+ }
+ ],
+ "test.test[BlockAsciiChecks]": [
+ {
+ "uri": "file://test.test_BlockAsciiChecks_/results.txt"
+ }
+ ],
+ "test.test[BlockFind]": [
+ {
+ "uri": "file://test.test_BlockFind_/results.txt"
+ }
+ ],
+ "test.test[BlockRemove]": [
+ {
+ "uri": "file://test.test_BlockRemove_/results.txt"
+ }
+ ],
+ "test.test[BlockReplace]": [
+ {
+ "uri": "file://test.test_BlockReplace_/results.txt"
+ }
+ ],
+ "test.test[BlockStreamFormat]": [
+ {
+ "uri": "file://test.test_BlockStreamFormat_/results.txt"
+ }
+ ],
+ "test.test[BlockStringUDF]": [
+ {
+ "uri": "file://test.test_BlockStringUDF_/results.txt"
+ }
+ ],
+ "test.test[BlockStringUnsafeUDF]": [
+ {
+ "uri": "file://test.test_BlockStringUnsafeUDF_/results.txt"
+ }
+ ],
+ "test.test[BlockTo]": [
+ {
+ "uri": "file://test.test_BlockTo_/results.txt"
+ }
+ ],
+ "test.test[ExtendAndTake]": [
+ {
+ "uri": "file://test.test_ExtendAndTake_/results.txt"
+ }
+ ],
+ "test.test[Find]": [
+ {
+ "uri": "file://test.test_Find_/results.txt"
+ }
+ ],
+ "test.test[List]": [
+ {
+ "uri": "file://test.test_List_/results.txt"
+ }
+ ],
+ "test.test[List_v0]": [
+ {
+ "uri": "file://test.test_List_v0_/results.txt"
+ }
+ ],
+ "test.test[Remove]": [
+ {
+ "uri": "file://test.test_Remove_/results.txt"
+ }
+ ],
+ "test.test[ReplaceFirstLast]": [
+ {
+ "uri": "file://test.test_ReplaceFirstLast_/results.txt"
+ }
+ ],
+ "test.test[Replace]": [
+ {
+ "uri": "file://test.test_Replace_/results.txt"
+ }
+ ],
+ "test.test[StreamFormat]": [
+ {
+ "uri": "file://test.test_StreamFormat_/results.txt"
+ }
+ ],
+ "test.test[StringUDF]": [
+ {
+ "uri": "file://test.test_StringUDF_/results.txt"
+ }
+ ],
+ "test.test[StringUnsafeUDF]": [
+ {
+ "uri": "file://test.test_StringUnsafeUDF_/results.txt"
+ }
+ ],
+ "test.test[To]": [
+ {
+ "uri": "file://test.test_To_/results.txt"
+ }
+ ]
+}
diff --git a/yql/essentials/udfs/common/string/test/canondata/test.test_AsciiChecks_/results.txt b/yql/essentials/udfs/common/string/test/canondata/test.test_AsciiChecks_/results.txt
new file mode 100644
index 00000000000..944b17d4c1e
--- /dev/null
+++ b/yql/essentials/udfs/common/string/test/canondata/test.test_AsciiChecks_/results.txt
@@ -0,0 +1,124 @@
+[
+ {
+ "Write" = [
+ {
+ "Type" = [
+ "ListType";
+ [
+ "StructType";
+ [
+ [
+ "isascii";
+ [
+ "DataType";
+ "Bool"
+ ]
+ ];
+ [
+ "isspace";
+ [
+ "DataType";
+ "Bool"
+ ]
+ ];
+ [
+ "isupper";
+ [
+ "DataType";
+ "Bool"
+ ]
+ ];
+ [
+ "islower";
+ [
+ "DataType";
+ "Bool"
+ ]
+ ];
+ [
+ "isdigit";
+ [
+ "DataType";
+ "Bool"
+ ]
+ ];
+ [
+ "isalpha";
+ [
+ "DataType";
+ "Bool"
+ ]
+ ];
+ [
+ "isalnum";
+ [
+ "DataType";
+ "Bool"
+ ]
+ ];
+ [
+ "ishex";
+ [
+ "DataType";
+ "Bool"
+ ]
+ ]
+ ]
+ ]
+ ];
+ "Data" = [
+ [
+ %true;
+ %false;
+ %false;
+ %false;
+ %false;
+ %false;
+ %false;
+ %false
+ ];
+ [
+ %true;
+ %false;
+ %false;
+ %false;
+ %false;
+ %false;
+ %false;
+ %false
+ ];
+ [
+ %true;
+ %false;
+ %false;
+ %true;
+ %false;
+ %true;
+ %true;
+ %false
+ ];
+ [
+ %true;
+ %false;
+ %false;
+ %false;
+ %true;
+ %false;
+ %true;
+ %true
+ ];
+ [
+ %true;
+ %false;
+ %false;
+ %false;
+ %false;
+ %false;
+ %false;
+ %false
+ ]
+ ]
+ }
+ ]
+ }
+] \ No newline at end of file
diff --git a/yql/essentials/udfs/common/string/test/canondata/test.test_Base32Decode_/results.txt b/yql/essentials/udfs/common/string/test/canondata/test.test_Base32Decode_/results.txt
new file mode 100644
index 00000000000..bf4aa56fa93
--- /dev/null
+++ b/yql/essentials/udfs/common/string/test/canondata/test.test_Base32Decode_/results.txt
@@ -0,0 +1,79 @@
+[
+ {
+ "Write" = [
+ {
+ "Type" = [
+ "ListType";
+ [
+ "StructType";
+ [
+ [
+ "value";
+ [
+ "DataType";
+ "String"
+ ]
+ ];
+ [
+ "strict_decoded";
+ [
+ "OptionalType";
+ [
+ "DataType";
+ "String"
+ ]
+ ]
+ ];
+ [
+ "decoded";
+ [
+ "OptionalType";
+ [
+ "DataType";
+ "String"
+ ]
+ ]
+ ]
+ ]
+ ]
+ ];
+ "Data" = [
+ [
+ "ORSXG5A=";
+ [
+ "test"
+ ];
+ [
+ "test"
+ ]
+ ];
+ [
+ "KRSXG5CUMVZXI===";
+ [
+ "TestTest"
+ ];
+ [
+ "TestTest"
+ ]
+ ];
+ [
+ "MFYHA3DF";
+ [
+ "apple"
+ ];
+ [
+ "apple"
+ ]
+ ];
+ [
+ "hmmmm===hmmmm";
+ #;
+ [
+ "\0\0\0"
+ ]
+ ]
+ ]
+ }
+ ]
+ }
+] \ No newline at end of file
diff --git a/yql/essentials/udfs/common/string/test/canondata/test.test_Base32Encode_/results.txt b/yql/essentials/udfs/common/string/test/canondata/test.test_Base32Encode_/results.txt
new file mode 100644
index 00000000000..51c74759fc7
--- /dev/null
+++ b/yql/essentials/udfs/common/string/test/canondata/test.test_Base32Encode_/results.txt
@@ -0,0 +1,44 @@
+[
+ {
+ "Write" = [
+ {
+ "Type" = [
+ "ListType";
+ [
+ "StructType";
+ [
+ [
+ "value";
+ [
+ "DataType";
+ "String"
+ ]
+ ];
+ [
+ "encoded";
+ [
+ "DataType";
+ "String"
+ ]
+ ]
+ ]
+ ]
+ ];
+ "Data" = [
+ [
+ "test";
+ "ORSXG5A="
+ ];
+ [
+ "TestTest";
+ "KRSXG5CUMVZXI==="
+ ];
+ [
+ "apple";
+ "MFYHA3DF"
+ ]
+ ]
+ }
+ ]
+ }
+] \ No newline at end of file
diff --git a/yql/essentials/udfs/common/string/test/canondata/test.test_BlockAsciiChecks_/results.txt b/yql/essentials/udfs/common/string/test/canondata/test.test_BlockAsciiChecks_/results.txt
new file mode 100644
index 00000000000..944b17d4c1e
--- /dev/null
+++ b/yql/essentials/udfs/common/string/test/canondata/test.test_BlockAsciiChecks_/results.txt
@@ -0,0 +1,124 @@
+[
+ {
+ "Write" = [
+ {
+ "Type" = [
+ "ListType";
+ [
+ "StructType";
+ [
+ [
+ "isascii";
+ [
+ "DataType";
+ "Bool"
+ ]
+ ];
+ [
+ "isspace";
+ [
+ "DataType";
+ "Bool"
+ ]
+ ];
+ [
+ "isupper";
+ [
+ "DataType";
+ "Bool"
+ ]
+ ];
+ [
+ "islower";
+ [
+ "DataType";
+ "Bool"
+ ]
+ ];
+ [
+ "isdigit";
+ [
+ "DataType";
+ "Bool"
+ ]
+ ];
+ [
+ "isalpha";
+ [
+ "DataType";
+ "Bool"
+ ]
+ ];
+ [
+ "isalnum";
+ [
+ "DataType";
+ "Bool"
+ ]
+ ];
+ [
+ "ishex";
+ [
+ "DataType";
+ "Bool"
+ ]
+ ]
+ ]
+ ]
+ ];
+ "Data" = [
+ [
+ %true;
+ %false;
+ %false;
+ %false;
+ %false;
+ %false;
+ %false;
+ %false
+ ];
+ [
+ %true;
+ %false;
+ %false;
+ %false;
+ %false;
+ %false;
+ %false;
+ %false
+ ];
+ [
+ %true;
+ %false;
+ %false;
+ %true;
+ %false;
+ %true;
+ %true;
+ %false
+ ];
+ [
+ %true;
+ %false;
+ %false;
+ %false;
+ %true;
+ %false;
+ %true;
+ %true
+ ];
+ [
+ %true;
+ %false;
+ %false;
+ %false;
+ %false;
+ %false;
+ %false;
+ %false
+ ]
+ ]
+ }
+ ]
+ }
+] \ No newline at end of file
diff --git a/yql/essentials/udfs/common/string/test/canondata/test.test_BlockFind_/results.txt b/yql/essentials/udfs/common/string/test/canondata/test.test_BlockFind_/results.txt
new file mode 100644
index 00000000000..f6374e682e5
--- /dev/null
+++ b/yql/essentials/udfs/common/string/test/canondata/test.test_BlockFind_/results.txt
@@ -0,0 +1,69 @@
+[
+ {
+ "Write" = [
+ {
+ "Type" = [
+ "ListType";
+ [
+ "StructType";
+ [
+ [
+ "value";
+ [
+ "DataType";
+ "String"
+ ]
+ ];
+ [
+ "contains";
+ [
+ "DataType";
+ "Bool"
+ ]
+ ];
+ [
+ "levenstein";
+ [
+ "DataType";
+ "Uint64"
+ ]
+ ]
+ ]
+ ]
+ ];
+ "Data" = [
+ [
+ "fdsa";
+ %false;
+ "3"
+ ];
+ [
+ "aswedfg";
+ %true;
+ "5"
+ ];
+ [
+ "asdadsaasd";
+ %true;
+ "8"
+ ];
+ [
+ "gdsfsassas";
+ %true;
+ "8"
+ ];
+ [
+ "";
+ %false;
+ "2"
+ ];
+ [
+ "`\xD0\x9F\xD1\x80\xD0\xB8\xD0\xB2\xD0\xB5\xD1\x82, \xD0\xBC\xD0\xB8\xD1\x80!`";
+ %false;
+ "23"
+ ]
+ ]
+ }
+ ]
+ }
+] \ No newline at end of file
diff --git a/yql/essentials/udfs/common/string/test/canondata/test.test_BlockRemove_/results.txt b/yql/essentials/udfs/common/string/test/canondata/test.test_BlockRemove_/results.txt
new file mode 100644
index 00000000000..6fbf37a9f9b
--- /dev/null
+++ b/yql/essentials/udfs/common/string/test/canondata/test.test_BlockRemove_/results.txt
@@ -0,0 +1,173 @@
+[
+ {
+ "Write" = [
+ {
+ "Type" = [
+ "ListType";
+ [
+ "StructType";
+ [
+ [
+ "value";
+ [
+ "DataType";
+ "String"
+ ]
+ ];
+ [
+ "all";
+ [
+ "DataType";
+ "String"
+ ]
+ ];
+ [
+ "first";
+ [
+ "DataType";
+ "String"
+ ]
+ ];
+ [
+ "last";
+ [
+ "DataType";
+ "String"
+ ]
+ ];
+ [
+ "first2";
+ [
+ "DataType";
+ "String"
+ ]
+ ];
+ [
+ "last2";
+ [
+ "DataType";
+ "String"
+ ]
+ ];
+ [
+ "first3";
+ [
+ "DataType";
+ "String"
+ ]
+ ];
+ [
+ "last3";
+ [
+ "DataType";
+ "String"
+ ]
+ ];
+ [
+ "hwruall";
+ [
+ "DataType";
+ "String"
+ ]
+ ];
+ [
+ "hwrufirst";
+ [
+ "DataType";
+ "String"
+ ]
+ ];
+ [
+ "hwrulast";
+ [
+ "DataType";
+ "String"
+ ]
+ ]
+ ]
+ ]
+ ];
+ "Data" = [
+ [
+ "fdsa";
+ "fd";
+ "fds";
+ "fds";
+ "fda";
+ "fds";
+ "fdsa";
+ "fdsa";
+ "fdsa";
+ "fdsa";
+ "fdsa"
+ ];
+ [
+ "aswedfg";
+ "wedfg";
+ "swedfg";
+ "swedfg";
+ "swedfg";
+ "awedfg";
+ "aswedfg";
+ "aswedfg";
+ "aswedfg";
+ "aswedfg";
+ "aswedfg"
+ ];
+ [
+ "asdadsaasd";
+ "ddd";
+ "sdadsaasd";
+ "asdadsasd";
+ "sdadsaasd";
+ "asdadsaad";
+ "asdadsaasd";
+ "asdadsaasd";
+ "asdadsaasd";
+ "asdadsaasd";
+ "asdadsaasd"
+ ];
+ [
+ "gdsfsassas";
+ "gdf";
+ "gdsfsssas";
+ "gdsfsasss";
+ "gdfsassas";
+ "gdsfsassa";
+ "gdsfsassas";
+ "gdsfsassas";
+ "gdsfsassas";
+ "gdsfsassas";
+ "gdsfsassas"
+ ];
+ [
+ "";
+ "";
+ "";
+ "";
+ "";
+ "";
+ "";
+ "";
+ "";
+ "";
+ ""
+ ];
+ [
+ "`\xD0\x9F\xD1\x80\xD0\xB8\xD0\xB2\xD0\xB5\xD1\x82, \xD0\xBC\xD0\xB8\xD1\x80!`";
+ "`\xD0\x9F\xD1\x80\xD0\xB8\xD0\xB2\xD0\xB5\xD1\x82, \xD0\xBC\xD0\xB8\xD1\x80!`";
+ "`\xD0\x9F\xD1\x80\xD0\xB8\xD0\xB2\xD0\xB5\xD1\x82, \xD0\xBC\xD0\xB8\xD1\x80!`";
+ "`\xD0\x9F\xD1\x80\xD0\xB8\xD0\xB2\xD0\xB5\xD1\x82, \xD0\xBC\xD0\xB8\xD1\x80!`";
+ "`\xD0\x9F\xD1\x80\xD0\xB8\xD0\xB2\xD0\xB5\xD1\x82, \xD0\xBC\xD0\xB8\xD1\x80!`";
+ "`\xD0\x9F\xD1\x80\xD0\xB8\xD0\xB2\xD0\xB5\xD1\x82, \xD0\xBC\xD0\xB8\xD1\x80!`";
+ "`\xD0\x9F\xD1\x80\xD0\xB8\xD0\xB2\xD0\xB5\xD1\x82, \xD0\xBC\xD0\xB8\xD1\x80!`";
+ "`\xD0\x9F\xD1\x80\xD0\xB8\xD0\xB2\xD0\xB5\xD1\x82, \xD0\xBC\xD0\xB8\xD1\x80!`";
+ "\xD0\x9F\xD1\x80\xD0\xB8\xD0\xB2\xD0\xB5\xD1\x82, \xD0\xBC\xD0\xB8\xD1\x80!";
+ "\xD0\x9F\xD1\x80\xD0\xB8\xD0\xB2\xD0\xB5\xD1\x82, \xD0\xBC\xD0\xB8\xD1\x80!`";
+ "`\xD0\x9F\xD1\x80\xD0\xB8\xD0\xB2\xD0\xB5\xD1\x82, \xD0\xBC\xD0\xB8\xD1\x80!"
+ ]
+ ]
+ }
+ ]
+ }
+] \ No newline at end of file
diff --git a/yql/essentials/udfs/common/string/test/canondata/test.test_BlockReplace_/results.txt b/yql/essentials/udfs/common/string/test/canondata/test.test_BlockReplace_/results.txt
new file mode 100644
index 00000000000..2ac3566c61d
--- /dev/null
+++ b/yql/essentials/udfs/common/string/test/canondata/test.test_BlockReplace_/results.txt
@@ -0,0 +1,134 @@
+[
+ {
+ "Write" = [
+ {
+ "Type" = [
+ "ListType";
+ [
+ "StructType";
+ [
+ [
+ "value";
+ [
+ "DataType";
+ "String"
+ ]
+ ];
+ [
+ "all";
+ [
+ "DataType";
+ "String"
+ ]
+ ];
+ [
+ "first";
+ [
+ "DataType";
+ "String"
+ ]
+ ];
+ [
+ "last";
+ [
+ "DataType";
+ "String"
+ ]
+ ];
+ [
+ "first2";
+ [
+ "DataType";
+ "String"
+ ]
+ ];
+ [
+ "last2";
+ [
+ "DataType";
+ "String"
+ ]
+ ];
+ [
+ "first3";
+ [
+ "DataType";
+ "String"
+ ]
+ ];
+ [
+ "last3";
+ [
+ "DataType";
+ "String"
+ ]
+ ]
+ ]
+ ]
+ ];
+ "Data" = [
+ [
+ "fdsa";
+ "fdsa";
+ "fdsz";
+ "fdsz";
+ "fdszz";
+ "fdszz";
+ "fds";
+ "fds"
+ ];
+ [
+ "aswedfg";
+ "zzzwedfg";
+ "zswedfg";
+ "zswedfg";
+ "zzswedfg";
+ "zzswedfg";
+ "swedfg";
+ "swedfg"
+ ];
+ [
+ "asdadsaasd";
+ "zzzdadsazzzd";
+ "zsdadsaasd";
+ "asdadsazsd";
+ "zzsdadsaasd";
+ "asdadsazzsd";
+ "sdadsaasd";
+ "asdadsasd"
+ ];
+ [
+ "gdsfsassas";
+ "gdsfszzzszzz";
+ "gdsfszssas";
+ "gdsfsasszs";
+ "gdsfszzssas";
+ "gdsfsasszzs";
+ "gdsfsssas";
+ "gdsfsasss"
+ ];
+ [
+ "";
+ "";
+ "";
+ "";
+ "";
+ "";
+ "";
+ ""
+ ];
+ [
+ "`\xD0\x9F\xD1\x80\xD0\xB8\xD0\xB2\xD0\xB5\xD1\x82, \xD0\xBC\xD0\xB8\xD1\x80!`";
+ "`\xD0\x9F\xD1\x80\xD0\xB8\xD0\xB2\xD0\xB5\xD1\x82, \xD0\xBC\xD0\xB8\xD1\x80!`";
+ "`\xD0\x9F\xD1\x80\xD0\xB8\xD0\xB2\xD0\xB5\xD1\x82, \xD0\xBC\xD0\xB8\xD1\x80!`";
+ "`\xD0\x9F\xD1\x80\xD0\xB8\xD0\xB2\xD0\xB5\xD1\x82, \xD0\xBC\xD0\xB8\xD1\x80!`";
+ "`\xD0\x9F\xD1\x80\xD0\xB8\xD0\xB2\xD0\xB5\xD1\x82, \xD0\xBC\xD0\xB8\xD1\x80!`";
+ "`\xD0\x9F\xD1\x80\xD0\xB8\xD0\xB2\xD0\xB5\xD1\x82, \xD0\xBC\xD0\xB8\xD1\x80!`";
+ "`\xD0\x9F\xD1\x80\xD0\xB8\xD0\xB2\xD0\xB5\xD1\x82, \xD0\xBC\xD0\xB8\xD1\x80!`";
+ "`\xD0\x9F\xD1\x80\xD0\xB8\xD0\xB2\xD0\xB5\xD1\x82, \xD0\xBC\xD0\xB8\xD1\x80!`"
+ ]
+ ]
+ }
+ ]
+ }
+] \ No newline at end of file
diff --git a/yql/essentials/udfs/common/string/test/canondata/test.test_BlockStreamFormat_/results.txt b/yql/essentials/udfs/common/string/test/canondata/test.test_BlockStreamFormat_/results.txt
new file mode 100644
index 00000000000..b1bff8a57b8
--- /dev/null
+++ b/yql/essentials/udfs/common/string/test/canondata/test.test_BlockStreamFormat_/results.txt
@@ -0,0 +1,208 @@
+[
+ {
+ "Write" = [
+ {
+ "Type" = [
+ "ListType";
+ [
+ "StructType";
+ [
+ [
+ "value";
+ [
+ "DataType";
+ "String"
+ ]
+ ];
+ [
+ "right_pad";
+ [
+ "DataType";
+ "String"
+ ]
+ ];
+ [
+ "left_pad";
+ [
+ "DataType";
+ "String"
+ ]
+ ];
+ [
+ "right_pad_zero";
+ [
+ "DataType";
+ "String"
+ ]
+ ];
+ [
+ "left_pad_zero";
+ [
+ "DataType";
+ "String"
+ ]
+ ];
+ [
+ "hex";
+ [
+ "DataType";
+ "String"
+ ]
+ ];
+ [
+ "shex";
+ [
+ "DataType";
+ "String"
+ ]
+ ];
+ [
+ "bin";
+ [
+ "DataType";
+ "String"
+ ]
+ ];
+ [
+ "sbin";
+ [
+ "DataType";
+ "String"
+ ]
+ ];
+ [
+ "hex_text";
+ [
+ "DataType";
+ "String"
+ ]
+ ];
+ [
+ "bin_text";
+ [
+ "DataType";
+ "String"
+ ]
+ ];
+ [
+ "duration";
+ [
+ "DataType";
+ "String"
+ ]
+ ];
+ [
+ "quantity";
+ [
+ "DataType";
+ "String"
+ ]
+ ];
+ [
+ "bytes";
+ [
+ "DataType";
+ "String"
+ ]
+ ];
+ [
+ "prec";
+ [
+ "DataType";
+ "String"
+ ]
+ ]
+ ]
+ ]
+ ];
+ "Data" = [
+ [
+ "qwertyui";
+ "qwertyui ";
+ " qwertyui";
+ "qwertyui000000000000";
+ "000000000000qwertyui";
+ "0x00000000499602D2";
+ "-0x000000000000007B";
+ "0b0000000000000000000000000000000001001001100101100000001011010010";
+ "-0b0000000000000000000000000000000000000000000000000000000001111011";
+ "71 77 65 72 74 79 75 69";
+ "01110001 01110111 01100101 01110010 01110100 01111001 01110101 01101001";
+ "20m 34s";
+ "1.23G";
+ "1.15GiB";
+ "-0.009963"
+ ];
+ [
+ "asdfghjl";
+ "asdfghjl ";
+ " asdfghjl";
+ "asdfghjl000000000000";
+ "000000000000asdfghjl";
+ "0x000000024CB016EA";
+ "-0x00000000000001C8";
+ "0b0000000000000000000000000000001001001100101100000001011011101010";
+ "-0b0000000000000000000000000000000000000000000000000000000111001000";
+ "61 73 64 66 67 68 6A 6C";
+ "01100001 01110011 01100100 01100110 01100111 01101000 01101010 01101100";
+ "2h 44m 36s";
+ "9.88G";
+ "9.2GiB";
+ "-0.03694"
+ ];
+ [
+ "zxcvbnm?";
+ "zxcvbnm? ";
+ " zxcvbnm?";
+ "zxcvbnm?000000000000";
+ "000000000000zxcvbnm?";
+ "0x00000002540BE3FF";
+ "-0x0000000000000315";
+ "0b0000000000000000000000000000001001010100000010111110001111111111";
+ "-0b0000000000000000000000000000000000000000000000000000001100010101";
+ "7A 78 63 76 62 6E 6D 3F";
+ "01111010 01111000 01100011 01110110 01100010 01101110 01101101 00111111";
+ "2h 46m 40s";
+ "10G";
+ "9.31GiB";
+ "-0.06391"
+ ];
+ [
+ "12345678";
+ "12345678 ";
+ " 12345678";
+ "12345678000000000000";
+ "00000000000012345678";
+ "0x0000000000000000";
+ "0x0000000000000000";
+ "0b0000000000000000000000000000000000000000000000000000000000000000";
+ "0b0000000000000000000000000000000000000000000000000000000000000000";
+ "31 32 33 34 35 36 37 38";
+ "00110001 00110010 00110011 00110100 00110101 00110110 00110111 00111000";
+ "0us";
+ "0";
+ "0B";
+ "0"
+ ];
+ [
+ "!@#$%^&*";
+ "!@#$%^&* ";
+ " !@#$%^&*";
+ "!@#$%^&*000000000000";
+ "000000000000!@#$%^&*";
+ "0x0000000223557439";
+ "-0x00000000000003E7";
+ "0b0000000000000000000000000000001000100011010101010111010000111001";
+ "-0b0000000000000000000000000000000000000000000000000000001111100111";
+ "21 40 23 24 25 5E 26 2A";
+ "00100001 01000000 00100011 00100100 00100101 01011110 00100110 00101010";
+ "2h 33m 2s";
+ "9.18G";
+ "8.55GiB";
+ "-0.08092"
+ ]
+ ]
+ }
+ ]
+ }
+] \ No newline at end of file
diff --git a/yql/essentials/udfs/common/string/test/canondata/test.test_BlockStringUDF_/results.txt b/yql/essentials/udfs/common/string/test/canondata/test.test_BlockStringUDF_/results.txt
new file mode 100644
index 00000000000..a665105224f
--- /dev/null
+++ b/yql/essentials/udfs/common/string/test/canondata/test.test_BlockStringUDF_/results.txt
@@ -0,0 +1,169 @@
+[
+ {
+ "Write" = [
+ {
+ "Type" = [
+ "ListType";
+ [
+ "StructType";
+ [
+ [
+ "b32enc";
+ [
+ "DataType";
+ "String"
+ ]
+ ];
+ [
+ "b64enc";
+ [
+ "DataType";
+ "String"
+ ]
+ ];
+ [
+ "b64encu";
+ [
+ "DataType";
+ "String"
+ ]
+ ];
+ [
+ "cesc";
+ [
+ "DataType";
+ "String"
+ ]
+ ];
+ [
+ "cunesc";
+ [
+ "DataType";
+ "String"
+ ]
+ ];
+ [
+ "xenc";
+ [
+ "DataType";
+ "String"
+ ]
+ ];
+ [
+ "henc";
+ [
+ "DataType";
+ "String"
+ ]
+ ];
+ [
+ "hdec";
+ [
+ "DataType";
+ "String"
+ ]
+ ];
+ [
+ "cgesc";
+ [
+ "DataType";
+ "String"
+ ]
+ ];
+ [
+ "cgunesc";
+ [
+ "DataType";
+ "String"
+ ]
+ ];
+ [
+ "clps";
+ [
+ "DataType";
+ "String"
+ ]
+ ];
+ [
+ "strp";
+ [
+ "DataType";
+ "String"
+ ]
+ ];
+ [
+ "clpst";
+ [
+ "DataType";
+ "String"
+ ]
+ ]
+ ]
+ ]
+ ];
+ "Data" = [
+ [
+ "EAQCAILRO5SSA4TUPEQCAIDVNFXXAIC3EBOSI===";
+ "ICAgIXF3ZSBydHkgICB1aW9wIFsgXSQ=";
+ "ICAgIXF3ZSBydHkgICB1aW9wIFsgXSQ,";
+ " !qwe rty uiop [ ]$";
+ " !qwe rty uiop [ ]$";
+ "202020217177652072747920202075696F70205B205D24";
+ " !qwe rty uiop [ ]$";
+ " !qwe rty uiop [ ]$";
+ "+++!qwe+rty+++uiop+%5B+%5D$";
+ " !qwe rty uiop [ ]$";
+ " !qwe rty uiop [ ]$";
+ "!qwe rty uiop [ ]$";
+ "!qwe ..."
+ ];
+ [
+ "IBQXGIBAEAQCAIBAMRTGO2BANJVWYXDOHMTSKIBA";
+ "QGFzICAgICAgIGRmZ2ggamtsXG47JyUgIA==";
+ "QGFzICAgICAgIGRmZ2ggamtsXG47JyUgIA,,";
+ "@as dfgh jkl\\\\n;'% ";
+ "@as dfgh jkl\n;'% ";
+ "4061732020202020202064666768206A6B6C5C6E3B27252020";
+ "@as dfgh jkl\\n;&#39;% ";
+ "@as dfgh jkl\\n;'% ";
+ "@as+++++++dfgh+jkl%5Cn;%27%25++";
+ "@as dfgh jkl\\n;'% ";
+ "@as dfgh jkl\\n;'% ";
+ "@as dfgh jkl\\n;'%";
+ "@as ..."
+ ];
+ [
+ "EAQCAI32PBRQS5TCNYQASCQIEBWSYLRPH5PCAIBA";
+ "ICAgI3p4Ywl2Ym4gCQoIIG0sLi8/XiAgIA==";
+ "ICAgI3p4Ywl2Ym4gCQoIIG0sLi8_XiAgIA,,";
+ " #zxc\\tvbn \\t\\n\\x08 m,./?^ ";
+ " #zxc\tvbn \t\n\x08 m,./?^ ";
+ "202020237A78630976626E20090A08206D2C2E2F3F5E202020";
+ " #zxc\tvbn \t\n\x08 m,./?^ ";
+ " #zxc\tvbn \t\n\x08 m,./?^ ";
+ "+++%23zxc%09vbn+%09%0A%08+m%2C./%3F%5E+++";
+ " #zxc\tvbn \t\n\x08 m,./?^ ";
+ " #zxc vbn \x08 m,./?^ ";
+ "#zxc\tvbn \t\n\x08 m,./?^";
+ "#zxc ..."
+ ];
+ [
+ "GEQTEQBTEM2CINJFGZPDOJRYFI4SQMBJFVPT2KZMHQXD4===";
+ "MSEyQDMjNCQ1JTZeNyY4KjkoMCktXz0rLDwuPg==";
+ "MSEyQDMjNCQ1JTZeNyY4KjkoMCktXz0rLDwuPg,,";
+ "1!2@3#4$5%6^7&8*9(0)-_=+,<.>";
+ "1!2@3#4$5%6^7&8*9(0)-_=+,<.>";
+ "31213240332334243525365E3726382A392830292D5F3D2B2C3C2E3E";
+ "1!2@3#4$5%6^7&amp;8*9(0)-_=+,&lt;.&gt;";
+ "1!2@3#4$5%6^7&8*9(0)-_=+,<.>";
+ "1!2@3%234$5%256%5E7%268*9%280%29-_%3D%2B%2C%3C.%3E";
+ "1!2@3#4$5%6^7&8*9(0)-_= ,<.>";
+ "1!2@3#4$5%6^7&8*9(0)-_=+,<.>";
+ "1!2@3#4$5%6^7&8*9(0)-_=+,<.>";
+ "1!2@ ..."
+ ]
+ ]
+ }
+ ]
+ }
+] \ No newline at end of file
diff --git a/yql/essentials/udfs/common/string/test/canondata/test.test_BlockStringUnsafeUDF_/results.txt b/yql/essentials/udfs/common/string/test/canondata/test.test_BlockStringUnsafeUDF_/results.txt
new file mode 100644
index 00000000000..26b182f9343
--- /dev/null
+++ b/yql/essentials/udfs/common/string/test/canondata/test.test_BlockStringUnsafeUDF_/results.txt
@@ -0,0 +1,158 @@
+[
+ {
+ "Write" = [
+ {
+ "Type" = [
+ "ListType";
+ [
+ "StructType";
+ [
+ [
+ "b32dec";
+ [
+ "OptionalType";
+ [
+ "DataType";
+ "String"
+ ]
+ ]
+ ];
+ [
+ "b32sdec";
+ [
+ "OptionalType";
+ [
+ "DataType";
+ "String"
+ ]
+ ]
+ ];
+ [
+ "b64dec";
+ [
+ "OptionalType";
+ [
+ "DataType";
+ "String"
+ ]
+ ]
+ ];
+ [
+ "b64sdec";
+ [
+ "OptionalType";
+ [
+ "DataType";
+ "String"
+ ]
+ ]
+ ];
+ [
+ "xdec";
+ [
+ "OptionalType";
+ [
+ "DataType";
+ "String"
+ ]
+ ]
+ ]
+ ]
+ ]
+ ];
+ "Data" = [
+ [
+ [
+ " !qwe rty uiop [ ]$"
+ ];
+ [
+ " !qwe rty uiop [ ]$"
+ ];
+ [
+ [
+ "EAQCAILRO5SSA4TUPEQCAIDVNFXXAIC3EBOS"
+ ]
+ ];
+ #;
+ #
+ ];
+ [
+ [
+ [
+ "QIAEXLvMggAcAECCAFgAQUALyg=="
+ ]
+ ];
+ #;
+ [
+ " !qwe rty uiop [ ]$"
+ ];
+ [
+ " !qwe rty uiop [ ]$"
+ ];
+ #
+ ];
+ [
+ [
+ [
+ "0DQNA0D4P/93QP6/z4NA0DQP98Dxfg0DodA6PQ=="
+ ]
+ ];
+ #;
+ #;
+ #;
+ [
+ " !qwe rty uiop [ ]$"
+ ]
+ ];
+ [
+ [
+ "@as dfgh jkl\\n;'% "
+ ];
+ [
+ "@as dfgh jkl\\n;'% "
+ ];
+ [
+ [
+ "IBQXGIBAEAQCAIBAMRTGO2BANJVWYXDOHMTSKIBA"
+ ]
+ ];
+ [
+ [
+ "IBQXGIBAEAQCAIBAMRTGO2BANJVWYXDOHMTSKIBA"
+ ]
+ ];
+ #
+ ];
+ [
+ [
+ [
+ "gYoECABAgAQaIM6AAAAAubn0goBAAA=="
+ ]
+ ];
+ #;
+ [
+ "@as dfgh jkl\\n;'% "
+ ];
+ [
+ "@as dfgh jkl\\n;'% "
+ ];
+ #
+ ];
+ [
+ [
+ [
+ "4DwP70DQNA0DQNA0D3Pe9/wNA8DwfC6LxNh1/XdA0A=="
+ ]
+ ];
+ #;
+ #;
+ #;
+ [
+ "@as dfgh jkl\\n;'% "
+ ]
+ ]
+ ]
+ }
+ ]
+ }
+] \ No newline at end of file
diff --git a/yql/essentials/udfs/common/string/test/canondata/test.test_BlockTo_/results.txt b/yql/essentials/udfs/common/string/test/canondata/test.test_BlockTo_/results.txt
new file mode 100644
index 00000000000..143cfb76417
--- /dev/null
+++ b/yql/essentials/udfs/common/string/test/canondata/test.test_BlockTo_/results.txt
@@ -0,0 +1,88 @@
+[
+ {
+ "Write" = [
+ {
+ "Type" = [
+ "ListType";
+ [
+ "StructType";
+ [
+ [
+ "value";
+ [
+ "DataType";
+ "String"
+ ]
+ ];
+ [
+ "ascii_lower";
+ [
+ "DataType";
+ "String"
+ ]
+ ];
+ [
+ "ascii_upper";
+ [
+ "DataType";
+ "String"
+ ]
+ ];
+ [
+ "ascii_title";
+ [
+ "DataType";
+ "String"
+ ]
+ ]
+ ]
+ ]
+ ];
+ "Data" = [
+ [
+ "test";
+ "test";
+ "TEST";
+ "Test"
+ ];
+ [
+ "\xD1\x82\xD0\xB5\xD1\x81\xD1\x82";
+ "\xD1\x82\xD0\xB5\xD1\x81\xD1\x82";
+ "\xD1\x82\xD0\xB5\xD1\x81\xD1\x82";
+ "\xD1\x82\xD0\xB5\xD1\x81\xD1\x82"
+ ];
+ [
+ "TeSt";
+ "test";
+ "TEST";
+ "Test"
+ ];
+ [
+ "\xD1\x82\xD0\x95\xD1\x81\xD0\xA2";
+ "\xD1\x82\xD0\x95\xD1\x81\xD0\xA2";
+ "\xD1\x82\xD0\x95\xD1\x81\xD0\xA2";
+ "\xD1\x82\xD0\x95\xD1\x81\xD0\xA2"
+ ];
+ [
+ "Eyl\xC3\xBCl";
+ "eyl\xC3\xBCl";
+ "EYL\xC3\xBCL";
+ "Eyl\xC3\xBCl"
+ ];
+ [
+ "6";
+ "6";
+ "6";
+ "6"
+ ];
+ [
+ "";
+ "";
+ "";
+ ""
+ ]
+ ]
+ }
+ ]
+ }
+] \ No newline at end of file
diff --git a/yql/essentials/udfs/common/string/test/canondata/test.test_ExtendAndTake_/results.txt b/yql/essentials/udfs/common/string/test/canondata/test.test_ExtendAndTake_/results.txt
new file mode 100644
index 00000000000..81269c68153
--- /dev/null
+++ b/yql/essentials/udfs/common/string/test/canondata/test.test_ExtendAndTake_/results.txt
@@ -0,0 +1,60 @@
+[
+ {
+ "Write" = [
+ {
+ "Type" = [
+ "ListType";
+ [
+ "StructType";
+ [
+ [
+ "column0";
+ [
+ "ListType";
+ [
+ "DataType";
+ "String"
+ ]
+ ]
+ ];
+ [
+ "column1";
+ [
+ "OptionalType";
+ [
+ "DataType";
+ "String"
+ ]
+ ]
+ ]
+ ]
+ ]
+ ];
+ "Data" = [
+ [
+ [
+ "a";
+ "b";
+ "c"
+ ];
+ [
+ "b"
+ ]
+ ];
+ [
+ [
+ "d"
+ ];
+ [
+ "d"
+ ]
+ ];
+ [
+ [];
+ #
+ ]
+ ]
+ }
+ ]
+ }
+] \ No newline at end of file
diff --git a/yql/essentials/udfs/common/string/test/canondata/test.test_Find_/results.txt b/yql/essentials/udfs/common/string/test/canondata/test.test_Find_/results.txt
new file mode 100644
index 00000000000..cec53212501
--- /dev/null
+++ b/yql/essentials/udfs/common/string/test/canondata/test.test_Find_/results.txt
@@ -0,0 +1,147 @@
+[
+ {
+ "Write" = [
+ {
+ "Type" = [
+ "ListType";
+ [
+ "StructType";
+ [
+ [
+ "value";
+ [
+ "DataType";
+ "String"
+ ]
+ ];
+ [
+ "contains";
+ [
+ "DataType";
+ "Bool"
+ ]
+ ];
+ [
+ "prefix";
+ [
+ "DataType";
+ "Bool"
+ ]
+ ];
+ [
+ "starts";
+ [
+ "DataType";
+ "Bool"
+ ]
+ ];
+ [
+ "suffix";
+ [
+ "DataType";
+ "Bool"
+ ]
+ ];
+ [
+ "ends";
+ [
+ "DataType";
+ "Bool"
+ ]
+ ];
+ [
+ "find";
+ [
+ "DataType";
+ "Int64"
+ ]
+ ];
+ [
+ "rfind";
+ [
+ "DataType";
+ "Int64"
+ ]
+ ];
+ [
+ "levenstein";
+ [
+ "DataType";
+ "Uint64"
+ ]
+ ]
+ ]
+ ]
+ ];
+ "Data" = [
+ [
+ "fdsa";
+ %false;
+ %false;
+ %false;
+ %false;
+ %false;
+ "-1";
+ "-1";
+ "3"
+ ];
+ [
+ "aswedfg";
+ %true;
+ %true;
+ %true;
+ %false;
+ %false;
+ "0";
+ "0";
+ "5"
+ ];
+ [
+ "asdadsaasd";
+ %true;
+ %true;
+ %true;
+ %false;
+ %false;
+ "0";
+ "7";
+ "8"
+ ];
+ [
+ "gdsfsassas";
+ %true;
+ %false;
+ %false;
+ %true;
+ %true;
+ "5";
+ "8";
+ "8"
+ ];
+ [
+ "";
+ %false;
+ %false;
+ %false;
+ %false;
+ %false;
+ "-1";
+ "-1";
+ "2"
+ ];
+ [
+ "`\xD0\x9F\xD1\x80\xD0\xB8\xD0\xB2\xD0\xB5\xD1\x82, \xD0\xBC\xD0\xB8\xD1\x80!`";
+ %false;
+ %false;
+ %false;
+ %false;
+ %false;
+ "-1";
+ "-1";
+ "23"
+ ]
+ ]
+ }
+ ]
+ }
+] \ No newline at end of file
diff --git a/yql/essentials/udfs/common/string/test/canondata/test.test_List_/results.txt b/yql/essentials/udfs/common/string/test/canondata/test.test_List_/results.txt
new file mode 100644
index 00000000000..dac9a135756
--- /dev/null
+++ b/yql/essentials/udfs/common/string/test/canondata/test.test_List_/results.txt
@@ -0,0 +1,265 @@
+[
+ {
+ "Write" = [
+ {
+ "Type" = [
+ "ListType";
+ [
+ "StructType";
+ [
+ [
+ "value";
+ [
+ "DataType";
+ "String"
+ ]
+ ];
+ [
+ "equals_to_original";
+ [
+ "DataType";
+ "String"
+ ]
+ ];
+ [
+ "replace_delimeter";
+ [
+ "DataType";
+ "String"
+ ]
+ ];
+ [
+ "just_split";
+ [
+ "ListType";
+ [
+ "DataType";
+ "String"
+ ]
+ ]
+ ];
+ [
+ "first";
+ [
+ "OptionalType";
+ [
+ "DataType";
+ "String"
+ ]
+ ]
+ ];
+ [
+ "skip_empty";
+ [
+ "ListType";
+ [
+ "DataType";
+ "String"
+ ]
+ ]
+ ];
+ [
+ "multichar_delim_set";
+ [
+ "ListType";
+ [
+ "DataType";
+ "String"
+ ]
+ ]
+ ];
+ [
+ "multichar_delim_string";
+ [
+ "ListType";
+ [
+ "DataType";
+ "String"
+ ]
+ ]
+ ];
+ [
+ "limited";
+ [
+ "ListType";
+ [
+ "DataType";
+ "String"
+ ]
+ ]
+ ]
+ ]
+ ]
+ ];
+ "Data" = [
+ [
+ "a@b@c";
+ "a@b@c";
+ "a#b#c";
+ [
+ "a";
+ "b";
+ "c"
+ ];
+ [
+ "a"
+ ];
+ [
+ "a";
+ "b";
+ "c"
+ ];
+ [
+ "a";
+ "";
+ "";
+ "c"
+ ];
+ [
+ "a@";
+ "c"
+ ];
+ [
+ "a";
+ "b@c"
+ ]
+ ];
+ [
+ "@a@b@c";
+ "@a@b@c";
+ "#a#b#c";
+ [
+ "";
+ "a";
+ "b";
+ "c"
+ ];
+ [
+ ""
+ ];
+ [
+ "a";
+ "b";
+ "c"
+ ];
+ [
+ "";
+ "a";
+ "";
+ "";
+ "c"
+ ];
+ [
+ "@a@";
+ "c"
+ ];
+ [
+ "";
+ "a@b@c"
+ ]
+ ];
+ [
+ "@@@a@a";
+ "@@@a@a";
+ "###a#a";
+ [
+ "";
+ "";
+ "";
+ "a";
+ "a"
+ ];
+ [
+ ""
+ ];
+ [
+ "a";
+ "a"
+ ];
+ [
+ "";
+ "";
+ "";
+ "a";
+ "a"
+ ];
+ [
+ "@@@a@a"
+ ];
+ [
+ "";
+ "@@a@a"
+ ]
+ ];
+ [
+ "d#e#f";
+ "d#e#f";
+ "d#e#f";
+ [
+ "d#e#f"
+ ];
+ [
+ "d#e#f"
+ ];
+ [
+ "d#e#f"
+ ];
+ [
+ "d#e#f"
+ ];
+ [
+ "d#e#f"
+ ];
+ [
+ "d#e#f"
+ ]
+ ];
+ [
+ "d";
+ "d";
+ "d";
+ [
+ "d"
+ ];
+ [
+ "d"
+ ];
+ [
+ "d"
+ ];
+ [
+ "d"
+ ];
+ [
+ "d"
+ ];
+ [
+ "d"
+ ]
+ ];
+ [
+ "";
+ "";
+ "";
+ [
+ ""
+ ];
+ [
+ ""
+ ];
+ [];
+ [
+ ""
+ ];
+ [
+ ""
+ ];
+ [
+ ""
+ ]
+ ]
+ ]
+ }
+ ]
+ }
+] \ No newline at end of file
diff --git a/yql/essentials/udfs/common/string/test/canondata/test.test_List_v0_/results.txt b/yql/essentials/udfs/common/string/test/canondata/test.test_List_v0_/results.txt
new file mode 100644
index 00000000000..b149ad38a60
--- /dev/null
+++ b/yql/essentials/udfs/common/string/test/canondata/test.test_List_v0_/results.txt
@@ -0,0 +1,125 @@
+[
+ {
+ "Write" = [
+ {
+ "Type" = [
+ "ListType";
+ [
+ "StructType";
+ [
+ [
+ "value";
+ [
+ "DataType";
+ "String"
+ ]
+ ];
+ [
+ "not_equals_to_original";
+ [
+ "OptionalType";
+ [
+ "DataType";
+ "String"
+ ]
+ ]
+ ];
+ [
+ "not_equals_to_original_skip_empty";
+ [
+ "OptionalType";
+ [
+ "DataType";
+ "String"
+ ]
+ ]
+ ];
+ [
+ "equals_to_original";
+ [
+ "DataType";
+ "String"
+ ]
+ ];
+ [
+ "multichar";
+ [
+ "ListType";
+ [
+ "DataType";
+ "String"
+ ]
+ ]
+ ]
+ ]
+ ]
+ ];
+ "Data" = [
+ [
+ "a@b@c";
+ #;
+ #;
+ "a@b@c";
+ [
+ "a";
+ "b";
+ "c"
+ ]
+ ];
+ [
+ "@a@b@c";
+ #;
+ #;
+ "@a@b@c";
+ [
+ "a";
+ "b";
+ "c"
+ ]
+ ];
+ [
+ "@@@a@a";
+ [
+ "@@@a@a"
+ ];
+ [
+ "@@@a@a"
+ ];
+ "@@@a@a";
+ [
+ "a";
+ "a"
+ ]
+ ];
+ [
+ "d#e#f";
+ #;
+ #;
+ "d#e#f";
+ [
+ "d";
+ "e";
+ "f"
+ ]
+ ];
+ [
+ "d";
+ #;
+ #;
+ "d";
+ [
+ "d"
+ ]
+ ];
+ [
+ "";
+ #;
+ #;
+ "";
+ []
+ ]
+ ]
+ }
+ ]
+ }
+] \ No newline at end of file
diff --git a/yql/essentials/udfs/common/string/test/canondata/test.test_Remove_/results.txt b/yql/essentials/udfs/common/string/test/canondata/test.test_Remove_/results.txt
new file mode 100644
index 00000000000..6fbf37a9f9b
--- /dev/null
+++ b/yql/essentials/udfs/common/string/test/canondata/test.test_Remove_/results.txt
@@ -0,0 +1,173 @@
+[
+ {
+ "Write" = [
+ {
+ "Type" = [
+ "ListType";
+ [
+ "StructType";
+ [
+ [
+ "value";
+ [
+ "DataType";
+ "String"
+ ]
+ ];
+ [
+ "all";
+ [
+ "DataType";
+ "String"
+ ]
+ ];
+ [
+ "first";
+ [
+ "DataType";
+ "String"
+ ]
+ ];
+ [
+ "last";
+ [
+ "DataType";
+ "String"
+ ]
+ ];
+ [
+ "first2";
+ [
+ "DataType";
+ "String"
+ ]
+ ];
+ [
+ "last2";
+ [
+ "DataType";
+ "String"
+ ]
+ ];
+ [
+ "first3";
+ [
+ "DataType";
+ "String"
+ ]
+ ];
+ [
+ "last3";
+ [
+ "DataType";
+ "String"
+ ]
+ ];
+ [
+ "hwruall";
+ [
+ "DataType";
+ "String"
+ ]
+ ];
+ [
+ "hwrufirst";
+ [
+ "DataType";
+ "String"
+ ]
+ ];
+ [
+ "hwrulast";
+ [
+ "DataType";
+ "String"
+ ]
+ ]
+ ]
+ ]
+ ];
+ "Data" = [
+ [
+ "fdsa";
+ "fd";
+ "fds";
+ "fds";
+ "fda";
+ "fds";
+ "fdsa";
+ "fdsa";
+ "fdsa";
+ "fdsa";
+ "fdsa"
+ ];
+ [
+ "aswedfg";
+ "wedfg";
+ "swedfg";
+ "swedfg";
+ "swedfg";
+ "awedfg";
+ "aswedfg";
+ "aswedfg";
+ "aswedfg";
+ "aswedfg";
+ "aswedfg"
+ ];
+ [
+ "asdadsaasd";
+ "ddd";
+ "sdadsaasd";
+ "asdadsasd";
+ "sdadsaasd";
+ "asdadsaad";
+ "asdadsaasd";
+ "asdadsaasd";
+ "asdadsaasd";
+ "asdadsaasd";
+ "asdadsaasd"
+ ];
+ [
+ "gdsfsassas";
+ "gdf";
+ "gdsfsssas";
+ "gdsfsasss";
+ "gdfsassas";
+ "gdsfsassa";
+ "gdsfsassas";
+ "gdsfsassas";
+ "gdsfsassas";
+ "gdsfsassas";
+ "gdsfsassas"
+ ];
+ [
+ "";
+ "";
+ "";
+ "";
+ "";
+ "";
+ "";
+ "";
+ "";
+ "";
+ ""
+ ];
+ [
+ "`\xD0\x9F\xD1\x80\xD0\xB8\xD0\xB2\xD0\xB5\xD1\x82, \xD0\xBC\xD0\xB8\xD1\x80!`";
+ "`\xD0\x9F\xD1\x80\xD0\xB8\xD0\xB2\xD0\xB5\xD1\x82, \xD0\xBC\xD0\xB8\xD1\x80!`";
+ "`\xD0\x9F\xD1\x80\xD0\xB8\xD0\xB2\xD0\xB5\xD1\x82, \xD0\xBC\xD0\xB8\xD1\x80!`";
+ "`\xD0\x9F\xD1\x80\xD0\xB8\xD0\xB2\xD0\xB5\xD1\x82, \xD0\xBC\xD0\xB8\xD1\x80!`";
+ "`\xD0\x9F\xD1\x80\xD0\xB8\xD0\xB2\xD0\xB5\xD1\x82, \xD0\xBC\xD0\xB8\xD1\x80!`";
+ "`\xD0\x9F\xD1\x80\xD0\xB8\xD0\xB2\xD0\xB5\xD1\x82, \xD0\xBC\xD0\xB8\xD1\x80!`";
+ "`\xD0\x9F\xD1\x80\xD0\xB8\xD0\xB2\xD0\xB5\xD1\x82, \xD0\xBC\xD0\xB8\xD1\x80!`";
+ "`\xD0\x9F\xD1\x80\xD0\xB8\xD0\xB2\xD0\xB5\xD1\x82, \xD0\xBC\xD0\xB8\xD1\x80!`";
+ "\xD0\x9F\xD1\x80\xD0\xB8\xD0\xB2\xD0\xB5\xD1\x82, \xD0\xBC\xD0\xB8\xD1\x80!";
+ "\xD0\x9F\xD1\x80\xD0\xB8\xD0\xB2\xD0\xB5\xD1\x82, \xD0\xBC\xD0\xB8\xD1\x80!`";
+ "`\xD0\x9F\xD1\x80\xD0\xB8\xD0\xB2\xD0\xB5\xD1\x82, \xD0\xBC\xD0\xB8\xD1\x80!"
+ ]
+ ]
+ }
+ ]
+ }
+] \ No newline at end of file
diff --git a/yql/essentials/udfs/common/string/test/canondata/test.test_ReplaceFirstLast_/results.txt b/yql/essentials/udfs/common/string/test/canondata/test.test_ReplaceFirstLast_/results.txt
new file mode 100644
index 00000000000..9320ac1c18a
--- /dev/null
+++ b/yql/essentials/udfs/common/string/test/canondata/test.test_ReplaceFirstLast_/results.txt
@@ -0,0 +1,84 @@
+[
+ {
+ "Write" = [
+ {
+ "Type" = [
+ "ListType";
+ [
+ "StructType";
+ [
+ [
+ "column0";
+ [
+ "DataType";
+ "String"
+ ]
+ ];
+ [
+ "column1";
+ [
+ "DataType";
+ "String"
+ ]
+ ];
+ [
+ "column2";
+ [
+ "DataType";
+ "String"
+ ]
+ ];
+ [
+ "column3";
+ [
+ "DataType";
+ "String"
+ ]
+ ];
+ [
+ "column4";
+ [
+ "DataType";
+ "String"
+ ]
+ ];
+ [
+ "column5";
+ [
+ "DataType";
+ "String"
+ ]
+ ];
+ [
+ "column6";
+ [
+ "DataType";
+ "String"
+ ]
+ ];
+ [
+ "column7";
+ [
+ "DataType";
+ "String"
+ ]
+ ]
+ ]
+ ]
+ ];
+ "Data" = [
+ [
+ "gzas";
+ "gzzzsas";
+ "gsas";
+ "gasas";
+ "gasz";
+ "gaszzzs";
+ "gass";
+ "gasas"
+ ]
+ ]
+ }
+ ]
+ }
+] \ No newline at end of file
diff --git a/yql/essentials/udfs/common/string/test/canondata/test.test_Replace_/results.txt b/yql/essentials/udfs/common/string/test/canondata/test.test_Replace_/results.txt
new file mode 100644
index 00000000000..2ac3566c61d
--- /dev/null
+++ b/yql/essentials/udfs/common/string/test/canondata/test.test_Replace_/results.txt
@@ -0,0 +1,134 @@
+[
+ {
+ "Write" = [
+ {
+ "Type" = [
+ "ListType";
+ [
+ "StructType";
+ [
+ [
+ "value";
+ [
+ "DataType";
+ "String"
+ ]
+ ];
+ [
+ "all";
+ [
+ "DataType";
+ "String"
+ ]
+ ];
+ [
+ "first";
+ [
+ "DataType";
+ "String"
+ ]
+ ];
+ [
+ "last";
+ [
+ "DataType";
+ "String"
+ ]
+ ];
+ [
+ "first2";
+ [
+ "DataType";
+ "String"
+ ]
+ ];
+ [
+ "last2";
+ [
+ "DataType";
+ "String"
+ ]
+ ];
+ [
+ "first3";
+ [
+ "DataType";
+ "String"
+ ]
+ ];
+ [
+ "last3";
+ [
+ "DataType";
+ "String"
+ ]
+ ]
+ ]
+ ]
+ ];
+ "Data" = [
+ [
+ "fdsa";
+ "fdsa";
+ "fdsz";
+ "fdsz";
+ "fdszz";
+ "fdszz";
+ "fds";
+ "fds"
+ ];
+ [
+ "aswedfg";
+ "zzzwedfg";
+ "zswedfg";
+ "zswedfg";
+ "zzswedfg";
+ "zzswedfg";
+ "swedfg";
+ "swedfg"
+ ];
+ [
+ "asdadsaasd";
+ "zzzdadsazzzd";
+ "zsdadsaasd";
+ "asdadsazsd";
+ "zzsdadsaasd";
+ "asdadsazzsd";
+ "sdadsaasd";
+ "asdadsasd"
+ ];
+ [
+ "gdsfsassas";
+ "gdsfszzzszzz";
+ "gdsfszssas";
+ "gdsfsasszs";
+ "gdsfszzssas";
+ "gdsfsasszzs";
+ "gdsfsssas";
+ "gdsfsasss"
+ ];
+ [
+ "";
+ "";
+ "";
+ "";
+ "";
+ "";
+ "";
+ ""
+ ];
+ [
+ "`\xD0\x9F\xD1\x80\xD0\xB8\xD0\xB2\xD0\xB5\xD1\x82, \xD0\xBC\xD0\xB8\xD1\x80!`";
+ "`\xD0\x9F\xD1\x80\xD0\xB8\xD0\xB2\xD0\xB5\xD1\x82, \xD0\xBC\xD0\xB8\xD1\x80!`";
+ "`\xD0\x9F\xD1\x80\xD0\xB8\xD0\xB2\xD0\xB5\xD1\x82, \xD0\xBC\xD0\xB8\xD1\x80!`";
+ "`\xD0\x9F\xD1\x80\xD0\xB8\xD0\xB2\xD0\xB5\xD1\x82, \xD0\xBC\xD0\xB8\xD1\x80!`";
+ "`\xD0\x9F\xD1\x80\xD0\xB8\xD0\xB2\xD0\xB5\xD1\x82, \xD0\xBC\xD0\xB8\xD1\x80!`";
+ "`\xD0\x9F\xD1\x80\xD0\xB8\xD0\xB2\xD0\xB5\xD1\x82, \xD0\xBC\xD0\xB8\xD1\x80!`";
+ "`\xD0\x9F\xD1\x80\xD0\xB8\xD0\xB2\xD0\xB5\xD1\x82, \xD0\xBC\xD0\xB8\xD1\x80!`";
+ "`\xD0\x9F\xD1\x80\xD0\xB8\xD0\xB2\xD0\xB5\xD1\x82, \xD0\xBC\xD0\xB8\xD1\x80!`"
+ ]
+ ]
+ }
+ ]
+ }
+] \ No newline at end of file
diff --git a/yql/essentials/udfs/common/string/test/canondata/test.test_StreamFormat_/results.txt b/yql/essentials/udfs/common/string/test/canondata/test.test_StreamFormat_/results.txt
new file mode 100644
index 00000000000..b1bff8a57b8
--- /dev/null
+++ b/yql/essentials/udfs/common/string/test/canondata/test.test_StreamFormat_/results.txt
@@ -0,0 +1,208 @@
+[
+ {
+ "Write" = [
+ {
+ "Type" = [
+ "ListType";
+ [
+ "StructType";
+ [
+ [
+ "value";
+ [
+ "DataType";
+ "String"
+ ]
+ ];
+ [
+ "right_pad";
+ [
+ "DataType";
+ "String"
+ ]
+ ];
+ [
+ "left_pad";
+ [
+ "DataType";
+ "String"
+ ]
+ ];
+ [
+ "right_pad_zero";
+ [
+ "DataType";
+ "String"
+ ]
+ ];
+ [
+ "left_pad_zero";
+ [
+ "DataType";
+ "String"
+ ]
+ ];
+ [
+ "hex";
+ [
+ "DataType";
+ "String"
+ ]
+ ];
+ [
+ "shex";
+ [
+ "DataType";
+ "String"
+ ]
+ ];
+ [
+ "bin";
+ [
+ "DataType";
+ "String"
+ ]
+ ];
+ [
+ "sbin";
+ [
+ "DataType";
+ "String"
+ ]
+ ];
+ [
+ "hex_text";
+ [
+ "DataType";
+ "String"
+ ]
+ ];
+ [
+ "bin_text";
+ [
+ "DataType";
+ "String"
+ ]
+ ];
+ [
+ "duration";
+ [
+ "DataType";
+ "String"
+ ]
+ ];
+ [
+ "quantity";
+ [
+ "DataType";
+ "String"
+ ]
+ ];
+ [
+ "bytes";
+ [
+ "DataType";
+ "String"
+ ]
+ ];
+ [
+ "prec";
+ [
+ "DataType";
+ "String"
+ ]
+ ]
+ ]
+ ]
+ ];
+ "Data" = [
+ [
+ "qwertyui";
+ "qwertyui ";
+ " qwertyui";
+ "qwertyui000000000000";
+ "000000000000qwertyui";
+ "0x00000000499602D2";
+ "-0x000000000000007B";
+ "0b0000000000000000000000000000000001001001100101100000001011010010";
+ "-0b0000000000000000000000000000000000000000000000000000000001111011";
+ "71 77 65 72 74 79 75 69";
+ "01110001 01110111 01100101 01110010 01110100 01111001 01110101 01101001";
+ "20m 34s";
+ "1.23G";
+ "1.15GiB";
+ "-0.009963"
+ ];
+ [
+ "asdfghjl";
+ "asdfghjl ";
+ " asdfghjl";
+ "asdfghjl000000000000";
+ "000000000000asdfghjl";
+ "0x000000024CB016EA";
+ "-0x00000000000001C8";
+ "0b0000000000000000000000000000001001001100101100000001011011101010";
+ "-0b0000000000000000000000000000000000000000000000000000000111001000";
+ "61 73 64 66 67 68 6A 6C";
+ "01100001 01110011 01100100 01100110 01100111 01101000 01101010 01101100";
+ "2h 44m 36s";
+ "9.88G";
+ "9.2GiB";
+ "-0.03694"
+ ];
+ [
+ "zxcvbnm?";
+ "zxcvbnm? ";
+ " zxcvbnm?";
+ "zxcvbnm?000000000000";
+ "000000000000zxcvbnm?";
+ "0x00000002540BE3FF";
+ "-0x0000000000000315";
+ "0b0000000000000000000000000000001001010100000010111110001111111111";
+ "-0b0000000000000000000000000000000000000000000000000000001100010101";
+ "7A 78 63 76 62 6E 6D 3F";
+ "01111010 01111000 01100011 01110110 01100010 01101110 01101101 00111111";
+ "2h 46m 40s";
+ "10G";
+ "9.31GiB";
+ "-0.06391"
+ ];
+ [
+ "12345678";
+ "12345678 ";
+ " 12345678";
+ "12345678000000000000";
+ "00000000000012345678";
+ "0x0000000000000000";
+ "0x0000000000000000";
+ "0b0000000000000000000000000000000000000000000000000000000000000000";
+ "0b0000000000000000000000000000000000000000000000000000000000000000";
+ "31 32 33 34 35 36 37 38";
+ "00110001 00110010 00110011 00110100 00110101 00110110 00110111 00111000";
+ "0us";
+ "0";
+ "0B";
+ "0"
+ ];
+ [
+ "!@#$%^&*";
+ "!@#$%^&* ";
+ " !@#$%^&*";
+ "!@#$%^&*000000000000";
+ "000000000000!@#$%^&*";
+ "0x0000000223557439";
+ "-0x00000000000003E7";
+ "0b0000000000000000000000000000001000100011010101010111010000111001";
+ "-0b0000000000000000000000000000000000000000000000000000001111100111";
+ "21 40 23 24 25 5E 26 2A";
+ "00100001 01000000 00100011 00100100 00100101 01011110 00100110 00101010";
+ "2h 33m 2s";
+ "9.18G";
+ "8.55GiB";
+ "-0.08092"
+ ]
+ ]
+ }
+ ]
+ }
+] \ No newline at end of file
diff --git a/yql/essentials/udfs/common/string/test/canondata/test.test_StringUDF_/results.txt b/yql/essentials/udfs/common/string/test/canondata/test.test_StringUDF_/results.txt
new file mode 100644
index 00000000000..a665105224f
--- /dev/null
+++ b/yql/essentials/udfs/common/string/test/canondata/test.test_StringUDF_/results.txt
@@ -0,0 +1,169 @@
+[
+ {
+ "Write" = [
+ {
+ "Type" = [
+ "ListType";
+ [
+ "StructType";
+ [
+ [
+ "b32enc";
+ [
+ "DataType";
+ "String"
+ ]
+ ];
+ [
+ "b64enc";
+ [
+ "DataType";
+ "String"
+ ]
+ ];
+ [
+ "b64encu";
+ [
+ "DataType";
+ "String"
+ ]
+ ];
+ [
+ "cesc";
+ [
+ "DataType";
+ "String"
+ ]
+ ];
+ [
+ "cunesc";
+ [
+ "DataType";
+ "String"
+ ]
+ ];
+ [
+ "xenc";
+ [
+ "DataType";
+ "String"
+ ]
+ ];
+ [
+ "henc";
+ [
+ "DataType";
+ "String"
+ ]
+ ];
+ [
+ "hdec";
+ [
+ "DataType";
+ "String"
+ ]
+ ];
+ [
+ "cgesc";
+ [
+ "DataType";
+ "String"
+ ]
+ ];
+ [
+ "cgunesc";
+ [
+ "DataType";
+ "String"
+ ]
+ ];
+ [
+ "clps";
+ [
+ "DataType";
+ "String"
+ ]
+ ];
+ [
+ "strp";
+ [
+ "DataType";
+ "String"
+ ]
+ ];
+ [
+ "clpst";
+ [
+ "DataType";
+ "String"
+ ]
+ ]
+ ]
+ ]
+ ];
+ "Data" = [
+ [
+ "EAQCAILRO5SSA4TUPEQCAIDVNFXXAIC3EBOSI===";
+ "ICAgIXF3ZSBydHkgICB1aW9wIFsgXSQ=";
+ "ICAgIXF3ZSBydHkgICB1aW9wIFsgXSQ,";
+ " !qwe rty uiop [ ]$";
+ " !qwe rty uiop [ ]$";
+ "202020217177652072747920202075696F70205B205D24";
+ " !qwe rty uiop [ ]$";
+ " !qwe rty uiop [ ]$";
+ "+++!qwe+rty+++uiop+%5B+%5D$";
+ " !qwe rty uiop [ ]$";
+ " !qwe rty uiop [ ]$";
+ "!qwe rty uiop [ ]$";
+ "!qwe ..."
+ ];
+ [
+ "IBQXGIBAEAQCAIBAMRTGO2BANJVWYXDOHMTSKIBA";
+ "QGFzICAgICAgIGRmZ2ggamtsXG47JyUgIA==";
+ "QGFzICAgICAgIGRmZ2ggamtsXG47JyUgIA,,";
+ "@as dfgh jkl\\\\n;'% ";
+ "@as dfgh jkl\n;'% ";
+ "4061732020202020202064666768206A6B6C5C6E3B27252020";
+ "@as dfgh jkl\\n;&#39;% ";
+ "@as dfgh jkl\\n;'% ";
+ "@as+++++++dfgh+jkl%5Cn;%27%25++";
+ "@as dfgh jkl\\n;'% ";
+ "@as dfgh jkl\\n;'% ";
+ "@as dfgh jkl\\n;'%";
+ "@as ..."
+ ];
+ [
+ "EAQCAI32PBRQS5TCNYQASCQIEBWSYLRPH5PCAIBA";
+ "ICAgI3p4Ywl2Ym4gCQoIIG0sLi8/XiAgIA==";
+ "ICAgI3p4Ywl2Ym4gCQoIIG0sLi8_XiAgIA,,";
+ " #zxc\\tvbn \\t\\n\\x08 m,./?^ ";
+ " #zxc\tvbn \t\n\x08 m,./?^ ";
+ "202020237A78630976626E20090A08206D2C2E2F3F5E202020";
+ " #zxc\tvbn \t\n\x08 m,./?^ ";
+ " #zxc\tvbn \t\n\x08 m,./?^ ";
+ "+++%23zxc%09vbn+%09%0A%08+m%2C./%3F%5E+++";
+ " #zxc\tvbn \t\n\x08 m,./?^ ";
+ " #zxc vbn \x08 m,./?^ ";
+ "#zxc\tvbn \t\n\x08 m,./?^";
+ "#zxc ..."
+ ];
+ [
+ "GEQTEQBTEM2CINJFGZPDOJRYFI4SQMBJFVPT2KZMHQXD4===";
+ "MSEyQDMjNCQ1JTZeNyY4KjkoMCktXz0rLDwuPg==";
+ "MSEyQDMjNCQ1JTZeNyY4KjkoMCktXz0rLDwuPg,,";
+ "1!2@3#4$5%6^7&8*9(0)-_=+,<.>";
+ "1!2@3#4$5%6^7&8*9(0)-_=+,<.>";
+ "31213240332334243525365E3726382A392830292D5F3D2B2C3C2E3E";
+ "1!2@3#4$5%6^7&amp;8*9(0)-_=+,&lt;.&gt;";
+ "1!2@3#4$5%6^7&8*9(0)-_=+,<.>";
+ "1!2@3%234$5%256%5E7%268*9%280%29-_%3D%2B%2C%3C.%3E";
+ "1!2@3#4$5%6^7&8*9(0)-_= ,<.>";
+ "1!2@3#4$5%6^7&8*9(0)-_=+,<.>";
+ "1!2@3#4$5%6^7&8*9(0)-_=+,<.>";
+ "1!2@ ..."
+ ]
+ ]
+ }
+ ]
+ }
+] \ No newline at end of file
diff --git a/yql/essentials/udfs/common/string/test/canondata/test.test_StringUnsafeUDF_/results.txt b/yql/essentials/udfs/common/string/test/canondata/test.test_StringUnsafeUDF_/results.txt
new file mode 100644
index 00000000000..26b182f9343
--- /dev/null
+++ b/yql/essentials/udfs/common/string/test/canondata/test.test_StringUnsafeUDF_/results.txt
@@ -0,0 +1,158 @@
+[
+ {
+ "Write" = [
+ {
+ "Type" = [
+ "ListType";
+ [
+ "StructType";
+ [
+ [
+ "b32dec";
+ [
+ "OptionalType";
+ [
+ "DataType";
+ "String"
+ ]
+ ]
+ ];
+ [
+ "b32sdec";
+ [
+ "OptionalType";
+ [
+ "DataType";
+ "String"
+ ]
+ ]
+ ];
+ [
+ "b64dec";
+ [
+ "OptionalType";
+ [
+ "DataType";
+ "String"
+ ]
+ ]
+ ];
+ [
+ "b64sdec";
+ [
+ "OptionalType";
+ [
+ "DataType";
+ "String"
+ ]
+ ]
+ ];
+ [
+ "xdec";
+ [
+ "OptionalType";
+ [
+ "DataType";
+ "String"
+ ]
+ ]
+ ]
+ ]
+ ]
+ ];
+ "Data" = [
+ [
+ [
+ " !qwe rty uiop [ ]$"
+ ];
+ [
+ " !qwe rty uiop [ ]$"
+ ];
+ [
+ [
+ "EAQCAILRO5SSA4TUPEQCAIDVNFXXAIC3EBOS"
+ ]
+ ];
+ #;
+ #
+ ];
+ [
+ [
+ [
+ "QIAEXLvMggAcAECCAFgAQUALyg=="
+ ]
+ ];
+ #;
+ [
+ " !qwe rty uiop [ ]$"
+ ];
+ [
+ " !qwe rty uiop [ ]$"
+ ];
+ #
+ ];
+ [
+ [
+ [
+ "0DQNA0D4P/93QP6/z4NA0DQP98Dxfg0DodA6PQ=="
+ ]
+ ];
+ #;
+ #;
+ #;
+ [
+ " !qwe rty uiop [ ]$"
+ ]
+ ];
+ [
+ [
+ "@as dfgh jkl\\n;'% "
+ ];
+ [
+ "@as dfgh jkl\\n;'% "
+ ];
+ [
+ [
+ "IBQXGIBAEAQCAIBAMRTGO2BANJVWYXDOHMTSKIBA"
+ ]
+ ];
+ [
+ [
+ "IBQXGIBAEAQCAIBAMRTGO2BANJVWYXDOHMTSKIBA"
+ ]
+ ];
+ #
+ ];
+ [
+ [
+ [
+ "gYoECABAgAQaIM6AAAAAubn0goBAAA=="
+ ]
+ ];
+ #;
+ [
+ "@as dfgh jkl\\n;'% "
+ ];
+ [
+ "@as dfgh jkl\\n;'% "
+ ];
+ #
+ ];
+ [
+ [
+ [
+ "4DwP70DQNA0DQNA0D3Pe9/wNA8DwfC6LxNh1/XdA0A=="
+ ]
+ ];
+ #;
+ #;
+ #;
+ [
+ "@as dfgh jkl\\n;'% "
+ ]
+ ]
+ ]
+ }
+ ]
+ }
+] \ No newline at end of file
diff --git a/yql/essentials/udfs/common/string/test/canondata/test.test_To_/results.txt b/yql/essentials/udfs/common/string/test/canondata/test.test_To_/results.txt
new file mode 100644
index 00000000000..441e62fd21b
--- /dev/null
+++ b/yql/essentials/udfs/common/string/test/canondata/test.test_To_/results.txt
@@ -0,0 +1,294 @@
+[
+ {
+ "Write" = [
+ {
+ "Type" = [
+ "ListType";
+ [
+ "StructType";
+ [
+ [
+ "value";
+ [
+ "DataType";
+ "String"
+ ]
+ ];
+ [
+ "ascii_lower";
+ [
+ "DataType";
+ "String"
+ ]
+ ];
+ [
+ "ascii_upper";
+ [
+ "DataType";
+ "String"
+ ]
+ ];
+ [
+ "ascii_title";
+ [
+ "DataType";
+ "String"
+ ]
+ ];
+ [
+ "lower";
+ [
+ "OptionalType";
+ [
+ "DataType";
+ "String"
+ ]
+ ]
+ ];
+ [
+ "upper";
+ [
+ "OptionalType";
+ [
+ "DataType";
+ "String"
+ ]
+ ]
+ ];
+ [
+ "title";
+ [
+ "OptionalType";
+ [
+ "DataType";
+ "String"
+ ]
+ ]
+ ];
+ [
+ "reverse";
+ [
+ "OptionalType";
+ [
+ "DataType";
+ "String"
+ ]
+ ]
+ ];
+ [
+ "byte_list";
+ [
+ "ListType";
+ [
+ "DataType";
+ "Uint8"
+ ]
+ ]
+ ];
+ [
+ "from_byte_list";
+ [
+ "DataType";
+ "String"
+ ]
+ ];
+ [
+ "from_lazy_byte_list";
+ [
+ "DataType";
+ "String"
+ ]
+ ]
+ ]
+ ]
+ ];
+ "Data" = [
+ [
+ "test";
+ "test";
+ "TEST";
+ "Test";
+ [
+ "test"
+ ];
+ [
+ "TEST"
+ ];
+ [
+ "Test"
+ ];
+ [
+ "tset"
+ ];
+ [
+ "116";
+ "101";
+ "115";
+ "116"
+ ];
+ "test";
+ "test"
+ ];
+ [
+ "\xD1\x82\xD0\xB5\xD1\x81\xD1\x82";
+ "\xD1\x82\xD0\xB5\xD1\x81\xD1\x82";
+ "\xD1\x82\xD0\xB5\xD1\x81\xD1\x82";
+ "\xD1\x82\xD0\xB5\xD1\x81\xD1\x82";
+ [
+ "\xD1\x82\xD0\xB5\xD1\x81\xD1\x82"
+ ];
+ [
+ "\xD0\xA2\xD0\x95\xD0\xA1\xD0\xA2"
+ ];
+ [
+ "\xD0\xA2\xD0\xB5\xD1\x81\xD1\x82"
+ ];
+ [
+ "\xD1\x82\xD1\x81\xD0\xB5\xD1\x82"
+ ];
+ [
+ "209";
+ "130";
+ "208";
+ "181";
+ "209";
+ "129";
+ "209";
+ "130"
+ ];
+ "\xD1\x82\xD0\xB5\xD1\x81\xD1\x82";
+ "\xD1\x82\xD0\xB5\xD1\x81\xD1\x82"
+ ];
+ [
+ "TeSt";
+ "test";
+ "TEST";
+ "Test";
+ [
+ "test"
+ ];
+ [
+ "TEST"
+ ];
+ [
+ "Test"
+ ];
+ [
+ "tSeT"
+ ];
+ [
+ "84";
+ "101";
+ "83";
+ "116"
+ ];
+ "TeSt";
+ "TeSt"
+ ];
+ [
+ "\xD1\x82\xD0\x95\xD1\x81\xD0\xA2";
+ "\xD1\x82\xD0\x95\xD1\x81\xD0\xA2";
+ "\xD1\x82\xD0\x95\xD1\x81\xD0\xA2";
+ "\xD1\x82\xD0\x95\xD1\x81\xD0\xA2";
+ [
+ "\xD1\x82\xD0\xB5\xD1\x81\xD1\x82"
+ ];
+ [
+ "\xD0\xA2\xD0\x95\xD0\xA1\xD0\xA2"
+ ];
+ [
+ "\xD0\xA2\xD0\xB5\xD1\x81\xD1\x82"
+ ];
+ [
+ "\xD0\xA2\xD1\x81\xD0\x95\xD1\x82"
+ ];
+ [
+ "209";
+ "130";
+ "208";
+ "149";
+ "209";
+ "129";
+ "208";
+ "162"
+ ];
+ "\xD1\x82\xD0\x95\xD1\x81\xD0\xA2";
+ "\xD1\x82\xD0\x95\xD1\x81\xD0\xA2"
+ ];
+ [
+ "Eyl\xC3\xBCl";
+ "eyl\xC3\xBCl";
+ "EYL\xC3\xBCL";
+ "Eyl\xC3\xBCl";
+ [
+ "eyl\xC3\xBCl"
+ ];
+ [
+ "EYL\xC3\x9CL"
+ ];
+ [
+ "Eyl\xC3\xBCl"
+ ];
+ [
+ "l\xC3\xBClyE"
+ ];
+ [
+ "69";
+ "121";
+ "108";
+ "195";
+ "188";
+ "108"
+ ];
+ "Eyl\xC3\xBCl";
+ "Eyl\xC3\xBCl"
+ ];
+ [
+ "6";
+ "6";
+ "6";
+ "6";
+ [
+ "6"
+ ];
+ [
+ "6"
+ ];
+ [
+ "6"
+ ];
+ [
+ "6"
+ ];
+ [
+ "54"
+ ];
+ "6";
+ "6"
+ ];
+ [
+ "";
+ "";
+ "";
+ "";
+ [
+ ""
+ ];
+ [
+ ""
+ ];
+ [
+ ""
+ ];
+ [
+ ""
+ ];
+ [];
+ "";
+ ""
+ ]
+ ]
+ }
+ ]
+ }
+] \ No newline at end of file
diff --git a/yql/essentials/udfs/common/string/test/cases/AsciiChecks.in b/yql/essentials/udfs/common/string/test/cases/AsciiChecks.in
new file mode 100644
index 00000000000..26a46b0f6c6
--- /dev/null
+++ b/yql/essentials/udfs/common/string/test/cases/AsciiChecks.in
@@ -0,0 +1,5 @@
+{"value"="qweRTY123$%?"};
+{"value"="asdFGHjkl:'|"};
+{"value"="zxcvbnm"};
+{"value"="1234567890"};
+{"value"="!@#$%^&*()_+{}"};
diff --git a/yql/essentials/udfs/common/string/test/cases/AsciiChecks.sql b/yql/essentials/udfs/common/string/test/cases/AsciiChecks.sql
new file mode 100644
index 00000000000..f6e74d87462
--- /dev/null
+++ b/yql/essentials/udfs/common/string/test/cases/AsciiChecks.sql
@@ -0,0 +1,10 @@
+SELECT
+ String::IsAscii(value) as isascii,
+ String::IsAsciiSpace(value) as isspace,
+ String::IsAsciiUpper(value) as isupper,
+ String::IsAsciiLower(value) as islower,
+ String::IsAsciiDigit(value) as isdigit,
+ String::IsAsciiAlpha(value) as isalpha,
+ String::IsAsciiAlnum(value) as isalnum,
+ String::IsAsciiHex(value) as ishex
+FROM Input
diff --git a/yql/essentials/udfs/common/string/test/cases/Base32Decode.in b/yql/essentials/udfs/common/string/test/cases/Base32Decode.in
new file mode 100644
index 00000000000..34af8b23d47
--- /dev/null
+++ b/yql/essentials/udfs/common/string/test/cases/Base32Decode.in
@@ -0,0 +1,4 @@
+{"key"="1";subkey="";"value"="ORSXG5A="};
+{"key"="2";subkey="";"value"="KRSXG5CUMVZXI==="};
+{"key"="3";subkey="";"value"="MFYHA3DF"};
+{"key"="4";subkey="";"value"="hmmmm===hmmmm"};
diff --git a/yql/essentials/udfs/common/string/test/cases/Base32Decode.sql b/yql/essentials/udfs/common/string/test/cases/Base32Decode.sql
new file mode 100644
index 00000000000..51b47ec1665
--- /dev/null
+++ b/yql/essentials/udfs/common/string/test/cases/Base32Decode.sql
@@ -0,0 +1,6 @@
+/* syntax version 1 */
+SELECT
+ value,
+ String::Base32StrictDecode(value) AS strict_decoded,
+ String::Base32Decode(value) AS decoded
+FROM Input;
diff --git a/yql/essentials/udfs/common/string/test/cases/Base32Encode.in b/yql/essentials/udfs/common/string/test/cases/Base32Encode.in
new file mode 100644
index 00000000000..c0051d04efd
--- /dev/null
+++ b/yql/essentials/udfs/common/string/test/cases/Base32Encode.in
@@ -0,0 +1,3 @@
+{"key"="1";subkey="";"value"="test"};
+{"key"="2";subkey="";"value"="TestTest"};
+{"key"="3";subkey="";"value"="apple"};
diff --git a/yql/essentials/udfs/common/string/test/cases/Base32Encode.sql b/yql/essentials/udfs/common/string/test/cases/Base32Encode.sql
new file mode 100644
index 00000000000..1ff9e3e4078
--- /dev/null
+++ b/yql/essentials/udfs/common/string/test/cases/Base32Encode.sql
@@ -0,0 +1,5 @@
+/* syntax version 1 */
+SELECT
+ value,
+ String::Base32Encode(value) AS encoded
+FROM Input;
diff --git a/yql/essentials/udfs/common/string/test/cases/BlockAsciiChecks.in b/yql/essentials/udfs/common/string/test/cases/BlockAsciiChecks.in
new file mode 100644
index 00000000000..26a46b0f6c6
--- /dev/null
+++ b/yql/essentials/udfs/common/string/test/cases/BlockAsciiChecks.in
@@ -0,0 +1,5 @@
+{"value"="qweRTY123$%?"};
+{"value"="asdFGHjkl:'|"};
+{"value"="zxcvbnm"};
+{"value"="1234567890"};
+{"value"="!@#$%^&*()_+{}"};
diff --git a/yql/essentials/udfs/common/string/test/cases/BlockAsciiChecks.sql b/yql/essentials/udfs/common/string/test/cases/BlockAsciiChecks.sql
new file mode 100644
index 00000000000..d8bf9e942be
--- /dev/null
+++ b/yql/essentials/udfs/common/string/test/cases/BlockAsciiChecks.sql
@@ -0,0 +1,13 @@
+/* XXX: Enable UseBlocks pragma and provide input to trigger block execution. */
+PRAGMA UseBlocks;
+
+SELECT
+ String::IsAscii(value) as isascii,
+ String::IsAsciiSpace(value) as isspace,
+ String::IsAsciiUpper(value) as isupper,
+ String::IsAsciiLower(value) as islower,
+ String::IsAsciiDigit(value) as isdigit,
+ String::IsAsciiAlpha(value) as isalpha,
+ String::IsAsciiAlnum(value) as isalnum,
+ String::IsAsciiHex(value) as ishex
+FROM Input
diff --git a/yql/essentials/udfs/common/string/test/cases/BlockFind.sql b/yql/essentials/udfs/common/string/test/cases/BlockFind.sql
new file mode 100644
index 00000000000..f1c855bcc11
--- /dev/null
+++ b/yql/essentials/udfs/common/string/test/cases/BlockFind.sql
@@ -0,0 +1,7 @@
+/* syntax version 1 */
+pragma UseBlocks;
+SELECT
+ value,
+ String::Contains(value, "as") AS contains,
+ String::LevensteinDistance(value, "as") AS levenstein
+FROM Input;
diff --git a/yql/essentials/udfs/common/string/test/cases/BlockRemove.sql b/yql/essentials/udfs/common/string/test/cases/BlockRemove.sql
new file mode 100644
index 00000000000..4c285b78d07
--- /dev/null
+++ b/yql/essentials/udfs/common/string/test/cases/BlockRemove.sql
@@ -0,0 +1,16 @@
+/* XXX: Enable UseBlocks pragma and provide input to trigger block execution. */
+PRAGMA UseBlocks;
+
+SELECT
+ value,
+ String::RemoveAll(value, "as") AS all,
+ String::RemoveFirst(value, "a") AS first,
+ String::RemoveLast(value, "a") AS last,
+ String::RemoveFirst(value, "as") AS first2,
+ String::RemoveLast(value, "as") AS last2,
+ String::RemoveFirst(value, "") AS first3,
+ String::RemoveLast(value, "") AS last3,
+ String::RemoveAll(value, "`") AS hwruall,
+ String::RemoveFirst(value, "`") AS hwrufirst,
+ String::RemoveLast(value, "`") AS hwrulast,
+FROM Input;
diff --git a/yql/essentials/udfs/common/string/test/cases/BlockReplace.sql b/yql/essentials/udfs/common/string/test/cases/BlockReplace.sql
new file mode 100644
index 00000000000..030e36050cd
--- /dev/null
+++ b/yql/essentials/udfs/common/string/test/cases/BlockReplace.sql
@@ -0,0 +1,13 @@
+/* XXX: Enable UseBlocks pragma and provide input to trigger block execution. */
+PRAGMA UseBlocks;
+
+SELECT
+ value,
+ String::ReplaceAll(value, "as", "zzz") AS all,
+ String::ReplaceFirst(value, "a", "z") AS first,
+ String::ReplaceLast(value, "a", "z") AS last,
+ String::ReplaceFirst(value, "a", "zz") AS first2,
+ String::ReplaceLast(value, "a", "zz") AS last2,
+ String::ReplaceFirst(value, "a", "") AS first3,
+ String::ReplaceLast(value, "a", "") AS last3
+FROM Input;
diff --git a/yql/essentials/udfs/common/string/test/cases/BlockStreamFormat.in b/yql/essentials/udfs/common/string/test/cases/BlockStreamFormat.in
new file mode 100644
index 00000000000..1a446c4e488
--- /dev/null
+++ b/yql/essentials/udfs/common/string/test/cases/BlockStreamFormat.in
@@ -0,0 +1,5 @@
+{"key"="1";"subkey"="1";"value"="qwertyui";"biguint"=1234567890u;"negint"=-123};
+{"key"="2";"subkey"="2";"value"="asdfghjl";"biguint"=9876543210u;"negint"=-456};
+{"key"="3";"subkey"="3";"value"="zxcvbnm?";"biguint"=9999999999u;"negint"=-789};
+{"key"="4";"subkey"="4";"value"="12345678";"biguint"=0000000000u;"negint"=-000};
+{"key"="5";"subkey"="5";"value"="!@#$%^&*";"biguint"=9182737465u;"negint"=-999};
diff --git a/yql/essentials/udfs/common/string/test/cases/BlockStreamFormat.in.attr b/yql/essentials/udfs/common/string/test/cases/BlockStreamFormat.in.attr
new file mode 100644
index 00000000000..bbc040040c8
--- /dev/null
+++ b/yql/essentials/udfs/common/string/test/cases/BlockStreamFormat.in.attr
@@ -0,0 +1,9 @@
+{"_yql_row_spec"={
+ "Type"=["StructType";[
+ ["key";["DataType";"String"]];
+ ["subkey";["DataType";"String"]];
+ ["value";["DataType";"String"]];
+ ["biguint";["DataType";"Uint64"]];
+ ["negint";["DataType";"Int64"]]
+ ]];
+}}
diff --git a/yql/essentials/udfs/common/string/test/cases/BlockStreamFormat.sql b/yql/essentials/udfs/common/string/test/cases/BlockStreamFormat.sql
new file mode 100644
index 00000000000..8b61758a964
--- /dev/null
+++ b/yql/essentials/udfs/common/string/test/cases/BlockStreamFormat.sql
@@ -0,0 +1,20 @@
+/* XXX: Enable UseBlocks pragma and provide input to trigger block execution. */
+PRAGMA UseBlocks;
+
+SELECT
+ value,
+ String::RightPad(value, 20) AS right_pad,
+ String::LeftPad(value, 20) AS left_pad,
+ String::RightPad(value, 20, "0") AS right_pad_zero,
+ String::LeftPad(value, 20, "0") AS left_pad_zero,
+ String::Hex(biguint) AS hex,
+ String::SHex(negint) AS shex,
+ String::Bin(biguint) AS bin,
+ String::SBin(negint) AS sbin,
+ String::HexText(value) AS hex_text,
+ String::BinText(value) AS bin_text,
+ String::HumanReadableDuration(biguint) AS duration,
+ String::HumanReadableQuantity(biguint) AS quantity,
+ String::HumanReadableBytes(biguint) AS bytes,
+ String::Prec(negint / 12345.6789, 4) AS prec
+FROM Input;
diff --git a/yql/essentials/udfs/common/string/test/cases/BlockStringUDF.in b/yql/essentials/udfs/common/string/test/cases/BlockStringUDF.in
new file mode 100644
index 00000000000..a9d378e0590
--- /dev/null
+++ b/yql/essentials/udfs/common/string/test/cases/BlockStringUDF.in
@@ -0,0 +1,4 @@
+{"value"=" !qwe rty uiop [ ]$"};
+{"value"="@as dfgh jkl\\n;'\% "};
+{"value"=" #zxc\tvbn \t\n\b m,./?^ "};
+{"value"="1!2@3#4$5%6^7&8*9(0)-_=+,<.>"};
diff --git a/yql/essentials/udfs/common/string/test/cases/BlockStringUDF.sql b/yql/essentials/udfs/common/string/test/cases/BlockStringUDF.sql
new file mode 100644
index 00000000000..1f96f5d62b0
--- /dev/null
+++ b/yql/essentials/udfs/common/string/test/cases/BlockStringUDF.sql
@@ -0,0 +1,18 @@
+/* XXX: Enable UseBlocks pragma and provide input to trigger block execution. */
+PRAGMA UseBlocks;
+
+SELECT
+ String::Base32Encode(value) as b32enc,
+ String::Base64Encode(value) as b64enc,
+ String::Base64EncodeUrl(value) as b64encu,
+ String::EscapeC(value) as cesc,
+ String::UnescapeC(value) as cunesc,
+ String::HexEncode(value) as xenc,
+ String::EncodeHtml(value) as henc,
+ String::DecodeHtml(value) as hdec,
+ String::CgiEscape(value) as cgesc,
+ String::CgiUnescape(value) as cgunesc,
+ String::Collapse(value) as clps,
+ String::Strip(value) as strp,
+ String::CollapseText(value, 9) as clpst,
+FROM Input
diff --git a/yql/essentials/udfs/common/string/test/cases/BlockStringUnsafeUDF.in b/yql/essentials/udfs/common/string/test/cases/BlockStringUnsafeUDF.in
new file mode 100644
index 00000000000..2c15dd67ac6
--- /dev/null
+++ b/yql/essentials/udfs/common/string/test/cases/BlockStringUnsafeUDF.in
@@ -0,0 +1,6 @@
+{"value"="EAQCAILRO5SSA4TUPEQCAIDVNFXXAIC3EBOSI==="};
+{"value"="ICAgIXF3ZSBydHkgICB1aW9wIFsgXSQ="};
+{"value"="202020217177652072747920202075696F70205B205D24"};
+{"value"="IBQXGIBAEAQCAIBAMRTGO2BANJVWYXDOHMTSKIBA"};
+{"value"="QGFzICAgICAgIGRmZ2ggamtsXG47JyUgIA,,"};
+{"value"="4061732020202020202064666768206A6B6C5C6E3B27252020"};
diff --git a/yql/essentials/udfs/common/string/test/cases/BlockStringUnsafeUDF.sql b/yql/essentials/udfs/common/string/test/cases/BlockStringUnsafeUDF.sql
new file mode 100644
index 00000000000..82f82f50d9d
--- /dev/null
+++ b/yql/essentials/udfs/common/string/test/cases/BlockStringUnsafeUDF.sql
@@ -0,0 +1,10 @@
+/* XXX: Enable UseBlocks pragma and provide input to trigger block execution. */
+PRAGMA UseBlocks;
+
+SELECT
+ String::Base32Decode(value) as b32dec,
+ String::Base32StrictDecode(value) AS b32sdec,
+ String::Base64Decode(value) as b64dec,
+ String::Base64StrictDecode(value) AS b64sdec,
+ String::HexDecode(value) as xdec,
+FROM Input
diff --git a/yql/essentials/udfs/common/string/test/cases/BlockTo.in b/yql/essentials/udfs/common/string/test/cases/BlockTo.in
new file mode 100644
index 00000000000..93a00f7db8d
--- /dev/null
+++ b/yql/essentials/udfs/common/string/test/cases/BlockTo.in
@@ -0,0 +1,7 @@
+{"key"="1";"subkey"="1";"value"="test"};
+{"key"="2";"subkey"="2";"value"="\xD1\x82\xD0\xB5\xD1\x81\xD1\x82"};
+{"key"="3";"subkey"="3";"value"="TeSt"};
+{"key"="4";"subkey"="4";"value"="\xD1\x82\xD0\x95\xD1\x81\xD0\xA2"};
+{"key"="5";"subkey"="5";"value"="Eyl\xC3\xBCl"};
+{"key"="6";"subkey"="6";"value"="6"};
+{"key"="4";"subkey"="4";"value"=""};
diff --git a/yql/essentials/udfs/common/string/test/cases/BlockTo.sql b/yql/essentials/udfs/common/string/test/cases/BlockTo.sql
new file mode 100644
index 00000000000..628febe899e
--- /dev/null
+++ b/yql/essentials/udfs/common/string/test/cases/BlockTo.sql
@@ -0,0 +1,9 @@
+/* XXX: Enable UseBlocks pragma and provide input to trigger block execution. */
+PRAGMA UseBlocks;
+
+SELECT
+ value,
+ String::AsciiToLower(value) AS ascii_lower,
+ String::AsciiToUpper(value) AS ascii_upper,
+ String::AsciiToTitle(value) AS ascii_title,
+FROM Input;
diff --git a/yql/essentials/udfs/common/string/test/cases/ExtendAndTake.in b/yql/essentials/udfs/common/string/test/cases/ExtendAndTake.in
new file mode 100644
index 00000000000..27fc322b1ae
--- /dev/null
+++ b/yql/essentials/udfs/common/string/test/cases/ExtendAndTake.in
@@ -0,0 +1,3 @@
+{"key"="1";"subkey"="1";"value"="a b c"};
+{"key"="2";"subkey"="2";"value"="d"};
+{"key"="3";"subkey"="3";"value"=""};
diff --git a/yql/essentials/udfs/common/string/test/cases/ExtendAndTake.sql b/yql/essentials/udfs/common/string/test/cases/ExtendAndTake.sql
new file mode 100644
index 00000000000..2dab551eb1c
--- /dev/null
+++ b/yql/essentials/udfs/common/string/test/cases/ExtendAndTake.sql
@@ -0,0 +1,10 @@
+/* syntax version 1 */
+
+$split = ($row) -> {
+ return String::SplitToList($row.value, " ", true AS SkipEmpty, false AS DelimeterString);
+};
+
+SELECT
+ $split(TableRow()),
+ ListExtend($split(TableRow()), $split(TableRow()))[1]
+FROM Input;
diff --git a/yql/essentials/udfs/common/string/test/cases/Find.sql b/yql/essentials/udfs/common/string/test/cases/Find.sql
new file mode 100644
index 00000000000..273553dcf9e
--- /dev/null
+++ b/yql/essentials/udfs/common/string/test/cases/Find.sql
@@ -0,0 +1,12 @@
+/* syntax version 1 */
+SELECT
+ value,
+ String::Contains(value, "as") AS contains,
+ String::HasPrefix(value, "as") AS prefix,
+ String::StartsWith(value, "as") AS starts,
+ String::HasSuffix(value, "as") AS suffix,
+ String::EndsWith(value, "as") AS ends,
+ String::Find(value, "as") AS find,
+ String::ReverseFind(value, "as") AS rfind,
+ String::LevensteinDistance(value, "as") AS levenstein
+FROM Input;
diff --git a/yql/essentials/udfs/common/string/test/cases/List.in b/yql/essentials/udfs/common/string/test/cases/List.in
new file mode 100644
index 00000000000..949cf26c776
--- /dev/null
+++ b/yql/essentials/udfs/common/string/test/cases/List.in
@@ -0,0 +1,6 @@
+{"key"="1";"subkey"="1";"value"="a@b@c"};
+{"key"="1";"subkey"="1";"value"="@a@b@c"};
+{"key"="1";"subkey"="1";"value"="@@@a@a"};
+{"key"="2";"subkey"="2";"value"="d#e#f"};
+{"key"="3";"subkey"="3";"value"="d"};
+{"key"="4";"subkey"="4";"value"=""};
diff --git a/yql/essentials/udfs/common/string/test/cases/List.sql b/yql/essentials/udfs/common/string/test/cases/List.sql
new file mode 100644
index 00000000000..42b983074e5
--- /dev/null
+++ b/yql/essentials/udfs/common/string/test/cases/List.sql
@@ -0,0 +1,12 @@
+/* syntax version 1 */
+SELECT
+ value,
+ Ensure(value, String::JoinFromList(String::SplitToList(value, "@"), "@") == value) AS equals_to_original,
+ String::JoinFromList(String::SplitToList(value, "@"), "#") AS replace_delimeter,
+ String::SplitToList(value, "@") AS just_split,
+ String::SplitToList(value, "@")[0] as first,
+ String::SplitToList(value, "@", true AS SkipEmpty) AS skip_empty,
+ String::SplitToList(value, "b@", false AS DelimeterString) AS multichar_delim_set,
+ String::SplitToList(value, "b@", true AS DelimeterString) AS multichar_delim_string,
+ String::SplitToList(value, "@", 1 AS Limit) AS limited
+FROM Input;
diff --git a/yql/essentials/udfs/common/string/test/cases/List_v0.in b/yql/essentials/udfs/common/string/test/cases/List_v0.in
new file mode 100644
index 00000000000..949cf26c776
--- /dev/null
+++ b/yql/essentials/udfs/common/string/test/cases/List_v0.in
@@ -0,0 +1,6 @@
+{"key"="1";"subkey"="1";"value"="a@b@c"};
+{"key"="1";"subkey"="1";"value"="@a@b@c"};
+{"key"="1";"subkey"="1";"value"="@@@a@a"};
+{"key"="2";"subkey"="2";"value"="d#e#f"};
+{"key"="3";"subkey"="3";"value"="d"};
+{"key"="4";"subkey"="4";"value"=""};
diff --git a/yql/essentials/udfs/common/string/test/cases/List_v0.sql b/yql/essentials/udfs/common/string/test/cases/List_v0.sql
new file mode 100644
index 00000000000..36d984dc6a8
--- /dev/null
+++ b/yql/essentials/udfs/common/string/test/cases/List_v0.sql
@@ -0,0 +1,27 @@
+/* syntax version 1 */
+-- use SplitToList settings which are used as defaults in v0 syntax
+SELECT
+ value,
+ IF (
+ String::Contains(value, "@@"),
+ Ensure(
+ value,
+ String::JoinFromList(String::SplitToList(value, "@", true AS SkipEmpty, false AS DelimeterString), "@") != value,
+ value
+ )
+ ) AS not_equals_to_original,
+ IF (
+ String::Contains(value, "@@"),
+ Ensure(
+ value,
+ String::JoinFromList(String::SplitToList(value, "@", true AS SkipEmpty, false AS DelimeterString), "@") != value,
+ value
+ )
+ ) AS not_equals_to_original_skip_empty,
+ Ensure(
+ value,
+ String::JoinFromList(String::SplitToList(value, "@", false AS SkipEmpty, false AS DelimeterString), "@") == value,
+ value
+ ) AS equals_to_original,
+ String::SplitToList(value, "@#", true AS SkipEmpty, false AS DelimeterString) AS multichar
+FROM Input;
diff --git a/yql/essentials/udfs/common/string/test/cases/Remove.sql b/yql/essentials/udfs/common/string/test/cases/Remove.sql
new file mode 100644
index 00000000000..8bfe2c92e26
--- /dev/null
+++ b/yql/essentials/udfs/common/string/test/cases/Remove.sql
@@ -0,0 +1,14 @@
+/* syntax version 1 */
+SELECT
+ value,
+ String::RemoveAll(value, "as") AS all,
+ String::RemoveFirst(value, "a") AS first,
+ String::RemoveLast(value, "a") AS last,
+ String::RemoveFirst(value, "as") AS first2,
+ String::RemoveLast(value, "as") AS last2,
+ String::RemoveFirst(value, "") AS first3,
+ String::RemoveLast(value, "") AS last3,
+ String::RemoveAll(value, "`") AS hwruall,
+ String::RemoveFirst(value, "`") AS hwrufirst,
+ String::RemoveLast(value, "`") AS hwrulast,
+FROM Input;
diff --git a/yql/essentials/udfs/common/string/test/cases/Replace.sql b/yql/essentials/udfs/common/string/test/cases/Replace.sql
new file mode 100644
index 00000000000..0eea32a3e41
--- /dev/null
+++ b/yql/essentials/udfs/common/string/test/cases/Replace.sql
@@ -0,0 +1,11 @@
+/* syntax version 1 */
+SELECT
+ value,
+ String::ReplaceAll(value, "as", "zzz") AS all,
+ String::ReplaceFirst(value, "a", "z") AS first,
+ String::ReplaceLast(value, "a", "z") AS last,
+ String::ReplaceFirst(value, "a", "zz") AS first2,
+ String::ReplaceLast(value, "a", "zz") AS last2,
+ String::ReplaceFirst(value, "a", "") AS first3,
+ String::ReplaceLast(value, "a", "") AS last3
+FROM Input;
diff --git a/yql/essentials/udfs/common/string/test/cases/ReplaceFirstLast.sql b/yql/essentials/udfs/common/string/test/cases/ReplaceFirstLast.sql
new file mode 100644
index 00000000000..6a83400d424
--- /dev/null
+++ b/yql/essentials/udfs/common/string/test/cases/ReplaceFirstLast.sql
@@ -0,0 +1,10 @@
+SELECT
+ String::ReplaceFirst("gasas", "as", "z"),
+ String::ReplaceFirst("gasas", "a", "zzz"),
+ String::ReplaceFirst("gasas", "a", ""),
+ String::ReplaceFirst("gasas", "e", "z"),
+ String::ReplaceLast("gasas", "as", "z"),
+ String::ReplaceLast("gasas", "a", "zzz"),
+ String::ReplaceLast("gasas", "a", ""),
+ String::ReplaceLast("gasas", "k", "ey");
+
diff --git a/yql/essentials/udfs/common/string/test/cases/StreamFormat.in b/yql/essentials/udfs/common/string/test/cases/StreamFormat.in
new file mode 100644
index 00000000000..1a446c4e488
--- /dev/null
+++ b/yql/essentials/udfs/common/string/test/cases/StreamFormat.in
@@ -0,0 +1,5 @@
+{"key"="1";"subkey"="1";"value"="qwertyui";"biguint"=1234567890u;"negint"=-123};
+{"key"="2";"subkey"="2";"value"="asdfghjl";"biguint"=9876543210u;"negint"=-456};
+{"key"="3";"subkey"="3";"value"="zxcvbnm?";"biguint"=9999999999u;"negint"=-789};
+{"key"="4";"subkey"="4";"value"="12345678";"biguint"=0000000000u;"negint"=-000};
+{"key"="5";"subkey"="5";"value"="!@#$%^&*";"biguint"=9182737465u;"negint"=-999};
diff --git a/yql/essentials/udfs/common/string/test/cases/StreamFormat.in.attr b/yql/essentials/udfs/common/string/test/cases/StreamFormat.in.attr
new file mode 100644
index 00000000000..bbc040040c8
--- /dev/null
+++ b/yql/essentials/udfs/common/string/test/cases/StreamFormat.in.attr
@@ -0,0 +1,9 @@
+{"_yql_row_spec"={
+ "Type"=["StructType";[
+ ["key";["DataType";"String"]];
+ ["subkey";["DataType";"String"]];
+ ["value";["DataType";"String"]];
+ ["biguint";["DataType";"Uint64"]];
+ ["negint";["DataType";"Int64"]]
+ ]];
+}}
diff --git a/yql/essentials/udfs/common/string/test/cases/StreamFormat.sql b/yql/essentials/udfs/common/string/test/cases/StreamFormat.sql
new file mode 100644
index 00000000000..46ee9a7c688
--- /dev/null
+++ b/yql/essentials/udfs/common/string/test/cases/StreamFormat.sql
@@ -0,0 +1,19 @@
+/* syntax version 1 */
+
+SELECT
+ value,
+ String::RightPad(value, 20) AS right_pad,
+ String::LeftPad(value, 20) AS left_pad,
+ String::RightPad(value, 20, "0") AS right_pad_zero,
+ String::LeftPad(value, 20, "0") AS left_pad_zero,
+ String::Hex(biguint) AS hex,
+ String::SHex(negint) AS shex,
+ String::Bin(biguint) AS bin,
+ String::SBin(negint) AS sbin,
+ String::HexText(value) AS hex_text,
+ String::BinText(value) AS bin_text,
+ String::HumanReadableDuration(biguint) AS duration,
+ String::HumanReadableQuantity(biguint) AS quantity,
+ String::HumanReadableBytes(biguint) AS bytes,
+ String::Prec(negint / 12345.6789, 4) AS prec
+FROM Input;
diff --git a/yql/essentials/udfs/common/string/test/cases/StringUDF.in b/yql/essentials/udfs/common/string/test/cases/StringUDF.in
new file mode 100644
index 00000000000..a9d378e0590
--- /dev/null
+++ b/yql/essentials/udfs/common/string/test/cases/StringUDF.in
@@ -0,0 +1,4 @@
+{"value"=" !qwe rty uiop [ ]$"};
+{"value"="@as dfgh jkl\\n;'\% "};
+{"value"=" #zxc\tvbn \t\n\b m,./?^ "};
+{"value"="1!2@3#4$5%6^7&8*9(0)-_=+,<.>"};
diff --git a/yql/essentials/udfs/common/string/test/cases/StringUDF.sql b/yql/essentials/udfs/common/string/test/cases/StringUDF.sql
new file mode 100644
index 00000000000..77af707acb0
--- /dev/null
+++ b/yql/essentials/udfs/common/string/test/cases/StringUDF.sql
@@ -0,0 +1,15 @@
+SELECT
+ String::Base32Encode(value) as b32enc,
+ String::Base64Encode(value) as b64enc,
+ String::Base64EncodeUrl(value) as b64encu,
+ String::EscapeC(value) as cesc,
+ String::UnescapeC(value) as cunesc,
+ String::HexEncode(value) as xenc,
+ String::EncodeHtml(value) as henc,
+ String::DecodeHtml(value) as hdec,
+ String::CgiEscape(value) as cgesc,
+ String::CgiUnescape(value) as cgunesc,
+ String::Collapse(value) as clps,
+ String::Strip(value) as strp,
+ String::CollapseText(value, 9) as clpst,
+FROM Input
diff --git a/yql/essentials/udfs/common/string/test/cases/StringUnsafeUDF.in b/yql/essentials/udfs/common/string/test/cases/StringUnsafeUDF.in
new file mode 100644
index 00000000000..2c15dd67ac6
--- /dev/null
+++ b/yql/essentials/udfs/common/string/test/cases/StringUnsafeUDF.in
@@ -0,0 +1,6 @@
+{"value"="EAQCAILRO5SSA4TUPEQCAIDVNFXXAIC3EBOSI==="};
+{"value"="ICAgIXF3ZSBydHkgICB1aW9wIFsgXSQ="};
+{"value"="202020217177652072747920202075696F70205B205D24"};
+{"value"="IBQXGIBAEAQCAIBAMRTGO2BANJVWYXDOHMTSKIBA"};
+{"value"="QGFzICAgICAgIGRmZ2ggamtsXG47JyUgIA,,"};
+{"value"="4061732020202020202064666768206A6B6C5C6E3B27252020"};
diff --git a/yql/essentials/udfs/common/string/test/cases/StringUnsafeUDF.sql b/yql/essentials/udfs/common/string/test/cases/StringUnsafeUDF.sql
new file mode 100644
index 00000000000..dab39cbd391
--- /dev/null
+++ b/yql/essentials/udfs/common/string/test/cases/StringUnsafeUDF.sql
@@ -0,0 +1,7 @@
+SELECT
+ String::Base32Decode(value) as b32dec,
+ String::Base32StrictDecode(value) AS b32sdec,
+ String::Base64Decode(value) as b64dec,
+ String::Base64StrictDecode(value) AS b64sdec,
+ String::HexDecode(value) as xdec,
+FROM Input
diff --git a/yql/essentials/udfs/common/string/test/cases/To.in b/yql/essentials/udfs/common/string/test/cases/To.in
new file mode 100644
index 00000000000..93a00f7db8d
--- /dev/null
+++ b/yql/essentials/udfs/common/string/test/cases/To.in
@@ -0,0 +1,7 @@
+{"key"="1";"subkey"="1";"value"="test"};
+{"key"="2";"subkey"="2";"value"="\xD1\x82\xD0\xB5\xD1\x81\xD1\x82"};
+{"key"="3";"subkey"="3";"value"="TeSt"};
+{"key"="4";"subkey"="4";"value"="\xD1\x82\xD0\x95\xD1\x81\xD0\xA2"};
+{"key"="5";"subkey"="5";"value"="Eyl\xC3\xBCl"};
+{"key"="6";"subkey"="6";"value"="6"};
+{"key"="4";"subkey"="4";"value"=""};
diff --git a/yql/essentials/udfs/common/string/test/cases/To.sql b/yql/essentials/udfs/common/string/test/cases/To.sql
new file mode 100644
index 00000000000..a7faf41efe6
--- /dev/null
+++ b/yql/essentials/udfs/common/string/test/cases/To.sql
@@ -0,0 +1,14 @@
+/* syntax version 1 */
+SELECT
+ value,
+ String::AsciiToLower(value) AS ascii_lower,
+ String::AsciiToUpper(value) AS ascii_upper,
+ String::AsciiToTitle(value) AS ascii_title,
+ String::ToLower(value) AS lower,
+ String::ToUpper(value) AS upper,
+ String::ToTitle(value) AS title,
+ String::Reverse(value) AS reverse,
+ String::ToByteList(value) AS byte_list,
+ String::FromByteList(String::ToByteList(value)) AS from_byte_list,
+ String::FromByteList(YQL::LazyList(String::ToByteList(value))) AS from_lazy_byte_list
+FROM Input;
diff --git a/yql/essentials/udfs/common/string/test/cases/default.in b/yql/essentials/udfs/common/string/test/cases/default.in
new file mode 100644
index 00000000000..182158fdf67
--- /dev/null
+++ b/yql/essentials/udfs/common/string/test/cases/default.in
@@ -0,0 +1,6 @@
+{"key"="1";"subkey"="1";"value"="fdsa"};
+{"key"="2";"subkey"="2";"value"="aswedfg"};
+{"key"="3";"subkey"="3";"value"="asdadsaasd"};
+{"key"="4";"subkey"="4";"value"="gdsfsassas"};
+{"key"="5";"subkey"="5";"value"=""};
+{"key"="6";"subkey"="6";"value"="`Привет, мир!`"};
diff --git a/yql/essentials/udfs/common/string/test/ya.make b/yql/essentials/udfs/common/string/test/ya.make
new file mode 100644
index 00000000000..87d8b667780
--- /dev/null
+++ b/yql/essentials/udfs/common/string/test/ya.make
@@ -0,0 +1,13 @@
+YQL_UDF_TEST_CONTRIB()
+
+DEPENDS(yql/essentials/udfs/common/string)
+
+TIMEOUT(300)
+
+SIZE(MEDIUM)
+
+IF (SANITIZER_TYPE == "memory")
+ TAG(ya:not_autocheck) # YQL-15385
+ENDIF()
+
+END()
diff --git a/yql/essentials/udfs/common/string/ya.make b/yql/essentials/udfs/common/string/ya.make
new file mode 100644
index 00000000000..12ae827ad17
--- /dev/null
+++ b/yql/essentials/udfs/common/string/ya.make
@@ -0,0 +1,38 @@
+IF (YQL_PACKAGED)
+ PACKAGE()
+ FROM_SANDBOX(FILE 7319905679 OUT_NOAUTO libstring_udf.so
+ )
+ END()
+ELSE ()
+YQL_UDF_CONTRIB(string_udf)
+
+ YQL_ABI_VERSION(
+ 2
+ 37
+ 0
+ )
+
+ SRCS(
+ string_udf.cpp
+ )
+
+ PEERDIR(
+ yql/essentials/public/udf/arrow
+ library/cpp/charset
+ library/cpp/deprecated/split
+ library/cpp/html/pcdata
+ library/cpp/string_utils/base32
+ library/cpp/string_utils/base64
+ library/cpp/string_utils/levenshtein_diff
+ library/cpp/string_utils/quote
+ )
+
+ END()
+ENDIF ()
+
+
+RECURSE_FOR_TESTS(
+ test
+)
+
+
diff --git a/yql/essentials/udfs/common/top/test/canondata/result.json b/yql/essentials/udfs/common/top/test/canondata/result.json
new file mode 100644
index 00000000000..c09d321f5a3
--- /dev/null
+++ b/yql/essentials/udfs/common/top/test/canondata/result.json
@@ -0,0 +1,47 @@
+{
+ "test.test[BottomByTuple]": [
+ {
+ "uri": "file://test.test_BottomByTuple_/results.txt"
+ }
+ ],
+ "test.test[BottomBy]": [
+ {
+ "uri": "file://test.test_BottomBy_/results.txt"
+ }
+ ],
+ "test.test[Bottom]": [
+ {
+ "uri": "file://test.test_Bottom_/results.txt"
+ }
+ ],
+ "test.test[TopBy]": [
+ {
+ "uri": "file://test.test_TopBy_/results.txt"
+ }
+ ],
+ "test.test[TopList]": [
+ {
+ "uri": "file://test.test_TopList_/results.txt"
+ }
+ ],
+ "test.test[TopTuple]": [
+ {
+ "uri": "file://test.test_TopTuple_/results.txt"
+ }
+ ],
+ "test.test[TopVariant]": [
+ {
+ "uri": "file://test.test_TopVariant_/results.txt"
+ }
+ ],
+ "test.test[Top]": [
+ {
+ "uri": "file://test.test_Top_/results.txt"
+ }
+ ],
+ "test.test[Window]": [
+ {
+ "uri": "file://test.test_Window_/results.txt"
+ }
+ ]
+}
diff --git a/yql/essentials/udfs/common/top/test/canondata/test.test_BottomByTuple_/results.txt b/yql/essentials/udfs/common/top/test/canondata/test.test_BottomByTuple_/results.txt
new file mode 100644
index 00000000000..2d0670bd687
--- /dev/null
+++ b/yql/essentials/udfs/common/top/test/canondata/test.test_BottomByTuple_/results.txt
@@ -0,0 +1,119 @@
+[
+ {
+ "Write" = [
+ {
+ "Type" = [
+ "ListType";
+ [
+ "StructType";
+ [
+ [
+ "key";
+ [
+ "DataType";
+ "String"
+ ]
+ ];
+ [
+ "column1";
+ [
+ "ListType";
+ [
+ "TupleType";
+ [
+ [
+ "DataType";
+ "String"
+ ];
+ [
+ "DataType";
+ "String"
+ ]
+ ]
+ ]
+ ]
+ ]
+ ]
+ ]
+ ];
+ "Data" = [
+ [
+ "1";
+ [
+ [
+ "13";
+ "f"
+ ];
+ [
+ "2";
+ "b"
+ ];
+ [
+ "4";
+ "d"
+ ];
+ [
+ "5";
+ "a"
+ ];
+ [
+ "7";
+ "c"
+ ];
+ [
+ "8";
+ "e"
+ ]
+ ]
+ ];
+ [
+ "2";
+ [
+ [
+ "1";
+ "g"
+ ];
+ [
+ "2";
+ "c"
+ ];
+ [
+ "3";
+ "e"
+ ];
+ [
+ "4";
+ "a"
+ ];
+ [
+ "6";
+ "b"
+ ];
+ [
+ "9";
+ "d"
+ ];
+ [
+ "9";
+ "f"
+ ];
+ [
+ "9";
+ "h"
+ ]
+ ]
+ ];
+ [
+ "3";
+ [
+ [
+ "1";
+ "a"
+ ]
+ ]
+ ]
+ ]
+ }
+ ]
+ }
+] \ No newline at end of file
diff --git a/yql/essentials/udfs/common/top/test/canondata/test.test_BottomBy_/results.txt b/yql/essentials/udfs/common/top/test/canondata/test.test_BottomBy_/results.txt
new file mode 100644
index 00000000000..9a5eb425658
--- /dev/null
+++ b/yql/essentials/udfs/common/top/test/canondata/test.test_BottomBy_/results.txt
@@ -0,0 +1,61 @@
+[
+ {
+ "Write" = [
+ {
+ "Type" = [
+ "ListType";
+ [
+ "StructType";
+ [
+ [
+ "key";
+ [
+ "DataType";
+ "String"
+ ]
+ ];
+ [
+ "column1";
+ [
+ "ListType";
+ [
+ "DataType";
+ "String"
+ ]
+ ]
+ ]
+ ]
+ ]
+ ];
+ "Data" = [
+ [
+ "1";
+ [
+ "2b";
+ "4d";
+ "5a";
+ "7c";
+ "8e"
+ ]
+ ];
+ [
+ "2";
+ [
+ "1g";
+ "2c";
+ "3e";
+ "4a";
+ "6b"
+ ]
+ ];
+ [
+ "3";
+ [
+ "1a"
+ ]
+ ]
+ ]
+ }
+ ]
+ }
+] \ No newline at end of file
diff --git a/yql/essentials/udfs/common/top/test/canondata/test.test_Bottom_/results.txt b/yql/essentials/udfs/common/top/test/canondata/test.test_Bottom_/results.txt
new file mode 100644
index 00000000000..4b34ad59ea8
--- /dev/null
+++ b/yql/essentials/udfs/common/top/test/canondata/test.test_Bottom_/results.txt
@@ -0,0 +1,59 @@
+[
+ {
+ "Write" = [
+ {
+ "Type" = [
+ "ListType";
+ [
+ "StructType";
+ [
+ [
+ "key";
+ [
+ "DataType";
+ "String"
+ ]
+ ];
+ [
+ "column1";
+ [
+ "ListType";
+ [
+ "DataType";
+ "String"
+ ]
+ ]
+ ]
+ ]
+ ]
+ ];
+ "Data" = [
+ [
+ "1";
+ [
+ "13";
+ "2";
+ "4";
+ "5"
+ ]
+ ];
+ [
+ "2";
+ [
+ "1";
+ "2";
+ "3";
+ "4"
+ ]
+ ];
+ [
+ "3";
+ [
+ "1"
+ ]
+ ]
+ ]
+ }
+ ]
+ }
+] \ No newline at end of file
diff --git a/yql/essentials/udfs/common/top/test/canondata/test.test_TopBy_/results.txt b/yql/essentials/udfs/common/top/test/canondata/test.test_TopBy_/results.txt
new file mode 100644
index 00000000000..32e6a4c374a
--- /dev/null
+++ b/yql/essentials/udfs/common/top/test/canondata/test.test_TopBy_/results.txt
@@ -0,0 +1,63 @@
+[
+ {
+ "Write" = [
+ {
+ "Type" = [
+ "ListType";
+ [
+ "StructType";
+ [
+ [
+ "key";
+ [
+ "DataType";
+ "String"
+ ]
+ ];
+ [
+ "column1";
+ [
+ "ListType";
+ [
+ "DataType";
+ "String"
+ ]
+ ]
+ ]
+ ]
+ ]
+ ];
+ "Data" = [
+ [
+ "1";
+ [
+ "8e";
+ "7c";
+ "5a";
+ "4d";
+ "2b";
+ "13f"
+ ]
+ ];
+ [
+ "2";
+ [
+ "9d";
+ "9f";
+ "9h";
+ "6b";
+ "4a";
+ "3e"
+ ]
+ ];
+ [
+ "3";
+ [
+ "1a"
+ ]
+ ]
+ ]
+ }
+ ]
+ }
+] \ No newline at end of file
diff --git a/yql/essentials/udfs/common/top/test/canondata/test.test_TopList_/results.txt b/yql/essentials/udfs/common/top/test/canondata/test.test_TopList_/results.txt
new file mode 100644
index 00000000000..5997422bf79
--- /dev/null
+++ b/yql/essentials/udfs/common/top/test/canondata/test.test_TopList_/results.txt
@@ -0,0 +1,57 @@
+[
+ {
+ "Write" = [
+ {
+ "Type" = [
+ "ListType";
+ [
+ "StructType";
+ [
+ [
+ "column0";
+ [
+ "ListType";
+ [
+ "ListType";
+ [
+ "DataType";
+ "Int32"
+ ]
+ ]
+ ]
+ ]
+ ]
+ ]
+ ];
+ "Data" = [
+ [
+ [
+ [
+ "1";
+ "2";
+ "3"
+ ];
+ [
+ "1";
+ "2"
+ ];
+ [
+ "1";
+ "2"
+ ];
+ [
+ "1"
+ ];
+ [
+ "1"
+ ];
+ [
+ "1"
+ ]
+ ]
+ ]
+ ]
+ }
+ ]
+ }
+] \ No newline at end of file
diff --git a/yql/essentials/udfs/common/top/test/canondata/test.test_TopTuple_/results.txt b/yql/essentials/udfs/common/top/test/canondata/test.test_TopTuple_/results.txt
new file mode 100644
index 00000000000..6c5e14b7e28
--- /dev/null
+++ b/yql/essentials/udfs/common/top/test/canondata/test.test_TopTuple_/results.txt
@@ -0,0 +1,103 @@
+[
+ {
+ "Write" = [
+ {
+ "Type" = [
+ "ListType";
+ [
+ "StructType";
+ [
+ [
+ "key";
+ [
+ "DataType";
+ "String"
+ ]
+ ];
+ [
+ "column1";
+ [
+ "ListType";
+ [
+ "TupleType";
+ [
+ [
+ "DataType";
+ "String"
+ ];
+ [
+ "DataType";
+ "String"
+ ]
+ ]
+ ]
+ ]
+ ]
+ ]
+ ]
+ ];
+ "Data" = [
+ [
+ "1";
+ [
+ [
+ "8";
+ "e"
+ ];
+ [
+ "7";
+ "c"
+ ];
+ [
+ "5";
+ "a"
+ ];
+ [
+ "4";
+ "d"
+ ];
+ [
+ "2";
+ "b"
+ ]
+ ]
+ ];
+ [
+ "2";
+ [
+ [
+ "9";
+ "h"
+ ];
+ [
+ "9";
+ "f"
+ ];
+ [
+ "9";
+ "d"
+ ];
+ [
+ "6";
+ "b"
+ ];
+ [
+ "4";
+ "a"
+ ]
+ ]
+ ];
+ [
+ "3";
+ [
+ [
+ "1";
+ "a"
+ ]
+ ]
+ ]
+ ]
+ }
+ ]
+ }
+] \ No newline at end of file
diff --git a/yql/essentials/udfs/common/top/test/canondata/test.test_TopVariant_/results.txt b/yql/essentials/udfs/common/top/test/canondata/test.test_TopVariant_/results.txt
new file mode 100644
index 00000000000..39139793b1b
--- /dev/null
+++ b/yql/essentials/udfs/common/top/test/canondata/test.test_TopVariant_/results.txt
@@ -0,0 +1,56 @@
+[
+ {
+ "Write" = [
+ {
+ "Type" = [
+ "ListType";
+ [
+ "StructType";
+ [
+ [
+ "column0";
+ [
+ "ListType";
+ [
+ "VariantType";
+ [
+ "TupleType";
+ [
+ [
+ "DataType";
+ "Int32"
+ ];
+ [
+ "DataType";
+ "String"
+ ]
+ ]
+ ]
+ ]
+ ]
+ ]
+ ]
+ ]
+ ];
+ "Data" = [
+ [
+ [
+ [
+ "1";
+ "str"
+ ];
+ [
+ "0";
+ "1"
+ ];
+ [
+ "0";
+ "1"
+ ]
+ ]
+ ]
+ ]
+ }
+ ]
+ }
+] \ No newline at end of file
diff --git a/yql/essentials/udfs/common/top/test/canondata/test.test_Top_/results.txt b/yql/essentials/udfs/common/top/test/canondata/test.test_Top_/results.txt
new file mode 100644
index 00000000000..cc053f78cbb
--- /dev/null
+++ b/yql/essentials/udfs/common/top/test/canondata/test.test_Top_/results.txt
@@ -0,0 +1,57 @@
+[
+ {
+ "Write" = [
+ {
+ "Type" = [
+ "ListType";
+ [
+ "StructType";
+ [
+ [
+ "key";
+ [
+ "DataType";
+ "String"
+ ]
+ ];
+ [
+ "column1";
+ [
+ "ListType";
+ [
+ "DataType";
+ "Float"
+ ]
+ ]
+ ]
+ ]
+ ]
+ ];
+ "Data" = [
+ [
+ "1";
+ [
+ "13";
+ "8";
+ "7"
+ ]
+ ];
+ [
+ "2";
+ [
+ "9";
+ "9";
+ "9"
+ ]
+ ];
+ [
+ "3";
+ [
+ "1"
+ ]
+ ]
+ ]
+ }
+ ]
+ }
+] \ No newline at end of file
diff --git a/yql/essentials/udfs/common/top/test/canondata/test.test_Window_/results.txt b/yql/essentials/udfs/common/top/test/canondata/test.test_Window_/results.txt
new file mode 100644
index 00000000000..2035b95bedd
--- /dev/null
+++ b/yql/essentials/udfs/common/top/test/canondata/test.test_Window_/results.txt
@@ -0,0 +1,1030 @@
+[
+ {
+ "Write" = [
+ {
+ "Type" = [
+ "ListType";
+ [
+ "StructType";
+ [
+ [
+ "idx";
+ [
+ "DataType";
+ "Int32"
+ ]
+ ];
+ [
+ "x";
+ [
+ "DataType";
+ "Int32"
+ ]
+ ];
+ [
+ "upcr_top";
+ [
+ "ListType";
+ [
+ "DataType";
+ "Int32"
+ ]
+ ]
+ ]
+ ]
+ ]
+ ];
+ "Data" = [
+ [
+ "1";
+ "1";
+ [
+ "1"
+ ]
+ ];
+ [
+ "2";
+ "7";
+ [
+ "7";
+ "1"
+ ]
+ ];
+ [
+ "3";
+ "5";
+ [
+ "7";
+ "5";
+ "1"
+ ]
+ ];
+ [
+ "4";
+ "4";
+ [
+ "7";
+ "5";
+ "4"
+ ]
+ ];
+ [
+ "5";
+ "3";
+ [
+ "7";
+ "5";
+ "4"
+ ]
+ ];
+ [
+ "6";
+ "11";
+ [
+ "11";
+ "7";
+ "5"
+ ]
+ ];
+ [
+ "7";
+ "2";
+ [
+ "11";
+ "7";
+ "5"
+ ]
+ ];
+ [
+ "8";
+ "11";
+ [
+ "11";
+ "11";
+ "7"
+ ]
+ ];
+ [
+ "9";
+ "0";
+ [
+ "11";
+ "11";
+ "7"
+ ]
+ ];
+ [
+ "10";
+ "6";
+ [
+ "11";
+ "11";
+ "7"
+ ]
+ ]
+ ]
+ }
+ ]
+ };
+ {
+ "Write" = [
+ {
+ "Type" = [
+ "ListType";
+ [
+ "StructType";
+ [
+ [
+ "idx";
+ [
+ "DataType";
+ "Int32"
+ ]
+ ];
+ [
+ "x";
+ [
+ "DataType";
+ "Int32"
+ ]
+ ];
+ [
+ "upuf_top";
+ [
+ "ListType";
+ [
+ "DataType";
+ "Int32"
+ ]
+ ]
+ ]
+ ]
+ ]
+ ];
+ "Data" = [
+ [
+ "1";
+ "1";
+ [
+ "11";
+ "11";
+ "7"
+ ]
+ ];
+ [
+ "2";
+ "7";
+ [
+ "11";
+ "11";
+ "7"
+ ]
+ ];
+ [
+ "3";
+ "5";
+ [
+ "11";
+ "11";
+ "7"
+ ]
+ ];
+ [
+ "4";
+ "4";
+ [
+ "11";
+ "11";
+ "7"
+ ]
+ ];
+ [
+ "5";
+ "3";
+ [
+ "11";
+ "11";
+ "7"
+ ]
+ ];
+ [
+ "6";
+ "11";
+ [
+ "11";
+ "11";
+ "7"
+ ]
+ ];
+ [
+ "7";
+ "2";
+ [
+ "11";
+ "11";
+ "7"
+ ]
+ ];
+ [
+ "8";
+ "11";
+ [
+ "11";
+ "11";
+ "7"
+ ]
+ ];
+ [
+ "9";
+ "0";
+ [
+ "11";
+ "11";
+ "7"
+ ]
+ ];
+ [
+ "10";
+ "6";
+ [
+ "11";
+ "11";
+ "7"
+ ]
+ ]
+ ]
+ }
+ ]
+ };
+ {
+ "Write" = [
+ {
+ "Type" = [
+ "ListType";
+ [
+ "StructType";
+ [
+ [
+ "idx";
+ [
+ "DataType";
+ "Int32"
+ ]
+ ];
+ [
+ "x";
+ [
+ "DataType";
+ "Int32"
+ ]
+ ];
+ [
+ "cruf_top";
+ [
+ "ListType";
+ [
+ "DataType";
+ "Int32"
+ ]
+ ]
+ ]
+ ]
+ ]
+ ];
+ "Data" = [
+ [
+ "1";
+ "1";
+ [
+ "11";
+ "11";
+ "7"
+ ]
+ ];
+ [
+ "2";
+ "7";
+ [
+ "11";
+ "11";
+ "7"
+ ]
+ ];
+ [
+ "3";
+ "5";
+ [
+ "11";
+ "11";
+ "6"
+ ]
+ ];
+ [
+ "4";
+ "4";
+ [
+ "11";
+ "11";
+ "6"
+ ]
+ ];
+ [
+ "5";
+ "3";
+ [
+ "11";
+ "11";
+ "6"
+ ]
+ ];
+ [
+ "6";
+ "11";
+ [
+ "11";
+ "11";
+ "6"
+ ]
+ ];
+ [
+ "7";
+ "2";
+ [
+ "11";
+ "6";
+ "2"
+ ]
+ ];
+ [
+ "8";
+ "11";
+ [
+ "11";
+ "6";
+ "0"
+ ]
+ ];
+ [
+ "9";
+ "0";
+ [
+ "6";
+ "0"
+ ]
+ ];
+ [
+ "10";
+ "6";
+ [
+ "6"
+ ]
+ ]
+ ]
+ }
+ ]
+ };
+ {
+ "Write" = [
+ {
+ "Type" = [
+ "ListType";
+ [
+ "StructType";
+ [
+ [
+ "idx";
+ [
+ "DataType";
+ "Int32"
+ ]
+ ];
+ [
+ "x";
+ [
+ "DataType";
+ "Int32"
+ ]
+ ];
+ [
+ "cr22_top";
+ [
+ "ListType";
+ [
+ "DataType";
+ "Int32"
+ ]
+ ]
+ ]
+ ]
+ ]
+ ];
+ "Data" = [
+ [
+ "1";
+ "1";
+ [
+ "7";
+ "5";
+ "1"
+ ]
+ ];
+ [
+ "2";
+ "7";
+ [
+ "7";
+ "5";
+ "4"
+ ]
+ ];
+ [
+ "3";
+ "5";
+ [
+ "7";
+ "5";
+ "4"
+ ]
+ ];
+ [
+ "4";
+ "4";
+ [
+ "11";
+ "7";
+ "5"
+ ]
+ ];
+ [
+ "5";
+ "3";
+ [
+ "11";
+ "5";
+ "4"
+ ]
+ ];
+ [
+ "6";
+ "11";
+ [
+ "11";
+ "11";
+ "4"
+ ]
+ ];
+ [
+ "7";
+ "2";
+ [
+ "11";
+ "11";
+ "3"
+ ]
+ ];
+ [
+ "8";
+ "11";
+ [
+ "11";
+ "11";
+ "6"
+ ]
+ ];
+ [
+ "9";
+ "0";
+ [
+ "11";
+ "6";
+ "2"
+ ]
+ ];
+ [
+ "10";
+ "6";
+ [
+ "11";
+ "6";
+ "0"
+ ]
+ ]
+ ]
+ }
+ ]
+ };
+ {
+ "Write" = [
+ {
+ "Type" = [
+ "ListType";
+ [
+ "StructType";
+ [
+ [
+ "idx";
+ [
+ "DataType";
+ "Int32"
+ ]
+ ];
+ [
+ "x";
+ [
+ "DataType";
+ "Int32"
+ ]
+ ];
+ [
+ "upcr_bottom";
+ [
+ "ListType";
+ [
+ "DataType";
+ "Int32"
+ ]
+ ]
+ ]
+ ]
+ ]
+ ];
+ "Data" = [
+ [
+ "1";
+ "1";
+ [
+ "1"
+ ]
+ ];
+ [
+ "2";
+ "7";
+ [
+ "1";
+ "7"
+ ]
+ ];
+ [
+ "3";
+ "5";
+ [
+ "1";
+ "5";
+ "7"
+ ]
+ ];
+ [
+ "4";
+ "4";
+ [
+ "1";
+ "4";
+ "5"
+ ]
+ ];
+ [
+ "5";
+ "3";
+ [
+ "1";
+ "3";
+ "4"
+ ]
+ ];
+ [
+ "6";
+ "11";
+ [
+ "1";
+ "3";
+ "4"
+ ]
+ ];
+ [
+ "7";
+ "2";
+ [
+ "1";
+ "2";
+ "3"
+ ]
+ ];
+ [
+ "8";
+ "11";
+ [
+ "1";
+ "2";
+ "3"
+ ]
+ ];
+ [
+ "9";
+ "0";
+ [
+ "0";
+ "1";
+ "2"
+ ]
+ ];
+ [
+ "10";
+ "6";
+ [
+ "0";
+ "1";
+ "2"
+ ]
+ ]
+ ]
+ }
+ ]
+ };
+ {
+ "Write" = [
+ {
+ "Type" = [
+ "ListType";
+ [
+ "StructType";
+ [
+ [
+ "idx";
+ [
+ "DataType";
+ "Int32"
+ ]
+ ];
+ [
+ "x";
+ [
+ "DataType";
+ "Int32"
+ ]
+ ];
+ [
+ "upuf_bottom";
+ [
+ "ListType";
+ [
+ "DataType";
+ "Int32"
+ ]
+ ]
+ ]
+ ]
+ ]
+ ];
+ "Data" = [
+ [
+ "1";
+ "1";
+ [
+ "0";
+ "1";
+ "2"
+ ]
+ ];
+ [
+ "2";
+ "7";
+ [
+ "0";
+ "1";
+ "2"
+ ]
+ ];
+ [
+ "3";
+ "5";
+ [
+ "0";
+ "1";
+ "2"
+ ]
+ ];
+ [
+ "4";
+ "4";
+ [
+ "0";
+ "1";
+ "2"
+ ]
+ ];
+ [
+ "5";
+ "3";
+ [
+ "0";
+ "1";
+ "2"
+ ]
+ ];
+ [
+ "6";
+ "11";
+ [
+ "0";
+ "1";
+ "2"
+ ]
+ ];
+ [
+ "7";
+ "2";
+ [
+ "0";
+ "1";
+ "2"
+ ]
+ ];
+ [
+ "8";
+ "11";
+ [
+ "0";
+ "1";
+ "2"
+ ]
+ ];
+ [
+ "9";
+ "0";
+ [
+ "0";
+ "1";
+ "2"
+ ]
+ ];
+ [
+ "10";
+ "6";
+ [
+ "0";
+ "1";
+ "2"
+ ]
+ ]
+ ]
+ }
+ ]
+ };
+ {
+ "Write" = [
+ {
+ "Type" = [
+ "ListType";
+ [
+ "StructType";
+ [
+ [
+ "idx";
+ [
+ "DataType";
+ "Int32"
+ ]
+ ];
+ [
+ "x";
+ [
+ "DataType";
+ "Int32"
+ ]
+ ];
+ [
+ "cruf_bottom";
+ [
+ "ListType";
+ [
+ "DataType";
+ "Int32"
+ ]
+ ]
+ ]
+ ]
+ ]
+ ];
+ "Data" = [
+ [
+ "1";
+ "1";
+ [
+ "0";
+ "1";
+ "2"
+ ]
+ ];
+ [
+ "2";
+ "7";
+ [
+ "0";
+ "2";
+ "3"
+ ]
+ ];
+ [
+ "3";
+ "5";
+ [
+ "0";
+ "2";
+ "3"
+ ]
+ ];
+ [
+ "4";
+ "4";
+ [
+ "0";
+ "2";
+ "3"
+ ]
+ ];
+ [
+ "5";
+ "3";
+ [
+ "0";
+ "2";
+ "3"
+ ]
+ ];
+ [
+ "6";
+ "11";
+ [
+ "0";
+ "2";
+ "6"
+ ]
+ ];
+ [
+ "7";
+ "2";
+ [
+ "0";
+ "2";
+ "6"
+ ]
+ ];
+ [
+ "8";
+ "11";
+ [
+ "0";
+ "6";
+ "11"
+ ]
+ ];
+ [
+ "9";
+ "0";
+ [
+ "0";
+ "6"
+ ]
+ ];
+ [
+ "10";
+ "6";
+ [
+ "6"
+ ]
+ ]
+ ]
+ }
+ ]
+ };
+ {
+ "Write" = [
+ {
+ "Type" = [
+ "ListType";
+ [
+ "StructType";
+ [
+ [
+ "idx";
+ [
+ "DataType";
+ "Int32"
+ ]
+ ];
+ [
+ "x";
+ [
+ "DataType";
+ "Int32"
+ ]
+ ];
+ [
+ "cr22_botto";
+ [
+ "ListType";
+ [
+ "DataType";
+ "Int32"
+ ]
+ ]
+ ]
+ ]
+ ]
+ ];
+ "Data" = [
+ [
+ "1";
+ "1";
+ [
+ "1";
+ "5";
+ "7"
+ ]
+ ];
+ [
+ "2";
+ "7";
+ [
+ "1";
+ "4";
+ "5"
+ ]
+ ];
+ [
+ "3";
+ "5";
+ [
+ "1";
+ "3";
+ "4"
+ ]
+ ];
+ [
+ "4";
+ "4";
+ [
+ "3";
+ "4";
+ "5"
+ ]
+ ];
+ [
+ "5";
+ "3";
+ [
+ "2";
+ "3";
+ "4"
+ ]
+ ];
+ [
+ "6";
+ "11";
+ [
+ "2";
+ "3";
+ "4"
+ ]
+ ];
+ [
+ "7";
+ "2";
+ [
+ "0";
+ "2";
+ "3"
+ ]
+ ];
+ [
+ "8";
+ "11";
+ [
+ "0";
+ "2";
+ "6"
+ ]
+ ];
+ [
+ "9";
+ "0";
+ [
+ "0";
+ "2";
+ "6"
+ ]
+ ];
+ [
+ "10";
+ "6";
+ [
+ "0";
+ "6";
+ "11"
+ ]
+ ]
+ ]
+ }
+ ]
+ }
+] \ No newline at end of file
diff --git a/yql/essentials/udfs/common/top/test/cases/Bottom.in b/yql/essentials/udfs/common/top/test/cases/Bottom.in
new file mode 100644
index 00000000000..1532ec03bb8
--- /dev/null
+++ b/yql/essentials/udfs/common/top/test/cases/Bottom.in
@@ -0,0 +1,15 @@
+{"key"="1";"subkey"="5";"value"="a"};
+{"key"="1";"subkey"="2";"value"="b"};
+{"key"="1";"subkey"="7";"value"="c"};
+{"key"="1";"subkey"="4";"value"="d"};
+{"key"="1";"subkey"="8";"value"="e"};
+{"key"="1";"subkey"="13";"value"="f"};
+{"key"="2";"subkey"="4";"value"="a"};
+{"key"="2";"subkey"="6";"value"="b"};
+{"key"="2";"subkey"="2";"value"="c"};
+{"key"="2";"subkey"="9";"value"="d"};
+{"key"="2";"subkey"="3";"value"="e"};
+{"key"="2";"subkey"="9";"value"="f"};
+{"key"="2";"subkey"="1";"value"="g"};
+{"key"="2";"subkey"="9";"value"="h"};
+{"key"="3";"subkey"="1";"value"="a"};
diff --git a/yql/essentials/udfs/common/top/test/cases/Bottom.in.attr b/yql/essentials/udfs/common/top/test/cases/Bottom.in.attr
new file mode 100644
index 00000000000..2a151e9c475
--- /dev/null
+++ b/yql/essentials/udfs/common/top/test/cases/Bottom.in.attr
@@ -0,0 +1,30 @@
+{
+ "_yql_row_spec" = {
+ "Type" = [
+ "StructType";
+ [
+ [
+ "key";
+ [
+ "DataType";
+ "String"
+ ]
+ ];
+ [
+ "subkey";
+ [
+ "DataType";
+ "String"
+ ]
+ ];
+ [
+ "value";
+ [
+ "DataType";
+ "String"
+ ]
+ ]
+ ]
+ ]
+ }
+}
diff --git a/yql/essentials/udfs/common/top/test/cases/Bottom.sql b/yql/essentials/udfs/common/top/test/cases/Bottom.sql
new file mode 100644
index 00000000000..b04ba740c8e
--- /dev/null
+++ b/yql/essentials/udfs/common/top/test/cases/Bottom.sql
@@ -0,0 +1,9 @@
+/* syntax version 1 */
+-- Per-key BOTTOM of `subkey` with a limit of 4.
+-- `subkey` is declared as String in Bottom.in.attr, so the values are
+-- ordered as strings here (e.g. "13" sorts before "2"), not as numbers.
+USE plato;
+
+SELECT
+ key,
+ BOTTOM(subkey, 4u)
+FROM Input
+GROUP BY key
+ORDER BY key
diff --git a/yql/essentials/udfs/common/top/test/cases/BottomBy.in b/yql/essentials/udfs/common/top/test/cases/BottomBy.in
new file mode 100644
index 00000000000..1532ec03bb8
--- /dev/null
+++ b/yql/essentials/udfs/common/top/test/cases/BottomBy.in
@@ -0,0 +1,15 @@
+{"key"="1";"subkey"="5";"value"="a"};
+{"key"="1";"subkey"="2";"value"="b"};
+{"key"="1";"subkey"="7";"value"="c"};
+{"key"="1";"subkey"="4";"value"="d"};
+{"key"="1";"subkey"="8";"value"="e"};
+{"key"="1";"subkey"="13";"value"="f"};
+{"key"="2";"subkey"="4";"value"="a"};
+{"key"="2";"subkey"="6";"value"="b"};
+{"key"="2";"subkey"="2";"value"="c"};
+{"key"="2";"subkey"="9";"value"="d"};
+{"key"="2";"subkey"="3";"value"="e"};
+{"key"="2";"subkey"="9";"value"="f"};
+{"key"="2";"subkey"="1";"value"="g"};
+{"key"="2";"subkey"="9";"value"="h"};
+{"key"="3";"subkey"="1";"value"="a"};
diff --git a/yql/essentials/udfs/common/top/test/cases/BottomBy.in.attr b/yql/essentials/udfs/common/top/test/cases/BottomBy.in.attr
new file mode 100644
index 00000000000..2a151e9c475
--- /dev/null
+++ b/yql/essentials/udfs/common/top/test/cases/BottomBy.in.attr
@@ -0,0 +1,30 @@
+{
+ "_yql_row_spec" = {
+ "Type" = [
+ "StructType";
+ [
+ [
+ "key";
+ [
+ "DataType";
+ "String"
+ ]
+ ];
+ [
+ "subkey";
+ [
+ "DataType";
+ "String"
+ ]
+ ];
+ [
+ "value";
+ [
+ "DataType";
+ "String"
+ ]
+ ]
+ ]
+ ]
+ }
+}
diff --git a/yql/essentials/udfs/common/top/test/cases/BottomBy.sql b/yql/essentials/udfs/common/top/test/cases/BottomBy.sql
new file mode 100644
index 00000000000..2d0718da727
--- /dev/null
+++ b/yql/essentials/udfs/common/top/test/cases/BottomBy.sql
@@ -0,0 +1,9 @@
+/* syntax version 1 */
+-- Per-key BOTTOM_BY with a limit of 5: the payload is `subkey || value`,
+-- the ordering key is `subkey` cast to Uint64 (numeric order).
+USE plato;
+
+SELECT
+ key,
+ BOTTOM_BY(subkey || value, CAST(subkey AS Uint64), 5u)
+FROM Input
+GROUP BY key
+ORDER BY key
diff --git a/yql/essentials/udfs/common/top/test/cases/BottomByTuple.in b/yql/essentials/udfs/common/top/test/cases/BottomByTuple.in
new file mode 100644
index 00000000000..1532ec03bb8
--- /dev/null
+++ b/yql/essentials/udfs/common/top/test/cases/BottomByTuple.in
@@ -0,0 +1,15 @@
+{"key"="1";"subkey"="5";"value"="a"};
+{"key"="1";"subkey"="2";"value"="b"};
+{"key"="1";"subkey"="7";"value"="c"};
+{"key"="1";"subkey"="4";"value"="d"};
+{"key"="1";"subkey"="8";"value"="e"};
+{"key"="1";"subkey"="13";"value"="f"};
+{"key"="2";"subkey"="4";"value"="a"};
+{"key"="2";"subkey"="6";"value"="b"};
+{"key"="2";"subkey"="2";"value"="c"};
+{"key"="2";"subkey"="9";"value"="d"};
+{"key"="2";"subkey"="3";"value"="e"};
+{"key"="2";"subkey"="9";"value"="f"};
+{"key"="2";"subkey"="1";"value"="g"};
+{"key"="2";"subkey"="9";"value"="h"};
+{"key"="3";"subkey"="1";"value"="a"};
diff --git a/yql/essentials/udfs/common/top/test/cases/BottomByTuple.in.attr b/yql/essentials/udfs/common/top/test/cases/BottomByTuple.in.attr
new file mode 100644
index 00000000000..2a151e9c475
--- /dev/null
+++ b/yql/essentials/udfs/common/top/test/cases/BottomByTuple.in.attr
@@ -0,0 +1,30 @@
+{
+ "_yql_row_spec" = {
+ "Type" = [
+ "StructType";
+ [
+ [
+ "key";
+ [
+ "DataType";
+ "String"
+ ]
+ ];
+ [
+ "subkey";
+ [
+ "DataType";
+ "String"
+ ]
+ ];
+ [
+ "value";
+ [
+ "DataType";
+ "String"
+ ]
+ ]
+ ]
+ ]
+ }
+}
diff --git a/yql/essentials/udfs/common/top/test/cases/BottomByTuple.sql b/yql/essentials/udfs/common/top/test/cases/BottomByTuple.sql
new file mode 100644
index 00000000000..cdbda066c72
--- /dev/null
+++ b/yql/essentials/udfs/common/top/test/cases/BottomByTuple.sql
@@ -0,0 +1,9 @@
+/* syntax version 1 */
+-- Per-key BOTTOM_BY with a limit of 10 where both the payload and the
+-- ordering key are the same (subkey, value) tuple of String columns.
+USE plato;
+
+SELECT
+ key,
+ BOTTOM_BY(AsTuple(subkey, value), AsTuple(subkey, value), 10u)
+FROM Input
+GROUP BY key
+ORDER BY key
diff --git a/yql/essentials/udfs/common/top/test/cases/Top.in b/yql/essentials/udfs/common/top/test/cases/Top.in
new file mode 100644
index 00000000000..1532ec03bb8
--- /dev/null
+++ b/yql/essentials/udfs/common/top/test/cases/Top.in
@@ -0,0 +1,15 @@
+{"key"="1";"subkey"="5";"value"="a"};
+{"key"="1";"subkey"="2";"value"="b"};
+{"key"="1";"subkey"="7";"value"="c"};
+{"key"="1";"subkey"="4";"value"="d"};
+{"key"="1";"subkey"="8";"value"="e"};
+{"key"="1";"subkey"="13";"value"="f"};
+{"key"="2";"subkey"="4";"value"="a"};
+{"key"="2";"subkey"="6";"value"="b"};
+{"key"="2";"subkey"="2";"value"="c"};
+{"key"="2";"subkey"="9";"value"="d"};
+{"key"="2";"subkey"="3";"value"="e"};
+{"key"="2";"subkey"="9";"value"="f"};
+{"key"="2";"subkey"="1";"value"="g"};
+{"key"="2";"subkey"="9";"value"="h"};
+{"key"="3";"subkey"="1";"value"="a"};
diff --git a/yql/essentials/udfs/common/top/test/cases/Top.in.attr b/yql/essentials/udfs/common/top/test/cases/Top.in.attr
new file mode 100644
index 00000000000..2a151e9c475
--- /dev/null
+++ b/yql/essentials/udfs/common/top/test/cases/Top.in.attr
@@ -0,0 +1,30 @@
+{
+ "_yql_row_spec" = {
+ "Type" = [
+ "StructType";
+ [
+ [
+ "key";
+ [
+ "DataType";
+ "String"
+ ]
+ ];
+ [
+ "subkey";
+ [
+ "DataType";
+ "String"
+ ]
+ ];
+ [
+ "value";
+ [
+ "DataType";
+ "String"
+ ]
+ ]
+ ]
+ ]
+ }
+}
diff --git a/yql/essentials/udfs/common/top/test/cases/Top.sql b/yql/essentials/udfs/common/top/test/cases/Top.sql
new file mode 100644
index 00000000000..6a03eca4ee9
--- /dev/null
+++ b/yql/essentials/udfs/common/top/test/cases/Top.sql
@@ -0,0 +1,9 @@
+/* syntax version 1 */
+-- Per-key TOP with a limit of 3 over `subkey` cast from String to Float,
+-- so the ordering is numeric.
+USE plato;
+
+SELECT
+ key,
+ TOP(CAST(subkey AS Float), 3u)
+FROM Input
+GROUP BY key
+ORDER BY key
diff --git a/yql/essentials/udfs/common/top/test/cases/TopBy.in b/yql/essentials/udfs/common/top/test/cases/TopBy.in
new file mode 100644
index 00000000000..1532ec03bb8
--- /dev/null
+++ b/yql/essentials/udfs/common/top/test/cases/TopBy.in
@@ -0,0 +1,15 @@
+{"key"="1";"subkey"="5";"value"="a"};
+{"key"="1";"subkey"="2";"value"="b"};
+{"key"="1";"subkey"="7";"value"="c"};
+{"key"="1";"subkey"="4";"value"="d"};
+{"key"="1";"subkey"="8";"value"="e"};
+{"key"="1";"subkey"="13";"value"="f"};
+{"key"="2";"subkey"="4";"value"="a"};
+{"key"="2";"subkey"="6";"value"="b"};
+{"key"="2";"subkey"="2";"value"="c"};
+{"key"="2";"subkey"="9";"value"="d"};
+{"key"="2";"subkey"="3";"value"="e"};
+{"key"="2";"subkey"="9";"value"="f"};
+{"key"="2";"subkey"="1";"value"="g"};
+{"key"="2";"subkey"="9";"value"="h"};
+{"key"="3";"subkey"="1";"value"="a"};
diff --git a/yql/essentials/udfs/common/top/test/cases/TopBy.in.attr b/yql/essentials/udfs/common/top/test/cases/TopBy.in.attr
new file mode 100644
index 00000000000..2a151e9c475
--- /dev/null
+++ b/yql/essentials/udfs/common/top/test/cases/TopBy.in.attr
@@ -0,0 +1,30 @@
+{
+ "_yql_row_spec" = {
+ "Type" = [
+ "StructType";
+ [
+ [
+ "key";
+ [
+ "DataType";
+ "String"
+ ]
+ ];
+ [
+ "subkey";
+ [
+ "DataType";
+ "String"
+ ]
+ ];
+ [
+ "value";
+ [
+ "DataType";
+ "String"
+ ]
+ ]
+ ]
+ ]
+ }
+}
diff --git a/yql/essentials/udfs/common/top/test/cases/TopBy.sql b/yql/essentials/udfs/common/top/test/cases/TopBy.sql
new file mode 100644
index 00000000000..b22309185f7
--- /dev/null
+++ b/yql/essentials/udfs/common/top/test/cases/TopBy.sql
@@ -0,0 +1,9 @@
+/* syntax version 1 */
+-- Per-key TOP_BY with a limit of 6: the payload is `subkey || value`,
+-- the ordering key is the String column `subkey` without any cast
+-- (string order, so "13" ranks below "2").
+USE plato;
+
+SELECT
+ key,
+ TOP_BY(subkey || value, subkey, 6u)
+FROM Input
+GROUP BY key
+ORDER BY key
diff --git a/yql/essentials/udfs/common/top/test/cases/TopList.sql b/yql/essentials/udfs/common/top/test/cases/TopList.sql
new file mode 100644
index 00000000000..2b06e919fde
--- /dev/null
+++ b/yql/essentials/udfs/common/top/test/cases/TopList.sql
@@ -0,0 +1,4 @@
+/* syntax version 1 */
+-- TOP with a limit of 10 over list-typed values: the outer list literal is
+-- flattened so that every inner list becomes one input row.
+select TOP(x,10) from
+(select [[1,2],[1],[1,2,3],[1],[1,2],[1]] as x)
+flatten list by x;
diff --git a/yql/essentials/udfs/common/top/test/cases/TopTuple.in b/yql/essentials/udfs/common/top/test/cases/TopTuple.in
new file mode 100644
index 00000000000..1532ec03bb8
--- /dev/null
+++ b/yql/essentials/udfs/common/top/test/cases/TopTuple.in
@@ -0,0 +1,15 @@
+{"key"="1";"subkey"="5";"value"="a"};
+{"key"="1";"subkey"="2";"value"="b"};
+{"key"="1";"subkey"="7";"value"="c"};
+{"key"="1";"subkey"="4";"value"="d"};
+{"key"="1";"subkey"="8";"value"="e"};
+{"key"="1";"subkey"="13";"value"="f"};
+{"key"="2";"subkey"="4";"value"="a"};
+{"key"="2";"subkey"="6";"value"="b"};
+{"key"="2";"subkey"="2";"value"="c"};
+{"key"="2";"subkey"="9";"value"="d"};
+{"key"="2";"subkey"="3";"value"="e"};
+{"key"="2";"subkey"="9";"value"="f"};
+{"key"="2";"subkey"="1";"value"="g"};
+{"key"="2";"subkey"="9";"value"="h"};
+{"key"="3";"subkey"="1";"value"="a"};
diff --git a/yql/essentials/udfs/common/top/test/cases/TopTuple.in.attr b/yql/essentials/udfs/common/top/test/cases/TopTuple.in.attr
new file mode 100644
index 00000000000..2a151e9c475
--- /dev/null
+++ b/yql/essentials/udfs/common/top/test/cases/TopTuple.in.attr
@@ -0,0 +1,30 @@
+{
+ "_yql_row_spec" = {
+ "Type" = [
+ "StructType";
+ [
+ [
+ "key";
+ [
+ "DataType";
+ "String"
+ ]
+ ];
+ [
+ "subkey";
+ [
+ "DataType";
+ "String"
+ ]
+ ];
+ [
+ "value";
+ [
+ "DataType";
+ "String"
+ ]
+ ]
+ ]
+ ]
+ }
+}
diff --git a/yql/essentials/udfs/common/top/test/cases/TopTuple.sql b/yql/essentials/udfs/common/top/test/cases/TopTuple.sql
new file mode 100644
index 00000000000..c9d77826967
--- /dev/null
+++ b/yql/essentials/udfs/common/top/test/cases/TopTuple.sql
@@ -0,0 +1,9 @@
+/* syntax version 1 */
+-- Per-key TOP with a limit of 5 over (subkey, value) tuples of String columns.
+USE plato;
+
+SELECT
+ key,
+ TOP(AsTuple(subkey, value), 5u)
+FROM Input
+GROUP BY key
+ORDER BY key \ No newline at end of file
diff --git a/yql/essentials/udfs/common/top/test/cases/TopVariant.sql b/yql/essentials/udfs/common/top/test/cases/TopVariant.sql
new file mode 100644
index 00000000000..7f74dfda347
--- /dev/null
+++ b/yql/essentials/udfs/common/top/test/cases/TopVariant.sql
@@ -0,0 +1,5 @@
+/* syntax version 1 */
+-- TOP with a limit of 3 over values of the variant type Variant<Int32,String>;
+-- the list of three variants is flattened into one row per variant.
+$vt1 = Variant<Int32,String>;
+select TOP(x,3) from
+(select [Variant(1,"0",$vt1),Variant("str","1",$vt1),Variant(1,"0",$vt1)] as x)
+flatten list by x;
diff --git a/yql/essentials/udfs/common/top/test/cases/Window.sql b/yql/essentials/udfs/common/top/test/cases/Window.sql
new file mode 100644
index 00000000000..657f765fa99
--- /dev/null
+++ b/yql/essentials/udfs/common/top/test/cases/Window.sql
@@ -0,0 +1,30 @@
+/* syntax version 1 */
+-- TOP / BOTTOM (limit 3) used as window functions over a 10-row temporary
+-- table. Each function is checked with four frames:
+--   upcr - UNBOUNDED PRECEDING .. CURRENT ROW
+--   upuf - OVER () with no explicit frame
+--   cruf - CURRENT ROW .. UNBOUNDED FOLLOWING
+--   cr22 - 2 PRECEDING .. 2 FOLLOWING
+USE plato;
+
+$src = [
+ <|x:1, idx:1|>,
+ <|x:7, idx:2|>,
+ <|x:5, idx:3|>,
+ <|x:4, idx:4|>,
+ <|x:3, idx:5|>,
+ <|x:11, idx:6|>,
+ <|x:2, idx:7|>,
+ <|x:11, idx:8|>,
+ <|x:0, idx:9|>,
+ <|x:6, idx:10|>,
+];
+
+INSERT INTO @src
+SELECT * FROM AS_TABLE($src) ORDER BY idx;
+
+COMMIT;
+
+SELECT idx, x, TOP(x, 3) OVER (ORDER BY idx ROWS BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW) as upcr_top FROM @src ORDER BY idx;
+SELECT idx, x, TOP(x, 3) OVER () as upuf_top FROM @src ORDER BY idx;
+SELECT idx, x, TOP(x, 3) OVER (ORDER BY idx ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING) as cruf_top FROM @src ORDER BY idx;
+SELECT idx, x, TOP(x, 3) OVER (ORDER BY idx ROWS BETWEEN 2 PRECEDING AND 2 FOLLOWING) as cr22_top FROM @src ORDER BY idx;
+
+SELECT idx, x, BOTTOM(x, 3) OVER (ORDER BY idx ROWS BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW) as upcr_bottom FROM @src ORDER BY idx;
+SELECT idx, x, BOTTOM(x, 3) OVER () as upuf_bottom FROM @src ORDER BY idx;
+SELECT idx, x, BOTTOM(x, 3) OVER (ORDER BY idx ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING) as cruf_bottom FROM @src ORDER BY idx;
+-- NOTE(review): the alias below reads `cr22_botto` (looks like a truncated
+-- `cr22_bottom`); the canonized result uses the same column name, so renaming
+-- it requires re-canonizing the test.
+SELECT idx, x, BOTTOM(x, 3) OVER (ORDER BY idx ROWS BETWEEN 2 PRECEDING AND 2 FOLLOWING) as cr22_botto FROM @src ORDER BY idx;
diff --git a/yql/essentials/udfs/common/top/test/ya.make b/yql/essentials/udfs/common/top/test/ya.make
new file mode 100644
index 00000000000..9bbd032b576
--- /dev/null
+++ b/yql/essentials/udfs/common/top/test/ya.make
@@ -0,0 +1,13 @@
+# Test module for the `top` UDF.
+YQL_UDF_TEST_CONTRIB()
+
+# The UDF under test.
+DEPENDS(yql/essentials/udfs/common/top)
+
+TIMEOUT(300)
+
+SIZE(MEDIUM)
+
+# Under the memory sanitizer the test is tagged ya:not_autocheck (see the ticket below).
+IF (SANITIZER_TYPE == "memory")
+ TAG(ya:not_autocheck) # YQL-15385
+ENDIF()
+
+END()
diff --git a/yql/essentials/udfs/common/top/top_udf.cpp b/yql/essentials/udfs/common/top/top_udf.cpp
new file mode 100644
index 00000000000..766067dda5d
--- /dev/null
+++ b/yql/essentials/udfs/common/top/top_udf.cpp
@@ -0,0 +1,954 @@
+#include <yql/essentials/public/udf/udf_helpers.h>
+#include <yql/essentials/public/udf/udf_type_ops.h>
+
+#include <library/cpp/containers/top_keeper/top_keeper.h>
+
+#include <util/generic/set.h>
+
+#include <algorithm>
+#include <iterator>
+
+using namespace NKikimr;
+using namespace NUdf;
+
+namespace {
+
+// Pair of unboxed values used by the keyed (TOP_BY-style) state: the pair
+// comparators in this file order such pairs by `first` only, and the pair
+// wrapper returns `second` as the result element.
+using TUnboxedValuePair = std::pair<TUnboxedValue, TUnboxedValue>;
+
+// Ordering predicate for plain data values of slot `Slot`.
+// With IsTop the predicate holds when `left` compares greater than `right`
+// (descending order); otherwise when `left` compares less (ascending order).
+template <EDataSlot Slot, bool IsTop>
+struct TDataCompare {
+    bool operator()(const TUnboxedValue& left, const TUnboxedValue& right) const {
+        const auto order = CompareValues<Slot>(left, right);
+        return IsTop ? order > 0 : order < 0;
+    }
+};
+
+// Ordering predicate for (key, payload) pairs whose key is a plain data value
+// of slot `Slot`. Only the `first` members take part in the comparison:
+// descending by key when IsTop, ascending otherwise.
+template <EDataSlot Slot, bool IsTop>
+struct TDataPairCompare {
+    bool operator()(const TUnboxedValuePair& left, const TUnboxedValuePair& right) const {
+        const auto order = CompareValues<Slot>(left.first, right.first);
+        return IsTop ? order > 0 : order < 0;
+    }
+};
+
+// Ordering predicate built on a runtime ICompare object.
+// With IsTop the arguments are swapped before calling Less, which reverses
+// the order; otherwise Less is applied to (left, right) directly.
+template <bool IsTop>
+struct TGenericCompare {
+    ICompare::TPtr Compare;
+
+    bool operator()(const TUnboxedValue& left, const TUnboxedValue& right) const {
+        return IsTop ? Compare->Less(right, left)
+                     : Compare->Less(left, right);
+    }
+};
+
+// Ordering predicate for (key, payload) pairs built on a runtime ICompare
+// object. Only the `first` members are compared; with IsTop they are passed
+// to Less in swapped order, which reverses the ordering.
+template <bool IsTop>
+struct TGenericPairCompare {
+    ICompare::TPtr Compare;
+
+    bool operator()(const TUnboxedValuePair& left, const TUnboxedValuePair& right) const {
+        return IsTop ? Compare->Less(right.first, left.first)
+                     : Compare->Less(left.first, right.first);
+    }
+};
+
+// Container behind the top/bottom aggregation states: holds values ordered by
+// TCompare and limited by MaxSize.
+// It starts out backed by a TTopKeeper. Once GetInternal() has been answered
+// from the keeper (Finalized is set), the next Insert() or SetMaxSize() moves
+// the keeper's items into an ordered multiset, and every later operation works
+// on that multiset.
+// NOTE(review): presumably the switch exists so that values can still be added
+// after the contents were read - confirm against TTopKeeper.
+template <typename TValue, typename TCompare, typename TAllocator>
+class TTopKeeperContainer {
+ TTopKeeper<TValue, TCompare, true, TAllocator> Keeper;
+ using TOrderedSet = TMultiSet<TValue, TCompare, TAllocator>;
+ // Engaged only after the switch away from Keeper described above.
+ TMaybe<TOrderedSet> OrderedSet;
+ size_t MaxSize = 0;
+ // Set once GetInternal() has been answered by Keeper.
+ bool Finalized = false;
+ TCompare Compare;
+public:
+ // Creates an empty container; the keeper is constructed with a size of 0 and
+ // MaxSize stays 0 until SetMaxSize() is called.
+ explicit TTopKeeperContainer(TCompare compare)
+ : Keeper(0, compare)
+ , Compare(compare)
+ {}
+
+ // Returns the current items as a vector. In multiset mode this is a copy in
+ // set order; otherwise the container is marked as finalized and the result
+ // of Keeper.GetInternal() is returned.
+ TVector<TValue, TAllocator> GetInternal() {
+ if (OrderedSet) {
+ TVector<TValue, TAllocator> result;
+ std::copy(OrderedSet->begin(), OrderedSet->end(), std::back_inserter(result));
+ return result;
+ }
+ Finalized = true;
+ return Keeper.GetInternal();
+ }
+
+ // Adds a value. The call is ignored while MaxSize is 0.
+ void Insert(const TValue& value) {
+ if (MaxSize == 0) {
+ return;
+ }
+ // The keeper has already been read: move all of its items to the multiset.
+ if (Finalized && !OrderedSet) {
+ const auto& items = Keeper.Extract();
+ OrderedSet = TOrderedSet{items.begin(), items.end(), Compare};
+ }
+ if (OrderedSet) {
+ if (OrderedSet->size() < MaxSize) {
+ OrderedSet->insert(value);
+ return;
+ }
+ Y_ENSURE(OrderedSet->size() == MaxSize);
+ Y_ENSURE(!OrderedSet->empty());
+ // The set is full: the new value is accepted only if Compare orders it
+ // before the current last element, which is then dropped.
+ auto last = --OrderedSet->end();
+ if (Compare(value, *last)) {
+ OrderedSet->erase(last);
+ OrderedSet->insert(value);
+ }
+ return;
+ }
+ Keeper.Insert(value);
+ }
+
+ bool IsEmpty() const {
+ return OrderedSet ? OrderedSet->empty() : Keeper.IsEmpty();
+ }
+
+ size_t GetSize() const {
+ return OrderedSet ? OrderedSet->size() : Keeper.GetSize();
+ }
+
+ size_t GetMaxSize() const {
+ return MaxSize;
+ }
+
+ // Changes the capacity. If the keeper has already been read, its items are
+ // moved to the multiset first (at most MaxSize of them, taken from the front
+ // of the extracted sequence). In multiset mode elements are removed from the
+ // end until the size fits; otherwise the new size is forwarded to the keeper.
+ void SetMaxSize(size_t newMaxSize) {
+ MaxSize = newMaxSize;
+ if (Finalized && !OrderedSet) {
+ auto items = Keeper.Extract();
+ auto begin = items.begin();
+ auto end = begin + Min(MaxSize, items.size());
+ OrderedSet = TOrderedSet{begin, end, Compare};
+ }
+ if (OrderedSet) {
+ while (OrderedSet->size() > MaxSize) {
+ auto last = --OrderedSet->end();
+ OrderedSet->erase(last);
+ }
+ return;
+ }
+
+ Keeper.SetMaxSize(MaxSize);
+ }
+};
+
+// Shared implementation of the key-less top/bottom state: plain values are
+// kept in a TTopKeeperContainer ordered by TCompare.
+template <typename TCompare>
+class TTopKeeperWrapperBase {
+protected:
+    TTopKeeperContainer<TUnboxedValue, TCompare, TUnboxedValue::TAllocator> Keeper;
+
+protected:
+    explicit TTopKeeperWrapperBase(TCompare compare)
+        : Keeper(compare)
+    {}
+
+    // Sets the capacity and stores the first value.
+    void Init(const TUnboxedValuePod& value, ui32 maxSize) {
+        Keeper.SetMaxSize(maxSize);
+        AddValue(value);
+    }
+
+    // Fills this state with the items of `left` followed by the items of
+    // `right`; the capacity is taken from `left`.
+    void Merge(TTopKeeperWrapperBase& left, TTopKeeperWrapperBase& right) {
+        Keeper.SetMaxSize(left.Keeper.GetMaxSize());
+        const auto absorb = [this](TTopKeeperWrapperBase& source) {
+            for (const auto& entry : source.Keeper.GetInternal()) {
+                AddValue(entry);
+            }
+        };
+        absorb(left);
+        absorb(right);
+    }
+
+    // Restores the state from the two-element value produced by Serialize():
+    // element 0 is the capacity, element 1 is the list of stored values.
+    void Deserialize(const TUnboxedValuePod& serialized) {
+        auto capacity = serialized.GetElement(0).Get<ui32>();
+        auto entries = serialized.GetElement(1);
+
+        Keeper.SetMaxSize(capacity);
+        const auto cursor = entries.GetListIterator();
+        TUnboxedValue entry;
+        while (cursor.Next(entry)) {
+            AddValue(entry);
+        }
+    }
+
+public:
+    // Offers one more value to the container.
+    void AddValue(const TUnboxedValuePod& value) {
+        Keeper.Insert(TUnboxedValuePod(value));
+    }
+
+    // Packs the state as [capacity, list of stored values].
+    TUnboxedValue Serialize(const IValueBuilder* builder) {
+        // The list part is built exactly like the final result.
+        auto list = GetResult(builder);
+
+        TUnboxedValue* fields = nullptr;
+        auto state = builder->NewArray(2U, fields);
+        fields[0] = TUnboxedValuePod(static_cast<ui32>(Keeper.GetMaxSize()));
+        fields[1] = list;
+
+        return state;
+    }
+
+    // Builds the result list from the items currently held by the container.
+    TUnboxedValue GetResult(const IValueBuilder* builder) {
+        TUnboxedValue* out = nullptr;
+        auto list = builder->NewArray(Keeper.GetSize(), out);
+
+        for (const auto& entry : Keeper.GetInternal()) {
+            *out++ = entry;
+        }
+        return list;
+    }
+};
+
+// Shared implementation of the keyed top/bottom state: (key, payload) pairs
+// are kept in a TTopKeeperContainer ordered by TCompare, and only the
+// payloads go into the final result.
+template <typename TCompare>
+class TTopKeeperPairWrapperBase {
+protected:
+    TTopKeeperContainer<TUnboxedValuePair, TCompare, TStdAllocatorForUdf<TUnboxedValuePair>> Keeper;
+
+protected:
+    explicit TTopKeeperPairWrapperBase(TCompare compare)
+        : Keeper(compare)
+    {}
+
+    // Sets the capacity and stores the first (key, payload) pair.
+    void Init(const TUnboxedValuePod& key, const TUnboxedValuePod& payload, ui32 maxSize) {
+        Keeper.SetMaxSize(maxSize);
+        AddValue(key, payload);
+    }
+
+    // Fills this state with the pairs of `left` followed by the pairs of
+    // `right`; the capacity is taken from `left`.
+    void Merge(TTopKeeperPairWrapperBase& left, TTopKeeperPairWrapperBase& right) {
+        Keeper.SetMaxSize(left.Keeper.GetMaxSize());
+        const auto absorb = [this](TTopKeeperPairWrapperBase& source) {
+            for (const auto& entry : source.Keeper.GetInternal()) {
+                AddValue(entry.first, entry.second);
+            }
+        };
+        absorb(left);
+        absorb(right);
+    }
+
+    // Restores the state from the two-element value produced by Serialize():
+    // element 0 is the capacity, element 1 is a list of [key, payload] items.
+    void Deserialize(const TUnboxedValuePod& serialized) {
+        auto capacity = serialized.GetElement(0).Get<ui32>();
+        auto entries = serialized.GetElement(1);
+
+        Keeper.SetMaxSize(capacity);
+        const auto cursor = entries.GetListIterator();
+        TUnboxedValue entry;
+        while (cursor.Next(entry)) {
+            AddValue(entry.GetElement(0), entry.GetElement(1));
+        }
+    }
+
+public:
+    // Offers one more (key, payload) pair to the container.
+    void AddValue(const TUnboxedValuePod& key, const TUnboxedValuePod& payload) {
+        Keeper.Insert(std::make_pair(TUnboxedValuePod(key), TUnboxedValuePod(payload)));
+    }
+
+    // Packs the state as [capacity, list of [key, payload]].
+    TUnboxedValue Serialize(const IValueBuilder* builder) {
+        TUnboxedValue* out = nullptr;
+        auto list = builder->NewArray(Keeper.GetSize(), out);
+
+        for (const auto& entry : Keeper.GetInternal()) {
+            TUnboxedValue* fields = nullptr;
+            auto pair = builder->NewArray(2U, fields);
+            fields[0] = entry.first;
+            fields[1] = entry.second;
+            *out++ = pair;
+        }
+
+        TUnboxedValue* state = nullptr;
+        auto result = builder->NewArray(2U, state);
+        state[0] = TUnboxedValuePod(static_cast<ui32>(Keeper.GetMaxSize()));
+        state[1] = list;
+
+        return result;
+    }
+
+    // Builds the result list from the payloads of the stored pairs.
+    TUnboxedValue GetResult(const IValueBuilder* builder) {
+        TUnboxedValue* out = nullptr;
+        auto list = builder->NewArray(Keeper.GetSize(), out);
+
+        for (const auto& entry : Keeper.GetInternal()) {
+            *out++ = entry.second;
+        }
+        return list;
+    }
+};
+
+
+// State wrapper for plain data values of slot `Slot`. The primary template is
+// only declared; HasKey selects one of the two partial specializations.
+template <EDataSlot Slot, bool HasKey, bool IsTop>
+class TTopKeeperDataWrapper;
+
+// Value-only specialization: values are ordered by TDataCompare<Slot, IsTop>.
+template <EDataSlot Slot, bool IsTop>
+class TTopKeeperDataWrapper<Slot, false, IsTop>
+    : public TTopKeeperWrapperBase<TDataCompare<Slot, IsTop>>
+{
+public:
+    using TBase = TTopKeeperWrapperBase<TDataCompare<Slot, IsTop>>;
+
+    // Starts a state from its first value and the requested capacity.
+    TTopKeeperDataWrapper(const TUnboxedValuePod& value, ui32 maxSize)
+        : TBase(TDataCompare<Slot, IsTop>{})
+    {
+        this->Init(value, maxSize);
+    }
+
+    // Builds a state out of two existing ones.
+    TTopKeeperDataWrapper(TTopKeeperDataWrapper& left, TTopKeeperDataWrapper& right)
+        : TBase(TDataCompare<Slot, IsTop>{})
+    {
+        this->Merge(left, right);
+    }
+
+    // Restores a state from its serialized form.
+    explicit TTopKeeperDataWrapper(const TUnboxedValuePod& serialized)
+        : TBase(TDataCompare<Slot, IsTop>{})
+    {
+        this->Deserialize(serialized);
+    }
+};
+
+// Keyed specialization for plain data keys of slot `Slot`: (key, payload)
+// pairs are ordered by TDataPairCompare<Slot, IsTop>.
+template <EDataSlot Slot, bool IsTop>
+class TTopKeeperDataWrapper<Slot, true, IsTop>
+    : public TTopKeeperPairWrapperBase<TDataPairCompare<Slot, IsTop>>
+{
+public:
+    using TBase = TTopKeeperPairWrapperBase<TDataPairCompare<Slot, IsTop>>;
+
+    // Starts a state from its first (key, payload) pair and the requested capacity.
+    TTopKeeperDataWrapper(const TUnboxedValuePod& key, const TUnboxedValuePod& payload, ui32 maxSize)
+        : TBase(TDataPairCompare<Slot, IsTop>{})
+    {
+        this->Init(key, payload, maxSize);
+    }
+
+    // Builds a state out of two existing ones.
+    TTopKeeperDataWrapper(TTopKeeperDataWrapper& left, TTopKeeperDataWrapper& right)
+        : TBase(TDataPairCompare<Slot, IsTop>{})
+    {
+        this->Merge(left, right);
+    }
+
+    // Restores a state from its serialized form.
+    explicit TTopKeeperDataWrapper(const TUnboxedValuePod& serialized)
+        : TBase(TDataPairCompare<Slot, IsTop>{})
+    {
+        this->Deserialize(serialized);
+    }
+};
+
+// State wrapper for values compared through a runtime ICompare object. The
+// primary template is only declared; HasKey selects one of the two partial
+// specializations.
+template <bool HasKey, bool IsTop>
+class TTopKeeperWrapper;
+
+// Value-only specialization: values are ordered by TGenericCompare<IsTop>.
+template <bool IsTop>
+class TTopKeeperWrapper<false, IsTop>
+    : public TTopKeeperWrapperBase<TGenericCompare<IsTop>>
+{
+public:
+    using TBase = TTopKeeperWrapperBase<TGenericCompare<IsTop>>;
+
+    // Starts a state from its first value and the requested capacity.
+    TTopKeeperWrapper(const TUnboxedValuePod& value, ui32 maxSize, ICompare::TPtr compare)
+        : TBase(TGenericCompare<IsTop>{compare})
+    {
+        this->Init(value, maxSize);
+    }
+
+    // Builds a state out of two existing ones.
+    TTopKeeperWrapper(TTopKeeperWrapper& left, TTopKeeperWrapper& right, ICompare::TPtr compare)
+        : TBase(TGenericCompare<IsTop>{compare})
+    {
+        this->Merge(left, right);
+    }
+
+    // Restores a state from its serialized form.
+    TTopKeeperWrapper(const TUnboxedValuePod& serialized, ICompare::TPtr compare)
+        : TBase(TGenericCompare<IsTop>{compare})
+    {
+        this->Deserialize(serialized);
+    }
+};
+
+// Key/payload flavour (HasKey == true) of the generic top keeper wrapper.
+// Delegates to TTopKeeperPairWrapperBase with a comparator built around the
+// runtime ICompare instance.
+template <bool IsTop>
+class TTopKeeperWrapper<true, IsTop>
+    : public TTopKeeperPairWrapperBase<TGenericPairCompare<IsTop>>
+{
+public:
+    using TBase = TTopKeeperPairWrapperBase<TGenericPairCompare<IsTop>>;
+
+    // Builds a keeper from the first (key, payload) pair and the size limit.
+    TTopKeeperWrapper(const TUnboxedValuePod& key, const TUnboxedValuePod& payload, ui32 maxSize, ICompare::TPtr compare)
+        : TBase(TGenericPairCompare<IsTop>{compare})
+    {
+        this->Init(key, payload, maxSize);
+    }
+
+    // Builds a keeper by merging two existing keepers.
+    TTopKeeperWrapper(TTopKeeperWrapper& left, TTopKeeperWrapper& right, ICompare::TPtr compare)
+        : TBase(TGenericPairCompare<IsTop>{compare})
+    {
+        this->Merge(left, right);
+    }
+
+    // Builds a keeper from its serialized representation.
+    TTopKeeperWrapper(const TUnboxedValuePod& serialized, ICompare::TPtr compare)
+        : TBase(TGenericPairCompare<IsTop>{compare})
+    {
+        this->Deserialize(serialized);
+    }
+};
+
+
+// Boxed resource holding a data-slot top keeper; the specializations are
+// generated further down by the RESOURCE macro.
+template <EDataSlot Slot, bool HasKey, bool IsTop>
+class TTopResourceData;
+
+// Validates that `arg` carries the resource for the given template
+// parameters and returns it downcast to the concrete resource type.
+template <EDataSlot Slot, bool HasKey, bool IsTop>
+TTopResourceData<Slot, HasKey, IsTop>* GetTopResourceData(const TUnboxedValuePod& arg) {
+    using TResource = TTopResourceData<Slot, HasKey, IsTop>;
+
+    TResource::Validate(arg);
+    return static_cast<TResource*>(arg.AsBoxed().Get());
+}
+
+// Boxed resource holding a generic (ICompare-driven) top keeper; the
+// specializations are generated further down by the RESOURCE_GENERIC macro.
+template <bool HasKey, bool IsTop>
+class TTopResource;
+
+// Validates that `arg` carries the generic resource for the given template
+// parameters and returns it downcast to the concrete resource type.
+template <bool HasKey, bool IsTop>
+TTopResource<HasKey, IsTop>* GetTopResource(const TUnboxedValuePod& arg) {
+    using TResource = TTopResource<HasKey, IsTop>;
+
+    TResource::Validate(arg);
+    return static_cast<TResource*>(arg.AsBoxed().Get());
+}
+
+
+// "Create" implementation for data-slot keys: allocates a new resource from
+// the first element and the size limit. The argument layout depends on
+// HasKey and is selected at compile time through tag dispatch.
+template <EDataSlot Slot, bool HasKey, bool IsTop>
+class TTopCreateData : public TBoxedValue {
+private:
+    using TResource = TTopResourceData<Slot, HasKey, IsTop>;
+
+    // Value-only layout: args = (value, maxSize).
+    TUnboxedValue RunImpl(const TUnboxedValuePod* args, std::false_type) const {
+        return TUnboxedValuePod(new TResource(args[0], args[1].Get<ui32>()));
+    }
+
+    // Key/payload layout: args = (key, payload, maxSize).
+    TUnboxedValue RunImpl(const TUnboxedValuePod* args, std::true_type) const {
+        return TUnboxedValuePod(new TResource(args[0], args[1], args[2].Get<ui32>()));
+    }
+
+    TUnboxedValue Run(const IValueBuilder*, const TUnboxedValuePod* args) const override {
+        return RunImpl(args, std::integral_constant<bool, HasKey>());
+    }
+};
+
+// "Create" implementation for generic keys: allocates a new resource from the
+// first element, the size limit and the comparator captured at construction.
+// The argument layout depends on HasKey and is selected through tag dispatch.
+template <bool HasKey, bool IsTop>
+class TTopCreate : public TBoxedValue {
+private:
+    using TResource = TTopResource<HasKey, IsTop>;
+
+    // Value-only layout: args = (value, maxSize).
+    TUnboxedValue RunImpl(const TUnboxedValuePod* args, std::false_type) const {
+        return TUnboxedValuePod(new TResource(args[0], args[1].Get<ui32>(), Compare_));
+    }
+
+    // Key/payload layout: args = (key, payload, maxSize).
+    TUnboxedValue RunImpl(const TUnboxedValuePod* args, std::true_type) const {
+        return TUnboxedValuePod(new TResource(args[0], args[1], args[2].Get<ui32>(), Compare_));
+    }
+
+    TUnboxedValue Run(const IValueBuilder*, const TUnboxedValuePod* args) const override {
+        return RunImpl(args, std::integral_constant<bool, HasKey>());
+    }
+
+public:
+    explicit TTopCreate(ICompare::TPtr compare)
+        : Compare_(compare)
+    {
+    }
+
+private:
+    // Comparator handed to every resource this callable creates.
+    ICompare::TPtr Compare_;
+};
+
+// "AddValue" implementation for data-slot keys: feeds one more element into
+// the keeper stored in args[0] and returns that same resource.
+template <EDataSlot Slot, bool HasKey, bool IsTop>
+class TTopAddValueData : public TBoxedValue {
+private:
+    // Value-only layout: args = (resource, value).
+    TUnboxedValue RunImpl(const TUnboxedValuePod* args, std::false_type) const {
+        auto keeper = GetTopResourceData<Slot, HasKey, IsTop>(args[0]);
+        keeper->Get()->AddValue(args[1]);
+        return TUnboxedValuePod(keeper);
+    }
+
+    // Key/payload layout: args = (resource, key, payload).
+    TUnboxedValue RunImpl(const TUnboxedValuePod* args, std::true_type) const {
+        auto keeper = GetTopResourceData<Slot, HasKey, IsTop>(args[0]);
+        keeper->Get()->AddValue(args[1], args[2]);
+        return TUnboxedValuePod(keeper);
+    }
+
+    TUnboxedValue Run(const IValueBuilder*, const TUnboxedValuePod* args) const override {
+        return RunImpl(args, std::integral_constant<bool, HasKey>());
+    }
+};
+
+// "AddValue" implementation for generic keys: feeds one more element into the
+// keeper stored in args[0] and returns that same resource.
+template <bool HasKey, bool IsTop>
+class TTopAddValue : public TBoxedValue {
+private:
+    // Value-only layout: args = (resource, value).
+    TUnboxedValue RunImpl(const TUnboxedValuePod* args, std::false_type) const {
+        auto keeper = GetTopResource<HasKey, IsTop>(args[0]);
+        keeper->Get()->AddValue(args[1]);
+        return TUnboxedValuePod(keeper);
+    }
+
+    // Key/payload layout: args = (resource, key, payload).
+    TUnboxedValue RunImpl(const TUnboxedValuePod* args, std::true_type) const {
+        auto keeper = GetTopResource<HasKey, IsTop>(args[0]);
+        keeper->Get()->AddValue(args[1], args[2]);
+        return TUnboxedValuePod(keeper);
+    }
+
+    TUnboxedValue Run(const IValueBuilder*, const TUnboxedValuePod* args) const override {
+        return RunImpl(args, std::integral_constant<bool, HasKey>());
+    }
+
+public:
+    // The comparator is accepted only so that all generic operations share
+    // one constructor shape (see MAKE_IMPL_GENERIC); it is not stored.
+    explicit TTopAddValue(ICompare::TPtr)
+    {
+    }
+};
+
+// "Serialize" implementation for data-slot keys: returns the serialized form
+// of the keeper stored in args[0].
+template <EDataSlot Slot, bool HasKey, bool IsTop>
+class TTopSerializeData : public TBoxedValue {
+private:
+    TUnboxedValue Run(const IValueBuilder* valueBuilder, const TUnboxedValuePod* args) const override {
+        return GetTopResourceData<Slot, HasKey, IsTop>(args[0])->Get()->Serialize(valueBuilder);
+    }
+};
+
+// "Serialize" implementation for generic keys: returns the serialized form of
+// the keeper stored in args[0].
+template <bool HasKey, bool IsTop>
+class TTopSerialize : public TBoxedValue {
+private:
+    TUnboxedValue Run(const IValueBuilder* valueBuilder, const TUnboxedValuePod* args) const override {
+        return GetTopResource<HasKey, IsTop>(args[0])->Get()->Serialize(valueBuilder);
+    }
+
+public:
+    // The comparator is accepted only so that all generic operations share
+    // one constructor shape (see MAKE_IMPL_GENERIC); it is not stored.
+    explicit TTopSerialize(ICompare::TPtr)
+    {
+    }
+};
+
+// "Deserialize" implementation for data-slot keys: rebuilds a resource from
+// the serialized value in args[0].
+template <EDataSlot Slot, bool HasKey, bool IsTop>
+class TTopDeserializeData : public TBoxedValue {
+private:
+    using TResource = TTopResourceData<Slot, HasKey, IsTop>;
+
+    TUnboxedValue Run(const IValueBuilder*, const TUnboxedValuePod* args) const override {
+        return TUnboxedValuePod(new TResource(args[0]));
+    }
+};
+
+// "Deserialize" implementation for generic keys: rebuilds a resource from the
+// serialized value in args[0], using the comparator captured at construction.
+template <bool HasKey, bool IsTop>
+class TTopDeserialize : public TBoxedValue {
+private:
+    using TResource = TTopResource<HasKey, IsTop>;
+
+    TUnboxedValue Run(const IValueBuilder*, const TUnboxedValuePod* args) const override {
+        return TUnboxedValuePod(new TResource(args[0], Compare_));
+    }
+
+public:
+    explicit TTopDeserialize(ICompare::TPtr compare)
+        : Compare_(compare)
+    {
+    }
+
+private:
+    // Comparator handed to every resource this callable rebuilds.
+    ICompare::TPtr Compare_;
+};
+
+// "Merge" implementation for data-slot keys: combines the keepers stored in
+// args[0] and args[1] into a newly allocated resource.
+template <EDataSlot Slot, bool HasKey, bool IsTop>
+class TTopMergeData : public TBoxedValue {
+private:
+    using TResource = TTopResourceData<Slot, HasKey, IsTop>;
+
+    TUnboxedValue Run(const IValueBuilder*, const TUnboxedValuePod* args) const override {
+        // Both inputs are validated (left first) before the merge starts.
+        auto lhs = GetTopResourceData<Slot, HasKey, IsTop>(args[0]);
+        auto rhs = GetTopResourceData<Slot, HasKey, IsTop>(args[1]);
+        return TUnboxedValuePod(new TResource(*lhs->Get(), *rhs->Get()));
+    }
+};
+
+// "Merge" implementation for generic keys: combines the keepers stored in
+// args[0] and args[1] into a newly allocated resource that uses the
+// comparator captured at construction.
+template <bool HasKey, bool IsTop>
+class TTopMerge : public TBoxedValue {
+private:
+    using TResource = TTopResource<HasKey, IsTop>;
+
+    TUnboxedValue Run(const IValueBuilder*, const TUnboxedValuePod* args) const override {
+        // Both inputs are validated (left first) before the merge starts.
+        auto lhs = GetTopResource<HasKey, IsTop>(args[0]);
+        auto rhs = GetTopResource<HasKey, IsTop>(args[1]);
+        return TUnboxedValuePod(new TResource(*lhs->Get(), *rhs->Get(), Compare_));
+    }
+
+public:
+    explicit TTopMerge(ICompare::TPtr compare)
+        : Compare_(compare)
+    {
+    }
+
+private:
+    // Comparator handed to every merged resource.
+    ICompare::TPtr Compare_;
+};
+
+// "GetResult" implementation for data-slot keys: asks the keeper stored in
+// args[0] to produce its final result value.
+template <EDataSlot Slot, bool HasKey, bool IsTop>
+class TTopGetResultData : public TBoxedValue {
+private:
+    TUnboxedValue Run(const IValueBuilder* valueBuilder, const TUnboxedValuePod* args) const override {
+        return GetTopResourceData<Slot, HasKey, IsTop>(args[0])->Get()->GetResult(valueBuilder);
+    }
+};
+
+// "GetResult" implementation for generic keys: asks the keeper stored in
+// args[0] to produce its final result value.
+template <bool HasKey, bool IsTop>
+class TTopGetResult : public TBoxedValue {
+private:
+    TUnboxedValue Run(const IValueBuilder* valueBuilder, const TUnboxedValuePod* args) const override {
+        return GetTopResource<HasKey, IsTop>(args[0])->Get()->GetResult(valueBuilder);
+    }
+
+public:
+    // The comparator is accepted only so that all generic operations share
+    // one constructor shape (see MAKE_IMPL_GENERIC); it is not stored.
+    explicit TTopGetResult(ICompare::TPtr)
+    {
+    }
+};
+
+
+// RESOURCE(slot, hasKey, isTop) emits, for one (slot, hasKey, isTop) triple:
+//   * a character array TopResourceName_<slot>_<hasKey>_<isTop> holding the
+//     resource tag "Top.TopResource.<slot>.<hasKey>.<isTop>";
+//   * the TTopResourceData specialization for that triple: a TBoxedResource
+//     around TTopKeeperDataWrapper whose constructor forwards all arguments.
+// The array is used as a template argument of TBoxedResource, so it is
+// declared `extern` (presumably to give it the linkage required for that
+// use -- confirm before changing).
+#define RESOURCE(slot, hasKey, isTop) \
+extern const char TopResourceName_##slot##_##hasKey##_##isTop[] = \
+ "Top.TopResource."#slot"."#hasKey"."#isTop; \
+template <> \
+class TTopResourceData<EDataSlot::slot, hasKey, isTop>: \
+ public TBoxedResource< \
+ TTopKeeperDataWrapper<EDataSlot::slot, hasKey, isTop>, \
+ TopResourceName_##slot##_##hasKey##_##isTop> \
+{ \
+public: \
+ template <typename... Args> \
+ inline TTopResourceData(Args&&... args) \
+ : TBoxedResource(std::forward<Args>(args)...) \
+ {} \
+};
+
+// Adapters with the (slot, ...) shape expected by UDF_TYPE_ID_MAP; the two
+// digits in the name encode hasKey and isTop respectively.
+#define RESOURCE_00(slot, ...) RESOURCE(slot, false, false)
+#define RESOURCE_01(slot, ...) RESOURCE(slot, false, true)
+#define RESOURCE_10(slot, ...) RESOURCE(slot, true, false)
+#define RESOURCE_11(slot, ...) RESOURCE(slot, true, true)
+
+// Instantiate the resource types for every slot listed by UDF_TYPE_ID_MAP,
+// once per hasKey/isTop combination.
+UDF_TYPE_ID_MAP(RESOURCE_00)
+UDF_TYPE_ID_MAP(RESOURCE_01)
+UDF_TYPE_ID_MAP(RESOURCE_10)
+UDF_TYPE_ID_MAP(RESOURCE_11)
+
+// MAKE_IMPL expands to one `case` of a switch over EDataSlot: it installs
+// operation<EDataSlot::slot, hasKey, isTop> as the function implementation.
+// It relies on a variable named `builder` being in scope at the expansion
+// site.
+#define MAKE_IMPL(operation, slot, hasKey, isTop) \
+ case EDataSlot::slot: \
+ builder.Implementation(new operation<EDataSlot::slot, hasKey, isTop>); \
+ break;
+
+// Per-operation adapters for UDF_TYPE_ID_MAP; the two digits in each name
+// encode hasKey and isTop respectively.
+#define CREATE_00(slot, ...) MAKE_IMPL(TTopCreateData, slot, false, false)
+#define CREATE_01(slot, ...) MAKE_IMPL(TTopCreateData, slot, false, true)
+#define CREATE_10(slot, ...) MAKE_IMPL(TTopCreateData, slot, true, false)
+#define CREATE_11(slot, ...) MAKE_IMPL(TTopCreateData, slot, true, true)
+
+#define ADD_VALUE_00(slot, ...) MAKE_IMPL(TTopAddValueData, slot, false, false)
+#define ADD_VALUE_01(slot, ...) MAKE_IMPL(TTopAddValueData, slot, false, true)
+#define ADD_VALUE_10(slot, ...) MAKE_IMPL(TTopAddValueData, slot, true, false)
+#define ADD_VALUE_11(slot, ...) MAKE_IMPL(TTopAddValueData, slot, true, true)
+
+#define MERGE_00(slot, ...) MAKE_IMPL(TTopMergeData, slot, false, false)
+#define MERGE_01(slot, ...) MAKE_IMPL(TTopMergeData, slot, false, true)
+#define MERGE_10(slot, ...) MAKE_IMPL(TTopMergeData, slot, true, false)
+#define MERGE_11(slot, ...) MAKE_IMPL(TTopMergeData, slot, true, true)
+
+#define SERIALIZE_00(slot, ...) MAKE_IMPL(TTopSerializeData, slot, false, false)
+#define SERIALIZE_01(slot, ...) MAKE_IMPL(TTopSerializeData, slot, false, true)
+#define SERIALIZE_10(slot, ...) MAKE_IMPL(TTopSerializeData, slot, true, false)
+#define SERIALIZE_11(slot, ...) MAKE_IMPL(TTopSerializeData, slot, true, true)
+
+#define DESERIALIZE_00(slot, ...) MAKE_IMPL(TTopDeserializeData, slot, false, false)
+#define DESERIALIZE_01(slot, ...) MAKE_IMPL(TTopDeserializeData, slot, false, true)
+#define DESERIALIZE_10(slot, ...) MAKE_IMPL(TTopDeserializeData, slot, true, false)
+#define DESERIALIZE_11(slot, ...) MAKE_IMPL(TTopDeserializeData, slot, true, true)
+
+#define GET_RESULT_00(slot, ...) MAKE_IMPL(TTopGetResultData, slot, false, false)
+#define GET_RESULT_01(slot, ...) MAKE_IMPL(TTopGetResultData, slot, false, true)
+#define GET_RESULT_10(slot, ...) MAKE_IMPL(TTopGetResultData, slot, true, false)
+#define GET_RESULT_11(slot, ...) MAKE_IMPL(TTopGetResultData, slot, true, true)
+
+// MAKE_TYPE expands to one `case` of a switch over EDataSlot: it assigns the
+// resource type registered under the matching tag to a variable named
+// `topType`, using a variable named `builder`; both must be in scope at the
+// expansion site.
+#define MAKE_TYPE(slot, hasKey, isTop) \
+ case EDataSlot::slot: \
+ topType = builder.Resource(TopResourceName_##slot##_##hasKey##_##isTop); \
+ break;
+
+#define TYPE_00(slot, ...) MAKE_TYPE(slot, false, false)
+#define TYPE_01(slot, ...) MAKE_TYPE(slot, false, true)
+#define TYPE_10(slot, ...) MAKE_TYPE(slot, true, false)
+#define TYPE_11(slot, ...) MAKE_TYPE(slot, true, true)
+
+// PARAMETRIZE(action) turns the runtime values `hasKey`, `isTop` and `*slot`
+// (all expected in scope at the expansion site) into a compile-time choice:
+// it picks action##_<hasKey><isTop> and expands it through UDF_TYPE_ID_MAP
+// inside a switch over the slot. The switches have no `default` label, so a
+// slot not listed by UDF_TYPE_ID_MAP performs no action.
+#define PARAMETRIZE(action) \
+ if (hasKey) { \
+ if (isTop) { \
+ switch (*slot) { \
+ UDF_TYPE_ID_MAP(action##_11) \
+ } \
+ } else { \
+ switch (*slot) { \
+ UDF_TYPE_ID_MAP(action##_10) \
+ } \
+ } \
+ } else { \
+ if (isTop) { \
+ switch (*slot) { \
+ UDF_TYPE_ID_MAP(action##_01) \
+ } \
+ } else { \
+ switch (*slot) { \
+ UDF_TYPE_ID_MAP(action##_00) \
+ } \
+ } \
+ }
+
+
+// RESOURCE_GENERIC(hasKey, isTop) is the generic-key counterpart of RESOURCE:
+// it emits the tag array "Top.TopResource.Generic.<hasKey>.<isTop>" and the
+// TTopResource specialization, a TBoxedResource around TTopKeeperWrapper
+// whose constructor forwards all arguments.
+#define RESOURCE_GENERIC(hasKey, isTop) \
+extern const char TopResourceName_Generic_##hasKey##_##isTop[] = \
+ "Top.TopResource.Generic."#hasKey"."#isTop; \
+template <> \
+class TTopResource<hasKey, isTop>: \
+ public TBoxedResource< \
+ TTopKeeperWrapper<hasKey, isTop>, \
+ TopResourceName_Generic_##hasKey##_##isTop> \
+{ \
+public: \
+ template <typename... Args> \
+ inline TTopResource(Args&&... args) \
+ : TBoxedResource(std::forward<Args>(args)...) \
+ {} \
+};
+
+// One generic resource type per hasKey/isTop combination.
+RESOURCE_GENERIC(false, false)
+RESOURCE_GENERIC(false, true)
+RESOURCE_GENERIC(true, false)
+RESOURCE_GENERIC(true, true)
+
+// MAKE_IMPL_GENERIC installs operation<hasKey, isTop> as the function
+// implementation, constructing it with the comparator. It relies on
+// variables named `builder` and `compare` at the expansion site.
+#define MAKE_IMPL_GENERIC(operation, hasKey, isTop) \
+ builder.Implementation(new operation<hasKey, isTop>(compare));
+
+#define CREATE_GENERIC(hasKey, isTop) MAKE_IMPL_GENERIC(TTopCreate, hasKey, isTop)
+#define ADD_VALUE_GENERIC(hasKey, isTop) MAKE_IMPL_GENERIC(TTopAddValue, hasKey, isTop)
+#define MERGE_GENERIC(hasKey, isTop) MAKE_IMPL_GENERIC(TTopMerge, hasKey, isTop)
+#define SERIALIZE_GENERIC(hasKey, isTop) MAKE_IMPL_GENERIC(TTopSerialize, hasKey, isTop)
+#define DESERIALIZE_GENERIC(hasKey, isTop) MAKE_IMPL_GENERIC(TTopDeserialize, hasKey, isTop)
+#define GET_RESULT_GENERIC(hasKey, isTop) MAKE_IMPL_GENERIC(TTopGetResult, hasKey, isTop)
+
+// Assigns the generic resource type for (hasKey, isTop) to `topType`.
+#define TYPE_GENERIC(hasKey, isTop) \
+ topType = builder.Resource(TopResourceName_Generic_##hasKey##_##isTop);
+
+// PARAMETRIZE_GENERIC(action) maps the runtime values `hasKey` and `isTop`
+// (expected in scope at the expansion site) onto the matching literal
+// arguments of `action`.
+#define PARAMETRIZE_GENERIC(action) \
+ if (hasKey) { \
+ if (isTop) { \
+ action(true, true) \
+ } else { \
+ action(true, false) \
+ } \
+ } else { \
+ if (isTop) { \
+ action(false, true) \
+ } else { \
+ action(false, false) \
+ } \
+ }
+
+
+// Names of the functions exported by the Top module; registered in
+// GetAllFunctions and matched against in BuildFunctionTypeInfo.
+static const auto CreateName = TStringRef::Of("Create");
+static const auto AddValueName = TStringRef::Of("AddValue");
+static const auto SerializeName = TStringRef::Of("Serialize");
+static const auto DeserializeName = TStringRef::Of("Deserialize");
+static const auto MergeName = TStringRef::Of("Merge");
+static const auto GetResultName = TStringRef::Of("GetResult");
+
+// UDF module "Top": exposes Create / AddValue / Serialize / Deserialize /
+// Merge / GetResult over a bounded top keeper. All functions are type-aware;
+// their signatures are derived from the user type and from a two-character
+// type config.
+class TTopModule : public IUdfModule {
+public:
+    TStringRef Name() const {
+        return TStringRef::Of("Top");
+    }
+
+    void CleanupOnTerminate() const final {
+    }
+
+    void GetAllFunctions(IFunctionsSink& sink) const final {
+        sink.Add(CreateName)->SetTypeAwareness();
+        sink.Add(AddValueName)->SetTypeAwareness();
+        sink.Add(SerializeName)->SetTypeAwareness();
+        sink.Add(DeserializeName)->SetTypeAwareness();
+        sink.Add(MergeName)->SetTypeAwareness();
+        sink.Add(GetResultName)->SetTypeAwareness();
+    }
+
+    // Builds the signature (and, unless only types are requested, the
+    // implementation) of the function `name`.
+    //
+    //   typeConfig  exactly two characters: [0] == '1' means the elements are
+    //               (key, payload) pairs, [1] == '1' selects the IsTop
+    //               variant of the keeper.
+    //   userType    a 3-tuple whose element 2 is the value type; with a key
+    //               that value type must itself be a (key, payload) 2-tuple.
+    //
+    // Data-typed keys use the per-slot specializations; any other key type
+    // goes through the generic path driven by builder.MakeCompare(). Problems
+    // are reported through builder.SetError(), including any std::exception
+    // thrown while building.
+    void BuildFunctionTypeInfo(
+        const TStringRef& name,
+        TType* userType,
+        const TStringRef& typeConfig,
+        ui32 flags,
+        IFunctionTypeInfoBuilder& builder) const final
+    {
+        try {
+            bool typesOnly = (flags & TFlags::TypesOnly);
+            builder.UserType(userType);
+
+            if (typeConfig.Size() != 2) {
+                // TStringRef is a (pointer, size) pair: print exactly Size()
+                // bytes instead of treating Data() as a C string.
+                builder.SetError(TStringBuilder() << "Invalid type config: "
+                    << TStringBuf(typeConfig.Data(), typeConfig.Size()));
+                return;
+            }
+
+            bool hasKey = (typeConfig.Data()[0] == '1');
+            bool isTop = (typeConfig.Data()[1] == '1');
+
+            auto typeHelper = builder.TypeInfoHelper();
+
+            auto userTypeInspector = TTupleTypeInspector(*typeHelper, userType);
+            if (!userTypeInspector || userTypeInspector.GetElementsCount() != 3) {
+                builder.SetError("User type is not a 3-tuple");
+                return;
+            }
+
+            // Without a key the value doubles as both key and payload.
+            auto valueType = userTypeInspector.GetElementType(2);
+            auto keyType = valueType;
+            auto payloadType = valueType;
+
+            if (hasKey) {
+                auto keyPayloadTypeInspector = TTupleTypeInspector(*typeHelper, valueType);
+                if (!keyPayloadTypeInspector || keyPayloadTypeInspector.GetElementsCount() != 2) {
+                    builder.SetError("Key/payload type is not a 2-tuple");
+                    return;
+                }
+
+                keyType = keyPayloadTypeInspector.GetElementType(0);
+                payloadType = keyPayloadTypeInspector.GetElementType(1);
+            }
+
+            bool isGeneric = false;
+            ICompare::TPtr compare;
+            TMaybe<EDataSlot> slot;
+
+            auto keyTypeInspector = TDataTypeInspector(*typeHelper, keyType);
+            if (!keyTypeInspector) {
+                // Not a data type: fall back to a runtime comparator.
+                isGeneric = true;
+                compare = builder.MakeCompare(keyType);
+                if (!compare) {
+                    // NOTE(review): no error is set here; presumably
+                    // MakeCompare reports the failure itself -- confirm.
+                    return;
+                }
+            } else {
+                slot = FindDataSlot(keyTypeInspector.GetTypeId());
+                if (!slot) {
+                    builder.SetError("Unknown data type");
+                    return;
+                }
+                if (!(GetDataTypeInfo(*slot).Features & NUdf::CanCompare)) {
+                    builder.SetError("Data type is not comparable");
+                    return;
+                }
+            }
+
+            // Serialized form: Tuple<ui32, List<valueType>>.
+            auto serializedListType = builder.List()->Item(valueType).Build();
+            auto serializedType = builder.Tuple()->Add<ui32>().Add(serializedListType).Build();
+
+            TType* topType = nullptr;
+            if (isGeneric) {
+                PARAMETRIZE_GENERIC(TYPE_GENERIC)
+            } else {
+                PARAMETRIZE(TYPE)
+            }
+
+            if (name == CreateName) {
+                if (hasKey) {
+                    builder.Args()->Add(keyType).Add(payloadType).Add<ui32>().Done().Returns(topType);
+                } else {
+                    builder.Args()->Add(valueType).Add<ui32>().Done().Returns(topType);
+                }
+
+                if (!typesOnly) {
+                    if (isGeneric) {
+                        PARAMETRIZE_GENERIC(CREATE_GENERIC)
+                    } else {
+                        PARAMETRIZE(CREATE)
+                    }
+                }
+                builder.IsStrict();
+            }
+
+            if (name == AddValueName) {
+                if (hasKey) {
+                    builder.Args()->Add(topType).Add(keyType).Add(payloadType).Done().Returns(topType);
+                } else {
+                    builder.Args()->Add(topType).Add(valueType).Done().Returns(topType);
+                }
+
+                if (!typesOnly) {
+                    if (isGeneric) {
+                        PARAMETRIZE_GENERIC(ADD_VALUE_GENERIC)
+                    } else {
+                        PARAMETRIZE(ADD_VALUE)
+                    }
+                }
+                builder.IsStrict();
+            }
+
+            if (name == SerializeName) {
+                builder.Args()->Add(topType).Done().Returns(serializedType);
+
+                if (!typesOnly) {
+                    if (isGeneric) {
+                        PARAMETRIZE_GENERIC(SERIALIZE_GENERIC)
+                    } else {
+                        PARAMETRIZE(SERIALIZE)
+                    }
+                }
+                builder.IsStrict();
+            }
+
+            // Deserialize is the only function not marked strict.
+            if (name == DeserializeName) {
+                builder.Args()->Add(serializedType).Done().Returns(topType);
+
+                if (!typesOnly) {
+                    if (isGeneric) {
+                        PARAMETRIZE_GENERIC(DESERIALIZE_GENERIC)
+                    } else {
+                        PARAMETRIZE(DESERIALIZE)
+                    }
+                }
+            }
+
+            if (name == MergeName) {
+                builder.Args()->Add(topType).Add(topType).Done().Returns(topType);
+
+                if (!typesOnly) {
+                    if (isGeneric) {
+                        PARAMETRIZE_GENERIC(MERGE_GENERIC)
+                    } else {
+                        PARAMETRIZE(MERGE)
+                    }
+                }
+                builder.IsStrict();
+            }
+
+            if (name == GetResultName) {
+                // The result exposes payloads only, never the keys.
+                auto listType = builder.List()->Item(payloadType).Build();
+
+                builder.Args()->Add(topType).Done().Returns(listType);
+
+                if (!typesOnly) {
+                    if (isGeneric) {
+                        PARAMETRIZE_GENERIC(GET_RESULT_GENERIC)
+                    } else {
+                        PARAMETRIZE(GET_RESULT)
+                    }
+                }
+                builder.IsStrict();
+            }
+
+        } catch (const std::exception&) {
+            builder.SetError(CurrentExceptionMessage());
+        }
+    }
+};
+
+} // namespace
+
+REGISTER_MODULES(TTopModule)
+
diff --git a/yql/essentials/udfs/common/top/ya.make b/yql/essentials/udfs/common/top/ya.make
new file mode 100644
index 00000000000..5032048e3ba
--- /dev/null
+++ b/yql/essentials/udfs/common/top/ya.make
@@ -0,0 +1,30 @@
+# Build description for the Top UDF. With YQL_PACKAGED set, the library is
+# taken as the file libtop_udf.so from a sandbox resource; otherwise it is
+# built from top_udf.cpp against UDF ABI version 2.28.0.
+IF (YQL_PACKAGED)
+ PACKAGE()
+ FROM_SANDBOX(FILE 7319906274 OUT_NOAUTO libtop_udf.so
+ )
+ END()
+ELSE ()
+YQL_UDF_CONTRIB(top_udf)
+
+ YQL_ABI_VERSION(
+ 2
+ 28
+ 0
+ )
+
+ SRCS(
+ top_udf.cpp
+ )
+
+ PEERDIR(
+ library/cpp/containers/top_keeper
+ )
+
+ END()
+ENDIF ()
+
+
+RECURSE_FOR_TESTS(
+ test
+)
+
diff --git a/yql/essentials/udfs/common/topfreq/static/static_udf.cpp b/yql/essentials/udfs/common/topfreq/static/static_udf.cpp
new file mode 100644
index 00000000000..4075bfa9c2b
--- /dev/null
+++ b/yql/essentials/udfs/common/topfreq/static/static_udf.cpp
@@ -0,0 +1,10 @@
+#include "topfreq_udf.h"
+
+namespace NYql {
+    namespace NUdf {
+        // Factory for the statically linked TopFreq module; the caller
+        // receives ownership of the new instance.
+        NUdf::TUniquePtr<NUdf::IUdfModule> CreateTopFreqModule() {
+            return NUdf::TUniquePtr<NUdf::IUdfModule>(new TTopFreqModule());
+        }
+
+    }
+}
diff --git a/yql/essentials/udfs/common/topfreq/static/topfreq.cpp b/yql/essentials/udfs/common/topfreq/static/topfreq.cpp
new file mode 100644
index 00000000000..c118b52d0a1
--- /dev/null
+++ b/yql/essentials/udfs/common/topfreq/static/topfreq.cpp
@@ -0,0 +1,213 @@
+#include "topfreq.h"
+#include <cmath>
+#include <algorithm>
+
+using namespace NKikimr;
+using namespace NUdf;
+
+// Creates an empty counter. The value -> index map starts with a zero bucket
+// hint and uses the supplied hash and equality functors.
+template <typename THash, typename TEquals>
+TTopFreqBase<THash, TEquals>::TTopFreqBase(THash hash, TEquals equals)
+    : Indices_(0, hash, equals)
+{
+}
+
+// Sets the size limits, pre-allocates room for maxSize + 1 entries (the one
+// extra entry is what triggers compression) and records the first value.
+template <typename THash, typename TEquals>
+void TTopFreqBase<THash, TEquals>::Init(const TUnboxedValuePod& value, const ui32 minSize, const ui32 maxSize) {
+    MinSize_ = minSize;
+    MaxSize_ = maxSize;
+
+    const ui32 capacity = MaxSize_ + 1;
+    Freqs_.reserve(capacity);
+    Indices_.reserve(capacity);
+
+    AddValue(value);
+}
+
+// Initialises this counter as the union of two others: the limits become the
+// larger of each pair, and the entries of both inputs are added in turn
+// (compressing after each input if the limit is exceeded).
+template <typename THash, typename TEquals>
+void TTopFreqBase<THash, TEquals>::Merge(const TTopFreqBase& topFreq1, const TTopFreqBase& topFreq2) {
+    MinSize_ = std::max(topFreq1.MinSize_, topFreq2.MinSize_);
+    MaxSize_ = std::max(topFreq1.MaxSize_, topFreq2.MaxSize_);
+
+    const ui32 capacity = MaxSize_ + 1;
+    const ui32 combined = ui32(topFreq1.Freqs_.size() + topFreq2.Freqs_.size());
+    Freqs_.reserve(std::max(capacity, combined));
+    Indices_.reserve(capacity);
+
+    Add(topFreq1);
+    Add(topFreq2);
+}
+
+// Restores the state written by Serialize(): element 0 is MinSize_, element 1
+// is MaxSize_ and element 2 is a list of (frequency, value) tuples.
+template <typename THash, typename TEquals>
+void TTopFreqBase<THash, TEquals>::Deserialize(const TUnboxedValuePod& serialized) {
+    MinSize_ = serialized.GetElement(0).Get<ui32>();
+    MaxSize_ = serialized.GetElement(1).Get<ui32>();
+
+    const ui32 capacity = MaxSize_ + 1;
+    Freqs_.reserve(capacity);
+    Indices_.reserve(capacity);
+
+    const auto listIter = serialized.GetElement(2).GetListIterator();
+    TUnboxedValue entry;
+    while (listIter.Next(entry)) {
+        Update(entry.GetElement(1), entry.GetElement(0).Get<ui64>());
+    }
+}
+
+// Materialises the current entries as an array of two-element arrays laid
+// out as (frequency, value), in the current order of Freqs_.
+template <typename THash, typename TEquals>
+TUnboxedValue TTopFreqBase<THash, TEquals>::Convert(const IValueBuilder* valueBuilder) const {
+    TUnboxedValue* rows = nullptr;
+    const auto list = valueBuilder->NewArray(Freqs_.size(), rows);
+    for (size_t i = 0; i < Freqs_.size(); ++i) {
+        TUnboxedValue* row = nullptr;
+        rows[i] = valueBuilder->NewArray(2U, row);
+        row[0] = TUnboxedValuePod(Freqs_[i].second);
+        row[1] = Freqs_[i].first;
+    }
+    return list;
+}
+
+// Folds every (value, frequency) entry of another counter into this one and
+// then compresses once if the size limit has been exceeded.
+template <typename THash, typename TEquals>
+void TTopFreqBase<THash, TEquals>::Add(const TTopFreqBase& otherModeCalc) {
+    for (const auto& entry : otherModeCalc.Freqs_) {
+        Update(entry.first, entry.second);
+    }
+
+    TryCompress();
+}
+
+// Returns at most `resultSize` entries ordered by descending frequency.
+// Note that this truncates the stored state to the returned entries.
+template <typename THash, typename TEquals>
+TUnboxedValue TTopFreqBase<THash, TEquals>::Get(const IValueBuilder* builder, ui32 resultSize) {
+    const ui32 limit = std::min(resultSize, ui32(Freqs_.size()));
+    Compress(limit, /* sort = */ true);
+    return Convert(builder);
+}
+
+// Counts one more occurrence of `value`, compressing afterwards if the
+// number of tracked values exceeds MaxSize_.
+template <typename THash, typename TEquals>
+void TTopFreqBase<THash, TEquals>::AddValue(const TUnboxedValuePod& value) {
+    const ui64 singleOccurrence = 1;
+    Update(value, singleOccurrence);
+    TryCompress();
+}
+
+// Adds `freq` occurrences of `value`. A new entry is appended optimistically
+// and registered in the index; if the value was already indexed, the
+// existing entry is incremented and the tentative one is dropped again.
+template <typename THash, typename TEquals>
+void TTopFreqBase<THash, TEquals>::Update(const TUnboxedValuePod& value, ui64 freq) {
+    const auto tentativeIndex = Freqs_.size();
+    Freqs_.emplace_back(TUnboxedValuePod(value), freq);
+
+    const auto inserted = Indices_.emplace(TUnboxedValuePod(value), tentativeIndex);
+    if (inserted.second) {
+        return;
+    }
+
+    Freqs_[inserted.first->second].second += freq;
+    Freqs_.pop_back();
+}
+
+// Trims the state down to MinSize_ entries once more than MaxSize_ distinct
+// values are being tracked.
+template <typename THash, typename TEquals>
+void TTopFreqBase<THash, TEquals>::TryCompress() {
+    if (Freqs_.size() <= MaxSize_) {
+        return;
+    }
+    Compress(MinSize_);
+}
+
+// Keeps only the `newSize` most frequent entries and rebuilds the
+// value -> index map for the survivors.
+//
+//   newSize  number of entries to keep; clamped to the current entry count,
+//            so the call can only shrink the state.
+//   sort     when true the survivors are fully ordered by descending
+//            frequency; otherwise they are merely partitioned so that the
+//            first `newSize` entries are the most frequent ones.
+template <typename THash, typename TEquals>
+void TTopFreqBase<THash, TEquals>::Compress(ui32 newSize, bool sort) {
+    // Asking for more entries than exist would hand nth_element an
+    // out-of-range iterator and make resize() append empty values.
+    newSize = std::min(newSize, ui32(Freqs_.size()));
+
+    auto compare = [](const TVectorElement& v1, const TVectorElement& v2) {
+        return v1.second > v2.second;
+    };
+
+    if (sort) {
+        std::sort(Freqs_.begin(), Freqs_.end(), compare);
+    } else if (newSize > 0) {
+        // With newSize == 0 there is no "last kept" position to partition
+        // around: begin() + 0 - 1 would be before the start of the vector.
+        std::nth_element(Freqs_.begin(), Freqs_.begin() + (newSize - 1), Freqs_.end(), compare);
+    }
+
+    Indices_.clear();
+    Freqs_.resize(newSize);
+
+    for (ui32 i = 0; i < newSize; i++) {
+        Indices_[Freqs_[i].first] = i;
+    }
+}
+
+// Produces the tuple (MinSize_, MaxSize_, entries) consumed by Deserialize().
+// The state is first trimmed to at most MinSize_ entries, so serializing
+// modifies the counter.
+template <typename THash, typename TEquals>
+TUnboxedValue TTopFreqBase<THash, TEquals>::Serialize(const IValueBuilder* builder) {
+    const bool oversized = ui32(Freqs_.size()) > MinSize_;
+    if (oversized) {
+        Compress(MinSize_);
+    }
+
+    TUnboxedValue* fields = nullptr;
+    auto result = builder->NewArray(3U, fields);
+    fields[0] = TUnboxedValuePod(MinSize_);
+    fields[1] = TUnboxedValuePod(MaxSize_);
+    fields[2] = Convert(builder);
+    return result;
+}
+
+// Out-of-line members of TTopFreqData<Slot>. Each constructor builds the base
+// with the slot-specific hash/equality functors and then seeds the state via
+// the matching base routine; the remaining methods forward to the base.
+
+// Starts a counter from the first value and the size limits.
+template <EDataSlot Slot>
+TTopFreqData<Slot>::TTopFreqData(const TUnboxedValuePod& value, const ui32 minSize, const ui32 maxSize)
+ : TBase(TUnboxedValueHash<Slot>(), TUnboxedValueEquals<Slot>())
+{
+ TBase::Init(value, minSize, maxSize);
+}
+
+// Builds a counter as the merge of two existing ones.
+template <EDataSlot Slot>
+TTopFreqData<Slot>::TTopFreqData(const TTopFreqData& topFreq1, const TTopFreqData& topFreq2)
+ : TBase(TUnboxedValueHash<Slot>(), TUnboxedValueEquals<Slot>())
+{
+ TBase::Merge(topFreq1, topFreq2);
+}
+
+// Rebuilds a counter from its serialized form.
+template <EDataSlot Slot>
+TTopFreqData<Slot>::TTopFreqData(const TUnboxedValuePod& serialized)
+ : TBase(TUnboxedValueHash<Slot>(), TUnboxedValueEquals<Slot>())
+{
+ TBase::Deserialize(serialized);
+}
+
+// The TBase:: qualification below is required: an unqualified call would
+// resolve to the forwarding method itself.
+template <EDataSlot Slot>
+TUnboxedValue TTopFreqData<Slot>::Serialize(const IValueBuilder* builder) {
+ return TBase::Serialize(builder);
+}
+
+template <EDataSlot Slot>
+TUnboxedValue TTopFreqData<Slot>::Get(const IValueBuilder* builder, ui32 resultSize) {
+ return TBase::Get(builder, resultSize);
+}
+
+template <EDataSlot Slot>
+void TTopFreqData<Slot>::AddValue(const TUnboxedValuePod& value) {
+ TBase::AddValue(value);
+}
+
+// Explicitly instantiate TTopFreqData for every slot listed by
+// UDF_TYPE_ID_MAP: the member definitions live in this .cpp file, so users
+// of the header need the instantiations to come from here.
+#define INSTANCE_FOR(slot, ...) \
+ template class TTopFreqData<EDataSlot::slot>;
+
+UDF_TYPE_ID_MAP(INSTANCE_FOR)
+
+#undef INSTANCE_FOR
+
+// Out-of-line members of TTopFreqGeneric. Each constructor wraps the runtime
+// hash/equate objects into the functors expected by the base and then seeds
+// the state via the matching base routine; the remaining methods forward to
+// the base.
+
+// Starts a counter from the first value and the size limits.
+TTopFreqGeneric::TTopFreqGeneric(const TUnboxedValuePod& value, const ui32 minSize, const ui32 maxSize,
+ IHash::TPtr hash, IEquate::TPtr equate)
+ : TBase(TGenericHash{hash}, TGenericEquals{equate})
+{
+ TBase::Init(value, minSize, maxSize);
+}
+
+// Builds a counter as the merge of two existing ones.
+TTopFreqGeneric::TTopFreqGeneric(const TTopFreqGeneric& topFreq1, const TTopFreqGeneric& topFreq2,
+ IHash::TPtr hash, IEquate::TPtr equate)
+ : TBase(TGenericHash{hash}, TGenericEquals{equate})
+{
+ TBase::Merge(topFreq1, topFreq2);
+}
+
+// Rebuilds a counter from its serialized form.
+TTopFreqGeneric::TTopFreqGeneric(const TUnboxedValuePod& serialized,
+ IHash::TPtr hash, IEquate::TPtr equate)
+ : TBase(TGenericHash{hash}, TGenericEquals{equate})
+{
+ TBase::Deserialize(serialized);
+}
+
+// The TBase:: qualification below is required: an unqualified call would
+// resolve to the forwarding method itself.
+TUnboxedValue TTopFreqGeneric::Serialize(const IValueBuilder* builder) {
+ return TBase::Serialize(builder);
+}
+
+TUnboxedValue TTopFreqGeneric::Get(const IValueBuilder* builder, ui32 resultSize) {
+ return TBase::Get(builder, resultSize);
+}
+
+void TTopFreqGeneric::AddValue(const TUnboxedValuePod& value) {
+ TBase::AddValue(value);
+}
+
diff --git a/yql/essentials/udfs/common/topfreq/static/topfreq.h b/yql/essentials/udfs/common/topfreq/static/topfreq.h
new file mode 100644
index 00000000000..b10574f33f6
--- /dev/null
+++ b/yql/essentials/udfs/common/topfreq/static/topfreq.h
@@ -0,0 +1,97 @@
+#pragma once
+
+#include <yql/essentials/public/udf/udf_allocator.h>
+#include <yql/essentials/public/udf/udf_helpers.h>
+#include <yql/essentials/public/udf/udf_type_ops.h>
+
+#include <unordered_map>
+
+// Approximate "most frequent values" counter shared by the data-slot and
+// generic variants. It tracks (value, frequency) pairs; once more than
+// MaxSize_ distinct values are held, the state is trimmed to the MinSize_
+// most frequent ones. THash / TEquals are the functors used to index values.
+template <typename THash, typename TEquals>
+class TTopFreqBase {
+protected:
+ using TUnboxedValuePod = NKikimr::NUdf::TUnboxedValuePod;
+ using TUnboxedValue = NKikimr::NUdf::TUnboxedValue;
+ using IValueBuilder = NKikimr::NUdf::IValueBuilder;
+
+ // One tracked entry: the value and its accumulated frequency.
+ using TVectorElement = std::pair<TUnboxedValue, ui64>;
+ using TVectorType = std::vector<TVectorElement, NKikimr::NUdf::TStdAllocatorForUdf<TVectorElement>>;
+
+ // Tracked entries; ordered only right after a sorting Compress().
+ TVectorType Freqs_;
+ // Maps each tracked value to the index of its entry in Freqs_.
+ std::unordered_map<TUnboxedValue, ui32, THash, TEquals, NKikimr::NUdf::TStdAllocatorForUdf<std::pair<const TUnboxedValue, ui32>>> Indices_;
+ // Number of entries kept by a compression triggered from TryCompress()
+ // or Serialize().
+ ui32 MinSize_ = 0;
+ // Number of entries above which TryCompress() compresses.
+ ui32 MaxSize_ = 0;
+
+ // Folds all entries of another counter into this one.
+ void Add(const TTopFreqBase& otherCalc);
+ // Adds `value` occurrences to the entry for `key`, creating it if needed.
+ void Update(const TUnboxedValuePod& key, const ui64 value);
+ // Compresses to MinSize_ when more than MaxSize_ entries are tracked.
+ void TryCompress();
+ // Keeps the `newSize` most frequent entries, fully sorted if `sort`.
+ void Compress(ui32 newSize, bool sort = false);
+ // Renders the entries as a list of (frequency, value) tuples.
+ TUnboxedValue Convert(const IValueBuilder* valueBuilder) const;
+
+protected:
+ TTopFreqBase(THash hash, TEquals equals);
+
+ // State initialisers used by the constructors of derived classes.
+ void Init(const TUnboxedValuePod& value, const ui32 minSize, const ui32 maxSize);
+ void Merge(const TTopFreqBase& TopFreq1, const TTopFreqBase& TopFreq2);
+ void Deserialize(const TUnboxedValuePod& serialized);
+
+ // Serialize() and Get() are non-const: both may compress the state.
+ TUnboxedValue Serialize(const IValueBuilder* builder);
+ TUnboxedValue Get(const IValueBuilder* builder, ui32 resultSize);
+ void AddValue(const TUnboxedValuePod& value);
+};
+
+// Frequency counter for values of a fixed data slot, using the slot-specific
+// TUnboxedValueHash / TUnboxedValueEquals functors. Member definitions and
+// the explicit instantiations live in topfreq.cpp.
+template <NKikimr::NUdf::EDataSlot Slot>
+class TTopFreqData
+ : public TTopFreqBase<
+ NKikimr::NUdf::TUnboxedValueHash<Slot>,
+ NKikimr::NUdf::TUnboxedValueEquals<Slot>>
+{
+public:
+ using TBase = TTopFreqBase<
+ NKikimr::NUdf::TUnboxedValueHash<Slot>,
+ NKikimr::NUdf::TUnboxedValueEquals<Slot>>;
+
+ // Start from a first value / merge two counters / restore from the
+ // serialized form.
+ TTopFreqData(const NKikimr::NUdf::TUnboxedValuePod& value, const ui32 minSize, const ui32 maxSize);
+ TTopFreqData(const TTopFreqData& topFreq1, const TTopFreqData& topFreq2);
+ TTopFreqData(const NKikimr::NUdf::TUnboxedValuePod& serialized);
+
+ // Public forwarders to the protected base operations.
+ NKikimr::NUdf::TUnboxedValue Serialize(const NKikimr::NUdf::IValueBuilder* builder);
+ NKikimr::NUdf::TUnboxedValue Get(const NKikimr::NUdf::IValueBuilder* builder, ui32 resultSize);
+ void AddValue(const NKikimr::NUdf::TUnboxedValuePod& value);
+};
+
+// Hash functor that delegates to a runtime IHash object, so that it can be
+// used as the hasher of the value -> index map.
+struct TGenericHash {
+ NKikimr::NUdf::IHash::TPtr Hash;
+
+ std::size_t operator()(const NKikimr::NUdf::TUnboxedValuePod& value) const {
+ return Hash->Hash(value);
+ }
+};
+
+// Equality functor that delegates to a runtime IEquate object, so that it can
+// be used as the key comparison of the value -> index map.
+struct TGenericEquals {
+ NKikimr::NUdf::IEquate::TPtr Equate;
+
+ bool operator()(
+ const NKikimr::NUdf::TUnboxedValuePod& left,
+ const NKikimr::NUdf::TUnboxedValuePod& right) const
+ {
+ return Equate->Equals(left, right);
+ }
+};
+
+// Frequency counter for values of arbitrary type; hashing and equality are
+// supplied at run time through IHash / IEquate. Member definitions live in
+// topfreq.cpp.
+class TTopFreqGeneric
+ : public TTopFreqBase<TGenericHash, TGenericEquals>
+{
+public:
+ using TBase = TTopFreqBase<TGenericHash, TGenericEquals>;
+
+ // Start from a first value / merge two counters / restore from the
+ // serialized form.
+ TTopFreqGeneric(const NKikimr::NUdf::TUnboxedValuePod& value, const ui32 minSize, const ui32 maxSize,
+ NKikimr::NUdf::IHash::TPtr hash, NKikimr::NUdf::IEquate::TPtr equate);
+ TTopFreqGeneric(const TTopFreqGeneric& topFreq1, const TTopFreqGeneric& topFreq2,
+ NKikimr::NUdf::IHash::TPtr hash, NKikimr::NUdf::IEquate::TPtr equate);
+ TTopFreqGeneric(const NKikimr::NUdf::TUnboxedValuePod& serialized,
+ NKikimr::NUdf::IHash::TPtr hash, NKikimr::NUdf::IEquate::TPtr equate);
+
+ // Public forwarders to the protected base operations.
+ NKikimr::NUdf::TUnboxedValue Serialize(const NKikimr::NUdf::IValueBuilder* builder);
+ NKikimr::NUdf::TUnboxedValue Get(const NKikimr::NUdf::IValueBuilder* builder, ui32 resultSize);
+ void AddValue(const NKikimr::NUdf::TUnboxedValuePod& value);
+};
diff --git a/yql/essentials/udfs/common/topfreq/static/topfreq_udf.h b/yql/essentials/udfs/common/topfreq/static/topfreq_udf.h
new file mode 100644
index 00000000000..d6df05a048f
--- /dev/null
+++ b/yql/essentials/udfs/common/topfreq/static/topfreq_udf.h
@@ -0,0 +1,393 @@
+#pragma once
+
+#include <yql/essentials/public/udf/udf_helpers.h>
+#include <yql/essentials/public/udf/udf_value_builder.h>
+#include <yql/essentials/public/udf/udf_registrator.h>
+#include <yql/essentials/public/udf/udf_type_builder.h>
+#include <yql/essentials/public/udf/udf_type_inspection.h>
+#include <yql/essentials/public/udf/udf_value.h>
+#include <yql/essentials/public/udf/udf_types.h>
+#include "topfreq.h"
+#include <algorithm>
+#include <array>
+
+using namespace NYql;
+using namespace NUdf;
+
+namespace {
+ // Resource tag used for the generic TopFreq state.
+ extern const char TopFreqResourceNameGeneric[] = "TopFreq.TopFreqResource.Generic";
+
+ // Boxed resource wrapping a TTopFreqGeneric; constructor arguments are
+ // forwarded unchanged to TBoxedResource.
+ class TTopFreqResource
+     : public TBoxedResource<TTopFreqGeneric, TopFreqResourceNameGeneric>
+ {
+ public:
+     template <typename... TArgs>
+     TTopFreqResource(TArgs&&... ctorArgs)
+         : TBoxedResource(std::forward<TArgs>(ctorArgs)...)
+     {
+     }
+ };
+
+ // Per-slot resource wrapper; specializations are defined elsewhere in this file.
+ template <EDataSlot Slot>
+ class TTopFreqResourceData;
+
+ // Validates arg against TTopFreqResourceData<Slot> and returns the boxed
+ // value it holds, cast to that resource type.
+ template <EDataSlot Slot>
+ TTopFreqResourceData<Slot>* GetTopFreqResourceData(const TUnboxedValuePod& arg) {
+     using TResource = TTopFreqResourceData<Slot>;
+     TResource::Validate(arg);
+     auto* const boxed = arg.AsBoxed().Get();
+     return static_cast<TResource*>(boxed);
+ }
+
+ // Validates arg against TTopFreqResource and returns the boxed value it
+ // holds, cast to that resource type.
+ TTopFreqResource* GetTopFreqResource(const TUnboxedValuePod& arg) {
+     TTopFreqResource::Validate(arg);
+     auto* const boxed = arg.AsBoxed().Get();
+     return static_cast<TTopFreqResource*>(boxed);
+ }
+
+
+ // Create for Data-typed values: args[0] is the first value, args[1] the
+ // minimum size; the maximum size passed on is twice the minimum.
+ template <EDataSlot Slot>
+ class TTopFreq_CreateData: public TBoxedValue {
+ private:
+     TUnboxedValue Run(const IValueBuilder*, const TUnboxedValuePod* args) const {
+         using TResource = TTopFreqResourceData<Slot>;
+         ui32 lowerBound = args[1].Get<ui32>();
+         return TUnboxedValuePod(new TResource(args[0], lowerBound, lowerBound * 2));
+     }
+ };
+
+ // Create for generic values: args[0] is the first value, args[1] the
+ // minimum size; the maximum size passed on is twice the minimum. The stored
+ // hash / equate objects are handed to the new resource.
+ class TTopFreq_Create: public TBoxedValue {
+ public:
+     TTopFreq_Create(IHash::TPtr hash, IEquate::TPtr equate)
+         : Hash_(hash)
+         , Equate_(equate)
+     {
+     }
+
+ private:
+     TUnboxedValue Run(const IValueBuilder*, const TUnboxedValuePod* args) const {
+         ui32 lowerBound = args[1].Get<ui32>();
+         return TUnboxedValuePod(new TTopFreqResource(args[0], lowerBound, lowerBound * 2, Hash_, Equate_));
+     }
+
+ private:
+     IHash::TPtr Hash_;
+     IEquate::TPtr Equate_;
+ };
+
+ // AddValue for Data-typed values: adds args[1] to the state held by the
+ // resource in args[0] and returns that same resource.
+ template <EDataSlot Slot>
+ class TTopFreq_AddValueData: public TBoxedValue {
+ private:
+     TUnboxedValue Run(const IValueBuilder*, const TUnboxedValuePod* args) const {
+         auto* const resource = GetTopFreqResourceData<Slot>(args[0]);
+         auto& state = *resource->Get();
+         state.AddValue(args[1]);
+         return TUnboxedValuePod(resource);
+     }
+ };
+
+ // AddValue for generic values: adds args[1] to the state held by the
+ // resource in args[0] and returns that same resource.
+ class TTopFreq_AddValue: public TBoxedValue {
+ private:
+     TUnboxedValue Run(const IValueBuilder*, const TUnboxedValuePod* args) const {
+         auto* const resource = GetTopFreqResource(args[0]);
+         auto& state = *resource->Get();
+         state.AddValue(args[1]);
+         return TUnboxedValuePod(resource);
+     }
+ };
+
+ // Serialize for Data-typed values: serializes the state held by the
+ // resource in args[0] using the supplied value builder.
+ template <EDataSlot Slot>
+ class TTopFreq_SerializeData: public TBoxedValue {
+ private:
+     TUnboxedValue Run(const IValueBuilder* valueBuilder, const TUnboxedValuePod* args) const {
+         auto& state = *GetTopFreqResourceData<Slot>(args[0])->Get();
+         return state.Serialize(valueBuilder);
+     }
+ };
+
+ // Serialize for generic values: serializes the state held by the resource
+ // in args[0] using the supplied value builder.
+ class TTopFreq_Serialize: public TBoxedValue {
+ private:
+     TUnboxedValue Run(const IValueBuilder* valueBuilder, const TUnboxedValuePod* args) const {
+         auto& state = *GetTopFreqResource(args[0])->Get();
+         return state.Serialize(valueBuilder);
+     }
+ };
+
+ // Deserialize for Data-typed values: builds a new resource from the
+ // serialized value in args[0].
+ template <EDataSlot Slot>
+ class TTopFreq_DeserializeData: public TBoxedValue {
+ private:
+     TUnboxedValue Run(const IValueBuilder*, const TUnboxedValuePod* args) const {
+         using TResource = TTopFreqResourceData<Slot>;
+         return TUnboxedValuePod(new TResource(args[0]));
+     }
+ };
+
+ // Deserialize for generic values: builds a new resource from the serialized
+ // value in args[0], passing along the stored hash / equate objects.
+ class TTopFreq_Deserialize: public TBoxedValue {
+ public:
+     TTopFreq_Deserialize(IHash::TPtr hash, IEquate::TPtr equate)
+         : Hash_(hash)
+         , Equate_(equate)
+     {
+     }
+
+ private:
+     TUnboxedValue Run(const IValueBuilder*, const TUnboxedValuePod* args) const {
+         return TUnboxedValuePod(new TTopFreqResource(args[0], Hash_, Equate_));
+     }
+
+ private:
+     IHash::TPtr Hash_;
+     IEquate::TPtr Equate_;
+ };
+
+ // Merge for Data-typed values: builds a new resource from the states held
+ // by the resources in args[0] and args[1].
+ template <EDataSlot Slot>
+ class TTopFreq_MergeData: public TBoxedValue {
+ private:
+     TUnboxedValue Run(const IValueBuilder*, const TUnboxedValuePod* args) const {
+         using TResource = TTopFreqResourceData<Slot>;
+         auto* const first = GetTopFreqResourceData<Slot>(args[0]);
+         auto* const second = GetTopFreqResourceData<Slot>(args[1]);
+         return TUnboxedValuePod(new TResource(*first->Get(), *second->Get()));
+     }
+ };
+
+ // Merge for generic values: builds a new resource from the states held by
+ // the resources in args[0] and args[1], passing along the stored
+ // hash / equate objects.
+ class TTopFreq_Merge: public TBoxedValue {
+ public:
+     TTopFreq_Merge(IHash::TPtr hash, IEquate::TPtr equate)
+         : Hash_(hash)
+         , Equate_(equate)
+     {
+     }
+
+ private:
+     TUnboxedValue Run(const IValueBuilder*, const TUnboxedValuePod* args) const {
+         auto* const first = GetTopFreqResource(args[0]);
+         auto* const second = GetTopFreqResource(args[1]);
+         return TUnboxedValuePod(new TTopFreqResource(*first->Get(), *second->Get(), Hash_, Equate_));
+     }
+
+ private:
+     IHash::TPtr Hash_;
+     IEquate::TPtr Equate_;
+ };
+
+ // Get for Data-typed values: queries the state held by the resource in
+ // args[0], passing args[1] (ui32) as the requested result size.
+ template <EDataSlot Slot>
+ class TTopFreq_GetData: public TBoxedValue {
+ private:
+     TUnboxedValue Run(const IValueBuilder* valueBuilder, const TUnboxedValuePod* args) const {
+         auto& state = *GetTopFreqResourceData<Slot>(args[0])->Get();
+         return state.Get(valueBuilder, args[1].Get<ui32>());
+     }
+ };
+
+ // Get for generic values: queries the state held by the resource in
+ // args[0], passing args[1] (ui32) as the requested result size.
+ class TTopFreq_Get: public TBoxedValue {
+ private:
+     TUnboxedValue Run(const IValueBuilder* valueBuilder, const TUnboxedValuePod* args) const {
+         auto& state = *GetTopFreqResource(args[0])->Get();
+         return state.Get(valueBuilder, args[1].Get<ui32>());
+     }
+ };
+
+
+// MAKE_RESOURCE(slot, ...): for one data slot, defines
+//  * the constant TopFreqResourceName<slot> holding "TopFreq.TopFreqResource.<slot>";
+//  * the TTopFreqResourceData<EDataSlot::slot> specialization, a TBoxedResource
+//    around TTopFreqData<EDataSlot::slot> tagged with that name, whose
+//    constructor forwards all arguments to TBoxedResource.
+// Any extra macro arguments are ignored.
+#define MAKE_RESOURCE(slot, ...) \
+ extern const char TopFreqResourceName##slot[] = "TopFreq.TopFreqResource."#slot; \
+ template <> \
+ class TTopFreqResourceData<EDataSlot::slot>: \
+ public TBoxedResource<TTopFreqData<EDataSlot::slot>, TopFreqResourceName##slot> \
+ { \
+ public: \
+ template <typename... Args> \
+ inline TTopFreqResourceData(Args&&... args) \
+ : TBoxedResource(std::forward<Args>(args)...) \
+ {} \
+ };
+
+ // NOTE(review): UDF_TYPE_ID_MAP presumably applies the macro once per known data slot - confirm.
+ UDF_TYPE_ID_MAP(MAKE_RESOURCE)
+
+// MAKE_IMPL(operation, slot): a `case EDataSlot::slot` label that installs
+// `new operation<EDataSlot::slot>` as the implementation and breaks.
+// It must be expanded inside a switch where a variable named `builder` is in scope.
+#define MAKE_IMPL(operation, slot) \
+ case EDataSlot::slot: \
+ builder.Implementation(new operation<EDataSlot::slot>); \
+ break;
+
+// One MAKE_IMPL wrapper per operation; extra macro arguments are ignored.
+#define MAKE_CREATE(slot, ...) MAKE_IMPL(TTopFreq_CreateData, slot)
+#define MAKE_ADD_VALUE(slot, ...) MAKE_IMPL(TTopFreq_AddValueData, slot)
+#define MAKE_SERIALIZE(slot, ...) MAKE_IMPL(TTopFreq_SerializeData, slot)
+#define MAKE_DESERIALIZE(slot, ...) MAKE_IMPL(TTopFreq_DeserializeData, slot)
+#define MAKE_MERGE(slot, ...) MAKE_IMPL(TTopFreq_MergeData, slot)
+#define MAKE_GET(slot, ...) MAKE_IMPL(TTopFreq_GetData, slot)
+
+// MAKE_TYPE(slot, ...): a `case EDataSlot::slot` label that assigns the resource
+// type named TopFreqResourceName<slot> to `topFreqType`. It must be expanded
+// inside a switch where `builder` and `topFreqType` are in scope.
+#define MAKE_TYPE(slot, ...) \
+ case EDataSlot::slot: \
+ topFreqType = builder.Resource(TopFreqResourceName##slot); \
+ break;
+
+
+ // Names of the functions exported by TTopFreqModule: registered in
+ // GetAllFunctions and compared against `name` in BuildFunctionTypeInfo.
+ static const auto CreateName = TStringRef::Of("TopFreq_Create");
+ static const auto AddValueName = TStringRef::Of("TopFreq_AddValue");
+ static const auto SerializeName = TStringRef::Of("TopFreq_Serialize");
+ static const auto DeserializeName = TStringRef::Of("TopFreq_Deserialize");
+ static const auto MergeName = TStringRef::Of("TopFreq_Merge");
+ static const auto GetName = TStringRef::Of("TopFreq_Get");
+
+ // UDF module "TopFreq". It registers six type-aware functions (Create,
+ // AddValue, Serialize, Deserialize, Merge, Get) and, per call site, builds
+ // the signature and implementation that match the value type found in the
+ // user type.
+ class TTopFreqModule: public IUdfModule {
+ public:
+     TStringRef Name() const {
+         return TStringRef::Of("TopFreq");
+     }
+
+     void CleanupOnTerminate() const final {
+     }
+
+     // Registers every exported function and marks each one as type-aware.
+     void GetAllFunctions(IFunctionsSink& sink) const final {
+         for (const auto& functionName : {CreateName, AddValueName, SerializeName, DeserializeName, MergeName, GetName}) {
+             sink.Add(functionName)->SetTypeAwareness();
+         }
+     }
+
+     // Builds the signature (and, unless only types are requested, the
+     // implementation) of the function called `name`.
+     //
+     // userType must be a 3-tuple; its element #2 is the value type. When the
+     // value type is not a Data type the generic implementation is used, with
+     // hash/equate objects made by the builder; otherwise the per-slot
+     // implementation is used, provided the slot is hashable and equatable.
+     // Problems are reported via builder.SetError(); std::exception thrown
+     // inside is converted to an error message as well.
+     void BuildFunctionTypeInfo(
+         const TStringRef& name,
+         TType* userType,
+         const TStringRef& typeConfig,
+         ui32 flags,
+         IFunctionTypeInfoBuilder& builder) const final
+     {
+         Y_UNUSED(typeConfig);
+
+         try {
+             const bool needImpl = !(flags & TFlags::TypesOnly);
+             builder.UserType(userType);
+
+             auto helper = builder.TypeInfoHelper();
+
+             auto tupleInspector = TTupleTypeInspector(*helper, userType);
+             if (!tupleInspector || tupleInspector.GetElementsCount() != 3) {
+                 builder.SetError("User type is not a 3-tuple");
+                 return;
+             }
+
+             IHash::TPtr hash;
+             IEquate::TPtr equate;
+             TMaybe<EDataSlot> slot;
+
+             auto valueType = tupleInspector.GetElementType(2);
+             auto dataInspector = TDataTypeInspector(*helper, valueType);
+             const bool isGeneric = !dataInspector;
+             if (isGeneric) {
+                 // Not a Data type: hashing and comparison come from the builder.
+                 hash = builder.MakeHash(valueType);
+                 equate = builder.MakeEquate(valueType);
+                 if (!hash || !equate) {
+                     return;
+                 }
+             } else {
+                 slot = FindDataSlot(dataInspector.GetTypeId());
+                 if (!slot) {
+                     builder.SetError("Unknown data type");
+                     return;
+                 }
+                 const auto& features = NUdf::GetDataTypeInfo(*slot).Features;
+                 const bool notHashable = !(features & NUdf::CanHash);
+                 const bool notEquatable = !(features & NUdf::CanEquate);
+                 if (notHashable || notEquatable) {
+                     builder.SetError("Data type is not hashable or equatable");
+                     return;
+                 }
+             }
+
+             // Serialized form: Tuple<ui32, ui32, List<Tuple<ui64, value>>>.
+             auto serializedItemType = builder.Tuple()->Add<ui64>().Add(valueType).Build();
+             auto serializedListType = builder.List()->Item(serializedItemType).Build();
+             auto serializedType = builder.Tuple()->Add<ui32>().Add<ui32>().Add(serializedListType).Build();
+
+             // MAKE_TYPE assigns to `topFreqType` and uses `builder` by name.
+             TType* topFreqType = nullptr;
+             if (isGeneric) {
+                 topFreqType = builder.Resource(TopFreqResourceNameGeneric);
+             } else {
+                 switch (*slot) {
+                     UDF_TYPE_ID_MAP(MAKE_TYPE)
+                 }
+             }
+
+             if (name == CreateName) {
+                 builder.Args()->Add(valueType).Add<ui32>().Done().Returns(topFreqType);
+
+                 if (needImpl && isGeneric) {
+                     builder.Implementation(new TTopFreq_Create(hash, equate));
+                 } else if (needImpl) {
+                     switch (*slot) {
+                         UDF_TYPE_ID_MAP(MAKE_CREATE)
+                     }
+                 }
+                 builder.IsStrict();
+             } else if (name == AddValueName) {
+                 builder.Args()->Add(topFreqType).Add(valueType).Done().Returns(topFreqType);
+
+                 if (needImpl && isGeneric) {
+                     builder.Implementation(new TTopFreq_AddValue);
+                 } else if (needImpl) {
+                     switch (*slot) {
+                         UDF_TYPE_ID_MAP(MAKE_ADD_VALUE)
+                     }
+                 }
+                 builder.IsStrict();
+             } else if (name == MergeName) {
+                 builder.Args()->Add(topFreqType).Add(topFreqType).Done().Returns(topFreqType);
+
+                 if (needImpl && isGeneric) {
+                     builder.Implementation(new TTopFreq_Merge(hash, equate));
+                 } else if (needImpl) {
+                     switch (*slot) {
+                         UDF_TYPE_ID_MAP(MAKE_MERGE)
+                     }
+                 }
+                 builder.IsStrict();
+             } else if (name == SerializeName) {
+                 builder.Args()->Add(topFreqType).Done().Returns(serializedType);
+
+                 if (needImpl && isGeneric) {
+                     builder.Implementation(new TTopFreq_Serialize);
+                 } else if (needImpl) {
+                     switch (*slot) {
+                         UDF_TYPE_ID_MAP(MAKE_SERIALIZE)
+                     }
+                 }
+                 builder.IsStrict();
+             } else if (name == DeserializeName) {
+                 // Unlike the other functions, Deserialize is not marked strict.
+                 builder.Args()->Add(serializedType).Done().Returns(topFreqType);
+
+                 if (needImpl && isGeneric) {
+                     builder.Implementation(new TTopFreq_Deserialize(hash, equate));
+                 } else if (needImpl) {
+                     switch (*slot) {
+                         UDF_TYPE_ID_MAP(MAKE_DESERIALIZE)
+                     }
+                 }
+             } else if (name == GetName) {
+                 // Result: List<Struct<Frequency: ui64, Value: value type>>.
+                 ui32 indexF, indexV;
+                 auto itemType = builder.Struct()->AddField<ui64>("Frequency", &indexF).AddField("Value", valueType, &indexV).Build();
+                 auto resultType = builder.List()->Item(itemType).Build();
+
+                 builder.Args()->Add(topFreqType).Add<ui32>().Done().Returns(resultType);
+
+                 if (needImpl && isGeneric) {
+                     builder.Implementation(new TTopFreq_Get);
+                 } else if (needImpl) {
+                     switch (*slot) {
+                         UDF_TYPE_ID_MAP(MAKE_GET)
+                     }
+                 }
+                 builder.IsStrict();
+             }
+
+         } catch (const std::exception&) {
+             builder.SetError(CurrentExceptionMessage());
+         }
+     }
+ };
+
+} // namespace
diff --git a/yql/essentials/udfs/common/topfreq/static/ya.make b/yql/essentials/udfs/common/topfreq/static/ya.make
new file mode 100644
index 00000000000..95838f33c49
--- /dev/null
+++ b/yql/essentials/udfs/common/topfreq/static/ya.make
@@ -0,0 +1,18 @@
+LIBRARY()
+
+YQL_ABI_VERSION(
+ 2
+ 28
+ 0
+)
+
+SRCS(
+ static_udf.cpp
+ topfreq.cpp
+)
+
+PEERDIR(
+ yql/essentials/public/udf
+)
+
+END()
diff --git a/yql/essentials/udfs/common/topfreq/test/canondata/result.json b/yql/essentials/udfs/common/topfreq/test/canondata/result.json
new file mode 100644
index 00000000000..db452a16ce2
--- /dev/null
+++ b/yql/essentials/udfs/common/topfreq/test/canondata/result.json
@@ -0,0 +1,27 @@
+{
+ "test.test[Floats]": [
+ {
+ "uri": "file://test.test_Floats_/results.txt"
+ }
+ ],
+ "test.test[Mode]": [
+ {
+ "uri": "file://test.test_Mode_/results.txt"
+ }
+ ],
+ "test.test[TopFreqStruct]": [
+ {
+ "uri": "file://test.test_TopFreqStruct_/results.txt"
+ }
+ ],
+ "test.test[TopFreqTuple]": [
+ {
+ "uri": "file://test.test_TopFreqTuple_/results.txt"
+ }
+ ],
+ "test.test[TopFreq]": [
+ {
+ "uri": "file://test.test_TopFreq_/results.txt"
+ }
+ ]
+}
diff --git a/yql/essentials/udfs/common/topfreq/test/canondata/test.test_Floats_/results.txt b/yql/essentials/udfs/common/topfreq/test/canondata/test.test_Floats_/results.txt
new file mode 100644
index 00000000000..8eac384cff2
--- /dev/null
+++ b/yql/essentials/udfs/common/topfreq/test/canondata/test.test_Floats_/results.txt
@@ -0,0 +1,55 @@
+[
+ {
+ "Write" = [
+ {
+ "Type" = [
+ "ListType";
+ [
+ "StructType";
+ [
+ [
+ "column0";
+ [
+ "ListType";
+ [
+ "StructType";
+ [
+ [
+ "Frequency";
+ [
+ "DataType";
+ "Uint64"
+ ]
+ ];
+ [
+ "Value";
+ [
+ "DataType";
+ "Double"
+ ]
+ ]
+ ]
+ ]
+ ]
+ ]
+ ]
+ ]
+ ];
+ "Data" = [
+ [
+ [
+ [
+ "2";
+ "-0"
+ ];
+ [
+ "2";
+ "nan"
+ ]
+ ]
+ ]
+ ]
+ }
+ ]
+ }
+] \ No newline at end of file
diff --git a/yql/essentials/udfs/common/topfreq/test/canondata/test.test_Mode_/results.txt b/yql/essentials/udfs/common/topfreq/test/canondata/test.test_Mode_/results.txt
new file mode 100644
index 00000000000..9cd67bf0f96
--- /dev/null
+++ b/yql/essentials/udfs/common/topfreq/test/canondata/test.test_Mode_/results.txt
@@ -0,0 +1,68 @@
+[
+ {
+ "Write" = [
+ {
+ "Type" = [
+ "ListType";
+ [
+ "StructType";
+ [
+ [
+ "key";
+ [
+ "DataType";
+ "String"
+ ]
+ ];
+ [
+ "column1";
+ [
+ "ListType";
+ [
+ "StructType";
+ [
+ [
+ "Frequency";
+ [
+ "DataType";
+ "Uint64"
+ ]
+ ];
+ [
+ "Value";
+ [
+ "DataType";
+ "Int32"
+ ]
+ ]
+ ]
+ ]
+ ]
+ ]
+ ]
+ ]
+ ];
+ "Data" = [
+ [
+ "abc";
+ [
+ [
+ "3";
+ "23"
+ ]
+ ]
+ ];
+ [
+ "bbb";
+ [
+ [
+ "3";
+ "37"
+ ]
+ ]
+ ]
+ ]
+ }
+ ]
+ }
+] \ No newline at end of file
diff --git a/yql/essentials/udfs/common/topfreq/test/canondata/test.test_TopFreqStruct_/results.txt b/yql/essentials/udfs/common/topfreq/test/canondata/test.test_TopFreqStruct_/results.txt
new file mode 100644
index 00000000000..e1cf8558d68
--- /dev/null
+++ b/yql/essentials/udfs/common/topfreq/test/canondata/test.test_TopFreqStruct_/results.txt
@@ -0,0 +1,103 @@
+[
+ {
+ "Write" = [
+ {
+ "Type" = [
+ "ListType";
+ [
+ "StructType";
+ [
+ [
+ "value";
+ [
+ "DataType";
+ "String"
+ ]
+ ];
+ [
+ "column1";
+ [
+ "ListType";
+ [
+ "StructType";
+ [
+ [
+ "Frequency";
+ [
+ "DataType";
+ "Uint64"
+ ]
+ ];
+ [
+ "Value";
+ [
+ "StructType";
+ [
+ [
+ "k";
+ [
+ "DataType";
+ "String"
+ ]
+ ];
+ [
+ "v";
+ [
+ "DataType";
+ "String"
+ ]
+ ]
+ ]
+ ]
+ ]
+ ]
+ ]
+ ]
+ ]
+ ]
+ ]
+ ];
+ "Data" = [
+ [
+ "1";
+ [
+ [
+ "4";
+ [
+ "101";
+ "1"
+ ]
+ ];
+ [
+ "2";
+ [
+ "103";
+ "1"
+ ]
+ ];
+ [
+ "2";
+ [
+ "104";
+ "1"
+ ]
+ ]
+ ]
+ ];
+ [
+ "2";
+ [
+ [
+ "4";
+ [
+ "037";
+ "2"
+ ]
+ ]
+ ]
+ ]
+ ]
+ }
+ ]
+ }
+] \ No newline at end of file
diff --git a/yql/essentials/udfs/common/topfreq/test/canondata/test.test_TopFreqTuple_/results.txt b/yql/essentials/udfs/common/topfreq/test/canondata/test.test_TopFreqTuple_/results.txt
new file mode 100644
index 00000000000..c131783fc9d
--- /dev/null
+++ b/yql/essentials/udfs/common/topfreq/test/canondata/test.test_TopFreqTuple_/results.txt
@@ -0,0 +1,97 @@
+[
+ {
+ "Write" = [
+ {
+ "Type" = [
+ "ListType";
+ [
+ "StructType";
+ [
+ [
+ "value";
+ [
+ "DataType";
+ "String"
+ ]
+ ];
+ [
+ "column1";
+ [
+ "ListType";
+ [
+ "StructType";
+ [
+ [
+ "Frequency";
+ [
+ "DataType";
+ "Uint64"
+ ]
+ ];
+ [
+ "Value";
+ [
+ "TupleType";
+ [
+ [
+ "DataType";
+ "String"
+ ];
+ [
+ "DataType";
+ "String"
+ ]
+ ]
+ ]
+ ]
+ ]
+ ]
+ ]
+ ]
+ ]
+ ]
+ ];
+ "Data" = [
+ [
+ "1";
+ [
+ [
+ "4";
+ [
+ "101";
+ "1"
+ ]
+ ];
+ [
+ "2";
+ [
+ "103";
+ "1"
+ ]
+ ];
+ [
+ "2";
+ [
+ "104";
+ "1"
+ ]
+ ]
+ ]
+ ];
+ [
+ "2";
+ [
+ [
+ "4";
+ [
+ "037";
+ "2"
+ ]
+ ]
+ ]
+ ]
+ ]
+ }
+ ]
+ }
+] \ No newline at end of file
diff --git a/yql/essentials/udfs/common/topfreq/test/canondata/test.test_TopFreq_/results.txt b/yql/essentials/udfs/common/topfreq/test/canondata/test.test_TopFreq_/results.txt
new file mode 100644
index 00000000000..c666b42f01d
--- /dev/null
+++ b/yql/essentials/udfs/common/topfreq/test/canondata/test.test_TopFreq_/results.txt
@@ -0,0 +1,83 @@
+[
+ {
+ "Write" = [
+ {
+ "Type" = [
+ "ListType";
+ [
+ "StructType";
+ [
+ [
+ "key";
+ [
+ "OptionalType";
+ [
+ "DataType";
+ "Uint32"
+ ]
+ ]
+ ];
+ [
+ "column1";
+ [
+ "ListType";
+ [
+ "StructType";
+ [
+ [
+ "Frequency";
+ [
+ "DataType";
+ "Uint64"
+ ]
+ ];
+ [
+ "Value";
+ [
+ "DataType";
+ "String"
+ ]
+ ]
+ ]
+ ]
+ ]
+ ]
+ ]
+ ]
+ ];
+ "Data" = [
+ [
+ [
+ "1"
+ ];
+ [
+ [
+ "4";
+ "101"
+ ];
+ [
+ "2";
+ "103"
+ ];
+ [
+ "2";
+ "104"
+ ]
+ ]
+ ];
+ [
+ [
+ "2"
+ ];
+ [
+ [
+ "4";
+ "037"
+ ]
+ ]
+ ]
+ ]
+ }
+ ]
+ }
+] \ No newline at end of file
diff --git a/yql/essentials/udfs/common/topfreq/test/cases/Floats.in b/yql/essentials/udfs/common/topfreq/test/cases/Floats.in
new file mode 100644
index 00000000000..e69de29bb2d
--- /dev/null
+++ b/yql/essentials/udfs/common/topfreq/test/cases/Floats.in
diff --git a/yql/essentials/udfs/common/topfreq/test/cases/Floats.sql b/yql/essentials/udfs/common/topfreq/test/cases/Floats.sql
new file mode 100644
index 00000000000..3c2515dc7ee
--- /dev/null
+++ b/yql/essentials/udfs/common/topfreq/test/cases/Floats.sql
@@ -0,0 +1,10 @@
+/* syntax version 1 */
+-- Feeds -0.0, +0.0 and two NaN doubles into topfreq.
+-- The canonical result lists "-0" with frequency 2 and "nan" with frequency 2,
+-- i.e. both zeros are counted as one value and both NaNs as one value.
+select topfreq(x,10,10) from (
+select frombytes(tobytes(-0.0),Double) as x
+union all
+select frombytes(tobytes(+0.0),Double) as x
+union all
+select Double("nan") as x
+union all
+select Double("nan") as x
+)
diff --git a/yql/essentials/udfs/common/topfreq/test/cases/Mode.in b/yql/essentials/udfs/common/topfreq/test/cases/Mode.in
new file mode 100644
index 00000000000..6cd4d921916
--- /dev/null
+++ b/yql/essentials/udfs/common/topfreq/test/cases/Mode.in
@@ -0,0 +1,8 @@
+{"key"="075";"subkey"="1";"value"="abc"};
+{"key"="023";"subkey"="2";"value"="abc"};
+{"key"="023";"subkey"="3";"value"="abc"};
+{"key"="023";"subkey"="4";"value"="abc"};
+{"key"="037";"subkey"="5";"value"="bbb"};
+{"key"="037";"subkey"="6";"value"="bbb"};
+{"key"="200";"subkey"="7";"value"="bbb"};
+{"key"="037";"subkey"="8";"value"="bbb"};
diff --git a/yql/essentials/udfs/common/topfreq/test/cases/Mode.in.attr b/yql/essentials/udfs/common/topfreq/test/cases/Mode.in.attr
new file mode 100644
index 00000000000..2a151e9c475
--- /dev/null
+++ b/yql/essentials/udfs/common/topfreq/test/cases/Mode.in.attr
@@ -0,0 +1,30 @@
+{
+ "_yql_row_spec" = {
+ "Type" = [
+ "StructType";
+ [
+ [
+ "key";
+ [
+ "DataType";
+ "String"
+ ]
+ ];
+ [
+ "subkey";
+ [
+ "DataType";
+ "String"
+ ]
+ ];
+ [
+ "value";
+ [
+ "DataType";
+ "String"
+ ]
+ ]
+ ]
+ ]
+ }
+}
diff --git a/yql/essentials/udfs/common/topfreq/test/cases/Mode.sql b/yql/essentials/udfs/common/topfreq/test/cases/Mode.sql
new file mode 100644
index 00000000000..da1ade32e8d
--- /dev/null
+++ b/yql/essentials/udfs/common/topfreq/test/cases/Mode.sql
@@ -0,0 +1,14 @@
+/* syntax version 1 */
+-- Mode() over an Int32 column. The subquery swaps the input columns:
+-- the original `value` becomes the grouping key and the original `key`,
+-- cast to Int32, becomes the aggregated value.
+USE plato;
+
+SELECT
+ key, Mode(value)
+FROM
+(SELECT
+ cast (key as Int32) as value,
+ "" as subkey,
+ value as key
+FROM Input)
+AS tmp
+GROUP BY key
+ORDER BY key
diff --git a/yql/essentials/udfs/common/topfreq/test/cases/TopFreq.in b/yql/essentials/udfs/common/topfreq/test/cases/TopFreq.in
new file mode 100644
index 00000000000..b4c81f94c8b
--- /dev/null
+++ b/yql/essentials/udfs/common/topfreq/test/cases/TopFreq.in
@@ -0,0 +1,16 @@
+{"key"="101";"subkey"="1";"value"="1"};
+{"key"="101";"subkey"="2";"value"="1"};
+{"key"="101";"subkey"="3";"value"="1"};
+{"key"="101";"subkey"="4";"value"="1"};
+{"key"="103";"subkey"="1";"value"="1"};
+{"key"="103";"subkey"="2";"value"="1"};
+{"key"="104";"subkey"="3";"value"="1"};
+{"key"="104";"subkey"="4";"value"="1"};
+{"key"="102";"subkey"="1";"value"="1"};
+{"key"="105";"subkey"="2";"value"="1"};
+{"key"="106";"subkey"="3";"value"="1"};
+{"key"="107";"subkey"="4";"value"="1"};
+{"key"="037";"subkey"="5";"value"="2"};
+{"key"="037";"subkey"="6";"value"="2"};
+{"key"="037";"subkey"="7";"value"="2"};
+{"key"="037";"subkey"="8";"value"="2"};
diff --git a/yql/essentials/udfs/common/topfreq/test/cases/TopFreq.in.attr b/yql/essentials/udfs/common/topfreq/test/cases/TopFreq.in.attr
new file mode 100644
index 00000000000..2a151e9c475
--- /dev/null
+++ b/yql/essentials/udfs/common/topfreq/test/cases/TopFreq.in.attr
@@ -0,0 +1,30 @@
+{
+ "_yql_row_spec" = {
+ "Type" = [
+ "StructType";
+ [
+ [
+ "key";
+ [
+ "DataType";
+ "String"
+ ]
+ ];
+ [
+ "subkey";
+ [
+ "DataType";
+ "String"
+ ]
+ ];
+ [
+ "value";
+ [
+ "DataType";
+ "String"
+ ]
+ ]
+ ]
+ ]
+ }
+}
diff --git a/yql/essentials/udfs/common/topfreq/test/cases/TopFreq.sql b/yql/essentials/udfs/common/topfreq/test/cases/TopFreq.sql
new file mode 100644
index 00000000000..79c412cc875
--- /dev/null
+++ b/yql/essentials/udfs/common/topfreq/test/cases/TopFreq.sql
@@ -0,0 +1,14 @@
+/* syntax version 1 */
+-- TopFreq() over a String column. The subquery swaps the input columns:
+-- the original `key` becomes the counted value and the original `value`,
+-- cast to Uint32, becomes the grouping key.
+USE plato;
+
+SELECT
+ key, TopFreq(value, 3, 5u)
+FROM
+(SELECT
+ key as value,
+ "" as subkey,
+ cast (value as Uint32) as key
+FROM Input)
+AS tmp
+GROUP BY key
+ORDER BY key
diff --git a/yql/essentials/udfs/common/topfreq/test/cases/TopFreqStruct.in b/yql/essentials/udfs/common/topfreq/test/cases/TopFreqStruct.in
new file mode 100644
index 00000000000..b4c81f94c8b
--- /dev/null
+++ b/yql/essentials/udfs/common/topfreq/test/cases/TopFreqStruct.in
@@ -0,0 +1,16 @@
+{"key"="101";"subkey"="1";"value"="1"};
+{"key"="101";"subkey"="2";"value"="1"};
+{"key"="101";"subkey"="3";"value"="1"};
+{"key"="101";"subkey"="4";"value"="1"};
+{"key"="103";"subkey"="1";"value"="1"};
+{"key"="103";"subkey"="2";"value"="1"};
+{"key"="104";"subkey"="3";"value"="1"};
+{"key"="104";"subkey"="4";"value"="1"};
+{"key"="102";"subkey"="1";"value"="1"};
+{"key"="105";"subkey"="2";"value"="1"};
+{"key"="106";"subkey"="3";"value"="1"};
+{"key"="107";"subkey"="4";"value"="1"};
+{"key"="037";"subkey"="5";"value"="2"};
+{"key"="037";"subkey"="6";"value"="2"};
+{"key"="037";"subkey"="7";"value"="2"};
+{"key"="037";"subkey"="8";"value"="2"};
diff --git a/yql/essentials/udfs/common/topfreq/test/cases/TopFreqStruct.in.attr b/yql/essentials/udfs/common/topfreq/test/cases/TopFreqStruct.in.attr
new file mode 100644
index 00000000000..2a151e9c475
--- /dev/null
+++ b/yql/essentials/udfs/common/topfreq/test/cases/TopFreqStruct.in.attr
@@ -0,0 +1,30 @@
+{
+ "_yql_row_spec" = {
+ "Type" = [
+ "StructType";
+ [
+ [
+ "key";
+ [
+ "DataType";
+ "String"
+ ]
+ ];
+ [
+ "subkey";
+ [
+ "DataType";
+ "String"
+ ]
+ ];
+ [
+ "value";
+ [
+ "DataType";
+ "String"
+ ]
+ ]
+ ]
+ ]
+ }
+}
diff --git a/yql/essentials/udfs/common/topfreq/test/cases/TopFreqStruct.sql b/yql/essentials/udfs/common/topfreq/test/cases/TopFreqStruct.sql
new file mode 100644
index 00000000000..b188ce2e5d6
--- /dev/null
+++ b/yql/essentials/udfs/common/topfreq/test/cases/TopFreqStruct.sql
@@ -0,0 +1,8 @@
+/* syntax version 1 */
+-- TopFreq() over a struct value (k = key, v = value), grouped by `value`.
+USE plato;
+
+SELECT
+ value, TopFreq(AsStruct(key as k, value as v), 3, 5u)
+FROM Input
+GROUP BY value
+ORDER BY value \ No newline at end of file
diff --git a/yql/essentials/udfs/common/topfreq/test/cases/TopFreqTuple.in b/yql/essentials/udfs/common/topfreq/test/cases/TopFreqTuple.in
new file mode 100644
index 00000000000..b4c81f94c8b
--- /dev/null
+++ b/yql/essentials/udfs/common/topfreq/test/cases/TopFreqTuple.in
@@ -0,0 +1,16 @@
+{"key"="101";"subkey"="1";"value"="1"};
+{"key"="101";"subkey"="2";"value"="1"};
+{"key"="101";"subkey"="3";"value"="1"};
+{"key"="101";"subkey"="4";"value"="1"};
+{"key"="103";"subkey"="1";"value"="1"};
+{"key"="103";"subkey"="2";"value"="1"};
+{"key"="104";"subkey"="3";"value"="1"};
+{"key"="104";"subkey"="4";"value"="1"};
+{"key"="102";"subkey"="1";"value"="1"};
+{"key"="105";"subkey"="2";"value"="1"};
+{"key"="106";"subkey"="3";"value"="1"};
+{"key"="107";"subkey"="4";"value"="1"};
+{"key"="037";"subkey"="5";"value"="2"};
+{"key"="037";"subkey"="6";"value"="2"};
+{"key"="037";"subkey"="7";"value"="2"};
+{"key"="037";"subkey"="8";"value"="2"};
diff --git a/yql/essentials/udfs/common/topfreq/test/cases/TopFreqTuple.in.attr b/yql/essentials/udfs/common/topfreq/test/cases/TopFreqTuple.in.attr
new file mode 100644
index 00000000000..2a151e9c475
--- /dev/null
+++ b/yql/essentials/udfs/common/topfreq/test/cases/TopFreqTuple.in.attr
@@ -0,0 +1,30 @@
+{
+ "_yql_row_spec" = {
+ "Type" = [
+ "StructType";
+ [
+ [
+ "key";
+ [
+ "DataType";
+ "String"
+ ]
+ ];
+ [
+ "subkey";
+ [
+ "DataType";
+ "String"
+ ]
+ ];
+ [
+ "value";
+ [
+ "DataType";
+ "String"
+ ]
+ ]
+ ]
+ ]
+ }
+}
diff --git a/yql/essentials/udfs/common/topfreq/test/cases/TopFreqTuple.sql b/yql/essentials/udfs/common/topfreq/test/cases/TopFreqTuple.sql
new file mode 100644
index 00000000000..72dd648f0b1
--- /dev/null
+++ b/yql/essentials/udfs/common/topfreq/test/cases/TopFreqTuple.sql
@@ -0,0 +1,8 @@
+/* syntax version 1 */
+-- TopFreq() over a tuple value (key, value), grouped by `value`.
+USE plato;
+
+SELECT
+ value, TopFreq(AsTuple(key, value), 3, 5u)
+FROM Input
+GROUP BY value
+ORDER BY value \ No newline at end of file
diff --git a/yql/essentials/udfs/common/topfreq/test/ya.make b/yql/essentials/udfs/common/topfreq/test/ya.make
new file mode 100644
index 00000000000..1ba3eac0d98
--- /dev/null
+++ b/yql/essentials/udfs/common/topfreq/test/ya.make
@@ -0,0 +1,13 @@
+YQL_UDF_TEST_CONTRIB()
+
+DEPENDS(yql/essentials/udfs/common/topfreq)
+
+TIMEOUT(300)
+
+SIZE(MEDIUM)
+
+IF (SANITIZER_TYPE == "memory")
+ TAG(ya:not_autocheck) # YQL-15385
+ENDIF()
+
+END()
diff --git a/yql/essentials/udfs/common/topfreq/topfreq_udf.cpp b/yql/essentials/udfs/common/topfreq/topfreq_udf.cpp
new file mode 100644
index 00000000000..7107f2bd006
--- /dev/null
+++ b/yql/essentials/udfs/common/topfreq/topfreq_udf.cpp
@@ -0,0 +1,3 @@
+#include "static/topfreq_udf.h"
+
+REGISTER_MODULES(TTopFreqModule)
diff --git a/yql/essentials/udfs/common/topfreq/topfreq_udf_ut.cpp b/yql/essentials/udfs/common/topfreq/topfreq_udf_ut.cpp
new file mode 100644
index 00000000000..9ce7b8561fb
--- /dev/null
+++ b/yql/essentials/udfs/common/topfreq/topfreq_udf_ut.cpp
@@ -0,0 +1,451 @@
+#include <util/random/shuffle.h>
+#include <library/cpp/testing/unittest/registar.h>
+#include <yql/essentials/minikql/mkql_function_registry.h>
+#include <yql/essentials/minikql/mkql_program_builder.h>
+#include <yql/essentials/minikql/invoke_builtins/mkql_builtins.h>
+#include <yql/essentials/minikql/computation/mkql_computation_node.h>
+#include <yql/essentials/minikql/comp_nodes/mkql_factories.h>
+#include <util/random/random.h>
+#include <array>
+#include <yql/essentials/udfs/common/topfreq/static/topfreq_udf.h>
+
+namespace NYql {
+ using namespace NKikimr::NMiniKQL;
+ namespace NUdf {
+ extern NUdf::TUniquePtr<NUdf::IUdfModule> CreateTopFreqModule();
+ }
+
+ // Test fixture: owns the function registry (with the TopFreq module added),
+ // the allocator, the type environment and the program builder, and can
+ // evaluate a built program node into a value.
+ class TSetup {
+ public:
+     TSetup()
+         : MutableFunctionRegistry_(CreateFunctionRegistry(CreateBuiltinRegistry())->Clone())
+         , RandomProvider_(CreateDeterministicRandomProvider(1))
+         , TimeProvider_(CreateDeterministicTimeProvider(10000000))
+         , Alloc_(__LOCATION__)
+         , Env_(Alloc_)
+     {
+         // Add the TopFreq module to the registry, then create the builder on top of it.
+         MutableFunctionRegistry_->AddModule("", "TopFreq", NUdf::CreateTopFreqModule());
+         PgmBuilder_.Reset(new TProgramBuilder(Env_, *MutableFunctionRegistry_));
+     }
+
+     // Builder used by the tests to assemble the program graph.
+     TProgramBuilder& GetProgramBuilder() {
+         return *PgmBuilder_;
+     }
+
+     // Walks the program rooted at `node`, builds a computation pattern and graph for it,
+     // and returns the value produced by the graph.
+     NUdf::TUnboxedValue GetValue(TRuntimeNode& node) {
+         Explorer_.Walk(node.GetNode(), Env_);
+
+         TComputationPatternOpts patternOpts(Alloc_.Ref(), Env_, GetBuiltinFactory(),
+             MutableFunctionRegistry_.Get(),
+             NUdf::EValidateMode::None, NUdf::EValidatePolicy::Fail, "", EGraphPerProcess::Multi);
+         Pattern_ = MakeComputationPattern(Explorer_, node, {}, patternOpts);
+         Graph_ = Pattern_->Clone(patternOpts.ToComputationOptions(*RandomProvider_, *TimeProvider_));
+
+         return Graph_->GetValue();
+     }
+
+ private:
+     // Declaration order is significant: it fixes construction and destruction order.
+     TIntrusivePtr<IMutableFunctionRegistry> MutableFunctionRegistry_;
+     TIntrusivePtr<IRandomProvider> RandomProvider_;
+     TIntrusivePtr<ITimeProvider> TimeProvider_;
+     TScopedAlloc Alloc_;
+     TTypeEnvironment Env_;
+     THolder<TProgramBuilder> PgmBuilder_;
+     IComputationPattern::TPtr Pattern_;
+     THolder<IComputationGraph> Graph_;
+     TExploringNodeVisitor Explorer_;
+ };
+
+ Y_UNIT_TEST_SUITE(TUDFTopFreqTest) {
+ Y_UNIT_TEST(SimpleTopFreq) { // Create + AddValue + Get over Int32 values; checks exact counts and ordering.
+ TSetup setup;
+ TProgramBuilder& pgmBuilder = setup.GetProgramBuilder();
+
+ const auto valueType = pgmBuilder.NewDataType(NUdf::TDataType<i32>::Id);
+ const auto emptyStructType = pgmBuilder.NewEmptyStructType();
+ const auto resourceType = pgmBuilder.NewResourceType("TopFreq.TopFreqResource.Int32");
+ const auto ui32Type = pgmBuilder.NewDataType(NUdf::TDataType<ui32>::Id);
+
+ const auto createArgsType = pgmBuilder.NewTupleType({valueType, ui32Type});
+ const auto createUserType = pgmBuilder.NewTupleType({createArgsType, emptyStructType, valueType}); // user type: (argument tuple, empty struct, value type)
+ auto udfTopFreq_Create = pgmBuilder.Udf("TopFreq.TopFreq_Create", TRuntimeNode(), createUserType);
+
+ auto addValueArgsType = pgmBuilder.NewTupleType({resourceType, valueType});
+ auto addValueUserType = pgmBuilder.NewTupleType({addValueArgsType, emptyStructType, valueType});
+ auto udfTopFreq_AddValue = pgmBuilder.Udf("TopFreq.TopFreq_AddValue", TRuntimeNode(), addValueUserType);
+
+ auto getArgsType = pgmBuilder.NewTupleType({resourceType, ui32Type});
+ auto getUserType = pgmBuilder.NewTupleType({getArgsType, emptyStructType, valueType});
+ auto udfTopFreq_Get = pgmBuilder.Udf("TopFreq.TopFreq_Get", TRuntimeNode(), getUserType);
+
+ TRuntimeNode pgmTopFreq;
+ {
+ auto val = pgmBuilder.NewDataLiteral<i32>(3); // first observed value: 3 (ends up with count 1)
+ auto param = pgmBuilder.NewDataLiteral<ui32>(10); // second Create argument; its meaning is defined by TopFreq_Create (not visible here)
+
+ TVector<TRuntimeNode> params = {val, param};
+ pgmTopFreq = pgmBuilder.Apply(udfTopFreq_Create, params);
+ }
+
+ for (int n = 0; n < 9; n++) { // add value 1 nine times
+ auto value = pgmBuilder.NewDataLiteral<i32>(1);
+ TVector<TRuntimeNode> params = {pgmTopFreq, value};
+ pgmTopFreq = pgmBuilder.Apply(udfTopFreq_AddValue, params); // each call chains on the previous resource node
+ }
+
+ for (int n = 0; n < 7; n++) { // add value 4 seven times
+ auto value = pgmBuilder.NewDataLiteral<i32>(4);
+ TVector<TRuntimeNode> params = {pgmTopFreq, value};
+ pgmTopFreq = pgmBuilder.Apply(udfTopFreq_AddValue, params);
+ }
+
+ TRuntimeNode pgmReturn;
+ {
+ auto param = pgmBuilder.NewDataLiteral<ui32>(4); // Get is called with 4, yet only three distinct values exist
+ TVector<TRuntimeNode> params = {pgmTopFreq, param};
+ pgmReturn = pgmBuilder.Apply(udfTopFreq_Get, params);
+ }
+
+ auto value = setup.GetValue(pgmReturn);
+
+ auto listIterator = value.GetListIterator();
+
+ TUnboxedValue item; // each item is read as: element 0 = count (ui64), element 1 = value
+
+ UNIT_ASSERT(listIterator.Next(item));
+ UNIT_ASSERT_EQUAL(item.GetElement(1).Get<i32>(), 1);
+ UNIT_ASSERT_EQUAL(item.GetElement(0).Get<ui64>(), 9);
+
+ UNIT_ASSERT(listIterator.Next(item));
+ UNIT_ASSERT_EQUAL(item.GetElement(1).Get<i32>(), 4);
+ UNIT_ASSERT_EQUAL(item.GetElement(0).Get<ui64>(), 7);
+
+ UNIT_ASSERT(listIterator.Next(item));
+ UNIT_ASSERT_EQUAL(item.GetElement(1).Get<i32>(), 3);
+ UNIT_ASSERT_EQUAL(item.GetElement(0).Get<ui64>(), 1);
+
+ UNIT_ASSERT(!listIterator.Next(item)); // no fourth entry
+ }
+
+ Y_UNIT_TEST(MergingTopFreq) { // Builds two Uint64 resources, merges them and checks the single top entry.
+ TSetup setup;
+ TProgramBuilder& pgmBuilder = setup.GetProgramBuilder();
+
+ const auto valueType = pgmBuilder.NewDataType(NUdf::TDataType<ui64>::Id);
+ const auto emptyStructType = pgmBuilder.NewEmptyStructType();
+ const auto resourceType = pgmBuilder.NewResourceType("TopFreq.TopFreqResource.Uint64");
+ const auto ui32Type = pgmBuilder.NewDataType(NUdf::TDataType<ui32>::Id);
+
+ const auto createArgsType = pgmBuilder.NewTupleType({valueType, ui32Type});
+ const auto createUserType = pgmBuilder.NewTupleType({createArgsType, emptyStructType, valueType}); // user type: (argument tuple, empty struct, value type)
+ auto udfTopFreq_Create = pgmBuilder.Udf("TopFreq.TopFreq_Create", TRuntimeNode(), createUserType);
+
+ auto addValueArgsType = pgmBuilder.NewTupleType({resourceType, valueType});
+ auto addValueUserType = pgmBuilder.NewTupleType({addValueArgsType, emptyStructType, valueType});
+ auto udfTopFreq_AddValue = pgmBuilder.Udf("TopFreq.TopFreq_AddValue", TRuntimeNode(), addValueUserType);
+
+ auto mergeArgsType = pgmBuilder.NewTupleType({resourceType, resourceType});
+ auto mergeUserType = pgmBuilder.NewTupleType({mergeArgsType, emptyStructType, valueType});
+ auto udfTopFreq_Merge = pgmBuilder.Udf("TopFreq.TopFreq_Merge", TRuntimeNode(), mergeUserType);
+
+ auto getArgsType = pgmBuilder.NewTupleType({resourceType, ui32Type});
+ auto getUserType = pgmBuilder.NewTupleType({getArgsType, emptyStructType, valueType});
+ auto udfTopFreq_Get = pgmBuilder.Udf("TopFreq.TopFreq_Get", TRuntimeNode(), getUserType);
+
+ TRuntimeNode pgmTopFreq; // first resource: ends up holding 1 x2, 5 x4, 3 x1
+ {
+ auto value = pgmBuilder.NewDataLiteral<ui64>(1);
+ auto param = pgmBuilder.NewDataLiteral<ui32>(1);
+ TVector<TRuntimeNode> params = {value, param};
+ pgmTopFreq = pgmBuilder.Apply(udfTopFreq_Create, params);
+ }
+
+ for (int n = 0; n < 1; n++) {
+ auto value = pgmBuilder.NewDataLiteral<ui64>(1);
+ TVector<TRuntimeNode> params = {pgmTopFreq, value};
+ pgmTopFreq = pgmBuilder.Apply(udfTopFreq_AddValue, params);
+ }
+
+ for (int n = 0; n < 4; n++) {
+ auto value = pgmBuilder.NewDataLiteral<ui64>(5);
+ TVector<TRuntimeNode> params = {pgmTopFreq, value};
+ pgmTopFreq = pgmBuilder.Apply(udfTopFreq_AddValue, params);
+ }
+
+ for (int n = 0; n < 1; n++) {
+ auto value = pgmBuilder.NewDataLiteral<ui64>(3);
+ TVector<TRuntimeNode> params = {pgmTopFreq, value};
+ pgmTopFreq = pgmBuilder.Apply(udfTopFreq_AddValue, params);
+ }
+
+ TRuntimeNode pgmTopFreq2; // second resource: ends up holding 1 x6, 5 x5
+ {
+ auto value = pgmBuilder.NewDataLiteral<ui64>(1);
+ auto param = pgmBuilder.NewDataLiteral<ui32>(1);
+ TVector<TRuntimeNode> params = {value, param};
+ pgmTopFreq2 = pgmBuilder.Apply(udfTopFreq_Create, params);
+ }
+
+ for (int n = 0; n < 5; n++) {
+ auto value = pgmBuilder.NewDataLiteral<ui64>(1);
+ TVector<TRuntimeNode> params = {pgmTopFreq2, value};
+ pgmTopFreq2 = pgmBuilder.Apply(udfTopFreq_AddValue, params);
+ }
+
+ for (int n = 0; n < 5; n++) {
+ auto value = pgmBuilder.NewDataLiteral<ui64>(5);
+ TVector<TRuntimeNode> params = {pgmTopFreq2, value};
+ pgmTopFreq2 = pgmBuilder.Apply(udfTopFreq_AddValue, params);
+ }
+
+ TRuntimeNode pgmTopFreq3; // merge of the two resources
+ {
+ TVector<TRuntimeNode> params = {pgmTopFreq, pgmTopFreq2};
+ pgmTopFreq3 = pgmBuilder.Apply(udfTopFreq_Merge, params);
+ }
+
+ TRuntimeNode pgmReturn;
+ {
+ auto param = pgmBuilder.NewDataLiteral<ui32>(1); // ask Get for a single entry
+ TVector<TRuntimeNode> params = {pgmTopFreq3, param};
+ pgmReturn = pgmBuilder.Apply(udfTopFreq_Get, params);
+ }
+
+ auto value = setup.GetValue(pgmReturn);
+
+ auto listIterator = value.GetListIterator();
+
+ TUnboxedValue item; // element 0 = count (ui64), element 1 = value
+
+ UNIT_ASSERT(listIterator.Next(item));
+ UNIT_ASSERT_EQUAL(item.GetElement(1).Get<ui64>(), 5); // value 5 wins: 4 + 5 occurrences
+ UNIT_ASSERT_EQUAL(item.GetElement(0).Get<ui64>(), 9);
+
+ UNIT_ASSERT(!listIterator.Next(item)); // exactly one entry is returned
+ }
+
+ Y_UNIT_TEST(SerializedTopFreq) { // Serialize -> Deserialize round trip over Bool values, then Get.
+ TSetup setup;
+ TProgramBuilder& pgmBuilder = setup.GetProgramBuilder();
+
+ const auto valueType = pgmBuilder.NewDataType(NUdf::TDataType<bool>::Id);
+ const auto emptyStructType = pgmBuilder.NewEmptyStructType();
+ const auto resourceType = pgmBuilder.NewResourceType("TopFreq.TopFreqResource.Bool");
+ const auto ui32Type = pgmBuilder.NewDataType(NUdf::TDataType<ui32>::Id);
+ const auto ui64Type = pgmBuilder.NewDataType(NUdf::TDataType<ui64>::Id);
+
+ const auto createArgsType = pgmBuilder.NewTupleType({valueType, ui32Type});
+ const auto createUserType = pgmBuilder.NewTupleType({createArgsType, emptyStructType, valueType}); // user type: (argument tuple, empty struct, value type)
+ auto udfTopFreq_Create = pgmBuilder.Udf("TopFreq.TopFreq_Create", TRuntimeNode(), createUserType);
+
+ auto addValueArgsType = pgmBuilder.NewTupleType({resourceType, valueType});
+ auto addValueUserType = pgmBuilder.NewTupleType({addValueArgsType, emptyStructType, valueType});
+ auto udfTopFreq_AddValue = pgmBuilder.Udf("TopFreq.TopFreq_AddValue", TRuntimeNode(), addValueUserType);
+
+ auto getArgsType = pgmBuilder.NewTupleType({resourceType, ui32Type});
+ auto getUserType = pgmBuilder.NewTupleType({getArgsType, emptyStructType, valueType});
+ auto udfTopFreq_Get = pgmBuilder.Udf("TopFreq.TopFreq_Get", TRuntimeNode(), getUserType);
+
+ auto serializeArgsType = pgmBuilder.NewTupleType({resourceType});
+ auto serializeUserType = pgmBuilder.NewTupleType({serializeArgsType, emptyStructType, valueType});
+ auto udfTopFreq_Serialize = pgmBuilder.Udf("TopFreq.TopFreq_Serialize", TRuntimeNode(), serializeUserType);
+
+ auto serializedType = pgmBuilder.NewTupleType({ui32Type, ui32Type, // serialized form: (ui32, ui32, list of (ui64, value) tuples)
+ pgmBuilder.NewListType(pgmBuilder.NewTupleType({ui64Type, valueType}))});
+
+ auto deserializeArgsType = pgmBuilder.NewTupleType({serializedType});
+ auto deserializeUserType = pgmBuilder.NewTupleType({deserializeArgsType, emptyStructType, valueType});
+ auto udfTopFreq_Deserialize = pgmBuilder.Udf("TopFreq.TopFreq_Deserialize", TRuntimeNode(), deserializeUserType);
+
+ TRuntimeNode pgmTopFreq;
+ {
+ auto value = pgmBuilder.NewDataLiteral<bool>(true); // the creating value counts as the first `true`
+ auto param = pgmBuilder.NewDataLiteral<ui32>(10);
+ TVector<TRuntimeNode> params = {value, param};
+ pgmTopFreq = pgmBuilder.Apply(udfTopFreq_Create, params);
+ }
+
+ for (int n = 0; n < 7; n++) { // seven more `true` values: 8 in total
+ auto value = pgmBuilder.NewDataLiteral<bool>(true);
+ TVector<TRuntimeNode> params = {pgmTopFreq, value};
+ pgmTopFreq = pgmBuilder.Apply(udfTopFreq_AddValue, params);
+ }
+
+ for (int n = 0; n < 10; n++) { // ten `false` values
+ auto value = pgmBuilder.NewDataLiteral<bool>(false);
+ TVector<TRuntimeNode> params = {pgmTopFreq, value};
+ pgmTopFreq = pgmBuilder.Apply(udfTopFreq_AddValue, params);
+ }
+
+ TRuntimeNode pgmSerializedTopFreq;
+ {
+ TVector<TRuntimeNode> params = {pgmTopFreq};
+ pgmSerializedTopFreq = pgmBuilder.Apply(udfTopFreq_Serialize, params);
+ }
+
+ TRuntimeNode pgmDeserializedTopFreq;
+ {
+ TVector<TRuntimeNode> params = {pgmSerializedTopFreq};
+ pgmDeserializedTopFreq = pgmBuilder.Apply(udfTopFreq_Deserialize, params);
+ }
+
+ TRuntimeNode pgmReturn;
+ {
+ auto param = pgmBuilder.NewDataLiteral<ui32>(3); // Get is called with 3, yet only two distinct values exist
+ TVector<TRuntimeNode> params = {pgmDeserializedTopFreq, param};
+ pgmReturn = pgmBuilder.Apply(udfTopFreq_Get, params);
+ }
+
+ auto value = setup.GetValue(pgmReturn);
+
+ auto listIterator = value.GetListIterator();
+
+ TUnboxedValue item; // element 0 = count (ui64), element 1 = value
+
+ UNIT_ASSERT(listIterator.Next(item));
+ UNIT_ASSERT_EQUAL(item.GetElement(1).Get<bool>(), false);
+ UNIT_ASSERT_EQUAL(item.GetElement(0).Get<ui64>(), 10);
+
+ UNIT_ASSERT(listIterator.Next(item));
+ UNIT_ASSERT_EQUAL(item.GetElement(1).Get<bool>(), true);
+ UNIT_ASSERT_EQUAL(item.GetElement(0).Get<ui64>(), 8);
+
+ UNIT_ASSERT(!listIterator.Next(item)); // the counts survived the round trip and nothing else was added
+ }
+
+ Y_UNIT_TEST(ApproxTopFreq) { // Per-block sketches are serialized, deserialized and merged; checks that enough frequent values surface.
+ TSetup setup;
+ TProgramBuilder& pgmBuilder = setup.GetProgramBuilder();
+
+ const auto valueType = pgmBuilder.NewDataType(NUdf::TDataType<ui64>::Id);
+ const auto emptyStructType = pgmBuilder.NewEmptyStructType();
+ const auto resourceType = pgmBuilder.NewResourceType("TopFreq.TopFreqResource.Uint64");
+ const auto ui32Type = pgmBuilder.NewDataType(NUdf::TDataType<ui32>::Id);
+ const auto ui64Type = pgmBuilder.NewDataType(NUdf::TDataType<ui64>::Id);
+
+ const auto createArgsType = pgmBuilder.NewTupleType({valueType, ui32Type});
+ const auto createUserType = pgmBuilder.NewTupleType({createArgsType, emptyStructType, valueType}); // user type: (argument tuple, empty struct, value type)
+ auto udfTopFreq_Create = pgmBuilder.Udf("TopFreq.TopFreq_Create", TRuntimeNode(), createUserType);
+
+ auto addValueArgsType = pgmBuilder.NewTupleType({resourceType, valueType});
+ auto addValueUserType = pgmBuilder.NewTupleType({addValueArgsType, emptyStructType, valueType});
+ auto udfTopFreq_AddValue = pgmBuilder.Udf("TopFreq.TopFreq_AddValue", TRuntimeNode(), addValueUserType);
+
+ auto mergeArgsType = pgmBuilder.NewTupleType({resourceType, resourceType});
+ auto mergeUserType = pgmBuilder.NewTupleType({mergeArgsType, emptyStructType, valueType});
+ auto udfTopFreq_Merge = pgmBuilder.Udf("TopFreq.TopFreq_Merge", TRuntimeNode(), mergeUserType);
+
+ auto getArgsType = pgmBuilder.NewTupleType({resourceType, ui32Type});
+ auto getUserType = pgmBuilder.NewTupleType({getArgsType, emptyStructType, valueType});
+ auto udfTopFreq_Get = pgmBuilder.Udf("TopFreq.TopFreq_Get", TRuntimeNode(), getUserType);
+
+ auto serializeArgsType = pgmBuilder.NewTupleType({resourceType});
+ auto serializeUserType = pgmBuilder.NewTupleType({serializeArgsType, emptyStructType, valueType});
+ auto udfTopFreq_Serialize = pgmBuilder.Udf("TopFreq.TopFreq_Serialize", TRuntimeNode(), serializeUserType);
+
+ auto serializedType = pgmBuilder.NewTupleType({ui32Type, ui32Type, // serialized form: (ui32, ui32, list of (ui64, value) tuples)
+ pgmBuilder.NewListType(pgmBuilder.NewTupleType({ui64Type, valueType}))});
+
+ auto deserializeArgsType = pgmBuilder.NewTupleType({serializedType});
+ auto deserializeUserType = pgmBuilder.NewTupleType({deserializeArgsType, emptyStructType, valueType});
+ auto udfTopFreq_Deserialize = pgmBuilder.Udf("TopFreq.TopFreq_Deserialize", TRuntimeNode(), deserializeUserType);
+
+ static const ui64 BigNum = 20; // number of distinct frequent values (1..BigNum)
+ static const ui64 BigEach = 5000; // copies of each frequent value
+ static const ui64 SmallNum = 500; // number of distinct rare values (BigNum+1..BigNum+SmallNum)
+ static const ui64 SmallEach = 20; // copies of each rare value
+ static const ui64 Total = BigNum * BigEach + SmallNum * SmallEach; // 110000 generated values in total
+ static const i32 AskFor = 25; // passed to Create and to Get; also the number of result items read
+ static const ui64 BlockSize = 200; // values fed into each per-block resource
+ static const ui64 BlockCount = 10; // number of per-block resources
+ static const i32 WorksIfAtLeast = 15; // minimum number of frequent values required among the results
+
+ std::array<ui64, Total> values; // NOTE(review): roughly 880 KB of automatic storage
+ std::array<TRuntimeNode, BlockCount> pgmTopFreqs;
+
+ i32 curIndex = 0;
+ for (ui64 i = 1; i <= BigNum; i++) {
+ for (ui64 j = 0; j < BigEach; j++) {
+ values[curIndex++] = i;
+ }
+ }
+
+ for (ui64 i = BigNum + 1; i <= BigNum + SmallNum; i++) {
+ for (ui64 j = 0; j < SmallEach; j++) {
+ values[curIndex++] = i;
+ }
+ }
+
+ Shuffle(values.begin(), values.end());
+
+ TVector<TRuntimeNode> params;
+ TRuntimeNode param;
+ TRuntimeNode pgmvalue;
+
+ for (ui64 i = 0; i < BlockCount; i++) { // only the first BlockCount * BlockSize shuffled values are consumed
+ {
+ pgmvalue = pgmBuilder.NewDataLiteral<ui64>(values[i * BlockSize]); // the block's first value creates its resource
+ param = pgmBuilder.NewDataLiteral<ui32>(AskFor);
+ params = {pgmvalue, param};
+ pgmTopFreqs[i] = pgmBuilder.Apply(udfTopFreq_Create, params);
+ }
+
+ for (ui64 j = i * BlockSize + 1; j < (i + 1) * BlockSize; j++) { // the remaining values of the block
+ pgmvalue = pgmBuilder.NewDataLiteral<ui64>(values[j]);
+ params = {pgmTopFreqs[i], pgmvalue};
+ pgmTopFreqs[i] = pgmBuilder.Apply(udfTopFreq_AddValue, params);
+ }
+
+ {
+ params = {pgmTopFreqs[i]};
+ pgmTopFreqs[i] = pgmBuilder.Apply(udfTopFreq_Serialize, params); // the slot now holds the serialized form
+ }
+ }
+
+ TRuntimeNode pgmMainTopFreq;
+ {
+ pgmvalue = pgmBuilder.NewDataLiteral<ui64>(Total + 2); // seed value outside the generated range 1..BigNum+SmallNum
+ param = pgmBuilder.NewDataLiteral<ui32>(AskFor);
+ params = {pgmvalue, param};
+ pgmMainTopFreq = pgmBuilder.Apply(udfTopFreq_Create, params);
+ }
+
+ for (ui64 i = 0; i < BlockCount; i++) {
+ params = {pgmTopFreqs[i]};
+ pgmTopFreqs[i] = pgmBuilder.Apply(udfTopFreq_Deserialize, params);
+
+ params = {pgmMainTopFreq, pgmTopFreqs[i]};
+ pgmMainTopFreq = pgmBuilder.Apply(udfTopFreq_Merge, params); // fold each block into the main resource
+ }
+
+ TRuntimeNode pgmReturn;
+ {
+ param = pgmBuilder.NewDataLiteral<ui32>(AskFor);
+ params = {pgmMainTopFreq, param};
+ pgmReturn = pgmBuilder.Apply(udfTopFreq_Get, params);
+ }
+
+ auto value = setup.GetValue(pgmReturn);
+
+ auto listIterator = value.GetListIterator();
+
+ ui32 found = 0; // how many returned values are frequent ones (<= BigNum)
+
+ for (ui64 i = 0; i < AskFor; i++) {
+ TUnboxedValue item;
+
+ UNIT_ASSERT(listIterator.Next(item)); // exactly AskFor items must be present
+ ui64 current = item.GetElement(1).Get<ui64>(); // element 1 is the value
+ if (current <= BigNum)
+ found++;
+ }
+
+ UNIT_ASSERT(!listIterator.Skip()); // nothing may remain after AskFor items
+ UNIT_ASSERT(found >= WorksIfAtLeast);
+ }
+ }
+}
diff --git a/yql/essentials/udfs/common/topfreq/ut/ya.make b/yql/essentials/udfs/common/topfreq/ut/ya.make
new file mode 100644
index 00000000000..702e9a7214a
--- /dev/null
+++ b/yql/essentials/udfs/common/topfreq/ut/ya.make
@@ -0,0 +1,15 @@
+UNITTEST_FOR(yql/essentials/udfs/common/topfreq/static)
+
+SRCS(
+ ../topfreq_udf_ut.cpp
+)
+
+PEERDIR(
+ yql/essentials/minikql/comp_nodes/llvm14
+ yql/essentials/public/udf/service/exception_policy
+ yql/essentials/sql/pg_dummy
+)
+
+YQL_LAST_ABI_VERSION()
+
+END()
diff --git a/yql/essentials/udfs/common/topfreq/ya.make b/yql/essentials/udfs/common/topfreq/ya.make
new file mode 100644
index 00000000000..44ec1309ed3
--- /dev/null
+++ b/yql/essentials/udfs/common/topfreq/ya.make
@@ -0,0 +1,32 @@
+IF (YQL_PACKAGED)
+ PACKAGE()
+ FROM_SANDBOX(FILE 7319906760 OUT_NOAUTO libtopfreq_udf.so
+ )
+ END()
+ELSE ()
+YQL_UDF_CONTRIB(topfreq_udf)
+
+ YQL_ABI_VERSION(
+ 2
+ 28
+ 0
+ )
+
+ SRCS(
+ topfreq_udf.cpp
+ )
+
+ PEERDIR(
+ yql/essentials/udfs/common/topfreq/static
+ )
+
+ END()
+ENDIF ()
+
+
+RECURSE_FOR_TESTS(
+ test
+ ut
+)
+
+
diff --git a/yql/essentials/udfs/common/unicode_base/lib/unicode_base_udf.cpp b/yql/essentials/udfs/common/unicode_base/lib/unicode_base_udf.cpp
new file mode 100644
index 00000000000..3e90765e405
--- /dev/null
+++ b/yql/essentials/udfs/common/unicode_base/lib/unicode_base_udf.cpp
@@ -0,0 +1 @@
+#include "unicode_base_udf.h" \ No newline at end of file
diff --git a/yql/essentials/udfs/common/unicode_base/lib/unicode_base_udf.h b/yql/essentials/udfs/common/unicode_base/lib/unicode_base_udf.h
new file mode 100644
index 00000000000..6cbf4b493ad
--- /dev/null
+++ b/yql/essentials/udfs/common/unicode_base/lib/unicode_base_udf.h
@@ -0,0 +1,534 @@
+#pragma once
+
+#include <yql/essentials/public/udf/udf_allocator.h>
+#include <yql/essentials/public/udf/udf_helpers.h>
+#include <yql/essentials/utils/utf8.h>
+
+#include <library/cpp/string_utils/levenshtein_diff/levenshtein_diff.h>
+#include <library/cpp/unicode/normalization/normalization.h>
+#include <library/cpp/unicode/set/unicode_set.h>
+
+#include <library/cpp/deprecated/split/split_iterator.h>
+#include <util/string/join.h>
+#include <util/string/reverse.h>
+#include <util/string/split.h>
+#include <util/string/subst.h>
+#include <util/charset/wide.h>
+#include <util/charset/utf8.h>
+#include <util/string/strip.h>
+#include <util/string/ascii.h>
+#include <util/charset/unidata.h>
+
+using namespace NYql;
+using namespace NUdf;
+using namespace NUnicode;
+
+namespace {
+
+ template <class It>
+ struct TIsUnicodeSpaceAdapter { // Predicate over iterators: true when the pointed-to character satisfies IsSpace.
+ bool operator()(const It& it) const noexcept {
+ return IsSpace(*it); // dereferences `it`, so it must point at a valid character
+ }
+ };
+
+ template <class It>
+ TIsUnicodeSpaceAdapter<It> IsUnicodeSpaceAdapter(It) { // Factory: the argument is only used to deduce It.
+ return {};
+ }
+
+#define NORMALIZE_UDF_MAP(XX) /* X-macro table of (UDF name, normalization form); plain Normalize uses NFC */ \
+ XX(Normalize, NFC) \
+ XX(NormalizeNFD, NFD) \
+ XX(NormalizeNFC, NFC) \
+ XX(NormalizeNFKD, NFKD) \
+ XX(NormalizeNFKC, NFKC)
+
+#define IS_CATEGORY_UDF_MAP(XX) /* X-macro table of (UDF name, per-character predicate); IsHex maps to IsHexdigit */ \
+ XX(IsAscii, IsAscii) \
+ XX(IsSpace, IsSpace) \
+ XX(IsUpper, IsUpper) \
+ XX(IsLower, IsLower) \
+ XX(IsDigit, IsDigit) \
+ XX(IsAlpha, IsAlpha) \
+ XX(IsAlnum, IsAlnum) \
+ XX(IsHex, IsHexdigit)
+
+#define NORMALIZE_UDF(name, mode) /* Defines UDF T<name>: Utf8 -> UTF-16 -> Normalize<mode> -> Utf8 */ \
+ SIMPLE_UDF(T##name, TUtf8(TAutoMap<TUtf8>)) { \
+ const auto& inputRef = args[0].AsStringRef(); \
+ const TUtf16String& input = UTF8ToWide(inputRef.Data(), inputRef.Size()); \
+ const TString& output = WideToUTF8(Normalize<mode>(input)); \
+ return valueBuilder->NewString(output); \
+ }
+
+#define IS_CATEGORY_UDF(udfName, function) /* Defines UDF T<udfName>: true iff `function` holds for every decoded code point */ \
+ SIMPLE_UDF(T##udfName, bool(TAutoMap<TUtf8>)) { \
+ Y_UNUSED(valueBuilder); \
+ const TStringBuf input(args[0].AsStringRef()); \
+ bool result = true; /* stays true for an empty string: the loop below never runs */ \
+ wchar32 rune; \
+ const unsigned char* cur = reinterpret_cast<const unsigned char*>(input.begin()); \
+ const unsigned char* last = reinterpret_cast<const unsigned char*>(input.end()); \
+ while (cur != last) { \
+ ReadUTF8CharAndAdvance(rune, cur, last); /* the decoder's return code is not checked here */ \
+ if (!function(rune)) { \
+ result = false; /* the first failing code point decides the answer */ \
+ break; \
+ } \
+ } \
+ return TUnboxedValuePod(result); \
+ }
+
+ NORMALIZE_UDF_MAP(NORMALIZE_UDF)
+ IS_CATEGORY_UDF_MAP(IS_CATEGORY_UDF)
+
+ // Reports whether the given byte string is valid UTF-8.
+ // An empty (NULL) argument yields false rather than NULL.
+ SIMPLE_UDF(TIsUtf, bool(TOptional<char*>)) {
+     Y_UNUSED(valueBuilder);
+     if (!args[0]) {
+         return TUnboxedValuePod(false);
+     }
+     return TUnboxedValuePod(IsUtf8(args[0].AsStringRef()));
+ }
+
+ // Returns the length of the string in UTF-8 characters (not bytes),
+ // as counted by GetNumberOfUTF8Chars.
+ SIMPLE_UDF(TGetLength, ui64(TAutoMap<TUtf8>)) {
+     Y_UNUSED(valueBuilder);
+     const auto& utf8 = args[0].AsStringRef();
+     size_t charCount;
+     GetNumberOfUTF8Chars(utf8.Data(), utf8.Size(), charCount);
+     const ui64 length = static_cast<ui64>(charCount);
+     return TUnboxedValuePod(length);
+ }
+
+ // Parses the whole input string as an unsigned 64-bit integer using std::strtoull.
+ // args[1] is the optional radix; when it is omitted, 0 is passed to strtoull.
+ // Terminates the query via UdfTerminate when nothing could be parsed, when the
+ // value does not fit into Uint64, or when characters remain after the number.
+ SIMPLE_UDF_WITH_OPTIONAL_ARGS(TToUint64, ui64(TAutoMap<TUtf8>, TOptional<ui16>), 1) {
+     Y_UNUSED(valueBuilder);
+     const TString inputStr(args[0].AsStringRef());
+     const char* input = inputStr.data();
+     const int base = static_cast<int>(args[1].GetOrDefault<ui16>(0));
+     char* pos = nullptr;
+     // strtoull sets errno on failure but never clears it. Reset it first, otherwise
+     // a stale ERANGE from earlier code makes a valid ULLONG_MAX look like an overflow.
+     errno = 0;
+     unsigned long long res = std::strtoull(input, &pos, base);
+     ui64 ret = static_cast<ui64>(res);
+     if (!res && pos == input) {
+         // No characters were consumed at all.
+         UdfTerminate("Input string is not a number");
+     } else if ((res == ULLONG_MAX && errno == ERANGE) || ret != res) {
+         UdfTerminate("Converted value falls out of Uint64 range");
+     } else if (*pos) {
+         // Parsing stopped before the terminating NUL.
+         UdfTerminate("Input string contains junk after the number");
+     }
+     return TUnboxedValuePod(ret);
+ }
+
+ // Non-failing counterpart of ToUint64: parses the whole input string as an unsigned
+ // 64-bit integer using std::strtoull and returns an empty optional instead of
+ // terminating when nothing could be parsed, the value does not fit into Uint64,
+ // or characters remain after the number.
+ // args[1] is the optional radix; when it is omitted, 0 is passed to strtoull.
+ SIMPLE_UDF_WITH_OPTIONAL_ARGS(TTryToUint64, TOptional<ui64>(TAutoMap<TUtf8>, TOptional<ui16>), 1) {
+     Y_UNUSED(valueBuilder);
+     const TString inputStr(args[0].AsStringRef());
+     const char* input = inputStr.data();
+     const int base = static_cast<int>(args[1].GetOrDefault<ui16>(0));
+     char* pos = nullptr;
+     // strtoull sets errno on failure but never clears it. Reset it first, otherwise
+     // a stale ERANGE from earlier code makes a valid ULLONG_MAX look like an overflow.
+     errno = 0;
+     unsigned long long res = std::strtoull(input, &pos, base);
+     ui64 ret = static_cast<ui64>(res);
+     if (!res && pos == input) {
+         // No characters were consumed at all.
+         return TUnboxedValuePod();
+     }
+     if ((res == ULLONG_MAX && errno == ERANGE) || ret != res) {
+         return TUnboxedValuePod();
+     }
+     if (*pos) {
+         // Parsing stopped before the terminating NUL.
+         return TUnboxedValuePod();
+     }
+     return TUnboxedValuePod(ret);
+ }
+
+ // Extracts a substring via SubstrUTF8.
+ // args[1] is the optional start (default 0); args[2] is the optional length
+ // (when absent, TStringBuf::npos is passed).
+ SIMPLE_UDF_WITH_OPTIONAL_ARGS(TSubstring, TUtf8(TAutoMap<TUtf8>, TOptional<ui64>, TOptional<ui64>), 1) {
+     const TStringBuf text(args[0].AsStringRef());
+     const size_t start = args[1].GetOrDefault<ui64>(0);
+     size_t count = TStringBuf::npos;
+     if (args[2]) {
+         count = size_t(args[2].Get<ui64>());
+     }
+     return valueBuilder->NewString(SubstrUTF8(text, start, count));
+ }
+
+ // Finds the first occurrence of args[1] in args[0], starting the search at the
+ // optional character position args[2] (default 0).
+ // Returns the character index of the match, or an empty optional when there is none.
+ SIMPLE_UDF_WITH_OPTIONAL_ARGS(TFind, TOptional<ui64>(TAutoMap<TUtf8>, TUtf8, TOptional<ui64>), 1) {
+     Y_UNUSED(valueBuilder);
+     const std::string_view haystack(args[0].AsStringRef());
+     const std::string_view needle(args[1].AsStringRef());
+
+     // Turn the starting character position into a byte offset by stepping
+     // over one character (WideCharSize bytes) at a time.
+     std::string_view::size_type byteOffset = 0U;
+     auto charsLeft = args[2].GetOrDefault<ui64>(0ULL);
+     auto cursor = haystack.data();
+     while (charsLeft && byteOffset < haystack.size()) {
+         const auto width = WideCharSize(*cursor);
+         byteOffset += width;
+         cursor += width;
+         --charsLeft;
+     }
+
+     const auto found = haystack.find(needle, byteOffset);
+     if (std::string_view::npos == found) {
+         return TUnboxedValuePod();
+     }
+
+     // Convert the byte offset of the match back into a character index.
+     size_t charIndex;
+     GetNumberOfUTF8Chars(haystack.data(), found, charIndex);
+     return TUnboxedValuePod(static_cast<ui64>(charIndex));
+ }
+
+ // Finds the last occurrence of args[1] in args[0] that starts at or before the
+ // optional character position args[2] (default: search from the end).
+ // Returns the character index of the match, or an empty optional when there is none.
+ SIMPLE_UDF_WITH_OPTIONAL_ARGS(TRFind, TOptional<ui64>(TAutoMap<TUtf8>, TUtf8, TOptional<ui64>), 1) {
+     Y_UNUSED(valueBuilder);
+     const std::string_view haystack(args[0].AsStringRef());
+     const std::string_view needle(args[1].AsStringRef());
+
+     std::string_view::size_type byteOffset = std::string_view::npos;
+     const auto startChar = args[2].GetOrDefault<ui64>(std::string_view::npos);
+     if (std::string_view::npos != startChar) {
+         // Turn the character position into a byte offset, one character at a time.
+         byteOffset = 0ULL;
+         auto cursor = haystack.data();
+         for (auto charsLeft = startChar; charsLeft && byteOffset < haystack.size(); --charsLeft) {
+             const auto width = WideCharSize(*cursor);
+             byteOffset += width;
+             cursor += width;
+         }
+     }
+
+     const auto found = haystack.rfind(needle, byteOffset);
+     if (std::string_view::npos == found) {
+         return TUnboxedValuePod();
+     }
+
+     // Convert the byte offset of the match back into a character index.
+     size_t charIndex;
+     GetNumberOfUTF8Chars(haystack.data(), found, charIndex);
+     return TUnboxedValuePod(static_cast<ui64>(charIndex));
+ }
+
+ using TTmpVector = TSmallVec<TUnboxedValue, TUnboxedValue::TAllocator>;
+
+ // Byte-oriented overload: token boundaries are already positions in the original
+ // UTF-8 buffer, so each token becomes a SubString of `input` at (offset, length)
+ // measured from `from`.
+ template <typename TIt>
+ static void SplitToListImpl(
+     const IValueBuilder* valueBuilder,
+     const TUnboxedValue& input,
+     const std::string_view::const_iterator from,
+     const TIt& it,
+     TTmpVector& result) {
+     for (const auto& token : it) {
+         const auto offset = std::distance(from, token.TokenStart());
+         const auto length = std::distance(token.TokenStart(), token.TokenDelim());
+         result.emplace_back(valueBuilder->SubString(input, offset, length));
+     }
+ }
+
+ template <typename TIt>
+ static void SplitToListImpl( // UTF-16 overload: maps token positions in the wide string back to byte ranges of the UTF-8 input.
+ const IValueBuilder* valueBuilder,
+ const TUnboxedValue& input,
+ const TUtf16String::const_iterator start,
+ const TIt& it,
+ TTmpVector& result) {
+ const std::string_view& original = input.AsStringRef();
+ size_t charPos = 0U, bytePos = 0U; // running position in wide-string units and in UTF-8 bytes; both only move forward
+ for (const auto& elem : it) {
+ for (const size_t next = std::distance(start, elem.TokenStart()); charPos < next; ++charPos) // skip ahead to the token start
+ bytePos += WideCharSize(original[bytePos]);
+ const auto from = bytePos;
+
+ for (const size_t next = charPos + std::distance(elem.TokenStart(), elem.TokenDelim()); charPos < next; ++charPos) // walk over the token itself
+ bytePos += WideCharSize(original[bytePos]); // NOTE(review): one UTF-8 sequence is skipped per UTF-16 unit; confirm behaviour for characters outside the BMP (surrogate pairs)
+ const auto size = bytePos - from;
+ result.emplace_back(valueBuilder->SubString(input, from, size));
+ }
+ }
+
+ // Dispatcher: forwards to the matching SplitToListImpl overload, iterating over
+ // it.SkipEmpty() instead of `it` when empty tokens must be dropped.
+ template <typename TIt, typename TStrIt>
+ static void SplitToListImpl(
+     const IValueBuilder* valueBuilder,
+     const TUnboxedValue& input,
+     const TStrIt from,
+     TIt& it,
+     bool skipEmpty,
+     TTmpVector& result) {
+     if (!skipEmpty) {
+         SplitToListImpl(valueBuilder, input, from, it, result);
+         return;
+     }
+     SplitToListImpl(valueBuilder, input, from, it.SkipEmpty(), result);
+ }
+
+ constexpr char delimeterStringName[] = "DelimeterString";
+ constexpr char skipEmptyName[] = "SkipEmpty";
+ constexpr char limitName[] = "Limit";
+ using TDelimeterStringArg = TNamedArg<bool, delimeterStringName>;
+ using TSkipEmptyArg = TNamedArg<bool, skipEmptyName>;
+ using TLimitArg = TNamedArg<ui64, limitName>;
+
+ SIMPLE_UDF_WITH_OPTIONAL_ARGS(TSplitToList, TListType<TUtf8>( // Splits a Utf8 string by a delimiter into a list of Utf8 pieces.
+ TOptional<TUtf8>,
+ TUtf8,
+ TDelimeterStringArg,
+ TSkipEmptyArg,
+ TLimitArg
+ ),
+ 3) { // presumably: the last three (named) arguments may be omitted
+ TTmpVector result;
+ if (args[0]) { // a NULL input falls through and produces an empty list
+ const bool delimiterString = args[2].GetOrDefault<bool>(true); // default: the delimiter is matched as a whole string
+ const bool skipEmpty = args[3].GetOrDefault<bool>(false);
+ const auto limit = args[4].GetOrDefault<ui64>(0); // 0 means no limit is applied
+ if (delimiterString) {
+ const std::string_view input(args[0].AsStringRef());
+ const std::string_view delimeter(args[1].AsStringRef());
+ if (limit) {
+ auto it = StringSplitter(input).SplitByString(delimeter).Limit(limit + 1); // presumably `limit` splits give limit + 1 pieces; confirm against StringSplitter
+ SplitToListImpl(valueBuilder, args[0], input.cbegin(), it, skipEmpty, result);
+ } else {
+ auto it = StringSplitter(input).SplitByString(delimeter);
+ SplitToListImpl(valueBuilder, args[0], input.cbegin(), it, skipEmpty, result);
+ }
+ } else { // the delimiter is a set of characters: split on the UTF-16 form of both strings
+ const auto& input = UTF8ToWide(args[0].AsStringRef());
+ const auto& delimeter = UTF8ToWide(args[1].AsStringRef());
+ if (limit) {
+ auto it = StringSplitter(input).SplitBySet(delimeter.c_str()).Limit(limit + 1);
+ SplitToListImpl(valueBuilder, args[0], input.cbegin(), it, skipEmpty, result);
+ } else {
+ auto it = StringSplitter(input).SplitBySet(delimeter.c_str());
+ SplitToListImpl(valueBuilder, args[0], input.cbegin(), it, skipEmpty, result);
+ }
+ }
+ }
+ return valueBuilder->NewList(result.data(), result.size());
+ }
+
+ // Joins the non-empty (non-NULL) elements of the list with the given delimiter.
+ SIMPLE_UDF(TJoinFromList, TUtf8(TAutoMap<TListType<TOptional<TUtf8>>>, TUtf8)) {
+     const auto listIt = args[0].GetListIterator();
+     const std::string_view separator(args[1].AsStringRef());
+
+     // Each element is copied into an owning TString before the iterator moves on.
+     std::vector<TString> parts;
+     TUnboxedValue element;
+     while (listIt.Next(element)) {
+         if (!element) {
+             continue; // NULL entries are skipped entirely
+         }
+         parts.emplace_back(element.AsStringRef());
+     }
+
+     return valueBuilder->NewString(JoinSeq(separator, parts));
+ }
+
+ // Levenshtein distance between two Utf8 strings, computed by NLevenshtein::Distance
+ // on their UTF-16 forms.
+ SIMPLE_UDF(TLevensteinDistance, ui64(TAutoMap<TUtf8>, TAutoMap<TUtf8>)) {
+     Y_UNUSED(valueBuilder);
+     const TStringBuf lhsUtf8(args[0].AsStringRef());
+     const TStringBuf rhsUtf8(args[1].AsStringRef());
+     const TUtf16String& lhs = UTF8ToWide(lhsUtf8);
+     const TUtf16String& rhs = UTF8ToWide(rhsUtf8);
+     const ui64 distance = NLevenshtein::Distance(lhs, rhs);
+     return TUnboxedValuePod(distance);
+ }
+
+ // Replaces every occurrence of args[1] with args[2] using SubstGlobal.
+ // When nothing was replaced, the original value is returned as is.
+ SIMPLE_UDF(TReplaceAll, TUtf8(TAutoMap<TUtf8>, TUtf8, TUtf8)) {
+     TString replaced(args[0].AsStringRef());
+     const auto replacements = SubstGlobal(replaced, args[1].AsStringRef(), args[2].AsStringRef());
+     if (!replacements) {
+         return args[0];
+     }
+     return valueBuilder->NewString(replaced);
+ }
+
+ // Replaces the first occurrence of args[1] in args[0] with args[2].
+ // When args[1] does not occur, the original value is returned as is.
+ SIMPLE_UDF(TReplaceFirst, TUtf8(TAutoMap<TUtf8>, TUtf8, TUtf8)) {
+     const std::string_view input(args[0].AsStringRef());
+     const std::string_view what(args[1].AsStringRef());
+     // Search the non-owning view first so the no-match path does not allocate.
+     const auto index = input.find(what);
+     if (index == std::string_view::npos) {
+         return args[0];
+     }
+     std::string result(input);
+     result.replace(index, what.size(), std::string_view(args[2].AsStringRef()));
+     return valueBuilder->NewString(result);
+ }
+
+ // Replaces the last occurrence of args[1] in args[0] with args[2].
+ // When args[1] does not occur, the original value is returned as is.
+ SIMPLE_UDF(TReplaceLast, TUtf8(TAutoMap<TUtf8>, TUtf8, TUtf8)) {
+     const std::string_view input(args[0].AsStringRef());
+     const std::string_view what(args[1].AsStringRef());
+     // Search the non-owning view first so the no-match path does not allocate.
+     const auto index = input.rfind(what);
+     if (index == std::string_view::npos) {
+         return args[0];
+     }
+     std::string result(input);
+     result.replace(index, what.size(), std::string_view(args[2].AsStringRef()));
+     return valueBuilder->NewString(result);
+ }
+
+ // Removes from args[0] every character that occurs in args[1] (treated as a set of
+ // code points). When nothing was removed, the original value is returned as is.
+ SIMPLE_UDF(TRemoveAll, TUtf8(TAutoMap<TUtf8>, TUtf8)) {
+     TUtf32String input = UTF8ToUTF32<true>(args[0].AsStringRef());
+     const TUtf32String remove = UTF8ToUTF32<true>(args[1].AsStringRef());
+     const std::unordered_set<wchar32> banned(remove.cbegin(), remove.cend());
+
+     // Compact in place: `kept` never runs ahead of the character being read.
+     size_t kept = 0;
+     for (const wchar32 ch : input) {
+         if (banned.contains(ch)) {
+             continue;
+         }
+         input[kept++] = ch;
+     }
+
+     if (kept == input.size()) {
+         return args[0];
+     }
+     input.resize(kept);
+     return valueBuilder->NewString(WideToUTF8(input));
+ }
+
+ // Removes the first character of args[0] that occurs in args[1] (treated as a set of
+ // code points). When no such character exists, the original value is returned as is.
+ SIMPLE_UDF(TRemoveFirst, TUtf8(TAutoMap<TUtf8>, TUtf8)) {
+     TUtf32String input = UTF8ToUTF32<true>(args[0].AsStringRef());
+     const TUtf32String remove = UTF8ToUTF32<true>(args[1].AsStringRef());
+     const std::unordered_set<wchar32> banned(remove.cbegin(), remove.cend());
+
+     for (auto it = input.cbegin(); it != input.cend(); ++it) {
+         if (!banned.contains(*it)) {
+             continue;
+         }
+         // Only the first hit is erased; the iterator is not used afterwards.
+         input.erase(it);
+         return valueBuilder->NewString(WideToUTF8(input));
+     }
+     return args[0];
+ }
+
+ // Removes the last character of args[0] that occurs in args[1] (treated as a set of
+ // code points). When no such character exists, the original value is returned as is.
+ SIMPLE_UDF(TRemoveLast, TUtf8(TAutoMap<TUtf8>, TUtf8)) {
+     TUtf32String input = UTF8ToUTF32<true>(args[0].AsStringRef());
+     const TUtf32String remove = UTF8ToUTF32<true>(args[1].AsStringRef());
+     const std::unordered_set<wchar32> banned(remove.cbegin(), remove.cend());
+
+     for (auto it = input.crbegin(); it != input.crend(); ++it) {
+         if (!banned.contains(*it)) {
+             continue;
+         }
+         // Distance to the reverse end, minus one, is the forward index of the hit.
+         const auto index = input.crend() - it - 1;
+         input.erase(index, 1);
+         return valueBuilder->NewString(WideToUTF8(input));
+     }
+     return args[0];
+ }
+
+ SIMPLE_UDF(TToCodePointList, TListType<ui32>(TAutoMap<TUtf8>)) { // Decodes a Utf8 string into a list of its code points (ui32).
+ size_t codePointCount = 0;
+ const auto& inputRef = args[0].AsStringRef();
+ if (!GetNumberOfUTF8Chars(inputRef.Data(), inputRef.Size(), codePointCount)) { // first pass: count, so the array can be sized up front
+ // should not happen but still we have to check return code
+ ythrow yexception() << "Unable to count code points";
+ }
+
+ TUnboxedValue* itemsPtr = nullptr;
+ auto result = valueBuilder->NewArray(codePointCount, itemsPtr); // itemsPtr receives the array's element storage
+ const unsigned char* current = reinterpret_cast<const unsigned char*>(inputRef.Data());
+ const unsigned char* end = current + inputRef.Size();
+ wchar32 rune = BROKEN_RUNE;
+ ui32 codePointIndex = 0;
+ RECODE_RESULT retcode = RECODE_OK;
+ while (current < end && RECODE_OK == (retcode = ReadUTF8CharAndAdvance(rune, current, end))) { // second pass: decode; stops at the first decoding error
+ if (codePointIndex >= codePointCount) {
+ // sanity check
+ ythrow yexception() << "Too big code point index " << codePointIndex << ", expecting only " << codePointCount << " code points";
+ }
+ itemsPtr[codePointIndex++] = TUnboxedValuePod(static_cast<ui32>(rune));
+ }
+
+ if (retcode != RECODE_OK) { // the loop above ended on a decoding error rather than at the end of input
+ ythrow yexception() << "Malformed UTF-8 string";
+ }
+
+ return result;
+ }
+
+ SIMPLE_UDF(TFromCodePointList, TUtf8(TAutoMap<TListType<ui32>>)) {
+     // Encodes a list of ui32 code points into a UTF-8 string.
+     auto list = args[0];
+
+     // Path 1: GetElements() returned a non-null pointer, so the items can be
+     // indexed directly and the output buffer sized from the list length.
+     if (auto items = list.GetElements()) {
+         const auto count = list.GetListLength();
+         auto capacity = WideToUTF8BufferSize(count);
+         TTempBuf scratch(capacity);
+         auto bufferPtr = scratch.Data();
+         auto bufferEnd = scratch.Data() + capacity;
+         for (ui64 idx = 0; idx != count; ++idx) {
+             const wchar32 codePoint = items[idx].Get<ui32>();
+             size_t written = 0;
+             WideToUTF8(&codePoint, 1, bufferPtr, written);
+             Y_ENSURE(written <= 4);
+             bufferPtr += written;
+             Y_ENSURE(bufferPtr <= bufferEnd);
+         }
+         return valueBuilder->NewString(TStringRef(scratch.Data(), bufferPtr - scratch.Data()));
+     }
+
+     // Path 2: no direct element access, so walk the list iterator and
+     // append each encoded code point to a growable byte buffer.
+     std::vector<char, NUdf::TStdAllocatorForUdf<char>> bytes;
+     bytes.reserve(TUnboxedValuePod::InternalBufferSize);
+
+     const auto& walker = list.GetListIterator();
+     char encoded[4] = {};
+     NUdf::TUnboxedValue current;
+     while (walker.Next(current)) {
+         const wchar32 codePoint = current.Get<ui32>();
+         size_t written = 0;
+         WideToUTF8(&codePoint, 1, encoded, written);
+         Y_ENSURE(written <= 4);
+         bytes.insert(bytes.end(), encoded, encoded + written);
+     }
+
+     return valueBuilder->NewString(TStringRef(bytes.data(), bytes.size()));
+ }
+
+ SIMPLE_UDF(TReverse, TUtf8(TAutoMap<TUtf8>)) {
+     // Reverse on the wide (decoded) representation, then encode back to UTF-8.
+     auto reversed = UTF8ToWide(args[0].AsStringRef());
+     ReverseInPlace(reversed);
+     const auto encoded = WideToUTF8(reversed);
+     return valueBuilder->NewString(encoded);
+ }
+
+ SIMPLE_UDF(TToLower, TUtf8(TAutoMap<TUtf8>)) {
+     // Lower-cases the decoded string in place. When ToLower() returns false
+     // the original value is returned as is, without re-encoding.
+     auto text = UTF8ToWide(args[0].AsStringRef());
+     if (!ToLower(text)) {
+         return args[0];
+     }
+     return valueBuilder->NewString(WideToUTF8(text));
+ }
+
+ SIMPLE_UDF(TToUpper, TUtf8(TAutoMap<TUtf8>)) {
+     // Upper-cases the decoded string in place. When ToUpper() returns false
+     // the original value is returned as is, without re-encoding.
+     auto text = UTF8ToWide(args[0].AsStringRef());
+     if (!ToUpper(text)) {
+         return args[0];
+     }
+     return valueBuilder->NewString(WideToUTF8(text));
+ }
+
+ SIMPLE_UDF(TToTitle, TUtf8(TAutoMap<TUtf8>)) {
+     // Title-cases the decoded string in place. When ToTitle() returns false
+     // the original value is returned as is, without re-encoding.
+     auto text = UTF8ToWide(args[0].AsStringRef());
+     if (!ToTitle(text)) {
+         return args[0];
+     }
+     return valueBuilder->NewString(WideToUTF8(text));
+ }
+
+ SIMPLE_UDF(TStrip, TUtf8(TAutoMap<TUtf8>)) {
+     // Strips the decoded string using the IsUnicodeSpaceAdapter criterion
+     // and returns the result re-encoded as UTF-8.
+     const TUtf32String decoded = UTF8ToUTF32<true>(args[0].AsStringRef());
+     return valueBuilder->NewString(
+         WideToUTF8(StripString(decoded, IsUnicodeSpaceAdapter(decoded.begin()))));
+ }
+
+ SIMPLE_UDF(TIsUnicodeSet, bool(TAutoMap<TUtf8>, TUtf8)) {
+     // Returns true when every code point of args[0] belongs to the unicode
+     // set described by the pattern in args[1] (true for an empty input).
+     // Terminates the query if the pattern cannot be parsed or if the input
+     // cannot be decoded as UTF-8.
+     Y_UNUSED(valueBuilder);
+     const TStringBuf input(args[0].AsStringRef());
+     const TUtf16String& customCategory = UTF8ToWide(args[1].AsStringRef());
+     TUnicodeSet unicodeSet;
+     try {
+         unicodeSet.Parse(customCategory);
+     } catch (...) {
+         UdfTerminate((TStringBuilder() << "Failed to parse unicode set: " << CurrentExceptionMessage()).c_str());
+     }
+
+     wchar32 rune = BROKEN_RUNE;
+     const unsigned char* cur = reinterpret_cast<const unsigned char*>(input.begin());
+     const unsigned char* last = reinterpret_cast<const unsigned char*>(input.end());
+     while (cur != last) {
+         // The decoder's return code must be checked: if decoding fails without
+         // advancing `cur`, ignoring the error could make this loop spin forever
+         // (the same check is done in TToCodePointList).
+         if (RECODE_OK != ReadUTF8CharAndAdvance(rune, cur, last)) {
+             UdfTerminate("Malformed UTF-8 string");
+         }
+         if (!unicodeSet.Has(rune)) {
+             return TUnboxedValuePod(false);
+         }
+     }
+     return TUnboxedValuePod(true);
+ }
+
+// Each REGISTER_* helper maps one entry of a UDF map macro to the name of its
+// UDF class (T<name>) followed by a comma; the second macro argument is ignored here.
+// EXPORTED_UNICODE_BASE_UDF expands to the comma-separated list of UDF classes
+// below: first the entries generated from NORMALIZE_UDF_MAP and
+// IS_CATEGORY_UDF_MAP, then the explicitly named classes.
+// NOTE(review): presumably this list is passed to a UDF module registration
+// macro by the file that includes this one -- confirm at the use site.
+#define REGISTER_NORMALIZE_UDF(name, mode) T##name,
+#define REGISTER_IS_CATEGORY_UDF(name, function) T##name,
+#define EXPORTED_UNICODE_BASE_UDF \
+ NORMALIZE_UDF_MAP(REGISTER_NORMALIZE_UDF) \
+ IS_CATEGORY_UDF_MAP(REGISTER_IS_CATEGORY_UDF) \
+ TIsUtf, \
+ TGetLength, \
+ TSubstring, \
+ TFind, \
+ TRFind, \
+ TSplitToList, \
+ TJoinFromList, \
+ TLevensteinDistance, \
+ TReplaceAll, \
+ TReplaceFirst, \
+ TReplaceLast, \
+ TRemoveAll, \
+ TRemoveFirst, \
+ TRemoveLast, \
+ TToCodePointList, \
+ TFromCodePointList, \
+ TReverse, \
+ TToLower, \
+ TToUpper, \
+ TToTitle, \
+ TToUint64, \
+ TTryToUint64, \
+ TStrip, \
+ TIsUnicodeSet
+}
diff --git a/yql/essentials/udfs/common/unicode_base/lib/ya.make b/yql/essentials/udfs/common/unicode_base/lib/ya.make
new file mode 100644
index 00000000000..f50858d02ae
--- /dev/null
+++ b/yql/essentials/udfs/common/unicode_base/lib/ya.make
@@ -0,0 +1,22 @@
+# Library target built from unicode_base_udf.cpp.
+LIBRARY()
+
+# NOTE(review): presumably the UDF ABI version as major / minor / patch -- confirm
+# against the YQL_ABI_VERSION macro definition.
+YQL_ABI_VERSION(
+ 2
+ 27
+ 0
+)
+
+SRCS(
+ unicode_base_udf.cpp
+)
+
+# Libraries this target depends on.
+PEERDIR(
+ library/cpp/deprecated/split
+ library/cpp/string_utils/levenshtein_diff
+ library/cpp/unicode/normalization
+ library/cpp/unicode/set
+ yql/essentials/public/udf
+ yql/essentials/utils
+)
+
+END()
diff --git a/yql/essentials/udfs/common/unicode_base/test/canondata/result.json b/yql/essentials/udfs/common/unicode_base/test/canondata/result.json
new file mode 100644
index 00000000000..0b47a674443
--- /dev/null
+++ b/yql/essentials/udfs/common/unicode_base/test/canondata/result.json
@@ -0,0 +1,67 @@
+{
+ "test.test[Find]": [
+ {
+ "uri": "file://test.test_Find_/results.txt"
+ }
+ ],
+ "test.test[IsCategory]": [
+ {
+ "uri": "file://test.test_IsCategory_/results.txt"
+ }
+ ],
+ "test.test[List]": [
+ {
+ "uri": "file://test.test_List_/results.txt"
+ }
+ ],
+ "test.test[Remove]": [
+ {
+ "uri": "file://test.test_Remove_/results.txt"
+ }
+ ],
+ "test.test[Replace]": [
+ {
+ "uri": "file://test.test_Replace_/results.txt"
+ }
+ ],
+ "test.test[Strip]": [
+ {
+ "uri": "file://test.test_Strip_/results.txt"
+ }
+ ],
+ "test.test[ToUint64F0]": [
+ {
+ "uri": "file://test.test_ToUint64F0_/extracted"
+ }
+ ],
+ "test.test[ToUint64F1]": [
+ {
+ "uri": "file://test.test_ToUint64F1_/extracted"
+ }
+ ],
+ "test.test[ToUint64F2]": [
+ {
+ "uri": "file://test.test_ToUint64F2_/extracted"
+ }
+ ],
+ "test.test[ToUint64]": [
+ {
+ "uri": "file://test.test_ToUint64_/results.txt"
+ }
+ ],
+ "test.test[To]": [
+ {
+ "uri": "file://test.test_To_/results.txt"
+ }
+ ],
+ "test.test[TryToUint64]": [
+ {
+ "uri": "file://test.test_TryToUint64_/results.txt"
+ }
+ ],
+ "test.test[Unicode]": [
+ {
+ "uri": "file://test.test_Unicode_/results.txt"
+ }
+ ]
+}
diff --git a/yql/essentials/udfs/common/unicode_base/test/canondata/test.test_Find_/results.txt b/yql/essentials/udfs/common/unicode_base/test/canondata/test.test_Find_/results.txt
new file mode 100644
index 00000000000..bcccb2b5119
--- /dev/null
+++ b/yql/essentials/udfs/common/unicode_base/test/canondata/test.test_Find_/results.txt
@@ -0,0 +1,86 @@
+[
+ {
+ "Write" = [
+ {
+ "Type" = [
+ "ListType";
+ [
+ "StructType";
+ [
+ [
+ "column0";
+ [
+ "DataType";
+ "Utf8"
+ ]
+ ];
+ [
+ "column1";
+ [
+ "DataType";
+ "Utf8"
+ ]
+ ];
+ [
+ "column2";
+ [
+ "DataType";
+ "Utf8"
+ ]
+ ]
+ ]
+ ]
+ ];
+ "Data" = [
+ [
+ "\xC3\xA4stig, m\xC3\266chten, ausf\xC3\xBChrlich, sp\xC3\xA4ter, k\xC3\xB6nnen, nat\xC3\xBCrlich, universit\xC3\xA4t, \xC3\266ffentlich, r\xC3\274ckw\xC3\xA4rts, k\xC3\xA4mpfen, m\xC3\xB6gen, \xC3\274berall, regelm";
+ "\xC3\266chten, ausf\xC3\xBChrlich, sp\xC3\xA4ter, k\xC3\xB6nnen, nat\xC3\xBCrlich, universit\xC3\xA4t, \xC3\266ffentlich, r\xC3\274ckw\xC3\xA4rts, k\xC3\xA4mpfen, m";
+ "\xC3\xBChrlich, sp\xC3\xA4ter, k\xC3\xB6nnen, nat\xC3\xBCrlich, universit\xC3\xA4t, \xC3\266ffentlich, r\xC3\274ckw\xC3\xA4rts, k\xC3\xA4mpfen, m\xC3\xB6gen, "
+ ]
+ ]
+ }
+ ]
+ };
+ {
+ "Write" = [
+ {
+ "Type" = [
+ "ListType";
+ [
+ "StructType";
+ [
+ [
+ "column0";
+ [
+ "DataType";
+ "Utf8"
+ ]
+ ];
+ [
+ "column1";
+ [
+ "DataType";
+ "Utf8"
+ ]
+ ];
+ [
+ "column2";
+ [
+ "DataType";
+ "Utf8"
+ ]
+ ]
+ ]
+ ]
+ ];
+ "Data" = [
+ [
+ "\xC3\xA4ter, k\xC3\xB6nnen, nat\xC3\xBCrlich, universit\xC3\xA4t, \xC3\266ffentlich, r\xC3\274ckw\xC3\xA4rts, k";
+ "\xC3\266chten, ausf\xC3\xBChrlich, sp\xC3\xA4ter, k\xC3\xB6nnen, nat\xC3\xBCrlich, universit\xC3\xA4t, \xC3\266ffentlich, r\xC3\274ckw\xC3\xA4rts, k\xC3\xA4mpfen, m";
+ "\xC3\xBCrlich, universit\xC3\xA4t, \xC3\266ffentlich, r"
+ ]
+ ]
+ }
+ ]
+ }
+] \ No newline at end of file
diff --git a/yql/essentials/udfs/common/unicode_base/test/canondata/test.test_IsCategory_/results.txt b/yql/essentials/udfs/common/unicode_base/test/canondata/test.test_IsCategory_/results.txt
new file mode 100644
index 00000000000..a6fd861c645
--- /dev/null
+++ b/yql/essentials/udfs/common/unicode_base/test/canondata/test.test_IsCategory_/results.txt
@@ -0,0 +1,164 @@
+[
+ {
+ "Write" = [
+ {
+ "Type" = [
+ "ListType";
+ [
+ "StructType";
+ [
+ [
+ "column0";
+ [
+ "DataType";
+ "Bool"
+ ]
+ ];
+ [
+ "column1";
+ [
+ "DataType";
+ "Bool"
+ ]
+ ];
+ [
+ "column2";
+ [
+ "DataType";
+ "Bool"
+ ]
+ ];
+ [
+ "column3";
+ [
+ "DataType";
+ "Bool"
+ ]
+ ];
+ [
+ "column4";
+ [
+ "DataType";
+ "Bool"
+ ]
+ ];
+ [
+ "column5";
+ [
+ "DataType";
+ "Bool"
+ ]
+ ];
+ [
+ "column6";
+ [
+ "DataType";
+ "Bool"
+ ]
+ ];
+ [
+ "column7";
+ [
+ "DataType";
+ "Bool"
+ ]
+ ];
+ [
+ "column8";
+ [
+ "DataType";
+ "Bool"
+ ]
+ ];
+ [
+ "column9";
+ [
+ "DataType";
+ "Bool"
+ ]
+ ];
+ [
+ "column10";
+ [
+ "DataType";
+ "Bool"
+ ]
+ ];
+ [
+ "column11";
+ [
+ "DataType";
+ "Bool"
+ ]
+ ];
+ [
+ "column12";
+ [
+ "DataType";
+ "Bool"
+ ]
+ ];
+ [
+ "column13";
+ [
+ "DataType";
+ "Bool"
+ ]
+ ];
+ [
+ "column14";
+ [
+ "DataType";
+ "Bool"
+ ]
+ ];
+ [
+ "column15";
+ [
+ "DataType";
+ "Bool"
+ ]
+ ];
+ [
+ "column16";
+ [
+ "DataType";
+ "Bool"
+ ]
+ ];
+ [
+ "column17";
+ [
+ "DataType";
+ "Bool"
+ ]
+ ]
+ ]
+ ]
+ ];
+ "Data" = [
+ [
+ %true;
+ %false;
+ %true;
+ %false;
+ %true;
+ %false;
+ %true;
+ %false;
+ %true;
+ %false;
+ %true;
+ %false;
+ %true;
+ %false;
+ %true;
+ %false;
+ %true;
+ %false
+ ]
+ ]
+ }
+ ]
+ }
+] \ No newline at end of file
diff --git a/yql/essentials/udfs/common/unicode_base/test/canondata/test.test_List_/results.txt b/yql/essentials/udfs/common/unicode_base/test/canondata/test.test_List_/results.txt
new file mode 100644
index 00000000000..5cf8e238cfa
--- /dev/null
+++ b/yql/essentials/udfs/common/unicode_base/test/canondata/test.test_List_/results.txt
@@ -0,0 +1,265 @@
+[
+ {
+ "Write" = [
+ {
+ "Type" = [
+ "ListType";
+ [
+ "StructType";
+ [
+ [
+ "value";
+ [
+ "DataType";
+ "Utf8"
+ ]
+ ];
+ [
+ "equals_to_original";
+ [
+ "DataType";
+ "Utf8"
+ ]
+ ];
+ [
+ "replace_delimeter";
+ [
+ "DataType";
+ "Utf8"
+ ]
+ ];
+ [
+ "just_split";
+ [
+ "ListType";
+ [
+ "DataType";
+ "Utf8"
+ ]
+ ]
+ ];
+ [
+ "first";
+ [
+ "OptionalType";
+ [
+ "DataType";
+ "Utf8"
+ ]
+ ]
+ ];
+ [
+ "skip_empty";
+ [
+ "ListType";
+ [
+ "DataType";
+ "Utf8"
+ ]
+ ]
+ ];
+ [
+ "multichar_delim_set";
+ [
+ "ListType";
+ [
+ "DataType";
+ "Utf8"
+ ]
+ ]
+ ];
+ [
+ "multichar_delim_string";
+ [
+ "ListType";
+ [
+ "DataType";
+ "Utf8"
+ ]
+ ]
+ ];
+ [
+ "limited";
+ [
+ "ListType";
+ [
+ "DataType";
+ "Utf8"
+ ]
+ ]
+ ]
+ ]
+ ]
+ ];
+ "Data" = [
+ [
+ "a@b@c";
+ "a@b@c";
+ "a#b#c";
+ [
+ "a";
+ "b";
+ "c"
+ ];
+ [
+ "a"
+ ];
+ [
+ "a";
+ "b";
+ "c"
+ ];
+ [
+ "a";
+ "";
+ "";
+ "c"
+ ];
+ [
+ "a@";
+ "c"
+ ];
+ [
+ "a";
+ "b@c"
+ ]
+ ];
+ [
+ "@a@b@c";
+ "@a@b@c";
+ "#a#b#c";
+ [
+ "";
+ "a";
+ "b";
+ "c"
+ ];
+ [
+ ""
+ ];
+ [
+ "a";
+ "b";
+ "c"
+ ];
+ [
+ "";
+ "a";
+ "";
+ "";
+ "c"
+ ];
+ [
+ "@a@";
+ "c"
+ ];
+ [
+ "";
+ "a@b@c"
+ ]
+ ];
+ [
+ "@@@a@a";
+ "@@@a@a";
+ "###a#a";
+ [
+ "";
+ "";
+ "";
+ "a";
+ "a"
+ ];
+ [
+ ""
+ ];
+ [
+ "a";
+ "a"
+ ];
+ [
+ "";
+ "";
+ "";
+ "a";
+ "a"
+ ];
+ [
+ "@@@a@a"
+ ];
+ [
+ "";
+ "@@a@a"
+ ]
+ ];
+ [
+ "d#e#f";
+ "d#e#f";
+ "d#e#f";
+ [
+ "d#e#f"
+ ];
+ [
+ "d#e#f"
+ ];
+ [
+ "d#e#f"
+ ];
+ [
+ "d#e#f"
+ ];
+ [
+ "d#e#f"
+ ];
+ [
+ "d#e#f"
+ ]
+ ];
+ [
+ "d";
+ "d";
+ "d";
+ [
+ "d"
+ ];
+ [
+ "d"
+ ];
+ [
+ "d"
+ ];
+ [
+ "d"
+ ];
+ [
+ "d"
+ ];
+ [
+ "d"
+ ]
+ ];
+ [
+ "";
+ "";
+ "";
+ [
+ ""
+ ];
+ [
+ ""
+ ];
+ [];
+ [
+ ""
+ ];
+ [
+ ""
+ ];
+ [
+ ""
+ ]
+ ]
+ ]
+ }
+ ]
+ }
+] \ No newline at end of file
diff --git a/yql/essentials/udfs/common/unicode_base/test/canondata/test.test_Remove_/results.txt b/yql/essentials/udfs/common/unicode_base/test/canondata/test.test_Remove_/results.txt
new file mode 100644
index 00000000000..11bcb15a2f2
--- /dev/null
+++ b/yql/essentials/udfs/common/unicode_base/test/canondata/test.test_Remove_/results.txt
@@ -0,0 +1,178 @@
+[
+ {
+ "Write" = [
+ {
+ "Type" = [
+ "ListType";
+ [
+ "StructType";
+ [
+ [
+ "column0";
+ [
+ "OptionalType";
+ [
+ "DataType";
+ "Utf8"
+ ]
+ ]
+ ];
+ [
+ "all";
+ [
+ "OptionalType";
+ [
+ "DataType";
+ "Utf8"
+ ]
+ ]
+ ];
+ [
+ "first";
+ [
+ "OptionalType";
+ [
+ "DataType";
+ "Utf8"
+ ]
+ ]
+ ];
+ [
+ "last";
+ [
+ "OptionalType";
+ [
+ "DataType";
+ "Utf8"
+ ]
+ ]
+ ];
+ [
+ "first2";
+ [
+ "OptionalType";
+ [
+ "DataType";
+ "Utf8"
+ ]
+ ]
+ ];
+ [
+ "last2";
+ [
+ "OptionalType";
+ [
+ "DataType";
+ "Utf8"
+ ]
+ ]
+ ]
+ ]
+ ]
+ ];
+ "Data" = [
+ [
+ [
+ "\xD1\x8B\xD0\xB2\xD0\xB0\xD1\x8B\xD0\xB2\xD0\xB0"
+ ];
+ [
+ "\xD1\x8B\xD0\xB2\xD1\x8B\xD0\xB2"
+ ];
+ [
+ "\xD1\x8B\xD0\xB2\xD1\x8B\xD0\xB2\xD0\xB0"
+ ];
+ [
+ "\xD1\x8B\xD0\xB2\xD0\xB0\xD1\x8B\xD0\xB2"
+ ];
+ [
+ "\xD1\x8B\xD0\xB2\xD1\x8B\xD0\xB2\xD0\xB0"
+ ];
+ [
+ "\xD1\x8B\xD0\xB2\xD0\xB0\xD1\x8B\xD0\xB2"
+ ]
+ ];
+ [
+ [
+ "\xD1\x8F\xD1\x87\xD1\x81\xD1\x8F\xD1\x87\xD1\x81\xD1\x8F\xD0\xB0\xD0\xB0\xD1\x87\xD1\x8B"
+ ];
+ [
+ "\xD1\x8F\xD1\x87\xD1\x81\xD1\x8F\xD1\x87\xD1\x81\xD1\x8F\xD1\x87\xD1\x8B"
+ ];
+ [
+ "\xD1\x8F\xD1\x87\xD1\x81\xD1\x8F\xD1\x87\xD1\x81\xD1\x8F\xD0\xB0\xD1\x87\xD1\x8B"
+ ];
+ [
+ "\xD1\x8F\xD1\x87\xD1\x81\xD1\x8F\xD1\x87\xD1\x81\xD1\x8F\xD0\xB0\xD1\x87\xD1\x8B"
+ ];
+ [
+ "\xD1\x8F\xD1\x87\xD1\x81\xD1\x8F\xD1\x87\xD1\x81\xD1\x8F\xD0\xB0\xD1\x87\xD1\x8B"
+ ];
+ [
+ "\xD1\x8F\xD1\x87\xD1\x81\xD1\x8F\xD1\x87\xD1\x81\xD1\x8F\xD0\xB0\xD1\x87\xD1\x8B"
+ ]
+ ];
+ [
+ [
+ "\xD0\xB0\xD0\xB0\xD0\xB2\xD1\x8B\xD0\xB0\xD1\x8B\xD0\xB2\xD0\xB0\xD1\x8B\xD0\xB2\xD0\xB0"
+ ];
+ [
+ "\xD0\xB2\xD1\x8B\xD1\x8B\xD0\xB2\xD1\x8B\xD0\xB2"
+ ];
+ [
+ "\xD0\xB0\xD0\xB2\xD1\x8B\xD0\xB0\xD1\x8B\xD0\xB2\xD0\xB0\xD1\x8B\xD0\xB2\xD0\xB0"
+ ];
+ [
+ "\xD0\xB0\xD0\xB0\xD0\xB2\xD1\x8B\xD0\xB0\xD1\x8B\xD0\xB2\xD0\xB0\xD1\x8B\xD0\xB2"
+ ];
+ [
+ "\xD0\xB0\xD0\xB2\xD1\x8B\xD0\xB0\xD1\x8B\xD0\xB2\xD0\xB0\xD1\x8B\xD0\xB2\xD0\xB0"
+ ];
+ [
+ "\xD0\xB0\xD0\xB0\xD0\xB2\xD1\x8B\xD0\xB0\xD1\x8B\xD0\xB2\xD0\xB0\xD1\x8B\xD0\xB2"
+ ]
+ ];
+ [
+ [
+ "gd2\xD1\x86\xD0\2713\xD1\x8B\xD0\xB2\xD0\xB0\xD0\xB0\xD1\x84\xD1\x8B"
+ ];
+ [
+ "gd2\xD1\x86\xD0\2713\xD1\x8B\xD0\xB2\xD1\x8B"
+ ];
+ [
+ "gd2\xD1\x86\xD0\2713\xD1\x8B\xD0\xB2\xD0\xB0\xD1\x84\xD1\x8B"
+ ];
+ [
+ "gd2\xD1\x86\xD0\2713\xD1\x8B\xD0\xB2\xD0\xB0\xD1\x84\xD1\x8B"
+ ];
+ [
+ "gd2\xD1\x86\xD0\2713\xD1\x8B\xD0\xB2\xD0\xB0\xD1\x84\xD1\x8B"
+ ];
+ [
+ "gd2\xD1\x86\xD0\2713\xD1\x8B\xD0\xB2\xD0\xB0\xD0\xB0\xD1\x8B"
+ ]
+ ];
+ [
+ [
+ ""
+ ];
+ [
+ ""
+ ];
+ [
+ ""
+ ];
+ [
+ ""
+ ];
+ [
+ ""
+ ];
+ [
+ ""
+ ]
+ ]
+ ]
+ }
+ ]
+ }
+] \ No newline at end of file
diff --git a/yql/essentials/udfs/common/unicode_base/test/canondata/test.test_Replace_/results.txt b/yql/essentials/udfs/common/unicode_base/test/canondata/test.test_Replace_/results.txt
new file mode 100644
index 00000000000..7390dbdbc32
--- /dev/null
+++ b/yql/essentials/udfs/common/unicode_base/test/canondata/test.test_Replace_/results.txt
@@ -0,0 +1,228 @@
+[
+ {
+ "Write" = [
+ {
+ "Type" = [
+ "ListType";
+ [
+ "StructType";
+ [
+ [
+ "column0";
+ [
+ "OptionalType";
+ [
+ "DataType";
+ "Utf8"
+ ]
+ ]
+ ];
+ [
+ "all";
+ [
+ "OptionalType";
+ [
+ "DataType";
+ "Utf8"
+ ]
+ ]
+ ];
+ [
+ "first";
+ [
+ "OptionalType";
+ [
+ "DataType";
+ "Utf8"
+ ]
+ ]
+ ];
+ [
+ "last";
+ [
+ "OptionalType";
+ [
+ "DataType";
+ "Utf8"
+ ]
+ ]
+ ];
+ [
+ "first2";
+ [
+ "OptionalType";
+ [
+ "DataType";
+ "Utf8"
+ ]
+ ]
+ ];
+ [
+ "last2";
+ [
+ "OptionalType";
+ [
+ "DataType";
+ "Utf8"
+ ]
+ ]
+ ];
+ [
+ "first3";
+ [
+ "OptionalType";
+ [
+ "DataType";
+ "Utf8"
+ ]
+ ]
+ ];
+ [
+ "last3";
+ [
+ "OptionalType";
+ [
+ "DataType";
+ "Utf8"
+ ]
+ ]
+ ]
+ ]
+ ]
+ ];
+ "Data" = [
+ [
+ [
+ "\xD1\x8B\xD0\xB2\xD0\xB0\xD1\x8B\xD0\xB2\xD0\xB0"
+ ];
+ [
+ "\xD1\x8B\xD0\xB2\xD0\xB0\xD1\x8B\xD0\xB2\xD0\xB0"
+ ];
+ [
+ "\xD1\x8B\xD0\xB2z\xD1\x8B\xD0\xB2\xD0\xB0"
+ ];
+ [
+ "\xD1\x8B\xD0\xB2\xD0\xB0\xD1\x8B\xD0\xB2z"
+ ];
+ [
+ "\xD1\x8B\xD0\xB2\xD1\x8B\xD0\xB2\xD0\xB0"
+ ];
+ [
+ "\xD1\x8B\xD0\xB2\xD0\xB0\xD1\x8B\xD0\xB2"
+ ];
+ [
+ "\xD1\x8B\xD0\xB2zzz\xD1\x8B\xD0\xB2\xD0\xB0"
+ ];
+ [
+ "\xD1\x8B\xD0\xB2\xD0\xB0\xD1\x8B\xD0\xB2zzz"
+ ]
+ ];
+ [
+ [
+ "\xD1\x8F\xD1\x87\xD1\x81\xD1\x8F\xD1\x87\xD1\x81\xD1\x8F\xD0\xB0\xD0\xB0\xD1\x87\xD1\x8B"
+ ];
+ [
+ "\xD1\x8F\xD1\x87\xD1\x81\xD1\x8F\xD1\x87\xD1\x81\xD1\x8F\xD0\xB0\xD0\xB0\xD1\x87\xD1\x8B"
+ ];
+ [
+ "\xD1\x8F\xD1\x87\xD1\x81\xD1\x8F\xD1\x87\xD1\x81\xD1\x8Fz\xD0\xB0\xD1\x87\xD1\x8B"
+ ];
+ [
+ "\xD1\x8F\xD1\x87\xD1\x81\xD1\x8F\xD1\x87\xD1\x81\xD1\x8F\xD0\xB0z\xD1\x87\xD1\x8B"
+ ];
+ [
+ "\xD1\x8F\xD1\x87\xD1\x81\xD1\x8F\xD1\x87\xD1\x81\xD1\x8F\xD0\xB0\xD1\x87\xD1\x8B"
+ ];
+ [
+ "\xD1\x8F\xD1\x87\xD1\x81\xD1\x8F\xD1\x87\xD1\x81\xD1\x8F\xD0\xB0\xD1\x87\xD1\x8B"
+ ];
+ [
+ "\xD1\x8F\xD1\x87\xD1\x81\xD1\x8F\xD1\x87\xD1\x81\xD1\x8Fzzz\xD0\xB0\xD1\x87\xD1\x8B"
+ ];
+ [
+ "\xD1\x8F\xD1\x87\xD1\x81\xD1\x8F\xD1\x87\xD1\x81\xD1\x8F\xD0\xB0zzz\xD1\x87\xD1\x8B"
+ ]
+ ];
+ [
+ [
+ "\xD0\xB0\xD0\xB0\xD0\xB2\xD1\x8B\xD0\xB0\xD1\x8B\xD0\xB2\xD0\xB0\xD1\x8B\xD0\xB2\xD0\xB0"
+ ];
+ [
+ "\xD0\xB0\xD0\xB0\xD0\xB2\xD1\x8B\xD0\xB0\xD1\x8B\xD0\xB2\xD0\xB0\xD1\x8B\xD0\xB2\xD0\xB0"
+ ];
+ [
+ "z\xD0\xB0\xD0\xB2\xD1\x8B\xD0\xB0\xD1\x8B\xD0\xB2\xD0\xB0\xD1\x8B\xD0\xB2\xD0\xB0"
+ ];
+ [
+ "\xD0\xB0\xD0\xB0\xD0\xB2\xD1\x8B\xD0\xB0\xD1\x8B\xD0\xB2\xD0\xB0\xD1\x8B\xD0\xB2z"
+ ];
+ [
+ "\xD0\xB0\xD0\xB2\xD1\x8B\xD0\xB0\xD1\x8B\xD0\xB2\xD0\xB0\xD1\x8B\xD0\xB2\xD0\xB0"
+ ];
+ [
+ "\xD0\xB0\xD0\xB0\xD0\xB2\xD1\x8B\xD0\xB0\xD1\x8B\xD0\xB2\xD0\xB0\xD1\x8B\xD0\xB2"
+ ];
+ [
+ "zzz\xD0\xB0\xD0\xB2\xD1\x8B\xD0\xB0\xD1\x8B\xD0\xB2\xD0\xB0\xD1\x8B\xD0\xB2\xD0\xB0"
+ ];
+ [
+ "\xD0\xB0\xD0\xB0\xD0\xB2\xD1\x8B\xD0\xB0\xD1\x8B\xD0\xB2\xD0\xB0\xD1\x8B\xD0\xB2zzz"
+ ]
+ ];
+ [
+ [
+ "gd2\xD1\x86\xD0\2713\xD1\x8B\xD0\xB2\xD0\xB0\xD0\xB0\xD1\x84\xD1\x8B"
+ ];
+ [
+ "gd2\xD1\x86\xD0\2713\xD1\x8B\xD0\xB2\xD0\xB0zzz\xD1\x8B"
+ ];
+ [
+ "gd2\xD1\x86\xD0\2713\xD1\x8B\xD0\xB2z\xD0\xB0\xD1\x84\xD1\x8B"
+ ];
+ [
+ "gd2\xD1\x86\xD0\2713\xD1\x8B\xD0\xB2\xD0\xB0z\xD1\x84\xD1\x8B"
+ ];
+ [
+ "gd2\xD1\x86\xD0\2713\xD1\x8B\xD0\xB2\xD0\xB0\xD1\x84\xD1\x8B"
+ ];
+ [
+ "gd2\xD1\x86\xD0\2713\xD1\x8B\xD0\xB2\xD0\xB0\xD1\x84\xD1\x8B"
+ ];
+ [
+ "gd2\xD1\x86\xD0\2713\xD1\x8B\xD0\xB2zzz\xD0\xB0\xD1\x84\xD1\x8B"
+ ];
+ [
+ "gd2\xD1\x86\xD0\2713\xD1\x8B\xD0\xB2\xD0\xB0zzz\xD1\x84\xD1\x8B"
+ ]
+ ];
+ [
+ [
+ ""
+ ];
+ [
+ ""
+ ];
+ [
+ ""
+ ];
+ [
+ ""
+ ];
+ [
+ ""
+ ];
+ [
+ ""
+ ];
+ [
+ ""
+ ];
+ [
+ ""
+ ]
+ ]
+ ]
+ }
+ ]
+ }
+] \ No newline at end of file
diff --git a/yql/essentials/udfs/common/unicode_base/test/canondata/test.test_Strip_/results.txt b/yql/essentials/udfs/common/unicode_base/test/canondata/test.test_Strip_/results.txt
new file mode 100644
index 00000000000..613b639ed05
--- /dev/null
+++ b/yql/essentials/udfs/common/unicode_base/test/canondata/test.test_Strip_/results.txt
@@ -0,0 +1,76 @@
+[
+ {
+ "Write" = [
+ {
+ "Type" = [
+ "ListType";
+ [
+ "StructType";
+ [
+ [
+ "column0";
+ [
+ "DataType";
+ "Utf8"
+ ]
+ ];
+ [
+ "column1";
+ [
+ "DataType";
+ "Utf8"
+ ]
+ ];
+ [
+ "column2";
+ [
+ "DataType";
+ "Utf8"
+ ]
+ ];
+ [
+ "column3";
+ [
+ "DataType";
+ "Utf8"
+ ]
+ ];
+ [
+ "column4";
+ [
+ "DataType";
+ "Utf8"
+ ]
+ ];
+ [
+ "column5";
+ [
+ "DataType";
+ "Utf8"
+ ]
+ ];
+ [
+ "column6";
+ [
+ "DataType";
+ "Utf8"
+ ]
+ ]
+ ]
+ ]
+ ];
+ "Data" = [
+ [
+ "\xD1\x8B\xD0\xB2\xD0\xB0\xD1\x8B\xD0\xB2\xD0\xB0";
+ "\xD1\x8F\xD1\x87\xD1\x81\xD1\x8F\xD1\x87\xD1\x81\xD1\x8F\xD0\xB0\xD0\xB0\xD1\x87\xD1\x8B";
+ "\xD0\xB0\xD0\xB0\xD0\xB2\xD1\x8B\xD0\xB0\xD1\x8B\xD0\xB2\xD0\xB0\xD1\x8B\xD0\xB2\xD0\xB0";
+ "\xD0\xB0\xD0\xB0\xD0\xB2 \xD1\x8B\xD0\xB0 \xD1\x8B\xD0\xB2\xD0\xB0 \xD1\x8B\xD0\xB2\xD0\xB0";
+ "\xD1\x8B\xD0\xB2\xD0\xB0";
+ "\xD0\xB2\xD0\xB0\xD0\xBE\xD0\xB0\xD0\xBE";
+ ""
+ ]
+ ]
+ }
+ ]
+ }
+] \ No newline at end of file
diff --git a/yql/essentials/udfs/common/unicode_base/test/canondata/test.test_ToUint64F0_/extracted b/yql/essentials/udfs/common/unicode_base/test/canondata/test.test_ToUint64F0_/extracted
new file mode 100644
index 00000000000..d03cedfb327
--- /dev/null
+++ b/yql/essentials/udfs/common/unicode_base/test/canondata/test.test_ToUint64F0_/extracted
@@ -0,0 +1,8 @@
+<tmp_path>/program.sql:<main>: Fatal: Execution
+
+ <tmp_path>/program.sql:<main>:2:1: Fatal: Execution of node: Result
+ SELECT
+ ^
+ <tmp_path>/program.sql:<main>:2:1: Fatal: Input string is not a number
+ SELECT
+ ^ \ No newline at end of file
diff --git a/yql/essentials/udfs/common/unicode_base/test/canondata/test.test_ToUint64F1_/extracted b/yql/essentials/udfs/common/unicode_base/test/canondata/test.test_ToUint64F1_/extracted
new file mode 100644
index 00000000000..3ed803548b5
--- /dev/null
+++ b/yql/essentials/udfs/common/unicode_base/test/canondata/test.test_ToUint64F1_/extracted
@@ -0,0 +1,8 @@
+<tmp_path>/program.sql:<main>: Fatal: Execution
+
+ <tmp_path>/program.sql:<main>:2:1: Fatal: Execution of node: Result
+ SELECT
+ ^
+ <tmp_path>/program.sql:<main>:2:1: Fatal: Input string contains junk after the number
+ SELECT
+ ^ \ No newline at end of file
diff --git a/yql/essentials/udfs/common/unicode_base/test/canondata/test.test_ToUint64F2_/extracted b/yql/essentials/udfs/common/unicode_base/test/canondata/test.test_ToUint64F2_/extracted
new file mode 100644
index 00000000000..c441fbf4e1b
--- /dev/null
+++ b/yql/essentials/udfs/common/unicode_base/test/canondata/test.test_ToUint64F2_/extracted
@@ -0,0 +1,8 @@
+<tmp_path>/program.sql:<main>: Fatal: Execution
+
+ <tmp_path>/program.sql:<main>:2:1: Fatal: Execution of node: Result
+ SELECT
+ ^
+ <tmp_path>/program.sql:<main>:2:1: Fatal: Converted value falls out of Uint64 range
+ SELECT
+ ^ \ No newline at end of file
diff --git a/yql/essentials/udfs/common/unicode_base/test/canondata/test.test_ToUint64_/results.txt b/yql/essentials/udfs/common/unicode_base/test/canondata/test.test_ToUint64_/results.txt
new file mode 100644
index 00000000000..9334d2f22d7
--- /dev/null
+++ b/yql/essentials/udfs/common/unicode_base/test/canondata/test.test_ToUint64_/results.txt
@@ -0,0 +1,76 @@
+[
+ {
+ "Write" = [
+ {
+ "Type" = [
+ "ListType";
+ [
+ "StructType";
+ [
+ [
+ "column0";
+ [
+ "DataType";
+ "Uint64"
+ ]
+ ];
+ [
+ "column1";
+ [
+ "DataType";
+ "Uint64"
+ ]
+ ];
+ [
+ "column2";
+ [
+ "DataType";
+ "Uint64"
+ ]
+ ];
+ [
+ "column3";
+ [
+ "DataType";
+ "Uint64"
+ ]
+ ];
+ [
+ "column4";
+ [
+ "DataType";
+ "Uint64"
+ ]
+ ];
+ [
+ "column5";
+ [
+ "DataType";
+ "Uint64"
+ ]
+ ];
+ [
+ "column6";
+ [
+ "DataType";
+ "Uint64"
+ ]
+ ]
+ ]
+ ]
+ ];
+ "Data" = [
+ [
+ "305441741";
+ "4";
+ "420";
+ "1052688";
+ "42";
+ "33288";
+ "101"
+ ]
+ ]
+ }
+ ]
+ }
+] \ No newline at end of file
diff --git a/yql/essentials/udfs/common/unicode_base/test/canondata/test.test_To_/results.txt b/yql/essentials/udfs/common/unicode_base/test/canondata/test.test_To_/results.txt
new file mode 100644
index 00000000000..7f7b2525d78
--- /dev/null
+++ b/yql/essentials/udfs/common/unicode_base/test/canondata/test.test_To_/results.txt
@@ -0,0 +1,102 @@
+[
+ {
+ "Write" = [
+ {
+ "Type" = [
+ "ListType";
+ [
+ "StructType";
+ [
+ [
+ "value";
+ [
+ "DataType";
+ "Utf8"
+ ]
+ ];
+ [
+ "lower";
+ [
+ "DataType";
+ "Utf8"
+ ]
+ ];
+ [
+ "upper";
+ [
+ "DataType";
+ "Utf8"
+ ]
+ ];
+ [
+ "title";
+ [
+ "DataType";
+ "Utf8"
+ ]
+ ];
+ [
+ "reverse";
+ [
+ "DataType";
+ "Utf8"
+ ]
+ ]
+ ]
+ ]
+ ];
+ "Data" = [
+ [
+ "test";
+ "test";
+ "TEST";
+ "Test";
+ "tset"
+ ];
+ [
+ "\xD1\x82\xD0\xB5\xD1\x81\xD1\x82";
+ "\xD1\x82\xD0\xB5\xD1\x81\xD1\x82";
+ "\xD0\xA2\xD0\x95\xD0\xA1\xD0\xA2";
+ "\xD0\xA2\xD0\xB5\xD1\x81\xD1\x82";
+ "\xD1\x82\xD1\x81\xD0\xB5\xD1\x82"
+ ];
+ [
+ "TeSt";
+ "test";
+ "TEST";
+ "Test";
+ "tSeT"
+ ];
+ [
+ "\xD1\x82\xD0\x95\xD1\x81\xD0\xA2";
+ "\xD1\x82\xD0\xB5\xD1\x81\xD1\x82";
+ "\xD0\xA2\xD0\x95\xD0\xA1\xD0\xA2";
+ "\xD0\xA2\xD0\xB5\xD1\x81\xD1\x82";
+ "\xD0\xA2\xD1\x81\xD0\x95\xD1\x82"
+ ];
+ [
+ "Eyl\xC3\xBCl";
+ "eyl\xC3\xBCl";
+ "EYL\xC3\x9CL";
+ "Eyl\xC3\xBCl";
+ "l\xC3\xBClyE"
+ ];
+ [
+ "6";
+ "6";
+ "6";
+ "6";
+ "6"
+ ];
+ [
+ "";
+ "";
+ "";
+ "";
+ ""
+ ]
+ ]
+ }
+ ]
+ }
+] \ No newline at end of file
diff --git a/yql/essentials/udfs/common/unicode_base/test/canondata/test.test_TryToUint64_/results.txt b/yql/essentials/udfs/common/unicode_base/test/canondata/test.test_TryToUint64_/results.txt
new file mode 100644
index 00000000000..594ac1486c0
--- /dev/null
+++ b/yql/essentials/udfs/common/unicode_base/test/canondata/test.test_TryToUint64_/results.txt
@@ -0,0 +1,198 @@
+[
+ {
+ "Write" = [
+ {
+ "Type" = [
+ "ListType";
+ [
+ "StructType";
+ [
+ [
+ "column0";
+ [
+ "OptionalType";
+ [
+ "DataType";
+ "Uint64"
+ ]
+ ]
+ ]
+ ]
+ ]
+ ];
+ "Data" = [
+ [
+ #
+ ]
+ ]
+ }
+ ]
+ };
+ {
+ "Write" = [
+ {
+ "Type" = [
+ "ListType";
+ [
+ "StructType";
+ [
+ [
+ "column0";
+ [
+ "OptionalType";
+ [
+ "DataType";
+ "Uint64"
+ ]
+ ]
+ ]
+ ]
+ ]
+ ];
+ "Data" = [
+ [
+ #
+ ]
+ ]
+ }
+ ]
+ };
+ {
+ "Write" = [
+ {
+ "Type" = [
+ "ListType";
+ [
+ "StructType";
+ [
+ [
+ "column0";
+ [
+ "OptionalType";
+ [
+ "DataType";
+ "Uint64"
+ ]
+ ]
+ ]
+ ]
+ ]
+ ];
+ "Data" = [
+ [
+ #
+ ]
+ ]
+ }
+ ]
+ };
+ {
+ "Write" = [
+ {
+ "Type" = [
+ "ListType";
+ [
+ "StructType";
+ [
+ [
+ "column0";
+ [
+ "OptionalType";
+ [
+ "DataType";
+ "Uint64"
+ ]
+ ]
+ ];
+ [
+ "column1";
+ [
+ "OptionalType";
+ [
+ "DataType";
+ "Uint64"
+ ]
+ ]
+ ];
+ [
+ "column2";
+ [
+ "OptionalType";
+ [
+ "DataType";
+ "Uint64"
+ ]
+ ]
+ ];
+ [
+ "column3";
+ [
+ "OptionalType";
+ [
+ "DataType";
+ "Uint64"
+ ]
+ ]
+ ];
+ [
+ "column4";
+ [
+ "OptionalType";
+ [
+ "DataType";
+ "Uint64"
+ ]
+ ]
+ ];
+ [
+ "column5";
+ [
+ "OptionalType";
+ [
+ "DataType";
+ "Uint64"
+ ]
+ ]
+ ];
+ [
+ "column6";
+ [
+ "OptionalType";
+ [
+ "DataType";
+ "Uint64"
+ ]
+ ]
+ ]
+ ]
+ ]
+ ];
+ "Data" = [
+ [
+ [
+ "305441741"
+ ];
+ [
+ "4"
+ ];
+ [
+ "420"
+ ];
+ [
+ "1052688"
+ ];
+ [
+ "42"
+ ];
+ [
+ "101010"
+ ];
+ [
+ "101"
+ ]
+ ]
+ ]
+ }
+ ]
+ }
+] \ No newline at end of file
diff --git a/yql/essentials/udfs/common/unicode_base/test/canondata/test.test_Unicode_/results.txt b/yql/essentials/udfs/common/unicode_base/test/canondata/test.test_Unicode_/results.txt
new file mode 100644
index 00000000000..465ad350553
--- /dev/null
+++ b/yql/essentials/udfs/common/unicode_base/test/canondata/test.test_Unicode_/results.txt
@@ -0,0 +1,509 @@
+[
+ {
+ "Write" = [
+ {
+ "Type" = [
+ "ListType";
+ [
+ "StructType";
+ [
+ [
+ "value";
+ [
+ "OptionalType";
+ [
+ "DataType";
+ "Utf8"
+ ]
+ ]
+ ];
+ [
+ "normalize";
+ [
+ "OptionalType";
+ [
+ "DataType";
+ "Utf8"
+ ]
+ ]
+ ];
+ [
+ "is";
+ [
+ "DataType";
+ "Bool"
+ ]
+ ];
+ [
+ "length";
+ [
+ "OptionalType";
+ [
+ "DataType";
+ "Uint64"
+ ]
+ ]
+ ];
+ [
+ "one_end_substring";
+ [
+ "OptionalType";
+ [
+ "DataType";
+ "Utf8"
+ ]
+ ]
+ ];
+ [
+ "two_end_substring";
+ [
+ "OptionalType";
+ [
+ "DataType";
+ "Utf8"
+ ]
+ ]
+ ];
+ [
+ "remove_all";
+ [
+ "OptionalType";
+ [
+ "DataType";
+ "Utf8"
+ ]
+ ]
+ ];
+ [
+ "levenstein";
+ [
+ "OptionalType";
+ [
+ "DataType";
+ "Uint64"
+ ]
+ ]
+ ];
+ [
+ "code_point_list";
+ [
+ "OptionalType";
+ [
+ "ListType";
+ [
+ "DataType";
+ "Uint32"
+ ]
+ ]
+ ]
+ ];
+ [
+ "from_code_point_list";
+ [
+ "OptionalType";
+ [
+ "DataType";
+ "Utf8"
+ ]
+ ]
+ ];
+ [
+ "from_lazy_code_point_list";
+ [
+ "OptionalType";
+ [
+ "DataType";
+ "Utf8"
+ ]
+ ]
+ ];
+ [
+ "reverse";
+ [
+ "OptionalType";
+ [
+ "DataType";
+ "Utf8"
+ ]
+ ]
+ ];
+ [
+ "find";
+ [
+ "OptionalType";
+ [
+ "DataType";
+ "Uint64"
+ ]
+ ]
+ ];
+ [
+ "rfind";
+ [
+ "OptionalType";
+ [
+ "DataType";
+ "Uint64"
+ ]
+ ]
+ ];
+ [
+ "find_from";
+ [
+ "OptionalType";
+ [
+ "DataType";
+ "Uint64"
+ ]
+ ]
+ ];
+ [
+ "rfind_from";
+ [
+ "OptionalType";
+ [
+ "DataType";
+ "Uint64"
+ ]
+ ]
+ ]
+ ]
+ ]
+ ];
+ "Data" = [
+ [
+ [
+ "Eyl\xC3\xBCl"
+ ];
+ [
+ "Eyl\xC3\xBCl"
+ ];
+ %true;
+ [
+ "5"
+ ];
+ [
+ "yl\xC3\xBCl"
+ ];
+ [
+ "Ey"
+ ];
+ [
+ "Eyl\xC3\xBCl"
+ ];
+ [
+ "5"
+ ];
+ [
+ [
+ "69";
+ "121";
+ "108";
+ "252";
+ "108"
+ ]
+ ];
+ [
+ "Eyl\xC3\xBCl"
+ ];
+ [
+ "Eyl\xC3\xBCl"
+ ];
+ [
+ "l\xC3\xBClyE"
+ ];
+ #;
+ #;
+ #;
+ #
+ ];
+ [
+ [
+ "\xD0\xB6\xD0\xBD\xD1\x96\xD1\x9E\xD0\xBD\xD1\x8F"
+ ];
+ [
+ "\xD0\xB6\xD0\xBD\xD1\x96\xD1\x9E\xD0\xBD\xD1\x8F"
+ ];
+ %true;
+ [
+ "6"
+ ];
+ [
+ "\xD0\xBD\xD1\x96\xD1\x9E\xD0\xBD\xD1\x8F"
+ ];
+ [
+ "\xD0\xB6\xD0\xBD"
+ ];
+ [
+ "\xD0\xB6\xD0\xBD\xD1\x96\xD1\x9E\xD0\xBD\xD1\x8F"
+ ];
+ [
+ "5"
+ ];
+ [
+ [
+ "1078";
+ "1085";
+ "1110";
+ "1118";
+ "1085";
+ "1103"
+ ]
+ ];
+ [
+ "\xD0\xB6\xD0\xBD\xD1\x96\xD1\x9E\xD0\xBD\xD1\x8F"
+ ];
+ [
+ "\xD0\xB6\xD0\xBD\xD1\x96\xD1\x9E\xD0\xBD\xD1\x8F"
+ ];
+ [
+ "\xD1\x8F\xD0\xBD\xD1\x9E\xD1\x96\xD0\xBD\xD0\xB6"
+ ];
+ #;
+ #;
+ #;
+ #
+ ];
+ [
+ [
+ "\xC3\xBAnora"
+ ];
+ [
+ "\xC3\xBAnora"
+ ];
+ %true;
+ [
+ "5"
+ ];
+ [
+ "nora"
+ ];
+ [
+ "\xC3\xBAn"
+ ];
+ [
+ "\xC3\xBAnoa"
+ ];
+ [
+ "5"
+ ];
+ [
+ [
+ "250";
+ "110";
+ "111";
+ "114";
+ "97"
+ ]
+ ];
+ [
+ "\xC3\xBAnora"
+ ];
+ [
+ "\xC3\xBAnora"
+ ];
+ [
+ "aron\xC3\xBA"
+ ];
+ #;
+ #;
+ #;
+ #
+ ];
+ [
+ [
+ "Ci\xD1\x87 Ci\xD1\x87"
+ ];
+ [
+ "Ci\xD1\x87 Ci\xD1\x87"
+ ];
+ %true;
+ [
+ "7"
+ ];
+ [
+ "i\xD1\x87 Ci\xD1\x87"
+ ];
+ [
+ "Ci"
+ ];
+ [
+ "Ci Ci"
+ ];
+ [
+ "5"
+ ];
+ [
+ [
+ "67";
+ "105";
+ "1095";
+ "32";
+ "67";
+ "105";
+ "1095"
+ ]
+ ];
+ [
+ "Ci\xD1\x87 Ci\xD1\x87"
+ ];
+ [
+ "Ci\xD1\x87 Ci\xD1\x87"
+ ];
+ [
+ "\xD1\x87iC \xD1\x87iC"
+ ];
+ #;
+ #;
+ #;
+ #
+ ];
+ [
+ [
+ "\xD0\xBF\xD1\x80\xD0\xB8\xD0\xB2\xD0\xB5\xD1\x82 \xD0\xBF\xD1\x80\xD0\xB8\xD0\xB2\xD0\xB5\xD1\x82"
+ ];
+ [
+ "\xD0\xBF\xD1\x80\xD0\xB8\xD0\xB2\xD0\xB5\xD1\x82 \xD0\xBF\xD1\x80\xD0\xB8\xD0\xB2\xD0\xB5\xD1\x82"
+ ];
+ %true;
+ [
+ "13"
+ ];
+ [
+ "\xD1\x80\xD0\xB8\xD0\xB2\xD0\xB5\xD1\x82 \xD0\xBF\xD1\x80\xD0\xB8\xD0\xB2\xD0\xB5\xD1\x82"
+ ];
+ [
+ "\xD0\xBF\xD1\x80"
+ ];
+ [
+ "\xD1\x80\xD0\xB8\xD0\xB2\xD0\xB5\xD1\x82 \xD1\x80\xD0\xB8\xD0\xB2\xD0\xB5\xD1\x82"
+ ];
+ [
+ "5"
+ ];
+ [
+ [
+ "1087";
+ "1088";
+ "1080";
+ "1074";
+ "1077";
+ "1090";
+ "32";
+ "1087";
+ "1088";
+ "1080";
+ "1074";
+ "1077";
+ "1090"
+ ]
+ ];
+ [
+ "\xD0\xBF\xD1\x80\xD0\xB8\xD0\xB2\xD0\xB5\xD1\x82 \xD0\xBF\xD1\x80\xD0\xB8\xD0\xB2\xD0\xB5\xD1\x82"
+ ];
+ [
+ "\xD0\xBF\xD1\x80\xD0\xB8\xD0\xB2\xD0\xB5\xD1\x82 \xD0\xBF\xD1\x80\xD0\xB8\xD0\xB2\xD0\xB5\xD1\x82"
+ ];
+ [
+ "\xD1\x82\xD0\xB5\xD0\xB2\xD0\xB8\xD1\x80\xD0\xBF \xD1\x82\xD0\xB5\xD0\xB2\xD0\xB8\xD1\x80\xD0\xBF"
+ ];
+ [
+ "4"
+ ];
+ [
+ "11"
+ ];
+ [
+ "11"
+ ];
+ [
+ "4"
+ ]
+ ];
+ [
+ [
+ "6"
+ ];
+ [
+ "6"
+ ];
+ %true;
+ [
+ "1"
+ ];
+ [
+ ""
+ ];
+ [
+ "6"
+ ];
+ [
+ "6"
+ ];
+ [
+ "1"
+ ];
+ [
+ [
+ "54"
+ ]
+ ];
+ [
+ "6"
+ ];
+ [
+ "6"
+ ];
+ [
+ "6"
+ ];
+ #;
+ #;
+ #;
+ #
+ ];
+ [
+ [
+ ""
+ ];
+ [
+ ""
+ ];
+ %true;
+ [
+ "0"
+ ];
+ [
+ ""
+ ];
+ [
+ ""
+ ];
+ [
+ ""
+ ];
+ [
+ "0"
+ ];
+ [
+ []
+ ];
+ [
+ ""
+ ];
+ [
+ ""
+ ];
+ [
+ ""
+ ];
+ #;
+ #;
+ #;
+ #
+ ]
+ ]
+ }
+ ]
+ }
+] \ No newline at end of file
diff --git a/yql/essentials/udfs/common/unicode_base/test/cases/Find.sql b/yql/essentials/udfs/common/unicode_base/test/cases/Find.sql
new file mode 100644
index 00000000000..9a9a58752e3
--- /dev/null
+++ b/yql/essentials/udfs/common/unicode_base/test/cases/Find.sql
@@ -0,0 +1,13 @@
+-- Test case for Unicode::Find / Unicode::RFind combined with Unicode::Substring.
+$text ="lästig, möchten, ausführlich, später, können, natürlich, universität, öffentlich, rückwärts, kämpfen, mögen, überall, regelmäßig"u;
+
+-- For each umlaut: the substring that starts at the Find() position and has length RFind() - Find().
+SELECT
+ Unicode::Substring($text, Unicode::Find($text, "ä"u), Unicode::RFind($text, "ä"u) - Unicode::Find($text, "ä"u)),
+ Unicode::Substring($text, Unicode::Find($text, "ö"u), Unicode::RFind($text, "ö"u) - Unicode::Find($text, "ö"u)),
+ Unicode::Substring($text, Unicode::Find($text, "ü"u), Unicode::RFind($text, "ü"u) - Unicode::Find($text, "ü"u));
+
+
+-- Same extraction, but Find/RFind receive an explicit third argument
+-- (presumably the position the search starts from; confirm against the UDF).
+SELECT
+ Unicode::Substring($text, Unicode::Find($text, "ä"u, 30ul), Unicode::RFind($text, "ä"u, 123ul) - Unicode::Find($text, "ä"u, 30ul)),
+ Unicode::Substring($text, Unicode::Find($text, "ö"u, 9ul), Unicode::RFind($text, "ö"u, 103ul) - Unicode::Find($text, "ö"u, 9ul)),
+ Unicode::Substring($text, Unicode::Find($text, "ü"u, 45ul), Unicode::RFind($text, "ü"u, 83ul) - Unicode::Find($text, "ü"u, 45ul));
+
diff --git a/yql/essentials/udfs/common/unicode_base/test/cases/IsCategory.sql b/yql/essentials/udfs/common/unicode_base/test/cases/IsCategory.sql
new file mode 100644
index 00000000000..2effa23221e
--- /dev/null
+++ b/yql/essentials/udfs/common/unicode_base/test/cases/IsCategory.sql
@@ -0,0 +1,21 @@
+/* syntax version 1 */
+-- Test case for the Unicode::Is* predicates. Each predicate is called twice with
+-- different literals; judging by the inputs, the first call is meant to match and
+-- the second is not (confirm against the canonical results).
+SELECT
+ Unicode::IsAscii("sdf"u),
+ Unicode::IsAscii("выавыа"u),
+ Unicode::IsSpace(" \u2002\u200a"u),
+ Unicode::IsSpace("выавыа"u),
+ Unicode::IsUpper("ФЫВ"u),
+ Unicode::IsUpper("вВаВыа"u),
+ Unicode::IsLower("фыв"u),
+ Unicode::IsLower("вВаВыа"u),
+ Unicode::IsDigit("1234"u),
+ Unicode::IsDigit("выавыа"u),
+ Unicode::IsAlpha("фвфы"u),
+ Unicode::IsAlpha("вы2в-а"u),
+ Unicode::IsAlnum("фыв13в"u),
+ Unicode::IsAlnum("выа1-}ыв"u),
+ Unicode::IsHex("0F3A4E"u),
+ Unicode::IsHex("ваоао"u),
+ Unicode::IsUnicodeSet("ваоао"u, "[вао]"u),
+ Unicode::IsUnicodeSet("ваоао"u, "[ваб]"u)
+
diff --git a/yql/essentials/udfs/common/unicode_base/test/cases/List.in b/yql/essentials/udfs/common/unicode_base/test/cases/List.in
new file mode 100644
index 00000000000..949cf26c776
--- /dev/null
+++ b/yql/essentials/udfs/common/unicode_base/test/cases/List.in
@@ -0,0 +1,6 @@
+{"key"="1";"subkey"="1";"value"="a@b@c"};
+{"key"="1";"subkey"="1";"value"="@a@b@c"};
+{"key"="1";"subkey"="1";"value"="@@@a@a"};
+{"key"="2";"subkey"="2";"value"="d#e#f"};
+{"key"="3";"subkey"="3";"value"="d"};
+{"key"="4";"subkey"="4";"value"=""};
diff --git a/yql/essentials/udfs/common/unicode_base/test/cases/List.in.attr b/yql/essentials/udfs/common/unicode_base/test/cases/List.in.attr
new file mode 100644
index 00000000000..990efb1ff2c
--- /dev/null
+++ b/yql/essentials/udfs/common/unicode_base/test/cases/List.in.attr
@@ -0,0 +1,12 @@
+{"_yql_row_spec"={
+ "Type"=["StructType";[
+ ["key";["DataType";"Utf8"]];
+ ["subkey";["DataType";"Utf8"]];
+ ["value";["DataType";"Utf8"]]
+ ]];
+ "SortDirections"=[1;1;];
+ "SortedBy"=["key";"subkey";];
+ "SortedByTypes"=[["DataType";"Utf8";];["DataType";"Utf8";];];
+ "SortMembers"=["key";"subkey";];
+}}
+
diff --git a/yql/essentials/udfs/common/unicode_base/test/cases/List.sql b/yql/essentials/udfs/common/unicode_base/test/cases/List.sql
new file mode 100644
index 00000000000..814c5cb27b0
--- /dev/null
+++ b/yql/essentials/udfs/common/unicode_base/test/cases/List.sql
@@ -0,0 +1,12 @@
+/* syntax version 1 */
+-- Test case for Unicode::SplitToList / Unicode::JoinFromList over the `value` column of Input.
+-- The Ensure() column checks that splitting on "@" and joining back with "@" reproduces `value`.
+-- The remaining columns exercise the named arguments SkipEmpty, DelimeterString and Limit
+-- (the spelling "Delimeter" is the argument name used by the calls below, not a typo to fix here).
+SELECT
+ value,
+ Ensure(value, Unicode::JoinFromList(Unicode::SplitToList(value, "@"u), "@"u) == value) AS equals_to_original,
+ Unicode::JoinFromList(Unicode::SplitToList(value, "@"u), "#"u) AS replace_delimeter,
+ Unicode::SplitToList(value, "@"u) AS just_split,
+ Unicode::SplitToList(value, "@"u)[0] as first,
+ Unicode::SplitToList(value, "@"u, true AS SkipEmpty) AS skip_empty,
+ Unicode::SplitToList(value, "b@"u, false AS DelimeterString) AS multichar_delim_set,
+ Unicode::SplitToList(value, "b@"u, true AS DelimeterString) AS multichar_delim_string,
+ Unicode::SplitToList(value, "@"u, 1 AS Limit) AS limited
+FROM Input;
diff --git a/yql/essentials/udfs/common/unicode_base/test/cases/Remove.sql b/yql/essentials/udfs/common/unicode_base/test/cases/Remove.sql
new file mode 100644
index 00000000000..ee96037f79b
--- /dev/null
+++ b/yql/essentials/udfs/common/unicode_base/test/cases/Remove.sql
@@ -0,0 +1,9 @@
+/* syntax version 1 */
+-- Test case for Unicode::RemoveAll / RemoveFirst / RemoveLast applied to `value` cast to Utf8.
+-- The second argument is used both as a single character ("а") and as a two-character string ("фа").
+SELECT
+ CAST(value AS Utf8),
+ Unicode::RemoveAll(CAST(value AS Utf8), Utf8("фа")) AS all,
+ Unicode::RemoveFirst(CAST(value AS Utf8), Utf8("а")) AS first,
+ Unicode::RemoveLast(CAST(value AS Utf8), Utf8("а")) AS last,
+ Unicode::RemoveFirst(CAST(value AS Utf8), Utf8("фа")) AS first2,
+ Unicode::RemoveLast(CAST(value AS Utf8), Utf8("фа")) AS last2
+FROM Input;
diff --git a/yql/essentials/udfs/common/unicode_base/test/cases/Replace.sql b/yql/essentials/udfs/common/unicode_base/test/cases/Replace.sql
new file mode 100644
index 00000000000..d6239844133
--- /dev/null
+++ b/yql/essentials/udfs/common/unicode_base/test/cases/Replace.sql
@@ -0,0 +1,11 @@
+/* syntax version 1 */
+-- Test case for Unicode::ReplaceAll / ReplaceFirst / ReplaceLast applied to `value` cast to Utf8.
+-- Replacements cover a same-length ("z"), an empty ("") and a longer ("zzz") substitute.
+SELECT
+ CAST(value AS Utf8),
+ Unicode::ReplaceAll(CAST(value AS Utf8), Utf8("аф"), Utf8("zzz")) AS all,
+ Unicode::ReplaceFirst(CAST(value AS Utf8), Utf8("а"), Utf8("z")) AS first,
+ Unicode::ReplaceLast(CAST(value AS Utf8), Utf8("а"), Utf8("z")) AS last,
+ Unicode::ReplaceFirst(CAST(value AS Utf8), Utf8("а"), Utf8("")) AS first2,
+ Unicode::ReplaceLast(CAST(value AS Utf8), Utf8("а"), Utf8("")) AS last2,
+ Unicode::ReplaceFirst(CAST(value AS Utf8), Utf8("а"), Utf8("zzz")) AS first3,
+ Unicode::ReplaceLast(CAST(value AS Utf8), Utf8("а"), Utf8("zzz")) AS last3
+FROM Input;
diff --git a/yql/essentials/udfs/common/unicode_base/test/cases/Strip.sql b/yql/essentials/udfs/common/unicode_base/test/cases/Strip.sql
new file mode 100644
index 00000000000..45bde163e06
--- /dev/null
+++ b/yql/essentials/udfs/common/unicode_base/test/cases/Strip.sql
@@ -0,0 +1,9 @@
+/* syntax version 1 */
+-- Test case for Unicode::Strip on literals with no surrounding whitespace, leading or
+-- trailing spaces, inner spaces, escaped characters (\u2009, \u200a, \u2002, \n) and an empty string.
+SELECT
+ Unicode::Strip("ываыва"u),
+ Unicode::Strip(" ячсячсяаачы"u),
+ Unicode::Strip("аавыаываыва "u),
+ Unicode::Strip("аав ыа ыва ыва "u),
+ Unicode::Strip("\u2009ыва\n"u),
+ Unicode::Strip("\u200aваоао\u2002"u),
+ Unicode::Strip(""u)
diff --git a/yql/essentials/udfs/common/unicode_base/test/cases/To.in b/yql/essentials/udfs/common/unicode_base/test/cases/To.in
new file mode 100644
index 00000000000..5effdb9971b
--- /dev/null
+++ b/yql/essentials/udfs/common/unicode_base/test/cases/To.in
@@ -0,0 +1,8 @@
+{"key"="1";"subkey"="1";"value"="test"};
+{"key"="2";"subkey"="2";"value"="\xD1\x82\xD0\xB5\xD1\x81\xD1\x82"};
+{"key"="3";"subkey"="3";"value"="TeSt"};
+{"key"="4";"subkey"="4";"value"="\xD1\x82\xD0\x95\xD1\x81\xD0\xA2"};
+{"key"="5";"subkey"="5";"value"="Eyl\xC3\xBCl"};
+{"key"="6";"subkey"="6";"value"="6"};
+{"key"="4";"subkey"="4";"value"=""};
+
diff --git a/yql/essentials/udfs/common/unicode_base/test/cases/To.in.attr b/yql/essentials/udfs/common/unicode_base/test/cases/To.in.attr
new file mode 100644
index 00000000000..990efb1ff2c
--- /dev/null
+++ b/yql/essentials/udfs/common/unicode_base/test/cases/To.in.attr
@@ -0,0 +1,12 @@
+{"_yql_row_spec"={
+ "Type"=["StructType";[
+ ["key";["DataType";"Utf8"]];
+ ["subkey";["DataType";"Utf8"]];
+ ["value";["DataType";"Utf8"]]
+ ]];
+ "SortDirections"=[1;1;];
+ "SortedBy"=["key";"subkey";];
+ "SortedByTypes"=[["DataType";"Utf8";];["DataType";"Utf8";];];
+ "SortMembers"=["key";"subkey";];
+}}
+
diff --git a/yql/essentials/udfs/common/unicode_base/test/cases/To.sql b/yql/essentials/udfs/common/unicode_base/test/cases/To.sql
new file mode 100644
index 00000000000..c7207d2dcd6
--- /dev/null
+++ b/yql/essentials/udfs/common/unicode_base/test/cases/To.sql
@@ -0,0 +1,9 @@
+/* syntax version 1 */
+-- Test case for Unicode::ToLower / ToUpper / ToTitle / Reverse over the `value` column of Input.
+SELECT
+ value,
+ Unicode::ToLower(value) AS lower,
+ Unicode::ToUpper(value) AS upper,
+ Unicode::ToTitle(value) AS title,
+ Unicode::Reverse(value) AS reverse,
+FROM Input;
+
diff --git a/yql/essentials/udfs/common/unicode_base/test/cases/ToUint64.sql b/yql/essentials/udfs/common/unicode_base/test/cases/ToUint64.sql
new file mode 100644
index 00000000000..c4059a85820
--- /dev/null
+++ b/yql/essentials/udfs/common/unicode_base/test/cases/ToUint64.sql
@@ -0,0 +1,9 @@
+-- Test case for Unicode::ToUint64 with and without an explicit second argument
+-- (16 and 2 below; presumably the numeric base, confirm against the UDF).
+SELECT
+ Unicode::ToUint64("0x1234abcd"),
+ Unicode::ToUint64("0X4"),
+ Unicode::ToUint64("0644"),
+ Unicode::ToUint64("0101010", 16),
+ Unicode::ToUint64("0101010", 2),
+ Unicode::ToUint64("0101010"),
+ Unicode::ToUint64("101");
+
diff --git a/yql/essentials/udfs/common/unicode_base/test/cases/ToUint64F0.cfg b/yql/essentials/udfs/common/unicode_base/test/cases/ToUint64F0.cfg
new file mode 100644
index 00000000000..83cfd96179a
--- /dev/null
+++ b/yql/essentials/udfs/common/unicode_base/test/cases/ToUint64F0.cfg
@@ -0,0 +1,2 @@
+xfail
+
diff --git a/yql/essentials/udfs/common/unicode_base/test/cases/ToUint64F0.sql b/yql/essentials/udfs/common/unicode_base/test/cases/ToUint64F0.sql
new file mode 100644
index 00000000000..dd1182a562d
--- /dev/null
+++ b/yql/essentials/udfs/common/unicode_base/test/cases/ToUint64F0.sql
@@ -0,0 +1,3 @@
+-- Expected-failure case: the sibling ToUint64F0.cfg marks this test as xfail.
+-- Input is a string with no digits.
+SELECT
+ Unicode::ToUint64("hell");
+
diff --git a/yql/essentials/udfs/common/unicode_base/test/cases/ToUint64F1.cfg b/yql/essentials/udfs/common/unicode_base/test/cases/ToUint64F1.cfg
new file mode 100644
index 00000000000..83cfd96179a
--- /dev/null
+++ b/yql/essentials/udfs/common/unicode_base/test/cases/ToUint64F1.cfg
@@ -0,0 +1,2 @@
+xfail
+
diff --git a/yql/essentials/udfs/common/unicode_base/test/cases/ToUint64F1.sql b/yql/essentials/udfs/common/unicode_base/test/cases/ToUint64F1.sql
new file mode 100644
index 00000000000..f42380ee803
--- /dev/null
+++ b/yql/essentials/udfs/common/unicode_base/test/cases/ToUint64F1.sql
@@ -0,0 +1,3 @@
+-- Expected-failure case: the sibling ToUint64F1.cfg marks this test as xfail.
+-- Input has a leading zero followed by the digit 8 (presumably rejected as octal; confirm).
+SELECT
+ Unicode::ToUint64("01238");
+
diff --git a/yql/essentials/udfs/common/unicode_base/test/cases/ToUint64F2.cfg b/yql/essentials/udfs/common/unicode_base/test/cases/ToUint64F2.cfg
new file mode 100644
index 00000000000..83cfd96179a
--- /dev/null
+++ b/yql/essentials/udfs/common/unicode_base/test/cases/ToUint64F2.cfg
@@ -0,0 +1,2 @@
+xfail
+
diff --git a/yql/essentials/udfs/common/unicode_base/test/cases/ToUint64F2.sql b/yql/essentials/udfs/common/unicode_base/test/cases/ToUint64F2.sql
new file mode 100644
index 00000000000..1a9b7e2449f
--- /dev/null
+++ b/yql/essentials/udfs/common/unicode_base/test/cases/ToUint64F2.sql
@@ -0,0 +1,3 @@
+-- Expected-failure case: the sibling ToUint64F2.cfg marks this test as xfail.
+-- Input is a hexadecimal literal with far more digits than a 64-bit value can hold.
+SELECT
+ Unicode::ToUint64("0xFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFF");
+
diff --git a/yql/essentials/udfs/common/unicode_base/test/cases/TryToUint64.sql b/yql/essentials/udfs/common/unicode_base/test/cases/TryToUint64.sql
new file mode 100644
index 00000000000..b2f4fa850ab
--- /dev/null
+++ b/yql/essentials/udfs/common/unicode_base/test/cases/TryToUint64.sql
@@ -0,0 +1,17 @@
+-- Test case for Unicode::TryToUint64, always called with an explicit second argument.
+-- The first three queries reuse the inputs of the xfail cases ToUint64F0..F2.
+SELECT
+ Unicode::TryToUint64("hell", 10);
+
+SELECT
+ Unicode::TryToUint64("01238", 8);
+
+SELECT
+ Unicode::TryToUint64("0xFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFF", 16);
+
+-- The inputs of ToUint64.sql, each with an explicit second argument.
+SELECT
+ Unicode::TryToUint64("0x1234abcd", 16),
+ Unicode::TryToUint64("0X4", 16),
+ Unicode::TryToUint64("0644", 8),
+ Unicode::TryToUint64("0101010", 16),
+ Unicode::TryToUint64("0101010", 2),
+ Unicode::TryToUint64("0101010", 10),
+ Unicode::TryToUint64("101", 10);
diff --git a/yql/essentials/udfs/common/unicode_base/test/cases/Unicode.in b/yql/essentials/udfs/common/unicode_base/test/cases/Unicode.in
new file mode 100644
index 00000000000..55f0307e35c
--- /dev/null
+++ b/yql/essentials/udfs/common/unicode_base/test/cases/Unicode.in
@@ -0,0 +1,7 @@
+{"key"="";"subkey"="";"value"="Eyl\xC3\xBCl"};
+{"key"="";"subkey"="";"value"="\xD0\xB6\xD0\xBD\xD1\x96\xD1\x9E\xD0\xBD\xD1\x8F"};
+{"key"="";"subkey"="";"value"="\xC3\xBAnora"};
+{"key"="";"subkey"="";"value"="Ci\xD1\x87 Ci\xD1\x87"};
+{"key"="";"subkey"="";"value"="\xD0\xBF\xD1\x80\xD0\xB8\xD0\xB2\xD0\xB5\xD1\x82 \xD0\xBF\xD1\x80\xD0\xB8\xD0\xB2\xD0\xB5\xD1\x82"};
+{"key"="";"subkey"="";"value"="6"};
+{"key"="";"subkey"="";"value"=""};
diff --git a/yql/essentials/udfs/common/unicode_base/test/cases/Unicode.sql b/yql/essentials/udfs/common/unicode_base/test/cases/Unicode.sql
new file mode 100644
index 00000000000..b330682b6ed
--- /dev/null
+++ b/yql/essentials/udfs/common/unicode_base/test/cases/Unicode.sql
@@ -0,0 +1,19 @@
+/* syntax version 1 */
+-- General test case for the Unicode UDF: every row of Input is cast to Utf8 in the
+-- subquery and then passed through the functions below.
+SELECT
+ value AS value,
+ Unicode::Normalize(value) AS normalize,
+ Unicode::IsUtf(value) AS is,
+ Unicode::GetLength(value) AS length,
+ Unicode::Substring(value, 1) AS one_end_substring,
+ Unicode::Substring(value, 0, 2) AS two_end_substring,
+ Unicode::RemoveAll(value, "\xD1\x87пr") AS remove_all,
+ Unicode::LevensteinDistance(value, value || Unicode::Substring(value, 0, 5)) AS levenstein,
+ Unicode::ToCodePointList(value) AS code_point_list,
+ -- Round trip through code points, from a plain list and from a YQL::LazyList.
+ Unicode::FromCodePointList(Unicode::ToCodePointList(value)) AS from_code_point_list,
+ Unicode::FromCodePointList(YQL::LazyList(Unicode::ToCodePointList(value))) AS from_lazy_code_point_list,
+ Unicode::Reverse(value) AS reverse,
+ Unicode::Find(value, "ет"u) AS find,
+ Unicode::RFind(value, "ет"u) AS rfind,
+ Unicode::Find(value, "ет"u, 7ul) AS find_from,
+ Unicode::RFind(value, "ет"u, 7ul) AS rfind_from
+FROM (SELECT CAST(value AS Utf8) AS value FROM Input);
diff --git a/yql/essentials/udfs/common/unicode_base/test/cases/default.in b/yql/essentials/udfs/common/unicode_base/test/cases/default.in
new file mode 100644
index 00000000000..6a9412ca375
--- /dev/null
+++ b/yql/essentials/udfs/common/unicode_base/test/cases/default.in
@@ -0,0 +1,5 @@
+{"key"="1";"subkey"="1";"value"="ываыва"};
+{"key"="2";"subkey"="2";"value"="ячсячсяаачы"};
+{"key"="3";"subkey"="3";"value"="аавыаываыва"};
+{"key"="4";"subkey"="4";"value"="gd2цй3ываафы"};
+{"key"="5";"subkey"="5";"value"=""};
diff --git a/yql/essentials/udfs/common/unicode_base/test/ya.make b/yql/essentials/udfs/common/unicode_base/test/ya.make
new file mode 100644
index 00000000000..39cf5f3563b
--- /dev/null
+++ b/yql/essentials/udfs/common/unicode_base/test/ya.make
@@ -0,0 +1,13 @@
+# Test module for the unicode_base UDF; depends on the UDF target built from
+# yql/essentials/udfs/common/unicode_base.
+YQL_UDF_TEST_CONTRIB()
+
+DEPENDS(yql/essentials/udfs/common/unicode_base)
+
+TIMEOUT(300)
+
+SIZE(MEDIUM)
+
+# Under the memory sanitizer the test is tagged ya:not_autocheck (see the ticket below).
+IF (SANITIZER_TYPE == "memory")
+ TAG(ya:not_autocheck) # YQL-15385
+ENDIF()
+
+END()
diff --git a/yql/essentials/udfs/common/unicode_base/unicode_base.cpp b/yql/essentials/udfs/common/unicode_base/unicode_base.cpp
new file mode 100644
index 00000000000..366777ab0eb
--- /dev/null
+++ b/yql/essentials/udfs/common/unicode_base/unicode_base.cpp
@@ -0,0 +1,4 @@
+#include "lib/unicode_base_udf.h"
+
+// Defines the UDF module class TUnicodeModule from EXPORTED_UNICODE_BASE_UDF
+// (expected to be provided by the header above) and registers it.
+SIMPLE_MODULE(TUnicodeModule, EXPORTED_UNICODE_BASE_UDF)
+REGISTER_MODULES(TUnicodeModule)
diff --git a/yql/essentials/udfs/common/unicode_base/ya.make b/yql/essentials/udfs/common/unicode_base/ya.make
new file mode 100644
index 00000000000..b51e12ffb34
--- /dev/null
+++ b/yql/essentials/udfs/common/unicode_base/ya.make
@@ -0,0 +1,30 @@
+# With YQL_PACKAGED set, libunicode_udf.so is taken from a sandbox resource;
+# otherwise the UDF is built from unicode_base.cpp and the lib/ peer directory.
+IF (YQL_PACKAGED)
+ PACKAGE()
+ FROM_SANDBOX(FILE 7319907306 OUT_NOAUTO libunicode_udf.so
+ )
+ END()
+ELSE ()
+YQL_UDF_CONTRIB(unicode_udf)
+
+ YQL_ABI_VERSION(
+ 2
+ 27
+ 0
+ )
+
+ SRCS(
+ unicode_base.cpp
+ )
+
+ PEERDIR(
+ yql/essentials/udfs/common/unicode_base/lib
+ )
+
+ END()
+ENDIF ()
+
+
+# Tests live in the test/ subdirectory.
+RECURSE_FOR_TESTS(
+ test
+)
+
diff --git a/yql/essentials/udfs/common/url_base/lib/url_base_udf.cpp b/yql/essentials/udfs/common/url_base/lib/url_base_udf.cpp
new file mode 100644
index 00000000000..50a3ee8d1f1
--- /dev/null
+++ b/yql/essentials/udfs/common/url_base/lib/url_base_udf.cpp
@@ -0,0 +1 @@
+#include "url_base_udf.h" \ No newline at end of file
diff --git a/yql/essentials/udfs/common/url_base/lib/url_base_udf.h b/yql/essentials/udfs/common/url_base/lib/url_base_udf.h
new file mode 100644
index 00000000000..04ad1b4e469
--- /dev/null
+++ b/yql/essentials/udfs/common/url_base/lib/url_base_udf.h
@@ -0,0 +1,586 @@
+#pragma once
+
+#include "url_parse.h"
+#include "url_query.h"
+
+#include <yql/essentials/public/udf/udf_helpers.h>
+
+#include <yql/essentials/public/udf/arrow/udf_arrow_helpers.h>
+
+#include <library/cpp/tld/tld.h>
+#include <library/cpp/charset/wide.h>
+#include <library/cpp/unicode/punycode/punycode.h>
+#include <library/cpp/string_utils/quote/quote.h>
+#include <library/cpp/string_utils/url/url.h>
+
+#include <util/string/split.h>
+#include <util/string/subst.h>
+
+using namespace NKikimr;
+using namespace NUdf;
+using namespace NTld;
+using namespace NUrlUdf;
+
+// Parses `keyStr` into `parser` via TUri::ParseAbs with TUri::FeaturesRecommended.
+// Returns true only when the parser reports TUri::ParsedOK.
+inline bool PrepareUrl(const std::string_view& keyStr, TUri& parser) {
+    const NUri::TParseFlags& flags(TUri::FeaturesRecommended);
+    const auto state = parser.ParseAbs(keyStr, flags);
+    return TUri::ParsedOK == state;
+}
+
+// Generates an Arrow-capable UDF `udfName` with signature TOptional<char*>(TOptional<char*>)
+// around `functionName`, which is called with the URL as a std::string_view.
+// Both the scalar body and the block kernel (udfName##KernelExec) produce an empty value
+// when the argument is empty or when `functionName` yields an empty string.
+// The scalar body returns a SubString of the argument, located by the distance between
+// url.begin() and res.begin(), so `functionName` is assumed to return a view into its input.
+#define ARROW_UDF_SINGLE_STRING_FUNCTION_FOR_URL(udfName, functionName) \
+ BEGIN_SIMPLE_ARROW_UDF(udfName, TOptional<char*>(TOptional<char*>)) { \
+ EMPTY_RESULT_ON_EMPTY_ARG(0); \
+ const std::string_view url(args[0].AsStringRef()); \
+ const std::string_view res(functionName(url)); \
+ return res.empty() ? TUnboxedValue() : \
+ valueBuilder->SubString(args[0], std::distance(url.begin(), res.begin()), res.size()); \
+ } \
+ struct udfName##KernelExec : public TUnaryKernelExec<udfName##KernelExec> { \
+ template <typename TSink> \
+ static void Process(const IValueBuilder*, TBlockItem arg, const TSink& sink) { \
+ if (!arg) { \
+ return sink(TBlockItem()); \
+ } \
+ const std::string_view url(arg.AsStringRef()); \
+ const std::string_view res(functionName(url)); \
+ if (res.empty()) { \
+ return sink(TBlockItem()); \
+ } \
+ sink(TBlockItem(TStringRef(res))); \
+ } \
+ }; \
+ END_SIMPLE_ARROW_UDF(udfName, udfName##KernelExec::Do);
+
+// TNormalize: parses the argument with PrepareUrl() and, on success, returns the URI
+// printed with TUri::FlagNoFrag. An empty argument or a failed parse gives an empty value.
+BEGIN_SIMPLE_ARROW_UDF(TNormalize, TOptional<char*>(TOptional<char*>)) {
+    EMPTY_RESULT_ON_EMPTY_ARG(0);
+    TUri parsed;
+    if (!PrepareUrl(args[0].AsStringRef(), parsed)) {
+        return TUnboxedValue();
+    }
+    return valueBuilder->NewString(parsed.PrintS(TUri::FlagNoFrag));
+}
+// Block kernel counterpart of TNormalize.
+struct TNormalizeKernelExec : public TUnaryKernelExec<TNormalizeKernelExec> {
+    template <typename TSink>
+    static void Process(const IValueBuilder*, TBlockItem arg, const TSink& sink) {
+        if (!arg) {
+            return sink(TBlockItem());
+        }
+        TUri parsed;
+        if (!PrepareUrl(arg.AsStringRef(), parsed)) {
+            return sink(TBlockItem());
+        }
+        // Keep the printed string in a local so it is alive for the whole sink call.
+        const auto printed = parsed.PrintS(TUri::FlagNoFrag);
+        sink(TBlockItem(TStringRef(printed)));
+    }
+};
+END_SIMPLE_ARROW_UDF(TNormalize, TNormalizeKernelExec::Do);
+
+// TGetScheme: returns the part of the URL that GetSchemePrefix() reports,
+// as a SubString of the argument located by its offset inside the URL.
+BEGIN_SIMPLE_STRICT_ARROW_UDF(TGetScheme, char*(TAutoMap<char*>)) {
+    const std::string_view source(args[0].AsStringRef());
+    const std::string_view schemePrefix(GetSchemePrefix(source));
+    const auto offset = std::distance(source.begin(), schemePrefix.begin());
+    return valueBuilder->SubString(args[0], offset, schemePrefix.size());
+}
+// Block kernel counterpart of TGetScheme.
+struct TGetSchemeKernelExec : public TUnaryKernelExec<TGetSchemeKernelExec> {
+    template <typename TSink>
+    static void Process(const IValueBuilder*, TBlockItem arg, const TSink& sink) {
+        const std::string_view source(arg.AsStringRef());
+        const std::string_view schemePrefix(GetSchemePrefix(source));
+        const auto offset = std::distance(source.begin(), schemePrefix.begin());
+        const std::string_view scheme = source.substr(offset, schemePrefix.size());
+        sink(TBlockItem(scheme));
+    }
+};
+END_SIMPLE_ARROW_UDF(TGetScheme, TGetSchemeKernelExec::Do);
+
+// TGetHost: single-string URL UDF built on GetOnlyHost.
+ARROW_UDF_SINGLE_STRING_FUNCTION_FOR_URL(TGetHost, GetOnlyHost)
+
+// Removes the scheme prefix with CutSchemePrefix() and returns what GetHostAndPort()
+// reports for the remainder.
+// Declared inline: the function is defined in a header, so without it every translation
+// unit including this header would emit its own external definition (PrepareUrl in this
+// header is inline as well).
+inline std::string_view GetHostAndPortAfterCut(const std::string_view url) {
+    return GetHostAndPort(CutSchemePrefix(url));
+}
+
+// TGetHostPort: single-string URL UDF built on GetHostAndPortAfterCut.
+ARROW_UDF_SINGLE_STRING_FUNCTION_FOR_URL(TGetHostPort, GetHostAndPortAfterCut)
+
+// Single-argument adapter over GetSchemeHost() with trimHttp fixed to false, so that it
+// can be passed to ARROW_UDF_SINGLE_STRING_FUNCTION_FOR_URL.
+// Declared inline because it is defined in a header (PrepareUrl in this header is inline as well).
+inline std::string_view GetSchemeHostParameterized(const std::string_view url) {
+    return GetSchemeHost(url, /* trimHttp */ false);
+}
+
+// TGetSchemeHost: single-string URL UDF built on GetSchemeHostParameterized.
+ARROW_UDF_SINGLE_STRING_FUNCTION_FOR_URL(TGetSchemeHost, GetSchemeHostParameterized);
+
+// Single-argument adapter over GetSchemeHostAndPort() with trimHttp and trimDefaultPort
+// fixed to false, so that it can be passed to ARROW_UDF_SINGLE_STRING_FUNCTION_FOR_URL.
+// Declared inline because it is defined in a header (PrepareUrl in this header is inline as well).
+inline std::string_view GetSchemeHostPortParameterized(const std::string_view url) {
+    return GetSchemeHostAndPort(url, /* trimHttp */ false, /* trimDefaultPort */ false);
+}
+
+// TGetSchemeHostPort: single-string URL UDF built on GetSchemeHostPortParameterized.
+ARROW_UDF_SINGLE_STRING_FUNCTION_FOR_URL(TGetSchemeHostPort, GetSchemeHostPortParameterized);
+
+// TGetPort: returns the port found by TryGetSchemeHostAndPort(), or an empty value when
+// the argument is empty, the call fails, or the port is 0.
+// Only the scheme prefix (GetSchemePrefixSize() characters) is lower-cased before parsing.
+// The result is widened to ui64 explicitly so the stored value matches the declared
+// TOptional<ui64> result type, the same way TGetDomainLevel builds its ui64 result.
+BEGIN_SIMPLE_ARROW_UDF(TGetPort, TOptional<ui64>(TOptional<char*>)) {
+    EMPTY_RESULT_ON_EMPTY_ARG(0);
+    Y_UNUSED(valueBuilder);
+    ui16 port = 0;
+    TStringBuf scheme, host;
+    TString lowerUri(args[0].AsStringRef());
+    std::transform(lowerUri.cbegin(), lowerUri.cbegin() + GetSchemePrefixSize(lowerUri),
+                   lowerUri.begin(), [](unsigned char c){ return std::tolower(c); });
+    if (TryGetSchemeHostAndPort(lowerUri, scheme, host, port) && port) {
+        return TUnboxedValuePod(static_cast<ui64>(port));
+    }
+    return TUnboxedValuePod();
+}
+// Block kernel counterpart of TGetPort.
+struct TGetPortKernelExec : public TUnaryKernelExec<TGetPortKernelExec> {
+    template <typename TSink>
+    static void Process(const IValueBuilder*, TBlockItem arg, const TSink& sink) {
+        if (!arg) {
+            return sink(TBlockItem());
+        }
+        ui16 port = 0;
+        TStringBuf scheme, host;
+        TString lowerUri(arg.AsStringRef());
+        std::transform(lowerUri.cbegin(), lowerUri.cbegin() + GetSchemePrefixSize(lowerUri),
+                       lowerUri.begin(), [](unsigned char c){ return std::tolower(c); });
+        if (TryGetSchemeHostAndPort(lowerUri, scheme, host, port) && port) {
+            return sink(TBlockItem(static_cast<ui64>(port)));
+        }
+        sink(TBlockItem());
+    }
+};
+END_SIMPLE_ARROW_UDF(TGetPort, TGetPortKernelExec::Do);
+
+// TGetTail: splits the URL with SplitUrlToHostAndPath() and returns the path part,
+// prepending '/' when that part does not already start with one.
+BEGIN_SIMPLE_ARROW_UDF(TGetTail, TOptional<char*>(TOptional<char*>)) {
+    EMPTY_RESULT_ON_EMPTY_ARG(0);
+    const TStringBuf source(args[0].AsStringRef());
+    TStringBuf hostPart, pathPart;
+    SplitUrlToHostAndPath(source, hostPart, pathPart);
+    if (pathPart.StartsWith('/')) {
+        return valueBuilder->NewString(pathPart);
+    }
+    return valueBuilder->NewString(TString('/').append(pathPart));
+}
+// Block kernel counterpart of TGetTail.
+struct TGetTailKernelExec : public TUnaryKernelExec<TGetTailKernelExec> {
+    template <typename TSink>
+    static void Process(const IValueBuilder*, TBlockItem arg, const TSink& sink) {
+        if (!arg) {
+            return sink(TBlockItem());
+        }
+        const TStringBuf source(arg.AsStringRef());
+        TStringBuf hostPart, pathPart;
+        SplitUrlToHostAndPath(source, hostPart, pathPart);
+        if (!pathPart.StartsWith('/')) {
+            // The temporary string lives until the sink call has finished.
+            return sink(TBlockItem(TStringRef(TString('/').append(pathPart))));
+        }
+        sink(TBlockItem(TStringRef(pathPart)));
+    }
+};
+END_SIMPLE_ARROW_UDF(TGetTail, TGetTailKernelExec::Do);
+
+// TGetPath: drops the scheme prefix, then returns everything from the first '/' up to
+// (not including) the first '?' or '#'. When there is no '/', the result is "/".
+BEGIN_SIMPLE_ARROW_UDF(TGetPath, TOptional<char*>(TOptional<char*>)) {
+    EMPTY_RESULT_ON_EMPTY_ARG(0);
+    const std::string_view source(args[0].AsStringRef());
+    const std::string_view afterScheme(CutSchemePrefix(source));
+    const auto slash = afterScheme.find('/');
+    if (std::string_view::npos == slash) {
+        return valueBuilder->NewString("/");
+    }
+
+    // substr() with npos as the count keeps the whole remainder, so no separate branch is needed.
+    std::string_view path = afterScheme.substr(slash);
+    path = path.substr(0U, path.find_first_of("?#"));
+
+    return valueBuilder->SubString(args[0], std::distance(source.begin(), path.begin()), path.length());
+}
+// Block kernel counterpart of TGetPath.
+struct TGetPathKernelExec : public TUnaryKernelExec<TGetPathKernelExec> {
+    template <typename TSink>
+    static void Process(const IValueBuilder*, TBlockItem arg, const TSink& sink) {
+        if (!arg) {
+            return sink(TBlockItem());
+        }
+        const std::string_view source(arg.AsStringRef());
+        const std::string_view afterScheme(CutSchemePrefix(source));
+        const auto slash = afterScheme.find('/');
+        if (std::string_view::npos == slash) {
+            return sink(TBlockItem(TStringRef("/")));
+        }
+
+        std::string_view path = afterScheme.substr(slash);
+        path = path.substr(0U, path.find_first_of("?#"));
+        sink(TBlockItem(TStringRef(path)));
+    }
+};
+END_SIMPLE_ARROW_UDF(TGetPath, TGetPathKernelExec::Do);
+
+// TGetFragment: returns everything after the first '#', or an empty value when the
+// argument is empty or contains no '#'.
+BEGIN_SIMPLE_ARROW_UDF(TGetFragment, TOptional<char*>(TOptional<char*>)) {
+    EMPTY_RESULT_ON_EMPTY_ARG(0);
+    const std::string_view source(args[0].AsStringRef());
+    const auto hash = source.find('#');
+    if (std::string_view::npos == hash) {
+        return TUnboxedValue();
+    }
+    const auto start = hash + 1U;
+    return valueBuilder->SubString(args[0], start, source.length() - start);
+}
+// Block kernel counterpart of TGetFragment.
+struct TGetFragmentKernelExec : public TUnaryKernelExec<TGetFragmentKernelExec> {
+    template <typename TSink>
+    static void Process(const IValueBuilder*, TBlockItem arg, const TSink& sink) {
+        if (!arg) {
+            return sink(TBlockItem());
+        }
+        const std::string_view source(arg.AsStringRef());
+        const auto hash = source.find('#');
+        if (std::string_view::npos == hash) {
+            return sink(TBlockItem());
+        }
+        const auto start = hash + 1U;
+        return sink(TBlockItem(arg.AsStringRef().Substring(start, source.length() - start)));
+    }
+};
+END_SIMPLE_ARROW_UDF(TGetFragment, TGetFragmentKernelExec::Do);
+
+// Locates the last `level` dot-separated labels of the URL's host (as reported by GetOnlyHost()).
+// Returns {offset inside `url`, length} of that suffix, or std::nullopt when `level` is 0,
+// the host has fewer than `level` labels, or the suffix is empty.
+// Declared inline because it is defined in a header (PrepareUrl in this header is inline as well).
+inline std::optional<std::pair<ui32, ui32>> GetDomain(const std::string_view url, const ui8 level) {
+    const std::string_view host(GetOnlyHost(url));
+    std::vector<std::string_view> parts;
+    StringSplitter(host).Split('.').AddTo(&parts);
+    if (!level || parts.size() < level) {
+        return std::nullopt;
+    }
+    // The suffix starts where the level-th label from the end begins and runs to the end of the host.
+    const std::string_view result = host.substr(std::distance(host.begin(), parts[parts.size() - level].begin()));
+    if (result.empty()) {
+        return std::nullopt;
+    }
+    return std::make_pair(std::distance(url.begin(), result.begin()), result.size());
+}
+
+// TGetDomain: returns the part of the URL located by GetDomain() for the requested level,
+// or an empty value when the URL argument is empty or GetDomain() finds nothing.
+BEGIN_SIMPLE_ARROW_UDF(TGetDomain, TOptional<char*>(TOptional<char*>, ui8)) {
+    EMPTY_RESULT_ON_EMPTY_ARG(0);
+    const std::string_view source = args[0].AsStringRef();
+    const std::optional<std::pair<ui32, ui32>> located = GetDomain(source, args[1].Get<ui8>());
+    if (located) {
+        return valueBuilder->SubString(args[0], located->first, located->second);
+    }
+    return TUnboxedValue();
+}
+// Block kernel counterpart of TGetDomain.
+struct TGetDomainKernelExec : public TBinaryKernelExec<TGetDomainKernelExec> {
+    template <typename TSink>
+    static void Process(const IValueBuilder*, TBlockItem arg1, TBlockItem arg2, const TSink& sink) {
+        if (!arg1) {
+            return sink(TBlockItem());
+        }
+        const auto located = GetDomain(arg1.AsStringRef(), arg2.As<ui8>());
+        if (!located) {
+            return sink(TBlockItem());
+        }
+        sink(TBlockItem(arg1.AsStringRef().Substring(located->first, located->second)));
+    }
+};
+END_SIMPLE_ARROW_UDF(TGetDomain, TGetDomainKernelExec::Do);
+
+// TGetTLD: returns GetZone() of the URL's host (as reported by GetOnlyHost()).
+BEGIN_SIMPLE_ARROW_UDF(TGetTLD, char*(TAutoMap<char*>)) {
+    const TStringBuf source(args[0].AsStringRef());
+    const auto host = GetOnlyHost(source);
+    return valueBuilder->NewString(GetZone(host));
+}
+// Block kernel counterpart of TGetTLD.
+struct TGetTLDKernelExec : public TUnaryKernelExec<TGetTLDKernelExec> {
+    template <typename TSink>
+    static void Process(const IValueBuilder*, TBlockItem arg, const TSink& sink) {
+        const TStringBuf source(arg.AsStringRef());
+        const auto host = GetOnlyHost(source);
+        return sink(TBlockItem(GetZone(host)));
+    }
+};
+END_SIMPLE_ARROW_UDF(TGetTLD, TGetTLDKernelExec::Do);
+
+// TGetDomainLevel: returns the number of pieces obtained by splitting the URL's host
+// (as reported by GetOnlyHost()) on '.'.
+BEGIN_SIMPLE_ARROW_UDF(TGetDomainLevel, ui64(TAutoMap<char*>)) {
+    Y_UNUSED(valueBuilder);
+    std::vector<std::string_view> labels;
+    StringSplitter(GetOnlyHost(args[0].AsStringRef())).Split('.').AddTo(&labels);
+    const ui64 count = labels.size();
+    return TUnboxedValuePod(count);
+}
+// Block kernel counterpart of TGetDomainLevel.
+struct TGetDomainLevelKernelExec : public TUnaryKernelExec<TGetDomainLevelKernelExec> {
+    template <typename TSink>
+    static void Process(const IValueBuilder*, TBlockItem arg, const TSink& sink) {
+        std::vector<std::string_view> labels;
+        StringSplitter(GetOnlyHost(arg.AsStringRef())).Split('.').AddTo(&labels);
+        const ui64 count = labels.size();
+        return sink(TBlockItem(count));
+    }
+};
+END_SIMPLE_ARROW_UDF(TGetDomainLevel, TGetDomainLevelKernelExec::Do);
+
+// TGetSignificantDomain: for hosts with more than two labels returns the last two labels,
+// or the last three when the second-to-last label is a "zone". Zones come from the optional
+// second argument when it is given, otherwise from a built-in list. Hosts with at most two
+// labels are returned unchanged.
+SIMPLE_UDF_WITH_OPTIONAL_ARGS(TGetSignificantDomain, char*(TAutoMap<char*>, TOptional<TListType<char*>>), 1) {
+    const std::string_view source(args[0].AsStringRef());
+    const std::string_view host(GetOnlyHost(source));
+    std::vector<std::string_view> labels;
+    StringSplitter(host).Split('.').AddTo(&labels);
+    if (labels.size() <= 2) {
+        return valueBuilder->SubString(args[0], std::distance(source.begin(), host.begin()), host.length());
+    }
+
+    const auto& secondLevel = labels.at(labels.size() - 2);
+    bool secondLevelIsZone = false;
+
+    if (!args[1]) {
+        static const std::set<std::string_view> zones{"com", "net", "org", "co", "gov", "edu"};
+        secondLevelIsZone = zones.find(secondLevel) != zones.end();
+    } else {
+        const auto& zonesIterator = args[1].GetListIterator();
+        for (TUnboxedValue item; zonesIterator.Next(item);) {
+            if (secondLevel == item.AsStringRef()) {
+                secondLevelIsZone = true;
+                break;
+            }
+        }
+    }
+
+    // The result spans from the chosen label to the end of the last label.
+    const auto first = labels[labels.size() - (secondLevelIsZone ? 3U : 2U)].begin();
+    return valueBuilder->SubString(args[0], std::distance(source.begin(), first), std::distance(first, labels.back().end()));
+}
+
+// Finds the value of query parameter `key` in `url`.
+// The query is taken to start after the first '?' and to end at the next '#' (or at the
+// end of the string). Returns {offset, length} of the value inside `url`, or std::nullopt
+// when there is no query or no "key=" pair in it.
+// Declared inline because it is defined in a header (PrepareUrl in this header is inline as well).
+inline std::optional<std::pair<ui32, ui32>> GetCGIParam(const std::string_view url, const std::string_view key) {
+    const auto queryStart = url.find('?');
+    if (queryStart == std::string_view::npos) {
+        return std::nullopt;
+    }
+    const auto from = queryStart + 1U;
+    const auto anc = url.find('#', from);
+    const auto end = anc == std::string_view::npos ? url.length() : anc;
+    // When no '&' is left, `pos = amper` sets pos to npos and ++pos wraps it to 0,
+    // which the `pos &&` condition uses to stop the loop.
+    for (auto pos = from; pos && pos < end; ++pos) {
+        const auto equal = url.find('=', pos);
+        const auto amper = url.find('&', pos);
+        // `equal < end` rejects an '=' that belongs to the fragment; without it the length
+        // below, min(amper, end) - equal - 1, would underflow.
+        if (equal < end && equal < amper) {
+            const auto& param = url.substr(pos, equal - pos);
+            if (param == key) {
+                return std::make_pair(equal + 1U, std::min(amper, end) - equal - 1U);
+            }
+        }
+        pos = amper;
+    }
+    return std::nullopt;
+}
+
+// TGetCGIParam: returns the value located by GetCGIParam() for the given key, or an
+// empty value when the URL argument is empty or the key is not found.
+BEGIN_SIMPLE_ARROW_UDF(TGetCGIParam, TOptional<char*>(TOptional<char*>, char*)) {
+    EMPTY_RESULT_ON_EMPTY_ARG(0);
+    const std::string_view source = args[0].AsStringRef();
+    const std::optional<std::pair<ui32, ui32>> located = GetCGIParam(source, args[1].AsStringRef());
+    if (located) {
+        return valueBuilder->SubString(args[0], located->first, located->second);
+    }
+    return TUnboxedValue();
+}
+// Block kernel counterpart of TGetCGIParam.
+struct TGetCGIParamKernelExec : public TBinaryKernelExec<TGetCGIParamKernelExec> {
+    template <typename TSink>
+    static void Process(const IValueBuilder*, TBlockItem arg1, TBlockItem arg2, const TSink& sink) {
+        if (!arg1) {
+            return sink(TBlockItem());
+        }
+        const auto located = GetCGIParam(arg1.AsStringRef(), arg2.AsStringRef());
+        if (!located) {
+            return sink(TBlockItem());
+        }
+        sink(TBlockItem(arg1.AsStringRef().Substring(located->first, located->second)));
+    }
+};
+END_SIMPLE_ARROW_UDF(TGetCGIParam, TGetCGIParamKernelExec::Do);
+
+// TCutScheme: single-string URL UDF built on CutSchemePrefix.
+ARROW_UDF_SINGLE_STRING_FUNCTION_FOR_URL(TCutScheme, CutSchemePrefix)
+
+// TCutWWW: single-string URL UDF built on CutWWWPrefix.
+ARROW_UDF_SINGLE_STRING_FUNCTION_FOR_URL(TCutWWW, CutWWWPrefix)
+
+// TCutWWW2: single-string URL UDF built on CutWWWNumberedPrefix.
+ARROW_UDF_SINGLE_STRING_FUNCTION_FOR_URL(TCutWWW2, CutWWWNumberedPrefix)
+
+// TCutQueryStringAndFragment: returns the input up to (not including) the first '?' or '#'.
+// When neither character is present the input is returned unchanged.
+BEGIN_SIMPLE_ARROW_UDF(TCutQueryStringAndFragment, char*(TAutoMap<char*>)) {
+    const std::string_view input(args[0].AsStringRef());
+    const auto cut = input.find_first_of("?#");
+    return std::string_view::npos == cut ? NUdf::TUnboxedValue(args[0]) : valueBuilder->SubString(args[0], 0U, cut);
+}
+// Block kernel counterpart of TCutQueryStringAndFragment.
+struct TCutQueryStringAndFragmentKernelExec : public TUnaryKernelExec<TCutQueryStringAndFragmentKernelExec> {
+    template <typename TSink>
+    static void Process(const IValueBuilder*, TBlockItem arg, const TSink& sink) {
+        const auto whole = arg.AsStringRef();
+        const std::string_view input(whole);
+        const auto cut = input.find_first_of("?#");
+        // Handle "nothing to cut" explicitly, like the scalar path does, instead of
+        // passing npos as a length to Substring().
+        if (std::string_view::npos == cut) {
+            return sink(TBlockItem(whole));
+        }
+        sink(TBlockItem(whole.Substring(0U, cut)));
+    }
+};
+END_SIMPLE_ARROW_UDF(TCutQueryStringAndFragment, TCutQueryStringAndFragmentKernelExec::Do);
+
+// TEncode: applies UrlEscape() to a copy of the input. An empty argument or an empty
+// string gives an empty value; when escaping changes nothing the scalar path returns the
+// original argument instead of allocating a new string.
+BEGIN_SIMPLE_ARROW_UDF(TEncode, TOptional<char*>(TOptional<char*>)) {
+    EMPTY_RESULT_ON_EMPTY_ARG(0);
+    const std::string_view source(args[0].AsStringRef());
+    if (source.empty()) {
+        return NUdf::TUnboxedValuePod();
+    }
+    TString escaped(source);
+    UrlEscape(escaped);
+    if (source == escaped) {
+        return NUdf::TUnboxedValue(args[0]);
+    }
+    return valueBuilder->NewString(escaped);
+}
+// Block kernel counterpart of TEncode.
+struct TEncodeKernelExec : public TUnaryKernelExec<TEncodeKernelExec> {
+    template <typename TSink>
+    static void Process(const IValueBuilder*, TBlockItem arg, const TSink& sink) {
+        if (!arg) {
+            return sink(TBlockItem());
+        }
+        const std::string_view source(arg.AsStringRef());
+        if (source.empty()) {
+            return sink(TBlockItem());
+        }
+        TString escaped(source);
+        UrlEscape(escaped);
+        sink(TBlockItem(TStringRef(escaped)));
+    }
+};
+END_SIMPLE_ARROW_UDF(TEncode, TEncodeKernelExec::Do);
+
+// TDecode: replaces every '+' with ' ' in a copy of the input and then applies
+// UrlUnescape(). An empty argument or an empty string gives an empty value; when decoding
+// changes nothing the scalar path returns the original argument.
+BEGIN_SIMPLE_ARROW_UDF(TDecode, TOptional<char*>(TOptional<char*>)) {
+    EMPTY_RESULT_ON_EMPTY_ARG(0);
+    const std::string_view source(args[0].AsStringRef());
+    if (source.empty()) {
+        return NUdf::TUnboxedValuePod();
+    }
+    TString decoded(source);
+    SubstGlobal(decoded, '+', ' ');
+    UrlUnescape(decoded);
+    if (source == decoded) {
+        return NUdf::TUnboxedValue(args[0]);
+    }
+    return valueBuilder->NewString(decoded);
+}
+// Block kernel counterpart of TDecode.
+struct TDecodeKernelExec : public TUnaryKernelExec<TDecodeKernelExec> {
+    template <typename TSink>
+    static void Process(const IValueBuilder*, TBlockItem arg, const TSink& sink) {
+        if (!arg) {
+            return sink(TBlockItem());
+        }
+        const std::string_view source(arg.AsStringRef());
+        if (source.empty()) {
+            return sink(TBlockItem());
+        }
+        TString decoded(source);
+        SubstGlobal(decoded, '+', ' ');
+        UrlUnescape(decoded);
+        sink(TBlockItem(TStringRef(decoded)));
+    }
+};
+END_SIMPLE_ARROW_UDF(TDecode, TDecodeKernelExec::Do);
+
+// TIsKnownTLD: returns the verdict of IsTld() for the argument string.
+BEGIN_SIMPLE_ARROW_UDF(TIsKnownTLD, bool(TAutoMap<char*>)) {
+    Y_UNUSED(valueBuilder);
+    const auto known = IsTld(args[0].AsStringRef());
+    return TUnboxedValuePod(known);
+}
+struct TIsKnownTLDKernelExec : public TUnaryKernelExec<TIsKnownTLDKernelExec> {
+    // Block counterpart: the verdict is emitted as a ui8 block item.
+    template <typename TSink>
+    static void Process(const IValueBuilder*, TBlockItem arg, const TSink& sink) {
+        const auto flag = static_cast<ui8>(IsTld(arg.AsStringRef()));
+        sink(TBlockItem(flag));
+    }
+};
+END_SIMPLE_ARROW_UDF(TIsKnownTLD, TIsKnownTLDKernelExec::Do);
+
+// TIsWellKnownTLD: returns the verdict of IsVeryGoodTld() for the argument string.
+BEGIN_SIMPLE_ARROW_UDF(TIsWellKnownTLD, bool(TAutoMap<char*>)) {
+    Y_UNUSED(valueBuilder);
+    const auto wellKnown = IsVeryGoodTld(args[0].AsStringRef());
+    return TUnboxedValuePod(wellKnown);
+}
+struct TIsWellKnownTLDKernelExec : public TUnaryKernelExec<TIsWellKnownTLDKernelExec> {
+    // Block counterpart: the verdict is emitted as a ui8 block item.
+    template <typename TSink>
+    static void Process(const IValueBuilder*, TBlockItem arg, const TSink& sink) {
+        const auto flag = static_cast<ui8>(IsVeryGoodTld(arg.AsStringRef()));
+        sink(TBlockItem(flag));
+    }
+};
+END_SIMPLE_ARROW_UDF(TIsWellKnownTLD, TIsWellKnownTLDKernelExec::Do);
+
+// THostNameToPunycode: converts a UTF-8 host name to wide characters and runs
+// HostNameToPunycode() on it. A TPunycodeError thrown on the way is turned
+// into an empty (NULL) result; other exception types are not handled here.
+BEGIN_SIMPLE_ARROW_UDF(THostNameToPunycode, TOptional<char*>(TAutoMap<char*>)) try {
+    const TUtf16String input = UTF8ToWide(args[0].AsStringRef());
+    return valueBuilder->NewString(HostNameToPunycode(input));
+} catch (const TPunycodeError&) {
+    // The exception object is never modified, so it is caught by const reference.
+    return TUnboxedValue();
+}
+struct THostNameToPunycodeKernelExec : public THostNameToPunycodeKernelExecBase_Unused_Guard_ {
+};
+END_SIMPLE_ARROW_UDF(THostNameToPunycode, THostNameToPunycodeKernelExec::Do);
+
+// TForceHostNameToPunycode: converts a UTF-8 host name to wide characters and
+// returns whatever ForceHostNameToPunycode() produces for it. Unlike
+// THostNameToPunycode the result type is not optional and no exception is
+// caught here.
+BEGIN_SIMPLE_ARROW_UDF(TForceHostNameToPunycode, char*(TAutoMap<char*>)) {
+    const auto wideHost = UTF8ToWide(args[0].AsStringRef());
+    const auto encoded = ForceHostNameToPunycode(wideHost);
+    return valueBuilder->NewString(encoded);
+}
+struct TForceHostNameToPunycodeKernelExec : public TUnaryKernelExec<TForceHostNameToPunycodeKernelExec> {
+    // Block counterpart of the scalar body above.
+    template <typename TSink>
+    static void Process(const IValueBuilder*, TBlockItem arg, const TSink& sink) {
+        const auto wideHost = UTF8ToWide(arg.AsStringRef());
+        const auto encoded = ForceHostNameToPunycode(wideHost);
+        sink(TBlockItem(TStringRef(encoded)));
+    }
+};
+END_SIMPLE_ARROW_UDF(TForceHostNameToPunycode, TForceHostNameToPunycodeKernelExec::Do);
+
+// TPunycodeToHostName: runs PunycodeToHostName() on the argument and returns
+// the result re-encoded as UTF-8. A TPunycodeError thrown on the way is turned
+// into an empty (NULL) result; other exception types are not handled here.
+BEGIN_SIMPLE_ARROW_UDF(TPunycodeToHostName, TOptional<char*>(TAutoMap<char*>)) try {
+    const TStringRef input = args[0].AsStringRef();
+    const auto result = WideToUTF8(PunycodeToHostName(input));
+    return valueBuilder->NewString(result);
+} catch (const TPunycodeError&) {
+    // The exception object is never modified, so it is caught by const reference.
+    return TUnboxedValue();
+}
+struct TPunycodeToHostNameKernelExec : public TUnaryKernelExec<TPunycodeToHostNameKernelExec> {
+    // Block counterpart of the scalar body: same conversion, with the empty
+    // block item standing in for the NULL result on TPunycodeError.
+    template <typename TSink>
+    static void Process(const IValueBuilder*, TBlockItem arg, const TSink& sink) try {
+        const TStringRef input = arg.AsStringRef();
+        const auto result = WideToUTF8(PunycodeToHostName(input));
+        return sink(TBlockItem(TStringRef(result)));
+    } catch (const TPunycodeError&) {
+        return sink(TBlockItem());
+    }
+};
+END_SIMPLE_ARROW_UDF(TPunycodeToHostName, TPunycodeToHostNameKernelExec::Do);
+
+// TForcePunycodeToHostName: returns the UTF-8 form of whatever
+// ForcePunycodeToHostName() produces for the argument. The result type is not
+// optional and no exception is caught here.
+BEGIN_SIMPLE_ARROW_UDF(TForcePunycodeToHostName, char*(TAutoMap<char*>)) {
+    const TStringRef& punycode = args[0].AsStringRef();
+    const auto& hostUtf8 = WideToUTF8(ForcePunycodeToHostName(punycode));
+    return valueBuilder->NewString(hostUtf8);
+}
+struct TForcePunycodeToHostNameKernelExec : public TUnaryKernelExec<TForcePunycodeToHostNameKernelExec> {
+    // Block counterpart of the scalar body above.
+    template <typename TSink>
+    static void Process(const IValueBuilder*, TBlockItem arg, const TSink& sink) {
+        const TStringRef& punycode = arg.AsStringRef();
+        const auto& hostUtf8 = WideToUTF8(ForcePunycodeToHostName(punycode));
+        sink(TBlockItem(TStringRef(hostUtf8)));
+    }
+};
+END_SIMPLE_ARROW_UDF(TForcePunycodeToHostName, TForcePunycodeToHostNameKernelExec::Do);
+
+// TCanBePunycodeHostName: returns the verdict of CanBePunycodeHostName() for
+// the argument string.
+BEGIN_SIMPLE_ARROW_UDF(TCanBePunycodeHostName, bool(TAutoMap<char*>)) {
+    Y_UNUSED(valueBuilder);
+    const auto canBePunycode = CanBePunycodeHostName(args[0].AsStringRef());
+    return TUnboxedValuePod(canBePunycode);
+}
+struct TCanBePunycodeHostNameKernelExec : public TUnaryKernelExec<TCanBePunycodeHostNameKernelExec> {
+    // Block counterpart: the verdict is emitted as a ui8 block item.
+    template <typename TSink>
+    static void Process(const IValueBuilder*, TBlockItem arg, const TSink& sink) {
+        const auto flag = static_cast<ui8>(CanBePunycodeHostName(arg.AsStringRef()));
+        sink(TBlockItem(flag));
+    }
+};
+END_SIMPLE_ARROW_UDF(TCanBePunycodeHostName, TCanBePunycodeHostNameKernelExec::Do);
+
+// Comma-separated list of the UDF implementation types provided by this
+// library. It expands to a bare type list with no trailing comma, so the
+// consumer decides what to wrap around it.
+// NOTE(review): presumably fed to a module-registration macro in the UDF
+// entry point - confirm at the use site.
+#define EXPORTED_URL_BASE_UDF \
+ TNormalize, \
+ TParse, \
+ TGetScheme, \
+ TGetHost, \
+ TGetHostPort, \
+ TGetSchemeHost, \
+ TGetSchemeHostPort, \
+ TGetPort, \
+ TGetTail, \
+ TGetPath, \
+ TGetFragment, \
+ TGetDomain, \
+ TGetTLD, \
+ TGetDomainLevel, \
+ TGetSignificantDomain, \
+ TGetCGIParam, \
+ TCutScheme, \
+ TCutWWW, \
+ TCutWWW2, \
+ TCutQueryStringAndFragment, \
+ TEncode, \
+ TDecode, \
+ TIsKnownTLD, \
+ TIsWellKnownTLD, \
+ THostNameToPunycode, \
+ TForceHostNameToPunycode, \
+ TPunycodeToHostName, \
+ TForcePunycodeToHostName, \
+ TCanBePunycodeHostName, \
+ TQueryStringToList, \
+ TQueryStringToDict, \
+ TBuildQueryString
diff --git a/yql/essentials/udfs/common/url_base/lib/url_parse.cpp b/yql/essentials/udfs/common/url_base/lib/url_parse.cpp
new file mode 100644
index 00000000000..63015dadc67
--- /dev/null
+++ b/yql/essentials/udfs/common/url_base/lib/url_parse.cpp
@@ -0,0 +1,53 @@
+#include "url_parse.h"
+
+// FIELD_ADD(name): adds a struct member called #name of type optionalStringType
+// and stores its index in urlParseIndexes.name. It relies on the locals
+// structBuilder, optionalStringType and urlParseIndexes being in scope where it
+// expands, and already ends with ';'.
+#define FIELD_ADD(name) structBuilder->AddField(#name, optionalStringType, &urlParseIndexes.name);
+// FIELD_FILL(name): when the parsed URI has TUri::Field##name set, stores that
+// component as a new string in fields[UrlParseIndexes.name]; otherwise the slot
+// is left untouched. It relies on value, fields, valueBuilder and
+// UrlParseIndexes being in scope where it expands.
+#define FIELD_FILL(name) \
+ if (value.FldIsSet(TUri::Field##name)) { \
+ fields[UrlParseIndexes.name] = valueBuilder->NewString(value.GetField(TUri::Field##name)); \
+ }
+
+namespace NUrlUdf {
+ using namespace NUri;
+ using namespace NKikimr;
+ using namespace NUdf;
+
+    // Parses args[0] as an absolute URI with ParseFlags and returns an array of
+    // FieldsCount slots. On success every URI component that is set is stored
+    // in its slot; on failure only the ParseError slot is filled, with the
+    // textual form of the parser status. Slots that are not assigned stay
+    // default-constructed.
+    TUnboxedValue TParse::Run(
+        const IValueBuilder* valueBuilder,
+        const TUnboxedValuePod* args) const {
+        TUri value;
+        const auto status = value.ParseAbs(args[0].AsStringRef(), ParseFlags);
+        TUnboxedValue* fields = nullptr;
+        const auto result = valueBuilder->NewArray(FieldsCount, fields);
+        if (status != TUri::ParsedOK) {
+            fields[UrlParseIndexes.ParseError] = valueBuilder->NewString(TStringBuilder() << status);
+            return result;
+        }
+        // FIELD_FILL uses the locals `value`, `fields` and `valueBuilder` by name.
+        FIELD_MAP(FIELD_FILL)
+        return result;
+    }
+
+    // Declares the signature of the function when `name` equals Name(): one
+    // auto-mapped string argument and a struct result whose members are all
+    // optional strings ("ParseError" plus one member per FIELD_MAP entry).
+    // The member indexes reported by the struct builder are handed to the
+    // implementation, which is only created when typesOnly is false.
+    // Returns true when the name matched, false otherwise.
+    bool TParse::DeclareSignature(
+        const TStringRef& name,
+        TType* userType,
+        IFunctionTypeInfoBuilder& builder,
+        bool typesOnly) {
+        Y_UNUSED(userType);
+        const bool matched = (Name() == name);
+        if (!matched) {
+            return false;
+        }
+
+        TUrlParseIndexes urlParseIndexes;
+        builder.Args(1)->Add<TAutoMap<char*>>();
+        const auto optionalStringType = builder.Optional()->Item<char*>().Build();
+        const auto structBuilder = builder.Struct(FieldsCount);
+        structBuilder->AddField("ParseError", optionalStringType, &urlParseIndexes.ParseError);
+        // FIELD_ADD uses structBuilder, optionalStringType and urlParseIndexes by name.
+        FIELD_MAP(FIELD_ADD)
+        builder.Returns(structBuilder->Build());
+
+        if (!typesOnly) {
+            builder.Implementation(new TParse(urlParseIndexes));
+        }
+        return true;
+    }
+}
diff --git a/yql/essentials/udfs/common/url_base/lib/url_parse.h b/yql/essentials/udfs/common/url_base/lib/url_parse.h
new file mode 100644
index 00000000000..b0a1679d1e9
--- /dev/null
+++ b/yql/essentials/udfs/common/url_base/lib/url_parse.h
@@ -0,0 +1,59 @@
+#pragma once
+
+#include <yql/essentials/public/udf/udf_helpers.h>
+#include <library/cpp/uri/uri.h>
+
+// X-macro listing the URI components handled by the URL parse UDF: XX is
+// expanded once per component name, in the order written here.
+#define FIELD_MAP(XX) \
+ XX(Scheme) \
+ XX(User) \
+ XX(Pass) \
+ XX(Host) \
+ XX(Port) \
+ XX(Path) \
+ XX(Query) \
+ XX(Frag)
+
+// FIELD_MAP callback that declares one ui32 data member per component.
+#define FIELD_INDEXES(name) ui32 name;
+
+namespace NUrlUdf {
+ using namespace NUri;
+ using namespace NKikimr;
+ using namespace NUdf;
+
+ // Positions of the result-struct members: one ui32 for ParseError plus one
+ // per FIELD_MAP component. Every member must stay a ui32, because
+ // TParse::FieldsCount is computed as sizeof(TUrlParseIndexes) / sizeof(ui32).
+ struct TUrlParseIndexes {
+ ui32 ParseError;
+ FIELD_MAP(FIELD_INDEXES)
+ };
+
+    // Boxed implementation of the "Parse" function. It is created by
+    // DeclareSignature() with the member indexes of the result struct and
+    // parses URIs using TUri::FeaturesRecommended.
+    class TParse: public TBoxedValue {
+    public:
+        // explicit: a TUrlParseIndexes must never convert to a TParse implicitly.
+        explicit TParse(const TUrlParseIndexes& urlParseIndexes)
+            : UrlParseIndexes(urlParseIndexes)
+            , ParseFlags(TUri::FeaturesRecommended)
+        {
+        }
+
+        // Name that DeclareSignature() compares the requested function name with.
+        static const TStringRef& Name() {
+            static const auto nameRef = TStringRef("Parse");
+            return nameRef;
+        }
+
+    private:
+        TUnboxedValue Run(
+            const IValueBuilder* valueBuilder,
+            const TUnboxedValuePod* args) const override;
+
+    public:
+        static bool DeclareSignature(
+            const TStringRef& name,
+            TType* userType,
+            IFunctionTypeInfoBuilder& builder,
+            bool typesOnly);
+
+    private:
+        const TUrlParseIndexes UrlParseIndexes;
+        const NUri::TParseFlags ParseFlags;
+
+        // Number of result-struct members; valid only while TUrlParseIndexes
+        // consists solely of ui32 members.
+        static constexpr ui32 FieldsCount = sizeof(TUrlParseIndexes) / sizeof(ui32);
+    };
+}
diff --git a/yql/essentials/udfs/common/url_base/lib/url_query.cpp b/yql/essentials/udfs/common/url_base/lib/url_query.cpp
new file mode 100644
index 00000000000..f449be22681
--- /dev/null
+++ b/yql/essentials/udfs/common/url_base/lib/url_query.cpp
@@ -0,0 +1,243 @@
+#include "url_query.h"
+
+#include <yql/essentials/public/udf/udf_type_printer.h>
+
+#include <util/string/split.h>
+
+#include <library/cpp/string_utils/quote/quote.h>
+
+namespace NUrlUdf {
+    // Declares the signature shared by the query-string parsers: the result
+    // type is `retType`, the first argument is the auto-mapped query string and
+    // the remaining four (KeepBlankValues, Strict, MaxFields, Separator) are
+    // optional named arguments.
+    void TQueryStringParse::MakeSignature(IFunctionTypeInfoBuilder& builder,
+                                          const TType* retType)
+    {
+        builder.Returns(retType).OptionalArgs(4);
+        auto argBuilder = builder.Args();
+        argBuilder->Add<TAutoMap<TQueryStr>>();
+        argBuilder->Add<TKeepBlankValuesNArg>();
+        argBuilder->Add<TStrictNArg>();
+        argBuilder->Add<TMaxFieldsNArg>();
+        argBuilder->Add<TSeparatorNArg>().Done();
+    }
+
+    // Splits a query string into CGI-unescaped (name, value) pairs.
+    //
+    // Arguments, in the order declared by MakeSignature():
+    //   args[0] - the query string; an empty one yields an empty result;
+    //   args[1] - KeepBlankValues, defaults to false;
+    //   args[2] - Strict, defaults to true;
+    //   args[3] - MaxFields, defaults to Max<ui32>();
+    //   args[4] - Separator between fields, defaults to "&".
+    //
+    // The UDF is terminated through UdfTerminate() when the query splits into
+    // more than MaxFields parts or, in strict mode, when a part has no '='.
+    // In non-strict mode empty parts are skipped and a part without '=' is
+    // either kept with an empty value (KeepBlankValues) or dropped. Pairs with
+    // an empty value are dropped unless KeepBlankValues is set.
+    std::vector<std::pair<TString, TString>>
+    TQueryStringParse::RunImpl(const TUnboxedValuePod* args) const {
+        const std::string_view query(args[0].AsStringRef());
+        if (query.empty()) {
+            return {};
+        }
+        const bool keepBlankValues = args[1].GetOrDefault(false);
+        const bool strict = args[2].GetOrDefault(true);
+        const ui32 maxFieldCnt = args[3].GetOrDefault(Max<ui32>());
+        const std::string_view sep(args[4] ? args[4].AsStringRef() : "&");
+
+        std::vector<TStringBuf> parts;
+        StringSplitter(query).SplitByString(sep).Collect(&parts);
+        if (parts.size() > maxFieldCnt) {
+            UdfTerminate((TStringBuilder() << Pos_ << "Max number of fields (" << maxFieldCnt
+                                           << ") exceeded: got " << parts.size()).data());
+        }
+
+        std::vector<std::pair<TString, TString>> pairs;
+        for (const TStringBuf& part : parts) {
+            if (part.empty() && !strict) {
+                continue;
+            }
+            // At most two tokens: the name and everything after the first '='.
+            TVector<TString> nvPair = StringSplitter(part).Split('=').Limit(2);
+            if (nvPair.size() != 2) {
+                if (strict) {
+                    UdfTerminate((TStringBuilder() << Pos_ << "Bad query field: \""
+                                                   << nvPair[0] << "\"").data());
+                }
+                if (keepBlankValues) {
+                    nvPair.emplace_back("");
+                } else {
+                    continue;
+                }
+            }
+            if (!nvPair[1].empty() || keepBlankValues) {
+                CGIUnescape(nvPair[0]);
+                CGIUnescape(nvPair[1]);
+                // nvPair is discarded after this iteration, so hand the strings
+                // over instead of copying them.
+                pairs.emplace_back(std::move(nvPair[0]), std::move(nvPair[1]));
+            }
+        }
+        return pairs;
+    }
+
+    // Declares the list-returning parser when `name` equals Name(): the common
+    // parser signature with GetListType() as the result type. The
+    // implementation is created only when typesOnly is false.
+    // Returns whether the name matched.
+    bool TQueryStringToList::DeclareSignature(const TStringRef& name,
+                                              TType*,
+                                              IFunctionTypeInfoBuilder& builder,
+                                              bool typesOnly) {
+        const bool matched = (Name() == name);
+        if (matched) {
+            MakeSignature(builder, GetListType(builder));
+            if (!typesOnly) {
+                builder.Implementation(new TQueryStringToList(builder.GetSourcePosition()));
+            }
+        }
+        return matched;
+    }
+
+    // Parses the query string with RunImpl() and returns the pairs as a list of
+    // two-element arrays (name, value), in the order RunImpl() produced them.
+    TUnboxedValue TQueryStringToList::Run(const IValueBuilder* valueBuilder,
+                                          const TUnboxedValuePod* args) const {
+        const auto pairs = RunImpl(args);
+        std::vector<TUnboxedValue> ret;
+        // The final size is known up front: allocate once.
+        ret.reserve(pairs.size());
+        for (const auto& nvPair : pairs) {
+            TUnboxedValue* pair = nullptr;
+            auto item = valueBuilder->NewArray(2U, pair);
+            pair[0] = valueBuilder->NewString(nvPair.first);
+            pair[1] = valueBuilder->NewString(nvPair.second);
+            // `item` is not used afterwards, so move it into the vector.
+            ret.push_back(std::move(item));
+        }
+        return valueBuilder->NewList(ret.data(), ret.size());
+    }
+
+    // Declares the dict-returning parser when `name` equals Name(): the common
+    // parser signature with GetDictType() as the result type. The same type
+    // object is handed to the implementation, which is created only when
+    // typesOnly is false. Returns whether the name matched.
+    bool TQueryStringToDict::DeclareSignature(const TStringRef& name,
+                                              TType*,
+                                              IFunctionTypeInfoBuilder& builder,
+                                              bool typesOnly) {
+        const bool matched = (Name() == name);
+        if (!matched) {
+            return false;
+        }
+        auto resultType = GetDictType(builder);
+        MakeSignature(builder, resultType);
+        if (!typesOnly) {
+            builder.Implementation(new TQueryStringToDict(resultType,
+                                                          builder.GetSourcePosition()));
+        }
+        return true;
+    }
+
+    // Parses the query string with RunImpl() and adds every (name, value) pair
+    // to a dict built with the Hashed and Multi flags.
+    TUnboxedValue TQueryStringToDict::Run(const IValueBuilder* valueBuilder,
+                                          const TUnboxedValuePod* args) const {
+        // Parse first, so nothing is allocated if RunImpl() terminates the UDF.
+        const auto parsed = RunImpl(args);
+        auto dictBuilder = valueBuilder->NewDict(DictType_, TDictFlags::Hashed | TDictFlags::Multi);
+        for (const auto& [fieldName, fieldValue] : parsed) {
+            dictBuilder->Add(valueBuilder->NewString(fieldName),
+                             valueBuilder->NewString(fieldValue));
+        }
+        return dictBuilder->Build();
+    }
+
+    // Builds a query string from args[0], whose shape was fixed at declaration
+    // time (FirstArgTypeId_): a dict of lists, a flat dict, or a list of
+    // (key, value) tuples. Every field is written as "<key>=<value>" with both
+    // parts CGI-escaped; an unset value is written as "<key>=". Fields are
+    // joined with args[1], or "&" when no separator was passed.
+    TUnboxedValue TBuildQueryString::Run(const IValueBuilder* valueBuilder,
+                                         const TUnboxedValuePod* args) const {
+        const std::string_view sep(args[1] ? args[1].AsStringRef() : "&");
+        TStringBuilder ret;
+        bool needSeparator = false;
+
+        // Appends one field, preceded by the separator for all but the first one.
+        const auto appendField = [&](const TString& escapedKey, const TUnboxedValuePod& fieldValue) {
+            if (needSeparator) {
+                ret << sep;
+            }
+            needSeparator = true;
+            ret << escapedKey << '=';
+            if (fieldValue) {
+                ret << CGIEscapeRet(fieldValue.AsStringRef());
+            }
+        };
+
+        switch (FirstArgTypeId_) {
+            case EFirstArgTypeId::Dict: {
+                // Each key maps to a list of values: one field per list item.
+                TUnboxedValue key, values;
+                const auto dictIt = args[0].GetDictIterator();
+                while (dictIt.NextPair(key, values)) {
+                    const TString keyEscaped = CGIEscapeRet(key.AsStringRef());
+                    const auto listIt = values.GetListIterator();
+                    TUnboxedValue item;
+                    while (listIt.Next(item)) {
+                        appendField(keyEscaped, item);
+                    }
+                }
+                break;
+            }
+            case EFirstArgTypeId::FlattenDict: {
+                // Each key maps to a single value.
+                TUnboxedValue key, value;
+                const auto dictIt = args[0].GetDictIterator();
+                while (dictIt.NextPair(key, value)) {
+                    appendField(CGIEscapeRet(key.AsStringRef()), value);
+                }
+                break;
+            }
+            case EFirstArgTypeId::List: {
+                // Each item is a (key, value) tuple.
+                TUnboxedValue item;
+                const auto listIt = args[0].GetListIterator();
+                while (listIt.Next(item)) {
+                    const TUnboxedValue key = item.GetElement(0);
+                    const TUnboxedValue val = item.GetElement(1);
+                    appendField(CGIEscapeRet(key.AsStringRef()), val);
+                }
+                break;
+            }
+            default:
+                Y_ABORT("Current first parameter type is not yet implemented");
+        }
+        return valueBuilder->NewString(ret);
+    }
+
+    // Declares the function when `name` equals Name(). The shape of the first
+    // argument is taken from the user type and must be one of: the dict-of-lists
+    // type, the list-of-tuples type, the flat dict type (each with plain or
+    // optional values), or an empty list / empty dict. Any problem is reported
+    // through builder.SetError(). Returns true whenever the name matched, even
+    // if an error was reported, and false otherwise.
+    bool TBuildQueryString::DeclareSignature(const TStringRef& name,
+                                             TType* userType,
+                                             IFunctionTypeInfoBuilder& builder,
+                                             bool typesOnly) {
+        const bool matched = (Name() == name);
+        if (!matched) {
+            return false;
+        }
+        if (!userType) {
+            builder.SetError("Missing user type");
+            return true;
+        }
+        builder.UserType(userType);
+
+        const auto helper = builder.TypeInfoHelper();
+        const auto userTuple = TTupleTypeInspector(*helper, userType);
+        if (!userTuple || !userTuple.GetElementsCount()) {
+            builder.SetError("User type is not tuple");
+            return true;
+        }
+        const auto argsTuple = TTupleTypeInspector(*helper, userTuple.GetElementType(0));
+        if (!argsTuple || !argsTuple.GetElementsCount()) {
+            builder.SetError("Please provide at least one argument");
+            return true;
+        }
+        const auto firstArgType = argsTuple.GetElementType(0);
+
+        // True when the first argument has exactly the candidate type.
+        const auto isFirstArg = [&](const TType* candidate) {
+            return helper->IsSameType(candidate, firstArgType);
+        };
+
+        EFirstArgTypeId firstArgTypeId = EFirstArgTypeId::None;
+        if (isFirstArg(GetDictType(builder)) || isFirstArg(GetDictType(builder, true))) {
+            firstArgTypeId = EFirstArgTypeId::Dict;
+        } else if (isFirstArg(GetListType(builder)) ||
+                   isFirstArg(GetListType(builder, true)) ||
+                   helper->GetTypeKind(firstArgType) == ETypeKind::EmptyList)
+        {
+            firstArgTypeId = EFirstArgTypeId::List;
+        } else if (isFirstArg(GetFlattenDictType(builder)) ||
+                   isFirstArg(GetFlattenDictType(builder, true)) ||
+                   helper->GetTypeKind(firstArgType) == ETypeKind::EmptyDict)
+        {
+            firstArgTypeId = EFirstArgTypeId::FlattenDict;
+        }
+
+        if (firstArgTypeId == EFirstArgTypeId::None) {
+            TStringBuilder message;
+            message << "Unsupported first argument type: ";
+            TTypePrinter(*helper, firstArgType).Out(message.Out);
+            builder.SetError(message);
+            return true;
+        }
+
+        // Result is a string; the separator is the only optional argument.
+        builder.Returns<TQueryStr>().OptionalArgs(1);
+        auto argBuilder = builder.Args();
+        argBuilder->Add(firstArgType).Flags(ICallablePayload::TArgumentFlags::AutoMap);
+        argBuilder->Add<TSeparatorNArg>().Done();
+        if (!typesOnly) {
+            builder.Implementation(new TBuildQueryString(builder.GetSourcePosition(),
+                                                         firstArgTypeId));
+        }
+        return true;
+    }
+}
diff --git a/yql/essentials/udfs/common/url_base/lib/url_query.h b/yql/essentials/udfs/common/url_base/lib/url_query.h
new file mode 100644
index 00000000000..552b8527823
--- /dev/null
+++ b/yql/essentials/udfs/common/url_base/lib/url_query.h
@@ -0,0 +1,134 @@
+#pragma once
+
+#include <yql/essentials/public/udf/udf_helpers.h>
+
+namespace NUrlUdf {
+ using namespace NYql::NUdf;
+
+    // Common base of the query-string UDFs: the "Separator" named argument and
+    // builders for the container types they accept or return. In every builder
+    // `optional` switches the value type from a plain string to an optional one.
+    struct TQueryStringConv : public TBoxedValue {
+    protected:
+        static constexpr char Separator[] = "Separator";
+
+        using TQueryStr = char*;
+        using TSeparatorNArg = TNamedArg<TQueryStr, Separator>;
+
+        // List of (key, value) tuples.
+        static inline TType* GetListType(const IFunctionTypeInfoBuilder& builder,
+                                         bool optional = false)
+        {
+            TType* pairType = nullptr;
+            if (optional) {
+                pairType = builder.Tuple()->Add<TQueryStr>().Add(builder.Optional()->Item<TQueryStr>().Build()).Build();
+            } else {
+                pairType = builder.Tuple()->Add<TQueryStr>().Add<TQueryStr>().Build();
+            }
+            return builder.List()->Item(pairType).Build();
+        }
+
+        // Dict from key to a list of values.
+        static inline TType* GetDictType(const IFunctionTypeInfoBuilder& builder,
+                                         bool optional = false)
+        {
+            TType* valuesType = nullptr;
+            if (optional) {
+                valuesType = builder.List()->Item(builder.Optional()->Item<TQueryStr>().Build()).Build();
+            } else {
+                valuesType = builder.List()->Item<TQueryStr>().Build();
+            }
+            return builder.Dict()->Key<TQueryStr>().Value(valuesType).Build();
+        }
+
+        // Dict from key to a single value.
+        static inline TType* GetFlattenDictType(const IFunctionTypeInfoBuilder& builder,
+                                                bool optional = false)
+        {
+            if (optional) {
+                return builder.Dict()->Key<TQueryStr>().Value(builder.Optional()->Item<TQueryStr>().Build()).Build();
+            }
+            return builder.Dict()->Key<TQueryStr>().Value<TQueryStr>().Build();
+        }
+    };
+
+ // Shared base of the query-string parsing UDFs: declares the optional named
+ // arguments, the common signature and the parsing routine itself.
+ struct TQueryStringParse: public TQueryStringConv {
+ // pos is the source position that RunImpl() puts in front of its error messages.
+ explicit TQueryStringParse(TSourcePosition&& pos) : Pos_(std::move(pos)) {}
+
+ protected:
+ // Names of the optional named arguments.
+ static constexpr char KeepBlankValues[] = "KeepBlankValues";
+ static constexpr char Strict[] = "Strict";
+ static constexpr char MaxFields[] = "MaxFields";
+
+ using TKeepBlankValuesNArg = TNamedArg<bool, KeepBlankValues>;
+ using TStrictNArg = TNamedArg<bool, Strict>;
+ using TMaxFieldsNArg = TNamedArg<ui32, MaxFields>;
+
+ // Declares the arguments and sets retType as the result type.
+ static void MakeSignature(IFunctionTypeInfoBuilder& builder, const TType* retType);
+
+ // Parses args[0] into (name, value) pairs according to the optional arguments.
+ std::vector<std::pair<TString, TString>>
+ RunImpl(const TUnboxedValuePod* args) const;
+
+ private:
+ TSourcePosition Pos_;
+ };
+
+    // Query-string parser that returns the parsed pairs as a list; registered
+    // under the name "QueryStringToList".
+    struct TQueryStringToList : public TQueryStringParse {
+        // `pos` is a plain rvalue reference, not a forwarding reference, so it
+        // is passed on with std::move (as TQueryStringToDict does).
+        explicit TQueryStringToList(TSourcePosition&& pos)
+            : TQueryStringParse(std::move(pos)) {}
+
+        // Name that DeclareSignature() compares the requested function name with.
+        static const TStringRef& Name() {
+            static const auto name = TStringRef::Of("QueryStringToList");
+            return name;
+        }
+
+        // Declares the signature and, unless typesOnly, the implementation;
+        // returns whether `name` matched Name().
+        static bool DeclareSignature(const TStringRef& name,
+                                     TType*,
+                                     IFunctionTypeInfoBuilder& builder,
+                                     bool typesOnly);
+
+        TUnboxedValue Run(const IValueBuilder* valueBuilder,
+                          const TUnboxedValuePod* args) const override;
+    };
+
+ // Query-string parser that returns the parsed pairs as a dict; registered
+ // under the name "QueryStringToDict".
+ struct TQueryStringToDict : public TQueryStringParse {
+ // dictType is the result type, kept so that Run() can build dict values of it.
+ explicit TQueryStringToDict(TType* dictType, TSourcePosition&& pos)
+ : TQueryStringParse(std::move(pos))
+ , DictType_(dictType)
+ {}
+
+ // Name that DeclareSignature() compares the requested function name with.
+ static const TStringRef& Name() {
+ static const auto name = TStringRef::Of("QueryStringToDict");
+ return name;
+ }
+
+ // Declares the signature and, unless typesOnly, the implementation;
+ // returns whether `name` matched Name().
+ static bool DeclareSignature(const TStringRef& name,
+ TType*,
+ IFunctionTypeInfoBuilder& builder,
+ bool typesOnly);
+
+ TUnboxedValue Run(const IValueBuilder* valueBuilder,
+ const TUnboxedValuePod* args) const override;
+
+ private:
+ // Not owned here. NOTE(review): presumably owned by the type builder - confirm.
+ TType* DictType_;
+ };
+
+    // UDF that assembles a query string from a dict or list; registered under
+    // the name "BuildQueryString". The shape of the first argument is detected
+    // in DeclareSignature() and stored so that Run() can dispatch on it.
+    class TBuildQueryString : public TQueryStringConv {
+    private:
+        // Supported shapes of the first argument.
+        enum class EFirstArgTypeId {
+            None,
+            Dict,
+            FlattenDict,
+            List,
+        };
+
+        TSourcePosition Pos_;
+        EFirstArgTypeId FirstArgTypeId_;
+
+    public:
+        using TTypeAwareMarker = bool;
+
+        explicit TBuildQueryString(TSourcePosition&& pos, EFirstArgTypeId firstArgTypeId)
+            : Pos_(std::move(pos))
+            , FirstArgTypeId_(firstArgTypeId)
+        {}
+
+        // Name that DeclareSignature() compares the requested function name with.
+        static const TStringRef& Name() {
+            static const auto name = TStringRef::Of("BuildQueryString");
+            return name;
+        }
+
+        TUnboxedValue Run(const IValueBuilder* valueBuilder,
+                          const TUnboxedValuePod* args) const override;
+
+        // Declares the signature from the user type and, unless typesOnly, the
+        // implementation.
+        static bool DeclareSignature(const TStringRef& name,
+                                     TType* userType,
+                                     IFunctionTypeInfoBuilder& builder,
+                                     bool typesOnly);
+    };
+}
diff --git a/yql/essentials/udfs/common/url_base/lib/ya.make b/yql/essentials/udfs/common/url_base/lib/ya.make
new file mode 100644
index 00000000000..1d9cfa12d01
--- /dev/null
+++ b/yql/essentials/udfs/common/url_base/lib/ya.make
@@ -0,0 +1,27 @@
+LIBRARY()
+
+YQL_ABI_VERSION(
+ 2
+ 37
+ 0
+)
+
+SRCS(
+ url_base_udf.cpp
+ url_parse.cpp
+ url_query.cpp
+)
+
+PEERDIR(
+ library/cpp/charset
+ library/cpp/string_utils/quote
+ library/cpp/string_utils/url
+ library/cpp/tld
+ library/cpp/unicode/punycode
+ library/cpp/uri
+ yql/essentials/public/udf
+ yql/essentials/public/udf/arrow
+ contrib/libs/apache/arrow
+)
+
+END()
diff --git a/yql/essentials/udfs/common/url_base/test/canondata/result.json b/yql/essentials/udfs/common/url_base/test/canondata/result.json
new file mode 100644
index 00000000000..98e905ecde1
--- /dev/null
+++ b/yql/essentials/udfs/common/url_base/test/canondata/result.json
@@ -0,0 +1,47 @@
+{
+ "test.test[BlockPunycode]": [
+ {
+ "uri": "file://test.test_BlockPunycode_/results.txt"
+ }
+ ],
+ "test.test[BlockTld]": [
+ {
+ "uri": "file://test.test_BlockTld_/results.txt"
+ }
+ ],
+ "test.test[BlockUrl]": [
+ {
+ "uri": "file://test.test_BlockUrl_/results.txt"
+ }
+ ],
+ "test.test[Punycode]": [
+ {
+ "uri": "file://test.test_Punycode_/results.txt"
+ }
+ ],
+ "test.test[Tld]": [
+ {
+ "uri": "file://test.test_Tld_/results.txt"
+ }
+ ],
+ "test.test[UrlQueryMaxFieldsErr]": [
+ {
+ "uri": "file://test.test_UrlQueryMaxFieldsErr_/extracted"
+ }
+ ],
+ "test.test[UrlQueryStrictErr]": [
+ {
+ "uri": "file://test.test_UrlQueryStrictErr_/extracted"
+ }
+ ],
+ "test.test[UrlQuery]": [
+ {
+ "uri": "file://test.test_UrlQuery_/results.txt"
+ }
+ ],
+ "test.test[Url]": [
+ {
+ "uri": "file://test.test_Url_/results.txt"
+ }
+ ]
+}
diff --git a/yql/essentials/udfs/common/url_base/test/canondata/test.test_BlockPunycode_/results.txt b/yql/essentials/udfs/common/url_base/test/canondata/test.test_BlockPunycode_/results.txt
new file mode 100644
index 00000000000..69224c4ac6d
--- /dev/null
+++ b/yql/essentials/udfs/common/url_base/test/canondata/test.test_BlockPunycode_/results.txt
@@ -0,0 +1,106 @@
+[
+ {
+ "Write" = [
+ {
+ "Type" = [
+ "ListType";
+ [
+ "StructType";
+ [
+ [
+ "value";
+ [
+ "DataType";
+ "String"
+ ]
+ ];
+ [
+ "hostname_utf";
+ [
+ "OptionalType";
+ [
+ "DataType";
+ "String"
+ ]
+ ]
+ ];
+ [
+ "punycode_hostname";
+ [
+ "OptionalType";
+ [
+ "DataType";
+ "String"
+ ]
+ ]
+ ];
+ [
+ "hostname_utf_forced";
+ [
+ "DataType";
+ "String"
+ ]
+ ];
+ [
+ "punycode_hostname_forced";
+ [
+ "DataType";
+ "String"
+ ]
+ ];
+ [
+ "can_be_punycode";
+ [
+ "DataType";
+ "Bool"
+ ]
+ ]
+ ]
+ ]
+ ];
+ "Data" = [
+ [
+ "ab\xC3\246cd\xC3\266ef";
+ #;
+ #;
+ "ab\xC3\246cd\xC3\266ef";
+ "xn--abcdef-qua4k";
+ %false
+ ];
+ [
+ "\xD1\x8F\xD0\xBD\xD0\xB4\xD0\xB5\xD0\xBA\xD1\x81.\xD1\x80\xD1\x83";
+ #;
+ #;
+ "\xD1\x8F\xD0\xBD\xD0\xB4\xD0\xB5\xD0\xBA\xD1\x81.\xD1\x80\xD1\x83";
+ "xn--d1acpjx3f.xn--p1ag";
+ %false
+ ];
+ [
+ "yandex.ru";
+ [
+ "yandex.ru"
+ ];
+ [
+ "yandex.ru"
+ ];
+ "yandex.ru";
+ "yandex.ru";
+ %false
+ ];
+ [
+ "xn--d1acpjx3f.xn--p1ag";
+ [
+ "\xD1\x8F\xD0\xBD\xD0\xB4\xD0\xB5\xD0\xBA\xD1\x81.\xD1\x80\xD1\x83"
+ ];
+ [
+ "xn--d1acpjx3f.xn--p1ag"
+ ];
+ "\xD1\x8F\xD0\xBD\xD0\xB4\xD0\xB5\xD0\xBA\xD1\x81.\xD1\x80\xD1\x83";
+ "xn--d1acpjx3f.xn--p1ag";
+ %true
+ ]
+ ]
+ }
+ ]
+ }
+] \ No newline at end of file
diff --git a/yql/essentials/udfs/common/url_base/test/canondata/test.test_BlockTld_/results.txt b/yql/essentials/udfs/common/url_base/test/canondata/test.test_BlockTld_/results.txt
new file mode 100644
index 00000000000..f45bb011da2
--- /dev/null
+++ b/yql/essentials/udfs/common/url_base/test/canondata/test.test_BlockTld_/results.txt
@@ -0,0 +1,59 @@
+[
+ {
+ "Write" = [
+ {
+ "Type" = [
+ "ListType";
+ [
+ "StructType";
+ [
+ [
+ "tld";
+ [
+ "DataType";
+ "String"
+ ]
+ ];
+ [
+ "known";
+ [
+ "DataType";
+ "Bool"
+ ]
+ ];
+ [
+ "well_known";
+ [
+ "DataType";
+ "Bool"
+ ]
+ ]
+ ]
+ ]
+ ];
+ "Data" = [
+ [
+ "ru";
+ %true;
+ %true
+ ];
+ [
+ "123";
+ %false;
+ %false
+ ];
+ [
+ "yandex";
+ %true;
+ %false
+ ];
+ [
+ "sdfsdfsdf";
+ %false;
+ %false
+ ]
+ ]
+ }
+ ]
+ }
+] \ No newline at end of file
diff --git a/yql/essentials/udfs/common/url_base/test/canondata/test.test_BlockUrl_/results.txt b/yql/essentials/udfs/common/url_base/test/canondata/test.test_BlockUrl_/results.txt
new file mode 100644
index 00000000000..7b5257063b6
--- /dev/null
+++ b/yql/essentials/udfs/common/url_base/test/canondata/test.test_BlockUrl_/results.txt
@@ -0,0 +1,1212 @@
+[
+ {
+ "Write" = [
+ {
+ "Type" = [
+ "ListType";
+ [
+ "StructType";
+ [
+ [
+ "value";
+ [
+ "DataType";
+ "String"
+ ]
+ ];
+ [
+ "encode";
+ [
+ "OptionalType";
+ [
+ "DataType";
+ "String"
+ ]
+ ]
+ ];
+ [
+ "decode";
+ [
+ "OptionalType";
+ [
+ "DataType";
+ "String"
+ ]
+ ]
+ ];
+ [
+ "param";
+ [
+ "OptionalType";
+ [
+ "DataType";
+ "String"
+ ]
+ ]
+ ];
+ [
+ "cut_qs_and_fragment";
+ [
+ "DataType";
+ "String"
+ ]
+ ];
+ [
+ "host";
+ [
+ "OptionalType";
+ [
+ "DataType";
+ "String"
+ ]
+ ]
+ ];
+ [
+ "cut_www";
+ [
+ "OptionalType";
+ [
+ "DataType";
+ "String"
+ ]
+ ]
+ ];
+ [
+ "cut_www2";
+ [
+ "OptionalType";
+ [
+ "DataType";
+ "String"
+ ]
+ ]
+ ];
+ [
+ "tld";
+ [
+ "DataType";
+ "String"
+ ]
+ ];
+ [
+ "punycode";
+ [
+ "OptionalType";
+ [
+ "DataType";
+ "String"
+ ]
+ ]
+ ];
+ [
+ "cut_scheme";
+ [
+ "OptionalType";
+ [
+ "DataType";
+ "String"
+ ]
+ ]
+ ];
+ [
+ "host_port";
+ [
+ "OptionalType";
+ [
+ "DataType";
+ "String"
+ ]
+ ]
+ ];
+ [
+ "scheme_host";
+ [
+ "OptionalType";
+ [
+ "DataType";
+ "String"
+ ]
+ ]
+ ];
+ [
+ "scheme_host_port";
+ [
+ "OptionalType";
+ [
+ "DataType";
+ "String"
+ ]
+ ]
+ ];
+ [
+ "tail";
+ [
+ "OptionalType";
+ [
+ "DataType";
+ "String"
+ ]
+ ]
+ ];
+ [
+ "path";
+ [
+ "OptionalType";
+ [
+ "DataType";
+ "String"
+ ]
+ ]
+ ];
+ [
+ "fragment";
+ [
+ "OptionalType";
+ [
+ "DataType";
+ "String"
+ ]
+ ]
+ ];
+ [
+ "port";
+ [
+ "OptionalType";
+ [
+ "DataType";
+ "Uint64"
+ ]
+ ]
+ ];
+ [
+ "domain0";
+ [
+ "OptionalType";
+ [
+ "DataType";
+ "String"
+ ]
+ ]
+ ];
+ [
+ "domain1";
+ [
+ "OptionalType";
+ [
+ "DataType";
+ "String"
+ ]
+ ]
+ ];
+ [
+ "domain3";
+ [
+ "OptionalType";
+ [
+ "DataType";
+ "String"
+ ]
+ ]
+ ];
+ [
+ "domain_level";
+ [
+ "DataType";
+ "Uint64"
+ ]
+ ];
+ [
+ "norm";
+ [
+ "OptionalType";
+ [
+ "DataType";
+ "String"
+ ]
+ ]
+ ]
+ ]
+ ]
+ ];
+ "Data" = [
+ [
+ "http://";
+ [
+ "http://"
+ ];
+ [
+ "http://"
+ ];
+ #;
+ "http://";
+ #;
+ #;
+ #;
+ "";
+ [
+ "http://"
+ ];
+ #;
+ #;
+ [
+ "http://"
+ ];
+ [
+ "http://"
+ ];
+ [
+ "/"
+ ];
+ [
+ "/"
+ ];
+ #;
+ [
+ "80"
+ ];
+ #;
+ #;
+ #;
+ "1";
+ #
+ ];
+ [
+ "http://lenta.ru";
+ [
+ "http://lenta.ru"
+ ];
+ [
+ "http://lenta.ru"
+ ];
+ #;
+ "http://lenta.ru";
+ [
+ "lenta.ru"
+ ];
+ [
+ "lenta.ru"
+ ];
+ [
+ "lenta.ru"
+ ];
+ "ru";
+ [
+ "http://lenta.ru"
+ ];
+ [
+ "lenta.ru"
+ ];
+ [
+ "lenta.ru"
+ ];
+ [
+ "http://lenta.ru"
+ ];
+ [
+ "http://lenta.ru"
+ ];
+ [
+ "/"
+ ];
+ [
+ "/"
+ ];
+ #;
+ [
+ "80"
+ ];
+ #;
+ [
+ "ru"
+ ];
+ #;
+ "2";
+ [
+ "http://lenta.ru/"
+ ]
+ ];
+ [
+ "http://someone.livejournal.com/blog";
+ [
+ "http://someone.livejournal.com/blog"
+ ];
+ [
+ "http://someone.livejournal.com/blog"
+ ];
+ #;
+ "http://someone.livejournal.com/blog";
+ [
+ "someone.livejournal.com"
+ ];
+ [
+ "someone.livejournal.com"
+ ];
+ [
+ "someone.livejournal.com"
+ ];
+ "com";
+ [
+ "http://someone.livejournal.com/blog"
+ ];
+ [
+ "someone.livejournal.com/blog"
+ ];
+ [
+ "someone.livejournal.com"
+ ];
+ [
+ "http://someone.livejournal.com"
+ ];
+ [
+ "http://someone.livejournal.com"
+ ];
+ [
+ "/blog"
+ ];
+ [
+ "/blog"
+ ];
+ #;
+ [
+ "80"
+ ];
+ #;
+ [
+ "com"
+ ];
+ [
+ "someone.livejournal.com"
+ ];
+ "3";
+ [
+ "http://someone.livejournal.com/blog"
+ ]
+ ];
+ [
+ "http://bbc.co.uk/";
+ [
+ "http://bbc.co.uk/"
+ ];
+ [
+ "http://bbc.co.uk/"
+ ];
+ #;
+ "http://bbc.co.uk/";
+ [
+ "bbc.co.uk"
+ ];
+ [
+ "bbc.co.uk"
+ ];
+ [
+ "bbc.co.uk"
+ ];
+ "uk";
+ [
+ "http://bbc.co.uk/"
+ ];
+ [
+ "bbc.co.uk/"
+ ];
+ [
+ "bbc.co.uk"
+ ];
+ [
+ "http://bbc.co.uk"
+ ];
+ [
+ "http://bbc.co.uk"
+ ];
+ [
+ "/"
+ ];
+ [
+ "/"
+ ];
+ #;
+ [
+ "80"
+ ];
+ #;
+ [
+ "uk"
+ ];
+ [
+ "bbc.co.uk"
+ ];
+ "3";
+ [
+ "http://bbc.co.uk/"
+ ]
+ ];
+ [
+ "https://www.yandex.com.tr/search";
+ [
+ "https://www.yandex.com.tr/search"
+ ];
+ [
+ "https://www.yandex.com.tr/search"
+ ];
+ #;
+ "https://www.yandex.com.tr/search";
+ [
+ "www.yandex.com.tr"
+ ];
+ [
+ "yandex.com.tr"
+ ];
+ [
+ "yandex.com.tr"
+ ];
+ "tr";
+ [
+ "https://www.yandex.com.tr/search"
+ ];
+ [
+ "www.yandex.com.tr/search"
+ ];
+ [
+ "www.yandex.com.tr"
+ ];
+ [
+ "https://www.yandex.com.tr"
+ ];
+ [
+ "https://www.yandex.com.tr"
+ ];
+ [
+ "/search"
+ ];
+ [
+ "/search"
+ ];
+ #;
+ [
+ "443"
+ ];
+ #;
+ [
+ "tr"
+ ];
+ [
+ "yandex.com.tr"
+ ];
+ "4";
+ [
+ "https://www.yandex.com.tr/search"
+ ]
+ ];
+ [
+ "https://www2.yandex.com.tr/search";
+ [
+ "https://www2.yandex.com.tr/search"
+ ];
+ [
+ "https://www2.yandex.com.tr/search"
+ ];
+ #;
+ "https://www2.yandex.com.tr/search";
+ [
+ "www2.yandex.com.tr"
+ ];
+ [
+ "www2.yandex.com.tr"
+ ];
+ [
+ "yandex.com.tr"
+ ];
+ "tr";
+ [
+ "https://www2.yandex.com.tr/search"
+ ];
+ [
+ "www2.yandex.com.tr/search"
+ ];
+ [
+ "www2.yandex.com.tr"
+ ];
+ [
+ "https://www2.yandex.com.tr"
+ ];
+ [
+ "https://www2.yandex.com.tr"
+ ];
+ [
+ "/search"
+ ];
+ [
+ "/search"
+ ];
+ #;
+ [
+ "443"
+ ];
+ #;
+ [
+ "tr"
+ ];
+ [
+ "yandex.com.tr"
+ ];
+ "4";
+ [
+ "https://www2.yandex.com.tr/search"
+ ]
+ ];
+ [
+ "lenta.ru";
+ [
+ "lenta.ru"
+ ];
+ [
+ "lenta.ru"
+ ];
+ #;
+ "lenta.ru";
+ [
+ "lenta.ru"
+ ];
+ [
+ "lenta.ru"
+ ];
+ [
+ "lenta.ru"
+ ];
+ "ru";
+ [
+ "lenta.ru"
+ ];
+ [
+ "lenta.ru"
+ ];
+ [
+ "lenta.ru"
+ ];
+ [
+ "lenta.ru"
+ ];
+ [
+ "lenta.ru"
+ ];
+ [
+ "/"
+ ];
+ [
+ "/"
+ ];
+ #;
+ #;
+ #;
+ [
+ "ru"
+ ];
+ #;
+ "2";
+ #
+ ];
+ [
+ "bbc.co.uk/news";
+ [
+ "bbc.co.uk/news"
+ ];
+ [
+ "bbc.co.uk/news"
+ ];
+ #;
+ "bbc.co.uk/news";
+ [
+ "bbc.co.uk"
+ ];
+ [
+ "bbc.co.uk"
+ ];
+ [
+ "bbc.co.uk"
+ ];
+ "uk";
+ [
+ "bbc.co.uk/news"
+ ];
+ [
+ "bbc.co.uk/news"
+ ];
+ [
+ "bbc.co.uk"
+ ];
+ [
+ "bbc.co.uk"
+ ];
+ [
+ "bbc.co.uk"
+ ];
+ [
+ "/news"
+ ];
+ [
+ "/news"
+ ];
+ #;
+ #;
+ #;
+ [
+ "uk"
+ ];
+ [
+ "bbc.co.uk"
+ ];
+ "3";
+ #
+ ];
+ [
+ "yandex.com.tr/maps?foo=";
+ [
+ "yandex.com.tr/maps?foo="
+ ];
+ [
+ "yandex.com.tr/maps?foo="
+ ];
+ [
+ ""
+ ];
+ "yandex.com.tr/maps";
+ [
+ "yandex.com.tr"
+ ];
+ [
+ "yandex.com.tr"
+ ];
+ [
+ "yandex.com.tr"
+ ];
+ "tr";
+ [
+ "yandex.com.tr/maps?foo="
+ ];
+ [
+ "yandex.com.tr/maps?foo="
+ ];
+ [
+ "yandex.com.tr"
+ ];
+ [
+ "yandex.com.tr"
+ ];
+ [
+ "yandex.com.tr"
+ ];
+ [
+ "/maps?foo="
+ ];
+ [
+ "/maps"
+ ];
+ #;
+ #;
+ #;
+ [
+ "tr"
+ ];
+ [
+ "yandex.com.tr"
+ ];
+ "3";
+ #
+ ];
+ [
+ "someone.livejournal.com?foo=bar#top";
+ [
+ "someone.livejournal.com?foo=bar#top"
+ ];
+ [
+ "someone.livejournal.com?foo=bar#top"
+ ];
+ [
+ "bar"
+ ];
+ "someone.livejournal.com";
+ [
+ "someone.livejournal.com"
+ ];
+ [
+ "someone.livejournal.com"
+ ];
+ [
+ "someone.livejournal.com"
+ ];
+ "com";
+ [
+ "someone.livejournal.com?foo=bar#top"
+ ];
+ [
+ "someone.livejournal.com?foo=bar#top"
+ ];
+ [
+ "someone.livejournal.com"
+ ];
+ [
+ "someone.livejournal.com"
+ ];
+ [
+ "someone.livejournal.com"
+ ];
+ [
+ "/?foo=bar#top"
+ ];
+ [
+ "/"
+ ];
+ [
+ "top"
+ ];
+ #;
+ #;
+ [
+ "com"
+ ];
+ [
+ "someone.livejournal.com"
+ ];
+ "3";
+ #
+ ];
+ [
+ "a.b.c.d.e.f.g.h.i.j.k#l.m.n";
+ [
+ "a.b.c.d.e.f.g.h.i.j.k#l.m.n"
+ ];
+ [
+ "a.b.c.d.e.f.g.h.i.j.k#l.m.n"
+ ];
+ #;
+ "a.b.c.d.e.f.g.h.i.j.k";
+ [
+ "a.b.c.d.e.f.g.h.i.j.k"
+ ];
+ [
+ "a.b.c.d.e.f.g.h.i.j.k"
+ ];
+ [
+ "a.b.c.d.e.f.g.h.i.j.k"
+ ];
+ "k";
+ [
+ "a.b.c.d.e.f.g.h.i.j.k#l.m.n"
+ ];
+ [
+ "a.b.c.d.e.f.g.h.i.j.k#l.m.n"
+ ];
+ [
+ "a.b.c.d.e.f.g.h.i.j.k"
+ ];
+ [
+ "a.b.c.d.e.f.g.h.i.j.k"
+ ];
+ [
+ "a.b.c.d.e.f.g.h.i.j.k"
+ ];
+ [
+ "/#l.m.n"
+ ];
+ [
+ "/"
+ ];
+ [
+ "l.m.n"
+ ];
+ #;
+ #;
+ [
+ "k"
+ ];
+ [
+ "i.j.k"
+ ];
+ "11";
+ #
+ ];
+ [
+ "foo.tl.md/+%D1%8E%D0%BD%D0%B8%D0%BA%D0%BE%D0%B4";
+ [
+ "foo.tl.md/+%D1%8E%D0%BD%D0%B8%D0%BA%D0%BE%D0%B4"
+ ];
+ [
+ "foo.tl.md/ \xD1\x8E\xD0\xBD\xD0\xB8\xD0\xBA\xD0\xBE\xD0\xB4"
+ ];
+ #;
+ "foo.tl.md/+%D1%8E%D0%BD%D0%B8%D0%BA%D0%BE%D0%B4";
+ [
+ "foo.tl.md"
+ ];
+ [
+ "foo.tl.md"
+ ];
+ [
+ "foo.tl.md"
+ ];
+ "md";
+ [
+ "foo.tl.md/+%D1%8E%D0%BD%D0%B8%D0%BA%D0%BE%D0%B4"
+ ];
+ [
+ "foo.tl.md/+%D1%8E%D0%BD%D0%B8%D0%BA%D0%BE%D0%B4"
+ ];
+ [
+ "foo.tl.md"
+ ];
+ [
+ "foo.tl.md"
+ ];
+ [
+ "foo.tl.md"
+ ];
+ [
+ "/+%D1%8E%D0%BD%D0%B8%D0%BA%D0%BE%D0%B4"
+ ];
+ [
+ "/+%D1%8E%D0%BD%D0%B8%D0%BA%D0%BE%D0%B4"
+ ];
+ #;
+ #;
+ #;
+ [
+ "md"
+ ];
+ [
+ "foo.tl.md"
+ ];
+ "3";
+ #
+ ];
+ [
+ "xn--d1acpjx3f.xn--p1ai";
+ [
+ "xn--d1acpjx3f.xn--p1ai"
+ ];
+ [
+ "xn--d1acpjx3f.xn--p1ai"
+ ];
+ #;
+ "xn--d1acpjx3f.xn--p1ai";
+ [
+ "xn--d1acpjx3f.xn--p1ai"
+ ];
+ [
+ "xn--d1acpjx3f.xn--p1ai"
+ ];
+ [
+ "xn--d1acpjx3f.xn--p1ai"
+ ];
+ "xn--p1ai";
+ [
+ "\xD1\x8F\xD0\xBD\xD0\xB4\xD0\xB5\xD0\xBA\xD1\x81.\xD1\x80\xD1\x84"
+ ];
+ [
+ "xn--d1acpjx3f.xn--p1ai"
+ ];
+ [
+ "xn--d1acpjx3f.xn--p1ai"
+ ];
+ [
+ "xn--d1acpjx3f.xn--p1ai"
+ ];
+ [
+ "xn--d1acpjx3f.xn--p1ai"
+ ];
+ [
+ "/"
+ ];
+ [
+ "/"
+ ];
+ #;
+ #;
+ #;
+ [
+ "xn--p1ai"
+ ];
+ #;
+ "2";
+ #
+ ];
+ [
+ "https://ya.ru:80/search/?text=test&lr=213#top";
+ [
+ "https://ya.ru:80/search/?text=test&lr=213#top"
+ ];
+ [
+ "https://ya.ru:80/search/?text=test&lr=213#top"
+ ];
+ #;
+ "https://ya.ru:80/search/";
+ [
+ "ya.ru"
+ ];
+ [
+ "ya.ru"
+ ];
+ [
+ "ya.ru"
+ ];
+ "ru";
+ [
+ "https://ya.ru:80/search/?text=test&lr=213#top"
+ ];
+ [
+ "ya.ru:80/search/?text=test&lr=213#top"
+ ];
+ [
+ "ya.ru:80"
+ ];
+ [
+ "https://ya.ru"
+ ];
+ [
+ "https://ya.ru:80"
+ ];
+ [
+ "/search/?text=test&lr=213#top"
+ ];
+ [
+ "/search/"
+ ];
+ [
+ "top"
+ ];
+ [
+ "80"
+ ];
+ #;
+ [
+ "ru"
+ ];
+ #;
+ "2";
+ [
+ "https://ya.ru:80/search/?text=test&lr=213"
+ ]
+ ];
+ [
+ "https://ya.ru/search/?text=%2B";
+ [
+ "https://ya.ru/search/?text=%2B"
+ ];
+ [
+ "https://ya.ru/search/?text=+"
+ ];
+ #;
+ "https://ya.ru/search/";
+ [
+ "ya.ru"
+ ];
+ [
+ "ya.ru"
+ ];
+ [
+ "ya.ru"
+ ];
+ "ru";
+ [
+ "https://ya.ru/search/?text=%2B"
+ ];
+ [
+ "ya.ru/search/?text=%2B"
+ ];
+ [
+ "ya.ru"
+ ];
+ [
+ "https://ya.ru"
+ ];
+ [
+ "https://ya.ru"
+ ];
+ [
+ "/search/?text=%2B"
+ ];
+ [
+ "/search/"
+ ];
+ #;
+ [
+ "443"
+ ];
+ #;
+ [
+ "ru"
+ ];
+ #;
+ "2";
+ [
+ "https://ya.ru/search/?text=%2B"
+ ]
+ ];
+ [
+ "goal://market.yandex.ru/product-page_scroll-box_product_visible";
+ [
+ "goal://market.yandex.ru/product-page_scroll-box_product_visible"
+ ];
+ [
+ "goal://market.yandex.ru/product-page_scroll-box_product_visible"
+ ];
+ #;
+ "goal://market.yandex.ru/product-page_scroll-box_product_visible";
+ [
+ "market.yandex.ru"
+ ];
+ [
+ "market.yandex.ru"
+ ];
+ [
+ "market.yandex.ru"
+ ];
+ "ru";
+ [
+ "goal://market.yandex.ru/product-page_scroll-box_product_visible"
+ ];
+ [
+ "market.yandex.ru/product-page_scroll-box_product_visible"
+ ];
+ [
+ "market.yandex.ru"
+ ];
+ [
+ "goal://market.yandex.ru"
+ ];
+ [
+ "goal://market.yandex.ru"
+ ];
+ [
+ "/product-page_scroll-box_product_visible"
+ ];
+ [
+ "/product-page_scroll-box_product_visible"
+ ];
+ #;
+ #;
+ #;
+ [
+ "ru"
+ ];
+ [
+ "market.yandex.ru"
+ ];
+ "3";
+ #
+ ];
+ [
+ "Http://ya.ru";
+ [
+ "Http://ya.ru"
+ ];
+ [
+ "Http://ya.ru"
+ ];
+ #;
+ "Http://ya.ru";
+ [
+ "ya.ru"
+ ];
+ [
+ "ya.ru"
+ ];
+ [
+ "ya.ru"
+ ];
+ "ru";
+ [
+ "Http://ya.ru"
+ ];
+ [
+ "ya.ru"
+ ];
+ [
+ "ya.ru"
+ ];
+ [
+ "Http://ya.ru"
+ ];
+ [
+ "Http://ya.ru"
+ ];
+ [
+ "/"
+ ];
+ [
+ "/"
+ ];
+ #;
+ [
+ "80"
+ ];
+ #;
+ [
+ "ru"
+ ];
+ #;
+ "2";
+ [
+ "http://ya.ru/"
+ ]
+ ];
+ [
+ "ftp://someone.livejournal.com:80/blog";
+ [
+ "ftp://someone.livejournal.com:80/blog"
+ ];
+ [
+ "ftp://someone.livejournal.com:80/blog"
+ ];
+ #;
+ "ftp://someone.livejournal.com:80/blog";
+ [
+ "someone.livejournal.com"
+ ];
+ [
+ "someone.livejournal.com"
+ ];
+ [
+ "someone.livejournal.com"
+ ];
+ "com";
+ [
+ "ftp://someone.livejournal.com:80/blog"
+ ];
+ [
+ "someone.livejournal.com:80/blog"
+ ];
+ [
+ "someone.livejournal.com:80"
+ ];
+ [
+ "ftp://someone.livejournal.com"
+ ];
+ [
+ "ftp://someone.livejournal.com:80"
+ ];
+ [
+ "/blog"
+ ];
+ [
+ "/blog"
+ ];
+ #;
+ [
+ "80"
+ ];
+ #;
+ [
+ "com"
+ ];
+ [
+ "someone.livejournal.com"
+ ];
+ "3";
+ [
+ "ftp://someone.livejournal.com:80/blog"
+ ]
+ ]
+ ]
+ }
+ ]
+ }
+] \ No newline at end of file
diff --git a/yql/essentials/udfs/common/url_base/test/canondata/test.test_Punycode_/results.txt b/yql/essentials/udfs/common/url_base/test/canondata/test.test_Punycode_/results.txt
new file mode 100644
index 00000000000..69224c4ac6d
--- /dev/null
+++ b/yql/essentials/udfs/common/url_base/test/canondata/test.test_Punycode_/results.txt
@@ -0,0 +1,106 @@
+[
+ {
+ "Write" = [
+ {
+ "Type" = [
+ "ListType";
+ [
+ "StructType";
+ [
+ [
+ "value";
+ [
+ "DataType";
+ "String"
+ ]
+ ];
+ [
+ "hostname_utf";
+ [
+ "OptionalType";
+ [
+ "DataType";
+ "String"
+ ]
+ ]
+ ];
+ [
+ "punycode_hostname";
+ [
+ "OptionalType";
+ [
+ "DataType";
+ "String"
+ ]
+ ]
+ ];
+ [
+ "hostname_utf_forced";
+ [
+ "DataType";
+ "String"
+ ]
+ ];
+ [
+ "punycode_hostname_forced";
+ [
+ "DataType";
+ "String"
+ ]
+ ];
+ [
+ "can_be_punycode";
+ [
+ "DataType";
+ "Bool"
+ ]
+ ]
+ ]
+ ]
+ ];
+ "Data" = [
+ [
+ "ab\xC3\246cd\xC3\266ef";
+ #;
+ #;
+ "ab\xC3\246cd\xC3\266ef";
+ "xn--abcdef-qua4k";
+ %false
+ ];
+ [
+ "\xD1\x8F\xD0\xBD\xD0\xB4\xD0\xB5\xD0\xBA\xD1\x81.\xD1\x80\xD1\x83";
+ #;
+ #;
+ "\xD1\x8F\xD0\xBD\xD0\xB4\xD0\xB5\xD0\xBA\xD1\x81.\xD1\x80\xD1\x83";
+ "xn--d1acpjx3f.xn--p1ag";
+ %false
+ ];
+ [
+ "yandex.ru";
+ [
+ "yandex.ru"
+ ];
+ [
+ "yandex.ru"
+ ];
+ "yandex.ru";
+ "yandex.ru";
+ %false
+ ];
+ [
+ "xn--d1acpjx3f.xn--p1ag";
+ [
+ "\xD1\x8F\xD0\xBD\xD0\xB4\xD0\xB5\xD0\xBA\xD1\x81.\xD1\x80\xD1\x83"
+ ];
+ [
+ "xn--d1acpjx3f.xn--p1ag"
+ ];
+ "\xD1\x8F\xD0\xBD\xD0\xB4\xD0\xB5\xD0\xBA\xD1\x81.\xD1\x80\xD1\x83";
+ "xn--d1acpjx3f.xn--p1ag";
+ %true
+ ]
+ ]
+ }
+ ]
+ }
+] \ No newline at end of file
diff --git a/yql/essentials/udfs/common/url_base/test/canondata/test.test_Tld_/results.txt b/yql/essentials/udfs/common/url_base/test/canondata/test.test_Tld_/results.txt
new file mode 100644
index 00000000000..f45bb011da2
--- /dev/null
+++ b/yql/essentials/udfs/common/url_base/test/canondata/test.test_Tld_/results.txt
@@ -0,0 +1,59 @@
+[
+ {
+ "Write" = [
+ {
+ "Type" = [
+ "ListType";
+ [
+ "StructType";
+ [
+ [
+ "tld";
+ [
+ "DataType";
+ "String"
+ ]
+ ];
+ [
+ "known";
+ [
+ "DataType";
+ "Bool"
+ ]
+ ];
+ [
+ "well_known";
+ [
+ "DataType";
+ "Bool"
+ ]
+ ]
+ ]
+ ]
+ ];
+ "Data" = [
+ [
+ "ru";
+ %true;
+ %true
+ ];
+ [
+ "123";
+ %false;
+ %false
+ ];
+ [
+ "yandex";
+ %true;
+ %false
+ ];
+ [
+ "sdfsdfsdf";
+ %false;
+ %false
+ ]
+ ]
+ }
+ ]
+ }
+] \ No newline at end of file
diff --git a/yql/essentials/udfs/common/url_base/test/canondata/test.test_UrlQueryMaxFieldsErr_/extracted b/yql/essentials/udfs/common/url_base/test/canondata/test.test_UrlQueryMaxFieldsErr_/extracted
new file mode 100644
index 00000000000..e80a93b2997
--- /dev/null
+++ b/yql/essentials/udfs/common/url_base/test/canondata/test.test_UrlQueryMaxFieldsErr_/extracted
@@ -0,0 +1,8 @@
+<tmp_path>/program.sql:<main>: Fatal: Execution
+
+ <tmp_path>/program.sql:<main>:2:1: Fatal: Execution of node: Result
+ SELECT
+ ^
+ <tmp_path>/program.sql:<main>:4:10: Fatal: Max number of fields (2) exceeded: got 3
+ Url::QueryStringToList("glfilter=78318%3A79492&glfilter=561%3A121037&hid=904", 2 AS MaxFields)
+ ^ \ No newline at end of file
diff --git a/yql/essentials/udfs/common/url_base/test/canondata/test.test_UrlQueryStrictErr_/extracted b/yql/essentials/udfs/common/url_base/test/canondata/test.test_UrlQueryStrictErr_/extracted
new file mode 100644
index 00000000000..6326332de1f
--- /dev/null
+++ b/yql/essentials/udfs/common/url_base/test/canondata/test.test_UrlQueryStrictErr_/extracted
@@ -0,0 +1,8 @@
+<tmp_path>/program.sql:<main>: Fatal: Execution
+
+ <tmp_path>/program.sql:<main>:2:1: Fatal: Execution of node: Result
+ SELECT
+ ^
+ <tmp_path>/program.sql:<main>:4:10: Fatal: Bad query field: "mistaken"
+ Url::QueryStringToList("glfilter=78318%3A79492&mistaken&glfilter=1&hid=904")
+ ^ \ No newline at end of file
diff --git a/yql/essentials/udfs/common/url_base/test/canondata/test.test_UrlQuery_/results.txt b/yql/essentials/udfs/common/url_base/test/canondata/test.test_UrlQuery_/results.txt
new file mode 100644
index 00000000000..303f480161c
--- /dev/null
+++ b/yql/essentials/udfs/common/url_base/test/canondata/test.test_UrlQuery_/results.txt
@@ -0,0 +1,1112 @@
+[
+ {
+ "Write" = [
+ {
+ "Type" = [
+ "ListType";
+ [
+ "StructType";
+ [
+ [
+ "value";
+ [
+ "DataType";
+ "String"
+ ]
+ ];
+ [
+ "base_list";
+ [
+ "ListType";
+ [
+ "TupleType";
+ [
+ [
+ "DataType";
+ "String"
+ ];
+ [
+ "DataType";
+ "String"
+ ]
+ ]
+ ]
+ ]
+ ];
+ [
+ "base_list_build";
+ [
+ "DataType";
+ "String"
+ ]
+ ];
+ [
+ "keep_blank_list";
+ [
+ "ListType";
+ [
+ "TupleType";
+ [
+ [
+ "DataType";
+ "String"
+ ];
+ [
+ "DataType";
+ "String"
+ ]
+ ]
+ ]
+ ]
+ ];
+ [
+ "keep_blank_list_build";
+ [
+ "DataType";
+ "String"
+ ]
+ ];
+ [
+ "sep_semicol_list";
+ [
+ "ListType";
+ [
+ "TupleType";
+ [
+ [
+ "DataType";
+ "String"
+ ];
+ [
+ "DataType";
+ "String"
+ ]
+ ]
+ ]
+ ]
+ ];
+ [
+ "sep_semicol_list_build";
+ [
+ "DataType";
+ "String"
+ ]
+ ];
+ [
+ "base_dict";
+ [
+ "DictType";
+ [
+ "DataType";
+ "String"
+ ];
+ [
+ "ListType";
+ [
+ "DataType";
+ "String"
+ ]
+ ]
+ ]
+ ];
+ [
+ "base_dict_build";
+ [
+ "DataType";
+ "String"
+ ]
+ ];
+ [
+ "keep_blank_dict";
+ [
+ "DictType";
+ [
+ "DataType";
+ "String"
+ ];
+ [
+ "ListType";
+ [
+ "DataType";
+ "String"
+ ]
+ ]
+ ]
+ ];
+ [
+ "keep_blank_dict_build";
+ [
+ "DataType";
+ "String"
+ ]
+ ];
+ [
+ "sep_semicol_dict";
+ [
+ "DictType";
+ [
+ "DataType";
+ "String"
+ ];
+ [
+ "ListType";
+ [
+ "DataType";
+ "String"
+ ]
+ ]
+ ]
+ ];
+ [
+ "sep_semicol_dict_build";
+ [
+ "DataType";
+ "String"
+ ]
+ ]
+ ]
+ ]
+ ];
+ "Data" = [
+ [
+ "glfilter=7893318%3A7949252&glfilter=5099461%3A12103637&glfilter=8292645%3A1&hid=90404&onstock=0&local-offers-first=0";
+ [
+ [
+ "glfilter";
+ "7893318:7949252"
+ ];
+ [
+ "glfilter";
+ "5099461:12103637"
+ ];
+ [
+ "glfilter";
+ "8292645:1"
+ ];
+ [
+ "hid";
+ "90404"
+ ];
+ [
+ "onstock";
+ "0"
+ ];
+ [
+ "local-offers-first";
+ "0"
+ ]
+ ];
+ "glfilter=7893318%3A7949252&glfilter=5099461%3A12103637&glfilter=8292645%3A1&hid=90404&onstock=0&local-offers-first=0";
+ [
+ [
+ "glfilter";
+ "7893318:7949252"
+ ];
+ [
+ "glfilter";
+ "5099461:12103637"
+ ];
+ [
+ "glfilter";
+ "8292645:1"
+ ];
+ [
+ "hid";
+ "90404"
+ ];
+ [
+ "onstock";
+ "0"
+ ];
+ [
+ "local-offers-first";
+ "0"
+ ]
+ ];
+ "glfilter=7893318%3A7949252&glfilter=5099461%3A12103637&glfilter=8292645%3A1&hid=90404&onstock=0&local-offers-first=0";
+ [
+ [
+ "glfilter";
+ "7893318:7949252&glfilter=5099461:12103637&glfilter=8292645:1&hid=90404&onstock=0&local-offers-first=0"
+ ]
+ ];
+ "glfilter=7893318%3A7949252%26glfilter%3D5099461%3A12103637%26glfilter%3D8292645%3A1%26hid%3D90404%26onstock%3D0%26local-offers-first%3D0";
+ [
+ [
+ "glfilter";
+ [
+ "7893318:7949252";
+ "5099461:12103637";
+ "8292645:1"
+ ]
+ ];
+ [
+ "hid";
+ [
+ "90404"
+ ]
+ ];
+ [
+ "onstock";
+ [
+ "0"
+ ]
+ ];
+ [
+ "local-offers-first";
+ [
+ "0"
+ ]
+ ]
+ ];
+ "local-offers-first=0&onstock=0&hid=90404&glfilter=7893318%3A7949252&glfilter=5099461%3A12103637&glfilter=8292645%3A1";
+ [
+ [
+ "glfilter";
+ [
+ "7893318:7949252";
+ "5099461:12103637";
+ "8292645:1"
+ ]
+ ];
+ [
+ "hid";
+ [
+ "90404"
+ ]
+ ];
+ [
+ "onstock";
+ [
+ "0"
+ ]
+ ];
+ [
+ "local-offers-first";
+ [
+ "0"
+ ]
+ ]
+ ];
+ "local-offers-first=0&onstock=0&hid=90404&glfilter=7893318%3A7949252&glfilter=5099461%3A12103637&glfilter=8292645%3A1";
+ [
+ [
+ "glfilter";
+ [
+ "7893318:7949252&glfilter=5099461:12103637&glfilter=8292645:1&hid=90404&onstock=0&local-offers-first=0"
+ ]
+ ]
+ ];
+ "glfilter=7893318%3A7949252%26glfilter%3D5099461%3A12103637%26glfilter%3D8292645%3A1%26hid%3D90404%26onstock%3D0%26local-offers-first%3D0"
+ ];
+ [
+ "";
+ [];
+ "";
+ [];
+ "";
+ [];
+ "";
+ [];
+ "";
+ [];
+ "";
+ [];
+ ""
+ ];
+ [
+ "&";
+ [];
+ "";
+ [];
+ "";
+ [];
+ "";
+ [];
+ "";
+ [];
+ "";
+ [];
+ ""
+ ];
+ [
+ "&&";
+ [];
+ "";
+ [];
+ "";
+ [];
+ "";
+ [];
+ "";
+ [];
+ "";
+ [];
+ ""
+ ];
+ [
+ "=";
+ [];
+ "";
+ [
+ [
+ "";
+ ""
+ ]
+ ];
+ "=";
+ [];
+ "";
+ [];
+ "";
+ [
+ [
+ "";
+ [
+ ""
+ ]
+ ]
+ ];
+ "=";
+ [];
+ ""
+ ];
+ [
+ "&=";
+ [];
+ "";
+ [
+ [
+ "";
+ ""
+ ]
+ ];
+ "=";
+ [];
+ "";
+ [];
+ "";
+ [
+ [
+ "";
+ [
+ ""
+ ]
+ ]
+ ];
+ "=";
+ [];
+ ""
+ ];
+ [
+ "=&";
+ [];
+ "";
+ [
+ [
+ "";
+ ""
+ ]
+ ];
+ "=";
+ [
+ [
+ "";
+ "&"
+ ]
+ ];
+ "=%26";
+ [];
+ "";
+ [
+ [
+ "";
+ [
+ ""
+ ]
+ ]
+ ];
+ "=";
+ [
+ [
+ "";
+ [
+ "&"
+ ]
+ ]
+ ];
+ "=%26"
+ ];
+ [
+ "&==";
+ [
+ [
+ "";
+ "="
+ ]
+ ];
+ "=%3D";
+ [
+ [
+ "";
+ "="
+ ]
+ ];
+ "=%3D";
+ [
+ [
+ "&";
+ "="
+ ]
+ ];
+ "%26=%3D";
+ [
+ [
+ "";
+ [
+ "="
+ ]
+ ]
+ ];
+ "=%3D";
+ [
+ [
+ "";
+ [
+ "="
+ ]
+ ]
+ ];
+ "=%3D";
+ [
+ [
+ "&";
+ [
+ "="
+ ]
+ ]
+ ];
+ "%26=%3D"
+ ];
+ [
+ "&==&";
+ [
+ [
+ "";
+ "="
+ ]
+ ];
+ "=%3D";
+ [
+ [
+ "";
+ "="
+ ]
+ ];
+ "=%3D";
+ [
+ [
+ "&";
+ "=&"
+ ]
+ ];
+ "%26=%3D%26";
+ [
+ [
+ "";
+ [
+ "="
+ ]
+ ]
+ ];
+ "=%3D";
+ [
+ [
+ "";
+ [
+ "="
+ ]
+ ]
+ ];
+ "=%3D";
+ [
+ [
+ "&";
+ [
+ "=&"
+ ]
+ ]
+ ];
+ "%26=%3D%26"
+ ];
+ [
+ "==&";
+ [
+ [
+ "";
+ "="
+ ]
+ ];
+ "=%3D";
+ [
+ [
+ "";
+ "="
+ ]
+ ];
+ "=%3D";
+ [
+ [
+ "";
+ "=&"
+ ]
+ ];
+ "=%3D%26";
+ [
+ [
+ "";
+ [
+ "="
+ ]
+ ]
+ ];
+ "=%3D";
+ [
+ [
+ "";
+ [
+ "="
+ ]
+ ]
+ ];
+ "=%3D";
+ [
+ [
+ "";
+ [
+ "=&"
+ ]
+ ]
+ ];
+ "=%3D%26"
+ ];
+ [
+ "=&=";
+ [];
+ "";
+ [
+ [
+ "";
+ ""
+ ];
+ [
+ "";
+ ""
+ ]
+ ];
+ "=&=";
+ [
+ [
+ "";
+ "&="
+ ]
+ ];
+ "=%26%3D";
+ [];
+ "";
+ [
+ [
+ "";
+ [
+ "";
+ ""
+ ]
+ ]
+ ];
+ "=&=";
+ [
+ [
+ "";
+ [
+ "&="
+ ]
+ ]
+ ];
+ "=%26%3D"
+ ];
+ [
+ "=a";
+ [
+ [
+ "";
+ "a"
+ ]
+ ];
+ "=a";
+ [
+ [
+ "";
+ "a"
+ ]
+ ];
+ "=a";
+ [
+ [
+ "";
+ "a"
+ ]
+ ];
+ "=a";
+ [
+ [
+ "";
+ [
+ "a"
+ ]
+ ]
+ ];
+ "=a";
+ [
+ [
+ "";
+ [
+ "a"
+ ]
+ ]
+ ];
+ "=a";
+ [
+ [
+ "";
+ [
+ "a"
+ ]
+ ]
+ ];
+ "=a"
+ ];
+ [
+ "a";
+ [];
+ "";
+ [
+ [
+ "a";
+ ""
+ ]
+ ];
+ "a=";
+ [];
+ "";
+ [];
+ "";
+ [
+ [
+ "a";
+ [
+ ""
+ ]
+ ]
+ ];
+ "a=";
+ [];
+ ""
+ ];
+ [
+ "a=";
+ [];
+ "";
+ [
+ [
+ "a";
+ ""
+ ]
+ ];
+ "a=";
+ [];
+ "";
+ [];
+ "";
+ [
+ [
+ "a";
+ [
+ ""
+ ]
+ ]
+ ];
+ "a=";
+ [];
+ ""
+ ];
+ [
+ "&a=b";
+ [
+ [
+ "a";
+ "b"
+ ]
+ ];
+ "a=b";
+ [
+ [
+ "a";
+ "b"
+ ]
+ ];
+ "a=b";
+ [
+ [
+ "&a";
+ "b"
+ ]
+ ];
+ "%26a=b";
+ [
+ [
+ "a";
+ [
+ "b"
+ ]
+ ]
+ ];
+ "a=b";
+ [
+ [
+ "a";
+ [
+ "b"
+ ]
+ ]
+ ];
+ "a=b";
+ [
+ [
+ "&a";
+ [
+ "b"
+ ]
+ ]
+ ];
+ "%26a=b"
+ ];
+ [
+ "a=a+b&b=b+c";
+ [
+ [
+ "a";
+ "a b"
+ ];
+ [
+ "b";
+ "b c"
+ ]
+ ];
+ "a=a+b&b=b+c";
+ [
+ [
+ "a";
+ "a b"
+ ];
+ [
+ "b";
+ "b c"
+ ]
+ ];
+ "a=a+b&b=b+c";
+ [
+ [
+ "a";
+ "a b&b=b c"
+ ]
+ ];
+ "a=a+b%26b%3Db+c";
+ [
+ [
+ "a";
+ [
+ "a b"
+ ]
+ ];
+ [
+ "b";
+ [
+ "b c"
+ ]
+ ]
+ ];
+ "b=b+c&a=a+b";
+ [
+ [
+ "a";
+ [
+ "a b"
+ ]
+ ];
+ [
+ "b";
+ [
+ "b c"
+ ]
+ ]
+ ];
+ "b=b+c&a=a+b";
+ [
+ [
+ "a";
+ [
+ "a b&b=b c"
+ ]
+ ]
+ ];
+ "a=a+b%26b%3Db+c"
+ ];
+ [
+ "a=a+b=&b=b+=c";
+ [
+ [
+ "a";
+ "a b="
+ ];
+ [
+ "b";
+ "b =c"
+ ]
+ ];
+ "a=a+b%3D&b=b+%3Dc";
+ [
+ [
+ "a";
+ "a b="
+ ];
+ [
+ "b";
+ "b =c"
+ ]
+ ];
+ "a=a+b%3D&b=b+%3Dc";
+ [
+ [
+ "a";
+ "a b=&b=b =c"
+ ]
+ ];
+ "a=a+b%3D%26b%3Db+%3Dc";
+ [
+ [
+ "a";
+ [
+ "a b="
+ ]
+ ];
+ [
+ "b";
+ [
+ "b =c"
+ ]
+ ]
+ ];
+ "b=b+%3Dc&a=a+b%3D";
+ [
+ [
+ "a";
+ [
+ "a b="
+ ]
+ ];
+ [
+ "b";
+ [
+ "b =c"
+ ]
+ ]
+ ];
+ "b=b+%3Dc&a=a+b%3D";
+ [
+ [
+ "a";
+ [
+ "a b=&b=b =c"
+ ]
+ ]
+ ];
+ "a=a+b%3D%26b%3Db+%3Dc"
+ ];
+ [
+ "a=1&a=2";
+ [
+ [
+ "a";
+ "1"
+ ];
+ [
+ "a";
+ "2"
+ ]
+ ];
+ "a=1&a=2";
+ [
+ [
+ "a";
+ "1"
+ ];
+ [
+ "a";
+ "2"
+ ]
+ ];
+ "a=1&a=2";
+ [
+ [
+ "a";
+ "1&a=2"
+ ]
+ ];
+ "a=1%26a%3D2";
+ [
+ [
+ "a";
+ [
+ "1";
+ "2"
+ ]
+ ]
+ ];
+ "a=1&a=2";
+ [
+ [
+ "a";
+ [
+ "1";
+ "2"
+ ]
+ ]
+ ];
+ "a=1&a=2";
+ [
+ [
+ "a";
+ [
+ "1&a=2"
+ ]
+ ]
+ ];
+ "a=1%26a%3D2"
+ ];
+ [
+ ";a=b";
+ [
+ [
+ ";a";
+ "b"
+ ]
+ ];
+ ";a=b";
+ [
+ [
+ ";a";
+ "b"
+ ]
+ ];
+ ";a=b";
+ [
+ [
+ "a";
+ "b"
+ ]
+ ];
+ "a=b";
+ [
+ [
+ ";a";
+ [
+ "b"
+ ]
+ ]
+ ];
+ ";a=b";
+ [
+ [
+ ";a";
+ [
+ "b"
+ ]
+ ]
+ ];
+ ";a=b";
+ [
+ [
+ "a";
+ [
+ "b"
+ ]
+ ]
+ ];
+ "a=b"
+ ];
+ [
+ "a=a+b;b=b+c";
+ [
+ [
+ "a";
+ "a b;b=b c"
+ ]
+ ];
+ "a=a+b;b%3Db+c";
+ [
+ [
+ "a";
+ "a b;b=b c"
+ ]
+ ];
+ "a=a+b;b%3Db+c";
+ [
+ [
+ "a";
+ "a b"
+ ];
+ [
+ "b";
+ "b c"
+ ]
+ ];
+ "a=a+b;b=b+c";
+ [
+ [
+ "a";
+ [
+ "a b;b=b c"
+ ]
+ ]
+ ];
+ "a=a+b;b%3Db+c";
+ [
+ [
+ "a";
+ [
+ "a b;b=b c"
+ ]
+ ]
+ ];
+ "a=a+b;b%3Db+c";
+ [
+ [
+ "a";
+ [
+ "a b"
+ ]
+ ];
+ [
+ "b";
+ [
+ "b c"
+ ]
+ ]
+ ];
+ "b=b+c;a=a+b"
+ ]
+ ]
+ }
+ ]
+ }
+] \ No newline at end of file
diff --git a/yql/essentials/udfs/common/url_base/test/canondata/test.test_Url_/results.txt b/yql/essentials/udfs/common/url_base/test/canondata/test.test_Url_/results.txt
new file mode 100644
index 00000000000..09115e94005
--- /dev/null
+++ b/yql/essentials/udfs/common/url_base/test/canondata/test.test_Url_/results.txt
@@ -0,0 +1,1640 @@
+[
+ {
+ "Write" = [
+ {
+ "Type" = [
+ "ListType";
+ [
+ "StructType";
+ [
+ [
+ "value";
+ [
+ "DataType";
+ "String"
+ ]
+ ];
+ [
+ "parse";
+ [
+ "StructType";
+ [
+ [
+ "Frag";
+ [
+ "OptionalType";
+ [
+ "DataType";
+ "String"
+ ]
+ ]
+ ];
+ [
+ "Host";
+ [
+ "OptionalType";
+ [
+ "DataType";
+ "String"
+ ]
+ ]
+ ];
+ [
+ "ParseError";
+ [
+ "OptionalType";
+ [
+ "DataType";
+ "String"
+ ]
+ ]
+ ];
+ [
+ "Pass";
+ [
+ "OptionalType";
+ [
+ "DataType";
+ "String"
+ ]
+ ]
+ ];
+ [
+ "Path";
+ [
+ "OptionalType";
+ [
+ "DataType";
+ "String"
+ ]
+ ]
+ ];
+ [
+ "Port";
+ [
+ "OptionalType";
+ [
+ "DataType";
+ "String"
+ ]
+ ]
+ ];
+ [
+ "Query";
+ [
+ "OptionalType";
+ [
+ "DataType";
+ "String"
+ ]
+ ]
+ ];
+ [
+ "Scheme";
+ [
+ "OptionalType";
+ [
+ "DataType";
+ "String"
+ ]
+ ]
+ ];
+ [
+ "User";
+ [
+ "OptionalType";
+ [
+ "DataType";
+ "String"
+ ]
+ ]
+ ]
+ ]
+ ]
+ ];
+ [
+ "significant_domain";
+ [
+ "DataType";
+ "String"
+ ]
+ ];
+ [
+ "tl_is_significant";
+ [
+ "DataType";
+ "String"
+ ]
+ ];
+ [
+ "tld";
+ [
+ "DataType";
+ "String"
+ ]
+ ];
+ [
+ "punycode";
+ [
+ "OptionalType";
+ [
+ "DataType";
+ "String"
+ ]
+ ]
+ ];
+ [
+ "encode";
+ [
+ "OptionalType";
+ [
+ "DataType";
+ "String"
+ ]
+ ]
+ ];
+ [
+ "decode";
+ [
+ "OptionalType";
+ [
+ "DataType";
+ "String"
+ ]
+ ]
+ ];
+ [
+ "param";
+ [
+ "OptionalType";
+ [
+ "DataType";
+ "String"
+ ]
+ ]
+ ];
+ [
+ "cut_qs_and_fragment";
+ [
+ "DataType";
+ "String"
+ ]
+ ];
+ [
+ "cut_www";
+ [
+ "OptionalType";
+ [
+ "DataType";
+ "String"
+ ]
+ ]
+ ];
+ [
+ "cut_www2";
+ [
+ "OptionalType";
+ [
+ "DataType";
+ "String"
+ ]
+ ]
+ ];
+ [
+ "host";
+ [
+ "OptionalType";
+ [
+ "DataType";
+ "String"
+ ]
+ ]
+ ];
+ [
+ "host_port";
+ [
+ "OptionalType";
+ [
+ "DataType";
+ "String"
+ ]
+ ]
+ ];
+ [
+ "scheme_host";
+ [
+ "OptionalType";
+ [
+ "DataType";
+ "String"
+ ]
+ ]
+ ];
+ [
+ "scheme_host_port";
+ [
+ "OptionalType";
+ [
+ "DataType";
+ "String"
+ ]
+ ]
+ ];
+ [
+ "cut_scheme";
+ [
+ "OptionalType";
+ [
+ "DataType";
+ "String"
+ ]
+ ]
+ ];
+ [
+ "tail";
+ [
+ "OptionalType";
+ [
+ "DataType";
+ "String"
+ ]
+ ]
+ ];
+ [
+ "path";
+ [
+ "OptionalType";
+ [
+ "DataType";
+ "String"
+ ]
+ ]
+ ];
+ [
+ "fragment";
+ [
+ "OptionalType";
+ [
+ "DataType";
+ "String"
+ ]
+ ]
+ ];
+ [
+ "port";
+ [
+ "OptionalType";
+ [
+ "DataType";
+ "Uint64"
+ ]
+ ]
+ ];
+ [
+ "domain0";
+ [
+ "OptionalType";
+ [
+ "DataType";
+ "String"
+ ]
+ ]
+ ];
+ [
+ "domain1";
+ [
+ "OptionalType";
+ [
+ "DataType";
+ "String"
+ ]
+ ]
+ ];
+ [
+ "domain3";
+ [
+ "OptionalType";
+ [
+ "DataType";
+ "String"
+ ]
+ ]
+ ];
+ [
+ "domain_level";
+ [
+ "DataType";
+ "Uint64"
+ ]
+ ];
+ [
+ "norm";
+ [
+ "OptionalType";
+ [
+ "DataType";
+ "String"
+ ]
+ ]
+ ]
+ ]
+ ]
+ ];
+ "Data" = [
+ [
+ "http://";
+ [
+ #;
+ #;
+ [
+ "ParsedBadFormat"
+ ];
+ #;
+ #;
+ #;
+ #;
+ #;
+ #
+ ];
+ "";
+ "";
+ "";
+ [
+ "http://"
+ ];
+ [
+ "http://"
+ ];
+ [
+ "http://"
+ ];
+ #;
+ "http://";
+ #;
+ #;
+ #;
+ #;
+ [
+ "http://"
+ ];
+ [
+ "http://"
+ ];
+ #;
+ [
+ "/"
+ ];
+ [
+ "/"
+ ];
+ #;
+ [
+ "80"
+ ];
+ #;
+ #;
+ #;
+ "1";
+ #
+ ];
+ [
+ "http://lenta.ru";
+ [
+ #;
+ [
+ "lenta.ru"
+ ];
+ #;
+ #;
+ [
+ "/"
+ ];
+ #;
+ #;
+ [
+ "http"
+ ];
+ #
+ ];
+ "lenta.ru";
+ "lenta.ru";
+ "ru";
+ [
+ "http://lenta.ru"
+ ];
+ [
+ "http://lenta.ru"
+ ];
+ [
+ "http://lenta.ru"
+ ];
+ #;
+ "http://lenta.ru";
+ [
+ "lenta.ru"
+ ];
+ [
+ "lenta.ru"
+ ];
+ [
+ "lenta.ru"
+ ];
+ [
+ "lenta.ru"
+ ];
+ [
+ "http://lenta.ru"
+ ];
+ [
+ "http://lenta.ru"
+ ];
+ [
+ "lenta.ru"
+ ];
+ [
+ "/"
+ ];
+ [
+ "/"
+ ];
+ #;
+ [
+ "80"
+ ];
+ #;
+ [
+ "ru"
+ ];
+ #;
+ "2";
+ [
+ "http://lenta.ru/"
+ ]
+ ];
+ [
+ "http://someone.livejournal.com/blog";
+ [
+ #;
+ [
+ "someone.livejournal.com"
+ ];
+ #;
+ #;
+ [
+ "/blog"
+ ];
+ #;
+ #;
+ [
+ "http"
+ ];
+ #
+ ];
+ "livejournal.com";
+ "livejournal.com";
+ "com";
+ [
+ "http://someone.livejournal.com/blog"
+ ];
+ [
+ "http://someone.livejournal.com/blog"
+ ];
+ [
+ "http://someone.livejournal.com/blog"
+ ];
+ #;
+ "http://someone.livejournal.com/blog";
+ [
+ "someone.livejournal.com"
+ ];
+ [
+ "someone.livejournal.com"
+ ];
+ [
+ "someone.livejournal.com"
+ ];
+ [
+ "someone.livejournal.com"
+ ];
+ [
+ "http://someone.livejournal.com"
+ ];
+ [
+ "http://someone.livejournal.com"
+ ];
+ [
+ "someone.livejournal.com/blog"
+ ];
+ [
+ "/blog"
+ ];
+ [
+ "/blog"
+ ];
+ #;
+ [
+ "80"
+ ];
+ #;
+ [
+ "com"
+ ];
+ [
+ "someone.livejournal.com"
+ ];
+ "3";
+ [
+ "http://someone.livejournal.com/blog"
+ ]
+ ];
+ [
+ "http://bbc.co.uk/";
+ [
+ #;
+ [
+ "bbc.co.uk"
+ ];
+ #;
+ #;
+ [
+ "/"
+ ];
+ #;
+ #;
+ [
+ "http"
+ ];
+ #
+ ];
+ "bbc.co.uk";
+ "co.uk";
+ "uk";
+ [
+ "http://bbc.co.uk/"
+ ];
+ [
+ "http://bbc.co.uk/"
+ ];
+ [
+ "http://bbc.co.uk/"
+ ];
+ #;
+ "http://bbc.co.uk/";
+ [
+ "bbc.co.uk"
+ ];
+ [
+ "bbc.co.uk"
+ ];
+ [
+ "bbc.co.uk"
+ ];
+ [
+ "bbc.co.uk"
+ ];
+ [
+ "http://bbc.co.uk"
+ ];
+ [
+ "http://bbc.co.uk"
+ ];
+ [
+ "bbc.co.uk/"
+ ];
+ [
+ "/"
+ ];
+ [
+ "/"
+ ];
+ #;
+ [
+ "80"
+ ];
+ #;
+ [
+ "uk"
+ ];
+ [
+ "bbc.co.uk"
+ ];
+ "3";
+ [
+ "http://bbc.co.uk/"
+ ]
+ ];
+ [
+ "https://www.yandex.com.tr/search";
+ [
+ #;
+ [
+ "www.yandex.com.tr"
+ ];
+ #;
+ #;
+ [
+ "/search"
+ ];
+ #;
+ #;
+ [
+ "https"
+ ];
+ #
+ ];
+ "yandex.com.tr";
+ "com.tr";
+ "tr";
+ [
+ "https://www.yandex.com.tr/search"
+ ];
+ [
+ "https://www.yandex.com.tr/search"
+ ];
+ [
+ "https://www.yandex.com.tr/search"
+ ];
+ #;
+ "https://www.yandex.com.tr/search";
+ [
+ "yandex.com.tr"
+ ];
+ [
+ "yandex.com.tr"
+ ];
+ [
+ "www.yandex.com.tr"
+ ];
+ [
+ "www.yandex.com.tr"
+ ];
+ [
+ "https://www.yandex.com.tr"
+ ];
+ [
+ "https://www.yandex.com.tr"
+ ];
+ [
+ "www.yandex.com.tr/search"
+ ];
+ [
+ "/search"
+ ];
+ [
+ "/search"
+ ];
+ #;
+ [
+ "443"
+ ];
+ #;
+ [
+ "tr"
+ ];
+ [
+ "yandex.com.tr"
+ ];
+ "4";
+ [
+ "https://www.yandex.com.tr/search"
+ ]
+ ];
+ [
+ "https://www2.yandex.com.tr/search";
+ [
+ #;
+ [
+ "www2.yandex.com.tr"
+ ];
+ #;
+ #;
+ [
+ "/search"
+ ];
+ #;
+ #;
+ [
+ "https"
+ ];
+ #
+ ];
+ "yandex.com.tr";
+ "com.tr";
+ "tr";
+ [
+ "https://www2.yandex.com.tr/search"
+ ];
+ [
+ "https://www2.yandex.com.tr/search"
+ ];
+ [
+ "https://www2.yandex.com.tr/search"
+ ];
+ #;
+ "https://www2.yandex.com.tr/search";
+ [
+ "www2.yandex.com.tr"
+ ];
+ [
+ "yandex.com.tr"
+ ];
+ [
+ "www2.yandex.com.tr"
+ ];
+ [
+ "www2.yandex.com.tr"
+ ];
+ [
+ "https://www2.yandex.com.tr"
+ ];
+ [
+ "https://www2.yandex.com.tr"
+ ];
+ [
+ "www2.yandex.com.tr/search"
+ ];
+ [
+ "/search"
+ ];
+ [
+ "/search"
+ ];
+ #;
+ [
+ "443"
+ ];
+ #;
+ [
+ "tr"
+ ];
+ [
+ "yandex.com.tr"
+ ];
+ "4";
+ [
+ "https://www2.yandex.com.tr/search"
+ ]
+ ];
+ [
+ "lenta.ru";
+ [
+ #;
+ #;
+ [
+ "ParsedBadFormat"
+ ];
+ #;
+ #;
+ #;
+ #;
+ #;
+ #
+ ];
+ "lenta.ru";
+ "lenta.ru";
+ "ru";
+ [
+ "lenta.ru"
+ ];
+ [
+ "lenta.ru"
+ ];
+ [
+ "lenta.ru"
+ ];
+ #;
+ "lenta.ru";
+ [
+ "lenta.ru"
+ ];
+ [
+ "lenta.ru"
+ ];
+ [
+ "lenta.ru"
+ ];
+ [
+ "lenta.ru"
+ ];
+ [
+ "lenta.ru"
+ ];
+ [
+ "lenta.ru"
+ ];
+ [
+ "lenta.ru"
+ ];
+ [
+ "/"
+ ];
+ [
+ "/"
+ ];
+ #;
+ #;
+ #;
+ [
+ "ru"
+ ];
+ #;
+ "2";
+ #
+ ];
+ [
+ "bbc.co.uk/news";
+ [
+ #;
+ #;
+ [
+ "ParsedBadFormat"
+ ];
+ #;
+ #;
+ #;
+ #;
+ #;
+ #
+ ];
+ "bbc.co.uk";
+ "co.uk";
+ "uk";
+ [
+ "bbc.co.uk/news"
+ ];
+ [
+ "bbc.co.uk/news"
+ ];
+ [
+ "bbc.co.uk/news"
+ ];
+ #;
+ "bbc.co.uk/news";
+ [
+ "bbc.co.uk"
+ ];
+ [
+ "bbc.co.uk"
+ ];
+ [
+ "bbc.co.uk"
+ ];
+ [
+ "bbc.co.uk"
+ ];
+ [
+ "bbc.co.uk"
+ ];
+ [
+ "bbc.co.uk"
+ ];
+ [
+ "bbc.co.uk/news"
+ ];
+ [
+ "/news"
+ ];
+ [
+ "/news"
+ ];
+ #;
+ #;
+ #;
+ [
+ "uk"
+ ];
+ [
+ "bbc.co.uk"
+ ];
+ "3";
+ #
+ ];
+ [
+ "yandex.com.tr/maps?foo=";
+ [
+ #;
+ #;
+ [
+ "ParsedBadFormat"
+ ];
+ #;
+ #;
+ #;
+ #;
+ #;
+ #
+ ];
+ "yandex.com.tr";
+ "com.tr";
+ "tr";
+ [
+ "yandex.com.tr/maps?foo="
+ ];
+ [
+ "yandex.com.tr/maps?foo="
+ ];
+ [
+ "yandex.com.tr/maps?foo="
+ ];
+ [
+ ""
+ ];
+ "yandex.com.tr/maps";
+ [
+ "yandex.com.tr"
+ ];
+ [
+ "yandex.com.tr"
+ ];
+ [
+ "yandex.com.tr"
+ ];
+ [
+ "yandex.com.tr"
+ ];
+ [
+ "yandex.com.tr"
+ ];
+ [
+ "yandex.com.tr"
+ ];
+ [
+ "yandex.com.tr/maps?foo="
+ ];
+ [
+ "/maps?foo="
+ ];
+ [
+ "/maps"
+ ];
+ #;
+ #;
+ #;
+ [
+ "tr"
+ ];
+ [
+ "yandex.com.tr"
+ ];
+ "3";
+ #
+ ];
+ [
+ "someone.livejournal.com?foo=bar#top";
+ [
+ #;
+ #;
+ [
+ "ParsedBadFormat"
+ ];
+ #;
+ #;
+ #;
+ #;
+ #;
+ #
+ ];
+ "livejournal.com";
+ "livejournal.com";
+ "com";
+ [
+ "someone.livejournal.com?foo=bar#top"
+ ];
+ [
+ "someone.livejournal.com?foo=bar#top"
+ ];
+ [
+ "someone.livejournal.com?foo=bar#top"
+ ];
+ [
+ "bar"
+ ];
+ "someone.livejournal.com";
+ [
+ "someone.livejournal.com"
+ ];
+ [
+ "someone.livejournal.com"
+ ];
+ [
+ "someone.livejournal.com"
+ ];
+ [
+ "someone.livejournal.com"
+ ];
+ [
+ "someone.livejournal.com"
+ ];
+ [
+ "someone.livejournal.com"
+ ];
+ [
+ "someone.livejournal.com?foo=bar#top"
+ ];
+ [
+ "/?foo=bar#top"
+ ];
+ [
+ "/"
+ ];
+ [
+ "top"
+ ];
+ #;
+ #;
+ [
+ "com"
+ ];
+ [
+ "someone.livejournal.com"
+ ];
+ "3";
+ #
+ ];
+ [
+ "a.b.c.d.e.f.g.h.i.j.k#l.m.n";
+ [
+ #;
+ #;
+ [
+ "ParsedBadFormat"
+ ];
+ #;
+ #;
+ #;
+ #;
+ #;
+ #
+ ];
+ "j.k";
+ "j.k";
+ "k";
+ [
+ "a.b.c.d.e.f.g.h.i.j.k#l.m.n"
+ ];
+ [
+ "a.b.c.d.e.f.g.h.i.j.k#l.m.n"
+ ];
+ [
+ "a.b.c.d.e.f.g.h.i.j.k#l.m.n"
+ ];
+ #;
+ "a.b.c.d.e.f.g.h.i.j.k";
+ [
+ "a.b.c.d.e.f.g.h.i.j.k"
+ ];
+ [
+ "a.b.c.d.e.f.g.h.i.j.k"
+ ];
+ [
+ "a.b.c.d.e.f.g.h.i.j.k"
+ ];
+ [
+ "a.b.c.d.e.f.g.h.i.j.k"
+ ];
+ [
+ "a.b.c.d.e.f.g.h.i.j.k"
+ ];
+ [
+ "a.b.c.d.e.f.g.h.i.j.k"
+ ];
+ [
+ "a.b.c.d.e.f.g.h.i.j.k#l.m.n"
+ ];
+ [
+ "/#l.m.n"
+ ];
+ [
+ "/"
+ ];
+ [
+ "l.m.n"
+ ];
+ #;
+ #;
+ [
+ "k"
+ ];
+ [
+ "i.j.k"
+ ];
+ "11";
+ #
+ ];
+ [
+ "foo.tl.md/+%D1%8E%D0%BD%D0%B8%D0%BA%D0%BE%D0%B4";
+ [
+ #;
+ #;
+ [
+ "ParsedBadFormat"
+ ];
+ #;
+ #;
+ #;
+ #;
+ #;
+ #
+ ];
+ "tl.md";
+ "foo.tl.md";
+ "md";
+ [
+ "foo.tl.md/+%D1%8E%D0%BD%D0%B8%D0%BA%D0%BE%D0%B4"
+ ];
+ [
+ "foo.tl.md/+%D1%8E%D0%BD%D0%B8%D0%BA%D0%BE%D0%B4"
+ ];
+ [
+ "foo.tl.md/ \xD1\x8E\xD0\xBD\xD0\xB8\xD0\xBA\xD0\xBE\xD0\xB4"
+ ];
+ #;
+ "foo.tl.md/+%D1%8E%D0%BD%D0%B8%D0%BA%D0%BE%D0%B4";
+ [
+ "foo.tl.md"
+ ];
+ [
+ "foo.tl.md"
+ ];
+ [
+ "foo.tl.md"
+ ];
+ [
+ "foo.tl.md"
+ ];
+ [
+ "foo.tl.md"
+ ];
+ [
+ "foo.tl.md"
+ ];
+ [
+ "foo.tl.md/+%D1%8E%D0%BD%D0%B8%D0%BA%D0%BE%D0%B4"
+ ];
+ [
+ "/+%D1%8E%D0%BD%D0%B8%D0%BA%D0%BE%D0%B4"
+ ];
+ [
+ "/+%D1%8E%D0%BD%D0%B8%D0%BA%D0%BE%D0%B4"
+ ];
+ #;
+ #;
+ #;
+ [
+ "md"
+ ];
+ [
+ "foo.tl.md"
+ ];
+ "3";
+ #
+ ];
+ [
+ "xn--d1acpjx3f.xn--p1ai";
+ [
+ #;
+ #;
+ [
+ "ParsedBadFormat"
+ ];
+ #;
+ #;
+ #;
+ #;
+ #;
+ #
+ ];
+ "xn--d1acpjx3f.xn--p1ai";
+ "xn--d1acpjx3f.xn--p1ai";
+ "xn--p1ai";
+ [
+ "\xD1\x8F\xD0\xBD\xD0\xB4\xD0\xB5\xD0\xBA\xD1\x81.\xD1\x80\xD1\x84"
+ ];
+ [
+ "xn--d1acpjx3f.xn--p1ai"
+ ];
+ [
+ "xn--d1acpjx3f.xn--p1ai"
+ ];
+ #;
+ "xn--d1acpjx3f.xn--p1ai";
+ [
+ "xn--d1acpjx3f.xn--p1ai"
+ ];
+ [
+ "xn--d1acpjx3f.xn--p1ai"
+ ];
+ [
+ "xn--d1acpjx3f.xn--p1ai"
+ ];
+ [
+ "xn--d1acpjx3f.xn--p1ai"
+ ];
+ [
+ "xn--d1acpjx3f.xn--p1ai"
+ ];
+ [
+ "xn--d1acpjx3f.xn--p1ai"
+ ];
+ [
+ "xn--d1acpjx3f.xn--p1ai"
+ ];
+ [
+ "/"
+ ];
+ [
+ "/"
+ ];
+ #;
+ #;
+ #;
+ [
+ "xn--p1ai"
+ ];
+ #;
+ "2";
+ #
+ ];
+ [
+ "https://ya.ru:80/search/?text=test&lr=213#top";
+ [
+ [
+ "top"
+ ];
+ [
+ "ya.ru"
+ ];
+ #;
+ #;
+ [
+ "/search/"
+ ];
+ [
+ "80"
+ ];
+ [
+ "text=test&lr=213"
+ ];
+ [
+ "https"
+ ];
+ #
+ ];
+ "ya.ru";
+ "ya.ru";
+ "ru";
+ [
+ "https://ya.ru:80/search/?text=test&lr=213#top"
+ ];
+ [
+ "https://ya.ru:80/search/?text=test&lr=213#top"
+ ];
+ [
+ "https://ya.ru:80/search/?text=test&lr=213#top"
+ ];
+ #;
+ "https://ya.ru:80/search/";
+ [
+ "ya.ru"
+ ];
+ [
+ "ya.ru"
+ ];
+ [
+ "ya.ru"
+ ];
+ [
+ "ya.ru:80"
+ ];
+ [
+ "https://ya.ru"
+ ];
+ [
+ "https://ya.ru:80"
+ ];
+ [
+ "ya.ru:80/search/?text=test&lr=213#top"
+ ];
+ [
+ "/search/?text=test&lr=213#top"
+ ];
+ [
+ "/search/"
+ ];
+ [
+ "top"
+ ];
+ [
+ "80"
+ ];
+ #;
+ [
+ "ru"
+ ];
+ #;
+ "2";
+ [
+ "https://ya.ru:80/search/?text=test&lr=213"
+ ]
+ ];
+ [
+ "https://ya.ru/search/?text=%2B";
+ [
+ #;
+ [
+ "ya.ru"
+ ];
+ #;
+ #;
+ [
+ "/search/"
+ ];
+ #;
+ [
+ "text=%2B"
+ ];
+ [
+ "https"
+ ];
+ #
+ ];
+ "ya.ru";
+ "ya.ru";
+ "ru";
+ [
+ "https://ya.ru/search/?text=%2B"
+ ];
+ [
+ "https://ya.ru/search/?text=%2B"
+ ];
+ [
+ "https://ya.ru/search/?text=+"
+ ];
+ #;
+ "https://ya.ru/search/";
+ [
+ "ya.ru"
+ ];
+ [
+ "ya.ru"
+ ];
+ [
+ "ya.ru"
+ ];
+ [
+ "ya.ru"
+ ];
+ [
+ "https://ya.ru"
+ ];
+ [
+ "https://ya.ru"
+ ];
+ [
+ "ya.ru/search/?text=%2B"
+ ];
+ [
+ "/search/?text=%2B"
+ ];
+ [
+ "/search/"
+ ];
+ #;
+ [
+ "443"
+ ];
+ #;
+ [
+ "ru"
+ ];
+ #;
+ "2";
+ [
+ "https://ya.ru/search/?text=%2B"
+ ]
+ ];
+ [
+ "goal://market.yandex.ru/product-page_scroll-box_product_visible";
+ [
+ #;
+ #;
+ [
+ "ParsedBadScheme"
+ ];
+ #;
+ #;
+ #;
+ #;
+ #;
+ #
+ ];
+ "yandex.ru";
+ "yandex.ru";
+ "ru";
+ [
+ "goal://market.yandex.ru/product-page_scroll-box_product_visible"
+ ];
+ [
+ "goal://market.yandex.ru/product-page_scroll-box_product_visible"
+ ];
+ [
+ "goal://market.yandex.ru/product-page_scroll-box_product_visible"
+ ];
+ #;
+ "goal://market.yandex.ru/product-page_scroll-box_product_visible";
+ [
+ "market.yandex.ru"
+ ];
+ [
+ "market.yandex.ru"
+ ];
+ [
+ "market.yandex.ru"
+ ];
+ [
+ "market.yandex.ru"
+ ];
+ [
+ "goal://market.yandex.ru"
+ ];
+ [
+ "goal://market.yandex.ru"
+ ];
+ [
+ "market.yandex.ru/product-page_scroll-box_product_visible"
+ ];
+ [
+ "/product-page_scroll-box_product_visible"
+ ];
+ [
+ "/product-page_scroll-box_product_visible"
+ ];
+ #;
+ #;
+ #;
+ [
+ "ru"
+ ];
+ [
+ "market.yandex.ru"
+ ];
+ "3";
+ #
+ ];
+ [
+ "Http://ya.ru";
+ [
+ #;
+ [
+ "ya.ru"
+ ];
+ #;
+ #;
+ [
+ "/"
+ ];
+ #;
+ #;
+ [
+ "http"
+ ];
+ #
+ ];
+ "ya.ru";
+ "ya.ru";
+ "ru";
+ [
+ "Http://ya.ru"
+ ];
+ [
+ "Http://ya.ru"
+ ];
+ [
+ "Http://ya.ru"
+ ];
+ #;
+ "Http://ya.ru";
+ [
+ "ya.ru"
+ ];
+ [
+ "ya.ru"
+ ];
+ [
+ "ya.ru"
+ ];
+ [
+ "ya.ru"
+ ];
+ [
+ "Http://ya.ru"
+ ];
+ [
+ "Http://ya.ru"
+ ];
+ [
+ "ya.ru"
+ ];
+ [
+ "/"
+ ];
+ [
+ "/"
+ ];
+ #;
+ [
+ "80"
+ ];
+ #;
+ [
+ "ru"
+ ];
+ #;
+ "2";
+ [
+ "http://ya.ru/"
+ ]
+ ];
+ [
+ "ftp://someone.livejournal.com:80/blog";
+ [
+ #;
+ [
+ "someone.livejournal.com"
+ ];
+ #;
+ #;
+ [
+ "/blog"
+ ];
+ [
+ "80"
+ ];
+ #;
+ [
+ "ftp"
+ ];
+ #
+ ];
+ "livejournal.com";
+ "livejournal.com";
+ "com";
+ [
+ "ftp://someone.livejournal.com:80/blog"
+ ];
+ [
+ "ftp://someone.livejournal.com:80/blog"
+ ];
+ [
+ "ftp://someone.livejournal.com:80/blog"
+ ];
+ #;
+ "ftp://someone.livejournal.com:80/blog";
+ [
+ "someone.livejournal.com"
+ ];
+ [
+ "someone.livejournal.com"
+ ];
+ [
+ "someone.livejournal.com"
+ ];
+ [
+ "someone.livejournal.com:80"
+ ];
+ [
+ "ftp://someone.livejournal.com"
+ ];
+ [
+ "ftp://someone.livejournal.com:80"
+ ];
+ [
+ "someone.livejournal.com:80/blog"
+ ];
+ [
+ "/blog"
+ ];
+ [
+ "/blog"
+ ];
+ #;
+ [
+ "80"
+ ];
+ #;
+ [
+ "com"
+ ];
+ [
+ "someone.livejournal.com"
+ ];
+ "3";
+ [
+ "ftp://someone.livejournal.com:80/blog"
+ ]
+ ]
+ ]
+ }
+ ]
+ }
+] \ No newline at end of file
diff --git a/yql/essentials/udfs/common/url_base/test/cases/BlockPunycode.in b/yql/essentials/udfs/common/url_base/test/cases/BlockPunycode.in
new file mode 100644
index 00000000000..50a35c68766
--- /dev/null
+++ b/yql/essentials/udfs/common/url_base/test/cases/BlockPunycode.in
@@ -0,0 +1,4 @@
+{"key"="";"subkey"="";"value"="abæcdöef"};
+{"key"="";"subkey"="";"value"="яндекс.ру"};
+{"key"="";"subkey"="";"value"="yandex.ru"};
+{"key"="";"subkey"="";"value"="xn--d1acpjx3f.xn--p1ag"};
diff --git a/yql/essentials/udfs/common/url_base/test/cases/BlockPunycode.sql b/yql/essentials/udfs/common/url_base/test/cases/BlockPunycode.sql
new file mode 100644
index 00000000000..3dcee5ca489
--- /dev/null
+++ b/yql/essentials/udfs/common/url_base/test/cases/BlockPunycode.sql
@@ -0,0 +1,10 @@
+/* syntax version 1 */
+PRAGMA UseBlocks;
+SELECT
+ value,
+ Url::PunycodeToHostName(value) AS hostname_utf,
+ Url::HostNameToPunycode(Url::PunycodeToHostName(value)) as punycode_hostname,
+ Url::ForcePunycodeToHostName(value) AS hostname_utf_forced,
+ Url::ForceHostNameToPunycode(Url::ForcePunycodeToHostName(value)) as punycode_hostname_forced,
+ Url::CanBePunycodeHostName(value) as can_be_punycode
+FROM Input;
diff --git a/yql/essentials/udfs/common/url_base/test/cases/BlockTld.in b/yql/essentials/udfs/common/url_base/test/cases/BlockTld.in
new file mode 100644
index 00000000000..d98c9c25223
--- /dev/null
+++ b/yql/essentials/udfs/common/url_base/test/cases/BlockTld.in
@@ -0,0 +1,4 @@
+{"key"="";"subkey"="";"value"="ru"};
+{"key"="";"subkey"="";"value"="123"};
+{"key"="";"subkey"="";"value"="yandex"};
+{"key"="";"subkey"="";"value"="sdfsdfsdf"};
diff --git a/yql/essentials/udfs/common/url_base/test/cases/BlockTld.sql b/yql/essentials/udfs/common/url_base/test/cases/BlockTld.sql
new file mode 100644
index 00000000000..7af5d71b1cb
--- /dev/null
+++ b/yql/essentials/udfs/common/url_base/test/cases/BlockTld.sql
@@ -0,0 +1,7 @@
+/* syntax version 1 */
+PRAGMA UseBlocks;
+SELECT
+ Url::GetTLD(value) AS tld,
+ Url::IsKnownTLD(value) AS known,
+ Url::IsWellKnownTLD(value) AS well_known
+FROM Input;
diff --git a/yql/essentials/udfs/common/url_base/test/cases/BlockUrl.in b/yql/essentials/udfs/common/url_base/test/cases/BlockUrl.in
new file mode 100644
index 00000000000..94f998d48f6
--- /dev/null
+++ b/yql/essentials/udfs/common/url_base/test/cases/BlockUrl.in
@@ -0,0 +1,18 @@
+{"key"="";"subkey"="";"value"="http://"};
+{"key"="";"subkey"="";"value"="http://lenta.ru"};
+{"key"="";"subkey"="";"value"="http://someone.livejournal.com/blog"};
+{"key"="";"subkey"="";"value"="http://bbc.co.uk/"};
+{"key"="";"subkey"="";"value"="https://www.yandex.com.tr/search"};
+{"key"="";"subkey"="";"value"="https://www2.yandex.com.tr/search"};
+{"key"="";"subkey"="";"value"="lenta.ru"};
+{"key"="";"subkey"="";"value"="bbc.co.uk/news"};
+{"key"="";"subkey"="";"value"="yandex.com.tr/maps?foo="};
+{"key"="";"subkey"="";"value"="someone.livejournal.com?foo=bar#top"};
+{"key"="";"subkey"="";"value"="a.b.c.d.e.f.g.h.i.j.k#l.m.n"};
+{"key"="";"subkey"="";"value"="foo.tl.md/+%D1%8E%D0%BD%D0%B8%D0%BA%D0%BE%D0%B4"};
+{"key"="";"subkey"="";"value"="xn--d1acpjx3f.xn--p1ai"};
+{"key"="";"subkey"="";"value"="https://ya.ru:80/search/?text=test&lr=213#top"};
+{"key"="";"subkey"="";"value"="https://ya.ru/search/?text=%2B"};
+{"key"="";"subkey"="";"value"="goal://market.yandex.ru/product-page_scroll-box_product_visible"};
+{"key"="";"subkey"="";"value"="Http://ya.ru"};
+{"key"="";"subkey"="";"value"="ftp://someone.livejournal.com:80/blog"};
diff --git a/yql/essentials/udfs/common/url_base/test/cases/BlockUrl.sql b/yql/essentials/udfs/common/url_base/test/cases/BlockUrl.sql
new file mode 100644
index 00000000000..9b1726c1dab
--- /dev/null
+++ b/yql/essentials/udfs/common/url_base/test/cases/BlockUrl.sql
@@ -0,0 +1,27 @@
+/* syntax version 1 */
+PRAGMA UseBlocks;
+SELECT
+ value,
+ Url::Encode(value) AS encode,
+ Url::Decode(value) AS decode,
+ Url::GetCGIParam(value, "foo") AS param,
+ Url::CutQueryStringAndFragment(value) AS cut_qs_and_fragment,
+ Url::GetHost(value) as host,
+ Url::CutWWW(Url::GetHost(value)) AS cut_www,
+ Url::CutWWW2(Url::GetHost(value)) AS cut_www2,
+ Url::GetTLD(value) AS tld,
+ Url::PunycodeToHostName(value) AS punycode,
+ Url::CutScheme(value) AS cut_scheme,
+ Url::GetHostPort(value) as host_port,
+ Url::GetSchemeHost(value) AS scheme_host,
+ Url::GetSchemeHostPort(value) AS scheme_host_port,
+ Url::GetTail(value) AS tail,
+ Url::GetPath(value) AS path,
+ Url::GetFragment(value) AS fragment,
+ Url::GetPort(value) AS port,
+ Url::GetDomain(value, 0) as domain0,
+ Url::GetDomain(value, 1) as domain1,
+ Url::GetDomain(value, 3) as domain3,
+ Url::GetDomainLevel(value) as domain_level,
+ Url::Normalize(value) as norm
+FROM Input;
diff --git a/yql/essentials/udfs/common/url_base/test/cases/Punycode.in b/yql/essentials/udfs/common/url_base/test/cases/Punycode.in
new file mode 100644
index 00000000000..50a35c68766
--- /dev/null
+++ b/yql/essentials/udfs/common/url_base/test/cases/Punycode.in
@@ -0,0 +1,4 @@
+{"key"="";"subkey"="";"value"="abæcdöef"};
+{"key"="";"subkey"="";"value"="яндекс.ру"};
+{"key"="";"subkey"="";"value"="yandex.ru"};
+{"key"="";"subkey"="";"value"="xn--d1acpjx3f.xn--p1ag"};
diff --git a/yql/essentials/udfs/common/url_base/test/cases/Punycode.sql b/yql/essentials/udfs/common/url_base/test/cases/Punycode.sql
new file mode 100644
index 00000000000..e5dad2e3037
--- /dev/null
+++ b/yql/essentials/udfs/common/url_base/test/cases/Punycode.sql
@@ -0,0 +1,9 @@
+/* syntax version 1 */
+SELECT
+ value,
+ Url::PunycodeToHostName(value) AS hostname_utf,
+ Url::HostNameToPunycode(Url::PunycodeToHostName(value)) as punycode_hostname,
+ Url::ForcePunycodeToHostName(value) AS hostname_utf_forced,
+ Url::ForceHostNameToPunycode(Url::ForcePunycodeToHostName(value)) as punycode_hostname_forced,
+ Url::CanBePunycodeHostName(value) as can_be_punycode
+FROM Input;
diff --git a/yql/essentials/udfs/common/url_base/test/cases/Tld.in b/yql/essentials/udfs/common/url_base/test/cases/Tld.in
new file mode 100644
index 00000000000..d98c9c25223
--- /dev/null
+++ b/yql/essentials/udfs/common/url_base/test/cases/Tld.in
@@ -0,0 +1,4 @@
+{"key"="";"subkey"="";"value"="ru"};
+{"key"="";"subkey"="";"value"="123"};
+{"key"="";"subkey"="";"value"="yandex"};
+{"key"="";"subkey"="";"value"="sdfsdfsdf"};
diff --git a/yql/essentials/udfs/common/url_base/test/cases/Tld.sql b/yql/essentials/udfs/common/url_base/test/cases/Tld.sql
new file mode 100644
index 00000000000..a26600cf639
--- /dev/null
+++ b/yql/essentials/udfs/common/url_base/test/cases/Tld.sql
@@ -0,0 +1,6 @@
+/* syntax version 1 */
+SELECT
+ Url::GetTLD(value) AS tld,
+ Url::IsKnownTLD(value) AS known,
+ Url::IsWellKnownTLD(value) AS well_known
+FROM Input;
diff --git a/yql/essentials/udfs/common/url_base/test/cases/Url.in b/yql/essentials/udfs/common/url_base/test/cases/Url.in
new file mode 100644
index 00000000000..94f998d48f6
--- /dev/null
+++ b/yql/essentials/udfs/common/url_base/test/cases/Url.in
@@ -0,0 +1,18 @@
+{"key"="";"subkey"="";"value"="http://"};
+{"key"="";"subkey"="";"value"="http://lenta.ru"};
+{"key"="";"subkey"="";"value"="http://someone.livejournal.com/blog"};
+{"key"="";"subkey"="";"value"="http://bbc.co.uk/"};
+{"key"="";"subkey"="";"value"="https://www.yandex.com.tr/search"};
+{"key"="";"subkey"="";"value"="https://www2.yandex.com.tr/search"};
+{"key"="";"subkey"="";"value"="lenta.ru"};
+{"key"="";"subkey"="";"value"="bbc.co.uk/news"};
+{"key"="";"subkey"="";"value"="yandex.com.tr/maps?foo="};
+{"key"="";"subkey"="";"value"="someone.livejournal.com?foo=bar#top"};
+{"key"="";"subkey"="";"value"="a.b.c.d.e.f.g.h.i.j.k#l.m.n"};
+{"key"="";"subkey"="";"value"="foo.tl.md/+%D1%8E%D0%BD%D0%B8%D0%BA%D0%BE%D0%B4"};
+{"key"="";"subkey"="";"value"="xn--d1acpjx3f.xn--p1ai"};
+{"key"="";"subkey"="";"value"="https://ya.ru:80/search/?text=test&lr=213#top"};
+{"key"="";"subkey"="";"value"="https://ya.ru/search/?text=%2B"};
+{"key"="";"subkey"="";"value"="goal://market.yandex.ru/product-page_scroll-box_product_visible"};
+{"key"="";"subkey"="";"value"="Http://ya.ru"};
+{"key"="";"subkey"="";"value"="ftp://someone.livejournal.com:80/blog"};
diff --git a/yql/essentials/udfs/common/url_base/test/cases/Url.sql b/yql/essentials/udfs/common/url_base/test/cases/Url.sql
new file mode 100644
index 00000000000..ed50d29ae26
--- /dev/null
+++ b/yql/essentials/udfs/common/url_base/test/cases/Url.sql
@@ -0,0 +1,29 @@
+/* syntax version 1 */
+SELECT
+ value,
+ Url::Parse(value) AS parse,
+ Url::GetSignificantDomain(value) AS significant_domain,
+ Url::GetSignificantDomain(value, AsList("tl")) AS tl_is_significant,
+ Url::GetTLD(value) AS tld,
+ Url::PunycodeToHostName(value) AS punycode,
+ Url::Encode(value) AS encode,
+ Url::Decode(value) AS decode,
+ Url::GetCGIParam(value, "foo") AS param,
+ Url::CutQueryStringAndFragment(value) AS cut_qs_and_fragment,
+ Url::CutWWW(Url::GetHost(value)) AS cut_www,
+ Url::CutWWW2(Url::GetHost(value)) AS cut_www2,
+ Url::GetHost(value) as host,
+ Url::GetHostPort(value) as host_port,
+ Url::GetSchemeHost(value) AS scheme_host,
+ Url::GetSchemeHostPort(value) AS scheme_host_port,
+ Url::CutScheme(value) AS cut_scheme,
+ Url::GetTail(value) AS tail,
+ Url::GetPath(value) AS path,
+ Url::GetFragment(value) AS fragment,
+ Url::GetPort(value) AS port,
+ Url::GetDomain(value, 0) as domain0,
+ Url::GetDomain(value, 1) as domain1,
+ Url::GetDomain(value, 3) as domain3,
+ Url::GetDomainLevel(value) as domain_level,
+ Url::Normalize(value) as norm
+FROM Input;
diff --git a/yql/essentials/udfs/common/url_base/test/cases/UrlQuery.in b/yql/essentials/udfs/common/url_base/test/cases/UrlQuery.in
new file mode 100644
index 00000000000..d9f5d84527c
--- /dev/null
+++ b/yql/essentials/udfs/common/url_base/test/cases/UrlQuery.in
@@ -0,0 +1,20 @@
+{"key"="";"subkey"="";"value"="glfilter=7893318%3A7949252&glfilter=5099461%3A12103637&glfilter=8292645%3A1&hid=90404&onstock=0&local-offers-first=0"};
+{"key"="";"subkey"="";"value"=""};
+{"key"="";"subkey"="";"value"="&"};
+{"key"="";"subkey"="";"value"="&&"};
+{"key"="";"subkey"="";"value"="="};
+{"key"="";"subkey"="";"value"="&="};
+{"key"="";"subkey"="";"value"="=&"};
+{"key"="";"subkey"="";"value"="&=="};
+{"key"="";"subkey"="";"value"="&==&"};
+{"key"="";"subkey"="";"value"="==&"};
+{"key"="";"subkey"="";"value"="=&="};
+{"key"="";"subkey"="";"value"="=a"};
+{"key"="";"subkey"="";"value"="a"};
+{"key"="";"subkey"="";"value"="a="};
+{"key"="";"subkey"="";"value"="&a=b"};
+{"key"="";"subkey"="";"value"="a=a+b&b=b+c"};
+{"key"="";"subkey"="";"value"="a=a+b=&b=b+=c"};
+{"key"="";"subkey"="";"value"="a=1&a=2"};
+{"key"="";"subkey"="";"value"=";a=b"};
+{"key"="";"subkey"="";"value"="a=a+b;b=b+c"};
diff --git a/yql/essentials/udfs/common/url_base/test/cases/UrlQuery.sql b/yql/essentials/udfs/common/url_base/test/cases/UrlQuery.sql
new file mode 100644
index 00000000000..4179fd0a0c8
--- /dev/null
+++ b/yql/essentials/udfs/common/url_base/test/cases/UrlQuery.sql
@@ -0,0 +1,15 @@
+SELECT
+ value,
+ Url::QueryStringToList(value, false AS Strict) AS base_list,
+ Url::BuildQueryString(Url::QueryStringToList(value, false AS Strict)) AS base_list_build,
+ Url::QueryStringToList(value, true AS KeepBlankValues, false AS Strict) AS keep_blank_list,
+ Url::BuildQueryString(Url::QueryStringToList(value, true AS KeepBlankValues, false AS Strict)) AS keep_blank_list_build,
+ Url::QueryStringToList(value, ";" AS Separator, false AS Strict) AS sep_semicol_list,
+ Url::BuildQueryString(Url::QueryStringToList(value, ";" AS Separator, false AS Strict), ";" AS Separator) AS sep_semicol_list_build,
+ Url::QueryStringToDict(value, false AS Strict) AS base_dict,
+ Url::BuildQueryString(Url::QueryStringToDict(value, false AS Strict)) AS base_dict_build,
+ Url::QueryStringToDict(value, true AS KeepBlankValues, false AS Strict) AS keep_blank_dict,
+ Url::BuildQueryString(Url::QueryStringToDict(value, true AS KeepBlankValues, false AS Strict)) AS keep_blank_dict_build,
+ Url::QueryStringToDict(value, ";" AS Separator, false AS Strict) AS sep_semicol_dict,
+ Url::BuildQueryString(Url::QueryStringToDict(value, ";" AS Separator, false AS Strict), ";" AS Separator) AS sep_semicol_dict_build,
+FROM Input;
diff --git a/yql/essentials/udfs/common/url_base/test/cases/UrlQueryMaxFieldsErr.cfg b/yql/essentials/udfs/common/url_base/test/cases/UrlQueryMaxFieldsErr.cfg
new file mode 100644
index 00000000000..5dae597903c
--- /dev/null
+++ b/yql/essentials/udfs/common/url_base/test/cases/UrlQueryMaxFieldsErr.cfg
@@ -0,0 +1 @@
+xfail
diff --git a/yql/essentials/udfs/common/url_base/test/cases/UrlQueryMaxFieldsErr.sql b/yql/essentials/udfs/common/url_base/test/cases/UrlQueryMaxFieldsErr.sql
new file mode 100644
index 00000000000..5cb0cbfc9dc
--- /dev/null
+++ b/yql/essentials/udfs/common/url_base/test/cases/UrlQueryMaxFieldsErr.sql
@@ -0,0 +1,2 @@
+SELECT
+ Url::QueryStringToList("glfilter=78318%3A79492&glfilter=561%3A121037&hid=904", 2 AS MaxFields)
diff --git a/yql/essentials/udfs/common/url_base/test/cases/UrlQueryStrictErr.cfg b/yql/essentials/udfs/common/url_base/test/cases/UrlQueryStrictErr.cfg
new file mode 100644
index 00000000000..5dae597903c
--- /dev/null
+++ b/yql/essentials/udfs/common/url_base/test/cases/UrlQueryStrictErr.cfg
@@ -0,0 +1 @@
+xfail
diff --git a/yql/essentials/udfs/common/url_base/test/cases/UrlQueryStrictErr.sql b/yql/essentials/udfs/common/url_base/test/cases/UrlQueryStrictErr.sql
new file mode 100644
index 00000000000..b036ed5451d
--- /dev/null
+++ b/yql/essentials/udfs/common/url_base/test/cases/UrlQueryStrictErr.sql
@@ -0,0 +1,2 @@
+SELECT
+ Url::QueryStringToList("glfilter=78318%3A79492&mistaken&glfilter=1&hid=904")
diff --git a/yql/essentials/udfs/common/url_base/test/ya.make b/yql/essentials/udfs/common/url_base/test/ya.make
new file mode 100644
index 00000000000..b6abb70add3
--- /dev/null
+++ b/yql/essentials/udfs/common/url_base/test/ya.make
@@ -0,0 +1,17 @@
+YQL_UDF_TEST_CONTRIB()
+
+DEPENDS(yql/essentials/udfs/common/url_base)
+
+TIMEOUT(300)
+
+SIZE(MEDIUM)
+
+IF (SANITIZER_TYPE == "memory")
+ TAG(ya:not_autocheck) # YQL-15385
+ENDIF()
+
+DATA(
+ sbr://451427803 # Robots.in
+)
+
+END()
diff --git a/yql/essentials/udfs/common/url_base/url_base.cpp b/yql/essentials/udfs/common/url_base/url_base.cpp
new file mode 100644
index 00000000000..628abe7a301
--- /dev/null
+++ b/yql/essentials/udfs/common/url_base/url_base.cpp
@@ -0,0 +1,7 @@
+#include <yql/essentials/public/udf/udf_helpers.h>
+
+#include "lib/url_base_udf.h"
+
+SIMPLE_MODULE(TUrlModule, EXPORTED_URL_BASE_UDF)
+REGISTER_MODULES(TUrlModule)
+
diff --git a/yql/essentials/udfs/common/url_base/ya.make b/yql/essentials/udfs/common/url_base/ya.make
new file mode 100644
index 00000000000..ce981f1f2ac
--- /dev/null
+++ b/yql/essentials/udfs/common/url_base/ya.make
@@ -0,0 +1,32 @@
+IF (YQL_PACKAGED)
+ PACKAGE()
+ FROM_SANDBOX(FILE 7319908145 OUT_NOAUTO liburl_udf.so
+ )
+ END()
+ELSE ()
+YQL_UDF_CONTRIB(url_udf)
+
+ YQL_ABI_VERSION(
+ 2
+ 37
+ 0
+ )
+
+ SRCS(
+ url_base.cpp
+ )
+
+ PEERDIR(
+ yql/essentials/public/udf
+ yql/essentials/udfs/common/url_base/lib
+ )
+
+ END()
+ENDIF ()
+
+
+RECURSE_FOR_TESTS(
+ test
+)
+
+
diff --git a/yql/essentials/udfs/common/ya.make b/yql/essentials/udfs/common/ya.make
new file mode 100644
index 00000000000..29266857edf
--- /dev/null
+++ b/yql/essentials/udfs/common/ya.make
@@ -0,0 +1,31 @@
+RECURSE(
+ compress_base
+ datetime2
+ digest
+ file
+ histogram
+ hyperloglog
+ ip_base
+ json
+ json2
+ math
+ pire
+ protobuf
+ python
+ re2
+ set
+ stat
+ streaming
+ string
+ top
+ topfreq
+ unicode_base
+ url_base
+ yson2
+)
+
+IF (ARCH_X86_64)
+ RECURSE(
+ hyperscan
+ )
+ENDIF()
diff --git a/yql/essentials/udfs/common/yson2/test/canondata/result.json b/yql/essentials/udfs/common/yson2/test/canondata/result.json
new file mode 100644
index 00000000000..e8db385e454
--- /dev/null
+++ b/yql/essentials/udfs/common/yson2/test/canondata/result.json
@@ -0,0 +1,172 @@
+{
+ "test.test[AccessJson]": [
+ {
+ "uri": "file://test.test_AccessJson_/results.txt"
+ }
+ ],
+ "test.test[Access]": [
+ {
+ "uri": "file://test.test_Access_/results.txt"
+ }
+ ],
+ "test.test[Attrs]": [
+ {
+ "uri": "file://test.test_Attrs_/results.txt"
+ }
+ ],
+ "test.test[AutoConvertTo]": [
+ {
+ "uri": "file://test.test_AutoConvertTo_/results.txt"
+ }
+ ],
+ "test.test[Contains]": [
+ {
+ "uri": "file://test.test_Contains_/results.txt"
+ }
+ ],
+ "test.test[ConvertTo]": [
+ {
+ "uri": "file://test.test_ConvertTo_/results.txt"
+ }
+ ],
+ "test.test[Dicts]": [
+ {
+ "uri": "file://test.test_Dicts_/results.txt"
+ }
+ ],
+ "test.test[EmptyDicts]": [
+ {
+ "uri": "file://test.test_EmptyDicts_/results.txt"
+ }
+ ],
+ "test.test[EmptyLists]": [
+ {
+ "uri": "file://test.test_EmptyLists_/results.txt"
+ }
+ ],
+ "test.test[Equals]": [
+ {
+ "uri": "file://test.test_Equals_/results.txt"
+ }
+ ],
+ "test.test[From]": [
+ {
+ "uri": "file://test.test_From_/results.txt"
+ }
+ ],
+ "test.test[GenericConvertToEmptyStruct]": [
+ {
+ "uri": "file://test.test_GenericConvertToEmptyStruct_/results.txt"
+ }
+ ],
+ "test.test[GenericConvertToWithAutoConvert]": [
+ {
+ "uri": "file://test.test_GenericConvertToWithAutoConvert_/results.txt"
+ }
+ ],
+ "test.test[GenericConvertToWithNoStrict]": [
+ {
+ "uri": "file://test.test_GenericConvertToWithNoStrict_/results.txt"
+ }
+ ],
+ "test.test[GenericConvertTo]": [
+ {
+ "uri": "file://test.test_GenericConvertTo_/results.txt"
+ }
+ ],
+ "test.test[GenericFrom]": [
+ {
+ "uri": "file://test.test_GenericFrom_/results.txt"
+ }
+ ],
+ "test.test[GetHash]": [
+ {
+ "uri": "file://test.test_GetHash_/results.txt"
+ }
+ ],
+ "test.test[Get]": [
+ {
+ "uri": "file://test.test_Get_/results.txt"
+ }
+ ],
+ "test.test[GoodForYsonBadForJson]": [
+ {
+ "uri": "file://test.test_GoodForYsonBadForJson_/results.txt"
+ }
+ ],
+ "test.test[ImplicitFromRes]": [
+ {
+ "uri": "file://test.test_ImplicitFromRes_/results.txt"
+ }
+ ],
+ "test.test[IsType]": [
+ {
+ "uri": "file://test.test_IsType_/results.txt"
+ }
+ ],
+ "test.test[JsonSerializeSkipMapEntity]": [
+ {
+ "uri": "file://test.test_JsonSerializeSkipMapEntity_/results.txt"
+ }
+ ],
+ "test.test[JsonWithUtf8]": [
+ {
+ "uri": "file://test.test_JsonWithUtf8_/results.txt"
+ }
+ ],
+ "test.test[JsonWithNanAsString]": [
+ {
+ "uri": "file://test.test_JsonWithNanAsString_/results.txt"
+ }
+ ],
+ "test.test[Lists]": [
+ {
+ "uri": "file://test.test_Lists_/results.txt"
+ }
+ ],
+ "test.test[Lookup]": [
+ {
+ "uri": "file://test.test_Lookup_/results.txt"
+ }
+ ],
+ "test.test[NegativeArrayIndex]": [
+ {
+ "uri": "file://test.test_NegativeArrayIndex_/results.txt"
+ }
+ ],
+ "test.test[ParseString]": [
+ {
+ "uri": "file://test.test_ParseString_/results.txt"
+ }
+ ],
+ "test.test[Scalars]": [
+ {
+ "uri": "file://test.test_Scalars_/results.txt"
+ }
+ ],
+ "test.test[SerializeDouble]": [
+ {
+ "uri": "file://test.test_SerializeDouble_/results.txt"
+ }
+ ],
+ "test.test[Serialize]": [
+ {
+ "uri": "file://test.test_Serialize_/results.txt"
+ }
+ ],
+ "test.test[WeakYsonRest]": [
+ {
+ "uri": "file://test.test_WeakYsonRest_/results.txt"
+ }
+ ],
+ "test.test[WithAttrs]": [
+ {
+ "uri": "file://test.test_WithAttrs_/results.txt"
+ }
+ ],
+ "test.test[YPath]": [
+ {
+ "uri": "file://test.test_YPath_/results.txt"
+ }
+ ]
+}
diff --git a/yql/essentials/udfs/common/yson2/test/canondata/test.test_AccessJson_/results.txt b/yql/essentials/udfs/common/yson2/test/canondata/test.test_AccessJson_/results.txt
new file mode 100644
index 00000000000..b3f28755183
--- /dev/null
+++ b/yql/essentials/udfs/common/yson2/test/canondata/test.test_AccessJson_/results.txt
@@ -0,0 +1,126 @@
+[
+ {
+ "Write" = [
+ {
+ "Type" = [
+ "ListType";
+ [
+ "StructType";
+ [
+ [
+ "column0";
+ [
+ "OptionalType";
+ [
+ "DataType";
+ "String"
+ ]
+ ]
+ ]
+ ]
+ ]
+ ];
+ "Data" = [
+ [
+ [
+ "\xD0\xBF\xD1\x80\xD0\xB8\xD0\xB2\xD0\xB5\xD1\x82"
+ ]
+ ]
+ ]
+ }
+ ]
+ };
+ {
+ "Write" = [
+ {
+ "Type" = [
+ "ListType";
+ [
+ "StructType";
+ [
+ [
+ "column0";
+ [
+ "OptionalType";
+ [
+ "DataType";
+ "String"
+ ]
+ ]
+ ]
+ ]
+ ]
+ ];
+ "Data" = [
+ [
+ [
+ "\xD0\xBF\xD1\x80\xD0\xB8\xD0\xB2\xD0\xB5\xD1\x82"
+ ]
+ ]
+ ]
+ }
+ ]
+ };
+ {
+ "Write" = [
+ {
+ "Type" = [
+ "ListType";
+ [
+ "StructType";
+ [
+ [
+ "column0";
+ [
+ "OptionalType";
+ [
+ "DataType";
+ "String"
+ ]
+ ]
+ ]
+ ]
+ ]
+ ];
+ "Data" = [
+ [
+ [
+ "\xD0\xBF\xD1\x80\xD0\xB8\xD0\xB2\xD0\xB5\xD1\x82"
+ ]
+ ]
+ ]
+ }
+ ]
+ };
+ {
+ "Write" = [
+ {
+ "Type" = [
+ "ListType";
+ [
+ "StructType";
+ [
+ [
+ "column0";
+ [
+ "OptionalType";
+ [
+ "DataType";
+ "String"
+ ]
+ ]
+ ]
+ ]
+ ]
+ ];
+ "Data" = [
+ [
+ [
+ "\xD0\xBF\xD1\x80\xD0\xB8\xD0\xB2\xD0\xB5\xD1\x82"
+ ]
+ ]
+ ]
+ }
+ ]
+ }
+] \ No newline at end of file
diff --git a/yql/essentials/udfs/common/yson2/test/canondata/test.test_Access_/results.txt b/yql/essentials/udfs/common/yson2/test/canondata/test.test_Access_/results.txt
new file mode 100644
index 00000000000..c9b9064518f
--- /dev/null
+++ b/yql/essentials/udfs/common/yson2/test/canondata/test.test_Access_/results.txt
@@ -0,0 +1,33 @@
+[
+ {
+ "Write" = [
+ {
+ "Type" = [
+ "ListType";
+ [
+ "StructType";
+ [
+ [
+ "text";
+ [
+ "OptionalType";
+ [
+ "DataType";
+ "String"
+ ]
+ ]
+ ]
+ ]
+ ]
+ ];
+ "Data" = [
+ [
+ [
+ "hello world"
+ ]
+ ]
+ ]
+ }
+ ]
+ }
+] \ No newline at end of file
diff --git a/yql/essentials/udfs/common/yson2/test/canondata/test.test_Attrs_/results.txt b/yql/essentials/udfs/common/yson2/test/canondata/test.test_Attrs_/results.txt
new file mode 100644
index 00000000000..72c297fab48
--- /dev/null
+++ b/yql/essentials/udfs/common/yson2/test/canondata/test.test_Attrs_/results.txt
@@ -0,0 +1,568 @@
+[
+ {
+ "Write" = [
+ {
+ "Type" = [
+ "ListType";
+ [
+ "StructType";
+ [
+ [
+ "column0";
+ [
+ "OptionalType";
+ [
+ "DataType";
+ "Bool"
+ ]
+ ]
+ ];
+ [
+ "column1";
+ [
+ "OptionalType";
+ [
+ "DataType";
+ "Bool"
+ ]
+ ]
+ ];
+ [
+ "column2";
+ [
+ "OptionalType";
+ [
+ "DataType";
+ "Bool"
+ ]
+ ]
+ ];
+ [
+ "column3";
+ [
+ "OptionalType";
+ [
+ "DataType";
+ "Bool"
+ ]
+ ]
+ ];
+ [
+ "column4";
+ [
+ "OptionalType";
+ [
+ "DataType";
+ "Bool"
+ ]
+ ]
+ ];
+ [
+ "column5";
+ [
+ "OptionalType";
+ [
+ "DataType";
+ "Bool"
+ ]
+ ]
+ ];
+ [
+ "column6";
+ [
+ "OptionalType";
+ [
+ "DataType";
+ "Bool"
+ ]
+ ]
+ ];
+ [
+ "column7";
+ [
+ "OptionalType";
+ [
+ "DataType";
+ "Int64"
+ ]
+ ]
+ ];
+ [
+ "column8";
+ [
+ "OptionalType";
+ [
+ "DataType";
+ "Int64"
+ ]
+ ]
+ ];
+ [
+ "column9";
+ [
+ "OptionalType";
+ [
+ "DataType";
+ "Int64"
+ ]
+ ]
+ ];
+ [
+ "column10";
+ [
+ "OptionalType";
+ [
+ "DataType";
+ "Int64"
+ ]
+ ]
+ ];
+ [
+ "column11";
+ [
+ "OptionalType";
+ [
+ "DataType";
+ "Int64"
+ ]
+ ]
+ ];
+ [
+ "column12";
+ [
+ "OptionalType";
+ [
+ "DataType";
+ "Int64"
+ ]
+ ]
+ ];
+ [
+ "column13";
+ [
+ "OptionalType";
+ [
+ "DataType";
+ "Int64"
+ ]
+ ]
+ ];
+ [
+ "column14";
+ [
+ "OptionalType";
+ [
+ "DataType";
+ "Uint64"
+ ]
+ ]
+ ];
+ [
+ "column15";
+ [
+ "OptionalType";
+ [
+ "DataType";
+ "Uint64"
+ ]
+ ]
+ ];
+ [
+ "column16";
+ [
+ "OptionalType";
+ [
+ "DataType";
+ "Uint64"
+ ]
+ ]
+ ];
+ [
+ "column17";
+ [
+ "OptionalType";
+ [
+ "DataType";
+ "Uint64"
+ ]
+ ]
+ ];
+ [
+ "column18";
+ [
+ "OptionalType";
+ [
+ "DataType";
+ "Uint64"
+ ]
+ ]
+ ];
+ [
+ "column19";
+ [
+ "OptionalType";
+ [
+ "DataType";
+ "Uint64"
+ ]
+ ]
+ ];
+ [
+ "column20";
+ [
+ "OptionalType";
+ [
+ "DataType";
+ "Uint64"
+ ]
+ ]
+ ];
+ [
+ "column21";
+ [
+ "OptionalType";
+ [
+ "DataType";
+ "Double"
+ ]
+ ]
+ ];
+ [
+ "column22";
+ [
+ "OptionalType";
+ [
+ "DataType";
+ "Double"
+ ]
+ ]
+ ];
+ [
+ "column23";
+ [
+ "OptionalType";
+ [
+ "DataType";
+ "Double"
+ ]
+ ]
+ ];
+ [
+ "column24";
+ [
+ "OptionalType";
+ [
+ "DataType";
+ "Double"
+ ]
+ ]
+ ];
+ [
+ "column25";
+ [
+ "OptionalType";
+ [
+ "DataType";
+ "Double"
+ ]
+ ]
+ ];
+ [
+ "column26";
+ [
+ "OptionalType";
+ [
+ "DataType";
+ "Double"
+ ]
+ ]
+ ];
+ [
+ "column27";
+ [
+ "OptionalType";
+ [
+ "DataType";
+ "Double"
+ ]
+ ]
+ ];
+ [
+ "column28";
+ [
+ "OptionalType";
+ [
+ "DataType";
+ "String"
+ ]
+ ]
+ ];
+ [
+ "column29";
+ [
+ "OptionalType";
+ [
+ "DataType";
+ "String"
+ ]
+ ]
+ ];
+ [
+ "column30";
+ [
+ "OptionalType";
+ [
+ "DataType";
+ "String"
+ ]
+ ]
+ ];
+ [
+ "column31";
+ [
+ "OptionalType";
+ [
+ "DataType";
+ "String"
+ ]
+ ]
+ ];
+ [
+ "column32";
+ [
+ "OptionalType";
+ [
+ "DataType";
+ "String"
+ ]
+ ]
+ ];
+ [
+ "column33";
+ [
+ "OptionalType";
+ [
+ "DataType";
+ "String"
+ ]
+ ]
+ ];
+ [
+ "column34";
+ [
+ "OptionalType";
+ [
+ "DataType";
+ "String"
+ ]
+ ]
+ ];
+ [
+ "column35";
+ [
+ "ListType";
+ [
+ "OptionalType";
+ [
+ "DataType";
+ "Int64"
+ ]
+ ]
+ ]
+ ];
+ [
+ "column36";
+ [
+ "ListType";
+ [
+ "DataType";
+ "String"
+ ]
+ ]
+ ];
+ [
+ "column37";
+ [
+ "ListType";
+ [
+ "DataType";
+ "String"
+ ]
+ ]
+ ];
+ [
+ "column38";
+ [
+ "ListType";
+ [
+ "OptionalType";
+ [
+ "DataType";
+ "Int64"
+ ]
+ ]
+ ]
+ ];
+ [
+ "column39";
+ [
+ "ListType";
+ [
+ "DataType";
+ "String"
+ ]
+ ]
+ ];
+ [
+ "column40";
+ [
+ "ListType";
+ [
+ "OptionalType";
+ [
+ "DataType";
+ "Int64"
+ ]
+ ]
+ ]
+ ];
+ [
+ "column41";
+ [
+ "ListType";
+ [
+ "DataType";
+ "String"
+ ]
+ ]
+ ];
+ [
+ "column42";
+ [
+ "ListType";
+ [
+ "OptionalType";
+ [
+ "DataType";
+ "Int64"
+ ]
+ ]
+ ]
+ ]
+ ]
+ ]
+ ];
+ "Data" = [
+ [
+ #;
+ [
+ %true
+ ];
+ #;
+ #;
+ #;
+ #;
+ #;
+ #;
+ #;
+ [
+ "1"
+ ];
+ [
+ "2"
+ ];
+ #;
+ #;
+ #;
+ #;
+ #;
+ [
+ "1"
+ ];
+ [
+ "2"
+ ];
+ #;
+ #;
+ #;
+ #;
+ #;
+ [
+ "1"
+ ];
+ [
+ "2"
+ ];
+ [
+ "3"
+ ];
+ #;
+ #;
+ #;
+ #;
+ #;
+ #;
+ #;
+ [
+ "foo"
+ ];
+ [
+ "very loooooooooooooooooong string"
+ ];
+ [
+ [
+ "1"
+ ];
+ [
+ "2"
+ ];
+ [
+ "3"
+ ]
+ ];
+ [
+ "b";
+ "c"
+ ];
+ [
+ "a";
+ "b"
+ ];
+ [
+ [
+ "1"
+ ];
+ [
+ "2"
+ ]
+ ];
+ [
+ "a";
+ "b"
+ ];
+ [
+ [
+ "1"
+ ];
+ [
+ "2"
+ ]
+ ];
+ [
+ "a";
+ "b"
+ ];
+ [
+ [
+ "1"
+ ];
+ [
+ "2"
+ ]
+ ]
+ ]
+ ]
+ }
+ ]
+ }
+] \ No newline at end of file
diff --git a/yql/essentials/udfs/common/yson2/test/canondata/test.test_AutoConvertTo_/results.txt b/yql/essentials/udfs/common/yson2/test/canondata/test.test_AutoConvertTo_/results.txt
new file mode 100644
index 00000000000..034a71cae2c
--- /dev/null
+++ b/yql/essentials/udfs/common/yson2/test/canondata/test.test_AutoConvertTo_/results.txt
@@ -0,0 +1,556 @@
+[
+ {
+ "Write" = [
+ {
+ "Type" = [
+ "ListType";
+ [
+ "StructType";
+ [
+ [
+ "zero_to_bool";
+ [
+ "OptionalType";
+ [
+ "DataType";
+ "Bool"
+ ]
+ ]
+ ];
+ [
+ "bool_to_bool";
+ [
+ "OptionalType";
+ [
+ "DataType";
+ "Bool"
+ ]
+ ]
+ ];
+ [
+ "int_to_bool";
+ [
+ "OptionalType";
+ [
+ "DataType";
+ "Bool"
+ ]
+ ]
+ ];
+ [
+ "uint_to_bool";
+ [
+ "OptionalType";
+ [
+ "DataType";
+ "Bool"
+ ]
+ ]
+ ];
+ [
+ "negative_to_bool";
+ [
+ "OptionalType";
+ [
+ "DataType";
+ "Bool"
+ ]
+ ]
+ ];
+ [
+ "double_to_bool";
+ [
+ "OptionalType";
+ [
+ "DataType";
+ "Bool"
+ ]
+ ]
+ ];
+ [
+ "string_to_bool";
+ [
+ "OptionalType";
+ [
+ "DataType";
+ "Bool"
+ ]
+ ]
+ ];
+ [
+ "zero_to_int";
+ [
+ "OptionalType";
+ [
+ "DataType";
+ "Int64"
+ ]
+ ]
+ ];
+ [
+ "bool_to_int";
+ [
+ "OptionalType";
+ [
+ "DataType";
+ "Int64"
+ ]
+ ]
+ ];
+ [
+ "int_to_int";
+ [
+ "OptionalType";
+ [
+ "DataType";
+ "Int64"
+ ]
+ ]
+ ];
+ [
+ "uint_to_int";
+ [
+ "OptionalType";
+ [
+ "DataType";
+ "Int64"
+ ]
+ ]
+ ];
+ [
+ "negative_to_int";
+ [
+ "OptionalType";
+ [
+ "DataType";
+ "Int64"
+ ]
+ ]
+ ];
+ [
+ "double_to_int";
+ [
+ "OptionalType";
+ [
+ "DataType";
+ "Int64"
+ ]
+ ]
+ ];
+ [
+ "string_to_int";
+ [
+ "OptionalType";
+ [
+ "DataType";
+ "Int64"
+ ]
+ ]
+ ];
+ [
+ "zero_to_uint";
+ [
+ "OptionalType";
+ [
+ "DataType";
+ "Uint64"
+ ]
+ ]
+ ];
+ [
+ "bool_to_uint";
+ [
+ "OptionalType";
+ [
+ "DataType";
+ "Uint64"
+ ]
+ ]
+ ];
+ [
+ "int_to_uint";
+ [
+ "OptionalType";
+ [
+ "DataType";
+ "Uint64"
+ ]
+ ]
+ ];
+ [
+ "uint_to_uint";
+ [
+ "OptionalType";
+ [
+ "DataType";
+ "Uint64"
+ ]
+ ]
+ ];
+ [
+ "negative_to_uint";
+ [
+ "OptionalType";
+ [
+ "DataType";
+ "Uint64"
+ ]
+ ]
+ ];
+ [
+ "double_to_uint";
+ [
+ "OptionalType";
+ [
+ "DataType";
+ "Uint64"
+ ]
+ ]
+ ];
+ [
+ "string_to_uint";
+ [
+ "OptionalType";
+ [
+ "DataType";
+ "Uint64"
+ ]
+ ]
+ ];
+ [
+ "zero_to_double";
+ [
+ "OptionalType";
+ [
+ "DataType";
+ "Double"
+ ]
+ ]
+ ];
+ [
+ "bool_to_double";
+ [
+ "OptionalType";
+ [
+ "DataType";
+ "Double"
+ ]
+ ]
+ ];
+ [
+ "int_to_double";
+ [
+ "OptionalType";
+ [
+ "DataType";
+ "Double"
+ ]
+ ]
+ ];
+ [
+ "uint_to_double";
+ [
+ "OptionalType";
+ [
+ "DataType";
+ "Double"
+ ]
+ ]
+ ];
+ [
+ "negative_to_double";
+ [
+ "OptionalType";
+ [
+ "DataType";
+ "Double"
+ ]
+ ]
+ ];
+ [
+ "double_to_double";
+ [
+ "OptionalType";
+ [
+ "DataType";
+ "Double"
+ ]
+ ]
+ ];
+ [
+ "string_to_double";
+ [
+ "OptionalType";
+ [
+ "DataType";
+ "Double"
+ ]
+ ]
+ ];
+ [
+ "zero_to_string";
+ [
+ "OptionalType";
+ [
+ "DataType";
+ "String"
+ ]
+ ]
+ ];
+ [
+ "bool_to_string";
+ [
+ "OptionalType";
+ [
+ "DataType";
+ "String"
+ ]
+ ]
+ ];
+ [
+ "int_to_string";
+ [
+ "OptionalType";
+ [
+ "DataType";
+ "String"
+ ]
+ ]
+ ];
+ [
+ "uint_to_string";
+ [
+ "OptionalType";
+ [
+ "DataType";
+ "String"
+ ]
+ ]
+ ];
+ [
+ "negative_to_string";
+ [
+ "OptionalType";
+ [
+ "DataType";
+ "String"
+ ]
+ ]
+ ];
+ [
+ "double_to_string";
+ [
+ "OptionalType";
+ [
+ "DataType";
+ "String"
+ ]
+ ]
+ ];
+ [
+ "string_to_string";
+ [
+ "OptionalType";
+ [
+ "DataType";
+ "String"
+ ]
+ ]
+ ];
+ [
+ "struct_stub";
+ [
+ "OptionalType";
+ [
+ "StructType";
+ [
+ [
+ "x";
+ [
+ "OptionalType";
+ [
+ "DataType";
+ "Double"
+ ]
+ ]
+ ];
+ [
+ "y";
+ [
+ "DataType";
+ "Int64"
+ ]
+ ];
+ [
+ "z";
+ [
+ "ListType";
+ [
+ "DataType";
+ "Bool"
+ ]
+ ]
+ ]
+ ]
+ ]
+ ]
+ ];
+ [
+ "tuple_stub";
+ [
+ "OptionalType";
+ [
+ "TupleType";
+ [
+ [
+ "OptionalType";
+ [
+ "DataType";
+ "Double"
+ ]
+ ];
+ [
+ "DataType";
+ "Int64"
+ ];
+ [
+ "ListType";
+ [
+ "DataType";
+ "Bool"
+ ]
+ ]
+ ]
+ ]
+ ]
+ ]
+ ]
+ ]
+ ];
+ "Data" = [
+ [
+ [
+ %false
+ ];
+ [
+ %true
+ ];
+ [
+ %true
+ ];
+ [
+ %true
+ ];
+ [
+ %true
+ ];
+ [
+ %true
+ ];
+ [
+ %true
+ ];
+ [
+ "0"
+ ];
+ [
+ "1"
+ ];
+ [
+ "123"
+ ];
+ [
+ "123"
+ ];
+ [
+ "-123"
+ ];
+ [
+ "123"
+ ];
+ [
+ "123"
+ ];
+ [
+ "0"
+ ];
+ [
+ "1"
+ ];
+ [
+ "123"
+ ];
+ [
+ "123"
+ ];
+ [
+ "18446744073709551493"
+ ];
+ [
+ "123"
+ ];
+ [
+ "123"
+ ];
+ [
+ "0"
+ ];
+ [
+ "1"
+ ];
+ [
+ "123"
+ ];
+ [
+ "123"
+ ];
+ [
+ "-123"
+ ];
+ [
+ "123.456"
+ ];
+ [
+ "123"
+ ];
+ [
+ "0"
+ ];
+ [
+ "true"
+ ];
+ [
+ "123"
+ ];
+ [
+ "123"
+ ];
+ [
+ "-123"
+ ];
+ [
+ "123.456"
+ ];
+ [
+ "123"
+ ];
+ [
+ [
+ #;
+ "0";
+ []
+ ]
+ ];
+ [
+ [
+ #;
+ "0";
+ []
+ ]
+ ]
+ ]
+ ]
+ }
+ ]
+ }
+] \ No newline at end of file
diff --git a/yql/essentials/udfs/common/yson2/test/canondata/test.test_Contains_/results.txt b/yql/essentials/udfs/common/yson2/test/canondata/test.test_Contains_/results.txt
new file mode 100644
index 00000000000..8d72c6dad2c
--- /dev/null
+++ b/yql/essentials/udfs/common/yson2/test/canondata/test.test_Contains_/results.txt
@@ -0,0 +1,122 @@
+[
+ {
+ "Write" = [
+ {
+ "Type" = [
+ "ListType";
+ [
+ "StructType";
+ [
+ [
+ "column0";
+ [
+ "OptionalType";
+ [
+ "DataType";
+ "Bool"
+ ]
+ ]
+ ];
+ [
+ "column1";
+ [
+ "OptionalType";
+ [
+ "DataType";
+ "Bool"
+ ]
+ ]
+ ];
+ [
+ "column2";
+ [
+ "OptionalType";
+ [
+ "DataType";
+ "Bool"
+ ]
+ ]
+ ];
+ [
+ "column3";
+ [
+ "OptionalType";
+ [
+ "DataType";
+ "Bool"
+ ]
+ ]
+ ];
+ [
+ "column4";
+ [
+ "OptionalType";
+ [
+ "DataType";
+ "Bool"
+ ]
+ ]
+ ];
+ [
+ "column5";
+ [
+ "OptionalType";
+ [
+ "DataType";
+ "Bool"
+ ]
+ ]
+ ];
+ [
+ "column6";
+ [
+ "OptionalType";
+ [
+ "DataType";
+ "Bool"
+ ]
+ ]
+ ];
+ [
+ "column7";
+ [
+ "OptionalType";
+ [
+ "DataType";
+ "Bool"
+ ]
+ ]
+ ]
+ ]
+ ]
+ ];
+ "Data" = [
+ [
+ [
+ %true
+ ];
+ [
+ %false
+ ];
+ [
+ %false
+ ];
+ [
+ %true
+ ];
+ [
+ %false
+ ];
+ [
+ %true
+ ];
+ [
+ %false
+ ];
+ #
+ ]
+ ]
+ }
+ ]
+ }
+] \ No newline at end of file
diff --git a/yql/essentials/udfs/common/yson2/test/canondata/test.test_ConvertTo_/results.txt b/yql/essentials/udfs/common/yson2/test/canondata/test.test_ConvertTo_/results.txt
new file mode 100644
index 00000000000..4061e6a4f8c
--- /dev/null
+++ b/yql/essentials/udfs/common/yson2/test/canondata/test.test_ConvertTo_/results.txt
@@ -0,0 +1,245 @@
+[
+ {
+ "Write" = [
+ {
+ "Type" = [
+ "ListType";
+ [
+ "StructType";
+ [
+ [
+ "bool";
+ [
+ "OptionalType";
+ [
+ "DataType";
+ "Bool"
+ ]
+ ]
+ ];
+ [
+ "int";
+ [
+ "OptionalType";
+ [
+ "DataType";
+ "Int64"
+ ]
+ ]
+ ];
+ [
+ "uint";
+ [
+ "OptionalType";
+ [
+ "DataType";
+ "Uint64"
+ ]
+ ]
+ ];
+ [
+ "double";
+ [
+ "OptionalType";
+ [
+ "DataType";
+ "Double"
+ ]
+ ]
+ ];
+ [
+ "string";
+ [
+ "OptionalType";
+ [
+ "DataType";
+ "String"
+ ]
+ ]
+ ];
+ [
+ "incorrect";
+ [
+ "OptionalType";
+ [
+ "DataType";
+ "Int64"
+ ]
+ ]
+ ];
+ [
+ "number_list";
+ [
+ "ListType";
+ [
+ "DataType";
+ "Uint64"
+ ]
+ ]
+ ];
+ [
+ "string_list";
+ [
+ "ListType";
+ [
+ "DataType";
+ "String"
+ ]
+ ]
+ ];
+ [
+ "yson_list";
+ [
+ "ListType";
+ [
+ "OptionalType";
+ [
+ "DataType";
+ "Json"
+ ]
+ ]
+ ]
+ ];
+ [
+ "incorrect_list";
+ [
+ "ListType";
+ [
+ "DataType";
+ "String"
+ ]
+ ]
+ ];
+ [
+ "number_dict";
+ [
+ "DictType";
+ [
+ "DataType";
+ "String"
+ ];
+ [
+ "DataType";
+ "Int64"
+ ]
+ ]
+ ];
+ [
+ "string_dict";
+ [
+ "DictType";
+ [
+ "DataType";
+ "String"
+ ];
+ [
+ "DataType";
+ "String"
+ ]
+ ]
+ ];
+ [
+ "yson_dict";
+ [
+ "OptionalType";
+ [
+ "DataType";
+ "Json"
+ ]
+ ]
+ ];
+ [
+ "incorrect_dict";
+ [
+ "DictType";
+ [
+ "DataType";
+ "String"
+ ];
+ [
+ "DataType";
+ "Bool"
+ ]
+ ]
+ ]
+ ]
+ ]
+ ];
+ "Data" = [
+ [
+ [
+ %true
+ ];
+ [
+ "123"
+ ];
+ [
+ "123"
+ ];
+ [
+ "123"
+ ];
+ [
+ "123"
+ ];
+ #;
+ [
+ "1";
+ "2";
+ "3"
+ ];
+ [
+ "a";
+ "b";
+ "c"
+ ];
+ [
+ [
+ "123"
+ ];
+ [
+ "{\"a\":1,\"b\":2,\"c\":3}"
+ ];
+ [
+ "{\"a\":4,\"b\":5,\"c\":6}"
+ ]
+ ];
+ [];
+ [
+ [
+ "a";
+ "1"
+ ];
+ [
+ "b";
+ "2"
+ ];
+ [
+ "c";
+ "3"
+ ]
+ ];
+ [
+ [
+ "a";
+ "aaa"
+ ];
+ [
+ "b";
+ "bbb"
+ ];
+ [
+ "c";
+ "ccc"
+ ]
+ ];
+ [
+ "[\"ccc\",\"ddd\"]"
+ ];
+ []
+ ]
+ ]
+ }
+ ]
+ }
+] \ No newline at end of file
diff --git a/yql/essentials/udfs/common/yson2/test/canondata/test.test_Dicts_/results.txt b/yql/essentials/udfs/common/yson2/test/canondata/test.test_Dicts_/results.txt
new file mode 100644
index 00000000000..e4b9cb54357
--- /dev/null
+++ b/yql/essentials/udfs/common/yson2/test/canondata/test.test_Dicts_/results.txt
@@ -0,0 +1,178 @@
+[
+ {
+ "Write" = [
+ {
+ "Type" = [
+ "ListType";
+ [
+ "StructType";
+ [
+ [
+ "column0";
+ [
+ "DataType";
+ "Bool"
+ ]
+ ];
+ [
+ "column1";
+ [
+ "DataType";
+ "Uint64"
+ ]
+ ];
+ [
+ "column2";
+ [
+ "ListType";
+ [
+ "DataType";
+ "String"
+ ]
+ ]
+ ];
+ [
+ "column3";
+ [
+ "ListType";
+ [
+ "OptionalType";
+ [
+ "DataType";
+ "Int64"
+ ]
+ ]
+ ]
+ ];
+ [
+ "column4";
+ [
+ "ListType";
+ [
+ "TupleType";
+ [
+ [
+ "DataType";
+ "String"
+ ];
+ [
+ "OptionalType";
+ [
+ "DataType";
+ "Int64"
+ ]
+ ]
+ ]
+ ]
+ ]
+ ];
+ [
+ "column5";
+ [
+ "DataType";
+ "Bool"
+ ]
+ ];
+ [
+ "column6";
+ [
+ "DataType";
+ "Bool"
+ ]
+ ];
+ [
+ "column7";
+ [
+ "OptionalType";
+ [
+ "DataType";
+ "Int64"
+ ]
+ ]
+ ];
+ [
+ "column8";
+ [
+ "OptionalType";
+ [
+ "DataType";
+ "Int64"
+ ]
+ ]
+ ];
+ [
+ "column9";
+ [
+ "ListType";
+ [
+ "DataType";
+ "String"
+ ]
+ ]
+ ];
+ [
+ "column10";
+ [
+ "ListType";
+ [
+ "OptionalType";
+ [
+ "DataType";
+ "Int64"
+ ]
+ ]
+ ]
+ ]
+ ]
+ ]
+ ];
+ "Data" = [
+ [
+ %false;
+ "2";
+ [
+ "a";
+ "b"
+ ];
+ [
+ [
+ "1"
+ ];
+ #
+ ];
+ [
+ [
+ "a";
+ [
+ "1"
+ ]
+ ];
+ [
+ "b";
+ #
+ ]
+ ];
+ %true;
+ %false;
+ [
+ "1"
+ ];
+ #;
+ [
+ "c";
+ "d"
+ ];
+ [
+ [
+ "3"
+ ];
+ [
+ "4"
+ ]
+ ]
+ ]
+ ]
+ }
+ ]
+ }
+] \ No newline at end of file
diff --git a/yql/essentials/udfs/common/yson2/test/canondata/test.test_EmptyDicts_/results.txt b/yql/essentials/udfs/common/yson2/test/canondata/test.test_EmptyDicts_/results.txt
new file mode 100644
index 00000000000..34e7c5f7833
--- /dev/null
+++ b/yql/essentials/udfs/common/yson2/test/canondata/test.test_EmptyDicts_/results.txt
@@ -0,0 +1,103 @@
+[
+ {
+ "Write" = [
+ {
+ "Type" = [
+ "ListType";
+ [
+ "StructType";
+ [
+ [
+ "column0";
+ [
+ "DataType";
+ "Bool"
+ ]
+ ];
+ [
+ "column1";
+ [
+ "DataType";
+ "Uint64"
+ ]
+ ];
+ [
+ "column2";
+ [
+ "ListType";
+ [
+ "DataType";
+ "String"
+ ]
+ ]
+ ];
+ [
+ "column3";
+ [
+ "ListType";
+ [
+ "OptionalType";
+ [
+ "DataType";
+ "Int64"
+ ]
+ ]
+ ]
+ ];
+ [
+ "column4";
+ [
+ "ListType";
+ [
+ "TupleType";
+ [
+ [
+ "DataType";
+ "String"
+ ];
+ [
+ "OptionalType";
+ [
+ "DataType";
+ "Int64"
+ ]
+ ]
+ ]
+ ]
+ ]
+ ];
+ [
+ "column5";
+ [
+ "DataType";
+ "Bool"
+ ]
+ ];
+ [
+ "column6";
+ [
+ "OptionalType";
+ [
+ "DataType";
+ "Int64"
+ ]
+ ]
+ ]
+ ]
+ ]
+ ];
+ "Data" = [
+ [
+ %false;
+ "0";
+ [];
+ [];
+ [];
+ %false;
+ #
+ ]
+ ]
+ }
+ ]
+ }
+] \ No newline at end of file
diff --git a/yql/essentials/udfs/common/yson2/test/canondata/test.test_EmptyLists_/results.txt b/yql/essentials/udfs/common/yson2/test/canondata/test.test_EmptyLists_/results.txt
new file mode 100644
index 00000000000..7a8b8c82e5c
--- /dev/null
+++ b/yql/essentials/udfs/common/yson2/test/canondata/test.test_EmptyLists_/results.txt
@@ -0,0 +1,50 @@
+[
+ {
+ "Write" = [
+ {
+ "Type" = [
+ "ListType";
+ [
+ "StructType";
+ [
+ [
+ "column0";
+ [
+ "DataType";
+ "Bool"
+ ]
+ ];
+ [
+ "column1";
+ [
+ "DataType";
+ "Uint64"
+ ]
+ ];
+ [
+ "column2";
+ [
+ "ListType";
+ [
+ "OptionalType";
+ [
+ "DataType";
+ "Int64"
+ ]
+ ]
+ ]
+ ]
+ ]
+ ]
+ ];
+ "Data" = [
+ [
+ %false;
+ "0";
+ []
+ ]
+ ]
+ }
+ ]
+ }
+] \ No newline at end of file
diff --git a/yql/essentials/udfs/common/yson2/test/canondata/test.test_Equals_/results.txt b/yql/essentials/udfs/common/yson2/test/canondata/test.test_Equals_/results.txt
new file mode 100644
index 00000000000..736f668ab43
--- /dev/null
+++ b/yql/essentials/udfs/common/yson2/test/canondata/test.test_Equals_/results.txt
@@ -0,0 +1,124 @@
+[
+ {
+ "Write" = [
+ {
+ "Type" = [
+ "ListType";
+ [
+ "StructType";
+ [
+ [
+ "a1";
+ [
+ "DataType";
+ "Bool"
+ ]
+ ];
+ [
+ "a2";
+ [
+ "DataType";
+ "Bool"
+ ]
+ ];
+ [
+ "a3";
+ [
+ "DataType";
+ "Bool"
+ ]
+ ];
+ [
+ "b";
+ [
+ "DataType";
+ "Bool"
+ ]
+ ];
+ [
+ "c";
+ [
+ "DataType";
+ "Bool"
+ ]
+ ];
+ [
+ "d";
+ [
+ "DataType";
+ "Bool"
+ ]
+ ];
+ [
+ "e";
+ [
+ "DataType";
+ "Bool"
+ ]
+ ];
+ [
+ "f";
+ [
+ "DataType";
+ "Bool"
+ ]
+ ];
+ [
+ "g";
+ [
+ "DataType";
+ "Bool"
+ ]
+ ];
+ [
+ "h";
+ [
+ "DataType";
+ "Bool"
+ ]
+ ];
+ [
+ "i";
+ [
+ "DataType";
+ "Bool"
+ ]
+ ];
+ [
+ "attrs1";
+ [
+ "DataType";
+ "Bool"
+ ]
+ ];
+ [
+ "attrs2";
+ [
+ "DataType";
+ "Bool"
+ ]
+ ]
+ ]
+ ]
+ ];
+ "Data" = [
+ [
+ %true;
+ %true;
+ %true;
+ %false;
+ %false;
+ %false;
+ %false;
+ %false;
+ %false;
+ %false;
+ %false;
+ %false;
+ %true
+ ]
+ ]
+ }
+ ]
+ }
+] \ No newline at end of file
diff --git a/yql/essentials/udfs/common/yson2/test/canondata/test.test_From_/results.txt b/yql/essentials/udfs/common/yson2/test/canondata/test.test_From_/results.txt
new file mode 100644
index 00000000000..d1e62592cc2
--- /dev/null
+++ b/yql/essentials/udfs/common/yson2/test/canondata/test.test_From_/results.txt
@@ -0,0 +1,188 @@
+[
+ {
+ "Write" = [
+ {
+ "Type" = [
+ "ListType";
+ [
+ "StructType";
+ [
+ [
+ "column0";
+ [
+ "DataType";
+ "Bool"
+ ]
+ ];
+ [
+ "column1";
+ [
+ "DataType";
+ "Bool"
+ ]
+ ];
+ [
+ "column2";
+ [
+ "DataType";
+ "Bool"
+ ]
+ ];
+ [
+ "column3";
+ [
+ "DataType";
+ "Yson"
+ ]
+ ];
+ [
+ "column4";
+ [
+ "DataType";
+ "Yson"
+ ]
+ ];
+ [
+ "column5";
+ [
+ "DataType";
+ "Yson"
+ ]
+ ];
+ [
+ "column6";
+ [
+ "DataType";
+ "Yson"
+ ]
+ ];
+ [
+ "column7";
+ [
+ "DataType";
+ "Yson"
+ ]
+ ];
+ [
+ "column8";
+ [
+ "DataType";
+ "Yson"
+ ]
+ ];
+ [
+ "column9";
+ [
+ "DataType";
+ "Yson"
+ ]
+ ];
+ [
+ "column10";
+ [
+ "DataType";
+ "Yson"
+ ]
+ ];
+ [
+ "column11";
+ [
+ "DataType";
+ "Yson"
+ ]
+ ];
+ [
+ "column12";
+ [
+ "DataType";
+ "Yson"
+ ]
+ ];
+ [
+ "column13";
+ [
+ "DataType";
+ "Yson"
+ ]
+ ];
+ [
+ "column14";
+ [
+ "DataType";
+ "Yson"
+ ]
+ ];
+ [
+ "column15";
+ [
+ "DataType";
+ "Yson"
+ ]
+ ]
+ ]
+ ]
+ ];
+ "Data" = [
+ [
+ %true;
+ %true;
+ %false;
+ {
+ "$type" = "boolean";
+ "$value" = "true"
+ };
+ #;
+ {
+ "$type" = "int64";
+ "$value" = "1"
+ };
+ #;
+ {
+ "$type" = "uint64";
+ "$value" = "2"
+ };
+ #;
+ {
+ "$type" = "double";
+ "$value" = "3"
+ };
+ #;
+ {
+ "$type" = "string";
+ "$value" = "foo"
+ };
+ {
+ "$type" = "string";
+ "$value" = "fooooooooooooooooooooooooooooooooo"
+ };
+ #;
+ [
+ {
+ "$type" = "int64";
+ "$value" = "1"
+ };
+ {
+ "$type" = "int64";
+ "$value" = "2"
+ };
+ {
+ "$type" = "int64";
+ "$value" = "3"
+ }
+ ];
+ {
+ "a" = {
+ "$type" = "string";
+ "$value" = "x"
+ };
+ "b" = {
+ "$type" = "string";
+ "$value" = "y"
+ }
+ }
+ ]
+ ]
+ }
+ ]
+ }
+] \ No newline at end of file
diff --git a/yql/essentials/udfs/common/yson2/test/canondata/test.test_GenericConvertToEmptyStruct_/results.txt b/yql/essentials/udfs/common/yson2/test/canondata/test.test_GenericConvertToEmptyStruct_/results.txt
new file mode 100644
index 00000000000..1029a098e13
--- /dev/null
+++ b/yql/essentials/udfs/common/yson2/test/canondata/test.test_GenericConvertToEmptyStruct_/results.txt
@@ -0,0 +1,34 @@
+[
+ {
+ "Write" = [
+ {
+ "Type" = [
+ "ListType";
+ [
+ "StructType";
+ [
+ [
+ "list_of_empty_structs";
+ [
+ "ListType";
+ [
+ "StructType";
+ []
+ ]
+ ]
+ ]
+ ]
+ ]
+ ];
+ "Data" = [
+ [
+ [
+ [];
+ []
+ ]
+ ]
+ ]
+ }
+ ]
+ }
+] \ No newline at end of file
diff --git a/yql/essentials/udfs/common/yson2/test/canondata/test.test_GenericConvertToWithAutoConvert_/results.txt b/yql/essentials/udfs/common/yson2/test/canondata/test.test_GenericConvertToWithAutoConvert_/results.txt
new file mode 100644
index 00000000000..875f9a5aae1
--- /dev/null
+++ b/yql/essentials/udfs/common/yson2/test/canondata/test.test_GenericConvertToWithAutoConvert_/results.txt
@@ -0,0 +1,338 @@
+[
+ {
+ "Write" = [
+ {
+ "Type" = [
+ "ListType";
+ [
+ "StructType";
+ [
+ [
+ "bool";
+ [
+ "DataType";
+ "Bool"
+ ]
+ ];
+ [
+ "int";
+ [
+ "DataType";
+ "Int64"
+ ]
+ ];
+ [
+ "uint";
+ [
+ "DataType";
+ "Uint8"
+ ]
+ ];
+ [
+ "optional_double";
+ [
+ "OptionalType";
+ [
+ "DataType";
+ "Double"
+ ]
+ ]
+ ];
+ [
+ "empty_int";
+ [
+ "OptionalType";
+ [
+ "DataType";
+ "Int32"
+ ]
+ ]
+ ];
+ [
+ "string";
+ [
+ "DataType";
+ "String"
+ ]
+ ];
+ [
+ "utf8";
+ [
+ "DataType";
+ "Utf8"
+ ]
+ ];
+ [
+ "int_list";
+ [
+ "ListType";
+ [
+ "DataType";
+ "Int64"
+ ]
+ ]
+ ];
+ [
+ "nested_list";
+ [
+ "ListType";
+ [
+ "ListType";
+ [
+ "OptionalType";
+ [
+ "DataType";
+ "Int64"
+ ]
+ ]
+ ]
+ ]
+ ];
+ [
+ "int_dict";
+ [
+ "DictType";
+ [
+ "DataType";
+ "String"
+ ];
+ [
+ "DataType";
+ "Int64"
+ ]
+ ]
+ ];
+ [
+ "tuple";
+ [
+ "TupleType";
+ [
+ [
+ "DataType";
+ "Bool"
+ ];
+ [
+ "DataType";
+ "Int8"
+ ];
+ [
+ "OptionalType";
+ [
+ "DataType";
+ "String"
+ ]
+ ];
+ [
+ "ListType";
+ [
+ "DataType";
+ "Int64"
+ ]
+ ];
+ [
+ "DataType";
+ "Yson"
+ ];
+ [
+ "DataType";
+ "Json"
+ ]
+ ]
+ ]
+ ];
+ [
+ "struct";
+ [
+ "StructType";
+ [
+ [
+ "a";
+ [
+ "DataType";
+ "Bool"
+ ]
+ ];
+ [
+ "b";
+ [
+ "DataType";
+ "Int8"
+ ]
+ ];
+ [
+ "c";
+ [
+ "OptionalType";
+ [
+ "DataType";
+ "String"
+ ]
+ ]
+ ];
+ [
+ "d";
+ [
+ "ListType";
+ [
+ "DataType";
+ "Int64"
+ ]
+ ]
+ ];
+ [
+ "e";
+ [
+ "TupleType";
+ [
+ [
+ "DataType";
+ "Double"
+ ];
+ [
+ "DataType";
+ "String"
+ ]
+ ]
+ ]
+ ];
+ [
+ "x";
+ [
+ "TupleType";
+ [
+ [
+ "OptionalType";
+ [
+ "DataType";
+ "Double"
+ ]
+ ];
+ [
+ "OptionalType";
+ [
+ "DataType";
+ "String"
+ ]
+ ]
+ ]
+ ]
+ ];
+ [
+ "y";
+ [
+ "OptionalType";
+ [
+ "DataType";
+ "Int16"
+ ]
+ ]
+ ];
+ [
+ "z";
+ [
+ "ListType";
+ [
+ "DataType";
+ "Int8"
+ ]
+ ]
+ ]
+ ]
+ ]
+ ]
+ ]
+ ]
+ ];
+ "Data" = [
+ [
+ %true;
+ "0";
+ "123";
+ [
+ "1.23"
+ ];
+ [
+ "0"
+ ];
+ "1.23";
+ "0";
+ [
+ "1";
+ "2";
+ "3";
+ "7";
+ "8";
+ "0"
+ ];
+ [
+ [
+ [
+ "1"
+ ];
+ [
+ "2"
+ ]
+ ];
+ [
+ [
+ "3"
+ ];
+ #
+ ];
+ [];
+ []
+ ];
+ [
+ [
+ "bar";
+ "2"
+ ];
+ [
+ "foo";
+ "1"
+ ];
+ [
+ "xxx";
+ "0"
+ ]
+ ];
+ [
+ %false;
+ "1";
+ [
+ "foo"
+ ];
+ [
+ "1";
+ "2"
+ ];
+ #;
+ "null"
+ ];
+ [
+ %false;
+ "1";
+ [
+ "foo"
+ ];
+ [
+ "1";
+ "2"
+ ];
+ [
+ "1";
+ "bar"
+ ];
+ [
+ #;
+ #
+ ];
+ #;
+ []
+ ]
+ ]
+ ]
+ }
+ ]
+ }
+] \ No newline at end of file
diff --git a/yql/essentials/udfs/common/yson2/test/canondata/test.test_GenericConvertToWithNoStrict_/results.txt b/yql/essentials/udfs/common/yson2/test/canondata/test.test_GenericConvertToWithNoStrict_/results.txt
new file mode 100644
index 00000000000..5b6f73d6096
--- /dev/null
+++ b/yql/essentials/udfs/common/yson2/test/canondata/test.test_GenericConvertToWithNoStrict_/results.txt
@@ -0,0 +1,287 @@
+[
+ {
+ "Write" = [
+ {
+ "Type" = [
+ "ListType";
+ [
+ "StructType";
+ [
+ [
+ "bool";
+ [
+ "OptionalType";
+ [
+ "DataType";
+ "Bool"
+ ]
+ ]
+ ];
+ [
+ "int";
+ [
+ "OptionalType";
+ [
+ "DataType";
+ "Int64"
+ ]
+ ]
+ ];
+ [
+ "uint";
+ [
+ "OptionalType";
+ [
+ "DataType";
+ "Uint8"
+ ]
+ ]
+ ];
+ [
+ "optional_double";
+ [
+ "OptionalType";
+ [
+ "DataType";
+ "Double"
+ ]
+ ]
+ ];
+ [
+ "empty_int";
+ [
+ "OptionalType";
+ [
+ "DataType";
+ "Int32"
+ ]
+ ]
+ ];
+ [
+ "string";
+ [
+ "OptionalType";
+ [
+ "DataType";
+ "String"
+ ]
+ ]
+ ];
+ [
+ "utf8";
+ [
+ "OptionalType";
+ [
+ "DataType";
+ "Utf8"
+ ]
+ ]
+ ];
+ [
+ "int_list";
+ [
+ "ListType";
+ [
+ "DataType";
+ "Int64"
+ ]
+ ]
+ ];
+ [
+ "nested_list";
+ [
+ "ListType";
+ [
+ "ListType";
+ [
+ "OptionalType";
+ [
+ "DataType";
+ "Int64"
+ ]
+ ]
+ ]
+ ]
+ ];
+ [
+ "int_dict";
+ [
+ "DictType";
+ [
+ "DataType";
+ "String"
+ ];
+ [
+ "DataType";
+ "Int64"
+ ]
+ ]
+ ];
+ [
+ "tuple";
+ [
+ "TupleType";
+ [
+ [
+ "DataType";
+ "Bool"
+ ];
+ [
+ "DataType";
+ "Int8"
+ ];
+ [
+ "OptionalType";
+ [
+ "DataType";
+ "String"
+ ]
+ ];
+ [
+ "ListType";
+ [
+ "DataType";
+ "Int64"
+ ]
+ ]
+ ]
+ ]
+ ];
+ [
+ "struct";
+ [
+ "StructType";
+ [
+ [
+ "a";
+ [
+ "DataType";
+ "Bool"
+ ]
+ ];
+ [
+ "b";
+ [
+ "DataType";
+ "Int8"
+ ]
+ ];
+ [
+ "c";
+ [
+ "OptionalType";
+ [
+ "DataType";
+ "String"
+ ]
+ ]
+ ];
+ [
+ "d";
+ [
+ "ListType";
+ [
+ "DataType";
+ "Int64"
+ ]
+ ]
+ ];
+ [
+ "e";
+ [
+ "TupleType";
+ [
+ [
+ "DataType";
+ "Double"
+ ];
+ [
+ "DataType";
+ "String"
+ ]
+ ]
+ ]
+ ];
+ [
+ "y";
+ [
+ "OptionalType";
+ [
+ "DataType";
+ "Int16"
+ ]
+ ]
+ ]
+ ]
+ ]
+ ]
+ ]
+ ]
+ ];
+ "Data" = [
+ [
+ #;
+ #;
+ #;
+ #;
+ #;
+ #;
+ #;
+ [
+ "1";
+ "2";
+ "3"
+ ];
+ [
+ [
+ [
+ "1"
+ ];
+ [
+ "2"
+ ]
+ ];
+ [
+ [
+ "3"
+ ];
+ #
+ ]
+ ];
+ [
+ [
+ "foo";
+ "1"
+ ]
+ ];
+ [
+ %false;
+ "1";
+ #;
+ [
+ "1";
+ "2"
+ ]
+ ];
+ [
+ %false;
+ "1";
+ [
+ "foo"
+ ];
+ [
+ "1";
+ "2"
+ ];
+ [
+ "1";
+ "bar"
+ ];
+ #
+ ]
+ ]
+ ]
+ }
+ ]
+ }
+] \ No newline at end of file
diff --git a/yql/essentials/udfs/common/yson2/test/canondata/test.test_GenericConvertTo_/results.txt b/yql/essentials/udfs/common/yson2/test/canondata/test.test_GenericConvertTo_/results.txt
new file mode 100644
index 00000000000..c1e579f2581
--- /dev/null
+++ b/yql/essentials/udfs/common/yson2/test/canondata/test.test_GenericConvertTo_/results.txt
@@ -0,0 +1,379 @@
+[
+ {
+ "Write" = [
+ {
+ "Type" = [
+ "ListType";
+ [
+ "StructType";
+ [
+ [
+ "bool";
+ [
+ "DataType";
+ "Bool"
+ ]
+ ];
+ [
+ "int";
+ [
+ "DataType";
+ "Int64"
+ ]
+ ];
+ [
+ "uint";
+ [
+ "DataType";
+ "Uint8"
+ ]
+ ];
+ [
+ "optional_double";
+ [
+ "OptionalType";
+ [
+ "DataType";
+ "Double"
+ ]
+ ]
+ ];
+ [
+ "empty_int";
+ [
+ "OptionalType";
+ [
+ "DataType";
+ "Int32"
+ ]
+ ]
+ ];
+ [
+ "string";
+ [
+ "DataType";
+ "String"
+ ]
+ ];
+ [
+ "utf8";
+ [
+ "DataType";
+ "Utf8"
+ ]
+ ];
+ [
+ "yson";
+ [
+ "OptionalType";
+ [
+ "DataType";
+ "Yson"
+ ]
+ ]
+ ];
+ [
+ "json";
+ [
+ "OptionalType";
+ [
+ "DataType";
+ "Json"
+ ]
+ ]
+ ];
+ [
+ "int_list";
+ [
+ "ListType";
+ [
+ "DataType";
+ "Int64"
+ ]
+ ]
+ ];
+ [
+ "nested_list";
+ [
+ "ListType";
+ [
+ "ListType";
+ [
+ "OptionalType";
+ [
+ "DataType";
+ "Int64"
+ ]
+ ]
+ ]
+ ]
+ ];
+ [
+ "int_dict";
+ [
+ "DictType";
+ [
+ "DataType";
+ "String"
+ ];
+ [
+ "DataType";
+ "Int64"
+ ]
+ ]
+ ];
+ [
+ "tuple";
+ [
+ "TupleType";
+ [
+ [
+ "DataType";
+ "Bool"
+ ];
+ [
+ "DataType";
+ "Int8"
+ ];
+ [
+ "OptionalType";
+ [
+ "DataType";
+ "String"
+ ]
+ ];
+ [
+ "ListType";
+ [
+ "DataType";
+ "Int64"
+ ]
+ ]
+ ]
+ ]
+ ];
+ [
+ "struct";
+ [
+ "StructType";
+ [
+ [
+ "a";
+ [
+ "DataType";
+ "Bool"
+ ]
+ ];
+ [
+ "b";
+ [
+ "DataType";
+ "Int8"
+ ]
+ ];
+ [
+ "c";
+ [
+ "OptionalType";
+ [
+ "DataType";
+ "String"
+ ]
+ ]
+ ];
+ [
+ "d";
+ [
+ "ListType";
+ [
+ "DataType";
+ "Int64"
+ ]
+ ]
+ ];
+ [
+ "e";
+ [
+ "TupleType";
+ [
+ [
+ "DataType";
+ "Double"
+ ];
+ [
+ "DataType";
+ "String"
+ ]
+ ]
+ ]
+ ]
+ ]
+ ]
+ ];
+ [
+ "resource";
+ [
+ "DataType";
+ "Yson"
+ ]
+ ];
+ [
+ "list_of_floats";
+ [
+ "ListType";
+ [
+ "DataType";
+ "Float"
+ ]
+ ]
+ ];
+ [
+ "bad_member";
+ [
+ "OptionalType";
+ [
+ "StructType";
+ [
+ [
+ "a";
+ [
+ "StructType";
+ [
+ [
+ "b";
+ [
+ "DataType";
+ "Int64"
+ ]
+ ]
+ ]
+ ]
+ ]
+ ]
+ ]
+ ]
+ ];
+ [
+ "bad_element";
+ [
+ "OptionalType";
+ [
+ "TupleType";
+ [
+ [
+ "DataType";
+ "Int64"
+ ];
+ [
+ "TupleType";
+ [
+ [
+ "DataType";
+ "Int64"
+ ]
+ ]
+ ]
+ ]
+ ]
+ ]
+ ]
+ ]
+ ]
+ ];
+ "Data" = [
+ [
+ %true;
+ "123";
+ "123";
+ [
+ "1.23"
+ ];
+ #;
+ "123";
+ "\xD0\xBF\xD1\x80\xD0\xB8\xD1\x91\xD0\xBC";
+ [
+ {
+ "a" = {
+ "$type" = "string";
+ "$value" = "b"
+ };
+ "c" = #
+ }
+ ];
+ [
+ "[{\"a\":1},{\"a\":2},{\"a\":3}]"
+ ];
+ [
+ "1";
+ "2";
+ "3"
+ ];
+ [
+ [
+ [
+ "1"
+ ];
+ [
+ "2"
+ ]
+ ];
+ [
+ [
+ "3"
+ ];
+ #
+ ]
+ ];
+ [
+ [
+ "bar";
+ "2"
+ ];
+ [
+ "foo";
+ "1"
+ ]
+ ];
+ [
+ %false;
+ "1";
+ [
+ "foo"
+ ];
+ [
+ "1";
+ "2"
+ ]
+ ];
+ [
+ %false;
+ "1";
+ [
+ "foo"
+ ];
+ [
+ "1";
+ "2"
+ ];
+ [
+ "1";
+ "bar"
+ ]
+ ];
+ {
+ "$type" = "string";
+ "$value" = "foo"
+ };
+ [
+ "-3.1416";
+ "42";
+ "0.003"
+ ];
+ #;
+ #
+ ]
+ ]
+ }
+ ]
+ }
+] \ No newline at end of file
diff --git a/yql/essentials/udfs/common/yson2/test/canondata/test.test_GenericFrom_/results.txt b/yql/essentials/udfs/common/yson2/test/canondata/test.test_GenericFrom_/results.txt
new file mode 100644
index 00000000000..7281b38aef0
--- /dev/null
+++ b/yql/essentials/udfs/common/yson2/test/canondata/test.test_GenericFrom_/results.txt
@@ -0,0 +1,345 @@
+[
+ {
+ "Write" = [
+ {
+ "Type" = [
+ "ListType";
+ [
+ "StructType";
+ [
+ [
+ "null";
+ [
+ "DataType";
+ "Yson"
+ ]
+ ];
+ [
+ "empty_list";
+ [
+ "DataType";
+ "Yson"
+ ]
+ ];
+ [
+ "empty_dict";
+ [
+ "DataType";
+ "Yson"
+ ]
+ ];
+ [
+ "bool";
+ [
+ "DataType";
+ "Yson"
+ ]
+ ];
+ [
+ "int";
+ [
+ "DataType";
+ "Yson"
+ ]
+ ];
+ [
+ "uint";
+ [
+ "DataType";
+ "Yson"
+ ]
+ ];
+ [
+ "optional_double";
+ [
+ "DataType";
+ "Yson"
+ ]
+ ];
+ [
+ "empty_int";
+ [
+ "DataType";
+ "Yson"
+ ]
+ ];
+ [
+ "string";
+ [
+ "DataType";
+ "Yson"
+ ]
+ ];
+ [
+ "int_list";
+ [
+ "DataType";
+ "Yson"
+ ]
+ ];
+ [
+ "nested_list";
+ [
+ "DataType";
+ "Yson"
+ ]
+ ];
+ [
+ "int_dict";
+ [
+ "DataType";
+ "Yson"
+ ]
+ ];
+ [
+ "tuple";
+ [
+ "DataType";
+ "Yson"
+ ]
+ ];
+ [
+ "struct";
+ [
+ "DataType";
+ "Yson"
+ ]
+ ];
+ [
+ "utf8";
+ [
+ "DataType";
+ "Yson"
+ ]
+ ];
+ [
+ "yson";
+ [
+ "DataType";
+ "Yson"
+ ]
+ ];
+ [
+ "json";
+ [
+ "DataType";
+ "Yson"
+ ]
+ ];
+ [
+ "resource_list";
+ [
+ "DataType";
+ "Yson"
+ ]
+ ];
+ [
+ "variants";
+ [
+ "DataType";
+ "Yson"
+ ]
+ ];
+ [
+ "double_optional";
+ [
+ "DataType";
+ "Yson"
+ ]
+ ]
+ ]
+ ]
+ ];
+ "Data" = [
+ [
+ #;
+ [];
+ {};
+ {
+ "$type" = "boolean";
+ "$value" = "true"
+ };
+ {
+ "$type" = "int64";
+ "$value" = "123"
+ };
+ {
+ "$type" = "uint64";
+ "$value" = "123"
+ };
+ {
+ "$type" = "double";
+ "$value" = "1.23"
+ };
+ #;
+ {
+ "$type" = "string";
+ "$value" = "123"
+ };
+ [
+ {
+ "$type" = "int64";
+ "$value" = "1"
+ };
+ {
+ "$type" = "int64";
+ "$value" = "2"
+ };
+ {
+ "$type" = "int64";
+ "$value" = "3"
+ }
+ ];
+ [
+ [
+ {
+ "$type" = "int64";
+ "$value" = "1"
+ };
+ {
+ "$type" = "int64";
+ "$value" = "2"
+ }
+ ];
+ [
+ {
+ "$type" = "int64";
+ "$value" = "3"
+ };
+ #
+ ]
+ ];
+ {
+ "bar" = {
+ "$type" = "int64";
+ "$value" = "2"
+ };
+ "foo" = {
+ "$type" = "int64";
+ "$value" = "1"
+ }
+ };
+ [
+ {
+ "$type" = "boolean";
+ "$value" = "false"
+ };
+ {
+ "$type" = "int64";
+ "$value" = "1"
+ };
+ {
+ "$type" = "string";
+ "$value" = "foo"
+ };
+ [
+ {
+ "$type" = "int64";
+ "$value" = "1"
+ };
+ {
+ "$type" = "int64";
+ "$value" = "2"
+ }
+ ]
+ ];
+ {
+ "a" = {
+ "$type" = "boolean";
+ "$value" = "false"
+ };
+ "b" = {
+ "$type" = "int64";
+ "$value" = "1"
+ };
+ "c" = {
+ "$type" = "string";
+ "$value" = "foo"
+ };
+ "d" = [
+ {
+ "$type" = "int64";
+ "$value" = "1"
+ };
+ {
+ "$type" = "int64";
+ "$value" = "2"
+ }
+ ];
+ "e" = [
+ {
+ "$type" = "double";
+ "$value" = "1"
+ };
+ {
+ "$type" = "string";
+ "$value" = "bar"
+ }
+ ]
+ };
+ {
+ "$type" = "string";
+ "$value" = "\xC3\x90\xC2\xBF\xC3\x91\xC2\x80\xC3\x90\xC2\xBE\xC3\x90\xC2\xB2\xC3\x90\xC2\xB5\xC3\x91\xC2\x80\xC3\x90\xC2\xBA\xC3\x90\xC2\xB0 \xC3\x91\xC2\x81\xC3\x90\xC2\xB2\xC3\x91\xC2\x8F\xC3\x90\xC2\xB7\xC3\x90\xC2\xB8"
+ };
+ {
+ "a" = {
+ "$type" = "int64";
+ "$value" = "1"
+ };
+ "b" = #
+ };
+ {
+ "a" = {
+ "$type" = "string";
+ "$value" = "foo"
+ };
+ "b" = [
+ {
+ "$type" = "int64";
+ "$value" = "1"
+ };
+ {
+ "$type" = "int64";
+ "$value" = "2"
+ };
+ {
+ "$type" = "int64";
+ "$value" = "3"
+ };
+ {
+ "$type" = "int64";
+ "$value" = "4"
+ };
+ {
+ "$type" = "int64";
+ "$value" = "5"
+ }
+ ]
+ };
+ [
+ {
+ "$type" = "int64";
+ "$value" = "1"
+ };
+ {
+ "$type" = "string";
+ "$value" = "foo"
+ }
+ ];
+ [
+ {
+ "$type" = "int64";
+ "$value" = "1"
+ };
+ {
+ "$type" = "string";
+ "$value" = "2"
+ }
+ ];
+ #
+ ]
+ ]
+ }
+ ]
+ }
+] \ No newline at end of file
diff --git a/yql/essentials/udfs/common/yson2/test/canondata/test.test_GetHash_/results.txt b/yql/essentials/udfs/common/yson2/test/canondata/test.test_GetHash_/results.txt
new file mode 100644
index 00000000000..273c8cc1254
--- /dev/null
+++ b/yql/essentials/udfs/common/yson2/test/canondata/test.test_GetHash_/results.txt
@@ -0,0 +1,116 @@
+[
+ {
+ "Write" = [
+ {
+ "Type" = [
+ "ListType";
+ [
+ "StructType";
+ [
+ [
+ "a1";
+ [
+ "DataType";
+ "Uint64"
+ ]
+ ];
+ [
+ "a2";
+ [
+ "DataType";
+ "Uint64"
+ ]
+ ];
+ [
+ "a3";
+ [
+ "DataType";
+ "Uint64"
+ ]
+ ];
+ [
+ "b";
+ [
+ "DataType";
+ "Uint64"
+ ]
+ ];
+ [
+ "c";
+ [
+ "DataType";
+ "Uint64"
+ ]
+ ];
+ [
+ "d";
+ [
+ "DataType";
+ "Uint64"
+ ]
+ ];
+ [
+ "e";
+ [
+ "DataType";
+ "Uint64"
+ ]
+ ];
+ [
+ "f";
+ [
+ "DataType";
+ "Uint64"
+ ]
+ ];
+ [
+ "g";
+ [
+ "DataType";
+ "Uint64"
+ ]
+ ];
+ [
+ "h";
+ [
+ "DataType";
+ "Uint64"
+ ]
+ ];
+ [
+ "i";
+ [
+ "DataType";
+ "Uint64"
+ ]
+ ];
+ [
+ "j";
+ [
+ "DataType";
+ "Uint64"
+ ]
+ ]
+ ]
+ ]
+ ];
+ "Data" = [
+ [
+ "7079824331463246373";
+ "7079824331463246373";
+ "7079824331463246373";
+ "9619972962658888907";
+ "7079824331463246372";
+ "16786623923823870811";
+ "5024551639089484741";
+ "18074785969708127853";
+ "12660212615513087259";
+ "6712964724129011716";
+ "6712964724129011716";
+ "16371845032759913096"
+ ]
+ ]
+ }
+ ]
+ }
+] \ No newline at end of file
diff --git a/yql/essentials/udfs/common/yson2/test/canondata/test.test_Get_/results.txt b/yql/essentials/udfs/common/yson2/test/canondata/test.test_Get_/results.txt
new file mode 100644
index 00000000000..45886142d03
--- /dev/null
+++ b/yql/essentials/udfs/common/yson2/test/canondata/test.test_Get_/results.txt
@@ -0,0 +1,57 @@
+[
+ {
+ "Write" = [
+ {
+ "Type" = [
+ "ListType";
+ [
+ "StructType";
+ [
+ [
+ "list_length";
+ [
+ "OptionalType";
+ [
+ "DataType";
+ "Uint64"
+ ]
+ ]
+ ];
+ [
+ "dict_length";
+ [
+ "OptionalType";
+ [
+ "DataType";
+ "Uint64"
+ ]
+ ]
+ ];
+ [
+ "scalar_length";
+ [
+ "OptionalType";
+ [
+ "DataType";
+ "Uint64"
+ ]
+ ]
+ ]
+ ]
+ ]
+ ];
+ "Data" = [
+ [
+ [
+ "3"
+ ];
+ [
+ "1"
+ ];
+ #
+ ]
+ ]
+ }
+ ]
+ }
+] \ No newline at end of file
diff --git a/yql/essentials/udfs/common/yson2/test/canondata/test.test_GoodForYsonBadForJson_/results.txt b/yql/essentials/udfs/common/yson2/test/canondata/test.test_GoodForYsonBadForJson_/results.txt
new file mode 100644
index 00000000000..e40c1593de7
--- /dev/null
+++ b/yql/essentials/udfs/common/yson2/test/canondata/test.test_GoodForYsonBadForJson_/results.txt
@@ -0,0 +1,82 @@
+[
+ {
+ "Write" = [
+ {
+ "Type" = [
+ "ListType";
+ [
+ "StructType";
+ [
+ [
+ "column0";
+ [
+ "DataType";
+ "Yson"
+ ]
+ ];
+ [
+ "column1";
+ [
+ "DataType";
+ "Yson"
+ ]
+ ]
+ ]
+ ]
+ ];
+ "Data" = [
+ [
+ {
+ "$type" = "double";
+ "$value" = "inf"
+ };
+ {
+ "$type" = "string";
+ "$value" = "\"12345\xC3\2667\""
+ }
+ ]
+ ]
+ }
+ ]
+ };
+ {
+ "Write" = [
+ {
+ "Type" = [
+ "ListType";
+ [
+ "StructType";
+ [
+ [
+ "column0";
+ [
+ "OptionalType";
+ [
+ "DataType";
+ "Json"
+ ]
+ ]
+ ];
+ [
+ "column1";
+ [
+ "OptionalType";
+ [
+ "DataType";
+ "Json"
+ ]
+ ]
+ ]
+ ]
+ ]
+ ];
+ "Data" = [
+ [
+ #;
+ #
+ ]
+ ]
+ }
+ ]
+ }
+] \ No newline at end of file
diff --git a/yql/essentials/udfs/common/yson2/test/canondata/test.test_ImplicitFromRes_/results.txt b/yql/essentials/udfs/common/yson2/test/canondata/test.test_ImplicitFromRes_/results.txt
new file mode 100644
index 00000000000..e3310fc1181
--- /dev/null
+++ b/yql/essentials/udfs/common/yson2/test/canondata/test.test_ImplicitFromRes_/results.txt
@@ -0,0 +1,41 @@
+[
+ {
+ "Write" = [
+ {
+ "Type" = [
+ "ListType";
+ [
+ "StructType";
+ [
+ [
+ "column0";
+ [
+ "DataType";
+ "Yson"
+ ]
+ ];
+ [
+ "column1";
+ [
+ "OptionalType";
+ [
+ "DataType";
+ "Yson"
+ ]
+ ]
+ ]
+ ]
+ ]
+ ];
+ "Data" = [
+ [
+ [];
+ [
+ []
+ ]
+ ]
+ ]
+ }
+ ]
+ }
+] \ No newline at end of file
diff --git a/yql/essentials/udfs/common/yson2/test/canondata/test.test_IsType_/results.txt b/yql/essentials/udfs/common/yson2/test/canondata/test.test_IsType_/results.txt
new file mode 100644
index 00000000000..33e0a72bb98
--- /dev/null
+++ b/yql/essentials/udfs/common/yson2/test/canondata/test.test_IsType_/results.txt
@@ -0,0 +1,154 @@
+[
+ {
+ "Write" = [
+ {
+ "Type" = [
+ "ListType";
+ [
+ "StructType";
+ [
+ [
+ "is_string";
+ [
+ "DataType";
+ "Bool"
+ ]
+ ];
+ [
+ "is_int64";
+ [
+ "DataType";
+ "Bool"
+ ]
+ ];
+ [
+ "is_uint64";
+ [
+ "DataType";
+ "Bool"
+ ]
+ ];
+ [
+ "is_double";
+ [
+ "DataType";
+ "Bool"
+ ]
+ ];
+ [
+ "is_entity";
+ [
+ "DataType";
+ "Bool"
+ ]
+ ];
+ [
+ "is_bool";
+ [
+ "DataType";
+ "Bool"
+ ]
+ ];
+ [
+ "is_list";
+ [
+ "DataType";
+ "Bool"
+ ]
+ ];
+ [
+ "is_dict";
+ [
+ "DataType";
+ "Bool"
+ ]
+ ]
+ ]
+ ]
+ ];
+ "Data" = [
+ [
+ %true;
+ %false;
+ %false;
+ %false;
+ %false;
+ %false;
+ %false;
+ %false
+ ];
+ [
+ %false;
+ %true;
+ %false;
+ %false;
+ %false;
+ %false;
+ %false;
+ %false
+ ];
+ [
+ %false;
+ %false;
+ %true;
+ %false;
+ %false;
+ %false;
+ %false;
+ %false
+ ];
+ [
+ %false;
+ %false;
+ %false;
+ %true;
+ %false;
+ %false;
+ %false;
+ %false
+ ];
+ [
+ %false;
+ %false;
+ %false;
+ %false;
+ %true;
+ %false;
+ %false;
+ %false
+ ];
+ [
+ %false;
+ %false;
+ %false;
+ %false;
+ %false;
+ %true;
+ %false;
+ %false
+ ];
+ [
+ %false;
+ %false;
+ %false;
+ %false;
+ %false;
+ %false;
+ %true;
+ %false
+ ];
+ [
+ %false;
+ %false;
+ %false;
+ %false;
+ %false;
+ %false;
+ %false;
+ %true
+ ]
+ ]
+ }
+ ]
+ }
+] \ No newline at end of file
diff --git a/yql/essentials/udfs/common/yson2/test/canondata/test.test_JsonSerializeSkipMapEntity_/results.txt b/yql/essentials/udfs/common/yson2/test/canondata/test.test_JsonSerializeSkipMapEntity_/results.txt
new file mode 100644
index 00000000000..00bda17c89f
--- /dev/null
+++ b/yql/essentials/udfs/common/yson2/test/canondata/test.test_JsonSerializeSkipMapEntity_/results.txt
@@ -0,0 +1,124 @@
+[
+ {
+ "Write" = [
+ {
+ "Type" = [
+ "ListType";
+ [
+ "StructType";
+ [
+ [
+ "res1";
+ [
+ "OptionalType";
+ [
+ "DataType";
+ "Json"
+ ]
+ ]
+ ];
+ [
+ "res2";
+ [
+ "OptionalType";
+ [
+ "DataType";
+ "Json"
+ ]
+ ]
+ ];
+ [
+ "res3";
+ [
+ "OptionalType";
+ [
+ "DataType";
+ "Json"
+ ]
+ ]
+ ];
+ [
+ "res4";
+ [
+ "OptionalType";
+ [
+ "DataType";
+ "Json"
+ ]
+ ]
+ ];
+ [
+ "res5";
+ [
+ "OptionalType";
+ [
+ "DataType";
+ "Json"
+ ]
+ ]
+ ];
+ [
+ "res6";
+ [
+ "OptionalType";
+ [
+ "DataType";
+ "Json"
+ ]
+ ]
+ ];
+ [
+ "res7";
+ [
+ "OptionalType";
+ [
+ "DataType";
+ "Json"
+ ]
+ ]
+ ];
+ [
+ "res8";
+ [
+ "OptionalType";
+ [
+ "DataType";
+ "Json"
+ ]
+ ]
+ ]
+ ]
+ ]
+ ];
+ "Data" = [
+ [
+ [
+ "{\"a\":123}"
+ ];
+ [
+ "{}"
+ ];
+ [
+ "{\"a\":123}"
+ ];
+ [
+ "[123,null]"
+ ];
+ [
+ "{\"a\":1,\"c\":1}"
+ ];
+ [
+ "{\"a\":{\"$attributes\":{\"c\":1,\"e\":3},\"$value\":23},\"b\":1}"
+ ];
+ [
+ "{\"b\":1}"
+ ];
+ [
+ "{\"$attributes\":{},\"$value\":23}"
+ ]
+ ]
+ ]
+ }
+ ]
+ }
+] \ No newline at end of file
diff --git a/yql/essentials/udfs/common/yson2/test/canondata/test.test_JsonWithNanAsString_/results.txt b/yql/essentials/udfs/common/yson2/test/canondata/test.test_JsonWithNanAsString_/results.txt
new file mode 100644
index 00000000000..ad19dad97df
--- /dev/null
+++ b/yql/essentials/udfs/common/yson2/test/canondata/test.test_JsonWithNanAsString_/results.txt
@@ -0,0 +1,59 @@
+[
+ {
+ "Write" = [
+ {
+ "Type" = [
+ "ListType";
+ [
+ "StructType";
+ [
+ [
+ "column0";
+ [
+ "OptionalType";
+ [
+ "DataType";
+ "Json"
+ ]
+ ]
+ ];
+ [
+ "column1";
+ [
+ "OptionalType";
+ [
+ "DataType";
+ "Json"
+ ]
+ ]
+ ];
+ [
+ "column2";
+ [
+ "OptionalType";
+ [
+ "DataType";
+ "Json"
+ ]
+ ]
+ ]
+ ]
+ ]
+ ];
+ "Data" = [
+ [
+ [
+ "\"nan\""
+ ];
+ [
+ "\"inf\""
+ ];
+ [
+ "\"-inf\""
+ ]
+ ]
+ ]
+ }
+ ]
+ }
+] \ No newline at end of file
diff --git a/yql/essentials/udfs/common/yson2/test/canondata/test.test_JsonWithUtf8_/results.txt b/yql/essentials/udfs/common/yson2/test/canondata/test.test_JsonWithUtf8_/results.txt
new file mode 100644
index 00000000000..6b113045b03
--- /dev/null
+++ b/yql/essentials/udfs/common/yson2/test/canondata/test.test_JsonWithUtf8_/results.txt
@@ -0,0 +1,67 @@
+[
+ {
+ "Write" = [
+ {
+ "Type" = [
+ "ListType";
+ [
+ "StructType";
+ [
+ [
+ "column0";
+ [
+ "OptionalType";
+ [
+ "DataType";
+ "Json"
+ ]
+ ]
+ ];
+ [
+ "column1";
+ [
+ "OptionalType";
+ [
+ "DataType";
+ "Json"
+ ]
+ ]
+ ];
+ [
+ "column2";
+ [
+ "DataType";
+ "Bool"
+ ]
+ ];
+ [
+ "column3";
+ [
+ "OptionalType";
+ [
+ "DataType";
+ "Json"
+ ]
+ ]
+ ]
+ ]
+ ]
+ ];
+ "Data" = [
+ [
+ [
+ "\"\xD0\xA5\xD1\x8D\xD0\xBB\xD0\xBB\xD0\xBE\xD1\x83!\""
+ ];
+ [
+ "\"\xC3\x90\xC2\xA5\xC3\x91\xC2\x8D\xC3\x90\xC2\xBB\xC3\x90\xC2\xBB\xC3\x90\xC2\xBE\xC3\x91\xC2\x83!\""
+ ];
+ %true;
+ [
+ "\"\xC3\x90\xC2\xA5\xC3\x91\xC2\x8D\xC3\x90\xC2\xBB\xC3\x90\xC2\xBB\xC3\x90\xC2\xBE\xC3\x91\xC2\x83!\""
+ ]
+ ]
+ ]
+ }
+ ]
+ }
+] \ No newline at end of file
diff --git a/yql/essentials/udfs/common/yson2/test/canondata/test.test_Lists_/results.txt b/yql/essentials/udfs/common/yson2/test/canondata/test.test_Lists_/results.txt
new file mode 100644
index 00000000000..dde36e18651
--- /dev/null
+++ b/yql/essentials/udfs/common/yson2/test/canondata/test.test_Lists_/results.txt
@@ -0,0 +1,142 @@
+[
+ {
+ "Write" = [
+ {
+ "Type" = [
+ "ListType";
+ [
+ "StructType";
+ [
+ [
+ "column0";
+ [
+ "DataType";
+ "Bool"
+ ]
+ ];
+ [
+ "column1";
+ [
+ "DataType";
+ "Uint64"
+ ]
+ ];
+ [
+ "column2";
+ [
+ "ListType";
+ [
+ "OptionalType";
+ [
+ "DataType";
+ "Int64"
+ ]
+ ]
+ ]
+ ];
+ [
+ "column3";
+ [
+ "ListType";
+ [
+ "ListType";
+ [
+ "OptionalType";
+ [
+ "DataType";
+ "Int64"
+ ]
+ ]
+ ]
+ ]
+ ]
+ ]
+ ]
+ ];
+ "Data" = [
+ [
+ %false;
+ "3";
+ [
+ [
+ "1"
+ ];
+ [
+ "2"
+ ];
+ #
+ ];
+ [
+ [];
+ [];
+ [
+ [
+ "3"
+ ];
+ [
+ "4"
+ ]
+ ]
+ ]
+ ]
+ ]
+ }
+ ]
+ };
+ {
+ "Write" = [
+ {
+ "Type" = [
+ "ListType";
+ [
+ "StructType";
+ [
+ [
+ "column0";
+ [
+ "ListType";
+ [
+ "OptionalType";
+ [
+ "DataType";
+ "String"
+ ]
+ ]
+ ]
+ ];
+ [
+ "column1";
+ [
+ "ListType";
+ [
+ "OptionalType";
+ [
+ "DataType";
+ "Int64"
+ ]
+ ]
+ ]
+ ]
+ ]
+ ]
+ ];
+ "Data" = [
+ [
+ [
+ #;
+ [
+ "456"
+ ]
+ ];
+ [
+ [
+ "123"
+ ];
+ #
+ ]
+ ]
+ ]
+ }
+ ]
+ }
+] \ No newline at end of file
diff --git a/yql/essentials/udfs/common/yson2/test/canondata/test.test_Lookup_/results.txt b/yql/essentials/udfs/common/yson2/test/canondata/test.test_Lookup_/results.txt
new file mode 100644
index 00000000000..2938fff72b3
--- /dev/null
+++ b/yql/essentials/udfs/common/yson2/test/canondata/test.test_Lookup_/results.txt
@@ -0,0 +1,225 @@
+[
+ {
+ "Write" = [
+ {
+ "Type" = [
+ "ListType";
+ [
+ "StructType";
+ [
+ [
+ "column0";
+ [
+ "OptionalType";
+ [
+ "DataType";
+ "Bool"
+ ]
+ ]
+ ];
+ [
+ "column1";
+ [
+ "OptionalType";
+ [
+ "DataType";
+ "Bool"
+ ]
+ ]
+ ];
+ [
+ "column2";
+ [
+ "OptionalType";
+ [
+ "DataType";
+ "Int64"
+ ]
+ ]
+ ];
+ [
+ "column3";
+ [
+ "OptionalType";
+ [
+ "DataType";
+ "Int64"
+ ]
+ ]
+ ];
+ [
+ "column4";
+ [
+ "OptionalType";
+ [
+ "DataType";
+ "Uint64"
+ ]
+ ]
+ ];
+ [
+ "column5";
+ [
+ "OptionalType";
+ [
+ "DataType";
+ "Uint64"
+ ]
+ ]
+ ];
+ [
+ "column6";
+ [
+ "OptionalType";
+ [
+ "DataType";
+ "Double"
+ ]
+ ]
+ ];
+ [
+ "column7";
+ [
+ "OptionalType";
+ [
+ "DataType";
+ "Double"
+ ]
+ ]
+ ];
+ [
+ "column8";
+ [
+ "OptionalType";
+ [
+ "DataType";
+ "String"
+ ]
+ ]
+ ];
+ [
+ "column9";
+ [
+ "OptionalType";
+ [
+ "DataType";
+ "String"
+ ]
+ ]
+ ];
+ [
+ "column10";
+ [
+ "DataType";
+ "Uint64"
+ ]
+ ];
+ [
+ "column11";
+ [
+ "OptionalType";
+ [
+ "DataType";
+ "Uint64"
+ ]
+ ]
+ ];
+ [
+ "column12";
+ [
+ "DataType";
+ "Uint64"
+ ]
+ ];
+ [
+ "column13";
+ [
+ "OptionalType";
+ [
+ "DataType";
+ "Uint64"
+ ]
+ ]
+ ];
+ [
+ "column14";
+ [
+ "OptionalType";
+ [
+ "DataType";
+ "String"
+ ]
+ ]
+ ];
+ [
+ "column15";
+ [
+ "OptionalType";
+ [
+ "DataType";
+ "String"
+ ]
+ ]
+ ];
+ [
+ "column16";
+ [
+ "OptionalType";
+ [
+ "DataType";
+ "Double"
+ ]
+ ]
+ ]
+ ]
+ ]
+ ];
+ "Data" = [
+ [
+ [
+ %true
+ ];
+ [
+ %true
+ ];
+ [
+ "1"
+ ];
+ [
+ "1"
+ ];
+ [
+ "2"
+ ];
+ [
+ "2"
+ ];
+ [
+ "3"
+ ];
+ [
+ "3"
+ ];
+ [
+ "x"
+ ];
+ [
+ "x"
+ ];
+ "2";
+ [
+ "2"
+ ];
+ "1";
+ [
+ "1"
+ ];
+ #;
+ #;
+ #
+ ]
+ ]
+ }
+ ]
+ }
+] \ No newline at end of file
diff --git a/yql/essentials/udfs/common/yson2/test/canondata/test.test_NegativeArrayIndex_/results.txt b/yql/essentials/udfs/common/yson2/test/canondata/test.test_NegativeArrayIndex_/results.txt
new file mode 100644
index 00000000000..86f5896b243
--- /dev/null
+++ b/yql/essentials/udfs/common/yson2/test/canondata/test.test_NegativeArrayIndex_/results.txt
@@ -0,0 +1,133 @@
+[
+ {
+ "Write" = [
+ {
+ "Type" = [
+ "ListType";
+ [
+ "StructType";
+ [
+ [
+ "column0";
+ [
+ "OptionalType";
+ [
+ "DataType";
+ "Int64"
+ ]
+ ]
+ ];
+ [
+ "column1";
+ [
+ "OptionalType";
+ [
+ "DataType";
+ "Int64"
+ ]
+ ]
+ ];
+ [
+ "column2";
+ [
+ "OptionalType";
+ [
+ "DataType";
+ "Int64"
+ ]
+ ]
+ ];
+ [
+ "column3";
+ [
+ "OptionalType";
+ [
+ "DataType";
+ "Int64"
+ ]
+ ]
+ ];
+ [
+ "column4";
+ [
+ "OptionalType";
+ [
+ "DataType";
+ "Int64"
+ ]
+ ]
+ ];
+ [
+ "column5";
+ [
+ "OptionalType";
+ [
+ "DataType";
+ "Int64"
+ ]
+ ]
+ ];
+ [
+ "column6";
+ [
+ "OptionalType";
+ [
+ "DataType";
+ "Int64"
+ ]
+ ]
+ ];
+ [
+ "column7";
+ [
+ "OptionalType";
+ [
+ "DataType";
+ "Int64"
+ ]
+ ]
+ ];
+ [
+ "column8";
+ [
+ "OptionalType";
+ [
+ "DataType";
+ "Int64"
+ ]
+ ]
+ ]
+ ]
+ ]
+ ];
+ "Data" = [
+ [
+ [
+ "2"
+ ];
+ [
+ "7"
+ ];
+ [
+ "3"
+ ];
+ [
+ "6"
+ ];
+ [
+ "7"
+ ];
+ [
+ "1"
+ ];
+ #;
+ #;
+ [
+ "1"
+ ]
+ ]
+ ]
+ }
+ ]
+ }
+] \ No newline at end of file
diff --git a/yql/essentials/udfs/common/yson2/test/canondata/test.test_ParseString_/results.txt b/yql/essentials/udfs/common/yson2/test/canondata/test.test_ParseString_/results.txt
new file mode 100644
index 00000000000..f599aeaeaa4
--- /dev/null
+++ b/yql/essentials/udfs/common/yson2/test/canondata/test.test_ParseString_/results.txt
@@ -0,0 +1,128 @@
+[
+ {
+ "Write" = [
+ {
+ "Type" = [
+ "ListType";
+ [
+ "StructType";
+ [
+ [
+ "column0";
+ [
+ "OptionalType";
+ [
+ "DataType";
+ "Yson"
+ ]
+ ]
+ ];
+ [
+ "column1";
+ [
+ "OptionalType";
+ [
+ "DataType";
+ "Yson"
+ ]
+ ]
+ ];
+ [
+ "column2";
+ [
+ "OptionalType";
+ [
+ "DataType";
+ "Yson"
+ ]
+ ]
+ ];
+ [
+ "column3";
+ [
+ "OptionalType";
+ [
+ "DataType";
+ "Yson"
+ ]
+ ]
+ ];
+ [
+ "column4";
+ [
+ "OptionalType";
+ [
+ "DataType";
+ "Yson"
+ ]
+ ]
+ ];
+ [
+ "column5";
+ [
+ "OptionalType";
+ [
+ "DataType";
+ "Yson"
+ ]
+ ]
+ ];
+ [
+ "column6";
+ [
+ "OptionalType";
+ [
+ "DataType";
+ "Yson"
+ ]
+ ]
+ ];
+ [
+ "column7";
+ [
+ "OptionalType";
+ [
+ "DataType";
+ "Yson"
+ ]
+ ]
+ ]
+ ]
+ ]
+ ];
+ "Data" = [
+ [
+ [
+ {
+ "$type" = "uint64";
+ "$value" = "0"
+ }
+ ];
+ [
+ {
+ "$type" = "uint64";
+ "$value" = "1"
+ }
+ ];
+ [
+ {
+ "$type" = "int64";
+ "$value" = "2"
+ }
+ ];
+ [
+ {
+ "$type" = "int64";
+ "$value" = "3"
+ }
+ ];
+ #;
+ #;
+ #;
+ #
+ ]
+ ]
+ }
+ ]
+ }
+] \ No newline at end of file
diff --git a/yql/essentials/udfs/common/yson2/test/canondata/test.test_Scalars_/results.txt b/yql/essentials/udfs/common/yson2/test/canondata/test.test_Scalars_/results.txt
new file mode 100644
index 00000000000..32556166cba
--- /dev/null
+++ b/yql/essentials/udfs/common/yson2/test/canondata/test.test_Scalars_/results.txt
@@ -0,0 +1,462 @@
+[
+ {
+ "Write" = [
+ {
+ "Type" = [
+ "ListType";
+ [
+ "StructType";
+ [
+ [
+ "column0";
+ [
+ "OptionalType";
+ [
+ "DataType";
+ "Bool"
+ ]
+ ]
+ ];
+ [
+ "column1";
+ [
+ "OptionalType";
+ [
+ "DataType";
+ "Bool"
+ ]
+ ]
+ ];
+ [
+ "column2";
+ [
+ "OptionalType";
+ [
+ "DataType";
+ "Bool"
+ ]
+ ]
+ ];
+ [
+ "column3";
+ [
+ "OptionalType";
+ [
+ "DataType";
+ "Bool"
+ ]
+ ]
+ ];
+ [
+ "column4";
+ [
+ "OptionalType";
+ [
+ "DataType";
+ "Bool"
+ ]
+ ]
+ ];
+ [
+ "column5";
+ [
+ "OptionalType";
+ [
+ "DataType";
+ "Bool"
+ ]
+ ]
+ ];
+ [
+ "column6";
+ [
+ "OptionalType";
+ [
+ "DataType";
+ "Bool"
+ ]
+ ]
+ ];
+ [
+ "column7";
+ [
+ "OptionalType";
+ [
+ "DataType";
+ "Bool"
+ ]
+ ]
+ ];
+ [
+ "column8";
+ [
+ "OptionalType";
+ [
+ "DataType";
+ "Bool"
+ ]
+ ]
+ ];
+ [
+ "column9";
+ [
+ "OptionalType";
+ [
+ "DataType";
+ "Int64"
+ ]
+ ]
+ ];
+ [
+ "column10";
+ [
+ "OptionalType";
+ [
+ "DataType";
+ "Int64"
+ ]
+ ]
+ ];
+ [
+ "column11";
+ [
+ "OptionalType";
+ [
+ "DataType";
+ "Int64"
+ ]
+ ]
+ ];
+ [
+ "column12";
+ [
+ "OptionalType";
+ [
+ "DataType";
+ "Int64"
+ ]
+ ]
+ ];
+ [
+ "column13";
+ [
+ "OptionalType";
+ [
+ "DataType";
+ "Int64"
+ ]
+ ]
+ ];
+ [
+ "column14";
+ [
+ "OptionalType";
+ [
+ "DataType";
+ "Int64"
+ ]
+ ]
+ ];
+ [
+ "column15";
+ [
+ "OptionalType";
+ [
+ "DataType";
+ "Int64"
+ ]
+ ]
+ ];
+ [
+ "column16";
+ [
+ "OptionalType";
+ [
+ "DataType";
+ "Uint64"
+ ]
+ ]
+ ];
+ [
+ "column17";
+ [
+ "OptionalType";
+ [
+ "DataType";
+ "Uint64"
+ ]
+ ]
+ ];
+ [
+ "column18";
+ [
+ "OptionalType";
+ [
+ "DataType";
+ "Uint64"
+ ]
+ ]
+ ];
+ [
+ "column19";
+ [
+ "OptionalType";
+ [
+ "DataType";
+ "Uint64"
+ ]
+ ]
+ ];
+ [
+ "column20";
+ [
+ "OptionalType";
+ [
+ "DataType";
+ "Uint64"
+ ]
+ ]
+ ];
+ [
+ "column21";
+ [
+ "OptionalType";
+ [
+ "DataType";
+ "Uint64"
+ ]
+ ]
+ ];
+ [
+ "column22";
+ [
+ "OptionalType";
+ [
+ "DataType";
+ "Uint64"
+ ]
+ ]
+ ];
+ [
+ "column23";
+ [
+ "OptionalType";
+ [
+ "DataType";
+ "Uint64"
+ ]
+ ]
+ ];
+ [
+ "column24";
+ [
+ "OptionalType";
+ [
+ "DataType";
+ "Double"
+ ]
+ ]
+ ];
+ [
+ "column25";
+ [
+ "OptionalType";
+ [
+ "DataType";
+ "Double"
+ ]
+ ]
+ ];
+ [
+ "column26";
+ [
+ "OptionalType";
+ [
+ "DataType";
+ "Double"
+ ]
+ ]
+ ];
+ [
+ "column27";
+ [
+ "OptionalType";
+ [
+ "DataType";
+ "Double"
+ ]
+ ]
+ ];
+ [
+ "column28";
+ [
+ "OptionalType";
+ [
+ "DataType";
+ "Double"
+ ]
+ ]
+ ];
+ [
+ "column29";
+ [
+ "OptionalType";
+ [
+ "DataType";
+ "Double"
+ ]
+ ]
+ ];
+ [
+ "column30";
+ [
+ "OptionalType";
+ [
+ "DataType";
+ "Double"
+ ]
+ ]
+ ];
+ [
+ "column31";
+ [
+ "OptionalType";
+ [
+ "DataType";
+ "String"
+ ]
+ ]
+ ];
+ [
+ "column32";
+ [
+ "OptionalType";
+ [
+ "DataType";
+ "String"
+ ]
+ ]
+ ];
+ [
+ "column33";
+ [
+ "OptionalType";
+ [
+ "DataType";
+ "String"
+ ]
+ ]
+ ];
+ [
+ "column34";
+ [
+ "OptionalType";
+ [
+ "DataType";
+ "String"
+ ]
+ ]
+ ];
+ [
+ "column35";
+ [
+ "OptionalType";
+ [
+ "DataType";
+ "String"
+ ]
+ ]
+ ];
+ [
+ "column36";
+ [
+ "OptionalType";
+ [
+ "DataType";
+ "String"
+ ]
+ ]
+ ];
+ [
+ "column37";
+ [
+ "OptionalType";
+ [
+ "DataType";
+ "String"
+ ]
+ ]
+ ]
+ ]
+ ]
+ ];
+ "Data" = [
+ [
+ #;
+ [
+ %true
+ ];
+ [
+ %true
+ ];
+ [
+ %false
+ ];
+ #;
+ #;
+ #;
+ #;
+ #;
+ #;
+ #;
+ [
+ "1"
+ ];
+ [
+ "2"
+ ];
+ #;
+ #;
+ #;
+ #;
+ #;
+ #;
+ [
+ "1"
+ ];
+ [
+ "2"
+ ];
+ #;
+ #;
+ #;
+ #;
+ #;
+ [
+ "1"
+ ];
+ [
+ "2"
+ ];
+ [
+ "3"
+ ];
+ #;
+ #;
+ #;
+ #;
+ #;
+ #;
+ #;
+ [
+ "foo"
+ ];
+ [
+ "very loooooooooooooooooong string"
+ ]
+ ]
+ ]
+ }
+ ]
+ }
+] \ No newline at end of file
diff --git a/yql/essentials/udfs/common/yson2/test/canondata/test.test_SerializeDouble_/results.txt b/yql/essentials/udfs/common/yson2/test/canondata/test.test_SerializeDouble_/results.txt
new file mode 100644
index 00000000000..1891e2e7ab4
--- /dev/null
+++ b/yql/essentials/udfs/common/yson2/test/canondata/test.test_SerializeDouble_/results.txt
@@ -0,0 +1,66 @@
+[
+ {
+ "Write" = [
+ {
+ "Type" = [
+ "ListType";
+ [
+ "StructType";
+ [
+ [
+ "column0";
+ [
+ "DataType";
+ "Yson"
+ ]
+ ];
+ [
+ "column1";
+ [
+ "OptionalType";
+ [
+ "DataType";
+ "Json"
+ ]
+ ]
+ ]
+ ]
+ ]
+ ];
+ "Data" = [
+ [
+ {
+ "double1" = {
+ "$type" = "double";
+ "$value" = "1.0000000001"
+ };
+ "double2" = {
+ "$type" = "double";
+ "$value" = "1.000000001"
+ };
+ "double3" = {
+ "$type" = "double";
+ "$value" = "1000000000.5"
+ };
+ "double4" = {
+ "$type" = "double";
+ "$value" = "10000000005"
+ };
+ "double5" = {
+ "$type" = "double";
+ "$value" = "10000000000.5"
+ };
+ "double6" = {
+ "$type" = "double";
+ "$value" = "100000000005"
+ }
+ };
+ [
+ "{\"double1\":1.0000000001,\"double2\":1.000000001,\"double3\":1000000000.5,\"double4\":10000000005,\"double5\":10000000000.5,\"double6\":100000000005}"
+ ]
+ ]
+ ]
+ }
+ ]
+ }
+] \ No newline at end of file
diff --git a/yql/essentials/udfs/common/yson2/test/canondata/test.test_Serialize_/results.txt b/yql/essentials/udfs/common/yson2/test/canondata/test.test_Serialize_/results.txt
new file mode 100644
index 00000000000..a9f067d9a0c
--- /dev/null
+++ b/yql/essentials/udfs/common/yson2/test/canondata/test.test_Serialize_/results.txt
@@ -0,0 +1,70 @@
+[
+ {
+ "Write" = [
+ {
+ "Type" = [
+ "ListType";
+ [
+ "StructType";
+ [
+ [
+ "column0";
+ [
+ "DataType";
+ "Yson"
+ ]
+ ]
+ ]
+ ]
+ ];
+ "Data" = [
+ [
+ {
+ "$attributes" = {
+ "a" = {
+ "$type" = "int64";
+ "$value" = "1"
+ }
+ };
+ "$value" = [
+ #;
+ {
+ "a" = {
+ "$type" = "int64";
+ "$value" = "1"
+ }
+ };
+ {
+ "b" = {
+ "$type" = "uint64";
+ "$value" = "2"
+ };
+ "c" = []
+ };
+ {
+ "$attributes" = {
+ "q" = {
+ "$type" = "string";
+ "$value" = "foo"
+ }
+ };
+ "$type" = "double";
+ "$value" = "3"
+ };
+ {};
+ {
+ "$type" = "string";
+ "$value" = "foo"
+ };
+ {
+ "$type" = "string";
+ "$value" = "very loooooooooooooooooong string"
+ }
+ ]
+ }
+ ]
+ ]
+ }
+ ]
+ }
+] \ No newline at end of file
diff --git a/yql/essentials/udfs/common/yson2/test/canondata/test.test_WeakYsonRest_/results.txt b/yql/essentials/udfs/common/yson2/test/canondata/test.test_WeakYsonRest_/results.txt
new file mode 100644
index 00000000000..46991df7fbe
--- /dev/null
+++ b/yql/essentials/udfs/common/yson2/test/canondata/test.test_WeakYsonRest_/results.txt
@@ -0,0 +1,53 @@
+[
+ {
+ "Write" = [
+ {
+ "Type" = [
+ "ListType";
+ [
+ "StructType";
+ [
+ [
+ "animal";
+ [
+ "OptionalType";
+ [
+ "DataType";
+ "String"
+ ]
+ ]
+ ]
+ ]
+ ]
+ ];
+ "Data" = [
+ [
+ [
+ "wombat"
+ ]
+ ];
+ [
+ [
+ "dog"
+ ]
+ ];
+ [
+ [
+ "chipmunk"
+ ]
+ ];
+ [
+ [
+ "hamster"
+ ]
+ ];
+ [
+ [
+ "dingo"
+ ]
+ ]
+ ]
+ }
+ ]
+ }
+] \ No newline at end of file
diff --git a/yql/essentials/udfs/common/yson2/test/canondata/test.test_WithAttrs_/results.txt b/yql/essentials/udfs/common/yson2/test/canondata/test.test_WithAttrs_/results.txt
new file mode 100644
index 00000000000..1149280a84b
--- /dev/null
+++ b/yql/essentials/udfs/common/yson2/test/canondata/test.test_WithAttrs_/results.txt
@@ -0,0 +1,91 @@
+[
+ {
+ "Write" = [
+ {
+ "Type" = [
+ "ListType";
+ [
+ "StructType";
+ [
+ [
+ "column0";
+ [
+ "OptionalType";
+ [
+ "DataType";
+ "Yson"
+ ]
+ ]
+ ];
+ [
+ "column1";
+ [
+ "OptionalType";
+ [
+ "DataType";
+ "Yson"
+ ]
+ ]
+ ];
+ [
+ "column2";
+ [
+ "OptionalType";
+ [
+ "DataType";
+ "Yson"
+ ]
+ ]
+ ];
+ [
+ "column3";
+ [
+ "OptionalType";
+ [
+ "DataType";
+ "Yson"
+ ]
+ ]
+ ]
+ ]
+ ]
+ ];
+ "Data" = [
+ [
+ [
+ {
+ "$attributes" = {
+ "a" = {
+ "$type" = "int64";
+ "$value" = "2"
+ }
+ };
+ "$type" = "int64";
+ "$value" = "1"
+ }
+ ];
+ [
+ {
+ "$type" = "int64";
+ "$value" = "1"
+ }
+ ];
+ #;
+ [
+ {
+ "$attributes" = {
+ "b" = {
+ "$type" = "int64";
+ "$value" = "3"
+ }
+ };
+ "$type" = "int64";
+ "$value" = "1"
+ }
+ ]
+ ]
+ ]
+ }
+ ]
+ }
+] \ No newline at end of file
diff --git a/yql/essentials/udfs/common/yson2/test/canondata/test.test_YPath_/results.txt b/yql/essentials/udfs/common/yson2/test/canondata/test.test_YPath_/results.txt
new file mode 100644
index 00000000000..b1f2759eda8
--- /dev/null
+++ b/yql/essentials/udfs/common/yson2/test/canondata/test.test_YPath_/results.txt
@@ -0,0 +1,112 @@
+[
+ {
+ "Write" = [
+ {
+ "Type" = [
+ "ListType";
+ [
+ "StructType";
+ [
+ [
+ "data";
+ [
+ "OptionalType";
+ [
+ "DataType";
+ "Int64"
+ ]
+ ]
+ ];
+ [
+ "attrs";
+ [
+ "DictType";
+ [
+ "DataType";
+ "String"
+ ];
+ [
+ "DataType";
+ "String"
+ ]
+ ]
+ ];
+ [
+ "miss";
+ [
+ "OptionalType";
+ [
+ "DataType";
+ "Yson"
+ ]
+ ]
+ ];
+ [
+ "num";
+ [
+ "OptionalType";
+ [
+ "DataType";
+ "Int64"
+ ]
+ ]
+ ];
+ [
+ "str_attr";
+ [
+ "OptionalType";
+ [
+ "DataType";
+ "String"
+ ]
+ ]
+ ];
+ [
+ "miss_attr";
+ [
+ "OptionalType";
+ [
+ "DataType";
+ "Bool"
+ ]
+ ]
+ ];
+ [
+ "bad_conv";
+ [
+ "OptionalType";
+ [
+ "DataType";
+ "String"
+ ]
+ ]
+ ]
+ ]
+ ]
+ ];
+ "Data" = [
+ [
+ [
+ "123"
+ ];
+ [
+ [
+ "x";
+ "y"
+ ]
+ ];
+ #;
+ [
+ "123"
+ ];
+ [
+ "y"
+ ];
+ #;
+ #
+ ]
+ ]
+ }
+ ]
+ }
+] \ No newline at end of file
diff --git a/yql/essentials/udfs/common/yson2/test/cases/Access.sql b/yql/essentials/udfs/common/yson2/test/cases/Access.sql
new file mode 100644
index 00000000000..13ae815e27b
--- /dev/null
+++ b/yql/essentials/udfs/common/yson2/test/cases/Access.sql
@@ -0,0 +1,4 @@
+/* syntax version 1 */
+$yson = cast('{"commands"=[{"command"="say";"text"="hello world"}]}' as yson);
+SELECT Yson::ConvertToString($yson["command" || "s"].0["text"]) as text;
+
diff --git a/yql/essentials/udfs/common/yson2/test/cases/AccessJson.sql b/yql/essentials/udfs/common/yson2/test/cases/AccessJson.sql
new file mode 100644
index 00000000000..504da2c7119
--- /dev/null
+++ b/yql/essentials/udfs/common/yson2/test/cases/AccessJson.sql
@@ -0,0 +1,12 @@
+PRAGMA yson.DisableStrict;
+$yson = cast(@@{a="привет"}@@ as yson);
+$yson_node = Yson::Parse($yson);
+
+select Yson::ConvertToString($yson.a);
+select Yson::ConvertToString($yson_node.a);
+
+$json = cast(@@{"a":"привет"}@@ as json);
+$json_node = Yson::ParseJson($json);
+
+select Yson::ConvertToString($json.a);
+select Yson::ConvertToString($json_node.a);
diff --git a/yql/essentials/udfs/common/yson2/test/cases/Attrs.sql b/yql/essentials/udfs/common/yson2/test/cases/Attrs.sql
new file mode 100644
index 00000000000..41709ce806f
--- /dev/null
+++ b/yql/essentials/udfs/common/yson2/test/cases/Attrs.sql
@@ -0,0 +1,56 @@
+/* syntax version 1 */
+
+$no_strict = Yson::Options(false AS Strict);
+
+select
+Yson::ConvertToBool(Yson::Parse(Yson('<a=1>#')), $no_strict),
+Yson::ConvertToBool(Yson::Parse(Yson('<a=1>%true'))),
+Yson::ConvertToBool(Yson::Parse(Yson('<a=1>1')), $no_strict),
+Yson::ConvertToBool(Yson::Parse(Yson('<a=1>2u')), $no_strict),
+Yson::ConvertToBool(Yson::Parse(Yson('<a=1>3.0')), $no_strict),
+Yson::ConvertToBool(Yson::Parse(Yson('<a=1>foo')), $no_strict),
+Yson::ConvertToBool(Yson::Parse(Yson('<a=1>"very loooooooooooooooooong string"')), $no_strict),
+
+Yson::ConvertToInt64(Yson::Parse(Yson('<a=1>#')), $no_strict),
+Yson::ConvertToInt64(Yson::Parse(Yson('<a=1>%true')), $no_strict),
+Yson::ConvertToInt64(Yson::Parse(Yson('<a=1>1'))),
+Yson::ConvertToInt64(Yson::Parse(Yson('<a=1>2u'))),
+Yson::ConvertToInt64(Yson::Parse(Yson('<a=1>3.0')), $no_strict),
+Yson::ConvertToInt64(Yson::Parse(Yson('<a=1>foo')), $no_strict),
+Yson::ConvertToInt64(Yson::Parse(Yson('<a=1>"very loooooooooooooooooong string"')), $no_strict),
+
+Yson::ConvertToUint64(Yson::Parse(Yson('<a=1>#')), $no_strict),
+Yson::ConvertToUint64(Yson::Parse(Yson('<a=1>%true')), $no_strict),
+Yson::ConvertToUint64(Yson::Parse(Yson('<a=1>1'))),
+Yson::ConvertToUint64(Yson::Parse(Yson('<a=1>2u'))),
+Yson::ConvertToUint64(Yson::Parse(Yson('<a=1>3.0')), $no_strict),
+Yson::ConvertToUint64(Yson::Parse(Yson('<a=1>foo')), $no_strict),
+Yson::ConvertToUint64(Yson::Parse(Yson('<a=1>"very loooooooooooooooooong string"')), $no_strict),
+
+Yson::ConvertToDouble(Yson::Parse(Yson('<a=1>#')), $no_strict),
+Yson::ConvertToDouble(Yson::Parse(Yson('<a=1>%true')), $no_strict),
+Yson::ConvertToDouble(Yson::Parse(Yson('<a=1>1'))),
+Yson::ConvertToDouble(Yson::Parse(Yson('<a=1>2u'))),
+Yson::ConvertToDouble(Yson::Parse(Yson('<a=1>3.0'))),
+Yson::ConvertToDouble(Yson::Parse(Yson('<a=1>foo')), $no_strict),
+Yson::ConvertToDouble(Yson::Parse(Yson('<a=1>"very loooooooooooooooooong string"')), $no_strict),
+
+Yson::ConvertToString(Yson::Parse(Yson('<a=1>#')), $no_strict),
+Yson::ConvertToString(Yson::Parse(Yson('<a=1>%true')), $no_strict),
+Yson::ConvertToString(Yson::Parse(Yson('<a=1>1')), $no_strict),
+Yson::ConvertToString(Yson::Parse(Yson('<a=1>2u')), $no_strict),
+Yson::ConvertToString(Yson::Parse(Yson('<a=1>3.0')), $no_strict),
+Yson::ConvertToString(Yson::Parse(Yson('<a=1>foo'))),
+Yson::ConvertToString(Yson::Parse(Yson('<a=1>"very loooooooooooooooooong string"'))),
+
+ListMap(Yson::ConvertToList(Yson::Parse(Yson('<a=1>[1;2;3]'))), Yson::ConvertToInt64),
+DictKeys(Yson::ConvertToDict(Yson::Parse(Yson('<a=1>{b=1;c=2}')))),
+
+DictKeys(Yson::Attributes(Yson::Parse(Yson('<a=1;b=2>#')))),
+ListMap(DictPayloads(Yson::Attributes(Yson::Parse(Yson('<a=1;b=2>#')))), Yson::ConvertToInt64),
+
+DictKeys(Yson::Attributes(Yson::Parse(Yson('<a=1;b=2>[]')))),
+ListMap(DictPayloads(Yson::Attributes(Yson::Parse(Yson('<a=1;b=2>[]')))), Yson::ConvertToInt64),
+
+DictKeys(Yson::Attributes(Yson::Parse(Yson('<a=1;b=2>{}')))),
+ListMap(DictPayloads(Yson::Attributes(Yson::Parse(Yson('<a=1;b=2>{}')))), Yson::ConvertToInt64);
diff --git a/yql/essentials/udfs/common/yson2/test/cases/AutoConvertTo.sql b/yql/essentials/udfs/common/yson2/test/cases/AutoConvertTo.sql
new file mode 100644
index 00000000000..0655fea2eca
--- /dev/null
+++ b/yql/essentials/udfs/common/yson2/test/cases/AutoConvertTo.sql
@@ -0,0 +1,53 @@
+$zero = Yson::Parse("0u");
+$bool = Yson::FromBool(true);
+$int = Yson::Parse("123");
+$uint = Yson::Parse("123u");
+$negative = Yson::Parse("-123");
+$double = Yson::Parse("123.456");
+$string = Yson::Parse("\"123\"");
+
+$options = Yson::Options(true AS AutoConvert);
+
+SELECT
+ Yson::ConvertToBool($zero, $options) AS zero_to_bool,
+ Yson::ConvertToBool($bool, $options) AS bool_to_bool,
+ Yson::ConvertToBool($int, $options) AS int_to_bool,
+ Yson::ConvertToBool($uint, $options) AS uint_to_bool,
+ Yson::ConvertToBool($negative, $options) AS negative_to_bool,
+ Yson::ConvertToBool($double, $options) AS double_to_bool,
+ Yson::ConvertToBool($string, $options) AS string_to_bool,
+
+ Yson::ConvertToInt64($zero, $options) AS zero_to_int,
+ Yson::ConvertToInt64($bool, $options) AS bool_to_int,
+ Yson::ConvertToInt64($int, $options) AS int_to_int,
+ Yson::ConvertToInt64($uint, $options) AS uint_to_int,
+ Yson::ConvertToInt64($negative, $options) AS negative_to_int,
+ Yson::ConvertToInt64($double, $options) AS double_to_int,
+ Yson::ConvertToInt64($string, $options) AS string_to_int,
+
+ Yson::ConvertToUint64($zero, $options) AS zero_to_uint,
+ Yson::ConvertToUint64($bool, $options) AS bool_to_uint,
+ Yson::ConvertToUint64($int, $options) AS int_to_uint,
+ Yson::ConvertToUint64($uint, $options) AS uint_to_uint,
+ Yson::ConvertToUint64($negative, $options) AS negative_to_uint,
+ Yson::ConvertToUint64($double, $options) AS double_to_uint,
+ Yson::ConvertToUint64($string, $options) AS string_to_uint,
+
+ Yson::ConvertToDouble($zero, $options) AS zero_to_double,
+ Yson::ConvertToDouble($bool, $options) AS bool_to_double,
+ Yson::ConvertToDouble($int, $options) AS int_to_double,
+ Yson::ConvertToDouble($uint, $options) AS uint_to_double,
+ Yson::ConvertToDouble($negative, $options) AS negative_to_double,
+ Yson::ConvertToDouble($double, $options) AS double_to_double,
+ Yson::ConvertToDouble($string, $options) AS string_to_double,
+
+ Yson::ConvertToString($zero, $options) AS zero_to_string,
+ Yson::ConvertToString($bool, $options) AS bool_to_string,
+ Yson::ConvertToString($int, $options) AS int_to_string,
+ Yson::ConvertToString($uint, $options) AS uint_to_string,
+ Yson::ConvertToString($negative, $options) AS negative_to_string,
+ Yson::ConvertToString($double, $options) AS double_to_string,
+ Yson::ConvertToString($string, $options) AS string_to_string,
+
+ Yson::ConvertTo($string, Struct<x:Double?, y:Int64, z:List<Bool>>, $options) AS struct_stub,
+ Yson::ConvertTo($double, Tuple<Double?, Int64, List<Bool>>, $options) AS tuple_stub;
diff --git a/yql/essentials/udfs/common/yson2/test/cases/Contains.sql b/yql/essentials/udfs/common/yson2/test/cases/Contains.sql
new file mode 100644
index 00000000000..565a999e5f3
--- /dev/null
+++ b/yql/essentials/udfs/common/yson2/test/cases/Contains.sql
@@ -0,0 +1,11 @@
+/* syntax version 1 */
+
+select
+Yson::Contains(Yson::Parse('{a=1}'),'a'),
+Yson::Contains(Yson::Parse('{a=1}'),'b'),
+Yson::Contains(Yson::Parse('[]'),'0'),
+Yson::Contains(Yson::Parse('[1;2]'),'0'),
+Yson::Contains(Yson::Parse('[1;2]'),'2'),
+Yson::Contains(Yson::Parse('[1;2]'),'-2'),
+Yson::Contains(Yson::Parse('[1;2]'),'-3'),
+Yson::Contains(Yson::Parse('2'),'2', Yson::Options(false AS Strict));
diff --git a/yql/essentials/udfs/common/yson2/test/cases/ConvertTo.sql b/yql/essentials/udfs/common/yson2/test/cases/ConvertTo.sql
new file mode 100644
index 00000000000..b15c5249201
--- /dev/null
+++ b/yql/essentials/udfs/common/yson2/test/cases/ConvertTo.sql
@@ -0,0 +1,34 @@
+/* syntax version 1 */
+$bool = Yson::Parse("true");
+$number = Yson::Parse("123");
+$string = Yson::Parse("\"123\"");
+$number_list = Yson::Parse("[1;2;3]");
+$string_list = Yson::Parse("[\"a\";\"b\";\"c\"]");
+$yson_list = Yson::Parse("[123;{a=1;b=2;c=3};{a=4;b=5;c=6}]");
+$number_dict = Yson::Parse("{a=1;b=2;c=3}");
+$string_dict = Yson::Parse("{a=\"aaa\";b=\"bbb\";c=\"ccc\"}");
+$yson_dict = Yson::Parse("{a=123;b=\"bbb\";c=[\"ccc\";\"ddd\"]}");
+$options = Yson::Options(true AS Strict);
+$no_strict = Yson::Options(false AS Strict);
+
+SELECT
+ Yson::ConvertToBool($bool, $options) AS `bool`,
+ Yson::ConvertToInt64($number, $options) AS `int`,
+ Yson::ConvertToUint64($number, $options) AS `uint`,
+ Yson::ConvertToDouble($number, $options) AS `double`,
+ Yson::ConvertToString($string, $options) AS `string`,
+ Yson::ConvertToInt64($string, $no_strict) AS incorrect,
+ Yson::ConvertToUint64List($number_list) AS number_list,
+ Yson::ConvertToStringList($string_list) AS string_list,
+ ListMap(
+ Yson::ConvertToList($yson_list),
+ ($item) -> { return Yson::SerializeJson($item); }
+ ) AS yson_list,
+ Yson::ConvertToStringList($number_list, $no_strict) AS incorrect_list,
+ Yson::ConvertToInt64Dict($number_dict) AS number_dict,
+ Yson::ConvertToStringDict($string_dict) AS string_dict,
+ Yson::SerializeJson(
+ Yson::ConvertToDict($yson_dict)["c"]
+ ) AS yson_dict,
+ Yson::ConvertToBoolDict($number_dict, $no_strict) AS incorrect_dict;
+
diff --git a/yql/essentials/udfs/common/yson2/test/cases/Dicts.sql b/yql/essentials/udfs/common/yson2/test/cases/Dicts.sql
new file mode 100644
index 00000000000..a0b9ce59685
--- /dev/null
+++ b/yql/essentials/udfs/common/yson2/test/cases/Dicts.sql
@@ -0,0 +1,13 @@
+$x = Yson::Parse("{a=1;a=2;b={c=3;d=4}}");
+$no_strict = Yson::Options(false AS Strict);
+select Yson::ConvertToDict($x) is null,
+ DictLength(Yson::ConvertToDict($x)),
+ DictKeys(Yson::ConvertToDict($x)),
+ ListMap(DictPayloads(Yson::ConvertToDict($x)), ($i)->(Yson::ConvertToInt64($i, $no_strict))),
+ ListMap(DictItems(Yson::ConvertToDict($x)),($p)->(($p.0,Yson::ConvertToInt64($p.1, $no_strict)))),
+ DictContains(Yson::ConvertToDict($x),"a"),
+ DictContains(Yson::ConvertToDict($x),"c"),
+ Yson::ConvertToInt64(DictLookup(Yson::ConvertToDict($x),"a")),
+ Yson::ConvertToInt64(DictLookup(Yson::ConvertToDict($x),"c")),
+ DictKeys(Yson::ConvertToDict(Yson::ConvertToDict($x)["b"])),
+ ListMap(DictPayloads(Yson::ConvertToDict(Yson::ConvertToDict($x)["b"])),($y)->(Yson::ConvertToInt64($y)))
diff --git a/yql/essentials/udfs/common/yson2/test/cases/EmptyDicts.sql b/yql/essentials/udfs/common/yson2/test/cases/EmptyDicts.sql
new file mode 100644
index 00000000000..779634413ae
--- /dev/null
+++ b/yql/essentials/udfs/common/yson2/test/cases/EmptyDicts.sql
@@ -0,0 +1,9 @@
+/* syntax version 1 */
+$x = Yson::Parse("{}");
+select Yson::ConvertToDict($x) is null,
+ DictLength(Yson::ConvertToDict($x)),
+ DictKeys(Yson::ConvertToDict($x)),
+ ListMap(DictPayloads(Yson::ConvertToDict($x)),($y)->(Yson::ConvertToInt64($y))),
+ ListMap(DictItems(Yson::ConvertToDict($x)),($p)->(($p.0,Yson::ConvertToInt64($p.1)))),
+ DictContains(Yson::ConvertToDict($x),"a"),
+ Yson::ConvertToInt64(DictLookup(Yson::ConvertToDict($x),"a"));
diff --git a/yql/essentials/udfs/common/yson2/test/cases/EmptyLists.sql b/yql/essentials/udfs/common/yson2/test/cases/EmptyLists.sql
new file mode 100644
index 00000000000..60c879a9481
--- /dev/null
+++ b/yql/essentials/udfs/common/yson2/test/cases/EmptyLists.sql
@@ -0,0 +1,5 @@
+/* syntax version 1 */
+$x = Yson::Parse("[]");
+select Yson::ConvertToList($x) is null,
+ ListLength(Yson::ConvertToList($x)),
+ ListMap(Yson::ConvertToList($x), ($y)->(Yson::ConvertToInt64($y)));
diff --git a/yql/essentials/udfs/common/yson2/test/cases/Equals.sql b/yql/essentials/udfs/common/yson2/test/cases/Equals.sql
new file mode 100644
index 00000000000..a64ea70b054
--- /dev/null
+++ b/yql/essentials/udfs/common/yson2/test/cases/Equals.sql
@@ -0,0 +1,26 @@
+$a1 = Yson::Parse(Yson("{a=1;b=2}"));
+$a2 = Yson::Parse(Yson("{a=1;b=2;}"));
+$a3 = Yson::Parse(Yson("{b=2;a=1}"));
+$b = Yson::Parse(Yson("#"));
+$c = Yson::Parse(Yson("{a=1;b=3}"));
+$d = Yson::Parse(Yson("{a=#}"));
+$e = Yson::Parse(Yson("[a;1;b;2]"));
+$f = Yson::Parse(Yson("{a=1u;b=2}"));
+$g = Yson::Parse(Yson("{a=1;b=\"2\"}"));
+$h = Yson::Parse(Yson("<foo=bar>{a=1;b=2}"));
+$i = Yson::Parse(Yson("{a=1;b=<foo=bar>2}"));
+
+SELECT
+ Yson::Equals($a1, $a1) AS a1,
+ Yson::Equals($a1, $a2) AS a2,
+ Yson::Equals($a1, $a3) AS a3,
+ Yson::Equals($a1, $b) AS b,
+ Yson::Equals($a1, $c) AS c,
+ Yson::Equals($a1, $d) AS d,
+ Yson::Equals($a1, $e) AS e,
+ Yson::Equals($a1, $f) AS f,
+ Yson::Equals($a1, $g) AS g,
+ Yson::Equals($a1, $h) AS h,
+ Yson::Equals($a1, $i) AS i,
+ Yson::Equals($h, $i) AS attrs1,
+ Yson::Equals($i, $i) AS attrs2;
diff --git a/yql/essentials/udfs/common/yson2/test/cases/From.sql b/yql/essentials/udfs/common/yson2/test/cases/From.sql
new file mode 100644
index 00000000000..2a1f6ed15a3
--- /dev/null
+++ b/yql/essentials/udfs/common/yson2/test/cases/From.sql
@@ -0,0 +1,21 @@
+/* syntax version 1 */
+select
+Yson::IsEntity(Yson::From(NULL)),
+Yson::IsEntity(Yson::Parse(Yson("#"))),
+Yson::IsEntity(Yson::Parse(Yson("1"))),
+
+Yson::SerializeText(Yson::FromBool(true)),
+Yson::SerializeText(Yson::FromBool(Nothing(Bool?))),
+Yson::SerializeText(Yson::FromInt64(1l)),
+Yson::SerializeText(Yson::FromInt64(Nothing(Int64?))),
+Yson::SerializeText(Yson::FromUint64(2ul)),
+Yson::SerializeText(Yson::FromUint64(Nothing(Uint64?))),
+Yson::SerializeText(Yson::FromDouble(3.)),
+Yson::SerializeText(Yson::FromDouble(Nothing(Double?))),
+Yson::SerializeText(Yson::FromString("foo")),
+Yson::SerializeText(Yson::FromString("fooooooooooooooooooooooooooooooooo")),
+Yson::SerializeText(Yson::FromString(Nothing(String?))),
+
+Yson::SerializeText(Yson::FromList(Yson::ConvertToList(Yson::Parse(Yson("[1;2;3]"))))),
+Yson::SerializeText(Yson::FromDict(Yson::ConvertToDict(Yson::Parse(Yson("{a=x;b=y}")))));
+
diff --git a/yql/essentials/udfs/common/yson2/test/cases/GenericConvertTo.sql b/yql/essentials/udfs/common/yson2/test/cases/GenericConvertTo.sql
new file mode 100644
index 00000000000..b23d524cff0
--- /dev/null
+++ b/yql/essentials/udfs/common/yson2/test/cases/GenericConvertTo.sql
@@ -0,0 +1,19 @@
+SELECT
+ Yson::ConvertTo(Yson::Parse(Yson("%true")), Bool) AS `bool`,
+ Yson::ConvertTo(Yson::Parse(Yson("123")), Int64) AS `int`,
+ Yson::ConvertTo(Yson::Parse(Yson("123u")), Uint8) AS `uint`,
+ Yson::ConvertTo(Yson::Parse(Yson("1.23")), Double?) AS optional_double,
+ Yson::ConvertTo(Yson::Parse(Yson("#")), Int32?) AS empty_int,
+ Yson::ConvertTo(Yson::Parse(Yson("\"123\"")), String) AS `string`,
+ Yson::ConvertTo(Yson::Parse(Yson("\"приём\"")), Utf8) AS `utf8`,
+ Yson::ConvertTo(Yson::Parse(Yson("{a=b;c=#}")), Yson?) AS `yson`,
+ Yson::ConvertTo(Yson::Parse(Yson("[{a=1};{a=2};{a=3}]")), Json?) AS `json`,
+ Yson::ConvertTo(Yson::Parse(Yson("[1;2;3]")), List<Int64>) AS int_list,
+ Yson::ConvertTo(Yson::Parse(Yson("[[1;2];[3;#]]")), List<List<Int64?>>) AS nested_list,
+ Yson::ConvertTo(Yson::Parse(Yson("{foo=1;bar=2}")), Dict<String,Int64>) AS int_dict,
+ Yson::ConvertTo(Yson::Parse(Yson("[%false;1;\"foo\";[1;2]]")), Tuple<Bool,Int8,String?,List<Int64>>) AS `tuple`,
+ Yson::ConvertTo(Yson::Parse(Yson("{a=%false;b=1;c=foo;d=[1;2];e=[1.0;bar]}")), Struct<a:Bool,b:Int8,c:String?,d:List<Int64>,e:Tuple<Double,String>>) AS `struct`,
+ Yson::Serialize(Yson::ConvertTo(Yson::Parse(Yson("foo")), Resource<'Yson2.Node'>)) AS `resource`,
+ Yson::ConvertTo(Yson::Parse(Yson("[-3.1416; 42.0; 0.003]")), List<Float>) AS list_of_floats,
+ Yson::ConvertTo(@@{a=[1]}@@y, Struct<a: Struct<b: Int64>>?, Yson::Options(false AS Strict)) as bad_member,
+ Yson::ConvertTo(@@[1;2]@@y, Tuple<Int64, Tuple<Int64>>?, Yson::Options(false AS Strict)) as bad_element;
diff --git a/yql/essentials/udfs/common/yson2/test/cases/GenericConvertToEmptyStruct.sql b/yql/essentials/udfs/common/yson2/test/cases/GenericConvertToEmptyStruct.sql
new file mode 100644
index 00000000000..7210c1d47ae
--- /dev/null
+++ b/yql/essentials/udfs/common/yson2/test/cases/GenericConvertToEmptyStruct.sql
@@ -0,0 +1,2 @@
+SELECT
+ Yson::ConvertTo(@@[{"year"="9999"; "a"="three"; "b"=3}; {"year"="9999"; "a"="four"; "b"=4}]@@y, List<Struct<>>) as list_of_empty_structs;
diff --git a/yql/essentials/udfs/common/yson2/test/cases/GenericConvertToWithAutoConvert.sql b/yql/essentials/udfs/common/yson2/test/cases/GenericConvertToWithAutoConvert.sql
new file mode 100644
index 00000000000..6f1e0a9ff4e
--- /dev/null
+++ b/yql/essentials/udfs/common/yson2/test/cases/GenericConvertToWithAutoConvert.sql
@@ -0,0 +1,15 @@
+$ac = Yson::Options(true AS AutoConvert);
+
+SELECT
+ Yson::ConvertTo(Yson::Parse(Yson("yes")), Bool, $ac) AS `bool`,
+ Yson::ConvertTo(Yson::Parse(Yson("no")), Int64, $ac) AS `int`,
+ Yson::ConvertTo(Yson::Parse(Yson("123.7")), Uint8, $ac) AS `uint`,
+ Yson::ConvertTo(Yson::Parse(Yson(@@"1.23"@@)), Double?, $ac) AS optional_double,
+ Yson::ConvertTo(Yson::Parse(Yson("many")), Int32?, $ac) AS empty_int,
+ Yson::ConvertTo(Yson::Parse(Yson("1.23")), String, $ac) AS `string`,
+ Yson::ConvertTo(Yson::Parse(Yson("0u")), Utf8, $ac) AS `utf8`,
+ Yson::ConvertTo(Yson::Parse(Yson(@@[1;2;3;7.7;"8";"9.0"]@@)), List<Int64>, $ac) AS int_list,
+ Yson::ConvertTo(Yson::Parse(Yson("[[1;2];[3;#];5;#]")), List<List<Int64?>>, $ac) AS nested_list,
+ Yson::ConvertTo(Yson::Parse(Yson("{foo=1;bar=2.0;xxx=#}")), Dict<String,Int64>, $ac) AS int_dict,
+ Yson::ConvertTo(Yson::Parse(Yson("[%false;1;\"foo\";[1;2]]")), Tuple<Bool,Int8,String?,List<Int64>,Yson,Json>, $ac) AS `tuple`,
+ Yson::ConvertTo(Yson::Parse(Yson("{a=%false;b=1;c=foo;d=[1;2];e=[1.0;bar]}")), Struct<a:Bool,b:Int8,c:String?,d:List<Int64>,e:Tuple<Double,String>,x:Tuple<Double?,String?>,y:Int16?,z:List<Int8>>, $ac) AS `struct`;
diff --git a/yql/essentials/udfs/common/yson2/test/cases/GenericConvertToWithNoStrict.sql b/yql/essentials/udfs/common/yson2/test/cases/GenericConvertToWithNoStrict.sql
new file mode 100644
index 00000000000..f87b30c9973
--- /dev/null
+++ b/yql/essentials/udfs/common/yson2/test/cases/GenericConvertToWithNoStrict.sql
@@ -0,0 +1,15 @@
+$ns = Yson::Options(false AS Strict);
+
+SELECT
+ Yson::ConvertTo(Yson::Parse(Yson("yes")), Bool?, $ns) AS `bool`,
+ Yson::ConvertTo(Yson::Parse(Yson("no")), Int64?, $ns) AS `int`,
+ Yson::ConvertTo(Yson::Parse(Yson("123.7")), Uint8?, $ns) AS `uint`,
+ Yson::ConvertTo(Yson::Parse(Yson(@@"1.23"@@)), Double?, $ns) AS optional_double,
+ Yson::ConvertTo(Yson::Parse(Yson("many")), Int32?, $ns) AS empty_int,
+ Yson::ConvertTo(Yson::Parse(Yson("1.23")), String?, $ns) AS `string`,
+ Yson::ConvertTo(Yson::Parse(Yson("0u")), Utf8?, $ns) AS `utf8`,
+ Yson::ConvertTo(Yson::Parse(Yson(@@[1;2;3;7.7;"8";"9.0"]@@)), List<Int64>, $ns) AS int_list,
+ Yson::ConvertTo(Yson::Parse(Yson("[[1;2];[3;#];5;#]")), List<List<Int64?>>, $ns) AS nested_list,
+ Yson::ConvertTo(Yson::Parse(Yson("{foo=1;bar=2.0;xxx=#}")), Dict<String,Int64>, $ns) AS int_dict,
+ Yson::ConvertTo(Yson::Parse(Yson("[%false;1;42;[1;2;3.3]]")), Tuple<Bool,Int8,String?,List<Int64>>, $ns) AS `tuple`,
+ Yson::ConvertTo(Yson::Parse(Yson("{a=%false;b=1;c=foo;d=[1;2];e=[1.0;bar]}")), Struct<a:Bool,b:Int8,c:String?,d:List<Int64>,e:Tuple<Double,String>,y:Int16?>, $ns) AS `struct`;
diff --git a/yql/essentials/udfs/common/yson2/test/cases/GenericFrom.sql b/yql/essentials/udfs/common/yson2/test/cases/GenericFrom.sql
new file mode 100644
index 00000000000..2f42d8fb95d
--- /dev/null
+++ b/yql/essentials/udfs/common/yson2/test/cases/GenericFrom.sql
@@ -0,0 +1,21 @@
+SELECT
+ Yson::Serialize(Yson::From(null)) AS `null`,
+ Yson::Serialize(Yson::From([])) AS `empty_list`,
+ Yson::Serialize(Yson::From({})) AS `empty_dict`,
+ Yson::Serialize(Yson::From(true)) AS `bool`,
+ Yson::Serialize(Yson::From(123)) AS `int`,
+ Yson::Serialize(Yson::From(123u)) AS `uint`,
+ Yson::Serialize(Yson::From(Just(1.23))) AS optional_double,
+ Yson::Serialize(Yson::From(Nothing(Int8?))) AS empty_int,
+ Yson::Serialize(Yson::From("123")) AS `string`,
+ Yson::Serialize(Yson::From(AsList(1, 2, 3))) AS int_list,
+ Yson::Serialize(Yson::From(AsList(AsList(1, 2), AsList(3, 1/0)))) AS nested_list,
+ Yson::Serialize(Yson::From(AsDict(AsTuple("foo", 1), AsTuple("bar", 2)))) AS int_dict,
+ Yson::Serialize(Yson::From(AsTuple(false, 1, "foo", AsList(1,2)))) AS `tuple`,
+ Yson::Serialize(Yson::From(AsStruct(false AS a, 1 AS b, "foo" AS c, AsList(1,2) AS d, AsTuple(1.0, "bar") AS e))) AS `struct`,
+ Yson::Serialize(Yson::From(Utf8("проверка связи"))) AS `utf8`,
+ Yson::Serialize(Yson::From(Yson("{a=1;b=#}"))) AS `yson`,
+ Yson::Serialize(Yson::From(Json(@@{"a":"foo","b":[1,2,3,4,5]}@@))) AS `json`,
+ Yson::Serialize(Yson::From(AsList(Yson::From(1), Yson::From("foo")))) AS resource_list,
+ Yson::Serialize(Yson::From([AsVariant(1, "one"), AsVariant("2", "two")])) AS `variants`,
+ Yson::Serialize(Yson::From(Just(1u/0u))) AS double_optional;
diff --git a/yql/essentials/udfs/common/yson2/test/cases/Get.sql b/yql/essentials/udfs/common/yson2/test/cases/Get.sql
new file mode 100644
index 00000000000..4449aef41de
--- /dev/null
+++ b/yql/essentials/udfs/common/yson2/test/cases/Get.sql
@@ -0,0 +1,8 @@
+$list = Yson::Parse("[\"abc\"; 123; #;]");
+$dict = Yson::Parse("{\"a\"=1;}");
+$scalar = Yson::Parse("123");
+
+SELECT
+ Yson::GetLength($list) AS list_length,
+ Yson::GetLength($dict) AS dict_length,
+ Yson::GetLength($scalar, Yson::Options(false AS Strict)) AS scalar_length;
diff --git a/yql/essentials/udfs/common/yson2/test/cases/GetHash.sql b/yql/essentials/udfs/common/yson2/test/cases/GetHash.sql
new file mode 100644
index 00000000000..e869c45e9aa
--- /dev/null
+++ b/yql/essentials/udfs/common/yson2/test/cases/GetHash.sql
@@ -0,0 +1,27 @@
+$a1 = Yson::Parse(Yson("{a=1;b=2}"));
+$a2 = Yson::Parse(Yson("{a=1;b=2;}"));
+$a3 = Yson::Parse(Yson("{b=2;a=1}"));
+$b = Yson::Parse(Yson("#"));
+$c = Yson::Parse(Yson("{a=1;b=3}"));
+$d = Yson::Parse(Yson("{a=#}"));
+$e = Yson::Parse(Yson("[a;1;b;2]"));
+$f = Yson::Parse(Yson("{a=1u;b=2}"));
+$g = Yson::Parse(Yson("{a=1;b=\"2\"}"));
+$h = Yson::Parse(Yson("<foo=bar>{a=1;b=2}"));
+$i = Yson::Parse(Yson("{a=1;b=<foo=bar>2}"));
+$j = Yson::Parse(Yson("[1;a;b;2]"));
+
+SELECT
+ Yson::GetHash($a1) AS a1,
+ Yson::GetHash($a2) AS a2,
+ Yson::GetHash($a3) AS a3,
+ Yson::GetHash($b) AS b,
+ Yson::GetHash($c) AS c,
+ Yson::GetHash($d) AS d,
+ Yson::GetHash($e) AS e,
+ Yson::GetHash($f) AS f,
+ Yson::GetHash($g) AS g,
+ Yson::GetHash($h) AS h,
+ Yson::GetHash($i) AS i,
+ Yson::GetHash($j) AS j,
+
diff --git a/yql/essentials/udfs/common/yson2/test/cases/GoodForYsonBadForJson.sql b/yql/essentials/udfs/common/yson2/test/cases/GoodForYsonBadForJson.sql
new file mode 100644
index 00000000000..034b19e8905
--- /dev/null
+++ b/yql/essentials/udfs/common/yson2/test/cases/GoodForYsonBadForJson.sql
@@ -0,0 +1,7 @@
+$inf = Yson::From(1./0.);
+$binary = Yson::From("\"12345\xf67\"");
+
+SELECT Yson::Serialize($inf), Yson::Serialize($binary);
+
+PRAGMA yson.DisableStrict;
+SELECT Yson::SerializeJson($inf), Yson::SerializeJson($binary);
diff --git a/yql/essentials/udfs/common/yson2/test/cases/ImplicitFromRes.sql b/yql/essentials/udfs/common/yson2/test/cases/ImplicitFromRes.sql
new file mode 100644
index 00000000000..a42e370c598
--- /dev/null
+++ b/yql/essentials/udfs/common/yson2/test/cases/ImplicitFromRes.sql
@@ -0,0 +1,4 @@
+/* syntax version 1 */
+select
+ Yson::Parse(Yson::Parse("[]"y)),
+ Yson::ParseJson(Yson::Parse("[]"y));
diff --git a/yql/essentials/udfs/common/yson2/test/cases/IsType.sql b/yql/essentials/udfs/common/yson2/test/cases/IsType.sql
new file mode 100644
index 00000000000..1407ad43661
--- /dev/null
+++ b/yql/essentials/udfs/common/yson2/test/cases/IsType.sql
@@ -0,0 +1,12 @@
+$all = [@@"str"@@y, "-13"y, "42u"y, "3.14"y, "#"y, "%false"y, "[1;2;3;]"y, "{}"y];
+
+select
+ Yson::IsString(y) as is_string,
+ Yson::IsInt64(y) as is_int64,
+ Yson::IsUint64(y) as is_uint64,
+ Yson::IsDouble(y) as is_double,
+ Yson::IsEntity(y) as is_entity,
+ Yson::IsBool(y) as is_bool,
+ Yson::IsList(y) as is_list,
+ Yson::IsDict(y) as is_dict
+FROM AS_TABLE(ListMap($all, ($y)->(<|'y':$y|>)));
diff --git a/yql/essentials/udfs/common/yson2/test/cases/JsonSerializeSkipMapEntity.sql b/yql/essentials/udfs/common/yson2/test/cases/JsonSerializeSkipMapEntity.sql
new file mode 100644
index 00000000000..e2b292f495b
--- /dev/null
+++ b/yql/essentials/udfs/common/yson2/test/cases/JsonSerializeSkipMapEntity.sql
@@ -0,0 +1,18 @@
+$node1 = Yson::Parse(Yson(@@{a=123}@@));
+$node2 = Yson::Parse(Yson(@@{a=#}@@));
+$node3 = Yson::Parse(Yson(@@{a=123;b=#}@@));
+$node4 = Yson::Parse(Yson(@@[123;#]@@));
+$node5 = Yson::Parse(Yson(@@{a=1;b=#;c=1;d=#;e=#}@@));
+$node6 = Yson::Parse(Yson(@@{b=1;a=<c=1;d=#;e=3>23}@@));
+$node7 = Yson::Parse(Yson(@@{b=1;a=<c=1;d=#;e=3>#}@@));
+$node8 = Yson::Parse(Yson(@@<d=#>23@@));
+
+SELECT
+ Yson::SerializeJson($node1, true as SkipMapEntity) AS res1,
+ Yson::SerializeJson($node2, true as SkipMapEntity) AS res2,
+ Yson::SerializeJson($node3, true as SkipMapEntity) AS res3,
+ Yson::SerializeJson($node4, true as SkipMapEntity) AS res4,
+ Yson::SerializeJson($node5, true as SkipMapEntity) AS res5,
+ Yson::SerializeJson($node6, true as SkipMapEntity) AS res6,
+ Yson::SerializeJson($node7, true as SkipMapEntity) AS res7,
+ Yson::SerializeJson($node8, true as SkipMapEntity) AS res8;
diff --git a/yql/essentials/udfs/common/yson2/test/cases/JsonWithNanAsString.sql b/yql/essentials/udfs/common/yson2/test/cases/JsonWithNanAsString.sql
new file mode 100644
index 00000000000..33002ffc034
--- /dev/null
+++ b/yql/essentials/udfs/common/yson2/test/cases/JsonWithNanAsString.sql
@@ -0,0 +1,8 @@
+$src = Yson::From(0./0.); -- nan
+$src1 = Yson::From(1./0.); -- inf
+$src2 = Yson::From(-1./0.); -- -inf
+
+SELECT
+ Yson::SerializeJson($src, true AS WriteNanAsString),
+ Yson::SerializeJson($src1, true AS WriteNanAsString),
+ Yson::SerializeJson($src2, true AS WriteNanAsString) \ No newline at end of file
diff --git a/yql/essentials/udfs/common/yson2/test/cases/JsonWithUtf8.sql b/yql/essentials/udfs/common/yson2/test/cases/JsonWithUtf8.sql
new file mode 100644
index 00000000000..00f4c98cd98
--- /dev/null
+++ b/yql/essentials/udfs/common/yson2/test/cases/JsonWithUtf8.sql
@@ -0,0 +1,5 @@
+$src = Yson::From("Хэллоу!");
+SELECT Yson::SerializeJson($src, false AS EncodeUtf8), Yson::SerializeJson($src, true AS EncodeUtf8),
+ Yson::Equals(Yson::ParseJson(Json("\"\xD0\xA5\xD1\x8D\xD0\xBB\xD0\xBB\xD0\xBE\xD1\x83!\"")), Yson::ParseJsonDecodeUtf8(Json("\"\xC3\x90\xC2\xA5\xC3\x91\xC2\x8D\xC3\x90\xC2\xBB\xC3\x90\xC2\xBB\xC3\x90\xC2\xBE\xC3\x91\xC2\x83!\""))),
+ Yson::SerializeJsonEncodeUtf8($src);
+
diff --git a/yql/essentials/udfs/common/yson2/test/cases/Lists.sql b/yql/essentials/udfs/common/yson2/test/cases/Lists.sql
new file mode 100644
index 00000000000..4d1c74e6cbc
--- /dev/null
+++ b/yql/essentials/udfs/common/yson2/test/cases/Lists.sql
@@ -0,0 +1,10 @@
+/* syntax version 1 */
+$x = Yson::Parse("[1;2;[3;4]]");
+$no_strict = Yson::Options(false AS Strict);
+select Yson::ConvertToList($x) is null,
+ ListLength(Yson::ConvertToList($x)),
+ ListMap(Yson::ConvertToList($x), ($i)->(Yson::ConvertToInt64($i,$no_strict))),
+ ListMap(Yson::ConvertToList($x), ($x)->(ListMap(Yson::ConvertToList($x, Yson::Options(false AS Strict)), Yson::ConvertToInt64)));
+
+$int_and_str = Yson(@@[123;"456"]@@);
+SELECT ListMap(Yson::ConvertToList($int_and_str), Yson::ConvertToString), ListMap(Yson::ConvertToList($int_and_str), Yson::ConvertToInt64);
diff --git a/yql/essentials/udfs/common/yson2/test/cases/Lookup.sql b/yql/essentials/udfs/common/yson2/test/cases/Lookup.sql
new file mode 100644
index 00000000000..e24b1564177
--- /dev/null
+++ b/yql/essentials/udfs/common/yson2/test/cases/Lookup.sql
@@ -0,0 +1,29 @@
+/* syntax version 1 */
+$no_strict = Yson::Options(false AS Strict);
+
+select
+Yson::ConvertToBool(Yson::Lookup(Yson::Parse('{a=%true}'), 'a')),
+Yson::LookupBool(Yson::Parse('{a=%true}'), 'a'),
+
+Yson::ConvertToInt64(Yson::Lookup(Yson::Parse('{a=1}'), 'a')),
+Yson::LookupInt64(Yson::Parse('{a=1}'), 'a'),
+
+Yson::ConvertToUint64(Yson::Lookup(Yson::Parse('{a=2u}'), 'a')),
+Yson::LookupUint64(Yson::Parse('{a=2u}'), 'a'),
+
+Yson::ConvertToDouble(Yson::Lookup(Yson::Parse('{a=3.0}'), 'a')),
+Yson::LookupDouble(Yson::Parse('{a=3.0}'), 'a'),
+
+Yson::ConvertToString(Yson::Lookup(Yson::Parse('{a=x}'), 'a')),
+Yson::LookupString(Yson::Parse('{a=x}'), 'a'),
+
+ListLength(Yson::ConvertToList(Yson::Lookup(Yson::Parse('{a=[1;2]}'), 'a'))),
+ListLength(Yson::LookupList(Yson::Parse('{a=[1;2]}'), 'a')),
+
+DictLength(Yson::ConvertToDict(Yson::Lookup(Yson::Parse('{a={b=c}}'), 'a'))),
+DictLength(Yson::LookupDict(Yson::Parse('{a={b=c}}'), 'a')),
+
+Yson::LookupString(Yson::Parse('[]'), '0'),
+
+Yson::LookupString(Yson::Parse('{a=12345}'), 'a', $no_strict),
+Yson::LookupDouble(Yson::Parse(@@{a="12345"}@@), 'a', $no_strict);
diff --git a/yql/essentials/udfs/common/yson2/test/cases/NegativeArrayIndex.sql b/yql/essentials/udfs/common/yson2/test/cases/NegativeArrayIndex.sql
new file mode 100644
index 00000000000..bc1f0bc6b5a
--- /dev/null
+++ b/yql/essentials/udfs/common/yson2/test/cases/NegativeArrayIndex.sql
@@ -0,0 +1,13 @@
+$node = Yson::Parse(@@[1;2;3;4;5;6;7]@@);
+
+SELECT
+ Yson::YPathInt64($node, "/+1"),
+ Yson::YPathInt64($node, "/-1"),
+ Yson::YPathInt64($node, "/+2"),
+ Yson::YPathInt64($node, "/-2"),
+ Yson::YPathInt64($node, "/+6"),
+ Yson::YPathInt64($node, "/-7"),
+ Yson::YPathInt64($node, "/+7"),
+ Yson::YPathInt64($node, "/-8"),
+ Yson::YPathInt64($node, "/0");
+
diff --git a/yql/essentials/udfs/common/yson2/test/cases/ParseString.sql b/yql/essentials/udfs/common/yson2/test/cases/ParseString.sql
new file mode 100644
index 00000000000..6968ec07c6a
--- /dev/null
+++ b/yql/essentials/udfs/common/yson2/test/cases/ParseString.sql
@@ -0,0 +1,11 @@
+$options = Yson::Options(false AS Strict);
+
+SELECT
+ Yson::Parse("0u"),
+ Yson::Parse(Just("1u")),
+ Yson::ParseJson("2"),
+ Yson::ParseJson(Just("3")),
+ Yson::Parse("", $options),
+ Yson::Parse(Just(""), $options),
+ Yson::ParseJson("", $options),
+ Yson::ParseJson(Just(""), $options);
diff --git a/yql/essentials/udfs/common/yson2/test/cases/Scalars.sql b/yql/essentials/udfs/common/yson2/test/cases/Scalars.sql
new file mode 100644
index 00000000000..2b8d751957b
--- /dev/null
+++ b/yql/essentials/udfs/common/yson2/test/cases/Scalars.sql
@@ -0,0 +1,46 @@
+/* syntax version 1 */
+$no_strict = Yson::Options(false AS Strict);
+
+select
+Yson::ConvertToBool(Yson::Parse('#'), $no_strict),
+Yson::ConvertToBool(Yson::Parse('%true')),
+Yson::ConvertToBool(Yson::Parse('true')),
+Yson::ConvertToBool(Yson::Parse('false')),
+Yson::ConvertToBool(Yson::Parse('1'), $no_strict),
+Yson::ConvertToBool(Yson::Parse('2u'), $no_strict),
+Yson::ConvertToBool(Yson::Parse('3.0'), $no_strict),
+Yson::ConvertToBool(Yson::Parse('foo'), $no_strict),
+Yson::ConvertToBool(Yson::Parse('"very loooooooooooooooooong string"'), $no_strict),
+
+Yson::ConvertToInt64(Yson::Parse('#'), $no_strict),
+Yson::ConvertToInt64(Yson::Parse('%true'), $no_strict),
+Yson::ConvertToInt64(Yson::Parse('1')),
+Yson::ConvertToInt64(Yson::Parse('2u')),
+Yson::ConvertToInt64(Yson::Parse('3.0'), $no_strict),
+Yson::ConvertToInt64(Yson::Parse('foo'), $no_strict),
+Yson::ConvertToInt64(Yson::Parse('"very loooooooooooooooooong string"'), $no_strict),
+
+Yson::ConvertToUint64(Yson::Parse('#'), $no_strict),
+Yson::ConvertToUint64(Yson::Parse('%true'), $no_strict),
+Yson::ConvertToUint64(Yson::Parse('-1'), $no_strict),
+Yson::ConvertToUint64(Yson::Parse('1')),
+Yson::ConvertToUint64(Yson::Parse('2u')),
+Yson::ConvertToUint64(Yson::Parse('3.0'), $no_strict),
+Yson::ConvertToUint64(Yson::Parse('foo'), $no_strict),
+Yson::ConvertToUint64(Yson::Parse('"very loooooooooooooooooong string"'), $no_strict),
+
+Yson::ConvertToDouble(Yson::Parse('#'), $no_strict),
+Yson::ConvertToDouble(Yson::Parse('%true'), $no_strict),
+Yson::ConvertToDouble(Yson::Parse('1')),
+Yson::ConvertToDouble(Yson::Parse('2u')),
+Yson::ConvertToDouble(Yson::Parse('3.0')),
+Yson::ConvertToDouble(Yson::Parse('foo'), $no_strict),
+Yson::ConvertToDouble(Yson::Parse('"very loooooooooooooooooong string"'), $no_strict),
+
+Yson::ConvertToString(Yson::Parse('#'), $no_strict),
+Yson::ConvertToString(Yson::Parse('%true'), $no_strict),
+Yson::ConvertToString(Yson::Parse('1'), $no_strict),
+Yson::ConvertToString(Yson::Parse('2u'), $no_strict),
+Yson::ConvertToString(Yson::Parse('3.0'), $no_strict),
+Yson::ConvertToString(Yson::Parse('foo')),
+Yson::ConvertToString(Yson::Parse('"very loooooooooooooooooong string"')),
diff --git a/yql/essentials/udfs/common/yson2/test/cases/Serialize.sql b/yql/essentials/udfs/common/yson2/test/cases/Serialize.sql
new file mode 100644
index 00000000000..88eb075d878
--- /dev/null
+++ b/yql/essentials/udfs/common/yson2/test/cases/Serialize.sql
@@ -0,0 +1,3 @@
+/* syntax version 1 */
+select
+Yson::SerializeText(Yson::Parse(Yson('<a=1>[#;{a=1};{b=2u;c=[]};<q=foo>3.0;{};foo;"very loooooooooooooooooong string"]')));
diff --git a/yql/essentials/udfs/common/yson2/test/cases/SerializeDouble.sql b/yql/essentials/udfs/common/yson2/test/cases/SerializeDouble.sql
new file mode 100644
index 00000000000..5b3b1c4402f
--- /dev/null
+++ b/yql/essentials/udfs/common/yson2/test/cases/SerializeDouble.sql
@@ -0,0 +1,13 @@
+$s = <|
+ double1: 1.0000000001,
+ double2: 1.000000001,
+ double3: 1000000000.5,
+ double4: 10000000005.0,
+ double5: 10000000000.5,
+ double6: 100000000005.0,
+|>;
+
+SELECT
+ Yson::Serialize(Yson::From($s)),
+ Yson::SerializeJson(Yson::From($s))
+; \ No newline at end of file
diff --git a/yql/essentials/udfs/common/yson2/test/cases/WeakYsonRest.in b/yql/essentials/udfs/common/yson2/test/cases/WeakYsonRest.in
new file mode 100644
index 00000000000..18703eb2520
--- /dev/null
+++ b/yql/essentials/udfs/common/yson2/test/cases/WeakYsonRest.in
@@ -0,0 +1,5 @@
+{"key"="020";"subkey"="1";"_rest"={"animal"="wombat";"size"="small";"weightMin"=20.;"weightMax"=35.;"wild"=%true};"binZ"="\x04";"strY"="\1\x08test";"realZ"="\x03\x18-DT\xfb!\t@" ;"uiData"=1};
+{"key"="075";"subkey"="5";"_rest"={"animal"="dog";"size"="huge";"weightMin"=5.;"weightMax"=75.;"pet"=%true} ;"binZ"=%true ;"strY"="\1\nfunny" ;"realZ"="\x03iW\x14\x8b\n\xbf\x05@" ;"uiData"=1u};
+{"key"="150";"subkey"="4";"_rest"={"animal"="chipmunk";"size"="small";"weightMin"=0.05;"weightMax"=0.15;"wild"=%true} ;"binZ"="\x05";"strY"="\1\nbunny" ;"realZ"="\x03\xcd;\x7ff\x9e\xa0\xf6?" ;"uiData"=100500u};
+{"key"="500";"subkey"="2";"_rest"={"animal"="hamster";"size"="verysmall";"weightMin"=0.015;"weightMax"=0.045;"pet"=%true} ;"binZ"=%false;"strY"="33.33" ;"realZ"="\x03\x00\x00\x00\x00\x00\x00\xf0?" ;"uiData"=10010005001000000u};
+{"key"="800";"subkey"="3";"_rest"={"animal"="dingo";"size"="huge";"weightMin"=10.;"weightMax"=20.;"wild"=%true} ;"binZ"=%false;"strY"="\1\x06zzz" ;"realZ"="\x03\x00\x00\x00\x00\x00\x00\xf0\xbf";"uiData"=33};
diff --git a/yql/essentials/udfs/common/yson2/test/cases/WeakYsonRest.in.attr b/yql/essentials/udfs/common/yson2/test/cases/WeakYsonRest.in.attr
new file mode 100644
index 00000000000..6ce47a00153
--- /dev/null
+++ b/yql/essentials/udfs/common/yson2/test/cases/WeakYsonRest.in.attr
@@ -0,0 +1,12 @@
+{"_yql_row_spec"={
+ "Type"=["StructType";[
+ ["key";["DataType";"String"]];
+ ["subkey";["DataType";"String"]];
+ ["_rest";["OptionalType";["DataType";"Yson"]]]
+ ]];
+ "SortDirections"=[1;1;];
+ "SortedBy"=["key";"subkey";];
+ "SortedByTypes"=[["DataType";"String";];["DataType";"String";];];
+ "SortMembers"=["key";"subkey";];
+ "DefaultValues"={"key"="\"\"";"subkey"="\"\"";}
+}}
diff --git a/yql/essentials/udfs/common/yson2/test/cases/WeakYsonRest.sql b/yql/essentials/udfs/common/yson2/test/cases/WeakYsonRest.sql
new file mode 100644
index 00000000000..2c84ad498bb
--- /dev/null
+++ b/yql/essentials/udfs/common/yson2/test/cases/WeakYsonRest.sql
@@ -0,0 +1,7 @@
+/* postgres can not */
+USE plato;
+
+--INSERT INTO Output
+SELECT
+ WeakField(animal, "String")
+FROM Input
diff --git a/yql/essentials/udfs/common/yson2/test/cases/WithAttrs.sql b/yql/essentials/udfs/common/yson2/test/cases/WithAttrs.sql
new file mode 100644
index 00000000000..a4a21610ad6
--- /dev/null
+++ b/yql/essentials/udfs/common/yson2/test/cases/WithAttrs.sql
@@ -0,0 +1,7 @@
+/* syntax version 1 */
+
+select
+Yson::SerializeText(Yson::WithAttributes(Yson::Parse('1'), Yson::Parse('{a=2}'))),
+Yson::SerializeText(Yson::WithAttributes(Yson::Parse('1'), Yson::Parse('{}'))),
+Yson::SerializeText(Yson::WithAttributes(Yson::Parse('1'), Yson::Parse('#'))),
+Yson::SerializeText(Yson::WithAttributes(Yson::Parse('<c=2>1'), Yson::Parse('{b=3}')));
diff --git a/yql/essentials/udfs/common/yson2/test/cases/YPath.sql b/yql/essentials/udfs/common/yson2/test/cases/YPath.sql
new file mode 100644
index 00000000000..30b3bc372c9
--- /dev/null
+++ b/yql/essentials/udfs/common/yson2/test/cases/YPath.sql
@@ -0,0 +1,13 @@
+$node = Yson::Parse(@@<x="y">{abc=123;}@@);
+$data = Yson::YPath($node, "/abc");
+$attrs = Yson::YPath($node, "/@");
+$miss = Yson::YPath($node, "/def");
+
+SELECT
+ Yson::ConvertToInt64($data) AS data,
+ Yson::ConvertToStringDict($attrs) AS attrs,
+ Yson::SerializePretty($miss) AS miss,
+ Yson::YPathInt64($node, "/abc") AS num,
+ Yson::YPathString($node, "/@/x") AS str_attr,
+ Yson::YPathBool($node, "/@/mis") AS miss_attr,
+ Yson::YPathString($node, "/abc", Yson::Options(false as Strict)) AS bad_conv;
diff --git a/yql/essentials/udfs/common/yson2/test/ya.make b/yql/essentials/udfs/common/yson2/test/ya.make
new file mode 100644
index 00000000000..a13d3b13b9c
--- /dev/null
+++ b/yql/essentials/udfs/common/yson2/test/ya.make
@@ -0,0 +1,12 @@
+YQL_UDF_TEST_CONTRIB()
+
+DEPENDS(yql/essentials/udfs/common/yson2)
+
+TIMEOUT(300)
+SIZE(MEDIUM)
+
+IF (SANITIZER_TYPE == "memory")
+ TAG(ya:not_autocheck) # YQL-15385
+ENDIF()
+
+END()
diff --git a/yql/essentials/udfs/common/yson2/ya.make b/yql/essentials/udfs/common/yson2/ya.make
new file mode 100644
index 00000000000..64bb6b56ecc
--- /dev/null
+++ b/yql/essentials/udfs/common/yson2/ya.make
@@ -0,0 +1,32 @@
+IF (YQL_PACKAGED)
+ PACKAGE()
+ FROM_SANDBOX(FILE 7319908881 OUT_NOAUTO libyson2_udf.so
+ )
+ END()
+ELSE ()
+YQL_UDF_CONTRIB(yson2_udf)
+
+ YQL_ABI_VERSION(
+ 2
+ 28
+ 0
+ )
+
+ SRCS(
+ yson2_udf.cpp
+ )
+
+ PEERDIR(
+ library/cpp/containers/stack_vector
+ library/cpp/yson_pull
+ yql/essentials/minikql/dom
+ )
+
+ END()
+ENDIF ()
+
+
+RECURSE_FOR_TESTS(
+ test
+)
+
diff --git a/yql/essentials/udfs/common/yson2/yson2_udf.cpp b/yql/essentials/udfs/common/yson2/yson2_udf.cpp
new file mode 100644
index 00000000000..76dbe07c55a
--- /dev/null
+++ b/yql/essentials/udfs/common/yson2/yson2_udf.cpp
@@ -0,0 +1,1203 @@
+#include <yql/essentials/minikql/dom/node.h>
+#include <yql/essentials/minikql/dom/json.h>
+#include <yql/essentials/minikql/dom/yson.h>
+#include <yql/essentials/minikql/dom/make.h>
+#include <yql/essentials/minikql/dom/peel.h>
+#include <yql/essentials/minikql/dom/hash.h>
+#include <yql/essentials/minikql/dom/convert.h>
+#include <yql/essentials/public/udf/udf_helpers.h>
+#include <yql/essentials/public/udf/udf_type_printer.h>
+
+#include <library/cpp/yson_pull/exceptions.h>
+
+#include <util/string/split.h>
+
+using namespace NYql::NUdf;
+using namespace NYql::NDom;
+using namespace NYsonPull;
+
+namespace {
+
+constexpr char OptionsResourceName[] = "Yson2.Options";
+
+using TOptionsResource = TResource<OptionsResourceName>;
+using TNodeResource = TResource<NodeResourceName>;
+
+using TDictType = TDict<char*, TNodeResource>;
+using TInt64DictType = TDict<char*, i64>;
+using TUint64DictType = TDict<char*, ui64>;
+using TBoolDictType = TDict<char*, bool>;
+using TDoubleDictType = TDict<char*, double>;
+using TStringDictType = TDict<char*, char*>;
+
+// Bit values packed into the single byte that backs the "Yson2.Options" resource.
+// TOptions::Run ORs them together; ParseOptions reads the byte back through TOpts.
+enum class EOptions : ui8 {
+    Strict = 1 << 0,
+    AutoConvert = 1 << 1
+};
+
+// One-byte view of the options resource: either the raw byte or two one-bit flags.
+// NOTE(review): reading Strict/AutoConvert after Raw was initialized presumably relies on
+// the compiler allocating bit-fields starting from the lowest bit, so that the flags line
+// up with the EOptions values - confirm for every supported toolchain.
+union TOpts {
+    ui8 Raw = 0;
+    struct {
+        bool Strict: 1;
+        bool AutoConvert: 1;
+    };
+};
+
+static_assert(sizeof(TOpts) == 1U, "Wrong TOpts size.");
+
+// Decodes the optional options argument of a UDF call.
+// An empty value yields a default TOpts (Raw == 0, i.e. both flags cleared);
+// otherwise the stored ui8 becomes the raw byte.
+TOpts ParseOptions(TUnboxedValuePod x) {
+    if (!x) {
+        return {};
+    }
+    return TOpts{x.Get<ui8>()};
+}
+
+// Implementation of the "Options" function: packs its two optional boolean arguments
+// into one byte built from EOptions bits and returns it as the options resource.
+class TOptions : public TBoxedValue {
+    // args[0] is AutoConvert and args[1] is Strict (the order declared in DeclareSignature).
+    // A bit is set only when the corresponding argument is present and true.
+    TUnboxedValue Run(const IValueBuilder*, const TUnboxedValuePod* args) const override {
+        const bool autoConvert = args[0] && args[0].Get<bool>();
+        const bool strict = args[1] && args[1].Get<bool>();
+
+        ui8 packed = 0;
+        if (autoConvert) {
+            packed |= ui8(EOptions::AutoConvert);
+        }
+        if (strict) {
+            packed |= ui8(EOptions::Strict);
+        }
+
+        return TUnboxedValuePod(packed);
+    }
+public:
+    // Name under which the function is registered.
+    static const TStringRef& Name() {
+        static auto name = TStringRef::Of("Options");
+        return name;
+    }
+
+    // Describes the signature when `name` matches Name(): two optional named bool
+    // arguments (AutoConvert, Strict), both omittable, returning the options resource.
+    // The implementation object is attached only when typesOnly is false.
+    // Returns false, leaving the builder untouched, for any other name.
+    static bool DeclareSignature(
+        const TStringRef& name,
+        TType* userType,
+        IFunctionTypeInfoBuilder& builder,
+        bool typesOnly) {
+        Y_UNUSED(userType);
+        if (!(Name() == name)) {
+            return false;
+        }
+
+        auto namedArgs = builder.Args(2U);
+        namedArgs->Add<TOptional<bool>>().Name(TStringRef::Of("AutoConvert"));
+        namedArgs->Add<TOptional<bool>>().Name(TStringRef::Of("Strict"));
+        builder.Returns(builder.Resource(OptionsResourceName));
+        builder.OptionalArgs(2U);
+        if (!typesOnly) {
+            builder.Implementation(new TOptions);
+        }
+
+        builder.IsStrict();
+        return true;
+    }
+};
+
+// Common signature of node converters: (node, value builder, call position) -> converted value.
+using TConverterPtr = TUnboxedValuePod (*)(TUnboxedValuePod, const IValueBuilder*, const TSourcePosition& pos);
+
+// Boxed list/dict wrapper that forwards structural queries (lengths, Contains, ...)
+// to the wrapped value and runs Converter on items only when they are read,
+// either through an iterator or through Lookup.
+template <TConverterPtr Converter>
+class TLazyConveterT : public TManagedBoxedValue {
+public:
+    TLazyConveterT(TUnboxedValue&& source, const IValueBuilder* valueBuilder, const TSourcePosition& pos)
+        : Original(std::move(source))
+        , ValueBuilder(valueBuilder)
+        , Pos_(pos)
+    {}
+private:
+    // Iterator adaptor over an iterator of the wrapped value.
+    //  NoSwap == false: Next() converts the produced value; NextPair() converts the key.
+    //  NoSwap == true:  Next() passes the value through; NextPair() converts the payload.
+    template <bool NoSwap>
+    class TIterator: public TManagedBoxedValue {
+    public:
+        TIterator(TUnboxedValue&& source, const IValueBuilder* valueBuilder, const TSourcePosition& pos)
+            : Original(std::move(source))
+            , ValueBuilder(valueBuilder)
+            , Pos_(pos)
+        {}
+
+    private:
+        bool Skip() final {
+            return Original.Skip();
+        }
+
+        bool Next(TUnboxedValue& value) final {
+            if (!Original.Next(value)) {
+                return false;
+            }
+            if constexpr (!NoSwap) {
+                value = Converter(value.Release(), ValueBuilder, Pos_);
+            }
+            return true;
+        }
+
+        bool NextPair(TUnboxedValue& key, TUnboxedValue& payload) final {
+            if (!Original.NextPair(key, payload)) {
+                return false;
+            }
+            if constexpr (NoSwap) {
+                payload = Converter(payload.Release(), ValueBuilder, Pos_);
+            } else {
+                key = Converter(key.Release(), ValueBuilder, Pos_);
+            }
+            return true;
+        }
+
+        const TUnboxedValue Original;
+        const IValueBuilder *const ValueBuilder;
+        const TSourcePosition Pos_;
+    };
+
+    // --- Structural queries: answered by the wrapped value as is. ---
+
+    ui64 GetDictLength() const final {
+        return Original.GetDictLength();
+    }
+
+    ui64 GetListLength() const final {
+        return Original.GetListLength();
+    }
+
+    bool HasFastListLength() const final {
+        return Original.HasFastListLength();
+    }
+
+    bool HasDictItems() const final {
+        return Original.HasDictItems();
+    }
+
+    bool HasListItems() const final {
+        return Original.HasListItems();
+    }
+
+    // --- Iterators: list items and payloads are converted, keys are passed through. ---
+
+    TUnboxedValue GetListIterator() const final {
+        return TUnboxedValuePod(new TIterator<false>(Original.GetListIterator(), ValueBuilder, Pos_));
+    }
+
+    TUnboxedValue GetDictIterator() const final {
+        return TUnboxedValuePod(new TIterator<true>(Original.GetDictIterator(), ValueBuilder, Pos_));
+    }
+
+    TUnboxedValue GetKeysIterator() const final {
+        return TUnboxedValuePod(new TIterator<true>(Original.GetKeysIterator(), ValueBuilder, Pos_));
+    }
+
+    TUnboxedValue GetPayloadsIterator() const override {
+        return TUnboxedValuePod(new TIterator<false>(Original.GetPayloadsIterator(), ValueBuilder, Pos_));
+    }
+
+    bool Contains(const TUnboxedValuePod& key) const final {
+        return Original.Contains(key);
+    }
+
+    // Looks the key up in the wrapped value; a hit is unwrapped from its optional,
+    // converted and wrapped into an optional again, a miss yields an empty value.
+    TUnboxedValue Lookup(const TUnboxedValuePod& key) const final {
+        auto found = Original.Lookup(key);
+        if (!found) {
+            return {};
+        }
+        return Converter(found.Release().GetOptionalValue(), ValueBuilder, Pos_).MakeOptional();
+    }
+
+    bool IsSortedDict() const final {
+        return Original.IsSortedDict();
+    }
+
+    const TUnboxedValue Original;
+    const IValueBuilder *const ValueBuilder;
+    const TSourcePosition Pos_;
+};
+
+// Converts a DOM node to a list value, optionally converting every element with Converter.
+//  - Empty input, an unboxed list node, or (when not Strict) a node of another kind
+//    gives an empty list.
+//  - Attr nodes are unwrapped and processed recursively.
+//  - Without a Converter a boxed list node is returned unchanged.
+//  - With a Converter and Strict or AutoConvert set, elements are converted lazily
+//    through TLazyConveterT.
+//  - With a Converter and neither flag set, elements are converted eagerly; elements
+//    for which the converter returns an empty value are dropped, and an empty list is
+//    returned when nothing remains.
+//  - In Strict mode a node that is not a list terminates the UDF with an error message.
+template<bool Strict, bool AutoConvert, TConverterPtr Converter = nullptr>
+TUnboxedValuePod ConvertToListImpl(TUnboxedValuePod x, const IValueBuilder* valueBuilder, const TSourcePosition& pos) {
+    if (!x) {
+        return valueBuilder->NewEmptyList().Release();
+    }
+
+    switch (GetNodeType(x)) {
+        case ENodeType::List:
+            if (!x.IsBoxed()) {
+                break;
+            }
+            if constexpr (Converter != nullptr) {
+                if constexpr (Strict || AutoConvert) {
+                    return TUnboxedValuePod(new TLazyConveterT<Converter>(x, valueBuilder, pos));
+                } else {
+                    TSmallVec<TUnboxedValue, TUnboxedValue::TAllocator> kept;
+                    if (const auto items = x.GetElements()) {
+                        // Direct element access is available: walk the array.
+                        const auto count = x.GetListLength();
+                        kept.reserve(count);
+                        for (ui32 idx = 0U; idx < count; ++idx) {
+                            if (auto item = Converter(items[idx], valueBuilder, pos)) {
+                                kept.emplace_back(std::move(item));
+                            }
+                        }
+                    } else {
+                        // No element array: fall back to the list iterator.
+                        const auto iter = x.GetListIterator();
+                        for (TUnboxedValue current; iter.Next(current);) {
+                            if (auto item = Converter(current.Release(), valueBuilder, pos)) {
+                                kept.emplace_back(std::move(item));
+                            }
+                        }
+                    }
+                    if (kept.empty()) {
+                        break;
+                    }
+                    return valueBuilder->NewList(kept.data(), kept.size()).Release();
+                }
+            }
+            return x;
+        case ENodeType::Attr:
+            return ConvertToListImpl<Strict, AutoConvert, Converter>(x.GetVariantItem().Release(), valueBuilder, pos);
+        default:
+            if constexpr (Strict) {
+                if (!IsNodeType<ENodeType::List>(x)) {
+                    UdfTerminate((TStringBuilder() << valueBuilder->WithCalleePosition(pos) << " Cannot parse list from " << TDebugPrinter(x)).c_str());
+                }
+            }
+    }
+
+    return valueBuilder->NewEmptyList().Release();
+}
+
+// Converts a DOM node to a dict value, optionally converting every payload with Converter.
+//  - Empty input, an unboxed dict node, or (when not Strict) a node of another kind
+//    gives the empty result.
+//  - Attr nodes are unwrapped and processed recursively.
+//  - Without a Converter, or when the dict has zero length, a boxed dict node is
+//    returned unchanged.
+//  - With a Converter and Strict or AutoConvert set, payloads are converted lazily
+//    through TLazyConveterT.
+//  - With a Converter and neither flag set, payloads are converted eagerly into a new
+//    TMapNode; pairs whose payload converts to an empty value are dropped, and the
+//    empty result is returned when nothing remains.
+//  - In Strict mode a node that is not a dict terminates the UDF with an error message.
+// NOTE(review): the empty result is produced with NewEmptyList() even on this dict path;
+// presumably the engine accepts it as an empty dict - confirm.
+template<bool Strict, bool AutoConvert, TConverterPtr Converter = nullptr>
+TUnboxedValuePod ConvertToDictImpl(TUnboxedValuePod x, const IValueBuilder* valueBuilder, const TSourcePosition& pos) {
+    if (!x) {
+        return valueBuilder->NewEmptyList().Release();
+    }
+
+    switch (GetNodeType(x)) {
+        case ENodeType::Dict:
+            if (!x.IsBoxed()) {
+                break;
+            }
+            if constexpr (Converter != nullptr) {
+                if constexpr (Strict || AutoConvert) {
+                    return TUnboxedValuePod(new TLazyConveterT<Converter>(x, valueBuilder, pos));
+                } else if (const auto count = x.GetDictLength()) {
+                    TSmallVec<TPair, TStdAllocatorForUdf<TPair>> kept;
+                    kept.reserve(count);
+                    const auto iter = x.GetDictIterator();
+                    for (TUnboxedValue name, value; iter.NextPair(name, value);) {
+                        if (auto item = Converter(value, valueBuilder, pos)) {
+                            kept.emplace_back(std::move(name), std::move(item));
+                        }
+                    }
+                    if (kept.empty()) {
+                        break;
+                    }
+                    return TUnboxedValuePod(IBoxedValuePtr(new TMapNode(kept.data(), kept.size())));
+                }
+            }
+            return x;
+        case ENodeType::Attr:
+            return ConvertToDictImpl<Strict, AutoConvert, Converter>(x.GetVariantItem().Release(), valueBuilder, pos);
+        default:
+            if constexpr (Strict) {
+                if (!IsNodeType<ENodeType::Dict>(x)) {
+                    UdfTerminate((TStringBuilder() << valueBuilder->WithCalleePosition(pos) << " Cannot parse dict from " << TDebugPrinter(x)).c_str());
+                }
+            }
+    }
+
+    return valueBuilder->NewEmptyList().Release();
+}
+
+// Fetches one child of a container node by a string key.
+//  - Dict: the key is looked up directly.
+//  - List: the key is parsed as a signed 32-bit index accepted in [-size, size);
+//    negative indices count from the end of the list.
+//  - Attr: the node is unwrapped and the lookup is retried on the inner value.
+//  - Anything else, an unboxed container, a missing key, or an unparsable or
+//    out-of-range index gives an empty value.
+// When Converter is not null the found child is passed through it before being returned.
+template <TConverterPtr Converter = nullptr>
+TUnboxedValuePod LookupImpl(TUnboxedValuePod dict, const TUnboxedValuePod key, const IValueBuilder* valueBuilder, const TSourcePosition& pos) {
+    switch (GetNodeType(dict)) {
+        case ENodeType::Dict: {
+            if (!dict.IsBoxed()) {
+                return {};
+            }
+            auto found = dict.Lookup(key);
+            if (!found) {
+                return {};
+            }
+            if constexpr (Converter != nullptr) {
+                return Converter(found.Release().GetOptionalValue(), valueBuilder, pos);
+            }
+            return found.Release();
+        }
+        case ENodeType::List: {
+            if (!dict.IsBoxed()) {
+                return {};
+            }
+            const i32 length = dict.GetListLength();
+            if (!length) {
+                return {};
+            }
+            i32 index;
+            // The parse must succeed before index is read, hence the short-circuit order.
+            if (!TryFromString(key.AsStringRef(), index) || index >= length || index < -length) {
+                return {};
+            }
+            if (index < 0) {
+                index += length;
+            }
+            if constexpr (Converter != nullptr) {
+                return Converter(dict.Lookup(TUnboxedValuePod(index)).Release(), valueBuilder, pos);
+            }
+            return dict.Lookup(TUnboxedValuePod(index)).Release();
+        }
+        case ENodeType::Attr:
+            return LookupImpl<Converter>(dict.GetVariantItem().Release(), key, valueBuilder, pos);
+        default:
+            return {};
+    }
+}
+
+// Resolves a slash-separated path against a node, one segment at a time.
+// The path must be at least two characters long, start with '/' and not end with '/';
+// otherwise the UDF is terminated with an "Invalid YPath" message. A second leading
+// '/' is skipped as well.
+// For each segment:
+//  - "@" re-tags an Attr node as a Dict via SetNodeType (presumably exposing its
+//    attribute map - confirm against the DOM node layout); on a non-Attr node the
+//    result is empty;
+//  - any other segment first unwraps an Attr node, then is looked up with LookupImpl
+//    using a substring of the original key value.
+// Resolution stops with an empty value as soon as a step yields nothing.
+// When Converter is not null the final node is passed through it.
+template <TConverterPtr Converter = nullptr>
+TUnboxedValuePod YPathImpl(TUnboxedValuePod dict, const TUnboxedValuePod key, const IValueBuilder* valueBuilder, const TSourcePosition& pos) {
+    const std::string_view path = key.AsStringRef();
+    if (path.size() < 2U || path.front() != '/' || path.back() == '/') {
+        UdfTerminate((::TStringBuilder() << valueBuilder->WithCalleePosition(pos) << " Invalid YPath: '" << path << "'.").data());
+    }
+
+    for (const auto part : StringSplitter(path.substr(path[1U] == '/' ? 2U : 1U)).Split('/')) {
+        const bool isAttr = IsNodeType<ENodeType::Attr>(dict);
+        const std::string_view token = part.Token();
+        if (token == "@") {
+            if (!isAttr) {
+                return {};
+            }
+            dict = SetNodeType<ENodeType::Dict>(dict);
+        } else {
+            if (isAttr) {
+                dict = dict.GetVariantItem().Release();
+            }
+
+            // The token is a view into `path`, so its offset locates it inside `key`.
+            const auto subkey = valueBuilder->SubString(key, std::distance(path.begin(), token.begin()), token.size());
+            dict = LookupImpl<nullptr>(dict, subkey, valueBuilder, pos);
+        }
+
+        if (!dict) {
+            return {};
+        }
+    }
+
+    if constexpr (Converter != nullptr) {
+        return Converter(dict, valueBuilder, pos);
+    }
+
+    return dict;
+}
+
+// Tells whether a container node has a child addressed by `key`.
+//  - Attr: unwrapped, then checked recursively.
+//  - Dict: result of Contains(key); false for an unboxed dict.
+//  - List: the key is parsed as a signed 32-bit index and checked against [-size, size);
+//    false for an unboxed or zero-length list or an unparsable key.
+//  - Any other node: terminates the UDF when Strict is set and AutoConvert is not,
+//    otherwise gives an empty value.
+template<bool Strict, bool AutoConvert>
+TUnboxedValuePod ContainsImpl(TUnboxedValuePod dict, TUnboxedValuePod key, const IValueBuilder* valueBuilder, const TSourcePosition& pos) {
+    switch (GetNodeType(dict)) {
+        case ENodeType::Attr:
+            return ContainsImpl<Strict, AutoConvert>(dict.GetVariantItem().Release(), key, valueBuilder, pos);
+        case ENodeType::Dict:
+            return TUnboxedValuePod(dict.IsBoxed() && dict.Contains(key));
+        case ENodeType::List: {
+            if (dict.IsBoxed()) {
+                const i32 length = dict.GetListLength();
+                i32 index;
+                if (length && TryFromString(key.AsStringRef(), index)) {
+                    return TUnboxedValuePod(index >= -length && index < length);
+                }
+            }
+            return TUnboxedValuePod(false);
+        }
+        default:
+            if constexpr (Strict && !AutoConvert)
+                UdfTerminate((TStringBuilder() << valueBuilder->WithCalleePosition(pos) << " Can't check contains on scalar " << TDebugPrinter(dict)).c_str());
+            else
+                return {};
+    }
+}
+
+// Returns the entry count of a dict node or the item count of a list node.
+// Attribute nodes are unwrapped first; dict and list nodes that are not boxed report zero.
+// Any other node kind terminates the UDF when Strict is set without AutoConvert, and yields
+// an empty value otherwise.
+template<bool Strict, bool AutoConvert>
+TUnboxedValuePod GetLengthImpl(TUnboxedValuePod dict, const IValueBuilder* valueBuilder, const TSourcePosition& pos) {
+    switch (GetNodeType(dict)) {
+        case ENodeType::Attr:
+            return GetLengthImpl<Strict, AutoConvert>(dict.GetVariantItem().Release(), valueBuilder, pos);
+        case ENodeType::Dict:
+            if (!dict.IsBoxed()) {
+                return TUnboxedValuePod(ui64(0));
+            }
+            return TUnboxedValuePod(ui64(dict.GetDictLength()));
+        case ENodeType::List:
+            if (!dict.IsBoxed()) {
+                return TUnboxedValuePod(ui64(0));
+            }
+            return TUnboxedValuePod(ui64(dict.GetListLength()));
+        default:
+            if constexpr (!Strict || AutoConvert)
+                return {};
+            else
+                UdfTerminate((TStringBuilder() << valueBuilder->WithCalleePosition(pos) << " Can't get container length from scalar " << TDebugPrinter(dict)).c_str());
+    }
+}
+
+}
+
+// UDF: node resource (+ optional options resource) -> optional bool.
+// Forwards to the ConvertToBool instantiation selected by the parsed Strict/AutoConvert options.
+SIMPLE_UDF_WITH_OPTIONAL_ARGS(TConvertToBool, TOptional<bool>(TAutoMap<TNodeResource>, TOptional<TOptionsResource>), 1) {
+    const auto opts = ParseOptions(args[1]);
+    const auto convert = opts.Strict
+        ? (opts.AutoConvert ? &ConvertToBool<true, true> : &ConvertToBool<true, false>)
+        : (opts.AutoConvert ? &ConvertToBool<false, true> : &ConvertToBool<false, false>);
+    return convert(args[0], valueBuilder, GetPos());
+}
+
+// UDF: node resource (+ optional options resource) -> optional i64.
+// The non-strict variants are handled first; strict ones follow.
+SIMPLE_UDF_WITH_OPTIONAL_ARGS(TConvertToInt64, TOptional<i64>(TAutoMap<TNodeResource>, TOptional<TOptionsResource>), 1) {
+    const auto opts = ParseOptions(args[1]);
+    if (!opts.Strict) {
+        return (opts.AutoConvert ? &ConvertToIntegral<false, true, i64> : &ConvertToIntegral<false, false, i64>)(args[0], valueBuilder, GetPos());
+    }
+    return (opts.AutoConvert ? &ConvertToIntegral<true, true, i64> : &ConvertToIntegral<true, false, i64>)(args[0], valueBuilder, GetPos());
+}
+
+// UDF: node resource (+ optional options resource) -> optional ui64.
+// Each Strict/AutoConvert combination calls its own ConvertToIntegral instantiation.
+SIMPLE_UDF_WITH_OPTIONAL_ARGS(TConvertToUint64, TOptional<ui64>(TAutoMap<TNodeResource>, TOptional<TOptionsResource>), 1) {
+    const auto opts = ParseOptions(args[1]);
+    if (opts.Strict) {
+        if (opts.AutoConvert) {
+            return ConvertToIntegral<true, true, ui64>(args[0], valueBuilder, GetPos());
+        }
+        return ConvertToIntegral<true, false, ui64>(args[0], valueBuilder, GetPos());
+    }
+    if (opts.AutoConvert) {
+        return ConvertToIntegral<false, true, ui64>(args[0], valueBuilder, GetPos());
+    }
+    return ConvertToIntegral<false, false, ui64>(args[0], valueBuilder, GetPos());
+}
+
+// UDF: node resource (+ optional options resource) -> optional double.
+// Forwards to the ConvertToFloat instantiation selected by the parsed options.
+SIMPLE_UDF_WITH_OPTIONAL_ARGS(TConvertToDouble, TOptional<double>(TAutoMap<TNodeResource>, TOptional<TOptionsResource>), 1) {
+    const auto opts = ParseOptions(args[1]);
+    const auto convert = opts.Strict
+        ? (opts.AutoConvert ? &ConvertToFloat<true, true, double> : &ConvertToFloat<true, false, double>)
+        : (opts.AutoConvert ? &ConvertToFloat<false, true, double> : &ConvertToFloat<false, false, double>);
+    return convert(args[0], valueBuilder, GetPos());
+}
+
+// UDF: node resource (+ optional options resource) -> optional string.
+// The non-strict variants are handled first; strict ones follow.
+SIMPLE_UDF_WITH_OPTIONAL_ARGS(TConvertToString, TOptional<char*>(TAutoMap<TNodeResource>, TOptional<TOptionsResource>), 1) {
+    const auto opts = ParseOptions(args[1]);
+    if (!opts.Strict) {
+        return (opts.AutoConvert ? &ConvertToString<false, true, false> : &ConvertToString<false, false, false>)(args[0], valueBuilder, GetPos());
+    }
+    return (opts.AutoConvert ? &ConvertToString<true, true, false> : &ConvertToString<true, false, false>)(args[0], valueBuilder, GetPos());
+}
+
+// UDF: optional node resource (+ optional options resource) -> list of node resources.
+// Each Strict/AutoConvert combination calls its own ConvertToListImpl instantiation.
+SIMPLE_UDF_WITH_OPTIONAL_ARGS(TConvertToList, TListType<TNodeResource>(TOptional<TNodeResource>, TOptional<TOptionsResource>), 1) {
+    const auto opts = ParseOptions(args[1]);
+    if (opts.Strict) {
+        if (opts.AutoConvert) {
+            return ConvertToListImpl<true, true>(args[0], valueBuilder, GetPos());
+        }
+        return ConvertToListImpl<true, false>(args[0], valueBuilder, GetPos());
+    }
+    if (opts.AutoConvert) {
+        return ConvertToListImpl<false, true>(args[0], valueBuilder, GetPos());
+    }
+    return ConvertToListImpl<false, false>(args[0], valueBuilder, GetPos());
+}
+
+// UDF: optional node resource (+ optional options resource) -> list of i64.
+// The list converter and its per-item ConvertToIntegral converter share the same option flags.
+SIMPLE_UDF_WITH_OPTIONAL_ARGS(TConvertToInt64List, TListType<i64>(TOptional<TNodeResource>, TOptional<TOptionsResource>), 1) {
+    const auto opts = ParseOptions(args[1]);
+    const auto convert = opts.Strict
+        ? (opts.AutoConvert
+            ? &ConvertToListImpl<true, true, &ConvertToIntegral<true, true, i64>>
+            : &ConvertToListImpl<true, false, &ConvertToIntegral<true, false, i64>>)
+        : (opts.AutoConvert
+            ? &ConvertToListImpl<false, true, &ConvertToIntegral<false, true, i64>>
+            : &ConvertToListImpl<false, false, &ConvertToIntegral<false, false, i64>>);
+    return convert(args[0], valueBuilder, GetPos());
+}
+
+// UDF: optional node resource (+ optional options resource) -> list of ui64.
+// The non-strict variants are handled first; strict ones follow.
+SIMPLE_UDF_WITH_OPTIONAL_ARGS(TConvertToUint64List, TListType<ui64>(TOptional<TNodeResource>, TOptional<TOptionsResource>), 1) {
+    const auto opts = ParseOptions(args[1]);
+    if (!opts.Strict) {
+        return (opts.AutoConvert ? &ConvertToListImpl<false, true, &ConvertToIntegral<false, true, ui64>> : &ConvertToListImpl<false, false, &ConvertToIntegral<false, false, ui64>>)(args[0], valueBuilder, GetPos());
+    }
+    return (opts.AutoConvert ? &ConvertToListImpl<true, true, &ConvertToIntegral<true, true, ui64>> : &ConvertToListImpl<true, false, &ConvertToIntegral<true, false, ui64>>)(args[0], valueBuilder, GetPos());
+}
+
+// UDF: optional node resource (+ optional options resource) -> list of bool.
+// The list converter and its per-item ConvertToBool converter share the same option flags.
+SIMPLE_UDF_WITH_OPTIONAL_ARGS(TConvertToBoolList, TListType<bool>(TOptional<TNodeResource>, TOptional<TOptionsResource>), 1) {
+    const auto opts = ParseOptions(args[1]);
+    const auto convert = opts.Strict
+        ? (opts.AutoConvert
+            ? &ConvertToListImpl<true, true, &ConvertToBool<true, true>>
+            : &ConvertToListImpl<true, false, &ConvertToBool<true, false>>)
+        : (opts.AutoConvert
+            ? &ConvertToListImpl<false, true, &ConvertToBool<false, true>>
+            : &ConvertToListImpl<false, false, &ConvertToBool<false, false>>);
+    return convert(args[0], valueBuilder, GetPos());
+}
+
+// UDF: optional node resource (+ optional options resource) -> list of double.
+// The non-strict variants are handled first; strict ones follow.
+SIMPLE_UDF_WITH_OPTIONAL_ARGS(TConvertToDoubleList, TListType<double>(TOptional<TNodeResource>, TOptional<TOptionsResource>), 1) {
+    const auto opts = ParseOptions(args[1]);
+    if (!opts.Strict) {
+        return (opts.AutoConvert ? &ConvertToListImpl<false, true, &ConvertToFloat<false, true, double>> : &ConvertToListImpl<false, false, &ConvertToFloat<false, false, double>>)(args[0], valueBuilder, GetPos());
+    }
+    return (opts.AutoConvert ? &ConvertToListImpl<true, true, &ConvertToFloat<true, true, double>> : &ConvertToListImpl<true, false, &ConvertToFloat<true, false, double>>)(args[0], valueBuilder, GetPos());
+}
+
+// UDF: optional node resource (+ optional options resource) -> list of strings.
+// The list converter and its per-item ConvertToString converter share the same option flags.
+SIMPLE_UDF_WITH_OPTIONAL_ARGS(TConvertToStringList, TListType<char*>(TOptional<TNodeResource>, TOptional<TOptionsResource>), 1) {
+    const auto opts = ParseOptions(args[1]);
+    const auto convert = opts.Strict
+        ? (opts.AutoConvert
+            ? &ConvertToListImpl<true, true, &ConvertToString<true, true, false>>
+            : &ConvertToListImpl<true, false, &ConvertToString<true, false, false>>)
+        : (opts.AutoConvert
+            ? &ConvertToListImpl<false, true, &ConvertToString<false, true, false>>
+            : &ConvertToListImpl<false, false, &ConvertToString<false, false, false>>);
+    return convert(args[0], valueBuilder, GetPos());
+}
+
+// UDF: optional node resource (+ optional options resource) -> TDictType.
+// The non-strict variants are handled first; strict ones follow.
+SIMPLE_UDF_WITH_OPTIONAL_ARGS(TConvertToDict, TDictType(TOptional<TNodeResource>, TOptional<TOptionsResource>), 1) {
+    const auto opts = ParseOptions(args[1]);
+    if (!opts.Strict) {
+        return (opts.AutoConvert ? &ConvertToDictImpl<false, true> : &ConvertToDictImpl<false, false>)(args[0], valueBuilder, GetPos());
+    }
+    return (opts.AutoConvert ? &ConvertToDictImpl<true, true> : &ConvertToDictImpl<true, false>)(args[0], valueBuilder, GetPos());
+}
+
+// UDF: optional node resource (+ optional options resource) -> TInt64DictType.
+// The dict converter and its per-value ConvertToIntegral converter share the same option flags.
+SIMPLE_UDF_WITH_OPTIONAL_ARGS(TConvertToInt64Dict, TInt64DictType(TOptional<TNodeResource>, TOptional<TOptionsResource>), 1) {
+    const auto opts = ParseOptions(args[1]);
+    const auto convert = opts.Strict
+        ? (opts.AutoConvert
+            ? &ConvertToDictImpl<true, true, &ConvertToIntegral<true, true, i64>>
+            : &ConvertToDictImpl<true, false, &ConvertToIntegral<true, false, i64>>)
+        : (opts.AutoConvert
+            ? &ConvertToDictImpl<false, true, &ConvertToIntegral<false, true, i64>>
+            : &ConvertToDictImpl<false, false, &ConvertToIntegral<false, false, i64>>);
+    return convert(args[0], valueBuilder, GetPos());
+}
+
+// UDF: optional node resource (+ optional options resource) -> TUint64DictType.
+// The non-strict variants are handled first; strict ones follow.
+SIMPLE_UDF_WITH_OPTIONAL_ARGS(TConvertToUint64Dict, TUint64DictType(TOptional<TNodeResource>, TOptional<TOptionsResource>), 1) {
+    const auto opts = ParseOptions(args[1]);
+    if (!opts.Strict) {
+        return (opts.AutoConvert ? &ConvertToDictImpl<false, true, &ConvertToIntegral<false, true, ui64>> : &ConvertToDictImpl<false, false, &ConvertToIntegral<false, false, ui64>>)(args[0], valueBuilder, GetPos());
+    }
+    return (opts.AutoConvert ? &ConvertToDictImpl<true, true, &ConvertToIntegral<true, true, ui64>> : &ConvertToDictImpl<true, false, &ConvertToIntegral<true, false, ui64>>)(args[0], valueBuilder, GetPos());
+}
+
+// UDF: optional node resource (+ optional options resource) -> TBoolDictType.
+// The dict converter and its per-value ConvertToBool converter share the same option flags.
+SIMPLE_UDF_WITH_OPTIONAL_ARGS(TConvertToBoolDict, TBoolDictType(TOptional<TNodeResource>, TOptional<TOptionsResource>), 1) {
+    const auto opts = ParseOptions(args[1]);
+    const auto convert = opts.Strict
+        ? (opts.AutoConvert
+            ? &ConvertToDictImpl<true, true, &ConvertToBool<true, true>>
+            : &ConvertToDictImpl<true, false, &ConvertToBool<true, false>>)
+        : (opts.AutoConvert
+            ? &ConvertToDictImpl<false, true, &ConvertToBool<false, true>>
+            : &ConvertToDictImpl<false, false, &ConvertToBool<false, false>>);
+    return convert(args[0], valueBuilder, GetPos());
+}
+
+// UDF: optional node resource (+ optional options resource) -> TDoubleDictType.
+// The non-strict variants are handled first; strict ones follow.
+SIMPLE_UDF_WITH_OPTIONAL_ARGS(TConvertToDoubleDict, TDoubleDictType(TOptional<TNodeResource>, TOptional<TOptionsResource>), 1) {
+    const auto opts = ParseOptions(args[1]);
+    if (!opts.Strict) {
+        return (opts.AutoConvert ? &ConvertToDictImpl<false, true, &ConvertToFloat<false, true, double>> : &ConvertToDictImpl<false, false, &ConvertToFloat<false, false, double>>)(args[0], valueBuilder, GetPos());
+    }
+    return (opts.AutoConvert ? &ConvertToDictImpl<true, true, &ConvertToFloat<true, true, double>> : &ConvertToDictImpl<true, false, &ConvertToFloat<true, false, double>>)(args[0], valueBuilder, GetPos());
+}
+
+// UDF: optional node resource (+ optional options resource) -> TStringDictType.
+// The dict converter and its per-value ConvertToString converter share the same option flags.
+SIMPLE_UDF_WITH_OPTIONAL_ARGS(TConvertToStringDict, TStringDictType(TOptional<TNodeResource>, TOptional<TOptionsResource>), 1) {
+    const auto opts = ParseOptions(args[1]);
+    const auto convert = opts.Strict
+        ? (opts.AutoConvert
+            ? &ConvertToDictImpl<true, true, &ConvertToString<true, true, false>>
+            : &ConvertToDictImpl<true, false, &ConvertToString<true, false, false>>)
+        : (opts.AutoConvert
+            ? &ConvertToDictImpl<false, true, &ConvertToString<false, true, false>>
+            : &ConvertToDictImpl<false, false, &ConvertToString<false, false, false>>);
+    return convert(args[0], valueBuilder, GetPos());
+}
+
+// UDF: returns the node itself when it is an attribute node, and the builder's empty list
+// value for every other node kind.
+SIMPLE_STRICT_UDF(TAttributes, TDictType(TAutoMap<TNodeResource>)) {
+    const auto node = args[0];
+    if (!IsNodeType<ENodeType::Attr>(node)) {
+        return valueBuilder->NewEmptyList();
+    }
+
+    return node;
+}
+
+// UDF: (node resource, string key, optional options resource) -> optional bool.
+// Forwards to the ContainsImpl instantiation selected by the parsed options.
+SIMPLE_UDF_WITH_OPTIONAL_ARGS(TContains, TOptional<bool>(TAutoMap<TNodeResource>, char*, TOptional<TOptionsResource>), 1) {
+    const auto opts = ParseOptions(args[2]);
+    const auto contains = opts.Strict
+        ? (opts.AutoConvert ? &ContainsImpl<true, true> : &ContainsImpl<true, false>)
+        : (opts.AutoConvert ? &ContainsImpl<false, true> : &ContainsImpl<false, false>);
+    return contains(args[0], args[1], valueBuilder, GetPos());
+}
+
+// UDF: node resource (+ optional options resource) -> optional ui64.
+// Each Strict/AutoConvert combination calls its own GetLengthImpl instantiation.
+SIMPLE_UDF_WITH_OPTIONAL_ARGS(TGetLength, TOptional<ui64>(TAutoMap<TNodeResource>, TOptional<TOptionsResource>), 1) {
+    const auto opts = ParseOptions(args[1]);
+    if (opts.Strict) {
+        if (opts.AutoConvert) {
+            return GetLengthImpl<true, true>(args[0], valueBuilder, GetPos());
+        }
+        return GetLengthImpl<true, false>(args[0], valueBuilder, GetPos());
+    }
+    if (opts.AutoConvert) {
+        return GetLengthImpl<false, true>(args[0], valueBuilder, GetPos());
+    }
+    return GetLengthImpl<false, false>(args[0], valueBuilder, GetPos());
+}
+
+// UDF: (node resource, string key, optional options resource) -> optional node resource.
+// The options argument is part of the signature but is not consulted here.
+SIMPLE_STRICT_UDF_WITH_OPTIONAL_ARGS(TLookup, TOptional<TNodeResource>(TAutoMap<TNodeResource>, char*, TOptional<TOptionsResource>), 1) {
+    const auto& node = args[0];
+    const auto& key = args[1];
+    return LookupImpl(node, key, valueBuilder, GetPos());
+}
+
+// UDF: (node resource, string key, optional options resource) -> optional bool.
+// LookupImpl is instantiated with the ConvertToBool variant matching the parsed options.
+SIMPLE_UDF_WITH_OPTIONAL_ARGS(TLookupBool, TOptional<bool>(TAutoMap<TNodeResource>, char*, TOptional<TOptionsResource>), 1) {
+    const auto opts = ParseOptions(args[2]);
+    if (!opts.Strict) {
+        return (opts.AutoConvert ? &LookupImpl<&ConvertToBool<false, true>> : &LookupImpl<&ConvertToBool<false, false>>)(args[0], args[1], valueBuilder, GetPos());
+    }
+    return (opts.AutoConvert ? &LookupImpl<&ConvertToBool<true, true>> : &LookupImpl<&ConvertToBool<true, false>>)(args[0], args[1], valueBuilder, GetPos());
+}
+
+// UDF: (node resource, string key, optional options resource) -> optional i64.
+// LookupImpl is instantiated with the ConvertToIntegral variant matching the parsed options.
+SIMPLE_UDF_WITH_OPTIONAL_ARGS(TLookupInt64, TOptional<i64>(TAutoMap<TNodeResource>, char*, TOptional<TOptionsResource>), 1) {
+    const auto opts = ParseOptions(args[2]);
+    const auto lookup = opts.Strict
+        ? (opts.AutoConvert
+            ? &LookupImpl<&ConvertToIntegral<true, true, i64>>
+            : &LookupImpl<&ConvertToIntegral<true, false, i64>>)
+        : (opts.AutoConvert
+            ? &LookupImpl<&ConvertToIntegral<false, true, i64>>
+            : &LookupImpl<&ConvertToIntegral<false, false, i64>>);
+    return lookup(args[0], args[1], valueBuilder, GetPos());
+}
+
+// UDF: (node resource, string key, optional options resource) -> optional ui64.
+// The non-strict variants are handled first; strict ones follow.
+SIMPLE_UDF_WITH_OPTIONAL_ARGS(TLookupUint64, TOptional<ui64>(TAutoMap<TNodeResource>, char*, TOptional<TOptionsResource>), 1) {
+    const auto opts = ParseOptions(args[2]);
+    if (!opts.Strict) {
+        return (opts.AutoConvert ? &LookupImpl<&ConvertToIntegral<false, true, ui64>> : &LookupImpl<&ConvertToIntegral<false, false, ui64>>)(args[0], args[1], valueBuilder, GetPos());
+    }
+    return (opts.AutoConvert ? &LookupImpl<&ConvertToIntegral<true, true, ui64>> : &LookupImpl<&ConvertToIntegral<true, false, ui64>>)(args[0], args[1], valueBuilder, GetPos());
+}
+
+// UDF: (node resource, string key, optional options resource) -> optional double.
+// LookupImpl is instantiated with the ConvertToFloat variant matching the parsed options.
+SIMPLE_UDF_WITH_OPTIONAL_ARGS(TLookupDouble, TOptional<double>(TAutoMap<TNodeResource>, char*, TOptional<TOptionsResource>), 1) {
+    const auto opts = ParseOptions(args[2]);
+    const auto lookup = opts.Strict
+        ? (opts.AutoConvert
+            ? &LookupImpl<&ConvertToFloat<true, true, double>>
+            : &LookupImpl<&ConvertToFloat<true, false, double>>)
+        : (opts.AutoConvert
+            ? &LookupImpl<&ConvertToFloat<false, true, double>>
+            : &LookupImpl<&ConvertToFloat<false, false, double>>);
+    return lookup(args[0], args[1], valueBuilder, GetPos());
+}
+
+// UDF: (node resource, string key, optional options resource) -> optional string.
+// The non-strict variants are handled first; strict ones follow.
+SIMPLE_UDF_WITH_OPTIONAL_ARGS(TLookupString, TOptional<char*>(TAutoMap<TNodeResource>, char*, TOptional<TOptionsResource>), 1) {
+    const auto opts = ParseOptions(args[2]);
+    if (!opts.Strict) {
+        return (opts.AutoConvert ? &LookupImpl<&ConvertToString<false, true, false>> : &LookupImpl<&ConvertToString<false, false, false>>)(args[0], args[1], valueBuilder, GetPos());
+    }
+    return (opts.AutoConvert ? &LookupImpl<&ConvertToString<true, true, false>> : &LookupImpl<&ConvertToString<true, false, false>>)(args[0], args[1], valueBuilder, GetPos());
+}
+
+// UDF: (node resource, string key, optional options resource) -> optional list of node resources.
+// LookupImpl is instantiated with the ConvertToListImpl variant matching the parsed options.
+SIMPLE_UDF_WITH_OPTIONAL_ARGS(TLookupList, TOptional<TListType<TNodeResource>>(TAutoMap<TNodeResource>, char*, TOptional<TOptionsResource>), 1) {
+    const auto opts = ParseOptions(args[2]);
+    const auto lookup = opts.Strict
+        ? (opts.AutoConvert
+            ? &LookupImpl<&ConvertToListImpl<true, true>>
+            : &LookupImpl<&ConvertToListImpl<true, false>>)
+        : (opts.AutoConvert
+            ? &LookupImpl<&ConvertToListImpl<false, true>>
+            : &LookupImpl<&ConvertToListImpl<false, false>>);
+    return lookup(args[0], args[1], valueBuilder, GetPos());
+}
+
+// UDF: (node resource, string key, optional options resource) -> optional TDictType.
+// Each Strict/AutoConvert combination calls its own LookupImpl instantiation.
+SIMPLE_UDF_WITH_OPTIONAL_ARGS(TLookupDict, TOptional<TDictType>(TAutoMap<TNodeResource>, char*, TOptional<TOptionsResource>), 1) {
+    const auto opts = ParseOptions(args[2]);
+    if (opts.Strict) {
+        if (opts.AutoConvert) {
+            return LookupImpl<&ConvertToDictImpl<true, true>>(args[0], args[1], valueBuilder, GetPos());
+        }
+        return LookupImpl<&ConvertToDictImpl<true, false>>(args[0], args[1], valueBuilder, GetPos());
+    }
+    if (opts.AutoConvert) {
+        return LookupImpl<&ConvertToDictImpl<false, true>>(args[0], args[1], valueBuilder, GetPos());
+    }
+    return LookupImpl<&ConvertToDictImpl<false, false>>(args[0], args[1], valueBuilder, GetPos());
+}
+
+// UDF: (node resource, string path, optional options resource) -> optional node resource.
+// The options argument is part of the signature but is not consulted here.
+SIMPLE_UDF_WITH_OPTIONAL_ARGS(TYPath, TOptional<TNodeResource>(TAutoMap<TNodeResource>, char*, TOptional<TOptionsResource>), 1) {
+    const auto& node = args[0];
+    const auto& path = args[1];
+    return YPathImpl(node, path, valueBuilder, GetPos());
+}
+
+// UDF: (node resource, string path, optional options resource) -> optional bool.
+// YPathImpl is instantiated with the ConvertToBool variant matching the parsed options.
+SIMPLE_UDF_WITH_OPTIONAL_ARGS(TYPathBool, TOptional<bool>(TAutoMap<TNodeResource>, char*, TOptional<TOptionsResource>), 1) {
+    const auto opts = ParseOptions(args[2]);
+    const auto resolve = opts.Strict
+        ? (opts.AutoConvert
+            ? &YPathImpl<&ConvertToBool<true, true>>
+            : &YPathImpl<&ConvertToBool<true, false>>)
+        : (opts.AutoConvert
+            ? &YPathImpl<&ConvertToBool<false, true>>
+            : &YPathImpl<&ConvertToBool<false, false>>);
+    return resolve(args[0], args[1], valueBuilder, GetPos());
+}
+
+// UDF: (node resource, string path, optional options resource) -> optional i64.
+// The non-strict variants are handled first; strict ones follow.
+SIMPLE_UDF_WITH_OPTIONAL_ARGS(TYPathInt64, TOptional<i64>(TAutoMap<TNodeResource>, char*, TOptional<TOptionsResource>), 1) {
+    const auto opts = ParseOptions(args[2]);
+    if (!opts.Strict) {
+        return (opts.AutoConvert ? &YPathImpl<&ConvertToIntegral<false, true, i64>> : &YPathImpl<&ConvertToIntegral<false, false, i64>>)(args[0], args[1], valueBuilder, GetPos());
+    }
+    return (opts.AutoConvert ? &YPathImpl<&ConvertToIntegral<true, true, i64>> : &YPathImpl<&ConvertToIntegral<true, false, i64>>)(args[0], args[1], valueBuilder, GetPos());
+}
+
+// UDF: (node resource, string path, optional options resource) -> optional ui64.
+// YPathImpl is instantiated with the ConvertToIntegral variant matching the parsed options.
+SIMPLE_UDF_WITH_OPTIONAL_ARGS(TYPathUint64, TOptional<ui64>(TAutoMap<TNodeResource>, char*, TOptional<TOptionsResource>), 1) {
+    const auto opts = ParseOptions(args[2]);
+    const auto resolve = opts.Strict
+        ? (opts.AutoConvert
+            ? &YPathImpl<&ConvertToIntegral<true, true, ui64>>
+            : &YPathImpl<&ConvertToIntegral<true, false, ui64>>)
+        : (opts.AutoConvert
+            ? &YPathImpl<&ConvertToIntegral<false, true, ui64>>
+            : &YPathImpl<&ConvertToIntegral<false, false, ui64>>);
+    return resolve(args[0], args[1], valueBuilder, GetPos());
+}
+
+// UDF: (node resource, string path, optional options resource) -> optional double.
+// The non-strict variants are handled first; strict ones follow.
+SIMPLE_UDF_WITH_OPTIONAL_ARGS(TYPathDouble, TOptional<double>(TAutoMap<TNodeResource>, char*, TOptional<TOptionsResource>), 1) {
+    const auto opts = ParseOptions(args[2]);
+    if (!opts.Strict) {
+        return (opts.AutoConvert ? &YPathImpl<&ConvertToFloat<false, true, double>> : &YPathImpl<&ConvertToFloat<false, false, double>>)(args[0], args[1], valueBuilder, GetPos());
+    }
+    return (opts.AutoConvert ? &YPathImpl<&ConvertToFloat<true, true, double>> : &YPathImpl<&ConvertToFloat<true, false, double>>)(args[0], args[1], valueBuilder, GetPos());
+}
+
+// UDF: (node resource, string path, optional options resource) -> optional string.
+// YPathImpl is instantiated with the ConvertToString variant matching the parsed options.
+SIMPLE_UDF_WITH_OPTIONAL_ARGS(TYPathString, TOptional<char*>(TAutoMap<TNodeResource>, char*, TOptional<TOptionsResource>), 1) {
+    const auto opts = ParseOptions(args[2]);
+    const auto resolve = opts.Strict
+        ? (opts.AutoConvert
+            ? &YPathImpl<&ConvertToString<true, true, false>>
+            : &YPathImpl<&ConvertToString<true, false, false>>)
+        : (opts.AutoConvert
+            ? &YPathImpl<&ConvertToString<false, true, false>>
+            : &YPathImpl<&ConvertToString<false, false, false>>);
+    return resolve(args[0], args[1], valueBuilder, GetPos());
+}
+
+// UDF: (node resource, string path, optional options resource) -> optional list of node resources.
+// The non-strict variants are handled first; strict ones follow.
+SIMPLE_UDF_WITH_OPTIONAL_ARGS(TYPathList, TOptional<TListType<TNodeResource>>(TAutoMap<TNodeResource>, char*, TOptional<TOptionsResource>), 1) {
+    const auto opts = ParseOptions(args[2]);
+    if (!opts.Strict) {
+        return (opts.AutoConvert ? &YPathImpl<&ConvertToListImpl<false, true>> : &YPathImpl<&ConvertToListImpl<false, false>>)(args[0], args[1], valueBuilder, GetPos());
+    }
+    return (opts.AutoConvert ? &YPathImpl<&ConvertToListImpl<true, true>> : &YPathImpl<&ConvertToListImpl<true, false>>)(args[0], args[1], valueBuilder, GetPos());
+}
+
+// UDF: (node resource, string path, optional options resource) -> optional TDictType.
+// YPathImpl is instantiated with the ConvertToDictImpl variant matching the parsed options.
+SIMPLE_UDF_WITH_OPTIONAL_ARGS(TYPathDict, TOptional<TDictType>(TAutoMap<TNodeResource>, char*, TOptional<TOptionsResource>), 1) {
+    const auto opts = ParseOptions(args[2]);
+    const auto resolve = opts.Strict
+        ? (opts.AutoConvert
+            ? &YPathImpl<&ConvertToDictImpl<true, true>>
+            : &YPathImpl<&ConvertToDictImpl<true, false>>)
+        : (opts.AutoConvert
+            ? &YPathImpl<&ConvertToDictImpl<false, true>>
+            : &YPathImpl<&ConvertToDictImpl<false, false>>);
+    return resolve(args[0], args[1], valueBuilder, GetPos());
+}
+
+// UDF: node resource -> Yson value built from SerializeYsonDomToBinary.
+SIMPLE_STRICT_UDF(TSerialize, TYson(TAutoMap<TNodeResource>)) {
+    const auto& node = args[0];
+    return valueBuilder->NewString(SerializeYsonDomToBinary(node));
+}
+
+// UDF: node resource -> Yson value built from SerializeYsonDomToText.
+SIMPLE_STRICT_UDF(TSerializeText, TYson(TAutoMap<TNodeResource>)) {
+    const auto& node = args[0];
+    return valueBuilder->NewString(SerializeYsonDomToText(node));
+}
+
+// UDF: node resource -> Yson value built from SerializeYsonDomToPrettyText.
+SIMPLE_STRICT_UDF(TSerializePretty, TYson(TAutoMap<TNodeResource>)) {
+    const auto& node = args[0];
+    return valueBuilder->NewString(SerializeYsonDomToPrettyText(node));
+}
+
+constexpr char SkipMapEntity[] = "SkipMapEntity"; // name of a bool named argument of TSerializeJson
+constexpr char EncodeUtf8[] = "EncodeUtf8"; // name of a bool named argument of TSerializeJson
+constexpr char WriteNanAsString[] = "WriteNanAsString"; // name of a bool named argument of TSerializeJson
+
+// UDF: serializes a node resource to Json via SerializeJsonDom.
+// The three named bool arguments default to false when omitted. If serialization throws a
+// std::exception, strict options terminate the UDF with the exception text; otherwise an
+// empty value is returned.
+SIMPLE_UDF_WITH_OPTIONAL_ARGS(TSerializeJson, TOptional<TJson>(TAutoMap<TNodeResource>, TOptional<TOptionsResource>, TNamedArg<bool, SkipMapEntity>, TNamedArg<bool, EncodeUtf8>, TNamedArg<bool, WriteNanAsString>), 4) {
+    try {
+        const bool skipMapEntity = args[2].GetOrDefault(false);
+        const bool encodeUtf8 = args[3].GetOrDefault(false);
+        const bool writeNanAsString = args[4].GetOrDefault(false);
+        return valueBuilder->NewString(SerializeJsonDom(args[0], skipMapEntity, encodeUtf8, writeNanAsString));
+    } catch (const std::exception& error) {
+        if (ParseOptions(args[1]).Strict) {
+            UdfTerminate((::TStringBuilder() << valueBuilder->WithCalleePosition(GetPos()) << " " << error.what()).data());
+        }
+        return {};
+    }
+}
+
+// UDF: attaches the entries of a dict node (second argument) as attributes of the first node.
+// Returns an empty value when the second argument is not a dict node, or is neither embedded
+// nor boxed. Existing attributes on the first node are dropped before new ones are attached.
+SIMPLE_STRICT_UDF(TWithAttributes, TOptional<TNodeResource>(TAutoMap<TNodeResource>, TAutoMap<TNodeResource>)) {
+    Y_UNUSED(valueBuilder);
+    TUnboxedValue node = args[0];
+    auto attrs = args[1];
+
+    if (!IsNodeType<ENodeType::Dict>(attrs)) {
+        return {};
+    }
+
+    // Strip the current attribute wrapper, keeping only the wrapped value.
+    if (IsNodeType<ENodeType::Attr>(node)) {
+        node = node.GetVariantItem();
+    }
+
+    // An embedded dict node carries no boxed storage (presumably an empty dict): nothing to attach.
+    if (attrs.IsEmbedded()) {
+        return node;
+    }
+
+    if (!attrs.IsBoxed()) {
+        return {};
+    }
+
+    // clone dict as attrnode
+    if (const auto payload = attrs.GetResource()) {
+        return SetNodeType<ENodeType::Attr>(TUnboxedValuePod(new TAttrNode(std::move(node), static_cast<const TPair*>(payload), attrs.GetDictLength())));
+    }
+
+    // No resource pointer available: collect the pairs through the dict iterator instead.
+    TSmallVec<TPair, TStdAllocatorForUdf<TPair>> pairs;
+    pairs.reserve(attrs.GetDictLength());
+    const auto iter = attrs.GetDictIterator();
+    for (TUnboxedValue attrKey, attrValue; iter.NextPair(attrKey, attrValue);) {
+        pairs.emplace_back(std::move(attrKey), std::move(attrValue));
+    }
+
+    if (pairs.empty()) {
+        return node;
+    }
+
+    return SetNodeType<ENodeType::Attr>(TUnboxedValuePod(new TAttrNode(std::move(node), pairs.data(), pairs.size())));
+}
+
+// Tells whether a node, with any attribute wrapper removed, has the node type `Type`.
+template<ENodeType Type>
+TUnboxedValuePod IsTypeImpl(TUnboxedValuePod y) {
+    const auto payload = IsNodeType<ENodeType::Attr>(y) ? y.GetVariantItem().Release() : y;
+    return TUnboxedValuePod(IsNodeType<Type>(payload));
+}
+
+// UDF: true when the node (ignoring attributes) is of type String.
+SIMPLE_STRICT_UDF(TIsString, bool(TAutoMap<TNodeResource>)) {
+    Y_UNUSED(valueBuilder);
+    return IsTypeImpl<ENodeType::String>(args[0]);
+}
+
+// UDF: true when the node (ignoring attributes) is of type Int64.
+SIMPLE_STRICT_UDF(TIsInt64, bool(TAutoMap<TNodeResource>)) {
+    Y_UNUSED(valueBuilder);
+    return IsTypeImpl<ENodeType::Int64>(args[0]);
+}
+
+// UDF: true when the node (ignoring attributes) is of type Uint64.
+SIMPLE_STRICT_UDF(TIsUint64, bool(TAutoMap<TNodeResource>)) {
+    Y_UNUSED(valueBuilder);
+    return IsTypeImpl<ENodeType::Uint64>(args[0]);
+}
+
+// UDF: true when the node (ignoring attributes) is of type Bool.
+SIMPLE_STRICT_UDF(TIsBool, bool(TAutoMap<TNodeResource>)) {
+    Y_UNUSED(valueBuilder);
+    return IsTypeImpl<ENodeType::Bool>(args[0]);
+}
+
+// UDF: true when the node (ignoring attributes) is of type Double.
+SIMPLE_STRICT_UDF(TIsDouble, bool(TAutoMap<TNodeResource>)) {
+    Y_UNUSED(valueBuilder);
+    return IsTypeImpl<ENodeType::Double>(args[0]);
+}
+
+// UDF: true when the node (ignoring attributes) is of type List.
+SIMPLE_STRICT_UDF(TIsList, bool(TAutoMap<TNodeResource>)) {
+    Y_UNUSED(valueBuilder);
+    return IsTypeImpl<ENodeType::List>(args[0]);
+}
+
+// UDF: true when the node (ignoring attributes) is of type Dict.
+SIMPLE_STRICT_UDF(TIsDict, bool(TAutoMap<TNodeResource>)) {
+    Y_UNUSED(valueBuilder);
+    return IsTypeImpl<ENodeType::Dict>(args[0]);
+}
+
+// UDF: true when the node (ignoring attributes) is of type Entity.
+SIMPLE_STRICT_UDF(TIsEntity, bool(TAutoMap<TNodeResource>)) {
+    Y_UNUSED(valueBuilder);
+    return IsTypeImpl<ENodeType::Entity>(args[0]);
+}
+
+// UDF: compares two node resources with EquateDoms.
+SIMPLE_STRICT_UDF(TEquals, bool(TAutoMap<TNodeResource>, TAutoMap<TNodeResource>)) {
+    Y_UNUSED(valueBuilder);
+    const auto& lhs = args[0];
+    const auto& rhs = args[1];
+    return TUnboxedValuePod(EquateDoms(lhs, rhs));
+}
+
+// UDF: returns the HashDom value of a node resource.
+SIMPLE_STRICT_UDF(TGetHash, ui64(TAutoMap<TNodeResource>)) {
+    Y_UNUSED(valueBuilder);
+    const auto& node = args[0];
+    return TUnboxedValuePod(HashDom(node));
+}
+
+namespace {
+
+// Shared base of the type-aware callables below: keeps the call position, the type helper and
+// the shape type, and provides the recursive shape check used while declaring signatures.
+class TBase: public TBoxedValue {
+public:
+    using TTypeAwareMarker = bool;
+
+    TBase(TSourcePosition pos, const ITypeInfoHelper::TPtr typeHelper, const TType* shape)
+        : Pos_(pos)
+        , TypeHelper_(typeHelper)
+        , Shape_(shape)
+    {}
+
+protected:
+    // Walks `shape` recursively and returns the first type that is rejected, or nullptr when the
+    // whole shape is accepted. Null, EmptyList, EmptyDict and Variant are accepted only when
+    // MoreTypesAllowed is set. Dict keys must be String or Utf8 data types.
+    template<bool MoreTypesAllowed>
+    static const TType* CheckType(const ITypeInfoHelper::TPtr typeHelper, const TType* shape) {
+        switch (typeHelper->GetTypeKind(shape)) {
+            case ETypeKind::Null:
+            case ETypeKind::EmptyList:
+            case ETypeKind::EmptyDict:
+                if constexpr (MoreTypesAllowed)
+                    return nullptr;
+                else
+                    return shape;
+            case ETypeKind::Data:
+                switch (TDataTypeInspector(*typeHelper, shape).GetTypeId()) {
+                    case TDataType<char*>::Id:
+                    case TDataType<TUtf8>::Id:
+                    case TDataType<bool>::Id:
+                    case TDataType<i8>::Id:
+                    case TDataType<i16>::Id:
+                    case TDataType<i32>::Id:
+                    case TDataType<i64>::Id:
+                    case TDataType<ui8>::Id:
+                    case TDataType<ui16>::Id:
+                    case TDataType<ui32>::Id:
+                    case TDataType<ui64>::Id:
+                    case TDataType<float>::Id:
+                    case TDataType<double>::Id:
+                    case TDataType<TYson>::Id:
+                    case TDataType<TJson>::Id:
+                        return nullptr;
+                    default:
+                        return shape;
+                }
+            case ETypeKind::Optional:
+                return CheckType<MoreTypesAllowed>(typeHelper, TOptionalTypeInspector(*typeHelper, shape).GetItemType());
+            case ETypeKind::List:
+                return CheckType<MoreTypesAllowed>(typeHelper, TListTypeInspector(*typeHelper, shape).GetItemType());
+            case ETypeKind::Dict: {
+                const auto dictInspector = TDictTypeInspector(*typeHelper, shape);
+                const auto keyType = dictInspector.GetKeyType();
+                if (ETypeKind::Data != typeHelper->GetTypeKind(keyType)) {
+                    return shape;
+                }
+                const auto keyId = TDataTypeInspector(*typeHelper, keyType).GetTypeId();
+                if (keyId != TDataType<char*>::Id && keyId != TDataType<TUtf8>::Id) {
+                    return shape;
+                }
+                return CheckType<MoreTypesAllowed>(typeHelper, dictInspector.GetValueType());
+            }
+            case ETypeKind::Tuple: {
+                const auto tupleInspector = TTupleTypeInspector(*typeHelper, shape);
+                // Elements are visited from the last one down to the first.
+                for (auto remaining = tupleInspector.GetElementsCount(); remaining;) {
+                    if (const auto bad = CheckType<MoreTypesAllowed>(typeHelper, tupleInspector.GetElementType(--remaining))) {
+                        return bad;
+                    }
+                }
+                return nullptr;
+            }
+            case ETypeKind::Struct: {
+                const auto structInspector = TStructTypeInspector(*typeHelper, shape);
+                // Members are visited from the last one down to the first.
+                for (auto remaining = structInspector.GetMembersCount(); remaining;) {
+                    if (const auto bad = CheckType<MoreTypesAllowed>(typeHelper, structInspector.GetMemberType(--remaining))) {
+                        return bad;
+                    }
+                }
+                return nullptr;
+            }
+            case ETypeKind::Variant:
+                if constexpr (MoreTypesAllowed)
+                    return CheckType<MoreTypesAllowed>(typeHelper, TVariantTypeInspector(*typeHelper, shape).GetUnderlyingType());
+                else
+                    return shape;
+            case ETypeKind::Resource: {
+                // Only the node resource itself is accepted among resource types.
+                const auto resourceInspector = TResourceTypeInspector(*typeHelper, shape);
+                if (TStringBuf(resourceInspector.GetTag()) == NodeResourceName) {
+                    return nullptr;
+                }
+                return shape;
+            }
+            default:
+                return shape;
+        }
+    }
+
+    const TSourcePosition Pos_;
+    const ITypeInfoHelper::TPtr TypeHelper_;
+    const TType *const Shape_;
+};
+
+// Type-aware callable "From": builds a DOM node from a value of the user-supplied argument type.
+class TFrom: public TBase {
+public:
+    TFrom(TSourcePosition pos, const ITypeInfoHelper::TPtr typeHelper, const TType* shape)
+        : TBase(pos, typeHelper, shape)
+    {}
+
+    static const TStringRef& Name() {
+        static auto name = TStringRef::Of("From");
+        return name;
+    }
+
+    // Declares the signature for "From". Returns false when `name` is a different function;
+    // returns true otherwise, including when an error has been reported through `builder`.
+    static bool DeclareSignature(const TStringRef& name, TType* userType, IFunctionTypeInfoBuilder& builder, bool typesOnly) {
+        if (!(Name() == name)) {
+            return false;
+        }
+
+        if (!userType) {
+            builder.SetError("Missing user type.");
+            return true;
+        }
+
+        builder.UserType(userType);
+        const auto typeHelper = builder.TypeInfoHelper();
+        const auto userTypeInspector = TTupleTypeInspector(*typeHelper, userType);
+        if (!userTypeInspector || userTypeInspector.GetElementsCount() < 1) {
+            builder.SetError("Invalid user type.");
+            return true;
+        }
+
+        // The first element of the user type is the tuple of argument types.
+        const auto argsTypeInspector = TTupleTypeInspector(*typeHelper, userTypeInspector.GetElementType(0));
+        if (!argsTypeInspector) {
+            builder.SetError("Invalid user type - expected tuple.");
+            return true;
+        }
+
+        if (argsTypeInspector.GetElementsCount() != 1) {
+            builder.SetError("Expected single argument.");
+            return true;
+        }
+
+        const auto inputType = argsTypeInspector.GetElementType(0);
+        if (const auto badType = CheckType<true>(typeHelper, inputType)) {
+            ::TStringBuilder message;
+            message << "Impossible to create DOM from incompatible with Yson type: ";
+            TTypePrinter(*typeHelper, inputType).Out(message.Out);
+            // Name the offending nested type only when it differs from the whole input type.
+            if (badType != inputType) {
+                message << " Incompatible type: ";
+                TTypePrinter(*typeHelper, badType).Out(message.Out);
+            }
+            builder.SetError(message);
+            return true;
+        }
+
+        builder.Args()->Add(inputType).Done().Returns(builder.Resource(NodeResourceName));
+
+        if (!typesOnly) {
+            builder.Implementation(new TFrom(builder.GetSourcePosition(), typeHelper, inputType));
+        }
+        builder.IsStrict();
+        return true;
+    }
+
+private:
+    TUnboxedValue Run(const IValueBuilder* valueBuilder, const TUnboxedValuePod* args) const final {
+        return MakeDom(TypeHelper_.Get(), Shape_, *args, valueBuilder);
+    }
+};
+
+// Type-aware callable "ConvertTo": converts a DOM node into a value of the user-requested type.
+class TConvert: public TBase {
+public:
+    TConvert(TSourcePosition pos, const ITypeInfoHelper::TPtr typeHelper, const TType* shape)
+        : TBase(pos, typeHelper, shape)
+    {}
+
+    static const TStringRef& Name() {
+        static auto name = TStringRef::Of("ConvertTo");
+        return name;
+    }
+
+    // Declares the signature for "ConvertTo". Returns false when `name` is a different function;
+    // returns true otherwise, including when an error has been reported through `builder`.
+    static bool DeclareSignature(const TStringRef& name, TType* userType, IFunctionTypeInfoBuilder& builder, bool typesOnly) {
+        if (!(Name() == name)) {
+            return false;
+        }
+
+        const auto optionsType = builder.Optional()->Item(builder.Resource(OptionsResourceName)).Build();
+        builder.OptionalArgs(1);
+
+        if (!userType) {
+            builder.SetError("Missing user type.");
+            return true;
+        }
+
+        builder.UserType(userType);
+        const auto typeHelper = builder.TypeInfoHelper();
+        const auto userTypeInspector = TTupleTypeInspector(*typeHelper, userType);
+        if (!userTypeInspector || userTypeInspector.GetElementsCount() < 3) {
+            builder.SetError("Invalid user type.");
+            return true;
+        }
+
+        // The first element of the user type is the tuple of argument types.
+        const auto argsTypeInspector = TTupleTypeInspector(*typeHelper, userTypeInspector.GetElementType(0));
+        if (!argsTypeInspector) {
+            builder.SetError("Invalid user type - expected tuple.");
+            return true;
+        }
+
+        const auto argsCount = argsTypeInspector.GetElementsCount();
+        if (argsCount < 1 || argsCount > 2) {
+            ::TStringBuilder message;
+            message << "Invalid user type - expected one or two arguments, got: " << argsCount;
+            builder.SetError(message);
+            return true;
+        }
+
+        // The third element of the user type is used as the requested result type.
+        const auto resultType = userTypeInspector.GetElementType(2);
+        if (const auto badType = CheckType<false>(typeHelper, resultType)) {
+            ::TStringBuilder message;
+            message << "Impossible to convert DOM to incompatible with Yson type: ";
+            TTypePrinter(*typeHelper, resultType).Out(message.Out);
+            // Name the offending nested type only when it differs from the whole result type.
+            if (badType != resultType) {
+                message << " Incompatible type: ";
+                TTypePrinter(*typeHelper, badType).Out(message.Out);
+            }
+            builder.SetError(message);
+            return true;
+        }
+
+        builder.Args()->Add(builder.Resource(NodeResourceName)).Flags(ICallablePayload::TArgumentFlags::AutoMap).Add(optionsType);
+        builder.Returns(resultType);
+
+        if (!typesOnly) {
+            builder.Implementation(new TConvert(builder.GetSourcePosition(), typeHelper, resultType));
+        }
+        return true;
+    }
+
+private:
+    // Runs the PeelDom instantiation matching the Strict/AutoConvert options in args[1].
+    TUnboxedValue Run(const IValueBuilder* valueBuilder, const TUnboxedValuePod* args) const final {
+        const auto opts = ParseOptions(args[1]);
+        const auto peel = opts.Strict
+            ? (opts.AutoConvert ? &PeelDom<true, true> : &PeelDom<true, false>)
+            : (opts.AutoConvert ? &PeelDom<false, true> : &PeelDom<false, false>);
+        return peel(TypeHelper_.Get(), Shape_, args[0], valueBuilder, Pos_);
+    }
+};
+
+// Type-aware parse callable, parameterized by the data type TYJson it parses.
+// Run() and Name() are only declared here; their definitions live outside the class.
+template<typename TYJson, bool DecodeUtf8 = false>
+class TParse: public TBoxedValue {
+public:
+    using TTypeAwareMarker = bool;
+
+    TParse(TSourcePosition pos, bool strictType)
+        : Pos_(pos)
+        , StrictType_(strictType)
+    {}
+
+    static const TStringRef& Name();
+
+    // Declares the signature for this parser. Returns false when `name` is a different function;
+    // returns true otherwise, including when an error has been reported through `builder`.
+    // Without a user type the argument is TYJson. With one, the first argument type (with an
+    // Optional wrapper removed) picks the variant: a resource maps to TYJson, a data type maps
+    // to its own type id.
+    static bool DeclareSignature(const TStringRef& name, TType* userType, IFunctionTypeInfoBuilder& builder, bool typesOnly) {
+        if (!(Name() == name)) {
+            return false;
+        }
+
+        auto typeId = TDataType<TYJson>::Id;
+        if (userType) {
+            const auto typeHelper = builder.TypeInfoHelper();
+            const auto userTypeInspector = TTupleTypeInspector(*typeHelper, userType);
+            if (!userTypeInspector || userTypeInspector.GetElementsCount() < 1) {
+                builder.SetError("Missing or invalid user type.");
+                return true;
+            }
+
+            // The first element of the user type is the tuple of argument types.
+            const auto argsTypeInspector = TTupleTypeInspector(*typeHelper, userTypeInspector.GetElementType(0));
+            if (!argsTypeInspector) {
+                builder.SetError("Invalid user type - expected tuple.");
+                return true;
+            }
+
+            const auto argsCount = argsTypeInspector.GetElementsCount();
+            if (argsCount < 1 || argsCount > 2) {
+                ::TStringBuilder message;
+                message << "Invalid user type - expected one or two arguments, got: " << argsCount;
+                builder.SetError(message);
+                return true;
+            }
+
+            const auto inputType = argsTypeInspector.GetElementType(0);
+            auto dataType = inputType;
+            if (const auto optInspector = TOptionalTypeInspector(*typeHelper, inputType)) {
+                dataType = optInspector.GetItemType();
+            }
+
+            if (const auto resInspector = TResourceTypeInspector(*typeHelper, dataType)) {
+                typeId = TDataType<TYJson>::Id;
+            } else {
+                typeId = TDataTypeInspector(*typeHelper, dataType).GetTypeId();
+            }
+
+            builder.UserType(userType);
+        }
+
+        const auto optionsType = builder.Optional()->Item(builder.Resource(OptionsResourceName)).Build();
+        builder.OptionalArgs(1);
+
+        // Only the TYJson variant is declared strict and returns a non-optional node resource.
+        if (typeId == TDataType<TYJson>::Id) {
+            builder.Args()->Add<TAutoMap<TYJson>>().Add(optionsType).Done().Returns(builder.Resource(NodeResourceName));
+            builder.IsStrict();
+        } else if (typeId == TDataType<TUtf8>::Id) {
+            builder.Args()->Add<TAutoMap<TUtf8>>().Add(optionsType).Done().Returns(builder.Optional()->Item(builder.Resource(NodeResourceName)).Build());
+        } else {
+            builder.Args()->Add<TAutoMap<char*>>().Add(optionsType).Done().Returns(builder.Optional()->Item(builder.Resource(NodeResourceName)).Build());
+        }
+
+        if (!typesOnly) {
+            builder.Implementation(new TParse(builder.GetSourcePosition(), TDataType<TYJson>::Id == typeId));
+        }
+        return true;
+    }
+
+private:
+    const TSourcePosition Pos_;
+    const bool StrictType_;
+
+    TUnboxedValue Run(const IValueBuilder* valueBuilder, const TUnboxedValuePod* args) const final;
+};
+
+ // Parses args[0] as Yson into a DOM value.
+ // On a std::exception: terminates with the callee position and the error text
+ // when StrictType_ is set or the options in args[1] are strict; otherwise
+ // returns an empty value.
+ template<>
+ TUnboxedValue TParse<TYson, false>::Run(const IValueBuilder* valueBuilder, const TUnboxedValuePod* args) const {
+     try {
+         return TryParseYsonDom(args[0].AsStringRef(), valueBuilder);
+     } catch (const std::exception& error) {
+         const bool mustFail = StrictType_ || ParseOptions(args[1]).Strict;
+         if (mustFail) {
+             ::TStringBuilder message;
+             message << valueBuilder->WithCalleePosition(Pos_) << " " << error.what();
+             UdfTerminate(message.data());
+         }
+         return TUnboxedValuePod();
+     }
+ }
+
+ // Parses args[0] as Json into a DOM value.
+ // On a std::exception: terminates with the callee position and the error text
+ // when StrictType_ is set or the options in args[1] are strict; otherwise
+ // returns an empty value.
+ template<>
+ TUnboxedValue TParse<TJson, false>::Run(const IValueBuilder* valueBuilder, const TUnboxedValuePod* args) const {
+     try {
+         return TryParseJsonDom(args[0].AsStringRef(), valueBuilder);
+     } catch (const std::exception& error) {
+         const bool mustFail = StrictType_ || ParseOptions(args[1]).Strict;
+         if (mustFail) {
+             ::TStringBuilder message;
+             message << valueBuilder->WithCalleePosition(Pos_) << " " << error.what();
+             UdfTerminate(message.data());
+         }
+         return TUnboxedValuePod();
+     }
+ }
+
+ // Parses args[0] as Json into a DOM value, passing `true` as the third
+ // argument of TryParseJsonDom (the DecodeUtf8 variant).
+ // On a std::exception: terminates with the callee position and the error text
+ // when StrictType_ is set or the options in args[1] are strict; otherwise
+ // returns an empty value.
+ template<>
+ TUnboxedValue TParse<TJson, true>::Run(const IValueBuilder* valueBuilder, const TUnboxedValuePod* args) const {
+     try {
+         return TryParseJsonDom(args[0].AsStringRef(), valueBuilder, true);
+     } catch (const std::exception& error) {
+         const bool mustFail = StrictType_ || ParseOptions(args[1]).Strict;
+         if (mustFail) {
+             ::TStringBuilder message;
+             message << valueBuilder->WithCalleePosition(Pos_) << " " << error.what();
+             UdfTerminate(message.data());
+         }
+         return TUnboxedValuePod();
+     }
+ }
+
+ // Name of the Yson parsing function: "Parse".
+ template<>
+ const TStringRef& TParse<TYson, false>::Name() {
+     static auto functionName = TStringRef::Of("Parse");
+     return functionName;
+ }
+
+ // Name of the Json parsing function: "ParseJson".
+ template<>
+ const TStringRef& TParse<TJson, false>::Name() {
+     static auto functionName = TStringRef::Of("ParseJson");
+     return functionName;
+ }
+
+ // Name of the Json parsing function with UTF-8 decoding: "ParseJsonDecodeUtf8".
+ template<>
+ const TStringRef& TParse<TJson, true>::Name() {
+     static auto functionName = TStringRef::Of("ParseJsonDecodeUtf8");
+     return functionName;
+ }
+
+}
+
+ // TODO: add an optimizer that marks these UDFs as strict when
+ // Yson::Options(false as Strict) is given.
+ //
+ // Module definition: the UDF classes are listed in their original order,
+ // several per line.
+ SIMPLE_MODULE(TYson2Module,
+     TOptions,
+     TParse<TYson>, TParse<TJson>, TParse<TJson, true>,
+     TConvert, TConvertToBool, TConvertToInt64, TConvertToUint64, TConvertToDouble, TConvertToString,
+     TConvertToList, TConvertToBoolList, TConvertToInt64List, TConvertToUint64List, TConvertToDoubleList, TConvertToStringList,
+     TConvertToDict, TConvertToBoolDict, TConvertToInt64Dict, TConvertToUint64Dict, TConvertToDoubleDict, TConvertToStringDict,
+     TAttributes, TContains,
+     TLookup, TLookupBool, TLookupInt64, TLookupUint64, TLookupDouble, TLookupString, TLookupList, TLookupDict,
+     TYPath, TYPathBool, TYPathInt64, TYPathUint64, TYPathDouble, TYPathString, TYPathList, TYPathDict,
+     TSerialize, TSerializeText, TSerializePretty, TSerializeJson,
+     TWithAttributes,
+     TIsString, TIsInt64, TIsUint64, TIsBool, TIsDouble, TIsList, TIsDict, TIsEntity,
+     TFrom, TGetLength, TEquals, TGetHash
+ );
+
+ REGISTER_MODULES(TYson2Module);
diff --git a/yql/essentials/udfs/ya.make b/yql/essentials/udfs/ya.make
index b8d9b2d2132..c452d78775f 100644
--- a/yql/essentials/udfs/ya.make
+++ b/yql/essentials/udfs/ya.make
@@ -1,4 +1,5 @@
RECURSE(
+ common
examples
logs
test