diff options
| author | YDBot <[email protected]> | 2025-10-07 00:52:12 +0000 |
|---|---|---|
| committer | YDBot <[email protected]> | 2025-10-07 00:52:12 +0000 |
| commit | 0f7e91aa5ae33d20f2ef0d5cb2c80bb66924417f (patch) | |
| tree | 6307f61a54aea3be0afeeaf766a30597fe047e81 | |
| parent | 86c7a4b4c37e709428ebc61fb8a6e79c7260374c (diff) | |
| parent | 3ab4f52f9654e20e38084313aa6861cb0c17eb1d (diff) | |
Sync branches 251007-0050
450 files changed, 21407 insertions, 20423 deletions
diff --git a/build/conf/compilers/gnu_compiler.conf b/build/conf/compilers/gnu_compiler.conf index c51baf680fd..3e83fd1c160 100644 --- a/build/conf/compilers/gnu_compiler.conf +++ b/build/conf/compilers/gnu_compiler.conf @@ -55,36 +55,40 @@ when ($CLANG == "yes" && $ARCH_AARCH64 == "yes") { CFLAGS+=-mno-outline-atomics } -when ($CLANG == "yes" && $COMPILER_VERSION == "18") { - CFLAGS+=-Wno-array-parameter -Wno-deprecate-lax-vec-conv-all -Wno-unqualified-std-cast-call -Wno-unused-but-set-parameter -Wno-implicit-function-declaration -Wno-int-conversion -Wno-incompatible-function-pointer-types -Wno-address-of-packed-member - CFLAGS+=-Wno-deprecated-this-capture -Wno-missing-designated-field-initializers \ - -Wno-format -Wno-vla-cxx-extension -Wno-invalid-offsetof \ - when ($OS_ANDROID == "yes") { - # https://github.com/llvm/llvm-project/issues/68933 is open in clang18 (fix is backported to our build) - CFLAGS+=-Wno-missing-field-initializers - } - when ($MAPSMOBI_BUILD_TARGET == "yes") { - CFLAGS+=-Wno-deprecated-declarations - } +# NB: Since there is no ">=", exact versions have to be spelled below + +when ($CLANG == "yes" && ($COMPILER_VERSION == "18" || $COMPILER_VERSION == "20" || $COMPILER_VERSION == "21")) { + # new warnings for clang18 + CFLAGS+=-Wno-array-parameter \ + -Wno-deprecate-lax-vec-conv-all \ + -Wno-unqualified-std-cast-call \ + -Wno-unused-but-set-parameter \ + -Wno-implicit-function-declaration \ + -Wno-int-conversion \ + -Wno-incompatible-function-pointer-types \ + -Wno-address-of-packed-member \ + -Wno-deprecated-this-capture \ + -Wno-missing-designated-field-initializers \ + -Wno-format \ + -Wno-vla-cxx-extension \ + -Wno-invalid-offsetof } -when ($CLANG == "yes" && $COMPILER_VERSION == "20") { - CFLAGS+=-Wno-array-parameter -Wno-deprecate-lax-vec-conv-all -Wno-unqualified-std-cast-call -Wno-unused-but-set-parameter -Wno-implicit-function-declaration -Wno-int-conversion -Wno-incompatible-function-pointer-types -Wno-address-of-packed-member - 
CFLAGS+=-Wno-deprecated-this-capture -Wno-missing-designated-field-initializers \ - -Wno-format -Wno-vla-cxx-extension -Wno-invalid-offsetof \ +when ($CLANG == "yes" && $COMPILER_VERSION == "18" && $OS_ANDROID == "yes") { + # https://github.com/llvm/llvm-project/issues/68933 is open in NDK27/clang18 + # For other platforms, the fix is backported to our build + CFLAGS+=-Wno-missing-field-initializers +} +when ($CLANG == "yes" && ($COMPILER_VERSION == "20" || $COMPILER_VERSION == "21")) { # new warnings for clang20 CFLAGS+=-Wno-alias-template-in-declaration-name \ - -Wno-cast-function-type-mismatch \ - -Wno-explicit-specialization-storage-class \ - -Wno-extraneous-template-head \ - -Wno-missing-template-arg-list-after-template-kw \ - -Wno-nontrivial-memcall \ - -Wno-strict-primary-template-shadow \ - - when ($MAPSMOBI_BUILD_TARGET == "yes") { - CFLAGS+=-Wno-deprecated-declarations - } + -Wno-cast-function-type-mismatch \ + -Wno-explicit-specialization-storage-class \ + -Wno-extraneous-template-head \ + -Wno-missing-template-arg-list-after-template-kw \ + -Wno-nontrivial-memcall \ + -Wno-strict-primary-template-shadow } _CFLAGS_MSAN_TRACK_ORIGIN= diff --git a/build/external_resources/ymake/public.resources.json b/build/external_resources/ymake/public.resources.json index 4f60dcad587..6f013f2d36a 100644 --- a/build/external_resources/ymake/public.resources.json +++ b/build/external_resources/ymake/public.resources.json @@ -1,19 +1,19 @@ { "by_platform": { "darwin": { - "uri": "sbr:9835961820" + "uri": "sbr:9907876625" }, "darwin-arm64": { - "uri": "sbr:9835960630" + "uri": "sbr:9907875651" }, "linux": { - "uri": "sbr:9835963691" + "uri": "sbr:9907878587" }, "linux-aarch64": { - "uri": "sbr:9835959794" + "uri": "sbr:9907874761" }, "win32": { - "uri": "sbr:9835962816" + "uri": "sbr:9907877791" } } } diff --git a/build/external_resources/ymake/resources.json b/build/external_resources/ymake/resources.json index c5fc5ff3dd0..5ad71b16280 100644 --- 
a/build/external_resources/ymake/resources.json +++ b/build/external_resources/ymake/resources.json @@ -1,19 +1,19 @@ { "by_platform": { "darwin": { - "uri": "sbr:9835934559" + "uri": "sbr:9907877955" }, "darwin-arm64": { - "uri": "sbr:9835933382" + "uri": "sbr:9907876698" }, "linux": { - "uri": "sbr:9835936486" + "uri": "sbr:9907879194" }, "linux-aarch64": { - "uri": "sbr:9835931983" + "uri": "sbr:9907875708" }, "win32": { - "uri": "sbr:9835935543" + "uri": "sbr:9907878726" } } } diff --git a/build/mapping.conf.json b/build/mapping.conf.json index 4fce9eef4cd..03f21ab311c 100644 --- a/build/mapping.conf.json +++ b/build/mapping.conf.json @@ -622,6 +622,7 @@ "9790047688": "{registry_endpoint}/9790047688", "9824364966": "{registry_endpoint}/9824364966", "9854287477": "{registry_endpoint}/9854287477", + "9909336016": "{registry_endpoint}/9909336016", "5486731632": "{registry_endpoint}/5486731632", "5514350352": "{registry_endpoint}/5514350352", "5514360398": "{registry_endpoint}/5514360398", @@ -876,6 +877,7 @@ "9713893885": "{registry_endpoint}/9713893885", "9772289945": "{registry_endpoint}/9772289945", "9835961820": "{registry_endpoint}/9835961820", + "9907876625": "{registry_endpoint}/9907876625", "5766171800": "{registry_endpoint}/5766171800", "5805430761": "{registry_endpoint}/5805430761", "5829025456": "{registry_endpoint}/5829025456", @@ -978,6 +980,7 @@ "9713893529": "{registry_endpoint}/9713893529", "9772289314": "{registry_endpoint}/9772289314", "9835960630": "{registry_endpoint}/9835960630", + "9907875651": "{registry_endpoint}/9907875651", "5766173070": "{registry_endpoint}/5766173070", "5805432830": "{registry_endpoint}/5805432830", "5829031598": "{registry_endpoint}/5829031598", @@ -1080,6 +1083,7 @@ "9713894916": "{registry_endpoint}/9713894916", "9772291630": "{registry_endpoint}/9772291630", "9835963691": "{registry_endpoint}/9835963691", + "9907878587": "{registry_endpoint}/9907878587", "5766171341": "{registry_endpoint}/5766171341", "5805430188": 
"{registry_endpoint}/5805430188", "5829023352": "{registry_endpoint}/5829023352", @@ -1182,6 +1186,7 @@ "9713893031": "{registry_endpoint}/9713893031", "9772288547": "{registry_endpoint}/9772288547", "9835959794": "{registry_endpoint}/9835959794", + "9907874761": "{registry_endpoint}/9907874761", "8270821739": "{registry_endpoint}/8270821739", "8295446553": "{registry_endpoint}/8295446553", "8326170338": "{registry_endpoint}/8326170338", @@ -1214,6 +1219,7 @@ "9713894338": "{registry_endpoint}/9713894338", "9772290670": "{registry_endpoint}/9772290670", "9835962816": "{registry_endpoint}/9835962816", + "9907877791": "{registry_endpoint}/9907877791", "5766172695": "{registry_endpoint}/5766172695", "5805432230": "{registry_endpoint}/5805432230", "5829029743": "{registry_endpoint}/5829029743", @@ -2312,6 +2318,7 @@ "9790047688": "devtools/ya/test/programs/test_tool/bin/test_tool for linux", "9824364966": "devtools/ya/test/programs/test_tool/bin/test_tool for linux", "9854287477": "devtools/ya/test/programs/test_tool/bin/test_tool for linux", + "9909336016": "devtools/ya/test/programs/test_tool/bin/test_tool for linux", "5486731632": "devtools/ya/test/programs/test_tool/bin3/test_tool3 for linux", "5514350352": "devtools/ya/test/programs/test_tool/bin3/test_tool3 for linux", "5514360398": "devtools/ya/test/programs/test_tool/bin3/test_tool3 for linux", @@ -2566,6 +2573,7 @@ "9713893885": "devtools/ymake/bin/ymake for darwin", "9772289945": "devtools/ymake/bin/ymake for darwin", "9835961820": "devtools/ymake/bin/ymake for darwin", + "9907876625": "devtools/ymake/bin/ymake for darwin", "5766171800": "devtools/ymake/bin/ymake for darwin-arm64", "5805430761": "devtools/ymake/bin/ymake for darwin-arm64", "5829025456": "devtools/ymake/bin/ymake for darwin-arm64", @@ -2668,6 +2676,7 @@ "9713893529": "devtools/ymake/bin/ymake for darwin-arm64", "9772289314": "devtools/ymake/bin/ymake for darwin-arm64", "9835960630": "devtools/ymake/bin/ymake for darwin-arm64", + "9907875651": 
"devtools/ymake/bin/ymake for darwin-arm64", "5766173070": "devtools/ymake/bin/ymake for linux", "5805432830": "devtools/ymake/bin/ymake for linux", "5829031598": "devtools/ymake/bin/ymake for linux", @@ -2770,6 +2779,7 @@ "9713894916": "devtools/ymake/bin/ymake for linux", "9772291630": "devtools/ymake/bin/ymake for linux", "9835963691": "devtools/ymake/bin/ymake for linux", + "9907878587": "devtools/ymake/bin/ymake for linux", "5766171341": "devtools/ymake/bin/ymake for linux-aarch64", "5805430188": "devtools/ymake/bin/ymake for linux-aarch64", "5829023352": "devtools/ymake/bin/ymake for linux-aarch64", @@ -2872,6 +2882,7 @@ "9713893031": "devtools/ymake/bin/ymake for linux-aarch64", "9772288547": "devtools/ymake/bin/ymake for linux-aarch64", "9835959794": "devtools/ymake/bin/ymake for linux-aarch64", + "9907874761": "devtools/ymake/bin/ymake for linux-aarch64", "8270821739": "devtools/ymake/bin/ymake for win32", "8295446553": "devtools/ymake/bin/ymake for win32", "8326170338": "devtools/ymake/bin/ymake for win32", @@ -2904,6 +2915,7 @@ "9713894338": "devtools/ymake/bin/ymake for win32", "9772290670": "devtools/ymake/bin/ymake for win32", "9835962816": "devtools/ymake/bin/ymake for win32", + "9907877791": "devtools/ymake/bin/ymake for win32", "5766172695": "devtools/ymake/bin/ymake for win32-clang-cl", "5805432230": "devtools/ymake/bin/ymake for win32-clang-cl", "5829029743": "devtools/ymake/bin/ymake for win32-clang-cl", diff --git a/build/platform/linux_sdk/ya.make b/build/platform/linux_sdk/ya.make index cc0eb8d035e..9650a177f6a 100644 --- a/build/platform/linux_sdk/ya.make +++ b/build/platform/linux_sdk/ya.make @@ -54,12 +54,16 @@ ELSEIF (ARCH_PPC64LE) ELSEIF (ARCH_ARM6) IF (ARM6_FLOAT_ABI == "hard" AND OS_SDK == "ubuntu-16") DECLARE_EXTERNAL_RESOURCE(OS_SDK_ROOT sbr:1323200692) + ELSEIF(ARM6_FLOAT_ABI == "hard" AND OS_SDK == "ubuntu-18") + DECLARE_EXTERNAL_RESOURCE(OS_SDK_ROOT sbr:9899440846) ELSE() MESSAGE(FATAL_ERROR "There is no ${OS_SDK} SDK for ARMv6 32 
bit (float ABI: ${ARM6_FLOAT_ABI})") ENDIF() ELSEIF (ARCH_ARM7) IF (ARM7_FLOAT_ABI == "hard" AND OS_SDK == "ubuntu-16") DECLARE_EXTERNAL_RESOURCE(OS_SDK_ROOT sbr:1323200692) + ELSEIF(ARM7_FLOAT_ABI == "hard" AND OS_SDK == "ubuntu-18") + DECLARE_EXTERNAL_RESOURCE(OS_SDK_ROOT sbr:9899440846) ELSEIF (ARM7_FLOAT_ABI == "softfp" AND OS_SDK == "ubuntu-18") DECLARE_EXTERNAL_RESOURCE(OS_SDK_ROOT sbr:2402287545) ELSE() diff --git a/build/platform/test_tool/host.ya.make.inc b/build/platform/test_tool/host.ya.make.inc index 3f6d744be78..a79eed6b6b3 100644 --- a/build/platform/test_tool/host.ya.make.inc +++ b/build/platform/test_tool/host.ya.make.inc @@ -1,12 +1,12 @@ IF (HOST_OS_DARWIN AND HOST_ARCH_X86_64) - DECLARE_EXTERNAL_RESOURCE(TEST_TOOL_HOST sbr:9854347679) + DECLARE_EXTERNAL_RESOURCE(TEST_TOOL_HOST sbr:9909378846) ELSEIF (HOST_OS_DARWIN AND HOST_ARCH_ARM64) - DECLARE_EXTERNAL_RESOURCE(TEST_TOOL_HOST sbr:9854344589) + DECLARE_EXTERNAL_RESOURCE(TEST_TOOL_HOST sbr:9909377264) ELSEIF (HOST_OS_LINUX AND HOST_ARCH_X86_64) - DECLARE_EXTERNAL_RESOURCE(TEST_TOOL_HOST sbr:9854352848) + DECLARE_EXTERNAL_RESOURCE(TEST_TOOL_HOST sbr:9909382111) ELSEIF (HOST_OS_LINUX AND HOST_ARCH_AARCH64) - DECLARE_EXTERNAL_RESOURCE(TEST_TOOL_HOST sbr:9854342438) + DECLARE_EXTERNAL_RESOURCE(TEST_TOOL_HOST sbr:9909375982) ELSEIF (HOST_OS_WINDOWS AND HOST_ARCH_X86_64) - DECLARE_EXTERNAL_RESOURCE(TEST_TOOL_HOST sbr:9854350018) + DECLARE_EXTERNAL_RESOURCE(TEST_TOOL_HOST sbr:9909380537) ENDIF() diff --git a/build/platform/test_tool/host_os.ya.make.inc b/build/platform/test_tool/host_os.ya.make.inc index 6f2f069da67..6726ab3db78 100644 --- a/build/platform/test_tool/host_os.ya.make.inc +++ b/build/platform/test_tool/host_os.ya.make.inc @@ -1,12 +1,12 @@ IF (HOST_OS_DARWIN AND HOST_ARCH_X86_64) - DECLARE_EXTERNAL_RESOURCE(TEST_TOOL_HOST sbr:9854283071) + DECLARE_EXTERNAL_RESOURCE(TEST_TOOL_HOST sbr:9909333231) ELSEIF (HOST_OS_DARWIN AND HOST_ARCH_ARM64) - DECLARE_EXTERNAL_RESOURCE(TEST_TOOL_HOST 
sbr:9854280336) + DECLARE_EXTERNAL_RESOURCE(TEST_TOOL_HOST sbr:9909331581) ELSEIF (HOST_OS_LINUX AND HOST_ARCH_X86_64) - DECLARE_EXTERNAL_RESOURCE(TEST_TOOL_HOST sbr:9854287477) + DECLARE_EXTERNAL_RESOURCE(TEST_TOOL_HOST sbr:9909336016) ELSEIF (HOST_OS_LINUX AND HOST_ARCH_AARCH64) - DECLARE_EXTERNAL_RESOURCE(TEST_TOOL_HOST sbr:9854278456) + DECLARE_EXTERNAL_RESOURCE(TEST_TOOL_HOST sbr:9909330087) ELSEIF (HOST_OS_WINDOWS AND HOST_ARCH_X86_64) - DECLARE_EXTERNAL_RESOURCE(TEST_TOOL_HOST sbr:9854285169) + DECLARE_EXTERNAL_RESOURCE(TEST_TOOL_HOST sbr:9909334535) ENDIF() diff --git a/build/platform/yfm/resources.json b/build/platform/yfm/resources.json index 55f205d77b2..bdc81928958 100644 --- a/build/platform/yfm/resources.json +++ b/build/platform/yfm/resources.json @@ -1,16 +1,16 @@ { "by_platform": { "win32-x86_64": { - "uri": "sbr:9808280246" + "uri": "sbr:9915061557" }, "darwin-x86_64": { - "uri": "sbr:9808277754" + "uri": "sbr:9915059253" }, "linux-x86_64": { - "uri": "sbr:9808274819" + "uri": "sbr:9915056735" }, "darwin-arm64": { - "uri": "sbr:9808277754" + "uri": "sbr:9915059253" } } } diff --git a/build/plugins/lib/nots/package_manager/base/utils.py b/build/plugins/lib/nots/package_manager/base/utils.py index c49ab4c918a..2828cef9063 100644 --- a/build/plugins/lib/nots/package_manager/base/utils.py +++ b/build/plugins/lib/nots/package_manager/base/utils.py @@ -63,6 +63,10 @@ def build_nm_store_path(moddir: str) -> str: return os.path.join(build_nots_path(), "nm_store", moddir) +def build_vs_store_path(moddir: str) -> str: + return os.path.join(build_nots_path(), "vm_store", moddir) + + def build_traces_store_path(moddir: str) -> str: return os.path.join(build_nots_path(), "traces", moddir) diff --git a/build/sysincl/misc.yml b/build/sysincl/misc.yml index 335d1903111..7aab18b3aa1 100644 --- a/build/sysincl/misc.yml +++ b/build/sysincl/misc.yml @@ -427,17 +427,6 @@ - contrib/libs/glibcasm/glibc/include/stdint.h - contrib/libs/cxxsupp/libcxx/include/stdint.h -# 
protoc uses raw string literals with #include statements, which drive our include parser crazy -- source_filter: "^contrib/libs/protoc" - includes: - - "$base$google/protobuf/$path$" - - "google/protobuf/$path$" - - "$fwd_to$.$fwd_to_suffix$.h" - - "$fwd_to$.proto.h" - - "$name$.proto.h" - - "$path$" - - "$h_include$" - - source_filter: "^contrib/tools/bison/data" includes: - "]b4_location_include[" diff --git a/contrib/libs/openjpeg/.yandex_meta/override.nix b/contrib/libs/openjpeg/.yandex_meta/override.nix index d173012e1fb..2bce90e4961 100644 --- a/contrib/libs/openjpeg/.yandex_meta/override.nix +++ b/contrib/libs/openjpeg/.yandex_meta/override.nix @@ -1,11 +1,11 @@ pkgs: attrs: with pkgs; with attrs; rec { - version = "2.5.3"; + version = "2.5.4"; src = fetchFromGitHub { owner = "uclouvain"; repo = "openjpeg"; rev = "v${version}"; - sha256 = "sha256-ONPahcQ80e3ahYRQU+Tu8Z7ZTARjRlpXqPAYpUlX5sY="; + sha256 = "sha256-HSXGdpHUbwlYy5a+zKpcLo2d+b507Qf5nsaMghVBlZ8="; }; patches = []; diff --git a/contrib/libs/openjpeg/CHANGELOG.md b/contrib/libs/openjpeg/CHANGELOG.md index 926e6f1f4f3..24daac65a67 100644 --- a/contrib/libs/openjpeg/CHANGELOG.md +++ b/contrib/libs/openjpeg/CHANGELOG.md @@ -1,5 +1,21 @@ # Changelog +## [v2.5.4](https://github.com/uclouvain/openjpeg/releases/v2.5.4) (2025-09-20) + +[Full Changelog](https://github.com/uclouvain/openjpeg/compare/v2.5.3...v2.5.4) + +**Closed issues:** + +- CMake configure warning with CMake 3.30 and newer [\#1579](https://github.com/uclouvain/openjpeg/issues/1579) + +**Merged pull requests:** + +- pkgconfig: drop unused libraries from `Libs.private` [\#1591](https://github.com/uclouvain/openjpeg/pull/1591) ([bgilbert](https://github.com/bgilbert)) +- Fix CMake warning: Compatibility with CMake \< 3.10 will be removed [\#1580](https://github.com/uclouvain/openjpeg/pull/1580) ([dzenanz](https://github.com/dzenanz)) +- code documentation updates [\#1576](https://github.com/uclouvain/openjpeg/pull/1576) 
([weanti](https://github.com/weanti)) +- Fixed ICC profile copy failure on write [\#1574](https://github.com/uclouvain/openjpeg/pull/1574) ([mircomir](https://github.com/mircomir)) +- opj\_jp2\_read\_header: Check for error after parsing header. [\#1573](https://github.com/uclouvain/openjpeg/pull/1573) ([sebras](https://github.com/sebras)) (CVE-2025-54874) + ## [v2.5.3](https://github.com/uclouvain/openjpeg/releases/v2.5.3) (2024-12-09) [Full Changelog](https://github.com/uclouvain/openjpeg/compare/v2.5.2...v2.5.3) diff --git a/contrib/libs/openjpeg/NEWS.md b/contrib/libs/openjpeg/NEWS.md index 9e8b6b72a81..5990166cc12 100644 --- a/contrib/libs/openjpeg/NEWS.md +++ b/contrib/libs/openjpeg/NEWS.md @@ -2,6 +2,17 @@ More details in the [CHANGELOG](https://github.com/uclouvain/openjpeg/blob/master/CHANGELOG.md) +## OpenJPEG 2.5.4 (Sept 2025) + +No API/ABI break compared to v2.5.3 + +### Bug fixes + +* opj\_jp2\_read\_header: Check for error after parsing header. [\#1573](https://github.com/uclouvain/openjpeg/pull/1573) +* pkgconfig: drop unused libraries from `Libs.private` [\#1591](https://github.com/uclouvain/openjpeg/pull/1591) +* Fix CMake warning: Compatibility with CMake \< 3.10 will be removed [\#1580](https://github.com/uclouvain/openjpeg/pull/1580) +* Fixed ICC profile copy failure on write [\#1574](https://github.com/uclouvain/openjpeg/pull/1574) + ## OpenJPEG 2.5.3 (Dec 2024) No API/ABI break compared to v2.5.2 diff --git a/contrib/libs/openjpeg/jp2.c b/contrib/libs/openjpeg/jp2.c index 4df055a542a..3d16d1075fe 100644 --- a/contrib/libs/openjpeg/jp2.c +++ b/contrib/libs/openjpeg/jp2.c @@ -1987,6 +1987,12 @@ OPJ_BOOL opj_jp2_setup_encoder(opj_jp2_t *jp2, if (image->icc_profile_len) { jp2->meth = 2; jp2->enumcs = 0; + jp2->color.icc_profile_buf = (OPJ_BYTE *)opj_malloc(image->icc_profile_len); + if (jp2->color.icc_profile_buf) { + jp2->color.icc_profile_len = image->icc_profile_len; + memcpy(jp2->color.icc_profile_buf, image->icc_profile_buf, + 
image->icc_profile_len); + } } else { jp2->meth = 1; if (image->color_space == OPJ_CLRSPC_SRGB) { @@ -2873,7 +2879,7 @@ OPJ_BOOL opj_jp2_read_header(opj_stream_private_t *p_stream, p_image, p_manager); - if (p_image && *p_image) { + if (ret && p_image && *p_image) { /* Set Image Color Space */ if (jp2->enumcs == 16) { (*p_image)->color_space = OPJ_CLRSPC_SRGB; diff --git a/contrib/libs/openjpeg/openjpeg.h b/contrib/libs/openjpeg/openjpeg.h index 59abd323aed..fe2f1ee0c4f 100644 --- a/contrib/libs/openjpeg/openjpeg.h +++ b/contrib/libs/openjpeg/openjpeg.h @@ -635,6 +635,8 @@ typedef void * opj_codec_t; /* * Callback function prototype for read function + * @return returns The number of bytes delivered into + * \a p_buffer. -1 signals end of stream. */ typedef OPJ_SIZE_T(* opj_stream_read_fn)(void * p_buffer, OPJ_SIZE_T p_nb_bytes, void * p_user_data) ; @@ -1239,7 +1241,6 @@ OPJ_API void OPJ_CALLCONV opj_stream_set_user_data(opj_stream_t* p_stream, /** * Sets the length of the user data for the stream. - * * @param p_stream the stream to modify * @param data_length length of the user_data. */ @@ -1437,6 +1438,8 @@ OPJ_API OPJ_BOOL OPJ_CALLCONV opj_set_decoded_components(opj_codec_t *p_codec, * that is to say at the highest resolution level, even if requesting the image at lower * resolution levels. * + * Note: If p_start_x, p_start_y, p_end_x, p_end_y are all 0, then the whole image is decoded. + * * Generally opj_set_decode_area() should be followed by opj_decode(), and the * codec cannot be re-used. * In the particular case of an image made of a single tile, several sequences of diff --git a/contrib/libs/openjpeg/opj_config.h b/contrib/libs/openjpeg/opj_config.h index 50d156357da..850cae5c4df 100644 --- a/contrib/libs/openjpeg/opj_config.h +++ b/contrib/libs/openjpeg/opj_config.h @@ -9,6 +9,6 @@ /* Version number. 
*/ #define OPJ_VERSION_MAJOR 2 #define OPJ_VERSION_MINOR 5 -#define OPJ_VERSION_BUILD 3 +#define OPJ_VERSION_BUILD 4 #endif diff --git a/contrib/libs/openjpeg/opj_config_private-linux.h b/contrib/libs/openjpeg/opj_config_private-linux.h index 8076e7d32a6..cf5d183a063 100644 --- a/contrib/libs/openjpeg/opj_config_private-linux.h +++ b/contrib/libs/openjpeg/opj_config_private-linux.h @@ -1,6 +1,6 @@ /* create opj_config_private.h for CMake */ -#define OPJ_PACKAGE_VERSION "2.5.3" +#define OPJ_PACKAGE_VERSION "2.5.4" /* Not used by openjp2*/ /*#define HAVE_MEMORY_H 1*/ diff --git a/contrib/libs/openjpeg/ya.make b/contrib/libs/openjpeg/ya.make index 28dcb166d3d..2dc9720c728 100644 --- a/contrib/libs/openjpeg/ya.make +++ b/contrib/libs/openjpeg/ya.make @@ -9,9 +9,9 @@ LICENSE( LICENSE_TEXTS(.yandex_meta/licenses.list.txt) -VERSION(2.5.3) +VERSION(2.5.4) -ORIGINAL_SOURCE(https://github.com/uclouvain/openjpeg/archive/v2.5.3.tar.gz) +ORIGINAL_SOURCE(https://github.com/uclouvain/openjpeg/archive/v2.5.4.tar.gz) ADDINCL( contrib/libs/openjpeg diff --git a/tools/ruff_linter/wrapper.py b/tools/ruff_linter/wrapper.py index 5ea8b6d6666..eecac1f267a 100644 --- a/tools/ruff_linter/wrapper.py +++ b/tools/ruff_linter/wrapper.py @@ -62,7 +62,6 @@ def process_file(orig_filename: str, ruff_bin: str, orig_config: Path, source_ro if file_path.startswith(('fintech/uservices', 'taxi', 'sdg', 'electro')): # TODO(alevitskii) TPS-28865, TPS-31380. Run checks for fintech and taxi in build root too. - # TODO(alevitskii): have to give a pass for sdg, because they started using `extend`... filename = os.path.realpath(orig_filename) if os.path.islink(orig_filename) else orig_filename config = orig_config.resolve() if orig_config.is_symlink() else orig_config else: @@ -110,8 +109,6 @@ def main(): # otherwise we risk allowing to steal from arcadia. To do that we need to mark modules 1st-party/3rd-party # in pyproject.toml. 
extend_option_present = check_extend_option_present(style_config_path) - # TODO(alevitskii): sdg is an unfortunate victim of an oversight, we need to fix it - is_sdg = str(style_config_path.relative_to(params.source_root)).startswith('sdg') ruff_bin = get_ruff_bin(params) @@ -119,7 +116,7 @@ def main(): for file_name in params.files: start_time = time.perf_counter() - if extend_option_present and not is_sdg: + if extend_option_present: elapsed = time.perf_counter() - start_time report.add( file_name, diff --git a/util/generic/yexception.h b/util/generic/yexception.h index c87868ffd42..3e4bcedda7d 100644 --- a/util/generic/yexception.h +++ b/util/generic/yexception.h @@ -156,8 +156,8 @@ private: #define ythrow throw __LOCATION__ + namespace NPrivate { - /// Encapsulates data for one of the most common case in which - /// exception message consists of single constant string + /// Encapsulates data for the most common case when + /// an exception message consists of a single constant string struct TSimpleExceptionMessage { TSourceLocation Location; TStringBuf Message; diff --git a/util/system/compiler.h b/util/system/compiler.h index 82401f95f1e..f0dd5b9c5b1 100644 --- a/util/system/compiler.h +++ b/util/system/compiler.h @@ -75,7 +75,7 @@ * * void Foo(const int argumentUsedOnlyForDebugPurposes Y_DECLARE_UNUSED) { * assert(argumentUsedOnlyForDebugPurposes == 42); - * // however you may as well omit `Y_DECLARE_UNUSED` and use `UNUSED` macro instead + * // however you may as well omit `Y_DECLARE_UNUSED` and use `Y_UNUSED` macro instead * Y_UNUSED(argumentUsedOnlyForDebugPurposes); * } * @endcode diff --git a/util/system/env.h b/util/system/env.h index 55f43bd9b82..6458e918b83 100644 --- a/util/system/env.h +++ b/util/system/env.h @@ -9,12 +9,12 @@ * @param key String identifying the name of the environmental variable to look for * @param def String that returns if environmental variable not found by key * - * @return String that is associated with the matched 
environment variable or empty string if + * @return String that is associated with the matched environment variable or the value of `def` parameter if * such variable is missing. * * @note Use it only in pair with `SetEnv` as there may be inconsistency in their behaviour * otherwise. - * @note Calls to `GetEnv` and `SetEnv` from different threads must be synchronized. + * @note Calls to `GetEnv` and environment modifying functions (`SetEnv` or `UnsetEnv`) from different threads must be synchronized. * @see SetEnv */ TString GetEnv(const TString& key, const TString& def = TString()); @@ -22,29 +22,28 @@ TString GetEnv(const TString& key, const TString& def = TString()); /** * Search the environment list provided by the host environment for associated variable. * - * @param key String identifying the name of the environmental variable to look for - * - * @return String that is associated with the matched environment - * variable or empty optional value if such variable is missing. + * @param key String identifying the name of the environmental variable to look for * - * @throws TSystemError If name of the variable has invalid format + * @return String that is associated with the matched environment + * variable or empty optional value if such variable is missing. * - * @note Use it only in pair with `SetEnv` as there may be inconsistency - * in their behaviour otherwise. - * @note Calls to `TryGetEnv` and `SetEnv` from different threads must be synchronized. - * @see SetEnv + * @note Use it only in pair with `SetEnv` as there may be inconsistency in their behaviour otherwise. + * @note Calls to `TryGetEnv` and environment modifying functions (`SetEnv` or `UnsetEnv`) from different threads must be synchronized. + * @see SetEnv */ TMaybe<TString> TryGetEnv(const TString& key); /** * Add or change environment variable provided by the host environment. 
* - * @param key String identifying the name of the environment variable to set or change - * @param value Value to assign - + * @param key String identifying the name of the environment variable to set or change + * @param value Value to assign + * + * @throws TSystemError On error + * * @note Use it only in pair with `GetEnv` as there may be inconsistency in their behaviour * otherwise. - * @note Calls to `GetEnv` and `SetEnv` from different threads must be synchronized. + * @note Calls to `SetEnv` and `GetEnv`, `TryGetEnv`, `UnsetEnv` from different threads must be synchronized. * @see GetEnv */ void SetEnv(const TString& key, const TString& value); @@ -52,11 +51,13 @@ void SetEnv(const TString& key, const TString& value); /** * Remove environment variable from the host environment. * - * @param key String identifying the name of the environment variable to remove + * @param key String identifying the name of the environment variable to remove * - * @note If key does not exist in the environment, then the environment is unchanged, - * and the function returns normally. - * @note Calls to `GetEnv` and `SetEnv` from different threads must be synchronized. - * @see GetEnv + * @throws TSystemError On error + * + * @note If key does not exist in the environment, then the environment is unchanged, + * and the function returns normally. + * @note Calls to `UnsetEnv` and `GetEnv`, `TryGetEnv`, `SetEnv` from different threads must be synchronized. 
+ * @see GetEnv */ void UnsetEnv(const TString& key); diff --git a/util/system/platform.h b/util/system/platform.h index 363bf4fdec0..6ca7b96f7ef 100644 --- a/util/system/platform.h +++ b/util/system/platform.h @@ -194,7 +194,7 @@ #define _dll_ #endif -// 16, 32 or 64 +// 32 or 64 #if defined(__sparc_v9__) || defined(_x86_64_) || defined(_ia64_) || defined(_arm64_) || defined(_ppc64_) || defined(_wasm64_) #define _64_ #else @@ -39,33 +39,33 @@ REGISTRY_ENDPOINT = os.environ.get("YA_REGISTRY_ENDPOINT", "https://devtools-reg PLATFORM_MAP = { "data": { "win32": { - "md5": "56c3708a62a7b102623c6fc1e7e75816", + "md5": "40689a14b94f9ced95802a443b579fc0", "urls": [ - f"{REGISTRY_ENDPOINT}/9854301469" + f"{REGISTRY_ENDPOINT}/9909414548" ] }, "darwin": { - "md5": "a8697bbb033bce04dc6f0f931e6ac57e", + "md5": "26570127608a24baf4f382bd14ae4065", "urls": [ - f"{REGISTRY_ENDPOINT}/9854298811" + f"{REGISTRY_ENDPOINT}/9909412629" ] }, "darwin-arm64": { - "md5": "47e939a3619094a76c0829296f5604cf", + "md5": "4192f74085136804b534b1895725abff", "urls": [ - f"{REGISTRY_ENDPOINT}/9854295952" + f"{REGISTRY_ENDPOINT}/9909410722" ] }, "linux-aarch64": { - "md5": "f5109cb97f24f78e8ee2adaa6c70482e", + "md5": "8801d43aab77c669f692f25f8134e845", "urls": [ - f"{REGISTRY_ENDPOINT}/9854293716" + f"{REGISTRY_ENDPOINT}/9909408592" ] }, "linux": { - "md5": "30087ee041e0221fc3da36a15b076d59", + "md5": "2c09de47efdc838f04501836bdf972dd", "urls": [ - f"{REGISTRY_ENDPOINT}/9854304686" + f"{REGISTRY_ENDPOINT}/9909416605" ] } } diff --git a/yql/essentials/core/extract_predicate/ut/extract_predicate_ut.cpp b/yql/essentials/core/extract_predicate/ut/extract_predicate_ut.cpp index cdb37c7111b..eb3410a121d 100644 --- a/yql/essentials/core/extract_predicate/ut/extract_predicate_ut.cpp +++ b/yql/essentials/core/extract_predicate/ut/extract_predicate_ut.cpp @@ -62,8 +62,8 @@ Y_UNIT_TEST_SUITE(TYqlExtractPredicate) { lexers.Antlr4 = NSQLTranslationV1::MakeAntlr4LexerFactory(); lexers.Antlr4Ansi = 
NSQLTranslationV1::MakeAntlr4AnsiLexerFactory(); NSQLTranslationV1::TParsers parsers; - parsers.Antlr4 = NSQLTranslationV1::MakeAntlr4ParserFactory(); - parsers.Antlr4Ansi = NSQLTranslationV1::MakeAntlr4AnsiParserFactory(); + parsers.Antlr4 = NSQLTranslationV1::MakeAntlr4ParserFactory(/*isAmbiguityError=*/ true); + parsers.Antlr4Ansi = NSQLTranslationV1::MakeAntlr4AnsiParserFactory(/*isAmbiguityError=*/ true); NSQLTranslation::TTranslators translators( nullptr, diff --git a/yql/essentials/core/issue/protos/issue_id.proto b/yql/essentials/core/issue/protos/issue_id.proto index a3c7ee81cb3..90fe70ce034 100644 --- a/yql/essentials/core/issue/protos/issue_id.proto +++ b/yql/essentials/core/issue/protos/issue_id.proto @@ -157,6 +157,7 @@ message TIssuesIds { YQL_DEPRECATED_BINDINGS = 4538; YQL_HINT_INVALID_PARAMETERS = 4539; YQL_UNTYPED_STRING_LITERALS = 4540; + YQL_SYNTAX_AMBIGUITY = 4602; // yql parser errors YQL_NOT_ALLOWED_IN_DISCOVERY = 4600; diff --git a/yql/essentials/core/issue/yql_issue.txt b/yql/essentials/core/issue/yql_issue.txt index 2bb925e03e3..948c9aa01b7 100644 --- a/yql/essentials/core/issue/yql_issue.txt +++ b/yql/essentials/core/issue/yql_issue.txt @@ -676,6 +676,10 @@ ids { severity: S_WARNING } ids { + code: YQL_SYNTAX_AMBIGUITY + severity: S_WARNING +} +ids { code: CORE_LINEAGE_INTERNAL_ERROR severity: S_WARNING } diff --git a/yql/essentials/minikql/comp_nodes/ut/mkql_computation_node_ut.cpp b/yql/essentials/minikql/comp_nodes/ut/mkql_computation_node_ut.cpp index 04512a00e5b..7593c6eaaba 100644 --- a/yql/essentials/minikql/comp_nodes/ut/mkql_computation_node_ut.cpp +++ b/yql/essentials/minikql/comp_nodes/ut/mkql_computation_node_ut.cpp @@ -18,6 +18,15 @@ namespace { constexpr auto TotalSambles = 222222U; +template <typename T> +void AssertNumericValuesEqual(T actual, T expected) { + if constexpr (std::is_floating_point_v<T>) { + UNIT_ASSERT(std::abs(actual - expected) < 0.0003); + } else { + UNIT_ASSERT_VALUES_EQUAL(actual, expected); + } +} + } 
std::vector<std::pair<i8, double>> MakeSamples() { @@ -5013,6 +5022,183 @@ Y_UNIT_TEST_SUITE(TMiniKQLComputationNodeTest) { const auto t2 = TInstant::Now(); Cout << t2 - t1 << Endl; } + + Y_UNIT_TEST_LLVM(TestBuiltinsAdd) { + TSetup<LLVM> setup; + TProgramBuilder& pb = *setup.PgmBuilder; + + auto testAdd = [&](auto val1, auto val2, auto expected) { + using T = decltype(val1); + const auto data1 = pb.NewDataLiteral<T>(val1); + const auto data2 = pb.NewDataLiteral<T>(val2); + const auto result = pb.Add(data1, data2); + const auto graph = setup.BuildGraph(result); + + AssertNumericValuesEqual(graph->GetValue().template Get<T>(), expected); + }; + + testAdd(std::numeric_limits<i8>::max(), i8(1), std::numeric_limits<i8>::min()); + testAdd(std::numeric_limits<ui8>::max(), ui8(1), ui8(0)); + testAdd(std::numeric_limits<i16>::max(), i16(1), std::numeric_limits<i16>::min()); + testAdd(std::numeric_limits<ui16>::max(), ui16(1), ui16(0)); + testAdd(std::numeric_limits<i32>::max(), i32(1), std::numeric_limits<i32>::min()); + testAdd(std::numeric_limits<ui32>::max(), ui32(1), ui32(0)); + testAdd(std::numeric_limits<i64>::max(), i64(1), std::numeric_limits<i64>::min()); + testAdd(std::numeric_limits<ui64>::max(), ui64(1), ui64(0)); + + testAdd(3.14f, 2.71f, 5.85f); + testAdd(1.5, 2.5, 4.0); + } + + Y_UNIT_TEST_LLVM(TestBuiltinsSub) { + TSetup<LLVM> setup; + TProgramBuilder& pb = *setup.PgmBuilder; + + auto testSub = [&](auto val1, auto val2, auto expected) { + using T = decltype(val1); + const auto data1 = pb.NewDataLiteral<T>(val1); + const auto data2 = pb.NewDataLiteral<T>(val2); + const auto result = pb.Sub(data1, data2); + const auto graph = setup.BuildGraph(result); + AssertNumericValuesEqual(graph->GetValue().template Get<T>(), expected); + }; + + testSub(std::numeric_limits<i8>::min(), i8(1), std::numeric_limits<i8>::max()); + testSub(ui8(0), ui8(1), std::numeric_limits<ui8>::max()); + testSub(std::numeric_limits<i16>::min(), i16(1), std::numeric_limits<i16>::max()); + 
testSub(ui16(0), ui16(1), std::numeric_limits<ui16>::max()); + testSub(std::numeric_limits<i32>::min(), i32(1), std::numeric_limits<i32>::max()); + testSub(ui32(0), ui32(1), std::numeric_limits<ui32>::max()); + testSub(std::numeric_limits<i64>::min(), i64(1), std::numeric_limits<i64>::max()); + testSub(ui64(0), ui64(1), std::numeric_limits<ui64>::max()); + + testSub(5.5f, 2.3f, 3.2f); + testSub(10.0, 3.5, 6.5); + } + + Y_UNIT_TEST_LLVM(TestBuiltinsMul) { + TSetup<LLVM> setup; + TProgramBuilder& pb = *setup.PgmBuilder; + + auto testMul = [&](auto val1, auto val2, auto expected) { + using T = decltype(val1); + const auto data1 = pb.NewDataLiteral<T>(val1); + const auto data2 = pb.NewDataLiteral<T>(val2); + const auto result = pb.Mul(data1, data2); + const auto graph = setup.BuildGraph(result); + AssertNumericValuesEqual(graph->GetValue().template Get<T>(), expected); + }; + + testMul(std::numeric_limits<i8>::max(), i8(2), i8(-2)); + testMul(std::numeric_limits<ui8>::max(), ui8(2), ui8(254)); + testMul(std::numeric_limits<i16>::max(), i16(2), i16(-2)); + testMul(std::numeric_limits<ui16>::max(), ui16(2), ui16(65534)); + testMul(std::numeric_limits<i32>::max(), i32(2), i32(-2)); + testMul(std::numeric_limits<ui32>::max(), ui32(2), ui32(4294967294)); + testMul(std::numeric_limits<i64>::max(), i64(2), i64(-2)); + testMul(std::numeric_limits<ui64>::max(), ui64(2), ui64(18446744073709551614ULL)); + + testMul(2.5f, 4.0f, 10.0f); + testMul(3.0, 7.0, 21.0); + } + + Y_UNIT_TEST_LLVM(TestBuiltinsInc) { + TSetup<LLVM> setup; + TProgramBuilder& pb = *setup.PgmBuilder; + + auto testInc = [&](auto val, auto expected) { + using T = decltype(val); + const auto data = pb.NewDataLiteral<T>(val); + const auto result = pb.Increment(data); + const auto graph = setup.BuildGraph(result); + AssertNumericValuesEqual(graph->GetValue().template Get<T>(), expected); + }; + + testInc(std::numeric_limits<i8>::max(), std::numeric_limits<i8>::min()); + testInc(std::numeric_limits<ui8>::max(), 
ui8(0)); + testInc(std::numeric_limits<i16>::max(), std::numeric_limits<i16>::min()); + testInc(std::numeric_limits<ui16>::max(), ui16(0)); + testInc(std::numeric_limits<i32>::max(), std::numeric_limits<i32>::min()); + testInc(std::numeric_limits<ui32>::max(), ui32(0)); + testInc(std::numeric_limits<i64>::max(), std::numeric_limits<i64>::min()); + testInc(std::numeric_limits<ui64>::max(), ui64(0)); + + testInc(5.5f, 6.5f); + testInc(10.0, 11.0); + } + + Y_UNIT_TEST_LLVM(TestBuiltinsDec) { + TSetup<LLVM> setup; + TProgramBuilder& pb = *setup.PgmBuilder; + + auto testDec = [&](auto val, auto expected) { + using T = decltype(val); + const auto data = pb.NewDataLiteral<T>(val); + const auto result = pb.Decrement(data); + const auto graph = setup.BuildGraph(result); + AssertNumericValuesEqual(graph->GetValue().template Get<T>(), expected); + }; + + testDec(std::numeric_limits<i8>::min(), std::numeric_limits<i8>::max()); + testDec(ui8(0), std::numeric_limits<ui8>::max()); + testDec(std::numeric_limits<i16>::min(), std::numeric_limits<i16>::max()); + testDec(ui16(0), std::numeric_limits<ui16>::max()); + testDec(std::numeric_limits<i32>::min(), std::numeric_limits<i32>::max()); + testDec(ui32(0), std::numeric_limits<ui32>::max()); + testDec(std::numeric_limits<i64>::min(), std::numeric_limits<i64>::max()); + testDec(ui64(0), std::numeric_limits<ui64>::max()); + + testDec(7.5f, 6.5f); + testDec(20.0, 19.0); + } + + Y_UNIT_TEST_LLVM(TestBuiltinsMinus) { + TSetup<LLVM> setup; + TProgramBuilder& pb = *setup.PgmBuilder; + + auto testMinus = [&](auto val, auto expected) { + using T = decltype(val); + const auto data = pb.NewDataLiteral<T>(val); + const auto result = pb.Minus(data); + const auto graph = setup.BuildGraph(result); + AssertNumericValuesEqual(graph->GetValue().template Get<T>(), expected); + }; + + testMinus(std::numeric_limits<i8>::min(), std::numeric_limits<i8>::min()); + testMinus(std::numeric_limits<ui8>::max(), ui8(1)); + 
testMinus(std::numeric_limits<i16>::min(), std::numeric_limits<i16>::min()); + testMinus(std::numeric_limits<ui16>::max(), ui16(1)); + testMinus(std::numeric_limits<i32>::min(), std::numeric_limits<i32>::min()); + testMinus(std::numeric_limits<ui32>::max(), ui32(1)); + testMinus(std::numeric_limits<i64>::min(), std::numeric_limits<i64>::min()); + testMinus(std::numeric_limits<ui64>::max(), ui64(1)); + + testMinus(3.14f, -3.14f); + testMinus(2.5, -2.5); + } + + Y_UNIT_TEST_LLVM(TestBuiltinsAbs) { + TSetup<LLVM> setup; + TProgramBuilder& pb = *setup.PgmBuilder; + + auto testAbs = [&](auto val, auto expected) { + using T = decltype(val); + const auto data = pb.NewDataLiteral<T>(val); + const auto result = pb.Abs(data); + const auto graph = setup.BuildGraph(result); + AssertNumericValuesEqual(graph->GetValue().template Get<T>(), expected); + }; + + // Test INT_MIN cases - this is UB in abs() but we're not fixing it per user request + // These tests document the current behavior (wraps to INT_MIN) + testAbs(std::numeric_limits<i8>::min(), std::numeric_limits<i8>::min()); + testAbs(std::numeric_limits<i16>::min(), std::numeric_limits<i16>::min()); + testAbs(std::numeric_limits<i32>::min(), std::numeric_limits<i32>::min()); + testAbs(std::numeric_limits<i64>::min(), std::numeric_limits<i64>::min()); + + testAbs(-3.14f, 3.14f); + testAbs(-2.5, 2.5); + } } } diff --git a/yql/essentials/minikql/computation/mkql_computation_node_codegen.h.txt b/yql/essentials/minikql/computation/mkql_computation_node_codegen.h.txt index d90b41db6eb..93eab5a73aa 100644 --- a/yql/essentials/minikql/computation/mkql_computation_node_codegen.h.txt +++ b/yql/essentials/minikql/computation/mkql_computation_node_codegen.h.txt @@ -1019,7 +1019,7 @@ public: {} protected: - NUdf::EFetchStatus Fetch(NUdf::TUnboxedValue& result) override { + NUdf::EFetchStatus Fetch(NUdf::TUnboxedValue& result) override Y_NO_SANITIZE("undefined") { return FetchFunc(Ctx, static_cast<const NUdf::TUnboxedValuePod&>(Stream), 
State, result); } diff --git a/yql/essentials/minikql/invoke_builtins/mkql_builtins_abs.cpp b/yql/essentials/minikql/invoke_builtins/mkql_builtins_abs.cpp index 3a0a9e34f29..0b3a75e2be0 100644 --- a/yql/essentials/minikql/invoke_builtins/mkql_builtins_abs.cpp +++ b/yql/essentials/minikql/invoke_builtins/mkql_builtins_abs.cpp @@ -1,4 +1,5 @@ #include "mkql_builtins_decimal.h" // Y_IGNORE +#include "mkql_safe_ops.h" #include <cmath> @@ -18,7 +19,8 @@ inline T Abs(T v) { template <typename T, std::enable_if_t<std::is_signed<T>::value && std::is_integral<T>::value>* = nullptr> inline T Abs(T v) { - return std::abs(v); + // Use SafeNeg to avoid UB on INT_MIN + return v < 0 ? SafeNeg(v) : v; } template<typename TInput, typename TOutput> @@ -57,7 +59,7 @@ struct TAbs : public TSimpleArithmeticUnary<TInput, TOutput, TAbs<TInput, TOutpu struct TDecimalAbs : public TDecimalUnary<TDecimalAbs> { static NUdf::TUnboxedValuePod Execute(const NUdf::TUnboxedValuePod& arg) { const auto a = arg.GetInt128(); - return a < 0 ? NUdf::TUnboxedValuePod(-a) : arg; + return a < 0 ? 
NUdf::TUnboxedValuePod(SafeNeg(a)) : arg; } #ifndef MKQL_DISABLE_CODEGEN diff --git a/yql/essentials/minikql/invoke_builtins/mkql_builtins_add.cpp b/yql/essentials/minikql/invoke_builtins/mkql_builtins_add.cpp index 5dac2620dea..0d15bb7aad2 100644 --- a/yql/essentials/minikql/invoke_builtins/mkql_builtins_add.cpp +++ b/yql/essentials/minikql/invoke_builtins/mkql_builtins_add.cpp @@ -1,6 +1,7 @@ #include "mkql_builtins_impl.h" // Y_IGNORE #include "mkql_builtins_datetime.h" #include "mkql_builtins_decimal.h" // Y_IGNORE +#include "mkql_safe_ops.h" #include <yql/essentials/minikql/mkql_type_ops.h> @@ -15,7 +16,7 @@ struct TAdd : public TSimpleArithmeticBinary<TLeft, TRight, TOutput, TAdd<TLeft, static TOutput Do(TOutput left, TOutput right) { - return left + right; + return SafeAdd(left, right); } #ifndef MKQL_DISABLE_CODEGEN @@ -34,7 +35,7 @@ struct TDecimalAdd { static NUdf::TUnboxedValuePod Execute(const NUdf::TUnboxedValuePod& left, const NUdf::TUnboxedValuePod& right) { const auto l = left.GetInt128(); const auto r = right.GetInt128(); - const auto a = l + r; + const auto a = SafeAdd(l, r); using namespace NYql::NDecimal; @@ -108,7 +109,7 @@ struct TDateTimeAddT { { const auto lv = ToScaledDate<TLeft>(left.template Get<typename TLeft::TLayout>()); const auto rv = ToScaledDate<TRight>(right.template Get<typename TRight::TLayout>()); - const auto ret = lv + rv; + const auto ret = SafeAdd(lv, rv); if (IsBadScaledDate<TOutput>(ret)) { return NUdf::TUnboxedValuePod(); } @@ -168,7 +169,7 @@ struct TBigIntervalAdd { return NUdf::TUnboxedValuePod(); } - i64 ret = lv + rv; + i64 ret = SafeAdd(lv, rv); if (IsBadInterval<NUdf::TDataType<NUdf::TInterval64>>(ret)) { return NUdf::TUnboxedValuePod(); } diff --git a/yql/essentials/minikql/invoke_builtins/mkql_builtins_dec.cpp b/yql/essentials/minikql/invoke_builtins/mkql_builtins_dec.cpp index 3400b709094..ddcd8f5ee2c 100644 --- a/yql/essentials/minikql/invoke_builtins/mkql_builtins_dec.cpp +++ 
b/yql/essentials/minikql/invoke_builtins/mkql_builtins_dec.cpp @@ -1,4 +1,5 @@ #include "mkql_builtins_decimal.h" // Y_IGNORE +#include "mkql_safe_ops.h" namespace NKikimr { namespace NMiniKQL { @@ -9,7 +10,7 @@ template<typename TInput, typename TOutput> struct TDecrement : public TSimpleArithmeticUnary<TInput, TOutput, TDecrement<TInput, TOutput>> { static TOutput Do(TInput val) { - return --val; + return SafeDec(val); } #ifndef MKQL_DISABLE_CODEGEN @@ -32,7 +33,7 @@ struct TDecimalDec { const auto& bounds = GetBounds<Precision, true, false>(); if (v > bounds.first && v < bounds.second) - return NUdf::TUnboxedValuePod(--v); + return NUdf::TUnboxedValuePod(SafeDec(v)); return NUdf::TUnboxedValuePod(IsNan(v) ? Nan() : (v > 0 ? +Inf() : -Inf())); } diff --git a/yql/essentials/minikql/invoke_builtins/mkql_builtins_inc.cpp b/yql/essentials/minikql/invoke_builtins/mkql_builtins_inc.cpp index 4e0a56e0c60..a267573e985 100644 --- a/yql/essentials/minikql/invoke_builtins/mkql_builtins_inc.cpp +++ b/yql/essentials/minikql/invoke_builtins/mkql_builtins_inc.cpp @@ -1,4 +1,5 @@ #include "mkql_builtins_decimal.h" // Y_IGNORE +#include "mkql_safe_ops.h" namespace NKikimr { namespace NMiniKQL { @@ -9,7 +10,7 @@ template<typename TInput, typename TOutput> struct TIncrement : public TSimpleArithmeticUnary<TInput, TOutput, TIncrement<TInput, TOutput>> { static TOutput Do(TInput val) { - return ++val; + return SafeInc(val); } #ifndef MKQL_DISABLE_CODEGEN @@ -32,7 +33,7 @@ struct TDecimalInc { const auto& bounds = GetBounds<Precision, false, true>(); if (v > bounds.first && v < bounds.second) - return NUdf::TUnboxedValuePod(++v); + return NUdf::TUnboxedValuePod(SafeInc(v)); return NUdf::TUnboxedValuePod(IsNan(v) ? Nan() : (v > 0 ? 
+Inf() : -Inf())); } diff --git a/yql/essentials/minikql/invoke_builtins/mkql_builtins_minus.cpp b/yql/essentials/minikql/invoke_builtins/mkql_builtins_minus.cpp index 63c68532f2e..adf251ffd5b 100644 --- a/yql/essentials/minikql/invoke_builtins/mkql_builtins_minus.cpp +++ b/yql/essentials/minikql/invoke_builtins/mkql_builtins_minus.cpp @@ -1,4 +1,5 @@ #include "mkql_builtins_decimal.h" // Y_IGNORE +#include "mkql_safe_ops.h" namespace NKikimr { namespace NMiniKQL { @@ -11,7 +12,7 @@ struct TMinus : public TSimpleArithmeticUnary<TInput, TOutput, TMinus<TInput, TO static TOutput Do(TInput val) { - return -val; + return SafeNeg(val); } #ifndef MKQL_DISABLE_CODEGEN diff --git a/yql/essentials/minikql/invoke_builtins/mkql_builtins_mul.cpp b/yql/essentials/minikql/invoke_builtins/mkql_builtins_mul.cpp index a8e48c32f9d..de6e4f88565 100644 --- a/yql/essentials/minikql/invoke_builtins/mkql_builtins_mul.cpp +++ b/yql/essentials/minikql/invoke_builtins/mkql_builtins_mul.cpp @@ -1,5 +1,6 @@ #include "mkql_builtins_impl.h" // Y_IGNORE #include "mkql_builtins_datetime.h" +#include "mkql_safe_ops.h" #include <yql/essentials/minikql/mkql_type_ops.h> @@ -14,7 +15,7 @@ struct TMul : public TSimpleArithmeticBinary<TLeft, TRight, TOutput, TMul<TLeft, static TOutput Do(TOutput left, TOutput right) { - return left * right; + return SafeMul(left, right); } #ifndef MKQL_DISABLE_CODEGEN @@ -39,7 +40,7 @@ struct TNumMulInterval { { const auto lv = static_cast<typename TOutput::TLayout>(left.template Get<typename TLeft::TLayout>()); const auto rv = static_cast<typename TOutput::TLayout>(right.template Get<typename TRight::TLayout>()); - const auto ret = lv * rv; + const auto ret = SafeMul(lv, rv); if (rv == 0 || lv == 0) { return NUdf::TUnboxedValuePod(ret); } diff --git a/yql/essentials/minikql/invoke_builtins/mkql_builtins_sub.cpp b/yql/essentials/minikql/invoke_builtins/mkql_builtins_sub.cpp index 18da4fe4c92..bd0bd50418b 100644 --- 
a/yql/essentials/minikql/invoke_builtins/mkql_builtins_sub.cpp +++ b/yql/essentials/minikql/invoke_builtins/mkql_builtins_sub.cpp @@ -1,6 +1,7 @@ #include "mkql_builtins_impl.h" // Y_IGNORE #include "mkql_builtins_datetime.h" #include "mkql_builtins_decimal.h" // Y_IGNORE +#include "mkql_safe_ops.h" #include <yql/essentials/minikql/mkql_type_ops.h> @@ -15,7 +16,7 @@ struct TSub : public TSimpleArithmeticBinary<TLeft, TRight, TOutput, TSub<TLeft, static TOutput Do(TOutput left, TOutput right) { - return left - right; + return SafeSub(left, right); } #ifndef MKQL_DISABLE_CODEGEN @@ -31,7 +32,7 @@ struct TDecimalSub { static NUdf::TUnboxedValuePod Execute(const NUdf::TUnboxedValuePod& left, const NUdf::TUnboxedValuePod& right) { const auto l = left.GetInt128(); const auto r = right.GetInt128(); - const auto s = l - r; + const auto s = SafeSub(l, r); using namespace NYql::NDecimal; @@ -102,7 +103,7 @@ struct TDateTimeSub : public TSimpleArithmeticBinary<typename TLeft::TLayout, ty static typename TOutput::TLayout Do(typename TLeft::TLayout left, typename TRight::TLayout right) { - return ToScaledDate<TLeft>(left) - ToScaledDate<TRight>(right); + return SafeSub(ToScaledDate<TLeft>(left), ToScaledDate<TRight>(right)); } #ifndef MKQL_DISABLE_CODEGEN @@ -129,7 +130,7 @@ struct TIntervalSubInterval { { const auto lv = left.template Get<typename TLeft::TLayout>(); const auto rv = right.template Get<typename TRight::TLayout>(); - const auto ret = lv - rv; + const auto ret = SafeSub(lv, rv); return IsBadInterval<TOutput>(ret) ? 
NUdf::TUnboxedValuePod() : NUdf::TUnboxedValuePod(ret); } @@ -169,7 +170,7 @@ struct TBigIntervalSub { return NUdf::TUnboxedValuePod(); } - i64 ret = lv - rv; + i64 ret = SafeSub(lv, rv); if (IsBadInterval<NUdf::TDataType<NUdf::TInterval64>>(ret)) { return NUdf::TUnboxedValuePod(); } @@ -217,7 +218,7 @@ struct TAnyDateTimeSubIntervalT { { const auto lv = ToScaledDate<TLeft>(left.template Get<typename TLeft::TLayout>()); const auto rv = ToScaledDate<TRight>(right.template Get<typename TRight::TLayout>()); - const auto ret = lv - rv; + const auto ret = SafeSub(lv, rv); if (IsBadDateTime<TOutput>(ret)) { return NUdf::TUnboxedValuePod(); } diff --git a/yql/essentials/minikql/invoke_builtins/mkql_numeric_cast.h b/yql/essentials/minikql/invoke_builtins/mkql_numeric_cast.h new file mode 100644 index 00000000000..e7f2444cb08 --- /dev/null +++ b/yql/essentials/minikql/invoke_builtins/mkql_numeric_cast.h @@ -0,0 +1,62 @@ +#pragma once + +#include <yql/essentials/public/decimal/yql_wide_int.h> + +#include <type_traits> + +namespace NKikimr { +namespace NMiniKQL { + +// TMakeUnsigned - a safe alternative to std::make_unsigned that avoids UB +// when specializing for custom types like NYql::TWide +template<typename T> +struct TMakeUnsigned { + using type = std::make_unsigned_t<T>; +}; + +template<typename T> +using TMakeUnsigned_t = typename TMakeUnsigned<T>::type; + +// Specializations for NYql::TWide types +template<> +struct TMakeUnsigned<NYql::TWide<i8>> { + using type = NYql::TWide<ui8>; +}; + +template<> +struct TMakeUnsigned<NYql::TWide<ui8>> { + using type = NYql::TWide<ui8>; +}; + +template<> +struct TMakeUnsigned<NYql::TWide<i16>> { + using type = NYql::TWide<ui16>; +}; + +template<> +struct TMakeUnsigned<NYql::TWide<ui16>> { + using type = NYql::TWide<ui16>; +}; + +template<> +struct TMakeUnsigned<NYql::TWide<i32>> { + using type = NYql::TWide<ui32>; +}; + +template<> +struct TMakeUnsigned<NYql::TWide<ui32>> { + using type = NYql::TWide<ui32>; +}; + +template<> 
+struct TMakeUnsigned<NYql::TWide<i64>> { + using type = NYql::TWide<ui64>; +}; + +template<> +struct TMakeUnsigned<NYql::TWide<ui64>> { + using type = NYql::TWide<ui64>; +}; + +} // namespace NMiniKQL +} // namespace NKikimr diff --git a/yql/essentials/minikql/invoke_builtins/mkql_safe_ops.h b/yql/essentials/minikql/invoke_builtins/mkql_safe_ops.h new file mode 100644 index 00000000000..d1e92def8d9 --- /dev/null +++ b/yql/essentials/minikql/invoke_builtins/mkql_safe_ops.h @@ -0,0 +1,109 @@ +#pragma once + +#include <yql/essentials/minikql/defs.h> +#include <yql/essentials/minikql/invoke_builtins/mkql_numeric_cast.h> +#include <yql/essentials/public/decimal/yql_decimal.h> + +#include <type_traits> +#include <concepts> + +namespace NKikimr { +namespace NMiniKQL { + +// Safe arithmetic operations that avoid undefined behavior from signed integer overflow +// by performing operations in unsigned arithmetic and casting back to signed. + +// SafeAdd: Addition without signed overflow UB +template <typename T> +[[nodiscard]] constexpr T SafeAdd(T u, T v) + requires std::is_integral_v<T> || std::is_same_v<T, NYql::NDecimal::TInt128> +{ + using TUnsigned = TMakeUnsigned_t<T>; + return static_cast<T>(static_cast<TUnsigned>(u) + + static_cast<TUnsigned>(v)); +} + +template <typename T> +[[nodiscard]] constexpr T SafeAdd(T u, T v) + requires std::is_floating_point_v<T> +{ + return u + v; +} + +// SafeSub: Subtraction without signed overflow UB +template <typename T> +[[nodiscard]] constexpr T SafeSub(T u, T v) + requires std::is_integral_v<T> || std::is_same_v<T, NYql::NDecimal::TInt128> +{ + using TUnsigned = TMakeUnsigned_t<T>; + return static_cast<T>(static_cast<TUnsigned>(u) - + static_cast<TUnsigned>(v)); +} + +template <typename T> +[[nodiscard]] constexpr T SafeSub(T u, T v) + requires std::is_floating_point_v<T> +{ + return u - v; +} + +// SafeMul: Multiplication without signed overflow UB +// Special handling for 16-bit types to avoid implicit promotion overflow 
+template <typename T> +[[nodiscard]] constexpr T SafeMul(T u, T v) + requires std::is_same_v<T, i16> || std::is_same_v<T, ui16> +{ + return static_cast<ui32>(u) * static_cast<ui32>(v); +} + +template <typename T> +[[nodiscard]] constexpr T SafeMul(T u, T v) + requires (std::is_integral_v<T> || std::is_same_v<T, NYql::NDecimal::TInt128>) && + (!std::is_same_v<T, i16>) && (!std::is_same_v<T, ui16>) +{ + using TUnsigned = TMakeUnsigned_t<T>; + return static_cast<T>(static_cast<TUnsigned>(u) * + static_cast<TUnsigned>(v)); +} + +template <typename T> +[[nodiscard]] constexpr T SafeMul(T u, T v) + requires std::is_floating_point_v<T> +{ + return u * v; +} + +// SafeInc: Increment without signed overflow UB +template <typename T> +[[nodiscard]] constexpr T SafeInc(T u) +{ + return SafeAdd(u, T{1}); +} + +// SafeDec: Decrement without signed overflow UB +template <typename T> +[[nodiscard]] constexpr T SafeDec(T u) +{ + return SafeSub(u, T{1}); +} + +// SafeNeg: Negation without signed overflow UB (e.g., -INT_MIN) +// Uses two's complement: -x = ~x + 1 +template <typename T> +[[nodiscard]] constexpr T SafeNeg(T u) + requires std::is_integral_v<T> || std::is_same_v<T, NYql::NDecimal::TInt128> +{ + using TUnsigned = TMakeUnsigned_t<T>; + return static_cast<T>(~static_cast<TUnsigned>(u) + TUnsigned{1}); +} + +template <typename T> +[[nodiscard]] constexpr T SafeNeg(T u) + requires std::is_floating_point_v<T> +{ + return -u; +} + + +} // namespace NMiniKQL +} // namespace NKikimr diff --git a/yql/essentials/minikql/jsonpath/parser/parser.cpp b/yql/essentials/minikql/jsonpath/parser/parser.cpp index 39105a0c731..27678ba3928 100644 --- a/yql/essentials/minikql/jsonpath/parser/parser.cpp +++ b/yql/essentials/minikql/jsonpath/parser/parser.cpp @@ -39,6 +39,10 @@ private: Issues_.back().SetCode(TIssuesIds::JSONPATH_PARSE_ERROR, TSeverityIds::S_ERROR); } + void AddIssue(NYql::TIssue&& issue) override { + Issues_.AddIssue(std::forward<NYql::TIssue>(issue)); + } + TIssues& Issues_; 
}; diff --git a/yql/essentials/parser/common/antlr4/error_listener.cpp b/yql/essentials/parser/common/antlr4/error_listener.cpp index c8d72abc949..73c14175314 100644 --- a/yql/essentials/parser/common/antlr4/error_listener.cpp +++ b/yql/essentials/parser/common/antlr4/error_listener.cpp @@ -1,10 +1,27 @@ #include "error_listener.h" +#include <yql/essentials/core/issue/yql_issue.h> + +#include <util/generic/vector.h> +#include <util/string/builder.h> +#include <util/string/join.h> + namespace antlr4 { - YqlErrorListener::YqlErrorListener(NAST::IErrorCollector* errors, bool* error) + TVector<size_t> ToVector(const antlrcpp::BitSet& ambigAlts) { + TVector<size_t> result; + for (size_t i = 0; i < ambigAlts.size(); ++i) { + if (ambigAlts.test(i)) { + result.push_back(i); + } + } + return result; + } + + YqlErrorListener::YqlErrorListener(NAST::IErrorCollector* errors, bool* error, bool isAmbiguityError) : Errors_(errors) , Error_(error) + , IsAmbiguityError_(isAmbiguityError) { } @@ -16,4 +33,53 @@ namespace antlr4 { Errors_->Error(line, charPositionInLine, msg.c_str()); } + void YqlErrorListener::reportAmbiguity( + Parser* recognizer, + const dfa::DFA& dfa, + size_t startIndex, + size_t stopIndex, + bool exact, + const antlrcpp::BitSet& ambigAlts, + atn::ATNConfigSet* configs) + { + Y_UNUSED(configs); + + size_t ruleIndex = dfa.atnStartState->ruleIndex; + std::string_view ruleName = recognizer->getRuleNames()[ruleIndex]; + + if (// FIXME(YQL-20410): It is a known ambiguity, remove it when + // an expression (x NOT NULL) is a syntax error. + ruleName == "xor_subexpr" || + // Known ambiguity, on ANTLR3 syntactic predicates were used. 
+ ruleName == "neq_subexpr") { + return; + } + + TokenStream* tokens = recognizer->getTokenStream(); + Token* start = tokens->get(startIndex); + Token* stop = tokens->get(stopIndex); + + TString alternatives = JoinSeq(", ", ToVector(ambigAlts)); + + NYql::TPosition startPos(start->getCharPositionInLine(), start->getLine(), "unknown"); + NYql::TPosition stopPos(stop->getCharPositionInLine(), stop->getLine(), "unknown"); + + TString message = TStringBuilder() + << "An" << (exact ? " exactly " : " ") + << "ambiguous decision " << dfa.decision + << " at rule '" << ruleName << "'" + << " with conflicted alternatives {" << alternatives << "}"; + + NYql::TIssue issue(std::move(startPos), std::move(stopPos), std::move(message)); + + if (IsAmbiguityError_) { + *Error_ = true; + issue.SetCode(NYql::UNEXPECTED_ERROR, NYql::TSeverityIds::S_FATAL); + } else { + issue.SetCode(NYql::TIssuesIds::YQL_SYNTAX_AMBIGUITY, NYql::TSeverityIds::S_WARNING); + } + + Errors_->Report(std::move(issue)); + } + } // namespace antlr4 diff --git a/yql/essentials/parser/common/antlr4/error_listener.h b/yql/essentials/parser/common/antlr4/error_listener.h index b78e1563cd7..3d8f6af8a6b 100644 --- a/yql/essentials/parser/common/antlr4/error_listener.h +++ b/yql/essentials/parser/common/antlr4/error_listener.h @@ -9,14 +9,24 @@ namespace antlr4 { class ANTLR4CPP_PUBLIC YqlErrorListener: public BaseErrorListener { NAST::IErrorCollector* Errors_; bool* Error_; + const bool IsAmbiguityError_; public: - YqlErrorListener(NAST::IErrorCollector* errors, bool* error); + YqlErrorListener(NAST::IErrorCollector* errors, bool* error, bool isAmbiguityError = false); virtual void syntaxError( Recognizer* recognizer, Token* offendingSymbol, size_t line, size_t charPositionInLine, const std::string& msg, std::exception_ptr e) override; + + void reportAmbiguity( + Parser* recognizer, + const dfa::DFA& dfa, + size_t startIndex, + size_t stopIndex, + bool exact, + const antlrcpp::BitSet& ambigAlts, + atn::ATNConfigSet* 
configs) override; }; } // namespace antlr4 diff --git a/yql/essentials/parser/common/error.cpp b/yql/essentials/parser/common/error.cpp index 9fad2892213..b6b087631c7 100644 --- a/yql/essentials/parser/common/error.cpp +++ b/yql/essentials/parser/common/error.cpp @@ -14,35 +14,29 @@ namespace NAST { } void IErrorCollector::Error(ui32 line, ui32 col, const TString& message) { - if (NumErrors_ + 1 == MaxErrors_) { - AddError(0, 0, "Too many errors"); - ++NumErrors_; - } - - if (NumErrors_ >= MaxErrors_) { - ythrow TTooManyErrors() << "Too many errors"; - } - + GuardTooManyErrors(); AddError(line, col, message); ++NumErrors_; } - TErrorOutput::TErrorOutput(IOutputStream& err, const TString& name, size_t maxErrors) - : IErrorCollector(maxErrors) - , Err(err) - , Name(name) - { + void IErrorCollector::Report(NYql::TIssue&& issue) { + GuardTooManyErrors(); + bool isError = issue.GetSeverity() >= NYql::TSeverityIds::S_WARNING; + AddIssue(std::forward<NYql::TIssue>(issue)); + if (isError) { + ++NumErrors_; + } } - TErrorOutput::~TErrorOutput() - { - } + void IErrorCollector::GuardTooManyErrors() { + if (NumErrors_ + 1 == MaxErrors_) { + AddError(0, 0, "Too many errors"); + ++NumErrors_; + } - void TErrorOutput::AddError(ui32 line, ui32 col, const TString& message) { - if (!Name.empty()) { - Err << "Query " << Name << ": "; + if (NumErrors_ >= MaxErrors_) { + ythrow TTooManyErrors() << "Too many errors"; } - Err << "Line " << line << " column " << col << " error: " << message; } } // namespace NAST diff --git a/yql/essentials/parser/common/error.h b/yql/essentials/parser/common/error.h index 966d996381e..81a920a2809 100644 --- a/yql/essentials/parser/common/error.h +++ b/yql/essentials/parser/common/error.h @@ -1,5 +1,7 @@ #pragma once +#include <yql/essentials/public/issue/yql_issue.h> + #include <util/generic/yexception.h> #include <util/generic/fwd.h> @@ -18,25 +20,19 @@ namespace NAST { // throws TTooManyErrors void Error(ui32 line, ui32 col, const TString& message); + 
// throws TTooManyErrors + void Report(NYql::TIssue&& issue); + private: + void GuardTooManyErrors(); + virtual void AddError(ui32 line, ui32 col, const TString& message) = 0; + virtual void AddIssue(NYql::TIssue&& issue) = 0; + protected: const size_t MaxErrors_; size_t NumErrors_; }; - class TErrorOutput: public IErrorCollector { - public: - TErrorOutput(IOutputStream& err, const TString& name, size_t maxErrors); - virtual ~TErrorOutput(); - - private: - void AddError(ui32 line, ui32 col, const TString& message) override; - - public: - IOutputStream& Err; - TString Name; - }; - } // namespace NAST diff --git a/yql/essentials/parser/common/issue.h b/yql/essentials/parser/common/issue.h index 5573adb03c3..3a17252cbdb 100644 --- a/yql/essentials/parser/common/issue.h +++ b/yql/essentials/parser/common/issue.h @@ -22,6 +22,12 @@ namespace NSQLTranslation { Issues_.AddIssue(NYql::TPosition(col, line, File_), message); } + void AddIssue(NYql::TIssue&& issue) override { + issue.Position.File = File_; + issue.EndPosition.File = File_; + Issues_.AddIssue(std::forward<NYql::TIssue>(issue)); + } + private: NYql::TIssues& Issues_; const TString File_; diff --git a/yql/essentials/parser/common/ya.make b/yql/essentials/parser/common/ya.make index b0ae371dfe4..290ae7be8b5 100644 --- a/yql/essentials/parser/common/ya.make +++ b/yql/essentials/parser/common/ya.make @@ -2,6 +2,7 @@ LIBRARY() PEERDIR( yql/essentials/public/issue + yql/essentials/core/issue ) SRCS( diff --git a/yql/essentials/parser/proto_ast/antlr4/proto_ast_antlr4.cpp b/yql/essentials/parser/proto_ast/antlr4/proto_ast_antlr4.cpp index a66ba70780f..b88b90b0af3 100644 --- a/yql/essentials/parser/proto_ast/antlr4/proto_ast_antlr4.cpp +++ b/yql/essentials/parser/proto_ast/antlr4/proto_ast_antlr4.cpp @@ -1,12 +1 @@ #include "proto_ast_antlr4.h" - -antlr4::YqlErrorListener::YqlErrorListener(NProtoAST::IErrorCollector* errors, bool* error) - : Errors_(errors), Error_(error) -{ -} - -void 
antlr4::YqlErrorListener::syntaxError(Recognizer * /*recognizer*/, Token * /*offendingSymbol*/, - size_t line, size_t charPositionInLine, const std::string &msg, std::exception_ptr /*e*/) { - *Error_ = true; - Errors_->Error(line, charPositionInLine, msg.c_str()); -} diff --git a/yql/essentials/parser/proto_ast/antlr4/proto_ast_antlr4.h b/yql/essentials/parser/proto_ast/antlr4/proto_ast_antlr4.h index 13cbbc6e73b..12b8b87fea1 100644 --- a/yql/essentials/parser/proto_ast/antlr4/proto_ast_antlr4.h +++ b/yql/essentials/parser/proto_ast/antlr4/proto_ast_antlr4.h @@ -35,23 +35,38 @@ namespace NProtoAST { class TProtoASTBuilder4 { public: - TProtoASTBuilder4(TStringBuf data, const TString& queryName = "query", google::protobuf::Arena* arena = nullptr) + TProtoASTBuilder4( + TStringBuf data, + const TString& queryName = "query", + google::protobuf::Arena* arena = nullptr, + bool isAmbiguityError = false, + bool isAmbiguityDebugging = false + ) : QueryName_(queryName) + , IsAmbiguityError_(isAmbiguityError) , InputStream_(data) , Lexer_(&InputStream_) , TokenStream_(&Lexer_) , Parser_(&TokenStream_, arena) { + if (isAmbiguityDebugging) { + Parser_ + .template getInterpreter<antlr4::atn::ParserATNSimulator>() + ->setPredictionMode(antlr4::atn::PredictionMode::LL_EXACT_AMBIG_DETECTION); + } } google::protobuf::Message* BuildAST(IErrorCollector& errors) { // TODO: find a better way to break on lexer errors - typename antlr4::YqlErrorListener listener(&errors, &Parser_.error); + typename antlr4::YqlErrorListener listener(&errors, &Parser_.error, IsAmbiguityError_); Parser_.removeErrorListeners(); Parser_.addErrorListener(&listener); try { auto result = Parser_.Parse(&errors); Parser_.removeErrorListener(&listener); + if (Parser_.error) { + result = nullptr; + } Parser_.error = false; return result; } catch (const TTooManyErrors&) { @@ -68,6 +83,7 @@ namespace NProtoAST { private: TString QueryName_; + bool IsAmbiguityError_; antlr4::ANTLRInputStream InputStream_; TLexer 
Lexer_; diff --git a/yql/essentials/parser/proto_ast/gen/jsonpath/ya.make b/yql/essentials/parser/proto_ast/gen/jsonpath/ya.make index 783b5945feb..c520e141792 100644 --- a/yql/essentials/parser/proto_ast/gen/jsonpath/ya.make +++ b/yql/essentials/parser/proto_ast/gen/jsonpath/ya.make @@ -47,6 +47,10 @@ ENDIF() SRCS(JsonPathParser.proto) +PEERDIR( + yql/essentials/public/issue/protos +) + EXCLUDE_TAGS(GO_PROTO JAVA_PROTO) END() diff --git a/yql/essentials/providers/common/udf_resolve/yql_outproc_udf_resolver.cpp b/yql/essentials/providers/common/udf_resolve/yql_outproc_udf_resolver.cpp index a8fb9ff5258..e598b2d370f 100644 --- a/yql/essentials/providers/common/udf_resolve/yql_outproc_udf_resolver.cpp +++ b/yql/essentials/providers/common/udf_resolve/yql_outproc_udf_resolver.cpp @@ -221,24 +221,18 @@ public: udfRequest->SetLangVer(udf->LangVer); } - if (request.UdfsSize() && !request.ImportsSize()) { - ctx.AddError(ExceptionToIssue(yexception() << "Attempt to load udfs without any import")); - return false; - } - if (request.UdfsSize() && request.ImportsSize()) { - TResolveResult response; - try { - response = RunResolverAndParseResult(request, { }, *filesBox); - filesBox->Destroy(); - } catch (const std::exception& e) { - ctx.AddError(ExceptionToIssue(e)); - return false; - } - // extract regardless of hasErrors value - hasErrors = !ExtractMetadata(response, usedImports, externalFunctions, ctx) || hasErrors; + TResolveResult response; + try { + response = RunResolverAndParseResult(request, { }, *filesBox); + filesBox->Destroy(); + } catch (const std::exception& e) { + ctx.AddError(ExceptionToIssue(e)); + return false; } + // extract regardless of hasErrors value + hasErrors = !ExtractMetadata(response, usedImports, externalFunctions, ctx) || hasErrors; hasErrors = !LoadFunctionsMetadata(loadedFunctions, *FunctionRegistry_, TypeInfoHelper_, ctx, logLevel) || hasErrors; if (!hasErrors) { diff --git a/yql/essentials/sql/v1/complete/analysis/yql/yql_ut.cpp 
b/yql/essentials/sql/v1/complete/analysis/yql/yql_ut.cpp index 3d064e4673b..233086d9ee4 100644 --- a/yql/essentials/sql/v1/complete/analysis/yql/yql_ut.cpp +++ b/yql/essentials/sql/v1/complete/analysis/yql/yql_ut.cpp @@ -25,7 +25,7 @@ public: Settings_.SyntaxVersion = 1; Lexers_.Antlr4 = NSQLTranslationV1::MakeAntlr4LexerFactory(); - Parsers_.Antlr4 = NSQLTranslationV1::MakeAntlr4ParserFactory(); + Parsers_.Antlr4 = NSQLTranslationV1::MakeAntlr4ParserFactory(/*isAmbiguityError=*/ true); } NYql::TAstParseResult Parse(const TString& query) { diff --git a/yql/essentials/sql/v1/complete/check/check_complete_ut.cpp b/yql/essentials/sql/v1/complete/check/check_complete_ut.cpp index 7743f574321..3b4f28e0fd6 100644 --- a/yql/essentials/sql/v1/complete/check/check_complete_ut.cpp +++ b/yql/essentials/sql/v1/complete/check/check_complete_ut.cpp @@ -24,8 +24,8 @@ Y_UNIT_TEST_SUITE(CheckTests) { }; NSQLTranslationV1::TParsers parsers = { - .Antlr4 = NSQLTranslationV1::MakeAntlr4ParserFactory(), - .Antlr4Ansi = NSQLTranslationV1::MakeAntlr4AnsiParserFactory(), + .Antlr4 = NSQLTranslationV1::MakeAntlr4ParserFactory(/*isAmbiguityError=*/ true), + .Antlr4Ansi = NSQLTranslationV1::MakeAntlr4AnsiParserFactory(/*isAmbiguityError=*/ true), }; NSQLTranslation::TTranslators translators( diff --git a/yql/essentials/sql/v1/format/sql_format_ut.h b/yql/essentials/sql/v1/format/sql_format_ut.h index d487b956e37..395b12caa4e 100644 --- a/yql/essentials/sql/v1/format/sql_format_ut.h +++ b/yql/essentials/sql/v1/format/sql_format_ut.h @@ -584,6 +584,8 @@ Y_UNIT_TEST(CreateTopic) { "CREATE TOPIC topic1 (\n\tCONSUMER c1,\n\tCONSUMER c2 WITH (important = TRUE)\n);\n"}, {"create topic topic1 (consumer c1) with (partition_count_limit = 5)", "CREATE TOPIC topic1 (\n\tCONSUMER c1\n) WITH (\n\tpartition_count_limit = 5\n);\n"}, + {"create topic topic1 (consumer c1) with (availability_period=Interval('PT9H'))", + "CREATE TOPIC topic1 (\n\tCONSUMER c1\n) WITH (\n\tavailability_period = 
Interval('PT9H')\n);\n"}, }; TSetup setup; @@ -600,7 +602,8 @@ Y_UNIT_TEST(AlterTopic) { "ALTER TOPIC topic1\n\tADD CONSUMER c1,\n\tDROP CONSUMER c2\n;\n"}, {"alter topic topic1 set (supported_codecs = 'RAW'), RESET (retention_period)", "ALTER TOPIC topic1\n\tSET (supported_codecs = 'RAW'),\n\tRESET (retention_period)\n;\n"}, - + {"alter topic topic1 alter consumer c1 set (availability_period=Interval('PT9H')), alter consumer c2 reset (availability_period)", + "ALTER TOPIC topic1\n\tALTER CONSUMER c1 SET (availability_period = Interval('PT9H')),\n\tALTER CONSUMER c2 RESET (availability_period)\n;\n"}, }; TSetup setup; diff --git a/yql/essentials/sql/v1/node.h b/yql/essentials/sql/v1/node.h index e8a83b41bf2..fcb0a15955a 100644 --- a/yql/essentials/sql/v1/node.h +++ b/yql/essentials/sql/v1/node.h @@ -1345,6 +1345,7 @@ namespace NSQLTranslationV1 { }; TNodePtr Important; + NYql::TResetableSetting<TNodePtr, void> AvailabilityPeriod; NYql::TResetableSetting<TNodePtr, void> ReadFromTs; NYql::TResetableSetting<TNodePtr, void> SupportedCodecs; }; diff --git a/yql/essentials/sql/v1/proto_parser/antlr4/proto_parser.cpp b/yql/essentials/sql/v1/proto_parser/antlr4/proto_parser.cpp index 33ec032f6cf..6cdf0c6d2fb 100644 --- a/yql/essentials/sql/v1/proto_parser/antlr4/proto_parser.cpp +++ b/yql/essentials/sql/v1/proto_parser/antlr4/proto_parser.cpp @@ -10,26 +10,52 @@ namespace { class TParser : public NSQLTranslation::IParser { public: + explicit TParser(bool isAmbuguityError, bool isAmbiguityDebugging) + : IsAmbiguityError_(isAmbuguityError) + , IsAmbiguityDebugging_(isAmbiguityDebugging) + { + } + google::protobuf::Message* Parse( const TString& query, const TString& queryName, NProtoAST::IErrorCollector& err, google::protobuf::Arena* arena) final { YQL_ENSURE(arena); - NProtoAST::TProtoASTBuilder4<NALPDefaultAntlr4::SQLv1Antlr4Parser, NALPDefaultAntlr4::SQLv1Antlr4Lexer> builder(query, queryName, arena); + NProtoAST::TProtoASTBuilder4< + NALPDefaultAntlr4::SQLv1Antlr4Parser, 
+ NALPDefaultAntlr4::SQLv1Antlr4Lexer> + builder(query, queryName, arena, IsAmbiguityError_, IsAmbiguityDebugging_); return builder.BuildAST(err); } + +private: + bool IsAmbiguityError_; + bool IsAmbiguityDebugging_; }; class TFactory: public NSQLTranslation::IParserFactory { public: + explicit TFactory(bool isAmbuguityError, bool isAmbiguityDebugging) + : IsAmbiguityError_(isAmbuguityError) + , IsAmbiguityDebugging_(isAmbiguityDebugging) + { + } + std::unique_ptr<NSQLTranslation::IParser> MakeParser() const final { - return std::make_unique<TParser>(); + return std::make_unique<TParser>(IsAmbiguityError_, IsAmbiguityDebugging_); } + +private: + bool IsAmbiguityError_; + bool IsAmbiguityDebugging_; }; } -NSQLTranslation::TParserFactoryPtr MakeAntlr4ParserFactory() { - return MakeIntrusive<TFactory>(); +NSQLTranslation::TParserFactoryPtr MakeAntlr4ParserFactory( + bool isAmbiguityError, + bool isAmbiguityDebugging) +{ + return MakeIntrusive<TFactory>(isAmbiguityError, isAmbiguityDebugging); } } diff --git a/yql/essentials/sql/v1/proto_parser/antlr4/proto_parser.h b/yql/essentials/sql/v1/proto_parser/antlr4/proto_parser.h index 5fcb4606736..5b33b43c0d3 100644 --- a/yql/essentials/sql/v1/proto_parser/antlr4/proto_parser.h +++ b/yql/essentials/sql/v1/proto_parser/antlr4/proto_parser.h @@ -3,6 +3,8 @@ namespace NSQLTranslationV1 { -NSQLTranslation::TParserFactoryPtr MakeAntlr4ParserFactory(); +NSQLTranslation::TParserFactoryPtr MakeAntlr4ParserFactory( + bool isAmbiguityError = false, + bool isAmbiguityDebugging = false); } diff --git a/yql/essentials/sql/v1/proto_parser/antlr4_ansi/proto_parser.cpp b/yql/essentials/sql/v1/proto_parser/antlr4_ansi/proto_parser.cpp index fdc0cf64704..bf0160ade02 100644 --- a/yql/essentials/sql/v1/proto_parser/antlr4_ansi/proto_parser.cpp +++ b/yql/essentials/sql/v1/proto_parser/antlr4_ansi/proto_parser.cpp @@ -10,26 +10,52 @@ namespace { class TParser : public NSQLTranslation::IParser { public: + explicit TParser(bool isAmbuguityError, 
bool isAmbiguityDebugging) + : IsAmbiguityError_(isAmbuguityError) + , IsAmbiguityDebugging_(isAmbiguityDebugging) + { + } + google::protobuf::Message* Parse( const TString& query, const TString& queryName, NProtoAST::IErrorCollector& err, google::protobuf::Arena* arena) final { YQL_ENSURE(arena); - NProtoAST::TProtoASTBuilder4<NALPAnsiAntlr4::SQLv1Antlr4Parser, NALPAnsiAntlr4::SQLv1Antlr4Lexer> builder(query, queryName, arena); + NProtoAST::TProtoASTBuilder4< + NALPAnsiAntlr4::SQLv1Antlr4Parser, + NALPAnsiAntlr4::SQLv1Antlr4Lexer> + builder(query, queryName, arena, IsAmbiguityError_, IsAmbiguityDebugging_); return builder.BuildAST(err); } + +private: + bool IsAmbiguityError_; + bool IsAmbiguityDebugging_; }; class TFactory: public NSQLTranslation::IParserFactory { public: + explicit TFactory(bool isAmbuguityError, bool isAmbiguityDebugging) + : IsAmbiguityError_(isAmbuguityError) + , IsAmbiguityDebugging_(isAmbiguityDebugging) + { + } + std::unique_ptr<NSQLTranslation::IParser> MakeParser() const final { - return std::make_unique<TParser>(); + return std::make_unique<TParser>(IsAmbiguityError_, IsAmbiguityDebugging_); } + +private: + bool IsAmbiguityError_; + bool IsAmbiguityDebugging_; }; } -NSQLTranslation::TParserFactoryPtr MakeAntlr4AnsiParserFactory() { - return MakeIntrusive<TFactory>(); +NSQLTranslation::TParserFactoryPtr MakeAntlr4AnsiParserFactory( + bool isAmbiguityError, + bool isAmbiguityDebugging) +{ + return MakeIntrusive<TFactory>(isAmbiguityError, isAmbiguityDebugging); } } diff --git a/yql/essentials/sql/v1/proto_parser/antlr4_ansi/proto_parser.h b/yql/essentials/sql/v1/proto_parser/antlr4_ansi/proto_parser.h index 15174f44d23..395d89674d6 100644 --- a/yql/essentials/sql/v1/proto_parser/antlr4_ansi/proto_parser.h +++ b/yql/essentials/sql/v1/proto_parser/antlr4_ansi/proto_parser.h @@ -3,6 +3,8 @@ namespace NSQLTranslationV1 { -NSQLTranslation::TParserFactoryPtr MakeAntlr4AnsiParserFactory(); +NSQLTranslation::TParserFactoryPtr 
MakeAntlr4AnsiParserFactory( + bool isAmbiguityError = false, + bool isAmbiguityDebugging = false); } diff --git a/yql/essentials/sql/v1/query.cpp b/yql/essentials/sql/v1/query.cpp index d20359a8b1e..0b2461180b5 100644 --- a/yql/essentials/sql/v1/query.cpp +++ b/yql/essentials/sql/v1/query.cpp @@ -1778,22 +1778,28 @@ static INode::TPtr CreateConsumerDesc(const TTopicConsumerDescription& desc, con if (desc.Settings.Important) { settings = node.L(settings, node.Q(node.Y(node.Q("important"), desc.Settings.Important))); } + if (const auto& availabilityPeriod = desc.Settings.AvailabilityPeriod) { + if (availabilityPeriod.IsSet()) { + settings = node.L(settings, node.Q(node.Y(node.Q("setAvailabilityPeriod"), availabilityPeriod.GetValueSet()))); + } else { + YQL_ENSURE(alter, "Cannot reset on create"); + settings = node.L(settings, node.Q(node.Y(node.Q("resetAvailabilityPeriod"), node.Q(node.Y())))); + } + } if (const auto& readFromTs = desc.Settings.ReadFromTs) { if (readFromTs.IsSet()) { settings = node.L(settings, node.Q(node.Y(node.Q("setReadFromTs"), readFromTs.GetValueSet()))); - } else if (alter) { + } else { + YQL_ENSURE(alter, "Cannot reset on create"); settings = node.L(settings, node.Q(node.Y(node.Q("resetReadFromTs"), node.Q(node.Y())))); - } else { - YQL_ENSURE(false, "Cannot reset on create"); } } if (const auto& readFromTs = desc.Settings.SupportedCodecs) { if (readFromTs.IsSet()) { settings = node.L(settings, node.Q(node.Y(node.Q("setSupportedCodecs"), readFromTs.GetValueSet()))); - } else if (alter) { - settings = node.L(settings, node.Q(node.Y(node.Q("resetSupportedCodecs"), node.Q(node.Y())))); } else { - YQL_ENSURE(false, "Cannot reset on create"); + YQL_ENSURE(alter, "Cannot reset on create"); + settings = node.L(settings, node.Q(node.Y(node.Q("resetSupportedCodecs"), node.Q(node.Y())))); } } return node.Y( diff --git a/yql/essentials/sql/v1/sql.cpp b/yql/essentials/sql/v1/sql.cpp index c21611a4a96..7ddc2a40b18 100644 --- 
a/yql/essentials/sql/v1/sql.cpp +++ b/yql/essentials/sql/v1/sql.cpp @@ -43,7 +43,6 @@ TAstNode* SqlASTsToYqls(const std::vector<::NSQLv1Generated::TRule_sql_stmt_core void SqlASTToYqlImpl(NYql::TAstParseResult& res, const google::protobuf::Message& protoAst, TContext& ctx) { - YQL_ENSURE(!ctx.Issues.Size()); res.Root = SqlASTToYql(protoAst, ctx); res.Pool = std::move(ctx.Pool); if (!res.Root) { diff --git a/yql/essentials/sql/v1/sql_translation.cpp b/yql/essentials/sql/v1/sql_translation.cpp index 81d177cb51b..8675f9e1308 100644 --- a/yql/essentials/sql/v1/sql_translation.cpp +++ b/yql/essentials/sql/v1/sql_translation.cpp @@ -2503,12 +2503,13 @@ bool TSqlTranslation::CreateTableSettings(const TRule_with_table_settings& setti return true; } -bool StoreConsumerSettingsEntry( +static bool StoreConsumerSettingsEntry( const TIdentifier& id, const TRule_topic_consumer_setting_value* value, TSqlExpression& ctx, TTopicConsumerSettings& settings, - bool reset + bool reset, bool alter ) { YQL_ENSURE(value || reset); + const TStringBuf statement = alter ? 
"ALTER CONSUMER"sv : "CONSUMER"sv; TNodePtr valueExprNode; if (value) { valueExprNode = ctx.Build(value->GetRule_expr1()); @@ -2519,7 +2520,7 @@ bool StoreConsumerSettingsEntry( } if (to_lower(id.Name) == "important") { if (settings.Important) { - ctx.Error() << to_upper(id.Name) << " specified multiple times in ALTER CONSUMER statements for single consumer"; + ctx.Error() << to_upper(id.Name) << " specified multiple times in " << statement << " statement for single consumer"; return false; } if (reset) { @@ -2531,10 +2532,23 @@ bool StoreConsumerSettingsEntry( return false; } settings.Important = valueExprNode; - + } else if (to_lower(id.Name) == "availability_period") { + if (settings.AvailabilityPeriod) { + ctx.Error() << to_upper(id.Name) << " specified multiple times in " << statement << " statement for single consumer"; + return false; + } + if (reset) { + settings.AvailabilityPeriod.Reset(); + } else { + if (valueExprNode->GetOpName() != "Interval") { + ctx.Error() << "Literal of Interval type is expected for " << to_upper(id.Name) << " setting"; + return false; + } + settings.AvailabilityPeriod.Set(valueExprNode); + } } else if (to_lower(id.Name) == "read_from") { if (settings.ReadFromTs) { - ctx.Error() << to_upper(id.Name) << " specified multiple times in ALTER CONSUMER statements for single consumer"; + ctx.Error() << to_upper(id.Name) << " specified multiple times in " << statement << " statement for single consumer"; return false; } if (reset) { @@ -2545,7 +2559,7 @@ bool StoreConsumerSettingsEntry( } } else if (to_lower(id.Name) == "supported_codecs") { if (settings.SupportedCodecs) { - ctx.Error() << to_upper(id.Name) << " specified multiple times in ALTER CONSUMER statements for single consumer"; + ctx.Error() << to_upper(id.Name) << " specified multiple times in " << statement << " statement for single consumer"; return false; } if (reset) { @@ -2576,7 +2590,8 @@ bool TSqlTranslation::CreateConsumerSettings( if (!StoreConsumerSettingsEntry( 
IdEx(firstEntry.GetRule_an_id1(), *this), &firstEntry.GetRule_topic_consumer_setting_value3(), - expr, settings, false + expr, settings, false, + /* alter = */ false )) { return false; } @@ -2585,7 +2600,8 @@ bool TSqlTranslation::CreateConsumerSettings( if (!StoreConsumerSettingsEntry( IdEx(entry.GetRule_an_id1(), *this), &entry.GetRule_topic_consumer_setting_value3(), - expr, settings, false + expr, settings, false, + /* alter = */ false )) { return false; } @@ -2626,7 +2642,8 @@ bool TSqlTranslation::AlterTopicConsumerEntry( if (!StoreConsumerSettingsEntry( IdEx(resetNode.GetRule_an_id3(), *this), nullptr, - expr, alterConsumer.Settings, true + expr, alterConsumer.Settings, true, + /* alter = */ true )) { return false; } @@ -2635,7 +2652,8 @@ bool TSqlTranslation::AlterTopicConsumerEntry( if (!StoreConsumerSettingsEntry( IdEx(resetItem.GetRule_an_id2(), *this), nullptr, - expr, alterConsumer.Settings, true + expr, alterConsumer.Settings, true, + /* alter = */ true )) { return false; } diff --git a/yql/essentials/sql/v1/sql_ut_common.h b/yql/essentials/sql/v1/sql_ut_common.h index d6d86e9954b..391b11c2040 100644 --- a/yql/essentials/sql/v1/sql_ut_common.h +++ b/yql/essentials/sql/v1/sql_ut_common.h @@ -8505,15 +8505,18 @@ Y_UNIT_TEST_SUITE(ExternalTable) { } Y_UNIT_TEST_SUITE(TopicsDDL) { - void TestQuery(const TString& query, bool expectOk = true) { + void TestQuery(const TString& query, bool expectOk = true, const TVector<TString> issueSubstrings = {}) { TStringBuilder finalQuery; finalQuery << "use plato;" << Endl << query; auto res = SqlToYql(finalQuery, 10, "kikimr"); if (expectOk) { - UNIT_ASSERT_C(res.IsOk(), res.Issues.ToString()); + UNIT_ASSERT_C(res.IsOk(), "Query: " << query << "\n" << "Issues: " << res.Issues.ToString()); } else { - UNIT_ASSERT(!res.IsOk()); + UNIT_ASSERT_C(!res.IsOk(), "Query: " << query << "\n" << "Issues: " << res.Issues.ToString()); + for (const auto& issue : issueSubstrings) { + 
UNIT_ASSERT_STRING_CONTAINS_C(res.Issues.ToOneLineString(), issue, "Query: " << query << "\n" << "Issues: " << res.Issues.ToString()); + } } } @@ -8539,6 +8542,9 @@ Y_UNIT_TEST_SUITE(TopicsDDL) { TestQuery(R"( CREATE TOPIC topic1 (CONSUMER cons1, CONSUMER cons2 WITH (important = false)) WITH (supported_codecs = "1,2,3"); )"); + TestQuery(R"( + CREATE TOPIC topic1 (CONSUMER cons1, CONSUMER cons2 WITH (important = false, availability_period = Interval('PT9H'))) WITH (supported_codecs = "1,2,3"); + )"); } Y_UNIT_TEST(AlterTopicSimple) { @@ -8563,6 +8569,8 @@ Y_UNIT_TEST_SUITE(TopicsDDL) { ALTER CONSUMER consumer3 SET (important = false, read_from = 1), ALTER CONSUMER consumer3 RESET (supported_codecs), DROP CONSUMER consumer4, + ALTER CONSUMER consumer5 SET (availability_period = Interval('PT9H')), + ALTER CONSUMER consumer6 RESET (availability_period), SET (partition_count_limit = 11, retention_period = Interval('PT1H')), RESET(metering_mode) )"); @@ -8593,26 +8601,36 @@ Y_UNIT_TEST_SUITE(TopicsDDL) { TestQuery(R"( CREATE TOPIC topic1 WITH (retention_period = 123); - )", false); + )", false, {"3:58: Error: Literal of Interval type is expected for retention"}); TestQuery(R"( CREATE TOPIC topic1 (CONSUMER cons1, CONSUMER cons1 WITH (important = false)); - )", false); + )", false, {"3:59: Error: Consumer cons1 defined more than once"}); TestQuery(R"( CREATE TOPIC topic1 (CONSUMER cons1 WITH (bad_option = false)); - )", false); + )", false, {"3:68: Error: BAD_OPTION: unknown option for consumer"}); + TestQuery(R"( + CREATE TOPIC topic1 (CONSUMER cons1 WITH (important = false, important = true)); + )", false, {"3:86: Error: IMPORTANT specified multiple times in CONSUMER statement for single consumer"}); TestQuery(R"( ALTER TOPIC topic1 ADD CONSUMER cons1, ALTER CONSUMER cons1 RESET (important); - )", false); + )", false, {"3:80: Error: IMPORTANT reset is not supported"}); TestQuery(R"( ALTER TOPIC topic1 ADD CONSUMER consumer1, ALTER CONSUMER consumer3 SET 
(supported_codecs = "RAW", read_from = 1), ALTER CONSUMER consumer3 RESET (supported_codecs); - )", false); + )", false, {"5:49: Error: SUPPORTED_CODECS specified multiple times in ALTER CONSUMER statement for single consumer"}); TestQuery(R"( ALTER TOPIC topic1 ADD CONSUMER consumer1, ALTER CONSUMER consumer3 SET (supported_codecs = "RAW", read_from = 1), ALTER CONSUMER consumer3 SET (read_from = 2); - )", false); + )", false, {"5:59: Error: READ_FROM specified multiple times in CONSUMER statement for single consumer"}); + TestQuery(R"( + CREATE TOPIC topic1 (CONSUMER cons1 WITH (availability_period = 3600)); + )", false, {"3:77: Error: Literal of Interval type is expected for AVAILABILITY_PERIOD setting"}); + TestQuery(R"( + ALTER TOPIC topic1 + ALTER CONSUMER consumer3 SET (availability_period = false); + )", false, {"4:69: Error: Literal of Interval type is expected for AVAILABILITY_PERIOD setting"}); } Y_UNIT_TEST(TopicWithPrefix) { diff --git a/yql/essentials/tests/sql/sql2yql/test_sql2yql.py b/yql/essentials/tests/sql/sql2yql/test_sql2yql.py index 4e91cf2f2b5..c45072f3d28 100644 --- a/yql/essentials/tests/sql/sql2yql/test_sql2yql.py +++ b/yql/essentials/tests/sql/sql2yql/test_sql2yql.py @@ -52,6 +52,7 @@ def get_sql2yql_cmd(suite, case, case_file, out_dir, ansi_lexer, test_format, te cmd.append('--yql') cmd.append('--test-lexers') cmd.append('--test-complete') + cmd.append('--test-syntax-ambiguity') cmd.append('--output=%s' % os.path.join(out_dir, 'sql.yql')) if suite == 'kikimr': cmd.append('--cluster=plato@kikimr') diff --git a/yql/essentials/tools/arrow_kernels_dump/arrow_kernels_dump.cpp b/yql/essentials/tools/arrow_kernels_dump/arrow_kernels_dump.cpp index 9c7778bfa70..a12b9b8781d 100644 --- a/yql/essentials/tools/arrow_kernels_dump/arrow_kernels_dump.cpp +++ b/yql/essentials/tools/arrow_kernels_dump/arrow_kernels_dump.cpp @@ -5,7 +5,7 @@ #include <util/generic/algorithm.h> #include <util/folder/path.h> -int main(int argc, char **argv) { +int main(int 
argc, char** argv) { Y_UNUSED(argc); Cerr << TFsPath(argv[0]).GetName() << " ABI version: " << NKikimr::NUdf::CurrentAbiVersionStr() << Endl; diff --git a/yql/essentials/tools/arrow_kernels_dump/ya.make b/yql/essentials/tools/arrow_kernels_dump/ya.make index 95b00f95b25..29659ab6725 100644 --- a/yql/essentials/tools/arrow_kernels_dump/ya.make +++ b/yql/essentials/tools/arrow_kernels_dump/ya.make @@ -1,5 +1,7 @@ PROGRAM() +ENABLE(YQL_STYLE_CPP) + SRCS( arrow_kernels_dump.cpp ) diff --git a/yql/essentials/tools/astdiff/astdiff.cpp b/yql/essentials/tools/astdiff/astdiff.cpp index f35b04bbf34..2414bb71a0a 100644 --- a/yql/essentials/tools/astdiff/astdiff.cpp +++ b/yql/essentials/tools/astdiff/astdiff.cpp @@ -29,11 +29,10 @@ std::string CalculateDiff(const TString& oldAst, const TString& newAst) { return ss.str(); } - const int DIFF_LINES_LIMIT = 16; void DumpSmallNodes(const TExprNode* rootOne, const TExprNode* rootTwo) { - const auto isDumpSmall = [] (const TString& dump) { + const auto isDumpSmall = [](const TString& dump) { return std::count(dump.begin(), dump.end(), '\n') < DIFF_LINES_LIMIT; }; const auto rootOneDump = rootOne->Dump(); @@ -45,10 +44,11 @@ void DumpSmallNodes(const TExprNode* rootOne, const TExprNode* rootTwo) { return; } - Cerr << rootOneDump << '\n' << rootTwoDump; + Cerr << rootOneDump << '\n' + << rootTwoDump; } -int Main(int argc, const char *argv[]) +int Main(int argc, const char* argv[]) { if (argc != 3) { PrintProgramSvnVersion(); @@ -103,17 +103,20 @@ int Main(int argc, const char *argv[]) if (rootOne->Type() != rootTwo->Type()) { Cerr << "Node in " << fileOne << " at [" << rootOnePos.Row << ":" << rootOnePos.Column << "] type is " << rootOne->Type() << Endl; Cerr << "Node in " << fileTwo << " at [" << rootTwoPos.Row << ":" << rootTwoPos.Column << "] type is " << rootTwo->Type() << Endl; - Cerr << "\nFile diff:\n" << diff; + Cerr << "\nFile diff:\n" + << diff; } else if (rootOne->ChildrenSize() != rootTwo->ChildrenSize()) { Cerr << "Node '" 
<< rootOne->Content() << "' in " << fileOne << " at [" << rootOnePos.Row << ":" << rootOnePos.Column << "] has " << rootOne->ChildrenSize() << " children." << Endl; Cerr << "Node '" << rootTwo->Content() << "' in " << fileTwo << " at [" << rootTwoPos.Row << ":" << rootTwoPos.Column << "] has " << rootTwo->ChildrenSize() << " children." << Endl; DumpSmallNodes(rootOne, rootTwo); - Cerr << "\nFile diff:\n" << diff; + Cerr << "\nFile diff:\n" + << diff; } else { Cerr << "Node in " << fileOne << " at [" << rootOnePos.Row << ":" << rootOnePos.Column << "]:"; Cerr << "Node in " << fileTwo << " at [" << rootTwoPos.Row << ":" << rootTwoPos.Column << "]:"; DumpSmallNodes(rootOne, rootTwo); - Cerr << "\nFile diff:\n" << diff; + Cerr << "\nFile diff:\n" + << diff; } return 5; } @@ -121,14 +124,13 @@ int Main(int argc, const char *argv[]) return 0; } -int main(int argc, const char *argv[]) { +int main(int argc, const char* argv[]) { NYql::NBacktrace::RegisterKikimrFatalActions(); NYql::NBacktrace::EnableKikimrSymbolize(); try { return Main(argc, argv); - } - catch (...) { + } catch (...) 
{ Cerr << CurrentExceptionMessage() << Endl; return 1; } diff --git a/yql/essentials/tools/astdiff/ya.make b/yql/essentials/tools/astdiff/ya.make index 2facdcd095a..e7e3e193c1a 100644 --- a/yql/essentials/tools/astdiff/ya.make +++ b/yql/essentials/tools/astdiff/ya.make @@ -1,5 +1,7 @@ PROGRAM(astdiff) +ENABLE(YQL_STYLE_CPP) + SRCS( astdiff.cpp ) diff --git a/yql/essentials/tools/langver_dump/langver_dump.cpp b/yql/essentials/tools/langver_dump/langver_dump.cpp index ea048c7377e..b6f92553816 100644 --- a/yql/essentials/tools/langver_dump/langver_dump.cpp +++ b/yql/essentials/tools/langver_dump/langver_dump.cpp @@ -12,7 +12,7 @@ void WriteVersion(NJsonWriter::TBuf& json, TLangVersion ver) { json.WriteString(result); } -int Main(int argc, const char *argv[]) +int Main(int argc, const char* argv[]) { Y_UNUSED(argc); Y_UNUSED(argv); @@ -35,14 +35,13 @@ int Main(int argc, const char *argv[]) return 0; } -int main(int argc, const char *argv[]) { +int main(int argc, const char* argv[]) { NYql::NBacktrace::RegisterKikimrFatalActions(); NYql::NBacktrace::EnableKikimrSymbolize(); try { return Main(argc, argv); - } - catch (...) { + } catch (...) 
{ Cerr << CurrentExceptionMessage() << Endl; return 1; } diff --git a/yql/essentials/tools/langver_dump/test/test.py b/yql/essentials/tools/langver_dump/test/test.py index cf366233197..c2d38143b96 100644 --- a/yql/essentials/tools/langver_dump/test/test.py +++ b/yql/essentials/tools/langver_dump/test/test.py @@ -9,12 +9,8 @@ TOOL_PATH = yatest.common.binary_path('yql/essentials/tools/langver_dump/langver def test_langver_dump(): with open(os.path.join(DATA_PATH, "langver.json")) as f: langver_from_file = json.load(f) - res = yatest.common.execute( - [TOOL_PATH], - check_exit_code=True, - wait=True - ) + res = yatest.common.execute([TOOL_PATH], check_exit_code=True, wait=True) langver_from_tool = json.loads(res.stdout) - assert langver_from_file == langver_from_tool, 'JSON_DIFFER\n' \ - 'File:\n %(langver_from_file)s\n\n' \ - 'Tool:\n %(langver_from_tool)s\n' % locals() + assert langver_from_file == langver_from_tool, ( + 'JSON_DIFFER\n' 'File:\n %(langver_from_file)s\n\n' 'Tool:\n %(langver_from_tool)s\n' % locals() + ) diff --git a/yql/essentials/tools/langver_dump/ya.make b/yql/essentials/tools/langver_dump/ya.make index c89ab5ebd56..725844ff728 100644 --- a/yql/essentials/tools/langver_dump/ya.make +++ b/yql/essentials/tools/langver_dump/ya.make @@ -1,5 +1,7 @@ PROGRAM() +ENABLE(YQL_STYLE_CPP) + SRCS( langver_dump.cpp ) diff --git a/yql/essentials/tools/minirun/minirun.cpp b/yql/essentials/tools/minirun/minirun.cpp index 1282212bb2c..d2887e549a5 100644 --- a/yql/essentials/tools/minirun/minirun.cpp +++ b/yql/essentials/tools/minirun/minirun.cpp @@ -32,13 +32,12 @@ public: } }; -} // NYql +} // namespace NYql -int main(int argc, const char *argv[]) { +int main(int argc, const char* argv[]) { try { return NYql::TMiniRunTool().Main(argc, argv); - } - catch (...) { + } catch (...) 
{ Cerr << CurrentExceptionMessage() << Endl; return 1; } diff --git a/yql/essentials/tools/minirun/ya.make b/yql/essentials/tools/minirun/ya.make index bb1a62dbb63..4dad8f00ed3 100644 --- a/yql/essentials/tools/minirun/ya.make +++ b/yql/essentials/tools/minirun/ya.make @@ -1,5 +1,7 @@ PROGRAM() +ENABLE(YQL_STYLE_CPP) + SUBSCRIBER(g:yql) ALLOCATOR(J) diff --git a/yql/essentials/tools/pg-make-test/__main__.py b/yql/essentials/tools/pg-make-test/__main__.py index e21104e7c7c..4497dda592f 100644 --- a/yql/essentials/tools/pg-make-test/__main__.py +++ b/yql/essentials/tools/pg-make-test/__main__.py @@ -120,7 +120,7 @@ class TestCaseBuilder: statements = list(self.split_out_file(splitted_stmts, outdata, logger)) logger.debug("Matching sql statements to .out file lines") - for (s_sql, s_out) in statements: + for s_sql, s_out in statements: stmt = '\n'.join(str(sql_line) for sql_line in s_sql) only_out_stmts[stmt] += 1 logger.debug( @@ -144,7 +144,9 @@ class TestCaseBuilder: for init_script in init_scripts: logger.debug("Running init script %s '%s'", self.config.runner, init_script) - with open(init_script, 'rb') as f, open(init_out_name, 'wb') as fout, open(init_err_name, 'wb') as ferr: + with open(init_script, 'rb') as f, open(init_out_name, 'wb') as fout, open( + init_err_name, 'wb' + ) as ferr: pi = subprocess.run(runner_args, stdin=f, stdout=fout, stderr=ferr) if pi.returncode != 0: @@ -164,7 +166,7 @@ class TestCaseBuilder: real_statements = list(self.split_out_file(splitted_stmts, out, logger)) logger.debug("Matching sql statements to pgrun's output") - for (s_sql, s_out) in real_statements: + for s_sql, s_out in real_statements: stmt = '\n'.join(str(sql_line) for sql_line in s_sql) logger.debug( "<<<<<<<<<<<<<<<<<<<<<<<<<<<<\n%s\n>>>>>>>>>>>>>>>>>>>>>>>>>>>>\n%s\n============================", @@ -178,21 +180,31 @@ class TestCaseBuilder: del only_out_stmts[stmt] else: only_pgrun_stmts[stmt] += 1 - reserrfile = reserrfile_base if outfile_idx == 0 else 
reserrfile_base.with_suffix(reserrfile_base.suffix + ".{0}".format(outfile_idx)) + reserrfile = ( + reserrfile_base + if outfile_idx == 0 + else reserrfile_base.with_suffix(reserrfile_base.suffix + ".{0}".format(outfile_idx)) + ) shutil.move(test_err_name, reserrfile) if only_pgrun_stmts: - logger.info("Statements in pgrun output, but not in out file:\n%s", - "\n--------------------------------\n".join(stmt for stmt in only_pgrun_stmts)) + logger.info( + "Statements in pgrun output, but not in out file:\n%s", + "\n--------------------------------\n".join(stmt for stmt in only_pgrun_stmts), + ) if only_out_stmts: - logger.info("Statements in out file, but not in pgrun output:\n%s", - "\n--------------------------------\n".join(stmt for stmt in only_out_stmts)) + logger.info( + "Statements in out file, but not in pgrun output:\n%s", + "\n--------------------------------\n".join(stmt for stmt in only_out_stmts), + ) stmts_run = 0 stmts = [] outs = [] - assert len(statements) == len(real_statements), f"Incorrect statements split in {test_name}. Statements in out-file: {len(statements)}, statements in pgrun output: {len(real_statements)}" - for ((l_sql, out), (r_sql, res)) in zip(statements, real_statements): + assert len(statements) == len( + real_statements + ), f"Incorrect statements split in {test_name}. 
Statements in out-file: {len(statements)}, statements in pgrun output: {len(real_statements)}" + for (l_sql, out), (r_sql, res) in zip(statements, real_statements): if l_sql != r_sql: logger.warning("out SQL <> pgrun SQL:\n <: %s\n >: %s", l_sql, r_sql) break @@ -478,7 +490,7 @@ def patch_cases(cases, patches, patchdir): default=INIT_SCRIPTS_CFG, required=False, multiple=False, - type=click.Path(exists=True, dir_okay=False, resolve_path=True, path_type=Path) + type=click.Path(exists=True, dir_okay=False, resolve_path=True, path_type=Path), ) @click.option( "--initscriptsdir", @@ -486,7 +498,7 @@ def patch_cases(cases, patches, patchdir): default=INIT_SCRIPTS_DIR, required=False, multiple=False, - type=click.Path(exists=True, file_okay=False, resolve_path=True, path_type=Path) + type=click.Path(exists=True, file_okay=False, resolve_path=True, path_type=Path), ) @click.option("--skip", "-s", help="Comma-separated list of testsuits to skip", multiple=False, type=click.STRING) @click.option("--runner", help="Test runner", default=RUNNER, required=False, multiple=False, type=click.STRING) @@ -514,7 +526,22 @@ def patch_cases(cases, patches, patchdir): ) @click.option("--debug/--no-debug", help="Logs verbosity", default=False, required=False) @click.version_option(version=svn_version(), prog_name=PROGRAM_NAME) -def cli(cases, srcdir, dstdir, patchdir, udf, initscriptscfg, initscriptsdir, skip, runner, splitter, report, parallel, logfile, debug): +def cli( + cases, + srcdir, + dstdir, + patchdir, + udf, + initscriptscfg, + initscriptsdir, + skip, + runner, + splitter, + report, + parallel, + logfile, + debug, +): setup_logging(logfile, debug) if udf: @@ -553,7 +580,11 @@ def cli(cases, srcdir, dstdir, patchdir, udf, initscriptscfg, initscriptsdir, sk builder = TestCaseBuilder(config) if config.parallel: with Pool() as pool: - results = list(pool.imap_unordered(builder.build, [(test_case, init_scripts.get(test_case.stem) or []) for test_case in cases])) + results = list( + 
pool.imap_unordered( + builder.build, [(test_case, init_scripts.get(test_case.stem) or []) for test_case in cases] + ) + ) else: results = [builder.build(c) for c in cases] diff --git a/yql/essentials/tools/pg_catalog_dump/pg_catalog_dump.cpp b/yql/essentials/tools/pg_catalog_dump/pg_catalog_dump.cpp index e2daf449722..2a1014eee02 100644 --- a/yql/essentials/tools/pg_catalog_dump/pg_catalog_dump.cpp +++ b/yql/essentials/tools/pg_catalog_dump/pg_catalog_dump.cpp @@ -7,7 +7,7 @@ using namespace NYql; using namespace NJson; -int Main(int argc, const char *argv[]) +int Main(int argc, const char* argv[]) { Y_UNUSED(argc); Y_UNUSED(argv); @@ -27,14 +27,12 @@ int Main(int argc, const char *argv[]) json.WriteKey("strict").WriteBool(desc.IsStrict); const auto& retTypeDesc = NPg::LookupType(desc.ResultType); json.WriteKey("ret_type").WriteString(retTypeDesc.Name); - json.WriteKey("ret_type_fixed").WriteBool(retTypeDesc.PassByValue && - retTypeDesc.TypeLen > 0 && retTypeDesc.TypeLen <= 8); + json.WriteKey("ret_type_fixed").WriteBool(retTypeDesc.PassByValue && retTypeDesc.TypeLen > 0 && retTypeDesc.TypeLen <= 8); if (desc.VariadicType != 0) { const auto& varTypeDesc = NPg::LookupType(desc.VariadicType); - json.WriteKey("var_type").WriteString( varTypeDesc.Name); + json.WriteKey("var_type").WriteString(varTypeDesc.Name); if (varTypeDesc.Name != "any") { - json.WriteKey("var_type_fixed").WriteBool(varTypeDesc.PassByValue && - varTypeDesc.TypeLen > 0 && varTypeDesc.TypeLen <= 8); + json.WriteKey("var_type_fixed").WriteBool(varTypeDesc.PassByValue && varTypeDesc.TypeLen > 0 && varTypeDesc.TypeLen <= 8); } } json.WriteKey("args").BeginList(); @@ -42,8 +40,7 @@ int Main(int argc, const char *argv[]) const auto& argTypeDesc = NPg::LookupType(a); json.BeginObject(); json.WriteKey("arg_type").WriteString(argTypeDesc.Name); - json.WriteKey("arg_type_fixed").WriteBool(argTypeDesc.PassByValue && - argTypeDesc.TypeLen > 0 && argTypeDesc.TypeLen <= 8); + 
json.WriteKey("arg_type_fixed").WriteBool(argTypeDesc.PassByValue && argTypeDesc.TypeLen > 0 && argTypeDesc.TypeLen <= 8); json.EndObject(); } json.EndList(); @@ -67,8 +64,7 @@ int Main(int argc, const char *argv[]) const auto& argTypeDesc = NPg::LookupType(a); json.BeginObject(); json.WriteKey("arg_type").WriteString(argTypeDesc.Name); - json.WriteKey("arg_type_fixed").WriteBool(argTypeDesc.PassByValue && - argTypeDesc.TypeLen > 0 && argTypeDesc.TypeLen <= 8); + json.WriteKey("arg_type_fixed").WriteBool(argTypeDesc.PassByValue && argTypeDesc.TypeLen > 0 && argTypeDesc.TypeLen <= 8); json.EndObject(); } @@ -87,20 +83,15 @@ int Main(int argc, const char *argv[]) const auto& transDesc = NPg::LookupType(NPg::LookupProc(desc.TransFuncId).ResultType); json.WriteKey("trans_type").WriteString(transDesc.Name); - json.WriteKey("trans_type_fixed").WriteBool(transDesc.PassByValue && - transDesc.TypeLen > 0 && transDesc.TypeLen <= 8); + json.WriteKey("trans_type_fixed").WriteBool(transDesc.PassByValue && transDesc.TypeLen > 0 && transDesc.TypeLen <= 8); - const auto& serializedDesc = NPg::LookupType(NPg::LookupProc(desc.SerializeFuncId ? - desc.SerializeFuncId : desc.TransFuncId).ResultType); + const auto& serializedDesc = NPg::LookupType(NPg::LookupProc(desc.SerializeFuncId ? desc.SerializeFuncId : desc.TransFuncId).ResultType); json.WriteKey("serialized_type").WriteString(serializedDesc.Name); - json.WriteKey("serialized_type_fixed").WriteBool(serializedDesc.PassByValue && - serializedDesc.TypeLen > 0 && serializedDesc.TypeLen <= 8); + json.WriteKey("serialized_type_fixed").WriteBool(serializedDesc.PassByValue && serializedDesc.TypeLen > 0 && serializedDesc.TypeLen <= 8); - const auto& retDesc = NPg::LookupType(NPg::LookupProc(desc.FinalFuncId ? - desc.FinalFuncId : desc.TransFuncId).ResultType); + const auto& retDesc = NPg::LookupType(NPg::LookupProc(desc.FinalFuncId ? 
desc.FinalFuncId : desc.TransFuncId).ResultType); json.WriteKey("ret_type").WriteString(retDesc.Name); - json.WriteKey("ret_type_fixed").WriteBool(retDesc.PassByValue && - retDesc.TypeLen > 0 && retDesc.TypeLen <= 8); + json.WriteKey("ret_type_fixed").WriteBool(retDesc.PassByValue && retDesc.TypeLen > 0 && retDesc.TypeLen <= 8); json.WriteKey("has_init_value").WriteBool(!desc.InitValue.empty()); json.EndObject(); @@ -113,14 +104,13 @@ int Main(int argc, const char *argv[]) return 0; } -int main(int argc, const char *argv[]) { +int main(int argc, const char* argv[]) { NYql::NBacktrace::RegisterKikimrFatalActions(); NYql::NBacktrace::EnableKikimrSymbolize(); try { return Main(argc, argv); - } - catch (...) { + } catch (...) { Cerr << CurrentExceptionMessage() << Endl; return 1; } diff --git a/yql/essentials/tools/pg_catalog_dump/ya.make b/yql/essentials/tools/pg_catalog_dump/ya.make index d49789a7614..bf170e9f14c 100644 --- a/yql/essentials/tools/pg_catalog_dump/ya.make +++ b/yql/essentials/tools/pg_catalog_dump/ya.make @@ -1,5 +1,7 @@ PROGRAM(pg_catalog_dump) +ENABLE(YQL_STYLE_CPP) + SRCS( pg_catalog_dump.cpp ) diff --git a/yql/essentials/tools/pgrun/pgrun.cpp b/yql/essentials/tools/pgrun/pgrun.cpp index 65e4da83d8e..7f8cfec4a01 100644 --- a/yql/essentials/tools/pgrun/pgrun.cpp +++ b/yql/essentials/tools/pgrun/pgrun.cpp @@ -49,7 +49,7 @@ namespace NMiniKQL = NKikimr::NMiniKQL; const ui32 PRETTY_FLAGS = NYql::TAstPrintFlags::PerLine | NYql::TAstPrintFlags::ShortQuote | NYql::TAstPrintFlags::AdaptArbitraryContent; -enum class EByteaOutput{ +enum class EByteaOutput { hex, escape, }; @@ -66,8 +66,7 @@ bool IsEscapedChar(const TString& s, size_t pos) { } class TStatementIterator final - : public TInputRangeAdaptor<TStatementIterator> -{ + : public TInputRangeAdaptor<TStatementIterator> { enum class State { InOperator, EndOfOperator, @@ -133,8 +132,9 @@ public: const TString* Next() { - if (TStringBuf::npos == Pos_) + if (TStringBuf::npos == Pos_) { return nullptr; + } 
size_t startPos = Pos_; size_t curPos = Pos_; @@ -186,8 +186,9 @@ public: stmt << RemoveEmptyLines(rawStmt, inStatement); // inv: Pos_ is at the start of next token - if (startPos == endPos) + if (startPos == endPos) { return nullptr; + } stmt << '\n'; Cur_ = stmt; @@ -198,7 +199,6 @@ public: } private: - // States: // - in-operator // - line comment @@ -219,43 +219,45 @@ private: // $tag$, not preceded by alnum char (a bit of simplification here but sufficient) -> tag := tag, next: $ string literal // ; -> current_mode := end-of-operator, next: end-of-operator - // - line comment - // EOL -> next: current_mode + // - line comment + // EOL -> next: current_mode - // - block comment - // /* -> ++depth - // */ -> --depth, if (depth == 0) -> next: current_mode + // - block comment + // /* -> ++depth + // */ -> --depth, if (depth == 0) -> next: current_mode - // - quoted identifier - // " -> next: in-operator + // - quoted identifier + // " -> next: in-operator - // - string literal - // ' -> next: in-operator + // - string literal + // ' -> next: in-operator - // - E string literal - // ' -> if not preceeded by \ next: in-operator + // - E string literal + // ' -> if not preceeded by \ next: in-operator - // - $ string literal - // $tag$ -> next: in-operator + // - $ string literal + // $tag$ -> next: in-operator - // - end-of-operator - // -- -> next: line comment, just once - // /* -> depth := 1, next: block comment - // non-space char -> unget, emit, current_mode := in-operator, next: in-operator + // - end-of-operator + // -- -> next: line comment, just once + // /* -> depth := 1, next: block comment + // non-space char -> unget, emit, current_mode := in-operator, next: in-operator // In every state: // EOS -> emit if consumed part of the input is not empty bool SaveDollarTag() { - if (Pos_ + 1 == Program_.length()) + if (Pos_ + 1 == Program_.length()) { return false; + } auto p = Program_.cbegin() + (Pos_ + 1); - if (std::isdigit(*p)) + if (std::isdigit(*p)) { 
return false; + } - for (;p != Program_.cend(); ++p) { + for (; p != Program_.cend(); ++p) { if (*p == '$') { auto bp = &Program_[Pos_]; auto l = p - bp; @@ -264,8 +266,9 @@ private: return true; } - if (!(std::isalpha(*p) || std::isdigit(*p) || *p == '_')) + if (!(std::isalpha(*p) || std::isdigit(*p) || *p == '_')) { return false; + } } return false; } @@ -300,8 +303,8 @@ private: switch (Program_[Pos_]) { case '\'': State_ = (!StandardConformingStrings_ || 0 < Pos_ && std::toupper(Program_[Pos_ - 1]) == 'E') - ? State::EscapedStringLiteral - : State::StringLiteral; + ? State::EscapedStringLiteral + : State::StringLiteral; break; case '"': @@ -310,8 +313,8 @@ private: case ';': State_ = Mode_ = IsCopyFromStdin(startPos, Pos_) - ? State::InCopyFromStdin - : State::EndOfOperator; + ? State::InCopyFromStdin + : State::EndOfOperator; break; case '-': @@ -331,8 +334,9 @@ private: case '$': if (Pos_ == 0 || std::isspace(Program_[Pos_ - 1])) { - if (SaveDollarTag()) + if (SaveDollarTag()) { State_ = State::DollarStringLiteral; + } } break; @@ -346,7 +350,7 @@ private: break; case ':': - if (Pos_ == 0 || Program_[Pos_-1] == '\n') { + if (Pos_ == 0 || Program_[Pos_ - 1] == '\n') { State_ = State::InVar; } break; @@ -420,8 +424,9 @@ private: bool LineCommentParser() { Pos_ = Program_.find('\n', Pos_); - if (TString::npos == Pos_) + if (TString::npos == Pos_) { return true; + } ++Pos_; if (Program_.length() == Pos_) { @@ -441,10 +446,11 @@ private: bool BlockCommentParser() { Pos_ = Program_.find_first_of("*/", Pos_); - if (TString::npos == Pos_) + if (TString::npos == Pos_) { return true; + } - switch(Program_[Pos_]) { + switch (Program_[Pos_]) { case '/': if (Pos_ < Program_.length() && Program_[Pos_ + 1] == '*') { ++Depth_; @@ -475,8 +481,9 @@ private: bool QuotedIdentifierParser() { Pos_ = Program_.find('"', Pos_); - if (TString::npos == Pos_) + if (TString::npos == Pos_) { return true; + } ++Pos_; if (Program_.length() == Pos_) { @@ -493,8 +500,9 @@ private: bool 
StringLiteralParser() { Pos_ = Program_.find('\'', Pos_); - if (TString::npos == Pos_) + if (TString::npos == Pos_) { return true; + } ++Pos_; if (Program_.length() == Pos_) { @@ -511,8 +519,9 @@ private: bool EscapedStringLiteralParser() { Pos_ = Program_.find('\'', Pos_); - if (TString::npos == Pos_) + if (TString::npos == Pos_) { return true; + } if (IsEscapedChar(Program_, Pos_)) { ++Pos_; @@ -536,8 +545,9 @@ private: Pos_ = Program_.find(Tag_, Pos_); - if (TString::npos == Pos_) + if (TString::npos == Pos_) { return true; + } Pos_ += Tag_.length(); if (Program_.length() == Pos_) { @@ -556,8 +566,9 @@ private: bool MetaCommandParser() { Pos_ = Program_.find('\n', Pos_); - if (TString::npos == Pos_) + if (TString::npos == Pos_) { return true; + } ++Pos_; if (Program_.length() == Pos_) { @@ -571,8 +582,9 @@ private: bool InCopyFromStdinParser() { Pos_ = Program_.find("\n\\.\n", Pos_); - if (TString::npos == Pos_) + if (TString::npos == Pos_) { return true; + } Pos_ += 4; return Emit(false); @@ -583,8 +595,9 @@ private: // TODO: validate var name Pos_ = Program_.find('\n', Pos_); - if (TString::npos == Pos_) + if (TString::npos == Pos_) { return true; + } ++Pos_; if (Program_.length() == Pos_) { @@ -670,14 +683,17 @@ TString GetFormattedStmt(const TStringBuf& stmt) { } pos = next_pos + 1; } - if (pos < stmt.length()) + if (pos < stmt.length()) { result += stmt.substr(pos); + } - if (0 < result.length() && '\n' == result.back()) + if (0 < result.length() && '\n' == result.back()) { result.pop_back(); + } - if (0 < result.length() && '\r' == result.back()) + if (0 < result.length() && '\r' == result.back()) { result.pop_back(); + } return result; } @@ -708,8 +724,9 @@ TString GetPgErrorMessage(const TIssue& issue) { auto pos = msg.find(anchor); - if (TString::npos == pos) + if (TString::npos == pos) { return TString(msg); + } return msg.substr(pos + anchor.length()); } @@ -718,7 +735,7 @@ void WriteErrorToStream(const TProgramPtr program) { 
program->PrintErrorsTo(Cerr); - for (const auto& topIssue: program->Issues()) { + for (const auto& topIssue : program->Issues()) { WalkThroughIssues(topIssue, true, [&](const TIssue& issue, ui16 /*level*/) { const auto msg = GetPgErrorMessage(issue); Cout << msg; @@ -737,10 +754,10 @@ inline const TString FormatBool(const TString& value) static const TString T = "t"; static const TString F = "f"; - return (value == "true") ? T - : (value == "false") ? F - : (value == nullRepr) ? nullRepr - : ythrow yexception() << "Unexpected bool literal: " << value; + return (value == "true") ? T + : (value == "false") ? F + : (value == nullRepr) ? nullRepr + : ythrow yexception() << "Unexpected bool literal: " << value; } inline const TString FormatNumeric(const TString& value) @@ -756,11 +773,11 @@ const TString FormatFloat(const TString& value, std::function<TString(const TStr static const TString minf = "-Infinity"; try { - return (value == "") ? "" - : (value == "nan") ? nan - : (value == "inf") ? inf - : (value == "-inf") ? minf - : formatter(value); + return (value == "") ? "" + : (value == "nan") ? nan + : (value == "inf") ? inf + : (value == "-inf") ? 
minf + : formatter(value); } catch (const std::exception& e) { Cerr << "Unexpected float value '" << value << "'\n"; return ""; @@ -770,13 +787,13 @@ const TString FormatFloat(const TString& value, std::function<TString(const TStr inline const TString FormatFloat4(const TString& value) { return FormatFloat(value, - [] (const TString& val) { return TString(fmt::format("{:.8g}", std::stof(val))); }); + [](const TString& val) { return TString(fmt::format("{:.8g}", std::stof(val))); }); } inline const TString FormatFloat8(const TString& value) { return FormatFloat(value, - [] (const TString& val) { return TString(fmt::format("{:.15g}", std::stod(val))); }); + [](const TString& val) { return TString(fmt::format("{:.15g}", std::stod(val))); }); } inline const TString FormatTransparent(const TString& value) @@ -784,14 +801,14 @@ inline const TString FormatTransparent(const TString& value) return value; } -static const THashMap<TColumnType, CellFormatter> ColumnFormatters { - { "bool", FormatBool }, - { "numeric", FormatNumeric }, - { "float4", FormatFloat4 }, - { "float8", FormatFloat8 }, +static const THashMap<TColumnType, CellFormatter> ColumnFormatters{ + {"bool", FormatBool}, + {"numeric", FormatNumeric}, + {"float4", FormatFloat4}, + {"float8", FormatFloat8}, }; -static const THashSet<TColumnType> RightAlignedTypes { +static const THashSet<TColumnType> RightAlignedTypes{ "int2", "int4", "int8", @@ -812,11 +829,13 @@ struct TColumn { std::string FormatCell(const TString& data, const TColumn& column, size_t index, size_t numberOfColumns) { const auto delim = (index == 0) ? 
" " : " | "; - if (column.RightAligned) + if (column.RightAligned) { return fmt::format("{0}{1:>{2}}", delim, data, column.Width); + } - if (index == numberOfColumns - 1) + if (index == numberOfColumns - 1) { return fmt::format("{0}{1}", delim, data); + } return fmt::format("{0}{1:<{2}}", delim, data, column.Width); } @@ -824,8 +843,8 @@ std::string FormatCell(const TString& data, const TColumn& column, size_t index, TString GetCellData(const NYT::TNode& cell, const TColumn& column) { if (column.Type == "bytea") { const auto rawValue = (cell.IsList()) - ? Base64Decode(cell.AsList()[0].AsString()) - : cell.AsString(); + ? Base64Decode(cell.AsList()[0].AsString()) + : cell.AsString(); switch (byteaOutput) { case EByteaOutput::hex: { @@ -845,13 +864,13 @@ TString GetCellData(const NYT::TNode& cell, const TColumn& column) { TString result; ui64 expectedSize = std::accumulate(rawValue.cbegin(), rawValue.cend(), 0U, - [] (ui64 acc, char c) { - return acc + ((c == '\\') - ? 2 - : ((ui8)c < 0x20 || 0x7e < (ui8)c) - ? 4 - : 1); - }); + [](ui64 acc, char c) { + return acc + ((c == '\\') + ? 2 + : ((ui8)c < 0x20 || 0x7e < (ui8)c) + ? 
4 + : 1); + }); result.resize(expectedSize); auto p = result.begin(); for (const auto c : rawValue) { @@ -864,7 +883,7 @@ TString GetCellData(const NYT::TNode& cell, const TColumn& column) { *p++ = '\\'; *p++ = ((val >> 6) & 03) + '0'; *p++ = ((val >> 3) & 07) + '0'; - *p++ = (val & 07) + '0'; + *p++ = (val & 07) + '0'; } else { *p++ = c; } @@ -884,7 +903,7 @@ void WriteTableToStream(IOutputStream& stream, const NYT::TNode::TListType& cols TVector<TColumn> columns; TList<TVector<TString>> formattedData; - for (const auto& col: cols) { + for (const auto& col : cols) { const auto& colName = col[0].AsString(); const auto& colType = col[1][1].AsString(); @@ -900,17 +919,19 @@ void WriteTableToStream(IOutputStream& stream, const NYT::TNode::TListType& cols for (const auto& row : rows) { auto& rowData = formattedData.emplace_back(); - { int i = 0; - for (const auto& cell : row.AsList()) { - auto& c = columns[i]; + { + int i = 0; + for (const auto& cell : row.AsList()) { + auto& c = columns[i]; - const auto cellData = cell.HasValue() ? GetCellData(cell, c) : nullRepr; + const auto cellData = cell.HasValue() ? 
GetCellData(cell, c) : nullRepr; - rowData.emplace_back(c.Formatter(cellData)); - c.Width = std::max(c.Width, rowData.back().length()); + rowData.emplace_back(c.Formatter(cellData)); + c.Width = std::max(c.Width, rowData.back().length()); - ++i; - }} + ++i; + } + } } if (columns.empty()) { @@ -918,7 +939,8 @@ void WriteTableToStream(IOutputStream& stream, const NYT::TNode::TListType& cols } else { const auto totalTableWidth = std::accumulate(columns.cbegin(), columns.cend(), std::size_t{0}, - [] (const auto& sum, const auto& elem) { return sum + elem.Width; }) + columns.size() * 3 - 1; + [](const auto& sum, const auto& elem) { return sum + elem.Width; }) + + columns.size() * 3 - 1; TString filler(totalTableWidth, '-'); stream << fmt::format(" {0:^{1}} ", columns[0].Name, columns[0].Width); for (size_t i = 1, pos = columns[0].Width + 2; i < columns.size(); ++i) { @@ -928,11 +950,12 @@ void WriteTableToStream(IOutputStream& stream, const NYT::TNode::TListType& cols filler[pos] = '+'; pos += c.Width + 3; } - stream << '\n' << filler; + stream << '\n' + << filler; } for (const auto& row : formattedData) { - stream << '\n'; + stream << '\n'; for (size_t i = 0; i < row.size(); ++i) { stream << FormatCell(row[i], columns[i], i, columns.size()); @@ -948,9 +971,9 @@ std::pair<TString, TString> GetYtTableDataPaths(const TFsPath& dataDir, const TS } void CreateYtFileTable(const TFsPath& dataDir, const TString tableName, const TExprNode::TPtr columnsNode, - THashMap<TString, TString>& tablesMapping, TExprContext& ctx, const TPosition& pos) { - const auto [dataFilePath, attrFilePath] = - GetYtTableDataPaths(dataDir, tableName); + THashMap<TString, TString>& tablesMapping, TExprContext& ctx, const TPosition& pos) { + const auto [dataFilePath, attrFilePath] = + GetYtTableDataPaths(dataDir, tableName); TFile dataFile{dataFilePath, CreateNew}; TFile attrFile{attrFilePath, CreateNew}; @@ -963,18 +986,18 @@ void CreateYtFileTable(const TFsPath& dataDir, const TString tableName, const 
TE TStringBuilder ysonType; ysonType << "[\"StructType\";["; - for (const auto &columnNode : columnsNode->Children()) { - const auto &colName = columnNode->Child(0)->Content(); - const auto &colTypeNode = columnNode->Child(1); + for (const auto& columnNode : columnsNode->Children()) { + const auto& colName = columnNode->Child(0)->Content(); + const auto& colTypeNode = columnNode->Child(1); - columnOrder.AddColumn(TString(colName)); + columnOrder.AddColumn(TString(colName)); - ysonType << fmt::format("[\"{0}\";[\"{1}\";\"{2}\";];];", - colName, colTypeNode->Content(), - colTypeNode->Child(0)->Content()); + ysonType << fmt::format("[\"{0}\";[\"{1}\";\"{2}\";];];", + colName, colTypeNode->Content(), + colTypeNode->Child(0)->Content()); } ysonType << "];]"; - const auto *typeNode = NCommon::ParseTypeFromYson(TStringBuf(ysonType), ctx, pos); + const auto* typeNode = NCommon::ParseTypeFromYson(TStringBuf(ysonType), ctx, pos); rowSpec->SetType(typeNode->Cast<TStructExprType>()); rowSpec->SetColumnOrder(std::move(columnOrder)); @@ -1054,9 +1077,8 @@ void WriteToYtTableScheme( Y_ENSURE(columnsNode); CreateYtFileTable(tempDir.Path(), TString(tableName), columnsNode->ChildPtr(1), - yqlNativeServices->GetTablesMapping(), ctx, writeNode.Pos(ctx)); - } - else if (mode == "drop") { + yqlNativeServices->GetTablesMapping(), ctx, writeNode.Pos(ctx)); + } else if (mode == "drop") { DeleteYtFileTable(tempDir.Path(), TString(tableName), yqlNativeServices->GetTablesMapping()); } } @@ -1095,7 +1117,7 @@ void FillTablesMapping(const TFsPath& dataDir, THashMap<TString, TString>& table dataDir.List(children); bool regMsgLogged = false; - for (const auto& f: children) { + for (const auto& f : children) { if (f.GetExtension() != "attr") { continue; } @@ -1158,7 +1180,7 @@ int Main(int argc, char* argv[]) fileStorage = WithAsync(fileStorage); auto funcRegistry = CreateFunctionRegistry(&NYql::NBacktrace::KikimrBackTrace, CreateBuiltinRegistry(), false, udfsPaths); - IUdfResolver::TPtr 
udfResolver = NCommon::CreateSimpleUdfResolver(funcRegistry.Get(), fileStorage, true);; + IUdfResolver::TPtr udfResolver = NCommon::CreateSimpleUdfResolver(funcRegistry.Get(), fileStorage, true); bool keepTempFiles = true; bool emulateOutputForMultirun = false; @@ -1206,7 +1228,8 @@ int Main(int argc, char* argv[]) const auto stmt = GetFormattedStmt(raw_stmt); Cout << stmt << '\n'; - Cerr << "<sql-statement>\n" << stmt << "\n</sql-statement>\n"; + Cerr << "<sql-statement>\n" + << stmt << "\n</sql-statement>\n"; if (stmt[0] == '\\') { ProcessMetaCmd(stmt); @@ -1268,15 +1291,15 @@ int Main(int argc, char* argv[]) PrintExprTo(program, Cerr); } - static const THashSet<TString> ignoredNodes{"CommitAll!", "Commit!" }; + static const THashSet<TString> ignoredNodes{"CommitAll!", "Commit!"}; const auto opNode = NYql::FindNode(program->ExprRoot(), - [] (const TExprNode::TPtr& node) { return !ignoredNodes.contains(node->Content()); }); + [](const TExprNode::TPtr& node) { return !ignoredNodes.contains(node->Content()); }); if (opNode->IsCallable("Write!")) { Y_ENSURE(opNode->ChildrenSize() == 5); const auto* keyNode = opNode->Child(2); const bool isWriteToTableSchemeNode = keyNode->IsCallable("Key") && 0 < keyNode->ChildrenSize() && - keyNode->Child(0)->Child(0)->IsAtom("tablescheme"); + keyNode->Child(0)->Child(0)->IsAtom("tablescheme"); if (isWriteToTableSchemeNode) { try { @@ -1311,9 +1334,9 @@ int Main(int argc, char* argv[]) } if (program->HasResults()) { - if (needPrintResult) { + if (needPrintResult) { Cerr << program->ResultsAsString() << Endl; - } + } const auto root = ParseYson(program->ResultsAsString()); @@ -1340,10 +1363,8 @@ int main(int argc, char* argv[]) } } return Main(argc, argv); - } - catch (...) { + } catch (...) 
{ Cerr << CurrentExceptionMessage() << Endl; return 1; } } - diff --git a/yql/essentials/tools/pgrun/ya.make b/yql/essentials/tools/pgrun/ya.make index b22318c1698..a497f5d2e33 100644 --- a/yql/essentials/tools/pgrun/ya.make +++ b/yql/essentials/tools/pgrun/ya.make @@ -2,6 +2,8 @@ IF (NOT OPENSOURCE) PROGRAM(pgrun) +ENABLE(YQL_STYLE_CPP) + ALLOCATOR(J) SRCS( diff --git a/yql/essentials/tools/purebench/purebench.cpp b/yql/essentials/tools/purebench/purebench.cpp index 48ea07b20a4..a500afce61a 100644 --- a/yql/essentials/tools/purebench/purebench.cpp +++ b/yql/essentials/tools/purebench/purebench.cpp @@ -34,10 +34,11 @@ using namespace NYql::NPureCalc; using namespace NKikimr::NMiniKQL; using namespace NYql::NUdf; -struct TPickleInputSpec : public TInputSpecBase { +struct TPickleInputSpec: public TInputSpecBase { TPickleInputSpec(const TVector<NYT::TNode>& schemas) : Schemas(schemas) - {} + { + } const TVector<NYT::TNode>& GetSchemas() const final { return Schemas; @@ -53,13 +54,12 @@ public: const TPickleInputSpec& /* inputSpec */, ui32 index, IInputStream* underlying, - IWorker* worker - ) - : TCustomListValue(memInfo) - , Underlying_(underlying) - , Worker_(worker) - , ScopedAlloc_(Worker_->GetScopedAlloc()) - , Packer_(false, Worker_->GetInputType(index)) + IWorker* worker) + : TCustomListValue(memInfo) + , Underlying_(underlying) + , Worker_(worker) + , ScopedAlloc_(Worker_->GetScopedAlloc()) + , Packer_(false, Worker_->GetInputType(index)) { } @@ -78,7 +78,7 @@ public: YQL_ENSURE(read == sizeof(len)); if (len > RecordBuffer_.size()) { - RecordBuffer_.resize(Max<size_t>(2*RecordBuffer_.size(), len)); + RecordBuffer_.resize(Max<size_t>(2 * RecordBuffer_.size(), len)); } Underlying_->LoadOrFail(RecordBuffer_.data(), len); @@ -109,9 +109,9 @@ struct TInputSpecTraits<TPickleInputSpec> { static void PreparePullListWorker(const TPickleInputSpec& spec, IPullListWorker* worker, const TVector<IInputStream*>& streams) { YQL_ENSURE(worker->GetInputsCount() == 
streams.size(), - "number of input streams should match number of inputs provided by spec"); + "number of input streams should match number of inputs provided by spec"); - with_lock(worker->GetScopedAlloc()) { + with_lock (worker->GetScopedAlloc()) { auto& holderFactory = worker->GetGraph().GetHolderFactory(); for (ui32 i = 0; i < streams.size(); i++) { auto input = holderFactory.template Create<TPickleListValue>( @@ -122,10 +122,11 @@ struct TInputSpecTraits<TPickleInputSpec> { } }; -struct TPickleOutputSpec : public TOutputSpecBase { +struct TPickleOutputSpec: public TOutputSpecBase { TPickleOutputSpec(const NYT::TNode& schema) : Schema(schema) - {} + { + } const NYT::TNode& GetSchema() const final { return Schema; @@ -146,7 +147,8 @@ public: TPickleOutputHandle(TWorkerHolder<IPullListWorker> worker) : Worker_(std::move(worker)) , Packer_(false, Worker_->GetOutputType()) - {} + { + } NKikimr::NMiniKQL::TType* GetOutputType() const final { return const_cast<NKikimr::NMiniKQL::TType*>(Worker_->GetOutputType()); @@ -159,7 +161,7 @@ public: TBindTerminator bind(Worker_->GetGraph().GetTerminator()); - with_lock(Worker_->GetScopedAlloc()) { + with_lock (Worker_->GetScopedAlloc()) { const auto outputIterator = Worker_->GetOutputIterator(); TUnboxedValue value; @@ -192,10 +194,11 @@ struct TOutputSpecTraits<TPickleOutputSpec> { } }; -struct TPrintOutputSpec : public TOutputSpecBase { +struct TPrintOutputSpec: public TOutputSpecBase { TPrintOutputSpec(const NYT::TNode& schema) : Schema(schema) - {} + { + } const NYT::TNode& GetSchema() const final { return Schema; @@ -208,7 +211,8 @@ class TPrintOutputHandle final: public TStreamOutputHandle { public: TPrintOutputHandle(TWorkerHolder<IPullListWorker> worker) : Worker_(std::move(worker)) - {} + { + } NKikimr::NMiniKQL::TType* GetOutputType() const final { return const_cast<NKikimr::NMiniKQL::TType*>(Worker_->GetOutputType()); @@ -221,7 +225,7 @@ public: TBindTerminator bind(Worker_->GetGraph().GetTerminator()); - 
with_lock(Worker_->GetScopedAlloc()) { + with_lock (Worker_->GetScopedAlloc()) { const auto outputIterator = Worker_->GetOutputIterator(); TUnboxedValue value; @@ -278,7 +282,7 @@ TStringStream MakeGenInput(ui64 count) { } template <typename TInputSpec, typename TOutputSpec> -using TRunCallable = std::function<void (const THolder<TPullListProgram<TInputSpec, TOutputSpec>>&)>; +using TRunCallable = std::function<void(const THolder<TPullListProgram<TInputSpec, TOutputSpec>>&)>; template <typename TOutputSpec> NYT::TNode RunGenSql( @@ -286,8 +290,7 @@ NYT::TNode RunGenSql( const TVector<NYT::TNode>& inputSchema, const TString& sql, ETranslationMode isPg, - TRunCallable<TPickleInputSpec, TOutputSpec> runCallable -) { + TRunCallable<TPickleInputSpec, TOutputSpec> runCallable) { auto inputSpec = TPickleInputSpec(inputSchema); auto outputSpec = TOutputSpec({NYT::TNode::CreateEntity()}); auto program = factory->MakePullListProgram(inputSpec, outputSpec, sql, isPg); @@ -303,8 +306,7 @@ void ShowResults( const TVector<NYT::TNode>& inputSchema, const TString& sql, ETranslationMode isPg, - TStream* input -) { + TStream* input) { auto inputSpec = TInputSpec(inputSchema); auto outputSpec = TPrintOutputSpec({NYT::TNode::CreateEntity()}); auto program = factory->MakePullListProgram(inputSpec, outputSpec, sql, isPg); @@ -327,8 +329,7 @@ double RunBenchmarks( const TString& sql, ETranslationMode isPg, ui32 repeats, - TRunCallable<TInputSpec, TOutputSpec> runCallable -) { + TRunCallable<TInputSpec, TOutputSpec> runCallable) { auto inputSpec = TInputSpec(inputSchema); auto outputSpec = TOutputSpec({NYT::TNode::CreateEntity()}); auto program = factory->MakePullListProgram(inputSpec, outputSpec, sql, isPg); @@ -353,12 +354,12 @@ double RunBenchmarks( times.erase(times.end() - times.size() / 3, times.end()); double sum = std::transform_reduce(times.cbegin(), times.cend(), - .0, std::plus{}, [](auto t) { return std::log(t.MicroSeconds()); }); + .0, std::plus{}, [](auto t) { return 
std::log(t.MicroSeconds()); }); return std::exp(sum / times.size()); } -int Main(int argc, const char *argv[]) +int Main(int argc, const char* argv[]) { Y_UNUSED(NUdf::GetStaticSymbols()); using namespace NLastGetopt; @@ -489,7 +490,8 @@ int Main(int argc, const char *argv[]) factory, {outputGenSchema}, testSql, isPgTest, repeats, [&](const auto& program) { auto handle = program->Apply(StreamFromVector(outputGenStream)); - while (/* arrow::compute::ExecBatch* batch = */ handle->Fetch()) {} + while (/* arrow::compute::ExecBatch* batch = */ handle->Fetch()) { + } }); } @@ -499,7 +501,7 @@ int Main(int argc, const char *argv[]) return 0; } -int main(int argc, const char *argv[]) { +int main(int argc, const char* argv[]) { if (argc > 1 && TString(argv[1]) != TStringBuf("--ndebug")) { Cerr << "purebench ABI version: " << NKikimr::NUdf::CurrentAbiVersionStr() << Endl; } @@ -510,7 +512,8 @@ int main(int argc, const char *argv[]) { try { return Main(argc, argv); } catch (const TCompileError& e) { - Cerr << e.what() << "\n" << e.GetIssues(); + Cerr << e.what() << "\n" + << e.GetIssues(); } catch (...) { Cerr << CurrentExceptionMessage() << Endl; return 1; diff --git a/yql/essentials/tools/purebench/test/test.py b/yql/essentials/tools/purebench/test/test.py index d2324907654..2924e62758f 100644 --- a/yql/essentials/tools/purebench/test/test.py +++ b/yql/essentials/tools/purebench/test/test.py @@ -5,8 +5,7 @@ PUREBENCH = yatest.common.build_path('yql/essentials/tools/purebench/purebench') def test_purebench_smoke(): - result = yatest.common.execute([PUREBENCH, '--ndebug', '-r', '1'], - text=True, check_exit_code=True) + result = yatest.common.execute([PUREBENCH, '--ndebug', '-r', '1'], text=True, check_exit_code=True) # Mask elapsed time and duration, since both can change in # different environments. 
stdout = result.stdout diff --git a/yql/essentials/tools/purebench/ya.make b/yql/essentials/tools/purebench/ya.make index dd1d962bcb8..5d9e2eb8b2b 100644 --- a/yql/essentials/tools/purebench/ya.make +++ b/yql/essentials/tools/purebench/ya.make @@ -2,6 +2,8 @@ IF (NOT OPENSOURCE) PROGRAM(purebench) +ENABLE(YQL_STYLE_CPP) + ALLOCATOR(J) SRCS( diff --git a/yql/essentials/tools/sql2yql/sql2yql.cpp b/yql/essentials/tools/sql2yql/sql2yql.cpp index 2e34c196694..5d891333c74 100644 --- a/yql/essentials/tools/sql2yql/sql2yql.cpp +++ b/yql/essentials/tools/sql2yql/sql2yql.cpp @@ -22,7 +22,6 @@ #include <yql/essentials/parser/pg_wrapper/interface/context.h> #include <yql/essentials/providers/common/gateways_utils/gateways_utils.h> - #include <library/cpp/getopt/last_getopt.h> #include <library/cpp/testing/unittest/registar.h> @@ -107,8 +106,7 @@ bool TestFormat( const TString& queryFile, const NYql::TAstParseResult& parseRes, const TString& outFileName, - const bool checkDoubleFormatting -) { + const bool checkDoubleFormatting) { TStringStream yqlProgram; parseRes.Root->PrettyPrintTo(yqlProgram, NYql::TAstPrintFlags::PerLine | NYql::TAstPrintFlags::ShortQuote); @@ -128,7 +126,7 @@ bool TestFormat( NYql::TAstParseResult frmParseRes = NSQLTranslation::SqlToYql(translators, frmQuery, settings); if (!frmParseRes.Issues.Empty()) { frmParseRes.Issues.PrintWithProgramTo(Cerr, queryFile, frmQuery); - if (AnyOf(frmParseRes.Issues, [](const auto& issue) { return issue.GetSeverity() == NYql::TSeverityIds::S_ERROR;})) { + if (AnyOf(frmParseRes.Issues, [](const auto& issue) { return issue.GetSeverity() <= NYql::TSeverityIds::S_ERROR; })) { return false; } } @@ -204,7 +202,6 @@ private: char Delim_; }; - void ParseProtoConfig(const TString& cfgFile, google::protobuf::Message* config) { TString configData = TFileInput(cfgFile).ReadAll(); @@ -221,7 +218,6 @@ static THolder<TMessage> ParseProtoConfig(const TString& cfgFile) { return config; } - int BuildAST(int argc, char* argv[]) { 
NLastGetopt::TOpts opts = NLastGetopt::TOpts::Default(); @@ -273,27 +269,26 @@ int BuildAST(int argc, char* argv[]) { opts.AddLongOption("test-antlr4", "check antlr4 parser").NoArgument(); opts.AddLongOption("test-lexers", "check other lexers").NoArgument(); opts.AddLongOption("test-complete", "check completion engine").NoArgument(); + opts.AddLongOption("test-syntax-ambiguity", "test syntax ambiguity").NoArgument(); + opts.AddLongOption("debug-syntax-ambiguity", "debug syntax ambiguity").NoArgument(); opts.AddLongOption("format-output", "Saves formatted query to it").RequiredArgument("format-output").StoreResult(&outFileNameFormat); opts.AddLongOption("langver", "Set current language version").Optional().RequiredArgument("VER").Handler1T<TString>(langVerHandler); opts.AddLongOption("mem-limit", "Set memory limit in megabytes").Handler1T<ui32>(0, NYql::SetAddressSpaceLimit); - opts.AddLongOption("gateways-cfg", "Gateways configuration file").Optional().RequiredArgument("FILE") - .Handler1T<TString>([&gatewaysConfig, &clusterMapping](const TString& file) { - gatewaysConfig = ParseProtoConfig<NYql::TGatewaysConfig>(file); - GetClusterMappingFromGateways(*gatewaysConfig, clusterMapping); - }); - opts.AddLongOption("pg-ext", "Pg extensions config file").Optional().RequiredArgument("FILE") - .Handler1T<TString>([](const TString& file) { - - auto pgExtConfig = ParseProtoConfig<NYql::NProto::TPgExtensions>(file); - if (!pgExtConfig) { - throw yexception() << "Bad format of config file " << file; - } - TVector<NYql::NPg::TExtensionDesc> extensions; - NYql::PgExtensionsFromProto(*pgExtConfig, extensions); - NYql::NPg::RegisterExtensions(extensions, true, - *NSQLTranslationPG::CreateExtensionSqlParser(), - NKikimr::NMiniKQL::CreateExtensionLoader().get()); - }); + opts.AddLongOption("gateways-cfg", "Gateways configuration file").Optional().RequiredArgument("FILE").Handler1T<TString>([&gatewaysConfig, &clusterMapping](const TString& file) { + gatewaysConfig = 
ParseProtoConfig<NYql::TGatewaysConfig>(file); + GetClusterMappingFromGateways(*gatewaysConfig, clusterMapping); + }); + opts.AddLongOption("pg-ext", "Pg extensions config file").Optional().RequiredArgument("FILE").Handler1T<TString>([](const TString& file) { + auto pgExtConfig = ParseProtoConfig<NYql::NProto::TPgExtensions>(file); + if (!pgExtConfig) { + throw yexception() << "Bad format of config file " << file; + } + TVector<NYql::NPg::TExtensionDesc> extensions; + NYql::PgExtensionsFromProto(*pgExtConfig, extensions); + NYql::NPg::RegisterExtensions(extensions, true, + *NSQLTranslationPG::CreateExtensionSqlParser(), + NKikimr::NMiniKQL::CreateExtensionLoader().get()); + }); opts.SetFreeArgDefaultTitle("query file"); opts.AddHelpOption(); @@ -321,14 +316,17 @@ int BuildAST(int argc, char* argv[]) { lexers.Antlr4 = NSQLTranslationV1::MakeAntlr4LexerFactory(); lexers.Antlr4Ansi = NSQLTranslationV1::MakeAntlr4AnsiLexerFactory(); NSQLTranslationV1::TParsers parsers; - parsers.Antlr4 = NSQLTranslationV1::MakeAntlr4ParserFactory(); - parsers.Antlr4Ansi = NSQLTranslationV1::MakeAntlr4AnsiParserFactory(); + parsers.Antlr4 = NSQLTranslationV1::MakeAntlr4ParserFactory( + res.Has("test-syntax-ambiguity"), + res.Has("debug-syntax-ambiguity")); + parsers.Antlr4Ansi = NSQLTranslationV1::MakeAntlr4AnsiParserFactory( + res.Has("test-syntax-ambiguity"), + res.Has("debug-syntax-ambiguity")); NSQLTranslation::TTranslators translators( nullptr, NSQLTranslationV1::MakeTranslator(lexers, parsers), - NSQLTranslationPG::MakeTranslator() - ); + NSQLTranslationPG::MakeTranslator()); TVector<TString> queries; int errors = 0; @@ -365,7 +363,7 @@ int BuildAST(int argc, char* argv[]) { queries.push_back(queryString); } - for (const auto& query: queries) { + for (const auto& query : queries) { if (query.empty()) { continue; } @@ -400,7 +398,7 @@ int BuildAST(int argc, char* argv[]) { issues.PrintTo(Cerr); } - bool hasError = AnyOf(issues, [](const auto& issue) { return issue.GetSeverity() == 
NYql::TSeverityIds::S_ERROR;}); + bool hasError = AnyOf(issues, [](const auto& issue) { return issue.GetSeverity() == NYql::TSeverityIds::S_ERROR; }); if (hasError) { ++errors; } @@ -413,7 +411,7 @@ int BuildAST(int argc, char* argv[]) { } else { if (res.Has("tree") || res.Has("diff") || res.Has("dump")) { google::protobuf::Message* ast(NSQLTranslation::SqlAST(translators, query, queryFile, parseRes.Issues, - NSQLTranslation::SQL_MAX_PARSER_ERRORS, settings)); + NSQLTranslation::SQL_MAX_PARSER_ERRORS, settings)); if (ast) { if (res.Has("tree")) { out << ast->DebugString() << Endl; @@ -430,7 +428,7 @@ int BuildAST(int argc, char* argv[]) { NSQLTranslation::TSQLHints hints; auto lexer = SqlLexer(translators, query, parseRes.Issues, settings); if (lexer && CollectSqlHints(*lexer, query, queryFile, settings.File, hints, parseRes.Issues, - settings.MaxErrors, settings.Antlr4Parser)) { + settings.MaxErrors, settings.Antlr4Parser)) { parseRes = NSQLTranslation::SqlASTToYql(translators, query, *ast, hints, settings); } } @@ -455,7 +453,7 @@ int BuildAST(int argc, char* argv[]) { bool hasError = false; if (!parseRes.Issues.Empty()) { - hasError = AnyOf(parseRes.Issues, [](const auto& issue) { return issue.GetSeverity() == NYql::TSeverityIds::S_ERROR;}); + hasError = AnyOf(parseRes.Issues, [](const auto& issue) { return issue.GetSeverity() == NYql::TSeverityIds::S_ERROR; }); if (hasError || !noDebug) { parseRes.Issues.PrintWithProgramTo(Cerr, queryFile, query); diff --git a/yql/essentials/tools/sql2yql/ya.make b/yql/essentials/tools/sql2yql/ya.make index 030fe870256..06fdf58b80c 100644 --- a/yql/essentials/tools/sql2yql/ya.make +++ b/yql/essentials/tools/sql2yql/ya.make @@ -2,6 +2,8 @@ IF (NOT EXPORT_CMAKE OR NOT OPENSOURCE OR OPENSOURCE_PROJECT != "yt") PROGRAM(sql2yql) +ENABLE(YQL_STYLE_CPP) + PEERDIR( contrib/libs/antlr3_cpp_runtime library/cpp/getopt diff --git a/yql/essentials/tools/sql_formatter/sql_formatter.cpp b/yql/essentials/tools/sql_formatter/sql_formatter.cpp 
index d25e4817c91..546db2da8f7 100644 --- a/yql/essentials/tools/sql_formatter/sql_formatter.cpp +++ b/yql/essentials/tools/sql_formatter/sql_formatter.cpp @@ -24,12 +24,11 @@ int RunFormat(int argc, char* argv[]) { opts.AddLongOption('p', "print-query", "print given query before parsing").NoArgument(); opts.AddLongOption('f', "obfuscate", "obfuscate query").NoArgument(); opts.AddLongOption("ansi-lexer", "use ansi lexer").NoArgument(); - opts.AddLongOption("langver", "Set current language version").Optional().RequiredArgument("VER") - .Handler1T<TString>([&](const TString& str) { - if (!NYql::ParseLangVersion(str, langver)) { - throw yexception() << "Failed to parse language version: " << str; - } - }); + opts.AddLongOption("langver", "Set current language version").Optional().RequiredArgument("VER").Handler1T<TString>([&](const TString& str) { + if (!NYql::ParseLangVersion(str, langver)) { + throw yexception() << "Failed to parse language version: " << str; + } + }); opts.SetFreeArgsNum(0); opts.AddHelpOption(); @@ -73,8 +72,7 @@ int RunFormat(int argc, char* argv[]) { TString frm_query; TString error; NYql::TIssues issues; - if (!formatter->Format(queryString, frm_query, issues, res.Has("obfuscate") ? - NSQLFormat::EFormatMode::Obfuscate : NSQLFormat::EFormatMode::Pretty)) { + if (!formatter->Format(queryString, frm_query, issues, res.Has("obfuscate") ? 
NSQLFormat::EFormatMode::Obfuscate : NSQLFormat::EFormatMode::Pretty)) { ++errors; Cerr << "Error formatting query: " << issues.ToString() << Endl; } else { diff --git a/yql/essentials/tools/sql_formatter/ya.make b/yql/essentials/tools/sql_formatter/ya.make index 558dbe3bd5a..748c9ad2471 100644 --- a/yql/essentials/tools/sql_formatter/ya.make +++ b/yql/essentials/tools/sql_formatter/ya.make @@ -2,6 +2,8 @@ IF (NOT EXPORT_CMAKE OR NOT OPENSOURCE OR OPENSOURCE_PROJECT != "yt") PROGRAM() +ENABLE(YQL_STYLE_CPP) + PEERDIR( library/cpp/getopt contrib/libs/protobuf diff --git a/yql/essentials/tools/sql_functions_dump/sql_functions_dump.cpp b/yql/essentials/tools/sql_functions_dump/sql_functions_dump.cpp index 66dae62d636..4094d2ffce9 100644 --- a/yql/essentials/tools/sql_functions_dump/sql_functions_dump.cpp +++ b/yql/essentials/tools/sql_functions_dump/sql_functions_dump.cpp @@ -5,7 +5,7 @@ using namespace NYql; -int Main(int argc, const char *argv[]) +int Main(int argc, const char* argv[]) { Y_UNUSED(argc); Y_UNUSED(argv); @@ -26,14 +26,13 @@ int Main(int argc, const char *argv[]) return 0; } -int main(int argc, const char *argv[]) { +int main(int argc, const char* argv[]) { NYql::NBacktrace::RegisterKikimrFatalActions(); NYql::NBacktrace::EnableKikimrSymbolize(); try { return Main(argc, argv); - } - catch (...) { + } catch (...) 
{ Cerr << CurrentExceptionMessage() << Endl; return 1; } diff --git a/yql/essentials/tools/sql_functions_dump/test/test.py b/yql/essentials/tools/sql_functions_dump/test/test.py index ca4d1ad5b87..0e8a6ef6ab5 100644 --- a/yql/essentials/tools/sql_functions_dump/test/test.py +++ b/yql/essentials/tools/sql_functions_dump/test/test.py @@ -9,12 +9,8 @@ TOOL_PATH = yatest.common.binary_path('yql/essentials/tools/sql_functions_dump/s def test_functions_dump(): with open(os.path.join(DATA_PATH, "sql_functions.json")) as f: func_from_file = json.load(f) - res = yatest.common.execute( - [TOOL_PATH], - check_exit_code=True, - wait=True - ) + res = yatest.common.execute([TOOL_PATH], check_exit_code=True, wait=True) func_from_tool = json.loads(res.stdout) - assert func_from_tool == func_from_file, 'JSON_DIFFER\n' \ - 'File:\n %(func_from_file)s\n\n' \ - 'Tool:\n %(func_from_tool)s\n' % locals() + assert func_from_tool == func_from_file, ( + 'JSON_DIFFER\n' 'File:\n %(func_from_file)s\n\n' 'Tool:\n %(func_from_tool)s\n' % locals() + ) diff --git a/yql/essentials/tools/sql_functions_dump/ya.make b/yql/essentials/tools/sql_functions_dump/ya.make index 73dda185476..5467c07bbfc 100644 --- a/yql/essentials/tools/sql_functions_dump/ya.make +++ b/yql/essentials/tools/sql_functions_dump/ya.make @@ -1,5 +1,7 @@ PROGRAM() +ENABLE(YQL_STYLE_CPP) + SRCS( sql_functions_dump.cpp ) diff --git a/yql/essentials/tools/types_dump/test/test.py b/yql/essentials/tools/types_dump/test/test.py index 1f809863c29..565ebb87322 100644 --- a/yql/essentials/tools/types_dump/test/test.py +++ b/yql/essentials/tools/types_dump/test/test.py @@ -9,12 +9,8 @@ TOOL_PATH = yatest.common.binary_path('yql/essentials/tools/types_dump/types_dum def test_types_dump(): with open(os.path.join(DATA_PATH, "types.json")) as f: types_from_file = json.load(f) - res = yatest.common.execute( - [TOOL_PATH], - check_exit_code=True, - wait=True - ) + res = yatest.common.execute([TOOL_PATH], check_exit_code=True, wait=True) 
types_from_tool = json.loads(res.stdout) - assert types_from_tool == types_from_file, 'JSON_DIFFER\n' \ - 'File:\n %(types_from_file)s\n\n' \ - 'Tool:\n %(types_from_tool)s\n' % locals() + assert types_from_tool == types_from_file, ( + 'JSON_DIFFER\n' 'File:\n %(types_from_file)s\n\n' 'Tool:\n %(types_from_tool)s\n' % locals() + ) diff --git a/yql/essentials/tools/types_dump/types_dump.cpp b/yql/essentials/tools/types_dump/types_dump.cpp index 32117c20e89..8ce475e9d2b 100644 --- a/yql/essentials/tools/types_dump/types_dump.cpp +++ b/yql/essentials/tools/types_dump/types_dump.cpp @@ -6,7 +6,7 @@ using namespace NYql; -int Main(int argc, const char *argv[]) +int Main(int argc, const char* argv[]) { Y_UNUSED(argc); Y_UNUSED(argv); @@ -45,14 +45,13 @@ int Main(int argc, const char *argv[]) return 0; } -int main(int argc, const char *argv[]) { +int main(int argc, const char* argv[]) { NYql::NBacktrace::RegisterKikimrFatalActions(); NYql::NBacktrace::EnableKikimrSymbolize(); try { return Main(argc, argv); - } - catch (...) { + } catch (...) 
{ Cerr << CurrentExceptionMessage() << Endl; return 1; } diff --git a/yql/essentials/tools/types_dump/ya.make b/yql/essentials/tools/types_dump/ya.make index 5bea1a09d80..c59a3c43143 100644 --- a/yql/essentials/tools/types_dump/ya.make +++ b/yql/essentials/tools/types_dump/ya.make @@ -1,5 +1,7 @@ PROGRAM() +ENABLE(YQL_STYLE_CPP) + SRCS( types_dump.cpp ) diff --git a/yql/essentials/tools/udf_dep_stub/ya.make b/yql/essentials/tools/udf_dep_stub/ya.make index abe778ed121..69bb3f8f45c 100644 --- a/yql/essentials/tools/udf_dep_stub/ya.make +++ b/yql/essentials/tools/udf_dep_stub/ya.make @@ -1,5 +1,7 @@ DLL(yql_udf_dep_stub) +ENABLE(YQL_STYLE_CPP) + SRCS( main.cpp ) diff --git a/yql/essentials/tools/udf_probe/udf_probe.cpp b/yql/essentials/tools/udf_probe/udf_probe.cpp index 7f7e7d28a8c..89e2dc2567b 100644 --- a/yql/essentials/tools/udf_probe/udf_probe.cpp +++ b/yql/essentials/tools/udf_probe/udf_probe.cpp @@ -10,7 +10,7 @@ void ListModules(const TString& dir) { TVector<TString> udfPaths; NMiniKQL::FindUdfsInDir(dir, &udfPaths); auto funcRegistry = CreateFunctionRegistry(nullptr, IBuiltinFunctionRegistry::TPtr(), false, udfPaths, - NUdf::IRegistrator::TFlags::TypesOnly); + NUdf::IRegistrator::TFlags::TypesOnly); for (auto& m : funcRegistry->GetAllModuleNames()) { auto path = *funcRegistry->FindUdfPath(m); @@ -18,7 +18,7 @@ void ListModules(const TString& dir) { } } -int main(int argc, char **argv) { +int main(int argc, char** argv) { try { if (argc != 2) { Cerr << "Expected directory path\n"; diff --git a/yql/essentials/tools/udf_probe/ya.make b/yql/essentials/tools/udf_probe/ya.make index d6cbedec450..4df603700bc 100644 --- a/yql/essentials/tools/udf_probe/ya.make +++ b/yql/essentials/tools/udf_probe/ya.make @@ -1,5 +1,7 @@ PROGRAM() +ENABLE(YQL_STYLE_CPP) + SRCS( udf_probe.cpp ) diff --git a/yql/essentials/tools/udf_resolver/discover.cpp b/yql/essentials/tools/udf_resolver/discover.cpp index 240cd174b46..b531010def0 100644 --- 
a/yql/essentials/tools/udf_resolver/discover.cpp +++ b/yql/essentials/tools/udf_resolver/discover.cpp @@ -37,10 +37,10 @@ NYql::TResolveResult DoDiscover(const NYql::TResolve& inMsg, IMutableFunctionReg if (inserted) { THashSet<TString> modules; functionRegistry.LoadUdfs(import.GetPath(), - {}, - NUdf::IRegistrator::TFlags::TypesOnly, - import.GetCustomUdfPrefix(), - &modules); + {}, + NUdf::IRegistrator::TFlags::TypesOnly, + import.GetCustomUdfPrefix(), + &modules); FillImportResultModules(modules, *importRes); it->second = modules; } else { @@ -67,7 +67,7 @@ NYql::TResolveResult DoDiscover(const NYql::TResolve& inMsg, IMutableFunctionReg TFunctionTypeInfo funcInfo; if (!f.second.IsTypeAwareness) { auto status = functionRegistry.FindFunctionTypeInfo(NYql::UnknownLangVersion, env, typeInfoHelper, - nullptr, funcName, nullptr, nullptr, NUdf::IUdfModule::TFlags::TypesOnly, {}, nullptr, logProvider.Get(), &funcInfo); + nullptr, funcName, nullptr, nullptr, NUdf::IUdfModule::TFlags::TypesOnly, {}, nullptr, logProvider.Get(), &funcInfo); if (!status.IsOk()) { udfRes->SetError("Failed to resolve signature, error: " + status.GetError()); @@ -106,7 +106,7 @@ void Print(const NYql::TResolveResult& result, IOutputStream& out, bool printAsP } void DiscoverInFiles(const TVector<TString>& udfPaths, IOutputStream& out, bool printAsProto, - NYql::NUdf::ELogLevel logLevel) { + NYql::NUdf::ELogLevel logLevel) { NYql::TResolve inMsg; inMsg.SetRuntimeLogLevel(static_cast<ui32>(logLevel)); for (auto& path : udfPaths) { @@ -123,7 +123,7 @@ void DiscoverInFiles(const TVector<TString>& udfPaths, IOutputStream& out, bool Print(result, out, printAsProto); } -} +} // namespace void DiscoverInDir(const TString& dir, IOutputStream& out, bool printAsProto, NYql::NUdf::ELogLevel logLevel) { TVector<TString> udfPaths; @@ -132,7 +132,7 @@ void DiscoverInDir(const TString& dir, IOutputStream& out, bool printAsProto, NY } void DiscoverInFile(const TString& filePath, IOutputStream& out, bool 
printAsProto, NYql::NUdf::ELogLevel logLevel) { - DiscoverInFiles({ filePath }, out, printAsProto, logLevel); + DiscoverInFiles({filePath}, out, printAsProto, logLevel); } void Discover(IInputStream& in, IOutputStream& out, bool printAsProto) { @@ -154,4 +154,4 @@ void FillImportResultModules(const THashSet<TString>& modules, NYql::TImportResu importRes.AddModules(m); } } -} +} // namespace NUdfResolver diff --git a/yql/essentials/tools/udf_resolver/discover.h b/yql/essentials/tools/udf_resolver/discover.h index d32d3fa5a5a..3c78604d7de 100644 --- a/yql/essentials/tools/udf_resolver/discover.h +++ b/yql/essentials/tools/udf_resolver/discover.h @@ -10,10 +10,10 @@ namespace NUdfResolver { void DiscoverInDir(const TString& dir, IOutputStream& out, bool printAsProto, - NYql::NUdf::ELogLevel logLevel); + NYql::NUdf::ELogLevel logLevel); void DiscoverInFile(const TString& filePath, IOutputStream& out, bool printAsProto, - NYql::NUdf::ELogLevel logLevel); + NYql::NUdf::ELogLevel logLevel); void Discover(IInputStream& in, IOutputStream& out, bool printAsProto); void FillImportResultModules(const THashSet<TString>& modules, NYql::TImportResult& importRes); ; -} +} // namespace NUdfResolver diff --git a/yql/essentials/tools/udf_resolver/udf_resolver.cpp b/yql/essentials/tools/udf_resolver/udf_resolver.cpp index 9db127d0864..b04f52a7cdd 100644 --- a/yql/essentials/tools/udf_resolver/udf_resolver.cpp +++ b/yql/essentials/tools/udf_resolver/udf_resolver.cpp @@ -28,71 +28,70 @@ #include <util/string/builder.h> #ifdef _linux_ -#include <sys/types.h> -#include <sys/prctl.h> -#include <sys/resource.h> -#include <sys/syscall.h> -#include <sys/socket.h> -#include <sys/stat.h> -#ifndef GRND_RANDOM -#include <sys/random.h> -#endif + #include <sys/types.h> + #include <sys/prctl.h> + #include <sys/resource.h> + #include <sys/syscall.h> + #include <sys/socket.h> + #include <sys/stat.h> + #ifndef GRND_RANDOM + #include <sys/random.h> + #endif -#include <linux/filter.h> -#include 
<linux/seccomp.h> -#include <linux/audit.h> -#ifndef GRND_RANDOM -#include <linux/random.h> -#endif + #include <linux/filter.h> + #include <linux/seccomp.h> + #include <linux/audit.h> + #ifndef GRND_RANDOM + #include <linux/random.h> + #endif -#ifndef __SI_MAX_SIZE -#define __SI_MAX_SIZE 128 -#endif - -#ifndef __SI_PAD_SIZE -#if __WORDSIZE == 64 -# define __SI_PAD_SIZE ((__SI_MAX_SIZE / sizeof (int)) - 4) -#else -# define __SI_PAD_SIZE ((__SI_MAX_SIZE / sizeof (int)) - 3) -#endif -#endif + #ifndef __SI_MAX_SIZE + #define __SI_MAX_SIZE 128 + #endif + #ifndef __SI_PAD_SIZE + #if __WORDSIZE == 64 + #define __SI_PAD_SIZE ((__SI_MAX_SIZE / sizeof(int)) - 4) + #else + #define __SI_PAD_SIZE ((__SI_MAX_SIZE / sizeof(int)) - 3) + #endif + #endif -#if !defined(SYS_newfstatat) -#if defined(__x86_64__) - #define SYS_newfstatat 262 -#elif defined(__i386__) - #error Unsupported syscall -#elif defined(__aarch64__) - #define SYS_newfstatat 79 -#elif defined(__arm__) - #error Unsupported syscall -#elif defined(__powerpc__) - #define SYS_newfstatat 291 -#else -#error Unsupported platform -#endif -#endif + #if !defined(SYS_newfstatat) + #if defined(__x86_64__) + #define SYS_newfstatat 262 + #elif defined(__i386__) + #error Unsupported syscall + #elif defined(__aarch64__) + #define SYS_newfstatat 79 + #elif defined(__arm__) + #error Unsupported syscall + #elif defined(__powerpc__) + #define SYS_newfstatat 291 + #else + #error Unsupported platform + #endif + #endif -#if !defined(SYS_clone3) - #define SYS_clone3 435 -#endif + #if !defined(SYS_clone3) + #define SYS_clone3 435 + #endif -#if !defined(SYS_rseq) -#if defined(__x86_64__) - #define SYS_rseq 334 -#elif defined(__i386__) - #define SYS_rseq 386 -#elif defined(__aarch64__) - #define SYS_rseq 293 -#elif defined(__arm__) - #define SYS_rseq 398 -#elif defined(__powerpc__) - #define SYS_rseq 387 -#else -#error Unsupported platform -#endif -#endif + #if !defined(SYS_rseq) + #if defined(__x86_64__) + #define SYS_rseq 334 + #elif 
defined(__i386__) + #define SYS_rseq 386 + #elif defined(__aarch64__) + #define SYS_rseq 293 + #elif defined(__arm__) + #define SYS_rseq 398 + #elif defined(__powerpc__) + #define SYS_rseq 387 + #else + #error Unsupported platform + #endif + #endif #endif @@ -136,10 +135,10 @@ void ResolveUDFs() { if (inserted) { THashSet<TString> modules; newRegistry->LoadUdfs(import.GetPath(), - {}, - NUdf::IRegistrator::TFlags::TypesOnly, - import.GetCustomUdfPrefix(), - &modules); + {}, + NUdf::IRegistrator::TFlags::TypesOnly, + import.GetCustomUdfPrefix(), + &modules); NUdfResolver::FillImportResultModules(modules, *importRes); it->second = modules; @@ -168,17 +167,17 @@ void ResolveUDFs() { mkqlUserType = NYql::NCommon::ParseTypeFromYson(TStringBuf{udf.GetUserType()}, pgmBuilder, err); if (!mkqlUserType) { udfRes->SetError(TStringBuilder() << "Invalid user type for function: " - << udf.GetName() << ", error: " << err.Str()); + << udf.GetName() << ", error: " << err.Str()); continue; } } TFunctionTypeInfo funcInfo; auto status = newRegistry->FindFunctionTypeInfo(udf.GetLangVer(), env, typeInfoHelper, nullptr, - udf.GetName(), mkqlUserType, udf.GetTypeConfig(), NUdf::IUdfModule::TFlags::TypesOnly, {}, nullptr, logProvider.Get(), &funcInfo); + udf.GetName(), mkqlUserType, udf.GetTypeConfig(), NUdf::IUdfModule::TFlags::TypesOnly, {}, nullptr, logProvider.Get(), &funcInfo); if (!status.IsOk()) { udfRes->SetError(TStringBuilder() << "Failed to find UDF function: " << udf.GetName() - << ", reason: " << status.GetError()); + << ", reason: " << status.GetError()); continue; } @@ -198,8 +197,8 @@ void ResolveUDFs() { udfRes->SetMaxLangVer(funcInfo.MaxLangVer); } catch (yexception& e) { udfRes->SetError(TStringBuilder() - << "Internal error was found when udf metadata is loading for function: " << udf.GetName() - << ", reason: " << e.what()); + << "Internal error was found when udf metadata is loading for function: " << udf.GetName() + << ", reason: " << e.what()); } } @@ -210,7 +209,7 
@@ void ListModules(const TString& dir) { TVector<TString> udfPaths; NMiniKQL::FindUdfsInDir(dir, &udfPaths); auto funcRegistry = CreateFunctionRegistry(&NYql::NBacktrace::KikimrBackTrace, IBuiltinFunctionRegistry::TPtr(), false, udfPaths, - NUdf::IRegistrator::TFlags::TypesOnly); + NUdf::IRegistrator::TFlags::TypesOnly); for (auto& m : funcRegistry->GetAllModuleNames()) { auto path = *funcRegistry->FindUdfPath(m); @@ -220,43 +219,40 @@ void ListModules(const TString& dir) { // NOLINTBEGIN(readability-identifier-naming) #ifdef _linux_ -struct my_siginfo_t - { - int si_signo; /* Signal number. */ -#if __SI_ERRNO_THEN_CODE - int si_errno; /* If non-zero, an errno value associated with - this signal, as defined in <errno.h>. */ - int si_code; /* Signal code. */ -#else +struct my_siginfo_t { + int si_signo; /* Signal number. */ + #if __SI_ERRNO_THEN_CODE + int si_errno; /* If non-zero, an errno value associated with + this signal, as defined in <errno.h>. */ + int si_code; /* Signal code. */ + #else int si_code; int si_errno; -#endif -#if __WORDSIZE == 64 - int __pad0; /* Explicit padding. */ -#endif - union - { + #endif + #if __WORDSIZE == 64 + int __pad0; /* Explicit padding. */ + #endif + union { int _pad[__SI_PAD_SIZE]; struct - { - void *_call_addr; /* Calling user insn. */ - int _syscall; /* Triggering system call number. */ + { + void* _call_addr; /* Calling user insn. */ + int _syscall; /* Triggering system call number. */ unsigned int _arch; /* AUDIT_ARCH_* of syscall. 
*/ - } _sigsys; + } _sigsys; - } _sifields; - }; + } _sifields; +}; // NOLINTEND(readability-identifier-naming) -void SigSysHandler(int sig, my_siginfo_t *info, void *) { - Cerr << "SigSysHandler: " << sig << ", code: " << info->si_code << ", errno: " << - info->si_errno << ", call: " << info->_sifields._sigsys._syscall << ", arch:" << info->_sifields._sigsys._arch << "\n"; +void SigSysHandler(int sig, my_siginfo_t* info, void*) { + Cerr << "SigSysHandler: " << sig << ", code: " << info->si_code << ", errno: " << info->si_errno << ", call: " << info->_sifields._sigsys._syscall << ", arch:" << info->_sifields._sigsys._arch << "\n"; // repeat SIGSYS signal (this will kill current process) raise(sig); } #endif -int main(int argc, char **argv) { +int main(int argc, char** argv) { NYql::NBacktrace::RegisterKikimrFatalActions(); NYql::NBacktrace::EnableKikimrSymbolize(); @@ -265,11 +261,11 @@ int main(int argc, char **argv) { struct sigaction sa; memset(&sa, 0, sizeof(sa)); sa.sa_flags = SA_RESETHAND | SA_SIGINFO; - typedef void (*TSigSysHandler)(int, siginfo_t *, void *); + typedef void (*TSigSysHandler)(int, siginfo_t*, void*); sa.sa_sigaction = (TSigSysHandler)SigSysHandler; sigfillset(&sa.sa_mask); if (sigaction(SIGSYS, &sa, nullptr) == -1) { - ythrow TSystemError() << "Cannot set handler for signal " << strsignal(SIGSYS); + ythrow TSystemError() << "Cannot set handler for signal " << strsignal(SIGSYS); } #endif @@ -344,26 +340,26 @@ int main(int argc, char **argv) { if (res.Has("filter-syscalls")) { #ifdef _linux_ -#define ArchField offsetof(struct seccomp_data, arch) // NOLINT(readability-identifier-naming) + #define ArchField offsetof(struct seccomp_data, arch) // NOLINT(readability-identifier-naming) -// NOLINTNEXTLINE(readability-identifier-naming) -#define Allow(syscall) \ - BPF_JUMP(BPF_JMP+BPF_JEQ+BPF_K, SYS_##syscall, 0, 1), \ - BPF_STMT(BPF_RET+BPF_K, SECCOMP_RET_ALLOW) + // NOLINTNEXTLINE(readability-identifier-naming) + #define Allow(syscall) \ + 
BPF_JUMP(BPF_JMP + BPF_JEQ + BPF_K, SYS_##syscall, 0, 1), \ + BPF_STMT(BPF_RET + BPF_K, SECCOMP_RET_ALLOW) struct sock_filter filter[] = { /* validate arch */ - BPF_STMT(BPF_LD+BPF_W+BPF_ABS, ArchField), - BPF_JUMP( BPF_JMP+BPF_JEQ+BPF_K, AUDIT_ARCH_X86_64, 1, 0), - BPF_STMT(BPF_RET+BPF_K, SECCOMP_RET_TRAP), + BPF_STMT(BPF_LD + BPF_W + BPF_ABS, ArchField), + BPF_JUMP(BPF_JMP + BPF_JEQ + BPF_K, AUDIT_ARCH_X86_64, 1, 0), + BPF_STMT(BPF_RET + BPF_K, SECCOMP_RET_TRAP), /* load syscall */ - BPF_STMT(BPF_LD+BPF_W+BPF_ABS, offsetof(struct seccomp_data, nr)), + BPF_STMT(BPF_LD + BPF_W + BPF_ABS, offsetof(struct seccomp_data, nr)), - /* list of allowed syscalls */ -#ifndef _arm64_ + /* list of allowed syscalls */ + #ifndef _arm64_ Allow(access), -#endif + #endif Allow(brk), Allow(chdir), Allow(clock_gettime), @@ -371,13 +367,13 @@ int main(int argc, char **argv) { Allow(clone), Allow(clone3), Allow(close), -#ifndef _arm64_ + #ifndef _arm64_ Allow(creat), -#endif + #endif Allow(dup), -#ifndef _arm64_ + #ifndef _arm64_ Allow(dup2), -#endif + #endif Allow(dup3), Allow(eventfd2), Allow(exit), @@ -391,18 +387,18 @@ int main(int argc, char **argv) { Allow(futex), Allow(get_robust_list), Allow(getcwd), -#ifndef _arm64_ + #ifndef _arm64_ Allow(getdents), -#endif + #endif Allow(getdents64), Allow(getegid), Allow(geteuid), Allow(getgid), Allow(getgroups), Allow(getpgid), -#ifndef _arm64_ + #ifndef _arm64_ Allow(getpgrp), -#endif + #endif Allow(getpid), Allow(getppid), Allow(getpriority), @@ -416,21 +412,21 @@ int main(int argc, char **argv) { Allow(getxattr), Allow(ioctl), Allow(lgetxattr), -#ifndef _arm64_ + #ifndef _arm64_ Allow(link), -#endif + #endif Allow(listxattr), Allow(llistxattr), Allow(lremovexattr), Allow(lseek), Allow(lsetxattr), -#ifndef _arm64_ + #ifndef _arm64_ Allow(lstat), -#endif + #endif Allow(madvise), -#ifndef _arm64_ + #ifndef _arm64_ Allow(mkdir), -#endif + #endif Allow(mkdirat), Allow(mlock), Allow(mlockall), @@ -441,27 +437,27 @@ int main(int argc, char 
**argv) { Allow(munmap), Allow(nanosleep), Allow(newfstatat), -#ifndef _arm64_ + #ifndef _arm64_ Allow(open), -#endif + #endif Allow(openat), Allow(pipe2), Allow(prctl), Allow(pread64), Allow(pwrite64), Allow(read), -#ifndef _arm64_ + #ifndef _arm64_ Allow(readlink), -#endif + #endif Allow(readv), Allow(removexattr), -#ifndef _arm64_ + #ifndef _arm64_ Allow(rename), -#endif + #endif Allow(renameat), -#ifndef _arm64_ + #ifndef _arm64_ Allow(rmdir), -#endif + #endif Allow(rseq), Allow(rt_sigaction), Allow(rt_sigpending), @@ -475,26 +471,25 @@ int main(int argc, char **argv) { Allow(sched_setaffinity), Allow(set_robust_list), Allow(setxattr), -#ifndef _arm64_ + #ifndef _arm64_ Allow(stat), -#endif + #endif Allow(sysinfo), Allow(sigaltstack), Allow(uname), -#ifndef _arm64_ + #ifndef _arm64_ Allow(unlink), -#endif + #endif Allow(unlinkat), Allow(write), Allow(writev), /* and if we don't match above, die */ - BPF_STMT(BPF_RET+BPF_K, SECCOMP_RET_TRAP), + BPF_STMT(BPF_RET + BPF_K, SECCOMP_RET_TRAP), }; struct sock_fprog filterprog = { - .len = sizeof(filter)/sizeof(filter[0]), - .filter = filter - }; + .len = sizeof(filter) / sizeof(filter[0]), + .filter = filter}; if (prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0) == -1) { ythrow yexception() << "prctl(PR_SET_NO_NEW_PRIVS, 1, ...) 
failed with: " << LastSystemErrorText(); diff --git a/yql/essentials/tools/udf_resolver/ya.make b/yql/essentials/tools/udf_resolver/ya.make index 2dff7abf78f..4cc31ea96bd 100644 --- a/yql/essentials/tools/udf_resolver/ya.make +++ b/yql/essentials/tools/udf_resolver/ya.make @@ -1,5 +1,7 @@ PROGRAM() +ENABLE(YQL_STYLE_CPP) + SRCS( udf_resolver.cpp discover.cpp diff --git a/yql/essentials/tools/yql_complete/ya.make b/yql/essentials/tools/yql_complete/ya.make index 07c7b7cffdc..7a620d58514 100644 --- a/yql/essentials/tools/yql_complete/ya.make +++ b/yql/essentials/tools/yql_complete/ya.make @@ -2,6 +2,8 @@ IF (NOT EXPORT_CMAKE OR NOT OPENSOURCE OR OPENSOURCE_PROJECT != "yt") PROGRAM() +ENABLE(YQL_STYLE_CPP) + PEERDIR( library/cpp/getopt library/cpp/iterator diff --git a/yql/essentials/tools/yql_facade_run/ya.make b/yql/essentials/tools/yql_facade_run/ya.make index 67804ded36e..1cf8a196d38 100644 --- a/yql/essentials/tools/yql_facade_run/ya.make +++ b/yql/essentials/tools/yql_facade_run/ya.make @@ -1,5 +1,7 @@ LIBRARY() +ENABLE(YQL_STYLE_CPP) + SRCS( yql_facade_run.cpp ) diff --git a/yql/essentials/tools/yql_facade_run/yql_facade_run.cpp b/yql/essentials/tools/yql_facade_run/yql_facade_run.cpp index 553cf14b40f..48c0f27d1dc 100644 --- a/yql/essentials/tools/yql_facade_run/yql_facade_run.cpp +++ b/yql/essentials/tools/yql_facade_run/yql_facade_run.cpp @@ -74,7 +74,7 @@ namespace { const ui32 PRETTY_FLAGS = NYql::TAstPrintFlags::PerLine | NYql::TAstPrintFlags::ShortQuote | - NYql::TAstPrintFlags::AdaptArbitraryContent; + NYql::TAstPrintFlags::AdaptArbitraryContent; template <typename TMessage> THolder<TMessage> ParseProtoFromResource(TStringBuf resourceName) { @@ -91,7 +91,7 @@ THolder<TMessage> ParseProtoFromResource(TStringBuf resourceName) { return config; } -class TOptPipelineConfigurator : public NYql::IPipelineConfigurator { +class TOptPipelineConfigurator: public NYql::IPipelineConfigurator { public: TOptPipelineConfigurator(NYql::TProgramPtr prg, IOutputStream* 
planStream, IOutputStream* exprStream, bool withTypes) : Program_(std::move(prg)) @@ -107,7 +107,7 @@ public: void AfterTypeAnnotation(NYql::TTransformationPipeline* pipeline) const final { pipeline->Add(NYql::TExprLogTransformer::Sync("OptimizedExpr", NYql::NLog::EComponent::Core, NYql::NLog::ELevel::TRACE), - "OptTrace", NYql::TIssuesIds::CORE, "OptTrace"); + "OptTrace", NYql::TIssuesIds::CORE, "OptTrace"); } void AfterOptimize(NYql::TTransformationPipeline* pipeline) const final { @@ -118,6 +118,7 @@ public: pipeline->Add(NYql::TPlanOutputTransformer::Sync(PlanStream_, Program_->GetPlanBuilder(), Program_->GetOutputFormat()), "PlanOutput"); } } + private: NYql::TProgramPtr Program_; IOutputStream* PlanStream_; @@ -125,7 +126,7 @@ private: bool WithTypes_; }; -class TPeepHolePipelineConfigurator : public NYql::IPipelineConfigurator { +class TPeepHolePipelineConfigurator: public NYql::IPipelineConfigurator { public: TPeepHolePipelineConfigurator() { } @@ -136,7 +137,7 @@ public: void AfterTypeAnnotation(NYql::TTransformationPipeline* pipeline) const final { pipeline->Add(NYql::TExprLogTransformer::Sync("OptimizedExpr", NYql::NLog::EComponent::Core, NYql::NLog::ELevel::TRACE), - "OptTrace", NYql::TIssuesIds::CORE, "OptTrace"); + "OptTrace", NYql::TIssuesIds::CORE, "OptTrace"); } void AfterOptimize(NYql::TTransformationPipeline* pipeline) const final { @@ -144,8 +145,7 @@ public: } }; -} // unnamed - +} // namespace namespace NYql { @@ -188,7 +188,7 @@ void TFacadeRunOptions::ParseProtoConfig(const TString& cfgFile, google::protobu } } -void TFacadeRunOptions::Parse(int argc, const char *argv[]) { +void TFacadeRunOptions::Parse(int argc, const char* argv[]) { User = GetUsername(); if (EnableCredentials) { @@ -205,79 +205,69 @@ void TFacadeRunOptions::Parse(int argc, const char *argv[]) { NLastGetopt::TOpts opts = NLastGetopt::TOpts::Default(); opts.AddHelpOption(); - opts.AddLongOption('p', "program", "Program file (use - to read from 
stdin)").Optional().RequiredArgument("FILE") - .Handler1T<TString>([this](const TString& file) { - ProgramFile = file; - if (ProgramFile == "-") { - ProgramFile = "-stdin-"; - ProgramText = Cin.ReadAll(); - } else { - ProgramText = TFileInput(ProgramFile).ReadAll(); - } - }); + opts.AddLongOption('p', "program", "Program file (use - to read from stdin)").Optional().RequiredArgument("FILE").Handler1T<TString>([this](const TString& file) { + ProgramFile = file; + if (ProgramFile == "-") { + ProgramFile = "-stdin-"; + ProgramText = Cin.ReadAll(); + } else { + ProgramText = TFileInput(ProgramFile).ReadAll(); + } + }); opts.AddLongOption('s', "sql", "Program is SQL query").NoArgument().StoreValue(&ProgramType, EProgramType::Sql); if (PgSupport) { opts.AddLongOption("pg", "Program has PG syntax").NoArgument().StoreValue(&ProgramType, EProgramType::Pg); - opts.AddLongOption("pg-ext", "Pg extensions config file").Optional().RequiredArgument("FILE") - .Handler1T<TString>([this](const TString& file) { - PgExtConfig = TFacadeRunOptions::ParseProtoConfig<NProto::TPgExtensions>(file); - }); + opts.AddLongOption("pg-ext", "Pg extensions config file").Optional().RequiredArgument("FILE").Handler1T<TString>([this](const TString& file) { + PgExtConfig = TFacadeRunOptions::ParseProtoConfig<NProto::TPgExtensions>(file); + }); } - opts.AddLongOption('f', "file", "Additional files").RequiredArgument("name@path") - .KVHandler([this](TString name, TString path) { - if (name.empty() || path.empty()) { - throw yexception() << "Incorrect file mapping, expected form name@path, e.g. [email protected]"; - } + opts.AddLongOption('f', "file", "Additional files").RequiredArgument("name@path").KVHandler([this](TString name, TString path) { + if (name.empty() || path.empty()) { + throw yexception() << "Incorrect file mapping, expected form name@path, e.g. 
[email protected]"; + } - auto& entry = DataTable[NYql::TUserDataKey::File(NYql::GetDefaultFilePrefix() + name)]; - entry.Type = NYql::EUserDataType::PATH; - entry.Data = path; - }, '@'); + auto& entry = DataTable[NYql::TUserDataKey::File(NYql::GetDefaultFilePrefix() + name)]; + entry.Type = NYql::EUserDataType::PATH; + entry.Data = path; + }, '@'); - opts.AddLongOption('U', "url", "Additional urls").RequiredArgument("name@path") - .KVHandler([this](TString name, TString url) { - if (name.empty() || url.empty()) { - throw yexception() << "url mapping, expected form name@url, e.g. MyUrl@http://example.com/file"; - } + opts.AddLongOption('U', "url", "Additional urls").RequiredArgument("name@path").KVHandler([this](TString name, TString url) { + if (name.empty() || url.empty()) { + throw yexception() << "url mapping, expected form name@url, e.g. MyUrl@http://example.com/file"; + } - auto& entry = DataTable[NYql::TUserDataKey::File(NYql::GetDefaultFilePrefix() + name)]; - entry.Type = NYql::EUserDataType::URL; - entry.Data = url; - }, '@'); + auto& entry = DataTable[NYql::TUserDataKey::File(NYql::GetDefaultFilePrefix() + name)]; + entry.Type = NYql::EUserDataType::URL; + entry.Data = url; + }, '@'); - opts.AddLongOption('m', "mounts", "Mount points config file.").Optional().RequiredArgument("FILE") - .Handler1T<TString>([this](const TString& file) { - MountConfig = TFacadeRunOptions::ParseProtoConfig<NYqlMountConfig::TMountConfig>(file); - }); - opts.AddLongOption("params-file", "Query parameters values in YSON format").Optional().RequiredArgument("FILE") - .Handler1T<TString>([this](const TString& file) { - Params = TFileInput(file).ReadAll(); - }); + opts.AddLongOption('m', "mounts", "Mount points config file.").Optional().RequiredArgument("FILE").Handler1T<TString>([this](const TString& file) { + MountConfig = TFacadeRunOptions::ParseProtoConfig<NYqlMountConfig::TMountConfig>(file); + }); + opts.AddLongOption("params-file", "Query parameters values in YSON 
format").Optional().RequiredArgument("FILE").Handler1T<TString>([this](const TString& file) { + Params = TFileInput(file).ReadAll(); + }); opts.AddLongOption("yson-attrs", "Provide operation yson attribues").Optional().RequiredArgument("VALUE").StoreResult(&YsonAttrs); - opts.AddLongOption('G', "gateways", TStringBuilder() << "Used gateways, available: " << JoinSeq(",", SupportedGateways_)).DefaultValue(JoinSeq(",", GatewayTypes)) - .Handler1T<TString>([this](const TString& gateways) { - GatewayTypes.clear(); - ::StringSplitter(gateways).Split(',').Consume([&](const TStringBuf& val) { - if (!SupportedGateways_.contains(val)) { - throw yexception() << "Unsupported gateway \"" << val << '"'; - } - GatewayTypes.emplace(val); - }); - }); - opts.AddLongOption("gateways-cfg", "Gateways configuration file").Optional().RequiredArgument("FILE") - .Handler1T<TString>([this](const TString& file) { - GatewaysConfig = TFacadeRunOptions::ParseProtoConfig<TGatewaysConfig>(file); - }); - opts.AddLongOption("fs-cfg", "Fs configuration file").Optional().RequiredArgument("FILE") - .Handler1T<TString>([this](const TString& file) { - FsConfig = MakeHolder<TFileStorageConfig>(); - LoadFsConfigFromFile(file, *FsConfig); + opts.AddLongOption('G', "gateways", TStringBuilder() << "Used gateways, available: " << JoinSeq(",", SupportedGateways_)).DefaultValue(JoinSeq(",", GatewayTypes)).Handler1T<TString>([this](const TString& gateways) { + GatewayTypes.clear(); + ::StringSplitter(gateways).Split(',').Consume([&](const TStringBuf& val) { + if (!SupportedGateways_.contains(val)) { + throw yexception() << "Unsupported gateway \"" << val << '"'; + } + GatewayTypes.emplace(val); }); + }); + opts.AddLongOption("gateways-cfg", "Gateways configuration file").Optional().RequiredArgument("FILE").Handler1T<TString>([this](const TString& file) { + GatewaysConfig = TFacadeRunOptions::ParseProtoConfig<TGatewaysConfig>(file); + }); + opts.AddLongOption("fs-cfg", "Fs configuration 
file").Optional().RequiredArgument("FILE").Handler1T<TString>([this](const TString& file) { + FsConfig = MakeHolder<TFileStorageConfig>(); + LoadFsConfigFromFile(file, *FsConfig); + }); opts.AddLongOption('u', "udf", "Load shared library with UDF by given path").RequiredArgument("PATH").AppendTo(&UdfsPaths); - opts.AddLongOption("udfs-dir", "Load all shared libraries with UDFs found in given directory").RequiredArgument("DIR") - .Handler1T<TString>([this](const TString& dir) { - NKikimr::NMiniKQL::FindUdfsInDir(dir, &UdfsPaths); - }); + opts.AddLongOption("udfs-dir", "Load all shared libraries with UDFs found in given directory").RequiredArgument("DIR").Handler1T<TString>([this](const TString& dir) { + NKikimr::NMiniKQL::FindUdfsInDir(dir, &UdfsPaths); + }); opts.AddLongOption("udf-resolver", "Path to udf-resolver").Optional().RequiredArgument("PATH").StoreResult(&UdfResolverPath); opts.AddLongOption("udf-resolver-log", "Path to udf resolver log").Optional().RequiredArgument("PATH").StoreResult(&UdfResolverLog); opts.AddLongOption("udf-resolver-filter-syscalls", "Filter syscalls in udf resolver").Optional().NoArgument().SetFlag(&UdfResolverFilterSyscalls); @@ -287,36 +277,32 @@ void TFacadeRunOptions::Parse(int argc, const char *argv[]) { opts.AddLongOption("compile-only", "Compile program and exit").NoArgument().StoreValue(&Mode, ERunMode::Compile); opts.AddLongOption("validate", "Validate program and exit").NoArgument().StoreValue(&Mode, ERunMode::Validate); opts.AddLongOption("lineage", "Calculate program lineage and exit").NoArgument().StoreValue(&Mode, ERunMode::Lineage); - opts.AddLongOption('O',"optimize", "Optimize program and exit").NoArgument().StoreValue(&Mode, ERunMode::Optimize); + opts.AddLongOption('O', "optimize", "Optimize program and exit").NoArgument().StoreValue(&Mode, ERunMode::Optimize); opts.AddLongOption('D', "discover", "Discover tables in the program and exit").NoArgument().StoreValue(&Mode, ERunMode::Discover); 
opts.AddLongOption("peephole", "Perform peephole program optimization and exit").NoArgument().StoreValue(&Mode, ERunMode::Peephole); - opts.AddLongOption('R',"run", "Run program (use by default)").NoArgument().StoreValue(&Mode, ERunMode::Run); + opts.AddLongOption('R', "run", "Run program (use by default)").NoArgument().StoreValue(&Mode, ERunMode::Run); opts.AddLongOption('L', "show-log", "Show transformation log").Optional().NoArgument().SetFlag(&ShowLog); opts.AddLongOption('v', "verbosity", "Log verbosity level").Optional().RequiredArgument("LEVEL").StoreResult(&Verbosity); opts.AddLongOption("print-ast", "Print AST after loading").NoArgument().SetFlag(&PrintAst); - opts.AddLongOption("print-expr", "Print rebuild AST before execution").NoArgument() - .Handler0([this]() { - if (!ExprStream) { - ExprStream = &Cout; - } - }); + opts.AddLongOption("print-expr", "Print rebuild AST before execution").NoArgument().Handler0([this]() { + if (!ExprStream) { + ExprStream = &Cout; + } + }); opts.AddLongOption("with-types", "Print types annotation").NoArgument().SetFlag(&WithTypes); - opts.AddLongOption("trace-opt", "Print AST in the begin of each transformation").NoArgument() - .Handler0([this]() { - TraceOptStream = &Cerr; - }); - opts.AddLongOption("expr-file", "Print AST to that file instead of stdout").Optional().RequiredArgument("FILE") - .Handler1T<TString>([this](const TString& file) { - ExprStreamHolder_ = MakeHolder<TFixedBufferFileOutput>(file); - ExprStream = ExprStreamHolder_.Get(); - }); - opts.AddLongOption("print-result", "Print program execution result to stdout").NoArgument() - .Handler0([this]() { - if (!ResultStream) { - ResultStream = &Cout; - } - }); + opts.AddLongOption("trace-opt", "Print AST in the begin of each transformation").NoArgument().Handler0([this]() { + TraceOptStream = &Cerr; + }); + opts.AddLongOption("expr-file", "Print AST to that file instead of stdout").Optional().RequiredArgument("FILE").Handler1T<TString>([this](const TString& file) 
{ + ExprStreamHolder_ = MakeHolder<TFixedBufferFileOutput>(file); + ExprStream = ExprStreamHolder_.Get(); + }); + opts.AddLongOption("print-result", "Print program execution result to stdout").NoArgument().Handler0([this]() { + if (!ResultStream) { + ResultStream = &Cout; + } + }); opts.AddLongOption("format", "Results format") .Optional() .RequiredArgument("STR") @@ -333,22 +319,19 @@ void TFacadeRunOptions::Parse(int argc, const char *argv[]) { } }); - opts.AddLongOption("result-file", "Print program execution result to file").Optional().RequiredArgument("FILE") - .Handler1T<TString>([this](const TString& file) { - ResultStreamHolder_ = MakeHolder<TFixedBufferFileOutput>(file); - ResultStream = ResultStreamHolder_.Get(); - }); - opts.AddLongOption('P',"trace-plan", "Print plan before execution").NoArgument() - .Handler0([this]() { - if (!PlanStream) { - PlanStream = &Cerr; - } - }); - opts.AddLongOption("plan-file", "Print program plan to file").Optional().RequiredArgument("FILE") - .Handler1T<TString>([this](const TString& file) { - PlanStreamHolder_ = MakeHolder<TFixedBufferFileOutput>(file); - PlanStream = PlanStreamHolder_.Get(); - }); + opts.AddLongOption("result-file", "Print program execution result to file").Optional().RequiredArgument("FILE").Handler1T<TString>([this](const TString& file) { + ResultStreamHolder_ = MakeHolder<TFixedBufferFileOutput>(file); + ResultStream = ResultStreamHolder_.Get(); + }); + opts.AddLongOption('P', "trace-plan", "Print plan before execution").NoArgument().Handler0([this]() { + if (!PlanStream) { + PlanStream = &Cerr; + } + }); + opts.AddLongOption("plan-file", "Print program plan to file").Optional().RequiredArgument("FILE").Handler1T<TString>([this](const TString& file) { + PlanStreamHolder_ = MakeHolder<TFixedBufferFileOutput>(file); + PlanStream = PlanStreamHolder_.Get(); + }); opts.AddLongOption("err-file", "Print validate/optimize/runtime errors to file") .Handler1T<TString>([this](const TString& file) { 
ErrStreamHolder_ = MakeHolder<TFixedBufferFileOutput>(file); @@ -363,15 +346,14 @@ void TFacadeRunOptions::Parse(int argc, const char *argv[]) { .Handler1T<TString>([this](const TString& mode) { ValidateMode = NUdf::ValidateModeByStr(mode); }); - opts.AddLongOption("stat", "Print execution statistics").Optional().OptionalArgument("FILE") - .Handler1T<TString>([this](const TString& file) { - if (file) { - StatStreamHolder_ = MakeHolder<TFileOutput>(file); - StatStream = StatStreamHolder_.Get(); - } else { - StatStream = &Cerr; - } - }); + opts.AddLongOption("stat", "Print execution statistics").Optional().OptionalArgument("FILE").Handler1T<TString>([this](const TString& file) { + if (file) { + StatStreamHolder_ = MakeHolder<TFileOutput>(file); + StatStream = StatStreamHolder_.Get(); + } else { + StatStream = &Cerr; + } + }); opts.AddLongOption("full-stat", "Output full execution statistics").Optional().NoArgument().SetFlag(&FullStatistics); opts.AddLongOption("sql-flags", "SQL translator pragma flags").SplitHandler(&SqlFlags, ','); @@ -411,29 +393,25 @@ void TFacadeRunOptions::Parse(int argc, const char *argv[]) { }); } if (EnableQPlayer) { - opts.AddLongOption("qstorage-dir", "Directory for QStorage").RequiredArgument("DIR") - .Handler1T<TString>([this](const TString& dir) { - QPlayerStorage_ = MakeFileQStorage(dir); - }); + opts.AddLongOption("qstorage-dir", "Directory for QStorage").RequiredArgument("DIR").Handler1T<TString>([this](const TString& dir) { + QPlayerStorage_ = MakeFileQStorage(dir); + }); opts.AddLongOption("op-id", "QStorage operation id").StoreResult(&OperationId).DefaultValue("dummy_op"); - opts.AddLongOption("capture", "Write query metadata to QStorage").NoArgument() - .Handler0([this]() { - if (EQPlayerMode::Replay == QPlayerMode) { - throw yexception() << "replay and capture options can't be used simultaneously"; - } - QPlayerMode = EQPlayerMode::Capture; - }); - opts.AddLongOption("replay", "Read query metadata from QStorage").NoArgument() - 
.Handler0([this]() { - if (EQPlayerMode::Capture == QPlayerMode) { - throw yexception() << "replay and capture options can't be used simultaneously"; - } - QPlayerMode = EQPlayerMode::Replay; - }); - opts.AddLongOption("gateways-patch", "QPlayer patch for gateways conf").Optional().RequiredArgument("FILE") - .Handler1T<TString>([this](const TString& file) { - GatewaysPatch = TFileInput(file).ReadAll(); - }); + opts.AddLongOption("capture", "Write query metadata to QStorage").NoArgument().Handler0([this]() { + if (EQPlayerMode::Replay == QPlayerMode) { + throw yexception() << "replay and capture options can't be used simultaneously"; + } + QPlayerMode = EQPlayerMode::Capture; + }); + opts.AddLongOption("replay", "Read query metadata from QStorage").NoArgument().Handler0([this]() { + if (EQPlayerMode::Capture == QPlayerMode) { + throw yexception() << "replay and capture options can't be used simultaneously"; + } + QPlayerMode = EQPlayerMode::Replay; + }); + opts.AddLongOption("gateways-patch", "QPlayer patch for gateways conf").Optional().RequiredArgument("FILE").Handler1T<TString>([this](const TString& file) { + GatewaysPatch = TFileInput(file).ReadAll(); + }); } if (CustomTests) { @@ -441,28 +419,27 @@ void TFacadeRunOptions::Parse(int argc, const char *argv[]) { opts.AddLongOption("test-format", "Compare formatted query's AST with the original query's AST (only syntaxVersion=1 is supported)").NoArgument().SetFlag(&TestSqlFormat); opts.AddLongOption("test-lexers", "Compare lexers").NoArgument().SetFlag(&TestLexers); opts.AddLongOption("test-complete", "check completion engine").NoArgument().SetFlag(&TestComplete); + opts.AddLongOption("test-syntax-ambiguity", "check syntax ambiguities").NoArgument().SetFlag(&TestSyntaxAmbiguities); opts.AddLongOption("validate-result-format", "Check that result-format can parse Result").NoArgument().SetFlag(&ValidateResultFormat); } - opts.AddLongOption("langver", "Set current language version").Optional().RequiredArgument("VER") - 
.Handler1T<TString>([this](const TString& str) { - if (str == "unknown") { - LangVer = UnknownLangVersion; - } else if (!ParseLangVersion(str, LangVer)) { - throw yexception() << "Failed to parse language version: " << str; - } - }); + opts.AddLongOption("langver", "Set current language version").Optional().RequiredArgument("VER").Handler1T<TString>([this](const TString& str) { + if (str == "unknown") { + LangVer = UnknownLangVersion; + } else if (!ParseLangVersion(str, LangVer)) { + throw yexception() << "Failed to parse language version: " << str; + } + }); - opts.AddLongOption("max-langver", "Set maximum language version").Optional().RequiredArgument("VER") - .Handler1T<TString>([this](const TString& str) { - if (!ParseLangVersion(str, MaxLangVer)) { - throw yexception() << "Failed to parse language version: " << str; - } - }); + opts.AddLongOption("max-langver", "Set maximum language version").Optional().RequiredArgument("VER").Handler1T<TString>([this](const TString& str) { + if (!ParseLangVersion(str, MaxLangVer)) { + throw yexception() << "Failed to parse language version: " << str; + } + }); opts.SetFreeArgsMax(0); - for (auto& ext: OptExtenders_) { + for (auto& ext : OptExtenders_) { ext(opts); } @@ -512,12 +489,12 @@ void TFacadeRunOptions::Parse(int argc, const char *argv[]) { } if (EnableCredentials && Token) { - for (auto name: SupportedGateways_) { + for (auto name : SupportedGateways_) { Credentials->AddCredential(TStringBuilder() << "default_" << name, TCredential(name, "", Token)); } } - for (auto& handle: OptHandlers_) { + for (auto& handle : OptHandlers_) { handle(res); } } @@ -535,21 +512,20 @@ TIntrusivePtr<NKikimr::NMiniKQL::IFunctionRegistry> TFacadeRunner::GetFuncRegist return FuncRegistry_; } -int TFacadeRunner::Main(int argc, const char *argv[]) { +int TFacadeRunner::Main(int argc, const char* argv[]) { NYql::NBacktrace::RegisterKikimrFatalActions(); NYql::NBacktrace::EnableKikimrSymbolize(); EnableKikimrBacktraceFormat(); try { return 
DoMain(argc, argv); - } - catch (...) { + } catch (...) { Cerr << CurrentExceptionMessage() << Endl; return 1; } } -int TFacadeRunner::DoMain(int argc, const char *argv[]) { +int TFacadeRunner::DoMain(int argc, const char* argv[]) { Y_UNUSED(NUdf::GetStaticSymbols()); RunOptions_.Parse(argc, argv); @@ -570,15 +546,16 @@ int TFacadeRunner::DoMain(int argc, const char *argv[]) { TVector<NPg::TExtensionDesc> extensions; PgExtensionsFromProto(*RunOptions_.PgExtConfig, extensions); NPg::RegisterExtensions(extensions, RunOptions_.QPlayerContext.CanRead(), - *NSQLTranslationPG::CreateExtensionSqlParser(), - NKikimr::NMiniKQL::CreateExtensionLoader().get()); + *NSQLTranslationPG::CreateExtensionSqlParser(), + NKikimr::NMiniKQL::CreateExtensionLoader().get()); } NPg::GetSqlLanguageParser()->Freeze(); } auto funcRegistry = NKikimr::NMiniKQL::CreateFunctionRegistry(&NYql::NBacktrace::KikimrBackTrace, - NKikimr::NMiniKQL::CreateBuiltinRegistry(), true, RunOptions_.UdfsPaths)->Clone(); + NKikimr::NMiniKQL::CreateBuiltinRegistry(), true, RunOptions_.UdfsPaths) + ->Clone(); NKikimr::NMiniKQL::FillStaticModules(*funcRegistry); FuncRegistry_ = funcRegistry; @@ -586,14 +563,15 @@ int TFacadeRunner::DoMain(int argc, const char *argv[]) { lexers.Antlr4 = NSQLTranslationV1::MakeAntlr4LexerFactory(); lexers.Antlr4Ansi = NSQLTranslationV1::MakeAntlr4AnsiLexerFactory(); NSQLTranslationV1::TParsers parsers; - parsers.Antlr4 = NSQLTranslationV1::MakeAntlr4ParserFactory(); - parsers.Antlr4Ansi = NSQLTranslationV1::MakeAntlr4AnsiParserFactory(); + parsers.Antlr4 = NSQLTranslationV1::MakeAntlr4ParserFactory( + /*isAmbiguityError=*/RunOptions_.TestSyntaxAmbiguities); + parsers.Antlr4Ansi = NSQLTranslationV1::MakeAntlr4AnsiParserFactory( + /*isAmbiguityError=*/RunOptions_.TestSyntaxAmbiguities); NSQLTranslation::TTranslators translators( nullptr, NSQLTranslationV1::MakeTranslator(lexers, parsers), - NSQLTranslationPG::MakeTranslator() - ); + NSQLTranslationPG::MakeTranslator()); TExprContext 
ctx; if (RunOptions_.PgSupport) { @@ -601,13 +579,12 @@ int TFacadeRunner::DoMain(int argc, const char *argv[]) { } IModuleResolver::TPtr moduleResolver; TModuleResolver::TModuleChecker moduleChecker; - if (RunOptions_.TestLexers || RunOptions_.TestComplete) { - moduleChecker = [ - lexers, parsers, - testLexers = RunOptions_.TestLexers, - testComplete = RunOptions_.TestComplete, - clusters = ClusterMapping_](const TString& query, const TString& fileName, TExprContext& ctx) { - + if (RunOptions_.TestLexers || RunOptions_.TestComplete || RunOptions_.TestSyntaxAmbiguities) { + moduleChecker = [lexers, parsers, + testLexers = RunOptions_.TestLexers, + testComplete = RunOptions_.TestComplete, + testSyntaxAmbiguities = RunOptions_.TestSyntaxAmbiguities, + clusters = ClusterMapping_](const TString& query, const TString& fileName, TExprContext& ctx) { if (testLexers) { TIssues issues; if (!NSQLTranslationV1::CheckLexers(TPosition(0, 0, fileName), query, issues)) { @@ -621,17 +598,24 @@ int TFacadeRunner::DoMain(int argc, const char *argv[]) { } } - if (testComplete) { + if (testComplete || testSyntaxAmbiguities) { google::protobuf::Arena arena; NSQLTranslation::TTranslationSettings settings; settings.Arena = &arena; settings.ClusterMapping = clusters; settings.SyntaxVersion = 1; + settings.AlwaysAllowExports = true; auto ast = NSQLTranslationV1::SqlToYql(lexers, parsers, query, settings); if (!ast.IsOk()) { - return true; + auto issue = TIssue(TPosition(0, 0, fileName), "Translation failed"); + for (const auto& i : ast.Issues) { + issue.AddSubIssue(MakeIntrusive<TIssue>(i)); + } + + ctx.AddError(issue); + return false; } TIssues issues; @@ -661,10 +645,10 @@ int TFacadeRunner::DoMain(int argc, const char *argv[]) { } moduleResolver = std::make_shared<TModuleResolver>(translators, std::move(modules), ctx.NextUniqueId, - ClusterMapping_, RunOptions_.SqlFlags, RunOptions_.Mode >= ERunMode::Validate, THolder<TExprContext>(), moduleChecker); + ClusterMapping_, 
RunOptions_.SqlFlags, RunOptions_.Mode >= ERunMode::Validate, THolder<TExprContext>(), moduleChecker); } else { if (!GetYqlDefaultModuleResolver(ctx, moduleResolver, ClusterMapping_, - RunOptions_.OptimizeLibs && RunOptions_.Mode >= ERunMode::Validate, moduleChecker)) { + RunOptions_.OptimizeLibs && RunOptions_.Mode >= ERunMode::Validate, moduleChecker)) { *RunOptions_.ErrStream << "Errors loading default YQL libraries:" << Endl; ctx.IssueManager.GetIssues().PrintTo(*RunOptions_.ErrStream); return -1; @@ -675,7 +659,7 @@ int TFacadeRunner::DoMain(int argc, const char *argv[]) { if (RunOptions_.Mode >= ERunMode::Validate) { std::vector<NFS::IDownloaderPtr> downloaders; - for (auto& factory: FsDownloadFactories_) { + for (auto& factory : FsDownloadFactories_) { if (auto download = factory()) { downloaders.push_back(std::move(download)); } @@ -709,8 +693,8 @@ int TFacadeRunner::DoMain(int argc, const char *argv[]) { RunOptions_.PrintInfo(TStringBuilder() << TInstant::Now().ToStringLocalUpToSeconds() << " Udfs scanned"); } else { udfResolver = FileStorage_ && RunOptions_.UdfResolverPath - ? NCommon::CreateOutProcUdfResolver(FuncRegistry_.Get(), FileStorage_, RunOptions_.UdfResolverPath, {}, {}, RunOptions_.UdfResolverFilterSyscalls, {}) - : NCommon::CreateSimpleUdfResolver(FuncRegistry_.Get(), FileStorage_, true); + ? 
NCommon::CreateOutProcUdfResolver(FuncRegistry_.Get(), FileStorage_, RunOptions_.UdfResolverPath, {}, {}, RunOptions_.UdfResolverFilterSyscalls, {}) + : NCommon::CreateSimpleUdfResolver(FuncRegistry_.Get(), FileStorage_, true); if (RunOptions_.UdfResolverLog) { udfResolver = NCommon::CreateUdfResolverDecoratorWithLogger(FuncRegistry_.Get(), udfResolver, RunOptions_.UdfResolverLog, RunOptions_.OperationId); } @@ -720,14 +704,14 @@ int TFacadeRunner::DoMain(int argc, const char *argv[]) { if (RunOptions_.PgSupport) { dataProvidersInit.push_back(GetPgDataProviderInitializer()); } - for (auto& factory: ProviderFactories_) { + for (auto& factory : ProviderFactories_) { if (auto init = factory()) { dataProvidersInit.push_back(std::move(init)); } } TVector<IUrlListerPtr> urlListers; - for (auto& factory: UrlListerFactories_) { + for (auto& factory : UrlListerFactories_) { if (auto listener = factory()) { urlListers.push_back(std::move(listener)); } @@ -757,7 +741,6 @@ int TFacadeRunner::DoMain(int argc, const char *argv[]) { } int TFacadeRunner::DoRun(TProgramFactory& factory) { - TProgramPtr program = factory.Create(RunOptions_.ProgramFile, RunOptions_.ProgramText, RunOptions_.OperationId, EHiddenMode::Disable, RunOptions_.QPlayerContext, RunOptions_.GatewaysPatch); program->SetLanguageVersion(RunOptions_.LangVer); program->SetMaxLanguageVersion(RunOptions_.MaxLangVer); @@ -852,6 +835,39 @@ int TFacadeRunner::DoRun(TProgramFactory& factory) { return -1; } } + if (!fail && RunOptions_.TestSyntaxAmbiguities && 1 == RunOptions_.SyntaxVersion) { + NSQLTranslationV1::TLexers lexers = { + .Antlr4 = NSQLTranslationV1::MakeAntlr4LexerFactory(), + .Antlr4Ansi = NSQLTranslationV1::MakeAntlr4AnsiLexerFactory(), + }; + + NSQLTranslationV1::TParsers parsers = { + .Antlr4 = NSQLTranslationV1::MakeAntlr4ParserFactory( + /*isAmbiguityError=*/true), + .Antlr4Ansi = NSQLTranslationV1::MakeAntlr4AnsiParserFactory( + /*isAmbiguityError=*/true), + }; + + NSQLTranslation::TTranslators 
translators( + /* v0 = */ nullptr, + NSQLTranslationV1::MakeTranslator(lexers, parsers), + /* pg = */ nullptr); + + NYql::TIssues issues; + google::protobuf::Message* message = NSQLTranslation::SqlAST( + translators, + RunOptions_.ProgramText, + RunOptions_.ProgramFile, + issues, + NSQLTranslation::SQL_MAX_PARSER_ERRORS, + settings); + + if (!message) { + *RunOptions_.ErrStream << "Syntax ambiguity was detected" << Endl; + issues.PrintTo(*RunOptions_.ErrStream); + return -1; + } + } } else { RunOptions_.PrintInfo("Parse YQL..."); if (!program->ParseYql()) { @@ -921,7 +937,7 @@ int TFacadeRunner::DoRun(TProgramFactory& factory) { } if (!RunOptions_.FullExpr && ERunMode::Peephole != RunOptions_.Mode) { - program->Print(RunOptions_.ExprStream, RunOptions_.PlanStream, /*cleanPlan*/true); + program->Print(RunOptions_.ExprStream, RunOptions_.PlanStream, /*cleanPlan*/ true); } program->ConfigureYsonResultFormat(RunOptions_.ResultsFormat); @@ -980,12 +996,12 @@ TProgram::TStatus TFacadeRunner::DoRunProgram(TProgramPtr program) { TProgram::TStatus status = TProgram::TStatus::Ok; auto defOptConfig = TOptPipelineConfigurator(program, RunOptions_.FullExpr ? RunOptions_.PlanStream : nullptr, RunOptions_.FullExpr ? RunOptions_.ExprStream : nullptr, RunOptions_.WithTypes); - IPipelineConfigurator* optConfig = OptPipelineConfigurator_ ? OptPipelineConfigurator_ : &defOptConfig; + IPipelineConfigurator* optConfig = OptPipelineConfigurator_ ? OptPipelineConfigurator_ : &defOptConfig; if (ERunMode::Peephole == RunOptions_.Mode) { RunOptions_.PrintInfo("Peephole..."); auto defConfig = TPeepHolePipelineConfigurator(); - IPipelineConfigurator* config = PeepholePipelineConfigurator_ ? PeepholePipelineConfigurator_ : &defConfig; + IPipelineConfigurator* config = PeepholePipelineConfigurator_ ? 
PeepholePipelineConfigurator_ : &defConfig; status = program->OptimizeWithConfig(RunOptions_.User, *config); } else if (ERunMode::Run == RunOptions_.Mode) { RunOptions_.PrintInfo("Run program..."); @@ -1007,4 +1023,4 @@ TProgram::TStatus TFacadeRunner::DoRunProgram(TProgramPtr program) { return status; } -} // NYql +} // namespace NYql diff --git a/yql/essentials/tools/yql_facade_run/yql_facade_run.h b/yql/essentials/tools/yql_facade_run/yql_facade_run.h index d894369981a..b7fdcbbb0ae 100644 --- a/yql/essentials/tools/yql_facade_run/yql_facade_run.h +++ b/yql/essentials/tools/yql_facade_run/yql_facade_run.h @@ -24,45 +24,45 @@ #include <functional> namespace NKikimr::NMiniKQL { - class IFunctionRegistry; -} +class IFunctionRegistry; +} // namespace NKikimr::NMiniKQL namespace NYql { - class TFileStorageConfig; - class TGatewaysConfig; -} +class TFileStorageConfig; +class TGatewaysConfig; +} // namespace NYql namespace NYql::NProto { - class TPgExtensions; -} +class TPgExtensions; +} // namespace NYql::NProto namespace NYqlMountConfig { - class TMountConfig; -} +class TMountConfig; +} // namespace NYqlMountConfig namespace NYql { enum class ERunMode { - Parse /* "parse" */, - Compile /* "compile" */, - Validate /* "validate" */, - Optimize /* "optimize" */, - Peephole /* "peephole" */, - Lineage /* "lineage" */, - Discover /* "discover" */, - Run /* "run" */, + Parse /* "parse" */, + Compile /* "compile" */, + Validate /* "validate" */, + Optimize /* "optimize" */, + Peephole /* "peephole" */, + Lineage /* "lineage" */, + Discover /* "discover" */, + Run /* "run" */, }; enum class EProgramType { - SExpr /* "s-expr" */, - Sql /* "sql" */, - Pg /* "pg" */, + SExpr /* "s-expr" */, + Sql /* "sql" */, + Pg /* "pg" */, }; enum class EQPlayerMode { - None /* "none" */, + None /* "none" */, Capture /* "capture" */, - Replay /* "replay" */, + Replay /* "replay" */, }; class TFacadeRunOptions { @@ -92,6 +92,7 @@ public: bool TestSqlFormat = false; bool TestLexers = false; 
bool TestComplete = false; + bool TestSyntaxAmbiguities = false; THashMap<TString, NSQLTranslation::TTableBindingSettings> Bindings; bool PrintAst = false; @@ -141,7 +142,7 @@ public: bool OptimizeLibs = true; bool CustomTests = false; - void Parse(int argc, const char *argv[]); + void Parse(int argc, const char* argv[]); void AddOptExtension(std::function<void(NLastGetopt::TOpts& opts)> optExtender) { OptExtenders_.push_back(std::move(optExtender)); @@ -183,7 +184,7 @@ public: TFacadeRunner(TString name); ~TFacadeRunner(); - int Main(int argc, const char *argv[]); + int Main(int argc, const char* argv[]); void AddFsDownloadFactory(std::function<NFS::IDownloaderPtr()> factory) { FsDownloadFactories_.push_back(std::move(factory)); @@ -199,7 +200,7 @@ public: } template <class TPbConfig> void FillClusterMapping(const TPbConfig& config, const TString& provider) { - for (auto& cluster: config.GetClusterMapping()) { + for (auto& cluster : config.GetClusterMapping()) { ClusterMapping_.emplace(to_lower(cluster.GetName()), provider); } } @@ -222,7 +223,7 @@ public: } protected: - virtual int DoMain(int argc, const char *argv[]); + virtual int DoMain(int argc, const char* argv[]); virtual int DoRun(TProgramFactory& factory); virtual TProgram::TStatus DoRunProgram(TProgramPtr program); @@ -242,4 +243,4 @@ private: std::unique_ptr<NYql::NLog::YqlLoggerScope> YqlLogger_; }; -} // NYql +} // namespace NYql diff --git a/yql/essentials/tools/yql_highlight/generator.cpp b/yql/essentials/tools/yql_highlight/generator.cpp index 45f95624760..27d7eff9118 100644 --- a/yql/essentials/tools/yql_highlight/generator.cpp +++ b/yql/essentials/tools/yql_highlight/generator.cpp @@ -4,28 +4,28 @@ namespace NSQLHighlight { - class TOnlyFunctionGenerator: public IGenerator { - public: - explicit TOnlyFunctionGenerator(TGeneratorFunction function) - : Function_(std::move(function)) - { - } +class TOnlyFunctionGenerator: public IGenerator { +public: + explicit 
TOnlyFunctionGenerator(TGeneratorFunction function) + : Function_(std::move(function)) + { + } - void Write(IOutputStream& out, const THighlighting& highlighting, bool ansi) override { - Function_(out, highlighting, ansi); - } + void Write(IOutputStream& out, const THighlighting& highlighting, bool ansi) override { + Function_(out, highlighting, ansi); + } - void Write(const TFsPath& path, const THighlighting& highlighting, bool ansi) override { - TFileOutput out(path); - Write(out, highlighting, ansi); - } + void Write(const TFsPath& path, const THighlighting& highlighting, bool ansi) override { + TFileOutput out(path); + Write(out, highlighting, ansi); + } - private: - TGeneratorFunction Function_; - }; +private: + TGeneratorFunction Function_; +}; - IGenerator::TPtr MakeOnlyFileGenerator(TGeneratorFunction function) { - return new TOnlyFunctionGenerator(std::move(function)); - } +IGenerator::TPtr MakeOnlyFileGenerator(TGeneratorFunction function) { + return new TOnlyFunctionGenerator(std::move(function)); +} } // namespace NSQLHighlight diff --git a/yql/essentials/tools/yql_highlight/generator.h b/yql/essentials/tools/yql_highlight/generator.h index 83341276ef9..0d7f6180e52 100644 --- a/yql/essentials/tools/yql_highlight/generator.h +++ b/yql/essentials/tools/yql_highlight/generator.h @@ -7,16 +7,16 @@ namespace NSQLHighlight { - class IGenerator: public TThrRefBase { - public: - using TPtr = TIntrusivePtr<IGenerator>; +class IGenerator: public TThrRefBase { +public: + using TPtr = TIntrusivePtr<IGenerator>; - virtual void Write(IOutputStream& out, const THighlighting& highlighting, bool ansi) = 0; - virtual void Write(const TFsPath& path, const THighlighting& highlighting, bool ansi) = 0; - }; + virtual void Write(IOutputStream& out, const THighlighting& highlighting, bool ansi) = 0; + virtual void Write(const TFsPath& path, const THighlighting& highlighting, bool ansi) = 0; +}; - using TGeneratorFunction = std::function<void(IOutputStream&, const 
THighlighting&, bool)>; +using TGeneratorFunction = std::function<void(IOutputStream&, const THighlighting&, bool)>; - IGenerator::TPtr MakeOnlyFileGenerator(TGeneratorFunction function); +IGenerator::TPtr MakeOnlyFileGenerator(TGeneratorFunction function); } // namespace NSQLHighlight diff --git a/yql/essentials/tools/yql_highlight/generator_highlight_js.cpp b/yql/essentials/tools/yql_highlight/generator_highlight_js.cpp index d8673a24601..1a04c7d6b4d 100644 --- a/yql/essentials/tools/yql_highlight/generator_highlight_js.cpp +++ b/yql/essentials/tools/yql_highlight/generator_highlight_js.cpp @@ -8,114 +8,114 @@ namespace NSQLHighlight { - TString ToHighlightJSClass(EUnitKind kind) { - switch (kind) { - case EUnitKind::Keyword: - return "keyword"; - case EUnitKind::Punctuation: - return "punctuation"; - case EUnitKind::QuotedIdentifier: - return "symbol"; - case EUnitKind::BindParameterIdentifier: - return "variable"; - case EUnitKind::TypeIdentifier: - return "type"; - case EUnitKind::FunctionIdentifier: - return "title.function"; - case EUnitKind::Identifier: - return ""; - case EUnitKind::Literal: - return "number"; - case EUnitKind::StringLiteral: - return "string"; - case EUnitKind::Comment: - return "comment"; - case EUnitKind::Whitespace: - return ""; - case EUnitKind::Error: - return ""; - } +TString ToHighlightJSClass(EUnitKind kind) { + switch (kind) { + case EUnitKind::Keyword: + return "keyword"; + case EUnitKind::Punctuation: + return "punctuation"; + case EUnitKind::QuotedIdentifier: + return "symbol"; + case EUnitKind::BindParameterIdentifier: + return "variable"; + case EUnitKind::TypeIdentifier: + return "type"; + case EUnitKind::FunctionIdentifier: + return "title.function"; + case EUnitKind::Identifier: + return ""; + case EUnitKind::Literal: + return "number"; + case EUnitKind::StringLiteral: + return "string"; + case EUnitKind::Comment: + return "comment"; + case EUnitKind::Whitespace: + return ""; + case EUnitKind::Error: + return ""; } +} - 
// FIXME: copy-pasted from generator_textmate.cpp. - TString ToTextMateRegex(const TUnit& unit, const NSQLTranslationV1::TRegexPattern& pattern) { - TStringBuilder regex; - - if (unit.IsPlain) { - regex << R"re(\b)re"; - } - - if (!pattern.Before.empty()) { - regex << "(?<=" << pattern.Before << ")"; - } +// FIXME: copy-pasted from generator_textmate.cpp. +TString ToTextMateRegex(const TUnit& unit, const NSQLTranslationV1::TRegexPattern& pattern) { + TStringBuilder regex; - regex << "(" << pattern.Body << ")"; + if (unit.IsPlain) { + regex << R"re(\b)re"; + } - if (!pattern.After.empty()) { - regex << "(?=" << pattern.After << ")"; - } + if (!pattern.Before.empty()) { + regex << "(?<=" << pattern.Before << ")"; + } - if (unit.IsPlain) { - regex << R"re(\b)re"; - } + regex << "(" << pattern.Body << ")"; - return regex; + if (!pattern.After.empty()) { + regex << "(?=" << pattern.After << ")"; } - NJson::TJsonValue ToHighlightJSPattern(const TUnit& unit, const NSQLTranslationV1::TRegexPattern& pattern) { - NJson::TJsonMap json; - json["className"] = ToHighlightJSClass(unit.Kind); - json["begin"] = ToTextMateRegex(unit, pattern); - return json; + if (unit.IsPlain) { + regex << R"re(\b)re"; } - NJson::TJsonValue ToHighlightJSPattern(const TUnit& unit, const TRangePattern& pattern) { - NJson::TJsonMap json; - json["className"] = ToHighlightJSClass(unit.Kind); - json["begin"] = RE2::QuoteMeta(pattern.Begin); - json["end"] = RE2::QuoteMeta(pattern.End); - return json; - } + return regex; +} + +NJson::TJsonValue ToHighlightJSPattern(const TUnit& unit, const NSQLTranslationV1::TRegexPattern& pattern) { + NJson::TJsonMap json; + json["className"] = ToHighlightJSClass(unit.Kind); + json["begin"] = ToTextMateRegex(unit, pattern); + return json; +} - NJson::TJsonValue ToHighlightJSContains(const THighlighting& highlighting) { - NJson::TJsonArray array; +NJson::TJsonValue ToHighlightJSPattern(const TUnit& unit, const TRangePattern& pattern) { + NJson::TJsonMap json; + 
json["className"] = ToHighlightJSClass(unit.Kind); + json["begin"] = RE2::QuoteMeta(pattern.Begin); + json["end"] = RE2::QuoteMeta(pattern.End); + return json; +} - for (const TUnit& unit : highlighting.Units) { - if (unit.IsCodeGenExcluded || unit.Kind == EUnitKind::Identifier) { - continue; - } +NJson::TJsonValue ToHighlightJSContains(const THighlighting& highlighting) { + NJson::TJsonArray array; - for (const NSQLTranslationV1::TRegexPattern& pattern : unit.Patterns) { - array.AppendValue(ToHighlightJSPattern(unit, pattern)); - } - if (auto range = unit.RangePattern) { - array.AppendValue(ToHighlightJSPattern(unit, *range)); - } + for (const TUnit& unit : highlighting.Units) { + if (unit.IsCodeGenExcluded || unit.Kind == EUnitKind::Identifier) { + continue; } - return array; + for (const NSQLTranslationV1::TRegexPattern& pattern : unit.Patterns) { + array.AppendValue(ToHighlightJSPattern(unit, pattern)); + } + if (auto range = unit.RangePattern) { + array.AppendValue(ToHighlightJSPattern(unit, *range)); + } } - NJson::TJsonValue ToHighlightJSON(const THighlighting& highlighting) { - NJson::TJsonMap json; - json["name"] = highlighting.Name; - json["case_insensitive"] = IsCaseInsensitive(highlighting); - json["contains"] = NJson::TJsonArray{{NJson::TJsonMap{ - {"begin", ""}, - {"end", ";"}, - {"endsWithParent", true}, - {"lexemes", R"re(\w+)re"}, - {"contains", ToHighlightJSContains(highlighting)}, - }}}; - return json; - } + return array; +} - void GenerateHighlightJS(IOutputStream& out, const THighlighting& highlighting, bool /* ansi */) { - Print(out, ToHighlightJSON(highlighting)); - } +NJson::TJsonValue ToHighlightJSON(const THighlighting& highlighting) { + NJson::TJsonMap json; + json["name"] = highlighting.Name; + json["case_insensitive"] = IsCaseInsensitive(highlighting); + json["contains"] = NJson::TJsonArray{{NJson::TJsonMap{ + {"begin", ""}, + {"end", ";"}, + {"endsWithParent", true}, + {"lexemes", R"re(\w+)re"}, + {"contains", 
ToHighlightJSContains(highlighting)}, + }}}; + return json; +} - IGenerator::TPtr MakeHighlightJSGenerator() { - return MakeOnlyFileGenerator(GenerateHighlightJS); - } +void GenerateHighlightJS(IOutputStream& out, const THighlighting& highlighting, bool /* ansi */) { + Print(out, ToHighlightJSON(highlighting)); +} + +IGenerator::TPtr MakeHighlightJSGenerator() { + return MakeOnlyFileGenerator(GenerateHighlightJS); +} } // namespace NSQLHighlight diff --git a/yql/essentials/tools/yql_highlight/generator_highlight_js.h b/yql/essentials/tools/yql_highlight/generator_highlight_js.h index 160a4cf708d..d262a593d52 100644 --- a/yql/essentials/tools/yql_highlight/generator_highlight_js.h +++ b/yql/essentials/tools/yql_highlight/generator_highlight_js.h @@ -4,6 +4,6 @@ namespace NSQLHighlight { - IGenerator::TPtr MakeHighlightJSGenerator(); +IGenerator::TPtr MakeHighlightJSGenerator(); } // namespace NSQLHighlight diff --git a/yql/essentials/tools/yql_highlight/generator_json.cpp b/yql/essentials/tools/yql_highlight/generator_json.cpp index 846d2f85d2d..c77f116183d 100644 --- a/yql/essentials/tools/yql_highlight/generator_json.cpp +++ b/yql/essentials/tools/yql_highlight/generator_json.cpp @@ -6,10 +6,10 @@ namespace NSQLHighlight { - IGenerator::TPtr MakeJsonGenerator() { - return MakeOnlyFileGenerator([](IOutputStream& out, const THighlighting& highlighting, bool /* ansi */) { - Print(out, ToJson(highlighting)); - }); - } +IGenerator::TPtr MakeJsonGenerator() { + return MakeOnlyFileGenerator([](IOutputStream& out, const THighlighting& highlighting, bool /* ansi */) { + Print(out, ToJson(highlighting)); + }); +} } // namespace NSQLHighlight diff --git a/yql/essentials/tools/yql_highlight/generator_json.h b/yql/essentials/tools/yql_highlight/generator_json.h index 24f09041ff5..5fc3b36c413 100644 --- a/yql/essentials/tools/yql_highlight/generator_json.h +++ b/yql/essentials/tools/yql_highlight/generator_json.h @@ -4,6 +4,6 @@ namespace NSQLHighlight { - IGenerator::TPtr 
MakeJsonGenerator(); +IGenerator::TPtr MakeJsonGenerator(); } // namespace NSQLHighlight diff --git a/yql/essentials/tools/yql_highlight/generator_monarch.cpp b/yql/essentials/tools/yql_highlight/generator_monarch.cpp index 56867072ec6..8ad28e7d15d 100644 --- a/yql/essentials/tools/yql_highlight/generator_monarch.cpp +++ b/yql/essentials/tools/yql_highlight/generator_monarch.cpp @@ -10,215 +10,215 @@ namespace NSQLHighlight { - TString ToMonarchRegex(const TUnit& unit, const NSQLTranslationV1::TRegexPattern& pattern) { - TStringBuilder regex; +TString ToMonarchRegex(const TUnit& unit, const NSQLTranslationV1::TRegexPattern& pattern) { + TStringBuilder regex; - if (unit.IsPlain && pattern.Before.empty()) { - regex << R"re(\b)re"; - } + if (unit.IsPlain && pattern.Before.empty()) { + regex << R"re(\b)re"; + } - regex << "(" << pattern.Body << ")"; + regex << "(" << pattern.Body << ")"; - if (!pattern.After.empty()) { - regex << "(?=" << pattern.After << ")"; - } - - if (unit.IsPlain && pattern.Before.empty()) { - regex << R"re(\b)re"; - } - - return regex; + if (!pattern.After.empty()) { + regex << "(?=" << pattern.After << ")"; } - TString ToMonarchSelector(EUnitKind kind) { - switch (kind) { - case EUnitKind::Keyword: - return "keyword"; - case EUnitKind::Punctuation: - return "operator.sql"; - case EUnitKind::QuotedIdentifier: - return "string.tablepath"; - case EUnitKind::BindParameterIdentifier: - return "variable"; - case EUnitKind::TypeIdentifier: - return "keyword.type"; - case EUnitKind::FunctionIdentifier: - return "support.function"; - case EUnitKind::Identifier: - return "identifier"; - case EUnitKind::Literal: - return "number"; - case EUnitKind::StringLiteral: - return "string"; - case EUnitKind::Comment: - return "comment"; - case EUnitKind::Whitespace: - return "white"; - case EUnitKind::Error: - return ""; - } + if (unit.IsPlain && pattern.Before.empty()) { + regex << R"re(\b)re"; } - TString ToMonarchStateName(EUnitKind kind) { - switch (kind) { - 
case EUnitKind::Keyword: - return "keyword"; - case EUnitKind::Punctuation: - return "punctuation"; - case EUnitKind::QuotedIdentifier: - return "quotedIdentifier"; - case EUnitKind::BindParameterIdentifier: - return "bindParameterIdentifier"; - case EUnitKind::TypeIdentifier: - return "typeIdentifier"; - case EUnitKind::FunctionIdentifier: - return "functionIdentifier"; - case EUnitKind::Identifier: - return "identifier"; - case EUnitKind::Literal: - return "literal"; - case EUnitKind::StringLiteral: - return "stringLiteral"; - case EUnitKind::Comment: - return "comment"; - case EUnitKind::Whitespace: - return "whitespace"; - case EUnitKind::Error: - return "error"; - } - } + return regex; +} - NJson::TJsonValue ToMonarchMultiLineState(const TUnit& unit, bool ansi) { - Y_ENSURE(unit.RangePattern); - - TString group = ToMonarchSelector(unit.Kind); - TString begin = RE2::QuoteMeta(unit.RangePattern->Begin); - TString end = RE2::QuoteMeta(unit.RangePattern->End); +TString ToMonarchSelector(EUnitKind kind) { + switch (kind) { + case EUnitKind::Keyword: + return "keyword"; + case EUnitKind::Punctuation: + return "operator.sql"; + case EUnitKind::QuotedIdentifier: + return "string.tablepath"; + case EUnitKind::BindParameterIdentifier: + return "variable"; + case EUnitKind::TypeIdentifier: + return "keyword.type"; + case EUnitKind::FunctionIdentifier: + return "support.function"; + case EUnitKind::Identifier: + return "identifier"; + case EUnitKind::Literal: + return "number"; + case EUnitKind::StringLiteral: + return "string"; + case EUnitKind::Comment: + return "comment"; + case EUnitKind::Whitespace: + return "white"; + case EUnitKind::Error: + return ""; + } +} - NJson::TJsonValue json; +TString ToMonarchStateName(EUnitKind kind) { + switch (kind) { + case EUnitKind::Keyword: + return "keyword"; + case EUnitKind::Punctuation: + return "punctuation"; + case EUnitKind::QuotedIdentifier: + return "quotedIdentifier"; + case EUnitKind::BindParameterIdentifier: + return 
"bindParameterIdentifier"; + case EUnitKind::TypeIdentifier: + return "typeIdentifier"; + case EUnitKind::FunctionIdentifier: + return "functionIdentifier"; + case EUnitKind::Identifier: + return "identifier"; + case EUnitKind::Literal: + return "literal"; + case EUnitKind::StringLiteral: + return "stringLiteral"; + case EUnitKind::Comment: + return "comment"; + case EUnitKind::Whitespace: + return "whitespace"; + case EUnitKind::Error: + return "error"; + } +} - if (unit.Kind == EUnitKind::StringLiteral) { - json.AppendValue(NJson::TJsonArray{ - "#py", - NJson::TJsonMap{ - {"token", "string.python"}, - {"nextEmbedded", "python"}, - {"next", "@embedded"}, - {"goBack", 3}, - }, - }); - json.AppendValue(NJson::TJsonArray{ - "\\/\\/js", - NJson::TJsonMap{ - {"token", "string.js"}, - {"nextEmbedded", "javascript"}, - {"next", "@embedded"}, - {"goBack", 4}, - }, - }); - json.AppendValue(NJson::TJsonArray{ - "{", - NJson::TJsonMap{ - {"token", "string.json"}, - {"nextEmbedded", "json"}, - {"next", "@embedded"}, - {"goBack", 1}, - }, - }); - } else if (unit.Kind == EUnitKind::Comment && ansi) { - json.AppendValue(NJson::TJsonArray{begin, group, "@" + group}); - } +NJson::TJsonValue ToMonarchMultiLineState(const TUnit& unit, bool ansi) { + Y_ENSURE(unit.RangePattern); - json.AppendValue(NJson::TJsonArray{"[^" + begin + "]", group}); - json.AppendValue(NJson::TJsonArray{end, group, "@pop"}); - json.AppendValue(NJson::TJsonArray{begin, group}); + TString group = ToMonarchSelector(unit.Kind); + TString begin = RE2::QuoteMeta(unit.RangePattern->Begin); + TString end = RE2::QuoteMeta(unit.RangePattern->End); - return json; - } + NJson::TJsonValue json; - NJson::TJsonValue MonarchEmbeddedState() { - return NJson::TJsonArray{{NJson::TJsonArray{ - "([^@]|^)([@]{4})*[@]{2}([@]([^@]|$)|[^@]|$)", + if (unit.Kind == EUnitKind::StringLiteral) { + json.AppendValue(NJson::TJsonArray{ + "#py", NJson::TJsonMap{ - {"token", "@rematch"}, - {"next", "@pop"}, - {"nextEmbedded", "@pop"}, + 
{"token", "string.python"}, + {"nextEmbedded", "python"}, + {"next", "@embedded"}, + {"goBack", 3}, }, - }}}; + }); + json.AppendValue(NJson::TJsonArray{ + "\\/\\/js", + NJson::TJsonMap{ + {"token", "string.js"}, + {"nextEmbedded", "javascript"}, + {"next", "@embedded"}, + {"goBack", 4}, + }, + }); + json.AppendValue(NJson::TJsonArray{ + "{", + NJson::TJsonMap{ + {"token", "string.json"}, + {"nextEmbedded", "json"}, + {"next", "@embedded"}, + {"goBack", 1}, + }, + }); + } else if (unit.Kind == EUnitKind::Comment && ansi) { + json.AppendValue(NJson::TJsonArray{begin, group, "@" + group}); } - NJson::TJsonValue ToMonarchWhitespaceState(const THighlighting& highlighting) { - NJson::TJsonValue json; + json.AppendValue(NJson::TJsonArray{"[^" + begin + "]", group}); + json.AppendValue(NJson::TJsonArray{end, group, "@pop"}); + json.AppendValue(NJson::TJsonArray{begin, group}); - const TUnit& ws = *FindIfPtr(highlighting.Units, [](const TUnit& unit) { - return unit.Kind == EUnitKind::Whitespace; - }); - Y_ENSURE(ws.Patterns.size() == 1); - json.AppendValue(NJson::TJsonArray{ToMonarchRegex(ws, ws.Patterns.at(0)), "white"}); + return json; +} + +NJson::TJsonValue MonarchEmbeddedState() { + return NJson::TJsonArray{{NJson::TJsonArray{ + "([^@]|^)([@]{4})*[@]{2}([@]([^@]|$)|[^@]|$)", + NJson::TJsonMap{ + {"token", "@rematch"}, + {"next", "@pop"}, + {"nextEmbedded", "@pop"}, + }, + }}}; +} + +NJson::TJsonValue ToMonarchWhitespaceState(const THighlighting& highlighting) { + NJson::TJsonValue json; - ForEachMultiLine(highlighting, [&](const TUnit& unit) { - json.AppendValue(NJson::TJsonArray{ - RE2::QuoteMeta(unit.RangePattern->Begin), - ToMonarchSelector(unit.Kind), - "@" + ToMonarchStateName(unit.Kind), - }); + const TUnit& ws = *FindIfPtr(highlighting.Units, [](const TUnit& unit) { + return unit.Kind == EUnitKind::Whitespace; + }); + Y_ENSURE(ws.Patterns.size() == 1); + json.AppendValue(NJson::TJsonArray{ToMonarchRegex(ws, ws.Patterns.at(0)), "white"}); + + 
ForEachMultiLine(highlighting, [&](const TUnit& unit) { + json.AppendValue(NJson::TJsonArray{ + RE2::QuoteMeta(unit.RangePattern->Begin), + ToMonarchSelector(unit.Kind), + "@" + ToMonarchStateName(unit.Kind), }); + }); - return json; - } + return json; +} - NJson::TJsonValue ToMonarchRootState(const THighlighting& highlighting, bool ansi) { - NJson::TJsonValue json; - json.AppendValue(NJson::TJsonMap{{"include", "@whitespace"}}); - for (const TUnit& unit : highlighting.Units) { - if (unit.IsCodeGenExcluded) { - continue; - } +NJson::TJsonValue ToMonarchRootState(const THighlighting& highlighting, bool ansi) { + NJson::TJsonValue json; + json.AppendValue(NJson::TJsonMap{{"include", "@whitespace"}}); + for (const TUnit& unit : highlighting.Units) { + if (unit.IsCodeGenExcluded) { + continue; + } - TString group = ToMonarchSelector(unit.Kind); + TString group = ToMonarchSelector(unit.Kind); - const auto* patterns = &unit.Patterns; - if (!unit.PatternsANSI.Empty() && ansi) { - patterns = unit.PatternsANSI.Get(); - } + const auto* patterns = &unit.Patterns; + if (!unit.PatternsANSI.Empty() && ansi) { + patterns = unit.PatternsANSI.Get(); + } - for (const NSQLTranslationV1::TRegexPattern& pattern : *patterns) { - TString regex = ToMonarchRegex(unit, pattern); - json.AppendValue(NJson::TJsonArray{regex, group}); - } + for (const NSQLTranslationV1::TRegexPattern& pattern : *patterns) { + TString regex = ToMonarchRegex(unit, pattern); + json.AppendValue(NJson::TJsonArray{regex, group}); } - return json; } + return json; +} - void GenerateMonarch(IOutputStream& out, const THighlighting& highlighting, bool ansi) { - NJsonWriter::TBuf buf(NJsonWriter::HEM_DONT_ESCAPE_HTML, &out); - buf.SetIndentSpaces(4); +void GenerateMonarch(IOutputStream& out, const THighlighting& highlighting, bool ansi) { + NJsonWriter::TBuf buf(NJsonWriter::HEM_DONT_ESCAPE_HTML, &out); + buf.SetIndentSpaces(4); - const auto write_json = [&](TStringBuf key, const NJson::TJsonValue& json) { - 
buf.WriteKey(key); - buf.WriteJsonValue(&json); - }; + const auto write_json = [&](TStringBuf key, const NJson::TJsonValue& json) { + buf.WriteKey(key); + buf.WriteJsonValue(&json); + }; - buf.BeginObject(); + buf.BeginObject(); - buf.WriteKey("ignoreCase"); - buf.WriteBool(IsCaseInsensitive(highlighting)); + buf.WriteKey("ignoreCase"); + buf.WriteBool(IsCaseInsensitive(highlighting)); - buf.WriteKey("tokenizer"); - buf.BeginObject(); - write_json("root", ToMonarchRootState(highlighting, ansi)); - write_json("whitespace", ToMonarchWhitespaceState(highlighting)); - ForEachMultiLine(highlighting, [&](const TUnit& unit) { - write_json(ToMonarchStateName(unit.Kind), ToMonarchMultiLineState(unit, ansi)); - }); - write_json("embedded", MonarchEmbeddedState()); - buf.EndObject(); + buf.WriteKey("tokenizer"); + buf.BeginObject(); + write_json("root", ToMonarchRootState(highlighting, ansi)); + write_json("whitespace", ToMonarchWhitespaceState(highlighting)); + ForEachMultiLine(highlighting, [&](const TUnit& unit) { + write_json(ToMonarchStateName(unit.Kind), ToMonarchMultiLineState(unit, ansi)); + }); + write_json("embedded", MonarchEmbeddedState()); + buf.EndObject(); - buf.EndObject(); - } + buf.EndObject(); +} - IGenerator::TPtr MakeMonarchGenerator() { - return MakeOnlyFileGenerator(GenerateMonarch); - } +IGenerator::TPtr MakeMonarchGenerator() { + return MakeOnlyFileGenerator(GenerateMonarch); +} } // namespace NSQLHighlight diff --git a/yql/essentials/tools/yql_highlight/generator_monarch.h b/yql/essentials/tools/yql_highlight/generator_monarch.h index d42841a53b3..ef6a9492e1d 100644 --- a/yql/essentials/tools/yql_highlight/generator_monarch.h +++ b/yql/essentials/tools/yql_highlight/generator_monarch.h @@ -4,6 +4,6 @@ namespace NSQLHighlight { - IGenerator::TPtr MakeMonarchGenerator(); +IGenerator::TPtr MakeMonarchGenerator(); } // namespace NSQLHighlight diff --git a/yql/essentials/tools/yql_highlight/generator_textmate.cpp 
b/yql/essentials/tools/yql_highlight/generator_textmate.cpp index 8de2a201eb2..3450715ecb6 100644 --- a/yql/essentials/tools/yql_highlight/generator_textmate.cpp +++ b/yql/essentials/tools/yql_highlight/generator_textmate.cpp @@ -14,323 +14,323 @@ namespace NSQLHighlight { - namespace NTextMate { +namespace NTextMate { - using TRegex = TString; +using TRegex = TString; - struct TRange { - TRegex Begin; - TRegex End; - }; - - struct TMatcher { - TString Name; - TString Group; - std::variant<TRegex, TRange> Pattern; - }; +struct TRange { + TRegex Begin; + TRegex End; +}; - struct TLanguage { - TString Name; - TString ScopeName; - TString FileType; - TVector<TMatcher> Matchers; - }; +struct TMatcher { + TString Name; + TString Group; + std::variant<TRegex, TRange> Pattern; +}; - } // namespace NTextMate +struct TLanguage { + TString Name; + TString ScopeName; + TString FileType; + TVector<TMatcher> Matchers; +}; - namespace { +} // namespace NTextMate - NTextMate::TRegex ToTextMateRegex(const TUnit& unit, const NSQLTranslationV1::TRegexPattern& pattern) { - TStringBuilder regex; +namespace { - if (pattern.IsCaseInsensitive) { - regex << "(?i)"; - } +NTextMate::TRegex ToTextMateRegex(const TUnit& unit, const NSQLTranslationV1::TRegexPattern& pattern) { + TStringBuilder regex; - if (unit.IsPlain) { - regex << R"re(\b)re"; - } + if (pattern.IsCaseInsensitive) { + regex << "(?i)"; + } - if (!pattern.Before.empty()) { - regex << "(?<=" << pattern.Before << ")"; - } + if (unit.IsPlain) { + regex << R"re(\b)re"; + } - regex << "(" << pattern.Body << ")"; + if (!pattern.Before.empty()) { + regex << "(?<=" << pattern.Before << ")"; + } - if (!pattern.After.empty()) { - regex << "(?=" << pattern.After << ")"; - } + regex << "(" << pattern.Body << ")"; - if (unit.IsPlain) { - regex << R"re(\b)re"; - } + if (!pattern.After.empty()) { + regex << "(?=" << pattern.After << ")"; + } - return regex; - } + if (unit.IsPlain) { + regex << R"re(\b)re"; + } - TString 
ToTextMateGroup(EUnitKind kind) { - switch (kind) { - case EUnitKind::Keyword: - return "keyword.control"; - case EUnitKind::Punctuation: - return "keyword.operator"; - case EUnitKind::QuotedIdentifier: - return "string.interpolated"; - case EUnitKind::BindParameterIdentifier: - return "variable.parameter"; - case EUnitKind::TypeIdentifier: - return "entity.name.type"; - case EUnitKind::FunctionIdentifier: - return "entity.name.function"; - case EUnitKind::Identifier: - return "variable.other"; - case EUnitKind::Literal: - return "constant.numeric"; - case EUnitKind::StringLiteral: - return "string.quoted.double"; - case EUnitKind::Comment: - return "comment.block"; - case EUnitKind::Whitespace: - return ""; - case EUnitKind::Error: - return ""; - } - } + return regex; +} - TString ToTextMateName(EUnitKind kind) { - return ToString(kind); - } +TString ToTextMateGroup(EUnitKind kind) { + switch (kind) { + case EUnitKind::Keyword: + return "keyword.control"; + case EUnitKind::Punctuation: + return "keyword.operator"; + case EUnitKind::QuotedIdentifier: + return "string.interpolated"; + case EUnitKind::BindParameterIdentifier: + return "variable.parameter"; + case EUnitKind::TypeIdentifier: + return "entity.name.type"; + case EUnitKind::FunctionIdentifier: + return "entity.name.function"; + case EUnitKind::Identifier: + return "variable.other"; + case EUnitKind::Literal: + return "constant.numeric"; + case EUnitKind::StringLiteral: + return "string.quoted.double"; + case EUnitKind::Comment: + return "comment.block"; + case EUnitKind::Whitespace: + return ""; + case EUnitKind::Error: + return ""; + } +} - TMaybe<NTextMate::TMatcher> TextMateMultilinePattern(const TUnit& unit) { - auto range = unit.RangePattern; - if (!range) { - return Nothing(); - } +TString ToTextMateName(EUnitKind kind) { + return ToString(kind); +} - return NTextMate::TMatcher{ - .Name = ToTextMateName(unit.Kind), - .Group = ToTextMateGroup(unit.Kind), - .Pattern = NTextMate::TRange{ - .Begin = 
RE2::QuoteMeta(range->Begin), - .End = RE2::QuoteMeta(range->End), - }, - }; - } +TMaybe<NTextMate::TMatcher> TextMateMultilinePattern(const TUnit& unit) { + auto range = unit.RangePattern; + if (!range) { + return Nothing(); + } - NTextMate::TMatcher ToTextMatePattern(const TUnit& unit, const NSQLTranslationV1::TRegexPattern& pattern) { - return NTextMate::TMatcher{ - .Name = ToTextMateName(unit.Kind), - .Group = ToTextMateGroup(unit.Kind), - .Pattern = ToTextMateRegex(unit, pattern), - }; - } + return NTextMate::TMatcher{ + .Name = ToTextMateName(unit.Kind), + .Group = ToTextMateGroup(unit.Kind), + .Pattern = NTextMate::TRange{ + .Begin = RE2::QuoteMeta(range->Begin), + .End = RE2::QuoteMeta(range->End), + }, + }; +} - } // namespace +NTextMate::TMatcher ToTextMatePattern(const TUnit& unit, const NSQLTranslationV1::TRegexPattern& pattern) { + return NTextMate::TMatcher{ + .Name = ToTextMateName(unit.Kind), + .Group = ToTextMateGroup(unit.Kind), + .Pattern = ToTextMateRegex(unit, pattern), + }; +} - NTextMate::TLanguage ToTextMateLanguage(const THighlighting& highlighting) { - NTextMate::TLanguage language = { - .Name = highlighting.Name, - .ScopeName = "source." + highlighting.Extension, - .FileType = highlighting.Extension, - }; +} // namespace - for (const TUnit& unit : highlighting.Units) { - if (unit.IsCodeGenExcluded) { - continue; - } +NTextMate::TLanguage ToTextMateLanguage(const THighlighting& highlighting) { + NTextMate::TLanguage language = { + .Name = highlighting.Name, + .ScopeName = "source." 
+ highlighting.Extension, + .FileType = highlighting.Extension, + }; - for (const NSQLTranslationV1::TRegexPattern& pattern : unit.Patterns) { - language.Matchers.emplace_back(ToTextMatePattern(unit, pattern)); - } - if (auto textmate = TextMateMultilinePattern(unit)) { - language.Matchers.emplace_back(*textmate); - } + for (const TUnit& unit : highlighting.Units) { + if (unit.IsCodeGenExcluded) { + continue; } - return language; - } - - NJson::TJsonValue ToJson(const NTextMate::TMatcher& matcher) { - NJson::TJsonMap json = {{"name", matcher.Group}}; - std::visit([&](const auto& pattern) { - using T = std::decay_t<decltype(pattern)>; - - if constexpr (std::is_same_v<T, NTextMate::TRegex>) { - json["match"] = pattern; - } else if constexpr (std::is_same_v<T, NTextMate::TRange>) { - json["begin"] = pattern.Begin; - json["end"] = pattern.End; - } else { - static_assert(false); - } - }, matcher.Pattern); - return json; + for (const NSQLTranslationV1::TRegexPattern& pattern : unit.Patterns) { + language.Matchers.emplace_back(ToTextMatePattern(unit, pattern)); + } + if (auto textmate = TextMateMultilinePattern(unit)) { + language.Matchers.emplace_back(*textmate); + } } - NJson::TJsonValue ToJson(const NTextMate::TLanguage& language) { - NJson::TJsonMap root; - root["$schema"] = "https://raw.githubusercontent.com/martinring/tmlanguage/master/tmlanguage.json"; - root["name"] = language.FileType; - root["scopeName"] = language.ScopeName; - root["scope"] = language.ScopeName; - root["fileTypes"] = NJson::TJsonArray({language.FileType}); + return language; +} - root["patterns"].AppendValue(NJson::TJsonMap({ - {"begin", "@@#py"}, - {"end", "@@"}, - {"patterns", NJson::TJsonArray({NJson::TJsonMap{{"include", "source.python"}}})}, - })); +NJson::TJsonValue ToJson(const NTextMate::TMatcher& matcher) { + NJson::TJsonMap json = {{"name", matcher.Group}}; + std::visit([&](const auto& pattern) { + using T = std::decay_t<decltype(pattern)>; - 
root["patterns"].AppendValue(NJson::TJsonMap({ - {"begin", "@@//js"}, - {"end", "@@"}, - {"patterns", NJson::TJsonArray({NJson::TJsonMap{{"include", "source.js"}}})}, - })); + if constexpr (std::is_same_v<T, NTextMate::TRegex>) { + json["match"] = pattern; + } else if constexpr (std::is_same_v<T, NTextMate::TRange>) { + json["begin"] = pattern.Begin; + json["end"] = pattern.End; + } else { + static_assert(false); + } + }, matcher.Pattern); + return json; +} - root["patterns"].AppendValue(NJson::TJsonMap({ - {"begin", "@@{"}, - {"end", "@@"}, - {"patterns", NJson::TJsonArray({NJson::TJsonMap{{"include", "source.json"}}})}, - })); +NJson::TJsonValue ToJson(const NTextMate::TLanguage& language) { + NJson::TJsonMap root; + root["$schema"] = "https://raw.githubusercontent.com/martinring/tmlanguage/master/tmlanguage.json"; + root["name"] = language.FileType; + root["scopeName"] = language.ScopeName; + root["scope"] = language.ScopeName; + root["fileTypes"] = NJson::TJsonArray({language.FileType}); - THashSet<TString> visited; - for (const NTextMate::TMatcher& matcher : language.Matchers) { - root["repository"][matcher.Name]["patterns"].AppendValue(ToJson(matcher)); + root["patterns"].AppendValue(NJson::TJsonMap({ + {"begin", "@@#py"}, + {"end", "@@"}, + {"patterns", NJson::TJsonArray({NJson::TJsonMap{{"include", "source.python"}}})}, + })); - if (!visited.contains(matcher.Name)) { - root["patterns"].AppendValue(NJson::TJsonMap({{"include", "#" + matcher.Name}})); - visited.emplace(matcher.Name); - } - } + root["patterns"].AppendValue(NJson::TJsonMap({ + {"begin", "@@//js"}, + {"end", "@@"}, + {"patterns", NJson::TJsonArray({NJson::TJsonMap{{"include", "source.js"}}})}, + })); - return root; - } + root["patterns"].AppendValue(NJson::TJsonMap({ + {"begin", "@@{"}, + {"end", "@@"}, + {"patterns", NJson::TJsonArray({NJson::TJsonMap{{"include", "source.json"}}})}, + })); - TString EscapeXML(TString string) { - SubstGlobal(string, "<", "<"); - SubstGlobal(string, ">", ">"); - 
return string; - } - - void WriteXML(IOutputStream& out, const NJson::TJsonValue& json, TString indent = "") { - static constexpr TStringBuf extra = " "; + THashSet<TString> visited; + for (const NTextMate::TMatcher& matcher : language.Matchers) { + root["repository"][matcher.Name]["patterns"].AppendValue(ToJson(matcher)); - if (TString string; json.GetString(&string)) { - out << indent << "<string>" << EscapeXML(string) << "</string>" << "\n"; - } else if (NJson::TJsonValue::TMapType dict; json.GetMap(&dict)) { - out << indent << "<dict>" << '\n'; - for (const auto& [key, value] : dict) { - out << indent << extra << "<key>" << EscapeXML(key) << "</key>" << '\n'; - WriteXML(out, value, indent + extra); - } - out << indent << "</dict>" << '\n'; - } else if (NJson::TJsonValue::TArray array; json.GetArray(&array)) { - out << indent << "<array>" << '\n'; - for (const auto& value : array) { - WriteXML(out, value, indent + extra); - } - out << indent << "</array>" << '\n'; - } else { - TStringStream str; - Print(str, json); - ythrow yexception() << "Unexpected JSON '" + str.Str() + "'"; + if (!visited.contains(matcher.Name)) { + root["patterns"].AppendValue(NJson::TJsonMap({{"include", "#" + matcher.Name}})); + visited.emplace(matcher.Name); } } - void GenerateTextMateJson(IOutputStream& out, const THighlighting& highlighting, bool /* ansi */) { - Print(out, ToJson(ToTextMateLanguage(highlighting))); - } + return root; +} - static const THashMap<TString, TString> UUID = { - {"InfoYQL", "059de4a7-ff49-4dbd-8a9d-a8114b77c4b9"}, - {"SyntaxYQL", "bb7a80e5-733c-4ea6-9654-40db0675950c"}, - {"InfoYQLs", "7f536d44-2667-430e-b145-540992400cb3"}, - {"SyntaxYQLs", "6e62e13a-487b-4333-bbb2-9453d0783f8f"}, - }; +TString EscapeXML(TString string) { + SubstGlobal(string, "<", "<"); + SubstGlobal(string, ">", ">"); + return string; +} + +void WriteXML(IOutputStream& out, const NJson::TJsonValue& json, TString indent = "") { + static constexpr TStringBuf extra = " "; - class 
TTextMateBundleGenerator: public IGenerator { - private: - template <class TWriter> - void Write( - NTar::TArchiveWriter& acrhive, - TStringBuf path, - TWriter writer, - const NTextMate::TLanguage& langugage) - { - TStringStream stream; - writer(stream, langugage); - TBlob blob = TBlob::FromString(stream.Str()); - acrhive.WriteFile(TString(path), blob); + if (TString string; json.GetString(&string)) { + out << indent << "<string>" << EscapeXML(string) << "</string>" << "\n"; + } else if (NJson::TJsonValue::TMapType dict; json.GetMap(&dict)) { + out << indent << "<dict>" << '\n'; + for (const auto& [key, value] : dict) { + out << indent << extra << "<key>" << EscapeXML(key) << "</key>" << '\n'; + WriteXML(out, value, indent + extra); } + out << indent << "</dict>" << '\n'; + } else if (NJson::TJsonValue::TArray array; json.GetArray(&array)) { + out << indent << "<array>" << '\n'; + for (const auto& value : array) { + WriteXML(out, value, indent + extra); + } + out << indent << "</array>" << '\n'; + } else { + TStringStream str; + Print(str, json); + ythrow yexception() << "Unexpected JSON '" + str.Str() + "'"; + } +} - public: - void Write(IOutputStream& out, const THighlighting& highlighting, bool /* ansi */) final { - const auto [bundle, info, syntax] = Paths(highlighting); +void GenerateTextMateJson(IOutputStream& out, const THighlighting& highlighting, bool /* ansi */) { + Print(out, ToJson(ToTextMateLanguage(highlighting))); +} - out << "File " << bundle << "/" << info << ":" << '\n'; - WriteInfo(out, ToTextMateLanguage(highlighting)); - out << "File " << bundle << "/" << syntax << ":" << '\n'; - WriteSyntax(out, ToTextMateLanguage(highlighting)); - } +static const THashMap<TString, TString> UUID = { + {"InfoYQL", "059de4a7-ff49-4dbd-8a9d-a8114b77c4b9"}, + {"SyntaxYQL", "bb7a80e5-733c-4ea6-9654-40db0675950c"}, + {"InfoYQLs", "7f536d44-2667-430e-b145-540992400cb3"}, + {"SyntaxYQLs", "6e62e13a-487b-4333-bbb2-9453d0783f8f"}, +}; - void Write(const TFsPath& path, 
const THighlighting& highlighting, bool /* ansi */) final { - const auto [bundle, info, syntax] = Paths(highlighting); +class TTextMateBundleGenerator: public IGenerator { +private: + template <class TWriter> + void Write( + NTar::TArchiveWriter& acrhive, + TStringBuf path, + TWriter writer, + const NTextMate::TLanguage& langugage) + { + TStringStream stream; + writer(stream, langugage); + TBlob blob = TBlob::FromString(stream.Str()); + acrhive.WriteFile(TString(path), blob); + } - if (TString name = path.GetName(); !name.StartsWith(bundle)) { - ythrow yexception() - << "Invalid path '" << name - << "', expected '" << bundle << "' " - << "as an archive name"; - } +public: + void Write(IOutputStream& out, const THighlighting& highlighting, bool /* ansi */) final { + const auto [bundle, info, syntax] = Paths(highlighting); - NTextMate::TLanguage language = ToTextMateLanguage(highlighting); + out << "File " << bundle << "/" << info << ":" << '\n'; + WriteInfo(out, ToTextMateLanguage(highlighting)); + out << "File " << bundle << "/" << syntax << ":" << '\n'; + WriteSyntax(out, ToTextMateLanguage(highlighting)); + } - NTar::TArchiveWriter archive(path); - Write(archive, info, WriteInfo, language); - Write(archive, syntax, WriteSyntax, language); - } + void Write(const TFsPath& path, const THighlighting& highlighting, bool /* ansi */) final { + const auto [bundle, info, syntax] = Paths(highlighting); - private: - static std::tuple<TString, TString, TString> Paths(const THighlighting& h) { - return { - TStringBuilder() << h.Name << ".tmbundle", - TStringBuilder() << "info.plist", - TStringBuilder() << "Syntaxes/" << h.Name << ".tmLanguage", - }; + if (TString name = path.GetName(); !name.StartsWith(bundle)) { + ythrow yexception() + << "Invalid path '" << name + << "', expected '" << bundle << "' " + << "as an archive name"; } - static void WriteInfo(IOutputStream& out, const NTextMate::TLanguage& language) { - out << R"(<?xml version="1.0" encoding="UTF-8"?>)" << '\n'; - 
out << R"(<!DOCTYPE plist PUBLIC "-//Apple//DTD PLIST 1.0//EN" "http://www.apple.com/DTDs/PropertyList-1.0.dtd">)" << '\n'; - out << R"(<plist version="1.0">)" << '\n'; - out << R"(<dict>)" << '\n'; - out << R"( <key>name</key>)" << '\n'; - out << R"( <string>)" << language.Name << R"(</string>)" << '\n'; - out << R"( <key>uuid</key>)" << '\n'; - out << R"( <string>)" << UUID.at("Info" + language.Name) << R"(</string>)" << '\n'; - out << R"(</dict>)" << '\n'; - out << R"(</plist>)" << '\n'; - } + NTextMate::TLanguage language = ToTextMateLanguage(highlighting); - static void WriteSyntax(IOutputStream& out, const NTextMate::TLanguage& language) { - NJson::TJsonValue json = ToJson(language); - json.EraseValue("$schema"); - json["uuid"] = UUID.at("Syntax" + language.Name); + NTar::TArchiveWriter archive(path); + Write(archive, info, WriteInfo, language); + Write(archive, syntax, WriteSyntax, language); + } - out << R"(<?xml version="1.0" encoding="UTF-8"?>)" << '\n'; - out << R"(<!DOCTYPE plist PUBLIC "-//Apple//DTD PLIST 1.0//EN" "http://www.apple.com/DTDs/PropertyList-1.0.dtd">)" << '\n'; - out << R"(<plist version="1.0">)" << '\n'; - WriteXML(out, json); - out << R"(</plist>)" << '\n'; - } - }; +private: + static std::tuple<TString, TString, TString> Paths(const THighlighting& h) { + return { + TStringBuilder() << h.Name << ".tmbundle", + TStringBuilder() << "info.plist", + TStringBuilder() << "Syntaxes/" << h.Name << ".tmLanguage", + }; + } - IGenerator::TPtr MakeTextMateJsonGenerator() { - return MakeOnlyFileGenerator(GenerateTextMateJson); + static void WriteInfo(IOutputStream& out, const NTextMate::TLanguage& language) { + out << R"(<?xml version="1.0" encoding="UTF-8"?>)" << '\n'; + out << R"(<!DOCTYPE plist PUBLIC "-//Apple//DTD PLIST 1.0//EN" "http://www.apple.com/DTDs/PropertyList-1.0.dtd">)" << '\n'; + out << R"(<plist version="1.0">)" << '\n'; + out << R"(<dict>)" << '\n'; + out << R"( <key>name</key>)" << '\n'; + out << R"( <string>)" << language.Name << 
R"(</string>)" << '\n'; + out << R"( <key>uuid</key>)" << '\n'; + out << R"( <string>)" << UUID.at("Info" + language.Name) << R"(</string>)" << '\n'; + out << R"(</dict>)" << '\n'; + out << R"(</plist>)" << '\n'; } - IGenerator::TPtr MakeTextMateBundleGenerator() { - return new TTextMateBundleGenerator(); + static void WriteSyntax(IOutputStream& out, const NTextMate::TLanguage& language) { + NJson::TJsonValue json = ToJson(language); + json.EraseValue("$schema"); + json["uuid"] = UUID.at("Syntax" + language.Name); + + out << R"(<?xml version="1.0" encoding="UTF-8"?>)" << '\n'; + out << R"(<!DOCTYPE plist PUBLIC "-//Apple//DTD PLIST 1.0//EN" "http://www.apple.com/DTDs/PropertyList-1.0.dtd">)" << '\n'; + out << R"(<plist version="1.0">)" << '\n'; + WriteXML(out, json); + out << R"(</plist>)" << '\n'; } +}; + +IGenerator::TPtr MakeTextMateJsonGenerator() { + return MakeOnlyFileGenerator(GenerateTextMateJson); +} + +IGenerator::TPtr MakeTextMateBundleGenerator() { + return new TTextMateBundleGenerator(); +} } // namespace NSQLHighlight diff --git a/yql/essentials/tools/yql_highlight/generator_textmate.h b/yql/essentials/tools/yql_highlight/generator_textmate.h index d1ffbac87a2..87e4dad947f 100644 --- a/yql/essentials/tools/yql_highlight/generator_textmate.h +++ b/yql/essentials/tools/yql_highlight/generator_textmate.h @@ -4,8 +4,8 @@ namespace NSQLHighlight { - IGenerator::TPtr MakeTextMateJsonGenerator(); +IGenerator::TPtr MakeTextMateJsonGenerator(); - IGenerator::TPtr MakeTextMateBundleGenerator(); +IGenerator::TPtr MakeTextMateBundleGenerator(); } // namespace NSQLHighlight diff --git a/yql/essentials/tools/yql_highlight/generator_vim.cpp b/yql/essentials/tools/yql_highlight/generator_vim.cpp index a709e4cba90..4029844dc0d 100644 --- a/yql/essentials/tools/yql_highlight/generator_vim.cpp +++ b/yql/essentials/tools/yql_highlight/generator_vim.cpp @@ -10,172 +10,172 @@ namespace NSQLHighlight { - namespace { +namespace { - TString ToVim(TString regex) { - static RE2 
LikelyUnquotedLParen(R"((^|[^\\])(\())"); - static RE2 LikelyNonGreedyMatch(R"re((^|[^\\])(\*\?))re"); +TString ToVim(TString regex) { + static RE2 LikelyUnquotedLParen(R"((^|[^\\])(\())"); + static RE2 LikelyNonGreedyMatch(R"re((^|[^\\])(\*\?))re"); - // We can leave some capturing groups in case `\\\\(`, - // but it is okay as the goal is to meet the Vim limit. + // We can leave some capturing groups in case `\\\\(`, + // but it is okay as the goal is to meet the Vim limit. - YQL_ENSURE(!regex.Contains(R"(\\*?)"), "" << regex); + YQL_ENSURE(!regex.Contains(R"(\\*?)"), "" << regex); - RE2::GlobalReplace(®ex, LikelyUnquotedLParen, R"(\1%()"); - RE2::GlobalReplace(®ex, LikelyNonGreedyMatch, R"re(\1{-})re"); + RE2::GlobalReplace(®ex, LikelyUnquotedLParen, R"(\1%()"); + RE2::GlobalReplace(®ex, LikelyNonGreedyMatch, R"re(\1{-})re"); - return regex; - } - - TString ToVim(const TUnit& unit, const NSQLTranslationV1::TRegexPattern& pattern) { - TStringBuilder vim; + return regex; +} - vim << R"(")"; - vim << R"(\v)"; +TString ToVim(const TUnit& unit, const NSQLTranslationV1::TRegexPattern& pattern) { + TStringBuilder vim; - if (unit.IsPlain) { - vim << R"(<)"; - } + vim << R"(")"; + vim << R"(\v)"; - if (pattern.IsCaseInsensitive) { - vim << R"(\c)"; - } + if (unit.IsPlain) { + vim << R"(<)"; + } - if (!pattern.Before.empty()) { - vim << "(" << ToVim(pattern.Before) << ")@<="; - } + if (pattern.IsCaseInsensitive) { + vim << R"(\c)"; + } - vim << "(" << ToVim(pattern.Body) << ")"; + if (!pattern.Before.empty()) { + vim << "(" << ToVim(pattern.Before) << ")@<="; + } - if (!pattern.After.empty()) { - vim << "(" << ToVim(pattern.After) << ")@="; - } + vim << "(" << ToVim(pattern.Body) << ")"; - if (unit.IsPlain) { - vim << R"(>)"; - } + if (!pattern.After.empty()) { + vim << "(" << ToVim(pattern.After) << ")@="; + } - vim << R"(")"; + if (unit.IsPlain) { + vim << R"(>)"; + } - // Prevent a range pattern conflict - if (unit.RangePattern) { - SubstGlobal(vim, "|\\n", ""); - } + 
vim << R"(")"; - return vim; - } + // Prevent a range pattern conflict + if (unit.RangePattern) { + SubstGlobal(vim, "|\\n", ""); + } - TString ToVimName(EUnitKind kind) { - switch (kind) { - case EUnitKind::Keyword: - return "yqlKeyword"; - case EUnitKind::Punctuation: - return "yqlPunctuation"; - case EUnitKind::QuotedIdentifier: - return "yqlQuotedIdentifier"; - case EUnitKind::BindParameterIdentifier: - return "yqlBindParameterIdentifier"; - case EUnitKind::TypeIdentifier: - return "yqlTypeIdentifier"; - case EUnitKind::FunctionIdentifier: - return "yqlFunctionIdentifier"; - case EUnitKind::Identifier: - return "yqlIdentifier"; - case EUnitKind::Literal: - return "yqlLiteral"; - case EUnitKind::StringLiteral: - return "yqlStringLiteral"; - case EUnitKind::Comment: - return "yqlComment"; - case EUnitKind::Whitespace: - return "yqlWhitespace"; - case EUnitKind::Error: - return "yqlError"; - } - } + return vim; +} - TString VimRangeEscaped(TString range) { - SubstGlobal(range, "*", "\\*"); - return range; - } +TString ToVimName(EUnitKind kind) { + switch (kind) { + case EUnitKind::Keyword: + return "yqlKeyword"; + case EUnitKind::Punctuation: + return "yqlPunctuation"; + case EUnitKind::QuotedIdentifier: + return "yqlQuotedIdentifier"; + case EUnitKind::BindParameterIdentifier: + return "yqlBindParameterIdentifier"; + case EUnitKind::TypeIdentifier: + return "yqlTypeIdentifier"; + case EUnitKind::FunctionIdentifier: + return "yqlFunctionIdentifier"; + case EUnitKind::Identifier: + return "yqlIdentifier"; + case EUnitKind::Literal: + return "yqlLiteral"; + case EUnitKind::StringLiteral: + return "yqlStringLiteral"; + case EUnitKind::Comment: + return "yqlComment"; + case EUnitKind::Whitespace: + return "yqlWhitespace"; + case EUnitKind::Error: + return "yqlError"; + } +} - void PrintRules(IOutputStream& out, const TUnit& unit) { - TString name = ToVimName(unit.Kind); - for (const auto& pattern : std::ranges::reverse_view(unit.Patterns)) { - out << "syn match " << 
ToVimName(unit.Kind) << " " - << ToVim(unit, pattern) << '\n'; - } - if (auto range = unit.RangePattern) { - out << "syntax region " << name << "Multiline" << " " - << "start=\"" << VimRangeEscaped(range->Begin) << "\" " - << "end=\"" << VimRangeEscaped(range->End) << "\"" - << '\n'; - } - } +TString VimRangeEscaped(TString range) { + SubstGlobal(range, "*", "\\*"); + return range; +} - TVector<TStringBuf> ToVimGroups(EUnitKind kind) { - switch (kind) { - case EUnitKind::Keyword: - return {"Keyword"}; - case EUnitKind::Punctuation: - return {"Operator"}; - case EUnitKind::QuotedIdentifier: - return {"Special", "Underlined"}; - case EUnitKind::BindParameterIdentifier: - return {"Define"}; - case EUnitKind::TypeIdentifier: - return {"Type"}; - case EUnitKind::FunctionIdentifier: - return {"Function"}; - case EUnitKind::Identifier: - return {"Identifier"}; - case EUnitKind::Literal: - return {"Number"}; - case EUnitKind::StringLiteral: - return {"String"}; - case EUnitKind::Comment: - return {"Comment"}; - case EUnitKind::Whitespace: - return {}; - case EUnitKind::Error: - return {}; - } - } +void PrintRules(IOutputStream& out, const TUnit& unit) { + TString name = ToVimName(unit.Kind); + for (const auto& pattern : std::ranges::reverse_view(unit.Patterns)) { + out << "syn match " << ToVimName(unit.Kind) << " " + << ToVim(unit, pattern) << '\n'; + } + if (auto range = unit.RangePattern) { + out << "syntax region " << name << "Multiline" << " " + << "start=\"" << VimRangeEscaped(range->Begin) << "\" " + << "end=\"" << VimRangeEscaped(range->End) << "\"" + << '\n'; + } +} - } // namespace +TVector<TStringBuf> ToVimGroups(EUnitKind kind) { + switch (kind) { + case EUnitKind::Keyword: + return {"Keyword"}; + case EUnitKind::Punctuation: + return {"Operator"}; + case EUnitKind::QuotedIdentifier: + return {"Special", "Underlined"}; + case EUnitKind::BindParameterIdentifier: + return {"Define"}; + case EUnitKind::TypeIdentifier: + return {"Type"}; + case 
EUnitKind::FunctionIdentifier: + return {"Function"}; + case EUnitKind::Identifier: + return {"Identifier"}; + case EUnitKind::Literal: + return {"Number"}; + case EUnitKind::StringLiteral: + return {"String"}; + case EUnitKind::Comment: + return {"Comment"}; + case EUnitKind::Whitespace: + return {}; + case EUnitKind::Error: + return {}; + } +} - void GenerateVim(IOutputStream& out, const THighlighting& highlighting, bool /* ansi */) { - out << "if exists(\"b:current_syntax\")" << '\n'; - out << " finish" << '\n'; - out << "endif" << '\n'; - out << '\n'; +} // namespace - for (const TUnit& unit : std::ranges::reverse_view(highlighting.Units)) { - if (unit.IsCodeGenExcluded) { - continue; - } +void GenerateVim(IOutputStream& out, const THighlighting& highlighting, bool /* ansi */) { + out << "if exists(\"b:current_syntax\")" << '\n'; + out << " finish" << '\n'; + out << "endif" << '\n'; + out << '\n'; - PrintRules(out, unit); + for (const TUnit& unit : std::ranges::reverse_view(highlighting.Units)) { + if (unit.IsCodeGenExcluded) { + continue; } - out << '\n'; + PrintRules(out, unit); + } + + out << '\n'; - for (const TUnit& unit : std::ranges::reverse_view(highlighting.Units)) { - TString name = ToVimName(unit.Kind); - for (TStringBuf group : ToVimGroups(unit.Kind)) { - out << "highlight default link " << name << "Multiline" << " " << group << '\n'; - out << "highlight default link " << name << " " << group << '\n'; - } + for (const TUnit& unit : std::ranges::reverse_view(highlighting.Units)) { + TString name = ToVimName(unit.Kind); + for (TStringBuf group : ToVimGroups(unit.Kind)) { + out << "highlight default link " << name << "Multiline" << " " << group << '\n'; + out << "highlight default link " << name << " " << group << '\n'; } + } - out << '\n'; + out << '\n'; - out << "let b:current_syntax = \"" << highlighting.Extension << "\"" << '\n'; - out.Flush(); - } + out << "let b:current_syntax = \"" << highlighting.Extension << "\"" << '\n'; + out.Flush(); +} - 
IGenerator::TPtr MakeVimGenerator() { - return MakeOnlyFileGenerator(GenerateVim); - } +IGenerator::TPtr MakeVimGenerator() { + return MakeOnlyFileGenerator(GenerateVim); +} } // namespace NSQLHighlight diff --git a/yql/essentials/tools/yql_highlight/generator_vim.h b/yql/essentials/tools/yql_highlight/generator_vim.h index 1e3927f8417..560e67f31d4 100644 --- a/yql/essentials/tools/yql_highlight/generator_vim.h +++ b/yql/essentials/tools/yql_highlight/generator_vim.h @@ -4,6 +4,6 @@ namespace NSQLHighlight { - IGenerator::TPtr MakeVimGenerator(); +IGenerator::TPtr MakeVimGenerator(); } // namespace NSQLHighlight diff --git a/yql/essentials/tools/yql_highlight/highlighting.cpp b/yql/essentials/tools/yql_highlight/highlighting.cpp index 238eb1f32b3..d38eb96bfae 100644 --- a/yql/essentials/tools/yql_highlight/highlighting.cpp +++ b/yql/essentials/tools/yql_highlight/highlighting.cpp @@ -2,12 +2,12 @@ namespace NSQLHighlight { - bool IsCaseInsensitive(const THighlighting& highlighting) { - return AnyOf(highlighting.Units, [](const TUnit& unit) { - return AnyOf(unit.Patterns, [](const NSQLTranslationV1::TRegexPattern& p) { - return p.IsCaseInsensitive; - }); +bool IsCaseInsensitive(const THighlighting& highlighting) { + return AnyOf(highlighting.Units, [](const TUnit& unit) { + return AnyOf(unit.Patterns, [](const NSQLTranslationV1::TRegexPattern& p) { + return p.IsCaseInsensitive; }); - } + }); +} } // namespace NSQLHighlight diff --git a/yql/essentials/tools/yql_highlight/highlighting.h b/yql/essentials/tools/yql_highlight/highlighting.h index 6cb4e0dd691..66b5bdc7721 100644 --- a/yql/essentials/tools/yql_highlight/highlighting.h +++ b/yql/essentials/tools/yql_highlight/highlighting.h @@ -4,18 +4,18 @@ namespace NSQLHighlight { - bool IsCaseInsensitive(const THighlighting& highlighting); +bool IsCaseInsensitive(const THighlighting& highlighting); - template <std::invocable<const TUnit&> Action> - void ForEachMultiLine(const THighlighting& highlighting, Action action) 
{ - for (const TUnit& unit : highlighting.Units) { - TMaybe<TRangePattern> range = unit.RangePattern; - if (!range) { - continue; - } - - action(unit); +template <std::invocable<const TUnit&> Action> +void ForEachMultiLine(const THighlighting& highlighting, Action action) { + for (const TUnit& unit : highlighting.Units) { + TMaybe<TRangePattern> range = unit.RangePattern; + if (!range) { + continue; } + + action(unit); } +} } // namespace NSQLHighlight diff --git a/yql/essentials/tools/yql_highlight/json.cpp b/yql/essentials/tools/yql_highlight/json.cpp index 114f53b9668..2c0c0167292 100644 --- a/yql/essentials/tools/yql_highlight/json.cpp +++ b/yql/essentials/tools/yql_highlight/json.cpp @@ -9,14 +9,14 @@ namespace NSQLHighlight { - void Print(IOutputStream& out, const NJson::TJsonValue& json) { - NJson::TJsonWriterConfig config = { - .SortKeys = true, - }; +void Print(IOutputStream& out, const NJson::TJsonValue& json) { + NJson::TJsonWriterConfig config = { + .SortKeys = true, + }; - TStringStream output; - NJson::WriteJson(&output, &json, config); - YQL_ENSURE(NJson::PrettifyJson(output.Str(), out)); - } + TStringStream output; + NJson::WriteJson(&output, &json, config); + YQL_ENSURE(NJson::PrettifyJson(output.Str(), out)); +} } // namespace NSQLHighlight diff --git a/yql/essentials/tools/yql_highlight/json.h b/yql/essentials/tools/yql_highlight/json.h index 08dd43173fb..748459cc5a5 100644 --- a/yql/essentials/tools/yql_highlight/json.h +++ b/yql/essentials/tools/yql_highlight/json.h @@ -4,6 +4,6 @@ namespace NSQLHighlight { - void Print(IOutputStream& out, const NJson::TJsonValue& json); +void Print(IOutputStream& out, const NJson::TJsonValue& json); } // namespace NSQLHighlight diff --git a/yql/essentials/tools/yql_highlight/ya.make b/yql/essentials/tools/yql_highlight/ya.make index 15a0fd5ab01..4f50a969445 100644 --- a/yql/essentials/tools/yql_highlight/ya.make +++ b/yql/essentials/tools/yql_highlight/ya.make @@ -1,6 +1,8 @@ IF (NOT EXPORT_CMAKE OR NOT 
OPENSOURCE OR OPENSOURCE_PROJECT != "yt") PROGRAM() + ENABLE(YQL_STYLE_CPP) + PEERDIR( library/cpp/getopt library/cpp/json diff --git a/yql/essentials/tools/yql_highlight/yqls_highlight.cpp b/yql/essentials/tools/yql_highlight/yqls_highlight.cpp index 2894b3f9529..1707405c2ec 100644 --- a/yql/essentials/tools/yql_highlight/yqls_highlight.cpp +++ b/yql/essentials/tools/yql_highlight/yqls_highlight.cpp @@ -2,97 +2,97 @@ namespace NSQLHighlight { - using TRe = NSQLTranslationV1::TRegexPattern; - using NSQLTranslationV1::Merged; +using TRe = NSQLTranslationV1::TRegexPattern; +using NSQLTranslationV1::Merged; - THighlighting MakeYQLsHighlighting() { - TString id = R"re([A-Za-z_\-0-9]+)re"; - TString lower = R"re([a-z_0-9])re" + SubstGlobalCopy(id, '+', '*'); - TString title = R"re([A-Z])re" + SubstGlobalCopy(id, '+', '*'); +THighlighting MakeYQLsHighlighting() { + TString id = R"re([A-Za-z_\-0-9]+)re"; + TString lower = R"re([a-z_0-9])re" + SubstGlobalCopy(id, '+', '*'); + TString title = R"re([A-Z])re" + SubstGlobalCopy(id, '+', '*'); - TRe keywords = Merged({ - {"let"}, - {"return"}, - {"quote"}, - {"block"}, - {"lambda"}, - {"declare"}, - {"import"}, - {"export"}, - {"library"}, - {"override_library"}, - {"package"}, - {"set_package_version"}, - }); - keywords.Before = R"re(\()re"; + TRe keywords = Merged({ + {"let"}, + {"return"}, + {"quote"}, + {"block"}, + {"lambda"}, + {"declare"}, + {"import"}, + {"export"}, + {"library"}, + {"override_library"}, + {"package"}, + {"set_package_version"}, + }); + keywords.Before = R"re(\()re"; - return { - .Name = "YQLs", - .Extension = "yqls", - .Units = { - TUnit{ - .Kind = EUnitKind::Comment, - .Patterns = {TRe{R"re(#.*)re"}}, - .IsPlain = false, - }, - TUnit{ - .Kind = EUnitKind::Keyword, - .Patterns = {keywords}, - }, - TUnit{ - .Kind = EUnitKind::BindParameterIdentifier, - .Patterns = {TRe{"world"}}, - }, - TUnit{ - .Kind = EUnitKind::QuotedIdentifier, - .Patterns = { - TRe{.Body = id + "!", .Before = R"re(\()re"}, - }, - 
.IsPlain = false, - }, - TUnit{ - .Kind = EUnitKind::FunctionIdentifier, - .Patterns = { - TRe{.Body = title, .Before = R"re(\()re"}, - TRe{.Body = "'" + id + "\\." + id}, - }, - .IsPlain = false, - }, - TUnit{ - .Kind = EUnitKind::Literal, - .Patterns = {TRe{"'" + id}}, - .IsPlain = false, - }, - TUnit{ - .Kind = EUnitKind::Identifier, - .Patterns = {TRe{lower}}, + return { + .Name = "YQLs", + .Extension = "yqls", + .Units = { + TUnit{ + .Kind = EUnitKind::Comment, + .Patterns = {TRe{R"re(#.*)re"}}, + .IsPlain = false, + }, + TUnit{ + .Kind = EUnitKind::Keyword, + .Patterns = {keywords}, + }, + TUnit{ + .Kind = EUnitKind::BindParameterIdentifier, + .Patterns = {TRe{"world"}}, + }, + TUnit{ + .Kind = EUnitKind::QuotedIdentifier, + .Patterns = { + TRe{.Body = id + "!", .Before = R"re(\()re"}, }, - TUnit{ - .Kind = EUnitKind::StringLiteral, - .Patterns = { - TRe{R"re(\"[^\"\n]*\")re"}, - TRe{R"re(\@\@(.|\n)*\@\@)re"}, - }, - .RangePattern = TRangePattern{ - .Begin = "@@", - .End = "@@", - }, - .IsPlain = false, + .IsPlain = false, + }, + TUnit{ + .Kind = EUnitKind::FunctionIdentifier, + .Patterns = { + TRe{.Body = title, .Before = R"re(\()re"}, + TRe{.Body = "'" + id + "\\." 
+ id}, }, - TUnit{ - .Kind = EUnitKind::Punctuation, - .Patterns = {TRe{R"re(['\(\)])re"}}, - .IsPlain = false, - .IsCodeGenExcluded = true, + .IsPlain = false, + }, + TUnit{ + .Kind = EUnitKind::Literal, + .Patterns = {TRe{"'" + id}}, + .IsPlain = false, + }, + TUnit{ + .Kind = EUnitKind::Identifier, + .Patterns = {TRe{lower}}, + }, + TUnit{ + .Kind = EUnitKind::StringLiteral, + .Patterns = { + TRe{R"re(\"[^\"\n]*\")re"}, + TRe{R"re(\@\@(.|\n)*\@\@)re"}, }, - TUnit{ - .Kind = EUnitKind::Whitespace, - .Patterns = {TRe{R"re(\s+)re"}}, - .IsPlain = false, - .IsCodeGenExcluded = true, + .RangePattern = TRangePattern{ + .Begin = "@@", + .End = "@@", }, + .IsPlain = false, + }, + TUnit{ + .Kind = EUnitKind::Punctuation, + .Patterns = {TRe{R"re(['\(\)])re"}}, + .IsPlain = false, + .IsCodeGenExcluded = true, + }, + TUnit{ + .Kind = EUnitKind::Whitespace, + .Patterns = {TRe{R"re(\s+)re"}}, + .IsPlain = false, + .IsCodeGenExcluded = true, }, - }; - } + }, + }; +} } // namespace NSQLHighlight diff --git a/yql/essentials/tools/yql_highlight/yqls_highlight.h b/yql/essentials/tools/yql_highlight/yqls_highlight.h index d803015331b..ab35dfe1a78 100644 --- a/yql/essentials/tools/yql_highlight/yqls_highlight.h +++ b/yql/essentials/tools/yql_highlight/yqls_highlight.h @@ -4,6 +4,6 @@ namespace NSQLHighlight { - THighlighting MakeYQLsHighlighting(); +THighlighting MakeYQLsHighlighting(); } // namespace NSQLHighlight diff --git a/yql/essentials/tools/yql_linter/ya.make b/yql/essentials/tools/yql_linter/ya.make index ac6cd7013ce..8f8ae6ca7cf 100644 --- a/yql/essentials/tools/yql_linter/ya.make +++ b/yql/essentials/tools/yql_linter/ya.make @@ -2,6 +2,8 @@ IF (NOT EXPORT_CMAKE OR NOT OPENSOURCE OR OPENSOURCE_PROJECT != "yt") PROGRAM() +ENABLE(YQL_STYLE_CPP) + PEERDIR( library/cpp/getopt yql/essentials/public/fastcheck diff --git a/yql/essentials/tools/yql_linter/yql_linter.cpp b/yql/essentials/tools/yql_linter/yql_linter.cpp index 0b97b3ee012..51dfe2b01d2 100644 --- 
a/yql/essentials/tools/yql_linter/yql_linter.cpp +++ b/yql/essentials/tools/yql_linter/yql_linter.cpp @@ -26,13 +26,12 @@ int Run(int argc, char* argv[]) { opts.AddLongOption('v', "verbose", "show lint issues").NoArgument(); opts.AddLongOption("list-checks", "list all enabled checks and exit").NoArgument(); opts.AddLongOption("checks", "comma-separated list of globs with optional '-' prefix").StoreResult(&checks); - opts.AddLongOption('C', "cluster", "cluster to service mapping").RequiredArgument("name@service") - .KVHandler([&](TString cluster, TString provider) { - if (cluster.empty() || provider.empty()) { - throw yexception() << "Incorrect service mapping, expected form cluster@provider, e.g. plato@yt"; - } - clusterMapping[cluster] = provider; - }, '@'); + opts.AddLongOption('C', "cluster", "cluster to service mapping").RequiredArgument("name@service").KVHandler([&](TString cluster, TString provider) { + if (cluster.empty() || provider.empty()) { + throw yexception() << "Incorrect service mapping, expected form cluster@provider, e.g. 
plato@yt"; + } + clusterMapping[cluster] = provider; + }, '@'); opts.AddLongOption('m', "mode", "query mode, allowed values: " + GetEnumAllNames<NYql::NFastCheck::EMode>()).StoreResult(&modeStr); opts.AddLongOption('s', "syntax", "query syntax, allowed values: " + GetEnumAllNames<NYql::NFastCheck::ESyntax>()).StoreResult(&syntaxStr); @@ -40,12 +39,11 @@ int Run(int argc, char* argv[]) { opts.AddLongOption("cluster-system", "cluster system").StoreResult(&clusterSystem); opts.AddLongOption("ansi-lexer", "use ansi lexer").NoArgument(); opts.AddLongOption("no-colors", "disable colors for output").NoArgument(); - opts.AddLongOption("langver", "Set current language version").Optional().RequiredArgument("VER") - .Handler1T<TString>([&](const TString& str) { - if (!NYql::ParseLangVersion(str, langver)) { - throw yexception() << "Failed to parse language version: " << str; - } - }); + opts.AddLongOption("langver", "Set current language version").Optional().RequiredArgument("VER").Handler1T<TString>([&](const TString& str) { + if (!NYql::ParseLangVersion(str, langver)) { + throw yexception() << "Failed to parse language version: " << str; + } + }); opts.SetFreeArgsNum(0); opts.AddHelpOption(); @@ -90,8 +88,8 @@ int Run(int argc, char* argv[]) { checkReq.Program = queryString; checkReq.Syntax = NYql::NFastCheck::ESyntax::YQL; checkReq.ClusterMapping = clusterMapping; - checkReq.Mode = FromString<NYql::NFastCheck::EMode>(modeStr); - checkReq.Syntax = FromString<NYql::NFastCheck::ESyntax>(syntaxStr); + checkReq.Mode = FromString<NYql::NFastCheck::EMode>(modeStr); + checkReq.Syntax = FromString<NYql::NFastCheck::ESyntax>(syntaxStr); checkReq.ClusterMode = FromString<NYql::NFastCheck::EClusterMode>(clusterModeStr); checkReq.ClusterSystem = clusterSystem; auto checkResp = NYql::NFastCheck::RunChecks(checkReq); diff --git a/yql/essentials/types/binary_json/format.h b/yql/essentials/types/binary_json/format.h index 36479ba68d8..a10d3d62c53 100644 --- 
a/yql/essentials/types/binary_json/format.h +++ b/yql/essentials/types/binary_json/format.h @@ -139,4 +139,4 @@ static_assert(sizeof(TMeta) == sizeof(ui32)); */ using TBinaryJson = TBuffer; -}
\ No newline at end of file +} // namespace NKikimr::NBinaryJson diff --git a/yql/essentials/types/binary_json/read.cpp b/yql/essentials/types/binary_json/read.cpp index 2d4b40c04b8..d31a4f854cc 100644 --- a/yql/essentials/types/binary_json/read.cpp +++ b/yql/essentials/types/binary_json/read.cpp @@ -152,13 +152,11 @@ TBinaryJsonReader::TBinaryJsonReader(TStringBuf buffer) Y_ENSURE( Header_.Version == CURRENT_VERSION, TStringBuilder() << "Version in BinaryJson `" << static_cast<ui64>(Header_.Version) << "` " - << "does not match current version `" << static_cast<ui64>(CURRENT_VERSION) << "`" - ); + << "does not match current version `" << static_cast<ui64>(CURRENT_VERSION) << "`"); Y_ENSURE( Header_.StringOffset < Buffer_.size(), - "StringOffset must be inside buffer" - ); + "StringOffset must be inside buffer"); // Tree starts right after Header TreeStart_ = sizeof(Header_); @@ -324,7 +322,7 @@ void ReadContainerToJson(const TContainerCursor& cursor, TJsonWriter& writer) { } } -} +} // namespace TString SerializeToJson(const TBinaryJson& binaryJson) { return SerializeToJson(TStringBuf(binaryJson.Data(), binaryJson.Size())); @@ -575,7 +573,7 @@ private: TStringBuf Buffer_; }; -} +} // namespace TMaybe<TStringBuf> IsValidBinaryJsonWithError(TStringBuf buffer) { return TBinaryJsonValidator(buffer).ValidateWithError(); @@ -585,4 +583,4 @@ bool IsValidBinaryJson(TStringBuf buffer) { return !IsValidBinaryJsonWithError(buffer).Defined(); } -} +} // namespace NKikimr::NBinaryJson diff --git a/yql/essentials/types/binary_json/read.h b/yql/essentials/types/binary_json/read.h index 5bdeac34fb7..879f0e0a36f 100644 --- a/yql/essentials/types/binary_json/read.h +++ b/yql/essentials/types/binary_json/read.h @@ -17,7 +17,7 @@ class TContainerCursor; * @brief Reads values inside BinaryJson. `Read...` methods of this class are not intended for direct use. 
* Consider using `GetRootCursor` method to get more convenient interface over BinaryJson data */ -class TBinaryJsonReader : public TSimpleRefCount<TBinaryJsonReader> { +class TBinaryJsonReader: public TSimpleRefCount<TBinaryJsonReader> { public: template <typename... Args> static TIntrusivePtr<TBinaryJsonReader> Make(Args&&... args) { @@ -55,7 +55,7 @@ private: T ReadPOD(ui32 offset) const { static_assert(std::is_pod_v<T>, "Type must be POD"); Y_ENSURE(offset + sizeof(T) <= Buffer_.size(), - TStringBuilder() << "Not enough space in buffer to read value (" << offset << " + " << sizeof(T) << " > " << Buffer_.size() << ")"); + TStringBuilder() << "Not enough space in buffer to read value (" << offset << " + " << sizeof(T) << " > " << Buffer_.size() << ")"); return ReadUnaligned<T>(Buffer_.data() + offset); } @@ -187,4 +187,4 @@ bool IsValidBinaryJson(TStringBuf buffer); TMaybe<TStringBuf> IsValidBinaryJsonWithError(TStringBuf buffer); -} +} // namespace NKikimr::NBinaryJson diff --git a/yql/essentials/types/binary_json/ut/container_ut.cpp b/yql/essentials/types/binary_json/ut/container_ut.cpp index 869d2323bdd..8c3b05146a5 100644 --- a/yql/essentials/types/binary_json/ut/container_ut.cpp +++ b/yql/essentials/types/binary_json/ut/container_ut.cpp @@ -5,7 +5,7 @@ using namespace NKikimr::NBinaryJson; -class TBinaryJsonContainerTest : public TBinaryJsonTestBase { +class TBinaryJsonContainerTest: public TBinaryJsonTestBase { public: TBinaryJsonContainerTest() : TBinaryJsonTestBase() @@ -13,12 +13,12 @@ public: } UNIT_TEST_SUITE(TBinaryJsonContainerTest); - UNIT_TEST(TestGetType); - UNIT_TEST(TestGetSize); - UNIT_TEST(TestGetElement); - UNIT_TEST(TestArrayIterator); - UNIT_TEST(TestLookup); - UNIT_TEST(TestObjectIterator); + UNIT_TEST(TestGetType); + UNIT_TEST(TestGetSize); + UNIT_TEST(TestGetElement); + UNIT_TEST(TestArrayIterator); + UNIT_TEST(TestLookup); + UNIT_TEST(TestObjectIterator); UNIT_TEST_SUITE_END(); void TestGetType() { @@ -174,15 +174,16 @@ public: "five": 
"string", "six": [], "seven": {} - })", { - {"one", "123"}, - {"two", "null"}, - {"three", "false"}, - {"four", "true"}, - {"five", "\"string\""}, - {"six", "[]"}, - {"seven", "{}"}, - }}, + })", + { + {"one", "123"}, + {"two", "null"}, + {"three", "false"}, + {"four", "true"}, + {"five", "\"string\""}, + {"six", "[]"}, + {"seven", "{}"}, + }}, }; for (const auto& testCase : testCases) { diff --git a/yql/essentials/types/binary_json/ut/entry_ut.cpp b/yql/essentials/types/binary_json/ut/entry_ut.cpp index 247fc5034cf..84047333cb4 100644 --- a/yql/essentials/types/binary_json/ut/entry_ut.cpp +++ b/yql/essentials/types/binary_json/ut/entry_ut.cpp @@ -5,7 +5,7 @@ using namespace NKikimr::NBinaryJson; -class TBinaryJsonEntryTest : public TBinaryJsonTestBase { +class TBinaryJsonEntryTest: public TBinaryJsonTestBase { public: TBinaryJsonEntryTest() : TBinaryJsonTestBase() @@ -13,11 +13,11 @@ public: } UNIT_TEST_SUITE(TBinaryJsonEntryTest); - UNIT_TEST(TestGetType); - UNIT_TEST(TestGetContainer); - UNIT_TEST(TestGetString); - UNIT_TEST(TestGetNumber); - UNIT_TEST(TestOutOfBounds); + UNIT_TEST(TestGetType); + UNIT_TEST(TestGetContainer); + UNIT_TEST(TestGetString); + UNIT_TEST(TestGetNumber); + UNIT_TEST(TestOutOfBounds); UNIT_TEST_SUITE_END(); void TestGetType() { @@ -97,10 +97,10 @@ public: void TestOutOfBounds() { const TVector<std::pair<TString, double>> testCases = { - { "1e100000000", std::numeric_limits<double>::infinity() }, - { "-1e100000000", -std::numeric_limits<double>::infinity() }, - { "1.797693135e+308", std::numeric_limits<double>::infinity() }, - { "-1.797693135e+308", -std::numeric_limits<double>::infinity() }, + {"1e100000000", std::numeric_limits<double>::infinity()}, + {"-1e100000000", -std::numeric_limits<double>::infinity()}, + {"1.797693135e+308", std::numeric_limits<double>::infinity()}, + {"-1.797693135e+308", -std::numeric_limits<double>::infinity()}, }; for (const auto& testCase : testCases) { @@ -118,4 +118,3 @@ public: }; 
UNIT_TEST_SUITE_REGISTRATION(TBinaryJsonEntryTest); - diff --git a/yql/essentials/types/binary_json/ut/identity_ut.cpp b/yql/essentials/types/binary_json/ut/identity_ut.cpp index 4179293287b..025024d2b3f 100644 --- a/yql/essentials/types/binary_json/ut/identity_ut.cpp +++ b/yql/essentials/types/binary_json/ut/identity_ut.cpp @@ -7,7 +7,7 @@ using namespace NKikimr; -class TBinaryJsonIdentityTest : public TBinaryJsonTestBase { +class TBinaryJsonIdentityTest: public TBinaryJsonTestBase { public: TBinaryJsonIdentityTest() : TBinaryJsonTestBase() @@ -15,28 +15,28 @@ public: } UNIT_TEST_SUITE(TBinaryJsonIdentityTest); - UNIT_TEST(TestReadToJsonDom); - UNIT_TEST(TestSerializeToJson); - UNIT_TEST(TestSerializeDomToBinaryJson); + UNIT_TEST(TestReadToJsonDom); + UNIT_TEST(TestSerializeToJson); + UNIT_TEST(TestSerializeDomToBinaryJson); UNIT_TEST_SUITE_END(); const TVector<TString> TestCases = { - "false", - "true", - "null", - "\"test string\"", - "\"\"", - "1.2345", - "1", - "-23", - "0", - "0.12345", - "{}", - "{\"a\":1}", - "[]", - "[1]", - R"([{"key":[true,false,null,"first","second","second","third"]},"fourth",0.34])", - }; + "false", + "true", + "null", + "\"test string\"", + "\"\"", + "1.2345", + "1", + "-23", + "0", + "0.12345", + "{}", + "{\"a\":1}", + "[]", + "[1]", + R"([{"key":[true,false,null,"first","second","second","third"]},"fourth",0.34])", + }; void TestReadToJsonDom() { for (const TStringBuf json : TestCases) { diff --git a/yql/essentials/types/binary_json/ut/valid_ut.cpp b/yql/essentials/types/binary_json/ut/valid_ut.cpp index 30783758770..4115b7299f9 100644 --- a/yql/essentials/types/binary_json/ut/valid_ut.cpp +++ b/yql/essentials/types/binary_json/ut/valid_ut.cpp @@ -12,7 +12,7 @@ using namespace NKikimr::NBinaryJson; -class TBinaryJsonValidnessTest : public TBinaryJsonTestBase { +class TBinaryJsonValidnessTest: public TBinaryJsonTestBase { public: TBinaryJsonValidnessTest() : TBinaryJsonTestBase() @@ -20,9 +20,9 @@ public: } 
UNIT_TEST_SUITE(TBinaryJsonValidnessTest); - UNIT_TEST(TestValidness); - UNIT_TEST(TestRandom); - UNIT_TEST(TestVersionCheck); + UNIT_TEST(TestValidness); + UNIT_TEST(TestRandom); + UNIT_TEST(TestVersionCheck); UNIT_TEST_SUITE_END(); void TestValidness() { diff --git a/yql/essentials/types/binary_json/ut/ya.make b/yql/essentials/types/binary_json/ut/ya.make index 49ef9c24a04..8c5ce8701df 100644 --- a/yql/essentials/types/binary_json/ut/ya.make +++ b/yql/essentials/types/binary_json/ut/ya.make @@ -1,5 +1,7 @@ UNITTEST_FOR(yql/essentials/types/binary_json) +ENABLE(YQL_STYLE_CPP) + SRCS( container_ut.cpp identity_ut.cpp diff --git a/yql/essentials/types/binary_json/ut_benchmark/write.cpp b/yql/essentials/types/binary_json/ut_benchmark/write.cpp index f82b1c1c3d8..51efaee89c3 100644 --- a/yql/essentials/types/binary_json/ut_benchmark/write.cpp +++ b/yql/essentials/types/binary_json/ut_benchmark/write.cpp @@ -9,7 +9,7 @@ // ya test -r -D BENCHMARK_MAKE_LARGE_PART #ifndef BENCHMARK_MAKE_LARGE_PART -#define BENCHMARK_MAKE_LARGE_PART 0 + #define BENCHMARK_MAKE_LARGE_PART 0 #endif using namespace NKikimr::NBinaryJson; @@ -36,15 +36,15 @@ TString GetTestJsonString() { } static void BenchWriteSimdJson(benchmark::State& state) { - TString value = GetTestJsonString(); - TStringBuf buf(value); - for (auto _ : state) { - auto result = SerializeToBinaryJson(buf); - benchmark::DoNotOptimize(result); - benchmark::ClobberMemory(); - } + TString value = GetTestJsonString(); + TStringBuf buf(value); + for (auto _ : state) { + auto result = SerializeToBinaryJson(buf); + benchmark::DoNotOptimize(result); + benchmark::ClobberMemory(); + } } -} +} // namespace BENCHMARK(BenchWriteSimdJson)->MinTime(1); diff --git a/yql/essentials/types/binary_json/ut_benchmark/ya.make b/yql/essentials/types/binary_json/ut_benchmark/ya.make index b4b94af4ecd..17060e862aa 100644 --- a/yql/essentials/types/binary_json/ut_benchmark/ya.make +++ b/yql/essentials/types/binary_json/ut_benchmark/ya.make @@ -1,5 
+1,7 @@ G_BENCHMARK() +ENABLE(YQL_STYLE_CPP) + TAG(ya:fat) SIZE(LARGE) TIMEOUT(600) diff --git a/yql/essentials/types/binary_json/write.cpp b/yql/essentials/types/binary_json/write.cpp index 225e54c23e2..12232898037 100644 --- a/yql/essentials/types/binary_json/write.cpp +++ b/yql/essentials/types/binary_json/write.cpp @@ -415,10 +415,12 @@ private: /** * @brief Callbacks for textual JSON parser. Essentially wrapper around TJsonIndex methods */ -class TBinaryJsonCallbacks : public TJsonCallbacks { +class TBinaryJsonCallbacks: public TJsonCallbacks { public: TBinaryJsonCallbacks(bool throwException, bool allowInf) - : TJsonCallbacks(/* throwException */ throwException), AllowInf_(allowInf) { + : TJsonCallbacks(/* throwException */ throwException) + , AllowInf_(allowInf) + { } bool OnNull() override { @@ -723,7 +725,7 @@ template <typename TOnDemandValue> return simdjson::SUCCESS; #undef RETURN_IF_NOT_SUCCESS } -} +} // namespace std::variant<TBinaryJson, TString> SerializeToBinaryJsonImpl(const TStringBuf json, bool allowInf) { std::variant<TBinaryJson, TString> res; @@ -760,5 +762,4 @@ TBinaryJson SerializeToBinaryJson(const NUdf::TUnboxedValue& value) { return std::move(serializer).Serialize(); } -} - +} // namespace NKikimr::NBinaryJson diff --git a/yql/essentials/types/binary_json/write.h b/yql/essentials/types/binary_json/write.h index 61dea9bfb40..819cb002375 100644 --- a/yql/essentials/types/binary_json/write.h +++ b/yql/essentials/types/binary_json/write.h @@ -8,7 +8,7 @@ namespace NYql::NUdf { class TUnboxedValue; -}; +}; // namespace NYql::NUdf namespace NKikimr::NBinaryJson { @@ -21,5 +21,4 @@ std::variant<TBinaryJson, TString> SerializeToBinaryJson(const TStringBuf json, * @brief Translates DOM layout from `yql/library/dom` library into BinaryJson */ TBinaryJson SerializeToBinaryJson(const NYql::NUdf::TUnboxedValue& value); -} - +} // namespace NKikimr::NBinaryJson diff --git a/yql/essentials/types/dynumber/cast.h b/yql/essentials/types/dynumber/cast.h 
index 2625ec446fc..b8d1f158192 100644 --- a/yql/essentials/types/dynumber/cast.h +++ b/yql/essentials/types/dynumber/cast.h @@ -111,4 +111,4 @@ TMaybe<T> TryFromDyNumber(TStringBuf buffer) { return result; } -} +} // namespace NKikimr::NDyNumber diff --git a/yql/essentials/types/dynumber/dynumber.cpp b/yql/essentials/types/dynumber/dynumber.cpp index 2e4b997f07b..1344cf0b346 100644 --- a/yql/essentials/types/dynumber/dynumber.cpp +++ b/yql/essentials/types/dynumber/dynumber.cpp @@ -11,24 +11,31 @@ namespace NKikimr::NDyNumber { bool IsValidDyNumber(TStringBuf buffer) { const auto size = buffer.size(); - if (!size) + if (!size) { return false; + } switch (const auto data = buffer.data(); *data) { case '\x00': - if (size < 2U || size > 21U) + if (size < 2U || size > 21U) { return false; - for (auto i = 2U; i < size; ++i) - if ((data[i] & '\x0F') < '\x06' || ((data[i] >> '\x04') & '\x0F') < '\x06') + } + for (auto i = 2U; i < size; ++i) { + if ((data[i] & '\x0F') < '\x06' || ((data[i] >> '\x04') & '\x0F') < '\x06') { return false; + } + } break; case '\x01': return 1U == size; case '\x02': - if (size < 2U || size > 21U) + if (size < 2U || size > 21U) { return false; - for (auto i = 2U; i < size; ++i) - if ((data[i] & '\x0F') > '\x09' || ((data[i] >> '\x04') & '\x0F') > '\x09') + } + for (auto i = 2U; i < size; ++i) { + if ((data[i] & '\x0F') > '\x09' || ((data[i] >> '\x04') & '\x0F') > '\x09') { return false; + } + } break; default: return false; @@ -37,8 +44,9 @@ bool IsValidDyNumber(TStringBuf buffer) { } bool IsValidDyNumberString(TStringBuf str) { - if (str.empty()) + if (str.empty()) { return false; + } auto s = str.data(); auto l = str.size(); const bool neg = '-' == *s; @@ -46,8 +54,9 @@ bool IsValidDyNumberString(TStringBuf str) { ++s; --l; } - if (!l) + if (!l) { return false; + } bool hasDot = false; auto beforeDot = 0U; auto nonZeroAfterDot = 0U; @@ -58,28 +67,34 @@ bool IsValidDyNumberString(TStringBuf str) { for (auto i = 0U; i < l; ++i) { const auto c = 
s[i]; const bool isZero = '0' == c; - if (!hasDot && isZero && !beforeDot) + if (!hasDot && isZero && !beforeDot) { continue; + } if (c == '.') { - if (hasDot) + if (hasDot) { return false; + } hasDot = true; continue; } - if (c =='e' || c == 'E') { - if (++i >= l) + if (c == 'e' || c == 'E') { + if (++i >= l) { return false; - if (!TryFromString(s + i, l - i, ePower)) + } + if (!TryFromString(s + i, l - i, ePower)) { return false; + } break; } - if (!std::isdigit(c)) + if (!std::isdigit(c)) { return false; + } if (!hasDot) { ++beforeDot; } else { - if (!isZero) + if (!isZero) { hasNonZeroAfterDot = true; + } if (hasNonZeroAfterDot) { if (isZero) { ++tailZeros; @@ -89,28 +104,33 @@ bool IsValidDyNumberString(TStringBuf str) { } } else { ++zeroAfterDot; - if (beforeDot) + if (beforeDot) { ++tailZeros; + } } } } auto effectivePower = ePower; - if (beforeDot) + if (beforeDot) { effectivePower += beforeDot; - else if (hasNonZeroAfterDot) + } else if (hasNonZeroAfterDot) { effectivePower -= zeroAfterDot; - else + } else { return true; - if (beforeDot + zeroAfterDot + nonZeroAfterDot > 38U) + } + if (beforeDot + zeroAfterDot + nonZeroAfterDot > 38U) { return false; - if (effectivePower < -129 || effectivePower > 126) + } + if (effectivePower < -129 || effectivePower > 126) { return false; + } return true; } TMaybe<TString> ParseDyNumberString(TStringBuf str) { - if (str.empty()) + if (str.empty()) { return Nothing(); + } auto s = str.data(); auto l = str.size(); const bool neg = '-' == *s; @@ -118,8 +138,9 @@ TMaybe<TString> ParseDyNumberString(TStringBuf str) { ++s; --l; } - if (!l) + if (!l) { return Nothing(); + } bool hasDot = false; auto beforeDot = 0U; auto nonZeroAfterDot = 0U; @@ -133,23 +154,28 @@ TMaybe<TString> ParseDyNumberString(TStringBuf str) { for (auto i = 0U; i < l; ++i) { const auto c = s[i]; const bool isZero = '0' == c; - if (!hasDot && isZero && !beforeDot) + if (!hasDot && isZero && !beforeDot) { continue; + } if (c == '.') { - if (hasDot) + if 
(hasDot) { return Nothing(); + } hasDot = true; continue; } - if (c =='e' || c == 'E') { - if (++i >= l) + if (c == 'e' || c == 'E') { + if (++i >= l) { return Nothing(); - if (!TryFromString(s + i, l - i, ePower)) + } + if (!TryFromString(s + i, l - i, ePower)) { return Nothing(); + } break; } - if (!std::isdigit(c)) + if (!std::isdigit(c)) { return Nothing(); + } if (!hasDot) { ++beforeDot; if (isZero) { @@ -161,8 +187,9 @@ TMaybe<TString> ParseDyNumberString(TStringBuf str) { data.emplace_back(c - '0'); } } else { - if (!isZero) + if (!isZero) { hasNonZeroAfterDot = true; + } if (hasNonZeroAfterDot) { if (isZero) { ++tailZeros; @@ -178,37 +205,44 @@ TMaybe<TString> ParseDyNumberString(TStringBuf str) { } } else { ++zeroAfterDot; - if (beforeDot) + if (beforeDot) { ++tailZeros; + } } } } auto effectivePower = ePower; - if (beforeDot) + if (beforeDot) { effectivePower += beforeDot; - else if (hasNonZeroAfterDot) + } else if (hasNonZeroAfterDot) { effectivePower -= zeroAfterDot; - else + } else { return "\x01"; - if (beforeDot + zeroAfterDot + nonZeroAfterDot > 38U) + } + if (beforeDot + zeroAfterDot + nonZeroAfterDot > 38U) { return Nothing(); - if (effectivePower < -129 || effectivePower > 126) + } + if (effectivePower < -129 || effectivePower > 126) { return Nothing(); - if (data.size() % 2U) + } + if (data.size() % 2U) { data.emplace_back('\x00'); - + } + TString result; result.reserve(2U + (data.size() >> 1U)); if (neg) { result.append('\x00'); result.append(char(126 - effectivePower)); - for (auto i = 0U; i < data.size(); i += 2U) - result.append((('\x0F' - data[i]) << '\x04') | ('\x0F' - data[i + 1])); + for (auto i = 0U; i < data.size(); i += 2U) { + result.append((('\x0F' - data[i]) << '\x04') | ('\x0F' - data[i + 1])); + } } else { result.append('\x02'); result.append(char(effectivePower + 129)); - for (auto i = 0U; i < data.size(); i += 2U) - result.append((data[i] << '\x04') | data[i + 1]); + for (auto i = 0U; i < data.size(); i += 2U) { + 
result.append((data[i] << '\x04') | data[i + 1]); + } } // Cerr << str << ": " << HexText(TStringBuf{result.c_str(), result.size()}) << Endl; @@ -231,25 +265,29 @@ TMaybe<TString> DyNumberToString(TStringBuf buffer) { return out; } const bool negative = !*s++; - if (negative) + if (negative) { out << '-'; + } if (0U >= --l) { return Nothing(); } auto power = ui8(*s++); - if (negative) + if (negative) { power = '\xFF' - power; + } out << '.'; const auto digits = negative ? "FEDCBA9876543210" : "0123456789ABCDEF"; while (--l) { const auto c = *s++; out << digits[(c >> '\x04') & '\x0F']; - if (const auto digit = c & '\x0F'; digit != (negative ? '\x0F' : '\x00') || l > 1U) + if (const auto digit = c & '\x0F'; digit != (negative ? '\x0F' : '\x00') || l > 1U) { out << digits[digit]; + } } - if (const auto e = power - 129) + if (const auto e = power - 129) { out << 'e' << e; + } return out; } -} +} // namespace NKikimr::NDyNumber diff --git a/yql/essentials/types/dynumber/dynumber.h b/yql/essentials/types/dynumber/dynumber.h index f64d1a294f3..b8a88311e85 100644 --- a/yql/essentials/types/dynumber/dynumber.h +++ b/yql/essentials/types/dynumber/dynumber.h @@ -32,4 +32,4 @@ TMaybe<TString> ParseDyNumberString(TStringBuf str); */ TMaybe<TString> DyNumberToString(TStringBuf buffer); -}
\ No newline at end of file +} // namespace NKikimr::NDyNumber diff --git a/yql/essentials/types/dynumber/ut/dynumber_ut.cpp b/yql/essentials/types/dynumber/ut/dynumber_ut.cpp index 6b7d388a75c..5eae66700cc 100644 --- a/yql/essentials/types/dynumber/ut/dynumber_ut.cpp +++ b/yql/essentials/types/dynumber/ut/dynumber_ut.cpp @@ -9,175 +9,175 @@ using namespace NKikimr::NDyNumber; namespace { - void TestDyNumber(TStringBuf test) { - UNIT_ASSERT(IsValidDyNumberString(test)); +void TestDyNumber(TStringBuf test) { + UNIT_ASSERT(IsValidDyNumberString(test)); - const auto dyNumber = ParseDyNumberString(test); - UNIT_ASSERT(dyNumber.Defined()); - UNIT_ASSERT(IsValidDyNumber(*dyNumber)); + const auto dyNumber = ParseDyNumberString(test); + UNIT_ASSERT(dyNumber.Defined()); + UNIT_ASSERT(IsValidDyNumber(*dyNumber)); - const auto restoredTest = DyNumberToString(*dyNumber); - UNIT_ASSERT(restoredTest.Defined()); - UNIT_ASSERT(IsValidDyNumberString(*restoredTest)); + const auto restoredTest = DyNumberToString(*dyNumber); + UNIT_ASSERT(restoredTest.Defined()); + UNIT_ASSERT(IsValidDyNumberString(*restoredTest)); - const auto dyNumberAfterString = ParseDyNumberString(*restoredTest); - UNIT_ASSERT(dyNumberAfterString.Defined()); - UNIT_ASSERT(IsValidDyNumber(*dyNumberAfterString)); + const auto dyNumberAfterString = ParseDyNumberString(*restoredTest); + UNIT_ASSERT(dyNumberAfterString.Defined()); + UNIT_ASSERT(IsValidDyNumber(*dyNumberAfterString)); - UNIT_ASSERT_EQUAL(*dyNumber, *dyNumberAfterString); - } + UNIT_ASSERT_EQUAL(*dyNumber, *dyNumberAfterString); +} - template <typename T> - void TestCast(TStringBuf test, TMaybe<T> value) { - UNIT_ASSERT_C(IsValidDyNumberString(test), test); +template <typename T> +void TestCast(TStringBuf test, TMaybe<T> value) { + UNIT_ASSERT_C(IsValidDyNumberString(test), test); - const auto dyNumber = ParseDyNumberString(test); - UNIT_ASSERT(dyNumber.Defined()); - UNIT_ASSERT(IsValidDyNumber(*dyNumber)); + const auto dyNumber = 
ParseDyNumberString(test); + UNIT_ASSERT(dyNumber.Defined()); + UNIT_ASSERT(IsValidDyNumber(*dyNumber)); - const auto casted = TryFromDyNumber<T>(*dyNumber); + const auto casted = TryFromDyNumber<T>(*dyNumber); - if constexpr (std::is_integral<T>::value) { - UNIT_ASSERT_VALUES_EQUAL(casted, value); - } else if (casted && value) { - UNIT_ASSERT_DOUBLES_EQUAL(*casted, *value, 1e-9); - } else { - UNIT_ASSERT_C(!casted && !value, "Casted: " << casted << ", value: " << value); - } + if constexpr (std::is_integral<T>::value) { + UNIT_ASSERT_VALUES_EQUAL(casted, value); + } else if (casted && value) { + UNIT_ASSERT_DOUBLES_EQUAL(*casted, *value, 1e-9); + } else { + UNIT_ASSERT_C(!casted && !value, "Casted: " << casted << ", value: " << value); } } +} // namespace Y_UNIT_TEST_SUITE(TDyNumberTests) { - Y_UNIT_TEST(ParseAndRestore) { - TestDyNumber("0"); - TestDyNumber(".0"); - TestDyNumber("1"); - TestDyNumber("18"); - TestDyNumber("181"); - TestDyNumber("1817"); - TestDyNumber("-1"); - TestDyNumber("-18"); - TestDyNumber("-181"); - TestDyNumber("-1817"); - TestDyNumber(".023"); - TestDyNumber("0.93"); - TestDyNumber("724.1"); - TestDyNumber("150e2"); - TestDyNumber("15e3"); - TestDyNumber("0.150e4"); - TestDyNumber("0.15e4"); - TestDyNumber("1E-130"); - TestDyNumber("9.9999999999999999999999999999999999999E+125"); - TestDyNumber("9.9999999999999999999999999999999999999000E+125"); - TestDyNumber("-1E-130"); - TestDyNumber("-9.9999999999999999999999999999999999999E+125"); - TestDyNumber("-9.9999999999999999999999999999999999999000E+125"); - } +Y_UNIT_TEST(ParseAndRestore) { + TestDyNumber("0"); + TestDyNumber(".0"); + TestDyNumber("1"); + TestDyNumber("18"); + TestDyNumber("181"); + TestDyNumber("1817"); + TestDyNumber("-1"); + TestDyNumber("-18"); + TestDyNumber("-181"); + TestDyNumber("-1817"); + TestDyNumber(".023"); + TestDyNumber("0.93"); + TestDyNumber("724.1"); + TestDyNumber("150e2"); + TestDyNumber("15e3"); + TestDyNumber("0.150e4"); + TestDyNumber("0.15e4"); + 
TestDyNumber("1E-130"); + TestDyNumber("9.9999999999999999999999999999999999999E+125"); + TestDyNumber("9.9999999999999999999999999999999999999000E+125"); + TestDyNumber("-1E-130"); + TestDyNumber("-9.9999999999999999999999999999999999999E+125"); + TestDyNumber("-9.9999999999999999999999999999999999999000E+125"); +} - Y_UNIT_TEST(Cast) { - TestCast<int>("0", 0); +Y_UNIT_TEST(Cast) { + TestCast<int>("0", 0); - TestCast<int>("1", 1); - TestCast<int>("-1", -1); + TestCast<int>("1", 1); + TestCast<int>("-1", -1); - TestCast<int>("12", 12); - TestCast<int>("-12", -12); - TestCast<int>("123", 123); - TestCast<int>("-123", -123); - TestCast<int>("1234", 1234); - TestCast<int>("-1234", -1234); + TestCast<int>("12", 12); + TestCast<int>("-12", -12); + TestCast<int>("123", 123); + TestCast<int>("-123", -123); + TestCast<int>("1234", 1234); + TestCast<int>("-1234", -1234); - TestCast<int>(ToString(Max<int>()), Max<int>()); - TestCast<int>(ToString(Min<int>()), Min<int>()); + TestCast<int>(ToString(Max<int>()), Max<int>()); + TestCast<int>(ToString(Min<int>()), Min<int>()); - TestCast<i8> ("200", Nothing()); - TestCast<i16>("40000", Nothing()); - TestCast<i32>("3000000000", Nothing()); + TestCast<i8>("200", Nothing()); + TestCast<i16>("40000", Nothing()); + TestCast<i32>("3000000000", Nothing()); - TestCast<ui8> ("300", Nothing()); - TestCast<ui16>("70000", Nothing()); - TestCast<ui32>("5000000000", Nothing()); + TestCast<ui8>("300", Nothing()); + TestCast<ui16>("70000", Nothing()); + TestCast<ui32>("5000000000", Nothing()); - // int to floating point - TestCast<double>("1", 1); - TestCast<double>("12", 12); - TestCast<double>("123", 123); + // int to floating point + TestCast<double>("1", 1); + TestCast<double>("12", 12); + TestCast<double>("123", 123); - // floating point to int - TestCast<int>("0.1", Nothing()); - TestCast<int>("0.23", Nothing()); - TestCast<int>("1.2", Nothing()); - TestCast<int>("1.23", Nothing()); - TestCast<int>("12.3", Nothing()); - 
TestCast<int>("123.4", Nothing()); + // floating point to int + TestCast<int>("0.1", Nothing()); + TestCast<int>("0.23", Nothing()); + TestCast<int>("1.2", Nothing()); + TestCast<int>("1.23", Nothing()); + TestCast<int>("12.3", Nothing()); + TestCast<int>("123.4", Nothing()); - // double - TestCast<double>("0.1", 0.1); - TestCast<double>("0.23", 0.23); - TestCast<double>("-1.23", -1.23); - TestCast<double>("12.3", 12.3); - TestCast<double>("123.4", 123.4); - TestCast<double>("1.23E20", 1.23E20); - TestCast<double>("1.23E-20", 1.23E-20); + // double + TestCast<double>("0.1", 0.1); + TestCast<double>("0.23", 0.23); + TestCast<double>("-1.23", -1.23); + TestCast<double>("12.3", 12.3); + TestCast<double>("123.4", 123.4); + TestCast<double>("1.23E20", 1.23E20); + TestCast<double>("1.23E-20", 1.23E-20); - // float - TestCast<float>("-0.1", -0.1f); - TestCast<float>("-0.23", -0.23f); - TestCast<float>("1.23", 1.23f); - TestCast<float>("-12.3", -12.3f); - TestCast<float>("-123.4", -123.4f); - TestCast<float>("-1.23E10", -1.23E10f); - TestCast<float>("-1.23E-10", -1.23E-10f); + // float + TestCast<float>("-0.1", -0.1f); + TestCast<float>("-0.23", -0.23f); + TestCast<float>("1.23", 1.23f); + TestCast<float>("-12.3", -12.3f); + TestCast<float>("-123.4", -123.4f); + TestCast<float>("-1.23E10", -1.23E10f); + TestCast<float>("-1.23E-10", -1.23E-10f); - // unsigned Max - TestCast<ui8> (ToString(Max<ui8>()), Max<ui8>()); - TestCast<ui16>(ToString(Max<ui16>()), Max<ui16>()); - TestCast<ui32>(ToString(Max<ui32>()), Max<ui32>()); - TestCast<ui64>(ToString(Max<ui64>()), Max<ui64>()); + // unsigned Max + TestCast<ui8>(ToString(Max<ui8>()), Max<ui8>()); + TestCast<ui16>(ToString(Max<ui16>()), Max<ui16>()); + TestCast<ui32>(ToString(Max<ui32>()), Max<ui32>()); + TestCast<ui64>(ToString(Max<ui64>()), Max<ui64>()); - // signed Max - TestCast<i8> (ToString(Max<i8>()), Max<i8>()); - TestCast<i16>(ToString(Max<i16>()), Max<i16>()); - TestCast<i32>(ToString(Max<i32>()), Max<i32>()); - 
TestCast<i64>(ToString(Max<i64>()), Max<i64>()); + // signed Max + TestCast<i8>(ToString(Max<i8>()), Max<i8>()); + TestCast<i16>(ToString(Max<i16>()), Max<i16>()); + TestCast<i32>(ToString(Max<i32>()), Max<i32>()); + TestCast<i64>(ToString(Max<i64>()), Max<i64>()); - // signed Min - TestCast<i8> (ToString(Min<i8>()), Min<i8>()); - TestCast<i16>(ToString(Min<i16>()), Min<i16>()); - TestCast<i32>(ToString(Min<i32>()), Min<i32>()); - TestCast<i64>(ToString(Min<i64>()), Min<i64>()); + // signed Min + TestCast<i8>(ToString(Min<i8>()), Min<i8>()); + TestCast<i16>(ToString(Min<i16>()), Min<i16>()); + TestCast<i32>(ToString(Min<i32>()), Min<i32>()); + TestCast<i64>(ToString(Min<i64>()), Min<i64>()); - // unsigned out of range - TestCast<ui8> (ToString(static_cast<ui64>(Max<ui8>()) + 1), Nothing()); - TestCast<ui16>(ToString(static_cast<ui64>(Max<ui16>()) + 1), Nothing()); - TestCast<ui32>(ToString(static_cast<ui64>(Max<ui32>()) + 1), Nothing()); + // unsigned out of range + TestCast<ui8>(ToString(static_cast<ui64>(Max<ui8>()) + 1), Nothing()); + TestCast<ui16>(ToString(static_cast<ui64>(Max<ui16>()) + 1), Nothing()); + TestCast<ui32>(ToString(static_cast<ui64>(Max<ui32>()) + 1), Nothing()); - // signed out of range (right) - TestCast<i8> (ToString(static_cast<i64>(Max<i8>()) + 1), Nothing()); - TestCast<i16>(ToString(static_cast<i64>(Max<i16>()) + 1), Nothing()); - TestCast<i32>(ToString(static_cast<i64>(Max<i32>()) + 1), Nothing()); + // signed out of range (right) + TestCast<i8>(ToString(static_cast<i64>(Max<i8>()) + 1), Nothing()); + TestCast<i16>(ToString(static_cast<i64>(Max<i16>()) + 1), Nothing()); + TestCast<i32>(ToString(static_cast<i64>(Max<i32>()) + 1), Nothing()); - // signed out of range (left) - TestCast<i8> (ToString(static_cast<i64>(Min<i8>()) - 1), Nothing()); - TestCast<i16>(ToString(static_cast<i64>(Min<i16>()) - 1), Nothing()); - TestCast<i32>(ToString(static_cast<i64>(Min<i32>()) - 1), Nothing()); + // signed out of range (left) + 
TestCast<i8>(ToString(static_cast<i64>(Min<i8>()) - 1), Nothing()); + TestCast<i16>(ToString(static_cast<i64>(Min<i16>()) - 1), Nothing()); + TestCast<i32>(ToString(static_cast<i64>(Min<i32>()) - 1), Nothing()); - // positive signed to unsigned - TestCast<ui8> (ToString(Max<i8>()), Max<i8>()); - TestCast<ui16>(ToString(Max<i16>()), Max<i16>()); - TestCast<ui32>(ToString(Max<i32>()), Max<i32>()); - TestCast<ui64>(ToString(Max<i64>()), Max<i64>()); + // positive signed to unsigned + TestCast<ui8>(ToString(Max<i8>()), Max<i8>()); + TestCast<ui16>(ToString(Max<i16>()), Max<i16>()); + TestCast<ui32>(ToString(Max<i32>()), Max<i32>()); + TestCast<ui64>(ToString(Max<i64>()), Max<i64>()); - // negative signed to unsigned - TestCast<ui8> (ToString(Min<i8>()), Nothing()); - TestCast<ui16>(ToString(Min<i16>()), Nothing()); - TestCast<ui32>(ToString(Min<i32>()), Nothing()); - TestCast<ui64>(ToString(Min<i64>()), Nothing()); + // negative signed to unsigned + TestCast<ui8>(ToString(Min<i8>()), Nothing()); + TestCast<ui16>(ToString(Min<i16>()), Nothing()); + TestCast<ui32>(ToString(Min<i32>()), Nothing()); + TestCast<ui64>(ToString(Min<i64>()), Nothing()); - // DyNumber limits - TestCast<ui64>("9.9999999999999999999999999999999999999E+125", Nothing()); - TestCast<ui64>("-9.9999999999999999999999999999999999999E+125", Nothing()); - TestCast<double>("1E-130", 1E-130); - TestCast<double>("-1E-130", -1E-130); - } + // DyNumber limits + TestCast<ui64>("9.9999999999999999999999999999999999999E+125", Nothing()); + TestCast<ui64>("-9.9999999999999999999999999999999999999E+125", Nothing()); + TestCast<double>("1E-130", 1E-130); + TestCast<double>("-1E-130", -1E-130); } +} // Y_UNIT_TEST_SUITE(TDyNumberTests) diff --git a/yql/essentials/types/dynumber/ut/ya.make b/yql/essentials/types/dynumber/ut/ya.make index b632325e5b4..196ee912d53 100644 --- a/yql/essentials/types/dynumber/ut/ya.make +++ b/yql/essentials/types/dynumber/ut/ya.make @@ -1,5 +1,7 @@ 
UNITTEST_FOR(yql/essentials/types/dynumber) +ENABLE(YQL_STYLE_CPP) + SRCS( dynumber_ut.cpp ) diff --git a/yql/essentials/types/dynumber/ya.make b/yql/essentials/types/dynumber/ya.make index 5d372e0c314..f58b6173376 100644 --- a/yql/essentials/types/dynumber/ya.make +++ b/yql/essentials/types/dynumber/ya.make @@ -1,5 +1,7 @@ LIBRARY() +ENABLE(YQL_STYLE_CPP) + PEERDIR( library/cpp/containers/stack_vector ) diff --git a/yql/essentials/types/uuid/uuid.cpp b/yql/essentials/types/uuid/uuid.cpp index 4a7f2bd406d..e735b56ac2c 100644 --- a/yql/essentials/types/uuid/uuid.cpp +++ b/yql/essentials/types/uuid/uuid.cpp @@ -8,8 +8,7 @@ namespace NUuid { static void WriteHexDigit(ui8 digit, IOutputStream& out) { if (digit <= 9) { out << char('0' + digit); - } - else { + } else { out << char('a' + digit - 10); } } @@ -77,6 +76,5 @@ void UuidHalfsToByteString(ui64 low, ui64 hi, IOutputStream& out) { out.Write(buf.Bytes, 16); } -} -} - +} // namespace NUuid +} // namespace NKikimr diff --git a/yql/essentials/types/uuid/uuid.h b/yql/essentials/types/uuid/uuid.h index 8f771d3753d..3fed9d383ce 100644 --- a/yql/essentials/types/uuid/uuid.h +++ b/yql/essentials/types/uuid/uuid.h @@ -23,20 +23,17 @@ inline bool GetDigit(char c, ui32& digit) { digit = 0; if ('0' <= c && c <= '9') { digit = c - '0'; - } - else if ('a' <= c && c <= 'f') { + } else if ('a' <= c && c <= 'f') { digit = c - 'a' + 10; - } - else if ('A' <= c && c <= 'F') { + } else if ('A' <= c && c <= 'F') { digit = c - 'A' + 10; - } - else { + } else { return false; // non-hex character } return true; } -template<typename T> +template <typename T> inline bool IsValidUuid(const T& buf) { if (buf.Size() != 36) { return false; @@ -57,7 +54,7 @@ inline bool IsValidUuid(const T& buf) { return true; } -template<typename T> +template <typename T> bool ParseUuidToArray(const T& buf, ui16* dw, bool shortForm) { if (buf.size() != (shortForm ? 
32 : 36)) { return false; @@ -99,7 +96,7 @@ bool ParseUuidToArray(const T& buf, ui16* dw, bool shortForm) { return true; } -inline void UuidHalfsToBytes(char *dst, size_t dstSize, ui64 hi, ui64 low) { +inline void UuidHalfsToBytes(char* dst, size_t dstSize, ui64 hi, ui64 low) { union { char Bytes[UUID_LEN]; ui64 Half[2]; @@ -110,7 +107,7 @@ inline void UuidHalfsToBytes(char *dst, size_t dstSize, ui64 hi, ui64 low) { memcpy(dst, buf.Bytes, sizeof(buf)); } -inline void UuidBytesToHalfs(const char *str, size_t sz, ui64 &high, ui64 &low) { +inline void UuidBytesToHalfs(const char* str, size_t sz, ui64& high, ui64& low) { union { char Bytes[UUID_LEN]; ui64 Half[2]; @@ -121,5 +118,5 @@ inline void UuidBytesToHalfs(const char *str, size_t sz, ui64 &high, ui64 &low) high = buf.Half[1]; } -} -} +} // namespace NUuid +} // namespace NKikimr diff --git a/yql/essentials/types/uuid/ya.make b/yql/essentials/types/uuid/ya.make index 15cd576d234..1103b8e83d4 100644 --- a/yql/essentials/types/uuid/ya.make +++ b/yql/essentials/types/uuid/ya.make @@ -1,5 +1,7 @@ LIBRARY() +ENABLE(YQL_STYLE_CPP) + SRCS( uuid.cpp ) diff --git a/yql/essentials/udfs/common/compress_base/compress_udf.cpp b/yql/essentials/udfs/common/compress_base/compress_udf.cpp index efd2d0b3c54..2323f0a082a 100644 --- a/yql/essentials/udfs/common/compress_base/compress_udf.cpp +++ b/yql/essentials/udfs/common/compress_base/compress_udf.cpp @@ -3,15 +3,15 @@ using namespace NYql::NUdf; namespace NCompress { - SIMPLE_MODULE(TCompressModule, EXPORTED_COMPRESS_BASE_UDF); -} +SIMPLE_MODULE(TCompressModule, EXPORTED_COMPRESS_BASE_UDF); +} // namespace NCompress namespace NDecompress { - SIMPLE_MODULE(TDecompressModule, EXPORTED_DECOMPRESS_BASE_UDF); -} +SIMPLE_MODULE(TDecompressModule, EXPORTED_DECOMPRESS_BASE_UDF); +} // namespace NDecompress namespace NTryDecompress { - SIMPLE_MODULE(TTryDecompressModule, EXPORTED_TRY_DECOMPRESS_BASE_UDF); -} +SIMPLE_MODULE(TTryDecompressModule, EXPORTED_TRY_DECOMPRESS_BASE_UDF); +} // 
namespace NTryDecompress REGISTER_MODULES(NCompress::TCompressModule, NDecompress::TDecompressModule, NTryDecompress::TTryDecompressModule); diff --git a/yql/essentials/udfs/common/compress_base/lib/compress_base_udf.cpp b/yql/essentials/udfs/common/compress_base/lib/compress_base_udf.cpp index 237abe271eb..c9b6f7eb890 100644 --- a/yql/essentials/udfs/common/compress_base/lib/compress_base_udf.cpp +++ b/yql/essentials/udfs/common/compress_base/lib/compress_base_udf.cpp @@ -1 +1 @@ -#include "compress_base_udf.h"
\ No newline at end of file +#include "compress_base_udf.h" diff --git a/yql/essentials/udfs/common/compress_base/lib/compress_base_udf.h b/yql/essentials/udfs/common/compress_base/lib/compress_base_udf.h index 58709134d6a..9e655d96469 100644 --- a/yql/essentials/udfs/common/compress_base/lib/compress_base_udf.h +++ b/yql/essentials/udfs/common/compress_base/lib/compress_base_udf.h @@ -16,203 +16,210 @@ using namespace NYql::NUdf; namespace NCompress { - SIMPLE_UDF(TGzip, char*(TAutoMap<char*>, ui8)) { - TString result; - TStringOutput output(result); - TZLibCompress compress(&output, ZLib::GZip, args[1].Get<ui8>()); - compress.Write(args[0].AsStringRef()); - compress.Finish(); - return valueBuilder->NewString(result); - } +SIMPLE_UDF(TGzip, char*(TAutoMap<char*>, ui8)) { + TString result; + TStringOutput output(result); + TZLibCompress compress(&output, ZLib::GZip, args[1].Get<ui8>()); + compress.Write(args[0].AsStringRef()); + compress.Finish(); + return valueBuilder->NewString(result); +} - SIMPLE_UDF(TZlib, char*(TAutoMap<char*>, ui8)) { - TString result; - TStringOutput output(result); - TZLibCompress compress(&output, ZLib::ZLib, args[1].Get<ui8>()); - compress.Write(args[0].AsStringRef()); - compress.Finish(); - return valueBuilder->NewString(result); - } +SIMPLE_UDF(TZlib, char*(TAutoMap<char*>, ui8)) { + TString result; + TStringOutput output(result); + TZLibCompress compress(&output, ZLib::ZLib, args[1].Get<ui8>()); + compress.Write(args[0].AsStringRef()); + compress.Finish(); + return valueBuilder->NewString(result); +} - SIMPLE_UDF(TBrotli, char*(TAutoMap<char*>, ui8)) { - TString result; - TStringOutput output(result); - TBrotliCompress compress(&output, args[1].Get<ui8>()); - compress.Write(args[0].AsStringRef()); - compress.Finish(); - return valueBuilder->NewString(result); - } +SIMPLE_UDF(TBrotli, char*(TAutoMap<char*>, ui8)) { + TString result; + TStringOutput output(result); + TBrotliCompress compress(&output, args[1].Get<ui8>()); + 
compress.Write(args[0].AsStringRef()); + compress.Finish(); + return valueBuilder->NewString(result); +} - SIMPLE_UDF(TLzma, char*(TAutoMap<char*>, ui8)) { - TString result; - TStringOutput output(result); - TLzmaCompress compress(&output, args[1].Get<ui8>()); - compress.Write(args[0].AsStringRef()); - compress.Finish(); - return valueBuilder->NewString(result); - } +SIMPLE_UDF(TLzma, char*(TAutoMap<char*>, ui8)) { + TString result; + TStringOutput output(result); + TLzmaCompress compress(&output, args[1].Get<ui8>()); + compress.Write(args[0].AsStringRef()); + compress.Finish(); + return valueBuilder->NewString(result); +} - SIMPLE_UDF(TBZip2, char*(TAutoMap<char*>, ui8)) { - TString result; - TStringOutput output(result); - TBZipCompress compress(&output, args[1].Get<ui8>()); - compress.Write(args[0].AsStringRef()); - compress.Finish(); - return valueBuilder->NewString(result); - } +SIMPLE_UDF(TBZip2, char*(TAutoMap<char*>, ui8)) { + TString result; + TStringOutput output(result); + TBZipCompress compress(&output, args[1].Get<ui8>()); + compress.Write(args[0].AsStringRef()); + compress.Finish(); + return valueBuilder->NewString(result); +} - SIMPLE_UDF(TSnappy, char*(TAutoMap<char*>)) { - TString result; - const TStringRef& input = args[0].AsStringRef(); - snappy::Compress(input.Data(), input.Size(), &result); - return valueBuilder->NewString(result); - } +SIMPLE_UDF(TSnappy, char*(TAutoMap<char*>)) { + TString result; + const TStringRef& input = args[0].AsStringRef(); + snappy::Compress(input.Data(), input.Size(), &result); + return valueBuilder->NewString(result); +} - SIMPLE_UDF(TZstd, char*(TAutoMap<char*>, ui8)) { - TString result; - TStringOutput output(result); - TZstdCompress compress(&output, args[1].Get<ui8>()); - compress.Write(args[0].AsStringRef()); - compress.Finish(); - return valueBuilder->NewString(result); - } +SIMPLE_UDF(TZstd, char*(TAutoMap<char*>, ui8)) { + TString result; + TStringOutput output(result); + TZstdCompress compress(&output, 
args[1].Get<ui8>()); + compress.Write(args[0].AsStringRef()); + compress.Finish(); + return valueBuilder->NewString(result); } +} // namespace NCompress namespace NDecompress { - SIMPLE_UDF(TGzip, char*(TAutoMap<char*>)) { - const auto& ref = args->AsStringRef(); - TMemoryInput input(ref.Data(), ref.Size()); - TZLibDecompress decompress(&input); - return valueBuilder->NewString(decompress.ReadAll()); - } +SIMPLE_UDF(TGzip, char*(TAutoMap<char*>)) { + const auto& ref = args->AsStringRef(); + TMemoryInput input(ref.Data(), ref.Size()); + TZLibDecompress decompress(&input); + return valueBuilder->NewString(decompress.ReadAll()); +} - SIMPLE_UDF(TZlib, char*(TAutoMap<char*>)) { - const auto& ref = args->AsStringRef(); - TMemoryInput input(ref.Data(), ref.Size()); - TZLibDecompress decompress(&input); - return valueBuilder->NewString(decompress.ReadAll()); - } +SIMPLE_UDF(TZlib, char*(TAutoMap<char*>)) { + const auto& ref = args->AsStringRef(); + TMemoryInput input(ref.Data(), ref.Size()); + TZLibDecompress decompress(&input); + return valueBuilder->NewString(decompress.ReadAll()); +} - SIMPLE_UDF(TBrotli, char*(TAutoMap<char*>)) { - const auto& ref = args->AsStringRef(); - TMemoryInput input(ref.Data(), ref.Size()); - TBrotliDecompress decompress(&input); - return valueBuilder->NewString(decompress.ReadAll()); - } +SIMPLE_UDF(TBrotli, char*(TAutoMap<char*>)) { + const auto& ref = args->AsStringRef(); + TMemoryInput input(ref.Data(), ref.Size()); + TBrotliDecompress decompress(&input); + return valueBuilder->NewString(decompress.ReadAll()); +} - SIMPLE_UDF(TLzma, char*(TAutoMap<char*>)) { - const auto& ref = args->AsStringRef(); - TMemoryInput input(ref.Data(), ref.Size()); - TLzmaDecompress decompress(&input); - return valueBuilder->NewString(decompress.ReadAll()); - } +SIMPLE_UDF(TLzma, char*(TAutoMap<char*>)) { + const auto& ref = args->AsStringRef(); + TMemoryInput input(ref.Data(), ref.Size()); + TLzmaDecompress decompress(&input); + return 
valueBuilder->NewString(decompress.ReadAll()); +} - SIMPLE_UDF(TBZip2, char*(TAutoMap<char*>)) { - const auto& ref = args->AsStringRef(); - TMemoryInput input(ref.Data(), ref.Size()); - TBZipDecompress decompress(&input); - return valueBuilder->NewString(decompress.ReadAll()); - } +SIMPLE_UDF(TBZip2, char*(TAutoMap<char*>)) { + const auto& ref = args->AsStringRef(); + TMemoryInput input(ref.Data(), ref.Size()); + TBZipDecompress decompress(&input); + return valueBuilder->NewString(decompress.ReadAll()); +} - SIMPLE_UDF(TSnappy, char*(TAutoMap<char*>)) { - TString result; - const auto& value = args->AsStringRef(); - if (snappy::Uncompress(value.Data(), value.Size(), &result)) { - return valueBuilder->NewString(result); - } +SIMPLE_UDF(TSnappy, char*(TAutoMap<char*>)) { + TString result; + const auto& value = args->AsStringRef(); + if (snappy::Uncompress(value.Data(), value.Size(), &result)) { + return valueBuilder->NewString(result); + } - ythrow yexception() << "failed to decompress message with snappy"; - } + ythrow yexception() << "failed to decompress message with snappy"; +} - SIMPLE_UDF(TZstd, char*(TAutoMap<char*>)) { - const auto& ref = args->AsStringRef(); - TMemoryInput input(ref.Data(), ref.Size()); - TZstdDecompress decompress(&input); - return valueBuilder->NewString(decompress.ReadAll()); - } +SIMPLE_UDF(TZstd, char*(TAutoMap<char*>)) { + const auto& ref = args->AsStringRef(); + TMemoryInput input(ref.Data(), ref.Size()); + TZstdDecompress decompress(&input); + return valueBuilder->NewString(decompress.ReadAll()); +} - SIMPLE_UDF(TXz, char*(TAutoMap<char*>)) { - const auto& ref = args->AsStringRef(); - TMemoryInput input(ref.Data(), ref.Size()); - TXzDecompress decompress(&input); - return valueBuilder->NewString(decompress.ReadAll()); - } +SIMPLE_UDF(TXz, char*(TAutoMap<char*>)) { + const auto& ref = args->AsStringRef(); + TMemoryInput input(ref.Data(), ref.Size()); + TXzDecompress decompress(&input); + return 
valueBuilder->NewString(decompress.ReadAll()); } +} // namespace NDecompress namespace NTryDecompress { - SIMPLE_UDF(TGzip, TOptional<char*>(TAutoMap<char*>)) try { - const auto& ref = args->AsStringRef(); - TMemoryInput input(ref.Data(), ref.Size()); - TZLibDecompress decompress(&input); - return valueBuilder->NewString(decompress.ReadAll()); - } catch (const std::exception&) { - return TUnboxedValuePod(); - } +SIMPLE_UDF(TGzip, TOptional<char*>(TAutoMap<char*>)) +try { + const auto& ref = args->AsStringRef(); + TMemoryInput input(ref.Data(), ref.Size()); + TZLibDecompress decompress(&input); + return valueBuilder->NewString(decompress.ReadAll()); +} catch (const std::exception&) { + return TUnboxedValuePod(); +} - SIMPLE_UDF(TZlib, TOptional<char*>(TAutoMap<char*>)) try { - const auto& ref = args->AsStringRef(); - TMemoryInput input(ref.Data(), ref.Size()); - TZLibDecompress decompress(&input); - return valueBuilder->NewString(decompress.ReadAll()); - } catch (const std::exception&) { - return TUnboxedValuePod(); - } +SIMPLE_UDF(TZlib, TOptional<char*>(TAutoMap<char*>)) +try { + const auto& ref = args->AsStringRef(); + TMemoryInput input(ref.Data(), ref.Size()); + TZLibDecompress decompress(&input); + return valueBuilder->NewString(decompress.ReadAll()); +} catch (const std::exception&) { + return TUnboxedValuePod(); +} - SIMPLE_UDF(TBrotli, TOptional<char*>(TAutoMap<char*>)) try { - const auto& ref = args->AsStringRef(); - TMemoryInput input(ref.Data(), ref.Size()); - TBrotliDecompress decompress(&input); - return valueBuilder->NewString(decompress.ReadAll()); - } catch (const std::exception&) { - return TUnboxedValuePod(); - } +SIMPLE_UDF(TBrotli, TOptional<char*>(TAutoMap<char*>)) +try { + const auto& ref = args->AsStringRef(); + TMemoryInput input(ref.Data(), ref.Size()); + TBrotliDecompress decompress(&input); + return valueBuilder->NewString(decompress.ReadAll()); +} catch (const std::exception&) { + return TUnboxedValuePod(); +} - SIMPLE_UDF(TLzma, 
TOptional<char*>(TAutoMap<char*>)) try { - const auto& ref = args->AsStringRef(); - TMemoryInput input(ref.Data(), ref.Size()); - TLzmaDecompress decompress(&input); - return valueBuilder->NewString(decompress.ReadAll()); - } catch (const std::exception&) { - return TUnboxedValuePod(); - } +SIMPLE_UDF(TLzma, TOptional<char*>(TAutoMap<char*>)) +try { + const auto& ref = args->AsStringRef(); + TMemoryInput input(ref.Data(), ref.Size()); + TLzmaDecompress decompress(&input); + return valueBuilder->NewString(decompress.ReadAll()); +} catch (const std::exception&) { + return TUnboxedValuePod(); +} - SIMPLE_UDF(TBZip2, TOptional<char*>(TAutoMap<char*>)) try { - const auto& ref = args->AsStringRef(); - TMemoryInput input(ref.Data(), ref.Size()); - TBZipDecompress decompress(&input); - return valueBuilder->NewString(decompress.ReadAll()); - } catch (const std::exception&) { - return TUnboxedValuePod(); - } +SIMPLE_UDF(TBZip2, TOptional<char*>(TAutoMap<char*>)) +try { + const auto& ref = args->AsStringRef(); + TMemoryInput input(ref.Data(), ref.Size()); + TBZipDecompress decompress(&input); + return valueBuilder->NewString(decompress.ReadAll()); +} catch (const std::exception&) { + return TUnboxedValuePod(); +} - SIMPLE_UDF(TSnappy, TOptional<char*>(TAutoMap<char*>)) { - TString result; - const auto& value = args->AsStringRef(); - if (snappy::Uncompress(value.Data(), value.Size(), &result)) { - return valueBuilder->NewString(result); - } - return TUnboxedValuePod(); +SIMPLE_UDF(TSnappy, TOptional<char*>(TAutoMap<char*>)) { + TString result; + const auto& value = args->AsStringRef(); + if (snappy::Uncompress(value.Data(), value.Size(), &result)) { + return valueBuilder->NewString(result); } + return TUnboxedValuePod(); +} - SIMPLE_UDF(TZstd, TOptional<char*>(TAutoMap<char*>)) try { - const auto& ref = args->AsStringRef(); - TMemoryInput input(ref.Data(), ref.Size()); - TZstdDecompress decompress(&input); - return valueBuilder->NewString(decompress.ReadAll()); - } catch 
(const std::exception&) { - return TUnboxedValuePod(); - } +SIMPLE_UDF(TZstd, TOptional<char*>(TAutoMap<char*>)) +try { + const auto& ref = args->AsStringRef(); + TMemoryInput input(ref.Data(), ref.Size()); + TZstdDecompress decompress(&input); + return valueBuilder->NewString(decompress.ReadAll()); +} catch (const std::exception&) { + return TUnboxedValuePod(); +} - SIMPLE_UDF(TXz, TOptional<char*>(TAutoMap<char*>)) try { - const auto& ref = args->AsStringRef(); - TMemoryInput input(ref.Data(), ref.Size()); - TXzDecompress decompress(&input); - return valueBuilder->NewString(decompress.ReadAll()); - } catch (const std::exception&) { - return TUnboxedValuePod(); - } +SIMPLE_UDF(TXz, TOptional<char*>(TAutoMap<char*>)) +try { + const auto& ref = args->AsStringRef(); + TMemoryInput input(ref.Data(), ref.Size()); + TXzDecompress decompress(&input); + return valueBuilder->NewString(decompress.ReadAll()); +} catch (const std::exception&) { + return TUnboxedValuePod(); } +} // namespace NTryDecompress -#define EXPORTED_COMPRESS_BASE_UDF TGzip, TZlib, TBrotli, TLzma, TBZip2, TSnappy, TZstd -#define EXPORTED_DECOMPRESS_BASE_UDF TGzip, TZlib, TBrotli, TLzma, TBZip2, TSnappy, TZstd, TXz +#define EXPORTED_COMPRESS_BASE_UDF TGzip, TZlib, TBrotli, TLzma, TBZip2, TSnappy, TZstd +#define EXPORTED_DECOMPRESS_BASE_UDF TGzip, TZlib, TBrotli, TLzma, TBZip2, TSnappy, TZstd, TXz #define EXPORTED_TRY_DECOMPRESS_BASE_UDF TGzip, TZlib, TBrotli, TLzma, TBZip2, TSnappy, TZstd, TXz diff --git a/yql/essentials/udfs/common/compress_base/lib/ya.make b/yql/essentials/udfs/common/compress_base/lib/ya.make index ca606d244a0..0d03f21c597 100644 --- a/yql/essentials/udfs/common/compress_base/lib/ya.make +++ b/yql/essentials/udfs/common/compress_base/lib/ya.make @@ -6,6 +6,8 @@ YQL_ABI_VERSION( 0 ) +ENABLE(YQL_STYLE_CPP) + SRCS( compress_base_udf.cpp ) diff --git a/yql/essentials/udfs/common/compress_base/ya.make b/yql/essentials/udfs/common/compress_base/ya.make index 4859a4e53cd..93861b3d98b 100644 
--- a/yql/essentials/udfs/common/compress_base/ya.make +++ b/yql/essentials/udfs/common/compress_base/ya.make @@ -6,6 +6,8 @@ YQL_ABI_VERSION( 0 ) +ENABLE(YQL_STYLE_CPP) + SRCS( compress_udf.cpp ) diff --git a/yql/essentials/udfs/common/datetime2/datetime_udf.cpp b/yql/essentials/udfs/common/datetime2/datetime_udf.cpp index acc42ad2cbe..f6ae912cb79 100644 --- a/yql/essentials/udfs/common/datetime2/datetime_udf.cpp +++ b/yql/essentials/udfs/common/datetime2/datetime_udf.cpp @@ -61,25 +61,25 @@ extern const char TM64ResourceName[] = "DateTime2.TM64"; namespace { -template<typename Type> +template <typename Type> static void PrintTypeAlternatives(NUdf::IFunctionTypeInfoBuilder& builder, - ITypeInfoHelper::TPtr typeInfoHelper, TStringBuilder& strBuilder) + ITypeInfoHelper::TPtr typeInfoHelper, TStringBuilder& strBuilder) { TTypePrinter(*typeInfoHelper, builder.SimpleType<Type>()).Out(strBuilder.Out); } -template<typename Type, typename Head, typename... Tail> +template <typename Type, typename Head, typename... Tail> static void PrintTypeAlternatives(NUdf::IFunctionTypeInfoBuilder& builder, - ITypeInfoHelper::TPtr typeInfoHelper, TStringBuilder& strBuilder) + ITypeInfoHelper::TPtr typeInfoHelper, TStringBuilder& strBuilder) { PrintTypeAlternatives<Type>(builder, typeInfoHelper, strBuilder); strBuilder << " or "; PrintTypeAlternatives<Head, Tail...>(builder, typeInfoHelper, strBuilder); } -template<typename... Types> +template <typename... 
Types> static void SetInvalidTypeError(NUdf::IFunctionTypeInfoBuilder& builder, - ITypeInfoHelper::TPtr typeInfoHelper, const TType* argType) + ITypeInfoHelper::TPtr typeInfoHelper, const TType* argType) { ::TStringBuilder sb; sb << "Invalid argument type: got "; @@ -91,26 +91,25 @@ static void SetInvalidTypeError(NUdf::IFunctionTypeInfoBuilder& builder, } static void SetResourceExpectedError(NUdf::IFunctionTypeInfoBuilder& builder, - ITypeInfoHelper::TPtr typeInfoHelper, const TType* argType) + ITypeInfoHelper::TPtr typeInfoHelper, const TType* argType) { SetInvalidTypeError< TResource<TMResourceName>, - TResource<TM64ResourceName> - >(builder, typeInfoHelper, argType); + TResource<TM64ResourceName>>(builder, typeInfoHelper, argType); } static void SetIntervalExpectedError(NUdf::IFunctionTypeInfoBuilder& builder, - ITypeInfoHelper::TPtr typeInfoHelper, const TType* argType) + ITypeInfoHelper::TPtr typeInfoHelper, const TType* argType) { SetInvalidTypeError<TInterval, TInterval64>(builder, typeInfoHelper, argType); } -template<const char* TResourceName> +template <const char* TResourceName> static void PrintTagAlternatives(TStringBuilder& strBuilder) { strBuilder << "'" << TResourceName << "'"; } -template<const char* TResourceName, const char* Head, const char*... Tail> +template <const char* TResourceName, const char* Head, const char*... 
Tail> static void PrintTagAlternatives(TStringBuilder& strBuilder) { PrintTagAlternatives<TResourceName>(strBuilder); strBuilder << " or "; @@ -118,7 +117,7 @@ static void PrintTagAlternatives(TStringBuilder& strBuilder) { } static void SetUnexpectedTagError(NUdf::IFunctionTypeInfoBuilder& builder, - TStringRef tag) + TStringRef tag) { ::TStringBuilder sb; sb << "Unexpected Resource tag: got '" << tag << "', but "; @@ -153,20 +152,20 @@ public: return value * i64(86400) * TWResult(ScaleAfterSeconds); } - template<typename TTzDate> + template <typename TTzDate> static TResult TzBlockCore(TBlockItem tzDate); - template<> + template <> static TResult TzBlockCore<TTzDate>(TBlockItem tzDate) { return DateCore(tzDate.Get<ui16>()); } - template<> + template <> static TResult TzBlockCore<TTzDatetime>(TBlockItem tzDate) { return DatetimeCore(tzDate.Get<ui32>()); } - template<> + template <> static TResult TzBlockCore<TTzTimestamp>(TBlockItem tzDate) { return TimestampCore(tzDate.Get<ui64>()); } @@ -200,7 +199,7 @@ public: return name; } - template<typename TTzDate, typename TOutput> + template <typename TTzDate, typename TOutput> static auto MakeTzBlockExec() { using TReader = TTzDateBlockReader<TTzDate, /*Nullable*/ false>; return UnaryPreallocatedReaderExecImpl<TReader, TOutput, TzBlockCore<TTzDate>>; @@ -253,7 +252,6 @@ public: isOptional = true; } - TDataTypeInspector data(*typeInfoHelper, argType); if (!data) { builder.SetError("Data type expected"); @@ -301,11 +299,11 @@ public: if (typeId == TDataType<TDate>::Id || typeId == TDataType<TTzDate>::Id) { if (block) { const auto exec = (typeId == TDataType<TTzDate>::Id) - ? MakeTzBlockExec<TTzDate, TResult>() - : UnaryPreallocatedExecImpl<ui16, TResult, DateCore>; + ? 
MakeTzBlockExec<TTzDate, TResult>() + : UnaryPreallocatedExecImpl<ui16, TResult, DateCore>; builder.Implementation(new TSimpleArrowUdfImpl(argBlockTypes, outputType, block.IsScalar(), - exec, builder, TString(name), arrow::compute::NullHandling::INTERSECTION)); + exec, builder, TString(name), arrow::compute::NullHandling::INTERSECTION)); } else { builder.Implementation(new TUnaryOverOptionalImpl<ui16, TResult, DateCore>()); } @@ -315,11 +313,11 @@ public: if (typeId == TDataType<TDatetime>::Id || typeId == TDataType<TTzDatetime>::Id) { if (block) { const auto exec = (typeId == TDataType<TTzDatetime>::Id) - ? MakeTzBlockExec<TTzDatetime, TResult>() - : UnaryPreallocatedExecImpl<ui32, TResult, DatetimeCore>; + ? MakeTzBlockExec<TTzDatetime, TResult>() + : UnaryPreallocatedExecImpl<ui32, TResult, DatetimeCore>; builder.Implementation(new TSimpleArrowUdfImpl(argBlockTypes, outputType, block.IsScalar(), - exec, builder, TString(name), arrow::compute::NullHandling::INTERSECTION)); + exec, builder, TString(name), arrow::compute::NullHandling::INTERSECTION)); } else { builder.Implementation(new TUnaryOverOptionalImpl<ui32, TResult, DatetimeCore>()); } @@ -329,11 +327,11 @@ public: if (typeId == TDataType<TTimestamp>::Id || typeId == TDataType<TTzTimestamp>::Id) { if (block) { const auto exec = (typeId == TDataType<TTzTimestamp>::Id) - ? MakeTzBlockExec<TTzTimestamp, TResult>() - : UnaryPreallocatedExecImpl<ui64, TResult, TimestampCore>; + ? 
MakeTzBlockExec<TTzTimestamp, TResult>() + : UnaryPreallocatedExecImpl<ui64, TResult, TimestampCore>; builder.Implementation(new TSimpleArrowUdfImpl(argBlockTypes, outputType, block.IsScalar(), - exec, builder, TString(name), arrow::compute::NullHandling::INTERSECTION)); + exec, builder, TString(name), arrow::compute::NullHandling::INTERSECTION)); } else { builder.Implementation(new TUnaryOverOptionalImpl<ui64, TResult, TimestampCore>()); } @@ -343,7 +341,7 @@ public: if (typeId == TDataType<TInterval>::Id) { if (block) { builder.Implementation(new TSimpleArrowUdfImpl(argBlockTypes, outputType, block.IsScalar(), - UnaryPreallocatedExecImpl<i64, TSignedResult, IntervalCore>, builder, TString(name), arrow::compute::NullHandling::INTERSECTION)); + UnaryPreallocatedExecImpl<i64, TSignedResult, IntervalCore>, builder, TString(name), arrow::compute::NullHandling::INTERSECTION)); } else { builder.Implementation(new TUnaryOverOptionalImpl<i64, TSignedResult, IntervalCore>()); } @@ -485,7 +483,7 @@ struct TGetTimeComponent { if (typeId == TDataType<TDate>::Id) { if (block) { builder.Implementation(new TSimpleArrowUdfImpl(argBlockTypes, outputType, block.IsScalar(), - UnaryPreallocatedExecImpl<ui16, TFieldStorage, Core<ui16, true, false>>, builder, TString(name), arrow::compute::NullHandling::INTERSECTION)); + UnaryPreallocatedExecImpl<ui16, TFieldStorage, Core<ui16, true, false>>, builder, TString(name), arrow::compute::NullHandling::INTERSECTION)); } else { builder.Implementation(new TUnaryOverOptionalImpl<ui16, TFieldStorage, Core<ui16, true, false>>()); } @@ -494,7 +492,7 @@ struct TGetTimeComponent { if (typeId == TDataType<TDatetime>::Id) { if (block) { builder.Implementation(new TSimpleArrowUdfImpl(argBlockTypes, outputType, block.IsScalar(), - UnaryPreallocatedExecImpl<ui32, TFieldStorage, Core<ui32, false, false>>, builder, TString(name), arrow::compute::NullHandling::INTERSECTION)); + UnaryPreallocatedExecImpl<ui32, TFieldStorage, Core<ui32, false, false>>, 
builder, TString(name), arrow::compute::NullHandling::INTERSECTION)); } else { builder.Implementation(new TUnaryOverOptionalImpl<ui32, TFieldStorage, Core<ui32, false, false>>()); } @@ -503,7 +501,7 @@ struct TGetTimeComponent { if (typeId == TDataType<TTimestamp>::Id) { if (block) { builder.Implementation(new TSimpleArrowUdfImpl(argBlockTypes, outputType, block.IsScalar(), - UnaryPreallocatedExecImpl<ui64, TFieldStorage, Core<ui64, false, true>>, builder, TString(name), arrow::compute::NullHandling::INTERSECTION)); + UnaryPreallocatedExecImpl<ui64, TFieldStorage, Core<ui64, false, true>>, builder, TString(name), arrow::compute::NullHandling::INTERSECTION)); } else { builder.Implementation(new TUnaryOverOptionalImpl<ui64, TFieldStorage, Core<ui64, false, true>>()); } @@ -531,6 +529,7 @@ struct TGetTimeComponent { SetUnexpectedTagError(builder, resource.GetTag()); return true; } + private: template <typename TInput, bool AlwaysZero, bool InputFractional> static TFieldStorage Core(TInput val) { @@ -553,8 +552,8 @@ private: } } - template<typename TResult, TResult (*Func)(const TUnboxedValuePod&)> - class TImpl : public TBoxedValue { + template <typename TResult, TResult (*Func)(const TUnboxedValuePod&)> + class TImpl: public TBoxedValue { public: TUnboxedValue Run(const IValueBuilder* valueBuilder, const TUnboxedValuePod* args) const final { Y_UNUSED(valueBuilder); @@ -563,7 +562,7 @@ private: } }; - template<typename TResult, const char* TResourceName, TResult (*Func)(const TUnboxedValuePod&)> + template <typename TResult, const char* TResourceName, TResult (*Func)(const TUnboxedValuePod&)> static void BuildSignature(NUdf::IFunctionTypeInfoBuilder& builder, bool typesOnly) { builder.Returns<TResult>(); builder.Args()->Add<TAutoMap<TResource<TResourceName>>>(); @@ -576,21 +575,21 @@ private: namespace { -template<const char* TResourceName, typename TValue, - typename TStorage = std::conditional_t<TResourceName == TMResourceName, - TTMStorage, TTM64Storage>> +template 
<const char* TResourceName, typename TValue, + typename TStorage = std::conditional_t<TResourceName == TMResourceName, + TTMStorage, TTM64Storage>> const TStorage& Reference(const TValue& value) { return *reinterpret_cast<const TStorage*>(value.GetRawPtr()); } -template<const char* TResourceName, typename TValue, - typename TStorage = std::conditional_t<TResourceName == TMResourceName, - TTMStorage, TTM64Storage>> +template <const char* TResourceName, typename TValue, + typename TStorage = std::conditional_t<TResourceName == TMResourceName, + TTMStorage, TTM64Storage>> TStorage& Reference(TValue& value) { return *reinterpret_cast<TStorage*>(value.GetRawPtr()); } -template<const char* TResourceName> +template <const char* TResourceName> TUnboxedValuePod DoAddMonths(const TUnboxedValuePod& date, i64 months, const NUdf::IDateBuilder& builder) { auto result = date; auto& storage = Reference<TResourceName>(result); @@ -600,12 +599,12 @@ TUnboxedValuePod DoAddMonths(const TUnboxedValuePod& date, i64 months, const NUd return result; } -template<const char* TResourceName> +template <const char* TResourceName> TUnboxedValuePod DoAddQuarters(const TUnboxedValuePod& date, i64 quarters, const NUdf::IDateBuilder& builder) { return DoAddMonths<TResourceName>(date, quarters * 3ll, builder); } -template<const char* TResourceName> +template <const char* TResourceName> TUnboxedValuePod DoAddYears(const TUnboxedValuePod& date, i64 years, const NUdf::IDateBuilder& builder) { auto result = date; auto& storage = Reference<TResourceName>(result); @@ -615,681 +614,671 @@ TUnboxedValuePod DoAddYears(const TUnboxedValuePod& date, i64 years, const NUdf: return result; } -#define ACCESSORS_POLY(field, type, wtype) \ - template<const char* TResourceName, typename TValue, typename TRetType \ - = std::conditional_t<TResourceName == TMResourceName, type, wtype>> \ - inline TRetType Get##field(const TValue& tm) { \ - return (TRetType)Reference<TResourceName>(tm).field; \ - } \ - template<const 
char* TResourceName, typename TValue, typename TArgType \ - = std::conditional_t<TResourceName == TMResourceName, type, wtype>> \ - inline void Set##field(TValue& tm, TArgType value) { \ - Reference<TResourceName>(tm).field = value; \ - } \ +#define ACCESSORS_POLY(field, type, wtype) \ + template <const char* TResourceName, typename TValue, typename TRetType = std::conditional_t<TResourceName == TMResourceName, type, wtype>> \ + inline TRetType Get##field(const TValue& tm) { \ + return (TRetType)Reference<TResourceName>(tm).field; \ + } \ + template <const char* TResourceName, typename TValue, typename TArgType = std::conditional_t<TResourceName == TMResourceName, type, wtype>> \ + inline void Set##field(TValue& tm, TArgType value) { \ + Reference<TResourceName>(tm).field = value; \ + } #define ACCESSORS(field, type) \ ACCESSORS_POLY(field, type, type) - ACCESSORS_POLY(Year, ui16, i32) - ACCESSORS(DayOfYear, ui16) - ACCESSORS(WeekOfYear, ui8) - ACCESSORS(WeekOfYearIso8601, ui8) - ACCESSORS(DayOfWeek, ui8) - ACCESSORS(Month, ui8) - ACCESSORS(Day, ui8) - ACCESSORS(Hour, ui8) - ACCESSORS(Minute, ui8) - ACCESSORS(Second, ui8) - ACCESSORS(Microsecond, ui32) - ACCESSORS(TimezoneId, ui16) +ACCESSORS_POLY(Year, ui16, i32) +ACCESSORS(DayOfYear, ui16) +ACCESSORS(WeekOfYear, ui8) +ACCESSORS(WeekOfYearIso8601, ui8) +ACCESSORS(DayOfWeek, ui8) +ACCESSORS(Month, ui8) +ACCESSORS(Day, ui8) +ACCESSORS(Hour, ui8) +ACCESSORS(Minute, ui8) +ACCESSORS(Second, ui8) +ACCESSORS(Microsecond, ui32) +ACCESSORS(TimezoneId, ui16) #undef ACCESSORS #undef ACCESSORS_POLY - template<const char* TResourceName> - inline bool ValidateYear(std::conditional_t<TResourceName == TMResourceName, ui16, i32> year) { - if constexpr (TResourceName == TMResourceName) { - return year >= NUdf::MIN_YEAR || year < NUdf::MAX_YEAR; - } else { - return year >= NUdf::MIN_YEAR32 || year < NUdf::MAX_YEAR32; - } +template <const char* TResourceName> +inline bool ValidateYear(std::conditional_t<TResourceName == 
TMResourceName, ui16, i32> year) { + if constexpr (TResourceName == TMResourceName) { + return year >= NUdf::MIN_YEAR || year < NUdf::MAX_YEAR; + } else { + return year >= NUdf::MIN_YEAR32 || year < NUdf::MAX_YEAR32; } +} - inline bool ValidateMonth(ui8 month) { - return month >= 1 && month <= 12; - } +inline bool ValidateMonth(ui8 month) { + return month >= 1 && month <= 12; +} - inline bool ValidateDay(ui8 day) { - return day >= 1 && day <= 31; - } +inline bool ValidateDay(ui8 day) { + return day >= 1 && day <= 31; +} - inline bool ValidateHour(ui8 hour) { - return hour < 24; - } +inline bool ValidateHour(ui8 hour) { + return hour < 24; +} - inline bool ValidateMinute(ui8 minute) { - return minute < 60; - } +inline bool ValidateMinute(ui8 minute) { + return minute < 60; +} - inline bool ValidateSecond(ui8 second) { - return second < 60; - } +inline bool ValidateSecond(ui8 second) { + return second < 60; +} - inline bool ValidateMicrosecond(ui32 microsecond) { - return microsecond < 1000000; - } +inline bool ValidateMicrosecond(ui32 microsecond) { + return microsecond < 1000000; +} - inline bool ValidateTimezoneId(ui16 timezoneId) { - const auto& zones = NTi::GetTimezones(); - return timezoneId < zones.size() && !zones[timezoneId].empty(); - } +inline bool ValidateTimezoneId(ui16 timezoneId) { + const auto& zones = NTi::GetTimezones(); + return timezoneId < zones.size() && !zones[timezoneId].empty(); +} - inline bool ValidateMonthShortName(const std::string_view& monthName, ui8& month) { - static constexpr auto cmp = [](const std::string_view& a, const std::string_view& b) { - int cmp = strnicmp(a.data(), b.data(), std::min(a.size(), b.size())); - if (cmp == 0) - return a.size() < b.size(); - return cmp < 0; - }; - static const std::map<std::string_view, ui8, decltype(cmp)> mp = { - {"jan", 1}, - {"feb", 2}, - {"mar", 3}, - {"apr", 4}, - {"may", 5}, - {"jun", 6}, - {"jul", 7}, - {"aug", 8}, - {"sep", 9}, - {"oct", 10}, - {"nov", 11}, - {"dec", 12} - }; - const 
auto& it = mp.find(monthName); - if (it != mp.end()) { - month = it -> second; - return true; +inline bool ValidateMonthShortName(const std::string_view& monthName, ui8& month) { + static constexpr auto cmp = [](const std::string_view& a, const std::string_view& b) { + int cmp = strnicmp(a.data(), b.data(), std::min(a.size(), b.size())); + if (cmp == 0) { + return a.size() < b.size(); } - return false; + return cmp < 0; + }; + static const std::map<std::string_view, ui8, decltype(cmp)> mp = { + {"jan", 1}, + {"feb", 2}, + {"mar", 3}, + {"apr", 4}, + {"may", 5}, + {"jun", 6}, + {"jul", 7}, + {"aug", 8}, + {"sep", 9}, + {"oct", 10}, + {"nov", 11}, + {"dec", 12}}; + const auto& it = mp.find(monthName); + if (it != mp.end()) { + month = it->second; + return true; } + return false; +} - inline bool ValidateMonthFullName(const std::string_view& monthName, ui8& month) { - static constexpr auto cmp = [](const std::string_view& a, const std::string_view& b) { - int cmp = strnicmp(a.data(), b.data(), std::min(a.size(), b.size())); - if (cmp == 0) - return a.size() < b.size(); - return cmp < 0; - }; - static const std::map<std::string_view, ui8, decltype(cmp)> mp = { - {"january", 1}, - {"february", 2}, - {"march", 3}, - {"april", 4}, - {"may", 5}, - {"june", 6}, - {"july", 7}, - {"august", 8}, - {"september", 9}, - {"october", 10}, - {"november", 11}, - {"december", 12} - }; - const auto& it = mp.find(monthName); - if (it != mp.end()) { - month = it -> second; - return true; +inline bool ValidateMonthFullName(const std::string_view& monthName, ui8& month) { + static constexpr auto cmp = [](const std::string_view& a, const std::string_view& b) { + int cmp = strnicmp(a.data(), b.data(), std::min(a.size(), b.size())); + if (cmp == 0) { + return a.size() < b.size(); } - return false; + return cmp < 0; + }; + static const std::map<std::string_view, ui8, decltype(cmp)> mp = { + {"january", 1}, + {"february", 2}, + {"march", 3}, + {"april", 4}, + {"may", 5}, + {"june", 6}, + 
{"july", 7}, + {"august", 8}, + {"september", 9}, + {"october", 10}, + {"november", 11}, + {"december", 12}}; + const auto& it = mp.find(monthName); + if (it != mp.end()) { + month = it->second; + return true; } + return false; +} - template<typename TType> - inline bool Validate(typename TDataType<TType>::TLayout arg); +template <typename TType> +inline bool Validate(typename TDataType<TType>::TLayout arg); - template<> - inline bool Validate<TTimestamp>(ui64 timestamp) { - return timestamp < MAX_TIMESTAMP; - } +template <> +inline bool Validate<TTimestamp>(ui64 timestamp) { + return timestamp < MAX_TIMESTAMP; +} - template<> - inline bool Validate<TTimestamp64>(i64 timestamp) { - return timestamp >= MIN_TIMESTAMP64 && timestamp <= MAX_TIMESTAMP64; - } +template <> +inline bool Validate<TTimestamp64>(i64 timestamp) { + return timestamp >= MIN_TIMESTAMP64 && timestamp <= MAX_TIMESTAMP64; +} - template<> - inline bool Validate<TInterval>(i64 interval) { - return interval > -i64(MAX_TIMESTAMP) && interval < i64(MAX_TIMESTAMP); - } +template <> +inline bool Validate<TInterval>(i64 interval) { + return interval > -i64(MAX_TIMESTAMP) && interval < i64(MAX_TIMESTAMP); +} - template<> - inline bool Validate<TInterval64>(i64 interval) { - return interval >= -MAX_INTERVAL64 && interval <= MAX_INTERVAL64; - } +template <> +inline bool Validate<TInterval64>(i64 interval) { + return interval >= -MAX_INTERVAL64 && interval <= MAX_INTERVAL64; +} - // Split +// Split - template<typename TUserDataType, bool Nullable> - using TSplitArgReader = std::conditional_t<TTzDataType<TUserDataType>::Result, - TTzDateBlockReader<TUserDataType, Nullable>, - TFixedSizeBlockReader<typename TDataType<TUserDataType>::TLayout, Nullable>>; +template <typename TUserDataType, bool Nullable> +using TSplitArgReader = std::conditional_t<TTzDataType<TUserDataType>::Result, + TTzDateBlockReader<TUserDataType, Nullable>, + TFixedSizeBlockReader<typename TDataType<TUserDataType>::TLayout, Nullable>>; - 
template<typename TUserDataType> - struct TSplitKernelExec : TUnaryKernelExec<TSplitKernelExec<TUserDataType>, TSplitArgReader<TUserDataType, false>, TResourceArrayBuilder<false>> { - static void Split(TBlockItem arg, TTMStorage& storage, const IValueBuilder& valueBuilder); +template <typename TUserDataType> +struct TSplitKernelExec: TUnaryKernelExec<TSplitKernelExec<TUserDataType>, TSplitArgReader<TUserDataType, false>, TResourceArrayBuilder<false>> { + static void Split(TBlockItem arg, TTMStorage& storage, const IValueBuilder& valueBuilder); - template<typename TSink> - static void Process(const IValueBuilder* valueBuilder, TBlockItem arg, const TSink& sink) { - try { - TBlockItem res {0}; - Split(arg, Reference<TMResourceName>(res), *valueBuilder); - sink(res); - } catch (const std::exception& e) { - UdfTerminate((TStringBuilder() << e.what()).c_str()); - } + template <typename TSink> + static void Process(const IValueBuilder* valueBuilder, TBlockItem arg, const TSink& sink) { + try { + TBlockItem res{0}; + Split(arg, Reference<TMResourceName>(res), *valueBuilder); + sink(res); + } catch (const std::exception& e) { + UdfTerminate((TStringBuilder() << e.what()).c_str()); } - }; + } +}; - template <typename TUserDataType> - class TSplit : public TBoxedValue { - const TSourcePosition Pos_; +template <typename TUserDataType> +class TSplit: public TBoxedValue { + const TSourcePosition Pos_; - public: - explicit TSplit(TSourcePosition pos) - : Pos_(pos) - {} +public: + explicit TSplit(TSourcePosition pos) + : Pos_(pos) + { + } - TUnboxedValue Run( - const IValueBuilder* valueBuilder, - const TUnboxedValuePod* args) const override; + TUnboxedValue Run( + const IValueBuilder* valueBuilder, + const TUnboxedValuePod* args) const override; - static bool DeclareSignature( - TStringRef name, - TType* userType, - IFunctionTypeInfoBuilder& builder, - bool typesOnly) - { - const auto typeInfoHelper = builder.TypeInfoHelper(); + static bool DeclareSignature( + TStringRef name, + 
TType* userType, + IFunctionTypeInfoBuilder& builder, + bool typesOnly) + { + const auto typeInfoHelper = builder.TypeInfoHelper(); - TTupleTypeInspector tuple(*typeInfoHelper, userType); - Y_ENSURE(tuple); - Y_ENSURE(tuple.GetElementsCount() > 0); - TTupleTypeInspector argsTuple(*typeInfoHelper, tuple.GetElementType(0)); - Y_ENSURE(argsTuple); + TTupleTypeInspector tuple(*typeInfoHelper, userType); + Y_ENSURE(tuple); + Y_ENSURE(tuple.GetElementsCount() > 0); + TTupleTypeInspector argsTuple(*typeInfoHelper, tuple.GetElementType(0)); + Y_ENSURE(argsTuple); - if (argsTuple.GetElementsCount() != 1) { - builder.SetError("Expected one argument"); - return true; - } - auto argType = argsTuple.GetElementType(0); + if (argsTuple.GetElementsCount() != 1) { + builder.SetError("Expected one argument"); + return true; + } + auto argType = argsTuple.GetElementType(0); - builder.UserType(userType); - builder.SupportsBlocks(); - builder.IsStrict(); + builder.UserType(userType); + builder.SupportsBlocks(); + builder.IsStrict(); - TBlockTypeInspector block(*typeInfoHelper, argType); - if (block) { - const auto* blockArgType = builder.Block(false)->Item<TUserDataType>().Build(); - builder.Args()->Add(blockArgType).Flags(ICallablePayload::TArgumentFlags::AutoMap); - const auto* retType = builder.Resource(TMResourceName); - const auto* blockRetType = builder.Block(false)->Item(retType).Build(); - builder.Returns(blockRetType); + TBlockTypeInspector block(*typeInfoHelper, argType); + if (block) { + const auto* blockArgType = builder.Block(false)->Item<TUserDataType>().Build(); + builder.Args()->Add(blockArgType).Flags(ICallablePayload::TArgumentFlags::AutoMap); + const auto* retType = builder.Resource(TMResourceName); + const auto* blockRetType = builder.Block(false)->Item(retType).Build(); + builder.Returns(blockRetType); - if (!typesOnly) { - builder.Implementation(new TSimpleArrowUdfImpl({blockArgType}, retType, block.IsScalar(), - TSplitKernelExec<TUserDataType>::Do, builder, 
TString(name), arrow::compute::NullHandling::COMPUTED_NO_PREALLOCATE)); - } + if (!typesOnly) { + builder.Implementation(new TSimpleArrowUdfImpl({blockArgType}, retType, block.IsScalar(), + TSplitKernelExec<TUserDataType>::Do, builder, TString(name), arrow::compute::NullHandling::COMPUTED_NO_PREALLOCATE)); + } + } else { + builder.Args()->Add<TUserDataType>().Flags(ICallablePayload::TArgumentFlags::AutoMap); + if constexpr (NUdf::TDataType<TUserDataType>::Features & NYql::NUdf::ExtDateType) { + builder.Returns(builder.Resource(TM64ResourceName)); } else { - builder.Args()->Add<TUserDataType>().Flags(ICallablePayload::TArgumentFlags::AutoMap); - if constexpr (NUdf::TDataType<TUserDataType>::Features & NYql::NUdf::ExtDateType) { - builder.Returns(builder.Resource(TM64ResourceName)); - } else { - builder.Returns(builder.Resource(TMResourceName)); - } - - if (!typesOnly) { - builder.Implementation(new TSplit<TUserDataType>(builder.GetSourcePosition())); - } + builder.Returns(builder.Resource(TMResourceName)); } - return true; + if (!typesOnly) { + builder.Implementation(new TSplit<TUserDataType>(builder.GetSourcePosition())); + } } - }; - - template <> - void TSplitKernelExec<TDate>::Split(TBlockItem arg, TTMStorage &storage, const IValueBuilder& builder) { - storage.FromDate(builder.GetDateBuilder(), arg.Get<ui16>()); - } - template <> - void TSplitKernelExec<TDatetime>::Split(TBlockItem arg, TTMStorage &storage, const IValueBuilder& builder) { - storage.FromDatetime(builder.GetDateBuilder(), arg.Get<ui32>()); + return true; } +}; - template <> - void TSplitKernelExec<TTimestamp>::Split(TBlockItem arg, TTMStorage &storage, const IValueBuilder& builder) { - storage.FromTimestamp(builder.GetDateBuilder(), arg.Get<ui64>()); - } +template <> +void TSplitKernelExec<TDate>::Split(TBlockItem arg, TTMStorage& storage, const IValueBuilder& builder) { + storage.FromDate(builder.GetDateBuilder(), arg.Get<ui16>()); +} - template <> - void 
TSplitKernelExec<TTzDate>::Split(TBlockItem arg, TTMStorage &storage, const IValueBuilder& builder) { - storage.FromDate(builder.GetDateBuilder(), arg.Get<ui16>(), arg.GetTimezoneId()); - } +template <> +void TSplitKernelExec<TDatetime>::Split(TBlockItem arg, TTMStorage& storage, const IValueBuilder& builder) { + storage.FromDatetime(builder.GetDateBuilder(), arg.Get<ui32>()); +} - template <> - void TSplitKernelExec<TTzDatetime>::Split(TBlockItem arg, TTMStorage &storage, const IValueBuilder& builder) { - storage.FromDatetime(builder.GetDateBuilder(), arg.Get<ui32>(), arg.GetTimezoneId()); - } +template <> +void TSplitKernelExec<TTimestamp>::Split(TBlockItem arg, TTMStorage& storage, const IValueBuilder& builder) { + storage.FromTimestamp(builder.GetDateBuilder(), arg.Get<ui64>()); +} - template <> - void TSplitKernelExec<TTzTimestamp>::Split(TBlockItem arg, TTMStorage &storage, const IValueBuilder& builder) { - storage.FromTimestamp(builder.GetDateBuilder(), arg.Get<ui64>(), arg.GetTimezoneId()); - } +template <> +void TSplitKernelExec<TTzDate>::Split(TBlockItem arg, TTMStorage& storage, const IValueBuilder& builder) { + storage.FromDate(builder.GetDateBuilder(), arg.Get<ui16>(), arg.GetTimezoneId()); +} - template <> - void TSplitKernelExec<TDate32>::Split(TBlockItem, TTMStorage&, const IValueBuilder&) { - ythrow yexception() << "Not implemented"; - } +template <> +void TSplitKernelExec<TTzDatetime>::Split(TBlockItem arg, TTMStorage& storage, const IValueBuilder& builder) { + storage.FromDatetime(builder.GetDateBuilder(), arg.Get<ui32>(), arg.GetTimezoneId()); +} - template <> - void TSplitKernelExec<TDatetime64>::Split(TBlockItem, TTMStorage&, const IValueBuilder&) { - ythrow yexception() << "Not implemented"; - } +template <> +void TSplitKernelExec<TTzTimestamp>::Split(TBlockItem arg, TTMStorage& storage, const IValueBuilder& builder) { + storage.FromTimestamp(builder.GetDateBuilder(), arg.Get<ui64>(), arg.GetTimezoneId()); +} - template <> - void 
TSplitKernelExec<TTimestamp64>::Split(TBlockItem, TTMStorage&, const IValueBuilder&) { - ythrow yexception() << "Not implemented"; - } +template <> +void TSplitKernelExec<TDate32>::Split(TBlockItem, TTMStorage&, const IValueBuilder&) { + ythrow yexception() << "Not implemented"; +} - template <> - void TSplitKernelExec<TTzDate32>::Split(TBlockItem, TTMStorage&, const IValueBuilder&) { - ythrow yexception() << "Not implemented"; - } +template <> +void TSplitKernelExec<TDatetime64>::Split(TBlockItem, TTMStorage&, const IValueBuilder&) { + ythrow yexception() << "Not implemented"; +} - template <> - void TSplitKernelExec<TTzDatetime64>::Split(TBlockItem, TTMStorage&, const IValueBuilder&) { - ythrow yexception() << "Not implemented"; - } +template <> +void TSplitKernelExec<TTimestamp64>::Split(TBlockItem, TTMStorage&, const IValueBuilder&) { + ythrow yexception() << "Not implemented"; +} - template <> - void TSplitKernelExec<TTzTimestamp64>::Split(TBlockItem, TTMStorage&, const IValueBuilder&) { - ythrow yexception() << "Not implemented"; - } +template <> +void TSplitKernelExec<TTzDate32>::Split(TBlockItem, TTMStorage&, const IValueBuilder&) { + ythrow yexception() << "Not implemented"; +} - template <> - TUnboxedValue TSplit<TDate>::Run( - const IValueBuilder* valueBuilder, - const TUnboxedValuePod* args) const - { - try { - EMPTY_RESULT_ON_EMPTY_ARG(0); +template <> +void TSplitKernelExec<TTzDatetime64>::Split(TBlockItem, TTMStorage&, const IValueBuilder&) { + ythrow yexception() << "Not implemented"; +} - auto& builder = valueBuilder->GetDateBuilder(); - TUnboxedValuePod result(0); - auto& storage = Reference<TMResourceName>(result); - storage.FromDate(builder, args[0].Get<ui16>()); - return result; - } catch (const std::exception& e) { - UdfTerminate((TStringBuilder() << Pos_ << " " << e.what()).c_str()); - } - } +template <> +void TSplitKernelExec<TTzTimestamp64>::Split(TBlockItem, TTMStorage&, const IValueBuilder&) { + ythrow yexception() << "Not implemented"; 
+} - template <> - TUnboxedValue TSplit<TDate32>::Run( - const IValueBuilder* valueBuilder, - const TUnboxedValuePod* args) const - { - try { - EMPTY_RESULT_ON_EMPTY_ARG(0); +template <> +TUnboxedValue TSplit<TDate>::Run( + const IValueBuilder* valueBuilder, + const TUnboxedValuePod* args) const { + try { + EMPTY_RESULT_ON_EMPTY_ARG(0); - TUnboxedValuePod result(0); - auto& storage = Reference<TM64ResourceName>(result); - storage.FromDate32(valueBuilder->GetDateBuilder(), args[0].Get<i32>()); - return result; - } catch (const std::exception& e) { - UdfTerminate((TStringBuilder() << Pos_ << " " << e.what()).c_str()); - } + auto& builder = valueBuilder->GetDateBuilder(); + TUnboxedValuePod result(0); + auto& storage = Reference<TMResourceName>(result); + storage.FromDate(builder, args[0].Get<ui16>()); + return result; + } catch (const std::exception& e) { + UdfTerminate((TStringBuilder() << Pos_ << " " << e.what()).c_str()); } +} - template <> - TUnboxedValue TSplit<TDatetime>::Run( - const IValueBuilder* valueBuilder, - const TUnboxedValuePod* args) const - { - try { - EMPTY_RESULT_ON_EMPTY_ARG(0); +template <> +TUnboxedValue TSplit<TDate32>::Run( + const IValueBuilder* valueBuilder, + const TUnboxedValuePod* args) const { + try { + EMPTY_RESULT_ON_EMPTY_ARG(0); - auto& builder = valueBuilder->GetDateBuilder(); - TUnboxedValuePod result(0); - auto& storage = Reference<TMResourceName>(result); - storage.FromDatetime(builder, args[0].Get<ui32>()); - return result; - } catch (const std::exception& e) { - UdfTerminate((TStringBuilder() << Pos_ << " " << e.what()).c_str()); - } + TUnboxedValuePod result(0); + auto& storage = Reference<TM64ResourceName>(result); + storage.FromDate32(valueBuilder->GetDateBuilder(), args[0].Get<i32>()); + return result; + } catch (const std::exception& e) { + UdfTerminate((TStringBuilder() << Pos_ << " " << e.what()).c_str()); } +} - template <> - TUnboxedValue TSplit<TDatetime64>::Run( - const IValueBuilder* valueBuilder, - const 
TUnboxedValuePod* args) const - { - try { - EMPTY_RESULT_ON_EMPTY_ARG(0); +template <> +TUnboxedValue TSplit<TDatetime>::Run( + const IValueBuilder* valueBuilder, + const TUnboxedValuePod* args) const { + try { + EMPTY_RESULT_ON_EMPTY_ARG(0); - TUnboxedValuePod result(0); - auto& storage = Reference<TM64ResourceName>(result); - storage.FromDatetime64(valueBuilder->GetDateBuilder(), args[0].Get<i64>()); - return result; - } catch (const std::exception& e) { - UdfTerminate((TStringBuilder() << Pos_ << " " << e.what()).c_str()); - } + auto& builder = valueBuilder->GetDateBuilder(); + TUnboxedValuePod result(0); + auto& storage = Reference<TMResourceName>(result); + storage.FromDatetime(builder, args[0].Get<ui32>()); + return result; + } catch (const std::exception& e) { + UdfTerminate((TStringBuilder() << Pos_ << " " << e.what()).c_str()); } +} - template <> - TUnboxedValue TSplit<TTimestamp>::Run( - const IValueBuilder* valueBuilder, - const TUnboxedValuePod* args) const - { - try { - EMPTY_RESULT_ON_EMPTY_ARG(0); +template <> +TUnboxedValue TSplit<TDatetime64>::Run( + const IValueBuilder* valueBuilder, + const TUnboxedValuePod* args) const { + try { + EMPTY_RESULT_ON_EMPTY_ARG(0); - auto& builder = valueBuilder->GetDateBuilder(); - TUnboxedValuePod result(0); - auto& storage = Reference<TMResourceName>(result); - storage.FromTimestamp(builder, args[0].Get<ui64>()); - return result; - } catch (const std::exception& e) { - UdfTerminate((TStringBuilder() << Pos_ << " " << e.what()).c_str()); - } + TUnboxedValuePod result(0); + auto& storage = Reference<TM64ResourceName>(result); + storage.FromDatetime64(valueBuilder->GetDateBuilder(), args[0].Get<i64>()); + return result; + } catch (const std::exception& e) { + UdfTerminate((TStringBuilder() << Pos_ << " " << e.what()).c_str()); } +} - template <> - TUnboxedValue TSplit<TTimestamp64>::Run( - const IValueBuilder* valueBuilder, - const TUnboxedValuePod* args) const - { - try { - EMPTY_RESULT_ON_EMPTY_ARG(0); +template <> 
+TUnboxedValue TSplit<TTimestamp>::Run( + const IValueBuilder* valueBuilder, + const TUnboxedValuePod* args) const { + try { + EMPTY_RESULT_ON_EMPTY_ARG(0); - TUnboxedValuePod result(0); - auto& storage = Reference<TM64ResourceName>(result); - storage.FromTimestamp64(valueBuilder->GetDateBuilder(), args[0].Get<i64>()); - return result; - } catch (const std::exception& e) { - UdfTerminate((TStringBuilder() << Pos_ << " " << e.what()).c_str()); - } + auto& builder = valueBuilder->GetDateBuilder(); + TUnboxedValuePod result(0); + auto& storage = Reference<TMResourceName>(result); + storage.FromTimestamp(builder, args[0].Get<ui64>()); + return result; + } catch (const std::exception& e) { + UdfTerminate((TStringBuilder() << Pos_ << " " << e.what()).c_str()); } +} - template <> - TUnboxedValue TSplit<TTzDate>::Run( - const IValueBuilder* valueBuilder, - const TUnboxedValuePod* args) const - { - try { - EMPTY_RESULT_ON_EMPTY_ARG(0); +template <> +TUnboxedValue TSplit<TTimestamp64>::Run( + const IValueBuilder* valueBuilder, + const TUnboxedValuePod* args) const { + try { + EMPTY_RESULT_ON_EMPTY_ARG(0); - auto& builder = valueBuilder->GetDateBuilder(); - TUnboxedValuePod result(0); - auto& storage = Reference<TMResourceName>(result); - storage.FromDate(builder, args[0].Get<ui16>(), args[0].GetTimezoneId()); - return result; - } catch (const std::exception& e) { - UdfTerminate((TStringBuilder() << Pos_ << " " << e.what()).c_str()); - } + TUnboxedValuePod result(0); + auto& storage = Reference<TM64ResourceName>(result); + storage.FromTimestamp64(valueBuilder->GetDateBuilder(), args[0].Get<i64>()); + return result; + } catch (const std::exception& e) { + UdfTerminate((TStringBuilder() << Pos_ << " " << e.what()).c_str()); } +} - template <> - TUnboxedValue TSplit<TTzDate32>::Run( - const IValueBuilder* valueBuilder, - const TUnboxedValuePod* args) const - { - try { - EMPTY_RESULT_ON_EMPTY_ARG(0); +template <> +TUnboxedValue TSplit<TTzDate>::Run( + const IValueBuilder* 
valueBuilder, + const TUnboxedValuePod* args) const { + try { + EMPTY_RESULT_ON_EMPTY_ARG(0); - auto& builder = valueBuilder->GetDateBuilder(); - TUnboxedValuePod result(0); - auto& storage = Reference<TM64ResourceName>(result); - storage.FromDate32(builder, args[0].Get<i32>(), args[0].GetTimezoneId()); - return result; - } catch (const std::exception& e) { - UdfTerminate((TStringBuilder() << Pos_ << " " << e.what()).c_str()); - } + auto& builder = valueBuilder->GetDateBuilder(); + TUnboxedValuePod result(0); + auto& storage = Reference<TMResourceName>(result); + storage.FromDate(builder, args[0].Get<ui16>(), args[0].GetTimezoneId()); + return result; + } catch (const std::exception& e) { + UdfTerminate((TStringBuilder() << Pos_ << " " << e.what()).c_str()); } +} +template <> +TUnboxedValue TSplit<TTzDate32>::Run( + const IValueBuilder* valueBuilder, + const TUnboxedValuePod* args) const { + try { + EMPTY_RESULT_ON_EMPTY_ARG(0); - template <> - TUnboxedValue TSplit<TTzDatetime>::Run( - const IValueBuilder* valueBuilder, - const TUnboxedValuePod* args) const - { - try { - EMPTY_RESULT_ON_EMPTY_ARG(0); - - auto& builder = valueBuilder->GetDateBuilder(); - TUnboxedValuePod result(0); - auto& storage = Reference<TMResourceName>(result); - storage.FromDatetime(builder, args[0].Get<ui32>(), args[0].GetTimezoneId()); - return result; - } catch (const std::exception& e) { - UdfTerminate((TStringBuilder() << Pos_ << " " << e.what()).c_str()); - } + auto& builder = valueBuilder->GetDateBuilder(); + TUnboxedValuePod result(0); + auto& storage = Reference<TM64ResourceName>(result); + storage.FromDate32(builder, args[0].Get<i32>(), args[0].GetTimezoneId()); + return result; + } catch (const std::exception& e) { + UdfTerminate((TStringBuilder() << Pos_ << " " << e.what()).c_str()); } +} - template <> - TUnboxedValue TSplit<TTzDatetime64>::Run( - const IValueBuilder* valueBuilder, - const TUnboxedValuePod* args) const - { - try { - EMPTY_RESULT_ON_EMPTY_ARG(0); +template <> 
+TUnboxedValue TSplit<TTzDatetime>::Run( + const IValueBuilder* valueBuilder, + const TUnboxedValuePod* args) const { + try { + EMPTY_RESULT_ON_EMPTY_ARG(0); - auto& builder = valueBuilder->GetDateBuilder(); - TUnboxedValuePod result(0); - auto& storage = Reference<TM64ResourceName>(result); - storage.FromDatetime64(builder, args[0].Get<i64>(), args[0].GetTimezoneId()); - return result; - } catch (const std::exception& e) { - UdfTerminate((TStringBuilder() << Pos_ << " " << e.what()).c_str()); - } + auto& builder = valueBuilder->GetDateBuilder(); + TUnboxedValuePod result(0); + auto& storage = Reference<TMResourceName>(result); + storage.FromDatetime(builder, args[0].Get<ui32>(), args[0].GetTimezoneId()); + return result; + } catch (const std::exception& e) { + UdfTerminate((TStringBuilder() << Pos_ << " " << e.what()).c_str()); } +} +template <> +TUnboxedValue TSplit<TTzDatetime64>::Run( + const IValueBuilder* valueBuilder, + const TUnboxedValuePod* args) const { + try { + EMPTY_RESULT_ON_EMPTY_ARG(0); - template <> - TUnboxedValue TSplit<TTzTimestamp>::Run( - const IValueBuilder* valueBuilder, - const TUnboxedValuePod* args) const - { - try { - EMPTY_RESULT_ON_EMPTY_ARG(0); - - auto& builder = valueBuilder->GetDateBuilder(); - TUnboxedValuePod result(0); - auto& storage = Reference<TMResourceName>(result); - storage.FromTimestamp(builder, args[0].Get<ui64>(), args[0].GetTimezoneId()); - return result; - } catch (const std::exception& e) { - UdfTerminate((TStringBuilder() << Pos_ << " " << e.what()).c_str()); - } + auto& builder = valueBuilder->GetDateBuilder(); + TUnboxedValuePod result(0); + auto& storage = Reference<TM64ResourceName>(result); + storage.FromDatetime64(builder, args[0].Get<i64>(), args[0].GetTimezoneId()); + return result; + } catch (const std::exception& e) { + UdfTerminate((TStringBuilder() << Pos_ << " " << e.what()).c_str()); } +} - template <> - TUnboxedValue TSplit<TTzTimestamp64>::Run( - const IValueBuilder* valueBuilder, - const 
TUnboxedValuePod* args) const - { - try { - EMPTY_RESULT_ON_EMPTY_ARG(0); +template <> +TUnboxedValue TSplit<TTzTimestamp>::Run( + const IValueBuilder* valueBuilder, + const TUnboxedValuePod* args) const { + try { + EMPTY_RESULT_ON_EMPTY_ARG(0); - auto& builder = valueBuilder->GetDateBuilder(); - TUnboxedValuePod result(0); - auto& storage = Reference<TM64ResourceName>(result); - storage.FromTimestamp64(builder, args[0].Get<i64>(), args[0].GetTimezoneId()); - return result; - } catch (const std::exception& e) { - UdfTerminate((TStringBuilder() << Pos_ << " " << e.what()).c_str()); - } + auto& builder = valueBuilder->GetDateBuilder(); + TUnboxedValuePod result(0); + auto& storage = Reference<TMResourceName>(result); + storage.FromTimestamp(builder, args[0].Get<ui64>(), args[0].GetTimezoneId()); + return result; + } catch (const std::exception& e) { + UdfTerminate((TStringBuilder() << Pos_ << " " << e.what()).c_str()); } +} - // Make* +template <> +TUnboxedValue TSplit<TTzTimestamp64>::Run( + const IValueBuilder* valueBuilder, + const TUnboxedValuePod* args) const { + try { + EMPTY_RESULT_ON_EMPTY_ARG(0); - template<typename TUserDataType, bool Nullable> - using TMakeResBuilder = std::conditional_t<TTzDataType<TUserDataType>::Result, - TTzDateArrayBuilder<TUserDataType, Nullable>, - TFixedSizeArrayBuilder<typename TDataType<TUserDataType>::TLayout, Nullable>>; + auto& builder = valueBuilder->GetDateBuilder(); + TUnboxedValuePod result(0); + auto& storage = Reference<TM64ResourceName>(result); + storage.FromTimestamp64(builder, args[0].Get<i64>(), args[0].GetTimezoneId()); + return result; + } catch (const std::exception& e) { + UdfTerminate((TStringBuilder() << Pos_ << " " << e.what()).c_str()); + } +} - template<typename TUserDataType> - struct TMakeDateKernelExec : TUnaryKernelExec<TMakeDateKernelExec<TUserDataType>, TReaderTraits::TResource<false>, TMakeResBuilder<TUserDataType, false>> { - static TBlockItem Make(TTMStorage& storage, const IValueBuilder& 
valueBuilder); +// Make* - template<typename TSink> - static void Process(const IValueBuilder* valueBuilder, TBlockItem item, const TSink& sink) { - auto& storage = Reference<TMResourceName>(item); - sink(TBlockItem(Make(storage, *valueBuilder))); - } - }; +template <typename TUserDataType, bool Nullable> +using TMakeResBuilder = std::conditional_t<TTzDataType<TUserDataType>::Result, + TTzDateArrayBuilder<TUserDataType, Nullable>, + TFixedSizeArrayBuilder<typename TDataType<TUserDataType>::TLayout, Nullable>>; - template<> TBlockItem TMakeDateKernelExec<TDate>::Make(TTMStorage& storage, const IValueBuilder& valueBuilder) { - TBlockItem res(storage.ToDate(valueBuilder.GetDateBuilder(), /*local*/ false)); - return res; - } +template <typename TUserDataType> +struct TMakeDateKernelExec: TUnaryKernelExec<TMakeDateKernelExec<TUserDataType>, TReaderTraits::TResource<false>, TMakeResBuilder<TUserDataType, false>> { + static TBlockItem Make(TTMStorage& storage, const IValueBuilder& valueBuilder); - template<> TBlockItem TMakeDateKernelExec<TDatetime>::Make(TTMStorage& storage, const IValueBuilder& valueBuilder) { - TBlockItem res(storage.ToDatetime(valueBuilder.GetDateBuilder())); - return res; + template <typename TSink> + static void Process(const IValueBuilder* valueBuilder, TBlockItem item, const TSink& sink) { + auto& storage = Reference<TMResourceName>(item); + sink(TBlockItem(Make(storage, *valueBuilder))); } +}; - template<> TBlockItem TMakeDateKernelExec<TTimestamp>::Make(TTMStorage& storage, const IValueBuilder& valueBuilder) { - TBlockItem res(storage.ToTimestamp(valueBuilder.GetDateBuilder())); - return res; - } +template <> +TBlockItem TMakeDateKernelExec<TDate>::Make(TTMStorage& storage, const IValueBuilder& valueBuilder) { + TBlockItem res(storage.ToDate(valueBuilder.GetDateBuilder(), /*local*/ false)); + return res; +} - template<> TBlockItem TMakeDateKernelExec<TTzDate>::Make(TTMStorage& storage, const IValueBuilder& valueBuilder) { - TBlockItem 
res(storage.ToDate(valueBuilder.GetDateBuilder(), /*local*/ true)); - res.SetTimezoneId(storage.TimezoneId); - return res; - } +template <> +TBlockItem TMakeDateKernelExec<TDatetime>::Make(TTMStorage& storage, const IValueBuilder& valueBuilder) { + TBlockItem res(storage.ToDatetime(valueBuilder.GetDateBuilder())); + return res; +} - template<> TBlockItem TMakeDateKernelExec<TTzDatetime>::Make(TTMStorage& storage, const IValueBuilder& valueBuilder) { - TBlockItem res(storage.ToDatetime(valueBuilder.GetDateBuilder())); - res.SetTimezoneId(storage.TimezoneId); - return res; - } +template <> +TBlockItem TMakeDateKernelExec<TTimestamp>::Make(TTMStorage& storage, const IValueBuilder& valueBuilder) { + TBlockItem res(storage.ToTimestamp(valueBuilder.GetDateBuilder())); + return res; +} - template<> TBlockItem TMakeDateKernelExec<TTzTimestamp>::Make(TTMStorage& storage, const IValueBuilder& valueBuilder) { - TBlockItem res(storage.ToTimestamp(valueBuilder.GetDateBuilder())); - res.SetTimezoneId(storage.TimezoneId); - return res; - } +template <> +TBlockItem TMakeDateKernelExec<TTzDate>::Make(TTMStorage& storage, const IValueBuilder& valueBuilder) { + TBlockItem res(storage.ToDate(valueBuilder.GetDateBuilder(), /*local*/ true)); + res.SetTimezoneId(storage.TimezoneId); + return res; +} - BEGIN_SIMPLE_STRICT_ARROW_UDF(TMakeDate, TDate(TAutoMap<TResource<TMResourceName>>)) { - auto& builder = valueBuilder->GetDateBuilder(); - auto& storage = Reference<TMResourceName>(args[0]); - return TUnboxedValuePod(storage.ToDate(builder, false)); - } - END_SIMPLE_ARROW_UDF(TMakeDate, TMakeDateKernelExec<TDate>::Do); +template <> +TBlockItem TMakeDateKernelExec<TTzDatetime>::Make(TTMStorage& storage, const IValueBuilder& valueBuilder) { + TBlockItem res(storage.ToDatetime(valueBuilder.GetDateBuilder())); + res.SetTimezoneId(storage.TimezoneId); + return res; +} - BEGIN_SIMPLE_STRICT_ARROW_UDF(TMakeDatetime, TDatetime(TAutoMap<TResource<TMResourceName>>)) { - auto& builder = 
valueBuilder->GetDateBuilder(); - auto& storage = Reference<TMResourceName>(args[0]); - return TUnboxedValuePod(storage.ToDatetime(builder)); - } - END_SIMPLE_ARROW_UDF(TMakeDatetime, TMakeDateKernelExec<TDatetime>::Do); +template <> +TBlockItem TMakeDateKernelExec<TTzTimestamp>::Make(TTMStorage& storage, const IValueBuilder& valueBuilder) { + TBlockItem res(storage.ToTimestamp(valueBuilder.GetDateBuilder())); + res.SetTimezoneId(storage.TimezoneId); + return res; +} - BEGIN_SIMPLE_STRICT_ARROW_UDF(TMakeTimestamp, TTimestamp(TAutoMap<TResource<TMResourceName>>)) { - auto& builder = valueBuilder->GetDateBuilder(); - auto& storage = Reference<TMResourceName>(args[0]); - return TUnboxedValuePod(storage.ToTimestamp(builder)); - } - END_SIMPLE_ARROW_UDF(TMakeTimestamp, TMakeDateKernelExec<TTimestamp>::Do); +BEGIN_SIMPLE_STRICT_ARROW_UDF(TMakeDate, TDate(TAutoMap<TResource<TMResourceName>>)) { + auto& builder = valueBuilder->GetDateBuilder(); + auto& storage = Reference<TMResourceName>(args[0]); + return TUnboxedValuePod(storage.ToDate(builder, false)); +} +END_SIMPLE_ARROW_UDF(TMakeDate, TMakeDateKernelExec<TDate>::Do); - BEGIN_SIMPLE_STRICT_ARROW_UDF(TMakeTzDate, TTzDate(TAutoMap<TResource<TMResourceName>>)) { - auto& builder = valueBuilder->GetDateBuilder(); - auto& storage = Reference<TMResourceName>(args[0]); - try { - TUnboxedValuePod result(storage.ToDate(builder, true)); - result.SetTimezoneId(storage.TimezoneId); - return result; - } catch (const std::exception& e) { - UdfTerminate((TStringBuilder() << Pos_ << "Timestamp " - << storage.ToString() - << " cannot be casted to TzDate" - ).c_str()); - } - } - END_SIMPLE_ARROW_UDF(TMakeTzDate, TMakeDateKernelExec<TTzDate>::Do); +BEGIN_SIMPLE_STRICT_ARROW_UDF(TMakeDatetime, TDatetime(TAutoMap<TResource<TMResourceName>>)) { + auto& builder = valueBuilder->GetDateBuilder(); + auto& storage = Reference<TMResourceName>(args[0]); + return TUnboxedValuePod(storage.ToDatetime(builder)); +} +END_SIMPLE_ARROW_UDF(TMakeDatetime, 
TMakeDateKernelExec<TDatetime>::Do); - BEGIN_SIMPLE_STRICT_ARROW_UDF(TMakeTzDatetime, TTzDatetime(TAutoMap<TResource<TMResourceName>>)) { - auto& builder = valueBuilder->GetDateBuilder(); - auto& storage = Reference<TMResourceName>(args[0]); - TUnboxedValuePod result(storage.ToDatetime(builder)); - result.SetTimezoneId(storage.TimezoneId); - return result; - } - END_SIMPLE_ARROW_UDF(TMakeTzDatetime, TMakeDateKernelExec<TTzDatetime>::Do); +BEGIN_SIMPLE_STRICT_ARROW_UDF(TMakeTimestamp, TTimestamp(TAutoMap<TResource<TMResourceName>>)) { + auto& builder = valueBuilder->GetDateBuilder(); + auto& storage = Reference<TMResourceName>(args[0]); + return TUnboxedValuePod(storage.ToTimestamp(builder)); +} +END_SIMPLE_ARROW_UDF(TMakeTimestamp, TMakeDateKernelExec<TTimestamp>::Do); - BEGIN_SIMPLE_STRICT_ARROW_UDF(TMakeTzTimestamp, TTzTimestamp(TAutoMap<TResource<TMResourceName>>)) { - auto& builder = valueBuilder->GetDateBuilder(); - auto& storage = Reference<TMResourceName>(args[0]); - TUnboxedValuePod result(storage.ToTimestamp(builder)); +BEGIN_SIMPLE_STRICT_ARROW_UDF(TMakeTzDate, TTzDate(TAutoMap<TResource<TMResourceName>>)) { + auto& builder = valueBuilder->GetDateBuilder(); + auto& storage = Reference<TMResourceName>(args[0]); + try { + TUnboxedValuePod result(storage.ToDate(builder, true)); result.SetTimezoneId(storage.TimezoneId); return result; + } catch (const std::exception& e) { + UdfTerminate((TStringBuilder() << Pos_ << "Timestamp " + << storage.ToString() + << " cannot be casted to TzDate") + .c_str()); } - END_SIMPLE_ARROW_UDF(TMakeTzTimestamp, TMakeDateKernelExec<TTzTimestamp>::Do); +} +END_SIMPLE_ARROW_UDF(TMakeTzDate, TMakeDateKernelExec<TTzDate>::Do); +BEGIN_SIMPLE_STRICT_ARROW_UDF(TMakeTzDatetime, TTzDatetime(TAutoMap<TResource<TMResourceName>>)) { + auto& builder = valueBuilder->GetDateBuilder(); + auto& storage = Reference<TMResourceName>(args[0]); + TUnboxedValuePod result(storage.ToDatetime(builder)); + result.SetTimezoneId(storage.TimezoneId); + return 
result; +} +END_SIMPLE_ARROW_UDF(TMakeTzDatetime, TMakeDateKernelExec<TTzDatetime>::Do); - SIMPLE_STRICT_UDF(TConvert, TResource<TM64ResourceName>(TAutoMap<TResource<TMResourceName>>)) { - Y_UNUSED(valueBuilder); - TUnboxedValuePod result(0); - auto& arg = Reference<TMResourceName>(args[0]); - auto& storage = Reference<TM64ResourceName>(result); - storage.From(arg); - return result; - } +BEGIN_SIMPLE_STRICT_ARROW_UDF(TMakeTzTimestamp, TTzTimestamp(TAutoMap<TResource<TMResourceName>>)) { + auto& builder = valueBuilder->GetDateBuilder(); + auto& storage = Reference<TMResourceName>(args[0]); + TUnboxedValuePod result(storage.ToTimestamp(builder)); + result.SetTimezoneId(storage.TimezoneId); + return result; +} +END_SIMPLE_ARROW_UDF(TMakeTzTimestamp, TMakeDateKernelExec<TTzTimestamp>::Do); - SIMPLE_STRICT_UDF(TMakeDate32, TDate32(TAutoMap<TResource<TM64ResourceName>>)) { - auto& storage = Reference<TM64ResourceName>(args[0]); - return TUnboxedValuePod(storage.ToDate32(valueBuilder->GetDateBuilder(), false)); - } +SIMPLE_STRICT_UDF(TConvert, TResource<TM64ResourceName>(TAutoMap<TResource<TMResourceName>>)) { + Y_UNUSED(valueBuilder); + TUnboxedValuePod result(0); + auto& arg = Reference<TMResourceName>(args[0]); + auto& storage = Reference<TM64ResourceName>(result); + storage.From(arg); + return result; +} - SIMPLE_STRICT_UDF(TMakeDatetime64, TDatetime64(TAutoMap<TResource<TM64ResourceName>>)) { - auto& storage = Reference<TM64ResourceName>(args[0]); - return TUnboxedValuePod(storage.ToDatetime64(valueBuilder->GetDateBuilder())); - } +SIMPLE_STRICT_UDF(TMakeDate32, TDate32(TAutoMap<TResource<TM64ResourceName>>)) { + auto& storage = Reference<TM64ResourceName>(args[0]); + return TUnboxedValuePod(storage.ToDate32(valueBuilder->GetDateBuilder(), false)); +} - SIMPLE_STRICT_UDF(TMakeTimestamp64, TTimestamp64(TAutoMap<TResource<TM64ResourceName>>)) { - auto& storage = Reference<TM64ResourceName>(args[0]); - return 
TUnboxedValuePod(storage.ToTimestamp64(valueBuilder->GetDateBuilder())); - } +SIMPLE_STRICT_UDF(TMakeDatetime64, TDatetime64(TAutoMap<TResource<TM64ResourceName>>)) { + auto& storage = Reference<TM64ResourceName>(args[0]); + return TUnboxedValuePod(storage.ToDatetime64(valueBuilder->GetDateBuilder())); +} - SIMPLE_STRICT_UDF(TMakeTzDate32, TTzDate32(TAutoMap<TResource<TM64ResourceName>>)) { - auto& builder = valueBuilder->GetDateBuilder(); - auto& storage = Reference<TM64ResourceName>(args[0]); - try { - TUnboxedValuePod result(storage.ToDate32(builder, true)); - result.SetTimezoneId(storage.TimezoneId); - return result; - } catch (const std::exception& e) { - UdfTerminate((TStringBuilder() << Pos_ << "Timestamp " - << storage.ToString() - << " cannot be casted to TzDate32" - ).c_str()); - } - } +SIMPLE_STRICT_UDF(TMakeTimestamp64, TTimestamp64(TAutoMap<TResource<TM64ResourceName>>)) { + auto& storage = Reference<TM64ResourceName>(args[0]); + return TUnboxedValuePod(storage.ToTimestamp64(valueBuilder->GetDateBuilder())); +} - SIMPLE_STRICT_UDF(TMakeTzDatetime64, TTzDatetime64(TAutoMap<TResource<TM64ResourceName>>)) { - auto& storage = Reference<TM64ResourceName>(args[0]); - TUnboxedValuePod result(storage.ToDatetime64(valueBuilder->GetDateBuilder())); +SIMPLE_STRICT_UDF(TMakeTzDate32, TTzDate32(TAutoMap<TResource<TM64ResourceName>>)) { + auto& builder = valueBuilder->GetDateBuilder(); + auto& storage = Reference<TM64ResourceName>(args[0]); + try { + TUnboxedValuePod result(storage.ToDate32(builder, true)); result.SetTimezoneId(storage.TimezoneId); return result; + } catch (const std::exception& e) { + UdfTerminate((TStringBuilder() << Pos_ << "Timestamp " + << storage.ToString() + << " cannot be casted to TzDate32") + .c_str()); } +} - SIMPLE_STRICT_UDF(TMakeTzTimestamp64, TTzTimestamp64(TAutoMap<TResource<TM64ResourceName>>)) { - auto& storage = Reference<TM64ResourceName>(args[0]); - TUnboxedValuePod result(storage.ToTimestamp64(valueBuilder->GetDateBuilder())); 
- result.SetTimezoneId(storage.TimezoneId); - return result; - } +SIMPLE_STRICT_UDF(TMakeTzDatetime64, TTzDatetime64(TAutoMap<TResource<TM64ResourceName>>)) { + auto& storage = Reference<TM64ResourceName>(args[0]); + TUnboxedValuePod result(storage.ToDatetime64(valueBuilder->GetDateBuilder())); + result.SetTimezoneId(storage.TimezoneId); + return result; +} + +SIMPLE_STRICT_UDF(TMakeTzTimestamp64, TTzTimestamp64(TAutoMap<TResource<TM64ResourceName>>)) { + auto& storage = Reference<TM64ResourceName>(args[0]); + TUnboxedValuePod result(storage.ToTimestamp64(valueBuilder->GetDateBuilder())); + result.SetTimezoneId(storage.TimezoneId); + return result; +} - // Get* +// Get* // #define GET_METHOD(field, type) \ // struct TGet##field##KernelExec : TUnaryKernelExec<TGet##field##KernelExec, TReaderTraits::TResource<false>, TFixedSizeArrayBuilder<type, false>> { \ @@ -1305,9 +1294,9 @@ TUnboxedValuePod DoAddYears(const TUnboxedValuePod& date, i64 years, const NUdf: // } \ // END_SIMPLE_ARROW_UDF_WITH_NULL_HANDLING(TGet##field, TGet##field##KernelExec::Do, arrow::compute::NullHandling::INTERSECTION); -template<const char* TUdfName, - typename TResultType, TResultType (*Accessor)(const TUnboxedValuePod&), - typename TResultWType, TResultWType (*WAccessor)(const TUnboxedValuePod&)> +template <const char* TUdfName, + typename TResultType, TResultType (*Accessor)(const TUnboxedValuePod&), + typename TResultWType, TResultWType (*WAccessor)(const TUnboxedValuePod&)> class TGetDateComponent: public ::NYql::NUdf::TBoxedValue { public: typedef bool TTypeAwareMarker; @@ -1393,9 +1382,10 @@ public: SetUnexpectedTagError(builder, resource.GetTag()); return true; } + private: - template<typename TResult, TResult (*Func)(const TUnboxedValuePod&)> - class TImpl : public TBoxedValue { + template <typename TResult, TResult (*Func)(const TUnboxedValuePod&)> + class TImpl: public TBoxedValue { public: TUnboxedValue Run(const IValueBuilder* valueBuilder, const TUnboxedValuePod* args) const 
final { Y_UNUSED(valueBuilder); @@ -1404,7 +1394,7 @@ private: } }; - template<typename TResult, const char* TResourceName, TResult (*Func)(const TUnboxedValuePod&)> + template <typename TResult, const char* TResourceName, TResult (*Func)(const TUnboxedValuePod&)> static void BuildSignature(NUdf::IFunctionTypeInfoBuilder& builder, bool typesOnly) { builder.Returns<TResult>(); builder.Args()->Add<TAutoMap<TResource<TResourceName>>>(); @@ -1416,7 +1406,7 @@ private: }; // TODO: Merge this with <TGetDateComponent> class. -template<const char* TUdfName, auto Accessor, auto WAccessor> +template <const char* TUdfName, auto Accessor, auto WAccessor> class TGetDateComponentName: public ::NYql::NUdf::TBoxedValue { public: typedef bool TTypeAwareMarker; @@ -1502,9 +1492,10 @@ public: SetUnexpectedTagError(builder, resource.GetTag()); return true; } + private: - template<auto Func> - class TImpl : public TBoxedValue { + template <auto Func> + class TImpl: public TBoxedValue { public: TUnboxedValue Run(const IValueBuilder* valueBuilder, const TUnboxedValuePod* args) const final { EMPTY_RESULT_ON_EMPTY_ARG(0); @@ -1512,7 +1503,7 @@ private: } }; - template<const char* TResourceName, auto Func> + template <const char* TResourceName, auto Func> static void BuildSignature(NUdf::IFunctionTypeInfoBuilder& builder, bool typesOnly) { builder.Returns<char*>(); builder.Args()->Add<TAutoMap<TResource<TResourceName>>>(); @@ -1523,115 +1514,111 @@ private: } }; - // template<typename TValue> - // TValue GetMonthNameValue(size_t idx) { - // static const std::array<TValue, 12U> monthNames = {{ - // TValue::Embedded(TStringRef::Of("January")), - // TValue::Embedded(TStringRef::Of("February")), - // TValue::Embedded(TStringRef::Of("March")), - // TValue::Embedded(TStringRef::Of("April")), - // TValue::Embedded(TStringRef::Of("May")), - // TValue::Embedded(TStringRef::Of("June")), - // TValue::Embedded(TStringRef::Of("July")), - // TValue::Embedded(TStringRef::Of("August")), - // 
TValue::Embedded(TStringRef::Of("September")), - // TValue::Embedded(TStringRef::Of("October")), - // TValue::Embedded(TStringRef::Of("November")), - // TValue::Embedded(TStringRef::Of("December")) - // }}; - // return monthNames.at(idx); - // } +// template<typename TValue> +// TValue GetMonthNameValue(size_t idx) { +// static const std::array<TValue, 12U> monthNames = {{ +// TValue::Embedded(TStringRef::Of("January")), +// TValue::Embedded(TStringRef::Of("February")), +// TValue::Embedded(TStringRef::Of("March")), +// TValue::Embedded(TStringRef::Of("April")), +// TValue::Embedded(TStringRef::Of("May")), +// TValue::Embedded(TStringRef::Of("June")), +// TValue::Embedded(TStringRef::Of("July")), +// TValue::Embedded(TStringRef::Of("August")), +// TValue::Embedded(TStringRef::Of("September")), +// TValue::Embedded(TStringRef::Of("October")), +// TValue::Embedded(TStringRef::Of("November")), +// TValue::Embedded(TStringRef::Of("December")) +// }}; +// return monthNames.at(idx); +// } - // struct TGetMonthNameKernelExec : TUnaryKernelExec<TGetMonthNameKernelExec, TReaderTraits::TResource<true>, TStringArrayBuilder<arrow::StringType, false>> { - // template<typename TSink> - // static void Process(const IValueBuilder* valueBuilder, TBlockItem item, const TSink& sink) { - // Y_UNUSED(valueBuilder); - // sink(GetMonthNameValue<TBlockItem>(GetMonth(item) - 1U)); - // } - // }; +// struct TGetMonthNameKernelExec : TUnaryKernelExec<TGetMonthNameKernelExec, TReaderTraits::TResource<true>, TStringArrayBuilder<arrow::StringType, false>> { +// template<typename TSink> +// static void Process(const IValueBuilder* valueBuilder, TBlockItem item, const TSink& sink) { +// Y_UNUSED(valueBuilder); +// sink(GetMonthNameValue<TBlockItem>(GetMonth(item) - 1U)); +// } +// }; - // BEGIN_SIMPLE_STRICT_ARROW_UDF(TGetMonthName, char*(TAutoMap<TResource<TMResourceName>>)) { - // Y_UNUSED(valueBuilder); - // return GetMonthNameValue<TUnboxedValue>(GetMonth(*args) - 1U); - // } - // 
END_SIMPLE_ARROW_UDF_WITH_NULL_HANDLING(TGetMonthName, TGetMonthNameKernelExec::Do, arrow::compute::NullHandling::INTERSECTION); +// BEGIN_SIMPLE_STRICT_ARROW_UDF(TGetMonthName, char*(TAutoMap<TResource<TMResourceName>>)) { +// Y_UNUSED(valueBuilder); +// return GetMonthNameValue<TUnboxedValue>(GetMonth(*args) - 1U); +// } +// END_SIMPLE_ARROW_UDF_WITH_NULL_HANDLING(TGetMonthName, TGetMonthNameKernelExec::Do, arrow::compute::NullHandling::INTERSECTION); -template<const char* TResourceName> +template <const char* TResourceName> TUnboxedValue GetMonthName(const IValueBuilder* valueBuilder, const TUnboxedValuePod& arg) { Y_UNUSED(valueBuilder); - static const std::array<TUnboxedValue, 12U> monthNames = {{ - TUnboxedValuePod::Embedded(TStringRef::Of("January")), - TUnboxedValuePod::Embedded(TStringRef::Of("February")), - TUnboxedValuePod::Embedded(TStringRef::Of("March")), - TUnboxedValuePod::Embedded(TStringRef::Of("April")), - TUnboxedValuePod::Embedded(TStringRef::Of("May")), - TUnboxedValuePod::Embedded(TStringRef::Of("June")), - TUnboxedValuePod::Embedded(TStringRef::Of("July")), - TUnboxedValuePod::Embedded(TStringRef::Of("August")), - TUnboxedValuePod::Embedded(TStringRef::Of("September")), - TUnboxedValuePod::Embedded(TStringRef::Of("October")), - TUnboxedValuePod::Embedded(TStringRef::Of("November")), - TUnboxedValuePod::Embedded(TStringRef::Of("December")) - }}; + static const std::array<TUnboxedValue, 12U> monthNames = {{TUnboxedValuePod::Embedded(TStringRef::Of("January")), + TUnboxedValuePod::Embedded(TStringRef::Of("February")), + TUnboxedValuePod::Embedded(TStringRef::Of("March")), + TUnboxedValuePod::Embedded(TStringRef::Of("April")), + TUnboxedValuePod::Embedded(TStringRef::Of("May")), + TUnboxedValuePod::Embedded(TStringRef::Of("June")), + TUnboxedValuePod::Embedded(TStringRef::Of("July")), + TUnboxedValuePod::Embedded(TStringRef::Of("August")), + TUnboxedValuePod::Embedded(TStringRef::Of("September")), + 
TUnboxedValuePod::Embedded(TStringRef::Of("October")), + TUnboxedValuePod::Embedded(TStringRef::Of("November")), + TUnboxedValuePod::Embedded(TStringRef::Of("December"))}}; return monthNames.at(GetMonth<TResourceName>(arg) - 1U); } - // struct TGetDayOfMonthKernelExec : TUnaryKernelExec<TGetMonthNameKernelExec, TReaderTraits::TResource<false>, TFixedSizeArrayBuilder<ui8, false>> { - // template<typename TSink> - // static void Process(TBlockItem item, const TSink& sink) { - // sink(GetDay(item)); - // } - // }; +// struct TGetDayOfMonthKernelExec : TUnaryKernelExec<TGetMonthNameKernelExec, TReaderTraits::TResource<false>, TFixedSizeArrayBuilder<ui8, false>> { +// template<typename TSink> +// static void Process(TBlockItem item, const TSink& sink) { +// sink(GetDay(item)); +// } +// }; - // BEGIN_SIMPLE_STRICT_ARROW_UDF(TGetDayOfMonth, ui8(TAutoMap<TResource<TMResourceName>>)) { - // Y_UNUSED(valueBuilder); - // return TUnboxedValuePod(GetDay(args[0])); - // } - // END_SIMPLE_ARROW_UDF_WITH_NULL_HANDLING(TGetDayOfMonth, TGetDayOfMonthKernelExec::Do, arrow::compute::NullHandling::INTERSECTION); +// BEGIN_SIMPLE_STRICT_ARROW_UDF(TGetDayOfMonth, ui8(TAutoMap<TResource<TMResourceName>>)) { +// Y_UNUSED(valueBuilder); +// return TUnboxedValuePod(GetDay(args[0])); +// } +// END_SIMPLE_ARROW_UDF_WITH_NULL_HANDLING(TGetDayOfMonth, TGetDayOfMonthKernelExec::Do, arrow::compute::NullHandling::INTERSECTION); -template<const char* TResourceName> +template <const char* TResourceName> TUnboxedValue GetDayOfWeekName(const IValueBuilder* valueBuilder, const TUnboxedValuePod& arg) { Y_UNUSED(valueBuilder); - static const std::array<TUnboxedValue, 7U> dayNames = {{ - TUnboxedValuePod::Embedded(TStringRef::Of("Monday")), - TUnboxedValuePod::Embedded(TStringRef::Of("Tuesday")), - TUnboxedValuePod::Embedded(TStringRef::Of("Wednesday")), - TUnboxedValuePod::Embedded(TStringRef::Of("Thursday")), - TUnboxedValuePod::Embedded(TStringRef::Of("Friday")), - 
TUnboxedValuePod::Embedded(TStringRef::Of("Saturday")), - TUnboxedValuePod::Embedded(TStringRef::Of("Sunday")) - }}; + static const std::array<TUnboxedValue, 7U> dayNames = {{TUnboxedValuePod::Embedded(TStringRef::Of("Monday")), + TUnboxedValuePod::Embedded(TStringRef::Of("Tuesday")), + TUnboxedValuePod::Embedded(TStringRef::Of("Wednesday")), + TUnboxedValuePod::Embedded(TStringRef::Of("Thursday")), + TUnboxedValuePod::Embedded(TStringRef::Of("Friday")), + TUnboxedValuePod::Embedded(TStringRef::Of("Saturday")), + TUnboxedValuePod::Embedded(TStringRef::Of("Sunday"))}}; return dayNames.at(GetDayOfWeek<TResourceName>(arg) - 1U); } - // struct TGetDayOfWeekNameKernelExec : TUnaryKernelExec<TGetDayOfWeekNameKernelExec, TReaderTraits::TResource<true>, TStringArrayBuilder<arrow::StringType, false>> { - // template<typename TSink> - // static void Process(const IValueBuilder* valueBuilder, TBlockItem item, const TSink& sink) { - // Y_UNUSED(valueBuilder); - // sink(GetDayNameValue<TBlockItem>(GetDayOfWeek(item) - 1U)); - // } - // }; +// struct TGetDayOfWeekNameKernelExec : TUnaryKernelExec<TGetDayOfWeekNameKernelExec, TReaderTraits::TResource<true>, TStringArrayBuilder<arrow::StringType, false>> { +// template<typename TSink> +// static void Process(const IValueBuilder* valueBuilder, TBlockItem item, const TSink& sink) { +// Y_UNUSED(valueBuilder); +// sink(GetDayNameValue<TBlockItem>(GetDayOfWeek(item) - 1U)); +// } +// }; - // BEGIN_SIMPLE_STRICT_ARROW_UDF(TGetDayOfWeekName, char*(TAutoMap<TResource<TMResourceName>>)) { - // Y_UNUSED(valueBuilder); - // return GetDayNameValue<TUnboxedValuePod>(GetDayOfWeek(*args) - 1U); - // } - // END_SIMPLE_ARROW_UDF_WITH_NULL_HANDLING(TGetDayOfWeekName, TGetDayOfWeekNameKernelExec::Do, arrow::compute::NullHandling::INTERSECTION); +// BEGIN_SIMPLE_STRICT_ARROW_UDF(TGetDayOfWeekName, char*(TAutoMap<TResource<TMResourceName>>)) { +// Y_UNUSED(valueBuilder); +// return GetDayNameValue<TUnboxedValuePod>(GetDayOfWeek(*args) - 1U); +// } 
+// END_SIMPLE_ARROW_UDF_WITH_NULL_HANDLING(TGetDayOfWeekName, TGetDayOfWeekNameKernelExec::Do, arrow::compute::NullHandling::INTERSECTION); - struct TTGetTimezoneNameKernelExec : TUnaryKernelExec<TTGetTimezoneNameKernelExec, TReaderTraits::TResource<false>, TStringArrayBuilder<arrow::BinaryType, false>> { - template<typename TSink> - static void Process(const IValueBuilder* valueBuilder, TBlockItem item, const TSink& sink) { - Y_UNUSED(valueBuilder); - auto timezoneId = GetTimezoneId<TMResourceName>(item); - if (timezoneId >= NTi::GetTimezones().size()) { - sink(TBlockItem{}); - } else { - sink(TBlockItem{NTi::GetTimezones()[timezoneId]}); - } +struct TTGetTimezoneNameKernelExec: TUnaryKernelExec<TTGetTimezoneNameKernelExec, TReaderTraits::TResource<false>, TStringArrayBuilder<arrow::BinaryType, false>> { + template <typename TSink> + static void Process(const IValueBuilder* valueBuilder, TBlockItem item, const TSink& sink) { + Y_UNUSED(valueBuilder); + auto timezoneId = GetTimezoneId<TMResourceName>(item); + if (timezoneId >= NTi::GetTimezones().size()) { + sink(TBlockItem{}); + } else { + sink(TBlockItem{NTi::GetTimezones()[timezoneId]}); } - }; + } +}; -template<const char* TResourceName> +template <const char* TResourceName> TUnboxedValue GetTimezoneName(const IValueBuilder* valueBuilder, const TUnboxedValuePod& arg) { const ui16 tzId = GetTimezoneId<TResourceName>(arg); const auto& tzNames = NTi::GetTimezones(); @@ -1641,212 +1628,202 @@ TUnboxedValue GetTimezoneName(const IValueBuilder* valueBuilder, const TUnboxedV return valueBuilder->NewString(tzNames[tzId]); } - // Update - - class TUpdate : public TBoxedValue { - public: - typedef bool TTypeAwareMarker; - static const TStringRef& Name() { - static auto name = TStringRef::Of("Update"); - return name; - } +// Update - static bool DeclareSignature( - const TStringRef& name, - TType* userType, - IFunctionTypeInfoBuilder& builder, - bool typesOnly) - { - if (Name() != name) { - return false; - } - - if 
(!userType) { - // XXX: Function became polymorphic when overload for - // wide resources was implemented. Hence, to make it - // backward compatible with previous versions, the - // absence of the userType is considered as using the - // old version (i.e. without type awareness) that - // provides implementation only for narrow dates. - BuildSignature<TMResourceName>(builder, typesOnly); - return true; - } +class TUpdate: public TBoxedValue { +public: + typedef bool TTypeAwareMarker; + static const TStringRef& Name() { + static auto name = TStringRef::Of("Update"); + return name; + } - builder.UserType(userType); + static bool DeclareSignature( + const TStringRef& name, + TType* userType, + IFunctionTypeInfoBuilder& builder, + bool typesOnly) + { + if (Name() != name) { + return false; + } - const auto typeInfoHelper = builder.TypeInfoHelper(); - TTupleTypeInspector tuple(*typeInfoHelper, userType); - Y_ENSURE(tuple, "Tuple with args and options tuples expected"); - Y_ENSURE(tuple.GetElementsCount() > 0, - "Tuple has to contain positional arguments"); + if (!userType) { + // XXX: Function became polymorphic when overload for + // wide resources was implemented. Hence, to make it + // backward compatible with previous versions, the + // absence of the userType is considered as using the + // old version (i.e. without type awareness) that + // provides implementation only for narrow dates. 
+ BuildSignature<TMResourceName>(builder, typesOnly); + return true; + } - TTupleTypeInspector argsTuple(*typeInfoHelper, tuple.GetElementType(0)); - Y_ENSURE(argsTuple, "Tuple with args expected"); - if (argsTuple.GetElementsCount() == 0) { - builder.SetError("At least one argument expected"); - return true; - } + builder.UserType(userType); - auto argType = argsTuple.GetElementType(0); + const auto typeInfoHelper = builder.TypeInfoHelper(); + TTupleTypeInspector tuple(*typeInfoHelper, userType); + Y_ENSURE(tuple, "Tuple with args and options tuples expected"); + Y_ENSURE(tuple.GetElementsCount() > 0, + "Tuple has to contain positional arguments"); - if (const auto optType = TOptionalTypeInspector(*typeInfoHelper, argType)) { - argType = optType.GetItemType(); - } + TTupleTypeInspector argsTuple(*typeInfoHelper, tuple.GetElementType(0)); + Y_ENSURE(argsTuple, "Tuple with args expected"); + if (argsTuple.GetElementsCount() == 0) { + builder.SetError("At least one argument expected"); + return true; + } - TResourceTypeInspector resource(*typeInfoHelper, argType); - if (!resource) { - TDataTypeInspector data(*typeInfoHelper, argType); - if (!data) { - SetResourceExpectedError(builder, typeInfoHelper, argType); - return true; - } + auto argType = argsTuple.GetElementType(0); - const auto features = NUdf::GetDataTypeInfo(NUdf::GetDataSlot(data.GetTypeId())).Features; - if (features & NUdf::ExtDateType) { - BuildSignature<TM64ResourceName>(builder, typesOnly); - return true; - } - if (features & (NUdf::DateType | NUdf::TzDateType)) { - BuildSignature<TMResourceName>(builder, typesOnly); - return true; - } + if (const auto optType = TOptionalTypeInspector(*typeInfoHelper, argType)) { + argType = optType.GetItemType(); + } + TResourceTypeInspector resource(*typeInfoHelper, argType); + if (!resource) { + TDataTypeInspector data(*typeInfoHelper, argType); + if (!data) { SetResourceExpectedError(builder, typeInfoHelper, argType); return true; } - if (resource.GetTag() == 
TStringRef::Of(TM64ResourceName)) { + const auto features = NUdf::GetDataTypeInfo(NUdf::GetDataSlot(data.GetTypeId())).Features; + if (features & NUdf::ExtDateType) { BuildSignature<TM64ResourceName>(builder, typesOnly); return true; } - - if (resource.GetTag() == TStringRef::Of(TMResourceName)) { + if (features & (NUdf::DateType | NUdf::TzDateType)) { BuildSignature<TMResourceName>(builder, typesOnly); return true; } - SetUnexpectedTagError(builder, resource.GetTag()); + SetResourceExpectedError(builder, typeInfoHelper, argType); return true; } - private: - template<const char* TResourceName> - class TImpl : public TBoxedValue { - public: - TUnboxedValue Run(const IValueBuilder* valueBuilder, const TUnboxedValuePod* args) const final { - try { - EMPTY_RESULT_ON_EMPTY_ARG(0); - auto result = args[0]; - if (args[1]) { - auto year = args[1].Get<std::conditional_t<TResourceName == TMResourceName, ui16, i32>>(); - if (!ValidateYear<TResourceName>(year)) { - return TUnboxedValuePod(); - } - SetYear<TResourceName>(result, year); - } - if (args[2]) { - auto month = args[2].Get<ui8>(); - if (!ValidateMonth(month)) { - return TUnboxedValuePod(); - } - SetMonth<TResourceName>(result, month); + if (resource.GetTag() == TStringRef::Of(TM64ResourceName)) { + BuildSignature<TM64ResourceName>(builder, typesOnly); + return true; + } + + if (resource.GetTag() == TStringRef::Of(TMResourceName)) { + BuildSignature<TMResourceName>(builder, typesOnly); + return true; + } + + SetUnexpectedTagError(builder, resource.GetTag()); + return true; + } + +private: + template <const char* TResourceName> + class TImpl: public TBoxedValue { + public: + TUnboxedValue Run(const IValueBuilder* valueBuilder, const TUnboxedValuePod* args) const final { + try { + EMPTY_RESULT_ON_EMPTY_ARG(0); + auto result = args[0]; + + if (args[1]) { + auto year = args[1].Get<std::conditional_t<TResourceName == TMResourceName, ui16, i32>>(); + if (!ValidateYear<TResourceName>(year)) { + return TUnboxedValuePod(); } - 
if (args[3]) { - auto day = args[3].Get<ui8>(); - if (!ValidateDay(day)) { - return TUnboxedValuePod(); - } - SetDay<TResourceName>(result, day); + SetYear<TResourceName>(result, year); + } + if (args[2]) { + auto month = args[2].Get<ui8>(); + if (!ValidateMonth(month)) { + return TUnboxedValuePod(); } - if (args[4]) { - auto hour = args[4].Get<ui8>(); - if (!ValidateHour(hour)) { - return TUnboxedValuePod(); - } - SetHour<TResourceName>(result, hour); + SetMonth<TResourceName>(result, month); + } + if (args[3]) { + auto day = args[3].Get<ui8>(); + if (!ValidateDay(day)) { + return TUnboxedValuePod(); } - if (args[5]) { - auto minute = args[5].Get<ui8>(); - if (!ValidateMinute(minute)) { - return TUnboxedValuePod(); - } - SetMinute<TResourceName>(result, minute); + SetDay<TResourceName>(result, day); + } + if (args[4]) { + auto hour = args[4].Get<ui8>(); + if (!ValidateHour(hour)) { + return TUnboxedValuePod(); } - if (args[6]) { - auto second = args[6].Get<ui8>(); - if (!ValidateSecond(second)) { - return TUnboxedValuePod(); - } - SetSecond<TResourceName>(result, second); + SetHour<TResourceName>(result, hour); + } + if (args[5]) { + auto minute = args[5].Get<ui8>(); + if (!ValidateMinute(minute)) { + return TUnboxedValuePod(); } - if (args[7]) { - auto microsecond = args[7].Get<ui32>(); - if (!ValidateMicrosecond(microsecond)) { - return TUnboxedValuePod(); - } - SetMicrosecond<TResourceName>(result, microsecond); + SetMinute<TResourceName>(result, minute); + } + if (args[6]) { + auto second = args[6].Get<ui8>(); + if (!ValidateSecond(second)) { + return TUnboxedValuePod(); } - if (args[8]) { - auto timezoneId = args[8].Get<ui16>(); - if (!ValidateTimezoneId(timezoneId)) { - return TUnboxedValuePod(); - } - SetTimezoneId<TResourceName>(result, timezoneId); + SetSecond<TResourceName>(result, second); + } + if (args[7]) { + auto microsecond = args[7].Get<ui32>(); + if (!ValidateMicrosecond(microsecond)) { + return TUnboxedValuePod(); } - - auto& builder = 
valueBuilder->GetDateBuilder(); - auto& storage = Reference<TResourceName>(result); - if (!storage.Validate(builder)) { + SetMicrosecond<TResourceName>(result, microsecond); + } + if (args[8]) { + auto timezoneId = args[8].Get<ui16>(); + if (!ValidateTimezoneId(timezoneId)) { return TUnboxedValuePod(); } - return result; - } catch (const std::exception& e) { - TStringBuilder sb; - sb << CurrentExceptionMessage(); - sb << Endl << "[" << TStringBuf(Name()) << "]" ; - UdfTerminate(sb.c_str()); + SetTimezoneId<TResourceName>(result, timezoneId); } - } - }; - - template<const char* TResourceName> - static void BuildSignature(NUdf::IFunctionTypeInfoBuilder& builder, bool typesOnly) { - builder.Returns<TOptional<TResource<TResourceName>>>(); - builder.OptionalArgs(8).Args()->Add<TAutoMap<TResource<TResourceName>>>() - .template Add<TOptional<std::conditional_t<TResourceName == TMResourceName, ui16, i32>>>().Name("Year") - .template Add<TOptional<ui8>>().Name("Month") - .template Add<TOptional<ui8>>().Name("Day") - .template Add<TOptional<ui8>>().Name("Hour") - .template Add<TOptional<ui8>>().Name("Minute") - .template Add<TOptional<ui8>>().Name("Second") - .template Add<TOptional<ui32>>().Name("Microsecond") - .template Add<TOptional<ui16>>().Name("TimezoneId"); - builder.IsStrict(); - if (!typesOnly) { - builder.Implementation(new TImpl<TResourceName>()); + auto& builder = valueBuilder->GetDateBuilder(); + auto& storage = Reference<TResourceName>(result); + if (!storage.Validate(builder)) { + return TUnboxedValuePod(); + } + return result; + } catch (const std::exception& e) { + TStringBuilder sb; + sb << CurrentExceptionMessage(); + sb << Endl << "[" << TStringBuf(Name()) << "]"; + UdfTerminate(sb.c_str()); } - } + } }; - // From* - - template<typename TInput, typename TOutput, i64 UsecMultiplier> - inline TUnboxedValuePod TFromConverter(TInput arg) { - using TLayout = TDataType<TOutput>::TLayout; - const TLayout usec = TLayout(arg) * UsecMultiplier; - return 
Validate<TOutput>(usec) ? TUnboxedValuePod(usec) : TUnboxedValuePod(); + template <const char* TResourceName> + static void BuildSignature(NUdf::IFunctionTypeInfoBuilder& builder, bool typesOnly) { + builder.Returns<TOptional<TResource<TResourceName>>>(); + builder.OptionalArgs(8).Args()->Add<TAutoMap<TResource<TResourceName>>>().template Add<TOptional<std::conditional_t<TResourceName == TMResourceName, ui16, i32>>>().Name("Year").template Add<TOptional<ui8>>().Name("Month").template Add<TOptional<ui8>>().Name("Day").template Add<TOptional<ui8>>().Name("Hour").template Add<TOptional<ui8>>().Name("Minute").template Add<TOptional<ui8>>().Name("Second").template Add<TOptional<ui32>>().Name("Microsecond").template Add<TOptional<ui16>>().Name("TimezoneId"); + builder.IsStrict(); + if (!typesOnly) { + builder.Implementation(new TImpl<TResourceName>()); + } } +}; +// From* - template<typename TInput, typename TOutput, i64 UsecMultiplier> - using TFromConverterKernel = TUnaryUnsafeFixedSizeFilterKernel<TInput, - typename TDataType<TOutput>::TLayout, [] (TInput arg) { - using TLayout = TDataType<TOutput>::TLayout; - const TLayout usec = TLayout(arg) * UsecMultiplier; - return std::make_pair(usec, Validate<TOutput>(usec)); - }>; +template <typename TInput, typename TOutput, i64 UsecMultiplier> +inline TUnboxedValuePod TFromConverter(TInput arg) { + using TLayout = TDataType<TOutput>::TLayout; + const TLayout usec = TLayout(arg) * UsecMultiplier; + return Validate<TOutput>(usec) ? 
TUnboxedValuePod(usec) : TUnboxedValuePod(); +} +template <typename TInput, typename TOutput, i64 UsecMultiplier> +using TFromConverterKernel = TUnaryUnsafeFixedSizeFilterKernel<TInput, + typename TDataType<TOutput>::TLayout, [](TInput arg) { + using TLayout = TDataType<TOutput>::TLayout; + const TLayout usec = TLayout(arg) * UsecMultiplier; + return std::make_pair(usec, Validate<TOutput>(usec)); + }>; #define DATETIME_FROM_CONVERTER_UDF(name, retType, argType, usecMultiplier) \ BEGIN_SIMPLE_STRICT_ARROW_UDF(T##name, TOptional<retType>(TAutoMap<argType>)) { \ @@ -1856,38 +1833,38 @@ TUnboxedValue GetTimezoneName(const IValueBuilder* valueBuilder, const TUnboxedV \ END_SIMPLE_ARROW_UDF(T##name, (TFromConverterKernel<argType, retType, usecMultiplier>::Do)) -#define DATETIME_FROM_CONVERTER_UDF_N(space, name, retType, argType, usecMultiplier) \ - namespace N##space { \ - DATETIME_FROM_CONVERTER_UDF(name, retType, argType, usecMultiplier); \ +#define DATETIME_FROM_CONVERTER_UDF_N(space, name, retType, argType, usecMultiplier) \ + namespace N##space { \ + DATETIME_FROM_CONVERTER_UDF(name, retType, argType, usecMultiplier); \ } - DATETIME_FROM_CONVERTER_UDF(FromSeconds, TTimestamp, ui32, UsecondsInSecond); - DATETIME_FROM_CONVERTER_UDF(FromMilliseconds, TTimestamp, ui64, UsecondsInMilliseconds); - DATETIME_FROM_CONVERTER_UDF(FromMicroseconds, TTimestamp, ui64, 1); +DATETIME_FROM_CONVERTER_UDF(FromSeconds, TTimestamp, ui32, UsecondsInSecond); +DATETIME_FROM_CONVERTER_UDF(FromMilliseconds, TTimestamp, ui64, UsecondsInMilliseconds); +DATETIME_FROM_CONVERTER_UDF(FromMicroseconds, TTimestamp, ui64, 1); - DATETIME_FROM_CONVERTER_UDF(FromSeconds64, TTimestamp64, i64, UsecondsInSecond); - DATETIME_FROM_CONVERTER_UDF(FromMilliseconds64, TTimestamp64, i64, UsecondsInMilliseconds); - DATETIME_FROM_CONVERTER_UDF(FromMicroseconds64, TTimestamp64, i64, 1); +DATETIME_FROM_CONVERTER_UDF(FromSeconds64, TTimestamp64, i64, UsecondsInSecond); +DATETIME_FROM_CONVERTER_UDF(FromMilliseconds64, 
TTimestamp64, i64, UsecondsInMilliseconds); +DATETIME_FROM_CONVERTER_UDF(FromMicroseconds64, TTimestamp64, i64, 1); - DATETIME_FROM_CONVERTER_UDF(IntervalFromDays, TInterval, i32, UsecondsInDay); - DATETIME_FROM_CONVERTER_UDF(IntervalFromHours, TInterval, i32, UsecondsInHour); - DATETIME_FROM_CONVERTER_UDF(IntervalFromMinutes, TInterval, i32, UsecondsInMinute); - DATETIME_FROM_CONVERTER_UDF_N(Legacy, IntervalFromSeconds, TInterval, i32, UsecondsInSecond); - DATETIME_FROM_CONVERTER_UDF_N(Actual, IntervalFromSeconds, TInterval, i64, UsecondsInSecond); - DATETIME_FROM_CONVERTER_UDF(IntervalFromMilliseconds, TInterval, i64, UsecondsInMilliseconds); - DATETIME_FROM_CONVERTER_UDF(IntervalFromMicroseconds, TInterval, i64, 1); +DATETIME_FROM_CONVERTER_UDF(IntervalFromDays, TInterval, i32, UsecondsInDay); +DATETIME_FROM_CONVERTER_UDF(IntervalFromHours, TInterval, i32, UsecondsInHour); +DATETIME_FROM_CONVERTER_UDF(IntervalFromMinutes, TInterval, i32, UsecondsInMinute); +DATETIME_FROM_CONVERTER_UDF_N(Legacy, IntervalFromSeconds, TInterval, i32, UsecondsInSecond); +DATETIME_FROM_CONVERTER_UDF_N(Actual, IntervalFromSeconds, TInterval, i64, UsecondsInSecond); +DATETIME_FROM_CONVERTER_UDF(IntervalFromMilliseconds, TInterval, i64, UsecondsInMilliseconds); +DATETIME_FROM_CONVERTER_UDF(IntervalFromMicroseconds, TInterval, i64, 1); - DATETIME_FROM_CONVERTER_UDF(Interval64FromDays, TInterval64, i32, UsecondsInDay); - DATETIME_FROM_CONVERTER_UDF(Interval64FromHours, TInterval64, i64, UsecondsInHour); - DATETIME_FROM_CONVERTER_UDF(Interval64FromMinutes, TInterval64, i64, UsecondsInMinute); - DATETIME_FROM_CONVERTER_UDF(Interval64FromSeconds, TInterval64, i64, UsecondsInSecond); - DATETIME_FROM_CONVERTER_UDF(Interval64FromMilliseconds, TInterval64, i64, UsecondsInMilliseconds); - DATETIME_FROM_CONVERTER_UDF(Interval64FromMicroseconds, TInterval64, i64, 1); +DATETIME_FROM_CONVERTER_UDF(Interval64FromDays, TInterval64, i32, UsecondsInDay); +DATETIME_FROM_CONVERTER_UDF(Interval64FromHours, 
TInterval64, i64, UsecondsInHour); +DATETIME_FROM_CONVERTER_UDF(Interval64FromMinutes, TInterval64, i64, UsecondsInMinute); +DATETIME_FROM_CONVERTER_UDF(Interval64FromSeconds, TInterval64, i64, UsecondsInSecond); +DATETIME_FROM_CONVERTER_UDF(Interval64FromMilliseconds, TInterval64, i64, UsecondsInMilliseconds); +DATETIME_FROM_CONVERTER_UDF(Interval64FromMicroseconds, TInterval64, i64, 1); - // To* +// To* -template<const char* TUdfName, typename TResult, typename TWResult, i64 ScaleSeconds> -class TToConverter : public TBoxedValue { +template <const char* TUdfName, typename TResult, typename TWResult, i64 ScaleSeconds> +class TToConverter: public TBoxedValue { public: typedef bool TTypeAwareMarker; static const ::NYql::NUdf::TStringRef& Name() { @@ -1955,8 +1932,9 @@ public: SetIntervalExpectedError(builder, typeInfoHelper, argType); return true; } + private: - class TImpl : public TBoxedValue { + class TImpl: public TBoxedValue { public: TUnboxedValue Run(const IValueBuilder* valueBuilder, const TUnboxedValuePod* args) const final { try { @@ -1965,14 +1943,13 @@ private: } catch (const std::exception& e) { TStringBuilder sb; sb << CurrentExceptionMessage(); - sb << Endl << "[" << TStringBuf(Name()) << "]" ; + sb << Endl << "[" << TStringBuf(Name()) << "]"; UdfTerminate(sb.c_str()); } } - }; - template<typename TInput, typename TOutput> + template <typename TInput, typename TOutput> static void BuildSignature(NUdf::IFunctionTypeInfoBuilder& builder, bool typesOnly) { builder.Returns<TOutput>(); builder.Args()->Add<TAutoMap<TInput>>(); @@ -1983,34 +1960,33 @@ private: } }; - // StartOf* - - template<auto Core> - struct TStartOfKernelExec : TUnaryKernelExec<TStartOfKernelExec<Core>, TResourceBlockReader<false>, TResourceArrayBuilder<true>> { - template<typename TSink> - static void Process(const IValueBuilder* valueBuilder, TBlockItem item, const TSink& sink) { - if (auto res = Core(Reference<TMResourceName>(item), *valueBuilder)) { - Reference<TMResourceName>(item) 
= res.GetRef(); - sink(item); - } else { - sink(TBlockItem{}); - } +// StartOf* +template <auto Core> +struct TStartOfKernelExec: TUnaryKernelExec<TStartOfKernelExec<Core>, TResourceBlockReader<false>, TResourceArrayBuilder<true>> { + template <typename TSink> + static void Process(const IValueBuilder* valueBuilder, TBlockItem item, const TSink& sink) { + if (auto res = Core(Reference<TMResourceName>(item), *valueBuilder)) { + Reference<TMResourceName>(item) = res.GetRef(); + sink(item); + } else { + sink(TBlockItem{}); } - }; + } +}; - template<const char* TResourceName, auto Core> - TUnboxedValue SimpleDatetimeToDatetimeUdf(const IValueBuilder* valueBuilder, const TUnboxedValuePod* args) { - auto result = args[0]; - auto& storage = Reference<TResourceName>(result); - if (auto res = Core(storage, *valueBuilder)) { - storage = res.GetRef(); - return result; - } - return TUnboxedValuePod{}; +template <const char* TResourceName, auto Core> +TUnboxedValue SimpleDatetimeToDatetimeUdf(const IValueBuilder* valueBuilder, const TUnboxedValuePod* args) { + auto result = args[0]; + auto& storage = Reference<TResourceName>(result); + if (auto res = Core(storage, *valueBuilder)) { + storage = res.GetRef(); + return result; } + return TUnboxedValuePod{}; +} -template<const char* TUdfName, auto Boundary, auto WBoundary> +template <const char* TUdfName, auto Boundary, auto WBoundary> class TBoundaryOf: public ::NYql::NUdf::TBoxedValue { public: typedef bool TTypeAwareMarker; @@ -2096,23 +2072,24 @@ public: SetUnexpectedTagError(builder, resource.GetTag()); return true; } + private: - template<auto Func> - class TImpl : public TBoxedValue { + template <auto Func> + class TImpl: public TBoxedValue { public: TUnboxedValue Run(const IValueBuilder* valueBuilder, const TUnboxedValuePod* args) const final { try { return Func(valueBuilder, args); } catch (const std::exception&) { - TStringBuilder sb; - sb << CurrentExceptionMessage(); - sb << Endl << "[" << TStringBuf(Name()) << "]" ; - 
UdfTerminate(sb.c_str()); + TStringBuilder sb; + sb << CurrentExceptionMessage(); + sb << Endl << "[" << TStringBuf(Name()) << "]"; + UdfTerminate(sb.c_str()); } } }; - template< const char* TResourceName, auto Func> + template <const char* TResourceName, auto Func> static void BuildSignature(NUdf::IFunctionTypeInfoBuilder& builder, bool typesOnly) { builder.Returns<TOptional<TResource<TResourceName>>>(); builder.Args()->Add<TAutoMap<TResource<TResourceName>>>(); @@ -2123,222 +2100,222 @@ private: } }; - template<typename TStorage> - void SetStartOfDay(TStorage& storage) { - storage.Hour = 0; - storage.Minute = 0; - storage.Second = 0; - storage.Microsecond = 0; - } +template <typename TStorage> +void SetStartOfDay(TStorage& storage) { + storage.Hour = 0; + storage.Minute = 0; + storage.Second = 0; + storage.Microsecond = 0; +} - template<typename TStorage> - void SetEndOfDay(TStorage& storage) { - storage.Hour = 23; - storage.Minute = 59; - storage.Second = 59; - storage.Microsecond = 999999; - } +template <typename TStorage> +void SetEndOfDay(TStorage& storage) { + storage.Hour = 23; + storage.Minute = 59; + storage.Second = 59; + storage.Microsecond = 999999; +} - template<typename TStorage> - TMaybe<TStorage> StartOfYear(TStorage storage, const IValueBuilder& valueBuilder) { - storage.Month = 1; - storage.Day = 1; - SetStartOfDay(storage); - if (!storage.Validate(valueBuilder.GetDateBuilder())) { - return {}; - } - return storage; +template <typename TStorage> +TMaybe<TStorage> StartOfYear(TStorage storage, const IValueBuilder& valueBuilder) { + storage.Month = 1; + storage.Day = 1; + SetStartOfDay(storage); + if (!storage.Validate(valueBuilder.GetDateBuilder())) { + return {}; } + return storage; +} - template<typename TStorage> - TMaybe<TStorage> EndOfYear(TStorage storage, const IValueBuilder& valueBuilder) { - storage.Month = 12; - storage.Day = 31; - SetEndOfDay(storage); - if (!storage.Validate(valueBuilder.GetDateBuilder())) { - return {}; - } - return 
storage; +template <typename TStorage> +TMaybe<TStorage> EndOfYear(TStorage storage, const IValueBuilder& valueBuilder) { + storage.Month = 12; + storage.Day = 31; + SetEndOfDay(storage); + if (!storage.Validate(valueBuilder.GetDateBuilder())) { + return {}; } + return storage; +} - template<typename TStorage> - TMaybe<TStorage> StartOfQuarter(TStorage storage, const IValueBuilder& valueBuilder) { - storage.Month = (storage.Month - 1) / 3 * 3 + 1; - storage.Day = 1; - SetStartOfDay(storage); - if (!storage.Validate(valueBuilder.GetDateBuilder())) { - return {}; - } - return storage; +template <typename TStorage> +TMaybe<TStorage> StartOfQuarter(TStorage storage, const IValueBuilder& valueBuilder) { + storage.Month = (storage.Month - 1) / 3 * 3 + 1; + storage.Day = 1; + SetStartOfDay(storage); + if (!storage.Validate(valueBuilder.GetDateBuilder())) { + return {}; } + return storage; +} - template<typename TStorage> - TMaybe<TStorage> EndOfQuarter(TStorage storage, const IValueBuilder& valueBuilder) { - storage.Month = ((storage.Month - 1) / 3 + 1) * 3; - storage.Day = NMiniKQL::GetMonthLength(storage.Month, NMiniKQL::IsLeapYear(storage.Year)); - SetEndOfDay(storage); - if (!storage.Validate(valueBuilder.GetDateBuilder())) { - return {}; - } - return storage; +template <typename TStorage> +TMaybe<TStorage> EndOfQuarter(TStorage storage, const IValueBuilder& valueBuilder) { + storage.Month = ((storage.Month - 1) / 3 + 1) * 3; + storage.Day = NMiniKQL::GetMonthLength(storage.Month, NMiniKQL::IsLeapYear(storage.Year)); + SetEndOfDay(storage); + if (!storage.Validate(valueBuilder.GetDateBuilder())) { + return {}; } + return storage; +} - template<typename TStorage> - TMaybe<TStorage> StartOfMonth(TStorage storage, const IValueBuilder& valueBuilder) { - storage.Day = 1; - SetStartOfDay(storage); - if (!storage.Validate(valueBuilder.GetDateBuilder())) { - return {}; - } - return storage; +template <typename TStorage> +TMaybe<TStorage> StartOfMonth(TStorage storage, const 
IValueBuilder& valueBuilder) { + storage.Day = 1; + SetStartOfDay(storage); + if (!storage.Validate(valueBuilder.GetDateBuilder())) { + return {}; } + return storage; +} - template<typename TStorage> - TMaybe<TStorage> EndOfMonth(TStorage storage, const IValueBuilder& valueBuilder) { - storage.Day = NMiniKQL::GetMonthLength(storage.Month, NMiniKQL::IsLeapYear(storage.Year)); - SetEndOfDay(storage); - if (!storage.Validate(valueBuilder.GetDateBuilder())) { - return {}; - } - return storage; +template <typename TStorage> +TMaybe<TStorage> EndOfMonth(TStorage storage, const IValueBuilder& valueBuilder) { + storage.Day = NMiniKQL::GetMonthLength(storage.Month, NMiniKQL::IsLeapYear(storage.Year)); + SetEndOfDay(storage); + if (!storage.Validate(valueBuilder.GetDateBuilder())) { + return {}; } + return storage; +} - template<typename TStorage> - TMaybe<TStorage> StartOfWeek(TStorage storage, const IValueBuilder& valueBuilder) { - const ui32 shift = 86400u * (storage.DayOfWeek - 1u); - if constexpr (std::is_same_v<TStorage, TTMStorage>) { - if (shift > storage.ToDatetime(valueBuilder.GetDateBuilder())) { - return {}; - } - storage.FromDatetime(valueBuilder.GetDateBuilder(), storage.ToDatetime(valueBuilder.GetDateBuilder()) - shift, storage.TimezoneId); - } else { - if (shift > storage.ToDatetime64(valueBuilder.GetDateBuilder())) { - return {}; - } - storage.FromDatetime64(valueBuilder.GetDateBuilder(), storage.ToDatetime64(valueBuilder.GetDateBuilder()) - shift, storage.TimezoneId); - } - SetStartOfDay(storage); - if (!storage.Validate(valueBuilder.GetDateBuilder())) { +template <typename TStorage> +TMaybe<TStorage> StartOfWeek(TStorage storage, const IValueBuilder& valueBuilder) { + const ui32 shift = 86400u * (storage.DayOfWeek - 1u); + if constexpr (std::is_same_v<TStorage, TTMStorage>) { + if (shift > storage.ToDatetime(valueBuilder.GetDateBuilder())) { return {}; } - return storage; - } - - template<typename TStorage> - TMaybe<TStorage> EndOfWeek(TStorage storage, 
const IValueBuilder& valueBuilder) { - const ui32 shift = 86400u * (7u - storage.DayOfWeek); - if constexpr (std::is_same_v<TStorage, TTMStorage>) { - auto dt = storage.ToDatetime(valueBuilder.GetDateBuilder()); - if (NUdf::MAX_DATETIME - shift <= dt) { - return {}; - } - storage.FromDatetime(valueBuilder.GetDateBuilder(), dt + shift, storage.TimezoneId); - } else { - auto dt = storage.ToDatetime64(valueBuilder.GetDateBuilder()); - if (NUdf::MAX_DATETIME64 - shift <= dt) { - return {}; - } - storage.FromDatetime64(valueBuilder.GetDateBuilder(), dt + shift, storage.TimezoneId); - } - SetEndOfDay(storage); - if (!storage.Validate(valueBuilder.GetDateBuilder())) { + storage.FromDatetime(valueBuilder.GetDateBuilder(), storage.ToDatetime(valueBuilder.GetDateBuilder()) - shift, storage.TimezoneId); + } else { + if (shift > storage.ToDatetime64(valueBuilder.GetDateBuilder())) { return {}; } - return storage; + storage.FromDatetime64(valueBuilder.GetDateBuilder(), storage.ToDatetime64(valueBuilder.GetDateBuilder()) - shift, storage.TimezoneId); + } + SetStartOfDay(storage); + if (!storage.Validate(valueBuilder.GetDateBuilder())) { + return {}; } + return storage; +} - template<typename TStorage> - TMaybe<TStorage> StartOfDay(TStorage storage, const IValueBuilder& valueBuilder) { - SetStartOfDay(storage); - auto& builder = valueBuilder.GetDateBuilder(); - if (!storage.Validate(builder)) { +template <typename TStorage> +TMaybe<TStorage> EndOfWeek(TStorage storage, const IValueBuilder& valueBuilder) { + const ui32 shift = 86400u * (7u - storage.DayOfWeek); + if constexpr (std::is_same_v<TStorage, TTMStorage>) { + auto dt = storage.ToDatetime(valueBuilder.GetDateBuilder()); + if (NUdf::MAX_DATETIME - shift <= dt) { return {}; } - return storage; - } - - template<typename TStorage> - TMaybe<TStorage> EndOfDay(TStorage storage, const IValueBuilder& valueBuilder) { - SetEndOfDay(storage); - auto& builder = valueBuilder.GetDateBuilder(); - if (!storage.Validate(builder)) { + 
storage.FromDatetime(valueBuilder.GetDateBuilder(), dt + shift, storage.TimezoneId); + } else { + auto dt = storage.ToDatetime64(valueBuilder.GetDateBuilder()); + if (NUdf::MAX_DATETIME64 - shift <= dt) { return {}; } - return storage; + storage.FromDatetime64(valueBuilder.GetDateBuilder(), dt + shift, storage.TimezoneId); + } + SetEndOfDay(storage); + if (!storage.Validate(valueBuilder.GetDateBuilder())) { + return {}; } + return storage; +} - template<typename TStorage> - TMaybe<TStorage> StartOf(TStorage storage, ui64 interval, const IValueBuilder& valueBuilder) { - if (interval >= 86400000000ull) { - // treat as StartOfDay - SetStartOfDay(storage); - } else { - auto current = storage.ToTimeOfDay(); - auto rounded = current / interval * interval; - storage.FromTimeOfDay(rounded); - } +template <typename TStorage> +TMaybe<TStorage> StartOfDay(TStorage storage, const IValueBuilder& valueBuilder) { + SetStartOfDay(storage); + auto& builder = valueBuilder.GetDateBuilder(); + if (!storage.Validate(builder)) { + return {}; + } + return storage; +} - auto& builder = valueBuilder.GetDateBuilder(); - if (!storage.Validate(builder)) { - return {}; - } - return storage; +template <typename TStorage> +TMaybe<TStorage> EndOfDay(TStorage storage, const IValueBuilder& valueBuilder) { + SetEndOfDay(storage); + auto& builder = valueBuilder.GetDateBuilder(); + if (!storage.Validate(builder)) { + return {}; } + return storage; +} - template<typename TStorage> - TMaybe<TStorage> EndOf(TStorage storage, ui64 interval, const IValueBuilder& valueBuilder) { - if (interval >= 86400000000ull) { - // treat as EndOfDay - SetEndOfDay(storage); - } else { - auto current = storage.ToTimeOfDay(); - auto rounded = current / interval * interval + interval - 1; - storage.FromTimeOfDay(rounded); - } +template <typename TStorage> +TMaybe<TStorage> StartOf(TStorage storage, ui64 interval, const IValueBuilder& valueBuilder) { + if (interval >= 86400000000ull) { + // treat as StartOfDay + 
SetStartOfDay(storage); + } else { + auto current = storage.ToTimeOfDay(); + auto rounded = current / interval * interval; + storage.FromTimeOfDay(rounded); + } - auto& builder = valueBuilder.GetDateBuilder(); - if (!storage.Validate(builder)) { - return {}; - } - return storage; + auto& builder = valueBuilder.GetDateBuilder(); + if (!storage.Validate(builder)) { + return {}; } + return storage; +} - template<bool UseEnd> - struct TStartEndOfBinaryKernelExec : TBinaryKernelExec<TStartEndOfBinaryKernelExec<UseEnd>> { - template<typename TSink> - static void Process(const IValueBuilder* valueBuilder, TBlockItem arg1, TBlockItem arg2, const TSink& sink) { - auto& storage = Reference<TMResourceName>(arg1); - ui64 interval = std::abs(arg2.Get<i64>()); - if (interval == 0) { - sink(arg1); - return; - } +template <typename TStorage> +TMaybe<TStorage> EndOf(TStorage storage, ui64 interval, const IValueBuilder& valueBuilder) { + if (interval >= 86400000000ull) { + // treat as EndOfDay + SetEndOfDay(storage); + } else { + auto current = storage.ToTimeOfDay(); + auto rounded = current / interval * interval + interval - 1; + storage.FromTimeOfDay(rounded); + } - if (auto res = (UseEnd ? 
EndOf<TTMStorage> : StartOf<TTMStorage>)(storage, interval, *valueBuilder)) { - storage = res.GetRef(); - sink(arg1); - } else { - sink(TBlockItem{}); - } - } - }; + auto& builder = valueBuilder.GetDateBuilder(); + if (!storage.Validate(builder)) { + return {}; + } + return storage; +} - template<const char* TResourceName, auto Core> - TUnboxedValue SimpleDatetimeToIntervalUdf(const IValueBuilder* valueBuilder, const TUnboxedValuePod* args) { - auto result = args[0]; - ui64 interval = std::abs(args[1].Get<i64>()); +template <bool UseEnd> +struct TStartEndOfBinaryKernelExec: TBinaryKernelExec<TStartEndOfBinaryKernelExec<UseEnd>> { + template <typename TSink> + static void Process(const IValueBuilder* valueBuilder, TBlockItem arg1, TBlockItem arg2, const TSink& sink) { + auto& storage = Reference<TMResourceName>(arg1); + ui64 interval = std::abs(arg2.Get<i64>()); if (interval == 0) { - return result; + sink(arg1); + return; } - auto& storage = Reference<TResourceName>(result); - if (auto res = Core(storage, interval, *valueBuilder)) { + + if (auto res = (UseEnd ? 
EndOf<TTMStorage> : StartOf<TTMStorage>)(storage, interval, *valueBuilder)) { storage = res.GetRef(); - return result; + sink(arg1); + } else { + sink(TBlockItem{}); } - return TUnboxedValuePod{}; } +}; + +template <const char* TResourceName, auto Core> +TUnboxedValue SimpleDatetimeToIntervalUdf(const IValueBuilder* valueBuilder, const TUnboxedValuePod* args) { + auto result = args[0]; + ui64 interval = std::abs(args[1].Get<i64>()); + if (interval == 0) { + return result; + } + auto& storage = Reference<TResourceName>(result); + if (auto res = Core(storage, interval, *valueBuilder)) { + storage = res.GetRef(); + return result; + } + return TUnboxedValuePod{}; +} -template<const char* TUdfName, auto Boundary, auto WBoundary> +template <const char* TUdfName, auto Boundary, auto WBoundary> class TBoundaryOfInterval: public ::NYql::NUdf::TBoxedValue { public: typedef bool TTypeAwareMarker; @@ -2424,27 +2401,27 @@ public: SetUnexpectedTagError(builder, resource.GetTag()); return true; } + private: - template<auto Func> - class TImpl : public TBoxedValue { + template <auto Func> + class TImpl: public TBoxedValue { public: TUnboxedValue Run(const IValueBuilder* valueBuilder, const TUnboxedValuePod* args) const final { try { return Func(valueBuilder, args); } catch (const std::exception&) { - TStringBuilder sb; - sb << CurrentExceptionMessage(); - sb << Endl << "[" << TStringBuf(Name()) << "]" ; - UdfTerminate(sb.c_str()); + TStringBuilder sb; + sb << CurrentExceptionMessage(); + sb << Endl << "[" << TStringBuf(Name()) << "]"; + UdfTerminate(sb.c_str()); } } }; - template<const char* TResourceName, auto Func> + template <const char* TResourceName, auto Func> static void BuildSignature(NUdf::IFunctionTypeInfoBuilder& builder, bool typesOnly) { builder.Returns<TOptional<TResource<TResourceName>>>(); - builder.Args()->Add<TAutoMap<TResource<TResourceName>>>() - .template Add<TAutoMap<std::conditional_t<TResourceName == TMResourceName, TInterval, TInterval64>>>(); + 
builder.Args()->Add<TAutoMap<TResource<TResourceName>>>().template Add<TAutoMap<std::conditional_t<TResourceName == TMResourceName, TInterval, TInterval64>>>(); builder.IsStrict(); if (!typesOnly) { builder.Implementation(new TImpl<Func>()); @@ -2452,14 +2429,14 @@ private: } }; - struct TTimeOfDayKernelExec : TUnaryKernelExec<TTimeOfDayKernelExec, TReaderTraits::TResource<false>, TFixedSizeArrayBuilder<TDataType<TInterval>::TLayout, false>> { - template<typename TSink> - static void Process(const IValueBuilder* valueBuilder, TBlockItem item, const TSink& sink) { - Y_UNUSED(valueBuilder); - auto& storage = Reference<TMResourceName>(item); - sink(TBlockItem{(TDataType<TInterval>::TLayout)storage.ToTimeOfDay()}); - } - }; +struct TTimeOfDayKernelExec: TUnaryKernelExec<TTimeOfDayKernelExec, TReaderTraits::TResource<false>, TFixedSizeArrayBuilder<TDataType<TInterval>::TLayout, false>> { + template <typename TSink> + static void Process(const IValueBuilder* valueBuilder, TBlockItem item, const TSink& sink) { + Y_UNUSED(valueBuilder); + auto& storage = Reference<TMResourceName>(item); + sink(TBlockItem{(TDataType<TInterval>::TLayout)storage.ToTimeOfDay()}); + } +}; class TTimeOfDay: public ::NYql::NUdf::TBoxedValue { public: @@ -2546,9 +2523,10 @@ public: SetUnexpectedTagError(builder, resource.GetTag()); return true; } + private: - template<const char* TResourceName> - class TImpl : public TBoxedValue { + template <const char* TResourceName> + class TImpl: public TBoxedValue { public: TUnboxedValue Run(const IValueBuilder* valueBuilder, const TUnboxedValuePod* args) const final { try { @@ -2556,15 +2534,15 @@ private: auto& storage = Reference<TResourceName>(args[0]); return TUnboxedValuePod((i64)storage.ToTimeOfDay()); } catch (const std::exception&) { - TStringBuilder sb; - sb << CurrentExceptionMessage(); - sb << Endl << "[" << TStringBuf(Name()) << "]" ; - UdfTerminate(sb.c_str()); + TStringBuilder sb; + sb << CurrentExceptionMessage(); + sb << Endl << "[" << 
TStringBuf(Name()) << "]"; + UdfTerminate(sb.c_str()); } } }; - template< const char* TResourceName> + template <const char* TResourceName> static void BuildSignature(NUdf::IFunctionTypeInfoBuilder& builder, bool typesOnly) { builder.Returns<std::conditional_t<TResourceName == TMResourceName, TInterval, TInterval64>>(); builder.Args()->Add<TAutoMap<TResource<TResourceName>>>(); @@ -2575,19 +2553,18 @@ private: } }; +// Add ... - // Add ... - - template<auto Core> - struct TAddKernelExec : TBinaryKernelExec<TAddKernelExec<Core>> { - template<typename TSink> - static void Process(const IValueBuilder* valueBuilder, TBlockItem date, TBlockItem arg, const TSink& sink) { - sink(Core(date, arg.Get<i32>(), valueBuilder->GetDateBuilder())); - } - }; +template <auto Core> +struct TAddKernelExec: TBinaryKernelExec<TAddKernelExec<Core>> { + template <typename TSink> + static void Process(const IValueBuilder* valueBuilder, TBlockItem date, TBlockItem arg, const TSink& sink) { + sink(Core(date, arg.Get<i32>(), valueBuilder->GetDateBuilder())); + } +}; -template<const char* TUdfName, auto Shifter, auto WShifter> -class TShift : public TBoxedValue { +template <const char* TUdfName, auto Shifter, auto WShifter> +class TShift: public TBoxedValue { public: typedef bool TTypeAwareMarker; @@ -2673,16 +2650,17 @@ public: SetUnexpectedTagError(builder, resource.GetTag()); return true; } + private: - template<auto ShiftHanler> - class TImpl : public TBoxedValue { + template <auto ShiftHanler> + class TImpl: public TBoxedValue { public: TUnboxedValue Run(const IValueBuilder* valueBuilder, const TUnboxedValuePod* args) const final { return ShiftHanler(args[0], args[1].Get<i32>(), valueBuilder->GetDateBuilder()); } }; - template<const char* TResourceName, auto ShiftHandler> + template <const char* TResourceName, auto ShiftHandler> static void BuildSignature(NUdf::IFunctionTypeInfoBuilder& builder, bool typesOnly) { builder.Returns<TOptional<TResource<TResourceName>>>(); 
builder.Args()->Add<TAutoMap<TResource<TResourceName>>>().template Add<i32>(); @@ -2693,154 +2671,155 @@ private: } }; - template<size_t Digits, bool Trailing = true, bool Leading = true> - struct PrintNDigits; +template <size_t Digits, bool Trailing = true, bool Leading = true> +struct PrintNDigits; - template<bool Trailing, bool Leading> - struct PrintNDigits<0U, Trailing, Leading> { - static constexpr ui32 Miltiplier = 1U; +template <bool Trailing, bool Leading> +struct PrintNDigits<0U, Trailing, Leading> { + static constexpr ui32 Miltiplier = 1U; - template <typename T> - static constexpr size_t Do(T, char*) { return 0U; } - }; + template <typename T> + static constexpr size_t Do(T, char*) { + return 0U; + } +}; - template<size_t Digits, bool Trailing, bool Leading> - struct PrintNDigits { - using TNextNoLeadPrint = PrintNDigits<Digits - 1U, Trailing, false>; - using TNextCommonPrint = PrintNDigits<Digits - 1U, Trailing, true>; - static_assert(TNextNoLeadPrint::Miltiplier == TNextCommonPrint::Miltiplier); - static constexpr ui32 Miltiplier = TNextCommonPrint::Miltiplier * 10U; +template <size_t Digits, bool Trailing, bool Leading> +struct PrintNDigits { + using TNextNoLeadPrint = PrintNDigits<Digits - 1U, Trailing, false>; + using TNextCommonPrint = PrintNDigits<Digits - 1U, Trailing, true>; + static_assert(TNextNoLeadPrint::Miltiplier == TNextCommonPrint::Miltiplier); + static constexpr ui32 Miltiplier = TNextCommonPrint::Miltiplier * 10U; - template <typename T> - static constexpr size_t Do(T in, char* out) { - in %= Miltiplier; - if (!Trailing && in == 0) { - return 0U; - } - const auto digit = in / TNextCommonPrint::Miltiplier; - if (!Leading && digit == 0) { - return TNextNoLeadPrint::Do(in, out); - } - *out = "0123456789"[digit]; - return 1U + TNextCommonPrint::Do(in, ++out); + template <typename T> + static constexpr size_t Do(T in, char* out) { + in %= Miltiplier; + if (!Trailing && in == 0) { + return 0U; } - }; - - // Format + const auto digit = in / 
TNextCommonPrint::Miltiplier; + if (!Leading && digit == 0) { + return TNextNoLeadPrint::Do(in, out); + } + *out = "0123456789"[digit]; + return 1U + TNextCommonPrint::Do(in, ++out); + } +}; - class TFormat : public TBoxedValue { - public: - explicit TFormat(TSourcePosition pos) - : Pos_(pos) - {} +// Format - static const TStringRef& Name() { - static auto name = TStringRef::Of("Format"); - return name; - } +class TFormat: public TBoxedValue { +public: + explicit TFormat(TSourcePosition pos) + : Pos_(pos) + { + } - static bool DeclareSignature( - const TStringRef& name, - TType*, - IFunctionTypeInfoBuilder& builder, - bool typesOnly) - { - if (Name() != name) { - return false; - } + static const TStringRef& Name() { + static auto name = TStringRef::Of("Format"); + return name; + } - builder.OptionalArgs(1).Args()->Add<char*>() - .Add<TOptional<bool>>().Name("AlwaysWriteFractionalSeconds"); - builder.Returns( - builder.SimpleSignatureType<char*(TAutoMap<TResource<TM64ResourceName>>)>()); - if (!typesOnly) { - builder.Implementation(new TFormat(builder.GetSourcePosition())); - } + static bool DeclareSignature( + const TStringRef& name, + TType*, + IFunctionTypeInfoBuilder& builder, + bool typesOnly) + { + if (Name() != name) { + return false; + } - return true; + builder.OptionalArgs(1).Args()->Add<char*>().Add<TOptional<bool>>().Name("AlwaysWriteFractionalSeconds"); + builder.Returns( + builder.SimpleSignatureType<char*(TAutoMap<TResource<TM64ResourceName>>)>()); + if (!typesOnly) { + builder.Implementation(new TFormat(builder.GetSourcePosition())); } - private: - using TPrintersList = std::vector<std::function<size_t(char*, const TUnboxedValuePod&, const IDateBuilder&)>>; + return true; + } - struct TDataPrinter { - const std::string_view Data; +private: + using TPrintersList = std::vector<std::function<size_t(char*, const TUnboxedValuePod&, const IDateBuilder&)>>; - size_t operator()(char* out, const TUnboxedValuePod&, const IDateBuilder&) const { - 
std::memcpy(out, Data.data(), Data.size()); - return Data.size(); - } - }; + struct TDataPrinter { + const std::string_view Data; - TUnboxedValue Run(const IValueBuilder*, const TUnboxedValuePod* args) const final try { - bool alwaysWriteFractionalSeconds = false; - if (auto val = args[1]) { - alwaysWriteFractionalSeconds = val.Get<bool>(); - } + size_t operator()(char* out, const TUnboxedValuePod&, const IDateBuilder&) const { + std::memcpy(out, Data.data(), Data.size()); + return Data.size(); + } + }; - return TUnboxedValuePod(new TImpl(Pos_, args[0], alwaysWriteFractionalSeconds)); - } catch (const std::exception& e) { - UdfTerminate((TStringBuilder() << Pos_ << " " << e.what()).c_str()); + TUnboxedValue Run(const IValueBuilder*, const TUnboxedValuePod* args) const final try { + bool alwaysWriteFractionalSeconds = false; + if (auto val = args[1]) { + alwaysWriteFractionalSeconds = val.Get<bool>(); } - class TImpl : public TBoxedValue { - public: - TUnboxedValue Run( - const IValueBuilder* valueBuilder, - const TUnboxedValuePod* args) const override - { - try { - EMPTY_RESULT_ON_EMPTY_ARG(0); - const auto value = args[0]; + return TUnboxedValuePod(new TImpl(Pos_, args[0], alwaysWriteFractionalSeconds)); + } catch (const std::exception& e) { + UdfTerminate((TStringBuilder() << Pos_ << " " << e.what()).c_str()); + } - auto& builder = valueBuilder->GetDateBuilder(); + class TImpl: public TBoxedValue { + public: + TUnboxedValue Run( + const IValueBuilder* valueBuilder, + const TUnboxedValuePod* args) const override { + try { + EMPTY_RESULT_ON_EMPTY_ARG(0); + const auto value = args[0]; - auto result = valueBuilder->NewStringNotFilled(ReservedSize_); - auto pos = result.AsStringRef().Data(); - ui32 size = 0U; + auto& builder = valueBuilder->GetDateBuilder(); - for (const auto& printer : Printers_) { - if (const auto plus = printer(pos, value, builder)) { - size += plus; - pos += plus; - } - } + auto result = valueBuilder->NewStringNotFilled(ReservedSize_); + auto pos 
= result.AsStringRef().Data(); + ui32 size = 0U; - if (size < ReservedSize_) { - result = valueBuilder->SubString(result.Release(), 0U, size); + for (const auto& printer : Printers_) { + if (const auto plus = printer(pos, value, builder)) { + size += plus; + pos += plus; } + } - return result; - } catch (const std::exception& e) { - UdfTerminate((TStringBuilder() << Pos_ << " " << e.what()).c_str()); + if (size < ReservedSize_) { + result = valueBuilder->SubString(result.Release(), 0U, size); } + + return result; + } catch (const std::exception& e) { + UdfTerminate((TStringBuilder() << Pos_ << " " << e.what()).c_str()); } + } - TImpl(TSourcePosition pos, TUnboxedValue format, bool alwaysWriteFractionalSeconds) - : Pos_(pos) - , Format_(format) - { - const std::string_view formatView(Format_.AsStringRef()); - auto dataStart = formatView.begin(); - size_t dataSize = 0U; + TImpl(TSourcePosition pos, TUnboxedValue format, bool alwaysWriteFractionalSeconds) + : Pos_(pos) + , Format_(format) + { + const std::string_view formatView(Format_.AsStringRef()); + auto dataStart = formatView.begin(); + size_t dataSize = 0U; - for (auto ptr = formatView.begin(); formatView.end() != ptr; ++ptr) { - if (*ptr != '%') { - ++dataSize; - continue; - } + for (auto ptr = formatView.begin(); formatView.end() != ptr; ++ptr) { + if (*ptr != '%') { + ++dataSize; + continue; + } - if (dataSize) { - Printers_.emplace_back(TDataPrinter{std::string_view(&*dataStart, dataSize)}); - ReservedSize_ += dataSize; - dataSize = 0U; - } + if (dataSize) { + Printers_.emplace_back(TDataPrinter{std::string_view(&*dataStart, dataSize)}); + ReservedSize_ += dataSize; + dataSize = 0U; + } - if (formatView.end() == ++ptr) { - ythrow yexception() << "format string ends with single %%"; - } + if (formatView.end() == ++ptr) { + ythrow yexception() << "format string ends with single %%"; + } - switch (*ptr) { + switch (*ptr) { case '%': { static constexpr size_t size = 1; Printers_.emplace_back([](char* out, const 
TUnboxedValuePod&, const IDateBuilder&) { @@ -2904,9 +2883,7 @@ private: out += PrintNDigits<size>::Do(GetSecond<TM64ResourceName>(value), out); *out++ = '.'; constexpr size_t msize = 6; - auto addSz = alwaysWriteFractionalSeconds ? - PrintNDigits<msize, true>::Do(microsecond, out) : - PrintNDigits<msize, false>::Do(microsecond, out); + auto addSz = alwaysWriteFractionalSeconds ? PrintNDigits<msize, true>::Do(microsecond, out) : PrintNDigits<msize, false>::Do(microsecond, out); return size + 1U + addSz; } return PrintNDigits<size>::Do(GetSecond<TM64ResourceName>(value), out); @@ -2956,7 +2933,7 @@ private: case 'b': { static constexpr size_t size = 3; Printers_.emplace_back([](char* out, const TUnboxedValuePod& value, const IDateBuilder&) { - static constexpr std::string_view mp[] { + static constexpr std::string_view mp[]{ "Jan", "Feb", "Mar", @@ -2968,8 +2945,7 @@ private: "Sep", "Oct", "Nov", - "Dec" - }; + "Dec"}; auto month = GetMonth<TM64ResourceName>(value); Y_ENSURE(month > 0 && month <= sizeof(mp) / sizeof(mp[0]), "Invalid month value"); std::memcpy(out, mp[month - 1].data(), size); @@ -2980,7 +2956,7 @@ private: } case 'B': { Printers_.emplace_back([](char* out, const TUnboxedValuePod& value, const IDateBuilder&) { - static constexpr std::string_view mp[] { + static constexpr std::string_view mp[]{ "January", "February", "March", @@ -2992,8 +2968,7 @@ private: "September", "October", "November", - "December" - }; + "December"}; auto month = GetMonth<TM64ResourceName>(value); Y_ENSURE(month > 0 && month <= sizeof(mp) / sizeof(mp[0]), "Invalid month value"); const std::string_view monthFullName = mp[month - 1]; @@ -3006,181 +2981,180 @@ private: default: throw yexception() << "character '" << *ptr << "' is not a valid format specifier." 
<< "\nSee documentation for valid format characters"; - } - - dataStart = ptr + 1U; } - if (dataSize) { - Printers_.emplace_back(TDataPrinter{std::string_view(dataStart, dataSize)}); - ReservedSize_ += dataSize; - } + dataStart = ptr + 1U; } - private: - const TSourcePosition Pos_; - - TUnboxedValue Format_; - TPrintersList Printers_{}; - size_t ReservedSize_ = 0; - }; + if (dataSize) { + Printers_.emplace_back(TDataPrinter{std::string_view(dataStart, dataSize)}); + ReservedSize_ += dataSize; + } + } + private: const TSourcePosition Pos_; + + TUnboxedValue Format_; + TPrintersList Printers_{}; + size_t ReservedSize_ = 0; }; - template<size_t Digits, bool Variable = false> - struct ParseNDigits; + const TSourcePosition Pos_; +}; - template<bool Variable> - struct ParseNDigits<0U, Variable> { - template <typename T> - static constexpr bool Do(std::string_view::const_iterator&, T&) { - return true; - } - }; +template <size_t Digits, bool Variable = false> +struct ParseNDigits; - template<size_t Digits, bool Variable> - struct ParseNDigits { - template <typename T> - static constexpr bool Do(std::string_view::const_iterator& it, T& out) { - const auto d = *it; - if (!std::isdigit(d)) { - // XXX: If the current char is not a digit, the - // parsing succeeds iff there are no more digits - // to be parsed (see the class specialization - // above) or there are given less than N digits - // to be parsed. 
- if constexpr (Variable) { - return true; - } - return false; +template <bool Variable> +struct ParseNDigits<0U, Variable> { + template <typename T> + static constexpr bool Do(std::string_view::const_iterator&, T&) { + return true; + } +}; + +template <size_t Digits, bool Variable> +struct ParseNDigits { + template <typename T> + static constexpr bool Do(std::string_view::const_iterator& it, T& out) { + const auto d = *it; + if (!std::isdigit(d)) { + // XXX: If the current char is not a digit, the + // parsing succeeds iff there are no more digits + // to be parsed (see the class specialization + // above) or there are given less than N digits + // to be parsed. + if constexpr (Variable) { + return true; } - out *= 10U; - out += d - '0'; - return ParseNDigits<Digits - 1U, Variable>::Do(++it, out); + return false; } - }; + out *= 10U; + out += d - '0'; + return ParseNDigits<Digits - 1U, Variable>::Do(++it, out); + } +}; - // Parse +// Parse - template<const char* TUdfName, const char* TResourceName> - class TParse : public TBoxedValue { +template <const char* TUdfName, const char* TResourceName> +class TParse: public TBoxedValue { +public: + class TFactory: public TBoxedValue { public: - class TFactory : public TBoxedValue { - public: - explicit TFactory(TSourcePosition pos) - : Pos_(pos) - {} - - private: - TUnboxedValue Run(const IValueBuilder*, const TUnboxedValuePod* args) const final try { - return TUnboxedValuePod(new TParse(args[0], Pos_)); - } catch (const std::exception& e) { - UdfTerminate((TStringBuilder() << Pos_ << " " << e.what()).c_str()); - } - - const TSourcePosition Pos_; - }; + explicit TFactory(TSourcePosition pos) + : Pos_(pos) + { + } - static const TStringRef& Name() { - static auto name = TStringRef(TUdfName, std::strlen(TUdfName)); - return name; + private: + TUnboxedValue Run(const IValueBuilder*, const TUnboxedValuePod* args) const final try { + return TUnboxedValuePod(new TParse(args[0], Pos_)); + } catch (const std::exception& e) { + 
UdfTerminate((TStringBuilder() << Pos_ << " " << e.what()).c_str()); } - static bool DeclareSignature( - const TStringRef& name, - TType*, - IFunctionTypeInfoBuilder& builder, - bool typesOnly) - { - if (Name() != name) { - return false; - } + const TSourcePosition Pos_; + }; - builder.OptionalArgs(1).Args()->Add<char*>() - .template Add<TOptional<ui16>>(); - builder.Returns( - builder.SimpleSignatureType<TOptional<TResource<TResourceName>>(TAutoMap<char*>)>()); - if (!typesOnly) { - builder.Implementation(new TParse::TFactory(builder.GetSourcePosition())); - } + static const TStringRef& Name() { + static auto name = TStringRef(TUdfName, std::strlen(TUdfName)); + return name; + } - return true; + static bool DeclareSignature( + const TStringRef& name, + TType*, + IFunctionTypeInfoBuilder& builder, + bool typesOnly) + { + if (Name() != name) { + return false; } - private: - const TSourcePosition Pos_; - const TUnboxedValue Format_; + builder.OptionalArgs(1).Args()->Add<char*>().template Add<TOptional<ui16>>(); + builder.Returns( + builder.SimpleSignatureType<TOptional<TResource<TResourceName>>(TAutoMap<char*>)>()); + if (!typesOnly) { + builder.Implementation(new TParse::TFactory(builder.GetSourcePosition())); + } - std::vector<std::function<bool(std::string_view::const_iterator& it, size_t, TUnboxedValuePod&, const IDateBuilder&)>> Scanners_; + return true; + } - struct TDataScanner { - const std::string_view Data; +private: + const TSourcePosition Pos_; + const TUnboxedValue Format_; - bool operator()(std::string_view::const_iterator& it, size_t limit, TUnboxedValuePod&, const IDateBuilder&) const { - if (limit < Data.size() || !std::equal(Data.begin(), Data.end(), it)) { - return false; - } - std::advance(it, Data.size()); - return true; - } - }; + std::vector<std::function<bool(std::string_view::const_iterator& it, size_t, TUnboxedValuePod&, const IDateBuilder&)>> Scanners_; - TUnboxedValue Run( - const IValueBuilder* valueBuilder, - const TUnboxedValuePod* 
args) const override - { - try { - EMPTY_RESULT_ON_EMPTY_ARG(0); + struct TDataScanner { + const std::string_view Data; + + bool operator()(std::string_view::const_iterator& it, size_t limit, TUnboxedValuePod&, const IDateBuilder&) const { + if (limit < Data.size() || !std::equal(Data.begin(), Data.end(), it)) { + return false; + } + std::advance(it, Data.size()); + return true; + } + }; - const std::string_view buffer = args[0].AsStringRef(); + TUnboxedValue Run( + const IValueBuilder* valueBuilder, + const TUnboxedValuePod* args) const override { + try { + EMPTY_RESULT_ON_EMPTY_ARG(0); - TUnboxedValuePod result(0); - auto& storage = Reference<TResourceName>(result); - storage.MakeDefault(); + const std::string_view buffer = args[0].AsStringRef(); - auto& builder = valueBuilder->GetDateBuilder(); + TUnboxedValuePod result(0); + auto& storage = Reference<TResourceName>(result); + storage.MakeDefault(); - auto it = buffer.begin(); - for (const auto& scanner : Scanners_) { - if (!scanner(it, std::distance(it, buffer.end()), result, builder)) { - return TUnboxedValuePod(); - } - } + auto& builder = valueBuilder->GetDateBuilder(); - if (buffer.end() != it || !storage.Validate(builder)) { + auto it = buffer.begin(); + for (const auto& scanner : Scanners_) { + if (!scanner(it, std::distance(it, buffer.end()), result, builder)) { return TUnboxedValuePod(); } - return result; - } catch (const std::exception& e) { - UdfTerminate((TStringBuilder() << Pos_ << " " << e.what()).c_str()); } + + if (buffer.end() != it || !storage.Validate(builder)) { + return TUnboxedValuePod(); + } + return result; + } catch (const std::exception& e) { + UdfTerminate((TStringBuilder() << Pos_ << " " << e.what()).c_str()); } + } - TParse(const TUnboxedValuePod& runConfig, TSourcePosition pos) - : Pos_(pos) - , Format_(runConfig) - { - const std::string_view formatView(Format_.AsStringRef()); - auto dataStart = formatView.begin(); - size_t dataSize = 0U; + TParse(const TUnboxedValuePod& runConfig, 
TSourcePosition pos) + : Pos_(pos) + , Format_(runConfig) + { + const std::string_view formatView(Format_.AsStringRef()); + auto dataStart = formatView.begin(); + size_t dataSize = 0U; - for (auto ptr = formatView.begin(); formatView.end() != ptr; ++ptr) { - if (*ptr != '%') { - ++dataSize; - continue; - } + for (auto ptr = formatView.begin(); formatView.end() != ptr; ++ptr) { + if (*ptr != '%') { + ++dataSize; + continue; + } - if (dataSize) { - Scanners_.emplace_back(TDataScanner{std::string_view(&*dataStart, dataSize)}); - dataSize = 0; - } + if (dataSize) { + Scanners_.emplace_back(TDataScanner{std::string_view(&*dataStart, dataSize)}); + dataSize = 0; + } - if (++ptr == formatView.end()) { - ythrow yexception() << "format string ends with single %%"; - } + if (++ptr == formatView.end()) { + ythrow yexception() << "format string ends with single %%"; + } - switch (*ptr) { + switch (*ptr) { case '%': Scanners_.emplace_back([](std::string_view::const_iterator& it, size_t limit, TUnboxedValuePod&, const IDateBuilder&) { return limit > 0U && *it++ == '%'; @@ -3313,7 +3287,8 @@ private: ++it; --digits; } - for (; !digits && limit && std::isdigit(*it); --limit, ++it); + for (; !digits && limit && std::isdigit(*it); --limit, ++it) + ; while (digits--) { usec *= 10U; } @@ -3381,16 +3356,16 @@ private: } default: ythrow yexception() << "invalid format character: " << *ptr; - } - - dataStart = ptr + 1U; } - if (dataSize) { - Scanners_.emplace_back(TDataScanner{std::string_view(&*dataStart, dataSize)}); - } + dataStart = ptr + 1U; } - }; + + if (dataSize) { + Scanners_.emplace_back(TDataScanner{std::string_view(&*dataStart, dataSize)}); + } + } +}; #define PARSE_SPECIFIC_FORMAT(format) \ SIMPLE_STRICT_UDF(TParse##format, TOptional<TResource<TMResourceName>>(TAutoMap<char*>)) { \ @@ -3406,140 +3381,139 @@ private: return result; \ } - PARSE_SPECIFIC_FORMAT(Rfc822); - PARSE_SPECIFIC_FORMAT(Iso8601); - PARSE_SPECIFIC_FORMAT(Http); - PARSE_SPECIFIC_FORMAT(X509); 
+PARSE_SPECIFIC_FORMAT(Rfc822); +PARSE_SPECIFIC_FORMAT(Iso8601); +PARSE_SPECIFIC_FORMAT(Http); +PARSE_SPECIFIC_FORMAT(X509); - SIMPLE_MODULE(TDateTime2Module, - TUserDataTypeFuncFactory<true, true, SplitUDF, TSplit, - TDate, - TDatetime, - TTimestamp, - TTzDate, - TTzDatetime, - TTzTimestamp, - TDate32, - TDatetime64, - TTimestamp64, - TTzDate32, - TTzDatetime64, - TTzTimestamp64>, +SIMPLE_MODULE(TDateTime2Module, + TUserDataTypeFuncFactory<true, true, SplitUDF, TSplit, + TDate, + TDatetime, + TTimestamp, + TTzDate, + TTzDatetime, + TTzTimestamp, + TDate32, + TDatetime64, + TTimestamp64, + TTzDate32, + TTzDatetime64, + TTzTimestamp64>, - TMakeDate, - TMakeDatetime, - TMakeTimestamp, - TMakeTzDate, - TMakeTzDatetime, - TMakeTzTimestamp, + TMakeDate, + TMakeDatetime, + TMakeTimestamp, + TMakeTzDate, + TMakeTzDatetime, + TMakeTzTimestamp, - TConvert, + TConvert, - TMakeDate32, - TMakeDatetime64, - TMakeTimestamp64, - TMakeTzDate32, - TMakeTzDatetime64, - TMakeTzTimestamp64, + TMakeDate32, + TMakeDatetime64, + TMakeTimestamp64, + TMakeTzDate32, + TMakeTzDatetime64, + TMakeTzTimestamp64, - TGetDateComponent<GetYearUDF, ui16, GetYear<TMResourceName>, i32, GetYear<TM64ResourceName>>, - TGetDateComponent<GetDayOfYearUDF, ui16, GetDayOfYear<TMResourceName>, ui16, GetDayOfYear<TM64ResourceName>>, - TGetDateComponent<GetMonthUDF, ui8, GetMonth<TMResourceName>, ui8, GetMonth<TM64ResourceName>>, - TGetDateComponentName<GetMonthNameUDF, GetMonthName<TMResourceName>, GetMonthName<TM64ResourceName>>, - TGetDateComponent<GetWeekOfYearUDF, ui8, GetWeekOfYear<TMResourceName>, ui8, GetWeekOfYear<TM64ResourceName>>, - TGetDateComponent<GetWeekOfYearIso8601UDF, ui8, GetWeekOfYearIso8601<TMResourceName>, ui8, GetWeekOfYearIso8601<TM64ResourceName>>, - TGetDateComponent<GetDayOfMonthUDF, ui8, GetDay<TMResourceName>, ui8, GetDay<TM64ResourceName>>, - TGetDateComponent<GetDayOfWeekUDF, ui8, GetDayOfWeek<TMResourceName>, ui8, GetDayOfWeek<TM64ResourceName>>, - 
TGetDateComponentName<GetDayOfWeekNameUDF, GetDayOfWeekName<TMResourceName>, GetDayOfWeekName<TM64ResourceName>>, - TGetTimeComponent<GetHourUDF, ui8, GetHour<TMResourceName>, GetHour<TM64ResourceName>, 1u, 3600u, 24u, false>, - TGetTimeComponent<GetMinuteUDF, ui8, GetMinute<TMResourceName>, GetMinute<TM64ResourceName>, 1u, 60u, 60u, false>, - TGetTimeComponent<GetSecondUDF, ui8, GetSecond<TMResourceName>, GetSecond<TM64ResourceName>, 1u, 1u, 60u, false>, - TGetTimeComponent<GetMillisecondOfSecondUDF, ui32, GetMicrosecond<TMResourceName>, GetMicrosecond<TM64ResourceName>, 1000u, 1000u, 1000u, true>, - TGetTimeComponent<GetMicrosecondOfSecondUDF, ui32, GetMicrosecond<TMResourceName>, GetMicrosecond<TM64ResourceName>, 1u, 1u, 1000000u, true>, - TGetDateComponent<GetTimezoneIdUDF, ui16, GetTimezoneId<TMResourceName>, ui16, GetTimezoneId<TM64ResourceName>>, - TGetDateComponentName<GetTimezoneNameUDF, GetTimezoneName<TMResourceName>, GetTimezoneName<TM64ResourceName>>, + TGetDateComponent<GetYearUDF, ui16, GetYear<TMResourceName>, i32, GetYear<TM64ResourceName>>, + TGetDateComponent<GetDayOfYearUDF, ui16, GetDayOfYear<TMResourceName>, ui16, GetDayOfYear<TM64ResourceName>>, + TGetDateComponent<GetMonthUDF, ui8, GetMonth<TMResourceName>, ui8, GetMonth<TM64ResourceName>>, + TGetDateComponentName<GetMonthNameUDF, GetMonthName<TMResourceName>, GetMonthName<TM64ResourceName>>, + TGetDateComponent<GetWeekOfYearUDF, ui8, GetWeekOfYear<TMResourceName>, ui8, GetWeekOfYear<TM64ResourceName>>, + TGetDateComponent<GetWeekOfYearIso8601UDF, ui8, GetWeekOfYearIso8601<TMResourceName>, ui8, GetWeekOfYearIso8601<TM64ResourceName>>, + TGetDateComponent<GetDayOfMonthUDF, ui8, GetDay<TMResourceName>, ui8, GetDay<TM64ResourceName>>, + TGetDateComponent<GetDayOfWeekUDF, ui8, GetDayOfWeek<TMResourceName>, ui8, GetDayOfWeek<TM64ResourceName>>, + TGetDateComponentName<GetDayOfWeekNameUDF, GetDayOfWeekName<TMResourceName>, GetDayOfWeekName<TM64ResourceName>>, + TGetTimeComponent<GetHourUDF, ui8, 
GetHour<TMResourceName>, GetHour<TM64ResourceName>, 1u, 3600u, 24u, false>, + TGetTimeComponent<GetMinuteUDF, ui8, GetMinute<TMResourceName>, GetMinute<TM64ResourceName>, 1u, 60u, 60u, false>, + TGetTimeComponent<GetSecondUDF, ui8, GetSecond<TMResourceName>, GetSecond<TM64ResourceName>, 1u, 1u, 60u, false>, + TGetTimeComponent<GetMillisecondOfSecondUDF, ui32, GetMicrosecond<TMResourceName>, GetMicrosecond<TM64ResourceName>, 1000u, 1000u, 1000u, true>, + TGetTimeComponent<GetMicrosecondOfSecondUDF, ui32, GetMicrosecond<TMResourceName>, GetMicrosecond<TM64ResourceName>, 1u, 1u, 1000000u, true>, + TGetDateComponent<GetTimezoneIdUDF, ui16, GetTimezoneId<TMResourceName>, ui16, GetTimezoneId<TM64ResourceName>>, + TGetDateComponentName<GetTimezoneNameUDF, GetTimezoneName<TMResourceName>, GetTimezoneName<TM64ResourceName>>, - TUpdate, + TUpdate, - TFromSeconds, - TFromMilliseconds, - TFromMicroseconds, + TFromSeconds, + TFromMilliseconds, + TFromMicroseconds, - TFromSeconds64, - TFromMilliseconds64, - TFromMicroseconds64, + TFromSeconds64, + TFromMilliseconds64, + TFromMicroseconds64, - TIntervalFromDays, - TIntervalFromHours, - TIntervalFromMinutes, + TIntervalFromDays, + TIntervalFromHours, + TIntervalFromMinutes, - TLangVerForked< - NYql::MakeLangVersion(2025, 03), - NLegacy::TIntervalFromSeconds, - NActual::TIntervalFromSeconds>, + TLangVerForked< + NYql::MakeLangVersion(2025, 03), + NLegacy::TIntervalFromSeconds, + NActual::TIntervalFromSeconds>, - TIntervalFromMilliseconds, - TIntervalFromMicroseconds, + TIntervalFromMilliseconds, + TIntervalFromMicroseconds, - TInterval64FromDays, - TInterval64FromHours, - TInterval64FromMinutes, - TInterval64FromSeconds, - TInterval64FromMilliseconds, - TInterval64FromMicroseconds, + TInterval64FromDays, + TInterval64FromHours, + TInterval64FromMinutes, + TInterval64FromSeconds, + TInterval64FromMilliseconds, + TInterval64FromMicroseconds, - TToConverter<ToDaysUDF, i32, i32, UsecondsInDay>, - TToConverter<ToHoursUDF, i32, i64, 
UsecondsInHour>, - TToConverter<ToMinutesUDF, i32, i64, UsecondsInMinute>, + TToConverter<ToDaysUDF, i32, i32, UsecondsInDay>, + TToConverter<ToHoursUDF, i32, i64, UsecondsInHour>, + TToConverter<ToMinutesUDF, i32, i64, UsecondsInMinute>, - TBoundaryOf<StartOfYearUDF, SimpleDatetimeToDatetimeUdf<TMResourceName, StartOfYear<TTMStorage>>, - SimpleDatetimeToDatetimeUdf<TM64ResourceName, StartOfYear<TTM64Storage>>>, - TBoundaryOf<StartOfQuarterUDF, SimpleDatetimeToDatetimeUdf<TMResourceName, StartOfQuarter<TTMStorage>>, - SimpleDatetimeToDatetimeUdf<TM64ResourceName, StartOfQuarter<TTM64Storage>>>, - TBoundaryOf<StartOfMonthUDF, SimpleDatetimeToDatetimeUdf<TMResourceName, StartOfMonth<TTMStorage>>, - SimpleDatetimeToDatetimeUdf<TM64ResourceName, StartOfMonth<TTM64Storage>>>, - TBoundaryOf<StartOfWeekUDF, SimpleDatetimeToDatetimeUdf<TMResourceName, StartOfWeek<TTMStorage>>, - SimpleDatetimeToDatetimeUdf<TM64ResourceName, StartOfWeek<TTM64Storage>>>, - TBoundaryOf<StartOfDayUDF, SimpleDatetimeToDatetimeUdf<TMResourceName, StartOfDay<TTMStorage>>, - SimpleDatetimeToDatetimeUdf<TM64ResourceName, StartOfDay<TTM64Storage>>>, - TBoundaryOfInterval<StartOfUDF, SimpleDatetimeToIntervalUdf<TMResourceName, StartOf<TTMStorage>>, - SimpleDatetimeToIntervalUdf<TM64ResourceName, StartOf<TTM64Storage>>>, - TTimeOfDay, + TBoundaryOf<StartOfYearUDF, SimpleDatetimeToDatetimeUdf<TMResourceName, StartOfYear<TTMStorage>>, + SimpleDatetimeToDatetimeUdf<TM64ResourceName, StartOfYear<TTM64Storage>>>, + TBoundaryOf<StartOfQuarterUDF, SimpleDatetimeToDatetimeUdf<TMResourceName, StartOfQuarter<TTMStorage>>, + SimpleDatetimeToDatetimeUdf<TM64ResourceName, StartOfQuarter<TTM64Storage>>>, + TBoundaryOf<StartOfMonthUDF, SimpleDatetimeToDatetimeUdf<TMResourceName, StartOfMonth<TTMStorage>>, + SimpleDatetimeToDatetimeUdf<TM64ResourceName, StartOfMonth<TTM64Storage>>>, + TBoundaryOf<StartOfWeekUDF, SimpleDatetimeToDatetimeUdf<TMResourceName, StartOfWeek<TTMStorage>>, + 
SimpleDatetimeToDatetimeUdf<TM64ResourceName, StartOfWeek<TTM64Storage>>>, + TBoundaryOf<StartOfDayUDF, SimpleDatetimeToDatetimeUdf<TMResourceName, StartOfDay<TTMStorage>>, + SimpleDatetimeToDatetimeUdf<TM64ResourceName, StartOfDay<TTM64Storage>>>, + TBoundaryOfInterval<StartOfUDF, SimpleDatetimeToIntervalUdf<TMResourceName, StartOf<TTMStorage>>, + SimpleDatetimeToIntervalUdf<TM64ResourceName, StartOf<TTM64Storage>>>, + TTimeOfDay, - TShift<ShiftYearsUDF, DoAddYears<TMResourceName>, DoAddYears<TM64ResourceName>>, - TShift<ShiftQuartersUDF, DoAddQuarters<TMResourceName>, DoAddQuarters<TM64ResourceName>>, - TShift<ShiftMonthsUDF, DoAddMonths<TMResourceName>, DoAddMonths<TM64ResourceName>>, + TShift<ShiftYearsUDF, DoAddYears<TMResourceName>, DoAddYears<TM64ResourceName>>, + TShift<ShiftQuartersUDF, DoAddQuarters<TMResourceName>, DoAddQuarters<TM64ResourceName>>, + TShift<ShiftMonthsUDF, DoAddMonths<TMResourceName>, DoAddMonths<TM64ResourceName>>, - TBoundaryOf<EndOfYearUDF, SimpleDatetimeToDatetimeUdf<TMResourceName, EndOfYear<TTMStorage>>, - SimpleDatetimeToDatetimeUdf<TM64ResourceName, EndOfYear<TTM64Storage>>>, - TBoundaryOf<EndOfQuarterUDF, SimpleDatetimeToDatetimeUdf<TMResourceName, EndOfQuarter<TTMStorage>>, - SimpleDatetimeToDatetimeUdf<TM64ResourceName, EndOfQuarter<TTM64Storage>>>, - TBoundaryOf<EndOfMonthUDF, SimpleDatetimeToDatetimeUdf<TMResourceName, EndOfMonth<TTMStorage>>, - SimpleDatetimeToDatetimeUdf<TM64ResourceName, EndOfMonth<TTM64Storage>>>, - TBoundaryOf<EndOfWeekUDF, SimpleDatetimeToDatetimeUdf<TMResourceName, EndOfWeek<TTMStorage>>, - SimpleDatetimeToDatetimeUdf<TM64ResourceName, EndOfWeek<TTM64Storage>>>, - TBoundaryOf<EndOfDayUDF, SimpleDatetimeToDatetimeUdf<TMResourceName, EndOfDay<TTMStorage>>, - SimpleDatetimeToDatetimeUdf<TM64ResourceName, EndOfDay<TTM64Storage>>>, - TBoundaryOfInterval<EndOfUDF, SimpleDatetimeToIntervalUdf<TMResourceName, EndOf<TTMStorage>>, - SimpleDatetimeToIntervalUdf<TM64ResourceName, EndOf<TTM64Storage>>>, + 
TBoundaryOf<EndOfYearUDF, SimpleDatetimeToDatetimeUdf<TMResourceName, EndOfYear<TTMStorage>>, + SimpleDatetimeToDatetimeUdf<TM64ResourceName, EndOfYear<TTM64Storage>>>, + TBoundaryOf<EndOfQuarterUDF, SimpleDatetimeToDatetimeUdf<TMResourceName, EndOfQuarter<TTMStorage>>, + SimpleDatetimeToDatetimeUdf<TM64ResourceName, EndOfQuarter<TTM64Storage>>>, + TBoundaryOf<EndOfMonthUDF, SimpleDatetimeToDatetimeUdf<TMResourceName, EndOfMonth<TTMStorage>>, + SimpleDatetimeToDatetimeUdf<TM64ResourceName, EndOfMonth<TTM64Storage>>>, + TBoundaryOf<EndOfWeekUDF, SimpleDatetimeToDatetimeUdf<TMResourceName, EndOfWeek<TTMStorage>>, + SimpleDatetimeToDatetimeUdf<TM64ResourceName, EndOfWeek<TTM64Storage>>>, + TBoundaryOf<EndOfDayUDF, SimpleDatetimeToDatetimeUdf<TMResourceName, EndOfDay<TTMStorage>>, + SimpleDatetimeToDatetimeUdf<TM64ResourceName, EndOfDay<TTM64Storage>>>, + TBoundaryOfInterval<EndOfUDF, SimpleDatetimeToIntervalUdf<TMResourceName, EndOf<TTMStorage>>, + SimpleDatetimeToIntervalUdf<TM64ResourceName, EndOf<TTM64Storage>>>, - TLangVerForked< - NYql::MakeLangVersion(2025, 03), - TToUnits<ToSecondsUDF, /* TResult = */ ui32, /* TSignedResult = */ i32, /* TWResult = */ i64, 1>, - TToUnits<ToSecondsUDF, /* TResult = */ ui32, /* TSignedResult = */ i64, /* TWResult = */ i64, 1>>, + TLangVerForked< + NYql::MakeLangVersion(2025, 03), + TToUnits<ToSecondsUDF, /* TResult = */ ui32, /* TSignedResult = */ i32, /* TWResult = */ i64, 1>, + TToUnits<ToSecondsUDF, /* TResult = */ ui32, /* TSignedResult = */ i64, /* TWResult = */ i64, 1>>, - TToUnits<ToMillisecondsUDF, /* TResult = */ ui64, /* TSignedResult = */ i64, /* TWResult = */ i64, 1000>, - TToUnits<ToMicrosecondsUDF, /* TResult = */ ui64, /* TSignedResult = */ i64, /* TWResult = */ i64, 1000000>, + TToUnits<ToMillisecondsUDF, /* TResult = */ ui64, /* TSignedResult = */ i64, /* TWResult = */ i64, 1000>, + TToUnits<ToMicrosecondsUDF, /* TResult = */ ui64, /* TSignedResult = */ i64, /* TWResult = */ i64, 1000000>, - TFormat, - 
TParse<ParseUDF, TMResourceName>, - TParse<Parse64UDF, TM64ResourceName>, + TFormat, + TParse<ParseUDF, TMResourceName>, + TParse<Parse64UDF, TM64ResourceName>, - TParseRfc822, - TParseIso8601, - TParseHttp, - TParseX509 - ) -} + TParseRfc822, + TParseIso8601, + TParseHttp, + TParseX509) +} // namespace REGISTER_MODULES(TDateTime2Module) diff --git a/yql/essentials/udfs/common/datetime2/ya.make b/yql/essentials/udfs/common/datetime2/ya.make index 1ca69ae4bce..ca50b15e73f 100644 --- a/yql/essentials/udfs/common/datetime2/ya.make +++ b/yql/essentials/udfs/common/datetime2/ya.make @@ -4,6 +4,7 @@ YQL_UDF_CONTRIB(datetime2_udf) 43 0 ) + ENABLE(YQL_STYLE_CPP) SRCS( datetime_udf.cpp ) diff --git a/yql/essentials/udfs/common/digest/digest_udf.cpp b/yql/essentials/udfs/common/digest/digest_udf.cpp index 834d38aeaf6..63aa748e56c 100644 --- a/yql/essentials/udfs/common/digest/digest_udf.cpp +++ b/yql/essentials/udfs/common/digest/digest_udf.cpp @@ -27,456 +27,465 @@ using namespace NKikimr; using namespace NUdf; namespace { - enum EDigestType { - CRC32C, CRC64, FNV32, FNV64, MURMUR, MURMUR32, MURMUR2A, MURMUR2A32, CITY - }; - const char* DigestNames[] = { - "Crc32c", "Crc64", "Fnv32", "Fnv64", "MurMurHash", "MurMurHash32", "MurMurHash2A", "MurMurHash2A32", "CityHash" - }; +enum EDigestType { + CRC32C, + CRC64, + FNV32, + FNV64, + MURMUR, + MURMUR32, + MURMUR2A, + MURMUR2A32, + CITY +}; +const char* DigestNames[] = { + "Crc32c", "Crc64", "Fnv32", "Fnv64", "MurMurHash", "MurMurHash32", "MurMurHash2A", "MurMurHash2A32", "CityHash"}; - template<typename TResult> - using TDigestGenerator = TResult(const TStringRef&, TMaybe<TResult> init); +template <typename TResult> +using TDigestGenerator = TResult(const TStringRef&, TMaybe<TResult> init); - template<EDigestType DigestType, typename TResult, TDigestGenerator<TResult>* Generator> - class TDigestFunctionUdf: public TBoxedValue { - public: - TDigestFunctionUdf(TSourcePosition pos) : Pos_(pos) {} - - static TStringRef Name() { - 
static TString name = DigestNames[DigestType]; - return TStringRef(name); - } - - static bool DeclareSignature( - const TStringRef& name, - TType*, - IFunctionTypeInfoBuilder& builder, - bool typesOnly) - { - if (Name() != name) { - return false; - } - - auto args = builder.Args(); - args->Add(builder.SimpleType<char *>()).Flags(ICallablePayload::TArgumentFlags::AutoMap); - args->Add(builder.Optional()->Item(builder.SimpleType<TResult>()).Build()).Name("Init"); - args->Done(); - builder.OptionalArgs(1); - builder.Returns(builder.SimpleType<TResult>()); - builder.IsStrict(); - - if (!typesOnly) { - builder.Implementation(new TDigestFunctionUdf<DigestType, TResult, Generator>(GetSourcePosition(builder))); - } - - return true; - } - - private: - TUnboxedValue Run(const IValueBuilder*, const TUnboxedValuePod* args) const final try { - TMaybe<TResult> init = Nothing(); - if (auto val = args[1]) { - init = val.Get<TResult>(); - } - return TUnboxedValuePod(Generator(args[0].AsStringRef(), init)); - } catch (const std ::exception&) { - TStringBuilder sb; - sb << Pos_ << " "; - sb << CurrentExceptionMessage(); - sb << Endl << "[" << TStringBuf(Name()) << "]"; - UdfTerminate(sb.c_str()); - } - - TSourcePosition Pos_; - }; - - SIMPLE_STRICT_UDF(TCrc32c, ui32(TAutoMap<char*>)) { - Y_UNUSED(valueBuilder); - const auto& inputRef = args[0].AsStringRef(); - ui32 hash = Crc32c(inputRef.Data(), inputRef.Size()); - return TUnboxedValuePod(hash); +template <EDigestType DigestType, typename TResult, TDigestGenerator<TResult>* Generator> +class TDigestFunctionUdf: public TBoxedValue { +public: + TDigestFunctionUdf(TSourcePosition pos) + : Pos_(pos) + { } - using TCrc64 = TDigestFunctionUdf<CRC64, ui64, [](auto& inputRef, auto init) { - return crc64(inputRef.Data(), inputRef.Size(), init.GetOrElse(CRC64INIT)); - }>; + static TStringRef Name() { + static TString name = DigestNames[DigestType]; + return TStringRef(name); + } - using TFnv32 = TDigestFunctionUdf<FNV32, ui32, [](auto& 
inputRef, auto init) { - if (init) { - return FnvHash<ui32>(inputRef.Data(), inputRef.Size(), *init); - } else { - return FnvHash<ui32>(inputRef.Data(), inputRef.Size()); + static bool DeclareSignature( + const TStringRef& name, + TType*, + IFunctionTypeInfoBuilder& builder, + bool typesOnly) + { + if (Name() != name) { + return false; } - }>; - using TFnv64 = TDigestFunctionUdf<FNV64, ui64, [](auto& inputRef, auto init) { - if (init) { - return FnvHash<ui64>(inputRef.Data(), inputRef.Size(), *init); - } else { - return FnvHash<ui64>(inputRef.Data(), inputRef.Size()); - } - }>; + auto args = builder.Args(); + args->Add(builder.SimpleType<char*>()).Flags(ICallablePayload::TArgumentFlags::AutoMap); + args->Add(builder.Optional()->Item(builder.SimpleType<TResult>()).Build()).Name("Init"); + args->Done(); + builder.OptionalArgs(1); + builder.Returns(builder.SimpleType<TResult>()); + builder.IsStrict(); - using TMurMurHash = TDigestFunctionUdf<MURMUR, ui64, [](auto& inputRef, auto init) { - if (init) { - return MurmurHash<ui64>(inputRef.Data(), inputRef.Size(), *init); - } else { - return MurmurHash<ui64>(inputRef.Data(), inputRef.Size()); + if (!typesOnly) { + builder.Implementation(new TDigestFunctionUdf<DigestType, TResult, Generator>(GetSourcePosition(builder))); } - }>; - using TMurMurHash32 = TDigestFunctionUdf<MURMUR32, ui32, [] (auto& inputRef, auto init) { - if (init) { - return MurmurHash<ui32>(inputRef.Data(), inputRef.Size(), *init); - } else { - return MurmurHash<ui32>(inputRef.Data(), inputRef.Size()); - } - }>; + return true; + } - using TMurMurHash2A = TDigestFunctionUdf<MURMUR2A, ui64, [] (auto& inputRef, auto init) { - if (init) { - return TMurmurHash2A<ui64>{*init}.Update(inputRef.Data(), inputRef.Size()).Value(); - } else { - return TMurmurHash2A<ui64>{}.Update(inputRef.Data(), inputRef.Size()).Value(); +private: + TUnboxedValue Run(const IValueBuilder*, const TUnboxedValuePod* args) const final try { + TMaybe<TResult> init = Nothing(); + if (auto 
val = args[1]) { + init = val.Get<TResult>(); } - }>; - - using TMurMurHash2A32 = TDigestFunctionUdf<MURMUR2A32, ui32, [] (auto& inputRef, auto init) { - if (init) { - return TMurmurHash2A<ui32>{*init}.Update(inputRef.Data(), inputRef.Size()).Value(); - } else { - return TMurmurHash2A<ui32>{}.Update(inputRef.Data(), inputRef.Size()).Value(); - } - }>; + return TUnboxedValuePod(Generator(args[0].AsStringRef(), init)); + } catch (const std ::exception&) { + TStringBuilder sb; + sb << Pos_ << " "; + sb << CurrentExceptionMessage(); + sb << Endl << "[" << TStringBuf(Name()) << "]"; + UdfTerminate(sb.c_str()); + } - using TCityHash = TDigestFunctionUdf<CITY, ui64, [] (auto& inputRef, auto init) { - if (init) { - return CityHash64WithSeed(inputRef.Data(), inputRef.Size(), *init); - } else { - return CityHash64(inputRef.Data(), inputRef.Size()); - } - }>; + TSourcePosition Pos_; +}; - class TCityHash128: public TBoxedValue { - public: - static TStringRef Name() { - static auto name = TStringRef::Of("CityHash128"); - return name; - } +SIMPLE_STRICT_UDF(TCrc32c, ui32(TAutoMap<char*>)) { + Y_UNUSED(valueBuilder); + const auto& inputRef = args[0].AsStringRef(); + ui32 hash = Crc32c(inputRef.Data(), inputRef.Size()); + return TUnboxedValuePod(hash); +} - static bool DeclareSignature( - const TStringRef& name, - TType* userType, - IFunctionTypeInfoBuilder& builder, - bool typesOnly) { - Y_UNUSED(userType); - if (Name() == name) { - auto type = builder.Tuple(2)->Add<ui64>().Add<ui64>().Build(); - builder.Args(1)->Add<TAutoMap<char*>>(); - builder.Returns(type); - if (!typesOnly) { - builder.Implementation(new TCityHash128); - } - builder.IsStrict(); - return true; - } else { - return false; - } - } +using TCrc64 = TDigestFunctionUdf<CRC64, ui64, [](auto& inputRef, auto init) { + return crc64(inputRef.Data(), inputRef.Size(), init.GetOrElse(CRC64INIT)); +}>; - private: - TUnboxedValue Run( - const IValueBuilder* valueBuilder, - const TUnboxedValuePod* args) const override { - 
TUnboxedValue* items = nullptr; - auto val = valueBuilder->NewArray(2U, items); - const auto& inputRef = args[0].AsStringRef(); - uint128 hash = CityHash128(inputRef.Data(), inputRef.Size()); - items[0] = TUnboxedValuePod(hash.first); - items[1] = TUnboxedValuePod(hash.second); - return val; - } - }; +using TFnv32 = TDigestFunctionUdf<FNV32, ui32, [](auto& inputRef, auto init) { + if (init) { + return FnvHash<ui32>(inputRef.Data(), inputRef.Size(), *init); + } else { + return FnvHash<ui32>(inputRef.Data(), inputRef.Size()); + } +}>; - SIMPLE_STRICT_UDF(TNumericHash, ui64(TAutoMap<ui64>)) { - Y_UNUSED(valueBuilder); - ui64 input = args[0].Get<ui64>(); - ui64 hash = (ui64)NumericHash(input); - return TUnboxedValuePod(hash); +using TFnv64 = TDigestFunctionUdf<FNV64, ui64, [](auto& inputRef, auto init) { + if (init) { + return FnvHash<ui64>(inputRef.Data(), inputRef.Size(), *init); + } else { + return FnvHash<ui64>(inputRef.Data(), inputRef.Size()); } +}>; - SIMPLE_STRICT_UDF(TMd5Hex, char*(TAutoMap<char*>)) { - const auto& inputRef = args[0].AsStringRef(); - MD5 md5; - const TString& hash = md5.Calc(inputRef); - return valueBuilder->NewString(hash); +using TMurMurHash = TDigestFunctionUdf<MURMUR, ui64, [](auto& inputRef, auto init) { + if (init) { + return MurmurHash<ui64>(inputRef.Data(), inputRef.Size(), *init); + } else { + return MurmurHash<ui64>(inputRef.Data(), inputRef.Size()); } +}>; - SIMPLE_STRICT_UDF(TMd5Raw, char*(TAutoMap<char*>)) { - const auto& inputRef = args[0].AsStringRef(); - MD5 md5; - const TString& hash = md5.CalcRaw(inputRef); - return valueBuilder->NewString(hash); +using TMurMurHash32 = TDigestFunctionUdf<MURMUR32, ui32, [](auto& inputRef, auto init) { + if (init) { + return MurmurHash<ui32>(inputRef.Data(), inputRef.Size(), *init); + } else { + return MurmurHash<ui32>(inputRef.Data(), inputRef.Size()); } +}>; - SIMPLE_STRICT_UDF(TMd5HalfMix, ui64(TAutoMap<char*>)) { - Y_UNUSED(valueBuilder); - return 
TUnboxedValuePod(MD5::CalcHalfMix(args[0].AsStringRef())); +using TMurMurHash2A = TDigestFunctionUdf<MURMUR2A, ui64, [](auto& inputRef, auto init) { + if (init) { + return TMurmurHash2A<ui64>{*init}.Update(inputRef.Data(), inputRef.Size()).Value(); + } else { + return TMurmurHash2A<ui64>{}.Update(inputRef.Data(), inputRef.Size()).Value(); } +}>; - SIMPLE_STRICT_UDF(TArgon2, char*(TAutoMap<char*>, TAutoMap<char*>)) { - const static ui32 outSize = 32; - const static NArgonish::TArgon2Factory afactory; - const static THolder<NArgonish::IArgon2Base> argon2 = afactory.Create( - NArgonish::EArgon2Type::Argon2d, 1, 32, 1); +using TMurMurHash2A32 = TDigestFunctionUdf<MURMUR2A32, ui32, [](auto& inputRef, auto init) { + if (init) { + return TMurmurHash2A<ui32>{*init}.Update(inputRef.Data(), inputRef.Size()).Value(); + } else { + return TMurmurHash2A<ui32>{}.Update(inputRef.Data(), inputRef.Size()).Value(); + } +}>; - const TStringRef inputRef = args[0].AsStringRef(); - const TStringRef saltRef = args[1].AsStringRef(); - ui8 out[outSize]; - argon2->Hash(reinterpret_cast<const ui8*>(inputRef.Data()), inputRef.Size(), - reinterpret_cast<const ui8*>(saltRef.Data()), saltRef.Size(), - out, outSize); - return valueBuilder->NewString(TStringRef(reinterpret_cast<char*>(&out[0]), outSize)); +using TCityHash = TDigestFunctionUdf<CITY, ui64, [](auto& inputRef, auto init) { + if (init) { + return CityHash64WithSeed(inputRef.Data(), inputRef.Size(), *init); + } else { + return CityHash64(inputRef.Data(), inputRef.Size()); } +}>; - SIMPLE_STRICT_UDF_WITH_OPTIONAL_ARGS(TBlake2B, char*(TAutoMap<char*>, TOptional<char*>), 1) { - const static ui32 outSize = 32; - const static NArgonish::TBlake2BFactory bfactory; - const TStringRef inputRef = args[0].AsStringRef(); +class TCityHash128: public TBoxedValue { +public: + static TStringRef Name() { + static auto name = TStringRef::Of("CityHash128"); + return name; + } - THolder<NArgonish::IBlake2Base> blake2b; - if (args[1]) { - const TStringRef 
keyRef = args[1].AsStringRef(); - if (keyRef.Size() == 0) { - blake2b = bfactory.Create(outSize); - } else { - blake2b = bfactory.Create(outSize, reinterpret_cast<const ui8*>(keyRef.Data()), keyRef.Size()); + static bool DeclareSignature( + const TStringRef& name, + TType* userType, + IFunctionTypeInfoBuilder& builder, + bool typesOnly) { + Y_UNUSED(userType); + if (Name() == name) { + auto type = builder.Tuple(2)->Add<ui64>().Add<ui64>().Build(); + builder.Args(1)->Add<TAutoMap<char*>>(); + builder.Returns(type); + if (!typesOnly) { + builder.Implementation(new TCityHash128); } + builder.IsStrict(); + return true; } else { - blake2b = bfactory.Create(outSize); + return false; } - - ui8 out[outSize]; - blake2b->Update(inputRef.Data(), inputRef.Size()); - blake2b->Final(out, outSize); - return valueBuilder->NewString(TStringRef(reinterpret_cast<char*>(&out[0]), outSize)); } - SIMPLE_STRICT_UDF(TSipHash, ui64(ui64, ui64, TAutoMap<char*>)) { - using namespace highwayhash; - Y_UNUSED(valueBuilder); - const TStringRef inputRef = args[2].AsStringRef(); - const HH_U64 state[2] = {args[0].Get<ui64>(), args[1].Get<ui64>()}; - ui64 hash = SipHash(state, inputRef.Data(), inputRef.Size()); - return TUnboxedValuePod(hash); +private: + TUnboxedValue Run( + const IValueBuilder* valueBuilder, + const TUnboxedValuePod* args) const override { + TUnboxedValue* items = nullptr; + auto val = valueBuilder->NewArray(2U, items); + const auto& inputRef = args[0].AsStringRef(); + uint128 hash = CityHash128(inputRef.Data(), inputRef.Size()); + items[0] = TUnboxedValuePod(hash.first); + items[1] = TUnboxedValuePod(hash.second); + return val; } +}; - SIMPLE_STRICT_UDF(THighwayHash, ui64(ui64, ui64, ui64, ui64, TAutoMap<char*>)) { - using namespace highwayhash; - Y_UNUSED(valueBuilder); - const TStringRef inputRef = args[4].AsStringRef(); - const uint64_t key[4] = { - args[0].Get<ui64>(), - args[1].Get<ui64>(), - args[2].Get<ui64>(), - args[3].Get<ui64>()}; - ui64 hash = HighwayHash64(key, 
inputRef.Data(), inputRef.Size()); - return TUnboxedValuePod(hash); - } +SIMPLE_STRICT_UDF(TNumericHash, ui64(TAutoMap<ui64>)) { + Y_UNUSED(valueBuilder); + ui64 input = args[0].Get<ui64>(); + ui64 hash = (ui64)NumericHash(input); + return TUnboxedValuePod(hash); +} - SIMPLE_STRICT_UDF(TFarmHashFingerprint, ui64(TAutoMap<ui64>)) { - Y_UNUSED(valueBuilder); - ui64 input = args[0].Get<ui64>(); - ui64 hash = util::Fingerprint(input); - return TUnboxedValuePod(hash); - } +SIMPLE_STRICT_UDF(TMd5Hex, char*(TAutoMap<char*>)) { + const auto& inputRef = args[0].AsStringRef(); + MD5 md5; + const TString& hash = md5.Calc(inputRef); + return valueBuilder->NewString(hash); +} - SIMPLE_STRICT_UDF(TFarmHashFingerprint2, ui64(TAutoMap<ui64>, TAutoMap<ui64>)) { - Y_UNUSED(valueBuilder); - ui64 low = args[0].Get<ui64>(); - ui64 high = args[1].Get<ui64>(); - ui64 hash = util::Fingerprint(util::Uint128(low, high)); - return TUnboxedValuePod(hash); - } +SIMPLE_STRICT_UDF(TMd5Raw, char*(TAutoMap<char*>)) { + const auto& inputRef = args[0].AsStringRef(); + MD5 md5; + const TString& hash = md5.CalcRaw(inputRef); + return valueBuilder->NewString(hash); +} - SIMPLE_STRICT_UDF(TFarmHashFingerprint32, ui32(TAutoMap<char*>)) { - Y_UNUSED(valueBuilder); - const auto& inputRef = args[0].AsStringRef(); - auto hash = util::Fingerprint32(inputRef.Data(), inputRef.Size()); - return TUnboxedValuePod(ui32(hash)); - } +SIMPLE_STRICT_UDF(TMd5HalfMix, ui64(TAutoMap<char*>)) { + Y_UNUSED(valueBuilder); + return TUnboxedValuePod(MD5::CalcHalfMix(args[0].AsStringRef())); +} - SIMPLE_STRICT_UDF(TFarmHashFingerprint64, ui64(TAutoMap<char*>)) { - Y_UNUSED(valueBuilder); - const auto& inputRef = args[0].AsStringRef(); - auto hash = util::Fingerprint64(inputRef.Data(), inputRef.Size()); - return TUnboxedValuePod(ui64(hash)); - } +SIMPLE_STRICT_UDF(TArgon2, char*(TAutoMap<char*>, TAutoMap<char*>)) { + const static ui32 outSize = 32; + const static NArgonish::TArgon2Factory afactory; + const static 
THolder<NArgonish::IArgon2Base> argon2 = afactory.Create( + NArgonish::EArgon2Type::Argon2d, 1, 32, 1); - class TFarmHashFingerprint128: public TBoxedValue { - public: - static TStringRef Name() { - static auto name = TStringRef::Of("FarmHashFingerprint128"); - return name; - } + const TStringRef inputRef = args[0].AsStringRef(); + const TStringRef saltRef = args[1].AsStringRef(); + ui8 out[outSize]; + argon2->Hash(reinterpret_cast<const ui8*>(inputRef.Data()), inputRef.Size(), + reinterpret_cast<const ui8*>(saltRef.Data()), saltRef.Size(), + out, outSize); + return valueBuilder->NewString(TStringRef(reinterpret_cast<char*>(&out[0]), outSize)); +} - static bool DeclareSignature( - const TStringRef& name, - TType* userType, - IFunctionTypeInfoBuilder& builder, - bool typesOnly) { - Y_UNUSED(userType); - if (Name() == name) { - auto type = builder.Tuple(2)->Add<ui64>().Add<ui64>().Build(); - builder.Args(1)->Add<TAutoMap<char*>>(); - builder.Returns(type); - if (!typesOnly) { - builder.Implementation(new TFarmHashFingerprint128); - } - builder.IsStrict(); - return true; - } else { - return false; - } - } +SIMPLE_STRICT_UDF_WITH_OPTIONAL_ARGS(TBlake2B, char*(TAutoMap<char*>, TOptional<char*>), 1) { + const static ui32 outSize = 32; + const static NArgonish::TBlake2BFactory bfactory; + const TStringRef inputRef = args[0].AsStringRef(); - private: - TUnboxedValue Run( - const IValueBuilder* valueBuilder, - const TUnboxedValuePod* args) const override { - TUnboxedValue* items = nullptr; - auto val = valueBuilder->NewArray(2U, items); - const auto& inputRef = args[0].AsStringRef(); - auto hash = util::Fingerprint128(inputRef.Data(), inputRef.Size()); - items[0] = TUnboxedValuePod(static_cast<ui64>(hash.first)); - items[1] = TUnboxedValuePod(static_cast<ui64>(hash.second)); - return val; + THolder<NArgonish::IBlake2Base> blake2b; + if (args[1]) { + const TStringRef keyRef = args[1].AsStringRef(); + if (keyRef.Size() == 0) { + blake2b = bfactory.Create(outSize); + } else { 
+ blake2b = bfactory.Create(outSize, reinterpret_cast<const ui8*>(keyRef.Data()), keyRef.Size()); } - }; - - SIMPLE_STRICT_UDF(TSuperFastHash, ui32(TAutoMap<char*>)) { - Y_UNUSED(valueBuilder); - const auto& inputRef = args[0].AsStringRef(); - ui32 hash = SuperFastHash(inputRef.Data(), inputRef.Size()); - return TUnboxedValuePod(hash); + } else { + blake2b = bfactory.Create(outSize); } - SIMPLE_STRICT_UDF(TSha1, char*(TAutoMap<char*>)) { - const auto& inputRef = args[0].AsStringRef(); - SHA_CTX sha; - SHA1_Init(&sha); - SHA1_Update(&sha, inputRef.Data(), inputRef.Size()); - unsigned char hash[SHA_DIGEST_LENGTH]; - SHA1_Final(hash, &sha); - return valueBuilder->NewString(TStringRef(reinterpret_cast<char*>(hash), sizeof(hash))); - } + ui8 out[outSize]; + blake2b->Update(inputRef.Data(), inputRef.Size()); + blake2b->Final(out, outSize); + return valueBuilder->NewString(TStringRef(reinterpret_cast<char*>(&out[0]), outSize)); +} - SIMPLE_STRICT_UDF(TSha256, char*(TAutoMap<char*>)) { - const auto& inputRef = args[0].AsStringRef(); - SHA256_CTX sha; - SHA256_Init(&sha); - SHA256_Update(&sha, inputRef.Data(), inputRef.Size()); - unsigned char hash[SHA256_DIGEST_LENGTH]; - SHA256_Final(hash, &sha); - return valueBuilder->NewString(TStringRef(reinterpret_cast<char*>(hash), sizeof(hash))); - } +SIMPLE_STRICT_UDF(TSipHash, ui64(ui64, ui64, TAutoMap<char*>)) { + using namespace highwayhash; + Y_UNUSED(valueBuilder); + const TStringRef inputRef = args[2].AsStringRef(); + const HH_U64 state[2] = {args[0].Get<ui64>(), args[1].Get<ui64>()}; + ui64 hash = SipHash(state, inputRef.Data(), inputRef.Size()); + return TUnboxedValuePod(hash); +} - SIMPLE_STRICT_UDF_OPTIONS(TSha512, char*(TAutoMap<char*>), builder.SetMinLangVer(NYql::MakeLangVersion(2025, 3));) { - const auto& inputRef = args[0].AsStringRef(); - SHA512_CTX sha; - SHA512_Init(&sha); - SHA512_Update(&sha, inputRef.Data(), inputRef.Size()); - unsigned char hash[SHA512_DIGEST_LENGTH]; - SHA512_Final(hash, &sha); - return 
valueBuilder->NewString(TStringRef(reinterpret_cast<char*>(hash), sizeof(hash))); +SIMPLE_STRICT_UDF(THighwayHash, ui64(ui64, ui64, ui64, ui64, TAutoMap<char*>)) { + using namespace highwayhash; + Y_UNUSED(valueBuilder); + const TStringRef inputRef = args[4].AsStringRef(); + const uint64_t key[4] = { + args[0].Get<ui64>(), + args[1].Get<ui64>(), + args[2].Get<ui64>(), + args[3].Get<ui64>()}; + ui64 hash = HighwayHash64(key, inputRef.Data(), inputRef.Size()); + return TUnboxedValuePod(hash); +} + +SIMPLE_STRICT_UDF(TFarmHashFingerprint, ui64(TAutoMap<ui64>)) { + Y_UNUSED(valueBuilder); + ui64 input = args[0].Get<ui64>(); + ui64 hash = util::Fingerprint(input); + return TUnboxedValuePod(hash); +} + +SIMPLE_STRICT_UDF(TFarmHashFingerprint2, ui64(TAutoMap<ui64>, TAutoMap<ui64>)) { + Y_UNUSED(valueBuilder); + ui64 low = args[0].Get<ui64>(); + ui64 high = args[1].Get<ui64>(); + ui64 hash = util::Fingerprint(util::Uint128(low, high)); + return TUnboxedValuePod(hash); +} + +SIMPLE_STRICT_UDF(TFarmHashFingerprint32, ui32(TAutoMap<char*>)) { + Y_UNUSED(valueBuilder); + const auto& inputRef = args[0].AsStringRef(); + auto hash = util::Fingerprint32(inputRef.Data(), inputRef.Size()); + return TUnboxedValuePod(ui32(hash)); +} + +SIMPLE_STRICT_UDF(TFarmHashFingerprint64, ui64(TAutoMap<char*>)) { + Y_UNUSED(valueBuilder); + const auto& inputRef = args[0].AsStringRef(); + auto hash = util::Fingerprint64(inputRef.Data(), inputRef.Size()); + return TUnboxedValuePod(ui64(hash)); +} + +class TFarmHashFingerprint128: public TBoxedValue { +public: + static TStringRef Name() { + static auto name = TStringRef::Of("FarmHashFingerprint128"); + return name; } - SIMPLE_STRICT_UDF(TIntHash64, ui64(TAutoMap<ui64>)) { - Y_UNUSED(valueBuilder); - ui64 x = args[0].Get<ui64>(); - x ^= 0x4CF2D2BAAE6DA887ULL; - x ^= x >> 33; - x *= 0xff51afd7ed558ccdULL; - x ^= x >> 33; - x *= 0xc4ceb9fe1a85ec53ULL; - x ^= x >> 33; - return TUnboxedValuePod(x); + static bool DeclareSignature( + const TStringRef& 
name, + TType* userType, + IFunctionTypeInfoBuilder& builder, + bool typesOnly) { + Y_UNUSED(userType); + if (Name() == name) { + auto type = builder.Tuple(2)->Add<ui64>().Add<ui64>().Build(); + builder.Args(1)->Add<TAutoMap<char*>>(); + builder.Returns(type); + if (!typesOnly) { + builder.Implementation(new TFarmHashFingerprint128); + } + builder.IsStrict(); + return true; + } else { + return false; + } } - SIMPLE_STRICT_UDF(TXXH3, ui64(TAutoMap<char*>)) { - Y_UNUSED(valueBuilder); +private: + TUnboxedValue Run( + const IValueBuilder* valueBuilder, + const TUnboxedValuePod* args) const override { + TUnboxedValue* items = nullptr; + auto val = valueBuilder->NewArray(2U, items); const auto& inputRef = args[0].AsStringRef(); - const ui64 hash = XXH3_64bits(inputRef.Data(), inputRef.Size()); - return TUnboxedValuePod(hash); + auto hash = util::Fingerprint128(inputRef.Data(), inputRef.Size()); + items[0] = TUnboxedValuePod(static_cast<ui64>(hash.first)); + items[1] = TUnboxedValuePod(static_cast<ui64>(hash.second)); + return val; } +}; - class TXXH3_128: public TBoxedValue { // NOLINT(readability-identifier-naming) - public: - static TStringRef Name() { - static auto name = TStringRef::Of("XXH3_128"); - return name; - } +SIMPLE_STRICT_UDF(TSuperFastHash, ui32(TAutoMap<char*>)) { + Y_UNUSED(valueBuilder); + const auto& inputRef = args[0].AsStringRef(); + ui32 hash = SuperFastHash(inputRef.Data(), inputRef.Size()); + return TUnboxedValuePod(hash); +} + +SIMPLE_STRICT_UDF(TSha1, char*(TAutoMap<char*>)) { + const auto& inputRef = args[0].AsStringRef(); + SHA_CTX sha; + SHA1_Init(&sha); + SHA1_Update(&sha, inputRef.Data(), inputRef.Size()); + unsigned char hash[SHA_DIGEST_LENGTH]; + SHA1_Final(hash, &sha); + return valueBuilder->NewString(TStringRef(reinterpret_cast<char*>(hash), sizeof(hash))); +} + +SIMPLE_STRICT_UDF(TSha256, char*(TAutoMap<char*>)) { + const auto& inputRef = args[0].AsStringRef(); + SHA256_CTX sha; + SHA256_Init(&sha); + SHA256_Update(&sha, 
inputRef.Data(), inputRef.Size()); + unsigned char hash[SHA256_DIGEST_LENGTH]; + SHA256_Final(hash, &sha); + return valueBuilder->NewString(TStringRef(reinterpret_cast<char*>(hash), sizeof(hash))); +} + +SIMPLE_STRICT_UDF_OPTIONS(TSha512, char*(TAutoMap<char*>), builder.SetMinLangVer(NYql::MakeLangVersion(2025, 3));) { + const auto& inputRef = args[0].AsStringRef(); + SHA512_CTX sha; + SHA512_Init(&sha); + SHA512_Update(&sha, inputRef.Data(), inputRef.Size()); + unsigned char hash[SHA512_DIGEST_LENGTH]; + SHA512_Final(hash, &sha); + return valueBuilder->NewString(TStringRef(reinterpret_cast<char*>(hash), sizeof(hash))); +} + +SIMPLE_STRICT_UDF(TIntHash64, ui64(TAutoMap<ui64>)) { + Y_UNUSED(valueBuilder); + ui64 x = args[0].Get<ui64>(); + x ^= 0x4CF2D2BAAE6DA887ULL; + x ^= x >> 33; + x *= 0xff51afd7ed558ccdULL; + x ^= x >> 33; + x *= 0xc4ceb9fe1a85ec53ULL; + x ^= x >> 33; + return TUnboxedValuePod(x); +} + +SIMPLE_STRICT_UDF(TXXH3, ui64(TAutoMap<char*>)) { + Y_UNUSED(valueBuilder); + const auto& inputRef = args[0].AsStringRef(); + const ui64 hash = XXH3_64bits(inputRef.Data(), inputRef.Size()); + return TUnboxedValuePod(hash); +} - static bool DeclareSignature(const TStringRef& name, TType*, IFunctionTypeInfoBuilder& builder, bool typesOnly) { - if (Name() == name) { - const auto type = builder.Tuple(2)->Add<ui64>().Add<ui64>().Build(); - builder.Args(1)->Add<TAutoMap<char*>>(); - builder.Returns(type); - if (!typesOnly) { - builder.Implementation(new TXXH3_128); - } - builder.IsStrict(); - return true; - } else { - return false; +class TXXH3_128: public TBoxedValue { // NOLINT(readability-identifier-naming) +public: + static TStringRef Name() { + static auto name = TStringRef::Of("XXH3_128"); + return name; + } + + static bool DeclareSignature(const TStringRef& name, TType*, IFunctionTypeInfoBuilder& builder, bool typesOnly) { + if (Name() == name) { + const auto type = builder.Tuple(2)->Add<ui64>().Add<ui64>().Build(); + builder.Args(1)->Add<TAutoMap<char*>>(); + 
builder.Returns(type); + if (!typesOnly) { + builder.Implementation(new TXXH3_128); } + builder.IsStrict(); + return true; + } else { + return false; } + } - private: - TUnboxedValue Run(const IValueBuilder* valueBuilder, const TUnboxedValuePod* args) const final { - TUnboxedValue* items = nullptr; - auto val = valueBuilder->NewArray(2U, items); - const auto& inputRef = args[0].AsStringRef(); - const auto hash = XXH3_128bits(inputRef.Data(), inputRef.Size()); - items[0] = TUnboxedValuePod(ui64(hash.low64)); - items[1] = TUnboxedValuePod(ui64(hash.high64)); - return val; - } - }; +private: + TUnboxedValue Run(const IValueBuilder* valueBuilder, const TUnboxedValuePod* args) const final { + TUnboxedValue* items = nullptr; + auto val = valueBuilder->NewArray(2U, items); + const auto& inputRef = args[0].AsStringRef(); + const auto hash = XXH3_128bits(inputRef.Data(), inputRef.Size()); + items[0] = TUnboxedValuePod(ui64(hash.low64)); + items[1] = TUnboxedValuePod(ui64(hash.high64)); + return val; + } +}; - SIMPLE_MODULE(TDigestModule, - TCrc32c, - TCrc64, - TFnv32, - TFnv64, - TMurMurHash, - TMurMurHash32, - TMurMurHash2A, - TMurMurHash2A32, - TCityHash, - TCityHash128, - TNumericHash, - TMd5Hex, - TMd5Raw, - TMd5HalfMix, - TArgon2, - TBlake2B, - TSipHash, - THighwayHash, - TFarmHashFingerprint, - TFarmHashFingerprint2, - TFarmHashFingerprint32, - TFarmHashFingerprint64, - TFarmHashFingerprint128, - TSuperFastHash, - TSha1, - TSha256, - TSha512, - TIntHash64, - TXXH3, - TXXH3_128 - ) +SIMPLE_MODULE(TDigestModule, + TCrc32c, + TCrc64, + TFnv32, + TFnv64, + TMurMurHash, + TMurMurHash32, + TMurMurHash2A, + TMurMurHash2A32, + TCityHash, + TCityHash128, + TNumericHash, + TMd5Hex, + TMd5Raw, + TMd5HalfMix, + TArgon2, + TBlake2B, + TSipHash, + THighwayHash, + TFarmHashFingerprint, + TFarmHashFingerprint2, + TFarmHashFingerprint32, + TFarmHashFingerprint64, + TFarmHashFingerprint128, + TSuperFastHash, + TSha1, + TSha256, + TSha512, + TIntHash64, + TXXH3, + TXXH3_128) -} +} // 
namespace REGISTER_MODULES(TDigestModule) diff --git a/yql/essentials/udfs/common/digest/ya.make b/yql/essentials/udfs/common/digest/ya.make index 565e77a3013..9daa7f25318 100644 --- a/yql/essentials/udfs/common/digest/ya.make +++ b/yql/essentials/udfs/common/digest/ya.make @@ -6,6 +6,8 @@ YQL_UDF_CONTRIB(digest_udf) 0 ) + ENABLE(YQL_STYLE_CPP) + SRCS( digest_udf.cpp ) diff --git a/yql/essentials/udfs/common/file/file_udf.cpp b/yql/essentials/udfs/common/file/file_udf.cpp index d499e85529e..c06da057dee 100644 --- a/yql/essentials/udfs/common/file/file_udf.cpp +++ b/yql/essentials/udfs/common/file/file_udf.cpp @@ -16,563 +16,566 @@ extern const char ByLineFuncName[]; const char ByLineFuncName[] = "ByLines"; namespace { - namespace Helper { - template <class TUserType> - inline bool ConvertToUnboxed(const IValueBuilder& valueBuilder, const TString& curLine, TUnboxedValue& result) { - Y_UNUSED(valueBuilder); - TUserType userType; - if (!TryFromString<TUserType>(curLine, userType)) { - return false; - } - result = TUnboxedValuePod(userType); - return true; - } +namespace Helper { +template <class TUserType> +inline bool ConvertToUnboxed(const IValueBuilder& valueBuilder, const TString& curLine, TUnboxedValue& result) { + Y_UNUSED(valueBuilder); + TUserType userType; + if (!TryFromString<TUserType>(curLine, userType)) { + return false; + } + result = TUnboxedValuePod(userType); + return true; +} - template <> - inline bool ConvertToUnboxed<const char*>(const IValueBuilder& valueBuilder, const TString& curLine, TUnboxedValue& result) { - result = valueBuilder.NewString(curLine); - return true; - } +template <> +inline bool ConvertToUnboxed<const char*>(const IValueBuilder& valueBuilder, const TString& curLine, TUnboxedValue& result) { + result = valueBuilder.NewString(curLine); + return true; +} - template <> - inline bool ConvertToUnboxed<TUtf8>(const IValueBuilder& valueBuilder, const TString& curLine, TUnboxedValue& result) { - result = 
valueBuilder.NewString(curLine); - return true; - } +template <> +inline bool ConvertToUnboxed<TUtf8>(const IValueBuilder& valueBuilder, const TString& curLine, TUnboxedValue& result) { + result = valueBuilder.NewString(curLine); + return true; +} - template <> - inline bool ConvertToUnboxed<TYson>(const IValueBuilder& valueBuilder, const TString& curLine, TUnboxedValue& result) { - result = valueBuilder.NewString(curLine); - return true; - } +template <> +inline bool ConvertToUnboxed<TYson>(const IValueBuilder& valueBuilder, const TString& curLine, TUnboxedValue& result) { + result = valueBuilder.NewString(curLine); + return true; +} - template <> - inline bool ConvertToUnboxed<TJson>(const IValueBuilder& valueBuilder, const TString& curLine, TUnboxedValue& result) { - result = valueBuilder.NewString(curLine); - return true; - } +template <> +inline bool ConvertToUnboxed<TJson>(const IValueBuilder& valueBuilder, const TString& curLine, TUnboxedValue& result) { + result = valueBuilder.NewString(curLine); + return true; +} - template <typename T> - struct TypeToTypeName { - static const char* Name() { - return "Unknown"; - } - }; - template <> - struct TypeToTypeName<bool> { - static constexpr const char* Name() { - return "Bool"; - } - }; - template <> - struct TypeToTypeName<i8> { - static constexpr const char* Name() { - return "Int8"; - } - }; - template <> - struct TypeToTypeName<ui8> { - static constexpr const char* Name() { - return "Uint8"; - } - }; - template <> - struct TypeToTypeName<i16> { - static constexpr const char* Name() { - return "Int16"; - } - }; - template <> - struct TypeToTypeName<ui16> { - static constexpr const char* Name() { - return "Uint16"; - } - }; - template <> - struct TypeToTypeName<ui32> { - static constexpr const char* Name() { - return "Uint32"; - } - }; - template <> - struct TypeToTypeName<ui64> { - static constexpr const char* Name() { - return "Uint64"; - } - }; - template <> - struct TypeToTypeName<i32> { - static constexpr 
const char* Name() { - return "Int32"; - } - }; - template <> - struct TypeToTypeName<i64> { - static constexpr const char* Name() { - return "Int64"; - } - }; - template <> - struct TypeToTypeName<float> { - static constexpr const char* Name() { - return "Float"; - } - }; - template <> - struct TypeToTypeName<double> { - static constexpr const char* Name() { - return "Double"; - } - }; - template <> - struct TypeToTypeName<const char*> { - static constexpr const char* Name() { - return "String"; - } - }; - template <> - struct TypeToTypeName<TUtf8> { - static constexpr const char* Name() { - return "Utf8"; - } - }; - template <> - struct TypeToTypeName<TYson> { - static constexpr const char* Name() { - return "Yson"; - } - }; - template <> - struct TypeToTypeName<TJson> { - static constexpr const char* Name() { - return "Json"; - } - }; +template <typename T> +struct TypeToTypeName { + static const char* Name() { + return "Unknown"; + } +}; +template <> +struct TypeToTypeName<bool> { + static constexpr const char* Name() { + return "Bool"; + } +}; +template <> +struct TypeToTypeName<i8> { + static constexpr const char* Name() { + return "Int8"; + } +}; +template <> +struct TypeToTypeName<ui8> { + static constexpr const char* Name() { + return "Uint8"; + } +}; +template <> +struct TypeToTypeName<i16> { + static constexpr const char* Name() { + return "Int16"; + } +}; +template <> +struct TypeToTypeName<ui16> { + static constexpr const char* Name() { + return "Uint16"; + } +}; +template <> +struct TypeToTypeName<ui32> { + static constexpr const char* Name() { + return "Uint32"; + } +}; +template <> +struct TypeToTypeName<ui64> { + static constexpr const char* Name() { + return "Uint64"; + } +}; +template <> +struct TypeToTypeName<i32> { + static constexpr const char* Name() { + return "Int32"; + } +}; +template <> +struct TypeToTypeName<i64> { + static constexpr const char* Name() { + return "Int64"; + } +}; +template <> +struct TypeToTypeName<float> { + static 
constexpr const char* Name() { + return "Float"; + } +}; +template <> +struct TypeToTypeName<double> { + static constexpr const char* Name() { + return "Double"; + } +}; +template <> +struct TypeToTypeName<const char*> { + static constexpr const char* Name() { + return "String"; } +}; +template <> +struct TypeToTypeName<TUtf8> { + static constexpr const char* Name() { + return "Utf8"; + } +}; +template <> +struct TypeToTypeName<TYson> { + static constexpr const char* Name() { + return "Yson"; + } +}; +template <> +struct TypeToTypeName<TJson> { + static constexpr const char* Name() { + return "Json"; + } +}; +} // namespace Helper - static const ui64 TAKE_UNLIM = -1; +static const ui64 TAKE_UNLIM = -1; - bool SkipElements(IBoxedValue& iter, ui64 skip) { - for (; skip > 0; --skip) { - if (!TBoxedValueAccessor::Skip(iter)) { - return false; - } +bool SkipElements(IBoxedValue& iter, ui64 skip) { + for (; skip > 0; --skip) { + if (!TBoxedValueAccessor::Skip(iter)) { + return false; } - return true; } + return true; +} - typedef std::function<void(const TString& message)> TTerminateFunc; - - class TStreamMeta: public TThrRefBase { - public: - typedef TBuffered<TUnbufferedFileInput> TStream; - typedef TIntrusivePtr<TStreamMeta> TPtr; +typedef std::function<void(const TString& message)> TTerminateFunc; - TStreamMeta(TString filePath) - : FilePath_(filePath) - { - // work in greedy mode to catch error on creation - Cached_ = DoCreateStream(); - } +class TStreamMeta: public TThrRefBase { +public: + typedef TBuffered<TUnbufferedFileInput> TStream; + typedef TIntrusivePtr<TStreamMeta> TPtr; - std::unique_ptr<TStream> CreateStream(TTerminateFunc terminateFunc) { - if (Cached_) { - return std::move(Cached_); - } + TStreamMeta(TString filePath) + : FilePath_(filePath) + { + // work in greedy mode to catch error on creation + Cached_ = DoCreateStream(); + } - terminateFunc("The file iterator was already created. 
To scan file data multiple times please use ListCollect either over ParseFile or over some lazy function over it, e.g. ListMap"); - Y_ABORT("Terminate unstoppable!"); + std::unique_ptr<TStream> CreateStream(TTerminateFunc terminateFunc) { + if (Cached_) { + return std::move(Cached_); } - bool GetLinesCount(ui64& count) const { - if (LinesCount_ == Unknown) - return false; - count = LinesCount_; - return true; + terminateFunc("The file iterator was already created. To scan file data multiple times please use ListCollect either over ParseFile or over some lazy function over it, e.g. ListMap"); + Y_ABORT("Terminate unstoppable!"); + } + + bool GetLinesCount(ui64& count) const { + if (LinesCount_ == Unknown) { + return false; } - void SetLinesCount(ui64 count) { - Y_DEBUG_ABORT_UNLESS(LinesCount_ == Unknown || count == LinesCount_, "Set another value of count lines"); - if (LinesCount_ == Unknown) { - LinesCount_ = count; - } + count = LinesCount_; + return true; + } + void SetLinesCount(ui64 count) { + Y_DEBUG_ABORT_UNLESS(LinesCount_ == Unknown || count == LinesCount_, "Set another value of count lines"); + if (LinesCount_ == Unknown) { + LinesCount_ = count; } + } - const TString& GetFilePath() const { - return FilePath_; - } + const TString& GetFilePath() const { + return FilePath_; + } - private: - std::unique_ptr<TStream> DoCreateStream() { - static const auto bufferSize = 1 << 12; - TFile file(FilePath_, OpenExisting | RdOnly | Seq); - if (FileSize_ == Unknown) { - FileSize_ = file.GetLength(); - } - return std::make_unique<TBuffered<TUnbufferedFileInput>>(bufferSize, file); +private: + std::unique_ptr<TStream> DoCreateStream() { + static const auto bufferSize = 1 << 12; + TFile file(FilePath_, OpenExisting | RdOnly | Seq); + if (FileSize_ == Unknown) { + FileSize_ = file.GetLength(); } + return std::make_unique<TBuffered<TUnbufferedFileInput>>(bufferSize, file); + } - TString FilePath_; - static const ui64 Unknown = -1; - ui64 FileSize_ = Unknown; - ui64 
LinesCount_ = Unknown; - std::unique_ptr<TStream> Cached_; - }; + TString FilePath_; + static const ui64 Unknown = -1; + ui64 FileSize_ = Unknown; + ui64 LinesCount_ = Unknown; + std::unique_ptr<TStream> Cached_; +}; - class TEmptyIter: public TBoxedValue { - private: - bool Skip() override { - return false; - } - bool Next(TUnboxedValue&) override { - return false; - } +class TEmptyIter: public TBoxedValue { +private: + bool Skip() override { + return false; + } + bool Next(TUnboxedValue&) override { + return false; + } - public: - TEmptyIter(TTerminateFunc terminateFunc) - : TerminateFunc_(terminateFunc) - { - } +public: + TEmptyIter(TTerminateFunc terminateFunc) + : TerminateFunc_(terminateFunc) + { + } - private: - const TTerminateFunc TerminateFunc_; - }; +private: + const TTerminateFunc TerminateFunc_; +}; - template <class TUserType> - class TLineByLineBoxedValueIterator: public TBoxedValue { - public: - TLineByLineBoxedValueIterator(TStreamMeta::TPtr metaPtr, std::unique_ptr<TStreamMeta::TStream>&& stream, const IValueBuilder& valueBuilder, TTerminateFunc terminateFunc) - : MetaPtr_(metaPtr) - , ValueBuilder_(valueBuilder) - , Stream_(std::move(stream)) - , Splitter_(*Stream_) - , TerminateFunc_(terminateFunc) - { - } +template <class TUserType> +class TLineByLineBoxedValueIterator: public TBoxedValue { +public: + TLineByLineBoxedValueIterator(TStreamMeta::TPtr metaPtr, std::unique_ptr<TStreamMeta::TStream>&& stream, const IValueBuilder& valueBuilder, TTerminateFunc terminateFunc) + : MetaPtr_(metaPtr) + , ValueBuilder_(valueBuilder) + , Stream_(std::move(stream)) + , Splitter_(*Stream_) + , TerminateFunc_(terminateFunc) + { + } - void SetLimit(ui64 limit = TAKE_UNLIM) { - Limit_ = limit; - } + void SetLimit(ui64 limit = TAKE_UNLIM) { + Limit_ = limit; + } - private: - bool SkipLimit() { - if (Limit_ != TAKE_UNLIM) { - if (Limit_ == 0) { - return false; - } - --Limit_; +private: + bool SkipLimit() { + if (Limit_ != TAKE_UNLIM) { + if (Limit_ == 0) { + 
return false; } - return true; + --Limit_; } + return true; + } - bool Skip() final { - ++CurLineNum_; - return Splitter_.Next(CurLine_) && SkipLimit(); - } + bool Skip() final { + ++CurLineNum_; + return Splitter_.Next(CurLine_) && SkipLimit(); + } - bool Next(TUnboxedValue& value) override { - if (!Skip()) { - return false; - } - if (!Helper::ConvertToUnboxed<TUserType>(ValueBuilder_, CurLine_, value)) { - TStringBuilder sb; - sb << "File::ByLines failed to cast string '" << CurLine_ << "' to " << Helper::TypeToTypeName<TUserType>::Name() << Endl; - sb << "- path: " << MetaPtr_->GetFilePath() << Endl; - sb << "- line: " << CurLineNum_ << Endl; - TerminateFunc_(sb); - Y_ABORT("Terminate unstoppable!"); - } - return true; + bool Next(TUnboxedValue& value) override { + if (!Skip()) { + return false; } + if (!Helper::ConvertToUnboxed<TUserType>(ValueBuilder_, CurLine_, value)) { + TStringBuilder sb; + sb << "File::ByLines failed to cast string '" << CurLine_ << "' to " << Helper::TypeToTypeName<TUserType>::Name() << Endl; + sb << "- path: " << MetaPtr_->GetFilePath() << Endl; + sb << "- line: " << CurLineNum_ << Endl; + TerminateFunc_(sb); + Y_ABORT("Terminate unstoppable!"); + } + return true; + } - TStreamMeta::TPtr MetaPtr_; - const IValueBuilder& ValueBuilder_; + TStreamMeta::TPtr MetaPtr_; + const IValueBuilder& ValueBuilder_; - std::unique_ptr<TStreamMeta::TStream> Stream_; - TLineSplitter Splitter_; - TTerminateFunc TerminateFunc_; - TString CurLine_; - ui64 CurLineNum_ = 0; - ui64 Limit_ = TAKE_UNLIM; - TUnboxedValue Result_; - }; + std::unique_ptr<TStreamMeta::TStream> Stream_; + TLineSplitter Splitter_; + TTerminateFunc TerminateFunc_; + TString CurLine_; + ui64 CurLineNum_ = 0; + ui64 Limit_ = TAKE_UNLIM; + TUnboxedValue Result_; +}; - template <class TUserType> - class TListByLineBoxedValue: public TBoxedValue { - public: - TListByLineBoxedValue(TStreamMeta::TPtr metaPtr, const IValueBuilder& valueBuilder, TTerminateFunc terminateFunc, ui64 skip = 0ULL, 
ui64 take = TAKE_UNLIM) - : MetaPtr_(metaPtr) - , ValueBuilder_(valueBuilder) - , TerminateFunc_(terminateFunc) - , Skip_(skip) - , Take_(take) - {} - private: - bool HasFastListLength() const override { - ui64 tmp; - return MetaPtr_->GetLinesCount(tmp); - } - ui64 GetListLength() const override { - ui64 length; - if (!MetaPtr_->GetLinesCount(length)) { - length = Skip_; - for (const auto iter = GetListIterator(); iter.Skip(); ++length) - continue; - if (Take_ == TAKE_UNLIM) { - MetaPtr_->SetLinesCount(length); - } +template <class TUserType> +class TListByLineBoxedValue: public TBoxedValue { +public: + TListByLineBoxedValue(TStreamMeta::TPtr metaPtr, const IValueBuilder& valueBuilder, TTerminateFunc terminateFunc, ui64 skip = 0ULL, ui64 take = TAKE_UNLIM) + : MetaPtr_(metaPtr) + , ValueBuilder_(valueBuilder) + , TerminateFunc_(terminateFunc) + , Skip_(skip) + , Take_(take) + { + } + +private: + bool HasFastListLength() const override { + ui64 tmp; + return MetaPtr_->GetLinesCount(tmp); + } + ui64 GetListLength() const override { + ui64 length; + if (!MetaPtr_->GetLinesCount(length)) { + length = Skip_; + for (const auto iter = GetListIterator(); iter.Skip(); ++length) { + continue; } - if (length <= Skip_) { - return 0; + if (Take_ == TAKE_UNLIM) { + MetaPtr_->SetLinesCount(length); } - return Min(length - Skip_, Take_); } - ui64 GetEstimatedListLength() const override { - /// \todo some optimisation? - return GetListLength(); + if (length <= Skip_) { + return 0; } + return Min(length - Skip_, Take_); + } + ui64 GetEstimatedListLength() const override { + /// \todo some optimisation? 
+ return GetListLength(); + } - TUnboxedValue GetListIterator() const override { - try { - auto stream = MetaPtr_->CreateStream(TerminateFunc_); - IBoxedValuePtr iter(new TLineByLineBoxedValueIterator<TUserType>(MetaPtr_, std::move(stream), ValueBuilder_, TerminateFunc_)); - if (!Take_ || !SkipElements(*iter, Skip_)) { - return TUnboxedValuePod(new TEmptyIter(TerminateFunc_)); - } - static_cast<TLineByLineBoxedValueIterator<TUserType>*>(iter.Get())->SetLimit(Take_); - return TUnboxedValuePod(std::move(iter)); - } catch (const std::exception& e) { - TerminateFunc_(CurrentExceptionMessage()); - Y_ABORT("Terminate unstoppable!"); + TUnboxedValue GetListIterator() const override { + try { + auto stream = MetaPtr_->CreateStream(TerminateFunc_); + IBoxedValuePtr iter(new TLineByLineBoxedValueIterator<TUserType>(MetaPtr_, std::move(stream), ValueBuilder_, TerminateFunc_)); + if (!Take_ || !SkipElements(*iter, Skip_)) { + return TUnboxedValuePod(new TEmptyIter(TerminateFunc_)); } + static_cast<TLineByLineBoxedValueIterator<TUserType>*>(iter.Get())->SetLimit(Take_); + return TUnboxedValuePod(std::move(iter)); + } catch (const std::exception& e) { + TerminateFunc_(CurrentExceptionMessage()); + Y_ABORT("Terminate unstoppable!"); } + } - IBoxedValuePtr SkipListImpl(const IValueBuilder& builder, ui64 count) const override { - return new TListByLineBoxedValue(MetaPtr_, builder, TerminateFunc_, Skip_ + count, Take_ == TAKE_UNLIM ? TAKE_UNLIM : Take_ - std::min(Take_, count)); - } - IBoxedValuePtr TakeListImpl(const IValueBuilder& builder, ui64 count) const override { - return new TListByLineBoxedValue(MetaPtr_, builder, TerminateFunc_, Skip_, std::min(Take_, count)); - } + IBoxedValuePtr SkipListImpl(const IValueBuilder& builder, ui64 count) const override { + return new TListByLineBoxedValue(MetaPtr_, builder, TerminateFunc_, Skip_ + count, Take_ == TAKE_UNLIM ? 
TAKE_UNLIM : Take_ - std::min(Take_, count)); + } + IBoxedValuePtr TakeListImpl(const IValueBuilder& builder, ui64 count) const override { + return new TListByLineBoxedValue(MetaPtr_, builder, TerminateFunc_, Skip_, std::min(Take_, count)); + } - bool HasListItems() const override { - return true; - } + bool HasListItems() const override { + return true; + } - TStreamMeta::TPtr MetaPtr_; - const IValueBuilder& ValueBuilder_; - TTerminateFunc TerminateFunc_; - ui64 Skip_ = 0ULL; - ui64 Take_ = TAKE_UNLIM; - }; + TStreamMeta::TPtr MetaPtr_; + const IValueBuilder& ValueBuilder_; + TTerminateFunc TerminateFunc_; + ui64 Skip_ = 0ULL; + ui64 Take_ = TAKE_UNLIM; +}; - template <class TUserType> - class TByLinesFunc: public TBoxedValue { - private: - TSourcePosition Pos_; +template <class TUserType> +class TByLinesFunc: public TBoxedValue { +private: + TSourcePosition Pos_; - TByLinesFunc(TSourcePosition pos) - : Pos_(pos) - {} + TByLinesFunc(TSourcePosition pos) + : Pos_(pos) + { + } - TUnboxedValue Run(const IValueBuilder* valueBuilder, const TUnboxedValuePod* args) const override { - try { - TString filePath(args[0].AsStringRef()); - TStreamMeta::TPtr metaPtr(new TStreamMeta(filePath)); - auto pos = Pos_; - auto terminateFunc = [pos](const TString& message) { - UdfTerminate((TStringBuilder() << pos << " " << message).c_str()); - }; - return TUnboxedValuePod(new TListByLineBoxedValue<TUserType>(metaPtr, *valueBuilder, terminateFunc)); - } catch (const std::exception& e) { - UdfTerminate((TStringBuilder() << Pos_ << " " << e.what()).c_str()); - } + TUnboxedValue Run(const IValueBuilder* valueBuilder, const TUnboxedValuePod* args) const override { + try { + TString filePath(args[0].AsStringRef()); + TStreamMeta::TPtr metaPtr(new TStreamMeta(filePath)); + auto pos = Pos_; + auto terminateFunc = [pos](const TString& message) { + UdfTerminate((TStringBuilder() << pos << " " << message).c_str()); + }; + return TUnboxedValuePod(new TListByLineBoxedValue<TUserType>(metaPtr, 
*valueBuilder, terminateFunc)); + } catch (const std::exception& e) { + UdfTerminate((TStringBuilder() << Pos_ << " " << e.what()).c_str()); + } + } + +public: + static void DeclareSignature( + TStringRef name, + TType* userType, + IFunctionTypeInfoBuilder& builder, + bool typesOnly) + { + Y_UNUSED(name); + builder.UserType(userType); + builder.SimpleSignature<TListType<TUserType>(char*)>(); + if (!typesOnly) { + builder.Implementation(new TByLinesFunc<TUserType>(builder.GetSourcePosition())); } + } +}; +class TFolderListFromFile: public TBoxedValue { +private: + class TIterator: public TBoxedValue { public: - static void DeclareSignature( - TStringRef name, - TType* userType, - IFunctionTypeInfoBuilder& builder, - bool typesOnly) + TIterator(ui32 indexP, ui32 indexT, ui32 indexA, const IValueBuilder& valueBuilder, const TSourcePosition& pos, TString filePath) + : IndexP_(indexP) + , IndexT_(indexT) + , IndexA_(indexA) + , ValueBuilder_(valueBuilder) + , Pos_(pos) + , Input_(filePath) { - Y_UNUSED(name); - builder.UserType(userType); - builder.SimpleSignature<TListType<TUserType>(char*)>(); - if (!typesOnly) { - builder.Implementation(new TByLinesFunc<TUserType>(builder.GetSourcePosition())); - } } - }; - class TFolderListFromFile: public TBoxedValue { private: - class TIterator : public TBoxedValue { - public: - TIterator(ui32 indexP, ui32 indexT, ui32 indexA, const IValueBuilder& valueBuilder, const TSourcePosition& pos, TString filePath) - : IndexP_(indexP) - , IndexT_(indexT) - , IndexA_(indexA) - , ValueBuilder_(valueBuilder) - , Pos_(pos) - , Input_(filePath) - { - } - - private: - bool Next(NUdf::TUnboxedValue& value) override { - try { - TString type; - TString path; - TString attrs; - ::Load(&Input_, type); - if (!type) { - return false; - } - ::Load(&Input_, path); - ::Load(&Input_, attrs); - - NUdf::TUnboxedValue* items = nullptr; - value = ValueBuilder_.NewArray(3, items); - items[IndexT_] = ValueBuilder_.NewString(type); - items[IndexP_] = 
ValueBuilder_.NewString(path); - items[IndexA_] = ValueBuilder_.NewString(attrs); - } - catch (const std::exception& e) { - UdfTerminate((TStringBuilder() << Pos_ << " " << e.what()).c_str()); - } - return true; - } - - private: - const ui32 IndexP_; - const ui32 IndexT_; - const ui32 IndexA_; - const IValueBuilder& ValueBuilder_; - const TSourcePosition Pos_; - TIFStream Input_; - }; - - class TList: public TBoxedValue { - public: - TList(ui32 indexP, ui32 indexT, ui32 indexA, const IValueBuilder& valueBuilder, const TSourcePosition& pos, TString filePath) - : IndexP_(indexP) - , IndexT_(indexT) - , IndexA_(indexA) - , ValueBuilder_(valueBuilder) - , Pos_(pos) - , FilePath_(std::move(filePath)) - { - } - - protected: - NUdf::TUnboxedValue GetListIterator() const override { - return NUdf::TUnboxedValuePod(new TIterator(IndexP_, IndexT_, IndexA_, ValueBuilder_, Pos_, FilePath_)); - } - - bool HasFastListLength() const override { - return bool(Length_); - } - - ui64 GetListLength() const override { - if (!Length_) { - ui64 length = 0ULL; - for (const auto it = GetListIterator(); it.Skip();) { - ++length; - } - - Length_ = length; - } - - return *Length_; - } - - ui64 GetEstimatedListLength() const override { - return GetListLength(); - } - - bool HasListItems() const override { - if (HasItems_) { - return *HasItems_; - } - - if (Length_) { - HasItems_ = (*Length_ != 0); - return *HasItems_; + bool Next(NUdf::TUnboxedValue& value) override { + try { + TString type; + TString path; + TString attrs; + ::Load(&Input_, type); + if (!type) { + return false; } + ::Load(&Input_, path); + ::Load(&Input_, attrs); - auto iter = GetListIterator(); - HasItems_ = iter.Skip(); - return *HasItems_; + NUdf::TUnboxedValue* items = nullptr; + value = ValueBuilder_.NewArray(3, items); + items[IndexT_] = ValueBuilder_.NewString(type); + items[IndexP_] = ValueBuilder_.NewString(path); + items[IndexA_] = ValueBuilder_.NewString(attrs); + } catch (const std::exception& e) { + 
UdfTerminate((TStringBuilder() << Pos_ << " " << e.what()).c_str()); } + return true; + } - protected: - const ui32 IndexP_; - const ui32 IndexT_; - const ui32 IndexA_; - const IValueBuilder& ValueBuilder_; - const TSourcePosition Pos_; - const TString FilePath_; - mutable TMaybe<ui64> Length_; - mutable TMaybe<bool> HasItems_; - }; + private: + const ui32 IndexP_; + const ui32 IndexT_; + const ui32 IndexA_; + const IValueBuilder& ValueBuilder_; + const TSourcePosition Pos_; + TIFStream Input_; + }; + class TList: public TBoxedValue { public: - TFolderListFromFile(ui32 indexP, ui32 indexT, ui32 indexA, const TSourcePosition& pos) + TList(ui32 indexP, ui32 indexT, ui32 indexA, const IValueBuilder& valueBuilder, const TSourcePosition& pos, TString filePath) : IndexP_(indexP) , IndexT_(indexT) , IndexA_(indexA) + , ValueBuilder_(valueBuilder) , Pos_(pos) + , FilePath_(std::move(filePath)) { } - static const ::NYql::NUdf::TStringRef& Name() { - static auto name = ::NYql::NUdf::TStringRef::Of("FolderListFromFile"); - return name; + protected: + NUdf::TUnboxedValue GetListIterator() const override { + return NUdf::TUnboxedValuePod(new TIterator(IndexP_, IndexT_, IndexA_, ValueBuilder_, Pos_, FilePath_)); } - TUnboxedValue Run(const IValueBuilder* valueBuilder, const TUnboxedValuePod* args) const override { - try { - TString filePath(args[0].AsStringRef()); - return TUnboxedValuePod(new TList(IndexP_, IndexT_, IndexA_, *valueBuilder, Pos_, filePath)); - } catch (const std::exception& e) { - UdfTerminate((TStringBuilder() << Pos_ << " " << e.what()).c_str()); - } + bool HasFastListLength() const override { + return bool(Length_); } - static bool DeclareSignature(const TStringRef& name, TType* userType, IFunctionTypeInfoBuilder& builder, bool typesOnly) { - if (Name() != name) { - // the only case when we return false - return false; + ui64 GetListLength() const override { + if (!Length_) { + ui64 length = 0ULL; + for (const auto it = GetListIterator(); it.Skip();) { + 
++length; + } + + Length_ = length; } - builder.UserType(userType); + return *Length_; + } + + ui64 GetEstimatedListLength() const override { + return GetListLength(); + } - ui32 indexP, indexT, indexA; - auto itemType = builder.Struct() - ->AddField<const char*>("Path", &indexP) - .AddField<const char*>("Type", &indexT) - .AddField<TYson>("Attributes", &indexA) - .Build(); - auto resultType = builder.List()->Item(itemType).Build(); + bool HasListItems() const override { + if (HasItems_) { + return *HasItems_; + } - builder.Args()->Add<const char*>().Done().Returns(resultType); - if (!typesOnly) { - builder.Implementation(new TFolderListFromFile(indexP, indexT, indexA, builder.GetSourcePosition())); + if (Length_) { + HasItems_ = (*Length_ != 0); + return *HasItems_; } - return true; + + auto iter = GetListIterator(); + HasItems_ = iter.Skip(); + return *HasItems_; } - private: + protected: const ui32 IndexP_; const ui32 IndexT_; const ui32 IndexA_; + const IValueBuilder& ValueBuilder_; const TSourcePosition Pos_; + const TString FilePath_; + mutable TMaybe<ui64> Length_; + mutable TMaybe<bool> HasItems_; }; - SIMPLE_MODULE(TFileModule, - TUserDataTypeFuncFactory<false, false, ByLineFuncName, TByLinesFunc, const char*, TUtf8, TYson, TJson, i8, ui8, i16, ui16, ui32, ui64, i32, i64, float, double, bool>, - TFolderListFromFile - ) +public: + TFolderListFromFile(ui32 indexP, ui32 indexT, ui32 indexA, const TSourcePosition& pos) + : IndexP_(indexP) + , IndexT_(indexT) + , IndexA_(indexA) + , Pos_(pos) + { + } -} + static const ::NYql::NUdf::TStringRef& Name() { + static auto name = ::NYql::NUdf::TStringRef::Of("FolderListFromFile"); + return name; + } + + TUnboxedValue Run(const IValueBuilder* valueBuilder, const TUnboxedValuePod* args) const override { + try { + TString filePath(args[0].AsStringRef()); + return TUnboxedValuePod(new TList(IndexP_, IndexT_, IndexA_, *valueBuilder, Pos_, filePath)); + } catch (const std::exception& e) { + UdfTerminate((TStringBuilder() << 
Pos_ << " " << e.what()).c_str()); + } + } + + static bool DeclareSignature(const TStringRef& name, TType* userType, IFunctionTypeInfoBuilder& builder, bool typesOnly) { + if (Name() != name) { + // the only case when we return false + return false; + } + + builder.UserType(userType); + + ui32 indexP, indexT, indexA; + auto itemType = builder.Struct() + ->AddField<const char*>("Path", &indexP) + .AddField<const char*>("Type", &indexT) + .AddField<TYson>("Attributes", &indexA) + .Build(); + auto resultType = builder.List()->Item(itemType).Build(); + + builder.Args()->Add<const char*>().Done().Returns(resultType); + if (!typesOnly) { + builder.Implementation(new TFolderListFromFile(indexP, indexT, indexA, builder.GetSourcePosition())); + } + return true; + } + +private: + const ui32 IndexP_; + const ui32 IndexT_; + const ui32 IndexA_; + const TSourcePosition Pos_; +}; + +SIMPLE_MODULE(TFileModule, + TUserDataTypeFuncFactory<false, false, ByLineFuncName, TByLinesFunc, const char*, TUtf8, TYson, TJson, i8, ui8, i16, ui16, ui32, ui64, i32, i64, float, double, bool>, + TFolderListFromFile) + +} // namespace REGISTER_MODULES(TFileModule) diff --git a/yql/essentials/udfs/common/file/ya.make b/yql/essentials/udfs/common/file/ya.make index 250f0722d8e..9a4f1863132 100644 --- a/yql/essentials/udfs/common/file/ya.make +++ b/yql/essentials/udfs/common/file/ya.make @@ -6,6 +6,8 @@ YQL_ABI_VERSION( 0 ) +ENABLE(YQL_STYLE_CPP) + SRCS( file_udf.cpp ) diff --git a/yql/essentials/udfs/common/histogram/histogram_udf.cpp b/yql/essentials/udfs/common/histogram/histogram_udf.cpp index 731b5956ed8..283e243396b 100644 --- a/yql/essentials/udfs/common/histogram/histogram_udf.cpp +++ b/yql/essentials/udfs/common/histogram/histogram_udf.cpp @@ -43,461 +43,220 @@ namespace { XX(Merge, arg) #define DECLARE_HISTOGRAM_RESOURCE_NAME(name) extern const char name##HistogramResourceName[] = "Histogram." 
#name; - HISTOGRAM_ALGORITHMS_MAP(DECLARE_HISTOGRAM_RESOURCE_NAME) - DECLARE_HISTOGRAM_RESOURCE_NAME(Linear) - DECLARE_HISTOGRAM_RESOURCE_NAME(Logarithmic) +HISTOGRAM_ALGORITHMS_MAP(DECLARE_HISTOGRAM_RESOURCE_NAME) +DECLARE_HISTOGRAM_RESOURCE_NAME(Linear) +DECLARE_HISTOGRAM_RESOURCE_NAME(Logarithmic) - class TLinearHistogram: public TAdaptiveWardHistogram { - public: - TLinearHistogram(double step, double begin, double end) - : TAdaptiveWardHistogram(1ULL << 24) - , Step_(step) - , Begin_(begin) - , End_(end) - { - } - - void Add(double value, double weight) override { - if (value < Begin_) { - value = Begin_; - } else if (value > End_) { - value = End_; - } else { - value = std::floor(value / Step_ + 0.5) * Step_; - } - TAdaptiveWardHistogram::Add(value, weight); - } - - void Add(const THistoRec&) override { - Y_ABORT("Not implemented"); - } - - protected: - double Step_; - double Begin_; - double End_; - }; - - class TLogarithmicHistogram: public TLinearHistogram { - public: - TLogarithmicHistogram(double step, double begin, double end) - : TLinearHistogram(step, begin, end) - { - } - - void Add(double value, double weight) override { - double base = std::log(value) / std::log(Step_); - double prev = std::pow(Step_, std::floor(base)); - double next = std::pow(Step_, std::ceil(base)); - if (std::abs(value - next) > std::abs(value - prev)) { - value = prev; - } else { - value = next; - } - - if (value < Begin_) { - value = Begin_; - } else if (value > End_) { - value = End_; - } - - if (!std::isnan(value)) { - TAdaptiveWardHistogram::Add(value, weight); - } - } - - void Add(const THistoRec&) override { - Y_ABORT("Not implemented"); - } - }; - - template <typename THistogramType, const char* ResourceName> - class THistogram_Create: public TBoxedValue { - public: - THistogram_Create(TSourcePosition pos) - : Pos_(pos) - {} - - typedef TBoxedResource<THistogramType, ResourceName> THistogramResource; - - static const TStringRef& Name() { - static auto name = 
TString(ResourceName).substr(10) + "Histogram_Create"; - static auto nameRef = TStringRef(name); - return nameRef; - } - - private: - TUnboxedValue Run( - const IValueBuilder* valueBuilder, - const TUnboxedValuePod* args) const override { - try { - Y_UNUSED(valueBuilder); - THolder<THistogramResource> histogram(new THistogramResource(args[2].Get<ui32>())); - histogram->Get()->Add(args[0].Get<double>(), args[1].Get<double>()); - return TUnboxedValuePod(histogram.Release()); - } catch (const std::exception& e) { - UdfTerminate((TStringBuilder() << Pos_ << " " << e.what()).c_str()); - } - } - - public: - static bool DeclareSignature( - const TStringRef& name, - TType* userType, - IFunctionTypeInfoBuilder& builder, - bool typesOnly) { - Y_UNUSED(userType); - if (Name() == name) { - builder.SimpleSignature<TResource<ResourceName>(double, double, ui32)>(); - if (!typesOnly) { - builder.Implementation(new THistogram_Create<THistogramType, ResourceName>(builder.GetSourcePosition())); - } - return true; - } else { - return false; - } - } - - private: - TSourcePosition Pos_; - }; - - template <typename THistogramType, const char* ResourceName> - class THistogram_AddValue: public TBoxedValue { - public: - THistogram_AddValue(TSourcePosition pos) - : Pos_(pos) - {} - - typedef TBoxedResource<THistogramType, ResourceName> THistogramResource; - - static const TStringRef& Name() { - static auto name = TString(ResourceName).substr(10) + "Histogram_AddValue"; - static auto nameRef = TStringRef(name); - return nameRef; - } - - private: - TUnboxedValue Run( - const IValueBuilder* valueBuilder, - const TUnboxedValuePod* args) const override { - try { - Y_UNUSED(valueBuilder); - THistogramResource* resource = static_cast<THistogramResource*>(args[0].AsBoxed().Get()); - resource->Get()->Add(args[1].Get<double>(), args[2].Get<double>()); - return TUnboxedValuePod(args[0]); - } catch (const std::exception& e) { - UdfTerminate((TStringBuilder() << Pos_ << " " << e.what()).c_str()); - } - } 
- - public: - static bool DeclareSignature( - const TStringRef& name, - TType* userType, - IFunctionTypeInfoBuilder& builder, - bool typesOnly) { - Y_UNUSED(userType); - if (Name() == name) { - builder.SimpleSignature<TResource<ResourceName>(TResource<ResourceName>, double, double)>(); - if (!typesOnly) { - builder.Implementation(new THistogram_AddValue<THistogramType, ResourceName>(builder.GetSourcePosition())); - } - return true; - } else { - return false; - } - } - - private: - TSourcePosition Pos_; - }; - - template <typename THistogramType, const char* ResourceName> - class THistogram_Serialize: public TBoxedValue { - public: - THistogram_Serialize(TSourcePosition pos) - : Pos_(pos) - {} - - typedef TBoxedResource<THistogramType, ResourceName> THistogramResource; - - static const TStringRef& Name() { - static auto name = TString(ResourceName).substr(10) + "Histogram_Serialize"; - static auto nameRef = TStringRef(name); - return nameRef; - } - - private: - TUnboxedValue Run( - const IValueBuilder* valueBuilder, - const TUnboxedValuePod* args) const override { - try { - THistogram proto; - TString result; - static_cast<THistogramResource*>(args[0].AsBoxed().Get())->Get()->ToProto(proto); - Y_PROTOBUF_SUPPRESS_NODISCARD proto.SerializeToString(&result); - return valueBuilder->NewString(result); - } catch (const std::exception& e) { - UdfTerminate((TStringBuilder() << Pos_ << " " << e.what()).c_str()); - } - } +class TLinearHistogram: public TAdaptiveWardHistogram { +public: + TLinearHistogram(double step, double begin, double end) + : TAdaptiveWardHistogram(1ULL << 24) + , Step_(step) + , Begin_(begin) + , End_(end) + { + } - public: - static bool DeclareSignature( - const TStringRef& name, - TType* userType, - IFunctionTypeInfoBuilder& builder, - bool typesOnly) { - Y_UNUSED(userType); - if (Name() == name) { - builder.SimpleSignature<char*(TResource<ResourceName>)>(); - if (!typesOnly) { - builder.Implementation(new THistogram_Serialize<THistogramType, 
ResourceName>(builder.GetSourcePosition())); - } - return true; - } else { - return false; - } + void Add(double value, double weight) override { + if (value < Begin_) { + value = Begin_; + } else if (value > End_) { + value = End_; + } else { + value = std::floor(value / Step_ + 0.5) * Step_; } + TAdaptiveWardHistogram::Add(value, weight); + } - private: - TSourcePosition Pos_; - }; + void Add(const THistoRec&) override { + Y_ABORT("Not implemented"); + } - template <typename THistogramType, const char* ResourceName> - class THistogram_Deserialize: public TBoxedValue { - public: - THistogram_Deserialize(TSourcePosition pos) - : Pos_(pos) - {} +protected: + double Step_; + double Begin_; + double End_; +}; - typedef TBoxedResource<THistogramType, ResourceName> THistogramResource; +class TLogarithmicHistogram: public TLinearHistogram { +public: + TLogarithmicHistogram(double step, double begin, double end) + : TLinearHistogram(step, begin, end) + { + } - static const TStringRef& Name() { - static auto name = TString(ResourceName).substr(10) + "Histogram_Deserialize"; - static auto nameRef = TStringRef(name); - return nameRef; + void Add(double value, double weight) override { + double base = std::log(value) / std::log(Step_); + double prev = std::pow(Step_, std::floor(base)); + double next = std::pow(Step_, std::ceil(base)); + if (std::abs(value - next) > std::abs(value - prev)) { + value = prev; + } else { + value = next; } - private: - TUnboxedValue Run( - const IValueBuilder* valueBuilder, - const TUnboxedValuePod* args) const override { - try { - Y_UNUSED(valueBuilder); - THistogram proto; - Y_PROTOBUF_SUPPRESS_NODISCARD proto.ParseFromString(TString(args[0].AsStringRef())); - THolder<THistogramResource> histogram(new THistogramResource(args[1].Get<ui32>())); - histogram->Get()->FromProto(proto); - return TUnboxedValuePod(histogram.Release()); - } catch (const std::exception& e) { - UdfTerminate((TStringBuilder() << Pos_ << " " << e.what()).c_str()); - } + if 
(value < Begin_) { + value = Begin_; + } else if (value > End_) { + value = End_; } - public: - static bool DeclareSignature( - const TStringRef& name, - TType* userType, - IFunctionTypeInfoBuilder& builder, - bool typesOnly) { - Y_UNUSED(userType); - if (Name() == name) { - builder.SimpleSignature<TResource<ResourceName>(char*, ui32)>(); - if (!typesOnly) { - builder.Implementation(new THistogram_Deserialize<THistogramType, ResourceName>(builder.GetSourcePosition())); - } - return true; - } else { - return false; - } + if (!std::isnan(value)) { + TAdaptiveWardHistogram::Add(value, weight); } + } - private: - TSourcePosition Pos_; - }; + void Add(const THistoRec&) override { + Y_ABORT("Not implemented"); + } +}; - template <typename THistogramType, const char* ResourceName> - class THistogram_Merge: public TBoxedValue { - public: - THistogram_Merge(TSourcePosition pos) - : Pos_(pos) - {} +template <typename THistogramType, const char* ResourceName> +class THistogram_Create: public TBoxedValue { +public: + THistogram_Create(TSourcePosition pos) + : Pos_(pos) + { + } - typedef TBoxedResource<THistogramType, ResourceName> THistogramResource; + typedef TBoxedResource<THistogramType, ResourceName> THistogramResource; - static const TStringRef& Name() { - static auto name = TString(ResourceName).substr(10) + "Histogram_Merge"; - static auto nameRef = TStringRef(name); - return nameRef; - } + static const TStringRef& Name() { + static auto name = TString(ResourceName).substr(10) + "Histogram_Create"; + static auto nameRef = TStringRef(name); + return nameRef; + } - private: - TUnboxedValue Run( - const IValueBuilder* valueBuilder, - const TUnboxedValuePod* args) const override { - try { - Y_UNUSED(valueBuilder); - THistogram proto; - static_cast<THistogramResource*>(args[0].AsBoxed().Get())->Get()->ToProto(proto); - static_cast<THistogramResource*>(args[1].AsBoxed().Get())->Get()->Merge(proto, 1.0); - return TUnboxedValuePod(args[1]); - } catch (const std::exception& e) { 
- UdfTerminate((TStringBuilder() << Pos_ << " " << e.what()).c_str()); - } +private: + TUnboxedValue Run( + const IValueBuilder* valueBuilder, + const TUnboxedValuePod* args) const override { + try { + Y_UNUSED(valueBuilder); + THolder<THistogramResource> histogram(new THistogramResource(args[2].Get<ui32>())); + histogram->Get()->Add(args[0].Get<double>(), args[1].Get<double>()); + return TUnboxedValuePod(histogram.Release()); + } catch (const std::exception& e) { + UdfTerminate((TStringBuilder() << Pos_ << " " << e.what()).c_str()); } + } - public: - static bool DeclareSignature( - const TStringRef& name, - TType* userType, - IFunctionTypeInfoBuilder& builder, - bool typesOnly) { - Y_UNUSED(userType); - if (Name() == name) { - builder.SimpleSignature<TResource<ResourceName>(TResource<ResourceName>, TResource<ResourceName>)>(); - if (!typesOnly) { - builder.Implementation(new THistogram_Merge<THistogramType, ResourceName>(builder.GetSourcePosition())); - } - return true; - } else { - return false; +public: + static bool DeclareSignature( + const TStringRef& name, + TType* userType, + IFunctionTypeInfoBuilder& builder, + bool typesOnly) { + Y_UNUSED(userType); + if (Name() == name) { + builder.SimpleSignature<TResource<ResourceName>(double, double, ui32)>(); + if (!typesOnly) { + builder.Implementation(new THistogram_Create<THistogramType, ResourceName>(builder.GetSourcePosition())); } + return true; + } else { + return false; } + } - private: - TSourcePosition Pos_; - }; - - struct THistogramIndexes { - static constexpr ui32 BinFieldsCount = 2U; - static constexpr ui32 ResultFieldsCount = 5U; - - THistogramIndexes(IFunctionTypeInfoBuilder& builder) { - const auto binStructType = builder.Struct(BinFieldsCount)->AddField<double>("Position", &Position).AddField<double>("Frequency", &Frequency).Build(); - const auto binsList = builder.List()->Item(binStructType).Build(); - ResultStructType = builder.Struct(ResultFieldsCount)->AddField<char*>("Kind", 
&Kind).AddField<double>("Min", &Min).AddField<double>("Max", &Max).AddField<double>("WeightsSum", &WeightsSum).AddField("Bins", binsList, &Bins).Build(); - } - - ui32 Kind; - ui32 Min; - ui32 Max; - ui32 WeightsSum; - ui32 Bins; - - ui32 Position; - ui32 Frequency; +private: + TSourcePosition Pos_; +}; - TType* ResultStructType; - }; +template <typename THistogramType, const char* ResourceName> +class THistogram_AddValue: public TBoxedValue { +public: + THistogram_AddValue(TSourcePosition pos) + : Pos_(pos) + { + } - template <typename THistogramType, const char* ResourceName> - class THistogram_GetResult: public TBoxedValue { - public: - typedef TBoxedResource<THistogramType, ResourceName> THistogramResource; + typedef TBoxedResource<THistogramType, ResourceName> THistogramResource; - THistogram_GetResult(const THistogramIndexes& histogramIndexes, TSourcePosition pos) - : HistogramIndexes_(histogramIndexes) - , Pos_(pos) - { - } + static const TStringRef& Name() { + static auto name = TString(ResourceName).substr(10) + "Histogram_AddValue"; + static auto nameRef = TStringRef(name); + return nameRef; + } - static const TStringRef& Name() { - static auto name = TString(ResourceName).substr(10) + "Histogram_GetResult"; - static auto nameRef = TStringRef(name); - return nameRef; +private: + TUnboxedValue Run( + const IValueBuilder* valueBuilder, + const TUnboxedValuePod* args) const override { + try { + Y_UNUSED(valueBuilder); + THistogramResource* resource = static_cast<THistogramResource*>(args[0].AsBoxed().Get()); + resource->Get()->Add(args[1].Get<double>(), args[2].Get<double>()); + return TUnboxedValuePod(args[0]); + } catch (const std::exception& e) { + UdfTerminate((TStringBuilder() << Pos_ << " " << e.what()).c_str()); } + } - private: - TUnboxedValue Run( - const IValueBuilder* valueBuilder, - const TUnboxedValuePod* args) const override { - THistogram proto; - auto histogram = static_cast<THistogramResource*>(args[0].AsBoxed().Get())->Get(); - 
histogram->ToProto(proto); - - auto size = proto.FreqSize(); - TUnboxedValue* fields = nullptr; - auto result = valueBuilder->NewArray(HistogramIndexes_.ResultFieldsCount, fields); - fields[HistogramIndexes_.Kind] = valueBuilder->NewString(TStringBuf(ResourceName).Skip(10)); - if (size) { - TUnboxedValue* items = nullptr; - fields[HistogramIndexes_.Bins] = valueBuilder->NewArray(size, items); - fields[HistogramIndexes_.Min] = TUnboxedValuePod(static_cast<double>(histogram->GetMinValue())); - fields[HistogramIndexes_.Max] = TUnboxedValuePod(static_cast<double>(histogram->GetMaxValue())); - fields[HistogramIndexes_.WeightsSum] = TUnboxedValuePod(static_cast<double>(histogram->GetSum())); - for (ui64 i = 0; i < size; ++i) { - TUnboxedValue* binFields = nullptr; - *items++ = valueBuilder->NewArray(HistogramIndexes_.BinFieldsCount, binFields); - binFields[HistogramIndexes_.Frequency] = TUnboxedValuePod(static_cast<double>(proto.GetFreq(i))); - binFields[HistogramIndexes_.Position] = TUnboxedValuePod(static_cast<double>(proto.GetPosition(i))); - } - } else { - fields[HistogramIndexes_.Bins] = valueBuilder->NewEmptyList(); - fields[HistogramIndexes_.Min] = TUnboxedValuePod(0.0); - fields[HistogramIndexes_.Max] = TUnboxedValuePod(0.0); - fields[HistogramIndexes_.WeightsSum] = TUnboxedValuePod(0.0); +public: + static bool DeclareSignature( + const TStringRef& name, + TType* userType, + IFunctionTypeInfoBuilder& builder, + bool typesOnly) { + Y_UNUSED(userType); + if (Name() == name) { + builder.SimpleSignature<TResource<ResourceName>(TResource<ResourceName>, double, double)>(); + if (!typesOnly) { + builder.Implementation(new THistogram_AddValue<THistogramType, ResourceName>(builder.GetSourcePosition())); } - - return result; + return true; + } else { + return false; } + } - public: - static bool DeclareSignature( - const TStringRef& name, - TType* userType, - IFunctionTypeInfoBuilder& builder, - bool typesOnly) { - Y_UNUSED(userType); - if (Name() == name) { - auto 
resource = builder.Resource(TStringRef(ResourceName, std::strlen(ResourceName))); - - THistogramIndexes histogramIndexes(builder); +private: + TSourcePosition Pos_; +}; - builder.Args()->Add(resource).Done().Returns(histogramIndexes.ResultStructType); +template <typename THistogramType, const char* ResourceName> +class THistogram_Serialize: public TBoxedValue { +public: + THistogram_Serialize(TSourcePosition pos) + : Pos_(pos) + { + } - if (!typesOnly) { - builder.Implementation(new THistogram_GetResult<THistogramType, ResourceName>(histogramIndexes, builder.GetSourcePosition())); - } - return true; - } else { - return false; - } - } + typedef TBoxedResource<THistogramType, ResourceName> THistogramResource; - private: - const THistogramIndexes HistogramIndexes_; - TSourcePosition Pos_; - }; + static const TStringRef& Name() { + static auto name = TString(ResourceName).substr(10) + "Histogram_Serialize"; + static auto nameRef = TStringRef(name); + return nameRef; + } - template <> - TUnboxedValue THistogram_Create<TLinearHistogram, LinearHistogramResourceName>::Run( +private: + TUnboxedValue Run( const IValueBuilder* valueBuilder, - const TUnboxedValuePod* args) const { - using THistogramResource = THistogram_Create<TLinearHistogram, LinearHistogramResourceName>::THistogramResource; + const TUnboxedValuePod* args) const override { try { - Y_UNUSED(valueBuilder); - THolder<THistogramResource> histogram(new THistogramResource( - args[1].Get<double>(), args[2].Get<double>(), args[3].Get<double>())); - histogram->Get()->Add(args[0].Get<double>(), 1.0); - return TUnboxedValuePod(histogram.Release()); + THistogram proto; + TString result; + static_cast<THistogramResource*>(args[0].AsBoxed().Get())->Get()->ToProto(proto); + Y_PROTOBUF_SUPPRESS_NODISCARD proto.SerializeToString(&result); + return valueBuilder->NewString(result); } catch (const std::exception& e) { UdfTerminate((TStringBuilder() << Pos_ << " " << e.what()).c_str()); } } - template <> - bool 
THistogram_Create<TLinearHistogram, LinearHistogramResourceName>::DeclareSignature( +public: + static bool DeclareSignature( const TStringRef& name, TType* userType, IFunctionTypeInfoBuilder& builder, bool typesOnly) { Y_UNUSED(userType); if (Name() == name) { - builder.SimpleSignature<TResource<LinearHistogramResourceName>(double, double, double, double)>(); + builder.SimpleSignature<char*(TResource<ResourceName>)>(); if (!typesOnly) { - builder.Implementation(new THistogram_Create<TLinearHistogram, LinearHistogramResourceName>(builder.GetSourcePosition())); + builder.Implementation(new THistogram_Serialize<THistogramType, ResourceName>(builder.GetSourcePosition())); } return true; } else { @@ -505,17 +264,35 @@ namespace { } } - template <> - TUnboxedValue THistogram_Deserialize<TLinearHistogram, LinearHistogramResourceName>::Run( +private: + TSourcePosition Pos_; +}; + +template <typename THistogramType, const char* ResourceName> +class THistogram_Deserialize: public TBoxedValue { +public: + THistogram_Deserialize(TSourcePosition pos) + : Pos_(pos) + { + } + + typedef TBoxedResource<THistogramType, ResourceName> THistogramResource; + + static const TStringRef& Name() { + static auto name = TString(ResourceName).substr(10) + "Histogram_Deserialize"; + static auto nameRef = TStringRef(name); + return nameRef; + } + +private: + TUnboxedValue Run( const IValueBuilder* valueBuilder, - const TUnboxedValuePod* args) const { - using THistogramResource = THistogram_Deserialize<TLinearHistogram, LinearHistogramResourceName>::THistogramResource; + const TUnboxedValuePod* args) const override { try { Y_UNUSED(valueBuilder); THistogram proto; Y_PROTOBUF_SUPPRESS_NODISCARD proto.ParseFromString(TString(args[0].AsStringRef())); - THolder<THistogramResource> histogram( - new THistogramResource(args[1].Get<double>(), args[2].Get<double>(), args[3].Get<double>())); + THolder<THistogramResource> histogram(new THistogramResource(args[1].Get<ui32>())); 
histogram->Get()->FromProto(proto); return TUnboxedValuePod(histogram.Release()); } catch (const std::exception& e) { @@ -523,17 +300,17 @@ namespace { } } - template <> - bool THistogram_Deserialize<TLinearHistogram, LinearHistogramResourceName>::DeclareSignature( +public: + static bool DeclareSignature( const TStringRef& name, TType* userType, IFunctionTypeInfoBuilder& builder, bool typesOnly) { Y_UNUSED(userType); if (Name() == name) { - builder.SimpleSignature<TResource<LinearHistogramResourceName>(char*, double, double, double)>(); + builder.SimpleSignature<TResource<ResourceName>(char*, ui32)>(); if (!typesOnly) { - builder.Implementation(new THistogram_Deserialize<TLinearHistogram, LinearHistogramResourceName>(builder.GetSourcePosition())); + builder.Implementation(new THistogram_Deserialize<THistogramType, ResourceName>(builder.GetSourcePosition())); } return true; } else { @@ -541,33 +318,52 @@ namespace { } } - template <> - TUnboxedValue THistogram_Create<TLogarithmicHistogram, LogarithmicHistogramResourceName>::Run( +private: + TSourcePosition Pos_; +}; + +template <typename THistogramType, const char* ResourceName> +class THistogram_Merge: public TBoxedValue { +public: + THistogram_Merge(TSourcePosition pos) + : Pos_(pos) + { + } + + typedef TBoxedResource<THistogramType, ResourceName> THistogramResource; + + static const TStringRef& Name() { + static auto name = TString(ResourceName).substr(10) + "Histogram_Merge"; + static auto nameRef = TStringRef(name); + return nameRef; + } + +private: + TUnboxedValue Run( const IValueBuilder* valueBuilder, - const TUnboxedValuePod* args) const { - using THistogramResource = THistogram_Create<TLogarithmicHistogram, LogarithmicHistogramResourceName>::THistogramResource; + const TUnboxedValuePod* args) const override { try { Y_UNUSED(valueBuilder); - THolder<THistogramResource> histogram(new THistogramResource( - args[1].Get<double>(), args[2].Get<double>(), args[3].Get<double>())); - 
histogram->Get()->Add(args[0].Get<double>(), 1.0); - return TUnboxedValuePod(histogram.Release()); + THistogram proto; + static_cast<THistogramResource*>(args[0].AsBoxed().Get())->Get()->ToProto(proto); + static_cast<THistogramResource*>(args[1].AsBoxed().Get())->Get()->Merge(proto, 1.0); + return TUnboxedValuePod(args[1]); } catch (const std::exception& e) { UdfTerminate((TStringBuilder() << Pos_ << " " << e.what()).c_str()); } } - template <> - bool THistogram_Create<TLogarithmicHistogram, LogarithmicHistogramResourceName>::DeclareSignature( +public: + static bool DeclareSignature( const TStringRef& name, TType* userType, IFunctionTypeInfoBuilder& builder, bool typesOnly) { Y_UNUSED(userType); if (Name() == name) { - builder.SimpleSignature<TResource<LogarithmicHistogramResourceName>(double, double, double, double)>(); + builder.SimpleSignature<TResource<ResourceName>(TResource<ResourceName>, TResource<ResourceName>)>(); if (!typesOnly) { - builder.Implementation(new THistogram_Create<TLogarithmicHistogram, LogarithmicHistogramResourceName>(builder.GetSourcePosition())); + builder.Implementation(new THistogram_Merge<THistogramType, ResourceName>(builder.GetSourcePosition())); } return true; } else { @@ -575,35 +371,99 @@ namespace { } } - template <> - TUnboxedValue THistogram_Deserialize<TLogarithmicHistogram, LogarithmicHistogramResourceName>::Run( +private: + TSourcePosition Pos_; +}; + +struct THistogramIndexes { + static constexpr ui32 BinFieldsCount = 2U; + static constexpr ui32 ResultFieldsCount = 5U; + + THistogramIndexes(IFunctionTypeInfoBuilder& builder) { + const auto binStructType = builder.Struct(BinFieldsCount)->AddField<double>("Position", &Position).AddField<double>("Frequency", &Frequency).Build(); + const auto binsList = builder.List()->Item(binStructType).Build(); + ResultStructType = builder.Struct(ResultFieldsCount)->AddField<char*>("Kind", &Kind).AddField<double>("Min", &Min).AddField<double>("Max", &Max).AddField<double>("WeightsSum", 
&WeightsSum).AddField("Bins", binsList, &Bins).Build(); + } + + ui32 Kind; + ui32 Min; + ui32 Max; + ui32 WeightsSum; + ui32 Bins; + + ui32 Position; + ui32 Frequency; + + TType* ResultStructType; +}; + +template <typename THistogramType, const char* ResourceName> +class THistogram_GetResult: public TBoxedValue { +public: + typedef TBoxedResource<THistogramType, ResourceName> THistogramResource; + + THistogram_GetResult(const THistogramIndexes& histogramIndexes, TSourcePosition pos) + : HistogramIndexes_(histogramIndexes) + , Pos_(pos) + { + } + + static const TStringRef& Name() { + static auto name = TString(ResourceName).substr(10) + "Histogram_GetResult"; + static auto nameRef = TStringRef(name); + return nameRef; + } + +private: + TUnboxedValue Run( const IValueBuilder* valueBuilder, - const TUnboxedValuePod* args) const { - using THistogramResource = THistogram_Deserialize<TLogarithmicHistogram, LogarithmicHistogramResourceName>::THistogramResource; - try { - Y_UNUSED(valueBuilder); - THistogram proto; - Y_PROTOBUF_SUPPRESS_NODISCARD proto.ParseFromString(TString(args[0].AsStringRef())); - THolder<THistogramResource> histogram( - new THistogramResource(args[1].Get<double>(), args[2].Get<double>(), args[3].Get<double>())); - histogram->Get()->FromProto(proto); - return TUnboxedValuePod(histogram.Release()); - } catch (const std::exception& e) { - UdfTerminate((TStringBuilder() << Pos_ << " " << e.what()).c_str()); + const TUnboxedValuePod* args) const override { + THistogram proto; + auto histogram = static_cast<THistogramResource*>(args[0].AsBoxed().Get())->Get(); + histogram->ToProto(proto); + + auto size = proto.FreqSize(); + TUnboxedValue* fields = nullptr; + auto result = valueBuilder->NewArray(HistogramIndexes_.ResultFieldsCount, fields); + fields[HistogramIndexes_.Kind] = valueBuilder->NewString(TStringBuf(ResourceName).Skip(10)); + if (size) { + TUnboxedValue* items = nullptr; + fields[HistogramIndexes_.Bins] = valueBuilder->NewArray(size, items); + 
fields[HistogramIndexes_.Min] = TUnboxedValuePod(static_cast<double>(histogram->GetMinValue())); + fields[HistogramIndexes_.Max] = TUnboxedValuePod(static_cast<double>(histogram->GetMaxValue())); + fields[HistogramIndexes_.WeightsSum] = TUnboxedValuePod(static_cast<double>(histogram->GetSum())); + for (ui64 i = 0; i < size; ++i) { + TUnboxedValue* binFields = nullptr; + *items++ = valueBuilder->NewArray(HistogramIndexes_.BinFieldsCount, binFields); + binFields[HistogramIndexes_.Frequency] = TUnboxedValuePod(static_cast<double>(proto.GetFreq(i))); + binFields[HistogramIndexes_.Position] = TUnboxedValuePod(static_cast<double>(proto.GetPosition(i))); + } + } else { + fields[HistogramIndexes_.Bins] = valueBuilder->NewEmptyList(); + fields[HistogramIndexes_.Min] = TUnboxedValuePod(0.0); + fields[HistogramIndexes_.Max] = TUnboxedValuePod(0.0); + fields[HistogramIndexes_.WeightsSum] = TUnboxedValuePod(0.0); } + + return result; } - template <> - bool THistogram_Deserialize<TLogarithmicHistogram, LogarithmicHistogramResourceName>::DeclareSignature( +public: + static bool DeclareSignature( const TStringRef& name, TType* userType, IFunctionTypeInfoBuilder& builder, bool typesOnly) { Y_UNUSED(userType); if (Name() == name) { - builder.SimpleSignature<TResource<LogarithmicHistogramResourceName>(char*, double, double, double)>(); + auto resource = builder.Resource(TStringRef(ResourceName, std::strlen(ResourceName))); + + THistogramIndexes histogramIndexes(builder); + + builder.Args()->Add(resource).Done().Returns(histogramIndexes.ResultStructType); + if (!typesOnly) { - builder.Implementation(new THistogram_Deserialize<TLogarithmicHistogram, LogarithmicHistogramResourceName>(builder.GetSourcePosition())); + builder.Implementation(new THistogram_GetResult<THistogramType, ResourceName>(histogramIndexes, builder.GetSourcePosition())); } return true; } else { @@ -611,352 +471,499 @@ namespace { } } - class THistogramPrint: public TBoxedValue { - public: - THistogramPrint(const 
THistogramIndexes& histogramIndexes) - : HistogramIndexes_(histogramIndexes) - { +private: + const THistogramIndexes HistogramIndexes_; + TSourcePosition Pos_; +}; + +template <> +TUnboxedValue THistogram_Create<TLinearHistogram, LinearHistogramResourceName>::Run( + const IValueBuilder* valueBuilder, + const TUnboxedValuePod* args) const { + using THistogramResource = THistogram_Create<TLinearHistogram, LinearHistogramResourceName>::THistogramResource; + try { + Y_UNUSED(valueBuilder); + THolder<THistogramResource> histogram(new THistogramResource( + args[1].Get<double>(), args[2].Get<double>(), args[3].Get<double>())); + histogram->Get()->Add(args[0].Get<double>(), 1.0); + return TUnboxedValuePod(histogram.Release()); + } catch (const std::exception& e) { + UdfTerminate((TStringBuilder() << Pos_ << " " << e.what()).c_str()); + } +} + +template <> +bool THistogram_Create<TLinearHistogram, LinearHistogramResourceName>::DeclareSignature( + const TStringRef& name, + TType* userType, + IFunctionTypeInfoBuilder& builder, + bool typesOnly) { + Y_UNUSED(userType); + if (Name() == name) { + builder.SimpleSignature<TResource<LinearHistogramResourceName>(double, double, double, double)>(); + if (!typesOnly) { + builder.Implementation(new THistogram_Create<TLinearHistogram, LinearHistogramResourceName>(builder.GetSourcePosition())); } + return true; + } else { + return false; + } +} + +template <> +TUnboxedValue THistogram_Deserialize<TLinearHistogram, LinearHistogramResourceName>::Run( + const IValueBuilder* valueBuilder, + const TUnboxedValuePod* args) const { + using THistogramResource = THistogram_Deserialize<TLinearHistogram, LinearHistogramResourceName>::THistogramResource; + try { + Y_UNUSED(valueBuilder); + THistogram proto; + Y_PROTOBUF_SUPPRESS_NODISCARD proto.ParseFromString(TString(args[0].AsStringRef())); + THolder<THistogramResource> histogram( + new THistogramResource(args[1].Get<double>(), args[2].Get<double>(), args[3].Get<double>())); + 
histogram->Get()->FromProto(proto); + return TUnboxedValuePod(histogram.Release()); + } catch (const std::exception& e) { + UdfTerminate((TStringBuilder() << Pos_ << " " << e.what()).c_str()); + } +} - static const TStringRef& Name() { - static auto name = TStringRef::Of("Print"); - return name; +template <> +bool THistogram_Deserialize<TLinearHistogram, LinearHistogramResourceName>::DeclareSignature( + const TStringRef& name, + TType* userType, + IFunctionTypeInfoBuilder& builder, + bool typesOnly) { + Y_UNUSED(userType); + if (Name() == name) { + builder.SimpleSignature<TResource<LinearHistogramResourceName>(char*, double, double, double)>(); + if (!typesOnly) { + builder.Implementation(new THistogram_Deserialize<TLinearHistogram, LinearHistogramResourceName>(builder.GetSourcePosition())); } + return true; + } else { + return false; + } +} - TUnboxedValue Run( - const IValueBuilder* valueBuilder, - const TUnboxedValuePod* args) const override { - auto kind = args[0].GetElement(HistogramIndexes_.Kind); - auto bins = args[0].GetElement(HistogramIndexes_.Bins); - double min = args[0].GetElement(HistogramIndexes_.Min).Get<double>(); - double max = args[0].GetElement(HistogramIndexes_.Max).Get<double>(); - double weightsSum = args[0].GetElement(HistogramIndexes_.WeightsSum).Get<double>(); - auto binsIterator = bins.GetListIterator(); +template <> +TUnboxedValue THistogram_Create<TLogarithmicHistogram, LogarithmicHistogramResourceName>::Run( + const IValueBuilder* valueBuilder, + const TUnboxedValuePod* args) const { + using THistogramResource = THistogram_Create<TLogarithmicHistogram, LogarithmicHistogramResourceName>::THistogramResource; + try { + Y_UNUSED(valueBuilder); + THolder<THistogramResource> histogram(new THistogramResource( + args[1].Get<double>(), args[2].Get<double>(), args[3].Get<double>())); + histogram->Get()->Add(args[0].Get<double>(), 1.0); + return TUnboxedValuePod(histogram.Release()); + } catch (const std::exception& e) { + 
UdfTerminate((TStringBuilder() << Pos_ << " " << e.what()).c_str()); + } +} - TStringBuilder result; - result << "Kind: " << (TStringBuf)kind.AsStringRef() << ' '; - result << Sprintf("Bins: %" PRIu64 " WeightsSum: %.3f Min: %.3f Max: %.3f", - bins.GetListLength(), weightsSum, min, max); - double maxFrequency = 0.0; - size_t maxPositionLength = 0; - size_t maxFrequencyLength = 0; - const ui8 bars = args[1].GetOrDefault<ui8>(25); +template <> +bool THistogram_Create<TLogarithmicHistogram, LogarithmicHistogramResourceName>::DeclareSignature( + const TStringRef& name, + TType* userType, + IFunctionTypeInfoBuilder& builder, + bool typesOnly) { + Y_UNUSED(userType); + if (Name() == name) { + builder.SimpleSignature<TResource<LogarithmicHistogramResourceName>(double, double, double, double)>(); + if (!typesOnly) { + builder.Implementation(new THistogram_Create<TLogarithmicHistogram, LogarithmicHistogramResourceName>(builder.GetSourcePosition())); + } + return true; + } else { + return false; + } +} - for (TUnboxedValue current; binsIterator.Next(current);) { - if (bars) { - double frequency = current.GetElement(HistogramIndexes_.Frequency).Get<double>(); - if (frequency > maxFrequency) { - maxFrequency = frequency; - } - } - size_t positionLength = Sprintf("%.3f", current.GetElement(HistogramIndexes_.Position).Get<double>()).length(); - size_t frequencyLength = Sprintf("%.3f", current.GetElement(HistogramIndexes_.Frequency).Get<double>()).length(); +template <> +TUnboxedValue THistogram_Deserialize<TLogarithmicHistogram, LogarithmicHistogramResourceName>::Run( + const IValueBuilder* valueBuilder, + const TUnboxedValuePod* args) const { + using THistogramResource = THistogram_Deserialize<TLogarithmicHistogram, LogarithmicHistogramResourceName>::THistogramResource; + try { + Y_UNUSED(valueBuilder); + THistogram proto; + Y_PROTOBUF_SUPPRESS_NODISCARD proto.ParseFromString(TString(args[0].AsStringRef())); + THolder<THistogramResource> histogram( + new 
THistogramResource(args[1].Get<double>(), args[2].Get<double>(), args[3].Get<double>())); + histogram->Get()->FromProto(proto); + return TUnboxedValuePod(histogram.Release()); + } catch (const std::exception& e) { + UdfTerminate((TStringBuilder() << Pos_ << " " << e.what()).c_str()); + } +} - if (positionLength > maxPositionLength) { - maxPositionLength = positionLength; - } - if (frequencyLength > maxFrequencyLength) { - maxFrequencyLength = frequencyLength; - } - } +template <> +bool THistogram_Deserialize<TLogarithmicHistogram, LogarithmicHistogramResourceName>::DeclareSignature( + const TStringRef& name, + TType* userType, + IFunctionTypeInfoBuilder& builder, + bool typesOnly) { + Y_UNUSED(userType); + if (Name() == name) { + builder.SimpleSignature<TResource<LogarithmicHistogramResourceName>(char*, double, double, double)>(); + if (!typesOnly) { + builder.Implementation(new THistogram_Deserialize<TLogarithmicHistogram, LogarithmicHistogramResourceName>(builder.GetSourcePosition())); + } + return true; + } else { + return false; + } +} - binsIterator = bins.GetListIterator(); - for (TUnboxedValue current; binsIterator.Next(current);) { - double position = current.GetElement(HistogramIndexes_.Position).Get<double>(); +class THistogramPrint: public TBoxedValue { +public: + THistogramPrint(const THistogramIndexes& histogramIndexes) + : HistogramIndexes_(histogramIndexes) + { + } + + static const TStringRef& Name() { + static auto name = TStringRef::Of("Print"); + return name; + } + + TUnboxedValue Run( + const IValueBuilder* valueBuilder, + const TUnboxedValuePod* args) const override { + auto kind = args[0].GetElement(HistogramIndexes_.Kind); + auto bins = args[0].GetElement(HistogramIndexes_.Bins); + double min = args[0].GetElement(HistogramIndexes_.Min).Get<double>(); + double max = args[0].GetElement(HistogramIndexes_.Max).Get<double>(); + double weightsSum = args[0].GetElement(HistogramIndexes_.WeightsSum).Get<double>(); + auto binsIterator = 
bins.GetListIterator(); + + TStringBuilder result; + result << "Kind: " << (TStringBuf)kind.AsStringRef() << ' '; + result << Sprintf("Bins: %" PRIu64 " WeightsSum: %.3f Min: %.3f Max: %.3f", + bins.GetListLength(), weightsSum, min, max); + double maxFrequency = 0.0; + size_t maxPositionLength = 0; + size_t maxFrequencyLength = 0; + const ui8 bars = args[1].GetOrDefault<ui8>(25); + + for (TUnboxedValue current; binsIterator.Next(current);) { + if (bars) { double frequency = current.GetElement(HistogramIndexes_.Frequency).Get<double>(); - result << "\n"; - if (bars && maxFrequency > 0) { - ui8 filledBars = static_cast<ui8>(bars * frequency / maxFrequency); - for (ui8 i = 0; i < bars; ++i) { - if (i < filledBars) { - result << "█"; - } else { - result << "░"; - } - } + if (frequency > maxFrequency) { + maxFrequency = frequency; } - result << " P: " << LeftPad(Sprintf("%.3f", position), maxPositionLength); - result << " F: " << LeftPad(Sprintf("%.3f", frequency), maxFrequencyLength); } + size_t positionLength = Sprintf("%.3f", current.GetElement(HistogramIndexes_.Position).Get<double>()).length(); + size_t frequencyLength = Sprintf("%.3f", current.GetElement(HistogramIndexes_.Frequency).Get<double>()).length(); - return valueBuilder->NewString(result); + if (positionLength > maxPositionLength) { + maxPositionLength = positionLength; + } + if (frequencyLength > maxFrequencyLength) { + maxFrequencyLength = frequencyLength; + } } - static bool DeclareSignature( - const TStringRef& name, - TType* userType, - IFunctionTypeInfoBuilder& builder, - bool typesOnly) { - Y_UNUSED(userType); - if (Name() == name) { - THistogramIndexes histogramIndexes(builder); - auto optionalUi8 = builder.Optional()->Item<ui8>().Build(); - - builder.Args()->Add(histogramIndexes.ResultStructType).Flags(ICallablePayload::TArgumentFlags::AutoMap).Add(optionalUi8).Done().OptionalArgs(1).Returns<char*>(); - - if (!typesOnly) { - builder.Implementation(new THistogramPrint(histogramIndexes)); + 
binsIterator = bins.GetListIterator(); + for (TUnboxedValue current; binsIterator.Next(current);) { + double position = current.GetElement(HistogramIndexes_.Position).Get<double>(); + double frequency = current.GetElement(HistogramIndexes_.Frequency).Get<double>(); + result << "\n"; + if (bars && maxFrequency > 0) { + ui8 filledBars = static_cast<ui8>(bars * frequency / maxFrequency); + for (ui8 i = 0; i < bars; ++i) { + if (i < filledBars) { + result << "█"; + } else { + result << "░"; + } } - builder.IsStrict(); - return true; - } else { - return false; } + result << " P: " << LeftPad(Sprintf("%.3f", position), maxPositionLength); + result << " F: " << LeftPad(Sprintf("%.3f", frequency), maxFrequencyLength); } - private: - const THistogramIndexes HistogramIndexes_; - }; + return valueBuilder->NewString(result); + } - class THistogramToCumulativeDistributionFunction: public TBoxedValue { - public: - THistogramToCumulativeDistributionFunction(const THistogramIndexes& histogramIndexes) - : HistogramIndexes_(histogramIndexes) - { - } + static bool DeclareSignature( + const TStringRef& name, + TType* userType, + IFunctionTypeInfoBuilder& builder, + bool typesOnly) { + Y_UNUSED(userType); + if (Name() == name) { + THistogramIndexes histogramIndexes(builder); + auto optionalUi8 = builder.Optional()->Item<ui8>().Build(); - static const TStringRef& Name() { - static auto name = TStringRef::Of("ToCumulativeDistributionFunction"); - return name; - } + builder.Args()->Add(histogramIndexes.ResultStructType).Flags(ICallablePayload::TArgumentFlags::AutoMap).Add(optionalUi8).Done().OptionalArgs(1).Returns<char*>(); - TUnboxedValue Run( - const IValueBuilder* valueBuilder, - const TUnboxedValuePod* args) const override { - TUnboxedValue* fields = nullptr; - auto result = valueBuilder->NewArray(HistogramIndexes_.ResultFieldsCount, fields); - auto bins = args[0].GetElement(HistogramIndexes_.Bins); - double minValue = args[0].GetElement(HistogramIndexes_.Min).Get<double>(); - double 
maxValue = args[0].GetElement(HistogramIndexes_.Max).Get<double>(); - double sum = 0.0; - double weightsSum = 0.0; - std::vector<TUnboxedValue> resultBins; - if (bins.HasFastListLength()) - resultBins.reserve(bins.GetListLength()); - const auto binsIterator = bins.GetListIterator(); - for (TUnboxedValue current; binsIterator.Next(current);) { - TUnboxedValue* binFields = nullptr; - auto resultCurrent = valueBuilder->NewArray(HistogramIndexes_.BinFieldsCount, binFields); - const auto frequency = current.GetElement(HistogramIndexes_.Frequency).Get<double>(); - sum += frequency; - weightsSum += sum; - binFields[HistogramIndexes_.Frequency] = TUnboxedValuePod(sum); - binFields[HistogramIndexes_.Position] = current.GetElement(HistogramIndexes_.Position); - resultBins.emplace_back(std::move(resultCurrent)); + if (!typesOnly) { + builder.Implementation(new THistogramPrint(histogramIndexes)); } - - auto kind = args[0].GetElement(HistogramIndexes_.Kind); - fields[HistogramIndexes_.Kind] = valueBuilder->AppendString(kind, "Cdf"); - fields[HistogramIndexes_.Bins] = valueBuilder->NewList(resultBins.data(), resultBins.size()); - fields[HistogramIndexes_.Max] = TUnboxedValuePod(maxValue); - fields[HistogramIndexes_.Min] = TUnboxedValuePod(minValue); - fields[HistogramIndexes_.WeightsSum] = TUnboxedValuePod(weightsSum); - return result; + builder.IsStrict(); + return true; + } else { + return false; } + } - static bool DeclareSignature( - const TStringRef& name, - TType* userType, - IFunctionTypeInfoBuilder& builder, - bool typesOnly) { - Y_UNUSED(userType); - if (Name() == name) { - THistogramIndexes histogramIndexes(builder); +private: + const THistogramIndexes HistogramIndexes_; +}; - builder.Args()->Add(histogramIndexes.ResultStructType).Flags(ICallablePayload::TArgumentFlags::AutoMap).Done().Returns(histogramIndexes.ResultStructType); +class THistogramToCumulativeDistributionFunction: public TBoxedValue { +public: + THistogramToCumulativeDistributionFunction(const 
THistogramIndexes& histogramIndexes) + : HistogramIndexes_(histogramIndexes) + { + } - if (!typesOnly) { - builder.Implementation(new THistogramToCumulativeDistributionFunction(histogramIndexes)); - } - builder.IsStrict(); - return true; - } else { - return false; - } + static const TStringRef& Name() { + static auto name = TStringRef::Of("ToCumulativeDistributionFunction"); + return name; + } + + TUnboxedValue Run( + const IValueBuilder* valueBuilder, + const TUnboxedValuePod* args) const override { + TUnboxedValue* fields = nullptr; + auto result = valueBuilder->NewArray(HistogramIndexes_.ResultFieldsCount, fields); + auto bins = args[0].GetElement(HistogramIndexes_.Bins); + double minValue = args[0].GetElement(HistogramIndexes_.Min).Get<double>(); + double maxValue = args[0].GetElement(HistogramIndexes_.Max).Get<double>(); + double sum = 0.0; + double weightsSum = 0.0; + std::vector<TUnboxedValue> resultBins; + if (bins.HasFastListLength()) { + resultBins.reserve(bins.GetListLength()); + } + const auto binsIterator = bins.GetListIterator(); + for (TUnboxedValue current; binsIterator.Next(current);) { + TUnboxedValue* binFields = nullptr; + auto resultCurrent = valueBuilder->NewArray(HistogramIndexes_.BinFieldsCount, binFields); + const auto frequency = current.GetElement(HistogramIndexes_.Frequency).Get<double>(); + sum += frequency; + weightsSum += sum; + binFields[HistogramIndexes_.Frequency] = TUnboxedValuePod(sum); + binFields[HistogramIndexes_.Position] = current.GetElement(HistogramIndexes_.Position); + resultBins.emplace_back(std::move(resultCurrent)); } - private: - const THistogramIndexes HistogramIndexes_; - }; + auto kind = args[0].GetElement(HistogramIndexes_.Kind); + fields[HistogramIndexes_.Kind] = valueBuilder->AppendString(kind, "Cdf"); + fields[HistogramIndexes_.Bins] = valueBuilder->NewList(resultBins.data(), resultBins.size()); + fields[HistogramIndexes_.Max] = TUnboxedValuePod(maxValue); + fields[HistogramIndexes_.Min] = 
TUnboxedValuePod(minValue); + fields[HistogramIndexes_.WeightsSum] = TUnboxedValuePod(weightsSum); + return result; + } - class THistogramNormalize: public TBoxedValue { - public: - THistogramNormalize(const THistogramIndexes& histogramIndexes) - : HistogramIndexes_(histogramIndexes) - { - } + static bool DeclareSignature( + const TStringRef& name, + TType* userType, + IFunctionTypeInfoBuilder& builder, + bool typesOnly) { + Y_UNUSED(userType); + if (Name() == name) { + THistogramIndexes histogramIndexes(builder); - static const TStringRef& Name() { - static auto name = TStringRef::Of("Normalize"); - return name; - } + builder.Args()->Add(histogramIndexes.ResultStructType).Flags(ICallablePayload::TArgumentFlags::AutoMap).Done().Returns(histogramIndexes.ResultStructType); - TUnboxedValue Run( - const IValueBuilder* valueBuilder, - const TUnboxedValuePod* args) const override { - TUnboxedValue* fields = nullptr; - auto result = valueBuilder->NewArray(HistogramIndexes_.ResultFieldsCount, fields); - auto bins = args[0].GetElement(HistogramIndexes_.Bins); - double minValue = args[0].GetElement(HistogramIndexes_.Min).Get<double>(); - double maxValue = args[0].GetElement(HistogramIndexes_.Max).Get<double>(); - double area = args[1].GetOrDefault<double>(100.0); - bool cdfNormalization = args[2].GetOrDefault<bool>(false); - double sum = 0.0; - double weightsSum = 0.0; - double lastBinFrequency = 0.0; - std::vector<TUnboxedValue> resultBins; - if (bins.HasFastListLength()) - resultBins.reserve(bins.GetListLength()); - auto binsIterator = bins.GetListIterator(); - for (TUnboxedValue current; binsIterator.Next(current);) { - sum += current.GetElement(HistogramIndexes_.Frequency).Get<double>(); - lastBinFrequency = current.GetElement(HistogramIndexes_.Frequency).Get<double>(); - } - binsIterator = bins.GetListIterator(); - for (TUnboxedValue current; binsIterator.Next(current);) { - TUnboxedValue* binFields = nullptr; - auto resultCurrent = 
valueBuilder->NewArray(HistogramIndexes_.BinFieldsCount, binFields); - double frequency = current.GetElement(HistogramIndexes_.Frequency).Get<double>(); - if (cdfNormalization) { - frequency = area * frequency / lastBinFrequency; - } else { - frequency = area * frequency / sum; - } - weightsSum += frequency; - binFields[HistogramIndexes_.Frequency] = TUnboxedValuePod(frequency); - binFields[HistogramIndexes_.Position] = current.GetElement(HistogramIndexes_.Position); - resultBins.emplace_back(std::move(resultCurrent)); + if (!typesOnly) { + builder.Implementation(new THistogramToCumulativeDistributionFunction(histogramIndexes)); } + builder.IsStrict(); + return true; + } else { + return false; + } + } - TUnboxedValue kind = args[0].GetElement(HistogramIndexes_.Kind); - if (cdfNormalization) { - kind = valueBuilder->AppendString(kind, "Cdf"); - } +private: + const THistogramIndexes HistogramIndexes_; +}; - fields[HistogramIndexes_.Kind] = kind; - fields[HistogramIndexes_.Bins] = valueBuilder->NewList(resultBins.data(), resultBins.size()); - fields[HistogramIndexes_.Max] = TUnboxedValuePod(maxValue); - fields[HistogramIndexes_.Min] = TUnboxedValuePod(minValue); - fields[HistogramIndexes_.WeightsSum] = TUnboxedValuePod(weightsSum); - return result; - } +class THistogramNormalize: public TBoxedValue { +public: + THistogramNormalize(const THistogramIndexes& histogramIndexes) + : HistogramIndexes_(histogramIndexes) + { + } - static bool DeclareSignature( - const TStringRef& name, - TType* userType, - IFunctionTypeInfoBuilder& builder, - bool typesOnly) { - Y_UNUSED(userType); - if (Name() == name) { - THistogramIndexes histogramIndexes(builder); - auto optionalDouble = builder.Optional()->Item<double>().Build(); - auto optionalCdfNormalization = builder.Optional()->Item<bool>().Build(); - 
builder.Args()->Add(histogramIndexes.ResultStructType).Flags(ICallablePayload::TArgumentFlags::AutoMap).Add(optionalDouble).Add(optionalCdfNormalization).Done().Returns(histogramIndexes.ResultStructType); - builder.OptionalArgs(1); - builder.OptionalArgs(2); - if (!typesOnly) { - builder.Implementation(new THistogramNormalize(histogramIndexes)); - } - builder.IsStrict(); - return true; + static const TStringRef& Name() { + static auto name = TStringRef::Of("Normalize"); + return name; + } + + TUnboxedValue Run( + const IValueBuilder* valueBuilder, + const TUnboxedValuePod* args) const override { + TUnboxedValue* fields = nullptr; + auto result = valueBuilder->NewArray(HistogramIndexes_.ResultFieldsCount, fields); + auto bins = args[0].GetElement(HistogramIndexes_.Bins); + double minValue = args[0].GetElement(HistogramIndexes_.Min).Get<double>(); + double maxValue = args[0].GetElement(HistogramIndexes_.Max).Get<double>(); + double area = args[1].GetOrDefault<double>(100.0); + bool cdfNormalization = args[2].GetOrDefault<bool>(false); + double sum = 0.0; + double weightsSum = 0.0; + double lastBinFrequency = 0.0; + std::vector<TUnboxedValue> resultBins; + if (bins.HasFastListLength()) { + resultBins.reserve(bins.GetListLength()); + } + auto binsIterator = bins.GetListIterator(); + for (TUnboxedValue current; binsIterator.Next(current);) { + sum += current.GetElement(HistogramIndexes_.Frequency).Get<double>(); + lastBinFrequency = current.GetElement(HistogramIndexes_.Frequency).Get<double>(); + } + binsIterator = bins.GetListIterator(); + for (TUnboxedValue current; binsIterator.Next(current);) { + TUnboxedValue* binFields = nullptr; + auto resultCurrent = valueBuilder->NewArray(HistogramIndexes_.BinFieldsCount, binFields); + double frequency = current.GetElement(HistogramIndexes_.Frequency).Get<double>(); + if (cdfNormalization) { + frequency = area * frequency / lastBinFrequency; } else { - return false; + frequency = area * frequency / sum; } + weightsSum += 
frequency; + binFields[HistogramIndexes_.Frequency] = TUnboxedValuePod(frequency); + binFields[HistogramIndexes_.Position] = current.GetElement(HistogramIndexes_.Position); + resultBins.emplace_back(std::move(resultCurrent)); } - private: - const THistogramIndexes HistogramIndexes_; - }; + TUnboxedValue kind = args[0].GetElement(HistogramIndexes_.Kind); + if (cdfNormalization) { + kind = valueBuilder->AppendString(kind, "Cdf"); + } - template <bool twoArgs> - class THistogramMethodBase: public TBoxedValue { - public: - THistogramMethodBase(const THistogramIndexes& histogramIndexes, TSourcePosition pos) - : HistogramIndexes_(histogramIndexes) - , Pos_(pos) - { + fields[HistogramIndexes_.Kind] = kind; + fields[HistogramIndexes_.Bins] = valueBuilder->NewList(resultBins.data(), resultBins.size()); + fields[HistogramIndexes_.Max] = TUnboxedValuePod(maxValue); + fields[HistogramIndexes_.Min] = TUnboxedValuePod(minValue); + fields[HistogramIndexes_.WeightsSum] = TUnboxedValuePod(weightsSum); + return result; + } + + static bool DeclareSignature( + const TStringRef& name, + TType* userType, + IFunctionTypeInfoBuilder& builder, + bool typesOnly) { + Y_UNUSED(userType); + if (Name() == name) { + THistogramIndexes histogramIndexes(builder); + auto optionalDouble = builder.Optional()->Item<double>().Build(); + auto optionalCdfNormalization = builder.Optional()->Item<bool>().Build(); + builder.Args()->Add(histogramIndexes.ResultStructType).Flags(ICallablePayload::TArgumentFlags::AutoMap).Add(optionalDouble).Add(optionalCdfNormalization).Done().Returns(histogramIndexes.ResultStructType); + builder.OptionalArgs(1); + builder.OptionalArgs(2); + if (!typesOnly) { + builder.Implementation(new THistogramNormalize(histogramIndexes)); + } + builder.IsStrict(); + return true; + } else { + return false; } + } - virtual TUnboxedValue GetResult( - const THistogram& input, - const TUnboxedValuePod* args) const = 0; +private: + const THistogramIndexes HistogramIndexes_; +}; - TUnboxedValue 
Run( - const IValueBuilder*, - const TUnboxedValuePod* args) const override { - try { - auto bins = args[0].GetElement(HistogramIndexes_.Bins); - double min = args[0].GetElement(HistogramIndexes_.Min).template Get<double>(); - double max = args[0].GetElement(HistogramIndexes_.Max).template Get<double>(); - auto binsIterator = bins.GetListIterator(); +template <bool twoArgs> +class THistogramMethodBase: public TBoxedValue { +public: + THistogramMethodBase(const THistogramIndexes& histogramIndexes, TSourcePosition pos) + : HistogramIndexes_(histogramIndexes) + , Pos_(pos) + { + } - THistogram histogram; - histogram.SetType(HT_ADAPTIVE_HISTOGRAM); - histogram.SetMinValue(min); - histogram.SetMaxValue(max); - for (TUnboxedValue current; binsIterator.Next(current);) { - double frequency = current.GetElement(HistogramIndexes_.Frequency).template Get<double>(); - double position = current.GetElement(HistogramIndexes_.Position).template Get<double>(); - histogram.AddFreq(frequency); - histogram.AddPosition(position); - } + virtual TUnboxedValue GetResult( + const THistogram& input, + const TUnboxedValuePod* args) const = 0; - return GetResult(histogram, args); - } catch (const std::exception& e) { - UdfTerminate((TStringBuilder() << Pos_ << " " << e.what()).c_str()); + TUnboxedValue Run( + const IValueBuilder*, + const TUnboxedValuePod* args) const override { + try { + auto bins = args[0].GetElement(HistogramIndexes_.Bins); + double min = args[0].GetElement(HistogramIndexes_.Min).template Get<double>(); + double max = args[0].GetElement(HistogramIndexes_.Max).template Get<double>(); + auto binsIterator = bins.GetListIterator(); + + THistogram histogram; + histogram.SetType(HT_ADAPTIVE_HISTOGRAM); + histogram.SetMinValue(min); + histogram.SetMaxValue(max); + for (TUnboxedValue current; binsIterator.Next(current);) { + double frequency = current.GetElement(HistogramIndexes_.Frequency).template Get<double>(); + double position = 
current.GetElement(HistogramIndexes_.Position).template Get<double>(); + histogram.AddFreq(frequency); + histogram.AddPosition(position); } + + return GetResult(histogram, args); + } catch (const std::exception& e) { + UdfTerminate((TStringBuilder() << Pos_ << " " << e.what()).c_str()); } + } - static THistogramIndexes DeclareSignatureBase(IFunctionTypeInfoBuilder& builder) { - THistogramIndexes histogramIndexes(builder); + static THistogramIndexes DeclareSignatureBase(IFunctionTypeInfoBuilder& builder) { + THistogramIndexes histogramIndexes(builder); - if (twoArgs) { - builder.Args()->Add(histogramIndexes.ResultStructType).Flags(ICallablePayload::TArgumentFlags::AutoMap).Add<double>().Add<double>().Done().Returns<double>(); - } else { - builder.Args()->Add(histogramIndexes.ResultStructType).Flags(ICallablePayload::TArgumentFlags::AutoMap).Add<double>().Done().Returns<double>(); - } - return histogramIndexes; + if (twoArgs) { + builder.Args()->Add(histogramIndexes.ResultStructType).Flags(ICallablePayload::TArgumentFlags::AutoMap).Add<double>().Add<double>().Done().Returns<double>(); + } else { + builder.Args()->Add(histogramIndexes.ResultStructType).Flags(ICallablePayload::TArgumentFlags::AutoMap).Add<double>().Done().Returns<double>(); } + return histogramIndexes; + } - protected: - const THistogramIndexes HistogramIndexes_; - TSourcePosition Pos_; - }; +protected: + const THistogramIndexes HistogramIndexes_; + TSourcePosition Pos_; +}; -#define DECLARE_ONE_DOUBLE_ARG_METHOD_UDF(name) \ - class T##name: public THistogramMethodBase<false> { \ - public: \ - T##name(const THistogramIndexes& histogramIndexes, TSourcePosition pos) \ - : THistogramMethodBase<false>(histogramIndexes, pos) { \ - } \ - static const TStringRef& Name() { \ - static auto name = TStringRef::Of(#name); \ - return name; \ - } \ - static bool DeclareSignature( \ - const TStringRef& name, \ - TType* userType, \ - IFunctionTypeInfoBuilder& builder, \ - bool typesOnly) { \ - Y_UNUSED(userType); \ - 
if (Name() == name) { \ - const auto& histogramIndexes = DeclareSignatureBase(builder); \ - if (!typesOnly) { \ - builder.Implementation(new T##name(histogramIndexes, \ - builder.GetSourcePosition())); \ - } \ - return true; \ - } else { \ - return false; \ - } \ - } \ - TUnboxedValue GetResult( \ - const THistogram& input, \ - const TUnboxedValuePod* args) const override { \ - TAdaptiveWardHistogram histo(input, input.FreqSize()); \ - double result = histo.name(args[1].Get<double>()); \ - return TUnboxedValuePod(result); \ - } \ +#define DECLARE_ONE_DOUBLE_ARG_METHOD_UDF(name) \ + class T##name: public THistogramMethodBase<false> { \ + public: \ + T##name(const THistogramIndexes& histogramIndexes, TSourcePosition pos) \ + : THistogramMethodBase<false>(histogramIndexes, pos) { \ + } \ + static const TStringRef& Name() { \ + static auto name = TStringRef::Of(#name); \ + return name; \ + } \ + static bool DeclareSignature( \ + const TStringRef& name, \ + TType* userType, \ + IFunctionTypeInfoBuilder& builder, \ + bool typesOnly) { \ + Y_UNUSED(userType); \ + if (Name() == name) { \ + const auto& histogramIndexes = DeclareSignatureBase(builder); \ + if (!typesOnly) { \ + builder.Implementation(new T##name(histogramIndexes, \ + builder.GetSourcePosition())); \ + } \ + return true; \ + } else { \ + return false; \ + } \ + } \ + TUnboxedValue GetResult( \ + const THistogram& input, \ + const TUnboxedValuePod* args) const override { \ + TAdaptiveWardHistogram histo(input, input.FreqSize()); \ + double result = histo.name(args[1].Get<double>()); \ + return TUnboxedValuePod(result); \ + } \ }; #define DECLARE_TWO_DOUBLE_ARG_METHOD_UDF(name) \ @@ -979,7 +986,7 @@ namespace { const auto& histogramIndexes = DeclareSignatureBase(builder); \ if (!typesOnly) { \ builder.Implementation(new T##name(histogramIndexes, \ - builder.GetSourcePosition())); \ + builder.GetSourcePosition())); \ } \ return true; \ } else { \ @@ -1001,18 +1008,18 @@ namespace { #define 
DECLARE_HISTOGRAM_UDFS(name) \ HISTOGRAM_FUNCTION_MAP(DECLARE_HISTOGRAM_UDF, name) - HISTOGRAM_ONE_DOUBLE_ARG_METHODS_MAP(DECLARE_ONE_DOUBLE_ARG_METHOD_UDF) - HISTOGRAM_TWO_DOUBLE_ARG_METHODS_MAP(DECLARE_TWO_DOUBLE_ARG_METHOD_UDF) +HISTOGRAM_ONE_DOUBLE_ARG_METHODS_MAP(DECLARE_ONE_DOUBLE_ARG_METHOD_UDF) +HISTOGRAM_TWO_DOUBLE_ARG_METHODS_MAP(DECLARE_TWO_DOUBLE_ARG_METHOD_UDF) - SIMPLE_MODULE(THistogramModule, - HISTOGRAM_ALGORITHMS_MAP(DECLARE_HISTOGRAM_UDFS) - HISTOGRAM_ONE_DOUBLE_ARG_METHODS_MAP(REGISTER_METHOD_UDF) - HISTOGRAM_TWO_DOUBLE_ARG_METHODS_MAP(REGISTER_METHOD_UDF) - DECLARE_HISTOGRAM_UDFS(Linear) - DECLARE_HISTOGRAM_UDFS(Logarithmic) - THistogramPrint, - THistogramNormalize, - THistogramToCumulativeDistributionFunction) -} +SIMPLE_MODULE(THistogramModule, + HISTOGRAM_ALGORITHMS_MAP(DECLARE_HISTOGRAM_UDFS) + HISTOGRAM_ONE_DOUBLE_ARG_METHODS_MAP(REGISTER_METHOD_UDF) + HISTOGRAM_TWO_DOUBLE_ARG_METHODS_MAP(REGISTER_METHOD_UDF) + DECLARE_HISTOGRAM_UDFS(Linear) + DECLARE_HISTOGRAM_UDFS(Logarithmic) + THistogramPrint, + THistogramNormalize, + THistogramToCumulativeDistributionFunction) +} // namespace REGISTER_MODULES(THistogramModule) diff --git a/yql/essentials/udfs/common/histogram/ya.make b/yql/essentials/udfs/common/histogram/ya.make index 51b4a241002..659a3ba4406 100644 --- a/yql/essentials/udfs/common/histogram/ya.make +++ b/yql/essentials/udfs/common/histogram/ya.make @@ -6,6 +6,8 @@ YQL_UDF_CONTRIB(histogram_udf) 0 ) + ENABLE(YQL_STYLE_CPP) + SRCS( histogram_udf.cpp ) @@ -18,4 +20,5 @@ YQL_UDF_CONTRIB(histogram_udf) RECURSE_FOR_TESTS( test -)
\ No newline at end of file +) + diff --git a/yql/essentials/udfs/common/hyperloglog/hyperloglog_udf.cpp b/yql/essentials/udfs/common/hyperloglog/hyperloglog_udf.cpp index 39d17f2ec44..f0e2ad69149 100644 --- a/yql/essentials/udfs/common/hyperloglog/hyperloglog_udf.cpp +++ b/yql/essentials/udfs/common/hyperloglog/hyperloglog_udf.cpp @@ -10,414 +10,423 @@ using namespace NKikimr; using namespace NUdf; namespace { - class THybridHyperLogLog { - private: - using THybridSet = THashSet<ui64, std::hash<ui64>, std::equal_to<ui64>, TStdAllocatorForUdf<ui64>>; - using THybridHll = THyperLogLogWithAlloc<TStdAllocatorForUdf<ui8>>; +class THybridHyperLogLog { +private: + using THybridSet = THashSet<ui64, std::hash<ui64>, std::equal_to<ui64>, TStdAllocatorForUdf<ui64>>; + using THybridHll = THyperLogLogWithAlloc<TStdAllocatorForUdf<ui8>>; - explicit THybridHyperLogLog(unsigned precision) - : Var_(THybridSet()), SizeLimit_((1u << precision) / 8), Precision_(precision) - { } + explicit THybridHyperLogLog(unsigned precision) + : Var_(THybridSet()) + , SizeLimit_((1u << precision) / 8) + , Precision_(precision) + { + } - THybridHll ConvertToHyperLogLog() const { - auto res = THybridHll::Create(Precision_); - for (auto& el : GetSetRef()) { - res.Update(el); - } - return res; + THybridHll ConvertToHyperLogLog() const { + auto res = THybridHll::Create(Precision_); + for (auto& el : GetSetRef()) { + res.Update(el); } + return res; + } - bool IsSet() const { - return Var_.index() == 1; - } + bool IsSet() const { + return Var_.index() == 1; + } - const THybridSet& GetSetRef() const { - return std::get<1>(Var_); - } + const THybridSet& GetSetRef() const { + return std::get<1>(Var_); + } - THybridSet& GetMutableSetRef() { - return std::get<1>(Var_); - } + THybridSet& GetMutableSetRef() { + return std::get<1>(Var_); + } - const THybridHll& GetHllRef() const { - return std::get<0>(Var_); - } + const THybridHll& GetHllRef() const { + return std::get<0>(Var_); + } - THybridHll& 
GetMutableHllRef() { - return std::get<0>(Var_); - } + THybridHll& GetMutableHllRef() { + return std::get<0>(Var_); + } - public: - THybridHyperLogLog (THybridHyperLogLog&&) = default; +public: + THybridHyperLogLog(THybridHyperLogLog&&) = default; - THybridHyperLogLog& operator=(THybridHyperLogLog&&) = default; + THybridHyperLogLog& operator=(THybridHyperLogLog&&) = default; - void Update(ui64 hash) { - if (IsSet()) { - GetMutableSetRef().insert(hash); - if (GetSetRef().size() >= SizeLimit_) { - Var_ = ConvertToHyperLogLog(); - } - } else { - GetMutableHllRef().Update(hash); + void Update(ui64 hash) { + if (IsSet()) { + GetMutableSetRef().insert(hash); + if (GetSetRef().size() >= SizeLimit_) { + Var_ = ConvertToHyperLogLog(); } + } else { + GetMutableHllRef().Update(hash); } + } - void Merge(const THybridHyperLogLog& rh) { - if (IsSet() && rh.IsSet()) { - GetMutableSetRef().insert(rh.GetSetRef().begin(), rh.GetSetRef().end()); - if (GetSetRef().size() >= SizeLimit_) { - Var_ = ConvertToHyperLogLog(); - } - } else { - if (IsSet()) { - Var_ = ConvertToHyperLogLog(); - } - if (rh.IsSet()) { - GetMutableHllRef().Merge(rh.ConvertToHyperLogLog()); - } else { - GetMutableHllRef().Merge(rh.GetHllRef()); - } + void Merge(const THybridHyperLogLog& rh) { + if (IsSet() && rh.IsSet()) { + GetMutableSetRef().insert(rh.GetSetRef().begin(), rh.GetSetRef().end()); + if (GetSetRef().size() >= SizeLimit_) { + Var_ = ConvertToHyperLogLog(); } - } - - void Save(IOutputStream& out) const { - out.Write(static_cast<char>(Var_.index())); - out.Write(static_cast<char>(Precision_)); + } else { if (IsSet()) { - ::Save(&out, GetSetRef()); + Var_ = ConvertToHyperLogLog(); + } + if (rh.IsSet()) { + GetMutableHllRef().Merge(rh.ConvertToHyperLogLog()); } else { - GetHllRef().Save(out); + GetMutableHllRef().Merge(rh.GetHllRef()); } } + } - ui64 Estimate() const { - if (IsSet()) { - return GetSetRef().size(); - } - return GetHllRef().Estimate(); + void Save(IOutputStream& out) const { + 
out.Write(static_cast<char>(Var_.index())); + out.Write(static_cast<char>(Precision_)); + if (IsSet()) { + ::Save(&out, GetSetRef()); + } else { + GetHllRef().Save(out); } + } - static THybridHyperLogLog Create(unsigned precision) { - Y_ENSURE(precision >= THyperLogLog::PRECISION_MIN && precision <= THyperLogLog::PRECISION_MAX); - return THybridHyperLogLog(precision); + ui64 Estimate() const { + if (IsSet()) { + return GetSetRef().size(); } + return GetHllRef().Estimate(); + } - static THybridHyperLogLog Load(IInputStream& in) { - char type; - Y_ENSURE(in.ReadChar(type)); - char precision; - Y_ENSURE(in.ReadChar(precision)); - auto res = Create(precision); - if (type) { - ::Load(&in, res.GetMutableSetRef()); - } else { - res.Var_ = THybridHll::Load(in); - } - return res; + static THybridHyperLogLog Create(unsigned precision) { + Y_ENSURE(precision >= THyperLogLog::PRECISION_MIN && precision <= THyperLogLog::PRECISION_MAX); + return THybridHyperLogLog(precision); + } + + static THybridHyperLogLog Load(IInputStream& in) { + char type; + Y_ENSURE(in.ReadChar(type)); + char precision; + Y_ENSURE(in.ReadChar(precision)); + auto res = Create(precision); + if (type) { + ::Load(&in, res.GetMutableSetRef()); + } else { + res.Var_ = THybridHll::Load(in); } + return res; + } - private: - std::variant<THybridHll, THybridSet> Var_; +private: + std::variant<THybridHll, THybridSet> Var_; - size_t SizeLimit_; + size_t SizeLimit_; - unsigned Precision_; - }; + unsigned Precision_; +}; - extern const char HyperLogLogResourceName[] = "HyperLogLog.State"; +extern const char HyperLogLogResourceName[] = "HyperLogLog.State"; - using THyperLogLogResource = TBoxedResource<THybridHyperLogLog, HyperLogLogResourceName>; +using THyperLogLogResource = TBoxedResource<THybridHyperLogLog, HyperLogLogResourceName>; - class THyperLogLogCreate: public TBoxedValue { - public: - THyperLogLogCreate(TSourcePosition pos) - : Pos_(pos) - {} +class THyperLogLogCreate: public TBoxedValue { +public: + 
THyperLogLogCreate(TSourcePosition pos) + : Pos_(pos) + { + } - static const TStringRef& Name() { - static auto nameRef = TStringRef::Of("Create"); - return nameRef; - } + static const TStringRef& Name() { + static auto nameRef = TStringRef::Of("Create"); + return nameRef; + } - private: - TUnboxedValue Run( - const IValueBuilder*, - const TUnboxedValuePod* args) const override { - try { - THolder<THyperLogLogResource> hll(new THyperLogLogResource(THybridHyperLogLog::Create(args[1].Get<ui32>()))); - hll->Get()->Update(args[0].Get<ui64>()); - return TUnboxedValuePod(hll.Release()); - } catch (const std::exception& e) { - UdfTerminate((TStringBuilder() << Pos_ << " " << e.what()).c_str()); - } +private: + TUnboxedValue Run( + const IValueBuilder*, + const TUnboxedValuePod* args) const override { + try { + THolder<THyperLogLogResource> hll(new THyperLogLogResource(THybridHyperLogLog::Create(args[1].Get<ui32>()))); + hll->Get()->Update(args[0].Get<ui64>()); + return TUnboxedValuePod(hll.Release()); + } catch (const std::exception& e) { + UdfTerminate((TStringBuilder() << Pos_ << " " << e.what()).c_str()); } + } - public: - static bool DeclareSignature( - const TStringRef& name, - TType* userType, - IFunctionTypeInfoBuilder& builder, - bool typesOnly) { - Y_UNUSED(userType); - if (Name() == name) { - builder.SimpleSignature<TResource<HyperLogLogResourceName>(ui64, ui32)>(); - if (!typesOnly) { - builder.Implementation(new THyperLogLogCreate(builder.GetSourcePosition())); - } - return true; - } else { - return false; +public: + static bool DeclareSignature( + const TStringRef& name, + TType* userType, + IFunctionTypeInfoBuilder& builder, + bool typesOnly) { + Y_UNUSED(userType); + if (Name() == name) { + builder.SimpleSignature<TResource<HyperLogLogResourceName>(ui64, ui32)>(); + if (!typesOnly) { + builder.Implementation(new THyperLogLogCreate(builder.GetSourcePosition())); } + return true; + } else { + return false; } + } - private: - TSourcePosition Pos_; - }; 
+private: + TSourcePosition Pos_; +}; - class THyperLogLogAddValue: public TBoxedValue { - public: - THyperLogLogAddValue(TSourcePosition pos) - : Pos_(pos) - {} +class THyperLogLogAddValue: public TBoxedValue { +public: + THyperLogLogAddValue(TSourcePosition pos) + : Pos_(pos) + { + } - static const TStringRef& Name() { - static auto nameRef = TStringRef::Of("AddValue"); - return nameRef; - } + static const TStringRef& Name() { + static auto nameRef = TStringRef::Of("AddValue"); + return nameRef; + } - private: - TUnboxedValue Run( - const IValueBuilder* valueBuilder, - const TUnboxedValuePod* args) const override { - try { - Y_UNUSED(valueBuilder); - THyperLogLogResource* resource = static_cast<THyperLogLogResource*>(args[0].AsBoxed().Get()); - resource->Get()->Update(args[1].Get<ui64>()); - return TUnboxedValuePod(args[0]); - } catch (const std::exception& e) { - UdfTerminate((TStringBuilder() << Pos_ << " " << e.what()).c_str()); - } +private: + TUnboxedValue Run( + const IValueBuilder* valueBuilder, + const TUnboxedValuePod* args) const override { + try { + Y_UNUSED(valueBuilder); + THyperLogLogResource* resource = static_cast<THyperLogLogResource*>(args[0].AsBoxed().Get()); + resource->Get()->Update(args[1].Get<ui64>()); + return TUnboxedValuePod(args[0]); + } catch (const std::exception& e) { + UdfTerminate((TStringBuilder() << Pos_ << " " << e.what()).c_str()); } + } - public: - static bool DeclareSignature( - const TStringRef& name, - TType* userType, - IFunctionTypeInfoBuilder& builder, - bool typesOnly) { - Y_UNUSED(userType); - if (Name() == name) { - builder.SimpleSignature<TResource<HyperLogLogResourceName>(TResource<HyperLogLogResourceName>, ui64)>(); - if (!typesOnly) { - builder.Implementation(new THyperLogLogAddValue(builder.GetSourcePosition())); - } - builder.IsStrict(); - return true; - } else { - return false; +public: + static bool DeclareSignature( + const TStringRef& name, + TType* userType, + IFunctionTypeInfoBuilder& builder, + bool 
typesOnly) { + Y_UNUSED(userType); + if (Name() == name) { + builder.SimpleSignature<TResource<HyperLogLogResourceName>(TResource<HyperLogLogResourceName>, ui64)>(); + if (!typesOnly) { + builder.Implementation(new THyperLogLogAddValue(builder.GetSourcePosition())); } + builder.IsStrict(); + return true; + } else { + return false; } + } - private: - TSourcePosition Pos_; - }; +private: + TSourcePosition Pos_; +}; - class THyperLogLogSerialize: public TBoxedValue { - public: - THyperLogLogSerialize(TSourcePosition pos) - : Pos_(pos) - {} +class THyperLogLogSerialize: public TBoxedValue { +public: + THyperLogLogSerialize(TSourcePosition pos) + : Pos_(pos) + { + } - public: - static const TStringRef& Name() { - static auto nameRef = TStringRef::Of("Serialize"); - return nameRef; - } +public: + static const TStringRef& Name() { + static auto nameRef = TStringRef::Of("Serialize"); + return nameRef; + } - private: - TUnboxedValue Run( - const IValueBuilder* valueBuilder, - const TUnboxedValuePod* args) const override { - try { - TStringStream result; - static_cast<THyperLogLogResource*>(args[0].AsBoxed().Get())->Get()->Save(result); - return valueBuilder->NewString(result.Str()); - } catch (const std::exception& e) { - UdfTerminate((TStringBuilder() << Pos_ << " " << e.what()).c_str()); - } +private: + TUnboxedValue Run( + const IValueBuilder* valueBuilder, + const TUnboxedValuePod* args) const override { + try { + TStringStream result; + static_cast<THyperLogLogResource*>(args[0].AsBoxed().Get())->Get()->Save(result); + return valueBuilder->NewString(result.Str()); + } catch (const std::exception& e) { + UdfTerminate((TStringBuilder() << Pos_ << " " << e.what()).c_str()); } + } - public: - static bool DeclareSignature( - const TStringRef& name, - TType* userType, - IFunctionTypeInfoBuilder& builder, - bool typesOnly) { - Y_UNUSED(userType); - if (Name() == name) { - builder.SimpleSignature<char*(TResource<HyperLogLogResourceName>)>(); - if (!typesOnly) { - 
builder.Implementation(new THyperLogLogSerialize(builder.GetSourcePosition())); - } - return true; - } else { - return false; +public: + static bool DeclareSignature( + const TStringRef& name, + TType* userType, + IFunctionTypeInfoBuilder& builder, + bool typesOnly) { + Y_UNUSED(userType); + if (Name() == name) { + builder.SimpleSignature<char*(TResource<HyperLogLogResourceName>)>(); + if (!typesOnly) { + builder.Implementation(new THyperLogLogSerialize(builder.GetSourcePosition())); } + return true; + } else { + return false; } + } - private: - TSourcePosition Pos_; - }; +private: + TSourcePosition Pos_; +}; - class THyperLogLogDeserialize: public TBoxedValue { - public: - THyperLogLogDeserialize(TSourcePosition pos) - : Pos_(pos) - {} +class THyperLogLogDeserialize: public TBoxedValue { +public: + THyperLogLogDeserialize(TSourcePosition pos) + : Pos_(pos) + { + } - static const TStringRef& Name() { - static auto nameRef = TStringRef::Of("Deserialize"); - return nameRef; - } + static const TStringRef& Name() { + static auto nameRef = TStringRef::Of("Deserialize"); + return nameRef; + } - private: - TUnboxedValue Run( - const IValueBuilder* valueBuilder, - const TUnboxedValuePod* args) const override { - try { - Y_UNUSED(valueBuilder); - const TString arg(args[0].AsStringRef()); - TStringInput input(arg); - THolder<THyperLogLogResource> hll(new THyperLogLogResource(THybridHyperLogLog::Load(input))); - return TUnboxedValuePod(hll.Release()); - } catch (const std::exception& e) { - UdfTerminate((TStringBuilder() << Pos_ << " " << e.what()).c_str()); - } +private: + TUnboxedValue Run( + const IValueBuilder* valueBuilder, + const TUnboxedValuePod* args) const override { + try { + Y_UNUSED(valueBuilder); + const TString arg(args[0].AsStringRef()); + TStringInput input(arg); + THolder<THyperLogLogResource> hll(new THyperLogLogResource(THybridHyperLogLog::Load(input))); + return TUnboxedValuePod(hll.Release()); + } catch (const std::exception& e) { + 
UdfTerminate((TStringBuilder() << Pos_ << " " << e.what()).c_str()); } + } - public: - static bool DeclareSignature( - const TStringRef& name, - TType* userType, - IFunctionTypeInfoBuilder& builder, - bool typesOnly) { - Y_UNUSED(userType); - if (Name() == name) { - builder.SimpleSignature<TResource<HyperLogLogResourceName>(char*)>(); - if (!typesOnly) { - builder.Implementation(new THyperLogLogDeserialize(builder.GetSourcePosition())); - } - return true; - } else { - return false; +public: + static bool DeclareSignature( + const TStringRef& name, + TType* userType, + IFunctionTypeInfoBuilder& builder, + bool typesOnly) { + Y_UNUSED(userType); + if (Name() == name) { + builder.SimpleSignature<TResource<HyperLogLogResourceName>(char*)>(); + if (!typesOnly) { + builder.Implementation(new THyperLogLogDeserialize(builder.GetSourcePosition())); } + return true; + } else { + return false; } + } - private: - TSourcePosition Pos_; - }; +private: + TSourcePosition Pos_; +}; - class THyperLogLogMerge: public TBoxedValue { - public: - THyperLogLogMerge(TSourcePosition pos) - : Pos_(pos) - {} +class THyperLogLogMerge: public TBoxedValue { +public: + THyperLogLogMerge(TSourcePosition pos) + : Pos_(pos) + { + } - static const TStringRef& Name() { - static auto nameRef = TStringRef::Of("Merge"); - return nameRef; - } + static const TStringRef& Name() { + static auto nameRef = TStringRef::Of("Merge"); + return nameRef; + } - private: - TUnboxedValue Run( - const IValueBuilder* valueBuilder, - const TUnboxedValuePod* args) const override { - try { - Y_UNUSED(valueBuilder); - auto left = static_cast<THyperLogLogResource*>(args[0].AsBoxed().Get())->Get(); - static_cast<THyperLogLogResource*>(args[1].AsBoxed().Get())->Get()->Merge(*left); - return TUnboxedValuePod(args[1]); - } catch (const std::exception& e) { - UdfTerminate((TStringBuilder() << Pos_ << " " << e.what()).c_str()); - } +private: + TUnboxedValue Run( + const IValueBuilder* valueBuilder, + const TUnboxedValuePod* args) 
const override { + try { + Y_UNUSED(valueBuilder); + auto left = static_cast<THyperLogLogResource*>(args[0].AsBoxed().Get())->Get(); + static_cast<THyperLogLogResource*>(args[1].AsBoxed().Get())->Get()->Merge(*left); + return TUnboxedValuePod(args[1]); + } catch (const std::exception& e) { + UdfTerminate((TStringBuilder() << Pos_ << " " << e.what()).c_str()); } + } - public: - static bool DeclareSignature( - const TStringRef& name, - TType* userType, - IFunctionTypeInfoBuilder& builder, - bool typesOnly) { - Y_UNUSED(userType); - if (Name() == name) { - builder.SimpleSignature<TResource<HyperLogLogResourceName>(TResource<HyperLogLogResourceName>, TResource<HyperLogLogResourceName>)>(); - if (!typesOnly) { - builder.Implementation(new THyperLogLogMerge(builder.GetSourcePosition())); - } - builder.IsStrict(); - return true; - } else { - return false; +public: + static bool DeclareSignature( + const TStringRef& name, + TType* userType, + IFunctionTypeInfoBuilder& builder, + bool typesOnly) { + Y_UNUSED(userType); + if (Name() == name) { + builder.SimpleSignature<TResource<HyperLogLogResourceName>(TResource<HyperLogLogResourceName>, TResource<HyperLogLogResourceName>)>(); + if (!typesOnly) { + builder.Implementation(new THyperLogLogMerge(builder.GetSourcePosition())); } + builder.IsStrict(); + return true; + } else { + return false; } + } - private: - TSourcePosition Pos_; - }; +private: + TSourcePosition Pos_; +}; - class THyperLogLogGetResult: public TBoxedValue { - public: - THyperLogLogGetResult(TSourcePosition pos) - : Pos_(pos) - {} +class THyperLogLogGetResult: public TBoxedValue { +public: + THyperLogLogGetResult(TSourcePosition pos) + : Pos_(pos) + { + } - static const TStringRef& Name() { - static auto nameRef = TStringRef::Of("GetResult"); - return nameRef; - } + static const TStringRef& Name() { + static auto nameRef = TStringRef::Of("GetResult"); + return nameRef; + } - private: - TUnboxedValue Run( - const IValueBuilder* valueBuilder, - const 
TUnboxedValuePod* args) const override { - Y_UNUSED(valueBuilder); - auto hll = static_cast<THyperLogLogResource*>(args[0].AsBoxed().Get())->Get(); - return TUnboxedValuePod(hll->Estimate()); - } +private: + TUnboxedValue Run( + const IValueBuilder* valueBuilder, + const TUnboxedValuePod* args) const override { + Y_UNUSED(valueBuilder); + auto hll = static_cast<THyperLogLogResource*>(args[0].AsBoxed().Get())->Get(); + return TUnboxedValuePod(hll->Estimate()); + } - public: - static bool DeclareSignature( - const TStringRef& name, - TType* userType, - IFunctionTypeInfoBuilder& builder, - bool typesOnly) { - Y_UNUSED(userType); - if (Name() == name) { - auto resource = builder.Resource(HyperLogLogResourceName); - builder.Args()->Add(resource).Done().Returns<ui64>(); +public: + static bool DeclareSignature( + const TStringRef& name, + TType* userType, + IFunctionTypeInfoBuilder& builder, + bool typesOnly) { + Y_UNUSED(userType); + if (Name() == name) { + auto resource = builder.Resource(HyperLogLogResourceName); + builder.Args()->Add(resource).Done().Returns<ui64>(); - if (!typesOnly) { - builder.Implementation(new THyperLogLogGetResult(builder.GetSourcePosition())); - } - builder.IsStrict(); - return true; - } else { - return false; + if (!typesOnly) { + builder.Implementation(new THyperLogLogGetResult(builder.GetSourcePosition())); } + builder.IsStrict(); + return true; + } else { + return false; } + } - private: - TSourcePosition Pos_; - }; +private: + TSourcePosition Pos_; +}; - SIMPLE_MODULE(THyperLogLogModule, - THyperLogLogCreate, - THyperLogLogAddValue, - THyperLogLogSerialize, - THyperLogLogDeserialize, - THyperLogLogMerge, - THyperLogLogGetResult) -} +SIMPLE_MODULE(THyperLogLogModule, + THyperLogLogCreate, + THyperLogLogAddValue, + THyperLogLogSerialize, + THyperLogLogDeserialize, + THyperLogLogMerge, + THyperLogLogGetResult) +} // namespace REGISTER_MODULES(THyperLogLogModule) diff --git a/yql/essentials/udfs/common/hyperloglog/ya.make 
b/yql/essentials/udfs/common/hyperloglog/ya.make index 2becaf7388d..4c295580a9f 100644 --- a/yql/essentials/udfs/common/hyperloglog/ya.make +++ b/yql/essentials/udfs/common/hyperloglog/ya.make @@ -6,6 +6,8 @@ YQL_UDF_CONTRIB(hyperloglog_udf) 0 ) + ENABLE(YQL_STYLE_CPP) + SRCS( hyperloglog_udf.cpp ) @@ -18,4 +20,5 @@ YQL_UDF_CONTRIB(hyperloglog_udf) RECURSE_FOR_TESTS( test -)
\ No newline at end of file +) + diff --git a/yql/essentials/udfs/common/hyperscan/hyperscan_udf.cpp b/yql/essentials/udfs/common/hyperscan/hyperscan_udf.cpp index c641407bc4c..afa8d65f6d3 100644 --- a/yql/essentials/udfs/common/hyperscan/hyperscan_udf.cpp +++ b/yql/essentials/udfs/common/hyperscan/hyperscan_udf.cpp @@ -17,461 +17,460 @@ using namespace NKikimr; using namespace NUdf; namespace { - using TOptions = ui32; - class THyperscanUdfBase: public TBoxedValue { - protected: - constexpr static const char* IGNORE_CASE_PREFIX = "(?i)"; - static void SetCommonOptions(TString& regex, TOptions& options) { - options |= HS_FLAG_ALLOWEMPTY; - if (regex.StartsWith(IGNORE_CASE_PREFIX)) { - options |= HS_FLAG_CASELESS; - regex = regex.substr(4); - } - if (UTF8Detect(regex) == UTF8) { - options |= HS_FLAG_UTF8; - } - if (NX86::HaveAVX2()) { - options |= HS_CPU_FEATURES_AVX2; - } +using TOptions = ui32; +class THyperscanUdfBase: public TBoxedValue { +protected: + constexpr static const char* IGNORE_CASE_PREFIX = "(?i)"; + static void SetCommonOptions(TString& regex, TOptions& options) { + options |= HS_FLAG_ALLOWEMPTY; + if (regex.StartsWith(IGNORE_CASE_PREFIX)) { + options |= HS_FLAG_CASELESS; + regex = regex.substr(4); + } + if (UTF8Detect(regex) == UTF8) { + options |= HS_FLAG_UTF8; + } + if (NX86::HaveAVX2()) { + options |= HS_CPU_FEATURES_AVX2; } + } +}; + +class THyperscanMatch: public THyperscanUdfBase { +public: + enum class EMode { + NORMAL, + BACKTRACKING, + MULTI }; - class THyperscanMatch: public THyperscanUdfBase { + class TFactory: public THyperscanUdfBase { public: - enum class EMode { - NORMAL, - BACKTRACKING, - MULTI - }; - - class TFactory: public THyperscanUdfBase { - public: - TFactory( - TSourcePosition pos, - bool surroundMode, - THyperscanMatch::EMode mode, - size_t regexpsCount = 0) - : Pos_(pos) - , SurroundMode_(surroundMode) - , Mode_(mode) - , RegexpsCount_(regexpsCount) - { - } - - private: - TUnboxedValue Run( - const IValueBuilder* 
valueBuilder, - const TUnboxedValuePod* args) const override { - return TUnboxedValuePod( - new THyperscanMatch( - valueBuilder, - args[0], - SurroundMode_, - Mode_, - Pos_, - RegexpsCount_)); - } - - TSourcePosition Pos_; - bool SurroundMode_; - THyperscanMatch::EMode Mode_; - size_t RegexpsCount_; - }; - - static const TStringRef& Name(bool isGrep, THyperscanMatch::EMode mode) { - static auto match = TStringRef::Of("Match"); - static auto grep = TStringRef::Of("Grep"); - static auto backtrackingMatch = TStringRef::Of("BacktrackingMatch"); - static auto backtrackingGrep = TStringRef::Of("BacktrackingGrep"); - static auto multiMatch = TStringRef::Of("MultiMatch"); - static auto multiGrep = TStringRef::Of("MultiGrep"); - if (isGrep) { - switch (mode) { - case THyperscanMatch::EMode::NORMAL: - return grep; - case THyperscanMatch::EMode::BACKTRACKING: - return backtrackingGrep; - case THyperscanMatch::EMode::MULTI: - return multiGrep; - } - } else { - switch (mode) { - case THyperscanMatch::EMode::NORMAL: - return match; - case THyperscanMatch::EMode::BACKTRACKING: - return backtrackingMatch; - case THyperscanMatch::EMode::MULTI: - return multiMatch; - } - } - - Y_ABORT("Unexpected"); - } - - THyperscanMatch( - const IValueBuilder*, - const TUnboxedValuePod& runConfig, + TFactory( + TSourcePosition pos, bool surroundMode, THyperscanMatch::EMode mode, - TSourcePosition pos, - size_t regexpsCount) - : Regex_(runConfig.AsStringRef()) + size_t regexpsCount = 0) + : Pos_(pos) + , SurroundMode_(surroundMode) , Mode_(mode) - , Pos_(pos) , RegexpsCount_(regexpsCount) { - try { - TOptions options = 0; - int pcreOptions = REG_EXTENDED; - if (Mode_ == THyperscanMatch::EMode::BACKTRACKING && Regex_.StartsWith(IGNORE_CASE_PREFIX)) { - pcreOptions |= REG_ICASE; - } - auto regex = Regex_; - SetCommonOptions(regex, options); - switch (mode) { - case THyperscanMatch::EMode::NORMAL: { - if (!surroundMode) { - regex = TStringBuilder() << '^' << regex << '$'; - } - Database_ = 
Compile(regex, options); - break; - } - case THyperscanMatch::EMode::BACKTRACKING: { - if (!surroundMode) { - regex = TStringBuilder() << '^' << regex << '$'; - } - try { - Database_ = Compile(regex, options); - Mode_ = THyperscanMatch::EMode::NORMAL; - } catch (const TCompileException&) { - options |= HS_FLAG_PREFILTER; - Database_ = Compile(regex, options); - Fallback_ = TRegExMatch(regex, pcreOptions); - } - break; - } - case THyperscanMatch::EMode::MULTI: { - std::vector<TString> regexes; - TVector<const char*> cregexes; - TVector<TOptions> flags; - TVector<TOptions> ids; - - const auto func = [&regexes, &flags, surroundMode](const std::string_view& token) { - TString regex(token); + } - TOptions opt = 0; - SetCommonOptions(regex, opt); + private: + TUnboxedValue Run( + const IValueBuilder* valueBuilder, + const TUnboxedValuePod* args) const override { + return TUnboxedValuePod( + new THyperscanMatch( + valueBuilder, + args[0], + SurroundMode_, + Mode_, + Pos_, + RegexpsCount_)); + } - if (!surroundMode) { - regex = TStringBuilder() << '^' << regex << '$'; - } + TSourcePosition Pos_; + bool SurroundMode_; + THyperscanMatch::EMode Mode_; + size_t RegexpsCount_; + }; - regexes.emplace_back(std::move(regex)); - flags.emplace_back(opt); - }; - StringSplitter(Regex_).Split('\n').Consume(func); + static const TStringRef& Name(bool isGrep, THyperscanMatch::EMode mode) { + static auto match = TStringRef::Of("Match"); + static auto grep = TStringRef::Of("Grep"); + static auto backtrackingMatch = TStringRef::Of("BacktrackingMatch"); + static auto backtrackingGrep = TStringRef::Of("BacktrackingGrep"); + static auto multiMatch = TStringRef::Of("MultiMatch"); + static auto multiGrep = TStringRef::Of("MultiGrep"); + if (isGrep) { + switch (mode) { + case THyperscanMatch::EMode::NORMAL: + return grep; + case THyperscanMatch::EMode::BACKTRACKING: + return backtrackingGrep; + case THyperscanMatch::EMode::MULTI: + return multiGrep; + } + } else { + switch (mode) { + case 
THyperscanMatch::EMode::NORMAL: + return match; + case THyperscanMatch::EMode::BACKTRACKING: + return backtrackingMatch; + case THyperscanMatch::EMode::MULTI: + return multiMatch; + } + } - std::transform(regexes.cbegin(), regexes.cend(), std::back_inserter(cregexes), std::bind(&TString::c_str, std::placeholders::_1)); - ids.resize(regexes.size()); - std::iota(ids.begin(), ids.end(), 0); + Y_ABORT("Unexpected"); + } - Database_ = CompileMulti(cregexes, flags, ids); - break; + THyperscanMatch( + const IValueBuilder*, + const TUnboxedValuePod& runConfig, + bool surroundMode, + THyperscanMatch::EMode mode, + TSourcePosition pos, + size_t regexpsCount) + : Regex_(runConfig.AsStringRef()) + , Mode_(mode) + , Pos_(pos) + , RegexpsCount_(regexpsCount) + { + try { + TOptions options = 0; + int pcreOptions = REG_EXTENDED; + if (Mode_ == THyperscanMatch::EMode::BACKTRACKING && Regex_.StartsWith(IGNORE_CASE_PREFIX)) { + pcreOptions |= REG_ICASE; + } + auto regex = Regex_; + SetCommonOptions(regex, options); + switch (mode) { + case THyperscanMatch::EMode::NORMAL: { + if (!surroundMode) { + regex = TStringBuilder() << '^' << regex << '$'; } + Database_ = Compile(regex, options); + break; } - Scratch_ = MakeScratch(Database_); - } catch (const std::exception& e) { - UdfTerminate((TStringBuilder() << Pos_ << " " << e.what()).c_str()); - } - } + case THyperscanMatch::EMode::BACKTRACKING: { + if (!surroundMode) { + regex = TStringBuilder() << '^' << regex << '$'; + } + try { + Database_ = Compile(regex, options); + Mode_ = THyperscanMatch::EMode::NORMAL; + } catch (const TCompileException&) { + options |= HS_FLAG_PREFILTER; + Database_ = Compile(regex, options); + Fallback_ = TRegExMatch(regex, pcreOptions); + } + break; + } + case THyperscanMatch::EMode::MULTI: { + std::vector<TString> regexes; + TVector<const char*> cregexes; + TVector<TOptions> flags; + TVector<TOptions> ids; - private: - TUnboxedValue Run( - const IValueBuilder* valueBuilder, - const TUnboxedValuePod* args) 
const final try { - TUnboxedValue* items = nullptr; - TUnboxedValue tuple; - size_t i = 0; + const auto func = [&regexes, &flags, surroundMode](const std::string_view& token) { + TString regex(token); - if (Mode_ == THyperscanMatch::EMode::MULTI) { - tuple = valueBuilder->NewArray(RegexpsCount_, items); - for (i = 0; i < RegexpsCount_; ++i) { - items[i] = TUnboxedValuePod(false); - } - } + TOptions opt = 0; + SetCommonOptions(regex, opt); - if (args[0]) { - // XXX: StringRef data might not be a NTBS, though the function - // <TRegExMatch::Match> expects ASCIIZ string. Explicitly copy - // the given argument string and append the NUL terminator to it. - const TString input(args[0].AsStringRef()); - if (Y_UNLIKELY(Mode_ == THyperscanMatch::EMode::MULTI)) { - auto callback = [items] (TOptions id, ui64 /* from */, ui64 /* to */) { - items[id] = TUnboxedValuePod(true); + if (!surroundMode) { + regex = TStringBuilder() << '^' << regex << '$'; + } + + regexes.emplace_back(std::move(regex)); + flags.emplace_back(opt); }; - Scan(Database_, Scratch_, input, callback); - return tuple; - } else { - bool matches = Matches(Database_, Scratch_, input); - if (matches && Mode_ == THyperscanMatch::EMode::BACKTRACKING) { - matches = Fallback_.Match(input.data()); - } - return TUnboxedValuePod(matches); - } + StringSplitter(Regex_).Split('\n').Consume(func); - } else { - return Mode_ == THyperscanMatch::EMode::MULTI ? 
tuple : TUnboxedValue(TUnboxedValuePod(false)); + std::transform(regexes.cbegin(), regexes.cend(), std::back_inserter(cregexes), std::bind(&TString::c_str, std::placeholders::_1)); + ids.resize(regexes.size()); + std::iota(ids.begin(), ids.end(), 0); + + Database_ = CompileMulti(cregexes, flags, ids); + break; + } } + Scratch_ = MakeScratch(Database_); } catch (const std::exception& e) { UdfTerminate((TStringBuilder() << Pos_ << " " << e.what()).c_str()); } + } - private: - const TString Regex_; - THyperscanMatch::EMode Mode_; - const TSourcePosition Pos_; - const size_t RegexpsCount_; - TDatabase Database_; - TScratch Scratch_; - TRegExMatch Fallback_; - }; +private: + TUnboxedValue Run( + const IValueBuilder* valueBuilder, + const TUnboxedValuePod* args) const final try { + TUnboxedValue* items = nullptr; + TUnboxedValue tuple; + size_t i = 0; - class THyperscanCapture: public THyperscanUdfBase { - public: - class TFactory: public THyperscanUdfBase { - public: - TFactory(TSourcePosition pos) - : Pos_(pos) - {} - - private: - TUnboxedValue Run(const IValueBuilder*, - const TUnboxedValuePod* args) const final try { - return TUnboxedValuePod(new THyperscanCapture(args[0], Pos_)); - } catch (const std::exception& e) { - UdfTerminate((TStringBuilder() << Pos_ << " " << e.what()).c_str()); + if (Mode_ == THyperscanMatch::EMode::MULTI) { + tuple = valueBuilder->NewArray(RegexpsCount_, items); + for (i = 0; i < RegexpsCount_; ++i) { + items[i] = TUnboxedValuePod(false); } + } - private: - TSourcePosition Pos_; - }; + if (args[0]) { + // XXX: StringRef data might not be a NTBS, though the function + // <TRegExMatch::Match> expects ASCIIZ string. Explicitly copy + // the given argument string and append the NUL terminator to it. 
+ const TString input(args[0].AsStringRef()); + if (Y_UNLIKELY(Mode_ == THyperscanMatch::EMode::MULTI)) { + auto callback = [items](TOptions id, ui64 /* from */, ui64 /* to */) { + items[id] = TUnboxedValuePod(true); + }; + Scan(Database_, Scratch_, input, callback); + return tuple; + } else { + bool matches = Matches(Database_, Scratch_, input); + if (matches && Mode_ == THyperscanMatch::EMode::BACKTRACKING) { + matches = Fallback_.Match(input.data()); + } + return TUnboxedValuePod(matches); + } - static const TStringRef& Name() { - static auto name = TStringRef::Of("Capture"); - return name; + } else { + return Mode_ == THyperscanMatch::EMode::MULTI ? tuple : TUnboxedValue(TUnboxedValuePod(false)); } + } catch (const std::exception& e) { + UdfTerminate((TStringBuilder() << Pos_ << " " << e.what()).c_str()); + } - THyperscanCapture(const TUnboxedValuePod& runConfig, TSourcePosition pos) +private: + const TString Regex_; + THyperscanMatch::EMode Mode_; + const TSourcePosition Pos_; + const size_t RegexpsCount_; + TDatabase Database_; + TScratch Scratch_; + TRegExMatch Fallback_; +}; + +class THyperscanCapture: public THyperscanUdfBase { +public: + class TFactory: public THyperscanUdfBase { + public: + TFactory(TSourcePosition pos) : Pos_(pos) { - Regex_ = runConfig.AsStringRef(); - TOptions options = HS_FLAG_SOM_LEFTMOST; - - SetCommonOptions(Regex_, options); - - Database_ = Compile(Regex_, options); - Scratch_ = MakeScratch(Database_); } - private: - TUnboxedValue Run( - const IValueBuilder* valueBuilder, - const TUnboxedValuePod* args) const final try { - if (const auto arg = args[0]) { - - TUnboxedValue result; - auto callback = [valueBuilder, arg, &result] (TOptions id, ui64 from, ui64 to) { - Y_UNUSED(id); - if (!result) { - result = valueBuilder->SubString(arg, from, to); - } - }; - Scan(Database_, Scratch_, arg.AsStringRef(), callback); - return result; - } - - return TUnboxedValue(); + TUnboxedValue Run(const IValueBuilder*, + const TUnboxedValuePod* args) 
const final try { + return TUnboxedValuePod(new THyperscanCapture(args[0], Pos_)); } catch (const std::exception& e) { UdfTerminate((TStringBuilder() << Pos_ << " " << e.what()).c_str()); } + private: TSourcePosition Pos_; - TString Regex_; - TDatabase Database_; - TScratch Scratch_; }; - class THyperscanReplace: public THyperscanUdfBase { - public: - class TFactory: public THyperscanUdfBase { - public: - TFactory(TSourcePosition pos) - : Pos_(pos) - {} + static const TStringRef& Name() { + static auto name = TStringRef::Of("Capture"); + return name; + } - private: - TUnboxedValue Run(const IValueBuilder*, - const TUnboxedValuePod* args) const final try { - return TUnboxedValuePod(new THyperscanReplace(args[0], Pos_)); - } catch (const std::exception& e) { - UdfTerminate((TStringBuilder() << Pos_ << " " << e.what()).c_str()); - } + THyperscanCapture(const TUnboxedValuePod& runConfig, TSourcePosition pos) + : Pos_(pos) + { + Regex_ = runConfig.AsStringRef(); + TOptions options = HS_FLAG_SOM_LEFTMOST; - private: - TSourcePosition Pos_; - }; + SetCommonOptions(Regex_, options); - static const TStringRef& Name() { - static auto name = TStringRef::Of("Replace"); - return name; + Database_ = Compile(Regex_, options); + Scratch_ = MakeScratch(Database_); + } + +private: + TUnboxedValue Run( + const IValueBuilder* valueBuilder, + const TUnboxedValuePod* args) const final try { + if (const auto arg = args[0]) { + TUnboxedValue result; + auto callback = [valueBuilder, arg, &result](TOptions id, ui64 from, ui64 to) { + Y_UNUSED(id); + if (!result) { + result = valueBuilder->SubString(arg, from, to); + } + }; + Scan(Database_, Scratch_, arg.AsStringRef(), callback); + return result; } - THyperscanReplace(const TUnboxedValuePod& runConfig, TSourcePosition pos) - : Pos_(pos) - { - Regex_ = runConfig.AsStringRef(); - TOptions options = HS_FLAG_SOM_LEFTMOST; + return TUnboxedValue(); + } catch (const std::exception& e) { + UdfTerminate((TStringBuilder() << Pos_ << " " << 
e.what()).c_str()); + } - SetCommonOptions(Regex_, options); + TSourcePosition Pos_; + TString Regex_; + TDatabase Database_; + TScratch Scratch_; +}; +class THyperscanReplace: public THyperscanUdfBase { +public: + class TFactory: public THyperscanUdfBase { + public: + TFactory(TSourcePosition pos) + : Pos_(pos) + { + } - Database_ = Compile(Regex_, options); - Scratch_ = MakeScratch(Database_); + private: + TUnboxedValue Run(const IValueBuilder*, + const TUnboxedValuePod* args) const final try { + return TUnboxedValuePod(new THyperscanReplace(args[0], Pos_)); + } catch (const std::exception& e) { + UdfTerminate((TStringBuilder() << Pos_ << " " << e.what()).c_str()); } private: - TUnboxedValue Run( - const IValueBuilder* valueBuilder, - const TUnboxedValuePod* args) const final try { - if (args[0]) { - const std::string_view input(args[0].AsStringRef()); - const std::string_view replacement(args[1].AsStringRef()); + TSourcePosition Pos_; + }; - ui64 index = 0; - TStringBuilder result; - auto callback = [input, replacement, &index, &result] (TOptions id, ui64 from, ui64 to) { - Y_UNUSED(id); - if (index != from) { - result << input.substr(index, from - index); - } - result << replacement; - index = to; - }; - Scan(Database_, Scratch_, input, callback); + static const TStringRef& Name() { + static auto name = TStringRef::Of("Replace"); + return name; + } + + THyperscanReplace(const TUnboxedValuePod& runConfig, TSourcePosition pos) + : Pos_(pos) + { + Regex_ = runConfig.AsStringRef(); + TOptions options = HS_FLAG_SOM_LEFTMOST; + + SetCommonOptions(Regex_, options); + + Database_ = Compile(Regex_, options); + Scratch_ = MakeScratch(Database_); + } - if (!index) { - return args[0]; +private: + TUnboxedValue Run( + const IValueBuilder* valueBuilder, + const TUnboxedValuePod* args) const final try { + if (args[0]) { + const std::string_view input(args[0].AsStringRef()); + const std::string_view replacement(args[1].AsStringRef()); + + ui64 index = 0; + TStringBuilder 
result; + auto callback = [input, replacement, &index, &result](TOptions id, ui64 from, ui64 to) { + Y_UNUSED(id); + if (index != from) { + result << input.substr(index, from - index); } + result << replacement; + index = to; + }; + Scan(Database_, Scratch_, input, callback); - result << input.substr(index); - return valueBuilder->NewString(result); + if (!index) { + return args[0]; } - return TUnboxedValue(); - } catch (const std::exception& e) { - UdfTerminate((TStringBuilder() << Pos_ << " " << e.what()).c_str()); + result << input.substr(index); + return valueBuilder->NewString(result); } - TSourcePosition Pos_; - TString Regex_; - TDatabase Database_; - TScratch Scratch_; - }; + return TUnboxedValue(); + } catch (const std::exception& e) { + UdfTerminate((TStringBuilder() << Pos_ << " " << e.what()).c_str()); + } - class THyperscanModule: public IUdfModule { - public: - TStringRef Name() const { - return TStringRef::Of("Hyperscan"); - } + TSourcePosition Pos_; + TString Regex_; + TDatabase Database_; + TScratch Scratch_; +}; - void CleanupOnTerminate() const final { - } +class THyperscanModule: public IUdfModule { +public: + TStringRef Name() const { + return TStringRef::Of("Hyperscan"); + } - void GetAllFunctions(IFunctionsSink& sink) const final { - sink.Add(THyperscanMatch::Name(true, THyperscanMatch::EMode::NORMAL)); - sink.Add(THyperscanMatch::Name(false, THyperscanMatch::EMode::NORMAL)); - sink.Add(THyperscanMatch::Name(true, THyperscanMatch::EMode::BACKTRACKING)); - sink.Add(THyperscanMatch::Name(false, THyperscanMatch::EMode::BACKTRACKING)); - sink.Add(THyperscanMatch::Name(true, THyperscanMatch::EMode::MULTI))->SetTypeAwareness(); - sink.Add(THyperscanMatch::Name(false, THyperscanMatch::EMode::MULTI))->SetTypeAwareness(); - sink.Add(THyperscanCapture::Name()); - sink.Add(THyperscanReplace::Name()); - } + void CleanupOnTerminate() const final { + } - void BuildFunctionTypeInfo( - const TStringRef& name, - TType* userType, - const TStringRef& 
typeConfig, - ui32 flags, - IFunctionTypeInfoBuilder& builder) const final { - try { - Y_UNUSED(userType); + void GetAllFunctions(IFunctionsSink& sink) const final { + sink.Add(THyperscanMatch::Name(true, THyperscanMatch::EMode::NORMAL)); + sink.Add(THyperscanMatch::Name(false, THyperscanMatch::EMode::NORMAL)); + sink.Add(THyperscanMatch::Name(true, THyperscanMatch::EMode::BACKTRACKING)); + sink.Add(THyperscanMatch::Name(false, THyperscanMatch::EMode::BACKTRACKING)); + sink.Add(THyperscanMatch::Name(true, THyperscanMatch::EMode::MULTI))->SetTypeAwareness(); + sink.Add(THyperscanMatch::Name(false, THyperscanMatch::EMode::MULTI))->SetTypeAwareness(); + sink.Add(THyperscanCapture::Name()); + sink.Add(THyperscanReplace::Name()); + } - bool typesOnly = (flags & TFlags::TypesOnly); - bool isMatch = (THyperscanMatch::Name(false, THyperscanMatch::EMode::NORMAL) == name); - bool isGrep = (THyperscanMatch::Name(true, THyperscanMatch::EMode::NORMAL) == name); - bool isBacktrackingMatch = (THyperscanMatch::Name(false, THyperscanMatch::EMode::BACKTRACKING) == name); - bool isBacktrackingGrep = (THyperscanMatch::Name(true, THyperscanMatch::EMode::BACKTRACKING) == name); - bool isMultiMatch = (THyperscanMatch::Name(false, THyperscanMatch::EMode::MULTI) == name); - bool isMultiGrep = (THyperscanMatch::Name(true, THyperscanMatch::EMode::MULTI) == name); + void BuildFunctionTypeInfo( + const TStringRef& name, + TType* userType, + const TStringRef& typeConfig, + ui32 flags, + IFunctionTypeInfoBuilder& builder) const final { + try { + Y_UNUSED(userType); - if (isMatch || isGrep) { - builder.SimpleSignature<bool(TOptional<char*>)>() - .RunConfig<const char*>(); + bool typesOnly = (flags & TFlags::TypesOnly); + bool isMatch = (THyperscanMatch::Name(false, THyperscanMatch::EMode::NORMAL) == name); + bool isGrep = (THyperscanMatch::Name(true, THyperscanMatch::EMode::NORMAL) == name); + bool isBacktrackingMatch = (THyperscanMatch::Name(false, THyperscanMatch::EMode::BACKTRACKING) == name); 
+ bool isBacktrackingGrep = (THyperscanMatch::Name(true, THyperscanMatch::EMode::BACKTRACKING) == name); + bool isMultiMatch = (THyperscanMatch::Name(false, THyperscanMatch::EMode::MULTI) == name); + bool isMultiGrep = (THyperscanMatch::Name(true, THyperscanMatch::EMode::MULTI) == name); - if (!typesOnly) { - builder.Implementation(new THyperscanMatch::TFactory(builder.GetSourcePosition(), isGrep, THyperscanMatch::EMode::NORMAL)); - } - } else if (isBacktrackingMatch || isBacktrackingGrep) { - builder.SimpleSignature<bool(TOptional<char*>)>() - .RunConfig<const char*>(); + if (isMatch || isGrep) { + builder.SimpleSignature<bool(TOptional<char*>)>() + .RunConfig<const char*>(); - if (!typesOnly) { - builder.Implementation(new THyperscanMatch::TFactory(builder.GetSourcePosition(), isBacktrackingGrep, THyperscanMatch::EMode::BACKTRACKING)); - } - } else if (isMultiMatch || isMultiGrep) { - auto boolType = builder.SimpleType<bool>(); - auto optionalStringType = builder.Optional()->Item<char*>().Build(); - const std::string_view regexp(typeConfig); - size_t regexpCount = std::count(regexp.begin(), regexp.end(), '\n') + 1; - auto tuple = builder.Tuple(); - for (size_t i = 0; i < regexpCount; ++i) { - tuple->Add(boolType); - } - auto tupleType = tuple->Build(); - builder.Args(1)->Add(optionalStringType).Done().Returns(tupleType).RunConfig<char*>(); + if (!typesOnly) { + builder.Implementation(new THyperscanMatch::TFactory(builder.GetSourcePosition(), isGrep, THyperscanMatch::EMode::NORMAL)); + } + } else if (isBacktrackingMatch || isBacktrackingGrep) { + builder.SimpleSignature<bool(TOptional<char*>)>() + .RunConfig<const char*>(); - if (!typesOnly) { - builder.Implementation(new THyperscanMatch::TFactory(builder.GetSourcePosition(), isMultiGrep, THyperscanMatch::EMode::MULTI, regexpCount)); - } - } else if (THyperscanCapture::Name() == name) { - builder.SimpleSignature<TOptional<char*>(TOptional<char*>)>() - .RunConfig<char*>(); + if (!typesOnly) { + 
builder.Implementation(new THyperscanMatch::TFactory(builder.GetSourcePosition(), isBacktrackingGrep, THyperscanMatch::EMode::BACKTRACKING)); + } + } else if (isMultiMatch || isMultiGrep) { + auto boolType = builder.SimpleType<bool>(); + auto optionalStringType = builder.Optional()->Item<char*>().Build(); + const std::string_view regexp(typeConfig); + size_t regexpCount = std::count(regexp.begin(), regexp.end(), '\n') + 1; + auto tuple = builder.Tuple(); + for (size_t i = 0; i < regexpCount; ++i) { + tuple->Add(boolType); + } + auto tupleType = tuple->Build(); + builder.Args(1)->Add(optionalStringType).Done().Returns(tupleType).RunConfig<char*>(); - if (!typesOnly) { - builder.Implementation(new THyperscanCapture::TFactory(builder.GetSourcePosition())); - } - } else if (THyperscanReplace::Name() == name) { - builder.SimpleSignature<TOptional<char*>(TOptional<char*>, char*)>() - .RunConfig<char*>(); + if (!typesOnly) { + builder.Implementation(new THyperscanMatch::TFactory(builder.GetSourcePosition(), isMultiGrep, THyperscanMatch::EMode::MULTI, regexpCount)); + } + } else if (THyperscanCapture::Name() == name) { + builder.SimpleSignature<TOptional<char*>(TOptional<char*>)>() + .RunConfig<char*>(); - if (!typesOnly) { - builder.Implementation(new THyperscanReplace::TFactory(builder.GetSourcePosition())); - } + if (!typesOnly) { + builder.Implementation(new THyperscanCapture::TFactory(builder.GetSourcePosition())); + } + } else if (THyperscanReplace::Name() == name) { + builder.SimpleSignature<TOptional<char*>(TOptional<char*>, char*)>() + .RunConfig<char*>(); + + if (!typesOnly) { + builder.Implementation(new THyperscanReplace::TFactory(builder.GetSourcePosition())); } - } catch (const std::exception& e) { - builder.SetError(CurrentExceptionMessage()); } + } catch (const std::exception& e) { + builder.SetError(CurrentExceptionMessage()); } - }; + } +}; - class TPcreModule : public THyperscanModule { - public: - TStringRef Name() const { - return 
TStringRef::Of("Pcre"); - } - }; -} +class TPcreModule: public THyperscanModule { +public: + TStringRef Name() const { + return TStringRef::Of("Pcre"); + } +}; +} // namespace REGISTER_MODULES(THyperscanModule, TPcreModule) diff --git a/yql/essentials/udfs/common/hyperscan/ya.make b/yql/essentials/udfs/common/hyperscan/ya.make index 9217280ba76..574458d246a 100644 --- a/yql/essentials/udfs/common/hyperscan/ya.make +++ b/yql/essentials/udfs/common/hyperscan/ya.make @@ -14,6 +14,8 @@ YQL_UDF_CONTRIB(hyperscan_udf) 0 ) + ENABLE(YQL_STYLE_CPP) + SRCS( hyperscan_udf.cpp ) @@ -27,4 +29,5 @@ YQL_UDF_CONTRIB(hyperscan_udf) RECURSE_FOR_TESTS( test -)
\ No newline at end of file +) + diff --git a/yql/essentials/udfs/common/ip_base/ip_base.cpp b/yql/essentials/udfs/common/ip_base/ip_base.cpp index 1c017e2a5d2..fbab4c25941 100644 --- a/yql/essentials/udfs/common/ip_base/ip_base.cpp +++ b/yql/essentials/udfs/common/ip_base/ip_base.cpp @@ -4,4 +4,3 @@ SIMPLE_MODULE(TIpModule, EXPORTED_IP_BASE_UDF) REGISTER_MODULES(TIpModule) - diff --git a/yql/essentials/udfs/common/ip_base/lib/ip_base_udf.cpp b/yql/essentials/udfs/common/ip_base/lib/ip_base_udf.cpp index a0617e77283..dbd58d3e25e 100644 --- a/yql/essentials/udfs/common/ip_base/lib/ip_base_udf.cpp +++ b/yql/essentials/udfs/common/ip_base/lib/ip_base_udf.cpp @@ -1 +1 @@ -#include "ip_base_udf.h"
\ No newline at end of file +#include "ip_base_udf.h" diff --git a/yql/essentials/udfs/common/ip_base/lib/ip_base_udf.h b/yql/essentials/udfs/common/ip_base/lib/ip_base_udf.h index 2bb4f987190..5e4e9cb9b00 100644 --- a/yql/essentials/udfs/common/ip_base/lib/ip_base_udf.h +++ b/yql/essentials/udfs/common/ip_base/lib/ip_base_udf.h @@ -8,376 +8,369 @@ #include <util/generic/buffer.h> namespace { - using TAutoMapString = NKikimr::NUdf::TAutoMap<char*>; - using TAutoMapUint32 = NKikimr::NUdf::TAutoMap<ui32>; - using TOptionalString = NKikimr::NUdf::TOptional<char*>; - using TOptionalUint32 = NKikimr::NUdf::TOptional<ui32>; - using TOptionalByte = NKikimr::NUdf::TOptional<ui8>; - using TStringRef = NKikimr::NUdf::TStringRef; - using TUnboxedValue = NKikimr::NUdf::TUnboxedValue; - using TUnboxedValuePod = NKikimr::NUdf::TUnboxedValuePod; +using TAutoMapString = NKikimr::NUdf::TAutoMap<char*>; +using TAutoMapUint32 = NKikimr::NUdf::TAutoMap<ui32>; +using TOptionalString = NKikimr::NUdf::TOptional<char*>; +using TOptionalUint32 = NKikimr::NUdf::TOptional<ui32>; +using TOptionalByte = NKikimr::NUdf::TOptional<ui8>; +using TStringRef = NKikimr::NUdf::TStringRef; +using TUnboxedValue = NKikimr::NUdf::TUnboxedValue; +using TUnboxedValuePod = NKikimr::NUdf::TUnboxedValuePod; - ui8 GetAddressRangePrefix(const TIpAddressRange& range) { - if (range.Contains(TIpv6Address(ui128(0), TIpv6Address::Ipv6)) && range.Contains(TIpv6Address(ui128(-1), TIpv6Address::Ipv6))) { - return 0; - } - if (range.Size() == 0) { - return range.Type() == TIpv6Address::Ipv4 ? 32 : 128; - } - ui128 size = range.Size(); - size_t sizeLog = MostSignificantBit(size); - return ui8((range.Type() == TIpv6Address::Ipv4 ? 32 : 128) - sizeLog); +ui8 GetAddressRangePrefix(const TIpAddressRange& range) { + if (range.Contains(TIpv6Address(ui128(0), TIpv6Address::Ipv6)) && range.Contains(TIpv6Address(ui128(-1), TIpv6Address::Ipv6))) { + return 0; } + if (range.Size() == 0) { + return range.Type() == TIpv6Address::Ipv4 ? 
32 : 128; + } + ui128 size = range.Size(); + size_t sizeLog = MostSignificantBit(size); + return ui8((range.Type() == TIpv6Address::Ipv4 ? 32 : 128) - sizeLog); +} - struct TRawIp4 { - ui8 A, B, C, D; - - static TRawIp4 FromIpAddress(const TIpv6Address& addr) { - ui128 x = addr; - return { - ui8(x >> 24 & 0xff), - ui8(x >> 16 & 0xff), - ui8(x >> 8 & 0xff), - ui8(x & 0xff) - }; - } - - static TRawIp4 MaskFromPrefix(ui8 prefix) { - ui128 x = ui128(-1) << int(32 - prefix); - x &= ui128(ui32(-1)); - return FromIpAddress({x, TIpv6Address::Ipv4}); - } +struct TRawIp4 { + ui8 A, B, C, D; - TIpv6Address ToIpAddress() const { - return {A, B, C, D}; - } + static TRawIp4 FromIpAddress(const TIpv6Address& addr) { + ui128 x = addr; + return { + ui8(x >> 24 & 0xff), + ui8(x >> 16 & 0xff), + ui8(x >> 8 & 0xff), + ui8(x & 0xff)}; + } - std::pair<TRawIp4, TRawIp4> ApplyMask(const TRawIp4& mask) const { - return {{ - ui8(A & mask.A), - ui8(B & mask.B), - ui8(C & mask.C), - ui8(D & mask.D) - },{ - ui8(A | ~mask.A), - ui8(B | ~mask.B), - ui8(C | ~mask.C), - ui8(D | ~mask.D) - }}; - } - }; + static TRawIp4 MaskFromPrefix(ui8 prefix) { + ui128 x = ui128(-1) << int(32 - prefix); + x &= ui128(ui32(-1)); + return FromIpAddress({x, TIpv6Address::Ipv4}); + } - struct TRawIp4Subnet { - TRawIp4 Base, Mask; + TIpv6Address ToIpAddress() const { + return {A, B, C, D}; + } - static TRawIp4Subnet FromIpRange(const TIpAddressRange& range) { - return {TRawIp4::FromIpAddress(*range.Begin()), TRawIp4::MaskFromPrefix(GetAddressRangePrefix(range))}; - } + std::pair<TRawIp4, TRawIp4> ApplyMask(const TRawIp4& mask) const { + return {{ui8(A & mask.A), + ui8(B & mask.B), + ui8(C & mask.C), + ui8(D & mask.D)}, + {ui8(A | ~mask.A), + ui8(B | ~mask.B), + ui8(C | ~mask.C), + ui8(D | ~mask.D)}}; + } +}; - TIpAddressRange ToIpRange() const { - auto range = Base.ApplyMask(Mask); - return {range.first.ToIpAddress(), range.second.ToIpAddress()}; - } - }; +struct TRawIp4Subnet { + TRawIp4 Base, Mask; - struct TRawIp6 
{ - ui8 A1, A0, B1, B0, C1, C0, D1, D0, E1, E0, F1, F0, G1, G0, H1, H0; + static TRawIp4Subnet FromIpRange(const TIpAddressRange& range) { + return {TRawIp4::FromIpAddress(*range.Begin()), TRawIp4::MaskFromPrefix(GetAddressRangePrefix(range))}; + } - static TRawIp6 FromIpAddress(const TIpv6Address& addr) { - ui128 x = addr; - return { - ui8(x >> 120 & 0xff), ui8(x >> 112 & 0xff), - ui8(x >> 104 & 0xff), ui8(x >> 96 & 0xff), - ui8(x >> 88 & 0xff), ui8(x >> 80 & 0xff), - ui8(x >> 72 & 0xff), ui8(x >> 64 & 0xff), - ui8(x >> 56 & 0xff), ui8(x >> 48 & 0xff), - ui8(x >> 40 & 0xff), ui8(x >> 32 & 0xff), - ui8(x >> 24 & 0xff), ui8(x >> 16 & 0xff), - ui8(x >> 8 & 0xff), ui8(x & 0xff) - }; - } + TIpAddressRange ToIpRange() const { + auto range = Base.ApplyMask(Mask); + return {range.first.ToIpAddress(), range.second.ToIpAddress()}; + } +}; - static TRawIp6 MaskFromPrefix(ui8 prefix) { - ui128 x = prefix == 0 ? ui128(0) : ui128(-1) << int(128 - prefix); - return FromIpAddress({x, TIpv6Address::Ipv6}); - } +struct TRawIp6 { + ui8 A1, A0, B1, B0, C1, C0, D1, D0, E1, E0, F1, F0, G1, G0, H1, H0; - TIpv6Address ToIpAddress() const { - return {ui16(ui32(A1) << ui32(8) | ui32(A0)), - ui16(ui32(B1) << ui32(8) | ui32(B0)), - ui16(ui32(C1) << ui32(8) | ui32(C0)), - ui16(ui32(D1) << ui32(8) | ui32(D0)), - ui16(ui32(E1) << ui32(8) | ui32(E0)), - ui16(ui32(F1) << ui32(8) | ui32(F0)), - ui16(ui32(G1) << ui32(8) | ui32(G0)), - ui16(ui32(H1) << ui32(8) | ui32(H0)), - }; - } + static TRawIp6 FromIpAddress(const TIpv6Address& addr) { + ui128 x = addr; + return { + ui8(x >> 120 & 0xff), ui8(x >> 112 & 0xff), + ui8(x >> 104 & 0xff), ui8(x >> 96 & 0xff), + ui8(x >> 88 & 0xff), ui8(x >> 80 & 0xff), + ui8(x >> 72 & 0xff), ui8(x >> 64 & 0xff), + ui8(x >> 56 & 0xff), ui8(x >> 48 & 0xff), + ui8(x >> 40 & 0xff), ui8(x >> 32 & 0xff), + ui8(x >> 24 & 0xff), ui8(x >> 16 & 0xff), + ui8(x >> 8 & 0xff), ui8(x & 0xff)}; + } - std::pair<TRawIp6, TRawIp6> ApplyMask(const TRawIp6& mask) const { - return { { - 
ui8(A1 & mask.A1), - ui8(A0 & mask.A0), - ui8(B1 & mask.B1), - ui8(B0 & mask.B0), - ui8(C1 & mask.C1), - ui8(C0 & mask.C0), - ui8(D1 & mask.D1), - ui8(D0 & mask.D0), - ui8(E1 & mask.E1), - ui8(E0 & mask.E0), - ui8(F1 & mask.F1), - ui8(F0 & mask.F0), - ui8(G1 & mask.G1), - ui8(G0 & mask.G0), - ui8(H1 & mask.H1), - ui8(H0 & mask.H0) - }, { - ui8(A1 | ~mask.A1), - ui8(A0 | ~mask.A0), - ui8(B1 | ~mask.B1), - ui8(B0 | ~mask.B0), - ui8(C1 | ~mask.C1), - ui8(C0 | ~mask.C0), - ui8(D1 | ~mask.D1), - ui8(D0 | ~mask.D0), - ui8(E1 | ~mask.E1), - ui8(E0 | ~mask.E0), - ui8(F1 | ~mask.F1), - ui8(F0 | ~mask.F0), - ui8(G1 | ~mask.G1), - ui8(G0 | ~mask.G0), - ui8(H1 | ~mask.H1), - ui8(H0 | ~mask.H0) - }}; - } - }; + static TRawIp6 MaskFromPrefix(ui8 prefix) { + ui128 x = prefix == 0 ? ui128(0) : ui128(-1) << int(128 - prefix); + return FromIpAddress({x, TIpv6Address::Ipv6}); + } - struct TRawIp6Subnet { - TRawIp6 Base, Mask; + TIpv6Address ToIpAddress() const { + return { + ui16(ui32(A1) << ui32(8) | ui32(A0)), + ui16(ui32(B1) << ui32(8) | ui32(B0)), + ui16(ui32(C1) << ui32(8) | ui32(C0)), + ui16(ui32(D1) << ui32(8) | ui32(D0)), + ui16(ui32(E1) << ui32(8) | ui32(E0)), + ui16(ui32(F1) << ui32(8) | ui32(F0)), + ui16(ui32(G1) << ui32(8) | ui32(G0)), + ui16(ui32(H1) << ui32(8) | ui32(H0)), + }; + } - static TRawIp6Subnet FromIpRange(const TIpAddressRange& range) { - return {TRawIp6::FromIpAddress(*range.Begin()), TRawIp6::MaskFromPrefix(GetAddressRangePrefix(range))}; - } + std::pair<TRawIp6, TRawIp6> ApplyMask(const TRawIp6& mask) const { + return {{ui8(A1 & mask.A1), + ui8(A0 & mask.A0), + ui8(B1 & mask.B1), + ui8(B0 & mask.B0), + ui8(C1 & mask.C1), + ui8(C0 & mask.C0), + ui8(D1 & mask.D1), + ui8(D0 & mask.D0), + ui8(E1 & mask.E1), + ui8(E0 & mask.E0), + ui8(F1 & mask.F1), + ui8(F0 & mask.F0), + ui8(G1 & mask.G1), + ui8(G0 & mask.G0), + ui8(H1 & mask.H1), + ui8(H0 & mask.H0)}, + {ui8(A1 | ~mask.A1), + ui8(A0 | ~mask.A0), + ui8(B1 | ~mask.B1), + ui8(B0 | ~mask.B0), + ui8(C1 | 
~mask.C1), + ui8(C0 | ~mask.C0), + ui8(D1 | ~mask.D1), + ui8(D0 | ~mask.D0), + ui8(E1 | ~mask.E1), + ui8(E0 | ~mask.E0), + ui8(F1 | ~mask.F1), + ui8(F0 | ~mask.F0), + ui8(G1 | ~mask.G1), + ui8(G0 | ~mask.G0), + ui8(H1 | ~mask.H1), + ui8(H0 | ~mask.H0)}}; + } +}; - TIpAddressRange ToIpRange() const { - auto range = Base.ApplyMask(Mask); - return {range.first.ToIpAddress(), range.second.ToIpAddress()}; - } - }; +struct TRawIp6Subnet { + TRawIp6 Base, Mask; - TIpv6Address DeserializeAddress(const TStringRef& str) { - TIpv6Address addr; - if (str.Size() == 4) { - TRawIp4 addr4; - memcpy(&addr4, str.Data(), sizeof addr4); - addr = addr4.ToIpAddress(); - } else if (str.Size() == 16) { - TRawIp6 addr6; - memcpy(&addr6, str.Data(), sizeof addr6); - addr = addr6.ToIpAddress(); - } else { - ythrow yexception() << "Incorrect size of input, expected " - << "4 or 16, got " << str.Size(); - } - return addr; + static TRawIp6Subnet FromIpRange(const TIpAddressRange& range) { + return {TRawIp6::FromIpAddress(*range.Begin()), TRawIp6::MaskFromPrefix(GetAddressRangePrefix(range))}; } - TIpAddressRange DeserializeSubnet(const TStringRef& str) { - TIpAddressRange range; - if (str.Size() == sizeof(TRawIp4Subnet)) { - TRawIp4Subnet subnet4; - memcpy(&subnet4, str.Data(), sizeof subnet4); - range = subnet4.ToIpRange(); - } else if (str.Size() == sizeof(TRawIp6Subnet)) { - TRawIp6Subnet subnet6; - memcpy(&subnet6, str.Data(), sizeof subnet6); - range = subnet6.ToIpRange(); - } else { - ythrow yexception() << "Invalid binary representation"; - } - return range; + TIpAddressRange ToIpRange() const { + auto range = Base.ApplyMask(Mask); + return {range.first.ToIpAddress(), range.second.ToIpAddress()}; } +}; - TString SerializeAddress(const TIpv6Address& addr) { - Y_ENSURE(addr.Type() == TIpv6Address::Ipv4 || addr.Type() == TIpv6Address::Ipv6); - TString res; - if (addr.Type() == TIpv6Address::Ipv4) { - auto addr4 = TRawIp4::FromIpAddress(addr); - res = TString(reinterpret_cast<const char 
*>(&addr4), sizeof addr4); - } else if (addr.Type() == TIpv6Address::Ipv6) { - auto addr6 = TRawIp6::FromIpAddress(addr); - res = TString(reinterpret_cast<const char *>(&addr6), sizeof addr6); - } - return res; +TIpv6Address DeserializeAddress(const TStringRef& str) { + TIpv6Address addr; + if (str.Size() == 4) { + TRawIp4 addr4; + memcpy(&addr4, str.Data(), sizeof addr4); + addr = addr4.ToIpAddress(); + } else if (str.Size() == 16) { + TRawIp6 addr6; + memcpy(&addr6, str.Data(), sizeof addr6); + addr = addr6.ToIpAddress(); + } else { + ythrow yexception() << "Incorrect size of input, expected " + << "4 or 16, got " << str.Size(); } + return addr; +} - TString SerializeSubnet(const TIpAddressRange& range) { - TString res; - if (range.Type() == TIpv6Address::Ipv4) { - auto subnet4 = TRawIp4Subnet::FromIpRange(range); - res = TString(reinterpret_cast<const char *>(&subnet4), sizeof subnet4); - } else if (range.Type() == TIpv6Address::Ipv6) { - auto subnet6 = TRawIp6Subnet::FromIpRange(range); - res = TString(reinterpret_cast<const char *>(&subnet6), sizeof subnet6); - } - return res; +TIpAddressRange DeserializeSubnet(const TStringRef& str) { + TIpAddressRange range; + if (str.Size() == sizeof(TRawIp4Subnet)) { + TRawIp4Subnet subnet4; + memcpy(&subnet4, str.Data(), sizeof subnet4); + range = subnet4.ToIpRange(); + } else if (str.Size() == sizeof(TRawIp6Subnet)) { + TRawIp6Subnet subnet6; + memcpy(&subnet6, str.Data(), sizeof subnet6); + range = subnet6.ToIpRange(); + } else { + ythrow yexception() << "Invalid binary representation"; } + return range; +} - SIMPLE_STRICT_UDF(TFromString, TOptionalString(TAutoMapString)) { - TIpv6Address addr = TIpv6Address::FromString(args[0].AsStringRef()); - if (addr.Type() != TIpv6Address::Ipv4 && addr.Type() != TIpv6Address::Ipv6) { - return TUnboxedValue(); - } - return valueBuilder->NewString(SerializeAddress(addr)); +TString SerializeAddress(const TIpv6Address& addr) { + Y_ENSURE(addr.Type() == TIpv6Address::Ipv4 || addr.Type() 
== TIpv6Address::Ipv6); + TString res; + if (addr.Type() == TIpv6Address::Ipv4) { + auto addr4 = TRawIp4::FromIpAddress(addr); + res = TString(reinterpret_cast<const char*>(&addr4), sizeof addr4); + } else if (addr.Type() == TIpv6Address::Ipv6) { + auto addr6 = TRawIp6::FromIpAddress(addr); + res = TString(reinterpret_cast<const char*>(&addr6), sizeof addr6); } + return res; +} - SIMPLE_STRICT_UDF_OPTIONS(TIpv4FromUint32, char*(TAutoMapUint32), builder.SetMinLangVer(NYql::MakeLangVersion(2025, 3))) { - // in_addr expects bytes in network byte order. - in_addr addr; - addr.s_addr = htonl(args[0].Get<ui32>()); - return valueBuilder->NewString(SerializeAddress(TIpv6Address{addr})); +TString SerializeSubnet(const TIpAddressRange& range) { + TString res; + if (range.Type() == TIpv6Address::Ipv4) { + auto subnet4 = TRawIp4Subnet::FromIpRange(range); + res = TString(reinterpret_cast<const char*>(&subnet4), sizeof subnet4); + } else if (range.Type() == TIpv6Address::Ipv6) { + auto subnet6 = TRawIp6Subnet::FromIpRange(range); + res = TString(reinterpret_cast<const char*>(&subnet6), sizeof subnet6); } + return res; +} - SIMPLE_STRICT_UDF(TSubnetFromString, TOptionalString(TAutoMapString)) { - TIpAddressRange range = TIpAddressRange::FromCompactString(args[0].AsStringRef()); - auto res = SerializeSubnet(range); - return res ? 
valueBuilder->NewString(res) : TUnboxedValue(TUnboxedValuePod()); +SIMPLE_STRICT_UDF(TFromString, TOptionalString(TAutoMapString)) { + TIpv6Address addr = TIpv6Address::FromString(args[0].AsStringRef()); + if (addr.Type() != TIpv6Address::Ipv4 && addr.Type() != TIpv6Address::Ipv6) { + return TUnboxedValue(); } + return valueBuilder->NewString(SerializeAddress(addr)); +} - SIMPLE_UDF(TToString, char*(TAutoMapString)) { - return valueBuilder->NewString(DeserializeAddress(args[0].AsStringRef()).ToString(false)); - } +SIMPLE_STRICT_UDF_OPTIONS(TIpv4FromUint32, char*(TAutoMapUint32), builder.SetMinLangVer(NYql::MakeLangVersion(2025, 3))) { + // in_addr expects bytes in network byte order. + in_addr addr; + addr.s_addr = htonl(args[0].Get<ui32>()); + return valueBuilder->NewString(SerializeAddress(TIpv6Address{addr})); +} - SIMPLE_UDF_OPTIONS(TIpv4ToUint32, TOptionalUint32(TAutoMapString), builder.SetMinLangVer(NYql::MakeLangVersion(2025, 3))) { - Y_UNUSED(valueBuilder); - TIpv6Address addr = DeserializeAddress(args[0].AsStringRef()); - if (addr.Type() != TIpv6Address::Ipv4) { - return TUnboxedValue(); - } +SIMPLE_STRICT_UDF(TSubnetFromString, TOptionalString(TAutoMapString)) { + TIpAddressRange range = TIpAddressRange::FromCompactString(args[0].AsStringRef()); + auto res = SerializeSubnet(range); + return res ? 
valueBuilder->NewString(res) : TUnboxedValue(TUnboxedValuePod()); +} - in_addr tmp; - addr.ToInAddr(tmp); - ui32 ret = ntohl(tmp.s_addr); - return TUnboxedValuePod(ret); - } +SIMPLE_UDF(TToString, char*(TAutoMapString)) { + return valueBuilder->NewString(DeserializeAddress(args[0].AsStringRef()).ToString(false)); +} - SIMPLE_UDF(TSubnetToString, char*(TAutoMapString)) { - TStringBuilder result; - auto range = DeserializeSubnet(args[0].AsStringRef()); - result << (*range.Begin()).ToString(false); - result << '/'; - result << ToString(GetAddressRangePrefix(range)); - return valueBuilder->NewString(result); +SIMPLE_UDF_OPTIONS(TIpv4ToUint32, TOptionalUint32(TAutoMapString), builder.SetMinLangVer(NYql::MakeLangVersion(2025, 3))) { + Y_UNUSED(valueBuilder); + TIpv6Address addr = DeserializeAddress(args[0].AsStringRef()); + if (addr.Type() != TIpv6Address::Ipv4) { + return TUnboxedValue(); } - SIMPLE_UDF(TSubnetMatch, bool(TAutoMapString, TAutoMapString)) { - Y_UNUSED(valueBuilder); - auto range1 = DeserializeSubnet(args[0].AsStringRef()); - if (args[1].AsStringRef().Size() == sizeof(TRawIp4) || args[1].AsStringRef().Size() == sizeof(TRawIp6)) { - auto addr2 = DeserializeAddress(args[1].AsStringRef()); - return TUnboxedValuePod(range1.Contains(addr2)); - } else { // second argument is a whole subnet, not a single address - auto range2 = DeserializeSubnet(args[1].AsStringRef()); - return TUnboxedValuePod(range1.Contains(range2)); - } - } + in_addr tmp; + addr.ToInAddr(tmp); + ui32 ret = ntohl(tmp.s_addr); + return TUnboxedValuePod(ret); +} - SIMPLE_STRICT_UDF(TIsIPv4, bool(TOptionalString)) { - Y_UNUSED(valueBuilder); - bool result = false; - if (args[0]) { - const auto ref = args[0].AsStringRef(); - result = ref.Size() == 4; - } - return TUnboxedValuePod(result); +SIMPLE_UDF(TSubnetToString, char*(TAutoMapString)) { + TStringBuilder result; + auto range = DeserializeSubnet(args[0].AsStringRef()); + result << (*range.Begin()).ToString(false); + result << '/'; + result << 
ToString(GetAddressRangePrefix(range)); + return valueBuilder->NewString(result); +} + +SIMPLE_UDF(TSubnetMatch, bool(TAutoMapString, TAutoMapString)) { + Y_UNUSED(valueBuilder); + auto range1 = DeserializeSubnet(args[0].AsStringRef()); + if (args[1].AsStringRef().Size() == sizeof(TRawIp4) || args[1].AsStringRef().Size() == sizeof(TRawIp6)) { + auto addr2 = DeserializeAddress(args[1].AsStringRef()); + return TUnboxedValuePod(range1.Contains(addr2)); + } else { // second argument is a whole subnet, not a single address + auto range2 = DeserializeSubnet(args[1].AsStringRef()); + return TUnboxedValuePod(range1.Contains(range2)); } +} - SIMPLE_STRICT_UDF(TIsIPv6, bool(TOptionalString)) { - Y_UNUSED(valueBuilder); - bool result = false; - if (args[0]) { - const auto ref = args[0].AsStringRef(); - result = ref.Size() == 16; - } - return TUnboxedValuePod(result); +SIMPLE_STRICT_UDF(TIsIPv4, bool(TOptionalString)) { + Y_UNUSED(valueBuilder); + bool result = false; + if (args[0]) { + const auto ref = args[0].AsStringRef(); + result = ref.Size() == 4; } + return TUnboxedValuePod(result); +} - SIMPLE_STRICT_UDF(TIsEmbeddedIPv4, bool(TOptionalString)) { - Y_UNUSED(valueBuilder); - bool result = false; - if (args[0]) { - const auto ref = args[0].AsStringRef(); - if (ref.Size() == 16) { - result = DeserializeAddress(ref).Isv4MappedTov6(); - } - } - return TUnboxedValuePod(result); +SIMPLE_STRICT_UDF(TIsIPv6, bool(TOptionalString)) { + Y_UNUSED(valueBuilder); + bool result = false; + if (args[0]) { + const auto ref = args[0].AsStringRef(); + result = ref.Size() == 16; } + return TUnboxedValuePod(result); +} - SIMPLE_UDF(TConvertToIPv6, char*(TAutoMapString)) { - const auto& ref = args[0].AsStringRef(); +SIMPLE_STRICT_UDF(TIsEmbeddedIPv4, bool(TOptionalString)) { + Y_UNUSED(valueBuilder); + bool result = false; + if (args[0]) { + const auto ref = args[0].AsStringRef(); if (ref.Size() == 16) { - return valueBuilder->NewString(ref); - } else if (ref.Size() == 4) { - TIpv6Address 
addr4 = DeserializeAddress(ref); - auto addr6 = TIpv6Address(ui128(addr4) | ui128(0xFFFF) << 32, TIpv6Address::Ipv6); - return valueBuilder->NewString(SerializeAddress(addr6)); - } else { - ythrow yexception() << "Incorrect size of input, expected " - << "4 or 16, got " << ref.Size(); + result = DeserializeAddress(ref).Isv4MappedTov6(); } } + return TUnboxedValuePod(result); +} - SIMPLE_UDF_WITH_OPTIONAL_ARGS(TGetSubnet, char*(TAutoMapString, TOptionalByte), 1) { - const auto ref = args[0].AsStringRef(); - ui8 subnetSize = args[1].GetOrDefault<ui8>(0); - TIpv6Address addr = DeserializeAddress(ref); - if (ref.Size() == 4) { - if (!subnetSize) { - subnetSize = 24; - } - if (subnetSize > 32) { - subnetSize = 32; - } - } else if (ref.Size() == 16) { - if (!subnetSize) { - subnetSize = 64; - } - if (subnetSize > 128) { - subnetSize = 128; - } - } else { - ythrow yexception() << "Incorrect size of input, expected " - << "4 or 16, got " << ref.Size(); - } - TIpv6Address beg = LowerBoundForPrefix(addr, subnetSize); - return valueBuilder->NewString(SerializeAddress(beg)); +SIMPLE_UDF(TConvertToIPv6, char*(TAutoMapString)) { + const auto& ref = args[0].AsStringRef(); + if (ref.Size() == 16) { + return valueBuilder->NewString(ref); + } else if (ref.Size() == 4) { + TIpv6Address addr4 = DeserializeAddress(ref); + auto addr6 = TIpv6Address(ui128(addr4) | ui128(0xFFFF) << 32, TIpv6Address::Ipv6); + return valueBuilder->NewString(SerializeAddress(addr6)); + } else { + ythrow yexception() << "Incorrect size of input, expected " + << "4 or 16, got " << ref.Size(); } +} - SIMPLE_UDF(TGetSubnetByMask, char*(TAutoMapString, TAutoMapString)) { - const auto refBase = args[0].AsStringRef(); - const auto refMask = args[1].AsStringRef(); - TIpv6Address addrBase = DeserializeAddress(refBase); - TIpv6Address addrMask = DeserializeAddress(refMask); - if (addrBase.Type() != addrMask.Type()) { - ythrow yexception() << "Base and mask differ in length"; +SIMPLE_UDF_WITH_OPTIONAL_ARGS(TGetSubnet, 
char*(TAutoMapString, TOptionalByte), 1) { + const auto ref = args[0].AsStringRef(); + ui8 subnetSize = args[1].GetOrDefault<ui8>(0); + TIpv6Address addr = DeserializeAddress(ref); + if (ref.Size() == 4) { + if (!subnetSize) { + subnetSize = 24; } - return valueBuilder->NewString(SerializeAddress(TIpv6Address(ui128(addrBase) & ui128(addrMask), addrBase.Type()))); + if (subnetSize > 32) { + subnetSize = 32; + } + } else if (ref.Size() == 16) { + if (!subnetSize) { + subnetSize = 64; + } + if (subnetSize > 128) { + subnetSize = 128; + } + } else { + ythrow yexception() << "Incorrect size of input, expected " + << "4 or 16, got " << ref.Size(); + } + TIpv6Address beg = LowerBoundForPrefix(addr, subnetSize); + return valueBuilder->NewString(SerializeAddress(beg)); +} + +SIMPLE_UDF(TGetSubnetByMask, char*(TAutoMapString, TAutoMapString)) { + const auto refBase = args[0].AsStringRef(); + const auto refMask = args[1].AsStringRef(); + TIpv6Address addrBase = DeserializeAddress(refBase); + TIpv6Address addrMask = DeserializeAddress(refMask); + if (addrBase.Type() != addrMask.Type()) { + ythrow yexception() << "Base and mask differ in length"; } + return valueBuilder->NewString(SerializeAddress(TIpv6Address(ui128(addrBase) & ui128(addrMask), addrBase.Type()))); +} #define EXPORTED_IP_BASE_UDF \ - TFromString, \ - TIpv4FromUint32, \ - TSubnetFromString, \ - TToString, \ - TIpv4ToUint32, \ - TSubnetToString, \ - TIsIPv4, \ - TIsIPv6, \ - TIsEmbeddedIPv4, \ - TConvertToIPv6, \ - TGetSubnet, \ - TSubnetMatch, \ - TGetSubnetByMask -} + TFromString, \ + TIpv4FromUint32, \ + TSubnetFromString, \ + TToString, \ + TIpv4ToUint32, \ + TSubnetToString, \ + TIsIPv4, \ + TIsIPv6, \ + TIsEmbeddedIPv4, \ + TConvertToIPv6, \ + TGetSubnet, \ + TSubnetMatch, \ + TGetSubnetByMask +} // namespace diff --git a/yql/essentials/udfs/common/ip_base/lib/ya.make b/yql/essentials/udfs/common/ip_base/lib/ya.make index ab9b2bce8e2..3587d2b6192 100644 --- a/yql/essentials/udfs/common/ip_base/lib/ya.make 
+++ b/yql/essentials/udfs/common/ip_base/lib/ya.make @@ -6,6 +6,8 @@ YQL_ABI_VERSION( 0 ) +ENABLE(YQL_STYLE_CPP) + SRCS( ip_base_udf.cpp ) diff --git a/yql/essentials/udfs/common/ip_base/ya.make b/yql/essentials/udfs/common/ip_base/ya.make index 0a2859c0af2..86c40dd2698 100644 --- a/yql/essentials/udfs/common/ip_base/ya.make +++ b/yql/essentials/udfs/common/ip_base/ya.make @@ -6,6 +6,8 @@ YQL_UDF_CONTRIB(ip_udf) 0 ) + ENABLE(YQL_STYLE_CPP) + SRCS( ip_base.cpp ) diff --git a/yql/essentials/udfs/common/json/json_udf.cpp b/yql/essentials/udfs/common/json/json_udf.cpp index 3a7916bed74..92316947df6 100644 --- a/yql/essentials/udfs/common/json/json_udf.cpp +++ b/yql/essentials/udfs/common/json/json_udf.cpp @@ -6,113 +6,113 @@ using namespace NKikimr; using namespace NUdf; namespace { - class TGetField: public TBoxedValue { - public: - typedef bool TTypeAwareMarker; +class TGetField: public TBoxedValue { +public: + typedef bool TTypeAwareMarker; - public: - static TStringRef Name() { - return TStringRef::Of("GetField"); - } - - TUnboxedValue Run( - const IValueBuilder* valueBuilder, - const TUnboxedValuePod* args) const override { - if (!args[0]) { - return valueBuilder->NewEmptyList(); - } +public: + static TStringRef Name() { + return TStringRef::Of("GetField"); + } - const TString json(args[0].AsStringRef()); - const TString field(args[1].AsStringRef()); + TUnboxedValue Run( + const IValueBuilder* valueBuilder, + const TUnboxedValuePod* args) const override { + if (!args[0]) { + return valueBuilder->NewEmptyList(); + } - if (field.empty()) { - return valueBuilder->NewEmptyList(); - } + const TString json(args[0].AsStringRef()); + const TString field(args[1].AsStringRef()); - NJson::TJsonParser parser; - parser.AddField(field, false); + if (field.empty()) { + return valueBuilder->NewEmptyList(); + } - TVector<TString> result; - parser.Parse(json, &result); + NJson::TJsonParser parser; + parser.AddField(field, false); - TUnboxedValue* items = nullptr; - const auto list 
= valueBuilder->NewArray(result.size(), items); - for (const TString& item : result) { - *items++ = valueBuilder->NewString(item); - } + TVector<TString> result; + parser.Parse(json, &result); - return list; + TUnboxedValue* items = nullptr; + const auto list = valueBuilder->NewArray(result.size(), items); + for (const TString& item : result) { + *items++ = valueBuilder->NewString(item); } - static bool DeclareSignature( - const TStringRef& name, - TType* userType, - IFunctionTypeInfoBuilder& builder, - bool typesOnly) { - if (Name() == name) { - bool useString = true; - bool isOptional = true; - if (userType) { - // support of an overload with Json/Json? input type - auto typeHelper = builder.TypeInfoHelper(); - auto userTypeInspector = TTupleTypeInspector(*typeHelper, userType); - if (!userTypeInspector || userTypeInspector.GetElementsCount() < 1) { - builder.SetError("Missing or invalid user type."); - return true; - } - - auto argsTypeTuple = userTypeInspector.GetElementType(0); - auto argsTypeInspector = TTupleTypeInspector(*typeHelper, argsTypeTuple); - if (!argsTypeInspector) { - builder.SetError("Invalid user type - expected tuple."); - return true; - } + return list; + } - if (argsTypeInspector.GetElementsCount() != 2) { - builder.SetError("Invalid user type - expected two arguments."); - return true; - } + static bool DeclareSignature( + const TStringRef& name, + TType* userType, + IFunctionTypeInfoBuilder& builder, + bool typesOnly) { + if (Name() == name) { + bool useString = true; + bool isOptional = true; + if (userType) { + // support of an overload with Json/Json? 
input type + auto typeHelper = builder.TypeInfoHelper(); + auto userTypeInspector = TTupleTypeInspector(*typeHelper, userType); + if (!userTypeInspector || userTypeInspector.GetElementsCount() < 1) { + builder.SetError("Missing or invalid user type."); + return true; + } - auto inputType = argsTypeInspector.GetElementType(0); - auto optInspector = TOptionalTypeInspector(*typeHelper, inputType); - auto dataType = inputType; - if (optInspector) { - dataType = optInspector.GetItemType(); - } else { - isOptional = false; - } + auto argsTypeTuple = userTypeInspector.GetElementType(0); + auto argsTypeInspector = TTupleTypeInspector(*typeHelper, argsTypeTuple); + if (!argsTypeInspector) { + builder.SetError("Invalid user type - expected tuple."); + return true; + } - auto dataInspector = TDataTypeInspector(*typeHelper, dataType); - if (dataInspector && dataInspector.GetTypeId() == TDataType<TJson>::Id) { - useString = false; - builder.UserType(userType); - } + if (argsTypeInspector.GetElementsCount() != 2) { + builder.SetError("Invalid user type - expected two arguments."); + return true; } - auto retType = builder.List()->Item<char*>().Build(); - if (useString) { - builder.Args()->Add(builder.Optional()->Item<char*>().Build()).Add<char*>().Done().Returns(retType); + auto inputType = argsTypeInspector.GetElementType(0); + auto optInspector = TOptionalTypeInspector(*typeHelper, inputType); + auto dataType = inputType; + if (optInspector) { + dataType = optInspector.GetItemType(); } else { - auto type = builder.SimpleType<TJson>(); - if (isOptional) { - builder.Args()->Add(builder.Optional()->Item(type).Build()).Add<char*>().Done().Returns(retType); - } else { - builder.Args()->Add(type).Add<char*>().Done().Returns(retType); - } + isOptional = false; } - if (!typesOnly) { - builder.Implementation(new TGetField); + auto dataInspector = TDataTypeInspector(*typeHelper, dataType); + if (dataInspector && dataInspector.GetTypeId() == TDataType<TJson>::Id) { + useString = false; + 
builder.UserType(userType); } + } - builder.IsStrict(); - return true; + auto retType = builder.List()->Item<char*>().Build(); + if (useString) { + builder.Args()->Add(builder.Optional()->Item<char*>().Build()).Add<char*>().Done().Returns(retType); } else { - return false; + auto type = builder.SimpleType<TJson>(); + if (isOptional) { + builder.Args()->Add(builder.Optional()->Item(type).Build()).Add<char*>().Done().Returns(retType); + } else { + builder.Args()->Add(type).Add<char*>().Done().Returns(retType); + } + } + + if (!typesOnly) { + builder.Implementation(new TGetField); } + + builder.IsStrict(); + return true; + } else { + return false; } - }; -} + } +}; +} // namespace SIMPLE_MODULE(TJsonModule, TGetField) diff --git a/yql/essentials/udfs/common/json/ya.make b/yql/essentials/udfs/common/json/ya.make index 689714e306b..d45fe60e5f0 100644 --- a/yql/essentials/udfs/common/json/ya.make +++ b/yql/essentials/udfs/common/json/ya.make @@ -6,6 +6,8 @@ YQL_UDF_CONTRIB(json_udf) 0 ) + ENABLE(YQL_STYLE_CPP) + SRCS( json_udf.cpp ) diff --git a/yql/essentials/udfs/common/json2/as_json_node.h b/yql/essentials/udfs/common/json2/as_json_node.h index 6060f03bea8..82c51802433 100644 --- a/yql/essentials/udfs/common/json2/as_json_node.h +++ b/yql/essentials/udfs/common/json2/as_json_node.h @@ -8,108 +8,107 @@ #include <yql/essentials/minikql/dom/json.h> namespace NJson2Udf { - using namespace NKikimr; - using namespace NUdf; - using namespace NYql; - using namespace NDom; +using namespace NKikimr; +using namespace NUdf; +using namespace NYql; +using namespace NDom; - template <typename TSource> - class TAsJsonNode: public TBoxedValue { - public: - TAsJsonNode(TSourcePosition pos) - : Pos_(pos) - { - } - - static TStringRef Name(); +template <typename TSource> +class TAsJsonNode: public TBoxedValue { +public: + TAsJsonNode(TSourcePosition pos) + : Pos_(pos) + { + } - static bool DeclareSignature( - const TStringRef& name, - TType* userType, - IFunctionTypeInfoBuilder& builder, 
- bool typesOnly) { - Y_UNUSED(userType); - if (name != Name()) { - return false; - } + static TStringRef Name(); - auto optionalSourceType = builder.Optional()->Item<TSource>().Build(); - auto resourceType = builder.Resource(JSON_NODE_RESOURCE_NAME); - builder.Args() - ->Add(optionalSourceType) - .Done() - .Returns(resourceType); + static bool DeclareSignature( + const TStringRef& name, + TType* userType, + IFunctionTypeInfoBuilder& builder, + bool typesOnly) { + Y_UNUSED(userType); + if (name != Name()) { + return false; + } - if (!typesOnly) { - builder.Implementation(new TAsJsonNode<TSource>(builder.GetSourcePosition())); - } + auto optionalSourceType = builder.Optional()->Item<TSource>().Build(); + auto resourceType = builder.Resource(JSON_NODE_RESOURCE_NAME); + builder.Args() + ->Add(optionalSourceType) + .Done() + .Returns(resourceType); - builder.IsStrict(); - return true; + if (!typesOnly) { + builder.Implementation(new TAsJsonNode<TSource>(builder.GetSourcePosition())); } - private: - const size_t MaxParseErrors_ = 10; + builder.IsStrict(); + return true; + } + +private: + const size_t MaxParseErrors_ = 10; - static TUnboxedValue Interpret(const TUnboxedValue& sourceValue, const IValueBuilder* valueBuilder); + static TUnboxedValue Interpret(const TUnboxedValue& sourceValue, const IValueBuilder* valueBuilder); - TUnboxedValue Run( - const IValueBuilder* valueBuilder, - const TUnboxedValuePod* args) const final { - Y_UNUSED(valueBuilder); - try { - if (!args[0].HasValue()) { - return MakeEntity(); - } - return Interpret(args[0], valueBuilder); - } catch (const std::exception& e) { - UdfTerminate((TStringBuilder() << Pos_ << " " << e.what()).c_str()); + TUnboxedValue Run( + const IValueBuilder* valueBuilder, + const TUnboxedValuePod* args) const final { + Y_UNUSED(valueBuilder); + try { + if (!args[0].HasValue()) { + return MakeEntity(); } + return Interpret(args[0], valueBuilder); + } catch (const std::exception& e) { + UdfTerminate((TStringBuilder() << 
Pos_ << " " << e.what()).c_str()); } - - TSourcePosition Pos_; - }; - - template <> - TStringRef TAsJsonNode<TUtf8>::Name() { - return TStringRef::Of("Utf8AsJsonNode"); } - template <> - TUnboxedValue TAsJsonNode<TUtf8>::Interpret(const TUnboxedValue& sourceValue, const IValueBuilder* valueBuilder) { - return MakeString(sourceValue.AsStringRef(), valueBuilder); - } + TSourcePosition Pos_; +}; - template <> - TStringRef TAsJsonNode<double>::Name() { - return TStringRef::Of("DoubleAsJsonNode"); - } +template <> +TStringRef TAsJsonNode<TUtf8>::Name() { + return TStringRef::Of("Utf8AsJsonNode"); +} - template <> - TUnboxedValue TAsJsonNode<double>::Interpret(const TUnboxedValue& sourceValue, const IValueBuilder* valueBuilder) { - Y_UNUSED(valueBuilder); - return MakeDouble(sourceValue.Get<double>()); - } +template <> +TUnboxedValue TAsJsonNode<TUtf8>::Interpret(const TUnboxedValue& sourceValue, const IValueBuilder* valueBuilder) { + return MakeString(sourceValue.AsStringRef(), valueBuilder); +} - template <> - TStringRef TAsJsonNode<bool>::Name() { - return TStringRef::Of("BoolAsJsonNode"); - } +template <> +TStringRef TAsJsonNode<double>::Name() { + return TStringRef::Of("DoubleAsJsonNode"); +} - template <> - TUnboxedValue TAsJsonNode<bool>::Interpret(const TUnboxedValue& sourceValue, const IValueBuilder* valueBuilder) { - Y_UNUSED(valueBuilder); - return MakeBool(sourceValue.Get<bool>()); - } +template <> +TUnboxedValue TAsJsonNode<double>::Interpret(const TUnboxedValue& sourceValue, const IValueBuilder* valueBuilder) { + Y_UNUSED(valueBuilder); + return MakeDouble(sourceValue.Get<double>()); +} - template <> - TStringRef TAsJsonNode<TJson>::Name() { - return TStringRef::Of("JsonAsJsonNode"); - } +template <> +TStringRef TAsJsonNode<bool>::Name() { + return TStringRef::Of("BoolAsJsonNode"); +} - template <> - TUnboxedValue TAsJsonNode<TJson>::Interpret(const TUnboxedValue& sourceValue, const IValueBuilder* valueBuilder) { - return 
TryParseJsonDom(sourceValue.AsStringRef(), valueBuilder); - } +template <> +TUnboxedValue TAsJsonNode<bool>::Interpret(const TUnboxedValue& sourceValue, const IValueBuilder* valueBuilder) { + Y_UNUSED(valueBuilder); + return MakeBool(sourceValue.Get<bool>()); } +template <> +TStringRef TAsJsonNode<TJson>::Name() { + return TStringRef::Of("JsonAsJsonNode"); +} + +template <> +TUnboxedValue TAsJsonNode<TJson>::Interpret(const TUnboxedValue& sourceValue, const IValueBuilder* valueBuilder) { + return TryParseJsonDom(sourceValue.AsStringRef(), valueBuilder); +} +} // namespace NJson2Udf diff --git a/yql/essentials/udfs/common/json2/compile_path.h b/yql/essentials/udfs/common/json2/compile_path.h index 220bd4fbaf6..f932c6df60c 100644 --- a/yql/essentials/udfs/common/json2/compile_path.h +++ b/yql/essentials/udfs/common/json2/compile_path.h @@ -6,65 +6,64 @@ #include <yql/essentials/public/udf/udf_helpers.h> namespace NJson2Udf { - using namespace NKikimr; - using namespace NUdf; - using namespace NYql; +using namespace NKikimr; +using namespace NUdf; +using namespace NYql; - class TCompilePath: public TBoxedValue { - public: - TCompilePath(TSourcePosition pos) - : Pos_(pos) - { - } +class TCompilePath: public TBoxedValue { +public: + TCompilePath(TSourcePosition pos) + : Pos_(pos) + { + } - static const TStringRef& Name() { - static auto name = TStringRef::Of("CompilePath"); - return name; - } + static const TStringRef& Name() { + static auto name = TStringRef::Of("CompilePath"); + return name; + } - static bool DeclareSignature( - const TStringRef& name, - TType* userType, - IFunctionTypeInfoBuilder& builder, - bool typesOnly) { - Y_UNUSED(userType); - if (name != Name()) { - return false; - } + static bool DeclareSignature( + const TStringRef& name, + TType* userType, + IFunctionTypeInfoBuilder& builder, + bool typesOnly) { + Y_UNUSED(userType); + if (name != Name()) { + return false; + } - auto resourceType = builder.Resource(JSONPATH_RESOURCE_NAME); - builder.Args() 
- ->Add<NUdf::TUtf8>() - .Done() - .Returns(resourceType); + auto resourceType = builder.Resource(JSONPATH_RESOURCE_NAME); + builder.Args() + ->Add<NUdf::TUtf8>() + .Done() + .Returns(resourceType); - if (!typesOnly) { - builder.Implementation(new TCompilePath(builder.GetSourcePosition())); - } - return true; + if (!typesOnly) { + builder.Implementation(new TCompilePath(builder.GetSourcePosition())); } + return true; + } - private: - const size_t MaxParseErrors_ = 10; - - TUnboxedValue Run( - const IValueBuilder* valueBuilder, - const TUnboxedValuePod* args) const final { - Y_UNUSED(valueBuilder); - try { - TIssues issues; - const auto jsonPath = NJsonPath::ParseJsonPath(args[0].AsStringRef(), issues, MaxParseErrors_); - if (!issues.Empty()) { - ythrow yexception() << "Error parsing jsonpath:" << Endl << issues.ToString(); - } +private: + const size_t MaxParseErrors_ = 10; - return TUnboxedValuePod(new TJsonPathResource(jsonPath)); - } catch (const std::exception& e) { - UdfTerminate((TStringBuilder() << Pos_ << " " << e.what()).c_str()); + TUnboxedValue Run( + const IValueBuilder* valueBuilder, + const TUnboxedValuePod* args) const final { + Y_UNUSED(valueBuilder); + try { + TIssues issues; + const auto jsonPath = NJsonPath::ParseJsonPath(args[0].AsStringRef(), issues, MaxParseErrors_); + if (!issues.Empty()) { + ythrow yexception() << "Error parsing jsonpath:" << Endl << issues.ToString(); } - } - TSourcePosition Pos_; - }; -} + return TUnboxedValuePod(new TJsonPathResource(jsonPath)); + } catch (const std::exception& e) { + UdfTerminate((TStringBuilder() << Pos_ << " " << e.what()).c_str()); + } + } + TSourcePosition Pos_; +}; +} // namespace NJson2Udf diff --git a/yql/essentials/udfs/common/json2/json2_udf.cpp b/yql/essentials/udfs/common/json2/json2_udf.cpp index 96ef6ccf00b..76c4ef786db 100644 --- a/yql/essentials/udfs/common/json2/json2_udf.cpp +++ b/yql/essentials/udfs/common/json2/json2_udf.cpp @@ -9,35 +9,35 @@ #include 
<yql/essentials/public/udf/udf_helpers.h> namespace NJson2Udf { - SIMPLE_MODULE(TJson2Module, - TParse, - TSerialize<EDataSlot::Json>, - TSerialize<EDataSlot::JsonDocument>, - TCompilePath, - TSqlValue<EDataSlot::Json, TUtf8>, - TSqlValue<EDataSlot::Json, TUtf8, true>, - TSqlValue<EDataSlot::Json, i64>, - TSqlValue<EDataSlot::Json, double>, - TSqlValue<EDataSlot::Json, bool>, - TSqlValue<EDataSlot::JsonDocument, TUtf8>, - TSqlValue<EDataSlot::JsonDocument, TUtf8, true>, - TSqlValue<EDataSlot::JsonDocument, i64>, - TSqlValue<EDataSlot::JsonDocument, double>, - TSqlValue<EDataSlot::JsonDocument, bool>, - TSqlExists<EDataSlot::Json, false>, - TSqlExists<EDataSlot::Json, true>, - TSqlExists<EDataSlot::JsonDocument, false>, - TSqlExists<EDataSlot::JsonDocument, true>, - TSqlQuery<EDataSlot::Json, EJsonQueryWrap::NoWrap>, - TSqlQuery<EDataSlot::Json, EJsonQueryWrap::Wrap>, - TSqlQuery<EDataSlot::Json, EJsonQueryWrap::ConditionalWrap>, - TSqlQuery<EDataSlot::JsonDocument, EJsonQueryWrap::NoWrap>, - TSqlQuery<EDataSlot::JsonDocument, EJsonQueryWrap::Wrap>, - TSqlQuery<EDataSlot::JsonDocument, EJsonQueryWrap::ConditionalWrap>, - TAsJsonNode<TUtf8>, - TAsJsonNode<double>, - TAsJsonNode<bool>, - TAsJsonNode<TJson>) -} +SIMPLE_MODULE(TJson2Module, + TParse, + TSerialize<EDataSlot::Json>, + TSerialize<EDataSlot::JsonDocument>, + TCompilePath, + TSqlValue<EDataSlot::Json, TUtf8>, + TSqlValue<EDataSlot::Json, TUtf8, true>, + TSqlValue<EDataSlot::Json, i64>, + TSqlValue<EDataSlot::Json, double>, + TSqlValue<EDataSlot::Json, bool>, + TSqlValue<EDataSlot::JsonDocument, TUtf8>, + TSqlValue<EDataSlot::JsonDocument, TUtf8, true>, + TSqlValue<EDataSlot::JsonDocument, i64>, + TSqlValue<EDataSlot::JsonDocument, double>, + TSqlValue<EDataSlot::JsonDocument, bool>, + TSqlExists<EDataSlot::Json, false>, + TSqlExists<EDataSlot::Json, true>, + TSqlExists<EDataSlot::JsonDocument, false>, + TSqlExists<EDataSlot::JsonDocument, true>, + TSqlQuery<EDataSlot::Json, EJsonQueryWrap::NoWrap>, + 
TSqlQuery<EDataSlot::Json, EJsonQueryWrap::Wrap>, + TSqlQuery<EDataSlot::Json, EJsonQueryWrap::ConditionalWrap>, + TSqlQuery<EDataSlot::JsonDocument, EJsonQueryWrap::NoWrap>, + TSqlQuery<EDataSlot::JsonDocument, EJsonQueryWrap::Wrap>, + TSqlQuery<EDataSlot::JsonDocument, EJsonQueryWrap::ConditionalWrap>, + TAsJsonNode<TUtf8>, + TAsJsonNode<double>, + TAsJsonNode<bool>, + TAsJsonNode<TJson>) +} // namespace NJson2Udf REGISTER_MODULES(NJson2Udf::TJson2Module) diff --git a/yql/essentials/udfs/common/json2/parse.h b/yql/essentials/udfs/common/json2/parse.h index 6df4bce9b0a..72db2106fa5 100644 --- a/yql/essentials/udfs/common/json2/parse.h +++ b/yql/essentials/udfs/common/json2/parse.h @@ -9,58 +9,57 @@ #include <library/cpp/json/json_reader.h> namespace NJson2Udf { - using namespace NKikimr; - using namespace NUdf; - using namespace NYql; - using namespace NDom; +using namespace NKikimr; +using namespace NUdf; +using namespace NYql; +using namespace NDom; - class TParse: public TBoxedValue { - public: - TParse(TSourcePosition pos) - : Pos_(pos) - { - } +class TParse: public TBoxedValue { +public: + TParse(TSourcePosition pos) + : Pos_(pos) + { + } - static const TStringRef& Name() { - static auto name = TStringRef::Of("Parse"); - return name; - } + static const TStringRef& Name() { + static auto name = TStringRef::Of("Parse"); + return name; + } - static bool DeclareSignature( - const TStringRef& name, - TType* userType, - IFunctionTypeInfoBuilder& builder, - bool typesOnly) { - Y_UNUSED(userType); - if (name != Name()) { - return false; - } + static bool DeclareSignature( + const TStringRef& name, + TType* userType, + IFunctionTypeInfoBuilder& builder, + bool typesOnly) { + Y_UNUSED(userType); + if (name != Name()) { + return false; + } - builder.Args() - ->Add<TAutoMap<TJson>>() - .Done() - .Returns<TJsonNodeResource>(); + builder.Args() + ->Add<TAutoMap<TJson>>() + .Done() + .Returns<TJsonNodeResource>(); - if (!typesOnly) { - builder.Implementation(new 
TParse(builder.GetSourcePosition())); - } - return true; + if (!typesOnly) { + builder.Implementation(new TParse(builder.GetSourcePosition())); } + return true; + } - private: - TUnboxedValue Run( - const IValueBuilder* valueBuilder, - const TUnboxedValuePod* args) const final { - Y_UNUSED(valueBuilder); - try { - const auto json = args[0].AsStringRef(); - return TryParseJsonDom(json, valueBuilder); - } catch (const std::exception& e) { - UdfTerminate((TStringBuilder() << Pos_ << " " << e.what()).c_str()); - } +private: + TUnboxedValue Run( + const IValueBuilder* valueBuilder, + const TUnboxedValuePod* args) const final { + Y_UNUSED(valueBuilder); + try { + const auto json = args[0].AsStringRef(); + return TryParseJsonDom(json, valueBuilder); + } catch (const std::exception& e) { + UdfTerminate((TStringBuilder() << Pos_ << " " << e.what()).c_str()); } + } - TSourcePosition Pos_; - }; -} - + TSourcePosition Pos_; +}; +} // namespace NJson2Udf diff --git a/yql/essentials/udfs/common/json2/resource.h b/yql/essentials/udfs/common/json2/resource.h index aa65b14818d..5d2cc6e7b0b 100644 --- a/yql/essentials/udfs/common/json2/resource.h +++ b/yql/essentials/udfs/common/json2/resource.h @@ -4,14 +4,13 @@ #include <yql/essentials/minikql/jsonpath/jsonpath.h> namespace NJson2Udf { - using namespace NKikimr; - using namespace NUdf; - using namespace NYql; +using namespace NKikimr; +using namespace NUdf; +using namespace NYql; - extern const char JSONPATH_RESOURCE_NAME[] = "JsonPath"; - using TJsonPathResource = TBoxedResource<NJsonPath::TJsonPathPtr, JSONPATH_RESOURCE_NAME>; - - extern const char JSON_NODE_RESOURCE_NAME[] = "JsonNode"; - using TJsonNodeResource = TResource<JSON_NODE_RESOURCE_NAME>; -} +extern const char JSONPATH_RESOURCE_NAME[] = "JsonPath"; +using TJsonPathResource = TBoxedResource<NJsonPath::TJsonPathPtr, JSONPATH_RESOURCE_NAME>; +extern const char JSON_NODE_RESOURCE_NAME[] = "JsonNode"; +using TJsonNodeResource = TResource<JSON_NODE_RESOURCE_NAME>; +} // 
namespace NJson2Udf diff --git a/yql/essentials/udfs/common/json2/serialize.h b/yql/essentials/udfs/common/json2/serialize.h index cda95e77f5a..2443259fbee 100644 --- a/yql/essentials/udfs/common/json2/serialize.h +++ b/yql/essentials/udfs/common/json2/serialize.h @@ -9,81 +9,80 @@ #include <yql/essentials/types/binary_json/write.h> namespace NJson2Udf { - using namespace NKikimr; - using namespace NUdf; - using namespace NYql; - using namespace NDom; - using namespace NBinaryJson; +using namespace NKikimr; +using namespace NUdf; +using namespace NYql; +using namespace NDom; +using namespace NBinaryJson; - template <EDataSlot ResultType> - class TSerialize : public TBoxedValue { - public: - TSerialize(TSourcePosition pos) - : Pos_(pos) - { - } +template <EDataSlot ResultType> +class TSerialize: public TBoxedValue { +public: + TSerialize(TSourcePosition pos) + : Pos_(pos) + { + } - static const TStringRef& Name(); + static const TStringRef& Name(); - static bool DeclareSignature( - const TStringRef& name, - TType* userType, - IFunctionTypeInfoBuilder& builder, - bool typesOnly) { - Y_UNUSED(userType); - if (name != Name()) { - return false; - } + static bool DeclareSignature( + const TStringRef& name, + TType* userType, + IFunctionTypeInfoBuilder& builder, + bool typesOnly) { + Y_UNUSED(userType); + if (name != Name()) { + return false; + } - TType* resultType = nullptr; - if constexpr (ResultType == EDataSlot::Json) { - resultType = builder.SimpleType<TJson>(); - } else { - resultType = builder.SimpleType<TJsonDocument>(); - } + TType* resultType = nullptr; + if constexpr (ResultType == EDataSlot::Json) { + resultType = builder.SimpleType<TJson>(); + } else { + resultType = builder.SimpleType<TJsonDocument>(); + } - builder.Args() - ->Add<TAutoMap<TJsonNodeResource>>() - .Done() - .Returns(resultType); + builder.Args() + ->Add<TAutoMap<TJsonNodeResource>>() + .Done() + .Returns(resultType); - if (!typesOnly) { - builder.Implementation(new 
TSerialize(builder.GetSourcePosition())); - } - return true; + if (!typesOnly) { + builder.Implementation(new TSerialize(builder.GetSourcePosition())); } + return true; + } - private: - TUnboxedValue Run( - const IValueBuilder* valueBuilder, - const TUnboxedValuePod* args) const final { - try { - const TUnboxedValue& jsonDom = args[0]; +private: + TUnboxedValue Run( + const IValueBuilder* valueBuilder, + const TUnboxedValuePod* args) const final { + try { + const TUnboxedValue& jsonDom = args[0]; - if constexpr (ResultType == EDataSlot::Json) { - return valueBuilder->NewString(SerializeJsonDom(jsonDom)); - } else { - const auto binaryJson = SerializeToBinaryJson(jsonDom); - return valueBuilder->NewString(TStringBuf(binaryJson.Data(), binaryJson.Size())); - } - } catch (const std::exception& e) { - UdfTerminate((TStringBuilder() << Pos_ << " " << e.what()).c_str()); + if constexpr (ResultType == EDataSlot::Json) { + return valueBuilder->NewString(SerializeJsonDom(jsonDom)); + } else { + const auto binaryJson = SerializeToBinaryJson(jsonDom); + return valueBuilder->NewString(TStringBuf(binaryJson.Data(), binaryJson.Size())); } + } catch (const std::exception& e) { + UdfTerminate((TStringBuilder() << Pos_ << " " << e.what()).c_str()); } - - TSourcePosition Pos_; - }; - - template <> - const TStringRef& TSerialize<EDataSlot::Json>::Name() { - static auto name = TStringRef::Of("Serialize"); - return name; } - template <> - const TStringRef& TSerialize<EDataSlot::JsonDocument>::Name() { - static auto name = TStringRef::Of("SerializeToJsonDocument"); - return name; - } + TSourcePosition Pos_; +}; + +template <> +const TStringRef& TSerialize<EDataSlot::Json>::Name() { + static auto name = TStringRef::Of("Serialize"); + return name; } +template <> +const TStringRef& TSerialize<EDataSlot::JsonDocument>::Name() { + static auto name = TStringRef::Of("SerializeToJsonDocument"); + return name; +} +} // namespace NJson2Udf diff --git 
a/yql/essentials/udfs/common/json2/sql_exists.h b/yql/essentials/udfs/common/json2/sql_exists.h index cb89f20ec21..955c1b1ce7f 100644 --- a/yql/essentials/udfs/common/json2/sql_exists.h +++ b/yql/essentials/udfs/common/json2/sql_exists.h @@ -10,126 +10,125 @@ #include <util/generic/yexception.h> namespace NJson2Udf { - using namespace NKikimr; - using namespace NUdf; - using namespace NYql; - using namespace NJsonPath; +using namespace NKikimr; +using namespace NUdf; +using namespace NYql; +using namespace NJsonPath; - template <EDataSlot InputType, bool ThrowException> - class TSqlExists: public TBoxedValue { - public: - explicit TSqlExists(TSourcePosition pos) - : Pos_(pos) - { +template <EDataSlot InputType, bool ThrowException> +class TSqlExists: public TBoxedValue { +public: + explicit TSqlExists(TSourcePosition pos) + : Pos_(pos) + { + } + + static TStringRef Name(); + + static bool DeclareSignature( + const TStringRef& name, + TType* userType, + IFunctionTypeInfoBuilder& builder, + bool typesOnly) { + Y_UNUSED(userType); + if (name != Name()) { + return false; } - static TStringRef Name(); + auto jsonType = builder.Resource(JSON_NODE_RESOURCE_NAME); + TType* inputType = nullptr; + if constexpr (InputType == EDataSlot::JsonDocument) { + inputType = builder.SimpleType<TJsonDocument>(); + } else { + inputType = jsonType; + } + auto inputOptionalType = builder.Optional()->Item(inputType).Build(); + auto jsonPathType = builder.Resource(JSONPATH_RESOURCE_NAME); + auto dictType = builder.Dict()->Key<TUtf8>().Value(jsonType).Build(); + auto optionalBoolType = builder.Optional()->Item<bool>().Build(); - static bool DeclareSignature( - const TStringRef& name, - TType* userType, - IFunctionTypeInfoBuilder& builder, - bool typesOnly) { - Y_UNUSED(userType); - if (name != Name()) { - return false; - } + if constexpr (ThrowException) { + builder.Args() + ->Add(inputOptionalType) + .Add(jsonPathType) + .Add(dictType) + .Done() + .Returns(optionalBoolType); + } else { + 
builder.Args() + ->Add(inputOptionalType) + .Add(jsonPathType) + .Add(dictType) + .Add(optionalBoolType) + .Done() + .Returns(optionalBoolType); + } - auto jsonType = builder.Resource(JSON_NODE_RESOURCE_NAME); - TType* inputType = nullptr; - if constexpr (InputType == EDataSlot::JsonDocument) { - inputType = builder.SimpleType<TJsonDocument>(); - } else { - inputType = jsonType; - } - auto inputOptionalType = builder.Optional()->Item(inputType).Build(); - auto jsonPathType = builder.Resource(JSONPATH_RESOURCE_NAME); - auto dictType = builder.Dict()->Key<TUtf8>().Value(jsonType).Build(); - auto optionalBoolType = builder.Optional()->Item<bool>().Build(); + if (!typesOnly) { + builder.Implementation(new TSqlExists(builder.GetSourcePosition())); + } + if constexpr (!ThrowException) { + builder.IsStrict(); + } + return true; + } - if constexpr (ThrowException) { - builder.Args() - ->Add(inputOptionalType) - .Add(jsonPathType) - .Add(dictType) - .Done() - .Returns(optionalBoolType); - } else { - builder.Args() - ->Add(inputOptionalType) - .Add(jsonPathType) - .Add(dictType) - .Add(optionalBoolType) - .Done() - .Returns(optionalBoolType); +private: + TUnboxedValue Run( + const IValueBuilder* valueBuilder, + const TUnboxedValuePod* args) const final { + Y_UNUSED(valueBuilder); + try { + if (!args[0].HasValue()) { + return TUnboxedValuePod(); } - if (!typesOnly) { - builder.Implementation(new TSqlExists(builder.GetSourcePosition())); - } - if constexpr (!ThrowException) { - builder.IsStrict(); + TValue jsonDom; + if constexpr (InputType == EDataSlot::JsonDocument) { + jsonDom = TValue(NBinaryJson::TBinaryJsonReader::Make(args[0].AsStringRef())->GetRootCursor()); + } else { + jsonDom = TValue(args[0]); } - return true; - } - private: - TUnboxedValue Run( - const IValueBuilder* valueBuilder, - const TUnboxedValuePod* args) const final { - Y_UNUSED(valueBuilder); - try { - if (!args[0].HasValue()) { - return TUnboxedValuePod(); - } + auto* jsonPathResource = 
static_cast<TJsonPathResource*>(args[1].AsBoxed().Get()); + const auto& jsonPath = *jsonPathResource->Get(); + const auto variables = DictToVariables(args[2]); - TValue jsonDom; - if constexpr (InputType == EDataSlot::JsonDocument) { - jsonDom = TValue(NBinaryJson::TBinaryJsonReader::Make(args[0].AsStringRef())->GetRootCursor()); + const auto result = ExecuteJsonPath(jsonPath, jsonDom, variables, valueBuilder); + if (result.IsError()) { + if constexpr (ThrowException) { + ythrow yexception() << "Error executing jsonpath:" << Endl << result.GetError() << Endl; } else { - jsonDom = TValue(args[0]); - } - - auto* jsonPathResource = static_cast<TJsonPathResource*>(args[1].AsBoxed().Get()); - const auto& jsonPath = *jsonPathResource->Get(); - const auto variables = DictToVariables(args[2]); - - const auto result = ExecuteJsonPath(jsonPath, jsonDom, variables, valueBuilder); - if (result.IsError()) { - if constexpr (ThrowException) { - ythrow yexception() << "Error executing jsonpath:" << Endl << result.GetError() << Endl; - } else { - return args[3]; - } + return args[3]; } - - return TUnboxedValuePod(!result.GetNodes().empty()); - } catch (const std::exception& e) { - UdfTerminate((TStringBuilder() << Pos_ << " " << e.what()).c_str()); } - } - - TSourcePosition Pos_; - }; - template <> - TStringRef TSqlExists<EDataSlot::Json, false>::Name() { - return "SqlExists"; + return TUnboxedValuePod(!result.GetNodes().empty()); + } catch (const std::exception& e) { + UdfTerminate((TStringBuilder() << Pos_ << " " << e.what()).c_str()); + } } - template <> - TStringRef TSqlExists<EDataSlot::Json, true>::Name() { - return "SqlTryExists"; - } + TSourcePosition Pos_; +}; - template <> - TStringRef TSqlExists<EDataSlot::JsonDocument, false>::Name() { - return "JsonDocumentSqlExists"; - } +template <> +TStringRef TSqlExists<EDataSlot::Json, false>::Name() { + return "SqlExists"; +} - template <> - TStringRef TSqlExists<EDataSlot::JsonDocument, true>::Name() { - return 
"JsonDocumentSqlTryExists"; - } +template <> +TStringRef TSqlExists<EDataSlot::Json, true>::Name() { + return "SqlTryExists"; +} + +template <> +TStringRef TSqlExists<EDataSlot::JsonDocument, false>::Name() { + return "JsonDocumentSqlExists"; } +template <> +TStringRef TSqlExists<EDataSlot::JsonDocument, true>::Name() { + return "JsonDocumentSqlTryExists"; +} +} // namespace NJson2Udf diff --git a/yql/essentials/udfs/common/json2/sql_query.h b/yql/essentials/udfs/common/json2/sql_query.h index 1c2d610f923..00bf6fb2d74 100644 --- a/yql/essentials/udfs/common/json2/sql_query.h +++ b/yql/essentials/udfs/common/json2/sql_query.h @@ -12,173 +12,172 @@ #include <util/generic/yexception.h> namespace NJson2Udf { - using namespace NKikimr; - using namespace NUdf; - using namespace NYql; - using namespace NDom; - using namespace NJsonPath; +using namespace NKikimr; +using namespace NUdf; +using namespace NYql; +using namespace NDom; +using namespace NJsonPath; - template <EDataSlot InputType, EJsonQueryWrap Mode> - class TSqlQuery: public TBoxedValue { - public: - explicit TSqlQuery(TSourcePosition pos) - : Pos_(pos) - { - } - - static TStringRef Name(); - - static bool DeclareSignature( - const TStringRef& name, - TType* userType, - IFunctionTypeInfoBuilder& builder, - bool typesOnly) { - Y_UNUSED(userType); - if (name != Name()) { - return false; - } +template <EDataSlot InputType, EJsonQueryWrap Mode> +class TSqlQuery: public TBoxedValue { +public: + explicit TSqlQuery(TSourcePosition pos) + : Pos_(pos) + { + } - auto jsonType = builder.Resource(JSON_NODE_RESOURCE_NAME); - auto optionalJsonType = builder.Optional()->Item(jsonType).Build(); - TType* inputType = nullptr; - if constexpr (InputType == EDataSlot::JsonDocument) { - inputType = builder.SimpleType<TJsonDocument>(); - } else { - inputType = jsonType; - } - auto inputOptionalType = builder.Optional()->Item(inputType).Build(); - auto jsonPathType = builder.Resource(JSONPATH_RESOURCE_NAME); - auto dictType = 
builder.Dict()->Key<TUtf8>().Value(jsonType).Build(); + static TStringRef Name(); - /* - Arguments: - 0. Resource<JsonNode>? or JsonDocument?. Input json - 1. Resource<JsonPath>. Jsonpath to execute on json - 2. Dict<TUtf8, Resource<JsonNode>>. Variables to pass into jsonpath - 3. Bool. True - throw on empty result, false otherwise - 4. Resource<JsonNode>?. Default value to return on empty result. Ignored if 2d argument is true - 5. Bool. True - throw on error, false - otherwise - 6. Resource<JsonNode>?. Default value to return on error. Ignored if 4th argument is true - */ - // we can't mark TSqlQuery as strict due to runtime throw policy setting - // TODO: optimizer can mark SqlQuery as strict if 3th/5th arguments are literal booleans - builder.Args() - ->Add(inputOptionalType) - .Add(jsonPathType) - .Add(dictType) - .Add<bool>() - .Add(optionalJsonType) - .Add<bool>() - .Add(optionalJsonType) - .Done() - .Returns(optionalJsonType); + static bool DeclareSignature( + const TStringRef& name, + TType* userType, + IFunctionTypeInfoBuilder& builder, + bool typesOnly) { + Y_UNUSED(userType); + if (name != Name()) { + return false; + } - if (!typesOnly) { - builder.Implementation(new TSqlQuery(builder.GetSourcePosition())); - } - return true; + auto jsonType = builder.Resource(JSON_NODE_RESOURCE_NAME); + auto optionalJsonType = builder.Optional()->Item(jsonType).Build(); + TType* inputType = nullptr; + if constexpr (InputType == EDataSlot::JsonDocument) { + inputType = builder.SimpleType<TJsonDocument>(); + } else { + inputType = jsonType; } + auto inputOptionalType = builder.Optional()->Item(inputType).Build(); + auto jsonPathType = builder.Resource(JSONPATH_RESOURCE_NAME); + auto dictType = builder.Dict()->Key<TUtf8>().Value(jsonType).Build(); - private: - TUnboxedValue Run( - const IValueBuilder* valueBuilder, - const TUnboxedValuePod* args) const final { - Y_UNUSED(valueBuilder); - try { - if (!args[0].HasValue()) { - return TUnboxedValuePod(); - } + /* + Arguments: 
+ 0. Resource<JsonNode>? or JsonDocument?. Input json + 1. Resource<JsonPath>. Jsonpath to execute on json + 2. Dict<TUtf8, Resource<JsonNode>>. Variables to pass into jsonpath + 3. Bool. True - throw on empty result, false otherwise + 4. Resource<JsonNode>?. Default value to return on empty result. Ignored if 2d argument is true + 5. Bool. True - throw on error, false - otherwise + 6. Resource<JsonNode>?. Default value to return on error. Ignored if 4th argument is true + */ + // we can't mark TSqlQuery as strict due to runtime throw policy setting + // TODO: optimizer can mark SqlQuery as strict if 3th/5th arguments are literal booleans + builder.Args() + ->Add(inputOptionalType) + .Add(jsonPathType) + .Add(dictType) + .Add<bool>() + .Add(optionalJsonType) + .Add<bool>() + .Add(optionalJsonType) + .Done() + .Returns(optionalJsonType); - TValue jsonDom; - if constexpr (InputType == EDataSlot::JsonDocument) { - jsonDom = TValue(NBinaryJson::TBinaryJsonReader::Make(args[0].AsStringRef())->GetRootCursor()); - } else { - jsonDom = TValue(args[0]); - } + if (!typesOnly) { + builder.Implementation(new TSqlQuery(builder.GetSourcePosition())); + } + return true; + } - auto* jsonPathResource = static_cast<TJsonPathResource*>(args[1].AsBoxed().Get()); - const auto& jsonPath = *jsonPathResource->Get(); +private: + TUnboxedValue Run( + const IValueBuilder* valueBuilder, + const TUnboxedValuePod* args) const final { + Y_UNUSED(valueBuilder); + try { + if (!args[0].HasValue()) { + return TUnboxedValuePod(); + } - const bool throwOnEmpty = args[3].Get<bool>(); - const auto emptyDefault = args[4]; - const bool throwOnError = args[5].Get<bool>(); - const auto errorDefault = args[6]; - const auto variables = DictToVariables(args[2]); + TValue jsonDom; + if constexpr (InputType == EDataSlot::JsonDocument) { + jsonDom = TValue(NBinaryJson::TBinaryJsonReader::Make(args[0].AsStringRef())->GetRootCursor()); + } else { + jsonDom = TValue(args[0]); + } - auto result = 
ExecuteJsonPath(jsonPath, jsonDom, variables, valueBuilder); + auto* jsonPathResource = static_cast<TJsonPathResource*>(args[1].AsBoxed().Get()); + const auto& jsonPath = *jsonPathResource->Get(); - const auto handleCase = [](TStringBuf message, bool throws, const TUnboxedValuePod& caseDefault) { - if (throws) { - ythrow yexception() << message; - } - return caseDefault; - }; + const bool throwOnEmpty = args[3].Get<bool>(); + const auto emptyDefault = args[4]; + const bool throwOnError = args[5].Get<bool>(); + const auto errorDefault = args[6]; + const auto variables = DictToVariables(args[2]); - if (result.IsError()) { - return handleCase(TStringBuilder() << "Error executing jsonpath:" << Endl << result.GetError() << Endl, throwOnError, errorDefault); - } + auto result = ExecuteJsonPath(jsonPath, jsonDom, variables, valueBuilder); - auto& nodes = result.GetNodes(); - const bool isSingleStruct = nodes.size() == 1 && (nodes[0].Is(EValueType::Array) || nodes[0].Is(EValueType::Object)); - if (Mode == EJsonQueryWrap::Wrap || (Mode == EJsonQueryWrap::ConditionalWrap && !isSingleStruct)) { - TVector<TUnboxedValue> converted; - converted.reserve(nodes.size()); - for (auto& node : nodes) { - converted.push_back(node.ConvertToUnboxedValue(valueBuilder)); - } - return MakeList(converted.data(), converted.size(), valueBuilder); + const auto handleCase = [](TStringBuf message, bool throws, const TUnboxedValuePod& caseDefault) { + if (throws) { + ythrow yexception() << message; } + return caseDefault; + }; - if (nodes.empty()) { - return handleCase("Empty result", throwOnEmpty, emptyDefault); - } + if (result.IsError()) { + return handleCase(TStringBuilder() << "Error executing jsonpath:" << Endl << result.GetError() << Endl, throwOnError, errorDefault); + } - // No wrapping is applicable and result is not empty. 
Result must be a single object or array - if (nodes.size() > 1) { - return handleCase("Result consists of multiple items", throwOnError, errorDefault); + auto& nodes = result.GetNodes(); + const bool isSingleStruct = nodes.size() == 1 && (nodes[0].Is(EValueType::Array) || nodes[0].Is(EValueType::Object)); + if (Mode == EJsonQueryWrap::Wrap || (Mode == EJsonQueryWrap::ConditionalWrap && !isSingleStruct)) { + TVector<TUnboxedValue> converted; + converted.reserve(nodes.size()); + for (auto& node : nodes) { + converted.push_back(node.ConvertToUnboxedValue(valueBuilder)); } + return MakeList(converted.data(), converted.size(), valueBuilder); + } - if (!nodes[0].Is(EValueType::Array) && !nodes[0].Is(EValueType::Object)) { - return handleCase("Result is neither object nor array", throwOnError, errorDefault); - } + if (nodes.empty()) { + return handleCase("Empty result", throwOnEmpty, emptyDefault); + } - return nodes[0].ConvertToUnboxedValue(valueBuilder); - } catch (const std::exception& e) { - UdfTerminate((TStringBuilder() << Pos_ << " " << e.what()).c_str()); + // No wrapping is applicable and result is not empty. 
Result must be a single object or array + if (nodes.size() > 1) { + return handleCase("Result consists of multiple items", throwOnError, errorDefault); } - } - TSourcePosition Pos_; - }; + if (!nodes[0].Is(EValueType::Array) && !nodes[0].Is(EValueType::Object)) { + return handleCase("Result is neither object nor array", throwOnError, errorDefault); + } - template <> - TStringRef TSqlQuery<EDataSlot::Json, EJsonQueryWrap::NoWrap>::Name() { - return "SqlQuery"; + return nodes[0].ConvertToUnboxedValue(valueBuilder); + } catch (const std::exception& e) { + UdfTerminate((TStringBuilder() << Pos_ << " " << e.what()).c_str()); + } } - template <> - TStringRef TSqlQuery<EDataSlot::Json, EJsonQueryWrap::Wrap>::Name() { - return "SqlQueryWrap"; - } + TSourcePosition Pos_; +}; - template <> - TStringRef TSqlQuery<EDataSlot::Json, EJsonQueryWrap::ConditionalWrap>::Name() { - return "SqlQueryConditionalWrap"; - } +template <> +TStringRef TSqlQuery<EDataSlot::Json, EJsonQueryWrap::NoWrap>::Name() { + return "SqlQuery"; +} - template <> - TStringRef TSqlQuery<EDataSlot::JsonDocument, EJsonQueryWrap::NoWrap>::Name() { - return "JsonDocumentSqlQuery"; - } +template <> +TStringRef TSqlQuery<EDataSlot::Json, EJsonQueryWrap::Wrap>::Name() { + return "SqlQueryWrap"; +} - template <> - TStringRef TSqlQuery<EDataSlot::JsonDocument, EJsonQueryWrap::Wrap>::Name() { - return "JsonDocumentSqlQueryWrap"; - } +template <> +TStringRef TSqlQuery<EDataSlot::Json, EJsonQueryWrap::ConditionalWrap>::Name() { + return "SqlQueryConditionalWrap"; +} - template <> - TStringRef TSqlQuery<EDataSlot::JsonDocument, EJsonQueryWrap::ConditionalWrap>::Name() { - return "JsonDocumentSqlQueryConditionalWrap"; - } +template <> +TStringRef TSqlQuery<EDataSlot::JsonDocument, EJsonQueryWrap::NoWrap>::Name() { + return "JsonDocumentSqlQuery"; +} + +template <> +TStringRef TSqlQuery<EDataSlot::JsonDocument, EJsonQueryWrap::Wrap>::Name() { + return "JsonDocumentSqlQueryWrap"; } +template <> +TStringRef 
TSqlQuery<EDataSlot::JsonDocument, EJsonQueryWrap::ConditionalWrap>::Name() { + return "JsonDocumentSqlQueryConditionalWrap"; +} +} // namespace NJson2Udf diff --git a/yql/essentials/udfs/common/json2/sql_value.h b/yql/essentials/udfs/common/json2/sql_value.h index 53b451c6275..525d1296a6b 100644 --- a/yql/essentials/udfs/common/json2/sql_value.h +++ b/yql/essentials/udfs/common/json2/sql_value.h @@ -15,282 +15,282 @@ #include <util/string/cast.h> namespace NJson2Udf { - using namespace NKikimr; - using namespace NUdf; - using namespace NYql; - using namespace NDom; - using namespace NJsonPath; +using namespace NKikimr; +using namespace NUdf; +using namespace NYql; +using namespace NDom; +using namespace NJsonPath; - namespace { - template <class TValueType, bool ForceConvert = false> - TUnboxedValue TryConvertJson(const IValueBuilder* valueBuilder, const TUnboxedValue& source) { - Y_UNUSED(valueBuilder); - Y_UNUSED(source); - Y_ABORT("Unsupported type"); - } - - template <> - TUnboxedValue TryConvertJson<TUtf8>(const IValueBuilder* valueBuilder, const TUnboxedValue& source) { - Y_UNUSED(valueBuilder); - if (IsNodeType(source, ENodeType::String)) { - return source; - } - return {}; - } - - template <> - TUnboxedValue TryConvertJson<TUtf8, true>(const IValueBuilder* valueBuilder, const TUnboxedValue& source) { - switch (GetNodeType(source)) { - case ENodeType::String: - return source; - case ENodeType::Uint64: - return valueBuilder->NewString(ToString(source.Get<ui64>())).Release(); - case ENodeType::Int64: - return valueBuilder->NewString(ToString(source.Get<i64>())).Release(); - case ENodeType::Bool: - return source.Get<bool>() ? 
TUnboxedValuePod::Embedded("true") : TUnboxedValuePod::Embedded("false"); - case ENodeType::Double: - return valueBuilder->NewString(ToString(source.Get<double>())).Release(); - case ENodeType::Entity: - return TUnboxedValuePod::Embedded("null"); - case ENodeType::List: - case ENodeType::Dict: - case ENodeType::Attr: - return {}; - } - } - - template <> - TUnboxedValue TryConvertJson<i64>(const IValueBuilder* valueBuilder, const TUnboxedValue& source) { - Y_UNUSED(valueBuilder); - if (!source.IsEmbedded()) { - return {}; - } +namespace { +template <class TValueType, bool ForceConvert = false> +TUnboxedValue TryConvertJson(const IValueBuilder* valueBuilder, const TUnboxedValue& source) { + Y_UNUSED(valueBuilder); + Y_UNUSED(source); + Y_ABORT("Unsupported type"); +} - if (IsNodeType(source, ENodeType::Int64)) { - return TUnboxedValuePod(source.Get<i64>()); - } else if (IsNodeType(source, ENodeType::Uint64) && source.Get<ui64>() < Max<i64>()) { - return TUnboxedValuePod(static_cast<i64>(source.Get<ui64>())); - } else if (IsNodeType(source, ENodeType::Double) && static_cast<i64>(source.Get<double>()) == source.Get<double>()) { - return TUnboxedValuePod(static_cast<i64>(source.Get<double>())); - } +template <> +TUnboxedValue TryConvertJson<TUtf8>(const IValueBuilder* valueBuilder, const TUnboxedValue& source) { + Y_UNUSED(valueBuilder); + if (IsNodeType(source, ENodeType::String)) { + return source; + } + return {}; +} +template <> +TUnboxedValue TryConvertJson<TUtf8, true>(const IValueBuilder* valueBuilder, const TUnboxedValue& source) { + switch (GetNodeType(source)) { + case ENodeType::String: + return source; + case ENodeType::Uint64: + return valueBuilder->NewString(ToString(source.Get<ui64>())).Release(); + case ENodeType::Int64: + return valueBuilder->NewString(ToString(source.Get<i64>())).Release(); + case ENodeType::Bool: + return source.Get<bool>() ? 
TUnboxedValuePod::Embedded("true") : TUnboxedValuePod::Embedded("false"); + case ENodeType::Double: + return valueBuilder->NewString(ToString(source.Get<double>())).Release(); + case ENodeType::Entity: + return TUnboxedValuePod::Embedded("null"); + case ENodeType::List: + case ENodeType::Dict: + case ENodeType::Attr: return {}; - } + } +} - template <> - TUnboxedValue TryConvertJson<double>(const IValueBuilder* valueBuilder, const TUnboxedValue& source) { - Y_UNUSED(valueBuilder); - if (!source.IsEmbedded()) { - return {}; - } +template <> +TUnboxedValue TryConvertJson<i64>(const IValueBuilder* valueBuilder, const TUnboxedValue& source) { + Y_UNUSED(valueBuilder); + if (!source.IsEmbedded()) { + return {}; + } - if (IsNodeType(source, ENodeType::Double)) { - return TUnboxedValuePod(source.Get<double>()); - } else if (IsNodeType(source, ENodeType::Int64)) { - return TUnboxedValuePod(static_cast<double>(source.Get<i64>())); - } else if (IsNodeType(source, ENodeType::Uint64)) { - return TUnboxedValuePod(static_cast<double>(source.Get<ui64>())); - } + if (IsNodeType(source, ENodeType::Int64)) { + return TUnboxedValuePod(source.Get<i64>()); + } else if (IsNodeType(source, ENodeType::Uint64) && source.Get<ui64>() < Max<i64>()) { + return TUnboxedValuePod(static_cast<i64>(source.Get<ui64>())); + } else if (IsNodeType(source, ENodeType::Double) && static_cast<i64>(source.Get<double>()) == source.Get<double>()) { + return TUnboxedValuePod(static_cast<i64>(source.Get<double>())); + } - return {}; - } + return {}; +} - template <> - TUnboxedValue TryConvertJson<bool>(const IValueBuilder* valueBuilder, const TUnboxedValue& source) { - Y_UNUSED(valueBuilder); - if (!source.IsEmbedded() || !IsNodeType(source, ENodeType::Bool)) { - return {}; - } - return {TUnboxedValuePod(source.Get<bool>())}; - } +template <> +TUnboxedValue TryConvertJson<double>(const IValueBuilder* valueBuilder, const TUnboxedValue& source) { + Y_UNUSED(valueBuilder); + if (!source.IsEmbedded()) { + return 
{}; } - template <EDataSlot InputType, class TValueType, bool ForceConvert = false> - class TSqlValue: public TBoxedValue { - public: - enum class TErrorCode : ui8 { - Empty = 0, - Error = 1 - }; + if (IsNodeType(source, ENodeType::Double)) { + return TUnboxedValuePod(source.Get<double>()); + } else if (IsNodeType(source, ENodeType::Int64)) { + return TUnboxedValuePod(static_cast<double>(source.Get<i64>())); + } else if (IsNodeType(source, ENodeType::Uint64)) { + return TUnboxedValuePod(static_cast<double>(source.Get<ui64>())); + } - TSqlValue(TSourcePosition pos) - : Pos_(pos) - { - } + return {}; +} - static TStringRef Name(); +template <> +TUnboxedValue TryConvertJson<bool>(const IValueBuilder* valueBuilder, const TUnboxedValue& source) { + Y_UNUSED(valueBuilder); + if (!source.IsEmbedded() || !IsNodeType(source, ENodeType::Bool)) { + return {}; + } + return {TUnboxedValuePod(source.Get<bool>())}; +} +} // namespace - static bool DeclareSignature( - const TStringRef& name, - TType* userType, - IFunctionTypeInfoBuilder& builder, - bool typesOnly) { - Y_UNUSED(userType); - if (name != Name()) { - return false; - } +template <EDataSlot InputType, class TValueType, bool ForceConvert = false> +class TSqlValue: public TBoxedValue { +public: + enum class TErrorCode: ui8 { + Empty = 0, + Error = 1 + }; - auto optionalValueType = builder.Optional()->Item<TValueType>().Build(); - auto errorTupleType = builder.Tuple(2)->Add<ui8>().Add<char*>().Build(); - auto returnTypeTuple = builder.Tuple(2) - ->Add(errorTupleType) - .Add(optionalValueType) - .Build(); - auto returnType = builder.Variant()->Over(returnTypeTuple).Build(); + TSqlValue(TSourcePosition pos) + : Pos_(pos) + { + } - TType* jsonType = nullptr; - if constexpr (InputType == EDataSlot::Json) { - jsonType = builder.Resource(JSON_NODE_RESOURCE_NAME); - } else { - jsonType = builder.SimpleType<TJsonDocument>(); - } - auto optionalJsonType = builder.Optional()->Item(jsonType).Build(); - auto jsonPathType = 
builder.Resource(JSONPATH_RESOURCE_NAME); - auto dictType = builder.Dict()->Key<TUtf8>().Value(builder.Resource(JSON_NODE_RESOURCE_NAME)).Build(); + static TStringRef Name(); - builder.Args() - ->Add(optionalJsonType) - .Add(jsonPathType) - .Add(dictType) - .Done() - .Returns(returnType); + static bool DeclareSignature( + const TStringRef& name, + TType* userType, + IFunctionTypeInfoBuilder& builder, + bool typesOnly) { + Y_UNUSED(userType); + if (name != Name()) { + return false; + } - builder.IsStrict(); + auto optionalValueType = builder.Optional()->Item<TValueType>().Build(); + auto errorTupleType = builder.Tuple(2)->Add<ui8>().Add<char*>().Build(); + auto returnTypeTuple = builder.Tuple(2) + ->Add(errorTupleType) + .Add(optionalValueType) + .Build(); + auto returnType = builder.Variant()->Over(returnTypeTuple).Build(); - if (!typesOnly) { - builder.Implementation(new TSqlValue(builder.GetSourcePosition())); - } - return true; + TType* jsonType = nullptr; + if constexpr (InputType == EDataSlot::Json) { + jsonType = builder.Resource(JSON_NODE_RESOURCE_NAME); + } else { + jsonType = builder.SimpleType<TJsonDocument>(); } + auto optionalJsonType = builder.Optional()->Item(jsonType).Build(); + auto jsonPathType = builder.Resource(JSONPATH_RESOURCE_NAME); + auto dictType = builder.Dict()->Key<TUtf8>().Value(builder.Resource(JSON_NODE_RESOURCE_NAME)).Build(); - private: - TUnboxedValue BuildErrorResult(const IValueBuilder* valueBuilder, TErrorCode code, const TStringBuf message) const { - TUnboxedValue* items = nullptr; - auto errorTuple = valueBuilder->NewArray(2, items); - items[0] = TUnboxedValuePod(static_cast<ui8>(code)); - items[1] = valueBuilder->NewString(message); - return valueBuilder->NewVariant(0, std::move(errorTuple)); - } + builder.Args() + ->Add(optionalJsonType) + .Add(jsonPathType) + .Add(dictType) + .Done() + .Returns(returnType); + + builder.IsStrict(); - TUnboxedValue BuildSuccessfulResult(const IValueBuilder* valueBuilder, TUnboxedValue&& value) 
const { - return valueBuilder->NewVariant(1, std::move(value)); + if (!typesOnly) { + builder.Implementation(new TSqlValue(builder.GetSourcePosition())); } + return true; + } - TUnboxedValue Run( - const IValueBuilder* valueBuilder, - const TUnboxedValuePod* args) const final { - try { - if (!args[0].HasValue()) { - return BuildSuccessfulResult(valueBuilder, TUnboxedValuePod()); - } +private: + TUnboxedValue BuildErrorResult(const IValueBuilder* valueBuilder, TErrorCode code, const TStringBuf message) const { + TUnboxedValue* items = nullptr; + auto errorTuple = valueBuilder->NewArray(2, items); + items[0] = TUnboxedValuePod(static_cast<ui8>(code)); + items[1] = valueBuilder->NewString(message); + return valueBuilder->NewVariant(0, std::move(errorTuple)); + } - TValue jsonDom; - if constexpr (InputType == EDataSlot::JsonDocument) { - jsonDom = TValue(NBinaryJson::TBinaryJsonReader::Make(args[0].AsStringRef())->GetRootCursor()); - } else { - jsonDom = TValue(args[0]); - } + TUnboxedValue BuildSuccessfulResult(const IValueBuilder* valueBuilder, TUnboxedValue&& value) const { + return valueBuilder->NewVariant(1, std::move(value)); + } - auto* jsonPathResource = static_cast<TJsonPathResource*>(args[1].AsBoxed().Get()); - const auto& jsonPath = *jsonPathResource->Get(); - const auto variables = DictToVariables(args[2]); + TUnboxedValue Run( + const IValueBuilder* valueBuilder, + const TUnboxedValuePod* args) const final { + try { + if (!args[0].HasValue()) { + return BuildSuccessfulResult(valueBuilder, TUnboxedValuePod()); + } - const auto result = ExecuteJsonPath(jsonPath, jsonDom, variables, valueBuilder); + TValue jsonDom; + if constexpr (InputType == EDataSlot::JsonDocument) { + jsonDom = TValue(NBinaryJson::TBinaryJsonReader::Make(args[0].AsStringRef())->GetRootCursor()); + } else { + jsonDom = TValue(args[0]); + } - if (result.IsError()) { - return BuildErrorResult(valueBuilder, TErrorCode::Error, TStringBuilder() << "Error executing jsonpath:" << Endl << 
result.GetError() << Endl); - } + auto* jsonPathResource = static_cast<TJsonPathResource*>(args[1].AsBoxed().Get()); + const auto& jsonPath = *jsonPathResource->Get(); + const auto variables = DictToVariables(args[2]); - const auto& nodes = result.GetNodes(); - if (nodes.empty()) { - return BuildErrorResult(valueBuilder, TErrorCode::Empty, "Result is empty"); - } + const auto result = ExecuteJsonPath(jsonPath, jsonDom, variables, valueBuilder); - if (nodes.size() > 1) { - return BuildErrorResult(valueBuilder, TErrorCode::Error, "Result consists of multiple items"); - } + if (result.IsError()) { + return BuildErrorResult(valueBuilder, TErrorCode::Error, TStringBuilder() << "Error executing jsonpath:" << Endl << result.GetError() << Endl); + } - const auto& value = nodes[0]; - if (value.Is(EValueType::Array) || value.Is(EValueType::Object)) { - // SqlValue can return only scalar values - return BuildErrorResult(valueBuilder, TErrorCode::Error, "Extracted JSON value is either object or array"); - } + const auto& nodes = result.GetNodes(); + if (nodes.empty()) { + return BuildErrorResult(valueBuilder, TErrorCode::Empty, "Result is empty"); + } - if (value.Is(EValueType::Null)) { - // JSON nulls must be converted to SQL nulls - return BuildSuccessfulResult(valueBuilder, TUnboxedValuePod()); - } + if (nodes.size() > 1) { + return BuildErrorResult(valueBuilder, TErrorCode::Error, "Result consists of multiple items"); + } - const auto source = value.ConvertToUnboxedValue(valueBuilder); - TUnboxedValue convertedValue = TryConvertJson<TValueType, ForceConvert>(valueBuilder, source); - if (!convertedValue) { - // error while converting JSON value type to TValueType - return BuildErrorResult(valueBuilder, TErrorCode::Error, "Cannot convert extracted JSON value to target type"); - } + const auto& value = nodes[0]; + if (value.Is(EValueType::Array) || value.Is(EValueType::Object)) { + // SqlValue can return only scalar values + return BuildErrorResult(valueBuilder, 
TErrorCode::Error, "Extracted JSON value is either object or array"); + } - return BuildSuccessfulResult(valueBuilder, std::move(convertedValue)); - } catch (const std::exception& e) { - UdfTerminate((TStringBuilder() << Pos_ << " " << e.what()).c_str()); + if (value.Is(EValueType::Null)) { + // JSON nulls must be converted to SQL nulls + return BuildSuccessfulResult(valueBuilder, TUnboxedValuePod()); } - } - TSourcePosition Pos_; - }; + const auto source = value.ConvertToUnboxedValue(valueBuilder); + TUnboxedValue convertedValue = TryConvertJson<TValueType, ForceConvert>(valueBuilder, source); + if (!convertedValue) { + // error while converting JSON value type to TValueType + return BuildErrorResult(valueBuilder, TErrorCode::Error, "Cannot convert extracted JSON value to target type"); + } - template <EDataSlot InputType, class TValueType, bool ForceConvert> - TStringRef TSqlValue<InputType, TValueType, ForceConvert>::Name() { - Y_ABORT("Unknown name"); + return BuildSuccessfulResult(valueBuilder, std::move(convertedValue)); + } catch (const std::exception& e) { + UdfTerminate((TStringBuilder() << Pos_ << " " << e.what()).c_str()); + } } - template<> - TStringRef TSqlValue<EDataSlot::Json, TUtf8, true>::Name() { - return TStringRef::Of("SqlValueConvertToUtf8"); - } + TSourcePosition Pos_; +}; - template <> - TStringRef TSqlValue<EDataSlot::Json, TUtf8>::Name() { - return TStringRef::Of("SqlValueUtf8"); - } +template <EDataSlot InputType, class TValueType, bool ForceConvert> +TStringRef TSqlValue<InputType, TValueType, ForceConvert>::Name() { + Y_ABORT("Unknown name"); +} - template <> - TStringRef TSqlValue<EDataSlot::Json, i64>::Name() { - return TStringRef::Of("SqlValueInt64"); - } +template <> +TStringRef TSqlValue<EDataSlot::Json, TUtf8, true>::Name() { + return TStringRef::Of("SqlValueConvertToUtf8"); +} - template <> - TStringRef TSqlValue<EDataSlot::Json, double>::Name() { - return TStringRef::Of("SqlValueNumber"); - } +template <> +TStringRef 
TSqlValue<EDataSlot::Json, TUtf8>::Name() { + return TStringRef::Of("SqlValueUtf8"); +} - template <> - TStringRef TSqlValue<EDataSlot::Json, bool>::Name() { - return TStringRef::Of("SqlValueBool"); - } +template <> +TStringRef TSqlValue<EDataSlot::Json, i64>::Name() { + return TStringRef::Of("SqlValueInt64"); +} - template<> - TStringRef TSqlValue<EDataSlot::JsonDocument, TUtf8, true>::Name() { - return TStringRef::Of("JsonDocumentSqlValueConvertToUtf8"); - } +template <> +TStringRef TSqlValue<EDataSlot::Json, double>::Name() { + return TStringRef::Of("SqlValueNumber"); +} - template <> - TStringRef TSqlValue<EDataSlot::JsonDocument, TUtf8>::Name() { - return TStringRef::Of("JsonDocumentSqlValueUtf8"); - } +template <> +TStringRef TSqlValue<EDataSlot::Json, bool>::Name() { + return TStringRef::Of("SqlValueBool"); +} - template <> - TStringRef TSqlValue<EDataSlot::JsonDocument, i64>::Name() { - return TStringRef::Of("JsonDocumentSqlValueInt64"); - } +template <> +TStringRef TSqlValue<EDataSlot::JsonDocument, TUtf8, true>::Name() { + return TStringRef::Of("JsonDocumentSqlValueConvertToUtf8"); +} - template <> - TStringRef TSqlValue<EDataSlot::JsonDocument, double>::Name() { - return TStringRef::Of("JsonDocumentSqlValueNumber"); - } +template <> +TStringRef TSqlValue<EDataSlot::JsonDocument, TUtf8>::Name() { + return TStringRef::Of("JsonDocumentSqlValueUtf8"); +} - template <> - TStringRef TSqlValue<EDataSlot::JsonDocument, bool>::Name() { - return TStringRef::Of("JsonDocumentSqlValueBool"); - } +template <> +TStringRef TSqlValue<EDataSlot::JsonDocument, i64>::Name() { + return TStringRef::Of("JsonDocumentSqlValueInt64"); +} + +template <> +TStringRef TSqlValue<EDataSlot::JsonDocument, double>::Name() { + return TStringRef::Of("JsonDocumentSqlValueNumber"); +} +template <> +TStringRef TSqlValue<EDataSlot::JsonDocument, bool>::Name() { + return TStringRef::Of("JsonDocumentSqlValueBool"); } + +} // namespace NJson2Udf diff --git 
a/yql/essentials/udfs/common/json2/ya.make b/yql/essentials/udfs/common/json2/ya.make index 52289125941..fa5e47018ce 100644 --- a/yql/essentials/udfs/common/json2/ya.make +++ b/yql/essentials/udfs/common/json2/ya.make @@ -5,6 +5,8 @@ YQL_UDF_CONTRIB(json2_udf) 28 0 ) + + ENABLE(YQL_STYLE_CPP) SRCS( json2_udf.cpp diff --git a/yql/essentials/udfs/common/math/lib/erfinv.cpp b/yql/essentials/udfs/common/math/lib/erfinv.cpp index def902860c3..c7626b8faf0 100644 --- a/yql/essentials/udfs/common/math/lib/erfinv.cpp +++ b/yql/essentials/udfs/common/math/lib/erfinv.cpp @@ -8,8 +8,8 @@ template <size_t N> static double PolEval(double x, const std::array<double, N>& coef) { static_assert(N > 0, "Array coef[] should not be empty."); return std::accumulate(coef.crbegin() + 1, coef.crend(), coef[N - 1], - [x] (auto init, auto cur) { - return std::move(init) * x + cur; + [x](auto init, auto cur) { + return std::move(init) * x + cur; }); } @@ -111,4 +111,4 @@ double ErfInv(double x) { return ans * sign; } -} +} // namespace NMathUdf diff --git a/yql/essentials/udfs/common/math/lib/erfinv.h b/yql/essentials/udfs/common/math/lib/erfinv.h index 1ced5a07e65..23c129e6572 100644 --- a/yql/essentials/udfs/common/math/lib/erfinv.h +++ b/yql/essentials/udfs/common/math/lib/erfinv.h @@ -4,4 +4,4 @@ namespace NMathUdf { double ErfInv(double x); -} +} // namespace NMathUdf diff --git a/yql/essentials/udfs/common/math/lib/round.h b/yql/essentials/udfs/common/math/lib/round.h index f59700da88f..815c3f6173a 100644 --- a/yql/essentials/udfs/common/math/lib/round.h +++ b/yql/essentials/udfs/common/math/lib/round.h @@ -55,23 +55,23 @@ inline std::optional<i64> NearbyIntImpl(double value, decltype(FE_DOWNWARD) mode if (res < double(std::numeric_limits<i64>::min() + 513) || res > double(std::numeric_limits<i64>::max() - 512)) { return {}; } - + return static_cast<i64>(res); } inline std::optional<i64> NearbyInt(double value, ui32 mode) { switch (mode) { - case 0: - return NearbyIntImpl(value, 
FE_DOWNWARD); - case 1: - return NearbyIntImpl(value, FE_TONEAREST); - case 2: - return NearbyIntImpl(value, FE_TOWARDZERO); - case 3: - return NearbyIntImpl(value, FE_UPWARD); - default: - return {}; + case 0: + return NearbyIntImpl(value, FE_DOWNWARD); + case 1: + return NearbyIntImpl(value, FE_TONEAREST); + case 2: + return NearbyIntImpl(value, FE_TOWARDZERO); + case 3: + return NearbyIntImpl(value, FE_UPWARD); + default: + return {}; } } -} +} // namespace NMathUdf diff --git a/yql/essentials/udfs/common/math/lib/round_ut.cpp b/yql/essentials/udfs/common/math/lib/round_ut.cpp index 4d0e96e4dc3..d791086fa8c 100644 --- a/yql/essentials/udfs/common/math/lib/round_ut.cpp +++ b/yql/essentials/udfs/common/math/lib/round_ut.cpp @@ -7,64 +7,64 @@ using namespace NMathUdf; Y_UNIT_TEST_SUITE(TRound) { - Y_UNIT_TEST(Basic) { - double value = 1930.0 / 3361.0; - double result = RoundToDecimal<long double>(value, -3); - double answer = 0.574; - UNIT_ASSERT_VALUES_EQUAL( - HexEncode(&result, sizeof(double)), - HexEncode(&answer, sizeof(double))); - } +Y_UNIT_TEST(Basic) { + double value = 1930.0 / 3361.0; + double result = RoundToDecimal<long double>(value, -3); + double answer = 0.574; + UNIT_ASSERT_VALUES_EQUAL( + HexEncode(&result, sizeof(double)), + HexEncode(&answer, sizeof(double))); +} - Y_UNIT_TEST(Mod) { - UNIT_ASSERT_VALUES_EQUAL(*Mod(-1, 7), 6); - UNIT_ASSERT_VALUES_EQUAL(*Mod(1, 7), 1); - UNIT_ASSERT_VALUES_EQUAL(*Mod(0, 7), 0); +Y_UNIT_TEST(Mod) { + UNIT_ASSERT_VALUES_EQUAL(*Mod(-1, 7), 6); + UNIT_ASSERT_VALUES_EQUAL(*Mod(1, 7), 1); + UNIT_ASSERT_VALUES_EQUAL(*Mod(0, 7), 0); - UNIT_ASSERT_VALUES_EQUAL(*Mod(-1, -7), -1); - UNIT_ASSERT_VALUES_EQUAL(*Mod(1, -7), -6); - UNIT_ASSERT_VALUES_EQUAL(*Mod(0, -7), 0); + UNIT_ASSERT_VALUES_EQUAL(*Mod(-1, -7), -1); + UNIT_ASSERT_VALUES_EQUAL(*Mod(1, -7), -6); + UNIT_ASSERT_VALUES_EQUAL(*Mod(0, -7), 0); - UNIT_ASSERT_VALUES_EQUAL(*Mod(-15, 7), 6); - UNIT_ASSERT_VALUES_EQUAL(*Mod(15, 7), 1); - UNIT_ASSERT_VALUES_EQUAL(*Mod(14, 
7), 0); - UNIT_ASSERT_VALUES_EQUAL(*Mod(-14, 7), 0); + UNIT_ASSERT_VALUES_EQUAL(*Mod(-15, 7), 6); + UNIT_ASSERT_VALUES_EQUAL(*Mod(15, 7), 1); + UNIT_ASSERT_VALUES_EQUAL(*Mod(14, 7), 0); + UNIT_ASSERT_VALUES_EQUAL(*Mod(-14, 7), 0); - UNIT_ASSERT_VALUES_EQUAL(*Mod(-15, -7), -1); - UNIT_ASSERT_VALUES_EQUAL(*Mod(15, -7), -6); - UNIT_ASSERT_VALUES_EQUAL(*Mod(14, -7), 0); - UNIT_ASSERT_VALUES_EQUAL(*Mod(-14, -7), 0); + UNIT_ASSERT_VALUES_EQUAL(*Mod(-15, -7), -1); + UNIT_ASSERT_VALUES_EQUAL(*Mod(15, -7), -6); + UNIT_ASSERT_VALUES_EQUAL(*Mod(14, -7), 0); + UNIT_ASSERT_VALUES_EQUAL(*Mod(-14, -7), 0); - UNIT_ASSERT(!Mod(-14, 0)); - } + UNIT_ASSERT(!Mod(-14, 0)); +} - Y_UNIT_TEST(Rem) { - UNIT_ASSERT_VALUES_EQUAL(*Rem(-1, 7), -1); - UNIT_ASSERT_VALUES_EQUAL(*Rem(1, 7), 1); - UNIT_ASSERT_VALUES_EQUAL(*Rem(0, 7), 0); +Y_UNIT_TEST(Rem) { + UNIT_ASSERT_VALUES_EQUAL(*Rem(-1, 7), -1); + UNIT_ASSERT_VALUES_EQUAL(*Rem(1, 7), 1); + UNIT_ASSERT_VALUES_EQUAL(*Rem(0, 7), 0); - UNIT_ASSERT_VALUES_EQUAL(*Rem(-1, -7), -1); - UNIT_ASSERT_VALUES_EQUAL(*Rem(1, -7), 1); - UNIT_ASSERT_VALUES_EQUAL(*Rem(0, -7), 0); + UNIT_ASSERT_VALUES_EQUAL(*Rem(-1, -7), -1); + UNIT_ASSERT_VALUES_EQUAL(*Rem(1, -7), 1); + UNIT_ASSERT_VALUES_EQUAL(*Rem(0, -7), 0); - UNIT_ASSERT_VALUES_EQUAL(*Rem(-15, 7), -1); - UNIT_ASSERT_VALUES_EQUAL(*Rem(15, 7), 1); - UNIT_ASSERT_VALUES_EQUAL(*Rem(14, 7), 0); - UNIT_ASSERT_VALUES_EQUAL(*Rem(-14, 7), 0); + UNIT_ASSERT_VALUES_EQUAL(*Rem(-15, 7), -1); + UNIT_ASSERT_VALUES_EQUAL(*Rem(15, 7), 1); + UNIT_ASSERT_VALUES_EQUAL(*Rem(14, 7), 0); + UNIT_ASSERT_VALUES_EQUAL(*Rem(-14, 7), 0); - UNIT_ASSERT_VALUES_EQUAL(*Rem(-15, -7), -1); - UNIT_ASSERT_VALUES_EQUAL(*Rem(15, -7), 1); - UNIT_ASSERT_VALUES_EQUAL(*Rem(14, -7), 0); - UNIT_ASSERT_VALUES_EQUAL(*Rem(-14, -7), 0); - UNIT_ASSERT(!Rem(-14, 0)); - } + UNIT_ASSERT_VALUES_EQUAL(*Rem(-15, -7), -1); + UNIT_ASSERT_VALUES_EQUAL(*Rem(15, -7), 1); + UNIT_ASSERT_VALUES_EQUAL(*Rem(14, -7), 0); + UNIT_ASSERT_VALUES_EQUAL(*Rem(-14, -7), 0); + 
UNIT_ASSERT(!Rem(-14, 0)); +} - Y_UNIT_TEST(NearbyInt) { - const i64 maxV = 9223372036854774784ll; - const i64 minV = -9223372036854774784ll; - UNIT_ASSERT_VALUES_EQUAL((i64)(double)(maxV), maxV); - UNIT_ASSERT_VALUES_EQUAL((i64)(double)(minV), minV); +Y_UNIT_TEST(NearbyInt) { + const i64 maxV = 9223372036854774784ll; + const i64 minV = -9223372036854774784ll; + UNIT_ASSERT_VALUES_EQUAL((i64)(double)(maxV), maxV); + UNIT_ASSERT_VALUES_EQUAL((i64)(double)(minV), minV); - UNIT_ASSERT_VALUES_UNEQUAL((i64)(double)(maxV + 1), maxV + 1); - } + UNIT_ASSERT_VALUES_UNEQUAL((i64)(double)(maxV + 1), maxV + 1); } +} // Y_UNIT_TEST_SUITE(TRound) diff --git a/yql/essentials/udfs/common/math/lib/ut/ya.make b/yql/essentials/udfs/common/math/lib/ut/ya.make index c1efcde3b47..2461b36ab3d 100644 --- a/yql/essentials/udfs/common/math/lib/ut/ya.make +++ b/yql/essentials/udfs/common/math/lib/ut/ya.make @@ -2,6 +2,8 @@ IF (OS_LINUX) IF (NOT WITH_VALGRIND) UNITTEST_FOR(yql/essentials/udfs/common/math/lib) + ENABLE(YQL_STYLE_CPP) + SRCS( round_ut.cpp ) diff --git a/yql/essentials/udfs/common/math/lib/ya.make b/yql/essentials/udfs/common/math/lib/ya.make index 54b882a8438..3c0faeb996a 100644 --- a/yql/essentials/udfs/common/math/lib/ya.make +++ b/yql/essentials/udfs/common/math/lib/ya.make @@ -1,5 +1,7 @@ LIBRARY() +ENABLE(YQL_STYLE_CPP) + SRCS( erfinv.cpp ) diff --git a/yql/essentials/udfs/common/math/math_ir.h b/yql/essentials/udfs/common/math/math_ir.h index ee788ee90ba..965d83424a5 100644 --- a/yql/essentials/udfs/common/math/math_ir.h +++ b/yql/essentials/udfs/common/math/math_ir.h @@ -16,80 +16,75 @@ namespace NUdf { XX(Pi, M_PI) \ XX(E, M_E) \ XX(Eps, std::numeric_limits<double>::epsilon()) \ - XX(RoundDownward, 0) \ - XX(RoundToNearest, 1) \ - XX(RoundTowardZero, 2) \ + XX(RoundDownward, 0) \ + XX(RoundToNearest, 1) \ + XX(RoundTowardZero, 2) \ XX(RoundUpward, 3) -#define SINGLE_ARG_FUNCS(XX) \ - XX(Abs, Abs) \ - XX(Acos, acos) \ - XX(Asin, asin) \ - XX(Asinh, asin) \ - XX(Atan, 
atan) \ - XX(Cbrt, cbrt) \ - XX(Ceil, ceil) \ - XX(Cos, cos) \ - XX(Cosh, cosh) \ - XX(Erf, Erf) \ - XX(Exp, exp) \ - XX(Exp2, Exp2) \ - XX(Fabs, fabs) \ - XX(Floor, std::floor) \ - XX(Lgamma, LogGamma) \ - XX(Rint, rint) \ - XX(Sin, sin) \ - XX(Sinh, sinh) \ - XX(Sqrt, sqrt) \ - XX(Tan, tan) \ - XX(Tanh, tanh) \ - XX(Tgamma, tgamma) \ - XX(Trunc, trunc) \ - XX(IsFinite, std::isfinite) \ - XX(IsInf, std::isinf) \ +#define SINGLE_ARG_FUNCS(XX) \ + XX(Abs, Abs) \ + XX(Acos, acos) \ + XX(Asin, asin) \ + XX(Asinh, asin) \ + XX(Atan, atan) \ + XX(Cbrt, cbrt) \ + XX(Ceil, ceil) \ + XX(Cos, cos) \ + XX(Cosh, cosh) \ + XX(Erf, Erf) \ + XX(Exp, exp) \ + XX(Exp2, Exp2) \ + XX(Fabs, fabs) \ + XX(Floor, std::floor) \ + XX(Lgamma, LogGamma) \ + XX(Rint, rint) \ + XX(Sin, sin) \ + XX(Sinh, sinh) \ + XX(Sqrt, sqrt) \ + XX(Tan, tan) \ + XX(Tanh, tanh) \ + XX(Tgamma, tgamma) \ + XX(Trunc, trunc) \ + XX(IsFinite, std::isfinite) \ + XX(IsInf, std::isinf) \ XX(IsNaN, std::isnan) -#define TWO_ARGS_FUNCS(XX) \ - XX(Atan2, atan2, double) \ - XX(Fmod, fmod, double) \ - XX(Hypot, hypot, double) \ - XX(Remainder, remainder, double) \ - XX(Pow, pow, double) \ +#define TWO_ARGS_FUNCS(XX) \ + XX(Atan2, atan2, double) \ + XX(Fmod, fmod, double) \ + XX(Hypot, hypot, double) \ + XX(Remainder, remainder, double) \ + XX(Pow, pow, double) \ XX(Ldexp, ldexp, int) -#define POSITIVE_SINGLE_ARG_FUNCS(XX) \ - XX(Log, log) \ - XX(Log2, Log2) \ +#define POSITIVE_SINGLE_ARG_FUNCS(XX) \ + XX(Log, log) \ + XX(Log2, Log2) \ XX(Log10, log10) - -#define CONST_IMPL(name, cnst) \ - extern "C" UDF_ALWAYS_INLINE \ - void name##IR(const IBoxedValue* /*pThis*/, TUnboxedValuePod* result, const IValueBuilder* /*valueBuilder*/, const TUnboxedValuePod* /*args*/) {\ - *result = TUnboxedValuePod(cnst); \ +#define CONST_IMPL(name, cnst) \ + extern "C" UDF_ALWAYS_INLINE void name##IR(const IBoxedValue* /*pThis*/, TUnboxedValuePod* result, const IValueBuilder* /*valueBuilder*/, const TUnboxedValuePod* /*args*/) { \ + *result = 
TUnboxedValuePod(cnst); \ } -#define SINGLE_ARG_IMPL(name, func) \ - extern "C" UDF_ALWAYS_INLINE \ - void name##IR(const IBoxedValue* /*pThis*/, TUnboxedValuePod* result, const IValueBuilder* /*valueBuilder*/, const TUnboxedValuePod* args) { \ - *result = TUnboxedValuePod(func(args[0].Get<double>())); \ +#define SINGLE_ARG_IMPL(name, func) \ + extern "C" UDF_ALWAYS_INLINE void name##IR(const IBoxedValue* /*pThis*/, TUnboxedValuePod* result, const IValueBuilder* /*valueBuilder*/, const TUnboxedValuePod* args) { \ + *result = TUnboxedValuePod(func(args[0].Get<double>())); \ } -#define TWO_ARGS_IMPL(name, func, secondType) \ - extern "C" UDF_ALWAYS_INLINE \ - void name##IR(const IBoxedValue* /*pThis*/, TUnboxedValuePod* result, const IValueBuilder* /*valueBuilder*/, const TUnboxedValuePod* args) { \ - *result = TUnboxedValuePod(func(args[0].Get<double>(), args[1].Get<secondType>())); \ +#define TWO_ARGS_IMPL(name, func, secondType) \ + extern "C" UDF_ALWAYS_INLINE void name##IR(const IBoxedValue* /*pThis*/, TUnboxedValuePod* result, const IValueBuilder* /*valueBuilder*/, const TUnboxedValuePod* args) { \ + *result = TUnboxedValuePod(func(args[0].Get<double>(), args[1].Get<secondType>())); \ } -#define POSITIVE_SINGLE_ARG_IMPL(name, func) \ - extern "C" UDF_ALWAYS_INLINE \ - void name##IR(const IBoxedValue* /*pThis*/, TUnboxedValuePod* result, const IValueBuilder* /*valueBuilder*/, const TUnboxedValuePod* args) { \ - double input = args[0].Get<double>(); \ - if (input > 0) { \ - *result = TUnboxedValuePod(func(input)); \ - } else { \ - *result = TUnboxedValuePod(static_cast<double>(NAN)); \ - } \ +#define POSITIVE_SINGLE_ARG_IMPL(name, func) \ + extern "C" UDF_ALWAYS_INLINE void name##IR(const IBoxedValue* /*pThis*/, TUnboxedValuePod* result, const IValueBuilder* /*valueBuilder*/, const TUnboxedValuePod* args) { \ + double input = args[0].Get<double>(); \ + if (input > 0) { \ + *result = TUnboxedValuePod(func(input)); \ + } else { \ + *result = 
TUnboxedValuePod(static_cast<double>(NAN)); \ + } \ } CONST_FUNCS(CONST_IMPL) @@ -97,13 +92,11 @@ SINGLE_ARG_FUNCS(SINGLE_ARG_IMPL) TWO_ARGS_FUNCS(TWO_ARGS_IMPL) POSITIVE_SINGLE_ARG_FUNCS(POSITIVE_SINGLE_ARG_IMPL) -extern "C" UDF_ALWAYS_INLINE -void SigmoidIR(const IBoxedValue* /*pThis*/, TUnboxedValuePod* result, const IValueBuilder* /*valueBuilder*/, const TUnboxedValuePod* args) { +extern "C" UDF_ALWAYS_INLINE void SigmoidIR(const IBoxedValue* /*pThis*/, TUnboxedValuePod* result, const IValueBuilder* /*valueBuilder*/, const TUnboxedValuePod* args) { *result = TUnboxedValuePod(1. / (1. + exp(-args[0].Get<double>()))); } -extern "C" UDF_ALWAYS_INLINE -void FuzzyEqualsIR(const IBoxedValue* /*pThis*/, TUnboxedValuePod* result, const IValueBuilder* /*valueBuilder*/, const TUnboxedValuePod* args) { +extern "C" UDF_ALWAYS_INLINE void FuzzyEqualsIR(const IBoxedValue* /*pThis*/, TUnboxedValuePod* result, const IValueBuilder* /*valueBuilder*/, const TUnboxedValuePod* args) { if (!args[2]) { *result = TUnboxedValuePod(FuzzyEquals(args[0].Get<double>(), args[1].Get<double>())); } else { @@ -112,39 +105,33 @@ void FuzzyEqualsIR(const IBoxedValue* /*pThis*/, TUnboxedValuePod* result, const } } -extern "C" UDF_ALWAYS_INLINE -void RoundIR(const IBoxedValue* /*pThis*/, TUnboxedValuePod* result, const IValueBuilder* /*valueBuilder*/, const TUnboxedValuePod* args) { +extern "C" UDF_ALWAYS_INLINE void RoundIR(const IBoxedValue* /*pThis*/, TUnboxedValuePod* result, const IValueBuilder* /*valueBuilder*/, const TUnboxedValuePod* args) { const double val = NMathUdf::RoundToDecimal<long double>(args[0].Get<double>(), args[1].GetOrDefault<int>(0)); *result = TUnboxedValuePod(val); } -extern "C" UDF_ALWAYS_INLINE -void ErfInvIR(const IBoxedValue* /*pThis*/, TUnboxedValuePod* result, const IValueBuilder* /*valueBuilder*/, const TUnboxedValuePod* args) { +extern "C" UDF_ALWAYS_INLINE void ErfInvIR(const IBoxedValue* /*pThis*/, TUnboxedValuePod* result, const IValueBuilder* /*valueBuilder*/, 
const TUnboxedValuePod* args) { *result = TUnboxedValuePod(NMathUdf::ErfInv(args[0].Get<double>())); } -extern "C" UDF_ALWAYS_INLINE -void ErfcInvIR(const IBoxedValue* /*pThis*/, TUnboxedValuePod* result, const IValueBuilder* /*valueBuilder*/, const TUnboxedValuePod* args) { +extern "C" UDF_ALWAYS_INLINE void ErfcInvIR(const IBoxedValue* /*pThis*/, TUnboxedValuePod* result, const IValueBuilder* /*valueBuilder*/, const TUnboxedValuePod* args) { *result = TUnboxedValuePod(NMathUdf::ErfInv(1. - args[0].Get<double>())); } -extern "C" UDF_ALWAYS_INLINE -void ModIR(const IBoxedValue* /*pThis*/, TUnboxedValuePod* result, const IValueBuilder* /*valueBuilder*/, const TUnboxedValuePod* args) { +extern "C" UDF_ALWAYS_INLINE void ModIR(const IBoxedValue* /*pThis*/, TUnboxedValuePod* result, const IValueBuilder* /*valueBuilder*/, const TUnboxedValuePod* args) { const auto val = NMathUdf::Mod(args[0].Get<i64>(), args[1].Get<i64>()); *result = val ? TUnboxedValuePod(*val) : TUnboxedValuePod(); } -extern "C" UDF_ALWAYS_INLINE -void RemIR(const IBoxedValue* /*pThis*/, TUnboxedValuePod* result, const IValueBuilder* /*valueBuilder*/, const TUnboxedValuePod* args) { +extern "C" UDF_ALWAYS_INLINE void RemIR(const IBoxedValue* /*pThis*/, TUnboxedValuePod* result, const IValueBuilder* /*valueBuilder*/, const TUnboxedValuePod* args) { const auto val = NMathUdf::Rem(args[0].Get<i64>(), args[1].Get<i64>()); *result = val ? TUnboxedValuePod(*val) : TUnboxedValuePod(); } -extern "C" UDF_ALWAYS_INLINE -void NearbyIntIR(const IBoxedValue* /*pThis*/, TUnboxedValuePod* result, const IValueBuilder* /*valueBuilder*/, const TUnboxedValuePod* args) { +extern "C" UDF_ALWAYS_INLINE void NearbyIntIR(const IBoxedValue* /*pThis*/, TUnboxedValuePod* result, const IValueBuilder* /*valueBuilder*/, const TUnboxedValuePod* args) { const auto val = NMathUdf::NearbyInt(args[0].Get<double>(), args[1].Get<ui32>()); *result = val ? 
TUnboxedValuePod(*val) : TUnboxedValuePod(); } -} // NUdf -} // NYql +} // namespace NUdf +} // namespace NYql diff --git a/yql/essentials/udfs/common/math/math_udf.cpp b/yql/essentials/udfs/common/math/math_udf.cpp index e769ed3bee9..05b1d38c172 100644 --- a/yql/essentials/udfs/common/math/math_udf.cpp +++ b/yql/essentials/udfs/common/math/math_udf.cpp @@ -7,142 +7,142 @@ extern const char TagRoundingMode[] = "MathRoundingMode"; using TTaggedRoundingMode = NYql::NUdf::TTagged<ui32, TagRoundingMode>; -#define MATH_UDF_MAP(XX, XXL) \ - XX(Pi, double(), 0) \ - XX(E, double(), 0) \ - XX(Eps, double(), 0) \ - XX(RoundDownward, TTaggedRoundingMode(), 0) \ - XX(RoundToNearest, TTaggedRoundingMode(), 0) \ - XX(RoundTowardZero, TTaggedRoundingMode(), 0) \ - XX(RoundUpward, TTaggedRoundingMode(), 0) \ - XX(Abs, double(TAutoMap<double>), 0) \ - XX(Acos, double(TAutoMap<double>), 0) \ - XX(Asin, double(TAutoMap<double>), 0) \ - XX(Asinh, double(TAutoMap<double>), 0) \ - XX(Atan, double(TAutoMap<double>), 0) \ - XX(Cbrt, double(TAutoMap<double>), 0) \ - XX(Ceil, double(TAutoMap<double>), 0) \ - XX(Cos, double(TAutoMap<double>), 0) \ - XX(Cosh, double(TAutoMap<double>), 0) \ - XX(Erf, double(TAutoMap<double>), 0) \ - XX(ErfInv, double(TAutoMap<double>), 0) \ - XX(ErfcInv, double(TAutoMap<double>), 0) \ - XX(Exp, double(TAutoMap<double>), 0) \ - XX(Exp2, double(TAutoMap<double>), 0) \ - XX(Fabs, double(TAutoMap<double>), 0) \ - XX(Floor, double(TAutoMap<double>), 0) \ - XX(Lgamma, double(TAutoMap<double>), 0) \ - XX(Rint, double(TAutoMap<double>), 0) \ - XX(Sin, double(TAutoMap<double>), 0) \ - XX(Sinh, double(TAutoMap<double>), 0) \ - XX(Sqrt, double(TAutoMap<double>), 0) \ - XX(Tan, double(TAutoMap<double>), 0) \ - XX(Tanh, double(TAutoMap<double>), 0) \ - XX(Tgamma, double(TAutoMap<double>), 0) \ - XX(Trunc, double(TAutoMap<double>), 0) \ - XX(Log, double(TAutoMap<double>), 0) \ - XX(Log2, double(TAutoMap<double>), 0) \ - XX(Log10, double(TAutoMap<double>), 0) \ - XX(Atan2, 
double(TAutoMap<double>, TAutoMap<double>), 0) \ - XX(Fmod, double(TAutoMap<double>, TAutoMap<double>), 0) \ - XX(Hypot, double(TAutoMap<double>, TAutoMap<double>), 0) \ - XX(Remainder, double(TAutoMap<double>, TAutoMap<double>), 0) \ - XX(Pow, double(TAutoMap<double>, TAutoMap<double>), 0) \ - XX(Ldexp, double(TAutoMap<double>, TAutoMap<int>), 0) \ - XX(IsFinite, bool(TAutoMap<double>), 0) \ - XX(IsInf, bool(TAutoMap<double>), 0) \ - XX(IsNaN, bool(TAutoMap<double>), 0) \ - XX(Sigmoid, double(TAutoMap<double>), 0) \ - XX(FuzzyEquals, bool(TAutoMap<double>, TAutoMap<double>, TEpsilon), 1) \ - XX(Mod, TOptional<i64>(TAutoMap<i64>, i64), 0) \ - XX(Rem, TOptional<i64>(TAutoMap<i64>, i64), 0) \ +#define MATH_UDF_MAP(XX, XXL) \ + XX(Pi, double(), 0) \ + XX(E, double(), 0) \ + XX(Eps, double(), 0) \ + XX(RoundDownward, TTaggedRoundingMode(), 0) \ + XX(RoundToNearest, TTaggedRoundingMode(), 0) \ + XX(RoundTowardZero, TTaggedRoundingMode(), 0) \ + XX(RoundUpward, TTaggedRoundingMode(), 0) \ + XX(Abs, double(TAutoMap<double>), 0) \ + XX(Acos, double(TAutoMap<double>), 0) \ + XX(Asin, double(TAutoMap<double>), 0) \ + XX(Asinh, double(TAutoMap<double>), 0) \ + XX(Atan, double(TAutoMap<double>), 0) \ + XX(Cbrt, double(TAutoMap<double>), 0) \ + XX(Ceil, double(TAutoMap<double>), 0) \ + XX(Cos, double(TAutoMap<double>), 0) \ + XX(Cosh, double(TAutoMap<double>), 0) \ + XX(Erf, double(TAutoMap<double>), 0) \ + XX(ErfInv, double(TAutoMap<double>), 0) \ + XX(ErfcInv, double(TAutoMap<double>), 0) \ + XX(Exp, double(TAutoMap<double>), 0) \ + XX(Exp2, double(TAutoMap<double>), 0) \ + XX(Fabs, double(TAutoMap<double>), 0) \ + XX(Floor, double(TAutoMap<double>), 0) \ + XX(Lgamma, double(TAutoMap<double>), 0) \ + XX(Rint, double(TAutoMap<double>), 0) \ + XX(Sin, double(TAutoMap<double>), 0) \ + XX(Sinh, double(TAutoMap<double>), 0) \ + XX(Sqrt, double(TAutoMap<double>), 0) \ + XX(Tan, double(TAutoMap<double>), 0) \ + XX(Tanh, double(TAutoMap<double>), 0) \ + XX(Tgamma, 
double(TAutoMap<double>), 0) \ + XX(Trunc, double(TAutoMap<double>), 0) \ + XX(Log, double(TAutoMap<double>), 0) \ + XX(Log2, double(TAutoMap<double>), 0) \ + XX(Log10, double(TAutoMap<double>), 0) \ + XX(Atan2, double(TAutoMap<double>, TAutoMap<double>), 0) \ + XX(Fmod, double(TAutoMap<double>, TAutoMap<double>), 0) \ + XX(Hypot, double(TAutoMap<double>, TAutoMap<double>), 0) \ + XX(Remainder, double(TAutoMap<double>, TAutoMap<double>), 0) \ + XX(Pow, double(TAutoMap<double>, TAutoMap<double>), 0) \ + XX(Ldexp, double(TAutoMap<double>, TAutoMap<int>), 0) \ + XX(IsFinite, bool(TAutoMap<double>), 0) \ + XX(IsInf, bool(TAutoMap<double>), 0) \ + XX(IsNaN, bool(TAutoMap<double>), 0) \ + XX(Sigmoid, double(TAutoMap<double>), 0) \ + XX(FuzzyEquals, bool(TAutoMap<double>, TAutoMap<double>, TEpsilon), 1) \ + XX(Mod, TOptional<i64>(TAutoMap<i64>, i64), 0) \ + XX(Rem, TOptional<i64>(TAutoMap<i64>, i64), 0) \ XXL(Round, double(TAutoMap<double>, TPrecision), 1) -#define MATH_UDF_MAP_WITHOUT_IR(XX) \ +#define MATH_UDF_MAP_WITHOUT_IR(XX) \ XX(NearbyInt, TOptional<i64>(TAutoMap<double>, TTaggedRoundingMode), 0) #ifdef DISABLE_IR -#define MATH_STRICT_UDF(name, signature, optionalArgsCount) \ - SIMPLE_STRICT_UDF_WITH_OPTIONAL_ARGS(T##name, signature, optionalArgsCount) { \ - TUnboxedValuePod res; \ - name##IR(this, &res, valueBuilder, args); \ - return res; \ - } + #define MATH_STRICT_UDF(name, signature, optionalArgsCount) \ + SIMPLE_STRICT_UDF_WITH_OPTIONAL_ARGS(T##name, signature, optionalArgsCount) { \ + TUnboxedValuePod res; \ + name##IR(this, &res, valueBuilder, args); \ + return res; \ + } #else -#define MATH_STRICT_UDF(name, signature, optionalArgsCount) \ - SIMPLE_STRICT_UDF_WITH_IR(T##name, signature, optionalArgsCount, "/llvm_bc/Math", #name "IR") { \ - TUnboxedValuePod res; \ - name##IR(this, &res, valueBuilder, args); \ - return res; \ - } + #define MATH_STRICT_UDF(name, signature, optionalArgsCount) \ + SIMPLE_STRICT_UDF_WITH_IR(T##name, signature, optionalArgsCount, 
"/llvm_bc/Math", #name "IR") { \ + TUnboxedValuePod res; \ + name##IR(this, &res, valueBuilder, args); \ + return res; \ + } #endif -#define MATH_STRICT_UDF_WITHOUT_IR(name, signature, optionalArgsCount) \ - SIMPLE_STRICT_UDF_WITH_OPTIONAL_ARGS(T##name, signature, optionalArgsCount) { \ - TUnboxedValuePod res; \ - name##IR(this, &res, valueBuilder, args); \ - return res; \ +#define MATH_STRICT_UDF_WITHOUT_IR(name, signature, optionalArgsCount) \ + SIMPLE_STRICT_UDF_WITH_OPTIONAL_ARGS(T##name, signature, optionalArgsCount) { \ + TUnboxedValuePod res; \ + name##IR(this, &res, valueBuilder, args); \ + return res; \ } -#define REGISTER_MATH_UDF(udfName, ...) T##udfName, -#define REGISTER_MATH_UDF_LAST(udfName, ...) T##udfName +#define REGISTER_MATH_UDF(udfName, ...) T##udfName, +#define REGISTER_MATH_UDF_LAST(udfName, ...) T##udfName using namespace NKikimr; using namespace NUdf; namespace { - const char SwapBytesUDF[] = "SwapBytes"; - template <class TUserType> - class TSwapBytesFunc: public TBoxedValue { - private: - TSourcePosition Pos_; +const char SwapBytesUDF[] = "SwapBytes"; +template <class TUserType> +class TSwapBytesFunc: public TBoxedValue { +private: + TSourcePosition Pos_; - TSwapBytesFunc(TSourcePosition pos) - : Pos_(pos) - { - } + TSwapBytesFunc(TSourcePosition pos) + : Pos_(pos) + { + } - TUnboxedValue Run(const IValueBuilder* valueBuilder, const TUnboxedValuePod* args) const override { - Y_UNUSED(valueBuilder); - if constexpr (sizeof(TUserType) == 1) { - return args[0]; - } - return TUnboxedValuePod(SwapBytes(args[0].Get<TUserType>())); + TUnboxedValue Run(const IValueBuilder* valueBuilder, const TUnboxedValuePod* args) const override { + Y_UNUSED(valueBuilder); + if constexpr (sizeof(TUserType) == 1) { + return args[0]; } + return TUnboxedValuePod(SwapBytes(args[0].Get<TUserType>())); + } - public: - static void DeclareSignature( - TStringRef name, - TType* userType, - IFunctionTypeInfoBuilder& builder, - bool typesOnly) - { - Y_UNUSED(name); - 
Y_UNUSED(userType); +public: + static void DeclareSignature( + TStringRef name, + TType* userType, + IFunctionTypeInfoBuilder& builder, + bool typesOnly) + { + Y_UNUSED(name); + Y_UNUSED(userType); - builder.SimpleSignature<TUserType(TAutoMap<TUserType>)>() - .IsStrict() - .SetMinLangVer(NYql::MakeLangVersion(2025, 3)); - if (!typesOnly) { - builder.Implementation(new TSwapBytesFunc<TUserType>(builder.GetSourcePosition())); - } + builder.SimpleSignature<TUserType(TAutoMap<TUserType>)>() + .IsStrict() + .SetMinLangVer(NYql::MakeLangVersion(2025, 3)); + if (!typesOnly) { + builder.Implementation(new TSwapBytesFunc<TUserType>(builder.GetSourcePosition())); } - }; + } +}; - extern const char epsilon[] = "Epsilon"; - using TEpsilon = TNamedArg<double, epsilon>; +extern const char epsilon[] = "Epsilon"; +using TEpsilon = TNamedArg<double, epsilon>; - extern const char precision[] = "Precision"; - using TPrecision = TNamedArg<int, precision>; +extern const char precision[] = "Precision"; +using TPrecision = TNamedArg<int, precision>; - MATH_UDF_MAP(MATH_STRICT_UDF, MATH_STRICT_UDF) +MATH_UDF_MAP(MATH_STRICT_UDF, MATH_STRICT_UDF) - MATH_UDF_MAP_WITHOUT_IR(MATH_STRICT_UDF_WITHOUT_IR) +MATH_UDF_MAP_WITHOUT_IR(MATH_STRICT_UDF_WITHOUT_IR) - SIMPLE_MODULE(TMathModule, - MATH_UDF_MAP_WITHOUT_IR(REGISTER_MATH_UDF) - TUserDataTypeFuncFactory<true, false, SwapBytesUDF, TSwapBytesFunc, ui8, ui16, ui32, ui64>, - MATH_UDF_MAP(REGISTER_MATH_UDF, REGISTER_MATH_UDF_LAST)) -} +SIMPLE_MODULE(TMathModule, + MATH_UDF_MAP_WITHOUT_IR(REGISTER_MATH_UDF) + TUserDataTypeFuncFactory<true, false, SwapBytesUDF, TSwapBytesFunc, ui8, ui16, ui32, ui64>, + MATH_UDF_MAP(REGISTER_MATH_UDF, REGISTER_MATH_UDF_LAST)) +} // namespace REGISTER_MODULES(TMathModule) diff --git a/yql/essentials/udfs/common/math/ya.make b/yql/essentials/udfs/common/math/ya.make index dfb5fa7c994..b4dbbc6afd2 100644 --- a/yql/essentials/udfs/common/math/ya.make +++ b/yql/essentials/udfs/common/math/ya.make @@ -6,6 +6,8 @@ 
YQL_UDF_CONTRIB(math_udf) 0 ) + ENABLE(YQL_STYLE_CPP) + SRCS( math_udf.cpp ) diff --git a/yql/essentials/udfs/common/pire/pire_udf.cpp b/yql/essentials/udfs/common/pire/pire_udf.cpp index de2a75955e3..1357107a12b 100644 --- a/yql/essentials/udfs/common/pire/pire_udf.cpp +++ b/yql/essentials/udfs/common/pire/pire_udf.cpp @@ -14,345 +14,350 @@ using namespace NKikimr; using namespace NUdf; namespace { - class TPireUdfBase: public TBoxedValue { - protected: - TPireUdfBase(TSourcePosition pos) - : Pos_(pos) - {} +class TPireUdfBase: public TBoxedValue { +protected: + TPireUdfBase(TSourcePosition pos) + : Pos_(pos) + { + } - void SetCommonOptions(std::string_view& regex, TFsm::TOptions& options) { - if (regex.size() >= 4U && regex.substr(0U, 4U) == "(?i)") { - options.SetCaseInsensitive(true); - regex.remove_prefix(4U); - } - if (UTF8Detect(regex) == UTF8) { - options.SetCharset(CODES_UTF8); - } + void SetCommonOptions(std::string_view& regex, TFsm::TOptions& options) { + if (regex.size() >= 4U && regex.substr(0U, 4U) == "(?i)") { + options.SetCaseInsensitive(true); + regex.remove_prefix(4U); } + if (UTF8Detect(regex) == UTF8) { + options.SetCharset(CODES_UTF8); + } + } - TSourcePosition Pos_; - }; + TSourcePosition Pos_; +}; - class TPireMatch: public TPireUdfBase { +class TPireMatch: public TPireUdfBase { +public: + class TFactory: public TPireUdfBase { public: - class TFactory: public TPireUdfBase { - public: - TFactory( - bool surroundMode, - bool multiMode, - TSourcePosition pos, - size_t regexpsCount = 0) - : TPireUdfBase(pos) - , SurroundMode_(surroundMode) - , MultiMode_(multiMode) - , RegexpsCount_(regexpsCount) - { - } - - private: - TUnboxedValue Run( - const IValueBuilder* valueBuilder, - const TUnboxedValuePod* args) const final { - return TUnboxedValuePod( - new TPireMatch( - valueBuilder, - args[0], - SurroundMode_, - MultiMode_, - Pos_, - RegexpsCount_)); - } - - bool SurroundMode_; - bool MultiMode_; - size_t RegexpsCount_; - }; - - static const 
TStringRef& Name(bool surroundMode, bool multiMode) { - static auto match = TStringRef::Of("Match"); - static auto grep = TStringRef::Of("Grep"); - static auto multiMatch = TStringRef::Of("MultiMatch"); - static auto multiGrep = TStringRef::Of("MultiGrep"); - if (surroundMode) { - return multiMode ? multiGrep : grep; - } else { - return multiMode ? multiMatch : match; - } - } - - TPireMatch( - const IValueBuilder* valueBuilder, - const TUnboxedValuePod& runConfig, + TFactory( bool surroundMode, bool multiMode, TSourcePosition pos, - size_t regexpsCount) + size_t regexpsCount = 0) : TPireUdfBase(pos) + , SurroundMode_(surroundMode) , MultiMode_(multiMode) , RegexpsCount_(regexpsCount) - , SurroundMode_(surroundMode) { - Y_UNUSED(valueBuilder); - try { - std::string_view regex(runConfig.AsStringRef()); - TFsm::TOptions options; - options.SetSurround(surroundMode); - SetCommonOptions(regex, options); - if (multiMode) { - std::vector<std::string_view> parts; - StringSplitter(regex).Split('\n').AddTo(&parts); - for (const auto& part : parts) { - if (!part.empty()) { - if (Fsm_) try { + } + + private: + TUnboxedValue Run( + const IValueBuilder* valueBuilder, + const TUnboxedValuePod* args) const final { + return TUnboxedValuePod( + new TPireMatch( + valueBuilder, + args[0], + SurroundMode_, + MultiMode_, + Pos_, + RegexpsCount_)); + } + + bool SurroundMode_; + bool MultiMode_; + size_t RegexpsCount_; + }; + + static const TStringRef& Name(bool surroundMode, bool multiMode) { + static auto match = TStringRef::Of("Match"); + static auto grep = TStringRef::Of("Grep"); + static auto multiMatch = TStringRef::Of("MultiMatch"); + static auto multiGrep = TStringRef::Of("MultiGrep"); + if (surroundMode) { + return multiMode ? multiGrep : grep; + } else { + return multiMode ? 
multiMatch : match; + } + } + + TPireMatch( + const IValueBuilder* valueBuilder, + const TUnboxedValuePod& runConfig, + bool surroundMode, + bool multiMode, + TSourcePosition pos, + size_t regexpsCount) + : TPireUdfBase(pos) + , MultiMode_(multiMode) + , RegexpsCount_(regexpsCount) + , SurroundMode_(surroundMode) + { + Y_UNUSED(valueBuilder); + try { + std::string_view regex(runConfig.AsStringRef()); + TFsm::TOptions options; + options.SetSurround(surroundMode); + SetCommonOptions(regex, options); + if (multiMode) { + std::vector<std::string_view> parts; + StringSplitter(regex).Split('\n').AddTo(&parts); + for (const auto& part : parts) { + if (!part.empty()) { + if (Fsm_) { + try { *Fsm_ = *Fsm_ | TFsm(TString(part), options); } catch (const yexception&) { UdfTerminate((TStringBuilder() << Pos_ << " Failed to glue up regexes, probably the finite state machine appeared to be too large").c_str()); - } else { - Fsm_.Reset(new TFsm(TString(part), options)); } + } else { + Fsm_.Reset(new TFsm(TString(part), options)); } } - } else { - Fsm_.Reset(new TFsm(TString(regex), options)); } - } catch (const std::exception& e) { - UdfTerminate((TStringBuilder() << Pos_ << " " << e.what()).c_str()); + } else { + Fsm_.Reset(new TFsm(TString(regex), options)); } + } catch (const std::exception& e) { + UdfTerminate((TStringBuilder() << Pos_ << " " << e.what()).c_str()); } + } - private: - TUnboxedValue Run( - const IValueBuilder* valueBuilder, - const TUnboxedValuePod* args) const final try { - TUnboxedValue* items = nullptr; - TUnboxedValue tuple; - size_t i = 0; +private: + TUnboxedValue Run( + const IValueBuilder* valueBuilder, + const TUnboxedValuePod* args) const final try { + TUnboxedValue* items = nullptr; + TUnboxedValue tuple; + size_t i = 0; - if (MultiMode_) { - tuple = valueBuilder->NewArray(RegexpsCount_, items); + if (MultiMode_) { + tuple = valueBuilder->NewArray(RegexpsCount_, items); - for (i = 0; i < RegexpsCount_; ++i) { - items[i] = TUnboxedValuePod(false); - } 
+ for (i = 0; i < RegexpsCount_; ++i) { + items[i] = TUnboxedValuePod(false); } + } - if (args[0]) { - const auto input = args[0].AsStringRef(); - TMatcher matcher(*Fsm_); - const bool isMatch = matcher.Match(input.Data(), input.Size(), SurroundMode_, SurroundMode_).Final(); - if (MultiMode_) { - if (isMatch) { - const auto& matchedRegexps = matcher.MatchedRegexps(); - size_t matchesCount = matchedRegexps.second - matchedRegexps.first; + if (args[0]) { + const auto input = args[0].AsStringRef(); + TMatcher matcher(*Fsm_); + const bool isMatch = matcher.Match(input.Data(), input.Size(), SurroundMode_, SurroundMode_).Final(); + if (MultiMode_) { + if (isMatch) { + const auto& matchedRegexps = matcher.MatchedRegexps(); + size_t matchesCount = matchedRegexps.second - matchedRegexps.first; - for (i = 0; i < matchesCount; ++i) { - items[matchedRegexps.first[i]] = TUnboxedValuePod(true); - } + for (i = 0; i < matchesCount; ++i) { + items[matchedRegexps.first[i]] = TUnboxedValuePod(true); } - return tuple; - - } else { - return TUnboxedValuePod(isMatch); } + return tuple; } else { - return MultiMode_ ? tuple : TUnboxedValue(TUnboxedValuePod(false)); + return TUnboxedValuePod(isMatch); } - } catch (const std::exception& e) { - UdfTerminate((TStringBuilder() << Pos_ << " " << e.what()).c_str()); + + } else { + return MultiMode_ ? 
tuple : TUnboxedValue(TUnboxedValuePod(false)); } + } catch (const std::exception& e) { + UdfTerminate((TStringBuilder() << Pos_ << " " << e.what()).c_str()); + } - private: - TUniquePtr<TFsm> Fsm_; - bool MultiMode_; - size_t RegexpsCount_; - bool SurroundMode_; - }; +private: + TUniquePtr<TFsm> Fsm_; + bool MultiMode_; + size_t RegexpsCount_; + bool SurroundMode_; +}; - class TPireCapture: public TPireUdfBase { +class TPireCapture: public TPireUdfBase { +public: + class TFactory: public TPireUdfBase { public: - class TFactory: public TPireUdfBase { - public: - TFactory(TSourcePosition pos) - : TPireUdfBase(pos) - {} - - private: - TUnboxedValue Run(const IValueBuilder*, const TUnboxedValuePod* args) const final try { - return TUnboxedValuePod(new TPireCapture(args[0], Pos_)); - } catch (const std::exception& e) { - UdfTerminate((TStringBuilder() << Pos_ << " " << e.what()).c_str()); - } - }; - - static const TStringRef& Name() { - static auto name = TStringRef::Of("Capture"); - return name; - } - - TPireCapture(const TUnboxedValuePod& runConfig, TSourcePosition pos) + TFactory(TSourcePosition pos) : TPireUdfBase(pos) { - std::string_view regex(runConfig.AsStringRef()); - TFsm::TOptions options; - SetCommonOptions(regex, options); - Fsm_.Reset(new TSlowCapturingFsm(TString(regex), options)); } private: - TUnboxedValue Run( - const IValueBuilder* valueBuilder, - const TUnboxedValuePod* args) const final try { - if (args[0]) { - const std::string_view input = args[0].AsStringRef(); - - TSlowSearcher searcher(*Fsm_); - searcher.Search(input.data(), input.size()); - - if (searcher.Captured()) { - const auto& captured = searcher.GetCaptured(); - return valueBuilder->SubString(args[0], std::distance(input.begin(), captured.begin()), captured.length()); - } - } - - return TUnboxedValue(); + TUnboxedValue Run(const IValueBuilder*, const TUnboxedValuePod* args) const final try { + return TUnboxedValuePod(new TPireCapture(args[0], Pos_)); } catch (const std::exception& e) { 
UdfTerminate((TStringBuilder() << Pos_ << " " << e.what()).c_str()); } - - TUniquePtr<TSlowCapturingFsm> Fsm_; }; - class TPireReplace: public TPireUdfBase { - public: - class TFactory: public TPireUdfBase { - public: - TFactory(TSourcePosition pos) - : TPireUdfBase(pos) - {} + static const TStringRef& Name() { + static auto name = TStringRef::Of("Capture"); + return name; + } - private: - TUnboxedValue Run(const IValueBuilder*, const TUnboxedValuePod* args) const final try { - return TUnboxedValuePod(new TPireReplace(args[0], Pos_)); - } catch (const std::exception& e) { - UdfTerminate((TStringBuilder() << Pos_ << " " << e.what()).c_str()); - } - }; + TPireCapture(const TUnboxedValuePod& runConfig, TSourcePosition pos) + : TPireUdfBase(pos) + { + std::string_view regex(runConfig.AsStringRef()); + TFsm::TOptions options; + SetCommonOptions(regex, options); + Fsm_.Reset(new TSlowCapturingFsm(TString(regex), options)); + } + +private: + TUnboxedValue Run( + const IValueBuilder* valueBuilder, + const TUnboxedValuePod* args) const final try { + if (args[0]) { + const std::string_view input = args[0].AsStringRef(); - static const TStringRef& Name() { - static auto name = TStringRef::Of("Replace"); - return name; + TSlowSearcher searcher(*Fsm_); + searcher.Search(input.data(), input.size()); + + if (searcher.Captured()) { + const auto& captured = searcher.GetCaptured(); + return valueBuilder->SubString(args[0], std::distance(input.begin(), captured.begin()), captured.length()); + } } - TPireReplace(const TUnboxedValuePod& runConfig, TSourcePosition pos) + return TUnboxedValue(); + } catch (const std::exception& e) { + UdfTerminate((TStringBuilder() << Pos_ << " " << e.what()).c_str()); + } + + TUniquePtr<TSlowCapturingFsm> Fsm_; +}; + +class TPireReplace: public TPireUdfBase { +public: + class TFactory: public TPireUdfBase { + public: + TFactory(TSourcePosition pos) : TPireUdfBase(pos) { - std::string_view regex(runConfig.AsStringRef()); - TFsm::TOptions options; - 
SetCommonOptions(regex, options); - Fsm_.Reset(new TSlowCapturingFsm(TString(regex), options)); } private: - TUnboxedValue Run( - const IValueBuilder* valueBuilder, - const TUnboxedValuePod* args) const final try { - if (args[0]) { - const std::string_view input(args[0].AsStringRef()); - - TSlowSearcher s(*Fsm_); - s.Search(input.data(), input.size()); - if (s.Captured()) { - const auto& captured = s.GetCaptured(); - const TString replacement(args[1].AsStringRef()); - TString replaced(args[0].AsStringRef()); - replaced.replace(std::distance(input.begin(), captured.begin()), captured.length(), replacement); - return valueBuilder->NewString(replaced); - } else { - return TUnboxedValue(args[0]); - } - } else { - return TUnboxedValue(); - } + TUnboxedValue Run(const IValueBuilder*, const TUnboxedValuePod* args) const final try { + return TUnboxedValuePod(new TPireReplace(args[0], Pos_)); } catch (const std::exception& e) { UdfTerminate((TStringBuilder() << Pos_ << " " << e.what()).c_str()); } - - TUniquePtr<TSlowCapturingFsm> Fsm_; }; - class TPireModule: public IUdfModule { - public: - TStringRef Name() const { - return TStringRef::Of("Pire"); - } + static const TStringRef& Name() { + static auto name = TStringRef::Of("Replace"); + return name; + } - void CleanupOnTerminate() const final { - } + TPireReplace(const TUnboxedValuePod& runConfig, TSourcePosition pos) + : TPireUdfBase(pos) + { + std::string_view regex(runConfig.AsStringRef()); + TFsm::TOptions options; + SetCommonOptions(regex, options); + Fsm_.Reset(new TSlowCapturingFsm(TString(regex), options)); + } + +private: + TUnboxedValue Run( + const IValueBuilder* valueBuilder, + const TUnboxedValuePod* args) const final try { + if (args[0]) { + const std::string_view input(args[0].AsStringRef()); - void GetAllFunctions(IFunctionsSink& sink) const final { - sink.Add(TPireMatch::Name(true, true))->SetTypeAwareness(); - sink.Add(TPireMatch::Name(false, true))->SetTypeAwareness(); - sink.Add(TPireMatch::Name(true, 
false)); - sink.Add(TPireMatch::Name(false, false)); - sink.Add(TPireCapture::Name()); - sink.Add(TPireReplace::Name()); + TSlowSearcher s(*Fsm_); + s.Search(input.data(), input.size()); + if (s.Captured()) { + const auto& captured = s.GetCaptured(); + const TString replacement(args[1].AsStringRef()); + TString replaced(args[0].AsStringRef()); + replaced.replace(std::distance(input.begin(), captured.begin()), captured.length(), replacement); + return valueBuilder->NewString(replaced); + } else { + return TUnboxedValue(args[0]); + } + } else { + return TUnboxedValue(); } + } catch (const std::exception& e) { + UdfTerminate((TStringBuilder() << Pos_ << " " << e.what()).c_str()); + } - void BuildFunctionTypeInfo( - const TStringRef& name, - TType*, - const TStringRef& typeConfig, - ui32 flags, - IFunctionTypeInfoBuilder& builder) const final try { - const bool typesOnly = (flags & TFlags::TypesOnly); - const bool isMatch = (TPireMatch::Name(false, false) == name); - const bool isGrep = (TPireMatch::Name(true, false) == name); - const bool isMultiMatch = (TPireMatch::Name(false, true) == name); - const bool isMultiGrep = (TPireMatch::Name(true, true) == name); + TUniquePtr<TSlowCapturingFsm> Fsm_; +}; - if (isMatch || isGrep) { - builder.SimpleSignature<bool(TOptional<char*>)>() - .RunConfig<const char*>(); +class TPireModule: public IUdfModule { +public: + TStringRef Name() const { + return TStringRef::Of("Pire"); + } - if (!typesOnly) { - builder.Implementation(new TPireMatch::TFactory(isGrep, false, builder.GetSourcePosition())); - } - } else if (isMultiMatch || isMultiGrep) { - const auto boolType = builder.SimpleType<bool>(); - const auto optionalStringType = builder.Optional()->Item<char*>().Build(); - const std::string_view regexp(typeConfig); - const size_t regexpCount = std::count(regexp.begin(), regexp.end(), '\n') + 1; - const auto tuple = builder.Tuple(); - for (size_t i = 0; i < regexpCount; ++i) { - tuple->Add(boolType); - } - const auto tupleType = 
tuple->Build(); - builder.Args(1)->Add(optionalStringType).Done().Returns(tupleType).RunConfig<char*>(); + void CleanupOnTerminate() const final { + } - if (!typesOnly) { - builder.Implementation(new TPireMatch::TFactory(isMultiGrep, true, builder.GetSourcePosition(), regexpCount)); - } - } else if (TPireCapture::Name() == name) { - builder.SimpleSignature<TOptional<char*>(TOptional<char*>)>() - .RunConfig<char*>(); + void GetAllFunctions(IFunctionsSink& sink) const final { + sink.Add(TPireMatch::Name(true, true))->SetTypeAwareness(); + sink.Add(TPireMatch::Name(false, true))->SetTypeAwareness(); + sink.Add(TPireMatch::Name(true, false)); + sink.Add(TPireMatch::Name(false, false)); + sink.Add(TPireCapture::Name()); + sink.Add(TPireReplace::Name()); + } - if (!typesOnly) { - builder.Implementation(new TPireCapture::TFactory(builder.GetSourcePosition())); - } - } else if (TPireReplace::Name() == name) { - builder.SimpleSignature<TOptional<char*>(TOptional<char*>, char*)>() - .RunConfig<char*>(); + void BuildFunctionTypeInfo( + const TStringRef& name, + TType*, + const TStringRef& typeConfig, + ui32 flags, + IFunctionTypeInfoBuilder& builder) const final try { + const bool typesOnly = (flags & TFlags::TypesOnly); + const bool isMatch = (TPireMatch::Name(false, false) == name); + const bool isGrep = (TPireMatch::Name(true, false) == name); + const bool isMultiMatch = (TPireMatch::Name(false, true) == name); + const bool isMultiGrep = (TPireMatch::Name(true, true) == name); - if (!typesOnly) { - builder.Implementation(new TPireReplace::TFactory(builder.GetSourcePosition())); - } + if (isMatch || isGrep) { + builder.SimpleSignature<bool(TOptional<char*>)>() + .RunConfig<const char*>(); + + if (!typesOnly) { + builder.Implementation(new TPireMatch::TFactory(isGrep, false, builder.GetSourcePosition())); + } + } else if (isMultiMatch || isMultiGrep) { + const auto boolType = builder.SimpleType<bool>(); + const auto optionalStringType = 
builder.Optional()->Item<char*>().Build(); + const std::string_view regexp(typeConfig); + const size_t regexpCount = std::count(regexp.begin(), regexp.end(), '\n') + 1; + const auto tuple = builder.Tuple(); + for (size_t i = 0; i < regexpCount; ++i) { + tuple->Add(boolType); + } + const auto tupleType = tuple->Build(); + builder.Args(1)->Add(optionalStringType).Done().Returns(tupleType).RunConfig<char*>(); + + if (!typesOnly) { + builder.Implementation(new TPireMatch::TFactory(isMultiGrep, true, builder.GetSourcePosition(), regexpCount)); + } + } else if (TPireCapture::Name() == name) { + builder.SimpleSignature<TOptional<char*>(TOptional<char*>)>() + .RunConfig<char*>(); + + if (!typesOnly) { + builder.Implementation(new TPireCapture::TFactory(builder.GetSourcePosition())); + } + } else if (TPireReplace::Name() == name) { + builder.SimpleSignature<TOptional<char*>(TOptional<char*>, char*)>() + .RunConfig<char*>(); + + if (!typesOnly) { + builder.Implementation(new TPireReplace::TFactory(builder.GetSourcePosition())); } - } catch (const std::exception& e) { - builder.SetError(CurrentExceptionMessage()); } - }; + } catch (const std::exception& e) { + builder.SetError(CurrentExceptionMessage()); + } +}; -} +} // namespace REGISTER_MODULES(TPireModule) diff --git a/yql/essentials/udfs/common/pire/ya.make b/yql/essentials/udfs/common/pire/ya.make index 414b973aa62..21aef728426 100644 --- a/yql/essentials/udfs/common/pire/ya.make +++ b/yql/essentials/udfs/common/pire/ya.make @@ -5,6 +5,8 @@ YQL_UDF_CONTRIB(pire_udf) 27 0 ) + + ENABLE(YQL_STYLE_CPP) SRCS( pire_udf.cpp diff --git a/yql/essentials/udfs/common/protobuf/protobuf_udf.cpp b/yql/essentials/udfs/common/protobuf/protobuf_udf.cpp index 4b7df61c28e..25c85c37727 100644 --- a/yql/essentials/udfs/common/protobuf/protobuf_udf.cpp +++ b/yql/essentials/udfs/common/protobuf/protobuf_udf.cpp @@ -9,135 +9,138 @@ using namespace NKikimr::NUdf; using namespace NProtoBuf; namespace { - class TDynamicProtoValue: public 
TProtobufValue { - public: - TDynamicProtoValue(const TProtoInfo& info, TDynamicInfoRef dyn) - : TProtobufValue(info) - , Dynamic_(dyn) - { - Y_ASSERT(Dynamic_ != nullptr); - } +class TDynamicProtoValue: public TProtobufValue { +public: + TDynamicProtoValue(const TProtoInfo& info, TDynamicInfoRef dyn) + : TProtobufValue(info) + , Dynamic_(dyn) + { + Y_ASSERT(Dynamic_ != nullptr); + } - TAutoPtr<Message> Parse(const TStringBuf& data) const override { - return Dynamic_->Parse(data); - } + TAutoPtr<Message> Parse(const TStringBuf& data) const override { + return Dynamic_->Parse(data); + } - private: - TDynamicInfoRef Dynamic_; - }; +private: + TDynamicInfoRef Dynamic_; +}; - class TDynamicProtoSerialize: public TProtobufSerialize { - public: - TDynamicProtoSerialize(const TProtoInfo& info, TDynamicInfoRef dyn) - : TProtobufSerialize(info) - , Dynamic_(dyn) - { - Y_ASSERT(Dynamic_ != nullptr); - } +class TDynamicProtoSerialize: public TProtobufSerialize { +public: + TDynamicProtoSerialize(const TProtoInfo& info, TDynamicInfoRef dyn) + : TProtobufSerialize(info) + , Dynamic_(dyn) + { + Y_ASSERT(Dynamic_ != nullptr); + } - TMaybe<TString> Serialize(const Message& proto) const override { - return Dynamic_->Serialize(proto); - } + TMaybe<TString> Serialize(const Message& proto) const override { + return Dynamic_->Serialize(proto); + } - TAutoPtr<Message> MakeProto() const override { - return Dynamic_->MakeProto(); - } - private: - TDynamicInfoRef Dynamic_; - }; + TAutoPtr<Message> MakeProto() const override { + return Dynamic_->MakeProto(); + } - class TDynamicProtoValueSafe: public TDynamicProtoValue { - public: - TDynamicProtoValueSafe(const TProtoInfo& info, TDynamicInfoRef dyn) - : TDynamicProtoValue(info, dyn) {} +private: + TDynamicInfoRef Dynamic_; +}; - TAutoPtr<Message> Parse(const TStringBuf& data) const override { - try { - return TDynamicProtoValue::Parse(data); - } catch (const std::exception& e) { - return nullptr; - } - } - }; +class TDynamicProtoValueSafe: 
public TDynamicProtoValue { +public: + TDynamicProtoValueSafe(const TProtoInfo& info, TDynamicInfoRef dyn) + : TDynamicProtoValue(info, dyn) + { + } - class TProtobufModule: public IUdfModule { - public: - TStringRef Name() const { - return TStringRef("Protobuf"); + TAutoPtr<Message> Parse(const TStringBuf& data) const override { + try { + return TDynamicProtoValue::Parse(data); + } catch (const std::exception& e) { + return nullptr; } + } +}; - void CleanupOnTerminate() const final { - } +class TProtobufModule: public IUdfModule { +public: + TStringRef Name() const { + return TStringRef("Protobuf"); + } - void GetAllFunctions(IFunctionsSink& sink) const final { - sink.Add(TStringRef::Of("Parse"))->SetTypeAwareness(); - sink.Add(TStringRef::Of("TryParse"))->SetTypeAwareness(); - sink.Add(TStringRef::Of("Serialize"))->SetTypeAwareness(); - } + void CleanupOnTerminate() const final { + } + + void GetAllFunctions(IFunctionsSink& sink) const final { + sink.Add(TStringRef::Of("Parse"))->SetTypeAwareness(); + sink.Add(TStringRef::Of("TryParse"))->SetTypeAwareness(); + sink.Add(TStringRef::Of("Serialize"))->SetTypeAwareness(); + } - void BuildFunctionTypeInfo( - const TStringRef& name, - TType* userType, - const TStringRef& typeConfig, - ui32 flags, - IFunctionTypeInfoBuilder& builder) const final { - Y_UNUSED(userType); + void BuildFunctionTypeInfo( + const TStringRef& name, + TType* userType, + const TStringRef& typeConfig, + ui32 flags, + IFunctionTypeInfoBuilder& builder) const final { + Y_UNUSED(userType); - try { - auto dyn = TDynamicInfo::Create(TStringBuf(typeConfig.Data(), typeConfig.Size())); + try { + auto dyn = TDynamicInfo::Create(TStringBuf(typeConfig.Data(), typeConfig.Size())); - TProtoInfo typeInfo; - ProtoTypeBuild(dyn->Descriptor(), - dyn->GetEnumFormat(), - dyn->GetRecursionTraits(), - dyn->GetOptionalLists(), - builder, &typeInfo, - EProtoStringYqlType::Bytes, - dyn->GetSyntaxAware(), - false, - dyn->GetYtMode()); + TProtoInfo typeInfo; + 
ProtoTypeBuild(dyn->Descriptor(), + dyn->GetEnumFormat(), + dyn->GetRecursionTraits(), + dyn->GetOptionalLists(), + builder, &typeInfo, + EProtoStringYqlType::Bytes, + dyn->GetSyntaxAware(), + false, + dyn->GetYtMode()); - auto stringType = builder.SimpleType<char*>(); - auto structType = typeInfo.StructType; - auto optionalStructType = builder.Optional()->Item(structType).Build(); + auto stringType = builder.SimpleType<char*>(); + auto structType = typeInfo.StructType; + auto optionalStructType = builder.Optional()->Item(structType).Build(); + + if (TStringRef::Of("Serialize") == name) { + // function signature: + // String Serialize(Protobuf value) + builder.Returns(stringType) + .Args() + ->Add(structType) + .Flags(ICallablePayload::TArgumentFlags::AutoMap) + .Done(); + if ((flags & TFlags::TypesOnly) == 0) { + builder.Implementation(new TDynamicProtoSerialize(typeInfo, dyn)); + } + } else { + // function signature: + // Protobuf Parse(String value) + builder.Returns((TStringRef::Of("TryParse") == name) ? optionalStructType : structType) + .Args() + ->Add(stringType) + .Flags(ICallablePayload::TArgumentFlags::AutoMap) + .Done(); - if (TStringRef::Of("Serialize") == name) { - // function signature: - // String Serialize(Protobuf value) - builder.Returns(stringType) - .Args() - ->Add(structType) - .Flags(ICallablePayload::TArgumentFlags::AutoMap) - .Done(); + if (TStringRef::Of("Parse") == name) { if ((flags & TFlags::TypesOnly) == 0) { - builder.Implementation(new TDynamicProtoSerialize(typeInfo, dyn)); + builder.Implementation(new TDynamicProtoValue(typeInfo, dyn)); } - } else { - // function signature: - // Protobuf Parse(String value) - builder.Returns((TStringRef::Of("TryParse") == name) ? 
optionalStructType : structType) - .Args() - ->Add(stringType) - .Flags(ICallablePayload::TArgumentFlags::AutoMap) - .Done(); - - if (TStringRef::Of("Parse") == name) { - if ((flags & TFlags::TypesOnly) == 0) { - builder.Implementation(new TDynamicProtoValue(typeInfo, dyn)); - } - } else if (TStringRef::Of("TryParse") == name) { - if ((flags & TFlags::TypesOnly) == 0) { - builder.Implementation(new TDynamicProtoValueSafe(typeInfo, dyn)); - } + } else if (TStringRef::Of("TryParse") == name) { + if ((flags & TFlags::TypesOnly) == 0) { + builder.Implementation(new TDynamicProtoValueSafe(typeInfo, dyn)); } } - - } catch (const std::exception& e) { - builder.SetError(CurrentExceptionMessage()); } + + } catch (const std::exception& e) { + builder.SetError(CurrentExceptionMessage()); } - }; + } +}; -} +} // namespace REGISTER_MODULES(TProtobufModule); diff --git a/yql/essentials/udfs/common/protobuf/ya.make b/yql/essentials/udfs/common/protobuf/ya.make index 714ad77137f..007a5266671 100644 --- a/yql/essentials/udfs/common/protobuf/ya.make +++ b/yql/essentials/udfs/common/protobuf/ya.make @@ -6,6 +6,8 @@ YQL_ABI_VERSION( 0 ) +ENABLE(YQL_STYLE_CPP) + SRCS( protobuf_udf.cpp ) diff --git a/yql/essentials/udfs/common/python/bindings/py27_backports.c b/yql/essentials/udfs/common/python/bindings/py27_backports.c index cf21a97cef0..7359bcce4be 100644 --- a/yql/essentials/udfs/common/python/bindings/py27_backports.c +++ b/yql/essentials/udfs/common/python/bindings/py27_backports.c @@ -1,22 +1,21 @@ #include "py27_backports.h" - // Provide implementations from python 2.7.15 as backports -int -_PySlice_Unpack(PyObject *_r, - Py_ssize_t *start, Py_ssize_t *stop, Py_ssize_t *step) +int _PySlice_Unpack(PyObject* _r, + Py_ssize_t* start, Py_ssize_t* stop, Py_ssize_t* step) { - PySliceObject *r = (PySliceObject *)_r; + PySliceObject* r = (PySliceObject*)_r; /* this is harder to get right than you might think */ assert(PY_SSIZE_T_MIN + 1 <= -PY_SSIZE_T_MAX); if (r->step == Py_None) { 
*step = 1; - } - else { - if (!_PyEval_SliceIndex(r->step, step)) return -1; + } else { + if (!_PyEval_SliceIndex(r->step, step)) { + return -1; + } if (*step == 0) { PyErr_SetString(PyExc_ValueError, "slice step cannot be zero"); @@ -27,22 +26,25 @@ _PySlice_Unpack(PyObject *_r, * guards against later undefined behaviour resulting from code that * does "step = -step" as part of a slice reversal. */ - if (*step < -PY_SSIZE_T_MAX) + if (*step < -PY_SSIZE_T_MAX) { *step = -PY_SSIZE_T_MAX; + } } if (r->start == Py_None) { *start = *step < 0 ? PY_SSIZE_T_MAX : 0; - } - else { - if (!_PyEval_SliceIndex(r->start, start)) return -1; + } else { + if (!_PyEval_SliceIndex(r->start, start)) { + return -1; + } } if (r->stop == Py_None) { *stop = *step < 0 ? PY_SSIZE_T_MIN : PY_SSIZE_T_MAX; - } - else { - if (!_PyEval_SliceIndex(r->stop, stop)) return -1; + } else { + if (!_PyEval_SliceIndex(r->stop, stop)) { + return -1; + } } return 0; @@ -50,7 +52,7 @@ _PySlice_Unpack(PyObject *_r, Py_ssize_t _PySlice_AdjustIndices(Py_ssize_t length, - Py_ssize_t *start, Py_ssize_t *stop, Py_ssize_t step) + Py_ssize_t* start, Py_ssize_t* stop, Py_ssize_t step) { /* this is harder to get right than you might think */ @@ -62,8 +64,7 @@ _PySlice_AdjustIndices(Py_ssize_t length, if (*start < 0) { *start = (step < 0) ? -1 : 0; } - } - else if (*start >= length) { + } else if (*start >= length) { *start = (step < 0) ? length - 1 : length; } @@ -72,8 +73,7 @@ _PySlice_AdjustIndices(Py_ssize_t length, if (*stop < 0) { *stop = (step < 0) ? -1 : 0; } - } - else if (*stop >= length) { + } else if (*stop >= length) { *stop = (step < 0) ? 
length - 1 : length; } @@ -81,8 +81,7 @@ _PySlice_AdjustIndices(Py_ssize_t length, if (*stop < *start) { return (*start - *stop - 1) / (-step) + 1; } - } - else { + } else { if (*start < *stop) { return (*stop - *start - 1) / step + 1; } diff --git a/yql/essentials/udfs/common/python/bindings/py27_backports.h b/yql/essentials/udfs/common/python/bindings/py27_backports.h index 766af6a76fa..f39b0183ba4 100644 --- a/yql/essentials/udfs/common/python/bindings/py27_backports.h +++ b/yql/essentials/udfs/common/python/bindings/py27_backports.h @@ -6,17 +6,17 @@ extern "C" { #endif -// Declare functions which are to be backported -// (see details about need for backports in ya.make) + // Declare functions which are to be backported + // (see details about need for backports in ya.make) -int _PySlice_Unpack(PyObject *slice, - Py_ssize_t *start, Py_ssize_t *stop, Py_ssize_t *step); + int _PySlice_Unpack(PyObject* slice, + Py_ssize_t* start, Py_ssize_t* stop, Py_ssize_t* step); -Py_ssize_t _PySlice_AdjustIndices(Py_ssize_t length, - Py_ssize_t *start, Py_ssize_t *stop, - Py_ssize_t step); + Py_ssize_t _PySlice_AdjustIndices(Py_ssize_t length, + Py_ssize_t* start, Py_ssize_t* stop, + Py_ssize_t step); -// Declare py23 compatible names + // Declare py23 compatible names #define PySlice_Unpack _PySlice_Unpack #define PySlice_AdjustIndices _PySlice_AdjustIndices diff --git a/yql/essentials/udfs/common/python/bindings/py_callable.cpp b/yql/essentials/udfs/common/python/bindings/py_callable.cpp index e9b25606ed0..c951e0dce8f 100644 --- a/yql/essentials/udfs/common/python/bindings/py_callable.cpp +++ b/yql/essentials/udfs/common/python/bindings/py_callable.cpp @@ -22,8 +22,7 @@ namespace { ////////////////////////////////////////////////////////////////////////////// // TPyCallableObject ////////////////////////////////////////////////////////////////////////////// -struct TPyCallableObject -{ +struct TPyCallableObject { PyObject_HEAD; TPyCastContext::TPtr CastCtx; const 
NUdf::TType* Type; @@ -34,7 +33,8 @@ struct TPyCallableObject : CastCtx(castCtx) , Type(type) , Inspector(*castCtx->PyCtx->TypeInfoHelper, type) - {} + { + } }; inline TPyCallableObject* CastToCallable(PyObject* o) @@ -53,7 +53,7 @@ PyObject* CallableRepr(PyObject*) return PyRepr("<yql.TCallable>").Release(); } -PyObject* CallableCall(PyObject *self, PyObject *args, PyObject *kwargs) +PyObject* CallableCall(PyObject* self, PyObject* args, PyObject* kwargs) { Y_UNUSED(kwargs); @@ -74,88 +74,90 @@ PyObject* CallableCall(PyObject *self, PyObject *args, PyObject *kwargs) } return ToPyObject(callable->CastCtx, inspector.GetReturnType(), result).Release(); - } PY_CATCH(nullptr) + } + PY_CATCH(nullptr) } -} +} // namespace PyTypeObject PyCallableType = { PyVarObject_HEAD_INIT(&PyType_Type, 0) - INIT_MEMBER(tp_name , "yql.TCallable"), - INIT_MEMBER(tp_basicsize , sizeof(TPyCallableObject)), - INIT_MEMBER(tp_itemsize , 0), - INIT_MEMBER(tp_dealloc , CallableDealloc), + // clang-format off + INIT_MEMBER(tp_name, "yql.TCallable"), + // clang-format on + INIT_MEMBER(tp_basicsize, sizeof(TPyCallableObject)), + INIT_MEMBER(tp_itemsize, 0), + INIT_MEMBER(tp_dealloc, CallableDealloc), #if PY_VERSION_HEX < 0x030800b4 - INIT_MEMBER(tp_print , nullptr), + INIT_MEMBER(tp_print, nullptr), #else INIT_MEMBER(tp_vectorcall_offset, 0), #endif - INIT_MEMBER(tp_getattr , nullptr), - INIT_MEMBER(tp_setattr , nullptr), + INIT_MEMBER(tp_getattr, nullptr), + INIT_MEMBER(tp_setattr, nullptr), #if PY_MAJOR_VERSION >= 3 - INIT_MEMBER(tp_as_async , nullptr), + INIT_MEMBER(tp_as_async, nullptr), #else - INIT_MEMBER(tp_compare , nullptr), + INIT_MEMBER(tp_compare, nullptr), #endif - INIT_MEMBER(tp_repr , CallableRepr), - INIT_MEMBER(tp_as_number , nullptr), - INIT_MEMBER(tp_as_sequence , nullptr), - INIT_MEMBER(tp_as_mapping , nullptr), - INIT_MEMBER(tp_hash , nullptr), - INIT_MEMBER(tp_call , CallableCall), - INIT_MEMBER(tp_str , nullptr), - INIT_MEMBER(tp_getattro , nullptr), - 
INIT_MEMBER(tp_setattro , nullptr), - INIT_MEMBER(tp_as_buffer , nullptr), - INIT_MEMBER(tp_flags , 0), - INIT_MEMBER(tp_doc , "yql.TCallable object"), - INIT_MEMBER(tp_traverse , nullptr), - INIT_MEMBER(tp_clear , nullptr), - INIT_MEMBER(tp_richcompare , nullptr), - INIT_MEMBER(tp_weaklistoffset , 0), - INIT_MEMBER(tp_iter , nullptr), - INIT_MEMBER(tp_iternext , nullptr), - INIT_MEMBER(tp_methods , nullptr), - INIT_MEMBER(tp_members , nullptr), - INIT_MEMBER(tp_getset , nullptr), - INIT_MEMBER(tp_base , nullptr), - INIT_MEMBER(tp_dict , nullptr), - INIT_MEMBER(tp_descr_get , nullptr), - INIT_MEMBER(tp_descr_set , nullptr), - INIT_MEMBER(tp_dictoffset , 0), - INIT_MEMBER(tp_init , nullptr), - INIT_MEMBER(tp_alloc , nullptr), - INIT_MEMBER(tp_new , nullptr), - INIT_MEMBER(tp_free , nullptr), - INIT_MEMBER(tp_is_gc , nullptr), - INIT_MEMBER(tp_bases , nullptr), - INIT_MEMBER(tp_mro , nullptr), - INIT_MEMBER(tp_cache , nullptr), - INIT_MEMBER(tp_subclasses , nullptr), - INIT_MEMBER(tp_weaklist , nullptr), - INIT_MEMBER(tp_del , nullptr), - INIT_MEMBER(tp_version_tag , 0), + INIT_MEMBER(tp_repr, CallableRepr), + INIT_MEMBER(tp_as_number, nullptr), + INIT_MEMBER(tp_as_sequence, nullptr), + INIT_MEMBER(tp_as_mapping, nullptr), + INIT_MEMBER(tp_hash, nullptr), + INIT_MEMBER(tp_call, CallableCall), + INIT_MEMBER(tp_str, nullptr), + INIT_MEMBER(tp_getattro, nullptr), + INIT_MEMBER(tp_setattro, nullptr), + INIT_MEMBER(tp_as_buffer, nullptr), + INIT_MEMBER(tp_flags, 0), + INIT_MEMBER(tp_doc, "yql.TCallable object"), + INIT_MEMBER(tp_traverse, nullptr), + INIT_MEMBER(tp_clear, nullptr), + INIT_MEMBER(tp_richcompare, nullptr), + INIT_MEMBER(tp_weaklistoffset, 0), + INIT_MEMBER(tp_iter, nullptr), + INIT_MEMBER(tp_iternext, nullptr), + INIT_MEMBER(tp_methods, nullptr), + INIT_MEMBER(tp_members, nullptr), + INIT_MEMBER(tp_getset, nullptr), + INIT_MEMBER(tp_base, nullptr), + INIT_MEMBER(tp_dict, nullptr), + INIT_MEMBER(tp_descr_get, nullptr), + INIT_MEMBER(tp_descr_set, nullptr), + 
INIT_MEMBER(tp_dictoffset, 0), + INIT_MEMBER(tp_init, nullptr), + INIT_MEMBER(tp_alloc, nullptr), + INIT_MEMBER(tp_new, nullptr), + INIT_MEMBER(tp_free, nullptr), + INIT_MEMBER(tp_is_gc, nullptr), + INIT_MEMBER(tp_bases, nullptr), + INIT_MEMBER(tp_mro, nullptr), + INIT_MEMBER(tp_cache, nullptr), + INIT_MEMBER(tp_subclasses, nullptr), + INIT_MEMBER(tp_weaklist, nullptr), + INIT_MEMBER(tp_del, nullptr), + INIT_MEMBER(tp_version_tag, 0), #if PY_MAJOR_VERSION >= 3 - INIT_MEMBER(tp_finalize , nullptr), + INIT_MEMBER(tp_finalize, nullptr), #endif #if PY_VERSION_HEX >= 0x030800b1 - INIT_MEMBER(tp_vectorcall , nullptr), + INIT_MEMBER(tp_vectorcall, nullptr), #endif #if PY_VERSION_HEX >= 0x030800b4 && PY_VERSION_HEX < 0x03090000 - INIT_MEMBER(tp_print , nullptr), + INIT_MEMBER(tp_print, nullptr), #endif }; ////////////////////////////////////////////////////////////////////////////// // TPyCallable ////////////////////////////////////////////////////////////////////////////// -class TPyCallable: public NUdf::TBoxedValue -{ +class TPyCallable: public NUdf::TBoxedValue { public: TPyCallable( - PyObject* function, - const NUdf::TType* functionType, - const TPyCastContext::TPtr& castCtx) + PyObject* function, + const NUdf::TType* functionType, + const TPyCastContext::TPtr& castCtx) : Function_(function, TPyObjectPtr::ADD_REF) , FunctionType_(functionType) , CastCtx_(castCtx) @@ -179,16 +181,17 @@ public: private: NUdf::TUnboxedValue Run( - const NUdf::IValueBuilder*, - const NUdf::TUnboxedValuePod* args) const final - { + const NUdf::IValueBuilder*, + const NUdf::TUnboxedValuePod* args) const final { TPyGilLocker lock; try { TPyObjectPtr pyArgs = ToPyArgs(CastCtx_, FunctionType_, args, Inspector_); TPyObjectPtr resultObj = - PyObject_CallObject(Function_.Get(), pyArgs.Get()); + PyObject_CallObject(Function_.Get(), pyArgs.Get()); if (!resultObj) { - UdfTerminate((TStringBuilder() << CastCtx_->PyCtx->Pos << "Failed to execute:\n" << GetLastErrorAsString()).c_str()); + 
UdfTerminate((TStringBuilder() << CastCtx_->PyCtx->Pos << "Failed to execute:\n" + << GetLastErrorAsString()) + .c_str()); } auto returnType = Inspector_.GetReturnType(); @@ -198,7 +201,9 @@ private: return FromPyObject(CastCtx_, returnType, resultObj.Get()); } catch (const yexception& e) { - UdfTerminate((TStringBuilder() << CastCtx_->PyCtx->Pos << "Failed to cast arguments or result\n" << e.what()).c_str()); + UdfTerminate((TStringBuilder() << CastCtx_->PyCtx->Pos << "Failed to cast arguments or result\n" + << e.what()) + .c_str()); } } @@ -209,11 +214,10 @@ private: NUdf::TCallableTypeInspector Inspector_; }; - TPyObjectPtr ToPyCallable( - const TPyCastContext::TPtr& castCtx, - const NUdf::TType* type, - const NUdf::TUnboxedValuePod& value) + const TPyCastContext::TPtr& castCtx, + const NUdf::TType* type, + const NUdf::TUnboxedValuePod& value) { TPyCallableObject* callable = new TPyCallableObject(castCtx, type); PyObject_INIT(callable, &PyCallableType); @@ -224,9 +228,9 @@ TPyObjectPtr ToPyCallable( } NUdf::TUnboxedValue FromPyCallable( - const TPyCastContext::TPtr& castCtx, - const NUdf::TType* type, - PyObject* value) + const TPyCastContext::TPtr& castCtx, + const NUdf::TType* type, + PyObject* value) { return NUdf::TUnboxedValuePod(new TPyCallable(value, type, castCtx)); } @@ -244,13 +248,14 @@ TMaybe<TPyObjectPtr> GetOptionalAttribute(PyObject* value, const char* attrName) } } - -struct TPySecureParam -{ +struct TPySecureParam { PyObject_HEAD; TPyCastContext::TPtr CastCtx; - TPySecureParam(const TPyCastContext::TPtr& castCtx) : CastCtx(castCtx) {} + TPySecureParam(const TPyCastContext::TPtr& castCtx) + : CastCtx(castCtx) + { + } }; inline TPySecureParam* CastToSecureParam(PyObject* o) @@ -273,7 +278,9 @@ PyObject* SecureParamCall(PyObject* self, PyObject* args, PyObject* kwargs) Y_UNUSED(kwargs); struct PyBufDeleter { - void operator() (Py_buffer* view) { PyBuffer_Release(view); } + void operator()(Py_buffer* view) { + PyBuffer_Release(view); + } }; 
Py_buffer input; if (!PyArg_ParseTuple(args, "s*", &input)) { @@ -287,73 +294,76 @@ PyObject* SecureParamCall(PyObject* self, PyObject* args, PyObject* kwargs) throw yexception() << "Cannot get secure parameter for key: " << key; } return PyRepr(TStringBuf(key.Data(), key.Size())).Release(); - } PY_CATCH(nullptr) + } + PY_CATCH(nullptr) } static PyTypeObject PySecureParamType = { PyVarObject_HEAD_INIT(&PyType_Type, 0) - INIT_MEMBER(tp_name , "yql.TSecureParam"), - INIT_MEMBER(tp_basicsize , sizeof(TPySecureParam)), - INIT_MEMBER(tp_itemsize , 0), - INIT_MEMBER(tp_dealloc , SecureParamDealloc), + // clang-format off + INIT_MEMBER(tp_name, "yql.TSecureParam"), + // clang-format on + INIT_MEMBER(tp_basicsize, sizeof(TPySecureParam)), + INIT_MEMBER(tp_itemsize, 0), + INIT_MEMBER(tp_dealloc, SecureParamDealloc), #if PY_VERSION_HEX < 0x030800b4 - INIT_MEMBER(tp_print , nullptr), + INIT_MEMBER(tp_print, nullptr), #else INIT_MEMBER(tp_vectorcall_offset, 0), #endif - INIT_MEMBER(tp_getattr , nullptr), - INIT_MEMBER(tp_setattr , nullptr), + INIT_MEMBER(tp_getattr, nullptr), + INIT_MEMBER(tp_setattr, nullptr), #if PY_MAJOR_VERSION >= 3 - INIT_MEMBER(tp_as_async , nullptr), + INIT_MEMBER(tp_as_async, nullptr), #else - INIT_MEMBER(tp_compare , nullptr), + INIT_MEMBER(tp_compare, nullptr), #endif - INIT_MEMBER(tp_repr , SecureParamRepr), - INIT_MEMBER(tp_as_number , nullptr), - INIT_MEMBER(tp_as_sequence , nullptr), - INIT_MEMBER(tp_as_mapping , nullptr), - INIT_MEMBER(tp_hash , nullptr), - INIT_MEMBER(tp_call , SecureParamCall), - INIT_MEMBER(tp_str , nullptr), - INIT_MEMBER(tp_getattro , nullptr), - INIT_MEMBER(tp_setattro , nullptr), - INIT_MEMBER(tp_as_buffer , nullptr), - INIT_MEMBER(tp_flags , 0), - INIT_MEMBER(tp_doc , "yql.TSecureParam object"), - INIT_MEMBER(tp_traverse , nullptr), - INIT_MEMBER(tp_clear , nullptr), - INIT_MEMBER(tp_richcompare , nullptr), - INIT_MEMBER(tp_weaklistoffset , 0), - INIT_MEMBER(tp_iter , nullptr), - INIT_MEMBER(tp_iternext , nullptr), - 
INIT_MEMBER(tp_methods , nullptr), - INIT_MEMBER(tp_members , nullptr), - INIT_MEMBER(tp_getset , nullptr), - INIT_MEMBER(tp_base , nullptr), - INIT_MEMBER(tp_dict , nullptr), - INIT_MEMBER(tp_descr_get , nullptr), - INIT_MEMBER(tp_descr_set , nullptr), - INIT_MEMBER(tp_dictoffset , 0), - INIT_MEMBER(tp_init , nullptr), - INIT_MEMBER(tp_alloc , nullptr), - INIT_MEMBER(tp_new , nullptr), - INIT_MEMBER(tp_free , nullptr), - INIT_MEMBER(tp_is_gc , nullptr), - INIT_MEMBER(tp_bases , nullptr), - INIT_MEMBER(tp_mro , nullptr), - INIT_MEMBER(tp_cache , nullptr), - INIT_MEMBER(tp_subclasses , nullptr), - INIT_MEMBER(tp_weaklist , nullptr), - INIT_MEMBER(tp_del , nullptr), - INIT_MEMBER(tp_version_tag , 0), + INIT_MEMBER(tp_repr, SecureParamRepr), + INIT_MEMBER(tp_as_number, nullptr), + INIT_MEMBER(tp_as_sequence, nullptr), + INIT_MEMBER(tp_as_mapping, nullptr), + INIT_MEMBER(tp_hash, nullptr), + INIT_MEMBER(tp_call, SecureParamCall), + INIT_MEMBER(tp_str, nullptr), + INIT_MEMBER(tp_getattro, nullptr), + INIT_MEMBER(tp_setattro, nullptr), + INIT_MEMBER(tp_as_buffer, nullptr), + INIT_MEMBER(tp_flags, 0), + INIT_MEMBER(tp_doc, "yql.TSecureParam object"), + INIT_MEMBER(tp_traverse, nullptr), + INIT_MEMBER(tp_clear, nullptr), + INIT_MEMBER(tp_richcompare, nullptr), + INIT_MEMBER(tp_weaklistoffset, 0), + INIT_MEMBER(tp_iter, nullptr), + INIT_MEMBER(tp_iternext, nullptr), + INIT_MEMBER(tp_methods, nullptr), + INIT_MEMBER(tp_members, nullptr), + INIT_MEMBER(tp_getset, nullptr), + INIT_MEMBER(tp_base, nullptr), + INIT_MEMBER(tp_dict, nullptr), + INIT_MEMBER(tp_descr_get, nullptr), + INIT_MEMBER(tp_descr_set, nullptr), + INIT_MEMBER(tp_dictoffset, 0), + INIT_MEMBER(tp_init, nullptr), + INIT_MEMBER(tp_alloc, nullptr), + INIT_MEMBER(tp_new, nullptr), + INIT_MEMBER(tp_free, nullptr), + INIT_MEMBER(tp_is_gc, nullptr), + INIT_MEMBER(tp_bases, nullptr), + INIT_MEMBER(tp_mro, nullptr), + INIT_MEMBER(tp_cache, nullptr), + INIT_MEMBER(tp_subclasses, nullptr), + INIT_MEMBER(tp_weaklist, 
nullptr), + INIT_MEMBER(tp_del, nullptr), + INIT_MEMBER(tp_version_tag, 0), #if PY_MAJOR_VERSION >= 3 - INIT_MEMBER(tp_finalize , nullptr), + INIT_MEMBER(tp_finalize, nullptr), #endif #if PY_VERSION_HEX >= 0x030800b1 - INIT_MEMBER(tp_vectorcall , nullptr), + INIT_MEMBER(tp_vectorcall, nullptr), #endif #if PY_VERSION_HEX >= 0x030800b4 && PY_VERSION_HEX < 0x03090000 - INIT_MEMBER(tp_print , nullptr), + INIT_MEMBER(tp_print, nullptr), #endif }; @@ -364,55 +374,60 @@ TPyObjectPtr ToPySecureParam(const TPyCastContext::TPtr& castCtx) return reinterpret_cast<PyObject*>(ret); } - void SetupCallableSettings(const TPyCastContext::TPtr& castCtx, PyObject* value) { - if (const auto lazyInput = GetOptionalAttribute(value, "_yql_lazy_input")) try { - castCtx->LazyInputObjects = PyCast<bool>(lazyInput->Get()); - } catch (const yexception& e) { - throw yexception() << "Cannot parse attribute '_yql_lazy_input', error: " << e.what(); + if (const auto lazyInput = GetOptionalAttribute(value, "_yql_lazy_input")) { + try { + castCtx->LazyInputObjects = PyCast<bool>(lazyInput->Get()); + } catch (const yexception& e) { + throw yexception() << "Cannot parse attribute '_yql_lazy_input', error: " << e.what(); + } } - if (const auto convertYson = GetOptionalAttribute(value, "_yql_convert_yson")) try { - Py_ssize_t itemsCount = PyTuple_GET_SIZE(convertYson->Get()); - if (itemsCount != 2) { - throw yexception() << "Expected tuple of 2 callables"; - } + if (const auto convertYson = GetOptionalAttribute(value, "_yql_convert_yson")) { + try { + Py_ssize_t itemsCount = PyTuple_GET_SIZE(convertYson->Get()); + if (itemsCount != 2) { + throw yexception() << "Expected tuple of 2 callables"; + } - castCtx->YsonConverterIn.ResetAddRef(PyTuple_GET_ITEM(convertYson->Get(), 0)); - castCtx->YsonConverterOut.ResetAddRef(PyTuple_GET_ITEM(convertYson->Get(), 1)); - if (!PyCallable_Check(castCtx->YsonConverterIn.Get()) || !PyCallable_Check(castCtx->YsonConverterOut.Get())) { - throw yexception() << "Expected 
tuple of 2 callables"; + castCtx->YsonConverterIn.ResetAddRef(PyTuple_GET_ITEM(convertYson->Get(), 0)); + castCtx->YsonConverterOut.ResetAddRef(PyTuple_GET_ITEM(convertYson->Get(), 1)); + if (!PyCallable_Check(castCtx->YsonConverterIn.Get()) || !PyCallable_Check(castCtx->YsonConverterOut.Get())) { + throw yexception() << "Expected tuple of 2 callables"; + } + } catch (const yexception& e) { + throw yexception() << "Cannot parse attribute '_yql_convert_yson', error: " << e.what(); } - } catch (const yexception& e) { - throw yexception() << "Cannot parse attribute '_yql_convert_yson', error: " << e.what(); } - if (const auto bytesDecodeMode = GetOptionalAttribute(value, "_yql_bytes_decode_mode")) try { - PyObject* bytesValue = nullptr; - if (PyBytes_Check(bytesDecodeMode->Get())) { - bytesValue = PyObject_Bytes(bytesDecodeMode->Get()); - } else if (PyUnicode_Check(bytesDecodeMode->Get())) { - bytesValue = PyUnicode_AsUTF8String(bytesDecodeMode->Get()); - } else { - throw yexception() << "Expected bytes or unicode"; - } - if (!bytesValue) { - PyErr_Clear(); - throw yexception() << "Failed to convert to bytes"; - } + if (const auto bytesDecodeMode = GetOptionalAttribute(value, "_yql_bytes_decode_mode")) { + try { + PyObject* bytesValue = nullptr; + if (PyBytes_Check(bytesDecodeMode->Get())) { + bytesValue = PyObject_Bytes(bytesDecodeMode->Get()); + } else if (PyUnicode_Check(bytesDecodeMode->Get())) { + bytesValue = PyUnicode_AsUTF8String(bytesDecodeMode->Get()); + } else { + throw yexception() << "Expected bytes or unicode"; + } + if (!bytesValue) { + PyErr_Clear(); + throw yexception() << "Failed to convert to bytes"; + } - TStringBuf view(PyBytes_AS_STRING(bytesValue)); - if (view == "never") { - castCtx->BytesDecodeMode = EBytesDecodeMode::Never; - } else if (view == "strict") { - castCtx->BytesDecodeMode = EBytesDecodeMode::Strict; - } else { + TStringBuf view(PyBytes_AS_STRING(bytesValue)); + if (view == "never") { + castCtx->BytesDecodeMode = 
EBytesDecodeMode::Never; + } else if (view == "strict") { + castCtx->BytesDecodeMode = EBytesDecodeMode::Strict; + } else { + Py_DECREF(bytesValue); + throw yexception() << "Expected values 'never' or 'strict'"; + } Py_DECREF(bytesValue); - throw yexception() << "Expected values 'never' or 'strict'"; + } catch (const yexception& e) { + throw yexception() << "Cannot parse attribute '_yql_bytes_decode_mode', error: " << e.what(); } - Py_DECREF(bytesValue); - } catch (const yexception& e) { - throw yexception() << "Cannot parse attribute '_yql_bytes_decode_mode', error: " << e.what(); } if (PyObject_SetAttrString(value, "_yql_secure_param", ToPySecureParam(castCtx).Get()) != 0) { diff --git a/yql/essentials/udfs/common/python/bindings/py_callable.h b/yql/essentials/udfs/common/python/bindings/py_callable.h index 4ce79e1d7f4..2c2f8bbed2a 100644 --- a/yql/essentials/udfs/common/python/bindings/py_callable.h +++ b/yql/essentials/udfs/common/python/bindings/py_callable.h @@ -8,15 +8,15 @@ namespace NPython { extern PyTypeObject PyCallableType; TPyObjectPtr ToPyCallable( - const TPyCastContext::TPtr& castCtx, - const NKikimr::NUdf::TType* type, - const NKikimr::NUdf::TUnboxedValuePod& value); + const TPyCastContext::TPtr& castCtx, + const NKikimr::NUdf::TType* type, + const NKikimr::NUdf::TUnboxedValuePod& value); NKikimr::NUdf::TUnboxedValue FromPyCallable( - const TPyCastContext::TPtr& castCtx, - const NKikimr::NUdf::TType* type, - PyObject* value); + const TPyCastContext::TPtr& castCtx, + const NKikimr::NUdf::TType* type, + PyObject* value); void SetupCallableSettings(const TPyCastContext::TPtr& castCtx, PyObject* value); -} // namspace NPython +} // namespace NPython diff --git a/yql/essentials/udfs/common/python/bindings/py_callable_ut.cpp b/yql/essentials/udfs/common/python/bindings/py_callable_ut.cpp index 36cc13a1da4..c806d401b39 100644 --- a/yql/essentials/udfs/common/python/bindings/py_callable_ut.cpp +++ 
b/yql/essentials/udfs/common/python/bindings/py_callable_ut.cpp @@ -2,86 +2,85 @@ #include <library/cpp/testing/unittest/registar.h> - using namespace NPython; Y_UNIT_TEST_SUITE(TPyCallableTest) { +struct TTestCallable: public NUdf::TBoxedValue { + NUdf::TUnboxedValue Run( + const NUdf::IValueBuilder* valueBuilder, + const NUdf::TUnboxedValuePod* args) const override { + Y_UNUSED(valueBuilder); + return NUdf::TUnboxedValuePod(args[0].Get<ui32>() + 42); + } +}; + +Y_UNIT_TEST(FromPyFunction) { + TPythonTestEngine engine; + const NUdf::IValueBuilder* vb = &engine.GetValueBuilder(); + + engine.ToMiniKQL<char* (*)(char*, ui32)>( + "def Test():\n" + " def test(str, count):\n" + " return str * count\n" + " return test", + [vb](const NUdf::TUnboxedValuePod& value) { + UNIT_ASSERT(value); + UNIT_ASSERT(value.IsBoxed()); + NUdf::TUnboxedValue args[2]; + args[0] = vb->NewString("j"); + args[1] = NUdf::TUnboxedValuePod((ui32)5); + auto result = value.Run(vb, args); + + UNIT_ASSERT(result); + UNIT_ASSERT(5 == result.AsStringRef().Size()); + UNIT_ASSERT_STRINGS_EQUAL(result.AsStringRef(), "jjjjj"); + }); +} + +Y_UNIT_TEST(ToPython) { + TPythonTestEngine engine; + engine.ToPython<i32 (*)(i32)>( + [](const TType* type, const NUdf::IValueBuilder& vb) { + Y_UNUSED(type); + Y_UNUSED(vb); + return NUdf::TUnboxedValuePod(new TTestCallable); + }, + "def Test(value):\n" + " assert type(value).__name__ == 'TCallable'\n" + " assert value.__call__ != None\n" + " assert value(-2) == 40\n" + " assert value(-1) == 41\n" + " assert value(0) == 42\n" + " assert value(1) == 43\n" + " assert value(2) == 44\n"); +} + +Y_UNIT_TEST(ToPythonAndBack) { struct TTestCallable: public NUdf::TBoxedValue { NUdf::TUnboxedValue Run( - const NUdf::IValueBuilder* valueBuilder, - const NUdf::TUnboxedValuePod* args) const override - { + const NUdf::IValueBuilder* valueBuilder, + const NUdf::TUnboxedValuePod* args) const override { Y_UNUSED(valueBuilder); return NUdf::TUnboxedValuePod(args[0].Get<ui32>() + 42); } 
}; - Y_UNIT_TEST(FromPyFunction) { - TPythonTestEngine engine; - const NUdf::IValueBuilder* vb = &engine.GetValueBuilder(); + TPythonTestEngine engine; + engine.ToPythonAndBack<i32 (*)(i32)>( + [](const TType* type, const NUdf::IValueBuilder& vb) { + Y_UNUSED(type); + Y_UNUSED(vb); + return NUdf::TUnboxedValuePod(new TTestCallable); + }, + "def Test(value): return value", + [](const NUdf::TUnboxedValuePod& value) { + UNIT_ASSERT(value); + UNIT_ASSERT(value.IsBoxed()); + NUdf::TUnboxedValue arg = NUdf::TUnboxedValuePod((ui32)5); + const auto result = value.Run(nullptr, &arg); - engine.ToMiniKQL<char* (*)(char*, ui32)>( - "def Test():\n" - " def test(str, count):\n" - " return str * count\n" - " return test", - [vb](const NUdf::TUnboxedValuePod& value) { - UNIT_ASSERT(value); - UNIT_ASSERT(value.IsBoxed()); - NUdf::TUnboxedValue args[2]; - args[0] = vb->NewString("j"); - args[1] = NUdf::TUnboxedValuePod((ui32) 5); - auto result = value.Run(vb, args); - - UNIT_ASSERT(result); - UNIT_ASSERT(5 == result.AsStringRef().Size()); - UNIT_ASSERT_STRINGS_EQUAL(result.AsStringRef(), "jjjjj"); - }); - } - - Y_UNIT_TEST(ToPython) { - TPythonTestEngine engine; - engine.ToPython<i32 (*)(i32)>( - [](const TType* type, const NUdf::IValueBuilder& vb) { - Y_UNUSED(type); Y_UNUSED(vb); - return NUdf::TUnboxedValuePod(new TTestCallable); - }, - "def Test(value):\n" - " assert type(value).__name__ == 'TCallable'\n" - " assert value.__call__ != None\n" - " assert value(-2) == 40\n" - " assert value(-1) == 41\n" - " assert value(0) == 42\n" - " assert value(1) == 43\n" - " assert value(2) == 44\n"); - } - - Y_UNIT_TEST(ToPythonAndBack) { - struct TTestCallable: public NUdf::TBoxedValue { - NUdf::TUnboxedValue Run( - const NUdf::IValueBuilder* valueBuilder, - const NUdf::TUnboxedValuePod* args) const override - { - Y_UNUSED(valueBuilder); - return NUdf::TUnboxedValuePod(args[0].Get<ui32>() + 42); - } - }; - - TPythonTestEngine engine; - engine.ToPythonAndBack<i32 (*)(i32)>( - [](const TType* 
type, const NUdf::IValueBuilder& vb) { - Y_UNUSED(type); Y_UNUSED(vb); - return NUdf::TUnboxedValuePod(new TTestCallable); - }, - "def Test(value): return value", - [](const NUdf::TUnboxedValuePod& value) { - UNIT_ASSERT(value); - UNIT_ASSERT(value.IsBoxed()); - NUdf::TUnboxedValue arg = NUdf::TUnboxedValuePod((ui32) 5); - const auto result = value.Run(nullptr, &arg); - - UNIT_ASSERT(result); - UNIT_ASSERT_VALUES_EQUAL(47, result.Get<ui32>()); - }); - } + UNIT_ASSERT(result); + UNIT_ASSERT_VALUES_EQUAL(47, result.Get<ui32>()); + }); } +} // Y_UNIT_TEST_SUITE(TPyCallableTest) diff --git a/yql/essentials/udfs/common/python/bindings/py_cast.cpp b/yql/essentials/udfs/common/python/bindings/py_cast.cpp index 3048f803a72..42237428bb3 100644 --- a/yql/essentials/udfs/common/python/bindings/py_cast.cpp +++ b/yql/essentials/udfs/common/python/bindings/py_cast.cpp @@ -27,253 +27,252 @@ #include <util/string/builder.h> #ifdef HAVE_LONG_LONG -# define YQL_PyLong_AsUnsignedMask PyLong_AsUnsignedLongLongMask // NOLINT(readability-identifier-naming) -# define YQL_PyLong_Asi64 PyLong_AsLongLong -# define YQL_PyLong_Asui64 PyLong_AsUnsignedLongLong + #define YQL_PyLong_AsUnsignedMask PyLong_AsUnsignedLongLongMask // NOLINT(readability-identifier-naming) + #define YQL_PyLong_Asi64 PyLong_AsLongLong + #define YQL_PyLong_Asui64 PyLong_AsUnsignedLongLong #else -# define YQL_PyLong_AsUnsignedMask PyLong_AsUnsignedLongMask // NOLINT(readability-identifier-naming) -# define YQL_PyLong_Asi64 PyLong_AsLong -# define YQL_PyLong_Asui64 PyLong_AsUnsignedLong + #define YQL_PyLong_AsUnsignedMask PyLong_AsUnsignedLongMask // NOLINT(readability-identifier-naming) + #define YQL_PyLong_Asi64 PyLong_AsLong + #define YQL_PyLong_Asui64 PyLong_AsUnsignedLong #endif -#define TO_PYTHON(Format, Type) \ - template <> \ +#define TO_PYTHON(Format, Type) \ + template <> \ ::NPython::TPyObjectPtr PyCast<Type>(Type value) { \ - return Py_BuildValue(Format, value); \ + return Py_BuildValue(Format, value); \ } 
-#define TO_PYTHON_BYTES(Type) \ - template <> \ - ::NPython::TPyObjectPtr PyCast<Type>(const Type& val) { \ - TStringBuf value = val; \ - if (value.data() == nullptr) \ - Py_RETURN_NONE; \ +#define TO_PYTHON_BYTES(Type) \ + template <> \ + ::NPython::TPyObjectPtr PyCast<Type>(const Type& val) { \ + TStringBuf value = val; \ + if (value.data() == nullptr) \ + Py_RETURN_NONE; \ const Py_ssize_t size = static_cast<Py_ssize_t>(value.size()); \ - return PyBytes_FromStringAndSize(value.data(), size); \ + return PyBytes_FromStringAndSize(value.data(), size); \ } -#define TO_PYTHON_UNICODE(Type) \ - template <> \ +#define TO_PYTHON_UNICODE(Type) \ + template <> \ ::NPython::TPyObjectPtr ToPyUnicode<Type>(const Type& val) { \ - TStringBuf value = val; \ - if (value.data() == nullptr) \ - Py_RETURN_NONE; \ + TStringBuf value = val; \ + if (value.data() == nullptr) \ + Py_RETURN_NONE; \ Py_ssize_t size = static_cast<Py_ssize_t>(value.size()); \ - return PyUnicode_FromStringAndSize(value.data(), size); \ + return PyUnicode_FromStringAndSize(value.data(), size); \ } -#define PY_ENSURE_TYPE(Type, Value, Message) \ - do { \ - if (!Py##Type##_Check(Value)) { \ +#define PY_ENSURE_TYPE(Type, Value, Message) \ + do { \ + if (!Py##Type##_Check(Value)) { \ throw yexception() << Message << " " #Type "; Object repr: " \ - << PyObjectRepr(Value); \ - } \ + << PyObjectRepr(Value); \ + } \ } while (0) -#define FROM_PYTHON_FLOAT(Type) \ - template <> \ - Type PyCast<Type>(PyObject* value) { \ - double result = PyFloat_AsDouble(value); \ +#define FROM_PYTHON_FLOAT(Type) \ + template <> \ + Type PyCast<Type>(PyObject * value) { \ + double result = PyFloat_AsDouble(value); \ if (result == -1.0 && PyErr_Occurred()) { \ - PyErr_Clear(); \ - ThrowCastException(value, "Float"); \ - } \ - return static_cast<Type>(result); \ + PyErr_Clear(); \ + ThrowCastException(value, "Float"); \ + } \ + return static_cast<Type>(result); \ } -#define FROM_PYTHON_LONG(Type, BigType) \ - template <> \ - Type 
PyCast<Type>(PyObject* value) { \ - if (PyLong_Check(value)) { \ - auto result = YQL_PyLong_As##BigType(value); \ - if (result == static_cast<Type>(-1L) && PyErr_Occurred()) { \ - PyErr_Clear(); \ - ThrowCastException(value, "Long"); \ - } \ - if (result < Min<Type>() || result > Max<Type>()) { \ +#define FROM_PYTHON_LONG(Type, BigType) \ + template <> \ + Type PyCast<Type>(PyObject * value) { \ + if (PyLong_Check(value)) { \ + auto result = YQL_PyLong_As##BigType(value); \ + if (result == static_cast<Type>(-1L) && PyErr_Occurred()) { \ + PyErr_Clear(); \ + ThrowCastException(value, "Long"); \ + } \ + if (result < Min<Type>() || result > Max<Type>()) { \ throw yexception() << "Python object " << PyObjectRepr(value) \ - << " is out of range for " << #Type; \ - } \ - return static_cast<Type>(result); \ - } \ - ThrowCastTypeException(value, "Long"); \ + << " is out of range for " << #Type; \ + } \ + return static_cast<Type>(result); \ + } \ + ThrowCastTypeException(value, "Long"); \ } -#define FROM_PYTHON_INT_OR_LONG(Type, BigType) \ - template <> \ - Type PyCast<Type>(PyObject* value) { \ - if (PyInt_Check(value)) { \ - long result = PyInt_AsLong(value); \ - if (result == -1L && PyErr_Occurred()) { \ - PyErr_Clear(); \ - ThrowCastException(value, "Long"); \ - } \ - if ( \ - static_cast<i64>(Min<long>()) < static_cast<i64>(Min<Type>()) && result < static_cast<long>(Min<Type>()) || \ - static_cast<ui64>(Max<long>()) > static_cast<ui64>(Max<Type>()) && result > static_cast<long>(Max<Type>()) \ - ) { \ - throw yexception() << "Python object " << PyObjectRepr(value) \ - << " is out of range for " << #Type; \ - } \ - return static_cast<Type>(result); \ - } else if (PyLong_Check(value)) { \ - auto result = YQL_PyLong_As##BigType(value); \ - if (result == static_cast<Type>(-1L) && PyErr_Occurred()) { \ - PyErr_Clear(); \ - ThrowCastException(value, "Long"); \ - } \ - if (result < Min<Type>() || result > Max<Type>()) { \ - throw yexception() << "Python object " << 
PyObjectRepr(value) \ - << " is out of range for " << #Type; \ - } \ - return static_cast<Type>(result); \ - } \ - ThrowCastTypeException(value, "Long"); \ +#define FROM_PYTHON_INT_OR_LONG(Type, BigType) \ + template <> \ + Type PyCast<Type>(PyObject * value) { \ + if (PyInt_Check(value)) { \ + long result = PyInt_AsLong(value); \ + if (result == -1L && PyErr_Occurred()) { \ + PyErr_Clear(); \ + ThrowCastException(value, "Long"); \ + } \ + if ( \ + static_cast<i64>(Min<long>()) < static_cast<i64>(Min<Type>()) && result < static_cast<long>(Min<Type>()) || \ + static_cast<ui64>(Max<long>()) > static_cast<ui64>(Max<Type>()) && result > static_cast<long>(Max<Type>())) { \ + throw yexception() << "Python object " << PyObjectRepr(value) \ + << " is out of range for " << #Type; \ + } \ + return static_cast<Type>(result); \ + } else if (PyLong_Check(value)) { \ + auto result = YQL_PyLong_As##BigType(value); \ + if (result == static_cast<Type>(-1L) && PyErr_Occurred()) { \ + PyErr_Clear(); \ + ThrowCastException(value, "Long"); \ + } \ + if (result < Min<Type>() || result > Max<Type>()) { \ + throw yexception() << "Python object " << PyObjectRepr(value) \ + << " is out of range for " << #Type; \ + } \ + return static_cast<Type>(result); \ + } \ + ThrowCastTypeException(value, "Long"); \ } -#define FROM_PYTHON_BYTES_OR_UTF(Type) \ - template <> \ - Type PyCast<Type>(PyObject* value) { \ - if (PyUnicode_Check(value)) { \ - Py_ssize_t size = 0U; \ - const auto str = PyUnicode_AsUTF8AndSize(value, &size); \ - if (!str || size < 0) { \ - ThrowCastTypeException(value, "String"); \ - } \ - return Type(str, size_t(size)); \ - } else if (PyBytes_Check(value)) { \ - Py_ssize_t size = 0U; \ - char *str = nullptr; \ +#define FROM_PYTHON_BYTES_OR_UTF(Type) \ + template <> \ + Type PyCast<Type>(PyObject * value) { \ + if (PyUnicode_Check(value)) { \ + Py_ssize_t size = 0U; \ + const auto str = PyUnicode_AsUTF8AndSize(value, &size); \ + if (!str || size < 0) { \ + 
ThrowCastTypeException(value, "String"); \ + } \ + return Type(str, size_t(size)); \ + } else if (PyBytes_Check(value)) { \ + Py_ssize_t size = 0U; \ + char* str = nullptr; \ const auto rc = PyBytes_AsStringAndSize(value, &str, &size); \ - if (rc == -1 || size < 0) { \ - ThrowCastTypeException(value, "String"); \ - } \ - return Type(str, size_t(size)); \ - } \ - ThrowCastTypeException(value, "String"); \ + if (rc == -1 || size < 0) { \ + ThrowCastTypeException(value, "String"); \ + } \ + return Type(str, size_t(size)); \ + } \ + ThrowCastTypeException(value, "String"); \ } -#define FROM_PYTHON_BYTES(Type) \ - template <> \ - Type PyCast<Type>(PyObject* value) { \ - PY_ENSURE_TYPE(Bytes, value, "Expected"); \ - char* str = nullptr; \ - Py_ssize_t size = 0; \ +#define FROM_PYTHON_BYTES(Type) \ + template <> \ + Type PyCast<Type>(PyObject * value) { \ + PY_ENSURE_TYPE(Bytes, value, "Expected"); \ + char* str = nullptr; \ + Py_ssize_t size = 0; \ const auto rc = PyBytes_AsStringAndSize(value, &str, &size); \ - if (rc == -1 || size < 0) { \ - ThrowCastTypeException(value, "String"); \ - } \ - return Type(str, size_t(size)); \ + if (rc == -1 || size < 0) { \ + ThrowCastTypeException(value, "String"); \ + } \ + return Type(str, size_t(size)); \ } -#define TRY_FROM_PYTHON_FLOAT(Type) \ - template <> \ - bool TryPyCast<Type>(PyObject* value, Type& result) { \ - double v = PyFloat_AsDouble(value); \ - if (v == -1.0 && PyErr_Occurred()) { \ - PyErr_Clear(); \ - return false; \ - } \ - result = static_cast<Type>(v); \ - return true; \ +#define TRY_FROM_PYTHON_FLOAT(Type) \ + template <> \ + bool TryPyCast<Type>(PyObject * value, Type & result) { \ + double v = PyFloat_AsDouble(value); \ + if (v == -1.0 && PyErr_Occurred()) { \ + PyErr_Clear(); \ + return false; \ + } \ + result = static_cast<Type>(v); \ + return true; \ } -#define TRY_FROM_PYTHON_LONG(Type, BigType) \ - template <> \ - bool TryPyCast<Type>(PyObject* value, Type& res) { \ - if (PyLong_Check(value)) { \ - auto 
result = YQL_PyLong_As##BigType(value); \ +#define TRY_FROM_PYTHON_LONG(Type, BigType) \ + template <> \ + bool TryPyCast<Type>(PyObject * value, Type & res) { \ + if (PyLong_Check(value)) { \ + auto result = YQL_PyLong_As##BigType(value); \ if (result == static_cast<Type>(-1L) && PyErr_Occurred()) { \ - PyErr_Clear(); \ - return false; \ - } \ - if (result < Min<Type>() || result > Max<Type>()) { \ - return false; \ - } \ - res = static_cast<Type>(result); \ - return true; \ - } \ - return false; \ + PyErr_Clear(); \ + return false; \ + } \ + if (result < Min<Type>() || result > Max<Type>()) { \ + return false; \ + } \ + res = static_cast<Type>(result); \ + return true; \ + } \ + return false; \ } -#define TRY_FROM_PYTHON_INT_OR_LONG(Type, BigType) \ - template <> \ - bool TryPyCast<Type>(PyObject* value, Type& res) { \ - if (PyInt_Check(value)) { \ - long result = PyInt_AsLong(value); \ - if (result == -1L && PyErr_Occurred()) { \ - PyErr_Clear(); \ - return false; \ - } \ - res = static_cast<Type>(result); \ +#define TRY_FROM_PYTHON_INT_OR_LONG(Type, BigType) \ + template <> \ + bool TryPyCast<Type>(PyObject * value, Type & res) { \ + if (PyInt_Check(value)) { \ + long result = PyInt_AsLong(value); \ + if (result == -1L && PyErr_Occurred()) { \ + PyErr_Clear(); \ + return false; \ + } \ + res = static_cast<Type>(result); \ if (result < static_cast<long>(Min<Type>()) || (static_cast<ui64>(Max<long>()) > static_cast<ui64>(Max<Type>()) && result > static_cast<long>(Max<Type>()))) { \ - return false; \ - } \ - return true; \ - } else if (PyLong_Check(value)) { \ - auto result = YQL_PyLong_As##BigType(value); \ - if (result == static_cast<Type>(-1L) && PyErr_Occurred()) { \ - PyErr_Clear(); \ - return false; \ - } \ - if (result < Min<Type>() || result > Max<Type>()) { \ - return false; \ - } \ - res = static_cast<Type>(result); \ - return true; \ - } \ - return false; \ + return false; \ + } \ + return true; \ + } else if (PyLong_Check(value)) { \ + auto result = 
YQL_PyLong_As##BigType(value); \ + if (result == static_cast<Type>(-1L) && PyErr_Occurred()) { \ + PyErr_Clear(); \ + return false; \ + } \ + if (result < Min<Type>() || result > Max<Type>()) { \ + return false; \ + } \ + res = static_cast<Type>(result); \ + return true; \ + } \ + return false; \ } -#define TRY_FROM_PYTHON_BYTES_OR_UTF(Type) \ - template <> \ - bool TryPyCast(PyObject* value, Type& result) { \ - if (PyUnicode_Check(value)) { \ - Py_ssize_t size = 0U; \ - const auto str = PyUnicode_AsUTF8AndSize(value, &size); \ - if (!str || size < 0) { \ - return false; \ - } \ - result = Type(str, size_t(size)); \ - return true; \ - } else if (PyBytes_Check(value)) { \ - Py_ssize_t size = 0U; \ - char *str = nullptr; \ +#define TRY_FROM_PYTHON_BYTES_OR_UTF(Type) \ + template <> \ + bool TryPyCast(PyObject* value, Type& result) { \ + if (PyUnicode_Check(value)) { \ + Py_ssize_t size = 0U; \ + const auto str = PyUnicode_AsUTF8AndSize(value, &size); \ + if (!str || size < 0) { \ + return false; \ + } \ + result = Type(str, size_t(size)); \ + return true; \ + } else if (PyBytes_Check(value)) { \ + Py_ssize_t size = 0U; \ + char* str = nullptr; \ const auto rc = PyBytes_AsStringAndSize(value, &str, &size); \ - if (rc == -1 || size < 0) { \ - ThrowCastTypeException(value, "String"); \ - } \ - result = Type(str, size_t(size)); \ - return true; \ - } \ - return false; \ + if (rc == -1 || size < 0) { \ + ThrowCastTypeException(value, "String"); \ + } \ + result = Type(str, size_t(size)); \ + return true; \ + } \ + return false; \ } -#define TRY_FROM_PYTHON_STR_OR_UTF(Type) \ - template <> \ - bool TryPyCast(PyObject* value, Type& result) { \ - if (PyUnicode_Check(value)) { \ - const TPyObjectPtr utf8(AsUtf8StringOrThrow(value)); \ - char* str = nullptr; \ - Py_ssize_t size = 0; \ +#define TRY_FROM_PYTHON_STR_OR_UTF(Type) \ + template <> \ + bool TryPyCast(PyObject* value, Type& result) { \ + if (PyUnicode_Check(value)) { \ + const TPyObjectPtr 
utf8(AsUtf8StringOrThrow(value)); \ + char* str = nullptr; \ + Py_ssize_t size = 0; \ int rc = PyBytes_AsStringAndSize(utf8.Get(), &str, &size); \ - if (rc == -1 || size < 0) { \ - return false; \ - } \ - result = Type(str, size_t(size)); \ - return true; \ - } else if (PyBytes_Check(value)) { \ - char* str = nullptr; \ - Py_ssize_t size = 0; \ - int rc = PyBytes_AsStringAndSize(value, &str, &size); \ - if (rc == -1 || size < 0) { \ - return false; \ - } \ - result = Type(str, size_t(size)); \ - return true; \ - } else { \ - return false; \ - } \ + if (rc == -1 || size < 0) { \ + return false; \ + } \ + result = Type(str, size_t(size)); \ + return true; \ + } else if (PyBytes_Check(value)) { \ + char* str = nullptr; \ + Py_ssize_t size = 0; \ + int rc = PyBytes_AsStringAndSize(value, &str, &size); \ + if (rc == -1 || size < 0) { \ + return false; \ + } \ + result = Type(str, size_t(size)); \ + return true; \ + } else { \ + return false; \ + } \ } namespace NPython { @@ -289,7 +288,8 @@ NPython::TPyObjectPtr AsUtf8StringOrThrow(PyObject* obj) { Y_DEFER { PyErr_Clear(); }; - throw yexception() << "Failed to convert the string to UTF-8 format. Original message is:\n" << GetLastErrorAsString() << "\n"; + throw yexception() << "Failed to convert the string to UTF-8 format. 
Original message is:\n" + << GetLastErrorAsString() << "\n"; } return NPython::TPyObjectPtr(utf8String); } @@ -306,7 +306,6 @@ inline void ThrowCastException(PyObject* value, TStringBuf toType) { << GetLastErrorAsString(); } - template <> bool TryPyCast<bool>(PyObject* value, bool& result) { @@ -416,19 +415,19 @@ TO_PYTHON_UNICODE(NUdf::TStringRef) template <typename T> NUdf::TUnboxedValuePod FromPyTz(PyObject* value, T limit, TStringBuf typeName, const TPyCastContext::TPtr& ctx) { PY_ENSURE(PyTuple_Check(value), - "Expected to get Tuple, but got " << Py_TYPE(value)->tp_name); + "Expected to get Tuple, but got " << Py_TYPE(value)->tp_name); Py_ssize_t tupleSize = PyTuple_GET_SIZE(value); PY_ENSURE(tupleSize == 2, - "Expected to get Tuple with 2 elements, but got " - << tupleSize << " elements"); + "Expected to get Tuple with 2 elements, but got " + << tupleSize << " elements"); PyObject* el0 = PyTuple_GET_ITEM(value, 0); PyObject* el1 = PyTuple_GET_ITEM(value, 1); auto num = PyCast<T>(el0); if (num >= limit) { - throw yexception() << "Python object " << PyObjectRepr(el0) \ - << " is out of range for " << typeName; + throw yexception() << "Python object " << PyObjectRepr(el0) + << " is out of range for " << typeName; } auto name = PyCast<NUdf::TStringRef>(el1); @@ -448,217 +447,248 @@ TO_PYTHON("d", double) namespace { TPyObjectPtr ToPyData(const TPyCastContext::TPtr& ctx, - const NUdf::TType* type, const NUdf::TUnboxedValuePod& value) + const NUdf::TType* type, const NUdf::TUnboxedValuePod& value) { const NUdf::TDataAndDecimalTypeInspector inspector(*ctx->PyCtx->TypeInfoHelper, type); const auto typeId = inspector.GetTypeId(); switch (typeId) { - case NUdf::TDataType<i8>::Id: return PyCast<i8>(value.Get<i8>()); - case NUdf::TDataType<ui8>::Id: return PyCast<ui8>(value.Get<ui8>()); - case NUdf::TDataType<i16>::Id: return PyCast<i16>(value.Get<i16>()); - case NUdf::TDataType<ui16>::Id: return PyCast<ui16>(value.Get<ui16>()); - case NUdf::TDataType<i32>::Id: return 
PyCast<i32>(value.Get<i32>()); - case NUdf::TDataType<ui32>::Id: return PyCast<ui32>(value.Get<ui32>()); - case NUdf::TDataType<i64>::Id: return PyCast<i64>(value.Get<i64>()); - case NUdf::TDataType<ui64>::Id: return PyCast<ui64>(value.Get<ui64>()); - case NUdf::TDataType<bool>::Id: return PyCast<bool>(value.Get<bool>()); - case NUdf::TDataType<float>::Id: return PyCast<float>(value.Get<float>()); - case NUdf::TDataType<double>::Id: return PyCast<double>(value.Get<double>()); - case NUdf::TDataType<NUdf::TDecimal>::Id: return ToPyDecimal(ctx, value, inspector.GetPrecision(), inspector.GetScale()); - case NUdf::TDataType<const char*>::Id: { - if (ctx->BytesDecodeMode == EBytesDecodeMode::Never) { - return PyCast<NUdf::TStringRef>(value.AsStringRef()); - } else { - auto pyObj = ToPyUnicode<NUdf::TStringRef>(value.AsStringRef()); - if (!pyObj) { - UdfTerminate((TStringBuilder() << ctx->PyCtx->Pos << - "Failed to convert to unicode with _yql_bytes_decode_mode='strict':\n" << - GetLastErrorAsString()).c_str() - ); + case NUdf::TDataType<i8>::Id: + return PyCast<i8>(value.Get<i8>()); + case NUdf::TDataType<ui8>::Id: + return PyCast<ui8>(value.Get<ui8>()); + case NUdf::TDataType<i16>::Id: + return PyCast<i16>(value.Get<i16>()); + case NUdf::TDataType<ui16>::Id: + return PyCast<ui16>(value.Get<ui16>()); + case NUdf::TDataType<i32>::Id: + return PyCast<i32>(value.Get<i32>()); + case NUdf::TDataType<ui32>::Id: + return PyCast<ui32>(value.Get<ui32>()); + case NUdf::TDataType<i64>::Id: + return PyCast<i64>(value.Get<i64>()); + case NUdf::TDataType<ui64>::Id: + return PyCast<ui64>(value.Get<ui64>()); + case NUdf::TDataType<bool>::Id: + return PyCast<bool>(value.Get<bool>()); + case NUdf::TDataType<float>::Id: + return PyCast<float>(value.Get<float>()); + case NUdf::TDataType<double>::Id: + return PyCast<double>(value.Get<double>()); + case NUdf::TDataType<NUdf::TDecimal>::Id: + return ToPyDecimal(ctx, value, inspector.GetPrecision(), inspector.GetScale()); + case 
NUdf::TDataType<const char*>::Id: { + if (ctx->BytesDecodeMode == EBytesDecodeMode::Never) { + return PyCast<NUdf::TStringRef>(value.AsStringRef()); + } else { + auto pyObj = ToPyUnicode<NUdf::TStringRef>(value.AsStringRef()); + if (!pyObj) { + UdfTerminate((TStringBuilder() << ctx->PyCtx->Pos << "Failed to convert to unicode with _yql_bytes_decode_mode='strict':\n" + << GetLastErrorAsString()) + .c_str()); + } + return pyObj; } - return pyObj; } - } - case NUdf::TDataType<NUdf::TYson>::Id: { - auto pyObj = PyCast<NUdf::TStringRef>(value.AsStringRef()); - if (ctx->YsonConverterIn) { - TPyObjectPtr pyArgs(PyTuple_New(1)); - PyTuple_SET_ITEM(pyArgs.Get(), 0, pyObj.Release()); - pyObj = PyObject_CallObject(ctx->YsonConverterIn.Get(), pyArgs.Get()); - if (!pyObj) { - UdfTerminate((TStringBuilder() << ctx->PyCtx->Pos << "Failed to execute:\n" << GetLastErrorAsString()).c_str()); + case NUdf::TDataType<NUdf::TYson>::Id: { + auto pyObj = PyCast<NUdf::TStringRef>(value.AsStringRef()); + if (ctx->YsonConverterIn) { + TPyObjectPtr pyArgs(PyTuple_New(1)); + PyTuple_SET_ITEM(pyArgs.Get(), 0, pyObj.Release()); + pyObj = PyObject_CallObject(ctx->YsonConverterIn.Get(), pyArgs.Get()); + if (!pyObj) { + UdfTerminate((TStringBuilder() << ctx->PyCtx->Pos << "Failed to execute:\n" + << GetLastErrorAsString()) + .c_str()); + } } - } - return pyObj; - } - case NUdf::TDataType<NUdf::TUuid>::Id: - return PyCast<NUdf::TStringRef>(value.AsStringRef()); - case NUdf::TDataType<NUdf::TJson>::Id: - case NUdf::TDataType<NUdf::TUtf8>::Id: - return ToPyUnicode<NUdf::TStringRef>(value.AsStringRef()); - case NUdf::TDataType<NUdf::TDate>::Id: return PyCast<ui16>(value.Get<ui16>()); - case NUdf::TDataType<NUdf::TDatetime>::Id: return PyCast<ui32>(value.Get<ui32>()); - case NUdf::TDataType<NUdf::TTimestamp>::Id: return PyCast<ui64>(value.Get<ui64>()); - case NUdf::TDataType<NUdf::TInterval>::Id: return PyCast<i64>(value.Get<i64>()); - case NUdf::TDataType<NUdf::TTzDate>::Id: { - TPyObjectPtr pyValue = 
PyCast<ui16>(value.Get<ui16>()); - auto tzId = value.GetTimezoneId(); - auto tzName = ctx->GetTimezoneName(tzId); - return PyTuple_Pack(2, pyValue.Get(), tzName.Get()); - } - case NUdf::TDataType<NUdf::TTzDatetime>::Id: { - TPyObjectPtr pyValue = PyCast<ui32>(value.Get<ui32>()); - auto tzId = value.GetTimezoneId(); - auto tzName = ctx->GetTimezoneName(tzId); - return PyTuple_Pack(2, pyValue.Get(), tzName.Get()); - } - case NUdf::TDataType<NUdf::TTzTimestamp>::Id: { - TPyObjectPtr pyValue = PyCast<ui64>(value.Get<ui64>()); - auto tzId = value.GetTimezoneId(); - auto tzName = ctx->GetTimezoneName(tzId); - return PyTuple_Pack(2, pyValue.Get(), tzName.Get()); - } + return pyObj; + } + case NUdf::TDataType<NUdf::TUuid>::Id: + return PyCast<NUdf::TStringRef>(value.AsStringRef()); + case NUdf::TDataType<NUdf::TJson>::Id: + case NUdf::TDataType<NUdf::TUtf8>::Id: + return ToPyUnicode<NUdf::TStringRef>(value.AsStringRef()); + case NUdf::TDataType<NUdf::TDate>::Id: + return PyCast<ui16>(value.Get<ui16>()); + case NUdf::TDataType<NUdf::TDatetime>::Id: + return PyCast<ui32>(value.Get<ui32>()); + case NUdf::TDataType<NUdf::TTimestamp>::Id: + return PyCast<ui64>(value.Get<ui64>()); + case NUdf::TDataType<NUdf::TInterval>::Id: + return PyCast<i64>(value.Get<i64>()); + case NUdf::TDataType<NUdf::TTzDate>::Id: { + TPyObjectPtr pyValue = PyCast<ui16>(value.Get<ui16>()); + auto tzId = value.GetTimezoneId(); + auto tzName = ctx->GetTimezoneName(tzId); + return PyTuple_Pack(2, pyValue.Get(), tzName.Get()); + } + case NUdf::TDataType<NUdf::TTzDatetime>::Id: { + TPyObjectPtr pyValue = PyCast<ui32>(value.Get<ui32>()); + auto tzId = value.GetTimezoneId(); + auto tzName = ctx->GetTimezoneName(tzId); + return PyTuple_Pack(2, pyValue.Get(), tzName.Get()); + } + case NUdf::TDataType<NUdf::TTzTimestamp>::Id: { + TPyObjectPtr pyValue = PyCast<ui64>(value.Get<ui64>()); + auto tzId = value.GetTimezoneId(); + auto tzName = ctx->GetTimezoneName(tzId); + return PyTuple_Pack(2, pyValue.Get(), 
tzName.Get()); + } } throw yexception() - << "Unsupported type " << typeId; + << "Unsupported type " << typeId; } NUdf::TUnboxedValue FromPyData( - const TPyCastContext::TPtr& ctx, - const NUdf::TType* type, PyObject* value) + const TPyCastContext::TPtr& ctx, + const NUdf::TType* type, PyObject* value) { const NUdf::TDataAndDecimalTypeInspector inspector(*ctx->PyCtx->TypeInfoHelper, type); const auto typeId = inspector.GetTypeId(); switch (typeId) { - case NUdf::TDataType<i8>::Id: return NUdf::TUnboxedValuePod(PyCast<i8>(value)); - case NUdf::TDataType<ui8>::Id: return NUdf::TUnboxedValuePod(PyCast<ui8>(value)); - case NUdf::TDataType<i16>::Id: return NUdf::TUnboxedValuePod(PyCast<i16>(value)); - case NUdf::TDataType<ui16>::Id: return NUdf::TUnboxedValuePod(PyCast<ui16>(value)); - case NUdf::TDataType<i32>::Id: return NUdf::TUnboxedValuePod(PyCast<i32>(value)); - case NUdf::TDataType<ui32>::Id: return NUdf::TUnboxedValuePod(PyCast<ui32>(value)); - case NUdf::TDataType<i64>::Id: return NUdf::TUnboxedValuePod(PyCast<i64>(value)); - case NUdf::TDataType<ui64>::Id: return NUdf::TUnboxedValuePod(PyCast<ui64>(value)); - case NUdf::TDataType<bool>::Id: return NUdf::TUnboxedValuePod(PyCast<bool>(value)); - case NUdf::TDataType<float>::Id: return NUdf::TUnboxedValuePod(PyCast<float>(value)); - case NUdf::TDataType<double>::Id: return NUdf::TUnboxedValuePod(PyCast<double>(value)); - case NUdf::TDataType<NUdf::TDecimal>::Id: return FromPyDecimal(ctx, value, inspector.GetPrecision(), inspector.GetScale()); - case NUdf::TDataType<NUdf::TYson>::Id: { - if (ctx->YsonConverterOut) { - TPyObjectPtr input(value, TPyObjectPtr::ADD_REF); - TPyObjectPtr pyArgs(PyTuple_New(1)); - // PyTuple_SET_ITEM steals reference, so pass ownership to it - PyTuple_SET_ITEM(pyArgs.Get(), 0, input.Release()); - input.ResetSteal(PyObject_CallObject(ctx->YsonConverterOut.Get(), pyArgs.Get())); - if (!input) { - UdfTerminate((TStringBuilder() << ctx->PyCtx->Pos << "Failed to execute:\n" << 
GetLastErrorAsString()).c_str()); + case NUdf::TDataType<i8>::Id: + return NUdf::TUnboxedValuePod(PyCast<i8>(value)); + case NUdf::TDataType<ui8>::Id: + return NUdf::TUnboxedValuePod(PyCast<ui8>(value)); + case NUdf::TDataType<i16>::Id: + return NUdf::TUnboxedValuePod(PyCast<i16>(value)); + case NUdf::TDataType<ui16>::Id: + return NUdf::TUnboxedValuePod(PyCast<ui16>(value)); + case NUdf::TDataType<i32>::Id: + return NUdf::TUnboxedValuePod(PyCast<i32>(value)); + case NUdf::TDataType<ui32>::Id: + return NUdf::TUnboxedValuePod(PyCast<ui32>(value)); + case NUdf::TDataType<i64>::Id: + return NUdf::TUnboxedValuePod(PyCast<i64>(value)); + case NUdf::TDataType<ui64>::Id: + return NUdf::TUnboxedValuePod(PyCast<ui64>(value)); + case NUdf::TDataType<bool>::Id: + return NUdf::TUnboxedValuePod(PyCast<bool>(value)); + case NUdf::TDataType<float>::Id: + return NUdf::TUnboxedValuePod(PyCast<float>(value)); + case NUdf::TDataType<double>::Id: + return NUdf::TUnboxedValuePod(PyCast<double>(value)); + case NUdf::TDataType<NUdf::TDecimal>::Id: + return FromPyDecimal(ctx, value, inspector.GetPrecision(), inspector.GetScale()); + case NUdf::TDataType<NUdf::TYson>::Id: { + if (ctx->YsonConverterOut) { + TPyObjectPtr input(value, TPyObjectPtr::ADD_REF); + TPyObjectPtr pyArgs(PyTuple_New(1)); + // PyTuple_SET_ITEM steals reference, so pass ownership to it + PyTuple_SET_ITEM(pyArgs.Get(), 0, input.Release()); + input.ResetSteal(PyObject_CallObject(ctx->YsonConverterOut.Get(), pyArgs.Get())); + if (!input) { + UdfTerminate((TStringBuilder() << ctx->PyCtx->Pos << "Failed to execute:\n" + << GetLastErrorAsString()) + .c_str()); + } + return ctx->ValueBuilder->NewString(PyCast<NUdf::TStringRef>(input.Get())); } - return ctx->ValueBuilder->NewString(PyCast<NUdf::TStringRef>(input.Get())); } - } #if PY_MAJOR_VERSION >= 3 - case NUdf::TDataType<const char*>::Id: - return ctx->ValueBuilder->NewString(PyCast<NUdf::TStringRef>(value)); - case NUdf::TDataType<NUdf::TUtf8>::Id: - case 
NUdf::TDataType<NUdf::TJson>::Id: - if (PyUnicode_Check(value)) { - const TPyObjectPtr uif8(AsUtf8StringOrThrow(value)); - return ctx->ValueBuilder->NewString(PyCast<NUdf::TStringRef>(uif8.Get())); - } - throw yexception() << "Python object " << PyObjectRepr(value) << " has invalid value for unicode"; + case NUdf::TDataType<const char*>::Id: + return ctx->ValueBuilder->NewString(PyCast<NUdf::TStringRef>(value)); + case NUdf::TDataType<NUdf::TUtf8>::Id: + case NUdf::TDataType<NUdf::TJson>::Id: + if (PyUnicode_Check(value)) { + const TPyObjectPtr uif8(AsUtf8StringOrThrow(value)); + return ctx->ValueBuilder->NewString(PyCast<NUdf::TStringRef>(uif8.Get())); + } + throw yexception() << "Python object " << PyObjectRepr(value) << " has invalid value for unicode"; #else - case NUdf::TDataType<const char*>::Id: - case NUdf::TDataType<NUdf::TJson>::Id: - case NUdf::TDataType<NUdf::TUtf8>::Id: { - if (PyUnicode_Check(value)) { - const TPyObjectPtr utf8(AsUtf8StringOrThrow(value)); - return ctx->ValueBuilder->NewString(PyCast<NUdf::TStringRef>(utf8.Get())); - } + case NUdf::TDataType<const char*>::Id: + case NUdf::TDataType<NUdf::TJson>::Id: + case NUdf::TDataType<NUdf::TUtf8>::Id: { + if (PyUnicode_Check(value)) { + const TPyObjectPtr utf8(AsUtf8StringOrThrow(value)); + return ctx->ValueBuilder->NewString(PyCast<NUdf::TStringRef>(utf8.Get())); + } - if ((typeId == NUdf::TDataType<NUdf::TUtf8>::Id || typeId == NUdf::TDataType<NUdf::TJson>::Id) && - PyBytes_Check(value) && !NYql::IsUtf8(std::string_view(PyBytes_AS_STRING(value), static_cast<size_t>(PyBytes_GET_SIZE(value))))) { - throw yexception() << "Python string " << PyObjectRepr(value) << " is invalid for Utf8/Json"; - } + if ((typeId == NUdf::TDataType<NUdf::TUtf8>::Id || typeId == NUdf::TDataType<NUdf::TJson>::Id) && + PyBytes_Check(value) && !NYql::IsUtf8(std::string_view(PyBytes_AS_STRING(value), static_cast<size_t>(PyBytes_GET_SIZE(value))))) { + throw yexception() << "Python string " << PyObjectRepr(value) << " is 
invalid for Utf8/Json"; + } - return ctx->ValueBuilder->NewString(PyCast<NUdf::TStringRef>(value)); - } + return ctx->ValueBuilder->NewString(PyCast<NUdf::TStringRef>(value)); + } #endif - case NUdf::TDataType<NUdf::TUuid>::Id: { - const auto& ret = ctx->ValueBuilder->NewString(PyCast<NUdf::TStringRef>(value)); - if (ret.AsStringRef().Size() != 16) { - throw yexception() << "Python object " << PyObjectRepr(value) \ - << " has invalid value for Uuid"; + case NUdf::TDataType<NUdf::TUuid>::Id: { + const auto& ret = ctx->ValueBuilder->NewString(PyCast<NUdf::TStringRef>(value)); + if (ret.AsStringRef().Size() != 16) { + throw yexception() << "Python object " << PyObjectRepr(value) + << " has invalid value for Uuid"; + } + + return ret; } + case NUdf::TDataType<NUdf::TDate>::Id: { + auto num = PyCast<ui16>(value); + if (num >= NUdf::MAX_DATE) { + throw yexception() << "Python object " << PyObjectRepr(value) + << " is out of range for Date"; + } - return ret; - } - case NUdf::TDataType<NUdf::TDate>::Id: { - auto num = PyCast<ui16>(value); - if (num >= NUdf::MAX_DATE) { - throw yexception() << "Python object " << PyObjectRepr(value) \ - << " is out of range for Date"; + return NUdf::TUnboxedValuePod(num); } - return NUdf::TUnboxedValuePod(num); - } + case NUdf::TDataType<NUdf::TDatetime>::Id: { + auto num = PyCast<ui32>(value); + if (num >= NUdf::MAX_DATETIME) { + throw yexception() << "Python object " << PyObjectRepr(value) + << " is out of range for Datetime"; + } - case NUdf::TDataType<NUdf::TDatetime>::Id: { - auto num = PyCast<ui32>(value); - if (num >= NUdf::MAX_DATETIME) { - throw yexception() << "Python object " << PyObjectRepr(value) \ - << " is out of range for Datetime"; + return NUdf::TUnboxedValuePod(num); } - return NUdf::TUnboxedValuePod(num); - } + case NUdf::TDataType<NUdf::TTimestamp>::Id: { + auto num = PyCast<ui64>(value); + if (num >= NUdf::MAX_TIMESTAMP) { + throw yexception() << "Python object " << PyObjectRepr(value) + << " is out of range for 
Timestamp"; + } - case NUdf::TDataType<NUdf::TTimestamp>::Id: { - auto num = PyCast<ui64>(value); - if (num >= NUdf::MAX_TIMESTAMP) { - throw yexception() << "Python object " << PyObjectRepr(value) \ - << " is out of range for Timestamp"; + return NUdf::TUnboxedValuePod(num); } - return NUdf::TUnboxedValuePod(num); - } + case NUdf::TDataType<NUdf::TInterval>::Id: { + auto num = PyCast<i64>(value); + if (num <= -(i64)NUdf::MAX_TIMESTAMP || num >= (i64)NUdf::MAX_TIMESTAMP) { + throw yexception() << "Python object " << PyObjectRepr(value) + << " is out of range for Interval"; + } - case NUdf::TDataType<NUdf::TInterval>::Id: { - auto num = PyCast<i64>(value); - if (num <= -(i64)NUdf::MAX_TIMESTAMP || num >= (i64)NUdf::MAX_TIMESTAMP) { - throw yexception() << "Python object " << PyObjectRepr(value) \ - << " is out of range for Interval"; + return NUdf::TUnboxedValuePod(num); } - return NUdf::TUnboxedValuePod(num); - } - - case NUdf::TDataType<NUdf::TTzDate>::Id: - return FromPyTz<ui16>(value, NUdf::MAX_DATE, TStringBuf("TzDate"), ctx); - case NUdf::TDataType<NUdf::TTzDatetime>::Id: - return FromPyTz<ui32>(value, NUdf::MAX_DATETIME, TStringBuf("TzDatetime"), ctx); - case NUdf::TDataType<NUdf::TTzTimestamp>::Id: - return FromPyTz<ui64>(value, NUdf::MAX_TIMESTAMP, TStringBuf("TzTimestamp"), ctx); + case NUdf::TDataType<NUdf::TTzDate>::Id: + return FromPyTz<ui16>(value, NUdf::MAX_DATE, TStringBuf("TzDate"), ctx); + case NUdf::TDataType<NUdf::TTzDatetime>::Id: + return FromPyTz<ui32>(value, NUdf::MAX_DATETIME, TStringBuf("TzDatetime"), ctx); + case NUdf::TDataType<NUdf::TTzTimestamp>::Id: + return FromPyTz<ui64>(value, NUdf::MAX_TIMESTAMP, TStringBuf("TzTimestamp"), ctx); } throw yexception() - << "Unsupported type " << typeId; + << "Unsupported type " << typeId; } TPyObjectPtr ToPyTagged( - const TPyCastContext::TPtr& ctx, - const NUdf::TType* type, - const NUdf::TUnboxedValuePod& value) + const TPyCastContext::TPtr& ctx, + const NUdf::TType* type, + const 
NUdf::TUnboxedValuePod& value) { const NUdf::TTaggedTypeInspector inspector(*ctx->PyCtx->TypeInfoHelper, type); return ToPyObject(ctx, inspector.GetBaseType(), value); } NUdf::TUnboxedValue FromPyTagged( - const TPyCastContext::TPtr& ctx, - const NUdf::TType* type, PyObject* value) + const TPyCastContext::TPtr& ctx, + const NUdf::TType* type, PyObject* value) { const NUdf::TTaggedTypeInspector inspector(*ctx->PyCtx->TypeInfoHelper, type); return FromPyObject(ctx, inspector.GetBaseType(), value).Release(); @@ -682,7 +712,7 @@ TPyObjectPtr ToPyList( auto pyItem = ToPyObject(ctx, itemType, item); if (PyList_Append(list.Get(), pyItem.Get()) < 0) { throw yexception() << "Can't append item to list" - << GetLastErrorAsString(); + << GetLastErrorAsString(); } } @@ -690,8 +720,8 @@ TPyObjectPtr ToPyList( } NUdf::TUnboxedValue FromPyList( - const TPyCastContext::TPtr& ctx, - const NUdf::TType* type, PyObject* value) + const TPyCastContext::TPtr& ctx, + const NUdf::TType* type, PyObject* value) { const NUdf::TListTypeInspector inspector(*ctx->PyCtx->TypeInfoHelper, type); @@ -699,10 +729,10 @@ NUdf::TUnboxedValue FromPyList( // eager list to list conversion auto itemType = inspector.GetItemType(); Py_ssize_t cnt = PyList_GET_SIZE(value); - NUdf::TUnboxedValue *items = nullptr; + NUdf::TUnboxedValue* items = nullptr; const auto list = ctx->ValueBuilder->NewArray(cnt, items); for (Py_ssize_t i = 0; i < cnt; ++i) { - PyObject *item = PyList_GET_ITEM(value, i); + PyObject* item = PyList_GET_ITEM(value, i); *items++ = FromPyObject(ctx, itemType, item); } return list; @@ -712,10 +742,10 @@ NUdf::TUnboxedValue FromPyList( // eager tuple to list conversion auto itemType = inspector.GetItemType(); Py_ssize_t cnt = PyTuple_GET_SIZE(value); - NUdf::TUnboxedValue *items = nullptr; + NUdf::TUnboxedValue* items = nullptr; const auto list = ctx->ValueBuilder->NewArray(cnt, items); for (Py_ssize_t i = 0; i < cnt; ++i) { - PyObject *item = PyTuple_GET_ITEM(value, i); + PyObject* item = 
PyTuple_GET_ITEM(value, i); *items++ = FromPyObject(ctx, itemType, item); } return list; @@ -748,13 +778,14 @@ NUdf::TUnboxedValue FromPyList( } throw yexception() << "Expected list, tuple, generator, generator factory, " - "iterator or iterable object, but got: " << PyObjectRepr(value); + "iterator or iterable object, but got: " + << PyObjectRepr(value); } TPyObjectPtr ToPyOptional( - const TPyCastContext::TPtr& ctx, - const NUdf::TType* type, - const NUdf::TUnboxedValuePod& value) + const TPyCastContext::TPtr& ctx, + const NUdf::TType* type, + const NUdf::TUnboxedValuePod& value) { if (!value) { return TPyObjectPtr(Py_None, TPyObjectPtr::ADD_REF); @@ -765,8 +796,8 @@ TPyObjectPtr ToPyOptional( } NUdf::TUnboxedValue FromPyOptional( - const TPyCastContext::TPtr& ctx, - const NUdf::TType* type, PyObject* value) + const TPyCastContext::TPtr& ctx, + const NUdf::TType* type, PyObject* value) { if (value == Py_None) { return NUdf::TUnboxedValue(); @@ -786,7 +817,7 @@ TPyObjectPtr ToPyDict( const auto valueType = inspector.GetValueType(); if (NUdf::ETypeKind::Void == ctx->PyCtx->TypeInfoHelper->GetTypeKind(valueType)) { - if (ctx->LazyInputObjects) { // TODO + if (ctx->LazyInputObjects) { // TODO return ToPyLazySet(ctx, keyType, value); } @@ -820,15 +851,14 @@ TPyObjectPtr ToPyDict( } NUdf::TUnboxedValue FromPyDict( - const TPyCastContext::TPtr& ctx, - const NUdf::TType* type, PyObject* value) + const TPyCastContext::TPtr& ctx, + const NUdf::TType* type, PyObject* value) { const NUdf::TDictTypeInspector inspector(*ctx->PyCtx->TypeInfoHelper, type); const auto keyType = inspector.GetKeyType(); const auto valueType = inspector.GetValueType(); - if ((PyList_Check(value) || PyTuple_Check(value) || value->ob_type == &PyThinListType || value->ob_type == &PyLazyListType) - && ctx->PyCtx->TypeInfoHelper->GetTypeKind(keyType) == NUdf::ETypeKind::Data) { + if ((PyList_Check(value) || PyTuple_Check(value) || value->ob_type == &PyThinListType || value->ob_type == &PyLazyListType) && 
ctx->PyCtx->TypeInfoHelper->GetTypeKind(keyType) == NUdf::ETypeKind::Data) { const NUdf::TDataTypeInspector keiIns(*ctx->PyCtx->TypeInfoHelper, keyType); if (NUdf::GetDataTypeInfo(NUdf::GetDataSlot(keiIns.GetTypeId())).Features & NUdf::EDataTypeFeatures::IntegralType) { return FromPySequence(ctx, valueType, keiIns.GetTypeId(), value); @@ -845,7 +875,7 @@ NUdf::TUnboxedValue FromPyDict( return FromPyMapping(ctx, keyType, valueType, value); } - throw yexception() << "Can't cast "<< PyObjectRepr(value) << " to dict."; + throw yexception() << "Can't cast " << PyObjectRepr(value) << " to dict."; } TPyObjectPtr ToPyNull( @@ -860,8 +890,8 @@ TPyObjectPtr ToPyNull( } NUdf::TUnboxedValue FromPyNull( - const TPyCastContext::TPtr& ctx, - const NUdf::TType* type, PyObject* value) + const TPyCastContext::TPtr& ctx, + const NUdf::TType* type, PyObject* value) { if (value == Py_None) { return NYql::NUdf::TUnboxedValuePod(); @@ -872,23 +902,36 @@ NUdf::TUnboxedValue FromPyNull( } // namespace TPyObjectPtr ToPyObject( - const TPyCastContext::TPtr& ctx, - const NUdf::TType* type, const NUdf::TUnboxedValuePod& value) + const TPyCastContext::TPtr& ctx, + const NUdf::TType* type, const NUdf::TUnboxedValuePod& value) { switch (ctx->PyCtx->TypeInfoHelper->GetTypeKind(type)) { - case NUdf::ETypeKind::Data: return ToPyData(ctx, type, value); - case NUdf::ETypeKind::Tagged: return ToPyTagged(ctx, type, value); - case NUdf::ETypeKind::Tuple: return ToPyTuple(ctx, type, value); - case NUdf::ETypeKind::Struct: return ToPyStruct(ctx, type, value); - case NUdf::ETypeKind::List: return ToPyList(ctx, type, value); - case NUdf::ETypeKind::Optional: return ToPyOptional(ctx, type, value); - case NUdf::ETypeKind::Dict: return ToPyDict(ctx, type, value); - case NUdf::ETypeKind::Callable: return ToPyCallable(ctx, type, value); - case NUdf::ETypeKind::Resource: return ToPyResource(ctx, type, value); - case NUdf::ETypeKind::Void: return ToPyVoid(ctx, type, value); - case NUdf::ETypeKind::Stream: return 
ToPyStream(ctx, type, value); - case NUdf::ETypeKind::Variant: return ToPyVariant(ctx, type, value); - case NUdf::ETypeKind::Null: return ToPyNull(ctx, type, value); + case NUdf::ETypeKind::Data: + return ToPyData(ctx, type, value); + case NUdf::ETypeKind::Tagged: + return ToPyTagged(ctx, type, value); + case NUdf::ETypeKind::Tuple: + return ToPyTuple(ctx, type, value); + case NUdf::ETypeKind::Struct: + return ToPyStruct(ctx, type, value); + case NUdf::ETypeKind::List: + return ToPyList(ctx, type, value); + case NUdf::ETypeKind::Optional: + return ToPyOptional(ctx, type, value); + case NUdf::ETypeKind::Dict: + return ToPyDict(ctx, type, value); + case NUdf::ETypeKind::Callable: + return ToPyCallable(ctx, type, value); + case NUdf::ETypeKind::Resource: + return ToPyResource(ctx, type, value); + case NUdf::ETypeKind::Void: + return ToPyVoid(ctx, type, value); + case NUdf::ETypeKind::Stream: + return ToPyStream(ctx, type, value); + case NUdf::ETypeKind::Variant: + return ToPyVariant(ctx, type, value); + case NUdf::ETypeKind::Null: + return ToPyNull(ctx, type, value); default: { ::TStringBuilder sb; sb << "Failed to export: "; @@ -899,23 +942,36 @@ TPyObjectPtr ToPyObject( } NUdf::TUnboxedValue FromPyObject( - const TPyCastContext::TPtr& ctx, - const NUdf::TType* type, PyObject* value) + const TPyCastContext::TPtr& ctx, + const NUdf::TType* type, PyObject* value) { switch (ctx->PyCtx->TypeInfoHelper->GetTypeKind(type)) { - case NUdf::ETypeKind::Data: return FromPyData(ctx, type, value); - case NUdf::ETypeKind::Tagged: return FromPyTagged(ctx, type, value); - case NUdf::ETypeKind::Tuple: return FromPyTuple(ctx, type, value); - case NUdf::ETypeKind::Struct: return FromPyStruct(ctx, type, value); - case NUdf::ETypeKind::List: return FromPyList(ctx, type, value); - case NUdf::ETypeKind::Optional: return FromPyOptional(ctx, type, value); - case NUdf::ETypeKind::Dict: return FromPyDict(ctx, type, value); - case NUdf::ETypeKind::Callable: return FromPyCallable(ctx, type, 
value); - case NUdf::ETypeKind::Resource: return FromPyResource(ctx, type, value); - case NUdf::ETypeKind::Void: return FromPyVoid(ctx, type, value); - case NUdf::ETypeKind::Stream: return FromPyStream(ctx, type, TPyObjectPtr(value, TPyObjectPtr::ADD_REF), nullptr, nullptr, nullptr); - case NUdf::ETypeKind::Variant: return FromPyVariant(ctx, type, value); - case NUdf::ETypeKind::Null: return FromPyNull(ctx, type, value); + case NUdf::ETypeKind::Data: + return FromPyData(ctx, type, value); + case NUdf::ETypeKind::Tagged: + return FromPyTagged(ctx, type, value); + case NUdf::ETypeKind::Tuple: + return FromPyTuple(ctx, type, value); + case NUdf::ETypeKind::Struct: + return FromPyStruct(ctx, type, value); + case NUdf::ETypeKind::List: + return FromPyList(ctx, type, value); + case NUdf::ETypeKind::Optional: + return FromPyOptional(ctx, type, value); + case NUdf::ETypeKind::Dict: + return FromPyDict(ctx, type, value); + case NUdf::ETypeKind::Callable: + return FromPyCallable(ctx, type, value); + case NUdf::ETypeKind::Resource: + return FromPyResource(ctx, type, value); + case NUdf::ETypeKind::Void: + return FromPyVoid(ctx, type, value); + case NUdf::ETypeKind::Stream: + return FromPyStream(ctx, type, TPyObjectPtr(value, TPyObjectPtr::ADD_REF), nullptr, nullptr, nullptr); + case NUdf::ETypeKind::Variant: + return FromPyVariant(ctx, type, value); + case NUdf::ETypeKind::Null: + return FromPyNull(ctx, type, value); default: { ::TStringBuilder sb; sb << "Failed to import: "; @@ -926,10 +982,10 @@ NUdf::TUnboxedValue FromPyObject( } TPyObjectPtr ToPyArgs( - const TPyCastContext::TPtr& ctx, - const NUdf::TType* type, - const NUdf::TUnboxedValuePod* args, - const NUdf::TCallableTypeInspector& inspector) + const TPyCastContext::TPtr& ctx, + const NUdf::TType* type, + const NUdf::TUnboxedValuePod* args, + const NUdf::TCallableTypeInspector& inspector) { const auto argsCount = inspector.GetArgsCount(); TPyObjectPtr tuple(PyTuple_New(argsCount)); @@ -955,11 +1011,11 @@ TPyObjectPtr 
ToPyArgs( } void FromPyArgs( - const TPyCastContext::TPtr& ctx, - const NUdf::TType* type, - PyObject* pyArgs, - NUdf::TUnboxedValue* cArgs, - const NUdf::TCallableTypeInspector& inspector) + const TPyCastContext::TPtr& ctx, + const NUdf::TType* type, + PyObject* pyArgs, + NUdf::TUnboxedValue* cArgs, + const NUdf::TCallableTypeInspector& inspector) { PY_ENSURE_TYPE(Tuple, pyArgs, "Expected"); @@ -968,9 +1024,10 @@ void FromPyArgs( ui32 pyArgsCount = static_cast<ui32>(PyTuple_GET_SIZE(pyArgs)); PY_ENSURE(argsCount - optArgsCount <= pyArgsCount && pyArgsCount <= argsCount, - "arguments count missmatch: " - "min " << (argsCount - optArgsCount) << ", max " << argsCount - << ", got " << pyArgsCount); + "arguments count missmatch: " + "min " + << (argsCount - optArgsCount) << ", max " << argsCount + << ", got " << pyArgsCount); for (ui32 i = 0; i < pyArgsCount; i++) { PyObject* item = PyTuple_GET_ITEM(pyArgs, i); @@ -982,10 +1039,12 @@ void FromPyArgs( } } -class TDummyMemoryLock : public IMemoryLock { +class TDummyMemoryLock: public IMemoryLock { public: - void Acquire() override {} - void Release() override {} + void Acquire() override { + } + void Release() override { + } }; TPyCastContext::TPyCastContext( @@ -1023,4 +1082,4 @@ const TPyObjectPtr& TPyCastContext::GetTimezoneName(ui32 id) { return x; } -} // namspace NPython +} // namespace NPython diff --git a/yql/essentials/udfs/common/python/bindings/py_cast.h b/yql/essentials/udfs/common/python/bindings/py_cast.h index e6850c74040..5dc4182ae91 100644 --- a/yql/essentials/udfs/common/python/bindings/py_cast.h +++ b/yql/essentials/udfs/common/python/bindings/py_cast.h @@ -20,26 +20,26 @@ template <typename T> TPyObjectPtr ToPyUnicode(const T& value); TPyObjectPtr ToPyObject( - const TPyCastContext::TPtr& ctx, - const NKikimr::NUdf::TType* type, - const NKikimr::NUdf::TUnboxedValuePod& value); + const TPyCastContext::TPtr& ctx, + const NKikimr::NUdf::TType* type, + const NKikimr::NUdf::TUnboxedValuePod& value); 
NKikimr::NUdf::TUnboxedValue FromPyObject( - const TPyCastContext::TPtr& ctx, - const NKikimr::NUdf::TType* type, - PyObject* value); + const TPyCastContext::TPtr& ctx, + const NKikimr::NUdf::TType* type, + PyObject* value); TPyObjectPtr ToPyArgs( - const TPyCastContext::TPtr& ctx, - const NKikimr::NUdf::TType* type, - const NKikimr::NUdf::TUnboxedValuePod* args, - const NKikimr::NUdf::TCallableTypeInspector& inspector); + const TPyCastContext::TPtr& ctx, + const NKikimr::NUdf::TType* type, + const NKikimr::NUdf::TUnboxedValuePod* args, + const NKikimr::NUdf::TCallableTypeInspector& inspector); void FromPyArgs( - const TPyCastContext::TPtr& ctx, - const NKikimr::NUdf::TType* type, - PyObject* pyArgs, - NKikimr::NUdf::TUnboxedValue* cArgs, - const NKikimr::NUdf::TCallableTypeInspector& inspector); + const TPyCastContext::TPtr& ctx, + const NKikimr::NUdf::TType* type, + PyObject* pyArgs, + NKikimr::NUdf::TUnboxedValue* cArgs, + const NKikimr::NUdf::TCallableTypeInspector& inspector); -} // namspace NPython +} // namespace NPython diff --git a/yql/essentials/udfs/common/python/bindings/py_cast_ut.cpp b/yql/essentials/udfs/common/python/bindings/py_cast_ut.cpp index 3c6514aea02..dcd7cb8da40 100644 --- a/yql/essentials/udfs/common/python/bindings/py_cast_ut.cpp +++ b/yql/essentials/udfs/common/python/bindings/py_cast_ut.cpp @@ -25,128 +25,127 @@ UnicodeEncodeError: 'utf-8' codec can't encode character '\udc00' in position 0: )"; UNIT_ASSERT_EXCEPTION_CONTAINS( - engine.ToMiniKQL<TType>( - StripString(TString(programToRun)), - [](const NUdf::TUnboxedValuePod& value) { - Y_UNUSED(value); - }), - yexception, StripString(TString(expectedError))); + engine.ToMiniKQL<TType>( + StripString(TString(programToRun)), + [](const NUdf::TUnboxedValuePod& value) { + Y_UNUSED(value); + }), + yexception, StripString(TString(expectedError))); } } // namespace Y_UNIT_TEST_SUITE(TPyCastTest) { - Y_UNIT_TEST(FromPyStrToInt) { - TPythonTestEngine engine; - UNIT_ASSERT_EXCEPTION_CONTAINS( - 
engine.ToMiniKQL<i32>( - "def Test():\n" - " return '123a'", - [](const NUdf::TUnboxedValuePod& value) { - Y_UNUSED(value); - }), - yexception, "str"); - } +Y_UNIT_TEST(FromPyStrToInt) { + TPythonTestEngine engine; + UNIT_ASSERT_EXCEPTION_CONTAINS( + engine.ToMiniKQL<i32>( + "def Test():\n" + " return '123a'", + [](const NUdf::TUnboxedValuePod& value) { + Y_UNUSED(value); + }), + yexception, "str"); +} - Y_UNIT_TEST(FromPyTupleToLong) { - TPythonTestEngine engine; - UNIT_ASSERT_EXCEPTION_CONTAINS( - engine.ToMiniKQL<ui64>( - "def Test():\n" - " return 1, 1", - [](const NUdf::TUnboxedValuePod& value) { - Y_UNUSED(value); - }), - yexception, "tuple"); - } +Y_UNIT_TEST(FromPyTupleToLong) { + TPythonTestEngine engine; + UNIT_ASSERT_EXCEPTION_CONTAINS( + engine.ToMiniKQL<ui64>( + "def Test():\n" + " return 1, 1", + [](const NUdf::TUnboxedValuePod& value) { + Y_UNUSED(value); + }), + yexception, "tuple"); +} - Y_UNIT_TEST(FromPyFuncToString) { - TPythonTestEngine engine; - UNIT_ASSERT_EXCEPTION_CONTAINS( - engine.ToMiniKQL<char*>( - "def f():\n" - " return 42\n" - "def Test():\n" - " return f", - [](const NUdf::TUnboxedValuePod& value) { - Y_UNUSED(value); - }), - yexception, "function"); - } +Y_UNIT_TEST(FromPyFuncToString) { + TPythonTestEngine engine; + UNIT_ASSERT_EXCEPTION_CONTAINS( + engine.ToMiniKQL<char*>( + "def f():\n" + " return 42\n" + "def Test():\n" + " return f", + [](const NUdf::TUnboxedValuePod& value) { + Y_UNUSED(value); + }), + yexception, "function"); +} - Y_UNIT_TEST(FromPyNoneToString) { - TPythonTestEngine engine; - UNIT_ASSERT_EXCEPTION_CONTAINS( - engine.ToMiniKQL<char*>( - "def Test():\n" - " return None", - [](const NUdf::TUnboxedValuePod& value) { - Y_UNUSED(value); - }), - yexception, "None"); - } +Y_UNIT_TEST(FromPyNoneToString) { + TPythonTestEngine engine; + UNIT_ASSERT_EXCEPTION_CONTAINS( + engine.ToMiniKQL<char*>( + "def Test():\n" + " return None", + [](const NUdf::TUnboxedValuePod& value) { + Y_UNUSED(value); + }), + yexception, 
"None"); +} - Y_UNIT_TEST(BadFromPythonFloat) { - TPythonTestEngine engine; - UNIT_ASSERT_EXCEPTION_CONTAINS( - engine.ToMiniKQL<float>( - "def Test():\n" - " return '3 <dot> 1415926'", - [](const NUdf::TUnboxedValuePod& value) { - Y_UNUSED(value); - Y_UNREACHABLE(); - }), - yexception, "Cast error object '3 <dot> 1415926' to Float"); - } +Y_UNIT_TEST(BadFromPythonFloat) { + TPythonTestEngine engine; + UNIT_ASSERT_EXCEPTION_CONTAINS( + engine.ToMiniKQL<float>( + "def Test():\n" + " return '3 <dot> 1415926'", + [](const NUdf::TUnboxedValuePod& value) { + Y_UNUSED(value); + Y_UNREACHABLE(); + }), + yexception, "Cast error object '3 <dot> 1415926' to Float"); +} #if PY_MAJOR_VERSION >= 3 -# define RETVAL "-1" + #define RETVAL "-1" #else -# define RETVAL "-18446744073709551616L" + #define RETVAL "-18446744073709551616L" #endif - Y_UNIT_TEST(BadFromPythonLong) { - TPythonTestEngine engine; - UNIT_ASSERT_EXCEPTION_CONTAINS( - engine.ToMiniKQL<ui64>( - "def Test():\n" - " return " RETVAL, - [](const NUdf::TUnboxedValuePod& value) { - Y_UNUSED(value); - Y_UNREACHABLE(); - }), - yexception, "Cast error object " RETVAL " to Long"); - } +Y_UNIT_TEST(BadFromPythonLong) { + TPythonTestEngine engine; + UNIT_ASSERT_EXCEPTION_CONTAINS( + engine.ToMiniKQL<ui64>( + "def Test():\n" + " return " RETVAL, + [](const NUdf::TUnboxedValuePod& value) { + Y_UNUSED(value); + Y_UNREACHABLE(); + }), + yexception, "Cast error object " RETVAL " to Long"); +} - Y_UNIT_TEST(BadFromPythonUtf8) { - TestBadUtf8Encode<NUdf::TUtf8>(); - } +Y_UNIT_TEST(BadFromPythonUtf8) { + TestBadUtf8Encode<NUdf::TUtf8>(); +} - Y_UNIT_TEST(BadFromPythonJson) { - TestBadUtf8Encode<NUdf::TJson>(); - } +Y_UNIT_TEST(BadFromPythonJson) { + TestBadUtf8Encode<NUdf::TJson>(); +} - Y_UNIT_TEST(BadToPythonJson) { - TPythonTestEngine engine; - UNIT_ASSERT_EXCEPTION_CONTAINS( - engine.UnsafeCall<void(NUdf::TJson)>( - [](const TType*, const NUdf::IValueBuilder& builder) { - // XXX: The value below is built with the - // following 
expression: - // $query = "a=1&t%EDb=2"; - // $qdict = Url::QueryStringToDict($query); - // $qyson = Yson::From($qdict); - // $badJson = Yson::SerializeJson($qyson); - // - // For more info, see YQL-20231 and YQL-20220. - constexpr TStringBuf badJson = "\x7b\x22\x61\x22\x3a\x5b\x22\x31\x22\x5d\x2c\x22\x74\xed\x62\x22\x3a\x5b\x22\x32\x22\x5d\x7d"; - return builder.NewString(badJson); - }, - "def Test(arg):\n" - " pass", - [](const NUdf::TUnboxedValuePod&) { - Y_UNREACHABLE(); - } - ), - yexception, "Failed to export Json given as args[0]"); - } +Y_UNIT_TEST(BadToPythonJson) { + TPythonTestEngine engine; + UNIT_ASSERT_EXCEPTION_CONTAINS( + engine.UnsafeCall<void(NUdf::TJson)>( + [](const TType*, const NUdf::IValueBuilder& builder) { + // XXX: The value below is built with the + // following expression: + // $query = "a=1&t%EDb=2"; + // $qdict = Url::QueryStringToDict($query); + // $qyson = Yson::From($qdict); + // $badJson = Yson::SerializeJson($qyson); + // + // For more info, see YQL-20231 and YQL-20220. 
+ constexpr TStringBuf badJson = "\x7b\x22\x61\x22\x3a\x5b\x22\x31\x22\x5d\x2c\x22\x74\xed\x62\x22\x3a\x5b\x22\x32\x22\x5d\x7d"; + return builder.NewString(badJson); + }, + "def Test(arg):\n" + " pass", + [](const NUdf::TUnboxedValuePod&) { + Y_UNREACHABLE(); + }), + yexception, "Failed to export Json given as args[0]"); +} } // Y_UNIT_TEST_SUITE(TPyCastTest) diff --git a/yql/essentials/udfs/common/python/bindings/py_ctx.h b/yql/essentials/udfs/common/python/bindings/py_ctx.h index 7958fc1f815..d832d0b2def 100644 --- a/yql/essentials/udfs/common/python/bindings/py_ctx.h +++ b/yql/essentials/udfs/common/python/bindings/py_ctx.h @@ -79,7 +79,7 @@ struct TPyContext: public TSimpleRefCount<TPyContext> { } void Cleanup() { - for (auto& o: CleanupList) { + for (auto& o : CleanupList) { o.Cleanup(); } CleanupList.Clear(); @@ -91,7 +91,7 @@ struct TPyContext: public TSimpleRefCount<TPyContext> { }; struct TPyCastContext: public TSimpleRefCount<TPyCastContext> { - const NKikimr::NUdf::IValueBuilder *const ValueBuilder; + const NKikimr::NUdf::IValueBuilder* const ValueBuilder; const TPyContext::TPtr PyCtx; std::unordered_map<const NKikimr::NUdf::TType*, TPyObjectPtr> StructTypes; bool LazyInputObjects = true; @@ -103,9 +103,9 @@ struct TPyCastContext: public TSimpleRefCount<TPyCastContext> { THolder<IMemoryLock> MemoryLock; TPyCastContext( - const NKikimr::NUdf::IValueBuilder* builder, - TPyContext::TPtr pyCtx, - THolder<IMemoryLock> memoryLock = {}); + const NKikimr::NUdf::IValueBuilder* builder, + TPyContext::TPtr pyCtx, + THolder<IMemoryLock> memoryLock = {}); ~TPyCastContext(); @@ -117,4 +117,4 @@ struct TPyCastContext: public TSimpleRefCount<TPyCastContext> { using TPyCastContextPtr = TPyCastContext::TPtr; -} // namspace NPython +} // namespace NPython diff --git a/yql/essentials/udfs/common/python/bindings/py_decimal_ut.cpp b/yql/essentials/udfs/common/python/bindings/py_decimal_ut.cpp index 3f0298013a0..2440f4c281a 100644 --- 
a/yql/essentials/udfs/common/python/bindings/py_decimal_ut.cpp +++ b/yql/essentials/udfs/common/python/bindings/py_decimal_ut.cpp @@ -5,118 +5,113 @@ using namespace NPython; Y_UNIT_TEST_SUITE(TPyDecimalTest) { - Y_UNIT_TEST(FromPyZero) { - TPythonTestEngine engine; - engine.ToMiniKQL<NUdf::TDecimalDataType<12,5>>( - R"( +Y_UNIT_TEST(FromPyZero) { + TPythonTestEngine engine; + engine.ToMiniKQL<NUdf::TDecimalDataType<12, 5>>( + R"( from decimal import Decimal def Test(): return Decimal() )", - [](const NUdf::TUnboxedValuePod& value) { - UNIT_ASSERT(value); - UNIT_ASSERT(!value.GetInt128()); - }); - } + [](const NUdf::TUnboxedValuePod& value) { + UNIT_ASSERT(value); + UNIT_ASSERT(!value.GetInt128()); + }); +} - Y_UNIT_TEST(FromPyPi) { - TPythonTestEngine engine; - engine.ToMiniKQL<NUdf::TDecimalDataType<28,18>>( - R"( +Y_UNIT_TEST(FromPyPi) { + TPythonTestEngine engine; + engine.ToMiniKQL<NUdf::TDecimalDataType<28, 18>>( + R"( from decimal import Decimal def Test(): return Decimal('3.141592653589793238') )", - [](const NUdf::TUnboxedValuePod& value) { - UNIT_ASSERT(value); - UNIT_ASSERT(value.GetInt128() == 3141592653589793238LL); - }); - } + [](const NUdf::TUnboxedValuePod& value) { + UNIT_ASSERT(value); + UNIT_ASSERT(value.GetInt128() == 3141592653589793238LL); + }); +} - Y_UNIT_TEST(FromPyTini) { - TPythonTestEngine engine; - engine.ToMiniKQL<NUdf::TDecimalDataType<35,35>>( - R"( +Y_UNIT_TEST(FromPyTini) { + TPythonTestEngine engine; + engine.ToMiniKQL<NUdf::TDecimalDataType<35, 35>>( + R"( from decimal import Decimal def Test(): return Decimal('-.00000000000000000000000000000000001') )", - [](const NUdf::TUnboxedValuePod& value) { - UNIT_ASSERT(value); - UNIT_ASSERT(value.GetInt128() == -1); - }); - } + [](const NUdf::TUnboxedValuePod& value) { + UNIT_ASSERT(value); + UNIT_ASSERT(value.GetInt128() == -1); + }); +} - Y_UNIT_TEST(FromPyNan) { - TPythonTestEngine engine; - engine.ToMiniKQL<NUdf::TDecimalDataType<35,34>>( - R"( +Y_UNIT_TEST(FromPyNan) { + 
TPythonTestEngine engine; + engine.ToMiniKQL<NUdf::TDecimalDataType<35, 34>>( + R"( from decimal import Decimal def Test(): return Decimal('NaN') )", - [](const NUdf::TUnboxedValuePod& value) { - UNIT_ASSERT(value); - UNIT_ASSERT(value.GetInt128() == NYql::NDecimal::Nan()); - }); - } + [](const NUdf::TUnboxedValuePod& value) { + UNIT_ASSERT(value); + UNIT_ASSERT(value.GetInt128() == NYql::NDecimal::Nan()); + }); +} - Y_UNIT_TEST(FromPyInf) { - TPythonTestEngine engine; - engine.ToMiniKQL<NUdf::TDecimalDataType<35,34>>( - R"( +Y_UNIT_TEST(FromPyInf) { + TPythonTestEngine engine; + engine.ToMiniKQL<NUdf::TDecimalDataType<35, 34>>( + R"( from decimal import Decimal def Test(): return Decimal('-inf') )", - [](const NUdf::TUnboxedValuePod& value) { - UNIT_ASSERT(value); - UNIT_ASSERT(value.GetInt128() == -NYql::NDecimal::Inf()); - }); - } + [](const NUdf::TUnboxedValuePod& value) { + UNIT_ASSERT(value); + UNIT_ASSERT(value.GetInt128() == -NYql::NDecimal::Inf()); + }); +} - Y_UNIT_TEST(ToPyZero) { - TPythonTestEngine engine; - engine.ToPython<NUdf::TDecimalDataType<7,7>>( - [](const TType*, const NUdf::IValueBuilder&) { - return NUdf::TUnboxedValuePod::Zero(); - }, - "def Test(value): assert value.is_zero()" - ); - } +Y_UNIT_TEST(ToPyZero) { + TPythonTestEngine engine; + engine.ToPython<NUdf::TDecimalDataType<7, 7>>( + [](const TType*, const NUdf::IValueBuilder&) { + return NUdf::TUnboxedValuePod::Zero(); + }, + "def Test(value): assert value.is_zero()"); +} - Y_UNIT_TEST(ToPyPi) { - TPythonTestEngine engine; - engine.ToPython<NUdf::TDecimalDataType<20,18>>( - [](const TType*, const NUdf::IValueBuilder&) { - return NUdf::TUnboxedValuePod(NYql::NDecimal::TInt128(3141592653589793238LL)); - }, - "def Test(value): assert str(value) == '3.141592653589793238'" - ); - } +Y_UNIT_TEST(ToPyPi) { + TPythonTestEngine engine; + engine.ToPython<NUdf::TDecimalDataType<20, 18>>( + [](const TType*, const NUdf::IValueBuilder&) { + return 
NUdf::TUnboxedValuePod(NYql::NDecimal::TInt128(3141592653589793238LL)); + }, + "def Test(value): assert str(value) == '3.141592653589793238'"); +} - Y_UNIT_TEST(ToPyTini) { - TPythonTestEngine engine; - engine.ToPython<NUdf::TDecimalDataType<35,35>>( - [](const TType*, const NUdf::IValueBuilder&) { - return NUdf::TUnboxedValuePod(NYql::NDecimal::TInt128(-1)); - }, - "def Test(value): assert format(value, '.35f') == '-0.00000000000000000000000000000000001'" - ); - } +Y_UNIT_TEST(ToPyTini) { + TPythonTestEngine engine; + engine.ToPython<NUdf::TDecimalDataType<35, 35>>( + [](const TType*, const NUdf::IValueBuilder&) { + return NUdf::TUnboxedValuePod(NYql::NDecimal::TInt128(-1)); + }, + "def Test(value): assert format(value, '.35f') == '-0.00000000000000000000000000000000001'"); +} - Y_UNIT_TEST(ToPyNan) { - TPythonTestEngine engine; - engine.ToPython<NUdf::TDecimalDataType<2,2>>( - [](const TType*, const NUdf::IValueBuilder&) { - return NUdf::TUnboxedValuePod(NYql::NDecimal::Nan()); - }, - "def Test(value): assert value.is_nan()" - ); - } +Y_UNIT_TEST(ToPyNan) { + TPythonTestEngine engine; + engine.ToPython<NUdf::TDecimalDataType<2, 2>>( + [](const TType*, const NUdf::IValueBuilder&) { + return NUdf::TUnboxedValuePod(NYql::NDecimal::Nan()); + }, + "def Test(value): assert value.is_nan()"); +} - Y_UNIT_TEST(ToPyInf) { - TPythonTestEngine engine; - engine.ToPython<NUdf::TDecimalDataType<30,0>>( - [](const TType*, const NUdf::IValueBuilder&) { - return NUdf::TUnboxedValuePod(-NYql::NDecimal::Inf()); - }, - "def Test(value): assert value.is_infinite() and value.is_signed()" - ); - } +Y_UNIT_TEST(ToPyInf) { + TPythonTestEngine engine; + engine.ToPython<NUdf::TDecimalDataType<30, 0>>( + [](const TType*, const NUdf::IValueBuilder&) { + return NUdf::TUnboxedValuePod(-NYql::NDecimal::Inf()); + }, + "def Test(value): assert value.is_infinite() and value.is_signed()"); } +} // Y_UNIT_TEST_SUITE(TPyDecimalTest) diff --git a/yql/essentials/udfs/common/python/bindings/py_dict.cpp 
b/yql/essentials/udfs/common/python/bindings/py_dict.cpp index 2df6eb4e99f..577f8c769d2 100644 --- a/yql/essentials/udfs/common/python/bindings/py_dict.cpp +++ b/yql/essentials/udfs/common/python/bindings/py_dict.cpp @@ -8,7 +8,6 @@ #include <yql/essentials/public/udf/udf_value_builder.h> #include <yql/essentials/public/udf/udf_type_inspection.h> - using namespace NKikimr; namespace NPython { @@ -16,8 +15,7 @@ namespace NPython { ////////////////////////////////////////////////////////////////////////////// // TPyLazyDict interface ////////////////////////////////////////////////////////////////////////////// -struct TPyLazyDict -{ +struct TPyLazyDict { using TPtr = NUdf::TRefCountedPtr<TPyLazyDict, TPyPtrOps<TPyLazyDict>>; PyObject_HEAD; @@ -35,10 +33,10 @@ struct TPyLazyDict } static PyObject* New( - const TPyCastContext::TPtr& castCtx, - const NUdf::TType* keyType, - const NUdf::TType* payloadType, - NUdf::IBoxedValuePtr&& value); + const TPyCastContext::TPtr& castCtx, + const NUdf::TType* keyType, + const NUdf::TType* payloadType, + NUdf::IBoxedValuePtr&& value); static int Bool(PyObject* self); static PyObject* Repr(PyObject* self); @@ -47,7 +45,9 @@ struct TPyLazyDict static int Contains(PyObject* self, PyObject* key); static PyObject* Get(PyObject* self, PyObject* args); - static PyObject* Iter(PyObject* self) { return Keys(self, nullptr); } + static PyObject* Iter(PyObject* self) { + return Keys(self, nullptr); + } static PyObject* Keys(PyObject* self, PyObject* /* args */); static PyObject* Items(PyObject* self, PyObject* /* args */); static PyObject* Values(PyObject* self, PyObject* /* args */); @@ -60,196 +60,196 @@ PyMappingMethods LazyDictMapping = { }; PySequenceMethods LazyDictSequence = { - INIT_MEMBER(sq_length , TPyLazyDict::Len), - INIT_MEMBER(sq_concat , nullptr), - INIT_MEMBER(sq_repeat , nullptr), - INIT_MEMBER(sq_item , nullptr), + INIT_MEMBER(sq_length, TPyLazyDict::Len), + INIT_MEMBER(sq_concat, nullptr), + INIT_MEMBER(sq_repeat, nullptr), 
+ INIT_MEMBER(sq_item, nullptr), #if PY_MAJOR_VERSION >= 3 - INIT_MEMBER(was_sq_slice , nullptr), + INIT_MEMBER(was_sq_slice, nullptr), #else - INIT_MEMBER(sq_slice , nullptr), + INIT_MEMBER(sq_slice, nullptr), #endif - INIT_MEMBER(sq_ass_item , nullptr), + INIT_MEMBER(sq_ass_item, nullptr), #if PY_MAJOR_VERSION >= 3 - INIT_MEMBER(was_sq_ass_slice , nullptr), + INIT_MEMBER(was_sq_ass_slice, nullptr), #else - INIT_MEMBER(sq_ass_slice , nullptr), + INIT_MEMBER(sq_ass_slice, nullptr), #endif - INIT_MEMBER(sq_contains , TPyLazyDict::Contains), - INIT_MEMBER(sq_inplace_concat , nullptr), - INIT_MEMBER(sq_inplace_repeat , nullptr), + INIT_MEMBER(sq_contains, TPyLazyDict::Contains), + INIT_MEMBER(sq_inplace_concat, nullptr), + INIT_MEMBER(sq_inplace_repeat, nullptr), }; PyNumberMethods LazyDictNumbering = { - INIT_MEMBER(nb_add, nullptr), - INIT_MEMBER(nb_subtract, nullptr), - INIT_MEMBER(nb_multiply, nullptr), + INIT_MEMBER(nb_add, nullptr), + INIT_MEMBER(nb_subtract, nullptr), + INIT_MEMBER(nb_multiply, nullptr), #if PY_MAJOR_VERSION < 3 - INIT_MEMBER(nb_divide, nullptr), + INIT_MEMBER(nb_divide, nullptr), #endif - INIT_MEMBER(nb_remainder, nullptr), - INIT_MEMBER(nb_divmod, nullptr), - INIT_MEMBER(nb_power, nullptr), - INIT_MEMBER(nb_negative, nullptr), - INIT_MEMBER(nb_positive, nullptr), - INIT_MEMBER(nb_absolute, nullptr), + INIT_MEMBER(nb_remainder, nullptr), + INIT_MEMBER(nb_divmod, nullptr), + INIT_MEMBER(nb_power, nullptr), + INIT_MEMBER(nb_negative, nullptr), + INIT_MEMBER(nb_positive, nullptr), + INIT_MEMBER(nb_absolute, nullptr), #if PY_MAJOR_VERSION >= 3 - INIT_MEMBER(nb_bool, TPyLazyDict::Bool), + INIT_MEMBER(nb_bool, TPyLazyDict::Bool), #else - INIT_MEMBER(nb_nonzero, TPyLazyDict::Bool), + INIT_MEMBER(nb_nonzero, TPyLazyDict::Bool), #endif - INIT_MEMBER(nb_invert, nullptr), - INIT_MEMBER(nb_lshift, nullptr), - INIT_MEMBER(nb_rshift, nullptr), - INIT_MEMBER(nb_and, nullptr), - INIT_MEMBER(nb_xor, nullptr), - INIT_MEMBER(nb_or, nullptr), + 
INIT_MEMBER(nb_invert, nullptr), + INIT_MEMBER(nb_lshift, nullptr), + INIT_MEMBER(nb_rshift, nullptr), + INIT_MEMBER(nb_and, nullptr), + INIT_MEMBER(nb_xor, nullptr), + INIT_MEMBER(nb_or, nullptr), #if PY_MAJOR_VERSION < 3 - INIT_MEMBER(nb_coerce, nullptr), + INIT_MEMBER(nb_coerce, nullptr), #endif - INIT_MEMBER(nb_int, nullptr), + INIT_MEMBER(nb_int, nullptr), #if PY_MAJOR_VERSION >= 3 - INIT_MEMBER(nb_reserved, nullptr), + INIT_MEMBER(nb_reserved, nullptr), #else - INIT_MEMBER(nb_long, nullptr), + INIT_MEMBER(nb_long, nullptr), #endif - INIT_MEMBER(nb_float, nullptr), + INIT_MEMBER(nb_float, nullptr), #if PY_MAJOR_VERSION < 3 - INIT_MEMBER(nb_oct, nullptr), - INIT_MEMBER(nb_hex, nullptr), + INIT_MEMBER(nb_oct, nullptr), + INIT_MEMBER(nb_hex, nullptr), #endif - INIT_MEMBER(nb_inplace_add, nullptr), - INIT_MEMBER(nb_inplace_subtract, nullptr), - INIT_MEMBER(nb_inplace_multiply, nullptr), - INIT_MEMBER(nb_inplace_remainder, nullptr), - INIT_MEMBER(nb_inplace_power, nullptr), - INIT_MEMBER(nb_inplace_lshift, nullptr), - INIT_MEMBER(nb_inplace_rshift, nullptr), - INIT_MEMBER(nb_inplace_and, nullptr), - INIT_MEMBER(nb_inplace_xor, nullptr), - INIT_MEMBER(nb_inplace_or, nullptr), + INIT_MEMBER(nb_inplace_add, nullptr), + INIT_MEMBER(nb_inplace_subtract, nullptr), + INIT_MEMBER(nb_inplace_multiply, nullptr), + INIT_MEMBER(nb_inplace_remainder, nullptr), + INIT_MEMBER(nb_inplace_power, nullptr), + INIT_MEMBER(nb_inplace_lshift, nullptr), + INIT_MEMBER(nb_inplace_rshift, nullptr), + INIT_MEMBER(nb_inplace_and, nullptr), + INIT_MEMBER(nb_inplace_xor, nullptr), + INIT_MEMBER(nb_inplace_or, nullptr), - INIT_MEMBER(nb_floor_divide, nullptr), - INIT_MEMBER(nb_true_divide, nullptr), - INIT_MEMBER(nb_inplace_floor_divide, nullptr), - INIT_MEMBER(nb_inplace_true_divide, nullptr), + INIT_MEMBER(nb_floor_divide, nullptr), + INIT_MEMBER(nb_true_divide, nullptr), + INIT_MEMBER(nb_inplace_floor_divide, nullptr), + INIT_MEMBER(nb_inplace_true_divide, nullptr), - INIT_MEMBER(nb_index, 
nullptr), + INIT_MEMBER(nb_index, nullptr), #if PY_MAJOR_VERSION >= 3 - INIT_MEMBER(nb_matrix_multiply, nullptr), - INIT_MEMBER(nb_inplace_matrix_multiply, nullptr), + INIT_MEMBER(nb_matrix_multiply, nullptr), + INIT_MEMBER(nb_inplace_matrix_multiply, nullptr), #endif }; - #if PY_MAJOR_VERSION >= 3 -#define Py_TPFLAGS_HAVE_ITER 0 // NOLINT(readability-identifier-naming) -#define Py_TPFLAGS_HAVE_SEQUENCE_IN 0 // NOLINT(readability-identifier-naming) + #define Py_TPFLAGS_HAVE_ITER 0 // NOLINT(readability-identifier-naming) + #define Py_TPFLAGS_HAVE_SEQUENCE_IN 0 // NOLINT(readability-identifier-naming) #endif PyDoc_STRVAR(get__doc__, - "D.get(k[,d]) -> D[k] if k in D, else d. d defaults to None."); + "D.get(k[,d]) -> D[k] if k in D, else d. d defaults to None."); PyDoc_STRVAR(keys__doc__, - "D.keys() -> an iterator over the keys of D"); + "D.keys() -> an iterator over the keys of D"); PyDoc_STRVAR(values__doc__, - "D.values() -> an iterator over the values of D"); + "D.values() -> an iterator over the values of D"); PyDoc_STRVAR(items__doc__, - "D.items() -> an iterator over the (key, value) items of D"); + "D.items() -> an iterator over the (key, value) items of D"); #if PY_MAJOR_VERSION < 3 PyDoc_STRVAR(iterkeys__doc__, - "D.iterkeys() -> an iterator over the keys of D"); + "D.iterkeys() -> an iterator over the keys of D"); PyDoc_STRVAR(itervalues__doc__, - "D.itervalues() -> an iterator over the values of D"); + "D.itervalues() -> an iterator over the values of D"); PyDoc_STRVAR(iteritems__doc__, - "D.iteritems() -> an iterator over the (key, value) items of D"); + "D.iteritems() -> an iterator over the (key, value) items of D"); #endif static PyMethodDef LazyDictMethods[] = { - { "get", TPyLazyDict::Get, METH_VARARGS, get__doc__ }, - { "keys", TPyLazyDict::Keys, METH_NOARGS, keys__doc__ }, - { "items", TPyLazyDict::Items, METH_NOARGS, items__doc__ }, - { "values", TPyLazyDict::Values, METH_NOARGS, values__doc__ }, + {"get", TPyLazyDict::Get, METH_VARARGS, 
get__doc__}, + {"keys", TPyLazyDict::Keys, METH_NOARGS, keys__doc__}, + {"items", TPyLazyDict::Items, METH_NOARGS, items__doc__}, + {"values", TPyLazyDict::Values, METH_NOARGS, values__doc__}, #if PY_MAJOR_VERSION < 3 - { "iterkeys", TPyLazyDict::Keys, METH_NOARGS, iterkeys__doc__ }, - { "iteritems", TPyLazyDict::Items, METH_NOARGS, iteritems__doc__ }, - { "itervalues", TPyLazyDict::Values, METH_NOARGS, itervalues__doc__ }, + {"iterkeys", TPyLazyDict::Keys, METH_NOARGS, iterkeys__doc__}, + {"iteritems", TPyLazyDict::Items, METH_NOARGS, iteritems__doc__}, + {"itervalues", TPyLazyDict::Values, METH_NOARGS, itervalues__doc__}, #endif - { nullptr, nullptr, 0, nullptr } /* sentinel */ + {nullptr, nullptr, 0, nullptr} /* sentinel */ }; PyTypeObject PyLazyDictType = { PyVarObject_HEAD_INIT(&PyType_Type, 0) - INIT_MEMBER(tp_name , "yql.TDict"), - INIT_MEMBER(tp_basicsize , sizeof(TPyLazyDict)), - INIT_MEMBER(tp_itemsize , 0), - INIT_MEMBER(tp_dealloc , TPyLazyDict::Dealloc), + // clang-format off + INIT_MEMBER(tp_name, "yql.TDict"), + // clang-format on + INIT_MEMBER(tp_basicsize, sizeof(TPyLazyDict)), + INIT_MEMBER(tp_itemsize, 0), + INIT_MEMBER(tp_dealloc, TPyLazyDict::Dealloc), #if PY_VERSION_HEX < 0x030800b4 - INIT_MEMBER(tp_print , nullptr), + INIT_MEMBER(tp_print, nullptr), #else INIT_MEMBER(tp_vectorcall_offset, 0), #endif - INIT_MEMBER(tp_getattr , nullptr), - INIT_MEMBER(tp_setattr , nullptr), + INIT_MEMBER(tp_getattr, nullptr), + INIT_MEMBER(tp_setattr, nullptr), #if PY_MAJOR_VERSION >= 3 - INIT_MEMBER(tp_as_async , nullptr), + INIT_MEMBER(tp_as_async, nullptr), #else - INIT_MEMBER(tp_compare , nullptr), + INIT_MEMBER(tp_compare, nullptr), #endif - INIT_MEMBER(tp_repr , TPyLazyDict::Repr), - INIT_MEMBER(tp_as_number , &LazyDictNumbering), - INIT_MEMBER(tp_as_sequence , &LazyDictSequence), - INIT_MEMBER(tp_as_mapping , &LazyDictMapping), - INIT_MEMBER(tp_hash , nullptr), - INIT_MEMBER(tp_call , nullptr), - INIT_MEMBER(tp_str , nullptr), - INIT_MEMBER(tp_getattro , 
nullptr), - INIT_MEMBER(tp_setattro , nullptr), - INIT_MEMBER(tp_as_buffer , nullptr), - INIT_MEMBER(tp_flags , Py_TPFLAGS_DEFAULT | Py_TPFLAGS_HAVE_ITER | Py_TPFLAGS_HAVE_SEQUENCE_IN), - INIT_MEMBER(tp_doc , "yql.TDict object"), - INIT_MEMBER(tp_traverse , nullptr), - INIT_MEMBER(tp_clear , nullptr), - INIT_MEMBER(tp_richcompare , nullptr), - INIT_MEMBER(tp_weaklistoffset , 0), - INIT_MEMBER(tp_iter , &TPyLazyDict::Iter), - INIT_MEMBER(tp_iternext , nullptr), - INIT_MEMBER(tp_methods , LazyDictMethods), - INIT_MEMBER(tp_members , nullptr), - INIT_MEMBER(tp_getset , nullptr), - INIT_MEMBER(tp_base , nullptr), - INIT_MEMBER(tp_dict , nullptr), - INIT_MEMBER(tp_descr_get , nullptr), - INIT_MEMBER(tp_descr_set , nullptr), - INIT_MEMBER(tp_dictoffset , 0), - INIT_MEMBER(tp_init , nullptr), - INIT_MEMBER(tp_alloc , nullptr), - INIT_MEMBER(tp_new , nullptr), - INIT_MEMBER(tp_free , nullptr), - INIT_MEMBER(tp_is_gc , nullptr), - INIT_MEMBER(tp_bases , nullptr), - INIT_MEMBER(tp_mro , nullptr), - INIT_MEMBER(tp_cache , nullptr), - INIT_MEMBER(tp_subclasses , nullptr), - INIT_MEMBER(tp_weaklist , nullptr), - INIT_MEMBER(tp_del , nullptr), - INIT_MEMBER(tp_version_tag , 0), + INIT_MEMBER(tp_repr, TPyLazyDict::Repr), + INIT_MEMBER(tp_as_number, &LazyDictNumbering), + INIT_MEMBER(tp_as_sequence, &LazyDictSequence), + INIT_MEMBER(tp_as_mapping, &LazyDictMapping), + INIT_MEMBER(tp_hash, nullptr), + INIT_MEMBER(tp_call, nullptr), + INIT_MEMBER(tp_str, nullptr), + INIT_MEMBER(tp_getattro, nullptr), + INIT_MEMBER(tp_setattro, nullptr), + INIT_MEMBER(tp_as_buffer, nullptr), + INIT_MEMBER(tp_flags, Py_TPFLAGS_DEFAULT | Py_TPFLAGS_HAVE_ITER | Py_TPFLAGS_HAVE_SEQUENCE_IN), + INIT_MEMBER(tp_doc, "yql.TDict object"), + INIT_MEMBER(tp_traverse, nullptr), + INIT_MEMBER(tp_clear, nullptr), + INIT_MEMBER(tp_richcompare, nullptr), + INIT_MEMBER(tp_weaklistoffset, 0), + INIT_MEMBER(tp_iter, &TPyLazyDict::Iter), + INIT_MEMBER(tp_iternext, nullptr), + INIT_MEMBER(tp_methods, LazyDictMethods), + 
INIT_MEMBER(tp_members, nullptr), + INIT_MEMBER(tp_getset, nullptr), + INIT_MEMBER(tp_base, nullptr), + INIT_MEMBER(tp_dict, nullptr), + INIT_MEMBER(tp_descr_get, nullptr), + INIT_MEMBER(tp_descr_set, nullptr), + INIT_MEMBER(tp_dictoffset, 0), + INIT_MEMBER(tp_init, nullptr), + INIT_MEMBER(tp_alloc, nullptr), + INIT_MEMBER(tp_new, nullptr), + INIT_MEMBER(tp_free, nullptr), + INIT_MEMBER(tp_is_gc, nullptr), + INIT_MEMBER(tp_bases, nullptr), + INIT_MEMBER(tp_mro, nullptr), + INIT_MEMBER(tp_cache, nullptr), + INIT_MEMBER(tp_subclasses, nullptr), + INIT_MEMBER(tp_weaklist, nullptr), + INIT_MEMBER(tp_del, nullptr), + INIT_MEMBER(tp_version_tag, 0), #if PY_MAJOR_VERSION >= 3 - INIT_MEMBER(tp_finalize , nullptr), + INIT_MEMBER(tp_finalize, nullptr), #endif #if PY_VERSION_HEX >= 0x030800b1 - INIT_MEMBER(tp_vectorcall , nullptr), + INIT_MEMBER(tp_vectorcall, nullptr), #endif #if PY_VERSION_HEX >= 0x030800b4 && PY_VERSION_HEX < 0x03090000 - INIT_MEMBER(tp_print , nullptr), + INIT_MEMBER(tp_print, nullptr), #endif }; ////////////////////////////////////////////////////////////////////////////// // TPyLazySet interface ////////////////////////////////////////////////////////////////////////////// -struct TPyLazySet -{ +struct TPyLazySet { using TPtr = NUdf::TRefCountedPtr<TPyLazySet, TPyPtrOps<TPyLazySet>>; PyObject_HEAD; @@ -266,9 +266,9 @@ struct TPyLazySet } static PyObject* New( - const TPyCastContext::TPtr& castCtx, - const NUdf::TType* itemType, - NUdf::IBoxedValuePtr&& value); + const TPyCastContext::TPtr& castCtx, + const NUdf::TType* itemType, + NUdf::IBoxedValuePtr&& value); static int Bool(PyObject* self); static PyObject* Repr(PyObject* self); @@ -280,152 +280,154 @@ struct TPyLazySet }; PySequenceMethods LazySetSequence = { - INIT_MEMBER(sq_length , TPyLazySet::Len), - INIT_MEMBER(sq_concat , nullptr), - INIT_MEMBER(sq_repeat , nullptr), - INIT_MEMBER(sq_item , nullptr), + INIT_MEMBER(sq_length, TPyLazySet::Len), + INIT_MEMBER(sq_concat, nullptr), + 
INIT_MEMBER(sq_repeat, nullptr), + INIT_MEMBER(sq_item, nullptr), #if PY_MAJOR_VERSION >= 3 - INIT_MEMBER(was_sq_slice , nullptr), + INIT_MEMBER(was_sq_slice, nullptr), #else - INIT_MEMBER(sq_slice , nullptr), + INIT_MEMBER(sq_slice, nullptr), #endif - INIT_MEMBER(sq_ass_item , nullptr), + INIT_MEMBER(sq_ass_item, nullptr), #if PY_MAJOR_VERSION >= 3 - INIT_MEMBER(was_sq_ass_slice , nullptr), + INIT_MEMBER(was_sq_ass_slice, nullptr), #else - INIT_MEMBER(sq_ass_slice , nullptr), + INIT_MEMBER(sq_ass_slice, nullptr), #endif - INIT_MEMBER(sq_contains , TPyLazySet::Contains), - INIT_MEMBER(sq_inplace_concat , nullptr), - INIT_MEMBER(sq_inplace_repeat , nullptr), + INIT_MEMBER(sq_contains, TPyLazySet::Contains), + INIT_MEMBER(sq_inplace_concat, nullptr), + INIT_MEMBER(sq_inplace_repeat, nullptr), }; PyNumberMethods LazySetNumbering = { - INIT_MEMBER(nb_add, nullptr), - INIT_MEMBER(nb_subtract, nullptr), - INIT_MEMBER(nb_multiply, nullptr), + INIT_MEMBER(nb_add, nullptr), + INIT_MEMBER(nb_subtract, nullptr), + INIT_MEMBER(nb_multiply, nullptr), #if PY_MAJOR_VERSION < 3 - INIT_MEMBER(nb_divide, nullptr), + INIT_MEMBER(nb_divide, nullptr), #endif - INIT_MEMBER(nb_remainder, nullptr), - INIT_MEMBER(nb_divmod, nullptr), - INIT_MEMBER(nb_power, nullptr), - INIT_MEMBER(nb_negative, nullptr), - INIT_MEMBER(nb_positive, nullptr), - INIT_MEMBER(nb_absolute, nullptr), + INIT_MEMBER(nb_remainder, nullptr), + INIT_MEMBER(nb_divmod, nullptr), + INIT_MEMBER(nb_power, nullptr), + INIT_MEMBER(nb_negative, nullptr), + INIT_MEMBER(nb_positive, nullptr), + INIT_MEMBER(nb_absolute, nullptr), #if PY_MAJOR_VERSION >= 3 - INIT_MEMBER(nb_bool, TPyLazySet::Bool), + INIT_MEMBER(nb_bool, TPyLazySet::Bool), #else - INIT_MEMBER(nb_nonzero, TPyLazySet::Bool), + INIT_MEMBER(nb_nonzero, TPyLazySet::Bool), #endif - INIT_MEMBER(nb_invert, nullptr), - INIT_MEMBER(nb_lshift, nullptr), - INIT_MEMBER(nb_rshift, nullptr), - INIT_MEMBER(nb_and, nullptr), - INIT_MEMBER(nb_xor, nullptr), - INIT_MEMBER(nb_or, 
nullptr), + INIT_MEMBER(nb_invert, nullptr), + INIT_MEMBER(nb_lshift, nullptr), + INIT_MEMBER(nb_rshift, nullptr), + INIT_MEMBER(nb_and, nullptr), + INIT_MEMBER(nb_xor, nullptr), + INIT_MEMBER(nb_or, nullptr), #if PY_MAJOR_VERSION < 3 - INIT_MEMBER(nb_coerce, nullptr), + INIT_MEMBER(nb_coerce, nullptr), #endif - INIT_MEMBER(nb_int, nullptr), + INIT_MEMBER(nb_int, nullptr), #if PY_MAJOR_VERSION >= 3 - INIT_MEMBER(nb_reserved, nullptr), + INIT_MEMBER(nb_reserved, nullptr), #else - INIT_MEMBER(nb_long, nullptr), + INIT_MEMBER(nb_long, nullptr), #endif - INIT_MEMBER(nb_float, nullptr), + INIT_MEMBER(nb_float, nullptr), #if PY_MAJOR_VERSION < 3 - INIT_MEMBER(nb_oct, nullptr), - INIT_MEMBER(nb_hex, nullptr), + INIT_MEMBER(nb_oct, nullptr), + INIT_MEMBER(nb_hex, nullptr), #endif - INIT_MEMBER(nb_inplace_add, nullptr), - INIT_MEMBER(nb_inplace_subtract, nullptr), - INIT_MEMBER(nb_inplace_multiply, nullptr), - INIT_MEMBER(nb_inplace_remainder, nullptr), - INIT_MEMBER(nb_inplace_power, nullptr), - INIT_MEMBER(nb_inplace_lshift, nullptr), - INIT_MEMBER(nb_inplace_rshift, nullptr), - INIT_MEMBER(nb_inplace_and, nullptr), - INIT_MEMBER(nb_inplace_xor, nullptr), - INIT_MEMBER(nb_inplace_or, nullptr), + INIT_MEMBER(nb_inplace_add, nullptr), + INIT_MEMBER(nb_inplace_subtract, nullptr), + INIT_MEMBER(nb_inplace_multiply, nullptr), + INIT_MEMBER(nb_inplace_remainder, nullptr), + INIT_MEMBER(nb_inplace_power, nullptr), + INIT_MEMBER(nb_inplace_lshift, nullptr), + INIT_MEMBER(nb_inplace_rshift, nullptr), + INIT_MEMBER(nb_inplace_and, nullptr), + INIT_MEMBER(nb_inplace_xor, nullptr), + INIT_MEMBER(nb_inplace_or, nullptr), - INIT_MEMBER(nb_floor_divide, nullptr), - INIT_MEMBER(nb_true_divide, nullptr), - INIT_MEMBER(nb_inplace_floor_divide, nullptr), - INIT_MEMBER(nb_inplace_true_divide, nullptr), + INIT_MEMBER(nb_floor_divide, nullptr), + INIT_MEMBER(nb_true_divide, nullptr), + INIT_MEMBER(nb_inplace_floor_divide, nullptr), + INIT_MEMBER(nb_inplace_true_divide, nullptr), - 
INIT_MEMBER(nb_index, nullptr), + INIT_MEMBER(nb_index, nullptr), #if PY_MAJOR_VERSION >= 3 - INIT_MEMBER(nb_matrix_multiply, nullptr), - INIT_MEMBER(nb_inplace_matrix_multiply, nullptr), + INIT_MEMBER(nb_matrix_multiply, nullptr), + INIT_MEMBER(nb_inplace_matrix_multiply, nullptr), #endif }; PyTypeObject PyLazySetType = { PyVarObject_HEAD_INIT(&PyType_Type, 0) - INIT_MEMBER(tp_name , "yql.TSet"), - INIT_MEMBER(tp_basicsize , sizeof(TPyLazySet)), - INIT_MEMBER(tp_itemsize , 0), - INIT_MEMBER(tp_dealloc , TPyLazySet::Dealloc), + // clang-format off + INIT_MEMBER(tp_name, "yql.TSet"), + // clang-format on + INIT_MEMBER(tp_basicsize, sizeof(TPyLazySet)), + INIT_MEMBER(tp_itemsize, 0), + INIT_MEMBER(tp_dealloc, TPyLazySet::Dealloc), #if PY_VERSION_HEX < 0x030800b4 - INIT_MEMBER(tp_print , nullptr), + INIT_MEMBER(tp_print, nullptr), #else INIT_MEMBER(tp_vectorcall_offset, 0), #endif - INIT_MEMBER(tp_getattr , nullptr), - INIT_MEMBER(tp_setattr , nullptr), + INIT_MEMBER(tp_getattr, nullptr), + INIT_MEMBER(tp_setattr, nullptr), #if PY_MAJOR_VERSION >= 3 - INIT_MEMBER(tp_as_async , nullptr), + INIT_MEMBER(tp_as_async, nullptr), #else - INIT_MEMBER(tp_compare , nullptr), + INIT_MEMBER(tp_compare, nullptr), #endif - INIT_MEMBER(tp_repr , TPyLazySet::Repr), - INIT_MEMBER(tp_as_number , &LazySetNumbering), - INIT_MEMBER(tp_as_sequence , &LazySetSequence), - INIT_MEMBER(tp_as_mapping , nullptr), - INIT_MEMBER(tp_hash , nullptr), - INIT_MEMBER(tp_call , nullptr), - INIT_MEMBER(tp_str , nullptr), - INIT_MEMBER(tp_getattro , nullptr), - INIT_MEMBER(tp_setattro , nullptr), - INIT_MEMBER(tp_as_buffer , nullptr), - INIT_MEMBER(tp_flags , Py_TPFLAGS_DEFAULT | Py_TPFLAGS_HAVE_ITER | Py_TPFLAGS_HAVE_SEQUENCE_IN), - INIT_MEMBER(tp_doc , "yql.TSet object"), - INIT_MEMBER(tp_traverse , nullptr), - INIT_MEMBER(tp_clear , nullptr), - INIT_MEMBER(tp_richcompare , nullptr), - INIT_MEMBER(tp_weaklistoffset , 0), - INIT_MEMBER(tp_iter , &TPyLazySet::Iter), - INIT_MEMBER(tp_iternext , nullptr), - 
INIT_MEMBER(tp_methods , nullptr), - INIT_MEMBER(tp_members , nullptr), - INIT_MEMBER(tp_getset , nullptr), - INIT_MEMBER(tp_base , nullptr), - INIT_MEMBER(tp_dict , nullptr), - INIT_MEMBER(tp_descr_get , nullptr), - INIT_MEMBER(tp_descr_set , nullptr), - INIT_MEMBER(tp_dictoffset , 0), - INIT_MEMBER(tp_init , nullptr), - INIT_MEMBER(tp_alloc , nullptr), - INIT_MEMBER(tp_new , nullptr), - INIT_MEMBER(tp_free , nullptr), - INIT_MEMBER(tp_is_gc , nullptr), - INIT_MEMBER(tp_bases , nullptr), - INIT_MEMBER(tp_mro , nullptr), - INIT_MEMBER(tp_cache , nullptr), - INIT_MEMBER(tp_subclasses , nullptr), - INIT_MEMBER(tp_weaklist , nullptr), - INIT_MEMBER(tp_del , nullptr), - INIT_MEMBER(tp_version_tag , 0), + INIT_MEMBER(tp_repr, TPyLazySet::Repr), + INIT_MEMBER(tp_as_number, &LazySetNumbering), + INIT_MEMBER(tp_as_sequence, &LazySetSequence), + INIT_MEMBER(tp_as_mapping, nullptr), + INIT_MEMBER(tp_hash, nullptr), + INIT_MEMBER(tp_call, nullptr), + INIT_MEMBER(tp_str, nullptr), + INIT_MEMBER(tp_getattro, nullptr), + INIT_MEMBER(tp_setattro, nullptr), + INIT_MEMBER(tp_as_buffer, nullptr), + INIT_MEMBER(tp_flags, Py_TPFLAGS_DEFAULT | Py_TPFLAGS_HAVE_ITER | Py_TPFLAGS_HAVE_SEQUENCE_IN), + INIT_MEMBER(tp_doc, "yql.TSet object"), + INIT_MEMBER(tp_traverse, nullptr), + INIT_MEMBER(tp_clear, nullptr), + INIT_MEMBER(tp_richcompare, nullptr), + INIT_MEMBER(tp_weaklistoffset, 0), + INIT_MEMBER(tp_iter, &TPyLazySet::Iter), + INIT_MEMBER(tp_iternext, nullptr), + INIT_MEMBER(tp_methods, nullptr), + INIT_MEMBER(tp_members, nullptr), + INIT_MEMBER(tp_getset, nullptr), + INIT_MEMBER(tp_base, nullptr), + INIT_MEMBER(tp_dict, nullptr), + INIT_MEMBER(tp_descr_get, nullptr), + INIT_MEMBER(tp_descr_set, nullptr), + INIT_MEMBER(tp_dictoffset, 0), + INIT_MEMBER(tp_init, nullptr), + INIT_MEMBER(tp_alloc, nullptr), + INIT_MEMBER(tp_new, nullptr), + INIT_MEMBER(tp_free, nullptr), + INIT_MEMBER(tp_is_gc, nullptr), + INIT_MEMBER(tp_bases, nullptr), + INIT_MEMBER(tp_mro, nullptr), + 
INIT_MEMBER(tp_cache, nullptr), + INIT_MEMBER(tp_subclasses, nullptr), + INIT_MEMBER(tp_weaklist, nullptr), + INIT_MEMBER(tp_del, nullptr), + INIT_MEMBER(tp_version_tag, 0), #if PY_MAJOR_VERSION >= 3 - INIT_MEMBER(tp_finalize , nullptr), + INIT_MEMBER(tp_finalize, nullptr), #endif #if PY_VERSION_HEX >= 0x030800b1 - INIT_MEMBER(tp_vectorcall , nullptr), + INIT_MEMBER(tp_vectorcall, nullptr), #endif #if PY_VERSION_HEX >= 0x030800b4 && PY_VERSION_HEX < 0x03090000 - INIT_MEMBER(tp_print , nullptr), + INIT_MEMBER(tp_print, nullptr), #endif }; @@ -433,10 +435,11 @@ PyTypeObject PyLazySetType = { // TPyLazyDict implementation ////////////////////////////////////////////////////////////////////////////// int TPyLazyDict::Bool(PyObject* self) -{ - PY_TRY { - return NUdf::TBoxedValueAccessor::HasDictItems(*Cast(self)->Value.Get()) ? 1 : 0; - } PY_CATCH(-1) + { + PY_TRY{ + return NUdf::TBoxedValueAccessor::HasDictItems(*Cast(self) -> Value.Get()) ? 1 : 0; +} // namespace NPython +PY_CATCH(-1) } PyObject* TPyLazyDict::Repr(PyObject*) @@ -445,10 +448,11 @@ PyObject* TPyLazyDict::Repr(PyObject*) } Py_ssize_t TPyLazyDict::Len(PyObject* self) -{ - PY_TRY { - return static_cast<Py_ssize_t>(NUdf::TBoxedValueAccessor::GetDictLength(*Cast(self)->Value.Get())); - } PY_CATCH(-1) + { + PY_TRY{ + return static_cast<Py_ssize_t>(NUdf::TBoxedValueAccessor::GetDictLength(*Cast(self) -> Value.Get())); +} +PY_CATCH(-1) } PyObject* TPyLazyDict::Subscript(PyObject* self, PyObject* key) @@ -487,112 +491,121 @@ PyObject* TPyLazyDict::Subscript(PyObject* self, PyObject* key) PyErr_SetObject(PyExc_IndexError, repr.Get()); return nullptr; } - - } PY_CATCH(nullptr) + } + PY_CATCH(nullptr) } // -1 error // 0 not found // 1 found int TPyLazyDict::Contains(PyObject* self, PyObject* key) -{ - PY_TRY { - TPyLazyDict* dict = Cast(self); - NUdf::TUnboxedValue mkqlKey; + { + PY_TRY{ + TPyLazyDict* dict = Cast(self); +NUdf::TUnboxedValue mkqlKey; - if (dict->KeyType) { - mkqlKey = FromPyObject(dict->CastCtx, 
dict->KeyType, key); - } else { - if (!PyIndex_Check(key)) { - const TPyObjectPtr type = PyObject_Type(key); - const TPyObjectPtr repr = PyObject_Repr(type.Get()); - const TPyObjectPtr error = PyUnicode_FromFormat("Unsupported index object type: %R", repr.Get()); - PyErr_SetObject(PyExc_TypeError, error.Get()); - return -1; - } +if (dict->KeyType) { + mkqlKey = FromPyObject(dict->CastCtx, dict->KeyType, key); +} else { + if (!PyIndex_Check(key)) { + const TPyObjectPtr type = PyObject_Type(key); + const TPyObjectPtr repr = PyObject_Repr(type.Get()); + const TPyObjectPtr error = PyUnicode_FromFormat("Unsupported index object type: %R", repr.Get()); + PyErr_SetObject(PyExc_TypeError, error.Get()); + return -1; + } - const Py_ssize_t index = PyNumber_AsSsize_t(key, PyExc_IndexError); - if (index < 0) { - return 0; - } - mkqlKey = NUdf::TUnboxedValuePod(ui64(index)); - } + const Py_ssize_t index = PyNumber_AsSsize_t(key, PyExc_IndexError); + if (index < 0) { + return 0; + } + mkqlKey = NUdf::TUnboxedValuePod(ui64(index)); +} - return NUdf::TBoxedValueAccessor::Contains(*dict->Value.Get(), mkqlKey) ? 1 : 0; - } PY_CATCH(-1) +return NUdf::TBoxedValueAccessor::Contains(*dict->Value.Get(), mkqlKey) ? 
1 : 0; +} +PY_CATCH(-1) } PyObject* TPyLazyDict::Get(PyObject* self, PyObject* args) -{ - PY_TRY { - PyObject* key = nullptr; - PyObject* failobj = Py_None; + { + PY_TRY{ + PyObject* key = nullptr; +PyObject* failobj = Py_None; - if (!PyArg_UnpackTuple(args, "get", 1, 2, &key, &failobj)) - return nullptr; +if (!PyArg_UnpackTuple(args, "get", 1, 2, &key, &failobj)) { + return nullptr; +} - TPyLazyDict* dict = Cast(self); - if (dict->KeyType) { - const auto mkqlKey = FromPyObject(dict->CastCtx, dict->KeyType, key); - if (auto value = NUdf::TBoxedValueAccessor::Lookup(*dict->Value.Get(), mkqlKey)) { - return ToPyObject(dict->CastCtx, dict->PayloadType, value.Release().GetOptionalValue()).Release(); - } - } else { - if (!PyIndex_Check(key)) { - const TPyObjectPtr type = PyObject_Type(key); - const TPyObjectPtr repr = PyObject_Repr(type.Get()); - const TPyObjectPtr error = PyUnicode_FromFormat("Unsupported index object type: %R", repr.Get()); - PyErr_SetObject(PyExc_TypeError, error.Get()); - return nullptr; - } +TPyLazyDict* dict = Cast(self); +if (dict->KeyType) { + const auto mkqlKey = FromPyObject(dict->CastCtx, dict->KeyType, key); + if (auto value = NUdf::TBoxedValueAccessor::Lookup(*dict->Value.Get(), mkqlKey)) { + return ToPyObject(dict->CastCtx, dict->PayloadType, value.Release().GetOptionalValue()).Release(); + } +} else { + if (!PyIndex_Check(key)) { + const TPyObjectPtr type = PyObject_Type(key); + const TPyObjectPtr repr = PyObject_Repr(type.Get()); + const TPyObjectPtr error = PyUnicode_FromFormat("Unsupported index object type: %R", repr.Get()); + PyErr_SetObject(PyExc_TypeError, error.Get()); + return nullptr; + } - const Py_ssize_t index = PyNumber_AsSsize_t(key, PyExc_IndexError); - if (index < 0) { - return nullptr; - } + const Py_ssize_t index = PyNumber_AsSsize_t(key, PyExc_IndexError); + if (index < 0) { + return nullptr; + } - if (auto value = NUdf::TBoxedValueAccessor::Lookup(*dict->Value.Get(), NUdf::TUnboxedValuePod(ui64(index)))) { - return 
ToPyObject(dict->CastCtx, dict->PayloadType, value.Release().GetOptionalValue()).Release(); - } - } + if (auto value = NUdf::TBoxedValueAccessor::Lookup(*dict->Value.Get(), NUdf::TUnboxedValuePod(ui64(index)))) { + return ToPyObject(dict->CastCtx, dict->PayloadType, value.Release().GetOptionalValue()).Release(); + } +} - Py_INCREF(failobj); - return failobj; - } PY_CATCH(nullptr) +Py_INCREF(failobj); +return failobj; +} +PY_CATCH(nullptr) } PyObject* TPyLazyDict::Keys(PyObject* self, PyObject* /* args */) -{ - PY_TRY { - const auto dict = Cast(self); - return ToPyIterator(dict->CastCtx, dict->KeyType, - NUdf::TBoxedValueAccessor::GetKeysIterator(*dict->Value.Get())).Release(); - } PY_CATCH(nullptr) + { + PY_TRY{ + const auto dict = Cast(self); +return ToPyIterator(dict->CastCtx, dict->KeyType, + NUdf::TBoxedValueAccessor::GetKeysIterator(*dict->Value.Get())) + .Release(); +} +PY_CATCH(nullptr) } PyObject* TPyLazyDict::Items(PyObject* self, PyObject* /* args */) -{ - PY_TRY { - const auto dict = Cast(self); - return ToPyIterator(dict->CastCtx, dict->KeyType, dict->PayloadType, - NUdf::TBoxedValueAccessor::GetDictIterator(*dict->Value.Get())).Release(); - } PY_CATCH(nullptr) + { + PY_TRY{ + const auto dict = Cast(self); +return ToPyIterator(dict->CastCtx, dict->KeyType, dict->PayloadType, + NUdf::TBoxedValueAccessor::GetDictIterator(*dict->Value.Get())) + .Release(); +} +PY_CATCH(nullptr) } PyObject* TPyLazyDict::Values(PyObject* self, PyObject* /* args */) -{ - PY_TRY { - const auto dict = Cast(self); - return ToPyIterator(dict->CastCtx, dict->PayloadType, - NUdf::TBoxedValueAccessor::GetPayloadsIterator(*dict->Value.Get())).Release(); - } PY_CATCH(nullptr) + { + PY_TRY{ + const auto dict = Cast(self); +return ToPyIterator(dict->CastCtx, dict->PayloadType, + NUdf::TBoxedValueAccessor::GetPayloadsIterator(*dict->Value.Get())) + .Release(); +} +PY_CATCH(nullptr) } PyObject* TPyLazyDict::New( - const TPyCastContext::TPtr& castCtx, - const NUdf::TType* keyType, - const 
NUdf::TType* payloadType, - NUdf::IBoxedValuePtr&& value) + const TPyCastContext::TPtr& castCtx, + const NUdf::TType* keyType, + const NUdf::TType* payloadType, + NUdf::IBoxedValuePtr&& value) { TPyLazyDict* dict = new TPyLazyDict; PyObject_INIT(dict, &PyLazyDictType); @@ -608,10 +621,11 @@ PyObject* TPyLazyDict::New( // TPyLazySet implementation ////////////////////////////////////////////////////////////////////////////// int TPyLazySet::Bool(PyObject* self) -{ - PY_TRY { - return NUdf::TBoxedValueAccessor::HasDictItems(*Cast(self)->Value.Get()) ? 1 : 0; - } PY_CATCH(-1) + { + PY_TRY{ + return NUdf::TBoxedValueAccessor::HasDictItems(*Cast(self) -> Value.Get()) ? 1 : 0; +} +PY_CATCH(-1) } PyObject* TPyLazySet::Repr(PyObject*) @@ -623,34 +637,38 @@ Py_ssize_t TPyLazySet::Len(PyObject* self) { PY_TRY { return static_cast<Py_ssize_t>(NUdf::TBoxedValueAccessor::GetDictLength(*Cast(self)->Value.Get())); - } PY_CATCH(-1) + } + PY_CATCH(-1) } // -1 error // 0 not found // 1 found int TPyLazySet::Contains(PyObject* self, PyObject* key) -{ - PY_TRY { - const auto set = Cast(self); - const auto mkqlKey = FromPyObject(set->CastCtx, set->ItemType, key); - return NUdf::TBoxedValueAccessor::Contains(*set->Value.Get(), mkqlKey) ? 1 : 0; - } PY_CATCH(-1) + { + PY_TRY{ + const auto set = Cast(self); +const auto mkqlKey = FromPyObject(set->CastCtx, set->ItemType, key); +return NUdf::TBoxedValueAccessor::Contains(*set->Value.Get(), mkqlKey) ? 
1 : 0; +} +PY_CATCH(-1) } PyObject* TPyLazySet::Iter(PyObject* self) -{ - PY_TRY { - const auto set = Cast(self); - return ToPyIterator(set->CastCtx, set->ItemType, - NUdf::TBoxedValueAccessor::GetKeysIterator(*set->Value.Get())).Release(); - } PY_CATCH(nullptr) + { + PY_TRY{ + const auto set = Cast(self); +return ToPyIterator(set->CastCtx, set->ItemType, + NUdf::TBoxedValueAccessor::GetKeysIterator(*set->Value.Get())) + .Release(); +} +PY_CATCH(nullptr) } PyObject* TPyLazySet::New( - const TPyCastContext::TPtr& castCtx, - const NUdf::TType* itemType, - NUdf::IBoxedValuePtr&& value) + const TPyCastContext::TPtr& castCtx, + const NUdf::TType* itemType, + NUdf::IBoxedValuePtr&& value) { TPyLazySet* dict = new TPyLazySet; PyObject_INIT(dict, &PyLazySetType); @@ -664,18 +682,18 @@ PyObject* TPyLazySet::New( ////////////////////////////////////////////////////////////////////////////// TPyObjectPtr ToPyLazyDict( - const TPyCastContext::TPtr& castCtx, - const NUdf::TType* keyType, - const NUdf::TType* payloadType, - const NUdf::TUnboxedValuePod& value) + const TPyCastContext::TPtr& castCtx, + const NUdf::TType* keyType, + const NUdf::TType* payloadType, + const NUdf::TUnboxedValuePod& value) { return TPyLazyDict::New(castCtx, keyType, payloadType, value.AsBoxed()); } TPyObjectPtr ToPyLazySet( - const TPyCastContext::TPtr& castCtx, - const NUdf::TType* itemType, - const NUdf::TUnboxedValuePod& value) + const TPyCastContext::TPtr& castCtx, + const NUdf::TType* itemType, + const NUdf::TUnboxedValuePod& value) { return TPyLazySet::New(castCtx, itemType, value.AsBoxed()); } diff --git a/yql/essentials/udfs/common/python/bindings/py_dict.h b/yql/essentials/udfs/common/python/bindings/py_dict.h index 538ca69a127..c1337cc5ec4 100644 --- a/yql/essentials/udfs/common/python/bindings/py_dict.h +++ b/yql/essentials/udfs/common/python/bindings/py_dict.h @@ -9,42 +9,42 @@ extern PyTypeObject PyLazyDictType; extern PyTypeObject PyLazySetType; TPyObjectPtr ToPyLazyDict( - const 
TPyCastContext::TPtr& castCtx, - const NKikimr::NUdf::TType* keyType, - const NKikimr::NUdf::TType* payloadType, - const NKikimr::NUdf::TUnboxedValuePod& value); + const TPyCastContext::TPtr& castCtx, + const NKikimr::NUdf::TType* keyType, + const NKikimr::NUdf::TType* payloadType, + const NKikimr::NUdf::TUnboxedValuePod& value); TPyObjectPtr ToPyLazySet( - const TPyCastContext::TPtr& castCtx, - const NKikimr::NUdf::TType* itemType, - const NKikimr::NUdf::TUnboxedValuePod& value); + const TPyCastContext::TPtr& castCtx, + const NKikimr::NUdf::TType* itemType, + const NKikimr::NUdf::TUnboxedValuePod& value); NKikimr::NUdf::TUnboxedValue FromPyMapping( - const TPyCastContext::TPtr& castCtx, - const NKikimr::NUdf::TType* keyType, - const NKikimr::NUdf::TType* payType, - PyObject* map); + const TPyCastContext::TPtr& castCtx, + const NKikimr::NUdf::TType* keyType, + const NKikimr::NUdf::TType* payType, + PyObject* map); NKikimr::NUdf::TUnboxedValue FromPyDict( - const TPyCastContext::TPtr& castCtx, - const NKikimr::NUdf::TType* keyType, - const NKikimr::NUdf::TType* payType, - PyObject* dict); + const TPyCastContext::TPtr& castCtx, + const NKikimr::NUdf::TType* keyType, + const NKikimr::NUdf::TType* payType, + PyObject* dict); NKikimr::NUdf::TUnboxedValue FromPySet( - const TPyCastContext::TPtr& castCtx, - const NKikimr::NUdf::TType* itemType, - PyObject* set); + const TPyCastContext::TPtr& castCtx, + const NKikimr::NUdf::TType* itemType, + PyObject* set); NKikimr::NUdf::TUnboxedValue FromPySequence( - const TPyCastContext::TPtr& castCtx, - const NKikimr::NUdf::TType* keyType, - PyObject* sequence); + const TPyCastContext::TPtr& castCtx, + const NKikimr::NUdf::TType* keyType, + PyObject* sequence); NKikimr::NUdf::TUnboxedValue FromPySequence( - const TPyCastContext::TPtr& castCtx, - const NKikimr::NUdf::TType* itemType, - const NKikimr::NUdf::TDataTypeId keyType, - PyObject* sequence); + const TPyCastContext::TPtr& castCtx, + const NKikimr::NUdf::TType* itemType, + const 
NKikimr::NUdf::TDataTypeId keyType, + PyObject* sequence); -} // namspace NPython +} // namespace NPython diff --git a/yql/essentials/udfs/common/python/bindings/py_dict_ut.cpp b/yql/essentials/udfs/common/python/bindings/py_dict_ut.cpp index edb3d36e8c3..454ff363862 100644 --- a/yql/essentials/udfs/common/python/bindings/py_dict_ut.cpp +++ b/yql/essentials/udfs/common/python/bindings/py_dict_ut.cpp @@ -7,716 +7,673 @@ using namespace NPython; Y_UNIT_TEST_SUITE(TPyDictTest) { - Y_UNIT_TEST(FromPyEmptyDict) { - TPythonTestEngine engine; - engine.ToMiniKQL<NUdf::TDict<ui32, char*>>( - "def Test(): return {}", - [](const NUdf::TUnboxedValuePod& value) { - UNIT_ASSERT(value); - UNIT_ASSERT(value.IsBoxed()); - UNIT_ASSERT(!value.HasDictItems()); - UNIT_ASSERT_EQUAL(value.GetDictLength(), 0); - }); - } +Y_UNIT_TEST(FromPyEmptyDict) { + TPythonTestEngine engine; + engine.ToMiniKQL<NUdf::TDict<ui32, char*>>( + "def Test(): return {}", + [](const NUdf::TUnboxedValuePod& value) { + UNIT_ASSERT(value); + UNIT_ASSERT(value.IsBoxed()); + UNIT_ASSERT(!value.HasDictItems()); + UNIT_ASSERT_EQUAL(value.GetDictLength(), 0); + }); +} - Y_UNIT_TEST(FromPyDict_Length) { - TPythonTestEngine engine; - engine.ToMiniKQL<NUdf::TDict<ui32, char*>>( - "def Test(): return {1: 'one', 3: 'three', 2: 'two'}", - [](const NUdf::TUnboxedValuePod& value) { - UNIT_ASSERT(value); - UNIT_ASSERT(value.IsBoxed()); - UNIT_ASSERT(value.HasDictItems()); - UNIT_ASSERT(!value.IsSortedDict()); - UNIT_ASSERT_EQUAL(value.GetDictLength(), 3); - }); - } +Y_UNIT_TEST(FromPyDict_Length) { + TPythonTestEngine engine; + engine.ToMiniKQL<NUdf::TDict<ui32, char*>>( + "def Test(): return {1: 'one', 3: 'three', 2: 'two'}", + [](const NUdf::TUnboxedValuePod& value) { + UNIT_ASSERT(value); + UNIT_ASSERT(value.IsBoxed()); + UNIT_ASSERT(value.HasDictItems()); + UNIT_ASSERT(!value.IsSortedDict()); + UNIT_ASSERT_EQUAL(value.GetDictLength(), 3); + }); +} - Y_UNIT_TEST(FromPyDict_Lookup) { - TPythonTestEngine engine; - 
engine.ToMiniKQL<NUdf::TDict<ui32, char*>>( - "def Test(): return {1: 'one', 3: 'three', 2: 'two'}", - [](const NUdf::TUnboxedValuePod& value) { - const auto v1 = value.Lookup(NUdf::TUnboxedValuePod(ui32(1))); - UNIT_ASSERT_EQUAL(v1.AsStringRef(), "one"); - const auto v2 = value.Lookup(NUdf::TUnboxedValuePod(ui32(2))); - UNIT_ASSERT_EQUAL(v2.AsStringRef(), "two"); - const auto v3 = value.Lookup(NUdf::TUnboxedValuePod(ui32(3))); - UNIT_ASSERT_EQUAL(v3.AsStringRef(), "three"); +Y_UNIT_TEST(FromPyDict_Lookup) { + TPythonTestEngine engine; + engine.ToMiniKQL<NUdf::TDict<ui32, char*>>( + "def Test(): return {1: 'one', 3: 'three', 2: 'two'}", + [](const NUdf::TUnboxedValuePod& value) { + const auto v1 = value.Lookup(NUdf::TUnboxedValuePod(ui32(1))); + UNIT_ASSERT_EQUAL(v1.AsStringRef(), "one"); + const auto v2 = value.Lookup(NUdf::TUnboxedValuePod(ui32(2))); + UNIT_ASSERT_EQUAL(v2.AsStringRef(), "two"); + const auto v3 = value.Lookup(NUdf::TUnboxedValuePod(ui32(3))); + UNIT_ASSERT_EQUAL(v3.AsStringRef(), "three"); - UNIT_ASSERT(!value.Lookup(NUdf::TUnboxedValuePod(ui32(0)))); - UNIT_ASSERT(!value.Lookup(NUdf::TUnboxedValuePod(ui32(4)))); - }); - } + UNIT_ASSERT(!value.Lookup(NUdf::TUnboxedValuePod(ui32(0)))); + UNIT_ASSERT(!value.Lookup(NUdf::TUnboxedValuePod(ui32(4)))); + }); +} - Y_UNIT_TEST(FromPyDict_Contains) { - TPythonTestEngine engine; - engine.ToMiniKQL<NUdf::TDict<ui32, char*>>( - "def Test(): return {1: 'one', 3: 'three', 2: 'two'}", - [](const NUdf::TUnboxedValuePod& value) { - UNIT_ASSERT(!value.Contains(NUdf::TUnboxedValuePod(ui32(0)))); - UNIT_ASSERT(value.Contains(NUdf::TUnboxedValuePod(ui32(1)))); - UNIT_ASSERT(value.Contains(NUdf::TUnboxedValuePod(ui32(2)))); - UNIT_ASSERT(value.Contains(NUdf::TUnboxedValuePod(ui32(3)))); - UNIT_ASSERT(!value.Contains(NUdf::TUnboxedValuePod(ui32(4)))); - }); - } +Y_UNIT_TEST(FromPyDict_Contains) { + TPythonTestEngine engine; + engine.ToMiniKQL<NUdf::TDict<ui32, char*>>( + "def Test(): return {1: 'one', 3: 'three', 2: 
'two'}", + [](const NUdf::TUnboxedValuePod& value) { + UNIT_ASSERT(!value.Contains(NUdf::TUnboxedValuePod(ui32(0)))); + UNIT_ASSERT(value.Contains(NUdf::TUnboxedValuePod(ui32(1)))); + UNIT_ASSERT(value.Contains(NUdf::TUnboxedValuePod(ui32(2)))); + UNIT_ASSERT(value.Contains(NUdf::TUnboxedValuePod(ui32(3)))); + UNIT_ASSERT(!value.Contains(NUdf::TUnboxedValuePod(ui32(4)))); + }); +} - Y_UNIT_TEST(FromPyDict_Items) { - TPythonTestEngine engine; - engine.ToMiniKQL<NUdf::TDict<ui32, char*>>( - "def Test(): return {1: 'one', 3: 'three', 2: 'two'}", - [](const NUdf::TUnboxedValuePod& value) { - std::map<ui32, TString> items; - const auto it = value.GetDictIterator(); - for (NUdf::TUnboxedValue key, payload; it.NextPair(key, payload);) { - items.emplace(key.Get<ui32>(), payload.AsStringRef()); - } +Y_UNIT_TEST(FromPyDict_Items) { + TPythonTestEngine engine; + engine.ToMiniKQL<NUdf::TDict<ui32, char*>>( + "def Test(): return {1: 'one', 3: 'three', 2: 'two'}", + [](const NUdf::TUnboxedValuePod& value) { + std::map<ui32, TString> items; + const auto it = value.GetDictIterator(); + for (NUdf::TUnboxedValue key, payload; it.NextPair(key, payload);) { + items.emplace(key.Get<ui32>(), payload.AsStringRef()); + } - UNIT_ASSERT_EQUAL(items.size(), 3); - UNIT_ASSERT_EQUAL(items[1], "one"); - UNIT_ASSERT_EQUAL(items[2], "two"); - UNIT_ASSERT_EQUAL(items[3], "three"); - }); - } + UNIT_ASSERT_EQUAL(items.size(), 3); + UNIT_ASSERT_EQUAL(items[1], "one"); + UNIT_ASSERT_EQUAL(items[2], "two"); + UNIT_ASSERT_EQUAL(items[3], "three"); + }); +} - Y_UNIT_TEST(FromPyDict_Keys) { - TPythonTestEngine engine; - engine.ToMiniKQL<NUdf::TDict<ui32, char*>>( - "def Test(): return {1: 'one', 3: 'three', 2: 'two'}", - [](const NUdf::TUnboxedValuePod& value) { - std::vector<ui32> items; - const auto it = value.GetKeysIterator(); - for (NUdf::TUnboxedValue key; it.Next(key);) { - items.emplace_back(key.Get<ui32>()); - } +Y_UNIT_TEST(FromPyDict_Keys) { + TPythonTestEngine engine; + 
engine.ToMiniKQL<NUdf::TDict<ui32, char*>>( + "def Test(): return {1: 'one', 3: 'three', 2: 'two'}", + [](const NUdf::TUnboxedValuePod& value) { + std::vector<ui32> items; + const auto it = value.GetKeysIterator(); + for (NUdf::TUnboxedValue key; it.Next(key);) { + items.emplace_back(key.Get<ui32>()); + } - UNIT_ASSERT_EQUAL(items.size(), 3); + UNIT_ASSERT_EQUAL(items.size(), 3); - std::sort(items.begin(), items.end()); - UNIT_ASSERT_EQUAL(items[0], 1U); - UNIT_ASSERT_EQUAL(items[1], 2U); - UNIT_ASSERT_EQUAL(items[2], 3U); - }); - } + std::sort(items.begin(), items.end()); + UNIT_ASSERT_EQUAL(items[0], 1U); + UNIT_ASSERT_EQUAL(items[1], 2U); + UNIT_ASSERT_EQUAL(items[2], 3U); + }); +} - Y_UNIT_TEST(FromPyDict_Values) { - TPythonTestEngine engine; - engine.ToMiniKQL<NUdf::TDict<ui32, char*>>( - "def Test(): return {1: 'one', 3: 'three', 2: 'two'}", - [](const NUdf::TUnboxedValuePod& value) { - std::vector<TString> items; - const auto it = value.GetPayloadsIterator(); - for (NUdf::TUnboxedValue payload; it.Next(payload);) { - items.emplace_back(payload.AsStringRef()); - } +Y_UNIT_TEST(FromPyDict_Values) { + TPythonTestEngine engine; + engine.ToMiniKQL<NUdf::TDict<ui32, char*>>( + "def Test(): return {1: 'one', 3: 'three', 2: 'two'}", + [](const NUdf::TUnboxedValuePod& value) { + std::vector<TString> items; + const auto it = value.GetPayloadsIterator(); + for (NUdf::TUnboxedValue payload; it.Next(payload);) { + items.emplace_back(payload.AsStringRef()); + } - UNIT_ASSERT_EQUAL(items.size(), 3); + UNIT_ASSERT_EQUAL(items.size(), 3); - std::sort(items.begin(), items.end()); - UNIT_ASSERT_EQUAL(items[0], "one"); - UNIT_ASSERT_EQUAL(items[1], "three"); - UNIT_ASSERT_EQUAL(items[2], "two"); - }); - } + std::sort(items.begin(), items.end()); + UNIT_ASSERT_EQUAL(items[0], "one"); + UNIT_ASSERT_EQUAL(items[1], "three"); + UNIT_ASSERT_EQUAL(items[2], "two"); + }); +} - Y_UNIT_TEST(FromPyList_Length) { - TPythonTestEngine engine; - engine.ToMiniKQL<NUdf::TDict<ui32, char*>>( - 
"def Test(): return ['one', 'two', 'three']", - [](const NUdf::TUnboxedValuePod& value) { - UNIT_ASSERT(value); - UNIT_ASSERT(value.IsBoxed()); - UNIT_ASSERT(value.HasDictItems()); - UNIT_ASSERT(value.IsSortedDict()); - UNIT_ASSERT_EQUAL(value.GetDictLength(), 3); - }); - } +Y_UNIT_TEST(FromPyList_Length) { + TPythonTestEngine engine; + engine.ToMiniKQL<NUdf::TDict<ui32, char*>>( + "def Test(): return ['one', 'two', 'three']", + [](const NUdf::TUnboxedValuePod& value) { + UNIT_ASSERT(value); + UNIT_ASSERT(value.IsBoxed()); + UNIT_ASSERT(value.HasDictItems()); + UNIT_ASSERT(value.IsSortedDict()); + UNIT_ASSERT_EQUAL(value.GetDictLength(), 3); + }); +} - Y_UNIT_TEST(FromPyTuple_Lookup) { - TPythonTestEngine engine; - engine.ToMiniKQL<NUdf::TDict<i32, char*>>( - "def Test(): return ('one', 'two', 'three')", - [](const NUdf::TUnboxedValuePod& value) { - const auto v1 = value.Lookup(NUdf::TUnboxedValuePod(i32(0))); - UNIT_ASSERT_EQUAL(v1.AsStringRef(), "one"); - const auto v2 = value.Lookup(NUdf::TUnboxedValuePod(i32(1))); - UNIT_ASSERT_EQUAL(v2.AsStringRef(), "two"); - const auto v3 = value.Lookup(NUdf::TUnboxedValuePod(i32(2))); - UNIT_ASSERT_EQUAL(v3.AsStringRef(), "three"); - const auto v4 = value.Lookup(NUdf::TUnboxedValuePod(i32(-1))); - UNIT_ASSERT_EQUAL(v4.AsStringRef(), "three"); - const auto v5 = value.Lookup(NUdf::TUnboxedValuePod(i32(-2))); - UNIT_ASSERT_EQUAL(v5.AsStringRef(), "two"); - const auto v6 = value.Lookup(NUdf::TUnboxedValuePod(i32(-3))); - UNIT_ASSERT_EQUAL(v6.AsStringRef(), "one"); +Y_UNIT_TEST(FromPyTuple_Lookup) { + TPythonTestEngine engine; + engine.ToMiniKQL<NUdf::TDict<i32, char*>>( + "def Test(): return ('one', 'two', 'three')", + [](const NUdf::TUnboxedValuePod& value) { + const auto v1 = value.Lookup(NUdf::TUnboxedValuePod(i32(0))); + UNIT_ASSERT_EQUAL(v1.AsStringRef(), "one"); + const auto v2 = value.Lookup(NUdf::TUnboxedValuePod(i32(1))); + UNIT_ASSERT_EQUAL(v2.AsStringRef(), "two"); + const auto v3 = 
value.Lookup(NUdf::TUnboxedValuePod(i32(2))); + UNIT_ASSERT_EQUAL(v3.AsStringRef(), "three"); + const auto v4 = value.Lookup(NUdf::TUnboxedValuePod(i32(-1))); + UNIT_ASSERT_EQUAL(v4.AsStringRef(), "three"); + const auto v5 = value.Lookup(NUdf::TUnboxedValuePod(i32(-2))); + UNIT_ASSERT_EQUAL(v5.AsStringRef(), "two"); + const auto v6 = value.Lookup(NUdf::TUnboxedValuePod(i32(-3))); + UNIT_ASSERT_EQUAL(v6.AsStringRef(), "one"); - UNIT_ASSERT(!value.Lookup(NUdf::TUnboxedValuePod(i32(3)))); - UNIT_ASSERT(!value.Lookup(NUdf::TUnboxedValuePod(i32(-4)))); - }); - } + UNIT_ASSERT(!value.Lookup(NUdf::TUnboxedValuePod(i32(3)))); + UNIT_ASSERT(!value.Lookup(NUdf::TUnboxedValuePod(i32(-4)))); + }); +} - Y_UNIT_TEST(FromPyList_Contains) { - TPythonTestEngine engine; - engine.ToMiniKQL<NUdf::TDict<i16, char*>>( - "def Test(): return ['one', 'two', 'three']", - [](const NUdf::TUnboxedValuePod& value) { - UNIT_ASSERT(value.Contains(NUdf::TUnboxedValuePod(i16(0)))); - UNIT_ASSERT(value.Contains(NUdf::TUnboxedValuePod(i16(1)))); - UNIT_ASSERT(value.Contains(NUdf::TUnboxedValuePod(i16(2)))); - UNIT_ASSERT(!value.Contains(NUdf::TUnboxedValuePod(i16(3)))); - UNIT_ASSERT(value.Contains(NUdf::TUnboxedValuePod(i16(-1)))); - UNIT_ASSERT(value.Contains(NUdf::TUnboxedValuePod(i16(-2)))); - UNIT_ASSERT(value.Contains(NUdf::TUnboxedValuePod(i16(-3)))); - UNIT_ASSERT(!value.Contains(NUdf::TUnboxedValuePod(i16(-4)))); - }); - } +Y_UNIT_TEST(FromPyList_Contains) { + TPythonTestEngine engine; + engine.ToMiniKQL<NUdf::TDict<i16, char*>>( + "def Test(): return ['one', 'two', 'three']", + [](const NUdf::TUnboxedValuePod& value) { + UNIT_ASSERT(value.Contains(NUdf::TUnboxedValuePod(i16(0)))); + UNIT_ASSERT(value.Contains(NUdf::TUnboxedValuePod(i16(1)))); + UNIT_ASSERT(value.Contains(NUdf::TUnboxedValuePod(i16(2)))); + UNIT_ASSERT(!value.Contains(NUdf::TUnboxedValuePod(i16(3)))); + UNIT_ASSERT(value.Contains(NUdf::TUnboxedValuePod(i16(-1)))); + 
UNIT_ASSERT(value.Contains(NUdf::TUnboxedValuePod(i16(-2)))); + UNIT_ASSERT(value.Contains(NUdf::TUnboxedValuePod(i16(-3)))); + UNIT_ASSERT(!value.Contains(NUdf::TUnboxedValuePod(i16(-4)))); + }); +} - Y_UNIT_TEST(FromPyTuple_Items) { - TPythonTestEngine engine; - engine.ToMiniKQL<NUdf::TDict<ui16, char*>>( - "def Test(): return ('one', 'two', 'three')", - [](const NUdf::TUnboxedValuePod& value) { - std::vector<std::pair<ui16, TString>> items; - const auto it = value.GetDictIterator(); - for (NUdf::TUnboxedValue key, payload; it.NextPair(key, payload);) { - items.emplace_back(key.Get<ui16>(), payload.AsStringRef()); - } +Y_UNIT_TEST(FromPyTuple_Items) { + TPythonTestEngine engine; + engine.ToMiniKQL<NUdf::TDict<ui16, char*>>( + "def Test(): return ('one', 'two', 'three')", + [](const NUdf::TUnboxedValuePod& value) { + std::vector<std::pair<ui16, TString>> items; + const auto it = value.GetDictIterator(); + for (NUdf::TUnboxedValue key, payload; it.NextPair(key, payload);) { + items.emplace_back(key.Get<ui16>(), payload.AsStringRef()); + } - UNIT_ASSERT_EQUAL(items.size(), 3U); - UNIT_ASSERT_EQUAL(items[0].first, 0); - UNIT_ASSERT_EQUAL(items[1].first, 1); - UNIT_ASSERT_EQUAL(items[2].first, 2); - UNIT_ASSERT_EQUAL(items[0].second, "one"); - UNIT_ASSERT_EQUAL(items[1].second, "two"); - UNIT_ASSERT_EQUAL(items[2].second, "three"); - }); - } + UNIT_ASSERT_EQUAL(items.size(), 3U); + UNIT_ASSERT_EQUAL(items[0].first, 0); + UNIT_ASSERT_EQUAL(items[1].first, 1); + UNIT_ASSERT_EQUAL(items[2].first, 2); + UNIT_ASSERT_EQUAL(items[0].second, "one"); + UNIT_ASSERT_EQUAL(items[1].second, "two"); + UNIT_ASSERT_EQUAL(items[2].second, "three"); + }); +} - Y_UNIT_TEST(FromPyList_Keys) { - TPythonTestEngine engine; - engine.ToMiniKQL<NUdf::TDict<i64, char*>>( - "def Test(): return ['one', 'two', 'three']", - [](const NUdf::TUnboxedValuePod& value) { - std::vector<i64> items; - const auto it = value.GetKeysIterator(); - for (NUdf::TUnboxedValue key; it.Next(key);) { - 
items.emplace_back(key.Get<i64>()); - } +Y_UNIT_TEST(FromPyList_Keys) { + TPythonTestEngine engine; + engine.ToMiniKQL<NUdf::TDict<i64, char*>>( + "def Test(): return ['one', 'two', 'three']", + [](const NUdf::TUnboxedValuePod& value) { + std::vector<i64> items; + const auto it = value.GetKeysIterator(); + for (NUdf::TUnboxedValue key; it.Next(key);) { + items.emplace_back(key.Get<i64>()); + } - UNIT_ASSERT_EQUAL(items.size(), 3); - UNIT_ASSERT_EQUAL(items[0], 0); - UNIT_ASSERT_EQUAL(items[1], 1); - UNIT_ASSERT_EQUAL(items[2], 2); - }); - } + UNIT_ASSERT_EQUAL(items.size(), 3); + UNIT_ASSERT_EQUAL(items[0], 0); + UNIT_ASSERT_EQUAL(items[1], 1); + UNIT_ASSERT_EQUAL(items[2], 2); + }); +} - Y_UNIT_TEST(FromPyTuple_Values) { - TPythonTestEngine engine; - engine.ToMiniKQL<NUdf::TDict<ui64, char*>>( - "def Test(): return ('one', 'two', 'three')", - [](const NUdf::TUnboxedValuePod& value) { - std::vector<TString> items; - const auto it = value.GetPayloadsIterator(); - for (NUdf::TUnboxedValue payload; it.Next(payload);) { - items.emplace_back(payload.AsStringRef()); - } +Y_UNIT_TEST(FromPyTuple_Values) { + TPythonTestEngine engine; + engine.ToMiniKQL<NUdf::TDict<ui64, char*>>( + "def Test(): return ('one', 'two', 'three')", + [](const NUdf::TUnboxedValuePod& value) { + std::vector<TString> items; + const auto it = value.GetPayloadsIterator(); + for (NUdf::TUnboxedValue payload; it.Next(payload);) { + items.emplace_back(payload.AsStringRef()); + } - UNIT_ASSERT_EQUAL(items.size(), 3); - UNIT_ASSERT_EQUAL(items[0], "one"); - UNIT_ASSERT_EQUAL(items[1], "two"); - UNIT_ASSERT_EQUAL(items[2], "three"); - }); - } + UNIT_ASSERT_EQUAL(items.size(), 3); + UNIT_ASSERT_EQUAL(items[0], "one"); + UNIT_ASSERT_EQUAL(items[1], "two"); + UNIT_ASSERT_EQUAL(items[2], "three"); + }); +} - Y_UNIT_TEST(ToPyEmptyDict) { - TPythonTestEngine engine; - engine.ToPython<NUdf::TDict<ui8, ui32>>( - [](const TType* type, const NUdf::IValueBuilder& vb) { - Y_UNUSED(type); - return vb.NewDict(type, 
NUdf::TDictFlags::Hashed)->Build(); - }, - "def Test(value):\n" - " assert not value\n" - " assert len(value) == 0\n" - ); - } +Y_UNIT_TEST(ToPyEmptyDict) { + TPythonTestEngine engine; + engine.ToPython<NUdf::TDict<ui8, ui32>>( + [](const TType* type, const NUdf::IValueBuilder& vb) { + Y_UNUSED(type); + return vb.NewDict(type, NUdf::TDictFlags::Hashed)->Build(); + }, + "def Test(value):\n" + " assert not value\n" + " assert len(value) == 0\n"); +} - Y_UNIT_TEST(ToPyDict) { - TPythonTestEngine engine; - engine.ToPython<NUdf::TDict<int, double>>( - [](const TType* type, const NUdf::IValueBuilder& vb) { - return vb.NewDict(type, NUdf::TDictFlags::Hashed)-> - Add(NUdf::TUnboxedValuePod((int) 1), NUdf::TUnboxedValuePod((double) 0.1)) - .Add(NUdf::TUnboxedValuePod((int) 2), NUdf::TUnboxedValuePod((double) 0.2)) - .Add(NUdf::TUnboxedValuePod((int) 3), NUdf::TUnboxedValuePod((double) 0.3)) - .Build(); - }, - "def Test(value):\n" - " assert value\n" - " assert len(value) == 3\n" - " assert iter(value) is not None\n" - " assert 2 in value\n" - " assert 0 not in value\n" - " assert set(iter(value)) == set([1, 2, 3])\n" - " assert value[2] == 0.2\n" - " assert value.get(0, 0.7) == 0.7\n" - " assert value.get(3, 0.7) == 0.3\n" - " assert sorted(value.keys()) == [1, 2, 3]\n" - " assert sorted(value.items()) == [(1, 0.1), (2, 0.2), (3, 0.3)]\n" - " assert sorted(value.values()) == [0.1, 0.2, 0.3]\n" +Y_UNIT_TEST(ToPyDict) { + TPythonTestEngine engine; + engine.ToPython<NUdf::TDict<int, double>>( + [](const TType* type, const NUdf::IValueBuilder& vb) { + return vb.NewDict(type, NUdf::TDictFlags::Hashed)->Add(NUdf::TUnboxedValuePod((int)1), NUdf::TUnboxedValuePod((double)0.1)).Add(NUdf::TUnboxedValuePod((int)2), NUdf::TUnboxedValuePod((double)0.2)).Add(NUdf::TUnboxedValuePod((int)3), NUdf::TUnboxedValuePod((double)0.3)).Build(); + }, + "def Test(value):\n" + " assert value\n" + " assert len(value) == 3\n" + " assert iter(value) is not None\n" + " assert 2 in value\n" + " assert 0 
not in value\n" + " assert set(iter(value)) == set([1, 2, 3])\n" + " assert value[2] == 0.2\n" + " assert value.get(0, 0.7) == 0.7\n" + " assert value.get(3, 0.7) == 0.3\n" + " assert sorted(value.keys()) == [1, 2, 3]\n" + " assert sorted(value.items()) == [(1, 0.1), (2, 0.2), (3, 0.3)]\n" + " assert sorted(value.values()) == [0.1, 0.2, 0.3]\n" #if PY_MAJOR_VERSION < 3 - " assert all(isinstance(k, int) for k in value.iterkeys())\n" - " assert all(isinstance(v, float) for v in value.itervalues())\n" - " assert all(isinstance(k, int) and isinstance(v, float) for k,v in value.iteritems())\n" + " assert all(isinstance(k, int) for k in value.iterkeys())\n" + " assert all(isinstance(v, float) for v in value.itervalues())\n" + " assert all(isinstance(k, int) and isinstance(v, float) for k,v in value.iteritems())\n" #endif - ); - } - - Y_UNIT_TEST(ToPyDictWrongKey) { - TPythonTestEngine engine; - engine.ToPython<NUdf::TDict<int, double>>( - [](const TType* type, const NUdf::IValueBuilder& vb) { - return vb.NewDict(type, NUdf::TDictFlags::Hashed)-> - Add(NUdf::TUnboxedValuePod((int) 1), NUdf::TUnboxedValuePod((double) 0.1)) - .Add(NUdf::TUnboxedValuePod((int) 2), NUdf::TUnboxedValuePod((double) 0.2)) - .Add(NUdf::TUnboxedValuePod((int) 3), NUdf::TUnboxedValuePod((double) 0.3)) - .Build(); - }, - "def Test(value):\n" - " try:\n" - " print(value[0])\n" - " except KeyError:\n" - " pass\n" - " else:\n" - " assert False\n" - ); - } - - Y_UNIT_TEST(FromPyEmptySet) { - TPythonTestEngine engine; - - engine.ToMiniKQL<NUdf::TDict<ui32, void>>( - "def Test(): return set([])", - [](const NUdf::TUnboxedValuePod& value) { - UNIT_ASSERT(value); - UNIT_ASSERT(value.IsBoxed()); - UNIT_ASSERT(!value.HasDictItems()); - UNIT_ASSERT_EQUAL(value.GetDictLength(), 0); - }); - - } - - Y_UNIT_TEST(FromPySet) { - TPythonTestEngine engine; - - engine.ToMiniKQL<NUdf::TDict<char*, void>>( - "def Test(): return set(['one', 'two', 'three'])", - [](const NUdf::TUnboxedValuePod& value) { - 
UNIT_ASSERT(value); - UNIT_ASSERT(value.IsBoxed()); - UNIT_ASSERT(value.HasDictItems()); - UNIT_ASSERT(!value.IsSortedDict()); - UNIT_ASSERT_EQUAL(value.GetDictLength(), 3); - - std::set<TString> set; - const auto it = value.GetKeysIterator(); - for (NUdf::TUnboxedValue key; it.Next(key);) { - set.emplace(key.AsStringRef()); - } + ); +} - UNIT_ASSERT_EQUAL(set.size(), 3); - UNIT_ASSERT(set.count("one")); - UNIT_ASSERT(set.count("two")); - UNIT_ASSERT(set.count("three")); - }); +Y_UNIT_TEST(ToPyDictWrongKey) { + TPythonTestEngine engine; + engine.ToPython<NUdf::TDict<int, double>>( + [](const TType* type, const NUdf::IValueBuilder& vb) { + return vb.NewDict(type, NUdf::TDictFlags::Hashed)->Add(NUdf::TUnboxedValuePod((int)1), NUdf::TUnboxedValuePod((double)0.1)).Add(NUdf::TUnboxedValuePod((int)2), NUdf::TUnboxedValuePod((double)0.2)).Add(NUdf::TUnboxedValuePod((int)3), NUdf::TUnboxedValuePod((double)0.3)).Build(); + }, + "def Test(value):\n" + " try:\n" + " print(value[0])\n" + " except KeyError:\n" + " pass\n" + " else:\n" + " assert False\n"); +} - } +Y_UNIT_TEST(FromPyEmptySet) { + TPythonTestEngine engine; - Y_UNIT_TEST(FromPySet_Contains) { - TPythonTestEngine engine; + engine.ToMiniKQL<NUdf::TDict<ui32, void>>( + "def Test(): return set([])", + [](const NUdf::TUnboxedValuePod& value) { + UNIT_ASSERT(value); + UNIT_ASSERT(value.IsBoxed()); + UNIT_ASSERT(!value.HasDictItems()); + UNIT_ASSERT_EQUAL(value.GetDictLength(), 0); + }); +} - engine.ToMiniKQL<NUdf::TDict<char*, void>>( - "def Test(): return {b'one', b'two', b'three'}", - [](const NUdf::TUnboxedValuePod& value) { - UNIT_ASSERT(value.Contains(NUdf::TUnboxedValuePod::Embedded("one"))); - UNIT_ASSERT(value.Contains(NUdf::TUnboxedValuePod::Embedded("two"))); - UNIT_ASSERT(value.Contains(NUdf::TUnboxedValuePod::Embedded("three"))); - UNIT_ASSERT(!value.Contains(NUdf::TUnboxedValuePod::Embedded("zero"))); - }); +Y_UNIT_TEST(FromPySet) { + TPythonTestEngine engine; - } + engine.ToMiniKQL<NUdf::TDict<char*, 
void>>( + "def Test(): return set(['one', 'two', 'three'])", + [](const NUdf::TUnboxedValuePod& value) { + UNIT_ASSERT(value); + UNIT_ASSERT(value.IsBoxed()); + UNIT_ASSERT(value.HasDictItems()); + UNIT_ASSERT(!value.IsSortedDict()); + UNIT_ASSERT_EQUAL(value.GetDictLength(), 3); - Y_UNIT_TEST(ToPyEmptySet) { - TPythonTestEngine engine; + std::set<TString> set; + const auto it = value.GetKeysIterator(); + for (NUdf::TUnboxedValue key; it.Next(key);) { + set.emplace(key.AsStringRef()); + } - engine.ToPython<NUdf::TDict<ui8, void>>( - [](const TType* type, const NUdf::IValueBuilder& vb) { - Y_UNUSED(type); - return vb.NewDict(type, NUdf::TDictFlags::Hashed)->Build(); - }, - "def Test(value):\n" - " assert not value\n" - " assert len(value) == 0\n" - ); + UNIT_ASSERT_EQUAL(set.size(), 3); + UNIT_ASSERT(set.count("one")); + UNIT_ASSERT(set.count("two")); + UNIT_ASSERT(set.count("three")); + }); +} - } +Y_UNIT_TEST(FromPySet_Contains) { + TPythonTestEngine engine; - Y_UNIT_TEST(ToPySet) { - TPythonTestEngine engine; + engine.ToMiniKQL<NUdf::TDict<char*, void>>( + "def Test(): return {b'one', b'two', b'three'}", + [](const NUdf::TUnboxedValuePod& value) { + UNIT_ASSERT(value.Contains(NUdf::TUnboxedValuePod::Embedded("one"))); + UNIT_ASSERT(value.Contains(NUdf::TUnboxedValuePod::Embedded("two"))); + UNIT_ASSERT(value.Contains(NUdf::TUnboxedValuePod::Embedded("three"))); + UNIT_ASSERT(!value.Contains(NUdf::TUnboxedValuePod::Embedded("zero"))); + }); +} - engine.ToPython<NUdf::TDict<ui8, void>>( - [](const TType* type, const NUdf::IValueBuilder& vb) { - return vb.NewDict(type, NUdf::TDictFlags::Hashed)-> - Add(NUdf::TUnboxedValuePod((ui8) 1), NUdf::TUnboxedValuePod::Void()) - .Add(NUdf::TUnboxedValuePod((ui8) 2), NUdf::TUnboxedValuePod::Void()) - .Add(NUdf::TUnboxedValuePod((ui8) 3), NUdf::TUnboxedValuePod::Void()) - .Build(); +Y_UNIT_TEST(ToPyEmptySet) { + TPythonTestEngine engine; - }, - "def Test(value):\n" - " assert len(value) == 3\n" - " assert all(isinstance(k, int) 
for k in iter(value))\n" - " assert all(i in value for i in [1, 2, 3])\n"); - } + engine.ToPython<NUdf::TDict<ui8, void>>( + [](const TType* type, const NUdf::IValueBuilder& vb) { + Y_UNUSED(type); + return vb.NewDict(type, NUdf::TDictFlags::Hashed)->Build(); + }, + "def Test(value):\n" + " assert not value\n" + " assert len(value) == 0\n"); +} - Y_UNIT_TEST(FromPyMultiDict) { - TPythonTestEngine engine; +Y_UNIT_TEST(ToPySet) { + TPythonTestEngine engine; - engine.ToMiniKQL<NUdf::TDict<ui32, NUdf::TListType<char*>>>( - "def Test(): return {1: ['one', 'two'], 3: ['three']}", - [](const NUdf::TUnboxedValuePod& value) { - UNIT_ASSERT(value); - UNIT_ASSERT(value.IsBoxed()); - UNIT_ASSERT_EQUAL(value.GetDictLength(), 2); + engine.ToPython<NUdf::TDict<ui8, void>>( + [](const TType* type, const NUdf::IValueBuilder& vb) { + return vb.NewDict(type, NUdf::TDictFlags::Hashed)->Add(NUdf::TUnboxedValuePod((ui8)1), NUdf::TUnboxedValuePod::Void()).Add(NUdf::TUnboxedValuePod((ui8)2), NUdf::TUnboxedValuePod::Void()).Add(NUdf::TUnboxedValuePod((ui8)3), NUdf::TUnboxedValuePod::Void()).Build(); + }, + "def Test(value):\n" + " assert len(value) == 3\n" + " assert all(isinstance(k, int) for k in iter(value))\n" + " assert all(i in value for i in [1, 2, 3])\n"); +} - std::unordered_map<ui32, std::vector<TString>> map; - const auto dictIt = value.GetDictIterator(); - for (NUdf::TUnboxedValue key, payload; dictIt.NextPair(key, payload);) { - auto& val = map[key.Get<ui32>()]; - const auto listIt = payload.GetListIterator(); - for (NUdf::TUnboxedValue listItem; listIt.Next(listItem);) { - val.emplace_back(listItem.AsStringRef()); - } - } +Y_UNIT_TEST(FromPyMultiDict) { + TPythonTestEngine engine; - UNIT_ASSERT_EQUAL(map.size(), 2); - auto it = map.find(1); - UNIT_ASSERT(it != map.end()); - UNIT_ASSERT_EQUAL(it->second.size(), 2); - UNIT_ASSERT_EQUAL(it->second[0], "one"); - UNIT_ASSERT_EQUAL(it->second[1], "two"); - it = map.find(3); - UNIT_ASSERT(it != map.end()); - 
UNIT_ASSERT_EQUAL(it->second.size(), 1); - UNIT_ASSERT_EQUAL(it->second[0], "three"); - }); + engine.ToMiniKQL<NUdf::TDict<ui32, NUdf::TListType<char*>>>( + "def Test(): return {1: ['one', 'two'], 3: ['three']}", + [](const NUdf::TUnboxedValuePod& value) { + UNIT_ASSERT(value); + UNIT_ASSERT(value.IsBoxed()); + UNIT_ASSERT_EQUAL(value.GetDictLength(), 2); - } + std::unordered_map<ui32, std::vector<TString>> map; + const auto dictIt = value.GetDictIterator(); + for (NUdf::TUnboxedValue key, payload; dictIt.NextPair(key, payload);) { + auto& val = map[key.Get<ui32>()]; + const auto listIt = payload.GetListIterator(); + for (NUdf::TUnboxedValue listItem; listIt.Next(listItem);) { + val.emplace_back(listItem.AsStringRef()); + } + } - Y_UNIT_TEST(ToPyMultiDict) { - TPythonTestEngine engine; + UNIT_ASSERT_EQUAL(map.size(), 2); + auto it = map.find(1); + UNIT_ASSERT(it != map.end()); + UNIT_ASSERT_EQUAL(it->second.size(), 2); + UNIT_ASSERT_EQUAL(it->second[0], "one"); + UNIT_ASSERT_EQUAL(it->second[1], "two"); + it = map.find(3); + UNIT_ASSERT(it != map.end()); + UNIT_ASSERT_EQUAL(it->second.size(), 1); + UNIT_ASSERT_EQUAL(it->second[0], "three"); + }); +} - engine.ToPython<NUdf::TDict<ui8, NUdf::TListType<NUdf::TUtf8>>>( - [](const TType* type, const NUdf::IValueBuilder& vb) { - ui32 flags = NUdf::TDictFlags::Hashed | NUdf::TDictFlags::Multi; - return vb.NewDict(type, flags)-> - Add(NUdf::TUnboxedValuePod((ui8) 1), vb.NewString("one")) - .Add(NUdf::TUnboxedValuePod((ui8) 1), vb.NewString("two")) - .Add(NUdf::TUnboxedValuePod((ui8) 3), vb.NewString("three")) - .Build(); +Y_UNIT_TEST(ToPyMultiDict) { + TPythonTestEngine engine; - }, - "def Test(value):\n" - " assert len(value) == 2\n" - " assert 1 in value\n" - " assert 3 in value\n" - " assert len(value[1]) == 2\n" - " assert 'one' in value[1]\n" - " assert 'two' in value[1]\n" - " assert list(value[3]) == ['three']\n"); - } + engine.ToPython<NUdf::TDict<ui8, NUdf::TListType<NUdf::TUtf8>>>( + [](const TType* type, const 
NUdf::IValueBuilder& vb) { + ui32 flags = NUdf::TDictFlags::Hashed | NUdf::TDictFlags::Multi; + return vb.NewDict(type, flags)->Add(NUdf::TUnboxedValuePod((ui8)1), vb.NewString("one")).Add(NUdf::TUnboxedValuePod((ui8)1), vb.NewString("two")).Add(NUdf::TUnboxedValuePod((ui8)3), vb.NewString("three")).Build(); + }, + "def Test(value):\n" + " assert len(value) == 2\n" + " assert 1 in value\n" + " assert 3 in value\n" + " assert len(value[1]) == 2\n" + " assert 'one' in value[1]\n" + " assert 'two' in value[1]\n" + " assert list(value[3]) == ['three']\n"); +} - Y_UNIT_TEST(ToPyAndBackDictAsIs) { - TPythonTestEngine engine; - engine.ToPythonAndBack<NUdf::TDict<i32, double>>( - [](const TType* type, const NUdf::IValueBuilder& vb) { - return vb.NewDict(type, NUdf::TDictFlags::Sorted)-> - Add(NUdf::TUnboxedValuePod((i32) 1), NUdf::TUnboxedValuePod((double) 0.1)) - .Add(NUdf::TUnboxedValuePod((i32) 2), NUdf::TUnboxedValuePod((double) 0.2)) - .Add(NUdf::TUnboxedValuePod((i32) 3), NUdf::TUnboxedValuePod((double) 0.3)) - .Build(); - }, - "def Test(value): return value", - [](const NUdf::TUnboxedValuePod& value) { - UNIT_ASSERT(value.HasDictItems()); - UNIT_ASSERT_EQUAL(value.GetDictLength(), 3); - UNIT_ASSERT(!value.Contains(NUdf::TUnboxedValuePod((i32) 0))); - UNIT_ASSERT(value.Contains(NUdf::TUnboxedValuePod((i32) 3))); - UNIT_ASSERT_EQUAL(value.Lookup(NUdf::TUnboxedValuePod((i32) 2)).Get<double>(), 0.2); - UNIT_ASSERT(!value.Lookup(NUdf::TUnboxedValuePod((i32) 4))); +Y_UNIT_TEST(ToPyAndBackDictAsIs) { + TPythonTestEngine engine; + engine.ToPythonAndBack<NUdf::TDict<i32, double>>( + [](const TType* type, const NUdf::IValueBuilder& vb) { + return vb.NewDict(type, NUdf::TDictFlags::Sorted)->Add(NUdf::TUnboxedValuePod((i32)1), NUdf::TUnboxedValuePod((double)0.1)).Add(NUdf::TUnboxedValuePod((i32)2), NUdf::TUnboxedValuePod((double)0.2)).Add(NUdf::TUnboxedValuePod((i32)3), NUdf::TUnboxedValuePod((double)0.3)).Build(); + }, + "def Test(value): return value", + [](const 
NUdf::TUnboxedValuePod& value) { + UNIT_ASSERT(value.HasDictItems()); + UNIT_ASSERT_EQUAL(value.GetDictLength(), 3); + UNIT_ASSERT(!value.Contains(NUdf::TUnboxedValuePod((i32)0))); + UNIT_ASSERT(value.Contains(NUdf::TUnboxedValuePod((i32)3))); + UNIT_ASSERT_EQUAL(value.Lookup(NUdf::TUnboxedValuePod((i32)2)).Get<double>(), 0.2); + UNIT_ASSERT(!value.Lookup(NUdf::TUnboxedValuePod((i32)4))); - std::vector<std::pair<i32, double>> items; - const auto it = value.GetDictIterator(); - for (NUdf::TUnboxedValue key, payload; it.NextPair(key, payload);) { - items.emplace_back(key.Get<i32>(), payload.Get<double>()); - } - UNIT_ASSERT_EQUAL(items.size(), 3); - UNIT_ASSERT_EQUAL(items[0].first, 1); - UNIT_ASSERT_EQUAL(items[1].first, 2); - UNIT_ASSERT_EQUAL(items[2].first, 3); - UNIT_ASSERT_EQUAL(items[0].second, 0.1); - UNIT_ASSERT_EQUAL(items[1].second, 0.2); - UNIT_ASSERT_EQUAL(items[2].second, 0.3); + std::vector<std::pair<i32, double>> items; + const auto it = value.GetDictIterator(); + for (NUdf::TUnboxedValue key, payload; it.NextPair(key, payload);) { + items.emplace_back(key.Get<i32>(), payload.Get<double>()); + } + UNIT_ASSERT_EQUAL(items.size(), 3); + UNIT_ASSERT_EQUAL(items[0].first, 1); + UNIT_ASSERT_EQUAL(items[1].first, 2); + UNIT_ASSERT_EQUAL(items[2].first, 3); + UNIT_ASSERT_EQUAL(items[0].second, 0.1); + UNIT_ASSERT_EQUAL(items[1].second, 0.2); + UNIT_ASSERT_EQUAL(items[2].second, 0.3); - std::vector<i32> keys; - const auto kit = value.GetKeysIterator(); - for (NUdf::TUnboxedValue key; kit.Next(key);) { - keys.emplace_back(key.Get<i32>()); - } + std::vector<i32> keys; + const auto kit = value.GetKeysIterator(); + for (NUdf::TUnboxedValue key; kit.Next(key);) { + keys.emplace_back(key.Get<i32>()); + } - UNIT_ASSERT_EQUAL(keys.size(), 3); - UNIT_ASSERT_EQUAL(keys[0], 1); - UNIT_ASSERT_EQUAL(keys[1], 2); - UNIT_ASSERT_EQUAL(keys[2], 3); + UNIT_ASSERT_EQUAL(keys.size(), 3); + UNIT_ASSERT_EQUAL(keys[0], 1); + UNIT_ASSERT_EQUAL(keys[1], 2); + 
UNIT_ASSERT_EQUAL(keys[2], 3); - std::vector<double> values; - const auto pit = value.GetPayloadsIterator(); - for (NUdf::TUnboxedValue payload; pit.Next(payload);) { - values.emplace_back(payload.Get<double>()); - } + std::vector<double> values; + const auto pit = value.GetPayloadsIterator(); + for (NUdf::TUnboxedValue payload; pit.Next(payload);) { + values.emplace_back(payload.Get<double>()); + } - UNIT_ASSERT_EQUAL(values.size(), 3); - UNIT_ASSERT_EQUAL(values[0], 0.1); - UNIT_ASSERT_EQUAL(values[1], 0.2); - UNIT_ASSERT_EQUAL(values[2], 0.3); - } - ); - } + UNIT_ASSERT_EQUAL(values.size(), 3); + UNIT_ASSERT_EQUAL(values[0], 0.1); + UNIT_ASSERT_EQUAL(values[1], 0.2); + UNIT_ASSERT_EQUAL(values[2], 0.3); + }); +} - Y_UNIT_TEST(PyInvertDict) { - TPythonTestEngine engine; - engine.ToPythonAndBack<NUdf::TDict<i32, double>, NUdf::TDict<double, i32>>( - [](const TType* type, const NUdf::IValueBuilder& vb) { - return vb.NewDict(type, NUdf::TDictFlags::Hashed)-> - Add(NUdf::TUnboxedValuePod((i32) 1), NUdf::TUnboxedValuePod((double) 0.1)) - .Add(NUdf::TUnboxedValuePod((i32) 2), NUdf::TUnboxedValuePod((double) 0.2)) - .Add(NUdf::TUnboxedValuePod((i32) 3), NUdf::TUnboxedValuePod((double) 0.3)) - .Build(); - }, - "def Test(value): return { v: k for k, v in value.items() }", - [](const NUdf::TUnboxedValuePod& value) { - UNIT_ASSERT(value.HasDictItems()); - UNIT_ASSERT_EQUAL(value.GetDictLength(), 3); - UNIT_ASSERT(value.Contains(NUdf::TUnboxedValuePod((double) 0.1))); - UNIT_ASSERT(!value.Contains(NUdf::TUnboxedValuePod((double) 0.0))); - UNIT_ASSERT(!value.Lookup(NUdf::TUnboxedValuePod((double) 0.4))); - UNIT_ASSERT_EQUAL(value.Lookup(NUdf::TUnboxedValuePod((double) 0.2)).Get<i32>(), 2); +Y_UNIT_TEST(PyInvertDict) { + TPythonTestEngine engine; + engine.ToPythonAndBack<NUdf::TDict<i32, double>, NUdf::TDict<double, i32>>( + [](const TType* type, const NUdf::IValueBuilder& vb) { + return vb.NewDict(type, NUdf::TDictFlags::Hashed)->Add(NUdf::TUnboxedValuePod((i32)1), 
NUdf::TUnboxedValuePod((double)0.1)).Add(NUdf::TUnboxedValuePod((i32)2), NUdf::TUnboxedValuePod((double)0.2)).Add(NUdf::TUnboxedValuePod((i32)3), NUdf::TUnboxedValuePod((double)0.3)).Build(); + }, + "def Test(value): return { v: k for k, v in value.items() }", + [](const NUdf::TUnboxedValuePod& value) { + UNIT_ASSERT(value.HasDictItems()); + UNIT_ASSERT_EQUAL(value.GetDictLength(), 3); + UNIT_ASSERT(value.Contains(NUdf::TUnboxedValuePod((double)0.1))); + UNIT_ASSERT(!value.Contains(NUdf::TUnboxedValuePod((double)0.0))); + UNIT_ASSERT(!value.Lookup(NUdf::TUnboxedValuePod((double)0.4))); + UNIT_ASSERT_EQUAL(value.Lookup(NUdf::TUnboxedValuePod((double)0.2)).Get<i32>(), 2); - std::map<double, i32> items; - const auto it = value.GetDictIterator(); - for (NUdf::TUnboxedValue key, payload; it.NextPair(key, payload);) { - items.emplace(key.Get<double>(), payload.Get<i32>()); - } - UNIT_ASSERT_EQUAL(items.size(), 3); - UNIT_ASSERT_EQUAL(items[0.1], 1); - UNIT_ASSERT_EQUAL(items[0.2], 2); - UNIT_ASSERT_EQUAL(items[0.3], 3); - } - ); - } + std::map<double, i32> items; + const auto it = value.GetDictIterator(); + for (NUdf::TUnboxedValue key, payload; it.NextPair(key, payload);) { + items.emplace(key.Get<double>(), payload.Get<i32>()); + } + UNIT_ASSERT_EQUAL(items.size(), 3); + UNIT_ASSERT_EQUAL(items[0.1], 1); + UNIT_ASSERT_EQUAL(items[0.2], 2); + UNIT_ASSERT_EQUAL(items[0.3], 3); + }); +} - Y_UNIT_TEST(FromPyOrderedDict) { - TPythonTestEngine engine; - engine.ToMiniKQL<NUdf::TDict<ui32, char*>>( - "from collections import OrderedDict\n" - "def Test(): return OrderedDict([(2, 'two'), (1, 'one'), (3, 'three')])\n", - [](const NUdf::TUnboxedValuePod& value) { - UNIT_ASSERT(value); - UNIT_ASSERT(value.IsBoxed()); - UNIT_ASSERT(value.HasDictItems()); - UNIT_ASSERT_EQUAL(value.GetDictLength(), 3); +Y_UNIT_TEST(FromPyOrderedDict) { + TPythonTestEngine engine; + engine.ToMiniKQL<NUdf::TDict<ui32, char*>>( + "from collections import OrderedDict\n" + "def Test(): return 
OrderedDict([(2, 'two'), (1, 'one'), (3, 'three')])\n", + [](const NUdf::TUnboxedValuePod& value) { + UNIT_ASSERT(value); + UNIT_ASSERT(value.IsBoxed()); + UNIT_ASSERT(value.HasDictItems()); + UNIT_ASSERT_EQUAL(value.GetDictLength(), 3); - UNIT_ASSERT(value.Contains(NUdf::TUnboxedValuePod(ui32(1)))); - UNIT_ASSERT(!value.Contains(NUdf::TUnboxedValuePod(ui32(0)))); - const auto v = value.Lookup(NUdf::TUnboxedValuePod(ui32(1))); - UNIT_ASSERT_EQUAL(v.AsStringRef(), "one"); - UNIT_ASSERT(!value.Lookup(NUdf::TUnboxedValuePod((ui32(4))))); + UNIT_ASSERT(value.Contains(NUdf::TUnboxedValuePod(ui32(1)))); + UNIT_ASSERT(!value.Contains(NUdf::TUnboxedValuePod(ui32(0)))); + const auto v = value.Lookup(NUdf::TUnboxedValuePod(ui32(1))); + UNIT_ASSERT_EQUAL(v.AsStringRef(), "one"); + UNIT_ASSERT(!value.Lookup(NUdf::TUnboxedValuePod((ui32(4))))); #if PY_MAJOR_VERSION >= 3 - std::vector<std::pair<ui32, TString>> items; - const auto it = value.GetDictIterator(); - for (NUdf::TUnboxedValue key, payload; it.NextPair(key, payload);) { - items.emplace_back(key.Get<ui32>(), payload.AsStringRef()); - } + std::vector<std::pair<ui32, TString>> items; + const auto it = value.GetDictIterator(); + for (NUdf::TUnboxedValue key, payload; it.NextPair(key, payload);) { + items.emplace_back(key.Get<ui32>(), payload.AsStringRef()); + } - UNIT_ASSERT_EQUAL(items.size(), 3); - UNIT_ASSERT_EQUAL(items[0].first, 2); - UNIT_ASSERT_EQUAL(items[1].first, 1); - UNIT_ASSERT_EQUAL(items[2].first, 3); - UNIT_ASSERT_EQUAL(items[0].second, "two"); - UNIT_ASSERT_EQUAL(items[1].second, "one"); - UNIT_ASSERT_EQUAL(items[2].second, "three"); + UNIT_ASSERT_EQUAL(items.size(), 3); + UNIT_ASSERT_EQUAL(items[0].first, 2); + UNIT_ASSERT_EQUAL(items[1].first, 1); + UNIT_ASSERT_EQUAL(items[2].first, 3); + UNIT_ASSERT_EQUAL(items[0].second, "two"); + UNIT_ASSERT_EQUAL(items[1].second, "one"); + UNIT_ASSERT_EQUAL(items[2].second, "three"); - std::vector<ui32> keys; - const auto kit = value.GetKeysIterator(); - for 
(NUdf::TUnboxedValue key; kit.Next(key);) { - keys.emplace_back(key.Get<ui32>()); - } + std::vector<ui32> keys; + const auto kit = value.GetKeysIterator(); + for (NUdf::TUnboxedValue key; kit.Next(key);) { + keys.emplace_back(key.Get<ui32>()); + } - UNIT_ASSERT_EQUAL(keys.size(), 3); - UNIT_ASSERT_EQUAL(keys[0], 2); - UNIT_ASSERT_EQUAL(keys[1], 1); - UNIT_ASSERT_EQUAL(keys[2], 3); + UNIT_ASSERT_EQUAL(keys.size(), 3); + UNIT_ASSERT_EQUAL(keys[0], 2); + UNIT_ASSERT_EQUAL(keys[1], 1); + UNIT_ASSERT_EQUAL(keys[2], 3); - std::vector<TString> values; - const auto pit = value.GetPayloadsIterator(); - for (NUdf::TUnboxedValue payload; pit.Next(payload);) { - values.emplace_back(payload.AsStringRef()); - } + std::vector<TString> values; + const auto pit = value.GetPayloadsIterator(); + for (NUdf::TUnboxedValue payload; pit.Next(payload);) { + values.emplace_back(payload.AsStringRef()); + } - UNIT_ASSERT_EQUAL(values.size(), 3); - UNIT_ASSERT_EQUAL(values[0], "two"); - UNIT_ASSERT_EQUAL(values[1], "one"); - UNIT_ASSERT_EQUAL(values[2], "three"); + UNIT_ASSERT_EQUAL(values.size(), 3); + UNIT_ASSERT_EQUAL(values[0], "two"); + UNIT_ASSERT_EQUAL(values[1], "one"); + UNIT_ASSERT_EQUAL(values[2], "three"); #endif - }); - } + }); +} - Y_UNIT_TEST(ToPyAndBackSetAsIs) { - TPythonTestEngine engine; - engine.ToPythonAndBack<NUdf::TDict<float, void>>( - [](const TType* type, const NUdf::IValueBuilder& vb) { - return vb.NewDict(type, NUdf::TDictFlags::Sorted)-> - Add(NUdf::TUnboxedValuePod(0.1f), NUdf::TUnboxedValuePod::Void()) - .Add(NUdf::TUnboxedValuePod(0.2f), NUdf::TUnboxedValuePod::Void()) - .Add(NUdf::TUnboxedValuePod(0.3f), NUdf::TUnboxedValuePod::Void()) - .Build(); - }, - "def Test(value): return value", - [](const NUdf::TUnboxedValuePod& value) { - UNIT_ASSERT(value.HasDictItems()); - UNIT_ASSERT_EQUAL(value.GetDictLength(), 3); - UNIT_ASSERT(!value.Contains(NUdf::TUnboxedValuePod(0.0f))); - UNIT_ASSERT(value.Contains(NUdf::TUnboxedValuePod(0.3f))); - 
UNIT_ASSERT(value.Lookup(NUdf::TUnboxedValuePod(0.2f))); - UNIT_ASSERT(!value.Lookup(NUdf::TUnboxedValuePod(0.4f))); +Y_UNIT_TEST(ToPyAndBackSetAsIs) { + TPythonTestEngine engine; + engine.ToPythonAndBack<NUdf::TDict<float, void>>( + [](const TType* type, const NUdf::IValueBuilder& vb) { + return vb.NewDict(type, NUdf::TDictFlags::Sorted)->Add(NUdf::TUnboxedValuePod(0.1f), NUdf::TUnboxedValuePod::Void()).Add(NUdf::TUnboxedValuePod(0.2f), NUdf::TUnboxedValuePod::Void()).Add(NUdf::TUnboxedValuePod(0.3f), NUdf::TUnboxedValuePod::Void()).Build(); + }, + "def Test(value): return value", + [](const NUdf::TUnboxedValuePod& value) { + UNIT_ASSERT(value.HasDictItems()); + UNIT_ASSERT_EQUAL(value.GetDictLength(), 3); + UNIT_ASSERT(!value.Contains(NUdf::TUnboxedValuePod(0.0f))); + UNIT_ASSERT(value.Contains(NUdf::TUnboxedValuePod(0.3f))); + UNIT_ASSERT(value.Lookup(NUdf::TUnboxedValuePod(0.2f))); + UNIT_ASSERT(!value.Lookup(NUdf::TUnboxedValuePod(0.4f))); - std::vector<float> keys; - const auto kit = value.GetKeysIterator(); - for (NUdf::TUnboxedValue key; kit.Next(key);) { - keys.emplace_back(key.Get<float>()); - } + std::vector<float> keys; + const auto kit = value.GetKeysIterator(); + for (NUdf::TUnboxedValue key; kit.Next(key);) { + keys.emplace_back(key.Get<float>()); + } - UNIT_ASSERT_EQUAL(keys.size(), 3); - UNIT_ASSERT_EQUAL(keys[0], 0.1f); - UNIT_ASSERT_EQUAL(keys[1], 0.2f); - UNIT_ASSERT_EQUAL(keys[2], 0.3f); - } - ); - } + UNIT_ASSERT_EQUAL(keys.size(), 3); + UNIT_ASSERT_EQUAL(keys[0], 0.1f); + UNIT_ASSERT_EQUAL(keys[1], 0.2f); + UNIT_ASSERT_EQUAL(keys[2], 0.3f); + }); +} - Y_UNIT_TEST(ToPyAsThinList_FromPyAsDict) { - TPythonTestEngine engine; - engine.ToPythonAndBack<NUdf::TListType<float>, NUdf::TDict<i8, float>>( - [](const TType*, const NUdf::IValueBuilder& vb) { - NUdf::TUnboxedValue *items = nullptr; - const auto a = vb.NewArray(9U, items); - const float f[] = { 0.1f, 0.2f, 0.3f, 0.4f, 0.5f, 0.6f, 0.7f, 0.8f, 0.9f }; - std::transform(f, f + 9U, items, 
[](float v){ return NUdf::TUnboxedValuePod(v); }); - return a; - }, - "def Test(value): return value", - [](const NUdf::TUnboxedValuePod& value) { - UNIT_ASSERT(value.HasDictItems()); - UNIT_ASSERT_EQUAL(value.GetDictLength(), 9U); - UNIT_ASSERT(value.Contains(NUdf::TUnboxedValuePod(i8(0)))); - UNIT_ASSERT(!value.Contains(NUdf::TUnboxedValuePod(i8(10)))); - UNIT_ASSERT_EQUAL(value.Lookup(NUdf::TUnboxedValuePod(i8(5))).Get<float>(), 0.6f); - UNIT_ASSERT(!value.Lookup(NUdf::TUnboxedValuePod(i8(13)))); +Y_UNIT_TEST(ToPyAsThinList_FromPyAsDict) { + TPythonTestEngine engine; + engine.ToPythonAndBack<NUdf::TListType<float>, NUdf::TDict<i8, float>>( + [](const TType*, const NUdf::IValueBuilder& vb) { + NUdf::TUnboxedValue* items = nullptr; + const auto a = vb.NewArray(9U, items); + const float f[] = {0.1f, 0.2f, 0.3f, 0.4f, 0.5f, 0.6f, 0.7f, 0.8f, 0.9f}; + std::transform(f, f + 9U, items, [](float v) { return NUdf::TUnboxedValuePod(v); }); + return a; + }, + "def Test(value): return value", + [](const NUdf::TUnboxedValuePod& value) { + UNIT_ASSERT(value.HasDictItems()); + UNIT_ASSERT_EQUAL(value.GetDictLength(), 9U); + UNIT_ASSERT(value.Contains(NUdf::TUnboxedValuePod(i8(0)))); + UNIT_ASSERT(!value.Contains(NUdf::TUnboxedValuePod(i8(10)))); + UNIT_ASSERT_EQUAL(value.Lookup(NUdf::TUnboxedValuePod(i8(5))).Get<float>(), 0.6f); + UNIT_ASSERT(!value.Lookup(NUdf::TUnboxedValuePod(i8(13)))); - std::vector<std::pair<i8, float>> items; - const auto it = value.GetDictIterator(); - for (NUdf::TUnboxedValue key, payload; it.NextPair(key, payload);) { - items.emplace_back(key.Get<i8>(), payload.Get<float>()); - } + std::vector<std::pair<i8, float>> items; + const auto it = value.GetDictIterator(); + for (NUdf::TUnboxedValue key, payload; it.NextPair(key, payload);) { + items.emplace_back(key.Get<i8>(), payload.Get<float>()); + } - UNIT_ASSERT_EQUAL(items.size(), 9U); - UNIT_ASSERT_EQUAL(items.front().first, 0); - UNIT_ASSERT_EQUAL(items.back().first, 8); - 
UNIT_ASSERT_EQUAL(items.front().second, 0.1f); - UNIT_ASSERT_EQUAL(items.back().second, 0.9f); + UNIT_ASSERT_EQUAL(items.size(), 9U); + UNIT_ASSERT_EQUAL(items.front().first, 0); + UNIT_ASSERT_EQUAL(items.back().first, 8); + UNIT_ASSERT_EQUAL(items.front().second, 0.1f); + UNIT_ASSERT_EQUAL(items.back().second, 0.9f); - std::vector<i8> keys; - const auto kit = value.GetKeysIterator(); - for (NUdf::TUnboxedValue key; kit.Next(key);) { - keys.emplace_back(key.Get<i8>()); - } + std::vector<i8> keys; + const auto kit = value.GetKeysIterator(); + for (NUdf::TUnboxedValue key; kit.Next(key);) { + keys.emplace_back(key.Get<i8>()); + } - UNIT_ASSERT_EQUAL(keys.size(), 9U); - UNIT_ASSERT_EQUAL(keys.front(), 0); - UNIT_ASSERT_EQUAL(keys.back(), 8); + UNIT_ASSERT_EQUAL(keys.size(), 9U); + UNIT_ASSERT_EQUAL(keys.front(), 0); + UNIT_ASSERT_EQUAL(keys.back(), 8); - std::vector<float> values; - const auto pit = value.GetPayloadsIterator(); - for (NUdf::TUnboxedValue payload; pit.Next(payload);) { - values.emplace_back(payload.Get<float>()); - } + std::vector<float> values; + const auto pit = value.GetPayloadsIterator(); + for (NUdf::TUnboxedValue payload; pit.Next(payload);) { + values.emplace_back(payload.Get<float>()); + } - UNIT_ASSERT_EQUAL(values.size(), 9U); - UNIT_ASSERT_EQUAL(values.front(), 0.1f); - UNIT_ASSERT_EQUAL(values.back(), 0.9f); - } - ); - } + UNIT_ASSERT_EQUAL(values.size(), 9U); + UNIT_ASSERT_EQUAL(values.front(), 0.1f); + UNIT_ASSERT_EQUAL(values.back(), 0.9f); + }); +} - Y_UNIT_TEST(ToPyAsLazyList_FromPyAsDict) { - TPythonTestEngine engine; - engine.ToPythonAndBack<NUdf::TListType<i32>, NUdf::TDict<ui8, i32>>( - [](const TType*, const NUdf::IValueBuilder&) { - return NUdf::TUnboxedValuePod(new NUdf::TLazyList<false>(1, 10)); - }, - "def Test(value): return value", - [](const NUdf::TUnboxedValuePod& value) { - UNIT_ASSERT(value.HasDictItems()); - UNIT_ASSERT_EQUAL(value.GetDictLength(), 9U); - UNIT_ASSERT(value.Contains(NUdf::TUnboxedValuePod(ui8(0)))); - 
UNIT_ASSERT(!value.Contains(NUdf::TUnboxedValuePod(ui8(10)))); - UNIT_ASSERT_EQUAL(value.Lookup(NUdf::TUnboxedValuePod(ui8(5))).Get<i32>(), 6); - UNIT_ASSERT(!value.Lookup(NUdf::TUnboxedValuePod(ui8(13)))); +Y_UNIT_TEST(ToPyAsLazyList_FromPyAsDict) { + TPythonTestEngine engine; + engine.ToPythonAndBack<NUdf::TListType<i32>, NUdf::TDict<ui8, i32>>( + [](const TType*, const NUdf::IValueBuilder&) { + return NUdf::TUnboxedValuePod(new NUdf::TLazyList<false>(1, 10)); + }, + "def Test(value): return value", + [](const NUdf::TUnboxedValuePod& value) { + UNIT_ASSERT(value.HasDictItems()); + UNIT_ASSERT_EQUAL(value.GetDictLength(), 9U); + UNIT_ASSERT(value.Contains(NUdf::TUnboxedValuePod(ui8(0)))); + UNIT_ASSERT(!value.Contains(NUdf::TUnboxedValuePod(ui8(10)))); + UNIT_ASSERT_EQUAL(value.Lookup(NUdf::TUnboxedValuePod(ui8(5))).Get<i32>(), 6); + UNIT_ASSERT(!value.Lookup(NUdf::TUnboxedValuePod(ui8(13)))); - std::vector<std::pair<ui8, i32>> items; - const auto it = value.GetDictIterator(); - for (NUdf::TUnboxedValue key, payload; it.NextPair(key, payload);) { - items.emplace_back(key.Get<ui8>(), payload.Get<i32>()); - } + std::vector<std::pair<ui8, i32>> items; + const auto it = value.GetDictIterator(); + for (NUdf::TUnboxedValue key, payload; it.NextPair(key, payload);) { + items.emplace_back(key.Get<ui8>(), payload.Get<i32>()); + } - UNIT_ASSERT_EQUAL(items.size(), 9U); - UNIT_ASSERT_EQUAL(items.front().first, 0); - UNIT_ASSERT_EQUAL(items.back().first, 8); - UNIT_ASSERT_EQUAL(items.front().second, 1); - UNIT_ASSERT_EQUAL(items.back().second, 9); + UNIT_ASSERT_EQUAL(items.size(), 9U); + UNIT_ASSERT_EQUAL(items.front().first, 0); + UNIT_ASSERT_EQUAL(items.back().first, 8); + UNIT_ASSERT_EQUAL(items.front().second, 1); + UNIT_ASSERT_EQUAL(items.back().second, 9); - std::vector<ui8> keys; - const auto kit = value.GetKeysIterator(); - for (NUdf::TUnboxedValue key; kit.Next(key);) { - keys.emplace_back(key.Get<ui8>()); - } + std::vector<ui8> keys; + const auto kit = 
value.GetKeysIterator(); + for (NUdf::TUnboxedValue key; kit.Next(key);) { + keys.emplace_back(key.Get<ui8>()); + } - UNIT_ASSERT_EQUAL(keys.size(), 9U); - UNIT_ASSERT_EQUAL(keys.front(), 0); - UNIT_ASSERT_EQUAL(keys.back(), 8); + UNIT_ASSERT_EQUAL(keys.size(), 9U); + UNIT_ASSERT_EQUAL(keys.front(), 0); + UNIT_ASSERT_EQUAL(keys.back(), 8); - std::vector<i32> values; - const auto pit = value.GetPayloadsIterator(); - for (NUdf::TUnboxedValue payload; pit.Next(payload);) { - values.emplace_back(payload.Get<i32>()); - } + std::vector<i32> values; + const auto pit = value.GetPayloadsIterator(); + for (NUdf::TUnboxedValue payload; pit.Next(payload);) { + values.emplace_back(payload.Get<i32>()); + } - UNIT_ASSERT_EQUAL(values.size(), 9U); - UNIT_ASSERT_EQUAL(values.front(), 1); - UNIT_ASSERT_EQUAL(values.back(), 9); - } - ); - } + UNIT_ASSERT_EQUAL(values.size(), 9U); + UNIT_ASSERT_EQUAL(values.front(), 1); + UNIT_ASSERT_EQUAL(values.back(), 9); + }); } +} // Y_UNIT_TEST_SUITE(TPyDictTest) diff --git a/yql/essentials/udfs/common/python/bindings/py_errors.cpp b/yql/essentials/udfs/common/python/bindings/py_errors.cpp index 5741978d543..0118bee003f 100644 --- a/yql/essentials/udfs/common/python/bindings/py_errors.cpp +++ b/yql/essentials/udfs/common/python/bindings/py_errors.cpp @@ -38,11 +38,11 @@ TString GetLastErrorAsString() return {}; } - TPyObjectPtr etypePtr {etype, TPyObjectPtr::ADD_REF}; - TPyObjectPtr evaluePtr {evalue, TPyObjectPtr::ADD_REF}; - TPyObjectPtr etracebackPtr {etraceback, TPyObjectPtr::ADD_REF}; + TPyObjectPtr etypePtr{etype, TPyObjectPtr::ADD_REF}; + TPyObjectPtr evaluePtr{evalue, TPyObjectPtr::ADD_REF}; + TPyObjectPtr etracebackPtr{etraceback, TPyObjectPtr::ADD_REF}; - TPyObjectPtr stderrObject {PySys_GetObject("stderr"), TPyObjectPtr::ADD_REF}; + TPyObjectPtr stderrObject{PySys_GetObject("stderr"), TPyObjectPtr::ADD_REF}; if (!stderrObject) { return {}; } @@ -59,8 +59,7 @@ TString GetLastErrorAsString() return {}; } unused.ResetSteal( - 
PyObject_CallMethod(stderrObject.Get(), "_toggle_real_mode", nullptr) - ); + PyObject_CallMethod(stderrObject.Get(), "_toggle_real_mode", nullptr)); TString errorValue; if (!TryPyCast(error.Get(), errorValue)) { @@ -69,4 +68,4 @@ TString GetLastErrorAsString() return errorValue; } -} // namspace NPython +} // namespace NPython diff --git a/yql/essentials/udfs/common/python/bindings/py_errors.h b/yql/essentials/udfs/common/python/bindings/py_errors.h index 2306b47bb95..1aa6ecf6eb0 100644 --- a/yql/essentials/udfs/common/python/bindings/py_errors.h +++ b/yql/essentials/udfs/common/python/bindings/py_errors.h @@ -8,17 +8,17 @@ TString GetLastErrorAsString(); #define PY_TRY try -#define PY_CATCH(ErrorValue) \ - catch (const yexception& e) { \ +#define PY_CATCH(ErrorValue) \ + catch (const yexception& e) { \ PyErr_SetString(PyExc_RuntimeError, e.what()); \ - return ErrorValue; \ + return ErrorValue; \ } -#define PY_ENSURE(condition, message) \ - do { \ - if (Y_UNLIKELY(!(condition))) { \ +#define PY_ENSURE(condition, message) \ + do { \ + if (Y_UNLIKELY(!(condition))) { \ throw yexception() << message; \ - } \ + } \ } while (0) -} // namspace NPython +} // namespace NPython diff --git a/yql/essentials/udfs/common/python/bindings/py_gil.h b/yql/essentials/udfs/common/python/bindings/py_gil.h index 6d629e7b237..7d402773186 100644 --- a/yql/essentials/udfs/common/python/bindings/py_gil.h +++ b/yql/essentials/udfs/common/python/bindings/py_gil.h @@ -2,11 +2,9 @@ #include <Python.h> - namespace NPython { -struct TPyGilLocker -{ +struct TPyGilLocker { TPyGilLocker() : Gil_(PyGILState_Ensure()) { diff --git a/yql/essentials/udfs/common/python/bindings/py_iterator.cpp b/yql/essentials/udfs/common/python/bindings/py_iterator.cpp index c6f21cecb16..2e4912a7b08 100644 --- a/yql/essentials/udfs/common/python/bindings/py_iterator.cpp +++ b/yql/essentials/udfs/common/python/bindings/py_iterator.cpp @@ -13,8 +13,7 @@ namespace NPython { 
////////////////////////////////////////////////////////////////////////////// // TPyIterator interface ////////////////////////////////////////////////////////////////////////////// -struct TPyIterator -{ +struct TPyIterator { PyObject_HEAD; TPyCastContext::TPtr CastCtx; const NUdf::TType* ItemType; @@ -38,81 +37,82 @@ struct TPyIterator }; #if PY_MAJOR_VERSION >= 3 -#define Py_TPFLAGS_HAVE_ITER 0 // NOLINT(readability-identifier-naming) + #define Py_TPFLAGS_HAVE_ITER 0 // NOLINT(readability-identifier-naming) #endif PyTypeObject PyIteratorType = { PyVarObject_HEAD_INIT(&PyType_Type, 0) - INIT_MEMBER(tp_name , "yql.TIterator"), - INIT_MEMBER(tp_basicsize , sizeof(TPyIterator)), - INIT_MEMBER(tp_itemsize , 0), - INIT_MEMBER(tp_dealloc , TPyIterator::Dealloc), + // clang-format off + INIT_MEMBER(tp_name, "yql.TIterator"), + // clang-format on + INIT_MEMBER(tp_basicsize, sizeof(TPyIterator)), + INIT_MEMBER(tp_itemsize, 0), + INIT_MEMBER(tp_dealloc, TPyIterator::Dealloc), #if PY_VERSION_HEX < 0x030800b4 - INIT_MEMBER(tp_print , nullptr), + INIT_MEMBER(tp_print, nullptr), #else INIT_MEMBER(tp_vectorcall_offset, 0), #endif - INIT_MEMBER(tp_getattr , nullptr), - INIT_MEMBER(tp_setattr , nullptr), + INIT_MEMBER(tp_getattr, nullptr), + INIT_MEMBER(tp_setattr, nullptr), #if PY_MAJOR_VERSION >= 3 - INIT_MEMBER(tp_as_async , nullptr), + INIT_MEMBER(tp_as_async, nullptr), #else - INIT_MEMBER(tp_compare , nullptr), + INIT_MEMBER(tp_compare, nullptr), #endif - INIT_MEMBER(tp_repr , TPyIterator::Repr), - INIT_MEMBER(tp_as_number , nullptr), - INIT_MEMBER(tp_as_sequence , nullptr), - INIT_MEMBER(tp_as_mapping , nullptr), - INIT_MEMBER(tp_hash , nullptr), - INIT_MEMBER(tp_call , nullptr), - INIT_MEMBER(tp_str , nullptr), - INIT_MEMBER(tp_getattro , nullptr), - INIT_MEMBER(tp_setattro , nullptr), - INIT_MEMBER(tp_as_buffer , nullptr), - INIT_MEMBER(tp_flags , Py_TPFLAGS_HAVE_ITER), - INIT_MEMBER(tp_doc , "yql.TDictKeysIterator object"), - INIT_MEMBER(tp_traverse , nullptr), - 
INIT_MEMBER(tp_clear , nullptr), - INIT_MEMBER(tp_richcompare , nullptr), - INIT_MEMBER(tp_weaklistoffset , 0), - INIT_MEMBER(tp_iter , PyObject_SelfIter), - INIT_MEMBER(tp_iternext , TPyIterator::Next), - INIT_MEMBER(tp_methods , nullptr), - INIT_MEMBER(tp_members , nullptr), - INIT_MEMBER(tp_getset , nullptr), - INIT_MEMBER(tp_base , nullptr), - INIT_MEMBER(tp_dict , nullptr), - INIT_MEMBER(tp_descr_get , nullptr), - INIT_MEMBER(tp_descr_set , nullptr), - INIT_MEMBER(tp_dictoffset , 0), - INIT_MEMBER(tp_init , nullptr), - INIT_MEMBER(tp_alloc , nullptr), - INIT_MEMBER(tp_new , nullptr), - INIT_MEMBER(tp_free , nullptr), - INIT_MEMBER(tp_is_gc , nullptr), - INIT_MEMBER(tp_bases , nullptr), - INIT_MEMBER(tp_mro , nullptr), - INIT_MEMBER(tp_cache , nullptr), - INIT_MEMBER(tp_subclasses , nullptr), - INIT_MEMBER(tp_weaklist , nullptr), - INIT_MEMBER(tp_del , nullptr), - INIT_MEMBER(tp_version_tag , 0), + INIT_MEMBER(tp_repr, TPyIterator::Repr), + INIT_MEMBER(tp_as_number, nullptr), + INIT_MEMBER(tp_as_sequence, nullptr), + INIT_MEMBER(tp_as_mapping, nullptr), + INIT_MEMBER(tp_hash, nullptr), + INIT_MEMBER(tp_call, nullptr), + INIT_MEMBER(tp_str, nullptr), + INIT_MEMBER(tp_getattro, nullptr), + INIT_MEMBER(tp_setattro, nullptr), + INIT_MEMBER(tp_as_buffer, nullptr), + INIT_MEMBER(tp_flags, Py_TPFLAGS_HAVE_ITER), + INIT_MEMBER(tp_doc, "yql.TDictKeysIterator object"), + INIT_MEMBER(tp_traverse, nullptr), + INIT_MEMBER(tp_clear, nullptr), + INIT_MEMBER(tp_richcompare, nullptr), + INIT_MEMBER(tp_weaklistoffset, 0), + INIT_MEMBER(tp_iter, PyObject_SelfIter), + INIT_MEMBER(tp_iternext, TPyIterator::Next), + INIT_MEMBER(tp_methods, nullptr), + INIT_MEMBER(tp_members, nullptr), + INIT_MEMBER(tp_getset, nullptr), + INIT_MEMBER(tp_base, nullptr), + INIT_MEMBER(tp_dict, nullptr), + INIT_MEMBER(tp_descr_get, nullptr), + INIT_MEMBER(tp_descr_set, nullptr), + INIT_MEMBER(tp_dictoffset, 0), + INIT_MEMBER(tp_init, nullptr), + INIT_MEMBER(tp_alloc, nullptr), + INIT_MEMBER(tp_new, 
nullptr), + INIT_MEMBER(tp_free, nullptr), + INIT_MEMBER(tp_is_gc, nullptr), + INIT_MEMBER(tp_bases, nullptr), + INIT_MEMBER(tp_mro, nullptr), + INIT_MEMBER(tp_cache, nullptr), + INIT_MEMBER(tp_subclasses, nullptr), + INIT_MEMBER(tp_weaklist, nullptr), + INIT_MEMBER(tp_del, nullptr), + INIT_MEMBER(tp_version_tag, 0), #if PY_MAJOR_VERSION >= 3 - INIT_MEMBER(tp_finalize , nullptr), + INIT_MEMBER(tp_finalize, nullptr), #endif #if PY_VERSION_HEX >= 0x030800b1 - INIT_MEMBER(tp_vectorcall , nullptr), + INIT_MEMBER(tp_vectorcall, nullptr), #endif #if PY_VERSION_HEX >= 0x030800b4 && PY_VERSION_HEX < 0x03090000 - INIT_MEMBER(tp_print , nullptr), + INIT_MEMBER(tp_print, nullptr), #endif }; ////////////////////////////////////////////////////////////////////////////// // TPyPairIterator interface ////////////////////////////////////////////////////////////////////////////// -struct TPyPairIterator -{ +struct TPyPairIterator { PyObject_HEAD; TPyCastContext::TPtr CastCtx; const NUdf::TType* KeyType; @@ -138,68 +138,70 @@ struct TPyPairIterator PyTypeObject PyPairIteratorType = { PyVarObject_HEAD_INIT(&PyType_Type, 0) - INIT_MEMBER(tp_name , "yql.TDictIterator"), - INIT_MEMBER(tp_basicsize , sizeof(TPyPairIterator)), - INIT_MEMBER(tp_itemsize , 0), - INIT_MEMBER(tp_dealloc , TPyPairIterator::Dealloc), + // clang-format off + INIT_MEMBER(tp_name, "yql.TDictIterator"), + // clang-format on + INIT_MEMBER(tp_basicsize, sizeof(TPyPairIterator)), + INIT_MEMBER(tp_itemsize, 0), + INIT_MEMBER(tp_dealloc, TPyPairIterator::Dealloc), #if PY_VERSION_HEX < 0x030800b4 - INIT_MEMBER(tp_print , nullptr), + INIT_MEMBER(tp_print, nullptr), #else INIT_MEMBER(tp_vectorcall_offset, 0), #endif - INIT_MEMBER(tp_getattr , nullptr), - INIT_MEMBER(tp_setattr , nullptr), + INIT_MEMBER(tp_getattr, nullptr), + INIT_MEMBER(tp_setattr, nullptr), #if PY_MAJOR_VERSION >= 3 - INIT_MEMBER(tp_as_async , nullptr), + INIT_MEMBER(tp_as_async, nullptr), #else - INIT_MEMBER(tp_compare , nullptr), + 
INIT_MEMBER(tp_compare, nullptr), #endif - INIT_MEMBER(tp_repr , TPyPairIterator::Repr), - INIT_MEMBER(tp_as_number , nullptr), - INIT_MEMBER(tp_as_sequence , nullptr), - INIT_MEMBER(tp_as_mapping , nullptr), - INIT_MEMBER(tp_hash , nullptr), - INIT_MEMBER(tp_call , nullptr), - INIT_MEMBER(tp_str , nullptr), - INIT_MEMBER(tp_getattro , nullptr), - INIT_MEMBER(tp_setattro , nullptr), - INIT_MEMBER(tp_as_buffer , nullptr), - INIT_MEMBER(tp_flags , Py_TPFLAGS_HAVE_ITER), - INIT_MEMBER(tp_doc , "yql.TPairIterator object"), - INIT_MEMBER(tp_traverse , nullptr), - INIT_MEMBER(tp_clear , nullptr), - INIT_MEMBER(tp_richcompare , nullptr), - INIT_MEMBER(tp_weaklistoffset , 0), - INIT_MEMBER(tp_iter , PyObject_SelfIter), - INIT_MEMBER(tp_iternext , TPyPairIterator::Next), - INIT_MEMBER(tp_methods , nullptr), - INIT_MEMBER(tp_members , nullptr), - INIT_MEMBER(tp_getset , nullptr), - INIT_MEMBER(tp_base , nullptr), - INIT_MEMBER(tp_dict , nullptr), - INIT_MEMBER(tp_descr_get , nullptr), - INIT_MEMBER(tp_descr_set , nullptr), - INIT_MEMBER(tp_dictoffset , 0), - INIT_MEMBER(tp_init , nullptr), - INIT_MEMBER(tp_alloc , nullptr), - INIT_MEMBER(tp_new , nullptr), - INIT_MEMBER(tp_free , nullptr), - INIT_MEMBER(tp_is_gc , nullptr), - INIT_MEMBER(tp_bases , nullptr), - INIT_MEMBER(tp_mro , nullptr), - INIT_MEMBER(tp_cache , nullptr), - INIT_MEMBER(tp_subclasses , nullptr), - INIT_MEMBER(tp_weaklist , nullptr), - INIT_MEMBER(tp_del , nullptr), - INIT_MEMBER(tp_version_tag , 0), + INIT_MEMBER(tp_repr, TPyPairIterator::Repr), + INIT_MEMBER(tp_as_number, nullptr), + INIT_MEMBER(tp_as_sequence, nullptr), + INIT_MEMBER(tp_as_mapping, nullptr), + INIT_MEMBER(tp_hash, nullptr), + INIT_MEMBER(tp_call, nullptr), + INIT_MEMBER(tp_str, nullptr), + INIT_MEMBER(tp_getattro, nullptr), + INIT_MEMBER(tp_setattro, nullptr), + INIT_MEMBER(tp_as_buffer, nullptr), + INIT_MEMBER(tp_flags, Py_TPFLAGS_HAVE_ITER), + INIT_MEMBER(tp_doc, "yql.TPairIterator object"), + INIT_MEMBER(tp_traverse, nullptr), + 
INIT_MEMBER(tp_clear, nullptr), + INIT_MEMBER(tp_richcompare, nullptr), + INIT_MEMBER(tp_weaklistoffset, 0), + INIT_MEMBER(tp_iter, PyObject_SelfIter), + INIT_MEMBER(tp_iternext, TPyPairIterator::Next), + INIT_MEMBER(tp_methods, nullptr), + INIT_MEMBER(tp_members, nullptr), + INIT_MEMBER(tp_getset, nullptr), + INIT_MEMBER(tp_base, nullptr), + INIT_MEMBER(tp_dict, nullptr), + INIT_MEMBER(tp_descr_get, nullptr), + INIT_MEMBER(tp_descr_set, nullptr), + INIT_MEMBER(tp_dictoffset, 0), + INIT_MEMBER(tp_init, nullptr), + INIT_MEMBER(tp_alloc, nullptr), + INIT_MEMBER(tp_new, nullptr), + INIT_MEMBER(tp_free, nullptr), + INIT_MEMBER(tp_is_gc, nullptr), + INIT_MEMBER(tp_bases, nullptr), + INIT_MEMBER(tp_mro, nullptr), + INIT_MEMBER(tp_cache, nullptr), + INIT_MEMBER(tp_subclasses, nullptr), + INIT_MEMBER(tp_weaklist, nullptr), + INIT_MEMBER(tp_del, nullptr), + INIT_MEMBER(tp_version_tag, 0), #if PY_MAJOR_VERSION >= 3 - INIT_MEMBER(tp_finalize , nullptr), + INIT_MEMBER(tp_finalize, nullptr), #endif #if PY_VERSION_HEX >= 0x030800b1 - INIT_MEMBER(tp_vectorcall , nullptr), + INIT_MEMBER(tp_vectorcall, nullptr), #endif #if PY_VERSION_HEX >= 0x030800b4 && PY_VERSION_HEX < 0x03090000 - INIT_MEMBER(tp_print , nullptr), + INIT_MEMBER(tp_print, nullptr), #endif }; @@ -217,15 +219,16 @@ PyObject* TPyIterator::New(const TPyCastContext::TPtr& ctx, const NUdf::TType* i } PyObject* TPyIterator::Next(PyObject* self) -{ - PY_TRY { - const auto iter = Cast(self); - NUdf::TUnboxedValue item; - if (NUdf::TBoxedValueAccessor::Next(*iter->Iterator.Get(), item)) { - return (iter->ItemType ? ToPyObject(iter->CastCtx, iter->ItemType, item) : PyCast<ui64>(item.Get<ui64>())).Release(); - } - return nullptr; - } PY_CATCH(nullptr) + { + PY_TRY{ + const auto iter = Cast(self); +NUdf::TUnboxedValue item; +if (NUdf::TBoxedValueAccessor::Next(*iter->Iterator.Get(), item)) { + return (iter->ItemType ? 
ToPyObject(iter->CastCtx, iter->ItemType, item) : PyCast<ui64>(item.Get<ui64>())).Release(); +} +return nullptr; +} // namespace NPython +PY_CATCH(nullptr) } ////////////////////////////////////////////////////////////////////////////// @@ -243,36 +246,35 @@ PyObject* TPyPairIterator::New(const TPyCastContext::TPtr& ctx, const NUdf::TTyp } PyObject* TPyPairIterator::Next(PyObject* self) -{ - PY_TRY { - const auto iter = Cast(self); - NUdf::TUnboxedValue k, v; - if (NUdf::TBoxedValueAccessor::NextPair(*iter->Iterator.Get(), k, v)) { - const TPyObjectPtr key = iter->KeyType ? - ToPyObject(iter->CastCtx, iter->KeyType, k): - PyCast<ui64>(k.Get<ui64>()); - const TPyObjectPtr value = ToPyObject(iter->CastCtx, iter->PayloadType, v); - return PyTuple_Pack(2, key.Get(), value.Get()); - } - return nullptr; - } PY_CATCH(nullptr) + { + PY_TRY{ + const auto iter = Cast(self); +NUdf::TUnboxedValue k, v; +if (NUdf::TBoxedValueAccessor::NextPair(*iter->Iterator.Get(), k, v)) { + const TPyObjectPtr key = iter->KeyType ? 
ToPyObject(iter->CastCtx, iter->KeyType, k) : PyCast<ui64>(k.Get<ui64>()); + const TPyObjectPtr value = ToPyObject(iter->CastCtx, iter->PayloadType, v); + return PyTuple_Pack(2, key.Get(), value.Get()); +} +return nullptr; +} +PY_CATCH(nullptr) } ////////////////////////////////////////////////////////////////////////////// TPyObjectPtr ToPyIterator( - const TPyCastContext::TPtr& castCtx, - const NUdf::TType* itemType, - const NUdf::TUnboxedValuePod& value) + const TPyCastContext::TPtr& castCtx, + const NUdf::TType* itemType, + const NUdf::TUnboxedValuePod& value) { return TPyIterator::New(castCtx, itemType, value.AsBoxed()); } TPyObjectPtr ToPyIterator( - const TPyCastContext::TPtr& castCtx, - const NUdf::TType* keyType, - const NUdf::TType* payloadType, - const NUdf::TUnboxedValuePod& value) + const TPyCastContext::TPtr& castCtx, + const NUdf::TType* keyType, + const NUdf::TType* payloadType, + const NUdf::TUnboxedValuePod& value) { return TPyPairIterator::New(castCtx, keyType, payloadType, value.AsBoxed()); } diff --git a/yql/essentials/udfs/common/python/bindings/py_iterator.h b/yql/essentials/udfs/common/python/bindings/py_iterator.h index 5c5de27b0bc..29a9534a1d0 100644 --- a/yql/essentials/udfs/common/python/bindings/py_iterator.h +++ b/yql/essentials/udfs/common/python/bindings/py_iterator.h @@ -9,15 +9,14 @@ extern PyTypeObject PyIteratorType; extern PyTypeObject PyPairIteratorType; TPyObjectPtr ToPyIterator( - const TPyCastContext::TPtr& castCtx, - const NKikimr::NUdf::TType* itemType, - const NKikimr::NUdf::TUnboxedValuePod& value); + const TPyCastContext::TPtr& castCtx, + const NKikimr::NUdf::TType* itemType, + const NKikimr::NUdf::TUnboxedValuePod& value); TPyObjectPtr ToPyIterator( - const TPyCastContext::TPtr& castCtx, - const NKikimr::NUdf::TType* keyType, - const NKikimr::NUdf::TType* payloadType, - const NKikimr::NUdf::TUnboxedValuePod& value); + const TPyCastContext::TPtr& castCtx, + const NKikimr::NUdf::TType* keyType, + const 
NKikimr::NUdf::TType* payloadType, + const NKikimr::NUdf::TUnboxedValuePod& value); - -} // namspace NPython +} // namespace NPython diff --git a/yql/essentials/udfs/common/python/bindings/py_lazy_mkql_dict.cpp b/yql/essentials/udfs/common/python/bindings/py_lazy_mkql_dict.cpp index 1408f185a75..4ffaee2b66c 100644 --- a/yql/essentials/udfs/common/python/bindings/py_lazy_mkql_dict.cpp +++ b/yql/essentials/udfs/common/python/bindings/py_lazy_mkql_dict.cpp @@ -18,14 +18,16 @@ namespace { ////////////////////////////////////////////////////////////////////////////// // TLazyDictBase ////////////////////////////////////////////////////////////////////////////// -class TLazyDictBase: public NUdf::TBoxedValue -{ +class TLazyDictBase: public NUdf::TBoxedValue { protected: class TIterator: public NUdf::TBoxedValue { public: TIterator(const TPyCastContext::TPtr& ctx, const NUdf::TType* type, TPyObjectPtr&& pyIter) - : CastCtx_(ctx), ItemType_(type), PyIter_(std::move(pyIter)) - {} + : CastCtx_(ctx) + , ItemType_(type) + , PyIter_(std::move(pyIter)) + { + } ~TIterator() { const TPyGilLocker lock; @@ -80,8 +82,12 @@ protected: class TPairIterator: public NUdf::TBoxedValue { public: TPairIterator(const TPyCastContext::TPtr& ctx, const NUdf::TType* keyType, const NUdf::TType* payType, TPyObjectPtr&& pyIter) - : CastCtx_(ctx), KeyType_(keyType), PayType_(payType), PyIter_(std::move(pyIter)) - {} + : CastCtx_(ctx) + , KeyType_(keyType) + , PayType_(payType) + , PyIter_(std::move(pyIter)) + { + } ~TPairIterator() { const TPyGilLocker lock; @@ -131,8 +137,11 @@ protected: }; TLazyDictBase(const TPyCastContext::TPtr& castCtx, const NUdf::TType* itemType, PyObject* pyObject) - : CastCtx_(castCtx), ItemType_(itemType), PyObject_(pyObject, TPyObjectPtr::AddRef()) - {} + : CastCtx_(castCtx) + , ItemType_(itemType) + , PyObject_(pyObject, TPyObjectPtr::AddRef()) + { + } ~TLazyDictBase() { const TPyGilLocker lock; @@ -146,8 +155,7 @@ protected: UdfTerminate((TStringBuilder() << 
CastCtx_->PyCtx->Pos << GetLastErrorAsString()).c_str()); } return bool(has); - } - catch (const yexception& e) { + } catch (const yexception& e) { UdfTerminate((TStringBuilder() << CastCtx_->PyCtx->Pos << e.what()).c_str()); } @@ -159,15 +167,18 @@ protected: ////////////////////////////////////////////////////////////////////////////// // TLazyMapping ////////////////////////////////////////////////////////////////////////////// -class TLazyMapping: public TLazyDictBase -{ +class TLazyMapping: public TLazyDictBase { public: TLazyMapping(const TPyCastContext::TPtr& ctx, const NUdf::TType* keyType, const NUdf::TType* payType, PyObject* dict) - : TLazyDictBase(ctx, keyType, dict), PayType_(payType) - {} + : TLazyDictBase(ctx, keyType, dict) + , PayType_(payType) + { + } private: - bool IsSortedDict() const override { return false; } + bool IsSortedDict() const override { + return false; + } ui64 GetDictLength() const override try { const TPyGilLocker lock; @@ -238,9 +249,7 @@ private: const TPyGilLocker lock; if (const TPyObjectPtr pyKey = ToPyObject(CastCtx_, ItemType_, key)) { const auto map = PyObject_.Get(); - const auto has = map->ob_type->tp_as_sequence && map->ob_type->tp_as_sequence->sq_contains ? - (map->ob_type->tp_as_sequence->sq_contains)(map, pyKey.Get()) : - PyMapping_HasKey(map, pyKey.Get()); + const auto has = map->ob_type->tp_as_sequence && map->ob_type->tp_as_sequence->sq_contains ? 
(map->ob_type->tp_as_sequence->sq_contains)(map, pyKey.Get()) : PyMapping_HasKey(map, pyKey.Get()); if (has >= 0) { return bool(has); @@ -258,15 +267,18 @@ private: ////////////////////////////////////////////////////////////////////////////// // TLazyDict ////////////////////////////////////////////////////////////////////////////// -class TLazyDict: public TLazyDictBase -{ +class TLazyDict: public TLazyDictBase { public: TLazyDict(const TPyCastContext::TPtr& ctx, const NUdf::TType* keyType, const NUdf::TType* payType, PyObject* dict) - : TLazyDictBase(ctx, keyType, dict), PayType_(payType) - {} + : TLazyDictBase(ctx, keyType, dict) + , PayType_(payType) + { + } private: - bool IsSortedDict() const override { return false; } + bool IsSortedDict() const override { + return false; + } ui64 GetDictLength() const override try { const TPyGilLocker lock; @@ -349,15 +361,17 @@ private: ////////////////////////////////////////////////////////////////////////////// // TLazySet ////////////////////////////////////////////////////////////////////////////// -class TLazySet: public TLazyDictBase -{ +class TLazySet: public TLazyDictBase { public: TLazySet(const TPyCastContext::TPtr& ctx, const NUdf::TType* itemType, PyObject* set) : TLazyDictBase(ctx, itemType, set) - {} + { + } private: - bool IsSortedDict() const override { return false; } + bool IsSortedDict() const override { + return false; + } ui64 GetDictLength() const override try { const TPyGilLocker lock; @@ -425,15 +439,17 @@ private: ////////////////////////////////////////////////////////////////////////////// // TLazySequenceAsSet ////////////////////////////////////////////////////////////////////////////// -class TLazySequenceAsSet: public TLazyDictBase -{ +class TLazySequenceAsSet: public TLazyDictBase { public: TLazySequenceAsSet(const TPyCastContext::TPtr& ctx, const NUdf::TType* keyType, PyObject* sequence) : TLazyDictBase(ctx, keyType, sequence) - {} + { + } private: - bool IsSortedDict() const override { 
return false; } + bool IsSortedDict() const override { + return false; + } ui64 GetDictLength() const override try { const TPyGilLocker lock; @@ -501,28 +517,31 @@ private: ////////////////////////////////////////////////////////////////////////////// // TLazySequenceAsDict ////////////////////////////////////////////////////////////////////////////// -template<typename KeyType> -class TLazySequenceAsDict: public NUdf::TBoxedValue -{ +template <typename KeyType> +class TLazySequenceAsDict: public NUdf::TBoxedValue { private: class TKeyIterator: public NUdf::TBoxedValue { public: TKeyIterator(Py_ssize_t size) - : Size_(size), Index_(0) - {} + : Size_(size) + , Index_(0) + { + } private: bool Skip() override { - if (Index_ >= Size_) + if (Index_ >= Size_) { return false; + } ++Index_; return true; } bool Next(NUdf::TUnboxedValue& value) override { - if (Index_ >= Size_) + if (Index_ >= Size_) { return false; + } value = NUdf::TUnboxedValuePod(KeyType(Index_++)); return true; @@ -536,8 +555,13 @@ private: class TIterator: public NUdf::TBoxedValue { public: TIterator(const TPyCastContext::TPtr& ctx, const NUdf::TType* itemType, Py_ssize_t size, const TPyObjectPtr& pySeq) - : CastCtx_(ctx), ItemType_(itemType), PySeq_(pySeq), Size_(size), Index_(0) - {} + : CastCtx_(ctx) + , ItemType_(itemType) + , PySeq_(pySeq) + , Size_(size) + , Index_(0) + { + } ~TIterator() { const TPyGilLocker lock; @@ -546,16 +570,18 @@ private: private: bool Skip() override { - if (Index_ >= Size_) + if (Index_ >= Size_) { return false; + } ++Index_; return true; } bool Next(NUdf::TUnboxedValue& value) override try { - if (Index_ >= Size_) + if (Index_ >= Size_) { return false; + } const TPyGilLocker lock; value = FromPyObject(CastCtx_, ItemType_, PySequence_Fast_GET_ITEM(PySeq_.Get(), Index_++)); @@ -565,8 +591,9 @@ private: } bool NextPair(NUdf::TUnboxedValue& key, NUdf::TUnboxedValue& pay) override try { - if (Index_ >= Size_) + if (Index_ >= Size_) { return false; + } const TPyGilLocker 
lock; key = NUdf::TUnboxedValuePod(KeyType(Index_)); @@ -586,8 +613,12 @@ private: public: TLazySequenceAsDict(const TPyCastContext::TPtr& ctx, const NUdf::TType* itemType, TPyObjectPtr&& sequence, Py_ssize_t size) - : CastCtx_(ctx), ItemType_(itemType), Size_(size), PySeq_(std::move(sequence)) - {} + : CastCtx_(ctx) + , ItemType_(itemType) + , Size_(size) + , PySeq_(std::move(sequence)) + { + } ~TLazySequenceAsDict() { @@ -596,7 +627,9 @@ public: } private: - bool IsSortedDict() const override { return true; } + bool IsSortedDict() const override { + return true; + } bool HasDictItems() const override { return Size_ > 0; @@ -608,15 +641,17 @@ private: NUdf::TUnboxedValue Lookup(const NUdf::TUnboxedValuePod& key) const override { const Py_ssize_t index = key.Get<KeyType>(); - if (index >= -Size_ && index < Size_) try { - const TPyGilLocker lock; - if (const auto item = PySequence_Fast_GET_ITEM(PySeq_.Get(), index >= 0 ? index : Size_ + index)) { - return FromPyObject(CastCtx_, ItemType_, item).Release().MakeOptional(); - } else if (PyErr_Occurred()) { - UdfTerminate((TStringBuilder() << CastCtx_->PyCtx->Pos << GetLastErrorAsString()).c_str()); + if (index >= -Size_ && index < Size_) { + try { + const TPyGilLocker lock; + if (const auto item = PySequence_Fast_GET_ITEM(PySeq_.Get(), index >= 0 ? 
index : Size_ + index)) { + return FromPyObject(CastCtx_, ItemType_, item).Release().MakeOptional(); + } else if (PyErr_Occurred()) { + UdfTerminate((TStringBuilder() << CastCtx_->PyCtx->Pos << GetLastErrorAsString()).c_str()); + } + } catch (const yexception& e) { + UdfTerminate((TStringBuilder() << CastCtx_->PyCtx->Pos << e.what()).c_str()); } - } catch (const yexception& e) { - UdfTerminate((TStringBuilder() << CastCtx_->PyCtx->Pos << e.what()).c_str()); } return NUdf::TUnboxedValue(); } @@ -644,60 +679,60 @@ private: TPyObjectPtr PySeq_; }; -} // namspace +} // namespace NUdf::TUnboxedValue FromPyDict( - const TPyCastContext::TPtr& castCtx, - const NUdf::TType* keyType, - const NUdf::TType* payType, - PyObject* dict) + const TPyCastContext::TPtr& castCtx, + const NUdf::TType* keyType, + const NUdf::TType* payType, + PyObject* dict) { return NUdf::TUnboxedValuePod(new TLazyDict(castCtx, keyType, payType, dict)); } NUdf::TUnboxedValue FromPyMapping( - const TPyCastContext::TPtr& castCtx, - const NUdf::TType* keyType, - const NUdf::TType* payType, - PyObject* map) + const TPyCastContext::TPtr& castCtx, + const NUdf::TType* keyType, + const NUdf::TType* payType, + PyObject* map) { return NUdf::TUnboxedValuePod(new TLazyMapping(castCtx, keyType, payType, map)); } NUdf::TUnboxedValue FromPySet( - const TPyCastContext::TPtr& castCtx, - const NUdf::TType* itemType, - PyObject* set) + const TPyCastContext::TPtr& castCtx, + const NUdf::TType* itemType, + PyObject* set) { return NUdf::TUnboxedValuePod(new TLazySet(castCtx, itemType, set)); } NUdf::TUnboxedValue FromPySequence( - const TPyCastContext::TPtr& castCtx, - const NUdf::TType* keyType, - PyObject* set) + const TPyCastContext::TPtr& castCtx, + const NUdf::TType* keyType, + PyObject* set) { return NUdf::TUnboxedValuePod(new TLazySequenceAsSet(castCtx, keyType, set)); } NUdf::TUnboxedValue FromPySequence( - const TPyCastContext::TPtr& castCtx, - const NUdf::TType* itemType, - const NUdf::TDataTypeId keyType, - 
PyObject* sequence) + const TPyCastContext::TPtr& castCtx, + const NUdf::TType* itemType, + const NUdf::TDataTypeId keyType, + PyObject* sequence) { if (TPyObjectPtr fast = PySequence_Fast(sequence, "Can't get fast sequence.")) { - const auto size = PySequence_Fast_GET_SIZE(fast.Get()); - if (size >= 0) { - switch (keyType) { + const auto size = PySequence_Fast_GET_SIZE(fast.Get()); + if (size >= 0) { + switch (keyType) { #define MAKE_PRIMITIVE_TYPE_SIZE(type) \ - case NUdf::TDataType<type>::Id: \ - return NUdf::TUnboxedValuePod(new TLazySequenceAsDict<type>(castCtx, itemType, std::move(fast), size)); - INTEGRAL_VALUE_TYPES(MAKE_PRIMITIVE_TYPE_SIZE) + case NUdf::TDataType<type>::Id: \ + return NUdf::TUnboxedValuePod(new TLazySequenceAsDict<type>(castCtx, itemType, std::move(fast), size)); + INTEGRAL_VALUE_TYPES(MAKE_PRIMITIVE_TYPE_SIZE) #undef MAKE_PRIMITIVE_TYPE_SIZE + } + Y_ABORT("Invalid key type."); } - Y_ABORT("Invalid key type."); - } } UdfTerminate((TStringBuilder() << castCtx->PyCtx->Pos << GetLastErrorAsString()).c_str()); } diff --git a/yql/essentials/udfs/common/python/bindings/py_lazy_mkql_list.cpp b/yql/essentials/udfs/common/python/bindings/py_lazy_mkql_list.cpp index ef135f3ba20..ca6e83d1aee 100644 --- a/yql/essentials/udfs/common/python/bindings/py_lazy_mkql_list.cpp +++ b/yql/essentials/udfs/common/python/bindings/py_lazy_mkql_list.cpp @@ -11,7 +11,6 @@ #include <util/generic/maybe.h> #include <util/string/builder.h> - using namespace NKikimr; namespace NPython { @@ -50,9 +49,8 @@ static bool IsIteratorHasItems(PyObject* iter, const TPyCastContext::TPtr& castC ////////////////////////////////////////////////////////////////////////////// // TBaseLazyList ////////////////////////////////////////////////////////////////////////////// -template<typename TDerived> -class TBaseLazyList: public NUdf::TBoxedValue -{ +template <typename TDerived> +class TBaseLazyList: public NUdf::TBoxedValue { using TListSelf = TBaseLazyList<TDerived>; class TIterator: 
public NUdf::TBoxedValue { @@ -61,7 +59,8 @@ class TBaseLazyList: public NUdf::TBoxedValue : CastCtx_(ctx) , PyIter_(std::move(pyIter)) , ItemType_(type) - {} + { + } ~TIterator() { const TPyGilLocker lock; @@ -110,9 +109,9 @@ class TBaseLazyList: public NUdf::TBoxedValue public: TBaseLazyList( - const TPyCastContext::TPtr& castCtx, - TPyObjectPtr&& pyObject, - const NUdf::TType* type) + const TPyCastContext::TPtr& castCtx, + TPyObjectPtr&& pyObject, + const NUdf::TType* type) : CastCtx_(castCtx) , PyObject_(std::move(pyObject)) , ItemType_(NUdf::TListTypeInspector(*CastCtx_->PyCtx->TypeInfoHelper, type).GetItemType()) @@ -127,8 +126,7 @@ public: private: TPyObjectPtr GetIterator() const try { return static_cast<const TDerived*>(this)->GetIteratorImpl(); - } - catch (const yexception& e) { + } catch (const yexception& e) { UdfTerminate((TStringBuilder() << CastCtx_->PyCtx->Pos << e.what()).c_str()); } @@ -153,8 +151,9 @@ private: } bool HasListItems() const override try { - if (Length_.Defined()) + if (Length_.Defined()) { return *Length_ > 0; + } const TPyGilLocker lock; TPyObjectPtr iter = GetIterator(); @@ -163,8 +162,7 @@ private: Length_ = 0; } return hasItems; - } - catch (const yexception& e) { + } catch (const yexception& e) { UdfTerminate((TStringBuilder() << CastCtx_->PyCtx->Pos << e.what()).c_str()); } @@ -182,31 +180,27 @@ private: } NUdf::IBoxedValuePtr ReverseListImpl( - const NUdf::IValueBuilder& builder) const override - { + const NUdf::IValueBuilder& builder) const override { Y_UNUSED(builder); return nullptr; } NUdf::IBoxedValuePtr SkipListImpl( - const NUdf::IValueBuilder& builder, ui64 count) const override - { + const NUdf::IValueBuilder& builder, ui64 count) const override { Y_UNUSED(builder); Y_UNUSED(count); return nullptr; } NUdf::IBoxedValuePtr TakeListImpl( - const NUdf::IValueBuilder& builder, ui64 count) const override - { + const NUdf::IValueBuilder& builder, ui64 count) const override { Y_UNUSED(builder); Y_UNUSED(count); return 
nullptr; } NUdf::IBoxedValuePtr ToIndexDictImpl( - const NUdf::IValueBuilder& builder) const override - { + const NUdf::IValueBuilder& builder) const override { Y_UNUSED(builder); return nullptr; } @@ -221,26 +215,28 @@ protected: ////////////////////////////////////////////////////////////////////////////// // TLazyIterable ////////////////////////////////////////////////////////////////////////////// -class TLazyIterable: public TBaseLazyList<TLazyIterable> -{ +class TLazyIterable: public TBaseLazyList<TLazyIterable> { using TBase = TBaseLazyList<TLazyIterable>; + public: TLazyIterable( - const TPyCastContext::TPtr& castCtx, - TPyObjectPtr&& pyObject, - const NUdf::TType* type) + const TPyCastContext::TPtr& castCtx, + TPyObjectPtr&& pyObject, + const NUdf::TType* type) : TBase(castCtx, std::move(pyObject), type) - {} + { + } TPyObjectPtr GetIteratorImpl() const { - if (const TPyObjectPtr ret = PyObject_GetIter(PyObject_.Get())) { + if (const TPyObjectPtr ret = PyObject_GetIter(PyObject_.Get())) { return ret; } UdfTerminate((TStringBuilder() << CastCtx_->PyCtx->Pos - << "Cannot get iterator from object: " - << PyObjectRepr(PyObject_.Get()) << ", error: " - << GetLastErrorAsString()).c_str()); + << "Cannot get iterator from object: " + << PyObjectRepr(PyObject_.Get()) << ", error: " + << GetLastErrorAsString()) + .c_str()); } private: @@ -259,8 +255,7 @@ private: } } return *Length_; - } - catch (const yexception& e) { + } catch (const yexception& e) { UdfTerminate((TStringBuilder() << CastCtx_->PyCtx->Pos << e.what()).c_str()); } @@ -278,8 +273,7 @@ private: Length_ = 0; } return hasItems; - } - catch (const yexception& e) { + } catch (const yexception& e) { UdfTerminate((TStringBuilder() << CastCtx_->PyCtx->Pos << e.what()).c_str()); } }; @@ -287,23 +281,24 @@ private: ////////////////////////////////////////////////////////////////////////////// // TLazyIterator ////////////////////////////////////////////////////////////////////////////// -class TLazyIterator: 
public TBaseLazyList<TLazyIterator> -{ +class TLazyIterator: public TBaseLazyList<TLazyIterator> { using TBase = TBaseLazyList<TLazyIterator>; + public: TLazyIterator( - const TPyCastContext::TPtr& castCtx, - TPyObjectPtr&& pyObject, - const NUdf::TType* type) + const TPyCastContext::TPtr& castCtx, + TPyObjectPtr&& pyObject, + const NUdf::TType* type) : TBase(castCtx, std::move(pyObject), type) , IteratorDrained_(false) - {} + { + } TPyObjectPtr GetIteratorImpl() const { if (IteratorDrained_) { - UdfTerminate((TStringBuilder() << CastCtx_->PyCtx->Pos << - "Lazy list was build under python iterator. " - "Iterator was already used.").c_str()); + UdfTerminate((TStringBuilder() << CastCtx_->PyCtx->Pos << "Lazy list was build under python iterator. " + "Iterator was already used.") + .c_str()); } IteratorDrained_ = true; return PyObject_; @@ -316,14 +311,14 @@ private: ////////////////////////////////////////////////////////////////////////////// // TLazyGenerator ////////////////////////////////////////////////////////////////////////////// -class TLazyGenerator: public TBaseLazyList<TLazyGenerator> -{ +class TLazyGenerator: public TBaseLazyList<TLazyGenerator> { using TBase = TBaseLazyList<TLazyGenerator>; + public: TLazyGenerator( - const TPyCastContext::TPtr& castCtx, - TPyObjectPtr&& pyObject, - const NUdf::TType* type) + const TPyCastContext::TPtr& castCtx, + TPyObjectPtr&& pyObject, + const NUdf::TType* type) : TBase(castCtx, std::move(pyObject), type) { // keep ownership of function closure if any @@ -352,29 +347,28 @@ private: TPyObjectPtr Closure_; }; -} // namspace - +} // namespace NUdf::TUnboxedValue FromPyLazyGenerator( - const TPyCastContext::TPtr& castCtx, - const NUdf::TType* type, - TPyObjectPtr callableObj) + const TPyCastContext::TPtr& castCtx, + const NUdf::TType* type, + TPyObjectPtr callableObj) { return NUdf::TUnboxedValuePod(new TLazyGenerator(castCtx, std::move(callableObj), type)); } NUdf::TUnboxedValue FromPyLazyIterable( - const 
TPyCastContext::TPtr& castCtx, - const NUdf::TType* type, - TPyObjectPtr iterableObj) + const TPyCastContext::TPtr& castCtx, + const NUdf::TType* type, + TPyObjectPtr iterableObj) { return NUdf::TUnboxedValuePod(new TLazyIterable(castCtx, std::move(iterableObj), type)); } NUdf::TUnboxedValue FromPyLazyIterator( - const TPyCastContext::TPtr& castCtx, - const NUdf::TType* type, - TPyObjectPtr iteratorObj) + const TPyCastContext::TPtr& castCtx, + const NUdf::TType* type, + TPyObjectPtr iteratorObj) { return NUdf::TUnboxedValuePod(new TLazyIterator(castCtx, std::move(iteratorObj), type)); } diff --git a/yql/essentials/udfs/common/python/bindings/py_list.cpp b/yql/essentials/udfs/common/python/bindings/py_list.cpp index bbae59865f2..072da26cff2 100644 --- a/yql/essentials/udfs/common/python/bindings/py_list.cpp +++ b/yql/essentials/udfs/common/python/bindings/py_list.cpp @@ -10,11 +10,11 @@ using namespace NKikimr; #if PY_MAJOR_VERSION >= 3 -#define SLICEOBJ(obj) obj + #define SLICEOBJ(obj) obj #else -#define SLICEOBJ(obj) (reinterpret_cast<PySliceObject*>(obj)) -// See details about need for backports in ya.make -#include "py27_backports.h" + #define SLICEOBJ(obj) (reinterpret_cast<PySliceObject*>(obj)) + // See details about need for backports in ya.make + #include "py27_backports.h" #endif namespace NPython { @@ -33,13 +33,12 @@ inline Py_ssize_t CastIndex(PyObject* key, const char* name) return index; } -} +} // namespace ////////////////////////////////////////////////////////////////////////////// // TPyLazyList interface ////////////////////////////////////////////////////////////////////////////// -struct TPyLazyList -{ +struct TPyLazyList { using TPtr = NUdf::TRefCountedPtr<TPyLazyList, TPyPtrOps<TPyLazyList>>; PyObject_HEAD; @@ -59,11 +58,11 @@ struct TPyLazyList } static PyObject* New( - const TPyCastContext::TPtr& castCtx, - const NUdf::TType* itemType, - NUdf::IBoxedValuePtr value, - Py_ssize_t step = 1, - Py_ssize_t size = -1); + const 
TPyCastContext::TPtr& castCtx, + const NUdf::TType* itemType, + NUdf::IBoxedValuePtr value, + Py_ssize_t step = 1, + Py_ssize_t size = -1); static int Bool(PyObject* self); static PyObject* Repr(PyObject* self); @@ -85,64 +84,64 @@ PyMappingMethods LazyListMapping = { }; PyNumberMethods LazyListNumbering = { - INIT_MEMBER(nb_add, nullptr), - INIT_MEMBER(nb_subtract, nullptr), - INIT_MEMBER(nb_multiply, nullptr), + INIT_MEMBER(nb_add, nullptr), + INIT_MEMBER(nb_subtract, nullptr), + INIT_MEMBER(nb_multiply, nullptr), #if PY_MAJOR_VERSION < 3 - INIT_MEMBER(nb_divide, nullptr), + INIT_MEMBER(nb_divide, nullptr), #endif - INIT_MEMBER(nb_remainder, nullptr), - INIT_MEMBER(nb_divmod, nullptr), - INIT_MEMBER(nb_power, nullptr), - INIT_MEMBER(nb_negative, nullptr), - INIT_MEMBER(nb_positive, nullptr), - INIT_MEMBER(nb_absolute, nullptr), + INIT_MEMBER(nb_remainder, nullptr), + INIT_MEMBER(nb_divmod, nullptr), + INIT_MEMBER(nb_power, nullptr), + INIT_MEMBER(nb_negative, nullptr), + INIT_MEMBER(nb_positive, nullptr), + INIT_MEMBER(nb_absolute, nullptr), #if PY_MAJOR_VERSION >= 3 - INIT_MEMBER(nb_bool, TPyLazyList::Bool), + INIT_MEMBER(nb_bool, TPyLazyList::Bool), #else - INIT_MEMBER(nb_nonzero, TPyLazyList::Bool), + INIT_MEMBER(nb_nonzero, TPyLazyList::Bool), #endif - INIT_MEMBER(nb_invert, nullptr), - INIT_MEMBER(nb_lshift, nullptr), - INIT_MEMBER(nb_rshift, nullptr), - INIT_MEMBER(nb_and, nullptr), - INIT_MEMBER(nb_xor, nullptr), - INIT_MEMBER(nb_or, nullptr), + INIT_MEMBER(nb_invert, nullptr), + INIT_MEMBER(nb_lshift, nullptr), + INIT_MEMBER(nb_rshift, nullptr), + INIT_MEMBER(nb_and, nullptr), + INIT_MEMBER(nb_xor, nullptr), + INIT_MEMBER(nb_or, nullptr), #if PY_MAJOR_VERSION < 3 - INIT_MEMBER(nb_coerce, nullptr), + INIT_MEMBER(nb_coerce, nullptr), #endif - INIT_MEMBER(nb_int, nullptr), + INIT_MEMBER(nb_int, nullptr), #if PY_MAJOR_VERSION >= 3 - INIT_MEMBER(nb_reserved, nullptr), + INIT_MEMBER(nb_reserved, nullptr), #else - INIT_MEMBER(nb_long, nullptr), + 
INIT_MEMBER(nb_long, nullptr), #endif - INIT_MEMBER(nb_float, nullptr), + INIT_MEMBER(nb_float, nullptr), #if PY_MAJOR_VERSION < 3 - INIT_MEMBER(nb_oct, nullptr), - INIT_MEMBER(nb_hex, nullptr), + INIT_MEMBER(nb_oct, nullptr), + INIT_MEMBER(nb_hex, nullptr), #endif - INIT_MEMBER(nb_inplace_add, nullptr), - INIT_MEMBER(nb_inplace_subtract, nullptr), - INIT_MEMBER(nb_inplace_multiply, nullptr), - INIT_MEMBER(nb_inplace_remainder, nullptr), - INIT_MEMBER(nb_inplace_power, nullptr), - INIT_MEMBER(nb_inplace_lshift, nullptr), - INIT_MEMBER(nb_inplace_rshift, nullptr), - INIT_MEMBER(nb_inplace_and, nullptr), - INIT_MEMBER(nb_inplace_xor, nullptr), - INIT_MEMBER(nb_inplace_or, nullptr), + INIT_MEMBER(nb_inplace_add, nullptr), + INIT_MEMBER(nb_inplace_subtract, nullptr), + INIT_MEMBER(nb_inplace_multiply, nullptr), + INIT_MEMBER(nb_inplace_remainder, nullptr), + INIT_MEMBER(nb_inplace_power, nullptr), + INIT_MEMBER(nb_inplace_lshift, nullptr), + INIT_MEMBER(nb_inplace_rshift, nullptr), + INIT_MEMBER(nb_inplace_and, nullptr), + INIT_MEMBER(nb_inplace_xor, nullptr), + INIT_MEMBER(nb_inplace_or, nullptr), - INIT_MEMBER(nb_floor_divide, nullptr), - INIT_MEMBER(nb_true_divide, nullptr), - INIT_MEMBER(nb_inplace_floor_divide, nullptr), - INIT_MEMBER(nb_inplace_true_divide, nullptr), + INIT_MEMBER(nb_floor_divide, nullptr), + INIT_MEMBER(nb_true_divide, nullptr), + INIT_MEMBER(nb_inplace_floor_divide, nullptr), + INIT_MEMBER(nb_inplace_true_divide, nullptr), - INIT_MEMBER(nb_index, nullptr), + INIT_MEMBER(nb_index, nullptr), #if PY_MAJOR_VERSION >= 3 - INIT_MEMBER(nb_matrix_multiply, nullptr), - INIT_MEMBER(nb_inplace_matrix_multiply, nullptr), + INIT_MEMBER(nb_matrix_multiply, nullptr), + INIT_MEMBER(nb_inplace_matrix_multiply, nullptr), #endif }; @@ -154,92 +153,93 @@ PyDoc_STRVAR(has_fast_len__doc__, "DEPRECATED: do not use."); PyDoc_STRVAR(has_items__doc__, "DEPRECATED: test list as bool instead."); static PyMethodDef TPyLazyListMethods[] = { - { "__reversed__", 
TPyLazyList::Reversed, METH_NOARGS, nullptr }, - { "to_index_dict", TPyLazyList::ToIndexDict, METH_NOARGS, to_index_dict__doc__ }, - { "reversed", TPyLazyList::Reversed, METH_NOARGS, reversed__doc__ }, - { "take", TPyLazyList::Take, METH_O, take__doc__ }, - { "skip", TPyLazyList::Skip, METH_O, skip__doc__ }, - { "has_fast_len", TPyLazyList::HasFastLen, METH_NOARGS, has_fast_len__doc__ }, - { "has_items", TPyLazyList::HasItems, METH_NOARGS, has_items__doc__ }, - { nullptr, nullptr, 0, nullptr } /* sentinel */ + {"__reversed__", TPyLazyList::Reversed, METH_NOARGS, nullptr}, + {"to_index_dict", TPyLazyList::ToIndexDict, METH_NOARGS, to_index_dict__doc__}, + {"reversed", TPyLazyList::Reversed, METH_NOARGS, reversed__doc__}, + {"take", TPyLazyList::Take, METH_O, take__doc__}, + {"skip", TPyLazyList::Skip, METH_O, skip__doc__}, + {"has_fast_len", TPyLazyList::HasFastLen, METH_NOARGS, has_fast_len__doc__}, + {"has_items", TPyLazyList::HasItems, METH_NOARGS, has_items__doc__}, + {nullptr, nullptr, 0, nullptr} /* sentinel */ }; #if PY_MAJOR_VERSION >= 3 -#define Py_TPFLAGS_HAVE_ITER 0 // NOLINT(readability-identifier-naming) + #define Py_TPFLAGS_HAVE_ITER 0 // NOLINT(readability-identifier-naming) #endif PyTypeObject PyLazyListType = { PyVarObject_HEAD_INIT(&PyType_Type, 0) - INIT_MEMBER(tp_name , "yql.TList"), - INIT_MEMBER(tp_basicsize , sizeof(TPyLazyList)), - INIT_MEMBER(tp_itemsize , 0), - INIT_MEMBER(tp_dealloc , TPyLazyList::Dealloc), + // clang-format off + INIT_MEMBER(tp_name, "yql.TList"), + // clang-format on + INIT_MEMBER(tp_basicsize, sizeof(TPyLazyList)), + INIT_MEMBER(tp_itemsize, 0), + INIT_MEMBER(tp_dealloc, TPyLazyList::Dealloc), #if PY_VERSION_HEX < 0x030800b4 - INIT_MEMBER(tp_print , nullptr), + INIT_MEMBER(tp_print, nullptr), #else INIT_MEMBER(tp_vectorcall_offset, 0), #endif - INIT_MEMBER(tp_getattr , nullptr), - INIT_MEMBER(tp_setattr , nullptr), + INIT_MEMBER(tp_getattr, nullptr), + INIT_MEMBER(tp_setattr, nullptr), #if PY_MAJOR_VERSION >= 3 - 
INIT_MEMBER(tp_as_async , nullptr), + INIT_MEMBER(tp_as_async, nullptr), #else - INIT_MEMBER(tp_compare , nullptr), + INIT_MEMBER(tp_compare, nullptr), #endif - INIT_MEMBER(tp_repr , TPyLazyList::Repr), - INIT_MEMBER(tp_as_number , &LazyListNumbering), - INIT_MEMBER(tp_as_sequence , nullptr), - INIT_MEMBER(tp_as_mapping , &LazyListMapping), - INIT_MEMBER(tp_hash , nullptr), - INIT_MEMBER(tp_call , nullptr), - INIT_MEMBER(tp_str , nullptr), - INIT_MEMBER(tp_getattro , nullptr), - INIT_MEMBER(tp_setattro , nullptr), - INIT_MEMBER(tp_as_buffer , nullptr), - INIT_MEMBER(tp_flags , Py_TPFLAGS_HAVE_ITER), - INIT_MEMBER(tp_doc , "yql.TList object"), - INIT_MEMBER(tp_traverse , nullptr), - INIT_MEMBER(tp_clear , nullptr), - INIT_MEMBER(tp_richcompare , nullptr), - INIT_MEMBER(tp_weaklistoffset , 0), - INIT_MEMBER(tp_iter , TPyLazyList::Iter), - INIT_MEMBER(tp_iternext , nullptr), - INIT_MEMBER(tp_methods , TPyLazyListMethods), - INIT_MEMBER(tp_members , nullptr), - INIT_MEMBER(tp_getset , nullptr), - INIT_MEMBER(tp_base , nullptr), - INIT_MEMBER(tp_dict , nullptr), - INIT_MEMBER(tp_descr_get , nullptr), - INIT_MEMBER(tp_descr_set , nullptr), - INIT_MEMBER(tp_dictoffset , 0), - INIT_MEMBER(tp_init , nullptr), - INIT_MEMBER(tp_alloc , nullptr), - INIT_MEMBER(tp_new , nullptr), - INIT_MEMBER(tp_free , nullptr), - INIT_MEMBER(tp_is_gc , nullptr), - INIT_MEMBER(tp_bases , nullptr), - INIT_MEMBER(tp_mro , nullptr), - INIT_MEMBER(tp_cache , nullptr), - INIT_MEMBER(tp_subclasses , nullptr), - INIT_MEMBER(tp_weaklist , nullptr), - INIT_MEMBER(tp_del , nullptr), - INIT_MEMBER(tp_version_tag , 0), + INIT_MEMBER(tp_repr, TPyLazyList::Repr), + INIT_MEMBER(tp_as_number, &LazyListNumbering), + INIT_MEMBER(tp_as_sequence, nullptr), + INIT_MEMBER(tp_as_mapping, &LazyListMapping), + INIT_MEMBER(tp_hash, nullptr), + INIT_MEMBER(tp_call, nullptr), + INIT_MEMBER(tp_str, nullptr), + INIT_MEMBER(tp_getattro, nullptr), + INIT_MEMBER(tp_setattro, nullptr), + INIT_MEMBER(tp_as_buffer, nullptr), + 
INIT_MEMBER(tp_flags, Py_TPFLAGS_HAVE_ITER), + INIT_MEMBER(tp_doc, "yql.TList object"), + INIT_MEMBER(tp_traverse, nullptr), + INIT_MEMBER(tp_clear, nullptr), + INIT_MEMBER(tp_richcompare, nullptr), + INIT_MEMBER(tp_weaklistoffset, 0), + INIT_MEMBER(tp_iter, TPyLazyList::Iter), + INIT_MEMBER(tp_iternext, nullptr), + INIT_MEMBER(tp_methods, TPyLazyListMethods), + INIT_MEMBER(tp_members, nullptr), + INIT_MEMBER(tp_getset, nullptr), + INIT_MEMBER(tp_base, nullptr), + INIT_MEMBER(tp_dict, nullptr), + INIT_MEMBER(tp_descr_get, nullptr), + INIT_MEMBER(tp_descr_set, nullptr), + INIT_MEMBER(tp_dictoffset, 0), + INIT_MEMBER(tp_init, nullptr), + INIT_MEMBER(tp_alloc, nullptr), + INIT_MEMBER(tp_new, nullptr), + INIT_MEMBER(tp_free, nullptr), + INIT_MEMBER(tp_is_gc, nullptr), + INIT_MEMBER(tp_bases, nullptr), + INIT_MEMBER(tp_mro, nullptr), + INIT_MEMBER(tp_cache, nullptr), + INIT_MEMBER(tp_subclasses, nullptr), + INIT_MEMBER(tp_weaklist, nullptr), + INIT_MEMBER(tp_del, nullptr), + INIT_MEMBER(tp_version_tag, 0), #if PY_MAJOR_VERSION >= 3 - INIT_MEMBER(tp_finalize , nullptr), + INIT_MEMBER(tp_finalize, nullptr), #endif #if PY_VERSION_HEX >= 0x030800b1 - INIT_MEMBER(tp_vectorcall , nullptr), + INIT_MEMBER(tp_vectorcall, nullptr), #endif #if PY_VERSION_HEX >= 0x030800b4 && PY_VERSION_HEX < 0x03090000 - INIT_MEMBER(tp_print , nullptr), + INIT_MEMBER(tp_print, nullptr), #endif }; ////////////////////////////////////////////////////////////////////////////// // TPyLazyListIterator interface ////////////////////////////////////////////////////////////////////////////// -struct TPyLazyListIterator -{ +struct TPyLazyListIterator { PyObject_HEAD; TPyLazyList::TPtr List; TPyCleanupListItem<NUdf::TUnboxedValue> Iterator; @@ -269,68 +269,70 @@ struct TPyLazyListIterator PyTypeObject PyLazyListIteratorType = { PyVarObject_HEAD_INIT(&PyType_Type, 0) - INIT_MEMBER(tp_name , "yql.TListIterator"), - INIT_MEMBER(tp_basicsize , sizeof(TPyLazyListIterator)), - INIT_MEMBER(tp_itemsize , 0), - 
INIT_MEMBER(tp_dealloc , TPyLazyListIterator::Dealloc), + // clang-format off + INIT_MEMBER(tp_name, "yql.TListIterator"), + // clang-format on + INIT_MEMBER(tp_basicsize, sizeof(TPyLazyListIterator)), + INIT_MEMBER(tp_itemsize, 0), + INIT_MEMBER(tp_dealloc, TPyLazyListIterator::Dealloc), #if PY_VERSION_HEX < 0x030800b4 - INIT_MEMBER(tp_print , nullptr), + INIT_MEMBER(tp_print, nullptr), #else INIT_MEMBER(tp_vectorcall_offset, 0), #endif - INIT_MEMBER(tp_getattr , nullptr), - INIT_MEMBER(tp_setattr , nullptr), + INIT_MEMBER(tp_getattr, nullptr), + INIT_MEMBER(tp_setattr, nullptr), #if PY_MAJOR_VERSION >= 3 - INIT_MEMBER(tp_as_async , nullptr), + INIT_MEMBER(tp_as_async, nullptr), #else - INIT_MEMBER(tp_compare , nullptr), + INIT_MEMBER(tp_compare, nullptr), #endif - INIT_MEMBER(tp_repr , TPyLazyListIterator::Repr), - INIT_MEMBER(tp_as_number , nullptr), - INIT_MEMBER(tp_as_sequence , nullptr), - INIT_MEMBER(tp_as_mapping , nullptr), - INIT_MEMBER(tp_hash , nullptr), - INIT_MEMBER(tp_call , nullptr), - INIT_MEMBER(tp_str , nullptr), - INIT_MEMBER(tp_getattro , nullptr), - INIT_MEMBER(tp_setattro , nullptr), - INIT_MEMBER(tp_as_buffer , nullptr), - INIT_MEMBER(tp_flags , Py_TPFLAGS_HAVE_ITER), - INIT_MEMBER(tp_doc , "yql.ListIterator object"), - INIT_MEMBER(tp_traverse , nullptr), - INIT_MEMBER(tp_clear , nullptr), - INIT_MEMBER(tp_richcompare , nullptr), - INIT_MEMBER(tp_weaklistoffset , 0), - INIT_MEMBER(tp_iter , PyObject_SelfIter), - INIT_MEMBER(tp_iternext , TPyLazyListIterator::Next), - INIT_MEMBER(tp_methods , nullptr), - INIT_MEMBER(tp_members , nullptr), - INIT_MEMBER(tp_getset , nullptr), - INIT_MEMBER(tp_base , nullptr), - INIT_MEMBER(tp_dict , nullptr), - INIT_MEMBER(tp_descr_get , nullptr), - INIT_MEMBER(tp_descr_set , nullptr), - INIT_MEMBER(tp_dictoffset , 0), - INIT_MEMBER(tp_init , nullptr), - INIT_MEMBER(tp_alloc , nullptr), - INIT_MEMBER(tp_new , nullptr), - INIT_MEMBER(tp_free , nullptr), - INIT_MEMBER(tp_is_gc , nullptr), - INIT_MEMBER(tp_bases , 
nullptr), - INIT_MEMBER(tp_mro , nullptr), - INIT_MEMBER(tp_cache , nullptr), - INIT_MEMBER(tp_subclasses , nullptr), - INIT_MEMBER(tp_weaklist , nullptr), - INIT_MEMBER(tp_del , nullptr), - INIT_MEMBER(tp_version_tag , 0), + INIT_MEMBER(tp_repr, TPyLazyListIterator::Repr), + INIT_MEMBER(tp_as_number, nullptr), + INIT_MEMBER(tp_as_sequence, nullptr), + INIT_MEMBER(tp_as_mapping, nullptr), + INIT_MEMBER(tp_hash, nullptr), + INIT_MEMBER(tp_call, nullptr), + INIT_MEMBER(tp_str, nullptr), + INIT_MEMBER(tp_getattro, nullptr), + INIT_MEMBER(tp_setattro, nullptr), + INIT_MEMBER(tp_as_buffer, nullptr), + INIT_MEMBER(tp_flags, Py_TPFLAGS_HAVE_ITER), + INIT_MEMBER(tp_doc, "yql.ListIterator object"), + INIT_MEMBER(tp_traverse, nullptr), + INIT_MEMBER(tp_clear, nullptr), + INIT_MEMBER(tp_richcompare, nullptr), + INIT_MEMBER(tp_weaklistoffset, 0), + INIT_MEMBER(tp_iter, PyObject_SelfIter), + INIT_MEMBER(tp_iternext, TPyLazyListIterator::Next), + INIT_MEMBER(tp_methods, nullptr), + INIT_MEMBER(tp_members, nullptr), + INIT_MEMBER(tp_getset, nullptr), + INIT_MEMBER(tp_base, nullptr), + INIT_MEMBER(tp_dict, nullptr), + INIT_MEMBER(tp_descr_get, nullptr), + INIT_MEMBER(tp_descr_set, nullptr), + INIT_MEMBER(tp_dictoffset, 0), + INIT_MEMBER(tp_init, nullptr), + INIT_MEMBER(tp_alloc, nullptr), + INIT_MEMBER(tp_new, nullptr), + INIT_MEMBER(tp_free, nullptr), + INIT_MEMBER(tp_is_gc, nullptr), + INIT_MEMBER(tp_bases, nullptr), + INIT_MEMBER(tp_mro, nullptr), + INIT_MEMBER(tp_cache, nullptr), + INIT_MEMBER(tp_subclasses, nullptr), + INIT_MEMBER(tp_weaklist, nullptr), + INIT_MEMBER(tp_del, nullptr), + INIT_MEMBER(tp_version_tag, 0), #if PY_MAJOR_VERSION >= 3 - INIT_MEMBER(tp_finalize , nullptr), + INIT_MEMBER(tp_finalize, nullptr), #endif #if PY_VERSION_HEX >= 0x030800b1 - INIT_MEMBER(tp_vectorcall , nullptr), + INIT_MEMBER(tp_vectorcall, nullptr), #endif #if PY_VERSION_HEX >= 0x030800b4 && PY_VERSION_HEX < 0x03090000 - INIT_MEMBER(tp_print , nullptr), + INIT_MEMBER(tp_print, nullptr), 
#endif }; @@ -338,11 +340,11 @@ PyTypeObject PyLazyListIteratorType = { // TPyLazyList implementation ////////////////////////////////////////////////////////////////////////////// PyObject* TPyLazyList::New( - const TPyCastContext::TPtr& castCtx, - const NUdf::TType* itemType, - NUdf::IBoxedValuePtr value, - Py_ssize_t step, - Py_ssize_t size) + const TPyCastContext::TPtr& castCtx, + const NUdf::TType* itemType, + NUdf::IBoxedValuePtr value, + Py_ssize_t step, + Py_ssize_t size) { TPyLazyList* list = new TPyLazyList; PyObject_INIT(list, &PyLazyListType); @@ -362,192 +364,198 @@ PyObject* TPyLazyList::Repr(PyObject*) } PyObject* TPyLazyList::Iter(PyObject* self) -{ - PY_TRY { - TPyLazyList* list = Cast(self); - return TPyLazyListIterator::New(list); - } PY_CATCH(nullptr) + { + PY_TRY{ + TPyLazyList* list = Cast(self); +return TPyLazyListIterator::New(list); +} // namespace NPython +PY_CATCH(nullptr) } Py_ssize_t TPyLazyList::Len(PyObject* self) -{ - PY_TRY { - TPyLazyList* list = Cast(self); - if (list->CachedLength == -1) { - list->CachedLength = static_cast<Py_ssize_t>(NUdf::TBoxedValueAccessor::GetListLength(*list->Value.Get())); - } - return (list->CachedLength + list->Step - 1) / list->Step; - } PY_CATCH(-1) + { + PY_TRY{ + TPyLazyList* list = Cast(self); +if (list->CachedLength == -1) { + list->CachedLength = static_cast<Py_ssize_t>(NUdf::TBoxedValueAccessor::GetListLength(*list->Value.Get())); +} +return (list->CachedLength + list->Step - 1) / list->Step; +} +PY_CATCH(-1) } PyObject* TPyLazyList::Subscript(PyObject* self, PyObject* slice) -{ - PY_TRY { - TPyLazyList* list = Cast(self); - const auto vb = list->CastCtx->ValueBuilder; - - if (PyIndex_Check(slice)) { - Py_ssize_t index = PyNumber_AsSsize_t(slice, PyExc_IndexError); + { + PY_TRY{ + TPyLazyList* list = Cast(self); +const auto vb = list->CastCtx->ValueBuilder; - if (!list->Dict.IsSet()) { - list->Dict.Set(list->CastCtx->PyCtx, 
vb->ToIndexDict(NUdf::TUnboxedValuePod(list->Value.Get().Get())).AsBoxed()); - } +if (PyIndex_Check(slice)) { + Py_ssize_t index = PyNumber_AsSsize_t(slice, PyExc_IndexError); - if (index < 0) { - if (list->CachedLength == -1) { - list->CachedLength = static_cast<Py_ssize_t>(NUdf::TBoxedValueAccessor::GetDictLength(*list->Dict.Get())); - } + if (!list->Dict.IsSet()) { + list->Dict.Set(list->CastCtx->PyCtx, vb->ToIndexDict(NUdf::TUnboxedValuePod(list->Value.Get().Get())).AsBoxed()); + } - ++index *= list->Step; - --index += list->CachedLength; - } else { - index *= list->Step; - } + if (index < 0) { + if (list->CachedLength == -1) { + list->CachedLength = static_cast<Py_ssize_t>(NUdf::TBoxedValueAccessor::GetDictLength(*list->Dict.Get())); + } - if (index < 0 || (list->CachedLength != -1 && index >= list->CachedLength)) { - const TPyObjectPtr error = PyUnicode_FromFormat("index %zd out of bounds, list size: %zd", index, list->CachedLength); - PyErr_SetObject(PyExc_IndexError, error.Get()); - return nullptr; - } + ++index *= list->Step; + --index += list->CachedLength; + } else { + index *= list->Step; + } - if (const auto item = NUdf::TBoxedValueAccessor::Lookup(*list->Dict.Get(), NUdf::TUnboxedValuePod(ui64(index)))) { - return ToPyObject(list->CastCtx, list->ItemType, item.GetOptionalValue()).Release(); - } + if (index < 0 || (list->CachedLength != -1 && index >= list->CachedLength)) { + const TPyObjectPtr error = PyUnicode_FromFormat("index %zd out of bounds, list size: %zd", index, list->CachedLength); + PyErr_SetObject(PyExc_IndexError, error.Get()); + return nullptr; + } - const TPyObjectPtr error = PyUnicode_FromFormat("index %zd out of bounds", index); - PyErr_SetObject(PyExc_IndexError, error.Get()); - return nullptr; - } + if (const auto item = NUdf::TBoxedValueAccessor::Lookup(*list->Dict.Get(), NUdf::TUnboxedValuePod(ui64(index)))) { + return ToPyObject(list->CastCtx, list->ItemType, item.GetOptionalValue()).Release(); + } - if (PySlice_Check(slice)) { - 
Py_ssize_t start, stop, step, size; + const TPyObjectPtr error = PyUnicode_FromFormat("index %zd out of bounds", index); + PyErr_SetObject(PyExc_IndexError, error.Get()); + return nullptr; +} - if (list->CachedLength >= 0) { - if (PySlice_GetIndicesEx(SLICEOBJ(slice), (list->CachedLength + list->Step - 1) / list->Step, &start, &stop, &step, &size) < 0) { - return nullptr; - } - } else { - if (PySlice_Unpack(slice, &start, &stop, &step) < 0) { - return nullptr; - } +if (PySlice_Check(slice)) { + Py_ssize_t start, stop, step, size; - if (step < -1 || step > 1 || (start < 0 && start > PY_SSIZE_T_MIN) || (stop < 0 && stop > PY_SSIZE_T_MIN)) { - list->CachedLength = static_cast<Py_ssize_t>(NUdf::TBoxedValueAccessor::GetListLength(*list->Value.Get())); - size = PySlice_AdjustIndices((list->CachedLength + list->Step - 1) / list->Step, &start, &stop, step); - } else { - size = PySlice_AdjustIndices(PY_SSIZE_T_MAX, &start, &stop, step); - } - } + if (list->CachedLength >= 0) { + if (PySlice_GetIndicesEx(SLICEOBJ(slice), (list->CachedLength + list->Step - 1) / list->Step, &start, &stop, &step, &size) < 0) { + return nullptr; + } + } else { + if (PySlice_Unpack(slice, &start, &stop, &step) < 0) { + return nullptr; + } - if (!step) { - PyErr_SetString(PyExc_ValueError, "slice step cannot be zero"); - return nullptr; - } + if (step < -1 || step > 1 || (start < 0 && start > PY_SSIZE_T_MIN) || (stop < 0 && stop > PY_SSIZE_T_MIN)) { + list->CachedLength = static_cast<Py_ssize_t>(NUdf::TBoxedValueAccessor::GetListLength(*list->Value.Get())); + size = PySlice_AdjustIndices((list->CachedLength + list->Step - 1) / list->Step, &start, &stop, step); + } else { + size = PySlice_AdjustIndices(PY_SSIZE_T_MAX, &start, &stop, step); + } + } - const Py_ssize_t hi = PY_SSIZE_T_MAX / list->Step; - const Py_ssize_t lo = PY_SSIZE_T_MIN / list->Step; - step = step > lo && step < hi ? step * list->Step : (step > 0 ? 
PY_SSIZE_T_MAX : PY_SSIZE_T_MIN); + if (!step) { + PyErr_SetString(PyExc_ValueError, "slice step cannot be zero"); + return nullptr; + } - NUdf::TUnboxedValue newList; - if (size > 0) { - size = step > 0 ? - (size < PY_SSIZE_T_MAX / step ? --size * step + 1 : PY_SSIZE_T_MAX): - (size < PY_SSIZE_T_MAX / -step ? --size * -step + 1 : PY_SSIZE_T_MAX); + const Py_ssize_t hi = PY_SSIZE_T_MAX / list->Step; + const Py_ssize_t lo = PY_SSIZE_T_MIN / list->Step; + step = step > lo && step < hi ? step * list->Step : (step > 0 ? PY_SSIZE_T_MAX : PY_SSIZE_T_MIN); - start = start < hi ? start * list->Step : PY_SSIZE_T_MAX; - const Py_ssize_t skip = step > 0 ? start : start - size + 1; + NUdf::TUnboxedValue newList; + if (size > 0) { + size = step > 0 ? (size < PY_SSIZE_T_MAX / step ? --size * step + 1 : PY_SSIZE_T_MAX) : (size < PY_SSIZE_T_MAX / -step ? --size * -step + 1 : PY_SSIZE_T_MAX); - newList = NUdf::TUnboxedValuePod(list->Value.Get().Get()); - if (skip > 0) { - newList = vb->SkipList(newList, skip); - } + start = start < hi ? start * list->Step : PY_SSIZE_T_MAX; + const Py_ssize_t skip = step > 0 ? 
start : start - size + 1; - if (size < PY_SSIZE_T_MAX && (list->CachedLength == -1 || list->CachedLength - skip > size)) { - newList = vb->TakeList(newList, size); - } + newList = NUdf::TUnboxedValuePod(list->Value.Get().Get()); + if (skip > 0) { + newList = vb->SkipList(newList, skip); + } - if (step < 0) { - step = -step; - newList = vb->ReverseList(newList); - } - } else { - newList = vb->NewEmptyList(); - } + if (size < PY_SSIZE_T_MAX && (list->CachedLength == -1 || list->CachedLength - skip > size)) { + newList = vb->TakeList(newList, size); + } - return New(list->CastCtx, list->ItemType, newList.AsBoxed(), step, size); + if (step < 0) { + step = -step; + newList = vb->ReverseList(newList); } + } else { + newList = vb->NewEmptyList(); + } - const TPyObjectPtr type = PyObject_Type(slice); - const TPyObjectPtr repr = PyObject_Repr(type.Get()); - const TPyObjectPtr error = PyUnicode_FromFormat("Unsupported slice object type: %R", repr.Get()); - PyErr_SetObject(PyExc_TypeError, error.Get()); - return nullptr; - } PY_CATCH(nullptr) + return New(list->CastCtx, list->ItemType, newList.AsBoxed(), step, size); +} + +const TPyObjectPtr type = PyObject_Type(slice); +const TPyObjectPtr repr = PyObject_Repr(type.Get()); +const TPyObjectPtr error = PyUnicode_FromFormat("Unsupported slice object type: %R", repr.Get()); +PyErr_SetObject(PyExc_TypeError, error.Get()); +return nullptr; +} +PY_CATCH(nullptr) } PyObject* TPyLazyList::ToIndexDict(PyObject* self, PyObject* /* arg */) -{ - PY_TRY { - TPyLazyList* list = Cast(self); - if (!list->Dict.IsSet()) { - list->Dict.Set(list->CastCtx->PyCtx, list->CastCtx->ValueBuilder->ToIndexDict(NUdf::TUnboxedValuePod(list->Value.Get().Get())).AsBoxed()); - } + { + PY_TRY{ + TPyLazyList* list = Cast(self); +if (!list->Dict.IsSet()) { + list->Dict.Set(list->CastCtx->PyCtx, list->CastCtx->ValueBuilder->ToIndexDict(NUdf::TUnboxedValuePod(list->Value.Get().Get())).AsBoxed()); +} - return ToPyLazyDict(list->CastCtx, nullptr, list->ItemType, 
NUdf::TUnboxedValuePod(list->Dict.Get().Get())).Release(); - } PY_CATCH(nullptr) +return ToPyLazyDict(list->CastCtx, nullptr, list->ItemType, NUdf::TUnboxedValuePod(list->Dict.Get().Get())).Release(); +} +PY_CATCH(nullptr) } PyObject* TPyLazyList::Reversed(PyObject* self, PyObject* /* arg */) -{ - PY_TRY { - TPyLazyList* list = Cast(self); - const auto newList = list->CastCtx->ValueBuilder->ReverseList(NUdf::TUnboxedValuePod(list->Value.Get().Get())); - return New(list->CastCtx, list->ItemType, newList.AsBoxed(), list->Step); - } PY_CATCH(nullptr) + { + PY_TRY{ + TPyLazyList* list = Cast(self); +const auto newList = list->CastCtx->ValueBuilder->ReverseList(NUdf::TUnboxedValuePod(list->Value.Get().Get())); +return New(list->CastCtx, list->ItemType, newList.AsBoxed(), list->Step); +} +PY_CATCH(nullptr) } PyObject* TPyLazyList::Take(PyObject* self, PyObject* arg) -{ - PY_TRY { - TPyLazyList* list = Cast(self); - Py_ssize_t count = CastIndex(arg, "take"); - if (count < 0) { - return nullptr; - } - count *= list->Step; + { + PY_TRY{ + TPyLazyList* list = Cast(self); +Py_ssize_t count = CastIndex(arg, "take"); +if (count < 0) { + return nullptr; +} +count *= list->Step; - auto vb = list->CastCtx->ValueBuilder; - NUdf::TUnboxedValue value(NUdf::TUnboxedValuePod(list->Value.Get().Get())); - auto newList = vb->TakeList(value, static_cast<ui64>(count)); - return New(list->CastCtx, list->ItemType, newList.AsBoxed(), list->Step); - } PY_CATCH(nullptr) +auto vb = list->CastCtx->ValueBuilder; +NUdf::TUnboxedValue value(NUdf::TUnboxedValuePod(list->Value.Get().Get())); +auto newList = vb->TakeList(value, static_cast<ui64>(count)); +return New(list->CastCtx, list->ItemType, newList.AsBoxed(), list->Step); +} +PY_CATCH(nullptr) } PyObject* TPyLazyList::Skip(PyObject* self, PyObject* arg) -{ - PY_TRY { - TPyLazyList* list = Cast(self); - Py_ssize_t count = CastIndex(arg, "skip"); - if (count < 0) { - return nullptr; - } - count *= list->Step; + { + PY_TRY{ + TPyLazyList* list = 
Cast(self); +Py_ssize_t count = CastIndex(arg, "skip"); +if (count < 0) { + return nullptr; +} +count *= list->Step; - NUdf::TUnboxedValue value(NUdf::TUnboxedValuePod(list->Value.Get().Get())); - const auto newList = list->CastCtx->ValueBuilder->SkipList(value, static_cast<ui64>(count)); - return New(list->CastCtx, list->ItemType, newList.AsBoxed(), list->Step); - } PY_CATCH(nullptr) +NUdf::TUnboxedValue value(NUdf::TUnboxedValuePod(list->Value.Get().Get())); +const auto newList = list->CastCtx->ValueBuilder->SkipList(value, static_cast<ui64>(count)); +return New(list->CastCtx, list->ItemType, newList.AsBoxed(), list->Step); +} +PY_CATCH(nullptr) } PyObject* TPyLazyList::HasFastLen(PyObject* self, PyObject* /* arg */) -{ - PY_TRY { - TPyLazyList* list = Cast(self); - if (NUdf::TBoxedValueAccessor::HasFastListLength(*list->Value.Get())) { - Py_RETURN_TRUE; - } - Py_RETURN_FALSE; - } PY_CATCH(nullptr) + { + PY_TRY{ + TPyLazyList* list = Cast(self); +if (NUdf::TBoxedValueAccessor::HasFastListLength(*list->Value.Get())) { + Py_RETURN_TRUE; +} +Py_RETURN_FALSE; +} +PY_CATCH(nullptr) } PyObject* TPyLazyList::HasItems(PyObject* self, PyObject* /* arg */) @@ -558,19 +566,21 @@ PyObject* TPyLazyList::HasItems(PyObject* self, PyObject* /* arg */) Py_RETURN_TRUE; } Py_RETURN_FALSE; - } PY_CATCH(nullptr) + } + PY_CATCH(nullptr) } int TPyLazyList::Bool(PyObject* self) -{ - PY_TRY { - TPyLazyList* list = Cast(self); - if (list->CachedLength == -1) { - return NUdf::TBoxedValueAccessor::HasListItems(*list->Value.Get()) ? 1 : 0; - } else { - return list->CachedLength > 0 ? 1 : 0; - } - } PY_CATCH(-1) + { + PY_TRY{ + TPyLazyList* list = Cast(self); +if (list->CachedLength == -1) { + return NUdf::TBoxedValueAccessor::HasListItems(*list->Value.Get()) ? 1 : 0; +} else { + return list->CachedLength > 0 ? 
1 : 0; +} +} +PY_CATCH(-1) } ////////////////////////////////////////////////////////////////////////////// @@ -597,8 +607,9 @@ PyObject* TPyLazyListIterator::Next(PyObject* self) if (iter->Iterator.Get().Next(item)) { ++iter->Length; - for (auto skip = list->Step; --skip && iter->Iterator.Get().Skip(); ++iter->Length) + for (auto skip = list->Step; --skip && iter->Iterator.Get().Skip(); ++iter->Length) { continue; + } return ToPyObject(list->CastCtx, list->ItemType, item).Release(); } @@ -609,14 +620,14 @@ PyObject* TPyLazyListIterator::Next(PyObject* self) } return nullptr; - } PY_CATCH(nullptr) + } + PY_CATCH(nullptr) } ////////////////////////////////////////////////////////////////////////////// // TPyThinList interface ////////////////////////////////////////////////////////////////////////////// -struct TPyThinList -{ +struct TPyThinList { using TPtr = NUdf::TRefCountedPtr<TPyThinList, TPyPtrOps<TPyThinList>>; PyObject_HEAD; @@ -636,12 +647,12 @@ struct TPyThinList } static PyObject* New( - const TPyCastContext::TPtr& castCtx, - const NUdf::TType* itemType, - NUdf::IBoxedValuePtr value = NUdf::IBoxedValuePtr(), - const NUdf::TUnboxedValue* elements = nullptr, - Py_ssize_t length = 0, - Py_ssize_t step = 1); + const TPyCastContext::TPtr& castCtx, + const NUdf::TType* itemType, + NUdf::IBoxedValuePtr value = NUdf::IBoxedValuePtr(), + const NUdf::TUnboxedValue* elements = nullptr, + Py_ssize_t length = 0, + Py_ssize_t step = 1); static int Bool(PyObject* self); static PyObject* Repr(PyObject* self); @@ -663,154 +674,155 @@ PyMappingMethods ThinListMapping = { }; PyNumberMethods ThinListNumbering = { - INIT_MEMBER(nb_add, nullptr), - INIT_MEMBER(nb_subtract, nullptr), - INIT_MEMBER(nb_multiply, nullptr), + INIT_MEMBER(nb_add, nullptr), + INIT_MEMBER(nb_subtract, nullptr), + INIT_MEMBER(nb_multiply, nullptr), #if PY_MAJOR_VERSION < 3 - INIT_MEMBER(nb_divide, nullptr), + INIT_MEMBER(nb_divide, nullptr), #endif - INIT_MEMBER(nb_remainder, nullptr), - 
INIT_MEMBER(nb_divmod, nullptr), - INIT_MEMBER(nb_power, nullptr), - INIT_MEMBER(nb_negative, nullptr), - INIT_MEMBER(nb_positive, nullptr), - INIT_MEMBER(nb_absolute, nullptr), + INIT_MEMBER(nb_remainder, nullptr), + INIT_MEMBER(nb_divmod, nullptr), + INIT_MEMBER(nb_power, nullptr), + INIT_MEMBER(nb_negative, nullptr), + INIT_MEMBER(nb_positive, nullptr), + INIT_MEMBER(nb_absolute, nullptr), #if PY_MAJOR_VERSION >= 3 - INIT_MEMBER(nb_bool, TPyThinList::Bool), + INIT_MEMBER(nb_bool, TPyThinList::Bool), #else - INIT_MEMBER(nb_nonzero, TPyThinList::Bool), + INIT_MEMBER(nb_nonzero, TPyThinList::Bool), #endif - INIT_MEMBER(nb_invert, nullptr), - INIT_MEMBER(nb_lshift, nullptr), - INIT_MEMBER(nb_rshift, nullptr), - INIT_MEMBER(nb_and, nullptr), - INIT_MEMBER(nb_xor, nullptr), - INIT_MEMBER(nb_or, nullptr), + INIT_MEMBER(nb_invert, nullptr), + INIT_MEMBER(nb_lshift, nullptr), + INIT_MEMBER(nb_rshift, nullptr), + INIT_MEMBER(nb_and, nullptr), + INIT_MEMBER(nb_xor, nullptr), + INIT_MEMBER(nb_or, nullptr), #if PY_MAJOR_VERSION < 3 - INIT_MEMBER(nb_coerce, nullptr), + INIT_MEMBER(nb_coerce, nullptr), #endif - INIT_MEMBER(nb_int, nullptr), + INIT_MEMBER(nb_int, nullptr), #if PY_MAJOR_VERSION >= 3 - INIT_MEMBER(nb_reserved, nullptr), + INIT_MEMBER(nb_reserved, nullptr), #else - INIT_MEMBER(nb_long, nullptr), + INIT_MEMBER(nb_long, nullptr), #endif - INIT_MEMBER(nb_float, nullptr), + INIT_MEMBER(nb_float, nullptr), #if PY_MAJOR_VERSION < 3 - INIT_MEMBER(nb_oct, nullptr), - INIT_MEMBER(nb_hex, nullptr), + INIT_MEMBER(nb_oct, nullptr), + INIT_MEMBER(nb_hex, nullptr), #endif - INIT_MEMBER(nb_inplace_add, nullptr), - INIT_MEMBER(nb_inplace_subtract, nullptr), - INIT_MEMBER(nb_inplace_multiply, nullptr), - INIT_MEMBER(nb_inplace_remainder, nullptr), - INIT_MEMBER(nb_inplace_power, nullptr), - INIT_MEMBER(nb_inplace_lshift, nullptr), - INIT_MEMBER(nb_inplace_rshift, nullptr), - INIT_MEMBER(nb_inplace_and, nullptr), - INIT_MEMBER(nb_inplace_xor, nullptr), - INIT_MEMBER(nb_inplace_or, 
nullptr), + INIT_MEMBER(nb_inplace_add, nullptr), + INIT_MEMBER(nb_inplace_subtract, nullptr), + INIT_MEMBER(nb_inplace_multiply, nullptr), + INIT_MEMBER(nb_inplace_remainder, nullptr), + INIT_MEMBER(nb_inplace_power, nullptr), + INIT_MEMBER(nb_inplace_lshift, nullptr), + INIT_MEMBER(nb_inplace_rshift, nullptr), + INIT_MEMBER(nb_inplace_and, nullptr), + INIT_MEMBER(nb_inplace_xor, nullptr), + INIT_MEMBER(nb_inplace_or, nullptr), - INIT_MEMBER(nb_floor_divide, nullptr), - INIT_MEMBER(nb_true_divide, nullptr), - INIT_MEMBER(nb_inplace_floor_divide, nullptr), - INIT_MEMBER(nb_inplace_true_divide, nullptr), + INIT_MEMBER(nb_floor_divide, nullptr), + INIT_MEMBER(nb_true_divide, nullptr), + INIT_MEMBER(nb_inplace_floor_divide, nullptr), + INIT_MEMBER(nb_inplace_true_divide, nullptr), - INIT_MEMBER(nb_index, nullptr), + INIT_MEMBER(nb_index, nullptr), #if PY_MAJOR_VERSION >= 3 - INIT_MEMBER(nb_matrix_multiply, nullptr), - INIT_MEMBER(nb_inplace_matrix_multiply, nullptr), + INIT_MEMBER(nb_matrix_multiply, nullptr), + INIT_MEMBER(nb_inplace_matrix_multiply, nullptr), #endif }; static PyMethodDef TPyThinListMethods[] = { - { "__reversed__", TPyThinList::Reversed, METH_NOARGS, nullptr }, - { "to_index_dict", TPyThinList::ToIndexDict, METH_NOARGS, to_index_dict__doc__ }, - { "reversed", TPyThinList::Reversed, METH_NOARGS, reversed__doc__ }, - { "take", TPyThinList::Take, METH_O, take__doc__ }, - { "skip", TPyThinList::Skip, METH_O, skip__doc__ }, - { "has_fast_len", TPyThinList::HasFastLen, METH_NOARGS, has_fast_len__doc__ }, - { "has_items", TPyThinList::HasItems, METH_NOARGS, has_items__doc__ }, - { nullptr, nullptr, 0, nullptr } /* sentinel */ + {"__reversed__", TPyThinList::Reversed, METH_NOARGS, nullptr}, + {"to_index_dict", TPyThinList::ToIndexDict, METH_NOARGS, to_index_dict__doc__}, + {"reversed", TPyThinList::Reversed, METH_NOARGS, reversed__doc__}, + {"take", TPyThinList::Take, METH_O, take__doc__}, + {"skip", TPyThinList::Skip, METH_O, skip__doc__}, + 
{"has_fast_len", TPyThinList::HasFastLen, METH_NOARGS, has_fast_len__doc__}, + {"has_items", TPyThinList::HasItems, METH_NOARGS, has_items__doc__}, + {nullptr, nullptr, 0, nullptr} /* sentinel */ }; #if PY_MAJOR_VERSION >= 3 -#define Py_TPFLAGS_HAVE_ITER 0 // NOLINT(readability-identifier-naming) + #define Py_TPFLAGS_HAVE_ITER 0 // NOLINT(readability-identifier-naming) #endif PyTypeObject PyThinListType = { PyVarObject_HEAD_INIT(&PyType_Type, 0) - INIT_MEMBER(tp_name , "yql.TList"), - INIT_MEMBER(tp_basicsize , sizeof(TPyThinList)), - INIT_MEMBER(tp_itemsize , 0), - INIT_MEMBER(tp_dealloc , TPyThinList::Dealloc), + // clang-format off + INIT_MEMBER(tp_name, "yql.TList"), + // clang-format on + INIT_MEMBER(tp_basicsize, sizeof(TPyThinList)), + INIT_MEMBER(tp_itemsize, 0), + INIT_MEMBER(tp_dealloc, TPyThinList::Dealloc), #if PY_VERSION_HEX < 0x030800b4 - INIT_MEMBER(tp_print , nullptr), + INIT_MEMBER(tp_print, nullptr), #else INIT_MEMBER(tp_vectorcall_offset, 0), #endif - INIT_MEMBER(tp_getattr , nullptr), - INIT_MEMBER(tp_setattr , nullptr), + INIT_MEMBER(tp_getattr, nullptr), + INIT_MEMBER(tp_setattr, nullptr), #if PY_MAJOR_VERSION >= 3 - INIT_MEMBER(tp_as_async , nullptr), + INIT_MEMBER(tp_as_async, nullptr), #else - INIT_MEMBER(tp_compare , nullptr), + INIT_MEMBER(tp_compare, nullptr), #endif - INIT_MEMBER(tp_repr , TPyThinList::Repr), - INIT_MEMBER(tp_as_number , &ThinListNumbering), - INIT_MEMBER(tp_as_sequence , nullptr), - INIT_MEMBER(tp_as_mapping , &ThinListMapping), - INIT_MEMBER(tp_hash , nullptr), - INIT_MEMBER(tp_call , nullptr), - INIT_MEMBER(tp_str , nullptr), - INIT_MEMBER(tp_getattro , nullptr), - INIT_MEMBER(tp_setattro , nullptr), - INIT_MEMBER(tp_as_buffer , nullptr), - INIT_MEMBER(tp_flags , Py_TPFLAGS_HAVE_ITER), - INIT_MEMBER(tp_doc , "yql.TList object"), - INIT_MEMBER(tp_traverse , nullptr), - INIT_MEMBER(tp_clear , nullptr), - INIT_MEMBER(tp_richcompare , nullptr), - INIT_MEMBER(tp_weaklistoffset , 0), - INIT_MEMBER(tp_iter , 
TPyThinList::Iter), - INIT_MEMBER(tp_iternext , nullptr), - INIT_MEMBER(tp_methods , TPyThinListMethods), - INIT_MEMBER(tp_members , nullptr), - INIT_MEMBER(tp_getset , nullptr), - INIT_MEMBER(tp_base , nullptr), - INIT_MEMBER(tp_dict , nullptr), - INIT_MEMBER(tp_descr_get , nullptr), - INIT_MEMBER(tp_descr_set , nullptr), - INIT_MEMBER(tp_dictoffset , 0), - INIT_MEMBER(tp_init , nullptr), - INIT_MEMBER(tp_alloc , nullptr), - INIT_MEMBER(tp_new , nullptr), - INIT_MEMBER(tp_free , nullptr), - INIT_MEMBER(tp_is_gc , nullptr), - INIT_MEMBER(tp_bases , nullptr), - INIT_MEMBER(tp_mro , nullptr), - INIT_MEMBER(tp_cache , nullptr), - INIT_MEMBER(tp_subclasses , nullptr), - INIT_MEMBER(tp_weaklist , nullptr), - INIT_MEMBER(tp_del , nullptr), - INIT_MEMBER(tp_version_tag , 0), + INIT_MEMBER(tp_repr, TPyThinList::Repr), + INIT_MEMBER(tp_as_number, &ThinListNumbering), + INIT_MEMBER(tp_as_sequence, nullptr), + INIT_MEMBER(tp_as_mapping, &ThinListMapping), + INIT_MEMBER(tp_hash, nullptr), + INIT_MEMBER(tp_call, nullptr), + INIT_MEMBER(tp_str, nullptr), + INIT_MEMBER(tp_getattro, nullptr), + INIT_MEMBER(tp_setattro, nullptr), + INIT_MEMBER(tp_as_buffer, nullptr), + INIT_MEMBER(tp_flags, Py_TPFLAGS_HAVE_ITER), + INIT_MEMBER(tp_doc, "yql.TList object"), + INIT_MEMBER(tp_traverse, nullptr), + INIT_MEMBER(tp_clear, nullptr), + INIT_MEMBER(tp_richcompare, nullptr), + INIT_MEMBER(tp_weaklistoffset, 0), + INIT_MEMBER(tp_iter, TPyThinList::Iter), + INIT_MEMBER(tp_iternext, nullptr), + INIT_MEMBER(tp_methods, TPyThinListMethods), + INIT_MEMBER(tp_members, nullptr), + INIT_MEMBER(tp_getset, nullptr), + INIT_MEMBER(tp_base, nullptr), + INIT_MEMBER(tp_dict, nullptr), + INIT_MEMBER(tp_descr_get, nullptr), + INIT_MEMBER(tp_descr_set, nullptr), + INIT_MEMBER(tp_dictoffset, 0), + INIT_MEMBER(tp_init, nullptr), + INIT_MEMBER(tp_alloc, nullptr), + INIT_MEMBER(tp_new, nullptr), + INIT_MEMBER(tp_free, nullptr), + INIT_MEMBER(tp_is_gc, nullptr), + INIT_MEMBER(tp_bases, nullptr), + 
INIT_MEMBER(tp_mro, nullptr), + INIT_MEMBER(tp_cache, nullptr), + INIT_MEMBER(tp_subclasses, nullptr), + INIT_MEMBER(tp_weaklist, nullptr), + INIT_MEMBER(tp_del, nullptr), + INIT_MEMBER(tp_version_tag, 0), #if PY_MAJOR_VERSION >= 3 - INIT_MEMBER(tp_finalize , nullptr), + INIT_MEMBER(tp_finalize, nullptr), #endif #if PY_VERSION_HEX >= 0x030800b1 - INIT_MEMBER(tp_vectorcall , nullptr), + INIT_MEMBER(tp_vectorcall, nullptr), #endif #if PY_VERSION_HEX >= 0x030800b4 && PY_VERSION_HEX < 0x03090000 - INIT_MEMBER(tp_print , nullptr), + INIT_MEMBER(tp_print, nullptr), #endif }; ////////////////////////////////////////////////////////////////////////////// // TPyThinListIterator interface ////////////////////////////////////////////////////////////////////////////// -struct TPyThinListIterator -{ +struct TPyThinListIterator { PyObject_HEAD; TPyThinList::TPtr List; const NUdf::TUnboxedValue* Elements; @@ -835,68 +847,70 @@ struct TPyThinListIterator PyTypeObject PyThinListIteratorType = { PyVarObject_HEAD_INIT(&PyType_Type, 0) - INIT_MEMBER(tp_name , "yql.TListIterator"), - INIT_MEMBER(tp_basicsize , sizeof(TPyThinListIterator)), - INIT_MEMBER(tp_itemsize , 0), - INIT_MEMBER(tp_dealloc , TPyThinListIterator::Dealloc), + // clang-format off + INIT_MEMBER(tp_name, "yql.TListIterator"), + // clang-format on + INIT_MEMBER(tp_basicsize, sizeof(TPyThinListIterator)), + INIT_MEMBER(tp_itemsize, 0), + INIT_MEMBER(tp_dealloc, TPyThinListIterator::Dealloc), #if PY_VERSION_HEX < 0x030800b4 - INIT_MEMBER(tp_print , nullptr), + INIT_MEMBER(tp_print, nullptr), #else INIT_MEMBER(tp_vectorcall_offset, 0), #endif - INIT_MEMBER(tp_getattr , nullptr), - INIT_MEMBER(tp_setattr , nullptr), + INIT_MEMBER(tp_getattr, nullptr), + INIT_MEMBER(tp_setattr, nullptr), #if PY_MAJOR_VERSION >= 3 - INIT_MEMBER(tp_as_async , nullptr), + INIT_MEMBER(tp_as_async, nullptr), #else - INIT_MEMBER(tp_compare , nullptr), + INIT_MEMBER(tp_compare, nullptr), #endif - INIT_MEMBER(tp_repr , TPyThinListIterator::Repr), - 
INIT_MEMBER(tp_as_number , nullptr), - INIT_MEMBER(tp_as_sequence , nullptr), - INIT_MEMBER(tp_as_mapping , nullptr), - INIT_MEMBER(tp_hash , nullptr), - INIT_MEMBER(tp_call , nullptr), - INIT_MEMBER(tp_str , nullptr), - INIT_MEMBER(tp_getattro , nullptr), - INIT_MEMBER(tp_setattro , nullptr), - INIT_MEMBER(tp_as_buffer , nullptr), - INIT_MEMBER(tp_flags , Py_TPFLAGS_HAVE_ITER), - INIT_MEMBER(tp_doc , "yql.ListIterator object"), - INIT_MEMBER(tp_traverse , nullptr), - INIT_MEMBER(tp_clear , nullptr), - INIT_MEMBER(tp_richcompare , nullptr), - INIT_MEMBER(tp_weaklistoffset , 0), - INIT_MEMBER(tp_iter , PyObject_SelfIter), - INIT_MEMBER(tp_iternext , TPyThinListIterator::Next), - INIT_MEMBER(tp_methods , nullptr), - INIT_MEMBER(tp_members , nullptr), - INIT_MEMBER(tp_getset , nullptr), - INIT_MEMBER(tp_base , nullptr), - INIT_MEMBER(tp_dict , nullptr), - INIT_MEMBER(tp_descr_get , nullptr), - INIT_MEMBER(tp_descr_set , nullptr), - INIT_MEMBER(tp_dictoffset , 0), - INIT_MEMBER(tp_init , nullptr), - INIT_MEMBER(tp_alloc , nullptr), - INIT_MEMBER(tp_new , nullptr), - INIT_MEMBER(tp_free , nullptr), - INIT_MEMBER(tp_is_gc , nullptr), - INIT_MEMBER(tp_bases , nullptr), - INIT_MEMBER(tp_mro , nullptr), - INIT_MEMBER(tp_cache , nullptr), - INIT_MEMBER(tp_subclasses , nullptr), - INIT_MEMBER(tp_weaklist , nullptr), - INIT_MEMBER(tp_del , nullptr), - INIT_MEMBER(tp_version_tag , 0), + INIT_MEMBER(tp_repr, TPyThinListIterator::Repr), + INIT_MEMBER(tp_as_number, nullptr), + INIT_MEMBER(tp_as_sequence, nullptr), + INIT_MEMBER(tp_as_mapping, nullptr), + INIT_MEMBER(tp_hash, nullptr), + INIT_MEMBER(tp_call, nullptr), + INIT_MEMBER(tp_str, nullptr), + INIT_MEMBER(tp_getattro, nullptr), + INIT_MEMBER(tp_setattro, nullptr), + INIT_MEMBER(tp_as_buffer, nullptr), + INIT_MEMBER(tp_flags, Py_TPFLAGS_HAVE_ITER), + INIT_MEMBER(tp_doc, "yql.ListIterator object"), + INIT_MEMBER(tp_traverse, nullptr), + INIT_MEMBER(tp_clear, nullptr), + INIT_MEMBER(tp_richcompare, nullptr), + 
INIT_MEMBER(tp_weaklistoffset, 0), + INIT_MEMBER(tp_iter, PyObject_SelfIter), + INIT_MEMBER(tp_iternext, TPyThinListIterator::Next), + INIT_MEMBER(tp_methods, nullptr), + INIT_MEMBER(tp_members, nullptr), + INIT_MEMBER(tp_getset, nullptr), + INIT_MEMBER(tp_base, nullptr), + INIT_MEMBER(tp_dict, nullptr), + INIT_MEMBER(tp_descr_get, nullptr), + INIT_MEMBER(tp_descr_set, nullptr), + INIT_MEMBER(tp_dictoffset, 0), + INIT_MEMBER(tp_init, nullptr), + INIT_MEMBER(tp_alloc, nullptr), + INIT_MEMBER(tp_new, nullptr), + INIT_MEMBER(tp_free, nullptr), + INIT_MEMBER(tp_is_gc, nullptr), + INIT_MEMBER(tp_bases, nullptr), + INIT_MEMBER(tp_mro, nullptr), + INIT_MEMBER(tp_cache, nullptr), + INIT_MEMBER(tp_subclasses, nullptr), + INIT_MEMBER(tp_weaklist, nullptr), + INIT_MEMBER(tp_del, nullptr), + INIT_MEMBER(tp_version_tag, 0), #if PY_MAJOR_VERSION >= 3 - INIT_MEMBER(tp_finalize , nullptr), + INIT_MEMBER(tp_finalize, nullptr), #endif #if PY_VERSION_HEX >= 0x030800b1 - INIT_MEMBER(tp_vectorcall , nullptr), + INIT_MEMBER(tp_vectorcall, nullptr), #endif #if PY_VERSION_HEX >= 0x030800b4 && PY_VERSION_HEX < 0x03090000 - INIT_MEMBER(tp_print , nullptr), + INIT_MEMBER(tp_print, nullptr), #endif }; @@ -904,12 +918,12 @@ PyTypeObject PyThinListIteratorType = { // TPyThinList implementation ////////////////////////////////////////////////////////////////////////////// PyObject* TPyThinList::New( - const TPyCastContext::TPtr& castCtx, - const NUdf::TType* itemType, - NUdf::IBoxedValuePtr value, - const NUdf::TUnboxedValue* elements, - Py_ssize_t length, - Py_ssize_t step) + const TPyCastContext::TPtr& castCtx, + const NUdf::TType* itemType, + NUdf::IBoxedValuePtr value, + const NUdf::TUnboxedValue* elements, + Py_ssize_t length, + Py_ssize_t step) { TPyThinList* list = new TPyThinList; PyObject_INIT(list, &PyThinListType); @@ -930,11 +944,12 @@ PyObject* TPyThinList::Repr(PyObject*) } PyObject* TPyThinList::Iter(PyObject* self) -{ - PY_TRY { - TPyThinList* list = Cast(self); - return 
TPyThinListIterator::New(list); - } PY_CATCH(nullptr) + { + PY_TRY{ + TPyThinList* list = Cast(self); +return TPyThinListIterator::New(list); +} +PY_CATCH(nullptr) } Py_ssize_t TPyThinList::Len(PyObject* self) @@ -943,116 +958,119 @@ Py_ssize_t TPyThinList::Len(PyObject* self) } PyObject* TPyThinList::Subscript(PyObject* self, PyObject* slice) -{ - PY_TRY { - TPyThinList* list = Cast(self); - const auto vb = list->CastCtx->ValueBuilder; + { + PY_TRY{ + TPyThinList* list = Cast(self); +const auto vb = list->CastCtx->ValueBuilder; - if (PyIndex_Check(slice)) { - Py_ssize_t index = PyNumber_AsSsize_t(slice, PyExc_IndexError); +if (PyIndex_Check(slice)) { + Py_ssize_t index = PyNumber_AsSsize_t(slice, PyExc_IndexError); - if (index < 0) { - index += list->Length; - } + if (index < 0) { + index += list->Length; + } - if (index < 0 || index >= list->Length) { - const TPyObjectPtr error = PyUnicode_FromFormat("index %zd out of bounds, list size: %zd", index, list->Length); - PyErr_SetObject(PyExc_IndexError, error.Get()); - return nullptr; - } + if (index < 0 || index >= list->Length) { + const TPyObjectPtr error = PyUnicode_FromFormat("index %zd out of bounds, list size: %zd", index, list->Length); + PyErr_SetObject(PyExc_IndexError, error.Get()); + return nullptr; + } - if (list->Step > 0) { - index *= list->Step; - } else { - index = list->Length - ++index; - index *= -list->Step; - } + if (list->Step > 0) { + index *= list->Step; + } else { + index = list->Length - ++index; + index *= -list->Step; + } - return ToPyObject(list->CastCtx, list->ItemType, list->Elements[index]).Release(); - } + return ToPyObject(list->CastCtx, list->ItemType, list->Elements[index]).Release(); +} - if (PySlice_Check(slice)) { - Py_ssize_t start, stop, step, size; +if (PySlice_Check(slice)) { + Py_ssize_t start, stop, step, size; - if (PySlice_GetIndicesEx(SLICEOBJ(slice), list->Length, &start, &stop, &step, &size) < 0) { - return nullptr; - } + if (PySlice_GetIndicesEx(SLICEOBJ(slice), 
list->Length, &start, &stop, &step, &size) < 0) { + return nullptr; + } - if (!step) { - PyErr_SetString(PyExc_ValueError, "slice step cannot be zero"); - return nullptr; - } + if (!step) { + PyErr_SetString(PyExc_ValueError, "slice step cannot be zero"); + return nullptr; + } - if (size > 0) { - const Py_ssize_t skip = list->Step * (list->Step > 0 ? - (step > 0 ? start : start + step * (size - 1)): - (step > 0 ? stop : start + 1) - list->Length); + if (size > 0) { + const Py_ssize_t skip = list->Step * (list->Step > 0 ? (step > 0 ? start : start + step * (size - 1)) : (step > 0 ? stop : start + 1) - list->Length); - return New(list->CastCtx, list->ItemType, list->Value.Get(), list->Elements + skip, size, step * list->Step); - } else { - return New(list->CastCtx, list->ItemType, list->Value.Get()); - } - } + return New(list->CastCtx, list->ItemType, list->Value.Get(), list->Elements + skip, size, step * list->Step); + } else { + return New(list->CastCtx, list->ItemType, list->Value.Get()); + } +} - const TPyObjectPtr type = PyObject_Type(slice); - const TPyObjectPtr repr = PyObject_Repr(type.Get()); - const TPyObjectPtr error = PyUnicode_FromFormat("Unsupported slice object type: %R", repr.Get()); - PyErr_SetObject(PyExc_TypeError, error.Get()); - return nullptr; - } PY_CATCH(nullptr) +const TPyObjectPtr type = PyObject_Type(slice); +const TPyObjectPtr repr = PyObject_Repr(type.Get()); +const TPyObjectPtr error = PyUnicode_FromFormat("Unsupported slice object type: %R", repr.Get()); +PyErr_SetObject(PyExc_TypeError, error.Get()); +return nullptr; +} +PY_CATCH(nullptr) } #undef SLICEOBJ PyObject* TPyThinList::ToIndexDict(PyObject* self, PyObject* /* arg */) -{ - PY_TRY { - TPyThinList* list = Cast(self); - const auto dict = list->CastCtx->ValueBuilder->ToIndexDict(NUdf::TUnboxedValuePod(list->Value.Get().Get())); - return ToPyLazyDict(list->CastCtx, nullptr, list->ItemType, dict).Release(); - } PY_CATCH(nullptr) + { + PY_TRY{ + TPyThinList* list = Cast(self); +const 
auto dict = list->CastCtx->ValueBuilder->ToIndexDict(NUdf::TUnboxedValuePod(list->Value.Get().Get())); +return ToPyLazyDict(list->CastCtx, nullptr, list->ItemType, dict).Release(); +} +PY_CATCH(nullptr) } PyObject* TPyThinList::Reversed(PyObject* self, PyObject* /* arg */) -{ - PY_TRY { - TPyThinList* list = Cast(self); - return New(list->CastCtx, list->ItemType, list->Value.Get(), list->Elements, list->Length, -list->Step); - } PY_CATCH(nullptr) + { + PY_TRY{ + TPyThinList* list = Cast(self); +return New(list->CastCtx, list->ItemType, list->Value.Get(), list->Elements, list->Length, -list->Step); +} +PY_CATCH(nullptr) } PyObject* TPyThinList::Take(PyObject* self, PyObject* arg) -{ - PY_TRY { - TPyThinList* list = Cast(self); - const Py_ssize_t count = CastIndex(arg, "take"); - if (count < 0) { - return nullptr; - } + { + PY_TRY{ + TPyThinList* list = Cast(self); +const Py_ssize_t count = CastIndex(arg, "take"); +if (count < 0) { + return nullptr; +} - if (const auto size = std::min(count, list->Length)) { - return New(list->CastCtx, list->ItemType, list->Value.Get(), list->Step > 0 ? list->Elements : list->Elements + list->Length + size * list->Step, size, list->Step); - } else { - return New(list->CastCtx, list->ItemType, list->Value.Get()); - } - } PY_CATCH(nullptr) +if (const auto size = std::min(count, list->Length)) { + return New(list->CastCtx, list->ItemType, list->Value.Get(), list->Step > 0 ? 
list->Elements : list->Elements + list->Length + size * list->Step, size, list->Step); +} else { + return New(list->CastCtx, list->ItemType, list->Value.Get()); +} +} +PY_CATCH(nullptr) } PyObject* TPyThinList::Skip(PyObject* self, PyObject* arg) -{ - PY_TRY { - TPyThinList* list = Cast(self); - const Py_ssize_t count = CastIndex(arg, "skip"); - if (count < 0) { - return nullptr; - } + { + PY_TRY{ + TPyThinList* list = Cast(self); +const Py_ssize_t count = CastIndex(arg, "skip"); +if (count < 0) { + return nullptr; +} - if (const auto size = std::max(list->Length - count, Py_ssize_t(0))) { - return New(list->CastCtx, list->ItemType, list->Value.Get(), list->Step > 0 ? list->Elements + count * list->Step : list->Elements, size, list->Step); - } else { - return New(list->CastCtx, list->ItemType); - } - } PY_CATCH(nullptr) +if (const auto size = std::max(list->Length - count, Py_ssize_t(0))) { + return New(list->CastCtx, list->ItemType, list->Value.Get(), list->Step > 0 ? list->Elements + count * list->Step : list->Elements, size, list->Step); +} else { + return New(list->CastCtx, list->ItemType); +} +} +PY_CATCH(nullptr) } PyObject* TPyThinList::HasFastLen(PyObject* self, PyObject* /* arg */) @@ -1062,10 +1080,11 @@ PyObject* TPyThinList::HasFastLen(PyObject* self, PyObject* /* arg */) PyObject* TPyThinList::HasItems(PyObject* self, PyObject* /* arg */) { - if (Cast(self)->Length > 0) + if (Cast(self)->Length > 0) { Py_RETURN_TRUE; - else + } else { Py_RETURN_FALSE; + } } int TPyThinList::Bool(PyObject* self) @@ -1087,24 +1106,25 @@ PyObject* TPyThinListIterator::New(TPyThinList* list) } PyObject* TPyThinListIterator::Next(PyObject* self) -{ - PY_TRY { - TPyThinListIterator* iter = Cast(self); + { + PY_TRY{ + TPyThinListIterator* iter = Cast(self); - if (iter->Count) { - --iter->Count; - TPyThinList* list = iter->List.Get(); - return ToPyObject(list->CastCtx, list->ItemType, *(iter->Elements += list->Step)).Release(); - } +if (iter->Count) { + --iter->Count; + 
TPyThinList* list = iter->List.Get(); + return ToPyObject(list->CastCtx, list->ItemType, *(iter->Elements += list->Step)).Release(); +} - return nullptr; - } PY_CATCH(nullptr) +return nullptr; +} +PY_CATCH(nullptr) } TPyObjectPtr ToPyLazyList( - const TPyCastContext::TPtr& castCtx, - const NUdf::TType* itemType, - const NUdf::TUnboxedValuePod& value) + const TPyCastContext::TPtr& castCtx, + const NUdf::TType* itemType, + const NUdf::TUnboxedValuePod& value) { if (const auto elements = value.GetElements()) { return TPyThinList::New(castCtx, itemType, value.AsBoxed(), elements, value.GetListLength()); diff --git a/yql/essentials/udfs/common/python/bindings/py_list.h b/yql/essentials/udfs/common/python/bindings/py_list.h index 9db170a7954..9266ff918f5 100644 --- a/yql/essentials/udfs/common/python/bindings/py_list.h +++ b/yql/essentials/udfs/common/python/bindings/py_list.h @@ -11,23 +11,23 @@ extern PyTypeObject PyThinListIteratorType; extern PyTypeObject PyThinListType; TPyObjectPtr ToPyLazyList( - const TPyCastContext::TPtr& castCtx, - const NKikimr::NUdf::TType* itemType, - const NKikimr::NUdf::TUnboxedValuePod& value); + const TPyCastContext::TPtr& castCtx, + const NKikimr::NUdf::TType* itemType, + const NKikimr::NUdf::TUnboxedValuePod& value); NKikimr::NUdf::TUnboxedValue FromPyLazyGenerator( - const TPyCastContext::TPtr& castCtx, - const NKikimr::NUdf::TType* type, - TPyObjectPtr callableObj); + const TPyCastContext::TPtr& castCtx, + const NKikimr::NUdf::TType* type, + TPyObjectPtr callableObj); NKikimr::NUdf::TUnboxedValue FromPyLazyIterable( - const TPyCastContext::TPtr& castCtx, - const NKikimr::NUdf::TType* type, - TPyObjectPtr iterableObj); + const TPyCastContext::TPtr& castCtx, + const NKikimr::NUdf::TType* type, + TPyObjectPtr iterableObj); NKikimr::NUdf::TUnboxedValue FromPyLazyIterator( - const TPyCastContext::TPtr& castCtx, - const NKikimr::NUdf::TType* type, - TPyObjectPtr iteratorObj); + const TPyCastContext::TPtr& castCtx, + const 
NKikimr::NUdf::TType* type, + TPyObjectPtr iteratorObj); -} // namspace NPython +} // namespace NPython diff --git a/yql/essentials/udfs/common/python/bindings/py_list_ut.cpp b/yql/essentials/udfs/common/python/bindings/py_list_ut.cpp index b2e9a640d4f..bceb75e6637 100644 --- a/yql/essentials/udfs/common/python/bindings/py_list_ut.cpp +++ b/yql/essentials/udfs/common/python/bindings/py_list_ut.cpp @@ -4,1022 +4,990 @@ #include <library/cpp/testing/unittest/registar.h> - using namespace NPython; Y_UNIT_TEST_SUITE(TPyListTest) { - Y_UNIT_TEST(FromPyEmptyList) { - TPythonTestEngine engine; - engine.ToMiniKQL<NUdf::TListType<ui32>>( - "def Test(): return []", - [](const NUdf::TUnboxedValuePod& value) { - UNIT_ASSERT(value); - UNIT_ASSERT(value.IsBoxed()); - UNIT_ASSERT_EQUAL(value.GetListLength(), 0); - }); - } +Y_UNIT_TEST(FromPyEmptyList) { + TPythonTestEngine engine; + engine.ToMiniKQL<NUdf::TListType<ui32>>( + "def Test(): return []", + [](const NUdf::TUnboxedValuePod& value) { + UNIT_ASSERT(value); + UNIT_ASSERT(value.IsBoxed()); + UNIT_ASSERT_EQUAL(value.GetListLength(), 0); + }); +} - Y_UNIT_TEST(FromPyList) { - TPythonTestEngine engine; - engine.ToMiniKQL<NUdf::TListType<ui32>>( - "def Test(): return [1, 2, 3, 4]", - [](const NUdf::TUnboxedValuePod& value) { - UNIT_ASSERT(value); - UNIT_ASSERT(value.IsBoxed()); - UNIT_ASSERT_EQUAL(value.GetListLength(), 4); - const auto it = value.GetListIterator(); - NUdf::TUnboxedValue item; +Y_UNIT_TEST(FromPyList) { + TPythonTestEngine engine; + engine.ToMiniKQL<NUdf::TListType<ui32>>( + "def Test(): return [1, 2, 3, 4]", + [](const NUdf::TUnboxedValuePod& value) { + UNIT_ASSERT(value); + UNIT_ASSERT(value.IsBoxed()); + UNIT_ASSERT_EQUAL(value.GetListLength(), 4); + const auto it = value.GetListIterator(); + NUdf::TUnboxedValue item; - UNIT_ASSERT(it.Next(item)); - UNIT_ASSERT_EQUAL(item.Get<ui32>(), 1); - UNIT_ASSERT(it.Next(item)); - UNIT_ASSERT_EQUAL(item.Get<ui32>(), 2); - UNIT_ASSERT(it.Next(item)); - 
UNIT_ASSERT_EQUAL(item.Get<ui32>(), 3); - UNIT_ASSERT(it.Next(item)); - UNIT_ASSERT_EQUAL(item.Get<ui32>(), 4); - UNIT_ASSERT(false == it.Next(item)); - }); - } + UNIT_ASSERT(it.Next(item)); + UNIT_ASSERT_EQUAL(item.Get<ui32>(), 1); + UNIT_ASSERT(it.Next(item)); + UNIT_ASSERT_EQUAL(item.Get<ui32>(), 2); + UNIT_ASSERT(it.Next(item)); + UNIT_ASSERT_EQUAL(item.Get<ui32>(), 3); + UNIT_ASSERT(it.Next(item)); + UNIT_ASSERT_EQUAL(item.Get<ui32>(), 4); + UNIT_ASSERT(false == it.Next(item)); + }); +} - Y_UNIT_TEST(ToPyEmptyList) { - TPythonTestEngine engine; - engine.ToPython<NUdf::TListType<char*>>( - [](const TType* type, const NUdf::IValueBuilder& vb) { - Y_UNUSED(type); - return vb.NewEmptyList(); - }, - "def Test(value):\n" - " assert value.has_fast_len()\n" - " assert len(value) == 0\n"); - } +Y_UNIT_TEST(ToPyEmptyList) { + TPythonTestEngine engine; + engine.ToPython<NUdf::TListType<char*>>( + [](const TType* type, const NUdf::IValueBuilder& vb) { + Y_UNUSED(type); + return vb.NewEmptyList(); + }, + "def Test(value):\n" + " assert value.has_fast_len()\n" + " assert len(value) == 0\n"); +} - Y_UNIT_TEST(ToPyList) { - TPythonTestEngine engine; - engine.ToPython<NUdf::TListType<double>>( - [](const TType* type, const NUdf::IValueBuilder& vb) { - Y_UNUSED(type); - std::array<NUdf::TUnboxedValue, 3U> list = {{ - NUdf::TUnboxedValuePod(0.1), - NUdf::TUnboxedValuePod(0.2), - NUdf::TUnboxedValuePod(0.3) - }}; - return vb.NewList(list.data(), list.size()); - }, - "def Test(value):\n" - " assert value.has_fast_len()\n" - " assert len(value) == 3\n" - " assert all(isinstance(v, float) for v in value)\n" - " assert list(value) == [0.1, 0.2, 0.3]\n"); - } +Y_UNIT_TEST(ToPyList) { + TPythonTestEngine engine; + engine.ToPython<NUdf::TListType<double>>( + [](const TType* type, const NUdf::IValueBuilder& vb) { + Y_UNUSED(type); + std::array<NUdf::TUnboxedValue, 3U> list = {{NUdf::TUnboxedValuePod(0.1), + NUdf::TUnboxedValuePod(0.2), + NUdf::TUnboxedValuePod(0.3)}}; + return 
vb.NewList(list.data(), list.size()); + }, + "def Test(value):\n" + " assert value.has_fast_len()\n" + " assert len(value) == 3\n" + " assert all(isinstance(v, float) for v in value)\n" + " assert list(value) == [0.1, 0.2, 0.3]\n"); +} - Y_UNIT_TEST(FromPyTuple) { - TPythonTestEngine engine; - engine.ToMiniKQL<NUdf::TListType<ui32>>( - "def Test(): return (1, 2, 3)", - [](const NUdf::TUnboxedValuePod& value) { - UNIT_ASSERT(value); - UNIT_ASSERT(value.IsBoxed()); - UNIT_ASSERT_EQUAL(value.GetListLength(), 3); +Y_UNIT_TEST(FromPyTuple) { + TPythonTestEngine engine; + engine.ToMiniKQL<NUdf::TListType<ui32>>( + "def Test(): return (1, 2, 3)", + [](const NUdf::TUnboxedValuePod& value) { + UNIT_ASSERT(value); + UNIT_ASSERT(value.IsBoxed()); + UNIT_ASSERT_EQUAL(value.GetListLength(), 3); - ui32 expected = 1; - auto it = value.GetListIterator(); - for (NUdf::TUnboxedValue item; it.Next(item);) { - ui32 actual = item.Get<ui32>(); - UNIT_ASSERT_EQUAL(actual, expected); - expected++; - } - }); - } + ui32 expected = 1; + auto it = value.GetListIterator(); + for (NUdf::TUnboxedValue item; it.Next(item);) { + ui32 actual = item.Get<ui32>(); + UNIT_ASSERT_EQUAL(actual, expected); + expected++; + } + }); +} - Y_UNIT_TEST(ThinListIteration) { - TPythonTestEngine engine; - engine.ToPython<NUdf::TListType<double>>( - [](const TType* type, const NUdf::IValueBuilder& vb) { - Y_UNUSED(type); - std::array<NUdf::TUnboxedValue, 3U> list = {{ - NUdf::TUnboxedValuePod(0.1), - NUdf::TUnboxedValuePod(0.2), - NUdf::TUnboxedValuePod(0.3) - }}; - return vb.NewList(list.data(), list.size()); - }, - "def Test(value):\n" - " assert '__iter__' in dir(value)\n" - " it = iter(value)\n" - " assert next(it) == 0.1\n" - " assert next(it) == 0.2\n" - " assert next(it) == 0.3\n" - " try:\n" - " next(it)\n" - " except StopIteration:\n" - " pass\n" - " else:\n" - " assert False\n" - ); - } +Y_UNIT_TEST(ThinListIteration) { + TPythonTestEngine engine; + engine.ToPython<NUdf::TListType<double>>( + [](const 
TType* type, const NUdf::IValueBuilder& vb) { + Y_UNUSED(type); + std::array<NUdf::TUnboxedValue, 3U> list = {{NUdf::TUnboxedValuePod(0.1), + NUdf::TUnboxedValuePod(0.2), + NUdf::TUnboxedValuePod(0.3)}}; + return vb.NewList(list.data(), list.size()); + }, + "def Test(value):\n" + " assert '__iter__' in dir(value)\n" + " it = iter(value)\n" + " assert next(it) == 0.1\n" + " assert next(it) == 0.2\n" + " assert next(it) == 0.3\n" + " try:\n" + " next(it)\n" + " except StopIteration:\n" + " pass\n" + " else:\n" + " assert False\n"); +} - Y_UNIT_TEST(ThinListReversed) { - TPythonTestEngine engine; - engine.ToPython<NUdf::TListType<i32>>( - [](const TType* type, const NUdf::IValueBuilder& vb) { - Y_UNUSED(type); - std::array<NUdf::TUnboxedValue, 10U> list = {{ - NUdf::TUnboxedValuePod(0U), - NUdf::TUnboxedValuePod(1U), - NUdf::TUnboxedValuePod(2U), - NUdf::TUnboxedValuePod(3U), - NUdf::TUnboxedValuePod(4U), - NUdf::TUnboxedValuePod(5U), - NUdf::TUnboxedValuePod(6U), - NUdf::TUnboxedValuePod(7U), - NUdf::TUnboxedValuePod(8U), - NUdf::TUnboxedValuePod(9U) - }}; - return vb.NewList(list.data(), list.size()); - }, - "def Test(v):\n" - " e = list(range(0, 10))\n" - " assert '__reversed__' in dir(v)\n" - " assert all(one == two for one, two in zip(reversed(v), reversed(e)))\n" - ); - } +Y_UNIT_TEST(ThinListReversed) { + TPythonTestEngine engine; + engine.ToPython<NUdf::TListType<i32>>( + [](const TType* type, const NUdf::IValueBuilder& vb) { + Y_UNUSED(type); + std::array<NUdf::TUnboxedValue, 10U> list = {{NUdf::TUnboxedValuePod(0U), + NUdf::TUnboxedValuePod(1U), + NUdf::TUnboxedValuePod(2U), + NUdf::TUnboxedValuePod(3U), + NUdf::TUnboxedValuePod(4U), + NUdf::TUnboxedValuePod(5U), + NUdf::TUnboxedValuePod(6U), + NUdf::TUnboxedValuePod(7U), + NUdf::TUnboxedValuePod(8U), + NUdf::TUnboxedValuePod(9U)}}; + return vb.NewList(list.data(), list.size()); + }, + "def Test(v):\n" + " e = list(range(0, 10))\n" + " assert '__reversed__' in dir(v)\n" + " assert all(one == two for one, two 
in zip(reversed(v), reversed(e)))\n"); +} - Y_UNIT_TEST(LazyListReversed) { - TPythonTestEngine engine; - engine.ToPython<NUdf::TListType<i32>>( - [](const TType* type, const NUdf::IValueBuilder& vb) { - Y_UNUSED(type); Y_UNUSED(vb); - return NUdf::TUnboxedValuePod(new NUdf::TLazyList<false>(0, 3)); - }, - "def Test(v):\n" - " assert '__reversed__' in dir(v)\n" - " it = iter(reversed(v))\n" - " assert next(it) == 2\n" - " assert next(it) == 1\n" - " assert next(it) == 0\n" - " try:\n" - " next(it)\n" - " except StopIteration:\n" - " pass\n" - " else:\n" - " assert False\n" - ); - } +Y_UNIT_TEST(LazyListReversed) { + TPythonTestEngine engine; + engine.ToPython<NUdf::TListType<i32>>( + [](const TType* type, const NUdf::IValueBuilder& vb) { + Y_UNUSED(type); + Y_UNUSED(vb); + return NUdf::TUnboxedValuePod(new NUdf::TLazyList<false>(0, 3)); + }, + "def Test(v):\n" + " assert '__reversed__' in dir(v)\n" + " it = iter(reversed(v))\n" + " assert next(it) == 2\n" + " assert next(it) == 1\n" + " assert next(it) == 0\n" + " try:\n" + " next(it)\n" + " except StopIteration:\n" + " pass\n" + " else:\n" + " assert False\n"); +} - Y_UNIT_TEST(LazyListIteration) { - TPythonTestEngine engine; - engine.ToPython<NUdf::TListType<i32>>( - [](const TType* type, const NUdf::IValueBuilder& vb) { - Y_UNUSED(type); Y_UNUSED(vb); - return NUdf::TUnboxedValuePod(new NUdf::TLazyList<false>(0, 3)); - }, - "def Test(value):\n" - " assert '__iter__' in dir(value)\n" - " it = iter(value)\n" - " assert next(it) == 0\n" - " assert next(it) == 1\n" - " assert next(it) == 2\n" - " try:\n" - " next(it)\n" - " except StopIteration:\n" - " pass\n" - " else:\n" - " assert False\n" - ); - } +Y_UNIT_TEST(LazyListIteration) { + TPythonTestEngine engine; + engine.ToPython<NUdf::TListType<i32>>( + [](const TType* type, const NUdf::IValueBuilder& vb) { + Y_UNUSED(type); + Y_UNUSED(vb); + return NUdf::TUnboxedValuePod(new NUdf::TLazyList<false>(0, 3)); + }, + "def Test(value):\n" + " assert '__iter__' in 
dir(value)\n" + " it = iter(value)\n" + " assert next(it) == 0\n" + " assert next(it) == 1\n" + " assert next(it) == 2\n" + " try:\n" + " next(it)\n" + " except StopIteration:\n" + " pass\n" + " else:\n" + " assert False\n"); +} - Y_UNIT_TEST(LazyListInvalidIndexType) { - TPythonTestEngine engine; - engine.ToPython<NUdf::TListType<i32>>( - [](const TType* type, const NUdf::IValueBuilder& vb) { - Y_UNUSED(type); Y_UNUSED(vb); - return NUdf::TUnboxedValuePod(new NUdf::TLazyList<false>(0, 3)); - }, - "def Test(v):\n" - " try:\n" - " print(v[{}])\n" - " except TypeError:\n" - " pass\n" - " else:\n" - " assert False\n" - ); - } +Y_UNIT_TEST(LazyListInvalidIndexType) { + TPythonTestEngine engine; + engine.ToPython<NUdf::TListType<i32>>( + [](const TType* type, const NUdf::IValueBuilder& vb) { + Y_UNUSED(type); + Y_UNUSED(vb); + return NUdf::TUnboxedValuePod(new NUdf::TLazyList<false>(0, 3)); + }, + "def Test(v):\n" + " try:\n" + " print(v[{}])\n" + " except TypeError:\n" + " pass\n" + " else:\n" + " assert False\n"); +} - Y_UNIT_TEST(ThinListInvalidIndexType) { - TPythonTestEngine engine; - engine.ToPython<NUdf::TListType<double>>( - [](const TType* type, const NUdf::IValueBuilder& vb) { - Y_UNUSED(type); - std::array<NUdf::TUnboxedValue, 3U> list = {{ - NUdf::TUnboxedValuePod(0.1), - NUdf::TUnboxedValuePod(0.2), - NUdf::TUnboxedValuePod(0.3) - }}; - return vb.NewList(list.data(), list.size()); - }, - "def Test(v):\n" - " try:\n" - " print(v[{}])\n" - " except TypeError:\n" - " pass\n" - " else:\n" - " assert False\n" - ); - } +Y_UNIT_TEST(ThinListInvalidIndexType) { + TPythonTestEngine engine; + engine.ToPython<NUdf::TListType<double>>( + [](const TType* type, const NUdf::IValueBuilder& vb) { + Y_UNUSED(type); + std::array<NUdf::TUnboxedValue, 3U> list = {{NUdf::TUnboxedValuePod(0.1), + NUdf::TUnboxedValuePod(0.2), + NUdf::TUnboxedValuePod(0.3)}}; + return vb.NewList(list.data(), list.size()); + }, + "def Test(v):\n" + " try:\n" + " print(v[{}])\n" + " except 
TypeError:\n" + " pass\n" + " else:\n" + " assert False\n"); +} - Y_UNIT_TEST(LazyListZeroSliceStep) { - TPythonTestEngine engine; - engine.ToPython<NUdf::TListType<i32>>( - [](const TType* type, const NUdf::IValueBuilder& vb) { - Y_UNUSED(type); Y_UNUSED(vb); - return NUdf::TUnboxedValuePod(new NUdf::TLazyList<false>(0, 3)); - }, - "def Test(v):\n" - " try:\n" - " print(v[::0])\n" - " except ValueError:\n" - " pass\n" - " else:\n" - " assert False\n" - ); - } +Y_UNIT_TEST(LazyListZeroSliceStep) { + TPythonTestEngine engine; + engine.ToPython<NUdf::TListType<i32>>( + [](const TType* type, const NUdf::IValueBuilder& vb) { + Y_UNUSED(type); + Y_UNUSED(vb); + return NUdf::TUnboxedValuePod(new NUdf::TLazyList<false>(0, 3)); + }, + "def Test(v):\n" + " try:\n" + " print(v[::0])\n" + " except ValueError:\n" + " pass\n" + " else:\n" + " assert False\n"); +} - Y_UNIT_TEST(ThinListZeroSliceStep) { - TPythonTestEngine engine; - engine.ToPython<NUdf::TListType<double>>( - [](const TType* type, const NUdf::IValueBuilder& vb) { - Y_UNUSED(type); - std::array<NUdf::TUnboxedValue, 3U> list = {{ - NUdf::TUnboxedValuePod(0.1), - NUdf::TUnboxedValuePod(0.2), - NUdf::TUnboxedValuePod(0.3) - }}; - return vb.NewList(list.data(), list.size()); - }, - "def Test(v):\n" - " try:\n" - " print(v[::0])\n" - " except ValueError:\n" - " pass\n" - " else:\n" - " assert False\n" - ); - } +Y_UNIT_TEST(ThinListZeroSliceStep) { + TPythonTestEngine engine; + engine.ToPython<NUdf::TListType<double>>( + [](const TType* type, const NUdf::IValueBuilder& vb) { + Y_UNUSED(type); + std::array<NUdf::TUnboxedValue, 3U> list = {{NUdf::TUnboxedValuePod(0.1), + NUdf::TUnboxedValuePod(0.2), + NUdf::TUnboxedValuePod(0.3)}}; + return vb.NewList(list.data(), list.size()); + }, + "def Test(v):\n" + " try:\n" + " print(v[::0])\n" + " except ValueError:\n" + " pass\n" + " else:\n" + " assert False\n"); +} - Y_UNIT_TEST(ThinListSlice) { - TPythonTestEngine engine; - engine.ToPython<NUdf::TListType<i32>>( - [](const 
TType* type, const NUdf::IValueBuilder& vb) { - Y_UNUSED(type); - std::array<NUdf::TUnboxedValue, 10U> list = {{ - NUdf::TUnboxedValuePod(0U), - NUdf::TUnboxedValuePod(1U), - NUdf::TUnboxedValuePod(2U), - NUdf::TUnboxedValuePod(3U), - NUdf::TUnboxedValuePod(4U), - NUdf::TUnboxedValuePod(5U), - NUdf::TUnboxedValuePod(6U), - NUdf::TUnboxedValuePod(7U), - NUdf::TUnboxedValuePod(8U), - NUdf::TUnboxedValuePod(9U) - }}; - return vb.NewList(list.data(), list.size()); - }, - "def Test(v):\n" - " e = list(range(0, 10))\n" - " assert '__len__' in dir(v)\n" - " assert list(v[::1]) == e[::1]\n" - " assert list(v[::-1]) == e[::-1]\n" - " assert list(v[1::1]) == e[1::1]\n" - " assert list(v[2::1]) == e[2::1]\n" - " assert list(v[3::1]) == e[3::1]\n" - " assert list(v[:-1:1]) == e[:-1:1]\n" - " assert list(v[:-2:1]) == e[:-2:1]\n" - " assert list(v[:-3:1]) == e[:-3:1]\n" - " assert list(v[1::-1]) == e[1::-1]\n" - " assert list(v[2::-1]) == e[2::-1]\n" - " assert list(v[3::-1]) == e[3::-1]\n" - " assert list(v[:-1:-1]) == e[:-1:-1]\n" - " assert list(v[:-2:-1]) == e[:-2:-1]\n" - " assert list(v[:-3:-1]) == e[:-3:-1]\n" - " assert list(v[:-2:-1]) == e[:-2:-1]\n" - " assert list(v[-12:-1:1]) == e[-12:-1:1]\n" - " assert list(v[-12:-1:-1]) == e[-12:-1:-1]\n" - " assert list(v[-5:-3:1]) == e[-5:-3:1]\n" - " assert list(v[-7:-2:-1]) == e[-7:-2:-1]\n" - " assert list(v[:7:1]) == e[:7:1]\n" - " assert list(v[-1:4]) == e[-1:4]\n" - " assert list(v[5:11]) == e[5:11]\n" - " assert list(v[4:1]) == e[4:1]\n" - " assert list(v[5:-2]) == e[5:-2]\n" - ); - } +Y_UNIT_TEST(ThinListSlice) { + TPythonTestEngine engine; + engine.ToPython<NUdf::TListType<i32>>( + [](const TType* type, const NUdf::IValueBuilder& vb) { + Y_UNUSED(type); + std::array<NUdf::TUnboxedValue, 10U> list = {{NUdf::TUnboxedValuePod(0U), + NUdf::TUnboxedValuePod(1U), + NUdf::TUnboxedValuePod(2U), + NUdf::TUnboxedValuePod(3U), + NUdf::TUnboxedValuePod(4U), + NUdf::TUnboxedValuePod(5U), + NUdf::TUnboxedValuePod(6U), + 
NUdf::TUnboxedValuePod(7U), + NUdf::TUnboxedValuePod(8U), + NUdf::TUnboxedValuePod(9U)}}; + return vb.NewList(list.data(), list.size()); + }, + "def Test(v):\n" + " e = list(range(0, 10))\n" + " assert '__len__' in dir(v)\n" + " assert list(v[::1]) == e[::1]\n" + " assert list(v[::-1]) == e[::-1]\n" + " assert list(v[1::1]) == e[1::1]\n" + " assert list(v[2::1]) == e[2::1]\n" + " assert list(v[3::1]) == e[3::1]\n" + " assert list(v[:-1:1]) == e[:-1:1]\n" + " assert list(v[:-2:1]) == e[:-2:1]\n" + " assert list(v[:-3:1]) == e[:-3:1]\n" + " assert list(v[1::-1]) == e[1::-1]\n" + " assert list(v[2::-1]) == e[2::-1]\n" + " assert list(v[3::-1]) == e[3::-1]\n" + " assert list(v[:-1:-1]) == e[:-1:-1]\n" + " assert list(v[:-2:-1]) == e[:-2:-1]\n" + " assert list(v[:-3:-1]) == e[:-3:-1]\n" + " assert list(v[:-2:-1]) == e[:-2:-1]\n" + " assert list(v[-12:-1:1]) == e[-12:-1:1]\n" + " assert list(v[-12:-1:-1]) == e[-12:-1:-1]\n" + " assert list(v[-5:-3:1]) == e[-5:-3:1]\n" + " assert list(v[-7:-2:-1]) == e[-7:-2:-1]\n" + " assert list(v[:7:1]) == e[:7:1]\n" + " assert list(v[-1:4]) == e[-1:4]\n" + " assert list(v[5:11]) == e[5:11]\n" + " assert list(v[4:1]) == e[4:1]\n" + " assert list(v[5:-2]) == e[5:-2]\n"); +} - Y_UNIT_TEST(ThinListSliceOverReversed) { - TPythonTestEngine engine; - engine.ToPython<NUdf::TListType<i32>>( - [](const TType* type, const NUdf::IValueBuilder& vb) { - Y_UNUSED(type); - std::array<NUdf::TUnboxedValue, 10U> list = {{ - NUdf::TUnboxedValuePod(0U), - NUdf::TUnboxedValuePod(1U), - NUdf::TUnboxedValuePod(2U), - NUdf::TUnboxedValuePod(3U), - NUdf::TUnboxedValuePod(4U), - NUdf::TUnboxedValuePod(5U), - NUdf::TUnboxedValuePod(6U), - NUdf::TUnboxedValuePod(7U), - NUdf::TUnboxedValuePod(8U), - NUdf::TUnboxedValuePod(9U) - }}; - return vb.NewList(list.data(), list.size()); - }, - "def Test(x):\n" - " e = list(reversed(range(0, 10)))\n" - " v = reversed(x)\n" - " assert list(v[::1]) == e[::1]\n" - " assert list(v[::-1]) == e[::-1]\n" - " assert list(v[1::1]) 
== e[1::1]\n" - " assert list(v[2::1]) == e[2::1]\n" - " assert list(v[3::1]) == e[3::1]\n" - " assert list(v[:-1:1]) == e[:-1:1]\n" - " assert list(v[:-2:1]) == e[:-2:1]\n" - " assert list(v[:-3:1]) == e[:-3:1]\n" - " assert list(v[1::-1]) == e[1::-1]\n" - " assert list(v[2::-1]) == e[2::-1]\n" - " assert list(v[3::-1]) == e[3::-1]\n" - " assert list(v[:-1:-1]) == e[:-1:-1]\n" - " assert list(v[:-2:-1]) == e[:-2:-1]\n" - " assert list(v[:-3:-1]) == e[:-3:-1]\n" - " assert list(v[:-2:-1]) == e[:-2:-1]\n" - " assert list(v[-12:-1:1]) == e[-12:-1:1]\n" - " assert list(v[-12:-1:-1]) == e[-12:-1:-1]\n" - " assert list(v[-5:-3:1]) == e[-5:-3:1]\n" - " assert list(v[-7:-2:-1]) == e[-7:-2:-1]\n" - " assert list(v[:7:1]) == e[:7:1]\n" - " assert list(v[-1:4]) == e[-1:4]\n" - " assert list(v[5:11]) == e[5:11]\n" - " assert list(v[4:1]) == e[4:1]\n" - " assert list(v[5:-2]) == e[5:-2]\n" - ); - } +Y_UNIT_TEST(ThinListSliceOverReversed) { + TPythonTestEngine engine; + engine.ToPython<NUdf::TListType<i32>>( + [](const TType* type, const NUdf::IValueBuilder& vb) { + Y_UNUSED(type); + std::array<NUdf::TUnboxedValue, 10U> list = {{NUdf::TUnboxedValuePod(0U), + NUdf::TUnboxedValuePod(1U), + NUdf::TUnboxedValuePod(2U), + NUdf::TUnboxedValuePod(3U), + NUdf::TUnboxedValuePod(4U), + NUdf::TUnboxedValuePod(5U), + NUdf::TUnboxedValuePod(6U), + NUdf::TUnboxedValuePod(7U), + NUdf::TUnboxedValuePod(8U), + NUdf::TUnboxedValuePod(9U)}}; + return vb.NewList(list.data(), list.size()); + }, + "def Test(x):\n" + " e = list(reversed(range(0, 10)))\n" + " v = reversed(x)\n" + " assert list(v[::1]) == e[::1]\n" + " assert list(v[::-1]) == e[::-1]\n" + " assert list(v[1::1]) == e[1::1]\n" + " assert list(v[2::1]) == e[2::1]\n" + " assert list(v[3::1]) == e[3::1]\n" + " assert list(v[:-1:1]) == e[:-1:1]\n" + " assert list(v[:-2:1]) == e[:-2:1]\n" + " assert list(v[:-3:1]) == e[:-3:1]\n" + " assert list(v[1::-1]) == e[1::-1]\n" + " assert list(v[2::-1]) == e[2::-1]\n" + " assert list(v[3::-1]) == 
e[3::-1]\n" + " assert list(v[:-1:-1]) == e[:-1:-1]\n" + " assert list(v[:-2:-1]) == e[:-2:-1]\n" + " assert list(v[:-3:-1]) == e[:-3:-1]\n" + " assert list(v[:-2:-1]) == e[:-2:-1]\n" + " assert list(v[-12:-1:1]) == e[-12:-1:1]\n" + " assert list(v[-12:-1:-1]) == e[-12:-1:-1]\n" + " assert list(v[-5:-3:1]) == e[-5:-3:1]\n" + " assert list(v[-7:-2:-1]) == e[-7:-2:-1]\n" + " assert list(v[:7:1]) == e[:7:1]\n" + " assert list(v[-1:4]) == e[-1:4]\n" + " assert list(v[5:11]) == e[5:11]\n" + " assert list(v[4:1]) == e[4:1]\n" + " assert list(v[5:-2]) == e[5:-2]\n"); +} - Y_UNIT_TEST(LazyListSlice) { - TPythonTestEngine engine; - engine.ToPython<NUdf::TListType<i32>>( - [](const TType* type, const NUdf::IValueBuilder& vb) { - Y_UNUSED(type); Y_UNUSED(vb); - return NUdf::TUnboxedValuePod(new NUdf::TLazyList<true>(0, 10)); - }, - "def Test(v):\n" - " e = list(range(0, 10))\n" - " assert '__len__' in dir(v)\n" - " assert len(v) == len(e)\n" - " assert list(v[::1]) == e[::1]\n" - " assert list(v[::-1]) == e[::-1]\n" - " assert list(v[3:]) == e[3:]\n" - " assert list(v[-2:]) == e[-2:]\n" - " assert list(v[2::-1]) == e[2::-1]\n" - " assert list(v[:-2:-1]) == e[:-2:-1]\n" - " assert list(v[-12:-1:1]) == e[-12:-1:1]\n" - " assert list(v[-12:-1:-1]) == e[-12:-1:-1]\n" - " assert list(v[-5:-3:1]) == e[-5:-3:1]\n" - " assert list(v[-7:-2:-1]) == e[-7:-2:-1]\n" - " assert list(v[:7:1]) == e[:7:1]\n" - " assert list(v[-1:4]) == e[-1:4]\n" - " assert list(v[5:11]) == e[5:11]\n" - " assert list(v[4:1]) == e[4:1]\n" - " assert list(v[5:-2]) == e[5:-2]\n" - ); - } +Y_UNIT_TEST(LazyListSlice) { + TPythonTestEngine engine; + engine.ToPython<NUdf::TListType<i32>>( + [](const TType* type, const NUdf::IValueBuilder& vb) { + Y_UNUSED(type); + Y_UNUSED(vb); + return NUdf::TUnboxedValuePod(new NUdf::TLazyList<true>(0, 10)); + }, + "def Test(v):\n" + " e = list(range(0, 10))\n" + " assert '__len__' in dir(v)\n" + " assert len(v) == len(e)\n" + " assert list(v[::1]) == e[::1]\n" + " assert 
list(v[::-1]) == e[::-1]\n" + " assert list(v[3:]) == e[3:]\n" + " assert list(v[-2:]) == e[-2:]\n" + " assert list(v[2::-1]) == e[2::-1]\n" + " assert list(v[:-2:-1]) == e[:-2:-1]\n" + " assert list(v[-12:-1:1]) == e[-12:-1:1]\n" + " assert list(v[-12:-1:-1]) == e[-12:-1:-1]\n" + " assert list(v[-5:-3:1]) == e[-5:-3:1]\n" + " assert list(v[-7:-2:-1]) == e[-7:-2:-1]\n" + " assert list(v[:7:1]) == e[:7:1]\n" + " assert list(v[-1:4]) == e[-1:4]\n" + " assert list(v[5:11]) == e[5:11]\n" + " assert list(v[4:1]) == e[4:1]\n" + " assert list(v[5:-2]) == e[5:-2]\n"); +} - Y_UNIT_TEST(ThinListIterateSliceWithStep) { - TPythonTestEngine engine; - engine.ToPython<NUdf::TListType<i32>>( - [](const TType* type, const NUdf::IValueBuilder& vb) { - Y_UNUSED(type); - std::array<NUdf::TUnboxedValue, 20U> list = {{ - NUdf::TUnboxedValuePod(0U), - NUdf::TUnboxedValuePod(1U), - NUdf::TUnboxedValuePod(2U), - NUdf::TUnboxedValuePod(3U), - NUdf::TUnboxedValuePod(4U), - NUdf::TUnboxedValuePod(5U), - NUdf::TUnboxedValuePod(6U), - NUdf::TUnboxedValuePod(7U), - NUdf::TUnboxedValuePod(8U), - NUdf::TUnboxedValuePod(9U), - NUdf::TUnboxedValuePod(10U), - NUdf::TUnboxedValuePod(11U), - NUdf::TUnboxedValuePod(12U), - NUdf::TUnboxedValuePod(13U), - NUdf::TUnboxedValuePod(14U), - NUdf::TUnboxedValuePod(15U), - NUdf::TUnboxedValuePod(16U), - NUdf::TUnboxedValuePod(17U), - NUdf::TUnboxedValuePod(18U), - NUdf::TUnboxedValuePod(19U) - }}; - return vb.NewList(list.data(), list.size()); - }, - "def Test(v):\n" - " e = list(range(0, 20))\n" - " assert all(one == two for one, two in zip(iter(v[::2]), e[::2]))\n" - " assert all(one == two for one, two in zip(iter(v[3:8:2]), e[3:8:2]))\n" - " assert all(one == two for one, two in zip(iter(v[::-2]), e[::-2]))\n" - " assert all(one == two for one, two in zip(iter(v[::-3]), e[::-3]))\n" - " assert all(one == two for one, two in zip(iter(v[:3:-3]), e[:3:-3]))\n" - " assert all(one == two for one, two in zip(iter(v[-7::-3]), e[-7::-3]))\n" - " assert all(one == 
two for one, two in zip(iter(v[-6::-3]), e[-6::-3]))\n" - " assert all(one == two for one, two in zip(iter(v[-5::-3]), e[-5::-3]))\n" - " assert all(one == two for one, two in zip(iter(v[:-2:-2]), e[:-2:-2]))\n" - " assert all(one == two for one, two in zip(iter(v[-2:-6:-2]), e[-2:-6:-2]))\n" - " assert all(one == two for one, two in zip(iter(v[2:-6:-2][::2]), e[2:-6:-2][::2]))\n" - " assert all(one == two for one, two in zip(iter(v[2:6:-2][:-2:-2]), e[2:6:-2][:-2:-2]))\n" - " assert all(one == two for one, two in zip(iter(v[:-2:-2][:2:3]), e[:-2:-2][:2:3]))\n" - " assert all(one == two for one, two in zip(iter(v[:-2:-2][:2:-3]), e[:-2:-2][:2:-3]))\n" - " assert all(one == two for one, two in zip(iter(v[:-2:2][:2:3]), e[:-2:2][:2:3]))\n" - ); - } +Y_UNIT_TEST(ThinListIterateSliceWithStep) { + TPythonTestEngine engine; + engine.ToPython<NUdf::TListType<i32>>( + [](const TType* type, const NUdf::IValueBuilder& vb) { + Y_UNUSED(type); + std::array<NUdf::TUnboxedValue, 20U> list = {{NUdf::TUnboxedValuePod(0U), + NUdf::TUnboxedValuePod(1U), + NUdf::TUnboxedValuePod(2U), + NUdf::TUnboxedValuePod(3U), + NUdf::TUnboxedValuePod(4U), + NUdf::TUnboxedValuePod(5U), + NUdf::TUnboxedValuePod(6U), + NUdf::TUnboxedValuePod(7U), + NUdf::TUnboxedValuePod(8U), + NUdf::TUnboxedValuePod(9U), + NUdf::TUnboxedValuePod(10U), + NUdf::TUnboxedValuePod(11U), + NUdf::TUnboxedValuePod(12U), + NUdf::TUnboxedValuePod(13U), + NUdf::TUnboxedValuePod(14U), + NUdf::TUnboxedValuePod(15U), + NUdf::TUnboxedValuePod(16U), + NUdf::TUnboxedValuePod(17U), + NUdf::TUnboxedValuePod(18U), + NUdf::TUnboxedValuePod(19U)}}; + return vb.NewList(list.data(), list.size()); + }, + "def Test(v):\n" + " e = list(range(0, 20))\n" + " assert all(one == two for one, two in zip(iter(v[::2]), e[::2]))\n" + " assert all(one == two for one, two in zip(iter(v[3:8:2]), e[3:8:2]))\n" + " assert all(one == two for one, two in zip(iter(v[::-2]), e[::-2]))\n" + " assert all(one == two for one, two in zip(iter(v[::-3]), 
e[::-3]))\n" + " assert all(one == two for one, two in zip(iter(v[:3:-3]), e[:3:-3]))\n" + " assert all(one == two for one, two in zip(iter(v[-7::-3]), e[-7::-3]))\n" + " assert all(one == two for one, two in zip(iter(v[-6::-3]), e[-6::-3]))\n" + " assert all(one == two for one, two in zip(iter(v[-5::-3]), e[-5::-3]))\n" + " assert all(one == two for one, two in zip(iter(v[:-2:-2]), e[:-2:-2]))\n" + " assert all(one == two for one, two in zip(iter(v[-2:-6:-2]), e[-2:-6:-2]))\n" + " assert all(one == two for one, two in zip(iter(v[2:-6:-2][::2]), e[2:-6:-2][::2]))\n" + " assert all(one == two for one, two in zip(iter(v[2:6:-2][:-2:-2]), e[2:6:-2][:-2:-2]))\n" + " assert all(one == two for one, two in zip(iter(v[:-2:-2][:2:3]), e[:-2:-2][:2:3]))\n" + " assert all(one == two for one, two in zip(iter(v[:-2:-2][:2:-3]), e[:-2:-2][:2:-3]))\n" + " assert all(one == two for one, two in zip(iter(v[:-2:2][:2:3]), e[:-2:2][:2:3]))\n"); +} - Y_UNIT_TEST(LazyListIterateSliceWithStep) { - TPythonTestEngine engine; - engine.ToPython<NUdf::TListType<i32>>( - [](const TType* type, const NUdf::IValueBuilder& vb) { - Y_UNUSED(type); Y_UNUSED(vb); - return NUdf::TUnboxedValuePod(new NUdf::TLazyList<true>(0, 20)); - }, - "def Test(v):\n" - " e = list(range(0, 20))\n" - " assert all(one == two for one, two in zip(iter(v[::2]), e[::2]))\n" - " assert all(one == two for one, two in zip(iter(v[::-3]), e[::-3]))\n" - " assert all(one == two for one, two in zip(iter(v[:3:-3]), e[:3:-3]))\n" - " assert all(one == two for one, two in zip(iter(v[3:4:2]), e[3:4:2]))\n" - " assert all(one == two for one, two in zip(iter(v[-7::-3]), e[-7::-3]))\n" - " assert all(one == two for one, two in zip(iter(v[-6::-3]), e[-6::-3]))\n" - " assert all(one == two for one, two in zip(iter(v[-5::-3]), e[-5::-3]))\n" - " assert all(one == two for one, two in zip(iter(v[:-2:-2]), e[:-2:-2]))\n" - " assert all(one == two for one, two in zip(iter(v[-2:-6:-2]), e[-2:-6:-2]))\n" - " assert all(one == two for one, two 
in zip(iter(v[2:-6:-2][::2]), e[2:-6:-2][::2]))\n" - " assert all(one == two for one, two in zip(iter(v[2:6:-2][:-2:-2]), e[2:6:-2][:-2:-2]))\n" - " assert all(one == two for one, two in zip(iter(v[:-2:2][:2:3]), e[:-2:2][:2:3]))\n" - " assert all(one == two for one, two in zip(iter(v[:-2:-2][:2:3]), e[:-2:-2][:2:3]))\n" - " assert all(one == two for one, two in zip(iter(v[:-2:-2][:2:-3]), e[:-2:-2][:2:-3]))\n" - ); - } +Y_UNIT_TEST(LazyListIterateSliceWithStep) { + TPythonTestEngine engine; + engine.ToPython<NUdf::TListType<i32>>( + [](const TType* type, const NUdf::IValueBuilder& vb) { + Y_UNUSED(type); + Y_UNUSED(vb); + return NUdf::TUnboxedValuePod(new NUdf::TLazyList<true>(0, 20)); + }, + "def Test(v):\n" + " e = list(range(0, 20))\n" + " assert all(one == two for one, two in zip(iter(v[::2]), e[::2]))\n" + " assert all(one == two for one, two in zip(iter(v[::-3]), e[::-3]))\n" + " assert all(one == two for one, two in zip(iter(v[:3:-3]), e[:3:-3]))\n" + " assert all(one == two for one, two in zip(iter(v[3:4:2]), e[3:4:2]))\n" + " assert all(one == two for one, two in zip(iter(v[-7::-3]), e[-7::-3]))\n" + " assert all(one == two for one, two in zip(iter(v[-6::-3]), e[-6::-3]))\n" + " assert all(one == two for one, two in zip(iter(v[-5::-3]), e[-5::-3]))\n" + " assert all(one == two for one, two in zip(iter(v[:-2:-2]), e[:-2:-2]))\n" + " assert all(one == two for one, two in zip(iter(v[-2:-6:-2]), e[-2:-6:-2]))\n" + " assert all(one == two for one, two in zip(iter(v[2:-6:-2][::2]), e[2:-6:-2][::2]))\n" + " assert all(one == two for one, two in zip(iter(v[2:6:-2][:-2:-2]), e[2:6:-2][:-2:-2]))\n" + " assert all(one == two for one, two in zip(iter(v[:-2:2][:2:3]), e[:-2:2][:2:3]))\n" + " assert all(one == two for one, two in zip(iter(v[:-2:-2][:2:3]), e[:-2:-2][:2:3]))\n" + " assert all(one == two for one, two in zip(iter(v[:-2:-2][:2:-3]), e[:-2:-2][:2:-3]))\n"); +} - Y_UNIT_TEST(ThinListGetByIndexSliceWithStep) { - TPythonTestEngine engine; - 
engine.ToPython<NUdf::TListType<i32>>( - [](const TType* type, const NUdf::IValueBuilder& vb) { - Y_UNUSED(type); - std::array<NUdf::TUnboxedValue, 20U> list = {{ - NUdf::TUnboxedValuePod(0U), - NUdf::TUnboxedValuePod(1U), - NUdf::TUnboxedValuePod(2U), - NUdf::TUnboxedValuePod(3U), - NUdf::TUnboxedValuePod(4U), - NUdf::TUnboxedValuePod(5U), - NUdf::TUnboxedValuePod(6U), - NUdf::TUnboxedValuePod(7U), - NUdf::TUnboxedValuePod(8U), - NUdf::TUnboxedValuePod(9U), - NUdf::TUnboxedValuePod(10U), - NUdf::TUnboxedValuePod(11U), - NUdf::TUnboxedValuePod(12U), - NUdf::TUnboxedValuePod(13U), - NUdf::TUnboxedValuePod(14U), - NUdf::TUnboxedValuePod(15U), - NUdf::TUnboxedValuePod(16U), - NUdf::TUnboxedValuePod(17U), - NUdf::TUnboxedValuePod(18U), - NUdf::TUnboxedValuePod(19U) - }}; - return vb.NewList(list.data(), list.size()); - }, - "def Test(v):\n" - " e = list(range(0, 20))\n" - " assert v[::2][3] == e[::2][3]\n" - " assert v[::2][5] == e[::2][5]\n" - " assert v[::2][-3] == e[::2][-3]\n" - " assert v[::2][-7] == e[::2][-7]\n" - " assert v[2::2][4] == e[2::2][4]\n" - " assert v[2::2][5] == e[2::2][5]\n" - " assert v[2::2][-7] == e[2::2][-7]\n" - " assert v[2::2][-2] == e[2::2][-2]\n" - " assert v[:-3:2][2] == e[:-3:2][2]\n" - " assert v[:-3:2][4] == e[:-3:2][4]\n" - " assert v[:-3:2][-1] == e[:-3:2][-1]\n" - " assert v[:-3:2][-2] == e[:-3:2][-2]\n" - " assert v[:-4:3][2] == e[:-4:3][2]\n" - " assert v[:-4:3][4] == e[:-4:3][4]\n" - " assert v[:-4:3][-3] == e[:-4:3][-3]\n" - " assert v[:-4:3][-2] == e[:-4:3][-2]\n" - " assert v[-6::-3][1] == e[-6::-3][1]\n" - " assert v[-6::-3][3] == e[-6::-3][3]\n" - " assert v[-6::-3][-4] == e[-6::-3][-4]\n" - " assert v[-6::-3][-1] == e[-6::-3][-1]\n" - ); - } +Y_UNIT_TEST(ThinListGetByIndexSliceWithStep) { + TPythonTestEngine engine; + engine.ToPython<NUdf::TListType<i32>>( + [](const TType* type, const NUdf::IValueBuilder& vb) { + Y_UNUSED(type); + std::array<NUdf::TUnboxedValue, 20U> list = {{NUdf::TUnboxedValuePod(0U), + 
NUdf::TUnboxedValuePod(1U), + NUdf::TUnboxedValuePod(2U), + NUdf::TUnboxedValuePod(3U), + NUdf::TUnboxedValuePod(4U), + NUdf::TUnboxedValuePod(5U), + NUdf::TUnboxedValuePod(6U), + NUdf::TUnboxedValuePod(7U), + NUdf::TUnboxedValuePod(8U), + NUdf::TUnboxedValuePod(9U), + NUdf::TUnboxedValuePod(10U), + NUdf::TUnboxedValuePod(11U), + NUdf::TUnboxedValuePod(12U), + NUdf::TUnboxedValuePod(13U), + NUdf::TUnboxedValuePod(14U), + NUdf::TUnboxedValuePod(15U), + NUdf::TUnboxedValuePod(16U), + NUdf::TUnboxedValuePod(17U), + NUdf::TUnboxedValuePod(18U), + NUdf::TUnboxedValuePod(19U)}}; + return vb.NewList(list.data(), list.size()); + }, + "def Test(v):\n" + " e = list(range(0, 20))\n" + " assert v[::2][3] == e[::2][3]\n" + " assert v[::2][5] == e[::2][5]\n" + " assert v[::2][-3] == e[::2][-3]\n" + " assert v[::2][-7] == e[::2][-7]\n" + " assert v[2::2][4] == e[2::2][4]\n" + " assert v[2::2][5] == e[2::2][5]\n" + " assert v[2::2][-7] == e[2::2][-7]\n" + " assert v[2::2][-2] == e[2::2][-2]\n" + " assert v[:-3:2][2] == e[:-3:2][2]\n" + " assert v[:-3:2][4] == e[:-3:2][4]\n" + " assert v[:-3:2][-1] == e[:-3:2][-1]\n" + " assert v[:-3:2][-2] == e[:-3:2][-2]\n" + " assert v[:-4:3][2] == e[:-4:3][2]\n" + " assert v[:-4:3][4] == e[:-4:3][4]\n" + " assert v[:-4:3][-3] == e[:-4:3][-3]\n" + " assert v[:-4:3][-2] == e[:-4:3][-2]\n" + " assert v[-6::-3][1] == e[-6::-3][1]\n" + " assert v[-6::-3][3] == e[-6::-3][3]\n" + " assert v[-6::-3][-4] == e[-6::-3][-4]\n" + " assert v[-6::-3][-1] == e[-6::-3][-1]\n"); +} - Y_UNIT_TEST(LazyListGetByIndexSliceWithStep) { - TPythonTestEngine engine; - engine.ToPython<NUdf::TListType<i32>>( - [](const TType* type, const NUdf::IValueBuilder& vb) { - Y_UNUSED(type); Y_UNUSED(vb); - return NUdf::TUnboxedValuePod(new NUdf::TLazyList<true>(0, 20)); - }, - "def Test(v):\n" - " e = list(range(0, 20))\n" - " assert v[::2][3] == e[::2][3]\n" - " assert v[::2][5] == e[::2][5]\n" - " assert v[::2][-3] == e[::2][-3]\n" - " assert v[::2][-7] == e[::2][-7]\n" - " 
assert v[2::2][4] == e[2::2][4]\n" - " assert v[2::2][5] == e[2::2][5]\n" - " assert v[2::2][-7] == e[2::2][-7]\n" - " assert v[2::2][-2] == e[2::2][-2]\n" - " assert v[:-3:2][2] == e[:-3:2][2]\n" - " assert v[:-3:2][4] == e[:-3:2][4]\n" - " assert v[:-3:2][-1] == e[:-3:2][-1]\n" - " assert v[:-3:2][-2] == e[:-3:2][-2]\n" - " assert v[:-4:3][2] == e[:-4:3][2]\n" - " assert v[:-4:3][4] == e[:-4:3][4]\n" - " assert v[:-4:3][-3] == e[:-4:3][-3]\n" - " assert v[:-4:3][-2] == e[:-4:3][-2]\n" - " assert v[-6::-3][1] == e[-6::-3][1]\n" - " assert v[-6::-3][3] == e[-6::-3][3]\n" - " assert v[-6::-3][-4] == e[-6::-3][-4]\n" - " assert v[-6::-3][-1] == e[-6::-3][-1]\n" - ); - } +Y_UNIT_TEST(LazyListGetByIndexSliceWithStep) { + TPythonTestEngine engine; + engine.ToPython<NUdf::TListType<i32>>( + [](const TType* type, const NUdf::IValueBuilder& vb) { + Y_UNUSED(type); + Y_UNUSED(vb); + return NUdf::TUnboxedValuePod(new NUdf::TLazyList<true>(0, 20)); + }, + "def Test(v):\n" + " e = list(range(0, 20))\n" + " assert v[::2][3] == e[::2][3]\n" + " assert v[::2][5] == e[::2][5]\n" + " assert v[::2][-3] == e[::2][-3]\n" + " assert v[::2][-7] == e[::2][-7]\n" + " assert v[2::2][4] == e[2::2][4]\n" + " assert v[2::2][5] == e[2::2][5]\n" + " assert v[2::2][-7] == e[2::2][-7]\n" + " assert v[2::2][-2] == e[2::2][-2]\n" + " assert v[:-3:2][2] == e[:-3:2][2]\n" + " assert v[:-3:2][4] == e[:-3:2][4]\n" + " assert v[:-3:2][-1] == e[:-3:2][-1]\n" + " assert v[:-3:2][-2] == e[:-3:2][-2]\n" + " assert v[:-4:3][2] == e[:-4:3][2]\n" + " assert v[:-4:3][4] == e[:-4:3][4]\n" + " assert v[:-4:3][-3] == e[:-4:3][-3]\n" + " assert v[:-4:3][-2] == e[:-4:3][-2]\n" + " assert v[-6::-3][1] == e[-6::-3][1]\n" + " assert v[-6::-3][3] == e[-6::-3][3]\n" + " assert v[-6::-3][-4] == e[-6::-3][-4]\n" + " assert v[-6::-3][-1] == e[-6::-3][-1]\n"); +} - Y_UNIT_TEST(ThinListByIndex) { - TPythonTestEngine engine; - engine.ToPython<NUdf::TListType<i32>>( - [](const TType* type, const NUdf::IValueBuilder& vb) { - 
Y_UNUSED(type); - std::array<NUdf::TUnboxedValue, 10U> list = {{ - NUdf::TUnboxedValuePod(0U), - NUdf::TUnboxedValuePod(1U), - NUdf::TUnboxedValuePod(2U), - NUdf::TUnboxedValuePod(3U), - NUdf::TUnboxedValuePod(4U), - NUdf::TUnboxedValuePod(5U), - NUdf::TUnboxedValuePod(6U), - NUdf::TUnboxedValuePod(7U), - NUdf::TUnboxedValuePod(8U), - NUdf::TUnboxedValuePod(9U) - }}; - return vb.NewList(list.data(), list.size()); - }, - "def Test(v):\n" - " e = list(range(0, 10))\n" - " assert '__getitem__' in dir(v)\n" - " assert v[0] == e[0]\n" - " assert v[3] == e[3]\n" - " assert v[5] == e[5]\n" - " assert v[9] == e[9]\n" - " assert v[-1] == e[-1]\n" - " assert v[-4] == e[-4]\n" - " assert v[-9] == e[-9]\n" - " assert v[-10] == e[-10]\n" - ); - } +Y_UNIT_TEST(ThinListByIndex) { + TPythonTestEngine engine; + engine.ToPython<NUdf::TListType<i32>>( + [](const TType* type, const NUdf::IValueBuilder& vb) { + Y_UNUSED(type); + std::array<NUdf::TUnboxedValue, 10U> list = {{NUdf::TUnboxedValuePod(0U), + NUdf::TUnboxedValuePod(1U), + NUdf::TUnboxedValuePod(2U), + NUdf::TUnboxedValuePod(3U), + NUdf::TUnboxedValuePod(4U), + NUdf::TUnboxedValuePod(5U), + NUdf::TUnboxedValuePod(6U), + NUdf::TUnboxedValuePod(7U), + NUdf::TUnboxedValuePod(8U), + NUdf::TUnboxedValuePod(9U)}}; + return vb.NewList(list.data(), list.size()); + }, + "def Test(v):\n" + " e = list(range(0, 10))\n" + " assert '__getitem__' in dir(v)\n" + " assert v[0] == e[0]\n" + " assert v[3] == e[3]\n" + " assert v[5] == e[5]\n" + " assert v[9] == e[9]\n" + " assert v[-1] == e[-1]\n" + " assert v[-4] == e[-4]\n" + " assert v[-9] == e[-9]\n" + " assert v[-10] == e[-10]\n"); +} - Y_UNIT_TEST(LazyListByIndex) { - TPythonTestEngine engine; - engine.ToPython<NUdf::TListType<i32>>( - [](const TType* type, const NUdf::IValueBuilder& vb) { - Y_UNUSED(type); Y_UNUSED(vb); - return NUdf::TUnboxedValuePod(new NUdf::TLazyList<false>(0, 10)); - }, - "def Test(v):\n" - " e = list(range(0, 10))\n" - " assert '__getitem__' in dir(v)\n" - " assert 
v[0] == e[0]\n" - " assert v[3] == e[3]\n" - " assert v[5] == e[5]\n" - " assert v[9] == e[9]\n" - " assert v[-1] == e[-1]\n" - " assert v[-4] == e[-4]\n" - " assert v[-9] == e[-9]\n" - " assert v[-10] == e[-10]\n" - ); - } +Y_UNIT_TEST(LazyListByIndex) { + TPythonTestEngine engine; + engine.ToPython<NUdf::TListType<i32>>( + [](const TType* type, const NUdf::IValueBuilder& vb) { + Y_UNUSED(type); + Y_UNUSED(vb); + return NUdf::TUnboxedValuePod(new NUdf::TLazyList<false>(0, 10)); + }, + "def Test(v):\n" + " e = list(range(0, 10))\n" + " assert '__getitem__' in dir(v)\n" + " assert v[0] == e[0]\n" + " assert v[3] == e[3]\n" + " assert v[5] == e[5]\n" + " assert v[9] == e[9]\n" + " assert v[-1] == e[-1]\n" + " assert v[-4] == e[-4]\n" + " assert v[-9] == e[-9]\n" + " assert v[-10] == e[-10]\n"); +} - Y_UNIT_TEST(ThinListIndexOutOfBounds) { - TPythonTestEngine engine; - engine.ToPython<NUdf::TListType<i32>>( - [](const TType* type, const NUdf::IValueBuilder& vb) { - Y_UNUSED(type); - std::array<NUdf::TUnboxedValue, 3U> list = {{ - NUdf::TUnboxedValuePod(0U), - NUdf::TUnboxedValuePod(1U), - NUdf::TUnboxedValuePod(2U) - }}; - return vb.NewList(list.data(), list.size()); - }, - "def Test(v):\n" - " try:\n" - " print(v[3])\n" - " except IndexError:\n" - " pass\n" - " else:\n" - " assert False\n" - " try:\n" - " print(v[-4])\n" - " except IndexError:\n" - " pass\n" - " else:\n" - " assert False\n" - ); - } +Y_UNIT_TEST(ThinListIndexOutOfBounds) { + TPythonTestEngine engine; + engine.ToPython<NUdf::TListType<i32>>( + [](const TType* type, const NUdf::IValueBuilder& vb) { + Y_UNUSED(type); + std::array<NUdf::TUnboxedValue, 3U> list = {{NUdf::TUnboxedValuePod(0U), + NUdf::TUnboxedValuePod(1U), + NUdf::TUnboxedValuePod(2U)}}; + return vb.NewList(list.data(), list.size()); + }, + "def Test(v):\n" + " try:\n" + " print(v[3])\n" + " except IndexError:\n" + " pass\n" + " else:\n" + " assert False\n" + " try:\n" + " print(v[-4])\n" + " except IndexError:\n" + " pass\n" + " else:\n" 
+ " assert False\n"); +} - Y_UNIT_TEST(LazyListIndexOutOfBounds) { - TPythonTestEngine engine; - engine.ToPython<NUdf::TListType<i32>>( - [](const TType* type, const NUdf::IValueBuilder& vb) { - Y_UNUSED(type); Y_UNUSED(vb); - return NUdf::TUnboxedValuePod(new NUdf::TLazyList<false>(0, 3)); - }, - "def Test(v):\n" - " try:\n" - " print(v[3])\n" - " except IndexError:\n" - " pass\n" - " else:\n" - " assert False\n" - " try:\n" - " print(v[-4])\n" - " except IndexError:\n" - " pass\n" - " else:\n" - " assert False\n" - ); - } +Y_UNIT_TEST(LazyListIndexOutOfBounds) { + TPythonTestEngine engine; + engine.ToPython<NUdf::TListType<i32>>( + [](const TType* type, const NUdf::IValueBuilder& vb) { + Y_UNUSED(type); + Y_UNUSED(vb); + return NUdf::TUnboxedValuePod(new NUdf::TLazyList<false>(0, 3)); + }, + "def Test(v):\n" + " try:\n" + " print(v[3])\n" + " except IndexError:\n" + " pass\n" + " else:\n" + " assert False\n" + " try:\n" + " print(v[-4])\n" + " except IndexError:\n" + " pass\n" + " else:\n" + " assert False\n"); +} - Y_UNIT_TEST(LazyListWithoutLenghNormalSlice) { - TPythonTestEngine engine; - engine.ToPython<NUdf::TListType<i32>>( - [](const TType* type, const NUdf::IValueBuilder& vb) { - Y_UNUSED(type); Y_UNUSED(vb); - return NUdf::TUnboxedValuePod(new NUdf::TLazyList<false>(0, 10)); - }, - "def Test(v):\n" - " e = range(0, 10)\n" - " assert '__len__' in dir(v)\n" - " assert all(one == two for one, two in zip(iter(v[::1]), e[::1]))\n" - " assert all(one == two for one, two in zip(iter(v[::-1]), e[::-1]))\n" - " assert all(one == two for one, two in zip(iter(v[4:]), e[4:]))\n" - " assert all(one == two for one, two in zip(iter(v[1::-1]), e[1::-1]))\n" - " assert all(one == two for one, two in zip(iter(v[:6:1]), e[:6:1]))\n" - " assert all(one == two for one, two in zip(iter(v[1::-1]), e[1::-1]))\n" - " assert all(one == two for one, two in zip(iter(v[4:11]), e[4:11]))\n" - " assert all(one == two for one, two in zip(iter(v[5:1]), e[5:1]))\n" - ); - } 
+Y_UNIT_TEST(LazyListWithoutLenghNormalSlice) { + TPythonTestEngine engine; + engine.ToPython<NUdf::TListType<i32>>( + [](const TType* type, const NUdf::IValueBuilder& vb) { + Y_UNUSED(type); + Y_UNUSED(vb); + return NUdf::TUnboxedValuePod(new NUdf::TLazyList<false>(0, 10)); + }, + "def Test(v):\n" + " e = range(0, 10)\n" + " assert '__len__' in dir(v)\n" + " assert all(one == two for one, two in zip(iter(v[::1]), e[::1]))\n" + " assert all(one == two for one, two in zip(iter(v[::-1]), e[::-1]))\n" + " assert all(one == two for one, two in zip(iter(v[4:]), e[4:]))\n" + " assert all(one == two for one, two in zip(iter(v[1::-1]), e[1::-1]))\n" + " assert all(one == two for one, two in zip(iter(v[:6:1]), e[:6:1]))\n" + " assert all(one == two for one, two in zip(iter(v[1::-1]), e[1::-1]))\n" + " assert all(one == two for one, two in zip(iter(v[4:11]), e[4:11]))\n" + " assert all(one == two for one, two in zip(iter(v[5:1]), e[5:1]))\n"); +} - Y_UNIT_TEST(ThinListTakeSkip) { - TPythonTestEngine engine; - engine.ToPython<NUdf::TListType<i32>>( - [](const TType* type, const NUdf::IValueBuilder& vb) { - Y_UNUSED(type); - std::array<NUdf::TUnboxedValue, 10U> list = {{ - NUdf::TUnboxedValuePod(0U), - NUdf::TUnboxedValuePod(1U), - NUdf::TUnboxedValuePod(2U), - NUdf::TUnboxedValuePod(3U), - NUdf::TUnboxedValuePod(4U), - NUdf::TUnboxedValuePod(5U), - NUdf::TUnboxedValuePod(6U), - NUdf::TUnboxedValuePod(7U), - NUdf::TUnboxedValuePod(8U), - NUdf::TUnboxedValuePod(9U) - }}; - return vb.NewList(list.data(), list.size()); - }, - "def Test(v):\n" - " e = list(range(0, 10))\n" - " assert len(v) == len(e)\n" - " assert list(v.skip(5)) == e[5:]\n" - " assert list(v.take(5)) == e[0:5]\n" - " assert list(v.skip(4).take(5)) == e[4:][:5]\n" - " try:\n" - " print(list(v.skip(-1)))\n" - " except IndexError:\n" - " pass\n" - " else:\n" - " assert False\n" - ); - } +Y_UNIT_TEST(ThinListTakeSkip) { + TPythonTestEngine engine; + engine.ToPython<NUdf::TListType<i32>>( + [](const TType* type, const 
NUdf::IValueBuilder& vb) { + Y_UNUSED(type); + std::array<NUdf::TUnboxedValue, 10U> list = {{NUdf::TUnboxedValuePod(0U), + NUdf::TUnboxedValuePod(1U), + NUdf::TUnboxedValuePod(2U), + NUdf::TUnboxedValuePod(3U), + NUdf::TUnboxedValuePod(4U), + NUdf::TUnboxedValuePod(5U), + NUdf::TUnboxedValuePod(6U), + NUdf::TUnboxedValuePod(7U), + NUdf::TUnboxedValuePod(8U), + NUdf::TUnboxedValuePod(9U)}}; + return vb.NewList(list.data(), list.size()); + }, + "def Test(v):\n" + " e = list(range(0, 10))\n" + " assert len(v) == len(e)\n" + " assert list(v.skip(5)) == e[5:]\n" + " assert list(v.take(5)) == e[0:5]\n" + " assert list(v.skip(4).take(5)) == e[4:][:5]\n" + " try:\n" + " print(list(v.skip(-1)))\n" + " except IndexError:\n" + " pass\n" + " else:\n" + " assert False\n"); +} - Y_UNIT_TEST(LazyListTakeSkip) { - TPythonTestEngine engine; - engine.ToPython<NUdf::TListType<i32>>( - [](const TType* type, const NUdf::IValueBuilder& vb) { - Y_UNUSED(type); Y_UNUSED(vb); - return NUdf::TUnboxedValuePod(new NUdf::TLazyList<true>(0, 10)); - }, - "def Test(v):\n" - " e = list(range(0, 10))\n" - " assert list(v.skip(5)) == e[5:]\n" - " assert list(v.take(5)) == e[0:5]\n" - " assert list(v.skip(4).take(5)) == e[4:][:5]\n" - " try:\n" - " print(list(v.skip(-1)))\n" - " except IndexError:\n" - " pass\n" - " else:\n" - " assert False\n" - ); - } +Y_UNIT_TEST(LazyListTakeSkip) { + TPythonTestEngine engine; + engine.ToPython<NUdf::TListType<i32>>( + [](const TType* type, const NUdf::IValueBuilder& vb) { + Y_UNUSED(type); + Y_UNUSED(vb); + return NUdf::TUnboxedValuePod(new NUdf::TLazyList<true>(0, 10)); + }, + "def Test(v):\n" + " e = list(range(0, 10))\n" + " assert list(v.skip(5)) == e[5:]\n" + " assert list(v.take(5)) == e[0:5]\n" + " assert list(v.skip(4).take(5)) == e[4:][:5]\n" + " try:\n" + " print(list(v.skip(-1)))\n" + " except IndexError:\n" + " pass\n" + " else:\n" + " assert False\n"); +} - Y_UNIT_TEST(LazyListToIndexDict) { - TPythonTestEngine engine; - 
engine.ToPython<NUdf::TListType<i32>>( - [](const TType* type, const NUdf::IValueBuilder& vb) { - Y_UNUSED(type); Y_UNUSED(vb); - return NUdf::TUnboxedValuePod(new NUdf::TLazyList<false>(3, 6)); - }, - "def Test(value):\n" - " d = value.to_index_dict()\n" - " assert len(d) == 3\n" - " assert d[0] == 3\n" - " assert d[1] == 4\n" - " assert d[2] == 5\n" - " assert 3 not in d"); - } +Y_UNIT_TEST(LazyListToIndexDict) { + TPythonTestEngine engine; + engine.ToPython<NUdf::TListType<i32>>( + [](const TType* type, const NUdf::IValueBuilder& vb) { + Y_UNUSED(type); + Y_UNUSED(vb); + return NUdf::TUnboxedValuePod(new NUdf::TLazyList<false>(3, 6)); + }, + "def Test(value):\n" + " d = value.to_index_dict()\n" + " assert len(d) == 3\n" + " assert d[0] == 3\n" + " assert d[1] == 4\n" + " assert d[2] == 5\n" + " assert 3 not in d"); +} - Y_UNIT_TEST(LazyListTrue) { - TPythonTestEngine engine; - engine.ToPython<NUdf::TListType<i32>>( - [](const TType* type, const NUdf::IValueBuilder& vb) { - Y_UNUSED(type); - NUdf::TUnboxedValue *items = nullptr; - return vb.NewArray(1U, items); - }, - "def Test(value):\n" - " assert value\n" - ); - } +Y_UNIT_TEST(LazyListTrue) { + TPythonTestEngine engine; + engine.ToPython<NUdf::TListType<i32>>( + [](const TType* type, const NUdf::IValueBuilder& vb) { + Y_UNUSED(type); + NUdf::TUnboxedValue* items = nullptr; + return vb.NewArray(1U, items); + }, + "def Test(value):\n" + " assert value\n"); +} - Y_UNIT_TEST(LazyListFalse) { - TPythonTestEngine engine; - engine.ToPython<NUdf::TListType<i32>>( - [](const TType* type, const NUdf::IValueBuilder& vb) { - Y_UNUSED(type); Y_UNUSED(vb); - return NUdf::TUnboxedValuePod(new NUdf::TLazyList<false>(0, 0)); - }, - "def Test(value):\n" - " assert not value\n" - ); - } +Y_UNIT_TEST(LazyListFalse) { + TPythonTestEngine engine; + engine.ToPython<NUdf::TListType<i32>>( + [](const TType* type, const NUdf::IValueBuilder& vb) { + Y_UNUSED(type); + Y_UNUSED(vb); + return NUdf::TUnboxedValuePod(new 
NUdf::TLazyList<false>(0, 0)); + }, + "def Test(value):\n" + " assert not value\n"); +} - Y_UNIT_TEST(ThinListTrue) { - TPythonTestEngine engine; - engine.ToPython<NUdf::TListType<i32>>( - [](const TType* type, const NUdf::IValueBuilder& vb) { - Y_UNUSED(type); Y_UNUSED(vb); - return NUdf::TUnboxedValuePod(new NUdf::TLazyList<false>(3, 6)); - }, - "def Test(value):\n" - " assert value\n" - ); - } +Y_UNIT_TEST(ThinListTrue) { + TPythonTestEngine engine; + engine.ToPython<NUdf::TListType<i32>>( + [](const TType* type, const NUdf::IValueBuilder& vb) { + Y_UNUSED(type); + Y_UNUSED(vb); + return NUdf::TUnboxedValuePod(new NUdf::TLazyList<false>(3, 6)); + }, + "def Test(value):\n" + " assert value\n"); +} - Y_UNIT_TEST(ThinListFalse) { - TPythonTestEngine engine; - engine.ToPython<NUdf::TListType<i32>>( - [](const TType* type, const NUdf::IValueBuilder& vb) { - Y_UNUSED(type); - return vb.NewEmptyList(); - }, - "def Test(value):\n" - " assert not value\n" - ); - } +Y_UNIT_TEST(ThinListFalse) { + TPythonTestEngine engine; + engine.ToPython<NUdf::TListType<i32>>( + [](const TType* type, const NUdf::IValueBuilder& vb) { + Y_UNUSED(type); + return vb.NewEmptyList(); + }, + "def Test(value):\n" + " assert not value\n"); +} - Y_UNIT_TEST(LazyListHasItems) { - TPythonTestEngine engine; - engine.ToPython<NUdf::TListType<i32>>( - [](const TType* type, const NUdf::IValueBuilder& vb) { - Y_UNUSED(type); Y_UNUSED(vb); - return NUdf::TUnboxedValuePod(new NUdf::TLazyList<false>(3, 6)); - }, - "def Test(value):\n" - " b = value.has_items()\n" - " assert b\n"); - } +Y_UNIT_TEST(LazyListHasItems) { + TPythonTestEngine engine; + engine.ToPython<NUdf::TListType<i32>>( + [](const TType* type, const NUdf::IValueBuilder& vb) { + Y_UNUSED(type); + Y_UNUSED(vb); + return NUdf::TUnboxedValuePod(new NUdf::TLazyList<false>(3, 6)); + }, + "def Test(value):\n" + " b = value.has_items()\n" + " assert b\n"); +} - Y_UNIT_TEST(LazyListEmptyHasItems) { - TPythonTestEngine engine; - 
engine.ToPython<NUdf::TListType<i32>>( - [](const TType* type, const NUdf::IValueBuilder& vb) { - Y_UNUSED(type); Y_UNUSED(vb); - return NUdf::TUnboxedValuePod(new NUdf::TLazyList<false>(0, 0)); - }, - "def Test(value):\n" - " b = value.has_items()\n" - " assert not b\n"); - } +Y_UNIT_TEST(LazyListEmptyHasItems) { + TPythonTestEngine engine; + engine.ToPython<NUdf::TListType<i32>>( + [](const TType* type, const NUdf::IValueBuilder& vb) { + Y_UNUSED(type); + Y_UNUSED(vb); + return NUdf::TUnboxedValuePod(new NUdf::TLazyList<false>(0, 0)); + }, + "def Test(value):\n" + " b = value.has_items()\n" + " assert not b\n"); +} - Y_UNIT_TEST(LazyIndexDictContains) { - TPythonTestEngine engine; - engine.ToPython<NUdf::TListType<i32>>( - [](const TType* type, const NUdf::IValueBuilder& vb) { - Y_UNUSED(type); Y_UNUSED(vb); - return NUdf::TUnboxedValuePod(new NUdf::TLazyList<false>(3, 6)); - }, - "def Test(value):\n" - " d = value.to_index_dict()\n" - " assert 0 in d\n" - " assert 1 in d\n" - " assert 2 in d\n" - " assert 3 not in d\n" - " assert -1 not in d"); - } +Y_UNIT_TEST(LazyIndexDictContains) { + TPythonTestEngine engine; + engine.ToPython<NUdf::TListType<i32>>( + [](const TType* type, const NUdf::IValueBuilder& vb) { + Y_UNUSED(type); + Y_UNUSED(vb); + return NUdf::TUnboxedValuePod(new NUdf::TLazyList<false>(3, 6)); + }, + "def Test(value):\n" + " d = value.to_index_dict()\n" + " assert 0 in d\n" + " assert 1 in d\n" + " assert 2 in d\n" + " assert 3 not in d\n" + " assert -1 not in d"); +} - Y_UNIT_TEST(LazyIndexDictIter) { - TPythonTestEngine engine; - engine.ToPython<NUdf::TListType<i32>>( - [](const TType* type, const NUdf::IValueBuilder& vb) { - Y_UNUSED(type); Y_UNUSED(vb); - return NUdf::TUnboxedValuePod(new NUdf::TLazyList<false>(3, 6)); - }, - "def Test(value):\n" - " d = value.to_index_dict()\n" - " i, j = 0, 3\n" - " for k, v in d.items():\n" - " assert i == k\n" - " assert j == v\n" - " i, j = i+1, j+1"); - } +Y_UNIT_TEST(LazyIndexDictIter) { + 
TPythonTestEngine engine; + engine.ToPython<NUdf::TListType<i32>>( + [](const TType* type, const NUdf::IValueBuilder& vb) { + Y_UNUSED(type); + Y_UNUSED(vb); + return NUdf::TUnboxedValuePod(new NUdf::TLazyList<false>(3, 6)); + }, + "def Test(value):\n" + " d = value.to_index_dict()\n" + " i, j = 0, 3\n" + " for k, v in d.items():\n" + " assert i == k\n" + " assert j == v\n" + " i, j = i+1, j+1"); +} - Y_UNIT_TEST(LazyIndexDictGet) { - TPythonTestEngine engine; - engine.ToPython<NUdf::TListType<i32>>( - [](const TType* type, const NUdf::IValueBuilder& vb) { - Y_UNUSED(type); Y_UNUSED(vb); - return NUdf::TUnboxedValuePod(new NUdf::TLazyList<false>(3, 5)); - }, - "def Test(value):\n" - " d = value.to_index_dict()\n" - " assert d.get(1) == 4\n" - " assert d.get(5) == None\n" - " assert d.get(5, 10) == 10\n"); - } +Y_UNIT_TEST(LazyIndexDictGet) { + TPythonTestEngine engine; + engine.ToPython<NUdf::TListType<i32>>( + [](const TType* type, const NUdf::IValueBuilder& vb) { + Y_UNUSED(type); + Y_UNUSED(vb); + return NUdf::TUnboxedValuePod(new NUdf::TLazyList<false>(3, 5)); + }, + "def Test(value):\n" + " d = value.to_index_dict()\n" + " assert d.get(1) == 4\n" + " assert d.get(5) == None\n" + " assert d.get(5, 10) == 10\n"); +} - Y_UNIT_TEST(FromPyGeneratorFactory) { - TPythonTestEngine engine; - engine.ToMiniKQL<NUdf::TListType<ui32>>( - "def first_10():\n" - " num = 0\n" - " while num < 10:\n" - " yield num\n" - " num += 1\n" - "def Test():\n" - " return first_10\n", - [](const NUdf::TUnboxedValuePod& value) { - UNIT_ASSERT(value); - UNIT_ASSERT(value.IsBoxed()); - UNIT_ASSERT(!value.HasFastListLength()); - UNIT_ASSERT(value.HasListItems()); +Y_UNIT_TEST(FromPyGeneratorFactory) { + TPythonTestEngine engine; + engine.ToMiniKQL<NUdf::TListType<ui32>>( + "def first_10():\n" + " num = 0\n" + " while num < 10:\n" + " yield num\n" + " num += 1\n" + "def Test():\n" + " return first_10\n", + [](const NUdf::TUnboxedValuePod& value) { + UNIT_ASSERT(value); + 
UNIT_ASSERT(value.IsBoxed()); + UNIT_ASSERT(!value.HasFastListLength()); + UNIT_ASSERT(value.HasListItems()); - const auto it = value.GetListIterator(); - ui32 expected = 0; - for (NUdf::TUnboxedValue item; it.Next(item);) { - ui32 actual = item.Get<ui32>(); - UNIT_ASSERT_EQUAL(actual, expected); - expected++; - } + const auto it = value.GetListIterator(); + ui32 expected = 0; + for (NUdf::TUnboxedValue item; it.Next(item);) { + ui32 actual = item.Get<ui32>(); + UNIT_ASSERT_EQUAL(actual, expected); + expected++; + } - UNIT_ASSERT_EQUAL(value.GetEstimatedListLength(), 10); - UNIT_ASSERT_EQUAL(value.GetListLength(), 10); - }); - } + UNIT_ASSERT_EQUAL(value.GetEstimatedListLength(), 10); + UNIT_ASSERT_EQUAL(value.GetListLength(), 10); + }); +} - Y_UNIT_TEST(FromPyIterable) { - TPythonTestEngine engine; - engine.ToMiniKQL<NUdf::TListType<ui32>>( - "def Test():\n" +Y_UNIT_TEST(FromPyIterable) { + TPythonTestEngine engine; + engine.ToMiniKQL<NUdf::TListType<ui32>>( + "def Test():\n" #if PY_MAJOR_VERSION >= 3 - " return range(10)\n", + " return range(10)\n", #else - " return xrange(10)\n", + " return xrange(10)\n", #endif - [](const NUdf::TUnboxedValuePod& value) { - UNIT_ASSERT(value); - UNIT_ASSERT(value.IsBoxed()); - UNIT_ASSERT(!value.HasFastListLength()); - UNIT_ASSERT(value.HasListItems()); + [](const NUdf::TUnboxedValuePod& value) { + UNIT_ASSERT(value); + UNIT_ASSERT(value.IsBoxed()); + UNIT_ASSERT(!value.HasFastListLength()); + UNIT_ASSERT(value.HasListItems()); - const auto it = value.GetListIterator(); - ui32 expected = 0U; - for (NUdf::TUnboxedValue item; it.Next(item);) { - UNIT_ASSERT_EQUAL(item.Get<ui32>(), expected++); - } + const auto it = value.GetListIterator(); + ui32 expected = 0U; + for (NUdf::TUnboxedValue item; it.Next(item);) { + UNIT_ASSERT_EQUAL(item.Get<ui32>(), expected++); + } - UNIT_ASSERT_EQUAL(value.GetEstimatedListLength(), 10); - UNIT_ASSERT_EQUAL(value.GetListLength(), 10); - UNIT_ASSERT(value.HasFastListLength()); - }); - } + 
UNIT_ASSERT_EQUAL(value.GetEstimatedListLength(), 10); + UNIT_ASSERT_EQUAL(value.GetListLength(), 10); + UNIT_ASSERT(value.HasFastListLength()); + }); +} - Y_UNIT_TEST(FromPyCustomIterable) { - TPythonTestEngine engine; - engine.ToMiniKQL<NUdf::TListType<ui32>>( - "class T:\n" - " def __init__(self, l):\n" - " self.l = l\n" - " def __len__(self):\n" - " return len(self.l)\n" - " def __nonzero__(self):\n" - " return bool(self.l)\n" - " def __iter__(self):\n" - " return iter(self.l)\n" - "\n" - "def Test():\n" - " return T([1, 2])\n", - [](const NUdf::TUnboxedValuePod& value) { - UNIT_ASSERT(value); - UNIT_ASSERT(value.IsBoxed()); - UNIT_ASSERT(value.HasListItems()); - UNIT_ASSERT_EQUAL(value.GetListLength(), 2); +Y_UNIT_TEST(FromPyCustomIterable) { + TPythonTestEngine engine; + engine.ToMiniKQL<NUdf::TListType<ui32>>( + "class T:\n" + " def __init__(self, l):\n" + " self.l = l\n" + " def __len__(self):\n" + " return len(self.l)\n" + " def __nonzero__(self):\n" + " return bool(self.l)\n" + " def __iter__(self):\n" + " return iter(self.l)\n" + "\n" + "def Test():\n" + " return T([1, 2])\n", + [](const NUdf::TUnboxedValuePod& value) { + UNIT_ASSERT(value); + UNIT_ASSERT(value.IsBoxed()); + UNIT_ASSERT(value.HasListItems()); + UNIT_ASSERT_EQUAL(value.GetListLength(), 2); - auto it = value.GetListIterator(); - { - NUdf::TUnboxedValue item; - it.Next(item); - ui32 actual = item.Get<ui32>(); - UNIT_ASSERT_EQUAL(actual, 1); - } - { - NUdf::TUnboxedValue item; - it.Next(item); - ui32 actual = item.Get<ui32>(); - UNIT_ASSERT_EQUAL(actual, 2); - } + auto it = value.GetListIterator(); + { + NUdf::TUnboxedValue item; + it.Next(item); + ui32 actual = item.Get<ui32>(); + UNIT_ASSERT_EQUAL(actual, 1); + } + { + NUdf::TUnboxedValue item; + it.Next(item); + ui32 actual = item.Get<ui32>(); + UNIT_ASSERT_EQUAL(actual, 2); + } - UNIT_ASSERT(false == it.Skip()); - }); - } + UNIT_ASSERT(false == it.Skip()); + }); +} - Y_UNIT_TEST(FromPyIterator) { - TPythonTestEngine engine; - 
engine.ToMiniKQL<NUdf::TListType<ui32>>( - "def Test():\n" - " return iter(range(2))\n", - [](const NUdf::TUnboxedValuePod& value) { - UNIT_ASSERT(value); - UNIT_ASSERT(value.IsBoxed()); - UNIT_ASSERT(false == value.HasFastListLength()); +Y_UNIT_TEST(FromPyIterator) { + TPythonTestEngine engine; + engine.ToMiniKQL<NUdf::TListType<ui32>>( + "def Test():\n" + " return iter(range(2))\n", + [](const NUdf::TUnboxedValuePod& value) { + UNIT_ASSERT(value); + UNIT_ASSERT(value.IsBoxed()); + UNIT_ASSERT(false == value.HasFastListLength()); - auto it = value.GetListIterator(); - { - NUdf::TUnboxedValue item; - it.Next(item); - ui32 actual = item.Get<ui32>(); - UNIT_ASSERT_EQUAL(actual, 0); - } - { - NUdf::TUnboxedValue item; - it.Next(item); - ui32 actual = item.Get<ui32>(); - UNIT_ASSERT_EQUAL(actual, 1); - } + auto it = value.GetListIterator(); + { + NUdf::TUnboxedValue item; + it.Next(item); + ui32 actual = item.Get<ui32>(); + UNIT_ASSERT_EQUAL(actual, 0); + } + { + NUdf::TUnboxedValue item; + it.Next(item); + ui32 actual = item.Get<ui32>(); + UNIT_ASSERT_EQUAL(actual, 1); + } - UNIT_ASSERT(false == it.Skip()); - }); - } + UNIT_ASSERT(false == it.Skip()); + }); +} - Y_UNIT_TEST(FromPyGenerator) { - TPythonTestEngine engine; - engine.ToMiniKQL<NUdf::TListType<ui32>>( - "def Test():\n" - " yield 0\n" - " yield 1\n", - [](const NUdf::TUnboxedValuePod& value) { - UNIT_ASSERT(value); - UNIT_ASSERT(value.IsBoxed()); - UNIT_ASSERT(false == value.HasFastListLength()); +Y_UNIT_TEST(FromPyGenerator) { + TPythonTestEngine engine; + engine.ToMiniKQL<NUdf::TListType<ui32>>( + "def Test():\n" + " yield 0\n" + " yield 1\n", + [](const NUdf::TUnboxedValuePod& value) { + UNIT_ASSERT(value); + UNIT_ASSERT(value.IsBoxed()); + UNIT_ASSERT(false == value.HasFastListLength()); - auto it = value.GetListIterator(); - { - NUdf::TUnboxedValue item; - it.Next(item); - ui32 actual = item.Get<ui32>(); - UNIT_ASSERT_EQUAL(actual, 0); - } - { - NUdf::TUnboxedValue item; - it.Next(item); - ui32 actual = 
item.Get<ui32>(); - UNIT_ASSERT_EQUAL(actual, 1); - } + auto it = value.GetListIterator(); + { + NUdf::TUnboxedValue item; + it.Next(item); + ui32 actual = item.Get<ui32>(); + UNIT_ASSERT_EQUAL(actual, 0); + } + { + NUdf::TUnboxedValue item; + it.Next(item); + ui32 actual = item.Get<ui32>(); + UNIT_ASSERT_EQUAL(actual, 1); + } - UNIT_ASSERT(false == it.Skip()); - }); - } + UNIT_ASSERT(false == it.Skip()); + }); } +} // Y_UNIT_TEST_SUITE(TPyListTest) diff --git a/yql/essentials/udfs/common/python/bindings/py_number_ut.cpp b/yql/essentials/udfs/common/python/bindings/py_number_ut.cpp index 35c94d5e8ed..19f7929b6da 100644 --- a/yql/essentials/udfs/common/python/bindings/py_number_ut.cpp +++ b/yql/essentials/udfs/common/python/bindings/py_number_ut.cpp @@ -2,14 +2,14 @@ #include <library/cpp/testing/unittest/registar.h> -#define PY_CHECKER(Name, PyType, AsType, Type) \ - struct TPy##Name##Checker { \ - void operator()(PyObject* pyVal, Type expected) { \ - UNIT_ASSERT(Py##PyType##_Check(pyVal)); \ - Type val = Py##PyType##_As##AsType(pyVal); \ +#define PY_CHECKER(Name, PyType, AsType, Type) \ + struct TPy##Name##Checker { \ + void operator()(PyObject* pyVal, Type expected) { \ + UNIT_ASSERT(Py##PyType##_Check(pyVal)); \ + Type val = Py##PyType##_As##AsType(pyVal); \ UNIT_ASSERT(val != static_cast<Type>(-1) || !PyErr_Occurred()); \ - UNIT_ASSERT_EQUAL(val, expected); \ - } \ + UNIT_ASSERT_EQUAL(val, expected); \ + } \ }; #if PY_MAJOR_VERSION >= 3 @@ -33,327 +33,326 @@ PY_CHECKER(Float, Float, Double, long) using namespace NPython; Y_UNIT_TEST_SUITE(TPyNumberTest) { - template <typename T, typename TPyChecker> - void TestCastsInRange(T begin, T end) { - for (T i = begin; i < end; i++) { - TPyObjectPtr pyVal = PyCast<T>(i); - UNIT_ASSERT(pyVal.Get() != nullptr); +template <typename T, typename TPyChecker> +void TestCastsInRange(T begin, T end) { + for (T i = begin; i < end; i++) { + TPyObjectPtr pyVal = PyCast<T>(i); + UNIT_ASSERT(pyVal.Get() != nullptr); - TPyChecker c; - 
c(pyVal.Get(), i); + TPyChecker c; + c(pyVal.Get(), i); - T cppVal = PyCast<T>(pyVal.Get()); - UNIT_ASSERT_EQUAL(cppVal, i); - } + T cppVal = PyCast<T>(pyVal.Get()); + UNIT_ASSERT_EQUAL(cppVal, i); } +} - template <typename T, typename TPyChecker, int range = 10> - void TestSignedCasts() { - TPythonTestEngine engine; - TestCastsInRange<T, TPyChecker>(Min<T>(), Min<T>() + range); - TestCastsInRange<T, TPyChecker>(-range, range); - TestCastsInRange<T, TPyChecker>(Max<T>() - range, Max<T>()); - } +template <typename T, typename TPyChecker, int range = 10> +void TestSignedCasts() { + TPythonTestEngine engine; + TestCastsInRange<T, TPyChecker>(Min<T>(), Min<T>() + range); + TestCastsInRange<T, TPyChecker>(-range, range); + TestCastsInRange<T, TPyChecker>(Max<T>() - range, Max<T>()); +} - template <typename T, typename TPyDownChecker, - typename TPyUpChecker = TPyDownChecker, int range = 10> - void TestUnsignedCasts() { - TPythonTestEngine engine; - TestCastsInRange<T, TPyDownChecker>(Min<T>(), Min<T>() + range); - TestCastsInRange<T, TPyUpChecker>(Max<T>() - range, Max<T>()); - } +template <typename T, typename TPyDownChecker, + typename TPyUpChecker = TPyDownChecker, int range = 10> +void TestUnsignedCasts() { + TPythonTestEngine engine; + TestCastsInRange<T, TPyDownChecker>(Min<T>(), Min<T>() + range); + TestCastsInRange<T, TPyUpChecker>(Max<T>() - range, Max<T>()); +} - Y_UNIT_TEST(Bool) { - TPythonTestEngine engine; - UNIT_ASSERT_EQUAL(PyCast<bool>(Py_True), true); - UNIT_ASSERT_EQUAL(PyCast<bool>(Py_False), false); +Y_UNIT_TEST(Bool) { + TPythonTestEngine engine; + UNIT_ASSERT_EQUAL(PyCast<bool>(Py_True), true); + UNIT_ASSERT_EQUAL(PyCast<bool>(Py_False), false); - TPyObjectPtr list = PyList_New(0); - UNIT_ASSERT_EQUAL(PyCast<bool>(list.Get()), false); - bool res1; - UNIT_ASSERT(TryPyCast<bool>(list.Get(), res1)); - UNIT_ASSERT_EQUAL(res1, false); + TPyObjectPtr list = PyList_New(0); + UNIT_ASSERT_EQUAL(PyCast<bool>(list.Get()), false); + bool res1; + 
UNIT_ASSERT(TryPyCast<bool>(list.Get(), res1)); + UNIT_ASSERT_EQUAL(res1, false); - PyList_Append(list.Get(), Py_None); - UNIT_ASSERT_EQUAL(PyCast<bool>(list.Get()), true); - bool res2; - UNIT_ASSERT(TryPyCast<bool>(list.Get(), res2)); - UNIT_ASSERT_EQUAL(res2, true); - } + PyList_Append(list.Get(), Py_None); + UNIT_ASSERT_EQUAL(PyCast<bool>(list.Get()), true); + bool res2; + UNIT_ASSERT(TryPyCast<bool>(list.Get(), res2)); + UNIT_ASSERT_EQUAL(res2, true); +} - Y_UNIT_TEST(Float) { - TestSignedCasts<float, TPyFloatChecker>(); - } +Y_UNIT_TEST(Float) { + TestSignedCasts<float, TPyFloatChecker>(); +} - Y_UNIT_TEST(Double) { - TestUnsignedCasts<double, TPyFloatChecker>(); - } +Y_UNIT_TEST(Double) { + TestUnsignedCasts<double, TPyFloatChecker>(); +} - Y_UNIT_TEST(I64) { - TestSignedCasts<i64, TPyLLongChecker>(); - } +Y_UNIT_TEST(I64) { + TestSignedCasts<i64, TPyLLongChecker>(); +} - Y_UNIT_TEST(Ui64) { - TestUnsignedCasts<ui64, TPyUlongChecker>(); - } +Y_UNIT_TEST(Ui64) { + TestUnsignedCasts<ui64, TPyUlongChecker>(); +} #if PY_MAJOR_VERSION >= 3 - Y_UNIT_TEST(I8) { - TestSignedCasts<i8, TPyLongChecker>(); - } - - Y_UNIT_TEST(Ui8) { - TestUnsignedCasts<ui8, TPyLongChecker>(); - } +Y_UNIT_TEST(I8) { + TestSignedCasts<i8, TPyLongChecker>(); +} - Y_UNIT_TEST(I16) { - TestSignedCasts<i16, TPyLongChecker>(); - } +Y_UNIT_TEST(Ui8) { + TestUnsignedCasts<ui8, TPyLongChecker>(); +} - Y_UNIT_TEST(Ui16) { - TestUnsignedCasts<ui16, TPyLongChecker>(); - } +Y_UNIT_TEST(I16) { + TestSignedCasts<i16, TPyLongChecker>(); +} - Y_UNIT_TEST(I32) { - TestSignedCasts<i32, TPyLongChecker>(); - } +Y_UNIT_TEST(Ui16) { + TestUnsignedCasts<ui16, TPyLongChecker>(); +} - Y_UNIT_TEST(Ui32) { - TestUnsignedCasts<ui32, TPyLongChecker>(); - } - Y_UNIT_TEST(ImplicitIntCasts) { - TPythonTestEngine engine; - const ui64 longMask = sizeof(long) == 4 ? 
Max<ui32>() : Max<ui64>(); - i64 expected = longMask & (static_cast<i64>(Max<ui32>()) + 10); - TPyObjectPtr pyInt = PyLong_FromLong(expected); +Y_UNIT_TEST(I32) { + TestSignedCasts<i32, TPyLongChecker>(); +} - { // signed - i64 actual = PyCast<i64>(pyInt.Get()); - UNIT_ASSERT_EQUAL(actual, expected); +Y_UNIT_TEST(Ui32) { + TestUnsignedCasts<ui32, TPyLongChecker>(); +} +Y_UNIT_TEST(ImplicitIntCasts) { + TPythonTestEngine engine; + const ui64 longMask = sizeof(long) == 4 ? Max<ui32>() : Max<ui64>(); + i64 expected = longMask & (static_cast<i64>(Max<ui32>()) + 10); + TPyObjectPtr pyInt = PyLong_FromLong(expected); - bool isOk = TryPyCast<i64>(pyInt.Get(), actual); - UNIT_ASSERT(isOk); - UNIT_ASSERT_EQUAL(actual, expected); - } + { // signed + i64 actual = PyCast<i64>(pyInt.Get()); + UNIT_ASSERT_EQUAL(actual, expected); - { // unsigned - ui64 actual = PyCast<ui64>(pyInt.Get()); - UNIT_ASSERT_EQUAL(actual, static_cast<ui64>(expected)); + bool isOk = TryPyCast<i64>(pyInt.Get(), actual); + UNIT_ASSERT(isOk); + UNIT_ASSERT_EQUAL(actual, expected); + } - bool isOk = TryPyCast<ui64>(pyInt.Get(), actual); - UNIT_ASSERT(isOk); - UNIT_ASSERT_EQUAL(actual, static_cast<ui64>(expected)); - } + { // unsigned + ui64 actual = PyCast<ui64>(pyInt.Get()); + UNIT_ASSERT_EQUAL(actual, static_cast<ui64>(expected)); - { // to float - float f = PyCast<float>(pyInt.Get()); - UNIT_ASSERT_DOUBLES_EQUAL(f, expected, 0.000001); + bool isOk = TryPyCast<ui64>(pyInt.Get(), actual); + UNIT_ASSERT(isOk); + UNIT_ASSERT_EQUAL(actual, static_cast<ui64>(expected)); + } - bool isOk = TryPyCast<float>(pyInt.Get(), f); - UNIT_ASSERT(isOk); - UNIT_ASSERT_DOUBLES_EQUAL(f, expected, 0.000001); - } + { // to float + float f = PyCast<float>(pyInt.Get()); + UNIT_ASSERT_DOUBLES_EQUAL(f, expected, 0.000001); - { // to double - double d = PyCast<double>(pyInt.Get()); - UNIT_ASSERT_DOUBLES_EQUAL(d, expected, 0.000001); + bool isOk = TryPyCast<float>(pyInt.Get(), f); + UNIT_ASSERT(isOk); + UNIT_ASSERT_DOUBLES_EQUAL(f, 
expected, 0.000001); + } - bool isOk = TryPyCast<double>(pyInt.Get(), d); - UNIT_ASSERT(isOk); - UNIT_ASSERT_DOUBLES_EQUAL(d, expected, 0.000001); - } + { // to double + double d = PyCast<double>(pyInt.Get()); + UNIT_ASSERT_DOUBLES_EQUAL(d, expected, 0.000001); - // expected overflow - i32 tmp; - UNIT_ASSERT(!TryPyCast<i32>(pyInt.Get(), tmp)); - ui32 tmpu; - UNIT_ASSERT(!TryPyCast<ui32>(pyInt.Get(), tmpu)); + bool isOk = TryPyCast<double>(pyInt.Get(), d); + UNIT_ASSERT(isOk); + UNIT_ASSERT_DOUBLES_EQUAL(d, expected, 0.000001); } + // expected overflow + i32 tmp; + UNIT_ASSERT(!TryPyCast<i32>(pyInt.Get(), tmp)); + ui32 tmpu; + UNIT_ASSERT(!TryPyCast<ui32>(pyInt.Get(), tmpu)); +} + #else - Y_UNIT_TEST(I8) { - TestSignedCasts<i8, TPyIntChecker>(); - } +Y_UNIT_TEST(I8) { + TestSignedCasts<i8, TPyIntChecker>(); +} - Y_UNIT_TEST(Ui8) { - TestUnsignedCasts<ui8, TPyIntChecker>(); - } +Y_UNIT_TEST(Ui8) { + TestUnsignedCasts<ui8, TPyIntChecker>(); +} - Y_UNIT_TEST(I16) { - TestSignedCasts<i16, TPyIntChecker>(); - } +Y_UNIT_TEST(I16) { + TestSignedCasts<i16, TPyIntChecker>(); +} - Y_UNIT_TEST(Ui16) { - TestUnsignedCasts<ui16, TPyIntChecker>(); - } +Y_UNIT_TEST(Ui16) { + TestUnsignedCasts<ui16, TPyIntChecker>(); +} - Y_UNIT_TEST(I32) { - TestSignedCasts<i32, TPyIntChecker>(); - } +Y_UNIT_TEST(I32) { + TestSignedCasts<i32, TPyIntChecker>(); +} - Y_UNIT_TEST(Ui32) { - if (sizeof(long) == 4) { - TestUnsignedCasts<ui32, TPyIntChecker, TPyLLongChecker>(); - } else { - TestUnsignedCasts<ui32, TPyIntChecker>(); - } +Y_UNIT_TEST(Ui32) { + if (sizeof(long) == 4) { + TestUnsignedCasts<ui32, TPyIntChecker, TPyLLongChecker>(); + } else { + TestUnsignedCasts<ui32, TPyIntChecker>(); } +} - Y_UNIT_TEST(ImplicitIntCasts) { - TPythonTestEngine engine; - const ui64 longMask = sizeof(long) == 4 ? 
Max<ui32>() : Max<ui64>(); - i64 expected = longMask & (static_cast<i64>(Max<ui32>()) + 10); - TPyObjectPtr pyInt = PyInt_FromLong(expected); - - { // signed - i64 actual = PyCast<i64>(pyInt.Get()); - UNIT_ASSERT_EQUAL(actual, expected); +Y_UNIT_TEST(ImplicitIntCasts) { + TPythonTestEngine engine; + const ui64 longMask = sizeof(long) == 4 ? Max<ui32>() : Max<ui64>(); + i64 expected = longMask & (static_cast<i64>(Max<ui32>()) + 10); + TPyObjectPtr pyInt = PyInt_FromLong(expected); - bool isOk = TryPyCast<i64>(pyInt.Get(), actual); - UNIT_ASSERT(isOk); - UNIT_ASSERT_EQUAL(actual, expected); - } + { // signed + i64 actual = PyCast<i64>(pyInt.Get()); + UNIT_ASSERT_EQUAL(actual, expected); - { // unsigned - ui64 actual = PyCast<ui64>(pyInt.Get()); - UNIT_ASSERT_EQUAL(actual, static_cast<ui64>(expected)); + bool isOk = TryPyCast<i64>(pyInt.Get(), actual); + UNIT_ASSERT(isOk); + UNIT_ASSERT_EQUAL(actual, expected); + } - bool isOk = TryPyCast<ui64>(pyInt.Get(), actual); - UNIT_ASSERT(isOk); - UNIT_ASSERT_EQUAL(actual, static_cast<ui64>(expected)); - } + { // unsigned + ui64 actual = PyCast<ui64>(pyInt.Get()); + UNIT_ASSERT_EQUAL(actual, static_cast<ui64>(expected)); - { // to float - float f = PyCast<float>(pyInt.Get()); - UNIT_ASSERT_DOUBLES_EQUAL(f, expected, 0.000001); + bool isOk = TryPyCast<ui64>(pyInt.Get(), actual); + UNIT_ASSERT(isOk); + UNIT_ASSERT_EQUAL(actual, static_cast<ui64>(expected)); + } - bool isOk = TryPyCast<float>(pyInt.Get(), f); - UNIT_ASSERT(isOk); - UNIT_ASSERT_DOUBLES_EQUAL(f, expected, 0.000001); - } + { // to float + float f = PyCast<float>(pyInt.Get()); + UNIT_ASSERT_DOUBLES_EQUAL(f, expected, 0.000001); - { // to double - double d = PyCast<double>(pyInt.Get()); - UNIT_ASSERT_DOUBLES_EQUAL(d, expected, 0.000001); + bool isOk = TryPyCast<float>(pyInt.Get(), f); + UNIT_ASSERT(isOk); + UNIT_ASSERT_DOUBLES_EQUAL(f, expected, 0.000001); + } - bool isOk = TryPyCast<double>(pyInt.Get(), d); - UNIT_ASSERT(isOk); - UNIT_ASSERT_DOUBLES_EQUAL(d, 
expected, 0.000001); - } + { // to double + double d = PyCast<double>(pyInt.Get()); + UNIT_ASSERT_DOUBLES_EQUAL(d, expected, 0.000001); - // expected overflow - i32 tmp; - UNIT_ASSERT(!TryPyCast<i32>(pyInt.Get(), tmp)); - ui32 tmpu; - UNIT_ASSERT(!TryPyCast<ui32>(pyInt.Get(), tmpu)); + bool isOk = TryPyCast<double>(pyInt.Get(), d); + UNIT_ASSERT(isOk); + UNIT_ASSERT_DOUBLES_EQUAL(d, expected, 0.000001); } -#endif + // expected overflow + i32 tmp; + UNIT_ASSERT(!TryPyCast<i32>(pyInt.Get(), tmp)); + ui32 tmpu; + UNIT_ASSERT(!TryPyCast<ui32>(pyInt.Get(), tmpu)); +} +#endif - Y_UNIT_TEST(ImplicitLongCasts) { - TPythonTestEngine engine; - i64 expected = static_cast<i64>(Max<ui32>()) + 10; - TPyObjectPtr pyLong; - #ifdef HAVE_LONG_LONG - pyLong = PyLong_FromLongLong(expected); - #else - pyLong = PyLong_FromLong(expected) - #endif - - { // signed - i64 actual = PyCast<i64>(pyLong.Get()); - UNIT_ASSERT_EQUAL(actual, expected); +Y_UNIT_TEST(ImplicitLongCasts) { + TPythonTestEngine engine; + i64 expected = static_cast<i64>(Max<ui32>()) + 10; + TPyObjectPtr pyLong; +#ifdef HAVE_LONG_LONG + pyLong = PyLong_FromLongLong(expected); +#else + pyLong = PyLong_FromLong(expected) +#endif - bool isOk = TryPyCast<i64>(pyLong.Get(), actual); - UNIT_ASSERT(isOk); - UNIT_ASSERT_EQUAL(actual, expected); - } + { // signed + i64 actual = PyCast<i64>(pyLong.Get()); + UNIT_ASSERT_EQUAL(actual, expected); - { // unsigned - ui64 actual = PyCast<ui64>(pyLong.Get()); - UNIT_ASSERT_EQUAL(actual, static_cast<ui64>(expected)); + bool isOk = TryPyCast<i64>(pyLong.Get(), actual); + UNIT_ASSERT(isOk); + UNIT_ASSERT_EQUAL(actual, expected); + } - bool isOk = TryPyCast<ui64>(pyLong.Get(), actual); - UNIT_ASSERT(isOk); - UNIT_ASSERT_EQUAL(actual, static_cast<ui64>(expected)); - } + { // unsigned + ui64 actual = PyCast<ui64>(pyLong.Get()); + UNIT_ASSERT_EQUAL(actual, static_cast<ui64>(expected)); - { // to float - float f = PyCast<float>(pyLong.Get()); - UNIT_ASSERT_DOUBLES_EQUAL(f, expected, 0.000001); + 
bool isOk = TryPyCast<ui64>(pyLong.Get(), actual); + UNIT_ASSERT(isOk); + UNIT_ASSERT_EQUAL(actual, static_cast<ui64>(expected)); + } - bool isOk = TryPyCast<float>(pyLong.Get(), f); - UNIT_ASSERT(isOk); - UNIT_ASSERT_DOUBLES_EQUAL(f, expected, 0.000001); - } + { // to float + float f = PyCast<float>(pyLong.Get()); + UNIT_ASSERT_DOUBLES_EQUAL(f, expected, 0.000001); - { // to double - double d = PyCast<double>(pyLong.Get()); - UNIT_ASSERT_DOUBLES_EQUAL(d, expected, 0.000001); + bool isOk = TryPyCast<float>(pyLong.Get(), f); + UNIT_ASSERT(isOk); + UNIT_ASSERT_DOUBLES_EQUAL(f, expected, 0.000001); + } - bool isOk = TryPyCast<double>(pyLong.Get(), d); - UNIT_ASSERT(isOk); - UNIT_ASSERT_DOUBLES_EQUAL(d, expected, 0.000001); - } + { // to double + double d = PyCast<double>(pyLong.Get()); + UNIT_ASSERT_DOUBLES_EQUAL(d, expected, 0.000001); - // expected overflow - i8 tmp; - UNIT_ASSERT(!TryPyCast<i8>(pyLong.Get(), tmp)); + bool isOk = TryPyCast<double>(pyLong.Get(), d); + UNIT_ASSERT(isOk); + UNIT_ASSERT_DOUBLES_EQUAL(d, expected, 0.000001); } - Y_UNIT_TEST(HugeLongOverflow) { - TPythonTestEngine engine; - TPyObjectPtr pyLong = PyLong_FromString((char*)"0xfffffffffffffffff", nullptr, 0); - TPyObjectPtr bitLength = PyObject_CallMethod(pyLong.Get(), (char*)"bit_length", (char*)"()"); - UNIT_ASSERT_EQUAL(PyCast<ui32>(bitLength.Get()), 68); // 68 bits number - - ui64 resUI64; - UNIT_ASSERT(!TryPyCast(pyLong.Get(), resUI64)); + // expected overflow + i8 tmp; + UNIT_ASSERT(!TryPyCast<i8>(pyLong.Get(), tmp)); +} - i64 resI64; - UNIT_ASSERT(!TryPyCast(pyLong.Get(), resI64)); +Y_UNIT_TEST(HugeLongOverflow) { + TPythonTestEngine engine; + TPyObjectPtr pyLong = PyLong_FromString((char*)"0xfffffffffffffffff", nullptr, 0); + TPyObjectPtr bitLength = PyObject_CallMethod(pyLong.Get(), (char*)"bit_length", (char*)"()"); + UNIT_ASSERT_EQUAL(PyCast<ui32>(bitLength.Get()), 68); // 68 bits number - ui32 resUI32; - UNIT_ASSERT(!TryPyCast(pyLong.Get(), resUI32)); + ui64 resUI64; + 
UNIT_ASSERT(!TryPyCast(pyLong.Get(), resUI64)); - i32 resI32; - UNIT_ASSERT(!TryPyCast(pyLong.Get(), resI32)); + i64 resI64; + UNIT_ASSERT(!TryPyCast(pyLong.Get(), resI64)); - ui16 resUI16; - UNIT_ASSERT(!TryPyCast(pyLong.Get(), resUI16)); + ui32 resUI32; + UNIT_ASSERT(!TryPyCast(pyLong.Get(), resUI32)); - i16 resI16; - UNIT_ASSERT(!TryPyCast(pyLong.Get(), resI16)); + i32 resI32; + UNIT_ASSERT(!TryPyCast(pyLong.Get(), resI32)); - ui8 resUI8; - UNIT_ASSERT(!TryPyCast(pyLong.Get(), resUI8)); + ui16 resUI16; + UNIT_ASSERT(!TryPyCast(pyLong.Get(), resUI16)); - i8 resI8; - UNIT_ASSERT(!TryPyCast(pyLong.Get(), resI8)); - } + i16 resI16; + UNIT_ASSERT(!TryPyCast(pyLong.Get(), resI16)); - Y_UNIT_TEST(ImplicitFloatCasts) { - TPythonTestEngine engine; - double expected = 3.14159; - TPyObjectPtr pyFloat = PyFloat_FromDouble(expected); + ui8 resUI8; + UNIT_ASSERT(!TryPyCast(pyLong.Get(), resUI8)); - { // to float - float f = PyCast<float>(pyFloat.Get()); - UNIT_ASSERT_DOUBLES_EQUAL(f, expected, 0.000001); + i8 resI8; + UNIT_ASSERT(!TryPyCast(pyLong.Get(), resI8)); +} - bool isOk = TryPyCast<float>(pyFloat.Get(), f); - UNIT_ASSERT(isOk); - UNIT_ASSERT_DOUBLES_EQUAL(f, expected, 0.000001); - } +Y_UNIT_TEST(ImplicitFloatCasts) { + TPythonTestEngine engine; + double expected = 3.14159; + TPyObjectPtr pyFloat = PyFloat_FromDouble(expected); - { // to double - double d = PyCast<double>(pyFloat.Get()); - UNIT_ASSERT_DOUBLES_EQUAL(d, expected, 0.000001); + { // to float + float f = PyCast<float>(pyFloat.Get()); + UNIT_ASSERT_DOUBLES_EQUAL(f, expected, 0.000001); - bool isOk = TryPyCast<double>(pyFloat.Get(), d); - UNIT_ASSERT(isOk); - UNIT_ASSERT_DOUBLES_EQUAL(d, expected, 0.000001); - } + bool isOk = TryPyCast<float>(pyFloat.Get(), f); + UNIT_ASSERT(isOk); + UNIT_ASSERT_DOUBLES_EQUAL(f, expected, 0.000001); } + { // to double + double d = PyCast<double>(pyFloat.Get()); + UNIT_ASSERT_DOUBLES_EQUAL(d, expected, 0.000001); + + bool isOk = TryPyCast<double>(pyFloat.Get(), d); + 
UNIT_ASSERT(isOk); + UNIT_ASSERT_DOUBLES_EQUAL(d, expected, 0.000001); + } } + +} // Y_UNIT_TEST_SUITE(TPyNumberTest) diff --git a/yql/essentials/udfs/common/python/bindings/py_optional_ut.cpp b/yql/essentials/udfs/common/python/bindings/py_optional_ut.cpp index 4cc45f11840..fbcb98c323d 100644 --- a/yql/essentials/udfs/common/python/bindings/py_optional_ut.cpp +++ b/yql/essentials/udfs/common/python/bindings/py_optional_ut.cpp @@ -2,55 +2,55 @@ #include <library/cpp/testing/unittest/registar.h> - using namespace NPython; Y_UNIT_TEST_SUITE(FromPyNone) { - Y_UNIT_TEST(FromPyNone) { - TPythonTestEngine engine; - engine.ToMiniKQL<NUdf::TOptional<ui32>>( - "def Test(): return None", - [](const NUdf::TUnboxedValuePod& value) { - UNIT_ASSERT(!value); +Y_UNIT_TEST(FromPyNone) { + TPythonTestEngine engine; + engine.ToMiniKQL<NUdf::TOptional<ui32>>( + "def Test(): return None", + [](const NUdf::TUnboxedValuePod& value) { + UNIT_ASSERT(!value); }); - } +} - Y_UNIT_TEST(FromPyObject) { - TPythonTestEngine engine; - engine.ToMiniKQL<NUdf::TOptional<ui32>>( - "def Test(): return 42", - [](const NUdf::TUnboxedValuePod& value) { - UNIT_ASSERT(value); - UNIT_ASSERT_EQUAL(value.Get<ui32>(), 42); - }); - } +Y_UNIT_TEST(FromPyObject) { + TPythonTestEngine engine; + engine.ToMiniKQL<NUdf::TOptional<ui32>>( + "def Test(): return 42", + [](const NUdf::TUnboxedValuePod& value) { + UNIT_ASSERT(value); + UNIT_ASSERT_EQUAL(value.Get<ui32>(), 42); + }); +} - Y_UNIT_TEST(ToPyNone) { - TPythonTestEngine engine; - engine.ToPython<NUdf::TOptional<char*>>( - [](const TType* type, const NUdf::IValueBuilder& vb) { - Y_UNUSED(type); Y_UNUSED(vb); - return NUdf::TUnboxedValuePod(); - }, - "def Test(value):\n" - " assert value == None\n"); - } +Y_UNIT_TEST(ToPyNone) { + TPythonTestEngine engine; + engine.ToPython<NUdf::TOptional<char*>>( + [](const TType* type, const NUdf::IValueBuilder& vb) { + Y_UNUSED(type); + Y_UNUSED(vb); + return NUdf::TUnboxedValuePod(); + }, + "def Test(value):\n" + " assert 
value == None\n"); +} - Y_UNIT_TEST(ToPyFilledOptional) { - TPythonTestEngine engine; - engine.ToPython<NUdf::TOptional<NUdf::TTuple<NUdf::TUtf8, bool>>>( - [](const TType* type, const NUdf::IValueBuilder& vb) { - const TOptionalType* optType = - static_cast<const TOptionalType*>(type); - NUdf::TUnboxedValue* items = nullptr; - auto tuple = vb.NewArray(static_cast<const TTupleType*>(optType->GetItemType())->GetElementsCount(), items); - items[0] = vb.NewString("test string"); - items[1] = NUdf::TUnboxedValuePod(false); - return NUdf::TUnboxedValue(tuple); - }, - "def Test(value):\n" - " assert isinstance(value, tuple)\n" - " assert len(value) == 2\n" - " assert value == ('test string', False)\n"); - } +Y_UNIT_TEST(ToPyFilledOptional) { + TPythonTestEngine engine; + engine.ToPython<NUdf::TOptional<NUdf::TTuple<NUdf::TUtf8, bool>>>( + [](const TType* type, const NUdf::IValueBuilder& vb) { + const TOptionalType* optType = + static_cast<const TOptionalType*>(type); + NUdf::TUnboxedValue* items = nullptr; + auto tuple = vb.NewArray(static_cast<const TTupleType*>(optType->GetItemType())->GetElementsCount(), items); + items[0] = vb.NewString("test string"); + items[1] = NUdf::TUnboxedValuePod(false); + return NUdf::TUnboxedValue(tuple); + }, + "def Test(value):\n" + " assert isinstance(value, tuple)\n" + " assert len(value) == 2\n" + " assert value == ('test string', False)\n"); } +} // Y_UNIT_TEST_SUITE(FromPyNone) diff --git a/yql/essentials/udfs/common/python/bindings/py_ptr.h b/yql/essentials/udfs/common/python/bindings/py_ptr.h index 704629b86b7..66f70cea01c 100644 --- a/yql/essentials/udfs/common/python/bindings/py_ptr.h +++ b/yql/essentials/udfs/common/python/bindings/py_ptr.h @@ -7,8 +7,7 @@ namespace NPython { template <typename T> -class TPyPtrOps -{ +class TPyPtrOps { public: static inline void Ref(T* t) { Y_ASSERT(t); @@ -26,9 +25,7 @@ public: } }; -class TPyObjectPtr: - public NYql::NUdf::TRefCountedPtr<PyObject, TPyPtrOps<PyObject>> -{ +class TPyObjectPtr: 
public NYql::NUdf::TRefCountedPtr<PyObject, TPyPtrOps<PyObject>> { using TSelf = NYql::NUdf::TRefCountedPtr<PyObject, TPyPtrOps<PyObject>>; public: @@ -37,7 +34,7 @@ public: } inline TPyObjectPtr(PyObject* p) - : TSelf(p, STEAL_REF) // do not increment refcounter by default + : TSelf(p, STEAL_REF) // do not increment refcounter by default { } @@ -66,4 +63,4 @@ public: void Reset(PyObject* p) = delete; }; -} // namspace NPython +} // namespace NPython diff --git a/yql/essentials/udfs/common/python/bindings/py_resource.cpp b/yql/essentials/udfs/common/python/bindings/py_resource.cpp index 050eae0c8ce..33f446aff3b 100644 --- a/yql/essentials/udfs/common/python/bindings/py_resource.cpp +++ b/yql/essentials/udfs/common/python/bindings/py_resource.cpp @@ -21,11 +21,11 @@ void DestroyResourceCapsule(PyObject* obj) { ///////////////////////////////////////////////////////////////////////////// // TResource ///////////////////////////////////////////////////////////////////////////// -class TResource final: public NUdf::TBoxedValue -{ +class TResource final: public NUdf::TBoxedValue { public: TResource(PyObject* value, const NUdf::TStringRef& tag) - : Value_(value, TPyObjectPtr::ADD_REF), Tag_(tag) + : Value_(value, TPyObjectPtr::ADD_REF) + , Tag_(tag) { } @@ -52,11 +52,10 @@ private: const char ResourceCapsuleName[] = "YqlResourceCapsule"; TPyObjectPtr ToPyResource( - const TPyCastContext::TPtr& ctx, - const NUdf::TType* type, - const NUdf::TUnboxedValuePod& value) + const TPyCastContext::TPtr& ctx, + const NUdf::TType* type, + const NUdf::TUnboxedValuePod& value) { - #if UDF_ABI_COMPATIBILITY_VERSION_CURRENT >= UDF_ABI_COMPATIBILITY_VERSION(2, 15) NUdf::TResourceTypeInspector inpector(*ctx->PyCtx->TypeInfoHelper, type); auto tag = inpector.GetTag(); @@ -77,10 +76,9 @@ TPyObjectPtr ToPyResource( } NUdf::TUnboxedValue FromPyResource( - const TPyCastContext::TPtr& ctx, - const NUdf::TType* type, PyObject* value) + const TPyCastContext::TPtr& ctx, + const NUdf::TType* type, 
PyObject* value) { - #if UDF_ABI_COMPATIBILITY_VERSION_CURRENT >= UDF_ABI_COMPATIBILITY_VERSION(2, 15) NUdf::TResourceTypeInspector inpector(*ctx->PyCtx->TypeInfoHelper, type); auto tag = inpector.GetTag(); @@ -93,14 +91,14 @@ NUdf::TUnboxedValue FromPyResource( auto valueTag = resource->GetResourceTag(); if (valueTag != tag) { throw yexception() << "Mismatch of resource tag, expected: " - << tag << ", got: " << valueTag; + << tag << ", got: " << valueTag; } return *resource; } - throw yexception() << "Python object " << PyObjectRepr(value) \ - << " is not a valid resource with tag " << tag; + throw yexception() << "Python object " << PyObjectRepr(value) + << " is not a valid resource with tag " << tag; #else Y_UNUSED(type); if (PyCapsule_CheckExact(value)) { @@ -113,4 +111,4 @@ NUdf::TUnboxedValue FromPyResource( #endif } -} // namspace NPython +} // namespace NPython diff --git a/yql/essentials/udfs/common/python/bindings/py_resource.h b/yql/essentials/udfs/common/python/bindings/py_resource.h index b46b84c84b1..48def547e70 100644 --- a/yql/essentials/udfs/common/python/bindings/py_resource.h +++ b/yql/essentials/udfs/common/python/bindings/py_resource.h @@ -8,13 +8,13 @@ namespace NPython { extern const char ResourceCapsuleName[]; TPyObjectPtr ToPyResource( - const TPyCastContext::TPtr& ctx, - const NKikimr::NUdf::TType* type, - const NKikimr::NUdf::TUnboxedValuePod& value); + const TPyCastContext::TPtr& ctx, + const NKikimr::NUdf::TType* type, + const NKikimr::NUdf::TUnboxedValuePod& value); NKikimr::NUdf::TUnboxedValue FromPyResource( - const TPyCastContext::TPtr& ctx, - const NKikimr::NUdf::TType* type, - PyObject* value); + const TPyCastContext::TPtr& ctx, + const NKikimr::NUdf::TType* type, + PyObject* value); -} // namspace NPython +} // namespace NPython diff --git a/yql/essentials/udfs/common/python/bindings/py_resource_ut.cpp b/yql/essentials/udfs/common/python/bindings/py_resource_ut.cpp index 25b43cbf6ae..095e642acdb 100644 --- 
a/yql/essentials/udfs/common/python/bindings/py_resource_ut.cpp +++ b/yql/essentials/udfs/common/python/bindings/py_resource_ut.cpp @@ -2,7 +2,6 @@ #include <library/cpp/testing/unittest/registar.h> - using namespace NPython; extern const char SimpleDataTag[] = "SimpleData"; @@ -15,67 +14,71 @@ struct TSimpleData { TSimpleData(const TString& name, ui32 age) : Name(name) , Age(age) - {} + { + } }; using TSimpleDataResource = NUdf::TBoxedResource<TSimpleData, SimpleDataTag>; Y_UNIT_TEST_SUITE(TPyResourceTest) { - Y_UNIT_TEST(MkqlObject) { - TPythonTestEngine engine; - TPyObjectPtr pyValue = engine.ToPython<NUdf::TResource<SimpleDataTag>>( - [](const TType* type, const NUdf::IValueBuilder& vb) { - Y_UNUSED(type); Y_UNUSED(vb); - return NUdf::TUnboxedValuePod(new TSimpleDataResource("Jamel", 99)); - }, - "import yql\n" - "\n" - "def Test(value):\n" - " assert str(value).startswith('<capsule object \"YqlResourceCapsule\" at ')\n" - " assert repr(value).startswith('<capsule object \"YqlResourceCapsule\" at ')\n" - " assert type(value).__name__ == 'PyCapsule'\n" - " return value\n"); - UNIT_ASSERT(!!pyValue); +Y_UNIT_TEST(MkqlObject) { + TPythonTestEngine engine; + TPyObjectPtr pyValue = engine.ToPython<NUdf::TResource<SimpleDataTag>>( + [](const TType* type, const NUdf::IValueBuilder& vb) { + Y_UNUSED(type); + Y_UNUSED(vb); + return NUdf::TUnboxedValuePod(new TSimpleDataResource("Jamel", 99)); + }, + "import yql\n" + "\n" + "def Test(value):\n" + " assert str(value).startswith('<capsule object \"YqlResourceCapsule\" at ')\n" + " assert repr(value).startswith('<capsule object \"YqlResourceCapsule\" at ')\n" + " assert type(value).__name__ == 'PyCapsule'\n" + " return value\n"); + UNIT_ASSERT(!!pyValue); - engine.ToMiniKQLWithArg<NUdf::TResource<SimpleDataTag>>( - pyValue.Get(), - "import yql\n" - "\n" - "def Test(value):\n" - " return value\n", - [](const NUdf::TUnboxedValuePod& value) { - UNIT_ASSERT(value);; - UNIT_ASSERT(value.IsBoxed()); - 
UNIT_ASSERT_STRINGS_EQUAL(value.GetResourceTag(), SimpleDataTag); - auto simpleData = - reinterpret_cast<TSimpleData*>(value.GetResource()); - UNIT_ASSERT_EQUAL(simpleData->Age, 99); - UNIT_ASSERT_STRINGS_EQUAL(simpleData->Name, "Jamel"); - }); - } + engine.ToMiniKQLWithArg<NUdf::TResource<SimpleDataTag>>( + pyValue.Get(), + "import yql\n" + "\n" + "def Test(value):\n" + " return value\n", + [](const NUdf::TUnboxedValuePod& value) { + UNIT_ASSERT(value); + ; + UNIT_ASSERT(value.IsBoxed()); + UNIT_ASSERT_STRINGS_EQUAL(value.GetResourceTag(), SimpleDataTag); + auto simpleData = + reinterpret_cast<TSimpleData*>(value.GetResource()); + UNIT_ASSERT_EQUAL(simpleData->Age, 99); + UNIT_ASSERT_STRINGS_EQUAL(simpleData->Name, "Jamel"); + }); +} - Y_UNIT_TEST(PythonObject) { - TPythonTestEngine engine; - NUdf::TUnboxedValue mkqlValue = engine.FromPython<NUdf::TResource<PythonTestTag>>( - "class CustomStruct:\n" - " def __init__(self, name, age):\n" - " self.name = name\n" - " self.age = age\n" - "\n" - "def Test():\n" - " return CustomStruct('Jamel', 97)\n"); - UNIT_ASSERT(mkqlValue); - UNIT_ASSERT_STRINGS_EQUAL(mkqlValue.GetResourceTag(), PythonTestTag); +Y_UNIT_TEST(PythonObject) { + TPythonTestEngine engine; + NUdf::TUnboxedValue mkqlValue = engine.FromPython<NUdf::TResource<PythonTestTag>>( + "class CustomStruct:\n" + " def __init__(self, name, age):\n" + " self.name = name\n" + " self.age = age\n" + "\n" + "def Test():\n" + " return CustomStruct('Jamel', 97)\n"); + UNIT_ASSERT(mkqlValue); + UNIT_ASSERT_STRINGS_EQUAL(mkqlValue.GetResourceTag(), PythonTestTag); - TPyObjectPtr pyValue = engine.ToPython<NUdf::TResource<PythonTestTag>>( - [mkqlValue](const TType* type, const NUdf::IValueBuilder& vb) { - Y_UNUSED(type); Y_UNUSED(vb); - return mkqlValue; - }, - "def Test(value):\n" - " assert isinstance(value, CustomStruct)\n" - " assert value.age, 97\n" - " assert value.name, 'Jamel'\n"); - UNIT_ASSERT(!!pyValue); - } + TPyObjectPtr pyValue = 
engine.ToPython<NUdf::TResource<PythonTestTag>>( + [mkqlValue](const TType* type, const NUdf::IValueBuilder& vb) { + Y_UNUSED(type); + Y_UNUSED(vb); + return mkqlValue; + }, + "def Test(value):\n" + " assert isinstance(value, CustomStruct)\n" + " assert value.age, 97\n" + " assert value.name, 'Jamel'\n"); + UNIT_ASSERT(!!pyValue); } +} // Y_UNIT_TEST_SUITE(TPyResourceTest) diff --git a/yql/essentials/udfs/common/python/bindings/py_stream.cpp b/yql/essentials/udfs/common/python/bindings/py_stream.cpp index 24f7e0eb45d..6337ba5732f 100644 --- a/yql/essentials/udfs/common/python/bindings/py_stream.cpp +++ b/yql/essentials/udfs/common/python/bindings/py_stream.cpp @@ -41,88 +41,90 @@ struct TPyStream { } static PyObject* New( - const TPyCastContext::TPtr& castCtx, - const NUdf::TType* type, - NUdf::IBoxedValuePtr value); + const TPyCastContext::TPtr& castCtx, + const NUdf::TType* type, + NUdf::IBoxedValuePtr value); static PyObject* Next(PyObject* self); }; #if PY_MAJOR_VERSION >= 3 -#define Py_TPFLAGS_HAVE_ITER 0 // NOLINT(readability-identifier-naming) + #define Py_TPFLAGS_HAVE_ITER 0 // NOLINT(readability-identifier-naming) #endif PyTypeObject PyStreamType = { PyVarObject_HEAD_INIT(&PyType_Type, 0) - INIT_MEMBER(tp_name , "yql.TStream"), - INIT_MEMBER(tp_basicsize , sizeof(TPyStream)), - INIT_MEMBER(tp_itemsize , 0), - INIT_MEMBER(tp_dealloc , TPyStream::Dealloc), + // clang-format off + INIT_MEMBER(tp_name, "yql.TStream"), + // clang-format on + INIT_MEMBER(tp_basicsize, sizeof(TPyStream)), + INIT_MEMBER(tp_itemsize, 0), + INIT_MEMBER(tp_dealloc, TPyStream::Dealloc), #if PY_VERSION_HEX < 0x030800b4 - INIT_MEMBER(tp_print , nullptr), + INIT_MEMBER(tp_print, nullptr), #else INIT_MEMBER(tp_vectorcall_offset, 0), #endif - INIT_MEMBER(tp_getattr , nullptr), - INIT_MEMBER(tp_setattr , nullptr), + INIT_MEMBER(tp_getattr, nullptr), + INIT_MEMBER(tp_setattr, nullptr), #if PY_MAJOR_VERSION >= 3 - INIT_MEMBER(tp_as_async , nullptr), + INIT_MEMBER(tp_as_async, nullptr), #else 
- INIT_MEMBER(tp_compare , nullptr), + INIT_MEMBER(tp_compare, nullptr), #endif - INIT_MEMBER(tp_repr , TPyStream::Repr), - INIT_MEMBER(tp_as_number , nullptr), - INIT_MEMBER(tp_as_sequence , nullptr), - INIT_MEMBER(tp_as_mapping , nullptr), - INIT_MEMBER(tp_hash , nullptr), - INIT_MEMBER(tp_call , nullptr), - INIT_MEMBER(tp_str , nullptr), - INIT_MEMBER(tp_getattro , nullptr), - INIT_MEMBER(tp_setattro , nullptr), - INIT_MEMBER(tp_as_buffer , nullptr), - INIT_MEMBER(tp_flags , Py_TPFLAGS_HAVE_ITER), - INIT_MEMBER(tp_doc , "yql.TStream object"), - INIT_MEMBER(tp_traverse , nullptr), - INIT_MEMBER(tp_clear , nullptr), - INIT_MEMBER(tp_richcompare , nullptr), - INIT_MEMBER(tp_weaklistoffset , 0), - INIT_MEMBER(tp_iter , PyObject_SelfIter), - INIT_MEMBER(tp_iternext , TPyStream::Next), - INIT_MEMBER(tp_methods , nullptr), - INIT_MEMBER(tp_members , nullptr), - INIT_MEMBER(tp_getset , nullptr), - INIT_MEMBER(tp_base , nullptr), - INIT_MEMBER(tp_dict , nullptr), - INIT_MEMBER(tp_descr_get , nullptr), - INIT_MEMBER(tp_descr_set , nullptr), - INIT_MEMBER(tp_dictoffset , 0), - INIT_MEMBER(tp_init , nullptr), - INIT_MEMBER(tp_alloc , nullptr), - INIT_MEMBER(tp_new , nullptr), - INIT_MEMBER(tp_free , nullptr), - INIT_MEMBER(tp_is_gc , nullptr), - INIT_MEMBER(tp_bases , nullptr), - INIT_MEMBER(tp_mro , nullptr), - INIT_MEMBER(tp_cache , nullptr), - INIT_MEMBER(tp_subclasses , nullptr), - INIT_MEMBER(tp_weaklist , nullptr), - INIT_MEMBER(tp_del , nullptr), - INIT_MEMBER(tp_version_tag , 0), + INIT_MEMBER(tp_repr, TPyStream::Repr), + INIT_MEMBER(tp_as_number, nullptr), + INIT_MEMBER(tp_as_sequence, nullptr), + INIT_MEMBER(tp_as_mapping, nullptr), + INIT_MEMBER(tp_hash, nullptr), + INIT_MEMBER(tp_call, nullptr), + INIT_MEMBER(tp_str, nullptr), + INIT_MEMBER(tp_getattro, nullptr), + INIT_MEMBER(tp_setattro, nullptr), + INIT_MEMBER(tp_as_buffer, nullptr), + INIT_MEMBER(tp_flags, Py_TPFLAGS_HAVE_ITER), + INIT_MEMBER(tp_doc, "yql.TStream object"), + INIT_MEMBER(tp_traverse, 
nullptr), + INIT_MEMBER(tp_clear, nullptr), + INIT_MEMBER(tp_richcompare, nullptr), + INIT_MEMBER(tp_weaklistoffset, 0), + INIT_MEMBER(tp_iter, PyObject_SelfIter), + INIT_MEMBER(tp_iternext, TPyStream::Next), + INIT_MEMBER(tp_methods, nullptr), + INIT_MEMBER(tp_members, nullptr), + INIT_MEMBER(tp_getset, nullptr), + INIT_MEMBER(tp_base, nullptr), + INIT_MEMBER(tp_dict, nullptr), + INIT_MEMBER(tp_descr_get, nullptr), + INIT_MEMBER(tp_descr_set, nullptr), + INIT_MEMBER(tp_dictoffset, 0), + INIT_MEMBER(tp_init, nullptr), + INIT_MEMBER(tp_alloc, nullptr), + INIT_MEMBER(tp_new, nullptr), + INIT_MEMBER(tp_free, nullptr), + INIT_MEMBER(tp_is_gc, nullptr), + INIT_MEMBER(tp_bases, nullptr), + INIT_MEMBER(tp_mro, nullptr), + INIT_MEMBER(tp_cache, nullptr), + INIT_MEMBER(tp_subclasses, nullptr), + INIT_MEMBER(tp_weaklist, nullptr), + INIT_MEMBER(tp_del, nullptr), + INIT_MEMBER(tp_version_tag, 0), #if PY_MAJOR_VERSION >= 3 - INIT_MEMBER(tp_finalize , nullptr), + INIT_MEMBER(tp_finalize, nullptr), #endif #if PY_VERSION_HEX >= 0x030800b1 - INIT_MEMBER(tp_vectorcall , nullptr), + INIT_MEMBER(tp_vectorcall, nullptr), #endif #if PY_VERSION_HEX >= 0x030800b4 && PY_VERSION_HEX < 0x03090000 - INIT_MEMBER(tp_print , nullptr), + INIT_MEMBER(tp_print, nullptr), #endif }; PyObject* TPyStream::New( - const TPyCastContext::TPtr& castCtx, - const NUdf::TType* type, - NUdf::IBoxedValuePtr value) + const TPyCastContext::TPtr& castCtx, + const NUdf::TType* type, + NUdf::IBoxedValuePtr value) { TPyStream* stream = new TPyStream; PyObject_INIT(stream, &PyStreamType); @@ -143,18 +145,19 @@ PyObject* TPyStream::Next(PyObject* self) { auto status = NUdf::TBoxedValueAccessor::Fetch(*stream->Value.Get(), item); switch (status) { - case NUdf::EFetchStatus::Ok: - return ToPyObject(stream->CastCtx, stream->ItemType, item) + case NUdf::EFetchStatus::Ok: + return ToPyObject(stream->CastCtx, stream->ItemType, item) .Release(); - case NUdf::EFetchStatus::Finish: - return nullptr; - case 
NUdf::EFetchStatus::Yield: - PyErr_SetNone(PyYieldIterationException); - return nullptr; - default: - Y_ABORT("Unknown stream status"); + case NUdf::EFetchStatus::Finish: + return nullptr; + case NUdf::EFetchStatus::Yield: + PyErr_SetNone(PyYieldIterationException); + return nullptr; + default: + Y_ABORT("Unknown stream status"); } - } PY_CATCH(nullptr) + } + PY_CATCH(nullptr) } ////////////////////////////////////////////////////////////////////////////// @@ -163,13 +166,13 @@ PyObject* TPyStream::Next(PyObject* self) { class TStreamOverPyIter final: public NUdf::TBoxedValue { public: TStreamOverPyIter( - TPyCastContext::TPtr castCtx, - const NUdf::TType* itemType, - TPyObjectPtr pyIter, - TPyObjectPtr pyIterable, - TPyObjectPtr pyGeneratorCallable, - TPyObjectPtr pyGeneratorCallableClosure, - TPyObjectPtr pyGeneratorCallableArgs) + TPyCastContext::TPtr castCtx, + const NUdf::TType* itemType, + TPyObjectPtr pyIter, + TPyObjectPtr pyIterable, + TPyObjectPtr pyGeneratorCallable, + TPyObjectPtr pyGeneratorCallableClosure, + TPyObjectPtr pyGeneratorCallableArgs) : CastCtx_(std::move(castCtx)) , ItemType_(itemType) , PyIter_(std::move(pyIter)) @@ -215,7 +218,9 @@ private: PyIter_.Reset(); TPyObjectPtr result(PyObject_CallObject(PyGeneratorCallable_.Get(), PyGeneratorCallableArgs_.Get())); if (!result) { - UdfTerminate((TStringBuilder() << CastCtx_->PyCtx->Pos << "Failed to execute:\n" << GetLastErrorAsString()).c_str()); + UdfTerminate((TStringBuilder() << CastCtx_->PyCtx->Pos << "Failed to execute:\n" + << GetLastErrorAsString()) + .c_str()); } if (PyGen_Check(result.Get())) { @@ -244,8 +249,7 @@ private: } return NUdf::EFetchStatus::Finish; - } - catch (const yexception& e) { + } catch (const yexception& e) { UdfTerminate((TStringBuilder() << CastCtx_->PyCtx->Pos << e.what()).c_str()); } } @@ -260,14 +264,13 @@ private: TPyObjectPtr PyGeneratorCallableArgs_; }; - ////////////////////////////////////////////////////////////////////////////// // public functions 
////////////////////////////////////////////////////////////////////////////// TPyObjectPtr ToPyStream( - const TPyCastContext::TPtr& castCtx, - const NKikimr::NUdf::TType* type, - const NKikimr::NUdf::TUnboxedValuePod& value) + const TPyCastContext::TPtr& castCtx, + const NKikimr::NUdf::TType* type, + const NKikimr::NUdf::TUnboxedValuePod& value) { return TPyStream::New(castCtx, type, value.AsBoxed()); } @@ -278,8 +281,7 @@ NKikimr::NUdf::TUnboxedValue FromPyStream( const TPyObjectPtr& value, const TPyObjectPtr& originalCallable, const TPyObjectPtr& originalCallableClosure, - const TPyObjectPtr& originalCallableArgs -) + const TPyObjectPtr& originalCallableArgs) { const NUdf::TStreamTypeInspector inspector(*castCtx->PyCtx->TypeInfoHelper, type); const NUdf::TType* itemType = inspector.GetItemType(); @@ -290,7 +292,7 @@ NKikimr::NUdf::TUnboxedValue FromPyStream( UdfTerminate((TStringBuilder() << castCtx->PyCtx->Pos << GetLastErrorAsString()).c_str()); } return NUdf::TUnboxedValuePod(new TStreamOverPyIter(castCtx, itemType, std::move(iter), nullptr, - originalCallable, originalCallableClosure, originalCallableArgs)); + originalCallable, originalCallableClosure, originalCallableArgs)); } if (PyIter_Check(value.Get()) @@ -301,7 +303,7 @@ NKikimr::NUdf::TUnboxedValue FromPyStream( ) { TPyObjectPtr iter(value.Get(), TPyObjectPtr::ADD_REF); return NUdf::TUnboxedValuePod(new TStreamOverPyIter(castCtx, itemType, std::move(iter), nullptr, - originalCallable, originalCallableClosure, originalCallableArgs)); + originalCallable, originalCallableClosure, originalCallableArgs)); } // assume that this function will returns generator @@ -324,7 +326,7 @@ NKikimr::NUdf::TUnboxedValue FromPyStream( } return NUdf::TUnboxedValuePod(new TStreamOverPyIter(castCtx, itemType, std::move(iter), nullptr, - originalCallable ? value : nullptr, originalCallable ? callableClosure : nullptr, nullptr)); + originalCallable ? value : nullptr, originalCallable ? 
callableClosure : nullptr, nullptr)); } // must be after checking for callable @@ -337,7 +339,9 @@ NKikimr::NUdf::TUnboxedValue FromPyStream( } UdfTerminate((TStringBuilder() << castCtx->PyCtx->Pos << "Expected iterator, generator, generator factory, " - "or iterable object, but got " << PyObjectRepr(value.Get())).c_str()); + "or iterable object, but got " + << PyObjectRepr(value.Get())) + .c_str()); } } // namespace NPython diff --git a/yql/essentials/udfs/common/python/bindings/py_stream.h b/yql/essentials/udfs/common/python/bindings/py_stream.h index f677e23930d..ab28b846433 100644 --- a/yql/essentials/udfs/common/python/bindings/py_stream.h +++ b/yql/essentials/udfs/common/python/bindings/py_stream.h @@ -9,16 +9,16 @@ extern PyTypeObject PyStreamType; extern PyObject* PyYieldIterationException; TPyObjectPtr ToPyStream( - const TPyCastContext::TPtr& castCtx, - const NKikimr::NUdf::TType* type, - const NKikimr::NUdf::TUnboxedValuePod& value); + const TPyCastContext::TPtr& castCtx, + const NKikimr::NUdf::TType* type, + const NKikimr::NUdf::TUnboxedValuePod& value); NKikimr::NUdf::TUnboxedValue FromPyStream( - const TPyCastContext::TPtr& castCtx, - const NKikimr::NUdf::TType* type, - const TPyObjectPtr& value, - const TPyObjectPtr& originalCallable, - const TPyObjectPtr& originalCallableClosure, - const TPyObjectPtr& originalCallableArgs); + const TPyCastContext::TPtr& castCtx, + const NKikimr::NUdf::TType* type, + const TPyObjectPtr& value, + const TPyObjectPtr& originalCallable, + const TPyObjectPtr& originalCallableClosure, + const TPyObjectPtr& originalCallableArgs); } // namespace NPython diff --git a/yql/essentials/udfs/common/python/bindings/py_stream_ut.cpp b/yql/essentials/udfs/common/python/bindings/py_stream_ut.cpp index 4a36f7b8f36..61c0fe5caf3 100644 --- a/yql/essentials/udfs/common/python/bindings/py_stream_ut.cpp +++ b/yql/essentials/udfs/common/python/bindings/py_stream_ut.cpp @@ -2,207 +2,206 @@ #include <library/cpp/testing/unittest/registar.h> - 
using namespace NPython; Y_UNIT_TEST_SUITE(TPyStreamTest) { - void Ui32StreamValidator(const NUdf::TUnboxedValuePod& value) { - UNIT_ASSERT(value); - UNIT_ASSERT(value.IsBoxed()); - - NUdf::TUnboxedValue item; - ui32 expected = 0; - NUdf::EFetchStatus status; +void Ui32StreamValidator(const NUdf::TUnboxedValuePod& value) { + UNIT_ASSERT(value); + UNIT_ASSERT(value.IsBoxed()); - while (true) { - status = value.Fetch(item); - if (status != NUdf::EFetchStatus::Ok) break; + NUdf::TUnboxedValue item; + ui32 expected = 0; + NUdf::EFetchStatus status; - ui32 actual = item.Get<ui32>(); - UNIT_ASSERT_EQUAL(actual, expected); - expected++; + while (true) { + status = value.Fetch(item); + if (status != NUdf::EFetchStatus::Ok) { + break; } - UNIT_ASSERT_EQUAL(status, NUdf::EFetchStatus::Finish); - UNIT_ASSERT_EQUAL(expected, 10); + ui32 actual = item.Get<ui32>(); + UNIT_ASSERT_EQUAL(actual, expected); + expected++; } - struct TTestStream final: NUdf::TBoxedValue { - TTestStream(ui32 maxValue, ui32 yieldOn = Max<ui32>()) - : Current_(0) - , YieldOn_(yieldOn) - , MaxValue_(maxValue) - { - } + UNIT_ASSERT_EQUAL(status, NUdf::EFetchStatus::Finish); + UNIT_ASSERT_EQUAL(expected, 10); +} - private: - NUdf::EFetchStatus Fetch(NUdf::TUnboxedValue& result) override { - if (Current_ == YieldOn_) { - return NUdf::EFetchStatus::Yield; - } else if (Current_ >= MaxValue_) { - return NUdf::EFetchStatus::Finish; - } - result = NUdf::TUnboxedValuePod(Current_++); - return NUdf::EFetchStatus::Ok; +struct TTestStream final: NUdf::TBoxedValue { + TTestStream(ui32 maxValue, ui32 yieldOn = Max<ui32>()) + : Current_(0) + , YieldOn_(yieldOn) + , MaxValue_(maxValue) + { + } + +private: + NUdf::EFetchStatus Fetch(NUdf::TUnboxedValue& result) override { + if (Current_ == YieldOn_) { + return NUdf::EFetchStatus::Yield; + } else if (Current_ >= MaxValue_) { + return NUdf::EFetchStatus::Finish; } + result = NUdf::TUnboxedValuePod(Current_++); + return NUdf::EFetchStatus::Ok; + } - ui32 Current_, YieldOn_, 
MaxValue_; - }; + ui32 Current_, YieldOn_, MaxValue_; +}; - Y_UNIT_TEST(FromGenerator) { - TPythonTestEngine engine; - engine.ToMiniKQL<NUdf::TStream<ui32>>( - "def Test():\n" - " num = 0\n" - " while num < 10:\n" - " yield num\n" - " num += 1\n", - Ui32StreamValidator); - } +Y_UNIT_TEST(FromGenerator) { + TPythonTestEngine engine; + engine.ToMiniKQL<NUdf::TStream<ui32>>( + "def Test():\n" + " num = 0\n" + " while num < 10:\n" + " yield num\n" + " num += 1\n", + Ui32StreamValidator); +} - Y_UNIT_TEST(FromGeneratorFactory) { - TPythonTestEngine engine; - engine.ToMiniKQL<NUdf::TStream<ui32>>( - "def first_10():\n" - " num = 0\n" - " while num < 10:\n" - " yield num\n" - " num += 1\n" - "def Test():\n" - " return first_10\n", - Ui32StreamValidator); - } +Y_UNIT_TEST(FromGeneratorFactory) { + TPythonTestEngine engine; + engine.ToMiniKQL<NUdf::TStream<ui32>>( + "def first_10():\n" + " num = 0\n" + " while num < 10:\n" + " yield num\n" + " num += 1\n" + "def Test():\n" + " return first_10\n", + Ui32StreamValidator); +} - Y_UNIT_TEST(FromIterator) { - TPythonTestEngine engine; - engine.ToMiniKQL<NUdf::TStream<ui32>>( - "def Test():\n" - " return iter(range(10))\n", - Ui32StreamValidator); - } +Y_UNIT_TEST(FromIterator) { + TPythonTestEngine engine; + engine.ToMiniKQL<NUdf::TStream<ui32>>( + "def Test():\n" + " return iter(range(10))\n", + Ui32StreamValidator); +} - Y_UNIT_TEST(FromIterable) { - TPythonTestEngine engine; - engine.ToMiniKQL<NUdf::TStream<ui32>>( - "def Test():\n" +Y_UNIT_TEST(FromIterable) { + TPythonTestEngine engine; + engine.ToMiniKQL<NUdf::TStream<ui32>>( + "def Test():\n" #if PY_MAJOR_VERSION >= 3 - " return range(10)\n", + " return range(10)\n", #else - " return xrange(10)\n", + " return xrange(10)\n", #endif - Ui32StreamValidator); - } + Ui32StreamValidator); +} - Y_UNIT_TEST(FromCustomIterable) { - TPythonTestEngine engine; - engine.ToMiniKQL<NUdf::TStream<ui32>>( - "class T:\n" - " def __init__(self, l):\n" - " self.l = l\n" - " def 
__len__(self):\n" - " return len(self.l)\n" - " def __nonzero__(self):\n" - " return bool(self.l)\n" - " def __iter__(self):\n" - " return iter(self.l)\n" - "\n" - "def Test():\n" - " return T(list(range(10)))\n", - Ui32StreamValidator); - } +Y_UNIT_TEST(FromCustomIterable) { + TPythonTestEngine engine; + engine.ToMiniKQL<NUdf::TStream<ui32>>( + "class T:\n" + " def __init__(self, l):\n" + " self.l = l\n" + " def __len__(self):\n" + " return len(self.l)\n" + " def __nonzero__(self):\n" + " return bool(self.l)\n" + " def __iter__(self):\n" + " return iter(self.l)\n" + "\n" + "def Test():\n" + " return T(list(range(10)))\n", + Ui32StreamValidator); +} - Y_UNIT_TEST(FromList) { - TPythonTestEngine engine; - engine.ToMiniKQL<NUdf::TStream<ui32>>( - "def Test():\n" - " return [0, 1, 2, 3, 4, 5, 6, 7, 8, 9]\n", - Ui32StreamValidator); - } +Y_UNIT_TEST(FromList) { + TPythonTestEngine engine; + engine.ToMiniKQL<NUdf::TStream<ui32>>( + "def Test():\n" + " return [0, 1, 2, 3, 4, 5, 6, 7, 8, 9]\n", + Ui32StreamValidator); +} - Y_UNIT_TEST(ToPython) { - TPythonTestEngine engine; - engine.ToPython<NUdf::TStream<ui32>>( - [](const TType* /*type*/, const NUdf::IValueBuilder& /*vb*/) { - return NUdf::TUnboxedValuePod(new TTestStream(10)); - }, - "def Test(value):\n" - " import yql\n" - " assert repr(value) == '<yql.TStream>'\n" - " assert type(value).__name__ == 'TStream'\n" - " assert list(value) == [0, 1, 2, 3, 4, 5, 6, 7, 8, 9]\n"); - } +Y_UNIT_TEST(ToPython) { + TPythonTestEngine engine; + engine.ToPython<NUdf::TStream<ui32>>( + [](const TType* /*type*/, const NUdf::IValueBuilder& /*vb*/) { + return NUdf::TUnboxedValuePod(new TTestStream(10)); + }, + "def Test(value):\n" + " import yql\n" + " assert repr(value) == '<yql.TStream>'\n" + " assert type(value).__name__ == 'TStream'\n" + " assert list(value) == [0, 1, 2, 3, 4, 5, 6, 7, 8, 9]\n"); +} - Y_UNIT_TEST(ToPythonAndBackAsIs) { - TPythonTestEngine engine; - engine.ToPythonAndBack<NUdf::TStream<ui32>>( - [](const TType* 
/*type*/, const NUdf::IValueBuilder& /*vb*/) { - return NUdf::TUnboxedValuePod(new TTestStream(10)); - }, - "def Test(value): return value", - Ui32StreamValidator - ); - } +Y_UNIT_TEST(ToPythonAndBackAsIs) { + TPythonTestEngine engine; + engine.ToPythonAndBack<NUdf::TStream<ui32>>( + [](const TType* /*type*/, const NUdf::IValueBuilder& /*vb*/) { + return NUdf::TUnboxedValuePod(new TTestStream(10)); + }, + "def Test(value): return value", + Ui32StreamValidator); +} - Y_UNIT_TEST(YieldingStreamFromPython) { - TPythonTestEngine engine; - engine.ToMiniKQL<NUdf::TStream<ui32>>( - "import yql\n" - "def Test():\n" - " yield 0\n" - " yield 1\n" - " yield yql.TYieldIteration\n" - " yield 2\n", - [](const NUdf::TUnboxedValuePod& value) { - UNIT_ASSERT(value); - UNIT_ASSERT(value.IsBoxed()); +Y_UNIT_TEST(YieldingStreamFromPython) { + TPythonTestEngine engine; + engine.ToMiniKQL<NUdf::TStream<ui32>>( + "import yql\n" + "def Test():\n" + " yield 0\n" + " yield 1\n" + " yield yql.TYieldIteration\n" + " yield 2\n", + [](const NUdf::TUnboxedValuePod& value) { + UNIT_ASSERT(value); + UNIT_ASSERT(value.IsBoxed()); - NUdf::TUnboxedValue item; - ui32 expected = 0; - NUdf::EFetchStatus status; + NUdf::TUnboxedValue item; + ui32 expected = 0; + NUdf::EFetchStatus status; - while ((status = value.Fetch(item)) == NUdf::EFetchStatus::Ok) { - ui32 actual = item.Get<ui32>(); - UNIT_ASSERT_EQUAL(actual, expected); - expected++; - } + while ((status = value.Fetch(item)) == NUdf::EFetchStatus::Ok) { + ui32 actual = item.Get<ui32>(); + UNIT_ASSERT_EQUAL(actual, expected); + expected++; + } - UNIT_ASSERT_EQUAL(status, NUdf::EFetchStatus::Yield); - UNIT_ASSERT_EQUAL(expected, 2); - }); - } + UNIT_ASSERT_EQUAL(status, NUdf::EFetchStatus::Yield); + UNIT_ASSERT_EQUAL(expected, 2); + }); +} - Y_UNIT_TEST(YieldingStreamFromCpp) { - TPythonTestEngine engine; - engine.ToPython<NUdf::TStream<ui32>>( - [](const TType* /*type*/, const NUdf::IValueBuilder& /*vb*/) { - return NUdf::TUnboxedValuePod(new 
TTestStream(5, 2)); - }, - "import yql\n" - "def Test(value):\n" - " assert repr(value) == '<yql.TStream>'\n" - " assert type(value).__name__ == 'TStream'\n" - " assert next(value) == 0\n" - " assert next(value) == 1\n" - " try:\n" - " next(value)\n" - " except yql.TYieldIteration:\n" - " pass\n" - " else:\n" - " assert False, 'Expected yql.TYieldIteration'\n"); - } +Y_UNIT_TEST(YieldingStreamFromCpp) { + TPythonTestEngine engine; + engine.ToPython<NUdf::TStream<ui32>>( + [](const TType* /*type*/, const NUdf::IValueBuilder& /*vb*/) { + return NUdf::TUnboxedValuePod(new TTestStream(5, 2)); + }, + "import yql\n" + "def Test(value):\n" + " assert repr(value) == '<yql.TStream>'\n" + " assert type(value).__name__ == 'TStream'\n" + " assert next(value) == 0\n" + " assert next(value) == 1\n" + " try:\n" + " next(value)\n" + " except yql.TYieldIteration:\n" + " pass\n" + " else:\n" + " assert False, 'Expected yql.TYieldIteration'\n"); +} - Y_UNIT_TEST(FromCppListIterator) { - TPythonTestEngine engine; - engine.ToPythonAndBack<NUdf::TListType<ui32>, NUdf::TStream<ui32>>( - [](const TType*, const NUdf::IValueBuilder& vb) { - NUdf::TUnboxedValue *items = nullptr; - const auto a = vb.NewArray(10U, items); - ui32 i = 0U; - std::generate_n(items, 10U, [&i](){ return NUdf::TUnboxedValuePod(i++); }); - return a; - }, - "def Test(value): return iter(value)", - Ui32StreamValidator - ); - } +Y_UNIT_TEST(FromCppListIterator) { + TPythonTestEngine engine; + engine.ToPythonAndBack<NUdf::TListType<ui32>, NUdf::TStream<ui32>>( + [](const TType*, const NUdf::IValueBuilder& vb) { + NUdf::TUnboxedValue* items = nullptr; + const auto a = vb.NewArray(10U, items); + ui32 i = 0U; + std::generate_n(items, 10U, [&i]() { return NUdf::TUnboxedValuePod(i++); }); + return a; + }, + "def Test(value): return iter(value)", + Ui32StreamValidator); } +} // Y_UNIT_TEST_SUITE(TPyStreamTest) diff --git a/yql/essentials/udfs/common/python/bindings/py_string_ut.cpp 
b/yql/essentials/udfs/common/python/bindings/py_string_ut.cpp index b1f5a13786b..bfe107e44b5 100644 --- a/yql/essentials/udfs/common/python/bindings/py_string_ut.cpp +++ b/yql/essentials/udfs/common/python/bindings/py_string_ut.cpp @@ -5,94 +5,94 @@ using namespace NPython; Y_UNIT_TEST_SUITE(TPyStringTest) { - template <typename TStringType> - void TestStringCasts() { - TStringType testStr1(TStringBuf("test string")); - TStringBuf strBuf1 = testStr1; - TPyObjectPtr str1 = PyBytes_FromString(strBuf1.data()); - const auto value = PyCast<TStringType>(str1.Get()); +template <typename TStringType> +void TestStringCasts() { + TStringType testStr1(TStringBuf("test string")); + TStringBuf strBuf1 = testStr1; + TPyObjectPtr str1 = PyBytes_FromString(strBuf1.data()); + const auto value = PyCast<TStringType>(str1.Get()); - UNIT_ASSERT_STRINGS_EQUAL(value, testStr1); + UNIT_ASSERT_STRINGS_EQUAL(value, testStr1); - TStringType testStr2(TStringBuf("another test string")); - TStringBuf strBuf2 = testStr2; - TPyObjectPtr str2 = PyCast<TStringType>(testStr2); + TStringType testStr2(TStringBuf("another test string")); + TStringBuf strBuf2 = testStr2; + TPyObjectPtr str2 = PyCast<TStringType>(testStr2); - Py_ssize_t size = 0U; - char* buf = nullptr; - const auto rc = PyBytes_AsStringAndSize(str2.Get(), &buf, &size); - UNIT_ASSERT(rc >= 0); - UNIT_ASSERT(buf != nullptr); - UNIT_ASSERT_EQUAL(static_cast<size_t>(size), strBuf2.size()); - UNIT_ASSERT_STRINGS_EQUAL(buf, testStr2); - } + Py_ssize_t size = 0U; + char* buf = nullptr; + const auto rc = PyBytes_AsStringAndSize(str2.Get(), &buf, &size); + UNIT_ASSERT(rc >= 0); + UNIT_ASSERT(buf != nullptr); + UNIT_ASSERT_EQUAL(static_cast<size_t>(size), strBuf2.size()); + UNIT_ASSERT_STRINGS_EQUAL(buf, testStr2); +} - template <typename TStringType> - void TestBinaryStringCasts() { - TStringType testStr1(TStringBuf("\xa0\xa1"sv)); - TStringBuf strBuf1 = testStr1; - TPyObjectPtr str1 = PyBytes_FromString(strBuf1.data()); - const auto value = 
PyCast<TStringType>(str1.Get()); +template <typename TStringType> +void TestBinaryStringCasts() { + TStringType testStr1(TStringBuf("\xa0\xa1"sv)); + TStringBuf strBuf1 = testStr1; + TPyObjectPtr str1 = PyBytes_FromString(strBuf1.data()); + const auto value = PyCast<TStringType>(str1.Get()); - UNIT_ASSERT_STRINGS_EQUAL(value, testStr1); + UNIT_ASSERT_STRINGS_EQUAL(value, testStr1); - TStringType testStr2(TStringBuf("\xf0\x90\x28\xbc"sv)); - TStringBuf strBuf2 = testStr2; - TPyObjectPtr str2 = PyCast<TStringType>(testStr2); + TStringType testStr2(TStringBuf("\xf0\x90\x28\xbc"sv)); + TStringBuf strBuf2 = testStr2; + TPyObjectPtr str2 = PyCast<TStringType>(testStr2); - Py_ssize_t size = 0U; - char* buf = nullptr; - const auto rc = PyBytes_AsStringAndSize(str2.Get(), &buf, &size); - UNIT_ASSERT(rc >= 0); - UNIT_ASSERT(buf != nullptr); - UNIT_ASSERT_EQUAL(static_cast<size_t>(size), strBuf2.size()); - UNIT_ASSERT_STRINGS_EQUAL(buf, testStr2); - } + Py_ssize_t size = 0U; + char* buf = nullptr; + const auto rc = PyBytes_AsStringAndSize(str2.Get(), &buf, &size); + UNIT_ASSERT(rc >= 0); + UNIT_ASSERT(buf != nullptr); + UNIT_ASSERT_EQUAL(static_cast<size_t>(size), strBuf2.size()); + UNIT_ASSERT_STRINGS_EQUAL(buf, testStr2); +} - template <typename TStringType> - void TestUtf8StringCasts() { - const TStringType testStr1(TStringBuf("тестовая строка")); - TStringBuf strBuf1 = testStr1; - const TPyObjectPtr str1 = PyUnicode_FromString(strBuf1.data()); - const TPyObjectPtr utf8 = PyUnicode_AsUTF8String(str1.Get()); - const auto value = PyCast<TStringType>(utf8.Get()); - UNIT_ASSERT_STRINGS_EQUAL(value, testStr1); +template <typename TStringType> +void TestUtf8StringCasts() { + const TStringType testStr1(TStringBuf("тестовая строка")); + TStringBuf strBuf1 = testStr1; + const TPyObjectPtr str1 = PyUnicode_FromString(strBuf1.data()); + const TPyObjectPtr utf8 = PyUnicode_AsUTF8String(str1.Get()); + const auto value = PyCast<TStringType>(utf8.Get()); + 
UNIT_ASSERT_STRINGS_EQUAL(value, testStr1); - const TStringType testStr2(TStringBuf("еще одна тестовая строка")); - TStringBuf strBuf2 = testStr2; - const auto str2 = ToPyUnicode<TStringType>(testStr2); + const TStringType testStr2(TStringBuf("еще одна тестовая строка")); + TStringBuf strBuf2 = testStr2; + const auto str2 = ToPyUnicode<TStringType>(testStr2); - UNIT_ASSERT(PyUnicode_Check(str2.Get())); + UNIT_ASSERT(PyUnicode_Check(str2.Get())); - Py_ssize_t size = 0U; + Py_ssize_t size = 0U; #if PY_MAJOR_VERSION >= 3 - const auto buf = PyUnicode_AsUTF8AndSize(str2.Get(), &size); + const auto buf = PyUnicode_AsUTF8AndSize(str2.Get(), &size); #else - char* buf = nullptr; - const TPyObjectPtr pyUtf8Str = PyUnicode_AsUTF8String(str2.Get()); - const auto rc = PyBytes_AsStringAndSize(pyUtf8Str.Get(), &buf, &size); - UNIT_ASSERT(rc >= 0); + char* buf = nullptr; + const TPyObjectPtr pyUtf8Str = PyUnicode_AsUTF8String(str2.Get()); + const auto rc = PyBytes_AsStringAndSize(pyUtf8Str.Get(), &buf, &size); + UNIT_ASSERT(rc >= 0); #endif - UNIT_ASSERT(buf != nullptr); - UNIT_ASSERT_EQUAL(static_cast<size_t>(size), strBuf2.size()); - UNIT_ASSERT_STRINGS_EQUAL(buf, testStr2); - } + UNIT_ASSERT(buf != nullptr); + UNIT_ASSERT_EQUAL(static_cast<size_t>(size), strBuf2.size()); + UNIT_ASSERT_STRINGS_EQUAL(buf, testStr2); +} - Y_UNIT_TEST(Simple) { - TestStringCasts<TString>(); - TestStringCasts<TStringBuf>(); - TestStringCasts<NUdf::TStringRef>(); - } +Y_UNIT_TEST(Simple) { + TestStringCasts<TString>(); + TestStringCasts<TStringBuf>(); + TestStringCasts<NUdf::TStringRef>(); +} - Y_UNIT_TEST(Utf8) { - TestUtf8StringCasts<TString>(); - TestUtf8StringCasts<TStringBuf>(); - TestUtf8StringCasts<NUdf::TStringRef>(); - } +Y_UNIT_TEST(Utf8) { + TestUtf8StringCasts<TString>(); + TestUtf8StringCasts<TStringBuf>(); + TestUtf8StringCasts<NUdf::TStringRef>(); +} - Y_UNIT_TEST(Binary) { - TestBinaryStringCasts<TString>(); - TestBinaryStringCasts<TStringBuf>(); - 
TestBinaryStringCasts<NUdf::TStringRef>(); - } +Y_UNIT_TEST(Binary) { + TestBinaryStringCasts<TString>(); + TestBinaryStringCasts<TStringBuf>(); + TestBinaryStringCasts<NUdf::TStringRef>(); } +} // Y_UNIT_TEST_SUITE(TPyStringTest) diff --git a/yql/essentials/udfs/common/python/bindings/py_struct.cpp b/yql/essentials/udfs/common/python/bindings/py_struct.cpp index bcfd86351fe..745f7feae19 100644 --- a/yql/essentials/udfs/common/python/bindings/py_struct.cpp +++ b/yql/essentials/udfs/common/python/bindings/py_struct.cpp @@ -34,8 +34,7 @@ TPyObjectPtr CreateNewStrucInstance(const TPyCastContext::TPtr& ctx, const NKiki INIT_MEMBER(name, "yql.Struct"), INIT_MEMBER(doc, nullptr), INIT_MEMBER(fields, fields.data()), - INIT_MEMBER(n_in_sequence, int(inspector.GetMembersCount())) - }; + INIT_MEMBER(n_in_sequence, int(inspector.GetMembersCount()))}; const auto typeObject = new PyTypeObject(); if (0 > PyStructSequence_InitType2(typeObject, &desc)) { @@ -48,14 +47,14 @@ TPyObjectPtr CreateNewStrucInstance(const TPyCastContext::TPtr& ctx, const NKiki const TPyObjectPtr object = PyStructSequence_New(it.first->second.GetAs<PyTypeObject>()); #else const auto className = TString("yql.Struct_") += ToString(ctx->StructTypes.size()); - PyObject* metaclass = (PyObject *) &PyClass_Type; + PyObject* metaclass = (PyObject*)&PyClass_Type; const TPyObjectPtr name = PyRepr(TStringBuf(className)); const TPyObjectPtr bases = PyTuple_New(0); const TPyObjectPtr dict = PyDict_New(); TPyObjectPtr newClass = PyObject_CallFunctionObjArgs( - metaclass, name.Get(), bases.Get(), dict.Get(), - nullptr); + metaclass, name.Get(), bases.Get(), dict.Get(), + nullptr); if (!newClass) { throw yexception() << "can't create new type: " << GetLastErrorAsString(); } @@ -120,7 +119,7 @@ TPyObjectPtr GetAttrFromPyObject(PyObject* v, TStringBuf name) return PyObject_GetAttr(v, w.Get()); } -} +} // namespace TPyObjectPtr ToPyStruct(const TPyCastContext::TPtr& ctx, const NUdf::TType* type, const 
NUdf::TUnboxedValuePod& value) { @@ -138,8 +137,8 @@ TPyObjectPtr ToPyStruct(const TPyCastContext::TPtr& ctx, const NUdf::TType* type const auto item = ToPyObject(ctx, inspector.GetMemberType(i), *ptr++); if (0 > PyObject_SetAttrString(object.Get(), name.data(), item.Get())) { throw yexception() - << "Can't set attr '" << name << "' to python object: " - << GetLastErrorAsString(); + << "Can't set attr '" << name << "' to python object: " + << GetLastErrorAsString(); } #endif } @@ -153,8 +152,8 @@ TPyObjectPtr ToPyStruct(const TPyCastContext::TPtr& ctx, const NUdf::TType* type const auto item = ToPyObject(ctx, inspector.GetMemberType(i), value.GetElement(i)); if (0 > PyObject_SetAttrString(object.Get(), name.data(), item.Get())) { throw yexception() - << "Can't set attr '" << name << "' to python object: " - << GetLastErrorAsString(); + << "Can't set attr '" << name << "' to python object: " + << GetLastErrorAsString(); } #endif } @@ -195,7 +194,8 @@ NUdf::TUnboxedValue FromPyStruct(const TPyCastContext::TPtr& ctx, const NUdf::TT } if (!errors.empty()) { - throw yexception() << "Failed to convert dict to struct\n" << JoinSeq("\n", errors) << "\nDict repr: " << PyObjectRepr(value); + throw yexception() << "Failed to convert dict to struct\n" + << JoinSeq("\n", errors) << "\nDict repr: " << PyObjectRepr(value); } } else { for (ui32 i = 0; i < membersCount; i++) { @@ -222,11 +222,12 @@ NUdf::TUnboxedValue FromPyStruct(const TPyCastContext::TPtr& ctx, const NUdf::TT } if (!errors.empty()) { - throw yexception() << "Failed to convert object to struct\n" << JoinSeq("\n", errors) << "\nObject repr: " << PyObjectRepr(value); + throw yexception() << "Failed to convert object to struct\n" + << JoinSeq("\n", errors) << "\nObject repr: " << PyObjectRepr(value); } } return mkqlStruct; } -} +} // namespace NPython diff --git a/yql/essentials/udfs/common/python/bindings/py_struct.h b/yql/essentials/udfs/common/python/bindings/py_struct.h index 79a380283fb..6f2bdb917ed 100644 --- 
a/yql/essentials/udfs/common/python/bindings/py_struct.h +++ b/yql/essentials/udfs/common/python/bindings/py_struct.h @@ -6,12 +6,12 @@ namespace NPython { TPyObjectPtr ToPyStruct( - const TPyCastContext::TPtr& castCtx, - const NKikimr::NUdf::TType* type, - const NKikimr::NUdf::TUnboxedValuePod& value); + const TPyCastContext::TPtr& castCtx, + const NKikimr::NUdf::TType* type, + const NKikimr::NUdf::TUnboxedValuePod& value); NKikimr::NUdf::TUnboxedValue FromPyStruct( - const TPyCastContext::TPtr& ctx, - const NKikimr::NUdf::TType* type, PyObject* value); + const TPyCastContext::TPtr& ctx, + const NKikimr::NUdf::TType* type, PyObject* value); } // namespace NPython diff --git a/yql/essentials/udfs/common/python/bindings/py_struct_ut.cpp b/yql/essentials/udfs/common/python/bindings/py_struct_ut.cpp index b8662907907..5b5c1bfea6b 100644 --- a/yql/essentials/udfs/common/python/bindings/py_struct_ut.cpp +++ b/yql/essentials/udfs/common/python/bindings/py_struct_ut.cpp @@ -2,329 +2,287 @@ #include <library/cpp/testing/unittest/registar.h> - using namespace NPython; Y_UNIT_TEST_SUITE(TPyStructTest) { - Y_UNIT_TEST(FromPyObject) { - TPythonTestEngine engine; - - ui32 ageIdx = 0, nameIdx = 0; - auto personType = engine.GetTypeBuilder().Struct()-> - AddField<int>("age", &ageIdx) - .AddField<char*>("name", &nameIdx) - .Build(); +Y_UNIT_TEST(FromPyObject) { + TPythonTestEngine engine; - engine.ToMiniKQL(personType, - "class Person:\n" - " def __init__(self, age, name):\n" - " self.age = age\n" - " self.name = name\n" - "\n" - "def Test():\n" - " return Person(99, 'Jamel')\n", - [ageIdx, nameIdx](const NUdf::TUnboxedValuePod& value) { - UNIT_ASSERT(value); - UNIT_ASSERT(value.IsBoxed()); - auto name = value.GetElement(nameIdx); - UNIT_ASSERT_STRINGS_EQUAL(name.AsStringRef(), "Jamel"); - auto age = value.GetElement(ageIdx); - UNIT_ASSERT_EQUAL(age.Get<ui32>(), 99); - }); - } + ui32 ageIdx = 0, nameIdx = 0; + auto personType = 
engine.GetTypeBuilder().Struct()->AddField<int>("age", &ageIdx).AddField<char*>("name", &nameIdx).Build(); - Y_UNIT_TEST(FromPyObjectMissingOptionalField) { - TPythonTestEngine engine; + engine.ToMiniKQL(personType, + "class Person:\n" + " def __init__(self, age, name):\n" + " self.age = age\n" + " self.name = name\n" + "\n" + "def Test():\n" + " return Person(99, 'Jamel')\n", + [ageIdx, nameIdx](const NUdf::TUnboxedValuePod& value) { + UNIT_ASSERT(value); + UNIT_ASSERT(value.IsBoxed()); + auto name = value.GetElement(nameIdx); + UNIT_ASSERT_STRINGS_EQUAL(name.AsStringRef(), "Jamel"); + auto age = value.GetElement(ageIdx); + UNIT_ASSERT_EQUAL(age.Get<ui32>(), 99); + }); +} - ui32 ageIdx = 0, nameIdx = 0; - auto optionalStringType = engine.GetTypeBuilder().Optional()->Item<char*>().Build(); - auto personType = engine.GetTypeBuilder().Struct()-> - AddField<int>("age", &ageIdx) - .AddField("name", optionalStringType, &nameIdx) - .Build(); +Y_UNIT_TEST(FromPyObjectMissingOptionalField) { + TPythonTestEngine engine; - engine.ToMiniKQL(personType, - "class Person:\n" - " def __init__(self, age):\n" - " self.age = age\n" - "\n" - "def Test():\n" - " return Person(99)\n", - [ageIdx, nameIdx](const NUdf::TUnboxedValuePod& value) { - UNIT_ASSERT(value); - UNIT_ASSERT(value.IsBoxed()); - auto name = value.GetElement(nameIdx); - UNIT_ASSERT(!name); - auto age = value.GetElement(ageIdx); - UNIT_ASSERT_EQUAL(age.Get<ui32>(), 99); - }); - } + ui32 ageIdx = 0, nameIdx = 0; + auto optionalStringType = engine.GetTypeBuilder().Optional()->Item<char*>().Build(); + auto personType = engine.GetTypeBuilder().Struct()->AddField<int>("age", &ageIdx).AddField("name", optionalStringType, &nameIdx).Build(); - Y_UNIT_TEST(FromPyObjectBytesAttrWithNullCharacter) { - TPythonTestEngine engine; + engine.ToMiniKQL(personType, + "class Person:\n" + " def __init__(self, age):\n" + " self.age = age\n" + "\n" + "def Test():\n" + " return Person(99)\n", + [ageIdx, nameIdx](const NUdf::TUnboxedValuePod& 
value) { + UNIT_ASSERT(value); + UNIT_ASSERT(value.IsBoxed()); + auto name = value.GetElement(nameIdx); + UNIT_ASSERT(!name); + auto age = value.GetElement(ageIdx); + UNIT_ASSERT_EQUAL(age.Get<ui32>(), 99); + }); +} - ui32 ageIdx = 0; - auto personType = engine.GetTypeBuilder().Struct()-> - AddField<int>("a\0ge", &ageIdx) - .Build(); +Y_UNIT_TEST(FromPyObjectBytesAttrWithNullCharacter) { + TPythonTestEngine engine; - engine.ToMiniKQL(personType, - "class Person:\n" - " def __init__(self, age):\n" - " setattr(self, 'a\\0ge', age)\n" - "\n" - "def Test():\n" - " return Person(99)\n", - [ageIdx](const NUdf::TUnboxedValuePod& value) { - UNIT_ASSERT(value); - UNIT_ASSERT(value.IsBoxed()); - auto age = value.GetElement(ageIdx); - UNIT_ASSERT_EQUAL(age.Get<ui32>(), 99); - }); - } + ui32 ageIdx = 0; + auto personType = engine.GetTypeBuilder().Struct()->AddField<int>("a\0ge", &ageIdx).Build(); - Y_UNIT_TEST(FromPyDict) { - TPythonTestEngine engine; + engine.ToMiniKQL(personType, + "class Person:\n" + " def __init__(self, age):\n" + " setattr(self, 'a\\0ge', age)\n" + "\n" + "def Test():\n" + " return Person(99)\n", + [ageIdx](const NUdf::TUnboxedValuePod& value) { + UNIT_ASSERT(value); + UNIT_ASSERT(value.IsBoxed()); + auto age = value.GetElement(ageIdx); + UNIT_ASSERT_EQUAL(age.Get<ui32>(), 99); + }); +} - ui32 ageIdx = 0, nameIdx = 0; - auto personType = engine.GetTypeBuilder().Struct()-> - AddField<int>("age", &ageIdx) - .AddField<char*>("name", &nameIdx) - .Build(); +Y_UNIT_TEST(FromPyDict) { + TPythonTestEngine engine; - engine.ToMiniKQL(personType, - "def Test():\n" - " return { 'name': 'Jamel', 'age': 99 }\n", - [ageIdx, nameIdx](const NUdf::TUnboxedValuePod& value) { - UNIT_ASSERT(value); - UNIT_ASSERT(value.IsBoxed()); - auto name = value.GetElement(nameIdx); - UNIT_ASSERT_STRINGS_EQUAL(name.AsStringRef(), "Jamel"); - auto age = value.GetElement(ageIdx); - UNIT_ASSERT_EQUAL(age.Get<ui32>(), 99); - }); - } + ui32 ageIdx = 0, nameIdx = 0; + auto personType = 
engine.GetTypeBuilder().Struct()->AddField<int>("age", &ageIdx).AddField<char*>("name", &nameIdx).Build(); - Y_UNIT_TEST(FromPyDictMissingOptionalField) { - TPythonTestEngine engine; + engine.ToMiniKQL(personType, + "def Test():\n" + " return { 'name': 'Jamel', 'age': 99 }\n", + [ageIdx, nameIdx](const NUdf::TUnboxedValuePod& value) { + UNIT_ASSERT(value); + UNIT_ASSERT(value.IsBoxed()); + auto name = value.GetElement(nameIdx); + UNIT_ASSERT_STRINGS_EQUAL(name.AsStringRef(), "Jamel"); + auto age = value.GetElement(ageIdx); + UNIT_ASSERT_EQUAL(age.Get<ui32>(), 99); + }); +} - ui32 ageIdx = 0, nameIdx = 0; - auto optionalStringType = engine.GetTypeBuilder().Optional()->Item<char*>().Build(); - auto personType = engine.GetTypeBuilder().Struct()-> - AddField<int>("age", &ageIdx) - .AddField("name", optionalStringType, &nameIdx) - .Build(); +Y_UNIT_TEST(FromPyDictMissingOptionalField) { + TPythonTestEngine engine; - engine.ToMiniKQL(personType, - "def Test():\n" - " return { 'age': 99 }\n", - [ageIdx, nameIdx](const NUdf::TUnboxedValuePod& value) { - UNIT_ASSERT(value); - UNIT_ASSERT(value.IsBoxed()); - auto name = value.GetElement(nameIdx); - UNIT_ASSERT(!name); - auto age = value.GetElement(ageIdx); - UNIT_ASSERT_EQUAL(age.Get<ui32>(), 99); - }); - } + ui32 ageIdx = 0, nameIdx = 0; + auto optionalStringType = engine.GetTypeBuilder().Optional()->Item<char*>().Build(); + auto personType = engine.GetTypeBuilder().Struct()->AddField<int>("age", &ageIdx).AddField("name", optionalStringType, &nameIdx).Build(); - Y_UNIT_TEST(FromPyDictBytesKeyWithNullCharacter) { - TPythonTestEngine engine; + engine.ToMiniKQL(personType, + "def Test():\n" + " return { 'age': 99 }\n", + [ageIdx, nameIdx](const NUdf::TUnboxedValuePod& value) { + UNIT_ASSERT(value); + UNIT_ASSERT(value.IsBoxed()); + auto name = value.GetElement(nameIdx); + UNIT_ASSERT(!name); + auto age = value.GetElement(ageIdx); + UNIT_ASSERT_EQUAL(age.Get<ui32>(), 99); + }); +} - ui32 ageIdx = 0; - auto personType = 
engine.GetTypeBuilder().Struct()-> - AddField<int>("a\0ge", &ageIdx) - .Build(); +Y_UNIT_TEST(FromPyDictBytesKeyWithNullCharacter) { + TPythonTestEngine engine; - engine.ToMiniKQL(personType, - "def Test():\n" - " return { b'a\\0ge': 99 }\n", - [ageIdx](const NUdf::TUnboxedValuePod& value) { - UNIT_ASSERT(value); - UNIT_ASSERT(value.IsBoxed()); - auto age = value.GetElement(ageIdx); - UNIT_ASSERT_EQUAL(age.Get<ui32>(), 99); - }); - } + ui32 ageIdx = 0; + auto personType = engine.GetTypeBuilder().Struct()->AddField<int>("a\0ge", &ageIdx).Build(); - Y_UNIT_TEST(FromPyNamedTuple) { - TPythonTestEngine engine; + engine.ToMiniKQL(personType, + "def Test():\n" + " return { b'a\\0ge': 99 }\n", + [ageIdx](const NUdf::TUnboxedValuePod& value) { + UNIT_ASSERT(value); + UNIT_ASSERT(value.IsBoxed()); + auto age = value.GetElement(ageIdx); + UNIT_ASSERT_EQUAL(age.Get<ui32>(), 99); + }); +} - ui32 ageIdx = 0, nameIdx = 0; - auto personType = engine.GetTypeBuilder().Struct()-> - AddField<int>("age", &ageIdx) - .AddField<char*>("name", &nameIdx) - .Build(); +Y_UNIT_TEST(FromPyNamedTuple) { + TPythonTestEngine engine; - engine.ToMiniKQL(personType, - "from collections import namedtuple\n" - "def Test():\n" - " Person = namedtuple('Person', 'name age')\n" - " return Person(age=13, name='Tony')\n", - [ageIdx, nameIdx](const NUdf::TUnboxedValuePod& value) { - UNIT_ASSERT(value); - UNIT_ASSERT(value.IsBoxed()); - auto name = value.GetElement(nameIdx); - UNIT_ASSERT_STRINGS_EQUAL(name.AsStringRef(), "Tony"); - auto age = value.GetElement(ageIdx); - UNIT_ASSERT_EQUAL(age.Get<ui32>(), 13); - }); - } + ui32 ageIdx = 0, nameIdx = 0; + auto personType = engine.GetTypeBuilder().Struct()->AddField<int>("age", &ageIdx).AddField<char*>("name", &nameIdx).Build(); - Y_UNIT_TEST(FromPyNamedTupleNoneOptionalField) { - TPythonTestEngine engine; + engine.ToMiniKQL(personType, + "from collections import namedtuple\n" + "def Test():\n" + " Person = namedtuple('Person', 'name age')\n" + " return 
Person(age=13, name='Tony')\n", + [ageIdx, nameIdx](const NUdf::TUnboxedValuePod& value) { + UNIT_ASSERT(value); + UNIT_ASSERT(value.IsBoxed()); + auto name = value.GetElement(nameIdx); + UNIT_ASSERT_STRINGS_EQUAL(name.AsStringRef(), "Tony"); + auto age = value.GetElement(ageIdx); + UNIT_ASSERT_EQUAL(age.Get<ui32>(), 13); + }); +} - ui32 ageIdx = 0, nameIdx = 0; - auto optionalStringType = engine.GetTypeBuilder().Optional()->Item<char*>().Build(); - auto personType = engine.GetTypeBuilder().Struct()-> - AddField<int>("age", &ageIdx) - .AddField("name", optionalStringType, &nameIdx) - .Build(); +Y_UNIT_TEST(FromPyNamedTupleNoneOptionalField) { + TPythonTestEngine engine; - engine.ToMiniKQL(personType, - "from collections import namedtuple\n" - "def Test():\n" - " Pers = namedtuple('Person', 'name age')\n" - " return Pers(name=None, age=15)\n", - [ageIdx, nameIdx](const NUdf::TUnboxedValuePod& value) { - UNIT_ASSERT(value); - UNIT_ASSERT(value.IsBoxed()); - auto name = value.GetElement(nameIdx); - UNIT_ASSERT(!name); - auto age = value.GetElement(ageIdx); - UNIT_ASSERT_EQUAL(age.Get<ui32>(), 15); - }); - } + ui32 ageIdx = 0, nameIdx = 0; + auto optionalStringType = engine.GetTypeBuilder().Optional()->Item<char*>().Build(); + auto personType = engine.GetTypeBuilder().Struct()->AddField<int>("age", &ageIdx).AddField("name", optionalStringType, &nameIdx).Build(); - Y_UNIT_TEST(FromPyEmptyStruct) { - TPythonTestEngine engine; - auto emptyStruct = engine.GetTypeBuilder().Struct()->Build(); + engine.ToMiniKQL(personType, + "from collections import namedtuple\n" + "def Test():\n" + " Pers = namedtuple('Person', 'name age')\n" + " return Pers(name=None, age=15)\n", + [ageIdx, nameIdx](const NUdf::TUnboxedValuePod& value) { + UNIT_ASSERT(value); + UNIT_ASSERT(value.IsBoxed()); + auto name = value.GetElement(nameIdx); + UNIT_ASSERT(!name); + auto age = value.GetElement(ageIdx); + UNIT_ASSERT_EQUAL(age.Get<ui32>(), 15); + }); +} - engine.ToMiniKQL(emptyStruct, - "class Empty: 
pass\n" - "\n" - "def Test():\n" - " return Empty()\n", - [](const NUdf::TUnboxedValuePod&) {}); - } +Y_UNIT_TEST(FromPyEmptyStruct) { + TPythonTestEngine engine; + auto emptyStruct = engine.GetTypeBuilder().Struct()->Build(); - Y_UNIT_TEST(ToPyObject) { - TPythonTestEngine engine; + engine.ToMiniKQL(emptyStruct, + "class Empty: pass\n" + "\n" + "def Test():\n" + " return Empty()\n", + [](const NUdf::TUnboxedValuePod&) {}); +} - ui32 ageIdx = 0, nameIdx = 0, addressIdx = 0, cityIdx = 0, streetIdx = 0, buildingIdx = 0; - auto addressType = engine.GetTypeBuilder().Struct()-> - AddField<NUdf::TUtf8>("city", &cityIdx) - .AddField<NUdf::TUtf8>("street", &streetIdx) - .AddField<ui16>("building", &buildingIdx) - .Build(); +Y_UNIT_TEST(ToPyObject) { + TPythonTestEngine engine; - auto personType = engine.GetTypeBuilder().Struct()-> - AddField<ui16>("age", &ageIdx) - .AddField<NUdf::TUtf8>("name", &nameIdx) - .AddField("address", addressType, &addressIdx) - .Build(); + ui32 ageIdx = 0, nameIdx = 0, addressIdx = 0, cityIdx = 0, streetIdx = 0, buildingIdx = 0; + auto addressType = engine.GetTypeBuilder().Struct()->AddField<NUdf::TUtf8>("city", &cityIdx).AddField<NUdf::TUtf8>("street", &streetIdx).AddField<ui16>("building", &buildingIdx).Build(); + auto personType = engine.GetTypeBuilder().Struct()->AddField<ui16>("age", &ageIdx).AddField<NUdf::TUtf8>("name", &nameIdx).AddField("address", addressType, &addressIdx).Build(); - engine.ToPython(personType, - [=](const TType* type, const NUdf::IValueBuilder& vb) { - NUdf::TUnboxedValue* items = nullptr; - auto new_struct = vb.NewArray(static_cast<const TStructType*>(type)->GetMembersCount(), items); - items[ageIdx] = NUdf::TUnboxedValuePod(ui16(97)); - items[nameIdx] = vb.NewString("Jamel"); - NUdf::TUnboxedValue* items2 = nullptr; - items[addressIdx] = vb.NewArray(static_cast<const TStructType*>(static_cast<const TStructType*>(type)->GetMemberType(addressIdx))->GetMembersCount(), items2); - items2[cityIdx] = 
vb.NewString("Moscow");; - items2[streetIdx] = vb.NewString("L'va Tolstogo"); - items2[buildingIdx] = NUdf::TUnboxedValuePod(ui16(16)); - return new_struct; - }, - "def Test(value):\n" - " assert isinstance(value, object)\n" - " assert value.name == 'Jamel'\n" - " assert value.age == 97\n" - " assert value.address.city == 'Moscow'\n" - " assert value.address.building == 16\n" - ); - } + engine.ToPython(personType, + [=](const TType* type, const NUdf::IValueBuilder& vb) { + NUdf::TUnboxedValue* items = nullptr; + auto new_struct = vb.NewArray(static_cast<const TStructType*>(type)->GetMembersCount(), items); + items[ageIdx] = NUdf::TUnboxedValuePod(ui16(97)); + items[nameIdx] = vb.NewString("Jamel"); + NUdf::TUnboxedValue* items2 = nullptr; + items[addressIdx] = vb.NewArray(static_cast<const TStructType*>(static_cast<const TStructType*>(type)->GetMemberType(addressIdx))->GetMembersCount(), items2); + items2[cityIdx] = vb.NewString("Moscow"); + ; + items2[streetIdx] = vb.NewString("L'va Tolstogo"); + items2[buildingIdx] = NUdf::TUnboxedValuePod(ui16(16)); + return new_struct; + }, + "def Test(value):\n" + " assert isinstance(value, object)\n" + " assert value.name == 'Jamel'\n" + " assert value.age == 97\n" + " assert value.address.city == 'Moscow'\n" + " assert value.address.building == 16\n"); +} - Y_UNIT_TEST(ToPyObjectKeywordsAsFields) { - TPythonTestEngine engine; +Y_UNIT_TEST(ToPyObjectKeywordsAsFields) { + TPythonTestEngine engine; - ui32 passIdx = 0, whileIdx = 0, ifIdx = 0, notIdx = 0; - auto structType = engine.GetTypeBuilder().Struct()-> - AddField<NUdf::TUtf8>("pass", &passIdx) - .AddField<NUdf::TUtf8>("while", &whileIdx) - .AddField<NUdf::TUtf8>("if", &ifIdx) - .AddField<NUdf::TUtf8>("not", &notIdx) - .Build(); + ui32 passIdx = 0, whileIdx = 0, ifIdx = 0, notIdx = 0; + auto structType = engine.GetTypeBuilder().Struct()->AddField<NUdf::TUtf8>("pass", &passIdx).AddField<NUdf::TUtf8>("while", &whileIdx).AddField<NUdf::TUtf8>("if", 
&ifIdx).AddField<NUdf::TUtf8>("not", &notIdx).Build(); - engine.ToPython(structType, - [=](const TType* type, const NUdf::IValueBuilder& vb) { - NUdf::TUnboxedValue* items = nullptr; - auto new_struct = vb.NewArray(static_cast<const TStructType*>(type)->GetMembersCount(), items); - items[ifIdx] = vb.NewString("You"); - items[whileIdx] = vb.NewString("Shall"); - items[notIdx] = vb.NewString("Not"); - items[passIdx] = vb.NewString("Pass"); - return new_struct; - }, - "def Test(value):\n" - " assert getattr(value, 'if') == 'You'\n" - " assert getattr(value, 'while') == 'Shall'\n" - " assert getattr(value, 'not') == 'Not'\n" - " assert getattr(value, 'pass') == 'Pass'\n" - ); - } + engine.ToPython(structType, + [=](const TType* type, const NUdf::IValueBuilder& vb) { + NUdf::TUnboxedValue* items = nullptr; + auto new_struct = vb.NewArray(static_cast<const TStructType*>(type)->GetMembersCount(), items); + items[ifIdx] = vb.NewString("You"); + items[whileIdx] = vb.NewString("Shall"); + items[notIdx] = vb.NewString("Not"); + items[passIdx] = vb.NewString("Pass"); + return new_struct; + }, + "def Test(value):\n" + " assert getattr(value, 'if') == 'You'\n" + " assert getattr(value, 'while') == 'Shall'\n" + " assert getattr(value, 'not') == 'Not'\n" + " assert getattr(value, 'pass') == 'Pass'\n"); +} #if PY_MAJOR_VERSION >= 3 // TODO: Fix for python 2 - Y_UNIT_TEST(ToPyObjectTryModify) { - TPythonTestEngine engine; +Y_UNIT_TEST(ToPyObjectTryModify) { + TPythonTestEngine engine; - ui32 field1Idx = 0, field2Idx = 0; - auto structType = engine.GetTypeBuilder().Struct()-> - AddField<NUdf::TUtf8>("field1", &field1Idx) - .AddField<NUdf::TUtf8>("field2", &field2Idx) - .Build(); + ui32 field1Idx = 0, field2Idx = 0; + auto structType = engine.GetTypeBuilder().Struct()->AddField<NUdf::TUtf8>("field1", &field1Idx).AddField<NUdf::TUtf8>("field2", &field2Idx).Build(); - engine.ToPython(structType, - [=](const TType* type, const NUdf::IValueBuilder& vb) { - NUdf::TUnboxedValue* items = 
nullptr; - auto new_struct = vb.NewArray(static_cast<const TStructType*>(type)->GetMembersCount(), items); - items[field1Idx] = NUdf::TUnboxedValuePod::Zero(); - items[field2Idx] = NUdf::TUnboxedValuePod::Embedded("empty"); - return new_struct; - }, - "def Test(value):\n" - " try:\n" - " setattr(value, 'field1', 17)\n" - " except AttributeError:\n" - " pass\n" - " else:\n" - " assert False\n" - " try:\n" - " value.field2 = 18\n" - " except AttributeError:\n" - " pass\n" - " else:\n" - " assert False\n" - ); - } + engine.ToPython(structType, + [=](const TType* type, const NUdf::IValueBuilder& vb) { + NUdf::TUnboxedValue* items = nullptr; + auto new_struct = vb.NewArray(static_cast<const TStructType*>(type)->GetMembersCount(), items); + items[field1Idx] = NUdf::TUnboxedValuePod::Zero(); + items[field2Idx] = NUdf::TUnboxedValuePod::Embedded("empty"); + return new_struct; + }, + "def Test(value):\n" + " try:\n" + " setattr(value, 'field1', 17)\n" + " except AttributeError:\n" + " pass\n" + " else:\n" + " assert False\n" + " try:\n" + " value.field2 = 18\n" + " except AttributeError:\n" + " pass\n" + " else:\n" + " assert False\n"); +} #endif - Y_UNIT_TEST(ToPyObjectEmptyStruct) { - TPythonTestEngine engine; +Y_UNIT_TEST(ToPyObjectEmptyStruct) { + TPythonTestEngine engine; - auto personType = engine.GetTypeBuilder().Struct()->Build(); + auto personType = engine.GetTypeBuilder().Struct()->Build(); - engine.ToPython(personType, - [](const TType*, const NUdf::IValueBuilder& vb) { - return vb.NewEmptyList(); - }, - "def Test(value):\n" - " assert isinstance(value, object)\n" + engine.ToPython(personType, + [](const TType*, const NUdf::IValueBuilder& vb) { + return vb.NewEmptyList(); + }, + "def Test(value):\n" + " assert isinstance(value, object)\n" #if PY_MAJOR_VERSION >= 3 - " assert len(value) == 0\n" + " assert len(value) == 0\n" #endif - ); - } + ); } +} // Y_UNIT_TEST_SUITE(TPyStructTest) diff --git a/yql/essentials/udfs/common/python/bindings/py_test_engine.h 
b/yql/essentials/udfs/common/python/bindings/py_test_engine.h index 6809fc61cff..c2fce35bca3 100644 --- a/yql/essentials/udfs/common/python/bindings/py_test_engine.h +++ b/yql/essentials/udfs/common/python/bindings/py_test_engine.h @@ -13,7 +13,6 @@ #define PYTHON_TEST_TAG "Python2Test" - using namespace NKikimr; using namespace NMiniKQL; @@ -49,15 +48,14 @@ public: , FunctionInfoBuilder_(NYql::UnknownLangVersion, Env_, TypeInfoHelper_, "", nullptr, {}) { HolderFactory_ = MakeHolder<THolderFactory>( - Alloc_.Ref(), - MemInfo_, - nullptr); + Alloc_.Ref(), + MemInfo_, + nullptr); ValueBuilder_ = MakeHolder<TDefaultValueBuilder>(*HolderFactory_, NUdf::EValidatePolicy::Exception); BindTerminator_ = MakeHolder<TBindTerminator>(ValueBuilder_.Get()); Singleton<TPyInitializer>(); CastCtx_ = MakeIntrusive<TPyCastContext>(&GetValueBuilder(), - MakeIntrusive<TPyContext>(TypeInfoHelper_.Get(), NUdf::TStringRef::Of(PYTHON_TEST_TAG), NUdf::TSourcePosition()) - ); + MakeIntrusive<TPyContext>(TypeInfoHelper_.Get(), NUdf::TStringRef::Of(PYTHON_TEST_TAG), NUdf::TSourcePosition())); } ~TPythonTestEngine() { @@ -90,8 +88,8 @@ public: template <typename TChecker> void ToMiniKQLWithArg( - NUdf::TType* udfType, PyObject* argValue, - const TStringBuf& script, TChecker&& checker) + NUdf::TType* udfType, PyObject* argValue, + const TStringBuf& script, TChecker&& checker) { TPyObjectPtr args = Py_BuildValue("(O)", argValue); @@ -108,8 +106,8 @@ public: template <typename TExpectedType, typename TChecker> void ToMiniKQLWithArg( - PyObject* argValue, - const TStringBuf& script, TChecker&& checker) + PyObject* argValue, + const TStringBuf& script, TChecker&& checker) { auto type = GetTypeBuilder().SimpleType<TExpectedType>(); ToMiniKQLWithArg<TChecker>(type, argValue, script, std::move(checker)); @@ -143,9 +141,9 @@ public: template <typename TMiniKQLValueBuilder> TPyObjectPtr ToPython( - NUdf::TType* udfType, - TMiniKQLValueBuilder&& builder, - const TStringBuf& script) + NUdf::TType* udfType, 
+ TMiniKQLValueBuilder&& builder, + const TStringBuf& script) { try { TType* type = static_cast<TType*>(udfType); @@ -239,7 +237,7 @@ private: } TPyObjectPtr RunPythonFunction( - const TStringBuf& script, PyObject* args = nullptr) + const TStringBuf& script, PyObject* args = nullptr) { TPyObjectPtr function(CompilePythonFunction(script)); return PyObject_CallObject(function.Get(), args); diff --git a/yql/essentials/udfs/common/python/bindings/py_tuple.cpp b/yql/essentials/udfs/common/python/bindings/py_tuple.cpp index 6cef25ea47f..60842a895fb 100644 --- a/yql/essentials/udfs/common/python/bindings/py_tuple.cpp +++ b/yql/essentials/udfs/common/python/bindings/py_tuple.cpp @@ -58,4 +58,4 @@ NUdf::TUnboxedValue FromPyTuple(const TPyCastContext::TPtr& ctx, const NUdf::TTy throw yexception() << "Expected Tuple or Sequence but got: " << PyObjectRepr(value); } -} +} // namespace NPython diff --git a/yql/essentials/udfs/common/python/bindings/py_tuple.h b/yql/essentials/udfs/common/python/bindings/py_tuple.h index 7d66af9b011..a175c87adf0 100644 --- a/yql/essentials/udfs/common/python/bindings/py_tuple.h +++ b/yql/essentials/udfs/common/python/bindings/py_tuple.h @@ -6,12 +6,12 @@ namespace NPython { TPyObjectPtr ToPyTuple( - const TPyCastContext::TPtr& castCtx, - const NKikimr::NUdf::TType* type, - const NKikimr::NUdf::TUnboxedValuePod& value); + const TPyCastContext::TPtr& castCtx, + const NKikimr::NUdf::TType* type, + const NKikimr::NUdf::TUnboxedValuePod& value); NKikimr::NUdf::TUnboxedValue FromPyTuple( - const TPyCastContext::TPtr& ctx, - const NKikimr::NUdf::TType* type, PyObject* value); + const TPyCastContext::TPtr& ctx, + const NKikimr::NUdf::TType* type, PyObject* value); } // namespace NPython diff --git a/yql/essentials/udfs/common/python/bindings/py_tuple_ut.cpp b/yql/essentials/udfs/common/python/bindings/py_tuple_ut.cpp index f465f0ebb68..905a5ec382a 100644 --- a/yql/essentials/udfs/common/python/bindings/py_tuple_ut.cpp +++ 
b/yql/essentials/udfs/common/python/bindings/py_tuple_ut.cpp @@ -2,107 +2,106 @@ #include <library/cpp/testing/unittest/registar.h> - using namespace NPython; Y_UNIT_TEST_SUITE(TPyTupleTest) { - Y_UNIT_TEST(FromPyEmptyTuple) { - TPythonTestEngine engine; - engine.ToMiniKQL<NUdf::TTuple<>>( - "def Test(): return ()", - [](const NUdf::TUnboxedValuePod&) {}); - } +Y_UNIT_TEST(FromPyEmptyTuple) { + TPythonTestEngine engine; + engine.ToMiniKQL<NUdf::TTuple<>>( + "def Test(): return ()", + [](const NUdf::TUnboxedValuePod&) {}); +} - Y_UNIT_TEST(FromPyList) { - TPythonTestEngine engine; - engine.ToMiniKQL<NUdf::TTuple<int, int, int>>( - "def Test(): return [1, 2, 3]", - [](const NUdf::TUnboxedValuePod& value) { - UNIT_ASSERT(value); - UNIT_ASSERT(value.IsBoxed()); - UNIT_ASSERT_EQUAL(value.GetElement(0).Get<int>(), 1); - UNIT_ASSERT_EQUAL(value.GetElement(1).Get<int>(), 2); - UNIT_ASSERT_EQUAL(value.GetElement(2).Get<int>(), 3); - }); - } +Y_UNIT_TEST(FromPyList) { + TPythonTestEngine engine; + engine.ToMiniKQL<NUdf::TTuple<int, int, int>>( + "def Test(): return [1, 2, 3]", + [](const NUdf::TUnboxedValuePod& value) { + UNIT_ASSERT(value); + UNIT_ASSERT(value.IsBoxed()); + UNIT_ASSERT_EQUAL(value.GetElement(0).Get<int>(), 1); + UNIT_ASSERT_EQUAL(value.GetElement(1).Get<int>(), 2); + UNIT_ASSERT_EQUAL(value.GetElement(2).Get<int>(), 3); + }); +} - Y_UNIT_TEST(FromPyIter) { - TPythonTestEngine engine; - engine.ToMiniKQL<NUdf::TTuple<int, int, int>>( - "def Test(): return iter({1, 2, 3})", - [](const NUdf::TUnboxedValuePod& value) { - UNIT_ASSERT(value); - UNIT_ASSERT(value.IsBoxed()); - UNIT_ASSERT_EQUAL(value.GetElement(0).Get<int>(), 1); - UNIT_ASSERT_EQUAL(value.GetElement(1).Get<int>(), 2); - UNIT_ASSERT_EQUAL(value.GetElement(2).Get<int>(), 3); - }); - } +Y_UNIT_TEST(FromPyIter) { + TPythonTestEngine engine; + engine.ToMiniKQL<NUdf::TTuple<int, int, int>>( + "def Test(): return iter({1, 2, 3})", + [](const NUdf::TUnboxedValuePod& value) { + UNIT_ASSERT(value); + 
UNIT_ASSERT(value.IsBoxed()); + UNIT_ASSERT_EQUAL(value.GetElement(0).Get<int>(), 1); + UNIT_ASSERT_EQUAL(value.GetElement(1).Get<int>(), 2); + UNIT_ASSERT_EQUAL(value.GetElement(2).Get<int>(), 3); + }); +} - Y_UNIT_TEST(FromPyTuple) { - TPythonTestEngine engine; - engine.ToMiniKQL<NUdf::TTuple<int, double, char*>>( - "def Test(): return (1, float(2.3), '4')", - [](const NUdf::TUnboxedValuePod& value) { - UNIT_ASSERT(value); - UNIT_ASSERT(value.IsBoxed()); - UNIT_ASSERT_EQUAL(value.GetElement(0).Get<int>(), 1); - auto second = value.GetElement(1); - UNIT_ASSERT_DOUBLES_EQUAL(second.Get<double>(), 2.3, 0.0001); - const auto third = value.GetElement(2); - UNIT_ASSERT_EQUAL(third.AsStringRef(), "4"); - }); - } +Y_UNIT_TEST(FromPyTuple) { + TPythonTestEngine engine; + engine.ToMiniKQL<NUdf::TTuple<int, double, char*>>( + "def Test(): return (1, float(2.3), '4')", + [](const NUdf::TUnboxedValuePod& value) { + UNIT_ASSERT(value); + UNIT_ASSERT(value.IsBoxed()); + UNIT_ASSERT_EQUAL(value.GetElement(0).Get<int>(), 1); + auto second = value.GetElement(1); + UNIT_ASSERT_DOUBLES_EQUAL(second.Get<double>(), 2.3, 0.0001); + const auto third = value.GetElement(2); + UNIT_ASSERT_EQUAL(third.AsStringRef(), "4"); + }); +} - Y_UNIT_TEST(FromPyTupleInTuple) { - TPythonTestEngine engine; - engine.ToMiniKQL<NUdf::TTuple<ui32, NUdf::TTuple<ui8, float>, char*>>( - "def Test(): return (1, (2, float(3.4)), '5')", - [](const NUdf::TUnboxedValuePod& value) { - UNIT_ASSERT(value); - UNIT_ASSERT(value.IsBoxed()); - UNIT_ASSERT_EQUAL(value.GetElement(0).Get<ui32>(), 1); +Y_UNIT_TEST(FromPyTupleInTuple) { + TPythonTestEngine engine; + engine.ToMiniKQL<NUdf::TTuple<ui32, NUdf::TTuple<ui8, float>, char*>>( + "def Test(): return (1, (2, float(3.4)), '5')", + [](const NUdf::TUnboxedValuePod& value) { + UNIT_ASSERT(value); + UNIT_ASSERT(value.IsBoxed()); + UNIT_ASSERT_EQUAL(value.GetElement(0).Get<ui32>(), 1); - auto second = value.GetElement(1); - UNIT_ASSERT(second); - 
UNIT_ASSERT(second.IsBoxed()); - UNIT_ASSERT_EQUAL(second.GetElement(0).Get<ui8>(), 2); - UNIT_ASSERT_DOUBLES_EQUAL( - second.GetElement(1).Get<float>(), 3.4, 0.0001); + auto second = value.GetElement(1); + UNIT_ASSERT(second); + UNIT_ASSERT(second.IsBoxed()); + UNIT_ASSERT_EQUAL(second.GetElement(0).Get<ui8>(), 2); + UNIT_ASSERT_DOUBLES_EQUAL( + second.GetElement(1).Get<float>(), 3.4, 0.0001); - const auto third = value.GetElement(2); - UNIT_ASSERT_EQUAL(third.AsStringRef(), "5"); - }); - } + const auto third = value.GetElement(2); + UNIT_ASSERT_EQUAL(third.AsStringRef(), "5"); + }); +} - Y_UNIT_TEST(ToPyEmptyTuple) { - TPythonTestEngine engine; - engine.ToPython<NUdf::TTuple<>>( - [](const TType* type, const NUdf::IValueBuilder& vb) { - NUdf::TUnboxedValue* items = nullptr; - return vb.NewArray(static_cast<const TTupleType*>(type)->GetElementsCount(), items); - }, - "def Test(value):\n" - " assert isinstance(value, tuple)\n" - " assert len(value) == 0\n" - " assert value == ()\n"); - } +Y_UNIT_TEST(ToPyEmptyTuple) { + TPythonTestEngine engine; + engine.ToPython<NUdf::TTuple<>>( + [](const TType* type, const NUdf::IValueBuilder& vb) { + NUdf::TUnboxedValue* items = nullptr; + return vb.NewArray(static_cast<const TTupleType*>(type)->GetElementsCount(), items); + }, + "def Test(value):\n" + " assert isinstance(value, tuple)\n" + " assert len(value) == 0\n" + " assert value == ()\n"); +} - Y_UNIT_TEST(ToPyTuple) { - TPythonTestEngine engine; - engine.ToPython<NUdf::TTuple<NUdf::TUtf8, ui64, ui8, float>>( - [](const TType* type, const NUdf::IValueBuilder& vb) { - NUdf::TUnboxedValue* items = nullptr; - auto tuple = vb.NewArray(static_cast<const TTupleType*>(type)->GetElementsCount(), items); - items[0] = vb.NewString("111"); - items[1] = NUdf::TUnboxedValuePod((ui64) 2); - items[2] = NUdf::TUnboxedValuePod((ui8) 3); - items[3] = NUdf::TUnboxedValuePod((float) 4.5); - return tuple; - }, - "def Test(value):\n" - " assert isinstance(value, tuple)\n" - " assert len(value) 
== 4\n" - " assert value == ('111', 2, 3, 4.5)\n"); - } +Y_UNIT_TEST(ToPyTuple) { + TPythonTestEngine engine; + engine.ToPython<NUdf::TTuple<NUdf::TUtf8, ui64, ui8, float>>( + [](const TType* type, const NUdf::IValueBuilder& vb) { + NUdf::TUnboxedValue* items = nullptr; + auto tuple = vb.NewArray(static_cast<const TTupleType*>(type)->GetElementsCount(), items); + items[0] = vb.NewString("111"); + items[1] = NUdf::TUnboxedValuePod((ui64)2); + items[2] = NUdf::TUnboxedValuePod((ui8)3); + items[3] = NUdf::TUnboxedValuePod((float)4.5); + return tuple; + }, + "def Test(value):\n" + " assert isinstance(value, tuple)\n" + " assert len(value) == 4\n" + " assert value == ('111', 2, 3, 4.5)\n"); } +} // Y_UNIT_TEST_SUITE(TPyTupleTest) diff --git a/yql/essentials/udfs/common/python/bindings/py_tzdate_ut.cpp b/yql/essentials/udfs/common/python/bindings/py_tzdate_ut.cpp index 08b6b78b168..af1926243fb 100644 --- a/yql/essentials/udfs/common/python/bindings/py_tzdate_ut.cpp +++ b/yql/essentials/udfs/common/python/bindings/py_tzdate_ut.cpp @@ -4,82 +4,81 @@ #include <library/cpp/testing/unittest/registar.h> - using namespace NPython; Y_UNIT_TEST_SUITE(TPyTzDateTest) { - Y_UNIT_TEST(FromDate) { - TPythonTestEngine engine; - engine.ToMiniKQL<NUdf::TTzDate>( - "def Test():\n" - " return (2, 'Europe/Moscow')\n", - [](const NUdf::TUnboxedValuePod& value) { - UNIT_ASSERT(value); - UNIT_ASSERT_VALUES_EQUAL(value.Get<ui16>(), 2); - UNIT_ASSERT_VALUES_EQUAL(value.GetTimezoneId(), NKikimr::NMiniKQL::GetTimezoneId("Europe/Moscow")); - }); - } +Y_UNIT_TEST(FromDate) { + TPythonTestEngine engine; + engine.ToMiniKQL<NUdf::TTzDate>( + "def Test():\n" + " return (2, 'Europe/Moscow')\n", + [](const NUdf::TUnboxedValuePod& value) { + UNIT_ASSERT(value); + UNIT_ASSERT_VALUES_EQUAL(value.Get<ui16>(), 2); + UNIT_ASSERT_VALUES_EQUAL(value.GetTimezoneId(), NKikimr::NMiniKQL::GetTimezoneId("Europe/Moscow")); + }); +} - Y_UNIT_TEST(FromDatetime) { - TPythonTestEngine engine; - 
engine.ToMiniKQL<NUdf::TTzDatetime>( - "def Test():\n" - " return (2, 'Europe/Moscow')\n", - [](const NUdf::TUnboxedValuePod& value) { +Y_UNIT_TEST(FromDatetime) { + TPythonTestEngine engine; + engine.ToMiniKQL<NUdf::TTzDatetime>( + "def Test():\n" + " return (2, 'Europe/Moscow')\n", + [](const NUdf::TUnboxedValuePod& value) { UNIT_ASSERT(value); UNIT_ASSERT_VALUES_EQUAL(value.Get<ui32>(), 2); UNIT_ASSERT_VALUES_EQUAL(value.GetTimezoneId(), NKikimr::NMiniKQL::GetTimezoneId("Europe/Moscow")); }); - } +} - Y_UNIT_TEST(FromTimestamp) { - TPythonTestEngine engine; - engine.ToMiniKQL<NUdf::TTzTimestamp>( - "def Test():\n" - " return (2, 'Europe/Moscow')\n", - [](const NUdf::TUnboxedValuePod& value) { +Y_UNIT_TEST(FromTimestamp) { + TPythonTestEngine engine; + engine.ToMiniKQL<NUdf::TTzTimestamp>( + "def Test():\n" + " return (2, 'Europe/Moscow')\n", + [](const NUdf::TUnboxedValuePod& value) { UNIT_ASSERT(value); UNIT_ASSERT_VALUES_EQUAL(value.Get<ui64>(), 2); UNIT_ASSERT_VALUES_EQUAL(value.GetTimezoneId(), NKikimr::NMiniKQL::GetTimezoneId("Europe/Moscow")); }); - } +} - Y_UNIT_TEST(ToDate) { - TPythonTestEngine engine; - engine.ToPython<NUdf::TTzDate>( - [](const TType* /*type*/, const NUdf::IValueBuilder& /*vb*/) { - auto ret = NUdf::TUnboxedValuePod((ui16)2); - ret.SetTimezoneId(NKikimr::NMiniKQL::GetTimezoneId("Europe/Moscow")); - return ret; - }, - "def Test(value):\n" - " assert isinstance(value, tuple)\n" - " assert value == (2, 'Europe/Moscow')\n"); - } +Y_UNIT_TEST(ToDate) { + TPythonTestEngine engine; + engine.ToPython<NUdf::TTzDate>( + [](const TType* /*type*/, const NUdf::IValueBuilder& /*vb*/) { + auto ret = NUdf::TUnboxedValuePod((ui16)2); + ret.SetTimezoneId(NKikimr::NMiniKQL::GetTimezoneId("Europe/Moscow")); + return ret; + }, + "def Test(value):\n" + " assert isinstance(value, tuple)\n" + " assert value == (2, 'Europe/Moscow')\n"); +} - Y_UNIT_TEST(ToDatetime) { - TPythonTestEngine engine; - engine.ToPython<NUdf::TTzDatetime>( - [](const TType* /*type*/, 
const NUdf::IValueBuilder& /*vb*/) { +Y_UNIT_TEST(ToDatetime) { + TPythonTestEngine engine; + engine.ToPython<NUdf::TTzDatetime>( + [](const TType* /*type*/, const NUdf::IValueBuilder& /*vb*/) { auto ret = NUdf::TUnboxedValuePod((ui32)2); ret.SetTimezoneId(NKikimr::NMiniKQL::GetTimezoneId("Europe/Moscow")); return ret; }, - "def Test(value):\n" - " assert isinstance(value, tuple)\n" - " assert value == (2, 'Europe/Moscow')\n"); - } + "def Test(value):\n" + " assert isinstance(value, tuple)\n" + " assert value == (2, 'Europe/Moscow')\n"); +} - Y_UNIT_TEST(ToTimestamp) { - TPythonTestEngine engine; - engine.ToPython<NUdf::TTzTimestamp>( - [](const TType* /*type*/, const NUdf::IValueBuilder& /*vb*/) { +Y_UNIT_TEST(ToTimestamp) { + TPythonTestEngine engine; + engine.ToPython<NUdf::TTzTimestamp>( + [](const TType* /*type*/, const NUdf::IValueBuilder& /*vb*/) { auto ret = NUdf::TUnboxedValuePod((ui64)2); ret.SetTimezoneId(NKikimr::NMiniKQL::GetTimezoneId("Europe/Moscow")); return ret; }, - "def Test(value):\n" - " assert isinstance(value, tuple)\n" - " assert value == (2, 'Europe/Moscow')\n"); - } + "def Test(value):\n" + " assert isinstance(value, tuple)\n" + " assert value == (2, 'Europe/Moscow')\n"); } +} // Y_UNIT_TEST_SUITE(TPyTzDateTest) diff --git a/yql/essentials/udfs/common/python/bindings/py_utils.cpp b/yql/essentials/udfs/common/python/bindings/py_utils.cpp index d1e0e8b4846..412aebb874b 100644 --- a/yql/essentials/udfs/common/python/bindings/py_utils.cpp +++ b/yql/essentials/udfs/common/python/bindings/py_utils.cpp @@ -8,12 +8,11 @@ #include <regex> - namespace NPython { TPyObjectPtr PyRepr(TStringBuf asciiStr, bool intern) { for (auto c : asciiStr) { - Y_ABORT_UNLESS((c&0x80) == 0, "expected ascii"); + Y_ABORT_UNLESS((c & 0x80) == 0, "expected ascii"); } Py_ssize_t size = static_cast<Py_ssize_t>(asciiStr.size()); @@ -41,7 +40,7 @@ TString PyObjectRepr(PyObject* value) { static constexpr std::string_view truncSuffix = "(truncated)"; const TPyObjectPtr 
repr(PyObject_Repr(value)); if (!repr) { - return TString("repr error: ") + GetLastErrorAsString(); + return TString("repr error: ") + GetLastErrorAsString(); } TString string; @@ -64,11 +63,13 @@ bool HasEncodingCookie(const TString& source) { // static std::regex encodingRe( - "^[ \\t\\v]*#.*?coding[:=][ \\t]*[-_.a-zA-Z0-9]+.*"); + "^[ \\t\\v]*#.*?coding[:=][ \\t]*[-_.a-zA-Z0-9]+.*"); int i = 0; - for (const auto& it: StringSplitter(source).Split('\n')) { - if (i++ == 2) break; + for (const auto& it : StringSplitter(source).Split('\n')) { + if (i++ == 2) { + break; + } TStringBuf line = it.Token(); if (std::regex_match(line.begin(), line.end(), encodingRe)) { @@ -86,4 +87,4 @@ void PyCleanup() { PySys_SetObject("last_traceback", Py_None); } -} // namspace NPython +} // namespace NPython diff --git a/yql/essentials/udfs/common/python/bindings/py_utils.h b/yql/essentials/udfs/common/python/bindings/py_utils.h index 0c5ef058f1a..cfb3f720836 100644 --- a/yql/essentials/udfs/common/python/bindings/py_utils.h +++ b/yql/essentials/udfs/common/python/bindings/py_utils.h @@ -5,9 +5,9 @@ #include <util/generic/strbuf.h> #ifdef _win_ -#define INIT_MEMBER(member, value) value //member + #define INIT_MEMBER(member, value) value // member #else -#define INIT_MEMBER(member, value) .member = (value) + #define INIT_MEMBER(member, value) .member = (value) #endif namespace NPython { @@ -15,7 +15,7 @@ namespace NPython { TPyObjectPtr PyRepr(TStringBuf asciiStr, bool intern = false); template <size_t size> -TPyObjectPtr PyRepr(const char(&str)[size]) { +TPyObjectPtr PyRepr(const char (&str)[size]) { return PyRepr(TStringBuf(str, size - 1), true); } @@ -25,4 +25,4 @@ bool HasEncodingCookie(const TString& source); void PyCleanup(); -} // namspace NPython +} // namespace NPython diff --git a/yql/essentials/udfs/common/python/bindings/py_utils_ut.cpp b/yql/essentials/udfs/common/python/bindings/py_utils_ut.cpp index ce521689b40..9b8e9fa53c4 100644 --- 
a/yql/essentials/udfs/common/python/bindings/py_utils_ut.cpp +++ b/yql/essentials/udfs/common/python/bindings/py_utils_ut.cpp @@ -2,36 +2,35 @@ #include <library/cpp/testing/unittest/registar.h> - using namespace NPython; Y_UNIT_TEST_SUITE(TPyUtilsTest) { - Y_UNIT_TEST(EncodingCookie) { - UNIT_ASSERT(HasEncodingCookie("# -*- coding: latin-1 -*-")); - UNIT_ASSERT(HasEncodingCookie("# -*- coding:latin-1 -*-")); - UNIT_ASSERT(HasEncodingCookie("# -*- coding=latin-1 -*-")); - UNIT_ASSERT(HasEncodingCookie("# -*- encoding: latin-1 -*-")); - UNIT_ASSERT(HasEncodingCookie("# -*- encoding:latin-1 -*-")); - UNIT_ASSERT(HasEncodingCookie("# -*- encoding=latin-1 -*-")); - UNIT_ASSERT(HasEncodingCookie("# -*- coding: iso-8859-15 -*-")); - UNIT_ASSERT(HasEncodingCookie("# -*- coding: ascii -*-")); - UNIT_ASSERT(HasEncodingCookie( - "# This Python file uses the following encoding: utf-8")); +Y_UNIT_TEST(EncodingCookie) { + UNIT_ASSERT(HasEncodingCookie("# -*- coding: latin-1 -*-")); + UNIT_ASSERT(HasEncodingCookie("# -*- coding:latin-1 -*-")); + UNIT_ASSERT(HasEncodingCookie("# -*- coding=latin-1 -*-")); + UNIT_ASSERT(HasEncodingCookie("# -*- encoding: latin-1 -*-")); + UNIT_ASSERT(HasEncodingCookie("# -*- encoding:latin-1 -*-")); + UNIT_ASSERT(HasEncodingCookie("# -*- encoding=latin-1 -*-")); + UNIT_ASSERT(HasEncodingCookie("# -*- coding: iso-8859-15 -*-")); + UNIT_ASSERT(HasEncodingCookie("# -*- coding: ascii -*-")); + UNIT_ASSERT(HasEncodingCookie( + "# This Python file uses the following encoding: utf-8")); - // encoding commend on second line - UNIT_ASSERT(HasEncodingCookie( - "#!/usr/local/bin/python\n" - "# -*- coding: iso-8859-15 -*-\n" - "print 'hello'")); + // encoding commend on second line + UNIT_ASSERT(HasEncodingCookie( + "#!/usr/local/bin/python\n" + "# -*- coding: iso-8859-15 -*-\n" + "print 'hello'")); - // missing "coding:" prefix - UNIT_ASSERT(false == HasEncodingCookie("# latin-1")); + // missing "coding:" prefix + UNIT_ASSERT(false == HasEncodingCookie("# 
latin-1")); - // encoding comment not on line 1 or 2 - UNIT_ASSERT(false == HasEncodingCookie( - "#!/usr/local/bin/python\n" - "#\n" - "# -*- coding: latin-1 -*-\n")); - } + // encoding comment not on line 1 or 2 + UNIT_ASSERT(false == HasEncodingCookie( + "#!/usr/local/bin/python\n" + "#\n" + "# -*- coding: latin-1 -*-\n")); } +} // Y_UNIT_TEST_SUITE(TPyUtilsTest) diff --git a/yql/essentials/udfs/common/python/bindings/py_variant.cpp b/yql/essentials/udfs/common/python/bindings/py_variant.cpp index ab222b34323..73764ad3fe5 100644 --- a/yql/essentials/udfs/common/python/bindings/py_variant.cpp +++ b/yql/essentials/udfs/common/python/bindings/py_variant.cpp @@ -7,7 +7,6 @@ #include <yql/essentials/public/udf/udf_value_builder.h> #include <yql/essentials/public/udf/udf_type_inspection.h> - using namespace NKikimr; namespace NPython { @@ -16,9 +15,9 @@ namespace NPython { // public functions ////////////////////////////////////////////////////////////////////////////// TPyObjectPtr ToPyVariant( - const TPyCastContext::TPtr& castCtx, - const NUdf::TType* type, - const NUdf::TUnboxedValuePod& value) + const TPyCastContext::TPtr& castCtx, + const NUdf::TType* type, + const NUdf::TUnboxedValuePod& value) { auto& th = *castCtx->PyCtx->TypeInfoHelper; NUdf::TVariantTypeInspector varInsp(th, type); @@ -35,7 +34,7 @@ TPyObjectPtr ToPyVariant( } else if (auto structInsp = NUdf::TStructTypeInspector(th, subType)) { itemType = structInsp.GetMemberType(index); TPyObjectPtr pyName = ToPyUnicode<NUdf::TStringRef>( - structInsp.GetMemberName(index)); + structInsp.GetMemberName(index)); TPyObjectPtr pyItem = ToPyObject(castCtx, itemType, item); return PyTuple_Pack(2, pyName.Get(), pyItem.Get()); } @@ -44,9 +43,9 @@ TPyObjectPtr ToPyVariant( } NUdf::TUnboxedValue FromPyVariant( - const TPyCastContext::TPtr& castCtx, - const NUdf::TType* type, - PyObject* value) + const TPyCastContext::TPtr& castCtx, + const NUdf::TType* type, + PyObject* value) { PY_ENSURE(PyTuple_Check(value), 
"Expected to get Tuple, but got " << Py_TYPE(value)->tp_name); @@ -54,7 +53,7 @@ NUdf::TUnboxedValue FromPyVariant( Py_ssize_t tupleSize = PyTuple_GET_SIZE(value); PY_ENSURE(tupleSize == 2, "Expected to get Tuple with 2 elements, but got " - << tupleSize << " elements"); + << tupleSize << " elements"); auto& th = *castCtx->PyCtx->TypeInfoHelper; NUdf::TVariantTypeInspector varInsp(th, type); @@ -69,12 +68,12 @@ NUdf::TUnboxedValue FromPyVariant( if (auto tupleInsp = NUdf::TTupleTypeInspector(th, subType)) { PY_ENSURE(index < tupleInsp.GetElementsCount(), "Index must be < " << tupleInsp.GetElementsCount() - << ", but got " << index); + << ", but got " << index); auto* itemType = tupleInsp.GetElementType(index); return castCtx->ValueBuilder->NewVariant(index, FromPyObject(castCtx, itemType, el1)); } else { throw yexception() << "Cannot convert " << PyObjectRepr(value) - << " underlying Variant type is not a Tuple"; + << " underlying Variant type is not a Tuple"; } } else if (TryPyCast(el0, name)) { if (auto structInsp = NUdf::TStructTypeInspector(th, subType)) { @@ -85,13 +84,14 @@ NUdf::TUnboxedValue FromPyVariant( return castCtx->ValueBuilder->NewVariant(index, FromPyObject(castCtx, itemType, el1)); } else { throw yexception() << "Cannot convert " << PyObjectRepr(value) - << " underlying Variant type is not a Struct"; + << " underlying Variant type is not a Struct"; } } else { throw yexception() - << "Expected first Tuple element to either be an int " - "or a str, but got " << Py_TYPE(el0)->tp_name; + << "Expected first Tuple element to either be an int " + "or a str, but got " + << Py_TYPE(el0)->tp_name; } } -} // namspace NPython +} // namespace NPython diff --git a/yql/essentials/udfs/common/python/bindings/py_variant.h b/yql/essentials/udfs/common/python/bindings/py_variant.h index ca97123183b..dd96deba5b5 100644 --- a/yql/essentials/udfs/common/python/bindings/py_variant.h +++ b/yql/essentials/udfs/common/python/bindings/py_variant.h @@ -5,13 +5,13 @@ 
namespace NPython { TPyObjectPtr ToPyVariant( - const TPyCastContext::TPtr& castCtx, - const NKikimr::NUdf::TType* type, - const NKikimr::NUdf::TUnboxedValuePod& value); + const TPyCastContext::TPtr& castCtx, + const NKikimr::NUdf::TType* type, + const NKikimr::NUdf::TUnboxedValuePod& value); NKikimr::NUdf::TUnboxedValue FromPyVariant( - const TPyCastContext::TPtr& castCtx, - const NKikimr::NUdf::TType* type, - PyObject* value); + const TPyCastContext::TPtr& castCtx, + const NKikimr::NUdf::TType* type, + PyObject* value); -} // namspace NPython +} // namespace NPython diff --git a/yql/essentials/udfs/common/python/bindings/py_variant_ut.cpp b/yql/essentials/udfs/common/python/bindings/py_variant_ut.cpp index d792449d828..adfdd2a09b7 100644 --- a/yql/essentials/udfs/common/python/bindings/py_variant_ut.cpp +++ b/yql/essentials/udfs/common/python/bindings/py_variant_ut.cpp @@ -3,99 +3,94 @@ #include <library/cpp/testing/unittest/registar.h> - using namespace NPython; Y_UNIT_TEST_SUITE(TPyVariantTest) { - Y_UNIT_TEST(FromPyWithIndex) { - TPythonTestEngine engine; - engine.ToMiniKQL<NUdf::TVariant<float, ui32, char*>>( - "def Test():\n" - " return (2, 'hello')\n", - [](const NUdf::TUnboxedValuePod& value) { - UNIT_ASSERT(value); - UNIT_ASSERT_EQUAL(value.GetVariantIndex(), 2); - auto item = value.GetVariantItem(); - UNIT_ASSERT_STRINGS_EQUAL(item.AsStringRef(), "hello"); - }); - } +Y_UNIT_TEST(FromPyWithIndex) { + TPythonTestEngine engine; + engine.ToMiniKQL<NUdf::TVariant<float, ui32, char*>>( + "def Test():\n" + " return (2, 'hello')\n", + [](const NUdf::TUnboxedValuePod& value) { + UNIT_ASSERT(value); + UNIT_ASSERT_EQUAL(value.GetVariantIndex(), 2); + auto item = value.GetVariantItem(); + UNIT_ASSERT_STRINGS_EQUAL(item.AsStringRef(), "hello"); + }); +} - Y_UNIT_TEST(FromPyWithName) { - TPythonTestEngine engine; +Y_UNIT_TEST(FromPyWithName) { + TPythonTestEngine engine; - ui32 ageIdx = 0, nameIdx = 0; - NUdf::TType* personType = engine.GetTypeBuilder().Struct()-> - 
AddField<ui32>("age", &ageIdx) - .AddField<char*>("name", &nameIdx) - .Build(); + ui32 ageIdx = 0, nameIdx = 0; + NUdf::TType* personType = engine.GetTypeBuilder().Struct()->AddField<ui32>("age", &ageIdx).AddField<char*>("name", &nameIdx).Build(); - NUdf::TType* variantType = engine.GetTypeBuilder() - .Variant()->Over(personType).Build(); + NUdf::TType* variantType = engine.GetTypeBuilder() + .Variant() + ->Over(personType) + .Build(); - engine.ToMiniKQL( - variantType, - "def Test():\n" - " return ('age', 99)\n", - [ageIdx](const NUdf::TUnboxedValuePod& value) { - UNIT_ASSERT(value); - UNIT_ASSERT_EQUAL(value.GetVariantIndex(), ageIdx); - auto item = value.GetVariantItem(); - UNIT_ASSERT_EQUAL(item.Get<ui32>(), 99); - }); + engine.ToMiniKQL( + variantType, + "def Test():\n" + " return ('age', 99)\n", + [ageIdx](const NUdf::TUnboxedValuePod& value) { + UNIT_ASSERT(value); + UNIT_ASSERT_EQUAL(value.GetVariantIndex(), ageIdx); + auto item = value.GetVariantItem(); + UNIT_ASSERT_EQUAL(item.Get<ui32>(), 99); + }); - engine.ToMiniKQL( - variantType, - "def Test():\n" - " return ('name', 'Jamel')\n", - [nameIdx](const NUdf::TUnboxedValuePod& value) { - UNIT_ASSERT(value); - UNIT_ASSERT_EQUAL(value.GetVariantIndex(), nameIdx); - auto item = value.GetVariantItem(); - UNIT_ASSERT_STRINGS_EQUAL(item.AsStringRef(), "Jamel"); - }); - } + engine.ToMiniKQL( + variantType, + "def Test():\n" + " return ('name', 'Jamel')\n", + [nameIdx](const NUdf::TUnboxedValuePod& value) { + UNIT_ASSERT(value); + UNIT_ASSERT_EQUAL(value.GetVariantIndex(), nameIdx); + auto item = value.GetVariantItem(); + UNIT_ASSERT_STRINGS_EQUAL(item.AsStringRef(), "Jamel"); + }); +} - Y_UNIT_TEST(ToPyWithIndex) { - TPythonTestEngine engine; - engine.ToPython<NUdf::TVariant<float, ui32, char*>>( - [](const TType* /*type*/, const NUdf::IValueBuilder& vb) { - return vb.NewVariant(1, NUdf::TUnboxedValuePod((ui32) 42)); - }, - "def Test(value):\n" - " assert isinstance(value, tuple)\n" - " assert value == (1, 
42)\n"); - } +Y_UNIT_TEST(ToPyWithIndex) { + TPythonTestEngine engine; + engine.ToPython<NUdf::TVariant<float, ui32, char*>>( + [](const TType* /*type*/, const NUdf::IValueBuilder& vb) { + return vb.NewVariant(1, NUdf::TUnboxedValuePod((ui32)42)); + }, + "def Test(value):\n" + " assert isinstance(value, tuple)\n" + " assert value == (1, 42)\n"); +} - Y_UNIT_TEST(ToPyWithName) { - TPythonTestEngine engine; +Y_UNIT_TEST(ToPyWithName) { + TPythonTestEngine engine; - ui32 ageIdx = 0, nameIdx = 0; - NUdf::TType* personType = engine.GetTypeBuilder().Struct()-> - AddField<ui32>("age", &ageIdx) - .AddField<NUdf::TUtf8>("name", &nameIdx) - .Build(); + ui32 ageIdx = 0, nameIdx = 0; + NUdf::TType* personType = engine.GetTypeBuilder().Struct()->AddField<ui32>("age", &ageIdx).AddField<NUdf::TUtf8>("name", &nameIdx).Build(); - NUdf::TType* variantType = engine.GetTypeBuilder() - .Variant()->Over(personType).Build(); + NUdf::TType* variantType = engine.GetTypeBuilder() + .Variant() + ->Over(personType) + .Build(); - engine.ToPython( - variantType, - [ageIdx](const TType* /*type*/, const NUdf::IValueBuilder& vb) { - return vb.NewVariant(ageIdx, NUdf::TUnboxedValuePod((ui32) 99)); - }, - "def Test(value):\n" - " assert isinstance(value, tuple)\n" - " assert value == ('age', 99)\n" - ); + engine.ToPython( + variantType, + [ageIdx](const TType* /*type*/, const NUdf::IValueBuilder& vb) { + return vb.NewVariant(ageIdx, NUdf::TUnboxedValuePod((ui32)99)); + }, + "def Test(value):\n" + " assert isinstance(value, tuple)\n" + " assert value == ('age', 99)\n"); - engine.ToPython( - variantType, - [nameIdx](const TType* /*type*/, const NUdf::IValueBuilder& vb) { - return vb.NewVariant(nameIdx, vb.NewString("Jamel")); - }, - "def Test(value):\n" - " assert isinstance(value, tuple)\n" - " assert value == ('name', 'Jamel')\n" - ); - } + engine.ToPython( + variantType, + [nameIdx](const TType* /*type*/, const NUdf::IValueBuilder& vb) { + return vb.NewVariant(nameIdx, vb.NewString("Jamel")); + }, 
+ "def Test(value):\n" + " assert isinstance(value, tuple)\n" + " assert value == ('name', 'Jamel')\n"); } +} // Y_UNIT_TEST_SUITE(TPyVariantTest) diff --git a/yql/essentials/udfs/common/python/bindings/py_void.cpp b/yql/essentials/udfs/common/python/bindings/py_void.cpp index ef72c052fbc..697c2f593f4 100644 --- a/yql/essentials/udfs/common/python/bindings/py_void.cpp +++ b/yql/essentials/udfs/common/python/bindings/py_void.cpp @@ -21,80 +21,80 @@ static void VoidDealloc(PyObject*) { PyTypeObject PyVoidType = { PyVarObject_HEAD_INIT(&PyType_Type, 0) - INIT_MEMBER(tp_name , "yql.Void"), - INIT_MEMBER(tp_basicsize , 0), - INIT_MEMBER(tp_itemsize , 0), - INIT_MEMBER(tp_dealloc , VoidDealloc), + // clang-format off + INIT_MEMBER(tp_name, "yql.Void"), + // clang-format on + INIT_MEMBER(tp_basicsize, 0), + INIT_MEMBER(tp_itemsize, 0), + INIT_MEMBER(tp_dealloc, VoidDealloc), #if PY_VERSION_HEX < 0x030800b4 - INIT_MEMBER(tp_print , nullptr), + INIT_MEMBER(tp_print, nullptr), #else INIT_MEMBER(tp_vectorcall_offset, 0), #endif - INIT_MEMBER(tp_getattr , nullptr), - INIT_MEMBER(tp_setattr , nullptr), + INIT_MEMBER(tp_getattr, nullptr), + INIT_MEMBER(tp_setattr, nullptr), #if PY_MAJOR_VERSION >= 3 - INIT_MEMBER(tp_as_async , nullptr), + INIT_MEMBER(tp_as_async, nullptr), #else - INIT_MEMBER(tp_compare , nullptr), + INIT_MEMBER(tp_compare, nullptr), #endif - INIT_MEMBER(tp_repr , VoidRepr), - INIT_MEMBER(tp_as_number , nullptr), - INIT_MEMBER(tp_as_sequence , nullptr), - INIT_MEMBER(tp_as_mapping , nullptr), - INIT_MEMBER(tp_hash , nullptr), - INIT_MEMBER(tp_call , nullptr), - INIT_MEMBER(tp_str , nullptr), - INIT_MEMBER(tp_getattro , nullptr), - INIT_MEMBER(tp_setattro , nullptr), - INIT_MEMBER(tp_as_buffer , nullptr), - INIT_MEMBER(tp_flags , 0), - INIT_MEMBER(tp_doc , "yql.Void object"), - INIT_MEMBER(tp_traverse , nullptr), - INIT_MEMBER(tp_clear , nullptr), - INIT_MEMBER(tp_richcompare , nullptr), - INIT_MEMBER(tp_weaklistoffset , 0), - INIT_MEMBER(tp_iter , nullptr), - 
INIT_MEMBER(tp_iternext , nullptr), - INIT_MEMBER(tp_methods , nullptr), - INIT_MEMBER(tp_members , nullptr), - INIT_MEMBER(tp_getset , nullptr), - INIT_MEMBER(tp_base , nullptr), - INIT_MEMBER(tp_dict , nullptr), - INIT_MEMBER(tp_descr_get , nullptr), - INIT_MEMBER(tp_descr_set , nullptr), - INIT_MEMBER(tp_dictoffset , 0), - INIT_MEMBER(tp_init , nullptr), - INIT_MEMBER(tp_alloc , nullptr), - INIT_MEMBER(tp_new , nullptr), - INIT_MEMBER(tp_free , nullptr), - INIT_MEMBER(tp_is_gc , nullptr), - INIT_MEMBER(tp_bases , nullptr), - INIT_MEMBER(tp_mro , nullptr), - INIT_MEMBER(tp_cache , nullptr), - INIT_MEMBER(tp_subclasses , nullptr), - INIT_MEMBER(tp_weaklist , nullptr), - INIT_MEMBER(tp_del , nullptr), - INIT_MEMBER(tp_version_tag , 0), + INIT_MEMBER(tp_repr, VoidRepr), + INIT_MEMBER(tp_as_number, nullptr), + INIT_MEMBER(tp_as_sequence, nullptr), + INIT_MEMBER(tp_as_mapping, nullptr), + INIT_MEMBER(tp_hash, nullptr), + INIT_MEMBER(tp_call, nullptr), + INIT_MEMBER(tp_str, nullptr), + INIT_MEMBER(tp_getattro, nullptr), + INIT_MEMBER(tp_setattro, nullptr), + INIT_MEMBER(tp_as_buffer, nullptr), + INIT_MEMBER(tp_flags, 0), + INIT_MEMBER(tp_doc, "yql.Void object"), + INIT_MEMBER(tp_traverse, nullptr), + INIT_MEMBER(tp_clear, nullptr), + INIT_MEMBER(tp_richcompare, nullptr), + INIT_MEMBER(tp_weaklistoffset, 0), + INIT_MEMBER(tp_iter, nullptr), + INIT_MEMBER(tp_iternext, nullptr), + INIT_MEMBER(tp_methods, nullptr), + INIT_MEMBER(tp_members, nullptr), + INIT_MEMBER(tp_getset, nullptr), + INIT_MEMBER(tp_base, nullptr), + INIT_MEMBER(tp_dict, nullptr), + INIT_MEMBER(tp_descr_get, nullptr), + INIT_MEMBER(tp_descr_set, nullptr), + INIT_MEMBER(tp_dictoffset, 0), + INIT_MEMBER(tp_init, nullptr), + INIT_MEMBER(tp_alloc, nullptr), + INIT_MEMBER(tp_new, nullptr), + INIT_MEMBER(tp_free, nullptr), + INIT_MEMBER(tp_is_gc, nullptr), + INIT_MEMBER(tp_bases, nullptr), + INIT_MEMBER(tp_mro, nullptr), + INIT_MEMBER(tp_cache, nullptr), + INIT_MEMBER(tp_subclasses, nullptr), + 
INIT_MEMBER(tp_weaklist, nullptr), + INIT_MEMBER(tp_del, nullptr), + INIT_MEMBER(tp_version_tag, 0), #if PY_MAJOR_VERSION >= 3 - INIT_MEMBER(tp_finalize , nullptr), + INIT_MEMBER(tp_finalize, nullptr), #endif #if PY_VERSION_HEX >= 0x030800b1 - INIT_MEMBER(tp_vectorcall , nullptr), + INIT_MEMBER(tp_vectorcall, nullptr), #endif #if PY_VERSION_HEX >= 0x030800b4 && PY_VERSION_HEX < 0x03090000 - INIT_MEMBER(tp_print , nullptr), + INIT_MEMBER(tp_print, nullptr), #endif }; PyObject PyVoidObject = { - _PyObject_EXTRA_INIT - 1, &PyVoidType -}; + _PyObject_EXTRA_INIT 1, &PyVoidType}; TPyObjectPtr ToPyVoid( - const TPyCastContext::TPtr& ctx, - const NUdf::TType* type, - const NUdf::TUnboxedValuePod& value) + const TPyCastContext::TPtr& ctx, + const NUdf::TType* type, + const NUdf::TUnboxedValuePod& value) { Y_UNUSED(ctx); Y_UNUSED(type); @@ -103,9 +103,9 @@ TPyObjectPtr ToPyVoid( } NUdf::TUnboxedValue FromPyVoid( - const TPyCastContext::TPtr& ctx, - const NUdf::TType* type, - PyObject* value) + const TPyCastContext::TPtr& ctx, + const NUdf::TType* type, + PyObject* value) { Y_UNUSED(ctx); Y_UNUSED(type); @@ -114,4 +114,4 @@ NUdf::TUnboxedValue FromPyVoid( return NUdf::TUnboxedValue::Void(); } -} // namspace NPython +} // namespace NPython diff --git a/yql/essentials/udfs/common/python/bindings/py_void.h b/yql/essentials/udfs/common/python/bindings/py_void.h index 3c8203ab6e8..1015a530ff0 100644 --- a/yql/essentials/udfs/common/python/bindings/py_void.h +++ b/yql/essentials/udfs/common/python/bindings/py_void.h @@ -9,13 +9,13 @@ extern PyTypeObject PyVoidType; extern PyObject PyVoidObject; TPyObjectPtr ToPyVoid( - const TPyCastContext::TPtr& ctx, - const NKikimr::NUdf::TType* type, - const NKikimr::NUdf::TUnboxedValuePod& value); + const TPyCastContext::TPtr& ctx, + const NKikimr::NUdf::TType* type, + const NKikimr::NUdf::TUnboxedValuePod& value); NKikimr::NUdf::TUnboxedValue FromPyVoid( - const TPyCastContext::TPtr& ctx, - const NKikimr::NUdf::TType* type, - PyObject* value); 
+ const TPyCastContext::TPtr& ctx, + const NKikimr::NUdf::TType* type, + PyObject* value); -} // namspace NPython +} // namespace NPython diff --git a/yql/essentials/udfs/common/python/bindings/py_void_ut.cpp b/yql/essentials/udfs/common/python/bindings/py_void_ut.cpp index e6e8a72768c..e300f395d23 100644 --- a/yql/essentials/udfs/common/python/bindings/py_void_ut.cpp +++ b/yql/essentials/udfs/common/python/bindings/py_void_ut.cpp @@ -2,36 +2,36 @@ #include <library/cpp/testing/unittest/registar.h> - using namespace NPython; Y_UNIT_TEST_SUITE(TPyVoidTest) { - Y_UNIT_TEST(FromPython) { - TPythonTestEngine engine; - engine.ToMiniKQL<void>( - "import yql\n" - "\n" - "def Test():\n" - " return yql.Void\n", - [](const NUdf::TUnboxedValue& value) { - UNIT_ASSERT(value); - UNIT_ASSERT(false == value.IsBoxed()); - }); - } +Y_UNIT_TEST(FromPython) { + TPythonTestEngine engine; + engine.ToMiniKQL<void>( + "import yql\n" + "\n" + "def Test():\n" + " return yql.Void\n", + [](const NUdf::TUnboxedValue& value) { + UNIT_ASSERT(value); + UNIT_ASSERT(false == value.IsBoxed()); + }); +} - Y_UNIT_TEST(ToPython) { - TPythonTestEngine engine; - engine.ToPython<void>( - [](const TType* type, const NUdf::IValueBuilder& vb) { - Y_UNUSED(type); Y_UNUSED(vb); - return NUdf::TUnboxedValue::Void(); - }, - "import yql\n" - "\n" - "def Test(value):\n" - " assert str(value) == 'yql.Void'\n" - " assert repr(value) == 'yql.Void'\n" - " assert isinstance(value, yql.TVoid)\n" - " assert value is yql.Void\n"); - } +Y_UNIT_TEST(ToPython) { + TPythonTestEngine engine; + engine.ToPython<void>( + [](const TType* type, const NUdf::IValueBuilder& vb) { + Y_UNUSED(type); + Y_UNUSED(vb); + return NUdf::TUnboxedValue::Void(); + }, + "import yql\n" + "\n" + "def Test(value):\n" + " assert str(value) == 'yql.Void'\n" + " assert repr(value) == 'yql.Void'\n" + " assert isinstance(value, yql.TVoid)\n" + " assert value is yql.Void\n"); } +} // Y_UNIT_TEST_SUITE(TPyVoidTest) diff --git 
a/yql/essentials/udfs/common/python/bindings/py_yql_module.cpp b/yql/essentials/udfs/common/python/bindings/py_yql_module.cpp index 11ba4262173..b9a1df22ae2 100644 --- a/yql/essentials/udfs/common/python/bindings/py_yql_module.cpp +++ b/yql/essentials/udfs/common/python/bindings/py_yql_module.cpp @@ -14,84 +14,84 @@ namespace NPython { static PyMethodDef ModuleMethods[] = { - { nullptr, nullptr, 0, nullptr } /* sentinel */ + {nullptr, nullptr, 0, nullptr} /* sentinel */ }; #define MODULE_NAME "yql" #if PY_MAJOR_VERSION >= 3 -#define MODULE_NAME_TYPING "yql.typing" + #define MODULE_NAME_TYPING "yql.typing" #endif #define MODULE_INITIALIZED_ATTRIBUTE "_initialized" PyDoc_STRVAR(ModuleDoc, - "This module provides YQL specific types for Python."); + "This module provides YQL specific types for Python."); #if PY_MAJOR_VERSION >= 3 PyDoc_STRVAR(ModuleDocTyping, - "This module provides annotations for YQL types for Python."); + "This module provides annotations for YQL types for Python."); #endif PyDoc_STRVAR(StopIterationException_doc, - "Can be throwed to yield stream iteration."); + "Can be throwed to yield stream iteration."); -#define PREPARE_TYPE(Name, Type) \ - do { \ - if (PyType_Ready(Type) < 0) { \ +#define PREPARE_TYPE(Name, Type) \ + do { \ + if (PyType_Ready(Type) < 0) { \ throw yexception() << "Can't prepare type: " << (Name); \ - } \ + } \ } while (0) -#define REGISTER_TYPE(Name, Type) \ - do { \ - PREPARE_TYPE(Name, Type); \ - Py_INCREF(Type); \ - if (PyModule_AddObject(module, (Name), (PyObject*) Type) < 0) { \ - throw yexception() << "Can't add type: " << (Name); \ - } \ +#define REGISTER_TYPE(Name, Type) \ + do { \ + PREPARE_TYPE(Name, Type); \ + Py_INCREF(Type); \ + if (PyModule_AddObject(module, (Name), (PyObject*)Type) < 0) { \ + throw yexception() << "Can't add type: " << (Name); \ + } \ } while (0) -#define REGISTER_OBJECT(Name, Object) \ - do { \ - if (PyDict_SetItemString(dict, (Name), (PyObject *) (Object)) < 0) \ - throw yexception() << "Can't 
register object: " << (Name); \ +#define REGISTER_OBJECT(Name, Object) \ + do { \ + if (PyDict_SetItemString(dict, (Name), (PyObject*)(Object)) < 0) \ + throw yexception() << "Can't register object: " << (Name); \ } while (0) -#define REGISTER_EXCEPTION(Name, Object, Doc) \ - do { \ - if (!Object) { \ - Object = PyErr_NewExceptionWithDoc((char*) MODULE_NAME "." Name, Doc, nullptr, nullptr); \ - if (!Object) { \ - throw yexception() << "Can't register exception: " << (Name); \ - } \ - REGISTER_OBJECT(Name, Object); \ - } \ +#define REGISTER_EXCEPTION(Name, Object, Doc) \ + do { \ + if (!Object) { \ + Object = PyErr_NewExceptionWithDoc((char*)MODULE_NAME "." Name, Doc, nullptr, nullptr); \ + if (!Object) { \ + throw yexception() << "Can't register exception: " << (Name); \ + } \ + REGISTER_OBJECT(Name, Object); \ + } \ } while (0) #if PY_MAJOR_VERSION >= 3 static PyModuleDef ModuleDefinition = { - PyModuleDef_HEAD_INIT, - INIT_MEMBER(m_name, MODULE_NAME), - INIT_MEMBER(m_doc, ModuleDoc), - INIT_MEMBER(m_size, -1), - INIT_MEMBER(m_methods, ModuleMethods), - INIT_MEMBER(m_slots, nullptr), - INIT_MEMBER(m_traverse, nullptr), - INIT_MEMBER(m_clear, nullptr), - INIT_MEMBER(m_free, nullptr), + PyModuleDef_HEAD_INIT, + INIT_MEMBER(m_name, MODULE_NAME), + INIT_MEMBER(m_doc, ModuleDoc), + INIT_MEMBER(m_size, -1), + INIT_MEMBER(m_methods, ModuleMethods), + INIT_MEMBER(m_slots, nullptr), + INIT_MEMBER(m_traverse, nullptr), + INIT_MEMBER(m_clear, nullptr), + INIT_MEMBER(m_free, nullptr), }; static PyModuleDef ModuleDefinitionTyping = { - PyModuleDef_HEAD_INIT, - INIT_MEMBER(m_name, MODULE_NAME_TYPING), - INIT_MEMBER(m_doc, ModuleDocTyping), - INIT_MEMBER(m_size, -1), - INIT_MEMBER(m_methods, nullptr), - INIT_MEMBER(m_slots, nullptr), - INIT_MEMBER(m_traverse, nullptr), - INIT_MEMBER(m_clear, nullptr), - INIT_MEMBER(m_free, nullptr), + PyModuleDef_HEAD_INIT, + INIT_MEMBER(m_name, MODULE_NAME_TYPING), + INIT_MEMBER(m_doc, ModuleDocTyping), + INIT_MEMBER(m_size, -1), + 
INIT_MEMBER(m_methods, nullptr), + INIT_MEMBER(m_slots, nullptr), + INIT_MEMBER(m_traverse, nullptr), + INIT_MEMBER(m_clear, nullptr), + INIT_MEMBER(m_free, nullptr), }; PyMODINIT_FUNC PyInit_YQL(void) // NOLINT(readability-identifier-naming) @@ -207,7 +207,7 @@ void InitYqlModule(NYql::NUdf::EPythonFlavor pythonFlavor, bool standalone) { ythrow yexception() << "Can't parse YQL type annotations module"; } - auto processError = [&] (PyObject* obj, TStringBuf message) { + auto processError = [&](PyObject* obj, TStringBuf message) { if (obj) { return; } @@ -248,4 +248,4 @@ void TermYqlModule() { PyYieldIterationException = nullptr; } -} // namspace NPython +} // namespace NPython diff --git a/yql/essentials/udfs/common/python/bindings/py_yql_module.h b/yql/essentials/udfs/common/python/bindings/py_yql_module.h index 970471d029e..f621175af50 100644 --- a/yql/essentials/udfs/common/python/bindings/py_yql_module.h +++ b/yql/essentials/udfs/common/python/bindings/py_yql_module.h @@ -8,4 +8,4 @@ void PrepareYqlModule(); void InitYqlModule(NYql::NUdf::EPythonFlavor pythonFlavor, bool standalone = true); void TermYqlModule(); -} // namspace NPython +} // namespace NPython diff --git a/yql/essentials/udfs/common/python/bindings/typing.py b/yql/essentials/udfs/common/python/bindings/typing.py index 0e53ad1e0a4..da32d5b5de9 100644 --- a/yql/essentials/udfs/common/python/bindings/typing.py +++ b/yql/essentials/udfs/common/python/bindings/typing.py @@ -18,9 +18,7 @@ def main(): import yandex.type_info.typing as ti_typing import six except ImportError as e: - raise ImportError( - str(e) + ". Make sure that library/python/type_info is in your PEERDIR list" - ) + raise ImportError(str(e) + ". 
Make sure that library/python/type_info is in your PEERDIR list") from yql import typing @@ -80,10 +78,10 @@ def main(): arg_type = param.stop ti_base.validate_type(arg_type) if param.step is not None: - for x in param.step: - if x != AutoMap: - raise ValueError("Expected AutoMap as parameter flag but got: {}".format(ti_base._with_type(x))) - flags.add(x) + for x in param.step: + if x != AutoMap: + raise ValueError("Expected AutoMap as parameter flag but got: {}".format(ti_base._with_type(x))) + flags.add(x) else: ti_base.validate_type(arg_type) return (name, arg_type, flags) @@ -91,13 +89,17 @@ def main(): @six.python_2_unicode_compatible class GenericCallableAlias(ti_base.Type): def __str__(self): - return ("Callable<(" + - ",".join(_format_arg(x) for x in self.args[:len(self.args)-self.optional_args]) + - ("," if len(self.args) > self.optional_args and self.optional_args else "") + - ("[" if self.optional_args else "") + - ",".join(_format_arg(x) for x in self.args[len(self.args)-self.optional_args:]) + - ("]" if self.optional_args else "") + - ")->" + str(getattr(self, "return")) + ">") + return ( + "Callable<(" + + ",".join(_format_arg(x) for x in self.args[: len(self.args) - self.optional_args]) + + ("," if len(self.args) > self.optional_args and self.optional_args else "") + + ("[" if self.optional_args else "") + + ",".join(_format_arg(x) for x in self.args[len(self.args) - self.optional_args :]) + + ("]" if self.optional_args else "") + + ")->" + + str(getattr(self, "return")) + + ">" + ) def to_yson_type(self): yson_repr = { @@ -108,18 +110,28 @@ def main(): } return yson_repr - class GenericCallable(ti_base.Generic): def __getitem__(self, params): - if not isinstance(params, tuple) or len(params) < 2 or not isinstance(params[0], int) or not ti_typing.is_valid_type(params[1]): - raise ValueError("Expected at least two arguments (integer and type of return value) but got: {}".format(ti_base._with_type(params))) + if ( + not isinstance(params, tuple) + or 
len(params) < 2 + or not isinstance(params[0], int) + or not ti_typing.is_valid_type(params[1]) + ): + raise ValueError( + "Expected at least two arguments (integer and type of return value) but got: {}".format( + ti_base._with_type(params) + ) + ) args = [] for param in params[2:]: name, arg_type, flags = _extract_arg_info(param) args.append((name, arg_type, flags)) if params[0] < 0 or params[0] > len(args): - raise ValueError("Optional argument count - " + str(params[0]) + " out of range [0.." + str(len(args)) + "]") + raise ValueError( + "Optional argument count - " + str(params[0]) + " out of range [0.." + str(len(args)) + "]" + ) attrs = { "optional_args": params[0], diff --git a/yql/essentials/udfs/common/python/bindings/ya.make b/yql/essentials/udfs/common/python/bindings/ya.make index aea3e547176..29dca847ee9 100644 --- a/yql/essentials/udfs/common/python/bindings/ya.make +++ b/yql/essentials/udfs/common/python/bindings/ya.make @@ -2,6 +2,8 @@ PY23_NATIVE_LIBRARY() YQL_ABI_VERSION(2 27 0) +ENABLE(YQL_STYLE_CPP) + SRCS( py_callable.cpp py_cast.cpp diff --git a/yql/essentials/udfs/common/python/bindings/ya.make.test.inc b/yql/essentials/udfs/common/python/bindings/ya.make.test.inc index 67803ad18f4..036c72bc147 100644 --- a/yql/essentials/udfs/common/python/bindings/ya.make.test.inc +++ b/yql/essentials/udfs/common/python/bindings/ya.make.test.inc @@ -1,3 +1,5 @@ +ENABLE(YQL_STYLE_CPP) + SRCS( py_callable_ut.cpp py_cast_ut.cpp diff --git a/yql/essentials/udfs/common/python/main_py3/include/main.h b/yql/essentials/udfs/common/python/main_py3/include/main.h index c96402004e3..f8ae216a196 100644 --- a/yql/essentials/udfs/common/python/main_py3/include/main.h +++ b/yql/essentials/udfs/common/python/main_py3/include/main.h @@ -4,9 +4,7 @@ #ifdef __cplusplus extern "C" { #endif -Y_PUBLIC -int RunPython(int argc, char** argv); + Y_PUBLIC int RunPython(int argc, char** argv); #ifdef __cplusplus } #endif - diff --git 
a/yql/essentials/udfs/common/python/main_py3/main.cpp b/yql/essentials/udfs/common/python/main_py3/main.cpp index edc3c89ca5b..a347c2eb864 100644 --- a/yql/essentials/udfs/common/python/main_py3/main.cpp +++ b/yql/essentials/udfs/common/python/main_py3/main.cpp @@ -1,9 +1,7 @@ #include "main.h" -extern "C" -int RunPythonImpl(int argc, char** argv); +extern "C" int RunPythonImpl(int argc, char** argv); -extern "C" -int RunPython(int argc, char** argv) { +extern "C" int RunPython(int argc, char** argv) { return RunPythonImpl(argc, argv); } diff --git a/yql/essentials/udfs/common/python/main_py3/ya.make b/yql/essentials/udfs/common/python/main_py3/ya.make index 7bbb4d0f304..1b2dc5b61bd 100644 --- a/yql/essentials/udfs/common/python/main_py3/ya.make +++ b/yql/essentials/udfs/common/python/main_py3/ya.make @@ -1,5 +1,7 @@ LIBRARY() +ENABLE(YQL_STYLE_CPP) + USE_PYTHON3() ADDINCL( diff --git a/yql/essentials/udfs/common/python/python_udf/python_function_factory.h b/yql/essentials/udfs/common/python/python_udf/python_function_factory.h index 7d96f67a083..6f81817f609 100644 --- a/yql/essentials/udfs/common/python/python_udf/python_function_factory.h +++ b/yql/essentials/udfs/common/python/python_udf/python_function_factory.h @@ -25,15 +25,14 @@ using namespace NPython; ////////////////////////////////////////////////////////////////////////////// // TPythonFunctionFactory ////////////////////////////////////////////////////////////////////////////// -class TPythonFunctionFactory: public TBoxedValue -{ +class TPythonFunctionFactory: public TBoxedValue { public: TPythonFunctionFactory( - const TStringRef& name, - const TStringRef& tag, - const TType* functionType, - ITypeInfoHelper::TPtr&& helper, - const NYql::NUdf::TSourcePosition& pos) + const TStringRef& name, + const TStringRef& tag, + const TType* functionType, + ITypeInfoHelper::TPtr&& helper, + const NYql::NUdf::TSourcePosition& pos) : Ctx_(new TPyContext(helper, tag, pos)) , FunctionName_(name) , 
FunctionType_(functionType) @@ -47,9 +46,8 @@ public: private: TUnboxedValue Run( - const IValueBuilder* valueBuilder, - const TUnboxedValuePod* args) const override - { + const IValueBuilder* valueBuilder, + const TUnboxedValuePod* args) const override { TPyCastContext::TPtr castCtx = MakeIntrusive<TPyCastContext>(valueBuilder, Ctx_); // for get propper c-compatible null-terminating string @@ -74,7 +72,8 @@ private: SetupCallableSettings(castCtx, function.Get()); } catch (const yexception& e) { UdfTerminate((TStringBuilder() << Ctx_->Pos << "Failed to setup callable settings: " - << e.what()).c_str()); + << e.what()) + .c_str()); } return FromPyCallable(castCtx, FunctionType_, function.Release()); } @@ -92,7 +91,7 @@ private: cflags.cf_flags = PyCF_SOURCE_IS_UTF8; code.ResetSteal(Py_CompileStringFlags( - source.data(), filename.data(), Py_file_input, &cflags)); + source.data(), filename.data(), Py_file_input, &cflags)); } if (code) { diff --git a/yql/essentials/udfs/common/python/python_udf/python_udf.cpp b/yql/essentials/udfs/common/python/python_udf/python_udf.cpp index a14d9d81c32..fd0743962ba 100644 --- a/yql/essentials/udfs/common/python/python_udf/python_udf.cpp +++ b/yql/essentials/udfs/common/python/python_udf/python_udf.cpp @@ -10,9 +10,9 @@ namespace { #if PY_MAJOR_VERSION >= 3 -#define PYTHON_PROGRAMM_NAME L"YQL::Python3" + #define PYTHON_PROGRAMM_NAME L"YQL::Python3" #else -#define PYTHON_PROGRAMM_NAME "YQL::Python2" + #define PYTHON_PROGRAMM_NAME "YQL::Python2" #endif int AddToPythonPath(const TVector<TStringBuf>& pathVals) @@ -20,9 +20,11 @@ int AddToPythonPath(const TVector<TStringBuf>& pathVals) char pathVar[] = "path"; // PySys_{Get,Set}Object take a non-const char* arg TPyObjectPtr sysPath(PySys_GetObject(pathVar), TPyObjectPtr::ADD_REF); - if (!sysPath) return -1; + if (!sysPath) { + return -1; + } - for (const auto& val: pathVals) { + for (const auto& val : pathVals) { TPyObjectPtr pyStr = PyRepr(val.data()); int rc = 
PyList_Append(sysPath.Get(), pyStr.Get()); if (rc != 0) { @@ -45,11 +47,11 @@ void InitArcadiaPythonRuntime() ////////////////////////////////////////////////////////////////////////////// // TPythonModule ////////////////////////////////////////////////////////////////////////////// -class TPythonModule: public IUdfModule -{ +class TPythonModule: public IUdfModule { public: TPythonModule(const TString& resourceName, EPythonFlavor pythonFlavor, bool standalone = true) - : ResourceName_(resourceName), Standalone_(standalone) + : ResourceName_(resourceName) + , Standalone_(standalone) { if (Standalone_) { Py_SetProgramName(PYTHON_PROGRAMM_NAME); @@ -99,15 +101,15 @@ public: PyCleanup(); } - void GetAllFunctions(IFunctionsSink&) const final {} + void GetAllFunctions(IFunctionsSink&) const final { + } void BuildFunctionTypeInfo( - const TStringRef& name, - TType* userType, - const TStringRef& typeConfig, - ui32 flags, - IFunctionTypeInfoBuilder& builder) const final - { + const TStringRef& name, + TType* userType, + const TStringRef& typeConfig, + ui32 flags, + IFunctionTypeInfoBuilder& builder) const final { Y_UNUSED(typeConfig); if (flags & TFlags::TypesOnly) { @@ -137,44 +139,43 @@ private: // TStubModule ////////////////////////////////////////////////////////////////////////////// class TStubModule: public IUdfModule { - void GetAllFunctions(IFunctionsSink&) const final {} + void GetAllFunctions(IFunctionsSink&) const final { + } void BuildFunctionTypeInfo( - const TStringRef& /*name*/, - TType* /*userType*/, - const TStringRef& /*typeConfig*/, - ui32 flags, - IFunctionTypeInfoBuilder& /*builder*/) const final - { + const TStringRef& /*name*/, + TType* /*userType*/, + const TStringRef& /*typeConfig*/, + ui32 flags, + IFunctionTypeInfoBuilder& /*builder*/) const final { Y_DEBUG_ABORT_UNLESS(flags & TFlags::TypesOnly, - "in stub module this function can be called only for types loading"); + "in stub module this function can be called only for types loading"); } - 
void CleanupOnTerminate() const final {} + void CleanupOnTerminate() const final { + } }; } // namespace void NKikimr::NUdf::RegisterYqlPythonUdf( - IRegistrator& registrator, - ui32 flags, - TStringBuf moduleName, - TStringBuf resourceName, - EPythonFlavor pythonFlavor) + IRegistrator& registrator, + ui32 flags, + TStringBuf moduleName, + TStringBuf resourceName, + EPythonFlavor pythonFlavor) { if (flags & IRegistrator::TFlags::TypesOnly) { registrator.AddModule(moduleName, new TStubModule); } else { registrator.AddModule( moduleName, - NKikimr::NUdf::GetYqlPythonUdfModule(resourceName, pythonFlavor, true) - ); + NKikimr::NUdf::GetYqlPythonUdfModule(resourceName, pythonFlavor, true)); } } TUniquePtr<NKikimr::NUdf::IUdfModule> NKikimr::NUdf::GetYqlPythonUdfModule( TStringBuf resourceName, NKikimr::NUdf::EPythonFlavor pythonFlavor, - bool standalone -) { + bool standalone) { return new TPythonModule(TString(resourceName), pythonFlavor, standalone); } diff --git a/yql/essentials/udfs/common/python/python_udf/python_udf.h b/yql/essentials/udfs/common/python/python_udf/python_udf.h index 83b3bb86e6f..ff8f6e3b07a 100644 --- a/yql/essentials/udfs/common/python/python_udf/python_udf.h +++ b/yql/essentials/udfs/common/python/python_udf/python_udf.h @@ -6,7 +6,7 @@ namespace NYql { namespace NUdf { inline constexpr char STANDART_STREAM_PROXY_INJECTION_SCRIPT[] = -R"( + R"( # numpy on import may find installed openblas library and load it, # which in turn causes it to start CPUCOUNT threads # with approx. 
40Mb memory reserved for each thread; @@ -66,11 +66,11 @@ enum class EPythonFlavor { }; void RegisterYqlPythonUdf( - IRegistrator& registrator, - ui32 flags, - TStringBuf moduleName, - TStringBuf resourceName, - EPythonFlavor pythonFlavor); + IRegistrator& registrator, + ui32 flags, + TStringBuf moduleName, + TStringBuf resourceName, + EPythonFlavor pythonFlavor); TUniquePtr<IUdfModule> GetYqlPythonUdfModule( TStringBuf resourceName, diff --git a/yql/essentials/udfs/common/python/python_udf/ya.make b/yql/essentials/udfs/common/python/python_udf/ya.make index 9a2090665a2..124f075c904 100644 --- a/yql/essentials/udfs/common/python/python_udf/ya.make +++ b/yql/essentials/udfs/common/python/python_udf/ya.make @@ -2,6 +2,8 @@ PY23_NATIVE_LIBRARY() YQL_ABI_VERSION(2 27 0) +ENABLE(YQL_STYLE_CPP) + SRCS( python_udf.cpp ) diff --git a/yql/essentials/udfs/common/re2/re2_udf.cpp b/yql/essentials/udfs/common/re2/re2_udf.cpp index 2f1f6dbb529..b13d975cf35 100644 --- a/yql/essentials/udfs/common/re2/re2_udf.cpp +++ b/yql/essentials/udfs/common/re2/re2_udf.cpp @@ -15,14 +15,14 @@ using namespace NUdf; namespace { - template <typename T> - T Id(T x) { - return x; - } +template <typename T> +T Id(T x) { + return x; +} - re2::RE2::Options::Encoding EncodingFromBool(bool x) { - return x ? re2::RE2::Options::Encoding::EncodingUTF8 : re2::RE2::Options::Encoding::EncodingLatin1; - } +re2::RE2::Options::Encoding EncodingFromBool(bool x) { + return x ? 
re2::RE2::Options::Encoding::EncodingUTF8 : re2::RE2::Options::Encoding::EncodingLatin1; +} #define OPTIONS_MAP(xx) \ xx(Utf8, 0, bool, true, set_encoding, EncodingFromBool) \ @@ -39,309 +39,310 @@ namespace { xx(WordBoundary, 11, bool, false, set_word_boundary, Id) \ xx(OneLine, 12, bool, false, set_one_line, Id) - ui64 GetFailProbability() { - auto envResult = TryGetEnv("YQL_RE2_REGEXP_PROBABILITY_FAIL"); - if (!envResult) { - return 0; - } - ui64 result; - bool isValid = TryIntFromString<10, ui64>(envResult->data(), envResult->size(), result); - Y_ENSURE(isValid, TStringBuilder() << "Error while parsing YQL_RE2_REGEXP_PROBABILITY_FAIL. Actual value is: " << *envResult); - return result; +ui64 GetFailProbability() { + auto envResult = TryGetEnv("YQL_RE2_REGEXP_PROBABILITY_FAIL"); + if (!envResult) { + return 0; } + ui64 result; + bool isValid = TryIntFromString<10, ui64>(envResult->data(), envResult->size(), result); + Y_ENSURE(isValid, TStringBuilder() << "Error while parsing YQL_RE2_REGEXP_PROBABILITY_FAIL. 
Actual value is: " << *envResult); + return result; +} - bool ShouldFailOnInvalidRegexp(const std::string_view regexp, NYql::TLangVersion currentLangVersion) { - if (currentLangVersion >= NYql::MakeLangVersion(2025, 3)) { - return true; - } - THashType hash = GetStringHash(regexp) % 100; - static ui64 failProbability = GetFailProbability(); - return hash < failProbability; +bool ShouldFailOnInvalidRegexp(const std::string_view regexp, NYql::TLangVersion currentLangVersion) { + if (currentLangVersion >= NYql::MakeLangVersion(2025, 3)) { + return true; } + THashType hash = GetStringHash(regexp) % 100; + static ui64 failProbability = GetFailProbability(); + return hash < failProbability; +} - RE2::Options CreateDefaultOptions(){ - RE2::Options options; +RE2::Options CreateDefaultOptions() { + RE2::Options options; #define FIELD_HANDLE(name, index, type, defVal, setter, conv) options.setter(conv(defVal)); - OPTIONS_MAP(FIELD_HANDLE) + OPTIONS_MAP(FIELD_HANDLE) #undef FIELD_HANDLE - options.set_log_errors(false); - return options; - } + options.set_log_errors(false); + return options; +} - TString FormatRegexpError(const RE2& Regexp) { - return TStringBuilder() << "Regexp compilation failed. Regexp: \"" << Regexp.pattern() << "\". Original error is: \"" << Regexp.error() << "\""; - } +TString FormatRegexpError(const RE2& Regexp) { + return TStringBuilder() << "Regexp compilation failed. Regexp: \"" << Regexp.pattern() << "\". 
Original error is: \"" << Regexp.error() << "\""; +} - enum EOptionsField: ui32 { - OPTIONS_MAP(ENUM_VALUE_GEN) - Count - }; +enum EOptionsField: ui32 { + OPTIONS_MAP(ENUM_VALUE_GEN) + Count +}; - struct TOptionsSchema { - TType* StructType; - ui32 Indices[EOptionsField::Count]; - }; +struct TOptionsSchema { + TType* StructType; + ui32 Indices[EOptionsField::Count]; +}; - RE2::Options ExtractOptions(std::string_view pattern, TUnboxedValuePod optionsValue, const TOptionsSchema& schema, bool posix) { - RE2::Options options = CreateDefaultOptions(); +RE2::Options ExtractOptions(std::string_view pattern, TUnboxedValuePod optionsValue, const TOptionsSchema& schema, bool posix) { + RE2::Options options = CreateDefaultOptions(); - options.set_posix_syntax(posix); - bool needUtf8 = (UTF8Detect(pattern) == UTF8); - options.set_encoding( - needUtf8 - ? RE2::Options::Encoding::EncodingUTF8 - : RE2::Options::Encoding::EncodingLatin1); - if (optionsValue) { + options.set_posix_syntax(posix); + bool needUtf8 = (UTF8Detect(pattern) == UTF8); + options.set_encoding( + needUtf8 + ? 
RE2::Options::Encoding::EncodingUTF8 + : RE2::Options::Encoding::EncodingLatin1); + if (optionsValue) { #define FIELD_HANDLE(name, index, type, defVal, setter, conv) options.setter(conv(optionsValue.GetElement(schema.Indices[index]).Get<type>())); - OPTIONS_MAP(FIELD_HANDLE) + OPTIONS_MAP(FIELD_HANDLE) #undef FIELD_HANDLE - options.set_log_errors(false); - } - return options; + options.set_log_errors(false); } + return options; +} + +struct TRegexpGroups { + TVector<TString> Names; + TVector<ui32> Indexes; +}; - struct TRegexpGroups { - TVector<TString> Names; - TVector<ui32> Indexes; +class TRe2Udf: public TBoxedValue { +public: + enum EMode { + MATCH, + GREP, + CAPTURE, + REPLACE, + COUNT, + FIND_AND_CONSUME, }; - class TRe2Udf: public TBoxedValue { + template <bool posix> + class TFactory: public TBoxedValue { public: - enum EMode { - MATCH, - GREP, - CAPTURE, - REPLACE, - COUNT, - FIND_AND_CONSUME, - }; - - template <bool posix> - class TFactory: public TBoxedValue { - public: - TFactory( - EMode mode, - const TOptionsSchema& optionsSchema, - TSourcePosition pos, - NYql::TLangVersion currentlangVersion, - const TRegexpGroups& regexpGroups = TRegexpGroups()) - : Mode_(mode) - , OptionsSchema_(optionsSchema) - , Pos_(pos) - , RegexpGroups_(regexpGroups) - , CurrentLangVersion_(currentlangVersion) - { - } - - private: - TUnboxedValue Run( - const IValueBuilder* valueBuilder, - const TUnboxedValuePod* args) const override { - return TUnboxedValuePod( - new TRe2Udf( - valueBuilder, - args[0], - RegexpGroups_, - Mode_, - posix, - OptionsSchema_, - Pos_, - CurrentLangVersion_)); - } - - EMode Mode_; - const TOptionsSchema OptionsSchema_; - TSourcePosition Pos_; - const TRegexpGroups RegexpGroups_; - NYql::TLangVersion CurrentLangVersion_; - }; - - static const TStringRef& Name(EMode mode) { - static auto match = TStringRef::Of("Match"); - static auto grep = TStringRef::Of("Grep"); - static auto capture = TStringRef::Of("Capture"); - static auto replace = 
TStringRef::Of("Replace"); - static auto count = TStringRef::Of("Count"); - static auto findAndconsume = TStringRef::Of("FindAndConsume"); - - switch (mode) { - case EMode::MATCH: - return match; - case EMode::GREP: - return grep; - case EMode::CAPTURE: - return capture; - case EMode::REPLACE: - return replace; - case EMode::COUNT: - return count; - case EMode::FIND_AND_CONSUME: - return findAndconsume; - } - Y_ABORT("Unexpected mode"); - } - - TRe2Udf( - const IValueBuilder*, - const TUnboxedValuePod& runConfig, - const TRegexpGroups regexpGroups, + TFactory( EMode mode, - bool posix, const TOptionsSchema& optionsSchema, TSourcePosition pos, - NYql::TLangVersion currentLangVersion) - : RegexpGroups_(regexpGroups) - , Mode_(mode) - , Captured_() + NYql::TLangVersion currentlangVersion, + const TRegexpGroups& regexpGroups = TRegexpGroups()) + : Mode_(mode) , OptionsSchema_(optionsSchema) , Pos_(pos) - , CurrentLangVersion_(currentLangVersion) { - try { - auto patternValue = runConfig.GetElement(0); - auto optionsValue = runConfig.GetElement(1); - const std::string_view pattern(patternValue.AsStringRef()); + , RegexpGroups_(regexpGroups) + , CurrentLangVersion_(currentlangVersion) + { + } - RE2::Options options = ExtractOptions(pattern, optionsValue, OptionsSchema_, posix); - Regexp_ = std::make_unique<RE2>(StringPiece(pattern.data(), pattern.size()), options); + private: + TUnboxedValue Run( + const IValueBuilder* valueBuilder, + const TUnboxedValuePod* args) const override { + return TUnboxedValuePod( + new TRe2Udf( + valueBuilder, + args[0], + RegexpGroups_, + Mode_, + posix, + OptionsSchema_, + Pos_, + CurrentLangVersion_)); + } - if (!Regexp_->ok() && ShouldFailOnInvalidRegexp(pattern, CurrentLangVersion_)) { - throw yexception() << FormatRegexpError(*Regexp_); - } + EMode Mode_; + const TOptionsSchema OptionsSchema_; + TSourcePosition Pos_; + const TRegexpGroups RegexpGroups_; + NYql::TLangVersion CurrentLangVersion_; + }; - if (mode == EMode::CAPTURE) { - 
Captured_ = std::make_unique<StringPiece[]>(Regexp_->NumberOfCapturingGroups() + 1); - } + static const TStringRef& Name(EMode mode) { + static auto match = TStringRef::Of("Match"); + static auto grep = TStringRef::Of("Grep"); + static auto capture = TStringRef::Of("Capture"); + static auto replace = TStringRef::Of("Replace"); + static auto count = TStringRef::Of("Count"); + static auto findAndconsume = TStringRef::Of("FindAndConsume"); + + switch (mode) { + case EMode::MATCH: + return match; + case EMode::GREP: + return grep; + case EMode::CAPTURE: + return capture; + case EMode::REPLACE: + return replace; + case EMode::COUNT: + return count; + case EMode::FIND_AND_CONSUME: + return findAndconsume; + } + Y_ABORT("Unexpected mode"); + } - } catch (const std::exception& e) { - UdfTerminate((TStringBuilder() << Pos_ << " " << e.what()).c_str()); + TRe2Udf( + const IValueBuilder*, + const TUnboxedValuePod& runConfig, + const TRegexpGroups regexpGroups, + EMode mode, + bool posix, + const TOptionsSchema& optionsSchema, + TSourcePosition pos, + NYql::TLangVersion currentLangVersion) + : RegexpGroups_(regexpGroups) + , Mode_(mode) + , Captured_() + , OptionsSchema_(optionsSchema) + , Pos_(pos) + , CurrentLangVersion_(currentLangVersion) + { + try { + auto patternValue = runConfig.GetElement(0); + auto optionsValue = runConfig.GetElement(1); + const std::string_view pattern(patternValue.AsStringRef()); + + RE2::Options options = ExtractOptions(pattern, optionsValue, OptionsSchema_, posix); + Regexp_ = std::make_unique<RE2>(StringPiece(pattern.data(), pattern.size()), options); + + if (!Regexp_->ok() && ShouldFailOnInvalidRegexp(pattern, CurrentLangVersion_)) { + throw yexception() << FormatRegexpError(*Regexp_); } + + if (mode == EMode::CAPTURE) { + Captured_ = std::make_unique<StringPiece[]>(Regexp_->NumberOfCapturingGroups() + 1); + } + + } catch (const std::exception& e) { + UdfTerminate((TStringBuilder() << Pos_ << " " << e.what()).c_str()); } + } - private: - 
TUnboxedValue Run( - const IValueBuilder* valueBuilder, - const TUnboxedValuePod* args) const final try { - RE2::Anchor anchor = RE2::UNANCHORED; - if (args[0]) { - const std::string_view input(args[0].AsStringRef()); - const StringPiece piece(input.data(), input.size()); +private: + TUnboxedValue Run( + const IValueBuilder* valueBuilder, + const TUnboxedValuePod* args) const final try { + RE2::Anchor anchor = RE2::UNANCHORED; + if (args[0]) { + const std::string_view input(args[0].AsStringRef()); + const StringPiece piece(input.data(), input.size()); - switch (Mode_) { - case MATCH: - anchor = RE2::ANCHOR_BOTH; - [[fallthrough]]; - case GREP: - return TUnboxedValuePod(Regexp_->Match(piece, 0, input.size(), anchor, nullptr, 0)); - case CAPTURE: { - const int count = Regexp_->NumberOfCapturingGroups() + 1; - TUnboxedValue* items = nullptr; - const auto result = valueBuilder->NewArray(RegexpGroups_.Names.size(), items); - if (Regexp_->Match(piece, 0, input.size(), anchor, Captured_.get(), count)) { - for (int i = 0; i < count; ++i) { - if (!Captured_[i].empty()) { - items[RegexpGroups_.Indexes[i]] = valueBuilder->SubString(args[0], std::distance(piece.begin(), Captured_[i].begin()), Captured_[i].size()); - } + switch (Mode_) { + case MATCH: + anchor = RE2::ANCHOR_BOTH; + [[fallthrough]]; + case GREP: + return TUnboxedValuePod(Regexp_->Match(piece, 0, input.size(), anchor, nullptr, 0)); + case CAPTURE: { + const int count = Regexp_->NumberOfCapturingGroups() + 1; + TUnboxedValue* items = nullptr; + const auto result = valueBuilder->NewArray(RegexpGroups_.Names.size(), items); + if (Regexp_->Match(piece, 0, input.size(), anchor, Captured_.get(), count)) { + for (int i = 0; i < count; ++i) { + if (!Captured_[i].empty()) { + items[RegexpGroups_.Indexes[i]] = valueBuilder->SubString(args[0], std::distance(piece.begin(), Captured_[i].begin()), Captured_[i].size()); } - } else { - return BuildEmptyStruct(valueBuilder); - } - return result; - } - case REPLACE: { - const 
std::string_view rewriteRef(args[1].AsStringRef()); - const StringPiece rewrite(rewriteRef.data(), rewriteRef.size()); - TString rewriteError; - if (!Regexp_->CheckRewriteString(rewrite, &rewriteError)) { - UdfTerminate((TStringBuilder() << Pos_ << " [rewrite error] " << rewriteError).c_str()); } - std::string result(input); - RE2::GlobalReplace(&result, *Regexp_, rewrite); - return input == result ? TUnboxedValue(args[0]) : valueBuilder->NewString(result); + } else { + return BuildEmptyStruct(valueBuilder); } - case COUNT: { - std::string inputHolder(input); - const ui32 result = RE2::GlobalReplace(&inputHolder, *Regexp_, ""); - return TUnboxedValuePod(result); + return result; + } + case REPLACE: { + const std::string_view rewriteRef(args[1].AsStringRef()); + const StringPiece rewrite(rewriteRef.data(), rewriteRef.size()); + TString rewriteError; + if (!Regexp_->CheckRewriteString(rewrite, &rewriteError)) { + UdfTerminate((TStringBuilder() << Pos_ << " [rewrite error] " << rewriteError).c_str()); } - case FIND_AND_CONSUME: { - StringPiece text(piece); - std::vector<TUnboxedValue> matches; - for (StringPiece w; text.begin() < text.end() && RE2::FindAndConsume(&text, *Regexp_, &w);) { - if (w.size() == 0 && !text.empty()) { - text.remove_prefix(1); - } - matches.emplace_back(valueBuilder->SubString(args[0], std::distance(piece.begin(), w.begin()), w.size())); + std::string result(input); + RE2::GlobalReplace(&result, *Regexp_, rewrite); + return input == result ? 
TUnboxedValue(args[0]) : valueBuilder->NewString(result); + } + case COUNT: { + std::string inputHolder(input); + const ui32 result = RE2::GlobalReplace(&inputHolder, *Regexp_, ""); + return TUnboxedValuePod(result); + } + case FIND_AND_CONSUME: { + StringPiece text(piece); + std::vector<TUnboxedValue> matches; + for (StringPiece w; text.begin() < text.end() && RE2::FindAndConsume(&text, *Regexp_, &w);) { + if (w.size() == 0 && !text.empty()) { + text.remove_prefix(1); } - return valueBuilder->NewList(matches.data(), matches.size()); + matches.emplace_back(valueBuilder->SubString(args[0], std::distance(piece.begin(), w.begin()), w.size())); } + return valueBuilder->NewList(matches.data(), matches.size()); } - Y_ABORT("Unexpected mode"); - } else { - switch (Mode_) { - case MATCH: - case GREP: - return TUnboxedValuePod(false); - case CAPTURE: - return BuildEmptyStruct(valueBuilder); - case REPLACE: - return TUnboxedValuePod(); - case COUNT: - return TUnboxedValuePod::Zero(); - case FIND_AND_CONSUME: - return valueBuilder->NewEmptyList(); - } - Y_ABORT("Unexpected mode"); } - } catch (const std::exception& e) { - UdfTerminate((TStringBuilder() << Pos_ << " " << e.what()).c_str()); + Y_ABORT("Unexpected mode"); + } else { + switch (Mode_) { + case MATCH: + case GREP: + return TUnboxedValuePod(false); + case CAPTURE: + return BuildEmptyStruct(valueBuilder); + case REPLACE: + return TUnboxedValuePod(); + case COUNT: + return TUnboxedValuePod::Zero(); + case FIND_AND_CONSUME: + return valueBuilder->NewEmptyList(); + } + Y_ABORT("Unexpected mode"); } + } catch (const std::exception& e) { + UdfTerminate((TStringBuilder() << Pos_ << " " << e.what()).c_str()); + } - std::unique_ptr<RE2> Regexp_; - const TRegexpGroups RegexpGroups_; - EMode Mode_; - std::unique_ptr<StringPiece[]> Captured_; - const TOptionsSchema OptionsSchema_; - TSourcePosition Pos_; - NYql::TLangVersion CurrentLangVersion_; - - TUnboxedValue BuildEmptyStruct(const IValueBuilder* valueBuilder) const { - 
TUnboxedValue* items = nullptr; - return valueBuilder->NewArray(RegexpGroups_.Names.size(), items); - } - }; + std::unique_ptr<RE2> Regexp_; + const TRegexpGroups RegexpGroups_; + EMode Mode_; + std::unique_ptr<StringPiece[]> Captured_; + const TOptionsSchema OptionsSchema_; + TSourcePosition Pos_; + NYql::TLangVersion CurrentLangVersion_; - SIMPLE_STRICT_UDF(TEscape, char*(char*)) { - const std::string_view input(args[0].AsStringRef()); - const auto& result = RE2::QuoteMeta(StringPiece(input.data(), input.size())); - return input == result ? TUnboxedValue(args[0]) : valueBuilder->NewString(result); + TUnboxedValue BuildEmptyStruct(const IValueBuilder* valueBuilder) const { + TUnboxedValue* items = nullptr; + return valueBuilder->NewArray(RegexpGroups_.Names.size(), items); } +}; - TOptionsSchema MakeOptionsSchema(::NKikimr::NUdf::IFunctionTypeInfoBuilder& builder) { - TOptionsSchema ret; - auto structBuilder = builder.Struct(EOptionsField::Count); +SIMPLE_STRICT_UDF(TEscape, char*(char*)) { + const std::string_view input(args[0].AsStringRef()); + const auto& result = RE2::QuoteMeta(StringPiece(input.data(), input.size())); + return input == result ? TUnboxedValue(args[0]) : valueBuilder->NewString(result); +} + +TOptionsSchema MakeOptionsSchema(::NKikimr::NUdf::IFunctionTypeInfoBuilder& builder) { + TOptionsSchema ret; + auto structBuilder = builder.Struct(EOptionsField::Count); #define FIELD_HANDLE(name, index, type, ...) 
structBuilder->AddField<type>(TStringRef::Of(#name), &ret.Indices[index]); - OPTIONS_MAP(FIELD_HANDLE) + OPTIONS_MAP(FIELD_HANDLE) #undef FIELD_HANDLE - ret.StructType = structBuilder->Build(); - return ret; - } + ret.StructType = structBuilder->Build(); + return ret; +} - class TOptions: public TBoxedValue { - private: - const TOptionsSchema Schema_; +class TOptions: public TBoxedValue { +private: + const TOptionsSchema Schema_; - public: - TOptions(const TOptionsSchema& schema) - : Schema_(schema) - { - } +public: + TOptions(const TOptionsSchema& schema) + : Schema_(schema) + { + } - TUnboxedValue Run( - const IValueBuilder* valueBuilder, - const TUnboxedValuePod* args) const override { - TUnboxedValue* items = nullptr; - const auto result = valueBuilder->NewArray(EOptionsField::Count, items); + TUnboxedValue Run( + const IValueBuilder* valueBuilder, + const TUnboxedValuePod* args) const override { + TUnboxedValue* items = nullptr; + const auto result = valueBuilder->NewArray(EOptionsField::Count, items); #define FIELD_HANDLE(name, index, type, defVal, ...) 
\ { \ auto structIndex = Schema_.Indices[index]; \ @@ -352,294 +353,295 @@ namespace { } \ } - OPTIONS_MAP(FIELD_HANDLE) + OPTIONS_MAP(FIELD_HANDLE) #undef FIELD_HANDLE - return result; - } + return result; + } - static const ::NKikimr::NUdf::TStringRef& Name() { - static auto name = ::NKikimr::NUdf::TStringRef::Of("Options"); - return name; - } + static const ::NKikimr::NUdf::TStringRef& Name() { + static auto name = ::NKikimr::NUdf::TStringRef::Of("Options"); + return name; + } - static bool DeclareSignature( - const ::NKikimr::NUdf::TStringRef& name, - ::NKikimr::NUdf::TType* userType, - ::NKikimr::NUdf::IFunctionTypeInfoBuilder& builder, - bool typesOnly) { - Y_UNUSED(userType); - if (Name() == name) { - builder.IsStrict(); + static bool DeclareSignature( + const ::NKikimr::NUdf::TStringRef& name, + ::NKikimr::NUdf::TType* userType, + ::NKikimr::NUdf::IFunctionTypeInfoBuilder& builder, + bool typesOnly) { + Y_UNUSED(userType); + if (Name() == name) { + builder.IsStrict(); - auto argsBuilder = builder.Args(); + auto argsBuilder = builder.Args(); #define FIELD_HANDLE(name, index, type, ...) 
argsBuilder->Add<TOptional<type>>().Name(TStringRef::Of(#name)); - OPTIONS_MAP(FIELD_HANDLE) + OPTIONS_MAP(FIELD_HANDLE) #undef FIELD_HANDLE - auto optionsSchema = MakeOptionsSchema(builder); - builder.Returns(optionsSchema.StructType); - builder.OptionalArgs(EOptionsField::Count); - if (!typesOnly) { - builder.Implementation(new TOptions(optionsSchema)); - } - - return true; - } else { - return false; + auto optionsSchema = MakeOptionsSchema(builder); + builder.Returns(optionsSchema.StructType); + builder.OptionalArgs(EOptionsField::Count); + if (!typesOnly) { + builder.Implementation(new TOptions(optionsSchema)); } - } - }; - template <bool posix> - class TIsValidRegexp: public TBoxedValue { - public: - TIsValidRegexp(const TOptionsSchema optionsSchema) - : OptionsSchema_(std::move(optionsSchema)) - { + return true; + } else { + return false; } + } +}; - TUnboxedValue Run( - const IValueBuilder* valueBuilder, - const TUnboxedValuePod* args) const override { - Y_UNUSED(valueBuilder); - if (!args[0]) { - return TUnboxedValuePod(false); - } - RE2::Options options = ExtractOptions(args[0].AsStringRef(), args[1], OptionsSchema_, posix); - RE2 regexp(args[0].AsStringRef(), options); - return TUnboxedValuePod(regexp.ok()); - } +template <bool posix> +class TIsValidRegexp: public TBoxedValue { +public: + TIsValidRegexp(const TOptionsSchema optionsSchema) + : OptionsSchema_(std::move(optionsSchema)) + { + } - static const ::NKikimr::NUdf::TStringRef& Name() { - static auto name = ::NKikimr::NUdf::TStringRef::Of("IsValidRegexp"); - return name; + TUnboxedValue Run( + const IValueBuilder* valueBuilder, + const TUnboxedValuePod* args) const override { + Y_UNUSED(valueBuilder); + if (!args[0]) { + return TUnboxedValuePod(false); } + RE2::Options options = ExtractOptions(args[0].AsStringRef(), args[1], OptionsSchema_, posix); + RE2 regexp(args[0].AsStringRef(), options); + return TUnboxedValuePod(regexp.ok()); + } - static bool DeclareSignature( - const 
::NKikimr::NUdf::TStringRef& name, - ::NKikimr::NUdf::TType* userType, - ::NKikimr::NUdf::IFunctionTypeInfoBuilder& builder, - bool typesOnly) { - Y_UNUSED(userType); - if (Name() == name) { - TOptionsSchema optionsSchema = MakeOptionsSchema(builder); - auto optOptionsStructType = builder.Optional()->Item(optionsSchema.StructType).Build(); - builder.Args() - ->Add(builder.Optional()->Item(builder.SimpleType<char*>())) - .Add(optOptionsStructType) - .Done() - .Returns(builder.SimpleType<bool>()); + static const ::NKikimr::NUdf::TStringRef& Name() { + static auto name = ::NKikimr::NUdf::TStringRef::Of("IsValidRegexp"); + return name; + } - builder.OptionalArgs(1); - if (!typesOnly) { - builder.Implementation(new TIsValidRegexp(std::move(optionsSchema))); - } - builder.IsStrict(); - return true; - } else { - return false; + static bool DeclareSignature( + const ::NKikimr::NUdf::TStringRef& name, + ::NKikimr::NUdf::TType* userType, + ::NKikimr::NUdf::IFunctionTypeInfoBuilder& builder, + bool typesOnly) { + Y_UNUSED(userType); + if (Name() == name) { + TOptionsSchema optionsSchema = MakeOptionsSchema(builder); + auto optOptionsStructType = builder.Optional()->Item(optionsSchema.StructType).Build(); + builder.Args() + ->Add(builder.Optional()->Item(builder.SimpleType<char*>())) + .Add(optOptionsStructType) + .Done() + .Returns(builder.SimpleType<bool>()); + + builder.OptionalArgs(1); + if (!typesOnly) { + builder.Implementation(new TIsValidRegexp(std::move(optionsSchema))); } + builder.IsStrict(); + return true; + } else { + return false; } + } - private: - const TOptionsSchema OptionsSchema_; - }; +private: + const TOptionsSchema OptionsSchema_; +}; - SIMPLE_UDF_WITH_OPTIONAL_ARGS(TPatternFromLike, char*(char*, TOptional<char*>), 1) { - const std::string_view input(args[0].AsStringRef()); - const bool hasEscape = bool(args[1]); - char escape = 0; - if (hasEscape) { - const std::string_view escapeRef(args[1].AsStringRef()); - if (escapeRef.size() != 1U) { - 
UdfTerminate((TStringBuilder() << GetPos() << " Escape should be single character").c_str()); - } - escape = escapeRef.front(); +SIMPLE_UDF_WITH_OPTIONAL_ARGS(TPatternFromLike, char*(char*, TOptional<char*>), 1) { + const std::string_view input(args[0].AsStringRef()); + const bool hasEscape = bool(args[1]); + char escape = 0; + if (hasEscape) { + const std::string_view escapeRef(args[1].AsStringRef()); + if (escapeRef.size() != 1U) { + UdfTerminate((TStringBuilder() << GetPos() << " Escape should be single character").c_str()); } - const TString escaped(RE2::QuoteMeta(StringPiece(input.data(), input.size()))); + escape = escapeRef.front(); + } + const TString escaped(RE2::QuoteMeta(StringPiece(input.data(), input.size()))); - TStringBuilder result; - result << "(?s)"; - bool slash = false; - bool escapeOn = false; + TStringBuilder result; + result << "(?s)"; + bool slash = false; + bool escapeOn = false; - for (const char& c : escaped) { - switch (c) { - case '\\': - if (slash) { - result << "\\\\"; - } - slash = !slash; - break; - case '%': - if (escapeOn) { - result << "\\%"; - escapeOn = false; - } else { - result << ".*"; - } - slash = false; - break; - case '_': + for (const char& c : escaped) { + switch (c) { + case '\\': + if (slash) { + result << "\\\\"; + } + slash = !slash; + break; + case '%': + if (escapeOn) { + result << "\\%"; + escapeOn = false; + } else { + result << ".*"; + } + slash = false; + break; + case '_': + if (escapeOn) { + result << "\\_"; + escapeOn = false; + } else { + result << '.'; + } + slash = false; + break; + default: + if (hasEscape && c == escape) { if (escapeOn) { - result << "\\_"; - escapeOn = false; - } else { - result << '.'; + result << RE2::QuoteMeta(StringPiece(&c, 1)); } - slash = false; - break; - default: - if (hasEscape && c == escape) { - if (escapeOn) { - result << RE2::QuoteMeta(StringPiece(&c, 1)); - } - escapeOn = !escapeOn; - } else { - if (slash) - result << '\\'; - result << c; - escapeOn = false; + escapeOn 
= !escapeOn; + } else { + if (slash) { + result << '\\'; } - slash = false; - break; - } + result << c; + escapeOn = false; + } + slash = false; + break; } - return valueBuilder->NewString(result); } + return valueBuilder->NewString(result); +} - TType* MakeRunConfigType(IFunctionTypeInfoBuilder& builder, TType* optOptionsStructType) { - return builder.Tuple()->Add<char*>().Add(optOptionsStructType).Build(); - } +TType* MakeRunConfigType(IFunctionTypeInfoBuilder& builder, TType* optOptionsStructType) { + return builder.Tuple()->Add<char*>().Add(optOptionsStructType).Build(); +} - template <bool posix> - class TRe2Module: public IUdfModule { - public: - TStringRef Name() const { - return posix ? TStringRef::Of("Re2posix") : TStringRef::Of("Re2"); - } +template <bool posix> +class TRe2Module: public IUdfModule { +public: + TStringRef Name() const { + return posix ? TStringRef::Of("Re2posix") : TStringRef::Of("Re2"); + } - void CleanupOnTerminate() const final { - } + void CleanupOnTerminate() const final { + } - void GetAllFunctions(IFunctionsSink& sink) const final { - sink.Add(TRe2Udf::Name(TRe2Udf::EMode::MATCH)); - sink.Add(TRe2Udf::Name(TRe2Udf::EMode::GREP)); - sink.Add(TRe2Udf::Name(TRe2Udf::EMode::CAPTURE))->SetTypeAwareness(); - sink.Add(TRe2Udf::Name(TRe2Udf::EMode::REPLACE)); - sink.Add(TRe2Udf::Name(TRe2Udf::EMode::COUNT)); - sink.Add(TRe2Udf::Name(TRe2Udf::EMode::FIND_AND_CONSUME)); - sink.Add(TEscape::Name()); - sink.Add(TPatternFromLike::Name()); - sink.Add(TOptions::Name()); - sink.Add(TIsValidRegexp<posix>::Name()); - } + void GetAllFunctions(IFunctionsSink& sink) const final { + sink.Add(TRe2Udf::Name(TRe2Udf::EMode::MATCH)); + sink.Add(TRe2Udf::Name(TRe2Udf::EMode::GREP)); + sink.Add(TRe2Udf::Name(TRe2Udf::EMode::CAPTURE))->SetTypeAwareness(); + sink.Add(TRe2Udf::Name(TRe2Udf::EMode::REPLACE)); + sink.Add(TRe2Udf::Name(TRe2Udf::EMode::COUNT)); + sink.Add(TRe2Udf::Name(TRe2Udf::EMode::FIND_AND_CONSUME)); + sink.Add(TEscape::Name()); + 
sink.Add(TPatternFromLike::Name()); + sink.Add(TOptions::Name()); + sink.Add(TIsValidRegexp<posix>::Name()); + } - void BuildFunctionTypeInfo( - const TStringRef& name, - TType* userType, - const TStringRef& typeConfig, - ui32 flags, - IFunctionTypeInfoBuilder& builder) const final try { - Y_UNUSED(userType); - TOptionsSchema optionsSchema = MakeOptionsSchema(builder); - auto optOptionsStructType = builder.Optional()->Item(optionsSchema.StructType).Build(); + void BuildFunctionTypeInfo( + const TStringRef& name, + TType* userType, + const TStringRef& typeConfig, + ui32 flags, + IFunctionTypeInfoBuilder& builder) const final try { + Y_UNUSED(userType); + TOptionsSchema optionsSchema = MakeOptionsSchema(builder); + auto optOptionsStructType = builder.Optional()->Item(optionsSchema.StructType).Build(); - bool typesOnly = (flags & TFlags::TypesOnly); - bool isMatch = (TRe2Udf::Name(TRe2Udf::EMode::MATCH) == name); - bool isGrep = (TRe2Udf::Name(TRe2Udf::EMode::GREP) == name); - bool isCapture = (TRe2Udf::Name(TRe2Udf::EMode::CAPTURE) == name); - bool isReplace = (TRe2Udf::Name(TRe2Udf::EMode::REPLACE) == name); - bool isCount = (TRe2Udf::Name(TRe2Udf::EMode::COUNT) == name); - bool isFindAndConsume = (TRe2Udf::Name(TRe2Udf::FIND_AND_CONSUME) == name); + bool typesOnly = (flags & TFlags::TypesOnly); + bool isMatch = (TRe2Udf::Name(TRe2Udf::EMode::MATCH) == name); + bool isGrep = (TRe2Udf::Name(TRe2Udf::EMode::GREP) == name); + bool isCapture = (TRe2Udf::Name(TRe2Udf::EMode::CAPTURE) == name); + bool isReplace = (TRe2Udf::Name(TRe2Udf::EMode::REPLACE) == name); + bool isCount = (TRe2Udf::Name(TRe2Udf::EMode::COUNT) == name); + bool isFindAndConsume = (TRe2Udf::Name(TRe2Udf::FIND_AND_CONSUME) == name); - if (isMatch || isGrep) { - builder.SimpleSignature<bool(TOptional<char*>)>() - .RunConfig(MakeRunConfigType(builder, optOptionsStructType)); + if (isMatch || isGrep) { + builder.SimpleSignature<bool(TOptional<char*>)>() + .RunConfig(MakeRunConfigType(builder, 
optOptionsStructType)); - if (!typesOnly) { - const auto mode = isMatch ? TRe2Udf::EMode::MATCH : TRe2Udf::EMode::GREP; - builder.Implementation(new TRe2Udf::TFactory<posix>(mode, optionsSchema, builder.GetSourcePosition(), builder.GetCurrentLangVer())); - } - } else if (isCapture) { - TRegexpGroups groups; - auto optionalStringType = builder.Optional()->Item<char*>().Build(); - auto structBuilder = builder.Struct(); - RE2::Options options = CreateDefaultOptions(); - RE2 regexp(StringPiece(typeConfig.Data(), typeConfig.Size()), options); - if (!regexp.ok()) { - builder.SetError(FormatRegexpError(regexp)); - return; - } - const auto& groupNames = regexp.CapturingGroupNames(); - int groupCount = regexp.NumberOfCapturingGroups(); - if (groupCount >= 0) { - std::unordered_set<std::string_view> groupNamesSet; - int unnamedCount = 0; - ++groupCount; - groups.Indexes.resize(groupCount); - groups.Names.resize(groupCount); - for (int i = 0; i < groupCount; ++i) { - TString fieldName; - auto it = groupNames.find(i); - if (it != groupNames.end()) { - if (!groupNamesSet.insert(it->second).second) { - builder.SetError( - TStringBuilder() << "Regexp contains duplicate capturing group name: " << it->second); - return; - } - fieldName = it->second; - } else { - fieldName = "_" + ToString(unnamedCount); - ++unnamedCount; + if (!typesOnly) { + const auto mode = isMatch ? 
TRe2Udf::EMode::MATCH : TRe2Udf::EMode::GREP; + builder.Implementation(new TRe2Udf::TFactory<posix>(mode, optionsSchema, builder.GetSourcePosition(), builder.GetCurrentLangVer())); + } + } else if (isCapture) { + TRegexpGroups groups; + auto optionalStringType = builder.Optional()->Item<char*>().Build(); + auto structBuilder = builder.Struct(); + RE2::Options options = CreateDefaultOptions(); + RE2 regexp(StringPiece(typeConfig.Data(), typeConfig.Size()), options); + if (!regexp.ok()) { + builder.SetError(FormatRegexpError(regexp)); + return; + } + const auto& groupNames = regexp.CapturingGroupNames(); + int groupCount = regexp.NumberOfCapturingGroups(); + if (groupCount >= 0) { + std::unordered_set<std::string_view> groupNamesSet; + int unnamedCount = 0; + ++groupCount; + groups.Indexes.resize(groupCount); + groups.Names.resize(groupCount); + for (int i = 0; i < groupCount; ++i) { + TString fieldName; + auto it = groupNames.find(i); + if (it != groupNames.end()) { + if (!groupNamesSet.insert(it->second).second) { + builder.SetError( + TStringBuilder() << "Regexp contains duplicate capturing group name: " << it->second); + return; } - groups.Names[i] = fieldName; - structBuilder->AddField(fieldName, optionalStringType, &groups.Indexes[i]); - } - builder.Args(1)->Add(optionalStringType).Done().Returns(structBuilder->Build()).RunConfig(MakeRunConfigType(builder, optOptionsStructType)); - - if (!typesOnly) { - builder.Implementation( - new TRe2Udf::TFactory<posix>(TRe2Udf::EMode::CAPTURE, optionsSchema, builder.GetSourcePosition(), builder.GetCurrentLangVer(), groups)); + fieldName = it->second; + } else { + fieldName = "_" + ToString(unnamedCount); + ++unnamedCount; } - - } else { - Y_ENSURE(regexp.ok()); - builder.SetError("Regexp contains no capturing groups"); + groups.Names[i] = fieldName; + structBuilder->AddField(fieldName, optionalStringType, &groups.Indexes[i]); } - } else if (isReplace) { - builder.SimpleSignature<TOptional<char*>(TOptional<char*>, char*)>() 
- .RunConfig(MakeRunConfigType(builder, optOptionsStructType)); + builder.Args(1)->Add(optionalStringType).Done().Returns(structBuilder->Build()).RunConfig(MakeRunConfigType(builder, optOptionsStructType)); if (!typesOnly) { - builder.Implementation(new TRe2Udf::TFactory<posix>(TRe2Udf::EMode::REPLACE, optionsSchema, builder.GetSourcePosition(), builder.GetCurrentLangVer())); + builder.Implementation( + new TRe2Udf::TFactory<posix>(TRe2Udf::EMode::CAPTURE, optionsSchema, builder.GetSourcePosition(), builder.GetCurrentLangVer(), groups)); } - } else if (isCount) { - builder.SimpleSignature<ui32(TOptional<char*>)>() - .RunConfig(MakeRunConfigType(builder, optOptionsStructType)); - if (!typesOnly) { - builder.Implementation(new TRe2Udf::TFactory<posix>(TRe2Udf::EMode::COUNT, optionsSchema, builder.GetSourcePosition(), builder.GetCurrentLangVer())); - } - } else if (isFindAndConsume) { - builder.SimpleSignature<TListType<char*>(TOptional<char*>)>() - .RunConfig(MakeRunConfigType(builder, optOptionsStructType)); - if (!typesOnly) { - builder.Implementation(new TRe2Udf::TFactory<posix>(TRe2Udf::EMode::FIND_AND_CONSUME, optionsSchema, builder.GetSourcePosition(), builder.GetCurrentLangVer())); - } - } else if (!( - TEscape::DeclareSignature(name, userType, builder, typesOnly) || - TPatternFromLike::DeclareSignature(name, userType, builder, typesOnly) || - TOptions::DeclareSignature(name, userType, builder, typesOnly) || - TIsValidRegexp<posix>::DeclareSignature(name, userType, builder, typesOnly))) { - builder.SetError( - TStringBuilder() << "Unknown function name: " << TString(name)); + } else { + Y_ENSURE(regexp.ok()); + builder.SetError("Regexp contains no capturing groups"); } - } catch (const std::exception& e) { - builder.SetError(CurrentExceptionMessage()); + } else if (isReplace) { + builder.SimpleSignature<TOptional<char*>(TOptional<char*>, char*)>() + .RunConfig(MakeRunConfigType(builder, optOptionsStructType)); + + if (!typesOnly) { + builder.Implementation(new 
TRe2Udf::TFactory<posix>(TRe2Udf::EMode::REPLACE, optionsSchema, builder.GetSourcePosition(), builder.GetCurrentLangVer())); + } + } else if (isCount) { + builder.SimpleSignature<ui32(TOptional<char*>)>() + .RunConfig(MakeRunConfigType(builder, optOptionsStructType)); + + if (!typesOnly) { + builder.Implementation(new TRe2Udf::TFactory<posix>(TRe2Udf::EMode::COUNT, optionsSchema, builder.GetSourcePosition(), builder.GetCurrentLangVer())); + } + } else if (isFindAndConsume) { + builder.SimpleSignature<TListType<char*>(TOptional<char*>)>() + .RunConfig(MakeRunConfigType(builder, optOptionsStructType)); + if (!typesOnly) { + builder.Implementation(new TRe2Udf::TFactory<posix>(TRe2Udf::EMode::FIND_AND_CONSUME, optionsSchema, builder.GetSourcePosition(), builder.GetCurrentLangVer())); + } + } else if (!( + TEscape::DeclareSignature(name, userType, builder, typesOnly) || + TPatternFromLike::DeclareSignature(name, userType, builder, typesOnly) || + TOptions::DeclareSignature(name, userType, builder, typesOnly) || + TIsValidRegexp<posix>::DeclareSignature(name, userType, builder, typesOnly))) { + builder.SetError( + TStringBuilder() << "Unknown function name: " << TString(name)); } - }; + } catch (const std::exception& e) { + builder.SetError(CurrentExceptionMessage()); + } +}; -} +} // namespace REGISTER_MODULES( TRe2Module<false>, diff --git a/yql/essentials/udfs/common/re2/ya.make b/yql/essentials/udfs/common/re2/ya.make index ca8be7370ba..895f75bd6cc 100644 --- a/yql/essentials/udfs/common/re2/ya.make +++ b/yql/essentials/udfs/common/re2/ya.make @@ -5,6 +5,8 @@ YQL_UDF_CONTRIB(re2_udf) 43 0 ) + + ENABLE(YQL_STYLE_CPP) SRCS( re2_udf.cpp diff --git a/yql/essentials/udfs/common/set/set_udf.cpp b/yql/essentials/udfs/common/set/set_udf.cpp index a7851ec8633..86e9dae50d6 100644 --- a/yql/essentials/udfs/common/set/set_udf.cpp +++ b/yql/essentials/udfs/common/set/set_udf.cpp @@ -18,7 +18,8 @@ private: protected: TSetBase(THash hash, TEquals equals) : Set_(1, hash, equals) - 
{} + { + } void Init(const TUnboxedValuePod& value, ui32 maxSize) { MaxSize_ = maxSize ? maxSize : std::numeric_limits<ui32>::max(); @@ -89,8 +90,7 @@ public: template <EDataSlot Slot> class TSetData - : public TSetBase<TUnboxedValueHash<Slot>, TUnboxedValueEquals<Slot>> -{ + : public TSetBase<TUnboxedValueHash<Slot>, TUnboxedValueEquals<Slot>> { public: using TBase = TSetBase<TUnboxedValueHash<Slot>, TUnboxedValueEquals<Slot>>; @@ -130,27 +130,26 @@ struct TGenericEquals { }; class TSetGeneric - : public TSetBase<TGenericHash, TGenericEquals> -{ + : public TSetBase<TGenericHash, TGenericEquals> { public: using TBase = TSetBase<TGenericHash, TGenericEquals>; TSetGeneric(const TUnboxedValuePod& value, ui32 maxSize, - IHash::TPtr hash, IEquate::TPtr equate) + IHash::TPtr hash, IEquate::TPtr equate) : TBase(TGenericHash{hash}, TGenericEquals{equate}) { TBase::Init(value, maxSize); } TSetGeneric(const TSetGeneric& left, const TSetGeneric& right, - IHash::TPtr hash, IEquate::TPtr equate) + IHash::TPtr hash, IEquate::TPtr equate) : TBase(TGenericHash{hash}, TGenericEquals{equate}) { TBase::Merge(left, right); } TSetGeneric(const TUnboxedValuePod& serialized, - IHash::TPtr hash, IEquate::TPtr equate) + IHash::TPtr hash, IEquate::TPtr equate) : TBase(TGenericHash{hash}, TGenericEquals{equate}) { TBase::Deserialize(serialized); @@ -158,14 +157,13 @@ public: }; extern const char SetResourceNameGeneric[] = "Set.SetResource.Generic"; -class TSetResource: - public TBoxedResource<TSetGeneric, SetResourceNameGeneric> -{ +class TSetResource: public TBoxedResource<TSetGeneric, SetResourceNameGeneric> { public: template <typename... Args> inline TSetResource(Args&&... args) : TBoxedResource(std::forward<Args>(args)...) 
- {} + { + } }; template <EDataSlot Slot> @@ -182,7 +180,6 @@ TSetResource* GetSetResource(const TUnboxedValuePod& arg) { return static_cast<TSetResource*>(arg.AsBoxed().Get()); } - template <EDataSlot Slot> class TSetCreateData: public TBoxedValue { private: @@ -201,7 +198,8 @@ public: TSetCreate(IHash::TPtr hash, IEquate::TPtr equate) : Hash_(hash) , Equate_(equate) - {} + { + } private: IHash::TPtr Hash_; @@ -279,7 +277,8 @@ public: TSetDeserialize(IHash::TPtr hash, IEquate::TPtr equate) : Hash_(hash) , Equate_(equate) - {} + { + } private: IHash::TPtr Hash_; @@ -308,7 +307,8 @@ public: TSetMerge(IHash::TPtr hash, IEquate::TPtr equate) : Hash_(hash) , Equate_(equate) - {} + { + } private: IHash::TPtr Hash_; @@ -330,26 +330,24 @@ private: } }; - -#define MAKE_RESOURCE(slot, ...) \ -extern const char SetResourceName##slot[] = "Set.SetResource."#slot; \ -template <> \ -class TSetResourceData<EDataSlot::slot>: \ - public TBoxedResource<TSetData<EDataSlot::slot>, SetResourceName##slot> \ -{ \ -public: \ - template <typename... Args> \ - inline TSetResourceData(Args&&... args) \ - : TBoxedResource(std::forward<Args>(args)...) \ - {} \ -}; +#define MAKE_RESOURCE(slot, ...) \ + extern const char SetResourceName##slot[] = "Set.SetResource." #slot; \ + template <> \ + class TSetResourceData<EDataSlot::slot>: public TBoxedResource<TSetData<EDataSlot::slot>, SetResourceName##slot> { \ + public: \ + template <typename... Args> \ + inline TSetResourceData(Args&&... args) \ + : TBoxedResource(std::forward<Args>(args)...) \ + { \ + } \ + }; UDF_TYPE_ID_MAP(MAKE_RESOURCE) -#define MAKE_IMPL(operation, slot) \ -case EDataSlot::slot: \ - builder.Implementation(new operation<EDataSlot::slot>); \ - break; +#define MAKE_IMPL(operation, slot) \ + case EDataSlot::slot: \ + builder.Implementation(new operation<EDataSlot::slot>); \ + break; #define MAKE_CREATE(slot, ...) MAKE_IMPL(TSetCreateData, slot) #define MAKE_ADD_VALUE(slot, ...) 
MAKE_IMPL(TSetAddValueData, slot) @@ -359,11 +357,10 @@ case EDataSlot::slot: \ #define MAKE_MERGE(slot, ...) MAKE_IMPL(TSetMergeData, slot) #define MAKE_GET_RESULT(slot, ...) MAKE_IMPL(TSetGetResultData, slot) -#define MAKE_TYPE(slot, ...) \ -case EDataSlot::slot: \ - setType = builder.Resource(SetResourceName##slot); \ - break; - +#define MAKE_TYPE(slot, ...) \ + case EDataSlot::slot: \ + setType = builder.Resource(SetResourceName##slot); \ + break; static const auto CreateName = TStringRef::Of("Create"); static const auto AddValueName = TStringRef::Of("AddValue"); @@ -397,8 +394,7 @@ public: TType* userType, const TStringRef& typeConfig, ui32 flags, - IFunctionTypeInfoBuilder& builder) const final - { + IFunctionTypeInfoBuilder& builder) const final { Y_UNUSED(typeConfig); try { @@ -449,7 +445,7 @@ public: setType = builder.Resource(SetResourceNameGeneric); } else { switch (*slot) { - UDF_TYPE_ID_MAP(MAKE_TYPE) + UDF_TYPE_ID_MAP(MAKE_TYPE) } } @@ -463,7 +459,7 @@ public: builder.Implementation(new TSetCreate(hash, equate)); } else { switch (*slot) { - UDF_TYPE_ID_MAP(MAKE_CREATE) + UDF_TYPE_ID_MAP(MAKE_CREATE) } } } @@ -479,7 +475,7 @@ public: builder.Implementation(new TSetAddValue); } else { switch (*slot) { - UDF_TYPE_ID_MAP(MAKE_ADD_VALUE) + UDF_TYPE_ID_MAP(MAKE_ADD_VALUE) } } } @@ -495,7 +491,7 @@ public: builder.Implementation(new TSetWasChanged); } else { switch (*slot) { - UDF_TYPE_ID_MAP(MAKE_WAS_CHANGED) + UDF_TYPE_ID_MAP(MAKE_WAS_CHANGED) } } } @@ -511,7 +507,7 @@ public: builder.Implementation(new TSetMerge(hash, equate)); } else { switch (*slot) { - UDF_TYPE_ID_MAP(MAKE_MERGE) + UDF_TYPE_ID_MAP(MAKE_MERGE) } } } @@ -527,7 +523,7 @@ public: builder.Implementation(new TSetSerialize); } else { switch (*slot) { - UDF_TYPE_ID_MAP(MAKE_SERIALIZE) + UDF_TYPE_ID_MAP(MAKE_SERIALIZE) } } } @@ -541,7 +537,7 @@ public: builder.Implementation(new TSetDeserialize(hash, equate)); } else { switch (*slot) { - UDF_TYPE_ID_MAP(MAKE_DESERIALIZE) + 
UDF_TYPE_ID_MAP(MAKE_DESERIALIZE) } } } @@ -559,7 +555,7 @@ public: builder.Implementation(new TSetGetResult); } else { switch (*slot) { - UDF_TYPE_ID_MAP(MAKE_GET_RESULT) + UDF_TYPE_ID_MAP(MAKE_GET_RESULT) } } } diff --git a/yql/essentials/udfs/common/set/ya.make b/yql/essentials/udfs/common/set/ya.make index 9c235a3084a..21a51dba968 100644 --- a/yql/essentials/udfs/common/set/ya.make +++ b/yql/essentials/udfs/common/set/ya.make @@ -5,6 +5,8 @@ YQL_UDF_CONTRIB(set_udf) 28 0 ) + + ENABLE(YQL_STYLE_CPP) SRCS( set_udf.cpp diff --git a/yql/essentials/udfs/common/stat/stat_udf_ut.cpp b/yql/essentials/udfs/common/stat/stat_udf_ut.cpp index 2a033ff31e4..cb4c03ffa22 100644 --- a/yql/essentials/udfs/common/stat/stat_udf_ut.cpp +++ b/yql/essentials/udfs/common/stat/stat_udf_ut.cpp @@ -11,353 +11,349 @@ namespace NYql { using namespace NKikimr::NMiniKQL; - namespace NUdf { - extern NUdf::TUniquePtr<NUdf::IUdfModule> CreateStatModule(); - } +namespace NUdf { +extern NUdf::TUniquePtr<NUdf::IUdfModule> CreateStatModule(); +} // namespace NUdf - Y_UNIT_TEST_SUITE(TUDFStatTest) { - Y_UNIT_TEST(SimplePercentile) { - auto mutableFunctionRegistry = CreateFunctionRegistry(CreateBuiltinRegistry())->Clone(); - auto randomProvider = CreateDeterministicRandomProvider(1); - auto timeProvider = CreateDeterministicTimeProvider(10000000); - NUdf::TUniquePtr<NUdf::IUdfModule> module = NUdf::CreateStatModule(); - mutableFunctionRegistry->AddModule("", "Stat", std::move(module)); - TScopedAlloc alloc(__LOCATION__); - TTypeEnvironment env(alloc); - TProgramBuilder pgmBuilder(env, *mutableFunctionRegistry); - auto udfTDigest_Create = pgmBuilder.Udf("Stat.TDigest_Create"); - auto udfTDigest_AddValue = pgmBuilder.Udf("Stat.TDigest_AddValue"); - auto udfTDigest_GetPercentile = pgmBuilder.Udf("Stat.TDigest_GetPercentile"); +Y_UNIT_TEST_SUITE(TUDFStatTest) { +Y_UNIT_TEST(SimplePercentile) { + auto mutableFunctionRegistry = CreateFunctionRegistry(CreateBuiltinRegistry())->Clone(); + auto randomProvider 
= CreateDeterministicRandomProvider(1); + auto timeProvider = CreateDeterministicTimeProvider(10000000); + NUdf::TUniquePtr<NUdf::IUdfModule> module = NUdf::CreateStatModule(); + mutableFunctionRegistry->AddModule("", "Stat", std::move(module)); + TScopedAlloc alloc(__LOCATION__); + TTypeEnvironment env(alloc); + TProgramBuilder pgmBuilder(env, *mutableFunctionRegistry); + auto udfTDigest_Create = pgmBuilder.Udf("Stat.TDigest_Create"); + auto udfTDigest_AddValue = pgmBuilder.Udf("Stat.TDigest_AddValue"); + auto udfTDigest_GetPercentile = pgmBuilder.Udf("Stat.TDigest_GetPercentile"); - TRuntimeNode pgmDigest; - { - auto param1 = pgmBuilder.NewDataLiteral<double>(0.0); - TVector<TRuntimeNode> params = {param1}; - pgmDigest = pgmBuilder.Apply(udfTDigest_Create, params); - } + TRuntimeNode pgmDigest; + { + auto param1 = pgmBuilder.NewDataLiteral<double>(0.0); + TVector<TRuntimeNode> params = {param1}; + pgmDigest = pgmBuilder.Apply(udfTDigest_Create, params); + } - for (int n = 1; n < 10; n += 1) { - auto param2 = pgmBuilder.NewDataLiteral((double)n); - TVector<TRuntimeNode> params = {pgmDigest, param2}; - pgmDigest = pgmBuilder.Apply(udfTDigest_AddValue, params); - } + for (int n = 1; n < 10; n += 1) { + auto param2 = pgmBuilder.NewDataLiteral((double)n); + TVector<TRuntimeNode> params = {pgmDigest, param2}; + pgmDigest = pgmBuilder.Apply(udfTDigest_AddValue, params); + } - TRuntimeNode pgmReturn; - { - auto param2 = pgmBuilder.NewDataLiteral<double>(0.9); - TVector<TRuntimeNode> params = {pgmDigest, param2}; - pgmReturn = pgmBuilder.Apply(udfTDigest_GetPercentile, params); - } + TRuntimeNode pgmReturn; + { + auto param2 = pgmBuilder.NewDataLiteral<double>(0.9); + TVector<TRuntimeNode> params = {pgmDigest, param2}; + pgmReturn = pgmBuilder.Apply(udfTDigest_GetPercentile, params); + } - TExploringNodeVisitor explorer; - explorer.Walk(pgmReturn.GetNode(), env); - TComputationPatternOpts opts(alloc.Ref(), env, GetBuiltinFactory(), mutableFunctionRegistry.Get(), - 
NUdf::EValidateMode::None, NUdf::EValidatePolicy::Fail, "", EGraphPerProcess::Multi); - auto pattern = MakeComputationPattern(explorer, pgmReturn, {}, opts); - auto graph = pattern->Clone(opts.ToComputationOptions(*randomProvider, *timeProvider)); - auto value = graph->GetValue(); - UNIT_ASSERT_DOUBLES_EQUAL(value.Get<double>(), 8.5, 0.001); - } + TExploringNodeVisitor explorer; + explorer.Walk(pgmReturn.GetNode(), env); + TComputationPatternOpts opts(alloc.Ref(), env, GetBuiltinFactory(), mutableFunctionRegistry.Get(), + NUdf::EValidateMode::None, NUdf::EValidatePolicy::Fail, "", EGraphPerProcess::Multi); + auto pattern = MakeComputationPattern(explorer, pgmReturn, {}, opts); + auto graph = pattern->Clone(opts.ToComputationOptions(*randomProvider, *timeProvider)); + auto value = graph->GetValue(); + UNIT_ASSERT_DOUBLES_EQUAL(value.Get<double>(), 8.5, 0.001); +} - Y_UNIT_TEST(SimplePercentileSpecific) { - auto mutableFunctionRegistry = CreateFunctionRegistry(CreateBuiltinRegistry())->Clone(); - auto randomProvider = CreateDeterministicRandomProvider(1); - auto timeProvider = CreateDeterministicTimeProvider(1); - NUdf::TUniquePtr<NUdf::IUdfModule> module = NUdf::CreateStatModule(); - mutableFunctionRegistry->AddModule("", "Stat", std::move(module)); - TScopedAlloc alloc(__LOCATION__); - TTypeEnvironment env(alloc); - TProgramBuilder pgmBuilder(env, *mutableFunctionRegistry); - auto udfTDigest_Create = pgmBuilder.Udf("Stat.TDigest_Create"); - auto udfTDigest_AddValue = pgmBuilder.Udf("Stat.TDigest_AddValue"); - auto udfTDigest_GetPercentile = pgmBuilder.Udf("Stat.TDigest_GetPercentile"); +Y_UNIT_TEST(SimplePercentileSpecific) { + auto mutableFunctionRegistry = CreateFunctionRegistry(CreateBuiltinRegistry())->Clone(); + auto randomProvider = CreateDeterministicRandomProvider(1); + auto timeProvider = CreateDeterministicTimeProvider(1); + NUdf::TUniquePtr<NUdf::IUdfModule> module = NUdf::CreateStatModule(); + mutableFunctionRegistry->AddModule("", "Stat", 
std::move(module)); + TScopedAlloc alloc(__LOCATION__); + TTypeEnvironment env(alloc); + TProgramBuilder pgmBuilder(env, *mutableFunctionRegistry); + auto udfTDigest_Create = pgmBuilder.Udf("Stat.TDigest_Create"); + auto udfTDigest_AddValue = pgmBuilder.Udf("Stat.TDigest_AddValue"); + auto udfTDigest_GetPercentile = pgmBuilder.Udf("Stat.TDigest_GetPercentile"); - TRuntimeNode pgmDigest; - { - auto param1 = pgmBuilder.NewDataLiteral<double>(75.0); - TVector<TRuntimeNode> params = {param1}; - pgmDigest = pgmBuilder.Apply(udfTDigest_Create, params); - } + TRuntimeNode pgmDigest; + { + auto param1 = pgmBuilder.NewDataLiteral<double>(75.0); + TVector<TRuntimeNode> params = {param1}; + pgmDigest = pgmBuilder.Apply(udfTDigest_Create, params); + } - TVector<double> vals = {800, 20, 150}; - for (auto val : vals) { - auto param2 = pgmBuilder.NewDataLiteral(val); - TVector<TRuntimeNode> params = {pgmDigest, param2}; - pgmDigest = pgmBuilder.Apply(udfTDigest_AddValue, params); - } + TVector<double> vals = {800, 20, 150}; + for (auto val : vals) { + auto param2 = pgmBuilder.NewDataLiteral(val); + TVector<TRuntimeNode> params = {pgmDigest, param2}; + pgmDigest = pgmBuilder.Apply(udfTDigest_AddValue, params); + } - TRuntimeNode pgmReturn; - { - auto param2 = pgmBuilder.NewDataLiteral<double>(0.5); - TVector<TRuntimeNode> params = {pgmDigest, param2}; - pgmReturn = pgmBuilder.Apply(udfTDigest_GetPercentile, params); - } + TRuntimeNode pgmReturn; + { + auto param2 = pgmBuilder.NewDataLiteral<double>(0.5); + TVector<TRuntimeNode> params = {pgmDigest, param2}; + pgmReturn = pgmBuilder.Apply(udfTDigest_GetPercentile, params); + } - TExploringNodeVisitor explorer; - explorer.Walk(pgmReturn.GetNode(), env); - TComputationPatternOpts opts(alloc.Ref(), env, GetBuiltinFactory(), mutableFunctionRegistry.Get(), - NUdf::EValidateMode::None, NUdf::EValidatePolicy::Fail, "", EGraphPerProcess::Multi); - auto pattern = MakeComputationPattern(explorer, pgmReturn, {}, opts); - auto graph = 
pattern->Clone(opts.ToComputationOptions(*randomProvider, *timeProvider)); - auto value = graph->GetValue(); - Cerr << value.Get<double>() << Endl; - //~ UNIT_ASSERT_DOUBLES_EQUAL(value.Get<double>(), 9.0, 0.001); - } + TExploringNodeVisitor explorer; + explorer.Walk(pgmReturn.GetNode(), env); + TComputationPatternOpts opts(alloc.Ref(), env, GetBuiltinFactory(), mutableFunctionRegistry.Get(), + NUdf::EValidateMode::None, NUdf::EValidatePolicy::Fail, "", EGraphPerProcess::Multi); + auto pattern = MakeComputationPattern(explorer, pgmReturn, {}, opts); + auto graph = pattern->Clone(opts.ToComputationOptions(*randomProvider, *timeProvider)); + auto value = graph->GetValue(); + Cerr << value.Get<double>() << Endl; + //~ UNIT_ASSERT_DOUBLES_EQUAL(value.Get<double>(), 9.0, 0.001); +} - Y_UNIT_TEST(SerializedPercentile) { - auto mutableFunctionRegistry = CreateFunctionRegistry(CreateBuiltinRegistry())->Clone(); - auto randomProvider = CreateDeterministicRandomProvider(1); - auto timeProvider = CreateDeterministicTimeProvider(1); - NUdf::TUniquePtr<NUdf::IUdfModule> module = NUdf::CreateStatModule(); - mutableFunctionRegistry->AddModule("", "Stat", std::move(module)); - TScopedAlloc alloc(__LOCATION__); - TTypeEnvironment env(alloc); - TProgramBuilder pgmBuilder(env, *mutableFunctionRegistry); - auto udfTDigest_Create = pgmBuilder.Udf("Stat.TDigest_Create"); - auto udfTDigest_AddValue = pgmBuilder.Udf("Stat.TDigest_AddValue"); - auto udfTDigest_GetPercentile = pgmBuilder.Udf("Stat.TDigest_GetPercentile"); - auto udfTDigest_Serialize = pgmBuilder.Udf("Stat.TDigest_Serialize"); - auto udfTDigest_Deserialize = pgmBuilder.Udf("Stat.TDigest_Deserialize"); +Y_UNIT_TEST(SerializedPercentile) { + auto mutableFunctionRegistry = CreateFunctionRegistry(CreateBuiltinRegistry())->Clone(); + auto randomProvider = CreateDeterministicRandomProvider(1); + auto timeProvider = CreateDeterministicTimeProvider(1); + NUdf::TUniquePtr<NUdf::IUdfModule> module = NUdf::CreateStatModule(); + 
mutableFunctionRegistry->AddModule("", "Stat", std::move(module)); + TScopedAlloc alloc(__LOCATION__); + TTypeEnvironment env(alloc); + TProgramBuilder pgmBuilder(env, *mutableFunctionRegistry); + auto udfTDigest_Create = pgmBuilder.Udf("Stat.TDigest_Create"); + auto udfTDigest_AddValue = pgmBuilder.Udf("Stat.TDigest_AddValue"); + auto udfTDigest_GetPercentile = pgmBuilder.Udf("Stat.TDigest_GetPercentile"); + auto udfTDigest_Serialize = pgmBuilder.Udf("Stat.TDigest_Serialize"); + auto udfTDigest_Deserialize = pgmBuilder.Udf("Stat.TDigest_Deserialize"); - TRuntimeNode pgmDigest; - { - auto param1 = pgmBuilder.NewDataLiteral<double>(0.0); - TVector<TRuntimeNode> params = {param1}; - pgmDigest = pgmBuilder.Apply(udfTDigest_Create, params); - } + TRuntimeNode pgmDigest; + { + auto param1 = pgmBuilder.NewDataLiteral<double>(0.0); + TVector<TRuntimeNode> params = {param1}; + pgmDigest = pgmBuilder.Apply(udfTDigest_Create, params); + } - for (int n = 1; n < 10; n += 1) { - auto param2 = pgmBuilder.NewDataLiteral((double)n); - TVector<TRuntimeNode> params = {pgmDigest, param2}; - pgmDigest = pgmBuilder.Apply(udfTDigest_AddValue, params); - } + for (int n = 1; n < 10; n += 1) { + auto param2 = pgmBuilder.NewDataLiteral((double)n); + TVector<TRuntimeNode> params = {pgmDigest, param2}; + pgmDigest = pgmBuilder.Apply(udfTDigest_AddValue, params); + } - TRuntimeNode pgmSerializedData; - { - TVector<TRuntimeNode> params = {pgmDigest}; - pgmSerializedData = pgmBuilder.Apply(udfTDigest_Serialize, params); - } + TRuntimeNode pgmSerializedData; + { + TVector<TRuntimeNode> params = {pgmDigest}; + pgmSerializedData = pgmBuilder.Apply(udfTDigest_Serialize, params); + } - TRuntimeNode pgmDigest2; - { - TVector<TRuntimeNode> params = {pgmSerializedData}; - pgmDigest2 = pgmBuilder.Apply(udfTDigest_Deserialize, params); - } + TRuntimeNode pgmDigest2; + { + TVector<TRuntimeNode> params = {pgmSerializedData}; + pgmDigest2 = pgmBuilder.Apply(udfTDigest_Deserialize, params); + } - TRuntimeNode 
pgmReturn; - { - auto param2 = pgmBuilder.NewDataLiteral<double>(0.9); - TVector<TRuntimeNode> params = {pgmDigest2, param2}; - pgmReturn = pgmBuilder.Apply(udfTDigest_GetPercentile, params); - } + TRuntimeNode pgmReturn; + { + auto param2 = pgmBuilder.NewDataLiteral<double>(0.9); + TVector<TRuntimeNode> params = {pgmDigest2, param2}; + pgmReturn = pgmBuilder.Apply(udfTDigest_GetPercentile, params); + } - TExploringNodeVisitor explorer; - explorer.Walk(pgmReturn.GetNode(), env); - TComputationPatternOpts opts(alloc.Ref(), env, GetBuiltinFactory(), mutableFunctionRegistry.Get(), - NUdf::EValidateMode::None, NUdf::EValidatePolicy::Fail, "", EGraphPerProcess::Multi); - auto pattern = MakeComputationPattern(explorer, pgmReturn, {}, opts); - auto graph = pattern->Clone(opts.ToComputationOptions(*randomProvider, *timeProvider)); - auto value = graph->GetValue(); - UNIT_ASSERT_DOUBLES_EQUAL(value.Get<double>(), 8.5, 0.001); - } + TExploringNodeVisitor explorer; + explorer.Walk(pgmReturn.GetNode(), env); + TComputationPatternOpts opts(alloc.Ref(), env, GetBuiltinFactory(), mutableFunctionRegistry.Get(), + NUdf::EValidateMode::None, NUdf::EValidatePolicy::Fail, "", EGraphPerProcess::Multi); + auto pattern = MakeComputationPattern(explorer, pgmReturn, {}, opts); + auto graph = pattern->Clone(opts.ToComputationOptions(*randomProvider, *timeProvider)); + auto value = graph->GetValue(); + UNIT_ASSERT_DOUBLES_EQUAL(value.Get<double>(), 8.5, 0.001); +} - Y_UNIT_TEST(SerializedMergedPercentile) { - auto mutableFunctionRegistry = CreateFunctionRegistry(CreateBuiltinRegistry())->Clone(); - auto randomProvider = CreateDeterministicRandomProvider(1); - auto timeProvider = CreateDeterministicTimeProvider(1); - NUdf::TUniquePtr<NUdf::IUdfModule> module = NUdf::CreateStatModule(); - mutableFunctionRegistry->AddModule("", "Stat", std::move(module)); - TScopedAlloc alloc(__LOCATION__); - TTypeEnvironment env(alloc); - TProgramBuilder pgmBuilder(env, *mutableFunctionRegistry); - auto 
udfTDigest_Create = pgmBuilder.Udf("Stat.TDigest_Create"); - auto udfTDigest_AddValue = pgmBuilder.Udf("Stat.TDigest_AddValue"); - auto udfTDigest_GetPercentile = pgmBuilder.Udf("Stat.TDigest_GetPercentile"); - auto udfTDigest_Serialize = pgmBuilder.Udf("Stat.TDigest_Serialize"); - auto udfTDigest_Deserialize = pgmBuilder.Udf("Stat.TDigest_Deserialize"); - auto udfTDigest_Merge = pgmBuilder.Udf("Stat.TDigest_Merge"); +Y_UNIT_TEST(SerializedMergedPercentile) { + auto mutableFunctionRegistry = CreateFunctionRegistry(CreateBuiltinRegistry())->Clone(); + auto randomProvider = CreateDeterministicRandomProvider(1); + auto timeProvider = CreateDeterministicTimeProvider(1); + NUdf::TUniquePtr<NUdf::IUdfModule> module = NUdf::CreateStatModule(); + mutableFunctionRegistry->AddModule("", "Stat", std::move(module)); + TScopedAlloc alloc(__LOCATION__); + TTypeEnvironment env(alloc); + TProgramBuilder pgmBuilder(env, *mutableFunctionRegistry); + auto udfTDigest_Create = pgmBuilder.Udf("Stat.TDigest_Create"); + auto udfTDigest_AddValue = pgmBuilder.Udf("Stat.TDigest_AddValue"); + auto udfTDigest_GetPercentile = pgmBuilder.Udf("Stat.TDigest_GetPercentile"); + auto udfTDigest_Serialize = pgmBuilder.Udf("Stat.TDigest_Serialize"); + auto udfTDigest_Deserialize = pgmBuilder.Udf("Stat.TDigest_Deserialize"); + auto udfTDigest_Merge = pgmBuilder.Udf("Stat.TDigest_Merge"); - TVector<TRuntimeNode> pgmSerializedDataVector; + TVector<TRuntimeNode> pgmSerializedDataVector; - for (int i = 0; i < 100; i += 10) { - TRuntimeNode pgmDigest; - { - auto param1 = pgmBuilder.NewDataLiteral(double(i) / 10); - TVector<TRuntimeNode> params = {param1}; - pgmDigest = pgmBuilder.Apply(udfTDigest_Create, params); - } + for (int i = 0; i < 100; i += 10) { + TRuntimeNode pgmDigest; + { + auto param1 = pgmBuilder.NewDataLiteral(double(i) / 10); + TVector<TRuntimeNode> params = {param1}; + pgmDigest = pgmBuilder.Apply(udfTDigest_Create, params); + } - for (int n = i + 1; n < i + 10; n += 1) { - auto param2 = 
pgmBuilder.NewDataLiteral(double(n) / 10); - TVector<TRuntimeNode> params = {pgmDigest, param2}; - pgmDigest = pgmBuilder.Apply(udfTDigest_AddValue, params); - } + for (int n = i + 1; n < i + 10; n += 1) { + auto param2 = pgmBuilder.NewDataLiteral(double(n) / 10); + TVector<TRuntimeNode> params = {pgmDigest, param2}; + pgmDigest = pgmBuilder.Apply(udfTDigest_AddValue, params); + } - TRuntimeNode pgmSerializedData; - { - TVector<TRuntimeNode> params = {pgmDigest}; - pgmSerializedData = pgmBuilder.Apply(udfTDigest_Serialize, params); - } - pgmSerializedDataVector.push_back(pgmSerializedData); - } + TRuntimeNode pgmSerializedData; + { + TVector<TRuntimeNode> params = {pgmDigest}; + pgmSerializedData = pgmBuilder.Apply(udfTDigest_Serialize, params); + } + pgmSerializedDataVector.push_back(pgmSerializedData); + } - TRuntimeNode pgmDigest; - for (size_t i = 0; i < pgmSerializedDataVector.size(); ++i) { - TRuntimeNode pgmDigest2; - { - TVector<TRuntimeNode> params = {pgmSerializedDataVector[i]}; - pgmDigest2 = pgmBuilder.Apply(udfTDigest_Deserialize, params); - } - if (!pgmDigest) { - pgmDigest = pgmDigest2; - } else { - TVector<TRuntimeNode> params = {pgmDigest, pgmDigest2}; - pgmDigest = pgmBuilder.Apply(udfTDigest_Merge, params); - } - } + TRuntimeNode pgmDigest; + for (size_t i = 0; i < pgmSerializedDataVector.size(); ++i) { + TRuntimeNode pgmDigest2; + { + TVector<TRuntimeNode> params = {pgmSerializedDataVector[i]}; + pgmDigest2 = pgmBuilder.Apply(udfTDigest_Deserialize, params); + } + if (!pgmDigest) { + pgmDigest = pgmDigest2; + } else { + TVector<TRuntimeNode> params = {pgmDigest, pgmDigest2}; + pgmDigest = pgmBuilder.Apply(udfTDigest_Merge, params); + } + } - TRuntimeNode pgmReturn; - { - auto param2 = pgmBuilder.NewDataLiteral<double>(0.9); - TVector<TRuntimeNode> params = {pgmDigest, param2}; - pgmReturn = pgmBuilder.Apply(udfTDigest_GetPercentile, params); - } + TRuntimeNode pgmReturn; + { + auto param2 = pgmBuilder.NewDataLiteral<double>(0.9); + 
TVector<TRuntimeNode> params = {pgmDigest, param2}; + pgmReturn = pgmBuilder.Apply(udfTDigest_GetPercentile, params); + } - TExploringNodeVisitor explorer; - explorer.Walk(pgmReturn.GetNode(), env); - TComputationPatternOpts opts(alloc.Ref(), env, GetBuiltinFactory(), mutableFunctionRegistry.Get(), - NUdf::EValidateMode::None, NUdf::EValidatePolicy::Fail, "", EGraphPerProcess::Multi); - auto pattern = MakeComputationPattern(explorer, pgmReturn, {}, opts); - auto graph = pattern->Clone(opts.ToComputationOptions(*randomProvider, *timeProvider)); - auto value = graph->GetValue(); - UNIT_ASSERT_DOUBLES_EQUAL(value.Get<double>(), 8.95, 0.001); - } + TExploringNodeVisitor explorer; + explorer.Walk(pgmReturn.GetNode(), env); + TComputationPatternOpts opts(alloc.Ref(), env, GetBuiltinFactory(), mutableFunctionRegistry.Get(), + NUdf::EValidateMode::None, NUdf::EValidatePolicy::Fail, "", EGraphPerProcess::Multi); + auto pattern = MakeComputationPattern(explorer, pgmReturn, {}, opts); + auto graph = pattern->Clone(opts.ToComputationOptions(*randomProvider, *timeProvider)); + auto value = graph->GetValue(); + UNIT_ASSERT_DOUBLES_EQUAL(value.Get<double>(), 8.95, 0.001); +} - static double GetParetoRandomNumber(double a) { - return 1 / pow(RandomNumber<double>(), double(1) / a); - } +static double GetParetoRandomNumber(double a) { + return 1 / pow(RandomNumber<double>(), double(1) / a); +} - Y_UNIT_TEST(BigPercentile) { - auto mutableFunctionRegistry = CreateFunctionRegistry(CreateBuiltinRegistry())->Clone(); - auto randomProvider = CreateDeterministicRandomProvider(1); - auto timeProvider = CreateDeterministicTimeProvider(1); - NUdf::TUniquePtr<NUdf::IUdfModule> module = NUdf::CreateStatModule(); - mutableFunctionRegistry->AddModule("", "Stat", std::move(module)); - TScopedAlloc alloc(__LOCATION__); - TTypeEnvironment env(alloc); - TProgramBuilder pgmBuilder(env, *mutableFunctionRegistry); - auto udfTDigest_Create = pgmBuilder.Udf("Stat.TDigest_Create"); - auto 
udfTDigest_AddValue = pgmBuilder.Udf("Stat.TDigest_AddValue"); - auto udfTDigest_GetPercentile = pgmBuilder.Udf("Stat.TDigest_GetPercentile"); - const size_t NUMBERS = 100000; - const double PERCENTILE = 0.99; - const double THRESHOLD = 0.0004; // at q=0.99 threshold is 4*delta*0.0099 - TVector<double> randomNumbers1; - TVector<TRuntimeNode> randomNumbers2; - randomNumbers1.reserve(NUMBERS); - randomNumbers2.reserve(NUMBERS); - for (size_t n = 0; n < NUMBERS; ++n) { - double randomNumber = GetParetoRandomNumber(10); - randomNumbers1.push_back(randomNumber); - randomNumbers2.push_back(pgmBuilder.NewDataLiteral(randomNumber)); - } - TRuntimeNode bigList = pgmBuilder.AsList(randomNumbers2); - auto pgmDigest = - pgmBuilder.Fold1(bigList, - [&](TRuntimeNode item) { +Y_UNIT_TEST(BigPercentile) { + auto mutableFunctionRegistry = CreateFunctionRegistry(CreateBuiltinRegistry())->Clone(); + auto randomProvider = CreateDeterministicRandomProvider(1); + auto timeProvider = CreateDeterministicTimeProvider(1); + NUdf::TUniquePtr<NUdf::IUdfModule> module = NUdf::CreateStatModule(); + mutableFunctionRegistry->AddModule("", "Stat", std::move(module)); + TScopedAlloc alloc(__LOCATION__); + TTypeEnvironment env(alloc); + TProgramBuilder pgmBuilder(env, *mutableFunctionRegistry); + auto udfTDigest_Create = pgmBuilder.Udf("Stat.TDigest_Create"); + auto udfTDigest_AddValue = pgmBuilder.Udf("Stat.TDigest_AddValue"); + auto udfTDigest_GetPercentile = pgmBuilder.Udf("Stat.TDigest_GetPercentile"); + const size_t NUMBERS = 100000; + const double PERCENTILE = 0.99; + const double THRESHOLD = 0.0004; // at q=0.99 threshold is 4*delta*0.0099 + TVector<double> randomNumbers1; + TVector<TRuntimeNode> randomNumbers2; + randomNumbers1.reserve(NUMBERS); + randomNumbers2.reserve(NUMBERS); + for (size_t n = 0; n < NUMBERS; ++n) { + double randomNumber = GetParetoRandomNumber(10); + randomNumbers1.push_back(randomNumber); + randomNumbers2.push_back(pgmBuilder.NewDataLiteral(randomNumber)); + } + 
TRuntimeNode bigList = pgmBuilder.AsList(randomNumbers2); + auto pgmDigest = + pgmBuilder.Fold1(bigList, + [&](TRuntimeNode item) { std::array<TRuntimeNode, 1> args; args[0] = item; - return pgmBuilder.Apply(udfTDigest_Create, args); - }, - [&](TRuntimeNode item, TRuntimeNode state) { + return pgmBuilder.Apply(udfTDigest_Create, args); }, + [&](TRuntimeNode item, TRuntimeNode state) { std::array<TRuntimeNode, 2> args; args[0] = state; args[1] = item; - return pgmBuilder.Apply(udfTDigest_AddValue, args); - }); - TRuntimeNode pgmReturn = - pgmBuilder.Map(pgmDigest, [&](TRuntimeNode item) { - auto param2 = pgmBuilder.NewDataLiteral(PERCENTILE); - std::array<TRuntimeNode, 2> args; - args[0] = item; - args[1] = param2; - return pgmBuilder.Apply(udfTDigest_GetPercentile, args); - }); + return pgmBuilder.Apply(udfTDigest_AddValue, args); }); + TRuntimeNode pgmReturn = + pgmBuilder.Map(pgmDigest, [&](TRuntimeNode item) { + auto param2 = pgmBuilder.NewDataLiteral(PERCENTILE); + std::array<TRuntimeNode, 2> args; + args[0] = item; + args[1] = param2; + return pgmBuilder.Apply(udfTDigest_GetPercentile, args); + }); - TExploringNodeVisitor explorer; - explorer.Walk(pgmReturn.GetNode(), env); - TComputationPatternOpts opts(alloc.Ref(), env, GetBuiltinFactory(), mutableFunctionRegistry.Get(), - NUdf::EValidateMode::None, NUdf::EValidatePolicy::Fail, "", EGraphPerProcess::Multi); - auto pattern = MakeComputationPattern(explorer, pgmReturn, {}, opts); - auto graph = pattern->Clone(opts.ToComputationOptions(*randomProvider, *timeProvider)); - auto value = graph->GetValue(); - UNIT_ASSERT(value); - double digestValue = value.Get<double>(); - std::sort(randomNumbers1.begin(), randomNumbers1.end()); - // This gives us a 1-based index of the last value <= digestValue - auto index = std::upper_bound(randomNumbers1.begin(), randomNumbers1.end(), digestValue) - randomNumbers1.begin(); - // See https://en.wikipedia.org/wiki/Percentile#First_Variant.2C - double p = (index - 0.5) / 
double(randomNumbers1.size()); - UNIT_ASSERT_DOUBLES_EQUAL(p, PERCENTILE, THRESHOLD); - } + TExploringNodeVisitor explorer; + explorer.Walk(pgmReturn.GetNode(), env); + TComputationPatternOpts opts(alloc.Ref(), env, GetBuiltinFactory(), mutableFunctionRegistry.Get(), + NUdf::EValidateMode::None, NUdf::EValidatePolicy::Fail, "", EGraphPerProcess::Multi); + auto pattern = MakeComputationPattern(explorer, pgmReturn, {}, opts); + auto graph = pattern->Clone(opts.ToComputationOptions(*randomProvider, *timeProvider)); + auto value = graph->GetValue(); + UNIT_ASSERT(value); + double digestValue = value.Get<double>(); + std::sort(randomNumbers1.begin(), randomNumbers1.end()); + // This gives us a 1-based index of the last value <= digestValue + auto index = std::upper_bound(randomNumbers1.begin(), randomNumbers1.end(), digestValue) - randomNumbers1.begin(); + // See https://en.wikipedia.org/wiki/Percentile#First_Variant.2C + double p = (index - 0.5) / double(randomNumbers1.size()); + UNIT_ASSERT_DOUBLES_EQUAL(p, PERCENTILE, THRESHOLD); +} - Y_UNIT_TEST(CentroidPrecision) { - auto mutableFunctionRegistry = CreateFunctionRegistry(CreateBuiltinRegistry())->Clone(); - auto randomProvider = CreateDeterministicRandomProvider(1); - auto timeProvider = CreateDeterministicTimeProvider(1); - NUdf::TUniquePtr<NUdf::IUdfModule> module = NUdf::CreateStatModule(); - mutableFunctionRegistry->AddModule("", "Stat", std::move(module)); - TScopedAlloc alloc(__LOCATION__); - TTypeEnvironment env(alloc); - TProgramBuilder pgmBuilder(env, *mutableFunctionRegistry); - auto udfTDigest_Create = pgmBuilder.Udf("Stat.TDigest_Create"); - auto udfTDigest_AddValue = pgmBuilder.Udf("Stat.TDigest_AddValue"); - auto udfTDigest_GetPercentile = pgmBuilder.Udf("Stat.TDigest_GetPercentile"); - const size_t NUMBERS = 100000; - const double PERCENTILE = 0.25; - const double minValue = 1.0; - const double maxValue = 100.0; - const double majorityValue = 50.0; - TVector<TRuntimeNode> numbers; - 
numbers.reserve(NUMBERS); - for (size_t n = 0; n < NUMBERS - 2; ++n) { - numbers.push_back(pgmBuilder.NewDataLiteral(majorityValue)); - } - numbers.push_back(pgmBuilder.NewDataLiteral(minValue)); - numbers.push_back(pgmBuilder.NewDataLiteral(maxValue)); - TRuntimeNode bigList = pgmBuilder.AsList(numbers); - auto pgmDigest = - pgmBuilder.Fold1(bigList, - [&](TRuntimeNode item) { +Y_UNIT_TEST(CentroidPrecision) { + auto mutableFunctionRegistry = CreateFunctionRegistry(CreateBuiltinRegistry())->Clone(); + auto randomProvider = CreateDeterministicRandomProvider(1); + auto timeProvider = CreateDeterministicTimeProvider(1); + NUdf::TUniquePtr<NUdf::IUdfModule> module = NUdf::CreateStatModule(); + mutableFunctionRegistry->AddModule("", "Stat", std::move(module)); + TScopedAlloc alloc(__LOCATION__); + TTypeEnvironment env(alloc); + TProgramBuilder pgmBuilder(env, *mutableFunctionRegistry); + auto udfTDigest_Create = pgmBuilder.Udf("Stat.TDigest_Create"); + auto udfTDigest_AddValue = pgmBuilder.Udf("Stat.TDigest_AddValue"); + auto udfTDigest_GetPercentile = pgmBuilder.Udf("Stat.TDigest_GetPercentile"); + const size_t NUMBERS = 100000; + const double PERCENTILE = 0.25; + const double minValue = 1.0; + const double maxValue = 100.0; + const double majorityValue = 50.0; + TVector<TRuntimeNode> numbers; + numbers.reserve(NUMBERS); + for (size_t n = 0; n < NUMBERS - 2; ++n) { + numbers.push_back(pgmBuilder.NewDataLiteral(majorityValue)); + } + numbers.push_back(pgmBuilder.NewDataLiteral(minValue)); + numbers.push_back(pgmBuilder.NewDataLiteral(maxValue)); + TRuntimeNode bigList = pgmBuilder.AsList(numbers); + auto pgmDigest = + pgmBuilder.Fold1(bigList, + [&](TRuntimeNode item) { std::array<TRuntimeNode, 1> args; args[0] = item; - return pgmBuilder.Apply(udfTDigest_Create, args); - }, - [&](TRuntimeNode item, TRuntimeNode state) { + return pgmBuilder.Apply(udfTDigest_Create, args); }, + [&](TRuntimeNode item, TRuntimeNode state) { std::array<TRuntimeNode, 2> args; args[0] = 
state; args[1] = item; - return pgmBuilder.Apply(udfTDigest_AddValue, args); - }); - TRuntimeNode pgmReturn = - pgmBuilder.Map(pgmDigest, [&](TRuntimeNode item) { - auto param2 = pgmBuilder.NewDataLiteral(PERCENTILE); - std::array<TRuntimeNode, 2> args; - args[0] = item; - args[1] = param2; - return pgmBuilder.Apply(udfTDigest_GetPercentile, args); - }); + return pgmBuilder.Apply(udfTDigest_AddValue, args); }); + TRuntimeNode pgmReturn = + pgmBuilder.Map(pgmDigest, [&](TRuntimeNode item) { + auto param2 = pgmBuilder.NewDataLiteral(PERCENTILE); + std::array<TRuntimeNode, 2> args; + args[0] = item; + args[1] = param2; + return pgmBuilder.Apply(udfTDigest_GetPercentile, args); + }); - TExploringNodeVisitor explorer; - explorer.Walk(pgmReturn.GetNode(), env); - TComputationPatternOpts opts(alloc.Ref(), env, GetBuiltinFactory(), mutableFunctionRegistry.Get(), - NUdf::EValidateMode::None, NUdf::EValidatePolicy::Fail, "", EGraphPerProcess::Multi); - auto pattern = MakeComputationPattern(explorer, pgmReturn, {}, opts); - auto graph = pattern->Clone(opts.ToComputationOptions(*randomProvider, *timeProvider)); - auto value = graph->GetValue(); - UNIT_ASSERT(value); - double digestValue = value.Get<double>(); - UNIT_ASSERT_EQUAL(digestValue, majorityValue); - } - } + TExploringNodeVisitor explorer; + explorer.Walk(pgmReturn.GetNode(), env); + TComputationPatternOpts opts(alloc.Ref(), env, GetBuiltinFactory(), mutableFunctionRegistry.Get(), + NUdf::EValidateMode::None, NUdf::EValidatePolicy::Fail, "", EGraphPerProcess::Multi); + auto pattern = MakeComputationPattern(explorer, pgmReturn, {}, opts); + auto graph = pattern->Clone(opts.ToComputationOptions(*randomProvider, *timeProvider)); + auto value = graph->GetValue(); + UNIT_ASSERT(value); + double digestValue = value.Get<double>(); + UNIT_ASSERT_EQUAL(digestValue, majorityValue); } +} // Y_UNIT_TEST_SUITE(TUDFStatTest) +} // namespace NYql diff --git a/yql/essentials/udfs/common/stat/static/stat_udf.h 
b/yql/essentials/udfs/common/stat/static/stat_udf.h index 3ab6dbb20b4..e1d637132b7 100644 --- a/yql/essentials/udfs/common/stat/static/stat_udf.h +++ b/yql/essentials/udfs/common/stat/static/stat_udf.h @@ -9,57 +9,57 @@ using namespace NYql; using namespace NUdf; namespace { - extern const char DigestResourceName[] = "Stat.TDigestResource"; +extern const char DigestResourceName[] = "Stat.TDigestResource"; - typedef TBoxedResource<TDigest, DigestResourceName> TDigestResource; - typedef TRefCountedPtr<TDigestResource> TDigestResourcePtr; +typedef TBoxedResource<TDigest, DigestResourceName> TDigestResource; +typedef TRefCountedPtr<TDigestResource> TDigestResourcePtr; - SIMPLE_UDF_WITH_OPTIONAL_ARGS(TTDigest_Create, TResource<DigestResourceName>(double, TOptional<double>, TOptional<double>), 2) { - Y_UNUSED(valueBuilder); - const double delta = args[1].GetOrDefault<double>(0.01); - const double K = args[2].GetOrDefault<double>(25.0); - if (delta == 0 || K / delta < 1) { - UdfTerminate((TStringBuilder() << GetPos() << " Invalid combination of delta/K values").c_str()); - } - - return TUnboxedValuePod(new TDigestResource(delta, K, args[0].Get<double>(), true)); +SIMPLE_UDF_WITH_OPTIONAL_ARGS(TTDigest_Create, TResource<DigestResourceName>(double, TOptional<double>, TOptional<double>), 2) { + Y_UNUSED(valueBuilder); + const double delta = args[1].GetOrDefault<double>(0.01); + const double K = args[2].GetOrDefault<double>(25.0); + if (delta == 0 || K / delta < 1) { + UdfTerminate((TStringBuilder() << GetPos() << " Invalid combination of delta/K values").c_str()); } - SIMPLE_STRICT_UDF(TTDigest_AddValue, TResource<DigestResourceName>(TResource<DigestResourceName>, double)) { - Y_UNUSED(valueBuilder); - TDigestResource::Validate(args[0]); - TDigestResource* resource = static_cast<TDigestResource*>(args[0].AsBoxed().Get()); - resource->Get()->AddValue(args[1].Get<double>()); - return TUnboxedValuePod(resource); - } + return TUnboxedValuePod(new TDigestResource(delta, K, 
args[0].Get<double>(), true)); +} - SIMPLE_STRICT_UDF(TTDigest_GetPercentile, double(TResource<DigestResourceName>, double)) { - Y_UNUSED(valueBuilder); - TDigestResource::Validate(args[0]); - return TUnboxedValuePod(static_cast<TDigestResource*>(args[0].AsBoxed().Get())->Get()->GetPercentile(args[1].Get<double>())); - } +SIMPLE_STRICT_UDF(TTDigest_AddValue, TResource<DigestResourceName>(TResource<DigestResourceName>, double)) { + Y_UNUSED(valueBuilder); + TDigestResource::Validate(args[0]); + TDigestResource* resource = static_cast<TDigestResource*>(args[0].AsBoxed().Get()); + resource->Get()->AddValue(args[1].Get<double>()); + return TUnboxedValuePod(resource); +} - SIMPLE_STRICT_UDF(TTDigest_Serialize, char*(TResource<DigestResourceName>)) { - TDigestResource::Validate(args[0]); - return valueBuilder->NewString(static_cast<TDigestResource*>(args[0].AsBoxed().Get())->Get()->Serialize()); - } +SIMPLE_STRICT_UDF(TTDigest_GetPercentile, double(TResource<DigestResourceName>, double)) { + Y_UNUSED(valueBuilder); + TDigestResource::Validate(args[0]); + return TUnboxedValuePod(static_cast<TDigestResource*>(args[0].AsBoxed().Get())->Get()->GetPercentile(args[1].Get<double>())); +} - SIMPLE_UDF(TTDigest_Deserialize, TResource<DigestResourceName>(char*)) { - Y_UNUSED(valueBuilder); - return TUnboxedValuePod(new TDigestResource(TString(args[0].AsStringRef()), true)); - } +SIMPLE_STRICT_UDF(TTDigest_Serialize, char*(TResource<DigestResourceName>)) { + TDigestResource::Validate(args[0]); + return valueBuilder->NewString(static_cast<TDigestResource*>(args[0].AsBoxed().Get())->Get()->Serialize()); +} - SIMPLE_STRICT_UDF(TTDigest_Merge, TResource<DigestResourceName>(TResource<DigestResourceName>, TResource<DigestResourceName>)) { - Y_UNUSED(valueBuilder); - TDigestResource::Validate(args[0]); - TDigestResource::Validate(args[1]); - return TUnboxedValuePod(new TDigestResource( - static_cast<TDigestResource*>(args[0].AsBoxed().Get())->Get(), - 
static_cast<TDigestResource*>(args[1].AsBoxed().Get())->Get(), - true)); - } +SIMPLE_UDF(TTDigest_Deserialize, TResource<DigestResourceName>(char*)) { + Y_UNUSED(valueBuilder); + return TUnboxedValuePod(new TDigestResource(TString(args[0].AsStringRef()), true)); +} - /* +SIMPLE_STRICT_UDF(TTDigest_Merge, TResource<DigestResourceName>(TResource<DigestResourceName>, TResource<DigestResourceName>)) { + Y_UNUSED(valueBuilder); + TDigestResource::Validate(args[0]); + TDigestResource::Validate(args[1]); + return TUnboxedValuePod(new TDigestResource( + static_cast<TDigestResource*>(args[0].AsBoxed().Get())->Get(), + static_cast<TDigestResource*>(args[1].AsBoxed().Get())->Get(), + true)); +} + +/* * * TODO: Memory tracking * @@ -67,12 +67,12 @@ namespace { * */ - SIMPLE_MODULE(TStatModule, - TTDigest_Create, - TTDigest_AddValue, - TTDigest_GetPercentile, - TTDigest_Serialize, - TTDigest_Deserialize, - TTDigest_Merge) +SIMPLE_MODULE(TStatModule, + TTDigest_Create, + TTDigest_AddValue, + TTDigest_GetPercentile, + TTDigest_Serialize, + TTDigest_Deserialize, + TTDigest_Merge) -} +} // namespace diff --git a/yql/essentials/udfs/common/stat/static/static_udf.cpp b/yql/essentials/udfs/common/stat/static/static_udf.cpp index 3cb1d88a1c8..571d3ca8d2a 100644 --- a/yql/essentials/udfs/common/stat/static/static_udf.cpp +++ b/yql/essentials/udfs/common/stat/static/static_udf.cpp @@ -1,10 +1,10 @@ #include "stat_udf.h" namespace NYql { - namespace NUdf { - NUdf::TUniquePtr<NUdf::IUdfModule> CreateStatModule() { - return new TStatModule(); - } - - } +namespace NUdf { +NUdf::TUniquePtr<NUdf::IUdfModule> CreateStatModule() { + return new TStatModule(); } + +} // namespace NUdf +} // namespace NYql diff --git a/yql/essentials/udfs/common/stat/static/ya.make b/yql/essentials/udfs/common/stat/static/ya.make index f3cc7842eea..892e8a34e93 100644 --- a/yql/essentials/udfs/common/stat/static/ya.make +++ b/yql/essentials/udfs/common/stat/static/ya.make @@ -6,6 +6,8 @@ YQL_ABI_VERSION( 0 ) 
+ENABLE(YQL_STYLE_CPP) + SRCS( static_udf.cpp stat_udf.h diff --git a/yql/essentials/udfs/common/stat/ut/ya.make b/yql/essentials/udfs/common/stat/ut/ya.make index fdce51cbdfb..1ab9bbb1432 100644 --- a/yql/essentials/udfs/common/stat/ut/ya.make +++ b/yql/essentials/udfs/common/stat/ut/ya.make @@ -1,5 +1,7 @@ UNITTEST_FOR(yql/essentials/udfs/common/stat/static) +ENABLE(YQL_STYLE_CPP) + SRCS( ../stat_udf_ut.cpp ) diff --git a/yql/essentials/udfs/common/stat/ya.make b/yql/essentials/udfs/common/stat/ya.make index d1e622b4447..8d7535044aa 100644 --- a/yql/essentials/udfs/common/stat/ya.make +++ b/yql/essentials/udfs/common/stat/ya.make @@ -5,6 +5,8 @@ YQL_UDF_CONTRIB(stat_udf) 28 0 ) + + ENABLE(YQL_STYLE_CPP) SRCS( stat_udf.cpp diff --git a/yql/essentials/udfs/common/streaming/streaming_udf.cpp b/yql/essentials/udfs/common/streaming/streaming_udf.cpp index 63fa55e9e80..2229e8ff9e7 100644 --- a/yql/essentials/udfs/common/streaming/streaming_udf.cpp +++ b/yql/essentials/udfs/common/streaming/streaming_udf.cpp @@ -23,807 +23,809 @@ using namespace NKikimr; using namespace NUdf; namespace { - // Cyclic Read-Write buffer. - // Not thread safe, synchronization between reader and writer threads - // should be managed externally. - class TCyclicRWBuffer { - public: - TCyclicRWBuffer(size_t capacity) - : Buffer_(capacity) - , Finished_(false) - , DataStart_(0) - , DataSize_(0) - { - Buffer_.Resize(capacity); - } +// Cyclic Read-Write buffer. +// Not thread safe, synchronization between reader and writer threads +// should be managed externally. 
+class TCyclicRWBuffer { +public: + TCyclicRWBuffer(size_t capacity) + : Buffer_(capacity) + , Finished_(false) + , DataStart_(0) + , DataSize_(0) + { + Buffer_.Resize(capacity); + } - bool IsFinished() const { - return Finished_; - } + bool IsFinished() const { + return Finished_; + } - void Finish() { - Finished_ = true; - } + void Finish() { + Finished_ = true; + } - bool HasData() const { - return DataSize_ > 0; - } + bool HasData() const { + return DataSize_ > 0; + } - size_t GetDataSize() const { - return DataSize_; - } + size_t GetDataSize() const { + return DataSize_; + } - void GetData(const char*& ptr, size_t& len) const { - size_t readSize = GetDataRegionSize(DataStart_, DataSize_); - ptr = Buffer_.Data() + DataStart_; - len = readSize; - } + void GetData(const char*& ptr, size_t& len) const { + size_t readSize = GetDataRegionSize(DataStart_, DataSize_); + ptr = Buffer_.Data() + DataStart_; + len = readSize; + } - void CommitRead(size_t len) { - Y_DEBUG_ABORT_UNLESS(len <= GetDataRegionSize(DataStart_, DataSize_)); + void CommitRead(size_t len) { + Y_DEBUG_ABORT_UNLESS(len <= GetDataRegionSize(DataStart_, DataSize_)); - DataStart_ = GetBufferPosition(DataStart_ + len); - DataSize_ -= len; - } + DataStart_ = GetBufferPosition(DataStart_ + len); + DataSize_ -= len; + } - bool CanWrite() const { - return WriteSize() > 0; - } + bool CanWrite() const { + return WriteSize() > 0; + } - size_t WriteSize() const { - return Buffer_.Size() - DataSize_; - } + size_t WriteSize() const { + return Buffer_.Size() - DataSize_; + } - size_t Write(const char*& ptr, size_t& len) { - if (!CanWrite()) { - return 0; - } - - size_t bytesWritten = 0; - size_t bytesToWrite = std::min(len, WriteSize()); - while (bytesToWrite > 0) { - size_t writeStart = GetWriteStart(); - size_t writeSize = GetDataRegionSize(writeStart, bytesToWrite); + size_t Write(const char*& ptr, size_t& len) { + if (!CanWrite()) { + return 0; + } - MemCopy(Data(writeStart), ptr, writeSize); + size_t 
bytesWritten = 0; + size_t bytesToWrite = std::min(len, WriteSize()); + while (bytesToWrite > 0) { + size_t writeStart = GetWriteStart(); + size_t writeSize = GetDataRegionSize(writeStart, bytesToWrite); - DataSize_ += writeSize; - bytesWritten += writeSize; - bytesToWrite -= writeSize; + MemCopy(Data(writeStart), ptr, writeSize); - ptr += writeSize; - len -= writeSize; - } + DataSize_ += writeSize; + bytesWritten += writeSize; + bytesToWrite -= writeSize; - return bytesWritten; + ptr += writeSize; + len -= writeSize; } - size_t Write(IZeroCopyInput& input) { - const void* ptr; - size_t dataLen = input.Next(&ptr, WriteSize()); - const char* dataPtr = reinterpret_cast<const char*>(ptr); - return Write(dataPtr, dataLen); - } + return bytesWritten; + } - private: - size_t GetBufferPosition(size_t pos) const { - return pos % Buffer_.Size(); - } + size_t Write(IZeroCopyInput& input) { + const void* ptr; + size_t dataLen = input.Next(&ptr, WriteSize()); + const char* dataPtr = reinterpret_cast<const char*>(ptr); + return Write(dataPtr, dataLen); + } - size_t GetDataRegionSize(size_t start, size_t size) const { - Y_DEBUG_ABORT_UNLESS(start < Buffer_.Size()); +private: + size_t GetBufferPosition(size_t pos) const { + return pos % Buffer_.Size(); + } - return std::min(size, Buffer_.Size() - start); - } + size_t GetDataRegionSize(size_t start, size_t size) const { + Y_DEBUG_ABORT_UNLESS(start < Buffer_.Size()); - size_t GetWriteStart() const { - return GetBufferPosition(DataStart_ + DataSize_); - } + return std::min(size, Buffer_.Size() - start); + } - char* Data(size_t pos) { - Y_DEBUG_ABORT_UNLESS(pos < Buffer_.Size()); + size_t GetWriteStart() const { + return GetBufferPosition(DataStart_ + DataSize_); + } - return (Buffer_.Data() + pos); - } - - private: - TBuffer Buffer_; + char* Data(size_t pos) { + Y_DEBUG_ABORT_UNLESS(pos < Buffer_.Size()); - bool Finished_; + return (Buffer_.Data() + pos); + } - size_t DataStart_; - size_t DataSize_; - }; +private: + TBuffer 
Buffer_; - struct TStreamingParams { - public: - const size_t DefaultProcessPollLatencyMs = 5 * 1000; // 5 seconds - const size_t DefaultInputBufferSizeBytes = 4 * 1024 * 1024; // 4MB - const size_t DefaultOutputBufferSizeBytes = 16 * 1024 * 1024; // 16MB - const char* DefaultInputDelimiter = "\n"; - const char* DefaultOutputDelimiter = "\n"; + bool Finished_; - public: - TUnboxedValue InputStreamObj; - TString CommandLine; - TUnboxedValue ArgumentsList; - TString InputDelimiter; - TString OutputDelimiter; - size_t InputBufferSizeBytes; - size_t OutputBufferSizeBytes; - size_t ProcessPollLatencyMs; + size_t DataStart_; + size_t DataSize_; +}; - TStreamingParams() - : InputDelimiter(DefaultInputDelimiter) - , OutputDelimiter(DefaultOutputDelimiter) - , InputBufferSizeBytes(DefaultInputBufferSizeBytes) - , OutputBufferSizeBytes(DefaultOutputBufferSizeBytes) - , ProcessPollLatencyMs(DefaultProcessPollLatencyMs) - { - } - }; +struct TStreamingParams { +public: + const size_t DefaultProcessPollLatencyMs = 5 * 1000; // 5 seconds + const size_t DefaultInputBufferSizeBytes = 4 * 1024 * 1024; // 4MB + const size_t DefaultOutputBufferSizeBytes = 16 * 1024 * 1024; // 16MB + const char* DefaultInputDelimiter = "\n"; + const char* DefaultOutputDelimiter = "\n"; - struct TThreadSyncData { - TMutex BuffersMutex; - TCondVar InputBufferCanReadCond; - TCondVar MainThreadHasWorkCond; - TCondVar OutputBufferCanWriteCond; - }; +public: + TUnboxedValue InputStreamObj; + TString CommandLine; + TUnboxedValue ArgumentsList; + TString InputDelimiter; + TString OutputDelimiter; + size_t InputBufferSizeBytes; + size_t OutputBufferSizeBytes; + size_t ProcessPollLatencyMs; - class TStringListBufferedInputStream: public IInputStream { - public: - TStringListBufferedInputStream(TUnboxedValue rowsStream, const TString& delimiter, size_t bufferSizeBytes, - TThreadSyncData& syncData, TSourcePosition pos) - : RowsStream_(rowsStream) - , Delimiter_(delimiter) - , SyncData_(syncData) - , Pos_(pos) - , 
DelimiterMatcher_(delimiter) - , DelimiterInput_(delimiter) - , Buffer_(bufferSizeBytes) - , CurReadMode_(ReadMode::Start) - { - } + TStreamingParams() + : InputDelimiter(DefaultInputDelimiter) + , OutputDelimiter(DefaultOutputDelimiter) + , InputBufferSizeBytes(DefaultInputBufferSizeBytes) + , OutputBufferSizeBytes(DefaultOutputBufferSizeBytes) + , ProcessPollLatencyMs(DefaultProcessPollLatencyMs) + { + } +}; - TStringListBufferedInputStream(const TStringListBufferedInputStream&) = delete; - TStringListBufferedInputStream& operator=(const TStringListBufferedInputStream&) = delete; +struct TThreadSyncData { + TMutex BuffersMutex; + TCondVar InputBufferCanReadCond; + TCondVar MainThreadHasWorkCond; + TCondVar OutputBufferCanWriteCond; +}; - TCyclicRWBuffer& GetBuffer() { - return Buffer_; - } +class TStringListBufferedInputStream: public IInputStream { +public: + TStringListBufferedInputStream(TUnboxedValue rowsStream, const TString& delimiter, size_t bufferSizeBytes, + TThreadSyncData& syncData, TSourcePosition pos) + : RowsStream_(rowsStream) + , Delimiter_(delimiter) + , SyncData_(syncData) + , Pos_(pos) + , DelimiterMatcher_(delimiter) + , DelimiterInput_(delimiter) + , Buffer_(bufferSizeBytes) + , CurReadMode_(ReadMode::Start) + { + } - // Fetch input from upstream list iterator to the buffer. - // Called from Main thread. 
- EFetchStatus FetchInput() { - with_lock (SyncData_.BuffersMutex) { - Y_DEBUG_ABORT_UNLESS(!Buffer_.HasData()); - Y_DEBUG_ABORT_UNLESS(Buffer_.CanWrite()); + TStringListBufferedInputStream(const TStringListBufferedInputStream&) = delete; + TStringListBufferedInputStream& operator=(const TStringListBufferedInputStream&) = delete; - bool receivedYield = false; + TCyclicRWBuffer& GetBuffer() { + return Buffer_; + } - while (Buffer_.CanWrite() && CurReadMode_ != ReadMode::Done && !receivedYield) { - switch (CurReadMode_) { - case ReadMode::Start: { - auto status = ReadNextString(); - if (status == EFetchStatus::Yield) { - receivedYield = true; - break; - } + // Fetch input from upstream list iterator to the buffer. + // Called from Main thread. + EFetchStatus FetchInput() { + with_lock (SyncData_.BuffersMutex) { + Y_DEBUG_ABORT_UNLESS(!Buffer_.HasData()); + Y_DEBUG_ABORT_UNLESS(Buffer_.CanWrite()); - CurReadMode_ = (status == EFetchStatus::Ok) - ? ReadMode::String - : ReadMode::Done; + bool receivedYield = false; + while (Buffer_.CanWrite() && CurReadMode_ != ReadMode::Done && !receivedYield) { + switch (CurReadMode_) { + case ReadMode::Start: { + auto status = ReadNextString(); + if (status == EFetchStatus::Yield) { + receivedYield = true; break; } - case ReadMode::String: - if (CurStringInput_.Exhausted()) { - DelimiterInput_.Reset(Delimiter_.data(), Delimiter_.size()); - CurReadMode_ = ReadMode::Delimiter; - break; - } + CurReadMode_ = (status == EFetchStatus::Ok) + ? 
ReadMode::String + : ReadMode::Done; + + break; + } - Buffer_.Write(CurStringInput_); + case ReadMode::String: + if (CurStringInput_.Exhausted()) { + DelimiterInput_.Reset(Delimiter_.data(), Delimiter_.size()); + CurReadMode_ = ReadMode::Delimiter; break; + } - case ReadMode::Delimiter: - if (DelimiterInput_.Exhausted()) { - CurReadMode_ = ReadMode::Start; - break; - } + Buffer_.Write(CurStringInput_); + break; - Buffer_.Write(DelimiterInput_); + case ReadMode::Delimiter: + if (DelimiterInput_.Exhausted()) { + CurReadMode_ = ReadMode::Start; break; + } - default: - break; - } - } + Buffer_.Write(DelimiterInput_); + break; - if (CurReadMode_ == ReadMode::Done) { - Buffer_.Finish(); + default: + break; } - - SyncData_.InputBufferCanReadCond.Signal(); - return receivedYield ? EFetchStatus::Yield : EFetchStatus::Ok; } - } - private: - // Read data to pass into the child process input pipe. - // Called from Communicate thread. - size_t DoRead(void* buf, size_t len) override { - try { - with_lock (SyncData_.BuffersMutex) { - while (!Buffer_.HasData() && !Buffer_.IsFinished()) { - SyncData_.MainThreadHasWorkCond.Signal(); - SyncData_.InputBufferCanReadCond.WaitI(SyncData_.BuffersMutex); - } + if (CurReadMode_ == ReadMode::Done) { + Buffer_.Finish(); + } - if (!Buffer_.HasData()) { - Y_DEBUG_ABORT_UNLESS(Buffer_.IsFinished()); - return 0; - } + SyncData_.InputBufferCanReadCond.Signal(); + return receivedYield ? EFetchStatus::Yield : EFetchStatus::Ok; + } + } - const char* dataPtr; - size_t dataLen; - Buffer_.GetData(dataPtr, dataLen); +private: + // Read data to pass into the child process input pipe. + // Called from Communicate thread. 
+ size_t DoRead(void* buf, size_t len) override { + try { + with_lock (SyncData_.BuffersMutex) { + while (!Buffer_.HasData() && !Buffer_.IsFinished()) { + SyncData_.MainThreadHasWorkCond.Signal(); + SyncData_.InputBufferCanReadCond.WaitI(SyncData_.BuffersMutex); + } - size_t bytesRead = std::min(dataLen, len); - Y_DEBUG_ABORT_UNLESS(bytesRead > 0); - memcpy(buf, dataPtr, bytesRead); - Buffer_.CommitRead(bytesRead); - return bytesRead; + if (!Buffer_.HasData()) { + Y_DEBUG_ABORT_UNLESS(Buffer_.IsFinished()); + return 0; } - ythrow yexception(); - } catch (const std::exception& e) { - UdfTerminate((TStringBuilder() << Pos_ << " " << e.what()).c_str()); - } - } + const char* dataPtr; + size_t dataLen; + Buffer_.GetData(dataPtr, dataLen); - EFetchStatus ReadNextString() { - TUnboxedValue item; - EFetchStatus status = RowsStream_.Fetch(item); - switch (status) { - case EFetchStatus::Yield: - case EFetchStatus::Finish: - return status; - default: - break; + size_t bytesRead = std::min(dataLen, len); + Y_DEBUG_ABORT_UNLESS(bytesRead > 0); + memcpy(buf, dataPtr, bytesRead); + Buffer_.CommitRead(bytesRead); + return bytesRead; } - CurString_ = item.GetElement(0); - CurStringInput_.Reset(CurString_.AsStringRef().Data(), CurString_.AsStringRef().Size()); + ythrow yexception(); + } catch (const std::exception& e) { + UdfTerminate((TStringBuilder() << Pos_ << " " << e.what()).c_str()); + } + } - // Check that input string doesn't contain delimiters - const char* match; - Y_UNUSED(match); - if (DelimiterMatcher_.SubStr( - CurString_.AsStringRef().Data(), - CurString_.AsStringRef().Data() + CurString_.AsStringRef().Size(), - match)) - { - ythrow yexception() << "Delimiter found in input string."; - } + EFetchStatus ReadNextString() { + TUnboxedValue item; + EFetchStatus status = RowsStream_.Fetch(item); + switch (status) { + case EFetchStatus::Yield: + case EFetchStatus::Finish: + return status; + default: + break; + } - return EFetchStatus::Ok; + CurString_ = item.GetElement(0); 
+ CurStringInput_.Reset(CurString_.AsStringRef().Data(), CurString_.AsStringRef().Size()); + + // Check that input string doesn't contain delimiters + const char* match; + Y_UNUSED(match); + if (DelimiterMatcher_.SubStr( + CurString_.AsStringRef().Data(), + CurString_.AsStringRef().Data() + CurString_.AsStringRef().Size(), + match)) + { + ythrow yexception() << "Delimiter found in input string."; } - private: - enum class ReadMode { - Start, - String, - Delimiter, - Done - }; + return EFetchStatus::Ok; + } - TUnboxedValue RowsStream_; - TString Delimiter_; - TThreadSyncData& SyncData_; - TSourcePosition Pos_; +private: + enum class ReadMode { + Start, + String, + Delimiter, + Done + }; - TKMPMatcher DelimiterMatcher_; - TUnboxedValue CurString_; - TMemoryInput CurStringInput_; - TMemoryInput DelimiterInput_; + TUnboxedValue RowsStream_; + TString Delimiter_; + TThreadSyncData& SyncData_; + TSourcePosition Pos_; - TCyclicRWBuffer Buffer_; + TKMPMatcher DelimiterMatcher_; + TUnboxedValue CurString_; + TMemoryInput CurStringInput_; + TMemoryInput DelimiterInput_; - ReadMode CurReadMode_; - }; + TCyclicRWBuffer Buffer_; - class TStringListBufferedOutputStream: public IOutputStream { - public: - TStringListBufferedOutputStream(const TString& delimiter, size_t stringBufferSizeBytes, - TStringListBufferedInputStream& inputStream, TThreadSyncData& syncData) - : Delimiter_(delimiter) - , InputStream_(inputStream) - , SyncData_(syncData) - , HasDelimiterMatch_(false) - , DelimiterMatcherCallback_(HasDelimiterMatch_) - , DelimiterMatcher_(delimiter.data(), delimiter.data() + delimiter.size(), &DelimiterMatcherCallback_) - , Buffer_(stringBufferSizeBytes) - { - } + ReadMode CurReadMode_; +}; - TStringListBufferedOutputStream(const TStringListBufferedOutputStream&) = delete; - TStringListBufferedOutputStream& operator=(const TStringListBufferedOutputStream&) = delete; +class TStringListBufferedOutputStream: public IOutputStream { +public: + TStringListBufferedOutputStream(const 
TString& delimiter, size_t stringBufferSizeBytes, + TStringListBufferedInputStream& inputStream, TThreadSyncData& syncData) + : Delimiter_(delimiter) + , InputStream_(inputStream) + , SyncData_(syncData) + , HasDelimiterMatch_(false) + , DelimiterMatcherCallback_(HasDelimiterMatch_) + , DelimiterMatcher_(delimiter.data(), delimiter.data() + delimiter.size(), &DelimiterMatcherCallback_) + , Buffer_(stringBufferSizeBytes) + { + } - // Get string record from buffer. - // Called from Main thread. - EFetchStatus FetchNextString(TString& str) { - while (!HasDelimiterMatch_) { - with_lock (SyncData_.BuffersMutex) { - bool inputHasData; - bool bufferNeedsData; + TStringListBufferedOutputStream(const TStringListBufferedOutputStream&) = delete; + TStringListBufferedOutputStream& operator=(const TStringListBufferedOutputStream&) = delete; - do { - inputHasData = InputStream_.GetBuffer().HasData() || InputStream_.GetBuffer().IsFinished(); - bufferNeedsData = !Buffer_.HasData() && !Buffer_.IsFinished(); + // Get string record from buffer. + // Called from Main thread. 
+ EFetchStatus FetchNextString(TString& str) { + while (!HasDelimiterMatch_) { + with_lock (SyncData_.BuffersMutex) { + bool inputHasData; + bool bufferNeedsData; - if (inputHasData && bufferNeedsData) { - SyncData_.MainThreadHasWorkCond.WaitI(SyncData_.BuffersMutex); - } - } while (inputHasData && bufferNeedsData); + do { + inputHasData = InputStream_.GetBuffer().HasData() || InputStream_.GetBuffer().IsFinished(); + bufferNeedsData = !Buffer_.HasData() && !Buffer_.IsFinished(); - if (!inputHasData) { - auto status = InputStream_.FetchInput(); - if (status == EFetchStatus::Yield) { - return EFetchStatus::Yield; - } + if (inputHasData && bufferNeedsData) { + SyncData_.MainThreadHasWorkCond.WaitI(SyncData_.BuffersMutex); } + } while (inputHasData && bufferNeedsData); - if (bufferNeedsData) { - continue; + if (!inputHasData) { + auto status = InputStream_.FetchInput(); + if (status == EFetchStatus::Yield) { + return EFetchStatus::Yield; } + } - if (!Buffer_.HasData()) { - Y_DEBUG_ABORT_UNLESS(Buffer_.IsFinished()); - str = TString(TStringBuf(CurrentString_.Data(), CurrentString_.Size())); - CurrentString_.Clear(); - return str.empty() ? EFetchStatus::Finish : EFetchStatus::Ok; - } + if (bufferNeedsData) { + continue; + } - const char* data; - size_t size; - Buffer_.GetData(data, size); + if (!Buffer_.HasData()) { + Y_DEBUG_ABORT_UNLESS(Buffer_.IsFinished()); + str = TString(TStringBuf(CurrentString_.Data(), CurrentString_.Size())); + CurrentString_.Clear(); + return str.empty() ? 
EFetchStatus::Finish : EFetchStatus::Ok; + } - size_t read = 0; - while (!HasDelimiterMatch_ && read < size) { - DelimiterMatcher_.Push(data[read]); - ++read; - } + const char* data; + size_t size; + Buffer_.GetData(data, size); - Y_DEBUG_ABORT_UNLESS(read > 0); - CurrentString_.Append(data, read); - bool signalCanWrite = !Buffer_.CanWrite(); - Buffer_.CommitRead(read); + size_t read = 0; + while (!HasDelimiterMatch_ && read < size) { + DelimiterMatcher_.Push(data[read]); + ++read; + } - if (signalCanWrite) { - SyncData_.OutputBufferCanWriteCond.Signal(); - } + Y_DEBUG_ABORT_UNLESS(read > 0); + CurrentString_.Append(data, read); + bool signalCanWrite = !Buffer_.CanWrite(); + Buffer_.CommitRead(read); + + if (signalCanWrite) { + SyncData_.OutputBufferCanWriteCond.Signal(); } } + } - Y_DEBUG_ABORT_UNLESS(CurrentString_.Size() >= Delimiter_.size()); - str = TString(TStringBuf(CurrentString_.Data(), CurrentString_.Size() - Delimiter_.size())); - CurrentString_.Clear(); - HasDelimiterMatch_ = false; + Y_DEBUG_ABORT_UNLESS(CurrentString_.Size() >= Delimiter_.size()); + str = TString(TStringBuf(CurrentString_.Data(), CurrentString_.Size() - Delimiter_.size())); + CurrentString_.Clear(); + HasDelimiterMatch_ = false; - return EFetchStatus::Ok; - } + return EFetchStatus::Ok; + } - TCyclicRWBuffer& GetBuffer() { - return Buffer_; - } + TCyclicRWBuffer& GetBuffer() { + return Buffer_; + } - private: - // Write data from child process output to buffer. - // Called from Communicate thread. - void DoWrite(const void* buf, size_t len) override { - const char* curStrPos = reinterpret_cast<const char*>(buf); - size_t curStrLen = len; +private: + // Write data from child process output to buffer. + // Called from Communicate thread. 
+ void DoWrite(const void* buf, size_t len) override { + const char* curStrPos = reinterpret_cast<const char*>(buf); + size_t curStrLen = len; - while (curStrLen > 0) { - with_lock (SyncData_.BuffersMutex) { - while (!Buffer_.CanWrite() && !Buffer_.IsFinished()) { - SyncData_.OutputBufferCanWriteCond.WaitI(SyncData_.BuffersMutex); - } + while (curStrLen > 0) { + with_lock (SyncData_.BuffersMutex) { + while (!Buffer_.CanWrite() && !Buffer_.IsFinished()) { + SyncData_.OutputBufferCanWriteCond.WaitI(SyncData_.BuffersMutex); + } - if (Buffer_.IsFinished()) { - return; - } + if (Buffer_.IsFinished()) { + return; + } - bool signalCanRead = !Buffer_.HasData(); - Buffer_.Write(curStrPos, curStrLen); + bool signalCanRead = !Buffer_.HasData(); + Buffer_.Write(curStrPos, curStrLen); - if (signalCanRead) { - SyncData_.MainThreadHasWorkCond.Signal(); - } + if (signalCanRead) { + SyncData_.MainThreadHasWorkCond.Signal(); } } } + } - void DoFinish() override { - IOutputStream::DoFinish(); + void DoFinish() override { + IOutputStream::DoFinish(); - with_lock (SyncData_.BuffersMutex) { - Buffer_.Finish(); - SyncData_.MainThreadHasWorkCond.Signal(); - } + with_lock (SyncData_.BuffersMutex) { + Buffer_.Finish(); + SyncData_.MainThreadHasWorkCond.Signal(); } + } - private: - class MatcherCallback: public TKMPStreamMatcher<char>::ICallback { - public: - MatcherCallback(bool& hasMatch) - : HasMatch_(hasMatch) - { - } - - void OnMatch(const char* begin, const char* end) override { - Y_UNUSED(begin); - Y_UNUSED(end); +private: + class MatcherCallback: public TKMPStreamMatcher<char>::ICallback { + public: + MatcherCallback(bool& hasMatch) + : HasMatch_(hasMatch) + { + } - HasMatch_ = true; - } + void OnMatch(const char* begin, const char* end) override { + Y_UNUSED(begin); + Y_UNUSED(end); - private: - bool& HasMatch_; - }; + HasMatch_ = true; + } private: - TString Delimiter_; - TStringListBufferedInputStream& InputStream_; - TThreadSyncData& SyncData_; + bool& HasMatch_; + }; - bool 
HasDelimiterMatch_; - MatcherCallback DelimiterMatcherCallback_; - TKMPStreamMatcher<char> DelimiterMatcher_; +private: + TString Delimiter_; + TStringListBufferedInputStream& InputStream_; + TThreadSyncData& SyncData_; - TBuffer CurrentString_; + bool HasDelimiterMatch_; + MatcherCallback DelimiterMatcherCallback_; + TKMPStreamMatcher<char> DelimiterMatcher_; - TCyclicRWBuffer Buffer_; - }; + TBuffer CurrentString_; - class TStreamingOutputListIterator { - public: - TStreamingOutputListIterator(const TStreamingParams& params, const IValueBuilder* valueBuilder, TSourcePosition pos) - : StreamingParams_(params) - , ValueBuilder_(valueBuilder) - , Pos_(pos) - { - } + TCyclicRWBuffer Buffer_; +}; - TStreamingOutputListIterator(const TStreamingOutputListIterator&) = delete; - TStreamingOutputListIterator& operator=(const TStreamingOutputListIterator&) = delete; +class TStreamingOutputListIterator { +public: + TStreamingOutputListIterator(const TStreamingParams& params, const IValueBuilder* valueBuilder, TSourcePosition pos) + : StreamingParams_(params) + , ValueBuilder_(valueBuilder) + , Pos_(pos) + { + } - ~TStreamingOutputListIterator() { - if (ShellCommand_) { - Y_DEBUG_ABORT_UNLESS(InputStream_ && OutputStream_); + TStreamingOutputListIterator(const TStreamingOutputListIterator&) = delete; + TStreamingOutputListIterator& operator=(const TStreamingOutputListIterator&) = delete; - try { - ShellCommand_->Terminate(); - } catch (const std::exception& e) { - Cerr << CurrentExceptionMessage(); - } + ~TStreamingOutputListIterator() { + if (ShellCommand_) { + Y_DEBUG_ABORT_UNLESS(InputStream_ && OutputStream_); - // Let Communicate thread finish. 
- with_lock (ThreadSyncData_.BuffersMutex) { - InputStream_->GetBuffer().Finish(); - OutputStream_->GetBuffer().Finish(); - ThreadSyncData_.InputBufferCanReadCond.Signal(); - ThreadSyncData_.OutputBufferCanWriteCond.Signal(); - } + try { + ShellCommand_->Terminate(); + } catch (const std::exception& e) { + Cerr << CurrentExceptionMessage(); + } - ShellCommand_->Wait(); + // Let Communicate thread finish. + with_lock (ThreadSyncData_.BuffersMutex) { + InputStream_->GetBuffer().Finish(); + OutputStream_->GetBuffer().Finish(); + ThreadSyncData_.InputBufferCanReadCond.Signal(); + ThreadSyncData_.OutputBufferCanWriteCond.Signal(); } - } - EFetchStatus Fetch(TUnboxedValue& result) { - try { - EFetchStatus status = EFetchStatus::Ok; + ShellCommand_->Wait(); + } + } - if (!ProcessStarted()) { - StartProcess(); + EFetchStatus Fetch(TUnboxedValue& result) { + try { + EFetchStatus status = EFetchStatus::Ok; - // Don't try to fetch data if there was a problem starting the process, - // this causes infinite wait on Windows system due to incorrect ShellCommand behavior. - if (ShellCommand_->GetStatus() != TShellCommand::SHELL_RUNNING && ShellCommand_->GetStatus() != TShellCommand::SHELL_FINISHED) { - status = EFetchStatus::Finish; - } - } + if (!ProcessStarted()) { + StartProcess(); - if (status == EFetchStatus::Ok) { - status = OutputStream_->FetchNextString(CurrentRecord_); + // Don't try to fetch data if there was a problem starting the process, + // this causes infinite wait on Windows system due to incorrect ShellCommand behavior. 
+ if (ShellCommand_->GetStatus() != TShellCommand::SHELL_RUNNING && ShellCommand_->GetStatus() != TShellCommand::SHELL_FINISHED) { + status = EFetchStatus::Finish; } + } - if (status == EFetchStatus::Finish) { - switch (ShellCommand_->GetStatus()) { - case TShellCommand::SHELL_FINISHED: - break; - case TShellCommand::SHELL_INTERNAL_ERROR: - ythrow yexception() << "Internal error running process: " << ShellCommand_->GetInternalError(); - break; - case TShellCommand::SHELL_ERROR: - ythrow yexception() << "Error running user process: " << ShellCommand_->GetError(); - break; - default: - ythrow yexception() << "Unexpected shell command status: " << (int)ShellCommand_->GetStatus(); - } - return EFetchStatus::Finish; - } + if (status == EFetchStatus::Ok) { + status = OutputStream_->FetchNextString(CurrentRecord_); + } - if (status == EFetchStatus::Ok) { - TUnboxedValue* items = nullptr; - result = ValueBuilder_->NewArray(1, items); - *items = ValueBuilder_->NewString(TStringRef(CurrentRecord_.data(), CurrentRecord_.size())); + if (status == EFetchStatus::Finish) { + switch (ShellCommand_->GetStatus()) { + case TShellCommand::SHELL_FINISHED: + break; + case TShellCommand::SHELL_INTERNAL_ERROR: + ythrow yexception() << "Internal error running process: " << ShellCommand_->GetInternalError(); + break; + case TShellCommand::SHELL_ERROR: + ythrow yexception() << "Error running user process: " << ShellCommand_->GetError(); + break; + default: + ythrow yexception() << "Unexpected shell command status: " << (int)ShellCommand_->GetStatus(); } + return EFetchStatus::Finish; + } - return status; - } catch (const std::exception& e) { - UdfTerminate((TStringBuilder() << Pos_ << " " << e.what()).c_str()); + if (status == EFetchStatus::Ok) { + TUnboxedValue* items = nullptr; + result = ValueBuilder_->NewArray(1, items); + *items = ValueBuilder_->NewString(TStringRef(CurrentRecord_.data(), CurrentRecord_.size())); } - } - private: - void StartProcess() { - InputStream_.Reset(new 
TStringListBufferedInputStream( - StreamingParams_.InputStreamObj, StreamingParams_.InputDelimiter, - StreamingParams_.InputBufferSizeBytes, ThreadSyncData_, Pos_)); + return status; + } catch (const std::exception& e) { + UdfTerminate((TStringBuilder() << Pos_ << " " << e.what()).c_str()); + } + } - OutputStream_.Reset(new TStringListBufferedOutputStream( - StreamingParams_.OutputDelimiter, StreamingParams_.OutputBufferSizeBytes, *InputStream_, - ThreadSyncData_)); +private: + void StartProcess() { + InputStream_.Reset(new TStringListBufferedInputStream( + StreamingParams_.InputStreamObj, StreamingParams_.InputDelimiter, + StreamingParams_.InputBufferSizeBytes, ThreadSyncData_, Pos_)); - TShellCommandOptions opt; - opt.SetAsync(true).SetUseShell(false).SetLatency(StreamingParams_.ProcessPollLatencyMs).SetInputStream(InputStream_.Get()).SetOutputStream(OutputStream_.Get()).SetCloseStreams(true).SetCloseAllFdsOnExec(true); + OutputStream_.Reset(new TStringListBufferedOutputStream( + StreamingParams_.OutputDelimiter, StreamingParams_.OutputBufferSizeBytes, *InputStream_, + ThreadSyncData_)); - TList<TString> commandArguments; - auto argumetsIterator = StreamingParams_.ArgumentsList.GetListIterator(); - for (TUnboxedValue item; argumetsIterator.Next(item);) { - commandArguments.emplace_back(TStringBuf(item.AsStringRef())); - } + TShellCommandOptions opt; + opt.SetAsync(true).SetUseShell(false).SetLatency(StreamingParams_.ProcessPollLatencyMs).SetInputStream(InputStream_.Get()).SetOutputStream(OutputStream_.Get()).SetCloseStreams(true).SetCloseAllFdsOnExec(true); - ShellCommand_.Reset(new TShellCommand(StreamingParams_.CommandLine, commandArguments, opt)); - ShellCommand_->Run(); + TList<TString> commandArguments; + auto argumetsIterator = StreamingParams_.ArgumentsList.GetListIterator(); + for (TUnboxedValue item; argumetsIterator.Next(item);) { + commandArguments.emplace_back(TStringBuf(item.AsStringRef())); } - bool ProcessStarted() const { - return !!ShellCommand_; 
- } + ShellCommand_.Reset(new TShellCommand(StreamingParams_.CommandLine, commandArguments, opt)); + ShellCommand_->Run(); + } - private: - TStreamingParams StreamingParams_; - const IValueBuilder* ValueBuilder_; - TSourcePosition Pos_; + bool ProcessStarted() const { + return !!ShellCommand_; + } - TThreadSyncData ThreadSyncData_; +private: + TStreamingParams StreamingParams_; + const IValueBuilder* ValueBuilder_; + TSourcePosition Pos_; - THolder<TShellCommand> ShellCommand_; - THolder<TStringListBufferedInputStream> InputStream_; - THolder<TStringListBufferedOutputStream> OutputStream_; + TThreadSyncData ThreadSyncData_; - TString CurrentRecord_; - }; + THolder<TShellCommand> ShellCommand_; + THolder<TStringListBufferedInputStream> InputStream_; + THolder<TStringListBufferedOutputStream> OutputStream_; - class TStreamingOutput: public TBoxedValue { - public: - TStreamingOutput(const TStreamingParams& params, const IValueBuilder* valueBuilder, TSourcePosition pos) - : StreamingParams_(params) - , ValueBuilder_(valueBuilder) - , Pos_(pos) - { - } + TString CurrentRecord_; +}; - TStreamingOutput(const TStreamingOutput&) = delete; - TStreamingOutput& operator=(const TStreamingOutput&) = delete; +class TStreamingOutput: public TBoxedValue { +public: + TStreamingOutput(const TStreamingParams& params, const IValueBuilder* valueBuilder, TSourcePosition pos) + : StreamingParams_(params) + , ValueBuilder_(valueBuilder) + , Pos_(pos) + { + } - private: - EFetchStatus Fetch(TUnboxedValue& result) override { - if (IsFinished_) { - return EFetchStatus::Finish; - } + TStreamingOutput(const TStreamingOutput&) = delete; + TStreamingOutput& operator=(const TStreamingOutput&) = delete; - if (!Iterator_) { - Iterator_.Reset(new TStreamingOutputListIterator(StreamingParams_, ValueBuilder_, Pos_)); - } +private: + EFetchStatus Fetch(TUnboxedValue& result) override { + if (IsFinished_) { + return EFetchStatus::Finish; + } - auto ret = Iterator_->Fetch(result); + if (!Iterator_) { + 
Iterator_.Reset(new TStreamingOutputListIterator(StreamingParams_, ValueBuilder_, Pos_)); + } - if (ret == EFetchStatus::Finish) { - IsFinished_ = true; - Iterator_.Reset(); - } + auto ret = Iterator_->Fetch(result); - return ret; + if (ret == EFetchStatus::Finish) { + IsFinished_ = true; + Iterator_.Reset(); } - TStreamingParams StreamingParams_; - const IValueBuilder* ValueBuilder_; - TSourcePosition Pos_; - bool IsFinished_ = false; - THolder<TStreamingOutputListIterator> Iterator_; - }; + return ret; + } - class TStreamingScriptOutput: public TStreamingOutput { - public: - TStreamingScriptOutput(const TStreamingParams& params, const IValueBuilder* valueBuilder, - TSourcePosition pos, const TString& script, const TString& scriptFilename) - : TStreamingOutput(params, valueBuilder, pos) - , ScriptFileHandle_(scriptFilename) - { - auto scriptStripped = StripBeforeShebang(script); - ScriptFileHandle_.Write(scriptStripped.data(), scriptStripped.size()); - ScriptFileHandle_.Close(); + TStreamingParams StreamingParams_; + const IValueBuilder* ValueBuilder_; + TSourcePosition Pos_; + bool IsFinished_ = false; + THolder<TStreamingOutputListIterator> Iterator_; +}; - if (Chmod(ScriptFileHandle_.Name().c_str(), MODE0755) != 0) { - ythrow yexception() << "Chmod failed for script file:" << ScriptFileHandle_.Name() - << " with error: " << LastSystemErrorText(); - } +class TStreamingScriptOutput: public TStreamingOutput { +public: + TStreamingScriptOutput(const TStreamingParams& params, const IValueBuilder* valueBuilder, + TSourcePosition pos, const TString& script, const TString& scriptFilename) + : TStreamingOutput(params, valueBuilder, pos) + , ScriptFileHandle_(scriptFilename) + { + auto scriptStripped = StripBeforeShebang(script); + ScriptFileHandle_.Write(scriptStripped.data(), scriptStripped.size()); + ScriptFileHandle_.Close(); + + if (Chmod(ScriptFileHandle_.Name().c_str(), MODE0755) != 0) { + ythrow yexception() << "Chmod failed for script file:" << 
ScriptFileHandle_.Name() + << " with error: " << LastSystemErrorText(); } + } - private: - static TString StripBeforeShebang(const TString& script) { - auto shebangIndex = script.find("#!"); - if (shebangIndex != TString::npos) { - auto scriptStripped = StripStringLeft(script); +private: + static TString StripBeforeShebang(const TString& script) { + auto shebangIndex = script.find("#!"); + if (shebangIndex != TString::npos) { + auto scriptStripped = StripStringLeft(script); - if (scriptStripped.size() == script.size() - shebangIndex) { - return scriptStripped; - } + if (scriptStripped.size() == script.size() - shebangIndex) { + return scriptStripped; } - - return script; } - TTempFileHandle ScriptFileHandle_; - }; + return script; + } - class TStreamingProcess: public TBoxedValue { - public: - TStreamingProcess(TSourcePosition pos) - : Pos_(pos) - {} + TTempFileHandle ScriptFileHandle_; +}; - private: - TUnboxedValue Run(const IValueBuilder* valueBuilder, - const TUnboxedValuePod* args) const override { - auto inputListArg = args[0]; - auto commandLineArg = args[1].AsStringRef(); - auto argumentsArg = args[2]; - auto inputDelimiterArg = args[3]; - auto outputDelimiterArg = args[4]; +class TStreamingProcess: public TBoxedValue { +public: + TStreamingProcess(TSourcePosition pos) + : Pos_(pos) + { + } - Y_DEBUG_ABORT_UNLESS(inputListArg.IsBoxed()); +private: + TUnboxedValue Run(const IValueBuilder* valueBuilder, + const TUnboxedValuePod* args) const override { + auto inputListArg = args[0]; + auto commandLineArg = args[1].AsStringRef(); + auto argumentsArg = args[2]; + auto inputDelimiterArg = args[3]; + auto outputDelimiterArg = args[4]; - TStreamingParams params; - params.InputStreamObj = TUnboxedValuePod(inputListArg); - params.CommandLine = TString(TStringBuf(commandLineArg)); - params.ArgumentsList = !argumentsArg - ? 
valueBuilder->NewEmptyList() - : TUnboxedValue(argumentsArg.GetOptionalValue()); + Y_DEBUG_ABORT_UNLESS(inputListArg.IsBoxed()); - if (inputDelimiterArg) { - params.InputDelimiter = TString(TStringBuf(inputDelimiterArg.AsStringRef())); - } - if (outputDelimiterArg) { - params.OutputDelimiter = TString(TStringBuf(outputDelimiterArg.AsStringRef())); - } + TStreamingParams params; + params.InputStreamObj = TUnboxedValuePod(inputListArg); + params.CommandLine = TString(TStringBuf(commandLineArg)); + params.ArgumentsList = !argumentsArg + ? valueBuilder->NewEmptyList() + : TUnboxedValue(argumentsArg.GetOptionalValue()); - return TUnboxedValuePod(new TStreamingOutput(params, valueBuilder, Pos_)); + if (inputDelimiterArg) { + params.InputDelimiter = TString(TStringBuf(inputDelimiterArg.AsStringRef())); } - - public: - static TStringRef Name() { - static auto name = TStringRef::Of("Process"); - return name; + if (outputDelimiterArg) { + params.OutputDelimiter = TString(TStringBuf(outputDelimiterArg.AsStringRef())); } - private: - TSourcePosition Pos_; - }; + return TUnboxedValuePod(new TStreamingOutput(params, valueBuilder, Pos_)); + } - class TStreamingProcessInline: public TBoxedValue { - public: - TStreamingProcessInline(TSourcePosition pos) - : Pos_(pos) - {} +public: + static TStringRef Name() { + static auto name = TStringRef::Of("Process"); + return name; + } - private: - TUnboxedValue Run(const IValueBuilder* valueBuilder, - const TUnboxedValuePod* args) const override { - auto inputListArg = args[0]; - auto scriptArg = args[1].AsStringRef(); - auto argumentsArg = args[2]; - auto inputDelimiterArg = args[3]; - auto outputDelimiterArg = args[4]; +private: + TSourcePosition Pos_; +}; - TString script(scriptArg); - TString scriptFilename = MakeTempName("."); +class TStreamingProcessInline: public TBoxedValue { +public: + TStreamingProcessInline(TSourcePosition pos) + : Pos_(pos) + { + } - TStreamingParams params; - params.InputStreamObj = 
TUnboxedValuePod(inputListArg); - params.CommandLine = scriptFilename; - params.ArgumentsList = !argumentsArg - ? valueBuilder->NewEmptyList() - : TUnboxedValue(argumentsArg.GetOptionalValue()); +private: + TUnboxedValue Run(const IValueBuilder* valueBuilder, + const TUnboxedValuePod* args) const override { + auto inputListArg = args[0]; + auto scriptArg = args[1].AsStringRef(); + auto argumentsArg = args[2]; + auto inputDelimiterArg = args[3]; + auto outputDelimiterArg = args[4]; - if (inputDelimiterArg) { - params.InputDelimiter = TString(TStringBuf(inputDelimiterArg.AsStringRef())); - } - if (outputDelimiterArg) { - params.OutputDelimiter = TString(TStringBuf(outputDelimiterArg.AsStringRef())); - } + TString script(scriptArg); + TString scriptFilename = MakeTempName("."); - return TUnboxedValuePod(new TStreamingScriptOutput(params, valueBuilder, Pos_, script, scriptFilename)); - } + TStreamingParams params; + params.InputStreamObj = TUnboxedValuePod(inputListArg); + params.CommandLine = scriptFilename; + params.ArgumentsList = !argumentsArg + ? 
valueBuilder->NewEmptyList() + : TUnboxedValue(argumentsArg.GetOptionalValue()); - public: - static TStringRef Name() { - static auto name = TStringRef::Of("ProcessInline"); - return name; + if (inputDelimiterArg) { + params.InputDelimiter = TString(TStringBuf(inputDelimiterArg.AsStringRef())); + } + if (outputDelimiterArg) { + params.OutputDelimiter = TString(TStringBuf(outputDelimiterArg.AsStringRef())); } - private: - TSourcePosition Pos_; - }; + return TUnboxedValuePod(new TStreamingScriptOutput(params, valueBuilder, Pos_, script, scriptFilename)); + } - class TStreamingModule: public IUdfModule { - public: - TStringRef Name() const { - return TStringRef::Of("Streaming"); - } +public: + static TStringRef Name() { + static auto name = TStringRef::Of("ProcessInline"); + return name; + } - void CleanupOnTerminate() const final { - } +private: + TSourcePosition Pos_; +}; - void GetAllFunctions(IFunctionsSink& sink) const final { - sink.Add(TStreamingProcess::Name()); - sink.Add(TStreamingProcessInline::Name()); - } +class TStreamingModule: public IUdfModule { +public: + TStringRef Name() const { + return TStringRef::Of("Streaming"); + } - void BuildFunctionTypeInfo( - const TStringRef& name, - NUdf::TType* userType, - const TStringRef& typeConfig, - ui32 flags, - IFunctionTypeInfoBuilder& builder) const override { - try { - Y_UNUSED(userType); - Y_UNUSED(typeConfig); + void CleanupOnTerminate() const final { + } - bool typesOnly = (flags & TFlags::TypesOnly); + void GetAllFunctions(IFunctionsSink& sink) const final { + sink.Add(TStreamingProcess::Name()); + sink.Add(TStreamingProcessInline::Name()); + } - auto optionalStringType = builder.Optional()->Item<char*>().Build(); - auto rowType = builder.Struct(1)->AddField("Data", TDataType<char*>::Id, nullptr).Build(); - auto rowsType = builder.Stream()->Item(rowType).Build(); - auto stringListType = builder.List()->Item(TDataType<char*>::Id).Build(); - auto optionalStringListType = 
builder.Optional()->Item(stringListType).Build(); + void BuildFunctionTypeInfo( + const TStringRef& name, + NUdf::TType* userType, + const TStringRef& typeConfig, + ui32 flags, + IFunctionTypeInfoBuilder& builder) const override { + try { + Y_UNUSED(userType); + Y_UNUSED(typeConfig); - if (TStreamingProcess::Name() == name) { - builder.Returns(rowsType).Args()->Add(rowsType).Add<char*>().Add(optionalStringListType).Add(optionalStringType).Add(optionalStringType).Done().OptionalArgs(3); + bool typesOnly = (flags & TFlags::TypesOnly); - if (!typesOnly) { - builder.Implementation(new TStreamingProcess(builder.GetSourcePosition())); - } + auto optionalStringType = builder.Optional()->Item<char*>().Build(); + auto rowType = builder.Struct(1)->AddField("Data", TDataType<char*>::Id, nullptr).Build(); + auto rowsType = builder.Stream()->Item(rowType).Build(); + auto stringListType = builder.List()->Item(TDataType<char*>::Id).Build(); + auto optionalStringListType = builder.Optional()->Item(stringListType).Build(); + + if (TStreamingProcess::Name() == name) { + builder.Returns(rowsType).Args()->Add(rowsType).Add<char*>().Add(optionalStringListType).Add(optionalStringType).Add(optionalStringType).Done().OptionalArgs(3); + + if (!typesOnly) { + builder.Implementation(new TStreamingProcess(builder.GetSourcePosition())); } + } - if (TStreamingProcessInline::Name() == name) { - builder.Returns(rowsType).Args()->Add(rowsType).Add<char*>().Add(optionalStringListType).Add(optionalStringType).Add(optionalStringType).Done().OptionalArgs(3); + if (TStreamingProcessInline::Name() == name) { + builder.Returns(rowsType).Args()->Add(rowsType).Add<char*>().Add(optionalStringListType).Add(optionalStringType).Add(optionalStringType).Done().OptionalArgs(3); - if (!typesOnly) { - builder.Implementation(new TStreamingProcessInline(builder.GetSourcePosition())); - } + if (!typesOnly) { + builder.Implementation(new TStreamingProcessInline(builder.GetSourcePosition())); } - } catch (const 
std::exception& e) { - builder.SetError(CurrentExceptionMessage()); } + } catch (const std::exception& e) { + builder.SetError(CurrentExceptionMessage()); } - }; + } +}; -} +} // namespace REGISTER_MODULES(TStreamingModule) diff --git a/yql/essentials/udfs/common/streaming/ya.make b/yql/essentials/udfs/common/streaming/ya.make index 9b080a7f86f..320490adcd2 100644 --- a/yql/essentials/udfs/common/streaming/ya.make +++ b/yql/essentials/udfs/common/streaming/ya.make @@ -6,6 +6,8 @@ YQL_ABI_VERSION( 0 ) +ENABLE(YQL_STYLE_CPP) + SRCS( streaming_udf.cpp ) diff --git a/yql/essentials/udfs/common/string/string_udf.cpp b/yql/essentials/udfs/common/string/string_udf.cpp index b1dbb528cbb..6574bacbeea 100644 --- a/yql/essentials/udfs/common/string/string_udf.cpp +++ b/yql/essentials/udfs/common/string/string_udf.cpp @@ -83,7 +83,7 @@ TString ReverseBits(const TStringRef input) { END_SIMPLE_ARROW_UDF(T##udfName, T##udfName##KernelExec::Do) // 'unsafe' udf is actually strict - it returns null on any exception -#define STRING_UNSAFE_UDF(udfName, function) \ +#define STRING_UNSAFE_UDF(udfName, function) \ BEGIN_SIMPLE_STRICT_ARROW_UDF(T##udfName, TOptional<char*>(TOptional<char*>)) { \ EMPTY_RESULT_ON_EMPTY_ARG(0); \ const TStringBuf input(args[0].AsStringRef()); \ @@ -96,8 +96,7 @@ TString ReverseBits(const TStringRef input) { } \ \ struct T##udfName##KernelExec \ - : public TUnaryKernelExec<T##udfName##KernelExec> \ - { \ + : public TUnaryKernelExec<T##udfName##KernelExec> { \ template <typename TSink> \ static void Process(const IValueBuilder*, TBlockItem arg1, const TSink& sink) { \ if (!arg1) { \ @@ -119,7 +118,7 @@ TString ReverseBits(const TStringRef input) { // NOTE: The functions below are marked as deprecated, so block implementation // is not required for them SIMPLE_STRICT_UDF_OPTIONS(TReverse, TOptional<char*>(TOptional<char*>), - builder.SetMaxLangVer(NYql::MakeLangVersion(2025, 1))) { + builder.SetMaxLangVer(NYql::MakeLangVersion(2025, 1))) { 
EMPTY_RESULT_ON_EMPTY_ARG(0) const TStringBuf input(args[0].AsStringRef()); try { @@ -144,7 +143,7 @@ SIMPLE_STRICT_UDF_OPTIONS(TReverse, TOptional<char*>(TOptional<char*>), } \ } -#define STROKA_ASCII_CASE_UDF(udfName, function) \ +#define STROKA_ASCII_CASE_UDF(udfName, function) \ BEGIN_SIMPLE_STRICT_ARROW_UDF(T##udfName, char*(TAutoMap<char*>)) { \ TString input(args[0].AsStringRef()); \ if (input.function()) { \ @@ -155,8 +154,7 @@ SIMPLE_STRICT_UDF_OPTIONS(TReverse, TOptional<char*>(TOptional<char*>), } \ \ struct T##udfName##KernelExec \ - : public TUnaryKernelExec<T##udfName##KernelExec> \ - { \ + : public TUnaryKernelExec<T##udfName##KernelExec> { \ template <typename TSink> \ static void Process(const IValueBuilder*, TBlockItem arg1, const TSink& sink) { \ TString input(arg1.AsStringRef()); \ @@ -170,31 +168,29 @@ SIMPLE_STRICT_UDF_OPTIONS(TReverse, TOptional<char*>(TOptional<char*>), \ END_SIMPLE_ARROW_UDF(T##udfName, T##udfName##KernelExec::Do) - -#define STROKA_FIND_UDF(udfName, function) \ - SIMPLE_STRICT_UDF(T##udfName, bool(TOptional<char*>, char*)) { \ - Y_UNUSED(valueBuilder); \ - if (args[0]) { \ - const TStringBuf haystack(args[0].AsStringRef()); \ - const TStringBuf needle(args[1].AsStringRef()); \ - return TUnboxedValuePod(haystack.function(needle)); \ - } else { \ - return TUnboxedValuePod(false); \ - } \ +#define STROKA_FIND_UDF(udfName, function) \ + SIMPLE_STRICT_UDF(T##udfName, bool(TOptional<char*>, char*)) { \ + Y_UNUSED(valueBuilder); \ + if (args[0]) { \ + const TStringBuf haystack(args[0].AsStringRef()); \ + const TStringBuf needle(args[1].AsStringRef()); \ + return TUnboxedValuePod(haystack.function(needle)); \ + } else { \ + return TUnboxedValuePod(false); \ + } \ } -#define STRING_TWO_ARGS_UDF_DEPRECATED_2025_02(udfName, function) \ - SIMPLE_STRICT_UDF_OPTIONS(T##udfName, bool(TOptional<char*>, char*), \ - builder.SetMaxLangVer(NYql::MakeLangVersion(2025, 1))) \ - { \ - Y_UNUSED(valueBuilder); \ - if (args[0]) { \ - const 
TStringBuf haystack(args[0].AsStringRef()); \ - const TStringBuf needle(args[1].AsStringRef()); \ - return TUnboxedValuePod(function(haystack, needle)); \ - } else { \ - return TUnboxedValuePod(false); \ - } \ +#define STRING_TWO_ARGS_UDF_DEPRECATED_2025_02(udfName, function) \ + SIMPLE_STRICT_UDF_OPTIONS(T##udfName, bool(TOptional<char*>, char*), \ + builder.SetMaxLangVer(NYql::MakeLangVersion(2025, 1))) { \ + Y_UNUSED(valueBuilder); \ + if (args[0]) { \ + const TStringBuf haystack(args[0].AsStringRef()); \ + const TStringBuf needle(args[1].AsStringRef()); \ + return TUnboxedValuePod(function(haystack, needle)); \ + } else { \ + return TUnboxedValuePod(false); \ + } \ } #define STRING_ASCII_CMP_IGNORE_CASE_UDF(udfName, function, minVersion) \ @@ -209,12 +205,10 @@ SIMPLE_STRICT_UDF_OPTIONS(TReverse, TOptional<char*>(TOptional<char*>), } \ \ struct T##udfName##KernelExec \ - : public TBinaryKernelExec<T##udfName##KernelExec> \ - { \ + : public TBinaryKernelExec<T##udfName##KernelExec> { \ template <typename TSink> \ static void Process(const IValueBuilder*, TBlockItem arg1, \ - TBlockItem arg2, const TSink& sink) \ - { \ + TBlockItem arg2, const TSink& sink) { \ if (arg1) { \ const TStringBuf haystack(arg1.AsStringRef()); \ const TStringBuf needle(arg2.AsStringRef()); \ @@ -226,9 +220,8 @@ SIMPLE_STRICT_UDF_OPTIONS(TReverse, TOptional<char*>(TOptional<char*>), }; \ \ BEGIN_SIMPLE_STRICT_ARROW_UDF_OPTIONS(T##udfName, \ - bool(TOptional<char*>, char*), \ - builder.SetMinLangVer(minVersion)) \ - { \ + bool(TOptional<char*>, char*), \ + builder.SetMinLangVer(minVersion)) { \ Y_UNUSED(valueBuilder); \ return udfName##Impl(args); \ } \ @@ -236,8 +229,7 @@ SIMPLE_STRICT_UDF_OPTIONS(TReverse, TOptional<char*>(TOptional<char*>), END_SIMPLE_ARROW_UDF(T##udfName, T##udfName##KernelExec::Do) \ \ BEGIN_SIMPLE_STRICT_ARROW_UDF(T_yql_##udfName, \ - bool(TOptional<char*>, char*)) \ - { \ + bool(TOptional<char*>, char*)) { \ Y_UNUSED(valueBuilder); \ return udfName##Impl(args); \ } 
\ @@ -263,8 +255,7 @@ SIMPLE_STRICT_UDF_OPTIONS(TReverse, TOptional<char*>(TOptional<char*>), } \ \ struct T##function##KernelExec \ - : public TUnaryKernelExec<T##function##KernelExec> \ - { \ + : public TUnaryKernelExec<T##function##KernelExec> { \ template <typename TSink> \ static void Process(const IValueBuilder*, TBlockItem arg1, const TSink& sink) { \ if (arg1) { \ @@ -285,58 +276,54 @@ SIMPLE_STRICT_UDF_OPTIONS(TReverse, TOptional<char*>(TOptional<char*>), \ END_SIMPLE_ARROW_UDF(T##function, T##function##KernelExec::Do) - - -#define STRING_STREAM_PAD_FORMATTER_UDF(function) \ - BEGIN_SIMPLE_ARROW_UDF_WITH_OPTIONAL_ARGS(T##function, \ - char*(TAutoMap<char*>, ui64, TOptional<char*>), 1) \ - { \ - TStringStream result; \ - const TStringBuf input(args[0].AsStringRef()); \ - char paddingSymbol = ' '; \ - if (args[2]) { \ - TStringBuf filler = args[2].AsStringRef(); \ - if (filler.Size() != 1) { \ - ythrow yexception() << "Not 1 symbol in paddingSymbol"; \ - } \ - paddingSymbol = filler[0]; \ - } \ - const ui64 padLen = args[1].Get<ui64>(); \ - if (padLen > padLim) { \ - ythrow yexception() << "Padding length (" << padLen << ") exceeds maximum: " << padLim; \ - } \ - result << function(input, padLen, paddingSymbol); \ - return valueBuilder->NewString(TStringRef(result.Data(), result.Size())); \ - } \ - \ - struct T##function##KernelExec \ - : public TGenericKernelExec<T##function##KernelExec, 3> \ - { \ - template <typename TSink> \ - static void Process(const IValueBuilder*, TBlockItem args, const TSink& sink) { \ - TStringStream result; \ - const TStringBuf input(args.GetElement(0).AsStringRef()); \ - char paddingSymbol = ' '; \ - if (args.GetElement(2)) { \ - TStringBuf filler = args.GetElement(2).AsStringRef(); \ - if (filler.Size() != 1) { \ - ythrow yexception() << "Not 1 symbol in paddingSymbol"; \ - } \ - paddingSymbol = filler[0]; \ - } \ - const ui64 padLen = args.GetElement(1).Get<ui64>(); \ - if (padLen > padLim) { \ - ythrow yexception() << "Padding 
length (" << padLen \ - << ") exceeds maximum: " << padLim; \ - } \ - result << function(input, padLen, paddingSymbol); \ - sink(TBlockItem(TStringRef(result.Data(), result.Size()))); \ - } \ - }; \ - \ +#define STRING_STREAM_PAD_FORMATTER_UDF(function) \ + BEGIN_SIMPLE_ARROW_UDF_WITH_OPTIONAL_ARGS(T##function, \ + char*(TAutoMap<char*>, ui64, TOptional<char*>), 1) { \ + TStringStream result; \ + const TStringBuf input(args[0].AsStringRef()); \ + char paddingSymbol = ' '; \ + if (args[2]) { \ + TStringBuf filler = args[2].AsStringRef(); \ + if (filler.Size() != 1) { \ + ythrow yexception() << "Not 1 symbol in paddingSymbol"; \ + } \ + paddingSymbol = filler[0]; \ + } \ + const ui64 padLen = args[1].Get<ui64>(); \ + if (padLen > padLim) { \ + ythrow yexception() << "Padding length (" << padLen << ") exceeds maximum: " << padLim; \ + } \ + result << function(input, padLen, paddingSymbol); \ + return valueBuilder->NewString(TStringRef(result.Data(), result.Size())); \ + } \ + \ + struct T##function##KernelExec \ + : public TGenericKernelExec<T##function##KernelExec, 3> { \ + template <typename TSink> \ + static void Process(const IValueBuilder*, TBlockItem args, const TSink& sink) { \ + TStringStream result; \ + const TStringBuf input(args.GetElement(0).AsStringRef()); \ + char paddingSymbol = ' '; \ + if (args.GetElement(2)) { \ + TStringBuf filler = args.GetElement(2).AsStringRef(); \ + if (filler.Size() != 1) { \ + ythrow yexception() << "Not 1 symbol in paddingSymbol"; \ + } \ + paddingSymbol = filler[0]; \ + } \ + const ui64 padLen = args.GetElement(1).Get<ui64>(); \ + if (padLen > padLim) { \ + ythrow yexception() << "Padding length (" << padLen \ + << ") exceeds maximum: " << padLim; \ + } \ + result << function(input, padLen, paddingSymbol); \ + sink(TBlockItem(TStringRef(result.Data(), result.Size()))); \ + } \ + }; \ + \ END_SIMPLE_ARROW_UDF(T##function, T##function##KernelExec::Do) -#define STRING_STREAM_NUM_FORMATTER_UDF(function, argType) \ +#define 
STRING_STREAM_NUM_FORMATTER_UDF(function, argType) \ BEGIN_SIMPLE_STRICT_ARROW_UDF(T##function, char*(TAutoMap<argType>)) { \ TStringStream result; \ result << function(args[0].Get<argType>()); \ @@ -344,8 +331,7 @@ SIMPLE_STRICT_UDF_OPTIONS(TReverse, TOptional<char*>(TOptional<char*>), } \ \ struct T##function##KernelExec \ - : public TUnaryKernelExec<T##function##KernelExec> \ - { \ + : public TUnaryKernelExec<T##function##KernelExec> { \ template <typename TSink> \ static void Process(const IValueBuilder*, TBlockItem arg1, const TSink& sink) { \ TStringStream result; \ @@ -356,7 +342,7 @@ SIMPLE_STRICT_UDF_OPTIONS(TReverse, TOptional<char*>(TOptional<char*>), \ END_SIMPLE_ARROW_UDF(T##function, T##function##KernelExec::Do) -#define STRING_STREAM_TEXT_FORMATTER_UDF(function) \ +#define STRING_STREAM_TEXT_FORMATTER_UDF(function) \ BEGIN_SIMPLE_STRICT_ARROW_UDF(T##function, char*(TAutoMap<char*>)) { \ TStringStream result; \ const TStringBuf input(args[0].AsStringRef()); \ @@ -365,8 +351,7 @@ SIMPLE_STRICT_UDF_OPTIONS(TReverse, TOptional<char*>(TOptional<char*>), } \ \ struct T##function##KernelExec \ - : public TUnaryKernelExec<T##function##KernelExec> \ - { \ + : public TUnaryKernelExec<T##function##KernelExec> { \ template <typename TSink> \ static void Process(const IValueBuilder*, TBlockItem arg1, const TSink& sink) { \ TStringStream result; \ @@ -378,8 +363,7 @@ SIMPLE_STRICT_UDF_OPTIONS(TReverse, TOptional<char*>(TOptional<char*>), \ END_SIMPLE_ARROW_UDF(T##function, T##function##KernelExec::Do) - -#define STRING_STREAM_HRSZ_FORMATTER_UDF(udfName, hrSize) \ +#define STRING_STREAM_HRSZ_FORMATTER_UDF(udfName, hrSize) \ BEGIN_SIMPLE_STRICT_ARROW_UDF(T##udfName, char*(TAutoMap<ui64>)) { \ TStringStream result; \ result << HumanReadableSize(args[0].Get<ui64>(), hrSize); \ @@ -387,8 +371,7 @@ SIMPLE_STRICT_UDF_OPTIONS(TReverse, TOptional<char*>(TOptional<char*>), } \ \ struct T##udfName##KernelExec \ - : public TUnaryKernelExec<T##udfName##KernelExec> \ - { \ + : 
public TUnaryKernelExec<T##udfName##KernelExec> { \ template <typename TSink> \ static void Process(const IValueBuilder*, TBlockItem arg1, const TSink& sink) { \ TStringStream result; \ @@ -415,11 +398,11 @@ SIMPLE_STRICT_UDF_OPTIONS(TReverse, TOptional<char*>(TOptional<char*>), XX(ReverseBytes, ReverseBytes, NYql::MakeLangVersion(2025, 2)) \ XX(ReverseBits, ReverseBits, NYql::MakeLangVersion(2025, 2)) -#define STRING_UNSAFE_UDF_MAP(XX) \ - XX(Base32Decode, Base32Decode) \ - XX(Base32StrictDecode, Base32StrictDecode) \ - XX(Base64Decode, Base64Decode) \ - XX(Base64StrictDecode, Base64StrictDecode) \ +#define STRING_UNSAFE_UDF_MAP(XX) \ + XX(Base32Decode, Base32Decode) \ + XX(Base32StrictDecode, Base32StrictDecode) \ + XX(Base64Decode, Base64Decode) \ + XX(Base64StrictDecode, Base64StrictDecode) \ XX(HexDecode, HexDecode) // NOTE: The functions below are marked as deprecated, so block implementation @@ -492,196 +475,212 @@ SIMPLE_STRICT_UDF_OPTIONS(TReverse, TOptional<char*>(TOptional<char*>), XX(HumanReadableQuantity, SF_QUANTITY) \ XX(HumanReadableBytes, SF_BYTES) +BEGIN_SIMPLE_STRICT_ARROW_UDF(TCollapseText, char*(TAutoMap<char*>, ui64)) { + TString input(args[0].AsStringRef()); + ui64 maxLength = args[1].Get<ui64>(); + CollapseText(input, maxLength); + return valueBuilder->NewString(input); +} - BEGIN_SIMPLE_STRICT_ARROW_UDF(TCollapseText, char*(TAutoMap<char*>, ui64)) { - TString input(args[0].AsStringRef()); - ui64 maxLength = args[1].Get<ui64>(); +struct TCollapseTextKernelExec + : public TBinaryKernelExec<TCollapseTextKernelExec> { + template <typename TSink> + static void Process(const IValueBuilder*, TBlockItem arg1, TBlockItem arg2, const TSink& sink) { + TString input(arg1.AsStringRef()); + ui64 maxLength = arg2.Get<ui64>(); CollapseText(input, maxLength); - return valueBuilder->NewString(input); + return sink(TBlockItem(input)); } +}; - struct TCollapseTextKernelExec - : public TBinaryKernelExec<TCollapseTextKernelExec> - { - template <typename TSink> - 
static void Process(const IValueBuilder*, TBlockItem arg1, TBlockItem arg2, const TSink& sink) { - TString input(arg1.AsStringRef()); - ui64 maxLength = arg2.Get<ui64>(); - CollapseText(input, maxLength); - return sink(TBlockItem(input)); - } - }; - - END_SIMPLE_ARROW_UDF(TCollapseText, TCollapseTextKernelExec::Do); +END_SIMPLE_ARROW_UDF(TCollapseText, TCollapseTextKernelExec::Do); - - BEGIN_SIMPLE_STRICT_ARROW_UDF(TContains, bool(TOptional<char*>, char*)) { - Y_UNUSED(valueBuilder); - if (!args[0]) - return TUnboxedValuePod(false); - - const TStringBuf haystack(args[0].AsStringRef()); - const TStringBuf needle(args[1].AsStringRef()); - return TUnboxedValuePod(haystack.Contains(needle)); +BEGIN_SIMPLE_STRICT_ARROW_UDF(TContains, bool(TOptional<char*>, char*)) { + Y_UNUSED(valueBuilder); + if (!args[0]) { + return TUnboxedValuePod(false); } - struct TContainsKernelExec : public TBinaryKernelExec<TContainsKernelExec> { - template <typename TSink> - static void Process(const IValueBuilder*, TBlockItem arg1, TBlockItem arg2, const TSink& sink) { - if (!arg1) - return sink(TBlockItem(false)); + const TStringBuf haystack(args[0].AsStringRef()); + const TStringBuf needle(args[1].AsStringRef()); + return TUnboxedValuePod(haystack.Contains(needle)); +} - const TStringBuf haystack(arg1.AsStringRef()); - const TStringBuf needle(arg2.AsStringRef()); - sink(TBlockItem(haystack.Contains(needle))); +struct TContainsKernelExec: public TBinaryKernelExec<TContainsKernelExec> { + template <typename TSink> + static void Process(const IValueBuilder*, TBlockItem arg1, TBlockItem arg2, const TSink& sink) { + if (!arg1) { + return sink(TBlockItem(false)); } - }; - - END_SIMPLE_ARROW_UDF(TContains, TContainsKernelExec::Do); - static bool IgnoreCaseComparator(char a, char b) { - return AsciiToUpper(a) == AsciiToUpper(b); + const TStringBuf haystack(arg1.AsStringRef()); + const TStringBuf needle(arg2.AsStringRef()); + sink(TBlockItem(haystack.Contains(needle))); } +}; - struct 
TAsciiContainsIgnoreCaseKernelExec - : public TBinaryKernelExec<TAsciiContainsIgnoreCaseKernelExec> - { - template <typename TSink> - static void Process(const IValueBuilder*, TBlockItem arg1, TBlockItem arg2, const TSink& sink) { - if (!arg1) { - return sink(TBlockItem(arg2 ? false : true)); - } +END_SIMPLE_ARROW_UDF(TContains, TContainsKernelExec::Do); - const TStringBuf haystack(arg1.AsStringRef()); - const TStringBuf needle(arg2.AsStringRef()); - if (haystack.empty()) { - return sink(TBlockItem((needle.empty()))); - } - const auto found = std::search(haystack.cbegin(), haystack.cend(), - needle.cbegin(), needle.cend(), IgnoreCaseComparator); - sink(TBlockItem(found != haystack.cend())); - } - }; +static bool IgnoreCaseComparator(char a, char b) { + return AsciiToUpper(a) == AsciiToUpper(b); +} - TUnboxedValuePod AsciiContainsIgnoreCaseImpl(const TUnboxedValuePod* args) { - if (!args[0]) { - return TUnboxedValuePod(false); +struct TAsciiContainsIgnoreCaseKernelExec + : public TBinaryKernelExec<TAsciiContainsIgnoreCaseKernelExec> { + template <typename TSink> + static void Process(const IValueBuilder*, TBlockItem arg1, TBlockItem arg2, const TSink& sink) { + if (!arg1) { + return sink(TBlockItem(arg2 ? 
false : true)); } - const TStringBuf haystack(args[0].AsStringRef()); - const TStringBuf needle(args[1].AsStringRef()); + const TStringBuf haystack(arg1.AsStringRef()); + const TStringBuf needle(arg2.AsStringRef()); if (haystack.empty()) { - return TUnboxedValuePod(needle.empty()); + return sink(TBlockItem((needle.empty()))); } const auto found = std::search(haystack.cbegin(), haystack.cend(), needle.cbegin(), needle.cend(), IgnoreCaseComparator); - return TUnboxedValuePod(found != haystack.cend()); + sink(TBlockItem(found != haystack.cend())); } +}; - BEGIN_SIMPLE_STRICT_ARROW_UDF_OPTIONS(TAsciiContainsIgnoreCase, bool(TOptional<char*>, char*), - builder.SetMinLangVer(NYql::MakeLangVersion(2025, 2))) - { - Y_UNUSED(valueBuilder); - return AsciiContainsIgnoreCaseImpl(args); +TUnboxedValuePod AsciiContainsIgnoreCaseImpl(const TUnboxedValuePod* args) { + if (!args[0]) { + return TUnboxedValuePod(false); } - END_SIMPLE_ARROW_UDF(TAsciiContainsIgnoreCase, TAsciiContainsIgnoreCaseKernelExec::Do); - - BEGIN_SIMPLE_STRICT_ARROW_UDF(T_yql_AsciiContainsIgnoreCase, bool(TOptional<char*>, char*)) - { - Y_UNUSED(valueBuilder); - return AsciiContainsIgnoreCaseImpl(args); + const TStringBuf haystack(args[0].AsStringRef()); + const TStringBuf needle(args[1].AsStringRef()); + if (haystack.empty()) { + return TUnboxedValuePod(needle.empty()); } + const auto found = std::search(haystack.cbegin(), haystack.cend(), + needle.cbegin(), needle.cend(), IgnoreCaseComparator); + return TUnboxedValuePod(found != haystack.cend()); +} - END_SIMPLE_ARROW_UDF(T_yql_AsciiContainsIgnoreCase, TAsciiContainsIgnoreCaseKernelExec::Do); - - BEGIN_SIMPLE_STRICT_ARROW_UDF(TReplaceAll, char*(TAutoMap<char*>, char*, char*)) { - if (TString result(args[0].AsStringRef()); SubstGlobal(result, args[1].AsStringRef(), args[2].AsStringRef())) - return valueBuilder->NewString(result); - else - return args[0]; - } +BEGIN_SIMPLE_STRICT_ARROW_UDF_OPTIONS(TAsciiContainsIgnoreCase, bool(TOptional<char*>, char*), + 
builder.SetMinLangVer(NYql::MakeLangVersion(2025, 2))) +{ + Y_UNUSED(valueBuilder); + return AsciiContainsIgnoreCaseImpl(args); +} - struct TReplaceAllKernelExec - : public TGenericKernelExec<TReplaceAllKernelExec, 3> - { - template <typename TSink> - static void Process(const IValueBuilder*, TBlockItem args, const TSink& sink) { - TString result(args.GetElement(0).AsStringRef()); - const TStringBuf what(args.GetElement(1).AsStringRef()); - const TStringBuf with(args.GetElement(2).AsStringRef()); - if (SubstGlobal(result, what, with)) { - return sink(TBlockItem(result)); - } else { - return sink(args.GetElement(0)); - } - } - }; +END_SIMPLE_ARROW_UDF(TAsciiContainsIgnoreCase, TAsciiContainsIgnoreCaseKernelExec::Do); - END_SIMPLE_ARROW_UDF(TReplaceAll, TReplaceAllKernelExec::Do) +BEGIN_SIMPLE_STRICT_ARROW_UDF(T_yql_AsciiContainsIgnoreCase, bool(TOptional<char*>, char*)) +{ + Y_UNUSED(valueBuilder); + return AsciiContainsIgnoreCaseImpl(args); +} +END_SIMPLE_ARROW_UDF(T_yql_AsciiContainsIgnoreCase, TAsciiContainsIgnoreCaseKernelExec::Do); - BEGIN_SIMPLE_STRICT_ARROW_UDF(TReplaceFirst, char*(TAutoMap<char*>, char*, char*)) { - std::string result(args[0].AsStringRef()); - const std::string_view what(args[1].AsStringRef()); - if (const auto index = result.find(what); index != std::string::npos) { - result.replace(index, what.size(), std::string_view(args[2].AsStringRef())); - return valueBuilder->NewString(result); - } +BEGIN_SIMPLE_STRICT_ARROW_UDF(TReplaceAll, char*(TAutoMap<char*>, char*, char*)) { + if (TString result(args[0].AsStringRef()); SubstGlobal(result, args[1].AsStringRef(), args[2].AsStringRef())) { + return valueBuilder->NewString(result); + } else { return args[0]; } +} - struct TReplaceFirstKernelExec - : public TGenericKernelExec<TReplaceFirstKernelExec, 3> - { - template <typename TSink> - static void Process(const IValueBuilder*, TBlockItem args, const TSink& sink) { - std::string result(args.GetElement(0).AsStringRef()); - const std::string_view 
what(args.GetElement(1).AsStringRef()); - const std::string_view with(args.GetElement(2).AsStringRef()); - if (const auto index = result.find(what); index != std::string::npos) { - result.replace(index, what.size(), with); - return sink(TBlockItem(result)); - } +struct TReplaceAllKernelExec + : public TGenericKernelExec<TReplaceAllKernelExec, 3> { + template <typename TSink> + static void Process(const IValueBuilder*, TBlockItem args, const TSink& sink) { + TString result(args.GetElement(0).AsStringRef()); + const TStringBuf what(args.GetElement(1).AsStringRef()); + const TStringBuf with(args.GetElement(2).AsStringRef()); + if (SubstGlobal(result, what, with)) { + return sink(TBlockItem(result)); + } else { return sink(args.GetElement(0)); } - }; + } +}; - END_SIMPLE_ARROW_UDF(TReplaceFirst, TReplaceFirstKernelExec::Do) +END_SIMPLE_ARROW_UDF(TReplaceAll, TReplaceAllKernelExec::Do) +BEGIN_SIMPLE_STRICT_ARROW_UDF(TReplaceFirst, char*(TAutoMap<char*>, char*, char*)) { + std::string result(args[0].AsStringRef()); + const std::string_view what(args[1].AsStringRef()); + if (const auto index = result.find(what); index != std::string::npos) { + result.replace(index, what.size(), std::string_view(args[2].AsStringRef())); + return valueBuilder->NewString(result); + } + return args[0]; +} - BEGIN_SIMPLE_STRICT_ARROW_UDF(TReplaceLast, char*(TAutoMap<char*>, char*, char*)) { - std::string result(args[0].AsStringRef()); - const std::string_view what(args[1].AsStringRef()); - if (const auto index = result.rfind(what); index != std::string::npos) { - result.replace(index, what.size(), std::string_view(args[2].AsStringRef())); - return valueBuilder->NewString(result); +struct TReplaceFirstKernelExec + : public TGenericKernelExec<TReplaceFirstKernelExec, 3> { + template <typename TSink> + static void Process(const IValueBuilder*, TBlockItem args, const TSink& sink) { + std::string result(args.GetElement(0).AsStringRef()); + const std::string_view 
what(args.GetElement(1).AsStringRef()); + const std::string_view with(args.GetElement(2).AsStringRef()); + if (const auto index = result.find(what); index != std::string::npos) { + result.replace(index, what.size(), with); + return sink(TBlockItem(result)); } - return args[0]; + return sink(args.GetElement(0)); } +}; - struct TReplaceLastKernelExec - : public TGenericKernelExec<TReplaceLastKernelExec, 3> - { - template <typename TSink> - static void Process(const IValueBuilder*, TBlockItem args, const TSink& sink) { - std::string result(args.GetElement(0).AsStringRef()); - const std::string_view what(args.GetElement(1).AsStringRef()); - const std::string_view with(args.GetElement(2).AsStringRef()); - if (const auto index = result.rfind(what); index != std::string::npos) { - result.replace(index, what.size(), with); - return sink(TBlockItem(result)); - } - return sink(args.GetElement(0)); +END_SIMPLE_ARROW_UDF(TReplaceFirst, TReplaceFirstKernelExec::Do) + +BEGIN_SIMPLE_STRICT_ARROW_UDF(TReplaceLast, char*(TAutoMap<char*>, char*, char*)) { + std::string result(args[0].AsStringRef()); + const std::string_view what(args[1].AsStringRef()); + if (const auto index = result.rfind(what); index != std::string::npos) { + result.replace(index, what.size(), std::string_view(args[2].AsStringRef())); + return valueBuilder->NewString(result); + } + return args[0]; +} + +struct TReplaceLastKernelExec + : public TGenericKernelExec<TReplaceLastKernelExec, 3> { + template <typename TSink> + static void Process(const IValueBuilder*, TBlockItem args, const TSink& sink) { + std::string result(args.GetElement(0).AsStringRef()); + const std::string_view what(args.GetElement(1).AsStringRef()); + const std::string_view with(args.GetElement(2).AsStringRef()); + if (const auto index = result.rfind(what); index != std::string::npos) { + result.replace(index, what.size(), with); + return sink(TBlockItem(result)); } - }; + return sink(args.GetElement(0)); + } +}; - 
END_SIMPLE_ARROW_UDF(TReplaceLast, TReplaceLastKernelExec::Do) +END_SIMPLE_ARROW_UDF(TReplaceLast, TReplaceLastKernelExec::Do) +BEGIN_SIMPLE_STRICT_ARROW_UDF(TRemoveAll, char*(TAutoMap<char*>, char*)) { + std::string input(args[0].AsStringRef()); + const std::string_view remove(args[1].AsStringRef()); + std::array<bool, 256> chars{}; + for (const ui8 c : remove) { + chars[c] = true; + } + size_t tpos = 0; + for (const ui8 c : input) { + if (!chars[c]) { + input[tpos++] = c; + } + } + if (tpos != input.size()) { + input.resize(tpos); + return valueBuilder->NewString(input); + } + return args[0]; +} - BEGIN_SIMPLE_STRICT_ARROW_UDF(TRemoveAll, char*(TAutoMap<char*>, char*)) { - std::string input(args[0].AsStringRef()); - const std::string_view remove(args[1].AsStringRef()); +struct TRemoveAllKernelExec + : public TBinaryKernelExec<TRemoveAllKernelExec> { + template <typename TSink> + static void Process(const IValueBuilder*, TBlockItem arg1, TBlockItem arg2, const TSink& sink) { + std::string input(arg1.AsStringRef()); + const std::string_view remove(arg2.AsStringRef()); std::array<bool, 256> chars{}; for (const ui8 c : remove) { chars[c] = true; @@ -694,42 +693,36 @@ SIMPLE_STRICT_UDF_OPTIONS(TReverse, TOptional<char*>(TOptional<char*>), } if (tpos != input.size()) { input.resize(tpos); - return valueBuilder->NewString(input); + return sink(TBlockItem(input)); } - return args[0]; + sink(arg1); } +}; - struct TRemoveAllKernelExec - : public TBinaryKernelExec<TRemoveAllKernelExec> - { - template <typename TSink> - static void Process(const IValueBuilder*, TBlockItem arg1, TBlockItem arg2, const TSink& sink) { - std::string input(arg1.AsStringRef()); - const std::string_view remove(arg2.AsStringRef()); - std::array<bool, 256> chars{}; - for (const ui8 c : remove) { - chars[c] = true; - } - size_t tpos = 0; - for (const ui8 c : input) { - if (!chars[c]) { - input[tpos++] = c; - } - } - if (tpos != input.size()) { - input.resize(tpos); - return sink(TBlockItem(input)); - 
} - sink(arg1); - } - }; - - END_SIMPLE_ARROW_UDF(TRemoveAll, TRemoveAllKernelExec::Do) +END_SIMPLE_ARROW_UDF(TRemoveAll, TRemoveAllKernelExec::Do) +BEGIN_SIMPLE_STRICT_ARROW_UDF(TRemoveFirst, char*(TAutoMap<char*>, char*)) { + std::string input(args[0].AsStringRef()); + const std::string_view remove(args[1].AsStringRef()); + std::array<bool, 256> chars{}; + for (const ui8 c : remove) { + chars[c] = true; + } + for (auto it = input.cbegin(); it != input.cend(); ++it) { + if (chars[static_cast<ui8>(*it)]) { + input.erase(it); + return valueBuilder->NewString(input); + } + } + return args[0]; +} - BEGIN_SIMPLE_STRICT_ARROW_UDF(TRemoveFirst, char*(TAutoMap<char*>, char*)) { - std::string input(args[0].AsStringRef()); - const std::string_view remove(args[1].AsStringRef()); +struct TRemoveFirstKernelExec + : public TBinaryKernelExec<TRemoveFirstKernelExec> { + template <typename TSink> + static void Process(const IValueBuilder*, TBlockItem arg1, TBlockItem arg2, const TSink& sink) { + std::string input(arg1.AsStringRef()); + const std::string_view remove(arg2.AsStringRef()); std::array<bool, 256> chars{}; for (const ui8 c : remove) { chars[c] = true; @@ -737,39 +730,37 @@ SIMPLE_STRICT_UDF_OPTIONS(TReverse, TOptional<char*>(TOptional<char*>), for (auto it = input.cbegin(); it != input.cend(); ++it) { if (chars[static_cast<ui8>(*it)]) { input.erase(it); - return valueBuilder->NewString(input); + return sink(TBlockItem(input)); } } - return args[0]; + sink(arg1); } +}; - struct TRemoveFirstKernelExec - : public TBinaryKernelExec<TRemoveFirstKernelExec> - { - template <typename TSink> - static void Process(const IValueBuilder*, TBlockItem arg1, TBlockItem arg2, const TSink& sink) { - std::string input(arg1.AsStringRef()); - const std::string_view remove(arg2.AsStringRef()); - std::array<bool, 256> chars{}; - for (const ui8 c : remove) { - chars[c] = true; - } - for (auto it = input.cbegin(); it != input.cend(); ++it) { - if (chars[static_cast<ui8>(*it)]) { - 
input.erase(it); - return sink(TBlockItem(input)); - } - } - sink(arg1); - } - }; - - END_SIMPLE_ARROW_UDF(TRemoveFirst, TRemoveFirstKernelExec::Do) +END_SIMPLE_ARROW_UDF(TRemoveFirst, TRemoveFirstKernelExec::Do) +BEGIN_SIMPLE_STRICT_ARROW_UDF(TRemoveLast, char*(TAutoMap<char*>, char*)) { + std::string input(args[0].AsStringRef()); + const std::string_view remove(args[1].AsStringRef()); + std::array<bool, 256> chars{}; + for (const ui8 c : remove) { + chars[c] = true; + } + for (auto it = input.crbegin(); it != input.crend(); ++it) { + if (chars[static_cast<ui8>(*it)]) { + input.erase(input.crend() - it - 1, 1); + return valueBuilder->NewString(input); + } + } + return args[0]; +} - BEGIN_SIMPLE_STRICT_ARROW_UDF(TRemoveLast, char*(TAutoMap<char*>, char*)) { - std::string input(args[0].AsStringRef()); - const std::string_view remove(args[1].AsStringRef()); +struct TRemoveLastKernelExec + : public TBinaryKernelExec<TRemoveLastKernelExec> { + template <typename TSink> + static void Process(const IValueBuilder*, TBlockItem arg1, TBlockItem arg2, const TSink& sink) { + std::string input(arg1.AsStringRef()); + const std::string_view remove(arg2.AsStringRef()); std::array<bool, 256> chars{}; for (const ui8 c : remove) { chars[c] = true; @@ -777,347 +768,318 @@ SIMPLE_STRICT_UDF_OPTIONS(TReverse, TOptional<char*>(TOptional<char*>), for (auto it = input.crbegin(); it != input.crend(); ++it) { if (chars[static_cast<ui8>(*it)]) { input.erase(input.crend() - it - 1, 1); - return valueBuilder->NewString(input); + return sink(TBlockItem(input)); } } - return args[0]; + sink(arg1); } +}; - struct TRemoveLastKernelExec - : public TBinaryKernelExec<TRemoveLastKernelExec> - { - template <typename TSink> - static void Process(const IValueBuilder*, TBlockItem arg1, TBlockItem arg2, const TSink& sink) { - std::string input(arg1.AsStringRef()); - const std::string_view remove(arg2.AsStringRef()); - std::array<bool, 256> chars{}; - for (const ui8 c : remove) { - chars[c] = true; - } - 
for (auto it = input.crbegin(); it != input.crend(); ++it) { - if (chars[static_cast<ui8>(*it)]) { - input.erase(input.crend() - it - 1, 1); - return sink(TBlockItem(input)); - } - } - sink(arg1); - } - }; - - END_SIMPLE_ARROW_UDF(TRemoveLast, TRemoveLastKernelExec::Do) +END_SIMPLE_ARROW_UDF(TRemoveLast, TRemoveLastKernelExec::Do) +// NOTE: String::Find is marked as deprecated, so block implementation is +// not required for them. Hence, only the scalar one is provided. +SIMPLE_STRICT_UDF_WITH_OPTIONAL_ARGS(TFind, i64(TAutoMap<char*>, char*, TOptional<ui64>), 1) { + Y_UNUSED(valueBuilder); + const TStringBuf haystack(args[0].AsStringRef()); + const TStringBuf needle(args[1].AsStringRef()); + const ui64 pos = args[2].GetOrDefault<ui64>(0); + return TUnboxedValuePod(haystack.find(needle, pos)); +} - // NOTE: String::Find is marked as deprecated, so block implementation is - // not required for them. Hence, only the scalar one is provided. - SIMPLE_STRICT_UDF_WITH_OPTIONAL_ARGS(TFind, i64(TAutoMap<char*>, char*, TOptional<ui64>), 1) { - Y_UNUSED(valueBuilder); - const TStringBuf haystack(args[0].AsStringRef()); - const TStringBuf needle(args[1].AsStringRef()); - const ui64 pos = args[2].GetOrDefault<ui64>(0); - return TUnboxedValuePod(haystack.find(needle, pos)); - } - - // NOTE: String::ReverseFind is marked as deprecated, so block - // implementation is not required for them. Hence, only the scalar one is - // provided. - SIMPLE_STRICT_UDF_WITH_OPTIONAL_ARGS(TReverseFind, i64(TAutoMap<char*>, char*, TOptional<ui64>), 1) { - Y_UNUSED(valueBuilder); - const TStringBuf haystack(args[0].AsStringRef()); - const TStringBuf needle(args[1].AsStringRef()); - const ui64 pos = args[2].GetOrDefault<ui64>(TStringBuf::npos); - return TUnboxedValuePod(haystack.rfind(needle, pos)); - } +// NOTE: String::ReverseFind is marked as deprecated, so block +// implementation is not required for them. Hence, only the scalar one is +// provided. 
+SIMPLE_STRICT_UDF_WITH_OPTIONAL_ARGS(TReverseFind, i64(TAutoMap<char*>, char*, TOptional<ui64>), 1) { + Y_UNUSED(valueBuilder); + const TStringBuf haystack(args[0].AsStringRef()); + const TStringBuf needle(args[1].AsStringRef()); + const ui64 pos = args[2].GetOrDefault<ui64>(TStringBuf::npos); + return TUnboxedValuePod(haystack.rfind(needle, pos)); +} - // NOTE: String::Substring is marked as deprecated, so block implementation - // is not required for them. Hence, only the scalar one is provided. - SIMPLE_STRICT_UDF_WITH_OPTIONAL_ARGS(TSubstring, char*(TAutoMap<char*>, TOptional<ui64>, TOptional<ui64>), 1) { - const TStringBuf input(args[0].AsStringRef()); - const ui64 from = args[1].GetOrDefault<ui64>(0); - const ui64 count = args[2].GetOrDefault<ui64>(TStringBuf::npos); - return valueBuilder->NewString(input.substr(from, count)); - } +// NOTE: String::Substring is marked as deprecated, so block implementation +// is not required for them. Hence, only the scalar one is provided. +SIMPLE_STRICT_UDF_WITH_OPTIONAL_ARGS(TSubstring, char*(TAutoMap<char*>, TOptional<ui64>, TOptional<ui64>), 1) { + const TStringBuf input(args[0].AsStringRef()); + const ui64 from = args[1].GetOrDefault<ui64>(0); + const ui64 count = args[2].GetOrDefault<ui64>(TStringBuf::npos); + return valueBuilder->NewString(input.substr(from, count)); +} - using TTmpVector = TSmallVec<TUnboxedValue, TUnboxedValue::TAllocator>; +using TTmpVector = TSmallVec<TUnboxedValue, TUnboxedValue::TAllocator>; - template <typename TIt> - static void SplitToListImpl( - const IValueBuilder* valueBuilder, - const TUnboxedValue& input, - const std::string_view::const_iterator from, - const TIt& it, - TTmpVector& result) { - for (const auto& elem : it) { - result.emplace_back(valueBuilder->SubString(input, std::distance(from, elem.TokenStart()), std::distance(elem.TokenStart(), elem.TokenDelim()))); - } +template <typename TIt> +static void SplitToListImpl( + const IValueBuilder* valueBuilder, + const TUnboxedValue& 
input, + const std::string_view::const_iterator from, + const TIt& it, + TTmpVector& result) { + for (const auto& elem : it) { + result.emplace_back(valueBuilder->SubString(input, std::distance(from, elem.TokenStart()), std::distance(elem.TokenStart(), elem.TokenDelim()))); } - template <typename TIt> - static void SplitToListImpl( - const IValueBuilder* valueBuilder, - const TUnboxedValue& input, - const std::string_view::const_iterator from, - TIt& it, - bool skipEmpty, - TTmpVector& result) { - if (skipEmpty) { - SplitToListImpl(valueBuilder, input, from, it.SkipEmpty(), result); - } else { - SplitToListImpl(valueBuilder, input, from, it, result); - } +} +template <typename TIt> +static void SplitToListImpl( + const IValueBuilder* valueBuilder, + const TUnboxedValue& input, + const std::string_view::const_iterator from, + TIt& it, + bool skipEmpty, + TTmpVector& result) { + if (skipEmpty) { + SplitToListImpl(valueBuilder, input, from, it.SkipEmpty(), result); + } else { + SplitToListImpl(valueBuilder, input, from, it, result); } +} - constexpr char delimeterStringName[] = "DelimeterString"; - constexpr char skipEmptyName[] = "SkipEmpty"; - constexpr char limitName[] = "Limit"; - using TDelimeterStringArg = TNamedArg<bool, delimeterStringName>; - using TSkipEmptyArg = TNamedArg<bool, skipEmptyName>; - using TLimitArg = TNamedArg<ui64, limitName>; - +constexpr char delimeterStringName[] = "DelimeterString"; +constexpr char skipEmptyName[] = "SkipEmpty"; +constexpr char limitName[] = "Limit"; +using TDelimeterStringArg = TNamedArg<bool, delimeterStringName>; +using TSkipEmptyArg = TNamedArg<bool, skipEmptyName>; +using TLimitArg = TNamedArg<ui64, limitName>; - SIMPLE_STRICT_UDF_WITH_OPTIONAL_ARGS(TSplitToList, TListType<char*>( - TOptional<char*>, - char*, - TDelimeterStringArg, - TSkipEmptyArg, - TLimitArg - ), - 3) { - TTmpVector result; - if (args[0]) { - const std::string_view input(args[0].AsStringRef()); - const std::string_view 
delimeter(args[1].AsStringRef()); - const bool delimiterString = args[2].GetOrDefault<bool>(true); - const bool skipEmpty = args[3].GetOrDefault<bool>(false); - const auto limit = args[4].GetOrDefault<ui64>(0); - if (delimiterString) { - if (limit) { - auto it = StringSplitter(input).SplitByString(delimeter).Limit(limit + 1); - SplitToListImpl(valueBuilder, args[0], input.cbegin(), it, skipEmpty, result); - } else { - auto it = StringSplitter(input).SplitByString(delimeter); - SplitToListImpl(valueBuilder, args[0], input.cbegin(), it, skipEmpty, result); - } +SIMPLE_STRICT_UDF_WITH_OPTIONAL_ARGS(TSplitToList, TListType<char*>(TOptional<char*>, + char*, + TDelimeterStringArg, + TSkipEmptyArg, + TLimitArg), + 3) { + TTmpVector result; + if (args[0]) { + const std::string_view input(args[0].AsStringRef()); + const std::string_view delimeter(args[1].AsStringRef()); + const bool delimiterString = args[2].GetOrDefault<bool>(true); + const bool skipEmpty = args[3].GetOrDefault<bool>(false); + const auto limit = args[4].GetOrDefault<ui64>(0); + if (delimiterString) { + if (limit) { + auto it = StringSplitter(input).SplitByString(delimeter).Limit(limit + 1); + SplitToListImpl(valueBuilder, args[0], input.cbegin(), it, skipEmpty, result); } else { - if (limit) { - auto it = StringSplitter(input).SplitBySet(TString(delimeter).c_str()).Limit(limit + 1); - SplitToListImpl(valueBuilder, args[0], input.cbegin(), it, skipEmpty, result); - } else { - auto it = StringSplitter(input).SplitBySet(TString(delimeter).c_str()); - SplitToListImpl(valueBuilder, args[0], input.cbegin(), it, skipEmpty, result); - } + auto it = StringSplitter(input).SplitByString(delimeter); + SplitToListImpl(valueBuilder, args[0], input.cbegin(), it, skipEmpty, result); + } + } else { + if (limit) { + auto it = StringSplitter(input).SplitBySet(TString(delimeter).c_str()).Limit(limit + 1); + SplitToListImpl(valueBuilder, args[0], input.cbegin(), it, skipEmpty, result); + } else { + auto it = 
StringSplitter(input).SplitBySet(TString(delimeter).c_str()); + SplitToListImpl(valueBuilder, args[0], input.cbegin(), it, skipEmpty, result); } } - return valueBuilder->NewList(result.data(), result.size()); } + return valueBuilder->NewList(result.data(), result.size()); +} - SIMPLE_STRICT_UDF(TJoinFromList, char*(TAutoMap<TListType<TOptional<char*>>>, char*)) { - const TStringBuf delimeter(args[1].AsStringRef()); +SIMPLE_STRICT_UDF(TJoinFromList, char*(TAutoMap<TListType<TOptional<char*>>>, char*)) { + const TStringBuf delimeter(args[1].AsStringRef()); - // Construct the string in-place if the list is eager. - if (auto elems = args[0].GetElements()) { - ui64 elemCount = args[0].GetListLength(); - ui64 valueCount = 0; - ui64 resultLength = 0; + // Construct the string in-place if the list is eager. + if (auto elems = args[0].GetElements()) { + ui64 elemCount = args[0].GetListLength(); + ui64 valueCount = 0; + ui64 resultLength = 0; - for (ui64 i = 0; i != elemCount; ++i) { - if (elems[i]) { - resultLength += elems[i].AsStringRef().Size(); - ++valueCount; - } - } - if (valueCount > 0) { - resultLength += (valueCount - 1) * delimeter.size(); + for (ui64 i = 0; i != elemCount; ++i) { + if (elems[i]) { + resultLength += elems[i].AsStringRef().Size(); + ++valueCount; } + } + if (valueCount > 0) { + resultLength += (valueCount - 1) * delimeter.size(); + } - TUnboxedValue result = valueBuilder->NewStringNotFilled(resultLength); - if (!resultLength) { - return result; - } + TUnboxedValue result = valueBuilder->NewStringNotFilled(resultLength); + if (!resultLength) { + return result; + } - const auto buffer = result.AsStringRef(); - auto it = buffer.Data(); - const auto bufferEnd = buffer.Data() + buffer.Size(); - for (ui64 i = 0; i != elemCount; ++i) { - if (elems[i]) { - TStringBuf curStr = elems[i].AsStringRef(); - memcpy(it, curStr.data(), curStr.size()); - it += curStr.size(); + const auto buffer = result.AsStringRef(); + auto it = buffer.Data(); + const auto 
bufferEnd = buffer.Data() + buffer.Size(); + for (ui64 i = 0; i != elemCount; ++i) { + if (elems[i]) { + TStringBuf curStr = elems[i].AsStringRef(); + memcpy(it, curStr.data(), curStr.size()); + it += curStr.size(); - // Last element just has been written. - if (it == bufferEnd) { - break; - } - memcpy(it, delimeter.data(), delimeter.size()); - it += delimeter.size(); + // Last element just has been written. + if (it == bufferEnd) { + break; } + memcpy(it, delimeter.data(), delimeter.size()); + it += delimeter.size(); } - return result; } + return result; + } - auto input = args[0].GetListIterator(); + auto input = args[0].GetListIterator(); - // Since UnboxedValue can embed small strings, iterating over the list may invalidate StringRefs, thus a copy is required. - TVector<TString, TStdAllocatorForUdf<TString>> items; - if (args[0].HasFastListLength()) { - items.reserve(args[0].GetListLength()); - } + // Since UnboxedValue can embed small strings, iterating over the list may invalidate StringRefs, thus a copy is required. 
+ TVector<TString, TStdAllocatorForUdf<TString>> items; + if (args[0].HasFastListLength()) { + items.reserve(args[0].GetListLength()); + } - for (TUnboxedValue current; input.Next(current);) { - if (current) { - items.emplace_back(current.AsStringRef()); - } + for (TUnboxedValue current; input.Next(current);) { + if (current) { + items.emplace_back(current.AsStringRef()); } - - return valueBuilder->NewString(JoinSeq(delimeter, items)); } - BEGIN_SIMPLE_STRICT_ARROW_UDF(TLevensteinDistance, ui64(TAutoMap<char*>, TAutoMap<char*>)) { - Y_UNUSED(valueBuilder); - const TStringBuf left(args[0].AsStringRef()); - const TStringBuf right(args[1].AsStringRef()); - const ui64 result = NLevenshtein::Distance(left, right); - return TUnboxedValuePod(result); - } + return valueBuilder->NewString(JoinSeq(delimeter, items)); +} - struct TLevensteinDistanceKernelExec : public TBinaryKernelExec<TLevensteinDistanceKernelExec> { - template <typename TSink> - static void Process(const IValueBuilder*, TBlockItem arg1, TBlockItem arg2, const TSink& sink) { - const std::string_view left(arg1.AsStringRef()); - const std::string_view right(arg2.AsStringRef()); - const ui64 result = NLevenshtein::Distance(left, right); - sink(TBlockItem(result)); - } - }; +BEGIN_SIMPLE_STRICT_ARROW_UDF(TLevensteinDistance, ui64(TAutoMap<char*>, TAutoMap<char*>)) { + Y_UNUSED(valueBuilder); + const TStringBuf left(args[0].AsStringRef()); + const TStringBuf right(args[1].AsStringRef()); + const ui64 result = NLevenshtein::Distance(left, right); + return TUnboxedValuePod(result); +} - END_SIMPLE_ARROW_UDF(TLevensteinDistance, TLevensteinDistanceKernelExec::Do); +struct TLevensteinDistanceKernelExec: public TBinaryKernelExec<TLevensteinDistanceKernelExec> { + template <typename TSink> + static void Process(const IValueBuilder*, TBlockItem arg1, TBlockItem arg2, const TSink& sink) { + const std::string_view left(arg1.AsStringRef()); + const std::string_view right(arg2.AsStringRef()); + const ui64 result = 
NLevenshtein::Distance(left, right); + sink(TBlockItem(result)); + } +}; +END_SIMPLE_ARROW_UDF(TLevensteinDistance, TLevensteinDistanceKernelExec::Do); +BEGIN_SIMPLE_STRICT_ARROW_UDF(THumanReadableDuration, char*(TAutoMap<ui64>)) { + TStringStream result; + result << HumanReadable(TDuration::MicroSeconds(args[0].Get<ui64>())); + return valueBuilder->NewString(TStringRef(result.Data(), result.Size())); +} - BEGIN_SIMPLE_STRICT_ARROW_UDF(THumanReadableDuration, char*(TAutoMap<ui64>)) { +struct THumanReadableDurationKernelExec + : public TUnaryKernelExec<THumanReadableDurationKernelExec> { + template <typename TSink> + static void Process(const IValueBuilder*, TBlockItem arg1, const TSink& sink) { TStringStream result; - result << HumanReadable(TDuration::MicroSeconds(args[0].Get<ui64>())); - return valueBuilder->NewString(TStringRef(result.Data(), result.Size())); + result << HumanReadable(TDuration::MicroSeconds(arg1.Get<ui64>())); + sink(TBlockItem(TStringRef(result.Data(), result.Size()))); } +}; - struct THumanReadableDurationKernelExec - : public TUnaryKernelExec<THumanReadableDurationKernelExec> - { - template <typename TSink> - static void Process(const IValueBuilder*, TBlockItem arg1, const TSink& sink) { - TStringStream result; - result << HumanReadable(TDuration::MicroSeconds(arg1.Get<ui64>())); - sink(TBlockItem(TStringRef(result.Data(), result.Size()))); - } - }; - - END_SIMPLE_ARROW_UDF(THumanReadableDuration, THumanReadableDurationKernelExec::Do) +END_SIMPLE_ARROW_UDF(THumanReadableDuration, THumanReadableDurationKernelExec::Do) +BEGIN_SIMPLE_STRICT_ARROW_UDF(TPrec, char*(TAutoMap<double>, ui64)) { + TStringStream result; + result << Prec(args[0].Get<double>(), args[1].Get<ui64>()); + return valueBuilder->NewString(TStringRef(result.Data(), result.Size())); +} - BEGIN_SIMPLE_STRICT_ARROW_UDF(TPrec, char*(TAutoMap<double>, ui64)) { +struct TPrecKernelExec: public TBinaryKernelExec<TPrecKernelExec> { + template <typename TSink> + static void Process(const 
IValueBuilder*, TBlockItem arg1, TBlockItem arg2, const TSink& sink) { TStringStream result; - result << Prec(args[0].Get<double>(), args[1].Get<ui64>()); - return valueBuilder->NewString(TStringRef(result.Data(), result.Size())); + result << Prec(arg1.Get<double>(), arg2.Get<ui64>()); + sink(TBlockItem(TStringRef(result.Data(), result.Size()))); } +}; - struct TPrecKernelExec : public TBinaryKernelExec<TPrecKernelExec> { - template <typename TSink> - static void Process(const IValueBuilder*, TBlockItem arg1, TBlockItem arg2, const TSink& sink) { - TStringStream result; - result << Prec(arg1.Get<double>(), arg2.Get<ui64>()); - sink(TBlockItem(TStringRef(result.Data(), result.Size()))); - } - }; +END_SIMPLE_ARROW_UDF(TPrec, TPrecKernelExec::Do) - END_SIMPLE_ARROW_UDF(TPrec, TPrecKernelExec::Do) +SIMPLE_STRICT_UDF(TToByteList, TListType<ui8>(char*)) { + const TStringBuf input(args[0].AsStringRef()); + TUnboxedValue* items = nullptr; + TUnboxedValue result = valueBuilder->NewArray(input.size(), items); + for (const unsigned char c : input) { + *items++ = TUnboxedValuePod(c); + } + return result; +} +SIMPLE_STRICT_UDF(TFromByteList, char*(TListType<ui8>)) { + auto input = args[0]; - SIMPLE_STRICT_UDF(TToByteList, TListType<ui8>(char*)) { - const TStringBuf input(args[0].AsStringRef()); - TUnboxedValue* items = nullptr; - TUnboxedValue result = valueBuilder->NewArray(input.size(), items); - for (const unsigned char c : input) { - *items++ = TUnboxedValuePod(c); + if (auto elems = input.GetElements()) { + const auto elemCount = input.GetListLength(); + TUnboxedValue result = valueBuilder->NewStringNotFilled(input.GetListLength()); + auto bufferPtr = result.AsStringRef().Data(); + for (ui64 i = 0; i != elemCount; ++i) { + *(bufferPtr++) = elems[i].Get<ui8>(); } return result; } - SIMPLE_STRICT_UDF(TFromByteList, char*(TListType<ui8>)) { - auto input = args[0]; + std::vector<char, NKikimr::NUdf::TStdAllocatorForUdf<char>> buffer; + 
buffer.reserve(TUnboxedValuePod::InternalBufferSize); - if (auto elems = input.GetElements()) { - const auto elemCount = input.GetListLength(); - TUnboxedValue result = valueBuilder->NewStringNotFilled(input.GetListLength()); - auto bufferPtr = result.AsStringRef().Data(); - for (ui64 i = 0; i != elemCount; ++i) { - *(bufferPtr++) = elems[i].Get<ui8>(); - } - return result; - } - - std::vector<char, NKikimr::NUdf::TStdAllocatorForUdf<char>> buffer; - buffer.reserve(TUnboxedValuePod::InternalBufferSize); - - const auto& iter = input.GetListIterator(); - for (NUdf::TUnboxedValue item; iter.Next(item); ) { - buffer.push_back(item.Get<ui8>()); - } - - return valueBuilder->NewString(TStringRef(buffer.data(), buffer.size())); + const auto& iter = input.GetListIterator(); + for (NUdf::TUnboxedValue item; iter.Next(item);) { + buffer.push_back(item.Get<ui8>()); } + return valueBuilder->NewString(TStringRef(buffer.data(), buffer.size())); +} + #define STRING_REGISTER_UDF(udfName, ...) T##udfName, #define STRING_OPT_REGISTER_UDF(udfName, ...) 
T_yql_##udfName, - STRING_UDF_MAP(STRING_UDF) - STRING_UNSAFE_UDF_MAP(STRING_UNSAFE_UDF) - STROKA_CASE_UDF_MAP(STROKA_CASE_UDF) - STROKA_ASCII_CASE_UDF_MAP(STROKA_ASCII_CASE_UDF) - STROKA_FIND_UDF_MAP(STROKA_FIND_UDF) - STRING_TWO_ARGS_UDF_MAP_DEPRECATED_2025_02(STRING_TWO_ARGS_UDF_DEPRECATED_2025_02) - STRING_ASCII_CMP_IGNORE_CASE_UDF_MAP(STRING_ASCII_CMP_IGNORE_CASE_UDF) - IS_ASCII_UDF_MAP(IS_ASCII_UDF) +STRING_UDF_MAP(STRING_UDF) +STRING_UNSAFE_UDF_MAP(STRING_UNSAFE_UDF) +STROKA_CASE_UDF_MAP(STROKA_CASE_UDF) +STROKA_ASCII_CASE_UDF_MAP(STROKA_ASCII_CASE_UDF) +STROKA_FIND_UDF_MAP(STROKA_FIND_UDF) +STRING_TWO_ARGS_UDF_MAP_DEPRECATED_2025_02(STRING_TWO_ARGS_UDF_DEPRECATED_2025_02) +STRING_ASCII_CMP_IGNORE_CASE_UDF_MAP(STRING_ASCII_CMP_IGNORE_CASE_UDF) +IS_ASCII_UDF_MAP(IS_ASCII_UDF) - static constexpr ui64 padLim = 1000000; - STRING_STREAM_PAD_FORMATTER_UDF_MAP(STRING_STREAM_PAD_FORMATTER_UDF) - STRING_STREAM_NUM_FORMATTER_UDF_MAP(STRING_STREAM_NUM_FORMATTER_UDF) - STRING_STREAM_TEXT_FORMATTER_UDF_MAP(STRING_STREAM_TEXT_FORMATTER_UDF) - STRING_STREAM_HRSZ_FORMATTER_UDF_MAP(STRING_STREAM_HRSZ_FORMATTER_UDF) +static constexpr ui64 padLim = 1000000; +STRING_STREAM_PAD_FORMATTER_UDF_MAP(STRING_STREAM_PAD_FORMATTER_UDF) +STRING_STREAM_NUM_FORMATTER_UDF_MAP(STRING_STREAM_NUM_FORMATTER_UDF) +STRING_STREAM_TEXT_FORMATTER_UDF_MAP(STRING_STREAM_TEXT_FORMATTER_UDF) +STRING_STREAM_HRSZ_FORMATTER_UDF_MAP(STRING_STREAM_HRSZ_FORMATTER_UDF) - SIMPLE_MODULE(TStringModule, - STRING_UDF_MAP(STRING_REGISTER_UDF) - STRING_UNSAFE_UDF_MAP(STRING_REGISTER_UDF) - STROKA_UDF_MAP(STRING_REGISTER_UDF) - STROKA_CASE_UDF_MAP(STRING_REGISTER_UDF) - STROKA_ASCII_CASE_UDF_MAP(STRING_REGISTER_UDF) - STROKA_FIND_UDF_MAP(STRING_REGISTER_UDF) - STRING_TWO_ARGS_UDF_MAP_DEPRECATED_2025_02(STRING_REGISTER_UDF) - STRING_ASCII_CMP_IGNORE_CASE_UDF_MAP(STRING_REGISTER_UDF) - STRING_ASCII_CMP_IGNORE_CASE_UDF_MAP(STRING_OPT_REGISTER_UDF) - IS_ASCII_UDF_MAP(STRING_REGISTER_UDF) - 
STRING_STREAM_PAD_FORMATTER_UDF_MAP(STRING_REGISTER_UDF) - STRING_STREAM_NUM_FORMATTER_UDF_MAP(STRING_REGISTER_UDF) - STRING_STREAM_TEXT_FORMATTER_UDF_MAP(STRING_REGISTER_UDF) - STRING_STREAM_HRSZ_FORMATTER_UDF_MAP(STRING_REGISTER_UDF) - TReverse, - TCollapseText, - TReplaceAll, - TReplaceFirst, - TReplaceLast, - TRemoveAll, - TRemoveFirst, - TRemoveLast, - TContains, - TAsciiContainsIgnoreCase, - T_yql_AsciiContainsIgnoreCase, - TFind, - TReverseFind, - TSubstring, - TSplitToList, - TJoinFromList, - TLevensteinDistance, - THumanReadableDuration, - TPrec, - TToByteList, - TFromByteList) - } // namespace +SIMPLE_MODULE(TStringModule, + STRING_UDF_MAP(STRING_REGISTER_UDF) + STRING_UNSAFE_UDF_MAP(STRING_REGISTER_UDF) + STROKA_UDF_MAP(STRING_REGISTER_UDF) + STROKA_CASE_UDF_MAP(STRING_REGISTER_UDF) + STROKA_ASCII_CASE_UDF_MAP(STRING_REGISTER_UDF) + STROKA_FIND_UDF_MAP(STRING_REGISTER_UDF) + STRING_TWO_ARGS_UDF_MAP_DEPRECATED_2025_02(STRING_REGISTER_UDF) + STRING_ASCII_CMP_IGNORE_CASE_UDF_MAP(STRING_REGISTER_UDF) + STRING_ASCII_CMP_IGNORE_CASE_UDF_MAP(STRING_OPT_REGISTER_UDF) + IS_ASCII_UDF_MAP(STRING_REGISTER_UDF) + STRING_STREAM_PAD_FORMATTER_UDF_MAP(STRING_REGISTER_UDF) + STRING_STREAM_NUM_FORMATTER_UDF_MAP(STRING_REGISTER_UDF) + STRING_STREAM_TEXT_FORMATTER_UDF_MAP(STRING_REGISTER_UDF) + STRING_STREAM_HRSZ_FORMATTER_UDF_MAP(STRING_REGISTER_UDF) + TReverse, + TCollapseText, + TReplaceAll, + TReplaceFirst, + TReplaceLast, + TRemoveAll, + TRemoveFirst, + TRemoveLast, + TContains, + TAsciiContainsIgnoreCase, + T_yql_AsciiContainsIgnoreCase, + TFind, + TReverseFind, + TSubstring, + TSplitToList, + TJoinFromList, + TLevensteinDistance, + THumanReadableDuration, + TPrec, + TToByteList, + TFromByteList) +} // namespace REGISTER_MODULES(TStringModule) diff --git a/yql/essentials/udfs/common/string/ya.make b/yql/essentials/udfs/common/string/ya.make index bd83c78b8c1..1f8c6d4926d 100644 --- a/yql/essentials/udfs/common/string/ya.make +++ 
b/yql/essentials/udfs/common/string/ya.make @@ -5,6 +5,8 @@ YQL_UDF_CONTRIB(string_udf) 43 0 ) + + ENABLE(YQL_STYLE_CPP) SRCS( string_udf.cpp diff --git a/yql/essentials/udfs/common/top/top_udf.cpp b/yql/essentials/udfs/common/top/top_udf.cpp index a9b3d3e430d..41a0136f411 100644 --- a/yql/essentials/udfs/common/top/top_udf.cpp +++ b/yql/essentials/udfs/common/top/top_udf.cpp @@ -71,11 +71,13 @@ class TTopKeeperContainer { size_t MaxSize_ = 0; bool Finalized_ = false; TCompare Compare_; + public: explicit TTopKeeperContainer(TCompare compare) : Keeper_(0, compare) , Compare_(compare) - {} + { + } TVector<TValue, TAllocator> GetInternal() { if (OrderedSet_) { @@ -152,7 +154,8 @@ protected: protected: explicit TTopKeeperWrapperBase(TCompare compare) : Keeper_(compare) - {} + { + } void Init(const TUnboxedValuePod& value, ui32 maxSize) { Keeper_.SetMaxSize(maxSize); @@ -220,7 +223,8 @@ protected: protected: explicit TTopKeeperPairWrapperBase(TCompare compare) : Keeper_(compare) - {} + { + } void Init(const TUnboxedValuePod& key, const TUnboxedValuePod& payload, ui32 maxSize) { Keeper_.SetMaxSize(maxSize); @@ -284,14 +288,12 @@ public: } }; - template <EDataSlot Slot, bool HasKey, bool IsTop> class TTopKeeperDataWrapper; template <EDataSlot Slot, bool IsTop> class TTopKeeperDataWrapper<Slot, false, IsTop> - : public TTopKeeperWrapperBase<TDataCompare<Slot, IsTop>> -{ + : public TTopKeeperWrapperBase<TDataCompare<Slot, IsTop>> { public: using TBase = TTopKeeperWrapperBase<TDataCompare<Slot, IsTop>>; @@ -316,8 +318,7 @@ public: template <EDataSlot Slot, bool IsTop> class TTopKeeperDataWrapper<Slot, true, IsTop> - : public TTopKeeperPairWrapperBase<TDataPairCompare<Slot, IsTop>> -{ + : public TTopKeeperPairWrapperBase<TDataPairCompare<Slot, IsTop>> { public: using TBase = TTopKeeperPairWrapperBase<TDataPairCompare<Slot, IsTop>>; @@ -345,8 +346,7 @@ class TTopKeeperWrapper; template <bool IsTop> class TTopKeeperWrapper<false, IsTop> - : public 
TTopKeeperWrapperBase<TGenericCompare<IsTop>> -{ + : public TTopKeeperWrapperBase<TGenericCompare<IsTop>> { public: using TBase = TTopKeeperWrapperBase<TGenericCompare<IsTop>>; @@ -371,8 +371,7 @@ public: template <bool IsTop> class TTopKeeperWrapper<true, IsTop> - : public TTopKeeperPairWrapperBase<TGenericPairCompare<IsTop>> -{ + : public TTopKeeperPairWrapperBase<TGenericPairCompare<IsTop>> { public: using TBase = TTopKeeperPairWrapperBase<TGenericPairCompare<IsTop>>; @@ -395,7 +394,6 @@ public: } }; - template <EDataSlot Slot, bool HasKey, bool IsTop> class TTopResourceData; @@ -414,9 +412,8 @@ TTopResource<HasKey, IsTop>* GetTopResource(const TUnboxedValuePod& arg) { return static_cast<TTopResource<HasKey, IsTop>*>(arg.AsBoxed().Get()); } - template <EDataSlot Slot, bool HasKey, bool IsTop> -class TTopCreateData : public TBoxedValue { +class TTopCreateData: public TBoxedValue { private: template <bool HasKey_ = HasKey, typename std::enable_if_t<!HasKey_>* = nullptr> TUnboxedValue RunImpl(const TUnboxedValuePod* args) const { @@ -436,7 +433,7 @@ private: }; template <bool HasKey, bool IsTop> -class TTopCreate : public TBoxedValue { +class TTopCreate: public TBoxedValue { private: template <bool HasKey_ = HasKey, typename std::enable_if_t<!HasKey_>* = nullptr> TUnboxedValue RunImpl(const TUnboxedValuePod* args) const { @@ -457,14 +454,15 @@ private: public: explicit TTopCreate(ICompare::TPtr compare) : Compare_(compare) - {} + { + } private: ICompare::TPtr Compare_; }; template <EDataSlot Slot, bool HasKey, bool IsTop> -class TTopAddValueData : public TBoxedValue { +class TTopAddValueData: public TBoxedValue { private: template <bool HasKey_ = HasKey, typename std::enable_if_t<!HasKey_>* = nullptr> TUnboxedValue RunImpl(const TUnboxedValuePod* args) const { @@ -486,7 +484,7 @@ private: }; template <bool HasKey, bool IsTop> -class TTopAddValue : public TBoxedValue { +class TTopAddValue: public TBoxedValue { private: template <bool HasKey_ = HasKey, typename 
std::enable_if_t<!HasKey_>* = nullptr> TUnboxedValue RunImpl(const TUnboxedValuePod* args) const { @@ -508,11 +506,12 @@ private: public: explicit TTopAddValue(ICompare::TPtr) - {} + { + } }; template <EDataSlot Slot, bool HasKey, bool IsTop> -class TTopSerializeData : public TBoxedValue { +class TTopSerializeData: public TBoxedValue { private: TUnboxedValue Run(const IValueBuilder* valueBuilder, const TUnboxedValuePod* args) const override { auto resource = GetTopResourceData<Slot, HasKey, IsTop>(args[0]); @@ -521,7 +520,7 @@ private: }; template <bool HasKey, bool IsTop> -class TTopSerialize : public TBoxedValue { +class TTopSerialize: public TBoxedValue { private: TUnboxedValue Run(const IValueBuilder* valueBuilder, const TUnboxedValuePod* args) const override { auto resource = GetTopResource<HasKey, IsTop>(args[0]); @@ -530,11 +529,12 @@ private: public: explicit TTopSerialize(ICompare::TPtr) - {} + { + } }; template <EDataSlot Slot, bool HasKey, bool IsTop> -class TTopDeserializeData : public TBoxedValue { +class TTopDeserializeData: public TBoxedValue { private: TUnboxedValue Run(const IValueBuilder*, const TUnboxedValuePod* args) const override { return TUnboxedValuePod(new TTopResourceData<Slot, HasKey, IsTop>(args[0])); @@ -542,7 +542,7 @@ private: }; template <bool HasKey, bool IsTop> -class TTopDeserialize : public TBoxedValue { +class TTopDeserialize: public TBoxedValue { private: TUnboxedValue Run(const IValueBuilder*, const TUnboxedValuePod* args) const override { return TUnboxedValuePod(new TTopResource<HasKey, IsTop>(args[0], Compare_)); @@ -551,14 +551,15 @@ private: public: explicit TTopDeserialize(ICompare::TPtr compare) : Compare_(compare) - {} + { + } private: ICompare::TPtr Compare_; }; template <EDataSlot Slot, bool HasKey, bool IsTop> -class TTopMergeData : public TBoxedValue { +class TTopMergeData: public TBoxedValue { private: TUnboxedValue Run(const IValueBuilder*, const TUnboxedValuePod* args) const override { auto left = 
GetTopResourceData<Slot, HasKey, IsTop>(args[0]); @@ -568,7 +569,7 @@ private: }; template <bool HasKey, bool IsTop> -class TTopMerge : public TBoxedValue { +class TTopMerge: public TBoxedValue { private: TUnboxedValue Run(const IValueBuilder*, const TUnboxedValuePod* args) const override { auto left = GetTopResource<HasKey, IsTop>(args[0]); @@ -579,14 +580,15 @@ private: public: explicit TTopMerge(ICompare::TPtr compare) : Compare_(compare) - {} + { + } private: ICompare::TPtr Compare_; }; template <EDataSlot Slot, bool HasKey, bool IsTop> -class TTopGetResultData : public TBoxedValue { +class TTopGetResultData: public TBoxedValue { private: TUnboxedValue Run(const IValueBuilder* valueBuilder, const TUnboxedValuePod* args) const override { auto resource = GetTopResourceData<Slot, HasKey, IsTop>(args[0]); @@ -595,7 +597,7 @@ private: }; template <bool HasKey, bool IsTop> -class TTopGetResult : public TBoxedValue { +class TTopGetResult: public TBoxedValue { private: TUnboxedValue Run(const IValueBuilder* valueBuilder, const TUnboxedValuePod* args) const override { auto resource = GetTopResource<HasKey, IsTop>(args[0]); @@ -604,25 +606,24 @@ private: public: explicit TTopGetResult(ICompare::TPtr) - {} + { + } }; - -#define RESOURCE(slot, hasKey, isTop) \ -extern const char TopResourceName_##slot##_##hasKey##_##isTop[] = \ - "Top.TopResource."#slot"."#hasKey"."#isTop; \ -template <> \ -class TTopResourceData<EDataSlot::slot, hasKey, isTop>: \ - public TBoxedResource< \ - TTopKeeperDataWrapper<EDataSlot::slot, hasKey, isTop>, \ - TopResourceName_##slot##_##hasKey##_##isTop> \ -{ \ -public: \ - template <typename... Args> \ - inline TTopResourceData(Args&&... args) \ - : TBoxedResource(std::forward<Args>(args)...) \ - {} \ -}; +#define RESOURCE(slot, hasKey, isTop) \ + extern const char TopResourceName_##slot##_##hasKey##_##isTop[] = \ + "Top.TopResource." #slot "." #hasKey "." 
#isTop; \ + template <> \ + class TTopResourceData<EDataSlot::slot, hasKey, isTop>: public TBoxedResource< \ + TTopKeeperDataWrapper<EDataSlot::slot, hasKey, isTop>, \ + TopResourceName_##slot##_##hasKey##_##isTop> { \ + public: \ + template <typename... Args> \ + inline TTopResourceData(Args&&... args) \ + : TBoxedResource(std::forward<Args>(args)...) \ + { \ + } \ + }; #define RESOURCE_00(slot, ...) RESOURCE(slot, false, false) #define RESOURCE_01(slot, ...) RESOURCE(slot, false, true) @@ -679,52 +680,50 @@ UDF_TYPE_ID_MAP(RESOURCE_11) #define TYPE_10(slot, ...) MAKE_TYPE(slot, true, false) #define TYPE_11(slot, ...) MAKE_TYPE(slot, true, true) -#define PARAMETRIZE(action) \ - if (hasKey) { \ - if (isTop) { \ - switch (*slot) { \ - UDF_TYPE_ID_MAP(action##_11) \ - } \ - } else { \ - switch (*slot) { \ - UDF_TYPE_ID_MAP(action##_10) \ - } \ - } \ - } else { \ - if (isTop) { \ - switch (*slot) { \ - UDF_TYPE_ID_MAP(action##_01) \ - } \ - } else { \ - switch (*slot) { \ - UDF_TYPE_ID_MAP(action##_00) \ - } \ - } \ +#define PARAMETRIZE(action) \ + if (hasKey) { \ + if (isTop) { \ + switch (*slot) { \ + UDF_TYPE_ID_MAP(action##_11) \ + } \ + } else { \ + switch (*slot) { \ + UDF_TYPE_ID_MAP(action##_10) \ + } \ + } \ + } else { \ + if (isTop) { \ + switch (*slot) { \ + UDF_TYPE_ID_MAP(action##_01) \ + } \ + } else { \ + switch (*slot) { \ + UDF_TYPE_ID_MAP(action##_00) \ + } \ + } \ } - -#define RESOURCE_GENERIC(hasKey, isTop) \ -extern const char TopResourceName_Generic_##hasKey##_##isTop[] = \ - "Top.TopResource.Generic."#hasKey"."#isTop; \ -template <> \ -class TTopResource<hasKey, isTop>: \ - public TBoxedResource< \ - TTopKeeperWrapper<hasKey, isTop>, \ - TopResourceName_Generic_##hasKey##_##isTop> \ -{ \ -public: \ - template <typename... Args> \ - inline TTopResource(Args&&... args) \ - : TBoxedResource(std::forward<Args>(args)...) 
\ - {} \ -}; +#define RESOURCE_GENERIC(hasKey, isTop) \ + extern const char TopResourceName_Generic_##hasKey##_##isTop[] = \ + "Top.TopResource.Generic." #hasKey "." #isTop; \ + template <> \ + class TTopResource<hasKey, isTop>: public TBoxedResource< \ + TTopKeeperWrapper<hasKey, isTop>, \ + TopResourceName_Generic_##hasKey##_##isTop> { \ + public: \ + template <typename... Args> \ + inline TTopResource(Args&&... args) \ + : TBoxedResource(std::forward<Args>(args)...) \ + { \ + } \ + }; RESOURCE_GENERIC(false, false) RESOURCE_GENERIC(false, true) RESOURCE_GENERIC(true, false) RESOURCE_GENERIC(true, true) -#define MAKE_IMPL_GENERIC(operation, hasKey, isTop) \ +#define MAKE_IMPL_GENERIC(operation, hasKey, isTop) \ builder.Implementation(new operation<hasKey, isTop>(compare)); #define CREATE_GENERIC(hasKey, isTop) MAKE_IMPL_GENERIC(TTopCreate, hasKey, isTop) @@ -734,7 +733,7 @@ RESOURCE_GENERIC(true, true) #define DESERIALIZE_GENERIC(hasKey, isTop) MAKE_IMPL_GENERIC(TTopDeserialize, hasKey, isTop) #define GET_RESULT_GENERIC(hasKey, isTop) MAKE_IMPL_GENERIC(TTopGetResult, hasKey, isTop) -#define TYPE_GENERIC(hasKey, isTop) \ +#define TYPE_GENERIC(hasKey, isTop) \ topType = builder.Resource(TopResourceName_Generic_##hasKey##_##isTop); #define PARAMETRIZE_GENERIC(action) \ @@ -752,7 +751,6 @@ RESOURCE_GENERIC(true, true) } \ } - static const auto CreateName = TStringRef::Of("Create"); static const auto AddValueName = TStringRef::Of("AddValue"); static const auto SerializeName = TStringRef::Of("Serialize"); @@ -760,7 +758,7 @@ static const auto DeserializeName = TStringRef::Of("Deserialize"); static const auto MergeName = TStringRef::Of("Merge"); static const auto GetResultName = TStringRef::Of("GetResult"); -class TTopModule : public IUdfModule { +class TTopModule: public IUdfModule { public: TStringRef Name() const { return TStringRef::Of("Top"); @@ -783,8 +781,7 @@ public: TType* userType, const TStringRef& typeConfig, ui32 flags, - IFunctionTypeInfoBuilder& builder) 
const final - { + IFunctionTypeInfoBuilder& builder) const final { Y_UNUSED(typeConfig); try { @@ -951,4 +948,3 @@ public: } // namespace REGISTER_MODULES(TTopModule) - diff --git a/yql/essentials/udfs/common/top/ya.make b/yql/essentials/udfs/common/top/ya.make index 4a8cdf859e3..f0818f2b34d 100644 --- a/yql/essentials/udfs/common/top/ya.make +++ b/yql/essentials/udfs/common/top/ya.make @@ -5,6 +5,8 @@ YQL_UDF_CONTRIB(top_udf) 28 0 ) + + ENABLE(YQL_STYLE_CPP) SRCS( top_udf.cpp diff --git a/yql/essentials/udfs/common/topfreq/static/static_udf.cpp b/yql/essentials/udfs/common/topfreq/static/static_udf.cpp index 4075bfa9c2b..40e478c5276 100644 --- a/yql/essentials/udfs/common/topfreq/static/static_udf.cpp +++ b/yql/essentials/udfs/common/topfreq/static/static_udf.cpp @@ -1,10 +1,10 @@ #include "topfreq_udf.h" namespace NYql { - namespace NUdf { - NUdf::TUniquePtr<NUdf::IUdfModule> CreateTopFreqModule() { - return new TTopFreqModule(); - } - - } +namespace NUdf { +NUdf::TUniquePtr<NUdf::IUdfModule> CreateTopFreqModule() { + return new TTopFreqModule(); } + +} // namespace NUdf +} // namespace NYql diff --git a/yql/essentials/udfs/common/topfreq/static/topfreq.cpp b/yql/essentials/udfs/common/topfreq/static/topfreq.cpp index c118b52d0a1..321bfd5a667 100644 --- a/yql/essentials/udfs/common/topfreq/static/topfreq.cpp +++ b/yql/essentials/udfs/common/topfreq/static/topfreq.cpp @@ -8,7 +8,8 @@ using namespace NUdf; template <typename THash, typename TEquals> TTopFreqBase<THash, TEquals>::TTopFreqBase(THash hash, TEquals equals) : Indices_(0, hash, equals) -{} +{ +} template <typename THash, typename TEquals> void TTopFreqBase<THash, TEquals>::Init(const TUnboxedValuePod& value, const ui32 minSize, const ui32 maxSize) { @@ -179,21 +180,21 @@ UDF_TYPE_ID_MAP(INSTANCE_FOR) #undef INSTANCE_FOR TTopFreqGeneric::TTopFreqGeneric(const TUnboxedValuePod& value, const ui32 minSize, const ui32 maxSize, - IHash::TPtr hash, IEquate::TPtr equate) + IHash::TPtr hash, IEquate::TPtr equate) 
: TBase(TGenericHash{hash}, TGenericEquals{equate}) { TBase::Init(value, minSize, maxSize); } TTopFreqGeneric::TTopFreqGeneric(const TTopFreqGeneric& topFreq1, const TTopFreqGeneric& topFreq2, - IHash::TPtr hash, IEquate::TPtr equate) + IHash::TPtr hash, IEquate::TPtr equate) : TBase(TGenericHash{hash}, TGenericEquals{equate}) { TBase::Merge(topFreq1, topFreq2); } TTopFreqGeneric::TTopFreqGeneric(const TUnboxedValuePod& serialized, - IHash::TPtr hash, IEquate::TPtr equate) + IHash::TPtr hash, IEquate::TPtr equate) : TBase(TGenericHash{hash}, TGenericEquals{equate}) { TBase::Deserialize(serialized); @@ -210,4 +211,3 @@ TUnboxedValue TTopFreqGeneric::Get(const IValueBuilder* builder, ui32 resultSize void TTopFreqGeneric::AddValue(const TUnboxedValuePod& value) { TBase::AddValue(value); } - diff --git a/yql/essentials/udfs/common/topfreq/static/topfreq.h b/yql/essentials/udfs/common/topfreq/static/topfreq.h index b10574f33f6..c2d9d78d14c 100644 --- a/yql/essentials/udfs/common/topfreq/static/topfreq.h +++ b/yql/essentials/udfs/common/topfreq/static/topfreq.h @@ -42,9 +42,8 @@ protected: template <NKikimr::NUdf::EDataSlot Slot> class TTopFreqData : public TTopFreqBase< - NKikimr::NUdf::TUnboxedValueHash<Slot>, - NKikimr::NUdf::TUnboxedValueEquals<Slot>> -{ + NKikimr::NUdf::TUnboxedValueHash<Slot>, + NKikimr::NUdf::TUnboxedValueEquals<Slot>> { public: using TBase = TTopFreqBase< NKikimr::NUdf::TUnboxedValueHash<Slot>, @@ -72,24 +71,22 @@ struct TGenericEquals { bool operator()( const NKikimr::NUdf::TUnboxedValuePod& left, - const NKikimr::NUdf::TUnboxedValuePod& right) const - { + const NKikimr::NUdf::TUnboxedValuePod& right) const { return Equate->Equals(left, right); } }; class TTopFreqGeneric - : public TTopFreqBase<TGenericHash, TGenericEquals> -{ + : public TTopFreqBase<TGenericHash, TGenericEquals> { public: using TBase = TTopFreqBase<TGenericHash, TGenericEquals>; TTopFreqGeneric(const NKikimr::NUdf::TUnboxedValuePod& value, const ui32 minSize, const ui32 
maxSize, - NKikimr::NUdf::IHash::TPtr hash, NKikimr::NUdf::IEquate::TPtr equate); + NKikimr::NUdf::IHash::TPtr hash, NKikimr::NUdf::IEquate::TPtr equate); TTopFreqGeneric(const TTopFreqGeneric& topFreq1, const TTopFreqGeneric& topFreq2, - NKikimr::NUdf::IHash::TPtr hash, NKikimr::NUdf::IEquate::TPtr equate); + NKikimr::NUdf::IHash::TPtr hash, NKikimr::NUdf::IEquate::TPtr equate); TTopFreqGeneric(const NKikimr::NUdf::TUnboxedValuePod& serialized, - NKikimr::NUdf::IHash::TPtr hash, NKikimr::NUdf::IEquate::TPtr equate); + NKikimr::NUdf::IHash::TPtr hash, NKikimr::NUdf::IEquate::TPtr equate); NKikimr::NUdf::TUnboxedValue Serialize(const NKikimr::NUdf::IValueBuilder* builder); NKikimr::NUdf::TUnboxedValue Get(const NKikimr::NUdf::IValueBuilder* builder, ui32 resultSize); diff --git a/yql/essentials/udfs/common/topfreq/static/topfreq_udf.h b/yql/essentials/udfs/common/topfreq/static/topfreq_udf.h index 93f8c3fd587..aef26def698 100644 --- a/yql/essentials/udfs/common/topfreq/static/topfreq_udf.h +++ b/yql/essentials/udfs/common/topfreq/static/topfreq_udf.h @@ -15,177 +15,176 @@ using namespace NYql; using namespace NUdf; namespace { - extern const char TopFreqResourceNameGeneric[] = "TopFreq.TopFreqResource.Generic"; - class TTopFreqResource: - public TBoxedResource<TTopFreqGeneric, TopFreqResourceNameGeneric> +extern const char TopFreqResourceNameGeneric[] = "TopFreq.TopFreqResource.Generic"; +class TTopFreqResource: public TBoxedResource<TTopFreqGeneric, TopFreqResourceNameGeneric> { +public: + template <typename... Args> + inline TTopFreqResource(Args&&... args) + : TBoxedResource(std::forward<Args>(args)...) { - public: - template <typename... Args> - inline TTopFreqResource(Args&&... args) - : TBoxedResource(std::forward<Args>(args)...) 
- {} - }; - - template <EDataSlot Slot> - class TTopFreqResourceData; - - template <EDataSlot Slot> - TTopFreqResourceData<Slot>* GetTopFreqResourceData(const TUnboxedValuePod& arg) { - TTopFreqResourceData<Slot>::Validate(arg); - return static_cast<TTopFreqResourceData<Slot>*>(arg.AsBoxed().Get()); } +}; - TTopFreqResource* GetTopFreqResource(const TUnboxedValuePod& arg) { - TTopFreqResource::Validate(arg); - return static_cast<TTopFreqResource*>(arg.AsBoxed().Get()); - } +template <EDataSlot Slot> +class TTopFreqResourceData; +template <EDataSlot Slot> +TTopFreqResourceData<Slot>* GetTopFreqResourceData(const TUnboxedValuePod& arg) { + TTopFreqResourceData<Slot>::Validate(arg); + return static_cast<TTopFreqResourceData<Slot>*>(arg.AsBoxed().Get()); +} - template <EDataSlot Slot> - class TTopFreqCreateData: public TBoxedValue { - private: - TUnboxedValue Run(const IValueBuilder*, const TUnboxedValuePod* args) const { - ui32 minSize = args[1].Get<ui32>(); - return TUnboxedValuePod(new TTopFreqResourceData<Slot>(args[0], minSize, minSize * 2)); - } - }; +TTopFreqResource* GetTopFreqResource(const TUnboxedValuePod& arg) { + TTopFreqResource::Validate(arg); + return static_cast<TTopFreqResource*>(arg.AsBoxed().Get()); +} - class TTopFreqCreate: public TBoxedValue { - private: - TUnboxedValue Run(const IValueBuilder*, const TUnboxedValuePod* args) const { - ui32 minSize = args[1].Get<ui32>(); - return TUnboxedValuePod(new TTopFreqResource(args[0], minSize, minSize * 2, Hash_, Equate_)); - } +template <EDataSlot Slot> +class TTopFreqCreateData: public TBoxedValue { +private: + TUnboxedValue Run(const IValueBuilder*, const TUnboxedValuePod* args) const { + ui32 minSize = args[1].Get<ui32>(); + return TUnboxedValuePod(new TTopFreqResourceData<Slot>(args[0], minSize, minSize * 2)); + } +}; - public: - TTopFreqCreate(IHash::TPtr hash, IEquate::TPtr equate) - : Hash_(hash) - , Equate_(equate) - {} +class TTopFreqCreate: public TBoxedValue { +private: + TUnboxedValue 
Run(const IValueBuilder*, const TUnboxedValuePod* args) const { + ui32 minSize = args[1].Get<ui32>(); + return TUnboxedValuePod(new TTopFreqResource(args[0], minSize, minSize * 2, Hash_, Equate_)); + } - private: - IHash::TPtr Hash_; - IEquate::TPtr Equate_; - }; +public: + TTopFreqCreate(IHash::TPtr hash, IEquate::TPtr equate) + : Hash_(hash) + , Equate_(equate) + { + } - template <EDataSlot Slot> - class TTopFreqAddValueData: public TBoxedValue { - private: - TUnboxedValue Run(const IValueBuilder*, const TUnboxedValuePod* args) const { - const auto topFreq = GetTopFreqResourceData<Slot>(args[0]); - topFreq->Get()->AddValue(args[1]); - return TUnboxedValuePod(topFreq); - } - }; +private: + IHash::TPtr Hash_; + IEquate::TPtr Equate_; +}; - class TTopFreqAddValue: public TBoxedValue { - private: - TUnboxedValue Run(const IValueBuilder*, const TUnboxedValuePod* args) const { - const auto topFreq = GetTopFreqResource(args[0]); - topFreq->Get()->AddValue(args[1]); - return TUnboxedValuePod(topFreq); - } - }; +template <EDataSlot Slot> +class TTopFreqAddValueData: public TBoxedValue { +private: + TUnboxedValue Run(const IValueBuilder*, const TUnboxedValuePod* args) const { + const auto topFreq = GetTopFreqResourceData<Slot>(args[0]); + topFreq->Get()->AddValue(args[1]); + return TUnboxedValuePod(topFreq); + } +}; - template <EDataSlot Slot> - class TTopFreqSerializeData: public TBoxedValue { - private: - TUnboxedValue Run(const IValueBuilder* valueBuilder, const TUnboxedValuePod* args) const { - return GetTopFreqResourceData<Slot>(args[0])->Get()->Serialize(valueBuilder); - } - }; +class TTopFreqAddValue: public TBoxedValue { +private: + TUnboxedValue Run(const IValueBuilder*, const TUnboxedValuePod* args) const { + const auto topFreq = GetTopFreqResource(args[0]); + topFreq->Get()->AddValue(args[1]); + return TUnboxedValuePod(topFreq); + } +}; - class TTopFreqSerialize: public TBoxedValue { - private: - TUnboxedValue Run(const IValueBuilder* valueBuilder, const 
TUnboxedValuePod* args) const { - return GetTopFreqResource(args[0])->Get()->Serialize(valueBuilder); - } - }; +template <EDataSlot Slot> +class TTopFreqSerializeData: public TBoxedValue { +private: + TUnboxedValue Run(const IValueBuilder* valueBuilder, const TUnboxedValuePod* args) const { + return GetTopFreqResourceData<Slot>(args[0])->Get()->Serialize(valueBuilder); + } +}; - template <EDataSlot Slot> - class TTopFreqDeserializeData: public TBoxedValue { - private: - TUnboxedValue Run(const IValueBuilder*, const TUnboxedValuePod* args) const { - return TUnboxedValuePod(new TTopFreqResourceData<Slot>(args[0])); - } - }; +class TTopFreqSerialize: public TBoxedValue { +private: + TUnboxedValue Run(const IValueBuilder* valueBuilder, const TUnboxedValuePod* args) const { + return GetTopFreqResource(args[0])->Get()->Serialize(valueBuilder); + } +}; - class TTopFreqDeserialize: public TBoxedValue { - private: - TUnboxedValue Run(const IValueBuilder*, const TUnboxedValuePod* args) const { - return TUnboxedValuePod(new TTopFreqResource(args[0], Hash_, Equate_)); - } +template <EDataSlot Slot> +class TTopFreqDeserializeData: public TBoxedValue { +private: + TUnboxedValue Run(const IValueBuilder*, const TUnboxedValuePod* args) const { + return TUnboxedValuePod(new TTopFreqResourceData<Slot>(args[0])); + } +}; - public: - TTopFreqDeserialize(IHash::TPtr hash, IEquate::TPtr equate) - : Hash_(hash) - , Equate_(equate) - {} +class TTopFreqDeserialize: public TBoxedValue { +private: + TUnboxedValue Run(const IValueBuilder*, const TUnboxedValuePod* args) const { + return TUnboxedValuePod(new TTopFreqResource(args[0], Hash_, Equate_)); + } - private: - IHash::TPtr Hash_; - IEquate::TPtr Equate_; - }; +public: + TTopFreqDeserialize(IHash::TPtr hash, IEquate::TPtr equate) + : Hash_(hash) + , Equate_(equate) + { + } - template <EDataSlot Slot> - class TTopFreqMergeData: public TBoxedValue { - private: - TUnboxedValue Run(const IValueBuilder*, const TUnboxedValuePod* args) const { - 
const auto topFreq0 = GetTopFreqResourceData<Slot>(args[0]); - const auto topFreq1 = GetTopFreqResourceData<Slot>(args[1]); - return TUnboxedValuePod(new TTopFreqResourceData<Slot>(*topFreq0->Get(), *topFreq1->Get())); - } - }; +private: + IHash::TPtr Hash_; + IEquate::TPtr Equate_; +}; - class TTopFreqMerge: public TBoxedValue { - private: - TUnboxedValue Run(const IValueBuilder*, const TUnboxedValuePod* args) const { - const auto topFreq0 = GetTopFreqResource(args[0]); - const auto topFreq1 = GetTopFreqResource(args[1]); - return TUnboxedValuePod(new TTopFreqResource(*topFreq0->Get(), *topFreq1->Get(), Hash_, Equate_)); - } +template <EDataSlot Slot> +class TTopFreqMergeData: public TBoxedValue { +private: + TUnboxedValue Run(const IValueBuilder*, const TUnboxedValuePod* args) const { + const auto topFreq0 = GetTopFreqResourceData<Slot>(args[0]); + const auto topFreq1 = GetTopFreqResourceData<Slot>(args[1]); + return TUnboxedValuePod(new TTopFreqResourceData<Slot>(*topFreq0->Get(), *topFreq1->Get())); + } +}; - public: - TTopFreqMerge(IHash::TPtr hash, IEquate::TPtr equate) - : Hash_(hash) - , Equate_(equate) - {} +class TTopFreqMerge: public TBoxedValue { +private: + TUnboxedValue Run(const IValueBuilder*, const TUnboxedValuePod* args) const { + const auto topFreq0 = GetTopFreqResource(args[0]); + const auto topFreq1 = GetTopFreqResource(args[1]); + return TUnboxedValuePod(new TTopFreqResource(*topFreq0->Get(), *topFreq1->Get(), Hash_, Equate_)); + } - private: - IHash::TPtr Hash_; - IEquate::TPtr Equate_; - }; +public: + TTopFreqMerge(IHash::TPtr hash, IEquate::TPtr equate) + : Hash_(hash) + , Equate_(equate) + { + } - template <EDataSlot Slot> - class TTopFreqGetData: public TBoxedValue { - private: - TUnboxedValue Run(const IValueBuilder* valueBuilder, const TUnboxedValuePod* args) const { - return GetTopFreqResourceData<Slot>(args[0])->Get()->Get(valueBuilder, args[1].Get<ui32>()); - } - }; +private: + IHash::TPtr Hash_; + IEquate::TPtr Equate_; +}; - class 
TTopFreqGet: public TBoxedValue { - private: - TUnboxedValue Run(const IValueBuilder* valueBuilder, const TUnboxedValuePod* args) const { - return GetTopFreqResource(args[0])->Get()->Get(valueBuilder, args[1].Get<ui32>()); - } - }; +template <EDataSlot Slot> +class TTopFreqGetData: public TBoxedValue { +private: + TUnboxedValue Run(const IValueBuilder* valueBuilder, const TUnboxedValuePod* args) const { + return GetTopFreqResourceData<Slot>(args[0])->Get()->Get(valueBuilder, args[1].Get<ui32>()); + } +}; +class TTopFreqGet: public TBoxedValue { +private: + TUnboxedValue Run(const IValueBuilder* valueBuilder, const TUnboxedValuePod* args) const { + return GetTopFreqResource(args[0])->Get()->Get(valueBuilder, args[1].Get<ui32>()); + } +}; -#define MAKE_RESOURCE(slot, ...) \ - extern const char TopFreqResourceName##slot[] = "TopFreq.TopFreqResource."#slot; \ - template <> \ - class TTopFreqResourceData<EDataSlot::slot>: \ - public TBoxedResource<TTopFreqData<EDataSlot::slot>, TopFreqResourceName##slot> \ - { \ - public: \ - template <typename... Args> \ - inline TTopFreqResourceData(Args&&... args) \ - : TBoxedResource(std::forward<Args>(args)...) \ - {} \ +#define MAKE_RESOURCE(slot, ...) \ + extern const char TopFreqResourceName##slot[] = "TopFreq.TopFreqResource." #slot; \ + template <> \ + class TTopFreqResourceData<EDataSlot::slot>: public TBoxedResource<TTopFreqData<EDataSlot::slot>, TopFreqResourceName##slot> { \ + public: \ + template <typename... Args> \ + inline TTopFreqResourceData(Args&&... args) \ + : TBoxedResource(std::forward<Args>(args)...) 
\ + { \ + } \ }; - UDF_TYPE_ID_MAP(MAKE_RESOURCE) +UDF_TYPE_ID_MAP(MAKE_RESOURCE) #define MAKE_IMPL(operation, slot) \ case EDataSlot::slot: \ @@ -204,190 +203,188 @@ namespace { topFreqType = builder.Resource(TopFreqResourceName##slot); \ break; +static const auto CreateName = TStringRef::Of("TopFreq_Create"); +static const auto AddValueName = TStringRef::Of("TopFreq_AddValue"); +static const auto SerializeName = TStringRef::Of("TopFreq_Serialize"); +static const auto DeserializeName = TStringRef::Of("TopFreq_Deserialize"); +static const auto MergeName = TStringRef::Of("TopFreq_Merge"); +static const auto GetName = TStringRef::Of("TopFreq_Get"); - static const auto CreateName = TStringRef::Of("TopFreq_Create"); - static const auto AddValueName = TStringRef::Of("TopFreq_AddValue"); - static const auto SerializeName = TStringRef::Of("TopFreq_Serialize"); - static const auto DeserializeName = TStringRef::Of("TopFreq_Deserialize"); - static const auto MergeName = TStringRef::Of("TopFreq_Merge"); - static const auto GetName = TStringRef::Of("TopFreq_Get"); +class TTopFreqModule: public IUdfModule { +public: + TStringRef Name() const { + return TStringRef::Of("TopFreq"); + } - class TTopFreqModule: public IUdfModule { - public: - TStringRef Name() const { - return TStringRef::Of("TopFreq"); - } + void CleanupOnTerminate() const final { + } - void CleanupOnTerminate() const final { - } + void GetAllFunctions(IFunctionsSink& sink) const final { + sink.Add(CreateName)->SetTypeAwareness(); + sink.Add(AddValueName)->SetTypeAwareness(); + sink.Add(SerializeName)->SetTypeAwareness(); + sink.Add(DeserializeName)->SetTypeAwareness(); + sink.Add(MergeName)->SetTypeAwareness(); + sink.Add(GetName)->SetTypeAwareness(); + } - void GetAllFunctions(IFunctionsSink& sink) const final { - sink.Add(CreateName)->SetTypeAwareness(); - sink.Add(AddValueName)->SetTypeAwareness(); - sink.Add(SerializeName)->SetTypeAwareness(); - sink.Add(DeserializeName)->SetTypeAwareness(); - 
sink.Add(MergeName)->SetTypeAwareness(); - sink.Add(GetName)->SetTypeAwareness(); - } + void BuildFunctionTypeInfo( + const TStringRef& name, + TType* userType, + const TStringRef& typeConfig, + ui32 flags, + IFunctionTypeInfoBuilder& builder) const final { + Y_UNUSED(typeConfig); - void BuildFunctionTypeInfo( - const TStringRef& name, - TType* userType, - const TStringRef& typeConfig, - ui32 flags, - IFunctionTypeInfoBuilder& builder) const final - { - Y_UNUSED(typeConfig); + try { + const bool typesOnly = (flags & TFlags::TypesOnly); + builder.UserType(userType); - try { - const bool typesOnly = (flags & TFlags::TypesOnly); - builder.UserType(userType); + auto typeHelper = builder.TypeInfoHelper(); - auto typeHelper = builder.TypeInfoHelper(); + auto userTypeInspector = TTupleTypeInspector(*typeHelper, userType); + if (!userTypeInspector || userTypeInspector.GetElementsCount() != 3) { + builder.SetError("User type is not a 3-tuple"); + return; + } + + bool isGeneric = false; + IHash::TPtr hash; + IEquate::TPtr equate; + TMaybe<EDataSlot> slot; - auto userTypeInspector = TTupleTypeInspector(*typeHelper, userType); - if (!userTypeInspector || userTypeInspector.GetElementsCount() != 3) { - builder.SetError("User type is not a 3-tuple"); + auto valueType = userTypeInspector.GetElementType(2); + auto valueTypeInspector = TDataTypeInspector(*typeHelper, valueType); + if (!valueTypeInspector) { + isGeneric = true; + hash = builder.MakeHash(valueType); + equate = builder.MakeEquate(valueType); + if (!hash || !equate) { return; } - - bool isGeneric = false; - IHash::TPtr hash; - IEquate::TPtr equate; - TMaybe<EDataSlot> slot; - - auto valueType = userTypeInspector.GetElementType(2); - auto valueTypeInspector = TDataTypeInspector(*typeHelper, valueType); - if (!valueTypeInspector) { - isGeneric = true; - hash = builder.MakeHash(valueType); - equate = builder.MakeEquate(valueType); - if (!hash || !equate) { - return; - } - } else { - slot = 
FindDataSlot(valueTypeInspector.GetTypeId()); - if (!slot) { - builder.SetError("Unknown data type"); - return; - } - const auto& features = NUdf::GetDataTypeInfo(*slot).Features; - if (!(features & NUdf::CanHash) || !(features & NUdf::CanEquate)) { - builder.SetError("Data type is not hashable or equatable"); - return; - } + } else { + slot = FindDataSlot(valueTypeInspector.GetTypeId()); + if (!slot) { + builder.SetError("Unknown data type"); + return; } + const auto& features = NUdf::GetDataTypeInfo(*slot).Features; + if (!(features & NUdf::CanHash) || !(features & NUdf::CanEquate)) { + builder.SetError("Data type is not hashable or equatable"); + return; + } + } - auto serializedItemType = builder.Tuple()->Add<ui64>().Add(valueType).Build(); - auto serializedListType = builder.List()->Item(serializedItemType).Build(); - auto serializedType = builder.Tuple()->Add<ui32>().Add<ui32>().Add(serializedListType).Build(); + auto serializedItemType = builder.Tuple()->Add<ui64>().Add(valueType).Build(); + auto serializedListType = builder.List()->Item(serializedItemType).Build(); + auto serializedType = builder.Tuple()->Add<ui32>().Add<ui32>().Add(serializedListType).Build(); - TType* topFreqType = nullptr; - if (isGeneric) { - topFreqType = builder.Resource(TopFreqResourceNameGeneric); - } else { - switch (*slot) { + TType* topFreqType = nullptr; + if (isGeneric) { + topFreqType = builder.Resource(TopFreqResourceNameGeneric); + } else { + switch (*slot) { UDF_TYPE_ID_MAP(MAKE_TYPE) - } } + } - if (name == CreateName) { - builder.Args()->Add(valueType).Add<ui32>().Done().Returns(topFreqType); + if (name == CreateName) { + builder.Args()->Add(valueType).Add<ui32>().Done().Returns(topFreqType); - if (!typesOnly) { - if (isGeneric) { - builder.Implementation(new TTopFreqCreate(hash, equate)); - } else { - switch (*slot) { + if (!typesOnly) { + if (isGeneric) { + builder.Implementation(new TTopFreqCreate(hash, equate)); + } else { + switch (*slot) { 
UDF_TYPE_ID_MAP(MAKE_CREATE) - } } } - builder.IsStrict(); } + builder.IsStrict(); + } - if (name == AddValueName) { - builder.Args()->Add(topFreqType).Add(valueType).Done().Returns(topFreqType); + if (name == AddValueName) { + builder.Args()->Add(topFreqType).Add(valueType).Done().Returns(topFreqType); - if (!typesOnly) { - if (isGeneric) { - builder.Implementation(new TTopFreqAddValue); - } else { - switch (*slot) { + if (!typesOnly) { + if (isGeneric) { + builder.Implementation(new TTopFreqAddValue); + } else { + switch (*slot) { UDF_TYPE_ID_MAP(MAKE_ADD_VALUE) - } } } - builder.IsStrict(); } + builder.IsStrict(); + } - if (name == MergeName) { - builder.Args()->Add(topFreqType).Add(topFreqType).Done().Returns(topFreqType); + if (name == MergeName) { + builder.Args()->Add(topFreqType).Add(topFreqType).Done().Returns(topFreqType); - if (!typesOnly) { - if (isGeneric) { - builder.Implementation(new TTopFreqMerge(hash, equate)); - } else { - switch (*slot) { + if (!typesOnly) { + if (isGeneric) { + builder.Implementation(new TTopFreqMerge(hash, equate)); + } else { + switch (*slot) { UDF_TYPE_ID_MAP(MAKE_MERGE) - } } } - builder.IsStrict(); } + builder.IsStrict(); + } - if (name == SerializeName) { - builder.Args()->Add(topFreqType).Done().Returns(serializedType); + if (name == SerializeName) { + builder.Args()->Add(topFreqType).Done().Returns(serializedType); - if (!typesOnly) { - if (isGeneric) { - builder.Implementation(new TTopFreqSerialize); - } else { - switch (*slot) { + if (!typesOnly) { + if (isGeneric) { + builder.Implementation(new TTopFreqSerialize); + } else { + switch (*slot) { UDF_TYPE_ID_MAP(MAKE_SERIALIZE) - } } } - builder.IsStrict(); } + builder.IsStrict(); + } - if (name == DeserializeName) { - builder.Args()->Add(serializedType).Done().Returns(topFreqType); + if (name == DeserializeName) { + builder.Args()->Add(serializedType).Done().Returns(topFreqType); - if (!typesOnly) { - if (isGeneric) { - builder.Implementation(new 
TTopFreqDeserialize(hash, equate)); - } else { - switch (*slot) { + if (!typesOnly) { + if (isGeneric) { + builder.Implementation(new TTopFreqDeserialize(hash, equate)); + } else { + switch (*slot) { UDF_TYPE_ID_MAP(MAKE_DESERIALIZE) - } } } } + } - if (name == GetName) { - ui32 indexF, indexV; - auto itemType = builder.Struct()->AddField<ui64>("Frequency", &indexF).AddField("Value", valueType, &indexV).Build(); - auto resultType = builder.List()->Item(itemType).Build(); + if (name == GetName) { + ui32 indexF, indexV; + auto itemType = builder.Struct()->AddField<ui64>("Frequency", &indexF).AddField("Value", valueType, &indexV).Build(); + auto resultType = builder.List()->Item(itemType).Build(); - builder.Args()->Add(topFreqType).Add<ui32>().Done().Returns(resultType); + builder.Args()->Add(topFreqType).Add<ui32>().Done().Returns(resultType); - if (!typesOnly) { - if (isGeneric) { - builder.Implementation(new TTopFreqGet); - } else { - switch (*slot) { + if (!typesOnly) { + if (isGeneric) { + builder.Implementation(new TTopFreqGet); + } else { + switch (*slot) { UDF_TYPE_ID_MAP(MAKE_GET) - } } } - builder.IsStrict(); } - - } catch (const std::exception& e) { - builder.SetError(CurrentExceptionMessage()); + builder.IsStrict(); } + + } catch (const std::exception& e) { + builder.SetError(CurrentExceptionMessage()); } - }; + } +}; } // namespace diff --git a/yql/essentials/udfs/common/topfreq/static/ya.make b/yql/essentials/udfs/common/topfreq/static/ya.make index 95838f33c49..94379d474a9 100644 --- a/yql/essentials/udfs/common/topfreq/static/ya.make +++ b/yql/essentials/udfs/common/topfreq/static/ya.make @@ -6,6 +6,8 @@ YQL_ABI_VERSION( 0 ) +ENABLE(YQL_STYLE_CPP) + SRCS( static_udf.cpp topfreq.cpp diff --git a/yql/essentials/udfs/common/topfreq/topfreq_udf_ut.cpp b/yql/essentials/udfs/common/topfreq/topfreq_udf_ut.cpp index 9ce7b8561fb..51d02f43b2a 100644 --- a/yql/essentials/udfs/common/topfreq/topfreq_udf_ut.cpp +++ 
b/yql/essentials/udfs/common/topfreq/topfreq_udf_ut.cpp @@ -10,442 +10,443 @@ #include <yql/essentials/udfs/common/topfreq/static/topfreq_udf.h> namespace NYql { - using namespace NKikimr::NMiniKQL; - namespace NUdf { - extern NUdf::TUniquePtr<NUdf::IUdfModule> CreateTopFreqModule(); - } - - class TSetup { - public: - TSetup() - : MutableFunctionRegistry_(CreateFunctionRegistry(CreateBuiltinRegistry())->Clone()) - , RandomProvider_(CreateDeterministicRandomProvider(1)) - , TimeProvider_(CreateDeterministicTimeProvider(10000000)) - , Alloc_(__LOCATION__) - , Env_(Alloc_) - { - MutableFunctionRegistry_->AddModule("", "TopFreq", NUdf::CreateTopFreqModule()); - PgmBuidler_.Reset(new TProgramBuilder(Env_, *MutableFunctionRegistry_)); - } +using namespace NKikimr::NMiniKQL; +namespace NUdf { +extern NUdf::TUniquePtr<NUdf::IUdfModule> CreateTopFreqModule(); +} // namespace NUdf - TProgramBuilder& GetProgramBuilder() { - return *PgmBuidler_.Get(); - } +class TSetup { +public: + TSetup() + : MutableFunctionRegistry_(CreateFunctionRegistry(CreateBuiltinRegistry())->Clone()) + , RandomProvider_(CreateDeterministicRandomProvider(1)) + , TimeProvider_(CreateDeterministicTimeProvider(10000000)) + , Alloc_(__LOCATION__) + , Env_(Alloc_) + { + MutableFunctionRegistry_->AddModule("", "TopFreq", NUdf::CreateTopFreqModule()); + PgmBuidler_.Reset(new TProgramBuilder(Env_, *MutableFunctionRegistry_)); + } - NUdf::TUnboxedValue GetValue(TRuntimeNode& node) { - Explorer_.Walk(node.GetNode(), Env_); + TProgramBuilder& GetProgramBuilder() { + return *PgmBuidler_.Get(); + } - TComputationPatternOpts opts(Alloc_.Ref(), Env_, GetBuiltinFactory(), - MutableFunctionRegistry_.Get(), - NUdf::EValidateMode::None, NUdf::EValidatePolicy::Fail, "", EGraphPerProcess::Multi); - Pattern_ = MakeComputationPattern(Explorer_, node, {}, opts); - Graph_ = Pattern_->Clone(opts.ToComputationOptions(*RandomProvider_, *TimeProvider_)); + NUdf::TUnboxedValue GetValue(TRuntimeNode& node) { + 
Explorer_.Walk(node.GetNode(), Env_); - return Graph_->GetValue(); - } + TComputationPatternOpts opts(Alloc_.Ref(), Env_, GetBuiltinFactory(), + MutableFunctionRegistry_.Get(), + NUdf::EValidateMode::None, NUdf::EValidatePolicy::Fail, "", EGraphPerProcess::Multi); + Pattern_ = MakeComputationPattern(Explorer_, node, {}, opts); + Graph_ = Pattern_->Clone(opts.ToComputationOptions(*RandomProvider_, *TimeProvider_)); - private: - using IMutableFunctionRegistryPtr = TIntrusivePtr<IMutableFunctionRegistry>; - using IRandomProviderPtr = TIntrusivePtr<IRandomProvider>; - using ITimeProviderPtr = TIntrusivePtr<ITimeProvider>; + return Graph_->GetValue(); + } - IMutableFunctionRegistryPtr MutableFunctionRegistry_; - IRandomProviderPtr RandomProvider_; - ITimeProviderPtr TimeProvider_; - TScopedAlloc Alloc_; - TTypeEnvironment Env_; - THolder<TProgramBuilder> PgmBuidler_; - IComputationPattern::TPtr Pattern_; - THolder<IComputationGraph> Graph_; - TExploringNodeVisitor Explorer_; - }; +private: + using IMutableFunctionRegistryPtr = TIntrusivePtr<IMutableFunctionRegistry>; + using IRandomProviderPtr = TIntrusivePtr<IRandomProvider>; + using ITimeProviderPtr = TIntrusivePtr<ITimeProvider>; - Y_UNIT_TEST_SUITE(TUDFTopFreqTest) { - Y_UNIT_TEST(SimpleTopFreq) { - TSetup setup; - TProgramBuilder& pgmBuilder = setup.GetProgramBuilder(); + IMutableFunctionRegistryPtr MutableFunctionRegistry_; + IRandomProviderPtr RandomProvider_; + ITimeProviderPtr TimeProvider_; + TScopedAlloc Alloc_; + TTypeEnvironment Env_; + THolder<TProgramBuilder> PgmBuidler_; + IComputationPattern::TPtr Pattern_; + THolder<IComputationGraph> Graph_; + TExploringNodeVisitor Explorer_; +}; - const auto valueType = pgmBuilder.NewDataType(NUdf::TDataType<i32>::Id); - const auto emptyStructType = pgmBuilder.NewEmptyStructType(); - const auto resourceType = pgmBuilder.NewResourceType("TopFreq.TopFreqResource.Int32"); - const auto ui32Type = pgmBuilder.NewDataType(NUdf::TDataType<ui32>::Id); 
+Y_UNIT_TEST_SUITE(TUDFTopFreqTest) { +Y_UNIT_TEST(SimpleTopFreq) { + TSetup setup; + TProgramBuilder& pgmBuilder = setup.GetProgramBuilder(); - const auto createArgsType = pgmBuilder.NewTupleType({valueType, ui32Type}); - const auto createUserType = pgmBuilder.NewTupleType({createArgsType, emptyStructType, valueType}); - auto udfTopFreq_Create = pgmBuilder.Udf("TopFreq.TopFreq_Create", TRuntimeNode(), createUserType); + const auto valueType = pgmBuilder.NewDataType(NUdf::TDataType<i32>::Id); + const auto emptyStructType = pgmBuilder.NewEmptyStructType(); + const auto resourceType = pgmBuilder.NewResourceType("TopFreq.TopFreqResource.Int32"); + const auto ui32Type = pgmBuilder.NewDataType(NUdf::TDataType<ui32>::Id); - auto addValueArgsType = pgmBuilder.NewTupleType({resourceType, valueType}); - auto addValueUserType = pgmBuilder.NewTupleType({addValueArgsType, emptyStructType, valueType}); - auto udfTopFreq_AddValue = pgmBuilder.Udf("TopFreq.TopFreq_AddValue", TRuntimeNode(), addValueUserType); + const auto createArgsType = pgmBuilder.NewTupleType({valueType, ui32Type}); + const auto createUserType = pgmBuilder.NewTupleType({createArgsType, emptyStructType, valueType}); + auto udfTopFreq_Create = pgmBuilder.Udf("TopFreq.TopFreq_Create", TRuntimeNode(), createUserType); - auto getArgsType = pgmBuilder.NewTupleType({resourceType, ui32Type}); - auto getUserType = pgmBuilder.NewTupleType({getArgsType, emptyStructType, valueType}); - auto udfTopFreq_Get = pgmBuilder.Udf("TopFreq.TopFreq_Get", TRuntimeNode(), getUserType); + auto addValueArgsType = pgmBuilder.NewTupleType({resourceType, valueType}); + auto addValueUserType = pgmBuilder.NewTupleType({addValueArgsType, emptyStructType, valueType}); + auto udfTopFreq_AddValue = pgmBuilder.Udf("TopFreq.TopFreq_AddValue", TRuntimeNode(), addValueUserType); - TRuntimeNode pgmTopFreq; - { - auto val = pgmBuilder.NewDataLiteral<i32>(3); - auto param = pgmBuilder.NewDataLiteral<ui32>(10); + auto getArgsType = 
pgmBuilder.NewTupleType({resourceType, ui32Type}); + auto getUserType = pgmBuilder.NewTupleType({getArgsType, emptyStructType, valueType}); + auto udfTopFreq_Get = pgmBuilder.Udf("TopFreq.TopFreq_Get", TRuntimeNode(), getUserType); - TVector<TRuntimeNode> params = {val, param}; - pgmTopFreq = pgmBuilder.Apply(udfTopFreq_Create, params); - } + TRuntimeNode pgmTopFreq; + { + auto val = pgmBuilder.NewDataLiteral<i32>(3); + auto param = pgmBuilder.NewDataLiteral<ui32>(10); - for (int n = 0; n < 9; n++) { - auto value = pgmBuilder.NewDataLiteral<i32>(1); - TVector<TRuntimeNode> params = {pgmTopFreq, value}; - pgmTopFreq = pgmBuilder.Apply(udfTopFreq_AddValue, params); - } + TVector<TRuntimeNode> params = {val, param}; + pgmTopFreq = pgmBuilder.Apply(udfTopFreq_Create, params); + } - for (int n = 0; n < 7; n++) { - auto value = pgmBuilder.NewDataLiteral<i32>(4); - TVector<TRuntimeNode> params = {pgmTopFreq, value}; - pgmTopFreq = pgmBuilder.Apply(udfTopFreq_AddValue, params); - } + for (int n = 0; n < 9; n++) { + auto value = pgmBuilder.NewDataLiteral<i32>(1); + TVector<TRuntimeNode> params = {pgmTopFreq, value}; + pgmTopFreq = pgmBuilder.Apply(udfTopFreq_AddValue, params); + } - TRuntimeNode pgmReturn; - { - auto param = pgmBuilder.NewDataLiteral<ui32>(4); - TVector<TRuntimeNode> params = {pgmTopFreq, param}; - pgmReturn = pgmBuilder.Apply(udfTopFreq_Get, params); - } + for (int n = 0; n < 7; n++) { + auto value = pgmBuilder.NewDataLiteral<i32>(4); + TVector<TRuntimeNode> params = {pgmTopFreq, value}; + pgmTopFreq = pgmBuilder.Apply(udfTopFreq_AddValue, params); + } - auto value = setup.GetValue(pgmReturn); + TRuntimeNode pgmReturn; + { + auto param = pgmBuilder.NewDataLiteral<ui32>(4); + TVector<TRuntimeNode> params = {pgmTopFreq, param}; + pgmReturn = pgmBuilder.Apply(udfTopFreq_Get, params); + } - auto listIterator = value.GetListIterator(); + auto value = setup.GetValue(pgmReturn); - TUnboxedValue item; + auto listIterator = value.GetListIterator(); - 
UNIT_ASSERT(listIterator.Next(item)); - UNIT_ASSERT_EQUAL(item.GetElement(1).Get<i32>(), 1); - UNIT_ASSERT_EQUAL(item.GetElement(0).Get<ui64>(), 9); + TUnboxedValue item; - UNIT_ASSERT(listIterator.Next(item)); - UNIT_ASSERT_EQUAL(item.GetElement(1).Get<i32>(), 4); - UNIT_ASSERT_EQUAL(item.GetElement(0).Get<ui64>(), 7); + UNIT_ASSERT(listIterator.Next(item)); + UNIT_ASSERT_EQUAL(item.GetElement(1).Get<i32>(), 1); + UNIT_ASSERT_EQUAL(item.GetElement(0).Get<ui64>(), 9); - UNIT_ASSERT(listIterator.Next(item)); - UNIT_ASSERT_EQUAL(item.GetElement(1).Get<i32>(), 3); - UNIT_ASSERT_EQUAL(item.GetElement(0).Get<ui64>(), 1); + UNIT_ASSERT(listIterator.Next(item)); + UNIT_ASSERT_EQUAL(item.GetElement(1).Get<i32>(), 4); + UNIT_ASSERT_EQUAL(item.GetElement(0).Get<ui64>(), 7); - UNIT_ASSERT(!listIterator.Next(item)); - } + UNIT_ASSERT(listIterator.Next(item)); + UNIT_ASSERT_EQUAL(item.GetElement(1).Get<i32>(), 3); + UNIT_ASSERT_EQUAL(item.GetElement(0).Get<ui64>(), 1); - Y_UNIT_TEST(MergingTopFreq) { - TSetup setup; - TProgramBuilder& pgmBuilder = setup.GetProgramBuilder(); + UNIT_ASSERT(!listIterator.Next(item)); +} - const auto valueType = pgmBuilder.NewDataType(NUdf::TDataType<ui64>::Id); - const auto emptyStructType = pgmBuilder.NewEmptyStructType(); - const auto resourceType = pgmBuilder.NewResourceType("TopFreq.TopFreqResource.Uint64"); - const auto ui32Type = pgmBuilder.NewDataType(NUdf::TDataType<ui32>::Id); +Y_UNIT_TEST(MergingTopFreq) { + TSetup setup; + TProgramBuilder& pgmBuilder = setup.GetProgramBuilder(); - const auto createArgsType = pgmBuilder.NewTupleType({valueType, ui32Type}); - const auto createUserType = pgmBuilder.NewTupleType({createArgsType, emptyStructType, valueType}); - auto udfTopFreq_Create = pgmBuilder.Udf("TopFreq.TopFreq_Create", TRuntimeNode(), createUserType); + const auto valueType = pgmBuilder.NewDataType(NUdf::TDataType<ui64>::Id); + const auto emptyStructType = pgmBuilder.NewEmptyStructType(); + const auto resourceType = 
pgmBuilder.NewResourceType("TopFreq.TopFreqResource.Uint64"); + const auto ui32Type = pgmBuilder.NewDataType(NUdf::TDataType<ui32>::Id); - auto addValueArgsType = pgmBuilder.NewTupleType({resourceType, valueType}); - auto addValueUserType = pgmBuilder.NewTupleType({addValueArgsType, emptyStructType, valueType}); - auto udfTopFreq_AddValue = pgmBuilder.Udf("TopFreq.TopFreq_AddValue", TRuntimeNode(), addValueUserType); + const auto createArgsType = pgmBuilder.NewTupleType({valueType, ui32Type}); + const auto createUserType = pgmBuilder.NewTupleType({createArgsType, emptyStructType, valueType}); + auto udfTopFreq_Create = pgmBuilder.Udf("TopFreq.TopFreq_Create", TRuntimeNode(), createUserType); - auto mergeArgsType = pgmBuilder.NewTupleType({resourceType, resourceType}); - auto mergeUserType = pgmBuilder.NewTupleType({mergeArgsType, emptyStructType, valueType}); - auto udfTopFreq_Merge = pgmBuilder.Udf("TopFreq.TopFreq_Merge", TRuntimeNode(), mergeUserType); + auto addValueArgsType = pgmBuilder.NewTupleType({resourceType, valueType}); + auto addValueUserType = pgmBuilder.NewTupleType({addValueArgsType, emptyStructType, valueType}); + auto udfTopFreq_AddValue = pgmBuilder.Udf("TopFreq.TopFreq_AddValue", TRuntimeNode(), addValueUserType); - auto getArgsType = pgmBuilder.NewTupleType({resourceType, ui32Type}); - auto getUserType = pgmBuilder.NewTupleType({getArgsType, emptyStructType, valueType}); - auto udfTopFreq_Get = pgmBuilder.Udf("TopFreq.TopFreq_Get", TRuntimeNode(), getUserType); + auto mergeArgsType = pgmBuilder.NewTupleType({resourceType, resourceType}); + auto mergeUserType = pgmBuilder.NewTupleType({mergeArgsType, emptyStructType, valueType}); + auto udfTopFreq_Merge = pgmBuilder.Udf("TopFreq.TopFreq_Merge", TRuntimeNode(), mergeUserType); - TRuntimeNode pgmTopFreq; - { - auto value = pgmBuilder.NewDataLiteral<ui64>(1); - auto param = pgmBuilder.NewDataLiteral<ui32>(1); - TVector<TRuntimeNode> params = {value, param}; - pgmTopFreq = 
pgmBuilder.Apply(udfTopFreq_Create, params); - } + auto getArgsType = pgmBuilder.NewTupleType({resourceType, ui32Type}); + auto getUserType = pgmBuilder.NewTupleType({getArgsType, emptyStructType, valueType}); + auto udfTopFreq_Get = pgmBuilder.Udf("TopFreq.TopFreq_Get", TRuntimeNode(), getUserType); - for (int n = 0; n < 1; n++) { - auto value = pgmBuilder.NewDataLiteral<ui64>(1); - TVector<TRuntimeNode> params = {pgmTopFreq, value}; - pgmTopFreq = pgmBuilder.Apply(udfTopFreq_AddValue, params); - } + TRuntimeNode pgmTopFreq; + { + auto value = pgmBuilder.NewDataLiteral<ui64>(1); + auto param = pgmBuilder.NewDataLiteral<ui32>(1); + TVector<TRuntimeNode> params = {value, param}; + pgmTopFreq = pgmBuilder.Apply(udfTopFreq_Create, params); + } - for (int n = 0; n < 4; n++) { - auto value = pgmBuilder.NewDataLiteral<ui64>(5); - TVector<TRuntimeNode> params = {pgmTopFreq, value}; - pgmTopFreq = pgmBuilder.Apply(udfTopFreq_AddValue, params); - } + for (int n = 0; n < 1; n++) { + auto value = pgmBuilder.NewDataLiteral<ui64>(1); + TVector<TRuntimeNode> params = {pgmTopFreq, value}; + pgmTopFreq = pgmBuilder.Apply(udfTopFreq_AddValue, params); + } - for (int n = 0; n < 1; n++) { - auto value = pgmBuilder.NewDataLiteral<ui64>(3); - TVector<TRuntimeNode> params = {pgmTopFreq, value}; - pgmTopFreq = pgmBuilder.Apply(udfTopFreq_AddValue, params); - } + for (int n = 0; n < 4; n++) { + auto value = pgmBuilder.NewDataLiteral<ui64>(5); + TVector<TRuntimeNode> params = {pgmTopFreq, value}; + pgmTopFreq = pgmBuilder.Apply(udfTopFreq_AddValue, params); + } - TRuntimeNode pgmTopFreq2; - { - auto value = pgmBuilder.NewDataLiteral<ui64>(1); - auto param = pgmBuilder.NewDataLiteral<ui32>(1); - TVector<TRuntimeNode> params = {value, param}; - pgmTopFreq2 = pgmBuilder.Apply(udfTopFreq_Create, params); - } + for (int n = 0; n < 1; n++) { + auto value = pgmBuilder.NewDataLiteral<ui64>(3); + TVector<TRuntimeNode> params = {pgmTopFreq, value}; + pgmTopFreq = 
pgmBuilder.Apply(udfTopFreq_AddValue, params); + } - for (int n = 0; n < 5; n++) { - auto value = pgmBuilder.NewDataLiteral<ui64>(1); - TVector<TRuntimeNode> params = {pgmTopFreq2, value}; - pgmTopFreq2 = pgmBuilder.Apply(udfTopFreq_AddValue, params); - } + TRuntimeNode pgmTopFreq2; + { + auto value = pgmBuilder.NewDataLiteral<ui64>(1); + auto param = pgmBuilder.NewDataLiteral<ui32>(1); + TVector<TRuntimeNode> params = {value, param}; + pgmTopFreq2 = pgmBuilder.Apply(udfTopFreq_Create, params); + } - for (int n = 0; n < 5; n++) { - auto value = pgmBuilder.NewDataLiteral<ui64>(5); - TVector<TRuntimeNode> params = {pgmTopFreq2, value}; - pgmTopFreq2 = pgmBuilder.Apply(udfTopFreq_AddValue, params); - } + for (int n = 0; n < 5; n++) { + auto value = pgmBuilder.NewDataLiteral<ui64>(1); + TVector<TRuntimeNode> params = {pgmTopFreq2, value}; + pgmTopFreq2 = pgmBuilder.Apply(udfTopFreq_AddValue, params); + } - TRuntimeNode pgmTopFreq3; - { - TVector<TRuntimeNode> params = {pgmTopFreq, pgmTopFreq2}; - pgmTopFreq3 = pgmBuilder.Apply(udfTopFreq_Merge, params); - } + for (int n = 0; n < 5; n++) { + auto value = pgmBuilder.NewDataLiteral<ui64>(5); + TVector<TRuntimeNode> params = {pgmTopFreq2, value}; + pgmTopFreq2 = pgmBuilder.Apply(udfTopFreq_AddValue, params); + } - TRuntimeNode pgmReturn; - { - auto param = pgmBuilder.NewDataLiteral<ui32>(1); - TVector<TRuntimeNode> params = {pgmTopFreq3, param}; - pgmReturn = pgmBuilder.Apply(udfTopFreq_Get, params); - } + TRuntimeNode pgmTopFreq3; + { + TVector<TRuntimeNode> params = {pgmTopFreq, pgmTopFreq2}; + pgmTopFreq3 = pgmBuilder.Apply(udfTopFreq_Merge, params); + } - auto value = setup.GetValue(pgmReturn); + TRuntimeNode pgmReturn; + { + auto param = pgmBuilder.NewDataLiteral<ui32>(1); + TVector<TRuntimeNode> params = {pgmTopFreq3, param}; + pgmReturn = pgmBuilder.Apply(udfTopFreq_Get, params); + } - auto listIterator = value.GetListIterator(); + auto value = setup.GetValue(pgmReturn); - TUnboxedValue item; + auto listIterator = 
value.GetListIterator(); - UNIT_ASSERT(listIterator.Next(item)); - UNIT_ASSERT_EQUAL(item.GetElement(1).Get<ui64>(), 5); - UNIT_ASSERT_EQUAL(item.GetElement(0).Get<ui64>(), 9); + TUnboxedValue item; - UNIT_ASSERT(!listIterator.Next(item)); - } + UNIT_ASSERT(listIterator.Next(item)); + UNIT_ASSERT_EQUAL(item.GetElement(1).Get<ui64>(), 5); + UNIT_ASSERT_EQUAL(item.GetElement(0).Get<ui64>(), 9); - Y_UNIT_TEST(SerializedTopFreq) { - TSetup setup; - TProgramBuilder& pgmBuilder = setup.GetProgramBuilder(); + UNIT_ASSERT(!listIterator.Next(item)); +} - const auto valueType = pgmBuilder.NewDataType(NUdf::TDataType<bool>::Id); - const auto emptyStructType = pgmBuilder.NewEmptyStructType(); - const auto resourceType = pgmBuilder.NewResourceType("TopFreq.TopFreqResource.Bool"); - const auto ui32Type = pgmBuilder.NewDataType(NUdf::TDataType<ui32>::Id); - const auto ui64Type = pgmBuilder.NewDataType(NUdf::TDataType<ui64>::Id); +Y_UNIT_TEST(SerializedTopFreq) { + TSetup setup; + TProgramBuilder& pgmBuilder = setup.GetProgramBuilder(); - const auto createArgsType = pgmBuilder.NewTupleType({valueType, ui32Type}); - const auto createUserType = pgmBuilder.NewTupleType({createArgsType, emptyStructType, valueType}); - auto udfTopFreq_Create = pgmBuilder.Udf("TopFreq.TopFreq_Create", TRuntimeNode(), createUserType); + const auto valueType = pgmBuilder.NewDataType(NUdf::TDataType<bool>::Id); + const auto emptyStructType = pgmBuilder.NewEmptyStructType(); + const auto resourceType = pgmBuilder.NewResourceType("TopFreq.TopFreqResource.Bool"); + const auto ui32Type = pgmBuilder.NewDataType(NUdf::TDataType<ui32>::Id); + const auto ui64Type = pgmBuilder.NewDataType(NUdf::TDataType<ui64>::Id); - auto addValueArgsType = pgmBuilder.NewTupleType({resourceType, valueType}); - auto addValueUserType = pgmBuilder.NewTupleType({addValueArgsType, emptyStructType, valueType}); - auto udfTopFreq_AddValue = pgmBuilder.Udf("TopFreq.TopFreq_AddValue", TRuntimeNode(), addValueUserType); + const auto 
createArgsType = pgmBuilder.NewTupleType({valueType, ui32Type}); + const auto createUserType = pgmBuilder.NewTupleType({createArgsType, emptyStructType, valueType}); + auto udfTopFreq_Create = pgmBuilder.Udf("TopFreq.TopFreq_Create", TRuntimeNode(), createUserType); - auto getArgsType = pgmBuilder.NewTupleType({resourceType, ui32Type}); - auto getUserType = pgmBuilder.NewTupleType({getArgsType, emptyStructType, valueType}); - auto udfTopFreq_Get = pgmBuilder.Udf("TopFreq.TopFreq_Get", TRuntimeNode(), getUserType); + auto addValueArgsType = pgmBuilder.NewTupleType({resourceType, valueType}); + auto addValueUserType = pgmBuilder.NewTupleType({addValueArgsType, emptyStructType, valueType}); + auto udfTopFreq_AddValue = pgmBuilder.Udf("TopFreq.TopFreq_AddValue", TRuntimeNode(), addValueUserType); - auto serializeArgsType = pgmBuilder.NewTupleType({resourceType}); - auto serializeUserType = pgmBuilder.NewTupleType({serializeArgsType, emptyStructType, valueType}); - auto udfTopFreq_Serialize = pgmBuilder.Udf("TopFreq.TopFreq_Serialize", TRuntimeNode(), serializeUserType); + auto getArgsType = pgmBuilder.NewTupleType({resourceType, ui32Type}); + auto getUserType = pgmBuilder.NewTupleType({getArgsType, emptyStructType, valueType}); + auto udfTopFreq_Get = pgmBuilder.Udf("TopFreq.TopFreq_Get", TRuntimeNode(), getUserType); - auto serializedType = pgmBuilder.NewTupleType({ui32Type, ui32Type, - pgmBuilder.NewListType(pgmBuilder.NewTupleType({ui64Type, valueType}))}); + auto serializeArgsType = pgmBuilder.NewTupleType({resourceType}); + auto serializeUserType = pgmBuilder.NewTupleType({serializeArgsType, emptyStructType, valueType}); + auto udfTopFreq_Serialize = pgmBuilder.Udf("TopFreq.TopFreq_Serialize", TRuntimeNode(), serializeUserType); - auto deserializeArgsType = pgmBuilder.NewTupleType({serializedType}); - auto deserializeUserType = pgmBuilder.NewTupleType({deserializeArgsType, emptyStructType, valueType}); - auto udfTopFreq_Deserialize = 
pgmBuilder.Udf("TopFreq.TopFreq_Deserialize", TRuntimeNode(), deserializeUserType); + auto serializedType = pgmBuilder.NewTupleType({ui32Type, ui32Type, + pgmBuilder.NewListType(pgmBuilder.NewTupleType({ui64Type, valueType}))}); - TRuntimeNode pgmTopFreq; - { - auto value = pgmBuilder.NewDataLiteral<bool>(true); - auto param = pgmBuilder.NewDataLiteral<ui32>(10); - TVector<TRuntimeNode> params = {value, param}; - pgmTopFreq = pgmBuilder.Apply(udfTopFreq_Create, params); - } + auto deserializeArgsType = pgmBuilder.NewTupleType({serializedType}); + auto deserializeUserType = pgmBuilder.NewTupleType({deserializeArgsType, emptyStructType, valueType}); + auto udfTopFreq_Deserialize = pgmBuilder.Udf("TopFreq.TopFreq_Deserialize", TRuntimeNode(), deserializeUserType); - for (int n = 0; n < 7; n++) { - auto value = pgmBuilder.NewDataLiteral<bool>(true); - TVector<TRuntimeNode> params = {pgmTopFreq, value}; - pgmTopFreq = pgmBuilder.Apply(udfTopFreq_AddValue, params); - } + TRuntimeNode pgmTopFreq; + { + auto value = pgmBuilder.NewDataLiteral<bool>(true); + auto param = pgmBuilder.NewDataLiteral<ui32>(10); + TVector<TRuntimeNode> params = {value, param}; + pgmTopFreq = pgmBuilder.Apply(udfTopFreq_Create, params); + } - for (int n = 0; n < 10; n++) { - auto value = pgmBuilder.NewDataLiteral<bool>(false); - TVector<TRuntimeNode> params = {pgmTopFreq, value}; - pgmTopFreq = pgmBuilder.Apply(udfTopFreq_AddValue, params); - } + for (int n = 0; n < 7; n++) { + auto value = pgmBuilder.NewDataLiteral<bool>(true); + TVector<TRuntimeNode> params = {pgmTopFreq, value}; + pgmTopFreq = pgmBuilder.Apply(udfTopFreq_AddValue, params); + } - TRuntimeNode pgmSerializedTopFreq; - { - TVector<TRuntimeNode> params = {pgmTopFreq}; - pgmSerializedTopFreq = pgmBuilder.Apply(udfTopFreq_Serialize, params); - } + for (int n = 0; n < 10; n++) { + auto value = pgmBuilder.NewDataLiteral<bool>(false); + TVector<TRuntimeNode> params = {pgmTopFreq, value}; + pgmTopFreq = 
pgmBuilder.Apply(udfTopFreq_AddValue, params); + } - TRuntimeNode pgmDeserializedTopFreq; - { - TVector<TRuntimeNode> params = {pgmSerializedTopFreq}; - pgmDeserializedTopFreq = pgmBuilder.Apply(udfTopFreq_Deserialize, params); - } + TRuntimeNode pgmSerializedTopFreq; + { + TVector<TRuntimeNode> params = {pgmTopFreq}; + pgmSerializedTopFreq = pgmBuilder.Apply(udfTopFreq_Serialize, params); + } - TRuntimeNode pgmReturn; - { - auto param = pgmBuilder.NewDataLiteral<ui32>(3); - TVector<TRuntimeNode> params = {pgmDeserializedTopFreq, param}; - pgmReturn = pgmBuilder.Apply(udfTopFreq_Get, params); - } + TRuntimeNode pgmDeserializedTopFreq; + { + TVector<TRuntimeNode> params = {pgmSerializedTopFreq}; + pgmDeserializedTopFreq = pgmBuilder.Apply(udfTopFreq_Deserialize, params); + } - auto value = setup.GetValue(pgmReturn); + TRuntimeNode pgmReturn; + { + auto param = pgmBuilder.NewDataLiteral<ui32>(3); + TVector<TRuntimeNode> params = {pgmDeserializedTopFreq, param}; + pgmReturn = pgmBuilder.Apply(udfTopFreq_Get, params); + } - auto listIterator = value.GetListIterator(); + auto value = setup.GetValue(pgmReturn); - TUnboxedValue item; + auto listIterator = value.GetListIterator(); - UNIT_ASSERT(listIterator.Next(item)); - UNIT_ASSERT_EQUAL(item.GetElement(1).Get<bool>(), false); - UNIT_ASSERT_EQUAL(item.GetElement(0).Get<ui64>(), 10); + TUnboxedValue item; - UNIT_ASSERT(listIterator.Next(item)); - UNIT_ASSERT_EQUAL(item.GetElement(1).Get<bool>(), true); - UNIT_ASSERT_EQUAL(item.GetElement(0).Get<ui64>(), 8); + UNIT_ASSERT(listIterator.Next(item)); + UNIT_ASSERT_EQUAL(item.GetElement(1).Get<bool>(), false); + UNIT_ASSERT_EQUAL(item.GetElement(0).Get<ui64>(), 10); - UNIT_ASSERT(!listIterator.Next(item)); - } + UNIT_ASSERT(listIterator.Next(item)); + UNIT_ASSERT_EQUAL(item.GetElement(1).Get<bool>(), true); + UNIT_ASSERT_EQUAL(item.GetElement(0).Get<ui64>(), 8); - Y_UNIT_TEST(ApproxTopFreq) { - TSetup setup; - TProgramBuilder& pgmBuilder = setup.GetProgramBuilder(); + 
UNIT_ASSERT(!listIterator.Next(item)); +} - const auto valueType = pgmBuilder.NewDataType(NUdf::TDataType<ui64>::Id); - const auto emptyStructType = pgmBuilder.NewEmptyStructType(); - const auto resourceType = pgmBuilder.NewResourceType("TopFreq.TopFreqResource.Uint64"); - const auto ui32Type = pgmBuilder.NewDataType(NUdf::TDataType<ui32>::Id); - const auto ui64Type = pgmBuilder.NewDataType(NUdf::TDataType<ui64>::Id); +Y_UNIT_TEST(ApproxTopFreq) { + TSetup setup; + TProgramBuilder& pgmBuilder = setup.GetProgramBuilder(); - const auto createArgsType = pgmBuilder.NewTupleType({valueType, ui32Type}); - const auto createUserType = pgmBuilder.NewTupleType({createArgsType, emptyStructType, valueType}); - auto udfTopFreq_Create = pgmBuilder.Udf("TopFreq.TopFreq_Create", TRuntimeNode(), createUserType); + const auto valueType = pgmBuilder.NewDataType(NUdf::TDataType<ui64>::Id); + const auto emptyStructType = pgmBuilder.NewEmptyStructType(); + const auto resourceType = pgmBuilder.NewResourceType("TopFreq.TopFreqResource.Uint64"); + const auto ui32Type = pgmBuilder.NewDataType(NUdf::TDataType<ui32>::Id); + const auto ui64Type = pgmBuilder.NewDataType(NUdf::TDataType<ui64>::Id); - auto addValueArgsType = pgmBuilder.NewTupleType({resourceType, valueType}); - auto addValueUserType = pgmBuilder.NewTupleType({addValueArgsType, emptyStructType, valueType}); - auto udfTopFreq_AddValue = pgmBuilder.Udf("TopFreq.TopFreq_AddValue", TRuntimeNode(), addValueUserType); + const auto createArgsType = pgmBuilder.NewTupleType({valueType, ui32Type}); + const auto createUserType = pgmBuilder.NewTupleType({createArgsType, emptyStructType, valueType}); + auto udfTopFreq_Create = pgmBuilder.Udf("TopFreq.TopFreq_Create", TRuntimeNode(), createUserType); - auto mergeArgsType = pgmBuilder.NewTupleType({resourceType, resourceType}); - auto mergeUserType = pgmBuilder.NewTupleType({mergeArgsType, emptyStructType, valueType}); - auto udfTopFreq_Merge = pgmBuilder.Udf("TopFreq.TopFreq_Merge", 
TRuntimeNode(), mergeUserType); + auto addValueArgsType = pgmBuilder.NewTupleType({resourceType, valueType}); + auto addValueUserType = pgmBuilder.NewTupleType({addValueArgsType, emptyStructType, valueType}); + auto udfTopFreq_AddValue = pgmBuilder.Udf("TopFreq.TopFreq_AddValue", TRuntimeNode(), addValueUserType); - auto getArgsType = pgmBuilder.NewTupleType({resourceType, ui32Type}); - auto getUserType = pgmBuilder.NewTupleType({getArgsType, emptyStructType, valueType}); - auto udfTopFreq_Get = pgmBuilder.Udf("TopFreq.TopFreq_Get", TRuntimeNode(), getUserType); + auto mergeArgsType = pgmBuilder.NewTupleType({resourceType, resourceType}); + auto mergeUserType = pgmBuilder.NewTupleType({mergeArgsType, emptyStructType, valueType}); + auto udfTopFreq_Merge = pgmBuilder.Udf("TopFreq.TopFreq_Merge", TRuntimeNode(), mergeUserType); - auto serializeArgsType = pgmBuilder.NewTupleType({resourceType}); - auto serializeUserType = pgmBuilder.NewTupleType({serializeArgsType, emptyStructType, valueType}); - auto udfTopFreq_Serialize = pgmBuilder.Udf("TopFreq.TopFreq_Serialize", TRuntimeNode(), serializeUserType); + auto getArgsType = pgmBuilder.NewTupleType({resourceType, ui32Type}); + auto getUserType = pgmBuilder.NewTupleType({getArgsType, emptyStructType, valueType}); + auto udfTopFreq_Get = pgmBuilder.Udf("TopFreq.TopFreq_Get", TRuntimeNode(), getUserType); - auto serializedType = pgmBuilder.NewTupleType({ui32Type, ui32Type, - pgmBuilder.NewListType(pgmBuilder.NewTupleType({ui64Type, valueType}))}); + auto serializeArgsType = pgmBuilder.NewTupleType({resourceType}); + auto serializeUserType = pgmBuilder.NewTupleType({serializeArgsType, emptyStructType, valueType}); + auto udfTopFreq_Serialize = pgmBuilder.Udf("TopFreq.TopFreq_Serialize", TRuntimeNode(), serializeUserType); - auto deserializeArgsType = pgmBuilder.NewTupleType({serializedType}); - auto deserializeUserType = pgmBuilder.NewTupleType({deserializeArgsType, emptyStructType, valueType}); - auto 
udfTopFreq_Deserialize = pgmBuilder.Udf("TopFreq.TopFreq_Deserialize", TRuntimeNode(), deserializeUserType); + auto serializedType = pgmBuilder.NewTupleType({ui32Type, ui32Type, + pgmBuilder.NewListType(pgmBuilder.NewTupleType({ui64Type, valueType}))}); - static const ui64 BigNum = 20; - static const ui64 BigEach = 5000; - static const ui64 SmallNum = 500; - static const ui64 SmallEach = 20; - static const ui64 Total = BigNum * BigEach + SmallNum * SmallEach; - static const i32 AskFor = 25; - static const ui64 BlockSize = 200; - static const ui64 BlockCount = 10; - static const i32 WorksIfAtLeast = 15; + auto deserializeArgsType = pgmBuilder.NewTupleType({serializedType}); + auto deserializeUserType = pgmBuilder.NewTupleType({deserializeArgsType, emptyStructType, valueType}); + auto udfTopFreq_Deserialize = pgmBuilder.Udf("TopFreq.TopFreq_Deserialize", TRuntimeNode(), deserializeUserType); - std::array<ui64, Total> values; - std::array<TRuntimeNode, BlockCount> pgmTopFreqs; + static const ui64 BigNum = 20; + static const ui64 BigEach = 5000; + static const ui64 SmallNum = 500; + static const ui64 SmallEach = 20; + static const ui64 Total = BigNum * BigEach + SmallNum * SmallEach; + static const i32 AskFor = 25; + static const ui64 BlockSize = 200; + static const ui64 BlockCount = 10; + static const i32 WorksIfAtLeast = 15; - i32 curIndex = 0; - for (ui64 i = 1; i <= BigNum; i++) { - for (ui64 j = 0; j < BigEach; j++) { - values[curIndex++] = i; - } - } + std::array<ui64, Total> values; + std::array<TRuntimeNode, BlockCount> pgmTopFreqs; - for (ui64 i = BigNum + 1; i <= BigNum + SmallNum; i++) { - for (ui64 j = 0; j < SmallEach; j++) { - values[curIndex++] = i; - } - } + i32 curIndex = 0; + for (ui64 i = 1; i <= BigNum; i++) { + for (ui64 j = 0; j < BigEach; j++) { + values[curIndex++] = i; + } + } - Shuffle(values.begin(), values.end()); + for (ui64 i = BigNum + 1; i <= BigNum + SmallNum; i++) { + for (ui64 j = 0; j < SmallEach; j++) { + values[curIndex++] = i; + } 
+ } - TVector<TRuntimeNode> params; - TRuntimeNode param; - TRuntimeNode pgmvalue; + Shuffle(values.begin(), values.end()); - for (ui64 i = 0; i < BlockCount; i++) { - { - pgmvalue = pgmBuilder.NewDataLiteral<ui64>(values[i * BlockSize]); - param = pgmBuilder.NewDataLiteral<ui32>(AskFor); - params = {pgmvalue, param}; - pgmTopFreqs[i] = pgmBuilder.Apply(udfTopFreq_Create, params); - } + TVector<TRuntimeNode> params; + TRuntimeNode param; + TRuntimeNode pgmvalue; - for (ui64 j = i * BlockSize + 1; j < (i + 1) * BlockSize; j++) { - pgmvalue = pgmBuilder.NewDataLiteral<ui64>(values[j]); - params = {pgmTopFreqs[i], pgmvalue}; - pgmTopFreqs[i] = pgmBuilder.Apply(udfTopFreq_AddValue, params); - } + for (ui64 i = 0; i < BlockCount; i++) { + { + pgmvalue = pgmBuilder.NewDataLiteral<ui64>(values[i * BlockSize]); + param = pgmBuilder.NewDataLiteral<ui32>(AskFor); + params = {pgmvalue, param}; + pgmTopFreqs[i] = pgmBuilder.Apply(udfTopFreq_Create, params); + } - { - params = {pgmTopFreqs[i]}; - pgmTopFreqs[i] = pgmBuilder.Apply(udfTopFreq_Serialize, params); - } - } + for (ui64 j = i * BlockSize + 1; j < (i + 1) * BlockSize; j++) { + pgmvalue = pgmBuilder.NewDataLiteral<ui64>(values[j]); + params = {pgmTopFreqs[i], pgmvalue}; + pgmTopFreqs[i] = pgmBuilder.Apply(udfTopFreq_AddValue, params); + } - TRuntimeNode pgmMainTopFreq; - { - pgmvalue = pgmBuilder.NewDataLiteral<ui64>(Total + 2); - param = pgmBuilder.NewDataLiteral<ui32>(AskFor); - params = {pgmvalue, param}; - pgmMainTopFreq = pgmBuilder.Apply(udfTopFreq_Create, params); - } + { + params = {pgmTopFreqs[i]}; + pgmTopFreqs[i] = pgmBuilder.Apply(udfTopFreq_Serialize, params); + } + } - for (ui64 i = 0; i < BlockCount; i++) { - params = {pgmTopFreqs[i]}; - pgmTopFreqs[i] = pgmBuilder.Apply(udfTopFreq_Deserialize, params); + TRuntimeNode pgmMainTopFreq; + { + pgmvalue = pgmBuilder.NewDataLiteral<ui64>(Total + 2); + param = pgmBuilder.NewDataLiteral<ui32>(AskFor); + params = {pgmvalue, param}; + pgmMainTopFreq = 
pgmBuilder.Apply(udfTopFreq_Create, params); + } - params = {pgmMainTopFreq, pgmTopFreqs[i]}; - pgmMainTopFreq = pgmBuilder.Apply(udfTopFreq_Merge, params); - } + for (ui64 i = 0; i < BlockCount; i++) { + params = {pgmTopFreqs[i]}; + pgmTopFreqs[i] = pgmBuilder.Apply(udfTopFreq_Deserialize, params); - TRuntimeNode pgmReturn; - { - param = pgmBuilder.NewDataLiteral<ui32>(AskFor); - params = {pgmMainTopFreq, param}; - pgmReturn = pgmBuilder.Apply(udfTopFreq_Get, params); - } + params = {pgmMainTopFreq, pgmTopFreqs[i]}; + pgmMainTopFreq = pgmBuilder.Apply(udfTopFreq_Merge, params); + } - auto value = setup.GetValue(pgmReturn); + TRuntimeNode pgmReturn; + { + param = pgmBuilder.NewDataLiteral<ui32>(AskFor); + params = {pgmMainTopFreq, param}; + pgmReturn = pgmBuilder.Apply(udfTopFreq_Get, params); + } - auto listIterator = value.GetListIterator(); + auto value = setup.GetValue(pgmReturn); - ui32 found = 0; + auto listIterator = value.GetListIterator(); - for (ui64 i = 0; i < AskFor; i++) { - TUnboxedValue item; + ui32 found = 0; - UNIT_ASSERT(listIterator.Next(item)); - ui64 current = item.GetElement(1).Get<ui64>(); - if (current <= BigNum) - found++; - } + for (ui64 i = 0; i < AskFor; i++) { + TUnboxedValue item; - UNIT_ASSERT(!listIterator.Skip()); - UNIT_ASSERT(found >= WorksIfAtLeast); + UNIT_ASSERT(listIterator.Next(item)); + ui64 current = item.GetElement(1).Get<ui64>(); + if (current <= BigNum) { + found++; } } + + UNIT_ASSERT(!listIterator.Skip()); + UNIT_ASSERT(found >= WorksIfAtLeast); } +} // Y_UNIT_TEST_SUITE(TUDFTopFreqTest) +} // namespace NYql diff --git a/yql/essentials/udfs/common/topfreq/ut/ya.make b/yql/essentials/udfs/common/topfreq/ut/ya.make index 142aea4ebfd..3d0b1b859a8 100644 --- a/yql/essentials/udfs/common/topfreq/ut/ya.make +++ b/yql/essentials/udfs/common/topfreq/ut/ya.make @@ -1,5 +1,7 @@ UNITTEST_FOR(yql/essentials/udfs/common/topfreq/static) +ENABLE(YQL_STYLE_CPP) + SRCS( ../topfreq_udf_ut.cpp ) diff --git 
a/yql/essentials/udfs/common/topfreq/ya.make b/yql/essentials/udfs/common/topfreq/ya.make index 2c91204fed6..3871d8f13ad 100644 --- a/yql/essentials/udfs/common/topfreq/ya.make +++ b/yql/essentials/udfs/common/topfreq/ya.make @@ -5,6 +5,8 @@ YQL_UDF_CONTRIB(topfreq_udf) 28 0 ) + + ENABLE(YQL_STYLE_CPP) SRCS( topfreq_udf.cpp diff --git a/yql/essentials/udfs/common/unicode_base/lib/unicode_base_udf.cpp b/yql/essentials/udfs/common/unicode_base/lib/unicode_base_udf.cpp index 3e90765e405..4c31f3d5612 100644 --- a/yql/essentials/udfs/common/unicode_base/lib/unicode_base_udf.cpp +++ b/yql/essentials/udfs/common/unicode_base/lib/unicode_base_udf.cpp @@ -1 +1 @@ -#include "unicode_base_udf.h"
\ No newline at end of file +#include "unicode_base_udf.h" diff --git a/yql/essentials/udfs/common/unicode_base/lib/unicode_base_udf.h b/yql/essentials/udfs/common/unicode_base/lib/unicode_base_udf.h index 850990d7ba9..df930831dea 100644 --- a/yql/essentials/udfs/common/unicode_base/lib/unicode_base_udf.h +++ b/yql/essentials/udfs/common/unicode_base/lib/unicode_base_udf.h @@ -30,522 +30,523 @@ namespace { template <typename... Args> \ static auto Execute(Args&&... args) = delete; - inline constexpr bool IsAscii(wchar32 c) noexcept { - return ::IsAscii(c); +inline constexpr bool IsAscii(wchar32 c) noexcept { + return ::IsAscii(c); +} + +template <class It> +struct TIsUnicodeSpaceAdapter { + bool operator()(const It& it) const noexcept { + return IsSpace(*it); } +}; - template <class It> - struct TIsUnicodeSpaceAdapter { - bool operator()(const It& it) const noexcept { - return IsSpace(*it); - } - }; +template <class It> +TIsUnicodeSpaceAdapter<It> IsUnicodeSpaceAdapter(It) { + return {}; +} + +struct TNoChangesTag {}; - template <class It> - TIsUnicodeSpaceAdapter<It> IsUnicodeSpaceAdapter(It) { - return {}; +template <typename TDerived> +struct TScalarOperationMixin { + static TUnboxedValue DoExecute(const IValueBuilder* builder, const TUnboxedValuePod* args) + requires requires { TDerived::Execute(TStringRef()); } + { + Y_DEBUG_ABORT_UNLESS(IsUtf8(args[0].AsStringRef())); + auto executeResult = TDerived::Execute(args[0].AsStringRef()); + return ProcessResult(builder, std::move(executeResult), args); } - struct TNoChangesTag {}; + static TUnboxedValue DoExecute(const IValueBuilder* builder, const TUnboxedValuePod* args) + requires requires { TDerived::Execute(TMaybe<TStringRef>(TStringRef())); } + { + auto executeResult = TDerived::Execute(args[0] ? 
TMaybe<TStringRef>(args[0].AsStringRef()) : Nothing()); + return ProcessResult(builder, std::move(executeResult), args); + } - template <typename TDerived> - struct TScalarOperationMixin { - static TUnboxedValue DoExecute(const IValueBuilder* builder, const TUnboxedValuePod* args) - requires requires { TDerived::Execute(TStringRef()); } - { - Y_DEBUG_ABORT_UNLESS(IsUtf8(args[0].AsStringRef())); - auto executeResult = TDerived::Execute(args[0].AsStringRef()); - return ProcessResult(builder, std::move(executeResult), args); - } + static TUnboxedValue DoExecute(const IValueBuilder* builder, const TUnboxedValuePod* args) + requires requires { TDerived::Execute(TStringRef(), TStringRef()); } + { + auto executeResult = TDerived::Execute(args[0].AsStringRef(), args[1].AsStringRef()); + return ProcessResult(builder, std::move(executeResult), args); + } - static TUnboxedValue DoExecute(const IValueBuilder* builder, const TUnboxedValuePod* args) - requires requires { TDerived::Execute(TMaybe<TStringRef>(TStringRef())); } - { - auto executeResult = TDerived::Execute(args[0] ? TMaybe<TStringRef>(args[0].AsStringRef()) : Nothing()); - return ProcessResult(builder, std::move(executeResult), args); - } + static TUnboxedValue DoExecute(const IValueBuilder* builder, const TUnboxedValuePod* args) + requires requires { TDerived::Execute(TStringRef(), TMaybe<ui16>()); } + { + auto executeResult = TDerived::Execute(args[0].AsStringRef(), args[1] ? 
TMaybe<ui16>(args[1].Get<ui16>()) : Nothing()); + return ProcessResult(builder, std::move(executeResult), args); + } - static TUnboxedValue DoExecute(const IValueBuilder* builder, const TUnboxedValuePod* args) - requires requires { TDerived::Execute(TStringRef(), TStringRef()); } - { - auto executeResult = TDerived::Execute(args[0].AsStringRef(), args[1].AsStringRef()); - return ProcessResult(builder, std::move(executeResult), args); - } + static TUnboxedValue DoExecute(const IValueBuilder* builder, const TUnboxedValuePod* args) + requires requires { TDerived::Execute(TStringRef(), TStringRef(), TStringRef()); } + { + auto executeResult = TDerived::Execute(args[0].AsStringRef(), args[1].AsStringRef(), args[2].AsStringRef()); + return ProcessResult(builder, std::move(executeResult), args); + } - static TUnboxedValue DoExecute(const IValueBuilder* builder, const TUnboxedValuePod* args) - requires requires { TDerived::Execute(TStringRef(), TMaybe<ui16>()); } - { - auto executeResult = TDerived::Execute(args[0].AsStringRef(), args[1] ? TMaybe<ui16>(args[1].Get<ui16>()) : Nothing()); - return ProcessResult(builder, std::move(executeResult), args); - } + static TUnboxedValue DoExecute(const IValueBuilder* builder, const TUnboxedValuePod* args) + requires requires { TDerived::Execute(TStringRef(), TStringRef(), TMaybe<ui64>()); } + { + auto executeResult = TDerived::Execute(args[0].AsStringRef(), args[1].AsStringRef(), args[2] ? 
TMaybe<ui64>(args[2].Get<ui64>()) : Nothing()); + return ProcessResult(builder, std::move(executeResult), args); + } - static TUnboxedValue DoExecute(const IValueBuilder* builder, const TUnboxedValuePod* args) - requires requires { TDerived::Execute(TStringRef(), TStringRef(), TStringRef()); } - { - auto executeResult = TDerived::Execute(args[0].AsStringRef(), args[1].AsStringRef(), args[2].AsStringRef()); - return ProcessResult(builder, std::move(executeResult), args); - } + static TUnboxedValue DoExecute(const IValueBuilder* builder, const TUnboxedValuePod* args) + requires requires { TDerived::Execute(TStringRef(), TMaybe<ui64>(), TMaybe<ui64>()); } + { + auto executeResult = TDerived::Execute(args[0].AsStringRef(), + args[1] ? TMaybe<ui64>(args[1].Get<ui64>()) : Nothing(), + args[2] ? TMaybe<ui64>(args[2].Get<ui64>()) : Nothing()); + return ProcessResult(builder, std::move(executeResult), args); + } - static TUnboxedValue DoExecute(const IValueBuilder* builder, const TUnboxedValuePod* args) - requires requires { TDerived::Execute(TStringRef(), TStringRef(), TMaybe<ui64>()); } - { - auto executeResult = TDerived::Execute(args[0].AsStringRef(), args[1].AsStringRef(), args[2] ? TMaybe<ui64>(args[2].Get<ui64>()) : Nothing()); - return ProcessResult(builder, std::move(executeResult), args); - } +private: + static TUnboxedValue ProcessResult(const IValueBuilder* builder, const TString& newString, const TUnboxedValuePod*) { + return builder->NewString(newString); + } - static TUnboxedValue DoExecute(const IValueBuilder* builder, const TUnboxedValuePod* args) - requires requires { TDerived::Execute(TStringRef(), TMaybe<ui64>(), TMaybe<ui64>()); } - { - auto executeResult = TDerived::Execute(args[0].AsStringRef(), - args[1] ? TMaybe<ui64>(args[1].Get<ui64>()) : Nothing(), - args[2] ? 
TMaybe<ui64>(args[2].Get<ui64>()) : Nothing()); - return ProcessResult(builder, std::move(executeResult), args); - } - private: - static TUnboxedValue ProcessResult(const IValueBuilder* builder, const TString& newString, const TUnboxedValuePod*) { - return builder->NewString(newString); - } + static TUnboxedValue ProcessResult(const IValueBuilder* builder, const TStringBuf newString, const TUnboxedValuePod*) { + return builder->NewString(newString); + } - static TUnboxedValue ProcessResult(const IValueBuilder* builder, const TStringBuf newString, const TUnboxedValuePod*) { - return builder->NewString(newString); + template <typename T> + static TUnboxedValue ProcessResult(const IValueBuilder* builder, const std::variant<TNoChangesTag, T>& newValue, const TUnboxedValuePod* initialArg) { + if (std::holds_alternative<T>(newValue)) { + return ProcessResult(builder, std::move(std::get<T>(newValue)), initialArg); + } else { + return initialArg[0]; } + } - template <typename T> - static TUnboxedValue ProcessResult(const IValueBuilder* builder, const std::variant<TNoChangesTag, T>& newValue, const TUnboxedValuePod* initialArg) { - if (std::holds_alternative<T>(newValue)) { - return ProcessResult(builder, std::move(std::get<T>(newValue)), initialArg); - } else { - return initialArg[0]; - } + template <typename T> + static TUnboxedValue ProcessResult(const IValueBuilder* builder, const TMaybe<T>& newValue, const TUnboxedValuePod* initialArg) { + if (newValue.Defined()) { + return ProcessResult(builder, *newValue, initialArg); + } else { + return TUnboxedValuePod(); } + } - template <typename T> - static TUnboxedValue ProcessResult(const IValueBuilder* builder, const TMaybe<T>& newValue, const TUnboxedValuePod* initialArg) { - if (newValue.Defined()) { - return ProcessResult(builder, *newValue, initialArg); - } else { - return TUnboxedValuePod(); - } - } + template <typename T, typename = std::enable_if_t<TPrimitiveDataType<T>::Result>> + static TUnboxedValue 
ProcessResult(const IValueBuilder* builder, T result, const TUnboxedValuePod*) { + Y_UNUSED(builder); + return TUnboxedValuePod(result); + } +}; - template <typename T, typename = std::enable_if_t<TPrimitiveDataType<T>::Result>> - static TUnboxedValue ProcessResult(const IValueBuilder* builder, T result, const TUnboxedValuePod*) { - Y_UNUSED(builder); - return TUnboxedValuePod(result); - } - }; +template <typename TDerived> +struct TBlockOperationMixin { + template <typename TSink> + static void BlockDoExecute(const TBlockItem arg, const TSink& sink) + requires requires { TDerived::Execute(TStringRef()); } + { + Y_DEBUG_ABORT_UNLESS(IsUtf8(arg.AsStringRef())); + auto executeResult = TDerived::Execute(arg.AsStringRef()); + TBlockItem boxedValue = ProcessResult(executeResult, arg); + sink(boxedValue); + } - template <typename TDerived> - struct TBlockOperationMixin { - template <typename TSink> - static void BlockDoExecute(const TBlockItem arg, const TSink& sink) - requires requires { TDerived::Execute(TStringRef()); } - { - Y_DEBUG_ABORT_UNLESS(IsUtf8(arg.AsStringRef())); - auto executeResult = TDerived::Execute(arg.AsStringRef()); - TBlockItem boxedValue = ProcessResult(executeResult, arg); - sink(boxedValue); - } + template <typename TSink> + static void BlockDoExecute(const TBlockItem arg, const TSink& sink) + requires requires { TDerived::Execute(TMaybe<TStringRef>(TStringRef())); } + { + auto executeResult = TDerived::Execute(arg ? TMaybe<TStringRef>(arg.AsStringRef()) : Nothing()); + TBlockItem boxedValue = ProcessResult(executeResult, arg); + sink(boxedValue); + } - template <typename TSink> - static void BlockDoExecute(const TBlockItem arg, const TSink& sink) - requires requires { TDerived::Execute(TMaybe<TStringRef>(TStringRef())); } - { - auto executeResult = TDerived::Execute(arg ? 
TMaybe<TStringRef>(arg.AsStringRef()) : Nothing()); - TBlockItem boxedValue = ProcessResult(executeResult, arg); - sink(boxedValue); - } + template <typename TSink> + static void BlockDoExecute(const TBlockItem arg1, const TBlockItem arg2, const TSink& sink) + requires requires { TDerived::Execute(TStringRef(), TStringRef()); } + { + auto executeResult = TDerived::Execute(arg1.AsStringRef(), + arg2.AsStringRef()); + TBlockItem boxedValue = ProcessResult(executeResult, arg1); + sink(boxedValue); + } - template <typename TSink> - static void BlockDoExecute(const TBlockItem arg1, const TBlockItem arg2, const TSink& sink) - requires requires { TDerived::Execute(TStringRef(), TStringRef()); } - { - auto executeResult = TDerived::Execute(arg1.AsStringRef(), - arg2.AsStringRef()); - TBlockItem boxedValue = ProcessResult(executeResult, arg1); - sink(boxedValue); - } + template <typename TSink> + static void BlockDoExecute(const TBlockItem arg1, const TBlockItem arg2, const TSink& sink) + requires requires { TDerived::Execute(TStringRef(), TMaybe<ui16>()); } + { + auto executeResult = TDerived::Execute(arg1.AsStringRef(), arg2 ? TMaybe<ui16>(arg2.Get<ui16>()) : Nothing()); + TBlockItem boxedValue = ProcessResult(executeResult, arg1); + sink(boxedValue); + } - template <typename TSink> - static void BlockDoExecute(const TBlockItem arg1, const TBlockItem arg2, const TSink& sink) - requires requires { TDerived::Execute(TStringRef(), TMaybe<ui16>()); } - { - auto executeResult = TDerived::Execute(arg1.AsStringRef(), arg2 ? 
TMaybe<ui16>(arg2.Get<ui16>()) : Nothing()); - TBlockItem boxedValue = ProcessResult(executeResult, arg1); - sink(boxedValue); - } + template <typename TSink> + static void BlockDoExecute(const TBlockItem args, const TSink& sink) + requires(requires { TDerived::Execute(TStringRef(), TStringRef(), TStringRef()); }) + { + auto executeResult = TDerived::Execute(args.GetElement(0).AsStringRef(), + args.GetElement(1).AsStringRef(), + args.GetElement(2).AsStringRef()); + TBlockItem boxedValue = ProcessResult(executeResult, args.GetElement(0)); + sink(boxedValue); + } - template <typename TSink> - static void BlockDoExecute(const TBlockItem args, const TSink& sink) - requires(requires { TDerived::Execute(TStringRef(), TStringRef(), TStringRef()); }) - { - auto executeResult = TDerived::Execute(args.GetElement(0).AsStringRef(), - args.GetElement(1).AsStringRef(), - args.GetElement(2).AsStringRef()); - TBlockItem boxedValue = ProcessResult(executeResult, args.GetElement(0)); - sink(boxedValue); - } + template <typename TSink> + static void BlockDoExecute(const TBlockItem args, const TSink& sink) + requires(requires { TDerived::Execute(TStringRef(), TStringRef(), TMaybe<ui64>(0ULL)); }) + { + auto executeResult = TDerived::Execute(args.GetElement(0).AsStringRef(), + args.GetElement(1).AsStringRef(), + (args.GetElement(2) ? TMaybe<ui64>(args.GetElement(2).Get<ui64>()) : Nothing())); + TBlockItem boxedValue = ProcessResult(executeResult, args.GetElement(0)); + sink(boxedValue); + } - template <typename TSink> - static void BlockDoExecute(const TBlockItem args, const TSink& sink) - requires(requires { TDerived::Execute(TStringRef(), TStringRef(), TMaybe<ui64>(0ULL)); }) - { - auto executeResult = TDerived::Execute(args.GetElement(0).AsStringRef(), - args.GetElement(1).AsStringRef(), - (args.GetElement(2) ? 
TMaybe<ui64>(args.GetElement(2).Get<ui64>()) : Nothing())); - TBlockItem boxedValue = ProcessResult(executeResult, args.GetElement(0)); - sink(boxedValue); - } + template <typename TSink> + static void BlockDoExecute(const TBlockItem args, const TSink& sink) + requires(requires { TDerived::Execute(TStringRef(), TMaybe<ui64>(0ULL), TMaybe<ui64>(0ULL)); }) + { + auto executeResult = TDerived::Execute(args.GetElement(0).AsStringRef(), + (args.GetElement(1) ? TMaybe<ui64>(args.GetElement(1).Get<ui64>()) : Nothing()), + (args.GetElement(2) ? TMaybe<ui64>(args.GetElement(2).Get<ui64>()) : Nothing())); + TBlockItem boxedValue = ProcessResult(executeResult, args.GetElement(0)); + sink(boxedValue); + } - template <typename TSink> - static void BlockDoExecute(const TBlockItem args, const TSink& sink) - requires(requires { TDerived::Execute(TStringRef(), TMaybe<ui64>(0ULL), TMaybe<ui64>(0ULL)); }) - { - auto executeResult = TDerived::Execute(args.GetElement(0).AsStringRef(), - (args.GetElement(1) ? TMaybe<ui64>(args.GetElement(1).Get<ui64>()) : Nothing()), - (args.GetElement(2) ? 
TMaybe<ui64>(args.GetElement(2).Get<ui64>()) : Nothing())); - TBlockItem boxedValue = ProcessResult(executeResult, args.GetElement(0)); - sink(boxedValue); - } +private: + static TBlockItem ProcessResult(const TString& newString, const TBlockItem arg) { + Y_UNUSED(arg); + return TBlockItem(newString); + } - private: - static TBlockItem ProcessResult(const TString& newString, const TBlockItem arg) { - Y_UNUSED(arg); - return TBlockItem(newString); - } + static TBlockItem ProcessResult(const TStringBuf newString, const TBlockItem arg) { + Y_UNUSED(arg); + return TBlockItem(newString); + } - static TBlockItem ProcessResult(const TStringBuf newString, const TBlockItem arg) { - Y_UNUSED(arg); - return TBlockItem(newString); + template <typename T> + static TBlockItem ProcessResult(const TMaybe<T>& newValue, const TBlockItem arg) { + if (newValue.Defined()) { + return ProcessResult(*newValue, arg); + } else { + return TBlockItem(); } + } - template <typename T> - static TBlockItem ProcessResult(const TMaybe<T>& newValue, const TBlockItem arg) { - if (newValue.Defined()) { - return ProcessResult(*newValue, arg); - } else { - return TBlockItem(); - } + template <typename T> + static TBlockItem ProcessResult(const std::variant<TNoChangesTag, T>& newValue, const TBlockItem arg) { + if (std::holds_alternative<T>(newValue)) { + return ProcessResult(std::get<T>(newValue), arg); + } else { + return arg; } + } - template <typename T> - static TBlockItem ProcessResult(const std::variant<TNoChangesTag, T>& newValue, const TBlockItem arg) { - if (std::holds_alternative<T>(newValue)) { - return ProcessResult(std::get<T>(newValue), arg); - } else { - return arg; + template <typename T, typename = std::enable_if_t<TPrimitiveDataType<T>::Result>> + static TBlockItem ProcessResult(T result, const TBlockItem arg) { + Y_UNUSED(arg); + return TBlockItem(result); + } +}; + +template <typename TDerived> +struct TOperationMixin: public TBlockOperationMixin<TDerived>, public 
TScalarOperationMixin<TDerived> {}; + +template <auto mode> +struct TNormalizeUTF8: public TOperationMixin<TNormalizeUTF8<mode>> { + static TString Execute(TStringRef arg) { + const TUtf16String& input = UTF8ToWide(arg.Data(), arg.Size()); + return WideToUTF8(Normalize<mode>(input)); + } + DISABLE_IMPICT_ARGUMENT_CAST; +}; + +template <bool (*Function)(wchar32)> +struct TCheckAllChars: public TOperationMixin<TCheckAllChars<Function>> { + static bool Execute(TStringRef arg) { + const TStringBuf input(arg); + wchar32 rune; + const unsigned char* cur = reinterpret_cast<const unsigned char*>(input.begin()); + const unsigned char* last = reinterpret_cast<const unsigned char*>(input.end()); + while (cur != last) { + ReadUTF8CharAndAdvance(rune, cur, last); + if (!static_cast<bool (*)(wchar32)>(Function)(rune)) { + return false; } } + return true; + } + DISABLE_IMPICT_ARGUMENT_CAST; +}; - template <typename T, typename = std::enable_if_t<TPrimitiveDataType<T>::Result>> - static TBlockItem ProcessResult(T result, const TBlockItem arg) { - Y_UNUSED(arg); - return TBlockItem(result); +template <bool (*Function)(TUtf16String&, size_t pos, size_t count)> +struct TStringToStringMapper: public TOperationMixin<TStringToStringMapper<Function>> { + static std::variant<TNoChangesTag, TString> Execute(TStringRef arg) { + if (auto wide = UTF8ToWide(arg); + static_cast<bool (*)(TUtf16String&, size_t pos, size_t count)>(Function)(wide, 0, TUtf16String::npos)) { + return WideToUTF8(std::move(wide)); + } else { + return TNoChangesTag{}; } - }; + } + DISABLE_IMPICT_ARGUMENT_CAST; +}; - template <typename TDerived> - struct TOperationMixin: public TBlockOperationMixin<TDerived>, public TScalarOperationMixin<TDerived> {}; +struct TLengthGetter: public TOperationMixin<TLengthGetter> { + static ui64 Execute(TStringRef inputRef) { + size_t result; + GetNumberOfUTF8Chars(inputRef.Data(), inputRef.Size(), result); + return static_cast<ui64>(result); + } + DISABLE_IMPICT_ARGUMENT_CAST; +}; - 
template <auto mode> - struct TNormalizeUTF8: public TOperationMixin<TNormalizeUTF8<mode>> { - static TString Execute(TStringRef arg) { - const TUtf16String& input = UTF8ToWide(arg.Data(), arg.Size()); - return WideToUTF8(Normalize<mode>(input)); - } - DISABLE_IMPICT_ARGUMENT_CAST; - }; +struct TReverser: public TOperationMixin<TReverser> { + static TString Execute(TStringRef inputRef) { + auto wide = UTF8ToWide(inputRef); + ReverseInPlace(wide); + return WideToUTF8(wide); + } + DISABLE_IMPICT_ARGUMENT_CAST; +}; - template <bool (*Function)(wchar32)> - struct TCheckAllChars: public TOperationMixin<TCheckAllChars<Function>> { - static bool Execute(TStringRef arg) { - const TStringBuf input(arg); - wchar32 rune; - const unsigned char* cur = reinterpret_cast<const unsigned char*>(input.begin()); - const unsigned char* last = reinterpret_cast<const unsigned char*>(input.end()); - while (cur != last) { - ReadUTF8CharAndAdvance(rune, cur, last); - if (!static_cast<bool (*)(wchar32)>(Function)(rune)) { - return false; - } +struct TStripper: public TOperationMixin<TStripper> { + static TString Execute(TStringRef inputRef) { + const TUtf32String input = UTF8ToUTF32<true>(inputRef); + const auto& result = StripString(input, IsUnicodeSpaceAdapter(input.begin())); + return WideToUTF8(result); + } + DISABLE_IMPICT_ARGUMENT_CAST; +}; + +struct TAllRemover: public TOperationMixin<TAllRemover> { + static std::variant<TNoChangesTag, TString> Execute(TStringRef inputRef, TStringRef removeRef) { + TUtf32String input = UTF8ToUTF32<true>(inputRef); + const TUtf32String remove = UTF8ToUTF32<true>(removeRef); + const std::unordered_set<wchar32> chars(remove.cbegin(), remove.cend()); + size_t tpos = 0; + for (const wchar32 c : input) { + if (!chars.contains(c)) { + input[tpos++] = c; } - return true; } - DISABLE_IMPICT_ARGUMENT_CAST; - }; + if (tpos != input.size()) { + input.resize(tpos); + return WideToUTF8(input); + } + return TNoChangesTag{}; + } + DISABLE_IMPICT_ARGUMENT_CAST; +}; - 
template <bool (*Function)(TUtf16String&, size_t pos, size_t count)> - struct TStringToStringMapper: public TOperationMixin<TStringToStringMapper<Function>> { - static std::variant<TNoChangesTag, TString> Execute(TStringRef arg) { - if (auto wide = UTF8ToWide(arg); - static_cast<bool (*)(TUtf16String&, size_t pos, size_t count)>(Function)(wide, 0, TUtf16String::npos)) { - return WideToUTF8(std::move(wide)); - } else { - return TNoChangesTag{}; +struct TFirstRemover: public TOperationMixin<TFirstRemover> { + static std::variant<TNoChangesTag, TString> Execute(TStringRef inputRef, TStringRef removeRef) { + TUtf32String input = UTF8ToUTF32<true>(inputRef); + const auto remove = UTF8ToUTF32<true>(removeRef); + const std::unordered_set<wchar32> chars(remove.cbegin(), remove.cend()); + for (auto it = input.cbegin(); it != input.cend(); ++it) { + if (chars.contains(*it)) { + input.erase(it); + return WideToUTF8(input); } } - DISABLE_IMPICT_ARGUMENT_CAST; - }; + return TNoChangesTag{}; + } + DISABLE_IMPICT_ARGUMENT_CAST; +}; - struct TLengthGetter: public TOperationMixin<TLengthGetter> { - static ui64 Execute(TStringRef inputRef) { - size_t result; - GetNumberOfUTF8Chars(inputRef.Data(), inputRef.Size(), result); - return static_cast<ui64>(result); +struct TUnicodeSetMatcher: public TOperationMixin<TUnicodeSetMatcher> { + static bool Execute(TStringRef inputRef, TStringRef customCategoryRef) { + const TStringBuf input(inputRef); + const TUtf16String& customCategory = UTF8ToWide(customCategoryRef); + TUnicodeSet unicodeSet; + try { + unicodeSet.Parse(customCategory); + } catch (...) 
{ + throw yexception() << "Failed to parse unicode set: " << CurrentExceptionMessage(); } - DISABLE_IMPICT_ARGUMENT_CAST; - }; - - struct TReverser: public TOperationMixin<TReverser> { - static TString Execute(TStringRef inputRef) { - auto wide = UTF8ToWide(inputRef); - ReverseInPlace(wide); - return WideToUTF8(wide); + wchar32 rune; + const unsigned char* cur = reinterpret_cast<const unsigned char*>(input.begin()); + const unsigned char* last = reinterpret_cast<const unsigned char*>(input.end()); + while (cur != last) { + ReadUTF8CharAndAdvance(rune, cur, last); + if (!unicodeSet.Has(rune)) { + return false; + } } - DISABLE_IMPICT_ARGUMENT_CAST; - }; + return true; + } + DISABLE_IMPICT_ARGUMENT_CAST; +}; - struct TStripper: public TOperationMixin<TStripper> { - static TString Execute(TStringRef inputRef) { - const TUtf32String input = UTF8ToUTF32<true>(inputRef); - const auto& result = StripString(input, IsUnicodeSpaceAdapter(input.begin())); - return WideToUTF8(result); - } - DISABLE_IMPICT_ARGUMENT_CAST; - }; +struct TLevensteinDistanceFinder: public TOperationMixin<TLevensteinDistanceFinder> { + static ui64 Execute(TStringRef leftRef, TStringRef rightRef) { + const TStringBuf left(leftRef); + const TStringBuf right(rightRef); + const auto& leftUtf32 = UTF8ToUTF32<true>(left); + const auto& rightUtf32 = UTF8ToUTF32<true>(right); + return NLevenshtein::Distance(leftUtf32, rightUtf32); + } + DISABLE_IMPICT_ARGUMENT_CAST; +}; - struct TAllRemover: public TOperationMixin<TAllRemover> { - static std::variant<TNoChangesTag, TString> Execute(TStringRef inputRef, TStringRef removeRef) { - TUtf32String input = UTF8ToUTF32<true>(inputRef); - const TUtf32String remove = UTF8ToUTF32<true>(removeRef); - const std::unordered_set<wchar32> chars(remove.cbegin(), remove.cend()); - size_t tpos = 0; - for (const wchar32 c : input) { - if (!chars.contains(c)) { - input[tpos++] = c; - } - } - if (tpos != input.size()) { - input.resize(tpos); +struct TLastRemoval: public 
TOperationMixin<TLastRemoval> { + static std::variant<TNoChangesTag, TString> Execute(TStringRef inputRef, TStringRef removeRef) { + TUtf32String input = UTF8ToUTF32<true>(inputRef); + const TUtf32String remove = UTF8ToUTF32<true>(removeRef); + const std::unordered_set<wchar32> chars(remove.cbegin(), remove.cend()); + for (auto it = input.crbegin(); it != input.crend(); ++it) { + if (chars.contains(*it)) { + input.erase(input.crend() - it - 1, 1); return WideToUTF8(input); } - return TNoChangesTag{}; } - DISABLE_IMPICT_ARGUMENT_CAST; - }; + return TNoChangesTag{}; + } + DISABLE_IMPICT_ARGUMENT_CAST; +}; - struct TFirstRemover: public TOperationMixin<TFirstRemover> { - static std::variant<TNoChangesTag, TString> Execute(TStringRef inputRef, TStringRef removeRef) { - TUtf32String input = UTF8ToUTF32<true>(inputRef); - const auto remove = UTF8ToUTF32<true>(removeRef); - const std::unordered_set<wchar32> chars(remove.cbegin(), remove.cend()); - for (auto it = input.cbegin(); it != input.cend(); ++it) { - if (chars.contains(*it)) { - input.erase(it); - return WideToUTF8(input); - } - } +struct TAllReplacer: public TOperationMixin<TAllReplacer> { + static std::variant<TNoChangesTag, TString> Execute(TStringRef inputRef, TStringRef whatReplace, TStringRef toReplace) { + if (TString result(inputRef); SubstGlobal(result, whatReplace, toReplace)) { + return result; + } else { return TNoChangesTag{}; } - DISABLE_IMPICT_ARGUMENT_CAST; - }; + } + DISABLE_IMPICT_ARGUMENT_CAST; +}; - struct TUnicodeSetMatcher: public TOperationMixin<TUnicodeSetMatcher> { - static bool Execute(TStringRef inputRef, TStringRef customCategoryRef) { - const TStringBuf input(inputRef); - const TUtf16String& customCategory = UTF8ToWide(customCategoryRef); - TUnicodeSet unicodeSet; - try { - unicodeSet.Parse(customCategory); - } catch (...) 
{ - throw yexception() << "Failed to parse unicode set: " << CurrentExceptionMessage(); - } - wchar32 rune; - const unsigned char* cur = reinterpret_cast<const unsigned char*>(input.begin()); - const unsigned char* last = reinterpret_cast<const unsigned char*>(input.end()); - while (cur != last) { - ReadUTF8CharAndAdvance(rune, cur, last); - if (!unicodeSet.Has(rune)) { - return false; - } - } - return true; +struct TFirstReplacer: public TOperationMixin<TFirstReplacer> { + static std::variant<TNoChangesTag, TString> Execute(TStringRef inputRef, TStringRef whatReplace, TStringRef toReplace) { + std::string result(inputRef); + const std::string_view what(whatReplace); + if (const auto index = result.find(what); index != std::string::npos) { + result.replace(index, what.size(), std::string_view(toReplace)); + return result; } - DISABLE_IMPICT_ARGUMENT_CAST; - }; + return TNoChangesTag{}; + } + DISABLE_IMPICT_ARGUMENT_CAST; +}; - struct TLevensteinDistanceFinder: public TOperationMixin<TLevensteinDistanceFinder> { - static ui64 Execute(TStringRef leftRef, TStringRef rightRef) { - const TStringBuf left(leftRef); - const TStringBuf right(rightRef); - const auto& leftUtf32 = UTF8ToUTF32<true>(left); - const auto& rightUtf32 = UTF8ToUTF32<true>(right); - return NLevenshtein::Distance(leftUtf32, rightUtf32); +struct TLastReplacer: public TOperationMixin<TLastReplacer> { + static std::variant<TNoChangesTag, TString> Execute(TStringRef inputRef, TStringRef whatReplace, TStringRef toReplace) { + std::string result(inputRef); + const std::string_view what(whatReplace); + if (const auto index = result.rfind(what); index != std::string::npos) { + result.replace(index, what.size(), std::string_view(toReplace)); + return result; } - DISABLE_IMPICT_ARGUMENT_CAST; - }; + return TNoChangesTag{}; + } + DISABLE_IMPICT_ARGUMENT_CAST; +}; - struct TLastRemoval: public TOperationMixin<TLastRemoval> { - static std::variant<TNoChangesTag, TString> Execute(TStringRef inputRef, TStringRef 
removeRef) { - TUtf32String input = UTF8ToUTF32<true>(inputRef); - const TUtf32String remove = UTF8ToUTF32<true>(removeRef); - const std::unordered_set<wchar32> chars(remove.cbegin(), remove.cend()); - for (auto it = input.crbegin(); it != input.crend(); ++it) { - if (chars.contains(*it)) { - input.erase(input.crend() - it - 1, 1); - return WideToUTF8(input); - } - } - return TNoChangesTag{}; - } - DISABLE_IMPICT_ARGUMENT_CAST; - }; +struct TFinder: public TOperationMixin<TFinder> { + static TMaybe<ui64> Execute(TStringRef inputRef, TStringRef whatFind, TMaybe<ui64> whereFind) { + const std::string_view string(inputRef); + const std::string_view needle(whatFind); + std::string_view::size_type pos = 0U; - struct TAllReplacer: public TOperationMixin<TAllReplacer> { - static std::variant<TNoChangesTag, TString> Execute(TStringRef inputRef, TStringRef whatReplace, TStringRef toReplace) { - if (TString result(inputRef); SubstGlobal(result, whatReplace, toReplace)) { - return result; - } else { - return TNoChangesTag{}; + if (auto p = whereFind.GetOrElse(0ULL)) { + for (auto ptr = string.data(); p && pos < string.size(); --p) { + const auto width = WideCharSize(*ptr); + pos += width; + ptr += width; } } - DISABLE_IMPICT_ARGUMENT_CAST; - }; - struct TFirstReplacer: public TOperationMixin<TFirstReplacer> { - static std::variant<TNoChangesTag, TString> Execute(TStringRef inputRef, TStringRef whatReplace, TStringRef toReplace) { - std::string result(inputRef); - const std::string_view what(whatReplace); - if (const auto index = result.find(what); index != std::string::npos) { - result.replace(index, what.size(), std::string_view(toReplace)); - return result; - } - return TNoChangesTag{}; - } - DISABLE_IMPICT_ARGUMENT_CAST; - }; - - struct TLastReplacer: public TOperationMixin<TLastReplacer> { - static std::variant<TNoChangesTag, TString> Execute(TStringRef inputRef, TStringRef whatReplace, TStringRef toReplace) { - std::string result(inputRef); - const std::string_view 
what(whatReplace); - if (const auto index = result.rfind(what); index != std::string::npos) { - result.replace(index, what.size(), std::string_view(toReplace)); - return result; - } - return TNoChangesTag{}; + if (const auto find = string.find(needle, pos); std::string_view::npos != find) { + size_t result; + GetNumberOfUTF8Chars(string.data(), find, result); + return static_cast<ui64>(result); } - DISABLE_IMPICT_ARGUMENT_CAST; - }; - - struct TFinder: public TOperationMixin<TFinder> { - static TMaybe<ui64> Execute(TStringRef inputRef, TStringRef whatFind, TMaybe<ui64> whereFind) { - const std::string_view string(inputRef); - const std::string_view needle(whatFind); - std::string_view::size_type pos = 0U; + return Nothing(); + } + DISABLE_IMPICT_ARGUMENT_CAST; +}; - if (auto p = whereFind.GetOrElse(0ULL)) { - for (auto ptr = string.data(); p && pos < string.size(); --p) { - const auto width = WideCharSize(*ptr); - pos += width; - ptr += width; - } - } +struct TRFinder: public TOperationMixin<TRFinder> { + static TMaybe<ui64> Execute(TStringRef inputRef, TStringRef whatFind, TMaybe<ui64> whereFind) { + const std::string_view string(inputRef); + const std::string_view needle(whatFind); + std::string_view::size_type pos = std::string_view::npos; - if (const auto find = string.find(needle, pos); std::string_view::npos != find) { - size_t result; - GetNumberOfUTF8Chars(string.data(), find, result); - return static_cast<ui64>(result); + if (auto p = whereFind.GetOrElse(std::string_view::npos); std::string_view::npos != p) { + pos = 0ULL; + for (auto ptr = string.data(); p && pos < string.size(); --p) { + const auto width = WideCharSize(*ptr); + pos += width; + ptr += width; } - return Nothing(); } - DISABLE_IMPICT_ARGUMENT_CAST; - }; - - struct TRFinder: public TOperationMixin<TRFinder> { - static TMaybe<ui64> Execute(TStringRef inputRef, TStringRef whatFind, TMaybe<ui64> whereFind) { - const std::string_view string(inputRef); - const std::string_view needle(whatFind); - 
std::string_view::size_type pos = std::string_view::npos; - if (auto p = whereFind.GetOrElse(std::string_view::npos); std::string_view::npos != p) { - pos = 0ULL; - for (auto ptr = string.data(); p && pos < string.size(); --p) { - const auto width = WideCharSize(*ptr); - pos += width; - ptr += width; - } - } + if (const auto find = string.rfind(needle, pos); std::string_view::npos != find) { + size_t result; + GetNumberOfUTF8Chars(string.data(), find, result); + return static_cast<ui64>(result); + } + return Nothing(); + } + DISABLE_IMPICT_ARGUMENT_CAST; +}; - if (const auto find = string.rfind(needle, pos); std::string_view::npos != find) { - size_t result; - GetNumberOfUTF8Chars(string.data(), find, result); - return static_cast<ui64>(result); - } +template <bool strict> +struct TToUint64Converter: public TOperationMixin<TToUint64Converter<strict>> { + static TNothing Terminate(const char* message) { + if constexpr (strict) { return Nothing(); + } else { + throw yexception() << message; } - DISABLE_IMPICT_ARGUMENT_CAST; }; - template <bool strict> - struct TToUint64Converter: public TOperationMixin<TToUint64Converter<strict>> { - static TNothing Terminate(const char* message) { - if constexpr (strict) { - return Nothing(); - } else { - throw yexception() << message; - } + static TMaybe<ui64> Execute(TStringRef inputRef, TMaybe<ui16> inputBase) { + const TString inputStr(inputRef); + const char* input = inputStr.data(); + const int base = inputBase.GetOrElse(0); + char* pos = nullptr; + auto prevErrno = errno; + errno = 0; + Y_DEFER { + errno = prevErrno; }; - - static TMaybe<ui64> Execute(TStringRef inputRef, TMaybe<ui16> inputBase) { - const TString inputStr(inputRef); - const char* input = inputStr.data(); - const int base = inputBase.GetOrElse(0); - char* pos = nullptr; - auto prevErrno = errno; - errno = 0; - Y_DEFER { - errno = prevErrno; - }; - unsigned long long res = std::strtoull(input, &pos, base); - if (!res && errno == EINVAL) { - return 
Terminate("Incorrect base"); - } - - ui64 ret = static_cast<ui64>(res); - if (!res && pos == input) { - return Terminate("Input string is not a number"); - } else if ((res == ULLONG_MAX && errno == ERANGE) || ret != res) { - return Terminate("Converted value falls out of Uint64 range"); - } else if (*pos) { - return Terminate("Input string contains junk after the number"); - } - return ret; + unsigned long long res = std::strtoull(input, &pos, base); + if (!res && errno == EINVAL) { + return Terminate("Incorrect base"); } - DISABLE_IMPICT_ARGUMENT_CAST; - }; - struct TUtf8Checker: public TOperationMixin<TUtf8Checker> { - static bool Execute(TMaybe<TStringRef> inputRef) { - if (!inputRef.Defined()) { - return false; - } - return IsUtf8(*inputRef); + ui64 ret = static_cast<ui64>(res); + if (!res && pos == input) { + return Terminate("Input string is not a number"); + } else if ((res == ULLONG_MAX && errno == ERANGE) || ret != res) { + return Terminate("Converted value falls out of Uint64 range"); + } else if (*pos) { + return Terminate("Input string contains junk after the number"); } - DISABLE_IMPICT_ARGUMENT_CAST; - }; + return ret; + } + DISABLE_IMPICT_ARGUMENT_CAST; +}; - struct TSubstringGetter: public TOperationMixin<TSubstringGetter> { - static TStringBuf Execute(TStringRef inputRef Y_LIFETIME_BOUND, TMaybe<ui64> inputFrom, TMaybe<ui64> inputLen) { - const TStringBuf input(inputRef); - size_t from = inputFrom.GetOrElse(0); - size_t len = inputLen.GetOrElse(TStringBuf::npos); - return SubstrUTF8(input, from, len); +struct TUtf8Checker: public TOperationMixin<TUtf8Checker> { + static bool Execute(TMaybe<TStringRef> inputRef) { + if (!inputRef.Defined()) { + return false; } - DISABLE_IMPICT_ARGUMENT_CAST; - }; + return IsUtf8(*inputRef); + } + DISABLE_IMPICT_ARGUMENT_CAST; +}; -#define DEFINE_UTF8_OPERATION_STRICT(udfName, Executor, signature, optArgs) \ +struct TSubstringGetter: public TOperationMixin<TSubstringGetter> { + static TStringBuf Execute(TStringRef 
inputRef Y_LIFETIME_BOUND, TMaybe<ui64> inputFrom, TMaybe<ui64> inputLen) { + const TStringBuf input(inputRef); + size_t from = inputFrom.GetOrElse(0); + size_t len = inputLen.GetOrElse(TStringBuf::npos); + return SubstrUTF8(input, from, len); + } + DISABLE_IMPICT_ARGUMENT_CAST; +}; + +#define DEFINE_UTF8_OPERATION_STRICT(udfName, Executor, signature, optArgs) \ BEGIN_SIMPLE_STRICT_ARROW_UDF_WITH_OPTIONAL_ARGS(T##udfName, signature, optArgs) { \ return Executor::DoExecute(valueBuilder, args); \ } \ @@ -599,226 +600,226 @@ namespace { \ END_SIMPLE_ARROW_UDF(T##udfName, T##udfName##KernelExec::Do) - DEFINE_UTF8_OPERATION_STRICT(IsUtf, TUtf8Checker, bool(TOptional<char*>), /*optArgs=*/1); +DEFINE_UTF8_OPERATION_STRICT(IsUtf, TUtf8Checker, bool(TOptional<char*>), /*optArgs=*/1); - DEFINE_UTF8_OPERATION_STRICT(Normalize, TNormalizeUTF8<NFC>, TUtf8(TAutoMap<TUtf8>), /*optArgs=*/0); - DEFINE_UTF8_OPERATION_STRICT(NormalizeNFD, TNormalizeUTF8<NFD>, TUtf8(TAutoMap<TUtf8>), /*optArgs=*/0); - DEFINE_UTF8_OPERATION_STRICT(NormalizeNFC, TNormalizeUTF8<NFC>, TUtf8(TAutoMap<TUtf8>), /*optArgs=*/0); - DEFINE_UTF8_OPERATION_STRICT(NormalizeNFKD, TNormalizeUTF8<NFKD>, TUtf8(TAutoMap<TUtf8>), /*optArgs=*/0); - DEFINE_UTF8_OPERATION_STRICT(NormalizeNFKC, TNormalizeUTF8<NFKC>, TUtf8(TAutoMap<TUtf8>), /*optArgs=*/0); +DEFINE_UTF8_OPERATION_STRICT(Normalize, TNormalizeUTF8<NFC>, TUtf8(TAutoMap<TUtf8>), /*optArgs=*/0); +DEFINE_UTF8_OPERATION_STRICT(NormalizeNFD, TNormalizeUTF8<NFD>, TUtf8(TAutoMap<TUtf8>), /*optArgs=*/0); +DEFINE_UTF8_OPERATION_STRICT(NormalizeNFC, TNormalizeUTF8<NFC>, TUtf8(TAutoMap<TUtf8>), /*optArgs=*/0); +DEFINE_UTF8_OPERATION_STRICT(NormalizeNFKD, TNormalizeUTF8<NFKD>, TUtf8(TAutoMap<TUtf8>), /*optArgs=*/0); +DEFINE_UTF8_OPERATION_STRICT(NormalizeNFKC, TNormalizeUTF8<NFKC>, TUtf8(TAutoMap<TUtf8>), /*optArgs=*/0); - DEFINE_UTF8_OPERATION_STRICT(IsAscii, TCheckAllChars<IsAscii>, bool(TAutoMap<TUtf8>), /*optArgs=*/0); - DEFINE_UTF8_OPERATION_STRICT(IsSpace, 
TCheckAllChars<IsSpace>, bool(TAutoMap<TUtf8>), /*optArgs=*/0); - DEFINE_UTF8_OPERATION_STRICT(IsUpper, TCheckAllChars<IsUpper>, bool(TAutoMap<TUtf8>), /*optArgs=*/0); - DEFINE_UTF8_OPERATION_STRICT(IsLower, TCheckAllChars<IsLower>, bool(TAutoMap<TUtf8>), /*optArgs=*/0); - DEFINE_UTF8_OPERATION_STRICT(IsDigit, TCheckAllChars<IsDigit>, bool(TAutoMap<TUtf8>), /*optArgs=*/0); - DEFINE_UTF8_OPERATION_STRICT(IsAlpha, TCheckAllChars<IsAlpha>, bool(TAutoMap<TUtf8>), /*optArgs=*/0); - DEFINE_UTF8_OPERATION_STRICT(IsAlnum, TCheckAllChars<IsAlnum>, bool(TAutoMap<TUtf8>), /*optArgs=*/0); - DEFINE_UTF8_OPERATION_STRICT(IsHex, TCheckAllChars<IsHexdigit>, bool(TAutoMap<TUtf8>), /*optArgs=*/0); +DEFINE_UTF8_OPERATION_STRICT(IsAscii, TCheckAllChars<IsAscii>, bool(TAutoMap<TUtf8>), /*optArgs=*/0); +DEFINE_UTF8_OPERATION_STRICT(IsSpace, TCheckAllChars<IsSpace>, bool(TAutoMap<TUtf8>), /*optArgs=*/0); +DEFINE_UTF8_OPERATION_STRICT(IsUpper, TCheckAllChars<IsUpper>, bool(TAutoMap<TUtf8>), /*optArgs=*/0); +DEFINE_UTF8_OPERATION_STRICT(IsLower, TCheckAllChars<IsLower>, bool(TAutoMap<TUtf8>), /*optArgs=*/0); +DEFINE_UTF8_OPERATION_STRICT(IsDigit, TCheckAllChars<IsDigit>, bool(TAutoMap<TUtf8>), /*optArgs=*/0); +DEFINE_UTF8_OPERATION_STRICT(IsAlpha, TCheckAllChars<IsAlpha>, bool(TAutoMap<TUtf8>), /*optArgs=*/0); +DEFINE_UTF8_OPERATION_STRICT(IsAlnum, TCheckAllChars<IsAlnum>, bool(TAutoMap<TUtf8>), /*optArgs=*/0); +DEFINE_UTF8_OPERATION_STRICT(IsHex, TCheckAllChars<IsHexdigit>, bool(TAutoMap<TUtf8>), /*optArgs=*/0); - DEFINE_UTF8_OPERATION_STRICT(ToTitle, TStringToStringMapper<ToTitle>, TUtf8(TAutoMap<TUtf8>), /*optArgs=*/0); - DEFINE_UTF8_OPERATION_STRICT(ToUpper, TStringToStringMapper<ToUpper>, TUtf8(TAutoMap<TUtf8>), /*optArgs=*/0); - DEFINE_UTF8_OPERATION_STRICT(ToLower, TStringToStringMapper<ToLower>, TUtf8(TAutoMap<TUtf8>), /*optArgs=*/0); +DEFINE_UTF8_OPERATION_STRICT(ToTitle, TStringToStringMapper<ToTitle>, TUtf8(TAutoMap<TUtf8>), /*optArgs=*/0); +DEFINE_UTF8_OPERATION_STRICT(ToUpper, 
TStringToStringMapper<ToUpper>, TUtf8(TAutoMap<TUtf8>), /*optArgs=*/0); +DEFINE_UTF8_OPERATION_STRICT(ToLower, TStringToStringMapper<ToLower>, TUtf8(TAutoMap<TUtf8>), /*optArgs=*/0); - DEFINE_UTF8_OPERATION_STRICT(GetLength, TLengthGetter, ui64(TAutoMap<TUtf8>), /*optArgs=*/0); +DEFINE_UTF8_OPERATION_STRICT(GetLength, TLengthGetter, ui64(TAutoMap<TUtf8>), /*optArgs=*/0); - DEFINE_UTF8_OPERATION_STRICT(Reverse, TReverser, TUtf8(TAutoMap<TUtf8>), /*optArgs=*/0); - DEFINE_UTF8_OPERATION_STRICT(Strip, TStripper, TUtf8(TAutoMap<TUtf8>), /*optArgs=*/0); - DEFINE_UTF8_OPERATION_MANY_STRICT(Substring, TSubstringGetter, TUtf8(TAutoMap<TUtf8>, TOptional<ui64>, TOptional<ui64>), /*argsCount=*/3, /*optArgs=*/1); +DEFINE_UTF8_OPERATION_STRICT(Reverse, TReverser, TUtf8(TAutoMap<TUtf8>), /*optArgs=*/0); +DEFINE_UTF8_OPERATION_STRICT(Strip, TStripper, TUtf8(TAutoMap<TUtf8>), /*optArgs=*/0); +DEFINE_UTF8_OPERATION_MANY_STRICT(Substring, TSubstringGetter, TUtf8(TAutoMap<TUtf8>, TOptional<ui64>, TOptional<ui64>), /*argsCount=*/3, /*optArgs=*/1); - DEFINE_UTF8_OPERATION_BIN_STRICT(RemoveAll, TAllRemover, TUtf8(TAutoMap<TUtf8>, TUtf8), /*optArgs=*/0); - DEFINE_UTF8_OPERATION_BIN_STRICT(RemoveFirst, TFirstRemover, TUtf8(TAutoMap<TUtf8>, TUtf8), /*optArgs=*/0); - DEFINE_UTF8_OPERATION_BIN_NOT_STRICT(IsUnicodeSet, TUnicodeSetMatcher, bool(TAutoMap<TUtf8>, TUtf8), /*optArgs=*/0); - DEFINE_UTF8_OPERATION_BIN_STRICT(LevensteinDistance, TLevensteinDistanceFinder, ui64(TAutoMap<TUtf8>, TAutoMap<TUtf8>), /*optArgs=*/0); - DEFINE_UTF8_OPERATION_BIN_STRICT(RemoveLast, TLastRemoval, TUtf8(TAutoMap<TUtf8>, TUtf8), /*optArgs=*/0); +DEFINE_UTF8_OPERATION_BIN_STRICT(RemoveAll, TAllRemover, TUtf8(TAutoMap<TUtf8>, TUtf8), /*optArgs=*/0); +DEFINE_UTF8_OPERATION_BIN_STRICT(RemoveFirst, TFirstRemover, TUtf8(TAutoMap<TUtf8>, TUtf8), /*optArgs=*/0); +DEFINE_UTF8_OPERATION_BIN_NOT_STRICT(IsUnicodeSet, TUnicodeSetMatcher, bool(TAutoMap<TUtf8>, TUtf8), /*optArgs=*/0); 
+DEFINE_UTF8_OPERATION_BIN_STRICT(LevensteinDistance, TLevensteinDistanceFinder, ui64(TAutoMap<TUtf8>, TAutoMap<TUtf8>), /*optArgs=*/0); +DEFINE_UTF8_OPERATION_BIN_STRICT(RemoveLast, TLastRemoval, TUtf8(TAutoMap<TUtf8>, TUtf8), /*optArgs=*/0); - DEFINE_UTF8_OPERATION_MANY_STRICT(ReplaceAll, TAllReplacer, TUtf8(TAutoMap<TUtf8>, TUtf8, TUtf8), /*argsCount=*/3, /*optionalArgs=*/0); - DEFINE_UTF8_OPERATION_MANY_STRICT(ReplaceFirst, TFirstReplacer, TUtf8(TAutoMap<TUtf8>, TUtf8, TUtf8), /*argsCount=*/3, /*optionalArgs=*/0); - DEFINE_UTF8_OPERATION_MANY_STRICT(ReplaceLast, TLastReplacer, TUtf8(TAutoMap<TUtf8>, TUtf8, TUtf8), /*argsCount=*/3, /*optionalArgs=*/0); +DEFINE_UTF8_OPERATION_MANY_STRICT(ReplaceAll, TAllReplacer, TUtf8(TAutoMap<TUtf8>, TUtf8, TUtf8), /*argsCount=*/3, /*optionalArgs=*/0); +DEFINE_UTF8_OPERATION_MANY_STRICT(ReplaceFirst, TFirstReplacer, TUtf8(TAutoMap<TUtf8>, TUtf8, TUtf8), /*argsCount=*/3, /*optionalArgs=*/0); +DEFINE_UTF8_OPERATION_MANY_STRICT(ReplaceLast, TLastReplacer, TUtf8(TAutoMap<TUtf8>, TUtf8, TUtf8), /*argsCount=*/3, /*optionalArgs=*/0); - DEFINE_UTF8_OPERATION_MANY_STRICT(Find, TFinder, TOptional<ui64>(TAutoMap<TUtf8>, TUtf8, TOptional<ui64>), /*argsCount=*/3, /*optionalArgs=*/1); - DEFINE_UTF8_OPERATION_MANY_STRICT(RFind, TRFinder, TOptional<ui64>(TAutoMap<TUtf8>, TUtf8, TOptional<ui64>), /*argsCount=*/3, /*optionalArgs=*/1); +DEFINE_UTF8_OPERATION_MANY_STRICT(Find, TFinder, TOptional<ui64>(TAutoMap<TUtf8>, TUtf8, TOptional<ui64>), /*argsCount=*/3, /*optionalArgs=*/1); +DEFINE_UTF8_OPERATION_MANY_STRICT(RFind, TRFinder, TOptional<ui64>(TAutoMap<TUtf8>, TUtf8, TOptional<ui64>), /*argsCount=*/3, /*optionalArgs=*/1); - DEFINE_UTF8_OPERATION_BIN_NOT_STRICT(ToUint64, TToUint64Converter</*strict=*/false>, ui64(TAutoMap<TUtf8>, TOptional<ui16>), /*optionalArgs=*/1); - DEFINE_UTF8_OPERATION_BIN_STRICT(TryToUint64, TToUint64Converter</*strict=*/true>, TOptional<ui64>(TAutoMap<TUtf8>, TOptional<ui16>), /*optionalArgs=*/1); 
+DEFINE_UTF8_OPERATION_BIN_NOT_STRICT(ToUint64, TToUint64Converter</*strict=*/false>, ui64(TAutoMap<TUtf8>, TOptional<ui16>), /*optionalArgs=*/1); +DEFINE_UTF8_OPERATION_BIN_STRICT(TryToUint64, TToUint64Converter</*strict=*/true>, TOptional<ui64>(TAutoMap<TUtf8>, TOptional<ui16>), /*optionalArgs=*/1); - using TTmpVector = TSmallVec<TUnboxedValue, TUnboxedValue::TAllocator>; +using TTmpVector = TSmallVec<TUnboxedValue, TUnboxedValue::TAllocator>; - template <typename TIt> - static void SplitToListImpl( - const IValueBuilder* valueBuilder, - const TUnboxedValue& input, - const std::string_view::const_iterator from, - const TIt& it, - TTmpVector& result) { - for (const auto& elem : it) { - result.emplace_back(valueBuilder->SubString(input, std::distance(from, elem.TokenStart()), std::distance(elem.TokenStart(), elem.TokenDelim()))); - } +template <typename TIt> +static void SplitToListImpl( + const IValueBuilder* valueBuilder, + const TUnboxedValue& input, + const std::string_view::const_iterator from, + const TIt& it, + TTmpVector& result) { + for (const auto& elem : it) { + result.emplace_back(valueBuilder->SubString(input, std::distance(from, elem.TokenStart()), std::distance(elem.TokenStart(), elem.TokenDelim()))); } +} - template <typename TIt> - static void SplitToListImpl( - const IValueBuilder* valueBuilder, - const TUnboxedValue& input, - const TUtf32String::const_iterator start, - const TIt& it, - TTmpVector& result) { - const std::string_view& original = input.AsStringRef(); - size_t charPos = 0U, bytePos = 0U; - for (const auto& elem : it) { - for (const size_t next = std::distance(start, elem.TokenStart()); charPos < next; ++charPos) - bytePos += WideCharSize(original[bytePos]); - const auto from = bytePos; +template <typename TIt> +static void SplitToListImpl( + const IValueBuilder* valueBuilder, + const TUnboxedValue& input, + const TUtf32String::const_iterator start, + const TIt& it, + TTmpVector& result) { + const std::string_view& original = 
input.AsStringRef(); + size_t charPos = 0U, bytePos = 0U; + for (const auto& elem : it) { + for (const size_t next = std::distance(start, elem.TokenStart()); charPos < next; ++charPos) { + bytePos += WideCharSize(original[bytePos]); + } + const auto from = bytePos; - for (const size_t next = charPos + std::distance(elem.TokenStart(), elem.TokenDelim()); charPos < next; ++charPos) - bytePos += WideCharSize(original[bytePos]); - const auto size = bytePos - from; - result.emplace_back(valueBuilder->SubString(input, from, size)); + for (const size_t next = charPos + std::distance(elem.TokenStart(), elem.TokenDelim()); charPos < next; ++charPos) { + bytePos += WideCharSize(original[bytePos]); } + const auto size = bytePos - from; + result.emplace_back(valueBuilder->SubString(input, from, size)); } +} - template <typename TIt, typename TStrIt> - static void SplitToListImpl( - const IValueBuilder* valueBuilder, - const TUnboxedValue& input, - const TStrIt from, - TIt& it, - bool skipEmpty, - TTmpVector& result) { - if (skipEmpty) { - SplitToListImpl(valueBuilder, input, from, it.SkipEmpty(), result); - } else { - SplitToListImpl(valueBuilder, input, from, it, result); - } +template <typename TIt, typename TStrIt> +static void SplitToListImpl( + const IValueBuilder* valueBuilder, + const TUnboxedValue& input, + const TStrIt from, + TIt& it, + bool skipEmpty, + TTmpVector& result) { + if (skipEmpty) { + SplitToListImpl(valueBuilder, input, from, it.SkipEmpty(), result); + } else { + SplitToListImpl(valueBuilder, input, from, it, result); } +} - constexpr char delimeterStringName[] = "DelimeterString"; - constexpr char skipEmptyName[] = "SkipEmpty"; - constexpr char limitName[] = "Limit"; - using TDelimeterStringArg = TNamedArg<bool, delimeterStringName>; - using TSkipEmptyArg = TNamedArg<bool, skipEmptyName>; - using TLimitArg = TNamedArg<ui64, limitName>; +constexpr char delimeterStringName[] = "DelimeterString"; +constexpr char skipEmptyName[] = "SkipEmpty"; +constexpr 
char limitName[] = "Limit"; +using TDelimeterStringArg = TNamedArg<bool, delimeterStringName>; +using TSkipEmptyArg = TNamedArg<bool, skipEmptyName>; +using TLimitArg = TNamedArg<ui64, limitName>; - SIMPLE_UDF_WITH_OPTIONAL_ARGS(TSplitToList, TListType<TUtf8>( - TOptional<TUtf8>, - TUtf8, - TDelimeterStringArg, - TSkipEmptyArg, - TLimitArg - ), - 3) { - TTmpVector result; - if (args[0]) { - const bool delimiterString = args[2].GetOrDefault<bool>(true); - const bool skipEmpty = args[3].GetOrDefault<bool>(false); - const auto limit = args[4].GetOrDefault<ui64>(0); - if (delimiterString) { - const std::string_view input(args[0].AsStringRef()); - const std::string_view delimeter(args[1].AsStringRef()); - if (limit) { - auto it = StringSplitter(input).SplitByString(delimeter).Limit(limit + 1); - SplitToListImpl(valueBuilder, args[0], input.cbegin(), it, skipEmpty, result); - } else { - auto it = StringSplitter(input).SplitByString(delimeter); - SplitToListImpl(valueBuilder, args[0], input.cbegin(), it, skipEmpty, result); - } +SIMPLE_UDF_WITH_OPTIONAL_ARGS(TSplitToList, TListType<TUtf8>(TOptional<TUtf8>, + TUtf8, + TDelimeterStringArg, + TSkipEmptyArg, + TLimitArg), + 3) { + TTmpVector result; + if (args[0]) { + const bool delimiterString = args[2].GetOrDefault<bool>(true); + const bool skipEmpty = args[3].GetOrDefault<bool>(false); + const auto limit = args[4].GetOrDefault<ui64>(0); + if (delimiterString) { + const std::string_view input(args[0].AsStringRef()); + const std::string_view delimeter(args[1].AsStringRef()); + if (limit) { + auto it = StringSplitter(input).SplitByString(delimeter).Limit(limit + 1); + SplitToListImpl(valueBuilder, args[0], input.cbegin(), it, skipEmpty, result); + } else { + auto it = StringSplitter(input).SplitByString(delimeter); + SplitToListImpl(valueBuilder, args[0], input.cbegin(), it, skipEmpty, result); + } + } else { + const auto& input = UTF8ToUTF32<true>(args[0].AsStringRef()); + const auto& delimeter = 
UTF8ToUTF32<true>(args[1].AsStringRef()); + if (limit) { + auto it = StringSplitter(input).SplitBySet(delimeter.c_str()).Limit(limit + 1); + SplitToListImpl(valueBuilder, args[0], input.cbegin(), it, skipEmpty, result); } else { - const auto& input = UTF8ToUTF32<true>(args[0].AsStringRef()); - const auto& delimeter = UTF8ToUTF32<true>(args[1].AsStringRef()); - if (limit) { - auto it = StringSplitter(input).SplitBySet(delimeter.c_str()).Limit(limit + 1); - SplitToListImpl(valueBuilder, args[0], input.cbegin(), it, skipEmpty, result); - } else { - auto it = StringSplitter(input).SplitBySet(delimeter.c_str()); - SplitToListImpl(valueBuilder, args[0], input.cbegin(), it, skipEmpty, result); - } + auto it = StringSplitter(input).SplitBySet(delimeter.c_str()); + SplitToListImpl(valueBuilder, args[0], input.cbegin(), it, skipEmpty, result); } } - return valueBuilder->NewList(result.data(), result.size()); } + return valueBuilder->NewList(result.data(), result.size()); +} - SIMPLE_UDF(TJoinFromList, TUtf8(TAutoMap<TListType<TOptional<TUtf8>>>, TUtf8)) { - const auto input = args[0].GetListIterator(); - const std::string_view delimeter(args[1].AsStringRef()); - std::vector<TString> items; +SIMPLE_UDF(TJoinFromList, TUtf8(TAutoMap<TListType<TOptional<TUtf8>>>, TUtf8)) { + const auto input = args[0].GetListIterator(); + const std::string_view delimeter(args[1].AsStringRef()); + std::vector<TString> items; - for (TUnboxedValue current; input.Next(current);) { - if (current) { - items.emplace_back(current.AsStringRef()); - } + for (TUnboxedValue current; input.Next(current);) { + if (current) { + items.emplace_back(current.AsStringRef()); } - - return valueBuilder->NewString(JoinSeq(delimeter, items)); } - SIMPLE_UDF(TToCodePointList, TListType<ui32>(TAutoMap<TUtf8>)) { - size_t codePointCount = 0; - const auto& inputRef = args[0].AsStringRef(); - if (!GetNumberOfUTF8Chars(inputRef.Data(), inputRef.Size(), codePointCount)) { - // should not happen but still we have to check 
return code - ythrow yexception() << "Unable to count code points"; - } + return valueBuilder->NewString(JoinSeq(delimeter, items)); +} - TUnboxedValue* itemsPtr = nullptr; - auto result = valueBuilder->NewArray(codePointCount, itemsPtr); - const unsigned char* current = reinterpret_cast<const unsigned char*>(inputRef.Data()); - const unsigned char* end = current + inputRef.Size(); - wchar32 rune = BROKEN_RUNE; - ui32 codePointIndex = 0; - RECODE_RESULT retcode = RECODE_OK; - while (current < end && RECODE_OK == (retcode = ReadUTF8CharAndAdvance(rune, current, end))) { - if (codePointIndex >= codePointCount) { - // sanity check - ythrow yexception() << "Too big code point index " << codePointIndex << ", expecting only " << codePointCount << " code points"; - } - itemsPtr[codePointIndex++] = TUnboxedValuePod(static_cast<ui32>(rune)); - } +SIMPLE_UDF(TToCodePointList, TListType<ui32>(TAutoMap<TUtf8>)) { + size_t codePointCount = 0; + const auto& inputRef = args[0].AsStringRef(); + if (!GetNumberOfUTF8Chars(inputRef.Data(), inputRef.Size(), codePointCount)) { + // should not happen but still we have to check return code + ythrow yexception() << "Unable to count code points"; + } - if (retcode != RECODE_OK) { - ythrow yexception() << "Malformed UTF-8 string"; + TUnboxedValue* itemsPtr = nullptr; + auto result = valueBuilder->NewArray(codePointCount, itemsPtr); + const unsigned char* current = reinterpret_cast<const unsigned char*>(inputRef.Data()); + const unsigned char* end = current + inputRef.Size(); + wchar32 rune = BROKEN_RUNE; + ui32 codePointIndex = 0; + RECODE_RESULT retcode = RECODE_OK; + while (current < end && RECODE_OK == (retcode = ReadUTF8CharAndAdvance(rune, current, end))) { + if (codePointIndex >= codePointCount) { + // sanity check + ythrow yexception() << "Too big code point index " << codePointIndex << ", expecting only " << codePointCount << " code points"; } - - return result; + itemsPtr[codePointIndex++] = 
TUnboxedValuePod(static_cast<ui32>(rune)); } - SIMPLE_UDF(TFromCodePointList, TUtf8(TAutoMap<TListType<ui32>>)) { - auto input = args[0]; - if (auto elems = input.GetElements()) { - const auto elemCount = input.GetListLength(); - auto bufferSize = WideToUTF8BufferSize(elemCount); - TTempBuf buffer(bufferSize); - auto bufferPtr = buffer.Data(); - auto bufferEnd = buffer.Data() + bufferSize; - for (ui64 i = 0; i != elemCount; ++i) { - const auto& item = elems[i]; - const wchar32 rune = item.Get<ui32>(); - size_t written = 0; - WideToUTF8(&rune, 1, bufferPtr, written); - Y_ENSURE(written <= 4); - bufferPtr += written; - Y_ENSURE(bufferPtr <= bufferEnd); - } - return valueBuilder->NewString(TStringRef(buffer.Data(), bufferPtr - buffer.Data())); - } + if (retcode != RECODE_OK) { + ythrow yexception() << "Malformed UTF-8 string"; + } - std::vector<char, NUdf::TStdAllocatorForUdf<char>> buffer; - buffer.reserve(TUnboxedValuePod::InternalBufferSize); + return result; +} - const auto& iter = input.GetListIterator(); - char runeBuffer[4] = {}; - for (NUdf::TUnboxedValue item; iter.Next(item); ) { +SIMPLE_UDF(TFromCodePointList, TUtf8(TAutoMap<TListType<ui32>>)) { + auto input = args[0]; + if (auto elems = input.GetElements()) { + const auto elemCount = input.GetListLength(); + auto bufferSize = WideToUTF8BufferSize(elemCount); + TTempBuf buffer(bufferSize); + auto bufferPtr = buffer.Data(); + auto bufferEnd = buffer.Data() + bufferSize; + for (ui64 i = 0; i != elemCount; ++i) { + const auto& item = elems[i]; const wchar32 rune = item.Get<ui32>(); size_t written = 0; - WideToUTF8(&rune, 1, runeBuffer, written); + WideToUTF8(&rune, 1, bufferPtr, written); Y_ENSURE(written <= 4); - buffer.insert(buffer.end(), runeBuffer, runeBuffer + written); + bufferPtr += written; + Y_ENSURE(bufferPtr <= bufferEnd); } + return valueBuilder->NewString(TStringRef(buffer.Data(), bufferPtr - buffer.Data())); + } + + std::vector<char, NUdf::TStdAllocatorForUdf<char>> buffer; + 
buffer.reserve(TUnboxedValuePod::InternalBufferSize); - return valueBuilder->NewString(TStringRef(buffer.data(), buffer.size())); + const auto& iter = input.GetListIterator(); + char runeBuffer[4] = {}; + for (NUdf::TUnboxedValue item; iter.Next(item);) { + const wchar32 rune = item.Get<ui32>(); + size_t written = 0; + WideToUTF8(&rune, 1, runeBuffer, written); + Y_ENSURE(written <= 4); + buffer.insert(buffer.end(), runeBuffer, runeBuffer + written); } + return valueBuilder->NewString(TStringRef(buffer.data(), buffer.size())); +} + #define EXPORTED_UNICODE_BASE_UDF \ - TIsUtf, \ + TIsUtf, \ TGetLength, \ TSubstring, \ TFind, \ @@ -855,4 +856,4 @@ namespace { TIsAlpha, \ TIsAlnum, \ TIsHex -} +} // namespace diff --git a/yql/essentials/udfs/common/unicode_base/lib/ya.make b/yql/essentials/udfs/common/unicode_base/lib/ya.make index 2fda0829667..7ca562aef02 100644 --- a/yql/essentials/udfs/common/unicode_base/lib/ya.make +++ b/yql/essentials/udfs/common/unicode_base/lib/ya.make @@ -6,6 +6,8 @@ YQL_ABI_VERSION( 0 ) +ENABLE(YQL_STYLE_CPP) + SRCS( unicode_base_udf.cpp ) diff --git a/yql/essentials/udfs/common/unicode_base/ya.make b/yql/essentials/udfs/common/unicode_base/ya.make index 4ec872e2495..0540c0593b5 100644 --- a/yql/essentials/udfs/common/unicode_base/ya.make +++ b/yql/essentials/udfs/common/unicode_base/ya.make @@ -5,6 +5,8 @@ YQL_UDF_CONTRIB(unicode_udf) 37 0 ) + + ENABLE(YQL_STYLE_CPP) SRCS( unicode_base.cpp diff --git a/yql/essentials/udfs/common/url_base/lib/url_base_udf.cpp b/yql/essentials/udfs/common/url_base/lib/url_base_udf.cpp index 50a3ee8d1f1..778a3088b7f 100644 --- a/yql/essentials/udfs/common/url_base/lib/url_base_udf.cpp +++ b/yql/essentials/udfs/common/url_base/lib/url_base_udf.cpp @@ -1 +1 @@ -#include "url_base_udf.h"
\ No newline at end of file +#include "url_base_udf.h" diff --git a/yql/essentials/udfs/common/url_base/lib/url_base_udf.h b/yql/essentials/udfs/common/url_base/lib/url_base_udf.h index 04ad1b4e469..676d7a802d1 100644 --- a/yql/essentials/udfs/common/url_base/lib/url_base_udf.h +++ b/yql/essentials/udfs/common/url_base/lib/url_base_udf.h @@ -26,28 +26,27 @@ inline bool PrepareUrl(const std::string_view& keyStr, TUri& parser) { return parser.ParseAbs(keyStr, parseFlags) == TUri::ParsedOK; } -#define ARROW_UDF_SINGLE_STRING_FUNCTION_FOR_URL(udfName, functionName) \ - BEGIN_SIMPLE_ARROW_UDF(udfName, TOptional<char*>(TOptional<char*>)) { \ - EMPTY_RESULT_ON_EMPTY_ARG(0); \ - const std::string_view url(args[0].AsStringRef()); \ - const std::string_view res(functionName(url)); \ - return res.empty() ? TUnboxedValue() : \ - valueBuilder->SubString(args[0], std::distance(url.begin(), res.begin()), res.size()); \ - } \ - struct udfName##KernelExec : public TUnaryKernelExec<udfName##KernelExec> { \ - template <typename TSink> \ - static void Process(const IValueBuilder*, TBlockItem arg, const TSink& sink) { \ - if (!arg) { \ - return sink(TBlockItem()); \ - } \ - const std::string_view url(arg.AsStringRef()); \ - const std::string_view res(functionName(url)); \ - if (res.empty()) { \ - return sink(TBlockItem()); \ - } \ - sink(TBlockItem(TStringRef(res))); \ - } \ - }; \ +#define ARROW_UDF_SINGLE_STRING_FUNCTION_FOR_URL(udfName, functionName) \ + BEGIN_SIMPLE_ARROW_UDF(udfName, TOptional<char*>(TOptional<char*>)) { \ + EMPTY_RESULT_ON_EMPTY_ARG(0); \ + const std::string_view url(args[0].AsStringRef()); \ + const std::string_view res(functionName(url)); \ + return res.empty() ? 
TUnboxedValue() : valueBuilder->SubString(args[0], std::distance(url.begin(), res.begin()), res.size()); \ + } \ + struct udfName##KernelExec: public TUnaryKernelExec<udfName##KernelExec> { \ + template <typename TSink> \ + static void Process(const IValueBuilder*, TBlockItem arg, const TSink& sink) { \ + if (!arg) { \ + return sink(TBlockItem()); \ + } \ + const std::string_view url(arg.AsStringRef()); \ + const std::string_view res(functionName(url)); \ + if (res.empty()) { \ + return sink(TBlockItem()); \ + } \ + sink(TBlockItem(TStringRef(res))); \ + } \ + }; \ END_SIMPLE_ARROW_UDF(udfName, udfName##KernelExec::Do); BEGIN_SIMPLE_ARROW_UDF(TNormalize, TOptional<char*>(TOptional<char*>)) { @@ -55,10 +54,10 @@ BEGIN_SIMPLE_ARROW_UDF(TNormalize, TOptional<char*>(TOptional<char*>)) { TUri url; const bool success = PrepareUrl(args[0].AsStringRef(), url); return success - ? valueBuilder->NewString(url.PrintS(TUri::FlagNoFrag)) - : TUnboxedValue(); + ? valueBuilder->NewString(url.PrintS(TUri::FlagNoFrag)) + : TUnboxedValue(); } -struct TNormalizeKernelExec : public TUnaryKernelExec<TNormalizeKernelExec> { +struct TNormalizeKernelExec: public TUnaryKernelExec<TNormalizeKernelExec> { template <typename TSink> static void Process(const IValueBuilder*, TBlockItem arg, const TSink& sink) { if (!arg) { @@ -79,7 +78,7 @@ BEGIN_SIMPLE_STRICT_ARROW_UDF(TGetScheme, char*(TAutoMap<char*>)) { const std::string_view prefix(GetSchemePrefix(url)); return valueBuilder->SubString(args[0], std::distance(url.begin(), prefix.begin()), prefix.size()); } -struct TGetSchemeKernelExec : public TUnaryKernelExec<TGetSchemeKernelExec> { +struct TGetSchemeKernelExec: public TUnaryKernelExec<TGetSchemeKernelExec> { template <typename TSink> static void Process(const IValueBuilder*, TBlockItem arg, const TSink& sink) { const std::string_view url(arg.AsStringRef()); @@ -117,12 +116,12 @@ BEGIN_SIMPLE_ARROW_UDF(TGetPort, TOptional<ui64>(TOptional<char*>)) { TStringBuf scheme, host; TString 
lowerUri(args[0].AsStringRef()); std::transform(lowerUri.cbegin(), lowerUri.cbegin() + GetSchemePrefixSize(lowerUri), - lowerUri.begin(), [](unsigned char c){ return std::tolower(c); }); + lowerUri.begin(), [](unsigned char c) { return std::tolower(c); }); return TryGetSchemeHostAndPort(lowerUri, scheme, host, port) && port - ? TUnboxedValuePod(port) - : TUnboxedValuePod(); + ? TUnboxedValuePod(port) + : TUnboxedValuePod(); } -struct TGetPortKernelExec : public TUnaryKernelExec<TGetPortKernelExec> { +struct TGetPortKernelExec: public TUnaryKernelExec<TGetPortKernelExec> { template <typename TSink> static void Process(const IValueBuilder*, TBlockItem arg, const TSink& sink) { if (!arg) { @@ -132,7 +131,7 @@ struct TGetPortKernelExec : public TUnaryKernelExec<TGetPortKernelExec> { TStringBuf scheme, host; TString lowerUri(arg.AsStringRef()); std::transform(lowerUri.cbegin(), lowerUri.cbegin() + GetSchemePrefixSize(lowerUri), - lowerUri.begin(), [](unsigned char c){ return std::tolower(c); }); + lowerUri.begin(), [](unsigned char c) { return std::tolower(c); }); if (TryGetSchemeHostAndPort(lowerUri, scheme, host, port) && port) { return sink(TBlockItem(port)); } @@ -147,10 +146,10 @@ BEGIN_SIMPLE_ARROW_UDF(TGetTail, TOptional<char*>(TOptional<char*>)) { TStringBuf host, tail; SplitUrlToHostAndPath(url, host, tail); return tail.StartsWith('/') - ? valueBuilder->NewString(tail) - : valueBuilder->NewString(TString('/').append(tail)); + ? 
valueBuilder->NewString(tail) + : valueBuilder->NewString(TString('/').append(tail)); } -struct TGetTailKernelExec : public TUnaryKernelExec<TGetTailKernelExec> { +struct TGetTailKernelExec: public TUnaryKernelExec<TGetTailKernelExec> { template <typename TSink> static void Process(const IValueBuilder*, TBlockItem arg, const TSink& sink) { if (!arg) { @@ -184,7 +183,7 @@ BEGIN_SIMPLE_ARROW_UDF(TGetPath, TOptional<char*>(TOptional<char*>)) { return valueBuilder->SubString(args[0], std::distance(url.begin(), cut.begin()), cut.length()); } -struct TGetPathKernelExec : public TUnaryKernelExec<TGetPathKernelExec> { +struct TGetPathKernelExec: public TUnaryKernelExec<TGetPathKernelExec> { template <typename TSink> static void Process(const IValueBuilder*, TBlockItem arg, const TSink& sink) { if (!arg) { @@ -211,10 +210,9 @@ BEGIN_SIMPLE_ARROW_UDF(TGetFragment, TOptional<char*>(TOptional<char*>)) { EMPTY_RESULT_ON_EMPTY_ARG(0); const std::string_view url(args[0].AsStringRef()); const auto pos = url.find('#'); - return pos == std::string_view::npos ? TUnboxedValue() : - valueBuilder->SubString(args[0], pos + 1U, url.length() - pos - 1U); + return pos == std::string_view::npos ? 
TUnboxedValue() : valueBuilder->SubString(args[0], pos + 1U, url.length() - pos - 1U); } -struct TGetFragmentKernelExec : public TUnaryKernelExec<TGetFragmentKernelExec> { +struct TGetFragmentKernelExec: public TUnaryKernelExec<TGetFragmentKernelExec> { template <typename TSink> static void Process(const IValueBuilder*, TBlockItem arg, const TSink& sink) { if (!arg) { @@ -254,7 +252,7 @@ BEGIN_SIMPLE_ARROW_UDF(TGetDomain, TOptional<char*>(TOptional<char*>, ui8)) { const std::pair<ui32, ui32> result = *resultOpt; return valueBuilder->SubString(args[0], result.first, result.second); } -struct TGetDomainKernelExec : public TBinaryKernelExec<TGetDomainKernelExec> { +struct TGetDomainKernelExec: public TBinaryKernelExec<TGetDomainKernelExec> { template <typename TSink> static void Process(const IValueBuilder*, TBlockItem arg1, TBlockItem arg2, const TSink& sink) { if (!arg1) { @@ -274,7 +272,7 @@ BEGIN_SIMPLE_ARROW_UDF(TGetTLD, char*(TAutoMap<char*>)) { const TStringBuf url(args[0].AsStringRef()); return valueBuilder->NewString(GetZone(GetOnlyHost(url))); } -struct TGetTLDKernelExec : public TUnaryKernelExec<TGetTLDKernelExec> { +struct TGetTLDKernelExec: public TUnaryKernelExec<TGetTLDKernelExec> { template <typename TSink> static void Process(const IValueBuilder*, TBlockItem arg, const TSink& sink) { const TStringBuf url(arg.AsStringRef()); @@ -289,7 +287,7 @@ BEGIN_SIMPLE_ARROW_UDF(TGetDomainLevel, ui64(TAutoMap<char*>)) { StringSplitter(GetOnlyHost(args[0].AsStringRef())).Split('.').AddTo(&parts); return TUnboxedValuePod(ui64(parts.size())); } -struct TGetDomainLevelKernelExec : public TUnaryKernelExec<TGetDomainLevelKernelExec> { +struct TGetDomainLevelKernelExec: public TUnaryKernelExec<TGetDomainLevelKernelExec> { template <typename TSink> static void Process(const IValueBuilder*, TBlockItem arg, const TSink& sink) { std::vector<std::string_view> parts; @@ -358,7 +356,7 @@ BEGIN_SIMPLE_ARROW_UDF(TGetCGIParam, TOptional<char*>(TOptional<char*>, char*)) const 
std::pair<ui32, ui32> result = *resultOpt; return valueBuilder->SubString(args[0], result.first, result.second); } -struct TGetCGIParamKernelExec : public TBinaryKernelExec<TGetCGIParamKernelExec> { +struct TGetCGIParamKernelExec: public TBinaryKernelExec<TGetCGIParamKernelExec> { template <typename TSink> static void Process(const IValueBuilder*, TBlockItem arg1, TBlockItem arg2, const TSink& sink) { if (!arg1) { @@ -385,7 +383,7 @@ BEGIN_SIMPLE_ARROW_UDF(TCutQueryStringAndFragment, char*(TAutoMap<char*>)) { const auto cut = input.find_first_of("?#"); return std::string_view::npos == cut ? NUdf::TUnboxedValue(args[0]) : valueBuilder->SubString(args[0], 0U, cut); } -struct TCutQueryStringAndFragmentKernelExec : public TUnaryKernelExec<TCutQueryStringAndFragmentKernelExec> { +struct TCutQueryStringAndFragmentKernelExec: public TUnaryKernelExec<TCutQueryStringAndFragmentKernelExec> { template <typename TSink> static void Process(const IValueBuilder*, TBlockItem arg, const TSink& sink) { const std::string_view input(arg.AsStringRef()); @@ -405,7 +403,7 @@ BEGIN_SIMPLE_ARROW_UDF(TEncode, TOptional<char*>(TOptional<char*>)) { UrlEscape(url); return input == url ? NUdf::TUnboxedValue(args[0]) : valueBuilder->NewString(url); } -struct TEncodeKernelExec : public TUnaryKernelExec<TEncodeKernelExec> { +struct TEncodeKernelExec: public TUnaryKernelExec<TEncodeKernelExec> { template <typename TSink> static void Process(const IValueBuilder*, TBlockItem arg, const TSink& sink) { if (!arg) { @@ -433,7 +431,7 @@ BEGIN_SIMPLE_ARROW_UDF(TDecode, TOptional<char*>(TOptional<char*>)) { UrlUnescape(url); return input == url ? 
NUdf::TUnboxedValue(args[0]) : valueBuilder->NewString(url); } -struct TDecodeKernelExec : public TUnaryKernelExec<TDecodeKernelExec> { +struct TDecodeKernelExec: public TUnaryKernelExec<TDecodeKernelExec> { template <typename TSink> static void Process(const IValueBuilder*, TBlockItem arg, const TSink& sink) { if (!arg) { @@ -455,7 +453,7 @@ BEGIN_SIMPLE_ARROW_UDF(TIsKnownTLD, bool(TAutoMap<char*>)) { Y_UNUSED(valueBuilder); return TUnboxedValuePod(IsTld(args[0].AsStringRef())); } -struct TIsKnownTLDKernelExec : public TUnaryKernelExec<TIsKnownTLDKernelExec> { +struct TIsKnownTLDKernelExec: public TUnaryKernelExec<TIsKnownTLDKernelExec> { template <typename TSink> static void Process(const IValueBuilder*, TBlockItem arg, const TSink& sink) { sink(TBlockItem(static_cast<ui8>(IsTld(arg.AsStringRef())))); @@ -467,7 +465,7 @@ BEGIN_SIMPLE_ARROW_UDF(TIsWellKnownTLD, bool(TAutoMap<char*>)) { Y_UNUSED(valueBuilder); return TUnboxedValuePod(IsVeryGoodTld(args[0].AsStringRef())); } -struct TIsWellKnownTLDKernelExec : public TUnaryKernelExec<TIsWellKnownTLDKernelExec> { +struct TIsWellKnownTLDKernelExec: public TUnaryKernelExec<TIsWellKnownTLDKernelExec> { template <typename TSink> static void Process(const IValueBuilder*, TBlockItem arg, const TSink& sink) { sink(TBlockItem(static_cast<ui8>(IsVeryGoodTld(arg.AsStringRef())))); @@ -475,13 +473,14 @@ struct TIsWellKnownTLDKernelExec : public TUnaryKernelExec<TIsWellKnownTLDKernel }; END_SIMPLE_ARROW_UDF(TIsWellKnownTLD, TIsWellKnownTLDKernelExec::Do); -BEGIN_SIMPLE_ARROW_UDF(THostNameToPunycode, TOptional<char*>(TAutoMap<char*>)) try { +BEGIN_SIMPLE_ARROW_UDF(THostNameToPunycode, TOptional<char*>(TAutoMap<char*>)) +try { const TUtf16String& input = UTF8ToWide(args[0].AsStringRef()); return valueBuilder->NewString(HostNameToPunycode(input)); } catch (TPunycodeError&) { return TUnboxedValue(); } -struct THostNameToPunycodeKernelExec : public TUnaryKernelExec<THostNameToPunycodeKernelExec> { +struct 
THostNameToPunycodeKernelExec: public TUnaryKernelExec<THostNameToPunycodeKernelExec> { template <typename TSink> static void Process(const IValueBuilder*, TBlockItem arg, const TSink& sink) try { const TUtf16String& input = UTF8ToWide(arg.AsStringRef()); @@ -496,7 +495,7 @@ BEGIN_SIMPLE_ARROW_UDF(TForceHostNameToPunycode, char*(TAutoMap<char*>)) { const TUtf16String& input = UTF8ToWide(args[0].AsStringRef()); return valueBuilder->NewString(ForceHostNameToPunycode(input)); } -struct TForceHostNameToPunycodeKernelExec : public TUnaryKernelExec<TForceHostNameToPunycodeKernelExec> { +struct TForceHostNameToPunycodeKernelExec: public TUnaryKernelExec<TForceHostNameToPunycodeKernelExec> { template <typename TSink> static void Process(const IValueBuilder*, TBlockItem arg, const TSink& sink) { const TUtf16String& input = UTF8ToWide(arg.AsStringRef()); @@ -505,14 +504,15 @@ struct TForceHostNameToPunycodeKernelExec : public TUnaryKernelExec<TForceHostNa }; END_SIMPLE_ARROW_UDF(TForceHostNameToPunycode, TForceHostNameToPunycodeKernelExec::Do); -BEGIN_SIMPLE_ARROW_UDF(TPunycodeToHostName, TOptional<char*>(TAutoMap<char*>)) try { +BEGIN_SIMPLE_ARROW_UDF(TPunycodeToHostName, TOptional<char*>(TAutoMap<char*>)) +try { const TStringRef& input = args[0].AsStringRef(); const auto& result = WideToUTF8(PunycodeToHostName(input)); return valueBuilder->NewString(result); } catch (TPunycodeError&) { return TUnboxedValue(); } -struct TPunycodeToHostNameKernelExec : public TUnaryKernelExec<TPunycodeToHostNameKernelExec> { +struct TPunycodeToHostNameKernelExec: public TUnaryKernelExec<TPunycodeToHostNameKernelExec> { template <typename TSink> static void Process(const IValueBuilder*, TBlockItem arg, const TSink& sink) try { const TStringRef& input = arg.AsStringRef(); @@ -529,7 +529,7 @@ BEGIN_SIMPLE_ARROW_UDF(TForcePunycodeToHostName, char*(TAutoMap<char*>)) { const auto& result = WideToUTF8(ForcePunycodeToHostName(input)); return valueBuilder->NewString(result); } -struct 
TForcePunycodeToHostNameKernelExec : public TUnaryKernelExec<TForcePunycodeToHostNameKernelExec> { +struct TForcePunycodeToHostNameKernelExec: public TUnaryKernelExec<TForcePunycodeToHostNameKernelExec> { template <typename TSink> static void Process(const IValueBuilder*, TBlockItem arg, const TSink& sink) { const TStringRef& input = arg.AsStringRef(); @@ -543,7 +543,7 @@ BEGIN_SIMPLE_ARROW_UDF(TCanBePunycodeHostName, bool(TAutoMap<char*>)) { Y_UNUSED(valueBuilder); return TUnboxedValuePod(CanBePunycodeHostName(args[0].AsStringRef())); } -struct TCanBePunycodeHostNameKernelExec : public TUnaryKernelExec<TCanBePunycodeHostNameKernelExec> { +struct TCanBePunycodeHostNameKernelExec: public TUnaryKernelExec<TCanBePunycodeHostNameKernelExec> { template <typename TSink> static void Process(const IValueBuilder*, TBlockItem arg, const TSink& sink) { sink(TBlockItem(static_cast<ui8>(CanBePunycodeHostName(arg.AsStringRef())))); @@ -551,36 +551,36 @@ struct TCanBePunycodeHostNameKernelExec : public TUnaryKernelExec<TCanBePunycode }; END_SIMPLE_ARROW_UDF(TCanBePunycodeHostName, TCanBePunycodeHostNameKernelExec::Do); -#define EXPORTED_URL_BASE_UDF \ - TNormalize, \ - TParse, \ - TGetScheme, \ - TGetHost, \ - TGetHostPort, \ - TGetSchemeHost, \ - TGetSchemeHostPort, \ - TGetPort, \ - TGetTail, \ - TGetPath, \ - TGetFragment, \ - TGetDomain, \ - TGetTLD, \ - TGetDomainLevel, \ - TGetSignificantDomain, \ - TGetCGIParam, \ - TCutScheme, \ - TCutWWW, \ - TCutWWW2, \ - TCutQueryStringAndFragment, \ - TEncode, \ - TDecode, \ - TIsKnownTLD, \ - TIsWellKnownTLD, \ - THostNameToPunycode, \ - TForceHostNameToPunycode, \ - TPunycodeToHostName, \ - TForcePunycodeToHostName, \ - TCanBePunycodeHostName, \ - TQueryStringToList, \ - TQueryStringToDict, \ - TBuildQueryString +#define EXPORTED_URL_BASE_UDF \ + TNormalize, \ + TParse, \ + TGetScheme, \ + TGetHost, \ + TGetHostPort, \ + TGetSchemeHost, \ + TGetSchemeHostPort, \ + TGetPort, \ + TGetTail, \ + TGetPath, \ + TGetFragment, \ + 
TGetDomain, \ + TGetTLD, \ + TGetDomainLevel, \ + TGetSignificantDomain, \ + TGetCGIParam, \ + TCutScheme, \ + TCutWWW, \ + TCutWWW2, \ + TCutQueryStringAndFragment, \ + TEncode, \ + TDecode, \ + TIsKnownTLD, \ + TIsWellKnownTLD, \ + THostNameToPunycode, \ + TForceHostNameToPunycode, \ + TPunycodeToHostName, \ + TForcePunycodeToHostName, \ + TCanBePunycodeHostName, \ + TQueryStringToList, \ + TQueryStringToDict, \ + TBuildQueryString diff --git a/yql/essentials/udfs/common/url_base/lib/url_parse.cpp b/yql/essentials/udfs/common/url_base/lib/url_parse.cpp index 34485970ee0..c892bf25f6f 100644 --- a/yql/essentials/udfs/common/url_base/lib/url_parse.cpp +++ b/yql/essentials/udfs/common/url_base/lib/url_parse.cpp @@ -1,53 +1,53 @@ #include "url_parse.h" #define FIELD_ADD(name) structBuilder->AddField(#name, optionalStringType, &urlParseIndexes.name); -#define FIELD_FILL(name) \ - if (value.FldIsSet(TUri::Field##name)) { \ +#define FIELD_FILL(name) \ + if (value.FldIsSet(TUri::Field##name)) { \ fields[UrlParseIndexes_.name] = valueBuilder->NewString(value.GetField(TUri::Field##name)); \ } namespace NUrlUdf { - using namespace NUri; - using namespace NKikimr; - using namespace NUdf; +using namespace NUri; +using namespace NKikimr; +using namespace NUdf; - TUnboxedValue TParse::Run( - const IValueBuilder* valueBuilder, - const TUnboxedValuePod* args) const { - TUri value; - const auto ParseError = value.ParseAbs(args[0].AsStringRef(), ParseFlags_); - TUnboxedValue* fields = nullptr; - const auto result = valueBuilder->NewArray(FieldsCount, fields); - if (ParseError == TUri::ParsedOK) { - FIELD_MAP(FIELD_FILL) - } else { - fields[UrlParseIndexes_.ParseError] = valueBuilder->NewString(TStringBuilder() << ParseError); - } - return result; +TUnboxedValue TParse::Run( + const IValueBuilder* valueBuilder, + const TUnboxedValuePod* args) const { + TUri value; + const auto ParseError = value.ParseAbs(args[0].AsStringRef(), ParseFlags_); + TUnboxedValue* fields = nullptr; + const 
auto result = valueBuilder->NewArray(FieldsCount, fields); + if (ParseError == TUri::ParsedOK) { + FIELD_MAP(FIELD_FILL) + } else { + fields[UrlParseIndexes_.ParseError] = valueBuilder->NewString(TStringBuilder() << ParseError); } + return result; +} - bool TParse::DeclareSignature( - const TStringRef& name, - TType* userType, - IFunctionTypeInfoBuilder& builder, - bool typesOnly) { - Y_UNUSED(userType); - if (Name() == name) { - TUrlParseIndexes urlParseIndexes; +bool TParse::DeclareSignature( + const TStringRef& name, + TType* userType, + IFunctionTypeInfoBuilder& builder, + bool typesOnly) { + Y_UNUSED(userType); + if (Name() == name) { + TUrlParseIndexes urlParseIndexes; - builder.Args(1)->Add<TAutoMap<char*>>(); - const auto optionalStringType = builder.Optional()->Item<char*>().Build(); - const auto structBuilder = builder.Struct(FieldsCount); - structBuilder->AddField("ParseError", optionalStringType, &urlParseIndexes.ParseError); - FIELD_MAP(FIELD_ADD) - builder.Returns(structBuilder->Build()); + builder.Args(1)->Add<TAutoMap<char*>>(); + const auto optionalStringType = builder.Optional()->Item<char*>().Build(); + const auto structBuilder = builder.Struct(FieldsCount); + structBuilder->AddField("ParseError", optionalStringType, &urlParseIndexes.ParseError); + FIELD_MAP(FIELD_ADD) + builder.Returns(structBuilder->Build()); - if (!typesOnly) { - builder.Implementation(new TParse(urlParseIndexes)); - } - return true; - } else { - return false; + if (!typesOnly) { + builder.Implementation(new TParse(urlParseIndexes)); } + return true; + } else { + return false; } } +} // namespace NUrlUdf diff --git a/yql/essentials/udfs/common/url_base/lib/url_parse.h b/yql/essentials/udfs/common/url_base/lib/url_parse.h index e25e79c5196..8dab6ac7f47 100644 --- a/yql/essentials/udfs/common/url_base/lib/url_parse.h +++ b/yql/essentials/udfs/common/url_base/lib/url_parse.h @@ -16,44 +16,44 @@ #define FIELD_INDEXES(name) ui32 name; namespace NUrlUdf { - using namespace NUri; - 
using namespace NKikimr; - using namespace NUdf; +using namespace NUri; +using namespace NKikimr; +using namespace NUdf; - struct TUrlParseIndexes { - ui32 ParseError; - FIELD_MAP(FIELD_INDEXES) - }; +struct TUrlParseIndexes { + ui32 ParseError; + FIELD_MAP(FIELD_INDEXES) +}; - class TParse: public TBoxedValue { - public: - TParse(const TUrlParseIndexes& UrlParseIndexes) - : UrlParseIndexes_(UrlParseIndexes) - , ParseFlags_(TUri::FeaturesRecommended) - { - } +class TParse: public TBoxedValue { +public: + TParse(const TUrlParseIndexes& UrlParseIndexes) + : UrlParseIndexes_(UrlParseIndexes) + , ParseFlags_(TUri::FeaturesRecommended) + { + } - static const TStringRef& Name() { - static auto nameRef = TStringRef("Parse"); - return nameRef; - } + static const TStringRef& Name() { + static auto nameRef = TStringRef("Parse"); + return nameRef; + } - private: - TUnboxedValue Run( - const IValueBuilder* valueBuilder, - const TUnboxedValuePod* args) const override; +private: + TUnboxedValue Run( + const IValueBuilder* valueBuilder, + const TUnboxedValuePod* args) const override; - public: - static bool DeclareSignature( - const TStringRef& name, - TType* userType, - IFunctionTypeInfoBuilder& builder, - bool typesOnly); +public: + static bool DeclareSignature( + const TStringRef& name, + TType* userType, + IFunctionTypeInfoBuilder& builder, + bool typesOnly); - private: - const TUrlParseIndexes UrlParseIndexes_; - const NUri::TParseFlags ParseFlags_; +private: + const TUrlParseIndexes UrlParseIndexes_; + const NUri::TParseFlags ParseFlags_; - static constexpr ui32 FieldsCount = sizeof(TUrlParseIndexes) / sizeof(ui32); - }; -} + static constexpr ui32 FieldsCount = sizeof(TUrlParseIndexes) / sizeof(ui32); +}; +} // namespace NUrlUdf diff --git a/yql/essentials/udfs/common/url_base/lib/url_query.cpp b/yql/essentials/udfs/common/url_base/lib/url_query.cpp index 885dc3b16e9..32dc2e34806 100644 --- a/yql/essentials/udfs/common/url_base/lib/url_query.cpp +++ 
b/yql/essentials/udfs/common/url_base/lib/url_query.cpp @@ -7,122 +7,125 @@ #include <library/cpp/string_utils/quote/quote.h> namespace NUrlUdf { - void TQueryStringParse::MakeSignature(IFunctionTypeInfoBuilder& builder, - const TType* retType) - { - builder.Returns(retType).OptionalArgs(4); - auto args = builder.Args(); - args->Add<TAutoMap<TQueryStr>>(); - args->Add<TKeepBlankValuesNArg>(); - args->Add<TStrictNArg>(); - args->Add<TMaxFieldsNArg>(); - args->Add<TSeparatorNArg>().Done(); +void TQueryStringParse::MakeSignature(IFunctionTypeInfoBuilder& builder, + const TType* retType) +{ + builder.Returns(retType).OptionalArgs(4); + auto args = builder.Args(); + args->Add<TAutoMap<TQueryStr>>(); + args->Add<TKeepBlankValuesNArg>(); + args->Add<TStrictNArg>(); + args->Add<TMaxFieldsNArg>(); + args->Add<TSeparatorNArg>().Done(); +} + +std::vector<std::pair<TString, TString>> +TQueryStringParse::RunImpl(const TUnboxedValuePod* args) const { + const std::string_view query(args[0].AsStringRef()); + if (query.empty()) { + return {}; } + const bool keepBlankValues = args[1].GetOrDefault(false); + const bool strict = args[2].GetOrDefault(true); + const ui32 maxFieldCnt = args[3].GetOrDefault(Max<ui32>()); + const std::string_view sep(args[4] ? args[4].AsStringRef() : "&"); - std::vector<std::pair<TString, TString>> - TQueryStringParse::RunImpl(const TUnboxedValuePod* args) const { - const std::string_view query(args[0].AsStringRef()); - if (query.empty()) - return {}; - const bool keepBlankValues = args[1].GetOrDefault(false); - const bool strict = args[2].GetOrDefault(true); - const ui32 maxFieldCnt = args[3].GetOrDefault(Max<ui32>()); - const std::string_view sep(args[4] ? 
args[4].AsStringRef() : "&"); + std::vector<TStringBuf> parts; + StringSplitter(query).SplitByString(sep).Collect(&parts); + if (parts.size() > maxFieldCnt) { + UdfTerminate((TStringBuilder() << Pos_ << "Max number of fields (" << maxFieldCnt + << ") exceeded: got " << parts.size()) + .c_str()); + } - std::vector<TStringBuf> parts; - StringSplitter(query).SplitByString(sep).Collect(&parts); - if (parts.size() > maxFieldCnt) { - UdfTerminate((TStringBuilder() << Pos_ << "Max number of fields (" << maxFieldCnt - << ") exceeded: got " << parts.size()).c_str()); + std::vector<std::pair<TString, TString>> pairs; + for (const TStringBuf& part : parts) { + if (part.empty() && !strict) { + continue; } - - std::vector<std::pair<TString, TString>> pairs; - for (const TStringBuf& part: parts) { - if (part.empty() && !strict) { - continue; - } - TVector<TString> nvPair = StringSplitter(part).Split('=').Limit(2); - if (nvPair.size() != 2) { - if (strict) { - UdfTerminate((TStringBuilder() << Pos_ << "Bad query field: \"" - << nvPair[0] << "\"").c_str()); - } - if (keepBlankValues) { - nvPair.emplace_back(""); - } else { - continue; - } + TVector<TString> nvPair = StringSplitter(part).Split('=').Limit(2); + if (nvPair.size() != 2) { + if (strict) { + UdfTerminate((TStringBuilder() << Pos_ << "Bad query field: \"" + << nvPair[0] << "\"") + .c_str()); } - if (!nvPair[1].empty() || keepBlankValues) { - CGIUnescape(nvPair[0]); - CGIUnescape(nvPair[1]); - pairs.emplace_back(nvPair[0], nvPair[1]); + if (keepBlankValues) { + nvPair.emplace_back(""); + } else { + continue; } } - return pairs; + if (!nvPair[1].empty() || keepBlankValues) { + CGIUnescape(nvPair[0]); + CGIUnescape(nvPair[1]); + pairs.emplace_back(nvPair[0], nvPair[1]); + } } + return pairs; +} - bool TQueryStringToList::DeclareSignature(const TStringRef& name, - TType*, - IFunctionTypeInfoBuilder& builder, - bool typesOnly) { - if (Name() == name) { - MakeSignature(builder, GetListType(builder)); - if (!typesOnly) { - 
builder.Implementation(new TQueryStringToList(builder.GetSourcePosition())); - } - return true; +bool TQueryStringToList::DeclareSignature(const TStringRef& name, + TType*, + IFunctionTypeInfoBuilder& builder, + bool typesOnly) { + if (Name() == name) { + MakeSignature(builder, GetListType(builder)); + if (!typesOnly) { + builder.Implementation(new TQueryStringToList(builder.GetSourcePosition())); } - return false; + return true; } + return false; +} - TUnboxedValue TQueryStringToList::Run(const IValueBuilder* valueBuilder, - const TUnboxedValuePod* args) const { - const auto pairs = RunImpl(args); - std::vector<TUnboxedValue> ret; - for (const auto& nvPair : pairs) { - TUnboxedValue* pair = nullptr; - auto item = valueBuilder->NewArray(2U, pair); - pair[0] = valueBuilder->NewString(nvPair.first); - pair[1] = valueBuilder->NewString(nvPair.second); - ret.push_back(item); - } - return valueBuilder->NewList(ret.data(), ret.size()); +TUnboxedValue TQueryStringToList::Run(const IValueBuilder* valueBuilder, + const TUnboxedValuePod* args) const { + const auto pairs = RunImpl(args); + std::vector<TUnboxedValue> ret; + for (const auto& nvPair : pairs) { + TUnboxedValue* pair = nullptr; + auto item = valueBuilder->NewArray(2U, pair); + pair[0] = valueBuilder->NewString(nvPair.first); + pair[1] = valueBuilder->NewString(nvPair.second); + ret.push_back(item); } + return valueBuilder->NewList(ret.data(), ret.size()); +} - bool TQueryStringToDict::DeclareSignature(const TStringRef& name, - TType*, - IFunctionTypeInfoBuilder& builder, - bool typesOnly) { - if (Name() == name) { - auto dictType = GetDictType(builder); - MakeSignature(builder, dictType); - if (!typesOnly) { - builder.Implementation(new TQueryStringToDict(dictType, - builder.GetSourcePosition())); - } - return true; +bool TQueryStringToDict::DeclareSignature(const TStringRef& name, + TType*, + IFunctionTypeInfoBuilder& builder, + bool typesOnly) { + if (Name() == name) { + auto dictType = GetDictType(builder); + 
MakeSignature(builder, dictType); + if (!typesOnly) { + builder.Implementation(new TQueryStringToDict(dictType, + builder.GetSourcePosition())); } - return false; + return true; } + return false; +} - TUnboxedValue TQueryStringToDict::Run(const IValueBuilder* valueBuilder, - const TUnboxedValuePod* args) const { - const auto pairs = RunImpl(args); - auto ret = valueBuilder->NewDict(DictType_, TDictFlags::Hashed | TDictFlags::Multi); - for (const auto& nvPair : pairs) { - ret->Add(valueBuilder->NewString(nvPair.first), - valueBuilder->NewString(nvPair.second)); - } - return ret->Build(); +TUnboxedValue TQueryStringToDict::Run(const IValueBuilder* valueBuilder, + const TUnboxedValuePod* args) const { + const auto pairs = RunImpl(args); + auto ret = valueBuilder->NewDict(DictType_, TDictFlags::Hashed | TDictFlags::Multi); + for (const auto& nvPair : pairs) { + ret->Add(valueBuilder->NewString(nvPair.first), + valueBuilder->NewString(nvPair.second)); } + return ret->Build(); +} - TUnboxedValue TBuildQueryString::Run(const IValueBuilder* valueBuilder, - const TUnboxedValuePod* args) const { - const std::string_view sep(args[1] ? args[1].AsStringRef() : "&"); - TStringBuilder ret; +TUnboxedValue TBuildQueryString::Run(const IValueBuilder* valueBuilder, + const TUnboxedValuePod* args) const { + const std::string_view sep(args[1] ? 
args[1].AsStringRef() : "&"); + TStringBuilder ret; - switch(FirstArgTypeId_) { + switch (FirstArgTypeId_) { case EFirstArgTypeId::Dict: { TUnboxedValue key, value; const auto dictIt = args[0].GetDictIterator(); @@ -132,8 +135,9 @@ namespace NUrlUdf { const auto listIt = value.GetListIterator(); TUnboxedValue item; while (listIt.Next(item)) { - if (wasItem++) + if (wasItem++) { ret << sep; + } if (item) { ret << keyEscaped << '=' << CGIEscapeRet(item.AsStringRef()); } else { @@ -148,8 +152,9 @@ namespace NUrlUdf { const auto dictIt = args[0].GetDictIterator(); ui64 wasKey = 0; while (dictIt.NextPair(key, value)) { - if (wasKey++) + if (wasKey++) { ret << sep; + } if (value) { ret << CGIEscapeRet(key.AsStringRef()) << '=' << CGIEscapeRet(value.AsStringRef()); @@ -164,8 +169,9 @@ namespace NUrlUdf { TUnboxedValue item; const auto listIt = args[0].GetListIterator(); while (listIt.Next(item)) { - if (wasItem++) + if (wasItem++) { ret << sep; + } TUnboxedValue key = item.GetElement(0), val = item.GetElement(1); if (val) { ret << CGIEscapeRet(key.AsStringRef()) << '=' @@ -178,66 +184,66 @@ namespace NUrlUdf { } default: Y_ABORT("Current first parameter type is not yet implemented"); - } - return valueBuilder->NewString(ret); } + return valueBuilder->NewString(ret); +} - bool TBuildQueryString::DeclareSignature(const TStringRef& name, - TType* userType, - IFunctionTypeInfoBuilder& builder, - bool typesOnly) { - if (Name() == name) { - if (!userType) { - builder.SetError("Missing user type"); - return true; - } - builder.UserType(userType); - const auto typeHelper = builder.TypeInfoHelper(); - const auto userTypeInspector = TTupleTypeInspector(*typeHelper, userType); - if (!userTypeInspector || !userTypeInspector.GetElementsCount()) { - builder.SetError("User type is not tuple"); - return true; - } - const auto argsTypeInspector = TTupleTypeInspector(*typeHelper, - userTypeInspector.GetElementType(0)); - if (!argsTypeInspector || !argsTypeInspector.GetElementsCount()) { - 
builder.SetError("Please provide at least one argument"); - return true; - } - const auto firstArgType = argsTypeInspector.GetElementType(0); - EFirstArgTypeId firstArgTypeId = EFirstArgTypeId::None; +bool TBuildQueryString::DeclareSignature(const TStringRef& name, + TType* userType, + IFunctionTypeInfoBuilder& builder, + bool typesOnly) { + if (Name() == name) { + if (!userType) { + builder.SetError("Missing user type"); + return true; + } + builder.UserType(userType); + const auto typeHelper = builder.TypeInfoHelper(); + const auto userTypeInspector = TTupleTypeInspector(*typeHelper, userType); + if (!userTypeInspector || !userTypeInspector.GetElementsCount()) { + builder.SetError("User type is not tuple"); + return true; + } + const auto argsTypeInspector = TTupleTypeInspector(*typeHelper, + userTypeInspector.GetElementType(0)); + if (!argsTypeInspector || !argsTypeInspector.GetElementsCount()) { + builder.SetError("Please provide at least one argument"); + return true; + } + const auto firstArgType = argsTypeInspector.GetElementType(0); + EFirstArgTypeId firstArgTypeId = EFirstArgTypeId::None; - if (typeHelper->IsSameType(GetDictType(builder), firstArgType) || - typeHelper->IsSameType(GetDictType(builder, true), firstArgType)) { - firstArgTypeId = EFirstArgTypeId::Dict; - } else if (typeHelper->IsSameType(GetListType(builder), firstArgType) || - typeHelper->IsSameType(GetListType(builder, true), firstArgType) || - typeHelper->GetTypeKind(firstArgType) == ETypeKind::EmptyList) - { - firstArgTypeId = EFirstArgTypeId::List; - } else if (typeHelper->IsSameType(GetFlattenDictType(builder), firstArgType) || - typeHelper->IsSameType(GetFlattenDictType(builder, true), firstArgType) || - typeHelper->GetTypeKind(firstArgType) == ETypeKind::EmptyDict) - { - firstArgTypeId = EFirstArgTypeId::FlattenDict; - } - if (firstArgTypeId != EFirstArgTypeId::None) { - builder.Returns<TQueryStr>().OptionalArgs(1); - auto args = builder.Args(); - 
args->Add(firstArgType).Flags(ICallablePayload::TArgumentFlags::AutoMap); - args->Add<TSeparatorNArg>().Done(); - if (!typesOnly) { - builder.Implementation(new TBuildQueryString(builder.GetSourcePosition(), - firstArgTypeId)); - } - } else { - TStringBuilder sb; - sb << "Unsupported first argument type: "; - TTypePrinter(*typeHelper, firstArgType).Out(sb.Out); - builder.SetError(sb); + if (typeHelper->IsSameType(GetDictType(builder), firstArgType) || + typeHelper->IsSameType(GetDictType(builder, true), firstArgType)) { + firstArgTypeId = EFirstArgTypeId::Dict; + } else if (typeHelper->IsSameType(GetListType(builder), firstArgType) || + typeHelper->IsSameType(GetListType(builder, true), firstArgType) || + typeHelper->GetTypeKind(firstArgType) == ETypeKind::EmptyList) + { + firstArgTypeId = EFirstArgTypeId::List; + } else if (typeHelper->IsSameType(GetFlattenDictType(builder), firstArgType) || + typeHelper->IsSameType(GetFlattenDictType(builder, true), firstArgType) || + typeHelper->GetTypeKind(firstArgType) == ETypeKind::EmptyDict) + { + firstArgTypeId = EFirstArgTypeId::FlattenDict; + } + if (firstArgTypeId != EFirstArgTypeId::None) { + builder.Returns<TQueryStr>().OptionalArgs(1); + auto args = builder.Args(); + args->Add(firstArgType).Flags(ICallablePayload::TArgumentFlags::AutoMap); + args->Add<TSeparatorNArg>().Done(); + if (!typesOnly) { + builder.Implementation(new TBuildQueryString(builder.GetSourcePosition(), + firstArgTypeId)); } - return true; + } else { + TStringBuilder sb; + sb << "Unsupported first argument type: "; + TTypePrinter(*typeHelper, firstArgType).Out(sb.Out); + builder.SetError(sb); } - return false; + return true; } + return false; } +} // namespace NUrlUdf diff --git a/yql/essentials/udfs/common/url_base/lib/url_query.h b/yql/essentials/udfs/common/url_base/lib/url_query.h index 552b8527823..7ba82241896 100644 --- a/yql/essentials/udfs/common/url_base/lib/url_query.h +++ b/yql/essentials/udfs/common/url_base/lib/url_query.h @@ -3,132 
+3,136 @@ #include <yql/essentials/public/udf/udf_helpers.h> namespace NUrlUdf { - using namespace NYql::NUdf; +using namespace NYql::NUdf; - struct TQueryStringConv : public TBoxedValue { - protected: - static constexpr char Separator[] = "Separator"; +struct TQueryStringConv: public TBoxedValue { +protected: + static constexpr char Separator[] = "Separator"; - using TQueryStr = char*; - using TSeparatorNArg = TNamedArg<TQueryStr, Separator>; + using TQueryStr = char*; + using TSeparatorNArg = TNamedArg<TQueryStr, Separator>; - static inline TType* GetListType(const IFunctionTypeInfoBuilder& builder, - bool optional = false) - { - auto tupleType = optional ? - builder.Tuple()->Add<TQueryStr>().Add(builder.Optional()->Item<TQueryStr>().Build()).Build() - : builder.Tuple()->Add<TQueryStr>().Add<TQueryStr>().Build(); - return builder.List()->Item(tupleType).Build(); - } + static inline TType* GetListType(const IFunctionTypeInfoBuilder& builder, + bool optional = false) + { + auto tupleType = optional ? builder.Tuple()->Add<TQueryStr>().Add(builder.Optional()->Item<TQueryStr>().Build()).Build() + : builder.Tuple()->Add<TQueryStr>().Add<TQueryStr>().Build(); + return builder.List()->Item(tupleType).Build(); + } - static inline TType* GetDictType(const IFunctionTypeInfoBuilder& builder, - bool optional = false) - { - auto listType = optional ? - builder.List()->Item(builder.Optional()->Item<TQueryStr>().Build()).Build() - : builder.List()->Item<TQueryStr>().Build(); - return builder.Dict()->Key<TQueryStr>().Value(listType).Build(); - } + static inline TType* GetDictType(const IFunctionTypeInfoBuilder& builder, + bool optional = false) + { + auto listType = optional ? 
builder.List()->Item(builder.Optional()->Item<TQueryStr>().Build()).Build() + : builder.List()->Item<TQueryStr>().Build(); + return builder.Dict()->Key<TQueryStr>().Value(listType).Build(); + } - static inline TType* GetFlattenDictType(const IFunctionTypeInfoBuilder& builder, - bool optional = false) - { - return optional ? - builder.Dict()->Key<TQueryStr>().Value(builder.Optional()->Item<TQueryStr>().Build()).Build() - : builder.Dict()->Key<TQueryStr>().Value<TQueryStr>().Build(); - } - }; + static inline TType* GetFlattenDictType(const IFunctionTypeInfoBuilder& builder, + bool optional = false) + { + return optional ? builder.Dict()->Key<TQueryStr>().Value(builder.Optional()->Item<TQueryStr>().Build()).Build() + : builder.Dict()->Key<TQueryStr>().Value<TQueryStr>().Build(); + } +}; - struct TQueryStringParse: public TQueryStringConv { - explicit TQueryStringParse(TSourcePosition&& pos) : Pos_(std::move(pos)) {} +struct TQueryStringParse: public TQueryStringConv { + explicit TQueryStringParse(TSourcePosition&& pos) + : Pos_(std::move(pos)) + { + } - protected: - static constexpr char KeepBlankValues[] = "KeepBlankValues"; - static constexpr char Strict[] = "Strict"; - static constexpr char MaxFields[] = "MaxFields"; +protected: + static constexpr char KeepBlankValues[] = "KeepBlankValues"; + static constexpr char Strict[] = "Strict"; + static constexpr char MaxFields[] = "MaxFields"; - using TKeepBlankValuesNArg = TNamedArg<bool, KeepBlankValues>; - using TStrictNArg = TNamedArg<bool, Strict>; - using TMaxFieldsNArg = TNamedArg<ui32, MaxFields>; + using TKeepBlankValuesNArg = TNamedArg<bool, KeepBlankValues>; + using TStrictNArg = TNamedArg<bool, Strict>; + using TMaxFieldsNArg = TNamedArg<ui32, MaxFields>; - static void MakeSignature(IFunctionTypeInfoBuilder& builder, const TType* retType); + static void MakeSignature(IFunctionTypeInfoBuilder& builder, const TType* retType); - std::vector<std::pair<TString, TString>> - RunImpl(const TUnboxedValuePod* args) const; 
+ std::vector<std::pair<TString, TString>> + RunImpl(const TUnboxedValuePod* args) const; - private: - TSourcePosition Pos_; - }; +private: + TSourcePosition Pos_; +}; - struct TQueryStringToList : public TQueryStringParse { - explicit TQueryStringToList(TSourcePosition&& pos) - : TQueryStringParse(std::forward<TSourcePosition>(pos)) {} +struct TQueryStringToList: public TQueryStringParse { + explicit TQueryStringToList(TSourcePosition&& pos) + : TQueryStringParse(std::forward<TSourcePosition>(pos)) + { + } - static const TStringRef& Name() { - static const auto name = TStringRef::Of("QueryStringToList"); - return name; - } + static const TStringRef& Name() { + static const auto name = TStringRef::Of("QueryStringToList"); + return name; + } - static bool DeclareSignature(const TStringRef& name, - TType*, - IFunctionTypeInfoBuilder& builder, - bool typesOnly); + static bool DeclareSignature(const TStringRef& name, + TType*, + IFunctionTypeInfoBuilder& builder, + bool typesOnly); - TUnboxedValue Run(const IValueBuilder* valueBuilder, - const TUnboxedValuePod* args) const override; - }; + TUnboxedValue Run(const IValueBuilder* valueBuilder, + const TUnboxedValuePod* args) const override; +}; - struct TQueryStringToDict : public TQueryStringParse { - explicit TQueryStringToDict(TType* dictType, TSourcePosition&& pos) - : TQueryStringParse(std::move(pos)) - , DictType_(dictType) - {} +struct TQueryStringToDict: public TQueryStringParse { + explicit TQueryStringToDict(TType* dictType, TSourcePosition&& pos) + : TQueryStringParse(std::move(pos)) + , DictType_(dictType) + { + } - static const TStringRef& Name() { - static const auto name = TStringRef::Of("QueryStringToDict"); - return name; - } + static const TStringRef& Name() { + static const auto name = TStringRef::Of("QueryStringToDict"); + return name; + } - static bool DeclareSignature(const TStringRef& name, - TType*, - IFunctionTypeInfoBuilder& builder, - bool typesOnly); + static bool DeclareSignature(const 
TStringRef& name, + TType*, + IFunctionTypeInfoBuilder& builder, + bool typesOnly); - TUnboxedValue Run(const IValueBuilder* valueBuilder, - const TUnboxedValuePod* args) const override; + TUnboxedValue Run(const IValueBuilder* valueBuilder, + const TUnboxedValuePod* args) const override; - private: - TType* DictType_; - }; +private: + TType* DictType_; +}; - class TBuildQueryString : public TQueryStringConv { - TSourcePosition Pos_; - enum class EFirstArgTypeId { - None, - Dict, - FlattenDict, - List, - } FirstArgTypeId_; +class TBuildQueryString: public TQueryStringConv { + TSourcePosition Pos_; + enum class EFirstArgTypeId { + None, + Dict, + FlattenDict, + List, + } FirstArgTypeId_; - public: - typedef bool TTypeAwareMarker; +public: + typedef bool TTypeAwareMarker; - explicit TBuildQueryString(TSourcePosition&& pos, EFirstArgTypeId firstArgTypeId) - : Pos_(std::move(pos)) - , FirstArgTypeId_(firstArgTypeId) - {} + explicit TBuildQueryString(TSourcePosition&& pos, EFirstArgTypeId firstArgTypeId) + : Pos_(std::move(pos)) + , FirstArgTypeId_(firstArgTypeId) + { + } - static const TStringRef& Name() { - static const auto name = TStringRef::Of("BuildQueryString"); - return name; - } + static const TStringRef& Name() { + static const auto name = TStringRef::Of("BuildQueryString"); + return name; + } - TUnboxedValue Run(const IValueBuilder* valueBuilder, - const TUnboxedValuePod* args) const override; + TUnboxedValue Run(const IValueBuilder* valueBuilder, + const TUnboxedValuePod* args) const override; - static bool DeclareSignature(const TStringRef& name, - TType* userType, - IFunctionTypeInfoBuilder& builder, - bool typesOnly); - }; -} + static bool DeclareSignature(const TStringRef& name, + TType* userType, + IFunctionTypeInfoBuilder& builder, + bool typesOnly); +}; +} // namespace NUrlUdf diff --git a/yql/essentials/udfs/common/url_base/lib/ya.make b/yql/essentials/udfs/common/url_base/lib/ya.make index 9887842303b..51a55865bd4 100644 --- 
a/yql/essentials/udfs/common/url_base/lib/ya.make +++ b/yql/essentials/udfs/common/url_base/lib/ya.make @@ -6,6 +6,8 @@ YQL_ABI_VERSION( 0 ) +ENABLE(YQL_STYLE_CPP) + SRCS( url_base_udf.cpp url_parse.cpp diff --git a/yql/essentials/udfs/common/url_base/url_base.cpp b/yql/essentials/udfs/common/url_base/url_base.cpp index 628abe7a301..ae5516741be 100644 --- a/yql/essentials/udfs/common/url_base/url_base.cpp +++ b/yql/essentials/udfs/common/url_base/url_base.cpp @@ -4,4 +4,3 @@ SIMPLE_MODULE(TUrlModule, EXPORTED_URL_BASE_UDF) REGISTER_MODULES(TUrlModule) - diff --git a/yql/essentials/udfs/common/url_base/ya.make b/yql/essentials/udfs/common/url_base/ya.make index a251b5cf0d2..79c7c9bead6 100644 --- a/yql/essentials/udfs/common/url_base/ya.make +++ b/yql/essentials/udfs/common/url_base/ya.make @@ -5,6 +5,8 @@ YQL_UDF_CONTRIB(url_udf) 37 0 ) + + ENABLE(YQL_STYLE_CPP) SRCS( url_base.cpp diff --git a/yql/essentials/udfs/common/vector/vector_udf.cpp b/yql/essentials/udfs/common/vector/vector_udf.cpp index f1afafa7217..dbaf1d05e31 100644 --- a/yql/essentials/udfs/common/vector/vector_udf.cpp +++ b/yql/essentials/udfs/common/vector/vector_udf.cpp @@ -15,7 +15,8 @@ private: public: TVector() : Vector_() - {} + { + } TUnboxedValue GetResult(const IValueBuilder* builder) { TUnboxedValue* values = nullptr; @@ -45,14 +46,13 @@ public: }; extern const char VectorResourceName[] = "Vector.VectorResource"; -class TVectorResource: - public TBoxedResource<TVector, VectorResourceName> -{ +class TVectorResource: public TBoxedResource<TVector, VectorResourceName> { public: template <typename... Args> inline TVectorResource(Args&&... args) : TBoxedResource(std::forward<Args>(args)...) 
- {} + { + } }; TVectorResource* GetVectorResource(const TUnboxedValuePod& arg) { @@ -120,8 +120,7 @@ public: TType* userType, const TStringRef& typeConfig, ui32 flags, - IFunctionTypeInfoBuilder& builder) const final - { + IFunctionTypeInfoBuilder& builder) const final { Y_UNUSED(typeConfig); try { diff --git a/yql/essentials/udfs/common/vector/ya.make b/yql/essentials/udfs/common/vector/ya.make index a1403f62a61..342065cbaff 100644 --- a/yql/essentials/udfs/common/vector/ya.make +++ b/yql/essentials/udfs/common/vector/ya.make @@ -6,6 +6,8 @@ YQL_ABI_VERSION( 0 ) +ENABLE(YQL_STYLE_CPP) + SRCS( vector_udf.cpp ) diff --git a/yql/essentials/udfs/common/yson2/ya.make b/yql/essentials/udfs/common/yson2/ya.make index 36867223141..cfcb7334c29 100644 --- a/yql/essentials/udfs/common/yson2/ya.make +++ b/yql/essentials/udfs/common/yson2/ya.make @@ -5,6 +5,8 @@ YQL_UDF_CONTRIB(yson2_udf) 28 0 ) + + ENABLE(YQL_STYLE_CPP) SRCS( yson2_udf.cpp diff --git a/yql/essentials/udfs/common/yson2/yson2_udf.cpp b/yql/essentials/udfs/common/yson2/yson2_udf.cpp index 721280dd5c5..a6ed9d13002 100644 --- a/yql/essentials/udfs/common/yson2/yson2_udf.cpp +++ b/yql/essentials/udfs/common/yson2/yson2_udf.cpp @@ -30,7 +30,7 @@ using TBoolDictType = TDict<char*, bool>; using TDoubleDictType = TDict<char*, double>; using TStringDictType = TDict<char*, char*>; -enum class EOptions : ui8 { +enum class EOptions: ui8 { Strict = 1, AutoConvert = 2 }; @@ -38,8 +38,8 @@ enum class EOptions : ui8 { union TOpts { ui8 Raw = 0; struct { - bool Strict: 1; - bool AutoConvert: 1; + bool Strict : 1; + bool AutoConvert : 1; }; }; @@ -52,7 +52,7 @@ TOpts ParseOptions(TUnboxedValuePod x) { return {}; } -class TOptions : public TBoxedValue { +class TOptions: public TBoxedValue { TUnboxedValue Run(const IValueBuilder*, const TUnboxedValuePod* args) const override { ui8 options = 0; @@ -66,6 +66,7 @@ class TOptions : public TBoxedValue { return TUnboxedValuePod(options); } + public: static const TStringRef& Name() { 
static auto name = TStringRef::Of("Options"); @@ -99,18 +100,25 @@ public: using TConverterPtr = TUnboxedValuePod (*)(TUnboxedValuePod, const IValueBuilder*, const TSourcePosition& pos); template <TConverterPtr Converter> -class TLazyConveterT : public TManagedBoxedValue { +class TLazyConveterT: public TManagedBoxedValue { public: TLazyConveterT(TUnboxedValue&& original, const IValueBuilder* valueBuilder, const TSourcePosition& pos) - : Original_(std::move(original)), ValueBuilder_(valueBuilder), Pos_(pos) - {} + : Original_(std::move(original)) + , ValueBuilder_(valueBuilder) + , Pos_(pos) + { + } + private: template <bool NoSwap> class TIterator: public TManagedBoxedValue { public: TIterator(TUnboxedValue&& original, const IValueBuilder* valueBuilder, const TSourcePosition& pos) - : Original_(std::move(original)), ValueBuilder_(valueBuilder), Pos_(pos) - {} + : Original_(std::move(original)) + , ValueBuilder_(valueBuilder) + , Pos_(pos) + { + } private: bool Skip() final { @@ -140,7 +148,7 @@ private: } const TUnboxedValue Original_; - const IValueBuilder *const ValueBuilder_; + const IValueBuilder* const ValueBuilder_; const TSourcePosition Pos_; }; @@ -196,11 +204,11 @@ private: } const TUnboxedValue Original_; - const IValueBuilder *const ValueBuilder_; + const IValueBuilder* const ValueBuilder_; const TSourcePosition Pos_; }; -template<bool Strict, bool AutoConvert, TConverterPtr Converter = nullptr> +template <bool Strict, bool AutoConvert, TConverterPtr Converter = nullptr> TUnboxedValuePod ConvertToListImpl(TUnboxedValuePod x, const IValueBuilder* valueBuilder, const TSourcePosition& pos) { if (!x) { return valueBuilder->NewEmptyList().Release(); @@ -208,8 +216,9 @@ TUnboxedValuePod ConvertToListImpl(TUnboxedValuePod x, const IValueBuilder* valu switch (GetNodeType(x)) { case ENodeType::List: - if (!x.IsBoxed()) + if (!x.IsBoxed()) { break; + } if constexpr (Converter != nullptr) { if constexpr (Strict || AutoConvert) { return TUnboxedValuePod(new 
TLazyConveterT<Converter>(x, valueBuilder, pos)); @@ -251,7 +260,7 @@ TUnboxedValuePod ConvertToListImpl(TUnboxedValuePod x, const IValueBuilder* valu return valueBuilder->NewEmptyList().Release(); } -template<bool Strict, bool AutoConvert, TConverterPtr Converter = nullptr> +template <bool Strict, bool AutoConvert, TConverterPtr Converter = nullptr> TUnboxedValuePod ConvertToDictImpl(TUnboxedValuePod x, const IValueBuilder* valueBuilder, const TSourcePosition& pos) { if (!x) { return valueBuilder->NewEmptyList().Release(); @@ -259,8 +268,9 @@ TUnboxedValuePod ConvertToDictImpl(TUnboxedValuePod x, const IValueBuilder* valu switch (GetNodeType(x)) { case ENodeType::Dict: - if (!x.IsBoxed()) + if (!x.IsBoxed()) { break; + } if constexpr (Converter != nullptr) { if constexpr (Strict || AutoConvert) { return TUnboxedValuePod(new TLazyConveterT<Converter>(x, valueBuilder, pos)); @@ -310,8 +320,9 @@ TUnboxedValuePod LookupImpl(TUnboxedValuePod dict, const TUnboxedValuePod key, c if (dict.IsBoxed()) { if (const i32 size = dict.GetListLength()) { if (i32 index; TryFromString(key.AsStringRef(), index) && index < size && index >= -size) { - if (index < 0) + if (index < 0) { index += size; + } if constexpr (Converter != nullptr) { return Converter(dict.Lookup(TUnboxedValuePod(index)).Release(), valueBuilder, pos); } @@ -337,10 +348,11 @@ TUnboxedValuePod YPathImpl(TUnboxedValuePod dict, const TUnboxedValuePod key, co for (const auto s : StringSplitter(path.substr(path[1U] == '/' ? 
2U : 1U)).Split('/')) { const bool attr = IsNodeType<ENodeType::Attr>(dict); if (const std::string_view subpath = s.Token(); subpath == "@") { - if (attr) + if (attr) { dict = SetNodeType<ENodeType::Dict>(dict); - else + } else { return {}; + } } else { if (attr) { dict = dict.GetVariantItem().Release(); @@ -362,16 +374,17 @@ TUnboxedValuePod YPathImpl(TUnboxedValuePod dict, const TUnboxedValuePod key, co return dict; } -template<bool Strict, bool AutoConvert> +template <bool Strict, bool AutoConvert> TUnboxedValuePod ContainsImpl(TUnboxedValuePod dict, TUnboxedValuePod key, const IValueBuilder* valueBuilder, const TSourcePosition& pos) { switch (GetNodeType(dict)) { case ENodeType::Attr: return ContainsImpl<Strict, AutoConvert>(dict.GetVariantItem().Release(), key, valueBuilder, pos); case ENodeType::Dict: - if (dict.IsBoxed()) + if (dict.IsBoxed()) { return TUnboxedValuePod(dict.Contains(key)); - else + } else { return TUnboxedValuePod(false); + } case ENodeType::List: if (dict.IsBoxed()) { if (const i32 size = dict.GetListLength()) { @@ -382,14 +395,15 @@ TUnboxedValuePod ContainsImpl(TUnboxedValuePod dict, TUnboxedValuePod key, const } return TUnboxedValuePod(false); default: - if constexpr (Strict && !AutoConvert) + if constexpr (Strict && !AutoConvert) { UdfTerminate((TStringBuilder() << valueBuilder->WithCalleePosition(pos) << " Can't check contains on scalar " << TDebugPrinter(dict)).c_str()); - else + } else { return {}; + } } } -template<bool Strict, bool AutoConvert> +template <bool Strict, bool AutoConvert> TUnboxedValuePod GetLengthImpl(TUnboxedValuePod dict, const IValueBuilder* valueBuilder, const TSourcePosition& pos) { switch (GetNodeType(dict)) { case ENodeType::Attr: @@ -399,132 +413,150 @@ TUnboxedValuePod GetLengthImpl(TUnboxedValuePod dict, const IValueBuilder* value case ENodeType::List: return TUnboxedValuePod(dict.IsBoxed() ? 
dict.GetListLength() : ui64(0)); default: - if constexpr (Strict && !AutoConvert) + if constexpr (Strict && !AutoConvert) { UdfTerminate((TStringBuilder() << valueBuilder->WithCalleePosition(pos) << " Can't get container length from scalar " << TDebugPrinter(dict)).c_str()); - else + } else { return {}; + } } } -} +} // namespace SIMPLE_UDF_WITH_OPTIONAL_ARGS(TConvertToBool, TOptional<bool>(TAutoMap<TNodeResource>, TOptional<TOptionsResource>), 1) { - if (const auto options = ParseOptions(args[1]); options.Strict) + if (const auto options = ParseOptions(args[1]); options.Strict) { return (options.AutoConvert ? &ConvertToBool<true, true> : &ConvertToBool<true, false>)(args[0], valueBuilder, GetPos()); - else + } else { return (options.AutoConvert ? &ConvertToBool<false, true> : &ConvertToBool<false, false>)(args[0], valueBuilder, GetPos()); + } } SIMPLE_UDF_WITH_OPTIONAL_ARGS(TConvertToInt64, TOptional<i64>(TAutoMap<TNodeResource>, TOptional<TOptionsResource>), 1) { - if (const auto options = ParseOptions(args[1]); options.Strict) + if (const auto options = ParseOptions(args[1]); options.Strict) { return (options.AutoConvert ? &ConvertToIntegral<true, true, i64> : &ConvertToIntegral<true, false, i64>)(args[0], valueBuilder, GetPos()); - else + } else { return (options.AutoConvert ? &ConvertToIntegral<false, true, i64> : &ConvertToIntegral<false, false, i64>)(args[0], valueBuilder, GetPos()); + } } SIMPLE_UDF_WITH_OPTIONAL_ARGS(TConvertToUint64, TOptional<ui64>(TAutoMap<TNodeResource>, TOptional<TOptionsResource>), 1) { - if (const auto options = ParseOptions(args[1]); options.Strict) + if (const auto options = ParseOptions(args[1]); options.Strict) { return (options.AutoConvert ? &ConvertToIntegral<true, true, ui64> : &ConvertToIntegral<true, false, ui64>)(args[0], valueBuilder, GetPos()); - else + } else { return (options.AutoConvert ? 
&ConvertToIntegral<false, true, ui64> : &ConvertToIntegral<false, false, ui64>)(args[0], valueBuilder, GetPos()); + } } SIMPLE_UDF_WITH_OPTIONAL_ARGS(TConvertToDouble, TOptional<double>(TAutoMap<TNodeResource>, TOptional<TOptionsResource>), 1) { - if (const auto options = ParseOptions(args[1]); options.Strict) + if (const auto options = ParseOptions(args[1]); options.Strict) { return (options.AutoConvert ? &ConvertToFloat<true, true, double> : &ConvertToFloat<true, false, double>)(args[0], valueBuilder, GetPos()); - else + } else { return (options.AutoConvert ? &ConvertToFloat<false, true, double> : &ConvertToFloat<false, false, double>)(args[0], valueBuilder, GetPos()); + } } SIMPLE_UDF_WITH_OPTIONAL_ARGS(TConvertToString, TOptional<char*>(TAutoMap<TNodeResource>, TOptional<TOptionsResource>), 1) { - if (const auto options = ParseOptions(args[1]); options.Strict) + if (const auto options = ParseOptions(args[1]); options.Strict) { return (options.AutoConvert ? &ConvertToString<true, true, false> : &ConvertToString<true, false, false>)(args[0], valueBuilder, GetPos()); - else + } else { return (options.AutoConvert ? &ConvertToString<false, true, false> : &ConvertToString<false, false, false>)(args[0], valueBuilder, GetPos()); + } } SIMPLE_UDF_WITH_OPTIONAL_ARGS(TConvertToList, TListType<TNodeResource>(TOptional<TNodeResource>, TOptional<TOptionsResource>), 1) { - if (const auto options = ParseOptions(args[1]); options.Strict) + if (const auto options = ParseOptions(args[1]); options.Strict) { return (options.AutoConvert ? &ConvertToListImpl<true, true> : &ConvertToListImpl<true, false>)(args[0], valueBuilder, GetPos()); - else + } else { return (options.AutoConvert ? 
&ConvertToListImpl<false, true> : &ConvertToListImpl<false, false>)(args[0], valueBuilder, GetPos()); + } } SIMPLE_UDF_WITH_OPTIONAL_ARGS(TConvertToInt64List, TListType<i64>(TOptional<TNodeResource>, TOptional<TOptionsResource>), 1) { - if (const auto options = ParseOptions(args[1]); options.Strict) + if (const auto options = ParseOptions(args[1]); options.Strict) { return (options.AutoConvert ? &ConvertToListImpl<true, true, &ConvertToIntegral<true, true, i64>> : &ConvertToListImpl<true, false, &ConvertToIntegral<true, false, i64>>)(args[0], valueBuilder, GetPos()); - else + } else { return (options.AutoConvert ? &ConvertToListImpl<false, true, &ConvertToIntegral<false, true, i64>> : &ConvertToListImpl<false, false, &ConvertToIntegral<false, false, i64>>)(args[0], valueBuilder, GetPos()); + } } SIMPLE_UDF_WITH_OPTIONAL_ARGS(TConvertToUint64List, TListType<ui64>(TOptional<TNodeResource>, TOptional<TOptionsResource>), 1) { - if (const auto options = ParseOptions(args[1]); options.Strict) + if (const auto options = ParseOptions(args[1]); options.Strict) { return (options.AutoConvert ? &ConvertToListImpl<true, true, &ConvertToIntegral<true, true, ui64>> : &ConvertToListImpl<true, false, &ConvertToIntegral<true, false, ui64>>)(args[0], valueBuilder, GetPos()); - else + } else { return (options.AutoConvert ? &ConvertToListImpl<false, true, &ConvertToIntegral<false, true, ui64>> : &ConvertToListImpl<false, false, &ConvertToIntegral<false, false, ui64>>)(args[0], valueBuilder, GetPos()); + } } SIMPLE_UDF_WITH_OPTIONAL_ARGS(TConvertToBoolList, TListType<bool>(TOptional<TNodeResource>, TOptional<TOptionsResource>), 1) { - if (const auto options = ParseOptions(args[1]); options.Strict) + if (const auto options = ParseOptions(args[1]); options.Strict) { return (options.AutoConvert ? 
&ConvertToListImpl<true, true, &ConvertToBool<true, true>> : &ConvertToListImpl<true, false, &ConvertToBool<true, false>>)(args[0], valueBuilder, GetPos()); - else + } else { return (options.AutoConvert ? &ConvertToListImpl<false, true, &ConvertToBool<false, true>> : &ConvertToListImpl<false, false, &ConvertToBool<false, false>>)(args[0], valueBuilder, GetPos()); + } } SIMPLE_UDF_WITH_OPTIONAL_ARGS(TConvertToDoubleList, TListType<double>(TOptional<TNodeResource>, TOptional<TOptionsResource>), 1) { - if (const auto options = ParseOptions(args[1]); options.Strict) + if (const auto options = ParseOptions(args[1]); options.Strict) { return (options.AutoConvert ? &ConvertToListImpl<true, true, &ConvertToFloat<true, true, double>> : &ConvertToListImpl<true, false, &ConvertToFloat<true, false, double>>)(args[0], valueBuilder, GetPos()); - else + } else { return (options.AutoConvert ? &ConvertToListImpl<false, true, &ConvertToFloat<false, true, double>> : &ConvertToListImpl<false, false, &ConvertToFloat<false, false, double>>)(args[0], valueBuilder, GetPos()); + } } SIMPLE_UDF_WITH_OPTIONAL_ARGS(TConvertToStringList, TListType<char*>(TOptional<TNodeResource>, TOptional<TOptionsResource>), 1) { - if (const auto options = ParseOptions(args[1]); options.Strict) + if (const auto options = ParseOptions(args[1]); options.Strict) { return (options.AutoConvert ? &ConvertToListImpl<true, true, &ConvertToString<true, true, false>> : &ConvertToListImpl<true, false, &ConvertToString<true, false, false>>)(args[0], valueBuilder, GetPos()); - else + } else { return (options.AutoConvert ? 
&ConvertToListImpl<false, true, &ConvertToString<false, true, false>> : &ConvertToListImpl<false, false, &ConvertToString<false, false, false>>)(args[0], valueBuilder, GetPos()); + } } SIMPLE_UDF_WITH_OPTIONAL_ARGS(TConvertToDict, TDictType(TOptional<TNodeResource>, TOptional<TOptionsResource>), 1) { - if (const auto options = ParseOptions(args[1]); options.Strict) + if (const auto options = ParseOptions(args[1]); options.Strict) { return (options.AutoConvert ? &ConvertToDictImpl<true, true> : &ConvertToDictImpl<true, false>)(args[0], valueBuilder, GetPos()); - else + } else { return (options.AutoConvert ? &ConvertToDictImpl<false, true> : &ConvertToDictImpl<false, false>)(args[0], valueBuilder, GetPos()); + } } SIMPLE_UDF_WITH_OPTIONAL_ARGS(TConvertToInt64Dict, TInt64DictType(TOptional<TNodeResource>, TOptional<TOptionsResource>), 1) { - if (const auto options = ParseOptions(args[1]); options.Strict) + if (const auto options = ParseOptions(args[1]); options.Strict) { return (options.AutoConvert ? &ConvertToDictImpl<true, true, &ConvertToIntegral<true, true, i64>> : &ConvertToDictImpl<true, false, &ConvertToIntegral<true, false, i64>>)(args[0], valueBuilder, GetPos()); - else + } else { return (options.AutoConvert ? &ConvertToDictImpl<false, true, &ConvertToIntegral<false, true, i64>> : &ConvertToDictImpl<false, false, &ConvertToIntegral<false, false, i64>>)(args[0], valueBuilder, GetPos()); + } } SIMPLE_UDF_WITH_OPTIONAL_ARGS(TConvertToUint64Dict, TUint64DictType(TOptional<TNodeResource>, TOptional<TOptionsResource>), 1) { - if (const auto options = ParseOptions(args[1]); options.Strict) + if (const auto options = ParseOptions(args[1]); options.Strict) { return (options.AutoConvert ? &ConvertToDictImpl<true, true, &ConvertToIntegral<true, true, ui64>> : &ConvertToDictImpl<true, false, &ConvertToIntegral<true, false, ui64>>)(args[0], valueBuilder, GetPos()); - else + } else { return (options.AutoConvert ? 
&ConvertToDictImpl<false, true, &ConvertToIntegral<false, true, ui64>> : &ConvertToDictImpl<false, false, &ConvertToIntegral<false, false, ui64>>)(args[0], valueBuilder, GetPos()); + } } SIMPLE_UDF_WITH_OPTIONAL_ARGS(TConvertToBoolDict, TBoolDictType(TOptional<TNodeResource>, TOptional<TOptionsResource>), 1) { - if (const auto options = ParseOptions(args[1]); options.Strict) + if (const auto options = ParseOptions(args[1]); options.Strict) { return (options.AutoConvert ? &ConvertToDictImpl<true, true, &ConvertToBool<true, true>> : &ConvertToDictImpl<true, false, &ConvertToBool<true, false>>)(args[0], valueBuilder, GetPos()); - else + } else { return (options.AutoConvert ? &ConvertToDictImpl<false, true, &ConvertToBool<false, true>> : &ConvertToDictImpl<false, false, &ConvertToBool<false, false>>)(args[0], valueBuilder, GetPos()); + } } SIMPLE_UDF_WITH_OPTIONAL_ARGS(TConvertToDoubleDict, TDoubleDictType(TOptional<TNodeResource>, TOptional<TOptionsResource>), 1) { - if (const auto options = ParseOptions(args[1]); options.Strict) + if (const auto options = ParseOptions(args[1]); options.Strict) { return (options.AutoConvert ? &ConvertToDictImpl<true, true, &ConvertToFloat<true, true, double>> : &ConvertToDictImpl<true, false, &ConvertToFloat<true, false, double>>)(args[0], valueBuilder, GetPos()); - else + } else { return (options.AutoConvert ? &ConvertToDictImpl<false, true, &ConvertToFloat<false, true, double>> : &ConvertToDictImpl<false, false, &ConvertToFloat<false, false, double>>)(args[0], valueBuilder, GetPos()); + } } SIMPLE_UDF_WITH_OPTIONAL_ARGS(TConvertToStringDict, TStringDictType(TOptional<TNodeResource>, TOptional<TOptionsResource>), 1) { - if (const auto options = ParseOptions(args[1]); options.Strict) + if (const auto options = ParseOptions(args[1]); options.Strict) { return (options.AutoConvert ? 
&ConvertToDictImpl<true, true, &ConvertToString<true, true, false>> : &ConvertToDictImpl<true, false, &ConvertToString<true, false, false>>)(args[0], valueBuilder, GetPos()); - else + } else { return (options.AutoConvert ? &ConvertToDictImpl<false, true, &ConvertToString<false, true, false>> : &ConvertToDictImpl<false, false, &ConvertToString<false, false, false>>)(args[0], valueBuilder, GetPos()); + } } SIMPLE_STRICT_UDF(TAttributes, TDictType(TAutoMap<TNodeResource>)) { @@ -537,17 +569,19 @@ SIMPLE_STRICT_UDF(TAttributes, TDictType(TAutoMap<TNodeResource>)) { } SIMPLE_UDF_WITH_OPTIONAL_ARGS(TContains, TOptional<bool>(TAutoMap<TNodeResource>, char*, TOptional<TOptionsResource>), 1) { - if (const auto options = ParseOptions(args[2]); options.Strict) + if (const auto options = ParseOptions(args[2]); options.Strict) { return (options.AutoConvert ? &ContainsImpl<true, true> : &ContainsImpl<true, false>)(args[0], args[1], valueBuilder, GetPos()); - else + } else { return (options.AutoConvert ? &ContainsImpl<false, true> : &ContainsImpl<false, false>)(args[0], args[1], valueBuilder, GetPos()); + } } SIMPLE_UDF_WITH_OPTIONAL_ARGS(TGetLength, TOptional<ui64>(TAutoMap<TNodeResource>, TOptional<TOptionsResource>), 1) { - if (const auto options = ParseOptions(args[1]); options.Strict) + if (const auto options = ParseOptions(args[1]); options.Strict) { return (options.AutoConvert ? &GetLengthImpl<true, true> : &GetLengthImpl<true, false>)(args[0], valueBuilder, GetPos()); - else + } else { return (options.AutoConvert ? 
&GetLengthImpl<false, true> : &GetLengthImpl<false, false>)(args[0], valueBuilder, GetPos()); + } } SIMPLE_STRICT_UDF_WITH_OPTIONAL_ARGS(TLookup, TOptional<TNodeResource>(TAutoMap<TNodeResource>, char*, TOptional<TOptionsResource>), 1) { @@ -555,52 +589,59 @@ SIMPLE_STRICT_UDF_WITH_OPTIONAL_ARGS(TLookup, TOptional<TNodeResource>(TAutoMap< } SIMPLE_UDF_WITH_OPTIONAL_ARGS(TLookupBool, TOptional<bool>(TAutoMap<TNodeResource>, char*, TOptional<TOptionsResource>), 1) { - if (const auto options = ParseOptions(args[2]); options.Strict) + if (const auto options = ParseOptions(args[2]); options.Strict) { return (options.AutoConvert ? &LookupImpl<&ConvertToBool<true, true>> : &LookupImpl<&ConvertToBool<true, false>>)(args[0], args[1], valueBuilder, GetPos()); - else + } else { return (options.AutoConvert ? &LookupImpl<&ConvertToBool<false, true>> : &LookupImpl<&ConvertToBool<false, false>>)(args[0], args[1], valueBuilder, GetPos()); + } } SIMPLE_UDF_WITH_OPTIONAL_ARGS(TLookupInt64, TOptional<i64>(TAutoMap<TNodeResource>, char*, TOptional<TOptionsResource>), 1) { - if (const auto options = ParseOptions(args[2]); options.Strict) + if (const auto options = ParseOptions(args[2]); options.Strict) { return (options.AutoConvert ? &LookupImpl<&ConvertToIntegral<true, true, i64>> : &LookupImpl<&ConvertToIntegral<true, false, i64>>)(args[0], args[1], valueBuilder, GetPos()); - else + } else { return (options.AutoConvert ? &LookupImpl<&ConvertToIntegral<false, true, i64>> : &LookupImpl<&ConvertToIntegral<false, false, i64>>)(args[0], args[1], valueBuilder, GetPos()); + } } SIMPLE_UDF_WITH_OPTIONAL_ARGS(TLookupUint64, TOptional<ui64>(TAutoMap<TNodeResource>, char*, TOptional<TOptionsResource>), 1) { - if (const auto options = ParseOptions(args[2]); options.Strict) + if (const auto options = ParseOptions(args[2]); options.Strict) { return (options.AutoConvert ? 
&LookupImpl<&ConvertToIntegral<true, true, ui64>> : &LookupImpl<&ConvertToIntegral<true, false, ui64>>)(args[0], args[1], valueBuilder, GetPos()); - else + } else { return (options.AutoConvert ? &LookupImpl<&ConvertToIntegral<false, true, ui64>> : &LookupImpl<&ConvertToIntegral<false, false, ui64>>)(args[0], args[1], valueBuilder, GetPos()); + } } SIMPLE_UDF_WITH_OPTIONAL_ARGS(TLookupDouble, TOptional<double>(TAutoMap<TNodeResource>, char*, TOptional<TOptionsResource>), 1) { - if (const auto options = ParseOptions(args[2]); options.Strict) + if (const auto options = ParseOptions(args[2]); options.Strict) { return (options.AutoConvert ? &LookupImpl<&ConvertToFloat<true, true, double>> : &LookupImpl<&ConvertToFloat<true, false, double>>)(args[0], args[1], valueBuilder, GetPos()); - else + } else { return (options.AutoConvert ? &LookupImpl<&ConvertToFloat<false, true, double>> : &LookupImpl<&ConvertToFloat<false, false, double>>)(args[0], args[1], valueBuilder, GetPos()); + } } SIMPLE_UDF_WITH_OPTIONAL_ARGS(TLookupString, TOptional<char*>(TAutoMap<TNodeResource>, char*, TOptional<TOptionsResource>), 1) { - if (const auto options = ParseOptions(args[2]); options.Strict) + if (const auto options = ParseOptions(args[2]); options.Strict) { return (options.AutoConvert ? &LookupImpl<&ConvertToString<true, true, false>> : &LookupImpl<&ConvertToString<true, false, false>>)(args[0], args[1], valueBuilder, GetPos()); - else + } else { return (options.AutoConvert ? &LookupImpl<&ConvertToString<false, true, false>> : &LookupImpl<&ConvertToString<false, false, false>>)(args[0], args[1], valueBuilder, GetPos()); + } } SIMPLE_UDF_WITH_OPTIONAL_ARGS(TLookupList, TOptional<TListType<TNodeResource>>(TAutoMap<TNodeResource>, char*, TOptional<TOptionsResource>), 1) { - if (const auto options = ParseOptions(args[2]); options.Strict) + if (const auto options = ParseOptions(args[2]); options.Strict) { return (options.AutoConvert ? 
&LookupImpl<&ConvertToListImpl<true, true>> : &LookupImpl<&ConvertToListImpl<true, false>>)(args[0], args[1], valueBuilder, GetPos()); - else + } else { return (options.AutoConvert ? &LookupImpl<&ConvertToListImpl<false, true>> : &LookupImpl<&ConvertToListImpl<false, false>>)(args[0], args[1], valueBuilder, GetPos()); + } } SIMPLE_UDF_WITH_OPTIONAL_ARGS(TLookupDict, TOptional<TDictType>(TAutoMap<TNodeResource>, char*, TOptional<TOptionsResource>), 1) { - if (const auto options = ParseOptions(args[2]); options.Strict) + if (const auto options = ParseOptions(args[2]); options.Strict) { return (options.AutoConvert ? &LookupImpl<&ConvertToDictImpl<true, true>> : &LookupImpl<&ConvertToDictImpl<true, false>>)(args[0], args[1], valueBuilder, GetPos()); - else + } else { return (options.AutoConvert ? &LookupImpl<&ConvertToDictImpl<false, true>> : &LookupImpl<&ConvertToDictImpl<false, false>>)(args[0], args[1], valueBuilder, GetPos()); + } } SIMPLE_UDF_WITH_OPTIONAL_ARGS(TYPath, TOptional<TNodeResource>(TAutoMap<TNodeResource>, char*, TOptional<TOptionsResource>), 1) { @@ -608,52 +649,59 @@ SIMPLE_UDF_WITH_OPTIONAL_ARGS(TYPath, TOptional<TNodeResource>(TAutoMap<TNodeRes } SIMPLE_UDF_WITH_OPTIONAL_ARGS(TYPathBool, TOptional<bool>(TAutoMap<TNodeResource>, char*, TOptional<TOptionsResource>), 1) { - if (const auto options = ParseOptions(args[2]); options.Strict) + if (const auto options = ParseOptions(args[2]); options.Strict) { return (options.AutoConvert ? &YPathImpl<&ConvertToBool<true, true>> : &YPathImpl<&ConvertToBool<true, false>>)(args[0], args[1], valueBuilder, GetPos()); - else + } else { return (options.AutoConvert ? 
&YPathImpl<&ConvertToBool<false, true>> : &YPathImpl<&ConvertToBool<false, false>>)(args[0], args[1], valueBuilder, GetPos()); + } } SIMPLE_UDF_WITH_OPTIONAL_ARGS(TYPathInt64, TOptional<i64>(TAutoMap<TNodeResource>, char*, TOptional<TOptionsResource>), 1) { - if (const auto options = ParseOptions(args[2]); options.Strict) + if (const auto options = ParseOptions(args[2]); options.Strict) { return (options.AutoConvert ? &YPathImpl<&ConvertToIntegral<true, true, i64>> : &YPathImpl<&ConvertToIntegral<true, false, i64>>)(args[0], args[1], valueBuilder, GetPos()); - else + } else { return (options.AutoConvert ? &YPathImpl<&ConvertToIntegral<false, true, i64>> : &YPathImpl<&ConvertToIntegral<false, false, i64>>)(args[0], args[1], valueBuilder, GetPos()); + } } SIMPLE_UDF_WITH_OPTIONAL_ARGS(TYPathUint64, TOptional<ui64>(TAutoMap<TNodeResource>, char*, TOptional<TOptionsResource>), 1) { - if (const auto options = ParseOptions(args[2]); options.Strict) + if (const auto options = ParseOptions(args[2]); options.Strict) { return (options.AutoConvert ? &YPathImpl<&ConvertToIntegral<true, true, ui64>> : &YPathImpl<&ConvertToIntegral<true, false, ui64>>)(args[0], args[1], valueBuilder, GetPos()); - else + } else { return (options.AutoConvert ? &YPathImpl<&ConvertToIntegral<false, true, ui64>> : &YPathImpl<&ConvertToIntegral<false, false, ui64>>)(args[0], args[1], valueBuilder, GetPos()); + } } SIMPLE_UDF_WITH_OPTIONAL_ARGS(TYPathDouble, TOptional<double>(TAutoMap<TNodeResource>, char*, TOptional<TOptionsResource>), 1) { - if (const auto options = ParseOptions(args[2]); options.Strict) + if (const auto options = ParseOptions(args[2]); options.Strict) { return (options.AutoConvert ? &YPathImpl<&ConvertToFloat<true, true, double>> : &YPathImpl<&ConvertToFloat<true, false, double>>)(args[0], args[1], valueBuilder, GetPos()); - else + } else { return (options.AutoConvert ? 
&YPathImpl<&ConvertToFloat<false, true, double>> : &YPathImpl<&ConvertToFloat<false, false, double>>)(args[0], args[1], valueBuilder, GetPos()); + } } SIMPLE_UDF_WITH_OPTIONAL_ARGS(TYPathString, TOptional<char*>(TAutoMap<TNodeResource>, char*, TOptional<TOptionsResource>), 1) { - if (const auto options = ParseOptions(args[2]); options.Strict) + if (const auto options = ParseOptions(args[2]); options.Strict) { return (options.AutoConvert ? &YPathImpl<&ConvertToString<true, true, false>> : &YPathImpl<&ConvertToString<true, false, false>>)(args[0], args[1], valueBuilder, GetPos()); - else + } else { return (options.AutoConvert ? &YPathImpl<&ConvertToString<false, true, false>> : &YPathImpl<&ConvertToString<false, false, false>>)(args[0], args[1], valueBuilder, GetPos()); + } } SIMPLE_UDF_WITH_OPTIONAL_ARGS(TYPathList, TOptional<TListType<TNodeResource>>(TAutoMap<TNodeResource>, char*, TOptional<TOptionsResource>), 1) { - if (const auto options = ParseOptions(args[2]); options.Strict) + if (const auto options = ParseOptions(args[2]); options.Strict) { return (options.AutoConvert ? &YPathImpl<&ConvertToListImpl<true, true>> : &YPathImpl<&ConvertToListImpl<true, false>>)(args[0], args[1], valueBuilder, GetPos()); - else + } else { return (options.AutoConvert ? &YPathImpl<&ConvertToListImpl<false, true>> : &YPathImpl<&ConvertToListImpl<false, false>>)(args[0], args[1], valueBuilder, GetPos()); + } } SIMPLE_UDF_WITH_OPTIONAL_ARGS(TYPathDict, TOptional<TDictType>(TAutoMap<TNodeResource>, char*, TOptional<TOptionsResource>), 1) { - if (const auto options = ParseOptions(args[2]); options.Strict) + if (const auto options = ParseOptions(args[2]); options.Strict) { return (options.AutoConvert ? &YPathImpl<&ConvertToDictImpl<true, true>> : &YPathImpl<&ConvertToDictImpl<true, false>>)(args[0], args[1], valueBuilder, GetPos()); - else + } else { return (options.AutoConvert ? 
&YPathImpl<&ConvertToDictImpl<false, true>> : &YPathImpl<&ConvertToDictImpl<false, false>>)(args[0], args[1], valueBuilder, GetPos()); + } } SIMPLE_STRICT_UDF(TSerialize, TYson(TAutoMap<TNodeResource>)) { @@ -672,7 +720,8 @@ constexpr char SkipMapEntity[] = "SkipMapEntity"; constexpr char EncodeUtf8[] = "EncodeUtf8"; constexpr char WriteNanAsString[] = "WriteNanAsString"; -SIMPLE_UDF_WITH_OPTIONAL_ARGS(TSerializeJson, TOptional<TJson>(TAutoMap<TNodeResource>, TOptional<TOptionsResource>, TNamedArg<bool, SkipMapEntity>, TNamedArg<bool, EncodeUtf8>, TNamedArg<bool, WriteNanAsString>), 4) try { +SIMPLE_UDF_WITH_OPTIONAL_ARGS(TSerializeJson, TOptional<TJson>(TAutoMap<TNodeResource>, TOptional<TOptionsResource>, TNamedArg<bool, SkipMapEntity>, TNamedArg<bool, EncodeUtf8>, TNamedArg<bool, WriteNanAsString>), 4) +try { return valueBuilder->NewString(SerializeJsonDom(args[0], args[2].GetOrDefault(false), args[3].GetOrDefault(false), args[4].GetOrDefault(false))); } catch (const std::exception& e) { if (ParseOptions(args[1]).Strict) { @@ -721,7 +770,7 @@ SIMPLE_STRICT_UDF(TWithAttributes, TOptional<TNodeResource>(TAutoMap<TNodeResour } } -template<ENodeType Type> +template <ENodeType Type> TUnboxedValuePod IsTypeImpl(TUnboxedValuePod y) { if (IsNodeType<ENodeType::Attr>(y)) { y = y.GetVariantItem().Release(); @@ -787,11 +836,14 @@ public: typedef bool TTypeAwareMarker; TBase(TSourcePosition pos, const ITypeInfoHelper::TPtr typeHelper, const TType* shape) - : Pos_(pos), TypeHelper_(typeHelper), Shape_(shape) - {} + : Pos_(pos) + , TypeHelper_(typeHelper) + , Shape_(shape) + { + } protected: - template<bool MoreTypesAllowed> + template <bool MoreTypesAllowed> static const TType* CheckType(const ITypeInfoHelper::TPtr typeHelper, const TType* shape) { switch (/* const auto kind = */ typeHelper->GetTypeKind(shape)) { case ETypeKind::Null: @@ -826,31 +878,41 @@ protected: return CheckType<MoreTypesAllowed>(typeHelper, TListTypeInspector(*typeHelper, shape).GetItemType()); case 
ETypeKind::Dict: { const auto dictTypeInspector = TDictTypeInspector(*typeHelper, shape); - if (const auto keyType = dictTypeInspector.GetKeyType(); ETypeKind::Data == typeHelper->GetTypeKind(keyType)) - if (const auto keyId = TDataTypeInspector(*typeHelper, keyType).GetTypeId(); keyId == TDataType<char*>::Id || keyId == TDataType<TUtf8>::Id) + if (const auto keyType = dictTypeInspector.GetKeyType(); ETypeKind::Data == typeHelper->GetTypeKind(keyType)) { + if (const auto keyId = TDataTypeInspector(*typeHelper, keyType).GetTypeId(); keyId == TDataType<char*>::Id || keyId == TDataType<TUtf8>::Id) { return CheckType<MoreTypesAllowed>(typeHelper, dictTypeInspector.GetValueType()); + } + } return shape; } case ETypeKind::Tuple: - if (const auto tupleTypeInspector = TTupleTypeInspector(*typeHelper, shape); auto count = tupleTypeInspector.GetElementsCount()) do - if (const auto bad = CheckType<MoreTypesAllowed>(typeHelper, tupleTypeInspector.GetElementType(--count))) - return bad; - while (count); + if (const auto tupleTypeInspector = TTupleTypeInspector(*typeHelper, shape); auto count = tupleTypeInspector.GetElementsCount()) { + do { + if (const auto bad = CheckType<MoreTypesAllowed>(typeHelper, tupleTypeInspector.GetElementType(--count))) { + return bad; + } + } while (count); + } return nullptr; case ETypeKind::Struct: - if (const auto structTypeInspector = TStructTypeInspector(*typeHelper, shape); auto count = structTypeInspector.GetMembersCount()) do - if (const auto bad = CheckType<MoreTypesAllowed>(typeHelper, structTypeInspector.GetMemberType(--count))) - return bad; - while (count); + if (const auto structTypeInspector = TStructTypeInspector(*typeHelper, shape); auto count = structTypeInspector.GetMembersCount()) { + do { + if (const auto bad = CheckType<MoreTypesAllowed>(typeHelper, structTypeInspector.GetMemberType(--count))) { + return bad; + } + } while (count); + } return nullptr; case ETypeKind::Variant: - if constexpr (MoreTypesAllowed) + if constexpr 
(MoreTypesAllowed) { return CheckType<MoreTypesAllowed>(typeHelper, TVariantTypeInspector(*typeHelper, shape).GetUnderlyingType()); - else + } else { return shape; + } case ETypeKind::Resource: - if (const auto inspector = TResourceTypeInspector(*typeHelper, shape); TStringBuf(inspector.GetTag()) == NodeResourceName) + if (const auto inspector = TResourceTypeInspector(*typeHelper, shape); TStringBuf(inspector.GetTag()) == NodeResourceName) { return nullptr; + } [[fallthrough]]; // AUTOGENERATED_FALLTHROUGH_FIXME default: return shape; @@ -859,13 +921,14 @@ protected: const TSourcePosition Pos_; const ITypeInfoHelper::TPtr TypeHelper_; - const TType *const Shape_; + const TType* const Shape_; }; class TFrom: public TBase { TUnboxedValue Run(const IValueBuilder* valueBuilder, const TUnboxedValuePod* args) const final { return MakeDom(TypeHelper_.Get(), Shape_, *args, valueBuilder); } + public: static const TStringRef& Name() { static auto name = TStringRef::Of("From"); @@ -874,7 +937,8 @@ public: TFrom(TSourcePosition pos, const ITypeInfoHelper::TPtr typeHelper, const TType* shape) : TBase(pos, typeHelper, shape) - {} + { + } static bool DeclareSignature(const TStringRef& name, TType* userType, IFunctionTypeInfoBuilder& builder, bool typesOnly) { if (Name() == name) { @@ -931,23 +995,24 @@ public: class TConvert: public TBase { TUnboxedValue Run(const IValueBuilder* valueBuilder, const TUnboxedValuePod* args) const final { - if (const auto options = ParseOptions(args[1]); options.Strict) + if (const auto options = ParseOptions(args[1]); options.Strict) { return (options.AutoConvert ? &PeelDom<true, true> : &PeelDom<true, false>)(TypeHelper_.Get(), Shape_, args[0], valueBuilder, Pos_); - else + } else { return (options.AutoConvert ? 
&PeelDom<false, true> : &PeelDom<false, false>)(TypeHelper_.Get(), Shape_, args[0], valueBuilder, Pos_); + } } public: TConvert(TSourcePosition pos, const ITypeInfoHelper::TPtr typeHelper, const TType* shape) : TBase(pos, typeHelper, shape) - {} + { + } static const TStringRef& Name() { static auto name = TStringRef::Of("ConvertTo"); return name; } - static bool DeclareSignature(const TStringRef& name, TType* userType, IFunctionTypeInfoBuilder& builder, bool typesOnly) { if (Name() == name) { const auto optionsType = builder.Optional()->Item(builder.Resource(OptionsResourceName)).Build(); @@ -1006,19 +1071,23 @@ public: } }; -template<typename TYJson, bool DecodeUtf8 = false> +template <typename TYJson, bool DecodeUtf8 = false> class TParse: public TBoxedValue { public: typedef bool TTypeAwareMarker; + private: const TSourcePosition Pos_; const bool StrictType_; TUnboxedValue Run(const IValueBuilder* valueBuilder, const TUnboxedValuePod* args) const final; + public: TParse(TSourcePosition pos, bool strictType) - : Pos_(pos), StrictType_(strictType) - {} + : Pos_(pos) + , StrictType_(strictType) + { + } static const TStringRef& Name(); @@ -1090,7 +1159,7 @@ public: } }; -template<> +template <> TUnboxedValue TParse<TYson, false>::Run(const IValueBuilder* valueBuilder, const TUnboxedValuePod* args) const try { return TryParseYsonDom(args[0].AsStringRef(), valueBuilder); } catch (const std::exception& e) { @@ -1100,7 +1169,7 @@ TUnboxedValue TParse<TYson, false>::Run(const IValueBuilder* valueBuilder, const return TUnboxedValuePod(); } -template<> +template <> TUnboxedValue TParse<TJson, false>::Run(const IValueBuilder* valueBuilder, const TUnboxedValuePod* args) const try { return TryParseJsonDom(args[0].AsStringRef(), valueBuilder); } catch (const std::exception& e) { @@ -1110,7 +1179,7 @@ TUnboxedValue TParse<TJson, false>::Run(const IValueBuilder* valueBuilder, const return TUnboxedValuePod(); } -template<> +template <> TUnboxedValue TParse<TJson, true>::Run(const 
IValueBuilder* valueBuilder, const TUnboxedValuePod* args) const try { return TryParseJsonDom(args[0].AsStringRef(), valueBuilder, true); } catch (const std::exception& e) { @@ -1120,85 +1189,84 @@ TUnboxedValue TParse<TJson, true>::Run(const IValueBuilder* valueBuilder, const return TUnboxedValuePod(); } -template<> +template <> const TStringRef& TParse<TYson, false>::Name() { static auto yson = TStringRef::Of("Parse"); return yson; } -template<> +template <> const TStringRef& TParse<TJson, false>::Name() { static auto yson = TStringRef::Of("ParseJson"); return yson; } -template<> +template <> const TStringRef& TParse<TJson, true>::Name() { static auto yson = TStringRef::Of("ParseJsonDecodeUtf8"); return yson; } -} +} // namespace // TODO: optimizer that marks UDFs as strict if Yson::Options(false as Strict) is given SIMPLE_MODULE(TYson2Module, - TOptions, - TParse<TYson>, - TParse<TJson>, - TParse<TJson, true>, - TConvert, - TConvertToBool, - TConvertToInt64, - TConvertToUint64, - TConvertToDouble, - TConvertToString, - TConvertToList, - TConvertToBoolList, - TConvertToInt64List, - TConvertToUint64List, - TConvertToDoubleList, - TConvertToStringList, - TConvertToDict, - TConvertToBoolDict, - TConvertToInt64Dict, - TConvertToUint64Dict, - TConvertToDoubleDict, - TConvertToStringDict, - TAttributes, - TContains, - TLookup, - TLookupBool, - TLookupInt64, - TLookupUint64, - TLookupDouble, - TLookupString, - TLookupList, - TLookupDict, - TYPath, - TYPathBool, - TYPathInt64, - TYPathUint64, - TYPathDouble, - TYPathString, - TYPathList, - TYPathDict, - TSerialize, - TSerializeText, - TSerializePretty, - TSerializeJson, - TWithAttributes, - TIsString, - TIsInt64, - TIsUint64, - TIsBool, - TIsDouble, - TIsList, - TIsDict, - TIsEntity, - TFrom, - TGetLength, - TEquals, - TGetHash -); + TOptions, + TParse<TYson>, + TParse<TJson>, + TParse<TJson, true>, + TConvert, + TConvertToBool, + TConvertToInt64, + TConvertToUint64, + TConvertToDouble, + TConvertToString, + 
TConvertToList, + TConvertToBoolList, + TConvertToInt64List, + TConvertToUint64List, + TConvertToDoubleList, + TConvertToStringList, + TConvertToDict, + TConvertToBoolDict, + TConvertToInt64Dict, + TConvertToUint64Dict, + TConvertToDoubleDict, + TConvertToStringDict, + TAttributes, + TContains, + TLookup, + TLookupBool, + TLookupInt64, + TLookupUint64, + TLookupDouble, + TLookupString, + TLookupList, + TLookupDict, + TYPath, + TYPathBool, + TYPathInt64, + TYPathUint64, + TYPathDouble, + TYPathString, + TYPathList, + TYPathDict, + TSerialize, + TSerializeText, + TSerializePretty, + TSerializeJson, + TWithAttributes, + TIsString, + TIsInt64, + TIsUint64, + TIsBool, + TIsDouble, + TIsList, + TIsDict, + TIsEntity, + TFrom, + TGetLength, + TEquals, + TGetHash); REGISTER_MODULES(TYson2Module); diff --git a/yql/essentials/udfs/examples/callables/callables_udf.cpp b/yql/essentials/udfs/examples/callables/callables_udf.cpp index 6d8f1c27d9a..e367c3b990c 100644 --- a/yql/essentials/udfs/examples/callables/callables_udf.cpp +++ b/yql/essentials/udfs/examples/callables/callables_udf.cpp @@ -5,7 +5,6 @@ #include <util/generic/yexception.h> #include <util/string/cast.h> - using namespace NKikimr; using namespace NUdf; @@ -14,8 +13,7 @@ namespace { ////////////////////////////////////////////////////////////////////////////// // TFromString ////////////////////////////////////////////////////////////////////////////// -class TFromString: public TBoxedValue -{ +class TFromString: public TBoxedValue { public: static TStringRef Name() { static auto name = TStringRef::Of("FromString"); @@ -24,9 +22,8 @@ public: private: TUnboxedValue Run( - const IValueBuilder* valueBuilder, - const TUnboxedValuePod* args) const override - { + const IValueBuilder* valueBuilder, + const TUnboxedValuePod* args) const override { Y_UNUSED(valueBuilder); auto str = args[0].AsStringRef(); int val = FromString<int>(str); @@ -37,8 +34,7 @@ private: 
////////////////////////////////////////////////////////////////////////////// // TSum ////////////////////////////////////////////////////////////////////////////// -class TSum: public TBoxedValue -{ +class TSum: public TBoxedValue { public: static TStringRef Name() { static auto name = TStringRef::Of("Sum"); @@ -47,9 +43,8 @@ public: private: TUnboxedValue Run( - const IValueBuilder* valueBuilder, - const TUnboxedValuePod* args) const override - { + const IValueBuilder* valueBuilder, + const TUnboxedValuePod* args) const override { int sum = 0; auto it = args[0].GetListIterator(); @@ -65,8 +60,7 @@ private: ////////////////////////////////////////////////////////////////////////////// // TMul ////////////////////////////////////////////////////////////////////////////// -class TMul: public TBoxedValue -{ +class TMul: public TBoxedValue { public: static TStringRef Name() { static auto name = TStringRef::Of("Mul"); @@ -75,9 +69,8 @@ public: private: TUnboxedValue Run( - const IValueBuilder* valueBuilder, - const TUnboxedValuePod* args) const override - { + const IValueBuilder* valueBuilder, + const TUnboxedValuePod* args) const override { int mul = 1; const auto it = args[0].GetListIterator(); @@ -106,9 +99,8 @@ public: private: TUnboxedValue Run( - const IValueBuilder* valueBuilder, - const TUnboxedValuePod* args) const override - { + const IValueBuilder* valueBuilder, + const TUnboxedValuePod* args) const override { Y_UNUSED(valueBuilder); auto res = args[0] ? 
args[0].Get<i32>() : 123; return TUnboxedValuePod(res + 1); @@ -127,9 +119,8 @@ public: } TUnboxedValue Run( - const IValueBuilder* valueBuilder, - const TUnboxedValuePod* args) const override - { + const IValueBuilder* valueBuilder, + const TUnboxedValuePod* args) const override { Y_UNUSED(valueBuilder); Y_UNUSED(args); return TUnboxedValuePod(new TNamedArgUdf()); @@ -139,14 +130,14 @@ public: ////////////////////////////////////////////////////////////////////////////// // TCallablesModule ////////////////////////////////////////////////////////////////////////////// -class TCallablesModule: public IUdfModule -{ +class TCallablesModule: public IUdfModule { public: TStringRef Name() const { return TStringRef::Of("Callables"); } - void CleanupOnTerminate() const final {} + void CleanupOnTerminate() const final { + } void GetAllFunctions(IFunctionsSink& sink) const final { sink.Add(TFromString::Name()); @@ -155,12 +146,11 @@ public: } void BuildFunctionTypeInfo( - const TStringRef& name, - TType* userType, - const TStringRef& typeConfig, - ui32 flags, - IFunctionTypeInfoBuilder& builder) const final - { + const TStringRef& name, + TType* userType, + const TStringRef& typeConfig, + ui32 flags, + IFunctionTypeInfoBuilder& builder) const final { try { Y_UNUSED(userType); Y_UNUSED(typeConfig); @@ -176,25 +166,20 @@ public: if (!typesOnly) { builder.Implementation(new TFromString); } - } - else if (TSum::Name() == name) { + } else if (TSum::Name() == name) { // function signature: // int (ListOf(String), int(*)(String)) // run config: void - builder.Returns<int>().Args()-> - Add(builder.List()->Item<char*>()) - .Add(builder.Callable()->Returns<int>().Arg<char*>()) - .Done(); + builder.Returns<int>().Args()->Add(builder.List()->Item<char*>()).Add(builder.Callable()->Returns<int>().Arg<char*>()).Done(); if (!typesOnly) { builder.Implementation(new TSum); } - } - else if (TMul::Name() == name) { + } else if (TMul::Name() == name) { // function signature: // int 
(ListOf(String), int(*)(String)) // run config: void - using TFuncType = int(*)(char*); + using TFuncType = int (*)(char*); builder.SimpleSignature<int(TListType<char*>, TFuncType)>(); if (!typesOnly) { diff --git a/yql/essentials/udfs/examples/callables/ya.make b/yql/essentials/udfs/examples/callables/ya.make index 63e19657996..67b6cecbb68 100644 --- a/yql/essentials/udfs/examples/callables/ya.make +++ b/yql/essentials/udfs/examples/callables/ya.make @@ -1,6 +1,8 @@ YQL_UDF_CONTRIB(callables_udf) YQL_ABI_VERSION(2 38 0) +ENABLE(YQL_STYLE_CPP) + SRCS( callables_udf.cpp ) diff --git a/yql/essentials/udfs/examples/dicts/dicts_udf.cpp b/yql/essentials/udfs/examples/dicts/dicts_udf.cpp index 69231f01b13..2ce8c509a6e 100644 --- a/yql/essentials/udfs/examples/dicts/dicts_udf.cpp +++ b/yql/essentials/udfs/examples/dicts/dicts_udf.cpp @@ -13,8 +13,7 @@ namespace { ////////////////////////////////////////////////////////////////////////////// // TStrToInt ////////////////////////////////////////////////////////////////////////////// -class TStrToInt: public TBoxedValue -{ +class TStrToInt: public TBoxedValue { public: explicit TStrToInt(TType* dictType) : DictType_(dictType) @@ -28,9 +27,8 @@ public: private: TUnboxedValue Run( - const IValueBuilder* valueBuilder, - const TUnboxedValuePod* args) const override - { + const IValueBuilder* valueBuilder, + const TUnboxedValuePod* args) const override { auto kind = args[0].AsStringRef(); ui32 flags = 0; @@ -40,18 +38,7 @@ private: flags |= TDictFlags::Sorted; } - return valueBuilder->NewDict(DictType_, flags)-> - Add(valueBuilder->NewString("zero"), TUnboxedValuePod((ui32) 0)) - .Add(valueBuilder->NewString("one"), TUnboxedValuePod((ui32) 1)) - .Add(valueBuilder->NewString("two"), TUnboxedValuePod((ui32) 2)) - .Add(valueBuilder->NewString("three"), TUnboxedValuePod((ui32) 3)) - .Add(valueBuilder->NewString("four"), TUnboxedValuePod((ui32) 4)) - .Add(valueBuilder->NewString("five"), TUnboxedValuePod((ui32) 5)) - 
.Add(valueBuilder->NewString("six"), TUnboxedValuePod((ui32) 6)) - .Add(valueBuilder->NewString("seven"), TUnboxedValuePod((ui32) 7)) - .Add(valueBuilder->NewString("eight"), TUnboxedValuePod((ui32) 8)) - .Add(valueBuilder->NewString("nine"), TUnboxedValuePod((ui32) 9)) - .Build(); + return valueBuilder->NewDict(DictType_, flags)->Add(valueBuilder->NewString("zero"), TUnboxedValuePod((ui32)0)).Add(valueBuilder->NewString("one"), TUnboxedValuePod((ui32)1)).Add(valueBuilder->NewString("two"), TUnboxedValuePod((ui32)2)).Add(valueBuilder->NewString("three"), TUnboxedValuePod((ui32)3)).Add(valueBuilder->NewString("four"), TUnboxedValuePod((ui32)4)).Add(valueBuilder->NewString("five"), TUnboxedValuePod((ui32)5)).Add(valueBuilder->NewString("six"), TUnboxedValuePod((ui32)6)).Add(valueBuilder->NewString("seven"), TUnboxedValuePod((ui32)7)).Add(valueBuilder->NewString("eight"), TUnboxedValuePod((ui32)8)).Add(valueBuilder->NewString("nine"), TUnboxedValuePod((ui32)9)).Build(); } TType* DictType_; @@ -60,26 +47,25 @@ private: ////////////////////////////////////////////////////////////////////////////// // TDictsModule ////////////////////////////////////////////////////////////////////////////// -class TDictsModule: public IUdfModule -{ +class TDictsModule: public IUdfModule { public: TStringRef Name() const { return TStringRef::Of("Dicts"); } - void CleanupOnTerminate() const final {} + void CleanupOnTerminate() const final { + } void GetAllFunctions(IFunctionsSink& sink) const final { sink.Add(TStrToInt::Name()); } void BuildFunctionTypeInfo( - const TStringRef& name, - TType* userType, - const TStringRef& typeConfig, - ui32 flags, - IFunctionTypeInfoBuilder& builder) const final - { + const TStringRef& name, + TType* userType, + const TStringRef& typeConfig, + ui32 flags, + IFunctionTypeInfoBuilder& builder) const final { try { Y_UNUSED(userType); Y_UNUSED(typeConfig); diff --git a/yql/essentials/udfs/examples/dicts/ya.make b/yql/essentials/udfs/examples/dicts/ya.make 
index 4dd62e36e9d..f7a7ff40c85 100644 --- a/yql/essentials/udfs/examples/dicts/ya.make +++ b/yql/essentials/udfs/examples/dicts/ya.make @@ -1,6 +1,8 @@ YQL_UDF_CONTRIB(dicts_udf) YQL_ABI_VERSION(2 9 0) +ENABLE(YQL_STYLE_CPP) + SRCS( dicts_udf.cpp ) diff --git a/yql/essentials/udfs/examples/dummylog/dummylog_udf.cpp b/yql/essentials/udfs/examples/dummylog/dummylog_udf.cpp index e3e227f1303..fd1dcb5970b 100644 --- a/yql/essentials/udfs/examples/dummylog/dummylog_udf.cpp +++ b/yql/essentials/udfs/examples/dummylog/dummylog_udf.cpp @@ -10,32 +10,28 @@ using namespace NUdf; namespace { -struct TRecordInfo -{ +struct TRecordInfo { ui32 Key; ui32 Subkey; ui32 Value; static constexpr ui32 FieldsCount = 3U; }; - ////////////////////////////////////////////////////////////////////////////// // TDummyLog ////////////////////////////////////////////////////////////////////////////// -class TDummyLog: public TBoxedValue -{ +class TDummyLog: public TBoxedValue { public: explicit TDummyLog( - const TRecordInfo& fieldIndexes) + const TRecordInfo& fieldIndexes) : RecordInfo_(fieldIndexes) { } private: TUnboxedValue Run( - const IValueBuilder* valueBuilder, - const TUnboxedValuePod* args) const override - { + const IValueBuilder* valueBuilder, + const TUnboxedValuePod* args) const override { auto keyData = args[0].GetElement(RecordInfo_.Key); auto subkeyData = args[0].GetElement(RecordInfo_.Subkey); auto valueData = args[0].GetElement(RecordInfo_.Value); @@ -55,21 +51,20 @@ private: const TRecordInfo RecordInfo_; }; -class TDummyLog2 : public TBoxedValue -{ +class TDummyLog2: public TBoxedValue { public: - class TFactory : public TBoxedValue { + class TFactory: public TBoxedValue { public: TFactory(const TRecordInfo& inputInfo, const TRecordInfo& outputInfo) : InputInfo_(inputInfo) , OutputInfo_(outputInfo) - {} + { + } -private: + private: TUnboxedValue Run( const IValueBuilder* valueBuilder, - const TUnboxedValuePod* args) const override - { + const TUnboxedValuePod* args) const 
override { Y_UNUSED(valueBuilder); return TUnboxedValuePod(new TDummyLog2(args[0], InputInfo_, OutputInfo_)); } @@ -81,8 +76,7 @@ private: explicit TDummyLog2( const TUnboxedValuePod& runConfig, const TRecordInfo& inputInfo, - const TRecordInfo& outputInfo - ) + const TRecordInfo& outputInfo) : Prefix_(runConfig.AsStringRef()) , InputInfo_(inputInfo) , OutputInfo_(outputInfo) @@ -92,8 +86,7 @@ private: private: TUnboxedValue Run( const IValueBuilder* valueBuilder, - const TUnboxedValuePod* args) const override - { + const TUnboxedValuePod* args) const override { auto keyData = args[0].GetElement(InputInfo_.Key); auto valueData = args[0].GetElement(InputInfo_.Value); @@ -115,14 +108,14 @@ private: ////////////////////////////////////////////////////////////////////////////// // TDummyLogModule ////////////////////////////////////////////////////////////////////////////// -class TDummyLogModule: public IUdfModule -{ +class TDummyLogModule: public IUdfModule { public: TStringRef Name() const { return TStringRef::Of("DummyLog"); } - void CleanupOnTerminate() const final {} + void CleanupOnTerminate() const final { + } void GetAllFunctions(IFunctionsSink& sink) const final { sink.Add(TStringRef::Of("ReadRecord")); @@ -130,12 +123,11 @@ public: } void BuildFunctionTypeInfo( - const TStringRef& name, - TType* userType, - const TStringRef& typeConfig, - ui32 flags, - IFunctionTypeInfoBuilder& builder) const final - { + const TStringRef& name, + TType* userType, + const TStringRef& typeConfig, + ui32 flags, + IFunctionTypeInfoBuilder& builder) const final { try { Y_UNUSED(userType); Y_UNUSED(typeConfig); @@ -144,11 +136,7 @@ public: if (TStringRef::Of("ReadRecord") == name) { TRecordInfo recordInfo; - auto recordType = builder.Struct(recordInfo.FieldsCount)-> - AddField<char*>("key", &recordInfo.Key) - .AddField<char*>("subkey", &recordInfo.Subkey) - .AddField<char*>("value", &recordInfo.Value) - .Build(); + auto recordType = 
builder.Struct(recordInfo.FieldsCount)->AddField<char*>("key", &recordInfo.Key).AddField<char*>("subkey", &recordInfo.Subkey).AddField<char*>("value", &recordInfo.Value).Build(); builder.Returns(recordType).Args()->Add(recordType).Done(); @@ -162,18 +150,10 @@ public: builder.SetError(TStringRef::Of("Only AAA is valid type config")); } TRecordInfo inputInfo; - auto inputType = builder.Struct(inputInfo.FieldsCount)-> - AddField<char*>("key", &inputInfo.Key) - .AddField<char*>("subkey", &inputInfo.Subkey) - .AddField<char*>("value", &inputInfo.Value) - .Build(); + auto inputType = builder.Struct(inputInfo.FieldsCount)->AddField<char*>("key", &inputInfo.Key).AddField<char*>("subkey", &inputInfo.Subkey).AddField<char*>("value", &inputInfo.Value).Build(); TRecordInfo outputInfo; - auto outputType = builder.Struct(2U)-> - AddField<char*>("key", &outputInfo.Key) - .AddField<char*>("value", &outputInfo.Value) - .Build(); - + auto outputType = builder.Struct(2U)->AddField<char*>("key", &outputInfo.Key).AddField<char*>("value", &outputInfo.Value).Build(); builder.Returns(outputType).Args()->Add(inputType).Done(); builder.RunConfig<char*>(); diff --git a/yql/essentials/udfs/examples/dummylog/ya.make b/yql/essentials/udfs/examples/dummylog/ya.make index d1acd15945b..2a74a5767e9 100644 --- a/yql/essentials/udfs/examples/dummylog/ya.make +++ b/yql/essentials/udfs/examples/dummylog/ya.make @@ -1,6 +1,8 @@ YQL_UDF_CONTRIB(dummylog) YQL_ABI_VERSION(2 9 0) +ENABLE(YQL_STYLE_CPP) + SRCS( dummylog_udf.cpp ) diff --git a/yql/essentials/udfs/examples/linear/linear_udf.cpp b/yql/essentials/udfs/examples/linear/linear_udf.cpp index 006ee817446..8696799a865 100644 --- a/yql/essentials/udfs/examples/linear/linear_udf.cpp +++ b/yql/essentials/udfs/examples/linear/linear_udf.cpp @@ -10,7 +10,7 @@ SIMPLE_UDF(TProducer, TLinear<i32>(i32)) { return TUnboxedValuePod(args[0].Get<i32>()); } -using TExchangeRet = TTuple<TLinear<i32>,i32>; +using TExchangeRet = TTuple<TLinear<i32>, i32>; 
SIMPLE_UDF(TExchange, TExchangeRet(TLinear<i32>, i32)) { TUnboxedValue* items; TUnboxedValue ret = valueBuilder->NewArray(2, items); @@ -19,7 +19,7 @@ SIMPLE_UDF(TExchange, TExchangeRet(TLinear<i32>, i32)) { return ret; } -class TUnsafeConsumer : public TBoxedValue { +class TUnsafeConsumer: public TBoxedValue { public: typedef bool TTypeAwareMarker; @@ -78,8 +78,7 @@ public: } return true; - } - else { + } else { return false; } } @@ -87,6 +86,6 @@ public: SIMPLE_MODULE(TLinearModule, TProducer, TUnsafeConsumer, TExchange) -} +} // namespace REGISTER_MODULES(TLinearModule) diff --git a/yql/essentials/udfs/examples/linear/ya.make b/yql/essentials/udfs/examples/linear/ya.make index 961bc48b82e..fb38acaf8f6 100644 --- a/yql/essentials/udfs/examples/linear/ya.make +++ b/yql/essentials/udfs/examples/linear/ya.make @@ -1,6 +1,8 @@ YQL_UDF(linear_udf) YQL_ABI_VERSION(2 44 0) +ENABLE(YQL_STYLE_CPP) + SRCS( linear_udf.cpp ) diff --git a/yql/essentials/udfs/examples/lists/lists_udf.cpp b/yql/essentials/udfs/examples/lists/lists_udf.cpp index 35dbb57dfa3..e310cde3e3b 100644 --- a/yql/essentials/udfs/examples/lists/lists_udf.cpp +++ b/yql/essentials/udfs/examples/lists/lists_udf.cpp @@ -15,8 +15,7 @@ namespace { ////////////////////////////////////////////////////////////////////////////// // TNumbersList ////////////////////////////////////////////////////////////////////////////// -class TNumbers: public TBoxedValue -{ +class TNumbers: public TBoxedValue { public: static TStringRef Name() { static auto name = TStringRef::Of("Numbers"); @@ -25,9 +24,8 @@ public: private: TUnboxedValue Run( - const IValueBuilder* valueBuilder, - const TUnboxedValuePod* args) const override - { + const IValueBuilder* valueBuilder, + const TUnboxedValuePod* args) const override { const auto appendPrepend = args[0].AsStringRef(); const auto count = args[1].Get<ui32>(); std::vector<TUnboxedValue> list(count); @@ -36,8 +34,7 @@ private: for (auto it = list.begin(); list.end() != it; ++it) { *it = 
TUnboxedValuePod(i++); } - } - else if (TStringRef::Of("Prepend") == appendPrepend) { + } else if (TStringRef::Of("Prepend") == appendPrepend) { for (auto it = list.rbegin(); list.rend() != it; ++it) { *it = TUnboxedValuePod(i++); } @@ -50,8 +47,7 @@ private: ////////////////////////////////////////////////////////////////////////////// // TExtend ////////////////////////////////////////////////////////////////////////////// -class TExtend: public TBoxedValue -{ +class TExtend: public TBoxedValue { public: static TStringRef Name() { static auto name = TStringRef::Of("Extend"); @@ -60,9 +56,8 @@ public: private: TUnboxedValue Run( - const IValueBuilder* valueBuilder, - const TUnboxedValuePod* args) const override - { + const IValueBuilder* valueBuilder, + const TUnboxedValuePod* args) const override { std::array<TUnboxedValue, 2U> list = {{TUnboxedValuePod(args[0]), TUnboxedValuePod(args[1])}}; return valueBuilder->NewList(list.data(), list.size()); } @@ -71,14 +66,14 @@ private: ////////////////////////////////////////////////////////////////////////////// // TListsModule ////////////////////////////////////////////////////////////////////////////// -class TListsModule: public IUdfModule -{ +class TListsModule: public IUdfModule { public: TStringRef Name() const { return TStringRef::Of("Lists"); } - void CleanupOnTerminate() const final {} + void CleanupOnTerminate() const final { + } void GetAllFunctions(IFunctionsSink& sink) const final { sink.Add(TNumbers::Name()); @@ -86,12 +81,11 @@ public: } void BuildFunctionTypeInfo( - const TStringRef& name, - TType* userType, - const TStringRef& typeConfig, - ui32 flags, - IFunctionTypeInfoBuilder& builder) const final - { + const TStringRef& name, + TType* userType, + const TStringRef& typeConfig, + ui32 flags, + IFunctionTypeInfoBuilder& builder) const final { try { Y_UNUSED(userType); Y_UNUSED(typeConfig); @@ -107,14 +101,16 @@ public: if (!typesOnly) { builder.Implementation(new TNumbers); } - } - else if 
(TExtend::Name() == name) { + } else if (TExtend::Name() == name) { // function signature: // List<ui32> Numbers(List<ui32>, List<ui32>) // runConfig: void auto listType = builder.List()->Item<ui32>().Build(); builder.Returns(listType) - .Args()->Add(listType).Add(listType).Done(); + .Args() + ->Add(listType) + .Add(listType) + .Done(); if (!typesOnly) { builder.Implementation(new TExtend); diff --git a/yql/essentials/udfs/examples/lists/ya.make b/yql/essentials/udfs/examples/lists/ya.make index 3fa002c7800..dac2ee92dd9 100644 --- a/yql/essentials/udfs/examples/lists/ya.make +++ b/yql/essentials/udfs/examples/lists/ya.make @@ -1,6 +1,8 @@ YQL_UDF_CONTRIB(lists_udf) YQL_ABI_VERSION(2 9 0) +ENABLE(YQL_STYLE_CPP) + SRCS( lists_udf.cpp ) diff --git a/yql/essentials/udfs/examples/structs/structs_udf.cpp b/yql/essentials/udfs/examples/structs/structs_udf.cpp index 38ea115eaf6..3d8e89c4ff6 100644 --- a/yql/essentials/udfs/examples/structs/structs_udf.cpp +++ b/yql/essentials/udfs/examples/structs/structs_udf.cpp @@ -11,8 +11,7 @@ using namespace NUdf; namespace { -struct TPersonInfo -{ +struct TPersonInfo { ui32 FirstName = 0; ui32 LastName = 0; ui32 Age = 0; @@ -26,8 +25,7 @@ struct TPersonInfo ////////////////////////////////////////////////////////////////////////////// // TPersonMember ////////////////////////////////////////////////////////////////////////////// -class TPersonMember: public TBoxedValue -{ +class TPersonMember: public TBoxedValue { public: explicit TPersonMember(ui32 memberIndex) : MemberIndex_(memberIndex) @@ -36,9 +34,8 @@ public: private: TUnboxedValue Run( - const IValueBuilder* valueBuilder, - const TUnboxedValuePod* args) const override - { + const IValueBuilder* valueBuilder, + const TUnboxedValuePod* args) const override { Y_UNUSED(valueBuilder); return args[0].GetElement(MemberIndex_); } @@ -49,8 +46,7 @@ private: ////////////////////////////////////////////////////////////////////////////// // TNewPerson 
////////////////////////////////////////////////////////////////////////////// -class TNewPerson: public TBoxedValue -{ +class TNewPerson: public TBoxedValue { public: explicit TNewPerson(const TPersonInfo& personIndexes) : Info_(personIndexes) @@ -59,9 +55,8 @@ public: private: TUnboxedValue Run( - const IValueBuilder* valueBuilder, - const TUnboxedValuePod* args) const override - { + const IValueBuilder* valueBuilder, + const TUnboxedValuePod* args) const override { TUnboxedValue name, surname, age; if (Info_.RemapKSV) { name = args->GetElement(Info_.Key); @@ -88,14 +83,14 @@ private: ////////////////////////////////////////////////////////////////////////////// // TPersonModule ////////////////////////////////////////////////////////////////////////////// -class TPersonModule: public IUdfModule -{ +class TPersonModule: public IUdfModule { public: TStringRef Name() const { return TStringRef::Of("Person"); } - void CleanupOnTerminate() const final {} + void CleanupOnTerminate() const final { + } void GetAllFunctions(IFunctionsSink& sink) const final { sink.Add(TStringRef::Of("FirstName")); @@ -105,22 +100,17 @@ public: } void BuildFunctionTypeInfo( - const TStringRef& name, - TType* userType, - const TStringRef& typeConfig, - ui32 flags, - IFunctionTypeInfoBuilder& builder) const final - { + const TStringRef& name, + TType* userType, + const TStringRef& typeConfig, + ui32 flags, + IFunctionTypeInfoBuilder& builder) const final { Y_UNUSED(userType); try { bool typesOnly = (flags & TFlags::TypesOnly); TPersonInfo personInfo; - auto personType = builder.Struct(personInfo.FieldsCount)-> - AddField<char*>("FirstName", &personInfo.FirstName) - .AddField<char*>("LastName", &personInfo.LastName) - .AddField<ui32>("Age", &personInfo.Age) - .Build(); + auto personType = builder.Struct(personInfo.FieldsCount)->AddField<char*>("FirstName", &personInfo.FirstName).AddField<char*>("LastName", &personInfo.LastName).AddField<ui32>("Age", &personInfo.Age).Build(); if 
(TStringRef::Of("FirstName") == name) { // function signature: String FirstName(PersonStruct p) @@ -130,8 +120,7 @@ public: if (!typesOnly) { builder.Implementation(new TPersonMember(personInfo.FirstName)); } - } - else if (TStringRef::Of("LastName") == name) { + } else if (TStringRef::Of("LastName") == name) { // function signature: String LastName(PersonStruct p) // runConfig: void builder.Returns<char*>().Args()->Add(personType).Done(); @@ -139,8 +128,7 @@ public: if (!typesOnly) { builder.Implementation(new TPersonMember(personInfo.LastName)); } - } - else if (TStringRef::Of("Age") == name) { + } else if (TStringRef::Of("Age") == name) { // function signature: ui32 Age(PersonStruct p) // runConfig: void builder.Returns<ui32>().Args()->Add(personType).Done(); @@ -148,19 +136,14 @@ public: if (!typesOnly) { builder.Implementation(new TPersonMember(personInfo.Age)); } - } - else if (TStringRef::Of("New") == name) { + } else if (TStringRef::Of("New") == name) { // function signature: // PersonStruct New(String firstName, String lastName, ui32 age) // runConfig: void builder.Returns(personType); if (TStringRef::Of("RemapKSV") == typeConfig) { personInfo.RemapKSV = true; - auto inputType = builder.Struct(personInfo.FieldsCount)-> - AddField<char*>("key", &personInfo.Key) - .AddField<char*>("subkey", &personInfo.Subkey) - .AddField<char*>("value", &personInfo.Value) - .Build(); + auto inputType = builder.Struct(personInfo.FieldsCount)->AddField<char*>("key", &personInfo.Key).AddField<char*>("subkey", &personInfo.Subkey).AddField<char*>("value", &personInfo.Value).Build(); builder.Args()->Add(inputType); } else { builder.Args()->Add<char*>().Add<char*>().Add<ui32>(); diff --git a/yql/essentials/udfs/examples/structs/ya.make b/yql/essentials/udfs/examples/structs/ya.make index 2339cf0b272..00dacb1545a 100644 --- a/yql/essentials/udfs/examples/structs/ya.make +++ b/yql/essentials/udfs/examples/structs/ya.make @@ -1,6 +1,8 @@ YQL_UDF_CONTRIB(structs_udf) YQL_ABI_VERSION(2 
9 0) +ENABLE(YQL_STYLE_CPP) + SRCS( structs_udf.cpp ) diff --git a/yql/essentials/udfs/examples/tagged/tagged_udf.cpp b/yql/essentials/udfs/examples/tagged/tagged_udf.cpp index 0f85c3e968e..29e2c51540d 100644 --- a/yql/essentials/udfs/examples/tagged/tagged_udf.cpp +++ b/yql/essentials/udfs/examples/tagged/tagged_udf.cpp @@ -4,100 +4,100 @@ using namespace NKikimr; using namespace NUdf; namespace { - extern const char TagFoo[] = "foo"; - extern const char TagBar[] = "bar"; - extern const char TagBaz[] = "baz"; - using TTaggedFoo = TTagged<i32, TagFoo>; - using TTaggedBar = TTagged<i32, TagBar>; - using TTaggedBaz = TTagged<i32, TagBaz>; +extern const char TagFoo[] = "foo"; +extern const char TagBar[] = "bar"; +extern const char TagBaz[] = "baz"; +using TTaggedFoo = TTagged<i32, TagFoo>; +using TTaggedBar = TTagged<i32, TagBar>; +using TTaggedBaz = TTagged<i32, TagBaz>; - SIMPLE_UDF(TExample, TTaggedBaz(TTaggedFoo, TTaggedBar)) { - Y_UNUSED(valueBuilder); - const auto input1 = args[0].Get<i32>(); - const auto input2 = args[1].Get<i32>(); - return TUnboxedValuePod(input1 + input2); - } - - class TGenericTag : public TBoxedValue { - public: - typedef bool TTypeAwareMarker; +SIMPLE_UDF(TExample, TTaggedBaz(TTaggedFoo, TTaggedBar)) { + Y_UNUSED(valueBuilder); + const auto input1 = args[0].Get<i32>(); + const auto input2 = args[1].Get<i32>(); + return TUnboxedValuePod(input1 + input2); +} - TUnboxedValue Run(const IValueBuilder* valueBuilder, const TUnboxedValuePod* args) const final { - auto tagStr = valueBuilder->NewString(Tag_); - return valueBuilder->ConcatStrings(args[0], static_cast<const TUnboxedValuePod&>(tagStr)); - } +class TGenericTag: public TBoxedValue { +public: + typedef bool TTypeAwareMarker; - static const TStringRef& Name() { - static auto name = TStringRef::Of("GenericTag"); - return name; - } + TUnboxedValue Run(const IValueBuilder* valueBuilder, const TUnboxedValuePod* args) const final { + auto tagStr = valueBuilder->NewString(Tag_); + return 
valueBuilder->ConcatStrings(args[0], static_cast<const TUnboxedValuePod&>(tagStr)); + } - TGenericTag(TStringRef tag) - : Tag_(tag) - {} + static const TStringRef& Name() { + static auto name = TStringRef::Of("GenericTag"); + return name; + } - static bool DeclareSignature(const TStringRef& name, TType* userType, IFunctionTypeInfoBuilder& builder, bool typesOnly) { - if (Name() == name) { - if (!userType) { - builder.SetError("Missing user type."); - return true; - } + TGenericTag(TStringRef tag) + : Tag_(tag) + { + } - builder.UserType(userType); - const auto typeHelper = builder.TypeInfoHelper(); - const auto userTypeInspector = TTupleTypeInspector(*typeHelper, userType); - if (!userTypeInspector || userTypeInspector.GetElementsCount() < 1) { - builder.SetError("Invalid user type."); - return true; - } + static bool DeclareSignature(const TStringRef& name, TType* userType, IFunctionTypeInfoBuilder& builder, bool typesOnly) { + if (Name() == name) { + if (!userType) { + builder.SetError("Missing user type."); + return true; + } - const auto argsTypeTuple = userTypeInspector.GetElementType(0); - const auto argsTypeInspector = TTupleTypeInspector(*typeHelper, argsTypeTuple); - if (!argsTypeInspector) { - builder.SetError("Invalid user type - expected tuple."); - return true; - } + builder.UserType(userType); + const auto typeHelper = builder.TypeInfoHelper(); + const auto userTypeInspector = TTupleTypeInspector(*typeHelper, userType); + if (!userTypeInspector || userTypeInspector.GetElementsCount() < 1) { + builder.SetError("Invalid user type."); + return true; + } - if (const auto argsCount = argsTypeInspector.GetElementsCount(); argsCount != 1) { - ::TStringBuilder sb; - sb << "Invalid user type - expected one argument, got: " << argsCount; - builder.SetError(sb); - return true; - } + const auto argsTypeTuple = userTypeInspector.GetElementType(0); + const auto argsTypeInspector = TTupleTypeInspector(*typeHelper, argsTypeTuple); + if (!argsTypeInspector) { + 
builder.SetError("Invalid user type - expected tuple."); + return true; + } - const auto inputType = argsTypeInspector.GetElementType(0); - const auto tagged = TTaggedTypeInspector(*typeHelper, inputType); - if (!tagged) { - ::TStringBuilder sb; - sb << "Expected tagged string"; - builder.SetError(sb); - return true; - } + if (const auto argsCount = argsTypeInspector.GetElementsCount(); argsCount != 1) { + ::TStringBuilder sb; + sb << "Invalid user type - expected one argument, got: " << argsCount; + builder.SetError(sb); + return true; + } - const auto data = TDataTypeInspector(*typeHelper, tagged.GetBaseType()); - if (!data || data.GetTypeId() != TDataType<const char*>::Id) { - ::TStringBuilder sb; - sb << "Expected tagged string"; - builder.SetError(sb); - return true; - } + const auto inputType = argsTypeInspector.GetElementType(0); + const auto tagged = TTaggedTypeInspector(*typeHelper, inputType); + if (!tagged) { + ::TStringBuilder sb; + sb << "Expected tagged string"; + builder.SetError(sb); + return true; + } - builder.Args()->Add(inputType).Done().Returns(inputType); - if (!typesOnly) { - builder.Implementation(new TGenericTag(tagged.GetTag())); - } + const auto data = TDataTypeInspector(*typeHelper, tagged.GetBaseType()); + if (!data || data.GetTypeId() != TDataType<const char*>::Id) { + ::TStringBuilder sb; + sb << "Expected tagged string"; + builder.SetError(sb); return true; } - else { - return false; + + builder.Args()->Add(inputType).Done().Returns(inputType); + if (!typesOnly) { + builder.Implementation(new TGenericTag(tagged.GetTag())); } + return true; + } else { + return false; } - private: - TStringRef Tag_; - }; + } +private: + TStringRef Tag_; +}; - SIMPLE_MODULE(TTaggedModule, TExample, TGenericTag) -} +SIMPLE_MODULE(TTaggedModule, TExample, TGenericTag) +} // namespace REGISTER_MODULES(TTaggedModule) diff --git a/yql/essentials/udfs/examples/tagged/ya.make b/yql/essentials/udfs/examples/tagged/ya.make index 2afb4f4d42e..7209cbfbe56 100644 
--- a/yql/essentials/udfs/examples/tagged/ya.make +++ b/yql/essentials/udfs/examples/tagged/ya.make @@ -1,6 +1,8 @@ YQL_UDF_CONTRIB(tagged_udf) YQL_ABI_VERSION(2 21 0) +ENABLE(YQL_STYLE_CPP) + SRCS( tagged_udf.cpp ) diff --git a/yql/essentials/udfs/examples/type_inspection/type_inspection_udf.cpp b/yql/essentials/udfs/examples/type_inspection/type_inspection_udf.cpp index b4b3e4709e1..07ad4f724fd 100644 --- a/yql/essentials/udfs/examples/type_inspection/type_inspection_udf.cpp +++ b/yql/essentials/udfs/examples/type_inspection/type_inspection_udf.cpp @@ -6,7 +6,6 @@ #include <util/generic/yexception.h> - using namespace NKikimr; using namespace NUdf; @@ -15,8 +14,7 @@ namespace { ////////////////////////////////////////////////////////////////////////////// // TZip ////////////////////////////////////////////////////////////////////////////// -class TZip: public TBoxedValue -{ +class TZip: public TBoxedValue { public: static TStringRef Name() { static auto name = TStringRef::Of("Zip"); @@ -25,15 +23,15 @@ public: private: TUnboxedValue Run( - const IValueBuilder* valueBuilder, - const TUnboxedValuePod* args) const override - { + const IValueBuilder* valueBuilder, + const TUnboxedValuePod* args) const override { const auto it1 = args[0].GetListIterator(); const auto it2 = args[1].GetListIterator(); std::vector<TUnboxedValue> list; - if (args[0].HasFastListLength() && args[1].HasFastListLength()) + if (args[0].HasFastListLength() && args[1].HasFastListLength()) { list.reserve(std::min(args[0].GetListLength(), args[1].GetListLength())); + } for (TUnboxedValue one, two, *items = nullptr; it1.Next(one) && it2.Next(two);) { auto tuple = valueBuilder->NewArray(2U, items); items[0] = std::move(one); @@ -48,8 +46,7 @@ private: ////////////////////////////////////////////////////////////////////////////// // TFold ////////////////////////////////////////////////////////////////////////////// -class TFold : public TBoxedValue -{ +class TFold: public TBoxedValue { public: 
static TStringRef Name() { static auto name = TStringRef::Of("Fold"); @@ -59,8 +56,7 @@ public: private: TUnboxedValue Run( const IValueBuilder* valueBuilder, - const TUnboxedValuePod* args) const override - { + const TUnboxedValuePod* args) const override { const auto it = args[0].GetListIterator(); TUnboxedValue state = TUnboxedValuePod(args[1]); auto func = args[2]; @@ -76,10 +72,9 @@ private: ////////////////////////////////////////////////////////////////////////////// // TInterleave ////////////////////////////////////////////////////////////////////////////// -class TInterleave : public TBoxedValue -{ +class TInterleave: public TBoxedValue { public: - class TValue : public TBoxedValue { + class TValue: public TBoxedValue { public: TValue(const IValueBuilder* valueBuilder, const TUnboxedValuePod& left, const TUnboxedValuePod& right) : ValueBuilder_(valueBuilder) @@ -121,8 +116,7 @@ public: private: TUnboxedValue Run( const IValueBuilder* valueBuilder, - const TUnboxedValuePod* args) const override - { + const TUnboxedValuePod* args) const override { return TUnboxedValuePod(new TValue(valueBuilder, args[0], args[1])); } }; @@ -130,14 +124,14 @@ private: ////////////////////////////////////////////////////////////////////////////// // TTypeInspectionModule ////////////////////////////////////////////////////////////////////////////// -class TTypeInspectionModule: public IUdfModule -{ +class TTypeInspectionModule: public IUdfModule { public: TStringRef Name() const { return TStringRef::Of("TypeInspection"); } - void CleanupOnTerminate() const final {} + void CleanupOnTerminate() const final { + } void GetAllFunctions(IFunctionsSink& sink) const final { sink.Add(TZip::Name())->SetTypeAwareness(); @@ -146,12 +140,11 @@ public: } void BuildFunctionTypeInfo( - const TStringRef& name, - TType* userType, - const TStringRef& typeConfig, - ui32 flags, - IFunctionTypeInfoBuilder& builder) const final - { + const TStringRef& name, + TType* userType, + const TStringRef& 
typeConfig, + ui32 flags, + IFunctionTypeInfoBuilder& builder) const final { try { Y_UNUSED(typeConfig); diff --git a/yql/essentials/udfs/examples/type_inspection/ya.make b/yql/essentials/udfs/examples/type_inspection/ya.make index 7ce6c1b26dc..bbe8e26e0ae 100644 --- a/yql/essentials/udfs/examples/type_inspection/ya.make +++ b/yql/essentials/udfs/examples/type_inspection/ya.make @@ -1,6 +1,8 @@ YQL_UDF_CONTRIB(type_inspection_udf) YQL_ABI_VERSION(2 9 0) +ENABLE(YQL_STYLE_CPP) + SRCS( type_inspection_udf.cpp ) diff --git a/yql/essentials/udfs/language/yql/ya.make b/yql/essentials/udfs/language/yql/ya.make index e0c16d6f5da..b9e673a7538 100644 --- a/yql/essentials/udfs/language/yql/ya.make +++ b/yql/essentials/udfs/language/yql/ya.make @@ -8,6 +8,8 @@ YQL_ABI_VERSION( 0 ) +ENABLE(YQL_STYLE_CPP) + SUBSCRIBER(g:yql) SRCS( diff --git a/yql/essentials/udfs/language/yql/yql_language_udf.cpp b/yql/essentials/udfs/language/yql/yql_language_udf.cpp index 6bb324562fc..d410538a481 100644 --- a/yql/essentials/udfs/language/yql/yql_language_udf.cpp +++ b/yql/essentials/udfs/language/yql/yql_language_udf.cpp @@ -19,12 +19,12 @@ using namespace NSQLTranslation; using namespace NSQLTranslationV1; using namespace NSQLv1Generated; -class TRuleFreqTranslation : public TSqlTranslation -{ +class TRuleFreqTranslation: public TSqlTranslation { public: TRuleFreqTranslation(TContext& ctx) : TSqlTranslation(ctx, ctx.Settings.Mode) - {} + { + } }; class TRuleFreqVisitor { @@ -75,7 +75,6 @@ public: continue; } - Freqs_[std::make_pair(fullName, fieldFullName)] += 1; } @@ -106,49 +105,49 @@ private: void VisitHint(const TRule_table_hint& msg, const TString& parent) { switch (msg.Alt_case()) { - case TRule_table_hint::kAltTableHint1: { - const auto& alt = msg.GetAlt_table_hint1(); - const TString id = Id(alt.GetRule_an_id_hint1(), Translation_); - Freqs_[std::make_pair(parent, id)] += 1; - break; - } - case TRule_table_hint::kAltTableHint2: { - const auto& alt = msg.GetAlt_table_hint2(); - 
Freqs_[std::make_pair(parent, alt.GetToken1().GetValue())] += 1; - break; - } - case TRule_table_hint::kAltTableHint3: { - const auto& alt = msg.GetAlt_table_hint3(); - Freqs_[std::make_pair(parent, alt.GetToken1().GetValue())] += 1; - break; - } - case TRule_table_hint::kAltTableHint4: { - const auto& alt = msg.GetAlt_table_hint4(); - Freqs_[std::make_pair(parent, alt.GetToken1().GetValue())] += 1; - break; - } - case TRule_table_hint::ALT_NOT_SET: - return; + case TRule_table_hint::kAltTableHint1: { + const auto& alt = msg.GetAlt_table_hint1(); + const TString id = Id(alt.GetRule_an_id_hint1(), Translation_); + Freqs_[std::make_pair(parent, id)] += 1; + break; + } + case TRule_table_hint::kAltTableHint2: { + const auto& alt = msg.GetAlt_table_hint2(); + Freqs_[std::make_pair(parent, alt.GetToken1().GetValue())] += 1; + break; + } + case TRule_table_hint::kAltTableHint3: { + const auto& alt = msg.GetAlt_table_hint3(); + Freqs_[std::make_pair(parent, alt.GetToken1().GetValue())] += 1; + break; + } + case TRule_table_hint::kAltTableHint4: { + const auto& alt = msg.GetAlt_table_hint4(); + Freqs_[std::make_pair(parent, alt.GetToken1().GetValue())] += 1; + break; + } + case TRule_table_hint::ALT_NOT_SET: + return; } } void VisitHints(const TRule_table_hints& msg, const TString& parent) { auto& block = msg.GetBlock2(); switch (block.Alt_case()) { - case TRule_table_hints::TBlock2::kAlt1: { - VisitHint(block.GetAlt1().GetRule_table_hint1(), parent); - break; - } - case TRule_table_hints::TBlock2::kAlt2: { - VisitHint(block.GetAlt2().GetRule_table_hint2(), parent); - for (const auto& x : block.GetAlt2().GetBlock3()) { - VisitHint(x.GetRule_table_hint2(), parent); + case TRule_table_hints::TBlock2::kAlt1: { + VisitHint(block.GetAlt1().GetRule_table_hint1(), parent); + break; } + case TRule_table_hints::TBlock2::kAlt2: { + VisitHint(block.GetAlt2().GetRule_table_hint2(), parent); + for (const auto& x : block.GetAlt2().GetBlock3()) { + VisitHint(x.GetRule_table_hint2(), 
parent); + } - break; - } - case TRule_table_hints::TBlock2::ALT_NOT_SET: - return; + break; + } + case TRule_table_hints::TBlock2::ALT_NOT_SET: + return; } } @@ -167,7 +166,7 @@ private: } } - template<typename TUnaryCasualExprRule> + template <typename TUnaryCasualExprRule> void VisitUnaryCasualSubexpr(const TUnaryCasualExprRule& msg) { const auto& block = msg.GetBlock1(); TString func; @@ -249,14 +248,14 @@ private: const auto& alt = msg.GetAlt_atom_expr7(); module = Id(alt.GetRule_an_id_or_type1(), Translation_); switch (alt.GetBlock3().Alt_case()) { - case TRule_atom_expr::TAlt7::TBlock3::kAlt1: - func = Id(alt.GetBlock3().GetAlt1().GetRule_id_or_type1(), Translation_); - break; - case TRule_atom_expr::TAlt7::TBlock3::kAlt2: { - return false; - } - case TRule_atom_expr::TAlt7::TBlock3::ALT_NOT_SET: - Y_ABORT("Unsigned number: you should change implementation according to grammar changes"); + case TRule_atom_expr::TAlt7::TBlock3::kAlt1: + func = Id(alt.GetBlock3().GetAlt1().GetRule_id_or_type1(), Translation_); + break; + case TRule_atom_expr::TAlt7::TBlock3::kAlt2: { + return false; + } + case TRule_atom_expr::TAlt7::TBlock3::ALT_NOT_SET: + Y_ABORT("Unsigned number: you should change implementation according to grammar changes"); } return true; @@ -270,14 +269,14 @@ private: const auto& alt = msg.GetAlt_in_atom_expr6(); module = Id(alt.GetRule_an_id_or_type1(), Translation_); switch (alt.GetBlock3().Alt_case()) { - case TRule_in_atom_expr::TAlt6::TBlock3::kAlt1: - func = Id(alt.GetBlock3().GetAlt1().GetRule_id_or_type1(), Translation_); - break; - case TRule_in_atom_expr::TAlt6::TBlock3::kAlt2: { - return false; - } - case TRule_in_atom_expr::TAlt6::TBlock3::ALT_NOT_SET: - Y_ABORT("You should change implementation according to grammar changes"); + case TRule_in_atom_expr::TAlt6::TBlock3::kAlt1: + func = Id(alt.GetBlock3().GetAlt1().GetRule_id_or_type1(), Translation_); + break; + case TRule_in_atom_expr::TAlt6::TBlock3::kAlt2: { + return false; + } + case 
TRule_in_atom_expr::TAlt6::TBlock3::ALT_NOT_SET: + Y_ABORT("You should change implementation according to grammar changes"); } return true; @@ -306,7 +305,8 @@ bool GetParseTree( NYql::TIssues& issues, NSQLTranslationV1::TLexers& lexers, NSQLTranslationV1::TParsers& parsers, - google::protobuf::Message*& message) + google::protobuf::Message*& message, + bool isAmbiguityError = false) { if (!ParseTranslationSettings(query, settings, issues)) { return false; @@ -323,8 +323,8 @@ bool GetParseTree( return false; } - parsers.Antlr4 = NSQLTranslationV1::MakeAntlr4ParserFactory(); - parsers.Antlr4Ansi = NSQLTranslationV1::MakeAntlr4AnsiParserFactory(); + parsers.Antlr4 = NSQLTranslationV1::MakeAntlr4ParserFactory(isAmbiguityError); + parsers.Antlr4Ansi = NSQLTranslationV1::MakeAntlr4AnsiParserFactory(isAmbiguityError); message = NSQLTranslationV1::SqlAST( parsers, query, @@ -402,7 +402,8 @@ SIMPLE_UDF(TRuleFreq, TOptional<TRuleFreqResult>(TAutoMap<char*>)) { } } -SIMPLE_UDF(TTestSyntax, TOptional<char*>(TAutoMap<char*>)) try { +SIMPLE_UDF(TTestSyntax, TOptional<char*>(TAutoMap<char*>)) +try { const TString query(args[0].AsStringRef()); google::protobuf::Arena arena; @@ -414,7 +415,7 @@ SIMPLE_UDF(TTestSyntax, TOptional<char*>(TAutoMap<char*>)) try { NSQLTranslationV1::TParsers parsers; google::protobuf::Message* tree; - if (!GetParseTree(query, settings, issues, lexers, parsers, tree)) { + if (!GetParseTree(query, settings, issues, lexers, parsers, tree, /*isAmbiguityError=*/true)) { return valueBuilder->NewString(issues.ToString()); } @@ -424,10 +425,8 @@ SIMPLE_UDF(TTestSyntax, TOptional<char*>(TAutoMap<char*>)) try { } SIMPLE_MODULE(TYqlLangModule, - TObfuscate, - TRuleFreq, - TTestSyntax -); + TObfuscate, + TRuleFreq, + TTestSyntax); REGISTER_MODULES(TYqlLangModule); - diff --git a/yql/essentials/udfs/logs/dsv/dsv_udf.cpp b/yql/essentials/udfs/logs/dsv/dsv_udf.cpp index 421e36a0100..8d66253a32b 100644 --- a/yql/essentials/udfs/logs/dsv/dsv_udf.cpp +++ 
b/yql/essentials/udfs/logs/dsv/dsv_udf.cpp @@ -11,15 +11,13 @@ using namespace NUdf; namespace { -struct TKsvIndexes -{ +struct TKsvIndexes { ui32 Key; ui32 Subkey; ui32 Value; }; -struct TResultIndexes -{ +struct TResultIndexes { TType* DictType; ui32 Key; @@ -40,28 +38,26 @@ void ParseDsv(const TUnboxedValuePod& value, const auto from = std::distance(input.begin(), part.begin()); builder->Add( valueBuilder->SubString(value, from, pos), - valueBuilder->SubString(value, from + pos + 1U, part.length() - pos - 1U) - ); + valueBuilder->SubString(value, from + pos + 1U, part.length() - pos - 1U)); } } } -class TDsvReadRecord: public TBoxedValue -{ +class TDsvReadRecord: public TBoxedValue { public: - class TFactory : public TBoxedValue { + class TFactory: public TBoxedValue { public: TFactory(const TResultIndexes& fieldIndexes, const TKsvIndexes& ksvIndexes) - : ResultIndexes_(fieldIndexes) - , KsvIndexes_(ksvIndexes) + : ResultIndexes_(fieldIndexes) + , KsvIndexes_(ksvIndexes) { } + private: TUnboxedValue Run( const IValueBuilder* valueBuilder, - const TUnboxedValuePod* args) const final try - { + const TUnboxedValuePod* args) const final try { const auto optRunConfig = args[0]; TUnboxedValue separator; if (optRunConfig && !optRunConfig.AsStringRef().Empty()) { @@ -71,8 +67,7 @@ public: } return TUnboxedValuePod(new TDsvReadRecord(separator, ResultIndexes_, KsvIndexes_)); - } - catch (const std::exception& e) { + } catch (const std::exception& e) { UdfTerminate(e.what()); } @@ -88,16 +83,15 @@ public: , KsvIndexes_(ksvIndexes) { } + private: TUnboxedValue Run( - const IValueBuilder* valueBuilder, - const TUnboxedValuePod* args) const final try - { + const IValueBuilder* valueBuilder, + const TUnboxedValuePod* args) const final try { auto keyData = args[0].GetElement(KsvIndexes_.Key); auto subkeyData = args[0].GetElement(KsvIndexes_.Subkey); auto valueData = args[0].GetElement(KsvIndexes_.Value); - auto dict = valueBuilder->NewDict(ResultIndexes_.DictType, 0); 
ParseDsv(valueData, Separator_.AsStringRef(), valueBuilder, dict.Get()); @@ -108,8 +102,7 @@ private: items[ResultIndexes_.Subkey] = subkeyData; items[ResultIndexes_.Dict] = dict->Build(); return result; - } - catch (const std::exception& e) { + } catch (const std::exception& e) { UdfTerminate(e.what()); } @@ -118,48 +111,46 @@ private: const TKsvIndexes KsvIndexes_; }; -class TDsvParse: public TBoxedValue -{ +class TDsvParse: public TBoxedValue { public: explicit TDsvParse(TType* dictType) : DictType_(dictType) - {} + { + } + private: TUnboxedValue Run( - const IValueBuilder* valueBuilder, - const TUnboxedValuePod* args) const final try - { - const std::string_view separator = args[1] ? - std::string_view(args[1].AsStringRef()): - std::string_view("\t"); + const IValueBuilder* valueBuilder, + const TUnboxedValuePod* args) const final try { + const std::string_view separator = args[1] ? std::string_view(args[1].AsStringRef()) : std::string_view("\t"); auto dict = valueBuilder->NewDict(DictType_, 0); ParseDsv(args[0], separator, valueBuilder, dict.Get()); return dict->Build(); - } - catch (const std::exception& e) { + } catch (const std::exception& e) { UdfTerminate(e.what()); } const TType* DictType_; }; -#define TYPE_TO_STRING(type) \ -case TDataType<type>::Id: part += ToString(member.Get<type>()); break; +#define TYPE_TO_STRING(type) \ + case TDataType<type>::Id: \ + part += ToString(member.Get<type>()); \ + break; -class TDsvSerialize: public TBoxedValue -{ +class TDsvSerialize: public TBoxedValue { public: explicit TDsvSerialize(const TVector<TDataTypeId>& typeIds, TStructTypeInspector* structInspector) : TypeIds_(typeIds) , StructInspector_(structInspector) - {} + { + } private: TUnboxedValue Run( - const IValueBuilder* valueBuilder, - const TUnboxedValuePod* args) const final try - { + const IValueBuilder* valueBuilder, + const TUnboxedValuePod* args) const final try { TVector<TString> result; if (const ui32 structSize = StructInspector_->GetMembersCount()) { 
result.reserve(structSize); @@ -179,14 +170,12 @@ private: default: part += member.AsStringRef(); break; - } result.emplace_back(std::move(part)); } } return valueBuilder->NewString(JoinStrings(result, "\t")); - } - catch (const std::exception& e) { + } catch (const std::exception& e) { UdfTerminate(e.what()); } @@ -194,14 +183,14 @@ private: THolder<TStructTypeInspector> StructInspector_; }; -class TDsvModule: public IUdfModule -{ +class TDsvModule: public IUdfModule { public: TStringRef Name() const { return TStringRef::Of("Dsv"); } - void CleanupOnTerminate() const final {} + void CleanupOnTerminate() const final { + } void GetAllFunctions(IFunctionsSink& sink) const final { sink.Add(TStringRef::Of("ReadRecord")); @@ -210,39 +199,36 @@ public: } void BuildFunctionTypeInfo( - const TStringRef& name, - TType* userType, - const TStringRef& typeConfig, - ui32 flags, - IFunctionTypeInfoBuilder& builder) const final try - { + const TStringRef& name, + TType* userType, + const TStringRef& typeConfig, + ui32 flags, + IFunctionTypeInfoBuilder& builder) const final try { Y_UNUSED(typeConfig); bool typesOnly = (flags & TFlags::TypesOnly); if (TStringRef::Of("ReadRecord") == name) { TKsvIndexes ksvIndexes; - auto recordType = builder.Struct(3U)-> - AddField<char*>("key", &ksvIndexes.Key) - .AddField<char*>("subkey", &ksvIndexes.Subkey) - .AddField<char*>("value", &ksvIndexes.Value) - .Build(); + auto recordType = builder.Struct(3U)->AddField<char*>("key", &ksvIndexes.Key).AddField<char*>("subkey", &ksvIndexes.Subkey).AddField<char*>("value", &ksvIndexes.Value).Build(); TResultIndexes resultIndexes; resultIndexes.DictType = builder.Dict()->Key<char*>().Value<char*>().Build(); const auto structType = builder.Struct(resultIndexes.FieldsCount) - ->AddField<char*>("key", &resultIndexes.Key) - .AddField<char*>("subkey", &resultIndexes.Subkey) - .AddField("dict", resultIndexes.DictType, &resultIndexes.Dict) - .Build(); + ->AddField<char*>("key", &resultIndexes.Key) + 
.AddField<char*>("subkey", &resultIndexes.Subkey) + .AddField("dict", resultIndexes.DictType, &resultIndexes.Dict) + .Build(); builder.Returns(structType) - .Args()->Add(recordType).Done() - .RunConfig<TOptional<char*>>(); + .Args() + ->Add(recordType) + .Done() + .RunConfig<TOptional<char*>>(); if (!typesOnly) { builder.Implementation(new TDsvReadRecord::TFactory( - resultIndexes, ksvIndexes)); + resultIndexes, ksvIndexes)); } builder.IsStrict(); } else if (TStringRef::Of("Parse") == name) { @@ -250,8 +236,12 @@ public: auto dictType = builder.Dict()->Key<char*>().Value<char*>().Build(); builder.Returns(dictType) - .Args()->Add<char*>().Flags(ICallablePayload::TArgumentFlags::AutoMap).Add(optionalStringType).Done() - .OptionalArgs(1); + .Args() + ->Add<char*>() + .Flags(ICallablePayload::TArgumentFlags::AutoMap) + .Add(optionalStringType) + .Done() + .OptionalArgs(1); if (!typesOnly) { builder.Implementation(new TDsvParse(dictType)); @@ -304,7 +294,6 @@ public: builder.Implementation(new TDsvSerialize(typeIds, structInspector.Release())); } builder.IsStrict(); - } } catch (const std::exception& e) { builder.SetError(CurrentExceptionMessage()); diff --git a/yql/essentials/udfs/logs/dsv/ya.make b/yql/essentials/udfs/logs/dsv/ya.make index 34e29294233..54b7f5c6d0c 100644 --- a/yql/essentials/udfs/logs/dsv/ya.make +++ b/yql/essentials/udfs/logs/dsv/ya.make @@ -6,6 +6,8 @@ YQL_ABI_VERSION( 0 ) +ENABLE(YQL_STYLE_CPP) + PEERDIR( library/cpp/deprecated/split ) diff --git a/yql/essentials/udfs/test/simple/simple_udf.cpp b/yql/essentials/udfs/test/simple/simple_udf.cpp index 5ae03a7dd66..cca46ab4190 100644 --- a/yql/essentials/udfs/test/simple/simple_udf.cpp +++ b/yql/essentials/udfs/test/simple/simple_udf.cpp @@ -12,7 +12,7 @@ namespace { SIMPLE_UDF(TCrash, ui64(char*)) { Y_UNUSED(valueBuilder); Y_UNUSED(args); - int *ptr = nullptr; + int* ptr = nullptr; *ptr = 1; return TUnboxedValuePod(0); } @@ -59,15 +59,17 @@ SIMPLE_UDF(TEcho, char*(TOptional<char*>)) { } } 
-SIMPLE_UDF_WITH_OPTIONAL_ARGS(TEchoWithPrefix, char*(char*,TOptional<char*>), 1) { - if (!args[1]) +SIMPLE_UDF_WITH_OPTIONAL_ARGS(TEchoWithPrefix, char*(char*, TOptional<char*>), 1) { + if (!args[1]) { return TUnboxedValuePod(args[0]); + } return valueBuilder->ConcatStrings(args[1], args[0]); } SIMPLE_UDF_RUN(TEchoWithRunPrefix, char*(char*), TOptional<char*>) { - if (!RunConfig) + if (!RunConfig) { return TUnboxedValuePod(args[0]); + } return valueBuilder->PrependString(RunConfig.AsStringRef(), args[0]); } @@ -102,11 +104,11 @@ using TComplexReturnTypeSignature = TDict<char*, ui32>(char*); SIMPLE_UDF(TComplexReturnType, TComplexReturnTypeSignature) { const TStringBuf s = args[0].AsStringRef(); THashMap<TString, ui32> stat; - for(auto c: s) { - ++stat[TString{c}]; + for (auto c : s) { + ++stat[TString{c}]; } auto dictBuilder = valueBuilder->NewDict(ReturnType_, 0); - for(const auto& [k, v]: stat) { + for (const auto& [k, v] : stat) { dictBuilder->Add(valueBuilder->NewString(k), TUnboxedValuePod{v}); } return dictBuilder->Build(); @@ -127,50 +129,40 @@ SIMPLE_UDF_WITH_OPTIONAL_ARGS(TNamedArgs, char*(ui32, TOptional<ui32>, TNamedC, return valueBuilder->NewString(res); } -UDF(TIncrement, builder.Args(2)-> - Add<ui32>().Name("Arg1").Flags(ICallablePayload::TArgumentFlags::AutoMap) - .Add(builder.SimpleType<TOptional<ui32>>()).Name("Arg2") - .Done().Returns<ui32>().OptionalArgs(1);) { +UDF(TIncrement, builder.Args(2)->Add<ui32>().Name("Arg1").Flags(ICallablePayload::TArgumentFlags::AutoMap).Add(builder.SimpleType<TOptional<ui32>>()).Name("Arg2").Done().Returns<ui32>().OptionalArgs(1);) { Y_UNUSED(valueBuilder); return TUnboxedValuePod(args[0].Get<ui32>() + args[1].GetOrDefault<ui32>(1)); } -UDF(TIncrementOpt, builder.Args(2)-> - Add<ui32>().Name("Arg1").Flags(ICallablePayload::TArgumentFlags::AutoMap) - .Add(builder.SimpleType<TOptional<ui32>>()).Name("Arg2") - .Done().Returns(builder.SimpleType<TOptional<ui32>>()).OptionalArgs(1);) { +UDF(TIncrementOpt, 
builder.Args(2)->Add<ui32>().Name("Arg1").Flags(ICallablePayload::TArgumentFlags::AutoMap).Add(builder.SimpleType<TOptional<ui32>>()).Name("Arg2").Done().Returns(builder.SimpleType<TOptional<ui32>>()).OptionalArgs(1);) { Y_UNUSED(valueBuilder); if (const ui32 by = args[1].GetOrDefault<ui32>(0)) { return TUnboxedValuePod(args[0].Get<ui32>() + by); - } - else { + } else { return TUnboxedValuePod(); } } UDF_IMPL(TIncrementWithCounters, - builder.Args(1)->Add<ui32>().Done().Returns<ui32>(); - , - mutable ::NKikimr::NUdf::TCounter Counter_; - mutable ::NKikimr::NUdf::TScopedProbe Scope_; - , - Counter_ = builder.GetCounter("IncrementWithCounters_Calls", true); - Scope_ = builder.GetScopedProbe("IncrementWithCounters_Time"); - , - "" - , - "" - , - void -) { + builder.Args(1)->Add<ui32>().Done().Returns<ui32>(); + , + mutable ::NKikimr::NUdf::TCounter Counter_; + mutable ::NKikimr::NUdf::TScopedProbe Scope_; + , + Counter_ = builder.GetCounter("IncrementWithCounters_Calls", true); + Scope_ = builder.GetScopedProbe("IncrementWithCounters_Time"); + , + "", + "", + void) { Y_UNUSED(valueBuilder); Counter_.Inc(); - with_lock(Scope_) { + with_lock (Scope_) { return TUnboxedValuePod(args[0].Get<ui32>() + 1); } } -class TGenericAsStruct : public TBoxedValue { +class TGenericAsStruct: public TBoxedValue { public: typedef bool TTypeAwareMarker; @@ -190,7 +182,8 @@ public: TGenericAsStruct(size_t argc) : Argc_(argc) - {} + { + } static bool DeclareSignature(const TStringRef& name, TType* userType, IFunctionTypeInfoBuilder& builder, bool typesOnly) { if (Name() == name) { @@ -230,25 +223,26 @@ public: builder.Implementation(new TGenericAsStruct(argsCount)); } return true; - } - else { + } else { return false; } } + private: const size_t Argc_; }; -class TLogging : public TBoxedValue { +class TLogging: public TBoxedValue { public: TLogging(TLoggerPtr logger, TLogComponentId component) : Logger_(logger) , Component_(component) - {} + { + } TUnboxedValue Run(const IValueBuilder* 
valueBuilder, const TUnboxedValuePod* args) const final { Y_UNUSED(valueBuilder); - auto level = Min(args[0].Get<ui32>(),static_cast<ui32>(ELogLevel::Trace)); + auto level = Min(args[0].Get<ui32>(), static_cast<ui32>(ELogLevel::Trace)); Logger_->Log(Component_, (ELogLevel)level, args[1].AsStringRef()); return TUnboxedValue::Void(); } @@ -273,8 +267,7 @@ public: builder.Implementation(new TLogging(logger, component)); } return true; - } - else { + } else { return false; } } @@ -285,27 +278,26 @@ private: }; SIMPLE_MODULE(TSimpleUdfModule, - TCrash, - TException, - TReturnNull, - TReturnVoid, - TReturnEmpty, - TReturnBrokenInt, - TEcho, - TEchoWithPrefix, - TEchoWithRunPrefix, - TConst, - TConcat, - TRepeat, - TSleep, - TComplexReturnType, - TNamedArgs, - TIncrement, - TIncrementOpt, - TIncrementWithCounters, - TGenericAsStruct, - TLogging - ) + TCrash, + TException, + TReturnNull, + TReturnVoid, + TReturnEmpty, + TReturnBrokenInt, + TEcho, + TEchoWithPrefix, + TEchoWithRunPrefix, + TConst, + TConcat, + TRepeat, + TSleep, + TComplexReturnType, + TNamedArgs, + TIncrement, + TIncrementOpt, + TIncrementWithCounters, + TGenericAsStruct, + TLogging) } // namespace diff --git a/yql/essentials/udfs/test/simple/ya.make b/yql/essentials/udfs/test/simple/ya.make index 1d045c9ef98..5bef9c94891 100644 --- a/yql/essentials/udfs/test/simple/ya.make +++ b/yql/essentials/udfs/test/simple/ya.make @@ -1,6 +1,8 @@ YQL_UDF_CONTRIB(simple_udf) YQL_ABI_VERSION(2 42 0) +ENABLE(YQL_STYLE_CPP) + SRCS( simple_udf.cpp ) diff --git a/yql/essentials/udfs/test/test_import/test_import_udf.cpp b/yql/essentials/udfs/test/test_import/test_import_udf.cpp index 158194b792e..7cee1a0fa8e 100644 --- a/yql/essentials/udfs/test/test_import/test_import_udf.cpp +++ b/yql/essentials/udfs/test/test_import/test_import_udf.cpp @@ -24,9 +24,8 @@ SIMPLE_UDF(TRepeat, char*(char*, ui64)) { } SIMPLE_MODULE(TTestImportUdfModule, - TConcat, - TRepeat - ) + TConcat, + TRepeat) } // namespace diff --git 
a/yql/essentials/udfs/test/test_import/ya.make b/yql/essentials/udfs/test/test_import/ya.make index 1adb06149f1..f6ccf6c65f2 100644 --- a/yql/essentials/udfs/test/test_import/ya.make +++ b/yql/essentials/udfs/test/test_import/ya.make @@ -1,6 +1,8 @@ YQL_UDF_CONTRIB(test_import_udf) YQL_ABI_VERSION(2 9 0) +ENABLE(YQL_STYLE_CPP) + SRCS( test_import_udf.cpp ) diff --git a/yql/essentials/utils/backtrace/backtrace.cpp b/yql/essentials/utils/backtrace/backtrace.cpp index 938ac90501a..5d289454210 100644 --- a/yql/essentials/utils/backtrace/backtrace.cpp +++ b/yql/essentials/utils/backtrace/backtrace.cpp @@ -18,7 +18,7 @@ #include <util/system/mlock.h> #ifdef _linux_ -#include <signal.h> + #include <signal.h> #endif #include <functional> @@ -38,16 +38,16 @@ bool SetSignalHandler(int signo, void (*handler)(int)) { } namespace { -#if defined(_linux_) && defined(_x86_64_) - bool SetSignalAction(int signo, void (*handler)(int, siginfo_t*, void*)) { - struct sigaction sa; - memset(&sa, 0, sizeof(sa)); - sa.sa_flags = SA_RESETHAND | SA_SIGINFO; - sa.sa_sigaction = (decltype(sa.sa_sigaction))handler; - sigfillset(&sa.sa_mask); - return sigaction(signo, &sa, nullptr) != -1; - } -#endif + #if defined(_linux_) && defined(_x86_64_) +bool SetSignalAction(int signo, void (*handler)(int, siginfo_t*, void*)) { + struct sigaction sa; + memset(&sa, 0, sizeof(sa)); + sa.sa_flags = SA_RESETHAND | SA_SIGINFO; + sa.sa_sigaction = (decltype(sa.sa_sigaction))handler; + sigfillset(&sa.sa_mask); + return sigaction(signo, &sa, nullptr) != -1; +} + #endif } // namespace #endif // _win_ @@ -56,7 +56,7 @@ TAtomic BacktraceStarted = 0; void SetFatalSignalHandler(void (*handler)(int)) { Y_UNUSED(handler); #ifndef _win_ - for (int signo: {SIGSEGV, SIGILL, SIGABRT, SIGFPE}) { + for (int signo : {SIGSEGV, SIGILL, SIGABRT, SIGFPE}) { if (!SetSignalHandler(signo, handler)) { ythrow TSystemError() << "Cannot set handler for signal " << strsignal(signo); } @@ -67,7 +67,7 @@ void SetFatalSignalHandler(void 
(*handler)(int)) { #if defined(_linux_) && defined(_x86_64_) void SetFatalSignalAction(void (*sigaction)(int, siginfo_t*, void*)) { - for (int signo: {SIGSEGV, SIGILL, SIGABRT, SIGFPE}) { + for (int signo : {SIGSEGV, SIGILL, SIGABRT, SIGFPE}) { if (!SetSignalAction(signo, sigaction)) { ythrow TSystemError() << "Cannot set sigaction for signal " << strsignal(signo); } @@ -76,140 +76,139 @@ void SetFatalSignalAction(void (*sigaction)(int, siginfo_t*, void*)) #endif namespace { - std::vector<std::function<void(int)>> Before, After; - bool KikimrSymbolize = false; - NYql::NBacktrace::TCollectedFrame Frames[NYql::NBacktrace::Limit]; +std::vector<std::function<void(int)>> Before, After; +bool KikimrSymbolize = false; +NYql::NBacktrace::TCollectedFrame Frames[NYql::NBacktrace::Limit]; - void CallCallbacks(decltype(Before)& where, int signum) { - for (const auto &fn: where) { - if (fn) { - fn(signum); - } +void CallCallbacks(decltype(Before)& where, int signum) { + for (const auto& fn : where) { + if (fn) { + fn(signum); } } +} - void PrintFrames(IOutputStream* out, const NYql::NBacktrace::TCollectedFrame* frames, size_t cnt); +void PrintFrames(IOutputStream* out, const NYql::NBacktrace::TCollectedFrame* frames, size_t cnt); - void DoBacktrace(IOutputStream* out, void* data) { - auto cnt = NYql::NBacktrace::CollectFrames(Frames, data); - PrintFrames(out, Frames, cnt); - } +void DoBacktrace(IOutputStream* out, void* data) { + auto cnt = NYql::NBacktrace::CollectFrames(Frames, data); + PrintFrames(out, Frames, cnt); +} - void DoBacktrace(IOutputStream* out, void** stack, size_t cnt) { - Y_UNUSED(NYql::NBacktrace::CollectFrames(Frames, stack, cnt)); - PrintFrames(out, Frames, cnt); - } - +void DoBacktrace(IOutputStream* out, void** stack, size_t cnt) { + Y_UNUSED(NYql::NBacktrace::CollectFrames(Frames, stack, cnt)); + PrintFrames(out, Frames, cnt); +} - void SignalHandler(int signum) { - CallCallbacks(Before, signum); +void SignalHandler(int signum) { + CallCallbacks(Before, 
signum); - if (!NMalloc::IsAllocatorCorrupted) { - if (!AtomicTryLock(&BacktraceStarted)) { - return; - } - - UnlockAllMemory(); - DoBacktrace(&Cerr, nullptr); + if (!NMalloc::IsAllocatorCorrupted) { + if (!AtomicTryLock(&BacktraceStarted)) { + return; } - - CallCallbacks(After, signum); - raise(signum); + + UnlockAllMemory(); + DoBacktrace(&Cerr, nullptr); } -#if defined(_linux_) && defined(_x86_64_) - void SignalAction(int signum, siginfo_t*, void* context) { - Y_UNUSED(SignalHandler); - CallCallbacks(Before, signum); + CallCallbacks(After, signum); + raise(signum); +} - if (!NMalloc::IsAllocatorCorrupted) { - if (!AtomicTryLock(&BacktraceStarted)) { - return; - } +#if defined(_linux_) && defined(_x86_64_) +void SignalAction(int signum, siginfo_t*, void* context) { + Y_UNUSED(SignalHandler); + CallCallbacks(Before, signum); - UnlockAllMemory(); - DoBacktrace(&Cerr, context); + if (!NMalloc::IsAllocatorCorrupted) { + if (!AtomicTryLock(&BacktraceStarted)) { + return; } - - CallCallbacks(After, signum); - raise(signum); + + UnlockAllMemory(); + DoBacktrace(&Cerr, context); } -#endif + + CallCallbacks(After, signum); + raise(signum); } +#endif +} // namespace namespace NYql { - namespace NBacktrace { - THashMap<TString, TString> Mapping; +namespace NBacktrace { +THashMap<TString, TString> Mapping; - void SetModulesMapping(const THashMap<TString, TString>& mapping) { - Mapping = mapping; - } +void SetModulesMapping(const THashMap<TString, TString>& mapping) { + Mapping = mapping; +} - void AddBeforeFatalCallback(const std::function<void(int)>& before) { - Before.push_back(before); - } +void AddBeforeFatalCallback(const std::function<void(int)>& before) { + Before.push_back(before); +} - void AddAfterFatalCallback(const std::function<void(int)>& after) { - After.push_back(after); - } +void AddAfterFatalCallback(const std::function<void(int)>& after) { + After.push_back(after); +} - void RegisterKikimrFatalActions() { +void RegisterKikimrFatalActions() { #if 
defined(_linux_) && defined(_x86_64_) - SetFatalSignalAction(SignalAction); + SetFatalSignalAction(SignalAction); #else - SetFatalSignalHandler(SignalHandler); + SetFatalSignalHandler(SignalHandler); #endif - } - - void EnableKikimrSymbolize() { - KikimrSymbolize = true; - } +} - void KikimrBackTrace() { - FormatBackTrace(&Cerr); - } +void EnableKikimrSymbolize() { + KikimrSymbolize = true; +} - void KikimrBackTraceFormatImpl(IOutputStream* out) { - KikimrSymbolize = true; - UnlockAllMemory(); - DoBacktrace(out, nullptr); - } +void KikimrBackTrace() { + FormatBackTrace(&Cerr); +} - void KikimrBacktraceFormatImpl(IOutputStream* out, void* const* stack, size_t stackSize) { - KikimrSymbolize = true; - DoBacktrace(out, (void**)stack, stackSize); - } +void KikimrBackTraceFormatImpl(IOutputStream* out) { + KikimrSymbolize = true; + UnlockAllMemory(); + DoBacktrace(out, nullptr); +} - } +void KikimrBacktraceFormatImpl(IOutputStream* out, void* const* stack, size_t stackSize) { + KikimrSymbolize = true; + DoBacktrace(out, (void**)stack, stackSize); } +} // namespace NBacktrace +} // namespace NYql + void EnableKikimrBacktraceFormat() { SetFormatBackTraceFn(NYql::NBacktrace::KikimrBacktraceFormatImpl); } namespace { - NYql::NBacktrace::TStackFrame SFrames[NYql::NBacktrace::Limit]; - void PrintFrames(IOutputStream* out, const NYql::NBacktrace::TCollectedFrame* frames, size_t count) { - auto& outp = *out; - Y_UNUSED(SFrames); +NYql::NBacktrace::TStackFrame SFrames[NYql::NBacktrace::Limit]; +void PrintFrames(IOutputStream* out, const NYql::NBacktrace::TCollectedFrame* frames, size_t count) { + auto& outp = *out; + Y_UNUSED(SFrames); #if defined(_linux_) && defined(_x86_64_) - if (KikimrSymbolize) { - for (size_t i = 0; i < count; ++i) { - SFrames[i] = NYql::NBacktrace::TStackFrame{frames[i].File, frames[i].Address}; - } - NYql::NBacktrace::Symbolize(SFrames, count, out); - return; + if (KikimrSymbolize) { + for (size_t i = 0; i < count; ++i) { + SFrames[i] = 
NYql::NBacktrace::TStackFrame{frames[i].File, frames[i].Address}; } + NYql::NBacktrace::Symbolize(SFrames, count, out); + return; + } #endif - outp << "StackFrames: " << count << "\n"; - for (size_t i = 0; i < count; ++i) { - auto& frame = frames[i]; - auto fileName = frame.File; - if (!strcmp(fileName, "/proc/self/exe")) { - fileName = "EXE"; - } - auto it = NYql::NBacktrace::Mapping.find(fileName); - outp << "StackFrame: " << (it == NYql::NBacktrace::Mapping.end() ? fileName : it->second) << " " << frame.Address << " 0\n"; + outp << "StackFrames: " << count << "\n"; + for (size_t i = 0; i < count; ++i) { + auto& frame = frames[i]; + auto fileName = frame.File; + if (!strcmp(fileName, "/proc/self/exe")) { + fileName = "EXE"; } + auto it = NYql::NBacktrace::Mapping.find(fileName); + outp << "StackFrame: " << (it == NYql::NBacktrace::Mapping.end() ? fileName : it->second) << " " << frame.Address << " 0\n"; } -}
\ No newline at end of file +} +} // namespace diff --git a/yql/essentials/utils/backtrace/backtrace.h b/yql/essentials/utils/backtrace/backtrace.h index cd843d8cb41..c401392cfed 100644 --- a/yql/essentials/utils/backtrace/backtrace.h +++ b/yql/essentials/utils/backtrace/backtrace.h @@ -29,6 +29,6 @@ void SetModulesMapping(const THashMap<TString, TString>& mapping); TString Symbolize(const TString& input, const THashMap<TString, TString>& mapping); -} /* namespace Backtrace */ +} // namespace NBacktrace } /* namespace NYql */ diff --git a/yql/essentials/utils/backtrace/backtrace_dummy.cpp b/yql/essentials/utils/backtrace/backtrace_dummy.cpp index ec54e55dcab..5035aa918e7 100644 --- a/yql/essentials/utils/backtrace/backtrace_dummy.cpp +++ b/yql/essentials/utils/backtrace/backtrace_dummy.cpp @@ -3,9 +3,9 @@ #include <util/system/backtrace.h> namespace NYql { - namespace NBacktrace { - size_t CollectBacktrace(void** addresses, size_t limit, void*) { - return BackTrace(addresses, limit); - } - } -}
\ No newline at end of file +namespace NBacktrace { +size_t CollectBacktrace(void** addresses, size_t limit, void*) { + return BackTrace(addresses, limit); +} +} // namespace NBacktrace +} // namespace NYql diff --git a/yql/essentials/utils/backtrace/backtrace_lib.cpp b/yql/essentials/utils/backtrace/backtrace_lib.cpp index dd2d295dc30..473b227cc4a 100644 --- a/yql/essentials/utils/backtrace/backtrace_lib.cpp +++ b/yql/essentials/utils/backtrace/backtrace_lib.cpp @@ -6,74 +6,73 @@ #include <algorithm> #if defined(_linux_) && defined(_x86_64_) -#include <dlfcn.h> -#include <link.h> + #include <dlfcn.h> + #include <link.h> #endif namespace { - const size_t Limit = 400; - void* Stack[Limit]; +const size_t Limit = 400; +void* Stack[Limit]; - struct TDllInfo { - const char* Path; - ui64 BaseAddress; - }; +struct TDllInfo { + const char* Path; + ui64 BaseAddress; +}; - const size_t MaxDLLCnt = 100; - TDllInfo DLLs[MaxDLLCnt]; - size_t DLLCount = 0; +const size_t MaxDLLCnt = 100; +TDllInfo DLLs[MaxDLLCnt]; +size_t DLLCount = 0; #if defined(_linux_) && defined(_x86_64_) - int DlIterCallback(struct dl_phdr_info *info, size_t, void *data) { - if (*info->dlpi_name) { - if (DLLCount + 1 < MaxDLLCnt) { - reinterpret_cast<std::remove_reference_t<decltype(DLLs[0])>*>(data)[DLLCount++] = { info->dlpi_name, (ui64)info->dlpi_addr }; - } +int DlIterCallback(struct dl_phdr_info* info, size_t, void* data) { + if (*info->dlpi_name) { + if (DLLCount + 1 < MaxDLLCnt) { + reinterpret_cast<std::remove_reference_t<decltype(DLLs[0])>*>(data)[DLLCount++] = {info->dlpi_name, (ui64)info->dlpi_addr}; } - return 0; } + return 0; +} #endif - bool Comp(const TDllInfo& a, const TDllInfo& b) { - return strcmp(a.Path, b.Path) < 0; - } - +bool Comp(const TDllInfo& a, const TDllInfo& b) { + return strcmp(a.Path, b.Path) < 0; } +} // namespace + namespace NYql { - namespace NBacktrace { - TCollectedFrame::TCollectedFrame(uintptr_t addr) { - File = GetPersistentExecPath().c_str(); - Address = addr; 
+namespace NBacktrace { +TCollectedFrame::TCollectedFrame(uintptr_t addr) { + File = GetPersistentExecPath().c_str(); + Address = addr; #if defined(_linux_) && defined(_x86_64_) - Dl_info dlInfo; - memset(&dlInfo, 0, sizeof(dlInfo)); - auto ret = dladdr(reinterpret_cast<void*>(addr), &dlInfo); - if (ret) { - auto it = std::lower_bound(DLLs, DLLs + DLLCount, std::remove_reference_t<decltype(DLLs[0])> {dlInfo.dli_fname, {}}, Comp); - if (it != DLLs + DLLCount && !strcmp(it->Path, dlInfo.dli_fname)) { - File = it->Path; - Address -= it->BaseAddress; - } - } -#endif + Dl_info dlInfo; + memset(&dlInfo, 0, sizeof(dlInfo)); + auto ret = dladdr(reinterpret_cast<void*>(addr), &dlInfo); + if (ret) { + auto it = std::lower_bound(DLLs, DLLs + DLLCount, std::remove_reference_t<decltype(DLLs[0])>{dlInfo.dli_fname, {}}, Comp); + if (it != DLLs + DLLCount && !strcmp(it->Path, dlInfo.dli_fname)) { + File = it->Path; + Address -= it->BaseAddress; } + } +#endif +} - size_t CollectFrames(TCollectedFrame* frames, void* data) { +size_t CollectFrames(TCollectedFrame* frames, void* data) { #if defined(_linux_) && defined(_x86_64_) - DLLCount = 0; - dl_iterate_phdr(DlIterCallback, &DLLs); + DLLCount = 0; + dl_iterate_phdr(DlIterCallback, &DLLs); #endif - std::stable_sort(DLLs, DLLs + DLLCount, Comp); - size_t cnt = CollectBacktrace(Stack, Limit, data); - return CollectFrames(frames, Stack, cnt); - } + std::stable_sort(DLLs, DLLs + DLLCount, Comp); + size_t cnt = CollectBacktrace(Stack, Limit, data); + return CollectFrames(frames, Stack, cnt); +} - size_t CollectFrames(TCollectedFrame* frames, void** stack, size_t cnt) { - for (size_t i = 0; i < cnt; ++i) { - new (frames + i)TCollectedFrame(reinterpret_cast<uintptr_t>(stack[i])); - } - return cnt; - } +size_t CollectFrames(TCollectedFrame* frames, void** stack, size_t cnt) { + for (size_t i = 0; i < cnt; ++i) { + new (frames + i) TCollectedFrame(reinterpret_cast<uintptr_t>(stack[i])); } + return cnt; } - +} // namespace NBacktrace +} // 
namespace NYql diff --git a/yql/essentials/utils/backtrace/backtrace_lib.h b/yql/essentials/utils/backtrace/backtrace_lib.h index 3404716da68..d0a390bf9ea 100644 --- a/yql/essentials/utils/backtrace/backtrace_lib.h +++ b/yql/essentials/utils/backtrace/backtrace_lib.h @@ -4,15 +4,15 @@ #include <util/generic/vector.h> namespace NYql { - namespace NBacktrace { - size_t CollectBacktrace(void** addresses, size_t limit, void* data); - struct TCollectedFrame { - TCollectedFrame(uintptr_t addr); - TCollectedFrame() = default; - const char* File; - size_t Address; - }; - size_t CollectFrames(TCollectedFrame* frames, void* data); - size_t CollectFrames(TCollectedFrame* frames, void** stack, size_t cnt); - } -}
\ No newline at end of file +namespace NBacktrace { +size_t CollectBacktrace(void** addresses, size_t limit, void* data); +struct TCollectedFrame { + TCollectedFrame(uintptr_t addr); + TCollectedFrame() = default; + const char* File; + size_t Address; +}; +size_t CollectFrames(TCollectedFrame* frames, void* data); +size_t CollectFrames(TCollectedFrame* frames, void** stack, size_t cnt); +} // namespace NBacktrace +} // namespace NYql diff --git a/yql/essentials/utils/backtrace/backtrace_linux.cpp b/yql/essentials/utils/backtrace/backtrace_linux.cpp index c9a1bd5a225..f819a04ae90 100644 --- a/yql/essentials/utils/backtrace/backtrace_linux.cpp +++ b/yql/essentials/utils/backtrace/backtrace_linux.cpp @@ -6,57 +6,57 @@ #include <util/system/backtrace.h> namespace { - size_t BackTrace(void** p, size_t len, ucontext_t* con) { - unw_context_t context; - unw_cursor_t cursor; - if (unw_getcontext(&context)) { - return 0; - } +size_t BackTrace(void** p, size_t len, ucontext_t* con) { + unw_context_t context; + unw_cursor_t cursor; + if (unw_getcontext(&context)) { + return 0; + } - if (unw_init_local(&cursor, &context)) { - return 0; - } - const sigcontext* signal_mcontext = (const sigcontext*)&(con->uc_mcontext); - unw_set_reg(&cursor, UNW_X86_64_RSI, signal_mcontext->rsi); - unw_set_reg(&cursor, UNW_X86_64_RDI, signal_mcontext->rdi); - unw_set_reg(&cursor, UNW_X86_64_RBP, signal_mcontext->rbp); - unw_set_reg(&cursor, UNW_X86_64_RAX, signal_mcontext->rax); - unw_set_reg(&cursor, UNW_X86_64_RBX, signal_mcontext->rbx); - unw_set_reg(&cursor, UNW_X86_64_RCX, signal_mcontext->rcx); - unw_set_reg(&cursor, UNW_X86_64_R8, signal_mcontext->r8); - unw_set_reg(&cursor, UNW_X86_64_R9, signal_mcontext->r9); - unw_set_reg(&cursor, UNW_X86_64_R10, signal_mcontext->r10); - unw_set_reg(&cursor, UNW_X86_64_R11, signal_mcontext->r11); - unw_set_reg(&cursor, UNW_X86_64_R12, signal_mcontext->r12); - unw_set_reg(&cursor, UNW_X86_64_R13, signal_mcontext->r13); - unw_set_reg(&cursor, 
UNW_X86_64_R14, signal_mcontext->r14); - unw_set_reg(&cursor, UNW_X86_64_R15, signal_mcontext->r15); - unw_set_reg(&cursor, UNW_X86_64_RSP, signal_mcontext->rsp); + if (unw_init_local(&cursor, &context)) { + return 0; + } + const sigcontext* signal_mcontext = (const sigcontext*)&(con->uc_mcontext); + unw_set_reg(&cursor, UNW_X86_64_RSI, signal_mcontext->rsi); + unw_set_reg(&cursor, UNW_X86_64_RDI, signal_mcontext->rdi); + unw_set_reg(&cursor, UNW_X86_64_RBP, signal_mcontext->rbp); + unw_set_reg(&cursor, UNW_X86_64_RAX, signal_mcontext->rax); + unw_set_reg(&cursor, UNW_X86_64_RBX, signal_mcontext->rbx); + unw_set_reg(&cursor, UNW_X86_64_RCX, signal_mcontext->rcx); + unw_set_reg(&cursor, UNW_X86_64_R8, signal_mcontext->r8); + unw_set_reg(&cursor, UNW_X86_64_R9, signal_mcontext->r9); + unw_set_reg(&cursor, UNW_X86_64_R10, signal_mcontext->r10); + unw_set_reg(&cursor, UNW_X86_64_R11, signal_mcontext->r11); + unw_set_reg(&cursor, UNW_X86_64_R12, signal_mcontext->r12); + unw_set_reg(&cursor, UNW_X86_64_R13, signal_mcontext->r13); + unw_set_reg(&cursor, UNW_X86_64_R14, signal_mcontext->r14); + unw_set_reg(&cursor, UNW_X86_64_R15, signal_mcontext->r15); + unw_set_reg(&cursor, UNW_X86_64_RSP, signal_mcontext->rsp); - unw_set_reg(&cursor, UNW_REG_SP, signal_mcontext->rsp); - unw_set_reg(&cursor, UNW_REG_IP, signal_mcontext->rip); + unw_set_reg(&cursor, UNW_REG_SP, signal_mcontext->rsp); + unw_set_reg(&cursor, UNW_REG_IP, signal_mcontext->rip); - size_t pos = 0; - p[pos++] = (void*)signal_mcontext->rip; - while (pos < len && unw_step(&cursor) > 0) { - unw_word_t ip = 0; - unw_get_reg(&cursor, UNW_REG_IP, &ip); - if (unw_is_signal_frame(&cursor)) { - continue; - } - p[pos++] = (void*)ip; + size_t pos = 0; + p[pos++] = (void*)signal_mcontext->rip; + while (pos < len && unw_step(&cursor) > 0) { + unw_word_t ip = 0; + unw_get_reg(&cursor, UNW_REG_IP, &ip); + if (unw_is_signal_frame(&cursor)) { + continue; } - return pos; + p[pos++] = (void*)ip; } + return pos; } +} // namespace 
namespace NYql { - namespace NBacktrace { - size_t CollectBacktrace(void** addresses, size_t limit, void* data) { - if (!data) { - return BackTrace(addresses, limit); - } - return BackTrace(addresses, limit, reinterpret_cast<ucontext_t*>(data)); - } +namespace NBacktrace { +size_t CollectBacktrace(void** addresses, size_t limit, void* data) { + if (!data) { + return BackTrace(addresses, limit); } -}
\ No newline at end of file + return BackTrace(addresses, limit, reinterpret_cast<ucontext_t*>(data)); +} +} // namespace NBacktrace +} // namespace NYql diff --git a/yql/essentials/utils/backtrace/backtrace_ut.cpp b/yql/essentials/utils/backtrace/backtrace_ut.cpp index 7cf363325e2..741af647ba8 100644 --- a/yql/essentials/utils/backtrace/backtrace_ut.cpp +++ b/yql/essentials/utils/backtrace/backtrace_ut.cpp @@ -3,26 +3,26 @@ #include <util/generic/string.h> #include <library/cpp/testing/unittest/registar.h> namespace { - Y_NO_INLINE void TestTrace394() { - TStringStream ss; - NYql::NBacktrace::KikimrBackTraceFormatImpl(&ss); +Y_NO_INLINE void TestTrace394() { + TStringStream ss; + NYql::NBacktrace::KikimrBackTraceFormatImpl(&ss); #if !defined(_hardening_enabled_) && !defined(_win_) - UNIT_ASSERT_STRING_CONTAINS(ss.Str(), "TestTrace394"); + UNIT_ASSERT_STRING_CONTAINS(ss.Str(), "TestTrace394"); #endif - } - Y_NO_INLINE void TestTrace39114() { - TStringStream ss; - NYql::NBacktrace::KikimrBackTraceFormatImpl(&ss); +} +Y_NO_INLINE void TestTrace39114() { + TStringStream ss; + NYql::NBacktrace::KikimrBackTraceFormatImpl(&ss); #if !defined(_hardening_enabled_) && !defined(_win_) - UNIT_ASSERT_STRING_CONTAINS(ss.Str(), "TestTrace39114"); + UNIT_ASSERT_STRING_CONTAINS(ss.Str(), "TestTrace39114"); #endif - } } +} // namespace Y_UNIT_TEST_SUITE(TEST_BACKTRACE_AND_SYMBOLIZE) { - Y_UNIT_TEST(TEST_NO_KIKIMR) { - NYql::NBacktrace::EnableKikimrSymbolize(); - TestTrace394(); - TestTrace39114(); - } +Y_UNIT_TEST(TEST_NO_KIKIMR) { + NYql::NBacktrace::EnableKikimrSymbolize(); + TestTrace394(); + TestTrace39114(); } +} // Y_UNIT_TEST_SUITE(TEST_BACKTRACE_AND_SYMBOLIZE) diff --git a/yql/essentials/utils/backtrace/symbolize.cpp b/yql/essentials/utils/backtrace/symbolize.cpp index 360ff408c7d..e96c0a313dd 100644 --- a/yql/essentials/utils/backtrace/symbolize.cpp +++ b/yql/essentials/utils/backtrace/symbolize.cpp @@ -7,56 +7,56 @@ namespace NYql { - namespace NBacktrace { - TString 
Symbolize(const TString& input, const THashMap<TString, TString>& mapping) { +namespace NBacktrace { +TString Symbolize(const TString& input, const THashMap<TString, TString>& mapping) { #if defined(__linux__) && defined(__x86_64__) - TString output; - TStringOutput out(output); + TString output; + TStringOutput out(output); - i64 stackSize = -1; - TVector<TStackFrame> frames; - TVector<TString> usedFilenames; - for (TStringBuf line: StringSplitter(input).SplitByString("\n")) { - if (line.StartsWith("StackFrames:")) { - TVector<TString> parts; - Split(TString(line), " ", parts); - if (parts.size() > 1) { - TryFromString<i64>(parts[1], stackSize); - frames.reserve(stackSize); - } - } else if (line.StartsWith("StackFrame:")) { - TVector<TString> parts; - Split(TString(line), " ", parts); - TString modulePath; - ui64 address; - ui64 offset; - if (parts.size() > 3) { - modulePath = parts[1]; - TryFromString<ui64>(parts[2], address); - TryFromString<ui64>(parts[3], offset); - auto it = mapping.find(modulePath); - if (it != mapping.end()) { - modulePath = it->second; - } - usedFilenames.emplace_back(std::move(modulePath)); - frames.emplace_back(TStackFrame{usedFilenames.back().c_str(), address - offset}); - } - } else { - out << line << "\n"; + i64 stackSize = -1; + TVector<TStackFrame> frames; + TVector<TString> usedFilenames; + for (TStringBuf line : StringSplitter(input).SplitByString("\n")) { + if (line.StartsWith("StackFrames:")) { + TVector<TString> parts; + Split(TString(line), " ", parts); + if (parts.size() > 1) { + TryFromString<i64>(parts[1], stackSize); + frames.reserve(stackSize); + } + } else if (line.StartsWith("StackFrame:")) { + TVector<TString> parts; + Split(TString(line), " ", parts); + TString modulePath; + ui64 address; + ui64 offset; + if (parts.size() > 3) { + modulePath = parts[1]; + TryFromString<ui64>(parts[2], address); + TryFromString<ui64>(parts[3], offset); + auto it = mapping.find(modulePath); + if (it != mapping.end()) { + modulePath = 
it->second; } + usedFilenames.emplace_back(std::move(modulePath)); + frames.emplace_back(TStackFrame{usedFilenames.back().c_str(), address - offset}); } + } else { + out << line << "\n"; + } + } - if (stackSize == 0) { - out << "Empty stack trace\n"; - } - Symbolize(frames.data(), frames.size(), &out); - return output; + if (stackSize == 0) { + out << "Empty stack trace\n"; + } + Symbolize(frames.data(), frames.size(), &out); + return output; #else - Y_UNUSED(mapping); - return input; + Y_UNUSED(mapping); + return input; #endif - } +} - } /* namespace NBacktrace */ +} /* namespace NBacktrace */ } /* namespace NYql */ diff --git a/yql/essentials/utils/backtrace/symbolizer.h b/yql/essentials/utils/backtrace/symbolizer.h index 0d32ba25d35..a3fcb0e6054 100644 --- a/yql/essentials/utils/backtrace/symbolizer.h +++ b/yql/essentials/utils/backtrace/symbolizer.h @@ -5,11 +5,11 @@ #include <util/generic/vector.h> namespace NYql { - namespace NBacktrace { - struct TStackFrame { - const char* File; - size_t Address; - }; - void Symbolize(const TStackFrame* frames, size_t count, IOutputStream* out); - } -}
\ No newline at end of file +namespace NBacktrace { +struct TStackFrame { + const char* File; + size_t Address; +}; +void Symbolize(const TStackFrame* frames, size_t count, IOutputStream* out); +} // namespace NBacktrace +} // namespace NYql diff --git a/yql/essentials/utils/backtrace/symbolizer_linux.cpp b/yql/essentials/utils/backtrace/symbolizer_linux.cpp index 222bc5df2e3..d92e9fc7c03 100644 --- a/yql/essentials/utils/backtrace/symbolizer_linux.cpp +++ b/yql/essentials/utils/backtrace/symbolizer_linux.cpp @@ -23,9 +23,12 @@ const size_t MaxStrLen = 512; const size_t MaxDemangleLen = 1024 * 1024; char Buff[MaxDemangleLen]; -class TNoThrowingMemoryOutput : public TMemoryOutput { +class TNoThrowingMemoryOutput: public TMemoryOutput { public: - TNoThrowingMemoryOutput(void* c, size_t l) : TMemoryOutput(c, l) {} + TNoThrowingMemoryOutput(void* c, size_t l) + : TMemoryOutput(c, l) + { + } void Truncate() { *(Buf_ - 1) = '.'; *(Buf_ - 2) = '.'; @@ -84,67 +87,66 @@ int HandleLibBacktraceFrame(void* data, uintptr_t, const char* filename, int lin out << functionName << " at " << fileName << ":" << lineno << ":0"; return 0; } -} +} // namespace namespace NYql { - namespace NBacktrace { - namespace { - std::mutex Mutex; - char* Result[Limit]; - size_t Order[Limit]; - char TmpBuffer[MaxStrLen * Limit]{}; - auto CreateState(const char* filename) { - return backtrace_create_state( - filename, - 0, - HandleLibBacktraceError, - nullptr - ); - } - } +namespace NBacktrace { +namespace { +std::mutex Mutex; +char* Result[Limit]; +size_t Order[Limit]; +char TmpBuffer[MaxStrLen * Limit]{}; +auto CreateState(const char* filename) { + return backtrace_create_state( + filename, + 0, + HandleLibBacktraceError, + nullptr); +} +} // namespace - void Symbolize(const TStackFrame* frames, size_t count, IOutputStream* out) { - if (!count) { - return; - } - memset(TmpBuffer, 0, sizeof(TmpBuffer)); - Result[0] = TmpBuffer; - for (size_t i = 1; i < Limit; ++i) { - Result[i] = Result[i - 1] + 
MaxStrLen; - } - const std::lock_guard lock{Mutex}; +void Symbolize(const TStackFrame* frames, size_t count, IOutputStream* out) { + if (!count) { + return; + } + memset(TmpBuffer, 0, sizeof(TmpBuffer)); + Result[0] = TmpBuffer; + for (size_t i = 1; i < Limit; ++i) { + Result[i] = Result[i - 1] + MaxStrLen; + } + const std::lock_guard lock{Mutex}; - std::iota(Order, Order + count, 0u); - std::sort(Order, Order + count, [&frames](auto a, auto b) { return strcmp(frames[a].File, frames[b].File) < 0; }); + std::iota(Order, Order + count, 0u); + std::sort(Order, Order + count, [&frames](auto a, auto b) { return strcmp(frames[a].File, frames[b].File) < 0; }); - struct backtrace_state* state = nullptr; - for (size_t i = 0; i < count; ++i) { - if (!i || frames[Order[i - 1]].File != frames[Order[i]].File) { - state = CreateState(frames[Order[i]].File); - } + struct backtrace_state* state = nullptr; + for (size_t i = 0; i < count; ++i) { + if (!i || frames[Order[i - 1]].File != frames[Order[i]].File) { + state = CreateState(frames[Order[i]].File); + } - if (!state) { - Result[Order[i]] = nullptr; // File not found - continue; - } + if (!state) { + Result[Order[i]] = nullptr; // File not found + continue; + } - int status = backtrace_pcinfo( - state, - reinterpret_cast<uintptr_t>(frames[Order[i]].Address) - 1, // last byte of the call instruction - HandleLibBacktraceFrame, - HandleLibBacktraceError, - reinterpret_cast<void*>(Result[Order[i]])); - if (0 != status) { - break; - } - } - for (size_t i = 0; i < count; ++i) { - if (Result[i]) { - *out << Result[i] << "\n"; - } else { - *out << "File `" << frames[i].File << "` not found\n"; - } - } + int status = backtrace_pcinfo( + state, + reinterpret_cast<uintptr_t>(frames[Order[i]].Address) - 1, // last byte of the call instruction + HandleLibBacktraceFrame, + HandleLibBacktraceError, + reinterpret_cast<void*>(Result[Order[i]])); + if (0 != status) { + break; } } -}
\ No newline at end of file + for (size_t i = 0; i < count; ++i) { + if (Result[i]) { + *out << Result[i] << "\n"; + } else { + *out << "File `" << frames[i].File << "` not found\n"; + } + } +} +} // namespace NBacktrace +} // namespace NYql diff --git a/yql/essentials/utils/backtrace/ut/ya.make b/yql/essentials/utils/backtrace/ut/ya.make index c54c56275a9..f81359303ce 100644 --- a/yql/essentials/utils/backtrace/ut/ya.make +++ b/yql/essentials/utils/backtrace/ut/ya.make @@ -1,5 +1,6 @@ UNITTEST_FOR(yql/essentials/utils/backtrace) +ENABLE(YQL_STYLE_CPP) IF (OS_LINUX AND ARCH_X86_64) SRCS( diff --git a/yql/essentials/utils/backtrace/ya.make b/yql/essentials/utils/backtrace/ya.make index 30c2dd1cc11..c9457e772b7 100644 --- a/yql/essentials/utils/backtrace/ya.make +++ b/yql/essentials/utils/backtrace/ya.make @@ -1,5 +1,7 @@ LIBRARY() +ENABLE(YQL_STYLE_CPP) + SRCS( backtrace.cpp backtrace_lib.cpp @@ -31,4 +33,5 @@ ENDIF() END() -RECURSE_FOR_TESTS(ut)
\ No newline at end of file +RECURSE_FOR_TESTS(ut) + diff --git a/yql/essentials/utils/cast.h b/yql/essentials/utils/cast.h index dac85080922..5d8dd124d8e 100644 --- a/yql/essentials/utils/cast.h +++ b/yql/essentials/utils/cast.h @@ -3,7 +3,7 @@ namespace NYql { -template<class T, class F> +template <class T, class F> [[nodiscard]] inline T EnsureDynamicCast(F from) { YQL_ENSURE(from, "source should not be null"); diff --git a/yql/essentials/utils/chunked_buffer.cpp b/yql/essentials/utils/chunked_buffer.cpp index 7ae66d04efc..75b86c2ad42 100644 --- a/yql/essentials/utils/chunked_buffer.cpp +++ b/yql/essentials/utils/chunked_buffer.cpp @@ -114,4 +114,4 @@ TChunkedBuffer CopyData(TChunkedBuffer&& src) { return result; } -} +} // namespace NYql diff --git a/yql/essentials/utils/chunked_buffer.h b/yql/essentials/utils/chunked_buffer.h index 684e185fbee..8794ab8f5af 100644 --- a/yql/essentials/utils/chunked_buffer.h +++ b/yql/essentials/utils/chunked_buffer.h @@ -50,11 +50,12 @@ private: size_t Size_ = 0; }; -class TChunkedBufferOutput : public IOutputStream { +class TChunkedBufferOutput: public IOutputStream { public: explicit TChunkedBufferOutput(TChunkedBuffer& dst); + private: - virtual void DoWrite(const void *buf, size_t len) override; + virtual void DoWrite(const void* buf, size_t len) override; TChunkedBuffer& Dst_; }; @@ -62,4 +63,4 @@ private: TChunkedBuffer CopyData(const TChunkedBuffer& src); TChunkedBuffer CopyData(TChunkedBuffer&& src); -} +} // namespace NYql diff --git a/yql/essentials/utils/debug_info.cpp b/yql/essentials/utils/debug_info.cpp index ff56166695e..bfb390864fe 100644 --- a/yql/essentials/utils/debug_info.cpp +++ b/yql/essentials/utils/debug_info.cpp @@ -7,25 +7,22 @@ #include <string.h> - namespace NYql { static const size_t OPERATION_ID_MAX_LENGTH = 24; static const size_t THREAD_NAME_MAX_LENGTH = 16; - struct TDebugInfo { char OperationId[OPERATION_ID_MAX_LENGTH + 1]; }; Y_POD_THREAD(TDebugInfo) TlsDebugInfo; - void 
SetCurrentOperationId(const char* operationId) { size_t len = strlcpy( - (&TlsDebugInfo)->OperationId, - operationId, - OPERATION_ID_MAX_LENGTH); + (&TlsDebugInfo)->OperationId, + operationId, + OPERATION_ID_MAX_LENGTH); const char* threadName = nullptr; if (len > THREAD_NAME_MAX_LENGTH) { @@ -38,11 +35,11 @@ void SetCurrentOperationId(const char* operationId) { long GetRunnigThreadsCount() { TString procStat = TFileInput("/proc/self/stat").ReadAll(); - long num_threads = -2; // Number of threads in this process (since Linux 2.6) + long num_threads = -2; // Number of threads in this process (since Linux 2.6) int n = sscanf(procStat.data(), - "%*d %*s %*c %*d %*d %*d %*d %*d %*u %*u %*u %*u %*u %*u %*u %*d %*d %*d %*d %ld", - &num_threads); + "%*d %*s %*c %*d %*d %*d %*d %*d %*u %*u %*u %*u %*u %*u %*u %*d %*d %*d %*d %ld", + &num_threads); return n == 1 ? num_threads : -2; } diff --git a/yql/essentials/utils/debug_info.h b/yql/essentials/utils/debug_info.h index 3e2a55140b1..40502060426 100644 --- a/yql/essentials/utils/debug_info.h +++ b/yql/essentials/utils/debug_info.h @@ -1,6 +1,5 @@ #pragma once - namespace NYql { void SetCurrentOperationId(const char* operationId); diff --git a/yql/essentials/utils/docs/link.cpp b/yql/essentials/utils/docs/link.cpp index 2fd5321ff12..af3d648db9e 100644 --- a/yql/essentials/utils/docs/link.cpp +++ b/yql/essentials/utils/docs/link.cpp @@ -11,102 +11,102 @@ namespace NYql::NDocs { - TLinkTarget TLinkTarget::Parse(TStringBuf string) { - static const RE2 Regex(R"re(([^#?()]*)(#[^?()]*)?)re"); +TLinkTarget TLinkTarget::Parse(TStringBuf string) { + static const RE2 Regex(R"re(([^#?()]*)(#[^?()]*)?)re"); - TString path; - TString anchor; - if (RE2::FullMatch(string, Regex, &path, &anchor)) { - if (!anchor.empty()) { - YQL_ENSURE(anchor.StartsWith('#')); - anchor.erase(0, 1); - } - - return { - .RelativePath = path, - .Anchor = !anchor.empty() ? 
TMaybe<TString>(anchor) : Nothing(), - }; + TString path; + TString anchor; + if (RE2::FullMatch(string, Regex, &path, &anchor)) { + if (!anchor.empty()) { + YQL_ENSURE(anchor.StartsWith('#')); + anchor.erase(0, 1); } - throw yexception() - << "invalid link target '" << string << "': " - << "does not match regex '" << Regex.pattern() << "'"; + return { + .RelativePath = path, + .Anchor = !anchor.empty() ? TMaybe<TString>(anchor) : Nothing(), + }; } - TMaybe<TLinkTarget> LookupUDF(const TLinks& links, TStringBuf name) { - const auto udf = SplitUDF(TString(name)); - YQL_ENSURE(udf, "Invalid UDF: " << name); + throw yexception() + << "invalid link target '" << string << "': " + << "does not match regex '" << Regex.pattern() << "'"; +} - const auto [module, function] = *udf; +TMaybe<TLinkTarget> LookupUDF(const TLinks& links, TStringBuf name) { + const auto udf = SplitUDF(TString(name)); + YQL_ENSURE(udf, "Invalid UDF: " << name); - if (const TLinkTarget* target = nullptr; - (target = links.FindPtr(module + "::" + function)) || - (target = links.FindPtr(module + "::" + "*"))) { - return *target; - } + const auto [module, function] = *udf; - return Nothing(); + if (const TLinkTarget* target = nullptr; + (target = links.FindPtr(module + "::" + function)) || + (target = links.FindPtr(module + "::" + "*"))) { + return *target; } - TMaybe<TLinkTarget> LookupBasic(const TLinks& links, TStringBuf name) { - TMaybe<TLinkKey> key = NormalizedName(TString(name)); - if (!key) { - return Nothing(); - } - - if (const TLinkTarget* target = links.FindPtr(*key)) { - return *target; - } + return Nothing(); +} +TMaybe<TLinkTarget> LookupBasic(const TLinks& links, TStringBuf name) { + TMaybe<TLinkKey> key = NormalizedName(TString(name)); + if (!key) { return Nothing(); } - TMaybe<TLinkTarget> Lookup(const TLinks& links, TStringBuf name) { - if (IsUDF(name)) { - return LookupUDF(links, name); - } + if (const TLinkTarget* target = links.FindPtr(*key)) { + return *target; + } + + return 
Nothing(); +} - return LookupBasic(links, name); +TMaybe<TLinkTarget> Lookup(const TLinks& links, TStringBuf name) { + if (IsUDF(name)) { + return LookupUDF(links, name); } - TLinkKey ParseLinkKey(TStringBuf string) { - static RE2 UDFRegex(TStringBuilder() - << "(" << NormalizedNameRegex.pattern() << ")\\:\\:(" - << "\\*|" << NormalizedNameRegex.pattern() << ")"); + return LookupBasic(links, name); +} - if (IsNormalizedName(string)) { - return TString(string); - } +TLinkKey ParseLinkKey(TStringBuf string) { + static RE2 UDFRegex(TStringBuilder() + << "(" << NormalizedNameRegex.pattern() << ")\\:\\:(" + << "\\*|" << NormalizedNameRegex.pattern() << ")"); - if (RE2::FullMatch(string, UDFRegex)) { - return TString(string); - } + if (IsNormalizedName(string)) { + return TString(string); + } - ythrow yexception() - << "invalid link key '" << string << "': " - << "does not match any regex"; + if (RE2::FullMatch(string, UDFRegex)) { + return TString(string); } - TLinks ParseLinks(const NJson::TJsonValue& json) { - TLinks links; - for (const auto& [keyString, value] : json.GetMapSafe()) { - TLinkKey key = ParseLinkKey(keyString); - TLinkTarget target = TLinkTarget::Parse(value.GetStringSafe()); - links[std::move(key)] = std::move(target); - } - return links; + ythrow yexception() + << "invalid link key '" << string << "': " + << "does not match any regex"; +} + +TLinks ParseLinks(const NJson::TJsonValue& json) { + TLinks links; + for (const auto& [keyString, value] : json.GetMapSafe()) { + TLinkKey key = ParseLinkKey(keyString); + TLinkTarget target = TLinkTarget::Parse(value.GetStringSafe()); + links[std::move(key)] = std::move(target); } + return links; +} - TLinks Merge(TLinks&& lhs, TLinks&& rhs) { - for (auto& [k, v] : rhs) { - YQL_ENSURE( - !lhs.contains(k), - "Duplicate '" << k << "', old '" << lhs[k] << "', new '" << v << "'"); +TLinks Merge(TLinks&& lhs, TLinks&& rhs) { + for (auto& [k, v] : rhs) { + YQL_ENSURE( + !lhs.contains(k), + "Duplicate '" << k << "', old 
'" << lhs[k] << "', new '" << v << "'"); - lhs[k] = std::move(v); - } - return lhs; + lhs[k] = std::move(v); } + return lhs; +} } // namespace NYql::NDocs diff --git a/yql/essentials/utils/docs/link.h b/yql/essentials/utils/docs/link.h index 961f460b99a..70e91d99a27 100644 --- a/yql/essentials/utils/docs/link.h +++ b/yql/essentials/utils/docs/link.h @@ -7,23 +7,23 @@ namespace NYql::NDocs { - struct TLinkTarget { - TString RelativePath; - TMaybe<TString> Anchor; +struct TLinkTarget { + TString RelativePath; + TMaybe<TString> Anchor; - static TLinkTarget Parse(TStringBuf string); - }; + static TLinkTarget Parse(TStringBuf string); +}; - using TLinkKey = TString; +using TLinkKey = TString; - using TLinks = THashMap<TLinkKey, TLinkTarget>; +using TLinks = THashMap<TLinkKey, TLinkTarget>; - TMaybe<TLinkTarget> Lookup(const TLinks& links, TStringBuf name); +TMaybe<TLinkTarget> Lookup(const TLinks& links, TStringBuf name); - TLinkKey ParseLinkKey(TStringBuf string); +TLinkKey ParseLinkKey(TStringBuf string); - TLinks ParseLinks(const NJson::TJsonValue& json); +TLinks ParseLinks(const NJson::TJsonValue& json); - TLinks Merge(TLinks&& lhs, TLinks&& rhs); +TLinks Merge(TLinks&& lhs, TLinks&& rhs); } // namespace NYql::NDocs diff --git a/yql/essentials/utils/docs/link_page.cpp b/yql/essentials/utils/docs/link_page.cpp index eb71979462d..985410fafd5 100644 --- a/yql/essentials/utils/docs/link_page.cpp +++ b/yql/essentials/utils/docs/link_page.cpp @@ -7,110 +7,110 @@ namespace NYql::NDocs { - TMaybe<TString> MatchSingleFunctionHeader(TStringBuf header) { - return NormalizedName(TString(header)); - } +TMaybe<TString> MatchSingleFunctionHeader(TStringBuf header) { + return NormalizedName(TString(header)); +} - TVector<TString> SplitBy(TStringBuf delim, const TVector<TString>& strings) { - TVector<TString> parts; - for (const TString& s : strings) { - StringSplitter(s).SplitByString(delim).AddTo(&parts); - } - return parts; +TVector<TString> SplitBy(TStringBuf delim, const 
TVector<TString>& strings) { + TVector<TString> parts; + for (const TString& s : strings) { + StringSplitter(s).SplitByString(delim).AddTo(&parts); } + return parts; +} - TVector<TString> SplitByPunctuation(TStringBuf header) { - TVector<TString> parts = {TString(header)}; - parts = SplitBy(" и ", parts); - parts = SplitBy(" / ", parts); - parts = SplitBy(", ", parts); - return parts; - } +TVector<TString> SplitByPunctuation(TStringBuf header) { + TVector<TString> parts = {TString(header)}; + parts = SplitBy(" и ", parts); + parts = SplitBy(" / ", parts); + parts = SplitBy(", ", parts); + return parts; +} - TVector<TString> MatchMultiFunctionHeader(TStringBuf header) { - TVector<TString> names = SplitByPunctuation(header); - - for (TString& name : names) { - TMaybe<TString> normalized = NormalizedName(std::move(name)); - if (!normalized) { - return {}; - } +TVector<TString> MatchMultiFunctionHeader(TStringBuf header) { + TVector<TString> names = SplitByPunctuation(header); - name = std::move(*normalized); + for (TString& name : names) { + TMaybe<TString> normalized = NormalizedName(std::move(name)); + if (!normalized) { + return {}; } - return names; + name = std::move(*normalized); } - TVector<TString> ExtractNormalized(TStringBuf header) { - if (auto single = MatchSingleFunctionHeader(header)) { - return {*single}; - } - if (auto multi = MatchMultiFunctionHeader(header)) { - return multi; - } - return {}; - } + return names; +} - void EnrichFromMarkdown(TLinks& links, const TString& path, const TMarkdownHeader& header) { - for (const TString& name : ExtractNormalized(header.Content)) { - links[name] = { - .RelativePath = path, - .Anchor = header.Anchor, - }; - } +TVector<TString> ExtractNormalized(TStringBuf header) { + if (auto single = MatchSingleFunctionHeader(header)) { + return {*single}; + } + if (auto multi = MatchMultiFunctionHeader(header)) { + return multi; } + return {}; +} - void EnrichFromMarkdown(TLinks& links, const TString& path, const 
TMarkdownPage& page) { - for (const auto& [anchor, section] : page.SectionsByAnchor) { - const TMarkdownHeader& header = section.Header; - EnrichFromMarkdown(links, path, header); - } +void EnrichFromMarkdown(TLinks& links, const TString& path, const TMarkdownHeader& header) { + for (const TString& name : ExtractNormalized(header.Content)) { + links[name] = { + .RelativePath = path, + .Anchor = header.Anchor, + }; } +} - void EnrichFromMarkdown(TLinks& links, const TPages& pages) { - for (const auto& [path, page] : pages) { - EnrichFromMarkdown(links, path, page); - } +void EnrichFromMarkdown(TLinks& links, const TString& path, const TMarkdownPage& page) { + for (const auto& [anchor, section] : page.SectionsByAnchor) { + const TMarkdownHeader& header = section.Header; + EnrichFromMarkdown(links, path, header); } +} - TLinks GetLinksFromPages(const TPages& pages) { - TLinks links; - EnrichFromMarkdown(links, pages); - return links; +void EnrichFromMarkdown(TLinks& links, const TPages& pages) { + for (const auto& [path, page] : pages) { + EnrichFromMarkdown(links, path, page); } +} - TPages Stripped(TPages&& pages, const TLinks& links) { - THashSet<TString> usedPaths; - THashMap<TString, THashSet<TString>> usedAnchors; - for (const auto& [_, link] : links) { - TString anchor = link.Anchor.GetOrElse(""); - usedAnchors[link.RelativePath].emplace(std::move(anchor)); - } +TLinks GetLinksFromPages(const TPages& pages) { + TLinks links; + EnrichFromMarkdown(links, pages); + return links; +} - THashSet<TString> unusedPaths; - THashMap<TString, THashSet<TString>> unusedAnchors; - for (const auto& [path, page] : pages) { - for (const auto& [anchor, _] : page.SectionsByAnchor) { - if (!usedAnchors.contains(path)) { - unusedPaths.emplace(path); - } else if (!usedAnchors[path].contains(anchor)) { - unusedAnchors[path].emplace(anchor); - } - } - } +TPages Stripped(TPages&& pages, const TLinks& links) { + THashSet<TString> usedPaths; + THashMap<TString, THashSet<TString>> 
usedAnchors; + for (const auto& [_, link] : links) { + TString anchor = link.Anchor.GetOrElse(""); + usedAnchors[link.RelativePath].emplace(std::move(anchor)); + } - for (const auto& [path, anchors] : unusedAnchors) { - for (const auto& anchor : anchors) { - pages[path].SectionsByAnchor.erase(anchor); + THashSet<TString> unusedPaths; + THashMap<TString, THashSet<TString>> unusedAnchors; + for (const auto& [path, page] : pages) { + for (const auto& [anchor, _] : page.SectionsByAnchor) { + if (!usedAnchors.contains(path)) { + unusedPaths.emplace(path); + } else if (!usedAnchors[path].contains(anchor)) { + unusedAnchors[path].emplace(anchor); } } + } - for (const auto& path : unusedPaths) { - pages.erase(path); + for (const auto& [path, anchors] : unusedAnchors) { + for (const auto& anchor : anchors) { + pages[path].SectionsByAnchor.erase(anchor); } + } - return pages; + for (const auto& path : unusedPaths) { + pages.erase(path); } + return pages; +} + } // namespace NYql::NDocs diff --git a/yql/essentials/utils/docs/link_page.h b/yql/essentials/utils/docs/link_page.h index beeb1252ef2..532da3fc539 100644 --- a/yql/essentials/utils/docs/link_page.h +++ b/yql/essentials/utils/docs/link_page.h @@ -5,8 +5,8 @@ namespace NYql::NDocs { - TLinks GetLinksFromPages(const TPages& pages); +TLinks GetLinksFromPages(const TPages& pages); - TPages Stripped(TPages&& pages, const TLinks& links); +TPages Stripped(TPages&& pages, const TLinks& links); } // namespace NYql::NDocs diff --git a/yql/essentials/utils/docs/markdown.cpp b/yql/essentials/utils/docs/markdown.cpp index fd13820b8c4..fb4209717e4 100644 --- a/yql/essentials/utils/docs/markdown.cpp +++ b/yql/essentials/utils/docs/markdown.cpp @@ -9,127 +9,127 @@ namespace NYql::NDocs { - class TMarkdownParser { - private: - static constexpr TStringBuf HeaderRegex = R"re(([^#]+)(\s+{#([a-z0-9\-_]+)})?)re"; +class TMarkdownParser { +private: + static constexpr TStringBuf HeaderRegex = R"re(([^#]+)(\s+{#([a-z0-9\-_]+)})?)re"; - public: 
- explicit TMarkdownParser(size_t headerDepth) - : HeaderDepth_(headerDepth) - , SectionHeaderRegex_(" *" + TString(HeaderDepth_, '#') + " " + HeaderRegex) - , IsSkipping_(true) - { - } +public: + explicit TMarkdownParser(size_t headerDepth) + : HeaderDepth_(headerDepth) + , SectionHeaderRegex_(" *" + TString(HeaderDepth_, '#') + " " + HeaderRegex) + , IsSkipping_(true) + { + } - void Parse(IInputStream& markdown, TMarkdownCallback&& onSection) { - for (TString line; markdown.ReadLine(line) != 0;) { - size_t depth = HeaderDepth(line); - if (IsSkipping_) { - if (HeaderDepth_ == depth) { - ResetSection(std::move(line)); - IsSkipping_ = false; - } else { - // Skip - } + void Parse(IInputStream& markdown, TMarkdownCallback&& onSection) { + for (TString line; markdown.ReadLine(line) != 0;) { + size_t depth = HeaderDepth(line); + if (IsSkipping_) { + if (HeaderDepth_ == depth) { + ResetSection(std::move(line)); + IsSkipping_ = false; } else { - if (HeaderDepth_ == depth) { - onSection(std::move(Section_)); - ResetSection(std::move(line)); - } else if (depth == 0 || HeaderDepth_ < depth) { - line.append('\n'); - Section_.Body.append(std::move(line)); - } else { - onSection(std::move(Section_)); - IsSkipping_ = true; - } + // Skip + } + } else { + if (HeaderDepth_ == depth) { + onSection(std::move(Section_)); + ResetSection(std::move(line)); + } else if (depth == 0 || HeaderDepth_ < depth) { + line.append('\n'); + Section_.Body.append(std::move(line)); + } else { + onSection(std::move(Section_)); + IsSkipping_ = true; } - } - - if (!IsSkipping_) { - onSection(std::move(Section_)); } } - private: - void ResetSection(TString&& line) { - Section_ = TMarkdownSection(); + if (!IsSkipping_) { + onSection(std::move(Section_)); + } + } - TString content; - std::optional<TString> dummy; - std::optional<TString> anchor; - if (!RE2::FullMatch(line, SectionHeaderRegex_, &content, &dummy, &anchor)) { - Section_.Header.Content = std::move(line); - return; - } +private: + void 
ResetSection(TString&& line) { + Section_ = TMarkdownSection(); - Section_.Header.Content = std::move(content); - if (anchor) { - Section_.Header.Anchor = std::move(*anchor); - } + TString content; + std::optional<TString> dummy; + std::optional<TString> anchor; + if (!RE2::FullMatch(line, SectionHeaderRegex_, &content, &dummy, &anchor)) { + Section_.Header.Content = std::move(line); + return; } - size_t HeaderDepth(TStringBuf line) const { - while (line.StartsWith(' ') || line.StartsWith('\t')) { - line.Skip(1); - } + Section_.Header.Content = std::move(content); + if (anchor) { + Section_.Header.Anchor = std::move(*anchor); + } + } - if (!line.StartsWith('#')) { - return 0; - } + size_t HeaderDepth(TStringBuf line) const { + while (line.StartsWith(' ') || line.StartsWith('\t')) { + line.Skip(1); + } - size_t begin = line.find('#'); - size_t end = line.find_first_not_of('#', begin); - return end != TStringBuf::npos ? (end - begin) : 0; + if (!line.StartsWith('#')) { + return 0; } - size_t HeaderDepth_; - RE2 SectionHeaderRegex_; - bool IsSkipping_; - TMarkdownSection Section_; - }; + size_t begin = line.find('#'); + size_t end = line.find_first_not_of('#', begin); + return end != TStringBuf::npos ? 
(end - begin) : 0; + } - TMaybe<TString> Anchor(const TMarkdownHeader& header) { - static RE2 Regex(R"re([0-9a-z\-_]+)re"); + size_t HeaderDepth_; + RE2 SectionHeaderRegex_; + bool IsSkipping_; + TMarkdownSection Section_; +}; - if (header.Anchor) { - return header.Anchor; - } +TMaybe<TString> Anchor(const TMarkdownHeader& header) { + static RE2 Regex(R"re([0-9a-z\-_]+)re"); - TString content = ToLowerUTF8(header.Content); - SubstGlobal(content, ' ', '-'); + if (header.Anchor) { + return header.Anchor; + } - if (RE2::FullMatch(content, Regex)) { - return content; - } + TString content = ToLowerUTF8(header.Content); + SubstGlobal(content, ' ', '-'); - return Nothing(); + if (RE2::FullMatch(content, Regex)) { + return content; } - TMarkdownPage ParseMarkdownPage(TString markdown) { - TMarkdownPage page; + return Nothing(); +} - const auto onSection = [&](TMarkdownSection&& section) { - if (TMaybe<TString> anchor = Anchor(section.Header)) { - section.Header.Anchor = anchor; - page.SectionsByAnchor[*anchor] = std::move(section); - } - }; +TMarkdownPage ParseMarkdownPage(TString markdown) { + TMarkdownPage page; - { - TMarkdownParser parser(/*headerDepth=*/2); - TStringStream stream(markdown); - parser.Parse(stream, onSection); - } - - { - TMarkdownParser parser(/*headerDepth=*/3); - TStringStream stream(markdown); - parser.Parse(stream, onSection); + const auto onSection = [&](TMarkdownSection&& section) { + if (TMaybe<TString> anchor = Anchor(section.Header)) { + section.Header.Anchor = anchor; + page.SectionsByAnchor[*anchor] = std::move(section); } + }; - page.Text = std::move(markdown); + { + TMarkdownParser parser(/*headerDepth=*/2); + TStringStream stream(markdown); + parser.Parse(stream, onSection); + } - return page; + { + TMarkdownParser parser(/*headerDepth=*/3); + TStringStream stream(markdown); + parser.Parse(stream, onSection); } + page.Text = std::move(markdown); + + return page; +} + } // namespace NYql::NDocs diff --git 
a/yql/essentials/utils/docs/markdown.h b/yql/essentials/utils/docs/markdown.h index 6b5477ac253..ebc9707f52a 100644 --- a/yql/essentials/utils/docs/markdown.h +++ b/yql/essentials/utils/docs/markdown.h @@ -7,23 +7,23 @@ namespace NYql::NDocs { - struct TMarkdownHeader { - TString Content; - TMaybe<TString> Anchor; - }; +struct TMarkdownHeader { + TString Content; + TMaybe<TString> Anchor; +}; - struct TMarkdownSection { - TMarkdownHeader Header; - TString Body; - }; +struct TMarkdownSection { + TMarkdownHeader Header; + TString Body; +}; - struct TMarkdownPage { - TString Text; - THashMap<TString, TMarkdownSection> SectionsByAnchor; - }; +struct TMarkdownPage { + TString Text; + THashMap<TString, TMarkdownSection> SectionsByAnchor; +}; - using TMarkdownCallback = std::function<void(TMarkdownSection&&)>; +using TMarkdownCallback = std::function<void(TMarkdownSection&&)>; - TMarkdownPage ParseMarkdownPage(TString markdown); +TMarkdownPage ParseMarkdownPage(TString markdown); } // namespace NYql::NDocs diff --git a/yql/essentials/utils/docs/markdown_ut.cpp b/yql/essentials/utils/docs/markdown_ut.cpp index 45e45e645a5..7aee0e3deac 100644 --- a/yql/essentials/utils/docs/markdown_ut.cpp +++ b/yql/essentials/utils/docs/markdown_ut.cpp @@ -6,8 +6,8 @@ using namespace NYql::NDocs; Y_UNIT_TEST_SUITE(MarkdownParserTests) { - Y_UNIT_TEST(ParseMarkdown) { - TString markdown = R"( +Y_UNIT_TEST(ParseMarkdown) { + TString markdown = R"( # Basic built-in functions Below are the general-purpose functions. 
@@ -51,27 +51,27 @@ SELECT FROM my_table; ``` )"; - TMarkdownPage page = ParseMarkdownPage(markdown); + TMarkdownPage page = ParseMarkdownPage(markdown); - UNIT_ASSERT_VALUES_EQUAL(page.SectionsByAnchor.size(), 2); + UNIT_ASSERT_VALUES_EQUAL(page.SectionsByAnchor.size(), 2); - const auto& coelcese = page.SectionsByAnchor["coalesce"]; - UNIT_ASSERT_STRING_CONTAINS(coelcese.Header.Content, "COALESCE"); - UNIT_ASSERT_VALUES_EQUAL(coelcese.Header.Anchor, "coalesce"); - UNIT_ASSERT_STRING_CONTAINS(coelcese.Body, "Iterates"); - UNIT_ASSERT_STRING_CONTAINS(coelcese.Body, "COALESCE"); - UNIT_ASSERT_GE(Count(coelcese.Body, '\n'), 5); + const auto& coelcese = page.SectionsByAnchor["coalesce"]; + UNIT_ASSERT_STRING_CONTAINS(coelcese.Header.Content, "COALESCE"); + UNIT_ASSERT_VALUES_EQUAL(coelcese.Header.Anchor, "coalesce"); + UNIT_ASSERT_STRING_CONTAINS(coelcese.Body, "Iterates"); + UNIT_ASSERT_STRING_CONTAINS(coelcese.Body, "COALESCE"); + UNIT_ASSERT_GE(Count(coelcese.Body, '\n'), 5); - const auto& random = page.SectionsByAnchor["random"]; - UNIT_ASSERT_STRING_CONTAINS(random.Header.Content, "Random"); - UNIT_ASSERT_VALUES_EQUAL(random.Header.Anchor, "random"); - UNIT_ASSERT_STRING_CONTAINS(random.Body, "Generates"); - UNIT_ASSERT_STRING_CONTAINS(random.Body, "Random"); - UNIT_ASSERT_GE(Count(random.Body, '\n'), 5); - } + const auto& random = page.SectionsByAnchor["random"]; + UNIT_ASSERT_STRING_CONTAINS(random.Header.Content, "Random"); + UNIT_ASSERT_VALUES_EQUAL(random.Header.Anchor, "random"); + UNIT_ASSERT_STRING_CONTAINS(random.Body, "Generates"); + UNIT_ASSERT_STRING_CONTAINS(random.Body, "Random"); + UNIT_ASSERT_GE(Count(random.Body, '\n'), 5); +} - Y_UNIT_TEST(NestedSections) { - TString markdown = R"( +Y_UNIT_TEST(NestedSections) { + TString markdown = R"( # Section 1 {#s1} Section 1 Text. ## Subsection 1 {#s1s1} @@ -89,19 +89,19 @@ Subsection 2.2.1 Text. # Section 3 {#s3} Section 3 Text. 
)"; - TMarkdownPage page = ParseMarkdownPage(markdown); - { - const TMarkdownSection& section = page.SectionsByAnchor["s1s2"]; - UNIT_ASSERT_STRING_CONTAINS(section.Body, "Subsection 1.2 Text."); - UNIT_ASSERT_C(!section.Body.Contains("Section 1 Text."), section.Body); - UNIT_ASSERT_C(!section.Body.Contains("Section 2 Text."), section.Body); - UNIT_ASSERT_C(!section.Body.Contains("Section 3 Text."), section.Body); - } - { - const TMarkdownSection& section = page.SectionsByAnchor["s2s2s1"]; - UNIT_ASSERT_STRING_CONTAINS(section.Body, "Subsection 2.2.1 Text."); - UNIT_ASSERT_C(!section.Body.Contains("Section 3 Text."), section.Body); - } + TMarkdownPage page = ParseMarkdownPage(markdown); + { + const TMarkdownSection& section = page.SectionsByAnchor["s1s2"]; + UNIT_ASSERT_STRING_CONTAINS(section.Body, "Subsection 1.2 Text."); + UNIT_ASSERT_C(!section.Body.Contains("Section 1 Text."), section.Body); + UNIT_ASSERT_C(!section.Body.Contains("Section 2 Text."), section.Body); + UNIT_ASSERT_C(!section.Body.Contains("Section 3 Text."), section.Body); + } + { + const TMarkdownSection& section = page.SectionsByAnchor["s2s2s1"]; + UNIT_ASSERT_STRING_CONTAINS(section.Body, "Subsection 2.2.1 Text."); + UNIT_ASSERT_C(!section.Body.Contains("Section 3 Text."), section.Body); } +} } // Y_UNIT_TEST_SUITE(MarkdownParserTests) diff --git a/yql/essentials/utils/docs/name.cpp b/yql/essentials/utils/docs/name.cpp index 570bf23f313..9ebf04d06bd 100644 --- a/yql/essentials/utils/docs/name.cpp +++ b/yql/essentials/utils/docs/name.cpp @@ -7,43 +7,43 @@ namespace NYql::NDocs { - const RE2 NormalizedNameRegex(R"re([a-z_]{1,2}[a-z0-9]*)re"); +const RE2 NormalizedNameRegex(R"re([a-z_]{1,2}[a-z0-9]*)re"); - bool IsNormalizedName(TStringBuf name) { - return RE2::FullMatch(name, NormalizedNameRegex); +bool IsNormalizedName(TStringBuf name) { + return RE2::FullMatch(name, NormalizedNameRegex); +} + +TMaybe<TString> NormalizedName(TString name) { + if (TMaybe<TIssue> issue = 
NormalizeName(TPosition(), name)) { + return Nothing(); } - TMaybe<TString> NormalizedName(TString name) { - if (TMaybe<TIssue> issue = NormalizeName(TPosition(), name)) { - return Nothing(); - } + if (!IsNormalizedName(name)) { + return Nothing(); + } - if (!IsNormalizedName(name)) { - return Nothing(); - } + return name; +} - return name; - } +bool IsUDF(TStringBuf name) { + return name.Contains("::"); +} - bool IsUDF(TStringBuf name) { - return name.Contains("::"); +TMaybe<std::pair<TString, TString>> SplitUDF(TString name) { + if (!IsUDF(name)) { + return Nothing(); } - TMaybe<std::pair<TString, TString>> SplitUDF(TString name) { - if (!IsUDF(name)) { - return Nothing(); - } + TVector<TString> words; + words.reserve(2); + StringSplitter(name).SplitByString("::").Collect(&words); + YQL_ENSURE(words.size() == 2, "Invalid UDF pattern: " << name); - TVector<TString> words; - words.reserve(2); - StringSplitter(name).SplitByString("::").Collect(&words); - YQL_ENSURE(words.size() == 2, "Invalid UDF pattern: " << name); + TMaybe<TString> module = NormalizedName(std::move(words[0])); + TMaybe<TString> function = NormalizedName(std::move(words[1])); + YQL_ENSURE(module && function, "Unable to normalize " << name); - TMaybe<TString> module = NormalizedName(std::move(words[0])); - TMaybe<TString> function = NormalizedName(std::move(words[1])); - YQL_ENSURE(module && function, "Unable to normalize " << name); - - return std::make_pair(*module, *function); - } + return std::make_pair(*module, *function); +} } // namespace NYql::NDocs diff --git a/yql/essentials/utils/docs/name.h b/yql/essentials/utils/docs/name.h index 3ab303ca23f..0328801b76b 100644 --- a/yql/essentials/utils/docs/name.h +++ b/yql/essentials/utils/docs/name.h @@ -6,14 +6,14 @@ namespace NYql::NDocs { - extern const RE2 NormalizedNameRegex; +extern const RE2 NormalizedNameRegex; - bool IsNormalizedName(TStringBuf name); +bool IsNormalizedName(TStringBuf name); - TMaybe<TString> NormalizedName(TString name); 
+TMaybe<TString> NormalizedName(TString name); - bool IsUDF(TStringBuf name); +bool IsUDF(TStringBuf name); - TMaybe<std::pair<TString, TString>> SplitUDF(TString name); +TMaybe<std::pair<TString, TString>> SplitUDF(TString name); } // namespace NYql::NDocs diff --git a/yql/essentials/utils/docs/page.cpp b/yql/essentials/utils/docs/page.cpp index 516b8b2886e..f0b1671a9dd 100644 --- a/yql/essentials/utils/docs/page.cpp +++ b/yql/essentials/utils/docs/page.cpp @@ -6,71 +6,71 @@ namespace NYql::NDocs { - TString ResolvedMarkdownText(TStringBuf relativePath, TString text, TStringBuf baseURL) { - static const RE2 anchorRegex(R"re(\[([^\\\]]+)\]\((#[^\\)]+)\))re"); - static const RE2 linkRegex(R"re(\[([^\\\]]+)\]\(([A-Za-z0-9/_\-\.]+).md(#[^\\)]+)?\))re"); +TString ResolvedMarkdownText(TStringBuf relativePath, TString text, TStringBuf baseURL) { + static const RE2 anchorRegex(R"re(\[([^\\\]]+)\]\((#[^\\)]+)\))re"); + static const RE2 linkRegex(R"re(\[([^\\\]]+)\]\(([A-Za-z0-9/_\-\.]+).md(#[^\\)]+)?\))re"); - TString base = TString(baseURL) + "/" + TString(relativePath); - TString anchorRewrite = "[\\1](" + base + "\\2)"; - TString linkRewrite = "[\\1](" + base + "/../" + "\\2\\3)"; + TString base = TString(baseURL) + "/" + TString(relativePath); + TString anchorRewrite = "[\\1](" + base + "\\2)"; + TString linkRewrite = "[\\1](" + base + "/../" + "\\2\\3)"; - TString error; - YQL_ENSURE( - anchorRegex.CheckRewriteString(anchorRewrite, &error), - "Bad rewrite '" << anchorRewrite << "': " << error); - YQL_ENSURE( - linkRegex.CheckRewriteString(linkRewrite, &error), - "Bad rewrite '" << linkRewrite << "': " << error); + TString error; + YQL_ENSURE( + anchorRegex.CheckRewriteString(anchorRewrite, &error), + "Bad rewrite '" << anchorRewrite << "': " << error); + YQL_ENSURE( + linkRegex.CheckRewriteString(linkRewrite, &error), + "Bad rewrite '" << linkRewrite << "': " << error); - RE2::GlobalReplace(&text, anchorRegex, anchorRewrite); - RE2::GlobalReplace(&text, linkRegex, 
linkRewrite); + RE2::GlobalReplace(&text, anchorRegex, anchorRewrite); + RE2::GlobalReplace(&text, linkRegex, linkRewrite); - return text; - } + return text; +} - TMarkdownPage Resolved(TStringBuf relativePath, TMarkdownPage page, TStringBuf baseURL) { - page.Text = ResolvedMarkdownText(relativePath, page.Text, baseURL); - for (auto& [_, section] : page.SectionsByAnchor) { - section.Body = ResolvedMarkdownText(relativePath, std::move(section.Body), baseURL); - } - return page; +TMarkdownPage Resolved(TStringBuf relativePath, TMarkdownPage page, TStringBuf baseURL) { + page.Text = ResolvedMarkdownText(relativePath, page.Text, baseURL); + for (auto& [_, section] : page.SectionsByAnchor) { + section.Body = ResolvedMarkdownText(relativePath, std::move(section.Body), baseURL); } + return page; +} - TString ExtendedSyntaxRemoved(TString text) { - static const RE2 regex(R"re( *{%[^\\]*?%} *\n?)re"); - RE2::GlobalReplace(&text, regex, ""); - return text; - } +TString ExtendedSyntaxRemoved(TString text) { + static const RE2 regex(R"re( *{%[^\\]*?%} *\n?)re"); + RE2::GlobalReplace(&text, regex, ""); + return text; +} - TMarkdownPage ExtendedSyntaxRemoved(TMarkdownPage page) { - page.Text = ExtendedSyntaxRemoved(page.Text); - for (auto& [_, section] : page.SectionsByAnchor) { - section.Body = ExtendedSyntaxRemoved(std::move(section.Body)); - } - return page; +TMarkdownPage ExtendedSyntaxRemoved(TMarkdownPage page) { + page.Text = ExtendedSyntaxRemoved(page.Text); + for (auto& [_, section] : page.SectionsByAnchor) { + section.Body = ExtendedSyntaxRemoved(std::move(section.Body)); } + return page; +} - TPages ParsePages(TResourcesByRelativePath resources) { - TPages pages; - for (auto& [path, resource] : resources) { - TMarkdownPage page = ParseMarkdownPage(std::move(resource)); - pages.emplace(std::move(path), std::move(page)); - } - return pages; +TPages ParsePages(TResourcesByRelativePath resources) { + TPages pages; + for (auto& [path, resource] : resources) { + 
TMarkdownPage page = ParseMarkdownPage(std::move(resource)); + pages.emplace(std::move(path), std::move(page)); } + return pages; +} - TPages Resolved(TPages pages, TStringBuf baseURL) { - for (auto& [relativeURL, page] : pages) { - page = Resolved(relativeURL, std::move(page), baseURL); - } - return pages; +TPages Resolved(TPages pages, TStringBuf baseURL) { + for (auto& [relativeURL, page] : pages) { + page = Resolved(relativeURL, std::move(page), baseURL); } + return pages; +} - TPages ExtendedSyntaxRemoved(TPages pages) { - for (auto& [_, page] : pages) { - page = ExtendedSyntaxRemoved(std::move(page)); - } - return pages; +TPages ExtendedSyntaxRemoved(TPages pages) { + for (auto& [_, page] : pages) { + page = ExtendedSyntaxRemoved(std::move(page)); } + return pages; +} } // namespace NYql::NDocs diff --git a/yql/essentials/utils/docs/page.h b/yql/essentials/utils/docs/page.h index 39d569f72c8..e01939a9292 100644 --- a/yql/essentials/utils/docs/page.h +++ b/yql/essentials/utils/docs/page.h @@ -5,12 +5,12 @@ namespace NYql::NDocs { - using TPages = THashMap<TString, TMarkdownPage>; +using TPages = THashMap<TString, TMarkdownPage>; - TPages ParsePages(TResourcesByRelativePath resources); +TPages ParsePages(TResourcesByRelativePath resources); - TPages Resolved(TPages pages, TStringBuf baseURL); +TPages Resolved(TPages pages, TStringBuf baseURL); - TPages ExtendedSyntaxRemoved(TPages pages); +TPages ExtendedSyntaxRemoved(TPages pages); } // namespace NYql::NDocs diff --git a/yql/essentials/utils/docs/page_ut.cpp b/yql/essentials/utils/docs/page_ut.cpp index ede92785140..a11d7784a5b 100644 --- a/yql/essentials/utils/docs/page_ut.cpp +++ b/yql/essentials/utils/docs/page_ut.cpp @@ -6,8 +6,8 @@ using namespace NYql::NDocs; Y_UNIT_TEST_SUITE(PageTests) { - Y_UNIT_TEST(ResolveURL) { - TString markdown = R"( +Y_UNIT_TEST(ResolveURL) { + TString markdown = R"( # List of window functions in YQL The syntax for calling window functions is detailed in a @@ -35,26 +35,26 @@ If 
one of the compared arguments is 0.0, the function always returns false. End. )"; - TPages pages = {{"builtins/window", ParseMarkdownPage(markdown)}}; - pages = Resolved(std::move(pages), "https://ytsaurus.tech/docs/en/yql"); - pages = ExtendedSyntaxRemoved(std::move(pages)); + TPages pages = {{"builtins/window", ParseMarkdownPage(markdown)}}; + pages = Resolved(std::move(pages), "https://ytsaurus.tech/docs/en/yql"); + pages = ExtendedSyntaxRemoved(std::move(pages)); - TVector<TString> changes = { - "[separate article](https://ytsaurus.tech/docs/en/yql/builtins/window/../../syntax/window)", - "[aggregate functions](https://ytsaurus.tech/docs/en/yql/builtins/window/../aggregation)", - "[window frame](https://ytsaurus.tech/docs/en/yql/builtins/window/../../syntax/window#frame)", - "[any()](https://clickhouse.tech/docs/en/sql-reference/aggregate-functions/reference/any/)", - }; + TVector<TString> changes = { + "[separate article](https://ytsaurus.tech/docs/en/yql/builtins/window/../../syntax/window)", + "[aggregate functions](https://ytsaurus.tech/docs/en/yql/builtins/window/../aggregation)", + "[window frame](https://ytsaurus.tech/docs/en/yql/builtins/window/../../syntax/window#frame)", + "[any()](https://clickhouse.tech/docs/en/sql-reference/aggregate-functions/reference/any/)", + }; - UNIT_ASSERT_STRING_CONTAINS(pages["builtins/window"].Text, changes.at(0)); - UNIT_ASSERT_STRING_CONTAINS(pages["builtins/window"].Text, changes.at(1)); - UNIT_ASSERT_STRING_CONTAINS(pages["builtins/window"].Text, changes.at(2)); - UNIT_ASSERT_STRING_CONTAINS(pages["builtins/window"].Text, changes.at(3)); + UNIT_ASSERT_STRING_CONTAINS(pages["builtins/window"].Text, changes.at(0)); + UNIT_ASSERT_STRING_CONTAINS(pages["builtins/window"].Text, changes.at(1)); + UNIT_ASSERT_STRING_CONTAINS(pages["builtins/window"].Text, changes.at(2)); + UNIT_ASSERT_STRING_CONTAINS(pages["builtins/window"].Text, changes.at(3)); - UNIT_ASSERT_STRING_CONTAINS(pages["builtins/window"].Text, "the function 
always returns false"); - UNIT_ASSERT_STRING_CONTAINS(pages["builtins/window"].Text, "End."); - UNIT_ASSERT(!pages["builtins/window"].Text.Contains("{% note alert %}")); - UNIT_ASSERT(!pages["builtins/window"].Text.Contains("{% endnote %}")); - } + UNIT_ASSERT_STRING_CONTAINS(pages["builtins/window"].Text, "the function always returns false"); + UNIT_ASSERT_STRING_CONTAINS(pages["builtins/window"].Text, "End."); + UNIT_ASSERT(!pages["builtins/window"].Text.Contains("{% note alert %}")); + UNIT_ASSERT(!pages["builtins/window"].Text.Contains("{% endnote %}")); +} } // Y_UNIT_TEST_SUITE(PageTests) diff --git a/yql/essentials/utils/docs/resource.cpp b/yql/essentials/utils/docs/resource.cpp index 8ca724c432e..f453fe8975c 100644 --- a/yql/essentials/utils/docs/resource.cpp +++ b/yql/essentials/utils/docs/resource.cpp @@ -6,38 +6,38 @@ namespace NYql::NDocs { - bool IsMatching(const TResourceFilter& filter, TStringBuf key) { - return key.Contains(filter.BaseDirectorySuffix) && - key.EndsWith(filter.CutSuffix); - } +bool IsMatching(const TResourceFilter& filter, TStringBuf key) { + return key.Contains(filter.BaseDirectorySuffix) && + key.EndsWith(filter.CutSuffix); +} - TStringBuf RelativePath(const TResourceFilter& filter, TStringBuf key Y_LIFETIME_BOUND) { - size_t pos = key.find(filter.BaseDirectorySuffix); - YQL_ENSURE(pos != TString::npos); - pos += filter.BaseDirectorySuffix.size(); +TStringBuf RelativePath(const TResourceFilter& filter, TStringBuf key Y_LIFETIME_BOUND) { + size_t pos = key.find(filter.BaseDirectorySuffix); + YQL_ENSURE(pos != TString::npos); + pos += filter.BaseDirectorySuffix.size(); - TStringBuf tail = TStringBuf(key).SubStr(pos); - tail.remove_suffix(filter.CutSuffix.size()); - return tail; - } + TStringBuf tail = TStringBuf(key).SubStr(pos); + tail.remove_suffix(filter.CutSuffix.size()); + return tail; +} - TResourcesByRelativePath FindResources(const TResourceFilter& filter) { - YQL_ENSURE( - filter.BaseDirectorySuffix.EndsWith('/'), - 
"BaseDirectory should end with '/', but got '" << filter.BaseDirectorySuffix << "'"); +TResourcesByRelativePath FindResources(const TResourceFilter& filter) { + YQL_ENSURE( + filter.BaseDirectorySuffix.EndsWith('/'), + "BaseDirectory should end with '/', but got '" << filter.BaseDirectorySuffix << "'"); - TResourcesByRelativePath resources; - for (TStringBuf key : NResource::ListAllKeys()) { - if (!key.StartsWith("resfs/file/") || !IsMatching(filter, key)) { - continue; - } + TResourcesByRelativePath resources; + for (TStringBuf key : NResource::ListAllKeys()) { + if (!key.StartsWith("resfs/file/") || !IsMatching(filter, key)) { + continue; + } - TStringBuf path = RelativePath(filter, key); - YQL_ENSURE(!resources.contains(path)); + TStringBuf path = RelativePath(filter, key); + YQL_ENSURE(!resources.contains(path)); - resources[path] = NResource::Find(key); - } - return resources; + resources[path] = NResource::Find(key); } + return resources; +} } // namespace NYql::NDocs diff --git a/yql/essentials/utils/docs/resource.h b/yql/essentials/utils/docs/resource.h index 0221b5ed9fc..3a39fbc2d71 100644 --- a/yql/essentials/utils/docs/resource.h +++ b/yql/essentials/utils/docs/resource.h @@ -6,14 +6,14 @@ namespace NYql::NDocs { - struct TResourceFilter { - TString BaseDirectorySuffix; - TString CutSuffix; - }; +struct TResourceFilter { + TString BaseDirectorySuffix; + TString CutSuffix; +}; - using TResourcesByRelativePath = THashMap<TString, TString>; +using TResourcesByRelativePath = THashMap<TString, TString>; - // Useful when YaTool ALL_RESOURCE_FILES macro is used. - TResourcesByRelativePath FindResources(const TResourceFilter& filter); +// Useful when YaTool ALL_RESOURCE_FILES macro is used. 
+TResourcesByRelativePath FindResources(const TResourceFilter& filter); } // namespace NYql::NDocs diff --git a/yql/essentials/utils/docs/ut/ya.make b/yql/essentials/utils/docs/ut/ya.make index 3afe8f60aa8..72c013d9e29 100644 --- a/yql/essentials/utils/docs/ut/ya.make +++ b/yql/essentials/utils/docs/ut/ya.make @@ -1,5 +1,7 @@ UNITTEST_FOR(yql/essentials/utils/docs) +ENABLE(YQL_STYLE_CPP) + SRCS( markdown_ut.cpp page_ut.cpp diff --git a/yql/essentials/utils/docs/verification.cpp b/yql/essentials/utils/docs/verification.cpp index 0eaedc2a3b4..13f84a16330 100644 --- a/yql/essentials/utils/docs/verification.cpp +++ b/yql/essentials/utils/docs/verification.cpp @@ -10,121 +10,121 @@ namespace NYql::NDocs { - auto Fames = { - EFame::BadLinked, - EFame::Unknown, - EFame::Mentioned, - EFame::Documented, - }; +auto Fames = { + EFame::BadLinked, + EFame::Unknown, + EFame::Mentioned, + EFame::Documented, +}; - bool IsLikelyDocumentedAt(TString text, TString name) { - SubstGlobal(text, "_", ""); +bool IsLikelyDocumentedAt(TString text, TString name) { + SubstGlobal(text, "_", ""); - TVector<TStringBuf> tokens; - Split(name, ":.", tokens); + TVector<TStringBuf> tokens; + Split(name, ":.", tokens); - for (TStringBuf token : tokens) { - YQL_ENSURE(!token.Empty()); + for (TStringBuf token : tokens) { + YQL_ENSURE(!token.Empty()); - TMaybe<TString> normalized = NormalizedName(TString(token)); - YQL_ENSURE(normalized, "Unable to normalize " << token); + TMaybe<TString> normalized = NormalizedName(TString(token)); + YQL_ENSURE(normalized, "Unable to normalize " << token); - if (TCaseInsensitiveAsciiString(text).Contains(*normalized)) { - return true; - } + if (TCaseInsensitiveAsciiString(text).Contains(*normalized)) { + return true; } - return false; } + return false; +} - void Verify(const TLinks& links, const TPages& pages, TString name, TFameReport& report) { - TMaybe<TLinkTarget> target = Lookup(links, name); - if (!target) { - report[EFame::Unknown][std::move(name)] = "Unknown"; 
- return; - } - - const TMarkdownPage* page = pages.FindPtr(target->RelativePath); - if (!page) { - report[EFame::BadLinked][std::move(name)] = - TStringBuilder() - << "Page '" << target->RelativePath << "' not found"; - return; - } +void Verify(const TLinks& links, const TPages& pages, TString name, TFameReport& report) { + TMaybe<TLinkTarget> target = Lookup(links, name); + if (!target) { + report[EFame::Unknown][std::move(name)] = "Unknown"; + return; + } - if (!target->Anchor && !IsLikelyDocumentedAt(page->Text, name)) { - report[EFame::BadLinked][std::move(name)] = - TStringBuilder() - << "Absent at '" << target->RelativePath << "'"; - return; - } + const TMarkdownPage* page = pages.FindPtr(target->RelativePath); + if (!page) { + report[EFame::BadLinked][std::move(name)] = + TStringBuilder() + << "Page '" << target->RelativePath << "' not found"; + return; + } - if (!target->Anchor) { - report[EFame::Mentioned][std::move(name)] = - TStringBuilder() - << "Mentioned at '" << target->RelativePath << "'"; - return; - } + if (!target->Anchor && !IsLikelyDocumentedAt(page->Text, name)) { + report[EFame::BadLinked][std::move(name)] = + TStringBuilder() + << "Absent at '" << target->RelativePath << "'"; + return; + } - const TMarkdownSection* section = page->SectionsByAnchor.FindPtr(*target->Anchor); - if (!section) { - report[EFame::BadLinked][std::move(name)] = - TStringBuilder() - << "Section '" << *target->Anchor << "' not found " - << "at '" << target->RelativePath << "'"; - return; - } + if (!target->Anchor) { + report[EFame::Mentioned][std::move(name)] = + TStringBuilder() + << "Mentioned at '" << target->RelativePath << "'"; + return; + } - if (!IsLikelyDocumentedAt(section->Header.Content, name) && - !IsLikelyDocumentedAt(section->Body, name)) { - report[EFame::BadLinked][std::move(name)] = - TStringBuilder() - << "Absent at section '" << target << "', " - << "section header is '" << section->Header.Content << "', " - << "section prefix is '" << 
TStringBuf(section->Body).SubString(0, 32) << "'"; - return; - } + const TMarkdownSection* section = page->SectionsByAnchor.FindPtr(*target->Anchor); + if (!section) { + report[EFame::BadLinked][std::move(name)] = + TStringBuilder() + << "Section '" << *target->Anchor << "' not found " + << "at '" << target->RelativePath << "'"; + return; + } - report[EFame::Documented][std::move(name)] = + if (!IsLikelyDocumentedAt(section->Header.Content, name) && + !IsLikelyDocumentedAt(section->Body, name)) { + report[EFame::BadLinked][std::move(name)] = TStringBuilder() - << "Documented at '" << target << "'"; + << "Absent at section '" << target << "', " + << "section header is '" << section->Header.Content << "', " + << "section prefix is '" << TStringBuf(section->Body).SubString(0, 32) << "'"; + return; } - void ExamineShortHands(TFameReport& report, const TMap<TString, TString>& shortHands) { - for (const auto& [shorten, qualified] : shortHands) { - report[EFame::BadLinked].erase(shorten); - for (EFame fame : Fames) { - auto it = report[fame].find(qualified); - if (it != report[fame].end()) { - report[fame][shorten] = it->second; - } + report[EFame::Documented][std::move(name)] = + TStringBuilder() + << "Documented at '" << target << "'"; +} + +void ExamineShortHands(TFameReport& report, const TMap<TString, TString>& shortHands) { + for (const auto& [shorten, qualified] : shortHands) { + report[EFame::BadLinked].erase(shorten); + for (EFame fame : Fames) { + auto it = report[fame].find(qualified); + if (it != report[fame].end()) { + report[fame][shorten] = it->second; } } } +} - TFameReport Verify(TVerificationInput input) { - TFameReport report; - for (TString name : input.Names) { - Verify(input.Links, input.Pages, std::move(name), report); - } - ExamineShortHands(report, input.ShortHands); - return report; +TFameReport Verify(TVerificationInput input) { + TFameReport report; + for (TString name : input.Names) { + Verify(input.Links, input.Pages, std::move(name), 
report); } + ExamineShortHands(report, input.ShortHands); + return report; +} - double Coverage(const TFameReport& report, const TVector<TString>& names) { - if (!report.contains(EFame::Documented)) { - return 0; - } +double Coverage(const TFameReport& report, const TVector<TString>& names) { + if (!report.contains(EFame::Documented)) { + return 0; + } - const TStatusesByName& documented = report.at(EFame::Documented); + const TStatusesByName& documented = report.at(EFame::Documented); - size_t covered = 0; - for (const TString& name : names) { - covered += documented.contains(name) ? 1 : 0; - } - - return static_cast<double>(covered) / names.size(); + size_t covered = 0; + for (const TString& name : names) { + covered += documented.contains(name) ? 1 : 0; } + return static_cast<double>(covered) / names.size(); +} + } // namespace NYql::NDocs template <> diff --git a/yql/essentials/utils/docs/verification.h b/yql/essentials/utils/docs/verification.h index 4eac54017f8..0f9a2aa8214 100644 --- a/yql/essentials/utils/docs/verification.h +++ b/yql/essentials/utils/docs/verification.h @@ -8,26 +8,26 @@ namespace NYql::NDocs { - enum class EFame { - BadLinked, - Unknown, - Mentioned, - Documented, - }; +enum class EFame { + BadLinked, + Unknown, + Mentioned, + Documented, +}; - using TStatusesByName = TMap<TString, TString>; +using TStatusesByName = TMap<TString, TString>; - using TFameReport = THashMap<EFame, TStatusesByName>; +using TFameReport = THashMap<EFame, TStatusesByName>; - struct TVerificationInput { - TLinks Links; - TPages Pages; - TSet<TString> Names; - TMap<TString, TString> ShortHands; - }; +struct TVerificationInput { + TLinks Links; + TPages Pages; + TSet<TString> Names; + TMap<TString, TString> ShortHands; +}; - TFameReport Verify(TVerificationInput input); +TFameReport Verify(TVerificationInput input); - double Coverage(const TFameReport& report, const TVector<TString>& names); +double Coverage(const TFameReport& report, const TVector<TString>& names); 
} // namespace NYql::NDocs diff --git a/yql/essentials/utils/docs/ya.make b/yql/essentials/utils/docs/ya.make index 6f828c1434b..40cc3862c93 100644 --- a/yql/essentials/utils/docs/ya.make +++ b/yql/essentials/utils/docs/ya.make @@ -1,5 +1,7 @@ LIBRARY() +ENABLE(YQL_STYLE_CPP) + SRCS( link_page.cpp link.cpp diff --git a/yql/essentials/utils/exceptions.cpp b/yql/essentials/utils/exceptions.cpp index 8a6d09fc9d1..0a813bf8c36 100644 --- a/yql/essentials/utils/exceptions.cpp +++ b/yql/essentials/utils/exceptions.cpp @@ -7,13 +7,15 @@ namespace NYql { TCodeLineException::TCodeLineException(ui32 code) : SourceLocation("", 0) , Code(code) -{} +{ +} TCodeLineException::TCodeLineException(const TSourceLocation& sl, const TCodeLineException& t) : yexception(t) , SourceLocation(sl) , Code(t.Code) -{} +{ +} const char* TCodeLineException::GetRawMessage() const { return yexception::what(); @@ -25,7 +27,7 @@ const char* TCodeLineException::what() const noexcept { Message = TStringBuilder{} << SourceLocation << TStringBuf(": ") << yexception::what(); } return Message.c_str(); - } catch(...) { + } catch (...) { return "Unexpected exception in TCodeLineException::what()"; } } @@ -34,4 +36,4 @@ TCodeLineException operator+(const TSourceLocation& sl, TCodeLineException&& t) return TCodeLineException(sl, t); } -} // namespace NFq
\ No newline at end of file +} // namespace NYql diff --git a/yql/essentials/utils/exceptions.h b/yql/essentials/utils/exceptions.h index 8df5307da2d..1fd13e39f61 100644 --- a/yql/essentials/utils/exceptions.h +++ b/yql/essentials/utils/exceptions.h @@ -4,9 +4,8 @@ namespace NYql { -// This exception can separate code line and file name from the error message +// This exception can separate code line and file name from the error message struct TCodeLineException: public yexception { - TSourceLocation SourceLocation; mutable TString Message; ui32 Code; @@ -18,16 +17,15 @@ struct TCodeLineException: public yexception { virtual const char* what() const noexcept override; const char* GetRawMessage() const; - }; TCodeLineException operator+(const TSourceLocation& sl, TCodeLineException&& t); -#define YQL_ENSURE_CODELINE(CONDITION, CODE, ...) \ - do { \ - if (Y_UNLIKELY(!(CONDITION))) { \ +#define YQL_ENSURE_CODELINE(CONDITION, CODE, ...) \ + do { \ + if (Y_UNLIKELY(!(CONDITION))) { \ ythrow TCodeLineException(CODE) << __VA_ARGS__; \ - } \ + } \ } while (0) -} // namespace NYql
\ No newline at end of file +} // namespace NYql diff --git a/yql/essentials/utils/failure_injector/failure_injector.cpp b/yql/essentials/utils/failure_injector/failure_injector.cpp index dfaff74503a..f875362d4af 100644 --- a/yql/essentials/utils/failure_injector/failure_injector.cpp +++ b/yql/essentials/utils/failure_injector/failure_injector.cpp @@ -30,7 +30,7 @@ THashMap<TString, TFailureInjector::TFailureSpec> TFailureInjector::GetCurrentSt THashMap<TString, TFailureInjector::TFailureSpec> TFailureInjector::GetCurrentStateImpl() { THashMap<TString, TFailureInjector::TFailureSpec> copy; - with_lock(Lock_) { + with_lock (Lock_) { copy = FailureSpecs_; } return copy; @@ -40,7 +40,7 @@ void TFailureInjector::ReachImpl(std::string_view name, std::function<void()> ac if (!Enabled_.load()) { return; } - with_lock(Lock_) { + with_lock (Lock_) { if (auto failureSpec = FailureSpecs_.FindPtr(name)) { YQL_LOG(DEBUG) << "TFailureInjector::Reach: " << name << ", Skip=" << failureSpec->Skip << ", Fails=" << failureSpec->CountOfFails; if (failureSpec->Skip > 0) { @@ -55,10 +55,10 @@ void TFailureInjector::ReachImpl(std::string_view name, std::function<void()> ac } void TFailureInjector::SetImpl(std::string_view name, ui64 skip, ui64 countOfFails) { - with_lock(Lock_) { + with_lock (Lock_) { YQL_ENSURE(countOfFails > 0, "failure " << name << ", 'countOfFails' must be positive"); FailureSpecs_[TString{name}] = TFailureSpec{skip, countOfFails}; } } -} // NYql +} // namespace NYql diff --git a/yql/essentials/utils/failure_injector/failure_injector.h b/yql/essentials/utils/failure_injector/failure_injector.h index 227eb1518f1..a791d4f1e52 100644 --- a/yql/essentials/utils/failure_injector/failure_injector.h +++ b/yql/essentials/utils/failure_injector/failure_injector.h @@ -33,4 +33,4 @@ private: TMutex Lock_; }; -} // NYql +} // namespace NYql diff --git a/yql/essentials/utils/failure_injector/failure_injector_ut.cpp b/yql/essentials/utils/failure_injector/failure_injector_ut.cpp 
index 4d94b5cce7d..75c67e30048 100644 --- a/yql/essentials/utils/failure_injector/failure_injector_ut.cpp +++ b/yql/essentials/utils/failure_injector/failure_injector_ut.cpp @@ -26,62 +26,62 @@ void SetUpLogger() { } Y_UNIT_TEST_SUITE(TFailureInjectorTests) { - Y_UNIT_TEST(BasicFailureTest) { - SetUpLogger(); - std::atomic<bool> called; - called.store(false); - auto behavior = [&called] { OnReach(called); }; - TFailureInjector::Reach("misc_failure", behavior); - UNIT_ASSERT_EQUAL(false, called.load()); - TFailureInjector::Activate(); - TFailureInjector::Set("misc_failure", 0, 1); - TFailureInjector::Reach("misc_failure", behavior); - UNIT_ASSERT_EQUAL(true, called.load()); - } +Y_UNIT_TEST(BasicFailureTest) { + SetUpLogger(); + std::atomic<bool> called; + called.store(false); + auto behavior = [&called] { OnReach(called); }; + TFailureInjector::Reach("misc_failure", behavior); + UNIT_ASSERT_EQUAL(false, called.load()); + TFailureInjector::Activate(); + TFailureInjector::Set("misc_failure", 0, 1); + TFailureInjector::Reach("misc_failure", behavior); + UNIT_ASSERT_EQUAL(true, called.load()); +} - Y_UNIT_TEST(CheckSkipTest) { - SetUpLogger(); - std::atomic<bool> called; - called.store(false); - auto behavior = [&called] { OnReach(called); }; - TFailureInjector::Activate(); - TFailureInjector::Set("misc_failure", 1, 1); +Y_UNIT_TEST(CheckSkipTest) { + SetUpLogger(); + std::atomic<bool> called; + called.store(false); + auto behavior = [&called] { OnReach(called); }; + TFailureInjector::Activate(); + TFailureInjector::Set("misc_failure", 1, 1); - TFailureInjector::Reach("misc_failure", behavior); - UNIT_ASSERT_EQUAL(false, called.load()); - TFailureInjector::Reach("misc_failure", behavior); - UNIT_ASSERT_EQUAL(true, called.load()); - } + TFailureInjector::Reach("misc_failure", behavior); + UNIT_ASSERT_EQUAL(false, called.load()); + TFailureInjector::Reach("misc_failure", behavior); + UNIT_ASSERT_EQUAL(true, called.load()); +} - Y_UNIT_TEST(CheckFailCountTest) { - 
SetUpLogger(); - int called = 0; - auto behavior = [&called] { ++called; }; - TFailureInjector::Activate(); - TFailureInjector::Set("misc_failure", 1, 2); +Y_UNIT_TEST(CheckFailCountTest) { + SetUpLogger(); + int called = 0; + auto behavior = [&called] { ++called; }; + TFailureInjector::Activate(); + TFailureInjector::Set("misc_failure", 1, 2); - TFailureInjector::Reach("misc_failure", behavior); - UNIT_ASSERT_EQUAL(0, called); - TFailureInjector::Reach("misc_failure", behavior); - UNIT_ASSERT_EQUAL(1, called); - TFailureInjector::Reach("misc_failure", behavior); - UNIT_ASSERT_EQUAL(2, called); - TFailureInjector::Reach("misc_failure", behavior); - UNIT_ASSERT_EQUAL(2, called); - TFailureInjector::Reach("misc_failure", behavior); - UNIT_ASSERT_EQUAL(2, called); - } + TFailureInjector::Reach("misc_failure", behavior); + UNIT_ASSERT_EQUAL(0, called); + TFailureInjector::Reach("misc_failure", behavior); + UNIT_ASSERT_EQUAL(1, called); + TFailureInjector::Reach("misc_failure", behavior); + UNIT_ASSERT_EQUAL(2, called); + TFailureInjector::Reach("misc_failure", behavior); + UNIT_ASSERT_EQUAL(2, called); + TFailureInjector::Reach("misc_failure", behavior); + UNIT_ASSERT_EQUAL(2, called); +} - Y_UNIT_TEST(SlowDownTest) { - SetUpLogger(); - TFailureInjector::Activate(); - TFailureInjector::Set("misc_failure", 0, 1); +Y_UNIT_TEST(SlowDownTest) { + SetUpLogger(); + TFailureInjector::Activate(); + TFailureInjector::Set("misc_failure", 0, 1); - auto start = system_clock::now(); - TFailureInjector::Reach("misc_failure", [] { ::Sleep(TDuration::Seconds(5)); }); - auto finish = system_clock::now(); - auto duration = duration_cast<std::chrono::seconds>(finish - start); - YQL_LOG(DEBUG) << "Duration :" << duration.count(); - UNIT_ASSERT_GE(duration.count(), 5); - } + auto start = system_clock::now(); + TFailureInjector::Reach("misc_failure", [] { ::Sleep(TDuration::Seconds(5)); }); + auto finish = system_clock::now(); + auto duration = duration_cast<std::chrono::seconds>(finish - 
start); + YQL_LOG(DEBUG) << "Duration :" << duration.count(); + UNIT_ASSERT_GE(duration.count(), 5); } +} // Y_UNIT_TEST_SUITE(TFailureInjectorTests) diff --git a/yql/essentials/utils/failure_injector/ut/ya.make b/yql/essentials/utils/failure_injector/ut/ya.make index 579a4662872..2b9138f378c 100644 --- a/yql/essentials/utils/failure_injector/ut/ya.make +++ b/yql/essentials/utils/failure_injector/ut/ya.make @@ -1,6 +1,8 @@ IF (OS_LINUX OR OS_DARWIN) UNITTEST_FOR(yql/essentials/utils/failure_injector) + ENABLE(YQL_STYLE_CPP) + SIZE(SMALL) SRCS( diff --git a/yql/essentials/utils/failure_injector/ya.make b/yql/essentials/utils/failure_injector/ya.make index e10dfdaecba..a7f969d4b67 100644 --- a/yql/essentials/utils/failure_injector/ya.make +++ b/yql/essentials/utils/failure_injector/ya.make @@ -1,5 +1,7 @@ LIBRARY() +ENABLE(YQL_STYLE_CPP) + SRCS( failure_injector.cpp ) diff --git a/yql/essentials/utils/fetch/fetch.cpp b/yql/essentials/utils/fetch/fetch.cpp index 4b0777e9feb..d7a76e3e6ed 100644 --- a/yql/essentials/utils/fetch/fetch.cpp +++ b/yql/essentials/utils/fetch/fetch.cpp @@ -125,33 +125,32 @@ inline bool IsRedirectCode(unsigned code) { return false; } -} // unnamed +} // namespace ERetryErrorClass DefaultClassifyHttpCode(unsigned code) { switch (code) { - case HTTP_REQUEST_TIME_OUT: //408 - case HTTP_AUTHENTICATION_TIMEOUT: //419 + case HTTP_REQUEST_TIME_OUT: // 408 + case HTTP_AUTHENTICATION_TIMEOUT: // 419 return ERetryErrorClass::ShortRetry; - case HTTP_TOO_MANY_REQUESTS: //429 - case HTTP_SERVICE_UNAVAILABLE: //503 + case HTTP_TOO_MANY_REQUESTS: // 429 + case HTTP_SERVICE_UNAVAILABLE: // 503 return ERetryErrorClass::LongRetry; default: return IsServerError(code) - ? ERetryErrorClass::ShortRetry //5xx - : ERetryErrorClass::NoRetry; + ? 
ERetryErrorClass::ShortRetry // 5xx + : ERetryErrorClass::NoRetry; } } IRetryPolicy<unsigned>::TPtr GetDefaultPolicy() { static const auto policy = IRetryPolicy<unsigned>::GetExponentialBackoffPolicy( - /*retryClassFunction=*/DefaultClassifyHttpCode, - /*minDelay=*/TDuration::Seconds(1), - /*minLongRetryDelay:*/TDuration::Seconds(5), - /*maxDelay=*/TDuration::Minutes(1), - /*maxRetries=*/3, - /*maxTime=*/TDuration::Minutes(3), - /*scaleFactor=*/2 - ); + /*retryClassFunction=*/DefaultClassifyHttpCode, + /*minDelay=*/TDuration::Seconds(1), + /*minLongRetryDelay:*/ TDuration::Seconds(5), + /*maxDelay=*/TDuration::Minutes(1), + /*maxRetries=*/3, + /*maxTime=*/TDuration::Minutes(3), + /*scaleFactor=*/2); return policy; } @@ -215,4 +214,4 @@ TFetchResultPtr Fetch(const THttpURL& url, const THttpHeaders& additionalHeaders ythrow yexception() << "Failed to fetch url '" << currentUrl.PrintS() << "': too many redirects"; } -} // NYql +} // namespace NYql diff --git a/yql/essentials/utils/fetch/fetch.h b/yql/essentials/utils/fetch/fetch.h index 3909de07215..c644a05a9f3 100644 --- a/yql/essentials/utils/fetch/fetch.h +++ b/yql/essentials/utils/fetch/fetch.h @@ -24,4 +24,4 @@ IRetryPolicy<unsigned>::TPtr GetDefaultPolicy(); ERetryErrorClass DefaultClassifyHttpCode(unsigned code); TFetchResultPtr Fetch(const THttpURL& url, const THttpHeaders& additionalHeaders = {}, const TDuration& timeout = TDuration::Max(), size_t redirects = 10, const IRetryPolicy<unsigned>::TPtr& policy = nullptr); -} // NYql +} // namespace NYql diff --git a/yql/essentials/utils/fetch/ya.make b/yql/essentials/utils/fetch/ya.make index 71b7d54d501..337496a541d 100644 --- a/yql/essentials/utils/fetch/ya.make +++ b/yql/essentials/utils/fetch/ya.make @@ -1,5 +1,7 @@ LIBRARY() +ENABLE(YQL_STYLE_CPP) + SRCS( fetch.cpp ) diff --git a/yql/essentials/utils/fp_bits.h b/yql/essentials/utils/fp_bits.h index ffce6e161b4..9d2a812544d 100644 --- a/yql/essentials/utils/fp_bits.h +++ b/yql/essentials/utils/fp_bits.h @@ 
-98,13 +98,13 @@ struct TCanonizeFpBitsImpl<T, false> { using TNumTraits = std::numeric_limits<T>; const T value = *(T*)buffer; switch (std::fpclassify(value)) { - case FP_NAN: - static_assert(TNumTraits::has_quiet_NaN, "no QNAN"); - *(T*)buffer = TNumTraits::quiet_NaN(); - break; - case FP_ZERO: - *(T*)buffer = T(0); - break; + case FP_NAN: + static_assert(TNumTraits::has_quiet_NaN, "no QNAN"); + *(T*)buffer = TNumTraits::quiet_NaN(); + break; + case FP_ZERO: + *(T*)buffer = T(0); + break; } } }; @@ -119,4 +119,4 @@ void CanonizeFpBits(void* buffer) { return TCanonizeFpBitsImpl<T, TFpTraits<T>::Supported>::Do(buffer); } -} +} // namespace NYql diff --git a/yql/essentials/utils/fp_bits_ut.cpp b/yql/essentials/utils/fp_bits_ut.cpp index d6d94b56f46..b8572a265ec 100644 --- a/yql/essentials/utils/fp_bits_ut.cpp +++ b/yql/essentials/utils/fp_bits_ut.cpp @@ -83,23 +83,23 @@ void CanonizeFpBitsTest() { UNIT_ASSERT(std::memcmp((const void*)&newValues[v], (const void*)&values[originalV], std::min(size_t(10), sizeof(T))) == 0); } } -} +} // namespace Y_UNIT_TEST_SUITE(TFpBits) { - Y_UNIT_TEST(CanonizeFloat) { - CanonizeFpBitsTest<float>(); - } +Y_UNIT_TEST(CanonizeFloat) { + CanonizeFpBitsTest<float>(); +} - Y_UNIT_TEST(CanonizeDouble) { - CanonizeFpBitsTest<double>(); - } +Y_UNIT_TEST(CanonizeDouble) { + CanonizeFpBitsTest<double>(); +} - Y_UNIT_TEST(CanonizeLongDouble) { - if (NValgrind::ValgrindIsOn()) { - return; // TODO KIKIMR-3431 - } - CanonizeFpBitsTest<long double>(); +Y_UNIT_TEST(CanonizeLongDouble) { + if (NValgrind::ValgrindIsOn()) { + return; // TODO KIKIMR-3431 } + CanonizeFpBitsTest<long double>(); } +} // Y_UNIT_TEST_SUITE(TFpBits) -} +} // namespace NYql diff --git a/yql/essentials/utils/future_action.h b/yql/essentials/utils/future_action.h index d4a8dd69aad..0da98307a38 100644 --- a/yql/essentials/utils/future_action.h +++ b/yql/essentials/utils/future_action.h @@ -43,12 +43,12 @@ NThreading::TFuture<std::function<V()>> 
AddConstantActionToFuture(NThreading::TF /* Transform action result by applying mapper */ -template <typename R, typename TMapper, typename ...Args> +template <typename R, typename TMapper, typename... Args> auto MapFutureAction(NThreading::TFuture<std::function<R(Args&&...)>> f, const TMapper& mapper) { using V = decltype(mapper(std::declval<R>())); return f.Apply([mapper](NThreading::TFuture<std::function<R(Args&&...)>> f) { - std::function<V(Args&&...)> r = [f, mapper](Args&& ...args) { + std::function<V(Args && ...)> r = [f, mapper](Args&&... args) { return mapper(f.GetValue()(std::forward<Args>(args)...)); }; @@ -56,4 +56,4 @@ auto MapFutureAction(NThreading::TFuture<std::function<R(Args&&...)>> f, const T }); } -} +} // namespace NYql diff --git a/yql/essentials/utils/hash.cpp b/yql/essentials/utils/hash.cpp index b0bc284d4f8..803dcb3b4b8 100644 --- a/yql/essentials/utils/hash.cpp +++ b/yql/essentials/utils/hash.cpp @@ -11,11 +11,12 @@ size_t VaryingHash(size_t src) { TPid() : Value(GetEnv("YQL_MUTATE_HASHCODE") ? 
IntHash(GetPID()) : 0) - {} + { + } }; return Singleton<TPid>()->Value ^ src; } #endif -} +} // namespace NYql diff --git a/yql/essentials/utils/hash.h b/yql/essentials/utils/hash.h index 45cd95b777a..ac6f1324a11 100644 --- a/yql/essentials/utils/hash.h +++ b/yql/essentials/utils/hash.h @@ -23,7 +23,8 @@ struct TVaryingHash { TVaryingHash(const TVaryingHash&) = default; TVaryingHash(const THasher& underlying) : Underlying(underlying) - {} + { + } TVaryingHash& operator=(const TVaryingHash& other) = default; @@ -33,55 +34,55 @@ struct TVaryingHash { }; template <class TKey, - class TValue, - class THasher = std::hash<TKey>, - class TEqual = std::equal_to<TKey>, - class TAlloc = std::allocator<std::pair<const TKey, TValue>>> + class TValue, + class THasher = std::hash<TKey>, + class TEqual = std::equal_to<TKey>, + class TAlloc = std::allocator<std::pair<const TKey, TValue>>> using TVaryingUnorderedMap = std::unordered_map<TKey, TValue, TVaryingHash<TKey, THasher>, TEqual, TAlloc>; template <class TKey, - class TValue, - class THasher = std::hash<TKey>, - class TEqual = std::equal_to<TKey>, - class TAlloc = std::allocator<std::pair<const TKey, TValue>>> + class TValue, + class THasher = std::hash<TKey>, + class TEqual = std::equal_to<TKey>, + class TAlloc = std::allocator<std::pair<const TKey, TValue>>> using TVaryingUnorderedMultiMap = std::unordered_multimap<TKey, TValue, TVaryingHash<TKey, THasher>, TEqual, TAlloc>; template <class TKey, - class THasher = std::hash<TKey>, - class TEqual = std::equal_to<TKey>, - class TAlloc = std::allocator<TKey>> + class THasher = std::hash<TKey>, + class TEqual = std::equal_to<TKey>, + class TAlloc = std::allocator<TKey>> using TVaryingUnorderedSet = std::unordered_set<TKey, TVaryingHash<TKey, THasher>, TEqual, TAlloc>; template <class TKey, - class THasher = std::hash<TKey>, - class TEqual = std::equal_to<TKey>, - class TAlloc = std::allocator<TKey>> + class THasher = std::hash<TKey>, + class TEqual = std::equal_to<TKey>, + 
class TAlloc = std::allocator<TKey>> using TVaryingUnorderedMultiSet = std::unordered_multiset<TKey, TVaryingHash<TKey, THasher>, TEqual, TAlloc>; template <class TKey, - class TValue, - class THasher = THash<TKey>, - class TEqual = TEqualTo<TKey>, - class TAlloc = std::allocator<std::pair<const TKey, TValue>>> + class TValue, + class THasher = THash<TKey>, + class TEqual = TEqualTo<TKey>, + class TAlloc = std::allocator<std::pair<const TKey, TValue>>> using TVaryingHashMap = THashMap<TKey, TValue, TVaryingHash<TKey, THasher>, TEqual, TAlloc>; template <class TKey, - class TValue, - class THasher = THash<TKey>, - class TEqual = TEqualTo<TKey>, - class TAlloc = std::allocator<std::pair<const TKey, TValue>>> + class TValue, + class THasher = THash<TKey>, + class TEqual = TEqualTo<TKey>, + class TAlloc = std::allocator<std::pair<const TKey, TValue>>> using TVaryingHashMultiMap = THashMultiMap<TKey, TValue, TVaryingHash<TKey, THasher>, TEqual, TAlloc>; template <class TKey, - class THasher = THash<TKey>, - class TEqual = TEqualTo<TKey>, - class TAlloc = std::allocator<TKey>> + class THasher = THash<TKey>, + class TEqual = TEqualTo<TKey>, + class TAlloc = std::allocator<TKey>> using TVaryingHashSet = THashSet<TKey, TVaryingHash<TKey, THasher>, TEqual, TAlloc>; template <class TKey, - class THasher = THash<TKey>, - class TEqual = TEqualTo<TKey>, - class TAlloc = std::allocator<TKey>> + class THasher = THash<TKey>, + class TEqual = TEqualTo<TKey>, + class TAlloc = std::allocator<TKey>> using TVaryingHashMultiSet = THashMultiSet<TKey, TVaryingHash<TKey, THasher>, TEqual, TAlloc>; } // namespace NYql diff --git a/yql/essentials/utils/limiting_allocator.cpp b/yql/essentials/utils/limiting_allocator.cpp index 0ff84f90378..e25ec99a427 100644 --- a/yql/essentials/utils/limiting_allocator.cpp +++ b/yql/essentials/utils/limiting_allocator.cpp @@ -4,9 +4,12 @@ #include <util/generic/yexception.h> namespace { -class TLimitingAllocator : public IAllocator { +class 
TLimitingAllocator: public IAllocator { public: - TLimitingAllocator(size_t limit, IAllocator* allocator) : Alloc_(allocator), Limit_(limit) {}; + TLimitingAllocator(size_t limit, IAllocator* allocator) + : Alloc_(allocator) + , Limit_(limit) + {}; TBlock Allocate(size_t len) override final { if (Allocated_ + len > Limit_) { throw std::runtime_error("Out of memory"); @@ -26,10 +29,10 @@ private: size_t Allocated_ = 0; const size_t Limit_; }; -} +} // namespace namespace NYql { std::unique_ptr<IAllocator> MakeLimitingAllocator(size_t limit, IAllocator* underlying) { return std::make_unique<TLimitingAllocator>(limit, underlying); } -} +} // namespace NYql diff --git a/yql/essentials/utils/limiting_allocator.h b/yql/essentials/utils/limiting_allocator.h index 7d94aa7f2ad..f90a09e7b7b 100644 --- a/yql/essentials/utils/limiting_allocator.h +++ b/yql/essentials/utils/limiting_allocator.h @@ -5,4 +5,4 @@ namespace NYql { std::unique_ptr<IAllocator> MakeLimitingAllocator(size_t limit, IAllocator* underlying); -} +} // namespace NYql diff --git a/yql/essentials/utils/log/context.cpp b/yql/essentials/utils/log/context.cpp index 6f4cb0c580e..fb7db9a5548 100644 --- a/yql/essentials/utils/log/context.cpp +++ b/yql/essentials/utils/log/context.cpp @@ -3,7 +3,6 @@ #include <util/thread/singleton.h> - namespace NYql { namespace NLog { namespace { @@ -12,28 +11,28 @@ struct TThrowedLogContext { TString LocationWithLogContext; // separated with ': ' }; -} // namspace +} // namespace TStringBuf ToStringBuf(EContextKey key) { switch (key) { - case EContextKey::DateTime: - return "datetime"; - case EContextKey::Level: - return "level"; - case EContextKey::ProcessName: - return "procname"; - case EContextKey::ProcessID: - return "pid"; - case EContextKey::ThreadID: - return "tid"; - case EContextKey::Component: - return "component"; - case EContextKey::FileName: - return "filename"; - case EContextKey::Line: - return "line"; - case EContextKey::Path: - return "path"; + case 
EContextKey::DateTime: + return "datetime"; + case EContextKey::Level: + return "level"; + case EContextKey::ProcessName: + return "procname"; + case EContextKey::ProcessID: + return "pid"; + case EContextKey::ThreadID: + return "tid"; + case EContextKey::Component: + return "component"; + case EContextKey::FileName: + return "filename"; + case EContextKey::Line: + return "line"; + case EContextKey::Path: + return "path"; } } @@ -50,7 +49,7 @@ void OutputLogCtx(IOutputStream* out, bool withBraces, bool skipSessionId) { bool isFirst = true; while (ctxItem != ctxList) { - for (const TString& name: *ctxItem) { + for (const TString& name : *ctxItem) { if (!skipSessionId && !name.empty()) { if (!isFirst) { (*out) << '/'; @@ -92,7 +91,6 @@ TString ThrowedLogContextPath() { return std::move(tlc->LocationWithLogContext); } - TAutoPtr<TLogElement> TContextPreprocessor::Preprocess(TAutoPtr<TLogElement> element) { TStringStream out; diff --git a/yql/essentials/utils/log/context.h b/yql/essentials/utils/log/context.h index f2b423cf494..b8bbb63e9e1 100644 --- a/yql/essentials/utils/log/context.h +++ b/yql/essentials/utils/log/context.h @@ -9,32 +9,34 @@ // continues existing contexts chain -#define YQL_LOG_CTX_SCOPE(...) \ +#define YQL_LOG_CTX_SCOPE(...) \ auto Y_CAT(c, __LINE__) = ::NYql::NLog::MakeCtx(__VA_ARGS__); \ Y_UNUSED(Y_CAT(c, __LINE__)) -#define YQL_LOG_CTX_BLOCK(...) \ +#define YQL_LOG_CTX_BLOCK(...) \ if (auto Y_GENERATE_UNIQUE_ID(c) = ::NYql::NLog::MakeCtx(__VA_ARGS__)) { \ - goto Y_CAT(YQL_LOG_CTX_LABEL, __LINE__); \ - } else Y_CAT(YQL_LOG_CTX_LABEL, __LINE__): - + goto Y_CAT(YQL_LOG_CTX_LABEL, __LINE__); \ + } else \ + Y_CAT(YQL_LOG_CTX_LABEL, __LINE__) \ + : // starts new contexts chain, after leaving current scope restores // previous contexts chain -#define YQL_LOG_CTX_ROOT_SESSION_SCOPE(sessionId, ...) \ +#define YQL_LOG_CTX_ROOT_SESSION_SCOPE(sessionId, ...) 
\ auto Y_CAT(c, __LINE__) = ::NYql::NLog::MakeRootCtx(sessionId, ##__VA_ARGS__); \ Y_UNUSED(Y_CAT(c, __LINE__)) -#define YQL_LOG_CTX_ROOT_SCOPE(...) \ +#define YQL_LOG_CTX_ROOT_SCOPE(...) \ auto Y_CAT(c, __LINE__) = ::NYql::NLog::MakeRootCtx("", __VA_ARGS__); \ Y_UNUSED(Y_CAT(c, __LINE__)) -#define YQL_LOG_CTX_ROOT_BLOCK(...) \ +#define YQL_LOG_CTX_ROOT_BLOCK(...) \ if (auto Y_GENERATE_UNIQUE_ID(c) = ::NYql::NLog::MakeRootCtx(__VA_ARGS__)) { \ - goto Y_CAT(YQL_LOG_CTX_LABEL, __LINE__); \ - } else Y_CAT(YQL_LOG_CTX_LABEL, __LINE__): - + goto Y_CAT(YQL_LOG_CTX_LABEL, __LINE__); \ + } else \ + Y_CAT(YQL_LOG_CTX_LABEL, __LINE__) \ + : // adds current contexts path to exception message before throwing it @@ -93,7 +95,9 @@ public: } void Unlink() { - if (!HasNext()) return; + if (!HasNext()) { + return; + } Prev->Next = Next; Next->Prev = Prev; @@ -112,11 +116,12 @@ TLogContextListItem* GetLogContextList(); /** * @brief Context element with stored SessionId. -*/ -class TLogContextSessionItem : public TLogContextListItem { + */ +class TLogContextSessionItem: public TLogContextListItem { public: TLogContextSessionItem(size_t size, bool hasSessionId_) - : TLogContextListItem(size, sizeof(*this)) { + : TLogContextListItem(size, sizeof(*this)) + { HasSessionId_ = hasSessionId_; } @@ -128,7 +133,7 @@ private: bool HasSessionId_; }; -} // namspace NImpl +} // namespace NImpl enum class EContextKey { DateTime = 0, @@ -153,7 +158,7 @@ public: template <typename... TArgs> TLogContext(TArgs... args) : TLogContextListItem(Size) - , Names_{{ TString{std::forward<TArgs>(args)}... }} + , Names_{{TString{std::forward<TArgs>(args)}...}} { LinkBefore(NImpl::GetLogContextList()); } @@ -180,7 +185,7 @@ public: template <typename... TArgs> TRootLogContext(const TString& sessionId, TArgs... args) : TLogContextSessionItem(Size, !sessionId.empty()) - , Names_{{ sessionId, TString{std::forward<TArgs>(args)}... 
}} + , Names_{{sessionId, TString{std::forward<TArgs>(args)}...}} { NImpl::TLogContextListItem* ctxList = NImpl::GetLogContextList(); PrevLogContextHead_.Prev = ctxList->Prev; diff --git a/yql/essentials/utils/log/format.cpp b/yql/essentials/utils/log/format.cpp index 6292235d52d..6cd57fe3fda 100644 --- a/yql/essentials/utils/log/format.cpp +++ b/yql/essentials/utils/log/format.cpp @@ -15,160 +15,160 @@ namespace NYql::NLog { - namespace { +namespace { - constexpr size_t MaxRequiredContextKey = static_cast<size_t>(EContextKey::Line); +constexpr size_t MaxRequiredContextKey = static_cast<size_t>(EContextKey::Line); - auto RequiredContextAccessor(const TLogRecord& rec) { - return [&](EContextKey key) -> TStringBuf { - return rec.MetaFlags.at(static_cast<size_t>(key)).second; - }; - } - - auto OptionalContextAccessor(const TLogRecord& rec) { - return [&](TStringBuf key) -> TMaybe<TStringBuf> { - const auto isContextKeyPath = [&](const auto& pair) { - return pair.first == key; - }; +auto RequiredContextAccessor(const TLogRecord& rec) { + return [&](EContextKey key) -> TStringBuf { + return rec.MetaFlags.at(static_cast<size_t>(key)).second; + }; +} - const auto* path = FindIfPtr( - rec.MetaFlags.begin() + MaxRequiredContextKey + 1, - rec.MetaFlags.end(), - isContextKeyPath); +auto OptionalContextAccessor(const TLogRecord& rec) { + return [&](TStringBuf key) -> TMaybe<TStringBuf> { + const auto isContextKeyPath = [&](const auto& pair) { + return pair.first == key; + }; - if (!path) { - return Nothing(); - } + const auto* path = FindIfPtr( + rec.MetaFlags.begin() + MaxRequiredContextKey + 1, + rec.MetaFlags.end(), + isContextKeyPath); - return path->second; - }; + if (!path) { + return Nothing(); } - void PrintBody(TStringBuilder& out, const TLogRecord& rec, size_t flagBegin) { - out << TStringBuf(rec.Data, rec.Len); + return path->second; + }; +} - if (flagBegin < rec.MetaFlags.size()) { - out << ". 
Extra context: "; - } +void PrintBody(TStringBuilder& out, const TLogRecord& rec, size_t flagBegin) { + out << TStringBuf(rec.Data, rec.Len); - for (size_t i = flagBegin; i < rec.MetaFlags.size(); ++i) { - const auto& [key, value] = rec.MetaFlags[i]; - out << key << " = " << value; - if (i + 1 != rec.MetaFlags.size()) { - out << ", "; - } - } - } + if (flagBegin < rec.MetaFlags.size()) { + out << ". Extra context: "; + } - TString FallbackFormat(const TLogRecord& rec) { - TStringBuilder out; - PrintBody(out, rec, /*flagBegin=*/0); - return out; + for (size_t i = flagBegin; i < rec.MetaFlags.size(); ++i) { + const auto& [key, value] = rec.MetaFlags[i]; + out << key << " = " << value; + if (i + 1 != rec.MetaFlags.size()) { + out << ", "; } + } +} - class TFormattingLogBackend final: public TForwardingLogBackend { - public: - explicit TFormattingLogBackend(TFormatter formatter, bool isStrict, TAutoPtr<TLogBackend> child) - : TForwardingLogBackend(std::move(child)) - , Formatter_(std::move(formatter)) - , IsStrict_(isStrict) - { - } - - void WriteData(const TLogRecord& rec) final { - if (rec.MetaFlags.empty()) { - // NB. For signal handler. 
- return TForwardingLogBackend::WriteData(rec); - } +TString FallbackFormat(const TLogRecord& rec) { + TStringBuilder out; + PrintBody(out, rec, /*flagBegin=*/0); + return out; +} - TString message; - if (IsSupported(rec.MetaFlags)) { - message = Formatter_(rec); - } else if (IsStrict_) { - TStringBuilder message; - message << "LogRecord is not supported: "; - PrintBody(message, rec, /* flagBegin = */ 0); - ythrow yexception() << std::move(message); - } else { - message = FallbackFormat(rec); - } - message.append('\n'); +class TFormattingLogBackend final: public TForwardingLogBackend { +public: + explicit TFormattingLogBackend(TFormatter formatter, bool isStrict, TAutoPtr<TLogBackend> child) + : TForwardingLogBackend(std::move(child)) + , Formatter_(std::move(formatter)) + , IsStrict_(isStrict) + { + } - const TLogRecord formatted(rec.Priority, message.data(), message.size()); - return TForwardingLogBackend::WriteData(formatted); - } + void WriteData(const TLogRecord& rec) final { + if (rec.MetaFlags.empty()) { + // NB. For signal handler. 
+ return TForwardingLogBackend::WriteData(rec); + } - protected: - static bool IsSupported(const TLogRecord::TMetaFlags& flags) { - const auto isSupported = [&](size_t i) -> bool { - const EContextKey key = static_cast<EContextKey>(i); + TString message; + if (IsSupported(rec.MetaFlags)) { + message = Formatter_(rec); + } else if (IsStrict_) { + TStringBuilder message; + message << "LogRecord is not supported: "; + PrintBody(message, rec, /* flagBegin = */ 0); + ythrow yexception() << std::move(message); + } else { + message = FallbackFormat(rec); + } + message.append('\n'); - const TStringBuf expected = ToStringBuf(key); - if (flags.size() <= i) { - return false; - } + const TLogRecord formatted(rec.Priority, message.data(), message.size()); + return TForwardingLogBackend::WriteData(formatted); + } - const TStringBuf actual = flags[i].first; - if (actual != expected) { - return false; - } +protected: + static bool IsSupported(const TLogRecord::TMetaFlags& flags) { + const auto isSupported = [&](size_t i) -> bool { + const EContextKey key = static_cast<EContextKey>(i); - return true; - }; + const TStringBuf expected = ToStringBuf(key); + if (flags.size() <= i) { + return false; + } - return AllOf(std::views::iota(Min<size_t>(), MaxRequiredContextKey), isSupported); + const TStringBuf actual = flags[i].first; + if (actual != expected) { + return false; } - private: - TFormatter Formatter_; - bool IsStrict_; + return true; }; - } // namespace + return AllOf(std::views::iota(Min<size_t>(), MaxRequiredContextKey), isSupported); + } - TString LegacyFormat(const TLogRecord& rec) { - const auto get = RequiredContextAccessor(rec); - const auto opt = OptionalContextAccessor(rec); +private: + TFormatter Formatter_; + bool IsStrict_; +}; - TStringBuilder out; - out << get(EContextKey::DateTime) << ' ' - << get(EContextKey::Level) << ' ' - << get(EContextKey::ProcessName) - << "(pid=" << get(EContextKey::ProcessID) - << ", tid=" << get(EContextKey::ThreadID) - << ") [" << 
get(EContextKey::Component) << "] " - << get(EContextKey::FileName) - << ':' << get(EContextKey::Line) << ": "; +} // namespace - size_t unknownContextBegin = MaxRequiredContextKey + 1; - if (auto path = opt(ToStringBuf(EContextKey::Path))) { - out << "{" << *path << "} "; - unknownContextBegin += 1; - } +TString LegacyFormat(const TLogRecord& rec) { + const auto get = RequiredContextAccessor(rec); + const auto opt = OptionalContextAccessor(rec); - PrintBody(out, rec, unknownContextBegin); - return out; - } + TStringBuilder out; + out << get(EContextKey::DateTime) << ' ' + << get(EContextKey::Level) << ' ' + << get(EContextKey::ProcessName) + << "(pid=" << get(EContextKey::ProcessID) + << ", tid=" << get(EContextKey::ThreadID) + << ") [" << get(EContextKey::Component) << "] " + << get(EContextKey::FileName) + << ':' << get(EContextKey::Line) << ": "; - TString JsonFormat(const TLogRecord& rec) { - TStringStream out; - NJsonWriter::TBuf buf(NJsonWriter::HEM_DONT_ESCAPE_HTML, &out); - buf.BeginObject(); - buf.WriteKey("message"); - buf.WriteString(TStringBuf(rec.Data, rec.Len)); - buf.WriteKey("@fields"); - buf.BeginObject(); - for (const auto& [key, value] : rec.MetaFlags) { - buf.WriteKey(key); - buf.WriteString(value); - } - buf.EndObject(); - buf.EndObject(); - return std::move(out.Str()); + size_t unknownContextBegin = MaxRequiredContextKey + 1; + if (auto path = opt(ToStringBuf(EContextKey::Path))) { + out << "{" << *path << "} "; + unknownContextBegin += 1; } - TAutoPtr<TLogBackend> MakeFormattingLogBackend(TFormatter formatter, bool isStrict, TAutoPtr<TLogBackend> child) { - return new TFormattingLogBackend(std::move(formatter), isStrict, std::move(child)); + PrintBody(out, rec, unknownContextBegin); + return out; +} + +TString JsonFormat(const TLogRecord& rec) { + TStringStream out; + NJsonWriter::TBuf buf(NJsonWriter::HEM_DONT_ESCAPE_HTML, &out); + buf.BeginObject(); + buf.WriteKey("message"); + buf.WriteString(TStringBuf(rec.Data, rec.Len)); + 
buf.WriteKey("@fields"); + buf.BeginObject(); + for (const auto& [key, value] : rec.MetaFlags) { + buf.WriteKey(key); + buf.WriteString(value); } + buf.EndObject(); + buf.EndObject(); + return std::move(out.Str()); +} + +TAutoPtr<TLogBackend> MakeFormattingLogBackend(TFormatter formatter, bool isStrict, TAutoPtr<TLogBackend> child) { + return new TFormattingLogBackend(std::move(formatter), isStrict, std::move(child)); +} } // namespace NYql::NLog diff --git a/yql/essentials/utils/log/format.h b/yql/essentials/utils/log/format.h index c0ece9412ba..3f4b9b01c0e 100644 --- a/yql/essentials/utils/log/format.h +++ b/yql/essentials/utils/log/format.h @@ -6,12 +6,12 @@ namespace NYql::NLog { - using TFormatter = std::function<TString(const TLogRecord&)>; +using TFormatter = std::function<TString(const TLogRecord&)>; - TString LegacyFormat(const TLogRecord& rec); +TString LegacyFormat(const TLogRecord& rec); - TString JsonFormat(const TLogRecord& rec); +TString JsonFormat(const TLogRecord& rec); - TAutoPtr<TLogBackend> MakeFormattingLogBackend(TFormatter formatter, bool isStrict, TAutoPtr<TLogBackend> child); +TAutoPtr<TLogBackend> MakeFormattingLogBackend(TFormatter formatter, bool isStrict, TAutoPtr<TLogBackend> child); } // namespace NYql::NLog diff --git a/yql/essentials/utils/log/fwd_backend.cpp b/yql/essentials/utils/log/fwd_backend.cpp index 091b07c78f1..be4ce454e54 100644 --- a/yql/essentials/utils/log/fwd_backend.cpp +++ b/yql/essentials/utils/log/fwd_backend.cpp @@ -2,37 +2,37 @@ namespace NYql::NLog { - TForwardingLogBackend::TForwardingLogBackend(TAutoPtr<TLogBackend> child) - : Child_(std::move(child)) - { - } +TForwardingLogBackend::TForwardingLogBackend(TAutoPtr<TLogBackend> child) + : Child_(std::move(child)) +{ +} - void TForwardingLogBackend::WriteData(const TLogRecord& rec) { - return Child_->WriteData(rec); - } +void TForwardingLogBackend::WriteData(const TLogRecord& rec) { + return Child_->WriteData(rec); +} - void TForwardingLogBackend::ReopenLog() { - 
return Child_->ReopenLog(); - } +void TForwardingLogBackend::ReopenLog() { + return Child_->ReopenLog(); +} - void TForwardingLogBackend::ReopenLogNoFlush() { - return Child_->ReopenLogNoFlush(); - } +void TForwardingLogBackend::ReopenLogNoFlush() { + return Child_->ReopenLogNoFlush(); +} - ELogPriority TForwardingLogBackend::FiltrationLevel() const { - return Child_->FiltrationLevel(); - } +ELogPriority TForwardingLogBackend::FiltrationLevel() const { + return Child_->FiltrationLevel(); +} - size_t TForwardingLogBackend::QueueSize() const { - return Child_->QueueSize(); - } +size_t TForwardingLogBackend::QueueSize() const { + return Child_->QueueSize(); +} - void TForwardingLogBackend::SetChild(TAutoPtr<TLogBackend> child) { - Child_ = std::move(child); - } +void TForwardingLogBackend::SetChild(TAutoPtr<TLogBackend> child) { + Child_ = std::move(child); +} - TAutoPtr<TLogBackend> TForwardingLogBackend::GetChild() const { - return Child_; - } +TAutoPtr<TLogBackend> TForwardingLogBackend::GetChild() const { + return Child_; +} } // namespace NYql::NLog diff --git a/yql/essentials/utils/log/fwd_backend.h b/yql/essentials/utils/log/fwd_backend.h index 144d2eccee2..21fcd3dc32f 100644 --- a/yql/essentials/utils/log/fwd_backend.h +++ b/yql/essentials/utils/log/fwd_backend.h @@ -6,21 +6,21 @@ namespace NYql::NLog { - class TForwardingLogBackend: public TLogBackend { - public: - explicit TForwardingLogBackend(TAutoPtr<TLogBackend> child); +class TForwardingLogBackend: public TLogBackend { +public: + explicit TForwardingLogBackend(TAutoPtr<TLogBackend> child); - void WriteData(const TLogRecord& rec) override; - void ReopenLog() override; - void ReopenLogNoFlush() override; - ELogPriority FiltrationLevel() const override; - size_t QueueSize() const override; + void WriteData(const TLogRecord& rec) override; + void ReopenLog() override; + void ReopenLogNoFlush() override; + ELogPriority FiltrationLevel() const override; + size_t QueueSize() const override; - void 
SetChild(TAutoPtr<TLogBackend> child); - TAutoPtr<TLogBackend> GetChild() const; + void SetChild(TAutoPtr<TLogBackend> child); + TAutoPtr<TLogBackend> GetChild() const; - private: - TAutoPtr<TLogBackend> Child_; - }; +private: + TAutoPtr<TLogBackend> Child_; +}; } // namespace NYql::NLog diff --git a/yql/essentials/utils/log/log.cpp b/yql/essentials/utils/log/log.cpp index 1b03c194b6a..73394c4f2de 100644 --- a/yql/essentials/utils/log/log.cpp +++ b/yql/essentials/utils/log/log.cpp @@ -25,10 +25,9 @@ static TMutex g_InitLoggerMutex; static int g_LoggerInitialized = 0; - namespace { -class TLimitedLogBackend final : public TLogBackend { +class TLimitedLogBackend final: public TLogBackend { public: TLimitedLogBackend(TAutoPtr<TLogBackend> b, TAtomic& flag, ui64 limit) noexcept : Backend_(b) @@ -97,7 +96,6 @@ private: char Buf_[1 << 20]; char* Current_; char* const End_; - }; TEmergencyLogOutput EMERGENCY_LOG_OUT; @@ -106,82 +104,119 @@ void LogBacktraceOnSignal(int signum) { if (NYql::NLog::IsYqlLoggerInitialized()) { EMERGENCY_LOG_OUT << #ifdef _win_ - signum + signum #else - strsignal(signum) + strsignal(signum) #endif - << TStringBuf(" (pid=") << GetPID() << TStringBuf("): "); + << TStringBuf(" (pid=") << GetPID() << TStringBuf("): "); NYql::NBacktrace::KikimrBackTraceFormatImpl(&EMERGENCY_LOG_OUT); EMERGENCY_LOG_OUT.Flush(); } } - // Conversions between NYql::NProto::TLoggingConfig enums and NYql::NLog enums NYql::NLog::ELevel ConvertLevel(NYql::NProto::TLoggingConfig::ELevel level) { using namespace NYql::NProto; using namespace NYql::NLog; switch (level) { - case TLoggingConfig::FATAL: return ELevel::FATAL; - case TLoggingConfig::ERROR: return ELevel::ERROR; - case TLoggingConfig::WARN: return ELevel::WARN; - case TLoggingConfig::INFO: return ELevel::INFO; - case TLoggingConfig::DEBUG: return ELevel::DEBUG; - case TLoggingConfig::TRACE: return ELevel::TRACE; + case TLoggingConfig::FATAL: + return ELevel::FATAL; + case TLoggingConfig::ERROR: + return 
ELevel::ERROR; + case TLoggingConfig::WARN: + return ELevel::WARN; + case TLoggingConfig::INFO: + return ELevel::INFO; + case TLoggingConfig::DEBUG: + return ELevel::DEBUG; + case TLoggingConfig::TRACE: + return ELevel::TRACE; } ythrow yexception() << "unknown log level: " - << TLoggingConfig::ELevel_Name(level); + << TLoggingConfig::ELevel_Name(level); } NYql::NLog::EComponent ConvertComponent(NYql::NProto::TLoggingConfig::EComponent c) { using namespace NYql::NProto; using namespace NYql::NLog; switch (c) { - case TLoggingConfig::DEFAULT: return EComponent::Default; - case TLoggingConfig::CORE: return EComponent::Core; - case TLoggingConfig::CORE_EVAL: return EComponent::CoreEval; - case TLoggingConfig::CORE_PEEPHOLE: return EComponent::CorePeepHole; - case TLoggingConfig::CORE_EXECUTION: return EComponent::CoreExecution; - case TLoggingConfig::SQL: return EComponent::Sql; - case TLoggingConfig::PROVIDER_COMMON: return EComponent::ProviderCommon; - case TLoggingConfig::PROVIDER_CONFIG: return EComponent::ProviderConfig; - case TLoggingConfig::PROVIDER_RESULT: return EComponent::ProviderResult; - case TLoggingConfig::PROVIDER_YT: return EComponent::ProviderYt; - case TLoggingConfig::PROVIDER_KIKIMR: return EComponent::ProviderKikimr; - case TLoggingConfig::PROVIDER_KQP: return EComponent::ProviderKqp; - case TLoggingConfig::PROVIDER_RTMR: return EComponent::ProviderRtmr; - case TLoggingConfig::PERFORMANCE: return EComponent::Perf; - case TLoggingConfig::NET: return EComponent::Net; - case TLoggingConfig::PROVIDER_STAT: return EComponent::ProviderStat; - case TLoggingConfig::PROVIDER_SOLOMON: return EComponent::ProviderSolomon; - case TLoggingConfig::PROVIDER_DQ: return EComponent::ProviderDq; - case TLoggingConfig::PROVIDER_CLICKHOUSE: return EComponent::ProviderClickHouse; - case TLoggingConfig::PROVIDER_YDB: return EComponent::ProviderYdb; - case TLoggingConfig::PROVIDER_PQ: return EComponent::ProviderPq; - case TLoggingConfig::PROVIDER_S3: return 
EComponent::ProviderS3; - case TLoggingConfig::CORE_DQ: return EComponent::CoreDq; - case TLoggingConfig::HTTP_GATEWAY: return EComponent::HttpGateway; - case TLoggingConfig::PROVIDER_GENERIC: return EComponent::ProviderGeneric; - case TLoggingConfig::PROVIDER_PG: return EComponent::ProviderPg; - case TLoggingConfig::PROVIDER_PURE: return EComponent::ProviderPure; - case TLoggingConfig::FAST_MAP_REDUCE: return EComponent::FastMapReduce; - case TLoggingConfig::PROVIDER_YTFLOW: return EComponent::ProviderYtflow; + case TLoggingConfig::DEFAULT: + return EComponent::Default; + case TLoggingConfig::CORE: + return EComponent::Core; + case TLoggingConfig::CORE_EVAL: + return EComponent::CoreEval; + case TLoggingConfig::CORE_PEEPHOLE: + return EComponent::CorePeepHole; + case TLoggingConfig::CORE_EXECUTION: + return EComponent::CoreExecution; + case TLoggingConfig::SQL: + return EComponent::Sql; + case TLoggingConfig::PROVIDER_COMMON: + return EComponent::ProviderCommon; + case TLoggingConfig::PROVIDER_CONFIG: + return EComponent::ProviderConfig; + case TLoggingConfig::PROVIDER_RESULT: + return EComponent::ProviderResult; + case TLoggingConfig::PROVIDER_YT: + return EComponent::ProviderYt; + case TLoggingConfig::PROVIDER_KIKIMR: + return EComponent::ProviderKikimr; + case TLoggingConfig::PROVIDER_KQP: + return EComponent::ProviderKqp; + case TLoggingConfig::PROVIDER_RTMR: + return EComponent::ProviderRtmr; + case TLoggingConfig::PERFORMANCE: + return EComponent::Perf; + case TLoggingConfig::NET: + return EComponent::Net; + case TLoggingConfig::PROVIDER_STAT: + return EComponent::ProviderStat; + case TLoggingConfig::PROVIDER_SOLOMON: + return EComponent::ProviderSolomon; + case TLoggingConfig::PROVIDER_DQ: + return EComponent::ProviderDq; + case TLoggingConfig::PROVIDER_CLICKHOUSE: + return EComponent::ProviderClickHouse; + case TLoggingConfig::PROVIDER_YDB: + return EComponent::ProviderYdb; + case TLoggingConfig::PROVIDER_PQ: + return EComponent::ProviderPq; + case 
TLoggingConfig::PROVIDER_S3: + return EComponent::ProviderS3; + case TLoggingConfig::CORE_DQ: + return EComponent::CoreDq; + case TLoggingConfig::HTTP_GATEWAY: + return EComponent::HttpGateway; + case TLoggingConfig::PROVIDER_GENERIC: + return EComponent::ProviderGeneric; + case TLoggingConfig::PROVIDER_PG: + return EComponent::ProviderPg; + case TLoggingConfig::PROVIDER_PURE: + return EComponent::ProviderPure; + case TLoggingConfig::FAST_MAP_REDUCE: + return EComponent::FastMapReduce; + case TLoggingConfig::PROVIDER_YTFLOW: + return EComponent::ProviderYtflow; } ythrow yexception() << "unknown log component: " - << TLoggingConfig::EComponent_Name(c); + << TLoggingConfig::EComponent_Name(c); } TString ConvertDestinationType(NYql::NProto::TLoggingConfig::ELogTo c) { switch (c) { - case NYql::NProto::TLoggingConfig::STDOUT: return "cout"; - case NYql::NProto::TLoggingConfig::STDERR: return "cerr"; - case NYql::NProto::TLoggingConfig::CONSOLE: return "console"; - default : { - ythrow yexception() << "unsupported ELogTo destination in Convert"; - } + case NYql::NProto::TLoggingConfig::STDOUT: + return "cout"; + case NYql::NProto::TLoggingConfig::STDERR: + return "cerr"; + case NYql::NProto::TLoggingConfig::CONSOLE: + return "console"; + default: { + ythrow yexception() << "unsupported ELogTo destination in Convert"; + } } ythrow yexception() << "unknown ELogTo destination"; @@ -204,16 +239,16 @@ NYql::NProto::TLoggingConfig::TLogDestination CreateLogDestination(const TString NYql::NLog::TFormatter Formatter(const NYql::NProto::TLoggingConfig& config) { switch (config.GetFormat().Format_case()) { - case NYql::NProto::TLoggingConfig_TFormat::kLegacyFormat: - return NYql::NLog::LegacyFormat; - case NYql::NProto::TLoggingConfig_TFormat::kJsonFormat: - return NYql::NLog::JsonFormat; - case NYql::NProto::TLoggingConfig_TFormat::FORMAT_NOT_SET: - return NYql::NLog::LegacyFormat; + case NYql::NProto::TLoggingConfig_TFormat::kLegacyFormat: + return NYql::NLog::LegacyFormat; + 
case NYql::NProto::TLoggingConfig_TFormat::kJsonFormat: + return NYql::NLog::JsonFormat; + case NYql::NProto::TLoggingConfig_TFormat::FORMAT_NOT_SET: + return NYql::NLog::LegacyFormat; } } -} // namspace +} // namespace namespace NYql { namespace NLog { @@ -234,7 +269,7 @@ TString GetLocalTime() { return std::move(time.Str()); } -} +} // namespace NImpl void WriteLocalTime(IOutputStream* out) { struct timeval now; @@ -255,7 +290,9 @@ TYqlLog::TYqlLog() : TLog() , ProcName_() , ProcId_() - , WriteTruncMsg_(0) {} + , WriteTruncMsg_(0) +{ +} TYqlLog::TYqlLog(const TString& logType, const TComponentLevels& levels) : TLog(logType) @@ -285,9 +322,8 @@ void TYqlLog::UpdateProcInfo(const TString& procName) { } TAutoPtr<TLogElement> TYqlLog::CreateLogElement( - EComponent component, ELevel level, - TStringBuf file, int line) const -{ + EComponent component, ELevel level, + TStringBuf file, int line) const { if (/* const bool writeMsg = */ AtomicCas(&WriteTruncMsg_, 0, 1)) { TLogElement fatal(this, ELevelHelpers::ToLogPriority(ELevel::FATAL)); Contextify(fatal, EComponent::Default, ELevel::FATAL, __FILE__, __LINE__); @@ -336,7 +372,7 @@ void InitLogger(const TString& logType, bool startAsDaemon) { } void InitLogger(const NProto::TLoggingConfig& config, bool startAsDaemon) { - with_lock(g_InitLoggerMutex) { + with_lock (g_InitLoggerMutex) { ++g_LoggerInitialized; if (g_LoggerInitialized > 1) { return; @@ -349,7 +385,7 @@ void InitLogger(const NProto::TLoggingConfig& config, bool startAsDaemon) { levels.fill(ELevel::INFO); } - for (const auto& cmpLevel: config.GetLevels()) { + for (const auto& cmpLevel : config.GetLevels()) { auto component = ConvertComponent(cmpLevel.GetC()); auto level = ConvertLevel(cmpLevel.GetL()); levels[EComponentHelpers::ToInt(component)] = level; @@ -410,11 +446,11 @@ void InitLogger(const NProto::TLoggingConfig& config, bool startAsDaemon) { config.GetFormat().GetIsStrict(), std::move(backend))); } - NYql::NBacktrace::AddAfterFatalCallback([](int 
signo){ LogBacktraceOnSignal(signo); }); + NYql::NBacktrace::AddAfterFatalCallback([](int signo) { LogBacktraceOnSignal(signo); }); } void InitLogger(TAutoPtr<TLogBackend> backend, TFormatter formatter, bool isStrictFormatting) { - with_lock(g_InitLoggerMutex) { + with_lock (g_InitLoggerMutex) { ++g_LoggerInitialized; if (g_LoggerInitialized > 1) { return; @@ -426,7 +462,7 @@ void InitLogger(TAutoPtr<TLogBackend> backend, TFormatter formatter, bool isStri levels.fill(ELevel::INFO); TLoggerOperator<TYqlLog>::Set(new TYqlLog(backend, levels)); } - NYql::NBacktrace::AddAfterFatalCallback([](int signo){ LogBacktraceOnSignal(signo); }); + NYql::NBacktrace::AddAfterFatalCallback([](int signo) { LogBacktraceOnSignal(signo); }); } void InitLogger(IOutputStream* out, TFormatter formatter, bool isStrictFormatting) { @@ -434,7 +470,7 @@ void InitLogger(IOutputStream* out, TFormatter formatter, bool isStrictFormattin } void CleanupLogger() { - with_lock(g_InitLoggerMutex) { + with_lock (g_InitLoggerMutex) { --g_LoggerInitialized; if (g_LoggerInitialized > 0) { return; @@ -445,7 +481,7 @@ void CleanupLogger() { } void ReopenLog() { - with_lock(g_InitLoggerMutex) { + with_lock (g_InitLoggerMutex) { TLoggerOperator<TYqlLog>::Log().ReopenLog(); } } diff --git a/yql/essentials/utils/log/log.h b/yql/essentials/utils/log/log.h index 68aa8b09b0e..e531c183104 100644 --- a/yql/essentials/utils/log/log.h +++ b/yql/essentials/utils/log/log.h @@ -14,45 +14,43 @@ #include <array> - -#define YQL_LOG_IMPL(logger, component, level, preprocessor, file, line) \ +#define YQL_LOG_IMPL(logger, component, level, preprocessor, file, line) \ logger.NeedToLog(component, level) && NPrivateGlobalLogger::TEatStream() | \ - (*preprocessor::Preprocess(logger.CreateLogElement(component, level, file, line))) + (*preprocessor::Preprocess(logger.CreateLogElement(component, level, file, line))) #define YQL_LOG_IF_IMPL(logger, component, level, preprocessor, condition, file, line) \ - logger.NeedToLog(component, 
level) && (condition) && NPrivateGlobalLogger::TEatStream() | \ - (*preprocessor::Preprocess(logger.CreateLogElement(component, level, file, line))) + logger.NeedToLog(component, level) && (condition) && NPrivateGlobalLogger::TEatStream() | (*preprocessor::Preprocess(logger.CreateLogElement(component, level, file, line))) // with component logger -#define YQL_CLOG_PREP(level, component, preprocessor) YQL_LOG_IMPL(\ - ::NYql::NLog::YqlLogger(), \ - ::NYql::NLog::EComponent::component, \ - ::NYql::NLog::ELevel::level, \ - preprocessor, \ +#define YQL_CLOG_PREP(level, component, preprocessor) YQL_LOG_IMPL( \ + ::NYql::NLog::YqlLogger(), \ + ::NYql::NLog::EComponent::component, \ + ::NYql::NLog::ELevel::level, \ + preprocessor, \ __FILE__, __LINE__) #define YQL_CLOG(level, component) \ YQL_CLOG_PREP(level, component, ::NYql::NLog::TContextPreprocessor) #define YQL_CLOG_ACTIVE(level, component) ::NYql::NLog::YqlLogger().NeedToLog( \ - ::NYql::NLog::EComponent::component, \ + ::NYql::NLog::EComponent::component, \ ::NYql::NLog::ELevel::level) // with component/level values logger -#define YQL_CVLOG_PREP(level, component, preprocessor) YQL_LOG_IMPL(\ - ::NYql::NLog::YqlLogger(), \ - component, \ - level, \ - preprocessor, \ +#define YQL_CVLOG_PREP(level, component, preprocessor) YQL_LOG_IMPL( \ + ::NYql::NLog::YqlLogger(), \ + component, \ + level, \ + preprocessor, \ __FILE__, __LINE__) #define YQL_CVLOG(level, component) \ YQL_CVLOG_PREP(level, component, ::NYql::NLog::TContextPreprocessor) #define YQL_CVLOG_ACTIVE(level, component) ::NYql::NLog::YqlLogger().NeedToLog( \ - component, \ + component, \ level) // default logger @@ -67,12 +65,12 @@ // conditional logger -#define YQL_CLOG_PREP_IF(level, component, preprocessor, condition) YQL_LOG_IF_IMPL(\ - ::NYql::NLog::YqlLogger(), \ - ::NYql::NLog::EComponent::component, \ - ::NYql::NLog::ELevel::level, \ - preprocessor, \ - condition, \ +#define YQL_CLOG_PREP_IF(level, component, preprocessor, condition) 
YQL_LOG_IF_IMPL( \ + ::NYql::NLog::YqlLogger(), \ + ::NYql::NLog::EComponent::component, \ + ::NYql::NLog::ELevel::level, \ + preprocessor, \ + condition, \ __FILE__, __LINE__) #define YQL_CLOG_IF(level, component, condition) \ @@ -84,12 +82,11 @@ #define YQL_LOG_IF(level, condition) \ YQL_LOG_PREP_IF(level, ::NYql::NLog::TContextPreprocessor, condition) - namespace NYql { namespace NProto { - class TLoggingConfig; -} // NProto +class TLoggingConfig; +} // namespace NProto namespace NLog { @@ -99,10 +96,10 @@ TString GetThreadId(); TString GetLocalTime(); -} +} // namespace NImpl using TComponentLevels = - std::array<ELevel, EComponentHelpers::ToInt(EComponent::MaxValue)>; + std::array<ELevel, EComponentHelpers::ToInt(EComponent::MaxValue)>; void WriteLocalTime(IOutputStream* out); @@ -181,7 +178,7 @@ void InitLogger(const TString& log, bool startAsDaemon = false); /** * @brief Initialize logger with backends described in config. -*/ + */ void InitLogger(const NProto::TLoggingConfig& loggingConfig, bool startAsDaemon = false); /** @@ -204,7 +201,9 @@ void ReopenLog(); class YqlLoggerScope { public: - YqlLoggerScope(const TString& log, bool startAsDaemon = false) { InitLogger(log, startAsDaemon); } + YqlLoggerScope(const TString& log, bool startAsDaemon = false) { + InitLogger(log, startAsDaemon); + } YqlLoggerScope(TAutoPtr<TLogBackend> backend, TFormatter formatter = LegacyFormat, bool isStrictFormatting = true) { InitLogger(backend, std::move(formatter), isStrictFormatting); @@ -214,7 +213,9 @@ public: InitLogger(out, std::move(formatter), isStrictFormatting); } - ~YqlLoggerScope() { CleanupLogger(); } + ~YqlLoggerScope() { + CleanupLogger(); + } }; } // namespace NLog diff --git a/yql/essentials/utils/log/log_component.h b/yql/essentials/utils/log/log_component.h index be1df138635..bd284cb9d81 100644 --- a/yql/essentials/utils/log/log_component.h +++ b/yql/essentials/utils/log/log_component.h @@ -3,7 +3,6 @@ #include <util/generic/strbuf.h> #include 
<util/generic/yexception.h> - namespace NYql { namespace NLog { @@ -20,7 +19,8 @@ enum class EComponent { ProviderKikimr, ProviderKqp, ProviderRtmr, - Performance, Perf = Performance, + Performance, + Perf = Performance, Net, ProviderStat, ProviderSolomon, @@ -50,77 +50,164 @@ struct EComponentHelpers { static constexpr EComponent FromInt(int component) { return (component >= ToInt(EComponent::Default) && component < ToInt(EComponent::MaxValue)) - ? static_cast<EComponent>(component) - : EComponent::Default; + ? static_cast<EComponent>(component) + : EComponent::Default; } static TStringBuf ToString(EComponent component) { switch (component) { - case EComponent::Default: return TStringBuf("default"); - case EComponent::Core: return TStringBuf("core"); - case EComponent::CoreEval: return TStringBuf("core eval"); - case EComponent::CorePeepHole: return TStringBuf("core peephole"); - case EComponent::CoreExecution: return TStringBuf("core exec"); - case EComponent::Sql: return TStringBuf("sql"); - case EComponent::ProviderCommon: return TStringBuf("common provider"); - case EComponent::ProviderConfig: return TStringBuf("CONFIG"); - case EComponent::ProviderResult: return TStringBuf("RESULT"); - case EComponent::ProviderYt: return TStringBuf("YT"); - case EComponent::ProviderKikimr: return TStringBuf("KIKIMR"); - case EComponent::ProviderKqp: return TStringBuf("KQP"); - case EComponent::ProviderRtmr: return TStringBuf("RTMR"); - case EComponent::Performance: return TStringBuf("perf"); - case EComponent::Net: return TStringBuf("net"); - case EComponent::ProviderStat: return TStringBuf("STATFACE"); - case EComponent::ProviderSolomon: return TStringBuf("SOLOMON"); - case EComponent::ProviderDq: return TStringBuf("DQ"); - case EComponent::ProviderClickHouse: return TStringBuf("CLICKHOUSE"); - case EComponent::ProviderYdb: return TStringBuf("YDB"); - case EComponent::ProviderPq: return TStringBuf("PQ"); - case EComponent::ProviderS3: return TStringBuf("S3"); - case 
EComponent::CoreDq: return TStringBuf("core dq"); - case EComponent::HttpGateway: return TStringBuf("http gw"); - case EComponent::ProviderGeneric: return TStringBuf("generic"); - case EComponent::ProviderPg: return TStringBuf("PG"); - case EComponent::ProviderPure: return TStringBuf("pure"); - case EComponent::FastMapReduce: return TStringBuf("FMR"); - case EComponent::ProviderYtflow: return TStringBuf("YTFLOW"); - default: - ythrow yexception() << "invalid log component value: " - << ToInt(component); + case EComponent::Default: + return TStringBuf("default"); + case EComponent::Core: + return TStringBuf("core"); + case EComponent::CoreEval: + return TStringBuf("core eval"); + case EComponent::CorePeepHole: + return TStringBuf("core peephole"); + case EComponent::CoreExecution: + return TStringBuf("core exec"); + case EComponent::Sql: + return TStringBuf("sql"); + case EComponent::ProviderCommon: + return TStringBuf("common provider"); + case EComponent::ProviderConfig: + return TStringBuf("CONFIG"); + case EComponent::ProviderResult: + return TStringBuf("RESULT"); + case EComponent::ProviderYt: + return TStringBuf("YT"); + case EComponent::ProviderKikimr: + return TStringBuf("KIKIMR"); + case EComponent::ProviderKqp: + return TStringBuf("KQP"); + case EComponent::ProviderRtmr: + return TStringBuf("RTMR"); + case EComponent::Performance: + return TStringBuf("perf"); + case EComponent::Net: + return TStringBuf("net"); + case EComponent::ProviderStat: + return TStringBuf("STATFACE"); + case EComponent::ProviderSolomon: + return TStringBuf("SOLOMON"); + case EComponent::ProviderDq: + return TStringBuf("DQ"); + case EComponent::ProviderClickHouse: + return TStringBuf("CLICKHOUSE"); + case EComponent::ProviderYdb: + return TStringBuf("YDB"); + case EComponent::ProviderPq: + return TStringBuf("PQ"); + case EComponent::ProviderS3: + return TStringBuf("S3"); + case EComponent::CoreDq: + return TStringBuf("core dq"); + case EComponent::HttpGateway: + return 
TStringBuf("http gw"); + case EComponent::ProviderGeneric: + return TStringBuf("generic"); + case EComponent::ProviderPg: + return TStringBuf("PG"); + case EComponent::ProviderPure: + return TStringBuf("pure"); + case EComponent::FastMapReduce: + return TStringBuf("FMR"); + case EComponent::ProviderYtflow: + return TStringBuf("YTFLOW"); + default: + ythrow yexception() << "invalid log component value: " + << ToInt(component); } } static EComponent FromString(TStringBuf str) { - if (str == TStringBuf("default")) return EComponent::Default; - if (str == TStringBuf("core")) return EComponent::Core; - if (str == TStringBuf("core eval")) return EComponent::CoreEval; - if (str == TStringBuf("core peephole")) return EComponent::CorePeepHole; - if (str == TStringBuf("core exec")) return EComponent::CoreExecution; - if (str == TStringBuf("sql")) return EComponent::Sql; - if (str == TStringBuf("common provider")) return EComponent::ProviderCommon; - if (str == TStringBuf("CONFIG")) return EComponent::ProviderConfig; - if (str == TStringBuf("RESULT")) return EComponent::ProviderResult; - if (str == TStringBuf("YT")) return EComponent::ProviderYt; - if (str == TStringBuf("KIKIMR")) return EComponent::ProviderKikimr; - if (str == TStringBuf("KQP")) return EComponent::ProviderKqp; - if (str == TStringBuf("RTMR")) return EComponent::ProviderRtmr; - if (str == TStringBuf("perf")) return EComponent::Performance; - if (str == TStringBuf("net")) return EComponent::Net; - if (str == TStringBuf("STATFACE")) return EComponent::ProviderStat; - if (str == TStringBuf("SOLOMON")) return EComponent::ProviderSolomon; - if (str == TStringBuf("DQ")) return EComponent::ProviderDq; - if (str == TStringBuf("CLICKHOUSE")) return EComponent::ProviderClickHouse; - if (str == TStringBuf("YDB")) return EComponent::ProviderYdb; - if (str == TStringBuf("PQ")) return EComponent::ProviderPq; - if (str == TStringBuf("S3")) return EComponent::ProviderS3; - if (str == TStringBuf("core dq")) return 
EComponent::CoreDq; - if (str == TStringBuf("http gw")) return EComponent::HttpGateway; - if (str == TStringBuf("generic")) return EComponent::ProviderGeneric; - if (str == TStringBuf("PG")) return EComponent::ProviderPg; - if (str == TStringBuf("pure")) return EComponent::ProviderPure; - if (str == TStringBuf("FMR")) return EComponent::FastMapReduce; - if (str == TStringBuf("YTFLOW")) return EComponent::ProviderYtflow; + if (str == TStringBuf("default")) { + return EComponent::Default; + } + if (str == TStringBuf("core")) { + return EComponent::Core; + } + if (str == TStringBuf("core eval")) { + return EComponent::CoreEval; + } + if (str == TStringBuf("core peephole")) { + return EComponent::CorePeepHole; + } + if (str == TStringBuf("core exec")) { + return EComponent::CoreExecution; + } + if (str == TStringBuf("sql")) { + return EComponent::Sql; + } + if (str == TStringBuf("common provider")) { + return EComponent::ProviderCommon; + } + if (str == TStringBuf("CONFIG")) { + return EComponent::ProviderConfig; + } + if (str == TStringBuf("RESULT")) { + return EComponent::ProviderResult; + } + if (str == TStringBuf("YT")) { + return EComponent::ProviderYt; + } + if (str == TStringBuf("KIKIMR")) { + return EComponent::ProviderKikimr; + } + if (str == TStringBuf("KQP")) { + return EComponent::ProviderKqp; + } + if (str == TStringBuf("RTMR")) { + return EComponent::ProviderRtmr; + } + if (str == TStringBuf("perf")) { + return EComponent::Performance; + } + if (str == TStringBuf("net")) { + return EComponent::Net; + } + if (str == TStringBuf("STATFACE")) { + return EComponent::ProviderStat; + } + if (str == TStringBuf("SOLOMON")) { + return EComponent::ProviderSolomon; + } + if (str == TStringBuf("DQ")) { + return EComponent::ProviderDq; + } + if (str == TStringBuf("CLICKHOUSE")) { + return EComponent::ProviderClickHouse; + } + if (str == TStringBuf("YDB")) { + return EComponent::ProviderYdb; + } + if (str == TStringBuf("PQ")) { + return EComponent::ProviderPq; + } + if 
(str == TStringBuf("S3")) { + return EComponent::ProviderS3; + } + if (str == TStringBuf("core dq")) { + return EComponent::CoreDq; + } + if (str == TStringBuf("http gw")) { + return EComponent::HttpGateway; + } + if (str == TStringBuf("generic")) { + return EComponent::ProviderGeneric; + } + if (str == TStringBuf("PG")) { + return EComponent::ProviderPg; + } + if (str == TStringBuf("pure")) { + return EComponent::ProviderPure; + } + if (str == TStringBuf("FMR")) { + return EComponent::FastMapReduce; + } + if (str == TStringBuf("YTFLOW")) { + return EComponent::ProviderYtflow; + } ythrow yexception() << "unknown log component: '" << str << '\''; } diff --git a/yql/essentials/utils/log/log_level.h b/yql/essentials/utils/log/log_level.h index ccb12e4690b..5471e411d52 100644 --- a/yql/essentials/utils/log/log_level.h +++ b/yql/essentials/utils/log/log_level.h @@ -5,7 +5,6 @@ #include <util/generic/strbuf.h> #include <util/generic/yexception.h> - namespace NYql { namespace NLog { @@ -38,47 +37,74 @@ struct ELevelHelpers { static ELevel FromInt(int level) { switch (level) { - case TLOG_EMERG: - case TLOG_ALERT: - case TLOG_CRIT: return ELevel::FATAL; + case TLOG_EMERG: + case TLOG_ALERT: + case TLOG_CRIT: + return ELevel::FATAL; - case TLOG_ERR: return ELevel::ERROR; - case TLOG_WARNING: return ELevel::WARN; + case TLOG_ERR: + return ELevel::ERROR; + case TLOG_WARNING: + return ELevel::WARN; - case TLOG_NOTICE: - case TLOG_INFO: return ELevel::INFO; + case TLOG_NOTICE: + case TLOG_INFO: + return ELevel::INFO; - case TLOG_DEBUG: return ELevel::DEBUG; - case TLOG_RESOURCES: return ELevel::TRACE; + case TLOG_DEBUG: + return ELevel::DEBUG; + case TLOG_RESOURCES: + return ELevel::TRACE; - default: - return ELevel::INFO; + default: + return ELevel::INFO; } } static TStringBuf ToString(ELevel level) { // aligned 5-letters string switch (level) { - case ELevel::FATAL: return TStringBuf("FATAL"); - case ELevel::ERROR: return TStringBuf("ERROR"); - case ELevel::WARN: return 
TStringBuf("WARN "); - case ELevel::NOTICE:return TStringBuf("NOTE "); - case ELevel::INFO: return TStringBuf("INFO "); - case ELevel::DEBUG: return TStringBuf("DEBUG"); - case ELevel::TRACE: return TStringBuf("TRACE"); + case ELevel::FATAL: + return TStringBuf("FATAL"); + case ELevel::ERROR: + return TStringBuf("ERROR"); + case ELevel::WARN: + return TStringBuf("WARN "); + case ELevel::NOTICE: + return TStringBuf("NOTE "); + case ELevel::INFO: + return TStringBuf("INFO "); + case ELevel::DEBUG: + return TStringBuf("DEBUG"); + case ELevel::TRACE: + return TStringBuf("TRACE"); } ythrow yexception() << "unknown log level: " << ToInt(level); } static ELevel FromString(TStringBuf str) { // aligned 5-letters string - if (str == TStringBuf("FATAL")) return ELevel::FATAL; - if (str == TStringBuf("ERROR")) return ELevel::ERROR; - if (str == TStringBuf("WARN ")) return ELevel::WARN; - if (str == TStringBuf("NOTE ")) return ELevel::NOTICE; - if (str == TStringBuf("INFO ")) return ELevel::INFO; - if (str == TStringBuf("DEBUG")) return ELevel::DEBUG; - if (str == TStringBuf("TRACE")) return ELevel::TRACE; + if (str == TStringBuf("FATAL")) { + return ELevel::FATAL; + } + if (str == TStringBuf("ERROR")) { + return ELevel::ERROR; + } + if (str == TStringBuf("WARN ")) { + return ELevel::WARN; + } + if (str == TStringBuf("NOTE ")) { + return ELevel::NOTICE; + } + if (str == TStringBuf("INFO ")) { + return ELevel::INFO; + } + if (str == TStringBuf("DEBUG")) { + return ELevel::DEBUG; + } + if (str == TStringBuf("TRACE")) { + return ELevel::TRACE; + } ythrow yexception() << "unknown log level: " << str; } @@ -93,6 +119,5 @@ struct ELevelHelpers { } }; - -} // namspace NLog -} // namspace NYql +} // namespace NLog +} // namespace NYql diff --git a/yql/essentials/utils/log/log_ut.cpp b/yql/essentials/utils/log/log_ut.cpp index e2ad8b24824..32cb5c9c02e 100644 --- a/yql/essentials/utils/log/log_ut.cpp +++ b/yql/essentials/utils/log/log_ut.cpp @@ -18,710 +18,708 @@ #include <regex> - using 
namespace NYql; using namespace NLog; Y_UNIT_TEST_SUITE(TLogTest) { - Y_UNIT_TEST_ON_EACH_LOG_FORMAT(WrittingWithoutMacro) { - TStringStream out; - YqlLoggerScope logger(&out, Format, /* isStrict */ false); +Y_UNIT_TEST_ON_EACH_LOG_FORMAT(WrittingWithoutMacro) { + TStringStream out; + YqlLoggerScope logger(&out, Format, /* isStrict */ false); - TString message = "some performance info"; - YqlLogger().Write(ELogPriority::TLOG_INFO, message); + TString message = "some performance info"; + YqlLogger().Write(ELogPriority::TLOG_INFO, message); - TString logRow = out.Str(); - UNIT_ASSERT_STRING_CONTAINS(logRow, message); - } + TString logRow = out.Str(); + UNIT_ASSERT_STRING_CONTAINS(logRow, message); +} - Y_UNIT_TEST_ON_EACH_LOG_FORMAT(WrittingUnknownMetaFlag) { - TStringStream out; - YqlLoggerScope logger(&out, Format, /* isStrict */ false); +Y_UNIT_TEST_ON_EACH_LOG_FORMAT(WrittingUnknownMetaFlag) { + TStringStream out; + YqlLoggerScope logger(&out, Format, /* isStrict */ false); - TString message = "some performance info"; - YqlLogger().Write(ELogPriority::TLOG_INFO, message, {{"unknown", "value"}}); + TString message = "some performance info"; + YqlLogger().Write(ELogPriority::TLOG_INFO, message, {{"unknown", "value"}}); - TString logRow = out.Str(); - UNIT_ASSERT_STRING_CONTAINS(logRow, message); - UNIT_ASSERT_STRING_CONTAINS(logRow, "unknown = value"); - } + TString logRow = out.Str(); + UNIT_ASSERT_STRING_CONTAINS(logRow, message); + UNIT_ASSERT_STRING_CONTAINS(logRow, "unknown = value"); +} - Y_UNIT_TEST_ON_EACH_LOG_FORMAT(Formatting) { - TStringStream out; - YqlLoggerScope logger(&out, Format); - YqlLogger().UpdateProcInfo("my_proc"); +Y_UNIT_TEST_ON_EACH_LOG_FORMAT(Formatting) { + TStringStream out; + YqlLoggerScope logger(&out, Format); + YqlLogger().UpdateProcInfo("my_proc"); - TString message = "some performance info"; - YQL_LOG(INFO) << message; + TString message = "some performance info"; + YQL_LOG(INFO) << message; - TLogRow logRow = 
ParseLogRow(out.Str()); + TLogRow logRow = ParseLogRow(out.Str()); - TDuration elapsed(logRow.Time - TInstant::Now()); - UNIT_ASSERT(elapsed < TDuration::MilliSeconds(5)); - UNIT_ASSERT_EQUAL(logRow.Level, ELevel::INFO); - UNIT_ASSERT_STRINGS_EQUAL(logRow.ProcName, "my_proc"); - UNIT_ASSERT_EQUAL(logRow.ProcId, GetPID()); - UNIT_ASSERT(logRow.ThreadId > 0); - UNIT_ASSERT_EQUAL(logRow.Component, EComponent::Default); - UNIT_ASSERT_STRINGS_EQUAL( - logRow.FileName, - TStringBuf(__FILE__).RNextTok(LOCSLASH_C)); - UNIT_ASSERT(logRow.LineNumber != 0); - UNIT_ASSERT_STRINGS_EQUAL(logRow.Message, message); - } + TDuration elapsed(logRow.Time - TInstant::Now()); + UNIT_ASSERT(elapsed < TDuration::MilliSeconds(5)); + UNIT_ASSERT_EQUAL(logRow.Level, ELevel::INFO); + UNIT_ASSERT_STRINGS_EQUAL(logRow.ProcName, "my_proc"); + UNIT_ASSERT_EQUAL(logRow.ProcId, GetPID()); + UNIT_ASSERT(logRow.ThreadId > 0); + UNIT_ASSERT_EQUAL(logRow.Component, EComponent::Default); + UNIT_ASSERT_STRINGS_EQUAL( + logRow.FileName, + TStringBuf(__FILE__).RNextTok(LOCSLASH_C)); + UNIT_ASSERT(logRow.LineNumber != 0); + UNIT_ASSERT_STRINGS_EQUAL(logRow.Message, message); +} - Y_UNIT_TEST_ON_EACH_LOG_FORMAT(SpecialCharacters) { - TStringStream out; - YqlLoggerScope logger(&out, Format); - YqlLogger().UpdateProcInfo("\\evil\\"); +Y_UNIT_TEST_ON_EACH_LOG_FORMAT(SpecialCharacters) { + TStringStream out; + YqlLoggerScope logger(&out, Format); + YqlLogger().UpdateProcInfo("\\evil\\"); - YQL_LOG(INFO) << "My name is \"YQL\"!"; + YQL_LOG(INFO) << "My name is \"YQL\"!"; - TLogRow logRow = ParseLogRow(out.Str()); - UNIT_ASSERT_STRINGS_EQUAL(logRow.ProcName, "\\evil\\"); - UNIT_ASSERT_STRINGS_EQUAL(logRow.Message, "My name is \"YQL\"!"); - } + TLogRow logRow = ParseLogRow(out.Str()); + UNIT_ASSERT_STRINGS_EQUAL(logRow.ProcName, "\\evil\\"); + UNIT_ASSERT_STRINGS_EQUAL(logRow.Message, "My name is \"YQL\"!"); +} - Y_UNIT_TEST_ON_EACH_LOG_FORMAT(Levels) { - TStringStream out; - YqlLoggerScope logger(&out, Format); // 
default log level INFO +Y_UNIT_TEST_ON_EACH_LOG_FORMAT(Levels) { + TStringStream out; + YqlLoggerScope logger(&out, Format); // default log level INFO - YQL_LOG(FATAL) << "fatal message"; - YQL_LOG(ERROR) << "error message"; - YQL_LOG(WARN) << "warning message"; - YQL_LOG(INFO) << "info message"; - YQL_LOG(DEBUG) << "debug message"; - YQL_LOG(TRACE) << "trace message"; + YQL_LOG(FATAL) << "fatal message"; + YQL_LOG(ERROR) << "error message"; + YQL_LOG(WARN) << "warning message"; + YQL_LOG(INFO) << "info message"; + YQL_LOG(DEBUG) << "debug message"; + YQL_LOG(TRACE) << "trace message"; - TString fatalStr, errorStr, warnStr, infoStr, _; - Split(out.Str(), '\n', fatalStr, errorStr, warnStr, infoStr, _); + TString fatalStr, errorStr, warnStr, infoStr, _; + Split(out.Str(), '\n', fatalStr, errorStr, warnStr, infoStr, _); - { - TLogRow logRow = ParseLogRow(fatalStr); - UNIT_ASSERT_EQUAL(logRow.Level, ELevel::FATAL); - UNIT_ASSERT_EQUAL(logRow.Component, EComponent::Default); - UNIT_ASSERT_STRINGS_EQUAL(logRow.Message, "fatal message"); - } - { - TLogRow logRow = ParseLogRow(errorStr); - UNIT_ASSERT_EQUAL(logRow.Level, ELevel::ERROR); - UNIT_ASSERT_EQUAL(logRow.Component, EComponent::Default); - UNIT_ASSERT_STRINGS_EQUAL(logRow.Message, "error message"); - } - { - TLogRow logRow = ParseLogRow(warnStr); - UNIT_ASSERT_EQUAL(logRow.Level, ELevel::WARN); - UNIT_ASSERT_EQUAL(logRow.Component, EComponent::Default); - UNIT_ASSERT_STRINGS_EQUAL(logRow.Message, "warning message"); - } - { - TLogRow logRow = ParseLogRow(infoStr); - UNIT_ASSERT_EQUAL(logRow.Level, ELevel::INFO); - UNIT_ASSERT_EQUAL(logRow.Component, EComponent::Default); - UNIT_ASSERT_STRINGS_EQUAL(logRow.Message, "info message"); - } + { + TLogRow logRow = ParseLogRow(fatalStr); + UNIT_ASSERT_EQUAL(logRow.Level, ELevel::FATAL); + UNIT_ASSERT_EQUAL(logRow.Component, EComponent::Default); + UNIT_ASSERT_STRINGS_EQUAL(logRow.Message, "fatal message"); + } + { + TLogRow logRow = ParseLogRow(errorStr); + 
UNIT_ASSERT_EQUAL(logRow.Level, ELevel::ERROR); + UNIT_ASSERT_EQUAL(logRow.Component, EComponent::Default); + UNIT_ASSERT_STRINGS_EQUAL(logRow.Message, "error message"); } + { + TLogRow logRow = ParseLogRow(warnStr); + UNIT_ASSERT_EQUAL(logRow.Level, ELevel::WARN); + UNIT_ASSERT_EQUAL(logRow.Component, EComponent::Default); + UNIT_ASSERT_STRINGS_EQUAL(logRow.Message, "warning message"); + } + { + TLogRow logRow = ParseLogRow(infoStr); + UNIT_ASSERT_EQUAL(logRow.Level, ELevel::INFO); + UNIT_ASSERT_EQUAL(logRow.Component, EComponent::Default); + UNIT_ASSERT_STRINGS_EQUAL(logRow.Message, "info message"); + } +} - Y_UNIT_TEST_ON_EACH_LOG_FORMAT(Components) { - TStringStream out; - YqlLoggerScope logger(&out, Format); +Y_UNIT_TEST_ON_EACH_LOG_FORMAT(Components) { + TStringStream out; + YqlLoggerScope logger(&out, Format); - YQL_CLOG(INFO, Default) << "default message"; - YQL_CLOG(INFO, Core) << "core message"; - YQL_CLOG(INFO, Sql) << "sql message"; - YQL_CLOG(INFO, ProviderCommon) << "common message"; - YQL_CLOG(INFO, ProviderYt) << "yt message"; - YQL_CLOG(INFO, ProviderKikimr) << "kikimr message"; - YQL_CLOG(INFO, ProviderRtmr) << "rtmr message"; - YQL_CLOG(INFO, Performance) << "performance message"; - YQL_CLOG(INFO, Perf) << "perf message"; + YQL_CLOG(INFO, Default) << "default message"; + YQL_CLOG(INFO, Core) << "core message"; + YQL_CLOG(INFO, Sql) << "sql message"; + YQL_CLOG(INFO, ProviderCommon) << "common message"; + YQL_CLOG(INFO, ProviderYt) << "yt message"; + YQL_CLOG(INFO, ProviderKikimr) << "kikimr message"; + YQL_CLOG(INFO, ProviderRtmr) << "rtmr message"; + YQL_CLOG(INFO, Performance) << "performance message"; + YQL_CLOG(INFO, Perf) << "perf message"; - TString defaultStr, coreStr, sqlStr, commonStr, ytStr, - kikimrStr, rtmrStr, performanceStr, perfStr, _; - Split(out.Str(), '\n', defaultStr, coreStr, sqlStr, - commonStr, ytStr, - kikimrStr, rtmrStr, - performanceStr, perfStr, _); + TString defaultStr, coreStr, sqlStr, commonStr, ytStr, + kikimrStr, 
rtmrStr, performanceStr, perfStr, _; + Split(out.Str(), '\n', defaultStr, coreStr, sqlStr, + commonStr, ytStr, + kikimrStr, rtmrStr, + performanceStr, perfStr, _); - { - TLogRow logRow = ParseLogRow(defaultStr); - UNIT_ASSERT_EQUAL(logRow.Level, ELevel::INFO); - UNIT_ASSERT_EQUAL(logRow.Component, EComponent::Default); - UNIT_ASSERT_STRINGS_EQUAL(logRow.Message, "default message"); - } - { - TLogRow logRow = ParseLogRow(coreStr); - UNIT_ASSERT_EQUAL(logRow.Level, ELevel::INFO); - UNIT_ASSERT_EQUAL(logRow.Component, EComponent::Core); - UNIT_ASSERT_STRINGS_EQUAL(logRow.Message, "core message"); - } - { - TLogRow logRow = ParseLogRow(sqlStr); - UNIT_ASSERT_EQUAL(logRow.Level, ELevel::INFO); - UNIT_ASSERT_EQUAL(logRow.Component, EComponent::Sql); - UNIT_ASSERT_STRINGS_EQUAL(logRow.Message, "sql message"); - } - { - TLogRow logRow = ParseLogRow(commonStr); - UNIT_ASSERT_EQUAL(logRow.Level, ELevel::INFO); - UNIT_ASSERT_EQUAL(logRow.Component, EComponent::ProviderCommon); - UNIT_ASSERT_STRINGS_EQUAL(logRow.Message, "common message"); - } - { - TLogRow logRow = ParseLogRow(ytStr); - UNIT_ASSERT_EQUAL(logRow.Level, ELevel::INFO); - UNIT_ASSERT_EQUAL(logRow.Component, EComponent::ProviderYt); - UNIT_ASSERT_STRINGS_EQUAL(logRow.Message, "yt message"); - } - { - TLogRow logRow = ParseLogRow(kikimrStr); - UNIT_ASSERT_EQUAL(logRow.Level, ELevel::INFO); - UNIT_ASSERT_EQUAL(logRow.Component, EComponent::ProviderKikimr); - UNIT_ASSERT_STRINGS_EQUAL(logRow.Message, "kikimr message"); - } - { - TLogRow logRow = ParseLogRow(rtmrStr); - UNIT_ASSERT_EQUAL(logRow.Level, ELevel::INFO); - UNIT_ASSERT_EQUAL(logRow.Component, EComponent::ProviderRtmr); - UNIT_ASSERT_STRINGS_EQUAL(logRow.Message, "rtmr message"); - } - { - TLogRow logRow = ParseLogRow(performanceStr); - UNIT_ASSERT_EQUAL(logRow.Level, ELevel::INFO); - UNIT_ASSERT_EQUAL(logRow.Component, EComponent::Performance); - UNIT_ASSERT_EQUAL(logRow.Component, EComponent::Perf); - UNIT_ASSERT_STRINGS_EQUAL(logRow.Message, "performance 
message"); - } - { - TLogRow logRow = ParseLogRow(perfStr); - UNIT_ASSERT_EQUAL(logRow.Level, ELevel::INFO); - UNIT_ASSERT_EQUAL(logRow.Component, EComponent::Perf); - UNIT_ASSERT_EQUAL(logRow.Component, EComponent::Performance); - UNIT_ASSERT_STRINGS_EQUAL(logRow.Message, "perf message"); - } + { + TLogRow logRow = ParseLogRow(defaultStr); + UNIT_ASSERT_EQUAL(logRow.Level, ELevel::INFO); + UNIT_ASSERT_EQUAL(logRow.Component, EComponent::Default); + UNIT_ASSERT_STRINGS_EQUAL(logRow.Message, "default message"); } + { + TLogRow logRow = ParseLogRow(coreStr); + UNIT_ASSERT_EQUAL(logRow.Level, ELevel::INFO); + UNIT_ASSERT_EQUAL(logRow.Component, EComponent::Core); + UNIT_ASSERT_STRINGS_EQUAL(logRow.Message, "core message"); + } + { + TLogRow logRow = ParseLogRow(sqlStr); + UNIT_ASSERT_EQUAL(logRow.Level, ELevel::INFO); + UNIT_ASSERT_EQUAL(logRow.Component, EComponent::Sql); + UNIT_ASSERT_STRINGS_EQUAL(logRow.Message, "sql message"); + } + { + TLogRow logRow = ParseLogRow(commonStr); + UNIT_ASSERT_EQUAL(logRow.Level, ELevel::INFO); + UNIT_ASSERT_EQUAL(logRow.Component, EComponent::ProviderCommon); + UNIT_ASSERT_STRINGS_EQUAL(logRow.Message, "common message"); + } + { + TLogRow logRow = ParseLogRow(ytStr); + UNIT_ASSERT_EQUAL(logRow.Level, ELevel::INFO); + UNIT_ASSERT_EQUAL(logRow.Component, EComponent::ProviderYt); + UNIT_ASSERT_STRINGS_EQUAL(logRow.Message, "yt message"); + } + { + TLogRow logRow = ParseLogRow(kikimrStr); + UNIT_ASSERT_EQUAL(logRow.Level, ELevel::INFO); + UNIT_ASSERT_EQUAL(logRow.Component, EComponent::ProviderKikimr); + UNIT_ASSERT_STRINGS_EQUAL(logRow.Message, "kikimr message"); + } + { + TLogRow logRow = ParseLogRow(rtmrStr); + UNIT_ASSERT_EQUAL(logRow.Level, ELevel::INFO); + UNIT_ASSERT_EQUAL(logRow.Component, EComponent::ProviderRtmr); + UNIT_ASSERT_STRINGS_EQUAL(logRow.Message, "rtmr message"); + } + { + TLogRow logRow = ParseLogRow(performanceStr); + UNIT_ASSERT_EQUAL(logRow.Level, ELevel::INFO); + UNIT_ASSERT_EQUAL(logRow.Component, 
EComponent::Performance); + UNIT_ASSERT_EQUAL(logRow.Component, EComponent::Perf); + UNIT_ASSERT_STRINGS_EQUAL(logRow.Message, "performance message"); + } + { + TLogRow logRow = ParseLogRow(perfStr); + UNIT_ASSERT_EQUAL(logRow.Level, ELevel::INFO); + UNIT_ASSERT_EQUAL(logRow.Component, EComponent::Perf); + UNIT_ASSERT_EQUAL(logRow.Component, EComponent::Performance); + UNIT_ASSERT_STRINGS_EQUAL(logRow.Message, "perf message"); + } +} - Y_UNIT_TEST_ON_EACH_LOG_FORMAT(Conditional) { - TStringStream out; - YqlLoggerScope logger(&out, Format); +Y_UNIT_TEST_ON_EACH_LOG_FORMAT(Conditional) { + TStringStream out; + YqlLoggerScope logger(&out, Format); - YQL_LOG_IF(INFO, true) << "default info message"; - YQL_LOG_IF(INFO, false) << "must not be logged"; + YQL_LOG_IF(INFO, true) << "default info message"; + YQL_LOG_IF(INFO, false) << "must not be logged"; - YQL_CLOG_IF(INFO, Perf, true) << "perf info message"; - YQL_CLOG_IF(INFO, Perf, false) << "perf info message"; + YQL_CLOG_IF(INFO, Perf, true) << "perf info message"; + YQL_CLOG_IF(INFO, Perf, false) << "perf info message"; - TString defaultStr, perfStr, _; - Split(out.Str(), '\n', defaultStr, perfStr, _); + TString defaultStr, perfStr, _; + Split(out.Str(), '\n', defaultStr, perfStr, _); - { - TLogRow logRow = ParseLogRow(defaultStr); - UNIT_ASSERT_EQUAL(logRow.Level, ELevel::INFO); - UNIT_ASSERT_EQUAL(logRow.Component, EComponent::Default); - UNIT_ASSERT_STRINGS_EQUAL(logRow.Message, "default info message"); - } - { - TLogRow logRow = ParseLogRow(perfStr); - UNIT_ASSERT_EQUAL(logRow.Level, ELevel::INFO); - UNIT_ASSERT_EQUAL(logRow.Component, EComponent::Perf); - UNIT_ASSERT_STRINGS_EQUAL(logRow.Message, "perf info message"); - } + { + TLogRow logRow = ParseLogRow(defaultStr); + UNIT_ASSERT_EQUAL(logRow.Level, ELevel::INFO); + UNIT_ASSERT_EQUAL(logRow.Component, EComponent::Default); + UNIT_ASSERT_STRINGS_EQUAL(logRow.Message, "default info message"); } + { + TLogRow logRow = ParseLogRow(perfStr); + 
UNIT_ASSERT_EQUAL(logRow.Level, ELevel::INFO); + UNIT_ASSERT_EQUAL(logRow.Component, EComponent::Perf); + UNIT_ASSERT_STRINGS_EQUAL(logRow.Message, "perf info message"); + } +} - Y_UNIT_TEST_ON_EACH_LOG_FORMAT(Contexts) { - TStringStream out; - YqlLoggerScope logger(&out, Format); - - UNIT_ASSERT_STRINGS_EQUAL(CurrentLogContextPath().second, ""); - YQL_LOG(INFO) << "level0 - begin"; - { - YQL_LOG_CTX_SCOPE("ctx1"); - UNIT_ASSERT_STRINGS_EQUAL(CurrentLogContextPath().second, "ctx1"); - YQL_LOG(INFO) << "level1 - begin"; +Y_UNIT_TEST_ON_EACH_LOG_FORMAT(Contexts) { + TStringStream out; + YqlLoggerScope logger(&out, Format); - YQL_LOG_CTX_BLOCK(TStringBuf("ctx2")) { - UNIT_ASSERT_STRINGS_EQUAL(CurrentLogContextPath().second, "ctx1/ctx2"); - YQL_LOG(WARN) << "level2"; - } + UNIT_ASSERT_STRINGS_EQUAL(CurrentLogContextPath().second, ""); + YQL_LOG(INFO) << "level0 - begin"; + { + YQL_LOG_CTX_SCOPE("ctx1"); + UNIT_ASSERT_STRINGS_EQUAL(CurrentLogContextPath().second, "ctx1"); + YQL_LOG(INFO) << "level1 - begin"; - UNIT_ASSERT_STRINGS_EQUAL(CurrentLogContextPath().second, "ctx1"); - YQL_LOG(INFO) << "level1 - end"; + YQL_LOG_CTX_BLOCK(TStringBuf("ctx2")) { + UNIT_ASSERT_STRINGS_EQUAL(CurrentLogContextPath().second, "ctx1/ctx2"); + YQL_LOG(WARN) << "level2"; } - UNIT_ASSERT_STRINGS_EQUAL(CurrentLogContextPath().second, ""); - YQL_LOG(INFO) << "level0 - end"; - TString row1Str, row2Str, row3Str, row4Str, row5Str, _; - Split(out.Str(), '\n', row1Str, row2Str, row3Str, row4Str, row5Str, _); + UNIT_ASSERT_STRINGS_EQUAL(CurrentLogContextPath().second, "ctx1"); + YQL_LOG(INFO) << "level1 - end"; + } + UNIT_ASSERT_STRINGS_EQUAL(CurrentLogContextPath().second, ""); + YQL_LOG(INFO) << "level0 - end"; - { - TLogRow logRow = ParseLogRow(row1Str); - UNIT_ASSERT_EQUAL(logRow.Level, ELevel::INFO); - UNIT_ASSERT_EQUAL(logRow.Component, EComponent::Default); - UNIT_ASSERT_STRINGS_EQUAL(logRow.Message, "level0 - begin"); - } - { - TLogRow logRow = ParseLogRow(row2Str); - 
UNIT_ASSERT_EQUAL(logRow.Level, ELevel::INFO); - UNIT_ASSERT_EQUAL(logRow.Component, EComponent::Default); - UNIT_ASSERT_STRINGS_EQUAL(logRow.Path, "ctx1"); - UNIT_ASSERT_STRINGS_EQUAL(logRow.Message, "level1 - begin"); - } - { - TLogRow logRow = ParseLogRow(row3Str); - UNIT_ASSERT_EQUAL(logRow.Level, ELevel::WARN); - UNIT_ASSERT_EQUAL(logRow.Component, EComponent::Default); - UNIT_ASSERT_STRINGS_EQUAL(logRow.Path, "ctx1/ctx2"); - UNIT_ASSERT_STRINGS_EQUAL(logRow.Message, "level2"); - } - { - TLogRow logRow = ParseLogRow(row4Str); - UNIT_ASSERT_EQUAL(logRow.Level, ELevel::INFO); - UNIT_ASSERT_EQUAL(logRow.Component, EComponent::Default); - UNIT_ASSERT_STRINGS_EQUAL(logRow.Path, "ctx1"); - UNIT_ASSERT_STRINGS_EQUAL(logRow.Message, "level1 - end"); - } - { - TLogRow logRow = ParseLogRow(row5Str); - UNIT_ASSERT_EQUAL(logRow.Level, ELevel::INFO); - UNIT_ASSERT_EQUAL(logRow.Component, EComponent::Default); - UNIT_ASSERT_STRINGS_EQUAL(logRow.Message, "level0 - end"); - } + TString row1Str, row2Str, row3Str, row4Str, row5Str, _; + Split(out.Str(), '\n', row1Str, row2Str, row3Str, row4Str, row5Str, _); + + { + TLogRow logRow = ParseLogRow(row1Str); + UNIT_ASSERT_EQUAL(logRow.Level, ELevel::INFO); + UNIT_ASSERT_EQUAL(logRow.Component, EComponent::Default); + UNIT_ASSERT_STRINGS_EQUAL(logRow.Message, "level0 - begin"); + } + { + TLogRow logRow = ParseLogRow(row2Str); + UNIT_ASSERT_EQUAL(logRow.Level, ELevel::INFO); + UNIT_ASSERT_EQUAL(logRow.Component, EComponent::Default); + UNIT_ASSERT_STRINGS_EQUAL(logRow.Path, "ctx1"); + UNIT_ASSERT_STRINGS_EQUAL(logRow.Message, "level1 - begin"); + } + { + TLogRow logRow = ParseLogRow(row3Str); + UNIT_ASSERT_EQUAL(logRow.Level, ELevel::WARN); + UNIT_ASSERT_EQUAL(logRow.Component, EComponent::Default); + UNIT_ASSERT_STRINGS_EQUAL(logRow.Path, "ctx1/ctx2"); + UNIT_ASSERT_STRINGS_EQUAL(logRow.Message, "level2"); } + { + TLogRow logRow = ParseLogRow(row4Str); + UNIT_ASSERT_EQUAL(logRow.Level, ELevel::INFO); + 
UNIT_ASSERT_EQUAL(logRow.Component, EComponent::Default); + UNIT_ASSERT_STRINGS_EQUAL(logRow.Path, "ctx1"); + UNIT_ASSERT_STRINGS_EQUAL(logRow.Message, "level1 - end"); + } + { + TLogRow logRow = ParseLogRow(row5Str); + UNIT_ASSERT_EQUAL(logRow.Level, ELevel::INFO); + UNIT_ASSERT_EQUAL(logRow.Component, EComponent::Default); + UNIT_ASSERT_STRINGS_EQUAL(logRow.Message, "level0 - end"); + } +} - Y_UNIT_TEST_ON_EACH_LOG_FORMAT(UnknownSessionContexts) { - TStringStream out; - YqlLoggerScope logger(&out, Format); +Y_UNIT_TEST_ON_EACH_LOG_FORMAT(UnknownSessionContexts) { + TStringStream out; + YqlLoggerScope logger(&out, Format); + + { + YQL_LOG_CTX_ROOT_SCOPE("ctx"); + UNIT_ASSERT_STRINGS_EQUAL(CurrentLogContextPath().first, ""); + UNIT_ASSERT_STRINGS_EQUAL(CurrentLogContextPath().second, "ctx"); + YQL_LOG(INFO) << "level0 - begin"; { - YQL_LOG_CTX_ROOT_SCOPE("ctx"); + YQL_LOG_CTX_ROOT_SESSION_SCOPE(CurrentLogContextPath()); UNIT_ASSERT_STRINGS_EQUAL(CurrentLogContextPath().first, ""); UNIT_ASSERT_STRINGS_EQUAL(CurrentLogContextPath().second, "ctx"); - YQL_LOG(INFO) << "level0 - begin"; - { - YQL_LOG_CTX_ROOT_SESSION_SCOPE(CurrentLogContextPath()); + YQL_LOG(INFO) << "level1 - begin"; + YQL_LOG_CTX_BLOCK("ctx1") { UNIT_ASSERT_STRINGS_EQUAL(CurrentLogContextPath().first, ""); - UNIT_ASSERT_STRINGS_EQUAL(CurrentLogContextPath().second, "ctx"); - - YQL_LOG(INFO) << "level1 - begin"; - YQL_LOG_CTX_BLOCK("ctx1") { - UNIT_ASSERT_STRINGS_EQUAL(CurrentLogContextPath().first, ""); - UNIT_ASSERT_STRINGS_EQUAL(CurrentLogContextPath().second, "ctx/ctx1"); - - YQL_LOG(WARN) << "level2"; - } + UNIT_ASSERT_STRINGS_EQUAL(CurrentLogContextPath().second, "ctx/ctx1"); - UNIT_ASSERT_STRINGS_EQUAL(CurrentLogContextPath().first, ""); - UNIT_ASSERT_STRINGS_EQUAL(CurrentLogContextPath().second, "ctx"); - YQL_LOG(INFO) << "level1 - end"; + YQL_LOG(WARN) << "level2"; } + UNIT_ASSERT_STRINGS_EQUAL(CurrentLogContextPath().first, ""); UNIT_ASSERT_STRINGS_EQUAL(CurrentLogContextPath().second, 
"ctx"); - YQL_LOG(INFO) << "level0 - end"; + YQL_LOG(INFO) << "level1 - end"; } + UNIT_ASSERT_STRINGS_EQUAL(CurrentLogContextPath().first, ""); + UNIT_ASSERT_STRINGS_EQUAL(CurrentLogContextPath().second, "ctx"); + YQL_LOG(INFO) << "level0 - end"; + } - TString row1Str, row2Str, row3Str, row4Str, row5Str, _; - Split(out.Str(), '\n', row1Str, row2Str, row3Str, row4Str, row5Str, _); - { - TLogRow logRow = ParseLogRow(row1Str); - UNIT_ASSERT_EQUAL(logRow.Level, ELevel::INFO); - UNIT_ASSERT_EQUAL(logRow.Component, EComponent::Default); - UNIT_ASSERT_STRINGS_EQUAL(logRow.Path, "ctx"); - UNIT_ASSERT_STRINGS_EQUAL(logRow.Message, "level0 - begin"); - } - { - TLogRow logRow = ParseLogRow(row2Str); - UNIT_ASSERT_EQUAL(logRow.Level, ELevel::INFO); - UNIT_ASSERT_EQUAL(logRow.Component, EComponent::Default); - UNIT_ASSERT_STRINGS_EQUAL(logRow.Path, "ctx"); - UNIT_ASSERT_STRINGS_EQUAL(logRow.Message, "level1 - begin"); - } - { - TLogRow logRow = ParseLogRow(row3Str); - UNIT_ASSERT_EQUAL(logRow.Level, ELevel::WARN); - UNIT_ASSERT_EQUAL(logRow.Component, EComponent::Default); - UNIT_ASSERT_STRINGS_EQUAL(logRow.Path, "ctx/ctx1"); - UNIT_ASSERT_STRINGS_EQUAL(logRow.Message, "level2"); - } - { - TLogRow logRow = ParseLogRow(row4Str); - UNIT_ASSERT_EQUAL(logRow.Level, ELevel::INFO); - UNIT_ASSERT_EQUAL(logRow.Component, EComponent::Default); - UNIT_ASSERT_STRINGS_EQUAL(logRow.Path, "ctx"); - UNIT_ASSERT_STRINGS_EQUAL(logRow.Message, "level1 - end"); - } - { - TLogRow logRow = ParseLogRow(row5Str); - UNIT_ASSERT_EQUAL(logRow.Level, ELevel::INFO); - UNIT_ASSERT_EQUAL(logRow.Component, EComponent::Default); - UNIT_ASSERT_STRINGS_EQUAL(logRow.Path, "ctx"); - UNIT_ASSERT_STRINGS_EQUAL(logRow.Message, "level0 - end"); - } + TString row1Str, row2Str, row3Str, row4Str, row5Str, _; + Split(out.Str(), '\n', row1Str, row2Str, row3Str, row4Str, row5Str, _); + { + TLogRow logRow = ParseLogRow(row1Str); + UNIT_ASSERT_EQUAL(logRow.Level, ELevel::INFO); + UNIT_ASSERT_EQUAL(logRow.Component, 
EComponent::Default); + UNIT_ASSERT_STRINGS_EQUAL(logRow.Path, "ctx"); + UNIT_ASSERT_STRINGS_EQUAL(logRow.Message, "level0 - begin"); + } + { + TLogRow logRow = ParseLogRow(row2Str); + UNIT_ASSERT_EQUAL(logRow.Level, ELevel::INFO); + UNIT_ASSERT_EQUAL(logRow.Component, EComponent::Default); + UNIT_ASSERT_STRINGS_EQUAL(logRow.Path, "ctx"); + UNIT_ASSERT_STRINGS_EQUAL(logRow.Message, "level1 - begin"); + } + { + TLogRow logRow = ParseLogRow(row3Str); + UNIT_ASSERT_EQUAL(logRow.Level, ELevel::WARN); + UNIT_ASSERT_EQUAL(logRow.Component, EComponent::Default); + UNIT_ASSERT_STRINGS_EQUAL(logRow.Path, "ctx/ctx1"); + UNIT_ASSERT_STRINGS_EQUAL(logRow.Message, "level2"); } + { + TLogRow logRow = ParseLogRow(row4Str); + UNIT_ASSERT_EQUAL(logRow.Level, ELevel::INFO); + UNIT_ASSERT_EQUAL(logRow.Component, EComponent::Default); + UNIT_ASSERT_STRINGS_EQUAL(logRow.Path, "ctx"); + UNIT_ASSERT_STRINGS_EQUAL(logRow.Message, "level1 - end"); + } + { + TLogRow logRow = ParseLogRow(row5Str); + UNIT_ASSERT_EQUAL(logRow.Level, ELevel::INFO); + UNIT_ASSERT_EQUAL(logRow.Component, EComponent::Default); + UNIT_ASSERT_STRINGS_EQUAL(logRow.Path, "ctx"); + UNIT_ASSERT_STRINGS_EQUAL(logRow.Message, "level0 - end"); + } +} - Y_UNIT_TEST_ON_EACH_LOG_FORMAT(SessionContexts) { - TStringStream out; - YqlLoggerScope logger(&out, Format); +Y_UNIT_TEST_ON_EACH_LOG_FORMAT(SessionContexts) { + TStringStream out; + YqlLoggerScope logger(&out, Format); + + { + YQL_LOG_CTX_ROOT_SESSION_SCOPE("sessionId", "ctx"); + UNIT_ASSERT_STRINGS_EQUAL(CurrentLogContextPath().first, "sessionId"); + UNIT_ASSERT_STRINGS_EQUAL(CurrentLogContextPath().second, "ctx"); + YQL_LOG(INFO) << "level0 - begin"; { - YQL_LOG_CTX_ROOT_SESSION_SCOPE("sessionId", "ctx"); + YQL_LOG_CTX_ROOT_SESSION_SCOPE(CurrentLogContextPath()); UNIT_ASSERT_STRINGS_EQUAL(CurrentLogContextPath().first, "sessionId"); UNIT_ASSERT_STRINGS_EQUAL(CurrentLogContextPath().second, "ctx"); - YQL_LOG(INFO) << "level0 - begin"; - { - 
YQL_LOG_CTX_ROOT_SESSION_SCOPE(CurrentLogContextPath()); + YQL_LOG(INFO) << "level1 - begin"; + YQL_LOG_CTX_BLOCK("ctx1") { UNIT_ASSERT_STRINGS_EQUAL(CurrentLogContextPath().first, "sessionId"); - UNIT_ASSERT_STRINGS_EQUAL(CurrentLogContextPath().second, "ctx"); - - YQL_LOG(INFO) << "level1 - begin"; - YQL_LOG_CTX_BLOCK("ctx1") { - UNIT_ASSERT_STRINGS_EQUAL(CurrentLogContextPath().first, "sessionId"); - UNIT_ASSERT_STRINGS_EQUAL(CurrentLogContextPath().second, "ctx/ctx1"); - - YQL_LOG(WARN) << "level2"; - } + UNIT_ASSERT_STRINGS_EQUAL(CurrentLogContextPath().second, "ctx/ctx1"); - UNIT_ASSERT_STRINGS_EQUAL(CurrentLogContextPath().first, "sessionId"); - UNIT_ASSERT_STRINGS_EQUAL(CurrentLogContextPath().second, "ctx"); - YQL_LOG(INFO) << "level1 - end"; + YQL_LOG(WARN) << "level2"; } + UNIT_ASSERT_STRINGS_EQUAL(CurrentLogContextPath().first, "sessionId"); UNIT_ASSERT_STRINGS_EQUAL(CurrentLogContextPath().second, "ctx"); - YQL_LOG(INFO) << "level0 - end"; - } - - TString row1Str, row2Str, row3Str, row4Str, row5Str, _; - Split(out.Str(), '\n', row1Str, row2Str, row3Str, row4Str, row5Str, _); - { - TLogRow logRow = ParseLogRow(row1Str); - UNIT_ASSERT_EQUAL(logRow.Level, ELevel::INFO); - UNIT_ASSERT_EQUAL(logRow.Component, EComponent::Default); - UNIT_ASSERT_STRINGS_EQUAL(logRow.Path, "sessionId/ctx"); - UNIT_ASSERT_STRINGS_EQUAL(logRow.Message, "level0 - begin"); - } - { - TLogRow logRow = ParseLogRow(row2Str); - UNIT_ASSERT_EQUAL(logRow.Level, ELevel::INFO); - UNIT_ASSERT_EQUAL(logRow.Component, EComponent::Default); - UNIT_ASSERT_STRINGS_EQUAL(logRow.Path, "sessionId/ctx"); - UNIT_ASSERT_STRINGS_EQUAL(logRow.Message, "level1 - begin"); - } - { - TLogRow logRow = ParseLogRow(row3Str); - UNIT_ASSERT_EQUAL(logRow.Level, ELevel::WARN); - UNIT_ASSERT_EQUAL(logRow.Component, EComponent::Default); - UNIT_ASSERT_STRINGS_EQUAL(logRow.Path, "sessionId/ctx/ctx1"); - UNIT_ASSERT_STRINGS_EQUAL(logRow.Message, "level2"); - } - { - TLogRow logRow = ParseLogRow(row4Str); - 
UNIT_ASSERT_EQUAL(logRow.Level, ELevel::INFO); - UNIT_ASSERT_EQUAL(logRow.Component, EComponent::Default); - UNIT_ASSERT_STRINGS_EQUAL(logRow.Path, "sessionId/ctx"); - UNIT_ASSERT_STRINGS_EQUAL(logRow.Message, "level1 - end"); - } - { - TLogRow logRow = ParseLogRow(row5Str); - UNIT_ASSERT_EQUAL(logRow.Level, ELevel::INFO); - UNIT_ASSERT_EQUAL(logRow.Component, EComponent::Default); - UNIT_ASSERT_STRINGS_EQUAL(logRow.Path, "sessionId/ctx"); - UNIT_ASSERT_STRINGS_EQUAL(logRow.Message, "level0 - end"); + YQL_LOG(INFO) << "level1 - end"; } + UNIT_ASSERT_STRINGS_EQUAL(CurrentLogContextPath().first, "sessionId"); + UNIT_ASSERT_STRINGS_EQUAL(CurrentLogContextPath().second, "ctx"); + YQL_LOG(INFO) << "level0 - end"; } - Y_UNIT_TEST_ON_EACH_LOG_FORMAT(ThrowWithContext) { - bool isThrown = false; - YQL_LOG_CTX_SCOPE("first"); - try { - YQL_LOG_CTX_SCOPE("second"); - YQL_LOG_CTX_THROW yexception() << "some message"; - } catch (const yexception& e) { - isThrown = true; + TString row1Str, row2Str, row3Str, row4Str, row5Str, _; + Split(out.Str(), '\n', row1Str, row2Str, row3Str, row4Str, row5Str, _); + { + TLogRow logRow = ParseLogRow(row1Str); + UNIT_ASSERT_EQUAL(logRow.Level, ELevel::INFO); + UNIT_ASSERT_EQUAL(logRow.Component, EComponent::Default); + UNIT_ASSERT_STRINGS_EQUAL(logRow.Path, "sessionId/ctx"); + UNIT_ASSERT_STRINGS_EQUAL(logRow.Message, "level0 - begin"); + } + { + TLogRow logRow = ParseLogRow(row2Str); + UNIT_ASSERT_EQUAL(logRow.Level, ELevel::INFO); + UNIT_ASSERT_EQUAL(logRow.Component, EComponent::Default); + UNIT_ASSERT_STRINGS_EQUAL(logRow.Path, "sessionId/ctx"); + UNIT_ASSERT_STRINGS_EQUAL(logRow.Message, "level1 - begin"); + } + { + TLogRow logRow = ParseLogRow(row3Str); + UNIT_ASSERT_EQUAL(logRow.Level, ELevel::WARN); + UNIT_ASSERT_EQUAL(logRow.Component, EComponent::Default); + UNIT_ASSERT_STRINGS_EQUAL(logRow.Path, "sessionId/ctx/ctx1"); + UNIT_ASSERT_STRINGS_EQUAL(logRow.Message, "level2"); + } + { + TLogRow logRow = ParseLogRow(row4Str); + 
UNIT_ASSERT_EQUAL(logRow.Level, ELevel::INFO); + UNIT_ASSERT_EQUAL(logRow.Component, EComponent::Default); + UNIT_ASSERT_STRINGS_EQUAL(logRow.Path, "sessionId/ctx"); + UNIT_ASSERT_STRINGS_EQUAL(logRow.Message, "level1 - end"); + } + { + TLogRow logRow = ParseLogRow(row5Str); + UNIT_ASSERT_EQUAL(logRow.Level, ELevel::INFO); + UNIT_ASSERT_EQUAL(logRow.Component, EComponent::Default); + UNIT_ASSERT_STRINGS_EQUAL(logRow.Path, "sessionId/ctx"); + UNIT_ASSERT_STRINGS_EQUAL(logRow.Message, "level0 - end"); + } +} - UNIT_ASSERT_STRINGS_EQUAL(e.AsStrBuf(), "some message"); +Y_UNIT_TEST_ON_EACH_LOG_FORMAT(ThrowWithContext) { + bool isThrown = false; + YQL_LOG_CTX_SCOPE("first"); + try { + YQL_LOG_CTX_SCOPE("second"); + YQL_LOG_CTX_THROW yexception() << "some message"; + } catch (const yexception& e) { + isThrown = true; - TString throwedLogCtx = ThrowedLogContextPath(); - TStringBuf file, line, context; - TStringBuf(throwedLogCtx).Split(".cpp:", file, line); - line.Split(':', line, context); + UNIT_ASSERT_STRINGS_EQUAL(e.AsStrBuf(), "some message"); - TString expectedFile(__LOCATION__.File); - SubstGlobal(expectedFile, LOCSLASH_C, '/'); - UNIT_ASSERT_STRINGS_EQUAL(TString(file)+".cpp", expectedFile); - int lineNumber; - UNIT_ASSERT(TryFromString<int>(line, lineNumber)); - UNIT_ASSERT(lineNumber > 0); - UNIT_ASSERT_STRINGS_EQUAL(context, " {first/second} "); + TString throwedLogCtx = ThrowedLogContextPath(); + TStringBuf file, line, context; + TStringBuf(throwedLogCtx).Split(".cpp:", file, line); + line.Split(':', line, context); - // second call without throw returns empty string - throwedLogCtx = ThrowedLogContextPath(); - UNIT_ASSERT(throwedLogCtx.empty()); - } + TString expectedFile(__LOCATION__.File); + SubstGlobal(expectedFile, LOCSLASH_C, '/'); + UNIT_ASSERT_STRINGS_EQUAL(TString(file) + ".cpp", expectedFile); + int lineNumber; + UNIT_ASSERT(TryFromString<int>(line, lineNumber)); + UNIT_ASSERT(lineNumber > 0); + UNIT_ASSERT_STRINGS_EQUAL(context, " {first/second} "); - 
UNIT_ASSERT_C(isThrown, "exception was not thrown"); + // second call without throw returns empty string + throwedLogCtx = ThrowedLogContextPath(); + UNIT_ASSERT(throwedLogCtx.empty()); } - Y_UNIT_TEST_ON_EACH_LOG_FORMAT(ContextOverride) { - TStringStream out; - YqlLoggerScope logger(&out, Format); + UNIT_ASSERT_C(isThrown, "exception was not thrown"); +} - UNIT_ASSERT_STRINGS_EQUAL(CurrentLogContextPath().second, ""); - { - YQL_LOG_CTX_SCOPE("ctx1"); - UNIT_ASSERT_STRINGS_EQUAL(CurrentLogContextPath().second, "ctx1"); - YQL_LOG(INFO) << "level1 - begin"; +Y_UNIT_TEST_ON_EACH_LOG_FORMAT(ContextOverride) { + TStringStream out; + YqlLoggerScope logger(&out, Format); - YQL_LOG_CTX_BLOCK(TStringBuf("ctx2")) { - UNIT_ASSERT_STRINGS_EQUAL(CurrentLogContextPath().second, "ctx1/ctx2"); - YQL_LOG(WARN) << "level2 - begin"; + UNIT_ASSERT_STRINGS_EQUAL(CurrentLogContextPath().second, ""); + { + YQL_LOG_CTX_SCOPE("ctx1"); + UNIT_ASSERT_STRINGS_EQUAL(CurrentLogContextPath().second, "ctx1"); + YQL_LOG(INFO) << "level1 - begin"; - { - YQL_LOG_CTX_ROOT_SCOPE("ctx3"); - UNIT_ASSERT_STRINGS_EQUAL(CurrentLogContextPath().second, "ctx3"); - YQL_LOG(ERROR) << "level3"; - } + YQL_LOG_CTX_BLOCK(TStringBuf("ctx2")) { + UNIT_ASSERT_STRINGS_EQUAL(CurrentLogContextPath().second, "ctx1/ctx2"); + YQL_LOG(WARN) << "level2 - begin"; - UNIT_ASSERT_STRINGS_EQUAL(CurrentLogContextPath().second, "ctx1/ctx2"); - YQL_LOG(WARN) << "level2 - end"; + { + YQL_LOG_CTX_ROOT_SCOPE("ctx3"); + UNIT_ASSERT_STRINGS_EQUAL(CurrentLogContextPath().second, "ctx3"); + YQL_LOG(ERROR) << "level3"; } - UNIT_ASSERT_STRINGS_EQUAL(CurrentLogContextPath().second, "ctx1"); - YQL_LOG(INFO) << "level1 - end"; + UNIT_ASSERT_STRINGS_EQUAL(CurrentLogContextPath().second, "ctx1/ctx2"); + YQL_LOG(WARN) << "level2 - end"; } - UNIT_ASSERT_STRINGS_EQUAL(CurrentLogContextPath().second, ""); - - TString row1Str, row2Str, row3Str, row4Str, row5Str, _; - Split(out.Str(), '\n', row1Str, row2Str, row3Str, row4Str, row5Str, _); - { - TLogRow 
logRow = ParseLogRow(row1Str); - UNIT_ASSERT_EQUAL(logRow.Level, ELevel::INFO); - UNIT_ASSERT_EQUAL(logRow.Component, EComponent::Default); - UNIT_ASSERT_STRINGS_EQUAL(logRow.Path, "ctx1"); - UNIT_ASSERT_STRINGS_EQUAL(logRow.Message, "level1 - begin"); - } - { - TLogRow logRow = ParseLogRow(row2Str); - UNIT_ASSERT_EQUAL(logRow.Level, ELevel::WARN); - UNIT_ASSERT_EQUAL(logRow.Component, EComponent::Default); - UNIT_ASSERT_STRINGS_EQUAL(logRow.Path, "ctx1/ctx2"); - UNIT_ASSERT_STRINGS_EQUAL(logRow.Message, "level2 - begin"); - } - { - TLogRow logRow = ParseLogRow(row3Str); - UNIT_ASSERT_EQUAL(logRow.Level, ELevel::ERROR); - UNIT_ASSERT_EQUAL(logRow.Component, EComponent::Default); - UNIT_ASSERT_STRINGS_EQUAL(logRow.Path, "ctx3"); - UNIT_ASSERT_STRINGS_EQUAL(logRow.Message, "level3"); - } - { - TLogRow logRow = ParseLogRow(row4Str); - UNIT_ASSERT_EQUAL(logRow.Level, ELevel::WARN); - UNIT_ASSERT_EQUAL(logRow.Component, EComponent::Default); - UNIT_ASSERT_STRINGS_EQUAL(logRow.Path, "ctx1/ctx2"); - UNIT_ASSERT_STRINGS_EQUAL(logRow.Message, "level2 - end"); - } - { - TLogRow logRow = ParseLogRow(row5Str); - UNIT_ASSERT_EQUAL(logRow.Level, ELevel::INFO); - UNIT_ASSERT_EQUAL(logRow.Component, EComponent::Default); - UNIT_ASSERT_STRINGS_EQUAL(logRow.Path, "ctx1"); - UNIT_ASSERT_STRINGS_EQUAL(logRow.Message, "level1 - end"); - } + UNIT_ASSERT_STRINGS_EQUAL(CurrentLogContextPath().second, "ctx1"); + YQL_LOG(INFO) << "level1 - end"; } + UNIT_ASSERT_STRINGS_EQUAL(CurrentLogContextPath().second, ""); - Y_UNIT_TEST_ON_EACH_LOG_FORMAT(Profiling) { - TStringStream out; - YqlLoggerScope logger(&out, Format); + TString row1Str, row2Str, row3Str, row4Str, row5Str, _; + Split(out.Str(), '\n', row1Str, row2Str, row3Str, row4Str, row5Str, _); - { - YQL_PROFILE_SCOPE(INFO, "scope1"); - } + { + TLogRow logRow = ParseLogRow(row1Str); + UNIT_ASSERT_EQUAL(logRow.Level, ELevel::INFO); + UNIT_ASSERT_EQUAL(logRow.Component, EComponent::Default); + UNIT_ASSERT_STRINGS_EQUAL(logRow.Path, "ctx1"); + 
UNIT_ASSERT_STRINGS_EQUAL(logRow.Message, "level1 - begin"); + } + { + TLogRow logRow = ParseLogRow(row2Str); + UNIT_ASSERT_EQUAL(logRow.Level, ELevel::WARN); + UNIT_ASSERT_EQUAL(logRow.Component, EComponent::Default); + UNIT_ASSERT_STRINGS_EQUAL(logRow.Path, "ctx1/ctx2"); + UNIT_ASSERT_STRINGS_EQUAL(logRow.Message, "level2 - begin"); + } + { + TLogRow logRow = ParseLogRow(row3Str); + UNIT_ASSERT_EQUAL(logRow.Level, ELevel::ERROR); + UNIT_ASSERT_EQUAL(logRow.Component, EComponent::Default); + UNIT_ASSERT_STRINGS_EQUAL(logRow.Path, "ctx3"); + UNIT_ASSERT_STRINGS_EQUAL(logRow.Message, "level3"); + } + { + TLogRow logRow = ParseLogRow(row4Str); + UNIT_ASSERT_EQUAL(logRow.Level, ELevel::WARN); + UNIT_ASSERT_EQUAL(logRow.Component, EComponent::Default); + UNIT_ASSERT_STRINGS_EQUAL(logRow.Path, "ctx1/ctx2"); + UNIT_ASSERT_STRINGS_EQUAL(logRow.Message, "level2 - end"); + } + { + TLogRow logRow = ParseLogRow(row5Str); + UNIT_ASSERT_EQUAL(logRow.Level, ELevel::INFO); + UNIT_ASSERT_EQUAL(logRow.Component, EComponent::Default); + UNIT_ASSERT_STRINGS_EQUAL(logRow.Path, "ctx1"); + UNIT_ASSERT_STRINGS_EQUAL(logRow.Message, "level1 - end"); + } +} - YQL_PROFILE_BLOCK(WARN, "block1") { - Sleep(TDuration::MilliSeconds(2)); - } +Y_UNIT_TEST_ON_EACH_LOG_FORMAT(Profiling) { + TStringStream out; + YqlLoggerScope logger(&out, Format); - YQL_PROFILE_BLOCK(ERROR, "block2") { - Sleep(TDuration::MilliSeconds(1200)); - } + { + YQL_PROFILE_SCOPE(INFO, "scope1"); + } - bool isExecuted = false; - YQL_PROFILE_BLOCK(TRACE, "block3") { // log will be filtered out - isExecuted = true; - } - UNIT_ASSERT(isExecuted); + YQL_PROFILE_BLOCK(WARN, "block1") { + Sleep(TDuration::MilliSeconds(2)); + } - TString row1Str, row2Str, row3Str, _; - Split(out.Str(), '\n', row1Str, row2Str, row3Str, _); + YQL_PROFILE_BLOCK(ERROR, "block2") { + Sleep(TDuration::MilliSeconds(1200)); + } - { - TLogRow logRow = ParseLogRow(row1Str); - UNIT_ASSERT_EQUAL(logRow.Level, ELevel::INFO); - UNIT_ASSERT_EQUAL(logRow.Component, 
EComponent::Perf); - UNIT_ASSERT_EQUAL(logRow.Component, EComponent::Performance); - std::regex re("Execution of \\[scope1\\] took [0-9\\.]+us"); - bool isMatch = std::regex_match(logRow.Message.c_str(), re); - UNIT_ASSERT_C(isMatch, "Unexpected message: " << logRow.Message); - } - { - TLogRow logRow = ParseLogRow(row2Str); - UNIT_ASSERT_EQUAL(logRow.Level, ELevel::WARN); - UNIT_ASSERT_EQUAL(logRow.Component, EComponent::Perf); - UNIT_ASSERT_EQUAL(logRow.Component, EComponent::Performance); - std::regex re("Execution of \\[block1\\] took [0-9\\.]+ms"); - bool isMatch = std::regex_match(logRow.Message.c_str(), re); - UNIT_ASSERT_C(isMatch, "Unexpected message: " << logRow.Message); - } - { - TLogRow logRow = ParseLogRow(row3Str); - UNIT_ASSERT_EQUAL(logRow.Level, ELevel::ERROR); - UNIT_ASSERT_EQUAL(logRow.Component, EComponent::Perf); - UNIT_ASSERT_EQUAL(logRow.Component, EComponent::Performance); - std::regex re("Execution of \\[block2\\] took [0-9\\.]+s"); - bool isMatch = std::regex_match(logRow.Message.c_str(), re); - UNIT_ASSERT_C(isMatch, "Unexpected message: " << logRow.Message); - } + bool isExecuted = false; + YQL_PROFILE_BLOCK(TRACE, "block3") { // log will be filtered out + isExecuted = true; } + UNIT_ASSERT(isExecuted); + TString row1Str, row2Str, row3Str, _; + Split(out.Str(), '\n', row1Str, row2Str, row3Str, _); - int Func1(int a, char b) { - YQL_PROFILE_FUNC(INFO); - return a + b; + { + TLogRow logRow = ParseLogRow(row1Str); + UNIT_ASSERT_EQUAL(logRow.Level, ELevel::INFO); + UNIT_ASSERT_EQUAL(logRow.Component, EComponent::Perf); + UNIT_ASSERT_EQUAL(logRow.Component, EComponent::Performance); + std::regex re("Execution of \\[scope1\\] took [0-9\\.]+us"); + bool isMatch = std::regex_match(logRow.Message.c_str(), re); + UNIT_ASSERT_C(isMatch, "Unexpected message: " << logRow.Message); } - - int Func2(int a, char b) { - YQL_PROFILE_FUNCSIG(WARN); - return a + b; + { + TLogRow logRow = ParseLogRow(row2Str); + UNIT_ASSERT_EQUAL(logRow.Level, ELevel::WARN); 
+ UNIT_ASSERT_EQUAL(logRow.Component, EComponent::Perf); + UNIT_ASSERT_EQUAL(logRow.Component, EComponent::Performance); + std::regex re("Execution of \\[block1\\] took [0-9\\.]+ms"); + bool isMatch = std::regex_match(logRow.Message.c_str(), re); + UNIT_ASSERT_C(isMatch, "Unexpected message: " << logRow.Message); + } + { + TLogRow logRow = ParseLogRow(row3Str); + UNIT_ASSERT_EQUAL(logRow.Level, ELevel::ERROR); + UNIT_ASSERT_EQUAL(logRow.Component, EComponent::Perf); + UNIT_ASSERT_EQUAL(logRow.Component, EComponent::Performance); + std::regex re("Execution of \\[block2\\] took [0-9\\.]+s"); + bool isMatch = std::regex_match(logRow.Message.c_str(), re); + UNIT_ASSERT_C(isMatch, "Unexpected message: " << logRow.Message); } +} - Y_UNIT_TEST_ON_EACH_LOG_FORMAT(ProfilingFuncs) { - TStringStream out; - YqlLoggerScope logger(&out, Format); +int Func1(int a, char b) { + YQL_PROFILE_FUNC(INFO); + return a + b; +} - Func1(1, 2); - Func2(1, 2); +int Func2(int a, char b) { + YQL_PROFILE_FUNCSIG(WARN); + return a + b; +} - TString row1Str, row2Str, _; - Split(out.Str(), '\n', row1Str, row2Str, _); +Y_UNIT_TEST_ON_EACH_LOG_FORMAT(ProfilingFuncs) { + TStringStream out; + YqlLoggerScope logger(&out, Format); - { - TLogRow logRow = ParseLogRow(row1Str); - UNIT_ASSERT_EQUAL(logRow.Level, ELevel::INFO); - UNIT_ASSERT_EQUAL(logRow.Component, EComponent::Perf); - UNIT_ASSERT_EQUAL(logRow.Component, EComponent::Performance); + Func1(1, 2); + Func2(1, 2); + + TString row1Str, row2Str, _; + Split(out.Str(), '\n', row1Str, row2Str, _); + + { + TLogRow logRow = ParseLogRow(row1Str); + UNIT_ASSERT_EQUAL(logRow.Level, ELevel::INFO); + UNIT_ASSERT_EQUAL(logRow.Component, EComponent::Perf); + UNIT_ASSERT_EQUAL(logRow.Component, EComponent::Performance); #ifdef _win_ - std::regex re("Execution of \\[[NTestSuiteTLogTest::Func1\\] took [0-9\\.]+us"); + std::regex re("Execution of \\[[NTestSuiteTLogTest::Func1\\] took [0-9\\.]+us"); #else - std::regex re("Execution of \\[Func1\\] took [0-9\\.]+us"); 
+ std::regex re("Execution of \\[Func1\\] took [0-9\\.]+us"); #endif - bool isMatch = std::regex_match(logRow.Message.c_str(), re); - UNIT_ASSERT_C(isMatch, "Unexpected message: " << logRow.Message); - } - { - TLogRow logRow = ParseLogRow(row2Str); - UNIT_ASSERT_EQUAL(logRow.Level, ELevel::WARN); - UNIT_ASSERT_EQUAL(logRow.Component, EComponent::Perf); - UNIT_ASSERT_EQUAL(logRow.Component, EComponent::Performance); + bool isMatch = std::regex_match(logRow.Message.c_str(), re); + UNIT_ASSERT_C(isMatch, "Unexpected message: " << logRow.Message); + } + { + TLogRow logRow = ParseLogRow(row2Str); + UNIT_ASSERT_EQUAL(logRow.Level, ELevel::WARN); + UNIT_ASSERT_EQUAL(logRow.Component, EComponent::Perf); + UNIT_ASSERT_EQUAL(logRow.Component, EComponent::Performance); #ifdef _win_ - std::regex re("Execution of \\[int __cdecl NTestSuiteTLogTest::Func2\\(int, char\\)\\] took [0-9\\.]+us"); + std::regex re("Execution of \\[int __cdecl NTestSuiteTLogTest::Func2\\(int, char\\)\\] took [0-9\\.]+us"); #else - std::regex re("Execution of \\[int NTestSuiteTLogTest::Func2\\(int, char\\)\\] took [0-9\\.]+us"); + std::regex re("Execution of \\[int NTestSuiteTLogTest::Func2\\(int, char\\)\\] took [0-9\\.]+us"); #endif - bool isMatch = std::regex_match(logRow.Message.c_str(), re); - UNIT_ASSERT_C(isMatch, "Unexpected message: " << logRow.Message); - } + bool isMatch = std::regex_match(logRow.Message.c_str(), re); + UNIT_ASSERT_C(isMatch, "Unexpected message: " << logRow.Message); } +} - Y_UNIT_TEST_ON_EACH_LOG_FORMAT(Limit1) { - size_t limit = 0; - { - TStringStream out; - YqlLoggerScope logger(&out, Format); - YqlLogger().UpdateProcInfo("proc"); - YQL_CLOG(INFO, Core) << "message1"; - limit = out.Str().length() * 2 - 7; // Not more than 2 log lines - } - +Y_UNIT_TEST_ON_EACH_LOG_FORMAT(Limit1) { + size_t limit = 0; + { TStringStream out; YqlLoggerScope logger(&out, Format); YqlLogger().UpdateProcInfo("proc"); - YqlLogger().SetMaxLogLimit(limit); - YQL_CLOG(INFO, Core) << "message1"; - 
YQL_CLOG(INFO, Core) << "message2"; - YQL_CLOG(INFO, Core) << "message3"; + limit = out.Str().length() * 2 - 7; // Not more than 2 log lines + } - TString row1Str, row2Str, row3Str, _; - Split(out.Str(), '\n', row1Str, row2Str, row3Str, _); + TStringStream out; + YqlLoggerScope logger(&out, Format); + YqlLogger().UpdateProcInfo("proc"); + YqlLogger().SetMaxLogLimit(limit); - { - TLogRow logRow = ParseLogRow(row1Str); - UNIT_ASSERT_EQUAL(logRow.Level, ELevel::INFO); - UNIT_ASSERT_EQUAL(logRow.Component, EComponent::Core); - UNIT_ASSERT_STRINGS_EQUAL(logRow.Message, "message1"); - } - { - TLogRow logRow = ParseLogRow(row2Str); - UNIT_ASSERT_EQUAL(logRow.Level, ELevel::INFO); - UNIT_ASSERT_EQUAL(logRow.Component, EComponent::Core); - UNIT_ASSERT_STRINGS_EQUAL(logRow.Message, "message2"); - } - { - TLogRow logRow = ParseLogRow(row3Str); - UNIT_ASSERT_EQUAL(logRow.Level, ELevel::FATAL); - UNIT_ASSERT_EQUAL(logRow.Component, EComponent::Default); - UNIT_ASSERT_STRINGS_EQUAL(logRow.Message, "Log is truncated by limit"); - } - } + YQL_CLOG(INFO, Core) << "message1"; + YQL_CLOG(INFO, Core) << "message2"; + YQL_CLOG(INFO, Core) << "message3"; - Y_UNIT_TEST_ON_EACH_LOG_FORMAT(Limit2) { - size_t limit = 0; - { - TStringStream out; - YqlLoggerScope logger(&out, Format); - YqlLogger().UpdateProcInfo("proc"); - YQL_CLOG(INFO, Core) << "message1"; - limit = out.Str().length() * 2 - 7; // Not more than 2 log lines - } + TString row1Str, row2Str, row3Str, _; + Split(out.Str(), '\n', row1Str, row2Str, row3Str, _); + + { + TLogRow logRow = ParseLogRow(row1Str); + UNIT_ASSERT_EQUAL(logRow.Level, ELevel::INFO); + UNIT_ASSERT_EQUAL(logRow.Component, EComponent::Core); + UNIT_ASSERT_STRINGS_EQUAL(logRow.Message, "message1"); + } + { + TLogRow logRow = ParseLogRow(row2Str); + UNIT_ASSERT_EQUAL(logRow.Level, ELevel::INFO); + UNIT_ASSERT_EQUAL(logRow.Component, EComponent::Core); + UNIT_ASSERT_STRINGS_EQUAL(logRow.Message, "message2"); + } + { + TLogRow logRow = ParseLogRow(row3Str); + 
UNIT_ASSERT_EQUAL(logRow.Level, ELevel::FATAL); + UNIT_ASSERT_EQUAL(logRow.Component, EComponent::Default); + UNIT_ASSERT_STRINGS_EQUAL(logRow.Message, "Log is truncated by limit"); + } +} +Y_UNIT_TEST_ON_EACH_LOG_FORMAT(Limit2) { + size_t limit = 0; + { TStringStream out; YqlLoggerScope logger(&out, Format); YqlLogger().UpdateProcInfo("proc"); - YqlLogger().SetMaxLogLimit(limit); - YQL_CLOG(INFO, Core) << "message1"; - YQL_CLOG(INFO, Core) << "message2"; - YQL_CLOG(WARN, Core) << "message3"; + limit = out.Str().length() * 2 - 7; // Not more than 2 log lines + } - TString row1Str, row2Str, row3Str, row4Str, _; - Split(out.Str(), '\n', row1Str, row2Str, row3Str, row4Str, _); + TStringStream out; + YqlLoggerScope logger(&out, Format); + YqlLogger().UpdateProcInfo("proc"); + YqlLogger().SetMaxLogLimit(limit); - { - TLogRow logRow = ParseLogRow(row1Str); - UNIT_ASSERT_EQUAL(logRow.Level, ELevel::INFO); - UNIT_ASSERT_EQUAL(logRow.Component, EComponent::Core); - UNIT_ASSERT_STRINGS_EQUAL(logRow.Message, "message1"); - } - { - TLogRow logRow = ParseLogRow(row2Str); - UNIT_ASSERT_EQUAL(logRow.Level, ELevel::INFO); - UNIT_ASSERT_EQUAL(logRow.Component, EComponent::Core); - UNIT_ASSERT_STRINGS_EQUAL(logRow.Message, "message2"); - } - { - TLogRow logRow = ParseLogRow(row3Str); - UNIT_ASSERT_EQUAL(logRow.Level, ELevel::FATAL); - UNIT_ASSERT_EQUAL(logRow.Component, EComponent::Default); - UNIT_ASSERT_STRINGS_EQUAL(logRow.Message, "Log is truncated by limit"); - } - { - TLogRow logRow = ParseLogRow(row4Str); - UNIT_ASSERT_EQUAL(logRow.Level, ELevel::WARN); - UNIT_ASSERT_EQUAL(logRow.Component, EComponent::Core); - UNIT_ASSERT_STRINGS_EQUAL(logRow.Message, "message3"); - } + YQL_CLOG(INFO, Core) << "message1"; + YQL_CLOG(INFO, Core) << "message2"; + YQL_CLOG(WARN, Core) << "message3"; + + TString row1Str, row2Str, row3Str, row4Str, _; + Split(out.Str(), '\n', row1Str, row2Str, row3Str, row4Str, _); + + { + TLogRow logRow = ParseLogRow(row1Str); + UNIT_ASSERT_EQUAL(logRow.Level, 
ELevel::INFO); + UNIT_ASSERT_EQUAL(logRow.Component, EComponent::Core); + UNIT_ASSERT_STRINGS_EQUAL(logRow.Message, "message1"); + } + { + TLogRow logRow = ParseLogRow(row2Str); + UNIT_ASSERT_EQUAL(logRow.Level, ELevel::INFO); + UNIT_ASSERT_EQUAL(logRow.Component, EComponent::Core); + UNIT_ASSERT_STRINGS_EQUAL(logRow.Message, "message2"); + } + { + TLogRow logRow = ParseLogRow(row3Str); + UNIT_ASSERT_EQUAL(logRow.Level, ELevel::FATAL); + UNIT_ASSERT_EQUAL(logRow.Component, EComponent::Default); + UNIT_ASSERT_STRINGS_EQUAL(logRow.Message, "Log is truncated by limit"); + } + { + TLogRow logRow = ParseLogRow(row4Str); + UNIT_ASSERT_EQUAL(logRow.Level, ELevel::WARN); + UNIT_ASSERT_EQUAL(logRow.Component, EComponent::Core); + UNIT_ASSERT_STRINGS_EQUAL(logRow.Message, "message3"); } } +} // Y_UNIT_TEST_SUITE(TLogTest) diff --git a/yql/essentials/utils/log/profile.cpp b/yql/essentials/utils/log/profile.cpp index 130bb05a4ab..66bf8de7171 100644 --- a/yql/essentials/utils/log/profile.cpp +++ b/yql/essentials/utils/log/profile.cpp @@ -3,12 +3,10 @@ #include <util/stream/format.h> - -#define YQL_PERF_LOG(level, file, line) YQL_LOG_IMPL( \ +#define YQL_PERF_LOG(level, file, line) YQL_LOG_IMPL( \ ::NYql::NLog::YqlLogger(), ::NYql::NLog::EComponent::Perf, level, \ ::NYql::NLog::TContextPreprocessor, file, line) - namespace NYql { namespace NLog { @@ -30,8 +28,8 @@ TProfilingScope::~TProfilingScope() { auto doLog = [&]() { YQL_PERF_LOG(Level_, File_, Line_) - << TStringBuf("Execution of [") << Name_ - << TStringBuf("] took ") << Prec(elapsed, 3) << unit; + << TStringBuf("Execution of [") << Name_ + << TStringBuf("] took ") << Prec(elapsed, 3) << unit; }; if (!LogCtxPath_.first.empty() || !LogCtxPath_.second.empty()) { @@ -46,5 +44,5 @@ TProfilingScope::~TProfilingScope() { } } -} // namspace NLog -} // namspace NYql +} // namespace NLog +} // namespace NYql diff --git a/yql/essentials/utils/log/profile.h b/yql/essentials/utils/log/profile.h index dda7e03f9d2..0c5fd4f74d9 100644 
--- a/yql/essentials/utils/log/profile.h +++ b/yql/essentials/utils/log/profile.h @@ -5,29 +5,30 @@ #include <util/system/datetime.h> - -#define YQL_PROFILE_SCOPE(level, name) \ +#define YQL_PROFILE_SCOPE(level, name) \ ::NYql::NLog::TProfilingScope Y_GENERATE_UNIQUE_ID(ps)( \ - name, ::NYql::NLog::ELevel::level, __FILE__, __LINE__) + name, ::NYql::NLog::ELevel::level, __FILE__, __LINE__) #define YQL_PROFILE_BLOCK_IMPL(level, name) \ - ::NYql::NLog::TProfilingScope( \ - name, ::NYql::NLog::ELevel::level, __FILE__, __LINE__) + ::NYql::NLog::TProfilingScope( \ + name, ::NYql::NLog::ELevel::level, __FILE__, __LINE__) -#define YQL_PROFILE_SCOPE_VAL(level, name) \ - TAutoPtr<::NYql::NLog::TProfilingScope>(new ::NYql::NLog::TProfilingScope(\ - name, ::NYql::NLog::ELevel::level, __FILE__, __LINE__, \ - ::NYql::NLog::CurrentLogContextPath())) +#define YQL_PROFILE_SCOPE_VAL(level, name) \ + TAutoPtr<::NYql::NLog::TProfilingScope>(new ::NYql::NLog::TProfilingScope( \ + name, ::NYql::NLog::ELevel::level, __FILE__, __LINE__, \ + ::NYql::NLog::CurrentLogContextPath())) -#define YQL_PROFILE_BIND_VAL(future, scopeVal) \ +#define YQL_PROFILE_BIND_VAL(future, scopeVal) \ future.Apply([scopeVal](const decltype(future)& f) { \ - return f.GetValue(); \ + return f.GetValue(); \ }); -#define YQL_PROFILE_BLOCK(level, name) \ +#define YQL_PROFILE_BLOCK(level, name) \ if (auto Y_GENERATE_UNIQUE_ID(t) = YQL_PROFILE_SCOPE_VAL(level, name)) { \ - goto Y_CAT(YQL_LOG_CTX_LABEL, __LINE__); \ - } else Y_CAT(YQL_LOG_CTX_LABEL, __LINE__): + goto Y_CAT(YQL_LOG_CTX_LABEL, __LINE__); \ + } else \ + Y_CAT(YQL_LOG_CTX_LABEL, __LINE__) \ + : #define YQL_PROFILE_FUNC(level) YQL_PROFILE_SCOPE(level, __FUNCTION__) #define YQL_PROFILE_FUNCSIG(level) YQL_PROFILE_SCOPE(level, Y_FUNC_SIGNATURE) @@ -35,7 +36,6 @@ #define YQL_PROFILE_FUNC_VAL(level) YQL_PROFILE_SCOPE_VAL(level, __FUNCTION__) #define YQL_PROFILE_FUNCSIG_VAL(level) YQL_PROFILE_SCOPE_VAL(level, Y_FUNC_SIGNATURE) - namespace NYql { namespace NLog { 
@@ -70,5 +70,5 @@ private: std::pair<TString, TString> LogCtxPath_; }; -} // namspace NLog -} // namspace NYql +} // namespace NLog +} // namespace NYql diff --git a/yql/essentials/utils/log/tls_backend.cpp b/yql/essentials/utils/log/tls_backend.cpp index a92f123c9bf..ec677fe8e8a 100644 --- a/yql/essentials/utils/log/tls_backend.cpp +++ b/yql/essentials/utils/log/tls_backend.cpp @@ -2,19 +2,18 @@ #include <util/system/tls.h> - namespace NYql { namespace NLog { namespace { Y_POD_STATIC_THREAD(TLogBackend*) CurrentBackend; -} // namspace +} // namespace TLogBackend* SetLogBackendForCurrentThread(TLogBackend* backend) { - TLogBackend* prev = *(&CurrentBackend); - *(&CurrentBackend) = backend; - return prev; + TLogBackend* prev = *(&CurrentBackend); + *(&CurrentBackend) = backend; + return prev; } void TTlsLogBackend::WriteData(const TLogRecord& rec) { @@ -45,5 +44,5 @@ ELogPriority TTlsLogBackend::FiltrationLevel() const { return LOG_MAX_PRIORITY; } -} // namspace NLog -} // namspace NYql +} // namespace NLog +} // namespace NYql diff --git a/yql/essentials/utils/log/tls_backend.h b/yql/essentials/utils/log/tls_backend.h index 802a73aae90..f8bded8bae7 100644 --- a/yql/essentials/utils/log/tls_backend.h +++ b/yql/essentials/utils/log/tls_backend.h @@ -6,7 +6,6 @@ #include <utility> - namespace NYql { namespace NLog { @@ -63,5 +62,5 @@ private: TLogBackend* PrevBacked_; }; -} // namspace NLog -} // namspace NYql +} // namespace NLog +} // namespace NYql diff --git a/yql/essentials/utils/log/tls_backend_ut.cpp b/yql/essentials/utils/log/tls_backend_ut.cpp index 96d77751316..e5496c746a3 100644 --- a/yql/essentials/utils/log/tls_backend_ut.cpp +++ b/yql/essentials/utils/log/tls_backend_ut.cpp @@ -15,7 +15,6 @@ #include <thread> #include <chrono> - using namespace NYql; using namespace NLog; @@ -53,72 +52,72 @@ private: Y_UNIT_TEST_SUITE(TTlsLogBackendTest) { - Y_UNIT_TEST_ON_EACH_LOG_FORMAT(CaptureOutputs) { - YqlLoggerScope logger(new TTlsLogBackend(new TNullLogBackend), 
Format); +Y_UNIT_TEST_ON_EACH_LOG_FORMAT(CaptureOutputs) { + YqlLoggerScope logger(new TTlsLogBackend(new TNullLogBackend), Format); - YQL_LOG(INFO) << "this message will be missed"; + YQL_LOG(INFO) << "this message will be missed"; - TRunnable r1("t1", 3); - std::thread t1(std::ref(r1)); + TRunnable r1("t1", 3); + std::thread t1(std::ref(r1)); - TRunnable r2("t2", 2); - std::thread t2(std::ref(r2)); + TRunnable r2("t2", 2); + std::thread t2(std::ref(r2)); - t1.join(); - t2.join(); + t1.join(); + t2.join(); -// Cout << "--[t1 logs]-----------------\n" << r1.GetLogs() << Endl; -// Cout << "--[t2 logs]-----------------\n" << r2.GetLogs() << Endl; + // Cout << "--[t1 logs]-----------------\n" << r1.GetLogs() << Endl; + // Cout << "--[t2 logs]-----------------\n" << r2.GetLogs() << Endl; - { // t1 - TString row1Str, row2Str, row3Str, _; - Split(r1.GetLogs(), '\n', row1Str, row2Str, row3Str, _); + { // t1 + TString row1Str, row2Str, row3Str, _; + Split(r1.GetLogs(), '\n', row1Str, row2Str, row3Str, _); - ui64 threadId = 0; - { - TLogRow logRow = ParseLogRow(row1Str); - UNIT_ASSERT_EQUAL(logRow.Level, ELevel::INFO); - UNIT_ASSERT_EQUAL(logRow.Component, EComponent::Default); - UNIT_ASSERT(logRow.ThreadId > 0); - UNIT_ASSERT_STRINGS_EQUAL(logRow.Message, "t1"); - threadId = logRow.ThreadId; - } - { - TLogRow logRow = ParseLogRow(row2Str); - UNIT_ASSERT_EQUAL(logRow.Level, ELevel::INFO); - UNIT_ASSERT_EQUAL(logRow.Component, EComponent::Default); - UNIT_ASSERT_EQUAL(logRow.ThreadId, threadId); - UNIT_ASSERT_STRINGS_EQUAL(logRow.Message, "t1"); - } - { - TLogRow logRow = ParseLogRow(row3Str); - UNIT_ASSERT_EQUAL(logRow.Level, ELevel::INFO); - UNIT_ASSERT_EQUAL(logRow.Component, EComponent::Default); - UNIT_ASSERT_EQUAL(logRow.ThreadId, threadId); - UNIT_ASSERT_STRINGS_EQUAL(logRow.Message, "t1"); - } + ui64 threadId = 0; + { + TLogRow logRow = ParseLogRow(row1Str); + UNIT_ASSERT_EQUAL(logRow.Level, ELevel::INFO); + UNIT_ASSERT_EQUAL(logRow.Component, EComponent::Default); + 
UNIT_ASSERT(logRow.ThreadId > 0); + UNIT_ASSERT_STRINGS_EQUAL(logRow.Message, "t1"); + threadId = logRow.ThreadId; + } + { + TLogRow logRow = ParseLogRow(row2Str); + UNIT_ASSERT_EQUAL(logRow.Level, ELevel::INFO); + UNIT_ASSERT_EQUAL(logRow.Component, EComponent::Default); + UNIT_ASSERT_EQUAL(logRow.ThreadId, threadId); + UNIT_ASSERT_STRINGS_EQUAL(logRow.Message, "t1"); } + { + TLogRow logRow = ParseLogRow(row3Str); + UNIT_ASSERT_EQUAL(logRow.Level, ELevel::INFO); + UNIT_ASSERT_EQUAL(logRow.Component, EComponent::Default); + UNIT_ASSERT_EQUAL(logRow.ThreadId, threadId); + UNIT_ASSERT_STRINGS_EQUAL(logRow.Message, "t1"); + } + } - { // t2 - TString row1Str, row2Str, _; - Split(r2.GetLogs(), '\n', row1Str, row2Str, _); + { // t2 + TString row1Str, row2Str, _; + Split(r2.GetLogs(), '\n', row1Str, row2Str, _); - ui64 threadId = 0; - { - TLogRow logRow = ParseLogRow(row1Str); - UNIT_ASSERT_EQUAL(logRow.Level, ELevel::INFO); - UNIT_ASSERT_EQUAL(logRow.Component, EComponent::Default); - UNIT_ASSERT(logRow.ThreadId > 0); - UNIT_ASSERT_STRINGS_EQUAL(logRow.Message, "t2"); - threadId = logRow.ThreadId; - } - { - TLogRow logRow = ParseLogRow(row2Str); - UNIT_ASSERT_EQUAL(logRow.Level, ELevel::INFO); - UNIT_ASSERT_EQUAL(logRow.Component, EComponent::Default); - UNIT_ASSERT_EQUAL(logRow.ThreadId, threadId); - UNIT_ASSERT_STRINGS_EQUAL(logRow.Message, "t2"); - } + ui64 threadId = 0; + { + TLogRow logRow = ParseLogRow(row1Str); + UNIT_ASSERT_EQUAL(logRow.Level, ELevel::INFO); + UNIT_ASSERT_EQUAL(logRow.Component, EComponent::Default); + UNIT_ASSERT(logRow.ThreadId > 0); + UNIT_ASSERT_STRINGS_EQUAL(logRow.Message, "t2"); + threadId = logRow.ThreadId; + } + { + TLogRow logRow = ParseLogRow(row2Str); + UNIT_ASSERT_EQUAL(logRow.Level, ELevel::INFO); + UNIT_ASSERT_EQUAL(logRow.Component, EComponent::Default); + UNIT_ASSERT_EQUAL(logRow.ThreadId, threadId); + UNIT_ASSERT_STRINGS_EQUAL(logRow.Message, "t2"); } } } +} // Y_UNIT_TEST_SUITE(TTlsLogBackendTest) diff --git 
a/yql/essentials/utils/log/ut/log_parser.cpp b/yql/essentials/utils/log/ut/log_parser.cpp index 461fd67cdbd..55f23ba0f57 100644 --- a/yql/essentials/utils/log/ut/log_parser.cpp +++ b/yql/essentials/utils/log/ut/log_parser.cpp @@ -4,67 +4,67 @@ namespace NYql::NLog { - TLogRow ParseJsonLogRow(TStringBuf str) { - NJson::TJsonMap json; - UNIT_ASSERT_C(NJson::ReadJsonTree(str, &json), "invalid json '" << str << "'"); +TLogRow ParseJsonLogRow(TStringBuf str) { + NJson::TJsonMap json; + UNIT_ASSERT_C(NJson::ReadJsonTree(str, &json), "invalid json '" << str << "'"); - return { - .Time = TInstant::ParseIso8601(json["@fields"]["datetime"].GetStringSafe()) - TDuration::Hours(4), - .Level = ELevelHelpers::FromString(json["@fields"]["level"].GetStringSafe()), - .ProcName = json["@fields"]["procname"].GetStringSafe(), - .ProcId = FromString<pid_t>(json["@fields"]["pid"].GetStringSafe()), - .ThreadId = [&] { - TString string = json["@fields"]["tid"].GetStringSafe(); - if (string.substr(0, 2) == "0x") { - return IntFromString<ui64, 16, TStringBuf>(string.substr(2)); - } else { - return IntFromString<ui64, 10, TStringBuf>(string); - } - }(), - .Component = EComponentHelpers::FromString(json["@fields"]["component"].GetStringSafe()), - .FileName = json["@fields"]["filename"].GetStringSafe(), - .LineNumber = FromString<ui32>(json["@fields"]["line"].GetStringSafe()), - .Path = json["@fields"]["path"].GetStringRobust(), - .Message = json["message"].GetStringSafe(), - }; - } + return { + .Time = TInstant::ParseIso8601(json["@fields"]["datetime"].GetStringSafe()) - TDuration::Hours(4), + .Level = ELevelHelpers::FromString(json["@fields"]["level"].GetStringSafe()), + .ProcName = json["@fields"]["procname"].GetStringSafe(), + .ProcId = FromString<pid_t>(json["@fields"]["pid"].GetStringSafe()), + .ThreadId = [&] { + TString string = json["@fields"]["tid"].GetStringSafe(); + if (string.substr(0, 2) == "0x") { + return IntFromString<ui64, 16, TStringBuf>(string.substr(2)); + } else { + return 
IntFromString<ui64, 10, TStringBuf>(string); + } + }(), + .Component = EComponentHelpers::FromString(json["@fields"]["component"].GetStringSafe()), + .FileName = json["@fields"]["filename"].GetStringSafe(), + .LineNumber = FromString<ui32>(json["@fields"]["line"].GetStringSafe()), + .Path = json["@fields"]["path"].GetStringRobust(), + .Message = json["message"].GetStringSafe(), + }; +} - TLogRow ParseLegacyLogRow(TStringBuf str) { - static std::regex rowRe( - "^([0-9]{4}-[0-9]{2}-[0-9]{2} [0-9]{2}:[0-9]{2}:[0-9]{2}\\.[0-9]{3}) " // (1) time - "([A-Z ]{5}) " // (2) level - "([a-zA-Z0-9_\\.-]+)" // (3) process name - ".pid=([0-9]+)," // (4) process id - " tid=(0?x?[0-9a-fA-F]+). " // (5) thread id - ".([a-zA-Z0-9_\\. ]+). " // (6) component name - "([^:]+):" // (7) file name - "([0-9]+): " // (8) line number - "(\\{[^\n]*\\} )?" // (9) path - "([^\n]*)\n?$" // (10) message - , std::regex_constants::extended); +TLogRow ParseLegacyLogRow(TStringBuf str) { + static std::regex rowRe( + "^([0-9]{4}-[0-9]{2}-[0-9]{2} [0-9]{2}:[0-9]{2}:[0-9]{2}\\.[0-9]{3}) " // (1) time + "([A-Z ]{5}) " // (2) level + "([a-zA-Z0-9_\\.-]+)" // (3) process name + ".pid=([0-9]+)," // (4) process id + " tid=(0?x?[0-9a-fA-F]+). " // (5) thread id + ".([a-zA-Z0-9_\\. ]+). " // (6) component name + "([^:]+):" // (7) file name + "([0-9]+): " // (8) line number + "(\\{[^\n]*\\} )?" 
// (9) path + "([^\n]*)\n?$" // (10) message + , std::regex_constants::extended); - std::cmatch match; - bool isMatch = std::regex_match(str.data(), match, rowRe); + std::cmatch match; + bool isMatch = std::regex_match(str.data(), match, rowRe); - UNIT_ASSERT_C(isMatch, "log row does not match format: '" << str << '\''); - UNIT_ASSERT_EQUAL_C(match.size(), 11, "expected 11 groups in log row: '" << str << '\''); + UNIT_ASSERT_C(isMatch, "log row does not match format: '" << str << '\''); + UNIT_ASSERT_EQUAL_C(match.size(), 11, "expected 11 groups in log row: '" << str << '\''); - return { - .Time = TInstant::ParseIso8601(match[1].str()) - TDuration::Hours(4), - .Level = ELevelHelpers::FromString(match[2].str()), - .ProcName = match[3].str(), - .ProcId = FromString<pid_t>(match[4].str()), - .ThreadId = match[5].str().substr(0, 2) == "0x" - ? IntFromString<ui64, 16, TStringBuf>(match[5].str().substr(2)) - : IntFromString<ui64, 10, TStringBuf>(match[5].str()), - .Component = EComponentHelpers::FromString(match[6].str()), - .FileName = match[7].str(), - .LineNumber = FromString<ui32>(match[8].str()), - .Path = match[9].str() != "" - ? match[9].str().substr(1, match[9].str().size() - 3) - : "", - .Message = match[10].str(), - }; - } + return { + .Time = TInstant::ParseIso8601(match[1].str()) - TDuration::Hours(4), + .Level = ELevelHelpers::FromString(match[2].str()), + .ProcName = match[3].str(), + .ProcId = FromString<pid_t>(match[4].str()), + .ThreadId = match[5].str().substr(0, 2) == "0x" + ? IntFromString<ui64, 16, TStringBuf>(match[5].str().substr(2)) + : IntFromString<ui64, 10, TStringBuf>(match[5].str()), + .Component = EComponentHelpers::FromString(match[6].str()), + .FileName = match[7].str(), + .LineNumber = FromString<ui32>(match[8].str()), + .Path = match[9].str() != "" + ? 
match[9].str().substr(1, match[9].str().size() - 3) + : "", + .Message = match[10].str(), + }; +} } // namespace NYql::NLog diff --git a/yql/essentials/utils/log/ut/log_parser.h b/yql/essentials/utils/log/ut/log_parser.h index a1bea046821..922000924b9 100644 --- a/yql/essentials/utils/log/ut/log_parser.h +++ b/yql/essentials/utils/log/ut/log_parser.h @@ -27,5 +27,5 @@ struct TLogRow { TLogRow ParseLegacyLogRow(TStringBuf str); TLogRow ParseJsonLogRow(TStringBuf str); -} // namspace NLog -} // namspace NYql +} // namespace NLog +} // namespace NYql diff --git a/yql/essentials/utils/log/ut/ya.make b/yql/essentials/utils/log/ut/ya.make index a78d4e0601b..3732c3f1ebc 100644 --- a/yql/essentials/utils/log/ut/ya.make +++ b/yql/essentials/utils/log/ut/ya.make @@ -1,5 +1,7 @@ UNITTEST_FOR(yql/essentials/utils/log) +ENABLE(YQL_STYLE_CPP) + SRCS( log_parser.cpp log_ut.cpp diff --git a/yql/essentials/utils/log/ya.make b/yql/essentials/utils/log/ya.make index 540b524c5eb..2a0d3dc3a4a 100644 --- a/yql/essentials/utils/log/ya.make +++ b/yql/essentials/utils/log/ya.make @@ -1,5 +1,7 @@ LIBRARY() +ENABLE(YQL_STYLE_CPP) + SRCS( context.cpp format.cpp diff --git a/yql/essentials/utils/md5_stream.cpp b/yql/essentials/utils/md5_stream.cpp index 8a1c2191645..10e7bd07c9c 100644 --- a/yql/essentials/utils/md5_stream.cpp +++ b/yql/essentials/utils/md5_stream.cpp @@ -8,7 +8,7 @@ TMd5OutputStream::TMd5OutputStream(IOutputStream& delegatee) } TString TMd5OutputStream::Finalize() { - char buf[33] = { 0 }; + char buf[33] = {0}; return TString(Accumulator_.End(buf)); } @@ -17,4 +17,4 @@ void TMd5OutputStream::DoWrite(const void* buf, size_t len) { Accumulator_.Update(buf, len); } -} +} // namespace NYql diff --git a/yql/essentials/utils/md5_stream.h b/yql/essentials/utils/md5_stream.h index ca8b1c4bb94..3fd73e8e304 100644 --- a/yql/essentials/utils/md5_stream.h +++ b/yql/essentials/utils/md5_stream.h @@ -4,7 +4,7 @@ #include <library/cpp/digest/md5/md5.h> namespace NYql { -class 
TMd5OutputStream : public IOutputStream { +class TMd5OutputStream: public IOutputStream { public: explicit TMd5OutputStream(IOutputStream& delegatee); TString Finalize(); @@ -16,4 +16,4 @@ private: IOutputStream& Delegatee_; MD5 Accumulator_; }; -} +} // namespace NYql diff --git a/yql/essentials/utils/md5_stream_ut.cpp b/yql/essentials/utils/md5_stream_ut.cpp index 1d04c632d49..a120e83eda0 100644 --- a/yql/essentials/utils/md5_stream_ut.cpp +++ b/yql/essentials/utils/md5_stream_ut.cpp @@ -17,31 +17,31 @@ TString Consume(const TString& input) { UNIT_ASSERT_VALUES_EQUAL(input, output); return md5Stream.Finalize(); } -} +} // namespace Y_UNIT_TEST_SUITE(TStreamMd5Tests) { - Y_UNIT_TEST(Empty) { - const auto md5 = Consume(""); - const TString emptyStringMd5 = "d41d8cd98f00b204e9800998ecf8427e"; - UNIT_ASSERT_VALUES_EQUAL(md5, emptyStringMd5); - } +Y_UNIT_TEST(Empty) { + const auto md5 = Consume(""); + const TString emptyStringMd5 = "d41d8cd98f00b204e9800998ecf8427e"; + UNIT_ASSERT_VALUES_EQUAL(md5, emptyStringMd5); +} - Y_UNIT_TEST(ShortText) { - const auto md5 = Consume("hello from Y!"); - const TString expectedMd5 = "abf59ed7b0daa71085e76e461a737cc2"; - UNIT_ASSERT_VALUES_EQUAL(md5, expectedMd5); - } +Y_UNIT_TEST(ShortText) { + const auto md5 = Consume("hello from Y!"); + const TString expectedMd5 = "abf59ed7b0daa71085e76e461a737cc2"; + UNIT_ASSERT_VALUES_EQUAL(md5, expectedMd5); +} - Y_UNIT_TEST(BigText) { - // TransferData uses TempBuf of 64K - const TString s(1000000, 'A'); - const auto md5 = Consume(s.c_str()); - /* - $ for i in {1..1000000};do echo -n A >> 1M.txt;done - $ md5sum 1M.txt - 48fcdb8b87ce8ef779774199a856091d 1M.txt - */ - const TString expectedMd5 = "48fcdb8b87ce8ef779774199a856091d"; - UNIT_ASSERT_VALUES_EQUAL(md5, expectedMd5); - } +Y_UNIT_TEST(BigText) { + // TransferData uses TempBuf of 64K + const TString s(1000000, 'A'); + const auto md5 = Consume(s.c_str()); + /* + $ for i in {1..1000000};do echo -n A >> 1M.txt;done + $ md5sum 1M.txt + 
48fcdb8b87ce8ef779774199a856091d 1M.txt + */ + const TString expectedMd5 = "48fcdb8b87ce8ef779774199a856091d"; + UNIT_ASSERT_VALUES_EQUAL(md5, expectedMd5); } +} // Y_UNIT_TEST_SUITE(TStreamMd5Tests) diff --git a/yql/essentials/utils/mem_limit.cpp b/yql/essentials/utils/mem_limit.cpp index 87a0db833e3..6f03f796c82 100644 --- a/yql/essentials/utils/mem_limit.cpp +++ b/yql/essentials/utils/mem_limit.cpp @@ -1,5 +1,5 @@ #ifdef __unix__ -#include <sys/resource.h> + #include <sys/resource.h> #endif #include <util/generic/yexception.h> @@ -9,21 +9,21 @@ namespace NYql { void SetAddressSpaceLimit(ui64 memLimit) { if (memLimit) { - #ifdef __unix__ - auto memLimitBytes = memLimit * 1024 * 1024; +#ifdef __unix__ + auto memLimitBytes = memLimit * 1024 * 1024; - struct rlimit rl; - if (getrlimit(RLIMIT_AS, &rl)) { - throw TSystemError() << "Cannot getrlimit(RLIMIT_AS)"; - } + struct rlimit rl; + if (getrlimit(RLIMIT_AS, &rl)) { + throw TSystemError() << "Cannot getrlimit(RLIMIT_AS)"; + } - rl.rlim_cur = memLimitBytes; - if (setrlimit(RLIMIT_AS, &rl)) { - throw TSystemError() << "Cannot setrlimit(RLIMIT_AS) to " << memLimitBytes << " bytes"; - } - #else - throw yexception() << "Memory limit can not be set on this platfrom"; - #endif + rl.rlim_cur = memLimitBytes; + if (setrlimit(RLIMIT_AS, &rl)) { + throw TSystemError() << "Cannot setrlimit(RLIMIT_AS) to " << memLimitBytes << " bytes"; + } +#else + throw yexception() << "Memory limit can not be set on this platfrom"; +#endif } } diff --git a/yql/essentials/utils/memory_profiling/ya.make b/yql/essentials/utils/memory_profiling/ya.make index 3e728ecbb83..2b74947d5b8 100644 --- a/yql/essentials/utils/memory_profiling/ya.make +++ b/yql/essentials/utils/memory_profiling/ya.make @@ -1,5 +1,7 @@ LIBRARY() +ENABLE(YQL_STYLE_CPP) + IF (PROFILE_MEMORY_ALLOCATIONS) CFLAGS(GLOBAL -DPROFILE_MEMORY_ALLOCATIONS) CFLAGS(GLOBAL -DALLOW_DEFAULT_ALLOCATOR) diff --git a/yql/essentials/utils/method_index.cpp b/yql/essentials/utils/method_index.cpp 
index 16dd0e520be..704a17dc1e5 100644 --- a/yql/essentials/utils/method_index.cpp +++ b/yql/essentials/utils/method_index.cpp @@ -42,4 +42,4 @@ size_t GetMethodPtrIndex(uintptr_t ptr) { #endif } -} +} // namespace NYql diff --git a/yql/essentials/utils/method_index.h b/yql/essentials/utils/method_index.h index 04944049c29..40be550b0ce 100644 --- a/yql/essentials/utils/method_index.h +++ b/yql/essentials/utils/method_index.h @@ -8,11 +8,11 @@ namespace NYql { size_t GetMethodPtrIndex(uintptr_t ptr); -template<typename Method> +template <typename Method> inline size_t GetMethodIndex(Method method) { uintptr_t ptr; std::memcpy(&ptr, &method, sizeof(uintptr_t)); return GetMethodPtrIndex(ptr); } -} +} // namespace NYql diff --git a/yql/essentials/utils/network/bind_in_range.cpp b/yql/essentials/utils/network/bind_in_range.cpp index 88cf641ce39..3229ffad530 100644 --- a/yql/essentials/utils/network/bind_in_range.cpp +++ b/yql/essentials/utils/network/bind_in_range.cpp @@ -24,4 +24,4 @@ TVector<NBus::TBindResult> BindInRange(TRangeWalker<int>& portWalker) { ythrow yexception() << "Unable to bind within port range [" << portWalker.GetStart() << ", " << portWalker.GetFinish() << "]"; } -} +} // namespace NYql diff --git a/yql/essentials/utils/network/bind_in_range.h b/yql/essentials/utils/network/bind_in_range.h index 5621529dd52..83de399b1fa 100644 --- a/yql/essentials/utils/network/bind_in_range.h +++ b/yql/essentials/utils/network/bind_in_range.h @@ -5,4 +5,4 @@ namespace NYql { TVector<NBus::TBindResult> BindInRange(TRangeWalker<int>& portWalker); -} +} // namespace NYql diff --git a/yql/essentials/utils/network/ya.make b/yql/essentials/utils/network/ya.make index 282e1502b3c..b123808aa48 100644 --- a/yql/essentials/utils/network/ya.make +++ b/yql/essentials/utils/network/ya.make @@ -1,5 +1,7 @@ LIBRARY() +ENABLE(YQL_STYLE_CPP) + SRCS( bind_in_range.cpp bind_in_range.h diff --git a/yql/essentials/utils/oom_helper/inject.cpp b/yql/essentials/utils/oom_helper/inject.cpp 
index e0b6a2e0c8e..e4342813167 100644 --- a/yql/essentials/utils/oom_helper/inject.cpp +++ b/yql/essentials/utils/oom_helper/inject.cpp @@ -6,12 +6,11 @@ #include <stdio.h> #include <sys/syscall.h> - #define SYSCALL_MMAP2_UNIT 4096ULL #define UNIT SYSCALL_MMAP2_UNIT -#define OFF_MASK ((-0x2000ULL << (8*sizeof(long)-1)) | (UNIT-1)) +#define OFF_MASK ((-0x2000ULL << (8 * sizeof(long) - 1)) | (UNIT - 1)) -void *Mmap(void *start, size_t len, int prot, int flags, int fd, off_t off) +void* Mmap(void* start, size_t len, int prot, int flags, int fd, off_t off) { void* ret = (void*)-1; if (off & OFF_MASK) { @@ -23,7 +22,7 @@ void *Mmap(void *start, size_t len, int prot, int flags, int fd, off_t off) return ret; } #ifdef SYS_mmap2 - ret = (void*)syscall(SYS_mmap2, start, len, prot, flags, fd, off/UNIT); + ret = (void*)syscall(SYS_mmap2, start, len, prot, flags, fd, off / UNIT); #else ret = (void*)syscall(SYS_mmap, start, len, prot, flags, fd, off); #endif @@ -36,11 +35,10 @@ void *Mmap(void *start, size_t len, int prot, int flags, int fd, off_t off) return ret; } - -void *mmap(void *start, size_t len, int prot, int flags, int fd, off_t off) +void* mmap(void* start, size_t len, int prot, int flags, int fd, off_t off) { auto res = Mmap(start, len, prot, flags, fd, off); - if (res == (void*) -1 && errno == ENOMEM) { + if (res == (void*)-1 && errno == ENOMEM) { fprintf(stderr, "mmap failed with ENOMEM\n"); _exit(2); } diff --git a/yql/essentials/utils/oom_helper/ya.make b/yql/essentials/utils/oom_helper/ya.make index 6049debee44..bc8dfa4d1d3 100644 --- a/yql/essentials/utils/oom_helper/ya.make +++ b/yql/essentials/utils/oom_helper/ya.make @@ -1,4 +1,5 @@ IF (OS_LINUX) + ENABLE(YQL_STYLE_CPP) LIBRARY(oom_helper) SRCS(inject.cpp) END() diff --git a/yql/essentials/utils/parse_double.cpp b/yql/essentials/utils/parse_double.cpp index 90923160c55..a5cbebe50e6 100644 --- a/yql/essentials/utils/parse_double.cpp +++ b/yql/essentials/utils/parse_double.cpp @@ -16,7 +16,7 @@ bool 
GenericTryFloatFromString(TStringBuf buf, T& value) { if (TryFromString(buf.data(), buf.size(), value)) { return true; } - + const char* ptr = buf.data(); ui32 size = buf.size(); char sign = '+'; @@ -45,7 +45,7 @@ bool GenericTryFloatFromString(TStringBuf buf, T& value) { return true; } -} +} // namespace float FloatFromString(TStringBuf buf) { float result = 0; @@ -73,4 +73,4 @@ bool TryDoubleFromString(TStringBuf buf, double& value) { return GenericTryFloatFromString(buf, value); } -} +} // namespace NYql diff --git a/yql/essentials/utils/parse_double.h b/yql/essentials/utils/parse_double.h index 61d1d940c96..85ca3b8560f 100644 --- a/yql/essentials/utils/parse_double.h +++ b/yql/essentials/utils/parse_double.h @@ -15,4 +15,4 @@ double DoubleFromString(TStringBuf buf); bool TryFloatFromString(TStringBuf buf, float& value); bool TryDoubleFromString(TStringBuf buf, double& value); -} +} // namespace NYql diff --git a/yql/essentials/utils/parse_double_ut.cpp b/yql/essentials/utils/parse_double_ut.cpp index 4aecf64f88b..b61594139c9 100644 --- a/yql/essentials/utils/parse_double_ut.cpp +++ b/yql/essentials/utils/parse_double_ut.cpp @@ -5,51 +5,51 @@ namespace NYql { Y_UNIT_TEST_SUITE(TParseDouble) { - template <typename T, typename F> - void ParseAndCheck(TStringBuf buf, F f, T expected) { - T result = 0; - UNIT_ASSERT(f(buf, result)); - UNIT_ASSERT_DOUBLES_EQUAL(expected, result, 1e-6); - } +template <typename T, typename F> +void ParseAndCheck(TStringBuf buf, F f, T expected) { + T result = 0; + UNIT_ASSERT(f(buf, result)); + UNIT_ASSERT_DOUBLES_EQUAL(expected, result, 1e-6); +} - Y_UNIT_TEST(ExactValues) { - ParseAndCheck(TStringBuf("nan"), TryFloatFromString, std::numeric_limits<float>::quiet_NaN()); - ParseAndCheck(TStringBuf("nAn"), TryDoubleFromString, std::numeric_limits<double>::quiet_NaN()); +Y_UNIT_TEST(ExactValues) { + ParseAndCheck(TStringBuf("nan"), TryFloatFromString, std::numeric_limits<float>::quiet_NaN()); + ParseAndCheck(TStringBuf("nAn"), 
TryDoubleFromString, std::numeric_limits<double>::quiet_NaN()); - ParseAndCheck(TStringBuf("+nan"), TryFloatFromString, std::numeric_limits<float>::quiet_NaN()); - ParseAndCheck(TStringBuf("+NAN"), TryDoubleFromString, std::numeric_limits<double>::quiet_NaN()); + ParseAndCheck(TStringBuf("+nan"), TryFloatFromString, std::numeric_limits<float>::quiet_NaN()); + ParseAndCheck(TStringBuf("+NAN"), TryDoubleFromString, std::numeric_limits<double>::quiet_NaN()); - ParseAndCheck(TStringBuf("-nan"), TryFloatFromString, std::numeric_limits<float>::quiet_NaN()); - ParseAndCheck(TStringBuf("-NaN"), TryDoubleFromString, std::numeric_limits<double>::quiet_NaN()); + ParseAndCheck(TStringBuf("-nan"), TryFloatFromString, std::numeric_limits<float>::quiet_NaN()); + ParseAndCheck(TStringBuf("-NaN"), TryDoubleFromString, std::numeric_limits<double>::quiet_NaN()); - ParseAndCheck(TStringBuf("inf"), TryFloatFromString, std::numeric_limits<float>::infinity()); - ParseAndCheck(TStringBuf("iNf"), TryDoubleFromString, std::numeric_limits<double>::infinity()); + ParseAndCheck(TStringBuf("inf"), TryFloatFromString, std::numeric_limits<float>::infinity()); + ParseAndCheck(TStringBuf("iNf"), TryDoubleFromString, std::numeric_limits<double>::infinity()); - ParseAndCheck(TStringBuf("+inf"), TryFloatFromString, std::numeric_limits<float>::infinity()); - ParseAndCheck(TStringBuf("+INF"), TryDoubleFromString, std::numeric_limits<double>::infinity()); + ParseAndCheck(TStringBuf("+inf"), TryFloatFromString, std::numeric_limits<float>::infinity()); + ParseAndCheck(TStringBuf("+INF"), TryDoubleFromString, std::numeric_limits<double>::infinity()); - ParseAndCheck(TStringBuf("-inf"), TryFloatFromString, -std::numeric_limits<float>::infinity()); - ParseAndCheck(TStringBuf("-InF"), TryDoubleFromString, -std::numeric_limits<double>::infinity()); + ParseAndCheck(TStringBuf("-inf"), TryFloatFromString, -std::numeric_limits<float>::infinity()); + ParseAndCheck(TStringBuf("-InF"), TryDoubleFromString, 
-std::numeric_limits<double>::infinity()); - ParseAndCheck<float>(TStringBuf("-12.3456"), TryFloatFromString, -12.3456); - ParseAndCheck(TStringBuf("-12.3456"), TryDoubleFromString, -12.3456); + ParseAndCheck<float>(TStringBuf("-12.3456"), TryFloatFromString, -12.3456); + ParseAndCheck(TStringBuf("-12.3456"), TryDoubleFromString, -12.3456); - ParseAndCheck<float>(TStringBuf("1.23e-2"), TryFloatFromString, 0.0123); - ParseAndCheck(TStringBuf("1.23e-2"), TryDoubleFromString, 0.0123); + ParseAndCheck<float>(TStringBuf("1.23e-2"), TryFloatFromString, 0.0123); + ParseAndCheck(TStringBuf("1.23e-2"), TryDoubleFromString, 0.0123); - UNIT_ASSERT_EQUAL(FloatFromString(TStringBuf("iNf")), std::numeric_limits<float>::infinity()); - UNIT_ASSERT_EQUAL(DoubleFromString(TStringBuf("iNf")), std::numeric_limits<float>::infinity()); - } + UNIT_ASSERT_EQUAL(FloatFromString(TStringBuf("iNf")), std::numeric_limits<float>::infinity()); + UNIT_ASSERT_EQUAL(DoubleFromString(TStringBuf("iNf")), std::numeric_limits<float>::infinity()); +} - Y_UNIT_TEST(Errors) { - UNIT_ASSERT_EXCEPTION_CONTAINS(FloatFromString(TStringBuf("")), std::exception, "unable to parse float from ''"); - UNIT_ASSERT_EXCEPTION_CONTAINS(DoubleFromString(TStringBuf("")), std::exception, "unable to parse double from ''"); +Y_UNIT_TEST(Errors) { + UNIT_ASSERT_EXCEPTION_CONTAINS(FloatFromString(TStringBuf("")), std::exception, "unable to parse float from ''"); + UNIT_ASSERT_EXCEPTION_CONTAINS(DoubleFromString(TStringBuf("")), std::exception, "unable to parse double from ''"); - UNIT_ASSERT_EXCEPTION_CONTAINS(FloatFromString(TStringBuf("info")), std::exception, "unable to parse float from 'info'"); - UNIT_ASSERT_EXCEPTION_CONTAINS(DoubleFromString(TStringBuf("-nana")), std::exception, "unable to parse double from '-nana'"); + UNIT_ASSERT_EXCEPTION_CONTAINS(FloatFromString(TStringBuf("info")), std::exception, "unable to parse float from 'info'"); + UNIT_ASSERT_EXCEPTION_CONTAINS(DoubleFromString(TStringBuf("-nana")), 
std::exception, "unable to parse double from '-nana'"); - UNIT_ASSERT_EXCEPTION_CONTAINS(FloatFromString(TStringBuf(nullptr)), std::exception, "unable to parse float from ''"); - UNIT_ASSERT_EXCEPTION_CONTAINS(DoubleFromString(TStringBuf(nullptr)), std::exception, "unable to parse double from ''"); - } -} + UNIT_ASSERT_EXCEPTION_CONTAINS(FloatFromString(TStringBuf(nullptr)), std::exception, "unable to parse float from ''"); + UNIT_ASSERT_EXCEPTION_CONTAINS(DoubleFromString(TStringBuf(nullptr)), std::exception, "unable to parse double from ''"); } +} // Y_UNIT_TEST_SUITE(TParseDouble) +} // namespace NYql diff --git a/yql/essentials/utils/proc_alive.cpp b/yql/essentials/utils/proc_alive.cpp index 7efb2584bcc..b81efa01eb7 100644 --- a/yql/essentials/utils/proc_alive.cpp +++ b/yql/essentials/utils/proc_alive.cpp @@ -7,7 +7,6 @@ #include <errno.h> - namespace NYql { bool IsProcessAlive(TProcessId pid) { @@ -33,5 +32,4 @@ bool IsProcessAlive(TProcessId pid) { #endif } -} // NYql - +} // namespace NYql diff --git a/yql/essentials/utils/proc_alive.h b/yql/essentials/utils/proc_alive.h index c4b798b4ca6..2fc310f0817 100644 --- a/yql/essentials/utils/proc_alive.h +++ b/yql/essentials/utils/proc_alive.h @@ -6,4 +6,4 @@ namespace NYql { bool IsProcessAlive(TProcessId pid); -} +} // namespace NYql diff --git a/yql/essentials/utils/rand_guid.cpp b/yql/essentials/utils/rand_guid.cpp index d89eefbd3b8..386706b85f5 100644 --- a/yql/essentials/utils/rand_guid.cpp +++ b/yql/essentials/utils/rand_guid.cpp @@ -29,4 +29,4 @@ TString TRandGuid::GenGuid() { ui64 TRandGuid::GenNumber() { return GetRnd().GenRand(); } -} +} // namespace NYql diff --git a/yql/essentials/utils/rand_guid.h b/yql/essentials/utils/rand_guid.h index 30496bdd5f3..a19f02fcc99 100644 --- a/yql/essentials/utils/rand_guid.h +++ b/yql/essentials/utils/rand_guid.h @@ -23,8 +23,8 @@ private: } private: - std::aligned_storage<sizeof(TMersenne<ui64>) ,alignof(TMersenne<ui64>)>::type Rnd_; + 
std::aligned_storage<sizeof(TMersenne<ui64>), alignof(TMersenne<ui64>)>::type Rnd_; static TAtomic Counter; }; -} +} // namespace NYql diff --git a/yql/essentials/utils/range_walker.h b/yql/essentials/utils/range_walker.h index 268fe364728..74805fb28d1 100644 --- a/yql/essentials/utils/range_walker.h +++ b/yql/essentials/utils/range_walker.h @@ -44,4 +44,4 @@ public: return result; } }; -} +} // namespace NYql diff --git a/yql/essentials/utils/range_walker_ut.cpp b/yql/essentials/utils/range_walker_ut.cpp index d6a86cc8048..8bbb34cec86 100644 --- a/yql/essentials/utils/range_walker_ut.cpp +++ b/yql/essentials/utils/range_walker_ut.cpp @@ -5,31 +5,31 @@ using namespace NYql; Y_UNIT_TEST_SUITE(TRangeWalkerTests) { - Y_UNIT_TEST(InvalidRange) { - UNIT_ASSERT_EXCEPTION_CONTAINS(TRangeWalker<int>(2, 1), yexception, "Invalid range for walker"); - } +Y_UNIT_TEST(InvalidRange) { + UNIT_ASSERT_EXCEPTION_CONTAINS(TRangeWalker<int>(2, 1), yexception, "Invalid range for walker"); +} - Y_UNIT_TEST(SingleValueRange) { - TRangeWalker<int> w(5, 5); - UNIT_ASSERT_EQUAL(5, w.GetStart()); - UNIT_ASSERT_EQUAL(5, w.GetFinish()); - UNIT_ASSERT_EQUAL(1, w.GetRangeSize()); +Y_UNIT_TEST(SingleValueRange) { + TRangeWalker<int> w(5, 5); + UNIT_ASSERT_EQUAL(5, w.GetStart()); + UNIT_ASSERT_EQUAL(5, w.GetFinish()); + UNIT_ASSERT_EQUAL(1, w.GetRangeSize()); - for (int i = 0; i < 10; ++i) { - UNIT_ASSERT_EQUAL(5, w.MoveToNext()); - } + for (int i = 0; i < 10; ++i) { + UNIT_ASSERT_EQUAL(5, w.MoveToNext()); } +} - Y_UNIT_TEST(ManyValuesRange) { - TRangeWalker<int> w(5, 7); - UNIT_ASSERT_EQUAL(5, w.GetStart()); - UNIT_ASSERT_EQUAL(7, w.GetFinish()); - UNIT_ASSERT_EQUAL(3, w.GetRangeSize()); +Y_UNIT_TEST(ManyValuesRange) { + TRangeWalker<int> w(5, 7); + UNIT_ASSERT_EQUAL(5, w.GetStart()); + UNIT_ASSERT_EQUAL(7, w.GetFinish()); + UNIT_ASSERT_EQUAL(3, w.GetRangeSize()); - for (int i = 0; i < 10; ++i) { - UNIT_ASSERT_EQUAL(5, w.MoveToNext()); - UNIT_ASSERT_EQUAL(6, w.MoveToNext()); - 
UNIT_ASSERT_EQUAL(7, w.MoveToNext()); - } + for (int i = 0; i < 10; ++i) { + UNIT_ASSERT_EQUAL(5, w.MoveToNext()); + UNIT_ASSERT_EQUAL(6, w.MoveToNext()); + UNIT_ASSERT_EQUAL(7, w.MoveToNext()); } } +} // Y_UNIT_TEST_SUITE(TRangeWalkerTests) diff --git a/yql/essentials/utils/resetable_setting.h b/yql/essentials/utils/resetable_setting.h index 0112105a3bf..21bba878f06 100644 --- a/yql/essentials/utils/resetable_setting.h +++ b/yql/essentials/utils/resetable_setting.h @@ -64,4 +64,4 @@ public: } }; -} +} // namespace NYql diff --git a/yql/essentials/utils/retry.h b/yql/essentials/utils/retry.h index aa8391fa48d..ab6d9bf809a 100644 --- a/yql/essentials/utils/retry.h +++ b/yql/essentials/utils/retry.h @@ -14,4 +14,4 @@ auto WithRetry(int attempts, TAction&& a, TExceptionHandler&& exceptionHandler) return a(); } -} +} // namespace NYql diff --git a/yql/essentials/utils/retry_ut.cpp b/yql/essentials/utils/retry_ut.cpp index 47cb35fd739..d3d42265180 100644 --- a/yql/essentials/utils/retry_ut.cpp +++ b/yql/essentials/utils/retry_ut.cpp @@ -6,68 +6,63 @@ using namespace NYql; namespace { -class TMyError : public yexception { +class TMyError: public yexception { }; -} +} // namespace Y_UNIT_TEST_SUITE(TRetryTests) { - Y_UNIT_TEST(ZeroAttempts) { - auto r = WithRetry<TMyError>(0, - []() { return TString("abc"); }, - [](auto, auto, auto) { UNIT_FAIL("Exception handler invoked"); }); +Y_UNIT_TEST(ZeroAttempts) { + auto r = WithRetry<TMyError>(0, + []() { return TString("abc"); }, + [](auto, auto, auto) { UNIT_FAIL("Exception handler invoked"); }); - UNIT_ASSERT_VALUES_EQUAL("abc", r); - } + UNIT_ASSERT_VALUES_EQUAL("abc", r); +} - Y_UNIT_TEST(NoRetries) { - auto r = WithRetry<TMyError>(5, - []() { return TString("abc"); }, - [](auto, auto, auto) { UNIT_FAIL("Exception handler invoked"); }); +Y_UNIT_TEST(NoRetries) { + auto r = WithRetry<TMyError>(5, + []() { return TString("abc"); }, + [](auto, auto, auto) { UNIT_FAIL("Exception handler invoked"); }); - 
UNIT_ASSERT_VALUES_EQUAL("abc", r); - } + UNIT_ASSERT_VALUES_EQUAL("abc", r); +} - Y_UNIT_TEST(NoRetriesButException) { - UNIT_ASSERT_EXCEPTION_CONTAINS(WithRetry<TMyError>(5, - []() { throw yexception() << "xxxx"; }, - [](auto, auto, auto) { UNIT_FAIL("Exception handler invoked"); }), yexception, "xxxx"); - } +Y_UNIT_TEST(NoRetriesButException) { + UNIT_ASSERT_EXCEPTION_CONTAINS(WithRetry<TMyError>(5, + []() { throw yexception() << "xxxx"; }, + [](auto, auto, auto) { UNIT_FAIL("Exception handler invoked"); }), yexception, "xxxx"); +} - Y_UNIT_TEST(FewRetries) { - int counter = 0; - int exceptions = 0; - auto r = WithRetry<TMyError>(3, [&]() { +Y_UNIT_TEST(FewRetries) { + int counter = 0; + int exceptions = 0; + auto r = WithRetry<TMyError>(3, [&]() { if (counter++ < 2) { throw TMyError() << "yyyy"; } - return counter; - }, [&](const auto& e, int attempt, int attemptCount) { + return counter; }, [&](const auto& e, int attempt, int attemptCount) { ++exceptions; UNIT_ASSERT_VALUES_EQUAL(e.what(), "yyyy"); UNIT_ASSERT_VALUES_EQUAL(attempt, counter); - UNIT_ASSERT_VALUES_EQUAL(attemptCount, 3); - }); + UNIT_ASSERT_VALUES_EQUAL(attemptCount, 3); }); - UNIT_ASSERT_VALUES_EQUAL(2, exceptions); - UNIT_ASSERT_VALUES_EQUAL(3, r); - UNIT_ASSERT_VALUES_EQUAL(3, counter); - } + UNIT_ASSERT_VALUES_EQUAL(2, exceptions); + UNIT_ASSERT_VALUES_EQUAL(3, r); + UNIT_ASSERT_VALUES_EQUAL(3, counter); +} - Y_UNIT_TEST(ManyRetries) { - int counter = 0; - int exceptions = 0; - UNIT_ASSERT_EXCEPTION_CONTAINS(WithRetry<TMyError>(3, [&]() { - throw TMyError() << "yyyy" << counter++; - }, [&](const auto& e, int attempt, int attemptCount) { +Y_UNIT_TEST(ManyRetries) { + int counter = 0; + int exceptions = 0; + UNIT_ASSERT_EXCEPTION_CONTAINS(WithRetry<TMyError>(3, [&]() { throw TMyError() << "yyyy" << counter++; }, [&](const auto& e, int attempt, int attemptCount) { ++exceptions; UNIT_ASSERT_STRING_CONTAINS(e.what(), "yyyy"); UNIT_ASSERT_VALUES_EQUAL(attempt, counter); - 
UNIT_ASSERT_VALUES_EQUAL(attemptCount, 3); - }), TMyError, "yyyy2"); + UNIT_ASSERT_VALUES_EQUAL(attemptCount, 3); }), TMyError, "yyyy2"); - UNIT_ASSERT_VALUES_EQUAL(2, exceptions); - UNIT_ASSERT_VALUES_EQUAL(3, counter); - } + UNIT_ASSERT_VALUES_EQUAL(2, exceptions); + UNIT_ASSERT_VALUES_EQUAL(3, counter); } +} // Y_UNIT_TEST_SUITE(TRetryTests) diff --git a/yql/essentials/utils/signals/signals.cpp b/yql/essentials/utils/signals/signals.cpp index bb22e266240..6d3a3c05f7e 100644 --- a/yql/essentials/utils/signals/signals.cpp +++ b/yql/essentials/utils/signals/signals.cpp @@ -10,7 +10,7 @@ #include <util/system/getpid.h> #ifdef _linux_ -# include <sys/prctl.h> + #include <sys/prctl.h> #endif #include <string.h> @@ -18,7 +18,6 @@ #include <errno.h> #include <stdlib.h> - namespace NYql { volatile sig_atomic_t NeedTerminate = 0; @@ -38,41 +37,41 @@ namespace { void SignalHandler(int signo) { switch (signo) { - case SIGTERM: - NeedTerminate = 1; - break; + case SIGTERM: + NeedTerminate = 1; + break; - case SIGQUIT: - NeedQuit = 1; - break; + case SIGQUIT: + NeedQuit = 1; + break; #ifdef _unix_ - case SIGHUP: - NeedReconfigure = 1; - break; + case SIGHUP: + NeedReconfigure = 1; + break; - case SIGUSR1: - NeedReopenLog = 1; - break; + case SIGUSR1: + NeedReopenLog = 1; + break; - case SIGCHLD: - NeedReapZombies = 1; - break; + case SIGCHLD: + NeedReapZombies = 1; + break; #endif - case SIGINT: - if (CatchInterrupt) { - NeedInterrupt = 1; - } else { - fprintf(stderr, "%s (pid=%d) captured SIGINT\n", - GetProcTitle(), getpid()); - signal(signo, SIG_DFL); - raise(signo); - } - break; + case SIGINT: + if (CatchInterrupt) { + NeedInterrupt = 1; + } else { + fprintf(stderr, "%s (pid=%d) captured SIGINT\n", + GetProcTitle(), getpid()); + signal(signo, SIG_DFL); + raise(signo); + } + break; - default: - break; + default: + break; } } @@ -84,7 +83,7 @@ void SignalHandlerWithSelfPipe(int signo) if (write(SignalPipeW.GetHandle(), "x", 1) == -1 && errno != EAGAIN) { static TStringBuf 
msg("cannot write to signal pipe"); #ifndef STDERR_FILENO -#define STDERR_FILENO 2 + #define STDERR_FILENO 2 #endif write(STDERR_FILENO, msg.data(), msg.size()); abort(); @@ -92,21 +91,22 @@ void SignalHandlerWithSelfPipe(int signo) errno = savedErrno; } - #ifndef _unix_ const char* strsignal(int signo) { switch (signo) { - case SIGTERM: return "SIGTERM"; - case SIGINT: return "SIGINT"; - case SIGQUIT: return "SIGQUIT"; - default: - return "UNKNOWN"; + case SIGTERM: + return "SIGTERM"; + case SIGINT: + return "SIGINT"; + case SIGQUIT: + return "SIGQUIT"; + default: + return "UNKNOWN"; } } #endif - #ifdef _unix_ int SetSignalHandler(int signo, void (*handler)(int)) { @@ -126,8 +126,7 @@ int SetSignalHandler(int signo, void (*handler)(int)) #endif -struct TSignalHandlerDesc -{ +struct TSignalHandlerDesc { int Signo; void (*Handler)(int); }; @@ -156,21 +155,19 @@ void SetSignalHandlers(const TSignalHandlerDesc* handlerDescs) } // namespace - void InitSignals() { TSignalHandlerDesc handlerDescs[] = { - { SIGTERM, SignalHandler }, - { SIGINT, SignalHandler }, - { SIGQUIT, SignalHandler }, + {SIGTERM, SignalHandler}, + {SIGINT, SignalHandler}, + {SIGQUIT, SignalHandler}, #ifdef _unix_ - { SIGPIPE, SIG_IGN }, - { SIGHUP, SignalHandler }, - { SIGUSR1, SignalHandler }, - { SIGCHLD, SignalHandler }, + {SIGPIPE, SIG_IGN}, + {SIGHUP, SignalHandler}, + {SIGUSR1, SignalHandler}, + {SIGCHLD, SignalHandler}, #endif - { -1, nullptr } - }; + {-1, nullptr}}; SetSignalHandlers(handlerDescs); } @@ -178,17 +175,16 @@ void InitSignals() void InitSignalsWithSelfPipe() { TSignalHandlerDesc handlerDescs[] = { - { SIGTERM, SignalHandlerWithSelfPipe }, - { SIGINT, SignalHandlerWithSelfPipe }, - { SIGQUIT, SignalHandlerWithSelfPipe }, + {SIGTERM, SignalHandlerWithSelfPipe}, + {SIGINT, SignalHandlerWithSelfPipe}, + {SIGQUIT, SignalHandlerWithSelfPipe}, #ifdef _unix_ - { SIGPIPE, SIG_IGN }, - { SIGHUP, SignalHandlerWithSelfPipe }, - { SIGUSR1, SignalHandlerWithSelfPipe }, - { SIGCHLD, 
SignalHandlerWithSelfPipe }, + {SIGPIPE, SIG_IGN}, + {SIGHUP, SignalHandlerWithSelfPipe}, + {SIGUSR1, SignalHandlerWithSelfPipe}, + {SIGCHLD, SignalHandlerWithSelfPipe}, #endif - { -1, nullptr } - }; + {-1, nullptr}}; TPipe::Pipe(SignalPipeR, SignalPipeW); SetNonBlock(SignalPipeR.GetHandle()); diff --git a/yql/essentials/utils/signals/signals.h b/yql/essentials/utils/signals/signals.h index 612f9062072..3a10ad04578 100644 --- a/yql/essentials/utils/signals/signals.h +++ b/yql/essentials/utils/signals/signals.h @@ -6,7 +6,6 @@ #include <signal.h> - namespace NYql { #ifdef _win_ diff --git a/yql/essentials/utils/signals/utils.cpp b/yql/essentials/utils/signals/utils.cpp index b1de131b30f..f8f0dd00cea 100644 --- a/yql/essentials/utils/signals/utils.cpp +++ b/yql/essentials/utils/signals/utils.cpp @@ -73,7 +73,9 @@ void ProcTitleInit(int argc, const char* argv[]) void SetProcTitle(const char* title) { - if (!g_OriginalArgv) return; + if (!g_OriginalArgv) { + return; + } char* p = g_OriginalArgv[0]; p += strlcpy(p, "yqlworker: ", g_OriginalArgvLast - p); @@ -88,7 +90,9 @@ void SetProcTitle(const char* title) void AddProcTitleSuffix(const char* suffix) { - if (!g_OriginalArgv) return; + if (!g_OriginalArgv) { + return; + } char* p = g_OriginalArgv[0]; p += strlcat(p, " ", g_OriginalArgvLast - p); diff --git a/yql/essentials/utils/signals/utils.h b/yql/essentials/utils/signals/utils.h index 75c55244fac..845695e3469 100644 --- a/yql/essentials/utils/signals/utils.h +++ b/yql/essentials/utils/signals/utils.h @@ -4,7 +4,7 @@ namespace google { namespace protobuf { - class Message; +class Message; } // namespace protobuf } // namespace google diff --git a/yql/essentials/utils/signals/ya.make b/yql/essentials/utils/signals/ya.make index 9a2ab2d0204..8d69ae7c088 100644 --- a/yql/essentials/utils/signals/ya.make +++ b/yql/essentials/utils/signals/ya.make @@ -1,5 +1,7 @@ LIBRARY() +ENABLE(YQL_STYLE_CPP) + SRCS( signals.cpp signals.h diff --git a/yql/essentials/utils/sort.cpp 
b/yql/essentials/utils/sort.cpp index 67992aff39b..a5a69fa930f 100644 --- a/yql/essentials/utils/sort.cpp +++ b/yql/essentials/utils/sort.cpp @@ -1 +1 @@ -#include "sort.h"
\ No newline at end of file +#include "sort.h" diff --git a/yql/essentials/utils/sort.h b/yql/essentials/utils/sort.h index 4adb7c9225d..06e9584486b 100644 --- a/yql/essentials/utils/sort.h +++ b/yql/essentials/utils/sort.h @@ -24,4 +24,4 @@ void FastPartialSort(RandomIt first, RandomIt middle, RandomIt last, Compare com ::miniselect::floyd_rivest_partial_sort(first, middle, last, compare); } -} +} // namespace NYql diff --git a/yql/essentials/utils/swap_bytes.h b/yql/essentials/utils/swap_bytes.h index bf09bb321ab..90f12cab341 100644 --- a/yql/essentials/utils/swap_bytes.h +++ b/yql/essentials/utils/swap_bytes.h @@ -6,8 +6,7 @@ namespace NYql { // clang generates bswap for ui32 and ui64 template <typename TUnsigned> -Y_FORCE_INLINE -TUnsigned SwapBytes(TUnsigned value) { +Y_FORCE_INLINE TUnsigned SwapBytes(TUnsigned value) { TUnsigned result; auto* from = (ui8*)&value + sizeof(TUnsigned) - 1; auto* to = (ui8*)&result; @@ -17,4 +16,4 @@ TUnsigned SwapBytes(TUnsigned value) { return result; } -} +} // namespace NYql diff --git a/yql/essentials/utils/sys/become_user.cpp b/yql/essentials/utils/sys/become_user.cpp index bbb6b5735ca..af24dc0d555 100644 --- a/yql/essentials/utils/sys/become_user.cpp +++ b/yql/essentials/utils/sys/become_user.cpp @@ -1,29 +1,29 @@ #include "become_user.h" #ifdef _linux_ -#include <yql/essentials/utils/sys/linux_version.h> + #include <yql/essentials/utils/sys/linux_version.h> -#include <util/generic/yexception.h> -#include <util/system/user.h> + #include <util/generic/yexception.h> + #include <util/system/user.h> -#include <memory> -#include <vector> -#include <errno.h> + #include <memory> + #include <vector> + #include <errno.h> -#include <grp.h> -#include <pwd.h> -#include <unistd.h> + #include <grp.h> + #include <pwd.h> + #include <unistd.h> -#include <sys/prctl.h> -#include <contrib/libs/libcap/include/sys/capability.h> -#include <contrib/libs/libcap/include/sys/securebits.h> + #include <sys/prctl.h> + #include 
<contrib/libs/libcap/include/sys/capability.h> + #include <contrib/libs/libcap/include/sys/securebits.h> -// strange, but sometimes we have to specify values manually -#define PR_CAP_AMBIENT 47 -#define PR_CAP_AMBIENT_IS_SET 1 -#define PR_CAP_AMBIENT_RAISE 2 -#define PR_CAP_AMBIENT_LOWER 3 -#define PR_CAP_AMBIENT_CLEAR_ALL 4 + // strange, but sometimes we have to specify values manually + #define PR_CAP_AMBIENT 47 + #define PR_CAP_AMBIENT_IS_SET 1 + #define PR_CAP_AMBIENT_RAISE 2 + #define PR_CAP_AMBIENT_LOWER 3 + #define PR_CAP_AMBIENT_CLEAR_ALL 4 namespace NYql { @@ -90,7 +90,7 @@ void EnsureCapFlagsVectorCannotBeRaised(const std::vector<cap_value_t>& flags) { for (auto f : flags) { try { // one-by-one - SetCapFlagsVector({ f }); + SetCapFlagsVector({f}); } catch (const TSystemError&) { continue; } @@ -141,14 +141,14 @@ void DoBecomeUser(const char* username, const char* groupname) { } } -} +} // namespace void BecomeUser(const TString& username, const TString& groupname) { DoBecomeUser(username.data(), groupname.data()); } void TurnOnBecomeUserAmbientCaps() { - SetCapFlagsVector({ CAP_SETUID, CAP_SETGID, CAP_SETPCAP, CAP_KILL }); + SetCapFlagsVector({CAP_SETUID, CAP_SETGID, CAP_SETPCAP, CAP_KILL}); if (prctl(PR_SET_SECUREBITS, SECBIT_NO_SETUID_FIXUP | SECBIT_NO_SETUID_FIXUP_LOCKED, 0, 0, 0) == -1) { ythrow TSystemError() << "can't set secure bits for a process"; } @@ -157,7 +157,7 @@ void TurnOnBecomeUserAmbientCaps() { void TurnOffBecomeUserAbility() { ClearAmbientCapFlags(); SetCapFlagsVector({}); - EnsureCapFlagsVectorCannotBeRaised({ CAP_SETUID, CAP_SETGID, CAP_SETPCAP, CAP_KILL }); + EnsureCapFlagsVectorCannotBeRaised({CAP_SETUID, CAP_SETGID, CAP_SETPCAP, CAP_KILL}); // ensure we cannot get root access back if (setuid(0) != -1) { @@ -183,6 +183,6 @@ void SendSignalOnParentThreadExit(int signo) } } -} +} // namespace NYql #endif diff --git a/yql/essentials/utils/sys/become_user.h b/yql/essentials/utils/sys/become_user.h index c5c2025d8b3..dc7710e2dba 100644 
--- a/yql/essentials/utils/sys/become_user.h +++ b/yql/essentials/utils/sys/become_user.h @@ -23,4 +23,4 @@ void DumpCaps(const TString& title); // subscribe child process on receiving signal on parent process death (particularly on parent thread exit) void SendSignalOnParentThreadExit(int signo); -} +} // namespace NYql diff --git a/yql/essentials/utils/sys/become_user_dummy.cpp b/yql/essentials/utils/sys/become_user_dummy.cpp index 897d9c39774..ef8ca8ccafa 100644 --- a/yql/essentials/utils/sys/become_user_dummy.cpp +++ b/yql/essentials/utils/sys/become_user_dummy.cpp @@ -22,5 +22,5 @@ void SendSignalOnParentThreadExit(int signo) Y_UNUSED(signo); } -} +} // namespace NYql #endif diff --git a/yql/essentials/utils/sys/linux_version.cpp b/yql/essentials/utils/sys/linux_version.cpp index 5d10af82948..e2be5fcce40 100644 --- a/yql/essentials/utils/sys/linux_version.cpp +++ b/yql/essentials/utils/sys/linux_version.cpp @@ -4,43 +4,43 @@ #include <util/system/platform.h> #ifdef _linux_ -# include <sys/utsname.h> + #include <sys/utsname.h> #endif namespace NYql { - std::tuple<int, int, int> DetectLinuxKernelVersion3() { +std::tuple<int, int, int> DetectLinuxKernelVersion3() { #ifdef _linux_ - // see https://github.com/torvalds/linux/blob/master/Makefile - // version is composed as follows: - // VERSION = 4 - // PATCHLEVEL = 18 - // SUBLEVEL = 0 - // EXTRAVERSION = -rc4 - // KERNELVERSION = $(VERSION)$(if $(PATCHLEVEL),.$(PATCHLEVEL)$(if $(SUBLEVEL),.$(SUBLEVEL)))$(EXTRAVERSION) + // see https://github.com/torvalds/linux/blob/master/Makefile + // version is composed as follows: + // VERSION = 4 + // PATCHLEVEL = 18 + // SUBLEVEL = 0 + // EXTRAVERSION = -rc4 + // KERNELVERSION = $(VERSION)$(if $(PATCHLEVEL),.$(PATCHLEVEL)$(if $(SUBLEVEL),.$(SUBLEVEL)))$(EXTRAVERSION) - utsname buf = {}; - if (uname(&buf)) { - ythrow TSystemError() << "uname call failed"; - } + utsname buf = {}; + if (uname(&buf)) { + ythrow TSystemError() << "uname call failed"; + } - int v = 0; - int p = 0; 
- int s = 0; - if (sscanf(buf.release, "%d.%d.%d", &v, &p, &s) != 3) { - ythrow yexception() << "Failed to parse linux kernel version " << buf.release; - } - return std::make_tuple(v, p, s); + int v = 0; + int p = 0; + int s = 0; + if (sscanf(buf.release, "%d.%d.%d", &v, &p, &s) != 3) { + ythrow yexception() << "Failed to parse linux kernel version " << buf.release; + } + return std::make_tuple(v, p, s); #else - return {}; + return {}; #endif - } +} - std::pair<int, int> DetectLinuxKernelVersion2() { - auto v = DetectLinuxKernelVersion3(); - return std::make_pair(std::get<0>(v), std::get<1>(v)); - } +std::pair<int, int> DetectLinuxKernelVersion2() { + auto v = DetectLinuxKernelVersion3(); + return std::make_pair(std::get<0>(v), std::get<1>(v)); +} - bool IsLinuxKernelBelow4_3() { - return DetectLinuxKernelVersion2() < std::make_pair(4, 3); - } +bool IsLinuxKernelBelow4_3() { + return DetectLinuxKernelVersion2() < std::make_pair(4, 3); } +} // namespace NYql diff --git a/yql/essentials/utils/sys/linux_version.h b/yql/essentials/utils/sys/linux_version.h index 7ae893d257d..567c92bedb3 100644 --- a/yql/essentials/utils/sys/linux_version.h +++ b/yql/essentials/utils/sys/linux_version.h @@ -3,11 +3,11 @@ #include <tuple> namespace NYql { - // returns version, patch level, sublevel, e.g. (4, 4, 114) for `uname -r` == "4.4.114-50" - std::tuple<int, int, int> DetectLinuxKernelVersion3(); +// returns version, patch level, sublevel, e.g. 
(4, 4, 114) for `uname -r` == "4.4.114-50" +std::tuple<int, int, int> DetectLinuxKernelVersion3(); - // returns version, patch level - std::pair<int, int> DetectLinuxKernelVersion2(); +// returns version, patch level +std::pair<int, int> DetectLinuxKernelVersion2(); - bool IsLinuxKernelBelow4_3(); // NOLINT(readability-identifier-naming) -} +bool IsLinuxKernelBelow4_3(); // NOLINT(readability-identifier-naming) +} // namespace NYql diff --git a/yql/essentials/utils/sys/ya.make b/yql/essentials/utils/sys/ya.make index 698aeb8ba8c..b1d6f8be96c 100644 --- a/yql/essentials/utils/sys/ya.make +++ b/yql/essentials/utils/sys/ya.make @@ -1,5 +1,7 @@ LIBRARY() +ENABLE(YQL_STYLE_CPP) + SRCS( become_user.h become_user_dummy.cpp diff --git a/yql/essentials/utils/test_http_server/test_http_server.cpp b/yql/essentials/utils/test_http_server/test_http_server.cpp index 5d55230c410..aa4c48badaf 100644 --- a/yql/essentials/utils/test_http_server/test_http_server.cpp +++ b/yql/essentials/utils/test_http_server/test_http_server.cpp @@ -7,8 +7,8 @@ namespace NYql { -class TTestHttpServer::TImpl : public THttpServer::ICallBack { - class TRequestProcessor : public THttpClientRequestEx { +class TTestHttpServer::TImpl: public THttpServer::ICallBack { + class TRequestProcessor: public THttpClientRequestEx { public: explicit TRequestProcessor(TImpl* parent) : Parent_(parent) @@ -46,20 +46,20 @@ class TTestHttpServer::TImpl : public THttpServer::ICallBack { auto reply = Parent_->ProcessNextRequest(r); switch (reply.Code) { - case HTTP_OK: - Output() << "HTTP/1.1 200 Ok\r\n"; - break; + case HTTP_OK: + Output() << "HTTP/1.1 200 Ok\r\n"; + break; - case HTTP_NOT_MODIFIED: - Output() << "HTTP/1.1 304 Not modified\r\n"; - break; + case HTTP_NOT_MODIFIED: + Output() << "HTTP/1.1 304 Not modified\r\n"; + break; - case HTTP_FORBIDDEN: - Output() << "HTTP/1.1 403 Forbidden\r\n"; - break; + case HTTP_FORBIDDEN: + Output() << "HTTP/1.1 403 Forbidden\r\n"; + break; - default: - return true; + default: + 
return true; } if (reply.ETag) { @@ -94,7 +94,6 @@ public: : HttpServer_(this, THttpServer::TOptions(port)) , Port_(port) { - } TClientRequest* CreateClient() override { @@ -129,7 +128,8 @@ private: }; TTestHttpServer::TTestHttpServer(int port) - : Impl_(new TImpl(port)) { + : Impl_(new TImpl(port)) +{ } TTestHttpServer::~TTestHttpServer() { @@ -148,4 +148,4 @@ void TTestHttpServer::SetRequestHandler(TRequestHandler handler) { return Impl_->SetRequestHandler(std::move(handler)); } -} +} // namespace NYql diff --git a/yql/essentials/utils/test_http_server/test_http_server.h b/yql/essentials/utils/test_http_server/test_http_server.h index 385cfaf9706..762278eee86 100644 --- a/yql/essentials/utils/test_http_server/test_http_server.h +++ b/yql/essentials/utils/test_http_server/test_http_server.h @@ -74,4 +74,4 @@ private: THolder<TImpl> Impl_; }; -} +} // namespace NYql diff --git a/yql/essentials/utils/test_http_server/ya.make b/yql/essentials/utils/test_http_server/ya.make index 41c6710c5f9..9f4b1b02d0b 100644 --- a/yql/essentials/utils/test_http_server/ya.make +++ b/yql/essentials/utils/test_http_server/ya.make @@ -1,5 +1,7 @@ LIBRARY() +ENABLE(YQL_STYLE_CPP) + SRCS( test_http_server.cpp ) diff --git a/yql/essentials/utils/threading/async_queue.cpp b/yql/essentials/utils/threading/async_queue.cpp index 4021636b552..c910c68ef60 100644 --- a/yql/essentials/utils/threading/async_queue.cpp +++ b/yql/essentials/utils/threading/async_queue.cpp @@ -15,4 +15,4 @@ TAsyncQueue::TPtr TAsyncQueue::Make(size_t numThreads, const TString& poolName) return new TAsyncQueue(numThreads, poolName); } -} +} // namespace NYql diff --git a/yql/essentials/utils/threading/async_queue.h b/yql/essentials/utils/threading/async_queue.h index 3dd75b9e08a..4c2b43aeca7 100644 --- a/yql/essentials/utils/threading/async_queue.h +++ b/yql/essentials/utils/threading/async_queue.h @@ -48,4 +48,4 @@ private: THolder<IThreadPool> MtpQueue_; }; -} // NYql +} // namespace NYql diff --git 
a/yql/essentials/utils/threading/ya.make b/yql/essentials/utils/threading/ya.make index d1cd6291a70..c68c12323ea 100644 --- a/yql/essentials/utils/threading/ya.make +++ b/yql/essentials/utils/threading/ya.make @@ -1,5 +1,7 @@ LIBRARY() +ENABLE(YQL_STYLE_CPP) + SRCS( async_queue.cpp ) diff --git a/yql/essentials/utils/tty.cpp b/yql/essentials/utils/tty.cpp index 57db2d9bf31..53dacef2f07 100644 --- a/yql/essentials/utils/tty.cpp +++ b/yql/essentials/utils/tty.cpp @@ -2,10 +2,10 @@ #include <util/system/platform.h> #ifdef _win_ -#include <io.h> -#include <stdio.h> + #include <io.h> + #include <stdio.h> #else -#include <unistd.h> + #include <unistd.h> #endif namespace NYql { @@ -13,21 +13,21 @@ namespace NYql { bool IsTty(EStdStream stream) { #ifdef _win_ switch (stream) { - case EStdStream::In: - return _isatty(_fileno(stdin)); - case EStdStream::Out: - return _isatty(_fileno(stdout)); - case EStdStream::Err: - return _isatty(_fileno(stderr)); + case EStdStream::In: + return _isatty(_fileno(stdin)); + case EStdStream::Out: + return _isatty(_fileno(stdout)); + case EStdStream::Err: + return _isatty(_fileno(stderr)); } #else switch (stream) { - case EStdStream::In: - return isatty(STDIN_FILENO); - case EStdStream::Out: - return isatty(STDOUT_FILENO); - case EStdStream::Err: - return isatty(STDERR_FILENO); + case EStdStream::In: + return isatty(STDIN_FILENO); + case EStdStream::Out: + return isatty(STDOUT_FILENO); + case EStdStream::Err: + return isatty(STDERR_FILENO); } #endif } diff --git a/yql/essentials/utils/url_builder.cpp b/yql/essentials/utils/url_builder.cpp index e8a55561e84..a5c21ee9843 100644 --- a/yql/essentials/utils/url_builder.cpp +++ b/yql/essentials/utils/url_builder.cpp @@ -10,7 +10,7 @@ TUrlBuilder::TUrlBuilder(const TString& uri) } TUrlBuilder& TUrlBuilder::AddUrlParam(const TString& name, const TString& value) { - Params_.emplace_back(TParam {name, value}); + Params_.emplace_back(TParam{name, value}); return *this; } @@ -47,4 +47,4 @@ TString 
TUrlBuilder::Build() const { return std::move(res); } -} // NYql +} // namespace NYql diff --git a/yql/essentials/utils/url_builder.h b/yql/essentials/utils/url_builder.h index c4d4cfb7854..f4beee9c1fd 100644 --- a/yql/essentials/utils/url_builder.h +++ b/yql/essentials/utils/url_builder.h @@ -10,6 +10,7 @@ class TUrlBuilder { TString Name; TString Value; }; + public: explicit TUrlBuilder(const TString& uri); @@ -18,9 +19,10 @@ public: TUrlBuilder& AddPathComponent(const TString& value); TString Build() const; + private: std::vector<TParam> Params_; TString MainUri_; }; -} // NYql +} // namespace NYql diff --git a/yql/essentials/utils/url_builder_ut.cpp b/yql/essentials/utils/url_builder_ut.cpp index ad15a916982..e4ec9b5e5d2 100644 --- a/yql/essentials/utils/url_builder_ut.cpp +++ b/yql/essentials/utils/url_builder_ut.cpp @@ -5,53 +5,53 @@ using namespace NYql; Y_UNIT_TEST_SUITE(TUrlBuilder) { - Y_UNIT_TEST(UriOnly) { - TUrlBuilder builder("https://localhost/abc"); - UNIT_ASSERT_VALUES_EQUAL(builder.Build(), "https://localhost/abc"); - } +Y_UNIT_TEST(UriOnly) { + TUrlBuilder builder("https://localhost/abc"); + UNIT_ASSERT_VALUES_EQUAL(builder.Build(), "https://localhost/abc"); +} - Y_UNIT_TEST(Basic) { - TUrlBuilder builder("https://localhost/abc"); - builder.AddUrlParam("param1", "val1"); - builder.AddUrlParam("param2", "val2"); +Y_UNIT_TEST(Basic) { + TUrlBuilder builder("https://localhost/abc"); + builder.AddUrlParam("param1", "val1"); + builder.AddUrlParam("param2", "val2"); - UNIT_ASSERT_VALUES_EQUAL(builder.Build(), "https://localhost/abc?param1=val1¶m2=val2"); - } + UNIT_ASSERT_VALUES_EQUAL(builder.Build(), "https://localhost/abc?param1=val1¶m2=val2"); +} - Y_UNIT_TEST(BasicWithEncoding) { - auto url = TUrlBuilder("https://localhost/abc") - .AddUrlParam("param1", "=!@#$%^&*(){}[]\" ") - .AddUrlParam("param2", "val2") - .Build(); +Y_UNIT_TEST(BasicWithEncoding) { + auto url = TUrlBuilder("https://localhost/abc") + .AddUrlParam("param1", "=!@#$%^&*(){}[]\" ") 
+ .AddUrlParam("param2", "val2") + .Build(); - UNIT_ASSERT_VALUES_EQUAL(url, "https://localhost/abc?param1=%3D!@%23$%25%5E%26*%28%29%7B%7D%5B%5D%22+¶m2=val2"); - } + UNIT_ASSERT_VALUES_EQUAL(url, "https://localhost/abc?param1=%3D!@%23$%25%5E%26*%28%29%7B%7D%5B%5D%22+¶m2=val2"); +} - Y_UNIT_TEST(EmptyPathComponent) { - TUrlBuilder builder("https://localhost/abc"); - UNIT_ASSERT_EXCEPTION_CONTAINS(builder.AddPathComponent(""), std::exception, "Empty path component is not allowed"); - auto url = builder.Build(); - // not changed - UNIT_ASSERT_VALUES_EQUAL(url, "https://localhost/abc"); - } +Y_UNIT_TEST(EmptyPathComponent) { + TUrlBuilder builder("https://localhost/abc"); + UNIT_ASSERT_EXCEPTION_CONTAINS(builder.AddPathComponent(""), std::exception, "Empty path component is not allowed"); + auto url = builder.Build(); + // not changed + UNIT_ASSERT_VALUES_EQUAL(url, "https://localhost/abc"); +} - Y_UNIT_TEST(SeveralPathComponents) { - auto url = TUrlBuilder("https://localhost/abc") - .AddPathComponent("oops") - .AddPathComponent("long oops") - .AddUrlParam("param1", "val1") - .AddUrlParam("param1", "long param") - .Build(); - UNIT_ASSERT_VALUES_EQUAL(url, "https://localhost/abc/oops/long%20oops?param1=val1¶m1=long+param"); - } +Y_UNIT_TEST(SeveralPathComponents) { + auto url = TUrlBuilder("https://localhost/abc") + .AddPathComponent("oops") + .AddPathComponent("long oops") + .AddUrlParam("param1", "val1") + .AddUrlParam("param1", "long param") + .Build(); + UNIT_ASSERT_VALUES_EQUAL(url, "https://localhost/abc/oops/long%20oops?param1=val1¶m1=long+param"); +} - Y_UNIT_TEST(SeveralPathComponentsWithSlashInBaseUri) { - // base uri ends with '/' - auto url = TUrlBuilder("https://localhost/abc/") - .AddPathComponent("oops%1234") - .AddPathComponent("long&oops=xxx") - .AddUrlParam("param1", "a&b=cdef") - .Build(); - UNIT_ASSERT_VALUES_EQUAL(url, "https://localhost/abc/oops%251234/long&oops=xxx?param1=a%26b%3Dcdef"); - } +Y_UNIT_TEST(SeveralPathComponentsWithSlashInBaseUri) { 
+ // base uri ends with '/' + auto url = TUrlBuilder("https://localhost/abc/") + .AddPathComponent("oops%1234") + .AddPathComponent("long&oops=xxx") + .AddUrlParam("param1", "a&b=cdef") + .Build(); + UNIT_ASSERT_VALUES_EQUAL(url, "https://localhost/abc/oops%251234/long&oops=xxx?param1=a%26b%3Dcdef"); } +} // Y_UNIT_TEST_SUITE(TUrlBuilder) diff --git a/yql/essentials/utils/ut/ya.make b/yql/essentials/utils/ut/ya.make index 30639f4c11f..8bc04c8be60 100644 --- a/yql/essentials/utils/ut/ya.make +++ b/yql/essentials/utils/ut/ya.make @@ -1,5 +1,7 @@ UNITTEST_FOR(yql/essentials/utils) +ENABLE(YQL_STYLE_CPP) + SRCS( checkpoint_map_ut.cpp fp_bits_ut.cpp diff --git a/yql/essentials/utils/utf8.cpp b/yql/essentials/utils/utf8.cpp index af284849a83..d47743e2b7a 100644 --- a/yql/essentials/utils/utf8.cpp +++ b/yql/essentials/utils/utf8.cpp @@ -12,6 +12,7 @@ namespace { unsigned char GetRange(unsigned char c) { // Referring to DFA of http://bjoern.hoehrmann.de/utf-8/decoder/dfa/ // With new mapping 1 -> 0x10, 7 -> 0x20, 9 -> 0x40, such that AND operation can test multiple types. 
+ // clang-format off static const unsigned char type[] = { 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, @@ -24,6 +25,7 @@ unsigned char GetRange(unsigned char c) { 8,8,2,2,2,2,2,2,2,2,2,2,2,2,2,2, 2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2, 10,3,3,3,3,3,3,3,3,3,3,3,3,4,3,3, 11,6,6,6,5,8,8,8,8,8,8,8,8,8,8,8, }; + // clang-format on return type[c]; } @@ -38,6 +40,7 @@ struct TUtf8Ranges { }; // see https://lemire.me/blog/2018/05/09/how-quickly-can-you-check-that-a-string-is-valid-unicode-utf-8 +// clang-format off inline static const std::vector<TUtf8Ranges> Utf8Ranges = { { 1, { {0x00, 0x7f}, {0x00, 0x00}, {0x00, 0x00}, {0x00, 0x00}, } }, { 2, { {0xc2, 0xdf}, {0x80, 0xbf}, {0x00, 0x00}, {0x00, 0x00}, } }, @@ -49,9 +52,10 @@ inline static const std::vector<TUtf8Ranges> Utf8Ranges = { { 4, { {0xf1, 0xf3}, {0x80, 0xbf}, {0x80, 0xbf}, {0x80, 0xbf}, } }, { 4, { {0xf4, 0xf4}, {0x80, 0x8f}, {0x80, 0xbf}, {0x80, 0xbf}, } }, }; +// clang-format on std::optional<std::string> RoundBadUtf8(size_t range, std::string_view inputString, size_t pos, - bool roundDown) + bool roundDown) { Y_ENSURE(range > 0); Y_ENSURE(range < Utf8Ranges.size()); @@ -119,35 +123,72 @@ std::optional<std::string> RoundBadUtf8(size_t range, std::string_view inputStri } } } - } return prefix + newSuffix; } -} +} // namespace bool IsUtf8(const std::string_view& str) { for (auto it = str.cbegin(); str.cend() != it;) { -#define COPY() if (str.cend() != it) { c = *it++; } else { return false; } +#define COPY() \ + if (str.cend() != it) { \ + c = *it++; \ + } else { \ + return false; \ + } #define TRANS(mask) result &= ((GetRange(static_cast<unsigned char>(c)) & mask) != 0) -#define TAIL() COPY(); TRANS(0x70) +#define TAIL() \ + COPY(); \ + TRANS(0x70) auto c = *it++; - if (!(c & 0x80)) + if (!(c & 0x80)) { continue; + } bool result = true; switch (GetRange(static_cast<unsigned char>(c))) { - case 2: TAIL(); break; - case 3: TAIL(); 
TAIL(); break; - case 4: COPY(); TRANS(0x50); TAIL(); break; - case 5: COPY(); TRANS(0x10); TAIL(); TAIL(); break; - case 6: TAIL(); TAIL(); TAIL(); break; - case 10: COPY(); TRANS(0x20); TAIL(); break; - case 11: COPY(); TRANS(0x60); TAIL(); TAIL(); break; - default: return false; + case 2: + TAIL(); + break; + case 3: + TAIL(); + TAIL(); + break; + case 4: + COPY(); + TRANS(0x50); + TAIL(); + break; + case 5: + COPY(); + TRANS(0x10); + TAIL(); + TAIL(); + break; + case 6: + TAIL(); + TAIL(); + TAIL(); + break; + case 10: + COPY(); + TRANS(0x20); + TAIL(); + break; + case 11: + COPY(); + TRANS(0x60); + TAIL(); + TAIL(); + break; + default: + return false; } - if (!result) return false; + if (!result) { + return false; + } #undef COPY #undef TRANS #undef TAIL @@ -157,21 +198,30 @@ bool IsUtf8(const std::string_view& str) { unsigned char WideCharSize(char head) { switch (GetRange(static_cast<unsigned char>(head))) { - case 0: return 1; - case 2: return 2; - case 3: return 3; - case 4: return 3; - case 5: return 4; - case 6: return 4; - case 10: return 3; - case 11: return 4; - default: return 0; + case 0: + return 1; + case 2: + return 2; + case 3: + return 3; + case 4: + return 3; + case 5: + return 4; + case 6: + return 4; + case 10: + return 3; + case 11: + return 4; + default: + return 0; } } std::optional<std::string> RoundToNearestValidUtf8(const std::string_view& str, bool roundDown) { const size_t ss = str.size(); - for (size_t pos = 0; pos < ss; ) { + for (size_t pos = 0; pos < ss;) { ui8 c = str[pos]; for (size_t i = 0; i < Utf8Ranges.size(); ++i) { @@ -197,7 +247,7 @@ std::optional<std::string> RoundToNearestValidUtf8(const std::string_view& str, break; } else if (i + 1 == Utf8Ranges.size()) { if (!roundDown) { - return NextValidUtf8(str.substr(0, pos)); + return NextValidUtf8(str.substr(0, pos)); } return RoundBadUtf8(i, str, pos, roundDown); } @@ -257,4 +307,4 @@ std::optional<std::string> NextLexicographicString(const std::string_view& str) return 
result; } -} +} // namespace NYql diff --git a/yql/essentials/utils/utf8.h b/yql/essentials/utils/utf8.h index 5c28353416a..9bd463ffd16 100644 --- a/yql/essentials/utils/utf8.h +++ b/yql/essentials/utils/utf8.h @@ -13,4 +13,4 @@ std::optional<std::string> RoundToNearestValidUtf8(const std::string_view& str, std::optional<std::string> NextValidUtf8(const std::string_view& str); std::optional<std::string> NextLexicographicString(const std::string_view& str); -} +} // namespace NYql diff --git a/yql/essentials/utils/utf8_ut.cpp b/yql/essentials/utils/utf8_ut.cpp index 7479acd7a11..417658cea05 100644 --- a/yql/essentials/utils/utf8_ut.cpp +++ b/yql/essentials/utils/utf8_ut.cpp @@ -3,97 +3,97 @@ #include <library/cpp/testing/unittest/registar.h> Y_UNIT_TEST_SUITE(TUtf8Tests) { - Y_UNIT_TEST(Simple) { - UNIT_ASSERT(NYql::IsUtf8("")); - UNIT_ASSERT(NYql::IsUtf8("\x01_ASCII_\x7F")); - UNIT_ASSERT(NYql::IsUtf8("Привет!")); - UNIT_ASSERT(NYql::IsUtf8("\xF0\x9F\x94\xA2")); +Y_UNIT_TEST(Simple) { + UNIT_ASSERT(NYql::IsUtf8("")); + UNIT_ASSERT(NYql::IsUtf8("\x01_ASCII_\x7F")); + UNIT_ASSERT(NYql::IsUtf8("Привет!")); + UNIT_ASSERT(NYql::IsUtf8("\xF0\x9F\x94\xA2")); - UNIT_ASSERT(!NYql::IsUtf8("\xf5\x80\x80\x80")); - UNIT_ASSERT(!NYql::IsUtf8("\xed\xa6\x80")); - UNIT_ASSERT(!NYql::IsUtf8("\xF0\x9F\x94")); - UNIT_ASSERT(!NYql::IsUtf8("\xE3\x85\xB6\xE7\x9C\xB0\xE3\x9C\xBA\xE2\xAA\x96\xEE\xA2\x8C\xEC\xAF\xB8\xE1\xB2\xBB\xEC\xA3\x9C\xE3\xAB\x8B\xEC\x95\x92\xE1\x8A\xBF\xE2\x8E\x86\xEC\x9B\x8D\xE2\x8E\xAE\xE3\x8A\xA3\xE0\xAC\xBC\xED\xB6\x85")); - UNIT_ASSERT(!NYql::IsUtf8("\xc0\xbe\xd0\xb1\xd0\xbd\xd0\xbe\xd0\xb2\xd0\xbb\xd0\xb5\xd0\xbd\xd0\xb8\xd1\x8e")); - } + UNIT_ASSERT(!NYql::IsUtf8("\xf5\x80\x80\x80")); + UNIT_ASSERT(!NYql::IsUtf8("\xed\xa6\x80")); + UNIT_ASSERT(!NYql::IsUtf8("\xF0\x9F\x94")); + 
UNIT_ASSERT(!NYql::IsUtf8("\xE3\x85\xB6\xE7\x9C\xB0\xE3\x9C\xBA\xE2\xAA\x96\xEE\xA2\x8C\xEC\xAF\xB8\xE1\xB2\xBB\xEC\xA3\x9C\xE3\xAB\x8B\xEC\x95\x92\xE1\x8A\xBF\xE2\x8E\x86\xEC\x9B\x8D\xE2\x8E\xAE\xE3\x8A\xA3\xE0\xAC\xBC\xED\xB6\x85")); + UNIT_ASSERT(!NYql::IsUtf8("\xc0\xbe\xd0\xb1\xd0\xbd\xd0\xbe\xd0\xb2\xd0\xbb\xd0\xb5\xd0\xbd\xd0\xb8\xd1\x8e")); +} - Y_UNIT_TEST(CharSize) { - UNIT_ASSERT_VALUES_EQUAL(NYql::WideCharSize(' '), 1); - UNIT_ASSERT_VALUES_EQUAL(NYql::WideCharSize('\x00'), 1); - UNIT_ASSERT_VALUES_EQUAL(NYql::WideCharSize('\x7F'), 1); - UNIT_ASSERT_VALUES_EQUAL(NYql::WideCharSize('\xD1'), 2); - UNIT_ASSERT_VALUES_EQUAL(NYql::WideCharSize('\xF0'), 4); - UNIT_ASSERT_VALUES_EQUAL(NYql::WideCharSize('\xFF'), 0); - } +Y_UNIT_TEST(CharSize) { + UNIT_ASSERT_VALUES_EQUAL(NYql::WideCharSize(' '), 1); + UNIT_ASSERT_VALUES_EQUAL(NYql::WideCharSize('\x00'), 1); + UNIT_ASSERT_VALUES_EQUAL(NYql::WideCharSize('\x7F'), 1); + UNIT_ASSERT_VALUES_EQUAL(NYql::WideCharSize('\xD1'), 2); + UNIT_ASSERT_VALUES_EQUAL(NYql::WideCharSize('\xF0'), 4); + UNIT_ASSERT_VALUES_EQUAL(NYql::WideCharSize('\xFF'), 0); +} - Y_UNIT_TEST(RoundingDown) { - auto checkDown = [](std::string_view in, std::string_view out) { - auto res = NYql::RoundToNearestValidUtf8(in, true); - UNIT_ASSERT(res); - UNIT_ASSERT(NYql::IsUtf8(*res)); - UNIT_ASSERT_VALUES_EQUAL(*res, out); - UNIT_ASSERT(*res <= in); - }; - checkDown("привет", "привет"); - checkDown("тест\x80", "тест\x7f"); - checkDown("привет\xf5", "привет\xf4\x8f\xbf\xbf"); - checkDown("тест2\xee\x80\x7f", "тест2\xed\x9f\xbf"); - checkDown("ага\xf0\xaa\xaa\xff", "ага\xf0\xaa\xaa\xbf"); - } +Y_UNIT_TEST(RoundingDown) { + auto checkDown = [](std::string_view in, std::string_view out) { + auto res = NYql::RoundToNearestValidUtf8(in, true); + UNIT_ASSERT(res); + UNIT_ASSERT(NYql::IsUtf8(*res)); + UNIT_ASSERT_VALUES_EQUAL(*res, out); + UNIT_ASSERT(*res <= in); + }; + checkDown("привет", "привет"); + checkDown("тест\x80", "тест\x7f"); + 
checkDown("привет\xf5", "привет\xf4\x8f\xbf\xbf"); + checkDown("тест2\xee\x80\x7f", "тест2\xed\x9f\xbf"); + checkDown("ага\xf0\xaa\xaa\xff", "ага\xf0\xaa\xaa\xbf"); +} - Y_UNIT_TEST(RoundingUp) { - auto checkUp = [](std::string_view in, std::string_view out) { - auto res = NYql::RoundToNearestValidUtf8(in, false); - UNIT_ASSERT(res); - UNIT_ASSERT(NYql::IsUtf8(*res)); - UNIT_ASSERT_VALUES_EQUAL(*res, out); - UNIT_ASSERT(*res >= in); - }; +Y_UNIT_TEST(RoundingUp) { + auto checkUp = [](std::string_view in, std::string_view out) { + auto res = NYql::RoundToNearestValidUtf8(in, false); + UNIT_ASSERT(res); + UNIT_ASSERT(NYql::IsUtf8(*res)); + UNIT_ASSERT_VALUES_EQUAL(*res, out); + UNIT_ASSERT(*res >= in); + }; - checkUp("", ""); - checkUp("привет", "привет"); - checkUp("а\xf6", "б"); - checkUp("\xf4\x8f\xbf\xbfа\xf4\x8f\xbf\xbf\xf5", "\xf4\x8f\xbf\xbfб"); - UNIT_ASSERT(!NYql::RoundToNearestValidUtf8("\xf4\x8f\xbf\xbf\xf5", false)); - UNIT_ASSERT(!NYql::RoundToNearestValidUtf8("\xf5", false)); - checkUp("тест\x80", "тест\xc2\x80"); - checkUp("тест\xdf", "тест\xdf\x80"); - checkUp("тест\xf0\x90\xff", "тест\xf0\x91\x80\x80"); - checkUp("ааа\xff", "ааб"); - } + checkUp("", ""); + checkUp("привет", "привет"); + checkUp("а\xf6", "б"); + checkUp("\xf4\x8f\xbf\xbfа\xf4\x8f\xbf\xbf\xf5", "\xf4\x8f\xbf\xbfб"); + UNIT_ASSERT(!NYql::RoundToNearestValidUtf8("\xf4\x8f\xbf\xbf\xf5", false)); + UNIT_ASSERT(!NYql::RoundToNearestValidUtf8("\xf5", false)); + checkUp("тест\x80", "тест\xc2\x80"); + checkUp("тест\xdf", "тест\xdf\x80"); + checkUp("тест\xf0\x90\xff", "тест\xf0\x91\x80\x80"); + checkUp("ааа\xff", "ааб"); +} - Y_UNIT_TEST(NextValid) { - auto checkNext = [](std::string_view in, std::string_view out) { - auto res = NYql::NextValidUtf8(in); - UNIT_ASSERT(res); - UNIT_ASSERT(NYql::IsUtf8(*res)); - UNIT_ASSERT_VALUES_EQUAL(*res, out); - UNIT_ASSERT(*res > in); - }; +Y_UNIT_TEST(NextValid) { + auto checkNext = [](std::string_view in, std::string_view out) { + auto res = 
NYql::NextValidUtf8(in); + UNIT_ASSERT(res); + UNIT_ASSERT(NYql::IsUtf8(*res)); + UNIT_ASSERT_VALUES_EQUAL(*res, out); + UNIT_ASSERT(*res > in); + }; - UNIT_ASSERT(!NYql::NextValidUtf8("")); - checkNext("привет", "привеу"); - checkNext("а", "б"); - checkNext(std::string_view("\x00", 1), "\x01"); - checkNext("\xf4\x8f\xbf\xbfа\xf4\x8f\xbf\xbf", "\xf4\x8f\xbf\xbfб"); - UNIT_ASSERT(!NYql::NextValidUtf8("\xf4\x8f\xbf\xbf")); - UNIT_ASSERT(!NYql::NextValidUtf8("\xf4\x8f\xbf\xbf\xf4\x8f\xbf\xbf")); - } + UNIT_ASSERT(!NYql::NextValidUtf8("")); + checkNext("привет", "привеу"); + checkNext("а", "б"); + checkNext(std::string_view("\x00", 1), "\x01"); + checkNext("\xf4\x8f\xbf\xbfа\xf4\x8f\xbf\xbf", "\xf4\x8f\xbf\xbfб"); + UNIT_ASSERT(!NYql::NextValidUtf8("\xf4\x8f\xbf\xbf")); + UNIT_ASSERT(!NYql::NextValidUtf8("\xf4\x8f\xbf\xbf\xf4\x8f\xbf\xbf")); +} - Y_UNIT_TEST(NextValidString) { - auto checkNext = [](std::string_view in, std::string_view out) { - auto res = NYql::NextLexicographicString(in); - UNIT_ASSERT(res); - UNIT_ASSERT_VALUES_EQUAL(*res, out); - UNIT_ASSERT(*res > in); - }; +Y_UNIT_TEST(NextValidString) { + auto checkNext = [](std::string_view in, std::string_view out) { + auto res = NYql::NextLexicographicString(in); + UNIT_ASSERT(res); + UNIT_ASSERT_VALUES_EQUAL(*res, out); + UNIT_ASSERT(*res > in); + }; - UNIT_ASSERT(!NYql::NextLexicographicString("")); - checkNext("привет", "привеу"); - checkNext("а", "б"); - checkNext(std::string_view("\x00", 1), "\x01"); - checkNext("\xf4\x8f\xbf\xbfа\xf4\x8f\xbf\xbf", "\xf4\x8f\xbf\xbfа\xf4\x8f\xbf\xc0"); - UNIT_ASSERT(!NYql::NextLexicographicString("\xff")); - UNIT_ASSERT(!NYql::NextLexicographicString("\xff\xff")); - checkNext(std::string_view("x\x00\xff\xff", 4), "x\x01"); - } + UNIT_ASSERT(!NYql::NextLexicographicString("")); + checkNext("привет", "привеу"); + checkNext("а", "б"); + checkNext(std::string_view("\x00", 1), "\x01"); + checkNext("\xf4\x8f\xbf\xbfа\xf4\x8f\xbf\xbf", "\xf4\x8f\xbf\xbfа\xf4\x8f\xbf\xc0"); + 
UNIT_ASSERT(!NYql::NextLexicographicString("\xff")); + UNIT_ASSERT(!NYql::NextLexicographicString("\xff\xff")); + checkNext(std::string_view("x\x00\xff\xff", 4), "x\x01"); } +} // Y_UNIT_TEST_SUITE(TUtf8Tests) diff --git a/yql/essentials/utils/ya.make b/yql/essentials/utils/ya.make index 1797655ae68..4c42d50206a 100644 --- a/yql/essentials/utils/ya.make +++ b/yql/essentials/utils/ya.make @@ -1,5 +1,7 @@ LIBRARY() +ENABLE(YQL_STYLE_CPP) + SRCS( cast.h checkpoint_map.cpp diff --git a/yql/essentials/utils/yql_panic.cpp b/yql/essentials/utils/yql_panic.cpp index 83c6acc21fe..3910426dc40 100644 --- a/yql/essentials/utils/yql_panic.cpp +++ b/yql/essentials/utils/yql_panic.cpp @@ -4,9 +4,9 @@ namespace NYql { namespace NDetail { void YqlPanic(const ::NPrivate::TStaticBuf& file, int line, const char* function, - const TStringBuf& condition, const TStringBuf& message) { + const TStringBuf& condition, const TStringBuf& message) { auto err = TYqlPanic() << file.As<TStringBuf>() << ":" << line << " " - << function << "(): requirement " << condition << " failed"; + << function << "(): requirement " << condition << " failed"; if (!message.empty()) { err << ", message: " << message; } diff --git a/yql/essentials/utils/yql_panic.h b/yql/essentials/utils/yql_panic.h index e7e559b2c50..00dedf2a7de 100644 --- a/yql/essentials/utils/yql_panic.h +++ b/yql/essentials/utils/yql_panic.h @@ -5,18 +5,17 @@ namespace NYql { -class TYqlPanic : public yexception -{}; +class TYqlPanic: public yexception {}; namespace NDetail { - [[noreturn]] void YqlPanic(const ::NPrivate::TStaticBuf& file, int line, const char* function, const TStringBuf& condition, const TStringBuf& message); -} +[[noreturn]] void YqlPanic(const ::NPrivate::TStaticBuf& file, int line, const char* function, const TStringBuf& condition, const TStringBuf& message); +} // namespace NDetail -#define YQL_ENSURE(CONDITION, ...) \ - do { \ - if (Y_UNLIKELY(!(CONDITION))) { \ +#define YQL_ENSURE(CONDITION, ...) 
\ + do { \ + if (Y_UNLIKELY(!(CONDITION))) { \ ::NYql::NDetail::YqlPanic(__SOURCE_FILE_IMPL__, __LINE__, __FUNCTION__, #CONDITION, TStringBuilder() << "" __VA_ARGS__); \ - } \ + } \ } while (0) } // namespace NYql diff --git a/yql/essentials/utils/yql_paths.cpp b/yql/essentials/utils/yql_paths.cpp index cc7dd29bfbe..ca359f1ca87 100644 --- a/yql/essentials/utils/yql_paths.cpp +++ b/yql/essentials/utils/yql_paths.cpp @@ -20,4 +20,4 @@ TString BuildTablePath(TStringBuf prefixPath, TStringBuf path) { return prefixPathSplit.AppendMany(pathSplit.begin(), pathSplit.end()).Reconstruct(); } -} +} // namespace NYql diff --git a/yql/essentials/utils/yql_paths.h b/yql/essentials/utils/yql_paths.h index 3ec05e7df82..7ad08d2914f 100644 --- a/yql/essentials/utils/yql_paths.h +++ b/yql/essentials/utils/yql_paths.h @@ -6,4 +6,4 @@ namespace NYql { TString BuildTablePath(TStringBuf prefixPath, TStringBuf path); -} +} // namespace NYql diff --git a/yt/yt/client/chaos_client/replication_card_serialization.cpp b/yt/yt/client/chaos_client/replication_card_serialization.cpp index bd2820316d7..a1f12d9c6ef 100644 --- a/yt/yt/client/chaos_client/replication_card_serialization.cpp +++ b/yt/yt/client/chaos_client/replication_card_serialization.cpp @@ -402,11 +402,10 @@ void ToProto( const TReplicationCardFetchOptions& options) { protoReplicationCard->mutable_replicas()->Reserve(replicationCard.Replicas.size()); - auto sortedIterators = GetSortedIterators(replicationCard.Replicas); - for (const auto& iterator : sortedIterators) { + for (auto it : GetSortedIterators(replicationCard.Replicas)) { auto* protoReplicaEntry = protoReplicationCard->add_replicas(); - ToProto(protoReplicaEntry->mutable_id(), iterator->first); - ToProto(protoReplicaEntry->mutable_info(), iterator->second, options); + ToProto(protoReplicaEntry->mutable_id(), it->first); + ToProto(protoReplicaEntry->mutable_info(), it->second, options); } if (options.IncludeCoordinators) { diff --git 
a/yt/yt/client/table_client/adapters.cpp b/yt/yt/client/table_client/adapters.cpp index a16ac651c90..2840ee8d208 100644 --- a/yt/yt/client/table_client/adapters.cpp +++ b/yt/yt/client/table_client/adapters.cpp @@ -266,11 +266,13 @@ void PipeReaderToWriterByBatches( } } -void PipeInputToOutput( +i64 PipeInputToOutput( IInputStream* input, IOutputStream* output, i64 bufferBlockSize) { + i64 totalBytes = 0; + struct TWriteBufferTag { }; TBlob buffer(GetRefCountedTypeCookie<TWriteBufferTag>(), bufferBlockSize, /*initializeStorage*/ false); @@ -284,17 +286,23 @@ void PipeInputToOutput( break; } + totalBytes += length; + output->Write(buffer.Begin(), length); } output->Finish(); + + return totalBytes; } -void PipeInputToOutput( +i64 PipeInputToOutput( const IAsyncInputStreamPtr& input, IOutputStream* output, i64 bufferBlockSize) { + i64 totalBytes = 0; + struct TWriteBufferTag { }; auto buffer = TSharedMutableRef::Allocate<TWriteBufferTag>(bufferBlockSize, {.InitializeStorage = false}); @@ -306,16 +314,22 @@ void PipeInputToOutput( break; } + totalBytes += length; + output->Write(buffer.Begin(), length); } output->Finish(); + + return totalBytes; } -void PipeInputToOutput( +i64 PipeInputToOutput( const IAsyncZeroCopyInputStreamPtr& input, IOutputStream* output) { + i64 totalBytes = 0; + while (true) { auto data = WaitFor(input->Read()) .ValueOrThrow(); @@ -324,10 +338,14 @@ void PipeInputToOutput( break; } + totalBytes += data.Size(); + output->Write(data.Begin(), data.Size()); } output->Finish(); + + return totalBytes; } //////////////////////////////////////////////////////////////////////////////// diff --git a/yt/yt/client/table_client/adapters.h b/yt/yt/client/table_client/adapters.h index f4b058bf23c..5b4a83e97d2 100644 --- a/yt/yt/client/table_client/adapters.h +++ b/yt/yt/client/table_client/adapters.h @@ -50,17 +50,17 @@ void PipeReaderToWriterByBatches( TCallback<void(TRowBatchReadOptions* mutableOptions, TDuration timeForBatch)> optionsUpdater = {}, TDuration 
pipeDelay = TDuration::Zero()); -void PipeInputToOutput( +i64 PipeInputToOutput( IInputStream* input, IOutputStream* output, i64 bufferBlockSize); -void PipeInputToOutput( +i64 PipeInputToOutput( const NConcurrency::IAsyncInputStreamPtr& input, IOutputStream* output, i64 bufferBlockSize); -void PipeInputToOutput( +i64 PipeInputToOutput( const NConcurrency::IAsyncZeroCopyInputStreamPtr& input, IOutputStream* output); diff --git a/yt/yt/core/misc/proc.cpp b/yt/yt/core/misc/proc.cpp index 929d2643e11..40e05044b37 100644 --- a/yt/yt/core/misc/proc.cpp +++ b/yt/yt/core/misc/proc.cpp @@ -1165,7 +1165,7 @@ TString SafeGetUsernameByUid(int /*uid*/) } #endif -void CloseAllDescriptors(const std::vector<int>& exceptFor) +std::vector<int> CloseAllDescriptors(const std::vector<int>& exceptFor) { #ifdef _linux_ std::vector<int> fds; @@ -1189,8 +1189,11 @@ void CloseAllDescriptors(const std::vector<int>& exceptFor) for (int fd : fds) { YT_VERIFY(TryClose(fd, ignoreBadFD)); } + + return fds; #else Y_UNUSED(exceptFor); + return {}; #endif } diff --git a/yt/yt/core/misc/proc.h b/yt/yt/core/misc/proc.h index 40302f5c18d..885bd1320c5 100644 --- a/yt/yt/core/misc/proc.h +++ b/yt/yt/core/misc/proc.h @@ -183,7 +183,7 @@ TString SafeGetUsernameByUid(int uid); void SetUid(int uid); -void CloseAllDescriptors(const std::vector<int>& exceptFor = std::vector<int>()); +std::vector<int> CloseAllDescriptors(const std::vector<int>& exceptFor = std::vector<int>()); int GetFileDescriptorCount(); |
