diff options
author | Alexander Smirnov <alex@ydb.tech> | 2025-02-14 00:51:34 +0000 |
---|---|---|
committer | Alexander Smirnov <alex@ydb.tech> | 2025-02-14 00:51:34 +0000 |
commit | 7c59c24919f9e86614d1cd19c62829e01dd54097 (patch) | |
tree | d42e64eb38c0388a0f4f3ad148bad8938324e279 | |
parent | 28180f60aec6dcb2b662b6417c90226553ebe2dc (diff) | |
parent | c26bb8abd161c590e8cb0e7280a14c335c1eb893 (diff) | |
download | ydb-7c59c24919f9e86614d1cd19c62829e01dd54097.tar.gz |
Merge branch 'rightlib' into merge-libs-250214-0050
133 files changed, 4138 insertions, 2154 deletions
diff --git a/build/conf/java.conf b/build/conf/java.conf index bc18623f91..dbc53bb371 100644 --- a/build/conf/java.conf +++ b/build/conf/java.conf @@ -1850,7 +1850,6 @@ MANAGED_PEERS= MANAGED_PEERS_CLOSURE= MANAGEABLE_PEERS_ROOTS=contrib/java HAS_MANAGEABLE_PEERS=no -PROPAGATES_MANAGEABLE_PEERS=no # tag:java-specific DEPENDENCY_MANAGEMENT_VALUE= diff --git a/build/conf/linkers/ld.conf b/build/conf/linkers/ld.conf index 79006c21ab..fecd6699cc 100644 --- a/build/conf/linkers/ld.conf +++ b/build/conf/linkers/ld.conf @@ -244,6 +244,7 @@ REAL_LINK_EXEC_DYN_LIB_CMDLINE =\ $YMAKE_PYTHON ${input:"build/scripts/link_dyn_lib.py"} \ ${hide;input:"build/scripts/link_exe.py"} \ --target $TARGET +REAL_LINK_EXEC_DYN_LIB_CMDLINE+=--start-plugins ${ext=.pyplugin:SRCS_GLOBAL} --end-plugins REAL_LINK_EXEC_DYN_LIB_CMDLINE+=$_LD_LINKER_OUTPUT REAL_LINK_EXEC_DYN_LIB_CMDLINE+=\ $_ROOT_FLAGS \ @@ -271,6 +272,7 @@ REAL_LINK_DYN_LIB_CMDLINE =\ $YMAKE_PYTHON ${input:"build/scripts/link_dyn_lib.py"} \ ${hide;input:"build/scripts/link_exe.py"} \ --target $TARGET +REAL_LINK_DYN_LIB_CMDLINE+=--start-plugins ${ext=.pyplugin:SRCS_GLOBAL} --end-plugins REAL_LINK_DYN_LIB_CMDLINE+=$_LD_LINKER_OUTPUT REAL_LINK_DYN_LIB_CMDLINE+=\ ${pre=--whole-archive-peers :WHOLE_ARCHIVE_PEERS} \ diff --git a/build/mapping.conf.json b/build/mapping.conf.json index 7340df7db4..f3dd3a3ad7 100644 --- a/build/mapping.conf.json +++ b/build/mapping.conf.json @@ -498,6 +498,7 @@ "7914217459": "https://devtools-registry.s3.yandex.net/7914217459", "7948644946": "https://devtools-registry.s3.yandex.net/7948644946", "7994647367": "https://devtools-registry.s3.yandex.net/7994647367", + "8029671029": "https://devtools-registry.s3.yandex.net/8029671029", "5486731632": "https://devtools-registry.s3.yandex.net/5486731632", "5514350352": "https://devtools-registry.s3.yandex.net/5514350352", "5514360398": "https://devtools-registry.s3.yandex.net/5514360398", @@ -1729,6 +1730,7 @@ "7914217459": "devtools/ya/test/programs/test_tool/bin/test_tool for linux", "7948644946": "devtools/ya/test/programs/test_tool/bin/test_tool for linux", "7994647367": "devtools/ya/test/programs/test_tool/bin/test_tool for linux", + "8029671029": "devtools/ya/test/programs/test_tool/bin/test_tool for linux", "5486731632": "devtools/ya/test/programs/test_tool/bin3/test_tool3 for linux", "5514350352": "devtools/ya/test/programs/test_tool/bin3/test_tool3 for linux", "5514360398": "devtools/ya/test/programs/test_tool/bin3/test_tool3 for linux", diff --git a/build/platform/test_tool/host.ya.make.inc b/build/platform/test_tool/host.ya.make.inc index 4399fb5d13..bee5c088fb 100644 --- a/build/platform/test_tool/host.ya.make.inc +++ b/build/platform/test_tool/host.ya.make.inc @@ -1,12 +1,12 @@ IF (HOST_OS_DARWIN AND HOST_ARCH_X86_64) - DECLARE_EXTERNAL_RESOURCE(TEST_TOOL_HOST sbr:8018525488) + DECLARE_EXTERNAL_RESOURCE(TEST_TOOL_HOST sbr:8029723475) ELSEIF (HOST_OS_DARWIN AND HOST_ARCH_ARM64) - DECLARE_EXTERNAL_RESOURCE(TEST_TOOL_HOST sbr:8018523316) + DECLARE_EXTERNAL_RESOURCE(TEST_TOOL_HOST sbr:8029717949) ELSEIF (HOST_OS_LINUX AND HOST_ARCH_X86_64) - DECLARE_EXTERNAL_RESOURCE(TEST_TOOL_HOST sbr:8018529985) + DECLARE_EXTERNAL_RESOURCE(TEST_TOOL_HOST sbr:8029737034) ELSEIF (HOST_OS_LINUX AND HOST_ARCH_AARCH64) - DECLARE_EXTERNAL_RESOURCE(TEST_TOOL_HOST sbr:8018521599) + DECLARE_EXTERNAL_RESOURCE(TEST_TOOL_HOST sbr:8029713125) ELSEIF (HOST_OS_WINDOWS AND HOST_ARCH_X86_64) - DECLARE_EXTERNAL_RESOURCE(TEST_TOOL_HOST sbr:8018527620) + DECLARE_EXTERNAL_RESOURCE(TEST_TOOL_HOST sbr:8029729848) ENDIF() diff --git a/build/platform/test_tool/host_os.ya.make.inc b/build/platform/test_tool/host_os.ya.make.inc index 1b8adbc03a..28e960307c 100644 --- a/build/platform/test_tool/host_os.ya.make.inc +++ b/build/platform/test_tool/host_os.ya.make.inc @@ -1,12 +1,12 @@ IF (HOST_OS_DARWIN AND HOST_ARCH_X86_64) - DECLARE_EXTERNAL_RESOURCE(TEST_TOOL_HOST sbr:8018808572) + DECLARE_EXTERNAL_RESOURCE(TEST_TOOL_HOST sbr:8029664104) ELSEIF (HOST_OS_DARWIN AND HOST_ARCH_ARM64) - DECLARE_EXTERNAL_RESOURCE(TEST_TOOL_HOST sbr:8018807025) + DECLARE_EXTERNAL_RESOURCE(TEST_TOOL_HOST sbr:8029660543) ELSEIF (HOST_OS_LINUX AND HOST_ARCH_X86_64) - DECLARE_EXTERNAL_RESOURCE(TEST_TOOL_HOST sbr:8018811524) + DECLARE_EXTERNAL_RESOURCE(TEST_TOOL_HOST sbr:8029671029) ELSEIF (HOST_OS_LINUX AND HOST_ARCH_AARCH64) - DECLARE_EXTERNAL_RESOURCE(TEST_TOOL_HOST sbr:8018805118) + DECLARE_EXTERNAL_RESOURCE(TEST_TOOL_HOST sbr:8029657728) ELSEIF (HOST_OS_WINDOWS AND HOST_ARCH_X86_64) - DECLARE_EXTERNAL_RESOURCE(TEST_TOOL_HOST sbr:8018809911) + DECLARE_EXTERNAL_RESOURCE(TEST_TOOL_HOST sbr:8029667517) ENDIF() diff --git a/build/scripts/link_dyn_lib.py b/build/scripts/link_dyn_lib.py index 53757a7c75..f8e757a3c0 100644 --- a/build/scripts/link_dyn_lib.py +++ b/build/scripts/link_dyn_lib.py @@ -1,6 +1,7 @@ from __future__ import print_function import sys import os +import json import subprocess import tempfile import collections @@ -129,8 +130,6 @@ def fix_windows_param(ex): return ['/DEF:{}'.format(def_file.name)] -MUSL_LIBS = '-lc', '-lcrypt', '-ldl', '-lm', '-lpthread', '-lrt', '-lutil' - CUDA_LIBRARIES = { '-lcublas_static': '-lcublas', '-lcublasLt_static': '-lcublasLt', @@ -179,14 +178,6 @@ def fix_cmd(arch, c): return sum((do_fix(x) for x in c), []) -def fix_cmd_for_musl(cmd): - flags = [] - for flag in cmd: - if flag not in MUSL_LIBS: - flags.append(flag) - return flags - - def fix_cmd_for_dynamic_cuda(cmd): flags = [] for flag in cmd: @@ -208,7 +199,7 @@ def fix_blas_resolving(cmd): return cmd -def parse_args(): +def parse_args(args): parser = optparse.OptionParser() parser.disable_interspersed_args() parser.add_option('--arch') @@ -218,7 +209,6 @@ def parse_args(): parser.add_option('--build-root') parser.add_option('--fix-elf') parser.add_option('--linker-output') - parser.add_option('--musl', action='store_true') parser.add_option('--dynamic-cuda', action='store_true') parser.add_option('--cuda-architectures', help='List of supported CUDA architectures, separated by ":" (e.g. "sm_52:compute_70:lto_90a"') @@ -229,11 +219,26 @@ def parse_args(): parser.add_option('--custom-step') parser.add_option('--python') thinlto_cache.add_options(parser) - return parser.parse_args() + return parser.parse_args(args) if __name__ == '__main__': - opts, args = parse_args() + args = sys.argv[1:] + plugins = [] + + if '--start-plugins' in args: + ib = args.index('--start-plugins') + ie = args.index('--end-plugins') + plugins = args[ib + 1:ie] + args = args[:ib] + args[ie + 1:] + + for p in plugins: + res = subprocess.check_output([sys.executable, p] + args).decode().strip() + + if res: + args = json.loads(res) + + opts, args = parse_args(args) assert opts.arch assert opts.target @@ -242,8 +247,6 @@ if __name__ == '__main__': cmd = fix_cmd(opts.arch, cmd) cmd = fix_py2(cmd) - if opts.musl: - cmd = fix_cmd_for_musl(cmd) if opts.dynamic_cuda: cmd = fix_cmd_for_dynamic_cuda(cmd) else: diff --git a/build/scripts/link_exe.py b/build/scripts/link_exe.py index de5e215ab5..cc47d689f3 100644 --- a/build/scripts/link_exe.py +++ b/build/scripts/link_exe.py @@ -335,10 +335,13 @@ def parse_args(args): if __name__ == '__main__': args = sys.argv[1:] - ib = args.index('--start-plugins') - ie = args.index('--end-plugins') - plugins = args[ib + 1:ie] - args = args[:ib] + args[ie + 1:] + plugins = [] + + if '--start-plugins' in args: + ib = args.index('--start-plugins') + ie = args.index('--end-plugins') + plugins = args[ib + 1:ie] + args = args[:ib] + args[ie + 1:] for p in plugins: res = subprocess.check_output([sys.executable, p] + args).decode().strip() diff --git a/build/ya.conf.json b/build/ya.conf.json index 270a8d48cc..9e0d37efcb 100644 --- a/build/ya.conf.json +++ b/build/ya.conf.json @@ -552,6 +552,7 @@ "cxx_compiler": "$(CLANG)/bin/clang++", "llvm-symbolizer": "$(CLANG)/bin/llvm-symbolizer", "match_root": "CLANG", + "nm": "$(CLANG)/bin/llvm-nm", "objcopy": "$(CLANG)/bin/llvm-objcopy", "profiles": "$(XCODE_TOOLS_ROOT-sbr:799017771)/Xcode/Contents/Developer/Platforms/iPhoneOS.platform/Developer/Library/CoreSimulator/Profiles", "simctl": "$(XCODE_TOOLS_ROOT-sbr:799017771)/Xcode/SystemRoot/PrivateFrameworks/CoreSimulator.framework/Resources/bin/simctl", @@ -1229,6 +1230,7 @@ "cxx_compiler": "$(CLANG)/bin/clang++", "llvm-symbolizer": "$(CLANG)/bin/llvm-symbolizer", "match_root": "CLANG", + "nm": "$(CLANG)/bin/llvm-nm", "objcopy": "$(CLANG)/bin/llvm-objcopy", "profiles": "$(XCODE_TOOLS_ROOT-sbr:799017771)/Xcode/Contents/Developer/Platforms/iPhoneOS.platform/Developer/Library/CoreSimulator/Profiles", "simctl": "$(XCODE_TOOLS_ROOT-sbr:799017771)/Xcode/SystemRoot/PrivateFrameworks/CoreSimulator.framework/Resources/bin/simctl", diff --git a/build/ymake.core.conf b/build/ymake.core.conf index 34a2f5dbb2..d35952e8da 100644 --- a/build/ymake.core.conf +++ b/build/ymake.core.conf @@ -769,7 +769,6 @@ module _BASE_UNIT: _BARE_UNIT { when ($MUSL == "yes") { CFLAGS += -D_musl_ - LINK_DYN_LIB_FLAGS += --musl PEERDIR+=contrib/libs/musl/include } @@ -824,8 +823,9 @@ module _BASE_UNIT: _BARE_UNIT { DEFAULT(SWIG_LANG python) DEFAULT(GP_FLAGS -CtTLANSI-C -Dk* -c) - when ($NEED_BINUTILS_PEERDIR && $BINUTILS_USED && $NEED_PLATFORM_PEERDIRS == "yes") { - PEERDIR+=build/platform/binutils + when ($NEED_LLVM_TOOLS_PEERDIR && $NEED_PLATFORM_PEERDIRS == "yes") { + PEERDIR+=build/platform/clang + LLVM_TOOLS_ROOT=$CLANG18_RESOURCE_GLOBAL } when ($TIDY_ENABLED == "yes") { @@ -2536,20 +2536,25 @@ when ($BT_MINSIZEREL == "yes" || $LINKER_ICF == "yes") { OBJCOPY_TOOL=$OBJCOPY_TOOL_VENDOR OBJDUMP_TOOL=$OBJDUMP_TOOL_VENDOR STRIP_TOOL=$STRIP_TOOL_VENDOR -NEED_BINUTILS_PEERDIR= -BINUTILS_USED= +NM_TOOL=$NM_TOOL_VENDOR +NEED_LLVM_TOOLS_PEERDIR= +LLVM_TOOLS_ROOT= when (!$OBJCOPY_TOOL_VENDOR) { - OBJCOPY_TOOL=$BINUTILS_ROOT_RESOURCE_GLOBAL/bin/objcopy - NEED_BINUTILS_PEERDIR=yes + OBJCOPY_TOOL=${LLVM_TOOLS_ROOT}/bin/llvm-objcopy + NEED_LLVM_TOOLS_PEERDIR=yes } when (!$OBJDUMP_TOOL_VENDOR) { - OBJDUMP_TOOL=$BINUTILS_ROOT_RESOURCE_GLOBAL/bin/objdump - NEED_BINUTILS_PEERDIR=yes + OBJDUMP_TOOL=${LLVM_TOOLS_ROOT}/bin/llvm-objdump + NEED_LLVM_TOOLS_PEERDIR=yes } when (!$STRIP_TOOL_VENDOR) { - STRIP_TOOL=$BINUTILS_ROOT_RESOURCE_GLOBAL/bin/strip - NEED_BINUTILS_PEERDIR=yes + STRIP_TOOL=${LLVM_TOOLS_ROOT}/bin/llvm-strip + NEED_LLVM_TOOLS_PEERDIR=yes +} +when (!$NM_TOOL_VENDOR) { + NM_TOOL=${LLVM_TOOLS_ROOT}/bin/llvm-nm + NEED_LLVM_TOOLS_PEERDIR=yes } SPLIT_DWARF_VALUE=no @@ -2586,7 +2591,6 @@ when ($SPLIT_DWARF_VALUE == "yes" && $NO_SPLIT_DWARF != "yes" && $NO_DEBUGINFO ! $OBJCOPY_TOOL --only-keep-debug $TARGET $SPLIT_DWARF_OUTPUT && \ $STRIP_TOOL --strip-debug $TARGET && \ $OBJCOPY_TOOL --remove-section=.gnu_debuglink --add-gnu-debuglink $SPLIT_DWARF_OUTPUT $TARGET - BINUTILS_USED=yes } ### @usage: EXTRALIBS_STATIC(Libs...) @@ -3135,10 +3139,19 @@ macro _SRC_f_old(SRC, SRCFLAGS...) { .CMD=$YMAKE_PYTHON ${input:"build/scripts/f2c.py"} -t ${tool:"contrib/tools/f2c"} -c ${input:SRC} -o ${output:SRC.c} ${hide;output_include:"f2c.h"} ${hide;kv:"p FT"} ${hide;kv:"pc light-green"} } +### @usage: AR_PLUGIN(plugin_name) +### +### Register script, which will process module's .a (archive) output +### Script will receive path to archive, which it should modify in place macro AR_PLUGIN(name) { SET(_AR_PLUGIN $name.pyplugin) } +### @usage: LD_PLUGIN(plugin_name) +### +### Register script, which will process all inputs to any link_exe.py call with modules's library +### Script will receive all arguments to link_exe.py, and can output into stdout preprocessed list +### of all arguments, in JSON format macro LD_PLUGIN(name) { SRCS(GLOBAL $name.pyplugin) } diff --git a/build/ymake_conf.py b/build/ymake_conf.py index 83a8fc726e..84390dd85f 100755 --- a/build/ymake_conf.py +++ b/build/ymake_conf.py @@ -1067,6 +1067,7 @@ class GnuToolchainOptions(ToolchainOptions): self.objcopy = self.params.get('objcopy') self.objdump = self.params.get('objdump') self.isystem = self.params.get('isystem') + self.nm = self.params.get('nm') self.dwarf_tool = self.target.find_in_dict(self.params.get('dwarf_tool')) @@ -1665,6 +1666,7 @@ class LD(Linker): self.strip = self.tc.strip self.objcopy = self.tc.objcopy self.objdump = self.tc.objdump + self.nm = self.tc.nm self.musl = Setting('MUSL', convert=to_bool) @@ -1728,6 +1730,7 @@ class LD(Linker): emit('STRIP_TOOL_VENDOR', self.strip) emit('OBJCOPY_TOOL_VENDOR', self.objcopy) emit('OBJDUMP_TOOL_VENDOR', self.objdump) + emit('NM_TOOL_VENDOR', self.nm) emit('_LD_FLAGS', self.ld_flags) emit('LD_SDK_VERSION', self.ld_sdk) @@ -2013,10 +2016,6 @@ class MSVCCompiler(MSVC, Compiler): # for msvc compatibility # https://clang.llvm.org/docs/UsersManual.html#microsoft-extensions # '-fdelayed-template-parsing', - '-Wno-deprecated-this-capture', - '-Wno-c++11-narrowing-const-reference', - '-Wno-vla-cxx-extension', # https://github.com/llvm/llvm-project/issues/62836 - '-Wno-invalid-offsetof', ] if target.is_x86: flags.append('-m32') @@ -2037,12 +2036,16 @@ class MSVCCompiler(MSVC, Compiler): # Issue a warning if certain overload is hidden due to inheritance '-Woverloaded-virtual', '-Wno-ambiguous-reversed-operator', + '-Wno-c++11-narrowing-const-reference', '-Wno-defaulted-function-deleted', '-Wno-deprecated-anon-enum-enum-conversion', '-Wno-deprecated-enum-enum-conversion', '-Wno-deprecated-enum-float-conversion', + '-Wno-deprecated-this-capture', '-Wno-deprecated-volatile', + '-Wno-invalid-offsetof', '-Wno-undefined-var-template', + '-Wno-vla-cxx-extension', # https://github.com/llvm/llvm-project/issues/62836 ] defines.append('/D_WIN32_WINNT={0}'.format(WINDOWS_VERSION_MIN)) diff --git a/contrib/libs/poco/Crypto/include/Poco/Crypto/OpenSSLInitializer.h b/contrib/libs/poco/Crypto/include/Poco/Crypto/OpenSSLInitializer.h index 1fbd7d3a2c..694406acc9 100644 --- a/contrib/libs/poco/Crypto/include/Poco/Crypto/OpenSSLInitializer.h +++ b/contrib/libs/poco/Crypto/include/Poco/Crypto/OpenSSLInitializer.h @@ -28,6 +28,16 @@ #endif +#ifndef POCO_CRYPT_NO_SANITIZE_THREAD + #define POCO_CRYPT_NO_SANITIZE_THREAD + #if defined(__has_feature) + #if __has_feature(thread_sanitizer) + #undef POCO_CRYPT_NO_SANITIZE_THREAD + #define POCO_CRYPT_NO_SANITIZE_THREAD __attribute__((no_sanitize_thread)) + #endif + #endif +#endif + extern "C" { struct CRYPTO_dynlock_value @@ -50,14 +60,14 @@ class Crypto_API OpenSSLInitializer public: OpenSSLInitializer(); /// Automatically initialize OpenSSL on startup. - + ~OpenSSLInitializer(); /// Automatically shut down OpenSSL on exit. - - static void initialize(); + + POCO_CRYPT_NO_SANITIZE_THREAD static void initialize(); /// Initializes the OpenSSL machinery. - static void uninitialize(); + POCO_CRYPT_NO_SANITIZE_THREAD static void uninitialize(); /// Shuts down the OpenSSL machinery. static bool isFIPSEnabled(); @@ -71,7 +81,7 @@ protected: { SEEDSIZE = 256 }; - + // OpenSSL multithreading support static void lock(int mode, int n, const char* file, int line); static unsigned long id(); diff --git a/contrib/python/ydb/py3/.dist-info/METADATA b/contrib/python/ydb/py3/.dist-info/METADATA index 181a3da9c5..aa17898a66 100644 --- a/contrib/python/ydb/py3/.dist-info/METADATA +++ b/contrib/python/ydb/py3/.dist-info/METADATA @@ -1,6 +1,6 @@ Metadata-Version: 2.1 Name: ydb -Version: 3.18.15 +Version: 3.18.16 Summary: YDB Python SDK Home-page: http://github.com/ydb-platform/ydb-python-sdk Author: Yandex LLC diff --git a/contrib/python/ydb/py3/ya.make b/contrib/python/ydb/py3/ya.make index 1da9bb152c..8b10140b34 100644 --- a/contrib/python/ydb/py3/ya.make +++ b/contrib/python/ydb/py3/ya.make @@ -2,7 +2,7 @@ PY3_LIBRARY() -VERSION(3.18.15) +VERSION(3.18.16) LICENSE(Apache-2.0) diff --git a/contrib/python/ydb/py3/ydb/_grpc/grpcwrapper/ydb_topic.py b/contrib/python/ydb/py3/ydb/_grpc/grpcwrapper/ydb_topic.py index 972003989c..d1872f4245 100644 --- a/contrib/python/ydb/py3/ydb/_grpc/grpcwrapper/ydb_topic.py +++ b/contrib/python/ydb/py3/ydb/_grpc/grpcwrapper/ydb_topic.py @@ -418,12 +418,14 @@ class StreamReadMessage: class InitRequest(IToProto): topics_read_settings: List["StreamReadMessage.InitRequest.TopicReadSettings"] consumer: str + auto_partitioning_support: bool def to_proto(self) -> ydb_topic_pb2.StreamReadMessage.InitRequest: res = ydb_topic_pb2.StreamReadMessage.InitRequest() res.consumer = self.consumer for settings in self.topics_read_settings: res.topics_read_settings.append(settings.to_proto()) + res.auto_partitioning_support = self.auto_partitioning_support return res @dataclass @@ -696,6 +698,20 @@ class StreamReadMessage: ) @dataclass + class EndPartitionSession(IFromProto): + partition_session_id: int + adjacent_partition_ids: List[int] + child_partition_ids: List[int] + + @staticmethod + def from_proto(msg: ydb_topic_pb2.StreamReadMessage.EndPartitionSession): + return StreamReadMessage.EndPartitionSession( + partition_session_id=msg.partition_session_id, + adjacent_partition_ids=list(msg.adjacent_partition_ids), + child_partition_ids=list(msg.child_partition_ids), + ) + + @dataclass class FromClient(IToProto): client_message: "ReaderMessagesFromClientToServer" @@ -774,6 +790,13 @@ class StreamReadMessage: msg.partition_session_status_response ), ) + elif mess_type == "end_partition_session": + return StreamReadMessage.FromServer( + server_status=server_status, + server_message=StreamReadMessage.EndPartitionSession.from_proto( + msg.end_partition_session, + ), + ) else: raise issues.UnexpectedGrpcMessage( "Unexpected message while parse ReaderMessagesFromServerToClient: '%s'" % mess_type @@ -798,6 +821,7 @@ ReaderMessagesFromServerToClient = Union[ UpdateTokenResponse, StreamReadMessage.StartPartitionSessionRequest, StreamReadMessage.StopPartitionSessionRequest, + StreamReadMessage.EndPartitionSession, ] @@ -942,18 +966,130 @@ class AlterConsumer(IToProto, IFromPublic): class PartitioningSettings(IToProto, IFromProto): min_active_partitions: int partition_count_limit: int + max_active_partitions: int + auto_partitioning_settings: AutoPartitioningSettings @staticmethod def from_proto(msg: ydb_topic_pb2.PartitioningSettings) -> "PartitioningSettings": return PartitioningSettings( min_active_partitions=msg.min_active_partitions, partition_count_limit=msg.partition_count_limit, + max_active_partitions=msg.max_active_partitions, + auto_partitioning_settings=AutoPartitioningSettings.from_proto(msg.auto_partitioning_settings), ) def to_proto(self) -> ydb_topic_pb2.PartitioningSettings: + auto_partitioning_settings = None + if self.auto_partitioning_settings is not None: + auto_partitioning_settings = self.auto_partitioning_settings.to_proto() + return ydb_topic_pb2.PartitioningSettings( min_active_partitions=self.min_active_partitions, partition_count_limit=self.partition_count_limit, + max_active_partitions=self.max_active_partitions, + auto_partitioning_settings=auto_partitioning_settings, + ) + + +class AutoPartitioningStrategy(int, IFromProto, IFromPublic, IToPublic): + UNSPECIFIED = 0 + DISABLED = 1 + SCALE_UP = 2 + SCALE_UP_AND_DOWN = 3 + PAUSED = 4 + + @staticmethod + def from_public( + strategy: Optional[ydb_topic_public_types.PublicAutoPartitioningStrategy], + ) -> Optional["AutoPartitioningStrategy"]: + if strategy is None: + return None + + return AutoPartitioningStrategy(strategy) + + @staticmethod + def from_proto(code: Optional[int]) -> Optional["AutoPartitioningStrategy"]: + if code is None: + return None + + return AutoPartitioningStrategy(code) + + def to_public(self) -> ydb_topic_public_types.PublicAutoPartitioningStrategy: + try: + return ydb_topic_public_types.PublicAutoPartitioningStrategy(int(self)) + except KeyError: + return ydb_topic_public_types.PublicAutoPartitioningStrategy.UNSPECIFIED + + +@dataclass +class AutoPartitioningSettings(IToProto, IFromProto, IFromPublic, IToPublic): + strategy: AutoPartitioningStrategy + partition_write_speed: AutoPartitioningWriteSpeedStrategy + + @staticmethod + def from_public( + settings: Optional[ydb_topic_public_types.PublicAutoPartitioningSettings], + ) -> Optional[AutoPartitioningSettings]: + if not settings: + return None + + return AutoPartitioningSettings( + strategy=settings.strategy, + partition_write_speed=AutoPartitioningWriteSpeedStrategy( + stabilization_window=settings.stabilization_window, + up_utilization_percent=settings.up_utilization_percent, + down_utilization_percent=settings.down_utilization_percent, + ), + ) + + @staticmethod + def from_proto(msg: ydb_topic_pb2.AutoPartitioningSettings) -> AutoPartitioningSettings: + if msg is None: + return None + + return AutoPartitioningSettings( + strategy=AutoPartitioningStrategy.from_proto(msg.strategy), + partition_write_speed=AutoPartitioningWriteSpeedStrategy.from_proto(msg.partition_write_speed), + ) + + def to_proto(self) -> ydb_topic_pb2.AutoPartitioningSettings: + return ydb_topic_pb2.AutoPartitioningSettings( + strategy=self.strategy, partition_write_speed=self.partition_write_speed.to_proto() + ) + + def to_public(self) -> ydb_topic_public_types.PublicAutoPartitioningSettings: + return ydb_topic_public_types.PublicAutoPartitioningSettings( + strategy=self.strategy.to_public(), + stabilization_window=self.partition_write_speed.stabilization_window, + up_utilization_percent=self.partition_write_speed.up_utilization_percent, + down_utilization_percent=self.partition_write_speed.down_utilization_percent, + ) + + +@dataclass +class AutoPartitioningWriteSpeedStrategy(IToProto, IFromProto): + stabilization_window: Optional[datetime.timedelta] + up_utilization_percent: Optional[int] + down_utilization_percent: Optional[int] + + def to_proto(self): + return ydb_topic_pb2.AutoPartitioningWriteSpeedStrategy( + stabilization_window=proto_duration_from_timedelta(self.stabilization_window), + up_utilization_percent=self.up_utilization_percent, + down_utilization_percent=self.down_utilization_percent, + ) + + @staticmethod + def from_proto( + msg: Optional[ydb_topic_pb2.AutoPartitioningWriteSpeedStrategy], + ) -> Optional[AutoPartitioningWriteSpeedStrategy]: + if msg is None: + return None + + return AutoPartitioningWriteSpeedStrategy( + stabilization_window=timedelta_from_proto_duration(msg.stabilization_window), + up_utilization_percent=msg.up_utilization_percent, + down_utilization_percent=msg.down_utilization_percent, ) @@ -961,11 +1097,65 @@ class PartitioningSettings(IToProto, IFromProto): class AlterPartitioningSettings(IToProto): set_min_active_partitions: Optional[int] set_partition_count_limit: Optional[int] + set_max_active_partitions: Optional[int] + alter_auto_partitioning_settings: Optional[AlterAutoPartitioningSettings] def to_proto(self) -> ydb_topic_pb2.AlterPartitioningSettings: + alter_auto_partitioning_settings = None + if self.alter_auto_partitioning_settings is not None: + alter_auto_partitioning_settings = self.alter_auto_partitioning_settings.to_proto() + return ydb_topic_pb2.AlterPartitioningSettings( set_min_active_partitions=self.set_min_active_partitions, set_partition_count_limit=self.set_partition_count_limit, + set_max_active_partitions=self.set_max_active_partitions, + alter_auto_partitioning_settings=alter_auto_partitioning_settings, + ) + + +@dataclass +class AlterAutoPartitioningSettings(IToProto, IFromPublic): + set_strategy: Optional[AutoPartitioningStrategy] + set_partition_write_speed: Optional[AlterAutoPartitioningWriteSpeedStrategy] + + @staticmethod + def from_public( + settings: Optional[ydb_topic_public_types.PublicAlterAutoPartitioningSettings], + ) -> Optional[AlterAutoPartitioningSettings]: + if not settings: + return None + + return AlterAutoPartitioningSettings( + set_strategy=settings.set_strategy, + set_partition_write_speed=AlterAutoPartitioningWriteSpeedStrategy( + set_stabilization_window=settings.set_stabilization_window, + set_up_utilization_percent=settings.set_up_utilization_percent, + set_down_utilization_percent=settings.set_down_utilization_percent, + ), + ) + + def to_proto(self) -> ydb_topic_pb2.AlterAutoPartitioningSettings: + set_partition_write_speed = None + if self.set_partition_write_speed: + set_partition_write_speed = self.set_partition_write_speed.to_proto() + + return ydb_topic_pb2.AlterAutoPartitioningSettings( + set_strategy=self.set_strategy, + set_partition_write_speed=set_partition_write_speed, + ) + + +@dataclass +class AlterAutoPartitioningWriteSpeedStrategy(IToProto): + set_stabilization_window: Optional[datetime.timedelta] + set_up_utilization_percent: Optional[int] + set_down_utilization_percent: Optional[int] + + def to_proto(self) -> ydb_topic_pb2.AlterAutoPartitioningWriteSpeedStrategy: + return ydb_topic_pb2.AlterAutoPartitioningWriteSpeedStrategy( + set_stabilization_window=proto_duration_from_timedelta(self.set_stabilization_window), + set_up_utilization_percent=self.set_up_utilization_percent, + set_down_utilization_percent=self.set_down_utilization_percent, ) @@ -992,7 +1182,7 @@ class MeteringMode(int, IFromProto, IFromPublic, IToPublic): def to_public(self) -> ydb_topic_public_types.PublicMeteringMode: try: - ydb_topic_public_types.PublicMeteringMode(int(self)) + return ydb_topic_public_types.PublicMeteringMode(int(self)) except KeyError: return ydb_topic_public_types.PublicMeteringMode.UNSPECIFIED @@ -1011,9 +1201,13 @@ class CreateTopicRequest(IToProto, IFromPublic): metering_mode: "MeteringMode" def to_proto(self) -> ydb_topic_pb2.CreateTopicRequest: + partitioning_settings = None + if self.partitioning_settings is not None: + partitioning_settings = self.partitioning_settings.to_proto() + return ydb_topic_pb2.CreateTopicRequest( path=self.path, - partitioning_settings=self.partitioning_settings.to_proto(), + partitioning_settings=partitioning_settings, retention_period=proto_duration_from_timedelta(self.retention_period), retention_storage_mb=self.retention_storage_mb, supported_codecs=self.supported_codecs.to_proto(), @@ -1038,11 +1232,17 @@ class CreateTopicRequest(IToProto, IFromPublic): consumer = ydb_topic_public_types.PublicConsumer(name=consumer) consumers.append(Consumer.from_public(consumer)) + auto_partitioning_settings = None + if req.auto_partitioning_settings is not None: + auto_partitioning_settings = AutoPartitioningSettings.from_public(req.auto_partitioning_settings) + return CreateTopicRequest( path=req.path, partitioning_settings=PartitioningSettings( min_active_partitions=req.min_active_partitions, partition_count_limit=req.partition_count_limit, + max_active_partitions=req.max_active_partitions, + auto_partitioning_settings=auto_partitioning_settings, ), retention_period=req.retention_period, retention_storage_mb=req.retention_storage_mb, @@ -1113,6 +1313,12 @@ class AlterTopicRequest(IToProto, IFromPublic): consumer = ydb_topic_public_types.PublicAlterConsumer(name=consumer) alter_consumers.append(AlterConsumer.from_public(consumer)) + alter_auto_partitioning_settings = None + if req.alter_auto_partitioning_settings is not None: + alter_auto_partitioning_settings = AlterAutoPartitioningSettings.from_public( + req.alter_auto_partitioning_settings + ) + drop_consumers = req.drop_consumers if req.drop_consumers else [] return AlterTopicRequest( @@ -1120,6 +1326,8 @@ class AlterTopicRequest(IToProto, IFromPublic): alter_partitioning_settings=AlterPartitioningSettings( set_min_active_partitions=req.set_min_active_partitions, set_partition_count_limit=req.set_partition_count_limit, + set_max_active_partitions=req.set_max_active_partitions, + alter_auto_partitioning_settings=alter_auto_partitioning_settings, ), add_consumers=add_consumers, set_retention_period=req.set_retention_period, @@ -1180,6 +1388,8 @@ class DescribeTopicResult(IFromProtoWithProtoType, IToPublic): return ydb_topic_public_types.PublicDescribeTopicResult( self=scheme._wrap_scheme_entry(self.self_proto), min_active_partitions=self.partitioning_settings.min_active_partitions, + max_active_partitions=self.partitioning_settings.max_active_partitions, + auto_partitioning_settings=self.partitioning_settings.auto_partitioning_settings.to_public(), partition_count_limit=self.partitioning_settings.partition_count_limit, partitions=list(map(DescribeTopicResult.PartitionInfo.to_public, self.partitions)), retention_period=self.retention_period, diff --git a/contrib/python/ydb/py3/ydb/_grpc/grpcwrapper/ydb_topic_public_types.py b/contrib/python/ydb/py3/ydb/_grpc/grpcwrapper/ydb_topic_public_types.py index 917dd53363..e3b118e9ca 100644 --- a/contrib/python/ydb/py3/ydb/_grpc/grpcwrapper/ydb_topic_public_types.py +++ b/contrib/python/ydb/py3/ydb/_grpc/grpcwrapper/ydb_topic_public_types.py @@ -18,6 +18,7 @@ from ...scheme import SchemeEntry class CreateTopicRequestParams: path: str min_active_partitions: Optional[int] + max_active_partitions: Optional[int] partition_count_limit: Optional[int] retention_period: Optional[datetime.timedelta] retention_storage_mb: Optional[int] @@ -27,12 +28,14 @@ class CreateTopicRequestParams: attributes: Optional[Dict[str, str]] consumers: Optional[List[Union["PublicConsumer", str]]] metering_mode: Optional["PublicMeteringMode"] + auto_partitioning_settings: Optional["PublicAutoPartitioningSettings"] @dataclass class AlterTopicRequestParams: path: str set_min_active_partitions: Optional[int] + set_max_active_partitions: Optional[int] set_partition_count_limit: Optional[int] add_consumers: Optional[List[Union["PublicConsumer", str]]] alter_consumers: Optional[List[Union["PublicAlterConsumer", str]]] @@ -44,6 +47,7 @@ class AlterTopicRequestParams: set_retention_period: Optional[datetime.timedelta] set_retention_storage_mb: Optional[int] set_supported_codecs: Optional[List[Union["PublicCodec", int]]] + alter_auto_partitioning_settings: Optional["PublicAlterAutoPartitioningSettings"] class PublicCodec(int): @@ -67,6 +71,30 @@ class PublicMeteringMode(IntEnum): REQUEST_UNITS = 2 +class PublicAutoPartitioningStrategy(IntEnum): + UNSPECIFIED = 0 + DISABLED = 1 + SCALE_UP = 2 + SCALE_UP_AND_DOWN = 3 + PAUSED = 4 + + +@dataclass +class PublicAutoPartitioningSettings: + strategy: Optional["PublicAutoPartitioningStrategy"] = None + stabilization_window: Optional[datetime.timedelta] = None + down_utilization_percent: Optional[int] = None + up_utilization_percent: Optional[int] = None + + +@dataclass +class PublicAlterAutoPartitioningSettings: + set_strategy: Optional["PublicAutoPartitioningStrategy"] = None + set_stabilization_window: Optional[datetime.timedelta] = None + set_down_utilization_percent: Optional[int] = None + set_up_utilization_percent: Optional[int] = None + + @dataclass class PublicConsumer: name: str @@ -137,6 +165,9 @@ class PublicDescribeTopicResult: min_active_partitions: int "Minimum partition count auto merge would stop working at" + max_active_partitions: int + "Minimum partition count auto split would stop working at" + partition_count_limit: int "Limit for total partition count, including active (open for write) and read-only partitions" @@ -170,6 +201,8 @@ class PublicDescribeTopicResult: topic_stats: "PublicDescribeTopicResult.TopicStats" "Statistics of topic" + auto_partitioning_settings: "PublicAutoPartitioningSettings" + @dataclass class PartitionInfo: partition_id: int diff --git a/contrib/python/ydb/py3/ydb/_topic_reader/datatypes.py b/contrib/python/ydb/py3/ydb/_topic_reader/datatypes.py index a9c811ac4f..b48501aff2 100644 --- a/contrib/python/ydb/py3/ydb/_topic_reader/datatypes.py +++ b/contrib/python/ydb/py3/ydb/_topic_reader/datatypes.py @@ -121,6 +121,16 @@ class PartitionSession: def closed(self): return self.state == PartitionSession.State.Stopped + def end(self): + if self.closed: + return + + self.state = PartitionSession.State.Ended + + @property + def ended(self): + return self.state == PartitionSession.State.Ended + def _ensure_not_closed(self): if self.state == PartitionSession.State.Stopped: raise topic_reader_asyncio.PublicTopicReaderPartitionExpiredError() @@ -129,6 +139,7 @@ class PartitionSession: Active = 1 GracefulShutdown = 2 Stopped = 3 + Ended = 4 @dataclass(order=True) class CommitAckWaiter: diff --git a/contrib/python/ydb/py3/ydb/_topic_reader/topic_reader.py b/contrib/python/ydb/py3/ydb/_topic_reader/topic_reader.py index b907ee2794..8bc12cc0d8 100644 --- a/contrib/python/ydb/py3/ydb/_topic_reader/topic_reader.py +++ b/contrib/python/ydb/py3/ydb/_topic_reader/topic_reader.py @@ -45,6 +45,7 @@ class PublicReaderSettings: consumer: str topic: TopicSelectorTypes buffer_size_bytes: int = 50 * 1024 * 1024 + auto_partitioning_support: bool = True decoders: Union[Mapping[int, Callable[[bytes], bytes]], None] = None """decoders: map[codec_code] func(encoded_bytes)->decoded_bytes""" @@ -77,6 +78,7 @@ class PublicReaderSettings: return StreamReadMessage.InitRequest( topics_read_settings=list(map(PublicTopicSelector._to_topic_read_settings, selectors)), # type: ignore consumer=self.consumer, + auto_partitioning_support=self.auto_partitioning_support, ) def _retry_settings(self) -> RetrySettings: diff --git a/contrib/python/ydb/py3/ydb/_topic_reader/topic_reader_asyncio.py b/contrib/python/ydb/py3/ydb/_topic_reader/topic_reader_asyncio.py index e407fe01da..7061b4e449 100644 --- a/contrib/python/ydb/py3/ydb/_topic_reader/topic_reader_asyncio.py +++ b/contrib/python/ydb/py3/ydb/_topic_reader/topic_reader_asyncio.py @@ -388,6 +388,14 @@ class ReaderStream: partition_session_id, batch = self._message_batches.popitem(last=False) return partition_session_id, batch + def _return_batch_to_queue(self, part_sess_id: int, batch: datatypes.PublicBatch): + self._message_batches[part_sess_id] = batch + + # In case of auto-split we should return all parent messages ASAP + # without queue rotation to prevent child's messages before parent's. + if part_sess_id in self._partition_sessions and self._partition_sessions[part_sess_id].ended: + self._message_batches.move_to_end(part_sess_id, last=False) + def receive_batch_nowait(self, max_messages: Optional[int] = None): if self._get_first_error(): raise self._get_first_error() @@ -403,7 +411,8 @@ class ReaderStream: cutted_batch = batch._pop_batch(message_count=max_messages) - self._message_batches[part_sess_id] = batch + self._return_batch_to_queue(part_sess_id, batch) + self._buffer_release_bytes(cutted_batch._bytes_size) return cutted_batch @@ -423,7 +432,7 @@ class ReaderStream: self._buffer_release_bytes(batch._bytes_size) else: # TODO: we should somehow release bytes from single message as well - self._message_batches[part_sess_id] = batch + self._return_batch_to_queue(part_sess_id, batch) return message @@ -498,6 +507,12 @@ class ReaderStream: ): self._on_partition_session_stop(message.server_message) + elif isinstance( + message.server_message, + StreamReadMessage.EndPartitionSession, + ): + self._on_end_partition_session(message.server_message) + elif isinstance(message.server_message, UpdateTokenResponse): self._update_token_event.set() @@ -575,6 +590,16 @@ class ReaderStream: ) ) + def _on_end_partition_session(self, message: StreamReadMessage.EndPartitionSession): + logger.debug( + f"End partition session with id: {message.partition_session_id}, " + f"child partitions: {message.child_partition_ids}" + ) + + if message.partition_session_id in self._partition_sessions: + # Mark partition session as ended not to shuffle messages. + self._partition_sessions[message.partition_session_id].end() + def _on_read_response(self, message: StreamReadMessage.ReadResponse): self._buffer_consume_bytes(message.bytes_size) diff --git a/contrib/python/ydb/py3/ydb/aio/query/pool.py b/contrib/python/ydb/py3/ydb/aio/query/pool.py index 456896dbb5..f6a84eb0b1 100644 --- a/contrib/python/ydb/py3/ydb/aio/query/pool.py +++ b/contrib/python/ydb/py3/ydb/aio/query/pool.py @@ -13,9 +13,11 @@ from ...retries import ( RetrySettings, retry_operation_async, ) +from ...query.base import BaseQueryTxMode from ...query.base import QueryClientSettings from ... import convert from ..._grpc.grpcwrapper import common_utils +from ..._grpc.grpcwrapper import ydb_query_public_types as _ydb_query_public logger = logging.getLogger(__name__) @@ -122,6 +124,39 @@ class QuerySessionPool: return await retry_operation_async(wrapped_callee, retry_settings) + async def retry_tx_async( + self, + callee: Callable, + tx_mode: Optional[BaseQueryTxMode] = None, + retry_settings: Optional[RetrySettings] = None, + *args, + **kwargs, + ): + """Special interface to execute a bunch of commands with transaction in a safe, retriable way. + + :param callee: A function, that works with session. + :param tx_mode: Transaction mode, which is a one from the following choises: + 1) QuerySerializableReadWrite() which is default mode; + 2) QueryOnlineReadOnly(allow_inconsistent_reads=False); + 3) QuerySnapshotReadOnly(); + 4) QueryStaleReadOnly(). + :param retry_settings: RetrySettings object. + + :return: Result sets or exception in case of execution errors. + """ + + tx_mode = tx_mode if tx_mode else _ydb_query_public.QuerySerializableReadWrite() + retry_settings = RetrySettings() if retry_settings is None else retry_settings + + async def wrapped_callee(): + async with self.checkout() as session: + async with session.transaction(tx_mode=tx_mode) as tx: + result = await callee(tx, *args, **kwargs) + await tx.commit() + return result + + return await retry_operation_async(wrapped_callee, retry_settings) + async def execute_with_retries( self, query: str, diff --git a/contrib/python/ydb/py3/ydb/import_client.py b/contrib/python/ydb/py3/ydb/import_client.py index 830f10c5bb..9a01e5a508 100644 --- a/contrib/python/ydb/py3/ydb/import_client.py +++ b/contrib/python/ydb/py3/ydb/import_client.py @@ -32,7 +32,10 @@ class ImportProgress(enum.IntEnum): def _initialize_progresses(): for key, value in ydb_import_pb2.ImportProgress.Progress.items(): - _progresses[value] = getattr(ImportProgress, key[len("PROGRESS_") :]) + try: + _progresses[value] = getattr(ImportProgress, key[len("PROGRESS_") :]) + except AttributeError: + pass _initialize_progresses() diff --git a/contrib/python/ydb/py3/ydb/query/pool.py b/contrib/python/ydb/py3/ydb/query/pool.py index f1fcd17360..e3775c4dd1 100644 --- a/contrib/python/ydb/py3/ydb/query/pool.py +++ b/contrib/python/ydb/py3/ydb/query/pool.py @@ -8,6 +8,7 @@ import time import threading import queue +from .base import BaseQueryTxMode from .base import QueryClientSettings from .session import ( QuerySession, @@ -20,6 +21,7 @@ from .. import issues from .. import convert from ..settings import BaseRequestSettings from .._grpc.grpcwrapper import common_utils +from .._grpc.grpcwrapper import ydb_query_public_types as _ydb_query_public logger = logging.getLogger(__name__) @@ -138,6 +140,39 @@ class QuerySessionPool: return retry_operation_sync(wrapped_callee, retry_settings) + def retry_tx_sync( + self, + callee: Callable, + tx_mode: Optional[BaseQueryTxMode] = None, + retry_settings: Optional[RetrySettings] = None, + *args, + **kwargs, + ): + """Special interface to execute a bunch of commands with transaction in a safe, retriable way. + + :param callee: A function, that works with session. + :param tx_mode: Transaction mode, which is a one from the following choises: + 1) QuerySerializableReadWrite() which is default mode; + 2) QueryOnlineReadOnly(allow_inconsistent_reads=False); + 3) QuerySnapshotReadOnly(); + 4) QueryStaleReadOnly(). + :param retry_settings: RetrySettings object. + + :return: Result sets or exception in case of execution errors. + """ + + tx_mode = tx_mode if tx_mode else _ydb_query_public.QuerySerializableReadWrite() + retry_settings = RetrySettings() if retry_settings is None else retry_settings + + def wrapped_callee(): + with self.checkout(timeout=retry_settings.max_session_acquire_timeout) as session: + with session.transaction(tx_mode=tx_mode) as tx: + result = callee(tx, *args, **kwargs) + tx.commit() + return result + + return retry_operation_sync(wrapped_callee, retry_settings) + def execute_with_retries( self, query: str, diff --git a/contrib/python/ydb/py3/ydb/scheme.py b/contrib/python/ydb/py3/ydb/scheme.py index 04951b5eae..263d1c65d3 100644 --- a/contrib/python/ydb/py3/ydb/scheme.py +++ b/contrib/python/ydb/py3/ydb/scheme.py @@ -24,6 +24,10 @@ class SchemeEntryType(enum.IntEnum): SEQUENCE = 15 REPLICATION = 16 TOPIC = 17 + EXTERNAL_TABLE = 18 + EXTERNAL_DATA_SOURCE = 19 + VIEW = 20 + RESOURCE_POOL = 21 @classmethod def _missing_(cls, value): @@ -103,6 +107,38 @@ class SchemeEntryType(enum.IntEnum): """ return entry == SchemeEntryType.DATABASE or entry == SchemeEntryType.DIRECTORY + @staticmethod + def is_external_table(entry): + """ + :param entry: A scheme entry to check + :return: True if scheme entry is an external table and False otherwise + """ + return entry == SchemeEntryType.EXTERNAL_TABLE + + @staticmethod + def is_external_data_source(entry): + """ + :param entry: A scheme entry to check + :return: True if scheme entry is an external data source and False otherwise + """ + return entry == SchemeEntryType.EXTERNAL_DATA_SOURCE + + @staticmethod + def is_external_view(entry): + """ + :param entry: A scheme entry to check + :return: True if scheme entry is a view and False otherwise + """ + return entry == SchemeEntryType.VIEW + + @staticmethod + def is_external_resource_pool(entry): + """ + :param entry: A scheme entry to check + :return: True if scheme entry is a resource pool and False otherwise + """ + return entry == SchemeEntryType.RESOURCE_POOL + class SchemeEntry(object): __slots__ = ( @@ -185,6 +221,30 @@ class SchemeEntry(object): """ return SchemeEntryType.is_coordination_node(self.type) + def is_external_table(self): + """ + :return: True if scheme entry is an external table and False otherwise + """ + return SchemeEntryType.is_external_table(self.type) + + def is_external_data_source(self): + """ + :return: True if scheme entry is an external data source and False otherwise + """ + return SchemeEntryType.is_external_data_source(self.type) + + def is_view(self): + """ + :return: True if scheme entry is a view and False otherwise + """ + return SchemeEntryType.is_view(self.type) + + def is_resource_pool(self): + """ + :return: True if scheme entry is a resource pool and False otherwise + """ + return SchemeEntryType.is_resource_pool(self.type) + class Directory(SchemeEntry): __slots__ = ("children",) diff --git a/contrib/python/ydb/py3/ydb/topic.py b/contrib/python/ydb/py3/ydb/topic.py index f0b872e297..55f4ea04c5 100644 --- a/contrib/python/ydb/py3/ydb/topic.py +++ b/contrib/python/ydb/py3/ydb/topic.py @@ -7,6 +7,9 @@ __all__ = [ "TopicCodec", "TopicConsumer", "TopicAlterConsumer", + "TopicAlterAutoPartitioningSettings", + "TopicAutoPartitioningSettings", + "TopicAutoPartitioningStrategy", "TopicDescription", "TopicError", "TopicMeteringMode", @@ -80,6 +83,9 @@ from ._grpc.grpcwrapper.ydb_topic_public_types import ( # noqa: F401 PublicConsumer as TopicConsumer, PublicAlterConsumer as TopicAlterConsumer, PublicMeteringMode as TopicMeteringMode, + PublicAutoPartitioningStrategy as TopicAutoPartitioningStrategy, + PublicAutoPartitioningSettings as TopicAutoPartitioningSettings, + PublicAlterAutoPartitioningSettings as TopicAlterAutoPartitioningSettings, ) @@ -108,6 +114,7 @@ class TopicClientAsyncIO: self, path: str, min_active_partitions: Optional[int] = None, + max_active_partitions: Optional[int] = None, partition_count_limit: Optional[int] = None, retention_period: Optional[datetime.timedelta] = None, retention_storage_mb: Optional[int] = None, @@ -117,6 +124,7 @@ class TopicClientAsyncIO: attributes: Optional[Dict[str, str]] = None, consumers: Optional[List[Union[TopicConsumer, str]]] = None, metering_mode: Optional[TopicMeteringMode] = None, + auto_partitioning_settings: Optional[TopicAutoPartitioningSettings] = None, ): """ create topic command @@ -151,6 +159,7 @@ class TopicClientAsyncIO: self, path: str, set_min_active_partitions: Optional[int] = None, + set_max_active_partitions: Optional[int] = None, set_partition_count_limit: Optional[int] = None, add_consumers: Optional[List[Union[TopicConsumer, str]]] = None, alter_consumers: Optional[List[Union[TopicAlterConsumer, str]]] = None, @@ -162,6 +171,7 @@ class TopicClientAsyncIO: set_retention_period: Optional[datetime.timedelta] = None, set_retention_storage_mb: Optional[int] = None, set_supported_codecs: Optional[List[Union[TopicCodec, int]]] = None, + alter_auto_partitioning_settings: Optional[TopicAlterAutoPartitioningSettings] = None, ): """ alter topic command @@ -226,6 +236,7 @@ class TopicClientAsyncIO: # custom decoder executor for call builtin and custom decoders. If None - use shared executor pool. # if max_worker in the executor is 1 - then decoders will be called from the thread without parallel decoder_executor: Optional[concurrent.futures.Executor] = None, + auto_partitioning_support: Optional[bool] = True, # Auto partitioning feature flag. Default - True. ) -> TopicReaderAsyncIO: if not decoder_executor: @@ -305,6 +316,7 @@ class TopicClient: self, path: str, min_active_partitions: Optional[int] = None, + max_active_partitions: Optional[int] = None, partition_count_limit: Optional[int] = None, retention_period: Optional[datetime.timedelta] = None, retention_storage_mb: Optional[int] = None, @@ -314,6 +326,7 @@ class TopicClient: attributes: Optional[Dict[str, str]] = None, consumers: Optional[List[Union[TopicConsumer, str]]] = None, metering_mode: Optional[TopicMeteringMode] = None, + auto_partitioning_settings: Optional[TopicAutoPartitioningSettings] = None, ): """ create topic command @@ -350,6 +363,7 @@ class TopicClient: self, path: str, set_min_active_partitions: Optional[int] = None, + set_max_active_partitions: Optional[int] = None, set_partition_count_limit: Optional[int] = None, add_consumers: Optional[List[Union[TopicConsumer, str]]] = None, alter_consumers: Optional[List[Union[TopicAlterConsumer, str]]] = None, @@ -361,6 +375,7 @@ class TopicClient: set_retention_period: Optional[datetime.timedelta] = None, set_retention_storage_mb: Optional[int] = None, set_supported_codecs: Optional[List[Union[TopicCodec, int]]] = None, + alter_auto_partitioning_settings: Optional[TopicAlterAutoPartitioningSettings] = None, ): """ alter topic command @@ -431,6 +446,7 @@ class TopicClient: # custom decoder executor for call builtin and custom decoders. If None - use shared executor pool. # if max_worker in the executor is 1 - then decoders will be called from the thread without parallel decoder_executor: Optional[concurrent.futures.Executor] = None, # default shared client executor pool + auto_partitioning_support: Optional[bool] = True, # Auto partitioning feature flag. Default - True. ) -> TopicReader: if not decoder_executor: decoder_executor = self._executor diff --git a/contrib/python/ydb/py3/ydb/ydb_version.py b/contrib/python/ydb/py3/ydb/ydb_version.py index bdc80c2111..750aee1df8 100644 --- a/contrib/python/ydb/py3/ydb/ydb_version.py +++ b/contrib/python/ydb/py3/ydb/ydb_version.py @@ -1 +1 @@ -VERSION = "3.18.15" +VERSION = "3.18.16" diff --git a/library/cpp/tld/tlds-alpha-by-domain.txt b/library/cpp/tld/tlds-alpha-by-domain.txt index aa72a0fcd4..c3968f49e2 100644 --- a/library/cpp/tld/tlds-alpha-by-domain.txt +++ b/library/cpp/tld/tlds-alpha-by-domain.txt @@ -1,4 +1,4 @@ -# Version 2025021000, Last Updated Mon Feb 10 07:07:01 2025 UTC +# Version 2025021300, Last Updated Thu Feb 13 07:07:02 2025 UTC AAA AARP ABB @@ -39,33 +39,33 @@ REGISTRY_ENDPOINT = os.environ.get("YA_REGISTRY_ENDPOINT", "https://devtools-reg PLATFORM_MAP = { "data": { "darwin": { - "md5": "72d97f4b5e3f6d679e8c80ba990a07c0", + "md5": "31aefe161dc826d89c768cbc8a8ceaa8", "urls": [ - f"{REGISTRY_ENDPOINT}/8018530253" + f"{REGISTRY_ENDPOINT}/8029716860" ] }, "darwin-arm64": { - "md5": "ed328a769385939c5a3239fb820525a8", + "md5": "ab82c6dfb3005aecbad5695de814d3b6", "urls": [ - f"{REGISTRY_ENDPOINT}/8018528420" + f"{REGISTRY_ENDPOINT}/8029710985" ] }, "linux-aarch64": { - "md5": "c2b9717969cfa225f00f6b53a68e5b87", + "md5": "6cce83fc212c5e0f19754b0bec02a2b4", "urls": [ - f"{REGISTRY_ENDPOINT}/8018526676" + f"{REGISTRY_ENDPOINT}/8029706049" ] }, "win32-clang-cl": { - "md5": "7ec6b941b0d54a9ddbb4407ba8d59104", + "md5": "05f2ddd43f41f4ebd239195003c94797", "urls": [ - f"{REGISTRY_ENDPOINT}/8018531973" + f"{REGISTRY_ENDPOINT}/8029721213" ] }, "linux": { - "md5": "3cc47f9b19480b745b2837524b9eb139", + "md5": "510059486631c6c59c6fa2abacd89928", "urls": [ - f"{REGISTRY_ENDPOINT}/8018534052" + f"{REGISTRY_ENDPOINT}/8029728073" ] } } diff --git a/yql/essentials/cfg/tests/gateways-experimental.conf b/yql/essentials/cfg/tests/gateways-experimental.conf index d8b4c0727a..778a8be39a 100644 --- a/yql/essentials/cfg/tests/gateways-experimental.conf +++ b/yql/essentials/cfg/tests/gateways-experimental.conf @@ -13,6 +13,11 @@ Yt { Name: "TableContentLocalExecution" Value: "true" } + + DefaultSettings { + Name: "DisableOptimizers" + Value: "NONE" + } } Dq { diff --git a/yql/essentials/core/peephole_opt/yql_opt_peephole_physical.cpp b/yql/essentials/core/peephole_opt/yql_opt_peephole_physical.cpp index 94754fb67d..c6973d7acb 100644 --- a/yql/essentials/core/peephole_opt/yql_opt_peephole_physical.cpp +++ b/yql/essentials/core/peephole_opt/yql_opt_peephole_physical.cpp @@ -146,14 +146,37 @@ TExprNode::TPtr OptimizeWideToBlocks(const TExprNode::TPtr& node, TExprContext& .Build(); } - if (input.IsCallable({"Extend", "OrderedExtend"})) { - YQL_CLOG(DEBUG, CorePeepHole) << "Swap " << node->Content() << " with " << input.Content(); + if (input.IsCallable("FromFlow") && input.Head().IsCallable({"Extend", "OrderedExtend"})) { + const auto& extend = input.Head(); + // Technically, the code below rewrites the following sequence + // (WideToBlocks (FromFlow (Extend (<input>)))) + // into (Extend (WideToBlocks (FromFlow (<input>))), but + // the logging is left intact, omitting the FromFlow barrier. + YQL_CLOG(DEBUG, CorePeepHole) << "Swap " << node->Content() << " with " << extend.Content(); TExprNodeList newChildren; - newChildren.reserve(input.ChildrenSize()); - for (auto& child : input.ChildrenList()) { - newChildren.emplace_back(ctx.ChangeChild(*node, 0, std::move(child))); + newChildren.reserve(extend.ChildrenSize()); + for (const auto& child : extend.ChildrenList()) { + // Extend callable can handle any sequential type, so + // just wrap all its children with (ToStream (...)). + // However, its *block* overload works only with WideFlow, + // so the new child is wrapped with ToFlow callable. + const auto newChild = ctx.Builder(node->Pos()) + .Callable("ToFlow") + .Callable(0, "WideToBlocks") + .Callable(0, "ToStream") + .Add(0, child) + .Seal() + .Seal() + .Seal() + .Build(); + newChildren.emplace_back(newChild); } - return ctx.NewCallable(input.Pos(), input.IsCallable("Extend") ? "BlockExtend" : "BlockOrderedExtend", std::move(newChildren)); + const auto newName = extend.IsCallable("Extend") ? "BlockExtend" : "BlockOrderedExtend"; + return ctx.Builder(node->Pos()) + .Callable("FromFlow") + .Add(0, ctx.NewCallable(input.Pos(), newName, std::move(newChildren))) + .Seal() + .Build(); } return node; diff --git a/yql/essentials/core/qplayer/storage/file/yql_qstorage_file.cpp b/yql/essentials/core/qplayer/storage/file/yql_qstorage_file.cpp index 0fad744f8e..6448f6f889 100644 --- a/yql/essentials/core/qplayer/storage/file/yql_qstorage_file.cpp +++ b/yql/essentials/core/qplayer/storage/file/yql_qstorage_file.cpp @@ -7,8 +7,8 @@ #include <util/folder/tempdir.h> #include <util/generic/hash_set.h> #include <util/system/fs.h> -#include <util/system/mutex.h> #include <util/stream/file.h> +#include <util/system/mutex.h> namespace NYql { @@ -195,9 +195,9 @@ public: auto opPath = Folder_ / operationId; auto writtenAt = writerSettings.WrittenAt.GetOrElse(Now()); if (Settings_.BufferUntilCommit) { - return std::make_shared<TBufferedWriter>(opPath, writtenAt, writerSettings); + return MakeCloseAwareWriterDecorator(std::make_shared<TBufferedWriter>(opPath, writtenAt, writerSettings)); } else { - return std::make_shared<TUnbufferedWriter>(opPath, writtenAt, writerSettings, Settings_.AlwaysFlushIndex); + return MakeCloseAwareWriterDecorator(std::make_shared<TUnbufferedWriter>(opPath, writtenAt, writerSettings, Settings_.AlwaysFlushIndex)); } } diff --git a/yql/essentials/core/qplayer/storage/interface/yql_qstorage.cpp b/yql/essentials/core/qplayer/storage/interface/yql_qstorage.cpp index 43b073e130..603c30809c 100644 --- a/yql/essentials/core/qplayer/storage/interface/yql_qstorage.cpp +++ b/yql/essentials/core/qplayer/storage/interface/yql_qstorage.cpp @@ -1 +1,37 @@ #include "yql_qstorage.h" + +namespace NYql { +class TQWriterDecorator : public IQWriter { + public: + TQWriterDecorator(IQWriterPtr&& underlying) : Underlying_(std::move(underlying)) {} + NThreading::TFuture<void> Put(const TQItemKey& key, const TString& value) override final { + if (Closed_) { + return NThreading::MakeFuture(); + } + return Underlying_->Put(key, value); + } + + NThreading::TFuture<void> Commit() override final { + if (Closed_) { + throw yexception() << "QWriter closed"; + } + return Underlying_->Commit(); + } + + // Close all used files, doesn't commit anything + void Close() override final { + bool expected = false; + if (Closed_.compare_exchange_strong(expected, true)) { + Underlying_ = {}; + } + } +private: + IQWriterPtr Underlying_; + std::atomic<bool> Closed_ = false; +}; + +IQWriterPtr MakeCloseAwareWriterDecorator(IQWriterPtr&& rhs) { + return std::make_shared<TQWriterDecorator>(std::move(rhs)); +} + +} diff --git a/yql/essentials/core/qplayer/storage/interface/yql_qstorage.h b/yql/essentials/core/qplayer/storage/interface/yql_qstorage.h index 137e9ad639..7ff0caa013 100644 --- a/yql/essentials/core/qplayer/storage/interface/yql_qstorage.h +++ b/yql/essentials/core/qplayer/storage/interface/yql_qstorage.h @@ -48,10 +48,10 @@ using IQReaderPtr = std::shared_ptr<IQReader>; class IQWriter { public: virtual ~IQWriter() = default; - virtual NThreading::TFuture<void> Put(const TQItemKey& key, const TString& value) = 0; // Commmit should be called at most once, no more Put are allowed after it virtual NThreading::TFuture<void> Commit() = 0; + virtual void Close() {}; }; using IQWriterPtr = std::shared_ptr<IQWriter>; @@ -134,6 +134,7 @@ private: IQWriterPtr Writer_; }; +IQWriterPtr MakeCloseAwareWriterDecorator(IQWriterPtr&& rhs); } template <> diff --git a/yql/essentials/core/qplayer/storage/memory/yql_qstorage_memory.cpp b/yql/essentials/core/qplayer/storage/memory/yql_qstorage_memory.cpp index ae11feb3b8..dcd1205df7 100644 --- a/yql/essentials/core/qplayer/storage/memory/yql_qstorage_memory.cpp +++ b/yql/essentials/core/qplayer/storage/memory/yql_qstorage_memory.cpp @@ -129,7 +129,7 @@ public: } IQWriterPtr MakeWriter(const TString& operationId, const TQWriterSettings& writerSettings) const final { - return std::make_shared<TWriter>(GetOperation(operationId, true), writerSettings); + return MakeCloseAwareWriterDecorator(std::make_shared<TWriter>(GetOperation(operationId, true), writerSettings)); } IQIteratorPtr MakeIterator(const TString& operationId, const TQIteratorSettings& iteratorSettings) const final { diff --git a/yql/essentials/core/qplayer/storage/ydb/yql_qstorage_ydb.cpp b/yql/essentials/core/qplayer/storage/ydb/yql_qstorage_ydb.cpp index 505de648d9..c39d8c514a 100644 --- a/yql/essentials/core/qplayer/storage/ydb/yql_qstorage_ydb.cpp +++ b/yql/essentials/core/qplayer/storage/ydb/yql_qstorage_ydb.cpp @@ -218,7 +218,7 @@ public: } IQWriterPtr MakeWriter(const TString& operationId, const TQWriterSettings& settings) const final { - return std::make_shared<TWriter>(Settings_, operationId, settings); + return MakeCloseAwareWriterDecorator(std::make_shared<TWriter>(Settings_, operationId, settings)); } IQReaderPtr MakeReader(const TString& operationId, const TQReaderSettings& settings) const final { @@ -238,7 +238,7 @@ private: void LoadTable(const TString& operationId, const IQStoragePtr& memory) const { auto driver = MakeDriver(Settings_); NYdb::NTable::TTableClient tableClient(driver); - + auto operationsTable = Settings_.TablesPrefix + "operations"; auto fullOperationId = Settings_.OperationIdPrefix + operationId; @@ -287,7 +287,7 @@ private: TString blobTable = Settings_.Database + "/" + Settings_.TablesPrefix + "blobs"; const auto maxBatchSize = Settings_.MaxBatchSize.GetOrElse(DefaultMaxBatchSize); - auto rtResult = tableClient.RetryOperationSync([&tableIter, maxBatchSize, blobTable, + auto rtResult = tableClient.RetryOperationSync([&tableIter, maxBatchSize, blobTable, fullOperationId, writtenAt, loadedTotalItems](NYdb::NTable::TSession session) { auto key1 = NYdb::TValueBuilder() .BeginTuple() @@ -321,7 +321,7 @@ private: if (res.IsSuccess()) { tableIter = res; } - + return res; }, readRetrySettings); ThrowOnError(rtResult); diff --git a/yql/essentials/core/type_ann/type_ann_core.cpp b/yql/essentials/core/type_ann/type_ann_core.cpp index d56c5bc760..1b1f95c254 100644 --- a/yql/essentials/core/type_ann/type_ann_core.cpp +++ b/yql/essentials/core/type_ann/type_ann_core.cpp @@ -8786,6 +8786,8 @@ template <NKikimr::NUdf::EDataSlot DataSlot> ctx.Expr.AddError(TIssue(ctx.Expr.GetPosition(input->Child(0)->Pos()), TStringBuilder() << "Mismatch item type, expected: " << *firstType << ", got: " << *input->Child(0)->GetTypeAnn())); return IGraphTransformer::TStatus::Error; + } else if (convertStatus.Level != IGraphTransformer::TStatus::Ok) { + return convertStatus; } input->SetTypeAnn(ctx.Expr.MakeType<TOptionalExprType>(variantType)); diff --git a/yql/essentials/core/yql_expr_constraint.cpp b/yql/essentials/core/yql_expr_constraint.cpp index f65d23757e..49997179f5 100644 --- a/yql/essentials/core/yql_expr_constraint.cpp +++ b/yql/essentials/core/yql_expr_constraint.cpp @@ -201,6 +201,7 @@ public: Functions["Visit"] = &TCallableConstraintTransformer::VisitWrap; Functions["VariantItem"] = &TCallableConstraintTransformer::VariantItemWrap; Functions["Variant"] = &TCallableConstraintTransformer::VariantWrap; + Functions["DynamicVariant"] = &TCallableConstraintTransformer::DynamicVariantWrap; Functions["Guess"] = &TCallableConstraintTransformer::GuessWrap; Functions["Mux"] = &TCallableConstraintTransformer::MuxWrap; Functions["Nth"] = &TCallableConstraintTransformer::NthWrap; @@ -668,6 +669,43 @@ private: FilterFromHead<TPartOfChoppedConstraintNode>(input, filter, ctx); FilterFromHead<TPartOfUniqueConstraintNode>(input, filterForUnique, ctx); FilterFromHead<TPartOfDistinctConstraintNode>(input, filterForDistinct, ctx); + + const auto unwrapedOutItemType = RemoveOptionalType(outItemType); + const auto unwrapedInItemType = RemoveOptionalType(inItemType); + if (unwrapedInItemType->GetKind() == ETypeAnnotationKind::Variant && unwrapedOutItemType->GetKind() == ETypeAnnotationKind::Variant + && unwrapedOutItemType->Cast<TVariantExprType>()->GetUnderlyingType()->GetKind() == ETypeAnnotationKind::Tuple) { + + const auto tupleUnderInType = unwrapedInItemType->Cast<TVariantExprType>()->GetUnderlyingType()->Cast<TTupleExprType>(); + const auto tupleUnderOutType = unwrapedOutItemType->Cast<TVariantExprType>()->GetUnderlyingType()->Cast<TTupleExprType>(); + if (auto multi = input->Head().GetConstraint<TMultiConstraintNode>()) { + if (tupleUnderOutType->GetSize() < tupleUnderInType->GetSize()) { + TMultiConstraintNode::TMapType multiItems; + std::copy_if(multi->GetItems().cbegin(), multi->GetItems().cend(), + std::back_inserter(multiItems), + [&](const auto& item) { return item.first < tupleUnderOutType->GetSize(); } + ); + if (!multiItems.empty()) { + input->AddConstraint(ctx.MakeConstraint<TMultiConstraintNode>(std::move(multiItems))); + } + } else { + input->AddConstraint(multi); + } + } + if (auto varItem = input->Head().GetConstraint<TVarIndexConstraintNode>()) { + if (tupleUnderOutType->GetSize() < tupleUnderInType->GetSize()) { + TVarIndexConstraintNode::TMapType filteredItems; + std::copy_if(varItem->GetIndexMapping().cbegin(), varItem->GetIndexMapping().cend(), + std::back_inserter(filteredItems), + [&](const auto& item) { return item.second < tupleUnderOutType->GetSize(); } + ); + if (!filteredItems.empty()) { + input->AddConstraint(ctx.MakeConstraint<TVarIndexConstraintNode>(std::move(filteredItems))); + } + } else { + input->AddConstraint(varItem); + } + } + } return TStatus::Ok; } @@ -1985,6 +2023,28 @@ private: return TStatus::Ok; } + TStatus DynamicVariantWrap(const TExprNode::TPtr& input, TExprNode::TPtr& /*output*/, TExprContext& ctx) const { + if (auto underlyingType = RemoveOptionalType(input->GetTypeAnn())->Cast<TVariantExprType>()->GetUnderlyingType(); underlyingType->GetKind() == ETypeAnnotationKind::Tuple) { + TConstraintSet target; + CopyExcept(target, input->Head().GetConstraintSet(), TVarIndexConstraintNode::Name()); + TMultiConstraintNode::TMapType items; + for (ui32 i = 0; i < underlyingType->Cast<TTupleExprType>()->GetSize(); ++i) { + items.emplace_back(i, target); + } + input->AddConstraint(ctx.MakeConstraint<TMultiConstraintNode>(std::move(items))); + if (auto varIndex = input->Head().GetConstraint<TVarIndexConstraintNode>()) { + TVarIndexConstraintNode::TMapType filteredItems; + for (ui32 i = 0; i < underlyingType->Cast<TTupleExprType>()->GetSize(); ++i) { + for (auto& item: varIndex->GetIndexMapping()) { + filteredItems.push_back(std::make_pair(i, item.second)); + } + } + input->AddConstraint(ctx.MakeConstraint<TVarIndexConstraintNode>(std::move(filteredItems))); + } + } + return TStatus::Ok; + } + TStatus GuessWrap(const TExprNode::TPtr& input, TExprNode::TPtr& /*output*/, TExprContext& ctx) const { auto inputType = input->Head().GetTypeAnn(); if (inputType->GetKind() == ETypeAnnotationKind::Optional) { diff --git a/yql/essentials/minikql/protobuf_udf/ut/value_builder_ut.cpp b/yql/essentials/minikql/protobuf_udf/ut/value_builder_ut.cpp index 4d8a77dced..a3ed5f796b 100644 --- a/yql/essentials/minikql/protobuf_udf/ut/value_builder_ut.cpp +++ b/yql/essentials/minikql/protobuf_udf/ut/value_builder_ut.cpp @@ -5,6 +5,7 @@ #include <yql/essentials/providers/common/codec/yql_codec.h> #include <yql/essentials/providers/common/codec/yql_codec_buf.h> +#include <yt/yql/providers/yt/codec/yt_codec.h> #include <yql/essentials/minikql/mkql_alloc.h> #include <yql/essentials/minikql/mkql_node.h> #include <yql/essentials/minikql/mkql_type_builder.h> @@ -58,12 +59,12 @@ struct TSetup { template <typename TProto> TString YsonToProtoText(TSetup& setup, NUdf::TProtoInfo& info, TStringBuf yson) { TStringStream err; - auto val = NCommon::ParseYsonValue( + auto val = ParseYsonValueInTableFormat( setup.HolderFactory, NYT::NodeToYsonString(NYT::NodeFromYsonString(yson), ::NYson::EYsonFormat::Binary), static_cast<NKikimr::NMiniKQL::TStructType*>(info.StructType), 0, - &err, true); + &err); if (!val) { throw yexception() << err.Str(); } @@ -120,7 +121,7 @@ TString ProtoTextToYson(TSetup& setup, NUdf::TProtoInfo& info, TStringBuf protoT auto value = FillValueFromProto(proto, &setup.ValueBuilder, info); TTestWriter out; NCommon::TOutputBuf buf(out, nullptr); - NCommon::WriteYsonValueInTableFormat(buf, static_cast<NKikimr::NMiniKQL::TStructType*>(info.StructType), 0, value, true); + WriteYsonValueInTableFormat(buf, static_cast<NKikimr::NMiniKQL::TStructType*>(info.StructType), 0, value, true); buf.Finish(); return NYT::NodeToYsonString(NYT::NodeFromYsonString(out.Str()), ::NYson::EYsonFormat::Text); diff --git a/yql/essentials/minikql/protobuf_udf/ut/ya.make b/yql/essentials/minikql/protobuf_udf/ut/ya.make index dd4741870f..e788d5da8d 100644 --- a/yql/essentials/minikql/protobuf_udf/ut/ya.make +++ b/yql/essentials/minikql/protobuf_udf/ut/ya.make @@ -11,6 +11,7 @@ SRCS( PEERDIR( yt/yql/providers/yt/lib/schema yt/yql/providers/yt/common + yt/yql/providers/yt/codec yql/essentials/public/udf/service/exception_policy yql/essentials/minikql yql/essentials/public/udf diff --git a/yql/essentials/parser/pg_wrapper/interface/parser.h b/yql/essentials/parser/pg_wrapper/interface/parser.h index f86fee1630..db82da2298 100644 --- a/yql/essentials/parser/pg_wrapper/interface/parser.h +++ b/yql/essentials/parser/pg_wrapper/interface/parser.h @@ -6,6 +6,8 @@ namespace NSQLTranslation { struct TTranslationSettings; +class ITranslator; +using TTranslatorPtr = TIntrusivePtr<ITranslator>; } // NSQLTranslation @@ -16,5 +18,6 @@ TVector<NYql::TAstParseResult> PGToYqlStatements(const TString& query, const NSQ std::unique_ptr<NYql::NPg::IExtensionSqlParser> CreateExtensionSqlParser(); std::unique_ptr<NYql::NPg::ISystemFunctionsParser> CreateSystemFunctionsParser(); std::unique_ptr<NYql::NPg::ISqlLanguageParser> CreateSqlLanguageParser(); +NSQLTranslation::TTranslatorPtr MakeTranslator(); } // NSQLTranslationPG diff --git a/yql/essentials/providers/common/codec/ya.make b/yql/essentials/providers/common/codec/ya.make index ca9d05dd2a..a55ef22bbf 100644 --- a/yql/essentials/providers/common/codec/ya.make +++ b/yql/essentials/providers/common/codec/ya.make @@ -19,7 +19,6 @@ PEERDIR( library/cpp/yson library/cpp/json library/cpp/enumbitset - yt/yt/library/decimal ) YQL_LAST_ABI_VERSION() diff --git a/yql/essentials/providers/common/codec/yql_codec.cpp b/yql/essentials/providers/common/codec/yql_codec.cpp index 6710498276..04517c6e2e 100644 --- a/yql/essentials/providers/common/codec/yql_codec.cpp +++ b/yql/essentials/providers/common/codec/yql_codec.cpp @@ -23,8 +23,6 @@ #include <util/string/cast.h> #include <util/generic/map.h> -#include <yt/yt/library/decimal/decimal.h> - namespace NYql { namespace NCommon { @@ -289,7 +287,7 @@ TMaybe<TVector<ui32>> CreateStructPositions(TType* inputType, const TVector<TStr if (inputType->GetKind() != TType::EKind::Struct) { return Nothing(); } - + auto inputStruct = AS_TYPE(TStructType, inputType); TMap<TStringBuf, ui32> members; TVector<ui32> structPositions(inputStruct->GetMembersCount(), Max<ui32>()); @@ -784,119 +782,66 @@ T ReadNextSerializedNumber(char cmd, TInputBuf& buf) { } template <typename T> -T ReadYsonFloatNumber(char cmd, TInputBuf& buf, bool isTableFormat) { - if (isTableFormat) { - CHECK_EXPECTED(cmd, DoubleMarker); - double dbl; - buf.ReadMany((char*)&dbl, sizeof(dbl)); - return dbl; - } - +T ReadYsonFloatNumber(char cmd, TInputBuf& buf) { return ReadNextSerializedNumber<T>(cmd, buf); } -NUdf::TUnboxedValue ReadYsonValue(TType* type, ui64 nativeYtTypeFlags, - const NKikimr::NMiniKQL::THolderFactory& holderFactory, char cmd, TInputBuf& buf, bool isTableFormat) { +NUdf::TUnboxedValue ReadYsonValue(TType* type, + const NKikimr::NMiniKQL::THolderFactory& holderFactory, char cmd, TInputBuf& buf) { switch (type->GetKind()) { case TType::EKind::Variant: { auto varType = static_cast<TVariantType*>(type); auto underlyingType = varType->GetUnderlyingType(); - if (isTableFormat && (nativeYtTypeFlags & NTCF_COMPLEX)) { - CHECK_EXPECTED(cmd, BeginListSymbol); - cmd = buf.Read(); - TType* type = nullptr; - i64 index = 0; - if (cmd == StringMarker) { - YQL_ENSURE(underlyingType->IsStruct(), "Expected struct as underlying type"); - auto structType = static_cast<TStructType*>(underlyingType); - auto nameBuffer = ReadNextString(cmd, buf); - auto foundIndex = structType->FindMemberIndex(nameBuffer); - YQL_ENSURE(foundIndex.Defined(), "Unexpected member: " << nameBuffer); - index = *foundIndex; - type = varType->GetAlternativeType(index); - } else { - YQL_ENSURE(cmd == Int64Marker || cmd == Uint64Marker); - YQL_ENSURE(underlyingType->IsTuple(), "Expected tuple as underlying type"); - if (cmd == Uint64Marker) { - index = buf.ReadVarUI64(); - } else { - index = buf.ReadVarI64(); - } - YQL_ENSURE(0 <= index && index < varType->GetAlternativesCount(), "Unexpected member index: " << index); - type = varType->GetAlternativeType(index); - } - cmd = buf.Read(); - CHECK_EXPECTED(cmd, ListItemSeparatorSymbol); - cmd = buf.Read(); - auto value = ReadYsonValue(type, nativeYtTypeFlags, holderFactory, cmd, buf, isTableFormat); - cmd = buf.Read(); - if (cmd != EndListSymbol) { - CHECK_EXPECTED(cmd, ListItemSeparatorSymbol); - cmd = buf.Read(); - CHECK_EXPECTED(cmd, EndListSymbol); - } - return holderFactory.CreateVariantHolder(value.Release(), index); - } else { - if (cmd == StringMarker) { - YQL_ENSURE(underlyingType->IsStruct(), "Expected struct as underlying type"); - auto name = ReadNextString(cmd, buf); - auto index = static_cast<TStructType*>(underlyingType)->FindMemberIndex(name); - YQL_ENSURE(index, "Unexpected member: " << name); - YQL_ENSURE(static_cast<TStructType*>(underlyingType)->GetMemberType(*index)->IsVoid(), "Expected Void as underlying type"); - return holderFactory.CreateVariantHolder(NUdf::TUnboxedValuePod::Zero(), *index); - } - - CHECK_EXPECTED(cmd, BeginListSymbol); - cmd = buf.Read(); - i64 index = 0; - if (isTableFormat) { - YQL_ENSURE(cmd == Int64Marker || cmd == Uint64Marker); - if (cmd == Uint64Marker) { - index = buf.ReadVarUI64(); - } else { - index = buf.ReadVarI64(); - } - } else { - if (cmd == BeginListSymbol) { - cmd = buf.Read(); - YQL_ENSURE(underlyingType->IsStruct(), "Expected struct as underlying type"); - auto name = ReadNextString(cmd, buf); - auto foundIndex = static_cast<TStructType*>(underlyingType)->FindMemberIndex(name); - YQL_ENSURE(foundIndex, "Unexpected member: " << name); - index = *foundIndex; - cmd = buf.Read(); - if (cmd == ListItemSeparatorSymbol) { - cmd = buf.Read(); - } - - CHECK_EXPECTED(cmd, EndListSymbol); - } else { - index = ReadNextSerializedNumber<ui64>(cmd, buf); - } - } - - YQL_ENSURE(index < varType->GetAlternativesCount(), "Bad variant alternative: " << index << ", only " << - varType->GetAlternativesCount() << " are available"); - YQL_ENSURE(underlyingType->IsTuple() || underlyingType->IsStruct(), "Wrong underlying type"); - TType* itemType; - if (underlyingType->IsTuple()) { - itemType = static_cast<TTupleType*>(underlyingType)->GetElementType(index); - } - else { - itemType = static_cast<TStructType*>(underlyingType)->GetMemberType(index); - } + if (cmd == StringMarker) { + YQL_ENSURE(underlyingType->IsStruct(), "Expected struct as underlying type"); + auto name = ReadNextString(cmd, buf); + auto index = static_cast<TStructType*>(underlyingType)->FindMemberIndex(name); + YQL_ENSURE(index, "Unexpected member: " << name); + YQL_ENSURE(static_cast<TStructType*>(underlyingType)->GetMemberType(*index)->IsVoid(), "Expected Void as underlying type"); + return holderFactory.CreateVariantHolder(NUdf::TUnboxedValuePod::Zero(), *index); + } - EXPECTED(buf, ListItemSeparatorSymbol); + CHECK_EXPECTED(cmd, BeginListSymbol); + cmd = buf.Read(); + i64 index = 0; + if (cmd == BeginListSymbol) { cmd = buf.Read(); - auto value = ReadYsonValue(itemType, nativeYtTypeFlags, holderFactory, cmd, buf, isTableFormat); + YQL_ENSURE(underlyingType->IsStruct(), "Expected struct as underlying type"); + auto name = ReadNextString(cmd, buf); + auto foundIndex = static_cast<TStructType*>(underlyingType)->FindMemberIndex(name); + YQL_ENSURE(foundIndex, "Unexpected member: " << name); + index = *foundIndex; cmd = buf.Read(); if (cmd == ListItemSeparatorSymbol) { cmd = buf.Read(); } CHECK_EXPECTED(cmd, EndListSymbol); - return holderFactory.CreateVariantHolder(value.Release(), index); + } else { + index = ReadNextSerializedNumber<ui64>(cmd, buf); } + + YQL_ENSURE(index < varType->GetAlternativesCount(), "Bad variant alternative: " << index << ", only " << + varType->GetAlternativesCount() << " are available"); + YQL_ENSURE(underlyingType->IsTuple() || underlyingType->IsStruct(), "Wrong underlying type"); + TType* itemType; + if (underlyingType->IsTuple()) { + itemType = static_cast<TTupleType*>(underlyingType)->GetElementType(index); + } + else { + itemType = static_cast<TStructType*>(underlyingType)->GetMemberType(index); + } + + EXPECTED(buf, ListItemSeparatorSymbol); + cmd = buf.Read(); + auto value = ReadYsonValue(itemType, holderFactory, cmd, buf); + cmd = buf.Read(); + if (cmd == ListItemSeparatorSymbol) { + cmd = buf.Read(); + } + + CHECK_EXPECTED(cmd, EndListSymbol); + return holderFactory.CreateVariantHolder(value.Release(), index); } case TType::EKind::Data: { @@ -907,117 +852,49 @@ NUdf::TUnboxedValue ReadYsonValue(TType* type, ui64 nativeYtTypeFlags, return NUdf::TUnboxedValuePod(cmd == TrueMarker); case NUdf::TDataType<ui8>::Id: - if (isTableFormat) { - CHECK_EXPECTED(cmd, Uint64Marker); - return NUdf::TUnboxedValuePod(ui8(buf.ReadVarUI64())); - } return NUdf::TUnboxedValuePod(ReadNextSerializedNumber<ui8>(cmd, buf)); case NUdf::TDataType<i8>::Id: - if (isTableFormat) { - CHECK_EXPECTED(cmd, Int64Marker); - return NUdf::TUnboxedValuePod(i8(buf.ReadVarI64())); - } return NUdf::TUnboxedValuePod(ReadNextSerializedNumber<i8>(cmd, buf)); case NUdf::TDataType<ui16>::Id: - if (isTableFormat) { - CHECK_EXPECTED(cmd, Uint64Marker); - return NUdf::TUnboxedValuePod(ui16(buf.ReadVarUI64())); - } return NUdf::TUnboxedValuePod(ReadNextSerializedNumber<ui16>(cmd, buf)); case NUdf::TDataType<i16>::Id: - if (isTableFormat) { - CHECK_EXPECTED(cmd, Int64Marker); - return NUdf::TUnboxedValuePod(i16(buf.ReadVarI64())); - } return NUdf::TUnboxedValuePod(ReadNextSerializedNumber<i16>(cmd, buf)); case NUdf::TDataType<i32>::Id: - if (isTableFormat) { - CHECK_EXPECTED(cmd, Int64Marker); - return NUdf::TUnboxedValuePod(i32(buf.ReadVarI64())); - } return NUdf::TUnboxedValuePod(ReadNextSerializedNumber<i32>(cmd, buf)); case NUdf::TDataType<ui32>::Id: - if (isTableFormat) { - CHECK_EXPECTED(cmd, Uint64Marker); - return NUdf::TUnboxedValuePod(ui32(buf.ReadVarUI64())); - } return NUdf::TUnboxedValuePod(ReadNextSerializedNumber<ui32>(cmd, buf)); case NUdf::TDataType<i64>::Id: - if (isTableFormat) { - CHECK_EXPECTED(cmd, Int64Marker); - return NUdf::TUnboxedValuePod(buf.ReadVarI64()); - } return NUdf::TUnboxedValuePod(ReadNextSerializedNumber<i64>(cmd, buf)); case NUdf::TDataType<ui64>::Id: - if (isTableFormat) { - CHECK_EXPECTED(cmd, Uint64Marker); - return NUdf::TUnboxedValuePod(buf.ReadVarUI64()); - } return NUdf::TUnboxedValuePod(ReadNextSerializedNumber<ui64>(cmd, buf)); case NUdf::TDataType<float>::Id: - return NUdf::TUnboxedValuePod(ReadYsonFloatNumber<float>(cmd, buf, isTableFormat)); + return NUdf::TUnboxedValuePod(ReadYsonFloatNumber<float>(cmd, buf)); case NUdf::TDataType<double>::Id: - return NUdf::TUnboxedValuePod(ReadYsonFloatNumber<double>(cmd, buf, isTableFormat)); + return NUdf::TUnboxedValuePod(ReadYsonFloatNumber<double>(cmd, buf)); case NUdf::TDataType<NUdf::TUtf8>::Id: case NUdf::TDataType<char*>::Id: case NUdf::TDataType<NUdf::TJson>::Id: case NUdf::TDataType<NUdf::TDyNumber>::Id: case NUdf::TDataType<NUdf::TUuid>::Id: { - if (isTableFormat) { - auto nextString = ReadNextString(cmd, buf); - return NUdf::TUnboxedValue(MakeString(NUdf::TStringRef(nextString))); - } - return ReadYsonStringInResultFormat(cmd, buf); } case NUdf::TDataType<NUdf::TDecimal>::Id: { auto nextString = ReadNextString(cmd, buf); - if (isTableFormat) { - if (nativeYtTypeFlags & NTCF_DECIMAL) { - auto const params = static_cast<TDataDecimalType*>(type)->GetParams(); - if (params.first < 10) { - // The YQL format differs from the YT format in the inf/nan values. NDecimal::FromYtDecimal converts nan/inf - NDecimal::TInt128 res = NDecimal::FromYtDecimal(NYT::NDecimal::TDecimal::ParseBinary32(params.first, nextString)); - YQL_ENSURE(!NDecimal::IsError(res)); - return NUdf::TUnboxedValuePod(res); - } else if (params.first < 19) { - NDecimal::TInt128 res = NDecimal::FromYtDecimal(NYT::NDecimal::TDecimal::ParseBinary64(params.first, nextString)); - YQL_ENSURE(!NDecimal::IsError(res)); - return NUdf::TUnboxedValuePod(res); - } else { - YQL_ENSURE(params.first < 36); - NYT::NDecimal::TDecimal::TValue128 tmpRes = NYT::NDecimal::TDecimal::ParseBinary128(params.first, nextString); - NDecimal::TInt128 res; - static_assert(sizeof(NDecimal::TInt128) == sizeof(NYT::NDecimal::TDecimal::TValue128)); - memcpy(&res, &tmpRes, sizeof(NDecimal::TInt128)); - res = NDecimal::FromYtDecimal(res); - YQL_ENSURE(!NDecimal::IsError(res)); - return NUdf::TUnboxedValuePod(res); - } - } - else { - const auto& des = NDecimal::Deserialize(nextString.data(), nextString.size()); - YQL_ENSURE(!NDecimal::IsError(des.first)); - YQL_ENSURE(nextString.size() == des.second); - return NUdf::TUnboxedValuePod(des.first); - } - } else { - const auto params = static_cast<TDataDecimalType*>(type)->GetParams(); - const auto val = NDecimal::FromString(nextString, params.first, params.second); - YQL_ENSURE(!NDecimal::IsError(val)); - return NUdf::TUnboxedValuePod(val); - } + const auto params = static_cast<TDataDecimalType*>(type)->GetParams(); + const auto val = NDecimal::FromString(nextString, params.first, params.second); + YQL_ENSURE(!NDecimal::IsError(val)); + return NUdf::TUnboxedValuePod(val); } case NUdf::TDataType<NUdf::TYson>::Id: { @@ -1025,114 +902,55 @@ NUdf::TUnboxedValue ReadYsonValue(TType* type, ui64 nativeYtTypeFlags, yson.clear(); CopyYsonWithAttrs(cmd, buf, yson); - if (isTableFormat) { - return NUdf::TUnboxedValue(MakeString(NUdf::TStringRef(yson))); - } - TString decodedYson = NResult::DecodeRestrictedYson(TStringBuf(yson.data(), yson.size()), NYson::EYsonFormat::Text); return NUdf::TUnboxedValue(MakeString(NUdf::TStringRef(decodedYson))); } case NUdf::TDataType<NUdf::TDate>::Id: - if (isTableFormat) { - CHECK_EXPECTED(cmd, Uint64Marker); - return NUdf::TUnboxedValuePod((ui16)buf.ReadVarUI64()); - } return NUdf::TUnboxedValuePod(ReadNextSerializedNumber<ui16>(cmd, buf)); case NUdf::TDataType<NUdf::TDatetime>::Id: - if (isTableFormat) { - CHECK_EXPECTED(cmd, Uint64Marker); - return NUdf::TUnboxedValuePod((ui32)buf.ReadVarUI64()); - } return NUdf::TUnboxedValuePod(ReadNextSerializedNumber<ui32>(cmd, buf)); case NUdf::TDataType<NUdf::TTimestamp>::Id: - if (isTableFormat) { - CHECK_EXPECTED(cmd, Uint64Marker); - return NUdf::TUnboxedValuePod(buf.ReadVarUI64()); - } return NUdf::TUnboxedValuePod(ReadNextSerializedNumber<ui64>(cmd, buf)); case NUdf::TDataType<NUdf::TInterval>::Id: - if (isTableFormat) { - CHECK_EXPECTED(cmd, Int64Marker); - return NUdf::TUnboxedValuePod(buf.ReadVarI64()); - } return NUdf::TUnboxedValuePod(ReadNextSerializedNumber<i64>(cmd, buf)); case NUdf::TDataType<NUdf::TTzDate>::Id: { auto nextString = ReadNextString(cmd, buf); NUdf::TUnboxedValuePod data; - if (isTableFormat) { - ui16 value; - ui16 tzId = 0; - YQL_ENSURE(DeserializeTzDate(nextString, value, tzId)); - data = NUdf::TUnboxedValuePod(value); - data.SetTimezoneId(tzId); - } else { - data = ValueFromString(NUdf::EDataSlot::TzDate, nextString); - YQL_ENSURE(data, "incorrect tz date format for value " << nextString); - } - + data = ValueFromString(NUdf::EDataSlot::TzDate, nextString); + YQL_ENSURE(data, "incorrect tz date format for value " << nextString); return data; } case NUdf::TDataType<NUdf::TTzDatetime>::Id: { auto nextString = ReadNextString(cmd, buf); NUdf::TUnboxedValuePod data; - if (isTableFormat) { - ui32 value; - ui16 tzId = 0; - YQL_ENSURE(DeserializeTzDatetime(nextString, value, tzId)); - data = NUdf::TUnboxedValuePod(value); - data.SetTimezoneId(tzId); - } else { - data = ValueFromString(NUdf::EDataSlot::TzDatetime, nextString); - YQL_ENSURE(data, "incorrect tz datetime format for value " << nextString); - } - + data = ValueFromString(NUdf::EDataSlot::TzDatetime, nextString); + YQL_ENSURE(data, "incorrect tz datetime format for value " << nextString); return data; } case NUdf::TDataType<NUdf::TTzTimestamp>::Id: { auto nextString = ReadNextString(cmd, buf); NUdf::TUnboxedValuePod data; - if (isTableFormat) { - ui64 value; - ui16 tzId = 0; - YQL_ENSURE(DeserializeTzTimestamp(nextString, value, tzId)); - data = NUdf::TUnboxedValuePod(value); - data.SetTimezoneId(tzId); - } else { - data = ValueFromString(NUdf::EDataSlot::TzTimestamp, nextString); - YQL_ENSURE(data, "incorrect tz timestamp format for value " << nextString); - } - + data = ValueFromString(NUdf::EDataSlot::TzTimestamp, nextString); + YQL_ENSURE(data, "incorrect tz timestamp format for value " << nextString); return data; } case NUdf::TDataType<NUdf::TDate32>::Id: - if (isTableFormat) { - CHECK_EXPECTED(cmd, Int64Marker); - return NUdf::TUnboxedValuePod((i32)buf.ReadVarI64()); - } return NUdf::TUnboxedValuePod(ReadNextSerializedNumber<i32>(cmd, buf)); case NUdf::TDataType<NUdf::TDatetime64>::Id: case NUdf::TDataType<NUdf::TTimestamp64>::Id: case NUdf::TDataType<NUdf::TInterval64>::Id: - if (isTableFormat) { - CHECK_EXPECTED(cmd, Int64Marker); - return NUdf::TUnboxedValuePod(buf.ReadVarI64()); - } return NUdf::TUnboxedValuePod(ReadNextSerializedNumber<i64>(cmd, buf)); case NUdf::TDataType<NUdf::TJsonDocument>::Id: { - if (isTableFormat) { - return ValueFromString(EDataSlot::JsonDocument, ReadNextString(cmd, buf)); - } - const auto json = ReadYsonStringInResultFormat(cmd, buf); return ValueFromString(EDataSlot::JsonDocument, json.AsStringRef()); } @@ -1140,51 +958,24 @@ NUdf::TUnboxedValue ReadYsonValue(TType* type, ui64 nativeYtTypeFlags, case NUdf::TDataType<NUdf::TTzDate32>::Id: { auto nextString = ReadNextString(cmd, buf); NUdf::TUnboxedValuePod data; - if (isTableFormat) { - i32 value; - ui16 tzId = 0; - YQL_ENSURE(DeserializeTzDate32(nextString, value, tzId)); - data = NUdf::TUnboxedValuePod(value); - data.SetTimezoneId(tzId); - } else { - data = ValueFromString(NUdf::EDataSlot::TzDate32, nextString); - YQL_ENSURE(data, "incorrect tz date format for value " << nextString); - } - + data = ValueFromString(NUdf::EDataSlot::TzDate32, nextString); + YQL_ENSURE(data, "incorrect tz date format for value " << nextString); return data; } case NUdf::TDataType<NUdf::TTzDatetime64>::Id: { auto nextString = ReadNextString(cmd, buf); NUdf::TUnboxedValuePod data; - if (isTableFormat) { - i64 value; - ui16 tzId = 0; - YQL_ENSURE(DeserializeTzDatetime64(nextString, value, tzId)); - data = NUdf::TUnboxedValuePod(value); - data.SetTimezoneId(tzId); - } else { - data = ValueFromString(NUdf::EDataSlot::TzDatetime64, nextString); - YQL_ENSURE(data, "incorrect tz datetime format for value " << nextString); - } - + data = ValueFromString(NUdf::EDataSlot::TzDatetime64, nextString); + YQL_ENSURE(data, "incorrect tz datetime format for value " << nextString); return data; } case NUdf::TDataType<NUdf::TTzTimestamp64>::Id: { auto nextString = ReadNextString(cmd, buf); NUdf::TUnboxedValuePod data; - if (isTableFormat) { - i64 value; - ui16 tzId = 0; - YQL_ENSURE(DeserializeTzTimestamp64(nextString, value, tzId)); - data = NUdf::TUnboxedValuePod(value); - data.SetTimezoneId(tzId); - } else { - data = ValueFromString(NUdf::EDataSlot::TzTimestamp64, nextString); - YQL_ENSURE(data, "incorrect tz timestamp format for value " << nextString); - } - + data = ValueFromString(NUdf::EDataSlot::TzTimestamp64, nextString); + YQL_ENSURE(data, "incorrect tz timestamp format for value " << nextString); return data; } @@ -1202,7 +993,7 @@ NUdf::TUnboxedValue ReadYsonValue(TType* type, ui64 nativeYtTypeFlags, cmd = buf.Read(); for (ui32 i = 0; i < structType->GetMembersCount(); ++i) { - items[i] = ReadYsonValue(structType->GetMemberType(i), nativeYtTypeFlags, holderFactory, cmd, buf, isTableFormat); + items[i] = ReadYsonValue(structType->GetMemberType(i), holderFactory, cmd, buf); cmd = buf.Read(); if (cmd == ListItemSeparatorSymbol) { @@ -1227,11 +1018,7 @@ NUdf::TUnboxedValue ReadYsonValue(TType* type, ui64 nativeYtTypeFlags, if (pos && cmd != '#') { auto memberType = structType->GetMemberType(*pos); auto unwrappedType = memberType; - if (!(nativeYtTypeFlags & ENativeTypeCompatFlags::NTCF_COMPLEX) && isTableFormat && unwrappedType->IsOptional()) { - unwrappedType = static_cast<TOptionalType*>(unwrappedType)->GetItemType(); - } - - items[*pos] = ReadYsonValue(unwrappedType, nativeYtTypeFlags, holderFactory, cmd, buf, isTableFormat); + items[*pos] = ReadYsonValue(unwrappedType, holderFactory, cmd, buf); } else { SkipYson(cmd, buf); } @@ -1265,7 +1052,7 @@ NUdf::TUnboxedValue ReadYsonValue(TType* type, ui64 nativeYtTypeFlags, break; } - items = items.Append(ReadYsonValue(itemType, nativeYtTypeFlags, holderFactory, cmd, buf, isTableFormat)); + items = items.Append(ReadYsonValue(itemType, holderFactory, cmd, buf)); cmd = buf.Read(); if (cmd == ListItemSeparatorSymbol) { @@ -1281,40 +1068,24 @@ NUdf::TUnboxedValue ReadYsonValue(TType* type, ui64 nativeYtTypeFlags, return NUdf::TUnboxedValuePod(); } auto itemType = static_cast<TOptionalType*>(type)->GetItemType(); - if (isTableFormat && (nativeYtTypeFlags & ENativeTypeCompatFlags::NTCF_COMPLEX)) { - if (itemType->GetKind() == TType::EKind::Optional || itemType->GetKind() == TType::EKind::Pg) { - CHECK_EXPECTED(cmd, BeginListSymbol); - cmd = buf.Read(); - auto value = ReadYsonValue(itemType, nativeYtTypeFlags, holderFactory, cmd, buf, isTableFormat); - cmd = buf.Read(); - if (cmd == ListItemSeparatorSymbol) { - cmd = buf.Read(); - } - CHECK_EXPECTED(cmd, EndListSymbol); - return value.Release().MakeOptional(); - } else { - return ReadYsonValue(itemType, nativeYtTypeFlags, holderFactory, cmd, buf, isTableFormat).Release().MakeOptional(); - } - } else { - if (cmd != BeginListSymbol) { - auto value = ReadYsonValue(itemType, nativeYtTypeFlags, holderFactory, cmd, buf, isTableFormat); - return value.Release().MakeOptional(); - } + if (cmd != BeginListSymbol) { + auto value = ReadYsonValue(itemType, holderFactory, cmd, buf); + return value.Release().MakeOptional(); + } - cmd = buf.Read(); - if (cmd == EndListSymbol) { - return NUdf::TUnboxedValuePod(); - } + cmd = buf.Read(); + if (cmd == EndListSymbol) { + return NUdf::TUnboxedValuePod(); + } - auto value = ReadYsonValue(itemType, nativeYtTypeFlags, holderFactory, cmd, buf, isTableFormat); + auto value = ReadYsonValue(itemType, holderFactory, cmd, buf); + cmd = buf.Read(); + if (cmd == ListItemSeparatorSymbol) { cmd = buf.Read(); - if (cmd == ListItemSeparatorSymbol) { - cmd = buf.Read(); - } - - CHECK_EXPECTED(cmd, EndListSymbol); - return value.Release().MakeOptional(); } + + CHECK_EXPECTED(cmd, EndListSymbol); + return value.Release().MakeOptional(); } case TType::EKind::Dict: { @@ -1353,7 +1124,7 @@ NUdf::TUnboxedValue ReadYsonValue(TType* type, ui64 nativeYtTypeFlags, auto keyStr = NUdf::TUnboxedValue(MakeString(keyBuffer)); EXPECTED(buf, KeyValueSeparatorSymbol); cmd = buf.Read(); - auto payload = ReadYsonValue(payloadType, nativeYtTypeFlags, holderFactory, cmd, buf, isTableFormat); + auto payload = ReadYsonValue(payloadType, holderFactory, cmd, buf); map.emplace(std::move(keyStr), std::move(payload)); cmd = buf.Read(); @@ -1378,10 +1149,10 @@ NUdf::TUnboxedValue ReadYsonValue(TType* type, ui64 nativeYtTypeFlags, CHECK_EXPECTED(cmd, BeginListSymbol); cmd = buf.Read(); - auto key = ReadYsonValue(keyType, nativeYtTypeFlags, holderFactory, cmd, buf, isTableFormat); + auto key = ReadYsonValue(keyType, holderFactory, cmd, buf); EXPECTED(buf, ListItemSeparatorSymbol); cmd = buf.Read(); - auto payload = ReadYsonValue(payloadType, nativeYtTypeFlags, holderFactory, cmd, buf, isTableFormat); + auto payload = ReadYsonValue(payloadType, holderFactory, cmd, buf); cmd = buf.Read(); if (cmd == ListItemSeparatorSymbol) { cmd = buf.Read(); @@ -1416,7 +1187,7 @@ NUdf::TUnboxedValue ReadYsonValue(TType* type, ui64 nativeYtTypeFlags, cmd = buf.Read(); for (ui32 i = 0; i < tupleType->GetElementsCount(); ++i) { - items[i] = ReadYsonValue(tupleType->GetElementType(i), nativeYtTypeFlags, holderFactory, cmd, buf, isTableFormat); + items[i] = ReadYsonValue(tupleType->GetElementType(i), holderFactory, cmd, buf); cmd = buf.Read(); if (cmd == ListItemSeparatorSymbol) { @@ -1466,12 +1237,12 @@ NUdf::TUnboxedValue ReadYsonValue(TType* type, ui64 nativeYtTypeFlags, case TType::EKind::Pg: { auto pgType = static_cast<TPgType*>(type); - return isTableFormat ? ReadYsonValueInTableFormatPg(pgType, cmd, buf) : ReadYsonValuePg(pgType, cmd, buf); + return ReadYsonValuePg(pgType, cmd, buf); } case TType::EKind::Tagged: { auto taggedType = static_cast<TTaggedType*>(type); - return ReadYsonValue(taggedType->GetBaseType(), nativeYtTypeFlags, holderFactory, cmd, buf, isTableFormat); + return ReadYsonValue(taggedType->GetBaseType(), holderFactory, cmd, buf); } default: @@ -1480,7 +1251,7 @@ NUdf::TUnboxedValue ReadYsonValue(TType* type, ui64 nativeYtTypeFlags, } TMaybe<NUdf::TUnboxedValue> ParseYsonValue(const THolderFactory& holderFactory, - const TStringBuf& yson, TType* type, ui64 nativeYtTypeFlags, IOutputStream* err, bool isTableFormat) { + const TStringBuf& yson, TType* type, IOutputStream* err) { try { class TReader : public IBlockReader { public: @@ -1520,7 +1291,7 @@ TMaybe<NUdf::TUnboxedValue> ParseYsonValue(const THolderFactory& holderFactory, TReader reader(yson); TInputBuf buf(reader, nullptr); char cmd = buf.Read(); - return ReadYsonValue(type, nativeYtTypeFlags, holderFactory, cmd, buf, isTableFormat); + return ReadYsonValue(type, holderFactory, cmd, buf); } catch (const yexception& e) { if (err) { @@ -1530,1293 +1301,9 @@ TMaybe<NUdf::TUnboxedValue> ParseYsonValue(const THolderFactory& holderFactory, } } -TMaybe<NUdf::TUnboxedValue> ParseYsonNode(const THolderFactory& holderFactory, - const NYT::TNode& node, TType* type, ui64 nativeYtTypeFlags, IOutputStream* err) { - return ParseYsonValue(holderFactory, NYT::NodeToYsonString(node, NYson::EYsonFormat::Binary), type, nativeYtTypeFlags, err, true); -} - TMaybe<NUdf::TUnboxedValue> ParseYsonNodeInResultFormat(const THolderFactory& holderFactory, const NYT::TNode& node, TType* type, IOutputStream* err) { - return ParseYsonValue(holderFactory, NYT::NodeToYsonString(node, NYson::EYsonFormat::Binary), type, 0, err, false); -} - -extern "C" void ReadYsonContainerValue(TType* type, ui64 nativeYtTypeFlags, const NKikimr::NMiniKQL::THolderFactory& holderFactory, - NUdf::TUnboxedValue& value, TInputBuf& buf, bool wrapOptional) { - // yson content - ui32 size; - buf.ReadMany((char*)&size, sizeof(size)); - CHECK_STRING_LENGTH_UNSIGNED(size); - // parse binary yson... - YQL_ENSURE(size > 0); - char cmd = buf.Read(); - auto tmp = ReadYsonValue(type, nativeYtTypeFlags, holderFactory, cmd, buf, true); - if (!wrapOptional) { - value = std::move(tmp); - } - else { - value = tmp.Release().MakeOptional(); - } -} - -NUdf::TUnboxedValue ReadSkiffData(TType* type, ui64 nativeYtTypeFlags, TInputBuf& buf) { - auto schemeType = static_cast<TDataType*>(type)->GetSchemeType(); - switch (schemeType) { - case NUdf::TDataType<bool>::Id: { - ui8 data; - buf.ReadMany((char*)&data, sizeof(data)); - return NUdf::TUnboxedValuePod(data != 0); - } - - case NUdf::TDataType<ui8>::Id: { - ui64 data; - buf.ReadMany((char*)&data, sizeof(data)); - return NUdf::TUnboxedValuePod(ui8(data)); - } - - case NUdf::TDataType<i8>::Id: { - i64 data; - buf.ReadMany((char*)&data, sizeof(data)); - return NUdf::TUnboxedValuePod(i8(data)); - } - - case NUdf::TDataType<NUdf::TDate>::Id: - case NUdf::TDataType<ui16>::Id: { - ui64 data; - buf.ReadMany((char*)&data, sizeof(data)); - return NUdf::TUnboxedValuePod(ui16(data)); - } - - case NUdf::TDataType<i16>::Id: { - i64 data; - buf.ReadMany((char*)&data, sizeof(data)); - return NUdf::TUnboxedValuePod(i16(data)); - } - - case NUdf::TDataType<NUdf::TDate32>::Id: - case NUdf::TDataType<i32>::Id: { - i64 data; - buf.ReadMany((char*)&data, sizeof(data)); - return NUdf::TUnboxedValuePod(i32(data)); - } - - case NUdf::TDataType<NUdf::TDatetime>::Id: - case NUdf::TDataType<ui32>::Id: { - ui64 data; - buf.ReadMany((char*)&data, sizeof(data)); - return NUdf::TUnboxedValuePod(ui32(data)); - } - - case NUdf::TDataType<NUdf::TInterval>::Id: - case NUdf::TDataType<NUdf::TInterval64>::Id: - case NUdf::TDataType<NUdf::TDatetime64>::Id: - case NUdf::TDataType<NUdf::TTimestamp64>::Id: - case NUdf::TDataType<i64>::Id: { - i64 data; - buf.ReadMany((char*)&data, sizeof(data)); - return NUdf::TUnboxedValuePod(data); - } - - case NUdf::TDataType<NUdf::TTimestamp>::Id: - case NUdf::TDataType<ui64>::Id: { - ui64 data; - buf.ReadMany((char*)&data, sizeof(data)); - return NUdf::TUnboxedValuePod(data); - } - - case NUdf::TDataType<float>::Id: { - double data; - buf.ReadMany((char*)&data, sizeof(data)); - return NUdf::TUnboxedValuePod(float(data)); - } - - case NUdf::TDataType<double>::Id: { - double data; - buf.ReadMany((char*)&data, sizeof(data)); - return NUdf::TUnboxedValuePod(data); - } - - case NUdf::TDataType<NUdf::TUtf8>::Id: - case NUdf::TDataType<char*>::Id: - case NUdf::TDataType<NUdf::TJson>::Id: - case NUdf::TDataType<NUdf::TYson>::Id: - case NUdf::TDataType<NUdf::TDyNumber>::Id: - case NUdf::TDataType<NUdf::TUuid>::Id: { - ui32 size; - buf.ReadMany((char*)&size, sizeof(size)); - CHECK_STRING_LENGTH_UNSIGNED(size); - auto str = NUdf::TUnboxedValue(MakeStringNotFilled(size)); - buf.ReadMany(str.AsStringRef().Data(), size); - return str; - } - - case NUdf::TDataType<NUdf::TDecimal>::Id: { - if (nativeYtTypeFlags & NTCF_DECIMAL) { - auto const params = static_cast<TDataDecimalType*>(type)->GetParams(); - if (params.first < 10) { - i32 data; - buf.ReadMany((char*)&data, sizeof(data)); - return NUdf::TUnboxedValuePod(NDecimal::FromYtDecimal(data)); - } else if (params.first < 19) { - i64 data; - buf.ReadMany((char*)&data, sizeof(data)); - return NUdf::TUnboxedValuePod(NDecimal::FromYtDecimal(data)); - } else { - YQL_ENSURE(params.first < 36); - NDecimal::TInt128 data; - buf.ReadMany((char*)&data, sizeof(data)); - return NUdf::TUnboxedValuePod(NDecimal::FromYtDecimal(data)); - } - } else { - ui32 size; - buf.ReadMany(reinterpret_cast<char*>(&size), sizeof(size)); - const auto maxSize = sizeof(NDecimal::TInt128); - YQL_ENSURE(size > 0U && size <= maxSize, "Bad decimal field size: " << size); - char data[maxSize]; - buf.ReadMany(data, size); - const auto& v = NDecimal::Deserialize(data, size); - YQL_ENSURE(!NDecimal::IsError(v.first), "Bad decimal field data: " << data); - YQL_ENSURE(size == v.second, "Bad decimal field size: " << size); - return NUdf::TUnboxedValuePod(v.first); - } - } - - case NUdf::TDataType<NUdf::TTzDate>::Id: { - ui32 size; - buf.ReadMany((char*)&size, sizeof(size)); - CHECK_STRING_LENGTH_UNSIGNED(size); - auto& vec = buf.YsonBuffer(); - vec.resize(size); - buf.ReadMany(vec.data(), size); - ui16 value; - ui16 tzId; - YQL_ENSURE(DeserializeTzDate(TStringBuf(vec.begin(), vec.end()), value, tzId)); - auto data = NUdf::TUnboxedValuePod(value); - data.SetTimezoneId(tzId); - return data; - } - - case NUdf::TDataType<NUdf::TTzDatetime>::Id: { - ui32 size; - buf.ReadMany((char*)&size, sizeof(size)); - CHECK_STRING_LENGTH_UNSIGNED(size); - auto& vec = buf.YsonBuffer(); - vec.resize(size); - buf.ReadMany(vec.data(), size); - ui32 value; - ui16 tzId; - YQL_ENSURE(DeserializeTzDatetime(TStringBuf(vec.begin(), vec.end()), value, tzId)); - auto data = NUdf::TUnboxedValuePod(value); - data.SetTimezoneId(tzId); - return data; - } - - case NUdf::TDataType<NUdf::TTzTimestamp>::Id: { - ui32 size; - buf.ReadMany((char*)&size, sizeof(size)); - CHECK_STRING_LENGTH_UNSIGNED(size); - auto& vec = buf.YsonBuffer(); - vec.resize(size); - buf.ReadMany(vec.data(), size); - ui64 value; - ui16 tzId; - YQL_ENSURE(DeserializeTzTimestamp(TStringBuf(vec.begin(), vec.end()), value, tzId)); - auto data = NUdf::TUnboxedValuePod(value); - data.SetTimezoneId(tzId); - return data; - } - - case NUdf::TDataType<NUdf::TJsonDocument>::Id: { - ui32 size; - buf.ReadMany((char*)&size, sizeof(size)); - CHECK_STRING_LENGTH_UNSIGNED(size); - auto json = NUdf::TUnboxedValue(MakeStringNotFilled(size)); - buf.ReadMany(json.AsStringRef().Data(), size); - return ValueFromString(EDataSlot::JsonDocument, json.AsStringRef()); - } - - default: - YQL_ENSURE(false, "Unsupported data type: " << schemeType); - } -} - -void SkipSkiffField(NKikimr::NMiniKQL::TType* type, ui64 nativeYtTypeFlags, TInputBuf& buf) { - const bool isOptional = type->IsOptional(); - if (isOptional) { - // Unwrap optional - type = static_cast<TOptionalType*>(type)->GetItemType(); - } - - if (isOptional) { - auto marker = buf.Read(); - if (!marker) { - return; - } - } - - if (type->IsData()) { - auto schemeType = static_cast<TDataType*>(type)->GetSchemeType(); - switch (schemeType) { - case NUdf::TDataType<bool>::Id: - buf.SkipMany(sizeof(ui8)); - break; - - case NUdf::TDataType<ui8>::Id: - case NUdf::TDataType<ui16>::Id: - case NUdf::TDataType<ui32>::Id: - case NUdf::TDataType<ui64>::Id: - case NUdf::TDataType<NUdf::TDate>::Id: - case NUdf::TDataType<NUdf::TDatetime>::Id: - case NUdf::TDataType<NUdf::TTimestamp>::Id: - buf.SkipMany(sizeof(ui64)); - break; - - case NUdf::TDataType<i8>::Id: - case NUdf::TDataType<i16>::Id: - case NUdf::TDataType<i32>::Id: - case NUdf::TDataType<i64>::Id: - case NUdf::TDataType<NUdf::TInterval>::Id: - case NUdf::TDataType<NUdf::TDate32>::Id: - case NUdf::TDataType<NUdf::TDatetime64>::Id: - case NUdf::TDataType<NUdf::TTimestamp64>::Id: - case NUdf::TDataType<NUdf::TInterval64>::Id: - buf.SkipMany(sizeof(i64)); - break; - - case NUdf::TDataType<float>::Id: - case NUdf::TDataType<double>::Id: - buf.SkipMany(sizeof(double)); - break; - - case NUdf::TDataType<NUdf::TUtf8>::Id: - case NUdf::TDataType<char*>::Id: - case NUdf::TDataType<NUdf::TJson>::Id: - case NUdf::TDataType<NUdf::TYson>::Id: - case NUdf::TDataType<NUdf::TUuid>::Id: - case NUdf::TDataType<NUdf::TDyNumber>::Id: - case NUdf::TDataType<NUdf::TTzDate>::Id: - case NUdf::TDataType<NUdf::TTzDatetime>::Id: - case NUdf::TDataType<NUdf::TTzTimestamp>::Id: - case NUdf::TDataType<NUdf::TJsonDocument>::Id: { - ui32 size; - buf.ReadMany((char*)&size, sizeof(size)); - CHECK_STRING_LENGTH_UNSIGNED(size); - buf.SkipMany(size); - break; - } - case NUdf::TDataType<NUdf::TDecimal>::Id: { - if (nativeYtTypeFlags & NTCF_DECIMAL) { - auto const params = static_cast<TDataDecimalType*>(type)->GetParams(); - if (params.first < 10) { - buf.SkipMany(sizeof(i32)); - } else if (params.first < 19) { - buf.SkipMany(sizeof(i64)); - } else { - buf.SkipMany(sizeof(NDecimal::TInt128)); - } - } else { - ui32 size; - buf.ReadMany((char*)&size, sizeof(size)); - CHECK_STRING_LENGTH_UNSIGNED(size); - buf.SkipMany(size); - } - break; - } - default: - YQL_ENSURE(false, "Unsupported data type: " << schemeType); - } - return; - } - - if (type->IsPg()) { - SkipSkiffPg(static_cast<TPgType*>(type), buf); - return; - } - - if (type->IsStruct()) { - auto structType = static_cast<TStructType*>(type); - const std::vector<size_t>* reorder = nullptr; - if (auto cookie = structType->GetCookie()) { - reorder = ((const std::vector<size_t>*)cookie); - } - for (ui32 i = 0; i < structType->GetMembersCount(); ++i) { - SkipSkiffField(structType->GetMemberType(reorder ? reorder->at(i) : i), nativeYtTypeFlags, buf); - } - return; - } - - if (type->IsList()) { - auto itemType = static_cast<TListType*>(type)->GetItemType(); - while (buf.Read() == '\0') { - SkipSkiffField(itemType, nativeYtTypeFlags, buf); - } - return; - } - - if (type->IsTuple()) { - auto tupleType = static_cast<TTupleType*>(type); - - for (ui32 i = 0; i < tupleType->GetElementsCount(); ++i) { - SkipSkiffField(tupleType->GetElementType(i), nativeYtTypeFlags, buf); - } - return; - } - - if (type->IsVariant()) { - auto varType = AS_TYPE(TVariantType, type); - ui16 data = 0; - if (varType->GetAlternativesCount() < 256) { - buf.ReadMany((char*)&data, 1); - } else { - buf.ReadMany((char*)&data, sizeof(data)); - } - - if (varType->GetUnderlyingType()->IsTuple()) { - auto tupleType = AS_TYPE(TTupleType, varType->GetUnderlyingType()); - YQL_ENSURE(data < tupleType->GetElementsCount()); - SkipSkiffField(tupleType->GetElementType(data), nativeYtTypeFlags, buf); - } else { - auto structType = AS_TYPE(TStructType, varType->GetUnderlyingType()); - if (auto cookie = structType->GetCookie()) { - const std::vector<size_t>& reorder = *((const std::vector<size_t>*)cookie); - data = reorder[data]; - } - YQL_ENSURE(data < structType->GetMembersCount()); - - SkipSkiffField(structType->GetMemberType(data), nativeYtTypeFlags, buf); - } - return; - } - - if (type->IsVoid()) { - return; - } - - if (type->IsNull()) { - return; - } - - if (type->IsEmptyList() || type->IsEmptyDict()) { - return; - } - - if (type->IsDict()) { - auto dictType = AS_TYPE(TDictType, type); - auto keyType = dictType->GetKeyType(); - auto payloadType = dictType->GetPayloadType(); - while (buf.Read() == '\0') { - SkipSkiffField(keyType, nativeYtTypeFlags, buf); - SkipSkiffField(payloadType, nativeYtTypeFlags, buf); - } - return; - } - - YQL_ENSURE(false, "Unsupported type for skip: " << type->GetKindAsStr()); -} - -NKikimr::NUdf::TUnboxedValue ReadSkiffNativeYtValue(NKikimr::NMiniKQL::TType* type, ui64 nativeYtTypeFlags, - const NKikimr::NMiniKQL::THolderFactory& holderFactory, TInputBuf& buf) -{ - if (type->IsData()) { - return ReadSkiffData(type, nativeYtTypeFlags, buf); - } - - if (type->IsPg()) { - return ReadSkiffPg(static_cast<TPgType*>(type), buf); - } - - if (type->IsOptional()) { - auto marker = buf.Read(); - if (!marker) { - return NUdf::TUnboxedValue(); - } - - auto value = ReadSkiffNativeYtValue(AS_TYPE(TOptionalType, type)->GetItemType(), nativeYtTypeFlags, holderFactory, buf); - return value.Release().MakeOptional(); - } - - if (type->IsTuple()) { - auto tupleType = AS_TYPE(TTupleType, type); - NUdf::TUnboxedValue* items; - auto value = holderFactory.CreateDirectArrayHolder(tupleType->GetElementsCount(), items); - for (ui32 i = 0; i < tupleType->GetElementsCount(); ++i) { - items[i] = ReadSkiffNativeYtValue(tupleType->GetElementType(i), nativeYtTypeFlags, holderFactory, buf); - } - - return value; - } - - if (type->IsStruct()) { - auto structType = AS_TYPE(TStructType, type); - NUdf::TUnboxedValue* items; - auto value = holderFactory.CreateDirectArrayHolder(structType->GetMembersCount(), items); - - if (auto cookie = type->GetCookie()) { - const std::vector<size_t>& reorder = *((const std::vector<size_t>*)cookie); - for (ui32 i = 0; i < structType->GetMembersCount(); ++i) { - const auto ndx = reorder[i]; - items[ndx] = ReadSkiffNativeYtValue(structType->GetMemberType(ndx), nativeYtTypeFlags, holderFactory, buf); - } - } else { - for (ui32 i = 0; i < structType->GetMembersCount(); ++i) { - items[i] = ReadSkiffNativeYtValue(structType->GetMemberType(i), nativeYtTypeFlags, holderFactory, buf); - } - } - - return value; - } - - if (type->IsList()) { - auto itemType = AS_TYPE(TListType, type)->GetItemType(); - TDefaultListRepresentation items; - while (buf.Read() == '\0') { - items = items.Append(ReadSkiffNativeYtValue(itemType, nativeYtTypeFlags, holderFactory, buf)); - } - - return holderFactory.CreateDirectListHolder(std::move(items)); - } - - if (type->IsVariant()) { - auto varType = AS_TYPE(TVariantType, type); - ui16 data = 0; - if (varType->GetAlternativesCount() < 256) { - buf.ReadMany((char*)&data, 1); - } else { - buf.ReadMany((char*)&data, sizeof(data)); - } - if (varType->GetUnderlyingType()->IsTuple()) { - auto tupleType = AS_TYPE(TTupleType, varType->GetUnderlyingType()); - YQL_ENSURE(data < tupleType->GetElementsCount()); - auto item = ReadSkiffNativeYtValue(tupleType->GetElementType(data), nativeYtTypeFlags, holderFactory, buf); - return holderFactory.CreateVariantHolder(item.Release(), data); - } - else { - auto structType = AS_TYPE(TStructType, varType->GetUnderlyingType()); - if (auto cookie = structType->GetCookie()) { - const std::vector<size_t>& reorder = *((const std::vector<size_t>*)cookie); - data = reorder[data]; - } - YQL_ENSURE(data < structType->GetMembersCount()); - - auto item = ReadSkiffNativeYtValue(structType->GetMemberType(data), nativeYtTypeFlags, holderFactory, buf); - return holderFactory.CreateVariantHolder(item.Release(), data); - } - } - - if (type->IsVoid()) { - return NUdf::TUnboxedValue::Zero(); - } - - if (type->IsNull()) { - return NUdf::TUnboxedValue(); - } - - if (type->IsEmptyList() || type->IsEmptyDict()) { - return holderFactory.GetEmptyContainerLazy(); - } - - if (type->IsDict()) { - auto dictType = AS_TYPE(TDictType, type); - auto keyType = dictType->GetKeyType(); - auto payloadType = dictType->GetPayloadType(); - - auto builder = holderFactory.NewDict(dictType, NUdf::TDictFlags::EDictKind::Hashed); - while (buf.Read() == '\0') { - auto key = ReadSkiffNativeYtValue(keyType, nativeYtTypeFlags, holderFactory, buf); - auto payload = ReadSkiffNativeYtValue(payloadType, nativeYtTypeFlags, holderFactory, buf); - builder->Add(std::move(key), std::move(payload)); - } - - return builder->Build(); - } - - YQL_ENSURE(false, "Unsupported type: " << type->GetKindAsStr()); -} - -extern "C" void ReadContainerNativeYtValue(TType* type, ui64 nativeYtTypeFlags, const NKikimr::NMiniKQL::THolderFactory& holderFactory, - NUdf::TUnboxedValue& value, TInputBuf& buf, bool wrapOptional) { - auto tmp = ReadSkiffNativeYtValue(type, nativeYtTypeFlags, holderFactory, buf); - if (!wrapOptional) { - value = std::move(tmp); - } else { - value = tmp.Release().MakeOptional(); - } -} - -/////////////////////////////////////////// -// -// Initial state first = last = &dummy -// -// +1 block first = &dummy, last = newPage, first.next = newPage, newPage.next= &dummy -// +1 block first = &dummy, last = newPage2, first.next = newPage, newPage.next = newPage2, newPage2.next = &dummy -// -/////////////////////////////////////////// -class TTempBlockWriter : public NCommon::IBlockWriter { -public: - TTempBlockWriter() - : Pool_(*TlsAllocState) - , Last_(&Dummy_) - { - Dummy_.Avail_ = 0; - Dummy_.Next_ = &Dummy_; - } - - ~TTempBlockWriter() { - auto current = Dummy_.Next_; // skip dummy node - while (current != &Dummy_) { - auto next = current->Next_; - Pool_.ReturnPage(current); - current = next; - } - } - - void SetRecordBoundaryCallback(std::function<void()> callback) override { - Y_UNUSED(callback); - } - - void WriteBlocks(TOutputBuf& buf) const { - auto current = Dummy_.Next_; // skip dummy node - while (current != &Dummy_) { - auto next = current->Next_; - buf.WriteMany((const char*)(current + 1), current->Avail_); - current = next; - } - } - - TTempBlockWriter(const TTempBlockWriter&) = delete; - void operator=(const TTempBlockWriter&) = delete; - - std::pair<char*, char*> NextEmptyBlock() override { - auto newPage = Pool_.GetPage(); - auto header = (TPageHeader*)newPage; - header->Avail_ = 0; - header->Next_ = &Dummy_; - Last_->Next_ = header; - Last_ = header; - return std::make_pair((char*)(header + 1), (char*)newPage + TAlignedPagePool::POOL_PAGE_SIZE); - } - - void ReturnBlock(size_t avail, std::optional<size_t> lastRecordBoundary) override { - Y_UNUSED(lastRecordBoundary); - YQL_ENSURE(avail <= TAlignedPagePool::POOL_PAGE_SIZE - sizeof(TPageHeader)); - Last_->Avail_ = avail; - } - - void Finish() override { - } - -private: - struct TPageHeader { - TPageHeader* Next_ = nullptr; - ui32 Avail_ = 0; - }; - - NKikimr::TAlignedPagePool& Pool_; - TPageHeader* Last_; - TPageHeader Dummy_; -}; - -void WriteYsonValueInTableFormat(TOutputBuf& buf, TType* type, ui64 nativeYtTypeFlags, const NUdf::TUnboxedValuePod& value, bool topLevel) { - // Table format, very compact - switch (type->GetKind()) { - case TType::EKind::Variant: { - buf.Write(BeginListSymbol); - auto varType = static_cast<TVariantType*>(type); - auto underlyingType = varType->GetUnderlyingType(); - auto index = value.GetVariantIndex(); - YQL_ENSURE(index < varType->GetAlternativesCount(), "Bad variant alternative: " << index << ", only " << varType->GetAlternativesCount() << " are available"); - YQL_ENSURE(underlyingType->IsTuple() || underlyingType->IsStruct(), "Wrong underlying type"); - TType* itemType; - if (underlyingType->IsTuple()) { - itemType = static_cast<TTupleType*>(underlyingType)->GetElementType(index); - } - else { - itemType = static_cast<TStructType*>(underlyingType)->GetMemberType(index); - } - if (!(nativeYtTypeFlags & NTCF_COMPLEX) || underlyingType->IsTuple()) { - buf.Write(Uint64Marker); - buf.WriteVarUI64(index); - } else { - auto structType = static_cast<TStructType*>(underlyingType); - auto varName = structType->GetMemberName(index); - buf.Write(StringMarker); - buf.WriteVarI32(varName.size()); - buf.WriteMany(varName); - } - buf.Write(ListItemSeparatorSymbol); - WriteYsonValueInTableFormat(buf, itemType, nativeYtTypeFlags, value.GetVariantItem(), false); - buf.Write(ListItemSeparatorSymbol); - buf.Write(EndListSymbol); - break; - } - - case TType::EKind::Data: { - auto schemeType = static_cast<TDataType*>(type)->GetSchemeType(); - switch (schemeType) { - case NUdf::TDataType<bool>::Id: { - buf.Write(value.Get<bool>() ? TrueMarker : FalseMarker); - break; - } - - case NUdf::TDataType<ui8>::Id: - buf.Write(Uint64Marker); - buf.WriteVarUI64(value.Get<ui8>()); - break; - - case NUdf::TDataType<i8>::Id: - buf.Write(Int64Marker); - buf.WriteVarI64(value.Get<i8>()); - break; - - case NUdf::TDataType<ui16>::Id: - buf.Write(Uint64Marker); - buf.WriteVarUI64(value.Get<ui16>()); - break; - - case NUdf::TDataType<i16>::Id: - buf.Write(Int64Marker); - buf.WriteVarI64(value.Get<i16>()); - break; - - case NUdf::TDataType<i32>::Id: - buf.Write(Int64Marker); - buf.WriteVarI64(value.Get<i32>()); - break; - - case NUdf::TDataType<ui32>::Id: - buf.Write(Uint64Marker); - buf.WriteVarUI64(value.Get<ui32>()); - break; - - case NUdf::TDataType<i64>::Id: - buf.Write(Int64Marker); - buf.WriteVarI64(value.Get<i64>()); - break; - - case NUdf::TDataType<ui64>::Id: - buf.Write(Uint64Marker); - buf.WriteVarUI64(value.Get<ui64>()); - break; - - case NUdf::TDataType<float>::Id: { - buf.Write(DoubleMarker); - double val = value.Get<float>(); - buf.WriteMany((const char*)&val, sizeof(val)); - break; - } - - case NUdf::TDataType<double>::Id: { - buf.Write(DoubleMarker); - double val = value.Get<double>(); - buf.WriteMany((const char*)&val, sizeof(val)); - break; - } - - case NUdf::TDataType<NUdf::TUtf8>::Id: - case NUdf::TDataType<char*>::Id: - case NUdf::TDataType<NUdf::TJson>::Id: - case NUdf::TDataType<NUdf::TDyNumber>::Id: - case NUdf::TDataType<NUdf::TUuid>::Id: { - buf.Write(StringMarker); - auto str = value.AsStringRef(); - buf.WriteVarI32(str.Size()); - buf.WriteMany(str); - break; - } - - case NUdf::TDataType<NUdf::TDecimal>::Id: { - buf.Write(StringMarker); - if (nativeYtTypeFlags & NTCF_DECIMAL){ - auto const params = static_cast<TDataDecimalType*>(type)->GetParams(); - const NDecimal::TInt128 data128 = value.GetInt128(); - char tmpBuf[NYT::NDecimal::TDecimal::MaxBinarySize]; - if (params.first < 10) { - // The YQL format differs from the YT format in the inf/nan values. NDecimal::FromYtDecimal converts nan/inf - TStringBuf resBuf = NYT::NDecimal::TDecimal::WriteBinary32(params.first, NDecimal::ToYtDecimal<i32>(data128), tmpBuf, NYT::NDecimal::TDecimal::MaxBinarySize); - buf.WriteVarI32(resBuf.size()); - buf.WriteMany(resBuf.data(), resBuf.size()); - } else if (params.first < 19) { - TStringBuf resBuf = NYT::NDecimal::TDecimal::WriteBinary64(params.first, NDecimal::ToYtDecimal<i64>(data128), tmpBuf, NYT::NDecimal::TDecimal::MaxBinarySize); - buf.WriteVarI32(resBuf.size()); - buf.WriteMany(resBuf.data(), resBuf.size()); - } else { - YQL_ENSURE(params.first < 36); - NYT::NDecimal::TDecimal::TValue128 val; - auto data128Converted = NDecimal::ToYtDecimal<NDecimal::TInt128>(data128); - memcpy(&val, &data128Converted, sizeof(val)); - auto resBuf = NYT::NDecimal::TDecimal::WriteBinary128(params.first, val, tmpBuf, NYT::NDecimal::TDecimal::MaxBinarySize); - buf.WriteVarI32(resBuf.size()); - buf.WriteMany(resBuf.data(), resBuf.size()); - } - } else { - char data[sizeof(NDecimal::TInt128)]; - const ui32 size = NDecimal::Serialize(value.GetInt128(), data); - buf.WriteVarI32(size); - buf.WriteMany(data, size); - } - break; - } - - case NUdf::TDataType<NUdf::TYson>::Id: { - // embed content - buf.WriteMany(value.AsStringRef()); - break; - } - - case NUdf::TDataType<NUdf::TDate>::Id: - buf.Write(Uint64Marker); - buf.WriteVarUI64(value.Get<ui16>()); - break; - - case NUdf::TDataType<NUdf::TDatetime>::Id: - buf.Write(Uint64Marker); - buf.WriteVarUI64(value.Get<ui32>()); - break; - - case NUdf::TDataType<NUdf::TTimestamp>::Id: - buf.Write(Uint64Marker); - buf.WriteVarUI64(value.Get<ui64>()); - break; - - case NUdf::TDataType<NUdf::TInterval>::Id: - case NUdf::TDataType<NUdf::TInterval64>::Id: - case NUdf::TDataType<NUdf::TDatetime64>::Id: - case NUdf::TDataType<NUdf::TTimestamp64>::Id: - buf.Write(Int64Marker); - buf.WriteVarI64(value.Get<i64>()); - break; - - case NUdf::TDataType<NUdf::TDate32>::Id: - buf.Write(Int64Marker); - buf.WriteVarI64(value.Get<i32>()); - break; - - case NUdf::TDataType<NUdf::TTzDate>::Id: { - ui16 tzId = SwapBytes(value.GetTimezoneId()); - ui16 data = SwapBytes(value.Get<ui16>()); - ui32 size = sizeof(data) + sizeof(tzId); - buf.Write(StringMarker); - buf.WriteVarI32(size); - buf.WriteMany((const char*)&data, sizeof(data)); - buf.WriteMany((const char*)&tzId, sizeof(tzId)); - break; - } - - case NUdf::TDataType<NUdf::TTzDatetime>::Id: { - ui16 tzId = SwapBytes(value.GetTimezoneId()); - ui32 data = SwapBytes(value.Get<ui32>()); - ui32 size = sizeof(data) + sizeof(tzId); - buf.Write(StringMarker); - buf.WriteVarI32(size); - buf.WriteMany((const char*)&data, sizeof(data)); - buf.WriteMany((const char*)&tzId, sizeof(tzId)); - break; - } - - case NUdf::TDataType<NUdf::TTzTimestamp>::Id: { - ui16 tzId = SwapBytes(value.GetTimezoneId()); - ui64 data = SwapBytes(value.Get<ui64>()); - ui32 size = sizeof(data) + sizeof(tzId); - buf.Write(StringMarker); - buf.WriteVarI32(size); - buf.WriteMany((const char*)&data, sizeof(data)); - buf.WriteMany((const char*)&tzId, sizeof(tzId)); - break; - } - - case NUdf::TDataType<NUdf::TTzDate32>::Id: { - ui16 tzId = SwapBytes(value.GetTimezoneId()); - ui32 data = 0x80 ^ SwapBytes((ui32)value.Get<i32>()); - ui32 size = sizeof(data) + sizeof(tzId); - buf.Write(StringMarker); - buf.WriteVarI32(size); - buf.WriteMany((const char*)&data, sizeof(data)); - buf.WriteMany((const char*)&tzId, sizeof(tzId)); - break; - } - - case NUdf::TDataType<NUdf::TTzDatetime64>::Id: { - ui16 tzId = SwapBytes(value.GetTimezoneId()); - ui64 data = 0x80 ^ SwapBytes((ui64)value.Get<i64>()); - ui32 size = sizeof(data) + sizeof(tzId); - buf.Write(StringMarker); - buf.WriteVarI32(size); - buf.WriteMany((const char*)&data, sizeof(data)); - buf.WriteMany((const char*)&tzId, sizeof(tzId)); - break; - } - - case NUdf::TDataType<NUdf::TTzTimestamp64>::Id: { - ui16 tzId = SwapBytes(value.GetTimezoneId()); - ui64 data = 0x80 ^ SwapBytes((ui64)value.Get<i64>()); - ui32 size = sizeof(data) + sizeof(tzId); - buf.Write(StringMarker); - buf.WriteVarI32(size); - buf.WriteMany((const char*)&data, sizeof(data)); - buf.WriteMany((const char*)&tzId, sizeof(tzId)); - break; - } - - case NUdf::TDataType<NUdf::TJsonDocument>::Id: { - buf.Write(StringMarker); - NUdf::TUnboxedValue json = ValueToString(EDataSlot::JsonDocument, value); - auto str = json.AsStringRef(); - buf.WriteVarI32(str.Size()); - buf.WriteMany(str); - break; - } - - default: - YQL_ENSURE(false, "Unsupported data type: " << schemeType); - } - - break; - } - - case TType::EKind::Struct: { - auto structType = static_cast<TStructType*>(type); - if (nativeYtTypeFlags & ENativeTypeCompatFlags::NTCF_COMPLEX) { - buf.Write(BeginMapSymbol); - for (ui32 i = 0; i < structType->GetMembersCount(); ++i) { - buf.Write(StringMarker); - auto key = structType->GetMemberName(i); - buf.WriteVarI32(key.size()); - buf.WriteMany(key); - buf.Write(KeyValueSeparatorSymbol); - WriteYsonValueInTableFormat(buf, structType->GetMemberType(i), nativeYtTypeFlags, value.GetElement(i), false); - buf.Write(KeyedItemSeparatorSymbol); - } - buf.Write(EndMapSymbol); - } else { - buf.Write(BeginListSymbol); - for (ui32 i = 0; i < structType->GetMembersCount(); ++i) { - WriteYsonValueInTableFormat(buf, structType->GetMemberType(i), nativeYtTypeFlags, value.GetElement(i), false); - buf.Write(ListItemSeparatorSymbol); - } - buf.Write(EndListSymbol); - } - break; - } - - case TType::EKind::List: { - auto itemType = static_cast<TListType*>(type)->GetItemType(); - const auto iter = value.GetListIterator(); - buf.Write(BeginListSymbol); - for (NUdf::TUnboxedValue item; iter.Next(item); buf.Write(ListItemSeparatorSymbol)) { - WriteYsonValueInTableFormat(buf, itemType, nativeYtTypeFlags, item, false); - } - - buf.Write(EndListSymbol); - break; - } - - case TType::EKind::Optional: { - auto itemType = static_cast<TOptionalType*>(type)->GetItemType(); - if (nativeYtTypeFlags & ENativeTypeCompatFlags::NTCF_COMPLEX) { - if (value) { - if (itemType->GetKind() == TType::EKind::Optional || itemType->GetKind() == TType::EKind::Pg) { - buf.Write(BeginListSymbol); - } - WriteYsonValueInTableFormat(buf, itemType, nativeYtTypeFlags, value.GetOptionalValue(), false); - if (itemType->GetKind() == TType::EKind::Optional || itemType->GetKind() == TType::EKind::Pg) { - buf.Write(ListItemSeparatorSymbol); - buf.Write(EndListSymbol); - } - } else { - buf.Write(EntitySymbol); - } - } else { - if (!value) { - if (topLevel) { - buf.Write(BeginListSymbol); - buf.Write(EndListSymbol); - } - else { - buf.Write(EntitySymbol); - } - } - else { - buf.Write(BeginListSymbol); - WriteYsonValueInTableFormat(buf, itemType, nativeYtTypeFlags, value.GetOptionalValue(), false); - buf.Write(ListItemSeparatorSymbol); - buf.Write(EndListSymbol); - } - } - break; - } - - case TType::EKind::Dict: { - auto dictType = static_cast<TDictType*>(type); - const auto iter = value.GetDictIterator(); - buf.Write(BeginListSymbol); - for (NUdf::TUnboxedValue key, payload; iter.NextPair(key, payload);) { - buf.Write(BeginListSymbol); - WriteYsonValueInTableFormat(buf, dictType->GetKeyType(), nativeYtTypeFlags, key, false); - buf.Write(ListItemSeparatorSymbol); - WriteYsonValueInTableFormat(buf, dictType->GetPayloadType(), nativeYtTypeFlags, payload, false); - buf.Write(ListItemSeparatorSymbol); - buf.Write(EndListSymbol); - buf.Write(ListItemSeparatorSymbol); - } - - buf.Write(EndListSymbol); - break; - } - - case TType::EKind::Tuple: { - auto tupleType = static_cast<TTupleType*>(type); - buf.Write(BeginListSymbol); - for (ui32 i = 0; i < tupleType->GetElementsCount(); ++i) { - WriteYsonValueInTableFormat(buf, tupleType->GetElementType(i), nativeYtTypeFlags, value.GetElement(i), false); - buf.Write(ListItemSeparatorSymbol); - } - - buf.Write(EndListSymbol); - break; - } - - case TType::EKind::Void: { - buf.Write(EntitySymbol); - break; - } - - case TType::EKind::Null: { - buf.Write(EntitySymbol); - break; - } - - case TType::EKind::EmptyList: { - buf.Write(BeginListSymbol); - buf.Write(EndListSymbol); - break; - } - - case TType::EKind::EmptyDict: { - buf.Write(BeginListSymbol); - buf.Write(EndListSymbol); - break; - } - - case TType::EKind::Pg: { - auto pgType = static_cast<TPgType*>(type); - WriteYsonValueInTableFormatPg(buf, pgType, value, topLevel); - break; - } - - default: - YQL_ENSURE(false, "Unsupported type: " << type->GetKindAsStr()); - } -} - -extern "C" void WriteYsonContainerValue(TType* type, ui64 nativeYtTypeFlags, const NUdf::TUnboxedValuePod& value, TOutputBuf& buf) { - TTempBlockWriter blockWriter; - TOutputBuf ysonBuf(blockWriter, nullptr); - WriteYsonValueInTableFormat(ysonBuf, type, nativeYtTypeFlags, value, true); - ysonBuf.Flush(); - ui32 size = ysonBuf.GetWrittenBytes(); - buf.WriteMany((const char*)&size, sizeof(size)); - blockWriter.WriteBlocks(buf); -} - -extern "C" void WriteContainerNativeYtValue(TType* type, ui64 nativeYtTypeFlags, const NUdf::TUnboxedValuePod& value, TOutputBuf& buf) { - WriteSkiffNativeYtValue(type, nativeYtTypeFlags, value, buf); -} - -void WriteSkiffData(NKikimr::NMiniKQL::TType* type, ui64 nativeYtTypeFlags, const NKikimr::NUdf::TUnboxedValuePod& value, NCommon::TOutputBuf& buf) { - auto schemeType = static_cast<TDataType*>(type)->GetSchemeType(); - switch (schemeType) { - case NUdf::TDataType<bool>::Id: { - ui8 data = value.Get<ui8>(); - buf.Write(data); - break; - } - - case NUdf::TDataType<ui8>::Id: { - ui64 data = value.Get<ui8>(); - buf.WriteMany((const char*)&data, sizeof(data)); - break; - } - - case NUdf::TDataType<i8>::Id: { - i64 data = value.Get<i8>(); - buf.WriteMany((const char*)&data, sizeof(data)); - break; - } - - case NUdf::TDataType<NUdf::TDate>::Id: - case NUdf::TDataType<ui16>::Id: { - ui64 data = value.Get<ui16>(); - buf.WriteMany((const char*)&data, sizeof(data)); - break; - } - - case NUdf::TDataType<i16>::Id: { - i64 data = value.Get<i16>(); - buf.WriteMany((const char*)&data, sizeof(data)); - break; - } - - case NUdf::TDataType<NUdf::TDate32>::Id: - case NUdf::TDataType<i32>::Id: { - i64 data = value.Get<i32>(); - buf.WriteMany((const char*)&data, sizeof(data)); - break; - } - - case NUdf::TDataType<NUdf::TDatetime>::Id: - case NUdf::TDataType<ui32>::Id: { - ui64 data = value.Get<ui32>(); - buf.WriteMany((const char*)&data, sizeof(data)); - break; - } - - case NUdf::TDataType<NUdf::TInterval>::Id: - case NUdf::TDataType<NUdf::TInterval64>::Id: - case NUdf::TDataType<NUdf::TDatetime64>::Id: - case NUdf::TDataType<NUdf::TTimestamp64>::Id: - case NUdf::TDataType<i64>::Id: { - i64 data = value.Get<i64>(); - buf.WriteMany((const char*)&data, sizeof(data)); - break; - } - - case NUdf::TDataType<NUdf::TTimestamp>::Id: - case NUdf::TDataType<ui64>::Id: { - ui64 data = value.Get<ui64>(); - buf.WriteMany((const char*)&data, sizeof(data)); - break; - } - - case NUdf::TDataType<float>::Id: { - double data = value.Get<float>(); - buf.WriteMany((const char*)&data, sizeof(data)); - break; - } - - case NUdf::TDataType<double>::Id: { - double data = value.Get<double>(); - buf.WriteMany((const char*)&data, sizeof(data)); - break; - } - - case NUdf::TDataType<NUdf::TUtf8>::Id: - case NUdf::TDataType<char*>::Id: - case NUdf::TDataType<NUdf::TJson>::Id: - case NUdf::TDataType<NUdf::TYson>::Id: - case NUdf::TDataType<NUdf::TDyNumber>::Id: - case NUdf::TDataType<NUdf::TUuid>::Id: { - auto str = value.AsStringRef(); - ui32 size = str.Size(); - buf.WriteMany((const char*)&size, sizeof(size)); - buf.WriteMany(str); - break; - } - - case NUdf::TDataType<NUdf::TDecimal>::Id: { - if (nativeYtTypeFlags & NTCF_DECIMAL) { - auto const params = static_cast<TDataDecimalType*>(type)->GetParams(); - const NDecimal::TInt128 data128 = value.GetInt128(); - if (params.first < 10) { - auto data = NDecimal::ToYtDecimal<i32>(data128); - buf.WriteMany((const char*)&data, sizeof(data)); - } else if (params.first < 19) { - auto data = NDecimal::ToYtDecimal<i64>(data128); - buf.WriteMany((const char*)&data, sizeof(data)); - } else { - YQL_ENSURE(params.first < 36); - auto data = NDecimal::ToYtDecimal<NDecimal::TInt128>(data128); - buf.WriteMany((const char*)&data, sizeof(data)); - } - } else { - char data[sizeof(NDecimal::TInt128)]; - const ui32 size = NDecimal::Serialize(value.GetInt128(), data); - buf.WriteMany(reinterpret_cast<const char*>(&size), sizeof(size)); - buf.WriteMany(data, size); - } - break; - } - - case NUdf::TDataType<NUdf::TTzDate>::Id: { - ui16 tzId = SwapBytes(value.GetTimezoneId()); - ui16 data = SwapBytes(value.Get<ui16>()); - ui32 size = sizeof(data) + sizeof(tzId); - buf.WriteMany((const char*)&size, sizeof(size)); - buf.WriteMany((const char*)&data, sizeof(data)); - buf.WriteMany((const char*)&tzId, sizeof(tzId)); - break; - } - - case NUdf::TDataType<NUdf::TTzDatetime>::Id: { - ui16 tzId = SwapBytes(value.GetTimezoneId()); - ui32 data = SwapBytes(value.Get<ui32>()); - ui32 size = sizeof(data) + sizeof(tzId); - buf.WriteMany((const char*)&size, sizeof(size)); - buf.WriteMany((const char*)&data, sizeof(data)); - buf.WriteMany((const char*)&tzId, sizeof(tzId)); - break; - } - - case NUdf::TDataType<NUdf::TTzTimestamp>::Id: { - ui16 tzId = SwapBytes(value.GetTimezoneId()); - ui64 data = SwapBytes(value.Get<ui64>()); - ui32 size = sizeof(data) + sizeof(tzId); - buf.WriteMany((const char*)&size, sizeof(size)); - buf.WriteMany((const char*)&data, sizeof(data)); - buf.WriteMany((const char*)&tzId, sizeof(tzId)); - break; - } - - case NUdf::TDataType<NUdf::TTzDate32>::Id: { - ui16 tzId = SwapBytes(value.GetTimezoneId()); - ui32 data = 0x80 ^ SwapBytes((ui32)value.Get<i32>()); - ui32 size = sizeof(data) + sizeof(tzId); - buf.WriteMany((const char*)&size, sizeof(size)); - buf.WriteMany((const char*)&data, sizeof(data)); - buf.WriteMany((const char*)&tzId, sizeof(tzId)); - break; - } - - case NUdf::TDataType<NUdf::TTzDatetime64>::Id: { - ui16 tzId = SwapBytes(value.GetTimezoneId()); - ui64 data = 0x80 ^ SwapBytes((ui64)value.Get<i64>()); - ui32 size = sizeof(data) + sizeof(tzId); - buf.WriteMany((const char*)&size, sizeof(size)); - buf.WriteMany((const char*)&data, sizeof(data)); - buf.WriteMany((const char*)&tzId, sizeof(tzId)); - break; - } - - case NUdf::TDataType<NUdf::TTzTimestamp64>::Id: { - ui16 tzId = SwapBytes(value.GetTimezoneId()); - ui64 data = 0x80 ^ SwapBytes((ui64)value.Get<i64>()); - ui32 size = sizeof(data) + sizeof(tzId); - buf.WriteMany((const char*)&size, sizeof(size)); - buf.WriteMany((const char*)&data, sizeof(data)); - buf.WriteMany((const char*)&tzId, sizeof(tzId)); - break; - } - - case NUdf::TDataType<NUdf::TJsonDocument>::Id: { - NUdf::TUnboxedValue json = ValueToString(EDataSlot::JsonDocument, value); - auto str = json.AsStringRef(); - ui32 size = str.Size(); - buf.WriteMany((const char*)&size, sizeof(size)); - buf.WriteMany(str); - break; - } - - default: - YQL_ENSURE(false, "Unsupported data type: " << schemeType); - } -} - -void WriteSkiffNativeYtValue(NKikimr::NMiniKQL::TType* type, ui64 nativeYtTypeFlags, const NKikimr::NUdf::TUnboxedValuePod& value, NCommon::TOutputBuf& buf) { - if (type->IsData()) { - WriteSkiffData(type, nativeYtTypeFlags, value, buf); - } else if (type->IsPg()) { - WriteSkiffPgValue(static_cast<TPgType*>(type), value, buf); - } else if (type->IsOptional()) { - if (!value) { - buf.Write('\0'); - return; - } - - buf.Write('\1'); - WriteSkiffNativeYtValue(AS_TYPE(TOptionalType, type)->GetItemType(), nativeYtTypeFlags, value.GetOptionalValue(), buf); - } else if (type->IsList()) { - auto itemType = AS_TYPE(TListType, type)->GetItemType(); - auto elements = value.GetElements(); - if (elements) { - ui32 size = value.GetListLength(); - for (ui32 i = 0; i < size; ++i) { - buf.Write('\0'); - WriteSkiffNativeYtValue(itemType, nativeYtTypeFlags, elements[i], buf); - } - } else { - NUdf::TUnboxedValue item; - for (auto iter = value.GetListIterator(); iter.Next(item); ) { - buf.Write('\0'); - WriteSkiffNativeYtValue(itemType, nativeYtTypeFlags, item, buf); - } - } - - buf.Write('\xff'); - } else if (type->IsTuple()) { - auto tupleType = AS_TYPE(TTupleType, type); - auto elements = value.GetElements(); - if (elements) { - for (ui32 i = 0; i < tupleType->GetElementsCount(); ++i) { - WriteSkiffNativeYtValue(tupleType->GetElementType(i), nativeYtTypeFlags, elements[i], buf); - } - } else { - for (ui32 i = 0; i < tupleType->GetElementsCount(); ++i) { - WriteSkiffNativeYtValue(tupleType->GetElementType(i), nativeYtTypeFlags, value.GetElement(i), buf); - } - } - } else if (type->IsStruct()) { - auto structType = AS_TYPE(TStructType, type); - auto elements = value.GetElements(); - if (auto cookie = type->GetCookie()) { - const std::vector<size_t>& reorder = *((const std::vector<size_t>*)cookie); - if (elements) { - for (ui32 i = 0; i < structType->GetMembersCount(); ++i) { - const auto ndx = reorder[i]; - WriteSkiffNativeYtValue(structType->GetMemberType(ndx), nativeYtTypeFlags, elements[ndx], buf); - } - } else { - for (ui32 i = 0; i < structType->GetMembersCount(); ++i) { - const auto ndx = reorder[i]; - WriteSkiffNativeYtValue(structType->GetMemberType(ndx), nativeYtTypeFlags, value.GetElement(ndx), buf); - } - } - } else { - if (elements) { - for (ui32 i = 0; i < structType->GetMembersCount(); ++i) { - WriteSkiffNativeYtValue(structType->GetMemberType(i), nativeYtTypeFlags, elements[i], buf); - } - } else { - for (ui32 i = 0; i < structType->GetMembersCount(); ++i) { - WriteSkiffNativeYtValue(structType->GetMemberType(i), nativeYtTypeFlags, value.GetElement(i), buf); - } - } - } - } else if (type->IsVariant()) { - auto varType = AS_TYPE(TVariantType, type); - ui16 index = (ui16)value.GetVariantIndex(); - if (varType->GetAlternativesCount() < 256) { - buf.WriteMany((const char*)&index, 1); - } else { - buf.WriteMany((const char*)&index, sizeof(index)); - } - - if (varType->GetUnderlyingType()->IsTuple()) { - auto tupleType = AS_TYPE(TTupleType, varType->GetUnderlyingType()); - WriteSkiffNativeYtValue(tupleType->GetElementType(index), nativeYtTypeFlags, value.GetVariantItem(), buf); - } else { - auto structType = AS_TYPE(TStructType, varType->GetUnderlyingType()); - if (auto cookie = structType->GetCookie()) { - const std::vector<size_t>& reorder = *((const std::vector<size_t>*)cookie); - index = reorder[index]; - } - YQL_ENSURE(index < structType->GetMembersCount()); - - WriteSkiffNativeYtValue(structType->GetMemberType(index), nativeYtTypeFlags, value.GetVariantItem(), buf); - } - } else if (type->IsVoid() || type->IsNull() || type->IsEmptyList() || type->IsEmptyDict()) { - } else if (type->IsDict()) { - auto dictType = AS_TYPE(TDictType, type); - auto keyType = dictType->GetKeyType(); - auto payloadType = dictType->GetPayloadType(); - NUdf::TUnboxedValue key, payload; - for (auto iter = value.GetDictIterator(); iter.NextPair(key, payload); ) { - buf.Write('\0'); - WriteSkiffNativeYtValue(keyType, nativeYtTypeFlags, key, buf); - WriteSkiffNativeYtValue(payloadType, nativeYtTypeFlags, payload, buf); - } - - buf.Write('\xff'); - } else { - YQL_ENSURE(false, "Unsupported type: " << type->GetKindAsStr()); - } + return ParseYsonValue(holderFactory, NYT::NodeToYsonString(node, NYson::EYsonFormat::Binary), type, err); } TExprNode::TPtr ValueToExprLiteral(const TTypeAnnotationNode* type, const NKikimr::NUdf::TUnboxedValuePod& value, TExprContext& ctx, diff --git a/yql/essentials/providers/common/codec/yql_codec.h b/yql/essentials/providers/common/codec/yql_codec.h index f3f50e5c58..741f0b1966 100644 --- a/yql/essentials/providers/common/codec/yql_codec.h +++ b/yql/essentials/providers/common/codec/yql_codec.h @@ -63,43 +63,15 @@ struct TCodecContext { void SkipYson(char cmd, TInputBuf& buf); void CopyYson(char cmd, TInputBuf& buf, TVector<char>& yson); void CopyYsonWithAttrs(char cmd, TInputBuf& buf, TVector<char>& yson); -NKikimr::NUdf::TUnboxedValue ReadYsonValue(NKikimr::NMiniKQL::TType* type, ui64 nativeYtTypeFlags, const NKikimr::NMiniKQL::THolderFactory& holderFactory, char cmd, TInputBuf& buf, bool isTableFormat); +TStringBuf ReadNextString(char cmd, TInputBuf& buf); +NKikimr::NUdf::TUnboxedValue ReadYsonValue(NKikimr::NMiniKQL::TType* type, const NKikimr::NMiniKQL::THolderFactory& holderFactory, char cmd, TInputBuf& buf); TMaybe<NKikimr::NUdf::TUnboxedValue> ParseYsonValue(const NKikimr::NMiniKQL::THolderFactory& holderFactory, - const TStringBuf& yson, NKikimr::NMiniKQL::TType* type, ui64 nativeYtTypeFlags, IOutputStream* err, bool isTableFormat); -TMaybe<NKikimr::NUdf::TUnboxedValue> ParseYsonNode(const NKikimr::NMiniKQL::THolderFactory& holderFactory, - const NYT::TNode& node, NKikimr::NMiniKQL::TType* type, ui64 nativeYtTypeFlags, IOutputStream* err); + const TStringBuf& yson, NKikimr::NMiniKQL::TType* type, IOutputStream* err); TMaybe<NKikimr::NUdf::TUnboxedValue> ParseYsonNodeInResultFormat(const NKikimr::NMiniKQL::THolderFactory& holderFactory, const NYT::TNode& node, NKikimr::NMiniKQL::TType* type, IOutputStream* err); -extern "C" void ReadYsonContainerValue(NKikimr::NMiniKQL::TType* type, ui64 nativeYtTypeFlags, - const NKikimr::NMiniKQL::THolderFactory& holderFactory, NKikimr::NUdf::TUnboxedValue& value, NCommon::TInputBuf& buf, - bool wrapOptional); - -void SkipSkiffField(NKikimr::NMiniKQL::TType* type, ui64 nativeYtTypeFlags, TInputBuf& buf); - -NKikimr::NUdf::TUnboxedValue ReadSkiffNativeYtValue(NKikimr::NMiniKQL::TType* type, ui64 nativeYtTypeFlags, - const NKikimr::NMiniKQL::THolderFactory& holderFactory, TInputBuf& buf); - -NKikimr::NUdf::TUnboxedValue ReadSkiffData(NKikimr::NMiniKQL::TType* type, ui64 nativeYtTypeFlags, NCommon::TInputBuf& buf); -extern "C" void ReadContainerNativeYtValue(NKikimr::NMiniKQL::TType* type, ui64 nativeYtTypeFlags, - const NKikimr::NMiniKQL::THolderFactory& holderFactory, NKikimr::NUdf::TUnboxedValue& value, NCommon::TInputBuf& buf, - bool wrapOptional); - -extern "C" void WriteYsonContainerValue(NKikimr::NMiniKQL::TType* type, ui64 nativeYtTypeFlags, - const NKikimr::NUdf::TUnboxedValuePod& value, NCommon::TOutputBuf& buf); - -void WriteSkiffData(NKikimr::NMiniKQL::TType* type, ui64 nativeYtTypeFlags, const NKikimr::NUdf::TUnboxedValuePod& value, NCommon::TOutputBuf& buf); - -void WriteSkiffNativeYtValue(NKikimr::NMiniKQL::TType* type, ui64 nativeYtTypeFlags, - const NKikimr::NUdf::TUnboxedValuePod& value, NCommon::TOutputBuf& buf); - -extern "C" void WriteContainerNativeYtValue(NKikimr::NMiniKQL::TType* type, ui64 nativeYtTypeFlags, - const NKikimr::NUdf::TUnboxedValuePod& value, NCommon::TOutputBuf& buf); - -void WriteYsonValueInTableFormat(TOutputBuf& buf, NKikimr::NMiniKQL::TType* type, ui64 nativeYtTypeFlags, const NKikimr::NUdf::TUnboxedValuePod& value, bool topLevel); - TExprNode::TPtr ValueToExprLiteral(const TTypeAnnotationNode* type, const NKikimr::NUdf::TUnboxedValuePod& value, TExprContext& ctx, TPositionHandle pos = {}); diff --git a/yql/essentials/sql/pg/pg_sql.cpp b/yql/essentials/sql/pg/pg_sql.cpp index 84d3c39454..1843ea4f9e 100644 --- a/yql/essentials/sql/pg/pg_sql.cpp +++ b/yql/essentials/sql/pg/pg_sql.cpp @@ -29,6 +29,7 @@ extern "C" { #include "utils.h" #include <yql/essentials/ast/yql_expr.h> #include <yql/essentials/sql/settings/partitioning.h> +#include <yql/essentials/sql/settings/translator.h> #include <yql/essentials/parser/pg_wrapper/interface/config.h> #include <yql/essentials/parser/pg_wrapper/interface/parser.h> #include <yql/essentials/parser/pg_wrapper/interface/utils.h> @@ -1665,7 +1666,7 @@ public: const auto select = ParseSelectStmt( &selectStmt, { - .Inner = true, + .Inner = true, .AllowEmptyResSet = true, .EmitPgStar = true, .FillTargetColumns = false, @@ -2971,7 +2972,7 @@ public: return State.Statements.back(); } - [[nodiscard]] + [[nodiscard]] TAstNode* ParseAlterTableStmt(const AlterTableStmt* value) { std::vector<TAstNode*> options; TString mode = (value->missing_ok) ? "alter_if_exists" : "alter"; @@ -3015,7 +3016,7 @@ public: return nullptr; } const A_Const* localConst = nullptr; - if (NodeTag(rawArg) == T_TypeCast) { + if (NodeTag(rawArg) == T_TypeCast) { auto localCast = CAST_NODE(TypeCast, rawArg)->arg; if (NodeTag(localCast) != T_A_Const) { AddError(TStringBuilder() << "Expected a_const in cast, but got something wrong: " << NodeTag(localCast)); @@ -3043,7 +3044,7 @@ public: NodeNotImplemented(def); return nullptr; } - break; + break; } default: NodeNotImplemented(rawNode); @@ -3054,7 +3055,7 @@ public: std::vector<TAstNode*> actions { QL(QA("alterColumns"), QVL(alterColumns.data(), alterColumns.size())) }; options.push_back( - QL(QA("actions"), + QL(QA("actions"), QVL(actions.data(), actions.size()) ) ); @@ -4037,7 +4038,7 @@ public: for (const auto& s : argStrs) { concatArgs.push_back(L(A("Key"), QL(QA("table"),L(A("String"), QAX(s))))); } - + key = VL(concatArgs); } else if (lowerName == "concat_view") { if (argStrs.size() % 2 != 0) { @@ -4048,11 +4049,11 @@ public: TVector<TAstNode*> concatArgs; concatArgs.push_back(A("MrTableConcat")); for (ui32 i = 0; i < argStrs.size(); i += 2) { - concatArgs.push_back(L(A("Key"), + concatArgs.push_back(L(A("Key"), QL(QA("table"),L(A("String"), QAX(argStrs[i]))), QL(QA("view"),L(A("String"), QAX(argStrs[i + 1]))))); } - + key = VL(concatArgs); } else if (lowerName == "range") { if (argStrs.size() > 5) { @@ -4094,8 +4095,8 @@ public: A("item")); } else { expr = L(A("Apply"),L(A("Udf"),QA("Re2.Match"), - QL(L(A("Apply"), - L(A("Udf"), QA("Re2.PatternFromLike")), + QL(L(A("Apply"), + L(A("Udf"), QA("Re2.PatternFromLike")), L(A("String"),QAX(argStrs[1]))),L(A("Null")))), A("item")); } @@ -5806,8 +5807,8 @@ public: if (!leftType) { return false; - } - + } + if (procedureName.empty()) { return false; } @@ -6194,13 +6195,13 @@ public: desc.Family = familyName; TVector<NPg::TAmOpDesc> ops; TVector<NPg::TAmProcDesc> procs; - + for (int i = 0; i < ListLength(value->items); ++i) { auto node = LIST_CAST_NTH(CreateOpClassItem, value->items, i); if (node->itemtype != OPCLASS_ITEM_OPERATOR && node->itemtype != OPCLASS_ITEM_FUNCTION) { continue; } - + if (ListLength(node->name->objname) != 1) { return false; } @@ -6422,4 +6423,47 @@ std::unique_ptr<NYql::NPg::ISqlLanguageParser> CreateSqlLanguageParser() { return std::make_unique<TSqlLanguageParser>(); } +class TTranslator : public NSQLTranslation::ITranslator { +public: + NSQLTranslation::ILexer::TPtr MakeLexer(const NSQLTranslation::TTranslationSettings& settings) final { + Y_UNUSED(settings); + ythrow yexception() << "Unsupported method"; + } + + NYql::TAstParseResult TextToAst(const TString& query, const NSQLTranslation::TTranslationSettings& settings, + NYql::TWarningRules* warningRules, NYql::TStmtParseInfo* stmtParseInfo) final { + Y_UNUSED(warningRules); + return PGToYql(query, settings, stmtParseInfo); + } + + google::protobuf::Message* TextToMessage(const TString& query, const TString& queryName, + NYql::TIssues& issues, size_t maxErrors, const NSQLTranslation::TTranslationSettings& settings) final { + Y_UNUSED(query); + Y_UNUSED(queryName); + Y_UNUSED(issues); + Y_UNUSED(maxErrors); + Y_UNUSED(settings); + ythrow yexception() << "Unsupported method"; + } + + NYql::TAstParseResult TextAndMessageToAst(const TString& query, const google::protobuf::Message& protoAst, + const NSQLTranslation::TSQLHints& hints, const NSQLTranslation::TTranslationSettings& settings) final { + Y_UNUSED(query); + Y_UNUSED(protoAst); + Y_UNUSED(hints); + Y_UNUSED(settings); + ythrow yexception() << "Unsupported method"; + } + + TVector<NYql::TAstParseResult> TextToManyAst(const TString& query, const NSQLTranslation::TTranslationSettings& settings, + NYql::TWarningRules* warningRules, TVector<NYql::TStmtParseInfo>* stmtParseInfo) final { + Y_UNUSED(warningRules); + return PGToYqlStatements(query, settings, stmtParseInfo); + } +}; + +NSQLTranslation::TTranslatorPtr MakeTranslator() { + return MakeIntrusive<TTranslator>(); +} + } // NSQLTranslationPG diff --git a/yql/essentials/sql/pg_dummy/pg_sql_dummy.cpp b/yql/essentials/sql/pg_dummy/pg_sql_dummy.cpp index b2d1cd1c6d..ddf7f42ca7 100644 --- a/yql/essentials/sql/pg_dummy/pg_sql_dummy.cpp +++ b/yql/essentials/sql/pg_dummy/pg_sql_dummy.cpp @@ -2,6 +2,7 @@ #include <yql/essentials/minikql/computation/mkql_computation_node_pack_impl.h> #include <yql/essentials/minikql/mkql_buffer.h> +#include <yql/essentials/sql/settings/translator.h> namespace NSQLTranslationPG { @@ -33,6 +34,10 @@ std::unique_ptr<NYql::NPg::ISqlLanguageParser> CreateSqlLanguageParser() { throw yexception() << "CreateSqlLanguageParser: PG types are not supported"; } +NSQLTranslation::TTranslatorPtr MakeTranslator() { + return NSQLTranslation::MakeDummyTranslator("pg"); +} + } // NSQLTranslationPG namespace NYql { diff --git a/yql/essentials/sql/pg_dummy/ya.make b/yql/essentials/sql/pg_dummy/ya.make index 0341787faf..10615731c0 100644 --- a/yql/essentials/sql/pg_dummy/ya.make +++ b/yql/essentials/sql/pg_dummy/ya.make @@ -8,6 +8,7 @@ PROVIDES( PEERDIR( yql/essentials/parser/pg_wrapper/interface yql/essentials/minikql + yql/essentials/sql/settings ) SRCS( diff --git a/yql/essentials/sql/settings/translator.cpp b/yql/essentials/sql/settings/translator.cpp new file mode 100644 index 0000000000..92e8c011b8 --- /dev/null +++ b/yql/essentials/sql/settings/translator.cpp @@ -0,0 +1,70 @@ +#include "translator.h" + +namespace NSQLTranslation { + +namespace { + +class TDummyTranslator : public ITranslator { +public: + TDummyTranslator(const TString& name) + : Name_(name) + {} + + NSQLTranslation::ILexer::TPtr MakeLexer(const NSQLTranslation::TTranslationSettings& settings) final { + Y_UNUSED(settings); + ThrowNotSupported(); + } + + NYql::TAstParseResult TextToAst(const TString& query, const NSQLTranslation::TTranslationSettings& settings, + NYql::TWarningRules* warningRules, NYql::TStmtParseInfo* stmtParseInfo) final { + Y_UNUSED(query); + Y_UNUSED(settings); + Y_UNUSED(warningRules); + Y_UNUSED(stmtParseInfo); + ThrowNotSupported(); + } + + google::protobuf::Message* TextToMessage(const TString& query, const TString& queryName, + NYql::TIssues& issues, size_t maxErrors, const TTranslationSettings& settings) final { + Y_UNUSED(query); + Y_UNUSED(queryName); + Y_UNUSED(issues); + Y_UNUSED(maxErrors); + Y_UNUSED(settings); + ThrowNotSupported(); + } + + NYql::TAstParseResult TextAndMessageToAst(const TString& query, const google::protobuf::Message& protoAst, + const TSQLHints& hints, const TTranslationSettings& settings) final { + Y_UNUSED(query); + Y_UNUSED(protoAst); + Y_UNUSED(hints); + Y_UNUSED(settings); + ThrowNotSupported(); + } + + TVector<NYql::TAstParseResult> TextToManyAst(const TString& query, const TTranslationSettings& settings, + NYql::TWarningRules* warningRules, TVector<NYql::TStmtParseInfo>* stmtParseInfo) final { + Y_UNUSED(query); + Y_UNUSED(settings); + Y_UNUSED(warningRules); + Y_UNUSED(stmtParseInfo); + ThrowNotSupported(); + } + +private: + [[noreturn]] void ThrowNotSupported() { + throw yexception() << "Translator '" << Name_ << "' is not supported"; + } + +private: + const TString Name_; +}; + +} + +TTranslatorPtr MakeDummyTranslator(const TString& name) { + return MakeIntrusive<TDummyTranslator>(name); +} + +} // namespace NSQLTranslation diff --git a/yql/essentials/sql/settings/translator.h b/yql/essentials/sql/settings/translator.h new file mode 100644 index 0000000000..aa8ef5ac5e --- /dev/null +++ b/yql/essentials/sql/settings/translator.h @@ -0,0 +1,30 @@ +#pragma once +#include "translation_settings.h" + +#include <yql/essentials/parser/lexer_common/lexer.h> +#include <yql/essentials/parser/lexer_common/hints.h> +#include <yql/essentials/public/issue/yql_warning.h> +#include <yql/essentials/ast/yql_ast.h> + +namespace NSQLTranslation { + +class ITranslator : public TThrRefBase { +public: + virtual ~ITranslator() = default; + + virtual ILexer::TPtr MakeLexer(const TTranslationSettings& settings) = 0; + virtual NYql::TAstParseResult TextToAst(const TString& query, const TTranslationSettings& settings, + NYql::TWarningRules* warningRules, NYql::TStmtParseInfo* stmtParseInfo) = 0; + virtual google::protobuf::Message* TextToMessage(const TString& query, const TString& queryName, + NYql::TIssues& issues, size_t maxErrors, const TTranslationSettings& settings) = 0; + virtual NYql::TAstParseResult TextAndMessageToAst(const TString& query, const google::protobuf::Message& protoAst, + const TSQLHints& hints, const TTranslationSettings& settings) = 0; + virtual TVector<NYql::TAstParseResult> TextToManyAst(const TString& query, const TTranslationSettings& settings, + NYql::TWarningRules* warningRules, TVector<NYql::TStmtParseInfo>* stmtParseInfo) = 0; +}; + +using TTranslatorPtr = TIntrusivePtr<ITranslator>; + +TTranslatorPtr MakeDummyTranslator(const TString& name); + +} // namespace NSQLTranslation diff --git a/yql/essentials/sql/settings/ya.make b/yql/essentials/sql/settings/ya.make index affdcbf303..784aa84d4f 100644 --- a/yql/essentials/sql/settings/ya.make +++ b/yql/essentials/sql/settings/ya.make @@ -3,11 +3,13 @@ LIBRARY() SRCS( partitioning.cpp translation_settings.cpp + translator.cpp ) PEERDIR( library/cpp/deprecated/split library/cpp/json + yql/essentials/public/issue yql/essentials/core/issue yql/essentials/core/pg_settings yql/essentials/core/issue/protos diff --git a/yql/essentials/sql/sql.cpp b/yql/essentials/sql/sql.cpp index d38fff8838..9e18a1bb01 100644 --- a/yql/essentials/sql/sql.cpp +++ b/yql/essentials/sql/sql.cpp @@ -14,7 +14,7 @@ namespace NSQLTranslation { - NYql::TAstParseResult SqlToYql(const TString& query, const TTranslationSettings& settings, + NYql::TAstParseResult SqlToYql(const TTranslators& translators, const TString& query, const TTranslationSettings& settings, NYql::TWarningRules* warningRules, NYql::TStmtParseInfo* stmtParseInfo, TTranslationSettings* effectiveSettings) { NYql::TAstParseResult result; @@ -45,7 +45,7 @@ namespace NSQLTranslation { } if (parsedSettings.PgParser) { - return NSQLTranslationPG::PGToYql(query, parsedSettings, stmtParseInfo); + return translators.PG->TextToAst(query, parsedSettings, warningRules, stmtParseInfo); } switch (parsedSettings.SyntaxVersion) { @@ -62,9 +62,9 @@ namespace NSQLTranslation { return result; } - return NSQLTranslationV0::SqlToYql(query, parsedSettings, warningRules); + return translators.V0->TextToAst(query, parsedSettings, warningRules, nullptr); case 1: - return NSQLTranslationV1::SqlToYql(query, parsedSettings, warningRules); + return translators.V1->TextToAst(query, parsedSettings, warningRules, nullptr); default: result.Issues.AddIssue(NYql::YqlIssue(NYql::TPosition(), NYql::TIssuesIds::DEFAULT_ERROR, TStringBuilder() << "Unknown SQL syntax version: " << parsedSettings.SyntaxVersion)); @@ -72,7 +72,12 @@ namespace NSQLTranslation { } } - google::protobuf::Message* SqlAST(const TString& query, const TString& queryName, NYql::TIssues& issues, + NYql::TAstParseResult SqlToYql(const TString& query, const TTranslationSettings& settings, + NYql::TWarningRules* warningRules, NYql::TStmtParseInfo* stmtParseInfo, TTranslationSettings* effectiveSettings) { + return SqlToYql(MakeAllTranslators(), query, settings, warningRules, stmtParseInfo, effectiveSettings); + } + + google::protobuf::Message* SqlAST(const TTranslators& translators, const TString& query, const TString& queryName, NYql::TIssues& issues, size_t maxErrors, const TTranslationSettings& settings, ui16* actualSyntaxVersion) { TTranslationSettings parsedSettings(settings); @@ -98,9 +103,9 @@ namespace NSQLTranslation { return nullptr; } - return NSQLTranslationV0::SqlAST(query, queryName, issues, maxErrors, settings.Arena); + return translators.V0->TextToMessage(query, queryName, issues, maxErrors, settings); case 1: - return NSQLTranslationV1::SqlAST(query, queryName, issues, maxErrors, parsedSettings.AnsiLexer, parsedSettings.Antlr4Parser, parsedSettings.TestAntlr4, settings.Arena); + return translators.V1->TextToMessage(query, queryName, issues, maxErrors, parsedSettings); default: issues.AddIssue(NYql::YqlIssue(NYql::TPosition(), NYql::TIssuesIds::DEFAULT_ERROR, TStringBuilder() << "Unknown SQL syntax version: " << parsedSettings.SyntaxVersion)); @@ -108,7 +113,12 @@ namespace NSQLTranslation { } } - ILexer::TPtr SqlLexer(const TString& query, NYql::TIssues& issues, const TTranslationSettings& settings, ui16* actualSyntaxVersion) + google::protobuf::Message* SqlAST(const TString& query, const TString& queryName, NYql::TIssues& issues, + size_t maxErrors, const TTranslationSettings& settings, ui16* actualSyntaxVersion) { + return SqlAST(MakeAllTranslators(), query, queryName, issues, maxErrors, settings, actualSyntaxVersion); + } + + ILexer::TPtr SqlLexer(const TTranslators& translators, const TString& query, NYql::TIssues& issues, const TTranslationSettings& settings, ui16* actualSyntaxVersion) { TTranslationSettings parsedSettings(settings); if (!ParseTranslationSettings(query, parsedSettings, issues)) { @@ -133,9 +143,9 @@ namespace NSQLTranslation { return {}; } - return NSQLTranslationV0::MakeLexer(); + return translators.V0->MakeLexer(parsedSettings); case 1: - return NSQLTranslationV1::MakeLexer(parsedSettings.AnsiLexer, parsedSettings.Antlr4Parser); + return translators.V1->MakeLexer(parsedSettings); default: issues.AddIssue(NYql::YqlIssue(NYql::TPosition(), NYql::TIssuesIds::DEFAULT_ERROR, TStringBuilder() << "Unknown SQL syntax version: " << parsedSettings.SyntaxVersion)); @@ -143,7 +153,12 @@ namespace NSQLTranslation { } } - NYql::TAstParseResult SqlASTToYql(const TString& query, const google::protobuf::Message& protoAst, const TSQLHints& hints, const TTranslationSettings& settings) { + ILexer::TPtr SqlLexer(const TString& query, NYql::TIssues& issues, const TTranslationSettings& settings, ui16* actualSyntaxVersion) { + return SqlLexer(MakeAllTranslators(), query, issues, settings, actualSyntaxVersion); + } + + NYql::TAstParseResult SqlASTToYql(const TTranslators& translators, const TString& query, + const google::protobuf::Message& protoAst, const TSQLHints& hints, const TTranslationSettings& settings) { NYql::TAstParseResult result; switch (settings.SyntaxVersion) { case 0: @@ -159,9 +174,9 @@ namespace NSQLTranslation { return result; } - return NSQLTranslationV0::SqlASTToYql(protoAst, settings); + return translators.V0->TextAndMessageToAst(query, protoAst, hints, settings); case 1: - return NSQLTranslationV1::SqlASTToYql(query, protoAst, hints, settings); + return translators.V1->TextAndMessageToAst(query, protoAst, hints, settings); default: result.Issues.AddIssue(NYql::YqlIssue(NYql::TPosition(), NYql::TIssuesIds::DEFAULT_ERROR, TStringBuilder() << "Unknown SQL syntax version: " << settings.SyntaxVersion)); @@ -169,8 +184,14 @@ namespace NSQLTranslation { } } - TVector<NYql::TAstParseResult> SqlToAstStatements(const TString& query, const TTranslationSettings& settings, - NYql::TWarningRules* warningRules, ui16* actualSyntaxVersion, TVector<NYql::TStmtParseInfo>* stmtParseInfo) + NYql::TAstParseResult SqlASTToYql(const TString& query, const google::protobuf::Message& protoAst, + const TSQLHints& hints, const TTranslationSettings& settings) { + return SqlASTToYql(MakeAllTranslators(), query, protoAst, hints, settings); + } + + TVector<NYql::TAstParseResult> SqlToAstStatements(const TTranslators& translators, const TString& query, + const TTranslationSettings& settings, NYql::TWarningRules* warningRules, ui16* actualSyntaxVersion, + TVector<NYql::TStmtParseInfo>* stmtParseInfo) { TVector<NYql::TAstParseResult> result; NYql::TIssues issues; @@ -201,16 +222,16 @@ namespace NSQLTranslation { } if (parsedSettings.PgParser) { - return NSQLTranslationPG::PGToYqlStatements(query, parsedSettings, stmtParseInfo); + return translators.PG->TextToManyAst(query, parsedSettings, warningRules, stmtParseInfo); } switch (parsedSettings.SyntaxVersion) { case 0: issues.AddIssue(NYql::YqlIssue(NYql::TPosition(), NYql::TIssuesIds::DEFAULT_ERROR, "V0 syntax is disabled")); - return {}; + return translators.V0->TextToManyAst(query, parsedSettings, warningRules, stmtParseInfo); case 1: - return NSQLTranslationV1::SqlToAstStatements(query, parsedSettings, warningRules, stmtParseInfo); + return translators.V1->TextToManyAst(query, parsedSettings, warningRules, stmtParseInfo); default: issues.AddIssue(NYql::YqlIssue(NYql::TPosition(), NYql::TIssuesIds::DEFAULT_ERROR, TStringBuilder() << "Unknown SQL syntax version: " << parsedSettings.SyntaxVersion)); @@ -218,4 +239,23 @@ namespace NSQLTranslation { } } + TVector<NYql::TAstParseResult> SqlToAstStatements(const TString& query, const TTranslationSettings& settings, + NYql::TWarningRules* warningRules, ui16* actualSyntaxVersion, TVector<NYql::TStmtParseInfo>* stmtParseInfo) { + return SqlToAstStatements(MakeAllTranslators(), query, settings, warningRules, actualSyntaxVersion, stmtParseInfo); + } + + TTranslators MakeAllTranslators() { + return TTranslators( + NSQLTranslationV0::MakeTranslator(), + NSQLTranslationV1::MakeTranslator(), + NSQLTranslationPG::MakeTranslator() + ); + } + + TTranslators::TTranslators(TTranslatorPtr v0, TTranslatorPtr v1, TTranslatorPtr pg) + : V0(v0 ? v0 : MakeDummyTranslator("v0")) + , V1(v1 ? v1 : MakeDummyTranslator("v1")) + , PG(pg ? pg : MakeDummyTranslator("pg")) + {} + } // namespace NSQLTranslation diff --git a/yql/essentials/sql/sql.h b/yql/essentials/sql/sql.h index 891ea4f684..250cefc2b1 100644 --- a/yql/essentials/sql/sql.h +++ b/yql/essentials/sql/sql.h @@ -7,6 +7,7 @@ #include <yql/essentials/public/issue/yql_issue_manager.h> #include <yql/essentials/ast/yql_ast.h> #include <yql/essentials/sql/settings/translation_settings.h> +#include <yql/essentials/sql/settings/translator.h> #include <util/generic/hash.h> #include <util/generic/hash_set.h> @@ -15,16 +16,37 @@ namespace NSQLTranslation { + struct TTranslators { + TTranslatorPtr const V0; + TTranslatorPtr const V1; + TTranslatorPtr const PG; + + TTranslators(TTranslatorPtr v0, TTranslatorPtr v1, TTranslatorPtr pg); + }; + + //FIXME drop this function and overloads without translators + TTranslators MakeAllTranslators(); + NYql::TAstParseResult SqlToYql(const TString& query, const TTranslationSettings& settings, NYql::TWarningRules* warningRules = nullptr, NYql::TStmtParseInfo* stmtParseInfo = nullptr, TTranslationSettings* effectiveSettings = nullptr); + NYql::TAstParseResult SqlToYql(const TTranslators& translators, const TString& query, const TTranslationSettings& settings, + NYql::TWarningRules* warningRules = nullptr, NYql::TStmtParseInfo* stmtParseInfo = nullptr, + TTranslationSettings* effectiveSettings = nullptr); + google::protobuf::Message* SqlAST(const TString& query, const TString& queryName, NYql::TIssues& issues, size_t maxErrors, const TTranslationSettings& settings = {}, ui16* actualSyntaxVersion = nullptr); + google::protobuf::Message* SqlAST(const TTranslators& translators, const TString& query, const TString& queryName, NYql::TIssues& issues, size_t maxErrors, + const TTranslationSettings& settings = {}, ui16* actualSyntaxVersion = nullptr); + ILexer::TPtr SqlLexer(const TString& query, NYql::TIssues& issues, const TTranslationSettings& settings = {}, ui16* actualSyntaxVersion = nullptr); + ILexer::TPtr SqlLexer(const TTranslators& translators, const TString& query, NYql::TIssues& issues, const TTranslationSettings& settings = {}, ui16* actualSyntaxVersion = nullptr); NYql::TAstParseResult SqlASTToYql(const TString& query, const google::protobuf::Message& protoAst, const TSQLHints& hints, const TTranslationSettings& settings); + NYql::TAstParseResult SqlASTToYql(const TTranslators& translators, const TString& query, const google::protobuf::Message& protoAst, const TSQLHints& hints, const TTranslationSettings& settings); TVector<NYql::TAstParseResult> SqlToAstStatements(const TString& query, const TTranslationSettings& settings, NYql::TWarningRules* warningRules = nullptr, ui16* actualSyntaxVersion = nullptr, TVector<NYql::TStmtParseInfo>* stmtParseInfo = nullptr); - -} // namespace NSQLTranslationV0 + TVector<NYql::TAstParseResult> SqlToAstStatements(const TTranslators& translators, const TString& query, const TTranslationSettings& settings, + NYql::TWarningRules* warningRules = nullptr, ui16* actualSyntaxVersion = nullptr, TVector<NYql::TStmtParseInfo>* stmtParseInfo = nullptr); +} // namespace NSQLTranslation diff --git a/yql/essentials/sql/v0/sql.cpp b/yql/essentials/sql/v0/sql.cpp index caf61bdd0e..f83a016b22 100644 --- a/yql/essentials/sql/v0/sql.cpp +++ b/yql/essentials/sql/v0/sql.cpp @@ -1,5 +1,6 @@ #include "sql.h" +#include <yql/essentials/sql/v0/lexer/lexer.h> #include "context.h" #include "node.h" @@ -5315,4 +5316,43 @@ NYql::TAstParseResult SqlToYql(const TString& query, const NSQLTranslation::TTra return res; } +class TTranslator : public NSQLTranslation::ITranslator { +public: + NSQLTranslation::ILexer::TPtr MakeLexer(const NSQLTranslation::TTranslationSettings& settings) final { + Y_UNUSED(settings); + return NSQLTranslationV0::MakeLexer(); + } + + NYql::TAstParseResult TextToAst(const TString& query, const NSQLTranslation::TTranslationSettings& settings, + NYql::TWarningRules* warningRules, NYql::TStmtParseInfo* stmtParseInfo) final { + Y_UNUSED(stmtParseInfo); + return SqlToYql(query, settings, warningRules); + } + + google::protobuf::Message* TextToMessage(const TString& query, const TString& queryName, + NYql::TIssues& issues, size_t maxErrors, const NSQLTranslation::TTranslationSettings& settings) final { + return SqlAST(query, queryName, issues, maxErrors, settings.Arena); + } + + NYql::TAstParseResult TextAndMessageToAst(const TString& query, const google::protobuf::Message& protoAst, + const NSQLTranslation::TSQLHints& hints, const NSQLTranslation::TTranslationSettings& settings) final { + Y_UNUSED(query); + Y_UNUSED(hints); + return SqlASTToYql(protoAst, settings); + } + + TVector<NYql::TAstParseResult> TextToManyAst(const TString& query, const NSQLTranslation::TTranslationSettings& settings, + NYql::TWarningRules* warningRules, TVector<NYql::TStmtParseInfo>* stmtParseInfo) final { + Y_UNUSED(query); + Y_UNUSED(settings); + Y_UNUSED(warningRules); + Y_UNUSED(stmtParseInfo); + return {}; + } +}; + +NSQLTranslation::TTranslatorPtr MakeTranslator() { + return MakeIntrusive<TTranslator>(); +} + } // namespace NSQLTranslationV0 diff --git a/yql/essentials/sql/v0/sql.h b/yql/essentials/sql/v0/sql.h index 66516adce7..dffcbdd3de 100644 --- a/yql/essentials/sql/v0/sql.h +++ b/yql/essentials/sql/v0/sql.h @@ -5,6 +5,7 @@ #include <yql/essentials/public/issue/yql_warning.h> #include <yql/essentials/public/issue/yql_issue_manager.h> #include <yql/essentials/sql/settings/translation_settings.h> +#include <yql/essentials/sql/settings/translator.h> #include <google/protobuf/message.h> @@ -13,5 +14,6 @@ namespace NSQLTranslationV0 { NYql::TAstParseResult SqlToYql(const TString& query, const NSQLTranslation::TTranslationSettings& settings, NYql::TWarningRules* warningRules = nullptr); google::protobuf::Message* SqlAST(const TString& query, const TString& queryName, NYql::TIssues& err, size_t maxErrors, google::protobuf::Arena* arena = nullptr); NYql::TAstParseResult SqlASTToYql(const google::protobuf::Message& protoAst, const NSQLTranslation::TTranslationSettings& settings); + NSQLTranslation::TTranslatorPtr MakeTranslator(); } // namespace NSQLTranslationV0 diff --git a/yql/essentials/sql/v0/ya.make b/yql/essentials/sql/v0/ya.make index 92fc1a21f8..e0f6936e94 100644 --- a/yql/essentials/sql/v0/ya.make +++ b/yql/essentials/sql/v0/ya.make @@ -11,6 +11,7 @@ PEERDIR( yql/essentials/core/issue/protos yql/essentials/parser/proto_ast/collect_issues yql/essentials/parser/proto_ast/gen/v0 + yql/essentials/sql/v0/lexer ) SRCS( diff --git a/yql/essentials/sql/v1/sql.cpp b/yql/essentials/sql/v1/sql.cpp index f131653f92..2fc80a2fe8 100644 --- a/yql/essentials/sql/v1/sql.cpp +++ b/yql/essentials/sql/v1/sql.cpp @@ -275,4 +275,37 @@ bool SplitQueryToStatements(const TString& query, TVector<TString>& statements, return true; } +class TTranslator : public NSQLTranslation::ITranslator { +public: + NSQLTranslation::ILexer::TPtr MakeLexer(const NSQLTranslation::TTranslationSettings& settings) final { + return NSQLTranslationV1::MakeLexer(settings.AnsiLexer, settings.Antlr4Parser); + } + + NYql::TAstParseResult TextToAst(const TString& query, const NSQLTranslation::TTranslationSettings& settings, + NYql::TWarningRules* warningRules, NYql::TStmtParseInfo* stmtParseInfo) final { + Y_UNUSED(stmtParseInfo); + return SqlToYql(query, settings, warningRules); + } + + google::protobuf::Message* TextToMessage(const TString& query, const TString& queryName, + NYql::TIssues& issues, size_t maxErrors, const NSQLTranslation::TTranslationSettings& settings) final { + return SqlAST(query, queryName, issues, maxErrors, settings.AnsiLexer, settings.Antlr4Parser, + settings.TestAntlr4, settings.Arena); + } + + NYql::TAstParseResult TextAndMessageToAst(const TString& query, const google::protobuf::Message& protoAst, + const NSQLTranslation::TSQLHints& hints, const NSQLTranslation::TTranslationSettings& settings) final { + return SqlASTToYql(query, protoAst, hints, settings); + } + + TVector<NYql::TAstParseResult> TextToManyAst(const TString& query, const NSQLTranslation::TTranslationSettings& settings, + NYql::TWarningRules* warningRules, TVector<NYql::TStmtParseInfo>* stmtParseInfo) final { + return SqlToAstStatements(query, settings, warningRules, stmtParseInfo); + } +}; + +NSQLTranslation::TTranslatorPtr MakeTranslator() { + return MakeIntrusive<TTranslator>(); +} + } // namespace NSQLTranslationV1 diff --git a/yql/essentials/sql/v1/sql.h b/yql/essentials/sql/v1/sql.h index 63a4ee153f..7f0ded1a55 100644 --- a/yql/essentials/sql/v1/sql.h +++ b/yql/essentials/sql/v1/sql.h @@ -7,6 +7,7 @@ #include <yql/essentials/public/issue/yql_warning.h> #include <yql/essentials/public/issue/yql_issue_manager.h> #include <yql/essentials/sql/settings/translation_settings.h> +#include <yql/essentials/sql/settings/translator.h> #include <google/protobuf/message.h> @@ -27,4 +28,5 @@ namespace NSQLTranslationV1 { bool SplitQueryToStatements(const TString& query, TVector<TString>& statements, NYql::TIssues& issues, const NSQLTranslation::TTranslationSettings& settings); + NSQLTranslation::TTranslatorPtr MakeTranslator(); } // namespace NSQLTranslationV1 diff --git a/yql/essentials/tests/sql/minirun/part5/canondata/result.json b/yql/essentials/tests/sql/minirun/part5/canondata/result.json index 64e4208d97..c15133e45f 100644 --- a/yql/essentials/tests/sql/minirun/part5/canondata/result.json +++ b/yql/essentials/tests/sql/minirun/part5/canondata/result.json @@ -375,9 +375,9 @@ ], "test.test[blocks-extend-default.txt-Peephole]": [ { - "checksum": "2bec63a1689cb0b100f82fec2b89cd3c", - "size": 629, - "uri": "https://{canondata_backend}/1809005/02f459fce1f16d89b3444e6e8728b9747bb52b53/resource.tar.gz#test.test_blocks-extend-default.txt-Peephole_/opt.yql" + "checksum": "9d73defb3c7ec979ad15c0d105f3211e", + "size": 664, + "uri": "https://{canondata_backend}/1775059/85273762ecc854fd58fe1daec09cca032c02ccb1/resource.tar.gz#test.test_blocks-extend-default.txt-Peephole_/opt.yql" } ], "test.test[blocks-extend-default.txt-Results]": [ diff --git a/yql/essentials/udfs/common/unicode_base/lib/unicode_base_udf.h b/yql/essentials/udfs/common/unicode_base/lib/unicode_base_udf.h index 6982dbe162..d27abcb9f6 100644 --- a/yql/essentials/udfs/common/unicode_base/lib/unicode_base_udf.h +++ b/yql/essentials/udfs/common/unicode_base/lib/unicode_base_udf.h @@ -16,6 +16,7 @@ #include <util/string/subst.h> #include <util/charset/wide.h> #include <util/charset/utf8.h> +#include <util/generic/scope.h> #include <util/string/strip.h> #include <util/string/ascii.h> #include <util/charset/unidata.h> @@ -25,6 +26,10 @@ using namespace NUdf; using namespace NUnicode; namespace { +#define DISABLE_IMPICT_ARGUMENT_CAST \ + template <typename... Args> \ + static auto Execute(Args&&... args) = delete; + inline constexpr bool IsAscii(wchar32 c) noexcept { return ::IsAscii(c); } @@ -54,6 +59,13 @@ namespace { } static TUnboxedValue DoExecute(const IValueBuilder* builder, const TUnboxedValuePod* args) + requires requires { TDerived::Execute(TMaybe<TStringRef>(TStringRef())); } + { + auto executeResult = TDerived::Execute(args[0] ? TMaybe<TStringRef>(args[0].AsStringRef()) : Nothing()); + return ProcessResult(builder, std::move(executeResult), args); + } + + static TUnboxedValue DoExecute(const IValueBuilder* builder, const TUnboxedValuePod* args) requires requires { TDerived::Execute(TStringRef(), TStringRef()); } { auto executeResult = TDerived::Execute(args[0].AsStringRef(), args[1].AsStringRef()); @@ -61,6 +73,13 @@ namespace { } static TUnboxedValue DoExecute(const IValueBuilder* builder, const TUnboxedValuePod* args) + requires requires { TDerived::Execute(TStringRef(), TMaybe<ui16>()); } + { + auto executeResult = TDerived::Execute(args[0].AsStringRef(), args[1] ? TMaybe<ui16>(args[1].Get<ui16>()) : Nothing()); + return ProcessResult(builder, std::move(executeResult), args); + } + + static TUnboxedValue DoExecute(const IValueBuilder* builder, const TUnboxedValuePod* args) requires requires { TDerived::Execute(TStringRef(), TStringRef(), TStringRef()); } { auto executeResult = TDerived::Execute(args[0].AsStringRef(), args[1].AsStringRef(), args[2].AsStringRef()); @@ -74,11 +93,24 @@ namespace { return ProcessResult(builder, std::move(executeResult), args); } + static TUnboxedValue DoExecute(const IValueBuilder* builder, const TUnboxedValuePod* args) + requires requires { TDerived::Execute(TStringRef(), TMaybe<ui64>(), TMaybe<ui64>()); } + { + auto executeResult = TDerived::Execute(args[0].AsStringRef(), + args[1] ? TMaybe<ui64>(args[1].Get<ui64>()) : Nothing(), + args[2] ? TMaybe<ui64>(args[2].Get<ui64>()) : Nothing()); + return ProcessResult(builder, std::move(executeResult), args); + } + private: static TUnboxedValue ProcessResult(const IValueBuilder* builder, const TString& newString, const TUnboxedValuePod*) { return builder->NewString(newString); } + static TUnboxedValue ProcessResult(const IValueBuilder* builder, const TStringBuf newString, const TUnboxedValuePod*) { + return builder->NewString(newString); + } + template <typename T> static TUnboxedValue ProcessResult(const IValueBuilder* builder, const std::variant<TNoChangesTag, T>& newValue, const TUnboxedValuePod* initialArg) { if (std::holds_alternative<T>(newValue)) { @@ -117,6 +149,15 @@ namespace { } template <typename TSink> + static void BlockDoExecute(const TBlockItem arg, const TSink& sink) + requires requires { TDerived::Execute(TMaybe<TStringRef>(TStringRef())); } + { + auto executeResult = TDerived::Execute(arg ? TMaybe<TStringRef>(arg.AsStringRef()) : Nothing()); + TBlockItem boxedValue = ProcessResult(executeResult, arg); + sink(boxedValue); + } + + template <typename TSink> static void BlockDoExecute(const TBlockItem arg1, const TBlockItem arg2, const TSink& sink) requires requires { TDerived::Execute(TStringRef(), TStringRef()); } { @@ -127,6 +168,15 @@ namespace { } template <typename TSink> + static void BlockDoExecute(const TBlockItem arg1, const TBlockItem arg2, const TSink& sink) + requires requires { TDerived::Execute(TStringRef(), TMaybe<ui16>()); } + { + auto executeResult = TDerived::Execute(arg1.AsStringRef(), arg2 ? TMaybe<ui16>(arg2.Get<ui16>()) : Nothing()); + TBlockItem boxedValue = ProcessResult(executeResult, arg1); + sink(boxedValue); + } + + template <typename TSink> static void BlockDoExecute(const TBlockItem args, const TSink& sink) requires(requires { TDerived::Execute(TStringRef(), TStringRef(), TStringRef()); }) { @@ -148,12 +198,28 @@ namespace { sink(boxedValue); } + template <typename TSink> + static void BlockDoExecute(const TBlockItem args, const TSink& sink) + requires(requires { TDerived::Execute(TStringRef(), TMaybe<ui64>(0ULL), TMaybe<ui64>(0ULL)); }) + { + auto executeResult = TDerived::Execute(args.GetElement(0).AsStringRef(), + (args.GetElement(1) ? TMaybe<ui64>(args.GetElement(1).Get<ui64>()) : Nothing()), + (args.GetElement(2) ? TMaybe<ui64>(args.GetElement(2).Get<ui64>()) : Nothing())); + TBlockItem boxedValue = ProcessResult(executeResult, args.GetElement(0)); + sink(boxedValue); + } + private: static TBlockItem ProcessResult(const TString& newString, const TBlockItem arg) { Y_UNUSED(arg); return TBlockItem(newString); } + static TBlockItem ProcessResult(const TStringBuf newString, const TBlockItem arg) { + Y_UNUSED(arg); + return TBlockItem(newString); + } + template <typename T> static TBlockItem ProcessResult(const TMaybe<T>& newValue, const TBlockItem arg) { if (newValue.Defined()) { @@ -188,6 +254,7 @@ namespace { const TUtf16String& input = UTF8ToWide(arg.Data(), arg.Size()); return WideToUTF8(Normalize<mode>(input)); } + DISABLE_IMPICT_ARGUMENT_CAST; }; template <bool (*Function)(wchar32)> @@ -205,6 +272,7 @@ namespace { } return true; } + DISABLE_IMPICT_ARGUMENT_CAST; }; template <bool (*Function)(TUtf16String&, size_t pos, size_t count)> @@ -217,6 +285,7 @@ namespace { return TNoChangesTag{}; } } + DISABLE_IMPICT_ARGUMENT_CAST; }; struct TLengthGetter: public TOperationMixin<TLengthGetter> { @@ -225,6 +294,7 @@ namespace { GetNumberOfUTF8Chars(inputRef.Data(), inputRef.Size(), result); return static_cast<ui64>(result); } + DISABLE_IMPICT_ARGUMENT_CAST; }; struct TReverser: public TOperationMixin<TReverser> { @@ -233,6 +303,7 @@ namespace { ReverseInPlace(wide); return WideToUTF8(wide); } + DISABLE_IMPICT_ARGUMENT_CAST; }; struct TStripper: public TOperationMixin<TStripper> { @@ -241,6 +312,7 @@ namespace { const auto& result = StripString(input, IsUnicodeSpaceAdapter(input.begin())); return WideToUTF8(result); } + DISABLE_IMPICT_ARGUMENT_CAST; }; struct TAllRemover: public TOperationMixin<TAllRemover> { @@ -260,6 +332,7 @@ namespace { } return TNoChangesTag{}; } + DISABLE_IMPICT_ARGUMENT_CAST; }; struct TFirstRemover: public TOperationMixin<TFirstRemover> { @@ -275,6 +348,7 @@ namespace { } return TNoChangesTag{}; } + DISABLE_IMPICT_ARGUMENT_CAST; }; struct TUnicodeSetMatcher: public TOperationMixin<TUnicodeSetMatcher> { @@ -298,6 +372,7 @@ namespace { } return true; } + DISABLE_IMPICT_ARGUMENT_CAST; }; struct TLevensteinDistanceFinder: public TOperationMixin<TLevensteinDistanceFinder> { @@ -308,6 +383,7 @@ namespace { const auto& rightUtf32 = UTF8ToUTF32<true>(right); return NLevenshtein::Distance(leftUtf32, rightUtf32); } + DISABLE_IMPICT_ARGUMENT_CAST; }; struct TLastRemoval: public TOperationMixin<TLastRemoval> { @@ -323,6 +399,7 @@ namespace { } return TNoChangesTag{}; } + DISABLE_IMPICT_ARGUMENT_CAST; }; struct TAllReplacer: public TOperationMixin<TAllReplacer> { @@ -333,9 +410,7 @@ namespace { return TNoChangesTag{}; } } - // Disable implict casts for arguments. - template <typename... Args> - static auto Execute(Args&&... args) = delete; + DISABLE_IMPICT_ARGUMENT_CAST; }; struct TFirstReplacer: public TOperationMixin<TFirstReplacer> { @@ -348,9 +423,7 @@ namespace { } return TNoChangesTag{}; } - // Disable implict casts for arguments. - template <typename... Args> - static auto Execute(Args&&... args) = delete; + DISABLE_IMPICT_ARGUMENT_CAST; }; struct TLastReplacer: public TOperationMixin<TLastReplacer> { @@ -363,9 +436,7 @@ namespace { } return TNoChangesTag{}; } - // Disable implict casts for arguments. - template <typename... Args> - static auto Execute(Args&&... args) = delete; + DISABLE_IMPICT_ARGUMENT_CAST; }; struct TFinder: public TOperationMixin<TFinder> { @@ -389,9 +460,7 @@ namespace { } return Nothing(); } - // Disable implict casts for arguments. - template <typename... Args> - static auto Execute(Args&&... args) = delete; + DISABLE_IMPICT_ARGUMENT_CAST; }; struct TRFinder: public TOperationMixin<TRFinder> { @@ -416,9 +485,65 @@ namespace { } return Nothing(); } - // Disable implict casts for arguments. - template <typename... Args> - static auto Execute(Args&&... args) = delete; + DISABLE_IMPICT_ARGUMENT_CAST; + }; + + template <bool strict> + struct TToUint64Converter: public TOperationMixin<TToUint64Converter<strict>> { + static TNothing Terminate(const char* message) { + if constexpr (strict) { + return Nothing(); + } else { + UdfTerminate(message); + } + }; + + static TMaybe<ui64> Execute(TStringRef inputRef, TMaybe<ui16> inputBase) { + const TString inputStr(inputRef); + const char* input = inputStr.data(); + const int base = inputBase.GetOrElse(0); + char* pos = nullptr; + auto prevErrno = errno; + errno = 0; + Y_DEFER { + errno = prevErrno; + }; + unsigned long long res = std::strtoull(input, &pos, base); + if (!res && errno == EINVAL) { + return Terminate("Incorrect base"); + } + + ui64 ret = static_cast<ui64>(res); + if (!res && pos == input) { + return Terminate("Input string is not a number"); + } else if ((res == ULLONG_MAX && errno == ERANGE) || ret != res) { + return Terminate("Converted value falls out of Uint64 range"); + } else if (*pos) { + return Terminate("Input string contains junk after the number"); + } + return ret; + } + DISABLE_IMPICT_ARGUMENT_CAST; + }; + + struct TUtf8Checker: public TOperationMixin<TUtf8Checker> { + static bool Execute(TMaybe<TStringRef> inputRef) { + if (!inputRef.Defined()) { + return false; + } + return IsUtf8(*inputRef); + } + DISABLE_IMPICT_ARGUMENT_CAST; + }; + + struct TSubstringGetter: public TOperationMixin<TSubstringGetter> { + static TStringBuf Execute(TStringRef inputRef Y_LIFETIME_BOUND, TMaybe<ui64> inputFrom, TMaybe<ui64> inputLen) { + const TStringBuf input(inputRef); + size_t from = inputFrom.GetOrElse(0); + size_t len = inputLen.GetOrElse(TStringBuf::npos); + return SubstrUTF8(input, from, len); + } + DISABLE_IMPICT_ARGUMENT_CAST; }; #define DEFINE_UTF8_OPERATION_STRICT(udfName, Executor, signature, optArgs) \ @@ -475,6 +600,8 @@ namespace { \ END_SIMPLE_ARROW_UDF(T##udfName, T##udfName##KernelExec::Do) + DEFINE_UTF8_OPERATION_STRICT(IsUtf, TUtf8Checker, bool(TOptional<char*>), /*optArgs=*/1); + DEFINE_UTF8_OPERATION_STRICT(Normalize, TNormalizeUTF8<NFC>, TUtf8(TAutoMap<TUtf8>), /*optArgs=*/0); DEFINE_UTF8_OPERATION_STRICT(NormalizeNFD, TNormalizeUTF8<NFD>, TUtf8(TAutoMap<TUtf8>), /*optArgs=*/0); DEFINE_UTF8_OPERATION_STRICT(NormalizeNFC, TNormalizeUTF8<NFC>, TUtf8(TAutoMap<TUtf8>), /*optArgs=*/0); @@ -498,6 +625,7 @@ namespace { DEFINE_UTF8_OPERATION_STRICT(Reverse, TReverser, TUtf8(TAutoMap<TUtf8>), /*optArgs=*/0); DEFINE_UTF8_OPERATION_STRICT(Strip, TStripper, TUtf8(TAutoMap<TUtf8>), /*optArgs=*/0); + DEFINE_UTF8_OPERATION_MANY_STRICT(Substring, TSubstringGetter, TUtf8(TAutoMap<TUtf8>, TOptional<ui64>, TOptional<ui64>), /*argsCount=*/3, /*optArgs=*/1); DEFINE_UTF8_OPERATION_BIN_STRICT(RemoveAll, TAllRemover, TUtf8(TAutoMap<TUtf8>, TUtf8), /*optArgs=*/0); DEFINE_UTF8_OPERATION_BIN_STRICT(RemoveFirst, TFirstRemover, TUtf8(TAutoMap<TUtf8>, TUtf8), /*optArgs=*/0); @@ -512,69 +640,8 @@ namespace { DEFINE_UTF8_OPERATION_MANY_STRICT(Find, TFinder, TOptional<ui64>(TAutoMap<TUtf8>, TUtf8, TOptional<ui64>), /*argsCount=*/3, /*optionalArgs=*/1); DEFINE_UTF8_OPERATION_MANY_STRICT(RFind, TRFinder, TOptional<ui64>(TAutoMap<TUtf8>, TUtf8, TOptional<ui64>), /*argsCount=*/3, /*optionalArgs=*/1); - SIMPLE_UDF(TIsUtf, bool(TOptional<char*>)) { - Y_UNUSED(valueBuilder); - if (args[0]) { - return TUnboxedValuePod(IsUtf8(args[0].AsStringRef())); - } else { - return TUnboxedValuePod(false); - } - } - - SIMPLE_UDF_WITH_OPTIONAL_ARGS(TToUint64, ui64(TAutoMap<TUtf8>, TOptional<ui16>), 1) { - Y_UNUSED(valueBuilder); - const TString inputStr(args[0].AsStringRef()); - const char* input = inputStr.data(); - const int base = static_cast<int>(args[1].GetOrDefault<ui16>(0)); - char* pos = nullptr; - errno = 0; - unsigned long long res = std::strtoull(input, &pos, base); - if (!res && errno == EINVAL) { - UdfTerminate("Incorrect base"); - } - - ui64 ret = static_cast<ui64>(res); - if (!res && pos == input) { - UdfTerminate("Input string is not a number"); - } else if ((res == ULLONG_MAX && errno == ERANGE) || ret != res) { - UdfTerminate("Converted value falls out of Uint64 range"); - } else if (*pos) { - UdfTerminate("Input string contains junk after the number"); - } - return TUnboxedValuePod(ret); - } - - SIMPLE_UDF_WITH_OPTIONAL_ARGS(TTryToUint64, TOptional<ui64>(TAutoMap<TUtf8>, TOptional<ui16>), 1) { - Y_UNUSED(valueBuilder); - const TString inputStr(args[0].AsStringRef()); - const char* input = inputStr.data(); - const int base = static_cast<int>(args[1].GetOrDefault<ui16>(0)); - char* pos = nullptr; - errno = 0; - unsigned long long res = std::strtoull(input, &pos, base); - if (!res && errno == EINVAL) { - return TUnboxedValuePod(); - } - - ui64 ret = static_cast<ui64>(res); - if (!res && pos == input) { - return TUnboxedValuePod(); - } - if ((res == ULLONG_MAX && errno == ERANGE) || ret != res) { - return TUnboxedValuePod(); - } - if (*pos) { - return TUnboxedValuePod(); - } - return TUnboxedValuePod(ret); - } - - SIMPLE_UDF_WITH_OPTIONAL_ARGS(TSubstring, TUtf8(TAutoMap<TUtf8>, TOptional<ui64>, TOptional<ui64>), 1) { - const TStringBuf input(args[0].AsStringRef()); - size_t from = args[1].GetOrDefault<ui64>(0); - size_t len = !args[2] ? TStringBuf::npos : size_t(args[2].Get<ui64>()); - return valueBuilder->NewString(SubstrUTF8(input, from, len)); - } + DEFINE_UTF8_OPERATION_BIN_NOT_STRICT(ToUint64, TToUint64Converter</*strict=*/false>, ui64(TAutoMap<TUtf8>, TOptional<ui16>), /*optionalArgs=*/1); + DEFINE_UTF8_OPERATION_BIN_STRICT(TryToUint64, TToUint64Converter</*strict=*/true>, TOptional<ui64>(TAutoMap<TUtf8>, TOptional<ui16>), /*optionalArgs=*/1); using TTmpVector = TSmallVec<TUnboxedValue, TUnboxedValue::TAllocator>; diff --git a/yql/essentials/udfs/common/unicode_base/test/canondata/result.json b/yql/essentials/udfs/common/unicode_base/test/canondata/result.json index bac6e1ebc4..15b8b4b473 100644 --- a/yql/essentials/udfs/common/unicode_base/test/canondata/result.json +++ b/yql/essentials/udfs/common/unicode_base/test/canondata/result.json @@ -29,11 +29,41 @@ "uri": "file://test.test_BlockStrip_/results.txt" } ], + "test.test[BlockToUint64F0]": [ + { + "uri": "file://test.test_BlockToUint64F0_/extracted" + } + ], + "test.test[BlockToUint64F1]": [ + { + "uri": "file://test.test_BlockToUint64F1_/extracted" + } + ], + "test.test[BlockToUint64F2]": [ + { + "uri": "file://test.test_BlockToUint64F2_/extracted" + } + ], + "test.test[BlockToUint64F3]": [ + { + "uri": "file://test.test_BlockToUint64F3_/extracted" + } + ], + "test.test[BlockToUint64]": [ + { + "uri": "file://test.test_BlockToUint64_/results.txt" + } + ], "test.test[BlockTo]": [ { "uri": "file://test.test_BlockTo_/results.txt" } ], + "test.test[BlockTryToUint64]": [ + { + "uri": "file://test.test_BlockTryToUint64_/results.txt" + } + ], "test.test[BlockUnicode]": [ { "uri": "file://test.test_BlockUnicode_/results.txt" diff --git a/yql/essentials/udfs/common/unicode_base/test/canondata/test.test_BlockToUint64F0_/extracted b/yql/essentials/udfs/common/unicode_base/test/canondata/test.test_BlockToUint64F0_/extracted new file mode 100644 index 0000000000..f8d4992f9e --- /dev/null +++ b/yql/essentials/udfs/common/unicode_base/test/canondata/test.test_BlockToUint64F0_/extracted @@ -0,0 +1,8 @@ +<tmp_path>/program.sql:<main>: Error: Execution + + <tmp_path>/program.sql:<main>:10:1: Error: Execution of node: YtMap! + SELECT + ^ + <tmp_path>/program.sql:<main>:10:1: Error: Input string is not a number + SELECT + ^
\ No newline at end of file diff --git a/yql/essentials/udfs/common/unicode_base/test/canondata/test.test_BlockToUint64F1_/extracted b/yql/essentials/udfs/common/unicode_base/test/canondata/test.test_BlockToUint64F1_/extracted new file mode 100644 index 0000000000..bbdeae1af4 --- /dev/null +++ b/yql/essentials/udfs/common/unicode_base/test/canondata/test.test_BlockToUint64F1_/extracted @@ -0,0 +1,8 @@ +<tmp_path>/program.sql:<main>: Error: Execution + + <tmp_path>/program.sql:<main>:10:1: Error: Execution of node: YtMap! + SELECT + ^ + <tmp_path>/program.sql:<main>:10:1: Error: Input string contains junk after the number + SELECT + ^
\ No newline at end of file diff --git a/yql/essentials/udfs/common/unicode_base/test/canondata/test.test_BlockToUint64F2_/extracted b/yql/essentials/udfs/common/unicode_base/test/canondata/test.test_BlockToUint64F2_/extracted new file mode 100644 index 0000000000..e15ed0de7a --- /dev/null +++ b/yql/essentials/udfs/common/unicode_base/test/canondata/test.test_BlockToUint64F2_/extracted @@ -0,0 +1,8 @@ +<tmp_path>/program.sql:<main>: Error: Execution + + <tmp_path>/program.sql:<main>:10:1: Error: Execution of node: YtMap! + SELECT + ^ + <tmp_path>/program.sql:<main>:10:1: Error: Converted value falls out of Uint64 range + SELECT + ^
\ No newline at end of file diff --git a/yql/essentials/udfs/common/unicode_base/test/canondata/test.test_BlockToUint64F3_/extracted b/yql/essentials/udfs/common/unicode_base/test/canondata/test.test_BlockToUint64F3_/extracted new file mode 100644 index 0000000000..aff4bb4c22 --- /dev/null +++ b/yql/essentials/udfs/common/unicode_base/test/canondata/test.test_BlockToUint64F3_/extracted @@ -0,0 +1,8 @@ +<tmp_path>/program.sql:<main>: Error: Execution + + <tmp_path>/program.sql:<main>:10:1: Error: Execution of node: YtMap! + SELECT + ^ + <tmp_path>/program.sql:<main>:10:1: Error: Incorrect base + SELECT + ^
\ No newline at end of file diff --git a/yql/essentials/udfs/common/unicode_base/test/canondata/test.test_BlockToUint64_/results.txt b/yql/essentials/udfs/common/unicode_base/test/canondata/test.test_BlockToUint64_/results.txt new file mode 100644 index 0000000000..ecd6e5bddb --- /dev/null +++ b/yql/essentials/udfs/common/unicode_base/test/canondata/test.test_BlockToUint64_/results.txt @@ -0,0 +1,178 @@ +[ + { + "Write" = [ + { + "Type" = [ + "ListType"; + [ + "StructType"; + [ + [ + "value"; + [ + "DataType"; + "Utf8" + ] + ]; + [ + "key"; + [ + "DataType"; + "Utf8" + ] + ]; + [ + "column2"; + [ + "DataType"; + "Uint64" + ] + ] + ] + ] + ]; + "Data" = [ + [ + "0x1234abcd"; + "with_format_1"; + "305441741" + ]; + [ + "0X4"; + "with_format_2"; + "4" + ]; + [ + "0644"; + "with_format_3"; + "420" + ]; + [ + "0101010"; + "binary_1"; + "33288" + ]; + [ + "101"; + "binary_2"; + "101" + ] + ] + } + ] + }; + { + "Write" = [ + { + "Type" = [ + "ListType"; + [ + "StructType"; + [ + [ + "value"; + [ + "DataType"; + "Utf8" + ] + ]; + [ + "key"; + [ + "DataType"; + "Utf8" + ] + ]; + [ + "column2"; + [ + "DataType"; + "Uint64" + ] + ]; + [ + "column3"; + [ + "DataType"; + "Uint64" + ] + ] + ] + ] + ]; + "Data" = [ + [ + "0101010"; + "binary_1"; + "42"; + "1052688" + ]; + [ + "101"; + "binary_2"; + "5"; + "257" + ] + ] + } + ] + }; + { + "Write" = [ + { + "Type" = [ + "ListType"; + [ + "StructType"; + [ + [ + "value"; + [ + "DataType"; + "Utf8" + ] + ]; + [ + "key"; + [ + "DataType"; + "Utf8" + ] + ]; + [ + "column2"; + [ + "DataType"; + "Uint64" + ] + ]; + [ + "column3"; + [ + "DataType"; + "Uint64" + ] + ]; + [ + "column4"; + [ + "DataType"; + "Uint64" + ] + ] + ] + ] + ]; + "Data" = [ + [ + "0"; + "zero"; + "0"; + "0"; + "0" + ] + ] + } + ] + } +]
\ No newline at end of file diff --git a/yql/essentials/udfs/common/unicode_base/test/canondata/test.test_BlockTryToUint64_/results.txt b/yql/essentials/udfs/common/unicode_base/test/canondata/test.test_BlockTryToUint64_/results.txt new file mode 100644 index 0000000000..8e4cedcd68 --- /dev/null +++ b/yql/essentials/udfs/common/unicode_base/test/canondata/test.test_BlockTryToUint64_/results.txt @@ -0,0 +1,173 @@ +[ + { + "Write" = [ + { + "Type" = [ + "ListType"; + [ + "StructType"; + [ + [ + "value"; + [ + "DataType"; + "Utf8" + ] + ]; + [ + "column1"; + [ + "OptionalType"; + [ + "DataType"; + "Uint64" + ] + ] + ]; + [ + "column2"; + [ + "OptionalType"; + [ + "DataType"; + "Uint64" + ] + ] + ]; + [ + "column3"; + [ + "OptionalType"; + [ + "DataType"; + "Uint64" + ] + ] + ]; + [ + "column4"; + [ + "OptionalType"; + [ + "DataType"; + "Uint64" + ] + ] + ]; + [ + "column5"; + [ + "OptionalType"; + [ + "DataType"; + "Uint64" + ] + ] + ] + ] + ] + ]; + "Data" = [ + [ + "0x1234abcd"; + #; + #; + #; + #; + [ + "305441741" + ] + ]; + [ + "0X4"; + #; + #; + #; + #; + [ + "4" + ] + ]; + [ + "0644"; + [ + "644" + ]; + #; + #; + [ + "420" + ]; + [ + "1604" + ] + ]; + [ + "0101010"; + [ + "101010" + ]; + #; + [ + "1092" + ]; + [ + "33288" + ]; + [ + "1052688" + ] + ]; + [ + "101"; + [ + "101" + ]; + #; + [ + "17" + ]; + [ + "65" + ]; + [ + "257" + ] + ]; + [ + "0"; + [ + "0" + ]; + #; + [ + "0" + ]; + [ + "0" + ]; + [ + "0" + ] + ]; + [ + "hell"; + #; + #; + #; + #; + # + ]; + [ + "0xFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFF"; + #; + #; + #; + #; + # + ] + ] + } + ] + } +]
\ No newline at end of file diff --git a/yql/essentials/udfs/common/unicode_base/test/canondata/test.test_ToUint64F0_/extracted b/yql/essentials/udfs/common/unicode_base/test/canondata/test.test_ToUint64F0_/extracted index 7e9db6c109..6c16e6c2e3 100644 --- a/yql/essentials/udfs/common/unicode_base/test/canondata/test.test_ToUint64F0_/extracted +++ b/yql/essentials/udfs/common/unicode_base/test/canondata/test.test_ToUint64F0_/extracted @@ -1,8 +1,8 @@ <tmp_path>/program.sql:<main>: Error: Execution - <tmp_path>/program.sql:<main>:2:1: Error: Execution of node: Result + <tmp_path>/program.sql:<main>:4:1: Error: Execution of node: YtMap! SELECT ^ - <tmp_path>/program.sql:<main>:2:1: Error: Input string is not a number + <tmp_path>/program.sql:<main>:4:1: Error: Input string is not a number SELECT ^
\ No newline at end of file diff --git a/yql/essentials/udfs/common/unicode_base/test/canondata/test.test_ToUint64F1_/extracted b/yql/essentials/udfs/common/unicode_base/test/canondata/test.test_ToUint64F1_/extracted index dbf4721670..4288cfcc4c 100644 --- a/yql/essentials/udfs/common/unicode_base/test/canondata/test.test_ToUint64F1_/extracted +++ b/yql/essentials/udfs/common/unicode_base/test/canondata/test.test_ToUint64F1_/extracted @@ -1,8 +1,8 @@ <tmp_path>/program.sql:<main>: Error: Execution - <tmp_path>/program.sql:<main>:2:1: Error: Execution of node: Result + <tmp_path>/program.sql:<main>:4:1: Error: Execution of node: YtMap! SELECT ^ - <tmp_path>/program.sql:<main>:2:1: Error: Input string contains junk after the number + <tmp_path>/program.sql:<main>:4:1: Error: Input string contains junk after the number SELECT ^
\ No newline at end of file diff --git a/yql/essentials/udfs/common/unicode_base/test/canondata/test.test_ToUint64F2_/extracted b/yql/essentials/udfs/common/unicode_base/test/canondata/test.test_ToUint64F2_/extracted index f6b225d561..7ebb531e66 100644 --- a/yql/essentials/udfs/common/unicode_base/test/canondata/test.test_ToUint64F2_/extracted +++ b/yql/essentials/udfs/common/unicode_base/test/canondata/test.test_ToUint64F2_/extracted @@ -1,8 +1,8 @@ <tmp_path>/program.sql:<main>: Error: Execution - <tmp_path>/program.sql:<main>:2:1: Error: Execution of node: Result + <tmp_path>/program.sql:<main>:4:1: Error: Execution of node: YtMap! SELECT ^ - <tmp_path>/program.sql:<main>:2:1: Error: Converted value falls out of Uint64 range + <tmp_path>/program.sql:<main>:4:1: Error: Converted value falls out of Uint64 range SELECT ^
\ No newline at end of file diff --git a/yql/essentials/udfs/common/unicode_base/test/canondata/test.test_ToUint64F3_/extracted b/yql/essentials/udfs/common/unicode_base/test/canondata/test.test_ToUint64F3_/extracted index 5b73d97b40..6ff53caa9a 100644 --- a/yql/essentials/udfs/common/unicode_base/test/canondata/test.test_ToUint64F3_/extracted +++ b/yql/essentials/udfs/common/unicode_base/test/canondata/test.test_ToUint64F3_/extracted @@ -1,8 +1,8 @@ <tmp_path>/program.sql:<main>: Error: Execution - <tmp_path>/program.sql:<main>:2:1: Error: Execution of node: Result + <tmp_path>/program.sql:<main>:4:1: Error: Execution of node: YtMap! SELECT ^ - <tmp_path>/program.sql:<main>:2:1: Error: Incorrect base + <tmp_path>/program.sql:<main>:4:1: Error: Incorrect base SELECT ^
\ No newline at end of file diff --git a/yql/essentials/udfs/common/unicode_base/test/canondata/test.test_ToUint64_/results.txt b/yql/essentials/udfs/common/unicode_base/test/canondata/test.test_ToUint64_/results.txt index 399ba78143..ecd6e5bddb 100644 --- a/yql/essentials/udfs/common/unicode_base/test/canondata/test.test_ToUint64_/results.txt +++ b/yql/essentials/udfs/common/unicode_base/test/canondata/test.test_ToUint64_/results.txt @@ -8,17 +8,17 @@ "StructType"; [ [ - "column0"; + "value"; [ "DataType"; - "Uint64" + "Utf8" ] ]; [ - "column1"; + "key"; [ "DataType"; - "Uint64" + "Utf8" ] ]; [ @@ -27,30 +27,71 @@ "DataType"; "Uint64" ] - ]; + ] + ] + ] + ]; + "Data" = [ + [ + "0x1234abcd"; + "with_format_1"; + "305441741" + ]; + [ + "0X4"; + "with_format_2"; + "4" + ]; + [ + "0644"; + "with_format_3"; + "420" + ]; + [ + "0101010"; + "binary_1"; + "33288" + ]; + [ + "101"; + "binary_2"; + "101" + ] + ] + } + ] + }; + { + "Write" = [ + { + "Type" = [ + "ListType"; + [ + "StructType"; + [ [ - "column3"; + "value"; [ "DataType"; - "Uint64" + "Utf8" ] ]; [ - "column4"; + "key"; [ "DataType"; - "Uint64" + "Utf8" ] ]; [ - "column5"; + "column2"; [ "DataType"; "Uint64" ] ]; [ - "column6"; + "column3"; [ "DataType"; "Uint64" @@ -61,13 +102,16 @@ ]; "Data" = [ [ - "305441741"; - "4"; - "420"; - "1052688"; + "0101010"; + "binary_1"; "42"; - "33288"; - "101" + "1052688" + ]; + [ + "101"; + "binary_2"; + "5"; + "257" ] ] } @@ -82,17 +126,17 @@ "StructType"; [ [ - "column0"; + "value"; [ "DataType"; - "Uint64" + "Utf8" ] ]; [ - "column1"; + "key"; [ "DataType"; - "Uint64" + "Utf8" ] ]; [ @@ -108,6 +152,13 @@ "DataType"; "Uint64" ] + ]; + [ + "column4"; + [ + "DataType"; + "Uint64" + ] ] ] ] @@ -115,6 +166,7 @@ "Data" = [ [ "0"; + "zero"; "0"; "0"; "0" diff --git a/yql/essentials/udfs/common/unicode_base/test/canondata/test.test_TryToUint64_/results.txt b/yql/essentials/udfs/common/unicode_base/test/canondata/test.test_TryToUint64_/results.txt index 3b715cea08..8e4cedcd68 100644 --- a/yql/essentials/udfs/common/unicode_base/test/canondata/test.test_TryToUint64_/results.txt +++ b/yql/essentials/udfs/common/unicode_base/test/canondata/test.test_TryToUint64_/results.txt @@ -8,129 +8,10 @@ "StructType"; [ [ - "column0"; + "value"; [ - "OptionalType"; - [ - "DataType"; - "Uint64" - ] - ] - ] - ] - ] - ]; - "Data" = [ - [ - # - ] - ] - } - ] - }; - { - "Write" = [ - { - "Type" = [ - "ListType"; - [ - "StructType"; - [ - [ - "column0"; - [ - "OptionalType"; - [ - "DataType"; - "Uint64" - ] - ] - ] - ] - ] - ]; - "Data" = [ - [ - # - ] - ] - } - ] - }; - { - "Write" = [ - { - "Type" = [ - "ListType"; - [ - "StructType"; - [ - [ - "column0"; - [ - "OptionalType"; - [ - "DataType"; - "Uint64" - ] - ] - ] - ] - ] - ]; - "Data" = [ - [ - # - ] - ] - } - ] - }; - { - "Write" = [ - { - "Type" = [ - "ListType"; - [ - "StructType"; - [ - [ - "column0"; - [ - "OptionalType"; - [ - "DataType"; - "Uint64" - ] - ] - ] - ] - ] - ]; - "Data" = [ - [ - # - ] - ] - } - ] - }; - { - "Write" = [ - { - "Type" = [ - "ListType"; - [ - "StructType"; - [ - [ - "column0"; - [ - "OptionalType"; - [ - "DataType"; - "Uint64" - ] + "DataType"; + "Utf8" ] ]; [ @@ -182,104 +63,83 @@ "Uint64" ] ] - ]; - [ - "column6"; - [ - "OptionalType"; - [ - "DataType"; - "Uint64" - ] - ] ] ] ] ]; "Data" = [ [ + "0x1234abcd"; + #; + #; + #; + #; [ "305441741" - ]; + ] + ]; + [ + "0X4"; + #; + #; + #; + #; [ "4" + ] + ]; + [ + "0644"; + [ + "644" ]; + #; + #; [ "420" ]; [ - "1052688" + "1604" + ] + ]; + [ + "0101010"; + [ + "101010" ]; + #; [ - "42" + "1092" ]; [ - "101010" + "33288" ]; [ - "101" + "1052688" ] - ] - ] - } - ] - }; - { - "Write" = [ - { - "Type" = [ - "ListType"; + ]; [ - "StructType"; + "101"; [ - [ - "column0"; - [ - "OptionalType"; - [ - "DataType"; - "Uint64" - ] - ] - ]; - [ - "column1"; - [ - "OptionalType"; - [ - "DataType"; - "Uint64" - ] - ] - ]; - [ - "column2"; - [ - "OptionalType"; - [ - "DataType"; - "Uint64" - ] - ] - ]; - [ - "column3"; - [ - "OptionalType"; - [ - "DataType"; - "Uint64" - ] - ] - ] + "101" + ]; + #; + [ + "17" + ]; + [ + "65" + ]; + [ + "257" ] - ] - ]; - "Data" = [ + ]; [ + "0"; [ "0" ]; + #; [ "0" ]; @@ -289,6 +149,22 @@ [ "0" ] + ]; + [ + "hell"; + #; + #; + #; + #; + # + ]; + [ + "0xFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFF"; + #; + #; + #; + #; + # ] ] } diff --git a/yql/essentials/udfs/common/unicode_base/test/cases/BlockToUint64.in b/yql/essentials/udfs/common/unicode_base/test/cases/BlockToUint64.in new file mode 100644 index 0000000000..c9a2f32dfc --- /dev/null +++ b/yql/essentials/udfs/common/unicode_base/test/cases/BlockToUint64.in @@ -0,0 +1,6 @@ +{"key"="with_format_1";"value"="0x1234abcd"}; +{"key"="with_format_2";"value"="0X4"}; +{"key"="with_format_3";"value"="0644"}; +{"key"="binary_1";"value"="0101010"}; +{"key"="binary_2";"value"="101"}; +{"key"="zero";"value"="0"}; diff --git a/yql/essentials/udfs/common/unicode_base/test/cases/BlockToUint64.in.attr b/yql/essentials/udfs/common/unicode_base/test/cases/BlockToUint64.in.attr new file mode 100644 index 0000000000..d5e5b2ca48 --- /dev/null +++ b/yql/essentials/udfs/common/unicode_base/test/cases/BlockToUint64.in.attr @@ -0,0 +1,8 @@ +{ + "_yql_row_spec"={ + "Type"=["StructType";[ + ["key";["DataType";"Utf8"]]; + ["value";["DataType";"Utf8"]] + ]]; + } +} diff --git a/yql/essentials/udfs/common/unicode_base/test/cases/BlockToUint64.sql b/yql/essentials/udfs/common/unicode_base/test/cases/BlockToUint64.sql new file mode 100644 index 0000000000..0c794abdb3 --- /dev/null +++ b/yql/essentials/udfs/common/unicode_base/test/cases/BlockToUint64.sql @@ -0,0 +1,32 @@ +/* syntax version 1 */ + +pragma UseBlocks; + +SELECT + value AS value, + key AS key, + Unicode::ToUint64(value) +FROM Input +WHERE key = "with_format_1" + OR key = "with_format_2" + OR key = "with_format_3" + OR key = "binary_1" + OR key = "binary_2"; + +SELECT + value AS value, + key AS key, + Unicode::ToUint64(value, 2), + Unicode::ToUint64(value, 16) +FROM Input +WHERE key = "binary_1" + OR key = "binary_2"; + +SELECT + value AS value, + key AS key, + Unicode::ToUint64(value, 8), + Unicode::ToUint64(value, 10), + Unicode::ToUint64(value, 16) +FROM Input +WHERE key = "zero"; diff --git a/yql/essentials/udfs/common/unicode_base/test/cases/BlockToUint64F0.cfg b/yql/essentials/udfs/common/unicode_base/test/cases/BlockToUint64F0.cfg new file mode 100644 index 0000000000..1235ff042d --- /dev/null +++ b/yql/essentials/udfs/common/unicode_base/test/cases/BlockToUint64F0.cfg @@ -0,0 +1,2 @@ +in plato.Input BlockToUint64F0.in +xfail diff --git a/yql/essentials/udfs/common/unicode_base/test/cases/BlockToUint64F0.in b/yql/essentials/udfs/common/unicode_base/test/cases/BlockToUint64F0.in new file mode 100644 index 0000000000..c431fc6e9c --- /dev/null +++ b/yql/essentials/udfs/common/unicode_base/test/cases/BlockToUint64F0.in @@ -0,0 +1,2 @@ +{"key"="not_a_number_1";"value"="hello"}; +{"key"="not_a_number_2";"value"="meow"}; diff --git a/yql/essentials/udfs/common/unicode_base/test/cases/BlockToUint64F0.in.attr b/yql/essentials/udfs/common/unicode_base/test/cases/BlockToUint64F0.in.attr new file mode 100644 index 0000000000..d5e5b2ca48 --- /dev/null +++ b/yql/essentials/udfs/common/unicode_base/test/cases/BlockToUint64F0.in.attr @@ -0,0 +1,8 @@ +{ + "_yql_row_spec"={ + "Type"=["StructType";[ + ["key";["DataType";"Utf8"]]; + ["value";["DataType";"Utf8"]] + ]]; + } +} diff --git a/yql/essentials/udfs/common/unicode_base/test/cases/BlockToUint64F0.sql b/yql/essentials/udfs/common/unicode_base/test/cases/BlockToUint64F0.sql new file mode 100644 index 0000000000..cffd41a7f7 --- /dev/null +++ b/yql/essentials/udfs/common/unicode_base/test/cases/BlockToUint64F0.sql @@ -0,0 +1,9 @@ +/* syntax version 1 */ + +pragma UseBlocks; + +SELECT + value as value, + Unicode::ToUint64(value), +FROM Input + diff --git a/yql/essentials/udfs/common/unicode_base/test/cases/BlockToUint64F1.cfg b/yql/essentials/udfs/common/unicode_base/test/cases/BlockToUint64F1.cfg new file mode 100644 index 0000000000..5403234fab --- /dev/null +++ b/yql/essentials/udfs/common/unicode_base/test/cases/BlockToUint64F1.cfg @@ -0,0 +1,3 @@ +in plato.Input BlockToUint64F1.in +xfail + diff --git a/yql/essentials/udfs/common/unicode_base/test/cases/BlockToUint64F1.in b/yql/essentials/udfs/common/unicode_base/test/cases/BlockToUint64F1.in new file mode 100644 index 0000000000..eb40ad3bf1 --- /dev/null +++ b/yql/essentials/udfs/common/unicode_base/test/cases/BlockToUint64F1.in @@ -0,0 +1,2 @@ +{"key"="error1";"value"="01238"}; +{"key"="error2";"value"="01239"}; diff --git a/yql/essentials/udfs/common/unicode_base/test/cases/BlockToUint64F1.in.attr b/yql/essentials/udfs/common/unicode_base/test/cases/BlockToUint64F1.in.attr new file mode 100644 index 0000000000..d5e5b2ca48 --- /dev/null +++ b/yql/essentials/udfs/common/unicode_base/test/cases/BlockToUint64F1.in.attr @@ -0,0 +1,8 @@ +{ + "_yql_row_spec"={ + "Type"=["StructType";[ + ["key";["DataType";"Utf8"]]; + ["value";["DataType";"Utf8"]] + ]]; + } +} diff --git a/yql/essentials/udfs/common/unicode_base/test/cases/BlockToUint64F1.sql b/yql/essentials/udfs/common/unicode_base/test/cases/BlockToUint64F1.sql new file mode 100644 index 0000000000..cffd41a7f7 --- /dev/null +++ b/yql/essentials/udfs/common/unicode_base/test/cases/BlockToUint64F1.sql @@ -0,0 +1,9 @@ +/* syntax version 1 */ + +pragma UseBlocks; + +SELECT + value as value, + Unicode::ToUint64(value), +FROM Input + diff --git a/yql/essentials/udfs/common/unicode_base/test/cases/BlockToUint64F2.cfg b/yql/essentials/udfs/common/unicode_base/test/cases/BlockToUint64F2.cfg new file mode 100644 index 0000000000..8ee27e4497 --- /dev/null +++ b/yql/essentials/udfs/common/unicode_base/test/cases/BlockToUint64F2.cfg @@ -0,0 +1,2 @@ +in plato.Input BlockToUint64F2.in +xfail diff --git a/yql/essentials/udfs/common/unicode_base/test/cases/BlockToUint64F2.in b/yql/essentials/udfs/common/unicode_base/test/cases/BlockToUint64F2.in new file mode 100644 index 0000000000..3895f45398 --- /dev/null +++ b/yql/essentials/udfs/common/unicode_base/test/cases/BlockToUint64F2.in @@ -0,0 +1,2 @@ +{"key"="very_big_1";"value"="0xFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFF"}; +{"key"="very_big_2";"value"="0xFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFF"}; diff --git a/yql/essentials/udfs/common/unicode_base/test/cases/BlockToUint64F2.in.attr b/yql/essentials/udfs/common/unicode_base/test/cases/BlockToUint64F2.in.attr new file mode 100644 index 0000000000..d5e5b2ca48 --- /dev/null +++ b/yql/essentials/udfs/common/unicode_base/test/cases/BlockToUint64F2.in.attr @@ -0,0 +1,8 @@ +{ + "_yql_row_spec"={ + "Type"=["StructType";[ + ["key";["DataType";"Utf8"]]; + ["value";["DataType";"Utf8"]] + ]]; + } +} diff --git a/yql/essentials/udfs/common/unicode_base/test/cases/BlockToUint64F2.sql b/yql/essentials/udfs/common/unicode_base/test/cases/BlockToUint64F2.sql new file mode 100644 index 0000000000..cffd41a7f7 --- /dev/null +++ b/yql/essentials/udfs/common/unicode_base/test/cases/BlockToUint64F2.sql @@ -0,0 +1,9 @@ +/* syntax version 1 */ + +pragma UseBlocks; + +SELECT + value as value, + Unicode::ToUint64(value), +FROM Input + diff --git a/yql/essentials/udfs/common/unicode_base/test/cases/BlockToUint64F3.cfg b/yql/essentials/udfs/common/unicode_base/test/cases/BlockToUint64F3.cfg new file mode 100644 index 0000000000..af5392127c --- /dev/null +++ b/yql/essentials/udfs/common/unicode_base/test/cases/BlockToUint64F3.cfg @@ -0,0 +1,2 @@ +in plato.Input BlockToUint64F3.in +xfail diff --git a/yql/essentials/udfs/common/unicode_base/test/cases/BlockToUint64F3.in b/yql/essentials/udfs/common/unicode_base/test/cases/BlockToUint64F3.in new file mode 100644 index 0000000000..fde0f1b4a6 --- /dev/null +++ b/yql/essentials/udfs/common/unicode_base/test/cases/BlockToUint64F3.in @@ -0,0 +1,2 @@ +{"key"="0";"value"="0"}; +{"key"="1";"value"="1"}; diff --git a/yql/essentials/udfs/common/unicode_base/test/cases/BlockToUint64F3.in.attr b/yql/essentials/udfs/common/unicode_base/test/cases/BlockToUint64F3.in.attr new file mode 100644 index 0000000000..d5e5b2ca48 --- /dev/null +++ b/yql/essentials/udfs/common/unicode_base/test/cases/BlockToUint64F3.in.attr @@ -0,0 +1,8 @@ +{ + "_yql_row_spec"={ + "Type"=["StructType";[ + ["key";["DataType";"Utf8"]]; + ["value";["DataType";"Utf8"]] + ]]; + } +} diff --git a/yql/essentials/udfs/common/unicode_base/test/cases/BlockToUint64F3.sql b/yql/essentials/udfs/common/unicode_base/test/cases/BlockToUint64F3.sql new file mode 100644 index 0000000000..e1a781e917 --- /dev/null +++ b/yql/essentials/udfs/common/unicode_base/test/cases/BlockToUint64F3.sql @@ -0,0 +1,9 @@ +/* syntax version 1 */ + +pragma UseBlocks; + +SELECT + value as value, + Unicode::ToUint64(value, 1), +FROM Input + diff --git a/yql/essentials/udfs/common/unicode_base/test/cases/BlockTryToUint64.in b/yql/essentials/udfs/common/unicode_base/test/cases/BlockTryToUint64.in new file mode 100644 index 0000000000..c3bbe804f8 --- /dev/null +++ b/yql/essentials/udfs/common/unicode_base/test/cases/BlockTryToUint64.in @@ -0,0 +1,8 @@ +{"key"="with_format_1";"value"="0x1234abcd"}; +{"key"="with_format_2";"value"="0X4"}; +{"key"="with_format_3";"value"="0644"}; +{"key"="binary_1";"value"="0101010"}; +{"key"="binary_2";"value"="101"}; +{"key"="zero";"value"="0"}; +{"key"="invalid";"value"="hell"}; +{"key"="very_long";"value"="0xFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFF"}; diff --git a/yql/essentials/udfs/common/unicode_base/test/cases/BlockTryToUint64.in.attr b/yql/essentials/udfs/common/unicode_base/test/cases/BlockTryToUint64.in.attr new file mode 100644 index 0000000000..d5e5b2ca48 --- /dev/null +++ b/yql/essentials/udfs/common/unicode_base/test/cases/BlockTryToUint64.in.attr @@ -0,0 +1,8 @@ +{ + "_yql_row_spec"={ + "Type"=["StructType";[ + ["key";["DataType";"Utf8"]]; + ["value";["DataType";"Utf8"]] + ]]; + } +} diff --git a/yql/essentials/udfs/common/unicode_base/test/cases/BlockTryToUint64.sql b/yql/essentials/udfs/common/unicode_base/test/cases/BlockTryToUint64.sql new file mode 100644 index 0000000000..b217324950 --- /dev/null +++ b/yql/essentials/udfs/common/unicode_base/test/cases/BlockTryToUint64.sql @@ -0,0 +1,12 @@ +/* syntax version 1 */ + +pragma UseBlocks; + +SELECT + value as value, + Unicode::TryToUint64(value, 10), + Unicode::TryToUint64(value, 1), + Unicode::TryToUint64(value, 4), + Unicode::TryToUint64(value, 8), + Unicode::TryToUint64(value, 16) +From Input diff --git a/yql/essentials/udfs/common/unicode_base/test/cases/ToUint64.in b/yql/essentials/udfs/common/unicode_base/test/cases/ToUint64.in new file mode 100644 index 0000000000..c9a2f32dfc --- /dev/null +++ b/yql/essentials/udfs/common/unicode_base/test/cases/ToUint64.in @@ -0,0 +1,6 @@ +{"key"="with_format_1";"value"="0x1234abcd"}; +{"key"="with_format_2";"value"="0X4"}; +{"key"="with_format_3";"value"="0644"}; +{"key"="binary_1";"value"="0101010"}; +{"key"="binary_2";"value"="101"}; +{"key"="zero";"value"="0"}; diff --git a/yql/essentials/udfs/common/unicode_base/test/cases/ToUint64.in.attr b/yql/essentials/udfs/common/unicode_base/test/cases/ToUint64.in.attr new file mode 100644 index 0000000000..d5e5b2ca48 --- /dev/null +++ b/yql/essentials/udfs/common/unicode_base/test/cases/ToUint64.in.attr @@ -0,0 +1,8 @@ +{ + "_yql_row_spec"={ + "Type"=["StructType";[ + ["key";["DataType";"Utf8"]]; + ["value";["DataType";"Utf8"]] + ]]; + } +} diff --git a/yql/essentials/udfs/common/unicode_base/test/cases/ToUint64.sql b/yql/essentials/udfs/common/unicode_base/test/cases/ToUint64.sql index 1cad57a4fd..531322f2d1 100644 --- a/yql/essentials/udfs/common/unicode_base/test/cases/ToUint64.sql +++ b/yql/essentials/udfs/common/unicode_base/test/cases/ToUint64.sql @@ -1,14 +1,29 @@ +/* syntax version 1 */ SELECT - Unicode::ToUint64("0x1234abcd"), - Unicode::ToUint64("0X4"), - Unicode::ToUint64("0644"), - Unicode::ToUint64("0101010", 16), - Unicode::ToUint64("0101010", 2), - Unicode::ToUint64("0101010"), - Unicode::ToUint64("101"); + value AS value, + key AS key, + Unicode::ToUint64(value) +FROM Input +WHERE key = "with_format_1" + OR key = "with_format_2" + OR key = "with_format_3" + OR key = "binary_1" + OR key = "binary_2"; SELECT - Unicode::ToUint64("0", 8), - Unicode::ToUint64("0", 10), - Unicode::ToUint64("0", 16), - Unicode::ToUint64("0"); + value AS value, + key AS key, + Unicode::ToUint64(value, 2), + Unicode::ToUint64(value, 16) +FROM Input +WHERE key = "binary_1" + OR key = "binary_2"; + +SELECT + value AS value, + key AS key, + Unicode::ToUint64(value, 8), + Unicode::ToUint64(value, 10), + Unicode::ToUint64(value, 16) +FROM Input +WHERE key = "zero"; diff --git a/yql/essentials/udfs/common/unicode_base/test/cases/ToUint64F0.cfg b/yql/essentials/udfs/common/unicode_base/test/cases/ToUint64F0.cfg index 83cfd96179..4900d74910 100644 --- a/yql/essentials/udfs/common/unicode_base/test/cases/ToUint64F0.cfg +++ b/yql/essentials/udfs/common/unicode_base/test/cases/ToUint64F0.cfg @@ -1,2 +1,2 @@ +in plato.Input ToUint64F0.in xfail - diff --git a/yql/essentials/udfs/common/unicode_base/test/cases/ToUint64F0.in b/yql/essentials/udfs/common/unicode_base/test/cases/ToUint64F0.in new file mode 100644 index 0000000000..c431fc6e9c --- /dev/null +++ b/yql/essentials/udfs/common/unicode_base/test/cases/ToUint64F0.in @@ -0,0 +1,2 @@ +{"key"="not_a_number_1";"value"="hello"}; +{"key"="not_a_number_2";"value"="meow"}; diff --git a/yql/essentials/udfs/common/unicode_base/test/cases/ToUint64F0.in.attr b/yql/essentials/udfs/common/unicode_base/test/cases/ToUint64F0.in.attr new file mode 100644 index 0000000000..d5e5b2ca48 --- /dev/null +++ b/yql/essentials/udfs/common/unicode_base/test/cases/ToUint64F0.in.attr @@ -0,0 +1,8 @@ +{ + "_yql_row_spec"={ + "Type"=["StructType";[ + ["key";["DataType";"Utf8"]]; + ["value";["DataType";"Utf8"]] + ]]; + } +} diff --git a/yql/essentials/udfs/common/unicode_base/test/cases/ToUint64F0.sql b/yql/essentials/udfs/common/unicode_base/test/cases/ToUint64F0.sql index dd1182a562..b84e287c50 100644 --- a/yql/essentials/udfs/common/unicode_base/test/cases/ToUint64F0.sql +++ b/yql/essentials/udfs/common/unicode_base/test/cases/ToUint64F0.sql @@ -1,3 +1,6 @@ +/* syntax version 1 */ SELECT - Unicode::ToUint64("hell"); + value as value, + Unicode::ToUint64(value), +FROM Input diff --git a/yql/essentials/udfs/common/unicode_base/test/cases/ToUint64F1.cfg b/yql/essentials/udfs/common/unicode_base/test/cases/ToUint64F1.cfg index 83cfd96179..218c06cc4a 100644 --- a/yql/essentials/udfs/common/unicode_base/test/cases/ToUint64F1.cfg +++ b/yql/essentials/udfs/common/unicode_base/test/cases/ToUint64F1.cfg @@ -1,2 +1,3 @@ +in plato.Input ToUint64F1.in xfail diff --git a/yql/essentials/udfs/common/unicode_base/test/cases/ToUint64F1.in b/yql/essentials/udfs/common/unicode_base/test/cases/ToUint64F1.in new file mode 100644 index 0000000000..eb40ad3bf1 --- /dev/null +++ b/yql/essentials/udfs/common/unicode_base/test/cases/ToUint64F1.in @@ -0,0 +1,2 @@ +{"key"="error1";"value"="01238"}; +{"key"="error2";"value"="01239"}; diff --git a/yql/essentials/udfs/common/unicode_base/test/cases/ToUint64F1.in.attr b/yql/essentials/udfs/common/unicode_base/test/cases/ToUint64F1.in.attr new file mode 100644 index 0000000000..d5e5b2ca48 --- /dev/null +++ b/yql/essentials/udfs/common/unicode_base/test/cases/ToUint64F1.in.attr @@ -0,0 +1,8 @@ +{ + "_yql_row_spec"={ + "Type"=["StructType";[ + ["key";["DataType";"Utf8"]]; + ["value";["DataType";"Utf8"]] + ]]; + } +} diff --git a/yql/essentials/udfs/common/unicode_base/test/cases/ToUint64F1.sql b/yql/essentials/udfs/common/unicode_base/test/cases/ToUint64F1.sql index f42380ee80..b84e287c50 100644 --- a/yql/essentials/udfs/common/unicode_base/test/cases/ToUint64F1.sql +++ b/yql/essentials/udfs/common/unicode_base/test/cases/ToUint64F1.sql @@ -1,3 +1,6 @@ +/* syntax version 1 */ SELECT - Unicode::ToUint64("01238"); + value as value, + Unicode::ToUint64(value), +FROM Input diff --git a/yql/essentials/udfs/common/unicode_base/test/cases/ToUint64F2.cfg b/yql/essentials/udfs/common/unicode_base/test/cases/ToUint64F2.cfg index 83cfd96179..e377f6a260 100644 --- a/yql/essentials/udfs/common/unicode_base/test/cases/ToUint64F2.cfg +++ b/yql/essentials/udfs/common/unicode_base/test/cases/ToUint64F2.cfg @@ -1,2 +1,2 @@ +in plato.Input ToUint64F2.in xfail - diff --git a/yql/essentials/udfs/common/unicode_base/test/cases/ToUint64F2.in b/yql/essentials/udfs/common/unicode_base/test/cases/ToUint64F2.in new file mode 100644 index 0000000000..3895f45398 --- /dev/null +++ b/yql/essentials/udfs/common/unicode_base/test/cases/ToUint64F2.in @@ -0,0 +1,2 @@ +{"key"="very_big_1";"value"="0xFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFF"}; +{"key"="very_big_2";"value"="0xFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFF"}; diff --git a/yql/essentials/udfs/common/unicode_base/test/cases/ToUint64F2.in.attr b/yql/essentials/udfs/common/unicode_base/test/cases/ToUint64F2.in.attr new file mode 100644 index 0000000000..d5e5b2ca48 --- /dev/null +++ b/yql/essentials/udfs/common/unicode_base/test/cases/ToUint64F2.in.attr @@ -0,0 +1,8 @@ +{ + "_yql_row_spec"={ + "Type"=["StructType";[ + ["key";["DataType";"Utf8"]]; + ["value";["DataType";"Utf8"]] + ]]; + } +} diff --git a/yql/essentials/udfs/common/unicode_base/test/cases/ToUint64F2.sql b/yql/essentials/udfs/common/unicode_base/test/cases/ToUint64F2.sql index 1a9b7e2449..b84e287c50 100644 --- a/yql/essentials/udfs/common/unicode_base/test/cases/ToUint64F2.sql +++ b/yql/essentials/udfs/common/unicode_base/test/cases/ToUint64F2.sql @@ -1,3 +1,6 @@ +/* syntax version 1 */ SELECT - Unicode::ToUint64("0xFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFF"); + value as value, + Unicode::ToUint64(value), +FROM Input diff --git a/yql/essentials/udfs/common/unicode_base/test/cases/ToUint64F3.cfg b/yql/essentials/udfs/common/unicode_base/test/cases/ToUint64F3.cfg index 83cfd96179..83322ea216 100644 --- a/yql/essentials/udfs/common/unicode_base/test/cases/ToUint64F3.cfg +++ b/yql/essentials/udfs/common/unicode_base/test/cases/ToUint64F3.cfg @@ -1,2 +1,2 @@ +in plato.Input ToUint64F3.in xfail - diff --git a/yql/essentials/udfs/common/unicode_base/test/cases/ToUint64F3.in b/yql/essentials/udfs/common/unicode_base/test/cases/ToUint64F3.in new file mode 100644 index 0000000000..fde0f1b4a6 --- /dev/null +++ b/yql/essentials/udfs/common/unicode_base/test/cases/ToUint64F3.in @@ -0,0 +1,2 @@ +{"key"="0";"value"="0"}; +{"key"="1";"value"="1"}; diff --git a/yql/essentials/udfs/common/unicode_base/test/cases/ToUint64F3.in.attr b/yql/essentials/udfs/common/unicode_base/test/cases/ToUint64F3.in.attr new file mode 100644 index 0000000000..d5e5b2ca48 --- /dev/null +++ b/yql/essentials/udfs/common/unicode_base/test/cases/ToUint64F3.in.attr @@ -0,0 +1,8 @@ +{ + "_yql_row_spec"={ + "Type"=["StructType";[ + ["key";["DataType";"Utf8"]]; + ["value";["DataType";"Utf8"]] + ]]; + } +} diff --git a/yql/essentials/udfs/common/unicode_base/test/cases/ToUint64F3.sql b/yql/essentials/udfs/common/unicode_base/test/cases/ToUint64F3.sql index 527fb1da1b..cc2e70d71a 100644 --- a/yql/essentials/udfs/common/unicode_base/test/cases/ToUint64F3.sql +++ b/yql/essentials/udfs/common/unicode_base/test/cases/ToUint64F3.sql @@ -1,3 +1,6 @@ +/* syntax version 1 */ SELECT - Unicode::ToUint64("0",1); + value as value, + Unicode::ToUint64(value, 1), +FROM Input diff --git a/yql/essentials/udfs/common/unicode_base/test/cases/TryToUint64.in b/yql/essentials/udfs/common/unicode_base/test/cases/TryToUint64.in new file mode 100644 index 0000000000..c3bbe804f8 --- /dev/null +++ b/yql/essentials/udfs/common/unicode_base/test/cases/TryToUint64.in @@ -0,0 +1,8 @@ +{"key"="with_format_1";"value"="0x1234abcd"}; +{"key"="with_format_2";"value"="0X4"}; +{"key"="with_format_3";"value"="0644"}; +{"key"="binary_1";"value"="0101010"}; +{"key"="binary_2";"value"="101"}; +{"key"="zero";"value"="0"}; +{"key"="invalid";"value"="hell"}; +{"key"="very_long";"value"="0xFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFF"}; diff --git a/yql/essentials/udfs/common/unicode_base/test/cases/TryToUint64.in.attr b/yql/essentials/udfs/common/unicode_base/test/cases/TryToUint64.in.attr new file mode 100644 index 0000000000..d5e5b2ca48 --- /dev/null +++ b/yql/essentials/udfs/common/unicode_base/test/cases/TryToUint64.in.attr @@ -0,0 +1,8 @@ +{ + "_yql_row_spec"={ + "Type"=["StructType";[ + ["key";["DataType";"Utf8"]]; + ["value";["DataType";"Utf8"]] + ]]; + } +} diff --git a/yql/essentials/udfs/common/unicode_base/test/cases/TryToUint64.sql b/yql/essentials/udfs/common/unicode_base/test/cases/TryToUint64.sql index b51ce72e6f..aa07de57e9 100644 --- a/yql/essentials/udfs/common/unicode_base/test/cases/TryToUint64.sql +++ b/yql/essentials/udfs/common/unicode_base/test/cases/TryToUint64.sql @@ -1,26 +1,9 @@ -SELECT - Unicode::TryToUint64("hell", 10); - -SELECT - Unicode::TryToUint64("01238", 8); - -SELECT - Unicode::TryToUint64("0xFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFF", 16); - -SELECT - Unicode::TryToUint64("0", 1); - -SELECT - Unicode::TryToUint64("0x1234abcd", 16), - Unicode::TryToUint64("0X4", 16), - Unicode::TryToUint64("0644", 8), - Unicode::TryToUint64("0101010", 16), - Unicode::TryToUint64("0101010", 2), - Unicode::TryToUint64("0101010", 10), - Unicode::TryToUint64("101", 10); - -SELECT - Unicode::TryToUint64("0", 8), - Unicode::TryToUint64("0", 10), - Unicode::TryToUint64("0", 16), - Unicode::TryToUint64("0"); +/* syntax version 1 */ +SELECT + value as value, + Unicode::TryToUint64(value, 10), + Unicode::TryToUint64(value, 1), + Unicode::TryToUint64(value, 4), + Unicode::TryToUint64(value, 8), + Unicode::TryToUint64(value, 16) +From Input diff --git a/yt/python/yt/common.py b/yt/python/yt/common.py index 08c41bc810..975c6be6b0 100644 --- a/yt/python/yt/common.py +++ b/yt/python/yt/common.py @@ -359,6 +359,10 @@ class YtError(Exception): """Member is already present in group.""" return self.contains_code(908) + def is_prerequisite_check_failed(self): + """Prerequisite check failed.""" + return self.contains_code(1000) + def is_prohibited_cross_cell_copy(self): """Cross-cell "copy"/"move" command is explicitly disabled.""" return self.contains_code(1002) diff --git a/yt/yql/providers/yt/codec/codegen/ya.make.inc b/yt/yql/providers/yt/codec/codegen/ya.make.inc index 0548d7e545..e418e84d22 100644 --- a/yt/yql/providers/yt/codec/codegen/ya.make.inc +++ b/yt/yql/providers/yt/codec/codegen/ya.make.inc @@ -15,6 +15,7 @@ PEERDIR( yql/essentials/parser/pg_wrapper/interface yql/essentials/utils yt/yql/providers/yt/codec/codegen + yt/yql/providers/yt/codec ) IF (NOT MKQL_DISABLE_CODEGEN) diff --git a/yt/yql/providers/yt/codec/codegen/yt_codec_cg.cpp b/yt/yql/providers/yt/codec/codegen/yt_codec_cg.cpp index 61c56e5531..4c7745155f 100644 --- a/yt/yql/providers/yt/codec/codegen/yt_codec_cg.cpp +++ b/yt/yql/providers/yt/codec/codegen/yt_codec_cg.cpp @@ -4,6 +4,7 @@ #include <yql/essentials/parser/pg_wrapper/interface/codec.h> #include <yql/essentials/providers/common/codec/yql_codec_buf.h> #include <yql/essentials/providers/common/codec/yql_codec_type_flags.h> +#include <yt/yql/providers/yt/codec/yt_codec.h> #ifndef MKQL_DISABLE_CODEGEN #include <yql/essentials/minikql/mkql_node.h> @@ -171,7 +172,7 @@ public: const auto valType = Type::getInt128Ty(context); const auto flagsConst = ConstantInt::get(Type::getInt64Ty(context), nativeYtTypeFlags); if (nativeYtTypeFlags) { - const auto funcAddr = ConstantInt::get(Type::getInt64Ty(context), (ui64)&NYql::NCommon::WriteContainerNativeYtValue); + const auto funcAddr = ConstantInt::get(Type::getInt64Ty(context), (ui64)&NYql::WriteContainerNativeYtValue); const auto funType = FunctionType::get(Type::getVoidTy(context), { Type::getInt64Ty(context), Type::getInt64Ty(context), PointerType::getUnqual(valType), PointerType::getUnqual(Type::getInt8Ty(context)) @@ -180,7 +181,7 @@ public: const auto funcPtr = CastInst::Create(Instruction::IntToPtr, funcAddr, PointerType::getUnqual(funType), "ptr", Block_); CallInst::Create(funType, funcPtr, { typeConst, flagsConst, elemPtr, buf }, "", Block_); } else { - const auto funcAddr = ConstantInt::get(Type::getInt64Ty(context), (ui64)&NYql::NCommon::WriteYsonContainerValue); + const auto funcAddr = ConstantInt::get(Type::getInt64Ty(context), (ui64)&NYql::WriteYsonContainerValue); const auto funType = FunctionType::get(Type::getVoidTy(context), { Type::getInt64Ty(context), Type::getInt64Ty(context), PointerType::getUnqual(valType), PointerType::getUnqual(Type::getInt8Ty(context)) @@ -792,7 +793,7 @@ private: void GenerateContainer(Value* velemPtr, Value* buf, TType* type, bool wrapOptional, ui64 nativeYtTypeFlags) { auto& context = Codegen_->GetContext(); - const auto funcAddr = ConstantInt::get(Type::getInt64Ty(context), nativeYtTypeFlags ? (ui64)&NCommon::ReadContainerNativeYtValue : (ui64)&NCommon::ReadYsonContainerValue); + const auto funcAddr = ConstantInt::get(Type::getInt64Ty(context), nativeYtTypeFlags ? (ui64)&ReadContainerNativeYtValue : (ui64)&ReadYsonContainerValue); const auto typeConst = ConstantInt::get(Type::getInt64Ty(context), (ui64)type); const auto holderFactoryConst = ConstantInt::get(Type::getInt64Ty(context), (ui64)&HolderFactory_); const auto wrapConst = ConstantInt::get(Type::getInt1Ty(context), wrapOptional); diff --git a/yt/yql/providers/yt/codec/ya.make b/yt/yql/providers/yt/codec/ya.make index 8853ac1485..ef0593121f 100644 --- a/yt/yql/providers/yt/codec/ya.make +++ b/yt/yql/providers/yt/codec/ya.make @@ -33,6 +33,7 @@ PEERDIR( yt/yql/providers/yt/common yt/yql/providers/yt/lib/mkql_helpers yt/yql/providers/yt/lib/skiff + yt/yt/library/decimal yql/essentials/providers/common/codec/yt_arrow_converter_interface ) diff --git a/yt/yql/providers/yt/codec/yt_codec.cpp b/yt/yql/providers/yt/codec/yt_codec.cpp index 86381e1920..6e64136937 100644 --- a/yt/yql/providers/yt/codec/yt_codec.cpp +++ b/yt/yql/providers/yt/codec/yt_codec.cpp @@ -10,9 +10,17 @@ #include <yql/essentials/minikql/mkql_node_builder.h> #include <yql/essentials/minikql/mkql_string_util.h> #include <yql/essentials/utils/yql_panic.h> +#include <yql/essentials/utils/swap_bytes.h> #include <yql/essentials/core/yql_type_annotation.h> +#include <yql/essentials/public/result_format/yql_codec_results.h> +#include <yql/essentials/public/decimal/yql_decimal.h> +#include <yql/essentials/public/decimal/yql_decimal_serialize.h> +#include <yql/essentials/minikql/computation/mkql_computation_node_pack.h> + +#include <yt/yt/library/decimal/decimal.h> #include <library/cpp/yson/node/node_io.h> +#include <library/cpp/yson/detail.h> #include <util/generic/hash_set.h> #include <util/generic/map.h> @@ -28,6 +36,7 @@ namespace NYql { using namespace NKikimr; using namespace NKikimr::NMiniKQL; +using namespace NYson::NDetail; ////////////////////////////////////////////////////////////////////////////////////////////////////////// @@ -675,4 +684,1913 @@ TMkqlIOCache::TMkqlIOCache(const TMkqlIOSpecs& specs, const THolderFactory& hold } } +template <typename T> +T ReadYsonFloatNumberInTableFormat(char cmd, NCommon::TInputBuf& buf) { + CHECK_EXPECTED(cmd, DoubleMarker); + double dbl; + buf.ReadMany((char*)&dbl, sizeof(dbl)); + return dbl; +} + +NUdf::TUnboxedValue ReadYsonValueInTableFormat(TType* type, ui64 nativeYtTypeFlags, + const NKikimr::NMiniKQL::THolderFactory& holderFactory, char cmd, NCommon::TInputBuf& buf) { + switch (type->GetKind()) { + case TType::EKind::Variant: { + auto varType = static_cast<TVariantType*>(type); + auto underlyingType = varType->GetUnderlyingType(); + if (nativeYtTypeFlags & NTCF_COMPLEX) { + CHECK_EXPECTED(cmd, BeginListSymbol); + cmd = buf.Read(); + TType* type = nullptr; + i64 index = 0; + if (cmd == StringMarker) { + YQL_ENSURE(underlyingType->IsStruct(), "Expected struct as underlying type"); + auto structType = static_cast<TStructType*>(underlyingType); + auto nameBuffer = ReadNextString(cmd, buf); + auto foundIndex = structType->FindMemberIndex(nameBuffer); + YQL_ENSURE(foundIndex.Defined(), "Unexpected member: " << nameBuffer); + index = *foundIndex; + type = varType->GetAlternativeType(index); + } else { + YQL_ENSURE(cmd == Int64Marker || cmd == Uint64Marker); + YQL_ENSURE(underlyingType->IsTuple(), "Expected tuple as underlying type"); + if (cmd == Uint64Marker) { + index = buf.ReadVarUI64(); + } else { + index = buf.ReadVarI64(); + } + YQL_ENSURE(0 <= index && index < varType->GetAlternativesCount(), "Unexpected member index: " << index); + type = varType->GetAlternativeType(index); + } + cmd = buf.Read(); + CHECK_EXPECTED(cmd, ListItemSeparatorSymbol); + cmd = buf.Read(); + auto value = ReadYsonValueInTableFormat(type, nativeYtTypeFlags, holderFactory, cmd, buf); + cmd = buf.Read(); + if (cmd != EndListSymbol) { + CHECK_EXPECTED(cmd, ListItemSeparatorSymbol); + cmd = buf.Read(); + CHECK_EXPECTED(cmd, EndListSymbol); + } + return holderFactory.CreateVariantHolder(value.Release(), index); + } else { + if (cmd == StringMarker) { + YQL_ENSURE(underlyingType->IsStruct(), "Expected struct as underlying type"); + auto name = ReadNextString(cmd, buf); + auto index = static_cast<TStructType*>(underlyingType)->FindMemberIndex(name); + YQL_ENSURE(index, "Unexpected member: " << name); + YQL_ENSURE(static_cast<TStructType*>(underlyingType)->GetMemberType(*index)->IsVoid(), "Expected Void as underlying type"); + return holderFactory.CreateVariantHolder(NUdf::TUnboxedValuePod::Zero(), *index); + } + + CHECK_EXPECTED(cmd, BeginListSymbol); + cmd = buf.Read(); + i64 index = 0; + YQL_ENSURE(cmd == Int64Marker || cmd == Uint64Marker); + if (cmd == Uint64Marker) { + index = buf.ReadVarUI64(); + } else { + index = buf.ReadVarI64(); + } + + YQL_ENSURE(index < varType->GetAlternativesCount(), "Bad variant alternative: " << index << ", only " << + varType->GetAlternativesCount() << " are available"); + YQL_ENSURE(underlyingType->IsTuple() || underlyingType->IsStruct(), "Wrong underlying type"); + TType* itemType; + if (underlyingType->IsTuple()) { + itemType = static_cast<TTupleType*>(underlyingType)->GetElementType(index); + } + else { + itemType = static_cast<TStructType*>(underlyingType)->GetMemberType(index); + } + + EXPECTED(buf, ListItemSeparatorSymbol); + cmd = buf.Read(); + auto value = ReadYsonValueInTableFormat(itemType, nativeYtTypeFlags, holderFactory, cmd, buf); + cmd = buf.Read(); + if (cmd == ListItemSeparatorSymbol) { + cmd = buf.Read(); + } + + CHECK_EXPECTED(cmd, EndListSymbol); + return holderFactory.CreateVariantHolder(value.Release(), index); + } + } + + case TType::EKind::Data: { + auto schemeType = static_cast<TDataType*>(type)->GetSchemeType(); + switch (schemeType) { + case NUdf::TDataType<bool>::Id: + YQL_ENSURE(cmd == FalseMarker || cmd == TrueMarker, "Expected either true or false, but got: " << TString(cmd).Quote()); + return NUdf::TUnboxedValuePod(cmd == TrueMarker); + + case NUdf::TDataType<ui8>::Id: + CHECK_EXPECTED(cmd, Uint64Marker); + return NUdf::TUnboxedValuePod(ui8(buf.ReadVarUI64())); + + case NUdf::TDataType<i8>::Id: + CHECK_EXPECTED(cmd, Int64Marker); + return NUdf::TUnboxedValuePod(i8(buf.ReadVarI64())); + + case NUdf::TDataType<ui16>::Id: + CHECK_EXPECTED(cmd, Uint64Marker); + return NUdf::TUnboxedValuePod(ui16(buf.ReadVarUI64())); + + case NUdf::TDataType<i16>::Id: + CHECK_EXPECTED(cmd, Int64Marker); + return NUdf::TUnboxedValuePod(i16(buf.ReadVarI64())); + + case NUdf::TDataType<i32>::Id: + CHECK_EXPECTED(cmd, Int64Marker); + return NUdf::TUnboxedValuePod(i32(buf.ReadVarI64())); + + case NUdf::TDataType<ui32>::Id: + CHECK_EXPECTED(cmd, Uint64Marker); + return NUdf::TUnboxedValuePod(ui32(buf.ReadVarUI64())); + + case NUdf::TDataType<i64>::Id: + CHECK_EXPECTED(cmd, Int64Marker); + return NUdf::TUnboxedValuePod(buf.ReadVarI64()); + + case NUdf::TDataType<ui64>::Id: + CHECK_EXPECTED(cmd, Uint64Marker); + return NUdf::TUnboxedValuePod(buf.ReadVarUI64()); + + case NUdf::TDataType<float>::Id: + return NUdf::TUnboxedValuePod(ReadYsonFloatNumberInTableFormat<float>(cmd, buf)); + + case NUdf::TDataType<double>::Id: + return NUdf::TUnboxedValuePod(ReadYsonFloatNumberInTableFormat<double>(cmd, buf)); + + case NUdf::TDataType<NUdf::TUtf8>::Id: + case NUdf::TDataType<char*>::Id: + case NUdf::TDataType<NUdf::TJson>::Id: + case NUdf::TDataType<NUdf::TDyNumber>::Id: + case NUdf::TDataType<NUdf::TUuid>::Id: { + auto nextString = ReadNextString(cmd, buf); + return NUdf::TUnboxedValue(MakeString(NUdf::TStringRef(nextString))); + } + + case NUdf::TDataType<NUdf::TDecimal>::Id: { + auto nextString = ReadNextString(cmd, buf); + if (nativeYtTypeFlags & NTCF_DECIMAL) { + auto const params = static_cast<TDataDecimalType*>(type)->GetParams(); + if (params.first < 10) { + // The YQL format differs from the YT format in the inf/nan values. NDecimal::FromYtDecimal converts nan/inf + NDecimal::TInt128 res = NDecimal::FromYtDecimal(NYT::NDecimal::TDecimal::ParseBinary32(params.first, nextString)); + YQL_ENSURE(!NDecimal::IsError(res)); + return NUdf::TUnboxedValuePod(res); + } else if (params.first < 19) { + NDecimal::TInt128 res = NDecimal::FromYtDecimal(NYT::NDecimal::TDecimal::ParseBinary64(params.first, nextString)); + YQL_ENSURE(!NDecimal::IsError(res)); + return NUdf::TUnboxedValuePod(res); + } else { + YQL_ENSURE(params.first < 36); + NYT::NDecimal::TDecimal::TValue128 tmpRes = NYT::NDecimal::TDecimal::ParseBinary128(params.first, nextString); + NDecimal::TInt128 res; + static_assert(sizeof(NDecimal::TInt128) == sizeof(NYT::NDecimal::TDecimal::TValue128)); + memcpy(&res, &tmpRes, sizeof(NDecimal::TInt128)); + res = NDecimal::FromYtDecimal(res); + YQL_ENSURE(!NDecimal::IsError(res)); + return NUdf::TUnboxedValuePod(res); + } + } + else { + const auto& des = NDecimal::Deserialize(nextString.data(), nextString.size()); + YQL_ENSURE(!NDecimal::IsError(des.first)); + YQL_ENSURE(nextString.size() == des.second); + return NUdf::TUnboxedValuePod(des.first); + } + } + + case NUdf::TDataType<NUdf::TYson>::Id: { + auto& yson = buf.YsonBuffer(); + yson.clear(); + CopyYsonWithAttrs(cmd, buf, yson); + + return NUdf::TUnboxedValue(MakeString(NUdf::TStringRef(yson))); + } + + case NUdf::TDataType<NUdf::TDate>::Id: + CHECK_EXPECTED(cmd, Uint64Marker); + return NUdf::TUnboxedValuePod((ui16)buf.ReadVarUI64()); + + case NUdf::TDataType<NUdf::TDatetime>::Id: + CHECK_EXPECTED(cmd, Uint64Marker); + return NUdf::TUnboxedValuePod((ui32)buf.ReadVarUI64()); + + case NUdf::TDataType<NUdf::TTimestamp>::Id: + CHECK_EXPECTED(cmd, Uint64Marker); + return NUdf::TUnboxedValuePod(buf.ReadVarUI64()); + + case NUdf::TDataType<NUdf::TInterval>::Id: + CHECK_EXPECTED(cmd, Int64Marker); + return NUdf::TUnboxedValuePod(buf.ReadVarI64()); + + case NUdf::TDataType<NUdf::TTzDate>::Id: { + auto nextString = ReadNextString(cmd, buf); + NUdf::TUnboxedValuePod data; + ui16 value; + ui16 tzId = 0; + YQL_ENSURE(DeserializeTzDate(nextString, value, tzId)); + data = NUdf::TUnboxedValuePod(value); + data.SetTimezoneId(tzId); + return data; + } + + case NUdf::TDataType<NUdf::TTzDatetime>::Id: { + auto nextString = ReadNextString(cmd, buf); + NUdf::TUnboxedValuePod data; + ui32 value; + ui16 tzId = 0; + YQL_ENSURE(DeserializeTzDatetime(nextString, value, tzId)); + data = NUdf::TUnboxedValuePod(value); + data.SetTimezoneId(tzId); + return data; + } + + case NUdf::TDataType<NUdf::TTzTimestamp>::Id: { + auto nextString = ReadNextString(cmd, buf); + NUdf::TUnboxedValuePod data; + ui64 value; + ui16 tzId = 0; + YQL_ENSURE(DeserializeTzTimestamp(nextString, value, tzId)); + data = NUdf::TUnboxedValuePod(value); + data.SetTimezoneId(tzId); + return data; + } + + case NUdf::TDataType<NUdf::TDate32>::Id: + CHECK_EXPECTED(cmd, Int64Marker); + return NUdf::TUnboxedValuePod((i32)buf.ReadVarI64()); + + case NUdf::TDataType<NUdf::TDatetime64>::Id: + case NUdf::TDataType<NUdf::TTimestamp64>::Id: + case NUdf::TDataType<NUdf::TInterval64>::Id: + CHECK_EXPECTED(cmd, Int64Marker); + return NUdf::TUnboxedValuePod(buf.ReadVarI64()); + + case NUdf::TDataType<NUdf::TJsonDocument>::Id: { + return ValueFromString(EDataSlot::JsonDocument, ReadNextString(cmd, buf)); + } + + case NUdf::TDataType<NUdf::TTzDate32>::Id: { + auto nextString = ReadNextString(cmd, buf); + NUdf::TUnboxedValuePod data; + i32 value; + ui16 tzId = 0; + YQL_ENSURE(DeserializeTzDate32(nextString, value, tzId)); + data = NUdf::TUnboxedValuePod(value); + data.SetTimezoneId(tzId); + return data; + } + + case NUdf::TDataType<NUdf::TTzDatetime64>::Id: { + auto nextString = ReadNextString(cmd, buf); + NUdf::TUnboxedValuePod data; + i64 value; + ui16 tzId = 0; + YQL_ENSURE(DeserializeTzDatetime64(nextString, value, tzId)); + data = NUdf::TUnboxedValuePod(value); + data.SetTimezoneId(tzId); + return data; + } + + case NUdf::TDataType<NUdf::TTzTimestamp64>::Id: { + auto nextString = ReadNextString(cmd, buf); + NUdf::TUnboxedValuePod data; + i64 value; + ui16 tzId = 0; + YQL_ENSURE(DeserializeTzTimestamp64(nextString, value, tzId)); + data = NUdf::TUnboxedValuePod(value); + data.SetTimezoneId(tzId); + return data; + } + + default: + YQL_ENSURE(false, "Unsupported data type: " << schemeType); + } + } + + case TType::EKind::Struct: { + YQL_ENSURE(cmd == BeginListSymbol || cmd == BeginMapSymbol); + auto structType = static_cast<TStructType*>(type); + NUdf::TUnboxedValue* items; + NUdf::TUnboxedValue ret = holderFactory.CreateDirectArrayHolder(structType->GetMembersCount(), items); + if (cmd == BeginListSymbol) { + cmd = buf.Read(); + + for (ui32 i = 0; i < structType->GetMembersCount(); ++i) { + items[i] = ReadYsonValueInTableFormat(structType->GetMemberType(i), nativeYtTypeFlags, holderFactory, cmd, buf); + + cmd = buf.Read(); + if (cmd == ListItemSeparatorSymbol) { + cmd = buf.Read(); + } + } + + CHECK_EXPECTED(cmd, EndListSymbol); + return ret; + } else { + cmd = buf.Read(); + + for (;;) { + if (cmd == EndMapSymbol) { + break; + } + + auto keyBuffer = ReadNextString(cmd, buf); + auto pos = structType->FindMemberIndex(keyBuffer); + EXPECTED(buf, KeyValueSeparatorSymbol); + cmd = buf.Read(); + if (pos && cmd != '#') { + auto memberType = structType->GetMemberType(*pos); + auto unwrappedType = memberType; + if (!(nativeYtTypeFlags & ENativeTypeCompatFlags::NTCF_COMPLEX) && unwrappedType->IsOptional()) { + unwrappedType = static_cast<TOptionalType*>(unwrappedType)->GetItemType(); + } + + items[*pos] = ReadYsonValueInTableFormat(unwrappedType, nativeYtTypeFlags, holderFactory, cmd, buf); + } else { + SkipYson(cmd, buf); + } + + cmd = buf.Read(); + if (cmd == KeyedItemSeparatorSymbol) { + cmd = buf.Read(); + } + } + + for (ui32 i = 0; i < structType->GetMembersCount(); ++i) { + if (items[i]) { + continue; + } + + YQL_ENSURE(structType->GetMemberType(i)->IsOptional(), "Missing required field: " << structType->GetMemberName(i)); + } + + return ret; + } + } + + case TType::EKind::List: { + auto itemType = static_cast<TListType*>(type)->GetItemType(); + TDefaultListRepresentation items; + CHECK_EXPECTED(cmd, BeginListSymbol); + cmd = buf.Read(); + + for (;;) { + if (cmd == EndListSymbol) { + break; + } + + items = items.Append(ReadYsonValueInTableFormat(itemType, nativeYtTypeFlags, holderFactory, cmd, buf)); + + cmd = buf.Read(); + if (cmd == ListItemSeparatorSymbol) { + cmd = buf.Read(); + } + } + + return holderFactory.CreateDirectListHolder(std::move(items)); + } + + case TType::EKind::Optional: { + if (cmd == EntitySymbol) { + return NUdf::TUnboxedValuePod(); + } + auto itemType = static_cast<TOptionalType*>(type)->GetItemType(); + if (nativeYtTypeFlags & ENativeTypeCompatFlags::NTCF_COMPLEX) { + if (itemType->GetKind() == TType::EKind::Optional || itemType->GetKind() == TType::EKind::Pg) { + CHECK_EXPECTED(cmd, BeginListSymbol); + cmd = buf.Read(); + auto value = ReadYsonValueInTableFormat(itemType, nativeYtTypeFlags, holderFactory, cmd, buf); + cmd = buf.Read(); + if (cmd == ListItemSeparatorSymbol) { + cmd = buf.Read(); + } + CHECK_EXPECTED(cmd, EndListSymbol); + return value.Release().MakeOptional(); + } else { + return ReadYsonValueInTableFormat(itemType, nativeYtTypeFlags, holderFactory, cmd, buf).Release().MakeOptional(); + } + } else { + if (cmd != BeginListSymbol) { + auto value = ReadYsonValueInTableFormat(itemType, nativeYtTypeFlags, holderFactory, cmd, buf); + return value.Release().MakeOptional(); + } + + cmd = buf.Read(); + if (cmd == EndListSymbol) { + return NUdf::TUnboxedValuePod(); + } + + auto value = ReadYsonValueInTableFormat(itemType, nativeYtTypeFlags, holderFactory, cmd, buf); + cmd = buf.Read(); + if (cmd == ListItemSeparatorSymbol) { + cmd = buf.Read(); + } + + CHECK_EXPECTED(cmd, EndListSymbol); + return value.Release().MakeOptional(); + } + } + + case TType::EKind::Dict: { + auto dictType = static_cast<TDictType*>(type); + auto keyType = dictType->GetKeyType(); + auto payloadType = dictType->GetPayloadType(); + TKeyTypes types; + bool isTuple; + bool encoded; + bool useIHash; + GetDictionaryKeyTypes(keyType, types, isTuple, encoded, useIHash); + + TMaybe<TValuePacker> packer; + if (encoded) { + packer.ConstructInPlace(true, keyType); + } + + YQL_ENSURE(cmd == BeginListSymbol || cmd == BeginMapSymbol, "Expected '{' or '[', but read: " << TString(cmd).Quote()); + if (cmd == BeginMapSymbol) { + bool unusedIsOptional; + auto unpackedType = UnpackOptional(keyType, unusedIsOptional); + YQL_ENSURE(unpackedType->IsData() && + (static_cast<TDataType*>(unpackedType)->GetSchemeType() == NUdf::TDataType<char*>::Id || + static_cast<TDataType*>(unpackedType)->GetSchemeType() == NUdf::TDataType<NUdf::TUtf8>::Id), + "Expected String or Utf8 type as dictionary key type"); + + auto filler = [&](TValuesDictHashMap& map) { + cmd = buf.Read(); + + for (;;) { + if (cmd == EndMapSymbol) { + break; + } + + auto keyBuffer = ReadNextString(cmd, buf); + auto keyStr = NUdf::TUnboxedValue(MakeString(keyBuffer)); + EXPECTED(buf, KeyValueSeparatorSymbol); + cmd = buf.Read(); + auto payload = ReadYsonValueInTableFormat(payloadType, nativeYtTypeFlags, holderFactory, cmd, buf); + map.emplace(std::move(keyStr), std::move(payload)); + + cmd = buf.Read(); + if (cmd == KeyedItemSeparatorSymbol) { + cmd = buf.Read(); + } + } + }; + + const NUdf::IHash* hash = holderFactory.GetHash(*keyType, useIHash); + const NUdf::IEquate* equate = holderFactory.GetEquate(*keyType, useIHash); + return holderFactory.CreateDirectHashedDictHolder(filler, types, isTuple, true, nullptr, hash, equate); + } + else { + auto filler = [&](TValuesDictHashMap& map) { + cmd = buf.Read(); + + for (;;) { + if (cmd == EndListSymbol) { + break; + } + + CHECK_EXPECTED(cmd, BeginListSymbol); + cmd = buf.Read(); + auto key = ReadYsonValueInTableFormat(keyType, nativeYtTypeFlags, holderFactory, cmd, buf); + EXPECTED(buf, ListItemSeparatorSymbol); + cmd = buf.Read(); + auto payload = ReadYsonValueInTableFormat(payloadType, nativeYtTypeFlags, holderFactory, cmd, buf); + cmd = buf.Read(); + if (cmd == ListItemSeparatorSymbol) { + cmd = buf.Read(); + } + + CHECK_EXPECTED(cmd, EndListSymbol); + if (packer) { + key = MakeString(packer->Pack(key)); + } + + map.emplace(std::move(key), std::move(payload)); + + cmd = buf.Read(); + if (cmd == ListItemSeparatorSymbol) { + cmd = buf.Read(); + } + } + }; + + const NUdf::IHash* hash = holderFactory.GetHash(*keyType, useIHash); + const NUdf::IEquate* equate = holderFactory.GetEquate(*keyType, useIHash); + return holderFactory.CreateDirectHashedDictHolder(filler, types, isTuple, true, encoded ? keyType : nullptr, + hash, equate); + } + } + + case TType::EKind::Tuple: { + auto tupleType = static_cast<TTupleType*>(type); + NUdf::TUnboxedValue* items; + NUdf::TUnboxedValue ret = holderFactory.CreateDirectArrayHolder(tupleType->GetElementsCount(), items); + CHECK_EXPECTED(cmd, BeginListSymbol); + cmd = buf.Read(); + + for (ui32 i = 0; i < tupleType->GetElementsCount(); ++i) { + items[i] = ReadYsonValueInTableFormat(tupleType->GetElementType(i), nativeYtTypeFlags, holderFactory, cmd, buf); + + cmd = buf.Read(); + if (cmd == ListItemSeparatorSymbol) { + cmd = buf.Read(); + } + } + + + CHECK_EXPECTED(cmd, EndListSymbol); + return ret; + } + + case TType::EKind::Void: { + if (cmd == EntitySymbol) { + return NUdf::TUnboxedValuePod::Void(); + } + + auto nextString = ReadNextString(cmd, buf); + YQL_ENSURE(nextString == NResult::TYsonResultWriter::VoidString, "Expected Void"); + return NUdf::TUnboxedValuePod::Void(); + } + + case TType::EKind::Null: { + CHECK_EXPECTED(cmd, EntitySymbol); + return NUdf::TUnboxedValuePod(); + } + + case TType::EKind::EmptyList: { + CHECK_EXPECTED(cmd, BeginListSymbol); + cmd = buf.Read(); + CHECK_EXPECTED(cmd, EndListSymbol); + return holderFactory.GetEmptyContainerLazy(); + } + + case TType::EKind::EmptyDict: { + YQL_ENSURE(cmd == BeginListSymbol || cmd == BeginMapSymbol, "Expected '{' or '[', but read: " << TString(cmd).Quote()); + if (cmd == BeginListSymbol) { + cmd = buf.Read(); + CHECK_EXPECTED(cmd, EndListSymbol); + } else { + cmd = buf.Read(); + CHECK_EXPECTED(cmd, EndMapSymbol); + } + + return holderFactory.GetEmptyContainerLazy(); + } + + case TType::EKind::Pg: { + auto pgType = static_cast<TPgType*>(type); + return ReadYsonValueInTableFormatPg(pgType, cmd, buf); + } + + case TType::EKind::Tagged: { + auto taggedType = static_cast<TTaggedType*>(type); + return ReadYsonValueInTableFormat(taggedType->GetBaseType(), nativeYtTypeFlags, holderFactory, cmd, buf); + } + + default: + YQL_ENSURE(false, "Unsupported type: " << type->GetKindAsStr()); + } +} + +TMaybe<NUdf::TUnboxedValue> ParseYsonValueInTableFormat(const THolderFactory& holderFactory, + const TStringBuf& yson, TType* type, ui64 nativeYtTypeFlags, IOutputStream* err) { + try { + class TReader : public NCommon::IBlockReader { + public: + TReader(const TStringBuf& yson) + : Yson_(yson) + {} + + void SetDeadline(TInstant deadline) override { + Y_UNUSED(deadline); + } + + std::pair<const char*, const char*> NextFilledBlock() override { + if (FirstBuffer_) { + FirstBuffer_ = false; + return{ Yson_.begin(), Yson_.end() }; + } + else { + return{ nullptr, nullptr }; + } + } + + void ReturnBlock() override { + } + + bool Retry(const TMaybe<ui32>& rangeIndex, const TMaybe<ui64>& rowIndex, const std::exception_ptr& error) override { + Y_UNUSED(rangeIndex); + Y_UNUSED(rowIndex); + Y_UNUSED(error); + return false; + } + + private: + TStringBuf Yson_; + bool FirstBuffer_ = true; + }; + + TReader reader(yson); + NCommon::TInputBuf buf(reader, nullptr); + char cmd = buf.Read(); + return ReadYsonValueInTableFormat(type, nativeYtTypeFlags, holderFactory, cmd, buf); + } + catch (const yexception& e) { + if (err) { + *err << "YSON parsing failed: " << e.what(); + } + return Nothing(); + } +} + +TMaybe<NUdf::TUnboxedValue> ParseYsonNode(const THolderFactory& holderFactory, + const NYT::TNode& node, TType* type, ui64 nativeYtTypeFlags, IOutputStream* err) { + return ParseYsonValueInTableFormat(holderFactory, NYT::NodeToYsonString(node, NYson::EYsonFormat::Binary), type, nativeYtTypeFlags, err); +} + +extern "C" void ReadYsonContainerValue(TType* type, ui64 nativeYtTypeFlags, const NKikimr::NMiniKQL::THolderFactory& holderFactory, + NUdf::TUnboxedValue& value, NCommon::TInputBuf& buf, bool wrapOptional) { + // yson content + ui32 size; + buf.ReadMany((char*)&size, sizeof(size)); + CHECK_STRING_LENGTH_UNSIGNED(size); + // parse binary yson... + YQL_ENSURE(size > 0); + char cmd = buf.Read(); + auto tmp = ReadYsonValueInTableFormat(type, nativeYtTypeFlags, holderFactory, cmd, buf); + if (!wrapOptional) { + value = std::move(tmp); + } + else { + value = tmp.Release().MakeOptional(); + } +} + +extern "C" void ReadContainerNativeYtValue(TType* type, ui64 nativeYtTypeFlags, const NKikimr::NMiniKQL::THolderFactory& holderFactory, + NUdf::TUnboxedValue& value, NCommon::TInputBuf& buf, bool wrapOptional) { + auto tmp = ReadSkiffNativeYtValue(type, nativeYtTypeFlags, holderFactory, buf); + if (!wrapOptional) { + value = std::move(tmp); + } else { + value = tmp.Release().MakeOptional(); + } +} + +void WriteYsonValueInTableFormat(NCommon::TOutputBuf& buf, TType* type, ui64 nativeYtTypeFlags, const NUdf::TUnboxedValuePod& value, bool topLevel) { + // Table format, very compact + switch (type->GetKind()) { + case TType::EKind::Variant: { + buf.Write(BeginListSymbol); + auto varType = static_cast<TVariantType*>(type); + auto underlyingType = varType->GetUnderlyingType(); + auto index = value.GetVariantIndex(); + YQL_ENSURE(index < varType->GetAlternativesCount(), "Bad variant alternative: " << index << ", only " << varType->GetAlternativesCount() << " are available"); + YQL_ENSURE(underlyingType->IsTuple() || underlyingType->IsStruct(), "Wrong underlying type"); + TType* itemType; + if (underlyingType->IsTuple()) { + itemType = static_cast<TTupleType*>(underlyingType)->GetElementType(index); + } + else { + itemType = static_cast<TStructType*>(underlyingType)->GetMemberType(index); + } + if (!(nativeYtTypeFlags & NTCF_COMPLEX) || underlyingType->IsTuple()) { + buf.Write(Uint64Marker); + buf.WriteVarUI64(index); + } else { + auto structType = static_cast<TStructType*>(underlyingType); + auto varName = structType->GetMemberName(index); + buf.Write(StringMarker); + buf.WriteVarI32(varName.size()); + buf.WriteMany(varName); + } + buf.Write(ListItemSeparatorSymbol); + WriteYsonValueInTableFormat(buf, itemType, nativeYtTypeFlags, value.GetVariantItem(), false); + buf.Write(ListItemSeparatorSymbol); + buf.Write(EndListSymbol); + break; + } + + case TType::EKind::Data: { + auto schemeType = static_cast<TDataType*>(type)->GetSchemeType(); + switch (schemeType) { + case NUdf::TDataType<bool>::Id: { + buf.Write(value.Get<bool>() ? TrueMarker : FalseMarker); + break; + } + + case NUdf::TDataType<ui8>::Id: + buf.Write(Uint64Marker); + buf.WriteVarUI64(value.Get<ui8>()); + break; + + case NUdf::TDataType<i8>::Id: + buf.Write(Int64Marker); + buf.WriteVarI64(value.Get<i8>()); + break; + + case NUdf::TDataType<ui16>::Id: + buf.Write(Uint64Marker); + buf.WriteVarUI64(value.Get<ui16>()); + break; + + case NUdf::TDataType<i16>::Id: + buf.Write(Int64Marker); + buf.WriteVarI64(value.Get<i16>()); + break; + + case NUdf::TDataType<i32>::Id: + buf.Write(Int64Marker); + buf.WriteVarI64(value.Get<i32>()); + break; + + case NUdf::TDataType<ui32>::Id: + buf.Write(Uint64Marker); + buf.WriteVarUI64(value.Get<ui32>()); + break; + + case NUdf::TDataType<i64>::Id: + buf.Write(Int64Marker); + buf.WriteVarI64(value.Get<i64>()); + break; + + case NUdf::TDataType<ui64>::Id: + buf.Write(Uint64Marker); + buf.WriteVarUI64(value.Get<ui64>()); + break; + + case NUdf::TDataType<float>::Id: { + buf.Write(DoubleMarker); + double val = value.Get<float>(); + buf.WriteMany((const char*)&val, sizeof(val)); + break; + } + + case NUdf::TDataType<double>::Id: { + buf.Write(DoubleMarker); + double val = value.Get<double>(); + buf.WriteMany((const char*)&val, sizeof(val)); + break; + } + + case NUdf::TDataType<NUdf::TUtf8>::Id: + case NUdf::TDataType<char*>::Id: + case NUdf::TDataType<NUdf::TJson>::Id: + case NUdf::TDataType<NUdf::TDyNumber>::Id: + case NUdf::TDataType<NUdf::TUuid>::Id: { + buf.Write(StringMarker); + auto str = value.AsStringRef(); + buf.WriteVarI32(str.Size()); + buf.WriteMany(str); + break; + } + + case NUdf::TDataType<NUdf::TDecimal>::Id: { + buf.Write(StringMarker); + if (nativeYtTypeFlags & NTCF_DECIMAL){ + auto const params = static_cast<TDataDecimalType*>(type)->GetParams(); + const NDecimal::TInt128 data128 = value.GetInt128(); + char tmpBuf[NYT::NDecimal::TDecimal::MaxBinarySize]; + if (params.first < 10) { + // The YQL format differs from the YT format in the inf/nan values. NDecimal::FromYtDecimal converts nan/inf + TStringBuf resBuf = NYT::NDecimal::TDecimal::WriteBinary32(params.first, NDecimal::ToYtDecimal<i32>(data128), tmpBuf, NYT::NDecimal::TDecimal::MaxBinarySize); + buf.WriteVarI32(resBuf.size()); + buf.WriteMany(resBuf.data(), resBuf.size()); + } else if (params.first < 19) { + TStringBuf resBuf = NYT::NDecimal::TDecimal::WriteBinary64(params.first, NDecimal::ToYtDecimal<i64>(data128), tmpBuf, NYT::NDecimal::TDecimal::MaxBinarySize); + buf.WriteVarI32(resBuf.size()); + buf.WriteMany(resBuf.data(), resBuf.size()); + } else { + YQL_ENSURE(params.first < 36); + NYT::NDecimal::TDecimal::TValue128 val; + auto data128Converted = NDecimal::ToYtDecimal<NDecimal::TInt128>(data128); + memcpy(&val, &data128Converted, sizeof(val)); + auto resBuf = NYT::NDecimal::TDecimal::WriteBinary128(params.first, val, tmpBuf, NYT::NDecimal::TDecimal::MaxBinarySize); + buf.WriteVarI32(resBuf.size()); + buf.WriteMany(resBuf.data(), resBuf.size()); + } + } else { + char data[sizeof(NDecimal::TInt128)]; + const ui32 size = NDecimal::Serialize(value.GetInt128(), data); + buf.WriteVarI32(size); + buf.WriteMany(data, size); + } + break; + } + + case NUdf::TDataType<NUdf::TYson>::Id: { + // embed content + buf.WriteMany(value.AsStringRef()); + break; + } + + case NUdf::TDataType<NUdf::TDate>::Id: + buf.Write(Uint64Marker); + buf.WriteVarUI64(value.Get<ui16>()); + break; + + case NUdf::TDataType<NUdf::TDatetime>::Id: + buf.Write(Uint64Marker); + buf.WriteVarUI64(value.Get<ui32>()); + break; + + case NUdf::TDataType<NUdf::TTimestamp>::Id: + buf.Write(Uint64Marker); + buf.WriteVarUI64(value.Get<ui64>()); + break; + + case NUdf::TDataType<NUdf::TInterval>::Id: + case NUdf::TDataType<NUdf::TInterval64>::Id: + case NUdf::TDataType<NUdf::TDatetime64>::Id: + case NUdf::TDataType<NUdf::TTimestamp64>::Id: + buf.Write(Int64Marker); + buf.WriteVarI64(value.Get<i64>()); + break; + + case NUdf::TDataType<NUdf::TDate32>::Id: + buf.Write(Int64Marker); + buf.WriteVarI64(value.Get<i32>()); + break; + + case NUdf::TDataType<NUdf::TTzDate>::Id: { + ui16 tzId = SwapBytes(value.GetTimezoneId()); + ui16 data = SwapBytes(value.Get<ui16>()); + ui32 size = sizeof(data) + sizeof(tzId); + buf.Write(StringMarker); + buf.WriteVarI32(size); + buf.WriteMany((const char*)&data, sizeof(data)); + buf.WriteMany((const char*)&tzId, sizeof(tzId)); + break; + } + + case NUdf::TDataType<NUdf::TTzDatetime>::Id: { + ui16 tzId = SwapBytes(value.GetTimezoneId()); + ui32 data = SwapBytes(value.Get<ui32>()); + ui32 size = sizeof(data) + sizeof(tzId); + buf.Write(StringMarker); + buf.WriteVarI32(size); + buf.WriteMany((const char*)&data, sizeof(data)); + buf.WriteMany((const char*)&tzId, sizeof(tzId)); + break; + } + + case NUdf::TDataType<NUdf::TTzTimestamp>::Id: { + ui16 tzId = SwapBytes(value.GetTimezoneId()); + ui64 data = SwapBytes(value.Get<ui64>()); + ui32 size = sizeof(data) + sizeof(tzId); + buf.Write(StringMarker); + buf.WriteVarI32(size); + buf.WriteMany((const char*)&data, sizeof(data)); + buf.WriteMany((const char*)&tzId, sizeof(tzId)); + break; + } + + case NUdf::TDataType<NUdf::TTzDate32>::Id: { + ui16 tzId = SwapBytes(value.GetTimezoneId()); + ui32 data = 0x80 ^ SwapBytes((ui32)value.Get<i32>()); + ui32 size = sizeof(data) + sizeof(tzId); + buf.Write(StringMarker); + buf.WriteVarI32(size); + buf.WriteMany((const char*)&data, sizeof(data)); + buf.WriteMany((const char*)&tzId, sizeof(tzId)); + break; + } + + case NUdf::TDataType<NUdf::TTzDatetime64>::Id: { + ui16 tzId = SwapBytes(value.GetTimezoneId()); + ui64 data = 0x80 ^ SwapBytes((ui64)value.Get<i64>()); + ui32 size = sizeof(data) + sizeof(tzId); + buf.Write(StringMarker); + buf.WriteVarI32(size); + buf.WriteMany((const char*)&data, sizeof(data)); + buf.WriteMany((const char*)&tzId, sizeof(tzId)); + break; + } + + case NUdf::TDataType<NUdf::TTzTimestamp64>::Id: { + ui16 tzId = SwapBytes(value.GetTimezoneId()); + ui64 data = 0x80 ^ SwapBytes((ui64)value.Get<i64>()); + ui32 size = sizeof(data) + sizeof(tzId); + buf.Write(StringMarker); + buf.WriteVarI32(size); + buf.WriteMany((const char*)&data, sizeof(data)); + buf.WriteMany((const char*)&tzId, sizeof(tzId)); + break; + } + + case NUdf::TDataType<NUdf::TJsonDocument>::Id: { + buf.Write(StringMarker); + NUdf::TUnboxedValue json = ValueToString(EDataSlot::JsonDocument, value); + auto str = json.AsStringRef(); + buf.WriteVarI32(str.Size()); + buf.WriteMany(str); + break; + } + + default: + YQL_ENSURE(false, "Unsupported data type: " << schemeType); + } + + break; + } + + case TType::EKind::Struct: { + auto structType = static_cast<TStructType*>(type); + if (nativeYtTypeFlags & ENativeTypeCompatFlags::NTCF_COMPLEX) { + buf.Write(BeginMapSymbol); + for (ui32 i = 0; i < structType->GetMembersCount(); ++i) { + buf.Write(StringMarker); + auto key = structType->GetMemberName(i); + buf.WriteVarI32(key.size()); + buf.WriteMany(key); + buf.Write(KeyValueSeparatorSymbol); + WriteYsonValueInTableFormat(buf, structType->GetMemberType(i), nativeYtTypeFlags, value.GetElement(i), false); + buf.Write(KeyedItemSeparatorSymbol); + } + buf.Write(EndMapSymbol); + } else { + buf.Write(BeginListSymbol); + for (ui32 i = 0; i < structType->GetMembersCount(); ++i) { + WriteYsonValueInTableFormat(buf, structType->GetMemberType(i), nativeYtTypeFlags, value.GetElement(i), false); + buf.Write(ListItemSeparatorSymbol); + } + buf.Write(EndListSymbol); + } + break; + } + + case TType::EKind::List: { + auto itemType = static_cast<TListType*>(type)->GetItemType(); + const auto iter = value.GetListIterator(); + buf.Write(BeginListSymbol); + for (NUdf::TUnboxedValue item; iter.Next(item); buf.Write(ListItemSeparatorSymbol)) { + WriteYsonValueInTableFormat(buf, itemType, nativeYtTypeFlags, item, false); + } + + buf.Write(EndListSymbol); + break; + } + + case TType::EKind::Optional: { + auto itemType = static_cast<TOptionalType*>(type)->GetItemType(); + if (nativeYtTypeFlags & ENativeTypeCompatFlags::NTCF_COMPLEX) { + if (value) { + if (itemType->GetKind() == TType::EKind::Optional || itemType->GetKind() == TType::EKind::Pg) { + buf.Write(BeginListSymbol); + } + WriteYsonValueInTableFormat(buf, itemType, nativeYtTypeFlags, value.GetOptionalValue(), false); + if (itemType->GetKind() == TType::EKind::Optional || itemType->GetKind() == TType::EKind::Pg) { + buf.Write(ListItemSeparatorSymbol); + buf.Write(EndListSymbol); + } + } else { + buf.Write(EntitySymbol); + } + } else { + if (!value) { + if (topLevel) { + buf.Write(BeginListSymbol); + buf.Write(EndListSymbol); + } + else { + buf.Write(EntitySymbol); + } + } + else { + buf.Write(BeginListSymbol); + WriteYsonValueInTableFormat(buf, itemType, nativeYtTypeFlags, value.GetOptionalValue(), false); + buf.Write(ListItemSeparatorSymbol); + buf.Write(EndListSymbol); + } + } + break; + } + + case TType::EKind::Dict: { + auto dictType = static_cast<TDictType*>(type); + const auto iter = value.GetDictIterator(); + buf.Write(BeginListSymbol); + for (NUdf::TUnboxedValue key, payload; iter.NextPair(key, payload);) { + buf.Write(BeginListSymbol); + WriteYsonValueInTableFormat(buf, dictType->GetKeyType(), nativeYtTypeFlags, key, false); + buf.Write(ListItemSeparatorSymbol); + WriteYsonValueInTableFormat(buf, dictType->GetPayloadType(), nativeYtTypeFlags, payload, false); + buf.Write(ListItemSeparatorSymbol); + buf.Write(EndListSymbol); + buf.Write(ListItemSeparatorSymbol); + } + + buf.Write(EndListSymbol); + break; + } + + case TType::EKind::Tuple: { + auto tupleType = static_cast<TTupleType*>(type); + buf.Write(BeginListSymbol); + for (ui32 i = 0; i < tupleType->GetElementsCount(); ++i) { + WriteYsonValueInTableFormat(buf, tupleType->GetElementType(i), nativeYtTypeFlags, value.GetElement(i), false); + buf.Write(ListItemSeparatorSymbol); + } + + buf.Write(EndListSymbol); + break; + } + + case TType::EKind::Void: { + buf.Write(EntitySymbol); + break; + } + + case TType::EKind::Null: { + buf.Write(EntitySymbol); + break; + } + + case TType::EKind::EmptyList: { + buf.Write(BeginListSymbol); + buf.Write(EndListSymbol); + break; + } + + case TType::EKind::EmptyDict: { + buf.Write(BeginListSymbol); + buf.Write(EndListSymbol); + break; + } + + case TType::EKind::Pg: { + auto pgType = static_cast<TPgType*>(type); + WriteYsonValueInTableFormatPg(buf, pgType, value, topLevel); + break; + } + + default: + YQL_ENSURE(false, "Unsupported type: " << type->GetKindAsStr()); + } +} + +/////////////////////////////////////////// +// +// Initial state first = last = &dummy +// +// +1 block first = &dummy, last = newPage, first.next = newPage, newPage.next= &dummy +// +1 block first = &dummy, last = newPage2, first.next = newPage, newPage.next = newPage2, newPage2.next = &dummy +// +/////////////////////////////////////////// +class TTempBlockWriter : public NCommon::IBlockWriter { +public: + TTempBlockWriter() + : Pool_(*TlsAllocState) + , Last_(&Dummy_) + { + Dummy_.Avail_ = 0; + Dummy_.Next_ = &Dummy_; + } + + ~TTempBlockWriter() { + auto current = Dummy_.Next_; // skip dummy node + while (current != &Dummy_) { + auto next = current->Next_; + Pool_.ReturnPage(current); + current = next; + } + } + + void SetRecordBoundaryCallback(std::function<void()> callback) override { + Y_UNUSED(callback); + } + + void WriteBlocks(NCommon::TOutputBuf& buf) const { + auto current = Dummy_.Next_; // skip dummy node + while (current != &Dummy_) { + auto next = current->Next_; + buf.WriteMany((const char*)(current + 1), current->Avail_); + current = next; + } + } + + TTempBlockWriter(const TTempBlockWriter&) = delete; + void operator=(const TTempBlockWriter&) = delete; + + std::pair<char*, char*> NextEmptyBlock() override { + auto newPage = Pool_.GetPage(); + auto header = (TPageHeader*)newPage; + header->Avail_ = 0; + header->Next_ = &Dummy_; + Last_->Next_ = header; + Last_ = header; + return std::make_pair((char*)(header + 1), (char*)newPage + TAlignedPagePool::POOL_PAGE_SIZE); + } + + void ReturnBlock(size_t avail, std::optional<size_t> lastRecordBoundary) override { + Y_UNUSED(lastRecordBoundary); + YQL_ENSURE(avail <= TAlignedPagePool::POOL_PAGE_SIZE - sizeof(TPageHeader)); + Last_->Avail_ = avail; + } + + void Finish() override { + } + +private: + struct TPageHeader { + TPageHeader* Next_ = nullptr; + ui32 Avail_ = 0; + }; + + NKikimr::TAlignedPagePool& Pool_; + TPageHeader* Last_; + TPageHeader Dummy_; +}; + +extern "C" void WriteYsonContainerValue(TType* type, ui64 nativeYtTypeFlags, const NUdf::TUnboxedValuePod& value, NCommon::TOutputBuf& buf) { + TTempBlockWriter blockWriter; + NCommon::TOutputBuf ysonBuf(blockWriter, nullptr); + WriteYsonValueInTableFormat(ysonBuf, type, nativeYtTypeFlags, value, true); + ysonBuf.Flush(); + ui32 size = ysonBuf.GetWrittenBytes(); + buf.WriteMany((const char*)&size, sizeof(size)); + blockWriter.WriteBlocks(buf); +} + +void SkipSkiffField(NKikimr::NMiniKQL::TType* type, ui64 nativeYtTypeFlags, NCommon::TInputBuf& buf) { + const bool isOptional = type->IsOptional(); + if (isOptional) { + // Unwrap optional + type = static_cast<TOptionalType*>(type)->GetItemType(); + } + + if (isOptional) { + auto marker = buf.Read(); + if (!marker) { + return; + } + } + + if (type->IsData()) { + auto schemeType = static_cast<TDataType*>(type)->GetSchemeType(); + switch (schemeType) { + case NUdf::TDataType<bool>::Id: + buf.SkipMany(sizeof(ui8)); + break; + + case NUdf::TDataType<ui8>::Id: + case NUdf::TDataType<ui16>::Id: + case NUdf::TDataType<ui32>::Id: + case NUdf::TDataType<ui64>::Id: + case NUdf::TDataType<NUdf::TDate>::Id: + case NUdf::TDataType<NUdf::TDatetime>::Id: + case NUdf::TDataType<NUdf::TTimestamp>::Id: + buf.SkipMany(sizeof(ui64)); + break; + + case NUdf::TDataType<i8>::Id: + case NUdf::TDataType<i16>::Id: + case NUdf::TDataType<i32>::Id: + case NUdf::TDataType<i64>::Id: + case NUdf::TDataType<NUdf::TInterval>::Id: + case NUdf::TDataType<NUdf::TDate32>::Id: + case NUdf::TDataType<NUdf::TDatetime64>::Id: + case NUdf::TDataType<NUdf::TTimestamp64>::Id: + case NUdf::TDataType<NUdf::TInterval64>::Id: + buf.SkipMany(sizeof(i64)); + break; + + case NUdf::TDataType<float>::Id: + case NUdf::TDataType<double>::Id: + buf.SkipMany(sizeof(double)); + break; + + case NUdf::TDataType<NUdf::TUtf8>::Id: + case NUdf::TDataType<char*>::Id: + case NUdf::TDataType<NUdf::TJson>::Id: + case NUdf::TDataType<NUdf::TYson>::Id: + case NUdf::TDataType<NUdf::TUuid>::Id: + case NUdf::TDataType<NUdf::TDyNumber>::Id: + case NUdf::TDataType<NUdf::TTzDate>::Id: + case NUdf::TDataType<NUdf::TTzDatetime>::Id: + case NUdf::TDataType<NUdf::TTzTimestamp>::Id: + case NUdf::TDataType<NUdf::TJsonDocument>::Id: { + ui32 size; + buf.ReadMany((char*)&size, sizeof(size)); + CHECK_STRING_LENGTH_UNSIGNED(size); + buf.SkipMany(size); + break; + } + case NUdf::TDataType<NUdf::TDecimal>::Id: { + if (nativeYtTypeFlags & NTCF_DECIMAL) { + auto const params = static_cast<TDataDecimalType*>(type)->GetParams(); + if (params.first < 10) { + buf.SkipMany(sizeof(i32)); + } else if (params.first < 19) { + buf.SkipMany(sizeof(i64)); + } else { + buf.SkipMany(sizeof(NDecimal::TInt128)); + } + } else { + ui32 size; + buf.ReadMany((char*)&size, sizeof(size)); + CHECK_STRING_LENGTH_UNSIGNED(size); + buf.SkipMany(size); + } + break; + } + default: + YQL_ENSURE(false, "Unsupported data type: " << schemeType); + } + return; + } + + if (type->IsPg()) { + SkipSkiffPg(static_cast<TPgType*>(type), buf); + return; + } + + if (type->IsStruct()) { + auto structType = static_cast<TStructType*>(type); + const std::vector<size_t>* reorder = nullptr; + if (auto cookie = structType->GetCookie()) { + reorder = ((const std::vector<size_t>*)cookie); + } + for (ui32 i = 0; i < structType->GetMembersCount(); ++i) { + SkipSkiffField(structType->GetMemberType(reorder ? reorder->at(i) : i), nativeYtTypeFlags, buf); + } + return; + } + + if (type->IsList()) { + auto itemType = static_cast<TListType*>(type)->GetItemType(); + while (buf.Read() == '\0') { + SkipSkiffField(itemType, nativeYtTypeFlags, buf); + } + return; + } + + if (type->IsTuple()) { + auto tupleType = static_cast<TTupleType*>(type); + + for (ui32 i = 0; i < tupleType->GetElementsCount(); ++i) { + SkipSkiffField(tupleType->GetElementType(i), nativeYtTypeFlags, buf); + } + return; + } + + if (type->IsVariant()) { + auto varType = AS_TYPE(TVariantType, type); + ui16 data = 0; + if (varType->GetAlternativesCount() < 256) { + buf.ReadMany((char*)&data, 1); + } else { + buf.ReadMany((char*)&data, sizeof(data)); + } + + if (varType->GetUnderlyingType()->IsTuple()) { + auto tupleType = AS_TYPE(TTupleType, varType->GetUnderlyingType()); + YQL_ENSURE(data < tupleType->GetElementsCount()); + SkipSkiffField(tupleType->GetElementType(data), nativeYtTypeFlags, buf); + } else { + auto structType = AS_TYPE(TStructType, varType->GetUnderlyingType()); + if (auto cookie = structType->GetCookie()) { + const std::vector<size_t>& reorder = *((const std::vector<size_t>*)cookie); + data = reorder[data]; + } + YQL_ENSURE(data < structType->GetMembersCount()); + + SkipSkiffField(structType->GetMemberType(data), nativeYtTypeFlags, buf); + } + return; + } + + if (type->IsVoid()) { + return; + } + + if (type->IsNull()) { + return; + } + + if (type->IsEmptyList() || type->IsEmptyDict()) { + return; + } + + if (type->IsDict()) { + auto dictType = AS_TYPE(TDictType, type); + auto keyType = dictType->GetKeyType(); + auto payloadType = dictType->GetPayloadType(); + while (buf.Read() == '\0') { + SkipSkiffField(keyType, nativeYtTypeFlags, buf); + SkipSkiffField(payloadType, nativeYtTypeFlags, buf); + } + return; + } + + YQL_ENSURE(false, "Unsupported type for skip: " << type->GetKindAsStr()); +} + +NKikimr::NUdf::TUnboxedValue ReadSkiffNativeYtValue(NKikimr::NMiniKQL::TType* type, ui64 nativeYtTypeFlags, + const NKikimr::NMiniKQL::THolderFactory& holderFactory, NCommon::TInputBuf& buf) +{ + if (type->IsData()) { + return ReadSkiffData(type, nativeYtTypeFlags, buf); + } + + if (type->IsPg()) { + return ReadSkiffPg(static_cast<TPgType*>(type), buf); + } + + if (type->IsOptional()) { + auto marker = buf.Read(); + if (!marker) { + return NUdf::TUnboxedValue(); + } + + auto value = ReadSkiffNativeYtValue(AS_TYPE(TOptionalType, type)->GetItemType(), nativeYtTypeFlags, holderFactory, buf); + return value.Release().MakeOptional(); + } + + if (type->IsTuple()) { + auto tupleType = AS_TYPE(TTupleType, type); + NUdf::TUnboxedValue* items; + auto value = holderFactory.CreateDirectArrayHolder(tupleType->GetElementsCount(), items); + for (ui32 i = 0; i < tupleType->GetElementsCount(); ++i) { + items[i] = ReadSkiffNativeYtValue(tupleType->GetElementType(i), nativeYtTypeFlags, holderFactory, buf); + } + + return value; + } + + if (type->IsStruct()) { + auto structType = AS_TYPE(TStructType, type); + NUdf::TUnboxedValue* items; + auto value = holderFactory.CreateDirectArrayHolder(structType->GetMembersCount(), items); + + if (auto cookie = type->GetCookie()) { + const std::vector<size_t>& reorder = *((const std::vector<size_t>*)cookie); + for (ui32 i = 0; i < structType->GetMembersCount(); ++i) { + const auto ndx = reorder[i]; + items[ndx] = ReadSkiffNativeYtValue(structType->GetMemberType(ndx), nativeYtTypeFlags, holderFactory, buf); + } + } else { + for (ui32 i = 0; i < structType->GetMembersCount(); ++i) { + items[i] = ReadSkiffNativeYtValue(structType->GetMemberType(i), nativeYtTypeFlags, holderFactory, buf); + } + } + + return value; + } + + if (type->IsList()) { + auto itemType = AS_TYPE(TListType, type)->GetItemType(); + TDefaultListRepresentation items; + while (buf.Read() == '\0') { + items = items.Append(ReadSkiffNativeYtValue(itemType, nativeYtTypeFlags, holderFactory, buf)); + } + + return holderFactory.CreateDirectListHolder(std::move(items)); + } + + if (type->IsVariant()) { + auto varType = AS_TYPE(TVariantType, type); + ui16 data = 0; + if (varType->GetAlternativesCount() < 256) { + buf.ReadMany((char*)&data, 1); + } else { + buf.ReadMany((char*)&data, sizeof(data)); + } + if (varType->GetUnderlyingType()->IsTuple()) { + auto tupleType = AS_TYPE(TTupleType, varType->GetUnderlyingType()); + YQL_ENSURE(data < tupleType->GetElementsCount()); + auto item = ReadSkiffNativeYtValue(tupleType->GetElementType(data), nativeYtTypeFlags, holderFactory, buf); + return holderFactory.CreateVariantHolder(item.Release(), data); + } + else { + auto structType = AS_TYPE(TStructType, varType->GetUnderlyingType()); + if (auto cookie = structType->GetCookie()) { + const std::vector<size_t>& reorder = *((const std::vector<size_t>*)cookie); + data = reorder[data]; + } + YQL_ENSURE(data < structType->GetMembersCount()); + + auto item = ReadSkiffNativeYtValue(structType->GetMemberType(data), nativeYtTypeFlags, holderFactory, buf); + return holderFactory.CreateVariantHolder(item.Release(), data); + } + } + + if (type->IsVoid()) { + return NUdf::TUnboxedValue::Zero(); + } + + if (type->IsNull()) { + return NUdf::TUnboxedValue(); + } + + if (type->IsEmptyList() || type->IsEmptyDict()) { + return holderFactory.GetEmptyContainerLazy(); + } + + if (type->IsDict()) { + auto dictType = AS_TYPE(TDictType, type); + auto keyType = dictType->GetKeyType(); + auto payloadType = dictType->GetPayloadType(); + + auto builder = holderFactory.NewDict(dictType, NUdf::TDictFlags::EDictKind::Hashed); + while (buf.Read() == '\0') { + auto key = ReadSkiffNativeYtValue(keyType, nativeYtTypeFlags, holderFactory, buf); + auto payload = ReadSkiffNativeYtValue(payloadType, nativeYtTypeFlags, holderFactory, buf); + builder->Add(std::move(key), std::move(payload)); + } + + return builder->Build(); + } + + YQL_ENSURE(false, "Unsupported type: " << type->GetKindAsStr()); +} + +NUdf::TUnboxedValue ReadSkiffData(TType* type, ui64 nativeYtTypeFlags, NCommon::TInputBuf& buf) { + auto schemeType = static_cast<TDataType*>(type)->GetSchemeType(); + switch (schemeType) { + case NUdf::TDataType<bool>::Id: { + ui8 data; + buf.ReadMany((char*)&data, sizeof(data)); + return NUdf::TUnboxedValuePod(data != 0); + } + + case NUdf::TDataType<ui8>::Id: { + ui64 data; + buf.ReadMany((char*)&data, sizeof(data)); + return NUdf::TUnboxedValuePod(ui8(data)); + } + + case NUdf::TDataType<i8>::Id: { + i64 data; + buf.ReadMany((char*)&data, sizeof(data)); + return NUdf::TUnboxedValuePod(i8(data)); + } + + case NUdf::TDataType<NUdf::TDate>::Id: + case NUdf::TDataType<ui16>::Id: { + ui64 data; + buf.ReadMany((char*)&data, sizeof(data)); + return NUdf::TUnboxedValuePod(ui16(data)); + } + + case NUdf::TDataType<i16>::Id: { + i64 data; + buf.ReadMany((char*)&data, sizeof(data)); + return NUdf::TUnboxedValuePod(i16(data)); + } + + case NUdf::TDataType<NUdf::TDate32>::Id: + case NUdf::TDataType<i32>::Id: { + i64 data; + buf.ReadMany((char*)&data, sizeof(data)); + return NUdf::TUnboxedValuePod(i32(data)); + } + + case NUdf::TDataType<NUdf::TDatetime>::Id: + case NUdf::TDataType<ui32>::Id: { + ui64 data; + buf.ReadMany((char*)&data, sizeof(data)); + return NUdf::TUnboxedValuePod(ui32(data)); + } + + case NUdf::TDataType<NUdf::TInterval>::Id: + case NUdf::TDataType<NUdf::TInterval64>::Id: + case NUdf::TDataType<NUdf::TDatetime64>::Id: + case NUdf::TDataType<NUdf::TTimestamp64>::Id: + case NUdf::TDataType<i64>::Id: { + i64 data; + buf.ReadMany((char*)&data, sizeof(data)); + return NUdf::TUnboxedValuePod(data); + } + + case NUdf::TDataType<NUdf::TTimestamp>::Id: + case NUdf::TDataType<ui64>::Id: { + ui64 data; + buf.ReadMany((char*)&data, sizeof(data)); + return NUdf::TUnboxedValuePod(data); + } + + case NUdf::TDataType<float>::Id: { + double data; + buf.ReadMany((char*)&data, sizeof(data)); + return NUdf::TUnboxedValuePod(float(data)); + } + + case NUdf::TDataType<double>::Id: { + double data; + buf.ReadMany((char*)&data, sizeof(data)); + return NUdf::TUnboxedValuePod(data); + } + + case NUdf::TDataType<NUdf::TUtf8>::Id: + case NUdf::TDataType<char*>::Id: + case NUdf::TDataType<NUdf::TJson>::Id: + case NUdf::TDataType<NUdf::TYson>::Id: + case NUdf::TDataType<NUdf::TDyNumber>::Id: + case NUdf::TDataType<NUdf::TUuid>::Id: { + ui32 size; + buf.ReadMany((char*)&size, sizeof(size)); + CHECK_STRING_LENGTH_UNSIGNED(size); + auto str = NUdf::TUnboxedValue(MakeStringNotFilled(size)); + buf.ReadMany(str.AsStringRef().Data(), size); + return str; + } + + case NUdf::TDataType<NUdf::TDecimal>::Id: { + if (nativeYtTypeFlags & NTCF_DECIMAL) { + auto const params = static_cast<TDataDecimalType*>(type)->GetParams(); + if (params.first < 10) { + i32 data; + buf.ReadMany((char*)&data, sizeof(data)); + return NUdf::TUnboxedValuePod(NDecimal::FromYtDecimal(data)); + } else if (params.first < 19) { + i64 data; + buf.ReadMany((char*)&data, sizeof(data)); + return NUdf::TUnboxedValuePod(NDecimal::FromYtDecimal(data)); + } else { + YQL_ENSURE(params.first < 36); + NDecimal::TInt128 data; + buf.ReadMany((char*)&data, sizeof(data)); + return NUdf::TUnboxedValuePod(NDecimal::FromYtDecimal(data)); + } + } else { + ui32 size; + buf.ReadMany(reinterpret_cast<char*>(&size), sizeof(size)); + const auto maxSize = sizeof(NDecimal::TInt128); + YQL_ENSURE(size > 0U && size <= maxSize, "Bad decimal field size: " << size); + char data[maxSize]; + buf.ReadMany(data, size); + const auto& v = NDecimal::Deserialize(data, size); + YQL_ENSURE(!NDecimal::IsError(v.first), "Bad decimal field data: " << data); + YQL_ENSURE(size == v.second, "Bad decimal field size: " << size); + return NUdf::TUnboxedValuePod(v.first); + } + } + + case NUdf::TDataType<NUdf::TTzDate>::Id: { + ui32 size; + buf.ReadMany((char*)&size, sizeof(size)); + CHECK_STRING_LENGTH_UNSIGNED(size); + auto& vec = buf.YsonBuffer(); + vec.resize(size); + buf.ReadMany(vec.data(), size); + ui16 value; + ui16 tzId; + YQL_ENSURE(DeserializeTzDate(TStringBuf(vec.begin(), vec.end()), value, tzId)); + auto data = NUdf::TUnboxedValuePod(value); + data.SetTimezoneId(tzId); + return data; + } + + case NUdf::TDataType<NUdf::TTzDatetime>::Id: { + ui32 size; + buf.ReadMany((char*)&size, sizeof(size)); + CHECK_STRING_LENGTH_UNSIGNED(size); + auto& vec = buf.YsonBuffer(); + vec.resize(size); + buf.ReadMany(vec.data(), size); + ui32 value; + ui16 tzId; + YQL_ENSURE(DeserializeTzDatetime(TStringBuf(vec.begin(), vec.end()), value, tzId)); + auto data = NUdf::TUnboxedValuePod(value); + data.SetTimezoneId(tzId); + return data; + } + + case NUdf::TDataType<NUdf::TTzTimestamp>::Id: { + ui32 size; + buf.ReadMany((char*)&size, sizeof(size)); + CHECK_STRING_LENGTH_UNSIGNED(size); + auto& vec = buf.YsonBuffer(); + vec.resize(size); + buf.ReadMany(vec.data(), size); + ui64 value; + ui16 tzId; + YQL_ENSURE(DeserializeTzTimestamp(TStringBuf(vec.begin(), vec.end()), value, tzId)); + auto data = NUdf::TUnboxedValuePod(value); + data.SetTimezoneId(tzId); + return data; + } + + case NUdf::TDataType<NUdf::TJsonDocument>::Id: { + ui32 size; + buf.ReadMany((char*)&size, sizeof(size)); + CHECK_STRING_LENGTH_UNSIGNED(size); + auto json = NUdf::TUnboxedValue(MakeStringNotFilled(size)); + buf.ReadMany(json.AsStringRef().Data(), size); + return ValueFromString(EDataSlot::JsonDocument, json.AsStringRef()); + } + + default: + YQL_ENSURE(false, "Unsupported data type: " << schemeType); + } +} + +void WriteSkiffData(NKikimr::NMiniKQL::TType* type, ui64 nativeYtTypeFlags, const NKikimr::NUdf::TUnboxedValuePod& value, NCommon::TOutputBuf& buf) { + auto schemeType = static_cast<TDataType*>(type)->GetSchemeType(); + switch (schemeType) { + case NUdf::TDataType<bool>::Id: { + ui8 data = value.Get<ui8>(); + buf.Write(data); + break; + } + + case NUdf::TDataType<ui8>::Id: { + ui64 data = value.Get<ui8>(); + buf.WriteMany((const char*)&data, sizeof(data)); + break; + } + + case NUdf::TDataType<i8>::Id: { + i64 data = value.Get<i8>(); + buf.WriteMany((const char*)&data, sizeof(data)); + break; + } + + case NUdf::TDataType<NUdf::TDate>::Id: + case NUdf::TDataType<ui16>::Id: { + ui64 data = value.Get<ui16>(); + buf.WriteMany((const char*)&data, sizeof(data)); + break; + } + + case NUdf::TDataType<i16>::Id: { + i64 data = value.Get<i16>(); + buf.WriteMany((const char*)&data, sizeof(data)); + break; + } + + case NUdf::TDataType<NUdf::TDate32>::Id: + case NUdf::TDataType<i32>::Id: { + i64 data = value.Get<i32>(); + buf.WriteMany((const char*)&data, sizeof(data)); + break; + } + + case NUdf::TDataType<NUdf::TDatetime>::Id: + case NUdf::TDataType<ui32>::Id: { + ui64 data = value.Get<ui32>(); + buf.WriteMany((const char*)&data, sizeof(data)); + break; + } + + case NUdf::TDataType<NUdf::TInterval>::Id: + case NUdf::TDataType<NUdf::TInterval64>::Id: + case NUdf::TDataType<NUdf::TDatetime64>::Id: + case NUdf::TDataType<NUdf::TTimestamp64>::Id: + case NUdf::TDataType<i64>::Id: { + i64 data = value.Get<i64>(); + buf.WriteMany((const char*)&data, sizeof(data)); + break; + } + + case NUdf::TDataType<NUdf::TTimestamp>::Id: + case NUdf::TDataType<ui64>::Id: { + ui64 data = value.Get<ui64>(); + buf.WriteMany((const char*)&data, sizeof(data)); + break; + } + + case NUdf::TDataType<float>::Id: { + double data = value.Get<float>(); + buf.WriteMany((const char*)&data, sizeof(data)); + break; + } + + case NUdf::TDataType<double>::Id: { + double data = value.Get<double>(); + buf.WriteMany((const char*)&data, sizeof(data)); + break; + } + + case NUdf::TDataType<NUdf::TUtf8>::Id: + case NUdf::TDataType<char*>::Id: + case NUdf::TDataType<NUdf::TJson>::Id: + case NUdf::TDataType<NUdf::TYson>::Id: + case NUdf::TDataType<NUdf::TDyNumber>::Id: + case NUdf::TDataType<NUdf::TUuid>::Id: { + auto str = value.AsStringRef(); + ui32 size = str.Size(); + buf.WriteMany((const char*)&size, sizeof(size)); + buf.WriteMany(str); + break; + } + + case NUdf::TDataType<NUdf::TDecimal>::Id: { + if (nativeYtTypeFlags & NTCF_DECIMAL) { + auto const params = static_cast<TDataDecimalType*>(type)->GetParams(); + const NDecimal::TInt128 data128 = value.GetInt128(); + if (params.first < 10) { + auto data = NDecimal::ToYtDecimal<i32>(data128); + buf.WriteMany((const char*)&data, sizeof(data)); + } else if (params.first < 19) { + auto data = NDecimal::ToYtDecimal<i64>(data128); + buf.WriteMany((const char*)&data, sizeof(data)); + } else { + YQL_ENSURE(params.first < 36); + auto data = NDecimal::ToYtDecimal<NDecimal::TInt128>(data128); + buf.WriteMany((const char*)&data, sizeof(data)); + } + } else { + char data[sizeof(NDecimal::TInt128)]; + const ui32 size = NDecimal::Serialize(value.GetInt128(), data); + buf.WriteMany(reinterpret_cast<const char*>(&size), sizeof(size)); + buf.WriteMany(data, size); + } + break; + } + + case NUdf::TDataType<NUdf::TTzDate>::Id: { + ui16 tzId = SwapBytes(value.GetTimezoneId()); + ui16 data = SwapBytes(value.Get<ui16>()); + ui32 size = sizeof(data) + sizeof(tzId); + buf.WriteMany((const char*)&size, sizeof(size)); + buf.WriteMany((const char*)&data, sizeof(data)); + buf.WriteMany((const char*)&tzId, sizeof(tzId)); + break; + } + + case NUdf::TDataType<NUdf::TTzDatetime>::Id: { + ui16 tzId = SwapBytes(value.GetTimezoneId()); + ui32 data = SwapBytes(value.Get<ui32>()); + ui32 size = sizeof(data) + sizeof(tzId); + buf.WriteMany((const char*)&size, sizeof(size)); + buf.WriteMany((const char*)&data, sizeof(data)); + buf.WriteMany((const char*)&tzId, sizeof(tzId)); + break; + } + + case NUdf::TDataType<NUdf::TTzTimestamp>::Id: { + ui16 tzId = SwapBytes(value.GetTimezoneId()); + ui64 data = SwapBytes(value.Get<ui64>()); + ui32 size = sizeof(data) + sizeof(tzId); + buf.WriteMany((const char*)&size, sizeof(size)); + buf.WriteMany((const char*)&data, sizeof(data)); + buf.WriteMany((const char*)&tzId, sizeof(tzId)); + break; + } + + case NUdf::TDataType<NUdf::TTzDate32>::Id: { + ui16 tzId = SwapBytes(value.GetTimezoneId()); + ui32 data = 0x80 ^ SwapBytes((ui32)value.Get<i32>()); + ui32 size = sizeof(data) + sizeof(tzId); + buf.WriteMany((const char*)&size, sizeof(size)); + buf.WriteMany((const char*)&data, sizeof(data)); + buf.WriteMany((const char*)&tzId, sizeof(tzId)); + break; + } + + case NUdf::TDataType<NUdf::TTzDatetime64>::Id: { + ui16 tzId = SwapBytes(value.GetTimezoneId()); + ui64 data = 0x80 ^ SwapBytes((ui64)value.Get<i64>()); + ui32 size = sizeof(data) + sizeof(tzId); + buf.WriteMany((const char*)&size, sizeof(size)); + buf.WriteMany((const char*)&data, sizeof(data)); + buf.WriteMany((const char*)&tzId, sizeof(tzId)); + break; + } + + case NUdf::TDataType<NUdf::TTzTimestamp64>::Id: { + ui16 tzId = SwapBytes(value.GetTimezoneId()); + ui64 data = 0x80 ^ SwapBytes((ui64)value.Get<i64>()); + ui32 size = sizeof(data) + sizeof(tzId); + buf.WriteMany((const char*)&size, sizeof(size)); + buf.WriteMany((const char*)&data, sizeof(data)); + buf.WriteMany((const char*)&tzId, sizeof(tzId)); + break; + } + + case NUdf::TDataType<NUdf::TJsonDocument>::Id: { + NUdf::TUnboxedValue json = ValueToString(EDataSlot::JsonDocument, value); + auto str = json.AsStringRef(); + ui32 size = str.Size(); + buf.WriteMany((const char*)&size, sizeof(size)); + buf.WriteMany(str); + break; + } + + default: + YQL_ENSURE(false, "Unsupported data type: " << schemeType); + } +} + +void WriteSkiffNativeYtValue(NKikimr::NMiniKQL::TType* type, ui64 nativeYtTypeFlags, const NKikimr::NUdf::TUnboxedValuePod& value, NCommon::TOutputBuf& buf) { + if (type->IsData()) { + WriteSkiffData(type, nativeYtTypeFlags, value, buf); + } else if (type->IsPg()) { + WriteSkiffPgValue(static_cast<TPgType*>(type), value, buf); + } else if (type->IsOptional()) { + if (!value) { + buf.Write('\0'); + return; + } + + buf.Write('\1'); + WriteSkiffNativeYtValue(AS_TYPE(TOptionalType, type)->GetItemType(), nativeYtTypeFlags, value.GetOptionalValue(), buf); + } else if (type->IsList()) { + auto itemType = AS_TYPE(TListType, type)->GetItemType(); + auto elements = value.GetElements(); + if (elements) { + ui32 size = value.GetListLength(); + for (ui32 i = 0; i < size; ++i) { + buf.Write('\0'); + WriteSkiffNativeYtValue(itemType, nativeYtTypeFlags, elements[i], buf); + } + } else { + NUdf::TUnboxedValue item; + for (auto iter = value.GetListIterator(); iter.Next(item); ) { + buf.Write('\0'); + WriteSkiffNativeYtValue(itemType, nativeYtTypeFlags, item, buf); + } + } + + buf.Write('\xff'); + } else if (type->IsTuple()) { + auto tupleType = AS_TYPE(TTupleType, type); + auto elements = value.GetElements(); + if (elements) { + for (ui32 i = 0; i < tupleType->GetElementsCount(); ++i) { + WriteSkiffNativeYtValue(tupleType->GetElementType(i), nativeYtTypeFlags, elements[i], buf); + } + } else { + for (ui32 i = 0; i < tupleType->GetElementsCount(); ++i) { + WriteSkiffNativeYtValue(tupleType->GetElementType(i), nativeYtTypeFlags, value.GetElement(i), buf); + } + } + } else if (type->IsStruct()) { + auto structType = AS_TYPE(TStructType, type); + auto elements = value.GetElements(); + if (auto cookie = type->GetCookie()) { + const std::vector<size_t>& reorder = *((const std::vector<size_t>*)cookie); + if (elements) { + for (ui32 i = 0; i < structType->GetMembersCount(); ++i) { + const auto ndx = reorder[i]; + WriteSkiffNativeYtValue(structType->GetMemberType(ndx), nativeYtTypeFlags, elements[ndx], buf); + } + } else { + for (ui32 i = 0; i < structType->GetMembersCount(); ++i) { + const auto ndx = reorder[i]; + WriteSkiffNativeYtValue(structType->GetMemberType(ndx), nativeYtTypeFlags, value.GetElement(ndx), buf); + } + } + } else { + if (elements) { + for (ui32 i = 0; i < structType->GetMembersCount(); ++i) { + WriteSkiffNativeYtValue(structType->GetMemberType(i), nativeYtTypeFlags, elements[i], buf); + } + } else { + for (ui32 i = 0; i < structType->GetMembersCount(); ++i) { + WriteSkiffNativeYtValue(structType->GetMemberType(i), nativeYtTypeFlags, value.GetElement(i), buf); + } + } + } + } else if (type->IsVariant()) { + auto varType = AS_TYPE(TVariantType, type); + ui16 index = (ui16)value.GetVariantIndex(); + if (varType->GetAlternativesCount() < 256) { + buf.WriteMany((const char*)&index, 1); + } else { + buf.WriteMany((const char*)&index, sizeof(index)); + } + + if (varType->GetUnderlyingType()->IsTuple()) { + auto tupleType = AS_TYPE(TTupleType, varType->GetUnderlyingType()); + WriteSkiffNativeYtValue(tupleType->GetElementType(index), nativeYtTypeFlags, value.GetVariantItem(), buf); + } else { + auto structType = AS_TYPE(TStructType, varType->GetUnderlyingType()); + if (auto cookie = structType->GetCookie()) { + const std::vector<size_t>& reorder = *((const std::vector<size_t>*)cookie); + index = reorder[index]; + } + YQL_ENSURE(index < structType->GetMembersCount()); + + WriteSkiffNativeYtValue(structType->GetMemberType(index), nativeYtTypeFlags, value.GetVariantItem(), buf); + } + } else if (type->IsVoid() || type->IsNull() || type->IsEmptyList() || type->IsEmptyDict()) { + } else if (type->IsDict()) { + auto dictType = AS_TYPE(TDictType, type); + auto keyType = dictType->GetKeyType(); + auto payloadType = dictType->GetPayloadType(); + NUdf::TUnboxedValue key, payload; + for (auto iter = value.GetDictIterator(); iter.NextPair(key, payload); ) { + buf.Write('\0'); + WriteSkiffNativeYtValue(keyType, nativeYtTypeFlags, key, buf); + WriteSkiffNativeYtValue(payloadType, nativeYtTypeFlags, payload, buf); + } + + buf.Write('\xff'); + } else { + YQL_ENSURE(false, "Unsupported type: " << type->GetKindAsStr()); + } +} + +extern "C" void WriteContainerNativeYtValue(TType* type, ui64 nativeYtTypeFlags, const NUdf::TUnboxedValuePod& value, NCommon::TOutputBuf& buf) { + WriteSkiffNativeYtValue(type, nativeYtTypeFlags, value, buf); +} + } // NYql diff --git a/yt/yql/providers/yt/codec/yt_codec.h b/yt/yql/providers/yt/codec/yt_codec.h index c7bae246ac..f7ea0e0b78 100644 --- a/yt/yql/providers/yt/codec/yt_codec.h +++ b/yt/yql/providers/yt/codec/yt_codec.h @@ -257,4 +257,30 @@ public: using IMkqlWriterImplPtr = TIntrusivePtr<IMkqlWriterImpl>; +NKikimr::NUdf::TUnboxedValue ReadYsonValueInTableFormat(NKikimr::NMiniKQL::TType* type, ui64 nativeYtTypeFlags, + const NKikimr::NMiniKQL::THolderFactory& holderFactory, char cmd, NCommon::TInputBuf& buf); +TMaybe<NKikimr::NUdf::TUnboxedValue> ParseYsonValueInTableFormat(const NKikimr::NMiniKQL::THolderFactory& holderFactory, + const TStringBuf& yson, NKikimr::NMiniKQL::TType* type, ui64 nativeYtTypeFlags, IOutputStream* err); +TMaybe<NKikimr::NUdf::TUnboxedValue> ParseYsonNode(const NKikimr::NMiniKQL::THolderFactory& holderFactory, + const NYT::TNode& node, NKikimr::NMiniKQL::TType* type, ui64 nativeYtTypeFlags, IOutputStream* err); +extern "C" void ReadYsonContainerValue(NKikimr::NMiniKQL::TType* type, ui64 nativeYtTypeFlags, + const NKikimr::NMiniKQL::THolderFactory& holderFactory, NKikimr::NUdf::TUnboxedValue& value, NCommon::TInputBuf& buf, + bool wrapOptional); +extern "C" void ReadContainerNativeYtValue(NKikimr::NMiniKQL::TType* type, ui64 nativeYtTypeFlags, + const NKikimr::NMiniKQL::THolderFactory& holderFactory, NKikimr::NUdf::TUnboxedValue& value, NCommon::TInputBuf& buf, + bool wrapOptional); +void WriteYsonValueInTableFormat(NCommon::TOutputBuf& buf, NKikimr::NMiniKQL::TType* type, ui64 nativeYtTypeFlags, const NKikimr::NUdf::TUnboxedValuePod& value, bool topLevel); +extern "C" void WriteYsonContainerValue(NKikimr::NMiniKQL::TType* type, ui64 nativeYtTypeFlags, + const NKikimr::NUdf::TUnboxedValuePod& value, NCommon::TOutputBuf& buf); + +void SkipSkiffField(NKikimr::NMiniKQL::TType* type, ui64 nativeYtTypeFlags, NCommon::TInputBuf& buf); +NKikimr::NUdf::TUnboxedValue ReadSkiffNativeYtValue(NKikimr::NMiniKQL::TType* type, ui64 nativeYtTypeFlags, + const NKikimr::NMiniKQL::THolderFactory& holderFactory, NCommon::TInputBuf& buf); +NKikimr::NUdf::TUnboxedValue ReadSkiffData(NKikimr::NMiniKQL::TType* type, ui64 nativeYtTypeFlags, NCommon::TInputBuf& buf); +void WriteSkiffData(NKikimr::NMiniKQL::TType* type, ui64 nativeYtTypeFlags, const NKikimr::NUdf::TUnboxedValuePod& value, NCommon::TOutputBuf& buf); +void WriteSkiffNativeYtValue(NKikimr::NMiniKQL::TType* type, ui64 nativeYtTypeFlags, + const NKikimr::NUdf::TUnboxedValuePod& value, NCommon::TOutputBuf& buf); +extern "C" void WriteContainerNativeYtValue(NKikimr::NMiniKQL::TType* type, ui64 nativeYtTypeFlags, + const NKikimr::NUdf::TUnboxedValuePod& value, NCommon::TOutputBuf& buf); + } // NYql diff --git a/yt/yql/providers/yt/codec/yt_codec_io.cpp b/yt/yql/providers/yt/codec/yt_codec_io.cpp index aa47453ea2..82b1a3e2f8 100644 --- a/yt/yql/providers/yt/codec/yt_codec_io.cpp +++ b/yt/yql/providers/yt/codec/yt_codec_io.cpp @@ -901,12 +901,12 @@ protected: return NUdf::TUnboxedValue(); } auto& decoder = *SpecsCache_.GetSpecs().Inputs[TableIndex_]; - auto val = ReadYsonValue((decoder.NativeYtTypeFlags & ENativeTypeCompatFlags::NTCF_COMPLEX) ? type : uwrappedType, decoder.NativeYtTypeFlags, SpecsCache_.GetHolderFactory(), cmd, Buf_, true); + auto val = ReadYsonValueInTableFormat((decoder.NativeYtTypeFlags & ENativeTypeCompatFlags::NTCF_COMPLEX) ? type : uwrappedType, decoder.NativeYtTypeFlags, SpecsCache_.GetHolderFactory(), cmd, Buf_); return (decoder.NativeYtTypeFlags & ENativeTypeCompatFlags::NTCF_COMPLEX) ? val : val.Release().MakeOptional(); } else { if (Y_LIKELY(cmd != EntitySymbol)) { auto& decoder = *SpecsCache_.GetSpecs().Inputs[TableIndex_]; - return ReadYsonValue(type, decoder.NativeYtTypeFlags, SpecsCache_.GetHolderFactory(), cmd, Buf_, true); + return ReadYsonValueInTableFormat(type, decoder.NativeYtTypeFlags, SpecsCache_.GetHolderFactory(), cmd, Buf_); } if (type->GetKind() == TType::EKind::Data && static_cast<TDataType*>(type)->GetSchemeType() == NUdf::TDataType<NUdf::TYson>::Id) { @@ -1367,7 +1367,7 @@ protected: } if (uwrappedType->IsData()) { - return NCommon::ReadSkiffData(uwrappedType, 0, Buf_); + return ReadSkiffData(uwrappedType, 0, Buf_); } else if (!isOptional && uwrappedType->IsPg()) { return NCommon::ReadSkiffPg(static_cast<TPgType*>(uwrappedType), Buf_); } else { @@ -1378,17 +1378,17 @@ protected: // parse binary yson... YQL_ENSURE(size > 0); char cmd = Buf_.Read(); - auto value = ReadYsonValue(uwrappedType, 0, SpecsCache_.GetHolderFactory(), cmd, Buf_, true); + auto value = ReadYsonValueInTableFormat(uwrappedType, 0, SpecsCache_.GetHolderFactory(), cmd, Buf_); return isOptional ? value.Release().MakeOptional() : value; } } NUdf::TUnboxedValue ReadSkiffFieldNativeYt(TType* type, ui64 nativeYtTypeFlags) { - return NCommon::ReadSkiffNativeYtValue(type, nativeYtTypeFlags, SpecsCache_.GetHolderFactory(), Buf_); + return ReadSkiffNativeYtValue(type, nativeYtTypeFlags, SpecsCache_.GetHolderFactory(), Buf_); } void SkipSkiffField(TType* type, ui64 nativeYtTypeFlags) { - return NCommon::SkipSkiffField(type, nativeYtTypeFlags, Buf_); + return ::NYql::SkipSkiffField(type, nativeYtTypeFlags, Buf_); } }; @@ -2065,9 +2065,9 @@ protected: void WriteSkiffValue(TType* type, const NUdf::TUnboxedValuePod& value, bool wasOptional) { if (NativeYtTypeFlags_) { - NCommon::WriteSkiffNativeYtValue(type, NativeYtTypeFlags_, value, Buf_); + WriteSkiffNativeYtValue(type, NativeYtTypeFlags_, value, Buf_); } else if (type->IsData()) { - NCommon::WriteSkiffData(type, 0, value, Buf_); + WriteSkiffData(type, 0, value, Buf_); } else if (!wasOptional && type->IsPg()) { NCommon::WriteSkiffPg(static_cast<TPgType*>(type), value, Buf_); } else { diff --git a/yt/yql/providers/yt/lib/res_pull/res_or_pull.cpp b/yt/yql/providers/yt/lib/res_pull/res_or_pull.cpp index d7800dc2d7..c901181ff5 100644 --- a/yt/yql/providers/yt/lib/res_pull/res_or_pull.cpp +++ b/yt/yql/providers/yt/lib/res_pull/res_or_pull.cpp @@ -197,7 +197,7 @@ bool TSkiffExecuteResOrPull::WriteNext(TMkqlIOCache& specsCache, const NYT::TNod YQL_ENSURE(rec.GetType() == NYT::TNode::EType::Map, "Expected map node"); TStringStream err; - auto value = NCommon::ParseYsonNode(specsCache.GetHolderFactory(), rec, Specs.Outputs[0].RowType, Specs.Outputs[0].NativeYtTypeFlags, &err); + auto value = ParseYsonNode(specsCache.GetHolderFactory(), rec, Specs.Outputs[0].RowType, Specs.Outputs[0].NativeYtTypeFlags, &err); if (!value) { throw yexception() << "Could not parse yson node with error: " << err.Str(); } diff --git a/yt/yt/client/driver/driver.cpp b/yt/yt/client/driver/driver.cpp index a77f6d2c33..f699cbd74f 100644 --- a/yt/yt/client/driver/driver.cpp +++ b/yt/yt/client/driver/driver.cpp @@ -115,8 +115,8 @@ public: TDriver( TDriverConfigPtr config, IConnectionPtr connection, - TSignatureGeneratorBasePtr signatureGenerator, - TSignatureValidatorBasePtr signatureValidator) + ISignatureGeneratorPtr signatureGenerator, + ISignatureValidatorPtr signatureValidator) : Config_(std::move(config)) , Connection_(std::move(connection)) , ClientCache_(New<TClientCache>(Config_->ClientCache, Connection_)) @@ -502,12 +502,12 @@ public: return Connection_; } - TSignatureGeneratorBasePtr GetSignatureGenerator() override + ISignatureGeneratorPtr GetSignatureGenerator() override { return SignatureGenerator_; } - TSignatureValidatorBasePtr GetSignatureValidator() override + ISignatureValidatorPtr GetSignatureValidator() override { return SignatureValidator_; } @@ -535,8 +535,8 @@ private: TClientCachePtr ClientCache_; const IClientPtr RootClient_; IProxyDiscoveryCachePtr ProxyDiscoveryCache_; - TSignatureGeneratorBasePtr SignatureGenerator_; - TSignatureValidatorBasePtr SignatureValidator_; + ISignatureGeneratorPtr SignatureGenerator_; + ISignatureValidatorPtr SignatureValidator_; class TCommandContext; using TCommandContextPtr = TIntrusivePtr<TCommandContext>; @@ -730,8 +730,8 @@ private: IDriverPtr CreateDriver( IConnectionPtr connection, TDriverConfigPtr config, - TSignatureGeneratorBasePtr signatureGenerator, - TSignatureValidatorBasePtr signatureValidator) + ISignatureGeneratorPtr signatureGenerator, + ISignatureValidatorPtr signatureValidator) { YT_VERIFY(connection); YT_VERIFY(config); diff --git a/yt/yt/client/driver/driver.h b/yt/yt/client/driver/driver.h index ef23bdc67e..8e8a25de41 100644 --- a/yt/yt/client/driver/driver.h +++ b/yt/yt/client/driver/driver.h @@ -152,9 +152,9 @@ struct IDriver //! Returns the underlying connection. virtual NApi::IConnectionPtr GetConnection() = 0; - virtual NSignature::TSignatureGeneratorBasePtr GetSignatureGenerator() = 0; + virtual NSignature::ISignatureGeneratorPtr GetSignatureGenerator() = 0; - virtual NSignature::TSignatureValidatorBasePtr GetSignatureValidator() = 0; + virtual NSignature::ISignatureValidatorPtr GetSignatureValidator() = 0; //! Terminates the underlying connection. virtual void Terminate() = 0; @@ -167,8 +167,8 @@ DEFINE_REFCOUNTED_TYPE(IDriver) IDriverPtr CreateDriver( NApi::IConnectionPtr connection, TDriverConfigPtr config, - NSignature::TSignatureGeneratorBasePtr signatureGenerator, - NSignature::TSignatureValidatorBasePtr signatureValidator); + NSignature::ISignatureGeneratorPtr signatureGenerator, + NSignature::ISignatureValidatorPtr signatureValidator); //////////////////////////////////////////////////////////////////////////////// diff --git a/yt/yt/client/signature/generator.cpp b/yt/yt/client/signature/generator.cpp index 3c9917e796..7a82cdd727 100644 --- a/yt/yt/client/signature/generator.cpp +++ b/yt/yt/client/signature/generator.cpp @@ -10,7 +10,7 @@ using namespace NYson; //////////////////////////////////////////////////////////////////////////////// -TSignaturePtr TSignatureGeneratorBase::Sign(TYsonString data) +TSignaturePtr ISignatureGenerator::Sign(TYsonString data) { auto signature = New<TSignature>(); signature->Payload_ = std::move(data); @@ -21,7 +21,7 @@ TSignaturePtr TSignatureGeneratorBase::Sign(TYsonString data) //////////////////////////////////////////////////////////////////////////////// class TDummySignatureGenerator - : public TSignatureGeneratorBase + : public ISignatureGenerator { public: void Sign(const TSignaturePtr& signature) override @@ -30,7 +30,7 @@ public: } }; -TSignatureGeneratorBasePtr CreateDummySignatureGenerator() +ISignatureGeneratorPtr CreateDummySignatureGenerator() { return New<TDummySignatureGenerator>(); } @@ -38,7 +38,7 @@ TSignatureGeneratorBasePtr CreateDummySignatureGenerator() //////////////////////////////////////////////////////////////////////////////// class TAlwaysThrowingSignatureGenerator - : public TSignatureGeneratorBase + : public ISignatureGenerator { public: void Sign(const TSignaturePtr& /*signature*/) override @@ -47,7 +47,7 @@ public: } }; -TSignatureGeneratorBasePtr CreateAlwaysThrowingSignatureGenerator() +ISignatureGeneratorPtr CreateAlwaysThrowingSignatureGenerator() { return New<TAlwaysThrowingSignatureGenerator>(); } diff --git a/yt/yt/client/signature/generator.h b/yt/yt/client/signature/generator.h index be1af9a0e1..fe2f715272 100644 --- a/yt/yt/client/signature/generator.h +++ b/yt/yt/client/signature/generator.h @@ -8,7 +8,7 @@ namespace NYT::NSignature { //////////////////////////////////////////////////////////////////////////////// -class TSignatureGeneratorBase +class ISignatureGenerator : public TRefCounted { public: @@ -24,13 +24,13 @@ private: friend class TAlwaysThrowingSignatureGenerator; }; -DEFINE_REFCOUNTED_TYPE(TSignatureGeneratorBase) +DEFINE_REFCOUNTED_TYPE(ISignatureGenerator) //////////////////////////////////////////////////////////////////////////////// -TSignatureGeneratorBasePtr CreateDummySignatureGenerator(); +ISignatureGeneratorPtr CreateDummySignatureGenerator(); -TSignatureGeneratorBasePtr CreateAlwaysThrowingSignatureGenerator(); +ISignatureGeneratorPtr CreateAlwaysThrowingSignatureGenerator(); //////////////////////////////////////////////////////////////////////////////// diff --git a/yt/yt/client/signature/public.h b/yt/yt/client/signature/public.h index bf0a10cdbb..845445d9e9 100644 --- a/yt/yt/client/signature/public.h +++ b/yt/yt/client/signature/public.h @@ -10,8 +10,8 @@ DECLARE_REFCOUNTED_CLASS(TSignature) /////////////////////////////////////////////////////////////////////////////// -DECLARE_REFCOUNTED_CLASS(TSignatureGeneratorBase) -DECLARE_REFCOUNTED_CLASS(TSignatureValidatorBase) +DECLARE_REFCOUNTED_CLASS(ISignatureGenerator) +DECLARE_REFCOUNTED_CLASS(ISignatureValidator) /////////////////////////////////////////////////////////////////////////////// diff --git a/yt/yt/client/signature/signature.h b/yt/yt/client/signature/signature.h index c9aafba974..08882f2375 100644 --- a/yt/yt/client/signature/signature.h +++ b/yt/yt/client/signature/signature.h @@ -27,12 +27,12 @@ private: NYson::TYsonString Payload_; std::vector<std::byte> Signature_; - friend class TSignatureGeneratorBase; + friend class ISignatureGenerator; friend class TSignatureGenerator; friend class TDummySignatureGenerator; friend class TAlwaysThrowingSignatureGenerator; - friend class TSignatureValidatorBase; + friend class ISignatureValidator; friend class TSignatureValidator; friend class TDummySignatureValidator; friend class TAlwaysThrowingSignatureValidator; diff --git a/yt/yt/client/signature/validator.cpp b/yt/yt/client/signature/validator.cpp index 87c9d17b42..156630f135 100644 --- a/yt/yt/client/signature/validator.cpp +++ b/yt/yt/client/signature/validator.cpp @@ -9,7 +9,7 @@ namespace NYT::NSignature { //////////////////////////////////////////////////////////////////////////////// class TDummySignatureValidator - : public TSignatureValidatorBase + : public ISignatureValidator { public: TFuture<bool> Validate(const TSignaturePtr& signature) override @@ -19,7 +19,7 @@ public: } }; -TSignatureValidatorBasePtr CreateDummySignatureValidator() +ISignatureValidatorPtr CreateDummySignatureValidator() { return New<TDummySignatureValidator>(); } @@ -27,7 +27,7 @@ TSignatureValidatorBasePtr CreateDummySignatureValidator() //////////////////////////////////////////////////////////////////////////////// class TAlwaysThrowingSignatureValidator - : public TSignatureValidatorBase + : public ISignatureValidator { public: TFuture<bool> Validate(const TSignaturePtr& /*signature*/) override @@ -36,7 +36,7 @@ public: } }; -TSignatureValidatorBasePtr CreateAlwaysThrowingSignatureValidator() +ISignatureValidatorPtr CreateAlwaysThrowingSignatureValidator() { return New<TAlwaysThrowingSignatureValidator>(); } diff --git a/yt/yt/client/signature/validator.h b/yt/yt/client/signature/validator.h index 011957d6d3..f1215e7d0c 100644 --- a/yt/yt/client/signature/validator.h +++ b/yt/yt/client/signature/validator.h @@ -10,20 +10,20 @@ namespace NYT::NSignature { //////////////////////////////////////////////////////////////////////////////// -class TSignatureValidatorBase +class ISignatureValidator : public TRefCounted { public: virtual TFuture<bool> Validate(const TSignaturePtr& signature) = 0; }; -DEFINE_REFCOUNTED_TYPE(TSignatureValidatorBase) +DEFINE_REFCOUNTED_TYPE(ISignatureValidator) //////////////////////////////////////////////////////////////////////////////// -TSignatureValidatorBasePtr CreateDummySignatureValidator(); +ISignatureValidatorPtr CreateDummySignatureValidator(); -TSignatureValidatorBasePtr CreateAlwaysThrowingSignatureValidator(); +ISignatureValidatorPtr CreateAlwaysThrowingSignatureValidator(); //////////////////////////////////////////////////////////////////////////////// diff --git a/yt/yt/core/misc/ref_counted_tracker-inl.h b/yt/yt/core/misc/ref_counted_tracker-inl.h index 8e132d59a5..9e8a75e6b1 100644 --- a/yt/yt/core/misc/ref_counted_tracker-inl.h +++ b/yt/yt/core/misc/ref_counted_tracker-inl.h @@ -74,7 +74,15 @@ public: TRefCountedTrackerStatistics::TNamedSlotStatistics GetStatistics() const; - TNamedSlot& operator += (const TLocalSlot& rhs) + #define REF_COUNTED_TRACKER_NO_TSAN + #if defined(__has_feature) + #if __has_feature(thread_sanitizer) + #undef REF_COUNTED_TRACKER_NO_TSAN + #define REF_COUNTED_TRACKER_NO_TSAN __attribute__((no_sanitize("thread"))) + #endif + #endif + + TNamedSlot& REF_COUNTED_TRACKER_NO_TSAN operator += (const TLocalSlot& rhs) { #define XX(name) name ## _ += rhs.name; ENUMERATE_SLOT_FIELDS() @@ -82,6 +90,8 @@ public: return *this; } + #undef REF_COUNTED_TRACKER_NO_TSAN + TNamedSlot& operator += (const TGlobalSlot& rhs) { #define XX(name) name ## _ += rhs.name.load(); diff --git a/yt/yt/core/rpc/service_detail.cpp b/yt/yt/core/rpc/service_detail.cpp index f2dec276cb..065e417c1c 100644 --- a/yt/yt/core/rpc/service_detail.cpp +++ b/yt/yt/core/rpc/service_detail.cpp @@ -1654,8 +1654,6 @@ TServiceBase::TServiceBase( Profiler_.AddFuncGauge("/authentication_queue_size", MakeStrong(this), [this] { return AuthenticationQueueSize_.load(std::memory_order::relaxed); }); - - ServiceLivenessChecker_->Start(); } const TServiceId& TServiceBase::GetServiceId() const @@ -2431,6 +2429,25 @@ void TServiceBase::DecrementActiveRequestCount() void TServiceBase::InitContext(IServiceContext* /*context*/) { } +void TServiceBase::StartServiceLivenessChecker() +{ + // Fast path. + if (ServiceLivenessCheckerStarted_.load(std::memory_order::relaxed)) { + return; + } + if (ServiceLivenessCheckerStarted_.exchange(true)) { + return; + } + + if (auto checker = ServiceLivenessChecker_.Acquire()) { + checker->Start(); + // There may be concurrent ServiceLivenessChecker_.Exchange() call in Stop(). + if (!ServiceLivenessChecker_.Acquire()) { + YT_UNUSED_FUTURE(checker->Stop()); + } + } +} + void TServiceBase::RegisterDiscoverRequest(const TCtxDiscoverPtr& context) { auto payload = GetDiscoverRequestPayload(context); @@ -2440,6 +2457,7 @@ void TServiceBase::RegisterDiscoverRequest(const TCtxDiscoverPtr& context) auto it = DiscoverRequestsByPayload_.find(payload); if (it == DiscoverRequestsByPayload_.end()) { readerGuard.Release(); + StartServiceLivenessChecker(); auto writerGuard = WriterGuard(DiscoverRequestsByPayloadLock_); DiscoverRequestsByPayload_[payload].Insert(context, 0); } else { @@ -2706,8 +2724,9 @@ TFuture<void> TServiceBase::Stop() } } - YT_UNUSED_FUTURE(ServiceLivenessChecker_->Stop()); - + if (auto checker = ServiceLivenessChecker_.Exchange(nullptr)) { + YT_UNUSED_FUTURE(checker->Stop()); + } return StopResult_.ToFuture(); } diff --git a/yt/yt/core/rpc/service_detail.h b/yt/yt/core/rpc/service_detail.h index 531a03018d..e19338343a 100644 --- a/yt/yt/core/rpc/service_detail.h +++ b/yt/yt/core/rpc/service_detail.h @@ -987,7 +987,8 @@ private: std::atomic<bool> EnableErrorCodeCounter_ = false; - const NConcurrency::TPeriodicExecutorPtr ServiceLivenessChecker_; + std::atomic<bool> ServiceLivenessCheckerStarted_ = false; + TAtomicIntrusivePtr<NConcurrency::TPeriodicExecutor> ServiceLivenessChecker_; using TDiscoverRequestSet = TConcurrentHashMap<TCtxDiscoverPtr, int>; THashMap<TString, TDiscoverRequestSet> DiscoverRequestsByPayload_; @@ -1075,6 +1076,7 @@ private: void IncrementActiveRequestCount(); void DecrementActiveRequestCount(); + void StartServiceLivenessChecker(); void RegisterDiscoverRequest(const TCtxDiscoverPtr& context); void ReplyDiscoverRequest(const TCtxDiscoverPtr& context, bool isUp); diff --git a/yt/yt/library/profiling/solomon/sensor_service.cpp b/yt/yt/library/profiling/solomon/sensor_service.cpp index 291581f182..d6f814f2ea 100644 --- a/yt/yt/library/profiling/solomon/sensor_service.cpp +++ b/yt/yt/library/profiling/solomon/sensor_service.cpp @@ -137,6 +137,9 @@ public: , RootSensorServiceImpl_(New<TSensorServiceImpl>(/*name*/ std::string(), Registry_.Get(), &Exporter_->Lock_)) , Root_(GetEphemeralNodeFactory(/*shouldHideAttributes*/ true)->CreateMap()) , SensorTreeUpdateDuration_(Registry_->GetSelfProfiler().Timer("/sensor_service_tree_update_duration")) + { } + + void Initialize() { UpdateSensorTreeExecutor_ = New<TPeriodicExecutor>( Exporter_->ControlQueue_->GetInvoker(), @@ -272,10 +275,12 @@ IYPathServicePtr CreateSensorService( TSolomonRegistryPtr registry, TSolomonExporterPtr exporter) { - return New<TSensorService>( + auto service = New<TSensorService>( std::move(config), std::move(registry), std::move(exporter)); + service->Initialize(); + return service; } //////////////////////////////////////////////////////////////////////////////// |