diff options
author | Alexander Smirnov <alex@ydb.tech> | 2025-03-28 00:51:44 +0000 |
---|---|---|
committer | Alexander Smirnov <alex@ydb.tech> | 2025-03-28 00:51:44 +0000 |
commit | 149dc5893b4b90db5ad40d019d2d1ffa6e1f0abd (patch) | |
tree | 6e3243c1b457c31839ddc9fa75b5f46bcc1fecb4 | |
parent | 4c8dfa633cab20dabf3d11464d986335519bfcfa (diff) | |
parent | 71e9df83f284bf42c2f8ea872752bf02e1055555 (diff) | |
download | ydb-149dc5893b4b90db5ad40d019d2d1ffa6e1f0abd.tar.gz |
Merge branch 'rightlib' into merge-libs-250328-0050
85 files changed, 1943 insertions, 229 deletions
diff --git a/build/conf/java.conf b/build/conf/java.conf index ff77a303a05..053c0c4a1d9 100644 --- a/build/conf/java.conf +++ b/build/conf/java.conf @@ -476,7 +476,7 @@ macro _DO_2_RUN_JAR_PROGRAM(IN_DIRS_VAR="uniq_", IN_DIRS_INPUTS[], IN{input}[], _CHECK_RUN_JAVA_PROG_CLASSPATH($CLASSPATH) .PEERDIR=build/platform/java/jdk $JDK_RESOURCE_PEERDIR .CMD=${hide;kv:"p RJ"} ${hide;kv:"pc blue"} ${hide:JAVA_FAKEID} ${cwd:BINDIR} $YMAKE_PYTHON ${input:"build/scripts/mkdir.py"} ${OUT_DIR} && ${cwd:CWD} $YMAKE_PYTHON ${input:"build/scripts/setup_java_tmpdir.py"} $YMAKE_PYTHON ${input:"build/scripts/stdout2stderr.py"} ${pre=--file=:STDOUT} ${hide;output:STDOUT} ${pre=--file=:STDOUT_NOAUTO} ${hide;noauto;output:STDOUT_NOAUTO} $YMAKE_PYTHON ${input:"build/scripts/fix_java_command_file_cp.py"} --build-root ${ARCADIA_BUILD_ROOT} $JDK_RESOURCE/bin/java -Dfile.encoding=utf8 -classpath ${RUN_JAR_PROG_CP_PRE}${tool:CLASSPATH}${RUN_JAR_PROG_CP_SUF} ${Args} && $_GENTAR_HELPER(HASH_SUF $HASH_SUF OUT_DIR $OUT_DIR) ${hide;input:IN} ${hide;context=TEXT;input:IN_NOPARSE} ${hide;noauto;output:OUT_NOAUTO} ${hide;output:OUT} ${hide;tool:TOOL} ${IN_DIRS_INPUTS} - .SEM=runs-ITEM && runs-args ${Args} && runs-classpath ${RUN_JAR_PROG_CP_PRE}${tool:CLASSPATH}${RUN_JAR_PROG_CP_SUF} && runs-cwd ${CWD} ${hide;cwd:CWD} && runs-in ${IN} ${hide;input:IN} && runs-in_dir ${IN_DIR} && runs-in_dirs_inputs ${IN_DIRS_INPUTS} && runs-in_noparse ${IN_NOPARSE} ${hide;context=TEXT;input:IN_NOPARSE} && runs-out ${OUT} ${hide;output:OUT} ${OUT_NOAUTO} ${hide;noauto;output:OUT_NOAUTO} $_GENTAR_HELPER(HASH_SUF $HASH_SUF OUT_DIR $OUT_DIR) && runs-out_dir ${OUT_DIR} && runs-tool ${TOOL} ${hide;tool:TOOL} + .SEM=runs-ITEM && runs-args ${Args} && runs-classpath ${RUN_JAR_PROG_CP_PRE}${tool:CLASSPATH}${RUN_JAR_PROG_CP_SUF} && runs-cwd ${CWD} ${hide;cwd:CWD} && runs-in ${IN} ${hide;input:IN} && runs-in_dir ${IN_DIR} && runs-in_dirs_inputs ${IN_DIRS_INPUTS} && runs-in_noparse ${IN_NOPARSE} ${hide;context=TEXT;input:IN_NOPARSE} && runs-out ${OUT} ${hide;output:OUT} ${OUT_NOAUTO} ${hide;noauto;output:OUT_NOAUTO} $_GENTAR_HELPER(HASH_SUF $HASH_SUF OUT_DIR $OUT_DIR) && runs-out_dir ${OUT_DIR} && runs-tool ${tool:TOOL} } # tag:java-specific diff --git a/build/export_generators/ide-gradle/build.gradle.kts.jinja b/build/export_generators/ide-gradle/build.gradle.kts.jinja index 2adebe9cf99..c037f3e7bc8 100644 --- a/build/export_generators/ide-gradle/build.gradle.kts.jinja +++ b/build/export_generators/ide-gradle/build.gradle.kts.jinja @@ -1,3 +1,14 @@ +{%- macro PatchRoots(arg, depend = false) -%} +{#- Always replace (arcadia_root) === (SOURCE_ROOT in ymake) to $project_root in Gradle -#} +{%- if depend -%} +{#- Replace (export_root) === (BUILD_ROOT in ymake) to $project_root in Gradle, because prebuilt tools in arcadia, not in build_root -#} +"{{ arg|replace(export_root, "$project_root")|replace(arcadia_root, "$project_root") }}" +{%- else -%} +{#- Replace (export_root) === (BUILD_ROOT in ymake) to baseBuildDir in Gradle - root of all build folders for modules -#} +"{{ arg|replace(export_root, "$baseBuildDir")|replace(arcadia_root, "$project_root") }}" +{%- endif -%} +{%- endmacro -%} + {%- include "[generator]/vars.jinja" -%} {%- include "[generator]/import.jinja" -%} {%- include "[generator]/repositories.jinja" -%} @@ -12,6 +23,7 @@ {%- include "[generator]/source_sets.jinja" -%} {%- include "[generator]/test.jinja" -%} {%- include "[generator]/javadoc.jinja" -%} +{%- include "[generator]/run_program.jinja" -%} {%- include "[generator]/run_java_program.jinja" -%} {%- include "[generator]/dependencies.jinja" -%} {%- include "extra-tests.gradle.kts" ignore missing -%} diff --git a/build/export_generators/ide-gradle/build.gradle.kts.proto.jinja b/build/export_generators/ide-gradle/build.gradle.kts.proto.jinja index a83447aa671..ec3465d3166 100644 --- a/build/export_generators/ide-gradle/build.gradle.kts.proto.jinja +++ b/build/export_generators/ide-gradle/build.gradle.kts.proto.jinja @@ -1,3 +1,18 @@ +{%- macro PatchRoots(arg, depend = false) -%} +{#- Always replace (arcadia_root) === (SOURCE_ROOT in ymake) to $project_root in Gradle -#} +{%- if depend -%} +{#- Replace (export_root) === (BUILD_ROOT in ymake) to $project_root in Gradle, because prebuilt tools in arcadia, not in build_root -#} +"{{ arg|replace(export_root, "$project_root")|replace(arcadia_root, "$project_root") }}" +{%- else -%} +{#- Replace (export_root) === (BUILD_ROOT in ymake) to baseBuildDir in Gradle - root of all build folders for modules -#} +"{{ arg|replace(export_root, "$baseBuildDir")|replace(arcadia_root, "$project_root") }}" +{%- endif -%} +{%- endmacro -%} + +{%- macro PatchGeneratedProto(arg) -%} +"{{ arg|replace(export_root, "$mainExtractedIncludeProtosDir")|replace(arcadia_root, "$mainExtractedIncludeProtosDir") }}" +{%- endmacro -%} + {%- include "[generator]/proto_vars.jinja" -%} {%- include "[generator]/proto_import.jinja" -%} {%- include "[generator]/repositories.jinja" -%} @@ -10,6 +25,8 @@ {%- include "[generator]/proto_source_sets.jinja" -%} {%- include "[generator]/protobuf.jinja" -%} {%- include "[generator]/proto_prepare.jinja" -%} +{%- include "[generator]/run_program.jinja" -%} +{%- include "[generator]/run_java_program.jinja" -%} {%- include "[generator]/javadoc.jinja" -%} {%- include "[generator]/proto_dependencies.jinja" -%} {%- include "[generator]/debug.jinja" ignore missing -%} diff --git a/build/export_generators/ide-gradle/jdk.jinja b/build/export_generators/ide-gradle/jdk.jinja index 01fbc09daf9..5bc13819dd2 100644 --- a/build/export_generators/ide-gradle/jdk.jinja +++ b/build/export_generators/ide-gradle/jdk.jinja @@ -1,6 +1,8 @@ {#- default JDK version -#} {%- set jdk_version = '17' -%} -{%- if target.required_jdk -%} +{%- if force_jdk_version -%} +{%- set jdk_version = force_jdk_version -%} +{%- elif target.required_jdk -%} {%- set jdk_version = target.required_jdk -%} {%- elif target.jdk_version -%} {%- set jdk_version = target.jdk_version -%} diff --git a/build/export_generators/ide-gradle/proto_source_sets.jinja b/build/export_generators/ide-gradle/proto_source_sets.jinja index 7c7814d940e..db7c467f643 100644 --- a/build/export_generators/ide-gradle/proto_source_sets.jinja +++ b/build/export_generators/ide-gradle/proto_source_sets.jinja @@ -1,6 +1,15 @@ {#- empty string #} sourceSets { main { +{%- if target.jar_source_set|length -%} +{%- for source_set in target.jar_source_set -%} +{%- set srcdir_glob = split(source_set, ':') -%} +{%- set out = srcdir_glob[0] -%} +{%- if out != 'src/main/java' %} + java.srcDir({{ PatchRoots(out) }}) +{%- endif -%} +{%- endfor -%} +{%- endif %} java.srcDir("$buildDir/generated/source/proto/main/java") {%- if target.proto_grpc %} java.srcDir("$buildDir/generated/source/proto/main/grpc") @@ -14,13 +23,6 @@ sourceSets { } } -{%- if target.jar_source_set is defined -%} -{%- for source_set in target.jar_source_set -%} -{%- set srcdir_glob = split(source_set, ':') %} -sourceSets.main.java.srcDirs += "{{ srcdir_glob[0] }}" -{% endfor -%} -{%- endif %} - tasks.withType<Jar>() { duplicatesStrategy = DuplicatesStrategy.INCLUDE } diff --git a/build/export_generators/ide-gradle/proto_vars.jinja b/build/export_generators/ide-gradle/proto_vars.jinja index 3ef6275d879..3f357ce243a 100644 --- a/build/export_generators/ide-gradle/proto_vars.jinja +++ b/build/export_generators/ide-gradle/proto_vars.jinja @@ -2,5 +2,6 @@ {%- set libraries = target.consumer|selectattr('type', 'eq', 'library') -%} {%- set with_kotlin = target.with_kotlin -%} {%- set kotlin_version = target.kotlin_version -%} +{%- set proto_template = true -%} {%- include "[generator]/jdk.jinja" -%} diff --git a/build/export_generators/ide-gradle/run_java_program.jinja b/build/export_generators/ide-gradle/run_java_program.jinja index f28ef430cc6..527a81c5b47 100644 --- a/build/export_generators/ide-gradle/run_java_program.jinja +++ b/build/export_generators/ide-gradle/run_java_program.jinja @@ -4,6 +4,11 @@ val runJav{{ loop.index }} = task<JavaExec>("runJavaProgram{{ loop.index }}") { group = "build" description = "Code generation by run java program" + +{%- if run.cwd %} + workingDir = file({{ PatchRoots(run.cwd) }}) +{%- endif -%} + {%- set classpaths = run.classpath|reject('eq', '@.cplst') -%} {%- if classpaths|length -%} {% for classpath in classpaths -%} @@ -20,40 +25,61 @@ val runJav{{ loop.index }} = task<JavaExec>("runJavaProgram{{ loop.index }}") { args = listOf( {%- for arg in run.args -%} {%- if not loop.first %} - "{{ arg }}", +{%- if proto_template and (run.out_dir|select("eq", arg)|length or run.out|select("eq", arg)|length) %} + {{ PatchGeneratedProto(arg) }}, +{%- else %} + {{ PatchRoots(arg, run.tool|select("in", arg)|length) }}, +{%- endif -%} {%- endif -%} {%- endfor %} ) {% endif -%} -{%- if run.in_dir -%} -{%- for in_dir in run.in_dir %} - inputs.files(fileTree("{{ in_dir }}")) +{%- if run.in_dir|length -%} +{%- for in_dir in run.in_dir|unique %} + inputs.files(fileTree({{ PatchRoots(in_dir) }})) {% endfor -%} {%- endif -%} -{%- if run.in -%} -{%- for in_file in run.in %} - inputs.files("{{ in_file }}") +{%- if run.in|length -%} +{%- for in_file in run.in|unique %} + inputs.files({{ PatchRoots(in_file) }}) +{% endfor -%} +{%- endif -%} + +{%- if run.in_noparse|length -%} +{%- for in_file in run.in_noparse|unique %} + inputs.files({{ PatchRoots(in_file) }}) {% endfor -%} {%- endif -%} {%- if run.out_dir|length -%} {%- for out_dir in run.out_dir|unique %} - outputs.dir("{{ out_dir }}") + outputs.dir({{ PatchRoots(out_dir) }}) {%- endfor -%} {%- endif -%} + +{%- if run.out|length -%} +{%- for out in run.out|unique %} + outputs.files({{ PatchRoots(out) }}) +{%- endfor -%} +{%- endif -%} + {#- - Не использованы аттрибуты - run-out="list" - run-cwd="str" + Не использованы атрибуты run-in_dirs_inputs="list" - run-in_noparse="list" run-tool="list" #} } +{%- if proto_template %} + +tasks.getByName("prepareMainProtos").dependsOn(runJav{{ loop.index }}) +tasks.getByName("extractMainLibrariesProtos").dependsOn(runJav{{ loop.index }}) +{% else %} + tasks.getByName("sourcesJar").dependsOn(runJav{{ loop.index }}) +{% endif -%} tasks.compileJava.configure { dependsOn(runJav{{ loop.index }}) diff --git a/build/export_generators/ide-gradle/run_program.jinja b/build/export_generators/ide-gradle/run_program.jinja new file mode 100644 index 00000000000..147ae6c1657 --- /dev/null +++ b/build/export_generators/ide-gradle/run_program.jinja @@ -0,0 +1,64 @@ +{%- if target.custom_runs|length -%} +{%- for custom_run in target.custom_runs %} + +val runProg{{ loop.index }} = task<Exec>("runProgram{{ loop.index }}") { + group = "build" + description = "Code generation by run custom program" + +{%- if custom_run.cwd %} + workingDir = file({{ PatchRoots(custom_run.cwd) }}) +{%- endif %} + + commandLine( +{%- for arg in custom_run.command -%} +{%- if custom_run.depends|select("eq", arg)|length -%} +{{ PatchRoots(arg, true) }} +{%- elif proto_template -%} +{#- generated proto put to prepared proto dir -#} +{{ PatchGeneratedProto(arg) }} +{%- else -%} +{{ PatchRoots(arg) }} +{%- endif -%} +{%- if not loop.last %}, {% endif -%} +{%- endfor -%}) + +{%- if custom_run.depends|length -%} +{%- for depend in custom_run.depends|unique %} + inputs.files({{ PatchRoots(depend, true) }}) +{% endfor -%} +{%- endif -%} + +{%- if custom_run.outputs|length -%} +{%- for out in custom_run.outputs|unique %} +{%- if proto_template %} + outputs.files({{ PatchGeneratedProto(out) }}) +{%- else %} + outputs.files({{ PatchRoots(out) }}) +{%- endif -%} +{%- endfor -%} +{%- endif -%} +{#- + Не использованы атрибуты + custom_run-env="list" +#} +} + +{%- if proto_template %} + +tasks.getByName("extractMainLibrariesProtos").dependsOn(runProg{{ loop.index }}) +{% else %} + +tasks.getByName("sourcesJar").dependsOn(runProg{{ loop.index }}) +{% endif -%} + +tasks.compileJava.configure { + dependsOn(runProg{{ loop.index }}) +} +{%- if with_kotlin %} + +tasks.compileKotlin.configure { + dependsOn(runProg{{ loop.index }}) +} +{%- endif %} +{% endfor -%} +{% endif -%} diff --git a/build/export_generators/ide-gradle/source_sets.jinja b/build/export_generators/ide-gradle/source_sets.jinja index 952332fac60..009341c6dd5 100644 --- a/build/export_generators/ide-gradle/source_sets.jinja +++ b/build/export_generators/ide-gradle/source_sets.jinja @@ -14,7 +14,7 @@ sourceSets { {%- set srcdir_glob = split(source_set, ':') -%} {%- set out = srcdir_glob[0] -%} {%- if out != 'src/main/java' %} - java.srcDir("{{ out }}") + java.srcDir({{ PatchRoots(out) }}) {%- endif -%} {%- endfor -%} {%- endif %} @@ -45,7 +45,7 @@ sourceSets { {%- set srcdir_glob = split(source_set, ':') -%} {%- set out = srcdir_glob[0] -%} {%- if out != 'src/test/java' %} - java.srcDir("{{ srcdir_glob[0] }}") + java.srcDir({{ PatchRoots(srcdir_glob[0]) }}) {%- endif -%} {%- endfor -%} {%- endif %} diff --git a/build/mapping.conf.json b/build/mapping.conf.json index 2631c61d610..019d916756d 100644 --- a/build/mapping.conf.json +++ b/build/mapping.conf.json @@ -1312,6 +1312,7 @@ "6048579718": "{registry_endpoint}/6048579718", "7686710688": "{registry_endpoint}/7686710688", "7879860842": "{registry_endpoint}/7879860842", + "8367004015": "{registry_endpoint}/8367004015", "2980468199": "{registry_endpoint}/2980468199", "5562224408": "{registry_endpoint}/5562224408", "7663495611": "{registry_endpoint}/7663495611" @@ -2625,6 +2626,7 @@ "6048579718": "yt/go/ytrecipe/cmd/ytexec for linux", "7686710688": "yt/go/ytrecipe/cmd/ytexec for linux", "7879860842": "yt/go/ytrecipe/cmd/ytexec for linux", + "8367004015": "yt/go/ytrecipe/cmd/ytexec for linux", "2980468199": "ytexec for linux", "5562224408": "ytexec for linux", "7663495611": "ytexec for linux" diff --git a/build/platform/yfm/resources.json b/build/platform/yfm/resources.json index 9491f062029..7fe8b459be7 100644 --- a/build/platform/yfm/resources.json +++ b/build/platform/yfm/resources.json @@ -1,16 +1,16 @@ { "by_platform": { "win32-x86_64": { - "uri": "sbr:8347966416" + "uri": "sbr:8369232374" }, "darwin-x86_64": { - "uri": "sbr:8347965020" + "uri": "sbr:8369230226" }, "linux-x86_64": { - "uri": "sbr:8347962698" + "uri": "sbr:8369228543" }, "darwin-arm64": { - "uri": "sbr:8347965020" + "uri": "sbr:8369230226" } } } diff --git a/build/plugins/lib/test_const/__init__.py b/build/plugins/lib/test_const/__init__.py index 95ecfcbf697..3fd88c7d7c7 100644 --- a/build/plugins/lib/test_const/__init__.py +++ b/build/plugins/lib/test_const/__init__.py @@ -438,6 +438,12 @@ class ServiceTags(Enum): AnyTag = "ya:anytag" +# NOTE: Linter constants are used in ya style, ya ide, config validator check (devtools/ya/handlers/style/config_validator). +# ya and validator have different release cycles, make sure you preserve compatibility: +# - don't delete anything from here until you get rid of all usages and roll out the changes; +# - keep in mind that changes of constants used in multiple tools may get to production at different times; + + # Linter names must match `NAME` set in `_ADD_*_LINTER_CHECK` class PythonLinterName(Enum): Black = "black" @@ -464,6 +470,22 @@ class LinterConfigsValidationRules(Enum): Python = "build/config/tests/py_style/configs_validation_rules.json" +# XXX: if a new linter is added to this mapping respective path to rules file must be available in the json +LINTER_TO_DEFAULT_CONFIGS = { + CppLinterName.ClangFormat: DefaultLinterConfig.Cpp, + PythonLinterName.Black: DefaultLinterConfig.Python, + PythonLinterName.Ruff: DefaultLinterConfig.Python, +} + +# Fill up like +""" +{ + PythonLinterName.Ruff: LinterConfigsValidationRules.Python, +} +""" +# XXX: if a new linter is added to this mapping respective path to rules file must be available in the json +LINTER_TO_VALIDATION_CONFIGS = {} + LINTER_CONFIG_TYPES = { CppLinterName.ClangFormat: (".clang-format",), CppLinterName.ClangFormat15: (".clang-format",), @@ -479,6 +501,9 @@ AUTOINCLUDE_PATHS = ( ) +# End of linter constants + + class Status(object): GOOD, XFAIL, FAIL, XPASS, MISSING, CRASHED, TIMEOUT = range(1, 8) SKIPPED = -100 diff --git a/build/sysincl/misc.yml b/build/sysincl/misc.yml index 69d43563d1e..6629f94a9e9 100644 --- a/build/sysincl/misc.yml +++ b/build/sysincl/misc.yml @@ -463,6 +463,10 @@ includes: - filter.h: contrib/libs/svt-av1/Source/Lib/Codec/filter.h +- source_filter: "^contrib/libs/tinycbor" + includes: + - memory.h: contrib/libs/tinycbor/src/memory.h + - source_filter: "^contrib/restricted/ffmpeg" includes: - thread.h: contrib/restricted/ffmpeg/libavcodec/thread.h diff --git a/contrib/libs/cxxsupp/libcxx/include/complex b/contrib/libs/cxxsupp/libcxx/include/complex index 91cf3f4d508..bfe61c506e1 100644 --- a/contrib/libs/cxxsupp/libcxx/include/complex +++ b/contrib/libs/cxxsupp/libcxx/include/complex @@ -1282,8 +1282,7 @@ _LIBCPP_HIDE_FROM_ABI complex<_Tp> acos(const complex<_Tp>& __x) { } if (std::__constexpr_isinf(__x.imag())) return complex<_Tp>(__pi / _Tp(2), -__x.imag()); - // Somehow isnan can be a macro, so we use __constexpr_isnan - if (__x.real() == 0 && (__x.imag() == 0 || std::__constexpr_isnan(__x.imag()))) + if (__x.real() == 0 && (__x.imag() == 0 || std::isnan(__x.imag()))) return complex<_Tp>(__pi / _Tp(2), -__x.imag()); complex<_Tp> __z = std::log(__x + std::sqrt(std::__sqr(__x) - _Tp(1))); if (std::signbit(__x.imag())) diff --git a/contrib/libs/cxxsupp/libcxx/patches/38-complex.patch b/contrib/libs/cxxsupp/libcxx/patches/38-complex.patch deleted file mode 100644 index 6856a5f4b2e..00000000000 --- a/contrib/libs/cxxsupp/libcxx/patches/38-complex.patch +++ /dev/null @@ -1,14 +0,0 @@ -diff --git a/include/complex b/include/complex -index bfe61c5..91cf3f4 100644 ---- a/include/complex -+++ b/include/complex -@@ -1282,7 +1282,8 @@ _LIBCPP_HIDE_FROM_ABI complex<_Tp> acos(const complex<_Tp>& __x) { - } - if (std::__constexpr_isinf(__x.imag())) - return complex<_Tp>(__pi / _Tp(2), -__x.imag()); -- if (__x.real() == 0 && (__x.imag() == 0 || std::isnan(__x.imag()))) -+ // Somehow isnan can be a macro, so we use __constexpr_isnan -+ if (__x.real() == 0 && (__x.imag() == 0 || std::__constexpr_isnan(__x.imag()))) - return complex<_Tp>(__pi / _Tp(2), -__x.imag()); - complex<_Tp> __z = std::log(__x + std::sqrt(std::__sqr(__x) - _Tp(1))); - if (std::signbit(__x.imag())) diff --git a/library/cpp/neh/tcp2.cpp b/library/cpp/neh/tcp2.cpp index 641408973d9..fa24635e465 100644 --- a/library/cpp/neh/tcp2.cpp +++ b/library/cpp/neh/tcp2.cpp @@ -621,8 +621,8 @@ namespace { , State_(Init) , BuffSize_(TTcp2Options::InputBufferSize) , Buff_(new char[BuffSize_]) - , NeedCheckReqsQueue_(0) - , NeedCheckCancelsQueue_(0) + , NeedCheckReqsQueue_(false) + , NeedCheckCancelsQueue_(false) , GenReqId_(0) , LastSendedReqId_(0) { @@ -651,7 +651,7 @@ namespace { throw; } - AtomicSet(NeedCheckReqsQueue_, 1); + NeedCheckReqsQueue_.store(true); req->SetConnection(this); TAtomicBase state = AtomicGet(State_); if (Y_LIKELY(state == Connected)) { @@ -682,7 +682,7 @@ namespace { //called from client thread void Cancel(TRequestId id) { Cancels_.Enqueue(id); - AtomicSet(NeedCheckCancelsQueue_, 1); + NeedCheckCancelsQueue_.store(true); if (Y_LIKELY(AtomicGet(State_) == Connected)) { ProcessOutputCancelsQueue(); } @@ -758,7 +758,7 @@ namespace { do { if (asioThread) { - AtomicSet(NeedCheckCancelsQueue_, 0); + NeedCheckCancelsQueue_.store(false); TRequestId reqId; ProcessReqsInFlyQueue(); @@ -776,14 +776,14 @@ namespace { } } } - } else if (AtomicGet(NeedCheckCancelsQueue_)) { + } else if (NeedCheckCancelsQueue_.load()) { AS_.GetIOService().Post(std::bind(&TConnection::SendMessages, TConnectionRef(this), true)); return; } TRequestId lastReqId = 0; { - AtomicSet(NeedCheckReqsQueue_, 0); + NeedCheckReqsQueue_.store(false); TRequest* reqPtr; while (Reqs_.Dequeue(&reqPtr)) { @@ -824,7 +824,7 @@ namespace { OutputLock_.Release(); - if (!AtomicGet(NeedCheckReqsQueue_) && !AtomicGet(NeedCheckCancelsQueue_)) { + if (!NeedCheckReqsQueue_.load() && !NeedCheckCancelsQueue_.load()) { DBGOUT("TClient::SendMessages(exit2)"); return; } @@ -1031,9 +1031,9 @@ namespace { //output TSpinLock OutputLock_; //protect socket/buffers from simultaneous access from few threads - TAtomic NeedCheckReqsQueue_; + std::atomic<bool> NeedCheckReqsQueue_; TLockFreeQueue<TRequest*> Reqs_; - TAtomic NeedCheckCancelsQueue_; + std::atomic<bool> NeedCheckCancelsQueue_; TLockFreeQueue<TRequestId> Cancels_; TAdaptiveLock GenReqIdLock_; std::atomic<TRequestId> GenReqId_; @@ -1182,7 +1182,7 @@ namespace { , RemoteHost_(NNeh::PrintHostByRfc(*AS_->RemoteEndpoint().Addr())) , BuffSize_(TTcp2Options::InputBufferSize) , Buff_(new char[BuffSize_]) - , NeedCheckOutputQueue_(0) + , NeedCheckOutputQueue_(false) { DBGOUT("TServer::TConnection()"); } @@ -1383,7 +1383,7 @@ namespace { } void ProcessOutputQueue() { - AtomicSet(NeedCheckOutputQueue_, 1); + NeedCheckOutputQueue_.store(true); if (OutputLock_.TryAcquire()) { SendMessages(false); return; @@ -1396,7 +1396,7 @@ namespace { DBGOUT("TServer::SendMessages(enter)"); try { do { - AtomicUnlock(&NeedCheckOutputQueue_); + NeedCheckOutputQueue_.store(false); TAutoPtr<TOutputData> d; while (OutputData_.Dequeue(&d)) { d->MoveTo(OutputBuffers_); @@ -1415,7 +1415,7 @@ namespace { OutputLock_.Release(); - if (!AtomicGet(NeedCheckOutputQueue_)) { + if (!NeedCheckOutputQueue_.load()) { DBGOUT("Server::SendMessages(exit2): " << (int)!OutputLock_.IsLocked()); return; } @@ -1491,7 +1491,7 @@ namespace { //output TSpinLock OutputLock_; //protect socket/buffers from simultaneous access from few threads - TAtomic NeedCheckOutputQueue_; + std::atomic<bool> NeedCheckOutputQueue_; NNeh::TAutoLockFreeQueue<TOutputData> OutputData_; TOutputBuffers OutputBuffers_; }; diff --git a/library/cpp/type_info/type_io.cpp b/library/cpp/type_info/type_io.cpp index 12f0058fed1..f397466abce 100644 --- a/library/cpp/type_info/type_io.cpp +++ b/library/cpp/type_info/type_io.cpp @@ -13,6 +13,8 @@ #include <util/generic/vector.h> #include <util/generic/scope.h> +#include <optional> + namespace NTi::NIo { namespace { class TYsonDeserializer: private TNonCopyable { @@ -61,7 +63,8 @@ namespace NTi::NIo { const TType *Key, *Value; }; struct TDecimalData { - ui8 Precision, Scale; + std::optional<ui8> Precision; + std::optional<ui8> Scale; }; using TTypeData = std::variant< std::monostate, @@ -162,11 +165,19 @@ namespace NTi::NIo { } } else if (mapKey == "precision") { if (std::holds_alternative<std::monostate>(data)) { - data = TDecimalData{ReadSmallInt(R"("precision")"), 0}; + const auto precision = ReadSmallInt(R"("precision")"); + if (0 == precision) { + ythrow TDeserializationException() << R"(invalid zero "precision")"; + } + data = TDecimalData{precision, std::nullopt}; } else if (std::holds_alternative<TDecimalData>(data)) { auto& decimalData = std::get<TDecimalData>(data); - if (decimalData.Precision == 0) { - decimalData.Precision = ReadSmallInt(R"("precision")"); + if (!decimalData.Precision.has_value()) { + const auto precision = ReadSmallInt(R"("precision")"); + if (0 == precision) { + ythrow TDeserializationException() << R"(invalid zero "precision")"; + } + decimalData.Precision = precision; } else { ythrow TDeserializationException() << R"(duplicate key "precision")"; } @@ -175,10 +186,10 @@ namespace NTi::NIo { } } else if (mapKey == "scale") { if (std::holds_alternative<std::monostate>(data)) { - data = TDecimalData{0, ReadSmallInt(R"("scale")")}; + data = TDecimalData{std::nullopt, ReadSmallInt(R"("scale")")}; } else if (std::holds_alternative<TDecimalData>(data)) { auto& decimalData = std::get<TDecimalData>(data); - if (decimalData.Scale == 0) { + if (!decimalData.Scale.has_value()) { decimalData.Scale = ReadSmallInt(R"("scale")"); } else { ythrow TDeserializationException() << R"(duplicate key "scale")"; @@ -272,15 +283,15 @@ namespace NTi::NIo { auto& decimalData = std::get<TDecimalData>(data); - if (decimalData.Precision == 0) { + if (!decimalData.Precision.has_value()) { ythrow TDeserializationException() << R"(missing required key "precision" for type Decimal)"; } - if (decimalData.Scale == 0) { + if (!decimalData.Scale.has_value()) { ythrow TDeserializationException() << R"(missing required key "scale" for type Decimal)"; } - return Factory_->DecimalRaw(decimalData.Precision, decimalData.Scale); + return Factory_->DecimalRaw(decimalData.Precision.value(), decimalData.Scale.value()); } case ETypeName::Json: type = TJsonType::InstanceRaw(); @@ -420,8 +431,8 @@ namespace NTi::NIo { auto result = event.AsScalar().AsInt64(); - if (result <= 0) { - ythrow TDeserializationException() << what << " must be greater than zero"; + if (result < 0) { + ythrow TDeserializationException() << what << " must be greater or equal to zero"; } if (result > Max<ui8>()) { diff --git a/library/cpp/type_info/ut/type_deserialize.cpp b/library/cpp/type_info/ut/type_deserialize.cpp index 9e93a26bee3..7d5f60bad04 100644 --- a/library/cpp/type_info/ut/type_deserialize.cpp +++ b/library/cpp/type_info/ut/type_deserialize.cpp @@ -120,6 +120,40 @@ TEST(TypeDeserialize, Decimal) { ASSERT_DESERIALIZED_EQ(NTi::Decimal(20, 10), R"({type_name=decimal; precision=20; scale=10})"); ASSERT_DESERIALIZED_EQ(NTi::Decimal(20, 10), R"({scale=10; type_name=decimal; precision=20})"); ASSERT_DESERIALIZED_EQ(NTi::Decimal(10, 10), R"({type_name=decimal; precision=10; scale=10})"); + ASSERT_DESERIALIZED_EQ(NTi::Decimal(10, 0), R"({type_name=decimal; precision=10; scale=0})"); +} + +TEST(TypeDeserialize, DecimalBadTypeParameters) { + UNIT_ASSERT_EXCEPTION_CONTAINS( + []() { + NTi::NIo::DeserializeYson(*NTi::HeapFactory(), R"({type_name=decimal; precision=0; scale=10})"); + }(), + NTi::TDeserializationException, R"(invalid zero "precision")"); + UNIT_ASSERT_EXCEPTION_CONTAINS( + []() { + NTi::NIo::DeserializeYson(*NTi::HeapFactory(), R"({type_name=decimal; precision=-2; scale=10})"); + }(), + NTi::TDeserializationException, R"("precision" must be greater or equal to zero)"); + UNIT_ASSERT_EXCEPTION_CONTAINS( + []() { + NTi::NIo::DeserializeYson(*NTi::HeapFactory(), R"({type_name=decimal; precision=2; scale=-2})"); + }(), + NTi::TDeserializationException, R"("scale" must be greater or equal to zero)"); + UNIT_ASSERT_EXCEPTION_CONTAINS( + []() { + NTi::NIo::DeserializeYson(*NTi::HeapFactory(), R"({type_name=decimal; precision=2; scale=-2})"); + }(), + NTi::TDeserializationException, R"("scale" must be greater or equal to zero)"); + UNIT_ASSERT_EXCEPTION_CONTAINS( + []() { + NTi::NIo::DeserializeYson(*NTi::HeapFactory(), R"({type_name=decimal; precision=1000; scale=2})"); + }(), + NTi::TDeserializationException, R"("precision" is too big)"); + UNIT_ASSERT_EXCEPTION_CONTAINS( + []() { + NTi::NIo::DeserializeYson(*NTi::HeapFactory(), R"({type_name=decimal; precision=2; scale=1000})"); + }(), + NTi::TDeserializationException, R"("scale" is too big)"); } TEST(TypeDeserialize, DecimalMissingTypeParameters) { diff --git a/util/generic/enum_cast.cpp b/util/generic/enum_cast.cpp new file mode 100644 index 00000000000..8db625ef440 --- /dev/null +++ b/util/generic/enum_cast.cpp @@ -0,0 +1,12 @@ +#include "enum_cast.h" + +#include <util/generic/yexception.h> +#include <util/system/type_name.h> + +namespace NPrivate { + + [[noreturn]] void OnSafeCastToEnumUnexpectedValue(const std::type_info& valueTypeInfo) { + ythrow TBadCastException() << "Unexpected enum " << TypeName(valueTypeInfo) << " value"; + } + +} // namespace NPrivate diff --git a/util/generic/enum_cast.h b/util/generic/enum_cast.h new file mode 100644 index 00000000000..59dee47e889 --- /dev/null +++ b/util/generic/enum_cast.h @@ -0,0 +1,46 @@ +#pragma once + +#include <util/generic/cast.h> +#include <util/generic/serialized_enum.h> + +#include <optional> +#include <type_traits> +#include <typeinfo> + +namespace NPrivate { + + [[noreturn]] void OnSafeCastToEnumUnexpectedValue(const std::type_info& valueTypeInfo); + +} // namespace NPrivate + +/** + * Safely cast an integer value to the enum value. + * @throw yexception is case of unknown enum underlying type value + * + * @tparam TEnum enum type + */ +template <typename TEnum, typename TInteger, typename = std::enable_if_t<std::is_enum_v<TEnum>>> +TEnum SafeCastToEnum(TInteger integerValue) { + using TUnderlyingEnumType = std::underlying_type_t<TEnum>; + + std::optional<TUnderlyingEnumType> value; + try { + value = SafeIntegerCast<TUnderlyingEnumType>(integerValue); + } catch (const TBadCastException&) { + // SafeIntegerCast throws TBadCastException when TInteger cannot be cast + // to TUnderlyingEnumType but the exception message is about integer + // value cast being unsafe. + // SafeCastToEnum must throw TBadCastException with its own exception + // message even if integer cast fails. + } + + if (value.has_value()) { + for (TEnum enumValue : GetEnumAllValues<TEnum>()) { + if (static_cast<TUnderlyingEnumType>(enumValue) == *value) { + return enumValue; + } + } + } + + NPrivate::OnSafeCastToEnumUnexpectedValue(typeid(TEnum)); +} diff --git a/util/generic/enum_cast_ut.cpp b/util/generic/enum_cast_ut.cpp new file mode 100644 index 00000000000..0b96235bcaf --- /dev/null +++ b/util/generic/enum_cast_ut.cpp @@ -0,0 +1,49 @@ +#include "enum_cast.h" + +#include "enum_cast_ut.h" + +#include <library/cpp/testing/unittest/registar.h> + +Y_UNIT_TEST_SUITE(TestEnumCast) { + Y_UNIT_TEST(SafeCastToEnumTest) { + UNIT_ASSERT_VALUES_EQUAL(SafeCastToEnum<EIntEnum>(0), EIntEnum::Zero); + UNIT_ASSERT_VALUES_EQUAL(SafeCastToEnum<EIntEnum>(1), EIntEnum::One); + UNIT_ASSERT_VALUES_EQUAL(SafeCastToEnum<EIntEnum>(2), EIntEnum::Two); + UNIT_ASSERT_EXCEPTION(SafeCastToEnum<EIntEnum>(3), TBadCastException); + + UNIT_ASSERT_VALUES_EQUAL(SafeCastToEnum<EUcharEnum>(0), EUcharEnum::Zero); + UNIT_ASSERT_VALUES_EQUAL(SafeCastToEnum<EUcharEnum>(1), EUcharEnum::One); + UNIT_ASSERT_VALUES_EQUAL(SafeCastToEnum<EUcharEnum>(2), EUcharEnum::Two); + UNIT_ASSERT_EXCEPTION_CONTAINS( + SafeCastToEnum<EUcharEnum>(3), TBadCastException, + "Unexpected enum"); + int val1 = 256; + UNIT_ASSERT_EXCEPTION_CONTAINS( + SafeCastToEnum<EUcharEnum>(val1), TBadCastException, + "Unexpected enum"); + int val2 = -1; + UNIT_ASSERT_EXCEPTION_CONTAINS( + SafeCastToEnum<EUcharEnum>(val2), TBadCastException, + "Unexpected enum"); + int val3 = 2; + UNIT_ASSERT_VALUES_EQUAL(SafeCastToEnum<EUcharEnum>(val3), EUcharEnum::Two); + + UNIT_ASSERT_VALUES_EQUAL(SafeCastToEnum<EBoolEnum>(false), EBoolEnum::False); + UNIT_ASSERT_VALUES_EQUAL(SafeCastToEnum<EBoolEnum>(true), EBoolEnum::True); + + UNIT_ASSERT_VALUES_EQUAL(SafeCastToEnum<EUnscopedIntEnum>(2), UIE_TWO); + UNIT_ASSERT_EXCEPTION_CONTAINS( + SafeCastToEnum<EUnscopedIntEnum>(3), TBadCastException, + "Unexpected enum"); + UNIT_ASSERT_EXCEPTION_CONTAINS( + SafeCastToEnum<EUnscopedIntEnum>(9), TBadCastException, + "Unexpected enum"); + + UNIT_ASSERT_VALUES_EQUAL(SafeCastToEnum<ECharEnum>(static_cast<unsigned int>(0)), ECharEnum::Zero); + UNIT_ASSERT_VALUES_EQUAL(SafeCastToEnum<ECharEnum>(static_cast<short>(-1)), ECharEnum::MinusOne); + UNIT_ASSERT_VALUES_EQUAL(SafeCastToEnum<ECharEnum>(static_cast<int>(-2)), ECharEnum::MinusTwo); + UNIT_ASSERT_EXCEPTION_CONTAINS( + SafeCastToEnum<ECharEnum>(static_cast<int>(2)), TBadCastException, + "Unexpected enum"); + } +} // Y_UNIT_TEST_SUITE(TestEnumCast) diff --git a/util/generic/enum_cast_ut.h b/util/generic/enum_cast_ut.h new file mode 100644 index 00000000000..c0fb52864aa --- /dev/null +++ b/util/generic/enum_cast_ut.h @@ -0,0 +1,28 @@ +#pragma once + +enum class EIntEnum: int { + Zero = 0, + One = 1, + Two = 2 +}; + +enum class EUcharEnum: unsigned char { + Zero = 0, + One = 1, + Two = 2 +}; + +enum class ECharEnum: signed char { + Zero = 0, + MinusOne = -1, + MinusTwo = -2 +}; + +enum class EBoolEnum: bool { + False = false, + True = true +}; + +enum EUnscopedIntEnum { + UIE_TWO = 2, +}; diff --git a/util/generic/fwd.h b/util/generic/fwd.h index d1acc252563..906203d60e2 100644 --- a/util/generic/fwd.h +++ b/util/generic/fwd.h @@ -8,9 +8,6 @@ template <typename TCharType, typename TTraits = std::char_traits<TCharType>> class TBasicString; using TString = TBasicString<char>; -#ifndef TSTRING_IS_STD_STRING -using TCowString = TBasicString<char>; -#endif using TUtf16String = TBasicString<wchar16>; using TUtf32String = TBasicString<wchar32>; diff --git a/util/generic/string.h b/util/generic/string.h index f1d3cf32652..520fdc746f4 100644 --- a/util/generic/string.h +++ b/util/generic/string.h @@ -39,7 +39,6 @@ void ResizeUninitialized(std::basic_string<TCharType, TCharTraits, TAllocator>& #define Y_NOEXCEPT -#ifndef TSTRING_IS_STD_STRING template <class T> class TStringPtrOps { public: @@ -83,11 +82,11 @@ struct TStdString: public TRefCountHolder, public B { } static TStdString* NullStr() noexcept { - #ifdef _LIBCPP_VERSION +#ifdef _LIBCPP_VERSION return (TStdString*)NULL_STRING_REPR; - #else +#else return Singleton<TStdString>(); - #endif +#endif } private: @@ -157,7 +156,6 @@ private: TStringType& S_; size_t Pos_; }; -#endif template <typename TCharType, typename TTraits> class TBasicString: public TStringBase<TBasicString<TCharType, TTraits>, TCharType, TTraits> { diff --git a/util/generic/ut/ya.make b/util/generic/ut/ya.make index 6a605564fec..543b30278cd 100644 --- a/util/generic/ut/ya.make +++ b/util/generic/ut/ya.make @@ -12,6 +12,7 @@ SRCS( generic/buffer_ut.cpp generic/cast_ut.cpp generic/deque_ut.cpp + generic/enum_cast_ut.cpp generic/explicit_type_ut.cpp generic/flags_ut.cpp generic/function_ref_ut.cpp @@ -63,4 +64,6 @@ PEERDIR( library/cpp/containers/absl_flat_hash ) +GENERATE_ENUM_SERIALIZATION(generic/enum_cast_ut.h) + END() diff --git a/util/ya.make b/util/ya.make index dea1f1f7219..93bcc964a48 100644 --- a/util/ya.make +++ b/util/ya.make @@ -90,6 +90,7 @@ JOIN_SRCS( generic/buffer.cpp generic/cast.cpp generic/deque.cpp + generic/enum_cast.cpp generic/explicit_type.cpp generic/fastqueue.cpp generic/flags.cpp diff --git a/yql/essentials/cfg/tests/gateways-experimental.conf b/yql/essentials/cfg/tests/gateways-experimental.conf index f0bea49094f..fe6fa97660c 100644 --- a/yql/essentials/cfg/tests/gateways-experimental.conf +++ b/yql/essentials/cfg/tests/gateways-experimental.conf @@ -28,6 +28,11 @@ Yt { Name: "UseIntermediateStreams" Value: "true" } + + DefaultSettings { + Name: "UseColumnGroupsFromInputTables" + Value: "true" + } } Dq { diff --git a/yql/essentials/core/yql_expr_type_annotation.cpp b/yql/essentials/core/yql_expr_type_annotation.cpp index c968b6fea1c..80207d35bc2 100644 --- a/yql/essentials/core/yql_expr_type_annotation.cpp +++ b/yql/essentials/core/yql_expr_type_annotation.cpp @@ -4313,7 +4313,7 @@ bool IsDataTypeTzDate(EDataSlot dataSlot) { } bool IsDataTypeBigDate(EDataSlot dataSlot) { - return (NUdf::GetDataTypeInfo(dataSlot).Features & NUdf::BigDateType); + return (NUdf::GetDataTypeInfo(dataSlot).Features & NUdf::ExtDateType); } EDataSlot WithTzDate(EDataSlot dataSlot) { diff --git a/yql/essentials/minikql/comp_nodes/benchmark/block_coalesce/bench.cpp b/yql/essentials/minikql/comp_nodes/benchmark/block_coalesce/bench.cpp new file mode 100644 index 00000000000..aa1ea57703a --- /dev/null +++ b/yql/essentials/minikql/comp_nodes/benchmark/block_coalesce/bench.cpp @@ -0,0 +1,97 @@ +#include <benchmark/benchmark.h> + +#include <yql/essentials/ast/yql_expr.h> +#include <yql/essentials/minikql/computation/mkql_block_item.h> +#include <yql/essentials/minikql/comp_nodes/mkql_block_coalesce.h> +#include <yql/essentials/minikql/comp_nodes/mkql_block_coalesce_blending_helper.h> +#include <yql/essentials/minikql/comp_nodes/ut/mkql_computation_node_ut.h> +#include <yql/essentials/minikql/computation/mkql_computation_node_holders.h> +#include <yql/essentials/minikql/computation/mkql_block_builder.h> +#include <yql/essentials/ast/yql_expr_builder.h> +#include <yql/essentials/public/udf/arrow/memory_pool.h> +#include <yql/essentials/minikql/mkql_node_cast.h> +#include <yql/essentials/minikql/arrow/arrow_util.h> +#include <yql/essentials/core/arrow_kernels/request/request.h> +#include <yql/essentials/core/arrow_kernels/registry/registry.h> + +#include <arrow/compute/exec_internal.h> +#include <util/generic/string.h> + +namespace NKikimr::NMiniKQL { + +template <typename T> +static void BenchmarkFixedSizeCoalesce(benchmark::State& state) { + NYql::TExprContext exprCtx; + TSetup<false> setup; + bool secondIsScalar = state.range(1); + const auto type = setup.PgmBuilder->NewDataType(NUdf::TDataType<T>::Id); + auto* typeNode = exprCtx.template MakeType<NYql::TBlockExprType>( + exprCtx.template MakeType<NYql::TDataExprType>(NUdf::TDataType<T>::Slot)); + auto* optTypeNode = exprCtx.template MakeType<NYql::TBlockExprType>( + exprCtx.template MakeType<NYql::TOptionalExprType>( + exprCtx.template MakeType<NYql::TDataExprType>(NUdf::TDataType<T>::Slot))); + auto getBlockItem = [&](int64_t value) { + return TBlockItem(static_cast<T>(value)); + }; + + auto arrow_type = std::make_shared<typename TPrimitiveDataType<T>::TResult>(); + int arrayLength = state.range(0); + constexpr int batchSize = 30720; + + arrow::compute::ExecContext execCtx; + const auto drng = CreateDeterministicRandomProvider(1); + TTypeInfoHelper tif; + + auto getArray = [&](TType* type, bool isOptional) { + auto array_builder = MakeArrayBuilder(tif, type, *NYql::NUdf::GetYqlMemoryPool(), arrayLength, nullptr); + for (int i = 0; i < arrayLength; i++) { + if (!isOptional) { + array_builder->Add(getBlockItem(drng->GenRand64())); + } else { + array_builder->Add(drng->GenRand64() % 2 == 0 ? TBlockItem() : getBlockItem(drng->GenRand64())); + } + } + return array_builder->Build(/*finish=*/true); + }; + + arrow::compute::KernelContext ctx(&execCtx); + + const auto optType = setup.PgmBuilder->NewOptionalType(type); + auto left = getArray(optType, /*isOptional=*/true); + arrow::Datum right; + if (secondIsScalar) { + right = MakeScalarDatum<T>(drng->GenRand64()); + } else { + right = getArray(type, /*isOptional=*/false); + } + auto registry = CreateFunctionRegistry(CreateBuiltinRegistry()); + NYql::TKernelRequestBuilder b(*registry); + + b.AddBinaryOp(NYql::TKernelRequestBuilder::EBinaryOp::Coalesce, optTypeNode, typeNode, typeNode); + auto serializedNode = b.Serialize(); + auto nodeFactory = GetBuiltinFactory(); + auto kernel = NYql::LoadKernels(serializedNode, *registry, nodeFactory); + Y_ENSURE(kernel.size() == 1); + + for (auto _ : state) { + auto bi = ARROW_RESULT(arrow::compute::detail::ExecBatchIterator::Make({left, right}, batchSize)); + + arrow::compute::ExecBatch batch; + while (bi->Next(&batch)) { + arrow::Datum out; + Y_ENSURE(kernel[0]->exec(&ctx, batch, &out).ok()); + benchmark::DoNotOptimize(out); + } + } +} + +} // namespace NKikimr::NMiniKQL + +static void CustomArguments(benchmark::internal::Benchmark* b) { + b->Args({9000000, 0}); + b->Args({9000000, 1}); +} + +BENCHMARK(NKikimr::NMiniKQL::BenchmarkFixedSizeCoalesce<ui8>)->Unit(benchmark::kMillisecond)->Apply(CustomArguments); +BENCHMARK(NKikimr::NMiniKQL::BenchmarkFixedSizeCoalesce<ui16>)->Unit(benchmark::kMillisecond)->Apply(CustomArguments); +BENCHMARK(NKikimr::NMiniKQL::BenchmarkFixedSizeCoalesce<ui32>)->Unit(benchmark::kMillisecond)->Apply(CustomArguments); diff --git a/yql/essentials/minikql/comp_nodes/benchmark/block_coalesce/ya.make b/yql/essentials/minikql/comp_nodes/benchmark/block_coalesce/ya.make new file mode 100644 index 00000000000..832f401638c --- /dev/null +++ b/yql/essentials/minikql/comp_nodes/benchmark/block_coalesce/ya.make @@ -0,0 +1,22 @@ +G_BENCHMARK() + +PEERDIR( + yql/essentials/public/udf + yql/essentials/public/udf/arrow + yql/essentials/public/udf/service/exception_policy + yql/essentials/sql/pg_dummy + yql/essentials/minikql/comp_nodes/no_llvm + yql/essentials/minikql/codegen/no_llvm + yql/essentials/minikql/comp_nodes/no_llvm + yql/essentials/core/arrow_kernels/request + yql/essentials/core/arrow_kernels/registry +) + +YQL_LAST_ABI_VERSION() + +SRCS( + ../../ut/mkql_test_factory.cpp + bench.cpp +) + +END() diff --git a/yql/essentials/minikql/comp_nodes/benchmark/ya.make b/yql/essentials/minikql/comp_nodes/benchmark/ya.make new file mode 100644 index 00000000000..14aefb7f4ae --- /dev/null +++ b/yql/essentials/minikql/comp_nodes/benchmark/ya.make @@ -0,0 +1 @@ +RECURSE(block_coalesce) diff --git a/yql/essentials/minikql/comp_nodes/mkql_block_coalesce.cpp b/yql/essentials/minikql/comp_nodes/mkql_block_coalesce.cpp index 9ce760e9981..ae190e777e8 100644 --- a/yql/essentials/minikql/comp_nodes/mkql_block_coalesce.cpp +++ b/yql/essentials/minikql/comp_nodes/mkql_block_coalesce.cpp @@ -9,23 +9,113 @@ #include <yql/essentials/public/udf/arrow/block_builder.h> #include <yql/essentials/public/udf/arrow/block_reader.h> #include <yql/essentials/public/udf/arrow/util.h> +#include <yql/essentials/minikql/comp_nodes/mkql_block_coalesce_blending_helper.h> +#include <yql/essentials/minikql/defs.h> #include <arrow/util/bitmap_ops.h> - -namespace NKikimr { -namespace NMiniKQL { +namespace NKikimr::NMiniKQL { namespace { +template <typename TType> +void DispatchCoalesceImpl(const arrow::Datum& left, const arrow::Datum& right, arrow::Datum& out, bool outIsOptional, arrow::MemoryPool& pool) { + auto bitmap = outIsOptional ? ARROW_RESULT(arrow::AllocateBitmap((left.array()->length + left.array()->offset % 8) * sizeof(ui8), &pool)) : nullptr; + if (bitmap && bitmap->size() > 0) { + // Fill first byte with zero to prevent further uninitialized memory access. + bitmap->mutable_data()[0] = 0; + } + out = arrow::ArrayData::Make(right.type(), left.array()->length, + {std::move(bitmap), + ARROW_RESULT(arrow::AllocateBuffer((left.array()->length + left.array()->offset % 8) * sizeof(TType), &pool))}, + arrow::kUnknownNullCount, left.array()->offset % 8); + if (outIsOptional) { + if (right.is_scalar()) { + if (right.scalar()->is_valid) { + BlendCoalesce<TType, /*isScalar=*/true, /*rightIsOptional=*/true>( + TDatumStorageView<TType>(left), + TDatumStorageView<TType>(right), + TDatumStorageView<TType>(out), + left.array()->length); + } else { + out = left; + } + } else { + MKQL_ENSURE(TDatumStorageView<TType>(right).bitMask(), "Right array must have a null mask"); + BlendCoalesce<TType, /*isScalar=*/false, /*rightIsOptional=*/true>( + TDatumStorageView<TType>(left), + TDatumStorageView<TType>(right), + TDatumStorageView<TType>(out), + left.array()->length); + } + } else { + if (right.is_scalar()) { + BlendCoalesce<TType, /*isScalar=*/true, /*rightIsOptional=*/false>( + TDatumStorageView<TType>(left), + TDatumStorageView<TType>(right), + TDatumStorageView<TType>(out), + left.array()->length); + } else { + BlendCoalesce<TType, /*isScalar=*/false, /*rightIsOptional=*/false>( + TDatumStorageView<TType>(left), + TDatumStorageView<TType>(right), + TDatumStorageView<TType>(out), + left.array()->length); + } + } +} + +bool DispatchBlendingCoalesce(const arrow::Datum& left, const arrow::Datum& right, arrow::Datum& out, TType* rightType, arrow::MemoryPool& pool) { + TTypeInfoHelper typeInfoHelper; + bool rightIsOptional; + rightType = UnpackOptional(rightType, rightIsOptional); + MKQL_ENSURE(rightType, "Right type must be valid"); + + NYql::NUdf::TDataTypeInspector typeData(typeInfoHelper, rightType); + if (!typeData) { + return false; + } + auto typeId = typeData.GetTypeId(); + + switch (NYql::NUdf::GetDataSlot(typeId)) { + case NYql::NUdf::EDataSlot::Int8: + DispatchCoalesceImpl<i8>(left, right, out, /*outIsOptional=*/rightIsOptional, pool); + return true; + case NYql::NUdf::EDataSlot::Bool: + case NYql::NUdf::EDataSlot::Uint8: + DispatchCoalesceImpl<ui8>(left, right, out, /*outIsOptional=*/rightIsOptional, pool); + return true; + case NYql::NUdf::EDataSlot::Int16: + DispatchCoalesceImpl<i16>(left, right, out, /*outIsOptional=*/rightIsOptional, pool); + return true; + case NYql::NUdf::EDataSlot::Uint16: + DispatchCoalesceImpl<ui16>(left, right, out, /*outIsOptional=*/rightIsOptional, pool); + return true; + case NYql::NUdf::EDataSlot::Int32: + DispatchCoalesceImpl<i32>(left, right, out, /*outIsOptional=*/rightIsOptional, pool); + return true; + case NYql::NUdf::EDataSlot::Uint32: + DispatchCoalesceImpl<ui32>(left, right, out, /*outIsOptional=*/rightIsOptional, pool); + return true; + case NYql::NUdf::EDataSlot::Int64: + case NYql::NUdf::EDataSlot::Uint64: + case NYql::NUdf::EDataSlot::Double: + case NYql::NUdf::EDataSlot::Float: + // TODO(YQL-19645): Support other numeric types. + default: + // Fallback to general builder/reader pipeline. + return false; + } +} + class TCoalesceBlockExec { public: TCoalesceBlockExec(const std::shared_ptr<arrow::DataType>& returnArrowType, TType* firstItemType, TType* secondItemType, bool needUnwrapFirst) : ReturnArrowType_(returnArrowType) , FirstItemType_(firstItemType) , SecondItemType_(secondItemType) - , NeedUnwrapFirst_(needUnwrapFirst) - {} + , NeedUnwrapFirst_(needUnwrapFirst) { + } arrow::Status Exec(arrow::compute::KernelContext* ctx, const arrow::compute::ExecBatch& batch, arrow::Datum* res) const { const auto& first = batch.values[0]; @@ -53,6 +143,10 @@ public: builder->Add(secondValue, length); *res = builder->Build(true); } else { + if (DispatchBlendingCoalesce(first, second, *res, SecondItemType_, *ctx->memory_pool())) { + return arrow::Status::OK(); + } + auto builder = NYql::NUdf::MakeArrayBuilder(TTypeInfoHelper(), SecondItemType_, *ctx->memory_pool(), length, nullptr); auto secondValue = secondReader->GetScalarItem(*second.scalar()); for (size_t i = 0; i < length; ++i) { @@ -74,6 +168,10 @@ public: } else if ((size_t)firstArray.GetNullCount() == length) { *res = second; } else { + if (DispatchBlendingCoalesce(first, second, *res, SecondItemType_, *ctx->memory_pool())) { + return arrow::Status::OK(); + } + auto builder = NYql::NUdf::MakeArrayBuilder(TTypeInfoHelper(), SecondItemType_, *ctx->memory_pool(), length, nullptr); for (size_t i = 0; i < length; ++i) { auto firstItem = firstReader->GetItem(firstArray, i); @@ -110,9 +208,9 @@ std::shared_ptr<arrow::compute::ScalarKernel> MakeBlockCoalesceKernel(const TVec AS_TYPE(TBlockType, argTypes[1])->GetItemType(), needUnwrapFirst); auto kernel = std::make_shared<arrow::compute::ScalarKernel>(ConvertToInputTypes(argTypes), ConvertToOutputType(resultType), - [exec](arrow::compute::KernelContext* ctx, const arrow::compute::ExecBatch& batch, arrow::Datum* res) { - return exec->Exec(ctx, batch, res); - }); + [exec](arrow::compute::KernelContext* ctx, const arrow::compute::ExecBatch& batch, arrow::Datum* res) { + return exec->Exec(ctx, batch, res); + }); kernel->null_handling = arrow::compute::NullHandling::COMPUTED_NO_PREALLOCATE; return kernel; @@ -135,6 +233,8 @@ IComputationNode* WrapBlockCoalesce(TCallable& callable, const TComputationNodeF bool needUnwrapFirst = false; if (!firstItemType->IsSameType(*secondItemType)) { + // Here the left operand and right operand are of types T? and T respectively. + // The first operand must be unwrapped to obtain the resulting type. needUnwrapFirst = true; bool firstOptional; firstItemType = UnpackOptional(firstItemType, firstOptional); @@ -143,12 +243,11 @@ IComputationNode* WrapBlockCoalesce(TCallable& callable, const TComputationNodeF auto firstCompute = LocateNode(ctx.NodeLocator, callable, 0); auto secondCompute = LocateNode(ctx.NodeLocator, callable, 1); - TComputationNodePtrVector argsNodes = { firstCompute, secondCompute }; - TVector<TType*> argsTypes = { firstType, secondType }; + TComputationNodePtrVector argsNodes = {firstCompute, secondCompute}; + TVector<TType*> argsTypes = {firstType, secondType}; auto kernel = MakeBlockCoalesceKernel(argsTypes, secondType, needUnwrapFirst); return new TBlockFuncNode(ctx.Mutables, "Coalesce", std::move(argsNodes), argsTypes, *kernel, kernel); } -} -} +} // namespace NKikimr::NMiniKQL diff --git a/yql/essentials/minikql/comp_nodes/mkql_block_coalesce.h b/yql/essentials/minikql/comp_nodes/mkql_block_coalesce.h index 247af262517..009957314e6 100644 --- a/yql/essentials/minikql/comp_nodes/mkql_block_coalesce.h +++ b/yql/essentials/minikql/comp_nodes/mkql_block_coalesce.h @@ -6,5 +6,5 @@ namespace NMiniKQL { IComputationNode* WrapBlockCoalesce(TCallable& callable, const TComputationNodeFactoryContext& ctx); -} -} +} // namespace NMiniKQL +} // namespace NKikimr diff --git a/yql/essentials/minikql/comp_nodes/mkql_block_coalesce_blending_helper.h b/yql/essentials/minikql/comp_nodes/mkql_block_coalesce_blending_helper.h new file mode 100644 index 00000000000..148c2f48472 --- /dev/null +++ b/yql/essentials/minikql/comp_nodes/mkql_block_coalesce_blending_helper.h @@ -0,0 +1,233 @@ +#pragma once + +#include <yql/essentials/public/udf/arrow/bit_util.h> +#include <yql/essentials/utils/swap_bytes.h> + +#include <util/generic/yexception.h> +#include <util/system/types.h> + +#include <arrow/datum.h> +#include <arrow/util/bit_util.h> +#include <yql/essentials/minikql/defs.h> + +#include <algorithm> +#include <array> +#include <cstddef> + +namespace NKikimr::NMiniKQL { + +template <bool isScalar, bool isOptional> +Y_FORCE_INLINE bool GetBit(const ui8* bitMask, size_t offset) { + if constexpr (isScalar || !isOptional) { + return 1; + } else { + return arrow::BitUtil::GetBit(bitMask, offset); + } +} + +template <bool isOptional> +Y_FORCE_INLINE void SetBitTo(ui8* bitMask, size_t offset, bool bit_value) { + if constexpr (!isOptional) { + return; + } else { + arrow::BitUtil::SetBitTo(bitMask, offset, bit_value); + } +} + +template <typename TType> +TType* GetScalar(const arrow::Datum& datum) { + auto& buffer = arrow::internal::checked_cast<arrow::internal::PrimitiveScalarBase&>(*datum.scalar()); + return static_cast<TType*>(buffer.mutable_data()); +}; + +template <typename TType> +class TDatumStorageView { +public: + TDatumStorageView(const arrow::Datum& datum) + : Datum_(datum) { + } + + TType* data() { + if (Datum_.is_scalar()) { + return GetScalar<TType>(Datum_); + } else { + MKQL_ENSURE(Datum_.is_array(), "Invalid datum"); + MKQL_ENSURE(Datum_.array()->buffers.size() > 1, "Invalid datum"); + MKQL_ENSURE(Datum_.array()->buffers[1], "Invalid datum"); + return reinterpret_cast<TType*>(Datum_.array()->buffers[1]->mutable_data()); + } + } + + size_t offset() { + if (Datum_.is_scalar()) { + return 0; + } else { + MKQL_ENSURE(Datum_.is_array(), "Invalid datum"); + return Datum_.array()->offset; + } + } + + ui8* bitMask() { + if (Datum_.is_scalar()) { + return nullptr; + } else { + MKQL_ENSURE(Datum_.is_array(), "Invalid datum"); + MKQL_ENSURE(Datum_.array()->buffers.size() > 1, "Invalid datum"); + if (!Datum_.array()->buffers[0]) { + return nullptr; + } + return static_cast<ui8*>(Datum_.array()->buffers[0]->mutable_data()); + } + } + +private: + const arrow::Datum& Datum_; +}; + +template <typename TType, bool rightIsScalar, bool rightIsOptional> +void CoalesceByOneElement(size_t elements, + const ui8* leftBitMask, + const ui8* rightBitMask, + size_t leftOffset, + size_t rightOffset, + const TType* left, + const TType* right, + TType* out, + size_t outOffset, + ui8* outBitMask) { + for (size_t i = 0; i < elements; i++) { + if (arrow::BitUtil::GetBit(leftBitMask, i + leftOffset)) { + out[i + outOffset] = left[i + leftOffset]; + SetBitTo<rightIsOptional>(outBitMask, i + outOffset, true); + } else { + if constexpr (rightIsScalar) { + out[i + outOffset] = right[0]; + } else { + out[i + outOffset] = right[i + rightOffset]; + } + + SetBitTo<rightIsOptional>(outBitMask, + i + outOffset, + GetBit<rightIsScalar, rightIsOptional>(rightBitMask, i + rightOffset)); + } + } +} + +template <bool isAligned> +Y_FORCE_INLINE ui8 GetMaskValue(const ui8* mask, size_t offset, size_t bitMaskPosition) { + if constexpr (isAligned) { + return mask[offset / 8 + bitMaskPosition]; + } else { + const ui8 rightBitsCount = offset % 8; + const ui8 leftBitsCount = 8 - rightBitsCount; + ui8 leftMaskPart = mask[offset / 8 + bitMaskPosition] >> rightBitsCount; + ui8 rightMaskPart = mask[offset / 8 + bitMaskPosition + 1] & ((1 << rightBitsCount) - 1); + return rightMaskPart << leftBitsCount | leftMaskPart; + } +} + +template <typename TType, bool rightIsScalar, bool rightIsOptional, bool rightIsAlignedAsLeft> +void VectorizedCoalesce(const ui8* __restrict leftBitMask, + const ui8* __restrict rightBitMask, + size_t leftOffset, + size_t rightOffset, + const TType* __restrict left, + const TType* __restrict right, + TType* __restrict out, + size_t outOffset, + ui8* __restrict outBitMask, + size_t lengthInElements) { + constexpr auto sizeInBytes = sizeof(TType) * 8; + auto lengthInBytes = lengthInElements * sizeof(TType); + // Calculates the truncated length for working with data blocks. + auto truncatedLengthInBytes = + lengthInBytes / sizeInBytes * sizeInBytes; + for (size_t bytesProcessed = 0u, elemShift = 0, step = 0; + bytesProcessed < truncatedLengthInBytes; + bytesProcessed += sizeInBytes, elemShift += 8, step += 1) { + auto currentLeftMask = leftBitMask[leftOffset / 8 + step]; + if constexpr (rightIsOptional) { + // If right is optional, updates the output bit mask. + // Otherwise, the output bit mask doesn't exist. + if constexpr (rightIsScalar) { + outBitMask[outOffset / 8 + step] = 0xFFU; + } else { + outBitMask[outOffset / 8 + step] = + currentLeftMask | GetMaskValue<rightIsAlignedAsLeft>(rightBitMask, rightOffset, step); + } + } + // Expands the current mask to an array of unsigned of type. + auto expandedMask = NYql::NUdf::BitToByteExpand<std::make_unsigned_t<TType>>(currentLeftMask); + for (size_t j = 0u; j < 8; ++j) { + auto arrayIdx = elemShift + j; + auto rightArrayIdx = arrayIdx + rightOffset; + if constexpr (rightIsScalar) { + rightArrayIdx = 0; + } + // Performs the operation of mixing data from left and right based on the mask. + out[outOffset + arrayIdx] = (left[leftOffset + arrayIdx] & expandedMask[j]) | right[rightArrayIdx] & ~expandedMask[j]; + } + } +} +// Coalesces data from two inputs based on bit masks, handling either (array, array) or (array, scalar). +// This function efficiently merges data from 'left' and 'right' into 'out' array using 'left.bitMask()' and 'right.bitMask()'. +// The function is vectorization friendly, which can significantly improve performance. +template <typename TType, bool rightIsScalar, bool rightIsOptional> +void BlendCoalesce(TDatumStorageView<TType> left, + TDatumStorageView<TType> right, + TDatumStorageView<TType> out, + size_t lengthInElements) { + Y_ENSURE(left.offset() % 8 == out.offset() % 8); + auto firstElementsToProcess = std::min((8 - left.offset() % 8) % 8, lengthInElements); + // Process one by one until left mask is aligned by byte. + CoalesceByOneElement<TType, rightIsScalar, rightIsOptional>(firstElementsToProcess, + left.bitMask(), + right.bitMask(), + left.offset(), + right.offset(), + left.data(), + right.data(), + out.data(), + out.offset(), + out.bitMask()); + lengthInElements -= firstElementsToProcess; + + // Process vectorized. + if (left.offset() % 8 != right.offset() % 8) { + VectorizedCoalesce<TType, rightIsScalar, rightIsOptional, false>(left.bitMask(), + right.bitMask(), + left.offset() + firstElementsToProcess, + right.offset() + firstElementsToProcess, + left.data(), + right.data(), + out.data(), + out.offset() + firstElementsToProcess, + out.bitMask(), + lengthInElements); + } else { + VectorizedCoalesce<TType, rightIsScalar, rightIsOptional, true>(left.bitMask(), + right.bitMask(), + left.offset() + firstElementsToProcess, + right.offset() + firstElementsToProcess, + left.data(), + right.data(), + out.data(), + out.offset() + firstElementsToProcess, + out.bitMask(), + lengthInElements); + } + // Process remaining bits that take less memory than one byte. + size_t remainingBits = (lengthInElements) % 8; + CoalesceByOneElement<TType, rightIsScalar, rightIsOptional>(remainingBits, + left.bitMask(), + right.bitMask(), + left.offset() + firstElementsToProcess + lengthInElements - remainingBits, + right.offset() + firstElementsToProcess + lengthInElements - remainingBits, + left.data(), + right.data(), + out.data(), + out.offset() + firstElementsToProcess + lengthInElements - remainingBits, + out.bitMask()); +} + +} // namespace NKikimr::NMiniKQL diff --git a/yql/essentials/minikql/comp_nodes/mkql_blocks.cpp b/yql/essentials/minikql/comp_nodes/mkql_blocks.cpp index 5f54771ab7f..6bf6bc38894 100644 --- a/yql/essentials/minikql/comp_nodes/mkql_blocks.cpp +++ b/yql/essentials/minikql/comp_nodes/mkql_blocks.cpp @@ -1186,6 +1186,208 @@ private: const TVector<TType*> Types_; }; +struct TListFromBlocksState : public TComputationValue<TListFromBlocksState> { +public: + TListFromBlocksState(TMemoryUsageInfo* memInfo, TComputationContext& ctx, const TVector<TType*>& types, size_t blockLengthIndex) + : TComputationValue(memInfo) + , HolderFactory_(ctx.HolderFactory) + , BlockLengthIndex_(blockLengthIndex) + , Readers_(types.size()) + , Converters_(types.size()) + , ValuesDescr_(ToValueDescr(types)) + { + const auto& pgBuilder = ctx.Builder->GetPgBuilder(); + for (size_t i = 0; i < types.size(); ++i) { + if (i == blockLengthIndex) { + continue; + } + const TType* blockItemType = AS_TYPE(TBlockType, types[i])->GetItemType(); + Readers_[i] = MakeBlockReader(TTypeInfoHelper(), blockItemType); + Converters_[i] = MakeBlockItemConverter(TTypeInfoHelper(), blockItemType, pgBuilder); + } + } + + NUdf::TUnboxedValue GetRow() { + MKQL_ENSURE(CurrentRow_ < RowCount_, "Rows out of range"); + + NUdf::TUnboxedValue* outItems = nullptr; + auto row = HolderFactory_.CreateDirectArrayHolder(Readers_.size() - 1, outItems); + + size_t outputStructIdx = 0; + for (size_t i = 0; i < Readers_.size(); i++) { + if (i == BlockLengthIndex_) { + continue; + } + + const auto& datum = TArrowBlock::From(BlockItems_[i]).GetDatum(); + ARROW_DEBUG_CHECK_DATUM_TYPES(ValuesDescr_[i], datum.descr()); + + TBlockItem item; + if (datum.is_scalar()) { + item = Readers_[i]->GetScalarItem(*datum.scalar()); + } else { + MKQL_ENSURE(datum.is_array(), "Expecting array"); + item = Readers_[i]->GetItem(*datum.array(), CurrentRow_); + } + + outItems[outputStructIdx++] = Converters_[i]->MakeValue(item, HolderFactory_); + } + + CurrentRow_++; + return row; + } + + void SetBlock(NUdf::TUnboxedValue block) { + BlockItems_ = block.GetElements(); + Block_ = std::move(block); + + CurrentRow_ = 0; + RowCount_ = GetBlockCount(BlockItems_[BlockLengthIndex_]); + } + + bool HasRows() const { + return CurrentRow_ < RowCount_; + } + +private: + const THolderFactory& HolderFactory_; + + size_t CurrentRow_ = 0; + size_t RowCount_ = 0; + + size_t BlockLengthIndex_ = 0; + + NUdf::TUnboxedValue Block_; + const NUdf::TUnboxedValue* BlockItems_ = nullptr; + + std::vector<std::unique_ptr<IBlockReader>> Readers_; + std::vector<std::unique_ptr<IBlockItemConverter>> Converters_; + const std::vector<arrow::ValueDescr> ValuesDescr_; +}; + +class TListFromBlocksWrapper : public TMutableComputationNode<TListFromBlocksWrapper> +{ + using TBaseComputation = TMutableComputationNode<TListFromBlocksWrapper>; + +public: + TListFromBlocksWrapper(TComputationMutables& mutables, + IComputationNode* list, + TStructType* structType + ) + : TBaseComputation(mutables, EValueRepresentation::Boxed) + , List_(list) + { + for (size_t i = 0; i < structType->GetMembersCount(); i++) { + if (structType->GetMemberName(i) == NYql::BlockLengthColumnName) { + BlockLengthIndex_ = i; + Types_.push_back(nullptr); + continue; + } + Types_.push_back(structType->GetMemberType(i)); + } + } + + NUdf::TUnboxedValuePod DoCalculate(TComputationContext& ctx) const + { + return ctx.HolderFactory.Create<TListFromBlocksValue>( + ctx, + Types_, + BlockLengthIndex_, + List_->GetValue(ctx) + ); + } + +private: + class TListFromBlocksValue : public TCustomListValue { + using TState = TListFromBlocksState; + + public: + class TIterator : public TComputationValue<TIterator> { + public: + TIterator(TMemoryUsageInfo* memInfo, NUdf::TUnboxedValue&& blockState, NUdf::TUnboxedValue&& iter) + : TComputationValue<TIterator>(memInfo) + , BlockState_(std::move(blockState)) + , Iter_(std::move(iter)) + {} + + private: + bool Next(NUdf::TUnboxedValue& value) final { + auto& blockState = *static_cast<TState*>(BlockState_.AsBoxed().Get()); + if (!blockState.HasRows()) { + NUdf::TUnboxedValue block; + if (!Iter_.Next(block)) { + return false; + } + blockState.SetBlock(std::move(block)); + } + + value = blockState.GetRow(); + return true; + } + + private: + const NUdf::TUnboxedValue BlockState_; + const NUdf::TUnboxedValue Iter_; + }; + + TListFromBlocksValue(TMemoryUsageInfo* memInfo, TComputationContext& ctx, + const TVector<TType*>& types, ui32 blockLengthIndex, NUdf::TUnboxedValue&& list + ) + : TCustomListValue(memInfo) + , CompCtx_(ctx) + , Types_(types) + , BlockLengthIndex_(blockLengthIndex) + , List_(std::move(list)) + {} + + private: + NUdf::TUnboxedValue GetListIterator() const final { + auto state = CompCtx_.HolderFactory.Create<TState>(CompCtx_, Types_, BlockLengthIndex_); + return CompCtx_.HolderFactory.Create<TIterator>(std::move(state), List_.GetListIterator()); + } + + bool HasListItems() const final { + if (!HasItems.has_value()) { + HasItems = List_.HasListItems(); + } + return *HasItems; + } + + ui64 GetListLength() const final { + if (!Length.has_value()) { + auto iter = List_.GetListIterator(); + + Length = 0; + NUdf::TUnboxedValue block; + while (iter.Next(block)) { + auto blockLengthValue = block.GetElement(BlockLengthIndex_); + *Length += GetBlockCount(blockLengthValue); + } + } + + return *Length; + } + + private: + TComputationContext& CompCtx_; + + const TVector<TType*>& Types_; + size_t BlockLengthIndex_ = 0; + + NUdf::TUnboxedValue List_; + }; + + void RegisterDependencies() const final { + this->DependsOn(List_); + } + +private: + TVector<TType*> Types_; + size_t BlockLengthIndex_ = 0; + + IComputationNode* const List_; +}; + class TPrecomputedArrowNode : public IArrowKernelComputationNode { public: TPrecomputedArrowNode(const arrow::Datum& datum, TStringBuf kernelName) @@ -1645,6 +1847,23 @@ IComputationNode* WrapWideFromBlocks(TCallable& callable, const TComputationNode return new TWideFromBlocksFlowWrapper(ctx.Mutables, wideFlow, std::move(items)); } +IComputationNode* WrapListFromBlocks(TCallable& callable, const TComputationNodeFactoryContext& ctx) { + MKQL_ENSURE(callable.GetInputsCount() == 1, "Expected 1 args, got " << callable.GetInputsCount()); + + const auto inputType = callable.GetInput(0).GetStaticType(); + MKQL_ENSURE(inputType->IsList(), "Expected List as an input"); + const auto outputType = callable.GetType()->GetReturnType(); + MKQL_ENSURE(outputType->IsList(), "Expected List as an output"); + + const auto inputItemType = AS_TYPE(TListType, inputType)->GetItemType(); + MKQL_ENSURE(inputItemType->IsStruct(), "Expected List of Struct as an input"); + const auto outputItemType = AS_TYPE(TListType, outputType)->GetItemType(); + MKQL_ENSURE(outputItemType->IsStruct(), "Expected List of Struct as an output"); + + const auto list = LocateNode(ctx.NodeLocator, callable, 0); + return new TListFromBlocksWrapper(ctx.Mutables, list, AS_TYPE(TStructType, inputItemType)); +} + IComputationNode* WrapAsScalar(TCallable& callable, const TComputationNodeFactoryContext& ctx) { MKQL_ENSURE(callable.GetInputsCount() == 1, "Expected 1 args, got " << callable.GetInputsCount()); diff --git a/yql/essentials/minikql/comp_nodes/mkql_blocks.h b/yql/essentials/minikql/comp_nodes/mkql_blocks.h index 326e25e57d9..fc718212091 100644 --- a/yql/essentials/minikql/comp_nodes/mkql_blocks.h +++ b/yql/essentials/minikql/comp_nodes/mkql_blocks.h @@ -10,6 +10,7 @@ IComputationNode* WrapWideToBlocks(TCallable& callable, const TComputationNodeFa IComputationNode* WrapListToBlocks(TCallable& callable, const TComputationNodeFactoryContext& ctx); IComputationNode* WrapFromBlocks(TCallable& callable, const TComputationNodeFactoryContext& ctx); IComputationNode* WrapWideFromBlocks(TCallable& callable, const TComputationNodeFactoryContext& ctx); +IComputationNode* WrapListFromBlocks(TCallable& callable, const TComputationNodeFactoryContext& ctx); IComputationNode* WrapAsScalar(TCallable& callable, const TComputationNodeFactoryContext& ctx); IComputationNode* WrapReplicateScalar(TCallable& callable, const TComputationNodeFactoryContext& ctx); IComputationNode* WrapBlockExpandChunked(TCallable& callable, const TComputationNodeFactoryContext& ctx); diff --git a/yql/essentials/minikql/comp_nodes/mkql_factory.cpp b/yql/essentials/minikql/comp_nodes/mkql_factory.cpp index 9615a5d4d78..e32aaf1fd79 100644 --- a/yql/essentials/minikql/comp_nodes/mkql_factory.cpp +++ b/yql/essentials/minikql/comp_nodes/mkql_factory.cpp @@ -291,6 +291,7 @@ struct TCallableComputationNodeBuilderFuncMapFiller { {"WideTopSortBlocks", &WrapWideTopSortBlocks}, {"WideSortBlocks", &WrapWideSortBlocks}, {"ListToBlocks", &WrapListToBlocks}, + {"ListFromBlocks", &WrapListFromBlocks}, {"AsScalar", &WrapAsScalar}, {"ReplicateScalar", &WrapReplicateScalar}, {"BlockCoalesce", &WrapBlockCoalesce}, diff --git a/yql/essentials/minikql/comp_nodes/ut/mkql_block_coalesce_ut.cpp b/yql/essentials/minikql/comp_nodes/ut/mkql_block_coalesce_ut.cpp new file mode 100644 index 00000000000..1a6a044bcf7 --- /dev/null +++ b/yql/essentials/minikql/comp_nodes/ut/mkql_block_coalesce_ut.cpp @@ -0,0 +1,295 @@ +#include <yql/essentials/minikql/comp_nodes/mkql_block_coalesce.h> + +#include <yql/essentials/core/arrow_kernels/request/request.h> +#include <yql/essentials/minikql/comp_nodes/mkql_block_coalesce_blending_helper.h> +#include <yql/essentials/minikql/comp_nodes/ut/mkql_computation_node_ut.h> +#include <yql/essentials/minikql/computation/mkql_computation_node_holders.h> +#include <yql/essentials/minikql/computation/mkql_block_builder.h> +#include <yql/essentials/ast/yql_expr_builder.h> +#include <yql/essentials/public/udf/arrow/memory_pool.h> +#include <yql/essentials/minikql/mkql_node_cast.h> +#include <yql/essentials/minikql/arrow/arrow_util.h> +#include <yql/essentials/core/arrow_kernels/request/request.h> +#include <yql/essentials/core/arrow_kernels/registry/registry.h> + +#include <arrow/compute/exec_internal.h> + +namespace NKikimr::NMiniKQL { + +namespace { + +#define UNIT_TEST_WITH_INTEGER(TestName) \ + template <typename TTestType> \ + void TestName##Execute(NUnitTest::TTestContext& ut_context Y_DECLARE_UNUSED); \ + Y_UNIT_TEST(TestName##i8) { \ + TestName##Execute<i8>(ut_context); \ + } \ + Y_UNIT_TEST(TestName##ui8) { \ + TestName##Execute<ui8>(ut_context); \ + } \ + Y_UNIT_TEST(TestName##i16) { \ + TestName##Execute<i16>(ut_context); \ + } \ + Y_UNIT_TEST(TestName##ui16) { \ + TestName##Execute<ui16>(ut_context); \ + } \ + Y_UNIT_TEST(TestName##i32) { \ + TestName##Execute<i32>(ut_context); \ + } \ + Y_UNIT_TEST(TestName##ui32) { \ + TestName##Execute<ui32>(ut_context); \ + } \ + \ + template <typename TTestType> \ + void TestName##Execute(NUnitTest::TTestContext& ut_context Y_DECLARE_UNUSED) + +template <typename T> +arrow::Datum GenerateArray(TTypeInfoHelper& typeInfoHelper, TType* type, std::vector<TMaybe<T>>& array, size_t offset) { + auto rightArrayBuilder = MakeArrayBuilder(typeInfoHelper, type, *NYql::NUdf::GetYqlMemoryPool(), array.size() + offset, nullptr); + for (size_t i = 0; i < offset; i++) { + if (array[0]) { + rightArrayBuilder->Add(TBlockItem(array[0].GetRef())); + } else { + rightArrayBuilder->Add(TBlockItem()); + } + } + + for (size_t i = 0; i < array.size(); i++) { + if (array[i]) { + rightArrayBuilder->Add(TBlockItem(array[i].GetRef())); + } else { + rightArrayBuilder->Add(TBlockItem()); + } + }; + + arrow::Datum resultArray = rightArrayBuilder->Build(/*finish=*/true); + resultArray.array()->offset += offset; + resultArray.array()->null_count = arrow::kUnknownNullCount; + resultArray.array()->length -= offset; + return resultArray; +} + +enum class ERightOperandType { + SCALAR, + ARRAY, + OPTIONAL_ARRAY, + OPTIONAL_SCALAR +}; + +template <typename T, ERightOperandType rightType = ERightOperandType::ARRAY> +void TestBlockCoalesceForVector(std::vector<TMaybe<T>> left, std::vector<TMaybe<T>> right, std::vector<TMaybe<T>> expected, size_t leftOffset, size_t rightOffset) { + TSetup<false> setup; + NYql::TExprContext exprCtx; + auto* type = setup.PgmBuilder->NewDataType(NUdf::TDataType<T>::Id); + auto* typeNode = exprCtx.template MakeType<NYql::TBlockExprType>( + exprCtx.template MakeType<NYql::TDataExprType>(NUdf::TDataType<T>::Slot)); + + auto* optType = setup.PgmBuilder->NewOptionalType(type); + auto* optTypeNode = exprCtx.template MakeType<NYql::TBlockExprType>( + exprCtx.template MakeType<NYql::TOptionalExprType>( + exprCtx.template MakeType<NYql::TDataExprType>(NUdf::TDataType<T>::Slot))); + if (rightType == ERightOperandType::OPTIONAL_ARRAY || rightType == ERightOperandType::OPTIONAL_SCALAR) { + // Make both operands optional. + type = optType; + typeNode = optTypeNode; + } + TTypeInfoHelper typeInfoHelper; + arrow::Datum leftOperand = GenerateArray(typeInfoHelper, optType, left, leftOffset); + + arrow::compute::ExecContext execCtx; + arrow::compute::KernelContext ctx(&execCtx); + + arrow::Datum rightOperand; + if constexpr (rightType == ERightOperandType::SCALAR) { + rightOperand = MakeScalarDatum<T>(right[0].GetRef()); + } else if constexpr (rightType == ERightOperandType::OPTIONAL_SCALAR) { + if (right[0]) { + rightOperand = MakeScalarDatum<T>(right[0].GetRef()); + } else { + rightOperand = MakeScalarDatum<T>(0); + rightOperand.scalar()->is_valid = false; + } + } else { + rightOperand = GenerateArray(typeInfoHelper, type, right, rightOffset); + } + auto bi = arrow::compute::detail::ExecBatchIterator::Make({leftOperand, rightOperand}, 1000).ValueOrDie(); + arrow::compute::ExecBatch batch; + UNIT_ASSERT(bi->Next(&batch)); + std::shared_ptr<arrow::DataType> arrowType; + UNIT_ASSERT(ConvertArrowType(type, arrowType, [](TType*) {})); + arrow::Datum out; + auto registry = CreateFunctionRegistry(CreateBuiltinRegistry()); + NYql::TKernelRequestBuilder b(*registry); + + b.AddBinaryOp(NYql::TKernelRequestBuilder::EBinaryOp::Coalesce, optTypeNode, typeNode, optTypeNode); + auto serializedNode = b.Serialize(); + auto nodeFactory = GetBuiltinFactory(); + auto kernel = NYql::LoadKernels(serializedNode, *registry, nodeFactory); + Y_ENSURE(kernel.size() == 1); + Y_ENSURE(kernel[0]->exec(&ctx, batch, &out).ok()); + + arrow::Datum expectedArrowArray = GenerateArray(typeInfoHelper, type, expected, 0); + UNIT_ASSERT_EQUAL_C(out, expectedArrowArray, "Expected : " << expectedArrowArray.make_array()->ToString() << "\n but got : " << out.make_array()->ToString()); +} + +template <typename T, ERightOperandType rightType = ERightOperandType::ARRAY> +void TestBlockCoalesce(std::vector<TMaybe<T>> left, std::vector<TMaybe<T>> right, std::vector<TMaybe<T>> expected) { + // First test different offsets. + for (size_t leftOffset = 0; leftOffset < 10; leftOffset++) { + for (size_t rightOffset = 0; rightOffset < 10; rightOffset++) { + TestBlockCoalesceForVector<T, rightType>(left, right, expected, leftOffset, rightOffset); + } + } + + // Second test different sizes. + // Also test only small subset of offsets to prevent a combinatorial explosion. + while (left.size() > 1 || right.size() > 1 || expected.size() > 1) { + for (size_t leftOffset = 0; leftOffset < 2; leftOffset++) { + for (size_t rightOffset = 0; rightOffset < 2; rightOffset++) { + TestBlockCoalesceForVector<T, rightType>(left, right, expected, leftOffset, rightOffset); + } + } + if (left.size() > 1) { + left.pop_back(); + } + if (right.size() > 1) { + right.pop_back(); + } + if (expected.size() > 1) { + expected.pop_back(); + } + } +} + +void BlockCoalesceGraphTest(size_t length, size_t offset) { + TSetup<false> setup; + TProgramBuilder& pb = *setup.PgmBuilder; + + const auto ui32Type = pb.NewDataType(NUdf::TDataType<ui32>::Id); + const auto optui32Type = pb.NewOptionalType(ui32Type); + + const auto inputTupleType = pb.NewTupleType({ui32Type, optui32Type}); + const auto outputTupleType = pb.NewTupleType({ui32Type}); + + TRuntimeNode::TList right; + TVector<bool> isNull; + + const auto drng = CreateDeterministicRandomProvider(1); + std::vector<ui32> rightValues; + for (size_t i = 0; i < length; i++) { + const ui32 randomValue = drng->GenRand(); + const auto maybeNull = (randomValue % 2 == 0) + ? pb.NewOptional(pb.NewDataLiteral<ui32>(randomValue / 2)) + : pb.NewEmptyOptionalDataLiteral(NUdf::TDataType<ui32>::Id); + + const auto inputTuple = pb.NewTuple(inputTupleType, { + pb.NewDataLiteral<ui32>(i), + maybeNull, + }); + + right.push_back(inputTuple); + rightValues.push_back(randomValue / 2); + isNull.push_back((randomValue % 2) != 0); + } + + const auto list = pb.NewList(inputTupleType, std::move(right)); + + auto node = pb.ToFlow(list); + node = pb.ExpandMap(node, [&](TRuntimeNode item) -> TRuntimeNode::TList { + return { + pb.Nth(item, 0), + pb.Nth(item, 1), + }; + }); + + node = pb.ToFlow(pb.WideToBlocks(pb.FromFlow(node))); + if (offset > 0) { + node = pb.WideSkipBlocks(node, pb.NewDataLiteral<ui64>(offset)); + } + node = pb.WideMap(node, [&](TRuntimeNode::TList items) -> TRuntimeNode::TList { + Y_ENSURE(items.size() == 3); + return { + pb.BlockCoalesce(items[1], items[0]), + items[2]}; + }); + + node = pb.ToFlow(pb.WideFromBlocks(pb.FromFlow(node))); + node = pb.NarrowMap(node, [&](TRuntimeNode::TList items) -> TRuntimeNode { + return pb.NewTuple(outputTupleType, {items[0]}); + }); + + const auto pgmReturn = pb.Collect(node); + const auto graph = setup.BuildGraph(pgmReturn); + + const auto iterator = graph->GetValue().GetListIterator(); + for (size_t i = 0; i < length; i++) { + if (i < offset) { + continue; + } + NUdf::TUnboxedValue outputTuple; + UNIT_ASSERT(iterator.Next(outputTuple)); + if (isNull[i]) { + UNIT_ASSERT_EQUAL(outputTuple.GetElement(0).Get<ui32>(), i); + } else { + UNIT_ASSERT_EQUAL(outputTuple.GetElement(0).Get<ui32>(), rightValues[i]); + } + } +} + +} // namespace + +Y_UNIT_TEST_SUITE(TMiniKQLBlockCoalesceTest) { + +Y_UNIT_TEST(CoalesceGraphTest) { + for (auto offset : {0, 1, 2, 3, 5, 7, 8, 11, 14, 16}) { + BlockCoalesceGraphTest(1000, offset); + } +} + +UNIT_TEST_WITH_INTEGER(KernelRightIsNotNullArray) { + auto max = std::numeric_limits<TTestType>::max(); + auto min = std::numeric_limits<TTestType>::min(); + TestBlockCoalesce<TTestType, ERightOperandType::ARRAY>({Nothing(), 2, 3, Nothing(), 5, 6, 7, max, 9, Nothing(), 11, 12, 13, Nothing(), Nothing(), Nothing(), min, Nothing(), 19, 20}, + {101, 102, 103, 104, 105, 106, 107, 108, 109, 110, 111, 112, 113, 114, 115, 116, 117, 118, 119, 120}, + {101, 2, 3, 104, 5, 6, 7, max, 9, 110, 11, 12, 13, 114, 115, 116, min, 118, 19, 20}); +} + +UNIT_TEST_WITH_INTEGER(KernelRightIsScalar) { + auto max = std::numeric_limits<TTestType>::max(); + auto min = std::numeric_limits<TTestType>::min(); + + TestBlockCoalesce<TTestType, ERightOperandType::SCALAR>({Nothing(), 2, 3, Nothing(), 5, 6, 7, max, 9, Nothing(), 11, 12, 13, Nothing(), Nothing(), Nothing(), min, Nothing(), 19, 20}, + {77}, + {77, 2, 3, 77, 5, 6, 7, max, 9, 77, 11, 12, 13, 77, 77, 77, min, 77, 19, 20}); +} + +UNIT_TEST_WITH_INTEGER(KernelRightIsOptionalArray) { + auto max = std::numeric_limits<TTestType>::max(); + auto min = std::numeric_limits<TTestType>::min(); + + TestBlockCoalesce<TTestType, ERightOperandType::OPTIONAL_ARRAY>({Nothing(), 2, 3, Nothing(), 5, 6, 7, max, 9, Nothing(), 11, 12, 13, Nothing(), Nothing(), Nothing(), min, Nothing(), 19, 20}, + {Nothing(), 102, Nothing(), 104, Nothing(), 106, 107, 108, 109, 110, 111, 112, 113, 114, Nothing(), 116, 117, 118, Nothing(), 120}, + {Nothing(), 2, 3, 104, 5, 6, 7, max, 9, 110, 11, 12, 13, 114, Nothing(), 116, min, 118, 19, 20}); +} + +UNIT_TEST_WITH_INTEGER(KernelRightIsOptionalInvalidScalar) { + auto max = std::numeric_limits<TTestType>::max(); + auto min = std::numeric_limits<TTestType>::min(); + + TestBlockCoalesce<TTestType, ERightOperandType::OPTIONAL_SCALAR>({Nothing(), 2, 3, Nothing(), 5, 6, 7, max, 9, Nothing(), 11, 12, 13, Nothing(), Nothing(), Nothing(), min, Nothing(), 19, 20}, + {Nothing()}, + {Nothing(), 2, 3, Nothing(), 5, 6, 7, max, 9, Nothing(), 11, 12, 13, Nothing(), Nothing(), Nothing(), min, Nothing(), 19, 20}); +} + +UNIT_TEST_WITH_INTEGER(KernelRightIsOptionalValidScalar) { + auto max = std::numeric_limits<TTestType>::max(); + auto min = std::numeric_limits<TTestType>::min(); + + TestBlockCoalesce<TTestType, ERightOperandType::OPTIONAL_SCALAR>({Nothing(), 2, 3, Nothing(), 5, 6, 7, max, 9, Nothing(), 11, 12, 13, Nothing(), Nothing(), Nothing(), min, Nothing(), 19, 20}, + {77}, + {77, 2, 3, 77, 5, 6, 7, max, 9, 77, 11, 12, 13, 77, 77, 77, min, 77, 19, 20}); +} + +} // Y_UNIT_TEST_SUITE(TMiniKQLBlockCoalesceTest) + +} // namespace NKikimr::NMiniKQL diff --git a/yql/essentials/minikql/comp_nodes/ut/mkql_block_exists_ut.cpp b/yql/essentials/minikql/comp_nodes/ut/mkql_block_exists_ut.cpp index 373ef67f4b1..3b9d41e2b9b 100644 --- a/yql/essentials/minikql/comp_nodes/ut/mkql_block_exists_ut.cpp +++ b/yql/essentials/minikql/comp_nodes/ut/mkql_block_exists_ut.cpp @@ -23,7 +23,7 @@ void DoBlockExistsOffset(size_t length, size_t offset) { TVector<bool> isNull; static_assert(MaxBlockSizeInBytes % 4 == 0); - const auto drng = CreateDeterministicRandomProvider(std::time(nullptr)); + const auto drng = CreateDeterministicRandomProvider(1); for (size_t i = 0; i < length; i++) { const ui64 randomValue = drng->GenRand(); diff --git a/yql/essentials/minikql/comp_nodes/ut/mkql_blocks_ut.cpp b/yql/essentials/minikql/comp_nodes/ut/mkql_blocks_ut.cpp index 15dd02bf30f..c2faa29bed5 100644 --- a/yql/essentials/minikql/comp_nodes/ut/mkql_blocks_ut.cpp +++ b/yql/essentials/minikql/comp_nodes/ut/mkql_blocks_ut.cpp @@ -80,6 +80,71 @@ namespace { }); })); } + + // Hand-made variant using WideToBlocks (in order to test ListFromBlocks by well-tested nodes rather than actual ListToBlocks) + TRuntimeNode ListToBlocks(TProgramBuilder& pb, TRuntimeNode list) { + const auto wideBlocksStream = pb.WideToBlocks(pb.FromFlow(pb.ExpandMap(pb.ToFlow(list), [&](TRuntimeNode item) -> TRuntimeNode::TList { + return { + pb.Member(item, "key"), + pb.Member(item, "value") + }; + }))); + + return pb.Collect(pb.NarrowMap(pb.ToFlow(wideBlocksStream), [&](TRuntimeNode::TList items) -> TRuntimeNode { + return pb.NewStruct({ + {"key", items[0]}, + {"value", items[1]}, + {NYql::BlockLengthColumnName, items[2]} + }); + })); + } + + using TListTransformer = std::function<TRuntimeNode(TProgramBuilder&, TRuntimeNode)>; + + void DoTestListToAndFromBlocks(TSetup<false>& setup, TListTransformer listToBlocksImpl, TListTransformer listFromBlocksImpl) { + constexpr size_t TEST_SIZE = 1 << 16; + const TString hugeString(128, '1'); + + TProgramBuilder& pb = *setup.PgmBuilder; + + const auto ui64Type = pb.NewDataType(NUdf::TDataType<ui64>::Id); + const auto strType = pb.NewDataType(NUdf::TDataType<char*>::Id); + + const auto structType = pb.NewStructType({ + {"key", ui64Type}, + {"value", strType}, + }); + + TVector<TRuntimeNode> listItems; + for (size_t i = 0; i < TEST_SIZE; i++) { + const auto str = hugeString + ToString(i); + listItems.push_back(pb.NewStruct({ + {"key", pb.NewDataLiteral<ui64>(i)}, + // Huge string is used to make less rows fit into one block (in order to test output slicing) + {"value", pb.NewDataLiteral<NUdf::EDataSlot::String>(str)}, + })); + } + + const auto list = pb.NewList(structType, listItems); + const auto blockList = listToBlocksImpl(pb, list); + + const auto graph = setup.BuildGraph(listFromBlocksImpl(pb, blockList)); + const auto iterator = graph->GetValue().GetListIterator(); + + NUdf::TUnboxedValue structValue; + for (size_t i = 0; i < TEST_SIZE; i++) { + const auto str = hugeString + ToString(i); + UNIT_ASSERT(iterator.Next(structValue)); + + const auto key = structValue.GetElement(0); + UNIT_ASSERT_VALUES_EQUAL(key.Get<ui64>(), i); + const auto value = structValue.GetElement(1); + UNIT_ASSERT_VALUES_EQUAL(std::string_view(value.AsStringRef()), str); + } + + UNIT_ASSERT(!iterator.Next(structValue)); + UNIT_ASSERT(!iterator.Next(structValue)); + } } Y_UNIT_TEST_SUITE(TMiniKQLBlocksTest) { @@ -170,49 +235,13 @@ Y_UNIT_TEST_LLVM(TestWideToBlocks) { } Y_UNIT_TEST(TestListToBlocks) { - constexpr size_t TEST_SIZE = 1 << 16; - const TString hugeString(128, '1'); - TSetup<false> setup; - TProgramBuilder& pb = *setup.PgmBuilder; - const auto ui64Type = pb.NewDataType(NUdf::TDataType<ui64>::Id); - const auto strType = pb.NewDataType(NUdf::TDataType<char*>::Id); - - const auto structType = pb.NewStructType({ - {"key", ui64Type}, - {"value", strType}, - }); - - TVector<TRuntimeNode> listItems; - for (size_t i = 0; i < TEST_SIZE; i++) { - const auto str = hugeString + ToString(i); - listItems.push_back(pb.NewStruct({ - {"key", pb.NewDataLiteral<ui64>(i)}, - // Huge string is used to make less rows fit into one block (in order to test output slicing) - {"value", pb.NewDataLiteral<NUdf::EDataSlot::String>(str)}, - })); - } - - const auto list = pb.NewList(structType, listItems); - const auto blockList = pb.ListToBlocks(list); - - const auto graph = setup.BuildGraph(ListFromBlocks(pb, blockList)); - const auto iterator = graph->GetValue().GetListIterator(); - - NUdf::TUnboxedValue structValue; - for (size_t i = 0; i < TEST_SIZE; i++) { - const auto str = hugeString + ToString(i); - UNIT_ASSERT(iterator.Next(structValue)); - - const auto key = structValue.GetElement(0); - UNIT_ASSERT_VALUES_EQUAL(key.Get<ui64>(), i); - const auto value = structValue.GetElement(1); - UNIT_ASSERT_VALUES_EQUAL(std::string_view(value.AsStringRef()), str); - } - - UNIT_ASSERT(!iterator.Next(structValue)); - UNIT_ASSERT(!iterator.Next(structValue)); + DoTestListToAndFromBlocks( + setup, + [] (TProgramBuilder& pb, TRuntimeNode list) { return pb.ListToBlocks(list); }, + ListFromBlocks + ); } Y_UNIT_TEST(TestListToBlocksMultiUsage) { @@ -700,6 +729,16 @@ Y_UNIT_TEST_LLVM(TestWideFromBlocks) { UNIT_ASSERT(!iterator.Next(item)); } +Y_UNIT_TEST(TestListFromBlocks) { + TSetup<false> setup; + + DoTestListToAndFromBlocks( + setup, + ListToBlocks, + [] (TProgramBuilder& pb, TRuntimeNode list) { return pb.ListFromBlocks(list); } + ); +} + Y_UNIT_TEST_LLVM(TestWideToAndFromBlocks) { TSetup<LLVM> setup; TProgramBuilder& pb = *setup.PgmBuilder; @@ -741,6 +780,16 @@ Y_UNIT_TEST_LLVM(TestWideToAndFromBlocks) { UNIT_ASSERT(!iterator.Next(item)); UNIT_ASSERT(!iterator.Next(item)); } + +Y_UNIT_TEST(TestListToAndFromBlocks) { + TSetup<false> setup; + + DoTestListToAndFromBlocks( + setup, + [] (TProgramBuilder& pb, TRuntimeNode list) { return pb.ListToBlocks(list); }, + [] (TProgramBuilder& pb, TRuntimeNode list) { return pb.ListFromBlocks(list); } + ); +} } Y_UNIT_TEST_SUITE(TMiniKQLDirectKernelTest) { diff --git a/yql/essentials/minikql/comp_nodes/ut/ya.make.inc b/yql/essentials/minikql/comp_nodes/ut/ya.make.inc index 46a1ad7f118..73c063d865c 100644 --- a/yql/essentials/minikql/comp_nodes/ut/ya.make.inc +++ b/yql/essentials/minikql/comp_nodes/ut/ya.make.inc @@ -2,6 +2,10 @@ FORK_SUBTESTS() SPLIT_FACTOR(60) +IF (SANITIZER_TYPE == "address") + SPLIT_FACTOR(100) +ENDIF() + IF (SANITIZER_TYPE == "thread" OR WITH_VALGRIND) TIMEOUT(3600) SIZE(LARGE) @@ -22,6 +26,7 @@ SET(ORIG_SOURCES mkql_test_factory.cpp mkql_bit_utils_ut.cpp mkql_block_compress_ut.cpp + mkql_block_coalesce_ut.cpp mkql_block_exists_ut.cpp mkql_block_skiptake_ut.cpp mkql_block_map_join_ut_utils.cpp @@ -75,6 +80,8 @@ SET(ORIG_SOURCES ) PEERDIR( + yql/essentials/core/arrow_kernels/request + yql/essentials/core/arrow_kernels/registry yql/essentials/public/udf yql/essentials/public/udf/arrow yql/essentials/public/udf/service/exception_policy diff --git a/yql/essentials/minikql/comp_nodes/ya.make b/yql/essentials/minikql/comp_nodes/ya.make index 93f0314c0ed..e726621ec07 100644 --- a/yql/essentials/minikql/comp_nodes/ya.make +++ b/yql/essentials/minikql/comp_nodes/ya.make @@ -17,4 +17,5 @@ RECURSE( RECURSE_FOR_TESTS( llvm16/ut + benchmark ) diff --git a/yql/essentials/minikql/computation/mkql_block_impl.cpp b/yql/essentials/minikql/computation/mkql_block_impl.cpp index ba9238e45a7..e960e8562e3 100644 --- a/yql/essentials/minikql/computation/mkql_block_impl.cpp +++ b/yql/essentials/minikql/computation/mkql_block_impl.cpp @@ -220,7 +220,11 @@ std::vector<arrow::ValueDescr> ToValueDescr(const TVector<TType*>& types) { std::vector<arrow::ValueDescr> res; res.reserve(types.size()); for (const auto& type : types) { - res.emplace_back(ToValueDescr(type)); + if (type) { + res.emplace_back(ToValueDescr(type)); + } else { + res.emplace_back(); + } } return res; diff --git a/yql/essentials/minikql/invoke_builtins/mkql_builtins_datetime.h b/yql/essentials/minikql/invoke_builtins/mkql_builtins_datetime.h index c1b90de073f..1cef07bec1b 100644 --- a/yql/essentials/minikql/invoke_builtins/mkql_builtins_datetime.h +++ b/yql/essentials/minikql/invoke_builtins/mkql_builtins_datetime.h @@ -160,7 +160,7 @@ NUdf::TDataType<NUdf::TInterval64>::TLayout FromScaledDate<NUdf::TDataType<NUdf: template<typename TDateType> inline bool IsBadDateTime(TScaledDate val) { static_assert(TDateType::Features & (NYql::NUdf::DateType | NYql::NUdf::TzDateType), "Date type expected"); - if constexpr (TDateType::Features & NYql::NUdf::BigDateType) { + if constexpr (TDateType::Features & NYql::NUdf::ExtDateType) { return val < NUdf::MIN_TIMESTAMP64 || val > NUdf::MAX_TIMESTAMP64; } else { return val < 0 || val >= TScaledDate(NUdf::MAX_TIMESTAMP); @@ -170,7 +170,7 @@ inline bool IsBadDateTime(TScaledDate val) { template<typename TDateType> inline bool IsBadInterval(TScaledDate val) { static_assert(TDateType::Features & NYql::NUdf::TimeIntervalType, "Interval type expected"); - if constexpr (TDateType::Features & NYql::NUdf::BigDateType) { + if constexpr (TDateType::Features & NYql::NUdf::ExtDateType) { return val < -NUdf::MAX_INTERVAL64 || val > NUdf::MAX_INTERVAL64; } else { return val <= -TScaledDate(NUdf::MAX_TIMESTAMP) || val >= TScaledDate(NUdf::MAX_TIMESTAMP); @@ -201,7 +201,7 @@ inline Value* GenIsInt64Overflow(Value* value, LLVMContext &context, BasicBlock* template<typename TDateType> inline Value* GenIsBadDateTime(Value* val, LLVMContext &context, BasicBlock* block) { static_assert(TDateType::Features & (NYql::NUdf::DateType | NYql::NUdf::TzDateType), "Date type expected"); - if constexpr (TDateType::Features & NYql::NUdf::BigDateType) { + if constexpr (TDateType::Features & NYql::NUdf::ExtDateType) { auto lt = CmpInst::Create(Instruction::ICmp, ICmpInst::ICMP_SLT, val, ConstantInt::get(Type::getInt64Ty(context), NUdf::MIN_TIMESTAMP64), "lt", block); auto ge = CmpInst::Create(Instruction::ICmp, ICmpInst::ICMP_SGT, val, ConstantInt::get(Type::getInt64Ty(context), NUdf::MAX_TIMESTAMP64), "ge", block); return BinaryOperator::CreateOr(lt, ge, "or", block); @@ -215,10 +215,10 @@ inline Value* GenIsBadDateTime(Value* val, LLVMContext &context, BasicBlock* blo template<typename TDateType> inline Value* GenIsBadInterval(Value* val, LLVMContext &context, BasicBlock* block) { static_assert(TDateType::Features & NYql::NUdf::TimeIntervalType, "Interval type expected"); - constexpr i64 lowerBound = (TDateType::Features & NYql::NUdf::BigDateType) + constexpr i64 lowerBound = (TDateType::Features & NYql::NUdf::ExtDateType) ? (-NUdf::MAX_INTERVAL64 - 1) : -(i64)NUdf::MAX_TIMESTAMP; - constexpr i64 upperBound = (TDateType::Features & NYql::NUdf::BigDateType) + constexpr i64 upperBound = (TDateType::Features & NYql::NUdf::ExtDateType) ? (NUdf::MAX_INTERVAL64 + 1) : (i64)NUdf::MAX_TIMESTAMP; auto le = CmpInst::Create(Instruction::ICmp, ICmpInst::ICMP_SLE, val, ConstantInt::get(Type::getInt64Ty(context), lowerBound), "le", block); diff --git a/yql/essentials/minikql/mkql_program_builder.cpp b/yql/essentials/minikql/mkql_program_builder.cpp index 27c4229f996..5e8e89da4ca 100644 --- a/yql/essentials/minikql/mkql_program_builder.cpp +++ b/yql/essentials/minikql/mkql_program_builder.cpp @@ -6,7 +6,6 @@ #include "yql/essentials/minikql/mkql_function_registry.h" #include "yql/essentials/minikql/mkql_utils.h" #include "yql/essentials/minikql/mkql_type_builder.h" -#include "yql/essentials/core/sql_types/block.h" #include "yql/essentials/core/sql_types/match_recognize.h" #include "yql/essentials/core/sql_types/time_order_recover.h" #include <yql/essentials/parser/pg_catalog/catalog.h> @@ -378,7 +377,7 @@ TType* TProgramBuilder::BuildArithmeticCommonType(TType* type1, TType* type2) { const auto features2 = NUdf::GetDataTypeInfo(*data2->GetDataSlot()).Features; const bool isOptional = isOptional1 || isOptional2; if (features1 & features2 & NUdf::EDataTypeFeatures::TimeIntervalType) { - return NewOptionalType(features1 & NUdf::EDataTypeFeatures::BigDateType ? data1 : data2); + return NewOptionalType(features1 & NUdf::EDataTypeFeatures::ExtDateType ? data1 : data2); } else if (features1 & NUdf::EDataTypeFeatures::TimeIntervalType) { return NewOptionalType(features2 & NUdf::EDataTypeFeatures::IntegralType ? data1 : data2); } else if (features2 & NUdf::EDataTypeFeatures::TimeIntervalType) { @@ -387,7 +386,7 @@ TType* TProgramBuilder::BuildArithmeticCommonType(TType* type1, TType* type2) { features1 & (NUdf::EDataTypeFeatures::DateType | NUdf::EDataTypeFeatures::TzDateType) && features2 & (NUdf::EDataTypeFeatures::DateType | NUdf::EDataTypeFeatures::TzDateType) ) { - const auto used = ((features1 | features2) & NUdf::EDataTypeFeatures::BigDateType) + const auto used = ((features1 | features2) & NUdf::EDataTypeFeatures::ExtDateType) ? NewDataType(NUdf::EDataSlot::Interval64) : NewDataType(NUdf::EDataSlot::Interval); return isOptional ? NewOptionalType(used) : used; @@ -1494,24 +1493,6 @@ TRuntimeNode TProgramBuilder::WideToBlocks(TRuntimeNode stream) { return TRuntimeNode(callableBuilder.Build(), false); } -TType* TProgramBuilder::BuildBlockStructType(const TStructType* structType) { - std::vector<std::pair<std::string_view, TType*>> blockStructItems; - blockStructItems.reserve(structType->GetMembersCount() + 1); - for (size_t i = 0; i < structType->GetMembersCount(); i++) { - auto itemType = structType->GetMemberType(i); - MKQL_ENSURE(!itemType->IsBlock() , "Block types are not allowed here"); - blockStructItems.emplace_back( - structType->GetMemberName(i), - NewBlockType(itemType, TBlockType::EShape::Many) - ); - } - blockStructItems.emplace_back( - NYql::BlockLengthColumnName, - NewBlockType(NewDataType(NUdf::TDataType<ui64>::Id), TBlockType::EShape::Scalar) - ); - return NewStructType(blockStructItems); -} - TRuntimeNode TProgramBuilder::ListToBlocks(TRuntimeNode list) { if constexpr (RuntimeVersion < 60U) { THROW yexception() << "Runtime version (" << RuntimeVersion << ") too old for " << __func__; @@ -1564,6 +1545,24 @@ TRuntimeNode TProgramBuilder::WideFromBlocks(TRuntimeNode stream) { return TRuntimeNode(callableBuilder.Build(), false); } +TRuntimeNode TProgramBuilder::ListFromBlocks(TRuntimeNode list) { + if constexpr (RuntimeVersion < 61U) { + THROW yexception() << "Runtime version (" << RuntimeVersion << ") too old for " << __func__; + } + + MKQL_ENSURE(list.GetStaticType()->IsList(), "Expected List as input type"); + const auto listType = AS_TYPE(TListType, list.GetStaticType()); + + MKQL_ENSURE(listType->GetItemType()->IsStruct(), "Expected List of Struct as input type"); + const auto itemBlockStructType = AS_TYPE(TStructType, listType->GetItemType()); + + const auto itemStructType = ValidateBlockStructType(itemBlockStructType); + + TCallableBuilder callableBuilder(Env, __func__, NewListType(itemStructType)); + callableBuilder.Add(list); + return TRuntimeNode(callableBuilder.Build(), false); +} + TRuntimeNode TProgramBuilder::WideSkipBlocks(TRuntimeNode flow, TRuntimeNode count) { return BuildWideSkipTakeBlocks(__func__, flow, count); } diff --git a/yql/essentials/minikql/mkql_program_builder.h b/yql/essentials/minikql/mkql_program_builder.h index fb0b097dfaa..f71d47a3872 100644 --- a/yql/essentials/minikql/mkql_program_builder.h +++ b/yql/essentials/minikql/mkql_program_builder.h @@ -244,6 +244,7 @@ public: TRuntimeNode ListToBlocks(TRuntimeNode list); TRuntimeNode FromBlocks(TRuntimeNode flow); TRuntimeNode WideFromBlocks(TRuntimeNode flow); + TRuntimeNode ListFromBlocks(TRuntimeNode list); TRuntimeNode WideSkipBlocks(TRuntimeNode flow, TRuntimeNode count); TRuntimeNode WideTakeBlocks(TRuntimeNode flow, TRuntimeNode count); TRuntimeNode WideTopBlocks(TRuntimeNode flow, TRuntimeNode count, const std::vector<std::pair<ui32, TRuntimeNode>>& keys); @@ -862,7 +863,6 @@ private: TType* ChooseCommonType(TType* type1, TType* type2); TType* BuildArithmeticCommonType(TType* type1, TType* type2); TType* BuildWideBlockType(const TArrayRef<TType* const>& wideComponents); - TType* BuildBlockStructType(const TStructType* structType); bool IsNull(TRuntimeNode arg); protected: diff --git a/yql/essentials/minikql/mkql_runtime_version.h b/yql/essentials/minikql/mkql_runtime_version.h index df026ed9015..ed228c7b03d 100644 --- a/yql/essentials/minikql/mkql_runtime_version.h +++ b/yql/essentials/minikql/mkql_runtime_version.h @@ -24,7 +24,7 @@ namespace NMiniKQL { // 1. Bump this version every time incompatible runtime nodes are introduced. // 2. Make sure you provide runtime node generation for previous runtime versions. #ifndef MKQL_RUNTIME_VERSION -#define MKQL_RUNTIME_VERSION 60U +#define MKQL_RUNTIME_VERSION 61U #endif // History: diff --git a/yql/essentials/minikql/mkql_type_builder.cpp b/yql/essentials/minikql/mkql_type_builder.cpp index e37bf91b91f..c9d6e363b4f 100644 --- a/yql/essentials/minikql/mkql_type_builder.cpp +++ b/yql/essentials/minikql/mkql_type_builder.cpp @@ -2806,6 +2806,48 @@ TType* TTypeBuilder::NewVariantType(TType* underlyingType) const { return TVariantType::Create(underlyingType, Env); } +TType* TTypeBuilder::ValidateBlockStructType(const TStructType* structType) const { + MKQL_ENSURE(structType->GetMembersCount() > 0, "Expected at least one column"); + + std::vector<std::pair<std::string_view, TType*>> outStructItems; + outStructItems.reserve(structType->GetMembersCount() - 1); + bool hasBlockLengthColumn = false; + for (size_t i = 0; i < structType->GetMembersCount(); i++) { + auto blockType = AS_TYPE(TBlockType, structType->GetMemberType(i)); + bool isScalar = blockType->GetShape() == TBlockType::EShape::Scalar; + auto itemType = blockType->GetItemType(); + if (structType->GetMemberName(i) == NYql::BlockLengthColumnName) { + MKQL_ENSURE(isScalar, "Block length column should be scalar"); + MKQL_ENSURE(AS_TYPE(TDataType, itemType)->GetSchemeType() == NUdf::TDataType<ui64>::Id, "Expected Uint64"); + + MKQL_ENSURE(!hasBlockLengthColumn, "Block struct must contain only one block length column"); + hasBlockLengthColumn = true; + } else { + outStructItems.emplace_back(structType->GetMemberName(i), itemType); + } + } + MKQL_ENSURE(hasBlockLengthColumn, "Block struct must contain block length column"); + return NewStructType(outStructItems); +} + +TType* TTypeBuilder::BuildBlockStructType(const TStructType* structType) const { + std::vector<std::pair<std::string_view, TType*>> blockStructItems; + blockStructItems.reserve(structType->GetMembersCount() + 1); + for (size_t i = 0; i < structType->GetMembersCount(); i++) { + auto itemType = structType->GetMemberType(i); + MKQL_ENSURE(!itemType->IsBlock() , "Block types are not allowed here"); + blockStructItems.emplace_back( + structType->GetMemberName(i), + NewBlockType(itemType, TBlockType::EShape::Many) + ); + } + blockStructItems.emplace_back( + NYql::BlockLengthColumnName, + NewBlockType(NewDataType(NUdf::TDataType<ui64>::Id), TBlockType::EShape::Scalar) + ); + return NewStructType(blockStructItems); +} + void RebuildTypeIndex() { HugeSingleton<TPgTypeIndex>()->Rebuild(); } diff --git a/yql/essentials/minikql/mkql_type_builder.h b/yql/essentials/minikql/mkql_type_builder.h index f1ec8a9e4f3..678db9157ca 100644 --- a/yql/essentials/minikql/mkql_type_builder.h +++ b/yql/essentials/minikql/mkql_type_builder.h @@ -2,6 +2,7 @@ #include "mkql_node.h" +#include <yql/essentials/core/sql_types/block.h> #include <yql/essentials/public/udf/udf_type_builder.h> #include <yql/essentials/public/udf/arrow/block_type_helper.h> #include <yql/essentials/parser/pg_wrapper/interface/compare.h> @@ -325,6 +326,9 @@ public: TType* NewResourceType(const std::string_view& tag) const; TType* NewVariantType(TType* underlyingType) const; + TType* BuildBlockStructType(const TStructType* structType) const; + TType* ValidateBlockStructType(const TStructType* structType) const; + protected: const TTypeEnvironment& Env; bool UseNullType = true; diff --git a/yql/essentials/public/udf/arrow/bit_util.h b/yql/essentials/public/udf/arrow/bit_util.h index 4dbe785aa75..091a47bbcc6 100644 --- a/yql/essentials/public/udf/arrow/bit_util.h +++ b/yql/essentials/public/udf/arrow/bit_util.h @@ -1,6 +1,9 @@ #pragma once #include "defs.h" +#include <yql/essentials/utils/swap_bytes.h> + +#include <array> namespace NYql { namespace NUdf { @@ -119,5 +122,69 @@ inline void CopyDenseBitmap(ui8* dst, const ui8* src, size_t srcOffset, size_t l } } +// Duplicates every bit in an 8-bit value. +// Example: 0b01010101 -> 0b0011001100110011. +Y_FORCE_INLINE ui16 ReplicateEachBitTwice(ui8 b) { + ui16 x = b; + x = (x | (x << 4)) & 0x0F0F; + x = (x | (x << 2)) & 0x3333; + x = (x | (x << 1)) & 0x5555; + return x | (x << 1); +} + +// Repeat 4 times every bit in an 8-bit value. +// Example: 0b01010101 -> 0b00001111000011110000111100001111. +Y_FORCE_INLINE ui32 ReplicateEachBitFourTimes(ui8 b) { + ui32 x = b; + x = (x | (x << 12)) & 0x000F000F; + x = (x | (x << 6)) & 0x03030303; + x = (x | (x << 3)) & 0x11111111; + x *= 0x0F; + return x; +} + +// BitToByteExpand - Expands the individual bits of an 8-bit input x into an array of 8 elements of type TType. +// Each output element corresponds to one bit from the original value, expanded (via specialized routines) to fill the entire TType +// Example: BitToByteExpand<ui8>(0b10101010) yields REVERSE({0xFF, 0x00, 0xFF, 0x00, 0xFF, 0x00, 0xFF, 0x00}). +// Example: BitToByteExpand<ui16>(0b11000011) yields REVERSE({0xFFFF, 0xFFFF, 0x0000, 0x0000, 0x0000, 0x0000, 0xFFFF, 0xFFFF}). +template <typename TType> +Y_FORCE_INLINE std::array<TType, 8> BitToByteExpand(ui8 x); + +template <> +Y_FORCE_INLINE std::array<ui8, 8> BitToByteExpand(ui8 x) { + std::array<ui8, 8> result; + ui64 expanded = x; + expanded = (expanded * 0x8040201008040201ULL); + expanded &= 0x8080808080808080ULL; + expanded >>= 7; + expanded *= 0xFF; + expanded = NYql::SwapBytes(expanded); + memcpy(&result[0], &expanded, sizeof(expanded)); + return result; +} + +template <> +Y_FORCE_INLINE std::array<ui16, 8> BitToByteExpand(ui8 x) { + std::array<ui8, 8> input = BitToByteExpand<ui8>(x); + std::array<ui16, 8> output{}; + + for (size_t i = 0; i < 8; ++i) { + output[i] = ReplicateEachBitTwice(input[i]); + } + + return output; +} + +template <> +Y_FORCE_INLINE std::array<ui32, 8> BitToByteExpand(ui8 x) { + std::array<ui8, 8> input = BitToByteExpand<ui8>(x); + std::array<ui32, 8> output{}; + + for (size_t i = 0; i < 8; ++i) { + output[i] = ReplicateEachBitFourTimes(input[i]); + } + + return output; +} } } diff --git a/yql/essentials/public/udf/arrow/ut/bit_util_ut.cpp b/yql/essentials/public/udf/arrow/ut/bit_util_ut.cpp index 601d3be7c86..4af399c8deb 100644 --- a/yql/essentials/public/udf/arrow/ut/bit_util_ut.cpp +++ b/yql/essentials/public/udf/arrow/ut/bit_util_ut.cpp @@ -39,3 +39,92 @@ Y_UNIT_TEST_SUITE(CopyDenseBitmapTest) { } } } + +Y_UNIT_TEST_SUITE(BitExpanding) { + +Y_UNIT_TEST(ReplicateEachBitTwice) { + // Test case 1: All zeros + UNIT_ASSERT_EQUAL(ReplicateEachBitTwice(0x00), 0x0000); + + // Test case 2: All ones + UNIT_ASSERT_EQUAL(ReplicateEachBitTwice(0xFF), 0xFFFF); + + // Test case 3: Alternating bits + UNIT_ASSERT_EQUAL(ReplicateEachBitTwice(0x55), 0x3333); + UNIT_ASSERT_EQUAL(ReplicateEachBitTwice(0xAA), 0xCCCC); + + // Test case 4: Random pattern + UNIT_ASSERT_EQUAL(ReplicateEachBitTwice(0x3C), 0x0FF0); + + // Test case 5: Single bit set + UNIT_ASSERT_EQUAL(ReplicateEachBitTwice(0x01), 0x0003); + UNIT_ASSERT_EQUAL(ReplicateEachBitTwice(0x80), 0xC000); +} + +Y_UNIT_TEST(ReplicateEachBitFourTimes) { + // Test case 1: All zeros + UNIT_ASSERT_EQUAL(ReplicateEachBitFourTimes(0x00), 0x00000000); + + // Test case 2: All ones + UNIT_ASSERT_EQUAL(ReplicateEachBitFourTimes(0xFF), 0xFFFFFFFF); + + // Test case 3: Alternating bits + UNIT_ASSERT_EQUAL(ReplicateEachBitFourTimes(0x55), 0x0F0F0F0F); + UNIT_ASSERT_EQUAL(ReplicateEachBitFourTimes(0xAA), 0xF0F0F0F0); + + // Test case 4: Random pattern + UNIT_ASSERT_EQUAL(ReplicateEachBitFourTimes(0x3C), 0x00FFFF00); + + // Test case 5: Single bit set + UNIT_ASSERT_EQUAL(ReplicateEachBitFourTimes(0x01), 0x0000000F); + UNIT_ASSERT_EQUAL(ReplicateEachBitFourTimes(0x80), 0xF0000000); +} + +Y_UNIT_TEST(BitToByteExpand) { + auto testBody = [](auto n) { + using T = decltype(n); + auto max = std::numeric_limits<T>::max(); + auto min = std::numeric_limits<T>::min(); + // Test case 1: All zeros + auto result1 = BitToByteExpand<T>(min); + for (size_t i = 0; i < 8; ++i) { + UNIT_ASSERT_EQUAL(result1[i], min); + } + + // Test case 2: All ones + auto result2 = BitToByteExpand<T>(max); + for (size_t i = 0; i < 8; ++i) { + UNIT_ASSERT_EQUAL(result2[i], max); + } + + // Test case 3: Alternating bits + auto result3 = BitToByteExpand<T>(0x55); + for (size_t i = 0; i < 8; ++i) { + UNIT_ASSERT_EQUAL(result3[i], (i % 2 == 0) ? max : min); + } + + // Test case 4: Random pattern + auto result4 = BitToByteExpand<T>(0x3C); + UNIT_ASSERT_EQUAL(result4[0], min); + UNIT_ASSERT_EQUAL(result4[1], min); + UNIT_ASSERT_EQUAL(result4[2], max); + UNIT_ASSERT_EQUAL(result4[3], max); + UNIT_ASSERT_EQUAL(result4[4], max); + UNIT_ASSERT_EQUAL(result4[5], max); + UNIT_ASSERT_EQUAL(result4[6], min); + UNIT_ASSERT_EQUAL(result4[7], min); + + // Test case 5: Single bit set + auto result5 = BitToByteExpand<T>(0x80); + UNIT_ASSERT_EQUAL(result5[7], max); + for (size_t i = 0; i < 7; ++i) { + UNIT_ASSERT_EQUAL(result5[i], min); + } + }; + + testBody(ui8()); + testBody(ui16()); + testBody(ui32()); +} + +} // Y_UNIT_TEST_SUITE(BitExpanding) diff --git a/yql/essentials/public/udf/udf_data_type.h b/yql/essentials/public/udf/udf_data_type.h index 9d7e2070328..9d5d4049557 100644 --- a/yql/essentials/public/udf/udf_data_type.h +++ b/yql/essentials/public/udf/udf_data_type.h @@ -45,16 +45,9 @@ enum EDataTypeFeatures : ui32 { TzDateType = 1u << 27, DecimalType = 1u << 28, TimeIntervalType = 1u << 29, - // FIXME: Remove, when no entries in the code are left. - BigDateType = 1u << 30, ExtDateType = 1u << 30, }; -// FIXME: This static assert is vital for renaming BigDateType -// flag into ExtDateType to be in sync with naming in docs. -// Remove this assert, only when BigDateType flags is removed. -static_assert(ExtDateType == BigDateType); - template <typename T> struct TDataType; diff --git a/yql/essentials/sql/v1/complete/sql_complete.cpp b/yql/essentials/sql/v1/complete/sql_complete.cpp index 2a16a250e54..9bba9c5e71e 100644 --- a/yql/essentials/sql/v1/complete/sql_complete.cpp +++ b/yql/essentials/sql/v1/complete/sql_complete.cpp @@ -3,6 +3,10 @@ #include "sql_context.h" #include "string_util.h" +// FIXME(YQL-19747): unwanted dependency on a lexer implementation +#include <yql/essentials/sql/v1/lexer/antlr4_pure/lexer.h> +#include <yql/essentials/sql/v1/lexer/antlr4_pure_ansi/lexer.h> + #include <util/generic/algorithm.h> #include <util/charset/utf8.h> @@ -10,8 +14,8 @@ namespace NSQLComplete { class TSqlCompletionEngine: public ISqlCompletionEngine { public: - TSqlCompletionEngine() - : ContextInference(MakeSqlContextInference()) + explicit TSqlCompletionEngine(TLexerSupplier lexer) + : ContextInference(MakeSqlContextInference(lexer)) { } @@ -68,8 +72,18 @@ namespace NSQLComplete { ISqlContextInference::TPtr ContextInference; }; + // FIXME(YQL-19747): unwanted dependency on a lexer implementation ISqlCompletionEngine::TPtr MakeSqlCompletionEngine() { - return ISqlCompletionEngine::TPtr(new TSqlCompletionEngine()); + NSQLTranslationV1::TLexers lexers; + lexers.Antlr4Pure = NSQLTranslationV1::MakeAntlr4PureLexerFactory(); + lexers.Antlr4PureAnsi = NSQLTranslationV1::MakeAntlr4PureAnsiLexerFactory(); + return MakeSqlCompletionEngine([lexers = std::move(lexers)](bool ansi) { + return NSQLTranslationV1::MakeLexer(lexers, ansi, /* antlr4 = */ true, /* pure = */ true); + }); + } + + ISqlCompletionEngine::TPtr MakeSqlCompletionEngine(TLexerSupplier lexer) { + return ISqlCompletionEngine::TPtr(new TSqlCompletionEngine(lexer)); } } // namespace NSQLComplete diff --git a/yql/essentials/sql/v1/complete/sql_complete.h b/yql/essentials/sql/v1/complete/sql_complete.h index 99e74cce7a7..354f8ffa756 100644 --- a/yql/essentials/sql/v1/complete/sql_complete.h +++ b/yql/essentials/sql/v1/complete/sql_complete.h @@ -1,5 +1,7 @@ #pragma once +#include <yql/essentials/sql/v1/lexer/lexer.h> + #include <util/generic/string.h> #include <util/generic/vector.h> @@ -39,6 +41,11 @@ namespace NSQLComplete { virtual ~ISqlCompletionEngine() = default; }; + using TLexerSupplier = std::function<NSQLTranslation::ILexer::TPtr(bool ansi)>; + + // FIXME(YQL-19747): unwanted dependency on a lexer implementation ISqlCompletionEngine::TPtr MakeSqlCompletionEngine(); + ISqlCompletionEngine::TPtr MakeSqlCompletionEngine(TLexerSupplier lexer); + } // namespace NSQLComplete diff --git a/yql/essentials/sql/v1/complete/sql_complete_ut.cpp b/yql/essentials/sql/v1/complete/sql_complete_ut.cpp index e0a012f9f6e..c65eba0e2d4 100644 --- a/yql/essentials/sql/v1/complete/sql_complete_ut.cpp +++ b/yql/essentials/sql/v1/complete/sql_complete_ut.cpp @@ -1,5 +1,8 @@ #include "sql_complete.h" +#include <yql/essentials/sql/v1/lexer/antlr4_pure/lexer.h> +#include <yql/essentials/sql/v1/lexer/antlr4_pure_ansi/lexer.h> + #include <library/cpp/testing/unittest/registar.h> using namespace NSQLComplete; @@ -7,6 +10,15 @@ using namespace NSQLComplete; Y_UNIT_TEST_SUITE(SqlCompleteTests) { using ECandidateKind::Keyword; + ISqlCompletionEngine::TPtr MakeSqlCompletionEngineUT() { + NSQLTranslationV1::TLexers lexers; + lexers.Antlr4Pure = NSQLTranslationV1::MakeAntlr4PureLexerFactory(); + lexers.Antlr4PureAnsi = NSQLTranslationV1::MakeAntlr4PureAnsiLexerFactory(); + return MakeSqlCompletionEngine([lexers = std::move(lexers)](bool ansi) { + return NSQLTranslationV1::MakeLexer(lexers, ansi, /* antlr4 = */ true, /* pure = */ true); + }); + } + TVector<TCandidate> Complete(ISqlCompletionEngine::TPtr& engine, TStringBuf prefix) { return engine->Complete({prefix}).Candidates; } @@ -50,7 +62,7 @@ Y_UNIT_TEST_SUITE(SqlCompleteTests) { {Keyword, "VALUES"}, }; - auto engine = MakeSqlCompletionEngine(); + auto engine = MakeSqlCompletionEngineUT(); UNIT_ASSERT_VALUES_EQUAL(Complete(engine, {""}), expected); UNIT_ASSERT_VALUES_EQUAL(Complete(engine, {" "}), expected); UNIT_ASSERT_VALUES_EQUAL(Complete(engine, {" "}), expected); @@ -76,7 +88,7 @@ Y_UNIT_TEST_SUITE(SqlCompleteTests) { {Keyword, "USER"}, }; - auto engine = MakeSqlCompletionEngine(); + auto engine = MakeSqlCompletionEngineUT(); UNIT_ASSERT_VALUES_EQUAL(Complete(engine, {"ALTER "}), expected); } @@ -99,7 +111,7 @@ Y_UNIT_TEST_SUITE(SqlCompleteTests) { {Keyword, "VIEW"}, }; - auto engine = MakeSqlCompletionEngine(); + auto engine = MakeSqlCompletionEngineUT(); UNIT_ASSERT_VALUES_EQUAL(Complete(engine, {"CREATE "}), expected); } @@ -108,7 +120,7 @@ Y_UNIT_TEST_SUITE(SqlCompleteTests) { {Keyword, "FROM"}, }; - auto engine = MakeSqlCompletionEngine(); + auto engine = MakeSqlCompletionEngineUT(); UNIT_ASSERT_VALUES_EQUAL(Complete(engine, {"DELETE "}), expected); } @@ -128,7 +140,7 @@ Y_UNIT_TEST_SUITE(SqlCompleteTests) { {Keyword, "VIEW"}, }; - auto engine = MakeSqlCompletionEngine(); + auto engine = MakeSqlCompletionEngineUT(); UNIT_ASSERT_VALUES_EQUAL(Complete(engine, {"DROP "}), expected); } @@ -171,7 +183,7 @@ Y_UNIT_TEST_SUITE(SqlCompleteTests) { {Keyword, "VALUES"}, }; - auto engine = MakeSqlCompletionEngine(); + auto engine = MakeSqlCompletionEngineUT(); UNIT_ASSERT_VALUES_EQUAL(Complete(engine, {"EXPLAIN "}), expected); } @@ -196,7 +208,7 @@ Y_UNIT_TEST_SUITE(SqlCompleteTests) { {Keyword, "USE"}, }; - auto engine = MakeSqlCompletionEngine(); + auto engine = MakeSqlCompletionEngineUT(); UNIT_ASSERT_VALUES_EQUAL(Complete(engine, {"GRANT "}), expected); } @@ -206,7 +218,7 @@ Y_UNIT_TEST_SUITE(SqlCompleteTests) { {Keyword, "OR"}, }; - auto engine = MakeSqlCompletionEngine(); + auto engine = MakeSqlCompletionEngineUT(); UNIT_ASSERT_VALUES_EQUAL(Complete(engine, {"INSERT "}), expected); } @@ -227,7 +239,7 @@ Y_UNIT_TEST_SUITE(SqlCompleteTests) { {Keyword, "VARIANT"}, }; - auto engine = MakeSqlCompletionEngine(); + auto engine = MakeSqlCompletionEngineUT(); UNIT_ASSERT_VALUES_EQUAL(Complete(engine, {"PRAGMA "}), expected); } @@ -265,7 +277,7 @@ Y_UNIT_TEST_SUITE(SqlCompleteTests) { {Keyword, "VARIANT"}, }; - auto engine = MakeSqlCompletionEngine(); + auto engine = MakeSqlCompletionEngineUT(); UNIT_ASSERT_VALUES_EQUAL(Complete(engine, {"SELECT "}), expected); } @@ -275,18 +287,18 @@ Y_UNIT_TEST_SUITE(SqlCompleteTests) { {Keyword, "OBJECT"}, }; - auto engine = MakeSqlCompletionEngine(); + auto engine = MakeSqlCompletionEngineUT(); UNIT_ASSERT_VALUES_EQUAL(Complete(engine, {"UPSERT "}), expected); } Y_UNIT_TEST(UTF8Wide) { - auto engine = MakeSqlCompletionEngine(); + auto engine = MakeSqlCompletionEngineUT(); UNIT_ASSERT_VALUES_EQUAL(Complete(engine, {"\xF0\x9F\x98\x8A"}).size(), 0); UNIT_ASSERT_VALUES_EQUAL(Complete(engine, {"编码"}).size(), 0); } Y_UNIT_TEST(WordBreak) { - auto engine = MakeSqlCompletionEngine(); + auto engine = MakeSqlCompletionEngineUT(); UNIT_ASSERT_VALUES_EQUAL(Complete(engine, {"SELECT ("}).size(), 28); UNIT_ASSERT_VALUES_EQUAL(Complete(engine, {"SELECT (1)"}).size(), 30); UNIT_ASSERT_VALUES_EQUAL(Complete(engine, {"SELECT 1;"}).size(), 35); @@ -300,7 +312,7 @@ Y_UNIT_TEST_SUITE(SqlCompleteTests) { " Bool(field), Math::Sin(var) \n" "FROM `local/test/space/table` JOIN test;"); - auto engine = MakeSqlCompletionEngine(); + auto engine = MakeSqlCompletionEngineUT(); for (std::size_t size = 0; size <= queryUtf16.size(); ++size) { const TWtringBuf prefixUtf16(queryUtf16, 0, size); @@ -314,10 +326,18 @@ Y_UNIT_TEST_SUITE(SqlCompleteTests) { {Keyword, "SELECT"}, }; - auto engine = MakeSqlCompletionEngine(); + auto engine = MakeSqlCompletionEngineUT(); UNIT_ASSERT_VALUES_EQUAL(Complete(engine, "se"), expected); UNIT_ASSERT_VALUES_EQUAL(Complete(engine, "sE"), expected); UNIT_ASSERT_VALUES_EQUAL(Complete(engine, "Se"), expected); UNIT_ASSERT_VALUES_EQUAL(Complete(engine, "SE"), expected); } + + Y_UNIT_TEST(InvalidStatementsRecovery) { + auto engine = MakeSqlCompletionEngineUT(); + UNIT_ASSERT_VALUES_EQUAL(Complete(engine, "select select; ").size(), 35); + UNIT_ASSERT_VALUES_EQUAL(Complete(engine, "select select;").size(), 35); + UNIT_ASSERT_VALUES_EQUAL_C(Complete(engine, "!;").size(), 0, "Lexer failing"); + } + } // Y_UNIT_TEST_SUITE(SqlCompleteTests) diff --git a/yql/essentials/sql/v1/complete/sql_context.cpp b/yql/essentials/sql/v1/complete/sql_context.cpp index 4195daa6d83..2bd1a2af987 100644 --- a/yql/essentials/sql/v1/complete/sql_context.cpp +++ b/yql/essentials/sql/v1/complete/sql_context.cpp @@ -3,6 +3,7 @@ #include "c3_engine.h" #include "sql_syntax.h" +#include <yql/essentials/core/issue/yql_issue.h> #include <yql/essentials/parser/antlr_ast/gen/v1_antlr4/SQLv1Antlr4Lexer.h> #include <yql/essentials/parser/antlr_ast/gen/v1_antlr4/SQLv1Antlr4Parser.h> #include <yql/essentials/parser/antlr_ast/gen/v1_ansi_antlr4/SQLv1Antlr4Lexer.h> @@ -30,14 +31,19 @@ namespace NSQLComplete { TDefaultYQLGrammar>; public: - TSpecializedSqlContextInference() + explicit TSpecializedSqlContextInference(TLexerSupplier lexer) : Grammar(&GetSqlGrammar(IsAnsiLexer)) + , Lexer_(lexer(/* ansi = */ IsAnsiLexer)) , C3(ComputeC3Config()) { } TCompletionContext Analyze(TCompletionInput input) override { - auto prefix = input.Text.Head(input.CursorPosition); + TStringBuf prefix; + if (!GetC3Prefix(input, &prefix)) { + return {}; + } + auto tokens = C3.Complete(prefix); return { .Keywords = SiftedKeywords(tokens), @@ -71,6 +77,26 @@ namespace NSQLComplete { return preferredRules; } + bool GetC3Prefix(TCompletionInput input, TStringBuf* prefix) { + *prefix = input.Text.Head(input.CursorPosition); + + TVector<TString> statements; + NYql::TIssues issues; + if (!NSQLTranslationV1::SplitQueryToStatements( + TString(*prefix) + (prefix->EndsWith(';') ? ";" : ""), Lexer_, + statements, issues, /* file = */ "", + /* areBlankSkipped = */ false)) { + return false; + } + + if (statements.empty()) { + return true; + } + + *prefix = prefix->Last(statements.back().size()); + return true; + } + TVector<TString> SiftedKeywords(const TVector<TSuggestedToken>& tokens) { const auto& vocabulary = Grammar->GetVocabulary(); const auto& keywordTokens = Grammar->GetKeywordTokens(); @@ -85,11 +111,18 @@ namespace NSQLComplete { } const ISqlGrammar* Grammar; + NSQLTranslation::ILexer::TPtr Lexer_; TC3Engine<G> C3; }; class TSqlContextInference: public ISqlContextInference { public: + explicit TSqlContextInference(TLexerSupplier lexer) + : DefaultEngine(lexer) + , AnsiEngine(lexer) + { + } + TCompletionContext Analyze(TCompletionInput input) override { auto isAnsiLexer = IsAnsiQuery(TString(input.Text)); auto& engine = GetSpecializedEngine(isAnsiLexer); @@ -108,8 +141,8 @@ namespace NSQLComplete { TSpecializedSqlContextInference</* IsAnsiLexer = */ true> AnsiEngine; }; - ISqlContextInference::TPtr MakeSqlContextInference() { - return TSqlContextInference::TPtr(new TSqlContextInference()); + ISqlContextInference::TPtr MakeSqlContextInference(TLexerSupplier lexer) { + return TSqlContextInference::TPtr(new TSqlContextInference(lexer)); } } // namespace NSQLComplete diff --git a/yql/essentials/sql/v1/complete/sql_context.h b/yql/essentials/sql/v1/complete/sql_context.h index bc3b8d4840f..72d481ca9c4 100644 --- a/yql/essentials/sql/v1/complete/sql_context.h +++ b/yql/essentials/sql/v1/complete/sql_context.h @@ -2,6 +2,8 @@ #include "sql_complete.h" +#include <yql/essentials/sql/v1/lexer/lexer.h> + #include <util/generic/string.h> namespace NSQLComplete { @@ -18,6 +20,6 @@ namespace NSQLComplete { virtual ~ISqlContextInference() = default; }; - ISqlContextInference::TPtr MakeSqlContextInference(); + ISqlContextInference::TPtr MakeSqlContextInference(TLexerSupplier lexer); } // namespace NSQLComplete diff --git a/yql/essentials/sql/v1/complete/ut/ya.make b/yql/essentials/sql/v1/complete/ut/ya.make index 91f7da13612..07e60d5a508 100644 --- a/yql/essentials/sql/v1/complete/ut/ya.make +++ b/yql/essentials/sql/v1/complete/ut/ya.make @@ -5,4 +5,9 @@ SRCS( string_util_ut.cpp ) +PEERDIR( + yql/essentials/sql/v1/lexer/antlr4_pure + yql/essentials/sql/v1/lexer/antlr4_pure_ansi +) + END() diff --git a/yql/essentials/sql/v1/complete/ya.make b/yql/essentials/sql/v1/complete/ya.make index 70189e5f508..7142e57899c 100644 --- a/yql/essentials/sql/v1/complete/ya.make +++ b/yql/essentials/sql/v1/complete/ya.make @@ -13,6 +13,13 @@ PEERDIR( contrib/libs/antlr4-c3 yql/essentials/sql/settings yql/essentials/sql/v1/format + yql/essentials/sql/v1/lexer + + # FIXME(YQL-19747): unwanted dependency on a lexer implementation + yql/essentials/sql/v1/lexer/antlr4_pure + yql/essentials/sql/v1/lexer/antlr4_pure_ansi + + yql/essentials/core/issue yql/essentials/parser/antlr_ast/gen/v1_ansi_antlr4 yql/essentials/parser/antlr_ast/gen/v1_antlr4 ) diff --git a/yql/essentials/sql/v1/lexer/lexer.cpp b/yql/essentials/sql/v1/lexer/lexer.cpp index 2b5da9ddd53..5621cc65d7b 100644 --- a/yql/essentials/sql/v1/lexer/lexer.cpp +++ b/yql/essentials/sql/v1/lexer/lexer.cpp @@ -253,7 +253,10 @@ void SplitByStatements(TTokenIterator begin, TTokenIterator end, TVector<TTokenI } -bool SplitQueryToStatements(const TString& query, NSQLTranslation::ILexer::TPtr& lexer, TVector<TString>& statements, NYql::TIssues& issues, const TString& file) { +bool SplitQueryToStatements( + const TString& query, NSQLTranslation::ILexer::TPtr& lexer, + TVector<TString>& statements, NYql::TIssues& issues, const TString& file, + bool areBlankSkipped) { TParsedTokenList allTokens; auto onNextToken = [&](NSQLTranslation::TParsedToken&& token) { if (token.Name != "EOF") { @@ -269,12 +272,14 @@ bool SplitQueryToStatements(const TString& query, NSQLTranslation::ILexer::TPtr& SplitByStatements(allTokens.begin(), allTokens.end(), statementsTokens); for (size_t i = 1; i < statementsTokens.size(); ++i) { - TStringBuilder currentQueryBuilder; + TString statement; for (auto it = statementsTokens[i - 1]; it != statementsTokens[i]; ++it) { - currentQueryBuilder << it->Content; + statement += it->Content; + } + + if (areBlankSkipped) { + statement = StripStringLeft(statement); } - TString statement = currentQueryBuilder; - statement = StripStringLeft(statement); bool isBlank = true; for (auto c : statement) { @@ -284,11 +289,11 @@ bool SplitQueryToStatements(const TString& query, NSQLTranslation::ILexer::TPtr& } }; - if (isBlank) { + if (isBlank && areBlankSkipped) { continue; } - statements.push_back(statement); + statements.emplace_back(std::move(statement)); } return true; diff --git a/yql/essentials/sql/v1/lexer/lexer.h b/yql/essentials/sql/v1/lexer/lexer.h index 857681ae51f..1cc8566fcf6 100644 --- a/yql/essentials/sql/v1/lexer/lexer.h +++ b/yql/essentials/sql/v1/lexer/lexer.h @@ -21,6 +21,8 @@ NSQLTranslation::ILexer::TPtr MakeLexer(const TLexers& lexers, bool ansi, bool a // in SELECT * FROM ... GROUP BY ... - group is a keyword. bool IsProbablyKeyword(const NSQLTranslation::TParsedToken& token); -bool SplitQueryToStatements(const TString& query, NSQLTranslation::ILexer::TPtr& lexer, - TVector<TString>& statements, NYql::TIssues& issues, const TString& file = ""); +bool SplitQueryToStatements( + const TString& query, NSQLTranslation::ILexer::TPtr& lexer, + TVector<TString>& statements, NYql::TIssues& issues, const TString& file = "", + bool areBlankSkipped = true); } diff --git a/yql/essentials/tests/sql/minirun/pure.py b/yql/essentials/tests/sql/minirun/pure.py index 4c92d6fee5c..c3a78adf354 100644 --- a/yql/essentials/tests/sql/minirun/pure.py +++ b/yql/essentials/tests/sql/minirun/pure.py @@ -120,7 +120,7 @@ def run_test(suite, case, cfg, tmpdir, what, yql_http_file_server): opt_res_yson = normalize_result(stable_result_file(opt_res), False) # Compare results - assert opt_res_yson == base_res_yson, 'RESULTS_DIFFER\n' \ + assert opt_res_yson == base_res_yson, 'RESULTS_DIFFER for mode {}\n'.format(what) + \ 'Result:\n %(opt_res_yson)s\n\n' \ 'Base result:\n %(base_res_yson)s\n' % locals() diff --git a/yql/essentials/tools/yql_facade_run/yql_facade_run.cpp b/yql/essentials/tools/yql_facade_run/yql_facade_run.cpp index 83939478d6d..fdfe60a433e 100644 --- a/yql/essentials/tools/yql_facade_run/yql_facade_run.cpp +++ b/yql/essentials/tools/yql_facade_run/yql_facade_run.cpp @@ -512,6 +512,7 @@ TIntrusivePtr<NKikimr::NMiniKQL::IFunctionRegistry> TFacadeRunner::GetFuncRegist int TFacadeRunner::Main(int argc, const char *argv[]) { NYql::NBacktrace::RegisterKikimrFatalActions(); NYql::NBacktrace::EnableKikimrSymbolize(); + EnableKikimrBacktraceFormat(); NYql::NLog::YqlLoggerScope logger(&Cerr); try { diff --git a/yql/essentials/udfs/common/datetime2/datetime_udf.cpp b/yql/essentials/udfs/common/datetime2/datetime_udf.cpp index daa43038707..c159f63d5e5 100644 --- a/yql/essentials/udfs/common/datetime2/datetime_udf.cpp +++ b/yql/essentials/udfs/common/datetime2/datetime_udf.cpp @@ -266,7 +266,7 @@ public: builder.Args()->Add(argsTuple.GetElementType(0)); const TType* retType; - if (features & NUdf::BigDateType) { + if (features & NUdf::ExtDateType) { retType = builder.SimpleType<TWResult>(); } else if (features & NUdf::TimeIntervalType) { retType = builder.SimpleType<TSignedResult>(); @@ -284,7 +284,7 @@ public: } builder.Returns(retType); - if (!(features & NUdf::BigDateType)) { + if (!(features & NUdf::ExtDateType)) { // FIXME: Only non-wide overloads support block rewrite now. builder.SupportsBlocks(); } @@ -447,7 +447,7 @@ struct TGetTimeComponent { } const auto features = NUdf::GetDataTypeInfo(NUdf::GetDataSlot(data.GetTypeId())).Features; - if (features & NUdf::BigDateType) { + if (features & NUdf::ExtDateType) { BuildSignature<TFieldStorage, TM64ResourceName, WAccessor>(builder, typesOnly); return true; } @@ -832,7 +832,7 @@ TUnboxedValuePod DoAddYears(const TUnboxedValuePod& date, i64 years, const NUdf: } } else { builder.Args()->Add<TUserDataType>().Flags(ICallablePayload::TArgumentFlags::AutoMap); - if constexpr (NUdf::TDataType<TUserDataType>::Features & NYql::NUdf::BigDateType) { + if constexpr (NUdf::TDataType<TUserDataType>::Features & NYql::NUdf::ExtDateType) { builder.Returns(builder.Resource(TM64ResourceName)); } else { builder.Returns(builder.Resource(TMResourceName)); @@ -1360,7 +1360,7 @@ public: } const auto features = NUdf::GetDataTypeInfo(NUdf::GetDataSlot(data.GetTypeId())).Features; - if (features & NUdf::BigDateType) { + if (features & NUdf::ExtDateType) { BuildSignature<TResultWType, TM64ResourceName, WAccessor>(builder, typesOnly); return true; } @@ -1469,7 +1469,7 @@ public: } const auto features = NUdf::GetDataTypeInfo(NUdf::GetDataSlot(data.GetTypeId())).Features; - if (features & NUdf::BigDateType) { + if (features & NUdf::ExtDateType) { BuildSignature<TM64ResourceName, WAccessor>(builder, typesOnly); return true; } @@ -1695,7 +1695,7 @@ TUnboxedValue GetTimezoneName(const IValueBuilder* valueBuilder, const TUnboxedV } const auto features = NUdf::GetDataTypeInfo(NUdf::GetDataSlot(data.GetTypeId())).Features; - if (features & NUdf::BigDateType) { + if (features & NUdf::ExtDateType) { BuildSignature<TM64ResourceName>(builder, typesOnly); return true; } @@ -1932,7 +1932,7 @@ public: const auto features = NUdf::GetDataTypeInfo(NUdf::GetDataSlot(data.GetTypeId())).Features; if (features & NUdf::TimeIntervalType) { - if (features & NUdf::BigDateType) { + if (features & NUdf::ExtDateType) { BuildSignature<TInterval64, TWResult>(builder, typesOnly); } else { BuildSignature<TInterval, TResult>(builder, typesOnly); @@ -2057,7 +2057,7 @@ public: } const auto features = NUdf::GetDataTypeInfo(NUdf::GetDataSlot(data.GetTypeId())).Features; - if (features & NUdf::BigDateType) { + if (features & NUdf::ExtDateType) { BuildSignature<TM64ResourceName, WBoundary>(builder, typesOnly); return true; } @@ -2385,7 +2385,7 @@ public: } const auto features = NUdf::GetDataTypeInfo(NUdf::GetDataSlot(data.GetTypeId())).Features; - if (features & NUdf::BigDateType) { + if (features & NUdf::ExtDateType) { BuildSignature<TM64ResourceName, WBoundary>(builder, typesOnly); return true; } @@ -2507,7 +2507,7 @@ public: } const auto features = NUdf::GetDataTypeInfo(NUdf::GetDataSlot(data.GetTypeId())).Features; - if (features & NUdf::BigDateType) { + if (features & NUdf::ExtDateType) { BuildSignature<TM64ResourceName>(builder, typesOnly); return true; } @@ -2634,7 +2634,7 @@ public: } const auto features = NUdf::GetDataTypeInfo(NUdf::GetDataSlot(data.GetTypeId())).Features; - if (features & NUdf::BigDateType) { + if (features & NUdf::ExtDateType) { BuildSignature<TM64ResourceName, WShifter>(builder, typesOnly); return true; } diff --git a/yql/tools/yqlrun/yqlrun.cpp b/yql/tools/yqlrun/yqlrun.cpp index 6ed7689a755..b3815321c8c 100644 --- a/yql/tools/yqlrun/yqlrun.cpp +++ b/yql/tools/yqlrun/yqlrun.cpp @@ -116,6 +116,7 @@ int RunUI(int argc, const char* argv[]) NYql::NBacktrace::RegisterKikimrFatalActions(); NYql::NBacktrace::EnableKikimrSymbolize(); + EnableKikimrBacktraceFormat(); TVector<TString> udfsPaths; TString udfsDir; diff --git a/yt/yql/providers/yt/common/yql_configuration.h b/yt/yql/providers/yt/common/yql_configuration.h index 90db39a172e..e71c5ed9484 100644 --- a/yt/yql/providers/yt/common/yql_configuration.h +++ b/yt/yql/providers/yt/common/yql_configuration.h @@ -131,4 +131,6 @@ constexpr ERuntimeClusterSelectionMode DEFAULT_RUNTIME_CLUSTER_SELECTION = NYql: constexpr bool DEFAULT_ALLOW_REMOTE_CLUSTER_INPUT = false; +constexpr bool DEFAULT_USE_COLUMN_GROUPS_FROM_INPUT_TABLE = false; + } // NYql diff --git a/yt/yql/providers/yt/common/yql_yt_settings.cpp b/yt/yql/providers/yt/common/yql_yt_settings.cpp index be6694fb174..e8f019ea168 100644 --- a/yt/yql/providers/yt/common/yql_yt_settings.cpp +++ b/yt/yql/providers/yt/common/yql_yt_settings.cpp @@ -548,6 +548,7 @@ TYtConfiguration::TYtConfiguration(TTypeAnnotationContext& typeCtx) } }); REGISTER_SETTING(*this, _AllowRemoteClusterInput); + REGISTER_SETTING(*this, UseColumnGroupsFromInputTables); } EReleaseTempDataMode GetReleaseTempDataMode(const TYtSettings& settings) { diff --git a/yt/yql/providers/yt/common/yql_yt_settings.h b/yt/yql/providers/yt/common/yql_yt_settings.h index 2cb52f9cfea..e2dd916629d 100644 --- a/yt/yql/providers/yt/common/yql_yt_settings.h +++ b/yt/yql/providers/yt/common/yql_yt_settings.h @@ -315,6 +315,7 @@ struct TYtSettings { NCommon::TConfSetting<bool, false> CompactForDistinct; NCommon::TConfSetting<bool, false> DropUnusedKeysFromKeyFilter; NCommon::TConfSetting<bool, false> ReportEquiJoinStats; + NCommon::TConfSetting<bool, false> UseColumnGroupsFromInputTables; }; EReleaseTempDataMode GetReleaseTempDataMode(const TYtSettings& settings); diff --git a/yt/yql/providers/yt/comp_nodes/dq/dq_yt_reader_impl.h b/yt/yql/providers/yt/comp_nodes/dq/dq_yt_reader_impl.h index dedea97996d..e5135f2fec2 100644 --- a/yt/yql/providers/yt/comp_nodes/dq/dq_yt_reader_impl.h +++ b/yt/yql/providers/yt/comp_nodes/dq/dq_yt_reader_impl.h @@ -154,7 +154,7 @@ public: const auto half = CastInst::Create(Instruction::Trunc, state, Type::getInt64Ty(context), "half", block); const auto stateArg = CastInst::Create(Instruction::IntToPtr, half, statePtrType, "state_arg", block); - const auto func = ConstantInt::get(Type::getInt64Ty(context), GetMethodPtr(&TState::FetchRecord)); + const auto func = ConstantInt::get(Type::getInt64Ty(context), GetMethodPtr<&TState::FetchRecord>()); const auto funcType = FunctionType::get(valueType, { statePtrType }, false); const auto funcPtr = CastInst::Create(Instruction::IntToPtr, func, PointerType::getUnqual(funcType), "fetch_func", block); const auto fetch = CallInst::Create(funcType, funcPtr, { stateArg }, "fetch", block); diff --git a/yt/yql/providers/yt/comp_nodes/dq/dq_yt_writer.cpp b/yt/yql/providers/yt/comp_nodes/dq/dq_yt_writer.cpp index cc0642d4e63..1860f9e3c8f 100644 --- a/yt/yql/providers/yt/comp_nodes/dq/dq_yt_writer.cpp +++ b/yt/yql/providers/yt/comp_nodes/dq/dq_yt_writer.cpp @@ -183,7 +183,7 @@ public: const auto half = CastInst::Create(Instruction::Trunc, state, Type::getInt64Ty(context), "half", block); const auto stateArg = CastInst::Create(Instruction::IntToPtr, half, structPtrType, "state_arg", block); - const auto finishFunc = ConstantInt::get(Type::getInt64Ty(context), GetMethodPtr(&TWriterState::Finish)); + const auto finishFunc = ConstantInt::get(Type::getInt64Ty(context), GetMethodPtr<&TWriterState::Finish>()); const auto finishType = FunctionType::get(Type::getVoidTy(context), {stateArg->getType()}, false); const auto finishPtr = CastInst::Create(Instruction::IntToPtr, finishFunc, PointerType::getUnqual(finishType), "finish", block); CallInst::Create(finishType, finishPtr, {stateArg}, "", block); diff --git a/yt/yql/providers/yt/comp_nodes/yql_mkql_output.cpp b/yt/yql/providers/yt/comp_nodes/yql_mkql_output.cpp index 1dfffcf0f52..83ead71a104 100644 --- a/yt/yql/providers/yt/comp_nodes/yql_mkql_output.cpp +++ b/yt/yql/providers/yt/comp_nodes/yql_mkql_output.cpp @@ -75,7 +75,7 @@ public: block = work; - const auto addFunc = ConstantInt::get(Type::getInt64Ty(context), GetMethodPtr(&TYtFlowOutputWrapper::AddRowImpl)); + const auto addFunc = ConstantInt::get(Type::getInt64Ty(context), GetMethodPtr<&TYtFlowOutputWrapper::AddRowImpl>()); const auto selfArg = ConstantInt::get(Type::getInt64Ty(context), ui64(this)); const auto arg = item; const auto addType = FunctionType::get(Type::getVoidTy(context), {selfArg->getType(), arg->getType()}, false); @@ -161,7 +161,7 @@ public: new StoreInst(fields.back(), pointer, block); } - const auto addFunc = ConstantInt::get(Type::getInt64Ty(context), GetMethodPtr(&TYtWideOutputWrapper::AddRowImpl)); + const auto addFunc = ConstantInt::get(Type::getInt64Ty(context), GetMethodPtr<&TYtWideOutputWrapper::AddRowImpl>()); const auto selfArg = ConstantInt::get(Type::getInt64Ty(context), ui64(this)); const auto addType = FunctionType::get(Type::getVoidTy(context), {selfArg->getType(), values->getType()}, false); const auto addPtr = CastInst::Create(Instruction::IntToPtr, addFunc, PointerType::getUnqual(addType), "write", block); diff --git a/yt/yql/providers/yt/provider/yql_yt_load_table_meta.cpp b/yt/yql/providers/yt/provider/yql_yt_load_table_meta.cpp index 0d1daffc142..9b759cb8167 100644 --- a/yt/yql/providers/yt/provider/yql_yt_load_table_meta.cpp +++ b/yt/yql/providers/yt/provider/yql_yt_load_table_meta.cpp @@ -199,13 +199,24 @@ public: return TStatus::Error; } } - if (auto pSchema = tableDesc.Meta->Attrs.FindPtr(SCHEMA_ATTR_NAME)) { - const auto colGroupSpec = GetColumnGroupSpecFromSchema(NYT::NodeFromYsonString(*pSchema)); - if (colGroupSpec) { - YQL_CLOG(TRACE, ProviderYt) << "Loaded column group from schema for " << tableName << " (epoch=" << LoadCtx->Epoch << "): " << colGroupSpec; - if (LoadCtx->Epoch == 0) { - tableDesc.ColumnGroupSpec = colGroupSpec; - tableDesc.ColumnGroupSpecAlts.insert(colGroupSpec); + if (State_->Configuration->UseColumnGroupsFromInputTables.Get().GetOrElse(DEFAULT_USE_COLUMN_GROUPS_FROM_INPUT_TABLE)) { + if (auto pSchema = tableDesc.Meta->Attrs.FindPtr(SCHEMA_ATTR_NAME)) { + TString colGroupSpec; + try { + colGroupSpec = GetColumnGroupSpecFromSchema(NYT::NodeFromYsonString(*pSchema)); + } catch (...) { + YQL_CLOG(ERROR, ProviderYt) << "Error parsing column groups for " << tableName << ", schema: " << *pSchema; + auto issue = TIssue(TStringBuilder() << "Error parsing column groups from schema: " << CurrentExceptionMessage()); + issue.SetCode(UNEXPECTED_ERROR, ESeverity::TSeverityIds_ESeverityId_S_FATAL); + ctx.AddError(issue); + return TStatus::Error; + } + if (colGroupSpec) { + YQL_CLOG(TRACE, ProviderYt) << "Loaded column group from schema for " << tableName << " (epoch=" << LoadCtx->Epoch << "): " << colGroupSpec; + if (LoadCtx->Epoch == 0) { + tableDesc.ColumnGroupSpec = colGroupSpec; + tableDesc.ColumnGroupSpecAlts.insert(colGroupSpec); + } } } } diff --git a/yt/yql/tests/sql/suites/column_group/hint_append.cfg b/yt/yql/tests/sql/suites/column_group/hint_append.cfg index f1a91330c09..827ef601339 100644 --- a/yt/yql/tests/sql/suites/column_group/hint_append.cfg +++ b/yt/yql/tests/sql/suites/column_group/hint_append.cfg @@ -4,3 +4,4 @@ out Output2 input_with_groups.txt out Output3 input_with_groups.txt providers yt pragma yt.OptimizeFor="scan" +pragma yt.UseColumnGroupsFromInputTables diff --git a/yt/yql/tests/sql/suites/column_group/hint_append2.cfg b/yt/yql/tests/sql/suites/column_group/hint_append2.cfg index edce27ee9c3..7c71144647e 100644 --- a/yt/yql/tests/sql/suites/column_group/hint_append2.cfg +++ b/yt/yql/tests/sql/suites/column_group/hint_append2.cfg @@ -4,3 +4,4 @@ out Output2 input_with_single_group.txt out Output3 input_with_single_group.txt providers yt pragma yt.OptimizeFor="scan" +pragma yt.UseColumnGroupsFromInputTables diff --git a/yt/yt/client/api/config.cpp b/yt/yt/client/api/config.cpp index 4219e08000e..82a43d37d7a 100644 --- a/yt/yt/client/api/config.cpp +++ b/yt/yt/client/api/config.cpp @@ -151,6 +151,9 @@ void TJournalWriterConfig::Register(TRegistrar registrar) registrar.Parameter("prerequisite_transaction_probe_period", &TThis::PrerequisiteTransactionProbePeriod) .Default(TDuration::Seconds(60)); + registrar.Parameter("enable_checksums", &TThis::EnableChecksums) + .Default(false); + registrar.Parameter("dont_close", &TThis::DontClose) .Default(false); registrar.Parameter("dont_seal", &TThis::DontSeal) diff --git a/yt/yt/client/api/config.h b/yt/yt/client/api/config.h index 1fa2d876fc9..3811b891229 100644 --- a/yt/yt/client/api/config.h +++ b/yt/yt/client/api/config.h @@ -209,6 +209,8 @@ struct TJournalWriterConfig TDuration PrerequisiteTransactionProbePeriod; + bool EnableChecksums; + // For testing purposes only. bool DontClose; bool DontSeal; diff --git a/yt/yt/client/api/operation_client.cpp b/yt/yt/client/api/operation_client.cpp index e4fe36a8bfe..a8e025874db 100644 --- a/yt/yt/client/api/operation_client.cpp +++ b/yt/yt/client/api/operation_client.cpp @@ -285,6 +285,7 @@ void Serialize(const TJob& job, NYson::IYsonConsumer* consumer, TStringBuf idKey .OptionalItem("controller_state", job.ControllerState) .OptionalItem("archive_state", job.ArchiveState) .OptionalItem("address", job.Address) + .OptionalItem("addresses", job.Addresses) .OptionalItem("start_time", job.StartTime) .OptionalItem("finish_time", job.FinishTime) .OptionalItem("has_spec", job.HasSpec) diff --git a/yt/yt/client/api/operation_client.h b/yt/yt/client/api/operation_client.h index 9653db43362..8e672c80e5f 100644 --- a/yt/yt/client/api/operation_client.h +++ b/yt/yt/client/api/operation_client.h @@ -3,9 +3,10 @@ #include "client_common.h" #include <yt/yt/client/scheduler/operation_id_or_alias.h> - #include <yt/yt/client/scheduler/public.h> +#include <yt/yt/client/node_tracker_client/public.h> + namespace NYT::NApi { //////////////////////////////////////////////////////////////////////////////// @@ -368,6 +369,7 @@ struct TJob std::optional<TInstant> StartTime; std::optional<TInstant> FinishTime; std::optional<TString> Address; + std::optional<NNodeTrackerClient::TAddressMap> Addresses; std::optional<double> Progress; std::optional<ui64> StderrSize; std::optional<ui64> FailContextSize; diff --git a/yt/yt/client/api/rpc_proxy/helpers.cpp b/yt/yt/client/api/rpc_proxy/helpers.cpp index 1a316ed3747..8e1fdd366ec 100644 --- a/yt/yt/client/api/rpc_proxy/helpers.cpp +++ b/yt/yt/client/api/rpc_proxy/helpers.cpp @@ -840,6 +840,7 @@ void ToProto(NProto::TJob* protoJob, const NApi::TJob& job) YT_OPTIONAL_SET_PROTO(protoJob, finish_time, job.FinishTime); YT_OPTIONAL_TO_PROTO(protoJob, address, job.Address); + YT_OPTIONAL_TO_PROTO(protoJob, addresses, job.Addresses); if (job.Progress) { protoJob->set_progress(*job.Progress); } @@ -905,6 +906,11 @@ void FromProto(NApi::TJob* job, const NProto::TJob& protoJob) job->StartTime = YT_OPTIONAL_FROM_PROTO(protoJob, start_time, TInstant); job->FinishTime = YT_OPTIONAL_FROM_PROTO(protoJob, finish_time, TInstant); job->Address = YT_OPTIONAL_FROM_PROTO(protoJob, address); + if (protoJob.has_addresses()) { + job->Addresses = FromProto<NNodeTrackerClient::TAddressMap>(protoJob.addresses()); + } else { + job->Addresses = {}; + } job->Progress = YT_OPTIONAL_FROM_PROTO(protoJob, progress); job->StderrSize = YT_OPTIONAL_FROM_PROTO(protoJob, stderr_size); job->FailContextSize = YT_OPTIONAL_FROM_PROTO(protoJob, fail_context_size); diff --git a/yt/yt/client/chunk_client/config.cpp b/yt/yt/client/chunk_client/config.cpp index 6c367cdafe8..069ef37b30b 100644 --- a/yt/yt/client/chunk_client/config.cpp +++ b/yt/yt/client/chunk_client/config.cpp @@ -229,7 +229,7 @@ void TErasureReaderConfig::Register(TRegistrar registrar) registrar.Parameter("slow_reader_expiration_timeout", &TThis::SlowReaderExpirationTimeout) .Default(TDuration::Minutes(2)); registrar.Parameter("replication_reader_timeout", &TThis::ReplicationReaderTimeout) - .Default(TDuration::Seconds(60)); + .Default(TDuration::Seconds(300)); registrar.Parameter("replication_reader_failure_timeout", &TThis::ReplicationReaderFailureTimeout) .Default(TDuration::Minutes(10)); } diff --git a/yt/yt/client/misc/workload.cpp b/yt/yt/client/misc/workload.cpp index 83e355ba0f0..cd35f556572 100644 --- a/yt/yt/client/misc/workload.cpp +++ b/yt/yt/client/misc/workload.cpp @@ -101,6 +101,37 @@ IInvokerPtr GetCompressionInvoker(const TWorkloadDescriptor& workloadDescriptor) } } +bool IsSystemWorkloadCategory(EWorkloadCategory category) +{ + switch (category) { + case EWorkloadCategory::Idle: + return false; + + case EWorkloadCategory::SystemArtifactCacheDownload: + case EWorkloadCategory::SystemRepair: + case EWorkloadCategory::SystemReincarnation: + case EWorkloadCategory::SystemReplication: + case EWorkloadCategory::SystemMerge: + case EWorkloadCategory::SystemTabletCompaction: + case EWorkloadCategory::SystemTabletLogging: + case EWorkloadCategory::SystemTabletPartitioning: + case EWorkloadCategory::SystemTabletPreload: + case EWorkloadCategory::SystemTabletRecovery: + case EWorkloadCategory::SystemTabletReplication: + case EWorkloadCategory::SystemTabletSnapshot: + case EWorkloadCategory::SystemTabletStoreFlush: + return true; + + case EWorkloadCategory::UserBatch: + case EWorkloadCategory::UserInteractive: + case EWorkloadCategory::UserRealtime: + case EWorkloadCategory::UserDynamicStoreRead: + return false; + } + + return false; +} + struct TSerializableWorkloadDescriptor : public TWorkloadDescriptor , public TYsonStructLite diff --git a/yt/yt/client/misc/workload.h b/yt/yt/client/misc/workload.h index 63ce3a7cb33..4ab895195f0 100644 --- a/yt/yt/client/misc/workload.h +++ b/yt/yt/client/misc/workload.h @@ -85,6 +85,8 @@ i64 GetBasicPriority(EWorkloadCategory category); IInvokerPtr GetCompressionInvoker(const TWorkloadDescriptor& workloadDescriptor); +bool IsSystemWorkloadCategory(EWorkloadCategory category); + void FormatValue( TStringBuilderBase* builder, const TWorkloadDescriptor& descriptor, diff --git a/yt/yt/client/table_client/helpers.cpp b/yt/yt/client/table_client/helpers.cpp index 299008c50a9..b2eabd6d0aa 100644 --- a/yt/yt/client/table_client/helpers.cpp +++ b/yt/yt/client/table_client/helpers.cpp @@ -1426,8 +1426,10 @@ TUnversionedValue EncodeUnversionedAnyValue( { YT_ASSERT(None(value.Flags)); switch (value.Type) { - case EValueType::Any: case EValueType::Composite: + value.Type = EValueType::Any; + [[fallthrough]]; + case EValueType::Any: return value; case EValueType::Null: { diff --git a/yt/yt_proto/yt/client/api/rpc_proxy/proto/api_service.proto b/yt/yt_proto/yt/client/api/rpc_proxy/proto/api_service.proto index 83ff3617662..d121f2e0794 100644 --- a/yt/yt_proto/yt/client/api/rpc_proxy/proto/api_service.proto +++ b/yt/yt_proto/yt/client/api/rpc_proxy/proto/api_service.proto @@ -14,6 +14,7 @@ import "yt_proto/yt/core/ytree/proto/request_complexity_limits.proto"; import "yt_proto/yt/client/chunk_client/proto/data_statistics.proto"; import "yt_proto/yt/client/chaos_client/proto/replication_card.proto"; import "yt_proto/yt/client/hive/proto/timestamp_map.proto"; +import "yt_proto/yt/client/node_tracker_client/proto/node.proto"; import "yt_proto/yt/client/scheduler/proto/spec_patch.proto"; import "yt_proto/yt/client/table_client/proto/versioned_io_options.proto"; import "yt_proto/yt/client/tablet_client/proto/lock_mask.proto"; @@ -3211,6 +3212,7 @@ message TJob optional string monitoring_descriptor = 30; optional string operation_incarnation = 31; optional NYT.NProto.TGuid allocation_id = 32; + optional NNodeTrackerClient.NProto.TAddressMap addresses = 33; } message TListJobsStatistics diff --git a/yt/yt_proto/yt/client/node_tracker_client/proto/node.proto b/yt/yt_proto/yt/client/node_tracker_client/proto/node.proto index 497593d75d4..fa09aeb9f0f 100644 --- a/yt/yt_proto/yt/client/node_tracker_client/proto/node.proto +++ b/yt/yt_proto/yt/client/node_tracker_client/proto/node.proto @@ -3,6 +3,8 @@ package NYT.NNodeTrackerClient.NProto; import "yt_proto/yt/core/misc/proto/guid.proto"; import "yt_proto/yt/core/misc/proto/error.proto"; +option go_package = "a.yandex-team.ru/yt/go/proto/client/node_tracker_client"; + //////////////////////////////////////////////////////////////////////////////// // TODO(gritukan): Move it to TReqFullChunkHeartbeat after switching to new heartbeats. |