aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorAlexander Smirnov <alex@ydb.tech>2025-03-28 00:51:44 +0000
committerAlexander Smirnov <alex@ydb.tech>2025-03-28 00:51:44 +0000
commit149dc5893b4b90db5ad40d019d2d1ffa6e1f0abd (patch)
tree6e3243c1b457c31839ddc9fa75b5f46bcc1fecb4
parent4c8dfa633cab20dabf3d11464d986335519bfcfa (diff)
parent71e9df83f284bf42c2f8ea872752bf02e1055555 (diff)
downloadydb-149dc5893b4b90db5ad40d019d2d1ffa6e1f0abd.tar.gz
Merge branch 'rightlib' into merge-libs-250328-0050
-rw-r--r--build/conf/java.conf2
-rw-r--r--build/export_generators/ide-gradle/build.gradle.kts.jinja12
-rw-r--r--build/export_generators/ide-gradle/build.gradle.kts.proto.jinja17
-rw-r--r--build/export_generators/ide-gradle/jdk.jinja4
-rw-r--r--build/export_generators/ide-gradle/proto_source_sets.jinja16
-rw-r--r--build/export_generators/ide-gradle/proto_vars.jinja1
-rw-r--r--build/export_generators/ide-gradle/run_java_program.jinja50
-rw-r--r--build/export_generators/ide-gradle/run_program.jinja64
-rw-r--r--build/export_generators/ide-gradle/source_sets.jinja4
-rw-r--r--build/mapping.conf.json2
-rw-r--r--build/platform/yfm/resources.json8
-rw-r--r--build/plugins/lib/test_const/__init__.py25
-rw-r--r--build/sysincl/misc.yml4
-rw-r--r--contrib/libs/cxxsupp/libcxx/include/complex3
-rw-r--r--contrib/libs/cxxsupp/libcxx/patches/38-complex.patch14
-rw-r--r--library/cpp/neh/tcp2.cpp30
-rw-r--r--library/cpp/type_info/type_io.cpp33
-rw-r--r--library/cpp/type_info/ut/type_deserialize.cpp34
-rw-r--r--util/generic/enum_cast.cpp12
-rw-r--r--util/generic/enum_cast.h46
-rw-r--r--util/generic/enum_cast_ut.cpp49
-rw-r--r--util/generic/enum_cast_ut.h28
-rw-r--r--util/generic/fwd.h3
-rw-r--r--util/generic/string.h8
-rw-r--r--util/generic/ut/ya.make3
-rw-r--r--util/ya.make1
-rw-r--r--yql/essentials/cfg/tests/gateways-experimental.conf5
-rw-r--r--yql/essentials/core/yql_expr_type_annotation.cpp2
-rw-r--r--yql/essentials/minikql/comp_nodes/benchmark/block_coalesce/bench.cpp97
-rw-r--r--yql/essentials/minikql/comp_nodes/benchmark/block_coalesce/ya.make22
-rw-r--r--yql/essentials/minikql/comp_nodes/benchmark/ya.make1
-rw-r--r--yql/essentials/minikql/comp_nodes/mkql_block_coalesce.cpp123
-rw-r--r--yql/essentials/minikql/comp_nodes/mkql_block_coalesce.h4
-rw-r--r--yql/essentials/minikql/comp_nodes/mkql_block_coalesce_blending_helper.h233
-rw-r--r--yql/essentials/minikql/comp_nodes/mkql_blocks.cpp219
-rw-r--r--yql/essentials/minikql/comp_nodes/mkql_blocks.h1
-rw-r--r--yql/essentials/minikql/comp_nodes/mkql_factory.cpp1
-rw-r--r--yql/essentials/minikql/comp_nodes/ut/mkql_block_coalesce_ut.cpp295
-rw-r--r--yql/essentials/minikql/comp_nodes/ut/mkql_block_exists_ut.cpp2
-rw-r--r--yql/essentials/minikql/comp_nodes/ut/mkql_blocks_ut.cpp131
-rw-r--r--yql/essentials/minikql/comp_nodes/ut/ya.make.inc7
-rw-r--r--yql/essentials/minikql/comp_nodes/ya.make1
-rw-r--r--yql/essentials/minikql/computation/mkql_block_impl.cpp6
-rw-r--r--yql/essentials/minikql/invoke_builtins/mkql_builtins_datetime.h10
-rw-r--r--yql/essentials/minikql/mkql_program_builder.cpp41
-rw-r--r--yql/essentials/minikql/mkql_program_builder.h2
-rw-r--r--yql/essentials/minikql/mkql_runtime_version.h2
-rw-r--r--yql/essentials/minikql/mkql_type_builder.cpp42
-rw-r--r--yql/essentials/minikql/mkql_type_builder.h4
-rw-r--r--yql/essentials/public/udf/arrow/bit_util.h67
-rw-r--r--yql/essentials/public/udf/arrow/ut/bit_util_ut.cpp89
-rw-r--r--yql/essentials/public/udf/udf_data_type.h7
-rw-r--r--yql/essentials/sql/v1/complete/sql_complete.cpp20
-rw-r--r--yql/essentials/sql/v1/complete/sql_complete.h7
-rw-r--r--yql/essentials/sql/v1/complete/sql_complete_ut.cpp50
-rw-r--r--yql/essentials/sql/v1/complete/sql_context.cpp41
-rw-r--r--yql/essentials/sql/v1/complete/sql_context.h4
-rw-r--r--yql/essentials/sql/v1/complete/ut/ya.make5
-rw-r--r--yql/essentials/sql/v1/complete/ya.make7
-rw-r--r--yql/essentials/sql/v1/lexer/lexer.cpp19
-rw-r--r--yql/essentials/sql/v1/lexer/lexer.h6
-rw-r--r--yql/essentials/tests/sql/minirun/pure.py2
-rw-r--r--yql/essentials/tools/yql_facade_run/yql_facade_run.cpp1
-rw-r--r--yql/essentials/udfs/common/datetime2/datetime_udf.cpp24
-rw-r--r--yql/tools/yqlrun/yqlrun.cpp1
-rw-r--r--yt/yql/providers/yt/common/yql_configuration.h2
-rw-r--r--yt/yql/providers/yt/common/yql_yt_settings.cpp1
-rw-r--r--yt/yql/providers/yt/common/yql_yt_settings.h1
-rw-r--r--yt/yql/providers/yt/comp_nodes/dq/dq_yt_reader_impl.h2
-rw-r--r--yt/yql/providers/yt/comp_nodes/dq/dq_yt_writer.cpp2
-rw-r--r--yt/yql/providers/yt/comp_nodes/yql_mkql_output.cpp4
-rw-r--r--yt/yql/providers/yt/provider/yql_yt_load_table_meta.cpp25
-rw-r--r--yt/yql/tests/sql/suites/column_group/hint_append.cfg1
-rw-r--r--yt/yql/tests/sql/suites/column_group/hint_append2.cfg1
-rw-r--r--yt/yt/client/api/config.cpp3
-rw-r--r--yt/yt/client/api/config.h2
-rw-r--r--yt/yt/client/api/operation_client.cpp1
-rw-r--r--yt/yt/client/api/operation_client.h4
-rw-r--r--yt/yt/client/api/rpc_proxy/helpers.cpp6
-rw-r--r--yt/yt/client/chunk_client/config.cpp2
-rw-r--r--yt/yt/client/misc/workload.cpp31
-rw-r--r--yt/yt/client/misc/workload.h2
-rw-r--r--yt/yt/client/table_client/helpers.cpp4
-rw-r--r--yt/yt_proto/yt/client/api/rpc_proxy/proto/api_service.proto2
-rw-r--r--yt/yt_proto/yt/client/node_tracker_client/proto/node.proto2
85 files changed, 1943 insertions, 229 deletions
diff --git a/build/conf/java.conf b/build/conf/java.conf
index ff77a303a05..053c0c4a1d9 100644
--- a/build/conf/java.conf
+++ b/build/conf/java.conf
@@ -476,7 +476,7 @@ macro _DO_2_RUN_JAR_PROGRAM(IN_DIRS_VAR="uniq_", IN_DIRS_INPUTS[], IN{input}[],
_CHECK_RUN_JAVA_PROG_CLASSPATH($CLASSPATH)
.PEERDIR=build/platform/java/jdk $JDK_RESOURCE_PEERDIR
.CMD=${hide;kv:"p RJ"} ${hide;kv:"pc blue"} ${hide:JAVA_FAKEID} ${cwd:BINDIR} $YMAKE_PYTHON ${input:"build/scripts/mkdir.py"} ${OUT_DIR} && ${cwd:CWD} $YMAKE_PYTHON ${input:"build/scripts/setup_java_tmpdir.py"} $YMAKE_PYTHON ${input:"build/scripts/stdout2stderr.py"} ${pre=--file=:STDOUT} ${hide;output:STDOUT} ${pre=--file=:STDOUT_NOAUTO} ${hide;noauto;output:STDOUT_NOAUTO} $YMAKE_PYTHON ${input:"build/scripts/fix_java_command_file_cp.py"} --build-root ${ARCADIA_BUILD_ROOT} $JDK_RESOURCE/bin/java -Dfile.encoding=utf8 -classpath ${RUN_JAR_PROG_CP_PRE}${tool:CLASSPATH}${RUN_JAR_PROG_CP_SUF} ${Args} && $_GENTAR_HELPER(HASH_SUF $HASH_SUF OUT_DIR $OUT_DIR) ${hide;input:IN} ${hide;context=TEXT;input:IN_NOPARSE} ${hide;noauto;output:OUT_NOAUTO} ${hide;output:OUT} ${hide;tool:TOOL} ${IN_DIRS_INPUTS}
- .SEM=runs-ITEM && runs-args ${Args} && runs-classpath ${RUN_JAR_PROG_CP_PRE}${tool:CLASSPATH}${RUN_JAR_PROG_CP_SUF} && runs-cwd ${CWD} ${hide;cwd:CWD} && runs-in ${IN} ${hide;input:IN} && runs-in_dir ${IN_DIR} && runs-in_dirs_inputs ${IN_DIRS_INPUTS} && runs-in_noparse ${IN_NOPARSE} ${hide;context=TEXT;input:IN_NOPARSE} && runs-out ${OUT} ${hide;output:OUT} ${OUT_NOAUTO} ${hide;noauto;output:OUT_NOAUTO} $_GENTAR_HELPER(HASH_SUF $HASH_SUF OUT_DIR $OUT_DIR) && runs-out_dir ${OUT_DIR} && runs-tool ${TOOL} ${hide;tool:TOOL}
+ .SEM=runs-ITEM && runs-args ${Args} && runs-classpath ${RUN_JAR_PROG_CP_PRE}${tool:CLASSPATH}${RUN_JAR_PROG_CP_SUF} && runs-cwd ${CWD} ${hide;cwd:CWD} && runs-in ${IN} ${hide;input:IN} && runs-in_dir ${IN_DIR} && runs-in_dirs_inputs ${IN_DIRS_INPUTS} && runs-in_noparse ${IN_NOPARSE} ${hide;context=TEXT;input:IN_NOPARSE} && runs-out ${OUT} ${hide;output:OUT} ${OUT_NOAUTO} ${hide;noauto;output:OUT_NOAUTO} $_GENTAR_HELPER(HASH_SUF $HASH_SUF OUT_DIR $OUT_DIR) && runs-out_dir ${OUT_DIR} && runs-tool ${tool:TOOL}
}
# tag:java-specific
diff --git a/build/export_generators/ide-gradle/build.gradle.kts.jinja b/build/export_generators/ide-gradle/build.gradle.kts.jinja
index 2adebe9cf99..c037f3e7bc8 100644
--- a/build/export_generators/ide-gradle/build.gradle.kts.jinja
+++ b/build/export_generators/ide-gradle/build.gradle.kts.jinja
@@ -1,3 +1,14 @@
+{%- macro PatchRoots(arg, depend = false) -%}
+{#- Always replace (arcadia_root) === (SOURCE_ROOT in ymake) to $project_root in Gradle -#}
+{%- if depend -%}
+{#- Replace (export_root) === (BUILD_ROOT in ymake) to $project_root in Gradle, because prebuilt tools in arcadia, not in build_root -#}
+"{{ arg|replace(export_root, "$project_root")|replace(arcadia_root, "$project_root") }}"
+{%- else -%}
+{#- Replace (export_root) === (BUILD_ROOT in ymake) to baseBuildDir in Gradle - root of all build folders for modules -#}
+"{{ arg|replace(export_root, "$baseBuildDir")|replace(arcadia_root, "$project_root") }}"
+{%- endif -%}
+{%- endmacro -%}
+
{%- include "[generator]/vars.jinja" -%}
{%- include "[generator]/import.jinja" -%}
{%- include "[generator]/repositories.jinja" -%}
@@ -12,6 +23,7 @@
{%- include "[generator]/source_sets.jinja" -%}
{%- include "[generator]/test.jinja" -%}
{%- include "[generator]/javadoc.jinja" -%}
+{%- include "[generator]/run_program.jinja" -%}
{%- include "[generator]/run_java_program.jinja" -%}
{%- include "[generator]/dependencies.jinja" -%}
{%- include "extra-tests.gradle.kts" ignore missing -%}
diff --git a/build/export_generators/ide-gradle/build.gradle.kts.proto.jinja b/build/export_generators/ide-gradle/build.gradle.kts.proto.jinja
index a83447aa671..ec3465d3166 100644
--- a/build/export_generators/ide-gradle/build.gradle.kts.proto.jinja
+++ b/build/export_generators/ide-gradle/build.gradle.kts.proto.jinja
@@ -1,3 +1,18 @@
+{%- macro PatchRoots(arg, depend = false) -%}
+{#- Always replace (arcadia_root) === (SOURCE_ROOT in ymake) to $project_root in Gradle -#}
+{%- if depend -%}
+{#- Replace (export_root) === (BUILD_ROOT in ymake) to $project_root in Gradle, because prebuilt tools in arcadia, not in build_root -#}
+"{{ arg|replace(export_root, "$project_root")|replace(arcadia_root, "$project_root") }}"
+{%- else -%}
+{#- Replace (export_root) === (BUILD_ROOT in ymake) to baseBuildDir in Gradle - root of all build folders for modules -#}
+"{{ arg|replace(export_root, "$baseBuildDir")|replace(arcadia_root, "$project_root") }}"
+{%- endif -%}
+{%- endmacro -%}
+
+{%- macro PatchGeneratedProto(arg) -%}
+"{{ arg|replace(export_root, "$mainExtractedIncludeProtosDir")|replace(arcadia_root, "$mainExtractedIncludeProtosDir") }}"
+{%- endmacro -%}
+
{%- include "[generator]/proto_vars.jinja" -%}
{%- include "[generator]/proto_import.jinja" -%}
{%- include "[generator]/repositories.jinja" -%}
@@ -10,6 +25,8 @@
{%- include "[generator]/proto_source_sets.jinja" -%}
{%- include "[generator]/protobuf.jinja" -%}
{%- include "[generator]/proto_prepare.jinja" -%}
+{%- include "[generator]/run_program.jinja" -%}
+{%- include "[generator]/run_java_program.jinja" -%}
{%- include "[generator]/javadoc.jinja" -%}
{%- include "[generator]/proto_dependencies.jinja" -%}
{%- include "[generator]/debug.jinja" ignore missing -%}
diff --git a/build/export_generators/ide-gradle/jdk.jinja b/build/export_generators/ide-gradle/jdk.jinja
index 01fbc09daf9..5bc13819dd2 100644
--- a/build/export_generators/ide-gradle/jdk.jinja
+++ b/build/export_generators/ide-gradle/jdk.jinja
@@ -1,6 +1,8 @@
{#- default JDK version -#}
{%- set jdk_version = '17' -%}
-{%- if target.required_jdk -%}
+{%- if force_jdk_version -%}
+{%- set jdk_version = force_jdk_version -%}
+{%- elif target.required_jdk -%}
{%- set jdk_version = target.required_jdk -%}
{%- elif target.jdk_version -%}
{%- set jdk_version = target.jdk_version -%}
diff --git a/build/export_generators/ide-gradle/proto_source_sets.jinja b/build/export_generators/ide-gradle/proto_source_sets.jinja
index 7c7814d940e..db7c467f643 100644
--- a/build/export_generators/ide-gradle/proto_source_sets.jinja
+++ b/build/export_generators/ide-gradle/proto_source_sets.jinja
@@ -1,6 +1,15 @@
{#- empty string #}
sourceSets {
main {
+{%- if target.jar_source_set|length -%}
+{%- for source_set in target.jar_source_set -%}
+{%- set srcdir_glob = split(source_set, ':') -%}
+{%- set out = srcdir_glob[0] -%}
+{%- if out != 'src/main/java' %}
+ java.srcDir({{ PatchRoots(out) }})
+{%- endif -%}
+{%- endfor -%}
+{%- endif %}
java.srcDir("$buildDir/generated/source/proto/main/java")
{%- if target.proto_grpc %}
java.srcDir("$buildDir/generated/source/proto/main/grpc")
@@ -14,13 +23,6 @@ sourceSets {
}
}
-{%- if target.jar_source_set is defined -%}
-{%- for source_set in target.jar_source_set -%}
-{%- set srcdir_glob = split(source_set, ':') %}
-sourceSets.main.java.srcDirs += "{{ srcdir_glob[0] }}"
-{% endfor -%}
-{%- endif %}
-
tasks.withType<Jar>() {
duplicatesStrategy = DuplicatesStrategy.INCLUDE
}
diff --git a/build/export_generators/ide-gradle/proto_vars.jinja b/build/export_generators/ide-gradle/proto_vars.jinja
index 3ef6275d879..3f357ce243a 100644
--- a/build/export_generators/ide-gradle/proto_vars.jinja
+++ b/build/export_generators/ide-gradle/proto_vars.jinja
@@ -2,5 +2,6 @@
{%- set libraries = target.consumer|selectattr('type', 'eq', 'library') -%}
{%- set with_kotlin = target.with_kotlin -%}
{%- set kotlin_version = target.kotlin_version -%}
+{%- set proto_template = true -%}
{%- include "[generator]/jdk.jinja" -%}
diff --git a/build/export_generators/ide-gradle/run_java_program.jinja b/build/export_generators/ide-gradle/run_java_program.jinja
index f28ef430cc6..527a81c5b47 100644
--- a/build/export_generators/ide-gradle/run_java_program.jinja
+++ b/build/export_generators/ide-gradle/run_java_program.jinja
@@ -4,6 +4,11 @@
val runJav{{ loop.index }} = task<JavaExec>("runJavaProgram{{ loop.index }}") {
group = "build"
description = "Code generation by run java program"
+
+{%- if run.cwd %}
+ workingDir = file({{ PatchRoots(run.cwd) }})
+{%- endif -%}
+
{%- set classpaths = run.classpath|reject('eq', '@.cplst') -%}
{%- if classpaths|length -%}
{% for classpath in classpaths -%}
@@ -20,40 +25,61 @@ val runJav{{ loop.index }} = task<JavaExec>("runJavaProgram{{ loop.index }}") {
args = listOf(
{%- for arg in run.args -%}
{%- if not loop.first %}
- "{{ arg }}",
+{%- if proto_template and (run.out_dir|select("eq", arg)|length or run.out|select("eq", arg)|length) %}
+ {{ PatchGeneratedProto(arg) }},
+{%- else %}
+ {{ PatchRoots(arg, run.tool|select("in", arg)|length) }},
+{%- endif -%}
{%- endif -%}
{%- endfor %}
)
{% endif -%}
-{%- if run.in_dir -%}
-{%- for in_dir in run.in_dir %}
- inputs.files(fileTree("{{ in_dir }}"))
+{%- if run.in_dir|length -%}
+{%- for in_dir in run.in_dir|unique %}
+ inputs.files(fileTree({{ PatchRoots(in_dir) }}))
{% endfor -%}
{%- endif -%}
-{%- if run.in -%}
-{%- for in_file in run.in %}
- inputs.files("{{ in_file }}")
+{%- if run.in|length -%}
+{%- for in_file in run.in|unique %}
+ inputs.files({{ PatchRoots(in_file) }})
+{% endfor -%}
+{%- endif -%}
+
+{%- if run.in_noparse|length -%}
+{%- for in_file in run.in_noparse|unique %}
+ inputs.files({{ PatchRoots(in_file) }})
{% endfor -%}
{%- endif -%}
{%- if run.out_dir|length -%}
{%- for out_dir in run.out_dir|unique %}
- outputs.dir("{{ out_dir }}")
+ outputs.dir({{ PatchRoots(out_dir) }})
{%- endfor -%}
{%- endif -%}
+
+{%- if run.out|length -%}
+{%- for out in run.out|unique %}
+ outputs.files({{ PatchRoots(out) }})
+{%- endfor -%}
+{%- endif -%}
+
{#-
- Не использованы аттрибуты
- run-out="list"
- run-cwd="str"
+ Не использованы атрибуты
run-in_dirs_inputs="list"
- run-in_noparse="list"
run-tool="list"
#}
}
+{%- if proto_template %}
+
+tasks.getByName("prepareMainProtos").dependsOn(runJav{{ loop.index }})
+tasks.getByName("extractMainLibrariesProtos").dependsOn(runJav{{ loop.index }})
+{% else %}
+
tasks.getByName("sourcesJar").dependsOn(runJav{{ loop.index }})
+{% endif -%}
tasks.compileJava.configure {
dependsOn(runJav{{ loop.index }})
diff --git a/build/export_generators/ide-gradle/run_program.jinja b/build/export_generators/ide-gradle/run_program.jinja
new file mode 100644
index 00000000000..147ae6c1657
--- /dev/null
+++ b/build/export_generators/ide-gradle/run_program.jinja
@@ -0,0 +1,64 @@
+{%- if target.custom_runs|length -%}
+{%- for custom_run in target.custom_runs %}
+
+val runProg{{ loop.index }} = task<Exec>("runProgram{{ loop.index }}") {
+ group = "build"
+ description = "Code generation by run custom program"
+
+{%- if custom_run.cwd %}
+ workingDir = file({{ PatchRoots(custom_run.cwd) }})
+{%- endif %}
+
+ commandLine(
+{%- for arg in custom_run.command -%}
+{%- if custom_run.depends|select("eq", arg)|length -%}
+{{ PatchRoots(arg, true) }}
+{%- elif proto_template -%}
+{#- generated proto put to prepared proto dir -#}
+{{ PatchGeneratedProto(arg) }}
+{%- else -%}
+{{ PatchRoots(arg) }}
+{%- endif -%}
+{%- if not loop.last %}, {% endif -%}
+{%- endfor -%})
+
+{%- if custom_run.depends|length -%}
+{%- for depend in custom_run.depends|unique %}
+ inputs.files({{ PatchRoots(depend, true) }})
+{% endfor -%}
+{%- endif -%}
+
+{%- if custom_run.outputs|length -%}
+{%- for out in custom_run.outputs|unique %}
+{%- if proto_template %}
+ outputs.files({{ PatchGeneratedProto(out) }})
+{%- else %}
+ outputs.files({{ PatchRoots(out) }})
+{%- endif -%}
+{%- endfor -%}
+{%- endif -%}
+{#-
+ Не использованы атрибуты
+ custom_run-env="list"
+#}
+}
+
+{%- if proto_template %}
+
+tasks.getByName("extractMainLibrariesProtos").dependsOn(runProg{{ loop.index }})
+{% else %}
+
+tasks.getByName("sourcesJar").dependsOn(runProg{{ loop.index }})
+{% endif -%}
+
+tasks.compileJava.configure {
+ dependsOn(runProg{{ loop.index }})
+}
+{%- if with_kotlin %}
+
+tasks.compileKotlin.configure {
+ dependsOn(runProg{{ loop.index }})
+}
+{%- endif %}
+{% endfor -%}
+{% endif -%}
diff --git a/build/export_generators/ide-gradle/source_sets.jinja b/build/export_generators/ide-gradle/source_sets.jinja
index 952332fac60..009341c6dd5 100644
--- a/build/export_generators/ide-gradle/source_sets.jinja
+++ b/build/export_generators/ide-gradle/source_sets.jinja
@@ -14,7 +14,7 @@ sourceSets {
{%- set srcdir_glob = split(source_set, ':') -%}
{%- set out = srcdir_glob[0] -%}
{%- if out != 'src/main/java' %}
- java.srcDir("{{ out }}")
+ java.srcDir({{ PatchRoots(out) }})
{%- endif -%}
{%- endfor -%}
{%- endif %}
@@ -45,7 +45,7 @@ sourceSets {
{%- set srcdir_glob = split(source_set, ':') -%}
{%- set out = srcdir_glob[0] -%}
{%- if out != 'src/test/java' %}
- java.srcDir("{{ srcdir_glob[0] }}")
+ java.srcDir({{ PatchRoots(srcdir_glob[0]) }})
{%- endif -%}
{%- endfor -%}
{%- endif %}
diff --git a/build/mapping.conf.json b/build/mapping.conf.json
index 2631c61d610..019d916756d 100644
--- a/build/mapping.conf.json
+++ b/build/mapping.conf.json
@@ -1312,6 +1312,7 @@
"6048579718": "{registry_endpoint}/6048579718",
"7686710688": "{registry_endpoint}/7686710688",
"7879860842": "{registry_endpoint}/7879860842",
+ "8367004015": "{registry_endpoint}/8367004015",
"2980468199": "{registry_endpoint}/2980468199",
"5562224408": "{registry_endpoint}/5562224408",
"7663495611": "{registry_endpoint}/7663495611"
@@ -2625,6 +2626,7 @@
"6048579718": "yt/go/ytrecipe/cmd/ytexec for linux",
"7686710688": "yt/go/ytrecipe/cmd/ytexec for linux",
"7879860842": "yt/go/ytrecipe/cmd/ytexec for linux",
+ "8367004015": "yt/go/ytrecipe/cmd/ytexec for linux",
"2980468199": "ytexec for linux",
"5562224408": "ytexec for linux",
"7663495611": "ytexec for linux"
diff --git a/build/platform/yfm/resources.json b/build/platform/yfm/resources.json
index 9491f062029..7fe8b459be7 100644
--- a/build/platform/yfm/resources.json
+++ b/build/platform/yfm/resources.json
@@ -1,16 +1,16 @@
{
"by_platform": {
"win32-x86_64": {
- "uri": "sbr:8347966416"
+ "uri": "sbr:8369232374"
},
"darwin-x86_64": {
- "uri": "sbr:8347965020"
+ "uri": "sbr:8369230226"
},
"linux-x86_64": {
- "uri": "sbr:8347962698"
+ "uri": "sbr:8369228543"
},
"darwin-arm64": {
- "uri": "sbr:8347965020"
+ "uri": "sbr:8369230226"
}
}
}
diff --git a/build/plugins/lib/test_const/__init__.py b/build/plugins/lib/test_const/__init__.py
index 95ecfcbf697..3fd88c7d7c7 100644
--- a/build/plugins/lib/test_const/__init__.py
+++ b/build/plugins/lib/test_const/__init__.py
@@ -438,6 +438,12 @@ class ServiceTags(Enum):
AnyTag = "ya:anytag"
+# NOTE: Linter constants are used in ya style, ya ide, config validator check (devtools/ya/handlers/style/config_validator).
+# ya and validator have different release cycles, make sure you preserve compatibility:
+# - don't delete anything from here until you get rid of all usages and roll out the changes;
+# - keep in mind that changes of constants used in multiple tools may get to production at different times;
+
+
# Linter names must match `NAME` set in `_ADD_*_LINTER_CHECK`
class PythonLinterName(Enum):
Black = "black"
@@ -464,6 +470,22 @@ class LinterConfigsValidationRules(Enum):
Python = "build/config/tests/py_style/configs_validation_rules.json"
+# XXX: if a new linter is added to this mapping respective path to rules file must be available in the json
+LINTER_TO_DEFAULT_CONFIGS = {
+ CppLinterName.ClangFormat: DefaultLinterConfig.Cpp,
+ PythonLinterName.Black: DefaultLinterConfig.Python,
+ PythonLinterName.Ruff: DefaultLinterConfig.Python,
+}
+
+# Fill up like
+"""
+{
+ PythonLinterName.Ruff: LinterConfigsValidationRules.Python,
+}
+"""
+# XXX: if a new linter is added to this mapping respective path to rules file must be available in the json
+LINTER_TO_VALIDATION_CONFIGS = {}
+
LINTER_CONFIG_TYPES = {
CppLinterName.ClangFormat: (".clang-format",),
CppLinterName.ClangFormat15: (".clang-format",),
@@ -479,6 +501,9 @@ AUTOINCLUDE_PATHS = (
)
+# End of linter constants
+
+
class Status(object):
GOOD, XFAIL, FAIL, XPASS, MISSING, CRASHED, TIMEOUT = range(1, 8)
SKIPPED = -100
diff --git a/build/sysincl/misc.yml b/build/sysincl/misc.yml
index 69d43563d1e..6629f94a9e9 100644
--- a/build/sysincl/misc.yml
+++ b/build/sysincl/misc.yml
@@ -463,6 +463,10 @@
includes:
- filter.h: contrib/libs/svt-av1/Source/Lib/Codec/filter.h
+- source_filter: "^contrib/libs/tinycbor"
+ includes:
+ - memory.h: contrib/libs/tinycbor/src/memory.h
+
- source_filter: "^contrib/restricted/ffmpeg"
includes:
- thread.h: contrib/restricted/ffmpeg/libavcodec/thread.h
diff --git a/contrib/libs/cxxsupp/libcxx/include/complex b/contrib/libs/cxxsupp/libcxx/include/complex
index 91cf3f4d508..bfe61c506e1 100644
--- a/contrib/libs/cxxsupp/libcxx/include/complex
+++ b/contrib/libs/cxxsupp/libcxx/include/complex
@@ -1282,8 +1282,7 @@ _LIBCPP_HIDE_FROM_ABI complex<_Tp> acos(const complex<_Tp>& __x) {
}
if (std::__constexpr_isinf(__x.imag()))
return complex<_Tp>(__pi / _Tp(2), -__x.imag());
- // Somehow isnan can be a macro, so we use __constexpr_isnan
- if (__x.real() == 0 && (__x.imag() == 0 || std::__constexpr_isnan(__x.imag())))
+ if (__x.real() == 0 && (__x.imag() == 0 || std::isnan(__x.imag())))
return complex<_Tp>(__pi / _Tp(2), -__x.imag());
complex<_Tp> __z = std::log(__x + std::sqrt(std::__sqr(__x) - _Tp(1)));
if (std::signbit(__x.imag()))
diff --git a/contrib/libs/cxxsupp/libcxx/patches/38-complex.patch b/contrib/libs/cxxsupp/libcxx/patches/38-complex.patch
deleted file mode 100644
index 6856a5f4b2e..00000000000
--- a/contrib/libs/cxxsupp/libcxx/patches/38-complex.patch
+++ /dev/null
@@ -1,14 +0,0 @@
-diff --git a/include/complex b/include/complex
-index bfe61c5..91cf3f4 100644
---- a/include/complex
-+++ b/include/complex
-@@ -1282,7 +1282,8 @@ _LIBCPP_HIDE_FROM_ABI complex<_Tp> acos(const complex<_Tp>& __x) {
- }
- if (std::__constexpr_isinf(__x.imag()))
- return complex<_Tp>(__pi / _Tp(2), -__x.imag());
-- if (__x.real() == 0 && (__x.imag() == 0 || std::isnan(__x.imag())))
-+ // Somehow isnan can be a macro, so we use __constexpr_isnan
-+ if (__x.real() == 0 && (__x.imag() == 0 || std::__constexpr_isnan(__x.imag())))
- return complex<_Tp>(__pi / _Tp(2), -__x.imag());
- complex<_Tp> __z = std::log(__x + std::sqrt(std::__sqr(__x) - _Tp(1)));
- if (std::signbit(__x.imag()))
diff --git a/library/cpp/neh/tcp2.cpp b/library/cpp/neh/tcp2.cpp
index 641408973d9..fa24635e465 100644
--- a/library/cpp/neh/tcp2.cpp
+++ b/library/cpp/neh/tcp2.cpp
@@ -621,8 +621,8 @@ namespace {
, State_(Init)
, BuffSize_(TTcp2Options::InputBufferSize)
, Buff_(new char[BuffSize_])
- , NeedCheckReqsQueue_(0)
- , NeedCheckCancelsQueue_(0)
+ , NeedCheckReqsQueue_(false)
+ , NeedCheckCancelsQueue_(false)
, GenReqId_(0)
, LastSendedReqId_(0)
{
@@ -651,7 +651,7 @@ namespace {
throw;
}
- AtomicSet(NeedCheckReqsQueue_, 1);
+ NeedCheckReqsQueue_.store(true);
req->SetConnection(this);
TAtomicBase state = AtomicGet(State_);
if (Y_LIKELY(state == Connected)) {
@@ -682,7 +682,7 @@ namespace {
//called from client thread
void Cancel(TRequestId id) {
Cancels_.Enqueue(id);
- AtomicSet(NeedCheckCancelsQueue_, 1);
+ NeedCheckCancelsQueue_.store(true);
if (Y_LIKELY(AtomicGet(State_) == Connected)) {
ProcessOutputCancelsQueue();
}
@@ -758,7 +758,7 @@ namespace {
do {
if (asioThread) {
- AtomicSet(NeedCheckCancelsQueue_, 0);
+ NeedCheckCancelsQueue_.store(false);
TRequestId reqId;
ProcessReqsInFlyQueue();
@@ -776,14 +776,14 @@ namespace {
}
}
}
- } else if (AtomicGet(NeedCheckCancelsQueue_)) {
+ } else if (NeedCheckCancelsQueue_.load()) {
AS_.GetIOService().Post(std::bind(&TConnection::SendMessages, TConnectionRef(this), true));
return;
}
TRequestId lastReqId = 0;
{
- AtomicSet(NeedCheckReqsQueue_, 0);
+ NeedCheckReqsQueue_.store(false);
TRequest* reqPtr;
while (Reqs_.Dequeue(&reqPtr)) {
@@ -824,7 +824,7 @@ namespace {
OutputLock_.Release();
- if (!AtomicGet(NeedCheckReqsQueue_) && !AtomicGet(NeedCheckCancelsQueue_)) {
+ if (!NeedCheckReqsQueue_.load() && !NeedCheckCancelsQueue_.load()) {
DBGOUT("TClient::SendMessages(exit2)");
return;
}
@@ -1031,9 +1031,9 @@ namespace {
//output
TSpinLock OutputLock_; //protect socket/buffers from simultaneous access from few threads
- TAtomic NeedCheckReqsQueue_;
+ std::atomic<bool> NeedCheckReqsQueue_;
TLockFreeQueue<TRequest*> Reqs_;
- TAtomic NeedCheckCancelsQueue_;
+ std::atomic<bool> NeedCheckCancelsQueue_;
TLockFreeQueue<TRequestId> Cancels_;
TAdaptiveLock GenReqIdLock_;
std::atomic<TRequestId> GenReqId_;
@@ -1182,7 +1182,7 @@ namespace {
, RemoteHost_(NNeh::PrintHostByRfc(*AS_->RemoteEndpoint().Addr()))
, BuffSize_(TTcp2Options::InputBufferSize)
, Buff_(new char[BuffSize_])
- , NeedCheckOutputQueue_(0)
+ , NeedCheckOutputQueue_(false)
{
DBGOUT("TServer::TConnection()");
}
@@ -1383,7 +1383,7 @@ namespace {
}
void ProcessOutputQueue() {
- AtomicSet(NeedCheckOutputQueue_, 1);
+ NeedCheckOutputQueue_.store(true);
if (OutputLock_.TryAcquire()) {
SendMessages(false);
return;
@@ -1396,7 +1396,7 @@ namespace {
DBGOUT("TServer::SendMessages(enter)");
try {
do {
- AtomicUnlock(&NeedCheckOutputQueue_);
+ NeedCheckOutputQueue_.store(false);
TAutoPtr<TOutputData> d;
while (OutputData_.Dequeue(&d)) {
d->MoveTo(OutputBuffers_);
@@ -1415,7 +1415,7 @@ namespace {
OutputLock_.Release();
- if (!AtomicGet(NeedCheckOutputQueue_)) {
+ if (!NeedCheckOutputQueue_.load()) {
DBGOUT("Server::SendMessages(exit2): " << (int)!OutputLock_.IsLocked());
return;
}
@@ -1491,7 +1491,7 @@ namespace {
//output
TSpinLock OutputLock_; //protect socket/buffers from simultaneous access from few threads
- TAtomic NeedCheckOutputQueue_;
+ std::atomic<bool> NeedCheckOutputQueue_;
NNeh::TAutoLockFreeQueue<TOutputData> OutputData_;
TOutputBuffers OutputBuffers_;
};
diff --git a/library/cpp/type_info/type_io.cpp b/library/cpp/type_info/type_io.cpp
index 12f0058fed1..f397466abce 100644
--- a/library/cpp/type_info/type_io.cpp
+++ b/library/cpp/type_info/type_io.cpp
@@ -13,6 +13,8 @@
#include <util/generic/vector.h>
#include <util/generic/scope.h>
+#include <optional>
+
namespace NTi::NIo {
namespace {
class TYsonDeserializer: private TNonCopyable {
@@ -61,7 +63,8 @@ namespace NTi::NIo {
const TType *Key, *Value;
};
struct TDecimalData {
- ui8 Precision, Scale;
+ std::optional<ui8> Precision;
+ std::optional<ui8> Scale;
};
using TTypeData = std::variant<
std::monostate,
@@ -162,11 +165,19 @@ namespace NTi::NIo {
}
} else if (mapKey == "precision") {
if (std::holds_alternative<std::monostate>(data)) {
- data = TDecimalData{ReadSmallInt(R"("precision")"), 0};
+ const auto precision = ReadSmallInt(R"("precision")");
+ if (0 == precision) {
+ ythrow TDeserializationException() << R"(invalid zero "precision")";
+ }
+ data = TDecimalData{precision, std::nullopt};
} else if (std::holds_alternative<TDecimalData>(data)) {
auto& decimalData = std::get<TDecimalData>(data);
- if (decimalData.Precision == 0) {
- decimalData.Precision = ReadSmallInt(R"("precision")");
+ if (!decimalData.Precision.has_value()) {
+ const auto precision = ReadSmallInt(R"("precision")");
+ if (0 == precision) {
+ ythrow TDeserializationException() << R"(invalid zero "precision")";
+ }
+ decimalData.Precision = precision;
} else {
ythrow TDeserializationException() << R"(duplicate key "precision")";
}
@@ -175,10 +186,10 @@ namespace NTi::NIo {
}
} else if (mapKey == "scale") {
if (std::holds_alternative<std::monostate>(data)) {
- data = TDecimalData{0, ReadSmallInt(R"("scale")")};
+ data = TDecimalData{std::nullopt, ReadSmallInt(R"("scale")")};
} else if (std::holds_alternative<TDecimalData>(data)) {
auto& decimalData = std::get<TDecimalData>(data);
- if (decimalData.Scale == 0) {
+ if (!decimalData.Scale.has_value()) {
decimalData.Scale = ReadSmallInt(R"("scale")");
} else {
ythrow TDeserializationException() << R"(duplicate key "scale")";
@@ -272,15 +283,15 @@ namespace NTi::NIo {
auto& decimalData = std::get<TDecimalData>(data);
- if (decimalData.Precision == 0) {
+ if (!decimalData.Precision.has_value()) {
ythrow TDeserializationException() << R"(missing required key "precision" for type Decimal)";
}
- if (decimalData.Scale == 0) {
+ if (!decimalData.Scale.has_value()) {
ythrow TDeserializationException() << R"(missing required key "scale" for type Decimal)";
}
- return Factory_->DecimalRaw(decimalData.Precision, decimalData.Scale);
+ return Factory_->DecimalRaw(decimalData.Precision.value(), decimalData.Scale.value());
}
case ETypeName::Json:
type = TJsonType::InstanceRaw();
@@ -420,8 +431,8 @@ namespace NTi::NIo {
auto result = event.AsScalar().AsInt64();
- if (result <= 0) {
- ythrow TDeserializationException() << what << " must be greater than zero";
+ if (result < 0) {
+ ythrow TDeserializationException() << what << " must be greater or equal to zero";
}
if (result > Max<ui8>()) {
diff --git a/library/cpp/type_info/ut/type_deserialize.cpp b/library/cpp/type_info/ut/type_deserialize.cpp
index 9e93a26bee3..7d5f60bad04 100644
--- a/library/cpp/type_info/ut/type_deserialize.cpp
+++ b/library/cpp/type_info/ut/type_deserialize.cpp
@@ -120,6 +120,40 @@ TEST(TypeDeserialize, Decimal) {
ASSERT_DESERIALIZED_EQ(NTi::Decimal(20, 10), R"({type_name=decimal; precision=20; scale=10})");
ASSERT_DESERIALIZED_EQ(NTi::Decimal(20, 10), R"({scale=10; type_name=decimal; precision=20})");
ASSERT_DESERIALIZED_EQ(NTi::Decimal(10, 10), R"({type_name=decimal; precision=10; scale=10})");
+ ASSERT_DESERIALIZED_EQ(NTi::Decimal(10, 0), R"({type_name=decimal; precision=10; scale=0})");
+}
+
+TEST(TypeDeserialize, DecimalBadTypeParameters) {
+ UNIT_ASSERT_EXCEPTION_CONTAINS(
+ []() {
+ NTi::NIo::DeserializeYson(*NTi::HeapFactory(), R"({type_name=decimal; precision=0; scale=10})");
+ }(),
+ NTi::TDeserializationException, R"(invalid zero "precision")");
+ UNIT_ASSERT_EXCEPTION_CONTAINS(
+ []() {
+ NTi::NIo::DeserializeYson(*NTi::HeapFactory(), R"({type_name=decimal; precision=-2; scale=10})");
+ }(),
+ NTi::TDeserializationException, R"("precision" must be greater or equal to zero)");
+ UNIT_ASSERT_EXCEPTION_CONTAINS(
+ []() {
+ NTi::NIo::DeserializeYson(*NTi::HeapFactory(), R"({type_name=decimal; precision=2; scale=-2})");
+ }(),
+ NTi::TDeserializationException, R"("scale" must be greater or equal to zero)");
+ UNIT_ASSERT_EXCEPTION_CONTAINS(
+ []() {
+ NTi::NIo::DeserializeYson(*NTi::HeapFactory(), R"({type_name=decimal; precision=2; scale=-2})");
+ }(),
+ NTi::TDeserializationException, R"("scale" must be greater or equal to zero)");
+ UNIT_ASSERT_EXCEPTION_CONTAINS(
+ []() {
+ NTi::NIo::DeserializeYson(*NTi::HeapFactory(), R"({type_name=decimal; precision=1000; scale=2})");
+ }(),
+ NTi::TDeserializationException, R"("precision" is too big)");
+ UNIT_ASSERT_EXCEPTION_CONTAINS(
+ []() {
+ NTi::NIo::DeserializeYson(*NTi::HeapFactory(), R"({type_name=decimal; precision=2; scale=1000})");
+ }(),
+ NTi::TDeserializationException, R"("scale" is too big)");
}
TEST(TypeDeserialize, DecimalMissingTypeParameters) {
diff --git a/util/generic/enum_cast.cpp b/util/generic/enum_cast.cpp
new file mode 100644
index 00000000000..8db625ef440
--- /dev/null
+++ b/util/generic/enum_cast.cpp
@@ -0,0 +1,12 @@
+#include "enum_cast.h"
+
+#include <util/generic/yexception.h>
+#include <util/system/type_name.h>
+
+namespace NPrivate {
+
+ [[noreturn]] void OnSafeCastToEnumUnexpectedValue(const std::type_info& valueTypeInfo) {
+ ythrow TBadCastException() << "Unexpected enum " << TypeName(valueTypeInfo) << " value";
+ }
+
+} // namespace NPrivate
diff --git a/util/generic/enum_cast.h b/util/generic/enum_cast.h
new file mode 100644
index 00000000000..59dee47e889
--- /dev/null
+++ b/util/generic/enum_cast.h
@@ -0,0 +1,46 @@
+#pragma once
+
+#include <util/generic/cast.h>
+#include <util/generic/serialized_enum.h>
+
+#include <optional>
+#include <type_traits>
+#include <typeinfo>
+
+namespace NPrivate {
+
+ [[noreturn]] void OnSafeCastToEnumUnexpectedValue(const std::type_info& valueTypeInfo);
+
+} // namespace NPrivate
+
+/**
+ * Safely cast an integer value to the enum value.
+ * @throw yexception is case of unknown enum underlying type value
+ *
+ * @tparam TEnum enum type
+ */
+template <typename TEnum, typename TInteger, typename = std::enable_if_t<std::is_enum_v<TEnum>>>
+TEnum SafeCastToEnum(TInteger integerValue) {
+ using TUnderlyingEnumType = std::underlying_type_t<TEnum>;
+
+ std::optional<TUnderlyingEnumType> value;
+ try {
+ value = SafeIntegerCast<TUnderlyingEnumType>(integerValue);
+ } catch (const TBadCastException&) {
+ // SafeIntegerCast throws TBadCastException when TInteger cannot be cast
+ // to TUnderlyingEnumType but the exception message is about integer
+ // value cast being unsafe.
+ // SafeCastToEnum must throw TBadCastException with its own exception
+ // message even if integer cast fails.
+ }
+
+ if (value.has_value()) {
+ for (TEnum enumValue : GetEnumAllValues<TEnum>()) {
+ if (static_cast<TUnderlyingEnumType>(enumValue) == *value) {
+ return enumValue;
+ }
+ }
+ }
+
+ NPrivate::OnSafeCastToEnumUnexpectedValue(typeid(TEnum));
+}
diff --git a/util/generic/enum_cast_ut.cpp b/util/generic/enum_cast_ut.cpp
new file mode 100644
index 00000000000..0b96235bcaf
--- /dev/null
+++ b/util/generic/enum_cast_ut.cpp
@@ -0,0 +1,49 @@
+#include "enum_cast.h"
+
+#include "enum_cast_ut.h"
+
+#include <library/cpp/testing/unittest/registar.h>
+
+Y_UNIT_TEST_SUITE(TestEnumCast) {
+ Y_UNIT_TEST(SafeCastToEnumTest) {
+ UNIT_ASSERT_VALUES_EQUAL(SafeCastToEnum<EIntEnum>(0), EIntEnum::Zero);
+ UNIT_ASSERT_VALUES_EQUAL(SafeCastToEnum<EIntEnum>(1), EIntEnum::One);
+ UNIT_ASSERT_VALUES_EQUAL(SafeCastToEnum<EIntEnum>(2), EIntEnum::Two);
+ UNIT_ASSERT_EXCEPTION(SafeCastToEnum<EIntEnum>(3), TBadCastException);
+
+ UNIT_ASSERT_VALUES_EQUAL(SafeCastToEnum<EUcharEnum>(0), EUcharEnum::Zero);
+ UNIT_ASSERT_VALUES_EQUAL(SafeCastToEnum<EUcharEnum>(1), EUcharEnum::One);
+ UNIT_ASSERT_VALUES_EQUAL(SafeCastToEnum<EUcharEnum>(2), EUcharEnum::Two);
+ UNIT_ASSERT_EXCEPTION_CONTAINS(
+ SafeCastToEnum<EUcharEnum>(3), TBadCastException,
+ "Unexpected enum");
+ int val1 = 256;
+ UNIT_ASSERT_EXCEPTION_CONTAINS(
+ SafeCastToEnum<EUcharEnum>(val1), TBadCastException,
+ "Unexpected enum");
+ int val2 = -1;
+ UNIT_ASSERT_EXCEPTION_CONTAINS(
+ SafeCastToEnum<EUcharEnum>(val2), TBadCastException,
+ "Unexpected enum");
+ int val3 = 2;
+ UNIT_ASSERT_VALUES_EQUAL(SafeCastToEnum<EUcharEnum>(val3), EUcharEnum::Two);
+
+ UNIT_ASSERT_VALUES_EQUAL(SafeCastToEnum<EBoolEnum>(false), EBoolEnum::False);
+ UNIT_ASSERT_VALUES_EQUAL(SafeCastToEnum<EBoolEnum>(true), EBoolEnum::True);
+
+ UNIT_ASSERT_VALUES_EQUAL(SafeCastToEnum<EUnscopedIntEnum>(2), UIE_TWO);
+ UNIT_ASSERT_EXCEPTION_CONTAINS(
+ SafeCastToEnum<EUnscopedIntEnum>(3), TBadCastException,
+ "Unexpected enum");
+ UNIT_ASSERT_EXCEPTION_CONTAINS(
+ SafeCastToEnum<EUnscopedIntEnum>(9), TBadCastException,
+ "Unexpected enum");
+
+ UNIT_ASSERT_VALUES_EQUAL(SafeCastToEnum<ECharEnum>(static_cast<unsigned int>(0)), ECharEnum::Zero);
+ UNIT_ASSERT_VALUES_EQUAL(SafeCastToEnum<ECharEnum>(static_cast<short>(-1)), ECharEnum::MinusOne);
+ UNIT_ASSERT_VALUES_EQUAL(SafeCastToEnum<ECharEnum>(static_cast<int>(-2)), ECharEnum::MinusTwo);
+ UNIT_ASSERT_EXCEPTION_CONTAINS(
+ SafeCastToEnum<ECharEnum>(static_cast<int>(2)), TBadCastException,
+ "Unexpected enum");
+ }
+} // Y_UNIT_TEST_SUITE(TestEnumCast)
diff --git a/util/generic/enum_cast_ut.h b/util/generic/enum_cast_ut.h
new file mode 100644
index 00000000000..c0fb52864aa
--- /dev/null
+++ b/util/generic/enum_cast_ut.h
@@ -0,0 +1,28 @@
+#pragma once
+
+enum class EIntEnum: int {
+ Zero = 0,
+ One = 1,
+ Two = 2
+};
+
+enum class EUcharEnum: unsigned char {
+ Zero = 0,
+ One = 1,
+ Two = 2
+};
+
+enum class ECharEnum: signed char {
+ Zero = 0,
+ MinusOne = -1,
+ MinusTwo = -2
+};
+
+enum class EBoolEnum: bool {
+ False = false,
+ True = true
+};
+
+enum EUnscopedIntEnum {
+ UIE_TWO = 2,
+};
diff --git a/util/generic/fwd.h b/util/generic/fwd.h
index d1acc252563..906203d60e2 100644
--- a/util/generic/fwd.h
+++ b/util/generic/fwd.h
@@ -8,9 +8,6 @@ template <typename TCharType, typename TTraits = std::char_traits<TCharType>>
class TBasicString;
using TString = TBasicString<char>;
-#ifndef TSTRING_IS_STD_STRING
-using TCowString = TBasicString<char>;
-#endif
using TUtf16String = TBasicString<wchar16>;
using TUtf32String = TBasicString<wchar32>;
diff --git a/util/generic/string.h b/util/generic/string.h
index f1d3cf32652..520fdc746f4 100644
--- a/util/generic/string.h
+++ b/util/generic/string.h
@@ -39,7 +39,6 @@ void ResizeUninitialized(std::basic_string<TCharType, TCharTraits, TAllocator>&
#define Y_NOEXCEPT
-#ifndef TSTRING_IS_STD_STRING
template <class T>
class TStringPtrOps {
public:
@@ -83,11 +82,11 @@ struct TStdString: public TRefCountHolder, public B {
}
static TStdString* NullStr() noexcept {
- #ifdef _LIBCPP_VERSION
+#ifdef _LIBCPP_VERSION
return (TStdString*)NULL_STRING_REPR;
- #else
+#else
return Singleton<TStdString>();
- #endif
+#endif
}
private:
@@ -157,7 +156,6 @@ private:
TStringType& S_;
size_t Pos_;
};
-#endif
template <typename TCharType, typename TTraits>
class TBasicString: public TStringBase<TBasicString<TCharType, TTraits>, TCharType, TTraits> {
diff --git a/util/generic/ut/ya.make b/util/generic/ut/ya.make
index 6a605564fec..543b30278cd 100644
--- a/util/generic/ut/ya.make
+++ b/util/generic/ut/ya.make
@@ -12,6 +12,7 @@ SRCS(
generic/buffer_ut.cpp
generic/cast_ut.cpp
generic/deque_ut.cpp
+ generic/enum_cast_ut.cpp
generic/explicit_type_ut.cpp
generic/flags_ut.cpp
generic/function_ref_ut.cpp
@@ -63,4 +64,6 @@ PEERDIR(
library/cpp/containers/absl_flat_hash
)
+GENERATE_ENUM_SERIALIZATION(generic/enum_cast_ut.h)
+
END()
diff --git a/util/ya.make b/util/ya.make
index dea1f1f7219..93bcc964a48 100644
--- a/util/ya.make
+++ b/util/ya.make
@@ -90,6 +90,7 @@ JOIN_SRCS(
generic/buffer.cpp
generic/cast.cpp
generic/deque.cpp
+ generic/enum_cast.cpp
generic/explicit_type.cpp
generic/fastqueue.cpp
generic/flags.cpp
diff --git a/yql/essentials/cfg/tests/gateways-experimental.conf b/yql/essentials/cfg/tests/gateways-experimental.conf
index f0bea49094f..fe6fa97660c 100644
--- a/yql/essentials/cfg/tests/gateways-experimental.conf
+++ b/yql/essentials/cfg/tests/gateways-experimental.conf
@@ -28,6 +28,11 @@ Yt {
Name: "UseIntermediateStreams"
Value: "true"
}
+
+ DefaultSettings {
+ Name: "UseColumnGroupsFromInputTables"
+ Value: "true"
+ }
}
Dq {
diff --git a/yql/essentials/core/yql_expr_type_annotation.cpp b/yql/essentials/core/yql_expr_type_annotation.cpp
index c968b6fea1c..80207d35bc2 100644
--- a/yql/essentials/core/yql_expr_type_annotation.cpp
+++ b/yql/essentials/core/yql_expr_type_annotation.cpp
@@ -4313,7 +4313,7 @@ bool IsDataTypeTzDate(EDataSlot dataSlot) {
}
bool IsDataTypeBigDate(EDataSlot dataSlot) {
- return (NUdf::GetDataTypeInfo(dataSlot).Features & NUdf::BigDateType);
+ return (NUdf::GetDataTypeInfo(dataSlot).Features & NUdf::ExtDateType);
}
EDataSlot WithTzDate(EDataSlot dataSlot) {
diff --git a/yql/essentials/minikql/comp_nodes/benchmark/block_coalesce/bench.cpp b/yql/essentials/minikql/comp_nodes/benchmark/block_coalesce/bench.cpp
new file mode 100644
index 00000000000..aa1ea57703a
--- /dev/null
+++ b/yql/essentials/minikql/comp_nodes/benchmark/block_coalesce/bench.cpp
@@ -0,0 +1,97 @@
+#include <benchmark/benchmark.h>
+
+#include <yql/essentials/ast/yql_expr.h>
+#include <yql/essentials/minikql/computation/mkql_block_item.h>
+#include <yql/essentials/minikql/comp_nodes/mkql_block_coalesce.h>
+#include <yql/essentials/minikql/comp_nodes/mkql_block_coalesce_blending_helper.h>
+#include <yql/essentials/minikql/comp_nodes/ut/mkql_computation_node_ut.h>
+#include <yql/essentials/minikql/computation/mkql_computation_node_holders.h>
+#include <yql/essentials/minikql/computation/mkql_block_builder.h>
+#include <yql/essentials/ast/yql_expr_builder.h>
+#include <yql/essentials/public/udf/arrow/memory_pool.h>
+#include <yql/essentials/minikql/mkql_node_cast.h>
+#include <yql/essentials/minikql/arrow/arrow_util.h>
+#include <yql/essentials/core/arrow_kernels/request/request.h>
+#include <yql/essentials/core/arrow_kernels/registry/registry.h>
+
+#include <arrow/compute/exec_internal.h>
+#include <util/generic/string.h>
+
+namespace NKikimr::NMiniKQL {
+
+template <typename T>
+static void BenchmarkFixedSizeCoalesce(benchmark::State& state) {
+ NYql::TExprContext exprCtx;
+ TSetup<false> setup;
+ bool secondIsScalar = state.range(1);
+ const auto type = setup.PgmBuilder->NewDataType(NUdf::TDataType<T>::Id);
+ auto* typeNode = exprCtx.template MakeType<NYql::TBlockExprType>(
+ exprCtx.template MakeType<NYql::TDataExprType>(NUdf::TDataType<T>::Slot));
+ auto* optTypeNode = exprCtx.template MakeType<NYql::TBlockExprType>(
+ exprCtx.template MakeType<NYql::TOptionalExprType>(
+ exprCtx.template MakeType<NYql::TDataExprType>(NUdf::TDataType<T>::Slot)));
+ auto getBlockItem = [&](int64_t value) {
+ return TBlockItem(static_cast<T>(value));
+ };
+
+ auto arrow_type = std::make_shared<typename TPrimitiveDataType<T>::TResult>();
+ int arrayLength = state.range(0);
+ constexpr int batchSize = 30720;
+
+ arrow::compute::ExecContext execCtx;
+ const auto drng = CreateDeterministicRandomProvider(1);
+ TTypeInfoHelper tif;
+
+ auto getArray = [&](TType* type, bool isOptional) {
+ auto array_builder = MakeArrayBuilder(tif, type, *NYql::NUdf::GetYqlMemoryPool(), arrayLength, nullptr);
+ for (int i = 0; i < arrayLength; i++) {
+ if (!isOptional) {
+ array_builder->Add(getBlockItem(drng->GenRand64()));
+ } else {
+ array_builder->Add(drng->GenRand64() % 2 == 0 ? TBlockItem() : getBlockItem(drng->GenRand64()));
+ }
+ }
+ return array_builder->Build(/*finish=*/true);
+ };
+
+ arrow::compute::KernelContext ctx(&execCtx);
+
+ const auto optType = setup.PgmBuilder->NewOptionalType(type);
+ auto left = getArray(optType, /*isOptional=*/true);
+ arrow::Datum right;
+ if (secondIsScalar) {
+ right = MakeScalarDatum<T>(drng->GenRand64());
+ } else {
+ right = getArray(type, /*isOptional=*/false);
+ }
+ auto registry = CreateFunctionRegistry(CreateBuiltinRegistry());
+ NYql::TKernelRequestBuilder b(*registry);
+
+ b.AddBinaryOp(NYql::TKernelRequestBuilder::EBinaryOp::Coalesce, optTypeNode, typeNode, typeNode);
+ auto serializedNode = b.Serialize();
+ auto nodeFactory = GetBuiltinFactory();
+ auto kernel = NYql::LoadKernels(serializedNode, *registry, nodeFactory);
+ Y_ENSURE(kernel.size() == 1);
+
+ for (auto _ : state) {
+ auto bi = ARROW_RESULT(arrow::compute::detail::ExecBatchIterator::Make({left, right}, batchSize));
+
+ arrow::compute::ExecBatch batch;
+ while (bi->Next(&batch)) {
+ arrow::Datum out;
+ Y_ENSURE(kernel[0]->exec(&ctx, batch, &out).ok());
+ benchmark::DoNotOptimize(out);
+ }
+ }
+}
+
+} // namespace NKikimr::NMiniKQL
+
+static void CustomArguments(benchmark::internal::Benchmark* b) {
+ b->Args({9000000, 0});
+ b->Args({9000000, 1});
+}
+
+BENCHMARK(NKikimr::NMiniKQL::BenchmarkFixedSizeCoalesce<ui8>)->Unit(benchmark::kMillisecond)->Apply(CustomArguments);
+BENCHMARK(NKikimr::NMiniKQL::BenchmarkFixedSizeCoalesce<ui16>)->Unit(benchmark::kMillisecond)->Apply(CustomArguments);
+BENCHMARK(NKikimr::NMiniKQL::BenchmarkFixedSizeCoalesce<ui32>)->Unit(benchmark::kMillisecond)->Apply(CustomArguments);
diff --git a/yql/essentials/minikql/comp_nodes/benchmark/block_coalesce/ya.make b/yql/essentials/minikql/comp_nodes/benchmark/block_coalesce/ya.make
new file mode 100644
index 00000000000..832f401638c
--- /dev/null
+++ b/yql/essentials/minikql/comp_nodes/benchmark/block_coalesce/ya.make
@@ -0,0 +1,22 @@
+G_BENCHMARK()
+
+PEERDIR(
+ yql/essentials/public/udf
+ yql/essentials/public/udf/arrow
+ yql/essentials/public/udf/service/exception_policy
+ yql/essentials/sql/pg_dummy
+ yql/essentials/minikql/comp_nodes/no_llvm
+ yql/essentials/minikql/codegen/no_llvm
+ yql/essentials/minikql/comp_nodes/no_llvm
+ yql/essentials/core/arrow_kernels/request
+ yql/essentials/core/arrow_kernels/registry
+)
+
+YQL_LAST_ABI_VERSION()
+
+SRCS(
+ ../../ut/mkql_test_factory.cpp
+ bench.cpp
+)
+
+END()
diff --git a/yql/essentials/minikql/comp_nodes/benchmark/ya.make b/yql/essentials/minikql/comp_nodes/benchmark/ya.make
new file mode 100644
index 00000000000..14aefb7f4ae
--- /dev/null
+++ b/yql/essentials/minikql/comp_nodes/benchmark/ya.make
@@ -0,0 +1 @@
+RECURSE(block_coalesce)
diff --git a/yql/essentials/minikql/comp_nodes/mkql_block_coalesce.cpp b/yql/essentials/minikql/comp_nodes/mkql_block_coalesce.cpp
index 9ce760e9981..ae190e777e8 100644
--- a/yql/essentials/minikql/comp_nodes/mkql_block_coalesce.cpp
+++ b/yql/essentials/minikql/comp_nodes/mkql_block_coalesce.cpp
@@ -9,23 +9,113 @@
#include <yql/essentials/public/udf/arrow/block_builder.h>
#include <yql/essentials/public/udf/arrow/block_reader.h>
#include <yql/essentials/public/udf/arrow/util.h>
+#include <yql/essentials/minikql/comp_nodes/mkql_block_coalesce_blending_helper.h>
+#include <yql/essentials/minikql/defs.h>
#include <arrow/util/bitmap_ops.h>
-
-namespace NKikimr {
-namespace NMiniKQL {
+namespace NKikimr::NMiniKQL {
namespace {
+template <typename TType>
+void DispatchCoalesceImpl(const arrow::Datum& left, const arrow::Datum& right, arrow::Datum& out, bool outIsOptional, arrow::MemoryPool& pool) {
+ auto bitmap = outIsOptional ? ARROW_RESULT(arrow::AllocateBitmap((left.array()->length + left.array()->offset % 8) * sizeof(ui8), &pool)) : nullptr;
+ if (bitmap && bitmap->size() > 0) {
+ // Fill first byte with zero to prevent further uninitialized memory access.
+ bitmap->mutable_data()[0] = 0;
+ }
+ out = arrow::ArrayData::Make(right.type(), left.array()->length,
+ {std::move(bitmap),
+ ARROW_RESULT(arrow::AllocateBuffer((left.array()->length + left.array()->offset % 8) * sizeof(TType), &pool))},
+ arrow::kUnknownNullCount, left.array()->offset % 8);
+ if (outIsOptional) {
+ if (right.is_scalar()) {
+ if (right.scalar()->is_valid) {
+ BlendCoalesce<TType, /*isScalar=*/true, /*rightIsOptional=*/true>(
+ TDatumStorageView<TType>(left),
+ TDatumStorageView<TType>(right),
+ TDatumStorageView<TType>(out),
+ left.array()->length);
+ } else {
+ out = left;
+ }
+ } else {
+ MKQL_ENSURE(TDatumStorageView<TType>(right).bitMask(), "Right array must have a null mask");
+ BlendCoalesce<TType, /*isScalar=*/false, /*rightIsOptional=*/true>(
+ TDatumStorageView<TType>(left),
+ TDatumStorageView<TType>(right),
+ TDatumStorageView<TType>(out),
+ left.array()->length);
+ }
+ } else {
+ if (right.is_scalar()) {
+ BlendCoalesce<TType, /*isScalar=*/true, /*rightIsOptional=*/false>(
+ TDatumStorageView<TType>(left),
+ TDatumStorageView<TType>(right),
+ TDatumStorageView<TType>(out),
+ left.array()->length);
+ } else {
+ BlendCoalesce<TType, /*isScalar=*/false, /*rightIsOptional=*/false>(
+ TDatumStorageView<TType>(left),
+ TDatumStorageView<TType>(right),
+ TDatumStorageView<TType>(out),
+ left.array()->length);
+ }
+ }
+}
+
+bool DispatchBlendingCoalesce(const arrow::Datum& left, const arrow::Datum& right, arrow::Datum& out, TType* rightType, arrow::MemoryPool& pool) {
+ TTypeInfoHelper typeInfoHelper;
+ bool rightIsOptional;
+ rightType = UnpackOptional(rightType, rightIsOptional);
+ MKQL_ENSURE(rightType, "Right type must be valid");
+
+ NYql::NUdf::TDataTypeInspector typeData(typeInfoHelper, rightType);
+ if (!typeData) {
+ return false;
+ }
+ auto typeId = typeData.GetTypeId();
+
+ switch (NYql::NUdf::GetDataSlot(typeId)) {
+ case NYql::NUdf::EDataSlot::Int8:
+ DispatchCoalesceImpl<i8>(left, right, out, /*outIsOptional=*/rightIsOptional, pool);
+ return true;
+ case NYql::NUdf::EDataSlot::Bool:
+ case NYql::NUdf::EDataSlot::Uint8:
+ DispatchCoalesceImpl<ui8>(left, right, out, /*outIsOptional=*/rightIsOptional, pool);
+ return true;
+ case NYql::NUdf::EDataSlot::Int16:
+ DispatchCoalesceImpl<i16>(left, right, out, /*outIsOptional=*/rightIsOptional, pool);
+ return true;
+ case NYql::NUdf::EDataSlot::Uint16:
+ DispatchCoalesceImpl<ui16>(left, right, out, /*outIsOptional=*/rightIsOptional, pool);
+ return true;
+ case NYql::NUdf::EDataSlot::Int32:
+ DispatchCoalesceImpl<i32>(left, right, out, /*outIsOptional=*/rightIsOptional, pool);
+ return true;
+ case NYql::NUdf::EDataSlot::Uint32:
+ DispatchCoalesceImpl<ui32>(left, right, out, /*outIsOptional=*/rightIsOptional, pool);
+ return true;
+ case NYql::NUdf::EDataSlot::Int64:
+ case NYql::NUdf::EDataSlot::Uint64:
+ case NYql::NUdf::EDataSlot::Double:
+ case NYql::NUdf::EDataSlot::Float:
+ // TODO(YQL-19645): Support other numeric types.
+ default:
+ // Fallback to general builder/reader pipeline.
+ return false;
+ }
+}
+
class TCoalesceBlockExec {
public:
TCoalesceBlockExec(const std::shared_ptr<arrow::DataType>& returnArrowType, TType* firstItemType, TType* secondItemType, bool needUnwrapFirst)
: ReturnArrowType_(returnArrowType)
, FirstItemType_(firstItemType)
, SecondItemType_(secondItemType)
- , NeedUnwrapFirst_(needUnwrapFirst)
- {}
+ , NeedUnwrapFirst_(needUnwrapFirst) {
+ }
arrow::Status Exec(arrow::compute::KernelContext* ctx, const arrow::compute::ExecBatch& batch, arrow::Datum* res) const {
const auto& first = batch.values[0];
@@ -53,6 +143,10 @@ public:
builder->Add(secondValue, length);
*res = builder->Build(true);
} else {
+ if (DispatchBlendingCoalesce(first, second, *res, SecondItemType_, *ctx->memory_pool())) {
+ return arrow::Status::OK();
+ }
+
auto builder = NYql::NUdf::MakeArrayBuilder(TTypeInfoHelper(), SecondItemType_, *ctx->memory_pool(), length, nullptr);
auto secondValue = secondReader->GetScalarItem(*second.scalar());
for (size_t i = 0; i < length; ++i) {
@@ -74,6 +168,10 @@ public:
} else if ((size_t)firstArray.GetNullCount() == length) {
*res = second;
} else {
+ if (DispatchBlendingCoalesce(first, second, *res, SecondItemType_, *ctx->memory_pool())) {
+ return arrow::Status::OK();
+ }
+
auto builder = NYql::NUdf::MakeArrayBuilder(TTypeInfoHelper(), SecondItemType_, *ctx->memory_pool(), length, nullptr);
for (size_t i = 0; i < length; ++i) {
auto firstItem = firstReader->GetItem(firstArray, i);
@@ -110,9 +208,9 @@ std::shared_ptr<arrow::compute::ScalarKernel> MakeBlockCoalesceKernel(const TVec
AS_TYPE(TBlockType, argTypes[1])->GetItemType(),
needUnwrapFirst);
auto kernel = std::make_shared<arrow::compute::ScalarKernel>(ConvertToInputTypes(argTypes), ConvertToOutputType(resultType),
- [exec](arrow::compute::KernelContext* ctx, const arrow::compute::ExecBatch& batch, arrow::Datum* res) {
- return exec->Exec(ctx, batch, res);
- });
+ [exec](arrow::compute::KernelContext* ctx, const arrow::compute::ExecBatch& batch, arrow::Datum* res) {
+ return exec->Exec(ctx, batch, res);
+ });
kernel->null_handling = arrow::compute::NullHandling::COMPUTED_NO_PREALLOCATE;
return kernel;
@@ -135,6 +233,8 @@ IComputationNode* WrapBlockCoalesce(TCallable& callable, const TComputationNodeF
bool needUnwrapFirst = false;
if (!firstItemType->IsSameType(*secondItemType)) {
+ // Here the left operand and right operand are of types T? and T respectively.
+ // The first operand must be unwrapped to obtain the resulting type.
needUnwrapFirst = true;
bool firstOptional;
firstItemType = UnpackOptional(firstItemType, firstOptional);
@@ -143,12 +243,11 @@ IComputationNode* WrapBlockCoalesce(TCallable& callable, const TComputationNodeF
auto firstCompute = LocateNode(ctx.NodeLocator, callable, 0);
auto secondCompute = LocateNode(ctx.NodeLocator, callable, 1);
- TComputationNodePtrVector argsNodes = { firstCompute, secondCompute };
- TVector<TType*> argsTypes = { firstType, secondType };
+ TComputationNodePtrVector argsNodes = {firstCompute, secondCompute};
+ TVector<TType*> argsTypes = {firstType, secondType};
auto kernel = MakeBlockCoalesceKernel(argsTypes, secondType, needUnwrapFirst);
return new TBlockFuncNode(ctx.Mutables, "Coalesce", std::move(argsNodes), argsTypes, *kernel, kernel);
}
-}
-}
+} // namespace NKikimr::NMiniKQL
diff --git a/yql/essentials/minikql/comp_nodes/mkql_block_coalesce.h b/yql/essentials/minikql/comp_nodes/mkql_block_coalesce.h
index 247af262517..009957314e6 100644
--- a/yql/essentials/minikql/comp_nodes/mkql_block_coalesce.h
+++ b/yql/essentials/minikql/comp_nodes/mkql_block_coalesce.h
@@ -6,5 +6,5 @@ namespace NMiniKQL {
IComputationNode* WrapBlockCoalesce(TCallable& callable, const TComputationNodeFactoryContext& ctx);
-}
-}
+} // namespace NMiniKQL
+} // namespace NKikimr
diff --git a/yql/essentials/minikql/comp_nodes/mkql_block_coalesce_blending_helper.h b/yql/essentials/minikql/comp_nodes/mkql_block_coalesce_blending_helper.h
new file mode 100644
index 00000000000..148c2f48472
--- /dev/null
+++ b/yql/essentials/minikql/comp_nodes/mkql_block_coalesce_blending_helper.h
@@ -0,0 +1,233 @@
+#pragma once
+
+#include <yql/essentials/public/udf/arrow/bit_util.h>
+#include <yql/essentials/utils/swap_bytes.h>
+
+#include <util/generic/yexception.h>
+#include <util/system/types.h>
+
+#include <arrow/datum.h>
+#include <arrow/util/bit_util.h>
+#include <yql/essentials/minikql/defs.h>
+
+#include <algorithm>
+#include <array>
+#include <cstddef>
+
+namespace NKikimr::NMiniKQL {
+
+template <bool isScalar, bool isOptional>
+Y_FORCE_INLINE bool GetBit(const ui8* bitMask, size_t offset) {
+ if constexpr (isScalar || !isOptional) {
+ return 1;
+ } else {
+ return arrow::BitUtil::GetBit(bitMask, offset);
+ }
+}
+
+template <bool isOptional>
+Y_FORCE_INLINE void SetBitTo(ui8* bitMask, size_t offset, bool bit_value) {
+ if constexpr (!isOptional) {
+ return;
+ } else {
+ arrow::BitUtil::SetBitTo(bitMask, offset, bit_value);
+ }
+}
+
+template <typename TType>
+TType* GetScalar(const arrow::Datum& datum) {
+ auto& buffer = arrow::internal::checked_cast<arrow::internal::PrimitiveScalarBase&>(*datum.scalar());
+ return static_cast<TType*>(buffer.mutable_data());
+};
+
+template <typename TType>
+class TDatumStorageView {
+public:
+ TDatumStorageView(const arrow::Datum& datum)
+ : Datum_(datum) {
+ }
+
+ TType* data() {
+ if (Datum_.is_scalar()) {
+ return GetScalar<TType>(Datum_);
+ } else {
+ MKQL_ENSURE(Datum_.is_array(), "Invalid datum");
+ MKQL_ENSURE(Datum_.array()->buffers.size() > 1, "Invalid datum");
+ MKQL_ENSURE(Datum_.array()->buffers[1], "Invalid datum");
+ return reinterpret_cast<TType*>(Datum_.array()->buffers[1]->mutable_data());
+ }
+ }
+
+ size_t offset() {
+ if (Datum_.is_scalar()) {
+ return 0;
+ } else {
+ MKQL_ENSURE(Datum_.is_array(), "Invalid datum");
+ return Datum_.array()->offset;
+ }
+ }
+
+ ui8* bitMask() {
+ if (Datum_.is_scalar()) {
+ return nullptr;
+ } else {
+ MKQL_ENSURE(Datum_.is_array(), "Invalid datum");
+ MKQL_ENSURE(Datum_.array()->buffers.size() > 1, "Invalid datum");
+ if (!Datum_.array()->buffers[0]) {
+ return nullptr;
+ }
+ return static_cast<ui8*>(Datum_.array()->buffers[0]->mutable_data());
+ }
+ }
+
+private:
+ const arrow::Datum& Datum_;
+};
+
+template <typename TType, bool rightIsScalar, bool rightIsOptional>
+void CoalesceByOneElement(size_t elements,
+ const ui8* leftBitMask,
+ const ui8* rightBitMask,
+ size_t leftOffset,
+ size_t rightOffset,
+ const TType* left,
+ const TType* right,
+ TType* out,
+ size_t outOffset,
+ ui8* outBitMask) {
+ for (size_t i = 0; i < elements; i++) {
+ if (arrow::BitUtil::GetBit(leftBitMask, i + leftOffset)) {
+ out[i + outOffset] = left[i + leftOffset];
+ SetBitTo<rightIsOptional>(outBitMask, i + outOffset, true);
+ } else {
+ if constexpr (rightIsScalar) {
+ out[i + outOffset] = right[0];
+ } else {
+ out[i + outOffset] = right[i + rightOffset];
+ }
+
+ SetBitTo<rightIsOptional>(outBitMask,
+ i + outOffset,
+ GetBit<rightIsScalar, rightIsOptional>(rightBitMask, i + rightOffset));
+ }
+ }
+}
+
+template <bool isAligned>
+Y_FORCE_INLINE ui8 GetMaskValue(const ui8* mask, size_t offset, size_t bitMaskPosition) {
+ if constexpr (isAligned) {
+ return mask[offset / 8 + bitMaskPosition];
+ } else {
+ const ui8 rightBitsCount = offset % 8;
+ const ui8 leftBitsCount = 8 - rightBitsCount;
+ ui8 leftMaskPart = mask[offset / 8 + bitMaskPosition] >> rightBitsCount;
+ ui8 rightMaskPart = mask[offset / 8 + bitMaskPosition + 1] & ((1 << rightBitsCount) - 1);
+ return rightMaskPart << leftBitsCount | leftMaskPart;
+ }
+}
+
+template <typename TType, bool rightIsScalar, bool rightIsOptional, bool rightIsAlignedAsLeft>
+void VectorizedCoalesce(const ui8* __restrict leftBitMask,
+ const ui8* __restrict rightBitMask,
+ size_t leftOffset,
+ size_t rightOffset,
+ const TType* __restrict left,
+ const TType* __restrict right,
+ TType* __restrict out,
+ size_t outOffset,
+ ui8* __restrict outBitMask,
+ size_t lengthInElements) {
+ constexpr auto sizeInBytes = sizeof(TType) * 8;
+ auto lengthInBytes = lengthInElements * sizeof(TType);
+ // Calculates the truncated length for working with data blocks.
+ auto truncatedLengthInBytes =
+ lengthInBytes / sizeInBytes * sizeInBytes;
+ for (size_t bytesProcessed = 0u, elemShift = 0, step = 0;
+ bytesProcessed < truncatedLengthInBytes;
+ bytesProcessed += sizeInBytes, elemShift += 8, step += 1) {
+ auto currentLeftMask = leftBitMask[leftOffset / 8 + step];
+ if constexpr (rightIsOptional) {
+ // If right is optional, updates the output bit mask.
+ // Otherwise, the output bit mask doesn't exist.
+ if constexpr (rightIsScalar) {
+ outBitMask[outOffset / 8 + step] = 0xFFU;
+ } else {
+ outBitMask[outOffset / 8 + step] =
+ currentLeftMask | GetMaskValue<rightIsAlignedAsLeft>(rightBitMask, rightOffset, step);
+ }
+ }
+ // Expands the current mask to an array of unsigned of type.
+ auto expandedMask = NYql::NUdf::BitToByteExpand<std::make_unsigned_t<TType>>(currentLeftMask);
+ for (size_t j = 0u; j < 8; ++j) {
+ auto arrayIdx = elemShift + j;
+ auto rightArrayIdx = arrayIdx + rightOffset;
+ if constexpr (rightIsScalar) {
+ rightArrayIdx = 0;
+ }
+ // Performs the operation of mixing data from left and right based on the mask.
+ out[outOffset + arrayIdx] = (left[leftOffset + arrayIdx] & expandedMask[j]) | right[rightArrayIdx] & ~expandedMask[j];
+ }
+ }
+}
+// Coalesces data from two inputs based on bit masks, handling either (array, array) or (array, scalar).
+// This function efficiently merges data from 'left' and 'right' into 'out' array using 'left.bitMask()' and 'right.bitMask()'.
+// The function is vectorization friendly, which can significantly improve performance.
+template <typename TType, bool rightIsScalar, bool rightIsOptional>
+void BlendCoalesce(TDatumStorageView<TType> left,
+ TDatumStorageView<TType> right,
+ TDatumStorageView<TType> out,
+ size_t lengthInElements) {
+ Y_ENSURE(left.offset() % 8 == out.offset() % 8);
+ auto firstElementsToProcess = std::min((8 - left.offset() % 8) % 8, lengthInElements);
+ // Process one by one until left mask is aligned by byte.
+ CoalesceByOneElement<TType, rightIsScalar, rightIsOptional>(firstElementsToProcess,
+ left.bitMask(),
+ right.bitMask(),
+ left.offset(),
+ right.offset(),
+ left.data(),
+ right.data(),
+ out.data(),
+ out.offset(),
+ out.bitMask());
+ lengthInElements -= firstElementsToProcess;
+
+ // Process vectorized.
+ if (left.offset() % 8 != right.offset() % 8) {
+ VectorizedCoalesce<TType, rightIsScalar, rightIsOptional, false>(left.bitMask(),
+ right.bitMask(),
+ left.offset() + firstElementsToProcess,
+ right.offset() + firstElementsToProcess,
+ left.data(),
+ right.data(),
+ out.data(),
+ out.offset() + firstElementsToProcess,
+ out.bitMask(),
+ lengthInElements);
+ } else {
+ VectorizedCoalesce<TType, rightIsScalar, rightIsOptional, true>(left.bitMask(),
+ right.bitMask(),
+ left.offset() + firstElementsToProcess,
+ right.offset() + firstElementsToProcess,
+ left.data(),
+ right.data(),
+ out.data(),
+ out.offset() + firstElementsToProcess,
+ out.bitMask(),
+ lengthInElements);
+ }
+ // Process remaining bits that take less memory than one byte.
+ size_t remainingBits = (lengthInElements) % 8;
+ CoalesceByOneElement<TType, rightIsScalar, rightIsOptional>(remainingBits,
+ left.bitMask(),
+ right.bitMask(),
+ left.offset() + firstElementsToProcess + lengthInElements - remainingBits,
+ right.offset() + firstElementsToProcess + lengthInElements - remainingBits,
+ left.data(),
+ right.data(),
+ out.data(),
+ out.offset() + firstElementsToProcess + lengthInElements - remainingBits,
+ out.bitMask());
+}
+
+} // namespace NKikimr::NMiniKQL
diff --git a/yql/essentials/minikql/comp_nodes/mkql_blocks.cpp b/yql/essentials/minikql/comp_nodes/mkql_blocks.cpp
index 5f54771ab7f..6bf6bc38894 100644
--- a/yql/essentials/minikql/comp_nodes/mkql_blocks.cpp
+++ b/yql/essentials/minikql/comp_nodes/mkql_blocks.cpp
@@ -1186,6 +1186,208 @@ private:
const TVector<TType*> Types_;
};
+struct TListFromBlocksState : public TComputationValue<TListFromBlocksState> {
+public:
+ TListFromBlocksState(TMemoryUsageInfo* memInfo, TComputationContext& ctx, const TVector<TType*>& types, size_t blockLengthIndex)
+ : TComputationValue(memInfo)
+ , HolderFactory_(ctx.HolderFactory)
+ , BlockLengthIndex_(blockLengthIndex)
+ , Readers_(types.size())
+ , Converters_(types.size())
+ , ValuesDescr_(ToValueDescr(types))
+ {
+ const auto& pgBuilder = ctx.Builder->GetPgBuilder();
+ for (size_t i = 0; i < types.size(); ++i) {
+ if (i == blockLengthIndex) {
+ continue;
+ }
+ const TType* blockItemType = AS_TYPE(TBlockType, types[i])->GetItemType();
+ Readers_[i] = MakeBlockReader(TTypeInfoHelper(), blockItemType);
+ Converters_[i] = MakeBlockItemConverter(TTypeInfoHelper(), blockItemType, pgBuilder);
+ }
+ }
+
+ NUdf::TUnboxedValue GetRow() {
+ MKQL_ENSURE(CurrentRow_ < RowCount_, "Rows out of range");
+
+ NUdf::TUnboxedValue* outItems = nullptr;
+ auto row = HolderFactory_.CreateDirectArrayHolder(Readers_.size() - 1, outItems);
+
+ size_t outputStructIdx = 0;
+ for (size_t i = 0; i < Readers_.size(); i++) {
+ if (i == BlockLengthIndex_) {
+ continue;
+ }
+
+ const auto& datum = TArrowBlock::From(BlockItems_[i]).GetDatum();
+ ARROW_DEBUG_CHECK_DATUM_TYPES(ValuesDescr_[i], datum.descr());
+
+ TBlockItem item;
+ if (datum.is_scalar()) {
+ item = Readers_[i]->GetScalarItem(*datum.scalar());
+ } else {
+ MKQL_ENSURE(datum.is_array(), "Expecting array");
+ item = Readers_[i]->GetItem(*datum.array(), CurrentRow_);
+ }
+
+ outItems[outputStructIdx++] = Converters_[i]->MakeValue(item, HolderFactory_);
+ }
+
+ CurrentRow_++;
+ return row;
+ }
+
+ void SetBlock(NUdf::TUnboxedValue block) {
+ BlockItems_ = block.GetElements();
+ Block_ = std::move(block);
+
+ CurrentRow_ = 0;
+ RowCount_ = GetBlockCount(BlockItems_[BlockLengthIndex_]);
+ }
+
+ bool HasRows() const {
+ return CurrentRow_ < RowCount_;
+ }
+
+private:
+ const THolderFactory& HolderFactory_;
+
+ size_t CurrentRow_ = 0;
+ size_t RowCount_ = 0;
+
+ size_t BlockLengthIndex_ = 0;
+
+ NUdf::TUnboxedValue Block_;
+ const NUdf::TUnboxedValue* BlockItems_ = nullptr;
+
+ std::vector<std::unique_ptr<IBlockReader>> Readers_;
+ std::vector<std::unique_ptr<IBlockItemConverter>> Converters_;
+ const std::vector<arrow::ValueDescr> ValuesDescr_;
+};
+
+class TListFromBlocksWrapper : public TMutableComputationNode<TListFromBlocksWrapper>
+{
+ using TBaseComputation = TMutableComputationNode<TListFromBlocksWrapper>;
+
+public:
+ TListFromBlocksWrapper(TComputationMutables& mutables,
+ IComputationNode* list,
+ TStructType* structType
+ )
+ : TBaseComputation(mutables, EValueRepresentation::Boxed)
+ , List_(list)
+ {
+ for (size_t i = 0; i < structType->GetMembersCount(); i++) {
+ if (structType->GetMemberName(i) == NYql::BlockLengthColumnName) {
+ BlockLengthIndex_ = i;
+ Types_.push_back(nullptr);
+ continue;
+ }
+ Types_.push_back(structType->GetMemberType(i));
+ }
+ }
+
+ NUdf::TUnboxedValuePod DoCalculate(TComputationContext& ctx) const
+ {
+ return ctx.HolderFactory.Create<TListFromBlocksValue>(
+ ctx,
+ Types_,
+ BlockLengthIndex_,
+ List_->GetValue(ctx)
+ );
+ }
+
+private:
+ class TListFromBlocksValue : public TCustomListValue {
+ using TState = TListFromBlocksState;
+
+ public:
+ class TIterator : public TComputationValue<TIterator> {
+ public:
+ TIterator(TMemoryUsageInfo* memInfo, NUdf::TUnboxedValue&& blockState, NUdf::TUnboxedValue&& iter)
+ : TComputationValue<TIterator>(memInfo)
+ , BlockState_(std::move(blockState))
+ , Iter_(std::move(iter))
+ {}
+
+ private:
+ bool Next(NUdf::TUnboxedValue& value) final {
+ auto& blockState = *static_cast<TState*>(BlockState_.AsBoxed().Get());
+ if (!blockState.HasRows()) {
+ NUdf::TUnboxedValue block;
+ if (!Iter_.Next(block)) {
+ return false;
+ }
+ blockState.SetBlock(std::move(block));
+ }
+
+ value = blockState.GetRow();
+ return true;
+ }
+
+ private:
+ const NUdf::TUnboxedValue BlockState_;
+ const NUdf::TUnboxedValue Iter_;
+ };
+
+ TListFromBlocksValue(TMemoryUsageInfo* memInfo, TComputationContext& ctx,
+ const TVector<TType*>& types, ui32 blockLengthIndex, NUdf::TUnboxedValue&& list
+ )
+ : TCustomListValue(memInfo)
+ , CompCtx_(ctx)
+ , Types_(types)
+ , BlockLengthIndex_(blockLengthIndex)
+ , List_(std::move(list))
+ {}
+
+ private:
+ NUdf::TUnboxedValue GetListIterator() const final {
+ auto state = CompCtx_.HolderFactory.Create<TState>(CompCtx_, Types_, BlockLengthIndex_);
+ return CompCtx_.HolderFactory.Create<TIterator>(std::move(state), List_.GetListIterator());
+ }
+
+ bool HasListItems() const final {
+ if (!HasItems.has_value()) {
+ HasItems = List_.HasListItems();
+ }
+ return *HasItems;
+ }
+
+ ui64 GetListLength() const final {
+ if (!Length.has_value()) {
+ auto iter = List_.GetListIterator();
+
+ Length = 0;
+ NUdf::TUnboxedValue block;
+ while (iter.Next(block)) {
+ auto blockLengthValue = block.GetElement(BlockLengthIndex_);
+ *Length += GetBlockCount(blockLengthValue);
+ }
+ }
+
+ return *Length;
+ }
+
+ private:
+ TComputationContext& CompCtx_;
+
+ const TVector<TType*>& Types_;
+ size_t BlockLengthIndex_ = 0;
+
+ NUdf::TUnboxedValue List_;
+ };
+
+ void RegisterDependencies() const final {
+ this->DependsOn(List_);
+ }
+
+private:
+ TVector<TType*> Types_;
+ size_t BlockLengthIndex_ = 0;
+
+ IComputationNode* const List_;
+};
+
class TPrecomputedArrowNode : public IArrowKernelComputationNode {
public:
TPrecomputedArrowNode(const arrow::Datum& datum, TStringBuf kernelName)
@@ -1645,6 +1847,23 @@ IComputationNode* WrapWideFromBlocks(TCallable& callable, const TComputationNode
return new TWideFromBlocksFlowWrapper(ctx.Mutables, wideFlow, std::move(items));
}
+IComputationNode* WrapListFromBlocks(TCallable& callable, const TComputationNodeFactoryContext& ctx) {
+ MKQL_ENSURE(callable.GetInputsCount() == 1, "Expected 1 args, got " << callable.GetInputsCount());
+
+ const auto inputType = callable.GetInput(0).GetStaticType();
+ MKQL_ENSURE(inputType->IsList(), "Expected List as an input");
+ const auto outputType = callable.GetType()->GetReturnType();
+ MKQL_ENSURE(outputType->IsList(), "Expected List as an output");
+
+ const auto inputItemType = AS_TYPE(TListType, inputType)->GetItemType();
+ MKQL_ENSURE(inputItemType->IsStruct(), "Expected List of Struct as an input");
+ const auto outputItemType = AS_TYPE(TListType, outputType)->GetItemType();
+ MKQL_ENSURE(outputItemType->IsStruct(), "Expected List of Struct as an output");
+
+ const auto list = LocateNode(ctx.NodeLocator, callable, 0);
+ return new TListFromBlocksWrapper(ctx.Mutables, list, AS_TYPE(TStructType, inputItemType));
+}
+
IComputationNode* WrapAsScalar(TCallable& callable, const TComputationNodeFactoryContext& ctx) {
MKQL_ENSURE(callable.GetInputsCount() == 1, "Expected 1 args, got " << callable.GetInputsCount());
diff --git a/yql/essentials/minikql/comp_nodes/mkql_blocks.h b/yql/essentials/minikql/comp_nodes/mkql_blocks.h
index 326e25e57d9..fc718212091 100644
--- a/yql/essentials/minikql/comp_nodes/mkql_blocks.h
+++ b/yql/essentials/minikql/comp_nodes/mkql_blocks.h
@@ -10,6 +10,7 @@ IComputationNode* WrapWideToBlocks(TCallable& callable, const TComputationNodeFa
IComputationNode* WrapListToBlocks(TCallable& callable, const TComputationNodeFactoryContext& ctx);
IComputationNode* WrapFromBlocks(TCallable& callable, const TComputationNodeFactoryContext& ctx);
IComputationNode* WrapWideFromBlocks(TCallable& callable, const TComputationNodeFactoryContext& ctx);
+IComputationNode* WrapListFromBlocks(TCallable& callable, const TComputationNodeFactoryContext& ctx);
IComputationNode* WrapAsScalar(TCallable& callable, const TComputationNodeFactoryContext& ctx);
IComputationNode* WrapReplicateScalar(TCallable& callable, const TComputationNodeFactoryContext& ctx);
IComputationNode* WrapBlockExpandChunked(TCallable& callable, const TComputationNodeFactoryContext& ctx);
diff --git a/yql/essentials/minikql/comp_nodes/mkql_factory.cpp b/yql/essentials/minikql/comp_nodes/mkql_factory.cpp
index 9615a5d4d78..e32aaf1fd79 100644
--- a/yql/essentials/minikql/comp_nodes/mkql_factory.cpp
+++ b/yql/essentials/minikql/comp_nodes/mkql_factory.cpp
@@ -291,6 +291,7 @@ struct TCallableComputationNodeBuilderFuncMapFiller {
{"WideTopSortBlocks", &WrapWideTopSortBlocks},
{"WideSortBlocks", &WrapWideSortBlocks},
{"ListToBlocks", &WrapListToBlocks},
+ {"ListFromBlocks", &WrapListFromBlocks},
{"AsScalar", &WrapAsScalar},
{"ReplicateScalar", &WrapReplicateScalar},
{"BlockCoalesce", &WrapBlockCoalesce},
diff --git a/yql/essentials/minikql/comp_nodes/ut/mkql_block_coalesce_ut.cpp b/yql/essentials/minikql/comp_nodes/ut/mkql_block_coalesce_ut.cpp
new file mode 100644
index 00000000000..1a6a044bcf7
--- /dev/null
+++ b/yql/essentials/minikql/comp_nodes/ut/mkql_block_coalesce_ut.cpp
@@ -0,0 +1,295 @@
+#include <yql/essentials/minikql/comp_nodes/mkql_block_coalesce.h>
+
+#include <yql/essentials/core/arrow_kernels/request/request.h>
+#include <yql/essentials/minikql/comp_nodes/mkql_block_coalesce_blending_helper.h>
+#include <yql/essentials/minikql/comp_nodes/ut/mkql_computation_node_ut.h>
+#include <yql/essentials/minikql/computation/mkql_computation_node_holders.h>
+#include <yql/essentials/minikql/computation/mkql_block_builder.h>
+#include <yql/essentials/ast/yql_expr_builder.h>
+#include <yql/essentials/public/udf/arrow/memory_pool.h>
+#include <yql/essentials/minikql/mkql_node_cast.h>
+#include <yql/essentials/minikql/arrow/arrow_util.h>
+#include <yql/essentials/core/arrow_kernels/request/request.h>
+#include <yql/essentials/core/arrow_kernels/registry/registry.h>
+
+#include <arrow/compute/exec_internal.h>
+
+namespace NKikimr::NMiniKQL {
+
+namespace {
+
+#define UNIT_TEST_WITH_INTEGER(TestName) \
+ template <typename TTestType> \
+ void TestName##Execute(NUnitTest::TTestContext& ut_context Y_DECLARE_UNUSED); \
+ Y_UNIT_TEST(TestName##i8) { \
+ TestName##Execute<i8>(ut_context); \
+ } \
+ Y_UNIT_TEST(TestName##ui8) { \
+ TestName##Execute<ui8>(ut_context); \
+ } \
+ Y_UNIT_TEST(TestName##i16) { \
+ TestName##Execute<i16>(ut_context); \
+ } \
+ Y_UNIT_TEST(TestName##ui16) { \
+ TestName##Execute<ui16>(ut_context); \
+ } \
+ Y_UNIT_TEST(TestName##i32) { \
+ TestName##Execute<i32>(ut_context); \
+ } \
+ Y_UNIT_TEST(TestName##ui32) { \
+ TestName##Execute<ui32>(ut_context); \
+ } \
+ \
+ template <typename TTestType> \
+ void TestName##Execute(NUnitTest::TTestContext& ut_context Y_DECLARE_UNUSED)
+
+template <typename T>
+arrow::Datum GenerateArray(TTypeInfoHelper& typeInfoHelper, TType* type, std::vector<TMaybe<T>>& array, size_t offset) {
+ auto rightArrayBuilder = MakeArrayBuilder(typeInfoHelper, type, *NYql::NUdf::GetYqlMemoryPool(), array.size() + offset, nullptr);
+ for (size_t i = 0; i < offset; i++) {
+ if (array[0]) {
+ rightArrayBuilder->Add(TBlockItem(array[0].GetRef()));
+ } else {
+ rightArrayBuilder->Add(TBlockItem());
+ }
+ }
+
+ for (size_t i = 0; i < array.size(); i++) {
+ if (array[i]) {
+ rightArrayBuilder->Add(TBlockItem(array[i].GetRef()));
+ } else {
+ rightArrayBuilder->Add(TBlockItem());
+ }
+ };
+
+ arrow::Datum resultArray = rightArrayBuilder->Build(/*finish=*/true);
+ resultArray.array()->offset += offset;
+ resultArray.array()->null_count = arrow::kUnknownNullCount;
+ resultArray.array()->length -= offset;
+ return resultArray;
+}
+
+enum class ERightOperandType {
+ SCALAR,
+ ARRAY,
+ OPTIONAL_ARRAY,
+ OPTIONAL_SCALAR
+};
+
+template <typename T, ERightOperandType rightType = ERightOperandType::ARRAY>
+void TestBlockCoalesceForVector(std::vector<TMaybe<T>> left, std::vector<TMaybe<T>> right, std::vector<TMaybe<T>> expected, size_t leftOffset, size_t rightOffset) {
+ TSetup<false> setup;
+ NYql::TExprContext exprCtx;
+ auto* type = setup.PgmBuilder->NewDataType(NUdf::TDataType<T>::Id);
+ auto* typeNode = exprCtx.template MakeType<NYql::TBlockExprType>(
+ exprCtx.template MakeType<NYql::TDataExprType>(NUdf::TDataType<T>::Slot));
+
+ auto* optType = setup.PgmBuilder->NewOptionalType(type);
+ auto* optTypeNode = exprCtx.template MakeType<NYql::TBlockExprType>(
+ exprCtx.template MakeType<NYql::TOptionalExprType>(
+ exprCtx.template MakeType<NYql::TDataExprType>(NUdf::TDataType<T>::Slot)));
+ if (rightType == ERightOperandType::OPTIONAL_ARRAY || rightType == ERightOperandType::OPTIONAL_SCALAR) {
+ // Make both operands optional.
+ type = optType;
+ typeNode = optTypeNode;
+ }
+ TTypeInfoHelper typeInfoHelper;
+ arrow::Datum leftOperand = GenerateArray(typeInfoHelper, optType, left, leftOffset);
+
+ arrow::compute::ExecContext execCtx;
+ arrow::compute::KernelContext ctx(&execCtx);
+
+ arrow::Datum rightOperand;
+ if constexpr (rightType == ERightOperandType::SCALAR) {
+ rightOperand = MakeScalarDatum<T>(right[0].GetRef());
+ } else if constexpr (rightType == ERightOperandType::OPTIONAL_SCALAR) {
+ if (right[0]) {
+ rightOperand = MakeScalarDatum<T>(right[0].GetRef());
+ } else {
+ rightOperand = MakeScalarDatum<T>(0);
+ rightOperand.scalar()->is_valid = false;
+ }
+ } else {
+ rightOperand = GenerateArray(typeInfoHelper, type, right, rightOffset);
+ }
+ auto bi = arrow::compute::detail::ExecBatchIterator::Make({leftOperand, rightOperand}, 1000).ValueOrDie();
+ arrow::compute::ExecBatch batch;
+ UNIT_ASSERT(bi->Next(&batch));
+ std::shared_ptr<arrow::DataType> arrowType;
+ UNIT_ASSERT(ConvertArrowType(type, arrowType, [](TType*) {}));
+ arrow::Datum out;
+ auto registry = CreateFunctionRegistry(CreateBuiltinRegistry());
+ NYql::TKernelRequestBuilder b(*registry);
+
+ b.AddBinaryOp(NYql::TKernelRequestBuilder::EBinaryOp::Coalesce, optTypeNode, typeNode, optTypeNode);
+ auto serializedNode = b.Serialize();
+ auto nodeFactory = GetBuiltinFactory();
+ auto kernel = NYql::LoadKernels(serializedNode, *registry, nodeFactory);
+ Y_ENSURE(kernel.size() == 1);
+ Y_ENSURE(kernel[0]->exec(&ctx, batch, &out).ok());
+
+ arrow::Datum expectedArrowArray = GenerateArray(typeInfoHelper, type, expected, 0);
+ UNIT_ASSERT_EQUAL_C(out, expectedArrowArray, "Expected : " << expectedArrowArray.make_array()->ToString() << "\n but got : " << out.make_array()->ToString());
+}
+
+template <typename T, ERightOperandType rightType = ERightOperandType::ARRAY>
+void TestBlockCoalesce(std::vector<TMaybe<T>> left, std::vector<TMaybe<T>> right, std::vector<TMaybe<T>> expected) {
+ // First test different offsets.
+ for (size_t leftOffset = 0; leftOffset < 10; leftOffset++) {
+ for (size_t rightOffset = 0; rightOffset < 10; rightOffset++) {
+ TestBlockCoalesceForVector<T, rightType>(left, right, expected, leftOffset, rightOffset);
+ }
+ }
+
+ // Second test different sizes.
+ // Also test only small subset of offsets to prevent a combinatorial explosion.
+ while (left.size() > 1 || right.size() > 1 || expected.size() > 1) {
+ for (size_t leftOffset = 0; leftOffset < 2; leftOffset++) {
+ for (size_t rightOffset = 0; rightOffset < 2; rightOffset++) {
+ TestBlockCoalesceForVector<T, rightType>(left, right, expected, leftOffset, rightOffset);
+ }
+ }
+ if (left.size() > 1) {
+ left.pop_back();
+ }
+ if (right.size() > 1) {
+ right.pop_back();
+ }
+ if (expected.size() > 1) {
+ expected.pop_back();
+ }
+ }
+}
+
+void BlockCoalesceGraphTest(size_t length, size_t offset) {
+ TSetup<false> setup;
+ TProgramBuilder& pb = *setup.PgmBuilder;
+
+ const auto ui32Type = pb.NewDataType(NUdf::TDataType<ui32>::Id);
+ const auto optui32Type = pb.NewOptionalType(ui32Type);
+
+ const auto inputTupleType = pb.NewTupleType({ui32Type, optui32Type});
+ const auto outputTupleType = pb.NewTupleType({ui32Type});
+
+ TRuntimeNode::TList right;
+ TVector<bool> isNull;
+
+ const auto drng = CreateDeterministicRandomProvider(1);
+ std::vector<ui32> rightValues;
+ for (size_t i = 0; i < length; i++) {
+ const ui32 randomValue = drng->GenRand();
+ const auto maybeNull = (randomValue % 2 == 0)
+ ? pb.NewOptional(pb.NewDataLiteral<ui32>(randomValue / 2))
+ : pb.NewEmptyOptionalDataLiteral(NUdf::TDataType<ui32>::Id);
+
+ const auto inputTuple = pb.NewTuple(inputTupleType, {
+ pb.NewDataLiteral<ui32>(i),
+ maybeNull,
+ });
+
+ right.push_back(inputTuple);
+ rightValues.push_back(randomValue / 2);
+ isNull.push_back((randomValue % 2) != 0);
+ }
+
+ const auto list = pb.NewList(inputTupleType, std::move(right));
+
+ auto node = pb.ToFlow(list);
+ node = pb.ExpandMap(node, [&](TRuntimeNode item) -> TRuntimeNode::TList {
+ return {
+ pb.Nth(item, 0),
+ pb.Nth(item, 1),
+ };
+ });
+
+ node = pb.ToFlow(pb.WideToBlocks(pb.FromFlow(node)));
+ if (offset > 0) {
+ node = pb.WideSkipBlocks(node, pb.NewDataLiteral<ui64>(offset));
+ }
+ node = pb.WideMap(node, [&](TRuntimeNode::TList items) -> TRuntimeNode::TList {
+ Y_ENSURE(items.size() == 3);
+ return {
+ pb.BlockCoalesce(items[1], items[0]),
+ items[2]};
+ });
+
+ node = pb.ToFlow(pb.WideFromBlocks(pb.FromFlow(node)));
+ node = pb.NarrowMap(node, [&](TRuntimeNode::TList items) -> TRuntimeNode {
+ return pb.NewTuple(outputTupleType, {items[0]});
+ });
+
+ const auto pgmReturn = pb.Collect(node);
+ const auto graph = setup.BuildGraph(pgmReturn);
+
+ const auto iterator = graph->GetValue().GetListIterator();
+ for (size_t i = 0; i < length; i++) {
+ if (i < offset) {
+ continue;
+ }
+ NUdf::TUnboxedValue outputTuple;
+ UNIT_ASSERT(iterator.Next(outputTuple));
+ if (isNull[i]) {
+ UNIT_ASSERT_EQUAL(outputTuple.GetElement(0).Get<ui32>(), i);
+ } else {
+ UNIT_ASSERT_EQUAL(outputTuple.GetElement(0).Get<ui32>(), rightValues[i]);
+ }
+ }
+}
+
+} // namespace
+
+Y_UNIT_TEST_SUITE(TMiniKQLBlockCoalesceTest) {
+
+Y_UNIT_TEST(CoalesceGraphTest) {
+ for (auto offset : {0, 1, 2, 3, 5, 7, 8, 11, 14, 16}) {
+ BlockCoalesceGraphTest(1000, offset);
+ }
+}
+
+UNIT_TEST_WITH_INTEGER(KernelRightIsNotNullArray) {
+ auto max = std::numeric_limits<TTestType>::max();
+ auto min = std::numeric_limits<TTestType>::min();
+ TestBlockCoalesce<TTestType, ERightOperandType::ARRAY>({Nothing(), 2, 3, Nothing(), 5, 6, 7, max, 9, Nothing(), 11, 12, 13, Nothing(), Nothing(), Nothing(), min, Nothing(), 19, 20},
+ {101, 102, 103, 104, 105, 106, 107, 108, 109, 110, 111, 112, 113, 114, 115, 116, 117, 118, 119, 120},
+ {101, 2, 3, 104, 5, 6, 7, max, 9, 110, 11, 12, 13, 114, 115, 116, min, 118, 19, 20});
+}
+
+UNIT_TEST_WITH_INTEGER(KernelRightIsScalar) {
+ auto max = std::numeric_limits<TTestType>::max();
+ auto min = std::numeric_limits<TTestType>::min();
+
+ TestBlockCoalesce<TTestType, ERightOperandType::SCALAR>({Nothing(), 2, 3, Nothing(), 5, 6, 7, max, 9, Nothing(), 11, 12, 13, Nothing(), Nothing(), Nothing(), min, Nothing(), 19, 20},
+ {77},
+ {77, 2, 3, 77, 5, 6, 7, max, 9, 77, 11, 12, 13, 77, 77, 77, min, 77, 19, 20});
+}
+
+UNIT_TEST_WITH_INTEGER(KernelRightIsOptionalArray) {
+ auto max = std::numeric_limits<TTestType>::max();
+ auto min = std::numeric_limits<TTestType>::min();
+
+ TestBlockCoalesce<TTestType, ERightOperandType::OPTIONAL_ARRAY>({Nothing(), 2, 3, Nothing(), 5, 6, 7, max, 9, Nothing(), 11, 12, 13, Nothing(), Nothing(), Nothing(), min, Nothing(), 19, 20},
+ {Nothing(), 102, Nothing(), 104, Nothing(), 106, 107, 108, 109, 110, 111, 112, 113, 114, Nothing(), 116, 117, 118, Nothing(), 120},
+ {Nothing(), 2, 3, 104, 5, 6, 7, max, 9, 110, 11, 12, 13, 114, Nothing(), 116, min, 118, 19, 20});
+}
+
+UNIT_TEST_WITH_INTEGER(KernelRightIsOptionalInvalidScalar) {
+ auto max = std::numeric_limits<TTestType>::max();
+ auto min = std::numeric_limits<TTestType>::min();
+
+ TestBlockCoalesce<TTestType, ERightOperandType::OPTIONAL_SCALAR>({Nothing(), 2, 3, Nothing(), 5, 6, 7, max, 9, Nothing(), 11, 12, 13, Nothing(), Nothing(), Nothing(), min, Nothing(), 19, 20},
+ {Nothing()},
+ {Nothing(), 2, 3, Nothing(), 5, 6, 7, max, 9, Nothing(), 11, 12, 13, Nothing(), Nothing(), Nothing(), min, Nothing(), 19, 20});
+}
+
+UNIT_TEST_WITH_INTEGER(KernelRightIsOptionalValidScalar) {
+ auto max = std::numeric_limits<TTestType>::max();
+ auto min = std::numeric_limits<TTestType>::min();
+
+ TestBlockCoalesce<TTestType, ERightOperandType::OPTIONAL_SCALAR>({Nothing(), 2, 3, Nothing(), 5, 6, 7, max, 9, Nothing(), 11, 12, 13, Nothing(), Nothing(), Nothing(), min, Nothing(), 19, 20},
+ {77},
+ {77, 2, 3, 77, 5, 6, 7, max, 9, 77, 11, 12, 13, 77, 77, 77, min, 77, 19, 20});
+}
+
+} // Y_UNIT_TEST_SUITE(TMiniKQLBlockCoalesceTest)
+
+} // namespace NKikimr::NMiniKQL
diff --git a/yql/essentials/minikql/comp_nodes/ut/mkql_block_exists_ut.cpp b/yql/essentials/minikql/comp_nodes/ut/mkql_block_exists_ut.cpp
index 373ef67f4b1..3b9d41e2b9b 100644
--- a/yql/essentials/minikql/comp_nodes/ut/mkql_block_exists_ut.cpp
+++ b/yql/essentials/minikql/comp_nodes/ut/mkql_block_exists_ut.cpp
@@ -23,7 +23,7 @@ void DoBlockExistsOffset(size_t length, size_t offset) {
TVector<bool> isNull;
static_assert(MaxBlockSizeInBytes % 4 == 0);
- const auto drng = CreateDeterministicRandomProvider(std::time(nullptr));
+ const auto drng = CreateDeterministicRandomProvider(1);
for (size_t i = 0; i < length; i++) {
const ui64 randomValue = drng->GenRand();
diff --git a/yql/essentials/minikql/comp_nodes/ut/mkql_blocks_ut.cpp b/yql/essentials/minikql/comp_nodes/ut/mkql_blocks_ut.cpp
index 15dd02bf30f..c2faa29bed5 100644
--- a/yql/essentials/minikql/comp_nodes/ut/mkql_blocks_ut.cpp
+++ b/yql/essentials/minikql/comp_nodes/ut/mkql_blocks_ut.cpp
@@ -80,6 +80,71 @@ namespace {
});
}));
}
+
+ // Hand-made variant using WideToBlocks (in order to test ListFromBlocks by well-tested nodes rather than actual ListToBlocks)
+ TRuntimeNode ListToBlocks(TProgramBuilder& pb, TRuntimeNode list) {
+ const auto wideBlocksStream = pb.WideToBlocks(pb.FromFlow(pb.ExpandMap(pb.ToFlow(list), [&](TRuntimeNode item) -> TRuntimeNode::TList {
+ return {
+ pb.Member(item, "key"),
+ pb.Member(item, "value")
+ };
+ })));
+
+ return pb.Collect(pb.NarrowMap(pb.ToFlow(wideBlocksStream), [&](TRuntimeNode::TList items) -> TRuntimeNode {
+ return pb.NewStruct({
+ {"key", items[0]},
+ {"value", items[1]},
+ {NYql::BlockLengthColumnName, items[2]}
+ });
+ }));
+ }
+
+ using TListTransformer = std::function<TRuntimeNode(TProgramBuilder&, TRuntimeNode)>;
+
+ void DoTestListToAndFromBlocks(TSetup<false>& setup, TListTransformer listToBlocksImpl, TListTransformer listFromBlocksImpl) {
+ constexpr size_t TEST_SIZE = 1 << 16;
+ const TString hugeString(128, '1');
+
+ TProgramBuilder& pb = *setup.PgmBuilder;
+
+ const auto ui64Type = pb.NewDataType(NUdf::TDataType<ui64>::Id);
+ const auto strType = pb.NewDataType(NUdf::TDataType<char*>::Id);
+
+ const auto structType = pb.NewStructType({
+ {"key", ui64Type},
+ {"value", strType},
+ });
+
+ TVector<TRuntimeNode> listItems;
+ for (size_t i = 0; i < TEST_SIZE; i++) {
+ const auto str = hugeString + ToString(i);
+ listItems.push_back(pb.NewStruct({
+ {"key", pb.NewDataLiteral<ui64>(i)},
+ // Huge string is used to make less rows fit into one block (in order to test output slicing)
+ {"value", pb.NewDataLiteral<NUdf::EDataSlot::String>(str)},
+ }));
+ }
+
+ const auto list = pb.NewList(structType, listItems);
+ const auto blockList = listToBlocksImpl(pb, list);
+
+ const auto graph = setup.BuildGraph(listFromBlocksImpl(pb, blockList));
+ const auto iterator = graph->GetValue().GetListIterator();
+
+ NUdf::TUnboxedValue structValue;
+ for (size_t i = 0; i < TEST_SIZE; i++) {
+ const auto str = hugeString + ToString(i);
+ UNIT_ASSERT(iterator.Next(structValue));
+
+ const auto key = structValue.GetElement(0);
+ UNIT_ASSERT_VALUES_EQUAL(key.Get<ui64>(), i);
+ const auto value = structValue.GetElement(1);
+ UNIT_ASSERT_VALUES_EQUAL(std::string_view(value.AsStringRef()), str);
+ }
+
+ UNIT_ASSERT(!iterator.Next(structValue));
+ UNIT_ASSERT(!iterator.Next(structValue));
+ }
}
Y_UNIT_TEST_SUITE(TMiniKQLBlocksTest) {
@@ -170,49 +235,13 @@ Y_UNIT_TEST_LLVM(TestWideToBlocks) {
}
Y_UNIT_TEST(TestListToBlocks) {
- constexpr size_t TEST_SIZE = 1 << 16;
- const TString hugeString(128, '1');
-
TSetup<false> setup;
- TProgramBuilder& pb = *setup.PgmBuilder;
- const auto ui64Type = pb.NewDataType(NUdf::TDataType<ui64>::Id);
- const auto strType = pb.NewDataType(NUdf::TDataType<char*>::Id);
-
- const auto structType = pb.NewStructType({
- {"key", ui64Type},
- {"value", strType},
- });
-
- TVector<TRuntimeNode> listItems;
- for (size_t i = 0; i < TEST_SIZE; i++) {
- const auto str = hugeString + ToString(i);
- listItems.push_back(pb.NewStruct({
- {"key", pb.NewDataLiteral<ui64>(i)},
- // Huge string is used to make less rows fit into one block (in order to test output slicing)
- {"value", pb.NewDataLiteral<NUdf::EDataSlot::String>(str)},
- }));
- }
-
- const auto list = pb.NewList(structType, listItems);
- const auto blockList = pb.ListToBlocks(list);
-
- const auto graph = setup.BuildGraph(ListFromBlocks(pb, blockList));
- const auto iterator = graph->GetValue().GetListIterator();
-
- NUdf::TUnboxedValue structValue;
- for (size_t i = 0; i < TEST_SIZE; i++) {
- const auto str = hugeString + ToString(i);
- UNIT_ASSERT(iterator.Next(structValue));
-
- const auto key = structValue.GetElement(0);
- UNIT_ASSERT_VALUES_EQUAL(key.Get<ui64>(), i);
- const auto value = structValue.GetElement(1);
- UNIT_ASSERT_VALUES_EQUAL(std::string_view(value.AsStringRef()), str);
- }
-
- UNIT_ASSERT(!iterator.Next(structValue));
- UNIT_ASSERT(!iterator.Next(structValue));
+ DoTestListToAndFromBlocks(
+ setup,
+ [] (TProgramBuilder& pb, TRuntimeNode list) { return pb.ListToBlocks(list); },
+ ListFromBlocks
+ );
}
Y_UNIT_TEST(TestListToBlocksMultiUsage) {
@@ -700,6 +729,16 @@ Y_UNIT_TEST_LLVM(TestWideFromBlocks) {
UNIT_ASSERT(!iterator.Next(item));
}
+Y_UNIT_TEST(TestListFromBlocks) {
+ TSetup<false> setup;
+
+ DoTestListToAndFromBlocks(
+ setup,
+ ListToBlocks,
+ [] (TProgramBuilder& pb, TRuntimeNode list) { return pb.ListFromBlocks(list); }
+ );
+}
+
Y_UNIT_TEST_LLVM(TestWideToAndFromBlocks) {
TSetup<LLVM> setup;
TProgramBuilder& pb = *setup.PgmBuilder;
@@ -741,6 +780,16 @@ Y_UNIT_TEST_LLVM(TestWideToAndFromBlocks) {
UNIT_ASSERT(!iterator.Next(item));
UNIT_ASSERT(!iterator.Next(item));
}
+
+Y_UNIT_TEST(TestListToAndFromBlocks) {
+ TSetup<false> setup;
+
+ DoTestListToAndFromBlocks(
+ setup,
+ [] (TProgramBuilder& pb, TRuntimeNode list) { return pb.ListToBlocks(list); },
+ [] (TProgramBuilder& pb, TRuntimeNode list) { return pb.ListFromBlocks(list); }
+ );
+}
}
Y_UNIT_TEST_SUITE(TMiniKQLDirectKernelTest) {
diff --git a/yql/essentials/minikql/comp_nodes/ut/ya.make.inc b/yql/essentials/minikql/comp_nodes/ut/ya.make.inc
index 46a1ad7f118..73c063d865c 100644
--- a/yql/essentials/minikql/comp_nodes/ut/ya.make.inc
+++ b/yql/essentials/minikql/comp_nodes/ut/ya.make.inc
@@ -2,6 +2,10 @@ FORK_SUBTESTS()
SPLIT_FACTOR(60)
+IF (SANITIZER_TYPE == "address")
+ SPLIT_FACTOR(100)
+ENDIF()
+
IF (SANITIZER_TYPE == "thread" OR WITH_VALGRIND)
TIMEOUT(3600)
SIZE(LARGE)
@@ -22,6 +26,7 @@ SET(ORIG_SOURCES
mkql_test_factory.cpp
mkql_bit_utils_ut.cpp
mkql_block_compress_ut.cpp
+ mkql_block_coalesce_ut.cpp
mkql_block_exists_ut.cpp
mkql_block_skiptake_ut.cpp
mkql_block_map_join_ut_utils.cpp
@@ -75,6 +80,8 @@ SET(ORIG_SOURCES
)
PEERDIR(
+ yql/essentials/core/arrow_kernels/request
+ yql/essentials/core/arrow_kernels/registry
yql/essentials/public/udf
yql/essentials/public/udf/arrow
yql/essentials/public/udf/service/exception_policy
diff --git a/yql/essentials/minikql/comp_nodes/ya.make b/yql/essentials/minikql/comp_nodes/ya.make
index 93f0314c0ed..e726621ec07 100644
--- a/yql/essentials/minikql/comp_nodes/ya.make
+++ b/yql/essentials/minikql/comp_nodes/ya.make
@@ -17,4 +17,5 @@ RECURSE(
RECURSE_FOR_TESTS(
llvm16/ut
+ benchmark
)
diff --git a/yql/essentials/minikql/computation/mkql_block_impl.cpp b/yql/essentials/minikql/computation/mkql_block_impl.cpp
index ba9238e45a7..e960e8562e3 100644
--- a/yql/essentials/minikql/computation/mkql_block_impl.cpp
+++ b/yql/essentials/minikql/computation/mkql_block_impl.cpp
@@ -220,7 +220,11 @@ std::vector<arrow::ValueDescr> ToValueDescr(const TVector<TType*>& types) {
std::vector<arrow::ValueDescr> res;
res.reserve(types.size());
for (const auto& type : types) {
- res.emplace_back(ToValueDescr(type));
+ if (type) {
+ res.emplace_back(ToValueDescr(type));
+ } else {
+ res.emplace_back();
+ }
}
return res;
diff --git a/yql/essentials/minikql/invoke_builtins/mkql_builtins_datetime.h b/yql/essentials/minikql/invoke_builtins/mkql_builtins_datetime.h
index c1b90de073f..1cef07bec1b 100644
--- a/yql/essentials/minikql/invoke_builtins/mkql_builtins_datetime.h
+++ b/yql/essentials/minikql/invoke_builtins/mkql_builtins_datetime.h
@@ -160,7 +160,7 @@ NUdf::TDataType<NUdf::TInterval64>::TLayout FromScaledDate<NUdf::TDataType<NUdf:
template<typename TDateType>
inline bool IsBadDateTime(TScaledDate val) {
static_assert(TDateType::Features & (NYql::NUdf::DateType | NYql::NUdf::TzDateType), "Date type expected");
- if constexpr (TDateType::Features & NYql::NUdf::BigDateType) {
+ if constexpr (TDateType::Features & NYql::NUdf::ExtDateType) {
return val < NUdf::MIN_TIMESTAMP64 || val > NUdf::MAX_TIMESTAMP64;
} else {
return val < 0 || val >= TScaledDate(NUdf::MAX_TIMESTAMP);
@@ -170,7 +170,7 @@ inline bool IsBadDateTime(TScaledDate val) {
template<typename TDateType>
inline bool IsBadInterval(TScaledDate val) {
static_assert(TDateType::Features & NYql::NUdf::TimeIntervalType, "Interval type expected");
- if constexpr (TDateType::Features & NYql::NUdf::BigDateType) {
+ if constexpr (TDateType::Features & NYql::NUdf::ExtDateType) {
return val < -NUdf::MAX_INTERVAL64 || val > NUdf::MAX_INTERVAL64;
} else {
return val <= -TScaledDate(NUdf::MAX_TIMESTAMP) || val >= TScaledDate(NUdf::MAX_TIMESTAMP);
@@ -201,7 +201,7 @@ inline Value* GenIsInt64Overflow(Value* value, LLVMContext &context, BasicBlock*
template<typename TDateType>
inline Value* GenIsBadDateTime(Value* val, LLVMContext &context, BasicBlock* block) {
static_assert(TDateType::Features & (NYql::NUdf::DateType | NYql::NUdf::TzDateType), "Date type expected");
- if constexpr (TDateType::Features & NYql::NUdf::BigDateType) {
+ if constexpr (TDateType::Features & NYql::NUdf::ExtDateType) {
auto lt = CmpInst::Create(Instruction::ICmp, ICmpInst::ICMP_SLT, val, ConstantInt::get(Type::getInt64Ty(context), NUdf::MIN_TIMESTAMP64), "lt", block);
auto ge = CmpInst::Create(Instruction::ICmp, ICmpInst::ICMP_SGT, val, ConstantInt::get(Type::getInt64Ty(context), NUdf::MAX_TIMESTAMP64), "ge", block);
return BinaryOperator::CreateOr(lt, ge, "or", block);
@@ -215,10 +215,10 @@ inline Value* GenIsBadDateTime(Value* val, LLVMContext &context, BasicBlock* blo
template<typename TDateType>
inline Value* GenIsBadInterval(Value* val, LLVMContext &context, BasicBlock* block) {
static_assert(TDateType::Features & NYql::NUdf::TimeIntervalType, "Interval type expected");
- constexpr i64 lowerBound = (TDateType::Features & NYql::NUdf::BigDateType)
+ constexpr i64 lowerBound = (TDateType::Features & NYql::NUdf::ExtDateType)
? (-NUdf::MAX_INTERVAL64 - 1)
: -(i64)NUdf::MAX_TIMESTAMP;
- constexpr i64 upperBound = (TDateType::Features & NYql::NUdf::BigDateType)
+ constexpr i64 upperBound = (TDateType::Features & NYql::NUdf::ExtDateType)
? (NUdf::MAX_INTERVAL64 + 1)
: (i64)NUdf::MAX_TIMESTAMP;
auto le = CmpInst::Create(Instruction::ICmp, ICmpInst::ICMP_SLE, val, ConstantInt::get(Type::getInt64Ty(context), lowerBound), "le", block);
diff --git a/yql/essentials/minikql/mkql_program_builder.cpp b/yql/essentials/minikql/mkql_program_builder.cpp
index 27c4229f996..5e8e89da4ca 100644
--- a/yql/essentials/minikql/mkql_program_builder.cpp
+++ b/yql/essentials/minikql/mkql_program_builder.cpp
@@ -6,7 +6,6 @@
#include "yql/essentials/minikql/mkql_function_registry.h"
#include "yql/essentials/minikql/mkql_utils.h"
#include "yql/essentials/minikql/mkql_type_builder.h"
-#include "yql/essentials/core/sql_types/block.h"
#include "yql/essentials/core/sql_types/match_recognize.h"
#include "yql/essentials/core/sql_types/time_order_recover.h"
#include <yql/essentials/parser/pg_catalog/catalog.h>
@@ -378,7 +377,7 @@ TType* TProgramBuilder::BuildArithmeticCommonType(TType* type1, TType* type2) {
const auto features2 = NUdf::GetDataTypeInfo(*data2->GetDataSlot()).Features;
const bool isOptional = isOptional1 || isOptional2;
if (features1 & features2 & NUdf::EDataTypeFeatures::TimeIntervalType) {
- return NewOptionalType(features1 & NUdf::EDataTypeFeatures::BigDateType ? data1 : data2);
+ return NewOptionalType(features1 & NUdf::EDataTypeFeatures::ExtDateType ? data1 : data2);
} else if (features1 & NUdf::EDataTypeFeatures::TimeIntervalType) {
return NewOptionalType(features2 & NUdf::EDataTypeFeatures::IntegralType ? data1 : data2);
} else if (features2 & NUdf::EDataTypeFeatures::TimeIntervalType) {
@@ -387,7 +386,7 @@ TType* TProgramBuilder::BuildArithmeticCommonType(TType* type1, TType* type2) {
features1 & (NUdf::EDataTypeFeatures::DateType | NUdf::EDataTypeFeatures::TzDateType) &&
features2 & (NUdf::EDataTypeFeatures::DateType | NUdf::EDataTypeFeatures::TzDateType)
) {
- const auto used = ((features1 | features2) & NUdf::EDataTypeFeatures::BigDateType)
+ const auto used = ((features1 | features2) & NUdf::EDataTypeFeatures::ExtDateType)
? NewDataType(NUdf::EDataSlot::Interval64)
: NewDataType(NUdf::EDataSlot::Interval);
return isOptional ? NewOptionalType(used) : used;
@@ -1494,24 +1493,6 @@ TRuntimeNode TProgramBuilder::WideToBlocks(TRuntimeNode stream) {
return TRuntimeNode(callableBuilder.Build(), false);
}
-TType* TProgramBuilder::BuildBlockStructType(const TStructType* structType) {
- std::vector<std::pair<std::string_view, TType*>> blockStructItems;
- blockStructItems.reserve(structType->GetMembersCount() + 1);
- for (size_t i = 0; i < structType->GetMembersCount(); i++) {
- auto itemType = structType->GetMemberType(i);
- MKQL_ENSURE(!itemType->IsBlock() , "Block types are not allowed here");
- blockStructItems.emplace_back(
- structType->GetMemberName(i),
- NewBlockType(itemType, TBlockType::EShape::Many)
- );
- }
- blockStructItems.emplace_back(
- NYql::BlockLengthColumnName,
- NewBlockType(NewDataType(NUdf::TDataType<ui64>::Id), TBlockType::EShape::Scalar)
- );
- return NewStructType(blockStructItems);
-}
-
TRuntimeNode TProgramBuilder::ListToBlocks(TRuntimeNode list) {
if constexpr (RuntimeVersion < 60U) {
THROW yexception() << "Runtime version (" << RuntimeVersion << ") too old for " << __func__;
@@ -1564,6 +1545,24 @@ TRuntimeNode TProgramBuilder::WideFromBlocks(TRuntimeNode stream) {
return TRuntimeNode(callableBuilder.Build(), false);
}
+TRuntimeNode TProgramBuilder::ListFromBlocks(TRuntimeNode list) {
+ if constexpr (RuntimeVersion < 61U) {
+ THROW yexception() << "Runtime version (" << RuntimeVersion << ") too old for " << __func__;
+ }
+
+ MKQL_ENSURE(list.GetStaticType()->IsList(), "Expected List as input type");
+ const auto listType = AS_TYPE(TListType, list.GetStaticType());
+
+ MKQL_ENSURE(listType->GetItemType()->IsStruct(), "Expected List of Struct as input type");
+ const auto itemBlockStructType = AS_TYPE(TStructType, listType->GetItemType());
+
+ const auto itemStructType = ValidateBlockStructType(itemBlockStructType);
+
+ TCallableBuilder callableBuilder(Env, __func__, NewListType(itemStructType));
+ callableBuilder.Add(list);
+ return TRuntimeNode(callableBuilder.Build(), false);
+}
+
TRuntimeNode TProgramBuilder::WideSkipBlocks(TRuntimeNode flow, TRuntimeNode count) {
return BuildWideSkipTakeBlocks(__func__, flow, count);
}
diff --git a/yql/essentials/minikql/mkql_program_builder.h b/yql/essentials/minikql/mkql_program_builder.h
index fb0b097dfaa..f71d47a3872 100644
--- a/yql/essentials/minikql/mkql_program_builder.h
+++ b/yql/essentials/minikql/mkql_program_builder.h
@@ -244,6 +244,7 @@ public:
TRuntimeNode ListToBlocks(TRuntimeNode list);
TRuntimeNode FromBlocks(TRuntimeNode flow);
TRuntimeNode WideFromBlocks(TRuntimeNode flow);
+ TRuntimeNode ListFromBlocks(TRuntimeNode list);
TRuntimeNode WideSkipBlocks(TRuntimeNode flow, TRuntimeNode count);
TRuntimeNode WideTakeBlocks(TRuntimeNode flow, TRuntimeNode count);
TRuntimeNode WideTopBlocks(TRuntimeNode flow, TRuntimeNode count, const std::vector<std::pair<ui32, TRuntimeNode>>& keys);
@@ -862,7 +863,6 @@ private:
TType* ChooseCommonType(TType* type1, TType* type2);
TType* BuildArithmeticCommonType(TType* type1, TType* type2);
TType* BuildWideBlockType(const TArrayRef<TType* const>& wideComponents);
- TType* BuildBlockStructType(const TStructType* structType);
bool IsNull(TRuntimeNode arg);
protected:
diff --git a/yql/essentials/minikql/mkql_runtime_version.h b/yql/essentials/minikql/mkql_runtime_version.h
index df026ed9015..ed228c7b03d 100644
--- a/yql/essentials/minikql/mkql_runtime_version.h
+++ b/yql/essentials/minikql/mkql_runtime_version.h
@@ -24,7 +24,7 @@ namespace NMiniKQL {
// 1. Bump this version every time incompatible runtime nodes are introduced.
// 2. Make sure you provide runtime node generation for previous runtime versions.
#ifndef MKQL_RUNTIME_VERSION
-#define MKQL_RUNTIME_VERSION 60U
+#define MKQL_RUNTIME_VERSION 61U
#endif
// History:
diff --git a/yql/essentials/minikql/mkql_type_builder.cpp b/yql/essentials/minikql/mkql_type_builder.cpp
index e37bf91b91f..c9d6e363b4f 100644
--- a/yql/essentials/minikql/mkql_type_builder.cpp
+++ b/yql/essentials/minikql/mkql_type_builder.cpp
@@ -2806,6 +2806,48 @@ TType* TTypeBuilder::NewVariantType(TType* underlyingType) const {
return TVariantType::Create(underlyingType, Env);
}
+TType* TTypeBuilder::ValidateBlockStructType(const TStructType* structType) const {
+ MKQL_ENSURE(structType->GetMembersCount() > 0, "Expected at least one column");
+
+ std::vector<std::pair<std::string_view, TType*>> outStructItems;
+ outStructItems.reserve(structType->GetMembersCount() - 1);
+ bool hasBlockLengthColumn = false;
+ for (size_t i = 0; i < structType->GetMembersCount(); i++) {
+ auto blockType = AS_TYPE(TBlockType, structType->GetMemberType(i));
+ bool isScalar = blockType->GetShape() == TBlockType::EShape::Scalar;
+ auto itemType = blockType->GetItemType();
+ if (structType->GetMemberName(i) == NYql::BlockLengthColumnName) {
+ MKQL_ENSURE(isScalar, "Block length column should be scalar");
+ MKQL_ENSURE(AS_TYPE(TDataType, itemType)->GetSchemeType() == NUdf::TDataType<ui64>::Id, "Expected Uint64");
+
+ MKQL_ENSURE(!hasBlockLengthColumn, "Block struct must contain only one block length column");
+ hasBlockLengthColumn = true;
+ } else {
+ outStructItems.emplace_back(structType->GetMemberName(i), itemType);
+ }
+ }
+ MKQL_ENSURE(hasBlockLengthColumn, "Block struct must contain block length column");
+ return NewStructType(outStructItems);
+}
+
+TType* TTypeBuilder::BuildBlockStructType(const TStructType* structType) const {
+ std::vector<std::pair<std::string_view, TType*>> blockStructItems;
+ blockStructItems.reserve(structType->GetMembersCount() + 1);
+ for (size_t i = 0; i < structType->GetMembersCount(); i++) {
+ auto itemType = structType->GetMemberType(i);
+ MKQL_ENSURE(!itemType->IsBlock() , "Block types are not allowed here");
+ blockStructItems.emplace_back(
+ structType->GetMemberName(i),
+ NewBlockType(itemType, TBlockType::EShape::Many)
+ );
+ }
+ blockStructItems.emplace_back(
+ NYql::BlockLengthColumnName,
+ NewBlockType(NewDataType(NUdf::TDataType<ui64>::Id), TBlockType::EShape::Scalar)
+ );
+ return NewStructType(blockStructItems);
+}
+
void RebuildTypeIndex() {
HugeSingleton<TPgTypeIndex>()->Rebuild();
}
diff --git a/yql/essentials/minikql/mkql_type_builder.h b/yql/essentials/minikql/mkql_type_builder.h
index f1ec8a9e4f3..678db9157ca 100644
--- a/yql/essentials/minikql/mkql_type_builder.h
+++ b/yql/essentials/minikql/mkql_type_builder.h
@@ -2,6 +2,7 @@
#include "mkql_node.h"
+#include <yql/essentials/core/sql_types/block.h>
#include <yql/essentials/public/udf/udf_type_builder.h>
#include <yql/essentials/public/udf/arrow/block_type_helper.h>
#include <yql/essentials/parser/pg_wrapper/interface/compare.h>
@@ -325,6 +326,9 @@ public:
TType* NewResourceType(const std::string_view& tag) const;
TType* NewVariantType(TType* underlyingType) const;
+ TType* BuildBlockStructType(const TStructType* structType) const;
+ TType* ValidateBlockStructType(const TStructType* structType) const;
+
protected:
const TTypeEnvironment& Env;
bool UseNullType = true;
diff --git a/yql/essentials/public/udf/arrow/bit_util.h b/yql/essentials/public/udf/arrow/bit_util.h
index 4dbe785aa75..091a47bbcc6 100644
--- a/yql/essentials/public/udf/arrow/bit_util.h
+++ b/yql/essentials/public/udf/arrow/bit_util.h
@@ -1,6 +1,9 @@
#pragma once
#include "defs.h"
+#include <yql/essentials/utils/swap_bytes.h>
+
+#include <array>
namespace NYql {
namespace NUdf {
@@ -119,5 +122,69 @@ inline void CopyDenseBitmap(ui8* dst, const ui8* src, size_t srcOffset, size_t l
}
}
+// Duplicates every bit in an 8-bit value.
+// Example: 0b01010101 -> 0b0011001100110011.
+Y_FORCE_INLINE ui16 ReplicateEachBitTwice(ui8 b) {
+ ui16 x = b;
+ x = (x | (x << 4)) & 0x0F0F;
+ x = (x | (x << 2)) & 0x3333;
+ x = (x | (x << 1)) & 0x5555;
+ return x | (x << 1);
+}
+
+// Repeat 4 times every bit in an 8-bit value.
+// Example: 0b01010101 -> 0b00001111000011110000111100001111.
+Y_FORCE_INLINE ui32 ReplicateEachBitFourTimes(ui8 b) {
+ ui32 x = b;
+ x = (x | (x << 12)) & 0x000F000F;
+ x = (x | (x << 6)) & 0x03030303;
+ x = (x | (x << 3)) & 0x11111111;
+ x *= 0x0F;
+ return x;
+}
+
+// BitToByteExpand - Expands the individual bits of an 8-bit input x into an array of 8 elements of type TType.
+// Each output element corresponds to one bit from the original value, expanded (via specialized routines) to fill the entire TType
+// Example: BitToByteExpand<ui8>(0b10101010) yields REVERSE({0xFF, 0x00, 0xFF, 0x00, 0xFF, 0x00, 0xFF, 0x00}).
+// Example: BitToByteExpand<ui16>(0b11000011) yields REVERSE({0xFFFF, 0xFFFF, 0x0000, 0x0000, 0x0000, 0x0000, 0xFFFF, 0xFFFF}).
+template <typename TType>
+Y_FORCE_INLINE std::array<TType, 8> BitToByteExpand(ui8 x);
+
+template <>
+Y_FORCE_INLINE std::array<ui8, 8> BitToByteExpand(ui8 x) {
+ std::array<ui8, 8> result;
+ ui64 expanded = x;
+ expanded = (expanded * 0x8040201008040201ULL);
+ expanded &= 0x8080808080808080ULL;
+ expanded >>= 7;
+ expanded *= 0xFF;
+ expanded = NYql::SwapBytes(expanded);
+ memcpy(&result[0], &expanded, sizeof(expanded));
+ return result;
+}
+
+template <>
+Y_FORCE_INLINE std::array<ui16, 8> BitToByteExpand(ui8 x) {
+ std::array<ui8, 8> input = BitToByteExpand<ui8>(x);
+ std::array<ui16, 8> output{};
+
+ for (size_t i = 0; i < 8; ++i) {
+ output[i] = ReplicateEachBitTwice(input[i]);
+ }
+
+ return output;
+}
+
+template <>
+Y_FORCE_INLINE std::array<ui32, 8> BitToByteExpand(ui8 x) {
+ std::array<ui8, 8> input = BitToByteExpand<ui8>(x);
+ std::array<ui32, 8> output{};
+
+ for (size_t i = 0; i < 8; ++i) {
+ output[i] = ReplicateEachBitFourTimes(input[i]);
+ }
+
+ return output;
+}
}
}
diff --git a/yql/essentials/public/udf/arrow/ut/bit_util_ut.cpp b/yql/essentials/public/udf/arrow/ut/bit_util_ut.cpp
index 601d3be7c86..4af399c8deb 100644
--- a/yql/essentials/public/udf/arrow/ut/bit_util_ut.cpp
+++ b/yql/essentials/public/udf/arrow/ut/bit_util_ut.cpp
@@ -39,3 +39,92 @@ Y_UNIT_TEST_SUITE(CopyDenseBitmapTest) {
}
}
}
+
+Y_UNIT_TEST_SUITE(BitExpanding) {
+
+Y_UNIT_TEST(ReplicateEachBitTwice) {
+ // Test case 1: All zeros
+ UNIT_ASSERT_EQUAL(ReplicateEachBitTwice(0x00), 0x0000);
+
+ // Test case 2: All ones
+ UNIT_ASSERT_EQUAL(ReplicateEachBitTwice(0xFF), 0xFFFF);
+
+ // Test case 3: Alternating bits
+ UNIT_ASSERT_EQUAL(ReplicateEachBitTwice(0x55), 0x3333);
+ UNIT_ASSERT_EQUAL(ReplicateEachBitTwice(0xAA), 0xCCCC);
+
+ // Test case 4: Random pattern
+ UNIT_ASSERT_EQUAL(ReplicateEachBitTwice(0x3C), 0x0FF0);
+
+ // Test case 5: Single bit set
+ UNIT_ASSERT_EQUAL(ReplicateEachBitTwice(0x01), 0x0003);
+ UNIT_ASSERT_EQUAL(ReplicateEachBitTwice(0x80), 0xC000);
+}
+
+Y_UNIT_TEST(ReplicateEachBitFourTimes) {
+ // Test case 1: All zeros
+ UNIT_ASSERT_EQUAL(ReplicateEachBitFourTimes(0x00), 0x00000000);
+
+ // Test case 2: All ones
+ UNIT_ASSERT_EQUAL(ReplicateEachBitFourTimes(0xFF), 0xFFFFFFFF);
+
+ // Test case 3: Alternating bits
+ UNIT_ASSERT_EQUAL(ReplicateEachBitFourTimes(0x55), 0x0F0F0F0F);
+ UNIT_ASSERT_EQUAL(ReplicateEachBitFourTimes(0xAA), 0xF0F0F0F0);
+
+ // Test case 4: Random pattern
+ UNIT_ASSERT_EQUAL(ReplicateEachBitFourTimes(0x3C), 0x00FFFF00);
+
+ // Test case 5: Single bit set
+ UNIT_ASSERT_EQUAL(ReplicateEachBitFourTimes(0x01), 0x0000000F);
+ UNIT_ASSERT_EQUAL(ReplicateEachBitFourTimes(0x80), 0xF0000000);
+}
+
+Y_UNIT_TEST(BitToByteExpand) {
+ auto testBody = [](auto n) {
+ using T = decltype(n);
+ auto max = std::numeric_limits<T>::max();
+ auto min = std::numeric_limits<T>::min();
+ // Test case 1: All zeros
+ auto result1 = BitToByteExpand<T>(min);
+ for (size_t i = 0; i < 8; ++i) {
+ UNIT_ASSERT_EQUAL(result1[i], min);
+ }
+
+ // Test case 2: All ones
+ auto result2 = BitToByteExpand<T>(max);
+ for (size_t i = 0; i < 8; ++i) {
+ UNIT_ASSERT_EQUAL(result2[i], max);
+ }
+
+ // Test case 3: Alternating bits
+ auto result3 = BitToByteExpand<T>(0x55);
+ for (size_t i = 0; i < 8; ++i) {
+ UNIT_ASSERT_EQUAL(result3[i], (i % 2 == 0) ? max : min);
+ }
+
+ // Test case 4: Random pattern
+ auto result4 = BitToByteExpand<T>(0x3C);
+ UNIT_ASSERT_EQUAL(result4[0], min);
+ UNIT_ASSERT_EQUAL(result4[1], min);
+ UNIT_ASSERT_EQUAL(result4[2], max);
+ UNIT_ASSERT_EQUAL(result4[3], max);
+ UNIT_ASSERT_EQUAL(result4[4], max);
+ UNIT_ASSERT_EQUAL(result4[5], max);
+ UNIT_ASSERT_EQUAL(result4[6], min);
+ UNIT_ASSERT_EQUAL(result4[7], min);
+
+ // Test case 5: Single bit set
+ auto result5 = BitToByteExpand<T>(0x80);
+ UNIT_ASSERT_EQUAL(result5[7], max);
+ for (size_t i = 0; i < 7; ++i) {
+ UNIT_ASSERT_EQUAL(result5[i], min);
+ }
+ };
+
+ testBody(ui8());
+ testBody(ui16());
+ testBody(ui32());
+}
+
+} // Y_UNIT_TEST_SUITE(BitExpanding)
diff --git a/yql/essentials/public/udf/udf_data_type.h b/yql/essentials/public/udf/udf_data_type.h
index 9d7e2070328..9d5d4049557 100644
--- a/yql/essentials/public/udf/udf_data_type.h
+++ b/yql/essentials/public/udf/udf_data_type.h
@@ -45,16 +45,9 @@ enum EDataTypeFeatures : ui32 {
TzDateType = 1u << 27,
DecimalType = 1u << 28,
TimeIntervalType = 1u << 29,
- // FIXME: Remove, when no entries in the code are left.
- BigDateType = 1u << 30,
ExtDateType = 1u << 30,
};
-// FIXME: This static assert is vital for renaming BigDateType
-// flag into ExtDateType to be in sync with naming in docs.
-// Remove this assert, only when BigDateType flags is removed.
-static_assert(ExtDateType == BigDateType);
-
template <typename T>
struct TDataType;
diff --git a/yql/essentials/sql/v1/complete/sql_complete.cpp b/yql/essentials/sql/v1/complete/sql_complete.cpp
index 2a16a250e54..9bba9c5e71e 100644
--- a/yql/essentials/sql/v1/complete/sql_complete.cpp
+++ b/yql/essentials/sql/v1/complete/sql_complete.cpp
@@ -3,6 +3,10 @@
#include "sql_context.h"
#include "string_util.h"
+// FIXME(YQL-19747): unwanted dependency on a lexer implementation
+#include <yql/essentials/sql/v1/lexer/antlr4_pure/lexer.h>
+#include <yql/essentials/sql/v1/lexer/antlr4_pure_ansi/lexer.h>
+
#include <util/generic/algorithm.h>
#include <util/charset/utf8.h>
@@ -10,8 +14,8 @@ namespace NSQLComplete {
class TSqlCompletionEngine: public ISqlCompletionEngine {
public:
- TSqlCompletionEngine()
- : ContextInference(MakeSqlContextInference())
+ explicit TSqlCompletionEngine(TLexerSupplier lexer)
+ : ContextInference(MakeSqlContextInference(lexer))
{
}
@@ -68,8 +72,18 @@ namespace NSQLComplete {
ISqlContextInference::TPtr ContextInference;
};
+ // FIXME(YQL-19747): unwanted dependency on a lexer implementation
ISqlCompletionEngine::TPtr MakeSqlCompletionEngine() {
- return ISqlCompletionEngine::TPtr(new TSqlCompletionEngine());
+ NSQLTranslationV1::TLexers lexers;
+ lexers.Antlr4Pure = NSQLTranslationV1::MakeAntlr4PureLexerFactory();
+ lexers.Antlr4PureAnsi = NSQLTranslationV1::MakeAntlr4PureAnsiLexerFactory();
+ return MakeSqlCompletionEngine([lexers = std::move(lexers)](bool ansi) {
+ return NSQLTranslationV1::MakeLexer(lexers, ansi, /* antlr4 = */ true, /* pure = */ true);
+ });
+ }
+
+ ISqlCompletionEngine::TPtr MakeSqlCompletionEngine(TLexerSupplier lexer) {
+ return ISqlCompletionEngine::TPtr(new TSqlCompletionEngine(lexer));
}
} // namespace NSQLComplete
diff --git a/yql/essentials/sql/v1/complete/sql_complete.h b/yql/essentials/sql/v1/complete/sql_complete.h
index 99e74cce7a7..354f8ffa756 100644
--- a/yql/essentials/sql/v1/complete/sql_complete.h
+++ b/yql/essentials/sql/v1/complete/sql_complete.h
@@ -1,5 +1,7 @@
#pragma once
+#include <yql/essentials/sql/v1/lexer/lexer.h>
+
#include <util/generic/string.h>
#include <util/generic/vector.h>
@@ -39,6 +41,11 @@ namespace NSQLComplete {
virtual ~ISqlCompletionEngine() = default;
};
+ using TLexerSupplier = std::function<NSQLTranslation::ILexer::TPtr(bool ansi)>;
+
+ // FIXME(YQL-19747): unwanted dependency on a lexer implementation
ISqlCompletionEngine::TPtr MakeSqlCompletionEngine();
+ ISqlCompletionEngine::TPtr MakeSqlCompletionEngine(TLexerSupplier lexer);
+
} // namespace NSQLComplete
diff --git a/yql/essentials/sql/v1/complete/sql_complete_ut.cpp b/yql/essentials/sql/v1/complete/sql_complete_ut.cpp
index e0a012f9f6e..c65eba0e2d4 100644
--- a/yql/essentials/sql/v1/complete/sql_complete_ut.cpp
+++ b/yql/essentials/sql/v1/complete/sql_complete_ut.cpp
@@ -1,5 +1,8 @@
#include "sql_complete.h"
+#include <yql/essentials/sql/v1/lexer/antlr4_pure/lexer.h>
+#include <yql/essentials/sql/v1/lexer/antlr4_pure_ansi/lexer.h>
+
#include <library/cpp/testing/unittest/registar.h>
using namespace NSQLComplete;
@@ -7,6 +10,15 @@ using namespace NSQLComplete;
Y_UNIT_TEST_SUITE(SqlCompleteTests) {
using ECandidateKind::Keyword;
+ ISqlCompletionEngine::TPtr MakeSqlCompletionEngineUT() {
+ NSQLTranslationV1::TLexers lexers;
+ lexers.Antlr4Pure = NSQLTranslationV1::MakeAntlr4PureLexerFactory();
+ lexers.Antlr4PureAnsi = NSQLTranslationV1::MakeAntlr4PureAnsiLexerFactory();
+ return MakeSqlCompletionEngine([lexers = std::move(lexers)](bool ansi) {
+ return NSQLTranslationV1::MakeLexer(lexers, ansi, /* antlr4 = */ true, /* pure = */ true);
+ });
+ }
+
TVector<TCandidate> Complete(ISqlCompletionEngine::TPtr& engine, TStringBuf prefix) {
return engine->Complete({prefix}).Candidates;
}
@@ -50,7 +62,7 @@ Y_UNIT_TEST_SUITE(SqlCompleteTests) {
{Keyword, "VALUES"},
};
- auto engine = MakeSqlCompletionEngine();
+ auto engine = MakeSqlCompletionEngineUT();
UNIT_ASSERT_VALUES_EQUAL(Complete(engine, {""}), expected);
UNIT_ASSERT_VALUES_EQUAL(Complete(engine, {" "}), expected);
UNIT_ASSERT_VALUES_EQUAL(Complete(engine, {" "}), expected);
@@ -76,7 +88,7 @@ Y_UNIT_TEST_SUITE(SqlCompleteTests) {
{Keyword, "USER"},
};
- auto engine = MakeSqlCompletionEngine();
+ auto engine = MakeSqlCompletionEngineUT();
UNIT_ASSERT_VALUES_EQUAL(Complete(engine, {"ALTER "}), expected);
}
@@ -99,7 +111,7 @@ Y_UNIT_TEST_SUITE(SqlCompleteTests) {
{Keyword, "VIEW"},
};
- auto engine = MakeSqlCompletionEngine();
+ auto engine = MakeSqlCompletionEngineUT();
UNIT_ASSERT_VALUES_EQUAL(Complete(engine, {"CREATE "}), expected);
}
@@ -108,7 +120,7 @@ Y_UNIT_TEST_SUITE(SqlCompleteTests) {
{Keyword, "FROM"},
};
- auto engine = MakeSqlCompletionEngine();
+ auto engine = MakeSqlCompletionEngineUT();
UNIT_ASSERT_VALUES_EQUAL(Complete(engine, {"DELETE "}), expected);
}
@@ -128,7 +140,7 @@ Y_UNIT_TEST_SUITE(SqlCompleteTests) {
{Keyword, "VIEW"},
};
- auto engine = MakeSqlCompletionEngine();
+ auto engine = MakeSqlCompletionEngineUT();
UNIT_ASSERT_VALUES_EQUAL(Complete(engine, {"DROP "}), expected);
}
@@ -171,7 +183,7 @@ Y_UNIT_TEST_SUITE(SqlCompleteTests) {
{Keyword, "VALUES"},
};
- auto engine = MakeSqlCompletionEngine();
+ auto engine = MakeSqlCompletionEngineUT();
UNIT_ASSERT_VALUES_EQUAL(Complete(engine, {"EXPLAIN "}), expected);
}
@@ -196,7 +208,7 @@ Y_UNIT_TEST_SUITE(SqlCompleteTests) {
{Keyword, "USE"},
};
- auto engine = MakeSqlCompletionEngine();
+ auto engine = MakeSqlCompletionEngineUT();
UNIT_ASSERT_VALUES_EQUAL(Complete(engine, {"GRANT "}), expected);
}
@@ -206,7 +218,7 @@ Y_UNIT_TEST_SUITE(SqlCompleteTests) {
{Keyword, "OR"},
};
- auto engine = MakeSqlCompletionEngine();
+ auto engine = MakeSqlCompletionEngineUT();
UNIT_ASSERT_VALUES_EQUAL(Complete(engine, {"INSERT "}), expected);
}
@@ -227,7 +239,7 @@ Y_UNIT_TEST_SUITE(SqlCompleteTests) {
{Keyword, "VARIANT"},
};
- auto engine = MakeSqlCompletionEngine();
+ auto engine = MakeSqlCompletionEngineUT();
UNIT_ASSERT_VALUES_EQUAL(Complete(engine, {"PRAGMA "}), expected);
}
@@ -265,7 +277,7 @@ Y_UNIT_TEST_SUITE(SqlCompleteTests) {
{Keyword, "VARIANT"},
};
- auto engine = MakeSqlCompletionEngine();
+ auto engine = MakeSqlCompletionEngineUT();
UNIT_ASSERT_VALUES_EQUAL(Complete(engine, {"SELECT "}), expected);
}
@@ -275,18 +287,18 @@ Y_UNIT_TEST_SUITE(SqlCompleteTests) {
{Keyword, "OBJECT"},
};
- auto engine = MakeSqlCompletionEngine();
+ auto engine = MakeSqlCompletionEngineUT();
UNIT_ASSERT_VALUES_EQUAL(Complete(engine, {"UPSERT "}), expected);
}
Y_UNIT_TEST(UTF8Wide) {
- auto engine = MakeSqlCompletionEngine();
+ auto engine = MakeSqlCompletionEngineUT();
UNIT_ASSERT_VALUES_EQUAL(Complete(engine, {"\xF0\x9F\x98\x8A"}).size(), 0);
UNIT_ASSERT_VALUES_EQUAL(Complete(engine, {"编码"}).size(), 0);
}
Y_UNIT_TEST(WordBreak) {
- auto engine = MakeSqlCompletionEngine();
+ auto engine = MakeSqlCompletionEngineUT();
UNIT_ASSERT_VALUES_EQUAL(Complete(engine, {"SELECT ("}).size(), 28);
UNIT_ASSERT_VALUES_EQUAL(Complete(engine, {"SELECT (1)"}).size(), 30);
UNIT_ASSERT_VALUES_EQUAL(Complete(engine, {"SELECT 1;"}).size(), 35);
@@ -300,7 +312,7 @@ Y_UNIT_TEST_SUITE(SqlCompleteTests) {
" Bool(field), Math::Sin(var) \n"
"FROM `local/test/space/table` JOIN test;");
- auto engine = MakeSqlCompletionEngine();
+ auto engine = MakeSqlCompletionEngineUT();
for (std::size_t size = 0; size <= queryUtf16.size(); ++size) {
const TWtringBuf prefixUtf16(queryUtf16, 0, size);
@@ -314,10 +326,18 @@ Y_UNIT_TEST_SUITE(SqlCompleteTests) {
{Keyword, "SELECT"},
};
- auto engine = MakeSqlCompletionEngine();
+ auto engine = MakeSqlCompletionEngineUT();
UNIT_ASSERT_VALUES_EQUAL(Complete(engine, "se"), expected);
UNIT_ASSERT_VALUES_EQUAL(Complete(engine, "sE"), expected);
UNIT_ASSERT_VALUES_EQUAL(Complete(engine, "Se"), expected);
UNIT_ASSERT_VALUES_EQUAL(Complete(engine, "SE"), expected);
}
+
+ Y_UNIT_TEST(InvalidStatementsRecovery) {
+ auto engine = MakeSqlCompletionEngineUT();
+ UNIT_ASSERT_VALUES_EQUAL(Complete(engine, "select select; ").size(), 35);
+ UNIT_ASSERT_VALUES_EQUAL(Complete(engine, "select select;").size(), 35);
+ UNIT_ASSERT_VALUES_EQUAL_C(Complete(engine, "!;").size(), 0, "Lexer failing");
+ }
+
} // Y_UNIT_TEST_SUITE(SqlCompleteTests)
diff --git a/yql/essentials/sql/v1/complete/sql_context.cpp b/yql/essentials/sql/v1/complete/sql_context.cpp
index 4195daa6d83..2bd1a2af987 100644
--- a/yql/essentials/sql/v1/complete/sql_context.cpp
+++ b/yql/essentials/sql/v1/complete/sql_context.cpp
@@ -3,6 +3,7 @@
#include "c3_engine.h"
#include "sql_syntax.h"
+#include <yql/essentials/core/issue/yql_issue.h>
#include <yql/essentials/parser/antlr_ast/gen/v1_antlr4/SQLv1Antlr4Lexer.h>
#include <yql/essentials/parser/antlr_ast/gen/v1_antlr4/SQLv1Antlr4Parser.h>
#include <yql/essentials/parser/antlr_ast/gen/v1_ansi_antlr4/SQLv1Antlr4Lexer.h>
@@ -30,14 +31,19 @@ namespace NSQLComplete {
TDefaultYQLGrammar>;
public:
- TSpecializedSqlContextInference()
+ explicit TSpecializedSqlContextInference(TLexerSupplier lexer)
: Grammar(&GetSqlGrammar(IsAnsiLexer))
+ , Lexer_(lexer(/* ansi = */ IsAnsiLexer))
, C3(ComputeC3Config())
{
}
TCompletionContext Analyze(TCompletionInput input) override {
- auto prefix = input.Text.Head(input.CursorPosition);
+ TStringBuf prefix;
+ if (!GetC3Prefix(input, &prefix)) {
+ return {};
+ }
+
auto tokens = C3.Complete(prefix);
return {
.Keywords = SiftedKeywords(tokens),
@@ -71,6 +77,26 @@ namespace NSQLComplete {
return preferredRules;
}
+ bool GetC3Prefix(TCompletionInput input, TStringBuf* prefix) {
+ *prefix = input.Text.Head(input.CursorPosition);
+
+ TVector<TString> statements;
+ NYql::TIssues issues;
+ if (!NSQLTranslationV1::SplitQueryToStatements(
+ TString(*prefix) + (prefix->EndsWith(';') ? ";" : ""), Lexer_,
+ statements, issues, /* file = */ "",
+ /* areBlankSkipped = */ false)) {
+ return false;
+ }
+
+ if (statements.empty()) {
+ return true;
+ }
+
+ *prefix = prefix->Last(statements.back().size());
+ return true;
+ }
+
TVector<TString> SiftedKeywords(const TVector<TSuggestedToken>& tokens) {
const auto& vocabulary = Grammar->GetVocabulary();
const auto& keywordTokens = Grammar->GetKeywordTokens();
@@ -85,11 +111,18 @@ namespace NSQLComplete {
}
const ISqlGrammar* Grammar;
+ NSQLTranslation::ILexer::TPtr Lexer_;
TC3Engine<G> C3;
};
class TSqlContextInference: public ISqlContextInference {
public:
+ explicit TSqlContextInference(TLexerSupplier lexer)
+ : DefaultEngine(lexer)
+ , AnsiEngine(lexer)
+ {
+ }
+
TCompletionContext Analyze(TCompletionInput input) override {
auto isAnsiLexer = IsAnsiQuery(TString(input.Text));
auto& engine = GetSpecializedEngine(isAnsiLexer);
@@ -108,8 +141,8 @@ namespace NSQLComplete {
TSpecializedSqlContextInference</* IsAnsiLexer = */ true> AnsiEngine;
};
- ISqlContextInference::TPtr MakeSqlContextInference() {
- return TSqlContextInference::TPtr(new TSqlContextInference());
+ ISqlContextInference::TPtr MakeSqlContextInference(TLexerSupplier lexer) {
+ return TSqlContextInference::TPtr(new TSqlContextInference(lexer));
}
} // namespace NSQLComplete
diff --git a/yql/essentials/sql/v1/complete/sql_context.h b/yql/essentials/sql/v1/complete/sql_context.h
index bc3b8d4840f..72d481ca9c4 100644
--- a/yql/essentials/sql/v1/complete/sql_context.h
+++ b/yql/essentials/sql/v1/complete/sql_context.h
@@ -2,6 +2,8 @@
#include "sql_complete.h"
+#include <yql/essentials/sql/v1/lexer/lexer.h>
+
#include <util/generic/string.h>
namespace NSQLComplete {
@@ -18,6 +20,6 @@ namespace NSQLComplete {
virtual ~ISqlContextInference() = default;
};
- ISqlContextInference::TPtr MakeSqlContextInference();
+ ISqlContextInference::TPtr MakeSqlContextInference(TLexerSupplier lexer);
} // namespace NSQLComplete
diff --git a/yql/essentials/sql/v1/complete/ut/ya.make b/yql/essentials/sql/v1/complete/ut/ya.make
index 91f7da13612..07e60d5a508 100644
--- a/yql/essentials/sql/v1/complete/ut/ya.make
+++ b/yql/essentials/sql/v1/complete/ut/ya.make
@@ -5,4 +5,9 @@ SRCS(
string_util_ut.cpp
)
+PEERDIR(
+ yql/essentials/sql/v1/lexer/antlr4_pure
+ yql/essentials/sql/v1/lexer/antlr4_pure_ansi
+)
+
END()
diff --git a/yql/essentials/sql/v1/complete/ya.make b/yql/essentials/sql/v1/complete/ya.make
index 70189e5f508..7142e57899c 100644
--- a/yql/essentials/sql/v1/complete/ya.make
+++ b/yql/essentials/sql/v1/complete/ya.make
@@ -13,6 +13,13 @@ PEERDIR(
contrib/libs/antlr4-c3
yql/essentials/sql/settings
yql/essentials/sql/v1/format
+ yql/essentials/sql/v1/lexer
+
+ # FIXME(YQL-19747): unwanted dependency on a lexer implementation
+ yql/essentials/sql/v1/lexer/antlr4_pure
+ yql/essentials/sql/v1/lexer/antlr4_pure_ansi
+
+ yql/essentials/core/issue
yql/essentials/parser/antlr_ast/gen/v1_ansi_antlr4
yql/essentials/parser/antlr_ast/gen/v1_antlr4
)
diff --git a/yql/essentials/sql/v1/lexer/lexer.cpp b/yql/essentials/sql/v1/lexer/lexer.cpp
index 2b5da9ddd53..5621cc65d7b 100644
--- a/yql/essentials/sql/v1/lexer/lexer.cpp
+++ b/yql/essentials/sql/v1/lexer/lexer.cpp
@@ -253,7 +253,10 @@ void SplitByStatements(TTokenIterator begin, TTokenIterator end, TVector<TTokenI
}
-bool SplitQueryToStatements(const TString& query, NSQLTranslation::ILexer::TPtr& lexer, TVector<TString>& statements, NYql::TIssues& issues, const TString& file) {
+bool SplitQueryToStatements(
+ const TString& query, NSQLTranslation::ILexer::TPtr& lexer,
+ TVector<TString>& statements, NYql::TIssues& issues, const TString& file,
+ bool areBlankSkipped) {
TParsedTokenList allTokens;
auto onNextToken = [&](NSQLTranslation::TParsedToken&& token) {
if (token.Name != "EOF") {
@@ -269,12 +272,14 @@ bool SplitQueryToStatements(const TString& query, NSQLTranslation::ILexer::TPtr&
SplitByStatements(allTokens.begin(), allTokens.end(), statementsTokens);
for (size_t i = 1; i < statementsTokens.size(); ++i) {
- TStringBuilder currentQueryBuilder;
+ TString statement;
for (auto it = statementsTokens[i - 1]; it != statementsTokens[i]; ++it) {
- currentQueryBuilder << it->Content;
+ statement += it->Content;
+ }
+
+ if (areBlankSkipped) {
+ statement = StripStringLeft(statement);
}
- TString statement = currentQueryBuilder;
- statement = StripStringLeft(statement);
bool isBlank = true;
for (auto c : statement) {
@@ -284,11 +289,11 @@ bool SplitQueryToStatements(const TString& query, NSQLTranslation::ILexer::TPtr&
}
};
- if (isBlank) {
+ if (isBlank && areBlankSkipped) {
continue;
}
- statements.push_back(statement);
+ statements.emplace_back(std::move(statement));
}
return true;
diff --git a/yql/essentials/sql/v1/lexer/lexer.h b/yql/essentials/sql/v1/lexer/lexer.h
index 857681ae51f..1cc8566fcf6 100644
--- a/yql/essentials/sql/v1/lexer/lexer.h
+++ b/yql/essentials/sql/v1/lexer/lexer.h
@@ -21,6 +21,8 @@ NSQLTranslation::ILexer::TPtr MakeLexer(const TLexers& lexers, bool ansi, bool a
// in SELECT * FROM ... GROUP BY ... - group is a keyword.
bool IsProbablyKeyword(const NSQLTranslation::TParsedToken& token);
-bool SplitQueryToStatements(const TString& query, NSQLTranslation::ILexer::TPtr& lexer,
- TVector<TString>& statements, NYql::TIssues& issues, const TString& file = "");
+bool SplitQueryToStatements(
+ const TString& query, NSQLTranslation::ILexer::TPtr& lexer,
+ TVector<TString>& statements, NYql::TIssues& issues, const TString& file = "",
+ bool areBlankSkipped = true);
}
diff --git a/yql/essentials/tests/sql/minirun/pure.py b/yql/essentials/tests/sql/minirun/pure.py
index 4c92d6fee5c..c3a78adf354 100644
--- a/yql/essentials/tests/sql/minirun/pure.py
+++ b/yql/essentials/tests/sql/minirun/pure.py
@@ -120,7 +120,7 @@ def run_test(suite, case, cfg, tmpdir, what, yql_http_file_server):
opt_res_yson = normalize_result(stable_result_file(opt_res), False)
# Compare results
- assert opt_res_yson == base_res_yson, 'RESULTS_DIFFER\n' \
+ assert opt_res_yson == base_res_yson, 'RESULTS_DIFFER for mode {}\n'.format(what) + \
'Result:\n %(opt_res_yson)s\n\n' \
'Base result:\n %(base_res_yson)s\n' % locals()
diff --git a/yql/essentials/tools/yql_facade_run/yql_facade_run.cpp b/yql/essentials/tools/yql_facade_run/yql_facade_run.cpp
index 83939478d6d..fdfe60a433e 100644
--- a/yql/essentials/tools/yql_facade_run/yql_facade_run.cpp
+++ b/yql/essentials/tools/yql_facade_run/yql_facade_run.cpp
@@ -512,6 +512,7 @@ TIntrusivePtr<NKikimr::NMiniKQL::IFunctionRegistry> TFacadeRunner::GetFuncRegist
int TFacadeRunner::Main(int argc, const char *argv[]) {
NYql::NBacktrace::RegisterKikimrFatalActions();
NYql::NBacktrace::EnableKikimrSymbolize();
+ EnableKikimrBacktraceFormat();
NYql::NLog::YqlLoggerScope logger(&Cerr);
try {
diff --git a/yql/essentials/udfs/common/datetime2/datetime_udf.cpp b/yql/essentials/udfs/common/datetime2/datetime_udf.cpp
index daa43038707..c159f63d5e5 100644
--- a/yql/essentials/udfs/common/datetime2/datetime_udf.cpp
+++ b/yql/essentials/udfs/common/datetime2/datetime_udf.cpp
@@ -266,7 +266,7 @@ public:
builder.Args()->Add(argsTuple.GetElementType(0));
const TType* retType;
- if (features & NUdf::BigDateType) {
+ if (features & NUdf::ExtDateType) {
retType = builder.SimpleType<TWResult>();
} else if (features & NUdf::TimeIntervalType) {
retType = builder.SimpleType<TSignedResult>();
@@ -284,7 +284,7 @@ public:
}
builder.Returns(retType);
- if (!(features & NUdf::BigDateType)) {
+ if (!(features & NUdf::ExtDateType)) {
// FIXME: Only non-wide overloads support block rewrite now.
builder.SupportsBlocks();
}
@@ -447,7 +447,7 @@ struct TGetTimeComponent {
}
const auto features = NUdf::GetDataTypeInfo(NUdf::GetDataSlot(data.GetTypeId())).Features;
- if (features & NUdf::BigDateType) {
+ if (features & NUdf::ExtDateType) {
BuildSignature<TFieldStorage, TM64ResourceName, WAccessor>(builder, typesOnly);
return true;
}
@@ -832,7 +832,7 @@ TUnboxedValuePod DoAddYears(const TUnboxedValuePod& date, i64 years, const NUdf:
}
} else {
builder.Args()->Add<TUserDataType>().Flags(ICallablePayload::TArgumentFlags::AutoMap);
- if constexpr (NUdf::TDataType<TUserDataType>::Features & NYql::NUdf::BigDateType) {
+ if constexpr (NUdf::TDataType<TUserDataType>::Features & NYql::NUdf::ExtDateType) {
builder.Returns(builder.Resource(TM64ResourceName));
} else {
builder.Returns(builder.Resource(TMResourceName));
@@ -1360,7 +1360,7 @@ public:
}
const auto features = NUdf::GetDataTypeInfo(NUdf::GetDataSlot(data.GetTypeId())).Features;
- if (features & NUdf::BigDateType) {
+ if (features & NUdf::ExtDateType) {
BuildSignature<TResultWType, TM64ResourceName, WAccessor>(builder, typesOnly);
return true;
}
@@ -1469,7 +1469,7 @@ public:
}
const auto features = NUdf::GetDataTypeInfo(NUdf::GetDataSlot(data.GetTypeId())).Features;
- if (features & NUdf::BigDateType) {
+ if (features & NUdf::ExtDateType) {
BuildSignature<TM64ResourceName, WAccessor>(builder, typesOnly);
return true;
}
@@ -1695,7 +1695,7 @@ TUnboxedValue GetTimezoneName(const IValueBuilder* valueBuilder, const TUnboxedV
}
const auto features = NUdf::GetDataTypeInfo(NUdf::GetDataSlot(data.GetTypeId())).Features;
- if (features & NUdf::BigDateType) {
+ if (features & NUdf::ExtDateType) {
BuildSignature<TM64ResourceName>(builder, typesOnly);
return true;
}
@@ -1932,7 +1932,7 @@ public:
const auto features = NUdf::GetDataTypeInfo(NUdf::GetDataSlot(data.GetTypeId())).Features;
if (features & NUdf::TimeIntervalType) {
- if (features & NUdf::BigDateType) {
+ if (features & NUdf::ExtDateType) {
BuildSignature<TInterval64, TWResult>(builder, typesOnly);
} else {
BuildSignature<TInterval, TResult>(builder, typesOnly);
@@ -2057,7 +2057,7 @@ public:
}
const auto features = NUdf::GetDataTypeInfo(NUdf::GetDataSlot(data.GetTypeId())).Features;
- if (features & NUdf::BigDateType) {
+ if (features & NUdf::ExtDateType) {
BuildSignature<TM64ResourceName, WBoundary>(builder, typesOnly);
return true;
}
@@ -2385,7 +2385,7 @@ public:
}
const auto features = NUdf::GetDataTypeInfo(NUdf::GetDataSlot(data.GetTypeId())).Features;
- if (features & NUdf::BigDateType) {
+ if (features & NUdf::ExtDateType) {
BuildSignature<TM64ResourceName, WBoundary>(builder, typesOnly);
return true;
}
@@ -2507,7 +2507,7 @@ public:
}
const auto features = NUdf::GetDataTypeInfo(NUdf::GetDataSlot(data.GetTypeId())).Features;
- if (features & NUdf::BigDateType) {
+ if (features & NUdf::ExtDateType) {
BuildSignature<TM64ResourceName>(builder, typesOnly);
return true;
}
@@ -2634,7 +2634,7 @@ public:
}
const auto features = NUdf::GetDataTypeInfo(NUdf::GetDataSlot(data.GetTypeId())).Features;
- if (features & NUdf::BigDateType) {
+ if (features & NUdf::ExtDateType) {
BuildSignature<TM64ResourceName, WShifter>(builder, typesOnly);
return true;
}
diff --git a/yql/tools/yqlrun/yqlrun.cpp b/yql/tools/yqlrun/yqlrun.cpp
index 6ed7689a755..b3815321c8c 100644
--- a/yql/tools/yqlrun/yqlrun.cpp
+++ b/yql/tools/yqlrun/yqlrun.cpp
@@ -116,6 +116,7 @@ int RunUI(int argc, const char* argv[])
NYql::NBacktrace::RegisterKikimrFatalActions();
NYql::NBacktrace::EnableKikimrSymbolize();
+ EnableKikimrBacktraceFormat();
TVector<TString> udfsPaths;
TString udfsDir;
diff --git a/yt/yql/providers/yt/common/yql_configuration.h b/yt/yql/providers/yt/common/yql_configuration.h
index 90db39a172e..e71c5ed9484 100644
--- a/yt/yql/providers/yt/common/yql_configuration.h
+++ b/yt/yql/providers/yt/common/yql_configuration.h
@@ -131,4 +131,6 @@ constexpr ERuntimeClusterSelectionMode DEFAULT_RUNTIME_CLUSTER_SELECTION = NYql:
constexpr bool DEFAULT_ALLOW_REMOTE_CLUSTER_INPUT = false;
+constexpr bool DEFAULT_USE_COLUMN_GROUPS_FROM_INPUT_TABLE = false;
+
} // NYql
diff --git a/yt/yql/providers/yt/common/yql_yt_settings.cpp b/yt/yql/providers/yt/common/yql_yt_settings.cpp
index be6694fb174..e8f019ea168 100644
--- a/yt/yql/providers/yt/common/yql_yt_settings.cpp
+++ b/yt/yql/providers/yt/common/yql_yt_settings.cpp
@@ -548,6 +548,7 @@ TYtConfiguration::TYtConfiguration(TTypeAnnotationContext& typeCtx)
}
});
REGISTER_SETTING(*this, _AllowRemoteClusterInput);
+ REGISTER_SETTING(*this, UseColumnGroupsFromInputTables);
}
EReleaseTempDataMode GetReleaseTempDataMode(const TYtSettings& settings) {
diff --git a/yt/yql/providers/yt/common/yql_yt_settings.h b/yt/yql/providers/yt/common/yql_yt_settings.h
index 2cb52f9cfea..e2dd916629d 100644
--- a/yt/yql/providers/yt/common/yql_yt_settings.h
+++ b/yt/yql/providers/yt/common/yql_yt_settings.h
@@ -315,6 +315,7 @@ struct TYtSettings {
NCommon::TConfSetting<bool, false> CompactForDistinct;
NCommon::TConfSetting<bool, false> DropUnusedKeysFromKeyFilter;
NCommon::TConfSetting<bool, false> ReportEquiJoinStats;
+ NCommon::TConfSetting<bool, false> UseColumnGroupsFromInputTables;
};
EReleaseTempDataMode GetReleaseTempDataMode(const TYtSettings& settings);
diff --git a/yt/yql/providers/yt/comp_nodes/dq/dq_yt_reader_impl.h b/yt/yql/providers/yt/comp_nodes/dq/dq_yt_reader_impl.h
index dedea97996d..e5135f2fec2 100644
--- a/yt/yql/providers/yt/comp_nodes/dq/dq_yt_reader_impl.h
+++ b/yt/yql/providers/yt/comp_nodes/dq/dq_yt_reader_impl.h
@@ -154,7 +154,7 @@ public:
const auto half = CastInst::Create(Instruction::Trunc, state, Type::getInt64Ty(context), "half", block);
const auto stateArg = CastInst::Create(Instruction::IntToPtr, half, statePtrType, "state_arg", block);
- const auto func = ConstantInt::get(Type::getInt64Ty(context), GetMethodPtr(&TState::FetchRecord));
+ const auto func = ConstantInt::get(Type::getInt64Ty(context), GetMethodPtr<&TState::FetchRecord>());
const auto funcType = FunctionType::get(valueType, { statePtrType }, false);
const auto funcPtr = CastInst::Create(Instruction::IntToPtr, func, PointerType::getUnqual(funcType), "fetch_func", block);
const auto fetch = CallInst::Create(funcType, funcPtr, { stateArg }, "fetch", block);
diff --git a/yt/yql/providers/yt/comp_nodes/dq/dq_yt_writer.cpp b/yt/yql/providers/yt/comp_nodes/dq/dq_yt_writer.cpp
index cc0642d4e63..1860f9e3c8f 100644
--- a/yt/yql/providers/yt/comp_nodes/dq/dq_yt_writer.cpp
+++ b/yt/yql/providers/yt/comp_nodes/dq/dq_yt_writer.cpp
@@ -183,7 +183,7 @@ public:
const auto half = CastInst::Create(Instruction::Trunc, state, Type::getInt64Ty(context), "half", block);
const auto stateArg = CastInst::Create(Instruction::IntToPtr, half, structPtrType, "state_arg", block);
- const auto finishFunc = ConstantInt::get(Type::getInt64Ty(context), GetMethodPtr(&TWriterState::Finish));
+ const auto finishFunc = ConstantInt::get(Type::getInt64Ty(context), GetMethodPtr<&TWriterState::Finish>());
const auto finishType = FunctionType::get(Type::getVoidTy(context), {stateArg->getType()}, false);
const auto finishPtr = CastInst::Create(Instruction::IntToPtr, finishFunc, PointerType::getUnqual(finishType), "finish", block);
CallInst::Create(finishType, finishPtr, {stateArg}, "", block);
diff --git a/yt/yql/providers/yt/comp_nodes/yql_mkql_output.cpp b/yt/yql/providers/yt/comp_nodes/yql_mkql_output.cpp
index 1dfffcf0f52..83ead71a104 100644
--- a/yt/yql/providers/yt/comp_nodes/yql_mkql_output.cpp
+++ b/yt/yql/providers/yt/comp_nodes/yql_mkql_output.cpp
@@ -75,7 +75,7 @@ public:
block = work;
- const auto addFunc = ConstantInt::get(Type::getInt64Ty(context), GetMethodPtr(&TYtFlowOutputWrapper::AddRowImpl));
+ const auto addFunc = ConstantInt::get(Type::getInt64Ty(context), GetMethodPtr<&TYtFlowOutputWrapper::AddRowImpl>());
const auto selfArg = ConstantInt::get(Type::getInt64Ty(context), ui64(this));
const auto arg = item;
const auto addType = FunctionType::get(Type::getVoidTy(context), {selfArg->getType(), arg->getType()}, false);
@@ -161,7 +161,7 @@ public:
new StoreInst(fields.back(), pointer, block);
}
- const auto addFunc = ConstantInt::get(Type::getInt64Ty(context), GetMethodPtr(&TYtWideOutputWrapper::AddRowImpl));
+ const auto addFunc = ConstantInt::get(Type::getInt64Ty(context), GetMethodPtr<&TYtWideOutputWrapper::AddRowImpl>());
const auto selfArg = ConstantInt::get(Type::getInt64Ty(context), ui64(this));
const auto addType = FunctionType::get(Type::getVoidTy(context), {selfArg->getType(), values->getType()}, false);
const auto addPtr = CastInst::Create(Instruction::IntToPtr, addFunc, PointerType::getUnqual(addType), "write", block);
diff --git a/yt/yql/providers/yt/provider/yql_yt_load_table_meta.cpp b/yt/yql/providers/yt/provider/yql_yt_load_table_meta.cpp
index 0d1daffc142..9b759cb8167 100644
--- a/yt/yql/providers/yt/provider/yql_yt_load_table_meta.cpp
+++ b/yt/yql/providers/yt/provider/yql_yt_load_table_meta.cpp
@@ -199,13 +199,24 @@ public:
return TStatus::Error;
}
}
- if (auto pSchema = tableDesc.Meta->Attrs.FindPtr(SCHEMA_ATTR_NAME)) {
- const auto colGroupSpec = GetColumnGroupSpecFromSchema(NYT::NodeFromYsonString(*pSchema));
- if (colGroupSpec) {
- YQL_CLOG(TRACE, ProviderYt) << "Loaded column group from schema for " << tableName << " (epoch=" << LoadCtx->Epoch << "): " << colGroupSpec;
- if (LoadCtx->Epoch == 0) {
- tableDesc.ColumnGroupSpec = colGroupSpec;
- tableDesc.ColumnGroupSpecAlts.insert(colGroupSpec);
+ if (State_->Configuration->UseColumnGroupsFromInputTables.Get().GetOrElse(DEFAULT_USE_COLUMN_GROUPS_FROM_INPUT_TABLE)) {
+ if (auto pSchema = tableDesc.Meta->Attrs.FindPtr(SCHEMA_ATTR_NAME)) {
+ TString colGroupSpec;
+ try {
+ colGroupSpec = GetColumnGroupSpecFromSchema(NYT::NodeFromYsonString(*pSchema));
+ } catch (...) {
+ YQL_CLOG(ERROR, ProviderYt) << "Error parsing column groups for " << tableName << ", schema: " << *pSchema;
+ auto issue = TIssue(TStringBuilder() << "Error parsing column groups from schema: " << CurrentExceptionMessage());
+ issue.SetCode(UNEXPECTED_ERROR, ESeverity::TSeverityIds_ESeverityId_S_FATAL);
+ ctx.AddError(issue);
+ return TStatus::Error;
+ }
+ if (colGroupSpec) {
+ YQL_CLOG(TRACE, ProviderYt) << "Loaded column group from schema for " << tableName << " (epoch=" << LoadCtx->Epoch << "): " << colGroupSpec;
+ if (LoadCtx->Epoch == 0) {
+ tableDesc.ColumnGroupSpec = colGroupSpec;
+ tableDesc.ColumnGroupSpecAlts.insert(colGroupSpec);
+ }
}
}
}
diff --git a/yt/yql/tests/sql/suites/column_group/hint_append.cfg b/yt/yql/tests/sql/suites/column_group/hint_append.cfg
index f1a91330c09..827ef601339 100644
--- a/yt/yql/tests/sql/suites/column_group/hint_append.cfg
+++ b/yt/yql/tests/sql/suites/column_group/hint_append.cfg
@@ -4,3 +4,4 @@ out Output2 input_with_groups.txt
out Output3 input_with_groups.txt
providers yt
pragma yt.OptimizeFor="scan"
+pragma yt.UseColumnGroupsFromInputTables
diff --git a/yt/yql/tests/sql/suites/column_group/hint_append2.cfg b/yt/yql/tests/sql/suites/column_group/hint_append2.cfg
index edce27ee9c3..7c71144647e 100644
--- a/yt/yql/tests/sql/suites/column_group/hint_append2.cfg
+++ b/yt/yql/tests/sql/suites/column_group/hint_append2.cfg
@@ -4,3 +4,4 @@ out Output2 input_with_single_group.txt
out Output3 input_with_single_group.txt
providers yt
pragma yt.OptimizeFor="scan"
+pragma yt.UseColumnGroupsFromInputTables
diff --git a/yt/yt/client/api/config.cpp b/yt/yt/client/api/config.cpp
index 4219e08000e..82a43d37d7a 100644
--- a/yt/yt/client/api/config.cpp
+++ b/yt/yt/client/api/config.cpp
@@ -151,6 +151,9 @@ void TJournalWriterConfig::Register(TRegistrar registrar)
registrar.Parameter("prerequisite_transaction_probe_period", &TThis::PrerequisiteTransactionProbePeriod)
.Default(TDuration::Seconds(60));
+ registrar.Parameter("enable_checksums", &TThis::EnableChecksums)
+ .Default(false);
+
registrar.Parameter("dont_close", &TThis::DontClose)
.Default(false);
registrar.Parameter("dont_seal", &TThis::DontSeal)
diff --git a/yt/yt/client/api/config.h b/yt/yt/client/api/config.h
index 1fa2d876fc9..3811b891229 100644
--- a/yt/yt/client/api/config.h
+++ b/yt/yt/client/api/config.h
@@ -209,6 +209,8 @@ struct TJournalWriterConfig
TDuration PrerequisiteTransactionProbePeriod;
+ bool EnableChecksums;
+
// For testing purposes only.
bool DontClose;
bool DontSeal;
diff --git a/yt/yt/client/api/operation_client.cpp b/yt/yt/client/api/operation_client.cpp
index e4fe36a8bfe..a8e025874db 100644
--- a/yt/yt/client/api/operation_client.cpp
+++ b/yt/yt/client/api/operation_client.cpp
@@ -285,6 +285,7 @@ void Serialize(const TJob& job, NYson::IYsonConsumer* consumer, TStringBuf idKey
.OptionalItem("controller_state", job.ControllerState)
.OptionalItem("archive_state", job.ArchiveState)
.OptionalItem("address", job.Address)
+ .OptionalItem("addresses", job.Addresses)
.OptionalItem("start_time", job.StartTime)
.OptionalItem("finish_time", job.FinishTime)
.OptionalItem("has_spec", job.HasSpec)
diff --git a/yt/yt/client/api/operation_client.h b/yt/yt/client/api/operation_client.h
index 9653db43362..8e672c80e5f 100644
--- a/yt/yt/client/api/operation_client.h
+++ b/yt/yt/client/api/operation_client.h
@@ -3,9 +3,10 @@
#include "client_common.h"
#include <yt/yt/client/scheduler/operation_id_or_alias.h>
-
#include <yt/yt/client/scheduler/public.h>
+#include <yt/yt/client/node_tracker_client/public.h>
+
namespace NYT::NApi {
////////////////////////////////////////////////////////////////////////////////
@@ -368,6 +369,7 @@ struct TJob
std::optional<TInstant> StartTime;
std::optional<TInstant> FinishTime;
std::optional<TString> Address;
+ std::optional<NNodeTrackerClient::TAddressMap> Addresses;
std::optional<double> Progress;
std::optional<ui64> StderrSize;
std::optional<ui64> FailContextSize;
diff --git a/yt/yt/client/api/rpc_proxy/helpers.cpp b/yt/yt/client/api/rpc_proxy/helpers.cpp
index 1a316ed3747..8e1fdd366ec 100644
--- a/yt/yt/client/api/rpc_proxy/helpers.cpp
+++ b/yt/yt/client/api/rpc_proxy/helpers.cpp
@@ -840,6 +840,7 @@ void ToProto(NProto::TJob* protoJob, const NApi::TJob& job)
YT_OPTIONAL_SET_PROTO(protoJob, finish_time, job.FinishTime);
YT_OPTIONAL_TO_PROTO(protoJob, address, job.Address);
+ YT_OPTIONAL_TO_PROTO(protoJob, addresses, job.Addresses);
if (job.Progress) {
protoJob->set_progress(*job.Progress);
}
@@ -905,6 +906,11 @@ void FromProto(NApi::TJob* job, const NProto::TJob& protoJob)
job->StartTime = YT_OPTIONAL_FROM_PROTO(protoJob, start_time, TInstant);
job->FinishTime = YT_OPTIONAL_FROM_PROTO(protoJob, finish_time, TInstant);
job->Address = YT_OPTIONAL_FROM_PROTO(protoJob, address);
+ if (protoJob.has_addresses()) {
+ job->Addresses = FromProto<NNodeTrackerClient::TAddressMap>(protoJob.addresses());
+ } else {
+ job->Addresses = {};
+ }
job->Progress = YT_OPTIONAL_FROM_PROTO(protoJob, progress);
job->StderrSize = YT_OPTIONAL_FROM_PROTO(protoJob, stderr_size);
job->FailContextSize = YT_OPTIONAL_FROM_PROTO(protoJob, fail_context_size);
diff --git a/yt/yt/client/chunk_client/config.cpp b/yt/yt/client/chunk_client/config.cpp
index 6c367cdafe8..069ef37b30b 100644
--- a/yt/yt/client/chunk_client/config.cpp
+++ b/yt/yt/client/chunk_client/config.cpp
@@ -229,7 +229,7 @@ void TErasureReaderConfig::Register(TRegistrar registrar)
registrar.Parameter("slow_reader_expiration_timeout", &TThis::SlowReaderExpirationTimeout)
.Default(TDuration::Minutes(2));
registrar.Parameter("replication_reader_timeout", &TThis::ReplicationReaderTimeout)
- .Default(TDuration::Seconds(60));
+ .Default(TDuration::Seconds(300));
registrar.Parameter("replication_reader_failure_timeout", &TThis::ReplicationReaderFailureTimeout)
.Default(TDuration::Minutes(10));
}
diff --git a/yt/yt/client/misc/workload.cpp b/yt/yt/client/misc/workload.cpp
index 83e355ba0f0..cd35f556572 100644
--- a/yt/yt/client/misc/workload.cpp
+++ b/yt/yt/client/misc/workload.cpp
@@ -101,6 +101,37 @@ IInvokerPtr GetCompressionInvoker(const TWorkloadDescriptor& workloadDescriptor)
}
}
+bool IsSystemWorkloadCategory(EWorkloadCategory category)
+{
+ switch (category) {
+ case EWorkloadCategory::Idle:
+ return false;
+
+ case EWorkloadCategory::SystemArtifactCacheDownload:
+ case EWorkloadCategory::SystemRepair:
+ case EWorkloadCategory::SystemReincarnation:
+ case EWorkloadCategory::SystemReplication:
+ case EWorkloadCategory::SystemMerge:
+ case EWorkloadCategory::SystemTabletCompaction:
+ case EWorkloadCategory::SystemTabletLogging:
+ case EWorkloadCategory::SystemTabletPartitioning:
+ case EWorkloadCategory::SystemTabletPreload:
+ case EWorkloadCategory::SystemTabletRecovery:
+ case EWorkloadCategory::SystemTabletReplication:
+ case EWorkloadCategory::SystemTabletSnapshot:
+ case EWorkloadCategory::SystemTabletStoreFlush:
+ return true;
+
+ case EWorkloadCategory::UserBatch:
+ case EWorkloadCategory::UserInteractive:
+ case EWorkloadCategory::UserRealtime:
+ case EWorkloadCategory::UserDynamicStoreRead:
+ return false;
+ }
+
+ return false;
+}
+
struct TSerializableWorkloadDescriptor
: public TWorkloadDescriptor
, public TYsonStructLite
diff --git a/yt/yt/client/misc/workload.h b/yt/yt/client/misc/workload.h
index 63ce3a7cb33..4ab895195f0 100644
--- a/yt/yt/client/misc/workload.h
+++ b/yt/yt/client/misc/workload.h
@@ -85,6 +85,8 @@ i64 GetBasicPriority(EWorkloadCategory category);
IInvokerPtr GetCompressionInvoker(const TWorkloadDescriptor& workloadDescriptor);
+bool IsSystemWorkloadCategory(EWorkloadCategory category);
+
void FormatValue(
TStringBuilderBase* builder,
const TWorkloadDescriptor& descriptor,
diff --git a/yt/yt/client/table_client/helpers.cpp b/yt/yt/client/table_client/helpers.cpp
index 299008c50a9..b2eabd6d0aa 100644
--- a/yt/yt/client/table_client/helpers.cpp
+++ b/yt/yt/client/table_client/helpers.cpp
@@ -1426,8 +1426,10 @@ TUnversionedValue EncodeUnversionedAnyValue(
{
YT_ASSERT(None(value.Flags));
switch (value.Type) {
- case EValueType::Any:
case EValueType::Composite:
+ value.Type = EValueType::Any;
+ [[fallthrough]];
+ case EValueType::Any:
return value;
case EValueType::Null: {
diff --git a/yt/yt_proto/yt/client/api/rpc_proxy/proto/api_service.proto b/yt/yt_proto/yt/client/api/rpc_proxy/proto/api_service.proto
index 83ff3617662..d121f2e0794 100644
--- a/yt/yt_proto/yt/client/api/rpc_proxy/proto/api_service.proto
+++ b/yt/yt_proto/yt/client/api/rpc_proxy/proto/api_service.proto
@@ -14,6 +14,7 @@ import "yt_proto/yt/core/ytree/proto/request_complexity_limits.proto";
import "yt_proto/yt/client/chunk_client/proto/data_statistics.proto";
import "yt_proto/yt/client/chaos_client/proto/replication_card.proto";
import "yt_proto/yt/client/hive/proto/timestamp_map.proto";
+import "yt_proto/yt/client/node_tracker_client/proto/node.proto";
import "yt_proto/yt/client/scheduler/proto/spec_patch.proto";
import "yt_proto/yt/client/table_client/proto/versioned_io_options.proto";
import "yt_proto/yt/client/tablet_client/proto/lock_mask.proto";
@@ -3211,6 +3212,7 @@ message TJob
optional string monitoring_descriptor = 30;
optional string operation_incarnation = 31;
optional NYT.NProto.TGuid allocation_id = 32;
+ optional NNodeTrackerClient.NProto.TAddressMap addresses = 33;
}
message TListJobsStatistics
diff --git a/yt/yt_proto/yt/client/node_tracker_client/proto/node.proto b/yt/yt_proto/yt/client/node_tracker_client/proto/node.proto
index 497593d75d4..fa09aeb9f0f 100644
--- a/yt/yt_proto/yt/client/node_tracker_client/proto/node.proto
+++ b/yt/yt_proto/yt/client/node_tracker_client/proto/node.proto
@@ -3,6 +3,8 @@ package NYT.NNodeTrackerClient.NProto;
import "yt_proto/yt/core/misc/proto/guid.proto";
import "yt_proto/yt/core/misc/proto/error.proto";
+option go_package = "a.yandex-team.ru/yt/go/proto/client/node_tracker_client";
+
////////////////////////////////////////////////////////////////////////////////
// TODO(gritukan): Move it to TReqFullChunkHeartbeat after switching to new heartbeats.