about | summary | refs | log | tree | commit | diff | stats
diff options
context:
space:
mode:
author: Andrey Neporada <aneporada@ydb.tech> 2024-11-26 18:58:56 +0300
committer: GitHub <noreply@github.com> 2024-11-26 18:58:56 +0300
commit: deada874edcf9da82238af4d21fec0c92a61b072 (patch)
tree: e5afd68d97d966fc942ff4ff5bb9bf23f78bd1a6
parent: 46e4a140ae4a66922397bb2f215981e7d5e26eb5 (diff)
download: ydb-deada874edcf9da82238af4d21fec0c92a61b072.tar.gz
Missing bits from Arcadia (YQL embedded and YT provider) (#12023)
-rw-r--r--ydb/core/fq/libs/row_dispatcher/common.cpp2
-rw-r--r--ydb/core/fq/libs/row_dispatcher/common.h2
-rw-r--r--ydb/core/fq/libs/row_dispatcher/json_filter.cpp3
-rw-r--r--ydb/core/fq/libs/row_dispatcher/purecalc_no_pg_wrapper/ya.make2
-rw-r--r--ydb/core/fq/libs/row_dispatcher/row_dispatcher.cpp2
-rw-r--r--ydb/core/fq/libs/row_dispatcher/topic_session.cpp2
-rw-r--r--ydb/core/kqp/host/kqp_host.cpp3
-rw-r--r--ydb/core/kqp/host/ya.make1
-rw-r--r--ydb/core/kqp/opt/logical/kqp_opt_log.cpp1
-rw-r--r--ydb/library/yql/dq/opt/dq_cbo_ut.cpp2
-rw-r--r--ydb/library/yql/dq/opt/dq_opt_hypergraph_ut.cpp4
-rw-r--r--ydb/library/yql/dq/opt/dq_opt_join_cbo_factory.cpp26
-rw-r--r--ydb/library/yql/dq/opt/dq_opt_join_cbo_factory.h7
-rw-r--r--ydb/library/yql/dq/opt/dq_opt_join_cost_based.h2
-rw-r--r--ydb/library/yql/dq/opt/dq_opt_log.h23
-rw-r--r--ydb/library/yql/dq/opt/ya.make2
-rw-r--r--ydb/library/yql/providers/dq/opt/logical_optimize.cpp9
-rw-r--r--ydb/library/yql/providers/yt/provider/ut/ya.make1
-rw-r--r--ydb/library/yql/providers/yt/provider/ut/yql_yt_cbo_ut.cpp41
-rw-r--r--ydb/library/yql/providers/yt/provider/yql_yt_join_reorder.cpp11
-rw-r--r--ydb/library/yql/providers/yt/provider/yql_yt_provider.cpp12
-rw-r--r--ydb/library/yql/providers/yt/provider/yql_yt_provider.h9
-rw-r--r--ydb/library/yql/public/embedded/ya.make1
-rw-r--r--ydb/library/yql/public/embedded/yql_embedded.cpp4
-rw-r--r--ydb/library/yql/public/purecalc/common/compile_mkql.cpp116
-rw-r--r--ydb/library/yql/public/purecalc/common/compile_mkql.h17
-rw-r--r--ydb/library/yql/public/purecalc/common/fwd.cpp1
-rw-r--r--ydb/library/yql/public/purecalc/common/fwd.h56
-rw-r--r--ydb/library/yql/public/purecalc/common/inspect_input.cpp33
-rw-r--r--ydb/library/yql/public/purecalc/common/inspect_input.h7
-rw-r--r--ydb/library/yql/public/purecalc/common/interface.cpp128
-rw-r--r--ydb/library/yql/public/purecalc/common/interface.h1180
-rw-r--r--ydb/library/yql/public/purecalc/common/logger_init.cpp32
-rw-r--r--ydb/library/yql/public/purecalc/common/logger_init.h10
-rw-r--r--ydb/library/yql/public/purecalc/common/names.cpp19
-rw-r--r--ydb/library/yql/public/purecalc/common/names.h19
-rw-r--r--ydb/library/yql/public/purecalc/common/no_llvm/ya.make18
-rw-r--r--ydb/library/yql/public/purecalc/common/processor_mode.cpp1
-rw-r--r--ydb/library/yql/public/purecalc/common/processor_mode.h11
-rw-r--r--ydb/library/yql/public/purecalc/common/program_factory.cpp158
-rw-r--r--ydb/library/yql/public/purecalc/common/program_factory.h48
-rw-r--r--ydb/library/yql/public/purecalc/common/transformations/align_output_schema.cpp122
-rw-r--r--ydb/library/yql/public/purecalc/common/transformations/align_output_schema.h25
-rw-r--r--ydb/library/yql/public/purecalc/common/transformations/extract_used_columns.cpp96
-rw-r--r--ydb/library/yql/public/purecalc/common/transformations/extract_used_columns.h29
-rw-r--r--ydb/library/yql/public/purecalc/common/transformations/output_columns_filter.cpp100
-rw-r--r--ydb/library/yql/public/purecalc/common/transformations/output_columns_filter.h18
-rw-r--r--ydb/library/yql/public/purecalc/common/transformations/replace_table_reads.cpp247
-rw-r--r--ydb/library/yql/public/purecalc/common/transformations/replace_table_reads.h30
-rw-r--r--ydb/library/yql/public/purecalc/common/transformations/root_to_blocks.cpp65
-rw-r--r--ydb/library/yql/public/purecalc/common/transformations/root_to_blocks.h22
-rw-r--r--ydb/library/yql/public/purecalc/common/transformations/type_annotation.cpp251
-rw-r--r--ydb/library/yql/public/purecalc/common/transformations/type_annotation.h30
-rw-r--r--ydb/library/yql/public/purecalc/common/transformations/utils.cpp179
-rw-r--r--ydb/library/yql/public/purecalc/common/transformations/utils.h83
-rw-r--r--ydb/library/yql/public/purecalc/common/type_from_schema.cpp255
-rw-r--r--ydb/library/yql/public/purecalc/common/type_from_schema.h36
-rw-r--r--ydb/library/yql/public/purecalc/common/worker.cpp613
-rw-r--r--ydb/library/yql/public/purecalc/common/worker.h178
-rw-r--r--ydb/library/yql/public/purecalc/common/worker_factory.cpp532
-rw-r--r--ydb/library/yql/public/purecalc/common/worker_factory.h168
-rw-r--r--ydb/library/yql/public/purecalc/common/wrappers.cpp1
-rw-r--r--ydb/library/yql/public/purecalc/common/wrappers.h70
-rw-r--r--ydb/library/yql/public/purecalc/common/ya.make21
-rw-r--r--ydb/library/yql/public/purecalc/common/ya.make.inc52
-rw-r--r--ydb/library/yql/public/purecalc/examples/protobuf/main.cpp133
-rw-r--r--ydb/library/yql/public/purecalc/examples/protobuf/main.proto11
-rw-r--r--ydb/library/yql/public/purecalc/examples/protobuf/ut/canondata/exectest.run_protobuf_/log.out18
-rw-r--r--ydb/library/yql/public/purecalc/examples/protobuf/ut/canondata/result.json5
-rw-r--r--ydb/library/yql/public/purecalc/examples/protobuf/ut/ya.make15
-rw-r--r--ydb/library/yql/public/purecalc/examples/protobuf/ya.make27
-rw-r--r--ydb/library/yql/public/purecalc/examples/protobuf_pull_list/main.cpp75
-rw-r--r--ydb/library/yql/public/purecalc/examples/protobuf_pull_list/main.proto10
-rw-r--r--ydb/library/yql/public/purecalc/examples/protobuf_pull_list/ut/canondata/exectest.run_protobuf_pull_list_/log.out6
-rw-r--r--ydb/library/yql/public/purecalc/examples/protobuf_pull_list/ut/canondata/result.json6
-rw-r--r--ydb/library/yql/public/purecalc/examples/protobuf_pull_list/ut/ya.make9
-rw-r--r--ydb/library/yql/public/purecalc/examples/protobuf_pull_list/ya.make20
-rw-r--r--ydb/library/yql/public/purecalc/examples/skiff_pull_list/main.cpp93
-rw-r--r--ydb/library/yql/public/purecalc/examples/skiff_pull_list/ya.make14
-rw-r--r--ydb/library/yql/public/purecalc/examples/ya.make5
-rw-r--r--ydb/library/yql/public/purecalc/helpers/protobuf/schema_from_proto.cpp202
-rw-r--r--ydb/library/yql/public/purecalc/helpers/protobuf/schema_from_proto.h60
-rw-r--r--ydb/library/yql/public/purecalc/helpers/protobuf/ya.make14
-rw-r--r--ydb/library/yql/public/purecalc/helpers/stream/stream_from_vector.cpp1
-rw-r--r--ydb/library/yql/public/purecalc/helpers/stream/stream_from_vector.h40
-rw-r--r--ydb/library/yql/public/purecalc/helpers/stream/ya.make13
-rw-r--r--ydb/library/yql/public/purecalc/helpers/ya.make8
-rw-r--r--ydb/library/yql/public/purecalc/io_specs/arrow/spec.cpp576
-rw-r--r--ydb/library/yql/public/purecalc/io_specs/arrow/spec.h130
-rw-r--r--ydb/library/yql/public/purecalc/io_specs/arrow/ut/test_spec.cpp419
-rw-r--r--ydb/library/yql/public/purecalc/io_specs/arrow/ut/ya.make20
-rw-r--r--ydb/library/yql/public/purecalc/io_specs/arrow/ya.make13
-rw-r--r--ydb/library/yql/public/purecalc/io_specs/arrow/ya.make.inc13
-rw-r--r--ydb/library/yql/public/purecalc/io_specs/mkql/no_llvm/ya.make10
-rw-r--r--ydb/library/yql/public/purecalc/io_specs/mkql/spec.cpp934
-rw-r--r--ydb/library/yql/public/purecalc/io_specs/mkql/spec.h231
-rw-r--r--ydb/library/yql/public/purecalc/io_specs/mkql/ut/no_llvm/ya.make24
-rw-r--r--ydb/library/yql/public/purecalc/io_specs/mkql/ut/test.inl777
-rw-r--r--ydb/library/yql/public/purecalc/io_specs/mkql/ut/test_spec.cpp325
-rw-r--r--ydb/library/yql/public/purecalc/io_specs/mkql/ut/ya.make20
-rw-r--r--ydb/library/yql/public/purecalc/io_specs/mkql/ya.make19
-rw-r--r--ydb/library/yql/public/purecalc/io_specs/mkql/ya.make.inc25
-rw-r--r--ydb/library/yql/public/purecalc/io_specs/protobuf/proto_variant.cpp1
-rw-r--r--ydb/library/yql/public/purecalc/io_specs/protobuf/proto_variant.h80
-rw-r--r--ydb/library/yql/public/purecalc/io_specs/protobuf/spec.cpp1
-rw-r--r--ydb/library/yql/public/purecalc/io_specs/protobuf/spec.h147
-rw-r--r--ydb/library/yql/public/purecalc/io_specs/protobuf/ut/test_spec.cpp996
-rw-r--r--ydb/library/yql/public/purecalc/io_specs/protobuf/ut/ya.make23
-rw-r--r--ydb/library/yql/public/purecalc/io_specs/protobuf/ya.make19
-rw-r--r--ydb/library/yql/public/purecalc/io_specs/protobuf_raw/proto_holder.cpp1
-rw-r--r--ydb/library/yql/public/purecalc/io_specs/protobuf_raw/proto_holder.h31
-rw-r--r--ydb/library/yql/public/purecalc/io_specs/protobuf_raw/spec.cpp1064
-rw-r--r--ydb/library/yql/public/purecalc/io_specs/protobuf_raw/spec.h257
-rw-r--r--ydb/library/yql/public/purecalc/io_specs/protobuf_raw/ya.make16
-rw-r--r--ydb/library/yql/public/purecalc/io_specs/ut/ya.make5
-rw-r--r--ydb/library/yql/public/purecalc/io_specs/ya.make10
-rw-r--r--ydb/library/yql/public/purecalc/no_llvm/purecalc.h4
-rw-r--r--ydb/library/yql/public/purecalc/no_llvm/ya.make30
-rw-r--r--ydb/library/yql/public/purecalc/purecalc.cpp1
-rw-r--r--ydb/library/yql/public/purecalc/purecalc.h3
-rw-r--r--ydb/library/yql/public/purecalc/ut/empty_stream.h20
-rw-r--r--ydb/library/yql/public/purecalc/ut/fake_spec.cpp36
-rw-r--r--ydb/library/yql/public/purecalc/ut/fake_spec.h54
-rw-r--r--ydb/library/yql/public/purecalc/ut/lib/helpers.cpp55
-rw-r--r--ydb/library/yql/public/purecalc/ut/lib/helpers.h18
-rw-r--r--ydb/library/yql/public/purecalc/ut/lib/ya.make14
-rw-r--r--ydb/library/yql/public/purecalc/ut/protos/test_structs.proto122
-rw-r--r--ydb/library/yql/public/purecalc/ut/protos/ya.make9
-rw-r--r--ydb/library/yql/public/purecalc/ut/test_eval.cpp30
-rw-r--r--ydb/library/yql/public/purecalc/ut/test_mixed_allocators.cpp139
-rw-r--r--ydb/library/yql/public/purecalc/ut/test_pg.cpp71
-rw-r--r--ydb/library/yql/public/purecalc/ut/test_pool.cpp184
-rw-r--r--ydb/library/yql/public/purecalc/ut/test_schema.cpp1
-rw-r--r--ydb/library/yql/public/purecalc/ut/test_sexpr.cpp55
-rw-r--r--ydb/library/yql/public/purecalc/ut/test_sql.cpp205
-rw-r--r--ydb/library/yql/public/purecalc/ut/test_udf.cpp195
-rw-r--r--ydb/library/yql/public/purecalc/ut/test_user_data.cpp62
-rw-r--r--ydb/library/yql/public/purecalc/ut/ya.make28
-rw-r--r--ydb/library/yql/public/purecalc/ya.make28
-rw-r--r--ydb/library/yql/public/ya.make1
-rw-r--r--ydb/library/yql/tools/dqrun/dqrun.cpp5
-rw-r--r--ydb/library/yql/tools/dqrun/ya.make2
-rw-r--r--ydb/library/yql/tools/mrrun/mrrun.cpp3
-rw-r--r--ydb/library/yql/tools/mrrun/ya.make1
-rw-r--r--ydb/library/yql/tools/yqlrun/http/ya.make1
-rw-r--r--ydb/library/yql/tools/yqlrun/http/yql_server.cpp3
-rw-r--r--ydb/library/yql/tools/yqlrun/ya.make1
-rw-r--r--ydb/library/yql/tools/yqlrun/yqlrun.cpp4
-rw-r--r--ydb/library/yql/yt/native/plugin.cpp3
-rw-r--r--ydb/library/yql/yt/native/ya.make1
150 files changed, 121 insertions, 13489 deletions
diff --git a/ydb/core/fq/libs/row_dispatcher/common.cpp b/ydb/core/fq/libs/row_dispatcher/common.cpp
index 879197b8cea..2ecbfe699c8 100644
--- a/ydb/core/fq/libs/row_dispatcher/common.cpp
+++ b/ydb/core/fq/libs/row_dispatcher/common.cpp
@@ -2,7 +2,7 @@
#include <util/system/mutex.h>
-#include <ydb/library/yql/public/purecalc/common/interface.h>
+#include <yql/essentials/public/purecalc/common/interface.h>
namespace NFq {
diff --git a/ydb/core/fq/libs/row_dispatcher/common.h b/ydb/core/fq/libs/row_dispatcher/common.h
index 32ebc7af945..b82b65dde2f 100644
--- a/ydb/core/fq/libs/row_dispatcher/common.h
+++ b/ydb/core/fq/libs/row_dispatcher/common.h
@@ -3,7 +3,7 @@
#include <util/generic/ptr.h>
#include <util/system/mutex.h>
-#include <ydb/library/yql/public/purecalc/common/fwd.h>
+#include <yql/essentials/public/purecalc/common/fwd.h>
namespace NFq {
diff --git a/ydb/core/fq/libs/row_dispatcher/json_filter.cpp b/ydb/core/fq/libs/row_dispatcher/json_filter.cpp
index e7634e2eaab..f2efb29eeed 100644
--- a/ydb/core/fq/libs/row_dispatcher/json_filter.cpp
+++ b/ydb/core/fq/libs/row_dispatcher/json_filter.cpp
@@ -1,7 +1,6 @@
#include <yql/essentials/providers/common/schema/parser/yql_type_parser.h>
#include <yql/essentials/public/udf/udf_version.h>
-#include <ydb/library/yql/public/purecalc/purecalc.h>
-#include <ydb/library/yql/public/purecalc/io_specs/mkql/spec.h>
+#include <yql/essentials/public/purecalc/purecalc.h>
#include <yql/essentials/minikql/mkql_alloc.h>
#include <yql/essentials/minikql/computation/mkql_computation_node_holders.h>
#include <yql/essentials/minikql/mkql_terminator.h>
diff --git a/ydb/core/fq/libs/row_dispatcher/purecalc_no_pg_wrapper/ya.make b/ydb/core/fq/libs/row_dispatcher/purecalc_no_pg_wrapper/ya.make
index 167e6bfd628..6c02451a1b0 100644
--- a/ydb/core/fq/libs/row_dispatcher/purecalc_no_pg_wrapper/ya.make
+++ b/ydb/core/fq/libs/row_dispatcher/purecalc_no_pg_wrapper/ya.make
@@ -1,5 +1,5 @@
LIBRARY()
-INCLUDE(../../../../../library/yql/public/purecalc/common/ya.make.inc)
+INCLUDE(${ARCADIA_ROOT}/yql/essentials/public/purecalc/common/ya.make.inc)
END()
diff --git a/ydb/core/fq/libs/row_dispatcher/row_dispatcher.cpp b/ydb/core/fq/libs/row_dispatcher/row_dispatcher.cpp
index 19d23021882..041f302a767 100644
--- a/ydb/core/fq/libs/row_dispatcher/row_dispatcher.cpp
+++ b/ydb/core/fq/libs/row_dispatcher/row_dispatcher.cpp
@@ -8,7 +8,7 @@
#include <ydb/library/actors/core/interconnect.h>
#include <ydb/library/yql/dq/actors/common/retry_queue.h>
#include <ydb/library/yql/providers/dq/counters/counters.h>
-#include <ydb/library/yql/public/purecalc/common/interface.h>
+#include <yql/essentials/public/purecalc/common/interface.h>
#include <ydb/core/base/appdata_fwd.h>
#include <ydb/core/fq/libs/actors/logging/log.h>
diff --git a/ydb/core/fq/libs/row_dispatcher/topic_session.cpp b/ydb/core/fq/libs/row_dispatcher/topic_session.cpp
index 0680991ea9b..9ab486cb017 100644
--- a/ydb/core/fq/libs/row_dispatcher/topic_session.cpp
+++ b/ydb/core/fq/libs/row_dispatcher/topic_session.cpp
@@ -16,7 +16,7 @@
#include <ydb/core/fq/libs/row_dispatcher/json_parser.h>
#include <ydb/core/fq/libs/row_dispatcher/json_filter.h>
-#include <ydb/library/yql/public/purecalc/purecalc.h>
+#include <yql/essentials/public/purecalc/purecalc.h>
namespace NFq {
diff --git a/ydb/core/kqp/host/kqp_host.cpp b/ydb/core/kqp/host/kqp_host.cpp
index 4055755b9fe..42a48eb59d5 100644
--- a/ydb/core/kqp/host/kqp_host.cpp
+++ b/ydb/core/kqp/host/kqp_host.cpp
@@ -17,6 +17,7 @@
#include <yql/essentials/providers/common/codec/yql_codec.h>
#include <yql/essentials/providers/common/provider/yql_provider_names.h>
#include <yql/essentials/providers/common/udf_resolve/yql_simple_udf_resolver.h>
+#include <ydb/library/yql/dq/opt/dq_opt_join_cbo_factory.h>
#include <ydb/library/yql/providers/s3/expr_nodes/yql_s3_expr_nodes.h>
#include <ydb/library/yql/providers/s3/provider/yql_s3_provider.h>
#include <ydb/library/yql/providers/generic/expr_nodes/yql_generic_expr_nodes.h>
@@ -1817,7 +1818,7 @@ private:
}
TString sessionId = CreateGuidAsString();
- auto [ytState, statWriter] = CreateYtNativeState(FederatedQuerySetup->YtGateway, userName, sessionId, &FederatedQuerySetup->YtGatewayConfig, TypesCtx);
+ auto [ytState, statWriter] = CreateYtNativeState(FederatedQuerySetup->YtGateway, userName, sessionId, &FederatedQuerySetup->YtGatewayConfig, TypesCtx, NDq::MakeCBOOptimizerFactory());
ytState->PassiveExecution = true;
ytState->Gateway->OpenSession(
diff --git a/ydb/core/kqp/host/ya.make b/ydb/core/kqp/host/ya.make
index 01d0c156ee6..c1d03eacd33 100644
--- a/ydb/core/kqp/host/ya.make
+++ b/ydb/core/kqp/host/ya.make
@@ -24,6 +24,7 @@ PEERDIR(
yql/essentials/sql
yql/essentials/core
yql/essentials/providers/common/codec
+ ydb/library/yql/dq/opt
ydb/library/yql/providers/common/http_gateway
yql/essentials/providers/common/udf_resolve
yql/essentials/providers/config
diff --git a/ydb/core/kqp/opt/logical/kqp_opt_log.cpp b/ydb/core/kqp/opt/logical/kqp_opt_log.cpp
index 3a3df0e38d4..c89c380239b 100644
--- a/ydb/core/kqp/opt/logical/kqp_opt_log.cpp
+++ b/ydb/core/kqp/opt/logical/kqp_opt_log.cpp
@@ -11,6 +11,7 @@
#include <ydb/library/yql/dq/opt/dq_opt_join.h>
#include <ydb/library/yql/dq/opt/dq_opt_log.h>
#include <ydb/library/yql/dq/opt/dq_opt_hopping.h>
+#include <ydb/library/yql/dq/opt/dq_opt_join_cost_based.h>
#include <yql/essentials/utils/log/log.h>
#include <yql/essentials/providers/common/transform/yql_optimize.h>
#include <ydb/library/yql/providers/dq/common/yql_dq_settings.h>
diff --git a/ydb/library/yql/dq/opt/dq_cbo_ut.cpp b/ydb/library/yql/dq/opt/dq_cbo_ut.cpp
index 4847d9803e2..c57e16e4fb0 100644
--- a/ydb/library/yql/dq/opt/dq_cbo_ut.cpp
+++ b/ydb/library/yql/dq/opt/dq_cbo_ut.cpp
@@ -4,7 +4,7 @@
#include <yql/essentials/providers/common/provider/yql_provider.h>
#include <yql/essentials/parser/pg_wrapper/interface/optimizer.h>
-#include "dq_opt_log.h"
+#include "dq_opt_join_cost_based.h"
#include "dq_opt_join.h"
using namespace NYql;
diff --git a/ydb/library/yql/dq/opt/dq_opt_hypergraph_ut.cpp b/ydb/library/yql/dq/opt/dq_opt_hypergraph_ut.cpp
index ff1d3e2d311..98012b1f539 100644
--- a/ydb/library/yql/dq/opt/dq_opt_hypergraph_ut.cpp
+++ b/ydb/library/yql/dq/opt/dq_opt_hypergraph_ut.cpp
@@ -6,7 +6,7 @@
#include <util/string/split.h>
#include "dq_opt_make_join_hypergraph.h"
-#include "dq_opt_log.h"
+#include "dq_opt_join_cost_based.h"
#include <memory>
@@ -51,7 +51,7 @@ struct TTestContext : public TBaseProviderContext {
template <typename TProviderContext = TTestContext>
std::shared_ptr<IBaseOptimizerNode> Enumerate(const std::shared_ptr<IBaseOptimizerNode>& root, const TOptimizerHints& hints = {}) {
auto ctx = TProviderContext();
- auto optimizer =
+ auto optimizer =
std::unique_ptr<IOptimizerNew>(MakeNativeOptimizerNew(ctx, std::numeric_limits<ui32>::max()));
Y_ENSURE(root->Kind == EOptimizerNodeKind::JoinNodeType);
diff --git a/ydb/library/yql/dq/opt/dq_opt_join_cbo_factory.cpp b/ydb/library/yql/dq/opt/dq_opt_join_cbo_factory.cpp
new file mode 100644
index 00000000000..23f15a0c69d
--- /dev/null
+++ b/ydb/library/yql/dq/opt/dq_opt_join_cbo_factory.cpp
@@ -0,0 +1,26 @@
+#include "dq_opt_join_cbo_factory.h"
+
+#include <ydb/library/yql/dq/opt/dq_opt_join_cost_based.h>
+
+#include <yql/essentials/parser/pg_wrapper/interface/optimizer.h>
+
+namespace NYql::NDq {
+
+namespace {
+class TDqOptimizerFactory : public IOptimizerFactory {
+public:
+ virtual IOptimizerNew::TPtr MakeJoinCostBasedOptimizerNative(IProviderContext& pctx, TExprContext&, const TNativeSettings& settings) const override {
+ return IOptimizerNew::TPtr(MakeNativeOptimizerNew(pctx, settings.MaxDPhypDPTableSize));
+ }
+
+ virtual IOptimizerNew::TPtr MakeJoinCostBasedOptimizerPG(IProviderContext& pctx, TExprContext& ctx, const TPGSettings& settings) const override {
+ return IOptimizerNew::TPtr(MakePgOptimizerNew(pctx, ctx, settings.Logger));
+ }
+};
+}
+
+IOptimizerFactory::TPtr MakeCBOOptimizerFactory() {
+ return std::make_shared<TDqOptimizerFactory>();
+}
+
+}
diff --git a/ydb/library/yql/dq/opt/dq_opt_join_cbo_factory.h b/ydb/library/yql/dq/opt/dq_opt_join_cbo_factory.h
new file mode 100644
index 00000000000..d108e2aa93d
--- /dev/null
+++ b/ydb/library/yql/dq/opt/dq_opt_join_cbo_factory.h
@@ -0,0 +1,7 @@
+#include <yql/essentials/core/cbo/cbo_optimizer_new.h>
+
+namespace NYql::NDq {
+
+IOptimizerFactory::TPtr MakeCBOOptimizerFactory();
+
+}
diff --git a/ydb/library/yql/dq/opt/dq_opt_join_cost_based.h b/ydb/library/yql/dq/opt/dq_opt_join_cost_based.h
index d8be3b6695a..36422f41d89 100644
--- a/ydb/library/yql/dq/opt/dq_opt_join_cost_based.h
+++ b/ydb/library/yql/dq/opt/dq_opt_join_cost_based.h
@@ -38,4 +38,6 @@ NYql::NNodes::TExprBase DqOptimizeEquiJoinWithCosts(
const TOptimizerHints& hints = {}
);
+IOptimizerNew* MakeNativeOptimizerNew(IProviderContext& ctx, const ui32 maxDPccpDPTableSize);
+
} // namespace NYql::NDq
diff --git a/ydb/library/yql/dq/opt/dq_opt_log.h b/ydb/library/yql/dq/opt/dq_opt_log.h
index 34816f163c2..e33642f6d56 100644
--- a/ydb/library/yql/dq/opt/dq_opt_log.h
+++ b/ydb/library/yql/dq/opt/dq_opt_log.h
@@ -24,27 +24,6 @@ NNodes::TExprBase DqRewriteAggregate(NNodes::TExprBase node, TExprContext& ctx,
NNodes::TExprBase DqRewriteTakeSortToTopSort(NNodes::TExprBase node, TExprContext& ctx, const TParentsMap& parents);
-NNodes::TExprBase DqOptimizeEquiJoinWithCosts(
- const NNodes::TExprBase& node,
- TExprContext& ctx,
- TTypeAnnotationContext& typesCtx,
- ui32 optLevel,
- IOptimizerNew& optimizer,
- const std::function<void(TVector<std::shared_ptr<TRelOptimizerNode>>&, TStringBuf, const TExprNode::TPtr, const std::shared_ptr<TOptimizerStatistics>&)>& providerCollect,
- const TOptimizerHints& hints = {}
-);
-
-NNodes::TExprBase DqOptimizeEquiJoinWithCosts(
- const NNodes::TExprBase& node,
- TExprContext& ctx,
- TTypeAnnotationContext& typesCtx,
- ui32 optLevel,
- IOptimizerNew& optimizer,
- const std::function<void(TVector<std::shared_ptr<TRelOptimizerNode>>&, TStringBuf, const TExprNode::TPtr, const std::shared_ptr<TOptimizerStatistics>&)>& providerCollect,
- int& equiJoinCounter,
- const TOptimizerHints& hints = {}
-);
-
NNodes::TExprBase DqRewriteEquiJoin(const NNodes::TExprBase& node, TExprContext& ctx);
NNodes::TExprBase DqEnforceCompactPartition(NNodes::TExprBase node, NNodes::TExprList frames, TExprContext& ctx);
@@ -63,8 +42,6 @@ IGraphTransformer::TStatus DqWrapIO(const TExprNode::TPtr& input, TExprNode::TPt
NNodes::TExprBase DqExpandMatchRecognize(NNodes::TExprBase node, TExprContext& ctx, TTypeAnnotationContext& typeAnnCtx);
-IOptimizerNew* MakeNativeOptimizerNew(IProviderContext& ctx, const ui32 maxDPccpDPTableSize);
-
NNodes::TMaybeNode<NNodes::TExprBase> UnorderedOverDqReadWrap(NNodes::TExprBase node, TExprContext& ctx, const std::function<const TParentsMap*()>& getParents, bool enableDqReplicate, TTypeAnnotationContext& typeAnnCtx);
NNodes::TMaybeNode<NNodes::TExprBase> ExtractMembersOverDqReadWrap(NNodes::TExprBase node, TExprContext& ctx, const std::function<const TParentsMap*()>& getParents, bool enableDqReplicate, TTypeAnnotationContext& typeAnnCtx);
diff --git a/ydb/library/yql/dq/opt/ya.make b/ydb/library/yql/dq/opt/ya.make
index 6ae9b286710..e86a169981d 100644
--- a/ydb/library/yql/dq/opt/ya.make
+++ b/ydb/library/yql/dq/opt/ya.make
@@ -6,6 +6,7 @@ PEERDIR(
ydb/library/yql/dq/common
ydb/library/yql/dq/expr_nodes
yql/essentials/core/dq_integration
+ yql/essentials/parser/pg_wrapper/interface
ydb/library/yql/dq/proto
ydb/library/yql/dq/type_ann
ydb/library/yql/providers/dq/expr_nodes
@@ -16,6 +17,7 @@ SRCS(
dq_opt_build.cpp
dq_opt_conflict_rules_collector.cpp
dq_opt_join.cpp
+ dq_opt_join_cbo_factory.cpp
dq_opt_join_cost_based.cpp
dq_opt_join_tree_node.cpp
dq_opt_hopping.cpp
diff --git a/ydb/library/yql/providers/dq/opt/logical_optimize.cpp b/ydb/library/yql/providers/dq/opt/logical_optimize.cpp
index 055c4145cd1..c9f72f18295 100644
--- a/ydb/library/yql/providers/dq/opt/logical_optimize.cpp
+++ b/ydb/library/yql/providers/dq/opt/logical_optimize.cpp
@@ -8,6 +8,8 @@
#include <yql/essentials/core/dq_integration/yql_dq_optimization.h>
#include <ydb/library/yql/dq/opt/dq_opt_log.h>
#include <ydb/library/yql/dq/opt/dq_opt.h>
+#include <ydb/library/yql/dq/opt/dq_opt_join_cbo_factory.h>
+#include <ydb/library/yql/dq/opt/dq_opt_join_cost_based.h>
#include <ydb/library/yql/dq/opt/dq_opt_hopping.h>
#include <ydb/library/yql/dq/type_ann/dq_type_ann.h>
#include <ydb/library/yql/dq/expr_nodes/dq_expr_nodes.h>
@@ -261,15 +263,16 @@ protected:
YQL_CLOG(INFO, ProviderDq) << str;
};
- std::unique_ptr<IOptimizerNew> opt;
+ auto factory = MakeCBOOptimizerFactory();
+ std::shared_ptr<IOptimizerNew> opt;
TDqCBOProviderContext pctx(TypesCtx, Config);
switch (TypesCtx.CostBasedOptimizer) {
case ECostBasedOptimizerType::Native:
- opt = std::unique_ptr<IOptimizerNew>(NDq::MakeNativeOptimizerNew(pctx, 100000));
+ opt = factory->MakeJoinCostBasedOptimizerNative(pctx, ctx, {.MaxDPhypDPTableSize = 100000});
break;
case ECostBasedOptimizerType::PG:
- opt = std::unique_ptr<IOptimizerNew>(MakePgOptimizerNew(pctx, ctx, log));
+ opt = factory->MakeJoinCostBasedOptimizerPG(pctx, ctx, {.Logger = log});
break;
default:
YQL_ENSURE(false, "Unknown CBO type");
diff --git a/ydb/library/yql/providers/yt/provider/ut/ya.make b/ydb/library/yql/providers/yt/provider/ut/ya.make
index 2b8a0625d47..aafd53a18da 100644
--- a/ydb/library/yql/providers/yt/provider/ut/ya.make
+++ b/ydb/library/yql/providers/yt/provider/ut/ya.make
@@ -11,6 +11,7 @@ SRCS(
)
PEERDIR(
+ ydb/library/yql/dq/opt
ydb/library/yql/providers/yt/lib/schema
ydb/library/yql/providers/yt/provider
ydb/library/yql/providers/yt/gateway/file
diff --git a/ydb/library/yql/providers/yt/provider/ut/yql_yt_cbo_ut.cpp b/ydb/library/yql/providers/yt/provider/ut/yql_yt_cbo_ut.cpp
index f36a8e51300..c16f2964ba4 100644
--- a/ydb/library/yql/providers/yt/provider/ut/yql_yt_cbo_ut.cpp
+++ b/ydb/library/yql/providers/yt/provider/ut/yql_yt_cbo_ut.cpp
@@ -1,8 +1,7 @@
#include <library/cpp/testing/unittest/registar.h>
#include <ydb/library/yql/providers/yt/provider/yql_yt_join_impl.h>
-#include <yql/essentials/core/cbo/cbo_optimizer_new.h>
-#include <ydb/library/yql/dq/opt/dq_opt_log.h>
+#include <ydb/library/yql/dq/opt/dq_opt_join_cbo_factory.h>
namespace NYql {
@@ -55,6 +54,12 @@ TYtJoinNodeLeaf::TPtr MakeLeaf(const std::vector<TString>& label, TVector<TStrin
return leaf;
}
+TYtState::TPtr MakeState(TTypeAnnotationContext& typeCtx) {
+ TYtState::TPtr state = MakeIntrusive<TYtState>(&typeCtx);
+ state->OptimizerFactory_ = NDq::MakeCBOOptimizerFactory();
+ return state;
+}
+
} // namespace
Y_UNIT_TEST_SUITE(TYqlCBO) {
@@ -62,7 +67,7 @@ Y_UNIT_TEST_SUITE(TYqlCBO) {
Y_UNIT_TEST(OrderJoinsDoesNothingWhenCBODisabled) {
const TString cluster("ut_cluster");
TTypeAnnotationContext typeCtx;
- TYtState::TPtr state = MakeIntrusive<TYtState>(&typeCtx);
+ TYtState::TPtr state = MakeState(typeCtx);
TYtJoinNodeOp::TPtr tree = nullptr;
TYtJoinNodeOp::TPtr optimizedTree;
@@ -82,7 +87,9 @@ Y_UNIT_TEST(NonReordable) {
auto root = std::make_shared<TJoinOptimizerNode>(
left, right, leftKeys, rightKeys, EJoinKind::InnerJoin, EJoinAlgoType::GraceJoin, false, false, true);
TBaseProviderContext optCtx;
- std::unique_ptr<IOptimizerNew> opt = std::unique_ptr<IOptimizerNew>(NDq::MakeNativeOptimizerNew(optCtx, 1024));
+ auto factory = NDq::MakeCBOOptimizerFactory();
+ TExprContext ctx;
+ std::shared_ptr<IOptimizerNew> opt = factory->MakeJoinCostBasedOptimizerNative(optCtx, ctx, {.MaxDPhypDPTableSize = 1024});
auto result = opt->JoinSearch(root);
// Join tree is built from scratch with DPhyp, check the structure by comapring with Stats
@@ -100,7 +107,7 @@ Y_UNIT_TEST(NonReordable) {
Y_UNIT_TEST(BuildOptimizerTree2Tables) {
const TString cluster("ut_cluster");
TTypeAnnotationContext typeCtx;
- TYtState::TPtr state = MakeIntrusive<TYtState>(&typeCtx);
+ TYtState::TPtr state = MakeState(typeCtx);
TExprContext exprCtx;
auto tree = MakeOp({"c", "c_nationkey"}, {"n", "n_nationkey"}, {"c", "n"}, exprCtx);
tree->Left = MakeLeaf({"c"}, {"c"}, 100000, 12333, exprCtx);
@@ -128,7 +135,7 @@ Y_UNIT_TEST(BuildOptimizerTree2Tables) {
Y_UNIT_TEST(BuildOptimizerTree2TablesComplexLabel) {
const TString cluster("ut_cluster");
TTypeAnnotationContext typeCtx;
- TYtState::TPtr state = MakeIntrusive<TYtState>(&typeCtx);
+ TYtState::TPtr state = MakeState(typeCtx);
TExprContext exprCtx;
auto tree = MakeOp({"c", "c_nationkey"}, {"n", "n_nationkey"}, {"c", "n", "e"}, exprCtx);
tree->Left = MakeLeaf({"c"}, {"c"}, 1000000, 1233333, exprCtx);
@@ -156,7 +163,7 @@ Y_UNIT_TEST(BuildOptimizerTree2TablesComplexLabel) {
Y_UNIT_TEST(BuildYtJoinTree2Tables) {
const TString cluster("ut_cluster");
TTypeAnnotationContext typeCtx;
- TYtState::TPtr state = MakeIntrusive<TYtState>(&typeCtx);
+ TYtState::TPtr state = MakeState(typeCtx);
TExprContext exprCtx;
auto tree = MakeOp({"c", "c_nationkey"}, {"n", "n_nationkey"}, {"c", "n"}, exprCtx);
tree->Left = MakeLeaf({"c"}, {"c"}, 100000, 12333, exprCtx);
@@ -175,7 +182,7 @@ Y_UNIT_TEST(BuildYtJoinTree2Tables) {
Y_UNIT_TEST(BuildYtJoinTree2TablesForceMergeJoib) {
const TString cluster("ut_cluster");
TTypeAnnotationContext typeCtx;
- TYtState::TPtr state = MakeIntrusive<TYtState>(&typeCtx);
+ TYtState::TPtr state = MakeState(typeCtx);
TExprContext exprCtx;
auto tree = MakeOp({"c", "c_nationkey"}, {"n", "n_nationkey"}, {"c", "n"}, exprCtx);
tree->Left = MakeLeaf({"c"}, {"c"}, 100000, 12333, exprCtx);
@@ -195,7 +202,7 @@ Y_UNIT_TEST(BuildYtJoinTree2TablesForceMergeJoib) {
Y_UNIT_TEST(BuildYtJoinTree2TablesComplexLabel) {
const TString cluster("ut_cluster");
TTypeAnnotationContext typeCtx;
- TYtState::TPtr state = MakeIntrusive<TYtState>(&typeCtx);
+ TYtState::TPtr state = MakeState(typeCtx);
TExprContext exprCtx;
auto tree = MakeOp({"c", "c_nationkey"}, {"n", "n_nationkey"}, {"c", "n", "e"}, exprCtx);
tree->Left = MakeLeaf({"c"}, {"c"}, 1000000, 1233333, exprCtx);
@@ -214,7 +221,7 @@ Y_UNIT_TEST(BuildYtJoinTree2TablesTableIn2Rels)
{
const TString cluster("ut_cluster");
TTypeAnnotationContext typeCtx;
- TYtState::TPtr state = MakeIntrusive<TYtState>(&typeCtx);
+ TYtState::TPtr state = MakeState(typeCtx);
TExprContext exprCtx;
auto tree = MakeOp({"c", "c_nationkey"}, {"n", "n_nationkey"}, {"c", "n", "c"}, exprCtx);
tree->Left = MakeLeaf({"c"}, {"c"}, 1000000, 1233333, exprCtx);
@@ -246,7 +253,7 @@ void OrderJoins2Tables(auto optimizerType) {
TTypeAnnotationContext typeCtx;
typeCtx.CostBasedOptimizer = optimizerType;
- TYtState::TPtr state = MakeIntrusive<TYtState>(&typeCtx);
+ TYtState::TPtr state = MakeState(typeCtx);
auto optimizedTree = OrderJoins(tree, state, cluster, exprCtx, true);
UNIT_ASSERT(optimizedTree != tree);
UNIT_ASSERT(optimizedTree->Left);
@@ -274,7 +281,7 @@ void OrderJoins2TablesComplexLabel(auto optimizerType)
TTypeAnnotationContext typeCtx;
typeCtx.CostBasedOptimizer = optimizerType;
- TYtState::TPtr state = MakeIntrusive<TYtState>(&typeCtx);
+ TYtState::TPtr state = MakeState(typeCtx);
auto optimizedTree = OrderJoins(tree, state, cluster, exprCtx, true);
UNIT_ASSERT(optimizedTree != tree);
}
@@ -291,7 +298,7 @@ void OrderJoins2TablesTableIn2Rels(auto optimizerType)
TTypeAnnotationContext typeCtx;
typeCtx.CostBasedOptimizer = optimizerType;
- TYtState::TPtr state = MakeIntrusive<TYtState>(&typeCtx);
+ TYtState::TPtr state = MakeState(typeCtx);
auto optimizedTree = OrderJoins(tree, state, cluster, exprCtx, true);
UNIT_ASSERT(optimizedTree != tree);
}
@@ -309,7 +316,7 @@ Y_UNIT_TEST(OrderLeftJoin)
TTypeAnnotationContext typeCtx;
typeCtx.CostBasedOptimizer = ECostBasedOptimizerType::PG;
- TYtState::TPtr state = MakeIntrusive<TYtState>(&typeCtx);
+ TYtState::TPtr state = MakeState(typeCtx);
auto optimizedTree = OrderJoins(tree, state, cluster, exprCtx, true);
UNIT_ASSERT(optimizedTree != tree);
UNIT_ASSERT_STRINGS_EQUAL("Left", optimizedTree->JoinKind->Content());
@@ -326,7 +333,7 @@ Y_UNIT_TEST(UnsupportedJoin)
TTypeAnnotationContext typeCtx;
typeCtx.CostBasedOptimizer = ECostBasedOptimizerType::PG;
- TYtState::TPtr state = MakeIntrusive<TYtState>(&typeCtx);
+ TYtState::TPtr state = MakeState(typeCtx);
auto optimizedTree = OrderJoins(tree, state, cluster, exprCtx, true);
UNIT_ASSERT(optimizedTree == tree);
}
@@ -341,7 +348,7 @@ Y_UNIT_TEST(OrderJoinSinglePass) {
TTypeAnnotationContext typeCtx;
typeCtx.CostBasedOptimizer = ECostBasedOptimizerType::PG;
- TYtState::TPtr state = MakeIntrusive<TYtState>(&typeCtx);
+ TYtState::TPtr state = MakeState(typeCtx);
auto optimizedTree = OrderJoins(tree, state, cluster, exprCtx, true);
UNIT_ASSERT(optimizedTree != tree);
UNIT_ASSERT(optimizedTree->CostBasedOptPassed);
@@ -358,7 +365,7 @@ Y_UNIT_TEST(OrderJoinsDoesNothingWhenCBOAlreadyPassed) {
TTypeAnnotationContext typeCtx;
typeCtx.CostBasedOptimizer = ECostBasedOptimizerType::PG;
- TYtState::TPtr state = MakeIntrusive<TYtState>(&typeCtx);
+ TYtState::TPtr state = MakeState(typeCtx);
auto optimizedTree = OrderJoins(tree, state, cluster, exprCtx, true);
UNIT_ASSERT(optimizedTree == tree);
}
diff --git a/ydb/library/yql/providers/yt/provider/yql_yt_join_reorder.cpp b/ydb/library/yql/providers/yt/provider/yql_yt_join_reorder.cpp
index 8b67156fade..d2373606376 100644
--- a/ydb/library/yql/providers/yt/provider/yql_yt_join_reorder.cpp
+++ b/ydb/library/yql/providers/yt/provider/yql_yt_join_reorder.cpp
@@ -5,7 +5,6 @@
#include <yql/essentials/core/cbo/cbo_optimizer_new.h>
#include <yql/essentials/core/yql_graph_transformer.h>
-#include <ydb/library/yql/dq/opt/dq_opt_log.h>
#include <yql/essentials/parser/pg_wrapper/interface/optimizer.h>
#include <yql/essentials/providers/common/provider/yql_provider.h>
#include <ydb/library/yql/providers/yt/opt/yql_yt_join.h>
@@ -86,7 +85,7 @@ public:
YQL_CLOG(INFO, ProviderYt) << str;
};
- std::unique_ptr<IOptimizerNew> opt;
+ IOptimizerNew::TPtr opt;
switch (State->Types->CostBasedOptimizer) {
case ECostBasedOptimizerType::PG:
@@ -94,17 +93,17 @@ public:
YQL_CLOG(ERROR, ProviderYt) << "PG CBO does not support link settings";
return Root;
}
- opt = std::unique_ptr<IOptimizerNew>(MakePgOptimizerNew(*providerCtx, Ctx, log));
+ opt = State->OptimizerFactory_->MakeJoinCostBasedOptimizerPG(*providerCtx, Ctx, {.Logger = log});
break;
case ECostBasedOptimizerType::Native:
if (linkSettings.HasHints) {
YQL_CLOG(ERROR, ProviderYt) << "Native CBO does not suppor link hints";
return Root;
}
- opt = std::unique_ptr<IOptimizerNew>(NDq::MakeNativeOptimizerNew(*providerCtx, 100000));
+ opt = State->OptimizerFactory_->MakeJoinCostBasedOptimizerNative(*providerCtx, Ctx, {.MaxDPhypDPTableSize = 100000});
break;
- default:
- YQL_CLOG(ERROR, ProviderYt) << "Unknown optimizer type " << ToString(State->Types->CostBasedOptimizer);
+ case ECostBasedOptimizerType::Disable:
+ YQL_CLOG(DEBUG, ProviderYt) << "CBO disabled";
return Root;
}
diff --git a/ydb/library/yql/providers/yt/provider/yql_yt_provider.cpp b/ydb/library/yql/providers/yt/provider/yql_yt_provider.cpp
index 7f90d4224f1..563601d2c21 100644
--- a/ydb/library/yql/providers/yt/provider/yql_yt_provider.cpp
+++ b/ydb/library/yql/providers/yt/provider/yql_yt_provider.cpp
@@ -336,11 +336,15 @@ void TYtState::LeaveEvaluation(ui64 id) {
}
}
-std::pair<TIntrusivePtr<TYtState>, TStatWriter> CreateYtNativeState(IYtGateway::TPtr gateway, const TString& userName, const TString& sessionId, const TYtGatewayConfig* ytGatewayConfig, TIntrusivePtr<TTypeAnnotationContext> typeCtx) {
+std::pair<TIntrusivePtr<TYtState>, TStatWriter> CreateYtNativeState(IYtGateway::TPtr gateway, const TString& userName, const TString& sessionId,
+ const TYtGatewayConfig* ytGatewayConfig, TIntrusivePtr<TTypeAnnotationContext> typeCtx,
+ const IOptimizerFactory::TPtr& optFactory)
+{
auto ytState = MakeIntrusive<TYtState>(typeCtx.Get());
ytState->SessionId = sessionId;
ytState->Gateway = gateway;
ytState->DqIntegration_ = CreateYtDqIntegration(ytState.Get());
+ ytState->OptimizerFactory_ = optFactory;
if (ytGatewayConfig) {
std::unordered_set<std::string_view> groups;
@@ -374,8 +378,8 @@ std::pair<TIntrusivePtr<TYtState>, TStatWriter> CreateYtNativeState(IYtGateway::
return {ytState, statWriter};
}
-TDataProviderInitializer GetYtNativeDataProviderInitializer(IYtGateway::TPtr gateway, ui32 planLimits) {
- return [originalGateway = gateway, planLimits] (
+TDataProviderInitializer GetYtNativeDataProviderInitializer(IYtGateway::TPtr gateway, IOptimizerFactory::TPtr optFactory, ui32 planLimits) {
+ return [originalGateway = gateway, optFactory, planLimits] (
const TString& userName,
const TString& sessionId,
const TGatewaysConfig* gatewaysConfig,
@@ -404,7 +408,7 @@ TDataProviderInitializer GetYtNativeDataProviderInitializer(IYtGateway::TPtr gat
const TYtGatewayConfig* ytGatewayConfig = gatewaysConfig ? &gatewaysConfig->GetYt() : nullptr;
TIntrusivePtr<TYtState> ytState;
TStatWriter statWriter;
- std::tie(ytState, statWriter) = CreateYtNativeState(gateway, userName, sessionId, ytGatewayConfig, typeCtx);
+ std::tie(ytState, statWriter) = CreateYtNativeState(gateway, userName, sessionId, ytGatewayConfig, typeCtx, optFactory);
ytState->PlanLimits = planLimits;
info.Names.insert({TString{YtProviderName}});
diff --git a/ydb/library/yql/providers/yt/provider/yql_yt_provider.h b/ydb/library/yql/providers/yt/provider/yql_yt_provider.h
index 962190c9452..a08cb54b515 100644
--- a/ydb/library/yql/providers/yt/provider/yql_yt_provider.h
+++ b/ydb/library/yql/providers/yt/provider/yql_yt_provider.h
@@ -7,6 +7,7 @@
#include <ydb/library/yql/providers/yt/common/yql_yt_settings.h>
#include <ydb/library/yql/providers/yt/lib/row_spec/yql_row_spec.h>
+#include <yql/essentials/core/cbo/cbo_optimizer_new.h>
#include <yql/essentials/core/dq_integration/yql_dq_integration.h>
#include <yql/essentials/core/yql_data_provider.h>
#include <yql/essentials/core/yql_execution.h>
@@ -119,7 +120,7 @@ struct TYtState : public TThrRefBase {
THashMap<ui64, TWalkFoldersImpl> WalkFoldersState;
ui32 PlanLimits = 10;
i32 FlowDependsOnId = 0;
-
+ IOptimizerFactory::TPtr OptimizerFactory_;
private:
std::unordered_map<ui64, TYtVersionedConfiguration::TState> ConfigurationEvalStates_;
std::unordered_map<ui64, ui32> EpochEvalStates_;
@@ -127,11 +128,13 @@ private:
class TYtGatewayConfig;
-std::pair<TIntrusivePtr<TYtState>, TStatWriter> CreateYtNativeState(IYtGateway::TPtr gateway, const TString& userName, const TString& sessionId, const TYtGatewayConfig* ytGatewayConfig, TIntrusivePtr<TTypeAnnotationContext> typeCtx);
+std::pair<TIntrusivePtr<TYtState>, TStatWriter> CreateYtNativeState(IYtGateway::TPtr gateway, const TString& userName, const TString& sessionId,
+ const TYtGatewayConfig* ytGatewayConfig, TIntrusivePtr<TTypeAnnotationContext> typeCtx,
+ const IOptimizerFactory::TPtr& optFactory);
TIntrusivePtr<IDataProvider> CreateYtDataSource(TYtState::TPtr state);
TIntrusivePtr<IDataProvider> CreateYtDataSink(TYtState::TPtr state);
-TDataProviderInitializer GetYtNativeDataProviderInitializer(IYtGateway::TPtr gateway, ui32 planLimits = 10);
+TDataProviderInitializer GetYtNativeDataProviderInitializer(IYtGateway::TPtr gateway, IOptimizerFactory::TPtr optFactory, ui32 planLimits = 10);
const THashSet<TStringBuf>& YtDataSourceFunctions();
const THashSet<TStringBuf>& YtDataSinkFunctions();
diff --git a/ydb/library/yql/public/embedded/ya.make b/ydb/library/yql/public/embedded/ya.make
index 7b7c333ffc7..2c68a838310 100644
--- a/ydb/library/yql/public/embedded/ya.make
+++ b/ydb/library/yql/public/embedded/ya.make
@@ -34,6 +34,7 @@ PEERDIR(
yql/essentials/providers/common/udf_resolve
yql/essentials/core/url_preprocessing
yql/essentials/core/url_lister
+ ydb/library/yql/dq/opt
ydb/library/yql/providers/yt/gateway/native
ydb/library/yql/providers/yt/lib/log
ydb/library/yql/providers/yt/lib/yt_download
diff --git a/ydb/library/yql/public/embedded/yql_embedded.cpp b/ydb/library/yql/public/embedded/yql_embedded.cpp
index 4708741462a..6bea5cb73a5 100644
--- a/ydb/library/yql/public/embedded/yql_embedded.cpp
+++ b/ydb/library/yql/public/embedded/yql_embedded.cpp
@@ -1,5 +1,7 @@
#include "yql_embedded.h"
+#include <ydb/library/yql/dq/opt/dq_opt_join_cbo_factory.h>
+
#include <ydb/library/yql/providers/yt/lib/log/yt_logger.h>
#include <ydb/library/yql/providers/yt/lib/yt_download/yt_download.h>
#include <ydb/library/yql/providers/yt/lib/yt_url_lister/yt_url_lister.h>
@@ -343,7 +345,7 @@ namespace NYql {
ytServices.FileStorage = FileStorage_;
ytServices.Config = std::make_shared<TYtGatewayConfig>(*ytConfig);
auto ytNativeGateway = CreateYtNativeGateway(ytServices);
- dataProvidersInit.push_back(GetYtNativeDataProviderInitializer(ytNativeGateway));
+ dataProvidersInit.push_back(GetYtNativeDataProviderInitializer(ytNativeGateway, NDq::MakeCBOOptimizerFactory()));
ProgramFactory_ = MakeHolder<TProgramFactory>(
false, FuncRegistry_.Get(), ExprContext_.NextUniqueId, dataProvidersInit, "embedded");
diff --git a/ydb/library/yql/public/purecalc/common/compile_mkql.cpp b/ydb/library/yql/public/purecalc/common/compile_mkql.cpp
deleted file mode 100644
index 8682f589c91..00000000000
--- a/ydb/library/yql/public/purecalc/common/compile_mkql.cpp
+++ /dev/null
@@ -1,116 +0,0 @@
-#include "compile_mkql.h"
-
-#include <yql/essentials/providers/common/mkql/yql_provider_mkql.h>
-#include <yql/essentials/providers/common/mkql/yql_type_mkql.h>
-#include <yql/essentials/core/yql_user_data_storage.h>
-#include <ydb/library/yql/public/purecalc/common/names.h>
-
-#include <util/stream/file.h>
-
-namespace NYql::NPureCalc {
-
-namespace {
-
-NCommon::IMkqlCallableCompiler::TCompiler MakeSelfCallableCompiler() {
- return [](const TExprNode& node, NCommon::TMkqlBuildContext& ctx) {
- MKQL_ENSURE(node.ChildrenSize() == 1, "Self takes exactly 1 argument");
- const auto* argument = node.Child(0);
- MKQL_ENSURE(argument->IsAtom(), "Self argument must be atom");
- ui32 inputIndex = 0;
- MKQL_ENSURE(TryFromString(argument->Content(), inputIndex), "Self argument must be UI32");
- auto type = NCommon::BuildType(node, *node.GetTypeAnn(), ctx.ProgramBuilder);
- NKikimr::NMiniKQL::TCallableBuilder call(ctx.ProgramBuilder.GetTypeEnvironment(), node.Content(), type);
- call.Add(ctx.ProgramBuilder.NewDataLiteral<ui32>(inputIndex));
- return NKikimr::NMiniKQL::TRuntimeNode(call.Build(), false);
- };
-}
-
-NCommon::IMkqlCallableCompiler::TCompiler MakeFilePathCallableCompiler(const TUserDataTable& userData) {
- return [&](const TExprNode& node, NCommon::TMkqlBuildContext& ctx) {
- const TString name(node.Child(0)->Content());
- auto block = TUserDataStorage::FindUserDataBlock(userData, TUserDataKey::File(name));
- if (!block) {
- auto blockKey = TUserDataKey::File(GetDefaultFilePrefix() + name);
- block = TUserDataStorage::FindUserDataBlock(userData, blockKey);
- }
- MKQL_ENSURE(block, "file not found: " << name);
- MKQL_ENSURE(block->Type == EUserDataType::PATH,
- "FilePath not supported for non-filesystem user data, name: "
- << name << ", block type: " << block->Type);
- return ctx.ProgramBuilder.NewDataLiteral<NKikimr::NUdf::EDataSlot::String>(block->Data);
- };
-}
-
-NCommon::IMkqlCallableCompiler::TCompiler MakeFileContentCallableCompiler(const TUserDataTable& userData) {
- return [&](const TExprNode& node, NCommon::TMkqlBuildContext& ctx) {
- const TString name(node.Child(0)->Content());
- auto block = TUserDataStorage::FindUserDataBlock(userData, TUserDataKey::File(name));
- if (!block) {
- auto blockKey = TUserDataKey::File(GetDefaultFilePrefix() + name);
- block = TUserDataStorage::FindUserDataBlock(userData, blockKey);
- }
- MKQL_ENSURE(block, "file not found: " << name);
- if (block->Type == EUserDataType::PATH) {
- auto content = TFileInput(block->Data).ReadAll();
- return ctx.ProgramBuilder.NewDataLiteral<NKikimr::NUdf::EDataSlot::String>(content);
- } else if (block->Type == EUserDataType::RAW_INLINE_DATA) {
- return ctx.ProgramBuilder.NewDataLiteral<NKikimr::NUdf::EDataSlot::String>(block->Data);
- } else {
- // TODO support EUserDataType::URL
- MKQL_ENSURE(false, "user data blocks of type URL are not supported by FileContent: " << name);
- Y_UNREACHABLE();
- }
- };
-}
-
-NCommon::IMkqlCallableCompiler::TCompiler MakeFolderPathCallableCompiler(const TUserDataTable& userData) {
- return [&](const TExprNode& node, NCommon::TMkqlBuildContext& ctx) {
- const TString name(node.Child(0)->Content());
- auto folderName = TUserDataStorage::MakeFolderName(name);
- TMaybe<TString> folderPath;
- for (const auto& x : userData) {
- if (!x.first.Alias().StartsWith(folderName)) {
- continue;
- }
-
- MKQL_ENSURE(x.second.Type == EUserDataType::PATH,
- "FilePath not supported for non-file data block, name: "
- << x.first.Alias() << ", block type: " << x.second.Type);
-
- auto pathPrefixLength = x.second.Data.size() - (x.first.Alias().size() - folderName.size());
- auto newFolderPath = x.second.Data.substr(0, pathPrefixLength);
- if (!folderPath) {
- folderPath = newFolderPath;
- } else {
- MKQL_ENSURE(*folderPath == newFolderPath,
- "file " << x.second.Data << " is out of directory " << *folderPath);
- }
- }
- return ctx.ProgramBuilder.NewDataLiteral<NKikimr::NUdf::EDataSlot::String>(*folderPath);
- };
-}
-
-}
-
-NKikimr::NMiniKQL::TRuntimeNode CompileMkql(const TExprNode::TPtr& exprRoot, TExprContext& exprCtx,
- const NKikimr::NMiniKQL::IFunctionRegistry& funcRegistry, const NKikimr::NMiniKQL::TTypeEnvironment& env, const TUserDataTable& userData)
-{
- NCommon::TMkqlCommonCallableCompiler compiler;
-
- compiler.AddCallable(PurecalcInputCallableName, MakeSelfCallableCompiler());
- compiler.AddCallable(PurecalcBlockInputCallableName, MakeSelfCallableCompiler());
- compiler.OverrideCallable("FileContent", MakeFileContentCallableCompiler(userData));
- compiler.OverrideCallable("FilePath", MakeFilePathCallableCompiler(userData));
- compiler.OverrideCallable("FolderPath", MakeFolderPathCallableCompiler(userData));
-
- // Prepare build context
-
- NKikimr::NMiniKQL::TProgramBuilder pgmBuilder(env, funcRegistry);
- NCommon::TMkqlBuildContext buildCtx(compiler, pgmBuilder, exprCtx);
-
- // Build the root MKQL node
-
- return NCommon::MkqlBuildExpr(*exprRoot, buildCtx);
-}
-
-} // NYql::NPureCalc
diff --git a/ydb/library/yql/public/purecalc/common/compile_mkql.h b/ydb/library/yql/public/purecalc/common/compile_mkql.h
deleted file mode 100644
index 488c4d277bf..00000000000
--- a/ydb/library/yql/public/purecalc/common/compile_mkql.h
+++ /dev/null
@@ -1,17 +0,0 @@
-#pragma once
-
-#include <ydb/library/yql/public/purecalc/common/interface.h>
-#include <yql/essentials/minikql/mkql_node.h>
-#include <yql/essentials/ast/yql_expr.h>
-#include <yql/essentials/core/yql_user_data.h>
-
-namespace NYql {
- namespace NPureCalc {
- /**
- * Compile expr to mkql byte-code
- */
-
- NKikimr::NMiniKQL::TRuntimeNode CompileMkql(const TExprNode::TPtr& exprRoot, TExprContext& exprCtx,
- const NKikimr::NMiniKQL::IFunctionRegistry& funcRegistry, const NKikimr::NMiniKQL::TTypeEnvironment& env, const TUserDataTable& userData);
- }
-}
diff --git a/ydb/library/yql/public/purecalc/common/fwd.cpp b/ydb/library/yql/public/purecalc/common/fwd.cpp
deleted file mode 100644
index 4214b6df83e..00000000000
--- a/ydb/library/yql/public/purecalc/common/fwd.cpp
+++ /dev/null
@@ -1 +0,0 @@
-#include "fwd.h"
diff --git a/ydb/library/yql/public/purecalc/common/fwd.h b/ydb/library/yql/public/purecalc/common/fwd.h
deleted file mode 100644
index 22df90a6b29..00000000000
--- a/ydb/library/yql/public/purecalc/common/fwd.h
+++ /dev/null
@@ -1,56 +0,0 @@
-#pragma once
-
-#include <util/generic/fwd.h>
-#include <memory>
-
-namespace NYql::NPureCalc {
- class TCompileError;
-
- template <typename>
- class IConsumer;
-
- template <typename>
- class IStream;
-
- class IProgramFactory;
-
- class IWorkerFactory;
-
- class IPullStreamWorkerFactory;
-
- class IPullListWorkerFactory;
-
- class IPushStreamWorkerFactory;
-
- class IWorker;
-
- class IPullStreamWorker;
-
- class IPullListWorker;
-
- class IPushStreamWorker;
-
- class TInputSpecBase;
-
- class TOutputSpecBase;
-
- class IProgram;
-
- template <typename, typename, typename>
- class TProgramCommon;
-
- template <typename, typename>
- class TPullStreamProgram;
-
- template <typename, typename>
- class TPullListProgram;
-
- template <typename, typename>
- class TPushStreamProgram;
-
- using IProgramFactoryPtr = TIntrusivePtr<IProgramFactory>;
- using IWorkerFactoryPtr = std::shared_ptr<IWorkerFactory>;
- using IPullStreamWorkerFactoryPtr = std::shared_ptr<IPullStreamWorkerFactory>;
- using IPullListWorkerFactoryPtr = std::shared_ptr<IPullListWorkerFactory>;
- using IPushStreamWorkerFactoryPtr = std::shared_ptr<IPushStreamWorkerFactory>;
-}
diff --git a/ydb/library/yql/public/purecalc/common/inspect_input.cpp b/ydb/library/yql/public/purecalc/common/inspect_input.cpp
deleted file mode 100644
index 9ca56da5dec..00000000000
--- a/ydb/library/yql/public/purecalc/common/inspect_input.cpp
+++ /dev/null
@@ -1,33 +0,0 @@
-#include "inspect_input.h"
-
-#include <yql/essentials/core/yql_expr_type_annotation.h>
-
-namespace NYql::NPureCalc {
- bool TryFetchInputIndexFromSelf(const TExprNode& node, TExprContext& ctx, ui32 inputsCount, ui32& result) {
- TIssueScopeGuard issueSope(ctx.IssueManager, [&]() {
- return MakeIntrusive<TIssue>(ctx.GetPosition(node.Pos()), TStringBuilder() << "At function: " << node.Content());
- });
-
- if (!EnsureArgsCount(node, 1, ctx)) {
- return false;
- }
-
- if (!EnsureAtom(*node.Child(0), ctx)) {
- return false;
- }
-
- if (!TryFromString(node.Child(0)->Content(), result)) {
- auto message = TStringBuilder() << "Index " << TString{node.Child(0)->Content()}.Quote() << " isn't UI32";
- ctx.AddError(TIssue(ctx.GetPosition(node.Child(0)->Pos()), std::move(message)));
- return false;
- }
-
- if (result >= inputsCount) {
- auto message = TStringBuilder() << "Invalid input index: " << result << " is out of range [0;" << inputsCount << ")";
- ctx.AddError(TIssue(ctx.GetPosition(node.Child(0)->Pos()), std::move(message)));
- return false;
- }
-
- return true;
- }
-}
diff --git a/ydb/library/yql/public/purecalc/common/inspect_input.h b/ydb/library/yql/public/purecalc/common/inspect_input.h
deleted file mode 100644
index 558144865da..00000000000
--- a/ydb/library/yql/public/purecalc/common/inspect_input.h
+++ /dev/null
@@ -1,7 +0,0 @@
-#pragma once
-
-#include <yql/essentials/ast/yql_expr.h>
-
-namespace NYql::NPureCalc {
- bool TryFetchInputIndexFromSelf(const TExprNode&, TExprContext&, ui32, ui32&);
-}
diff --git a/ydb/library/yql/public/purecalc/common/interface.cpp b/ydb/library/yql/public/purecalc/common/interface.cpp
deleted file mode 100644
index c88525a76f4..00000000000
--- a/ydb/library/yql/public/purecalc/common/interface.cpp
+++ /dev/null
@@ -1,128 +0,0 @@
-#include "interface.h"
-
-#include <yql/essentials/providers/common/codec/yql_codec_type_flags.h>
-#include <ydb/library/yql/public/purecalc/common/logger_init.h>
-#include <ydb/library/yql/public/purecalc/common/program_factory.h>
-
-using namespace NYql;
-using namespace NYql::NPureCalc;
-
-TLoggingOptions::TLoggingOptions()
- : LogLevel_(ELogPriority::TLOG_ERR)
- , LogDestination(&Clog)
-{
-}
-
-TLoggingOptions& TLoggingOptions::SetLogLevel(ELogPriority logLevel) {
- LogLevel_ = logLevel;
- return *this;
-}
-
-TLoggingOptions& TLoggingOptions::SetLogDestination(IOutputStream* logDestination) {
- LogDestination = logDestination;
- return *this;
-}
-
-TProgramFactoryOptions::TProgramFactoryOptions()
- : UdfsDir_("")
- , UserData_()
- , LLVMSettings("OFF")
- , BlockEngineSettings("disable")
- , ExprOutputStream(nullptr)
- , CountersProvider(nullptr)
- , NativeYtTypeFlags(0)
- , UseSystemColumns(false)
- , UseWorkerPool(true)
-{
-}
-
-TProgramFactoryOptions& TProgramFactoryOptions::SetUDFsDir(TStringBuf dir) {
- UdfsDir_ = dir;
- return *this;
-}
-
-TProgramFactoryOptions& TProgramFactoryOptions::AddLibrary(NUserData::EDisposition disposition, TStringBuf name, TStringBuf content) {
- auto& ref = UserData_.emplace_back();
-
- ref.Type_ = NUserData::EType::LIBRARY;
- ref.Disposition_ = disposition;
- ref.Name_ = name;
- ref.Content_ = content;
-
- return *this;
-}
-
-TProgramFactoryOptions& TProgramFactoryOptions::AddFile(NUserData::EDisposition disposition, TStringBuf name, TStringBuf content) {
- auto& ref = UserData_.emplace_back();
-
- ref.Type_ = NUserData::EType::FILE;
- ref.Disposition_ = disposition;
- ref.Name_ = name;
- ref.Content_ = content;
-
- return *this;
-}
-
-TProgramFactoryOptions& TProgramFactoryOptions::AddUDF(NUserData::EDisposition disposition, TStringBuf name, TStringBuf content) {
- auto& ref = UserData_.emplace_back();
-
- ref.Type_ = NUserData::EType::UDF;
- ref.Disposition_ = disposition;
- ref.Name_ = name;
- ref.Content_ = content;
-
- return *this;
-}
-
-TProgramFactoryOptions& TProgramFactoryOptions::SetLLVMSettings(TStringBuf llvm_settings) {
- LLVMSettings = llvm_settings;
- return *this;
-}
-
-TProgramFactoryOptions& TProgramFactoryOptions::SetBlockEngineSettings(TStringBuf blockEngineSettings) {
- BlockEngineSettings = blockEngineSettings;
- return *this;
-}
-
-TProgramFactoryOptions& TProgramFactoryOptions::SetExprOutputStream(IOutputStream* exprOutputStream) {
- ExprOutputStream = exprOutputStream;
- return *this;
-}
-
-TProgramFactoryOptions& TProgramFactoryOptions::SetCountersProvider(NKikimr::NUdf::ICountersProvider* countersProvider) {
- CountersProvider = countersProvider;
- return *this;
-}
-
-TProgramFactoryOptions& TProgramFactoryOptions::SetUseNativeYtTypes(bool useNativeTypes) {
- NativeYtTypeFlags = useNativeTypes ? NTCF_PRODUCTION : NTCF_NONE;
- return *this;
-}
-
-TProgramFactoryOptions& TProgramFactoryOptions::SetNativeYtTypeFlags(ui64 nativeTypeFlags) {
- NativeYtTypeFlags = nativeTypeFlags;
- return *this;
-}
-
-TProgramFactoryOptions& TProgramFactoryOptions::SetDeterministicTimeProviderSeed(TMaybe<ui64> seed) {
- DeterministicTimeProviderSeed = seed;
- return *this;
-}
-
-TProgramFactoryOptions& TProgramFactoryOptions::SetUseSystemColumns(bool useSystemColumns) {
- UseSystemColumns = useSystemColumns;
- return *this;
-}
-
-TProgramFactoryOptions& TProgramFactoryOptions::SetUseWorkerPool(bool useWorkerPool) {
- UseWorkerPool = useWorkerPool;
- return *this;
-}
-
-void NYql::NPureCalc::ConfigureLogging(const TLoggingOptions& options) {
- InitLogging(options);
-}
-
-IProgramFactoryPtr NYql::NPureCalc::MakeProgramFactory(const TProgramFactoryOptions& options) {
- return new TProgramFactory(options);
-}
diff --git a/ydb/library/yql/public/purecalc/common/interface.h b/ydb/library/yql/public/purecalc/common/interface.h
deleted file mode 100644
index 6e56c9aa3f9..00000000000
--- a/ydb/library/yql/public/purecalc/common/interface.h
+++ /dev/null
@@ -1,1180 +0,0 @@
-#pragma once
-
-#include "fwd.h"
-#include "wrappers.h"
-
-#include <yql/essentials/core/user_data/yql_user_data.h>
-
-#include <yql/essentials/public/udf/udf_value.h>
-#include <yql/essentials/public/udf/udf_counter.h>
-#include <yql/essentials/public/udf/udf_registrator.h>
-
-#include <yql/essentials/public/issue/yql_issue.h>
-#include <library/cpp/yson/node/node.h>
-
-#include <library/cpp/logger/priority.h>
-
-#include <util/generic/ptr.h>
-#include <util/generic/maybe.h>
-#include <util/generic/hash_set.h>
-#include <util/generic/string.h>
-#include <util/stream/output.h>
-
-class ITimeProvider;
-
-namespace NKikimr {
- namespace NMiniKQL {
- class TScopedAlloc;
- class IComputationGraph;
- class IFunctionRegistry;
- class TTypeEnvironment;
- class TType;
- class TStructType;
- }
-}
-
-namespace NYql {
- namespace NPureCalc {
- /**
- * SQL or s-expression translation error.
- */
- class TCompileError: public yexception {
- private:
- TString Yql_;
- TString Issues_;
-
- public:
- // TODO: maybe accept an actual list of issues here?
- // See https://a.yandex-team.ru/arc/review/439403/details#comment-778237
- TCompileError(TString yql, TString issues)
- : Yql_(std::move(yql))
- , Issues_(std::move(issues))
- {
- }
-
- public:
- /**
- * Get the sql query which caused the error (if there is one available).
- */
- const TString& GetYql() const {
- return Yql_;
- }
-
- /**
- * Get detailed description for all errors and warnings that happened during sql translation.
- */
- const TString& GetIssues() const {
- return Issues_;
- }
- };
-
- ////////////////////////////////////////////////////////////////////////////////////////////////////
-
- /**
- * A generic input stream of objects.
- */
- template <typename T>
- class IStream {
- public:
- virtual ~IStream() = default;
-
- public:
- /**
- * Pops and returns a next value in the stream. If the stream is finished, should return some sentinel object.
- *
- * Depending on return type, this function may not transfer object ownership to a user.
- * Thus, the stream may manage the returned object * itself.
- * That is, the returned object's lifetime may be bound to the input stream lifetime; it may be destroyed
- * upon calling Fetch() or upon destroying the stream, whichever happens first.
- */
- virtual T Fetch() = 0;
- };
-
- /**
- * Create a new stream which applies the given functor to the elements of the original stream.
- */
- template <typename TOld, typename TNew, typename TFunctor>
- inline THolder<IStream<TNew>> MapStream(THolder<IStream<TOld>> stream, TFunctor functor) {
- return THolder(new NPrivate::TMappingStream<TNew, TOld, TFunctor>(std::move(stream), std::move(functor)));
- };
-
- /**
- * Convert stream of objects into a stream of potentially incompatible objects.
- *
- * This conversion applies static cast to the output of the original stream. Use with caution!
- */
- /// @{
- template <
- typename TNew, typename TOld,
- std::enable_if_t<!std::is_same<TNew, TOld>::value>* = nullptr>
- inline THolder<IStream<TNew>> ConvertStreamUnsafe(THolder<IStream<TOld>> stream) {
- return MapStream<TOld, TNew>(std::move(stream), [](TOld x) -> TNew { return static_cast<TNew>(x); });
- }
- template <typename T>
- inline THolder<IStream<T>> ConvertStreamUnsafe(THolder<IStream<T>> stream) {
- return stream;
- }
- /// @}
-
- /**
- * Convert stream of objects into a stream of compatible objects.
- *
- * Note: each conversion adds one level of indirection so avoid them if possible.
- */
- template <typename TNew, typename TOld, std::enable_if_t<std::is_convertible<TOld, TNew>::value>* = nullptr>
- inline THolder<IStream<TNew>> ConvertStream(THolder<IStream<TOld>> stream) {
- return ConvertStreamUnsafe<TNew, TOld>(std::move(stream));
- }
-
- ////////////////////////////////////////////////////////////////////////////////////////////////////
-
- /**
- * A generic push consumer.
- */
- template <typename T>
- class IConsumer {
- public:
- virtual ~IConsumer() = default;
-
- public:
- /**
- * Feed an object to consumer.
- *
- * Depending on argument type, the consumer may not take ownership of the passed object;
- * in that case it is the caller responsibility to manage the object lifetime after passing it to this method.
- *
- * The passed object can be destroyed after the consumer returns from this function; the consumer should
- * not store pointer to the passed object or the passed object itself without taking all necessary precautions
- * to ensure that the pointer or the object stays valid after returning.
- */
- virtual void OnObject(T) = 0;
-
- /**
- * Close the consumer and run finalization logic. Calling OnObject after calling this function is an error.
- */
- virtual void OnFinish() = 0;
- };
-
- /**
- * Create a new consumer which applies the given functor to objects before .
- */
- template <typename TOld, typename TNew, typename TFunctor>
- inline THolder<IConsumer<TNew>> MapConsumer(THolder<IConsumer<TOld>> stream, TFunctor functor) {
- return THolder(new NPrivate::TMappingConsumer<TNew, TOld, TFunctor>(std::move(stream), std::move(functor)));
- };
-
-
- /**
- * Convert consumer of objects into a consumer of potentially incompatible objects.
- *
- * This conversion applies static cast to the input value. Use with caution.
- */
- /// @{
- template <
- typename TNew, typename TOld,
- std::enable_if_t<!std::is_same<TNew, TOld>::value>* = nullptr>
- inline THolder<IConsumer<TNew>> ConvertConsumerUnsafe(THolder<IConsumer<TOld>> consumer) {
- return MapConsumer<TOld, TNew>(std::move(consumer), [](TNew x) -> TOld { return static_cast<TOld>(x); });
- }
- template <typename T>
- inline THolder<IConsumer<T>> ConvertConsumerUnsafe(THolder<IConsumer<T>> consumer) {
- return consumer;
- }
- /// @}
-
- /**
- * Convert consumer of objects into a consumer of compatible objects.
- *
- * Note: each conversion adds one level of indirection so avoid them if possible.
- */
- template <typename TNew, typename TOld, std::enable_if_t<std::is_convertible<TNew, TOld>::value>* = nullptr>
- inline THolder<IConsumer<TNew>> ConvertConsumer(THolder<IConsumer<TOld>> consumer) {
- return ConvertConsumerUnsafe<TNew, TOld>(std::move(consumer));
- }
-
- /**
- * Create a consumer which holds a non-owning pointer to the given consumer
- * and passes all messages to the latter.
- */
- template <typename T, typename C>
- THolder<NPrivate::TNonOwningConsumer<T, C>> MakeNonOwningConsumer(C consumer) {
- return MakeHolder<NPrivate::TNonOwningConsumer<T, C>>(consumer);
- }
-
- ////////////////////////////////////////////////////////////////////////////////////////////////////
-
- /**
- * Logging options.
- */
- struct TLoggingOptions final {
- public:
- /// Logging level for messages generated during compilation.
- ELogPriority LogLevel_; // TODO: rename to LogLevel
-
- /// Where to write log messages.
- IOutputStream* LogDestination;
-
- public:
- TLoggingOptions();
- /**
- * Set a new logging level.
- *
- * @return reference to self, to allow method chaining.
- */
- TLoggingOptions& SetLogLevel(ELogPriority);
-
- /**
- * Set a new logging destination.
- *
- * @return reference to self, to allow method chaining.
- */
- TLoggingOptions& SetLogDestination(IOutputStream*);
- };
-
- /**
- * General options for program factory.
- */
- struct TProgramFactoryOptions final {
- public:
- /// Path to a directory with compiled UDFs. Leave empty to disable loading external UDFs.
- TString UdfsDir_; // TODO: rename to UDFDir
-
- /// List of available external resources, e.g. files, UDFs, libraries.
- TVector<NUserData::TUserData> UserData_; // TODO: rename to UserData
-
- /// LLVM settings. Assign "OFF" to disable LLVM, empty string for default settings.
- TString LLVMSettings;
-
- /// Block engine settings. Assign "force" to unconditionally enable
- /// it, "disable" for turn it off and "auto" to left the final
- /// decision to the platform heuristics.
- TString BlockEngineSettings;
-
- /// Output stream to dump the compiled and optimized expressions.
- IOutputStream* ExprOutputStream;
-
- /// Provider for generic counters which can be used to export statistics from UDFs.
- NKikimr::NUdf::ICountersProvider* CountersProvider;
-
- /// YT Type V3 flags for Skiff/Yson serialization.
- ui64 NativeYtTypeFlags;
-
- /// Seed for deterministic time provider
- TMaybe<ui64> DeterministicTimeProviderSeed;
-
- /// Use special system columns to support tables naming (supports non empty ``TablePath()``/``TableName()``)
- bool UseSystemColumns;
-
- /// Reuse allocated workers
- bool UseWorkerPool;
-
- public:
- TProgramFactoryOptions();
-
- public:
- /**
- * Set a new path to a directory with UDFs.
- *
- * @return reference to self, to allow method chaining.
- */
- TProgramFactoryOptions& SetUDFsDir(TStringBuf);
-
- /**
- * Add a new library to the UserData list.
- *
- * @param disposition where the resource resides, e.g. on filesystem, in memory, etc.
- * NB: URL disposition is not supported.
- * @param name name of the resource.
- * @param content depending on disposition, either path to the resource or its content.
- * @return reference to self, to allow method chaining.
- */
- TProgramFactoryOptions& AddLibrary(NUserData::EDisposition disposition, TStringBuf name, TStringBuf content);
-
- /**
- * Add a new file to the UserData list.
- *
- * @param disposition where the resource resides, e.g. on filesystem, in memory, etc.
- * NB: URL disposition is not supported.
- * @param name name of the resource.
- * @param content depending on disposition, either path to the resource or its content.
- * @return reference to self, to allow method chaining.
- */
- TProgramFactoryOptions& AddFile(NUserData::EDisposition disposition, TStringBuf name, TStringBuf content);
-
- /**
- * Add a new UDF to the UserData list.
- *
- * @param disposition where the resource resides, e.g. on filesystem, in memory, etc.
- * NB: URL disposition is not supported.
- * @param name name of the resource.
- * @param content depending on disposition, either path to the resource or its content.
- * @return reference to self, to allow method chaining.
- */
- TProgramFactoryOptions& AddUDF(NUserData::EDisposition disposition, TStringBuf name, TStringBuf content);
-
- /**
- * Set new LLVM settings.
- *
- * @return reference to self, to allow method chaining.
- */
- TProgramFactoryOptions& SetLLVMSettings(TStringBuf llvm_settings);
-
- /**
- * Set new block engine settings.
- *
- * @return reference to self, to allow method chaining.
- */
- TProgramFactoryOptions& SetBlockEngineSettings(TStringBuf blockEngineSettings);
-
- /**
- * Set the stream to dump the compiled and optimized expressions.
- *
- * @return reference to self, to allow method chaining.
- */
- TProgramFactoryOptions& SetExprOutputStream(IOutputStream* exprOutputStream);
-
- /**
- * Set new counters provider. Passed pointer should stay alive for as long as the processor factory
- * stays alive.
- *
- * @return reference to self, to allow method chaining.
- */
- TProgramFactoryOptions& SetCountersProvider(NKikimr::NUdf::ICountersProvider* countersProvider);
-
- /**
- * Set new YT Type V3 mode. Deprecated method. Use SetNativeYtTypeFlags instead
- *
- * @return reference to self, to allow method chaining.
- */
- TProgramFactoryOptions& SetUseNativeYtTypes(bool useNativeTypes);
-
- /**
- * Set YT Type V3 flags.
- *
- * @return reference to self, to allow method chaining.
- */
- TProgramFactoryOptions& SetNativeYtTypeFlags(ui64 nativeTypeFlags);
-
- /**
- * Set seed for deterministic time provider.
- *
- * @return reference to self, to allow method chaining.
- */
- TProgramFactoryOptions& SetDeterministicTimeProviderSeed(TMaybe<ui64> seed);
-
- /**
- * Set new flag whether to allow using system columns or not.
- *
- * @return reference to self, to allow method chaining.
- */
- TProgramFactoryOptions& SetUseSystemColumns(bool useSystemColumns);
-
- /**
- * Set new flag whether to allow reusing workers or not.
- *
- * @return reference to self, to allow method chaining.
- */
- TProgramFactoryOptions& SetUseWorkerPool(bool useWorkerPool);
- };
-
- ////////////////////////////////////////////////////////////////////////////////////////////////////
-
- /**
- * What exactly are we parsing: SQL or an s-expression.
- */
- enum class ETranslationMode {
- SQL /* "SQL" */,
- SExpr /* "s-expression" */,
- Mkql /* "mkql" */,
- PG /* PostgreSQL */
- };
-
- /**
- * A facility for compiling sql and s-expressions and making programs from them.
- */
- class IProgramFactory: public TThrRefBase {
- protected:
- virtual IPullStreamWorkerFactoryPtr MakePullStreamWorkerFactory(const TInputSpecBase&, const TOutputSpecBase&, TString, ETranslationMode, ui16) = 0;
- virtual IPullListWorkerFactoryPtr MakePullListWorkerFactory(const TInputSpecBase&, const TOutputSpecBase&, TString, ETranslationMode, ui16) = 0;
- virtual IPushStreamWorkerFactoryPtr MakePushStreamWorkerFactory(const TInputSpecBase&, const TOutputSpecBase&, TString, ETranslationMode, ui16) = 0;
-
- public:
- /**
- * Add new udf module. It's not specified whether adding new modules will affect existing programs
- * (theoretical answer is 'no').
- */
- virtual void AddUdfModule(const TStringBuf&, NKikimr::NUdf::TUniquePtr<NKikimr::NUdf::IUdfModule>&&) = 0;
- // TODO: support setting udf modules via factory options.
-
- /**
- * Set new counters provider, override one that was specified via factory options. Note that existing
- * programs will still reference the previous provider.
- */
- virtual void SetCountersProvider(NKikimr::NUdf::ICountersProvider*) = 0;
- // TODO: support setting providers via factory options.
-
- template <typename TInputSpec, typename TOutputSpec>
- THolder<TPullStreamProgram<TInputSpec, TOutputSpec>> MakePullStreamProgram(
- TInputSpec inputSpec, TOutputSpec outputSpec, TString query, ETranslationMode mode = ETranslationMode::SQL, ui16 syntaxVersion = 1
- ) {
- auto workerFactory = MakePullStreamWorkerFactory(inputSpec, outputSpec, std::move(query), mode, syntaxVersion);
- return MakeHolder<TPullStreamProgram<TInputSpec, TOutputSpec>>(std::move(inputSpec), std::move(outputSpec), workerFactory);
- }
-
- template <typename TInputSpec, typename TOutputSpec>
- THolder<TPullListProgram<TInputSpec, TOutputSpec>> MakePullListProgram(
- TInputSpec inputSpec, TOutputSpec outputSpec, TString query, ETranslationMode mode = ETranslationMode::SQL, ui16 syntaxVersion = 1
- ) {
- auto workerFactory = MakePullListWorkerFactory(inputSpec, outputSpec, std::move(query), mode, syntaxVersion);
- return MakeHolder<TPullListProgram<TInputSpec, TOutputSpec>>(std::move(inputSpec), std::move(outputSpec), workerFactory);
- }
-
- template <typename TInputSpec, typename TOutputSpec>
- THolder<TPushStreamProgram<TInputSpec, TOutputSpec>> MakePushStreamProgram(
- TInputSpec inputSpec, TOutputSpec outputSpec, TString query, ETranslationMode mode = ETranslationMode::SQL, ui16 syntaxVersion = 1
- ) {
- auto workerFactory = MakePushStreamWorkerFactory(inputSpec, outputSpec, std::move(query), mode, syntaxVersion);
- return MakeHolder<TPushStreamProgram<TInputSpec, TOutputSpec>>(std::move(inputSpec), std::move(outputSpec), workerFactory);
- }
- };
-
- ////////////////////////////////////////////////////////////////////////////////////////////////////
-
- /**
- * A facility for creating workers. Despite being a part of a public API, worker factory is not used directly.
- */
- class IWorkerFactory: public std::enable_shared_from_this<IWorkerFactory> {
- public:
- virtual ~IWorkerFactory() = default;
- /**
- * Get input column names for specified input that are actually used in the query.
- */
- virtual const THashSet<TString>& GetUsedColumns(ui32) const = 0;
- /**
- * Overload for single-input programs.
- */
- virtual const THashSet<TString>& GetUsedColumns() const = 0;
-
- /**
- * Make input type schema for specified input as deduced by program optimizer. This schema is equivalent
- * to one provided by input spec up to the order of the fields in structures.
- */
- virtual NYT::TNode MakeInputSchema(ui32) const = 0;
- /**
- * Overload for single-input programs.
- */
- virtual NYT::TNode MakeInputSchema() const = 0;
-
- /**
- * Make output type schema as deduced by program optimizer. If output spec provides its own schema, then
- * this schema is equivalent to one provided by output spec up to the order of the fields in structures.
- */
- /// @{
- /**
- * Overload for single-table output programs (i.e. output type is struct).
- */
- virtual NYT::TNode MakeOutputSchema() const = 0;
- /**
- * Overload for multi-table output programs (i.e. output type is variant over tuple).
- */
- virtual NYT::TNode MakeOutputSchema(ui32) const = 0;
- /**
- * Overload for multi-table output programs (i.e. output type is variant over struct).
- */
- virtual NYT::TNode MakeOutputSchema(TStringBuf) const = 0;
- /// @}
-
- /**
- * Make full output schema. For single-output programs returns struct type, for multi-output programs
- * returns variant type.
- *
- * Warning: calling this function may result in extended memory usage for large number of output tables.
- */
- virtual NYT::TNode MakeFullOutputSchema() const = 0;
-
- /**
- * Get compilation issues
- */
- virtual TIssues GetIssues() const = 0;
-
- /**
- * Get precompiled mkql program
- */
- virtual TString GetCompiledProgram() = 0;
-
- /**
- * Return a worker to the factory for possible reuse
- */
- virtual void ReturnWorker(IWorker* worker) = 0;
- };
-
- class TReleaseWorker {
- public:
- template <class T>
- static inline void Destroy(T* t) noexcept {
- t->Release();
- }
- };
-
- template <class T>
- using TWorkerHolder = THolder<T, TReleaseWorker>;
-
- /**
- * Factory for generating pull stream workers.
- */
- class IPullStreamWorkerFactory: public IWorkerFactory {
- public:
- /**
- * Create a new pull stream worker.
- */
- virtual TWorkerHolder<IPullStreamWorker> MakeWorker() = 0;
- };
-
- /**
- * Factory for generating pull list workers.
- */
- class IPullListWorkerFactory: public IWorkerFactory {
- public:
- /**
- * Create a new pull list worker.
- */
- virtual TWorkerHolder<IPullListWorker> MakeWorker() = 0;
- };
-
- /**
- * Factory for generating push stream workers.
- */
- class IPushStreamWorkerFactory: public IWorkerFactory {
- public:
- /**
- * Create a new push stream worker.
- */
- virtual TWorkerHolder<IPushStreamWorker> MakeWorker() = 0;
- };
-
- ////////////////////////////////////////////////////////////////////////////////////////////////////
-
- /**
- * Worker is a central part of any program instance. It contains current computation state
- * (called computation graph) and objects required to work with it, including an allocator for unboxed values.
- *
- * Usually, users do not interact with workers directly. They use program instance entry points such as streams
- * and consumers instead. The only case when one would have to interact with workers is when implementing
- * custom io-specification.
- */
- class IWorker {
- protected:
- friend class TReleaseWorker;
- /**
- * Cleanup the worker and return to a worker factory for reuse
- */
- virtual void Release() = 0;
-
- public:
- virtual ~IWorker() = default;
-
- public:
- /**
- * Number of inputs for this program.
- */
- virtual ui32 GetInputsCount() const = 0;
-
- /**
- * MiniKQL input struct type of specified input for this program. Type is equivalent to the deduced input
- * schema (see IWorker::MakeInputSchema())
- *
- * If ``original`` is set to ``true``, returns type without virtual system columns.
- */
- virtual const NKikimr::NMiniKQL::TStructType* GetInputType(ui32, bool original = false) const = 0;
- /**
- * Overload for single-input programs.
- */
- virtual const NKikimr::NMiniKQL::TStructType* GetInputType(bool original = false) const = 0;
-
- /**
- * MiniKQL input struct type of the specified input for this program.
- * The returned type is the actual type of the specified input node.
- */
- virtual const NKikimr::NMiniKQL::TStructType* GetRawInputType(ui32) const = 0;
- /**
- * Overload for single-input programs.
- */
- virtual const NKikimr::NMiniKQL::TStructType* GetRawInputType() const = 0;
-
- /**
- * MiniKQL output struct type for this program. The returned type is equivalent to the deduced output
- * schema (see IWorker::MakeFullOutputSchema()).
- */
- virtual const NKikimr::NMiniKQL::TType* GetOutputType() const = 0;
-
- /**
- * MiniKQL output struct type for this program. The returned type is
- * the actual type of the root node.
- */
- virtual const NKikimr::NMiniKQL::TType* GetRawOutputType() const = 0;
-
- /**
- * Make input type schema for specified input as deduced by program optimizer. This schema is equivalent
- * to one provided by input spec up to the order of the fields in structures.
- */
- virtual NYT::TNode MakeInputSchema(ui32) const = 0;
- /**
- * Overload for single-input programs.
- */
- virtual NYT::TNode MakeInputSchema() const = 0;
-
- /**
- * Make output type schema as deduced by program optimizer. If output spec provides its own schema, then
- * this schema is equivalent to one provided by output spec up to the order of the fields in structures.
- */
- /// @{
- /**
- * Overload for single-table output programs (i.e. output type is struct).
- */
- virtual NYT::TNode MakeOutputSchema() const = 0;
- /**
- * Overload for multi-table output programs (i.e. output type is variant over tuple).
- */
- virtual NYT::TNode MakeOutputSchema(ui32) const = 0;
- /**
- * Overload for multi-table output programs (i.e. output type is variant over struct).
- */
- virtual NYT::TNode MakeOutputSchema(TStringBuf) const = 0;
- /// @}
-
- /**
- * Generates full output schema. For single-output programs returns struct type, for multi-output programs
- * returns variant type.
- *
- * Warning: calling this function may result in extended memory usage for large number of output tables.
- */
- virtual NYT::TNode MakeFullOutputSchema() const = 0;
-
- /**
- * Get scoped alloc used in this worker.
- */
- virtual NKikimr::NMiniKQL::TScopedAlloc& GetScopedAlloc() = 0;
-
- /**
- * Get computation graph.
- */
- virtual NKikimr::NMiniKQL::IComputationGraph& GetGraph() = 0;
-
- /**
- * Get function registry for this worker.
- */
- virtual const NKikimr::NMiniKQL::IFunctionRegistry& GetFunctionRegistry() const = 0;
-
- /**
- * Get type environment for this worker.
- */
- virtual NKikimr::NMiniKQL::TTypeEnvironment& GetTypeEnvironment() = 0;
-
- /**
- * Get llvm settings for this worker.
- */
- virtual const TString& GetLLVMSettings() const = 0;
-
- /**
- * Get YT Type V3 flags
- */
- virtual ui64 GetNativeYtTypeFlags() const = 0;
-
- /**
- * Get time provider
- */
- virtual ITimeProvider* GetTimeProvider() const = 0;
- };
-
- /**
- * Worker which operates in pull stream mode.
- */
- class IPullStreamWorker: public IWorker {
- public:
- /**
- * Set input computation graph node for specified input. The passed unboxed value should be a stream of
- * structs. It should be created via the allocator associated with this very worker.
- * This function can only be called once for each input.
- */
- virtual void SetInput(NKikimr::NUdf::TUnboxedValue&&, ui32) = 0;
-
- /**
- * Get the output computation graph node. The returned node will be a stream of structs or variants.
- * This function cannot be called before setting an input value.
- */
- virtual NKikimr::NUdf::TUnboxedValue& GetOutput() = 0;
- };
-
- /**
- * Worker which operates in pull list mode.
- */
- class IPullListWorker: public IWorker {
- public:
- /**
- * Set input computation graph node for specified input. The passed unboxed value should be a list of
- * structs. It should be created via the allocator associated with this very worker.
- * This function can only be called once for each index.
- */
- virtual void SetInput(NKikimr::NUdf::TUnboxedValue&&, ui32) = 0;
-
- /**
- * Get the output computation graph node. The returned node will be a list of structs or variants.
- * This function cannot be called before setting an input value.
- */
- virtual NKikimr::NUdf::TUnboxedValue& GetOutput() = 0;
-
- /**
- * Get iterator over the output list.
- */
- virtual NKikimr::NUdf::TUnboxedValue& GetOutputIterator() = 0;
-
- /**
- * Reset iterator to the beginning of the output list. After calling this function, GetOutputIterator()
- * will return a fresh iterator; all previously returned iterators will become invalid.
- */
- virtual void ResetOutputIterator() = 0;
- };
-
- /**
- * Worker which operates in push stream mode.
- */
- class IPushStreamWorker: public IWorker {
- public:
- /**
- * Set a consumer where the worker will relay its output. This function can only be called once.
- */
- virtual void SetConsumer(THolder<IConsumer<const NKikimr::NUdf::TUnboxedValue*>>) = 0;
-
- /**
- * Push new value to the graph, then feed all new output to the consumer. Values cannot be pushed before
- * assigning a consumer.
- */
- virtual void Push(NKikimr::NUdf::TUnboxedValue&&) = 0;
-
- /**
- * Send finish event and clear the computation graph. No new values will be accepted.
- */
- virtual void OnFinish() = 0;
- };
-
- ////////////////////////////////////////////////////////////////////////////////////////////////////
-
- /**
- * Input specifications describe format for program input. They carry information about input data schema
- * as well as the knowledge about how to convert input structures into unboxed values (data format which can be
- * processed by the YQL runtime).
- *
- * Input spec defines the arguments of the program's Apply method. For example, a program
- * with the protobuf input spec will accept a stream of protobuf messages while a program with the
- * yson spec will accept an input stream (binary or text one).
- *
- * See documentation for input and output spec traits for hints on how to implement custom specs.
- */
- class TInputSpecBase {
- protected:
- mutable TVector<THashMap<TString, NYT::TNode>> AllVirtualColumns_;
-
- public:
- virtual ~TInputSpecBase() = default;
-
- public:
- /**
- * Get input data schemas in YQL format (NB: not a YT format). Each item of the returned vector must
- * describe a structure.
- *
- * Format of each item is approximately this one:
- *
- * @code
- * [
- * 'StructType',
- * [
- * ["Field1Name", ["DataType", "Int32"]],
- * ["Field2Name", ["DataType", "String"]],
- * ...
- * ]
- * ]
- * @endcode
- */
- virtual const TVector<NYT::TNode>& GetSchemas() const = 0;
- // TODO: make a neat schema builder
-
- /**
- * Get virtual columns for each input.
- *
- * Key of each mapping is column name, value is data schema in YQL format.
- */
- const TVector<THashMap<TString, NYT::TNode>>& GetAllVirtualColumns() const {
- if (AllVirtualColumns_.empty()) {
- AllVirtualColumns_ = TVector<THashMap<TString, NYT::TNode>>(GetSchemas().size());
- }
-
- return AllVirtualColumns_;
- }
-
- virtual bool ProvidesBlocks() const { return false; }
- };
-
- /**
- * Output specifications describe format for program output. Like input specifications, they carry knowledge
- * about program output type and how to convert unboxed values into that type.
- */
- class TOutputSpecBase {
- private:
- TMaybe<THashSet<TString>> OutputColumnsFilter_;
-
- public:
- virtual ~TOutputSpecBase() = default;
-
- public:
- /**
- * Get output data schema in YQL format (NB: not a YT format). The returned value must describe a structure
- * or a variant made of structures for multi-table outputs (note: not all specs support multi-table output).
- *
- * See docs for the input spec's GetSchemas().
- *
- * Also TNode entity could be returned (NYT::TNode::CreateEntity()),
- * in which case output schema would be inferred from query and could be
- * obtained by IProgram::MakeOutputSchema() call.
- */
- virtual const NYT::TNode& GetSchema() const = 0;
-
- /**
- * Get an output columns filter.
- *
- * Output columns filter is a set of column names that should be left in the output. All columns that are
- * not in this set will not be calculated. Depending on the output schema, they will be either removed
- * completely (for optional columns) or filled with defaults (for required columns).
- */
- const TMaybe<THashSet<TString>>& GetOutputColumnsFilter() const {
- return OutputColumnsFilter_;
- }
-
- /**
- * Set new output columns filter.
- */
- void SetOutputColumnsFilter(const TMaybe<THashSet<TString>>& outputColumnsFilter) {
- OutputColumnsFilter_ = outputColumnsFilter;
- }
-
- virtual bool AcceptsBlocks() const { return false; }
- };
-
- ////////////////////////////////////////////////////////////////////////////////////////////////////
-
- /**
- * Input spec traits provide information on how to process program input.
- *
- * Each input spec should create a template specialization for this class, in which it should provide several
- * static variables and functions.
- *
- * For example, a hypothetical implementation of a JSON input spec would look like this:
- *
- * @code
- * class TJsonInputSpec: public TInputSpecBase {
- * // whatever magic you require for this spec
- * };
- *
- * template <>
- * class TInputSpecTraits<TJsonInputSpec> {
- * // write here four constants, one typedef and three static functions described below
- * };
- * @endcode
- *
- * @tparam T input spec type.
- */
- template <typename T>
- struct TInputSpecTraits {
- /// Safety flag which should be set to false in all template specializations of this class. Attempt to
- /// build a program using a spec with `IsPartial=true` will result in compilation error.
- static const constexpr bool IsPartial = true;
-
- /// Indicates whether this spec supports pull stream mode.
- static const constexpr bool SupportPullStreamMode = false;
- /// Indicates whether this spec supports pull list mode.
- static const constexpr bool SupportPullListMode = false;
- /// Indicates whether this spec supports push stream mode.
- static const constexpr bool SupportPushStreamMode = false;
-
- /// For push mode, indicates the return type of the builder's Process function.
- using TConsumerType = void;
-
- /// For pull stream mode, should take an input spec, a pull stream worker and whatever the user passed
- /// to the program's Apply function, create unboxed values with custom stream implementations
- /// and pass them to the worker's SetInput function for each input.
- template <typename ...A>
- static void PreparePullStreamWorker(const T&, IPullStreamWorker*, A&&...) {
- Y_UNREACHABLE();
- }
-
- /// For pull list mode, should take an input spec, a pull list worker and whatever the user passed
- /// to the program's Apply function, create unboxed values with custom list implementations
- /// and pass them to the worker's SetInput function for each input.
- template <typename ...A>
- static void PreparePullListWorker(const T&, IPullListWorker*, A&&...) {
- Y_UNREACHABLE();
- }
-
- /// For push stream mode, should take an input spec and a worker and create a consumer which will
- /// be returned to the user. The consumer should keep the worker alive until its own destruction.
- /// The return type of this function should exactly match the one defined in the TConsumerType typedef.
- static TConsumerType MakeConsumer(const T&, TWorkerHolder<IPushStreamWorker>) {
- Y_UNREACHABLE();
- }
- };
-
- /**
- * Output spec traits provide information on how to process program output. Like with input specs, each output
- * spec requires an appropriate template specialization of this class.
- *
- * @tparam T output spec type.
- */
- template <typename T>
- struct TOutputSpecTraits {
- /// Safety flag which should be set to false in all template specializations of this class. Attempt to
- /// build a program using a spec with `IsPartial=true` will result in compilation error.
- static const constexpr bool IsPartial = true;
-
- /// Indicates whether this spec supports pull stream mode.
- static const constexpr bool SupportPullStreamMode = false;
- /// Indicates whether this spec supports pull list mode.
- static const constexpr bool SupportPullListMode = false;
- /// Indicates whether this spec supports push stream mode.
- static const constexpr bool SupportPushStreamMode = false;
-
- /// For pull stream mode, indicates the return type of the program's Apply function.
- using TPullStreamReturnType = void;
-
- /// For pull list mode, indicates the return type of the program's Apply function.
- using TPullListReturnType = void;
-
- /// For pull stream mode, should take an output spec and a worker and build a stream which will be returned
- /// to the user. The return type of this function must match the one specified in TPullStreamReturnType.
- static TPullStreamReturnType ConvertPullStreamWorkerToOutputType(const T&, TWorkerHolder<IPullStreamWorker>) {
- Y_UNREACHABLE();
- }
-
- /// For pull list mode, should take an output spec and a worker and build a list which will be returned
- /// to the user. The return type of this function must match the one specified in TPullListReturnType.
- static TPullListReturnType ConvertPullListWorkerToOutputType(const T&, TWorkerHolder<IPullListWorker>) {
- Y_UNREACHABLE();
- }
-
- /// For push stream mode, should take an output spec, a worker and whatever arguments the user passed
- /// to the program's Apply function, create a consumer for unboxed values and pass it to the worker's
- /// SetConsumer function.
- template <typename ...A>
- static void SetConsumerToWorker(const T&, IPushStreamWorker*, A&&...) {
- Y_UNREACHABLE();
- }
- };
-
- ////////////////////////////////////////////////////////////////////////////////////////////////////
-
-#define NOT_SPEC_MSG(spec_type) "passed class should be derived from " spec_type " spec base"
-#define PARTIAL_SPEC_MSG(spec_type) "this " spec_type " spec does not define its traits. Make sure you've passed " \
- "an " spec_type " spec and not some other object; also make sure you've included " \
- "all necessary headers. If you're developing a spec, make sure you have " \
- "a spec traits template specialization"
-#define UNSUPPORTED_MODE_MSG(spec_type, mode) "this " spec_type " spec does not support " mode " mode"
-
- class IProgram {
- public:
- virtual ~IProgram() = default;
-
- public:
- virtual const TInputSpecBase& GetInputSpecBase() const = 0;
- virtual const TOutputSpecBase& GetOutputSpecBase() const = 0;
- virtual const THashSet<TString>& GetUsedColumns(ui32) const = 0;
- virtual const THashSet<TString>& GetUsedColumns() const = 0;
- virtual NYT::TNode MakeInputSchema(ui32) const = 0;
- virtual NYT::TNode MakeInputSchema() const = 0;
- virtual NYT::TNode MakeOutputSchema() const = 0;
- virtual NYT::TNode MakeOutputSchema(ui32) const = 0;
- virtual NYT::TNode MakeOutputSchema(TStringBuf) const = 0;
- virtual NYT::TNode MakeFullOutputSchema() const = 0;
- virtual TIssues GetIssues() const = 0;
- virtual TString GetCompiledProgram() = 0;
-
- inline void MergeUsedColumns(THashSet<TString>& columns, ui32 inputIndex) {
- const auto& usedColumns = GetUsedColumns(inputIndex);
- columns.insert(usedColumns.begin(), usedColumns.end());
- }
-
- inline void MergeUsedColumns(THashSet<TString>& columns) {
- const auto& usedColumns = GetUsedColumns();
- columns.insert(usedColumns.begin(), usedColumns.end());
- }
- };
-
- template <typename TInputSpec, typename TOutputSpec, typename WorkerFactory>
- class TProgramCommon: public IProgram {
- static_assert(std::is_base_of<TInputSpecBase, TInputSpec>::value, NOT_SPEC_MSG("input"));
- static_assert(std::is_base_of<TOutputSpecBase, TOutputSpec>::value, NOT_SPEC_MSG("output"));
-
- protected:
- TInputSpec InputSpec_;
- TOutputSpec OutputSpec_;
- std::shared_ptr<WorkerFactory> WorkerFactory_;
-
- public:
- explicit TProgramCommon(
- TInputSpec inputSpec,
- TOutputSpec outputSpec,
- std::shared_ptr<WorkerFactory> workerFactory
- )
- : InputSpec_(inputSpec)
- , OutputSpec_(outputSpec)
- , WorkerFactory_(std::move(workerFactory))
- {
- }
-
- public:
- const TInputSpec& GetInputSpec() const {
- return InputSpec_;
- }
-
- const TOutputSpec& GetOutputSpec() const {
- return OutputSpec_;
- }
-
- const TInputSpecBase& GetInputSpecBase() const override {
- return InputSpec_;
- }
-
- const TOutputSpecBase& GetOutputSpecBase() const override {
- return OutputSpec_;
- }
-
- const THashSet<TString>& GetUsedColumns(ui32 inputIndex) const override {
- return WorkerFactory_->GetUsedColumns(inputIndex);
- }
-
- const THashSet<TString>& GetUsedColumns() const override {
- return WorkerFactory_->GetUsedColumns();
- }
-
- NYT::TNode MakeInputSchema(ui32 inputIndex) const override {
- return WorkerFactory_->MakeInputSchema(inputIndex);
- }
-
- NYT::TNode MakeInputSchema() const override {
- return WorkerFactory_->MakeInputSchema();
- }
-
- NYT::TNode MakeOutputSchema() const override {
- return WorkerFactory_->MakeOutputSchema();
- }
-
- NYT::TNode MakeOutputSchema(ui32 outputIndex) const override {
- return WorkerFactory_->MakeOutputSchema(outputIndex);
- }
-
- NYT::TNode MakeOutputSchema(TStringBuf outputName) const override {
- return WorkerFactory_->MakeOutputSchema(outputName);
- }
-
- NYT::TNode MakeFullOutputSchema() const override {
- return WorkerFactory_->MakeFullOutputSchema();
- }
-
- TIssues GetIssues() const override {
- return WorkerFactory_->GetIssues();
- }
-
- TString GetCompiledProgram() override {
- return WorkerFactory_->GetCompiledProgram();
- }
- };
-
- template <typename TInputSpec, typename TOutputSpec>
- class TPullStreamProgram final: public TProgramCommon<TInputSpec, TOutputSpec, IPullStreamWorkerFactory> {
- using TProgramCommon<TInputSpec, TOutputSpec, IPullStreamWorkerFactory>::WorkerFactory_;
- using TProgramCommon<TInputSpec, TOutputSpec, IPullStreamWorkerFactory>::InputSpec_;
- using TProgramCommon<TInputSpec, TOutputSpec, IPullStreamWorkerFactory>::OutputSpec_;
-
- public:
- using TProgramCommon<TInputSpec, TOutputSpec, IPullStreamWorkerFactory>::TProgramCommon;
-
- public:
- template <typename ...T>
- typename TOutputSpecTraits<TOutputSpec>::TPullStreamReturnType Apply(T&& ... t) {
- static_assert(!TInputSpecTraits<TInputSpec>::IsPartial, PARTIAL_SPEC_MSG("input"));
- static_assert(!TOutputSpecTraits<TOutputSpec>::IsPartial, PARTIAL_SPEC_MSG("output"));
- static_assert(TInputSpecTraits<TInputSpec>::SupportPullStreamMode, UNSUPPORTED_MODE_MSG("input", "pull stream"));
- static_assert(TOutputSpecTraits<TOutputSpec>::SupportPullStreamMode, UNSUPPORTED_MODE_MSG("output", "pull stream"));
-
- auto worker = WorkerFactory_->MakeWorker();
- TInputSpecTraits<TInputSpec>::PreparePullStreamWorker(InputSpec_, worker.Get(), std::forward<T>(t)...);
- return TOutputSpecTraits<TOutputSpec>::ConvertPullStreamWorkerToOutputType(OutputSpec_, std::move(worker));
- }
- };
-
- template <typename TInputSpec, typename TOutputSpec>
- class TPullListProgram final: public TProgramCommon<TInputSpec, TOutputSpec, IPullListWorkerFactory> {
- using TProgramCommon<TInputSpec, TOutputSpec, IPullListWorkerFactory>::WorkerFactory_;
- using TProgramCommon<TInputSpec, TOutputSpec, IPullListWorkerFactory>::InputSpec_;
- using TProgramCommon<TInputSpec, TOutputSpec, IPullListWorkerFactory>::OutputSpec_;
-
- public:
- using TProgramCommon<TInputSpec, TOutputSpec, IPullListWorkerFactory>::TProgramCommon;
-
- public:
- template <typename ...T>
- typename TOutputSpecTraits<TOutputSpec>::TPullListReturnType Apply(T&& ... t) {
- static_assert(!TInputSpecTraits<TInputSpec>::IsPartial, PARTIAL_SPEC_MSG("input"));
- static_assert(!TOutputSpecTraits<TOutputSpec>::IsPartial, PARTIAL_SPEC_MSG("output"));
- static_assert(TInputSpecTraits<TInputSpec>::SupportPullListMode, UNSUPPORTED_MODE_MSG("input", "pull list"));
- static_assert(TOutputSpecTraits<TOutputSpec>::SupportPullListMode, UNSUPPORTED_MODE_MSG("output", "pull list"));
-
- auto worker = WorkerFactory_->MakeWorker();
- TInputSpecTraits<TInputSpec>::PreparePullListWorker(InputSpec_, worker.Get(), std::forward<T>(t)...);
- return TOutputSpecTraits<TOutputSpec>::ConvertPullListWorkerToOutputType(OutputSpec_, std::move(worker));
- }
- };
-
- template <typename TInputSpec, typename TOutputSpec>
- class TPushStreamProgram final: public TProgramCommon<TInputSpec, TOutputSpec, IPushStreamWorkerFactory> {
- using TProgramCommon<TInputSpec, TOutputSpec, IPushStreamWorkerFactory>::WorkerFactory_;
- using TProgramCommon<TInputSpec, TOutputSpec, IPushStreamWorkerFactory>::InputSpec_;
- using TProgramCommon<TInputSpec, TOutputSpec, IPushStreamWorkerFactory>::OutputSpec_;
-
- public:
- using TProgramCommon<TInputSpec, TOutputSpec, IPushStreamWorkerFactory>::TProgramCommon;
-
- public:
- template <typename ...T>
- typename TInputSpecTraits<TInputSpec>::TConsumerType Apply(T&& ... t) {
- static_assert(!TInputSpecTraits<TInputSpec>::IsPartial, PARTIAL_SPEC_MSG("input"));
- static_assert(!TOutputSpecTraits<TOutputSpec>::IsPartial, PARTIAL_SPEC_MSG("output"));
- static_assert(TInputSpecTraits<TInputSpec>::SupportPushStreamMode, UNSUPPORTED_MODE_MSG("input", "push stream"));
- static_assert(TOutputSpecTraits<TOutputSpec>::SupportPushStreamMode, UNSUPPORTED_MODE_MSG("output", "push stream"));
-
- auto worker = WorkerFactory_->MakeWorker();
- TOutputSpecTraits<TOutputSpec>::SetConsumerToWorker(OutputSpec_, worker.Get(), std::forward<T>(t)...);
- return TInputSpecTraits<TInputSpec>::MakeConsumer(InputSpec_, std::move(worker));
- }
- };
-
-#undef NOT_SPEC_MSG
-#undef PARTIAL_SPEC_MSG
-#undef UNSUPPORTED_MODE_MSG
-
- ////////////////////////////////////////////////////////////////////////////////////////////////////
-
- /**
- * Configure global logging facilities. Affects all YQL modules.
- */
- void ConfigureLogging(const TLoggingOptions& = {});
-
- /**
- * Create a new program factory.
- * Custom logging initialization could be performed by a call to the ConfigureLogging method beforehand.
- * If the ConfigureLogging method has not been called the default logging initialization will be performed.
- */
- IProgramFactoryPtr MakeProgramFactory(const TProgramFactoryOptions& = {});
- }
-}
-
-Y_DECLARE_OUT_SPEC(inline, NYql::NPureCalc::TCompileError, stream, value) {
- stream << value.AsStrBuf() << Endl << "Issues:" << Endl << value.GetIssues() << Endl << Endl << "Yql:" << Endl <<value.GetYql();
-}
diff --git a/ydb/library/yql/public/purecalc/common/logger_init.cpp b/ydb/library/yql/public/purecalc/common/logger_init.cpp
deleted file mode 100644
index a7da19d9f10..00000000000
--- a/ydb/library/yql/public/purecalc/common/logger_init.cpp
+++ /dev/null
@@ -1,32 +0,0 @@
-#include "logger_init.h"
-
-#include <yql/essentials/utils/log/log.h>
-
-#include <atomic>
-
-namespace NYql {
-namespace NPureCalc {
-
-namespace {
- std::atomic_bool Initialized;
-}
-
- void InitLogging(const TLoggingOptions& options) {
- NLog::InitLogger(options.LogDestination);
- auto& logger = NLog::YqlLogger();
- logger.SetDefaultPriority(options.LogLevel_);
- for (int i = 0; i < NLog::EComponentHelpers::ToInt(NLog::EComponent::MaxValue); ++i) {
- logger.SetComponentLevel((NLog::EComponent) i, (NLog::ELevel) options.LogLevel_);
- }
- Initialized = true;
- }
-
- void EnsureLoggingInitialized() {
- if (Initialized.load()) {
- return;
- }
- InitLogging(TLoggingOptions());
- }
-
-}
-}
diff --git a/ydb/library/yql/public/purecalc/common/logger_init.h b/ydb/library/yql/public/purecalc/common/logger_init.h
deleted file mode 100644
index 039cbd44118..00000000000
--- a/ydb/library/yql/public/purecalc/common/logger_init.h
+++ /dev/null
@@ -1,10 +0,0 @@
-#pragma once
-
-#include "interface.h"
-
-namespace NYql {
- namespace NPureCalc {
- void InitLogging(const TLoggingOptions& options);
- void EnsureLoggingInitialized();
- }
-}
diff --git a/ydb/library/yql/public/purecalc/common/names.cpp b/ydb/library/yql/public/purecalc/common/names.cpp
deleted file mode 100644
index 5e8412a7b22..00000000000
--- a/ydb/library/yql/public/purecalc/common/names.cpp
+++ /dev/null
@@ -1,19 +0,0 @@
-#include "names.h"
-
-#include <util/generic/strbuf.h>
-
-namespace NYql::NPureCalc {
- const TStringBuf PurecalcSysColumnsPrefix = "_yql_sys_";
- const TStringBuf PurecalcSysColumnTablePath = "_yql_sys_tablepath";
- const TStringBuf PurecalcBlockColumnLength = "_yql_block_length";
-
- const TStringBuf PurecalcDefaultCluster = "view";
- const TStringBuf PurecalcDefaultService = "data";
-
- const TStringBuf PurecalcInputCallableName = "Self";
- const TStringBuf PurecalcInputTablePrefix = "Input";
-
- const TStringBuf PurecalcBlockInputCallableName = "BlockSelf";
-
- const TStringBuf PurecalcUdfModulePrefix = "<purecalc>::";
-}
diff --git a/ydb/library/yql/public/purecalc/common/names.h b/ydb/library/yql/public/purecalc/common/names.h
deleted file mode 100644
index b19c15ca4fe..00000000000
--- a/ydb/library/yql/public/purecalc/common/names.h
+++ /dev/null
@@ -1,19 +0,0 @@
-#pragma once
-
-#include <util/generic/fwd.h>
-
-namespace NYql::NPureCalc {
- extern const TStringBuf PurecalcSysColumnsPrefix;
- extern const TStringBuf PurecalcSysColumnTablePath;
- extern const TStringBuf PurecalcBlockColumnLength;
-
- extern const TStringBuf PurecalcDefaultCluster;
- extern const TStringBuf PurecalcDefaultService;
-
- extern const TStringBuf PurecalcInputCallableName;
- extern const TStringBuf PurecalcInputTablePrefix;
-
- extern const TStringBuf PurecalcBlockInputCallableName;
-
- extern const TStringBuf PurecalcUdfModulePrefix;
-}
diff --git a/ydb/library/yql/public/purecalc/common/no_llvm/ya.make b/ydb/library/yql/public/purecalc/common/no_llvm/ya.make
deleted file mode 100644
index 18b3b5523d7..00000000000
--- a/ydb/library/yql/public/purecalc/common/no_llvm/ya.make
+++ /dev/null
@@ -1,18 +0,0 @@
-LIBRARY()
-
-INCLUDE(../ya.make.inc)
-
-PEERDIR(
- ydb/library/yql/providers/yt/codec/codegen/no_llvm
- yql/essentials/providers/config
- yql/essentials/minikql/computation/no_llvm
- yql/essentials/minikql/invoke_builtins/no_llvm
- yql/essentials/minikql/comp_nodes/no_llvm
- yql/essentials/minikql/codegen/no_llvm
- yql/essentials/parser/pg_wrapper
- yql/essentials/parser/pg_wrapper/interface
- yql/essentials/sql/pg
-)
-
-END()
-
diff --git a/ydb/library/yql/public/purecalc/common/processor_mode.cpp b/ydb/library/yql/public/purecalc/common/processor_mode.cpp
deleted file mode 100644
index 957cc2d7f42..00000000000
--- a/ydb/library/yql/public/purecalc/common/processor_mode.cpp
+++ /dev/null
@@ -1 +0,0 @@
-#include "processor_mode.h"
diff --git a/ydb/library/yql/public/purecalc/common/processor_mode.h b/ydb/library/yql/public/purecalc/common/processor_mode.h
deleted file mode 100644
index 9bec87cadc9..00000000000
--- a/ydb/library/yql/public/purecalc/common/processor_mode.h
+++ /dev/null
@@ -1,11 +0,0 @@
-#pragma once
-
-namespace NYql {
- namespace NPureCalc {
- enum class EProcessorMode {
- PullList,
- PullStream,
- PushStream
- };
- }
-}
diff --git a/ydb/library/yql/public/purecalc/common/program_factory.cpp b/ydb/library/yql/public/purecalc/common/program_factory.cpp
deleted file mode 100644
index 8452dc3d003..00000000000
--- a/ydb/library/yql/public/purecalc/common/program_factory.cpp
+++ /dev/null
@@ -1,158 +0,0 @@
-#include "program_factory.h"
-#include "logger_init.h"
-#include "names.h"
-#include "worker_factory.h"
-
-#include <yql/essentials/utils/log/log.h>
-
-using namespace NYql;
-using namespace NYql::NPureCalc;
-
-TProgramFactory::TProgramFactory(const TProgramFactoryOptions& options)
- : Options_(options)
- , ExprOutputStream_(Options_.ExprOutputStream)
- , CountersProvider_(nullptr)
-{
- EnsureLoggingInitialized();
-
- if (!TryFromString(Options_.BlockEngineSettings, BlockEngineMode_)) {
- ythrow TCompileError("", "") << "Unknown BlockEngineSettings value: expected "
- << GetEnumAllNames<EBlockEngineMode>()
- << ", but got: "
- << Options_.BlockEngineSettings;
- }
-
- NUserData::TUserData::UserDataToLibraries(Options_.UserData_, Modules_);
-
- UserData_ = GetYqlModuleResolver(ExprContext_, ModuleResolver_, Options_.UserData_, {}, {});
-
- if (!ModuleResolver_) {
- ythrow TCompileError("", ExprContext_.IssueManager.GetIssues().ToString()) << "failed to compile modules";
- }
-
- TVector<TString> UDFsPaths;
- for (const auto& item: Options_.UserData_) {
- if (
- item.Type_ == NUserData::EType::UDF &&
- item.Disposition_ == NUserData::EDisposition::FILESYSTEM
- ) {
- UDFsPaths.push_back(item.Content_);
- }
- }
-
- if (!Options_.UdfsDir_.empty()) {
- NKikimr::NMiniKQL::FindUdfsInDir(Options_.UdfsDir_, &UDFsPaths);
- }
-
- FuncRegistry_ = NKikimr::NMiniKQL::CreateFunctionRegistry(
- &NYql::NBacktrace::KikimrBackTrace, NKikimr::NMiniKQL::CreateBuiltinRegistry(), false, UDFsPaths)->Clone();
-
- NKikimr::NMiniKQL::FillStaticModules(*FuncRegistry_);
-}
-
-TProgramFactory::~TProgramFactory() {
-}
-
-void TProgramFactory::AddUdfModule(
- const TStringBuf& moduleName,
- NKikimr::NUdf::TUniquePtr<NKikimr::NUdf::IUdfModule>&& module
-) {
- FuncRegistry_->AddModule(
- TString::Join(PurecalcUdfModulePrefix, moduleName), moduleName, std::move(module)
- );
-}
-
-void TProgramFactory::SetCountersProvider(NKikimr::NUdf::ICountersProvider* provider) {
- CountersProvider_ = provider;
-}
-
-IPullStreamWorkerFactoryPtr TProgramFactory::MakePullStreamWorkerFactory(
- const TInputSpecBase& inputSpec,
- const TOutputSpecBase& outputSpec,
- TString query,
- ETranslationMode mode,
- ui16 syntaxVersion
-) {
- return std::make_shared<TPullStreamWorkerFactory>(TWorkerFactoryOptions(
- TIntrusivePtr<TProgramFactory>(this),
- inputSpec,
- outputSpec,
- query,
- FuncRegistry_,
- ModuleResolver_,
- UserData_,
- Modules_,
- Options_.LLVMSettings,
- BlockEngineMode_,
- ExprOutputStream_,
- CountersProvider_,
- mode,
- syntaxVersion,
- Options_.NativeYtTypeFlags,
- Options_.DeterministicTimeProviderSeed,
- Options_.UseSystemColumns,
- Options_.UseWorkerPool
- ));
-}
-
-IPullListWorkerFactoryPtr TProgramFactory::MakePullListWorkerFactory(
- const TInputSpecBase& inputSpec,
- const TOutputSpecBase& outputSpec,
- TString query,
- ETranslationMode mode,
- ui16 syntaxVersion
-) {
- return std::make_shared<TPullListWorkerFactory>(TWorkerFactoryOptions(
- TIntrusivePtr<TProgramFactory>(this),
- inputSpec,
- outputSpec,
- query,
- FuncRegistry_,
- ModuleResolver_,
- UserData_,
- Modules_,
- Options_.LLVMSettings,
- BlockEngineMode_,
- ExprOutputStream_,
- CountersProvider_,
- mode,
- syntaxVersion,
- Options_.NativeYtTypeFlags,
- Options_.DeterministicTimeProviderSeed,
- Options_.UseSystemColumns,
- Options_.UseWorkerPool
- ));
-}
-
-IPushStreamWorkerFactoryPtr TProgramFactory::MakePushStreamWorkerFactory(
- const TInputSpecBase& inputSpec,
- const TOutputSpecBase& outputSpec,
- TString query,
- ETranslationMode mode,
- ui16 syntaxVersion
-) {
- if (inputSpec.GetSchemas().size() > 1) {
- ythrow yexception() << "push stream mode doesn't support several inputs";
- }
-
- return std::make_shared<TPushStreamWorkerFactory>(TWorkerFactoryOptions(
- TIntrusivePtr<TProgramFactory>(this),
- inputSpec,
- outputSpec,
- query,
- FuncRegistry_,
- ModuleResolver_,
- UserData_,
- Modules_,
- Options_.LLVMSettings,
- BlockEngineMode_,
- ExprOutputStream_,
- CountersProvider_,
- mode,
- syntaxVersion,
- Options_.NativeYtTypeFlags,
- Options_.DeterministicTimeProviderSeed,
- Options_.UseSystemColumns,
- Options_.UseWorkerPool
- ));
-}
diff --git a/ydb/library/yql/public/purecalc/common/program_factory.h b/ydb/library/yql/public/purecalc/common/program_factory.h
deleted file mode 100644
index 278d3e05a6a..00000000000
--- a/ydb/library/yql/public/purecalc/common/program_factory.h
+++ /dev/null
@@ -1,48 +0,0 @@
-#pragma once
-
-#include "interface.h"
-
-#include <yql/essentials/utils/backtrace/backtrace.h>
-#include <yql/essentials/core/services/mounts/yql_mounts.h>
-
-#include <yql/essentials/ast/yql_expr.h>
-#include <yql/essentials/core/yql_user_data.h>
-#include <yql/essentials/minikql/mkql_function_registry.h>
-#include <yql/essentials/minikql/invoke_builtins/mkql_builtins.h>
-
-#include <util/generic/function.h>
-#include <util/generic/ptr.h>
-#include <util/generic/strbuf.h>
-
-namespace NYql {
- namespace NPureCalc {
- class TProgramFactory: public IProgramFactory {
- private:
- TProgramFactoryOptions Options_;
- TExprContext ExprContext_;
- TIntrusivePtr<NKikimr::NMiniKQL::IMutableFunctionRegistry> FuncRegistry_;
- IModuleResolver::TPtr ModuleResolver_;
- TUserDataTable UserData_;
- EBlockEngineMode BlockEngineMode_;
- IOutputStream* ExprOutputStream_;
- THashMap<TString, TString> Modules_;
- NKikimr::NUdf::ICountersProvider* CountersProvider_;
-
- public:
- explicit TProgramFactory(const TProgramFactoryOptions&);
- ~TProgramFactory() override;
-
- public:
- void AddUdfModule(
- const TStringBuf& moduleName,
- NKikimr::NUdf::TUniquePtr<NKikimr::NUdf::IUdfModule>&& module
- ) override;
-
- void SetCountersProvider(NKikimr::NUdf::ICountersProvider* provider) override;
-
- IPullStreamWorkerFactoryPtr MakePullStreamWorkerFactory(const TInputSpecBase&, const TOutputSpecBase&, TString, ETranslationMode, ui16) override;
- IPullListWorkerFactoryPtr MakePullListWorkerFactory(const TInputSpecBase&, const TOutputSpecBase&, TString, ETranslationMode, ui16) override;
- IPushStreamWorkerFactoryPtr MakePushStreamWorkerFactory(const TInputSpecBase&, const TOutputSpecBase&, TString, ETranslationMode, ui16) override;
- };
- }
-}
diff --git a/ydb/library/yql/public/purecalc/common/transformations/align_output_schema.cpp b/ydb/library/yql/public/purecalc/common/transformations/align_output_schema.cpp
deleted file mode 100644
index 73ffa25d347..00000000000
--- a/ydb/library/yql/public/purecalc/common/transformations/align_output_schema.cpp
+++ /dev/null
@@ -1,122 +0,0 @@
-#include "align_output_schema.h"
-
-#include <ydb/library/yql/public/purecalc/common/names.h>
-#include <ydb/library/yql/public/purecalc/common/type_from_schema.h>
-#include <ydb/library/yql/public/purecalc/common/transformations/utils.h>
-
-#include <yql/essentials/core/yql_expr_type_annotation.h>
-
-using namespace NYql;
-using namespace NYql::NPureCalc;
-
-namespace {
- class TOutputAligner : public TSyncTransformerBase {
- private:
- const TTypeAnnotationNode* OutputStruct_;
- bool AcceptsBlocks_;
- EProcessorMode ProcessorMode_;
-
- public:
- explicit TOutputAligner(
- const TTypeAnnotationNode* outputStruct,
- bool acceptsBlocks,
- EProcessorMode processorMode
- )
- : OutputStruct_(outputStruct)
- , AcceptsBlocks_(acceptsBlocks)
- , ProcessorMode_(processorMode)
- {
- }
-
- public:
- TStatus DoTransform(TExprNode::TPtr input, TExprNode::TPtr& output, TExprContext& ctx) final {
- output = input;
-
- const auto* expectedType = MakeExpectedType(ctx);
- const auto* expectedItemType = MakeExpectedItemType();
- const auto* actualType = MakeActualType(input);
- const auto* actualItemType = MakeActualItemType(input);
-
- // XXX: Tweak the obtained expression type, is the spec supports blocks:
- // 1. Remove "_yql_block_length" attribute, since it's for internal usage.
- // 2. Strip block container from the type to store its internal type.
- if (AcceptsBlocks_) {
- Y_ENSURE(actualItemType->GetKind() == ETypeAnnotationKind::Struct);
- actualItemType = UnwrapBlockStruct(actualItemType->Cast<TStructExprType>(), ctx);
- if (ProcessorMode_ == EProcessorMode::PullList) {
- actualType = ctx.MakeType<TListExprType>(actualItemType);
- } else {
- actualType = ctx.MakeType<TStreamExprType>(actualItemType);
- }
- }
-
- if (!ValidateOutputType(actualItemType, expectedItemType, ctx)) {
- return TStatus::Error;
- }
-
- if (!expectedType) {
- return TStatus::Ok;
- }
-
- auto status = TryConvertTo(output, *actualType, *expectedType, ctx);
-
- if (status.Level == IGraphTransformer::TStatus::Repeat) {
- status = IGraphTransformer::TStatus(IGraphTransformer::TStatus::Repeat, true);
- }
-
- return status;
- }
-
- void Rewind() final {
- }
-
- private:
- const TTypeAnnotationNode* MakeExpectedType(TExprContext& ctx) {
- if (!OutputStruct_) {
- return nullptr;
- }
-
- switch (ProcessorMode_) {
- case EProcessorMode::PullList:
- return ctx.MakeType<TListExprType>(OutputStruct_);
- case EProcessorMode::PullStream:
- case EProcessorMode::PushStream:
- return ctx.MakeType<TStreamExprType>(OutputStruct_);
- }
-
- Y_ABORT("Unexpected");
- }
-
- const TTypeAnnotationNode* MakeExpectedItemType() {
- return OutputStruct_;
- }
-
- const TTypeAnnotationNode* MakeActualType(TExprNode::TPtr& input) {
- return input->GetTypeAnn();
- }
-
- const TTypeAnnotationNode* MakeActualItemType(TExprNode::TPtr& input) {
- auto actualType = MakeActualType(input);
- switch (actualType->GetKind()) {
- case ETypeAnnotationKind::Stream:
- Y_ENSURE(ProcessorMode_ != EProcessorMode::PullList,
- "processor mode mismatches the actual container type");
- return actualType->Cast<TStreamExprType>()->GetItemType();
- case ETypeAnnotationKind::List:
- Y_ENSURE(ProcessorMode_ == EProcessorMode::PullList,
- "processor mode mismatches the actual container type");
- return actualType->Cast<TListExprType>()->GetItemType();
- default:
- Y_ABORT("unexpected return type");
- }
- }
- };
-}
-
-TAutoPtr<IGraphTransformer> NYql::NPureCalc::MakeOutputAligner(
- const TTypeAnnotationNode* outputStruct,
- bool acceptsBlocks,
- EProcessorMode processorMode
-) {
- return new TOutputAligner(outputStruct, acceptsBlocks, processorMode);
-}
diff --git a/ydb/library/yql/public/purecalc/common/transformations/align_output_schema.h b/ydb/library/yql/public/purecalc/common/transformations/align_output_schema.h
deleted file mode 100644
index 294f30b8339..00000000000
--- a/ydb/library/yql/public/purecalc/common/transformations/align_output_schema.h
+++ /dev/null
@@ -1,25 +0,0 @@
-#pragma once
-
-#include <ydb/library/yql/public/purecalc/common/processor_mode.h>
-
-#include <yql/essentials/core/yql_graph_transformer.h>
-#include <yql/essentials/core/yql_type_annotation.h>
-
-namespace NYql {
- namespace NPureCalc {
- /**
- * A transformer which converts an output type of the expression to the given type or reports an error.
- *
- * @param outputStruct destination output struct type.
- * @param acceptsBlocks indicates, whether the output type need to be
- * preprocessed.
- * @param processorMode specifies the top-most container of the result.
- * @return a graph transformer for type alignment.
- */
- TAutoPtr<IGraphTransformer> MakeOutputAligner(
- const TTypeAnnotationNode* outputStruct,
- bool acceptsBlocks,
- EProcessorMode processorMode
- );
- }
-}
diff --git a/ydb/library/yql/public/purecalc/common/transformations/extract_used_columns.cpp b/ydb/library/yql/public/purecalc/common/transformations/extract_used_columns.cpp
deleted file mode 100644
index 3cd4337d74c..00000000000
--- a/ydb/library/yql/public/purecalc/common/transformations/extract_used_columns.cpp
+++ /dev/null
@@ -1,96 +0,0 @@
-#include "extract_used_columns.h"
-
-#include <ydb/library/yql/public/purecalc/common/inspect_input.h>
-
-#include <yql/essentials/core/yql_expr_optimize.h>
-#include <yql/essentials/core/expr_nodes/yql_expr_nodes.h>
-
-using namespace NYql;
-using namespace NYql::NPureCalc;
-
-namespace {
- class TUsedColumnsExtractor : public TSyncTransformerBase {
- private:
- TVector<THashSet<TString>>* const Destination_;
- const TVector<THashSet<TString>>& AllColumns_;
- TString NodeName_;
-
- bool CalculatedUsedFields_ = false;
-
- public:
- TUsedColumnsExtractor(
- TVector<THashSet<TString>>* destination,
- const TVector<THashSet<TString>>& allColumns,
- TString nodeName
- )
- : Destination_(destination)
- , AllColumns_(allColumns)
- , NodeName_(std::move(nodeName))
- {
- }
-
- TUsedColumnsExtractor(TVector<THashSet<TString>>*, TVector<THashSet<TString>>&&, TString) = delete;
-
- public:
- TStatus DoTransform(TExprNode::TPtr input, TExprNode::TPtr& output, TExprContext& ctx) final {
- output = input;
-
- if (CalculatedUsedFields_) {
- return IGraphTransformer::TStatus::Ok;
- }
-
- bool hasError = false;
-
- *Destination_ = AllColumns_;
-
- VisitExpr(input, [&](const TExprNode::TPtr& inputExpr) {
- NNodes::TExprBase node(inputExpr);
- if (auto maybeExtract = node.Maybe<NNodes::TCoExtractMembers>()) {
- auto extract = maybeExtract.Cast();
- const auto& arg = extract.Input().Ref();
- if (arg.IsCallable(NodeName_)) {
- ui32 inputIndex;
- if (!TryFetchInputIndexFromSelf(arg, ctx, AllColumns_.size(), inputIndex)) {
- hasError = true;
- return false;
- }
-
- YQL_ENSURE(inputIndex < AllColumns_.size());
-
- auto& destinationColumnsSet = (*Destination_)[inputIndex];
- const auto& allColumnsSet = AllColumns_[inputIndex];
-
- destinationColumnsSet.clear();
- for (const auto& columnAtom : extract.Members()) {
- TString name = TString(columnAtom.Value());
- YQL_ENSURE(allColumnsSet.contains(name), "unexpected column in the input struct");
- destinationColumnsSet.insert(name);
- }
- }
- }
-
- return true;
- });
-
- if (hasError) {
- return IGraphTransformer::TStatus::Error;
- }
-
- CalculatedUsedFields_ = true;
-
- return IGraphTransformer::TStatus::Ok;
- }
-
- void Rewind() final {
- CalculatedUsedFields_ = false;
- }
- };
-}
-
-TAutoPtr<IGraphTransformer> NYql::NPureCalc::MakeUsedColumnsExtractor(
- TVector<THashSet<TString>>* destination,
- const TVector<THashSet<TString>>& allColumns,
- const TString& nodeName
-) {
- return new TUsedColumnsExtractor(destination, allColumns, nodeName);
-}
diff --git a/ydb/library/yql/public/purecalc/common/transformations/extract_used_columns.h b/ydb/library/yql/public/purecalc/common/transformations/extract_used_columns.h
deleted file mode 100644
index 659232899d9..00000000000
--- a/ydb/library/yql/public/purecalc/common/transformations/extract_used_columns.h
+++ /dev/null
@@ -1,29 +0,0 @@
-#pragma once
-
-#include <ydb/library/yql/public/purecalc/common/names.h>
-
-#include <yql/essentials/core/yql_graph_transformer.h>
-#include <yql/essentials/core/yql_type_annotation.h>
-
-#include <util/generic/hash_set.h>
-#include <util/generic/string.h>
-
-namespace NYql {
- namespace NPureCalc {
- /**
- * Make transformation which builds sets of input columns from the given expression.
- *
- * @param destination a vector of string sets which will be populated with column names sets when
- * transformation pipeline is launched. This pointer should contain a valid
- * TVector<THashSet> instance. The transformation will overwrite its contents.
- * @param allColumns vector of sets with all available columns for each input.
- * @param nodeName name of the callable used to get input data, e.g. `Self`.
- * @return an extractor which scans an input structs contents and populates destination.
- */
- TAutoPtr<IGraphTransformer> MakeUsedColumnsExtractor(
- TVector<THashSet<TString>>* destination,
- const TVector<THashSet<TString>>& allColumns,
- const TString& nodeName = TString{PurecalcInputCallableName}
- );
- }
-}
diff --git a/ydb/library/yql/public/purecalc/common/transformations/output_columns_filter.cpp b/ydb/library/yql/public/purecalc/common/transformations/output_columns_filter.cpp
deleted file mode 100644
index 04181db7c83..00000000000
--- a/ydb/library/yql/public/purecalc/common/transformations/output_columns_filter.cpp
+++ /dev/null
@@ -1,100 +0,0 @@
-#include "output_columns_filter.h"
-
-#include <yql/essentials/core/yql_expr_type_annotation.h>
-
-using namespace NYql;
-using namespace NYql::NPureCalc;
-
-namespace {
- class TOutputColumnsFilter: public TSyncTransformerBase {
- private:
- TMaybe<THashSet<TString>> Filter_;
- bool Fired_;
-
- public:
- explicit TOutputColumnsFilter(TMaybe<THashSet<TString>> filter)
- : Filter_(std::move(filter))
- , Fired_(false)
- {
- }
-
- public:
- void Rewind() override {
- Fired_ = false;
- }
-
- TStatus DoTransform(TExprNode::TPtr input, TExprNode::TPtr& output, TExprContext& ctx) final {
- output = input;
-
- if (Fired_ || Filter_.Empty()) {
- return IGraphTransformer::TStatus::Ok;
- }
-
- const TTypeAnnotationNode* returnType = output->GetTypeAnn();
- const TTypeAnnotationNode* returnItemType = nullptr;
- switch (returnType->GetKind()) {
- case ETypeAnnotationKind::Stream:
- returnItemType = returnType->Cast<TStreamExprType>()->GetItemType();
- break;
- case ETypeAnnotationKind::List:
- returnItemType = returnType->Cast<TListExprType>()->GetItemType();
- break;
- default:
- Y_ABORT("unexpected return type");
- }
-
- if (returnItemType->GetKind() != ETypeAnnotationKind::Struct) {
- ctx.AddError(TIssue(ctx.GetPosition(output->Pos()), "columns filter only supported for single-output programs"));
- }
-
- const auto* returnItemStruct = returnItemType->Cast<TStructExprType>();
-
- auto arg = ctx.NewArgument(TPositionHandle(), "row");
- TExprNode::TListType asStructItems;
- for (const auto& x : returnItemStruct->GetItems()) {
- TExprNode::TPtr value;
- if (Filter_->contains(x->GetName())) {
- value = ctx.Builder({})
- .Callable("Member")
- .Add(0, arg)
- .Atom(1, x->GetName())
- .Seal()
- .Build();
- } else {
- auto type = x->GetItemType();
- value = ctx.Builder({})
- .Callable(type->GetKind() == ETypeAnnotationKind::Optional ? "Nothing" : "Default")
- .Add(0, ExpandType({}, *type, ctx))
- .Seal()
- .Build();
- }
-
- auto item = ctx.Builder({})
- .List()
- .Atom(0, x->GetName())
- .Add(1, value)
- .Seal()
- .Build();
-
- asStructItems.push_back(item);
- }
-
- auto body = ctx.NewCallable(TPositionHandle(), "AsStruct", std::move(asStructItems));
- auto lambda = ctx.NewLambda(TPositionHandle(), ctx.NewArguments(TPositionHandle(), {arg}), std::move(body));
- output = ctx.Builder(TPositionHandle())
- .Callable("Map")
- .Add(0, output)
- .Add(1, lambda)
- .Seal()
- .Build();
-
- Fired_ = true;
-
- return IGraphTransformer::TStatus(IGraphTransformer::TStatus::Repeat, true);
- }
- };
-}
-
-TAutoPtr<IGraphTransformer> NYql::NPureCalc::MakeOutputColumnsFilter(const TMaybe<THashSet<TString>>& columns) {
- return new TOutputColumnsFilter(columns);
-}
diff --git a/ydb/library/yql/public/purecalc/common/transformations/output_columns_filter.h b/ydb/library/yql/public/purecalc/common/transformations/output_columns_filter.h
deleted file mode 100644
index 09fabf885b9..00000000000
--- a/ydb/library/yql/public/purecalc/common/transformations/output_columns_filter.h
+++ /dev/null
@@ -1,18 +0,0 @@
-#pragma once
-
-#include <ydb/library/yql/public/purecalc/common/processor_mode.h>
-
-#include <yql/essentials/core/yql_graph_transformer.h>
-#include <yql/essentials/core/yql_type_annotation.h>
-
-namespace NYql {
- namespace NPureCalc {
- /**
- * A transformer which removes unwanted columns from output.
- *
- * @param columns remove all columns that are not in this set.
- * @return a graph transformer for filtering output.
- */
- TAutoPtr<IGraphTransformer> MakeOutputColumnsFilter(const TMaybe<THashSet<TString>>& columns);
- }
-}
diff --git a/ydb/library/yql/public/purecalc/common/transformations/replace_table_reads.cpp b/ydb/library/yql/public/purecalc/common/transformations/replace_table_reads.cpp
deleted file mode 100644
index bafd67d6f23..00000000000
--- a/ydb/library/yql/public/purecalc/common/transformations/replace_table_reads.cpp
+++ /dev/null
@@ -1,247 +0,0 @@
-#include "replace_table_reads.h"
-
-#include <ydb/library/yql/public/purecalc/common/names.h>
-#include <ydb/library/yql/public/purecalc/common/transformations/utils.h>
-
-#include <yql/essentials/core/yql_expr_optimize.h>
-#include <yql/essentials/core/yql_expr_type_annotation.h>
-
-using namespace NYql;
-using namespace NYql::NPureCalc;
-
-namespace {
- class TTableReadsReplacer: public TSyncTransformerBase {
- private:
- const TVector<const TStructExprType*>& InputStructs_;
- bool UseSystemColumns_;
- EProcessorMode ProcessorMode_;
- TString CallableName_;
- TString TablePrefix_;
- bool Complete_ = false;
-
- public:
- explicit TTableReadsReplacer(
- const TVector<const TStructExprType*>& inputStructs,
- bool useSystemColumns,
- EProcessorMode processorMode,
- TString inputNodeName,
- TString tablePrefix
- )
- : InputStructs_(inputStructs)
- , UseSystemColumns_(useSystemColumns)
- , ProcessorMode_(processorMode)
- , CallableName_(std::move(inputNodeName))
- , TablePrefix_(std::move(tablePrefix))
- {
- }
-
- TTableReadsReplacer(TVector<const TStructExprType*>&&, TString, TString) = delete;
-
- public:
- TStatus DoTransform(const TExprNode::TPtr input, TExprNode::TPtr& output, TExprContext& ctx) final {
- output = input;
- if (Complete_) {
- return TStatus::Ok;
- }
-
- TOptimizeExprSettings settings(nullptr);
-
- auto status = OptimizeExpr(input, output, [&](const TExprNode::TPtr& node, TExprContext& ctx) -> TExprNode::TPtr {
- if (node->IsCallable(NNodes::TCoRight::CallableName())) {
- TIssueScopeGuard issueScope(ctx.IssueManager, [&]() {
- return new TIssue(ctx.GetPosition(node->Pos()), TStringBuilder() << "At function: " << node->Content());
- });
-
- if (!EnsureMinArgsCount(*node, 1, ctx)) {
- return nullptr;
- }
-
- if (node->Child(0)->IsCallable(NNodes::TCoCons::CallableName())) {
- return node;
- }
-
- if (!node->Child(0)->IsCallable(NNodes::TCoRead::CallableName())) {
- ctx.AddError(TIssue(ctx.GetPosition(node->Child(0)->Pos()), TStringBuilder() << "Expected Read!"));
- return nullptr;
- }
-
- return BuildInputFromRead(node->Pos(), node->ChildPtr(0), ctx);
- } else if (node->IsCallable(NNodes::TCoLeft::CallableName())) {
- TIssueScopeGuard issueScope(ctx.IssueManager, [&]() {
- return new TIssue(ctx.GetPosition(node->Pos()), TStringBuilder() << "At function: " << node->Content());
- });
-
- if (!EnsureMinArgsCount(*node, 1, ctx)) {
- return nullptr;
- }
-
- if (!node->Child(0)->IsCallable(NNodes::TCoRead::CallableName())) {
- ctx.AddError(TIssue(ctx.GetPosition(node->Child(0)->Pos()), TStringBuilder() << "Expected Read!"));
- return nullptr;
- }
-
- return node->Child(0)->HeadPtr();
- }
-
- return node;
- }, ctx, settings);
-
- if (status.Level == TStatus::Ok) {
- Complete_ = true;
- }
- return status;
- }
-
- void Rewind() override {
- Complete_ = false;
- }
-
- private:
- TExprNode::TPtr BuildInputFromRead(TPositionHandle replacePos, const TExprNode::TPtr& node, TExprContext& ctx) {
- TIssueScopeGuard issueScope(ctx.IssueManager, [&]() {
- return MakeIntrusive<TIssue>(ctx.GetPosition(node->Pos()), TStringBuilder() << "At function: " << node->Content());
- });
-
- if (!EnsureMinArgsCount(*node, 3, ctx)) {
- return nullptr;
- }
-
- const auto source = node->ChildPtr(2);
- if (source->IsCallable(NNodes::TCoKey::CallableName())) {
- return BuildInputFromKey(replacePos, source, ctx);
- }
- if (source->IsCallable("DataTables")) {
- return BuildInputFromDataTables(replacePos, source, ctx);
- }
-
- ctx.AddError(TIssue(ctx.GetPosition(source->Pos()), TStringBuilder() << "Unsupported read source: " << source->Content()));
-
- return nullptr;
- }
-
- TExprNode::TPtr BuildInputFromKey(TPositionHandle replacePos, const TExprNode::TPtr& node, TExprContext& ctx) {
- TIssueScopeGuard issueScope(ctx.IssueManager, [&]() {
- return MakeIntrusive<TIssue>(ctx.GetPosition(node->Pos()), TStringBuilder() << "At function: " << node->Content());
- });
-
- ui32 inputIndex;
- TExprNode::TPtr inputTableName;
-
- if (!TryFetchInputIndexFromKey(node, ctx, inputIndex, inputTableName)) {
- return nullptr;
- }
-
- YQL_ENSURE(inputTableName->IsCallable(NNodes::TCoString::CallableName()));
-
- auto inputNode = ctx.Builder(replacePos)
- .Callable(CallableName_)
- .Atom(0, ToString(inputIndex))
- .Seal()
- .Build();
-
- if (inputNode->IsCallable(PurecalcBlockInputCallableName)) {
- const auto inputStruct = InputStructs_[inputIndex]->Cast<TStructExprType>();
- const auto blocksLambda = NodeFromBlocks(replacePos, inputStruct, ctx);
- bool wrapLMap = ProcessorMode_ == EProcessorMode::PullList;
- inputNode = ApplyToIterable(replacePos, inputNode, blocksLambda, wrapLMap, ctx);
- }
-
- if (UseSystemColumns_) {
- auto mapLambda = ctx.Builder(replacePos)
- .Lambda()
- .Param("row")
- .Callable(0, NNodes::TCoAddMember::CallableName())
- .Arg(0, "row")
- .Atom(1, PurecalcSysColumnTablePath)
- .Add(2, inputTableName)
- .Seal()
- .Seal()
- .Build();
-
- return ctx.Builder(replacePos)
- .Callable(NNodes::TCoMap::CallableName())
- .Add(0, std::move(inputNode))
- .Add(1, std::move(mapLambda))
- .Seal()
- .Build();
- }
-
- return inputNode;
- }
-
- TExprNode::TPtr BuildInputFromDataTables(TPositionHandle replacePos, const TExprNode::TPtr& node, TExprContext& ctx) {
- TIssueScopeGuard issueScope(ctx.IssueManager, [&]() {
- return MakeIntrusive<TIssue>(ctx.GetPosition(node->Pos()), TStringBuilder() << "At function: " << node->Content());
- });
-
- if (InputStructs_.empty()) {
- ctx.AddError(TIssue(ctx.GetPosition(node->Pos()), "No inputs provided by input spec"));
- return nullptr;
- }
-
- if (!EnsureArgsCount(*node, 0, ctx)) {
- return nullptr;
- }
-
- auto builder = ctx.Builder(replacePos);
-
- if (InputStructs_.size() > 1) {
- auto listBuilder = builder.List();
-
- for (ui32 i = 0; i < InputStructs_.size(); ++i) {
- listBuilder.Callable(i, CallableName_).Atom(0, ToString(i)).Seal();
- }
-
- return listBuilder.Seal().Build();
- }
-
- return builder.Callable(CallableName_).Atom(0, "0").Seal().Build();
- }
-
- bool TryFetchInputIndexFromKey(const TExprNode::TPtr& node, TExprContext& ctx, ui32& resultIndex, TExprNode::TPtr& resultTableName) {
- if (!EnsureArgsCount(*node, 1, ctx)) {
- return false;
- }
-
- const auto* keyArg = node->Child(0);
- if (!keyArg->IsList() || keyArg->ChildrenSize() != 2 || !keyArg->Child(0)->IsAtom("table") ||
- !keyArg->Child(1)->IsCallable(NNodes::TCoString::CallableName()))
- {
- ctx.AddError(TIssue(ctx.GetPosition(keyArg->Pos()), "Expected single table name"));
- return false;
- }
-
- resultTableName = keyArg->ChildPtr(1);
-
- auto tableName = resultTableName->Child(0)->Content();
-
- if (!tableName.StartsWith(TablePrefix_)) {
- ctx.AddError(TIssue(ctx.GetPosition(resultTableName->Child(0)->Pos()),
- TStringBuilder() << "Invalid table name " << TString{tableName}.Quote() << ": prefix must be " << TablePrefix_.Quote()));
- return false;
- }
-
- tableName.SkipPrefix(TablePrefix_);
-
- if (!tableName) {
- resultIndex = 0;
- } else if (!TryFromString(tableName, resultIndex)) {
- ctx.AddError(TIssue(ctx.GetPosition(resultTableName->Child(0)->Pos()),
- TStringBuilder() << "Invalid table name " << TString{tableName}.Quote() << ": suffix must be UI32 number"));
- return false;
- }
-
- return true;
- }
- };
-}
-
-TAutoPtr<IGraphTransformer> NYql::NPureCalc::MakeTableReadsReplacer(
- const TVector<const TStructExprType*>& inputStructs,
- bool useSystemColumns,
- EProcessorMode processorMode,
- TString callableName,
- TString tablePrefix
-) {
- return new TTableReadsReplacer(inputStructs, useSystemColumns, processorMode, std::move(callableName), std::move(tablePrefix));
-}
diff --git a/ydb/library/yql/public/purecalc/common/transformations/replace_table_reads.h b/ydb/library/yql/public/purecalc/common/transformations/replace_table_reads.h
deleted file mode 100644
index 465cf656ce6..00000000000
--- a/ydb/library/yql/public/purecalc/common/transformations/replace_table_reads.h
+++ /dev/null
@@ -1,30 +0,0 @@
-#pragma once
-
-#include <ydb/library/yql/public/purecalc/common/names.h>
-#include <ydb/library/yql/public/purecalc/common/processor_mode.h>
-
-#include <yql/essentials/core/yql_graph_transformer.h>
-
-namespace NYql::NPureCalc {
- /**
- * SQL translation would generate a standard Read! call to read each input table. It will then generate
- * a Right! call to get the table data from a tuple returned by Read!. This transformation replaces any Right!
- * call with a call to a special function used to get input data.
- *
- * Each table name must start with the specified prefix and end with an index of program input (e.g. `Input0`).
- * Name without numeric suffix is an alias for the first input.
- *
- * @param inputStructs types of each input.
- * @param useSystemColumns whether to allow special system columns in input structs.
- * @param processorMode current processor mode.
- * @param callableName name of the special callable used to get input data (e.g. `Self`).
- * @param tablePrefix required prefix for all table names (e.g. `Input`).
- * @return a graph transformer for replacing table reads.
- */
- TAutoPtr<IGraphTransformer> MakeTableReadsReplacer(
- const TVector<const TStructExprType*>& inputStructs,
- bool useSystemColumns,
- EProcessorMode processorMode,
- TString callableName = TString{PurecalcInputCallableName},
- TString tablePrefix = TString{PurecalcInputTablePrefix}
- );
-}
diff --git a/ydb/library/yql/public/purecalc/common/transformations/root_to_blocks.cpp b/ydb/library/yql/public/purecalc/common/transformations/root_to_blocks.cpp
deleted file mode 100644
index 918381d9709..00000000000
--- a/ydb/library/yql/public/purecalc/common/transformations/root_to_blocks.cpp
+++ /dev/null
@@ -1,65 +0,0 @@
-#include "root_to_blocks.h"
-
-#include <ydb/library/yql/public/purecalc/common/transformations/utils.h>
-
-#include <yql/essentials/core/yql_expr_type_annotation.h>
-
-using namespace NYql;
-using namespace NYql::NPureCalc;
-
-namespace {
-
-/**
- * One-shot transformer that wraps the program root into a conversion to block types.
- * It does nothing when block output is not requested or once the root has been wrapped;
- * Rewind() re-arms it.
- */
-class TRootToBlocks: public TSyncTransformerBase {
-private:
-    bool AcceptsBlocks_;
-    EProcessorMode ProcessorMode_;
-    bool Wrapped_;
-
-public:
-    explicit TRootToBlocks(bool acceptsBlocks, EProcessorMode processorMode)
-        : AcceptsBlocks_(acceptsBlocks)
-        , ProcessorMode_(processorMode)
-        , Wrapped_(false)
-    {
-    }
-
-    void Rewind() override {
-        Wrapped_ = false;
-    }
-
-    TStatus DoTransform(TExprNode::TPtr input, TExprNode::TPtr& output, TExprContext& ctx) final {
-        if (!AcceptsBlocks_ || Wrapped_) {
-            return IGraphTransformer::TStatus::Ok;
-        }
-
-        // PullList programs return a list and are wrapped via LMap; other modes return a stream.
-        const bool isPullList = ProcessorMode_ == EProcessorMode::PullList;
-
-        const TTypeAnnotationNode* returnType = input->GetTypeAnn();
-        const TTypeAnnotationNode* returnItemType;
-        if (isPullList) {
-            Y_ENSURE(returnType->GetKind() == ETypeAnnotationKind::List);
-            returnItemType = returnType->Cast<TListExprType>()->GetItemType();
-        } else {
-            Y_ENSURE(returnType->GetKind() == ETypeAnnotationKind::Stream);
-            returnItemType = returnType->Cast<TStreamExprType>()->GetItemType();
-        }
-
-        Y_ENSURE(returnItemType->GetKind() == ETypeAnnotationKind::Struct);
-        const auto toBlocks = NodeToBlocks(input->Pos(), returnItemType->Cast<TStructExprType>(), ctx);
-        output = ApplyToIterable(input->Pos(), input, toBlocks, isPullList, ctx);
-
-        Wrapped_ = true;
-
-        return IGraphTransformer::TStatus(IGraphTransformer::TStatus::Repeat, true);
-    }
-};
-
-} // namespace
-
-// Factory for the root-to-blocks transformer.
-TAutoPtr<IGraphTransformer> NYql::NPureCalc::MakeRootToBlocks(
-    bool acceptsBlocks,
-    EProcessorMode processorMode
-) {
-    auto* transformer = new TRootToBlocks(acceptsBlocks, processorMode);
-    return transformer;
-}
diff --git a/ydb/library/yql/public/purecalc/common/transformations/root_to_blocks.h b/ydb/library/yql/public/purecalc/common/transformations/root_to_blocks.h
deleted file mode 100644
index 892987034a4..00000000000
--- a/ydb/library/yql/public/purecalc/common/transformations/root_to_blocks.h
+++ /dev/null
@@ -1,22 +0,0 @@
-#pragma once
-
-#include <ydb/library/yql/public/purecalc/common/processor_mode.h>
-
-#include <yql/essentials/core/yql_graph_transformer.h>
-
-namespace NYql {
- namespace NPureCalc {
- /**
- * A transformer which rewrites the root to respect block types.
- *
- * @param acceptsBlocks allows using this transformer in a pipeline and
- * skipping this phase if no block output is required.
- * @param processorMode specifies the top-most container of the result.
- * @return a graph transformer for rewriting the root node.
- */
- TAutoPtr<IGraphTransformer> MakeRootToBlocks(
- bool acceptsBlocks,
- EProcessorMode processorMode
- );
- }
-}
diff --git a/ydb/library/yql/public/purecalc/common/transformations/type_annotation.cpp b/ydb/library/yql/public/purecalc/common/transformations/type_annotation.cpp
deleted file mode 100644
index 5920a5df266..00000000000
--- a/ydb/library/yql/public/purecalc/common/transformations/type_annotation.cpp
+++ /dev/null
@@ -1,251 +0,0 @@
-#include "type_annotation.h"
-
-#include <ydb/library/yql/public/purecalc/common/interface.h>
-#include <ydb/library/yql/public/purecalc/common/inspect_input.h>
-#include <ydb/library/yql/public/purecalc/common/names.h>
-#include <ydb/library/yql/public/purecalc/common/transformations/utils.h>
-
-#include <yql/essentials/core/type_ann/type_ann_core.h>
-#include <yql/essentials/core/yql_expr_type_annotation.h>
-
-#include <util/generic/fwd.h>
-
-using namespace NYql;
-using namespace NYql::NPureCalc;
-
-namespace {
-    /**
-     * Type annotation transformer with per-callable overrides.
-     *
-     * Callables whose name has a registered handler are processed by that handler;
-     * everything else is delegated to the transformer returned by
-     * CreateExtCallableTypeAnnotationTransformer. Async statuses from the delegate are rejected.
-     */
-    class TTypeAnnotatorBase: public TSyncTransformerBase {
-    public:
-        using THandler = std::function<TStatus(const TExprNode::TPtr&, TExprNode::TPtr&, TExprContext&)>;
-
-        explicit TTypeAnnotatorBase(TTypeAnnotationContextPtr typeAnnotationContext)
-        {
-            OriginalTransformer_.reset(CreateExtCallableTypeAnnotationTransformer(*typeAnnotationContext).Release());
-        }
-
-        TStatus DoTransform(TExprNode::TPtr input, TExprNode::TPtr& output, TExprContext& ctx) final {
-            if (input->Type() == TExprNode::Callable) {
-                if (auto handler = Handlers_.FindPtr(input->Content())) {
-                    return (*handler)(input, output, ctx);
-                }
-            }
-
-            auto status = OriginalTransformer_->Transform(input, output, ctx);
-
-            YQL_ENSURE(status.Level != IGraphTransformer::TStatus::Async, "Async type check is not supported");
-
-            return status;
-        }
-
-        void Rewind() final {
-            OriginalTransformer_->Rewind();
-        }
-
-    protected:
-        /// Register the same handler under every given callable name; a duplicate name is a hard error.
-        void AddHandler(std::initializer_list<TStringBuf> names, THandler handler) {
-            for (auto name: names) {
-                YQL_ENSURE(Handlers_.emplace(name, handler).second, "Duplicate handler for " << name);
-            }
-        }
-
-        /// Adapt a derived-class member `(input, output, ctx)` to THandler.
-        /// The node is forwarded by const reference, so dispatch does not copy the pointer.
-        template <class TDerived>
-        THandler Hndl(TStatus(TDerived::* handler)(const TExprNode::TPtr&, TExprNode::TPtr&, TExprContext&)) {
-            return [this, handler] (const TExprNode::TPtr& input, TExprNode::TPtr& output, TExprContext& ctx) {
-                return (static_cast<TDerived*>(this)->*handler)(input, output, ctx);
-            };
-        }
-
-        /// Adapt a derived-class member `(input, ctx)` that never replaces the node to THandler.
-        template <class TDerived>
-        THandler Hndl(TStatus(TDerived::* handler)(const TExprNode::TPtr&, TExprContext&)) {
-            return [this, handler] (const TExprNode::TPtr& input, TExprNode::TPtr& /*output*/, TExprContext& ctx) {
-                return (static_cast<TDerived*>(this)->*handler)(input, ctx);
-            };
-        }
-
-    private:
-        std::shared_ptr<IGraphTransformer> OriginalTransformer_;
-        // Keys are non-owning views; the strings passed to AddHandler must outlive this map.
-        THashMap<TStringBuf, THandler> Handlers_;
-    };
-
-    /**
-     * Purecalc type annotator: besides the standard annotation it knows the type of the
-     * input callable and rewrites TableName / TablePath / HoppingTraits callables.
-     *
-     * The input struct and raw input type vectors are held by reference and must outlive
-     * this object.
-     */
-    class TTypeAnnotator : public TTypeAnnotatorBase {
-    private:
-        TTypeAnnotationContextPtr TypeAnnotationContext_;
-        const TVector<const TStructExprType*>& InputStructs_;
-        TVector<const TStructExprType*>& RawInputTypes_;
-        EProcessorMode ProcessorMode_;
-        TString InputNodeName_;
-
-    public:
-        TTypeAnnotator(
-            TTypeAnnotationContextPtr typeAnnotationContext,
-            const TVector<const TStructExprType*>& inputStructs,
-            TVector<const TStructExprType*>& rawInputTypes,
-            EProcessorMode processorMode,
-            TString nodeName
-        )
-            : TTypeAnnotatorBase(typeAnnotationContext)
-            , TypeAnnotationContext_(typeAnnotationContext)
-            , InputStructs_(inputStructs)
-            , RawInputTypes_(rawInputTypes)
-            , ProcessorMode_(processorMode)
-            , InputNodeName_(std::move(nodeName))
-        {
-            AddHandler({InputNodeName_}, Hndl(&TTypeAnnotator::HandleInputNode));
-            AddHandler({NNodes::TCoTableName::CallableName()}, Hndl(&TTypeAnnotator::HandleTableName));
-            AddHandler({NNodes::TCoTablePath::CallableName()}, Hndl(&TTypeAnnotator::HandleTablePath));
-            AddHandler({NNodes::TCoHoppingTraits::CallableName()}, Hndl(&TTypeAnnotator::HandleHoppingTraits));
-        }
-
-        // InputStructs_ is stored as a reference, so a temporary vector must be rejected.
-        // The parameter list mirrors the real constructor so this overload wins for rvalues.
-        TTypeAnnotator(
-            TTypeAnnotationContextPtr,
-            TVector<const TStructExprType*>&&,
-            TVector<const TStructExprType*>&,
-            EProcessorMode,
-            TString) = delete;
-
-    private:
-        /// Annotate the input callable with a list (PullList) or stream of the input row type
-        /// and record the row type in RawInputTypes_.
-        TStatus HandleInputNode(const TExprNode::TPtr& input, TExprContext& ctx) {
-            ui32 inputIndex;
-            if (!TryFetchInputIndexFromSelf(*input, ctx, InputStructs_.size(), inputIndex)) {
-                return IGraphTransformer::TStatus::Error;
-            }
-
-            YQL_ENSURE(inputIndex < InputStructs_.size());
-            // The output vector is written by index below; fail loudly instead of writing out of bounds.
-            YQL_ENSURE(inputIndex < RawInputTypes_.size());
-
-            auto itemType = InputStructs_[inputIndex];
-
-            // XXX: Tweak the input expression type, if the spec supports blocks:
-            // 1. Add "_yql_block_length" attribute for internal usage.
-            // 2. Add block container to wrap the actual item type.
-            if (input->IsCallable(PurecalcBlockInputCallableName)) {
-                itemType = WrapBlockStruct(itemType, ctx);
-            }
-
-            RawInputTypes_[inputIndex] = itemType;
-
-            TColumnOrder columnOrder;
-            for (const auto& i : itemType->GetItems()) {
-                columnOrder.AddColumn(TString(i->GetName()));
-            }
-
-            if (ProcessorMode_ != EProcessorMode::PullList) {
-                input->SetTypeAnn(ctx.MakeType<TStreamExprType>(itemType));
-            } else {
-                input->SetTypeAnn(ctx.MakeType<TListExprType>(itemType));
-            }
-
-            TypeAnnotationContext_->SetColumnOrder(*input, columnOrder, ctx);
-            return TStatus::Ok;
-        }
-
-        /// Replace TableName(...) either with the table-path member of the row wrapped in
-        /// DependsOn, or with its String argument as is.
-        TStatus HandleTableName(const TExprNode::TPtr& input, TExprNode::TPtr& output, TExprContext& ctx) {
-            if (!EnsureMinMaxArgsCount(*input, 1, 2, ctx)) {
-                return TStatus::Error;
-            }
-
-            // The optional second argument names the system; only the purecalc default is accepted.
-            if (input->ChildrenSize() > 1) {
-                if (!EnsureAtom(input->Tail(), ctx)) {
-                    return TStatus::Error;
-                }
-
-                if (input->Tail().Content() != PurecalcDefaultService) {
-                    ctx.AddError(
-                        TIssue(
-                            ctx.GetPosition(input->Tail().Pos()),
-                            TStringBuilder() << "Unsupported system: " << input->Tail().Content()));
-                    return TStatus::Error;
-                }
-            }
-
-            if (input->Head().IsCallable(NNodes::TCoDependsOn::CallableName())) {
-                if (!EnsureArgsCount(input->Head(), 1, ctx)) {
-                    return TStatus::Error;
-                }
-
-                if (!TryBuildTableNameNode(input->Pos(), input->Head().HeadPtr(), output, ctx)) {
-                    return TStatus::Error;
-                }
-            } else {
-                if (!EnsureSpecificDataType(input->Head(), EDataSlot::String, ctx)) {
-                    return TStatus::Error;
-                }
-                output = input->HeadPtr();
-            }
-
-            return TStatus::Repeat;
-        }
-
-        /// Replace TablePath(DependsOn(row)) with the table-path member of the row.
-        TStatus HandleTablePath(const TExprNode::TPtr& input, TExprNode::TPtr& output, TExprContext& ctx) {
-            if (!EnsureArgsCount(*input, 1, ctx)) {
-                return TStatus::Error;
-            }
-
-            if (!EnsureDependsOn(input->Head(), ctx)) {
-                return TStatus::Error;
-            }
-
-            if (!EnsureArgsCount(input->Head(), 1, ctx)) {
-                return TStatus::Error;
-            }
-
-            if (!TryBuildTableNameNode(input->Pos(), input->Head().HeadPtr(), output, ctx)) {
-                return TStatus::Error;
-            }
-
-            return TStatus::Repeat;
-        }
-
-        /// Give a single-argument HoppingTraits callable a second child, the atom "false",
-        /// by modifying the node in place.
-        TStatus HandleHoppingTraits(const TExprNode::TPtr& input, TExprNode::TPtr& output, TExprContext& ctx) {
-            Y_UNUSED(output);
-            if (input->ChildrenSize() == 1) {
-                auto children = input->ChildrenList();
-                auto falseArg = ctx.Builder(input->Pos())
-                    .Atom("false")
-                    .Seal()
-                    .Build();
-                children.emplace_back(falseArg);
-                input->ChangeChildrenInplace(std::move(children));
-                return TStatus::Repeat;
-            }
-
-            return TStatus::Ok;
-        }
-
-    private:
-        /// Build the expression that yields the table name for a row: the table-path system
-        /// column when the row struct has it, an empty String literal otherwise.
-        bool TryBuildTableNameNode(
-            TPositionHandle position, const TExprNode::TPtr& row, TExprNode::TPtr& result, TExprContext& ctx)
-        {
-            if (!EnsureStructType(*row, ctx)) {
-                return false;
-            }
-
-            const auto* structType = row->GetTypeAnn()->Cast<TStructExprType>();
-
-            if (auto pos = structType->FindItem(PurecalcSysColumnTablePath)) {
-                if (!EnsureSpecificDataType(row->Pos(), *structType->GetItems()[*pos]->GetItemType(), EDataSlot::String, ctx)) {
-                    return false;
-                }
-
-                result = ctx.Builder(position)
-                    .Callable(NNodes::TCoMember::CallableName())
-                        .Add(0, row)
-                        .Atom(1, PurecalcSysColumnTablePath)
-                    .Seal()
-                    .Build();
-            } else {
-                result = ctx.Builder(position)
-                    .Callable(NNodes::TCoString::CallableName())
-                        .Atom(0, "")
-                    .Seal()
-                    .Build();
-            }
-
-            return true;
-        }
-    };
-}
-
-// Factory for the purecalc type annotator. Both vectors are captured by reference
-// by the returned transformer.
-TAutoPtr<IGraphTransformer> NYql::NPureCalc::MakeTypeAnnotationTransformer(
-    TTypeAnnotationContextPtr typeAnnotationContext,
-    const TVector<const TStructExprType*>& inputStructs,
-    TVector<const TStructExprType*>& rawInputTypes,
-    EProcessorMode processorMode,
-    const TString& nodeName
-) {
-    auto* annotator = new TTypeAnnotator(
-        typeAnnotationContext,
-        inputStructs,
-        rawInputTypes,
-        processorMode,
-        nodeName);
-    return annotator;
-}
diff --git a/ydb/library/yql/public/purecalc/common/transformations/type_annotation.h b/ydb/library/yql/public/purecalc/common/transformations/type_annotation.h
deleted file mode 100644
index 4926617546a..00000000000
--- a/ydb/library/yql/public/purecalc/common/transformations/type_annotation.h
+++ /dev/null
@@ -1,30 +0,0 @@
-#pragma once
-
-#include <ydb/library/yql/public/purecalc/common/names.h>
-#include <ydb/library/yql/public/purecalc/common/processor_mode.h>
-
-#include <yql/essentials/core/yql_graph_transformer.h>
-#include <yql/essentials/core/yql_type_annotation.h>
-
-namespace NYql {
- namespace NPureCalc {
- /**
- * Build type annotation transformer that is aware of type of the input rows.
- *
- * @param typeAnnotationContext current context.
- * @param inputStructs types of each input.
- * @param rawInputStructs container to store the resulting input item type;
- * it is written by input index.
- * @param processorMode current processor mode. This will affect generated input type:
- * a list of rows for PullList, a stream of rows otherwise.
- * @param nodeName name of the callable used to get input data, e.g. `Self`.
- * @return a graph transformer for type annotation.
- */
- TAutoPtr<IGraphTransformer> MakeTypeAnnotationTransformer(
- TTypeAnnotationContextPtr typeAnnotationContext,
- const TVector<const TStructExprType*>& inputStructs,
- TVector<const TStructExprType*>& rawInputStructs,
- EProcessorMode processorMode,
- const TString& nodeName = TString{PurecalcInputCallableName}
- );
- }
-}
diff --git a/ydb/library/yql/public/purecalc/common/transformations/utils.cpp b/ydb/library/yql/public/purecalc/common/transformations/utils.cpp
deleted file mode 100644
index f57f6cb801a..00000000000
--- a/ydb/library/yql/public/purecalc/common/transformations/utils.cpp
+++ /dev/null
@@ -1,179 +0,0 @@
-#include "utils.h"
-
-#include <ydb/library/yql/public/purecalc/common/names.h>
-#include <yql/essentials/core/yql_expr_type_annotation.h>
-
-using namespace NYql;
-using namespace NYql::NPureCalc;
-
-// Build a one-argument lambda over a stream:
-// ToFlow -> ExpandMap -> WideFromBlocks -> NarrowMap -> FromFlow.
-// ExpandMap emits one Member per item of structType plus the PurecalcBlockColumnLength member;
-// NarrowMap reassembles a struct from items.size() fields (the length column is not included).
-// structType must have at least one item.
-TExprNode::TPtr NYql::NPureCalc::NodeFromBlocks(
- const TPositionHandle& pos,
- const TStructExprType* structType,
- TExprContext& ctx
-) {
- const auto items = structType->GetItems();
- Y_ENSURE(items.size() > 0);
- return ctx.Builder(pos)
- .Lambda()
- .Param("stream")
- .Callable(0, "FromFlow")
- .Callable(0, "NarrowMap")
- .Callable(0, "WideFromBlocks")
- .Callable(0, "ExpandMap")
- .Callable(0, "ToFlow")
- .Arg(0, "stream")
- .Seal()
- .Lambda(1)
- .Param("item")
- .Do([&](TExprNodeBuilder& lambda) -> TExprNodeBuilder& {
- ui32 i = 0;
- for (const auto& item : items) {
- lambda.Callable(i++, "Member")
- .Arg(0, "item")
- .Atom(1, item->GetName())
- .Seal();
- }
- // The block length column goes last, after all data columns.
- lambda.Callable(i, "Member")
- .Arg(0, "item")
- .Atom(1, PurecalcBlockColumnLength)
- .Seal();
- return lambda;
- })
- .Seal()
- .Seal()
- .Seal()
- .Lambda(1)
- .Params("fields", items.size())
- .Callable("AsStruct")
- .Do([&](TExprNodeBuilder& parent) -> TExprNodeBuilder& {
- ui32 i = 0;
- for (const auto& item : items) {
- parent.List(i)
- .Atom(0, item->GetName())
- .Arg(1, "fields", i++)
- .Seal();
- }
- return parent;
- })
- .Seal()
- .Seal()
- .Seal()
- .Seal()
- .Seal()
- .Build();
-}
-
-// Build a one-argument lambda over a stream:
-// ToFlow -> ExpandMap -> WideToBlocks -> NarrowMap -> FromFlow.
-// ExpandMap emits one Member per item of structType; NarrowMap takes items.size() + 1 fields
-// and reassembles a struct whose last member is PurecalcBlockColumnLength.
-// structType must have at least one item.
-TExprNode::TPtr NYql::NPureCalc::NodeToBlocks(
- const TPositionHandle& pos,
- const TStructExprType* structType,
- TExprContext& ctx
-) {
- const auto items = structType->GetItems();
- Y_ENSURE(items.size() > 0);
- return ctx.Builder(pos)
- .Lambda()
- .Param("stream")
- .Callable("FromFlow")
- .Callable(0, "NarrowMap")
- .Callable(0, "WideToBlocks")
- .Callable(0, "ExpandMap")
- .Callable(0, "ToFlow")
- .Arg(0, "stream")
- .Seal()
- .Lambda(1)
- .Param("item")
- .Do([&](TExprNodeBuilder& lambda) -> TExprNodeBuilder& {
- ui32 i = 0;
- for (const auto& item : items) {
- lambda.Callable(i++, "Member")
- .Arg(0, "item")
- .Atom(1, item->GetName())
- .Seal();
- }
- return lambda;
- })
- .Seal()
- .Seal()
- .Seal()
- .Lambda(1)
- .Params("fields", items.size() + 1)
- .Callable("AsStruct")
- .Do([&](TExprNodeBuilder& parent) -> TExprNodeBuilder& {
- ui32 i = 0;
- for (const auto& item : items) {
- parent.List(i)
- .Atom(0, item->GetName())
- .Arg(1, "fields", i++)
- .Seal();
- }
- // The extra trailing field becomes the block length column.
- parent.List(i)
- .Atom(0, PurecalcBlockColumnLength)
- .Arg(1, "fields", i)
- .Seal();
- return parent;
- })
- .Seal()
- .Seal()
- .Seal()
- .Seal()
- .Seal()
- .Build();
-}
-
-// Apply `lambda` to `iterable`: directly when wrapLMap is false, or from inside an
-// LMap callable over the iterable when wrapLMap is true.
-TExprNode::TPtr NYql::NPureCalc::ApplyToIterable(
-    const TPositionHandle& pos,
-    const TExprNode::TPtr iterable,
-    const TExprNode::TPtr lambda,
-    bool wrapLMap,
-    TExprContext& ctx
-) {
-    if (!wrapLMap) {
-        return ctx.Builder(pos)
-            .Apply(lambda)
-                .With(0, iterable)
-            .Seal()
-            .Build();
-    }
-
-    return ctx.Builder(pos)
-        .Callable("LMap")
-            .Add(0, iterable)
-            .Lambda(1)
-                .Param("stream")
-                .Apply(lambda)
-                    .With(0, "stream")
-                .Seal()
-            .Seal()
-        .Seal()
-        .Build();
-}
-
-// Produce a struct type where every member of `structType` is wrapped into a block type,
-// followed by a scalar Uint64 member named PurecalcBlockColumnLength.
-const TStructExprType* NYql::NPureCalc::WrapBlockStruct(
-    const TStructExprType* structType,
-    TExprContext& ctx
-) {
-    TVector<const TItemExprType*> blockMembers;
-    for (const auto& member : structType->GetItems()) {
-        const auto wrappedType = ctx.MakeType<TBlockExprType>(member->GetItemType());
-        const auto wrappedMember = ctx.MakeType<TItemExprType>(member->GetName(), wrappedType);
-        blockMembers.push_back(wrappedMember);
-    }
-
-    const auto lengthDataType = ctx.MakeType<TDataExprType>(EDataSlot::Uint64);
-    const auto lengthType = ctx.MakeType<TScalarExprType>(lengthDataType);
-    blockMembers.push_back(ctx.MakeType<TItemExprType>(PurecalcBlockColumnLength, lengthType));
-
-    return ctx.MakeType<TStructExprType>(blockMembers);
-}
-
-// Produce a struct type with the PurecalcBlockColumnLength member dropped and every other
-// member replaced by the item type reported by GetBlockItemType.
-const TStructExprType* NYql::NPureCalc::UnwrapBlockStruct(
-    const TStructExprType* structType,
-    TExprContext& ctx
-) {
-    TVector<const TItemExprType*> plainMembers;
-    for (const auto& member : structType->GetItems()) {
-        if (member->GetName() != PurecalcBlockColumnLength) {
-            bool isScalarUnused;
-            const auto plainType = GetBlockItemType(*member->GetItemType(), isScalarUnused);
-            plainMembers.push_back(ctx.MakeType<TItemExprType>(member->GetName(), plainType));
-        }
-    }
-    return ctx.MakeType<TStructExprType>(plainMembers);
-}
diff --git a/ydb/library/yql/public/purecalc/common/transformations/utils.h b/ydb/library/yql/public/purecalc/common/transformations/utils.h
deleted file mode 100644
index cc8849b7e3a..00000000000
--- a/ydb/library/yql/public/purecalc/common/transformations/utils.h
+++ /dev/null
@@ -1,83 +0,0 @@
-#pragma once
-
-#include <yql/essentials/core/yql_graph_transformer.h>
-
-namespace NYql {
- namespace NPureCalc {
- /**
- * Build a lambda that takes a stream of block structs and converts it
- * from the block representation to plain structs.
- *
- * @param pos the position to attach to the generated nodes.
- * @param structType the item type of the container provided by the node.
- * @param ctx the context to make ExprNode rewrites.
- * @return the resulting lambda ExprNode.
- */
- TExprNode::TPtr NodeFromBlocks(
- const TPositionHandle& pos,
- const TStructExprType* structType,
- TExprContext& ctx
- );
-
- /**
- * Build a lambda that takes a stream of plain structs and converts it
- * to the block representation.
- *
- * @param pos the position to attach to the generated nodes.
- * @param structType the item type of the container provided by the node.
- * @param ctx the context to make ExprNode rewrites.
- * @return the resulting lambda ExprNode.
- */
- TExprNode::TPtr NodeToBlocks(
- const TPositionHandle& pos,
- const TStructExprType* structType,
- TExprContext& ctx
- );
-
- /**
- * A helper to apply the given lambda to the given iterable (either
- * list or stream). If the iterable is a list, the lambda should be passed
- * to the <LMap> callable; if the iterable is a stream, the lambda should
- * be applied right to the iterable.
- *
- * @param pos the position of the given node to be rewritten.
- * @param iterable the node that provides the iterable to be processed.
- * @param lambda the node that provides the lambda to be applied.
- * @param wrapLMap indicator to wrap the result with LMap callable.
- * @param ctx the context to make ExprNode rewrites.
- * @return the resulting ExprNode.
- */
- TExprNode::TPtr ApplyToIterable(
- const TPositionHandle& pos,
- const TExprNode::TPtr iterable,
- const TExprNode::TPtr lambda,
- bool wrapLMap,
- TExprContext& ctx
- );
-
- /**
- * A helper which wraps the items of the given struct with the block
- * type container and appends the new item for _yql_block_length column.
- *
- * @param structType original struct to be wrapped.
- * @param ctx the context to make ExprType rewrite.
- * @return the new struct with block items.
- */
- const TStructExprType* WrapBlockStruct(
- const TStructExprType* structType,
- TExprContext& ctx
- );
-
- /**
- * A helper which unwraps the block container from the items of the
- * given struct and removes the item for _yql_block_length column.
- *
- * @param structType original struct to be unwrapped.
- * @param ctx the context to make ExprType rewrite.
- * @return the new struct without block items.
- */
- const TStructExprType* UnwrapBlockStruct(
- const TStructExprType* structType,
- TExprContext& ctx
- );
- }
-}
diff --git a/ydb/library/yql/public/purecalc/common/type_from_schema.cpp b/ydb/library/yql/public/purecalc/common/type_from_schema.cpp
deleted file mode 100644
index 373283a1a8e..00000000000
--- a/ydb/library/yql/public/purecalc/common/type_from_schema.cpp
+++ /dev/null
@@ -1,255 +0,0 @@
-#include "type_from_schema.h"
-
-#include <library/cpp/yson/node/node_io.h>
-
-#include <yql/essentials/core/yql_expr_type_annotation.h>
-#include <yql/essentials/providers/common/schema/expr/yql_expr_schema.h>
-
-namespace {
- using namespace NYql;
-
-// Add an issue built from the streamed arguments to `ctx`; a variable named `ctx`
-// must be in scope where the macro is used.
-#define REPORT(...) ctx.AddError(TIssue(TString(TStringBuilder() << __VA_ARGS__)))
-
-    /**
-     * Compare struct `got` against `expected`, reporting every mismatch to ctx.
-     *
-     * Each member of `got` must exist in `expected` and be silently convertible to the expected
-     * type; each non-optional member of `expected` must be present in `got`.
-     * With no `expected` struct the check passes.
-     */
-    bool CheckStruct(const TStructExprType* got, const TStructExprType* expected, TExprContext& ctx) {
-        if (!expected) {
-            return true;
-        }
-
-        bool ok = true;
-
-        for (const auto* gotMember : got->GetItems()) {
-            const auto expectedPos = expected->FindItem(gotMember->GetName());
-            if (!expectedPos) {
-                REPORT("Got unexpected item " << TString{gotMember->GetName()}.Quote());
-                ok = false;
-                continue;
-            }
-
-            const auto* gotType = gotMember->GetItemType();
-            const auto* expectedType = expected->GetItems()[*expectedPos]->GetItemType();
-
-            // Convertibility is probed on a throwaway argument node.
-            auto probe = ctx.NewArgument(TPositionHandle(), "arg");
-            const auto conversion = TrySilentConvertTo(probe, *gotType, *expectedType, ctx);
-            if (conversion.Level == IGraphTransformer::TStatus::Error) {
-                REPORT("Item " << TString{gotMember->GetName()}.Quote() << " expected to be " <<
-                    *expectedType << ", but got " << *gotType);
-                ok = false;
-            }
-        }
-
-        for (const auto* expectedMember : expected->GetItems()) {
-            const bool isOptional = expectedMember->GetItemType()->GetKind() == ETypeAnnotationKind::Optional;
-            if (!isOptional && !got->FindItem(expectedMember->GetName())) {
-                REPORT("Expected item " << TString{expectedMember->GetName()}.Quote());
-                ok = false;
-            }
-        }
-
-        return ok;
-    }
-
-    /**
-     * Check the alternatives of a variant over struct.
-     *
-     * When `expected` is given, the sets of alternative names must match. Every alternative
-     * must be a struct and, when an expected counterpart exists, must pass CheckStruct
-     * against it. All problems are reported to ctx; returns false if any was found.
-     */
-    bool CheckVariantContent(const TStructExprType* got, const TStructExprType* expected, TExprContext& ctx) {
-        auto status = true;
-
-        if (expected) {
-            for (const auto* gotNamedItem : got->GetItems()) {
-                if (!expected->FindItem(gotNamedItem->GetName())) {
-                    REPORT("Got unexpected alternative " << TString{gotNamedItem->GetName()}.Quote());
-                    status = false;
-                }
-            }
-
-            for (const auto* expectedNamedItem : expected->GetItems()) {
-                if (!got->FindItem(expectedNamedItem->GetName())) {
-                    REPORT("Expected alternative " << TString{expectedNamedItem->GetName()}.Quote());
-                    status = false;
-                }
-            }
-        }
-
-        for (const auto* gotNamedItem : got->GetItems()) {
-            const auto* gotItem = gotNamedItem->GetItemType();
-            auto expectedIndex = expected ? expected->FindItem(gotNamedItem->GetName()) : Nothing();
-            const auto* expectedItem = expected && expectedIndex ? expected->GetItems()[*expectedIndex]->GetItemType() : nullptr;
-
-            TIssueScopeGuard issueScope(ctx.IssueManager, [&]() {
-                return new TIssue(TPosition(), TStringBuilder() << "Alternative " << TString{gotNamedItem->GetName()}.Quote());
-            });
-
-            bool canCheckAsStruct = true;
-
-            if (expectedItem && expectedItem->GetKind() != gotItem->GetKind()) {
-                REPORT("Expected to be " << expectedItem->GetKind() << ", but got " << gotItem->GetKind());
-                status = false;
-                canCheckAsStruct = false;
-            }
-
-            if (gotItem->GetKind() != ETypeAnnotationKind::Struct) {
-                REPORT("Expected to be Struct, but got " << gotItem->GetKind());
-                status = false;
-                canCheckAsStruct = false;
-            }
-
-            // Casting a non-struct type to TStructExprType is invalid, so an alternative that
-            // already failed the kind checks is skipped; its issue has been recorded above.
-            if (!canCheckAsStruct) {
-                continue;
-            }
-
-            const auto* gotStruct = gotItem->Cast<TStructExprType>();
-            const auto* expectedStruct = expectedItem ? expectedItem->Cast<TStructExprType>() : nullptr;
-
-            if (!CheckStruct(gotStruct, expectedStruct, ctx)) {
-                status = false;
-            }
-        }
-
-        return status;
-    }
-
-    /**
-     * Check the alternatives of a variant over tuple.
-     *
-     * When `expected` is given, the number of alternatives must match. Every alternative
-     * must be a struct and, when an expected counterpart exists, must pass CheckStruct
-     * against it. All problems are reported to ctx; returns false if any was found.
-     */
-    bool CheckVariantContent(const TTupleExprType* got, const TTupleExprType* expected, TExprContext& ctx) {
-        if (expected && expected->GetSize() != got->GetSize()) {
-            REPORT("Expected to have " << expected->GetSize() << " alternatives, but got " << got->GetSize());
-            return false;
-        }
-
-        auto status = true;
-
-        for (size_t i = 0; i < got->GetSize(); i++) {
-            const auto* gotItem = got->GetItems()[i];
-            const auto* expectedItem = expected ? expected->GetItems()[i] : nullptr;
-
-            TIssueScopeGuard issueScope(ctx.IssueManager, [i]() {
-                return new TIssue(TPosition(), TStringBuilder() << "Alternative #" << i);
-            });
-
-            bool canCheckAsStruct = true;
-
-            if (expectedItem && expectedItem->GetKind() != gotItem->GetKind()) {
-                REPORT("Expected " << expectedItem->GetKind() << ", but got " << gotItem->GetKind());
-                status = false;
-                canCheckAsStruct = false;
-            }
-
-            if (gotItem->GetKind() != ETypeAnnotationKind::Struct) {
-                REPORT("Expected Struct, but got " << gotItem->GetKind());
-                status = false;
-                canCheckAsStruct = false;
-            }
-
-            // Casting a non-struct type to TStructExprType is invalid, so an alternative that
-            // already failed the kind checks is skipped; its issue has been recorded above.
-            if (!canCheckAsStruct) {
-                continue;
-            }
-
-            const auto* gotStruct = gotItem->Cast<TStructExprType>();
-            const auto* expectedStruct = expectedItem ? expectedItem->Cast<TStructExprType>() : nullptr;
-
-            if (!CheckStruct(gotStruct, expectedStruct, ctx)) {
-                status = false;
-            }
-        }
-
-        return status;
-    }
-
-    /**
-     * Check a variant type: the underlying kind must match the expected one (when given),
-     * then the alternatives are checked by the struct or tuple overload of CheckVariantContent.
-     */
-    bool CheckVariant(const TVariantExprType* got, const TVariantExprType* expected, TExprContext& ctx) {
-        const auto* gotUnderlying = got->GetUnderlyingType();
-        const auto* expectedUnderlying = expected ? expected->GetUnderlyingType() : nullptr;
-
-        if (expectedUnderlying && expectedUnderlying->GetKind() != gotUnderlying->GetKind()) {
-            REPORT("Expected Variant over " << expectedUnderlying->GetKind() <<
-                ", but got Variant over " << gotUnderlying->GetKind());
-            return false;
-        }
-
-        const auto underlyingKind = gotUnderlying->GetKind();
-
-        if (underlyingKind == ETypeAnnotationKind::Struct) {
-            return CheckVariantContent(
-                gotUnderlying->Cast<TStructExprType>(),
-                expectedUnderlying ? expectedUnderlying->Cast<TStructExprType>() : nullptr,
-                ctx);
-        }
-
-        if (underlyingKind == ETypeAnnotationKind::Tuple) {
-            return CheckVariantContent(
-                gotUnderlying->Cast<TTupleExprType>(),
-                expectedUnderlying ? expectedUnderlying->Cast<TTupleExprType>() : nullptr,
-                ctx);
-        }
-
-        Y_UNREACHABLE();
-
-        return false;
-    }
-
-    /**
-     * Validate a top-level schema type and, when `expected` is given, compare against it.
-     *
-     * A struct is always accepted as the top-level kind; a variant only when allowVariant is set.
-     * Any other kind is reported to ctx as an error.
-     */
-    bool CheckSchema(const TTypeAnnotationNode* got, const TTypeAnnotationNode* expected, TExprContext& ctx, bool allowVariant) {
-        if (expected && expected->GetKind() != got->GetKind()) {
-            REPORT("Expected " << expected->GetKind() << ", but got " << got->GetKind());
-            return false;
-        }
-
-        const auto gotKind = got->GetKind();
-
-        if (gotKind == ETypeAnnotationKind::Struct) {
-            TIssueScopeGuard issueScope(ctx.IssueManager, []() { return new TIssue(TPosition(), "Toplevel struct"); });
-
-            const auto* gotStruct = got->Cast<TStructExprType>();
-            const auto* expectedStruct = expected ? expected->Cast<TStructExprType>() : nullptr;
-
-            if (!gotStruct->Validate(TPositionHandle(), ctx)) {
-                return false;
-            }
-
-            return CheckStruct(gotStruct, expectedStruct, ctx);
-        }
-
-        if (gotKind == ETypeAnnotationKind::Variant && allowVariant) {
-            TIssueScopeGuard issueScope(ctx.IssueManager, []() { return new TIssue(TPosition(), "Toplevel variant"); });
-
-            const auto* gotVariant = got->Cast<TVariantExprType>();
-            const auto* expectedVariant = expected ? expected->Cast<TVariantExprType>() : nullptr;
-
-            if (!gotVariant->Validate(TPositionHandle(), ctx)) {
-                return false;
-            }
-
-            return CheckVariant(gotVariant, expectedVariant, ctx);
-        }
-
-        // Neither an accepted struct nor an accepted variant.
-        if (allowVariant) {
-            REPORT("Expected Struct or Variant, but got " << got->GetKind());
-        } else {
-            REPORT("Expected Struct, but got " << got->GetKind());
-        }
-        return false;
-    }
-}
-
-namespace NYql::NPureCalc {
-    // Parse a type from its yson schema; throws TCompileError carrying the collected
-    // issues when the schema cannot be parsed.
-    const TTypeAnnotationNode* MakeTypeFromSchema(const NYT::TNode& yson, TExprContext& ctx) {
-        if (const auto* parsed = NCommon::ParseTypeFromYson(yson, ctx)) {
-            return parsed;
-        }
-
-        ythrow TCompileError("", ctx.IssueManager.GetIssues().ToString())
-            << "Incorrect schema: " << NYT::NodeToYsonString(yson, NYson::EYsonFormat::Text);
-    }
-
-    // Return `type` extended with one member per entry of extraColumns (member type parsed
-    // from the entry's yson schema). With no extra columns the original type is returned.
-    // Throws TCompileError if the extended struct fails validation.
-    const TStructExprType* ExtendStructType(
-        const TStructExprType* type, const THashMap<TString, NYT::TNode>& extraColumns, TExprContext& ctx)
-    {
-        if (extraColumns.empty()) {
-            return type;
-        }
-
-        auto members = type->GetItems();
-        for (const auto& [columnName, columnSchema] : extraColumns) {
-            const auto* columnType = MakeTypeFromSchema(columnSchema, ctx);
-            members.push_back(ctx.MakeType<TItemExprType>(columnName, columnType));
-        }
-
-        auto extended = ctx.MakeType<TStructExprType>(members);
-
-        if (extended->Validate(TPosition(), ctx)) {
-            return extended;
-        }
-
-        ythrow TCompileError("", ctx.IssueManager.GetIssues().ToString()) << "Incorrect extended struct type";
-    }
-
-    // Input schema check: no expected type, top-level variant not allowed.
-    bool ValidateInputSchema(const TTypeAnnotationNode* type, TExprContext& ctx) {
-        const auto scopeIssue = []() { return new TIssue(TPosition(), "Input schema"); };
-        TIssueScopeGuard issueScope(ctx.IssueManager, scopeIssue);
-        const bool isValid = CheckSchema(type, nullptr, ctx, false);
-        return isValid;
-    }
-
-    // Output schema check: no expected type, top-level variant allowed.
-    bool ValidateOutputSchema(const TTypeAnnotationNode* type, TExprContext& ctx) {
-        const auto scopeIssue = []() { return new TIssue(TPosition(), "Output schema"); };
-        TIssueScopeGuard issueScope(ctx.IssueManager, scopeIssue);
-        const bool isValid = CheckSchema(type, nullptr, ctx, true);
-        return isValid;
-    }
-
-    // Program return type check against `expected`; top-level variant allowed.
-    bool ValidateOutputType(const TTypeAnnotationNode* type, const TTypeAnnotationNode* expected, TExprContext& ctx) {
-        const auto scopeIssue = []() { return new TIssue(TPosition(), "Program return type"); };
-        TIssueScopeGuard issueScope(ctx.IssueManager, scopeIssue);
-        const bool isValid = CheckSchema(type, expected, ctx, true);
-        return isValid;
-    }
-}
diff --git a/ydb/library/yql/public/purecalc/common/type_from_schema.h b/ydb/library/yql/public/purecalc/common/type_from_schema.h
deleted file mode 100644
index cb57e38168b..00000000000
--- a/ydb/library/yql/public/purecalc/common/type_from_schema.h
+++ /dev/null
@@ -1,36 +0,0 @@
-#pragma once
-
-#include <ydb/library/yql/public/purecalc/common/interface.h>
-
-#include <yql/essentials/ast/yql_expr.h>
-
-#include <library/cpp/yson/node/node.h>
-
-namespace NYql {
- namespace NPureCalc {
- /**
- * Load a type from yson. Use methods below to check returned type for correctness.
- * Throws TCompileError if the schema cannot be parsed.
- */
- const TTypeAnnotationNode* MakeTypeFromSchema(const NYT::TNode&, TExprContext&);
-
- /**
- * Extend struct type with additional columns. Type of each extra column is loaded from yson.
- * Throws TCompileError if the extended struct is not valid.
- */
- const TStructExprType* ExtendStructType(const TStructExprType*, const THashMap<TString, NYT::TNode>&, TExprContext&);
-
- /**
- * Check if the given type can be used as an input schema, i.e. it is a struct.
- * Problems are reported as issues to ctx.
- */
- bool ValidateInputSchema(const TTypeAnnotationNode* type, TExprContext& ctx);
-
- /**
- * Check if the given type can be used as an output schema, i.e. it is a struct or a variant of structs.
- * Problems are reported as issues to ctx.
- */
- bool ValidateOutputSchema(const TTypeAnnotationNode* type, TExprContext& ctx);
-
- /**
- * Check if output type can be silently converted to the expected type.
- * Problems are reported as issues to ctx.
- */
- bool ValidateOutputType(const TTypeAnnotationNode* type, const TTypeAnnotationNode* expected, TExprContext& ctx);
- }
-}
diff --git a/ydb/library/yql/public/purecalc/common/worker.cpp b/ydb/library/yql/public/purecalc/common/worker.cpp
deleted file mode 100644
index 41d54a08728..00000000000
--- a/ydb/library/yql/public/purecalc/common/worker.cpp
+++ /dev/null
@@ -1,613 +0,0 @@
-#include "worker.h"
-#include "compile_mkql.h"
-
-#include <yql/essentials/ast/yql_expr.h>
-#include <yql/essentials/core/yql_user_data.h>
-#include <yql/essentials/core/yql_user_data_storage.h>
-#include <yql/essentials/providers/common/comp_nodes/yql_factory.h>
-#include <ydb/library/yql/public/purecalc/common/names.h>
-#include <yql/essentials/minikql/mkql_function_registry.h>
-#include <yql/essentials/minikql/mkql_node.h>
-#include <yql/essentials/minikql/mkql_node_builder.h>
-#include <yql/essentials/minikql/mkql_node_cast.h>
-#include <yql/essentials/minikql/mkql_node_visitor.h>
-#include <yql/essentials/minikql/mkql_node_serialization.h>
-#include <yql/essentials/minikql/mkql_program_builder.h>
-#include <yql/essentials/minikql/comp_nodes/mkql_factories.h>
-#include <yql/essentials/minikql/computation/mkql_computation_node.h>
-#include <yql/essentials/minikql/computation/mkql_computation_node_holders.h>
-#include <yql/essentials/minikql/computation/mkql_computation_node_impl.h>
-#include <yql/essentials/providers/common/mkql/yql_provider_mkql.h>
-#include <yql/essentials/providers/common/mkql/yql_type_mkql.h>
-
-#include <library/cpp/random_provider/random_provider.h>
-#include <library/cpp/time_provider/time_provider.h>
-
-#include <util/stream/file.h>
-#include <yql/essentials/minikql/computation/mkql_custom_list.h>
-#include <yql/essentials/parser/pg_wrapper/interface/comp_factory.h>
-
-using namespace NYql;
-using namespace NYql::NPureCalc;
-
-TWorkerGraph::TWorkerGraph(
- const TExprNode::TPtr& exprRoot,
- TExprContext& exprCtx,
- const TString& serializedProgram,
- const NKikimr::NMiniKQL::IFunctionRegistry& funcRegistry,
- const TUserDataTable& userData,
- const TVector<const TStructExprType*>& inputTypes,
- const TVector<const TStructExprType*>& originalInputTypes,
- const TVector<const TStructExprType*>& rawInputTypes,
- const TTypeAnnotationNode* outputType,
- const TTypeAnnotationNode* rawOutputType,
- const TString& LLVMSettings,
- NKikimr::NUdf::ICountersProvider* countersProvider,
- ui64 nativeYtTypeFlags,
- TMaybe<ui64> deterministicTimeProviderSeed
-)
- : ScopedAlloc_(__LOCATION__, NKikimr::TAlignedPagePoolCounters(), funcRegistry.SupportsSizedAllocators())
- , Env_(ScopedAlloc_)
- , FuncRegistry_(funcRegistry)
- , RandomProvider_(CreateDefaultRandomProvider())
- , TimeProvider_(deterministicTimeProviderSeed ?
- CreateDeterministicTimeProvider(*deterministicTimeProviderSeed) :
- CreateDefaultTimeProvider())
- , LLVMSettings_(LLVMSettings)
- , NativeYtTypeFlags_(nativeYtTypeFlags)
-{
- // Build the root MKQL node
-
- NKikimr::NMiniKQL::TRuntimeNode rootNode;
- if (exprRoot) {
- rootNode = CompileMkql(exprRoot, exprCtx, FuncRegistry_, Env_, userData);
- } else {
- rootNode = NKikimr::NMiniKQL::DeserializeRuntimeNode(serializedProgram, Env_);
- }
-
- // Prepare container for input nodes
-
- const ui32 inputsCount = inputTypes.size();
-
- YQL_ENSURE(inputTypes.size() == originalInputTypes.size());
-
- SelfNodes_.resize(inputsCount, nullptr);
-
- YQL_ENSURE(SelfNodes_.size() == inputsCount);
-
- // Setup struct types
-
- NKikimr::NMiniKQL::TProgramBuilder pgmBuilder(Env_, FuncRegistry_);
- for (ui32 i = 0; i < inputsCount; ++i) {
- const auto* type = static_cast<NKikimr::NMiniKQL::TStructType*>(NCommon::BuildType(TPositionHandle(), *inputTypes[i], pgmBuilder));
- const auto* originalType = type;
- const auto* rawType = static_cast<NKikimr::NMiniKQL::TStructType*>(NCommon::BuildType(TPositionHandle(), *rawInputTypes[i], pgmBuilder));
- if (inputTypes[i] != originalInputTypes[i]) {
- YQL_ENSURE(inputTypes[i]->GetSize() >= originalInputTypes[i]->GetSize());
- originalType = static_cast<NKikimr::NMiniKQL::TStructType*>(NCommon::BuildType(TPositionHandle(), *originalInputTypes[i], pgmBuilder));
- }
-
- InputTypes_.push_back(type);
- OriginalInputTypes_.push_back(originalType);
- RawInputTypes_.push_back(rawType);
- }
-
- if (outputType) {
- OutputType_ = NCommon::BuildType(TPositionHandle(), *outputType, pgmBuilder);
- }
- if (rawOutputType) {
- RawOutputType_ = NCommon::BuildType(TPositionHandle(), *rawOutputType, pgmBuilder);
- }
-
- if (!exprRoot) {
- auto outMkqlType = rootNode.GetStaticType();
- if (outMkqlType->GetKind() == NKikimr::NMiniKQL::TType::EKind::List) {
- outMkqlType = static_cast<NKikimr::NMiniKQL::TListType*>(outMkqlType)->GetItemType();
- } else if (outMkqlType->GetKind() == NKikimr::NMiniKQL::TType::EKind::Stream) {
- outMkqlType = static_cast<NKikimr::NMiniKQL::TStreamType*>(outMkqlType)->GetItemType();
- } else {
- ythrow TCompileError("", "") << "unexpected mkql output type " << NKikimr::NMiniKQL::TType::KindAsStr(outMkqlType->GetKind());
- }
- if (OutputType_) {
- if (!OutputType_->IsSameType(*outMkqlType)) {
- ythrow TCompileError("", "") << "precompiled program output type doesn't match the output schema";
- }
- } else {
- OutputType_ = outMkqlType;
- RawOutputType_ = outMkqlType;
- }
- }
-
- // Compile computation pattern
-
- const THashSet<NKikimr::NMiniKQL::TInternName> selfCallableNames = {
- Env_.InternName(PurecalcInputCallableName),
- Env_.InternName(PurecalcBlockInputCallableName)
- };
-
- NKikimr::NMiniKQL::TExploringNodeVisitor explorer;
- explorer.Walk(rootNode.GetNode(), Env_);
-
- auto compositeNodeFactory = NKikimr::NMiniKQL::GetCompositeWithBuiltinFactory(
- {NKikimr::NMiniKQL::GetYqlFactory(), NYql::GetPgFactory()}
- );
-
- auto nodeFactory = [&](
- NKikimr::NMiniKQL::TCallable& callable, const NKikimr::NMiniKQL::TComputationNodeFactoryContext& ctx
- ) -> NKikimr::NMiniKQL::IComputationNode* {
- if (selfCallableNames.contains(callable.GetType()->GetNameStr())) {
- YQL_ENSURE(callable.GetInputsCount() == 1, "Self takes exactly 1 argument");
- const auto inputIndex = AS_VALUE(NKikimr::NMiniKQL::TDataLiteral, callable.GetInput(0))->AsValue().Get<ui32>();
- YQL_ENSURE(inputIndex < inputsCount, "Self index is out of range");
- YQL_ENSURE(!SelfNodes_[inputIndex], "Self can be called at most once with each index");
- return SelfNodes_[inputIndex] = new NKikimr::NMiniKQL::TExternalComputationNode(ctx.Mutables);
- }
- else {
- return compositeNodeFactory(callable, ctx);
- }
- };
-
- NKikimr::NMiniKQL::TComputationPatternOpts computationPatternOpts(
- ScopedAlloc_.Ref(),
- Env_,
- nodeFactory,
- &funcRegistry,
- NKikimr::NUdf::EValidateMode::None,
- NKikimr::NUdf::EValidatePolicy::Exception,
- LLVMSettings,
- NKikimr::NMiniKQL::EGraphPerProcess::Multi,
- nullptr,
- countersProvider);
-
- ComputationPattern_ = NKikimr::NMiniKQL::MakeComputationPattern(
- explorer,
- rootNode,
- { rootNode.GetNode() },
- computationPatternOpts);
-
- ComputationGraph_ = ComputationPattern_->Clone(
- computationPatternOpts.ToComputationOptions(*RandomProvider_, *TimeProvider_));
-
- ComputationGraph_->Prepare();
-
- // Scoped alloc acquires itself on construction. We need to release it before returning control to user.
- // Note that scoped alloc releases itself on destruction so it is no problem if the above code throws.
- ScopedAlloc_.Release();
-}
-
-TWorkerGraph::~TWorkerGraph() {
- // Remember, we've released scoped alloc in constructor? Now, we need to acquire it back before destroying.
- ScopedAlloc_.Acquire();
-}
-
-template <typename TBase>
-TWorker<TBase>::TWorker(
- TWorkerFactoryPtr factory,
- const TExprNode::TPtr& exprRoot,
- TExprContext& exprCtx,
- const TString& serializedProgram,
- const NKikimr::NMiniKQL::IFunctionRegistry& funcRegistry,
- const TUserDataTable& userData,
- const TVector<const TStructExprType*>& inputTypes,
- const TVector<const TStructExprType*>& originalInputTypes,
- const TVector<const TStructExprType*>& rawInputTypes,
- const TTypeAnnotationNode* outputType,
- const TTypeAnnotationNode* rawOutputType,
- const TString& LLVMSettings,
- NKikimr::NUdf::ICountersProvider* countersProvider,
- ui64 nativeYtTypeFlags,
- TMaybe<ui64> deterministicTimeProviderSeed
-)
- : WorkerFactory_(std::move(factory))
- , Graph_(exprRoot, exprCtx, serializedProgram, funcRegistry, userData,
- inputTypes, originalInputTypes, rawInputTypes, outputType, rawOutputType,
- LLVMSettings, countersProvider, nativeYtTypeFlags, deterministicTimeProviderSeed)
-{
-}
-
-template <typename TBase>
-inline ui32 TWorker<TBase>::GetInputsCount() const {
- return Graph_.InputTypes_.size();
-}
-
-template <typename TBase>
-inline const NKikimr::NMiniKQL::TStructType* TWorker<TBase>::GetInputType(ui32 inputIndex, bool original) const {
- const auto& container = original ? Graph_.OriginalInputTypes_ : Graph_.InputTypes_;
-
- YQL_ENSURE(inputIndex < container.size(), "invalid input index (" << inputIndex << ") in GetInputType call");
-
- return container[inputIndex];
-}
-
-template <typename TBase>
-inline const NKikimr::NMiniKQL::TStructType* TWorker<TBase>::GetInputType(bool original) const {
- const auto& container = original ? Graph_.OriginalInputTypes_ : Graph_.InputTypes_;
-
- YQL_ENSURE(container.size() == 1, "GetInputType() can be used only for single-input programs");
-
- return container[0];
-}
-
-template <typename TBase>
-inline const NKikimr::NMiniKQL::TStructType* TWorker<TBase>::GetRawInputType(ui32 inputIndex) const {
- const auto& container = Graph_.RawInputTypes_;
- YQL_ENSURE(inputIndex < container.size(), "invalid input index (" << inputIndex << ") in GetInputType call");
- return container[inputIndex];
-}
-
-template <typename TBase>
-inline const NKikimr::NMiniKQL::TStructType* TWorker<TBase>::GetRawInputType() const {
- const auto& container = Graph_.RawInputTypes_;
- YQL_ENSURE(container.size() == 1, "GetInputType() can be used only for single-input programs");
- return container[0];
-}
-
-template <typename TBase>
-inline const NKikimr::NMiniKQL::TType* TWorker<TBase>::GetOutputType() const {
- return Graph_.OutputType_;
-}
-
-template <typename TBase>
-inline const NKikimr::NMiniKQL::TType* TWorker<TBase>::GetRawOutputType() const {
- return Graph_.RawOutputType_;
-}
-
-template <typename TBase>
-NYT::TNode TWorker<TBase>::MakeInputSchema(ui32 inputIndex) const {
- auto p = WorkerFactory_.lock();
- YQL_ENSURE(p, "Access to destroyed worker factory");
- return p->MakeInputSchema(inputIndex);
-}
-
-template <typename TBase>
-NYT::TNode TWorker<TBase>::MakeInputSchema() const {
- auto p = WorkerFactory_.lock();
- YQL_ENSURE(p, "Access to destroyed worker factory");
- return p->MakeInputSchema();
-}
-
-template <typename TBase>
-NYT::TNode TWorker<TBase>::MakeOutputSchema() const {
- auto p = WorkerFactory_.lock();
- YQL_ENSURE(p, "Access to destroyed worker factory");
- return p->MakeOutputSchema();
-}
-
-template <typename TBase>
-NYT::TNode TWorker<TBase>::MakeOutputSchema(ui32) const {
- auto p = WorkerFactory_.lock();
- YQL_ENSURE(p, "Access to destroyed worker factory");
- return p->MakeOutputSchema();
-}
-
-template <typename TBase>
-NYT::TNode TWorker<TBase>::MakeOutputSchema(TStringBuf) const {
- auto p = WorkerFactory_.lock();
- YQL_ENSURE(p, "Access to destroyed worker factory");
- return p->MakeOutputSchema();
-}
-
-template <typename TBase>
-NYT::TNode TWorker<TBase>::MakeFullOutputSchema() const {
- auto p = WorkerFactory_.lock();
- YQL_ENSURE(p, "Access to destroyed worker factory");
- return p->MakeFullOutputSchema();
-}
-
-template <typename TBase>
-inline NKikimr::NMiniKQL::TScopedAlloc& TWorker<TBase>::GetScopedAlloc() {
- return Graph_.ScopedAlloc_;
-}
-
-template <typename TBase>
-inline NKikimr::NMiniKQL::IComputationGraph& TWorker<TBase>::GetGraph() {
- return *Graph_.ComputationGraph_;
-}
-
-template <typename TBase>
-inline const NKikimr::NMiniKQL::IFunctionRegistry&
-TWorker<TBase>::GetFunctionRegistry() const {
- return Graph_.FuncRegistry_;
-}
-
-template <typename TBase>
-inline NKikimr::NMiniKQL::TTypeEnvironment&
-TWorker<TBase>::GetTypeEnvironment() {
- return Graph_.Env_;
-}
-
-template <typename TBase>
-inline const TString& TWorker<TBase>::GetLLVMSettings() const {
- return Graph_.LLVMSettings_;
-}
-
-template <typename TBase>
-inline ui64 TWorker<TBase>::GetNativeYtTypeFlags() const {
- return Graph_.NativeYtTypeFlags_;
-}
-
-template <typename TBase>
-ITimeProvider* TWorker<TBase>::GetTimeProvider() const {
- return Graph_.TimeProvider_.Get();
-}
-
-template <typename TBase>
-void TWorker<TBase>::Release() {
- if (auto p = WorkerFactory_.lock()) {
- p->ReturnWorker(this);
- } else {
- delete this;
- }
-}
-
-TPullStreamWorker::~TPullStreamWorker() {
- auto guard = Guard(GetScopedAlloc());
- Output_.Clear();
-}
-
-void TPullStreamWorker::SetInput(NKikimr::NUdf::TUnboxedValue&& value, ui32 inputIndex) {
- const auto inputsCount = Graph_.SelfNodes_.size();
-
- if (Y_UNLIKELY(inputIndex >= inputsCount)) {
- ythrow yexception() << "invalid input index (" << inputIndex << ") in SetInput call";
- }
-
- if (HasInput_.size() < inputsCount) {
- HasInput_.resize(inputsCount, false);
- }
-
- if (Y_UNLIKELY(HasInput_[inputIndex])) {
- ythrow yexception() << "input value for #" << inputIndex << " input is already set";
- }
-
- auto selfNode = Graph_.SelfNodes_[inputIndex];
-
- if (selfNode) {
- YQL_ENSURE(value);
- selfNode->SetValue(Graph_.ComputationGraph_->GetContext(), std::move(value));
- }
-
- HasInput_[inputIndex] = true;
-
- if (CheckAllInputsSet()) {
- Output_ = Graph_.ComputationGraph_->GetValue();
- }
-}
-
-NKikimr::NUdf::TUnboxedValue& TPullStreamWorker::GetOutput() {
- if (Y_UNLIKELY(!CheckAllInputsSet())) {
- ythrow yexception() << "some input values have not been set";
- }
-
- return Output_;
-}
-
-void TPullStreamWorker::Release() {
- with_lock(GetScopedAlloc()) {
- Output_ = NKikimr::NUdf::TUnboxedValue::Invalid();
- for (auto selfNode: Graph_.SelfNodes_) {
- if (selfNode) {
- selfNode->SetValue(Graph_.ComputationGraph_->GetContext(), NKikimr::NUdf::TUnboxedValue::Invalid());
- }
- }
- }
- HasInput_.clear();
- TWorker<IPullStreamWorker>::Release();
-}
-
-TPullListWorker::~TPullListWorker() {
- auto guard = Guard(GetScopedAlloc());
- Output_.Clear();
- OutputIterator_.Clear();
-}
-
-void TPullListWorker::SetInput(NKikimr::NUdf::TUnboxedValue&& value, ui32 inputIndex) {
- const auto inputsCount = Graph_.SelfNodes_.size();
-
- if (Y_UNLIKELY(inputIndex >= inputsCount)) {
- ythrow yexception() << "invalid input index (" << inputIndex << ") in SetInput call";
- }
-
- if (HasInput_.size() < inputsCount) {
- HasInput_.resize(inputsCount, false);
- }
-
- if (Y_UNLIKELY(HasInput_[inputIndex])) {
- ythrow yexception() << "input value for #" << inputIndex << " input is already set";
- }
-
- auto selfNode = Graph_.SelfNodes_[inputIndex];
-
- if (selfNode) {
- YQL_ENSURE(value);
- selfNode->SetValue(Graph_.ComputationGraph_->GetContext(), std::move(value));
- }
-
- HasInput_[inputIndex] = true;
-
- if (CheckAllInputsSet()) {
- Output_ = Graph_.ComputationGraph_->GetValue();
- ResetOutputIterator();
- }
-}
-
-NKikimr::NUdf::TUnboxedValue& TPullListWorker::GetOutput() {
- if (Y_UNLIKELY(!CheckAllInputsSet())) {
- ythrow yexception() << "some input values have not been set";
- }
-
- return Output_;
-}
-
-NKikimr::NUdf::TUnboxedValue& TPullListWorker::GetOutputIterator() {
- if (Y_UNLIKELY(!CheckAllInputsSet())) {
- ythrow yexception() << "some input values have not been set";
- }
-
- return OutputIterator_;
-}
-
-void TPullListWorker::ResetOutputIterator() {
- if (Y_UNLIKELY(!CheckAllInputsSet())) {
- ythrow yexception() << "some input values have not been set";
- }
-
- OutputIterator_ = Output_.GetListIterator();
-}
-
-void TPullListWorker::Release() {
- with_lock(GetScopedAlloc()) {
- Output_ = NKikimr::NUdf::TUnboxedValue::Invalid();
- OutputIterator_ = NKikimr::NUdf::TUnboxedValue::Invalid();
-
- for (auto selfNode: Graph_.SelfNodes_) {
- if (selfNode) {
- selfNode->SetValue(Graph_.ComputationGraph_->GetContext(), NKikimr::NUdf::TUnboxedValue::Invalid());
- }
- }
- }
- HasInput_.clear();
- TWorker<IPullListWorker>::Release();
-}
-
-namespace {
- class TPushStream final: public NKikimr::NMiniKQL::TCustomListValue {
- private:
- mutable bool HasIterator_ = false;
- bool HasValue_ = false;
- bool IsFinished_ = false;
- NKikimr::NUdf::TUnboxedValue Value_ = NKikimr::NUdf::TUnboxedValue::Invalid();
-
- public:
- using TCustomListValue::TCustomListValue;
-
- public:
- void SetValue(NKikimr::NUdf::TUnboxedValue&& value) {
- Value_ = std::move(value);
- HasValue_ = true;
- }
-
- void SetFinished() {
- IsFinished_ = true;
- }
-
- NKikimr::NUdf::TUnboxedValue GetListIterator() const override {
- YQL_ENSURE(!HasIterator_, "only one pass over input is supported");
- HasIterator_ = true;
- return NKikimr::NUdf::TUnboxedValuePod(const_cast<TPushStream*>(this));
- }
-
- NKikimr::NUdf::EFetchStatus Fetch(NKikimr::NUdf::TUnboxedValue& result) override {
- if (IsFinished_) {
- return NKikimr::NUdf::EFetchStatus::Finish;
- } else if (!HasValue_) {
- return NKikimr::NUdf::EFetchStatus::Yield;
- } else {
- result = std::move(Value_);
- HasValue_ = false;
- return NKikimr::NUdf::EFetchStatus::Ok;
- }
- }
- };
-}
-
-void TPushStreamWorker::FeedToConsumer() {
- auto value = Graph_.ComputationGraph_->GetValue();
-
- for (;;) {
- NKikimr::NUdf::TUnboxedValue item;
- auto status = value.Fetch(item);
-
- if (status != NKikimr::NUdf::EFetchStatus::Ok) {
- break;
- }
-
- Consumer_->OnObject(&item);
- }
-}
-
-NYql::NUdf::IBoxedValue* TPushStreamWorker::GetPushStream() const {
- auto& ctx = Graph_.ComputationGraph_->GetContext();
- NUdf::TUnboxedValue pushStream = SelfNode_->GetValue(ctx);
-
- if (Y_UNLIKELY(pushStream.IsInvalid())) {
- SelfNode_->SetValue(ctx, Graph_.ComputationGraph_->GetHolderFactory().Create<TPushStream>());
- pushStream = SelfNode_->GetValue(ctx);
- }
-
- return pushStream.AsBoxed().Get();
-}
-
-void TPushStreamWorker::SetConsumer(THolder<IConsumer<const NKikimr::NUdf::TUnboxedValue*>> consumer) {
- auto guard = Guard(GetScopedAlloc());
- const auto inputsCount = Graph_.SelfNodes_.size();
-
- YQL_ENSURE(inputsCount < 2, "push stream mode doesn't support several inputs");
- YQL_ENSURE(!Consumer_, "consumer is already set");
-
- Consumer_ = std::move(consumer);
-
- if (inputsCount == 1) {
- SelfNode_ = Graph_.SelfNodes_[0];
- }
-
- if (SelfNode_) {
- SelfNode_->SetValue(
- Graph_.ComputationGraph_->GetContext(),
- Graph_.ComputationGraph_->GetHolderFactory().Create<TPushStream>());
- }
-
- FeedToConsumer();
-}
-
-void TPushStreamWorker::Push(NKikimr::NUdf::TUnboxedValue&& value) {
- YQL_ENSURE(Consumer_, "consumer is not set");
- YQL_ENSURE(!Finished_, "OnFinish has already been sent to the consumer; no new values can be pushed");
-
- if (Y_LIKELY(SelfNode_)) {
- static_cast<TPushStream*>(GetPushStream())->SetValue(std::move(value));
- }
-
- FeedToConsumer();
-}
-
-void TPushStreamWorker::OnFinish() {
- YQL_ENSURE(Consumer_, "consumer is not set");
- YQL_ENSURE(!Finished_, "already finished");
-
- if (Y_LIKELY(SelfNode_)) {
- static_cast<TPushStream*>(GetPushStream())->SetFinished();
- }
-
- FeedToConsumer();
-
- Consumer_->OnFinish();
-
- Finished_ = true;
-}
-
-void TPushStreamWorker::Release() {
- with_lock(GetScopedAlloc()) {
- Consumer_.Destroy();
- if (SelfNode_) {
- SelfNode_->SetValue(Graph_.ComputationGraph_->GetContext(), NKikimr::NUdf::TUnboxedValue::Invalid());
- }
- SelfNode_ = nullptr;
- }
- Finished_ = false;
- TWorker<IPushStreamWorker>::Release();
-}
-
-
-namespace NYql {
- namespace NPureCalc {
- template
- class TWorker<IPullStreamWorker>;
-
- template
- class TWorker<IPullListWorker>;
-
- template
- class TWorker<IPushStreamWorker>;
- }
-}
diff --git a/ydb/library/yql/public/purecalc/common/worker.h b/ydb/library/yql/public/purecalc/common/worker.h
deleted file mode 100644
index a15e2f4a0f5..00000000000
--- a/ydb/library/yql/public/purecalc/common/worker.h
+++ /dev/null
@@ -1,178 +0,0 @@
-#pragma once
-
-#include <ydb/library/yql/public/purecalc/common/interface.h>
-
-#include <yql/essentials/public/udf/udf_value.h>
-#include <yql/essentials/ast/yql_expr.h>
-#include <yql/essentials/core/yql_user_data.h>
-#include <yql/essentials/minikql/mkql_alloc.h>
-#include <yql/essentials/minikql/mkql_node.h>
-#include <yql/essentials/minikql/mkql_node_visitor.h>
-#include <yql/essentials/minikql/computation/mkql_computation_node.h>
-#include <yql/essentials/providers/common/mkql/yql_provider_mkql.h>
-
-#include <memory>
-
-namespace NYql {
- namespace NPureCalc {
- struct TWorkerGraph {
- TWorkerGraph(
- const TExprNode::TPtr& exprRoot,
- TExprContext& exprCtx,
- const TString& serializedProgram,
- const NKikimr::NMiniKQL::IFunctionRegistry& funcRegistry,
- const TUserDataTable& userData,
- const TVector<const TStructExprType*>& inputTypes,
- const TVector<const TStructExprType*>& originalInputTypes,
- const TVector<const TStructExprType*>& rawInputTypes,
- const TTypeAnnotationNode* outputType,
- const TTypeAnnotationNode* rawOutputType,
- const TString& LLVMSettings,
- NKikimr::NUdf::ICountersProvider* countersProvider,
- ui64 nativeYtTypeFlags,
- TMaybe<ui64> deterministicTimeProviderSeed
- );
-
- ~TWorkerGraph();
-
- NKikimr::NMiniKQL::TScopedAlloc ScopedAlloc_;
- NKikimr::NMiniKQL::TTypeEnvironment Env_;
- const NKikimr::NMiniKQL::IFunctionRegistry& FuncRegistry_;
- TIntrusivePtr<IRandomProvider> RandomProvider_;
- TIntrusivePtr<ITimeProvider> TimeProvider_;
- NKikimr::NMiniKQL::IComputationPattern::TPtr ComputationPattern_;
- THolder<NKikimr::NMiniKQL::IComputationGraph> ComputationGraph_;
- TString LLVMSettings_;
- ui64 NativeYtTypeFlags_;
- TMaybe<TString> TimestampColumn_;
- const NKikimr::NMiniKQL::TType* OutputType_;
- const NKikimr::NMiniKQL::TType* RawOutputType_;
- TVector<NKikimr::NMiniKQL::IComputationExternalNode*> SelfNodes_;
- TVector<const NKikimr::NMiniKQL::TStructType*> InputTypes_;
- TVector<const NKikimr::NMiniKQL::TStructType*> OriginalInputTypes_;
- TVector<const NKikimr::NMiniKQL::TStructType*> RawInputTypes_;
- };
-
- template <typename TBase>
- class TWorker: public TBase {
- public:
- using TWorkerFactoryPtr = std::weak_ptr<IWorkerFactory>;
- private:
- // Worker factory implementation should stay alive for this worker to operate correctly.
- TWorkerFactoryPtr WorkerFactory_;
-
- protected:
- TWorkerGraph Graph_;
-
- public:
- TWorker(
- TWorkerFactoryPtr factory,
- const TExprNode::TPtr& exprRoot,
- TExprContext& exprCtx,
- const TString& serializedProgram,
- const NKikimr::NMiniKQL::IFunctionRegistry& funcRegistry,
- const TUserDataTable& userData,
- const TVector<const TStructExprType*>& inputTypes,
- const TVector<const TStructExprType*>& originalInputTypes,
- const TVector<const TStructExprType*>& rawInputTypes,
- const TTypeAnnotationNode* outputType,
- const TTypeAnnotationNode* rawOutputType,
- const TString& LLVMSettings,
- NKikimr::NUdf::ICountersProvider* countersProvider,
- ui64 nativeYtTypeFlags,
- TMaybe<ui64> deterministicTimeProviderSeed
- );
-
- public:
- ui32 GetInputsCount() const override;
- const NKikimr::NMiniKQL::TStructType* GetInputType(ui32, bool) const override;
- const NKikimr::NMiniKQL::TStructType* GetInputType(bool) const override;
- const NKikimr::NMiniKQL::TStructType* GetRawInputType(ui32) const override;
- const NKikimr::NMiniKQL::TStructType* GetRawInputType() const override;
- const NKikimr::NMiniKQL::TType* GetOutputType() const override;
- const NKikimr::NMiniKQL::TType* GetRawOutputType() const override;
- NYT::TNode MakeInputSchema() const override;
- NYT::TNode MakeInputSchema(ui32) const override;
- NYT::TNode MakeOutputSchema() const override;
- NYT::TNode MakeOutputSchema(ui32) const override;
- NYT::TNode MakeOutputSchema(TStringBuf) const override;
- NYT::TNode MakeFullOutputSchema() const override;
- NKikimr::NMiniKQL::TScopedAlloc& GetScopedAlloc() override;
- NKikimr::NMiniKQL::IComputationGraph& GetGraph() override;
- const NKikimr::NMiniKQL::IFunctionRegistry& GetFunctionRegistry() const override;
- NKikimr::NMiniKQL::TTypeEnvironment& GetTypeEnvironment() override;
- const TString& GetLLVMSettings() const override;
- ui64 GetNativeYtTypeFlags() const override;
- ITimeProvider* GetTimeProvider() const override;
- protected:
- void Release() override;
- };
-
- class TPullStreamWorker final: public TWorker<IPullStreamWorker> {
- private:
- NKikimr::NUdf::TUnboxedValue Output_ = NKikimr::NUdf::TUnboxedValue::Invalid();
- TVector<bool> HasInput_;
-
- inline bool CheckAllInputsSet() {
- return AllOf(HasInput_, [](bool x) { return x; });
- }
-
- public:
- using TWorker::TWorker;
- ~TPullStreamWorker();
-
- public:
- void SetInput(NKikimr::NUdf::TUnboxedValue&&, ui32) override;
- NKikimr::NUdf::TUnboxedValue& GetOutput() override;
-
- protected:
- void Release() override;
- };
-
- class TPullListWorker final: public TWorker<IPullListWorker> {
- private:
- NKikimr::NUdf::TUnboxedValue Output_ = NKikimr::NUdf::TUnboxedValue::Invalid();
- NKikimr::NUdf::TUnboxedValue OutputIterator_ = NKikimr::NUdf::TUnboxedValue::Invalid();
- TVector<bool> HasInput_;
-
- inline bool CheckAllInputsSet() {
- return AllOf(HasInput_, [](bool x) { return x; });
- }
-
- public:
- using TWorker::TWorker;
- ~TPullListWorker();
-
- public:
- void SetInput(NKikimr::NUdf::TUnboxedValue&&, ui32) override;
- NKikimr::NUdf::TUnboxedValue& GetOutput() override;
- NKikimr::NUdf::TUnboxedValue& GetOutputIterator() override;
- void ResetOutputIterator() override;
-
- protected:
- void Release() override;
- };
-
- class TPushStreamWorker final: public TWorker<IPushStreamWorker> {
- private:
- THolder<IConsumer<const NKikimr::NUdf::TUnboxedValue*>> Consumer_{};
- bool Finished_ = false;
- NKikimr::NMiniKQL::IComputationExternalNode* SelfNode_ = nullptr;
-
- public:
- using TWorker::TWorker;
-
- private:
- void FeedToConsumer();
- NYql::NUdf::IBoxedValue* GetPushStream() const;
-
- public:
- void SetConsumer(THolder<IConsumer<const NKikimr::NUdf::TUnboxedValue*>>) override;
- void Push(NKikimr::NUdf::TUnboxedValue&&) override;
- void OnFinish() override;
-
- protected:
- void Release() override;
- };
- }
-}
diff --git a/ydb/library/yql/public/purecalc/common/worker_factory.cpp b/ydb/library/yql/public/purecalc/common/worker_factory.cpp
deleted file mode 100644
index 77cd7f0bc19..00000000000
--- a/ydb/library/yql/public/purecalc/common/worker_factory.cpp
+++ /dev/null
@@ -1,532 +0,0 @@
-#include "worker_factory.h"
-
-#include "type_from_schema.h"
-#include "worker.h"
-#include "compile_mkql.h"
-
-#include <yql/essentials/sql/sql.h>
-#include <yql/essentials/ast/yql_expr.h>
-#include <yql/essentials/core/yql_expr_optimize.h>
-#include <yql/essentials/core/yql_type_helpers.h>
-#include <yql/essentials/core/peephole_opt/yql_opt_peephole_physical.h>
-#include <yql/essentials/providers/common/codec/yql_codec.h>
-#include <yql/essentials/providers/common/udf_resolve/yql_simple_udf_resolver.h>
-#include <yql/essentials/providers/common/arrow_resolve/yql_simple_arrow_resolver.h>
-#include <yql/essentials/providers/common/schema/expr/yql_expr_schema.h>
-#include <yql/essentials/providers/common/provider/yql_provider.h>
-#include <yql/essentials/providers/common/provider/yql_provider_names.h>
-#include <yql/essentials/providers/config/yql_config_provider.h>
-#include <yql/essentials/minikql/mkql_node.h>
-#include <yql/essentials/minikql/mkql_node_serialization.h>
-#include <yql/essentials/minikql/mkql_alloc.h>
-#include <yql/essentials/minikql/aligned_page_pool.h>
-#include <yql/essentials/core/services/yql_transform_pipeline.h>
-#include <ydb/library/yql/public/purecalc/common/names.h>
-#include <ydb/library/yql/public/purecalc/common/transformations/type_annotation.h>
-#include <ydb/library/yql/public/purecalc/common/transformations/align_output_schema.h>
-#include <ydb/library/yql/public/purecalc/common/transformations/extract_used_columns.h>
-#include <ydb/library/yql/public/purecalc/common/transformations/output_columns_filter.h>
-#include <ydb/library/yql/public/purecalc/common/transformations/replace_table_reads.h>
-#include <ydb/library/yql/public/purecalc/common/transformations/root_to_blocks.h>
-#include <ydb/library/yql/public/purecalc/common/transformations/utils.h>
-#include <yql/essentials/utils/log/log.h>
-#include <util/stream/trace.h>
-
-using namespace NYql;
-using namespace NYql::NPureCalc;
-
-template <typename TBase>
-TWorkerFactory<TBase>::TWorkerFactory(TWorkerFactoryOptions options, EProcessorMode processorMode)
- : Factory_(std::move(options.Factory))
- , FuncRegistry_(std::move(options.FuncRegistry))
- , UserData_(std::move(options.UserData))
- , LLVMSettings_(std::move(options.LLVMSettings))
- , BlockEngineMode_(options.BlockEngineMode)
- , ExprOutputStream_(options.ExprOutputStream)
- , CountersProvider_(options.CountersProvider_)
- , NativeYtTypeFlags_(options.NativeYtTypeFlags_)
- , DeterministicTimeProviderSeed_(options.DeterministicTimeProviderSeed_)
- , UseSystemColumns_(options.UseSystemColumns)
- , UseWorkerPool_(options.UseWorkerPool)
-{
- // Prepare input struct types and extract all column names from inputs
-
- const auto& inputSchemas = options.InputSpec.GetSchemas();
- const auto& allVirtualColumns = options.InputSpec.GetAllVirtualColumns();
-
- YQL_ENSURE(inputSchemas.size() == allVirtualColumns.size());
-
- const auto inputsCount = inputSchemas.size();
-
- for (ui32 i = 0; i < inputsCount; ++i) {
- const auto* originalInputType = MakeTypeFromSchema(inputSchemas[i], ExprContext_);
- if (!ValidateInputSchema(originalInputType, ExprContext_)) {
- ythrow TCompileError("", ExprContext_.IssueManager.GetIssues().ToString()) << "invalid schema for #" << i << " input";
- }
-
- const auto* originalStructType = originalInputType->template Cast<TStructExprType>();
- const auto* structType = ExtendStructType(originalStructType, allVirtualColumns[i], ExprContext_);
-
- InputTypes_.push_back(structType);
- OriginalInputTypes_.push_back(originalStructType);
- RawInputTypes_.push_back(originalStructType);
-
- auto& columnsSet = AllColumns_.emplace_back();
- for (const auto* structItem : structType->GetItems()) {
- columnsSet.insert(TString(structItem->GetName()));
-
- if (!UseSystemColumns_ && structItem->GetName().StartsWith(PurecalcSysColumnsPrefix)) {
- ythrow TCompileError("", ExprContext_.IssueManager.GetIssues().ToString())
- << "#" << i << " input provides system column " << structItem->GetName()
- << ", but it is forbidden by options";
- }
- }
- }
-
- // Prepare output type
-
- auto outputSchema = options.OutputSpec.GetSchema();
- if (!outputSchema.IsNull()) {
- OutputType_ = MakeTypeFromSchema(outputSchema, ExprContext_);
- if (!ValidateOutputSchema(OutputType_, ExprContext_)) {
- ythrow TCompileError("", ExprContext_.IssueManager.GetIssues().ToString()) << "invalid output schema";
- }
- } else {
- OutputType_ = nullptr;
- }
-
- RawOutputType_ = OutputType_;
-
- // Translate
-
- if (options.TranslationMode_ == ETranslationMode::Mkql) {
- SerializedProgram_ = TString{options.Query};
- } else {
- ExprRoot_ = Compile(options.Query, options.TranslationMode_,
- options.ModuleResolver, options.SyntaxVersion_, options.Modules,
- options.InputSpec, options.OutputSpec, processorMode);
-
- RawOutputType_ = GetSequenceItemType(ExprRoot_->Pos(), ExprRoot_->GetTypeAnn(), true, ExprContext_);
-
- // Deduce output type if it wasn't provided by output spec
-
- if (!OutputType_) {
- OutputType_ = RawOutputType_;
- // XXX: Tweak the obtained expression type, is the spec supports blocks:
- // 1. Remove "_yql_block_length" attribute, since it's for internal usage.
- // 2. Strip block container from the type to store its internal type.
- if (options.OutputSpec.AcceptsBlocks()) {
- Y_ENSURE(OutputType_->GetKind() == ETypeAnnotationKind::Struct);
- OutputType_ = UnwrapBlockStruct(OutputType_->Cast<TStructExprType>(), ExprContext_);
- }
- }
- if (!OutputType_) {
- ythrow TCompileError("", ExprContext_.IssueManager.GetIssues().ToString()) << "cannot deduce output schema";
- }
- }
-}
-
-template <typename TBase>
-TExprNode::TPtr TWorkerFactory<TBase>::Compile(
- TStringBuf query,
- ETranslationMode mode,
- IModuleResolver::TPtr moduleResolver,
- ui16 syntaxVersion,
- const THashMap<TString, TString>& modules,
- const TInputSpecBase& inputSpec,
- const TOutputSpecBase& outputSpec,
- EProcessorMode processorMode
-) {
- if (mode == ETranslationMode::PG && processorMode != EProcessorMode::PullList) {
- ythrow TCompileError("", "") << "only PullList mode is compatible to PostgreSQL syntax";
- }
-
- // Prepare type annotation context
-
- TTypeAnnotationContextPtr typeContext;
-
- typeContext = MakeIntrusive<TTypeAnnotationContext>();
- typeContext->RandomProvider = CreateDefaultRandomProvider();
- typeContext->TimeProvider = DeterministicTimeProviderSeed_ ?
- CreateDeterministicTimeProvider(*DeterministicTimeProviderSeed_) :
- CreateDefaultTimeProvider();
- typeContext->UdfResolver = NCommon::CreateSimpleUdfResolver(FuncRegistry_.Get());
- typeContext->ArrowResolver = MakeSimpleArrowResolver(*FuncRegistry_.Get());
- typeContext->UserDataStorage = MakeIntrusive<TUserDataStorage>(nullptr, UserData_, nullptr, nullptr);
- typeContext->Modules = moduleResolver;
- typeContext->BlockEngineMode = BlockEngineMode_;
- auto configProvider = CreateConfigProvider(*typeContext, nullptr, "");
- typeContext->AddDataSource(ConfigProviderName, configProvider);
- typeContext->Initialize(ExprContext_);
-
- if (auto modules = dynamic_cast<TModuleResolver*>(moduleResolver.get())) {
- modules->AttachUserData(typeContext->UserDataStorage);
- }
-
- // Parse SQL/s-expr into AST
-
- TAstParseResult astRes;
-
- if (mode == ETranslationMode::SQL || mode == ETranslationMode::PG) {
- NSQLTranslation::TTranslationSettings settings;
-
- typeContext->DeprecatedSQL = (syntaxVersion == 0);
- if (mode == ETranslationMode::PG) {
- settings.PgParser = true;
- }
-
- settings.SyntaxVersion = syntaxVersion;
- settings.V0Behavior = NSQLTranslation::EV0Behavior::Disable;
- settings.Mode = NSQLTranslation::ESqlMode::LIMITED_VIEW;
- settings.DefaultCluster = PurecalcDefaultCluster;
- settings.ClusterMapping[settings.DefaultCluster] = PurecalcDefaultService;
- settings.ModuleMapping = modules;
- settings.EnableGenericUdfs = true;
- settings.File = "generated.sql";
- settings.Flags = {
- "AnsiOrderByLimitInUnionAll",
- "AnsiRankForNullableKeys",
- "DisableAnsiOptionalAs",
- "DisableCoalesceJoinKeysOnQualifiedAll",
- "DisableUnorderedSubqueries",
- "FlexibleTypes"
- };
- if (BlockEngineMode_ != EBlockEngineMode::Disable) {
- settings.Flags.insert("EmitAggApply");
- }
- for (const auto& [key, block] : UserData_) {
- TStringBuf alias(key.Alias());
- if (block.Usage.Test(EUserDataBlockUsage::Library) && !alias.StartsWith("/lib")) {
- alias.SkipPrefix("/home/");
- settings.Libraries.emplace(alias);
- }
- }
-
- astRes = SqlToYql(TString(query), settings);
- } else {
- astRes = ParseAst(TString(query));
- }
-
- if (!astRes.IsOk()) {
- ythrow TCompileError(TString(query), astRes.Issues.ToString()) << "failed to parse " << mode;
- }
-
- ExprContext_.IssueManager.AddIssues(astRes.Issues);
-
- if (ETraceLevel::TRACE_DETAIL <= StdDbgLevel()) {
- Cdbg << "Before optimization:" << Endl;
- astRes.Root->PrettyPrintTo(Cdbg, TAstPrintFlags::PerLine | TAstPrintFlags::ShortQuote | TAstPrintFlags::AdaptArbitraryContent);
- }
-
- // Translate AST into expression
-
- TExprNode::TPtr exprRoot;
- if (!CompileExpr(*astRes.Root, exprRoot, ExprContext_, moduleResolver.get(), nullptr, 0, syntaxVersion)) {
- TStringStream astStr;
- astRes.Root->PrettyPrintTo(astStr, TAstPrintFlags::ShortQuote | TAstPrintFlags::PerLine);
- ythrow TCompileError(astStr.Str(), ExprContext_.IssueManager.GetIssues().ToString()) << "failed to compile";
- }
-
-
- // Prepare transformation pipeline
- THolder<IGraphTransformer> calcTransformer = CreateFunctorTransformer([&](TExprNode::TPtr input, TExprNode::TPtr& output, TExprContext& ctx)
- -> IGraphTransformer::TStatus
- {
- output = input;
- auto valueNode = input->HeadPtr();
-
- auto peepHole = MakePeepholeOptimization(typeContext);
- auto status = SyncTransform(*peepHole, valueNode, ctx);
- if (status != IGraphTransformer::TStatus::Ok) {
- return status;
- }
-
- TStringStream out;
- NYson::TYsonWriter writer(&out, NYson::EYsonFormat::Text, ::NYson::EYsonType::Node, true);
- writer.OnBeginMap();
-
- writer.OnKeyedItem("Data");
-
- TWorkerGraph graph(
- valueNode,
- ctx,
- {},
- *FuncRegistry_,
- UserData_,
- {},
- {},
- {},
- valueNode->GetTypeAnn(),
- valueNode->GetTypeAnn(),
- LLVMSettings_,
- CountersProvider_,
- NativeYtTypeFlags_,
- DeterministicTimeProviderSeed_
- );
-
- with_lock (graph.ScopedAlloc_) {
- const auto value = graph.ComputationGraph_->GetValue();
- NCommon::WriteYsonValue(writer, value, const_cast<NKikimr::NMiniKQL::TType*>(graph.OutputType_), nullptr);
- }
- writer.OnEndMap();
-
- auto ysonAtom = ctx.NewAtom(TPositionHandle(), out.Str());
- input->SetResult(std::move(ysonAtom));
- return IGraphTransformer::TStatus::Ok;
- });
-
- const TString& selfName = TString(inputSpec.ProvidesBlocks()
- ? PurecalcBlockInputCallableName
- : PurecalcInputCallableName);
-
- TTransformationPipeline pipeline(typeContext);
-
- pipeline.Add(MakeTableReadsReplacer(InputTypes_, UseSystemColumns_, processorMode, selfName),
- "ReplaceTableReads", EYqlIssueCode::TIssuesIds_EIssueCode_DEFAULT_ERROR,
- "Replace reads from tables");
- pipeline.AddServiceTransformers();
- pipeline.AddPreTypeAnnotation();
- pipeline.AddExpressionEvaluation(*FuncRegistry_, calcTransformer.Get());
- pipeline.AddIOAnnotation();
- pipeline.AddTypeAnnotationTransformer(MakeTypeAnnotationTransformer(typeContext, InputTypes_, RawInputTypes_, processorMode, selfName));
- pipeline.AddPostTypeAnnotation();
- pipeline.Add(CreateFunctorTransformer(
- [&](const TExprNode::TPtr& input, TExprNode::TPtr& output, TExprContext& ctx) {
- return OptimizeExpr(input, output, [](const TExprNode::TPtr& node, TExprContext&) -> TExprNode::TPtr {
- if (node->IsCallable("Unordered") && node->Child(0)->IsCallable({
- PurecalcInputCallableName, PurecalcBlockInputCallableName
- })) {
- return node->ChildPtr(0);
- }
- return node;
- }, ctx, TOptimizeExprSettings(nullptr));
- }), "Unordered", EYqlIssueCode::TIssuesIds_EIssueCode_DEFAULT_ERROR,
- "Unordered optimizations");
- pipeline.Add(CreateFunctorTransformer(
- [&](const TExprNode::TPtr& input, TExprNode::TPtr& output, TExprContext& ctx) {
- return OptimizeExpr(input, output, [](const TExprNode::TPtr& node, TExprContext&) -> TExprNode::TPtr {
- if (node->IsCallable("Right!") && node->Head().IsCallable("Cons!")) {
- return node->Head().ChildPtr(1);
- }
-
- return node;
- }, ctx, TOptimizeExprSettings(nullptr));
- }), "Cons", EYqlIssueCode::TIssuesIds_EIssueCode_DEFAULT_ERROR,
- "Cons optimizations");
- pipeline.Add(MakeOutputColumnsFilter(outputSpec.GetOutputColumnsFilter()),
- "Filter", EYqlIssueCode::TIssuesIds_EIssueCode_DEFAULT_ERROR,
- "Filter output columns");
- pipeline.Add(MakeRootToBlocks(outputSpec.AcceptsBlocks(), processorMode),
- "RootToBlocks", EYqlIssueCode::TIssuesIds_EIssueCode_DEFAULT_ERROR,
- "Rewrite the root if the output spec accepts blocks");
- pipeline.Add(MakeOutputAligner(OutputType_, outputSpec.AcceptsBlocks(), processorMode),
- "Convert", EYqlIssueCode::TIssuesIds_EIssueCode_DEFAULT_ERROR,
- "Align return type of the program to output schema");
- pipeline.AddCommonOptimization();
- pipeline.AddFinalCommonOptimization();
- pipeline.Add(MakeUsedColumnsExtractor(&UsedColumns_, AllColumns_),
- "ExtractColumns", EYqlIssueCode::TIssuesIds_EIssueCode_DEFAULT_ERROR,
- "Extract used columns");
- pipeline.Add(MakePeepholeOptimization(typeContext),
- "PeepHole", EYqlIssueCode::TIssuesIds_EIssueCode_DEFAULT_ERROR,
- "Peephole optimizations");
- pipeline.AddCheckExecution(false);
-
- // Apply optimizations
-
- auto transformer = pipeline.Build();
- auto status = SyncTransform(*transformer, exprRoot, ExprContext_);
- auto transformStats = transformer->GetStatistics();
- TStringStream out;
- NYson::TYsonWriter writer(&out, NYson::EYsonFormat::Pretty);
- NCommon::TransformerStatsToYson("", transformStats, writer);
- YQL_CLOG(DEBUG, Core) << "Transform stats: " << out.Str();
- if (status == IGraphTransformer::TStatus::Error) {
- ythrow TCompileError("", ExprContext_.IssueManager.GetIssues().ToString()) << "Failed to optimize";
- }
-
- IOutputStream* exprOut = nullptr;
- if (ExprOutputStream_) {
- exprOut = ExprOutputStream_;
- } else if (ETraceLevel::TRACE_DETAIL <= StdDbgLevel()) {
- exprOut = &Cdbg;
- }
-
- if (exprOut) {
- *exprOut << "After optimization:" << Endl;
- ConvertToAst(*exprRoot, ExprContext_, 0, true).Root
- ->PrettyPrintTo(*exprOut, TAstPrintFlags::PerLine
- | TAstPrintFlags::ShortQuote
- | TAstPrintFlags::AdaptArbitraryContent);
- }
- return exprRoot;
-}
-
-template <typename TBase>
-NYT::TNode TWorkerFactory<TBase>::MakeInputSchema(ui32 inputIndex) const {
- Y_ENSURE(
- inputIndex < InputTypes_.size(),
- "invalid input index (" << inputIndex << ") in MakeInputSchema call");
-
- return NCommon::TypeToYsonNode(InputTypes_[inputIndex]);
-}
-
-template <typename TBase>
-NYT::TNode TWorkerFactory<TBase>::MakeInputSchema() const {
- Y_ENSURE(
- InputTypes_.size() == 1,
- "MakeInputSchema() can be used only with single-input programs");
-
- return NCommon::TypeToYsonNode(InputTypes_[0]);
-}
-
-template <typename TBase>
-NYT::TNode TWorkerFactory<TBase>::MakeOutputSchema() const {
- Y_ENSURE(OutputType_, "MakeOutputSchema() cannot be used with precompiled programs");
- Y_ENSURE(
- OutputType_->GetKind() == ETypeAnnotationKind::Struct,
- "MakeOutputSchema() cannot be used with multi-output programs");
-
- return NCommon::TypeToYsonNode(OutputType_);
-}
-
-template <typename TBase>
-NYT::TNode TWorkerFactory<TBase>::MakeOutputSchema(ui32 index) const {
- Y_ENSURE(OutputType_, "MakeOutputSchema() cannot be used with precompiled programs");
- Y_ENSURE(
- OutputType_->GetKind() == ETypeAnnotationKind::Variant,
- "MakeOutputSchema(ui32) cannot be used with single-output programs");
-
- auto vtype = OutputType_->template Cast<TVariantExprType>();
-
- Y_ENSURE(
- vtype->GetUnderlyingType()->GetKind() == ETypeAnnotationKind::Tuple,
- "MakeOutputSchema(ui32) cannot be used to process variants over struct");
-
- auto ttype = vtype->GetUnderlyingType()->template Cast<TTupleExprType>();
-
- Y_ENSURE(
- index < ttype->GetSize(),
- "Invalid table index " << index);
-
- return NCommon::TypeToYsonNode(ttype->GetItems()[index]);
-}
-
-template <typename TBase>
-NYT::TNode TWorkerFactory<TBase>::MakeOutputSchema(TStringBuf tableName) const {
- Y_ENSURE(OutputType_, "MakeOutputSchema() cannot be used with precompiled programs");
- Y_ENSURE(
- OutputType_->GetKind() == ETypeAnnotationKind::Variant,
- "MakeOutputSchema(TStringBuf) cannot be used with single-output programs");
-
- auto vtype = OutputType_->template Cast<TVariantExprType>();
-
- Y_ENSURE(
- vtype->GetUnderlyingType()->GetKind() == ETypeAnnotationKind::Struct,
- "MakeOutputSchema(TStringBuf) cannot be used to process variants over tuple");
-
- auto stype = vtype->GetUnderlyingType()->template Cast<TStructExprType>();
-
- auto index = stype->FindItem(tableName);
-
- Y_ENSURE(
- index.Defined(),
- "Invalid table index " << TString{tableName}.Quote());
-
- return NCommon::TypeToYsonNode(stype->GetItems()[*index]->GetItemType());
-}
-
-template <typename TBase>
-NYT::TNode TWorkerFactory<TBase>::MakeFullOutputSchema() const {
- Y_ENSURE(OutputType_, "MakeFullOutputSchema() cannot be used with precompiled programs");
- return NCommon::TypeToYsonNode(OutputType_);
-}
-
-template <typename TBase>
-const THashSet<TString>& TWorkerFactory<TBase>::GetUsedColumns(ui32 inputIndex) const {
- Y_ENSURE(
- inputIndex < UsedColumns_.size(),
- "invalid input index (" << inputIndex << ") in GetUsedColumns call");
-
- return UsedColumns_[inputIndex];
-}
-
-template <typename TBase>
-const THashSet<TString>& TWorkerFactory<TBase>::GetUsedColumns() const {
- Y_ENSURE(
- UsedColumns_.size() == 1,
- "GetUsedColumns() can be used only with single-input programs");
-
- return UsedColumns_[0];
-}
-
-template <typename TBase>
-TIssues TWorkerFactory<TBase>::GetIssues() const {
- return ExprContext_.IssueManager.GetCompletedIssues();
-}
-
-template <typename TBase>
-TString TWorkerFactory<TBase>::GetCompiledProgram() {
- if (ExprRoot_) {
- NKikimr::NMiniKQL::TScopedAlloc alloc(__LOCATION__, NKikimr::TAlignedPagePoolCounters(),
- FuncRegistry_->SupportsSizedAllocators());
- NKikimr::NMiniKQL::TTypeEnvironment env(alloc);
-
- auto rootNode = CompileMkql(ExprRoot_, ExprContext_, *FuncRegistry_, env, UserData_);
- return NKikimr::NMiniKQL::SerializeRuntimeNode(rootNode, env);
- }
-
- return SerializedProgram_;
-}
-
-template <typename TBase>
-void TWorkerFactory<TBase>::ReturnWorker(IWorker* worker) {
- THolder<IWorker> tmp(worker);
- if (UseWorkerPool_) {
- WorkerPool_.push_back(std::move(tmp));
- }
-}
-
-
-#define DEFINE_WORKER_MAKER(MODE) \
- TWorkerHolder<I##MODE##Worker> T##MODE##WorkerFactory::MakeWorker() { \
- if (!WorkerPool_.empty()) { \
- auto res = std::move(WorkerPool_.back()); \
- WorkerPool_.pop_back(); \
- return TWorkerHolder<I##MODE##Worker>((I##MODE##Worker *)res.Release()); \
- } \
- return TWorkerHolder<I##MODE##Worker>(new T##MODE##Worker( \
- weak_from_this(), \
- ExprRoot_, \
- ExprContext_, \
- SerializedProgram_, \
- *FuncRegistry_, \
- UserData_, \
- InputTypes_, \
- OriginalInputTypes_, \
- RawInputTypes_, \
- OutputType_, \
- RawOutputType_, \
- LLVMSettings_, \
- CountersProvider_, \
- NativeYtTypeFlags_, \
- DeterministicTimeProviderSeed_ \
- )); \
- }
-
-DEFINE_WORKER_MAKER(PullStream)
-DEFINE_WORKER_MAKER(PullList)
-DEFINE_WORKER_MAKER(PushStream)
-
-namespace NYql {
- namespace NPureCalc {
- template
- class TWorkerFactory<IPullStreamWorkerFactory>;
-
- template
- class TWorkerFactory<IPullListWorkerFactory>;
-
- template
- class TWorkerFactory<IPushStreamWorkerFactory>;
- }
-}
diff --git a/ydb/library/yql/public/purecalc/common/worker_factory.h b/ydb/library/yql/public/purecalc/common/worker_factory.h
deleted file mode 100644
index baf741814fd..00000000000
--- a/ydb/library/yql/public/purecalc/common/worker_factory.h
+++ /dev/null
@@ -1,168 +0,0 @@
-#pragma once
-
-#include <ydb/library/yql/public/purecalc/common/interface.h>
-
-#include "processor_mode.h"
-
-#include <util/generic/ptr.h>
-#include <yql/essentials/ast/yql_expr.h>
-#include <yql/essentials/core/yql_user_data.h>
-#include <yql/essentials/minikql/mkql_function_registry.h>
-#include <yql/essentials/core/yql_type_annotation.h>
-#include <utility>
-
-namespace NYql {
- namespace NPureCalc {
- struct TWorkerFactoryOptions {
- IProgramFactoryPtr Factory;
- const TInputSpecBase& InputSpec;
- const TOutputSpecBase& OutputSpec;
- TStringBuf Query;
- TIntrusivePtr<NKikimr::NMiniKQL::IMutableFunctionRegistry> FuncRegistry;
- IModuleResolver::TPtr ModuleResolver;
- const TUserDataTable& UserData;
- const THashMap<TString, TString>& Modules;
- TString LLVMSettings;
- EBlockEngineMode BlockEngineMode;
- IOutputStream* ExprOutputStream;
- NKikimr::NUdf::ICountersProvider* CountersProvider_;
- ETranslationMode TranslationMode_;
- ui16 SyntaxVersion_;
- ui64 NativeYtTypeFlags_;
- TMaybe<ui64> DeterministicTimeProviderSeed_;
- bool UseSystemColumns;
- bool UseWorkerPool;
-
- TWorkerFactoryOptions(
- IProgramFactoryPtr Factory,
- const TInputSpecBase& InputSpec,
- const TOutputSpecBase& OutputSpec,
- TStringBuf Query,
- TIntrusivePtr<NKikimr::NMiniKQL::IMutableFunctionRegistry> FuncRegistry,
- IModuleResolver::TPtr ModuleResolver,
- const TUserDataTable& UserData,
- const THashMap<TString, TString>& Modules,
- TString LLVMSettings,
- EBlockEngineMode BlockEngineMode,
- IOutputStream* ExprOutputStream,
- NKikimr::NUdf::ICountersProvider* CountersProvider,
- ETranslationMode translationMode,
- ui16 syntaxVersion,
- ui64 nativeYtTypeFlags,
- TMaybe<ui64> deterministicTimeProviderSeed,
- bool useSystemColumns,
- bool useWorkerPool
- )
- : Factory(std::move(Factory))
- , InputSpec(InputSpec)
- , OutputSpec(OutputSpec)
- , Query(Query)
- , FuncRegistry(std::move(FuncRegistry))
- , ModuleResolver(std::move(ModuleResolver))
- , UserData(UserData)
- , Modules(Modules)
- , LLVMSettings(std::move(LLVMSettings))
- , BlockEngineMode(BlockEngineMode)
- , ExprOutputStream(ExprOutputStream)
- , CountersProvider_(CountersProvider)
- , TranslationMode_(translationMode)
- , SyntaxVersion_(syntaxVersion)
- , NativeYtTypeFlags_(nativeYtTypeFlags)
- , DeterministicTimeProviderSeed_(deterministicTimeProviderSeed)
- , UseSystemColumns(useSystemColumns)
- , UseWorkerPool(useWorkerPool)
- {
- }
- };
-
- template <typename TBase>
- class TWorkerFactory: public TBase {
- private:
- IProgramFactoryPtr Factory_;
-
- protected:
- TIntrusivePtr<NKikimr::NMiniKQL::IMutableFunctionRegistry> FuncRegistry_;
- const TUserDataTable& UserData_;
- TExprContext ExprContext_;
- TExprNode::TPtr ExprRoot_;
- TString SerializedProgram_;
- TVector<const TStructExprType*> InputTypes_;
- TVector<const TStructExprType*> OriginalInputTypes_;
- TVector<const TStructExprType*> RawInputTypes_;
- const TTypeAnnotationNode* OutputType_;
- const TTypeAnnotationNode* RawOutputType_;
- TVector<THashSet<TString>> AllColumns_;
- TVector<THashSet<TString>> UsedColumns_;
- TString LLVMSettings_;
- EBlockEngineMode BlockEngineMode_;
- IOutputStream* ExprOutputStream_;
- NKikimr::NUdf::ICountersProvider* CountersProvider_;
- ui64 NativeYtTypeFlags_;
- TMaybe<ui64> DeterministicTimeProviderSeed_;
- bool UseSystemColumns_;
- bool UseWorkerPool_;
- TVector<THolder<IWorker>> WorkerPool_;
-
- public:
- TWorkerFactory(TWorkerFactoryOptions, EProcessorMode);
-
- public:
- NYT::TNode MakeInputSchema(ui32) const override;
- NYT::TNode MakeInputSchema() const override;
- NYT::TNode MakeOutputSchema() const override;
- NYT::TNode MakeOutputSchema(ui32) const override;
- NYT::TNode MakeOutputSchema(TStringBuf) const override;
- NYT::TNode MakeFullOutputSchema() const override;
- const THashSet<TString>& GetUsedColumns(ui32 inputIndex) const override;
- const THashSet<TString>& GetUsedColumns() const override;
- TIssues GetIssues() const override;
- TString GetCompiledProgram() override;
-
- protected:
- void ReturnWorker(IWorker* worker) override;
-
- private:
- TExprNode::TPtr Compile(TStringBuf query,
- ETranslationMode mode,
- IModuleResolver::TPtr moduleResolver,
- ui16 syntaxVersion,
- const THashMap<TString, TString>& modules,
- const TInputSpecBase& inputSpec,
- const TOutputSpecBase& outputSpec,
- EProcessorMode processorMode);
- };
-
- class TPullStreamWorkerFactory final: public TWorkerFactory<IPullStreamWorkerFactory> {
- public:
- explicit TPullStreamWorkerFactory(TWorkerFactoryOptions options)
- : TWorkerFactory(std::move(options), EProcessorMode::PullStream)
- {
- }
-
- public:
- TWorkerHolder<IPullStreamWorker> MakeWorker() override;
- };
-
- class TPullListWorkerFactory final: public TWorkerFactory<IPullListWorkerFactory> {
- public:
- explicit TPullListWorkerFactory(TWorkerFactoryOptions options)
- : TWorkerFactory(std::move(options), EProcessorMode::PullList)
- {
- }
-
- public:
- TWorkerHolder<IPullListWorker> MakeWorker() override;
- };
-
- class TPushStreamWorkerFactory final: public TWorkerFactory<IPushStreamWorkerFactory> {
- public:
- explicit TPushStreamWorkerFactory(TWorkerFactoryOptions options)
- : TWorkerFactory(std::move(options), EProcessorMode::PushStream)
- {
- }
-
- public:
- TWorkerHolder<IPushStreamWorker> MakeWorker() override;
- };
- }
-}
diff --git a/ydb/library/yql/public/purecalc/common/wrappers.cpp b/ydb/library/yql/public/purecalc/common/wrappers.cpp
deleted file mode 100644
index c808d7b3940..00000000000
--- a/ydb/library/yql/public/purecalc/common/wrappers.cpp
+++ /dev/null
@@ -1 +0,0 @@
-#include "wrappers.h"
diff --git a/ydb/library/yql/public/purecalc/common/wrappers.h b/ydb/library/yql/public/purecalc/common/wrappers.h
deleted file mode 100644
index 4d65e012716..00000000000
--- a/ydb/library/yql/public/purecalc/common/wrappers.h
+++ /dev/null
@@ -1,70 +0,0 @@
-#pragma once
-
-#include "fwd.h"
-
-#include <util/generic/ptr.h>
-
-namespace NYql::NPureCalc::NPrivate {
- template <typename TNew, typename TOld, typename TFunctor>
- class TMappingStream final: public IStream<TNew> {
- private:
- THolder<IStream<TOld>> Old_;
- TFunctor Functor_;
-
- public:
- TMappingStream(THolder<IStream<TOld>> old, TFunctor functor)
- : Old_(std::move(old))
- , Functor_(std::move(functor))
- {
- }
-
- public:
- TNew Fetch() override {
- return Functor_(Old_->Fetch());
- }
- };
-
- template <typename TNew, typename TOld, typename TFunctor>
- class TMappingConsumer final: public IConsumer<TNew> {
- private:
- THolder<IConsumer<TOld>> Old_;
- TFunctor Functor_;
-
- public:
- TMappingConsumer(THolder<IConsumer<TOld>> old, TFunctor functor)
- : Old_(std::move(old))
- , Functor_(std::move(functor))
- {
- }
-
- public:
- void OnObject(TNew object) override {
- Old_->OnObject(Functor_(object));
- }
-
- void OnFinish() override {
- Old_->OnFinish();
- }
- };
-
- template <typename T, typename C>
- class TNonOwningConsumer final: public IConsumer<T> {
- private:
- C Consumer;
-
- public:
- explicit TNonOwningConsumer(const C& consumer)
- : Consumer(consumer)
- {
- }
-
- public:
- void OnObject(T t) override {
- Consumer->OnObject(t);
- }
-
- void OnFinish() override {
- Consumer->OnFinish();
- }
- };
-}
diff --git a/ydb/library/yql/public/purecalc/common/ya.make b/ydb/library/yql/public/purecalc/common/ya.make
deleted file mode 100644
index 8e478493271..00000000000
--- a/ydb/library/yql/public/purecalc/common/ya.make
+++ /dev/null
@@ -1,21 +0,0 @@
-LIBRARY()
-
-INCLUDE(ya.make.inc)
-
-PEERDIR(
- ydb/library/yql/providers/yt/codec/codegen
- yql/essentials/providers/config
- yql/essentials/minikql/computation/llvm14
- yql/essentials/minikql/invoke_builtins/llvm14
- yql/essentials/minikql/comp_nodes/llvm14
- yql/essentials/parser/pg_wrapper
- yql/essentials/parser/pg_wrapper/interface
- yql/essentials/sql/pg
-)
-
-END()
-
-RECURSE(
- no_llvm
-)
-
diff --git a/ydb/library/yql/public/purecalc/common/ya.make.inc b/ydb/library/yql/public/purecalc/common/ya.make.inc
deleted file mode 100644
index 4ef7c535bd1..00000000000
--- a/ydb/library/yql/public/purecalc/common/ya.make.inc
+++ /dev/null
@@ -1,52 +0,0 @@
-SRCDIR(
- ydb/library/yql/public/purecalc/common
-)
-
-ADDINCL(
- ydb/library/yql/public/purecalc/common
-)
-
-SRCS(
- compile_mkql.cpp
- fwd.cpp
- inspect_input.cpp
- interface.cpp
- logger_init.cpp
- names.cpp
- processor_mode.cpp
- program_factory.cpp
- transformations/align_output_schema.cpp
- transformations/extract_used_columns.cpp
- transformations/output_columns_filter.cpp
- transformations/replace_table_reads.cpp
- transformations/root_to_blocks.cpp
- transformations/type_annotation.cpp
- transformations/utils.cpp
- type_from_schema.cpp
- worker.cpp
- worker_factory.cpp
- wrappers.cpp
-)
-
-PEERDIR(
- yql/essentials/ast
- yql/essentials/core/services
- yql/essentials/core/services/mounts
- yql/essentials/core/user_data
- yql/essentials/utils/backtrace
- yql/essentials/utils/log
- yql/essentials/core
- yql/essentials/core/type_ann
- yql/essentials/providers/common/codec
- yql/essentials/providers/common/comp_nodes
- yql/essentials/providers/common/mkql
- yql/essentials/providers/common/provider
- yql/essentials/providers/common/schema/expr
- yql/essentials/providers/common/udf_resolve
- yql/essentials/providers/common/arrow_resolve
-)
-
-YQL_LAST_ABI_VERSION()
-
-GENERATE_ENUM_SERIALIZATION(interface.h)
-
diff --git a/ydb/library/yql/public/purecalc/examples/protobuf/main.cpp b/ydb/library/yql/public/purecalc/examples/protobuf/main.cpp
deleted file mode 100644
index 8ce3692766c..00000000000
--- a/ydb/library/yql/public/purecalc/examples/protobuf/main.cpp
+++ /dev/null
@@ -1,133 +0,0 @@
-#include <ydb/library/yql/public/purecalc/examples/protobuf/main.pb.h>
-
-#include <ydb/library/yql/public/purecalc/purecalc.h>
-#include <ydb/library/yql/public/purecalc/io_specs/protobuf/spec.h>
-#include <ydb/library/yql/public/purecalc/helpers/stream/stream_from_vector.h>
-
-using namespace NYql::NPureCalc;
-using namespace NExampleProtos;
-
-void PullStreamExample(IProgramFactoryPtr);
-void PushStreamExample(IProgramFactoryPtr);
-void PrecompileExample(IProgramFactoryPtr factory);
-THolder<IStream<TInput*>> MakeInput();
-
-class TConsumer: public IConsumer<TOutput*> {
-public:
- void OnObject(TOutput* message) override {
- Cout << "path = " << message->GetPath() << Endl;
- Cout << "host = " << message->GetHost() << Endl;
- }
-
- void OnFinish() override {
- Cout << "end" << Endl;
- }
-};
-
-const char* Query = R"(
- $a = (SELECT * FROM Input);
- $b = (SELECT CAST(Url::GetTail(Url) AS Utf8) AS Path, CAST(Url::GetHost(Url) AS Utf8) AS Host, Ip FROM $a);
- $c = (SELECT Path, Host FROM $b WHERE Path IS NOT NULL AND Host IS NOT NULL AND Ip::IsIPv4(Ip::FromString(Ip)));
- $d = (SELECT Unwrap(Path) AS Path, Unwrap(Host) AS Host FROM $c);
- SELECT * FROM $d;
-)";
-
-int main(int argc, char** argv) {
- try {
- auto factory = MakeProgramFactory(
- TProgramFactoryOptions().SetUDFsDir(argc > 1 ? argv[1] : "../../../../udfs"));
-
- Cout << "Pull stream:" << Endl;
- PullStreamExample(factory);
-
- Cout << Endl;
- Cout << "Push stream:" << Endl;
- PushStreamExample(factory);
-
- Cout << Endl;
- Cout << "Pull stream with pre-compilation:" << Endl;
- PrecompileExample(factory);
- } catch (const TCompileError& err) {
- Cerr << err.GetIssues() << Endl;
- Cerr << err.what() << Endl;
- }
-}
-
-void PullStreamExample(IProgramFactoryPtr factory) {
- auto program = factory->MakePullStreamProgram(
- TProtobufInputSpec<TInput>(),
- TProtobufOutputSpec<TOutput>(),
- Query,
- ETranslationMode::SQL);
-
- auto result = program->Apply(MakeInput());
-
- while (auto* message = result->Fetch()) {
- Cout << "path = " << message->GetPath() << Endl;
- Cout << "host = " << message->GetHost() << Endl;
- }
-}
-
-void PushStreamExample(IProgramFactoryPtr factory) {
- auto program = factory->MakePushStreamProgram(
- TProtobufInputSpec<TInput>(),
- TProtobufOutputSpec<TOutput>(),
- Query,
- ETranslationMode::SQL);
-
- auto consumer = program->Apply(MakeHolder<TConsumer>());
-
- auto input = MakeInput();
- while (auto* message = input->Fetch()) {
- consumer->OnObject(message);
- }
- consumer->OnFinish();
-}
-
-void PrecompileExample(IProgramFactoryPtr factory) {
- TString prg;
- {
- auto program = factory->MakePullStreamProgram(
- TProtobufInputSpec<TInput>(),
- TProtobufOutputSpec<TOutput>(),
- Query,
- ETranslationMode::SQL);
-
- prg = program->GetCompiledProgram();
- }
-
- auto program = factory->MakePullStreamProgram(
- TProtobufInputSpec<TInput>(),
- TProtobufOutputSpec<TOutput>(),
- prg,
- ETranslationMode::Mkql);
-
- auto result = program->Apply(MakeInput());
-
- while (auto* message = result->Fetch()) {
- Cout << "path = " << message->GetPath() << Endl;
- Cout << "host = " << message->GetHost() << Endl;
- }
-}
-
-THolder<IStream<TInput*>> MakeInput() {
- TVector<TInput> input;
-
- {
- auto& message = input.emplace_back();
- message.SetUrl("https://news.yandex.ru/Moscow/index.html?from=index");
- message.SetIp("83.220.231.160");
- }
- {
- auto& message = input.emplace_back();
- message.SetUrl("https://music.yandex.ru/radio/");
- message.SetIp("83.220.231.161");
- }
- {
- auto& message = input.emplace_back();
- message.SetUrl("https://yandex.ru/maps/?ll=141.475401%2C11.581666&spn=1.757813%2C1.733096&z=7&l=map%2Cstv%2Csta&mode=search&panorama%5Bpoint%5D=141.476317%2C11.582710&panorama%5Bdirection%5D=177.241445%2C-15.219821&panorama%5Bspan%5D=107.410156%2C61.993317");
- message.SetIp("::ffff:77.75.155.3");
- }
-
- return StreamFromVector(std::move(input));
-}
diff --git a/ydb/library/yql/public/purecalc/examples/protobuf/main.proto b/ydb/library/yql/public/purecalc/examples/protobuf/main.proto
deleted file mode 100644
index 54fd15e226d..00000000000
--- a/ydb/library/yql/public/purecalc/examples/protobuf/main.proto
+++ /dev/null
@@ -1,11 +0,0 @@
-package NExampleProtos;
-
-message TInput {
- required string Url = 1;
- required string Ip = 2;
-}
-
-message TOutput {
- required string Path = 1;
- required string Host = 2;
-}
diff --git a/ydb/library/yql/public/purecalc/examples/protobuf/ut/canondata/exectest.run_protobuf_/log.out b/ydb/library/yql/public/purecalc/examples/protobuf/ut/canondata/exectest.run_protobuf_/log.out
deleted file mode 100644
index 1ec34e485d2..00000000000
--- a/ydb/library/yql/public/purecalc/examples/protobuf/ut/canondata/exectest.run_protobuf_/log.out
+++ /dev/null
@@ -1,18 +0,0 @@
-Pull stream:
-path = /Moscow/index.html?from=index
-host = news.yandex.ru
-path = /radio/
-host = music.yandex.ru
-
-Push stream:
-path = /Moscow/index.html?from=index
-host = news.yandex.ru
-path = /radio/
-host = music.yandex.ru
-end
-
-Pull stream with pre-compilation:
-path = /Moscow/index.html?from=index
-host = news.yandex.ru
-path = /radio/
-host = music.yandex.ru
diff --git a/ydb/library/yql/public/purecalc/examples/protobuf/ut/canondata/result.json b/ydb/library/yql/public/purecalc/examples/protobuf/ut/canondata/result.json
deleted file mode 100644
index 96a5814765e..00000000000
--- a/ydb/library/yql/public/purecalc/examples/protobuf/ut/canondata/result.json
+++ /dev/null
@@ -1,5 +0,0 @@
-{
- "exectest.run[protobuf]": {
- "uri": "file://exectest.run_protobuf_/log.out"
- }
-}
diff --git a/ydb/library/yql/public/purecalc/examples/protobuf/ut/ya.make b/ydb/library/yql/public/purecalc/examples/protobuf/ut/ya.make
deleted file mode 100644
index 3db9fc480b5..00000000000
--- a/ydb/library/yql/public/purecalc/examples/protobuf/ut/ya.make
+++ /dev/null
@@ -1,15 +0,0 @@
-IF (NOT SANITIZER_TYPE AND NOT OPENSOURCE)
-
-EXECTEST()
-
-RUN(protobuf ${ARCADIA_BUILD_ROOT}/yql/essentials/udfs STDOUT log.out CANONIZE_LOCALLY log.out)
-
-DEPENDS(
- ydb/library/yql/public/purecalc/examples/protobuf
- yql/essentials/udfs/common/url_base
- yql/essentials/udfs/common/ip_base
-)
-
-END()
-
-ENDIF()
diff --git a/ydb/library/yql/public/purecalc/examples/protobuf/ya.make b/ydb/library/yql/public/purecalc/examples/protobuf/ya.make
deleted file mode 100644
index 662816c5189..00000000000
--- a/ydb/library/yql/public/purecalc/examples/protobuf/ya.make
+++ /dev/null
@@ -1,27 +0,0 @@
-PROGRAM()
-
-SRCS(
- main.proto
- main.cpp
-)
-
-PEERDIR(
- ydb/library/yql/public/purecalc
- ydb/library/yql/public/purecalc/io_specs/protobuf
- ydb/library/yql/public/purecalc/helpers/stream
-)
-
-
- YQL_LAST_ABI_VERSION()
-
-
-END()
-
-RECURSE_ROOT_RELATIVE(
- yql/essentials/udfs/common/url_base
- yql/essentials/udfs/common/ip_base
-)
-
-RECURSE_FOR_TESTS(
- ut
-)
diff --git a/ydb/library/yql/public/purecalc/examples/protobuf_pull_list/main.cpp b/ydb/library/yql/public/purecalc/examples/protobuf_pull_list/main.cpp
deleted file mode 100644
index f10c2aa9be6..00000000000
--- a/ydb/library/yql/public/purecalc/examples/protobuf_pull_list/main.cpp
+++ /dev/null
@@ -1,75 +0,0 @@
-#include <ydb/library/yql/public/purecalc/examples/protobuf_pull_list/main.pb.h>
-
-#include <ydb/library/yql/public/purecalc/purecalc.h>
-#include <ydb/library/yql/public/purecalc/io_specs/protobuf/spec.h>
-#include <ydb/library/yql/public/purecalc/helpers/stream/stream_from_vector.h>
-
-using namespace NYql::NPureCalc;
-using namespace NExampleProtos;
-
-const char* Query = R"(
- SELECT
- Url,
- COUNT(*) AS Hits
- FROM
- Input
- GROUP BY
- Url
- ORDER BY
- Url
-)";
-
-THolder<IStream<TInput*>> MakeInput();
-
-int main() {
- try {
- auto factory = MakeProgramFactory();
-
- auto program = factory->MakePullListProgram(
- TProtobufInputSpec<TInput>(),
- TProtobufOutputSpec<TOutput>(),
- Query,
- ETranslationMode::SQL
- );
-
- auto result = program->Apply(MakeInput());
-
- while (auto* message = result->Fetch()) {
- Cout << "url = " << message->GetUrl() << Endl;
- Cout << "hits = " << message->GetHits() << Endl;
- }
- } catch (TCompileError& e) {
- Cout << e.GetIssues();
- }
-}
-
-THolder<IStream<TInput*>> MakeInput() {
- TVector<TInput> input;
-
- {
- auto& message = input.emplace_back();
- message.SetUrl("https://yandex.ru/a");
- }
- {
- auto& message = input.emplace_back();
- message.SetUrl("https://yandex.ru/a");
- }
- {
- auto& message = input.emplace_back();
- message.SetUrl("https://yandex.ru/b");
- }
- {
- auto& message = input.emplace_back();
- message.SetUrl("https://yandex.ru/c");
- }
- {
- auto& message = input.emplace_back();
- message.SetUrl("https://yandex.ru/b");
- }
- {
- auto& message = input.emplace_back();
- message.SetUrl("https://yandex.ru/b");
- }
-
- return StreamFromVector(std::move(input));
-}
diff --git a/ydb/library/yql/public/purecalc/examples/protobuf_pull_list/main.proto b/ydb/library/yql/public/purecalc/examples/protobuf_pull_list/main.proto
deleted file mode 100644
index 2766c4b8c0c..00000000000
--- a/ydb/library/yql/public/purecalc/examples/protobuf_pull_list/main.proto
+++ /dev/null
@@ -1,10 +0,0 @@
-package NExampleProtos;
-
-message TInput {
- required string Url = 1;
-}
-
-message TOutput {
- required string Url = 1;
- required uint64 Hits = 2;
-}
diff --git a/ydb/library/yql/public/purecalc/examples/protobuf_pull_list/ut/canondata/exectest.run_protobuf_pull_list_/log.out b/ydb/library/yql/public/purecalc/examples/protobuf_pull_list/ut/canondata/exectest.run_protobuf_pull_list_/log.out
deleted file mode 100644
index 0a799ed4b09..00000000000
--- a/ydb/library/yql/public/purecalc/examples/protobuf_pull_list/ut/canondata/exectest.run_protobuf_pull_list_/log.out
+++ /dev/null
@@ -1,6 +0,0 @@
-url = https://yandex.ru/a
-hits = 2
-url = https://yandex.ru/b
-hits = 3
-url = https://yandex.ru/c
-hits = 1
diff --git a/ydb/library/yql/public/purecalc/examples/protobuf_pull_list/ut/canondata/result.json b/ydb/library/yql/public/purecalc/examples/protobuf_pull_list/ut/canondata/result.json
deleted file mode 100644
index 668467cc850..00000000000
--- a/ydb/library/yql/public/purecalc/examples/protobuf_pull_list/ut/canondata/result.json
+++ /dev/null
@@ -1,6 +0,0 @@
-{
- "exectest.run[protobuf_pull_list]": {
- "checksum": "29bf513fe0ca6f81ae076213a1c7801c",
- "uri": "file://exectest.run_protobuf_pull_list_/log.out"
- }
-} \ No newline at end of file
diff --git a/ydb/library/yql/public/purecalc/examples/protobuf_pull_list/ut/ya.make b/ydb/library/yql/public/purecalc/examples/protobuf_pull_list/ut/ya.make
deleted file mode 100644
index 011ee766996..00000000000
--- a/ydb/library/yql/public/purecalc/examples/protobuf_pull_list/ut/ya.make
+++ /dev/null
@@ -1,9 +0,0 @@
-EXECTEST()
-
-RUN(protobuf_pull_list STDOUT log.out CANONIZE_LOCALLY log.out)
-
-DEPENDS(
- ydb/library/yql/public/purecalc/examples/protobuf_pull_list
-)
-
-END()
diff --git a/ydb/library/yql/public/purecalc/examples/protobuf_pull_list/ya.make b/ydb/library/yql/public/purecalc/examples/protobuf_pull_list/ya.make
deleted file mode 100644
index cf800933af6..00000000000
--- a/ydb/library/yql/public/purecalc/examples/protobuf_pull_list/ya.make
+++ /dev/null
@@ -1,20 +0,0 @@
-PROGRAM()
-
-SRCS(
- main.proto
- main.cpp
-)
-
-PEERDIR(
- ydb/library/yql/public/purecalc
- ydb/library/yql/public/purecalc/io_specs/protobuf
- ydb/library/yql/public/purecalc/helpers/stream
-)
-
-YQL_LAST_ABI_VERSION()
-
-END()
-
-RECURSE_FOR_TESTS(
- ut
-)
diff --git a/ydb/library/yql/public/purecalc/examples/skiff_pull_list/main.cpp b/ydb/library/yql/public/purecalc/examples/skiff_pull_list/main.cpp
deleted file mode 100644
index 4b748d802d6..00000000000
--- a/ydb/library/yql/public/purecalc/examples/skiff_pull_list/main.cpp
+++ /dev/null
@@ -1,93 +0,0 @@
-
-#include <ydb/library/yql/public/purecalc/purecalc.h>
-#include <ydb/library/yql/public/purecalc/io_specs/mkql/spec.h>
-
-#include <yql/essentials/core/user_data/yql_user_data.h>
-
-#include <util/stream/file.h>
-#include <util/datetime/base.h>
-#include <library/cpp/yson/node/node.h>
-#include <library/cpp/yson/node/node_io.h>
-
-#include <library/cpp/skiff/skiff.h>
-
-using namespace NYql::NUserData;
-using namespace NYT;
-using namespace NYql::NPureCalc;
-
-const char* Query = R"(
- SELECT
- Url,
- COUNT(*) AS Hits
- FROM
- Input
- GROUP BY
- Url
- ORDER BY
- Hits desc
-)";
-
-int main() {
- auto addField = [&](NYT::TNode& members, const TString& name, const TString& type, const bool isOptional) {
- auto typeNode = NYT::TNode::CreateList()
- .Add("DataType")
- .Add(type);
-
- if (isOptional) {
- typeNode = NYT::TNode::CreateList()
- .Add("OptionalType")
- .Add(typeNode);
- }
-
- members.Add(NYT::TNode::CreateList()
- .Add(name)
- .Add(typeNode));
- };
-
- NYT::TNode members{NYT::TNode::CreateList()};
- addField(members, "Url", "String", false);
- NYT::TNode schema = NYT::TNode::CreateList()
- .Add("StructType")
- .Add(members);
-
- Cout << "InputSchema: " << NodeToYsonString(schema) << Endl;
- auto inputSpec = TSkiffInputSpec(TVector<NYT::TNode>{schema});
- auto outputSpec = TSkiffOutputSpec({NYT::TNode::CreateEntity()});
- auto factoryOptions = TProgramFactoryOptions();
- factoryOptions.SetNativeYtTypeFlags(0);
- factoryOptions.SetLLVMSettings("OFF");
- factoryOptions.SetBlockEngineSettings("disable");
- auto factory = MakeProgramFactory(factoryOptions);
- auto program = factory->MakePullListProgram(
- inputSpec,
- outputSpec,
- Query,
- ETranslationMode::SQL);
- Cout << "OutpSchema: " << NYT::NodeToCanonicalYsonString(program->MakeFullOutputSchema()) << Endl;
- TStringStream stream;
- NSkiff::TUncheckedSkiffWriter writer{&stream};
- writer.WriteVariant16Tag(0);
- writer.WriteString32("https://yandex.ru/a");
- writer.WriteVariant16Tag(0);
- writer.WriteString32("https://yandex.ru/a");
- writer.WriteVariant16Tag(0);
- writer.WriteString32("https://yandex.ru/b");
- writer.WriteVariant16Tag(0);
- writer.WriteString32("https://yandex.ru/c");
- writer.WriteVariant16Tag(0);
- writer.WriteString32("https://yandex.ru/b");
- writer.WriteVariant16Tag(0);
- writer.WriteString32("https://yandex.ru/b");
- writer.Finish();
- auto input = TStringStream(stream);
- auto handle = program->Apply(&input);
- TStringStream output;
- handle->Run(&output);
- auto parser = NSkiff::TUncheckedSkiffParser(&output);
- while (parser.HasMoreData()) {
- parser.ParseVariant16Tag();
- auto hits = parser.ParseInt64();
- auto url = parser.ParseString32();
- Cout << "URL: " << url << " Hits: " << hits << Endl;
- }
-}
diff --git a/ydb/library/yql/public/purecalc/examples/skiff_pull_list/ya.make b/ydb/library/yql/public/purecalc/examples/skiff_pull_list/ya.make
deleted file mode 100644
index 0966d670fef..00000000000
--- a/ydb/library/yql/public/purecalc/examples/skiff_pull_list/ya.make
+++ /dev/null
@@ -1,14 +0,0 @@
-PROGRAM()
-
-SRCS(
- main.cpp
-)
-
-PEERDIR(
- ydb/library/yql/public/purecalc
- ydb/library/yql/public/purecalc/io_specs/mkql
-)
-
-YQL_LAST_ABI_VERSION()
-
-END()
diff --git a/ydb/library/yql/public/purecalc/examples/ya.make b/ydb/library/yql/public/purecalc/examples/ya.make
deleted file mode 100644
index d78f8a825d0..00000000000
--- a/ydb/library/yql/public/purecalc/examples/ya.make
+++ /dev/null
@@ -1,5 +0,0 @@
-RECURSE(
- protobuf
- protobuf_pull_list
- skiff_pull_list
-)
diff --git a/ydb/library/yql/public/purecalc/helpers/protobuf/schema_from_proto.cpp b/ydb/library/yql/public/purecalc/helpers/protobuf/schema_from_proto.cpp
deleted file mode 100644
index 6927c46240c..00000000000
--- a/ydb/library/yql/public/purecalc/helpers/protobuf/schema_from_proto.cpp
+++ /dev/null
@@ -1,202 +0,0 @@
-#include "schema_from_proto.h"
-
-#include <yt/yt_proto/yt/formats/extension.pb.h>
-
-#include <util/generic/algorithm.h>
-#include <util/generic/string.h>
-#include <util/string/printf.h>
-#include <util/string/vector.h>
-
-namespace pb = google::protobuf;
-
-namespace NYql {
- namespace NPureCalc {
-
- TProtoSchemaOptions::TProtoSchemaOptions()
- : EnumPolicy(EEnumPolicy::Int32)
- , ListIsOptional(false)
- {
- }
-
- TProtoSchemaOptions& TProtoSchemaOptions::SetEnumPolicy(EEnumPolicy policy) {
- EnumPolicy = policy;
- return *this;
- }
-
- TProtoSchemaOptions& TProtoSchemaOptions::SetListIsOptional(bool value) {
- ListIsOptional = value;
- return *this;
- }
-
- TProtoSchemaOptions& TProtoSchemaOptions::SetFieldRenames(
- THashMap<TString, TString> fieldRenames
- ) {
- FieldRenames = std::move(fieldRenames);
- return *this;
- }
-
- namespace {
- EEnumFormatType EnumFormatTypeWithYTFlag(const pb::FieldDescriptor& enumField, EEnumFormatType defaultEnumFormatType) {
- auto flags = enumField.options().GetRepeatedExtension(NYT::flags);
- for (auto flag : flags) {
- if (flag == NYT::EWrapperFieldFlag::ENUM_INT) {
- return EEnumFormatType::Int32;
- } else if (flag == NYT::EWrapperFieldFlag::ENUM_STRING) {
- return EEnumFormatType::String;
- }
- }
- return defaultEnumFormatType;
- }
- }
-
- EEnumFormatType EnumFormatType(const pb::FieldDescriptor& enumField, EEnumPolicy enumPolicy) {
- switch (enumPolicy) {
- case EEnumPolicy::Int32:
- return EEnumFormatType::Int32;
- case EEnumPolicy::String:
- return EEnumFormatType::String;
- case EEnumPolicy::YTFlagDefaultInt32:
- return EnumFormatTypeWithYTFlag(enumField, EEnumFormatType::Int32);
- case EEnumPolicy::YTFlagDefaultString:
- return EnumFormatTypeWithYTFlag(enumField, EEnumFormatType::String);
- }
- }
-
- namespace {
- const char* FormatTypeName(const pb::FieldDescriptor* field, EEnumPolicy enumPolicy) {
- switch (field->type()) {
- case pb::FieldDescriptor::TYPE_DOUBLE:
- return "Double";
- case pb::FieldDescriptor::TYPE_FLOAT:
- return "Float";
- case pb::FieldDescriptor::TYPE_INT64:
- case pb::FieldDescriptor::TYPE_SFIXED64:
- case pb::FieldDescriptor::TYPE_SINT64:
- return "Int64";
- case pb::FieldDescriptor::TYPE_UINT64:
- case pb::FieldDescriptor::TYPE_FIXED64:
- return "Uint64";
- case pb::FieldDescriptor::TYPE_INT32:
- case pb::FieldDescriptor::TYPE_SFIXED32:
- case pb::FieldDescriptor::TYPE_SINT32:
- return "Int32";
- case pb::FieldDescriptor::TYPE_UINT32:
- case pb::FieldDescriptor::TYPE_FIXED32:
- return "Uint32";
- case pb::FieldDescriptor::TYPE_BOOL:
- return "Bool";
- case pb::FieldDescriptor::TYPE_STRING:
- return "Utf8";
- case pb::FieldDescriptor::TYPE_BYTES:
- return "String";
- case pb::FieldDescriptor::TYPE_ENUM:
- switch (EnumFormatType(*field, enumPolicy)) {
- case EEnumFormatType::Int32:
- return "Int32";
- case EEnumFormatType::String:
- return "String";
- }
- default:
- ythrow yexception() << "Unsupported protobuf type: " << field->type_name()
- << ", field: " << field->name() << ", " << int(field->type());
- }
- }
- }
-
- NYT::TNode MakeSchemaFromProto(const pb::Descriptor& descriptor, TVector<const pb::Descriptor*>& nested, const TProtoSchemaOptions& options) {
- if (Find(nested, &descriptor) != nested.end()) {
- TVector<TString> nestedNames;
- for (const auto* d : nested) {
- nestedNames.push_back(d->full_name());
- }
- nestedNames.push_back(descriptor.full_name());
- ythrow yexception() << Sprintf("recursive messages are not supported (%s)",
- JoinStrings(nestedNames, "->").c_str());
- }
- nested.push_back(&descriptor);
-
- auto items = NYT::TNode::CreateList();
- for (int fieldNo = 0; fieldNo < descriptor.field_count(); ++fieldNo) {
- const auto& fieldDescriptor = *descriptor.field(fieldNo);
-
- auto name = fieldDescriptor.name();
- if (
- auto renamePtr = options.FieldRenames.FindPtr(name);
- nested.size() == 1 && renamePtr
- ) {
- name = *renamePtr;
- }
-
- NYT::TNode itemType;
- if (fieldDescriptor.type() == pb::FieldDescriptor::TYPE_MESSAGE) {
- itemType = MakeSchemaFromProto(*fieldDescriptor.message_type(), nested, options);
- } else {
- itemType = NYT::TNode::CreateList();
- itemType.Add("DataType");
- itemType.Add(FormatTypeName(&fieldDescriptor, options.EnumPolicy));
- }
- switch (fieldDescriptor.label()) {
- case pb::FieldDescriptor::LABEL_OPTIONAL:
- {
- auto optionalType = NYT::TNode::CreateList();
- optionalType.Add("OptionalType");
- optionalType.Add(std::move(itemType));
- itemType = std::move(optionalType);
- }
- break;
- case pb::FieldDescriptor::LABEL_REQUIRED:
- break;
- case pb::FieldDescriptor::LABEL_REPEATED:
- {
- auto listType = NYT::TNode::CreateList();
- listType.Add("ListType");
- listType.Add(std::move(itemType));
- itemType = std::move(listType);
- if (options.ListIsOptional) {
- itemType = NYT::TNode::CreateList().Add("OptionalType").Add(std::move(itemType));
- }
- }
- break;
- default:
- ythrow yexception() << "Unknown protobuf label: " << (ui32)fieldDescriptor.label() << ", field: " << name;
- }
-
- auto itemNode = NYT::TNode::CreateList();
- itemNode.Add(name);
- itemNode.Add(std::move(itemType));
-
- items.Add(std::move(itemNode));
- }
- auto root = NYT::TNode::CreateList();
- root.Add("StructType");
- root.Add(std::move(items));
-
- nested.pop_back();
- return root;
- }
-
- NYT::TNode MakeSchemaFromProto(const pb::Descriptor& descriptor, const TProtoSchemaOptions& options) {
- TVector<const pb::Descriptor*> nested;
- return MakeSchemaFromProto(descriptor, nested, options);
- }
-
- NYT::TNode MakeVariantSchemaFromProtos(const TVector<const pb::Descriptor*>& descriptors, const TProtoSchemaOptions& options) {
- Y_ENSURE(options.FieldRenames.empty(), "Renames are not supported in variant mode");
-
- auto tupleItems = NYT::TNode::CreateList();
- for (auto descriptor : descriptors) {
- tupleItems.Add(MakeSchemaFromProto(*descriptor, options));
- }
-
- auto tupleType = NYT::TNode::CreateList();
- tupleType.Add("TupleType");
- tupleType.Add(std::move(tupleItems));
-
- auto variantType = NYT::TNode::CreateList();
- variantType.Add("VariantType");
- variantType.Add(std::move(tupleType));
-
- return variantType;
- }
- }
-}
diff --git a/ydb/library/yql/public/purecalc/helpers/protobuf/schema_from_proto.h b/ydb/library/yql/public/purecalc/helpers/protobuf/schema_from_proto.h
deleted file mode 100644
index 168c654ac78..00000000000
--- a/ydb/library/yql/public/purecalc/helpers/protobuf/schema_from_proto.h
+++ /dev/null
@@ -1,60 +0,0 @@
-#pragma once
-
-#include <library/cpp/yson/node/node.h>
-
-#include <util/generic/hash.h>
-#include <util/generic/string.h>
-
-#include <google/protobuf/descriptor.h>
-
-
-namespace NYql {
- namespace NPureCalc {
- enum class EEnumPolicy {
- Int32,
- String,
- YTFlagDefaultInt32,
- YTFlagDefaultString
- };
-
- enum class EEnumFormatType {
- Int32,
- String
- };
-
- /**
- * Options that customize building of struct type from protobuf descriptor.
- */
- struct TProtoSchemaOptions {
- public:
- EEnumPolicy EnumPolicy;
- bool ListIsOptional;
- THashMap<TString, TString> FieldRenames;
-
- public:
- TProtoSchemaOptions();
-
- public:
- TProtoSchemaOptions& SetEnumPolicy(EEnumPolicy);
-
- TProtoSchemaOptions& SetListIsOptional(bool);
-
- TProtoSchemaOptions& SetFieldRenames(
- THashMap<TString, TString> fieldRenames
- );
- };
-
- EEnumFormatType EnumFormatType(const google::protobuf::FieldDescriptor& enumField, EEnumPolicy enumPolicy);
-
- /**
- * Build struct type from a protobuf descriptor. The returned yson can be loaded into a struct annotation node
- * using the ParseTypeFromYson function.
- */
- NYT::TNode MakeSchemaFromProto(const google::protobuf::Descriptor&, const TProtoSchemaOptions& = {});
-
- /**
- * Build variant over tuple type from protobuf descriptors.
- */
- NYT::TNode MakeVariantSchemaFromProtos(const TVector<const google::protobuf::Descriptor*>&, const TProtoSchemaOptions& = {});
- }
-}
diff --git a/ydb/library/yql/public/purecalc/helpers/protobuf/ya.make b/ydb/library/yql/public/purecalc/helpers/protobuf/ya.make
deleted file mode 100644
index 11300baba84..00000000000
--- a/ydb/library/yql/public/purecalc/helpers/protobuf/ya.make
+++ /dev/null
@@ -1,14 +0,0 @@
-LIBRARY()
-
-SRCS(
- schema_from_proto.cpp
-)
-
-PEERDIR(
- contrib/libs/protobuf
- library/cpp/yson/node
- yt/yt_proto/yt/formats
- yt/yt_proto/yt/formats
-)
-
-END()
diff --git a/ydb/library/yql/public/purecalc/helpers/stream/stream_from_vector.cpp b/ydb/library/yql/public/purecalc/helpers/stream/stream_from_vector.cpp
deleted file mode 100644
index e1aed5d6899..00000000000
--- a/ydb/library/yql/public/purecalc/helpers/stream/stream_from_vector.cpp
+++ /dev/null
@@ -1 +0,0 @@
-#include "stream_from_vector.h"
diff --git a/ydb/library/yql/public/purecalc/helpers/stream/stream_from_vector.h b/ydb/library/yql/public/purecalc/helpers/stream/stream_from_vector.h
deleted file mode 100644
index 51d85133328..00000000000
--- a/ydb/library/yql/public/purecalc/helpers/stream/stream_from_vector.h
+++ /dev/null
@@ -1,40 +0,0 @@
-#pragma once
-
-#include <ydb/library/yql/public/purecalc/common/interface.h>
-
-namespace NYql {
- namespace NPureCalc {
- namespace NPrivate {
- template <typename T>
- class TVectorStream final: public IStream<T*> {
- private:
- size_t I_;
- TVector<T> Data_;
-
- public:
- explicit TVectorStream(TVector<T> data)
- : I_(0)
- , Data_(std::move(data))
- {
- }
-
- public:
- T* Fetch() override {
- if (I_ >= Data_.size()) {
- return nullptr;
- } else {
- return &Data_[I_++];
- }
- }
- };
- }
-
- /**
- * Convert vector into a purecalc stream.
- */
- template <typename T>
- THolder<IStream<T*>> StreamFromVector(TVector<T> data) {
- return MakeHolder<NPrivate::TVectorStream<T>>(std::move(data));
- }
- }
-}
diff --git a/ydb/library/yql/public/purecalc/helpers/stream/ya.make b/ydb/library/yql/public/purecalc/helpers/stream/ya.make
deleted file mode 100644
index c96f93b5823..00000000000
--- a/ydb/library/yql/public/purecalc/helpers/stream/ya.make
+++ /dev/null
@@ -1,13 +0,0 @@
-LIBRARY()
-
-SRCS(
- stream_from_vector.cpp
-)
-
-PEERDIR(
- ydb/library/yql/public/purecalc/common
-)
-
-YQL_LAST_ABI_VERSION()
-
-END()
diff --git a/ydb/library/yql/public/purecalc/helpers/ya.make b/ydb/library/yql/public/purecalc/helpers/ya.make
deleted file mode 100644
index b228b159d92..00000000000
--- a/ydb/library/yql/public/purecalc/helpers/ya.make
+++ /dev/null
@@ -1,8 +0,0 @@
-LIBRARY()
-
-PEERDIR(
- ydb/library/yql/public/purecalc/helpers/protobuf
- ydb/library/yql/public/purecalc/helpers/stream
-)
-
-END()
diff --git a/ydb/library/yql/public/purecalc/io_specs/arrow/spec.cpp b/ydb/library/yql/public/purecalc/io_specs/arrow/spec.cpp
deleted file mode 100644
index fea2322168b..00000000000
--- a/ydb/library/yql/public/purecalc/io_specs/arrow/spec.cpp
+++ /dev/null
@@ -1,576 +0,0 @@
-#include "spec.h"
-
-#include <ydb/library/yql/public/purecalc/common/names.h>
-
-#include <yql/essentials/minikql/computation/mkql_computation_node_holders.h>
-#include <yql/essentials/minikql/computation/mkql_custom_list.h>
-#include <yql/essentials/minikql/mkql_node_cast.h>
-#include <yql/essentials/public/udf/arrow/udf_arrow_helpers.h>
-#include <yql/essentials/utils/yql_panic.h>
-
-using namespace NYql::NPureCalc;
-using namespace NKikimr::NUdf;
-using namespace NKikimr::NMiniKQL;
-
-using IArrowIStream = typename TInputSpecTraits<TArrowInputSpec>::IInputStream;
-using InputItemType = typename TInputSpecTraits<TArrowInputSpec>::TInputItemType;
-using OutputItemType = typename TOutputSpecTraits<TArrowOutputSpec>::TOutputItemType;
-using PullListReturnType = typename TOutputSpecTraits<TArrowOutputSpec>::TPullListReturnType;
-using PullStreamReturnType = typename TOutputSpecTraits<TArrowOutputSpec>::TPullStreamReturnType;
-using ConsumerType = typename TInputSpecTraits<TArrowInputSpec>::TConsumerType;
-
-namespace {
-
-template <typename T>
-inline TVector<THolder<T>> VectorFromHolder(THolder<T> holder) {
- TVector<THolder<T>> result;
- result.push_back(std::move(holder));
- return result;
-}
-
-
-class TArrowIStreamImpl : public IArrowIStream {
-private:
- IArrowIStream* Underlying_;
- // If we own Underlying_, than Owned_ == Underlying_;
- // otherwise Owned_ is nullptr.
- THolder<IArrowIStream> Owned_;
-
- TArrowIStreamImpl(IArrowIStream* underlying, THolder<IArrowIStream> owned)
- : Underlying_(underlying)
- , Owned_(std::move(owned))
- {
- }
-
-public:
- TArrowIStreamImpl(THolder<IArrowIStream> stream)
- : TArrowIStreamImpl(stream.Get(), nullptr)
- {
- Owned_ = std::move(stream);
- }
-
- TArrowIStreamImpl(IArrowIStream* stream)
- : TArrowIStreamImpl(stream, nullptr)
- {
- }
-
- InputItemType Fetch() {
- return Underlying_->Fetch();
- }
-};
-
-
-/**
- * Converts input Datums to unboxed values.
- */
-class TArrowInputConverter {
-protected:
- const THolderFactory& Factory_;
- TVector<ui32> DatumToMemberIDMap_;
- size_t BatchLengthID_;
-
-public:
- explicit TArrowInputConverter(
- const TArrowInputSpec& inputSpec,
- ui32 index,
- IWorker* worker
- )
- : Factory_(worker->GetGraph().GetHolderFactory())
- {
- const NYT::TNode& inputSchema = inputSpec.GetSchema(index);
- // Deduce the schema from the input MKQL type, if no is
- // provided by <inputSpec>.
- const NYT::TNode& schema = inputSchema.IsEntity()
- ? worker->MakeInputSchema(index)
- : inputSchema;
-
- const auto* type = worker->GetRawInputType(index);
-
- Y_ENSURE(type->IsStruct());
- Y_ENSURE(schema.ChildAsString(0) == "StructType");
-
- const auto& members = schema.ChildAsList(1);
- DatumToMemberIDMap_.resize(members.size());
-
- for (size_t i = 0; i < DatumToMemberIDMap_.size(); i++) {
- const auto& name = members[i].ChildAsString(0);
- const auto& memberIndex = type->FindMemberIndex(name);
- Y_ENSURE(memberIndex);
- DatumToMemberIDMap_[i] = *memberIndex;
- }
- const auto& batchLengthID = type->FindMemberIndex(PurecalcBlockColumnLength);
- Y_ENSURE(batchLengthID);
- BatchLengthID_ = *batchLengthID;
- }
-
- void DoConvert(arrow::compute::ExecBatch* batch, TUnboxedValue& result) {
- size_t nvalues = DatumToMemberIDMap_.size();
- Y_ENSURE(nvalues == static_cast<size_t>(batch->num_values()));
-
- TUnboxedValue* datums = nullptr;
- result = Factory_.CreateDirectArrayHolder(nvalues + 1, datums);
- for (size_t i = 0; i < nvalues; i++) {
- const ui32 id = DatumToMemberIDMap_[i];
- datums[id] = Factory_.CreateArrowBlock(std::move(batch->values[i]));
- }
- arrow::Datum length(std::make_shared<arrow::UInt64Scalar>(batch->length));
- datums[BatchLengthID_] = Factory_.CreateArrowBlock(std::move(length));
- }
-};
-
-
-/**
- * Converts unboxed values to output Datums (single-output program case).
- */
-class TArrowOutputConverter {
-protected:
- const THolderFactory& Factory_;
- TVector<ui32> DatumToMemberIDMap_;
- THolder<arrow::compute::ExecBatch> Batch_;
- size_t BatchLengthID_;
-
-public:
- explicit TArrowOutputConverter(
- const TArrowOutputSpec& outputSpec,
- IWorker* worker
- )
- : Factory_(worker->GetGraph().GetHolderFactory())
- {
- Batch_.Reset(new arrow::compute::ExecBatch);
-
- const NYT::TNode& outputSchema = outputSpec.GetSchema();
- // Deduce the schema from the output MKQL type, if no is
- // provided by <outputSpec>.
- const NYT::TNode& schema = outputSchema.IsEntity()
- ? worker->MakeOutputSchema()
- : outputSchema;
-
- const auto* type = worker->GetRawOutputType();
-
- Y_ENSURE(type->IsStruct());
- Y_ENSURE(schema.ChildAsString(0) == "StructType");
-
- const auto* stype = AS_TYPE(NKikimr::NMiniKQL::TStructType, type);
-
- const auto& members = schema.ChildAsList(1);
- DatumToMemberIDMap_.resize(members.size());
-
- for (size_t i = 0; i < DatumToMemberIDMap_.size(); i++) {
- const auto& name = members[i].ChildAsString(0);
- const auto& memberIndex = stype->FindMemberIndex(name);
- Y_ENSURE(memberIndex);
- DatumToMemberIDMap_[i] = *memberIndex;
- }
- const auto& batchLengthID = stype->FindMemberIndex(PurecalcBlockColumnLength);
- Y_ENSURE(batchLengthID);
- BatchLengthID_ = *batchLengthID;
- }
-
- OutputItemType DoConvert(TUnboxedValue value) {
- OutputItemType batch = Batch_.Get();
- size_t nvalues = DatumToMemberIDMap_.size();
-
- const auto& sizeDatum = TArrowBlock::From(value.GetElement(BatchLengthID_)).GetDatum();
- Y_ENSURE(sizeDatum.is_scalar());
- const auto& sizeScalar = sizeDatum.scalar();
- const auto& sizeData = arrow::internal::checked_cast<const arrow::UInt64Scalar&>(*sizeScalar);
- const int64_t length = sizeData.value;
-
- TVector<arrow::Datum> datums(nvalues);
- for (size_t i = 0; i < nvalues; i++) {
- const ui32 id = DatumToMemberIDMap_[i];
- const auto& datum = TArrowBlock::From(value.GetElement(id)).GetDatum();
- datums[i] = datum;
- if (datum.is_scalar()) {
- continue;
- }
- Y_ENSURE(datum.length() == length);
- }
-
- *batch = arrow::compute::ExecBatch(std::move(datums), length);
- return batch;
- }
-};
-
-
-/**
- * List (or, better, stream) of unboxed values.
- * Used as an input value in pull workers.
- */
-class TArrowListValue final: public TCustomListValue {
-private:
- mutable bool HasIterator_ = false;
- THolder<IArrowIStream> Underlying_;
- IWorker* Worker_;
- TArrowInputConverter Converter_;
- TScopedAlloc& ScopedAlloc_;
-
-public:
- TArrowListValue(
- TMemoryUsageInfo* memInfo,
- const TArrowInputSpec& inputSpec,
- ui32 index,
- THolder<IArrowIStream> underlying,
- IWorker* worker
- )
- : TCustomListValue(memInfo)
- , Underlying_(std::move(underlying))
- , Worker_(worker)
- , Converter_(inputSpec, index, Worker_)
- , ScopedAlloc_(Worker_->GetScopedAlloc())
- {
- }
-
- ~TArrowListValue() override {
- {
- // This list value stored in the worker's computation graph and
- // destroyed upon the computation graph's destruction. This brings
- // us to an interesting situation: scoped alloc is acquired, worker
- // and computation graph are half-way destroyed, and now it's our
- // turn to die. The problem is, the underlying stream may own
- // another worker. This happens when chaining programs. Now, to
- // destroy that worker correctly, we need to release our scoped
- // alloc (because that worker has its own computation graph and
- // scoped alloc).
- // By the way, note that we shouldn't interact with the worker here
- // because worker is in the middle of its own destruction. So we're
- // using our own reference to the scoped alloc. That reference is
- // alive because scoped alloc destroyed after computation graph.
- auto unguard = Unguard(ScopedAlloc_);
- Underlying_.Destroy();
- }
- }
-
- TUnboxedValue GetListIterator() const override {
- YQL_ENSURE(!HasIterator_, "Only one pass over input is supported");
- HasIterator_ = true;
- return TUnboxedValuePod(const_cast<TArrowListValue*>(this));
- }
-
- bool Next(TUnboxedValue& result) override {
- arrow::compute::ExecBatch* batch;
- {
- auto unguard = Unguard(ScopedAlloc_);
- batch = Underlying_->Fetch();
- }
-
- if (!batch) {
- return false;
- }
-
- Converter_.DoConvert(batch, result);
- return true;
- }
-
- EFetchStatus Fetch(TUnboxedValue& result) override {
- if (Next(result)) {
- return EFetchStatus::Ok;
- } else {
- return EFetchStatus::Finish;
- }
- }
-};
-
-
-/**
- * Arrow input stream for unboxed value lists.
- */
-class TArrowListImpl final: public IStream<OutputItemType> {
-protected:
- TWorkerHolder<IPullListWorker> WorkerHolder_;
- TArrowOutputConverter Converter_;
-
-public:
- explicit TArrowListImpl(
- const TArrowOutputSpec& outputSpec,
- TWorkerHolder<IPullListWorker> worker
- )
- : WorkerHolder_(std::move(worker))
- , Converter_(outputSpec, WorkerHolder_.Get())
- {
- }
-
- OutputItemType Fetch() override {
- TBindTerminator bind(WorkerHolder_->GetGraph().GetTerminator());
-
- with_lock(WorkerHolder_->GetScopedAlloc()) {
- TUnboxedValue value;
-
- if (!WorkerHolder_->GetOutputIterator().Next(value)) {
- return TOutputSpecTraits<TArrowOutputSpec>::StreamSentinel;
- }
-
- return Converter_.DoConvert(value);
- }
- }
-};
-
-
-/**
- * Arrow input stream for unboxed value streams.
- */
-class TArrowStreamImpl final: public IStream<OutputItemType> {
-protected:
- TWorkerHolder<IPullStreamWorker> WorkerHolder_;
- TArrowOutputConverter Converter_;
-
-public:
- explicit TArrowStreamImpl(const TArrowOutputSpec& outputSpec, TWorkerHolder<IPullStreamWorker> worker)
- : WorkerHolder_(std::move(worker))
- , Converter_(outputSpec, WorkerHolder_.Get())
- {
- }
-
- OutputItemType Fetch() override {
- TBindTerminator bind(WorkerHolder_->GetGraph().GetTerminator());
-
- with_lock(WorkerHolder_->GetScopedAlloc()) {
- TUnboxedValue value;
-
- auto status = WorkerHolder_->GetOutput().Fetch(value);
- YQL_ENSURE(status != EFetchStatus::Yield, "Yield is not supported in pull mode");
-
- if (status == EFetchStatus::Finish) {
- return TOutputSpecTraits<TArrowOutputSpec>::StreamSentinel;
- }
-
- return Converter_.DoConvert(value);
- }
- }
-};
-
-
-/**
- * Consumer which converts Datums to unboxed values and relays them to the
- * worker. Used as a return value of the push processor's Process function.
- */
-class TArrowConsumerImpl final: public IConsumer<arrow::compute::ExecBatch*> {
-private:
- TWorkerHolder<IPushStreamWorker> WorkerHolder_;
- TArrowInputConverter Converter_;
-
-public:
- explicit TArrowConsumerImpl(
- const TArrowInputSpec& inputSpec,
- TWorkerHolder<IPushStreamWorker> worker
- )
- : TArrowConsumerImpl(inputSpec, 0, std::move(worker))
- {
- }
-
- explicit TArrowConsumerImpl(
- const TArrowInputSpec& inputSpec,
- ui32 index,
- TWorkerHolder<IPushStreamWorker> worker
- )
- : WorkerHolder_(std::move(worker))
- , Converter_(inputSpec, index, WorkerHolder_.Get())
- {
- }
-
- void OnObject(arrow::compute::ExecBatch* batch) override {
- TBindTerminator bind(WorkerHolder_->GetGraph().GetTerminator());
-
- with_lock(WorkerHolder_->GetScopedAlloc()) {
- TUnboxedValue result;
- Converter_.DoConvert(batch, result);
- WorkerHolder_->Push(std::move(result));
- }
- }
-
- void OnFinish() override {
- TBindTerminator bind(WorkerHolder_->GetGraph().GetTerminator());
-
- with_lock(WorkerHolder_->GetScopedAlloc()) {
- WorkerHolder_->OnFinish();
- }
- }
-};
-
-
-/**
- * Push relay used to convert generated unboxed value to a Datum and push it to
- * the user's consumer.
- */
-class TArrowPushRelayImpl: public IConsumer<const TUnboxedValue*> {
-private:
- THolder<IConsumer<OutputItemType>> Underlying_;
- IWorker* Worker_;
- TArrowOutputConverter Converter_;
-
-public:
- TArrowPushRelayImpl(
- const TArrowOutputSpec& outputSpec,
- IPushStreamWorker* worker,
- THolder<IConsumer<OutputItemType>> underlying
- )
- : Underlying_(std::move(underlying))
- , Worker_(worker)
- , Converter_(outputSpec, Worker_)
- {
- }
-
- // XXX: If you've read a comment in the TArrowListValue's destructor, you
- // may be wondering why don't we do the same trick here. Well, that's
- // because in push mode, consumer is destroyed before acquiring scoped alloc
- // and destroying computation graph.
-
- void OnObject(const TUnboxedValue* value) override {
- OutputItemType message = Converter_.DoConvert(*value);
- auto unguard = Unguard(Worker_->GetScopedAlloc());
- Underlying_->OnObject(message);
- }
-
- void OnFinish() override {
- auto unguard = Unguard(Worker_->GetScopedAlloc());
- Underlying_->OnFinish();
- }
-};
-
-
-template <typename TWorker>
-void PrepareWorkerImpl(const TArrowInputSpec& inputSpec, TWorker* worker,
- TVector<THolder<TArrowIStreamImpl>>&& streams
-) {
- YQL_ENSURE(worker->GetInputsCount() == streams.size(),
- "number of input streams should match number of inputs provided by spec");
-
- with_lock(worker->GetScopedAlloc()) {
- auto& holderFactory = worker->GetGraph().GetHolderFactory();
- for (ui32 i = 0; i < streams.size(); i++) {
- auto input = holderFactory.template Create<TArrowListValue>(
- inputSpec, i, std::move(streams[i]), worker);
- worker->SetInput(std::move(input), i);
- }
- }
-}
-
-} // namespace
-
-
-TArrowInputSpec::TArrowInputSpec(const TVector<NYT::TNode>& schemas)
- : Schemas_(schemas)
-{
-}
-
-const TVector<NYT::TNode>& TArrowInputSpec::GetSchemas() const {
- return Schemas_;
-}
-
-const NYT::TNode& TArrowInputSpec::GetSchema(ui32 index) const {
- return Schemas_[index];
-}
-
-void TInputSpecTraits<TArrowInputSpec>::PreparePullListWorker(
- const TArrowInputSpec& inputSpec, IPullListWorker* worker,
- IArrowIStream* stream
-) {
- TInputSpecTraits<TArrowInputSpec>::PreparePullListWorker(
- inputSpec, worker, TVector<IArrowIStream*>({stream}));
-}
-
-void TInputSpecTraits<TArrowInputSpec>::PreparePullListWorker(
- const TArrowInputSpec& inputSpec, IPullListWorker* worker,
- const TVector<IArrowIStream*>& streams
-) {
- TVector<THolder<TArrowIStreamImpl>> wrappers;
- for (ui32 i = 0; i < streams.size(); i++) {
- wrappers.push_back(MakeHolder<TArrowIStreamImpl>(streams[i]));
- }
- PrepareWorkerImpl(inputSpec, worker, std::move(wrappers));
-}
-
-void TInputSpecTraits<TArrowInputSpec>::PreparePullListWorker(
- const TArrowInputSpec& inputSpec, IPullListWorker* worker,
- THolder<IArrowIStream> stream
-) {
- TInputSpecTraits<TArrowInputSpec>::PreparePullListWorker(inputSpec, worker,
- VectorFromHolder<IArrowIStream>(std::move(stream)));
-}
-
-void TInputSpecTraits<TArrowInputSpec>::PreparePullListWorker(
- const TArrowInputSpec& inputSpec, IPullListWorker* worker,
- TVector<THolder<IArrowIStream>>&& streams
-) {
- TVector<THolder<TArrowIStreamImpl>> wrappers;
- for (ui32 i = 0; i < streams.size(); i++) {
- wrappers.push_back(MakeHolder<TArrowIStreamImpl>(std::move(streams[i])));
- }
- PrepareWorkerImpl(inputSpec, worker, std::move(wrappers));
-}
-
-
-void TInputSpecTraits<TArrowInputSpec>::PreparePullStreamWorker(
- const TArrowInputSpec& inputSpec, IPullStreamWorker* worker,
- IArrowIStream* stream
-) {
- TInputSpecTraits<TArrowInputSpec>::PreparePullStreamWorker(
- inputSpec, worker, TVector<IArrowIStream*>({stream}));
-}
-
-void TInputSpecTraits<TArrowInputSpec>::PreparePullStreamWorker(
- const TArrowInputSpec& inputSpec, IPullStreamWorker* worker,
- const TVector<IArrowIStream*>& streams
-) {
- TVector<THolder<TArrowIStreamImpl>> wrappers;
- for (ui32 i = 0; i < streams.size(); i++) {
- wrappers.push_back(MakeHolder<TArrowIStreamImpl>(streams[i]));
- }
- PrepareWorkerImpl(inputSpec, worker, std::move(wrappers));
-}
-
-void TInputSpecTraits<TArrowInputSpec>::PreparePullStreamWorker(
- const TArrowInputSpec& inputSpec, IPullStreamWorker* worker,
- THolder<IArrowIStream> stream
-) {
- TInputSpecTraits<TArrowInputSpec>::PreparePullStreamWorker(
- inputSpec, worker, VectorFromHolder<IArrowIStream>(std::move(stream)));
-}
-
-void TInputSpecTraits<TArrowInputSpec>::PreparePullStreamWorker(
- const TArrowInputSpec& inputSpec, IPullStreamWorker* worker,
- TVector<THolder<IArrowIStream>>&& streams
-) {
- TVector<THolder<TArrowIStreamImpl>> wrappers;
- for (ui32 i = 0; i < streams.size(); i++) {
- wrappers.push_back(MakeHolder<TArrowIStreamImpl>(std::move(streams[i])));
- }
- PrepareWorkerImpl(inputSpec, worker, std::move(wrappers));
-}
-
-
-ConsumerType TInputSpecTraits<TArrowInputSpec>::MakeConsumer(
- const TArrowInputSpec& inputSpec, TWorkerHolder<IPushStreamWorker> worker
-) {
- return MakeHolder<TArrowConsumerImpl>(inputSpec, std::move(worker));
-}
-
-
-TArrowOutputSpec::TArrowOutputSpec(const NYT::TNode& schema)
- : Schema_(schema)
-{
-}
-
-const NYT::TNode& TArrowOutputSpec::GetSchema() const {
- return Schema_;
-}
-
-
-PullListReturnType TOutputSpecTraits<TArrowOutputSpec>::ConvertPullListWorkerToOutputType(
- const TArrowOutputSpec& outputSpec, TWorkerHolder<IPullListWorker> worker
-) {
- return MakeHolder<TArrowListImpl>(outputSpec, std::move(worker));
-}
-
-PullStreamReturnType TOutputSpecTraits<TArrowOutputSpec>::ConvertPullStreamWorkerToOutputType(
- const TArrowOutputSpec& outputSpec, TWorkerHolder<IPullStreamWorker> worker
-) {
- return MakeHolder<TArrowStreamImpl>(outputSpec, std::move(worker));
-}
-
-void TOutputSpecTraits<TArrowOutputSpec>::SetConsumerToWorker(
- const TArrowOutputSpec& outputSpec, IPushStreamWorker* worker,
- THolder<IConsumer<TOutputItemType>> consumer
-) {
- worker->SetConsumer(MakeHolder<TArrowPushRelayImpl>(outputSpec, worker, std::move(consumer)));
-}
diff --git a/ydb/library/yql/public/purecalc/io_specs/arrow/spec.h b/ydb/library/yql/public/purecalc/io_specs/arrow/spec.h
deleted file mode 100644
index a8b1cfb1cbe..00000000000
--- a/ydb/library/yql/public/purecalc/io_specs/arrow/spec.h
+++ /dev/null
@@ -1,130 +0,0 @@
-#pragma once
-
-#include <ydb/library/yql/public/purecalc/common/interface.h>
-#include <arrow/compute/kernel.h>
-
-namespace NYql {
-namespace NPureCalc {
-
-/**
- * Processing mode for working with Apache Arrow batches inputs.
- *
- * In this mode purecalc accept pointers to abstract Arrow ExecBatches and
- * processes them. All Datums in batches should respect the given YT schema
- * (the one you pass to the constructor of the input spec).
- *
- * All working modes are implemented. In pull list and pull stream modes a
- * program would accept a pointer to a single stream object or vector of
- * pointers of stream objects of Arrow ExecBatch pointers. In push mode, a
- * program will return a consumer of pointers to Arrow ExecBatch.
- *
- * The program synopsis follows:
- *
- * @code
- * ... TPullListProgram::Apply(IStream<arrow::compute::ExecBatch*>*);
- * ... TPullListProgram::Apply(TVector<IStream<arrow::compute::ExecBatch*>*>);
- * ... TPullStreamProgram::Apply(IStream<arrow::compute::ExecBatch*>*);
- * ... TPullStreamProgram::Apply(TVector<IStream<arrow::compute::ExecBatch*>*>);
- * TConsumer<arrow::compute::ExecBatch*> TPushStreamProgram::Apply(...);
- * @endcode
- */
-
-class TArrowInputSpec: public TInputSpecBase {
-private:
- const TVector<NYT::TNode> Schemas_;
-
-public:
- explicit TArrowInputSpec(const TVector<NYT::TNode>& schemas);
- const TVector<NYT::TNode>& GetSchemas() const override;
- const NYT::TNode& GetSchema(ui32 index) const;
- bool ProvidesBlocks() const override { return true; }
-};
-
-/**
- * Processing mode for working with Apache Arrow batches outputs.
- *
- * In this mode purecalc yields pointers to abstract Arrow ExecBatches. All
- * Datums in generated batches respects the given YT schema.
- *
- * Note that one should not expect that the returned pointer will be valid
- * forever; in can (and will) become outdated once a new output is
- * requested/pushed.
- *
- * All working modes are implemented. In pull stream and pull list modes a
- * program will return a pointer to a stream of pointers to Arrow ExecBatches.
- * In push mode, it will accept a single consumer of pointers to Arrow ExecBatch.
- *
- * The program synopsis follows:
- *
- * @code
- * IStream<arrow::compute::ExecBatch*> TPullStreamProgram::Apply(...);
- * IStream<arrow::compute::ExecBatch*> TPullListProgram::Apply(...);
- * ... TPushStreamProgram::Apply(TConsumer<arrow::compute::ExecBatch*>);
- * @endcode
- */
-
-class TArrowOutputSpec: public TOutputSpecBase {
-private:
- const NYT::TNode Schema_;
-
-public:
- explicit TArrowOutputSpec(const NYT::TNode& schema);
- const NYT::TNode& GetSchema() const override;
- bool AcceptsBlocks() const override { return true; }
-};
-
-template <>
-struct TInputSpecTraits<TArrowInputSpec> {
- static const constexpr bool IsPartial = false;
-
- static const constexpr bool SupportPullListMode = true;
- static const constexpr bool SupportPullStreamMode = true;
- static const constexpr bool SupportPushStreamMode = true;
-
- using TInputItemType = arrow::compute::ExecBatch*;
- using IInputStream = IStream<TInputItemType>;
- using TConsumerType = THolder<IConsumer<TInputItemType>>;
-
- static void PreparePullListWorker(const TArrowInputSpec&, IPullListWorker*,
- IInputStream*);
- static void PreparePullListWorker(const TArrowInputSpec&, IPullListWorker*,
- THolder<IInputStream>);
- static void PreparePullListWorker(const TArrowInputSpec&, IPullListWorker*,
- const TVector<IInputStream*>&);
- static void PreparePullListWorker(const TArrowInputSpec&, IPullListWorker*,
- TVector<THolder<IInputStream>>&&);
-
- static void PreparePullStreamWorker(const TArrowInputSpec&, IPullStreamWorker*,
- IInputStream*);
- static void PreparePullStreamWorker(const TArrowInputSpec&, IPullStreamWorker*,
- THolder<IInputStream>);
- static void PreparePullStreamWorker(const TArrowInputSpec&, IPullStreamWorker*,
- const TVector<IInputStream*>&);
- static void PreparePullStreamWorker(const TArrowInputSpec&, IPullStreamWorker*,
- TVector<THolder<IInputStream>>&&);
-
- static TConsumerType MakeConsumer(const TArrowInputSpec&, TWorkerHolder<IPushStreamWorker>);
-};
-
-template <>
-struct TOutputSpecTraits<TArrowOutputSpec> {
- static const constexpr bool IsPartial = false;
-
- static const constexpr bool SupportPullListMode = true;
- static const constexpr bool SupportPullStreamMode = true;
- static const constexpr bool SupportPushStreamMode = true;
-
- using TOutputItemType = arrow::compute::ExecBatch*;
- using IOutputStream = IStream<TOutputItemType>;
- using TPullListReturnType = THolder<IOutputStream>;
- using TPullStreamReturnType = THolder<IOutputStream>;
-
- static const constexpr TOutputItemType StreamSentinel = nullptr;
-
- static TPullListReturnType ConvertPullListWorkerToOutputType(const TArrowOutputSpec&, TWorkerHolder<IPullListWorker>);
- static TPullStreamReturnType ConvertPullStreamWorkerToOutputType(const TArrowOutputSpec&, TWorkerHolder<IPullStreamWorker>);
- static void SetConsumerToWorker(const TArrowOutputSpec&, IPushStreamWorker*, THolder<IConsumer<TOutputItemType>>);
-};
-
-} // namespace NPureCalc
-} // namespace NYql
diff --git a/ydb/library/yql/public/purecalc/io_specs/arrow/ut/test_spec.cpp b/ydb/library/yql/public/purecalc/io_specs/arrow/ut/test_spec.cpp
deleted file mode 100644
index 5cf2f6513d2..00000000000
--- a/ydb/library/yql/public/purecalc/io_specs/arrow/ut/test_spec.cpp
+++ /dev/null
@@ -1,419 +0,0 @@
-#include <library/cpp/testing/unittest/registar.h>
-
-#include <ydb/library/yql/public/purecalc/common/interface.h>
-#include <ydb/library/yql/public/purecalc/io_specs/arrow/spec.h>
-#include <ydb/library/yql/public/purecalc/ut/lib/helpers.h>
-#include <yql/essentials/core/yql_type_annotation.h>
-
-#include <yql/essentials/public/udf/arrow/udf_arrow_helpers.h>
-#include <arrow/array/builder_primitive.h>
-
-namespace {
-
-#define Y_UNIT_TEST_ADD_BLOCK_TEST(N, MODE) \
- TCurrentTest::AddTest(#N ":BlockEngineMode=" #MODE, \
- static_cast<void (*)(NUnitTest::TTestContext&)>(&N<NYql::EBlockEngineMode::MODE>), false);
-
-#define Y_UNIT_TEST_BLOCKS(N) \
- template<NYql::EBlockEngineMode BlockEngineMode> \
- void N(NUnitTest::TTestContext&); \
- struct TTestRegistration##N { \
- TTestRegistration##N() { \
- Y_UNIT_TEST_ADD_BLOCK_TEST(N, Disable) \
- Y_UNIT_TEST_ADD_BLOCK_TEST(N, Auto) \
- Y_UNIT_TEST_ADD_BLOCK_TEST(N, Force) \
- } \
- }; \
- static TTestRegistration##N testRegistration##N; \
- template<NYql::EBlockEngineMode BlockEngineMode> \
- void N(NUnitTest::TTestContext&)
-
-NYql::NPureCalc::TProgramFactoryOptions TestOptions(NYql::EBlockEngineMode mode) {
- static const TMap<NYql::EBlockEngineMode, const TString> mode2settings = {
- {NYql::EBlockEngineMode::Disable, "disable"},
- {NYql::EBlockEngineMode::Auto, "auto"},
- {NYql::EBlockEngineMode::Force, "force"},
- };
- auto options = NYql::NPureCalc::TProgramFactoryOptions();
- options.SetBlockEngineSettings(mode2settings.at(mode));
- return options;
-}
-
-
-template <typename T>
-struct TVectorStream: public NYql::NPureCalc::IStream<T*> {
- TVector<T> Data_;
- size_t Index_ = 0;
-
-public:
- TVectorStream(TVector<T> items)
- : Data_(std::move(items))
- {
- }
-
- T* Fetch() override {
- return Index_ < Data_.size() ? &Data_[Index_++] : nullptr;
- }
-};
-
-
-template<typename T>
-struct TVectorConsumer: public NYql::NPureCalc::IConsumer<T*> {
- TVector<T>& Data_;
- size_t Index_ = 0;
-
-public:
- TVectorConsumer(TVector<T>& items)
- : Data_(items)
- {
- }
-
- void OnObject(T* t) override {
- Index_++;
- Data_.push_back(*t);
- }
-
- void OnFinish() override {
- UNIT_ASSERT_GT(Index_, 0);
- }
-};
-
-
-using ExecBatchStreamImpl = TVectorStream<arrow::compute::ExecBatch>;
-using ExecBatchConsumerImpl = TVectorConsumer<arrow::compute::ExecBatch>;
-
-template <typename TBuilder>
-arrow::Datum MakeArrayDatumFromVector(
- const TVector<typename TBuilder::value_type>& data,
- const TVector<bool>& valid
-) {
- TBuilder builder;
- ARROW_OK(builder.Reserve(data.size()));
- ARROW_OK(builder.AppendValues(data, valid));
- return arrow::Datum(ARROW_RESULT(builder.Finish()));
-}
-
-template <typename TValue>
-TVector<TValue> MakeVectorFromArrayDatum(
- const arrow::Datum& datum,
- const int64_t dsize
-) {
- Y_ENSURE(datum.is_array(), "ExecBatch layout doesn't respect the schema");
-
- const auto& array = *datum.array();
- Y_ENSURE(array.length == dsize,
- "Array Datum size differs from the given ExecBatch size");
- Y_ENSURE(array.GetNullCount() == 0,
- "Null values conversion is not supported");
- Y_ENSURE(array.buffers.size() == 2,
- "Array Datum layout doesn't respect the schema");
-
- const TValue* adata1 = array.GetValuesSafe<TValue>(1);
- return TVector<TValue>(adata1, adata1 + dsize);
-}
-
-arrow::compute::ExecBatch MakeBatch(ui64 bsize, i64 value, ui64 init = 1) {
- TVector<uint64_t> data1(bsize);
- TVector<int64_t> data2(bsize);
- TVector<bool> valid(bsize);
- std::iota(data1.begin(), data1.end(), init);
- std::fill(data2.begin(), data2.end(), value);
- std::fill(valid.begin(), valid.end(), true);
-
- TVector<arrow::Datum> batchArgs = {
- MakeArrayDatumFromVector<arrow::UInt64Builder>(data1, valid),
- MakeArrayDatumFromVector<arrow::Int64Builder>(data2, valid)
- };
-
- return arrow::compute::ExecBatch(std::move(batchArgs), bsize);
-}
-
-TVector<std::tuple<ui64, i64>> CanonBatches(const TVector<arrow::compute::ExecBatch>& batches) {
- TVector<std::tuple<ui64, i64>> result;
- for (const auto& batch : batches) {
- const auto bsize = batch.length;
-
- const auto& avec1 = MakeVectorFromArrayDatum<ui64>(batch.values[0], bsize);
- const auto& avec2 = MakeVectorFromArrayDatum<i64>(batch.values[1], bsize);
-
- for (auto i = 0; i < bsize; i++) {
- result.push_back(std::make_tuple(avec1[i], avec2[i]));
- }
- }
- std::sort(result.begin(), result.end());
- return result;
-}
-
-} // namespace
-
-
-Y_UNIT_TEST_SUITE(TestSimplePullListArrowIO) {
- Y_UNIT_TEST_BLOCKS(TestSingleInput) {
- using namespace NYql::NPureCalc;
-
- TVector<TString> fields = {"uint64", "int64"};
- auto schema = NYql::NPureCalc::NPrivate::GetSchema(fields);
-
- auto factory = MakeProgramFactory(TestOptions(BlockEngineMode));
-
- try {
- auto program = factory->MakePullListProgram(
- TArrowInputSpec({schema}),
- TArrowOutputSpec(schema),
- "SELECT * FROM Input",
- ETranslationMode::SQL
- );
-
- const TVector<arrow::compute::ExecBatch> input({MakeBatch(9, 19)});
- const auto canonInput = CanonBatches(input);
- ExecBatchStreamImpl items(input);
-
- auto stream = program->Apply(&items);
-
- TVector<arrow::compute::ExecBatch> output;
- while (arrow::compute::ExecBatch* batch = stream->Fetch()) {
- output.push_back(*batch);
- }
- const auto canonOutput = CanonBatches(output);
- UNIT_ASSERT_EQUAL(canonInput, canonOutput);
- } catch (const TCompileError& error) {
- UNIT_FAIL(error.GetIssues());
- }
- }
-
- Y_UNIT_TEST_BLOCKS(TestMultiInput) {
- using namespace NYql::NPureCalc;
-
- TVector<TString> fields = {"uint64", "int64"};
- auto schema = NYql::NPureCalc::NPrivate::GetSchema(fields);
-
- auto factory = MakeProgramFactory(TestOptions(BlockEngineMode));
-
- try {
- auto program = factory->MakePullListProgram(
- TArrowInputSpec({schema, schema}),
- TArrowOutputSpec(schema),
- R"(
- SELECT * FROM Input0
- UNION ALL
- SELECT * FROM Input1
- )",
- ETranslationMode::SQL
- );
-
- TVector<arrow::compute::ExecBatch> inputs = {
- MakeBatch(9, 19),
- MakeBatch(7, 17)
- };
- const auto canonInputs = CanonBatches(inputs);
-
- ExecBatchStreamImpl items0({inputs[0]});
- ExecBatchStreamImpl items1({inputs[1]});
-
- const TVector<IStream<arrow::compute::ExecBatch*>*> items({&items0, &items1});
-
- auto stream = program->Apply(items);
-
- TVector<arrow::compute::ExecBatch> output;
- while (arrow::compute::ExecBatch* batch = stream->Fetch()) {
- output.push_back(*batch);
- }
- const auto canonOutput = CanonBatches(output);
- UNIT_ASSERT_EQUAL(canonInputs, canonOutput);
- } catch (const TCompileError& error) {
- UNIT_FAIL(error.GetIssues());
- }
- }
-}
-
-
-Y_UNIT_TEST_SUITE(TestMorePullListArrowIO) {
- Y_UNIT_TEST_BLOCKS(TestInc) {
- using namespace NYql::NPureCalc;
-
- TVector<TString> fields = {"uint64", "int64"};
- auto schema = NYql::NPureCalc::NPrivate::GetSchema(fields);
-
- auto factory = MakeProgramFactory(TestOptions(BlockEngineMode));
-
- try {
- auto program = factory->MakePullListProgram(
- TArrowInputSpec({schema}),
- TArrowOutputSpec(schema),
- R"(SELECT
- uint64 + 1 as uint64,
- int64 - 2 as int64,
- FROM Input)",
- ETranslationMode::SQL
- );
-
- const TVector<arrow::compute::ExecBatch> input({MakeBatch(9, 19)});
- const auto canonInput = CanonBatches(input);
- ExecBatchStreamImpl items(input);
-
- auto stream = program->Apply(&items);
-
- TVector<arrow::compute::ExecBatch> output;
- while (arrow::compute::ExecBatch* batch = stream->Fetch()) {
- output.push_back(*batch);
- }
- const auto canonOutput = CanonBatches(output);
- const TVector<arrow::compute::ExecBatch> check({MakeBatch(9, 17, 2)});
- const auto canonCheck = CanonBatches(check);
- UNIT_ASSERT_EQUAL(canonCheck, canonOutput);
- } catch (const TCompileError& error) {
- UNIT_FAIL(error.GetIssues());
- }
- }
-}
-
-
-Y_UNIT_TEST_SUITE(TestSimplePullStreamArrowIO) {
- Y_UNIT_TEST_BLOCKS(TestSingleInput) {
- using namespace NYql::NPureCalc;
-
- TVector<TString> fields = {"uint64", "int64"};
- auto schema = NYql::NPureCalc::NPrivate::GetSchema(fields);
-
- auto factory = MakeProgramFactory(TestOptions(BlockEngineMode));
-
- try {
- auto program = factory->MakePullStreamProgram(
- TArrowInputSpec({schema}),
- TArrowOutputSpec(schema),
- "SELECT * FROM Input",
- ETranslationMode::SQL
- );
-
- const TVector<arrow::compute::ExecBatch> input({MakeBatch(9, 19)});
- const auto canonInput = CanonBatches(input);
- ExecBatchStreamImpl items(input);
-
- auto stream = program->Apply(&items);
-
- TVector<arrow::compute::ExecBatch> output;
- while (arrow::compute::ExecBatch* batch = stream->Fetch()) {
- output.push_back(*batch);
- }
- const auto canonOutput = CanonBatches(output);
- UNIT_ASSERT_EQUAL(canonInput, canonOutput);
- } catch (const TCompileError& error) {
- UNIT_FAIL(error.GetIssues());
- }
- }
-}
-
-
-Y_UNIT_TEST_SUITE(TestMorePullStreamArrowIO) {
- Y_UNIT_TEST_BLOCKS(TestInc) {
- using namespace NYql::NPureCalc;
-
- TVector<TString> fields = {"uint64", "int64"};
- auto schema = NYql::NPureCalc::NPrivate::GetSchema(fields);
-
- auto factory = MakeProgramFactory(TestOptions(BlockEngineMode));
-
- try {
- auto program = factory->MakePullStreamProgram(
- TArrowInputSpec({schema}),
- TArrowOutputSpec(schema),
- R"(SELECT
- uint64 + 1 as uint64,
- int64 - 2 as int64,
- FROM Input)",
- ETranslationMode::SQL
- );
-
- const TVector<arrow::compute::ExecBatch> input({MakeBatch(9, 19)});
- const auto canonInput = CanonBatches(input);
- ExecBatchStreamImpl items(input);
-
- auto stream = program->Apply(&items);
-
- TVector<arrow::compute::ExecBatch> output;
- while (arrow::compute::ExecBatch* batch = stream->Fetch()) {
- output.push_back(*batch);
- }
- const auto canonOutput = CanonBatches(output);
- const TVector<arrow::compute::ExecBatch> check({MakeBatch(9, 17, 2)});
- const auto canonCheck = CanonBatches(check);
- UNIT_ASSERT_EQUAL(canonCheck, canonOutput);
- } catch (const TCompileError& error) {
- UNIT_FAIL(error.GetIssues());
- }
- }
-}
-
-
-Y_UNIT_TEST_SUITE(TestPushStreamArrowIO) {
- Y_UNIT_TEST_BLOCKS(TestAllColumns) {
- using namespace NYql::NPureCalc;
-
- TVector<TString> fields = {"uint64", "int64"};
- auto schema = NYql::NPureCalc::NPrivate::GetSchema(fields);
-
- auto factory = MakeProgramFactory(TestOptions(BlockEngineMode));
-
- try {
- auto program = factory->MakePushStreamProgram(
- TArrowInputSpec({schema}),
- TArrowOutputSpec(schema),
- "SELECT * FROM Input",
- ETranslationMode::SQL
- );
-
- arrow::compute::ExecBatch input = MakeBatch(9, 19);
- const auto canonInput = CanonBatches({input});
- TVector<arrow::compute::ExecBatch> output;
-
- auto consumer = program->Apply(MakeHolder<ExecBatchConsumerImpl>(output));
-
- UNIT_ASSERT_NO_EXCEPTION([&](){ consumer->OnObject(&input); }());
- UNIT_ASSERT_NO_EXCEPTION([&](){ consumer->OnFinish(); }());
-
- const auto canonOutput = CanonBatches(output);
- UNIT_ASSERT_EQUAL(canonInput, canonOutput);
- } catch (const TCompileError& error) {
- UNIT_FAIL(error.GetIssues());
- }
- }
-}
-
-Y_UNIT_TEST_SUITE(TestMorePushStreamArrowIO) {
- Y_UNIT_TEST_BLOCKS(TestInc) {
- using namespace NYql::NPureCalc;
-
- TVector<TString> fields = {"uint64", "int64"};
- auto schema = NYql::NPureCalc::NPrivate::GetSchema(fields);
-
- auto factory = MakeProgramFactory(TestOptions(BlockEngineMode));
-
- try {
- auto program = factory->MakePushStreamProgram(
- TArrowInputSpec({schema}),
- TArrowOutputSpec(schema),
- R"(SELECT
- uint64 + 1 as uint64,
- int64 - 2 as int64,
- FROM Input)",
- ETranslationMode::SQL
- );
-
- arrow::compute::ExecBatch input = MakeBatch(9, 19);
- const auto canonInput = CanonBatches({input});
- TVector<arrow::compute::ExecBatch> output;
-
- auto consumer = program->Apply(MakeHolder<ExecBatchConsumerImpl>(output));
-
- UNIT_ASSERT_NO_EXCEPTION([&](){ consumer->OnObject(&input); }());
- UNIT_ASSERT_NO_EXCEPTION([&](){ consumer->OnFinish(); }());
-
- const auto canonOutput = CanonBatches(output);
- const TVector<arrow::compute::ExecBatch> check({MakeBatch(9, 17, 2)});
- const auto canonCheck = CanonBatches(check);
- UNIT_ASSERT_EQUAL(canonCheck, canonOutput);
- } catch (const TCompileError& error) {
- UNIT_FAIL(error.GetIssues());
- }
- }
-}
diff --git a/ydb/library/yql/public/purecalc/io_specs/arrow/ut/ya.make b/ydb/library/yql/public/purecalc/io_specs/arrow/ut/ya.make
deleted file mode 100644
index 71faf4ae1c5..00000000000
--- a/ydb/library/yql/public/purecalc/io_specs/arrow/ut/ya.make
+++ /dev/null
@@ -1,20 +0,0 @@
-UNITTEST()
-
-SIZE(MEDIUM)
-
-TIMEOUT(300)
-
-PEERDIR(
- yql/essentials/public/udf/service/exception_policy
- ydb/library/yql/public/purecalc
- ydb/library/yql/public/purecalc/io_specs/arrow
- ydb/library/yql/public/purecalc/ut/lib
-)
-
-YQL_LAST_ABI_VERSION()
-
-SRCS(
- test_spec.cpp
-)
-
-END()
diff --git a/ydb/library/yql/public/purecalc/io_specs/arrow/ya.make b/ydb/library/yql/public/purecalc/io_specs/arrow/ya.make
deleted file mode 100644
index 6019bc8b574..00000000000
--- a/ydb/library/yql/public/purecalc/io_specs/arrow/ya.make
+++ /dev/null
@@ -1,13 +0,0 @@
-LIBRARY()
-
-PEERDIR(
- ydb/library/yql/public/purecalc/common
-)
-
-INCLUDE(ya.make.inc)
-
-END()
-
-RECURSE_FOR_TESTS(
- ut
-)
diff --git a/ydb/library/yql/public/purecalc/io_specs/arrow/ya.make.inc b/ydb/library/yql/public/purecalc/io_specs/arrow/ya.make.inc
deleted file mode 100644
index af2e91086c7..00000000000
--- a/ydb/library/yql/public/purecalc/io_specs/arrow/ya.make.inc
+++ /dev/null
@@ -1,13 +0,0 @@
-SRCDIR(
- ydb/library/yql/public/purecalc/io_specs/arrow
-)
-
-ADDINCL(
- ydb/library/yql/public/purecalc/io_specs/arrow
-)
-
-YQL_LAST_ABI_VERSION()
-
-SRCS(
- spec.cpp
-)
diff --git a/ydb/library/yql/public/purecalc/io_specs/mkql/no_llvm/ya.make b/ydb/library/yql/public/purecalc/io_specs/mkql/no_llvm/ya.make
deleted file mode 100644
index d34d298bb04..00000000000
--- a/ydb/library/yql/public/purecalc/io_specs/mkql/no_llvm/ya.make
+++ /dev/null
@@ -1,10 +0,0 @@
-LIBRARY()
-
-INCLUDE(../ya.make.inc)
-
-PEERDIR(
- ydb/library/yql/public/purecalc/common/no_llvm
-)
-
-END()
-
diff --git a/ydb/library/yql/public/purecalc/io_specs/mkql/spec.cpp b/ydb/library/yql/public/purecalc/io_specs/mkql/spec.cpp
deleted file mode 100644
index f54b8270f2a..00000000000
--- a/ydb/library/yql/public/purecalc/io_specs/mkql/spec.cpp
+++ /dev/null
@@ -1,934 +0,0 @@
-#include "spec.h"
-
-#include <ydb/library/yql/public/purecalc/common/names.h>
-#include <yql/essentials/minikql/computation/mkql_custom_list.h>
-#include <ydb/library/yql/providers/yt/codec/yt_codec_io.h>
-#include <ydb/library/yql/providers/yt/lib/mkql_helpers/mkql_helpers.h>
-#include <ydb/library/yql/providers/yt/common/yql_names.h>
-#include <yql/essentials/providers/common/codec/yql_codec_type_flags.h>
-#include <yql/essentials/providers/common/schema/mkql/yql_mkql_schema.h>
-#include <yql/essentials/minikql/mkql_node_cast.h>
-
-#include <library/cpp/yson/node/node_io.h>
-
-#include <util/generic/noncopyable.h>
-#include <util/generic/ptr.h>
-
-
-namespace {
- const TStringBuf PathColumnShortName = "path";
-
- template <typename T>
- inline TVector<THolder<T>> VectorFromHolder(THolder<T> holder) {
- TVector<THolder<T>> result;
- result.push_back(std::move(holder));
- return result;
- }
-
- template <typename TRowType>
- NYT::TNode ComposeRowSpec(const TRowType* rowType, ui64 nativeYtTypeFlags, bool strictSchema) {
- constexpr bool isNodeType = std::is_same_v<TRowType, NYT::TNode>;
-
- static_assert(isNodeType || std::is_same_v<TRowType, NKikimr::NMiniKQL::TType>);
-
- auto typeNode = NYT::TNode::CreateMap();
- if constexpr (isNodeType) {
- typeNode[NYql::RowSpecAttrType] = *rowType;
- } else {
- typeNode[NYql::RowSpecAttrType] = NYql::NCommon::TypeToYsonNode(rowType);
- }
- typeNode[NYql::RowSpecAttrNativeYtTypeFlags] = nativeYtTypeFlags;
- typeNode[NYql::RowSpecAttrStrictSchema] = strictSchema;
-
- auto attrNode = NYT::TNode::CreateMap();
- attrNode[NYql::YqlRowSpecAttribute] = std::move(typeNode);
-
- return attrNode;
- }
-
- struct TInputDescription {
- public:
- ui32 InputIndex;
- const TMaybe<TVector<TString>>& TableNames;
- const NYT::TNode& InputSchema;
- const bool UseOriginalRowSpec;
-
- public:
- template <bool UseSkiff>
- TInputDescription(const NYql::NPureCalc::TMkqlInputSpec<UseSkiff>& spec, ui32 inputIndex)
- : InputIndex(inputIndex)
- , TableNames(spec.GetTableNames(InputIndex))
- , InputSchema(spec.GetSchemas().at(inputIndex))
- , UseOriginalRowSpec(spec.UseOriginalRowSpec())
- {
- }
-
- bool UseSystemColumns() const {
- return TableNames.Defined();
- }
-
- size_t GetTablesNumber() const {
- if (TableNames.Defined()) {
- return TableNames->size();
- }
-
- return 1;
- }
- };
-
- NYT::TNode ComposeYqlAttributesFromSchema(
- const NKikimr::NMiniKQL::TType* type,
- ui64 nativeYtTypeFlags,
- bool strictSchema,
- const TInputDescription* inputDescription = nullptr)
- {
- auto attrs = NYT::TNode::CreateMap();
- NYT::TNode& tables = attrs[NYql::YqlIOSpecTables];
-
- switch (type->GetKind()) {
- case NKikimr::NMiniKQL::TType::EKind::Variant:
- {
- YQL_ENSURE(!inputDescription);
-
- const auto* vtype = AS_TYPE(NKikimr::NMiniKQL::TVariantType, type);
-
- NYT::TNode& registryNode = attrs[NYql::YqlIOSpecRegistry];
- THashMap<TString, TString> uniqSpecs;
-
- for (ui32 i = 0; i < vtype->GetAlternativesCount(); i++) {
- TString refName = TStringBuilder() << "$table" << uniqSpecs.size();
-
- auto rowSpec = ComposeRowSpec(vtype->GetAlternativeType(i), nativeYtTypeFlags, strictSchema);
-
- auto res = uniqSpecs.emplace(NYT::NodeToCanonicalYsonString(rowSpec), refName);
- if (res.second) {
- registryNode[refName] = rowSpec;
- } else {
- refName = res.first->second;
- }
- tables.Add(refName);
- }
- break;
- }
- case NKikimr::NMiniKQL::TType::EKind::Struct:
- {
- auto rowSpec = NYT::TNode();
-
- if (inputDescription && inputDescription->UseOriginalRowSpec) {
- rowSpec = ComposeRowSpec(&inputDescription->InputSchema, nativeYtTypeFlags, strictSchema);
- } else {
- rowSpec = ComposeRowSpec(type, nativeYtTypeFlags, strictSchema);
- }
-
- if (inputDescription && inputDescription->UseSystemColumns()) {
- rowSpec[NYql::YqlSysColumnPrefix] = NYT::TNode().Add(PathColumnShortName);
- }
-
- if (inputDescription && inputDescription->GetTablesNumber() > 1) {
- TStringBuf refName = "$table0";
- attrs[NYql::YqlIOSpecRegistry][refName] = std::move(rowSpec);
- for (ui32 i = 0; i < inputDescription->GetTablesNumber(); ++i) {
- tables.Add(refName);
- }
- } else {
- tables.Add(std::move(rowSpec));
- }
- break;
- }
- default:
- Y_UNREACHABLE();
- }
-
- return attrs;
- }
-
- NYql::NCommon::TCodecContext MakeCodecCtx(NYql::NPureCalc::IWorker* worker) {
- return NYql::NCommon::TCodecContext(
- worker->GetTypeEnvironment(),
- worker->GetFunctionRegistry(),
- &worker->GetGraph().GetHolderFactory()
- );
- }
-
- NYql::TMkqlIOSpecs GetIOSpecs(
- NYql::NPureCalc::IWorker* worker,
- NYql::NCommon::TCodecContext& codecCtx,
- bool useSkiff,
- const TInputDescription* inputDescription = nullptr,
- bool strictSchema = true
- ) {
- NYql::TMkqlIOSpecs specs;
- if (useSkiff) {
- specs.SetUseSkiff(worker->GetLLVMSettings());
- }
-
- if (inputDescription) {
- const auto* type = worker->GetInputType(inputDescription->InputIndex, true);
- const auto* fullType = worker->GetInputType(inputDescription->InputIndex, false);
-
- YQL_ENSURE(!type->FindMemberIndex(NYql::YqlSysColumnPath));
-
- size_t extraColumnsCount = 0;
- if (inputDescription->UseSystemColumns()) {
- YQL_ENSURE(fullType->FindMemberIndex(NYql::YqlSysColumnPath));
- ++extraColumnsCount;
- }
- if (!strictSchema) {
- YQL_ENSURE(fullType->FindMemberIndex(NYql::YqlOthersColumnName));
- ++extraColumnsCount;
- }
-
- if (extraColumnsCount != 0) {
- YQL_ENSURE(fullType->GetMembersCount() == type->GetMembersCount() + extraColumnsCount);
- } else {
- YQL_ENSURE(type == fullType);
- }
-
- auto attrs = ComposeYqlAttributesFromSchema(type, worker->GetNativeYtTypeFlags(), strictSchema, inputDescription);
- if (inputDescription->TableNames) {
- specs.Init(codecCtx, attrs, inputDescription->TableNames.GetRef(), {});
- } else {
- specs.Init(codecCtx, attrs, {}, {});
- }
- } else {
- auto attrs = ComposeYqlAttributesFromSchema(worker->GetOutputType(), worker->GetNativeYtTypeFlags(), strictSchema);
- specs.Init(codecCtx, attrs);
- }
-
- return specs;
- }
-
- class TRawTableReaderImpl final: public NYT::TRawTableReader {
- private:
- // If we own Underlying_, than Owned_ == Underlying_, otherwise Owned_ is nullptr.
- THolder<IInputStream> Owned_;
- IInputStream* Underlying_;
- NKikimr::NMiniKQL::TScopedAlloc& ScopedAlloc_;
-
- private:
- TRawTableReaderImpl(
- IInputStream* underlying,
- THolder<IInputStream> owned,
- NKikimr::NMiniKQL::TScopedAlloc& scopedAlloc
- )
- : Owned_(std::move(owned))
- , Underlying_(underlying)
- , ScopedAlloc_(scopedAlloc)
- {
- }
-
- public:
- TRawTableReaderImpl(THolder<IInputStream> stream, NKikimr::NMiniKQL::TScopedAlloc& scopedAlloc)
- : TRawTableReaderImpl(stream.Get(), nullptr, scopedAlloc)
- {
- Owned_ = std::move(stream);
- }
-
- TRawTableReaderImpl(IInputStream* stream, NKikimr::NMiniKQL::TScopedAlloc& scopedAlloc)
- : TRawTableReaderImpl(stream, nullptr, scopedAlloc)
- {
- }
-
- bool Retry(const TMaybe<ui32>&, const TMaybe<ui64>&, const std::exception_ptr&) override {
- return false;
- }
-
- void ResetRetries() override {
- }
-
- bool HasRangeIndices() const override {
- return false;
- }
-
- protected:
- size_t DoRead(void* buf, size_t len) override {
- auto unguard = Unguard(ScopedAlloc_);
- return Underlying_->Read(buf, len);
- }
- };
-
-
- class TMkqlListValue: public NKikimr::NMiniKQL::TCustomListValue {
- private:
- mutable bool HasIterator_ = false;
- NYql::NPureCalc::IWorker* Worker_;
- // Keeps struct members reorders
- NYql::NCommon::TCodecContext CodecCtx_;
- NYql::TMkqlIOSpecs IOSpecs_;
- // If we own Underlying_, than Owned_ == Underlying_, otherwise Owned_ is nullptr.
- THolder<NYT::TRawTableReader> Owned_;
- NYT::TRawTableReader* Underlying_;
- NYql::TMkqlReaderImpl Reader_;
-
- private:
- TMkqlListValue(
- NKikimr::NMiniKQL::TMemoryUsageInfo* memInfo,
- bool useSkiff,
- NYT::TRawTableReader* underlying,
- THolder<NYT::TRawTableReader> owned,
- NYql::NPureCalc::IWorker* worker,
- const TInputDescription& inputDescription,
- bool ignoreStreamTableIndex = false,
- bool strictSchema = true
- ) : TCustomListValue(memInfo)
- , Worker_(worker)
- , CodecCtx_(MakeCodecCtx(Worker_))
- , IOSpecs_(GetIOSpecs(Worker_, CodecCtx_, useSkiff, &inputDescription, strictSchema))
- , Owned_(std::move(owned))
- , Underlying_(underlying)
- , Reader_(*Underlying_, 0, 1ul << 20, 0, ignoreStreamTableIndex)
- {
- Reader_.SetSpecs(IOSpecs_, Worker_->GetGraph().GetHolderFactory());
- Reader_.Next();
- }
-
- public:
- TMkqlListValue(
- NKikimr::NMiniKQL::TMemoryUsageInfo* memInfo,
- bool useSkiff,
- THolder<NYT::TRawTableReader> stream,
- NYql::NPureCalc::IWorker* worker,
- const TInputDescription& inputDescription,
- bool ignoreStreamTableIndex = false,
- bool strictSchema = true
- )
- : TMkqlListValue(
- memInfo, useSkiff, stream.Get(), nullptr, worker, inputDescription, ignoreStreamTableIndex, strictSchema)
- {
- Owned_ = std::move(stream);
- }
-
- TMkqlListValue(
- NKikimr::NMiniKQL::TMemoryUsageInfo* memInfo,
- bool useSkiff,
- NYT::TRawTableReader* stream,
- NYql::NPureCalc::IWorker* worker,
- const TInputDescription& inputDescription,
- bool ignoreStreamTableIndex,
- bool strictSchema = true
- )
- : TMkqlListValue(memInfo, useSkiff, stream, nullptr, worker, inputDescription, ignoreStreamTableIndex, strictSchema)
- {
- }
-
- NKikimr::NUdf::TUnboxedValue GetListIterator() const override {
- YQL_ENSURE(!HasIterator_, "Only one pass over input is supported");
- HasIterator_ = true;
- return NKikimr::NUdf::TUnboxedValuePod(const_cast<TMkqlListValue*>(this));
- }
-
- bool Next(NKikimr::NUdf::TUnboxedValue& result) override {
- if (!Reader_.IsValid()) {
- return false;
- }
-
- result = Reader_.GetRow();
- Reader_.Next();
-
- return true;
- }
-
- NKikimr::NUdf::EFetchStatus Fetch(
- NKikimr::NUdf::TUnboxedValue& result
- ) override {
- if (Next(result)) {
- return NKikimr::NUdf::EFetchStatus::Ok;
- }
-
- return NKikimr::NUdf::EFetchStatus::Finish;
- }
- };
-
- class TMkqlWriter: public NYql::NPureCalc::THandle {
- protected:
- virtual const NYql::NPureCalc::IWorker* GetWorker() const = 0;
- virtual void DoRun(const TVector<IOutputStream*>& stream) = 0;
-
- public:
- void Run(IOutputStream* stream) final {
- Y_ENSURE(
- GetWorker()->GetOutputType()->IsStruct(),
- "NYql::NPureCalc::THandle::Run(IOutputStream*) cannot be used with multi-output programs; "
- "use other overloads of Run() instead.");
-
- DoRun({stream});
- }
-
- void Run(const TVector<IOutputStream*>& streams) final {
- Y_ENSURE(
- GetWorker()->GetOutputType()->IsVariant(),
- "NYql::NPureCalc::THandle::Run(TVector<IOutputStream*>) cannot be used with single-output programs; "
- "use NYql::NPureCalc::THandle::Run(IOutputStream*) instead.");
-
- const auto* variantType = AS_TYPE(NKikimr::NMiniKQL::TVariantType, GetWorker()->GetOutputType());
-
- Y_ENSURE(
- variantType->GetUnderlyingType()->IsTuple(),
- "NYql::NPureCalc::THandle::Run(TVector<IOutputStream*>) cannot be used to process variants over struct; "
- "use NYql::NPureCalc::THandle::Run(TMap<TString, IOutputStream*>) instead.");
-
- const auto* tupleType = AS_TYPE(NKikimr::NMiniKQL::TTupleType, variantType->GetUnderlyingType());
-
- Y_ENSURE(
- tupleType->GetElementsCount() == streams.size(),
- "Number of variant alternatives should match number of streams.");
-
- DoRun(streams);
- }
-
- void Run(const TMap<TString, IOutputStream*>& streams) final {
- Y_ENSURE(
- GetWorker()->GetOutputType()->IsVariant(),
- "NYql::NPureCalc::THandle::Run(TMap<TString, IOutputStream*>) cannot be used with single-output programs; "
- "use NYql::NPureCalc::THandle::Run(IOutputStream*) instead.");
-
- const auto* variantType = AS_TYPE(NKikimr::NMiniKQL::TVariantType, GetWorker()->GetOutputType());
-
- Y_ENSURE(
- variantType->GetUnderlyingType()->IsStruct(),
- "NYql::NPureCalc::THandle::Run(TMap<TString, IOutputStream*>) cannot be used to process variants over tuple; "
- "use NYql::NPureCalc::THandle::Run(TVector<IOutputStream*>) instead.");
-
- const auto* structType = AS_TYPE(NKikimr::NMiniKQL::TStructType, variantType->GetUnderlyingType());
-
- Y_ENSURE(
- structType->GetMembersCount() == streams.size(),
- "Number of variant alternatives should match number of streams.");
-
- TVector<IOutputStream*> sortedStreams;
- sortedStreams.reserve(structType->GetMembersCount());
-
- for (ui32 i = 0; i < structType->GetMembersCount(); i++) {
- auto name = TString{structType->GetMemberName(i)};
- Y_ENSURE(streams.contains(name), "Cannot find stream for alternative " << name.Quote());
- sortedStreams.push_back(streams.at(name));
- }
-
- DoRun(sortedStreams);
- }
- };
-
- class TPullListMkqlWriter: public TMkqlWriter {
- private:
- NYql::NPureCalc::TWorkerHolder<NYql::NPureCalc::IPullListWorker> Worker_;
- NYql::NCommon::TCodecContext CodecCtx_;
- NYql::TMkqlIOSpecs IOSpecs_;
-
- public:
- TPullListMkqlWriter(
- NYql::NPureCalc::TWorkerHolder<NYql::NPureCalc::IPullListWorker> worker,
- bool useSkiff
- )
- : Worker_(std::move(worker))
- , CodecCtx_(MakeCodecCtx(Worker_.Get()))
- , IOSpecs_(GetIOSpecs(Worker_.Get(), CodecCtx_, useSkiff))
- {
- }
-
- protected:
- const NYql::NPureCalc::IWorker* GetWorker() const override {
- return Worker_.Get();
- }
-
- void DoRun(const TVector<IOutputStream*>& outputs) override {
- NKikimr::NMiniKQL::TBindTerminator bind(Worker_->GetGraph().GetTerminator());
-
- with_lock(Worker_->GetScopedAlloc()) {
- NYql::TMkqlWriterImpl writer{outputs, 0, 1ul << 20};
- writer.SetSpecs(IOSpecs_);
-
- const auto outputIterator = Worker_->GetOutputIterator();
-
- for (NKikimr::NUdf::TUnboxedValue value; outputIterator.Next(value); writer.AddRow(value))
- continue;
-
- writer.Finish();
- }
- }
- };
-
- class TPullStreamMkqlWriter: public TMkqlWriter {
- private:
- NYql::NPureCalc::TWorkerHolder<NYql::NPureCalc::IPullStreamWorker> Worker_;
- NYql::NCommon::TCodecContext CodecCtx_;
- NYql::TMkqlIOSpecs IOSpecs_;
-
- public:
- TPullStreamMkqlWriter(
- NYql::NPureCalc::TWorkerHolder<NYql::NPureCalc::IPullStreamWorker> worker,
- bool useSkiff
- )
- : Worker_(std::move(worker))
- , CodecCtx_(MakeCodecCtx(Worker_.Get()))
- , IOSpecs_(GetIOSpecs(Worker_.Get(), CodecCtx_, useSkiff))
- {
- }
-
- protected:
- const NYql::NPureCalc::IWorker* GetWorker() const override {
- return Worker_.Get();
- }
-
- void DoRun(const TVector<IOutputStream*>& outputs) override {
- NKikimr::NMiniKQL::TBindTerminator bind(Worker_->GetGraph().GetTerminator());
-
- with_lock(Worker_->GetScopedAlloc()) {
- NYql::TMkqlWriterImpl writer{outputs, 0, 1ul << 20};
- writer.SetSpecs(IOSpecs_);
-
- const auto output = Worker_->GetOutput();
-
- for (NKikimr::NUdf::TUnboxedValue value;;) {
- const auto status = output.Fetch(value);
-
- if (status == NKikimr::NUdf::EFetchStatus::Ok) {
- writer.AddRow(value);
- } else if (status == NKikimr::NUdf::EFetchStatus::Finish) {
- break;
- } else {
- YQL_ENSURE(false, "Yield is not supported in pull mode");
- }
- }
-
- writer.Finish();
- }
- }
- };
-}
-
-namespace NYql {
- namespace NPureCalc {
- template <bool UseSkiff>
- TMkqlInputSpec<UseSkiff>::TMkqlInputSpec(TVector<NYT::TNode> schemas)
- : Schemas_(std::move(schemas))
- {
- AllTableNames_ = TVector<TMaybe<TVector<TString>>>(Schemas_.size(), Nothing());
- this->AllVirtualColumns_ = TVector<THashMap<TString, NYT::TNode>>(Schemas_.size());
- }
-
- template <bool UseSkiff>
- TMkqlInputSpec<UseSkiff>::TMkqlInputSpec(NYT::TNode schema, bool ignoreStreamTableIndex)
- {
- Schemas_.push_back(std::move(schema));
- IgnoreStreamTableIndex_ = ignoreStreamTableIndex;
- AllTableNames_.push_back(Nothing());
- this->AllVirtualColumns_.push_back({});
- }
-
- template <bool UseSkiff>
- const TVector<NYT::TNode>& TMkqlInputSpec<UseSkiff>::GetSchemas() const {
- return Schemas_;
- }
-
- template <bool UseSkiff>
- bool TMkqlInputSpec<UseSkiff>::IgnoreStreamTableIndex() const {
- return IgnoreStreamTableIndex_;
- }
-
- template <bool UseSkiff>
- bool TMkqlInputSpec<UseSkiff>::IsStrictSchema() const {
- return StrictSchema_;
- }
-
- template <bool UseSkiff>
- TMkqlInputSpec<UseSkiff>& TMkqlInputSpec<UseSkiff>::SetStrictSchema(bool strictSchema) {
- static const NYT::TNode stringType = NYT::TNode::CreateList().Add("DataType").Add("String");
- static const NYT::TNode othersColumntype = NYT::TNode::CreateList().Add("DictType").Add(stringType).Add(stringType);
-
- StrictSchema_ = strictSchema;
-
- for (size_t index = 0; index < Schemas_.size(); ++index) {
- auto& schemaVirtualColumns = this->AllVirtualColumns_.at(index);
- if (StrictSchema_) {
- schemaVirtualColumns.erase(NYql::YqlOthersColumnName);
- } else {
- schemaVirtualColumns.emplace(NYql::YqlOthersColumnName, othersColumntype);
- }
- }
-
- return *this;
- }
-
- template <bool UseSkiff>
- bool TMkqlInputSpec<UseSkiff>::UseOriginalRowSpec() const {
- return UseOriginalRowSpec_;
- }
-
- template <bool UseSkiff>
- TMkqlInputSpec<UseSkiff>& TMkqlInputSpec<UseSkiff>::SetUseOriginalRowSpec(bool value) {
- UseOriginalRowSpec_ = value;
-
- return *this;
- }
-
- template <bool UseSkiff>
- const TMaybe<TVector<TString>>& TMkqlInputSpec<UseSkiff>::GetTableNames() const {
- Y_ENSURE(AllTableNames_.size() == 1, "expected single-input spec");
-
- return AllTableNames_[0];
- }
-
- template <bool UseSkiff>
- const TMaybe<TVector<TString>>& TMkqlInputSpec<UseSkiff>::GetTableNames(ui32 index) const {
- Y_ENSURE(index < AllTableNames_.size(), "invalid input index");
-
- return AllTableNames_[index];
- }
-
- template <bool UseSkiff>
- TMkqlInputSpec<UseSkiff>& TMkqlInputSpec<UseSkiff>::SetTableNames(TVector<TString> tableNames) {
- Y_ENSURE(AllTableNames_.size() == 1, "expected single-input spec");
-
- return SetTableNames(std::move(tableNames), 0);
- }
-
- template <bool UseSkiff>
- TMkqlInputSpec<UseSkiff>& TMkqlInputSpec<UseSkiff>::SetTableNames(TVector<TString> tableNames, ui32 index) {
- Y_ENSURE(index < AllTableNames_.size(), "invalid input index");
-
- auto& value = AllTableNames_[index];
-
- if (!value.Defined()) {
- YQL_ENSURE(NYql::YqlSysColumnPath == NYql::NPureCalc::PurecalcSysColumnTablePath);
- YQL_ENSURE(NYql::GetSysColumnTypeId(PathColumnShortName) == NYql::NUdf::TDataType<char*>::Id);
- this->AllVirtualColumns_.at(index).emplace(
- NYql::YqlSysColumnPath, NYT::TNode::CreateList().Add("DataType").Add("String")
- );
- }
-
- value = std::move(tableNames);
-
- return *this;
- }
-
- template <bool UseSkiff>
- TMkqlOutputSpec<UseSkiff>::TMkqlOutputSpec(NYT::TNode schema)
- : Schema_(std::move(schema))
- {
- }
-
- template <bool UseSkiff>
- const NYT::TNode& TMkqlOutputSpec<UseSkiff>::GetSchema() const {
- return Schema_;
- }
-
- template <bool UseSkiff>
- void TInputSpecTraits<TMkqlInputSpec<UseSkiff>>::PreparePullStreamWorker(
- const TMkqlInputSpec<UseSkiff>& spec,
- IPullStreamWorker* worker,
- const TVector<IInputStream*>& streams
- ) {
- YQL_ENSURE(
- worker->GetInputsCount() == streams.size(),
- "number of input streams should match number of inputs provided by spec");
-
- TVector<THolder<NYT::TRawTableReader>> wrappers;
- auto& scopedAlloc = worker->GetScopedAlloc();
- for (ui32 i = 0; i < streams.size(); ++i) {
- wrappers.push_back(MakeHolder<TRawTableReaderImpl>(streams[i], scopedAlloc));
- }
-
- NYql::NPureCalc::TInputSpecTraits<NYql::NPureCalc::TMkqlInputSpec<UseSkiff>>::PreparePullStreamWorker(
- spec,
- worker,
- std::move(wrappers)
- );
- }
-
- template <bool UseSkiff>
- void TInputSpecTraits<TMkqlInputSpec<UseSkiff>>::PreparePullStreamWorker(
- const TMkqlInputSpec<UseSkiff>& spec,
- IPullStreamWorker* worker,
- IInputStream* stream
- ) {
- TInputSpecTraits<TMkqlInputSpec<UseSkiff>>::PreparePullStreamWorker(
- spec,
- worker,
- TVector<IInputStream*>({stream})
- );
- }
-
- template <bool UseSkiff>
- void TInputSpecTraits<TMkqlInputSpec<UseSkiff>>::PreparePullStreamWorker(
- const TMkqlInputSpec<UseSkiff>& spec,
- IPullStreamWorker* worker,
- TVector<THolder<IInputStream>>&& streams
- ) {
- YQL_ENSURE(
- worker->GetInputsCount() == streams.size(),
- "number of input streams should match number of inputs provided by spec");
-
- TVector<THolder<NYT::TRawTableReader>> wrappers;
- auto& scopedAlloc = worker->GetScopedAlloc();
- for (ui32 i = 0; i < streams.size(); ++i) {
- wrappers.push_back(MakeHolder<TRawTableReaderImpl>(std::move(streams[i]), scopedAlloc));
- }
-
- TInputSpecTraits<NYql::NPureCalc::TMkqlInputSpec<UseSkiff>>::PreparePullStreamWorker(
- spec,
- worker,
- std::move(wrappers)
- );
- }
-
- template <bool UseSkiff>
- void TInputSpecTraits<TMkqlInputSpec<UseSkiff>>::PreparePullStreamWorker(
- const TMkqlInputSpec<UseSkiff>& spec,
- IPullStreamWorker* worker,
- THolder<IInputStream> stream
- ) {
- TInputSpecTraits<TMkqlInputSpec<UseSkiff>>::PreparePullStreamWorker(
- spec,
- worker,
- VectorFromHolder<IInputStream>(std::move(stream))
- );
- }
-
- template <bool UseSkiff>
- void TInputSpecTraits<TMkqlInputSpec<UseSkiff>>::PreparePullStreamWorker(
- const TMkqlInputSpec<UseSkiff>& spec,
- IPullStreamWorker* worker,
- const TVector<NYT::TRawTableReader*>& streams
- ) {
- YQL_ENSURE(
- worker->GetInputsCount() == streams.size(),
- "number of input streams should match number of inputs provided by spec");
-
- with_lock(worker->GetScopedAlloc()) {
- auto& holderFactory = worker->GetGraph().GetHolderFactory();
- for (ui32 i = 0; i < streams.size(); ++i) {
- TInputDescription inputDescription(spec, i);
- auto input = holderFactory.Create<TMkqlListValue>(
- UseSkiff, streams[i], worker, inputDescription, spec.IgnoreStreamTableIndex(), spec.IsStrictSchema()
- );
- worker->SetInput(std::move(input), i);
- }
- }
- }
-
- template <bool UseSkiff>
- void TInputSpecTraits<TMkqlInputSpec<UseSkiff>>::PreparePullStreamWorker(
- const TMkqlInputSpec<UseSkiff>& spec,
- IPullStreamWorker* worker,
- NYT::TRawTableReader* stream
- ) {
- TInputSpecTraits<TMkqlInputSpec<UseSkiff>>::PreparePullStreamWorker(
- spec,
- worker,
- TVector<NYT::TRawTableReader*>({stream})
- );
- }
-
- template <bool UseSkiff>
- void TInputSpecTraits<TMkqlInputSpec<UseSkiff>>::PreparePullStreamWorker(
- const TMkqlInputSpec<UseSkiff>& spec,
- IPullStreamWorker* worker,
- TVector<THolder<NYT::TRawTableReader>>&& streams
- ) {
- YQL_ENSURE(
- worker->GetInputsCount() == streams.size(),
- "number of input streams should match number of inputs provided by spec");
-
- with_lock(worker->GetScopedAlloc()) {
- auto& holderFactory = worker->GetGraph().GetHolderFactory();
- for (ui32 i = 0; i < streams.size(); ++i) {
- TInputDescription inputDescription(spec, i);
- auto input = holderFactory.Create<TMkqlListValue>(
- UseSkiff, std::move(streams[i]), worker, inputDescription, spec.IgnoreStreamTableIndex(), spec.IsStrictSchema()
- );
- worker->SetInput(std::move(input), i);
- }
- }
- }
-
- template <bool UseSkiff>
- void TInputSpecTraits<TMkqlInputSpec<UseSkiff>>::PreparePullStreamWorker(
- const TMkqlInputSpec<UseSkiff>& spec,
- IPullStreamWorker* worker,
- THolder<NYT::TRawTableReader> stream
- ) {
- TInputSpecTraits<TMkqlInputSpec<UseSkiff>>::PreparePullStreamWorker(
- spec,
- worker,
- VectorFromHolder<NYT::TRawTableReader>(std::move(stream))
- );
- }
-
- template <bool UseSkiff>
- void TInputSpecTraits<TMkqlInputSpec<UseSkiff>>::PreparePullListWorker(
- const TMkqlInputSpec<UseSkiff>& spec,
- IPullListWorker* worker,
- const TVector<IInputStream*>& streams
- ) {
- YQL_ENSURE(
- worker->GetInputsCount() == streams.size(),
- "number of input streams should match number of inputs provided by spec");
-
- TVector<THolder<NYT::TRawTableReader>> wrappers;
- auto& scopedAlloc = worker->GetScopedAlloc();
- for (ui32 i = 0; i < streams.size(); ++i) {
- wrappers.push_back(MakeHolder<TRawTableReaderImpl>(streams[i], scopedAlloc));
- }
-
- NYql::NPureCalc::TInputSpecTraits<NYql::NPureCalc::TMkqlInputSpec<UseSkiff>>::PreparePullListWorker(
- spec,
- worker,
- std::move(wrappers)
- );
- }
-
- template <bool UseSkiff>
- void TInputSpecTraits<TMkqlInputSpec<UseSkiff>>::PreparePullListWorker(
- const TMkqlInputSpec<UseSkiff>& spec,
- IPullListWorker* worker,
- IInputStream* stream
- ) {
- TInputSpecTraits<TMkqlInputSpec<UseSkiff>>::PreparePullListWorker(
- spec,
- worker,
- TVector<IInputStream*>({stream})
- );
- }
-
- template <bool UseSkiff>
- void TInputSpecTraits<TMkqlInputSpec<UseSkiff>>::PreparePullListWorker(
- const TMkqlInputSpec<UseSkiff>& spec,
- IPullListWorker* worker,
- TVector<THolder<IInputStream>>&& streams
- ) {
- YQL_ENSURE(
- worker->GetInputsCount() == streams.size(),
- "number of input streams should match number of inputs provided by spec");
-
- TVector<THolder<NYT::TRawTableReader>> wrappers;
- auto& scopedAlloc = worker->GetScopedAlloc();
- for (ui32 i = 0; i < streams.size(); ++i) {
- wrappers.push_back(MakeHolder<TRawTableReaderImpl>(std::move(streams[i]), scopedAlloc));
- }
-
- NYql::NPureCalc::TInputSpecTraits<NYql::NPureCalc::TMkqlInputSpec<UseSkiff>>::PreparePullListWorker(
- spec,
- worker,
- std::move(wrappers)
- );
- }
-
- template <bool UseSkiff>
- void TInputSpecTraits<TMkqlInputSpec<UseSkiff>>::PreparePullListWorker(
- const TMkqlInputSpec<UseSkiff>& spec,
- IPullListWorker* worker,
- THolder<IInputStream> stream
- ) {
- TInputSpecTraits<TMkqlInputSpec<UseSkiff>>::PreparePullListWorker(
- spec,
- worker,
- VectorFromHolder<IInputStream>(std::move(stream))
- );
- }
-
- template <bool UseSkiff>
- void TInputSpecTraits<TMkqlInputSpec<UseSkiff>>::PreparePullListWorker(
- const TMkqlInputSpec<UseSkiff>& spec,
- IPullListWorker* worker,
- const TVector<NYT::TRawTableReader*>& streams
- ) {
- YQL_ENSURE(
- worker->GetInputsCount() == streams.size(),
- "number of input streams should match number of inputs provided by spec");
-
- with_lock(worker->GetScopedAlloc()) {
- auto& holderFactory = worker->GetGraph().GetHolderFactory();
- for (ui32 i = 0; i < streams.size(); ++i) {
- TInputDescription inputDescription(spec, i);
- auto input = holderFactory.Create<TMkqlListValue>(
- UseSkiff, streams[i], worker, inputDescription, spec.IgnoreStreamTableIndex(), spec.IsStrictSchema()
- );
- worker->SetInput(std::move(input), i);
- }
- }
- }
-
- template <bool UseSkiff>
- void TInputSpecTraits<TMkqlInputSpec<UseSkiff>>::PreparePullListWorker(
- const TMkqlInputSpec<UseSkiff>& spec,
- IPullListWorker* worker,
- NYT::TRawTableReader* stream
- ) {
- TInputSpecTraits<TMkqlInputSpec<UseSkiff>>::PreparePullListWorker(
- spec,
- worker,
- TVector<NYT::TRawTableReader*>({stream})
- );
- }
-
- template <bool UseSkiff>
- void TInputSpecTraits<TMkqlInputSpec<UseSkiff>>::PreparePullListWorker(
- const TMkqlInputSpec<UseSkiff>& spec,
- IPullListWorker* worker,
- TVector<THolder<NYT::TRawTableReader>>&& streams
- ) {
- YQL_ENSURE(
- worker->GetInputsCount() == streams.size(),
- "number of input streams should match number of inputs provided by spec");
-
- with_lock(worker->GetScopedAlloc()) {
- auto& holderFactory = worker->GetGraph().GetHolderFactory();
- for (ui32 i = 0; i < streams.size(); ++i) {
- TInputDescription inputDescription(spec, i);
- auto input = holderFactory.Create<TMkqlListValue>(
- UseSkiff, std::move(streams[i]), worker, inputDescription, spec.IgnoreStreamTableIndex(), spec.IsStrictSchema()
- );
- worker->SetInput(std::move(input), i);
- }
- }
- }
-
- template <bool UseSkiff>
- void TInputSpecTraits<TMkqlInputSpec<UseSkiff>>::PreparePullListWorker(
- const TMkqlInputSpec<UseSkiff>& spec,
- IPullListWorker* worker,
- THolder<NYT::TRawTableReader> stream
- ) {
- TInputSpecTraits<TMkqlInputSpec<UseSkiff>>::PreparePullListWorker(
- spec,
- worker,
- VectorFromHolder<NYT::TRawTableReader>(std::move(stream))
- );
- }
-
- template <bool UseSkiff>
- THolder<THandle> TOutputSpecTraits<TMkqlOutputSpec<UseSkiff>>::ConvertPullListWorkerToOutputType(
- const NYql::NPureCalc::TMkqlOutputSpec<UseSkiff>&,
- NYql::NPureCalc::TWorkerHolder<NYql::NPureCalc::IPullListWorker> worker
- ) {
- with_lock(worker->GetScopedAlloc()) {
- return MakeHolder<TPullListMkqlWriter>(std::move(worker), UseSkiff);
- }
- }
-
- template <bool UseSkiff>
- THolder<THandle> TOutputSpecTraits<TMkqlOutputSpec<UseSkiff>>::ConvertPullStreamWorkerToOutputType(
- const NYql::NPureCalc::TMkqlOutputSpec<UseSkiff>&,
- NYql::NPureCalc::TWorkerHolder<NYql::NPureCalc::IPullStreamWorker> worker
- ) {
- with_lock(worker->GetScopedAlloc()) {
- return MakeHolder<TPullStreamMkqlWriter>(std::move(worker), UseSkiff);
- }
- }
-
- template class TMkqlSpec<true, TInputSpecBase>;
- template class TMkqlSpec<false, TInputSpecBase>;
- template class TMkqlSpec<true, TOutputSpecBase>;
- template class TMkqlSpec<false, TOutputSpecBase>;
-
- template class TMkqlInputSpec<true>;
- template class TMkqlInputSpec<false>;
- template class TMkqlOutputSpec<true>;
- template class TMkqlOutputSpec<false>;
-
- template struct TInputSpecTraits<TMkqlInputSpec<true>>;
- template struct TInputSpecTraits<TMkqlInputSpec<false>>;
- template struct TOutputSpecTraits<TMkqlOutputSpec<true>>;
- template struct TOutputSpecTraits<TMkqlOutputSpec<false>>;
- }
-}
diff --git a/ydb/library/yql/public/purecalc/io_specs/mkql/spec.h b/ydb/library/yql/public/purecalc/io_specs/mkql/spec.h
deleted file mode 100644
index ef4ceea6a25..00000000000
--- a/ydb/library/yql/public/purecalc/io_specs/mkql/spec.h
+++ /dev/null
@@ -1,231 +0,0 @@
-#pragma once
-
-#include <ydb/library/yql/public/purecalc/common/interface.h>
-
-#include <util/generic/noncopyable.h>
-
-namespace NYT {
- class TRawTableReader;
-}
-
-namespace NYql {
- namespace NPureCalc {
- /**
- * Processing mode for working with Skiff/YSON IO.
- *
- * In this mode purecalc accepts vector of pointers to `IInputStream` as an inputs and returns a handle
- * which can be used to invoke program writing all output to a stream.
- *
- * For example:
- *
- * @code
- * auto handle = program.Apply(&Cin);
- * handle->Run(&Cout); // run the program, read from Cin and write to Cout
- * @endcode
- *
- * All working modes except PushStream are supported.
- */
- template <bool UseSkiff, typename TBase>
- class TMkqlSpec: public TBase {
- static_assert(
- std::is_same<TBase, TInputSpecBase>::value ||
- std::is_same<TBase, TOutputSpecBase>::value,
- "Class is used in unintended way!"
- );
- };
-
- /**
- * Skiff/YSON input spec. In this mode purecalc takes a non-owning pointers to a text input streams and parses
- * them using Skiff or YSON codec.
- *
- * The program synopsis follows:
- *
- * @code
- * ... TPullStreamProgram::Apply(TVector<IInputStream*>);
- * ... TPullStreamProgram::Apply(TVector<NYT::TRawTableReader*>);
- * ... TPullListProgram::Apply(TVector<IInputStream*>);
- * ... TPullListProgram::Apply(TVector<NYT::TRawTableReader*>);
- * @endcode
- *
- * @tparam UseSkiff expect Skiff format if true, YSON otherwise.
- */
- template <bool UseSkiff>
- class TMkqlInputSpec: public TMkqlSpec<UseSkiff, TInputSpecBase> {
- public:
- using TBase = TInputSpecBase;
- static constexpr bool UseSkiffValue = UseSkiff;
-
- private:
- TVector<NYT::TNode> Schemas_;
- bool StrictSchema_ = true;
- bool IgnoreStreamTableIndex_ = false;
- TVector<TMaybe<TVector<TString>>> AllTableNames_;
- // Allows to read structure columns with custom members order.
- // Instead of chain TNode => TTypeAnnotationNode => TType => TNode (which looses members order) use
- // original schema as row spec.
- bool UseOriginalRowSpec_ = false;
-
- public:
- explicit TMkqlInputSpec(TVector<NYT::TNode>);
- explicit TMkqlInputSpec(NYT::TNode, bool ignoreStreamTableIndex = false);
-
- const TVector<NYT::TNode>& GetSchemas() const override;
-
- bool IgnoreStreamTableIndex() const;
-
- bool IsStrictSchema() const;
- TMkqlInputSpec& SetStrictSchema(bool strictSchema);
-
- const TMaybe<TVector<TString>>& GetTableNames() const;
- const TMaybe<TVector<TString>>& GetTableNames(ui32) const;
- bool UseOriginalRowSpec() const;
-
- TMkqlInputSpec& SetTableNames(TVector<TString>);
- TMkqlInputSpec& SetTableNames(TVector<TString>, ui32);
- TMkqlInputSpec& SetUseOriginalRowSpec(bool value);
- };
-
- /**
- * Skiff/YSON output. In this mode purecalc returns a handle which can be used to invoke an underlying program.
- *
- * So far this is the only spec that supports multi-table output.
- *
- * The program synopsis follows:
- *
- * @code
- * THolder<THandle> TPullStreamProgram::Apply(...);
- * THolder<THandle> TPullListProgram::Apply(...);
- * @endcode
- *
- * @tparam UseSkiff write output in Skiff format if true, use YSON otherwise.
- */
- template <bool UseSkiff>
- class TMkqlOutputSpec: public TMkqlSpec<UseSkiff, TOutputSpecBase> {
- public:
- using TMkqlSpec<UseSkiff, TOutputSpecBase>::TMkqlSpec;
-
- using TBase = TOutputSpecBase;
- static constexpr bool UseSkiffValue = UseSkiff;
-
- private:
- NYT::TNode Schema_;
-
- public:
- explicit TMkqlOutputSpec(NYT::TNode);
-
- const NYT::TNode& GetSchema() const override;
- };
-
- /**
- * A class which can invoke a purecalc program and store its output in the given output stream.
- */
- class THandle: private TMoveOnly {
- public:
- /**
- * Run the program. Read a chunk from the program's assigned input, parse it and pass it to the program.
- * Than serialize the program's output and write it to the given output stream. Repeat until the input
- * stream is empty.
- */
- /// @{
- /**
- * Overload for single-table output programs (i.e. output type is struct).
- */
- virtual void Run(IOutputStream*) = 0;
- /**
- * Overload for multi-table output programs (i.e. output type is variant over tuple).
- * Size of vector should match number of variant alternatives.
- */
- virtual void Run(const TVector<IOutputStream*>&) = 0;
- /**
- * Overload for multi-table output programs (i.e. output type is variant over struct).
- * Size of map should match number of variant alternatives. For every alternative there should be a stream
- * in the map.
- */
- virtual void Run(const TMap<TString, IOutputStream*>&) = 0;
- /// @}
-
- virtual ~THandle() = default;
- };
-
- template <bool UseSkiff>
- struct TInputSpecTraits<TMkqlInputSpec<UseSkiff>> {
- static const constexpr bool IsPartial = false;
-
- static const constexpr bool SupportPullStreamMode = true;
- static const constexpr bool SupportPullListMode = true;
- static const constexpr bool SupportPushStreamMode = false;
-
- static void PreparePullStreamWorker(
- const TMkqlInputSpec<UseSkiff>& spec, IPullStreamWorker* worker, const TVector<IInputStream*>& streams);
-
- static void PreparePullStreamWorker(
- const TMkqlInputSpec<UseSkiff>& spec, IPullStreamWorker* worker, TVector<THolder<IInputStream>>&& streams);
-
- static void PreparePullStreamWorker(
- const TMkqlInputSpec<UseSkiff>& spec, IPullStreamWorker* worker, const TVector<NYT::TRawTableReader*>& streams);
-
- static void PreparePullStreamWorker(
- const TMkqlInputSpec<UseSkiff>& spec, IPullStreamWorker* worker, TVector<THolder<NYT::TRawTableReader>>&& streams);
-
- static void PreparePullListWorker(
- const TMkqlInputSpec<UseSkiff>& spec, IPullListWorker* worker, const TVector<IInputStream*>& streams);
-
- static void PreparePullListWorker(
- const TMkqlInputSpec<UseSkiff>& spec, IPullListWorker* worker, TVector<THolder<IInputStream>>&& streams);
-
- static void PreparePullListWorker(
- const TMkqlInputSpec<UseSkiff>& spec, IPullListWorker* worker, const TVector<NYT::TRawTableReader*>& streams);
-
- static void PreparePullListWorker(
- const TMkqlInputSpec<UseSkiff>& spec, IPullListWorker* worker, TVector<THolder<NYT::TRawTableReader>>&& streams);
-
- // Members for single-input programs
-
- static void PreparePullStreamWorker(
- const TMkqlInputSpec<UseSkiff>& spec, IPullStreamWorker* worker, IInputStream* stream);
-
- static void PreparePullStreamWorker(
- const TMkqlInputSpec<UseSkiff>& spec, IPullStreamWorker* worker, THolder<IInputStream> stream);
-
- static void PreparePullStreamWorker(
- const TMkqlInputSpec<UseSkiff>& spec, IPullStreamWorker* worker, NYT::TRawTableReader* stream);
-
- static void PreparePullStreamWorker(
- const TMkqlInputSpec<UseSkiff>& spec, IPullStreamWorker* worker, THolder<NYT::TRawTableReader> stream);
-
- static void PreparePullListWorker(
- const TMkqlInputSpec<UseSkiff>& spec, IPullListWorker* worker, IInputStream* stream);
-
- static void PreparePullListWorker(
- const TMkqlInputSpec<UseSkiff>& spec, IPullListWorker* worker, THolder<IInputStream> stream);
-
- static void PreparePullListWorker(
- const TMkqlInputSpec<UseSkiff>& spec, IPullListWorker* worker, NYT::TRawTableReader* stream);
-
- static void PreparePullListWorker(
- const TMkqlInputSpec<UseSkiff>& spec, IPullListWorker* worker, THolder<NYT::TRawTableReader> stream);
- };
-
- template <bool UseSkiff>
- struct TOutputSpecTraits<TMkqlOutputSpec<UseSkiff>> {
- static const constexpr bool IsPartial = false;
-
- static const constexpr bool SupportPullStreamMode = true;
- static const constexpr bool SupportPullListMode = true;
- static const constexpr bool SupportPushStreamMode = false;
-
- using TPullStreamReturnType = THolder<THandle>;
- using TPullListReturnType = THolder<THandle>;
-
- static TPullStreamReturnType ConvertPullStreamWorkerToOutputType(const TMkqlOutputSpec<UseSkiff>&, TWorkerHolder<IPullStreamWorker>);
-
- static TPullListReturnType ConvertPullListWorkerToOutputType(const TMkqlOutputSpec<UseSkiff>&, TWorkerHolder<IPullListWorker>);
- };
-
- using TSkiffInputSpec = TMkqlInputSpec<true>;
- using TSkiffOutputSpec = TMkqlOutputSpec<true>;
-
- using TYsonInputSpec = TMkqlInputSpec<false>;
- using TYsonOutputSpec = TMkqlOutputSpec<false>;
- }
-}
diff --git a/ydb/library/yql/public/purecalc/io_specs/mkql/ut/no_llvm/ya.make b/ydb/library/yql/public/purecalc/io_specs/mkql/ut/no_llvm/ya.make
deleted file mode 100644
index 0b71e93b95d..00000000000
--- a/ydb/library/yql/public/purecalc/io_specs/mkql/ut/no_llvm/ya.make
+++ /dev/null
@@ -1,24 +0,0 @@
-UNITTEST()
-
-SIZE(MEDIUM)
-
-TIMEOUT(300)
-
-PEERDIR(
- yql/essentials/public/udf/service/exception_policy
- ydb/library/yql/public/purecalc/no_llvm
- ydb/library/yql/public/purecalc/io_specs/mkql/no_llvm
- ydb/library/yql/public/purecalc/ut/lib
-)
-
-YQL_LAST_ABI_VERSION()
-
-SRCDIR(
- ydb/library/yql/public/purecalc/io_specs/mkql/ut
-)
-
-SRCS(
- test_spec.cpp
-)
-
-END()
diff --git a/ydb/library/yql/public/purecalc/io_specs/mkql/ut/test.inl b/ydb/library/yql/public/purecalc/io_specs/mkql/ut/test.inl
deleted file mode 100644
index 03b0958b614..00000000000
--- a/ydb/library/yql/public/purecalc/io_specs/mkql/ut/test.inl
+++ /dev/null
@@ -1,777 +0,0 @@
-Y_UNIT_TEST_SUITE(TEST_SUITE_NAME) {
- using NYql::NPureCalc::NPrivate::GetSchema;
-
- Y_UNIT_TEST(TestAllTypes) {
- using namespace NYql::NPureCalc;
-
- TVector<TString> fields {"int64", "uint64", "double", "bool", "string", "yson"};
- auto schema = GetSchema(fields);
- auto stream = GET_STREAM(fields);
-
- auto factory = MakeProgramFactory();
-
- {
- auto program = CREATE_PROGRAM(
- INPUT_SPEC {schema},
- OUTPUT_SPEC {schema},
- "SELECT * FROM Input",
- ETranslationMode::SQL, 1
- );
-
- auto input = TStringStream(stream);
- auto handle = program->Apply(&input);
- TStringStream output;
- handle->Run(&output);
-
- ASSERT_EQUAL_STREAMS(stream, output);
- }
-
- // invalid table prefix
- UNIT_ASSERT_EXCEPTION_CONTAINS([&](){
- CREATE_PROGRAM(
- INPUT_SPEC {schema},
- OUTPUT_SPEC {schema},
- "SELECT * FROM Table",
- ETranslationMode::SQL, 1
- );
- }(), TCompileError, "Failed to optimize");
-
- // invalid table suffix (input index)
- UNIT_ASSERT_EXCEPTION_CONTAINS([&](){
- CREATE_PROGRAM(
- INPUT_SPEC {schema},
- OUTPUT_SPEC {schema},
- "SELECT * FROM Input1",
- ETranslationMode::SQL, 1
- );
- }(), TCompileError, "Failed to optimize");
- }
-
- Y_UNIT_TEST(TestColumnsFilter) {
- using namespace NYql::NPureCalc;
-
- TVector<TString> fields {"int64", "uint64", "double", "bool", "string", "yson"};
- auto schema = GetSchema(fields);
- auto stream = GET_STREAM(fields);
-
- TVector<TString> someFields {"int64", "bool", "string"};
- auto someSchema = GetSchema(someFields);
- auto someStream = GET_STREAM(someFields);
-
- auto factory = MakeProgramFactory();
-
- {
- auto inputSpec = INPUT_SPEC {schema};
- auto outputSpec = OUTPUT_SPEC {someSchema};
-
- auto program = CREATE_PROGRAM(
- inputSpec,
- outputSpec,
- "SELECT int64, bool, string FROM Input",
- ETranslationMode::SQL, 1
- );
-
- UNIT_ASSERT_VALUES_EQUAL(
- program->GetUsedColumns(),
- THashSet<TString>(someFields.begin(), someFields.end())
- );
-
- UNIT_ASSERT_VALUES_EQUAL(
- program->GetUsedColumns(0),
- program->GetUsedColumns()
- );
-
- UNIT_ASSERT_EXCEPTION_CONTAINS(([&](){
- auto unused = program->GetUsedColumns(1);
- }()), yexception, "invalid input index (1) in GetUsedColumns call");
-
- auto input = TStringStream(stream);
- auto handle = program->Apply(&input);
- TStringStream output;
- handle->Run(&output);
-
- ASSERT_EQUAL_STREAMS(someStream, output);
-
- UNIT_ASSERT_EXCEPTION_CONTAINS(([&](){
- auto outputs = TVector<IOutputStream*>({});
- program->Apply(&input)->Run(outputs);
- }()), yexception, "cannot be used with single-output programs");
-
- UNIT_ASSERT_EXCEPTION_CONTAINS(([&](){
- auto outputs = TVector<IOutputStream*>({&output});
- program->Apply(&input)->Run(outputs);
- }()), yexception, "cannot be used with single-output programs");
-
- UNIT_ASSERT_EXCEPTION_CONTAINS(([&](){
- auto outputs = TMap<TString, IOutputStream*>();
- program->Apply(&input)->Run(outputs);
- }()), yexception, "cannot be used with single-output programs");
- }
- }
-
-#ifdef PULL_LIST_MODE
- Y_UNIT_TEST(TestColumnsFilterMultiInput) {
- using namespace NYql::NPureCalc;
-
- TVector<TString> fields0 {"int64", "uint64", "double"};
- auto schema0 = GetSchema(fields0);
- TVector<TString> someFields0 {"int64", "uint64"};
-
- TVector<TString> fields1 {"bool", "string", "yson"};
- auto schema1 = GetSchema(fields1);
- TVector<TString> someFields1 {"bool", "yson"};
-
- TVector<TString> unitedFields {"int64", "uint64", "bool", "yson"};
- auto unitedSchema = GetSchema(unitedFields, unitedFields);
-
- auto factory = MakeProgramFactory();
-
- {
- auto inputSpec = INPUT_SPEC {{schema0, schema1}};
- auto outputSpec = OUTPUT_SPEC {unitedSchema};
-
- auto program = CREATE_PROGRAM(
- inputSpec,
- outputSpec,
- R"(
-SELECT int64, uint64 FROM Input0
-UNION ALL
-SELECT bool, yson FROM Input1
- )",
- ETranslationMode::SQL, 1
- );
-
- UNIT_ASSERT_VALUES_EQUAL(
- program->GetUsedColumns(0),
- THashSet<TString>(someFields0.begin(), someFields0.end())
- );
-
- UNIT_ASSERT_VALUES_EQUAL(
- program->GetUsedColumns(1),
- THashSet<TString>(someFields1.begin(), someFields1.end())
- );
-
- UNIT_ASSERT_EXCEPTION_CONTAINS(([&](){
- auto unused = program->GetUsedColumns();
- }()), yexception, "GetUsedColumns() can be used only with single-input programs");
-
- UNIT_ASSERT_EXCEPTION_CONTAINS(([&](){
- auto unused = program->GetUsedColumns(2);
- }()), yexception, "invalid input index (2) in GetUsedColumns call");
- }
- }
-#endif
-
- Y_UNIT_TEST(TestColumnsFilterWithOptionalFields) {
- using namespace NYql::NPureCalc;
-
- TVector<TString> fields {"int64", "uint64", "double", "bool", "string", "yson"};
- auto schema = GetSchema(fields);
- auto stream = GET_STREAM(fields);
-
- TVector<TString> someFields {"int64", "bool", "string"};
- TVector<TString> someOptionalFields {"string"};
-
- auto someSchema = GetSchema(someFields);
- auto someStream = GET_STREAM(someFields, someOptionalFields);
- auto someOptionalSchema = GetSchema(someFields, someOptionalFields);
-
- auto factory = MakeProgramFactory();
-
- {
- auto program = CREATE_PROGRAM(
- INPUT_SPEC {schema},
- OUTPUT_SPEC {someOptionalSchema},
- "SELECT int64, bool, Nothing(String?) as string FROM Input",
- ETranslationMode::SQL, 1
- );
-
- UNIT_ASSERT_VALUES_EQUAL(
- program->GetUsedColumns(),
- THashSet<TString>({"int64", "bool"})
- );
-
- UNIT_ASSERT_VALUES_EQUAL(
- program->GetUsedColumns(),
- program->GetUsedColumns(0)
- );
-
- auto input = TStringStream(stream);
- auto handle = program->Apply(&input);
- TStringStream output;
- handle->Run(&output);
-
- ASSERT_EQUAL_STREAMS(someStream, output);
- }
-
- UNIT_ASSERT_EXCEPTION_CONTAINS([&](){
- CREATE_PROGRAM(
- INPUT_SPEC {schema},
- OUTPUT_SPEC {someSchema},
- "SELECT int64, bool, Nothing(String?) as string FROM Input",
- ETranslationMode::SQL, 1
- );
- }(), TCompileError, "Failed to optimize");
- }
-
- Y_UNIT_TEST(TestOutputSpecInference) {
- using namespace NYql::NPureCalc;
-
- TVector<TString> fields {"int64", "uint64", "double", "bool", "string"};
- auto schema = GetSchema(fields);
- auto stream = GET_STREAM(fields);
-
- TVector<TString> someFields {"bool", "int64", "string"}; // Keep this sorted...
- auto someSchema = GetSchema(someFields);
- auto someStream = GET_STREAM(someFields);
-
- auto factory = MakeProgramFactory();
-
- {
- auto inputSpec = INPUT_SPEC {schema};
- auto outputSpec = OUTPUT_SPEC {NYT::TNode::CreateEntity()};
-
- auto program = CREATE_PROGRAM(
- inputSpec,
- outputSpec,
- "SELECT int64, bool, string FROM Input",
- ETranslationMode::SQL, 1
- );
-
- UNIT_ASSERT_EQUAL(program->MakeFullOutputSchema(), someSchema);
-
- UNIT_ASSERT_VALUES_EQUAL(
- program->GetUsedColumns(),
- THashSet<TString>(someFields.begin(), someFields.end())
- );
-
- UNIT_ASSERT_VALUES_EQUAL(
- program->GetUsedColumns(),
- program->GetUsedColumns(0)
- );
-
- auto input = TStringStream(stream);
- auto handle = program->Apply(&input);
- TStringStream output;
- handle->Run(&output);
-
- ASSERT_EQUAL_STREAMS(someStream, output);
- }
- }
-
-#ifdef PULL_LIST_MODE
- Y_UNIT_TEST(TestJoinInputs) {
- using namespace NYql::NPureCalc;
-
- TVector<TString> fields0 {"int64", "uint64", "double"};
- auto schema0 = GetSchema(fields0);
- auto stream0 = GET_STREAM(fields0);
-
- TVector<TString> fields1 {"int64", "bool", "string"};
- auto schema1 = GetSchema(fields1);
- auto stream1 = GET_STREAM(fields1);
-
- TVector<TString> joinedFields {"bool", "double", "int64", "string", "uint64"}; // keep this sorted
- auto joinedSchema = GetSchema(joinedFields);
- auto joinedStream = GET_STREAM(joinedFields);
-
- auto factory = MakeProgramFactory();
-
- {
- auto inputSpec = INPUT_SPEC {{schema0, schema1}};
- auto outputSpec = OUTPUT_SPEC {NYT::TNode::CreateEntity()};
-
- auto program = CREATE_PROGRAM(
- inputSpec,
- outputSpec,
- R"(
-SELECT
- t0.int64 AS int64,
- t0.uint64 AS uint64,
- t0.double AS double,
- t1.bool AS bool,
- t1.string AS string
-FROM
- Input0 AS t0
-INNER JOIN
- Input1 AS t1
-ON t0.int64 == t1.int64
-ORDER BY int64
- )",
- ETranslationMode::SQL, 1
- );
-
- UNIT_ASSERT_EQUAL(program->MakeFullOutputSchema(), joinedSchema);
-
- UNIT_ASSERT_VALUES_EQUAL(
- program->GetUsedColumns(0),
- THashSet<TString>(fields0.begin(), fields0.end())
- );
-
- UNIT_ASSERT_VALUES_EQUAL(
- program->GetUsedColumns(1),
- THashSet<TString>(fields1.begin(), fields1.end())
- );
-
- TStringStream input0(stream0);
- TStringStream input1(stream1);
- auto handle = program->Apply<TVector<IInputStream*>>({&input0, &input1});
- TStringStream output;
- handle->Run(&output);
-
- ASSERT_EQUAL_STREAMS(joinedStream, output);
- }
- }
-#endif
-
- Y_UNIT_TEST(TestMultiOutputOverTuple) {
- using namespace NYql::NPureCalc;
-
- TVector<TString> fields {"int64", "uint64", "double", "bool", "string"};
- auto schema = GetSchema(fields);
- auto stream = GET_STREAM(fields, {}, 0, 10, 1);
-
- TVector<TString> someFields1 {"bool", "int64", "string"};
- auto someSchema1 = GetSchema(someFields1);
- auto someStream1 = GET_STREAM(someFields1, {}, 0, 10, 2);
-
- TVector<TString> someFields2 {"bool", "double"};
- auto someSchema2 = GetSchema(someFields2);
- auto someStream2 = GET_STREAM(someFields2, {}, 1, 10, 2);
-
- auto factory = MakeProgramFactory();
-
- {
- auto inputSpec = INPUT_SPEC {schema};
- auto outputSpec = OUTPUT_SPEC {NYT::TNode::CreateEntity()};
-
- auto program = CREATE_PROGRAM(
- inputSpec,
- outputSpec,
- R"(
-(
- (let vt (ParseType '"Variant<Struct<bool:Bool, int64:Int64, string:String>, Struct<bool:Bool, double:Double>>"))
- (return (Map (Self '0) (lambda '(x) (block '(
- (let r1 (Variant (AsStruct '('bool (Member x 'bool)) '('int64 (Member x 'int64)) '('string (Member x 'string))) '0 vt))
- (let r2 (Variant (AsStruct '('bool (Member x 'bool)) '('double (Member x 'double))) '1 vt))
- (return (If (Coalesce (== (% (Member x 'int64) (Int64 '2)) (Int64 '0)) (Bool 'false)) r1 r2))
- )))))
-)
- )",
- ETranslationMode::SExpr
- );
-
- auto input = TStringStream(stream);
- auto handle = program->Apply(&input);
- TStringStream output1, output2;
- auto outputs = TVector<IOutputStream*>({&output1, &output2});
- handle->Run(outputs);
- ASSERT_EQUAL_STREAMS(someStream1, output1);
- ASSERT_EQUAL_STREAMS(someStream2, output2);
-
- UNIT_ASSERT_EXCEPTION_CONTAINS(([&](){
- program->Apply(&input)->Run(&output1);
- }()), yexception, "cannot be used with multi-output programs");
-
- UNIT_ASSERT_EXCEPTION_CONTAINS(([&](){
- auto outputs = TVector<IOutputStream*>({});
- program->Apply(&input)->Run(outputs);
- }()), yexception, "Number of variant alternatives should match number of streams");
-
- UNIT_ASSERT_EXCEPTION_CONTAINS(([&](){
- auto outputs = TVector<IOutputStream*>({&output1, &output1, &output1});
- program->Apply(&input)->Run(outputs);
- }()), yexception, "Number of variant alternatives should match number of streams");
-
- UNIT_ASSERT_EXCEPTION_CONTAINS(([&](){
- auto outputs = TMap<TString, IOutputStream*>();
- program->Apply(&input)->Run(outputs);
- }()), yexception, "cannot be used to process variants over tuple");
- }
- }
-
- Y_UNIT_TEST(TestMultiOutputOverStruct) {
- using namespace NYql::NPureCalc;
-
- TVector<TString> fields {"int64", "uint64", "double", "bool", "string"};
- auto schema = GetSchema(fields);
- auto stream = GET_STREAM(fields, {}, 0, 10, 1);
-
- TVector<TString> someFields1 {"bool", "int64", "string"};
- auto someSchema1 = GetSchema(someFields1);
- auto someStream1 = GET_STREAM(someFields1, {}, 0, 10, 2);
-
- TVector<TString> someFields2 {"bool", "double"};
- auto someSchema2 = GetSchema(someFields2);
- auto someStream2 = GET_STREAM(someFields2, {}, 1, 10, 2);
-
- auto factory = MakeProgramFactory();
-
- {
- auto inputSpec = INPUT_SPEC {schema};
- auto outputSpec = OUTPUT_SPEC {NYT::TNode::CreateEntity()};
-
- auto program = CREATE_PROGRAM(
- inputSpec,
- outputSpec,
- R"(
-(
- (let vt (ParseType '"Variant<A2:Struct<bool:Bool, double:Double>, A1:Struct<bool:Bool, int64:Int64, string:String>>"))
- (return (Map (Self '0) (lambda '(x) (block '(
- (let r1 (Variant (AsStruct '('bool (Member x 'bool)) '('int64 (Member x 'int64)) '('string (Member x 'string))) 'A1 vt))
- (let r2 (Variant (AsStruct '('bool (Member x 'bool)) '('double (Member x 'double))) 'A2 vt))
- (return (If (Coalesce (== (% (Member x 'int64) (Int64 '2)) (Int64 '0)) (Bool 'false)) r1 r2))
- )))))
-)
- )",
- ETranslationMode::SExpr
- );
-
- auto input = TStringStream(stream);
- auto handle = program->Apply(&input);
- TStringStream output1, output2;
- auto outputs = TMap<TString, IOutputStream*>();
- outputs["A1"] = &output1;
- outputs["A2"] = &output2;
- handle->Run(outputs);
- ASSERT_EQUAL_STREAMS(someStream1, output1);
- ASSERT_EQUAL_STREAMS(someStream2, output2);
-
- UNIT_ASSERT_EXCEPTION_CONTAINS(([&](){
- program->Apply(&input)->Run(&output1);
- }()), yexception, "cannot be used with multi-output programs");
-
- UNIT_ASSERT_EXCEPTION_CONTAINS(([&](){
- auto outputs = TVector<IOutputStream*>({});
- program->Apply(&input)->Run(outputs);
- }()), yexception, "cannot be used to process variants over struct");
-
- UNIT_ASSERT_EXCEPTION_CONTAINS(([&](){
- auto outputs = TMap<TString, IOutputStream*>();
- outputs["A1"] = &output1;
- program->Apply(&input)->Run(outputs);
- }()), yexception, "Number of variant alternatives should match number of streams");
-
- UNIT_ASSERT_EXCEPTION_CONTAINS(([&](){
- auto outputs = TMap<TString, IOutputStream*>();
- outputs["A1"] = &output1;
- outputs["A2"] = &output1;
- outputs["A3"] = &output1;
- program->Apply(&input)->Run(outputs);
- }()), yexception, "Number of variant alternatives should match number of streams");
-
- UNIT_ASSERT_EXCEPTION_CONTAINS(([&](){
- auto outputs = TMap<TString, IOutputStream*>();
- outputs["A1"] = &output1;
- outputs["B1"] = &output1;
- program->Apply(&input)->Run(outputs);
- }()), yexception, "Cannot find stream for alternative \"A2\"");
- }
- }
-
-#ifdef GET_STREAM_WITH_STRUCT
- Y_UNIT_TEST(TestReadNativeStructs) {
- using namespace NYql::NPureCalc;
-
- auto factory = MakeProgramFactory(
- TProgramFactoryOptions().SetNativeYtTypeFlags(NYql::NTCF_PRODUCTION)
- );
-
- auto runProgram = [&factory](bool sorted) -> TStringStream {
- auto inputSchema = GET_SCHEMA_WITH_STRUCT(sorted);
-
- auto input0 = GET_STREAM_WITH_STRUCT(sorted, 0, 2);
- auto input1 = GET_STREAM_WITH_STRUCT(sorted, 2, 4);
-
- auto inputSpec = INPUT_SPEC{{inputSchema, inputSchema}}.SetUseOriginalRowSpec(!sorted);
- auto outputSpec = OUTPUT_SPEC{NYT::TNode::CreateEntity()};
-
- auto program = CREATE_PROGRAM(
- inputSpec,
- outputSpec,
- R"(
-(
- (return (Extend (Self '0) (Self '1)))
-)
- )",
- ETranslationMode::SExpr
- );
-
- TStringStream result;
-
- auto handle = program->Apply(TVector<IInputStream*>({&input0, &input1}));
- handle->Run(&result);
-
- return result;
- };
-
- auto etalon = GET_STREAM_WITH_STRUCT(true, 0, 4);
-
- auto output0 = runProgram(true);
- auto output1 = runProgram(false);
-
- ASSERT_EQUAL_STREAMS(output0, etalon);
- ASSERT_EQUAL_STREAMS(output1, etalon);
- }
-#endif
-
- Y_UNIT_TEST(TestIndependentProcessings) {
- using namespace NYql::NPureCalc;
-
- TVector<TString> fields0 {"double", "int64", "string"}; // keep this sorted
- auto schema0 = GetSchema(fields0);
- auto stream0 = GET_STREAM(fields0, {}, 0, 10, 1);
-
- TVector<TString> someFields0 {"int64", "string"};
- auto someStream0 = GET_STREAM(someFields0, {}, 0, 10, 2); // sample with even int64 numbers
-
- TVector<TString> fields1 {"bool", "int64", "uint64"}; // keep this sorted
- auto schema1 = GetSchema(fields1);
- auto stream1 = GET_STREAM(fields1, {}, 0, 10, 1);
-
- TVector<TString> someFields1 {"int64", "uint64"};
- auto someStream1 = GET_STREAM(someFields1, {}, 1, 10, 2); // sample with odd int64 numbers
-
- auto factory = MakeProgramFactory();
-
- {
- auto inputSpec = INPUT_SPEC {{schema0, schema1}};
- auto outputSpec = OUTPUT_SPEC {NYT::TNode::CreateEntity()};
-
- auto program = CREATE_PROGRAM(
- inputSpec,
- outputSpec,
- R"(
-(
- (let $type (ParseType '"Variant<Struct<int64: Int64, string:String>, Struct<int64:Int64, uint64: Uint64>>"))
- (let $stream0 (FlatMap (Self '0) (lambda '(x) (block '(
- (let $item (Variant (AsStruct '('int64 (Member x 'int64)) '('string (Member x 'string))) '0 $type))
- (return (ListIf (Coalesce (== (% (Member x 'int64) (Int64 '2)) (Int64 '0)) (Bool 'false)) $item))
- )))))
- (let $stream1 (FlatMap (Self '1) (lambda '(x) (block '(
- (let $item (Variant (AsStruct '('int64 (Member x 'int64)) '('uint64 (Member x 'uint64))) '1 $type))
- (return (ListIf (Coalesce (== (% (Member x 'int64) (Int64 '2)) (Int64 '1)) (Bool 'false)) $item))
- )))))
- (return (Extend $stream0 $stream1))
-)
- )",
- ETranslationMode::SExpr
- );
-
- UNIT_ASSERT_EQUAL(program->MakeInputSchema(0), schema0);
- UNIT_ASSERT_EQUAL(program->MakeInputSchema(1), schema1);
-
- UNIT_ASSERT_EXCEPTION_CONTAINS(([&](){
- auto unused = program->MakeInputSchema(2);
- }()), yexception, "invalid input index (2) in MakeInputSchema call");
-
- UNIT_ASSERT_EXCEPTION_CONTAINS(([&](){
- auto unused = program->MakeInputSchema();
- }()), yexception, "MakeInputSchema() can be used only with single-input programs");
-
- TStringStream input0(stream0);
- TStringStream input1(stream1);
- auto handle = program->Apply(TVector<IInputStream*>({&input0, &input1}));
- TStringStream output0, output1;
- handle->Run(TVector<IOutputStream*>({&output0, &output1}));
-
- ASSERT_EQUAL_STREAMS(someStream0, output0);
- ASSERT_EQUAL_STREAMS(someStream1, output1);
-
- UNIT_ASSERT_EXCEPTION_CONTAINS(([&](){
- auto unused = program->Apply(TVector<IInputStream*>());
- }()), yexception, "number of input streams should match number of inputs");
-
- UNIT_ASSERT_EXCEPTION_CONTAINS(([&](){
- auto unused = program->Apply(TVector<IInputStream*>({&input0}));
- }()), yexception, "number of input streams should match number of inputs");
-
- UNIT_ASSERT_EXCEPTION_CONTAINS(([&](){
- TStringStream input2;
- auto unused = program->Apply(TVector<IInputStream*>({&input0, &input1, &input2}));
- }()), yexception, "number of input streams should match number of inputs");
-
- UNIT_ASSERT_EXCEPTION_CONTAINS(([&](){
- auto unused = program->Apply(&input0);
- }()), yexception, "number of input streams should match number of inputs");
- }
- }
-
- Y_UNIT_TEST(TestMergeInputs) {
- using namespace NYql::NPureCalc;
-
- TVector<TString> fields0 {"double", "int64", "string", "uint64"}; // keep this sorted
- auto schema0 = GetSchema(fields0);
- auto stream0 = GET_STREAM(fields0, {}, 0, 5, 1);
-
- TVector<TString> fields1 {"double", "int64", "uint64", "yson"}; // keep this sorted
- auto schema1 = GetSchema(fields1);
- auto stream1 = GET_STREAM(fields1, {}, 5, 10, 1);
-
- TVector<TString> someFields {"double", "int64", "uint64"}; // keep this sorted
- auto mergedStream = GET_STREAM(someFields, {}, 0, 10, 1);
- auto mergedSchema = GetSchema(someFields);
-
- auto factory = MakeProgramFactory();
-
- {
- auto inputSpec = INPUT_SPEC {{schema0, schema1}};
- auto outputSpec = OUTPUT_SPEC {NYT::TNode::CreateEntity()};
-
- auto program = CREATE_PROGRAM(
- inputSpec,
- outputSpec,
- R"(
-(
- (let $stream0 (Map (Self '0) (lambda '(x) (RemoveMember x 'string))))
- (let $stream1 (Map (Self '1) (lambda '(x) (RemoveMember x 'yson))))
- (return (Extend $stream0 $stream1))
-)
- )",
- ETranslationMode::SExpr
- );
-
- UNIT_ASSERT_EQUAL(program->MakeInputSchema(0), schema0);
- UNIT_ASSERT_EQUAL(program->MakeInputSchema(1), schema1);
- UNIT_ASSERT_EQUAL(program->MakeFullOutputSchema(), mergedSchema);
-
- TStringStream input0(stream0);
- TStringStream input1(stream1);
- auto handle = program->Apply(TVector<IInputStream*>({&input0, &input1}));
- TStringStream output;
- handle->Run(&output);
-
- ASSERT_EQUAL_STREAMS(mergedStream, output);
- }
- }
-
- Y_UNIT_TEST(TestTableName) {
- using namespace NYql::NPureCalc;
-
- TVector<TVector<int>> values = {{3, 5}};
-
- auto inputSchema = GetSchema({"int64"});
- auto stream = GET_MULTITABLE_STREAM(values);
- auto etalon = GET_MULTITABLE_STREAM(values, {"Input"});
-
- auto factory = MakeProgramFactory(TProgramFactoryOptions().SetUseSystemColumns(true));
-
- {
- auto program = CREATE_PROGRAM(
- INPUT_SPEC(inputSchema),
- OUTPUT_SPEC(NYT::TNode::CreateEntity()),
- "SELECT int64, TableName() AS tname FROM Input",
- ETranslationMode::SQL
- );
-
- auto handle = program->Apply(&stream);
- TStringStream output;
- handle->Run(&output);
-
- ASSERT_EQUAL_STREAMS(output, etalon);
- }
- }
-
- Y_UNIT_TEST(TestCustomTableName) {
- using namespace NYql::NPureCalc;
-
- TVector<TVector<int>> values = {{3, 5}, {2, 8}};
- TVector<TString> tableNames = {"One", "Two"};
-
- auto inputSchema = GetSchema({"int64"});
- auto stream = GET_MULTITABLE_STREAM(values);
- auto etalon = GET_MULTITABLE_STREAM(values, tableNames);
-
- auto factory = MakeProgramFactory(TProgramFactoryOptions().SetUseSystemColumns(true));
-
- {
- auto program = CREATE_PROGRAM(
- INPUT_SPEC(inputSchema).SetTableNames(tableNames),
- OUTPUT_SPEC(NYT::TNode::CreateEntity()),
- "SELECT int64, TableName() AS tname FROM TABLES()",
- ETranslationMode::SQL
- );
-
- auto handle = program->Apply(&stream);
- TStringStream output;
- handle->Run(&output);
-
- ASSERT_EQUAL_STREAMS(output, etalon);
- }
- }
-
-#ifdef PULL_LIST_MODE
- Y_UNIT_TEST(TestMultiinputTableName) {
- using namespace NYql::NPureCalc;
-
- TVector<TVector<int>> values0 = {{3, 5}};
- TVector<TVector<int>> values1 = {{7, 9}};
-
- auto inputSchema = GetSchema({"int64"});
- auto stream0 = GET_MULTITABLE_STREAM(values0);
- auto stream1 = GET_MULTITABLE_STREAM(values1);
- auto etalon = GET_MULTITABLE_STREAM(JoinVectors(values0, values1), {"Input0", "Input1"});
-
- auto factory = MakeProgramFactory(TProgramFactoryOptions().SetUseSystemColumns(true));
-
- {
- auto program = CREATE_PROGRAM(
- INPUT_SPEC({inputSchema, inputSchema}),
- OUTPUT_SPEC(NYT::TNode::CreateEntity()),
- R"(
-$union = (
- SELECT * FROM Input0
- UNION ALL
- SELECT * FROM Input1
-);
-SELECT TableName() AS tname, int64 FROM $union
- )"
- );
-
- auto handle = program->Apply(TVector<IInputStream*>{&stream0, &stream1});
- TStringStream output;
- handle->Run(&output);
-
- ASSERT_EQUAL_STREAMS(output, etalon);
- }
- }
-
- Y_UNIT_TEST(TestMultiinputCustomTableName) {
- using namespace NYql::NPureCalc;
-
- TVector<TVector<int>> values0 = {{1, 4}, {2, 8}};
- TVector<TVector<int>> values1 = {{3, 5}, {7, 9}};
- TVector<TString> tableNames0 = {"OneA", "TwoA"};
- TVector<TString> tableNames1 = {"OneB", "TwoB"};
-
- auto inputSchema = GetSchema({"int64"});
- auto stream0 = GET_MULTITABLE_STREAM(values0);
- auto stream1 = GET_MULTITABLE_STREAM(values1);
- auto etalon = GET_MULTITABLE_STREAM(JoinVectors(values0, values1), JoinVectors(tableNames0, tableNames1));
-
- auto factory = MakeProgramFactory(TProgramFactoryOptions().SetUseSystemColumns(true));
-
- {
- auto program = CREATE_PROGRAM(
- INPUT_SPEC({inputSchema, inputSchema}).SetTableNames(tableNames0, 0).SetTableNames(tableNames1, 1),
- OUTPUT_SPEC(NYT::TNode::CreateEntity()),
- R"(
-$input0, $input1 = PROCESS TABLES();
-$union = (
- SELECT * FROM $input0
- UNION ALL
- SELECT * FROM $input1
-);
-SELECT TableName() AS tname, int64 FROM $union
- )"
- );
-
- auto handle = program->Apply(TVector<IInputStream*>{&stream0, &stream1});
- TStringStream output;
- handle->Run(&output);
-
- ASSERT_EQUAL_STREAMS(output, etalon);
- }
- }
-#endif
-}
diff --git a/ydb/library/yql/public/purecalc/io_specs/mkql/ut/test_spec.cpp b/ydb/library/yql/public/purecalc/io_specs/mkql/ut/test_spec.cpp
deleted file mode 100644
index 20acad436a0..00000000000
--- a/ydb/library/yql/public/purecalc/io_specs/mkql/ut/test_spec.cpp
+++ /dev/null
@@ -1,325 +0,0 @@
-#include <yql/essentials/providers/common/codec/yql_codec_type_flags.h>
-
-#include <library/cpp/testing/unittest/registar.h>
-#include <library/cpp/yson/writer.h>
-
-#include <library/cpp/yson/node/node.h>
-#include <library/cpp/yson/node/node_io.h>
-#include <library/cpp/yson/node/node_visitor.h>
-
-#include <ydb/library/yql/public/purecalc/common/interface.h>
-#include <ydb/library/yql/public/purecalc/io_specs/mkql/spec.h>
-#include <ydb/library/yql/public/purecalc/ut/lib/helpers.h>
-
-#include <util/generic/hash_set.h>
-#include <util/generic/ptr.h>
-#include <util/stream/str.h>
-
-#include <library/cpp/skiff/skiff.h>
-
-#include <util/generic/yexception.h>
-
-
-namespace {
- TStringStream GetYsonStream(
- const TVector<TString>& fields,
- const TVector<TString>& optionalFields={},
- ui32 start = 0, ui32 stop = 5, ui32 step = 1
- ) {
- THashSet<TString> filter {fields.begin(), fields.end()};
- THashSet<TString> optionalFilter {optionalFields.begin(), optionalFields.end()};
-
- auto addField = [&] (
- NYT::TNode& node, const TString& field, NYT::TNode&& value
- ) {
- if (filter.contains(field) && !optionalFilter.contains(field)) {
- node(field, value);
- }
- };
-
- TStringStream stream;
- NYson::TYsonWriter writer(&stream, NYson::EYsonFormat::Binary, NYson::EYsonType::ListFragment);
- NYT::TNodeVisitor visitor(&writer);
-
- for (ui32 i = start; i < stop; i += step) {
- auto item = NYT::TNode::CreateMap();
-
- addField(item, "int64", (i64)(i));
- addField(item, "uint64", (ui64)(i * 2));
- addField(item, "double", (double)(i * 3.5));
- addField(item, "bool", true);
- addField(item, "string", "foo");
- addField(item, "yson", (i % 2 == 0 ? NYT::TNode(true) : NYT::TNode(false)));
-
- visitor.Visit(item);
- }
-
- return stream;
- }
-
- TStringStream GetMultitableYsonStream(
- const TVector<TVector<int>>& groupedValues,
- const TVector<TString>& etalonTableNames = {}
- ) {
- bool isEtalon = !etalonTableNames.empty();
-
- Y_ENSURE(!isEtalon || groupedValues.size() == etalonTableNames.size());
-
- TStringStream stream;
- NYson::TYsonWriter writer(&stream, NYson::EYsonFormat::Binary, NYson::EYsonType::ListFragment);
- NYT::TNodeVisitor visitor(&writer);
-
- for (ui64 tableIndex = 0; tableIndex < groupedValues.size(); ++tableIndex) {
- if (!isEtalon) {
- auto indexNode = NYT::TNode::CreateEntity();
- indexNode.Attributes() = NYT::TNode::CreateMap()("table_index", static_cast<i64>(tableIndex));
- visitor.Visit(indexNode);
- }
-
- const auto& values = groupedValues[tableIndex];
-
- for (ui64 i = 0; i < values.size(); ++i) {
- auto item = NYT::TNode::CreateMap()("int64", values[i]);
- if (isEtalon) {
- item("tname", etalonTableNames[tableIndex]);
- }
- visitor.Visit(item);
- }
- }
-
- return stream;
- }
-
- void AssertEqualYsonStreams(TStringStream etalonStream, TStringStream stream) {
- NYT::TNode etalonList {
- NYT::NodeFromYsonStream(&etalonStream, NYson::EYsonType::ListFragment)
- };
-
- NYT::TNode list {
- NYT::NodeFromYsonStream(&stream, NYson::EYsonType::ListFragment)
- };
-
- UNIT_ASSERT_EQUAL(etalonList, list);
- }
-
- TStringStream GetSkiffStream(
- const TVector<TString>& fields,
- const TVector<TString>& optionalFields={},
- ui32 start = 0, ui32 stop = 5, ui32 step = 1
- ) {
- THashSet<TString> filter {fields.begin(), fields.end()};
- THashSet<TString> optionalFilter {optionalFields.begin(), optionalFields.end()};
-
- TStringStream stream;
- NSkiff::TUncheckedSkiffWriter writer {&stream};
-
-#define WRITE_FIELD(field, type, value) \
- do { \
- if (filter.contains(field)) { \
- if (optionalFilter.contains(field)) { \
- writer.WriteVariant8Tag(0); \
- } else { \
- writer.Write ## type(value); \
- } \
- } \
- } while (0)
-
- for (ui32 i = start; i < stop; i += step) {
- auto item = NYT::TNode::CreateMap();
-
- writer.WriteVariant16Tag(0);
- WRITE_FIELD("bool", Boolean, true);
- WRITE_FIELD("double", Double, (double)(i * 3.5));
- WRITE_FIELD("int64", Int64, (i64)(i));
- WRITE_FIELD("string", String32, "foo");
- WRITE_FIELD("uint64", Uint64, (ui64)(i * 2));
- WRITE_FIELD("yson", Yson32, (i % 2 == 0 ? "\x05" : "\x04")); // boolean values
- }
-
-#undef WRITE_FIELD
-
- return stream;
- }
-
- TStringStream GetMultitableSkiffStream(
- const TVector<TVector<int>>& groupedValues,
- const TVector<TString>& etalonTableNames = {}
- ) {
- bool isEtalon = !etalonTableNames.empty();
-
- Y_ENSURE(!isEtalon || groupedValues.size() == etalonTableNames.size());
-
- TStringStream stream;
- NSkiff::TUncheckedSkiffWriter writer {&stream};
-
- for (ui64 tableIndex = 0; tableIndex < groupedValues.size(); ++tableIndex) {
- const auto& values = groupedValues[tableIndex];
-
- for (ui64 i = 0; i < values.size(); ++i) {
- if (isEtalon) {
- writer.WriteVariant16Tag(0);
- } else {
- writer.WriteVariant16Tag(tableIndex);
- }
-
- writer.WriteInt64(values[i]);
- if (isEtalon) {
- writer.WriteString32(etalonTableNames[tableIndex]);
- }
- }
- }
-
- return stream;
- }
-
- NYT::TNode GetSkiffSchemaWithStruct(bool sorted) {
- auto aMember = NYT::TNode::CreateList()
- .Add("a")
- .Add(NYT::TNode::CreateList().Add("DataType").Add("String"));
-
- auto bMember = NYT::TNode::CreateList()
- .Add("b")
- .Add(NYT::TNode::CreateList().Add("DataType").Add("Uint64"));
-
- auto members = NYT::TNode::CreateList();
-
- if (sorted) {
- members.Add(std::move(aMember)).Add(std::move(bMember));
- } else {
- members.Add(std::move(bMember)).Add(std::move(aMember));
- }
-
- auto structColumn = NYT::TNode::CreateList()
- .Add("Struct")
- .Add(NYT::TNode::CreateList().Add("StructType").Add(std::move(members)));
-
- auto indexColumn = NYT::TNode::CreateList()
- .Add("Index")
- .Add(NYT::TNode::CreateList().Add("DataType").Add("Uint64"));
-
- auto schema = NYT::TNode::CreateList()
- .Add("StructType")
- .Add(NYT::TNode::CreateList().Add(std::move(indexColumn)).Add(std::move(structColumn)));
-
- return schema;
- }
-
- TStringStream GetSkiffStreamWithStruct(bool sorted, ui32 start = 0, ui32 stop = 5) {
- TStringStream stream;
- NSkiff::TUncheckedSkiffWriter writer {&stream};
-
- auto writeStructMembers = [sorted, &writer](TStringBuf stringMember, ui64 numberMember) {
- if (sorted) {
- writer.WriteString32(stringMember);
- writer.WriteUint64(numberMember);
- } else {
- writer.WriteUint64(numberMember);
- writer.WriteString32(stringMember);
- }
- };
-
- for (ui32 idx = start; idx < stop; ++idx) {
- auto stringData = TStringBuilder{} << "text" << idx;
- writer.WriteVariant16Tag(0);
- writer.WriteUint64(idx);
- writeStructMembers(stringData, idx + 3);
- }
-
- return stream;
- }
-
- void AssertEqualSkiffStreams(TStringStream etalonStream, TStringStream stream) {
- UNIT_ASSERT_VALUES_EQUAL(etalonStream.Str(), stream.Str());
- }
-}
-
-template <typename T>
-TVector<T> JoinVectors(const TVector<T>& first, const TVector<T>& second) {
- TVector<T> result;
- result.reserve(first.size() + second.size());
-
- result.insert(result.end(), first.begin(), first.end());
- result.insert(result.end(), second.begin(), second.end());
-
- return result;
-}
-
-#define PULL_STREAM_MODE
-#define TEST_SUITE_NAME TestPullStreamYsonIO
-#define CREATE_PROGRAM(...) factory->MakePullStreamProgram(__VA_ARGS__)
-#define INPUT_SPEC TYsonInputSpec
-#define OUTPUT_SPEC TYsonOutputSpec
-#define GET_STREAM GetYsonStream
-#define GET_MULTITABLE_STREAM GetMultitableYsonStream
-#define ASSERT_EQUAL_STREAMS AssertEqualYsonStreams
-#include "test.inl"
-#undef ASSERT_EQUAL_STREAMS
-#undef GET_MULTITABLE_STREAM
-#undef GET_STREAM
-#undef OUTPUT_SPEC
-#undef INPUT_SPEC
-#undef CREATE_PROGRAM
-#undef TEST_SUITE_NAME
-#undef PULL_STREAM_MODE
-
-#define PULL_STREAM_MODE
-#define TEST_SUITE_NAME TestPullStreamSkiffIO
-#define CREATE_PROGRAM(...) factory->MakePullStreamProgram(__VA_ARGS__)
-#define INPUT_SPEC TSkiffInputSpec
-#define OUTPUT_SPEC TSkiffOutputSpec
-#define GET_STREAM GetSkiffStream
-#define GET_STREAM_WITH_STRUCT GetSkiffStreamWithStruct
-#define GET_SCHEMA_WITH_STRUCT GetSkiffSchemaWithStruct
-#define GET_MULTITABLE_STREAM GetMultitableSkiffStream
-#define ASSERT_EQUAL_STREAMS AssertEqualSkiffStreams
-#include "test.inl"
-#undef ASSERT_EQUAL_STREAMS
-#undef GET_MULTITABLE_STREAM
-#undef GET_SCHEMA_WITH_STRUCT
-#undef GET_STREAM_WITH_STRUCT
-#undef GET_STREAM
-#undef OUTPUT_SPEC
-#undef INPUT_SPEC
-#undef CREATE_PROGRAM
-#undef TEST_SUITE_NAME
-#undef PULL_STREAM_MODE
-
-#define PULL_LIST_MODE
-#define TEST_SUITE_NAME TestPullListYsonIO
-#define CREATE_PROGRAM(...) factory->MakePullListProgram(__VA_ARGS__)
-#define INPUT_SPEC TYsonInputSpec
-#define OUTPUT_SPEC TYsonOutputSpec
-#define GET_STREAM GetYsonStream
-#define GET_MULTITABLE_STREAM GetMultitableYsonStream
-#define ASSERT_EQUAL_STREAMS AssertEqualYsonStreams
-#include "test.inl"
-#undef ASSERT_EQUAL_STREAMS
-#undef GET_MULTITABLE_STREAM
-#undef GET_STREAM
-#undef OUTPUT_SPEC
-#undef INPUT_SPEC
-#undef CREATE_PROGRAM
-#undef TEST_SUITE_NAME
-#undef PULL_LIST_MODE
-
-#define PULL_LIST_MODE
-#define TEST_SUITE_NAME TestPullListSkiffIO
-#define CREATE_PROGRAM(...) factory->MakePullListProgram(__VA_ARGS__)
-#define INPUT_SPEC TSkiffInputSpec
-#define OUTPUT_SPEC TSkiffOutputSpec
-#define GET_STREAM GetSkiffStream
-#define GET_STREAM_WITH_STRUCT GetSkiffStreamWithStruct
-#define GET_SCHEMA_WITH_STRUCT GetSkiffSchemaWithStruct
-#define GET_MULTITABLE_STREAM GetMultitableSkiffStream
-#define ASSERT_EQUAL_STREAMS AssertEqualSkiffStreams
-#include "test.inl"
-#undef ASSERT_EQUAL_STREAMS
-#undef GET_MULTITABLE_STREAM
-#undef GET_SCHEMA_WITH_STRUCT
-#undef GET_STREAM_WITH_STRUCT
-#undef GET_STREAM
-#undef OUTPUT_SPEC
-#undef INPUT_SPEC
-#undef CREATE_PROGRAM
-#undef TEST_SUITE_NAME
-#undef PULL_LIST_MODE
diff --git a/ydb/library/yql/public/purecalc/io_specs/mkql/ut/ya.make b/ydb/library/yql/public/purecalc/io_specs/mkql/ut/ya.make
deleted file mode 100644
index 7737370d2be..00000000000
--- a/ydb/library/yql/public/purecalc/io_specs/mkql/ut/ya.make
+++ /dev/null
@@ -1,20 +0,0 @@
-UNITTEST()
-
-SIZE(MEDIUM)
-
-TIMEOUT(300)
-
-PEERDIR(
- yql/essentials/public/udf/service/exception_policy
- ydb/library/yql/public/purecalc
- ydb/library/yql/public/purecalc/io_specs/mkql
- ydb/library/yql/public/purecalc/ut/lib
-)
-
-YQL_LAST_ABI_VERSION()
-
-SRCS(
- test_spec.cpp
-)
-
-END()
diff --git a/ydb/library/yql/public/purecalc/io_specs/mkql/ya.make b/ydb/library/yql/public/purecalc/io_specs/mkql/ya.make
deleted file mode 100644
index 00fdbd702d3..00000000000
--- a/ydb/library/yql/public/purecalc/io_specs/mkql/ya.make
+++ /dev/null
@@ -1,19 +0,0 @@
-LIBRARY()
-
-INCLUDE(ya.make.inc)
-
-PEERDIR(
- ydb/library/yql/public/purecalc/common
-)
-
-END()
-
-RECURSE(
- no_llvm
-)
-
-RECURSE_FOR_TESTS(
- ut
- ut/no_llvm
-)
-
diff --git a/ydb/library/yql/public/purecalc/io_specs/mkql/ya.make.inc b/ydb/library/yql/public/purecalc/io_specs/mkql/ya.make.inc
deleted file mode 100644
index 98cb1f1e533..00000000000
--- a/ydb/library/yql/public/purecalc/io_specs/mkql/ya.make.inc
+++ /dev/null
@@ -1,25 +0,0 @@
-SRCDIR(
- ydb/library/yql/public/purecalc/io_specs/mkql
-)
-
-ADDINCL(
- ydb/library/yql/public/purecalc/io_specs/mkql
-)
-
-PEERDIR(
- ydb/library/yql/providers/yt/codec
- ydb/library/yql/providers/yt/common
- ydb/library/yql/providers/yt/lib/mkql_helpers
- yql/essentials/providers/common/codec
- yql/essentials/providers/common/schema/mkql
-)
-
-
-YQL_LAST_ABI_VERSION()
-
-
-SRCS(
- spec.cpp
- spec.h
-)
-
diff --git a/ydb/library/yql/public/purecalc/io_specs/protobuf/proto_variant.cpp b/ydb/library/yql/public/purecalc/io_specs/protobuf/proto_variant.cpp
deleted file mode 100644
index 90f0b339ca6..00000000000
--- a/ydb/library/yql/public/purecalc/io_specs/protobuf/proto_variant.cpp
+++ /dev/null
@@ -1 +0,0 @@
-#include "proto_variant.h"
diff --git a/ydb/library/yql/public/purecalc/io_specs/protobuf/proto_variant.h b/ydb/library/yql/public/purecalc/io_specs/protobuf/proto_variant.h
deleted file mode 100644
index c7d137d0e6f..00000000000
--- a/ydb/library/yql/public/purecalc/io_specs/protobuf/proto_variant.h
+++ /dev/null
@@ -1,80 +0,0 @@
-#pragma once
-
-#include <ydb/library/yql/public/purecalc/common/interface.h>
-
-#include <array>
-
-namespace NYql::NPureCalc::NPrivate {
- using TProtoRawMultiOutput = std::pair<ui32, google::protobuf::Message*>;
-
- template <typename... T>
- using TProtoMultiOutput = std::variant<T*...>;
-
- template <size_t I, typename... T>
- using TProtoOutput = std::add_pointer_t<typename TTypeList<T...>::template TGet<I>>;
-
- template <size_t I, typename... T>
- TProtoMultiOutput<T...> InitProtobufsVariant(google::protobuf::Message* ptr) {
- static_assert(std::conjunction_v<std::is_base_of<google::protobuf::Message, T>...>);
- return TProtoMultiOutput<T...>(std::in_place_index<I>, static_cast<TProtoOutput<I, T...>>(ptr));
- }
-
- template <typename... T>
- class TProtobufsMappingBase {
- public:
- TProtobufsMappingBase()
- : InitFuncs_(BuildInitFuncs(std::make_index_sequence<sizeof...(T)>()))
- {
- }
-
- private:
- typedef TProtoMultiOutput<T...> (*initfunc)(google::protobuf::Message*);
-
- template <size_t... I>
- inline std::array<initfunc, sizeof...(T)> BuildInitFuncs(std::index_sequence<I...>) {
- return {&InitProtobufsVariant<I, T...>...};
- }
-
- protected:
- const std::array<initfunc, sizeof...(T)> InitFuncs_;
- };
-
- template <typename... T>
- class TProtobufsMappingStream: public IStream<TProtoMultiOutput<T...>>, public TProtobufsMappingBase<T...> {
- public:
- TProtobufsMappingStream(THolder<IStream<TProtoRawMultiOutput>> oldStream)
- : OldStream_(std::move(oldStream))
- {
- }
-
- public:
- TProtoMultiOutput<T...> Fetch() override {
- auto&& oldItem = OldStream_->Fetch();
- return this->InitFuncs_[oldItem.first](oldItem.second);
- }
-
- private:
- THolder<IStream<TProtoRawMultiOutput>> OldStream_;
- };
-
- template <typename... T>
- class TProtobufsMappingConsumer: public IConsumer<TProtoRawMultiOutput>, public TProtobufsMappingBase<T...> {
- public:
- TProtobufsMappingConsumer(THolder<IConsumer<TProtoMultiOutput<T...>>> oldConsumer)
- : OldConsumer_(std::move(oldConsumer))
- {
- }
-
- public:
- void OnObject(TProtoRawMultiOutput oldItem) override {
- OldConsumer_->OnObject(this->InitFuncs_[oldItem.first](oldItem.second));
- }
-
- void OnFinish() override {
- OldConsumer_->OnFinish();
- }
-
- private:
- THolder<IConsumer<TProtoMultiOutput<T...>>> OldConsumer_;
- };
-}
diff --git a/ydb/library/yql/public/purecalc/io_specs/protobuf/spec.cpp b/ydb/library/yql/public/purecalc/io_specs/protobuf/spec.cpp
deleted file mode 100644
index 91de6c290a3..00000000000
--- a/ydb/library/yql/public/purecalc/io_specs/protobuf/spec.cpp
+++ /dev/null
@@ -1 +0,0 @@
-#include "spec.h"
diff --git a/ydb/library/yql/public/purecalc/io_specs/protobuf/spec.h b/ydb/library/yql/public/purecalc/io_specs/protobuf/spec.h
deleted file mode 100644
index 53a4a2f96e8..00000000000
--- a/ydb/library/yql/public/purecalc/io_specs/protobuf/spec.h
+++ /dev/null
@@ -1,147 +0,0 @@
-#pragma once
-
-#include "proto_variant.h"
-
-#include <ydb/library/yql/public/purecalc/io_specs/protobuf_raw/spec.h>
-
-namespace NYql {
- namespace NPureCalc {
- /**
- * Processing mode for working with non-raw protobuf messages.
- *
- * @tparam T message type.
- */
- template <typename T>
- class TProtobufInputSpec: public TProtobufRawInputSpec {
- static_assert(std::is_base_of<google::protobuf::Message, T>::value,
- "should be derived from google::protobuf::Message");
- public:
- TProtobufInputSpec(
- const TMaybe<TString>& timestampColumn = Nothing(),
- const TProtoSchemaOptions& options = {}
- )
- : TProtobufRawInputSpec(*T::descriptor(), timestampColumn, options)
- {
- }
- };
-
- /**
- * Processing mode for working with non-raw protobuf messages.
- *
- * @tparam T message type.
- */
- template <typename T>
- class TProtobufOutputSpec: public TProtobufRawOutputSpec {
- static_assert(std::is_base_of<google::protobuf::Message, T>::value,
- "should be derived from google::protobuf::Message");
- public:
- TProtobufOutputSpec(
- const TProtoSchemaOptions& options = {},
- google::protobuf::Arena* arena = nullptr
- )
- : TProtobufRawOutputSpec(*T::descriptor(), nullptr, options, arena)
- {
- }
- };
-
- /**
- * Processing mode for working with non-raw protobuf messages and several outputs.
- */
- template <typename... T>
- class TProtobufMultiOutputSpec: public TProtobufRawMultiOutputSpec {
- static_assert(
- std::conjunction_v<std::is_base_of<google::protobuf::Message, T>...>,
- "all types should be derived from google::protobuf::Message");
- public:
- TProtobufMultiOutputSpec(
- const TProtoSchemaOptions& options = {},
- TMaybe<TVector<google::protobuf::Arena*>> arenas = {}
- )
- : TProtobufRawMultiOutputSpec({T::descriptor()...}, Nothing(), options, std::move(arenas))
- {
- }
- };
-
- template <typename T>
- struct TInputSpecTraits<TProtobufInputSpec<T>> {
- static const constexpr bool IsPartial = false;
-
- static const constexpr bool SupportPullStreamMode = true;
- static const constexpr bool SupportPullListMode = true;
- static const constexpr bool SupportPushStreamMode = true;
-
- using TConsumerType = THolder<IConsumer<T*>>;
-
- static void PreparePullStreamWorker(const TProtobufInputSpec<T>& inputSpec, IPullStreamWorker* worker, THolder<IStream<T*>> stream) {
- auto raw = ConvertStream<google::protobuf::Message*>(std::move(stream));
- TInputSpecTraits<TProtobufRawInputSpec>::PreparePullStreamWorker(inputSpec, worker, std::move(raw));
- }
-
- static void PreparePullListWorker(const TProtobufInputSpec<T>& inputSpec, IPullListWorker* worker, THolder<IStream<T*>> stream) {
- auto raw = ConvertStream<google::protobuf::Message*>(std::move(stream));
- TInputSpecTraits<TProtobufRawInputSpec>::PreparePullListWorker(inputSpec, worker, std::move(raw));
- }
-
- static TConsumerType MakeConsumer(const TProtobufInputSpec<T>& inputSpec, TWorkerHolder<IPushStreamWorker> worker) {
- auto raw = TInputSpecTraits<TProtobufRawInputSpec>::MakeConsumer(inputSpec, std::move(worker));
- return ConvertConsumer<T*>(std::move(raw));
- }
- };
-
- template <typename T>
- struct TOutputSpecTraits<TProtobufOutputSpec<T>> {
- static const constexpr bool IsPartial = false;
-
- static const constexpr bool SupportPullStreamMode = true;
- static const constexpr bool SupportPullListMode = true;
- static const constexpr bool SupportPushStreamMode = true;
-
- using TOutputItemType = T*;
- using TPullStreamReturnType = THolder<IStream<TOutputItemType>>;
- using TPullListReturnType = THolder<IStream<TOutputItemType>>;
-
- static TPullStreamReturnType ConvertPullStreamWorkerToOutputType(const TProtobufOutputSpec<T>& outputSpec, TWorkerHolder<IPullStreamWorker> worker) {
- auto raw = TOutputSpecTraits<TProtobufRawOutputSpec>::ConvertPullStreamWorkerToOutputType(outputSpec, std::move(worker));
- return ConvertStreamUnsafe<TOutputItemType>(std::move(raw));
- }
-
- static TPullListReturnType ConvertPullListWorkerToOutputType(const TProtobufOutputSpec<T>& outputSpec, TWorkerHolder<IPullListWorker> worker) {
- auto raw = TOutputSpecTraits<TProtobufRawOutputSpec>::ConvertPullListWorkerToOutputType(outputSpec, std::move(worker));
- return ConvertStreamUnsafe<TOutputItemType>(std::move(raw));
- }
-
- static void SetConsumerToWorker(const TProtobufOutputSpec<T>& outputSpec, IPushStreamWorker* worker, THolder<IConsumer<T*>> consumer) {
- auto raw = ConvertConsumerUnsafe<google::protobuf::Message*>(std::move(consumer));
- TOutputSpecTraits<TProtobufRawOutputSpec>::SetConsumerToWorker(outputSpec, worker, std::move(raw));
- }
- };
-
- template <typename... T>
- struct TOutputSpecTraits<TProtobufMultiOutputSpec<T...>> {
- static const constexpr bool IsPartial = false;
-
- static const constexpr bool SupportPullStreamMode = true;
- static const constexpr bool SupportPullListMode = true;
- static const constexpr bool SupportPushStreamMode = true;
-
- using TOutputItemType = std::variant<T*...>;
- using TPullStreamReturnType = THolder<IStream<TOutputItemType>>;
- using TPullListReturnType = THolder<IStream<TOutputItemType>>;
-
- static TPullStreamReturnType ConvertPullStreamWorkerToOutputType(const TProtobufMultiOutputSpec<T...>& outputSpec, TWorkerHolder<IPullStreamWorker> worker) {
- auto raw = TOutputSpecTraits<TProtobufRawMultiOutputSpec>::ConvertPullStreamWorkerToOutputType(outputSpec, std::move(worker));
- return THolder(new NPrivate::TProtobufsMappingStream<T...>(std::move(raw)));
- }
-
- static TPullListReturnType ConvertPullListWorkerToOutputType(const TProtobufMultiOutputSpec<T...>& outputSpec, TWorkerHolder<IPullListWorker> worker) {
- auto raw = TOutputSpecTraits<TProtobufRawMultiOutputSpec>::ConvertPullListWorkerToOutputType(outputSpec, std::move(worker));
- return THolder(new NPrivate::TProtobufsMappingStream<T...>(std::move(raw)));
- }
-
- static void SetConsumerToWorker(const TProtobufMultiOutputSpec<T...>& outputSpec, IPushStreamWorker* worker, THolder<IConsumer<TOutputItemType>> consumer) {
- auto wrapper = MakeHolder<NPrivate::TProtobufsMappingConsumer<T...>>(std::move(consumer));
- TOutputSpecTraits<TProtobufRawMultiOutputSpec>::SetConsumerToWorker(outputSpec, worker, std::move(wrapper));
- }
- };
- }
-}
diff --git a/ydb/library/yql/public/purecalc/io_specs/protobuf/ut/test_spec.cpp b/ydb/library/yql/public/purecalc/io_specs/protobuf/ut/test_spec.cpp
deleted file mode 100644
index 3b2a0e20511..00000000000
--- a/ydb/library/yql/public/purecalc/io_specs/protobuf/ut/test_spec.cpp
+++ /dev/null
@@ -1,996 +0,0 @@
-#include <library/cpp/testing/unittest/registar.h>
-
-#include <ydb/library/yql/public/purecalc/common/interface.h>
-#include <ydb/library/yql/public/purecalc/io_specs/protobuf/spec.h>
-#include <ydb/library/yql/public/purecalc/ut/protos/test_structs.pb.h>
-#include <library/cpp/protobuf/util/pb_io.h>
-#include <util/generic/xrange.h>
-
-namespace {
- TMaybe<NPureCalcProto::TAllTypes> allTypesMessage;
-
- NPureCalcProto::TAllTypes& GetCanonicalMessage() {
- if (!allTypesMessage) {
- allTypesMessage = NPureCalcProto::TAllTypes();
-
- allTypesMessage->SetFDouble(1);
- allTypesMessage->SetFFloat(2);
- allTypesMessage->SetFInt64(3);
- allTypesMessage->SetFSfixed64(4);
- allTypesMessage->SetFSint64(5);
- allTypesMessage->SetFUint64(6);
- allTypesMessage->SetFFixed64(7);
- allTypesMessage->SetFInt32(8);
- allTypesMessage->SetFSfixed32(9);
- allTypesMessage->SetFSint32(10);
- allTypesMessage->SetFUint32(11);
- allTypesMessage->SetFFixed32(12);
- allTypesMessage->SetFBool(true);
- allTypesMessage->SetFString("asd");
- allTypesMessage->SetFBytes("dsa");
- }
-
- return allTypesMessage.GetRef();
- }
-
- template <typename T1, typename T2>
- void AssertEqualToCanonical(const T1& got, const T2& expected) {
- UNIT_ASSERT_EQUAL(expected.GetFDouble(), got.GetFDouble());
- UNIT_ASSERT_EQUAL(expected.GetFFloat(), got.GetFFloat());
- UNIT_ASSERT_EQUAL(expected.GetFInt64(), got.GetFInt64());
- UNIT_ASSERT_EQUAL(expected.GetFSfixed64(), got.GetFSfixed64());
- UNIT_ASSERT_EQUAL(expected.GetFSint64(), got.GetFSint64());
- UNIT_ASSERT_EQUAL(expected.GetFUint64(), got.GetFUint64());
- UNIT_ASSERT_EQUAL(expected.GetFFixed64(), got.GetFFixed64());
- UNIT_ASSERT_EQUAL(expected.GetFInt32(), got.GetFInt32());
- UNIT_ASSERT_EQUAL(expected.GetFSfixed32(), got.GetFSfixed32());
- UNIT_ASSERT_EQUAL(expected.GetFSint32(), got.GetFSint32());
- UNIT_ASSERT_EQUAL(expected.GetFUint32(), got.GetFUint32());
- UNIT_ASSERT_EQUAL(expected.GetFFixed32(), got.GetFFixed32());
- UNIT_ASSERT_EQUAL(expected.GetFBool(), got.GetFBool());
- UNIT_ASSERT_EQUAL(expected.GetFString(), got.GetFString());
- UNIT_ASSERT_EQUAL(expected.GetFBytes(), got.GetFBytes());
- }
-
- template <typename T>
- void AssertEqualToCanonical(const T& got) {
- AssertEqualToCanonical(got, GetCanonicalMessage());
- }
-
- TString SerializeToTextFormatAsString(const google::protobuf::Message& message) {
- TString result;
- {
- TStringOutput output(result);
- SerializeToTextFormat(message, output);
- }
- return result;
- }
-
- template <typename T>
- void AssertProtoEqual(const T& actual, const T& expected) {
- UNIT_ASSERT_VALUES_EQUAL(SerializeToTextFormatAsString(actual), SerializeToTextFormatAsString(expected));
- }
-}
-
-class TAllTypesStreamImpl: public NYql::NPureCalc::IStream<NPureCalcProto::TAllTypes*> {
-private:
- int I_ = 0;
- NPureCalcProto::TAllTypes Message_ = GetCanonicalMessage();
-
-public:
- NPureCalcProto::TAllTypes* Fetch() override {
- if (I_ > 0) {
- return nullptr;
- } else {
- I_ += 1;
- return &Message_;
- }
- }
-};
-
-class TSimpleMessageStreamImpl: public NYql::NPureCalc::IStream<NPureCalcProto::TSimpleMessage*> {
-public:
- TSimpleMessageStreamImpl(i32 value)
- {
- Message_.SetX(value);
- }
-
- NPureCalcProto::TSimpleMessage* Fetch() override {
- if (Exhausted_) {
- return nullptr;
- } else {
- Exhausted_ = true;
- return &Message_;
- }
- }
-
-private:
- NPureCalcProto::TSimpleMessage Message_;
- bool Exhausted_ = false;
-};
-
-class TAllTypesConsumerImpl: public NYql::NPureCalc::IConsumer<NPureCalcProto::TAllTypes*> {
-private:
- int I_ = 0;
-
-public:
- void OnObject(NPureCalcProto::TAllTypes* t) override {
- I_ += 1;
- AssertEqualToCanonical(*t);
- }
-
- void OnFinish() override {
- UNIT_ASSERT(I_ > 0);
- }
-};
-
-class TStringMessageStreamImpl: public NYql::NPureCalc::IStream<NPureCalcProto::TStringMessage*> {
-private:
- int I_ = 0;
- NPureCalcProto::TStringMessage Message_{};
-
-public:
- NPureCalcProto::TStringMessage* Fetch() override {
- if (I_ >= 3) {
- return nullptr;
- } else {
- Message_.SetX(TString("-") * I_);
- I_ += 1;
- return &Message_;
- }
- }
-};
-
-class TSimpleMessageConsumerImpl: public NYql::NPureCalc::IConsumer<NPureCalcProto::TSimpleMessage*> {
-private:
- TVector<int>* Buf_;
-
-public:
- TSimpleMessageConsumerImpl(TVector<int>* buf)
- : Buf_(buf)
- {
- }
-
-public:
- void OnObject(NPureCalcProto::TSimpleMessage* t) override {
- Buf_->push_back(t->GetX());
- }
-
- void OnFinish() override {
- Buf_->push_back(-100);
- }
-};
-
-using TMessagesVariant = std::variant<NPureCalcProto::TSplitted1*, NPureCalcProto::TSplitted2*, NPureCalcProto::TStringMessage*>;
-
-class TVariantConsumerImpl: public NYql::NPureCalc::IConsumer<TMessagesVariant> {
-public:
- using TType0 = TVector<std::pair<i32, TString>>;
- using TType1 = TVector<std::pair<ui32, TString>>;
- using TType2 = TVector<TString>;
-
-public:
- TVariantConsumerImpl(TType0* q0, TType1* q1, TType2* q2, int* v)
- : Queue0_(q0)
- , Queue1_(q1)
- , Queue2_(q2)
- , Value_(v)
- {
- }
-
- void OnObject(TMessagesVariant value) override {
- if (auto* p = std::get_if<0>(&value)) {
- Queue0_->push_back({(*p)->GetBInt(), std::move(*(*p)->MutableBString())});
- } else if (auto* p = std::get_if<1>(&value)) {
- Queue1_->push_back({(*p)->GetCUint(), std::move(*(*p)->MutableCString())});
- } else if (auto* p = std::get_if<2>(&value)) {
- Queue2_->push_back(std::move(*(*p)->MutableX()));
- } else {
- Y_ABORT("invalid variant alternative");
- }
- }
-
- void OnFinish() override {
- *Value_ = 42;
- }
-
-private:
- TType0* Queue0_;
- TType1* Queue1_;
- TType2* Queue2_;
- int* Value_;
-};
-
-class TUnsplittedStreamImpl: public NYql::NPureCalc::IStream<NPureCalcProto::TUnsplitted*> {
-public:
- TUnsplittedStreamImpl()
- {
- Message_.SetAInt(-23);
- Message_.SetAUint(111);
- Message_.SetAString("Hello!");
- }
-
-public:
- NPureCalcProto::TUnsplitted* Fetch() override {
- switch (I_) {
- case 0:
- ++I_;
- return &Message_;
- case 1:
- ++I_;
- Message_.SetABool(false);
- return &Message_;
- case 2:
- ++I_;
- Message_.SetABool(true);
- return &Message_;
- default:
- return nullptr;
- }
- }
-
-private:
- NPureCalcProto::TUnsplitted Message_;
- ui32 I_ = 0;
-};
-
-template<typename T>
-struct TVectorConsumer: public NYql::NPureCalc::IConsumer<T*> {
- TVector<T> Data;
-
- void OnObject(T* t) override {
- Data.push_back(*t);
- }
-
- void OnFinish() override {
- }
-};
-
-template <typename T>
-struct TVectorStream: public NYql::NPureCalc::IStream<T*> {
- TVector<T> Data;
- size_t Index = 0;
-
-public:
- T* Fetch() override {
- return Index < Data.size() ? &Data[Index++] : nullptr;
- }
-};
-
-Y_UNIT_TEST_SUITE(TestProtoIO) {
- Y_UNIT_TEST(TestAllTypes) {
- using namespace NYql::NPureCalc;
-
- auto factory = MakeProgramFactory();
-
- {
- auto program = factory->MakePullStreamProgram(
- TProtobufInputSpec<NPureCalcProto::TAllTypes>(),
- TProtobufOutputSpec<NPureCalcProto::TAllTypes>(),
- "SELECT * FROM Input",
- ETranslationMode::SQL
- );
-
- auto stream = program->Apply(MakeHolder<TAllTypesStreamImpl>());
-
- NPureCalcProto::TAllTypes* message;
-
- UNIT_ASSERT(message = stream->Fetch());
- AssertEqualToCanonical(*message);
- UNIT_ASSERT(!stream->Fetch());
- }
-
- {
- auto program = factory->MakePullListProgram(
- TProtobufInputSpec<NPureCalcProto::TAllTypes>(),
- TProtobufOutputSpec<NPureCalcProto::TAllTypes>(),
- "SELECT * FROM Input",
- ETranslationMode::SQL
- );
-
- auto stream = program->Apply(MakeHolder<TAllTypesStreamImpl>());
-
- NPureCalcProto::TAllTypes* message;
-
- UNIT_ASSERT(message = stream->Fetch());
- AssertEqualToCanonical(*message);
- UNIT_ASSERT(!stream->Fetch());
- }
-
- {
- auto program = factory->MakePushStreamProgram(
- TProtobufInputSpec<NPureCalcProto::TAllTypes>(),
- TProtobufOutputSpec<NPureCalcProto::TAllTypes>(),
- "SELECT * FROM Input",
- ETranslationMode::SQL
- );
-
- auto consumer = program->Apply(MakeHolder<TAllTypesConsumerImpl>());
-
- UNIT_ASSERT_NO_EXCEPTION([&](){ consumer->OnObject(&GetCanonicalMessage()); }());
- UNIT_ASSERT_NO_EXCEPTION([&](){ consumer->OnFinish(); }());
- }
- }
-
- template <typename T>
- void CheckPassThroughYql(T& testInput, google::protobuf::Arena* arena = nullptr) {
- using namespace NYql::NPureCalc;
-
- auto resetArena = [arena]() {
- if (arena != nullptr) {
- arena->Reset();
- }
- };
-
- auto factory = MakeProgramFactory();
-
- {
- auto program = factory->MakePushStreamProgram(
- TProtobufInputSpec<T>(),
- TProtobufOutputSpec<T>({}, arena),
- "SELECT * FROM Input",
- ETranslationMode::SQL
- );
-
- auto resultConsumer = MakeHolder<TVectorConsumer<T>>();
- auto* resultConsumerPtr = resultConsumer.Get();
- auto sourceConsumer = program->Apply(std::move(resultConsumer));
-
- sourceConsumer->OnObject(&testInput);
- UNIT_ASSERT_VALUES_EQUAL(1, resultConsumerPtr->Data.size());
- AssertProtoEqual(resultConsumerPtr->Data[0], testInput);
-
- resultConsumerPtr->Data.clear();
- sourceConsumer->OnObject(&testInput);
- UNIT_ASSERT_VALUES_EQUAL(1, resultConsumerPtr->Data.size());
- AssertProtoEqual(resultConsumerPtr->Data[0], testInput);
- }
- resetArena();
-
- {
- auto program = factory->MakePullStreamProgram(
- TProtobufInputSpec<T>(),
- TProtobufOutputSpec<T>({}, arena),
- "SELECT * FROM Input",
- ETranslationMode::SQL
- );
-
- auto sourceStream = MakeHolder<TVectorStream<T>>();
- auto* sourceStreamPtr = sourceStream.Get();
- auto resultStream = program->Apply(std::move(sourceStream));
-
- sourceStreamPtr->Data.push_back(testInput);
- T* resultMessage;
- UNIT_ASSERT(resultMessage = resultStream->Fetch());
- AssertProtoEqual(*resultMessage, testInput);
- UNIT_ASSERT(!resultStream->Fetch());
-
- UNIT_ASSERT_VALUES_EQUAL(resultMessage->GetArena(), arena);
- }
- resetArena();
-
- {
- auto program = factory->MakePullListProgram(
- TProtobufInputSpec<T>(),
- TProtobufOutputSpec<T>({}, arena),
- "SELECT * FROM Input",
- ETranslationMode::SQL
- );
-
- auto sourceStream = MakeHolder<TVectorStream<T>>();
- auto* sourceStreamPtr = sourceStream.Get();
- auto resultStream = program->Apply(std::move(sourceStream));
-
- sourceStreamPtr->Data.push_back(testInput);
- T* resultMessage;
- UNIT_ASSERT(resultMessage = resultStream->Fetch());
- AssertProtoEqual(*resultMessage, testInput);
- UNIT_ASSERT(!resultStream->Fetch());
-
- UNIT_ASSERT_VALUES_EQUAL(resultMessage->GetArena(), arena);
- }
- resetArena();
- }
-
- template <typename T>
- void CheckMessageIsInvalid(const TString& expectedExceptionMessage) {
- using namespace NYql::NPureCalc;
-
- auto factory = MakeProgramFactory();
-
- UNIT_ASSERT_EXCEPTION_CONTAINS([&]() {
- factory->MakePushStreamProgram(TProtobufInputSpec<T>(), TProtobufOutputSpec<T>(), "SELECT * FROM Input", ETranslationMode::SQL);
- }(), yexception, expectedExceptionMessage);
-
- UNIT_ASSERT_EXCEPTION_CONTAINS([&]() {
- factory->MakePullStreamProgram(TProtobufInputSpec<T>(), TProtobufOutputSpec<T>(), "SELECT * FROM Input", ETranslationMode::SQL);
- }(), yexception, expectedExceptionMessage);
-
- UNIT_ASSERT_EXCEPTION_CONTAINS([&]() {
- factory->MakePullListProgram(TProtobufInputSpec<T>(), TProtobufOutputSpec<T>(), "SELECT * FROM Input", ETranslationMode::SQL);
- }(), yexception, expectedExceptionMessage);
- }
-
- Y_UNIT_TEST(TestSimpleNested) {
- NPureCalcProto::TSimpleNested input;
- input.SetX(10);
- {
- auto* item = input.MutableY();
- *item = GetCanonicalMessage();
- item->SetFUint64(100);
- }
- CheckPassThroughYql(input);
- }
-
- Y_UNIT_TEST(TestOptionalNested) {
- NPureCalcProto::TOptionalNested input;
- {
- auto* item = input.MutableX();
- *item = GetCanonicalMessage();
- item->SetFUint64(100);
- }
- CheckPassThroughYql(input);
- }
-
- Y_UNIT_TEST(TestSimpleRepeated) {
- NPureCalcProto::TSimpleRepeated input;
- input.SetX(20);
- input.AddY(100);
- input.AddY(200);
- input.AddY(300);
- CheckPassThroughYql(input);
- }
-
- Y_UNIT_TEST(TestNestedRepeated) {
- NPureCalcProto::TNestedRepeated input;
- input.SetX(20);
- {
- auto* item = input.MutableY()->Add();
- item->SetX(100);
- {
- auto* y = item->MutableY();
- *y = GetCanonicalMessage();
- y->SetFUint64(1000);
- }
- }
- {
- auto* item = input.MutableY()->Add();
- item->SetX(200);
- {
- auto* y = item->MutableY();
- *y = GetCanonicalMessage();
- y->SetFUint64(2000);
- }
- }
- CheckPassThroughYql(input);
- }
-
- Y_UNIT_TEST(TestMessageWithEnum) {
- NPureCalcProto::TMessageWithEnum input;
- input.AddEnumValue(NPureCalcProto::TMessageWithEnum::VALUE1);
- input.AddEnumValue(NPureCalcProto::TMessageWithEnum::VALUE2);
- CheckPassThroughYql(input);
- }
-
- Y_UNIT_TEST(TestRecursive) {
- CheckMessageIsInvalid<NPureCalcProto::TRecursive>("NPureCalcProto.TRecursive->NPureCalcProto.TRecursive");
- }
-
- Y_UNIT_TEST(TestRecursiveIndirectly) {
- CheckMessageIsInvalid<NPureCalcProto::TRecursiveIndirectly>(
- "NPureCalcProto.TRecursiveIndirectly->NPureCalcProto.TRecursiveIndirectly.TNested->NPureCalcProto.TRecursiveIndirectly");
- }
-
- Y_UNIT_TEST(TestColumnsFilter) {
- using namespace NYql::NPureCalc;
-
- auto factory = MakeProgramFactory();
-
- auto filter = THashSet<TString>({"FFixed64", "FBool", "FBytes"});
-
- NPureCalcProto::TOptionalAllTypes canonicalMessage;
- canonicalMessage.SetFFixed64(GetCanonicalMessage().GetFFixed64());
- canonicalMessage.SetFBool(GetCanonicalMessage().GetFBool());
- canonicalMessage.SetFBytes(GetCanonicalMessage().GetFBytes());
-
- {
- auto inputSpec = TProtobufInputSpec<NPureCalcProto::TAllTypes>();
- auto outputSpec = TProtobufOutputSpec<NPureCalcProto::TOptionalAllTypes>();
- outputSpec.SetOutputColumnsFilter(filter);
-
- auto program = factory->MakePullStreamProgram(
- inputSpec,
- outputSpec,
- "SELECT * FROM Input",
- ETranslationMode::SQL
- );
-
- UNIT_ASSERT_EQUAL(program->GetUsedColumns(), filter);
-
- auto stream = program->Apply(MakeHolder<TAllTypesStreamImpl>());
-
- NPureCalcProto::TOptionalAllTypes* message;
-
- UNIT_ASSERT(message = stream->Fetch());
- AssertEqualToCanonical(*message, canonicalMessage);
- UNIT_ASSERT(!stream->Fetch());
- }
- }
-
- Y_UNIT_TEST(TestColumnsFilterWithOptionalFields) {
- using namespace NYql::NPureCalc;
-
- auto factory = MakeProgramFactory();
-
- auto fields = THashSet<TString>({"FFixed64", "FBool", "FBytes"});
-
- NPureCalcProto::TOptionalAllTypes canonicalMessage;
- canonicalMessage.SetFFixed64(GetCanonicalMessage().GetFFixed64());
- canonicalMessage.SetFBool(GetCanonicalMessage().GetFBool());
- canonicalMessage.SetFBytes(GetCanonicalMessage().GetFBytes());
-
- {
- auto program = factory->MakePullStreamProgram(
- TProtobufInputSpec<NPureCalcProto::TAllTypes>(),
- TProtobufOutputSpec<NPureCalcProto::TOptionalAllTypes>(),
- "SELECT FFixed64, FBool, FBytes FROM Input",
- ETranslationMode::SQL
- );
-
- UNIT_ASSERT_EQUAL(program->GetUsedColumns(), fields);
-
- auto stream = program->Apply(MakeHolder<TAllTypesStreamImpl>());
-
- NPureCalcProto::TOptionalAllTypes* message;
-
- UNIT_ASSERT(message = stream->Fetch());
- AssertEqualToCanonical(*message, canonicalMessage);
- UNIT_ASSERT(!stream->Fetch());
- }
-
- UNIT_ASSERT_EXCEPTION_CONTAINS([&](){
- factory->MakePullStreamProgram(
- TProtobufInputSpec<NPureCalcProto::TAllTypes>(),
- TProtobufOutputSpec<NPureCalcProto::TAllTypes>(),
- "SELECT FFixed64, FBool, FBytes FROM Input",
- ETranslationMode::SQL
- );
- }(), TCompileError, "Failed to optimize");
- }
-
- Y_UNIT_TEST(TestUsedColumns) {
- using namespace NYql::NPureCalc;
-
- auto factory = MakeProgramFactory();
-
- auto allFields = THashSet<TString>();
-
- for (auto i: xrange(NPureCalcProto::TOptionalAllTypes::descriptor()->field_count())) {
- allFields.emplace(NPureCalcProto::TOptionalAllTypes::descriptor()->field(i)->name());
- }
-
- {
- auto program = factory->MakePullStreamProgram(
- TProtobufInputSpec<NPureCalcProto::TAllTypes>(),
- TProtobufOutputSpec<NPureCalcProto::TOptionalAllTypes>(),
- "SELECT * FROM Input",
- ETranslationMode::SQL
- );
-
- UNIT_ASSERT_EQUAL(program->GetUsedColumns(), allFields);
- }
- }
-
- Y_UNIT_TEST(TestChaining) {
- using namespace NYql::NPureCalc;
-
- auto factory = MakeProgramFactory();
-
- TString sql1 = "SELECT UNWRAP(X || CAST(\"HI\" AS Utf8)) AS X FROM Input";
- TString sql2 = "SELECT LENGTH(X) AS X FROM Input";
-
- {
- auto program1 = factory->MakePullStreamProgram(
- TProtobufInputSpec<NPureCalcProto::TStringMessage>(),
- TProtobufOutputSpec<NPureCalcProto::TStringMessage>(),
- sql1,
- ETranslationMode::SQL
- );
-
- auto program2 = factory->MakePullStreamProgram(
- TProtobufInputSpec<NPureCalcProto::TStringMessage>(),
- TProtobufOutputSpec<NPureCalcProto::TSimpleMessage>(),
- sql2,
- ETranslationMode::SQL
- );
-
- auto input = MakeHolder<TStringMessageStreamImpl>();
- auto intermediate = program1->Apply(std::move(input));
- auto output = program2->Apply(std::move(intermediate));
-
- TVector<int> expected = {2, 3, 4};
- TVector<int> actual{};
-
- while (auto *x = output->Fetch()) {
- actual.push_back(x->GetX());
- }
-
- UNIT_ASSERT_EQUAL(expected, actual);
- }
-
- {
- auto program1 = factory->MakePullListProgram(
- TProtobufInputSpec<NPureCalcProto::TStringMessage>(),
- TProtobufOutputSpec<NPureCalcProto::TStringMessage>(),
- sql1,
- ETranslationMode::SQL
- );
-
- auto program2 = factory->MakePullListProgram(
- TProtobufInputSpec<NPureCalcProto::TStringMessage>(),
- TProtobufOutputSpec<NPureCalcProto::TSimpleMessage>(),
- sql2,
- ETranslationMode::SQL
- );
-
- auto input = MakeHolder<TStringMessageStreamImpl>();
- auto intermediate = program1->Apply(std::move(input));
- auto output = program2->Apply(std::move(intermediate));
-
- TVector<int> expected = {2, 3, 4};
- TVector<int> actual{};
-
- while (auto *x = output->Fetch()) {
- actual.push_back(x->GetX());
- }
-
- UNIT_ASSERT_EQUAL(expected, actual);
- }
-
- {
- auto program1 = factory->MakePushStreamProgram(
- TProtobufInputSpec<NPureCalcProto::TStringMessage>(),
- TProtobufOutputSpec<NPureCalcProto::TStringMessage>(),
- sql1,
- ETranslationMode::SQL
- );
-
- auto program2 = factory->MakePushStreamProgram(
- TProtobufInputSpec<NPureCalcProto::TStringMessage>(),
- TProtobufOutputSpec<NPureCalcProto::TSimpleMessage>(),
- sql2,
- ETranslationMode::SQL
- );
-
- TVector<int> expected = {2, 3, 4, -100};
- TVector<int> actual{};
-
- auto consumer = MakeHolder<TSimpleMessageConsumerImpl>(&actual);
- auto intermediate = program2->Apply(std::move(consumer));
- auto input = program1->Apply(std::move(intermediate));
-
- NPureCalcProto::TStringMessage Message;
-
- Message.SetX("");
- input->OnObject(&Message);
-
- Message.SetX("1");
- input->OnObject(&Message);
-
- Message.SetX("22");
- input->OnObject(&Message);
-
- input->OnFinish();
-
- UNIT_ASSERT_EQUAL(expected, actual);
- }
- }
-
- Y_UNIT_TEST(TestTimestampColumn) {
- using namespace NYql::NPureCalc;
-
- auto factory = MakeProgramFactory(TProgramFactoryOptions()
- .SetDeterministicTimeProviderSeed(1)); // seconds
-
- NPureCalcProto::TOptionalAllTypes canonicalMessage;
-
- {
- auto inputSpec = TProtobufInputSpec<NPureCalcProto::TAllTypes>("MyTimestamp");
- auto outputSpec = TProtobufOutputSpec<NPureCalcProto::TOptionalAllTypes>();
-
- auto program = factory->MakePullStreamProgram(
- inputSpec,
- outputSpec,
- "SELECT MyTimestamp AS FFixed64 FROM Input",
- ETranslationMode::SQL
- );
-
- auto stream = program->Apply(MakeHolder<TAllTypesStreamImpl>());
-
- NPureCalcProto::TOptionalAllTypes* message;
-
- UNIT_ASSERT(message = stream->Fetch());
- UNIT_ASSERT_VALUES_EQUAL(message->GetFFixed64(), 1000000); // microseconds
- UNIT_ASSERT(!stream->Fetch());
- }
- }
-
- Y_UNIT_TEST(TestTableNames) {
- using namespace NYql::NPureCalc;
-
- auto factory = MakeProgramFactory(TProgramFactoryOptions().SetUseSystemColumns(true));
-
- auto runTest = [&](TStringBuf tableName, i32 value) {
- auto program = factory->MakePullStreamProgram(
- TProtobufInputSpec<NPureCalcProto::TSimpleMessage>(),
- TProtobufOutputSpec<NPureCalcProto::TNamedSimpleMessage>(),
- TString::Join("SELECT TableName() AS Name, X FROM ", tableName),
- ETranslationMode::SQL
- );
-
- auto stream = program->Apply(MakeHolder<TSimpleMessageStreamImpl>(value));
- auto message = stream->Fetch();
-
- UNIT_ASSERT(message);
- UNIT_ASSERT_VALUES_EQUAL(message->GetX(), value);
- UNIT_ASSERT_VALUES_EQUAL(message->GetName(), tableName);
- UNIT_ASSERT(!stream->Fetch());
- };
-
- runTest("Input", 37);
- runTest("Input0", -23);
- }
-
- void CheckMultiOutputs(TMaybe<TVector<google::protobuf::Arena*>> arenas) {
- using namespace NYql::NPureCalc;
-
- auto factory = MakeProgramFactory();
- TString sExpr = R"(
-(
- (let $type (ParseType '"Variant<Struct<BInt:Int32,BString:Utf8>, Struct<CUint:Uint32,CString:Utf8>, Struct<X:Utf8>>"))
- (let $stream (Self '0))
- (return (FlatMap (Self '0) (lambda '(x) (block '(
- (let $cond (Member x 'ABool))
- (let $item0 (Variant (AsStruct '('BInt (Member x 'AInt)) '('BString (Member x 'AString))) '0 $type))
- (let $item1 (Variant (AsStruct '('CUint (Member x 'AUint)) '('CString (Member x 'AString))) '1 $type))
- (let $item2 (Variant (AsStruct '('X (Utf8 'Error))) '2 $type))
- (return (If (Exists $cond) (If (Unwrap $cond) (AsList $item0) (AsList $item1)) (AsList $item2)))
- )))))
-)
- )";
-
- {
- auto program = factory->MakePushStreamProgram(
- TProtobufInputSpec<NPureCalcProto::TUnsplitted>(),
- TProtobufMultiOutputSpec<NPureCalcProto::TSplitted1, NPureCalcProto::TSplitted2, NPureCalcProto::TStringMessage>(
- {}, arenas
- ),
- sExpr,
- ETranslationMode::SExpr
- );
-
- TVariantConsumerImpl::TType0 queue0;
- TVariantConsumerImpl::TType1 queue1;
- TVariantConsumerImpl::TType2 queue2;
- int finalValue = 0;
-
- auto consumer = MakeHolder<TVariantConsumerImpl>(&queue0, &queue1, &queue2, &finalValue);
- auto input = program->Apply(std::move(consumer));
-
- NPureCalcProto::TUnsplitted message;
- message.SetAInt(-13);
- message.SetAUint(47);
- message.SetAString("first message");
- message.SetABool(true);
-
- input->OnObject(&message);
- UNIT_ASSERT(queue0.size() == 1 && queue1.empty() && queue2.empty() && finalValue == 0);
-
- message.SetABool(false);
- message.SetAString("second message");
-
- input->OnObject(&message);
- UNIT_ASSERT(queue0.size() == 1 && queue1.size() == 1 && queue2.empty() && finalValue == 0);
-
- message.ClearABool();
-
- input->OnObject(&message);
- UNIT_ASSERT(queue0.size() == 1 && queue1.size() == 1 && queue2.size() == 1 && finalValue == 0);
-
- input->OnFinish();
- UNIT_ASSERT(queue0.size() == 1 && queue1.size() == 1 && queue2.size() == 1 && finalValue == 42);
-
- TVariantConsumerImpl::TType0 expected0 = {{-13, "first message"}};
- UNIT_ASSERT_EQUAL(queue0, expected0);
-
- TVariantConsumerImpl::TType1 expected1 = {{47, "second message"}};
- UNIT_ASSERT_EQUAL(queue1, expected1);
-
- TVariantConsumerImpl::TType2 expected2 = {{"Error"}};
- UNIT_ASSERT_EQUAL(queue2, expected2);
- }
-
- {
- auto program1 = factory->MakePullStreamProgram(
- TProtobufInputSpec<NPureCalcProto::TUnsplitted>(),
- TProtobufMultiOutputSpec<NPureCalcProto::TSplitted1, NPureCalcProto::TSplitted2, NPureCalcProto::TStringMessage>(
- {}, arenas
- ),
- sExpr,
- ETranslationMode::SExpr
- );
-
- auto program2 = factory->MakePullListProgram(
- TProtobufInputSpec<NPureCalcProto::TUnsplitted>(),
- TProtobufMultiOutputSpec<NPureCalcProto::TSplitted1, NPureCalcProto::TSplitted2, NPureCalcProto::TStringMessage>(
- {}, arenas
- ),
- sExpr,
- ETranslationMode::SExpr
- );
-
- auto input1 = MakeHolder<TUnsplittedStreamImpl>();
- auto output1 = program1->Apply(std::move(input1));
-
- auto input2 = MakeHolder<TUnsplittedStreamImpl>();
- auto output2 = program2->Apply(std::move(input2));
-
- decltype(output1->Fetch()) variant1;
- decltype(output2->Fetch()) variant2;
-
-#define ASSERT_EQUAL_FIELDS(X1, X2, I, F, E) \
- UNIT_ASSERT_EQUAL(X1.index(), I); \
- UNIT_ASSERT_EQUAL(X2.index(), I); \
- UNIT_ASSERT_EQUAL(std::get<I>(X1)->Get##F(), E); \
- UNIT_ASSERT_EQUAL(std::get<I>(X2)->Get##F(), E)
-
- variant1 = output1->Fetch();
- variant2 = output2->Fetch();
- ASSERT_EQUAL_FIELDS(variant1, variant2, 2, X, "Error");
- ASSERT_EQUAL_FIELDS(variant1, variant2, 2, Arena, (arenas.Defined() ? arenas->at(2) : nullptr));
-
- variant1 = output1->Fetch();
- variant2 = output2->Fetch();
- ASSERT_EQUAL_FIELDS(variant1, variant2, 1, CUint, 111);
- ASSERT_EQUAL_FIELDS(variant1, variant2, 1, CString, "Hello!");
- ASSERT_EQUAL_FIELDS(variant1, variant2, 1, Arena, (arenas.Defined() ? arenas->at(1) : nullptr));
-
- variant1 = output1->Fetch();
- variant2 = output2->Fetch();
- ASSERT_EQUAL_FIELDS(variant1, variant2, 0, BInt, -23);
- ASSERT_EQUAL_FIELDS(variant1, variant2, 0, BString, "Hello!");
- ASSERT_EQUAL_FIELDS(variant1, variant2, 0, Arena, (arenas.Defined() ? arenas->at(0) : nullptr));
-
- variant1 = output1->Fetch();
- variant2 = output2->Fetch();
- UNIT_ASSERT_EQUAL(variant1.index(), 0);
- UNIT_ASSERT_EQUAL(variant2.index(), 0);
- UNIT_ASSERT_EQUAL(std::get<0>(variant1), nullptr);
- UNIT_ASSERT_EQUAL(std::get<0>(variant1), nullptr);
-
-#undef ASSERT_EQUAL_FIELDS
- }
- }
-
- Y_UNIT_TEST(TestMultiOutputs) {
- CheckMultiOutputs(Nothing());
- }
-
- Y_UNIT_TEST(TestSupportedTypes) {
-
- }
-
- Y_UNIT_TEST(TestProtobufArena) {
- {
- NPureCalcProto::TNestedRepeated input;
- input.SetX(20);
- {
- auto* item = input.MutableY()->Add();
- item->SetX(100);
- {
- auto* y = item->MutableY();
- *y = GetCanonicalMessage();
- y->SetFUint64(1000);
- }
- }
- {
- auto* item = input.MutableY()->Add();
- item->SetX(200);
- {
- auto* y = item->MutableY();
- *y = GetCanonicalMessage();
- y->SetFUint64(2000);
- }
- }
-
- google::protobuf::Arena arena;
- CheckPassThroughYql(input, &arena);
- }
-
- {
- google::protobuf::Arena arena1;
- google::protobuf::Arena arena2;
- TVector<google::protobuf::Arena*> arenas{&arena1, &arena2, &arena1};
- CheckMultiOutputs(arenas);
- }
- }
-
- Y_UNIT_TEST(TestFieldRenames) {
- using namespace NYql::NPureCalc;
-
- auto factory = MakeProgramFactory();
-
- TString query = "SELECT InputAlias AS OutputAlias FROM Input";
-
- auto inputProtoOptions = TProtoSchemaOptions();
- inputProtoOptions.SetFieldRenames({{"X", "InputAlias"}});
-
- auto inputSpec = TProtobufInputSpec<NPureCalcProto::TSimpleMessage>(
- Nothing(), std::move(inputProtoOptions)
- );
-
- auto outputProtoOptions = TProtoSchemaOptions();
- outputProtoOptions.SetFieldRenames({{"X", "OutputAlias"}});
-
- auto outputSpec = TProtobufOutputSpec<NPureCalcProto::TSimpleMessage>(
- std::move(outputProtoOptions)
- );
-
- {
- auto program = factory->MakePullStreamProgram(
- inputSpec, outputSpec, query, ETranslationMode::SQL
- );
-
- auto input = MakeHolder<TSimpleMessageStreamImpl>(1);
- auto output = program->Apply(std::move(input));
-
- TVector<int> expected = {1};
- TVector<int> actual;
-
- while (auto* x = output->Fetch()) {
- actual.push_back(x->GetX());
- }
-
- UNIT_ASSERT_VALUES_EQUAL(expected, actual);
- }
-
- {
- auto program = factory->MakePullListProgram(
- inputSpec, outputSpec, query, ETranslationMode::SQL
- );
-
- auto input = MakeHolder<TSimpleMessageStreamImpl>(1);
- auto output = program->Apply(std::move(input));
-
- TVector<int> expected = {1};
- TVector<int> actual;
-
- while (auto* x = output->Fetch()) {
- actual.push_back(x->GetX());
- }
-
- UNIT_ASSERT_VALUES_EQUAL(expected, actual);
- }
-
- {
- auto program = factory->MakePushStreamProgram(
- inputSpec, outputSpec, query, ETranslationMode::SQL
- );
-
- TVector<int> expected = {1, -100};
- TVector<int> actual;
-
- auto consumer = MakeHolder<TSimpleMessageConsumerImpl>(&actual);
- auto input = program->Apply(std::move(consumer));
-
- NPureCalcProto::TSimpleMessage Message;
-
- Message.SetX(1);
- input->OnObject(&Message);
-
- input->OnFinish();
-
- UNIT_ASSERT_VALUES_EQUAL(expected, actual);
- }
- }
-}
diff --git a/ydb/library/yql/public/purecalc/io_specs/protobuf/ut/ya.make b/ydb/library/yql/public/purecalc/io_specs/protobuf/ut/ya.make
deleted file mode 100644
index c59c065678a..00000000000
--- a/ydb/library/yql/public/purecalc/io_specs/protobuf/ut/ya.make
+++ /dev/null
@@ -1,23 +0,0 @@
-IF (NOT SANITIZER_TYPE)
-
-UNITTEST()
-
-PEERDIR(
- library/cpp/protobuf/util
- yql/essentials/public/udf/service/exception_policy
- ydb/library/yql/public/purecalc
- ydb/library/yql/public/purecalc/io_specs/protobuf
- ydb/library/yql/public/purecalc/ut/protos
-)
-
-SIZE(MEDIUM)
-
-YQL_LAST_ABI_VERSION()
-
-SRCS(
- test_spec.cpp
-)
-
-END()
-
-ENDIF()
diff --git a/ydb/library/yql/public/purecalc/io_specs/protobuf/ya.make b/ydb/library/yql/public/purecalc/io_specs/protobuf/ya.make
deleted file mode 100644
index 7177024003f..00000000000
--- a/ydb/library/yql/public/purecalc/io_specs/protobuf/ya.make
+++ /dev/null
@@ -1,19 +0,0 @@
-LIBRARY()
-
-PEERDIR(
- ydb/library/yql/public/purecalc/common
- ydb/library/yql/public/purecalc/io_specs/protobuf_raw
-)
-
-SRCS(
- spec.cpp
- proto_variant.cpp
-)
-
-YQL_LAST_ABI_VERSION()
-
-END()
-
-RECURSE_FOR_TESTS(
- ut
-)
diff --git a/ydb/library/yql/public/purecalc/io_specs/protobuf_raw/proto_holder.cpp b/ydb/library/yql/public/purecalc/io_specs/protobuf_raw/proto_holder.cpp
deleted file mode 100644
index 95adbc4de95..00000000000
--- a/ydb/library/yql/public/purecalc/io_specs/protobuf_raw/proto_holder.cpp
+++ /dev/null
@@ -1 +0,0 @@
-#include "proto_holder.h"
diff --git a/ydb/library/yql/public/purecalc/io_specs/protobuf_raw/proto_holder.h b/ydb/library/yql/public/purecalc/io_specs/protobuf_raw/proto_holder.h
deleted file mode 100644
index 7d4d843bfcf..00000000000
--- a/ydb/library/yql/public/purecalc/io_specs/protobuf_raw/proto_holder.h
+++ /dev/null
@@ -1,31 +0,0 @@
-#pragma once
-
-#include <google/protobuf/arena.h>
-
-#include <util/generic/ptr.h>
-
-#include <type_traits>
-
-namespace NYql::NPureCalc {
- class TProtoDestroyer {
- public:
- template <typename T>
- static inline void Destroy(T* t) noexcept {
- if (t->GetArena() == nullptr) {
- CheckedDelete(t);
- }
- }
- };
-
- template <typename TProto>
- concept IsProtoMessage = std::is_base_of_v<NProtoBuf::Message, TProto>;
-
- template <IsProtoMessage TProto>
- using TProtoHolder = THolder<TProto, TProtoDestroyer>;
-
- template <IsProtoMessage TProto, typename... TArgs>
- TProtoHolder<TProto> MakeProtoHolder(NProtoBuf::Arena* arena, TArgs&&... args) {
- auto* ptr = NProtoBuf::Arena::CreateMessage<TProto>(arena, std::forward<TArgs>(args)...);
- return TProtoHolder<TProto>(ptr);
- }
-}
diff --git a/ydb/library/yql/public/purecalc/io_specs/protobuf_raw/spec.cpp b/ydb/library/yql/public/purecalc/io_specs/protobuf_raw/spec.cpp
deleted file mode 100644
index 0a3cc41427f..00000000000
--- a/ydb/library/yql/public/purecalc/io_specs/protobuf_raw/spec.cpp
+++ /dev/null
@@ -1,1064 +0,0 @@
-#include "proto_holder.h"
-#include "spec.h"
-
-#include <yql/essentials/public/udf/udf_value.h>
-#include <yql/essentials/minikql/computation/mkql_computation_node_holders.h>
-#include <yql/essentials/minikql/computation/mkql_custom_list.h>
-#include <yql/essentials/minikql/mkql_string_util.h>
-#include <yql/essentials/utils/yql_panic.h>
-#include <google/protobuf/reflection.h>
-
-using namespace NYql;
-using namespace NYql::NPureCalc;
-using namespace google::protobuf;
-using namespace NKikimr::NUdf;
-using namespace NKikimr::NMiniKQL;
-
-TProtobufRawInputSpec::TProtobufRawInputSpec(
- const Descriptor& descriptor,
- const TMaybe<TString>& timestampColumn,
- const TProtoSchemaOptions& options
-)
- : Descriptor_(descriptor)
- , TimestampColumn_(timestampColumn)
- , SchemaOptions_(options)
-{
-}
-
-const TVector<NYT::TNode>& TProtobufRawInputSpec::GetSchemas() const {
- if (SavedSchemas_.size() == 0) {
- SavedSchemas_.push_back(MakeSchemaFromProto(Descriptor_, SchemaOptions_));
- if (TimestampColumn_) {
- auto timestampType = NYT::TNode::CreateList();
- timestampType.Add("DataType");
- timestampType.Add("Uint64");
- auto timestamp = NYT::TNode::CreateList();
- timestamp.Add(*TimestampColumn_);
- timestamp.Add(timestampType);
- SavedSchemas_.back().AsList()[1].AsList().push_back(timestamp);
- }
- }
-
- return SavedSchemas_;
-}
-
-const Descriptor& TProtobufRawInputSpec::GetDescriptor() const {
- return Descriptor_;
-}
-
-const TMaybe<TString>& TProtobufRawInputSpec::GetTimestampColumn() const {
- return TimestampColumn_;
-}
-
-const TProtoSchemaOptions& TProtobufRawInputSpec::GetSchemaOptions() const {
- return SchemaOptions_;
-}
-
-TProtobufRawOutputSpec::TProtobufRawOutputSpec(
- const Descriptor& descriptor,
- MessageFactory* factory,
- const TProtoSchemaOptions& options,
- Arena* arena
-)
- : Descriptor_(descriptor)
- , Factory_(factory)
- , SchemaOptions_(options)
- , Arena_(arena)
-{
- SchemaOptions_.ListIsOptional = true;
-}
-
-const NYT::TNode& TProtobufRawOutputSpec::GetSchema() const {
- if (!SavedSchema_) {
- SavedSchema_ = MakeSchemaFromProto(Descriptor_, SchemaOptions_);
- }
-
- return SavedSchema_.GetRef();
-}
-
-const Descriptor& TProtobufRawOutputSpec::GetDescriptor() const {
- return Descriptor_;
-}
-
-void TProtobufRawOutputSpec::SetFactory(MessageFactory* factory) {
- Factory_ = factory;
-}
-
-MessageFactory* TProtobufRawOutputSpec::GetFactory() const {
- return Factory_;
-}
-
-void TProtobufRawOutputSpec::SetArena(Arena* arena) {
- Arena_ = arena;
-}
-
-Arena* TProtobufRawOutputSpec::GetArena() const {
- return Arena_;
-}
-
-const TProtoSchemaOptions& TProtobufRawOutputSpec::GetSchemaOptions() const {
- return SchemaOptions_;
-}
-
-TProtobufRawMultiOutputSpec::TProtobufRawMultiOutputSpec(
- TVector<const Descriptor*> descriptors,
- TMaybe<TVector<MessageFactory*>> factories,
- const TProtoSchemaOptions& options,
- TMaybe<TVector<Arena*>> arenas
-)
- : Descriptors_(std::move(descriptors))
- , SchemaOptions_(options)
-{
- if (factories) {
- Y_ENSURE(factories->size() == Descriptors_.size(), "number of factories must match number of descriptors");
- Factories_ = std::move(*factories);
- } else {
- Factories_ = TVector<MessageFactory*>(Descriptors_.size(), nullptr);
- }
-
- if (arenas) {
- Y_ENSURE(arenas->size() == Descriptors_.size(), "number of arenas must match number of descriptors");
- Arenas_ = std::move(*arenas);
- } else {
- Arenas_ = TVector<Arena*>(Descriptors_.size(), nullptr);
- }
-}
-
-const NYT::TNode& TProtobufRawMultiOutputSpec::GetSchema() const {
- if (SavedSchema_.IsUndefined()) {
- SavedSchema_ = MakeVariantSchemaFromProtos(Descriptors_, SchemaOptions_);
- }
-
- return SavedSchema_;
-}
-
-const Descriptor& TProtobufRawMultiOutputSpec::GetDescriptor(ui32 index) const {
- Y_ENSURE(index < Descriptors_.size(), "invalid output index");
-
- return *Descriptors_[index];
-}
-
-void TProtobufRawMultiOutputSpec::SetFactory(ui32 index, MessageFactory* factory) {
- Y_ENSURE(index < Factories_.size(), "invalid output index");
-
- Factories_[index] = factory;
-}
-
-MessageFactory* TProtobufRawMultiOutputSpec::GetFactory(ui32 index) const {
- Y_ENSURE(index < Factories_.size(), "invalid output index");
-
- return Factories_[index];
-}
-
-void TProtobufRawMultiOutputSpec::SetArena(ui32 index, Arena* arena) {
- Y_ENSURE(index < Arenas_.size(), "invalid output index");
-
- Arenas_[index] = arena;
-}
-
-Arena* TProtobufRawMultiOutputSpec::GetArena(ui32 index) const {
- Y_ENSURE(index < Arenas_.size(), "invalid output index");
-
- return Arenas_[index];
-}
-
-ui32 TProtobufRawMultiOutputSpec::GetOutputsNumber() const {
- return static_cast<ui32>(Descriptors_.size());
-}
-
-const TProtoSchemaOptions& TProtobufRawMultiOutputSpec::GetSchemaOptions() const {
- return SchemaOptions_;
-}
-
-namespace {
- struct TFieldMapping {
- TString Name;
- const FieldDescriptor* Field;
- TVector<TFieldMapping> NestedFields;
- };
-
- /**
- * Fills a tree of field mappings from the given yql struct type to protobuf message.
- *
- * @param fromType source yql type.
- * @param toType target protobuf message type.
- * @param mappings destination vector will be filled with field descriptors. Order of descriptors will match
- * the order of field names.
- */
- void FillFieldMappings(
- const TStructType* fromType,
- const Descriptor& toType,
- TVector<TFieldMapping>& mappings,
- const TMaybe<TString>& timestampColumn,
- bool listIsOptional,
- const THashMap<TString, TString>& fieldRenames
- ) {
- THashMap<TString, TString> inverseFieldRenames;
-
- for (const auto& [source, target]: fieldRenames) {
- auto [iterator, emplaced] = inverseFieldRenames.emplace(target, source);
- Y_ENSURE(emplaced, "Duplicate rename field found: " << source << " -> " << target);
- }
-
- mappings.resize(fromType->GetMembersCount());
- for (ui32 i = 0; i < fromType->GetMembersCount(); ++i) {
- TString fieldName(fromType->GetMemberName(i));
- if (auto fieldRenamePtr = inverseFieldRenames.FindPtr(fieldName)) {
- fieldName = *fieldRenamePtr;
- }
-
- mappings[i].Name = fieldName;
- mappings[i].Field = toType.FindFieldByName(fieldName);
- YQL_ENSURE(
- mappings[i].Field || timestampColumn && *timestampColumn == fieldName,
- "Missing field: " << fieldName);
-
- const auto* fieldType = fromType->GetMemberType(i);
- if (fieldType->GetKind() == NKikimr::NMiniKQL::TType::EKind::List) {
- const auto* listType = static_cast<const NKikimr::NMiniKQL::TListType*>(fieldType);
- fieldType = listType->GetItemType();
- } else if (fieldType->GetKind() == NKikimr::NMiniKQL::TType::EKind::Optional) {
- const auto* optionalType = static_cast<const NKikimr::NMiniKQL::TOptionalType*>(fieldType);
- fieldType = optionalType->GetItemType();
-
- if (listIsOptional) {
- if (fieldType->GetKind() == NKikimr::NMiniKQL::TType::EKind::List) {
- const auto* listType = static_cast<const NKikimr::NMiniKQL::TListType*>(fieldType);
- fieldType = listType->GetItemType();
- }
- }
- }
- YQL_ENSURE(fieldType->GetKind() == NKikimr::NMiniKQL::TType::EKind::Struct ||
- fieldType->GetKind() == NKikimr::NMiniKQL::TType::EKind::Data,
- "unsupported field kind [" << fieldType->GetKindAsStr() << "], field [" << fieldName << "]");
- if (fieldType->GetKind() == NKikimr::NMiniKQL::TType::EKind::Struct) {
- FillFieldMappings(static_cast<const NKikimr::NMiniKQL::TStructType*>(fieldType),
- *mappings[i].Field->message_type(),
- mappings[i].NestedFields, Nothing(), listIsOptional, {});
- }
- }
- }
-
- /**
- * Extract field values from the given protobuf message into an array of unboxed values.
- *
- * @param factory to create nested unboxed values.
- * @param source source protobuf message.
- * @param destination destination array of unboxed values. Each element in the array corresponds to a field
- * in the protobuf message.
- * @param mappings vector of protobuf field descriptors which denotes relation between fields of the
- * source message and elements of the destination array.
- * @param scratch temporary string which will be used during conversion.
- */
- void FillInputValue(
- const THolderFactory& factory,
- const Message* source,
- TUnboxedValue* destination,
- const TVector<TFieldMapping>& mappings,
- const TMaybe<TString>& timestampColumn,
- ITimeProvider* timeProvider,
- EEnumPolicy enumPolicy
- ) {
- TString scratch;
- auto reflection = source->GetReflection();
- for (ui32 i = 0; i < mappings.size(); ++i) {
- auto mapping = mappings[i];
- if (!mapping.Field) {
- YQL_ENSURE(timestampColumn && mapping.Name == *timestampColumn);
- destination[i] = TUnboxedValuePod(timeProvider->Now().MicroSeconds());
- continue;
- }
-
- const auto type = mapping.Field->type();
- if (mapping.Field->label() == FieldDescriptor::LABEL_REPEATED) {
- const auto size = static_cast<ui32>(reflection->FieldSize(*source, mapping.Field));
- if (size == 0) {
- destination[i] = factory.GetEmptyContainerLazy();
- } else {
- TUnboxedValue* inplace = nullptr;
- destination[i] = factory.CreateDirectArrayHolder(size, inplace);
- for (ui32 j = 0; j < size; ++j) {
- switch (type) {
- case FieldDescriptor::TYPE_DOUBLE:
- inplace[j] = TUnboxedValuePod(reflection->GetRepeatedDouble(*source, mapping.Field, j));
- break;
-
- case FieldDescriptor::TYPE_FLOAT:
- inplace[j] = TUnboxedValuePod(reflection->GetRepeatedFloat(*source, mapping.Field, j));
- break;
-
- case FieldDescriptor::TYPE_INT64:
- case FieldDescriptor::TYPE_SFIXED64:
- case FieldDescriptor::TYPE_SINT64:
- inplace[j] = TUnboxedValuePod(reflection->GetRepeatedInt64(*source, mapping.Field, j));
- break;
-
- case FieldDescriptor::TYPE_ENUM:
- switch (EnumFormatType(*mapping.Field, enumPolicy)) {
- case EEnumFormatType::Int32:
- inplace[j] = TUnboxedValuePod(reflection->GetRepeatedEnumValue(*source, mapping.Field, j));
- break;
- case EEnumFormatType::String:
- inplace[j] = MakeString(reflection->GetRepeatedEnum(*source, mapping.Field, j)->name());
- break;
- }
- break;
-
- case FieldDescriptor::TYPE_UINT64:
- case FieldDescriptor::TYPE_FIXED64:
- inplace[j] = TUnboxedValuePod(reflection->GetRepeatedUInt64(*source, mapping.Field, j));
- break;
-
- case FieldDescriptor::TYPE_INT32:
- case FieldDescriptor::TYPE_SFIXED32:
- case FieldDescriptor::TYPE_SINT32:
- inplace[j] = TUnboxedValuePod(reflection->GetRepeatedInt32(*source, mapping.Field, j));
- break;
-
- case FieldDescriptor::TYPE_UINT32:
- case FieldDescriptor::TYPE_FIXED32:
- inplace[j] = TUnboxedValuePod(reflection->GetRepeatedUInt32(*source, mapping.Field, j));
- break;
-
- case FieldDescriptor::TYPE_BOOL:
- inplace[j] = TUnboxedValuePod(reflection->GetRepeatedBool(*source, mapping.Field, j));
- break;
-
- case FieldDescriptor::TYPE_STRING:
- inplace[j] = MakeString(reflection->GetRepeatedStringReference(*source, mapping.Field, j, &scratch));
- break;
-
- case FieldDescriptor::TYPE_BYTES:
- inplace[j] = MakeString(reflection->GetRepeatedStringReference(*source, mapping.Field, j, &scratch));
- break;
-
- case FieldDescriptor::TYPE_MESSAGE:
- {
- const Message& nestedMessage = reflection->GetRepeatedMessage(*source, mapping.Field, j);
- TUnboxedValue* nestedValues = nullptr;
- inplace[j] = factory.CreateDirectArrayHolder(static_cast<ui32>(mapping.NestedFields.size()),
- nestedValues);
- FillInputValue(factory, &nestedMessage, nestedValues, mapping.NestedFields, Nothing(), timeProvider, enumPolicy);
- }
- break;
-
- default:
- ythrow yexception() << "Unsupported protobuf type: " << mapping.Field->type_name() << ", field: " << mapping.Field->name();
- }
- }
- }
- } else {
- if (!reflection->HasField(*source, mapping.Field)) {
- continue;
- }
-
- switch (type) {
- case FieldDescriptor::TYPE_DOUBLE:
- destination[i] = TUnboxedValuePod(reflection->GetDouble(*source, mapping.Field));
- break;
-
- case FieldDescriptor::TYPE_FLOAT:
- destination[i] = TUnboxedValuePod(reflection->GetFloat(*source, mapping.Field));
- break;
-
- case FieldDescriptor::TYPE_INT64:
- case FieldDescriptor::TYPE_SFIXED64:
- case FieldDescriptor::TYPE_SINT64:
- destination[i] = TUnboxedValuePod(reflection->GetInt64(*source, mapping.Field));
- break;
-
- case FieldDescriptor::TYPE_ENUM:
- switch (EnumFormatType(*mapping.Field, enumPolicy)) {
- case EEnumFormatType::Int32:
- destination[i] = TUnboxedValuePod(reflection->GetEnumValue(*source, mapping.Field));
- break;
- case EEnumFormatType::String:
- destination[i] = MakeString(reflection->GetEnum(*source, mapping.Field)->name());
- break;
- }
- break;
-
- case FieldDescriptor::TYPE_UINT64:
- case FieldDescriptor::TYPE_FIXED64:
- destination[i] = TUnboxedValuePod(reflection->GetUInt64(*source, mapping.Field));
- break;
-
- case FieldDescriptor::TYPE_INT32:
- case FieldDescriptor::TYPE_SFIXED32:
- case FieldDescriptor::TYPE_SINT32:
- destination[i] = TUnboxedValuePod(reflection->GetInt32(*source, mapping.Field));
- break;
-
- case FieldDescriptor::TYPE_UINT32:
- case FieldDescriptor::TYPE_FIXED32:
- destination[i] = TUnboxedValuePod(reflection->GetUInt32(*source, mapping.Field));
- break;
-
- case FieldDescriptor::TYPE_BOOL:
- destination[i] = TUnboxedValuePod(reflection->GetBool(*source, mapping.Field));
- break;
-
- case FieldDescriptor::TYPE_STRING:
- destination[i] = MakeString(reflection->GetStringReference(*source, mapping.Field, &scratch));
- break;
-
- case FieldDescriptor::TYPE_BYTES:
- destination[i] = MakeString(reflection->GetStringReference(*source, mapping.Field, &scratch));
- break;
- case FieldDescriptor::TYPE_MESSAGE:
- {
- const Message& nestedMessage = reflection->GetMessage(*source, mapping.Field);
- TUnboxedValue* nestedValues = nullptr;
- destination[i] = factory.CreateDirectArrayHolder(static_cast<ui32>(mapping.NestedFields.size()),
- nestedValues);
- FillInputValue(factory, &nestedMessage, nestedValues, mapping.NestedFields, Nothing(), timeProvider, enumPolicy);
- }
- break;
-
- default:
- ythrow yexception() << "Unsupported protobuf type: " << mapping.Field->type_name()
- << ", field: " << mapping.Field->name();
- }
- }
- }
- }
-
-
- /**
- * Convert unboxed value to protobuf.
- *
- * @param source unboxed value to extract data from. Type of the value should be struct. It's UB to pass
- * a non-struct value here.
- * @param destination destination message. Data in this message will be overwritten
- * by data from unboxed value.
- * @param mappings vector of protobuf field descriptors which denotes relation between struct fields
- * and message fields. For any i-th element of this vector, type of the i-th element of
- * the unboxed structure must match type of the field pointed by descriptor. Size of this
- * vector should match the number of fields in the struct.
- */
- void FillOutputMessage(
- const TUnboxedValue& source,
- Message* destination,
- const TVector<TFieldMapping>& mappings,
- EEnumPolicy enumPolicy
- ) {
- auto reflection = destination->GetReflection();
- for (ui32 i = 0; i < mappings.size(); ++i) {
- const auto& mapping = mappings[i];
- const auto& cell = source.GetElement(i);
- if (!cell) {
- reflection->ClearField(destination, mapping.Field);
- continue;
- }
- const auto type = mapping.Field->type();
- if (mapping.Field->label() == FieldDescriptor::LABEL_REPEATED) {
- const auto iter = cell.GetListIterator();
- reflection->ClearField(destination, mapping.Field);
- for (TUnboxedValue item; iter.Next(item);) {
- switch (mapping.Field->type()) {
- case FieldDescriptor::TYPE_DOUBLE:
- reflection->AddDouble(destination, mapping.Field, item.Get<double>());
- break;
-
- case FieldDescriptor::TYPE_FLOAT:
- reflection->AddFloat(destination, mapping.Field, item.Get<float>());
- break;
-
- case FieldDescriptor::TYPE_INT64:
- case FieldDescriptor::TYPE_SFIXED64:
- case FieldDescriptor::TYPE_SINT64:
- reflection->AddInt64(destination, mapping.Field, item.Get<i64>());
- break;
-
- case FieldDescriptor::TYPE_ENUM: {
- switch (EnumFormatType(*mapping.Field, enumPolicy)) {
- case EEnumFormatType::Int32:
- reflection->AddEnumValue(destination, mapping.Field, item.Get<i32>());
- break;
- case EEnumFormatType::String: {
- auto enumValueDescriptor = mapping.Field->enum_type()->FindValueByName(TString(item.AsStringRef()));
- if (!enumValueDescriptor) {
- enumValueDescriptor = mapping.Field->default_value_enum();
- }
- reflection->AddEnum(destination, mapping.Field, enumValueDescriptor);
- break;
- }
- }
- break;
- }
-
- case FieldDescriptor::TYPE_UINT64:
- case FieldDescriptor::TYPE_FIXED64:
- reflection->AddUInt64(destination, mapping.Field, item.Get<ui64>());
- break;
-
- case FieldDescriptor::TYPE_INT32:
- case FieldDescriptor::TYPE_SFIXED32:
- case FieldDescriptor::TYPE_SINT32:
- reflection->AddInt32(destination, mapping.Field, item.Get<i32>());
- break;
-
- case FieldDescriptor::TYPE_UINT32:
- case FieldDescriptor::TYPE_FIXED32:
- reflection->AddUInt32(destination, mapping.Field, item.Get<ui32>());
- break;
-
- case FieldDescriptor::TYPE_BOOL:
- reflection->AddBool(destination, mapping.Field, item.Get<bool>());
- break;
-
- case FieldDescriptor::TYPE_STRING:
- reflection->AddString(destination, mapping.Field, TString(item.AsStringRef()));
- break;
-
- case FieldDescriptor::TYPE_BYTES:
- reflection->AddString(destination, mapping.Field, TString(item.AsStringRef()));
- break;
-
- case FieldDescriptor::TYPE_MESSAGE:
- {
- auto* nestedMessage = reflection->AddMessage(destination, mapping.Field);
- FillOutputMessage(item, nestedMessage, mapping.NestedFields, enumPolicy);
- }
- break;
-
- default:
- ythrow yexception() << "Unsupported protobuf type: "
- << mapping.Field->type_name() << ", field: " << mapping.Field->name();
- }
- }
- } else {
- switch (type) {
- case FieldDescriptor::TYPE_DOUBLE:
- reflection->SetDouble(destination, mapping.Field, cell.Get<double>());
- break;
-
- case FieldDescriptor::TYPE_FLOAT:
- reflection->SetFloat(destination, mapping.Field, cell.Get<float>());
- break;
-
- case FieldDescriptor::TYPE_INT64:
- case FieldDescriptor::TYPE_SFIXED64:
- case FieldDescriptor::TYPE_SINT64:
- reflection->SetInt64(destination, mapping.Field, cell.Get<i64>());
- break;
-
- case FieldDescriptor::TYPE_ENUM: {
- switch (EnumFormatType(*mapping.Field, enumPolicy)) {
- case EEnumFormatType::Int32:
- reflection->SetEnumValue(destination, mapping.Field, cell.Get<i32>());
- break;
- case EEnumFormatType::String: {
- auto enumValueDescriptor = mapping.Field->enum_type()->FindValueByName(TString(cell.AsStringRef()));
- if (!enumValueDescriptor) {
- enumValueDescriptor = mapping.Field->default_value_enum();
- }
- reflection->SetEnum(destination, mapping.Field, enumValueDescriptor);
- break;
- }
- }
- break;
- }
-
- case FieldDescriptor::TYPE_UINT64:
- case FieldDescriptor::TYPE_FIXED64:
- reflection->SetUInt64(destination, mapping.Field, cell.Get<ui64>());
- break;
-
- case FieldDescriptor::TYPE_INT32:
- case FieldDescriptor::TYPE_SFIXED32:
- case FieldDescriptor::TYPE_SINT32:
- reflection->SetInt32(destination, mapping.Field, cell.Get<i32>());
- break;
-
- case FieldDescriptor::TYPE_UINT32:
- case FieldDescriptor::TYPE_FIXED32:
- reflection->SetUInt32(destination, mapping.Field, cell.Get<ui32>());
- break;
-
- case FieldDescriptor::TYPE_BOOL:
- reflection->SetBool(destination, mapping.Field, cell.Get<bool>());
- break;
-
- case FieldDescriptor::TYPE_STRING:
- reflection->SetString(destination, mapping.Field, TString(cell.AsStringRef()));
- break;
-
- case FieldDescriptor::TYPE_BYTES:
- reflection->SetString(destination, mapping.Field, TString(cell.AsStringRef()));
- break;
-
- case FieldDescriptor::TYPE_MESSAGE:
- {
- auto* nestedMessage = reflection->MutableMessage(destination, mapping.Field);
- FillOutputMessage(cell, nestedMessage, mapping.NestedFields, enumPolicy);
- }
- break;
-
- default:
- ythrow yexception() << "Unsupported protobuf type: "
- << mapping.Field->type_name() << ", field: " << mapping.Field->name();
- }
- }
- }
- }
-
- /**
- * Converts input messages to unboxed values.
- */
- class TInputConverter {
- protected:
- IWorker* Worker_;
- TVector<TFieldMapping> Mappings_;
- TPlainContainerCache Cache_;
- TMaybe<TString> TimestampColumn_;
- EEnumPolicy EnumPolicy_ = EEnumPolicy::Int32;
-
- public:
- explicit TInputConverter(const TProtobufRawInputSpec& inputSpec, IWorker* worker)
- : Worker_(worker)
- , TimestampColumn_(inputSpec.GetTimestampColumn())
- , EnumPolicy_(inputSpec.GetSchemaOptions().EnumPolicy)
- {
- FillFieldMappings(
- Worker_->GetInputType(), inputSpec.GetDescriptor(),
- Mappings_, TimestampColumn_,
- inputSpec.GetSchemaOptions().ListIsOptional,
- inputSpec.GetSchemaOptions().FieldRenames
- );
- }
-
- public:
- void DoConvert(const Message* message, TUnboxedValue& result) {
- auto& holderFactory = Worker_->GetGraph().GetHolderFactory();
- TUnboxedValue* items = nullptr;
- result = Cache_.NewArray(holderFactory, static_cast<ui32>(Mappings_.size()), items);
- FillInputValue(holderFactory, message, items, Mappings_, TimestampColumn_, Worker_->GetTimeProvider(), EnumPolicy_);
- }
-
- void ClearCache() {
- Cache_.Clear();
- }
- };
-
- template <typename TOutputSpec>
- using OutputItemType = typename TOutputSpecTraits<TOutputSpec>::TOutputItemType;
-
- template <typename TOutputSpec>
- class TOutputConverter;
-
- /**
- * Converts unboxed values to output messages (single-output program case).
- */
- template <>
- class TOutputConverter<TProtobufRawOutputSpec> {
- protected:
- IWorker* Worker_;
- TVector<TFieldMapping> OutputColumns_;
- TProtoHolder<Message> Message_;
- EEnumPolicy EnumPolicy_ = EEnumPolicy::Int32;
-
- public:
- explicit TOutputConverter(const TProtobufRawOutputSpec& outputSpec, IWorker* worker)
- : Worker_(worker)
- , EnumPolicy_(outputSpec.GetSchemaOptions().EnumPolicy)
- {
- if (!Worker_->GetOutputType()->IsStruct()) {
- ythrow yexception() << "protobuf output spec does not support multiple outputs";
- }
-
- FillFieldMappings(
- static_cast<const NKikimr::NMiniKQL::TStructType*>(Worker_->GetOutputType()),
- outputSpec.GetDescriptor(),
- OutputColumns_,
- Nothing(),
- outputSpec.GetSchemaOptions().ListIsOptional,
- outputSpec.GetSchemaOptions().FieldRenames
- );
-
- auto* factory = outputSpec.GetFactory();
-
- if (!factory) {
- factory = MessageFactory::generated_factory();
- }
-
- Message_.Reset(factory->GetPrototype(&outputSpec.GetDescriptor())->New(outputSpec.GetArena()));
- }
-
- OutputItemType<TProtobufRawOutputSpec> DoConvert(TUnboxedValue value) {
- FillOutputMessage(value, Message_.Get(), OutputColumns_, EnumPolicy_);
- return Message_.Get();
- }
- };
-
- /*
- * Converts unboxed values to output type (multi-output programs case).
- */
- template <>
- class TOutputConverter<TProtobufRawMultiOutputSpec> {
- protected:
- IWorker* Worker_;
- TVector<TVector<TFieldMapping>> OutputColumns_;
- TVector<TProtoHolder<Message>> Messages_;
- EEnumPolicy EnumPolicy_ = EEnumPolicy::Int32;
-
- public:
- explicit TOutputConverter(const TProtobufRawMultiOutputSpec& outputSpec, IWorker* worker)
- : Worker_(worker)
- , EnumPolicy_(outputSpec.GetSchemaOptions().EnumPolicy)
- {
- const auto* outputType = Worker_->GetOutputType();
- Y_ENSURE(outputType->IsVariant(), "protobuf multi-output spec requires multi-output program");
- const auto* variantType = static_cast<const NKikimr::NMiniKQL::TVariantType*>(outputType);
- Y_ENSURE(
- variantType->GetUnderlyingType()->IsTuple(),
- "protobuf multi-output spec requires variant over tuple as program output type"
- );
- Y_ENSURE(
- outputSpec.GetOutputsNumber() == variantType->GetAlternativesCount(),
- "number of outputs provided by spec does not match number of variant alternatives"
- );
-
- auto defaultFactory = MessageFactory::generated_factory();
-
- for (ui32 i = 0; i < variantType->GetAlternativesCount(); ++i) {
- const auto* type = variantType->GetAlternativeType(i);
- Y_ASSERT(type->IsStruct());
- Y_ASSERT(OutputColumns_.size() == i && Messages_.size() == i);
-
- OutputColumns_.push_back({});
-
- FillFieldMappings(
- static_cast<const NKikimr::NMiniKQL::TStructType*>(type),
- outputSpec.GetDescriptor(i),
- OutputColumns_.back(),
- Nothing(),
- outputSpec.GetSchemaOptions().ListIsOptional,
- {}
- );
-
- auto factory = outputSpec.GetFactory(i);
- if (!factory) {
- factory = defaultFactory;
- }
-
- Messages_.push_back(TProtoHolder<Message>(
- factory->GetPrototype(&outputSpec.GetDescriptor(i))->New(outputSpec.GetArena(i))
- ));
- }
- }
-
- OutputItemType<TProtobufRawMultiOutputSpec> DoConvert(TUnboxedValue value) {
- auto index = value.GetVariantIndex();
- auto msgPtr = Messages_[index].Get();
- FillOutputMessage(value.GetVariantItem(), msgPtr, OutputColumns_[index], EnumPolicy_);
- return {index, msgPtr};
- }
- };
-
- /**
- * List (or, better, stream) of unboxed values. Used as an input value in pull workers.
- */
- class TProtoListValue final: public TCustomListValue {
- private:
- mutable bool HasIterator_ = false;
- THolder<IStream<Message*>> Underlying_;
- TInputConverter Converter_;
- IWorker* Worker_;
- TScopedAlloc& ScopedAlloc_;
-
- public:
- TProtoListValue(
- TMemoryUsageInfo* memInfo,
- const TProtobufRawInputSpec& inputSpec,
- THolder<IStream<Message*>> underlying,
- IWorker* worker
- )
- : TCustomListValue(memInfo)
- , Underlying_(std::move(underlying))
- , Converter_(inputSpec, worker)
- , Worker_(worker)
- , ScopedAlloc_(Worker_->GetScopedAlloc())
- {
- }
-
- ~TProtoListValue() override {
- {
- // This list value stored in the worker's computation graph and destroyed upon the computation
- // graph's destruction. This brings us to an interesting situation: scoped alloc is acquired,
- // worker and computation graph are half-way destroyed, and now it's our turn to die. The problem is,
- // the underlying stream may own another worker. This happens when chaining programs. Now, to destroy
- // that worker correctly, we need to release our scoped alloc (because that worker has its own
- // computation graph and scoped alloc).
- // By the way, note that we shouldn't interact with the worker here because worker is in the middle of
- // its own destruction. So we're using our own reference to the scoped alloc. That reference is alive
- // because scoped alloc destroyed after computation graph.
- auto unguard = Unguard(ScopedAlloc_);
- Underlying_.Destroy();
- }
- }
-
- public:
- TUnboxedValue GetListIterator() const override {
- YQL_ENSURE(!HasIterator_, "Only one pass over input is supported");
- HasIterator_ = true;
- return TUnboxedValuePod(const_cast<TProtoListValue*>(this));
- }
-
- bool Next(TUnboxedValue& result) override {
- const Message* message;
- {
- auto unguard = Unguard(ScopedAlloc_);
- message = Underlying_->Fetch();
- }
-
- if (!message) {
- return false;
- }
-
- Converter_.DoConvert(message, result);
-
- return true;
- }
-
- EFetchStatus Fetch(TUnboxedValue& result) override {
- if (Next(result)) {
- return EFetchStatus::Ok;
- } else {
- return EFetchStatus::Finish;
- }
- }
- };
-
- /**
- * Consumer which converts messages to unboxed values and relays them to the worker. Used as a return value
- * of the push processor's Process function.
- */
- class TProtoConsumerImpl final: public IConsumer<Message*> {
- private:
- TWorkerHolder<IPushStreamWorker> WorkerHolder_;
- TInputConverter Converter_;
-
- public:
- explicit TProtoConsumerImpl(
- const TProtobufRawInputSpec& inputSpec,
- TWorkerHolder<IPushStreamWorker> worker
- )
- : WorkerHolder_(std::move(worker))
- , Converter_(inputSpec, WorkerHolder_.Get())
- {
- }
-
- ~TProtoConsumerImpl() override {
- with_lock(WorkerHolder_->GetScopedAlloc()) {
- Converter_.ClearCache();
- }
- }
-
- public:
- void OnObject(Message* message) override {
- TBindTerminator bind(WorkerHolder_->GetGraph().GetTerminator());
-
- with_lock(WorkerHolder_->GetScopedAlloc()) {
- TUnboxedValue result;
- Converter_.DoConvert(message, result);
- WorkerHolder_->Push(std::move(result));
- }
- }
-
- void OnFinish() override {
- TBindTerminator bind(WorkerHolder_->GetGraph().GetTerminator());
-
- with_lock(WorkerHolder_->GetScopedAlloc()) {
- WorkerHolder_->OnFinish();
- }
- }
- };
-
- /**
- * Protobuf input stream for unboxed value streams.
- */
- template <typename TOutputSpec>
- class TRawProtoStreamImpl final: public IStream<OutputItemType<TOutputSpec>> {
- protected:
- TWorkerHolder<IPullStreamWorker> WorkerHolder_;
- TOutputConverter<TOutputSpec> Converter_;
-
- public:
- explicit TRawProtoStreamImpl(const TOutputSpec& outputSpec, TWorkerHolder<IPullStreamWorker> worker)
- : WorkerHolder_(std::move(worker))
- , Converter_(outputSpec, WorkerHolder_.Get())
- {
- }
-
- public:
- OutputItemType<TOutputSpec> Fetch() override {
- TBindTerminator bind(WorkerHolder_->GetGraph().GetTerminator());
-
- with_lock(WorkerHolder_->GetScopedAlloc()) {
- TUnboxedValue value;
-
- auto status = WorkerHolder_->GetOutput().Fetch(value);
-
- YQL_ENSURE(status != EFetchStatus::Yield, "Yield is not supported in pull mode");
-
- if (status == EFetchStatus::Finish) {
- return TOutputSpecTraits<TOutputSpec>::StreamSentinel;
- }
-
- return Converter_.DoConvert(value);
- }
- }
- };
-
- /**
- * Protobuf input stream for unboxed value lists.
- */
- template <typename TOutputSpec>
- class TRawProtoListImpl final: public IStream<OutputItemType<TOutputSpec>> {
- protected:
- TWorkerHolder<IPullListWorker> WorkerHolder_;
- TOutputConverter<TOutputSpec> Converter_;
-
- public:
- explicit TRawProtoListImpl(const TOutputSpec& outputSpec, TWorkerHolder<IPullListWorker> worker)
- : WorkerHolder_(std::move(worker))
- , Converter_(outputSpec, WorkerHolder_.Get())
- {
- }
-
- public:
- OutputItemType<TOutputSpec> Fetch() override {
- TBindTerminator bind(WorkerHolder_->GetGraph().GetTerminator());
-
- with_lock(WorkerHolder_->GetScopedAlloc()) {
- TUnboxedValue value;
-
- if (!WorkerHolder_->GetOutputIterator().Next(value)) {
- return TOutputSpecTraits<TOutputSpec>::StreamSentinel;
- }
-
- return Converter_.DoConvert(value);
- }
- }
- };
-
- /**
- * Push relay used to convert generated unboxed value to a message and push it to the user's consumer.
- */
- template <typename TOutputSpec>
- class TPushRelayImpl: public IConsumer<const TUnboxedValue*> {
- private:
- THolder<IConsumer<OutputItemType<TOutputSpec>>> Underlying_;
- TOutputConverter<TOutputSpec> Converter_;
- IWorker* Worker_;
-
- public:
- TPushRelayImpl(
- const TOutputSpec& outputSpec,
- IPushStreamWorker* worker,
- THolder<IConsumer<OutputItemType<TOutputSpec>>> underlying
- )
- : Underlying_(std::move(underlying))
- , Converter_(outputSpec, worker)
- , Worker_(worker)
- {
- }
-
- // If you've read a comment in the TProtoListValue's destructor, you may be wondering why don't we do the
- // same trick here. Well, that's because in push mode, consumer is destroyed before acquiring scoped alloc and
- // destroying computation graph.
-
- public:
- void OnObject(const TUnboxedValue* value) override {
- OutputItemType<TOutputSpec> message = Converter_.DoConvert(*value);
- auto unguard = Unguard(Worker_->GetScopedAlloc());
- Underlying_->OnObject(message);
- }
-
- void OnFinish() override {
- auto unguard = Unguard(Worker_->GetScopedAlloc());
- Underlying_->OnFinish();
- }
- };
-}
-
-using ConsumerType = TInputSpecTraits<TProtobufRawInputSpec>::TConsumerType;
-
-void TInputSpecTraits<TProtobufRawInputSpec>::PreparePullStreamWorker(
- const TProtobufRawInputSpec& inputSpec,
- IPullStreamWorker* worker,
- THolder<IStream<Message*>> stream
-) {
- with_lock(worker->GetScopedAlloc()) {
- worker->SetInput(
- worker->GetGraph().GetHolderFactory().Create<TProtoListValue>(inputSpec, std::move(stream), worker), 0);
- }
-}
-
-void TInputSpecTraits<TProtobufRawInputSpec>::PreparePullListWorker(
- const TProtobufRawInputSpec& inputSpec,
- IPullListWorker* worker,
- THolder<IStream<Message*>> stream
-) {
- with_lock(worker->GetScopedAlloc()) {
- worker->SetInput(
- worker->GetGraph().GetHolderFactory().Create<TProtoListValue>(inputSpec, std::move(stream), worker), 0);
- }
-}
-
-ConsumerType TInputSpecTraits<TProtobufRawInputSpec>::MakeConsumer(
- const TProtobufRawInputSpec& inputSpec,
- TWorkerHolder<IPushStreamWorker> worker
-) {
- return MakeHolder<TProtoConsumerImpl>(inputSpec, std::move(worker));
-}
-
-template <typename TOutputSpec>
-using PullStreamReturnType = typename TOutputSpecTraits<TOutputSpec>::TPullStreamReturnType;
-template <typename TOutputSpec>
-using PullListReturnType = typename TOutputSpecTraits<TOutputSpec>::TPullListReturnType;
-
-PullStreamReturnType<TProtobufRawOutputSpec> TOutputSpecTraits<TProtobufRawOutputSpec>::ConvertPullStreamWorkerToOutputType(
- const TProtobufRawOutputSpec& outputSpec,
- TWorkerHolder<IPullStreamWorker> worker
-) {
- return MakeHolder<TRawProtoStreamImpl<TProtobufRawOutputSpec>>(outputSpec, std::move(worker));
-}
-
-PullListReturnType<TProtobufRawOutputSpec> TOutputSpecTraits<TProtobufRawOutputSpec>::ConvertPullListWorkerToOutputType(
- const TProtobufRawOutputSpec& outputSpec,
- TWorkerHolder<IPullListWorker> worker
-) {
- return MakeHolder<TRawProtoListImpl<TProtobufRawOutputSpec>>(outputSpec, std::move(worker));
-}
-
-void TOutputSpecTraits<TProtobufRawOutputSpec>::SetConsumerToWorker(
- const TProtobufRawOutputSpec& outputSpec,
- IPushStreamWorker* worker,
- THolder<IConsumer<TOutputItemType>> consumer
-) {
- worker->SetConsumer(MakeHolder<TPushRelayImpl<TProtobufRawOutputSpec>>(outputSpec, worker, std::move(consumer)));
-}
-
-PullStreamReturnType<TProtobufRawMultiOutputSpec> TOutputSpecTraits<TProtobufRawMultiOutputSpec>::ConvertPullStreamWorkerToOutputType(
- const TProtobufRawMultiOutputSpec& outputSpec,
- TWorkerHolder<IPullStreamWorker> worker
-) {
- return MakeHolder<TRawProtoStreamImpl<TProtobufRawMultiOutputSpec>>(outputSpec, std::move(worker));
-}
-
-PullListReturnType<TProtobufRawMultiOutputSpec> TOutputSpecTraits<TProtobufRawMultiOutputSpec>::ConvertPullListWorkerToOutputType(
- const TProtobufRawMultiOutputSpec& outputSpec,
- TWorkerHolder<IPullListWorker> worker
-) {
- return MakeHolder<TRawProtoListImpl<TProtobufRawMultiOutputSpec>>(outputSpec, std::move(worker));
-}
-
-void TOutputSpecTraits<TProtobufRawMultiOutputSpec>::SetConsumerToWorker(
- const TProtobufRawMultiOutputSpec& outputSpec,
- IPushStreamWorker* worker,
- THolder<IConsumer<TOutputItemType>> consumer
-) {
- worker->SetConsumer(MakeHolder<TPushRelayImpl<TProtobufRawMultiOutputSpec>>(outputSpec, worker, std::move(consumer)));
-}
diff --git a/ydb/library/yql/public/purecalc/io_specs/protobuf_raw/spec.h b/ydb/library/yql/public/purecalc/io_specs/protobuf_raw/spec.h
deleted file mode 100644
index 2a8fd196488..00000000000
--- a/ydb/library/yql/public/purecalc/io_specs/protobuf_raw/spec.h
+++ /dev/null
@@ -1,257 +0,0 @@
-#pragma once
-
-#include <ydb/library/yql/public/purecalc/common/interface.h>
-#include <ydb/library/yql/public/purecalc/helpers/protobuf/schema_from_proto.h>
-
-#include <google/protobuf/message.h>
-
-#include <util/generic/maybe.h>
-
-namespace NYql {
- namespace NPureCalc {
- /**
- * Processing mode for working with raw protobuf message inputs.
- *
- * In this mode purecalc accept pointers to abstract protobuf messages and processes them using the reflection
- * mechanism. All passed messages should have the same descriptor (the one you pass to the constructor
- * of the input spec).
- *
- * All working modes are implemented. In pull stream and pull list modes a program would accept a single object
- * stream of const protobuf messages. In push mode, a program will return a consumer of const protobuf messages.
- *
- * The program synopsis follows:
- *
- * @code
- * ... TPullStreamProgram::Apply(IStream<google::protobuf::Message*>);
- * ... TPullListProgram::Apply(IStream<google::protobuf::Message*>);
- * TConsumer<google::protobuf::Message*> TPushStreamProgram::Apply(...);
- * @endcode
- */
- class TProtobufRawInputSpec: public TInputSpecBase {
- private:
- const google::protobuf::Descriptor& Descriptor_;
- const TMaybe<TString> TimestampColumn_;
- const TProtoSchemaOptions SchemaOptions_;
- mutable TVector<NYT::TNode> SavedSchemas_;
-
- public:
- /**
- * Build input spec and associate the given message descriptor.
- */
- explicit TProtobufRawInputSpec(
- const google::protobuf::Descriptor& descriptor,
- const TMaybe<TString>& timestampColumn = Nothing(),
- const TProtoSchemaOptions& options = {}
- );
-
- public:
- const TVector<NYT::TNode>& GetSchemas() const override;
-
- /**
- * Get the descriptor associated with this spec.
- */
- const google::protobuf::Descriptor& GetDescriptor() const;
-
- const TMaybe<TString>& GetTimestampColumn() const;
-
- /*
- * Get options that customize input struct type building.
- */
- const TProtoSchemaOptions& GetSchemaOptions() const;
- };
-
- /**
- * Processing mode for working with raw protobuf message outputs.
- *
- * In this mode purecalc yields pointers to abstract protobuf messages. All generated messages share the same
- * descriptor so they can be safely converted into an appropriate message type.
- *
- * Note that one should not expect that the returned pointer will be valid forever; in can (and will) become
- * outdated once a new output is requested/pushed.
- *
- * All working modes are implemented. In pull stream and pull list modes a program will return an object
- * stream of non-const protobuf messages. In push mode, it will accept a single consumer of non-const
- * messages.
- *
- * The program synopsis follows:
- *
- * @code
- * IStream<google::protobuf::Message*> TPullStreamProgram::Apply(...);
- * IStream<google::protobuf::Message*> TPullListProgram::Apply(...);
- * ... TPushStreamProgram::Apply(TConsumer<google::protobuf::Message*>);
- * @endcode
- */
- class TProtobufRawOutputSpec: public TOutputSpecBase {
- private:
- const google::protobuf::Descriptor& Descriptor_;
- google::protobuf::MessageFactory* Factory_;
- TProtoSchemaOptions SchemaOptions_;
- google::protobuf::Arena* Arena_;
- mutable TMaybe<NYT::TNode> SavedSchema_;
-
- public:
- /**
- * Build output spec and associate the given message descriptor and maybe the given message factory.
- */
- explicit TProtobufRawOutputSpec(
- const google::protobuf::Descriptor& descriptor,
- google::protobuf::MessageFactory* = nullptr,
- const TProtoSchemaOptions& options = {},
- google::protobuf::Arena* arena = nullptr
- );
-
- public:
- const NYT::TNode& GetSchema() const override;
-
- /**
- * Get the descriptor associated with this spec.
- */
- const google::protobuf::Descriptor& GetDescriptor() const;
-
- /**
- * Set a new message factory which will be used to generate messages. Pass a null pointer to use the
- * default factory.
- */
- void SetFactory(google::protobuf::MessageFactory*);
-
- /**
- * Get the message factory which is currently associated with this spec.
- */
- google::protobuf::MessageFactory* GetFactory() const;
-
- /**
- * Set a new arena which will be used to generate messages. Pass a null pointer to create on the heap.
- */
- void SetArena(google::protobuf::Arena*);
-
- /**
- * Get the arena which is currently associated with this spec.
- */
- google::protobuf::Arena* GetArena() const;
-
- /**
- * Get options that customize output struct type building.
- */
- const TProtoSchemaOptions& GetSchemaOptions() const;
- };
-
- /**
- * Processing mode for working with raw protobuf messages and several outputs.
- *
- * The program synopsis follows:
- *
- * @code
- * IStream<std::pair<ui32, google::protobuf::Message*>> TPullStreamProgram::Apply(...);
- * IStream<std::pair<ui32, google::protobuf::Message*>> TPullListProgram::Apply(...);
- * ... TPushStreamProgram::Apply(TConsumer<std::pair<ui32, google::protobuf::Message*>>);
- * @endcode
- */
- class TProtobufRawMultiOutputSpec: public TOutputSpecBase {
- private:
- TVector<const google::protobuf::Descriptor*> Descriptors_;
- TVector<google::protobuf::MessageFactory*> Factories_;
- const TProtoSchemaOptions SchemaOptions_;
- TVector<google::protobuf::Arena*> Arenas_;
- mutable NYT::TNode SavedSchema_;
-
- public:
- TProtobufRawMultiOutputSpec(
- TVector<const google::protobuf::Descriptor*>,
- TMaybe<TVector<google::protobuf::MessageFactory*>> = {},
- const TProtoSchemaOptions& options = {},
- TMaybe<TVector<google::protobuf::Arena*>> arenas = {}
- );
-
- public:
- const NYT::TNode& GetSchema() const override;
-
- /**
- * Get the descriptor associated with given output.
- */
- const google::protobuf::Descriptor& GetDescriptor(ui32) const;
-
- /**
- * Set a new message factory for given output. It will be used to generate messages for this output.
- */
- void SetFactory(ui32, google::protobuf::MessageFactory*);
-
- /**
- * Get the message factory which is currently associated with given output.
- */
- google::protobuf::MessageFactory* GetFactory(ui32) const;
-
- /**
- * Set a new arena for given output. It will be used to generate messages for this output.
- */
- void SetArena(ui32, google::protobuf::Arena*);
-
- /**
- * Get the arena which is currently associated with given output.
- */
- google::protobuf::Arena* GetArena(ui32) const;
-
- /**
- * Get number of outputs for this spec.
- */
- ui32 GetOutputsNumber() const;
-
- /**
- * Get options that customize output struct type building.
- */
- const TProtoSchemaOptions& GetSchemaOptions() const;
- };
-
- template <>
- struct TInputSpecTraits<TProtobufRawInputSpec> {
- static const constexpr bool IsPartial = false;
-
- static const constexpr bool SupportPullStreamMode = true;
- static const constexpr bool SupportPullListMode = true;
- static const constexpr bool SupportPushStreamMode = true;
-
- using TConsumerType = THolder<IConsumer<google::protobuf::Message*>>;
-
- static void PreparePullStreamWorker(const TProtobufRawInputSpec&, IPullStreamWorker*, THolder<IStream<google::protobuf::Message*>>);
- static void PreparePullListWorker(const TProtobufRawInputSpec&, IPullListWorker*, THolder<IStream<google::protobuf::Message*>>);
- static TConsumerType MakeConsumer(const TProtobufRawInputSpec&, TWorkerHolder<IPushStreamWorker>);
- };
-
- template <>
- struct TOutputSpecTraits<TProtobufRawOutputSpec> {
- static const constexpr bool IsPartial = false;
-
- static const constexpr bool SupportPullStreamMode = true;
- static const constexpr bool SupportPullListMode = true;
- static const constexpr bool SupportPushStreamMode = true;
-
- using TOutputItemType = google::protobuf::Message*;
- using TPullStreamReturnType = THolder<IStream<TOutputItemType>>;
- using TPullListReturnType = THolder<IStream<TOutputItemType>>;
-
- static const constexpr TOutputItemType StreamSentinel = nullptr;
-
- static TPullStreamReturnType ConvertPullStreamWorkerToOutputType(const TProtobufRawOutputSpec&, TWorkerHolder<IPullStreamWorker>);
- static TPullListReturnType ConvertPullListWorkerToOutputType(const TProtobufRawOutputSpec&, TWorkerHolder<IPullListWorker>);
- static void SetConsumerToWorker(const TProtobufRawOutputSpec&, IPushStreamWorker*, THolder<IConsumer<TOutputItemType>>);
- };
-
- template <>
- struct TOutputSpecTraits<TProtobufRawMultiOutputSpec> {
- static const constexpr bool IsPartial = false;
-
- static const constexpr bool SupportPullStreamMode = true;
- static const constexpr bool SupportPullListMode = true;
- static const constexpr bool SupportPushStreamMode = true;
-
- using TOutputItemType = std::pair<ui32, google::protobuf::Message*>;
- using TPullStreamReturnType = THolder<IStream<TOutputItemType>>;
- using TPullListReturnType = THolder<IStream<TOutputItemType>>;
-
- static const constexpr TOutputItemType StreamSentinel = {0, nullptr};
-
- static TPullStreamReturnType ConvertPullStreamWorkerToOutputType(const TProtobufRawMultiOutputSpec&, TWorkerHolder<IPullStreamWorker>);
- static TPullListReturnType ConvertPullListWorkerToOutputType(const TProtobufRawMultiOutputSpec&, TWorkerHolder<IPullListWorker>);
- static void SetConsumerToWorker(const TProtobufRawMultiOutputSpec&, IPushStreamWorker*, THolder<IConsumer<TOutputItemType>>);
- };
- }
-}
diff --git a/ydb/library/yql/public/purecalc/io_specs/protobuf_raw/ya.make b/ydb/library/yql/public/purecalc/io_specs/protobuf_raw/ya.make
deleted file mode 100644
index ad72bbf43ab..00000000000
--- a/ydb/library/yql/public/purecalc/io_specs/protobuf_raw/ya.make
+++ /dev/null
@@ -1,16 +0,0 @@
-LIBRARY()
-
-PEERDIR(
- ydb/library/yql/public/purecalc/common
- ydb/library/yql/public/purecalc/helpers/protobuf
-)
-
-SRCS(
- proto_holder.cpp
- spec.cpp
- spec.h
-)
-
-YQL_LAST_ABI_VERSION()
-
-END()
diff --git a/ydb/library/yql/public/purecalc/io_specs/ut/ya.make b/ydb/library/yql/public/purecalc/io_specs/ut/ya.make
deleted file mode 100644
index 9cb38b87e7f..00000000000
--- a/ydb/library/yql/public/purecalc/io_specs/ut/ya.make
+++ /dev/null
@@ -1,5 +0,0 @@
-RECURSE(
- ../arrow/ut
- ../mkql/ut
- ../protobuf/ut
-)
diff --git a/ydb/library/yql/public/purecalc/io_specs/ya.make b/ydb/library/yql/public/purecalc/io_specs/ya.make
deleted file mode 100644
index 7f129d6dce6..00000000000
--- a/ydb/library/yql/public/purecalc/io_specs/ya.make
+++ /dev/null
@@ -1,10 +0,0 @@
-RECURSE(
- arrow
- mkql
- protobuf
- protobuf_raw
-)
-
-RECURSE_FOR_TESTS(
- ut
-)
diff --git a/ydb/library/yql/public/purecalc/no_llvm/purecalc.h b/ydb/library/yql/public/purecalc/no_llvm/purecalc.h
deleted file mode 100644
index 9b281a7caa7..00000000000
--- a/ydb/library/yql/public/purecalc/no_llvm/purecalc.h
+++ /dev/null
@@ -1,4 +0,0 @@
-#pragma once
-
-#include "common/interface.h"
-
diff --git a/ydb/library/yql/public/purecalc/no_llvm/ya.make b/ydb/library/yql/public/purecalc/no_llvm/ya.make
deleted file mode 100644
index 3f5220f45cf..00000000000
--- a/ydb/library/yql/public/purecalc/no_llvm/ya.make
+++ /dev/null
@@ -1,30 +0,0 @@
-LIBRARY()
-
-ADDINCL(
- ydb/library/yql/public/purecalc
-)
-
-SRCDIR(
- ydb/library/yql/public/purecalc
-)
-
-SRCS(
- purecalc.cpp
-)
-
-PEERDIR(
- yql/essentials/public/udf/service/exception_policy
- ydb/library/yql/public/purecalc/common/no_llvm
- ydb/library/yql/providers/yt/codec/codegen/no_llvm
- yql/essentials/minikql/codegen/no_llvm
- yql/essentials/minikql/computation/no_llvm
- yql/essentials/minikql/invoke_builtins/no_llvm
- yql/essentials/minikql/comp_nodes/no_llvm
-)
-
-YQL_LAST_ABI_VERSION()
-
-PROVIDES(YQL_PURECALC)
-
-END()
-
diff --git a/ydb/library/yql/public/purecalc/purecalc.cpp b/ydb/library/yql/public/purecalc/purecalc.cpp
deleted file mode 100644
index 80cfd39d963..00000000000
--- a/ydb/library/yql/public/purecalc/purecalc.cpp
+++ /dev/null
@@ -1 +0,0 @@
-#include "purecalc.h"
diff --git a/ydb/library/yql/public/purecalc/purecalc.h b/ydb/library/yql/public/purecalc/purecalc.h
deleted file mode 100644
index 83bd8a7b842..00000000000
--- a/ydb/library/yql/public/purecalc/purecalc.h
+++ /dev/null
@@ -1,3 +0,0 @@
-#pragma once
-
-#include "common/interface.h"
diff --git a/ydb/library/yql/public/purecalc/ut/empty_stream.h b/ydb/library/yql/public/purecalc/ut/empty_stream.h
deleted file mode 100644
index 246aabd423a..00000000000
--- a/ydb/library/yql/public/purecalc/ut/empty_stream.h
+++ /dev/null
@@ -1,20 +0,0 @@
-#pragma once
-
-#include <ydb/library/yql/public/purecalc/purecalc.h>
-
-namespace NYql {
- namespace NPureCalc {
- template <typename T>
- class TEmptyStreamImpl: public IStream<T> {
- public:
- T Fetch() override {
- return nullptr;
- }
- };
-
- template <typename T>
- THolder<IStream<T>> EmptyStream() {
- return MakeHolder<TEmptyStreamImpl<T>>();
- }
- }
-}
diff --git a/ydb/library/yql/public/purecalc/ut/fake_spec.cpp b/ydb/library/yql/public/purecalc/ut/fake_spec.cpp
deleted file mode 100644
index b56f7cfdfd5..00000000000
--- a/ydb/library/yql/public/purecalc/ut/fake_spec.cpp
+++ /dev/null
@@ -1,36 +0,0 @@
-#include "fake_spec.h"
-
-namespace NYql {
- namespace NPureCalc {
- NYT::TNode MakeFakeSchema(bool pg) {
- auto itemType = NYT::TNode::CreateList();
- itemType.Add(pg ? "PgType" : "DataType");
- itemType.Add(pg ? "int4" : "Int32");
-
- auto itemNode = NYT::TNode::CreateList();
- itemNode.Add("Name");
- itemNode.Add(std::move(itemType));
-
- auto items = NYT::TNode::CreateList();
- items.Add(std::move(itemNode));
-
- auto schema = NYT::TNode::CreateList();
- schema.Add("StructType");
- schema.Add(std::move(items));
-
- return schema;
- }
-
- TFakeInputSpec FakeIS(ui32 inputsNumber, bool pg) {
- auto spec = TFakeInputSpec();
- spec.Schemas = TVector<NYT::TNode>(inputsNumber, MakeFakeSchema(pg));
- return spec;
- }
-
- TFakeOutputSpec FakeOS(bool pg) {
- auto spec = TFakeOutputSpec();
- spec.Schema = MakeFakeSchema(pg);
- return spec;
- }
- }
-}
diff --git a/ydb/library/yql/public/purecalc/ut/fake_spec.h b/ydb/library/yql/public/purecalc/ut/fake_spec.h
deleted file mode 100644
index 87b4907e5de..00000000000
--- a/ydb/library/yql/public/purecalc/ut/fake_spec.h
+++ /dev/null
@@ -1,54 +0,0 @@
-#pragma once
-
-#include <ydb/library/yql/public/purecalc/purecalc.h>
-
-namespace NYql {
- namespace NPureCalc {
- class TFakeInputSpec: public TInputSpecBase {
- public:
- TVector<NYT::TNode> Schemas = {NYT::TNode::CreateList()};
-
- public:
- const TVector<NYT::TNode>& GetSchemas() const override {
- return Schemas;
- }
- };
-
- class TFakeOutputSpec: public TOutputSpecBase {
- public:
- NYT::TNode Schema = NYT::TNode::CreateList();
-
- public:
- const NYT::TNode& GetSchema() const override {
- return Schema;
- }
- };
-
- template <>
- struct TInputSpecTraits<TFakeInputSpec> {
- static const constexpr bool IsPartial = false;
-
- static const constexpr bool SupportPullStreamMode = false;
- static const constexpr bool SupportPullListMode = false;
- static const constexpr bool SupportPushStreamMode = false;
-
- using TConsumerType = void;
- };
-
- template <>
- struct TOutputSpecTraits<TFakeOutputSpec> {
- static const constexpr bool IsPartial = false;
-
- static const constexpr bool SupportPullStreamMode = false;
- static const constexpr bool SupportPullListMode = false;
- static const constexpr bool SupportPushStreamMode = false;
-
- using TPullStreamReturnType = void;
- using TPullListReturnType = void;
- };
-
- NYT::TNode MakeFakeSchema(bool pg = false);
- TFakeInputSpec FakeIS(ui32 inputsNumber = 1, bool pg = false);
- TFakeOutputSpec FakeOS(bool pg = false);
- }
-}
diff --git a/ydb/library/yql/public/purecalc/ut/lib/helpers.cpp b/ydb/library/yql/public/purecalc/ut/lib/helpers.cpp
deleted file mode 100644
index cef9a995235..00000000000
--- a/ydb/library/yql/public/purecalc/ut/lib/helpers.cpp
+++ /dev/null
@@ -1,55 +0,0 @@
-#include "helpers.h"
-
-#include <library/cpp/yson/writer.h>
-
-#include <library/cpp/yson/node/node_visitor.h>
-
-#include <util/string/ascii.h>
-#include <util/generic/hash_set.h>
-
-
-namespace NYql {
- namespace NPureCalc {
- namespace NPrivate {
- NYT::TNode GetSchema(
- const TVector<TString>& fields,
- const TVector<TString>& optionalFields
- ) {
- THashSet<TString> optionalFilter {
- optionalFields.begin(), optionalFields.end()
- };
-
- NYT::TNode members {NYT::TNode::CreateList()};
-
- auto addField = [&] (const TString& name, const TString& type) {
- auto typeNode = NYT::TNode::CreateList()
- .Add("DataType")
- .Add(type);
-
- if (optionalFilter.contains(name)) {
- typeNode = NYT::TNode::CreateList()
- .Add("OptionalType")
- .Add(typeNode);
- }
-
- members.Add(NYT::TNode::CreateList()
- .Add(name)
- .Add(typeNode)
- );
- };
-
- for (const auto& field: fields) {
- TString type {field};
- type[0] = AsciiToUpper(type[0]);
- addField(field, type);
- }
-
- NYT::TNode schema = NYT::TNode::CreateList()
- .Add("StructType")
- .Add(members);
-
- return schema;
- }
- }
- }
-}
diff --git a/ydb/library/yql/public/purecalc/ut/lib/helpers.h b/ydb/library/yql/public/purecalc/ut/lib/helpers.h
deleted file mode 100644
index 53a22661ec3..00000000000
--- a/ydb/library/yql/public/purecalc/ut/lib/helpers.h
+++ /dev/null
@@ -1,18 +0,0 @@
-#pragma once
-
-#include <library/cpp/yson/node/node.h>
-#include <util/generic/string.h>
-#include <util/generic/vector.h>
-#include <util/stream/str.h>
-
-
-namespace NYql {
- namespace NPureCalc {
- namespace NPrivate {
- NYT::TNode GetSchema(
- const TVector<TString>& fields,
- const TVector<TString>& optionalFields = {}
- );
- }
- }
-}
diff --git a/ydb/library/yql/public/purecalc/ut/lib/ya.make b/ydb/library/yql/public/purecalc/ut/lib/ya.make
deleted file mode 100644
index 36134a2940b..00000000000
--- a/ydb/library/yql/public/purecalc/ut/lib/ya.make
+++ /dev/null
@@ -1,14 +0,0 @@
-LIBRARY()
-
-PEERDIR(
- contrib/libs/apache/arrow
- library/cpp/yson
- library/cpp/yson/node
-)
-
-SRCS(
- helpers.cpp
- helpers.h
-)
-
-END()
diff --git a/ydb/library/yql/public/purecalc/ut/protos/test_structs.proto b/ydb/library/yql/public/purecalc/ut/protos/test_structs.proto
deleted file mode 100644
index 66593005a5e..00000000000
--- a/ydb/library/yql/public/purecalc/ut/protos/test_structs.proto
+++ /dev/null
@@ -1,122 +0,0 @@
-package NPureCalcProto;
-
-message TUnparsed {
- required string S = 1;
-}
-
-message TParsed {
- required int32 A = 1;
- optional int32 B = 2;
- required int32 C = 3;
-}
-
-message TPartial {
- required int32 X = 1;
-}
-
-message TSimpleMessage {
- required int32 X = 1;
-}
-
-message TNamedSimpleMessage {
- required int32 X = 1;
- required bytes Name = 2;
-}
-
-message TStringMessage {
- required string X = 1;
-}
-
-message TAllTypes {
- required double FDouble = 1;
- required float FFloat = 2;
- required int64 FInt64 = 3;
- required sfixed64 FSfixed64 = 4;
- required sint64 FSint64 = 5;
- required uint64 FUint64 = 6;
- required fixed64 FFixed64 = 7;
- required int32 FInt32 = 8;
- required sfixed32 FSfixed32 = 9;
- required sint32 FSint32 = 10;
- required uint32 FUint32 = 11;
- required fixed32 FFixed32 = 12;
- required bool FBool = 13;
- required string FString = 14;
- required bytes FBytes = 15;
-}
-
-message TOptionalAllTypes {
- optional double FDouble = 1;
- optional float FFloat = 2;
- optional int64 FInt64 = 3;
- optional sfixed64 FSfixed64 = 4;
- optional sint64 FSint64 = 5;
- optional uint64 FUint64 = 6;
- optional fixed64 FFixed64 = 7;
- optional int32 FInt32 = 8;
- optional sfixed32 FSfixed32 = 9;
- optional sint32 FSint32 = 10;
- optional uint32 FUint32 = 11;
- optional fixed32 FFixed32 = 12;
- optional bool FBool = 13;
- optional string FString = 14;
- optional bytes FBytes = 15;
-}
-
-message TSimpleNested {
- required int32 X = 1;
- required TAllTypes Y = 2;
-}
-
-message TOptionalNested {
- optional TAllTypes X = 1;
-}
-
-message TSimpleRepeated {
- required int32 X = 1;
- repeated int32 Y = 2;
-}
-
-message TNestedRepeated {
- required int32 X = 1;
- repeated TSimpleNested Y = 2;
-}
-
-message TRecursive {
- required int32 X = 1;
- required TRecursive Nested = 2;
-}
-
-message TRecursiveIndirectly {
- message TNested {
- required TRecursiveIndirectly Nested = 1;
- }
-
- required int32 X = 1;
- repeated TNested Nested = 2;
-}
-
-message TMessageWithEnum {
- enum ETestEnum {
- VALUE1 = 0;
- VALUE2 = 1;
- }
- repeated ETestEnum EnumValue = 1;
-}
-
-message TUnsplitted {
- required int32 AInt = 1;
- required uint32 AUint = 2;
- required string AString = 3;
- optional bool ABool = 4;
-}
-
-message TSplitted1 {
- required int32 BInt = 1;
- required string BString = 2;
-}
-
-message TSplitted2 {
- required uint32 CUint = 1;
- required string CString = 2;
-}
diff --git a/ydb/library/yql/public/purecalc/ut/protos/ya.make b/ydb/library/yql/public/purecalc/ut/protos/ya.make
deleted file mode 100644
index a455ff2fba2..00000000000
--- a/ydb/library/yql/public/purecalc/ut/protos/ya.make
+++ /dev/null
@@ -1,9 +0,0 @@
-PROTO_LIBRARY()
-
-SRCS(
- test_structs.proto
-)
-
-EXCLUDE_TAGS(GO_PROTO)
-
-END()
diff --git a/ydb/library/yql/public/purecalc/ut/test_eval.cpp b/ydb/library/yql/public/purecalc/ut/test_eval.cpp
deleted file mode 100644
index a556b47b038..00000000000
--- a/ydb/library/yql/public/purecalc/ut/test_eval.cpp
+++ /dev/null
@@ -1,30 +0,0 @@
-#include <ydb/library/yql/public/purecalc/purecalc.h>
-#include <ydb/library/yql/public/purecalc/io_specs/protobuf/spec.h>
-#include <ydb/library/yql/public/purecalc/ut/protos/test_structs.pb.h>
-#include <ydb/library/yql/public/purecalc/ut/empty_stream.h>
-
-#include <library/cpp/testing/unittest/registar.h>
-
-Y_UNIT_TEST_SUITE(TestEval) {
- Y_UNIT_TEST(TestEvalExpr) {
- using namespace NYql::NPureCalc;
-
- auto options = TProgramFactoryOptions();
- auto factory = MakeProgramFactory(options);
-
- auto program = factory->MakePullListProgram(
- TProtobufInputSpec<NPureCalcProto::TStringMessage>(),
- TProtobufOutputSpec<NPureCalcProto::TStringMessage>(),
- "SELECT Unwrap(cast(EvaluateExpr('foo' || 'bar') as Utf8)) AS X",
- ETranslationMode::SQL
- );
-
- auto stream = program->Apply(EmptyStream<NPureCalcProto::TStringMessage*>());
-
- NPureCalcProto::TStringMessage* message;
-
- UNIT_ASSERT(message = stream->Fetch());
- UNIT_ASSERT_EQUAL(message->GetX(), "foobar");
- UNIT_ASSERT(!stream->Fetch());
- }
-}
diff --git a/ydb/library/yql/public/purecalc/ut/test_mixed_allocators.cpp b/ydb/library/yql/public/purecalc/ut/test_mixed_allocators.cpp
deleted file mode 100644
index bccb59b497b..00000000000
--- a/ydb/library/yql/public/purecalc/ut/test_mixed_allocators.cpp
+++ /dev/null
@@ -1,139 +0,0 @@
-#include <library/cpp/testing/unittest/registar.h>
-
-#include <yql/essentials/minikql/computation/mkql_computation_node_holders.h>
-#include <yql/essentials/minikql/mkql_string_util.h>
-
-#include <ydb/library/yql/public/purecalc/io_specs/protobuf/spec.h>
-#include <ydb/library/yql/public/purecalc/ut/protos/test_structs.pb.h>
-
-using namespace NYql::NPureCalc;
-
-namespace {
- class TStatelessInputSpec : public TInputSpecBase {
- public:
- TStatelessInputSpec()
- : Schemas_({NYT::TNode::CreateList()
- .Add("StructType")
- .Add(NYT::TNode::CreateList()
- .Add(NYT::TNode::CreateList()
- .Add("InputValue")
- .Add(NYT::TNode::CreateList()
- .Add("DataType")
- .Add("Utf8")
- )
- )
- )
- })
- {};
-
- const TVector<NYT::TNode>& GetSchemas() const override {
- return Schemas_;
- }
-
- private:
- const TVector<NYT::TNode> Schemas_;
- };
-
- class TStatelessInputConsumer : public IConsumer<const NYql::NUdf::TUnboxedValue&> {
- public:
- TStatelessInputConsumer(TWorkerHolder<IPushStreamWorker> worker)
- : Worker_(std::move(worker))
- {}
-
- void OnObject(const NYql::NUdf::TUnboxedValue& value) override {
- with_lock (Worker_->GetScopedAlloc()) {
- NYql::NUdf::TUnboxedValue* items = nullptr;
- NYql::NUdf::TUnboxedValue result = Worker_->GetGraph().GetHolderFactory().CreateDirectArrayHolder(1, items);
-
- items[0] = value;
-
- Worker_->Push(std::move(result));
-
- // Clear graph after each object because
- // values allocated on another allocator and should be released
- Worker_->GetGraph().Invalidate();
- }
- }
-
- void OnFinish() override {
- with_lock(Worker_->GetScopedAlloc()) {
- Worker_->OnFinish();
- }
- }
-
- private:
- TWorkerHolder<IPushStreamWorker> Worker_;
- };
-
- class TStatelessConsumer : public IConsumer<NPureCalcProto::TStringMessage*> {
- const TString ExpectedData_;
- const ui64 ExpectedRows_;
- ui64 RowId_ = 0;
-
- public:
- TStatelessConsumer(const TString& expectedData, ui64 expectedRows)
- : ExpectedData_(expectedData)
- , ExpectedRows_(expectedRows)
- {}
-
- void OnObject(NPureCalcProto::TStringMessage* message) override {
- UNIT_ASSERT_VALUES_EQUAL_C(ExpectedData_, message->GetX(), RowId_);
- RowId_++;
- }
-
- void OnFinish() override {
- UNIT_ASSERT_VALUES_EQUAL(ExpectedRows_, RowId_);
- }
- };
-}
-
-template <>
-struct TInputSpecTraits<TStatelessInputSpec> {
- static constexpr bool IsPartial = false;
- static constexpr bool SupportPushStreamMode = true;
-
- using TConsumerType = THolder<IConsumer<const NYql::NUdf::TUnboxedValue&>>;
-
- static TConsumerType MakeConsumer(const TStatelessInputSpec&, TWorkerHolder<IPushStreamWorker> worker) {
- return MakeHolder<TStatelessInputConsumer>(std::move(worker));
- }
-};
-
-Y_UNIT_TEST_SUITE(TestMixedAllocators) {
- Y_UNIT_TEST(TestPushStream) {
- const auto targetString = "large string >= 14 bytes";
- const auto factory = MakeProgramFactory();
- const auto sql = TStringBuilder() << "SELECT InputValue AS X FROM Input WHERE InputValue = \"" << targetString << "\";";
-
- const auto program = factory->MakePushStreamProgram(
- TStatelessInputSpec(),
- TProtobufOutputSpec<NPureCalcProto::TStringMessage>(),
- sql
- );
-
- const ui64 numberRows = 5;
- const auto inputConsumer = program->Apply(MakeHolder<TStatelessConsumer>(targetString, numberRows));
- NKikimr::NMiniKQL::TScopedAlloc alloc(__LOCATION__, NKikimr::TAlignedPagePoolCounters(), true, false);
-
- const auto pushString = [&](TString inputValue) {
- NYql::NUdf::TUnboxedValue stringValue;
- with_lock(alloc) {
- stringValue = NKikimr::NMiniKQL::MakeString(inputValue);
- alloc.Ref().LockObject(stringValue);
- }
-
- inputConsumer->OnObject(stringValue);
-
- with_lock(alloc) {
- alloc.Ref().UnlockObject(stringValue);
- stringValue.Clear();
- }
- };
-
- for (ui64 i = 0; i < numberRows; ++i) {
- pushString(targetString);
- pushString("another large string >= 14 bytes");
- }
- inputConsumer->OnFinish();
- }
-}
diff --git a/ydb/library/yql/public/purecalc/ut/test_pg.cpp b/ydb/library/yql/public/purecalc/ut/test_pg.cpp
deleted file mode 100644
index d9b21dece19..00000000000
--- a/ydb/library/yql/public/purecalc/ut/test_pg.cpp
+++ /dev/null
@@ -1,71 +0,0 @@
-#include <ydb/library/yql/public/purecalc/purecalc.h>
-
-#include "fake_spec.h"
-
-#include <ydb/library/yql/public/purecalc/ut/protos/test_structs.pb.h>
-
-#include <library/cpp/testing/unittest/registar.h>
-
-Y_UNIT_TEST_SUITE(TestPg) {
- using namespace NYql::NPureCalc;
-
- Y_UNIT_TEST(TestPgCompile) {
- auto factory = MakeProgramFactory();
-
- auto sql = TString(R"(
- SELECT * FROM "Input";
- )");
-
- UNIT_ASSERT_NO_EXCEPTION([&](){
- factory->MakePullListProgram(FakeIS(1,true), FakeOS(true), sql, ETranslationMode::PG);
- }());
-
- UNIT_ASSERT_EXCEPTION_CONTAINS([&](){
- factory->MakePullStreamProgram(FakeIS(1,true), FakeOS(true), sql, ETranslationMode::PG);
- }(), TCompileError, "PullList mode");
-
- UNIT_ASSERT_EXCEPTION_CONTAINS([&](){
- factory->MakePushStreamProgram(FakeIS(1, true), FakeOS(true), sql, ETranslationMode::PG);
- }(), TCompileError, "PullList mode");
- }
-
- Y_UNIT_TEST(TestSqlWrongTableName) {
- auto factory = MakeProgramFactory();
-
- auto sql = TString(R"(
- SELECT * FROM WrongTable;
- )");
-
- UNIT_ASSERT_EXCEPTION_CONTAINS([&](){
- factory->MakePullListProgram(FakeIS(1, true), FakeOS(true), sql, ETranslationMode::PG);
- }(), TCompileError, "Failed to optimize");
-
- UNIT_ASSERT_EXCEPTION_CONTAINS([&](){
- factory->MakePullStreamProgram(FakeIS(1, true), FakeOS(true), sql, ETranslationMode::PG);
- }(), TCompileError, "PullList mode");
-
- UNIT_ASSERT_EXCEPTION_CONTAINS([&](){
- factory->MakePushStreamProgram(FakeIS(1, true), FakeOS(true), sql, ETranslationMode::PG);
- }(), TCompileError, "PullList mode");
- }
-
- Y_UNIT_TEST(TestInvalidSql) {
- auto factory = MakeProgramFactory();
-
- auto sql = TString(R"(
- Just some invalid SQL;
- )");
-
- UNIT_ASSERT_EXCEPTION_CONTAINS([&](){
- factory->MakePullListProgram(FakeIS(1, true), FakeOS(true), sql, ETranslationMode::PG);
- }(), TCompileError, "failed to parse PG");
-
- UNIT_ASSERT_EXCEPTION_CONTAINS([&](){
- factory->MakePullStreamProgram(FakeIS(1, true), FakeOS(true), sql, ETranslationMode::PG);
- }(), TCompileError, "PullList mode");
-
- UNIT_ASSERT_EXCEPTION_CONTAINS([&](){
- factory->MakePushStreamProgram(FakeIS(1, true), FakeOS(true), sql, ETranslationMode::PG);
- }(), TCompileError, "PullList mode");
- }
-}
diff --git a/ydb/library/yql/public/purecalc/ut/test_pool.cpp b/ydb/library/yql/public/purecalc/ut/test_pool.cpp
deleted file mode 100644
index 8c80ae9c84a..00000000000
--- a/ydb/library/yql/public/purecalc/ut/test_pool.cpp
+++ /dev/null
@@ -1,184 +0,0 @@
-#include <library/cpp/testing/unittest/registar.h>
-
-#include <ydb/library/yql/public/purecalc/common/interface.h>
-#include <ydb/library/yql/public/purecalc/io_specs/protobuf/spec.h>
-#include <ydb/library/yql/public/purecalc/ut/protos/test_structs.pb.h>
-#include <library/cpp/protobuf/util/pb_io.h>
-
-#include <util/string/cast.h>
-
-using namespace NYql::NPureCalc;
-
-namespace {
- class TStringMessageStreamImpl: public IStream<NPureCalcProto::TStringMessage*> {
- private:
- ui32 I_ = 0;
- NPureCalcProto::TStringMessage Message_{};
-
- public:
- NPureCalcProto::TStringMessage* Fetch() override {
- if (I_ >= 3) {
- return nullptr;
- } else {
- Message_.SetX(ToString(I_));
- ++I_;
- return &Message_;
- }
- }
- };
-
- class TStringMessageConsumerImpl: public IConsumer<NPureCalcProto::TStringMessage*> {
- private:
- TVector<TString>* Buf_;
-
- public:
- TStringMessageConsumerImpl(TVector<TString>* buf)
- : Buf_(buf)
- {
- }
-
- public:
- void OnObject(NPureCalcProto::TStringMessage* t) override {
- Buf_->push_back(t->GetX());
- }
-
- void OnFinish() override {
- }
- };
-
-}
-
-Y_UNIT_TEST_SUITE(TestWorkerPool) {
- static TString sql = "SELECT 'abc'u || X AS X FROM Input";
-
- static TVector<TString> expected{"abc0", "abc1", "abc2"};
-
- void TestPullStreamImpl(bool useWorkerPool) {
- auto factory = MakeProgramFactory(TProgramFactoryOptions().SetUseWorkerPool(useWorkerPool));
-
- auto program = factory->MakePullStreamProgram(
- TProtobufInputSpec<NPureCalcProto::TStringMessage>(),
- TProtobufOutputSpec<NPureCalcProto::TStringMessage>(),
- sql,
- ETranslationMode::SQL
- );
-
- auto check = [](IStream<NPureCalcProto::TStringMessage*>* output) {
- TVector<TString> actual;
- while (auto *x = output->Fetch()) {
- actual.push_back(x->GetX());
- }
-
- UNIT_ASSERT_VALUES_EQUAL(expected, actual);
- };
-
- // Sequential use
- for (size_t i = 0; i < 2; ++i) {
- auto output = program->Apply(MakeHolder<TStringMessageStreamImpl>());
- check(output.Get());
- }
- // Parallel use
- {
- auto output1 = program->Apply(MakeHolder<TStringMessageStreamImpl>());
- auto output2 = program->Apply(MakeHolder<TStringMessageStreamImpl>());
- check(output1.Get());
- check(output2.Get());
- }
- }
-
- Y_UNIT_TEST(TestPullStreamUseWorkerPool) {
- TestPullStreamImpl(true);
- }
-
- Y_UNIT_TEST(TestPullStreamNoWorkerPool) {
- TestPullStreamImpl(false);
- }
-
- void TestPullListImpl(bool useWorkerPool) {
- auto factory = MakeProgramFactory(TProgramFactoryOptions().SetUseWorkerPool(useWorkerPool));
-
- auto program = factory->MakePullListProgram(
- TProtobufInputSpec<NPureCalcProto::TStringMessage>(),
- TProtobufOutputSpec<NPureCalcProto::TStringMessage>(),
- sql,
- ETranslationMode::SQL
- );
-
- auto check = [](IStream<NPureCalcProto::TStringMessage*>* output) {
- TVector<TString> actual;
- while (auto *x = output->Fetch()) {
- actual.push_back(x->GetX());
- }
-
- UNIT_ASSERT_VALUES_EQUAL(expected, actual);
- };
-
- // Sequential use
- for (size_t i = 0; i < 2; ++i) {
- auto output = program->Apply(MakeHolder<TStringMessageStreamImpl>());
- check(output.Get());
- }
- // Parallel use
- {
- auto output1 = program->Apply(MakeHolder<TStringMessageStreamImpl>());
- auto output2 = program->Apply(MakeHolder<TStringMessageStreamImpl>());
- check(output1.Get());
- check(output2.Get());
- }
- }
-
- Y_UNIT_TEST(TestPullListUseWorkerPool) {
- TestPullListImpl(true);
- }
-
- Y_UNIT_TEST(TestPullListNoWorkerPool) {
- TestPullListImpl(false);
- }
-
- void TestPushStreamImpl(bool useWorkerPool) {
- auto factory = MakeProgramFactory(TProgramFactoryOptions().SetUseWorkerPool(useWorkerPool));
-
- auto program = factory->MakePushStreamProgram(
- TProtobufInputSpec<NPureCalcProto::TStringMessage>(),
- TProtobufOutputSpec<NPureCalcProto::TStringMessage>(),
- sql,
- ETranslationMode::SQL
- );
-
- auto check = [](IConsumer<NPureCalcProto::TStringMessage*>* input, const TVector<TString>& result) {
- NPureCalcProto::TStringMessage message;
- for (auto s: {"0", "1", "2"}) {
- message.SetX(s);
- input->OnObject(&message);
- }
- input->OnFinish();
-
- UNIT_ASSERT_VALUES_EQUAL(expected, result);
- };
-
- // Sequential use
- for (size_t i = 0; i < 2; ++i) {
- TVector<TString> actual;
- auto input = program->Apply(MakeHolder<TStringMessageConsumerImpl>(&actual));
- check(input.Get(), actual);
- }
-
- // Parallel use
- {
- TVector<TString> actual1;
- auto input1 = program->Apply(MakeHolder<TStringMessageConsumerImpl>(&actual1));
- TVector<TString> actual2;
- auto input2 = program->Apply(MakeHolder<TStringMessageConsumerImpl>(&actual2));
- check(input1.Get(), actual1);
- check(input2.Get(), actual2);
- }
- }
-
- Y_UNIT_TEST(TestPushStreamUseWorkerPool) {
- TestPushStreamImpl(true);
- }
-
- Y_UNIT_TEST(TestPushStreamNoWorkerPool) {
- TestPushStreamImpl(false);
- }
-}
diff --git a/ydb/library/yql/public/purecalc/ut/test_schema.cpp b/ydb/library/yql/public/purecalc/ut/test_schema.cpp
deleted file mode 100644
index 9763e52b005..00000000000
--- a/ydb/library/yql/public/purecalc/ut/test_schema.cpp
+++ /dev/null
@@ -1 +0,0 @@
-#include <library/cpp/testing/unittest/registar.h>
diff --git a/ydb/library/yql/public/purecalc/ut/test_sexpr.cpp b/ydb/library/yql/public/purecalc/ut/test_sexpr.cpp
deleted file mode 100644
index b9d55c0f983..00000000000
--- a/ydb/library/yql/public/purecalc/ut/test_sexpr.cpp
+++ /dev/null
@@ -1,55 +0,0 @@
-#include <ydb/library/yql/public/purecalc/purecalc.h>
-
-#include "fake_spec.h"
-
-#include <ydb/library/yql/public/purecalc/ut/protos/test_structs.pb.h>
-
-#include <library/cpp/testing/unittest/registar.h>
-
-Y_UNIT_TEST_SUITE(TestSExpr) {
- Y_UNIT_TEST(TestSExprCompile) {
- using namespace NYql::NPureCalc;
-
- auto factory = MakeProgramFactory();
-
- auto expr = TString(R"(
- (
- (return (Self '0))
- )
- )");
-
- UNIT_ASSERT_NO_EXCEPTION([&](){
- factory->MakePullStreamProgram(FakeIS(), FakeOS(), expr, ETranslationMode::SExpr);
- }());
-
- UNIT_ASSERT_NO_EXCEPTION([&](){
- factory->MakePullListProgram(FakeIS(), FakeOS(), expr, ETranslationMode::SExpr);
- }());
-
- UNIT_ASSERT_NO_EXCEPTION([&](){
- factory->MakePushStreamProgram(FakeIS(), FakeOS(), expr, ETranslationMode::SExpr);
- }());
- }
-
- Y_UNIT_TEST(TestInvalidSExpr) {
- using namespace NYql::NPureCalc;
-
- auto factory = MakeProgramFactory();
-
- auto sql = TString(R"(
- Some totally invalid SExpr
- )");
-
- UNIT_ASSERT_EXCEPTION_CONTAINS([&](){
- factory->MakePullStreamProgram(FakeIS(), FakeOS(), sql, ETranslationMode::SExpr);
- }(), TCompileError, "failed to parse s-expression");
-
- UNIT_ASSERT_EXCEPTION_CONTAINS([&](){
- factory->MakePullListProgram(FakeIS(), FakeOS(), sql, ETranslationMode::SExpr);
- }(), TCompileError, "failed to parse s-expression");
-
- UNIT_ASSERT_EXCEPTION_CONTAINS([&](){
- factory->MakePushStreamProgram(FakeIS(), FakeOS(), sql, ETranslationMode::SExpr);
- }(), TCompileError, "failed to parse s-expression");
- }
-}
diff --git a/ydb/library/yql/public/purecalc/ut/test_sql.cpp b/ydb/library/yql/public/purecalc/ut/test_sql.cpp
deleted file mode 100644
index 10157912a96..00000000000
--- a/ydb/library/yql/public/purecalc/ut/test_sql.cpp
+++ /dev/null
@@ -1,205 +0,0 @@
-#include <ydb/library/yql/public/purecalc/purecalc.h>
-
-#include "fake_spec.h"
-
-#include <ydb/library/yql/public/purecalc/ut/protos/test_structs.pb.h>
-
-#include <library/cpp/testing/unittest/registar.h>
-
-Y_UNIT_TEST_SUITE(TestSql) {
- using namespace NYql::NPureCalc;
-
- Y_UNIT_TEST(TestSqlCompile) {
- auto factory = MakeProgramFactory();
-
- auto sql = TString(R"(
- SELECT * FROM Input;
- )");
-
- UNIT_ASSERT_NO_EXCEPTION([&](){
- factory->MakePullStreamProgram(FakeIS(), FakeOS(), sql, ETranslationMode::SQL);
- }());
-
- UNIT_ASSERT_NO_EXCEPTION([&](){
- factory->MakePullListProgram(FakeIS(), FakeOS(), sql, ETranslationMode::SQL);
- }());
-
- auto program = factory->MakePushStreamProgram(FakeIS(), FakeOS(), sql, ETranslationMode::SQL);
- auto expectedIssues = TString(R"(<main>: Warning: Type annotation, code: 1030
- generated.sql:2:13: Warning: At function: PersistableRepr
- generated.sql:2:13: Warning: Persistable required. Atom, key, world, datasink, datasource, callable, resource, stream and lambda are not persistable, code: 1104
-)");
-
- UNIT_ASSERT_VALUES_EQUAL(expectedIssues, program->GetIssues().ToString());
- }
-
- Y_UNIT_TEST(TestSqlCompileSingleUnnamedInput) {
- auto factory = MakeProgramFactory();
-
- auto sql = TString(R"(
- SELECT * FROM TABLES()
- )");
-
- UNIT_ASSERT_NO_EXCEPTION([&](){
- factory->MakePullStreamProgram(FakeIS(), FakeOS(), sql, ETranslationMode::SQL);
- }());
-
- UNIT_ASSERT_NO_EXCEPTION([&](){
- factory->MakePullListProgram(FakeIS(), FakeOS(), sql, ETranslationMode::SQL);
- }());
-
- UNIT_ASSERT_NO_EXCEPTION([&](){
- factory->MakePushStreamProgram(FakeIS(), FakeOS(), sql, ETranslationMode::SQL);
- }());
- }
-
- Y_UNIT_TEST(TestSqlCompileNamedMultiinputs) {
- auto factory = MakeProgramFactory();
-
- auto sql = TString(R"(
- SELECT * FROM Input0
- UNION ALL
- SELECT * FROM Input1
- )");
-
- UNIT_ASSERT_NO_EXCEPTION([&](){
- factory->MakePullListProgram(FakeIS(2), FakeOS(), sql, ETranslationMode::SQL);
- }());
- }
-
- Y_UNIT_TEST(TestSqlCompileUnnamedMultiinputs) {
- auto factory = MakeProgramFactory();
-
- auto sql = TString(R"(
- $t0, $t1, $t2 = PROCESS TABLES();
- SELECT * FROM $t0
- UNION ALL
- SELECT * FROM $t1
- UNION ALL
- SELECT * FROM $t2
- )");
-
- UNIT_ASSERT_NO_EXCEPTION([&](){
- factory->MakePullListProgram(FakeIS(3), FakeOS(), sql, ETranslationMode::SQL);
- }());
- }
-
- Y_UNIT_TEST(TestSqlCompileWithWarning) {
- auto factory = MakeProgramFactory();
-
- auto sql = TString(R"(
- $x = 1;
- $y = 2;
- SELECT $x as Name FROM Input;
- )");
-
- auto expectedIssues = TString(R"(generated.sql:3:13: Warning: Symbol $y is not used, code: 4527
-<main>: Warning: Type annotation, code: 1030
- generated.sql:4:13: Warning: At function: PersistableRepr
- generated.sql:4:13: Warning: Persistable required. Atom, key, world, datasink, datasource, callable, resource, stream and lambda are not persistable, code: 1104
-)");
-
- auto program = factory->MakePushStreamProgram(FakeIS(), FakeOS(), sql, ETranslationMode::SQL);
- UNIT_ASSERT_VALUES_EQUAL(expectedIssues, program->GetIssues().ToString());
- }
-
- Y_UNIT_TEST(TestSqlWrongTableName) {
- auto factory = MakeProgramFactory();
-
- auto sql = TString(R"(
- SELECT * FROM WrongTable;
- )");
-
- UNIT_ASSERT_EXCEPTION_CONTAINS([&](){
- factory->MakePullStreamProgram(FakeIS(), FakeOS(), sql, ETranslationMode::SQL);
- }(), TCompileError, "Failed to optimize");
-
- UNIT_ASSERT_EXCEPTION_CONTAINS([&](){
- factory->MakePullListProgram(FakeIS(), FakeOS(), sql, ETranslationMode::SQL);
- }(), TCompileError, "Failed to optimize");
-
- UNIT_ASSERT_EXCEPTION_CONTAINS([&](){
- factory->MakePushStreamProgram(FakeIS(), FakeOS(), sql, ETranslationMode::SQL);
- }(), TCompileError, "Failed to optimize");
- }
-
- Y_UNIT_TEST(TestAllocateLargeStringOnEvaluate) {
- auto factory = MakeProgramFactory();
-
- auto sql = TString(R"(
- $data = Length(EvaluateExpr("long string" || " very loooong string"));
- SELECT $data as Name FROM Input;
- )");
-
- UNIT_ASSERT_NO_EXCEPTION([&](){
- factory->MakePullStreamProgram(FakeIS(), FakeOS(), sql, ETranslationMode::SQL);
- }());
-
- UNIT_ASSERT_NO_EXCEPTION([&](){
- factory->MakePullListProgram(FakeIS(), FakeOS(), sql, ETranslationMode::SQL);
- }());
-
- UNIT_ASSERT_NO_EXCEPTION([&](){
- factory->MakePushStreamProgram(FakeIS(), FakeOS(), sql, ETranslationMode::SQL);
- }());
- }
-
- Y_UNIT_TEST(TestInvalidSql) {
- auto factory = MakeProgramFactory();
-
- auto sql = TString(R"(
- Just some invalid SQL;
- )");
-
- UNIT_ASSERT_EXCEPTION_CONTAINS([&](){
- factory->MakePullStreamProgram(FakeIS(), FakeOS(), sql, ETranslationMode::SQL);
- }(), TCompileError, "failed to parse SQL");
-
- UNIT_ASSERT_EXCEPTION_CONTAINS([&](){
- factory->MakePullListProgram(FakeIS(), FakeOS(), sql, ETranslationMode::SQL);
- }(), TCompileError, "failed to parse SQL");
-
- UNIT_ASSERT_EXCEPTION_CONTAINS([&](){
- factory->MakePushStreamProgram(FakeIS(), FakeOS(), sql, ETranslationMode::SQL);
- }(), TCompileError, "failed to parse SQL");
- }
-
- Y_UNIT_TEST(TestUseProcess) {
- auto factory = MakeProgramFactory();
-
- auto sql = TString(R"(
- $processor = ($row) -> ($row);
-
- PROCESS Input using $processor(TableRow());
- )");
-
- UNIT_ASSERT_NO_EXCEPTION([&](){
- factory->MakePullStreamProgram(FakeIS(), FakeOS(), sql, ETranslationMode::SQL);
- }());
-
- UNIT_ASSERT_NO_EXCEPTION([&](){
- factory->MakePullListProgram(FakeIS(), FakeOS(), sql, ETranslationMode::SQL);
- }());
-
- UNIT_ASSERT_NO_EXCEPTION([&](){
- factory->MakePushStreamProgram(FakeIS(), FakeOS(), sql, ETranslationMode::SQL);
- }());
- }
-
- Y_UNIT_TEST(TestUseCodegen) {
- auto factory = MakeProgramFactory();
-
- auto sql = TString(R"(
- $processor = ($row) -> {
- $lambda = EvaluateCode(LambdaCode(($row) -> ($row)));
- return $lambda($row);
- };
-
- PROCESS Input using $processor(TableRow());
- )");
-
- UNIT_ASSERT_NO_EXCEPTION([&](){
- factory->MakePullListProgram(FakeIS(), FakeOS(), sql, ETranslationMode::SQL);
- }());
- }
-}
diff --git a/ydb/library/yql/public/purecalc/ut/test_udf.cpp b/ydb/library/yql/public/purecalc/ut/test_udf.cpp
deleted file mode 100644
index 59cc9625f5e..00000000000
--- a/ydb/library/yql/public/purecalc/ut/test_udf.cpp
+++ /dev/null
@@ -1,195 +0,0 @@
-#include <library/cpp/testing/unittest/registar.h>
-
-#include <ydb/library/yql/public/purecalc/purecalc.h>
-#include <ydb/library/yql/public/purecalc/io_specs/protobuf/spec.h>
-#include <ydb/library/yql/public/purecalc/ut/protos/test_structs.pb.h>
-#include <yql/essentials/public/udf/udf_counter.h>
-#include <yql/essentials/public/udf/udf_type_builder.h>
-#include <library/cpp/testing/unittest/registar.h>
-
-class TMyModule : public NKikimr::NUdf::IUdfModule {
-public:
- class TFunc : public NKikimr::NUdf::TBoxedValue {
- public:
- TFunc(NKikimr::NUdf::TCounter counter, NKikimr::NUdf::TScopedProbe scopedProbe)
- : Counter_(counter)
- , ScopedProbe_(scopedProbe)
- {}
-
- NKikimr::NUdf::TUnboxedValue Run(const NKikimr::NUdf::IValueBuilder* valueBuilder, const NKikimr::NUdf::TUnboxedValuePod* args) const override {
- Y_UNUSED(valueBuilder);
- with_lock(ScopedProbe_) {
- Counter_.Inc();
- return NKikimr::NUdf::TUnboxedValuePod(args[0].Get<i32>());
- }
- }
-
- private:
- mutable NKikimr::NUdf::TCounter Counter_;
- mutable NKikimr::NUdf::TScopedProbe ScopedProbe_;
- };
-
- void GetAllFunctions(NKikimr::NUdf::IFunctionsSink& sink) const override {
- Y_UNUSED(sink);
- }
-
- void BuildFunctionTypeInfo(
- const NKikimr::NUdf::TStringRef& name,
- NKikimr::NUdf::TType* userType,
- const NKikimr::NUdf::TStringRef& typeConfig,
- ui32 flags,
- NKikimr::NUdf::IFunctionTypeInfoBuilder& builder) const override {
- Y_UNUSED(userType);
- Y_UNUSED(typeConfig);
- Y_UNUSED(flags);
- if (name == NKikimr::NUdf::TStringRef::Of("Func")) {
- builder.SimpleSignature<i32(i32)>();
- builder.Implementation(new TFunc(
- builder.GetCounter("FuncCalls",true),
- builder.GetScopedProbe("FuncTime")
- ));
- }
- }
-
- void CleanupOnTerminate() const override {
- }
-};
-
-class TMyCountersProvider : public NKikimr::NUdf::ICountersProvider, public NKikimr::NUdf::IScopedProbeHost {
-public:
- TMyCountersProvider(i64* calls, TString* log)
- : Calls_(calls)
- , Log_(log)
- {}
-
- NKikimr::NUdf::TCounter GetCounter(const NKikimr::NUdf::TStringRef& module, const NKikimr::NUdf::TStringRef& name, bool deriv) override {
- UNIT_ASSERT_VALUES_EQUAL(module, "MyModule");
- UNIT_ASSERT_VALUES_EQUAL(name, "FuncCalls");
- UNIT_ASSERT_VALUES_EQUAL(deriv, true);
- return NKikimr::NUdf::TCounter(Calls_);
- }
-
- NKikimr::NUdf::TScopedProbe GetScopedProbe(const NKikimr::NUdf::TStringRef& module, const NKikimr::NUdf::TStringRef& name) override {
- UNIT_ASSERT_VALUES_EQUAL(module, "MyModule");
- UNIT_ASSERT_VALUES_EQUAL(name, "FuncTime");
- return NKikimr::NUdf::TScopedProbe(Log_ ? this : nullptr, Log_);
- }
-
- void Acquire(void* cookie) override {
- UNIT_ASSERT(cookie == Log_);
- *Log_ += "Enter\n";
- }
-
- void Release(void* cookie) override {
- UNIT_ASSERT(cookie == Log_);
- *Log_ += "Exit\n";
- }
-
-private:
- i64* Calls_;
- TString* Log_;
-};
-
-namespace NPureCalcProto {
- class TUnparsed;
- class TParsed;
-}
-
-class TDocInput : public NYql::NPureCalc::IStream<NPureCalcProto::TUnparsed*> {
-public:
- NPureCalcProto::TUnparsed* Fetch() override {
- if (Extracted) {
- return nullptr;
- }
-
- Extracted = true;
- Msg.SetS("foo");
- return &Msg;
- }
-
-public:
- NPureCalcProto::TUnparsed Msg;
- bool Extracted = false;
-};
-
-Y_UNIT_TEST_SUITE(TestUdf) {
- Y_UNIT_TEST(TestCounters) {
- using namespace NYql::NPureCalc;
-
- auto factory = MakeProgramFactory();
-
- i64 callCounter = 0;
- TMyCountersProvider myCountersProvider(&callCounter, nullptr);
- factory->AddUdfModule("MyModule", new TMyModule);
- factory->SetCountersProvider(&myCountersProvider);
-
- auto program = factory->MakePullStreamProgram(
- TProtobufInputSpec<NPureCalcProto::TUnparsed>(),
- TProtobufOutputSpec<NPureCalcProto::TParsed>(),
- "select MyModule::Func(1) as A, 2 as B, 3 as C from Input",
- ETranslationMode::SQL);
-
- auto out = program->Apply(MakeHolder<TDocInput>());
- auto* message = out->Fetch();
- UNIT_ASSERT(message);
- UNIT_ASSERT_VALUES_EQUAL(message->GetA(), 1);
- UNIT_ASSERT_VALUES_EQUAL(message->GetB(), 2);
- UNIT_ASSERT_VALUES_EQUAL(message->GetC(), 3);
- UNIT_ASSERT_VALUES_EQUAL(callCounter, 1);
- UNIT_ASSERT(!out->Fetch());
- }
-
- Y_UNIT_TEST(TestCountersFilteredColumns) {
- using namespace NYql::NPureCalc;
-
- auto factory = MakeProgramFactory();
-
- i64 callCounter = 0;
- TMyCountersProvider myCountersProvider(&callCounter, nullptr);
- factory->AddUdfModule("MyModule", new TMyModule);
- factory->SetCountersProvider(&myCountersProvider);
-
- auto ospec = TProtobufOutputSpec<NPureCalcProto::TParsed>();
- ospec.SetOutputColumnsFilter(THashSet<TString>({"B", "C"}));
- auto program = factory->MakePullStreamProgram(
- TProtobufInputSpec<NPureCalcProto::TUnparsed>(),
- ospec,
- "select MyModule::Func(1) as A, 2 as B, 3 as C from Input",
- ETranslationMode::SQL);
-
- auto out = program->Apply(MakeHolder<TDocInput>());
- auto* message = out->Fetch();
- UNIT_ASSERT(message);
- UNIT_ASSERT_VALUES_EQUAL(message->GetA(), 0);
- UNIT_ASSERT_VALUES_EQUAL(message->GetB(), 2);
- UNIT_ASSERT_VALUES_EQUAL(message->GetC(), 3);
- UNIT_ASSERT_VALUES_EQUAL(callCounter, 0);
- UNIT_ASSERT(!out->Fetch());
- }
-
- Y_UNIT_TEST(TestScopedProbes) {
- using namespace NYql::NPureCalc;
-
- auto factory = MakeProgramFactory();
-
- TString log;
- TMyCountersProvider myCountersProvider(nullptr, &log);
- factory->AddUdfModule("MyModule", new TMyModule);
- factory->SetCountersProvider(&myCountersProvider);
-
- auto program = factory->MakePullStreamProgram(
- TProtobufInputSpec<NPureCalcProto::TUnparsed>(),
- TProtobufOutputSpec<NPureCalcProto::TParsed>(),
- "select MyModule::Func(1) as A, 2 as B, 3 as C from Input",
- ETranslationMode::SQL);
-
- auto out = program->Apply(MakeHolder<TDocInput>());
- auto* message = out->Fetch();
- UNIT_ASSERT(message);
- UNIT_ASSERT_VALUES_EQUAL(message->GetA(), 1);
- UNIT_ASSERT_VALUES_EQUAL(message->GetB(), 2);
- UNIT_ASSERT_VALUES_EQUAL(message->GetC(), 3);
- UNIT_ASSERT_VALUES_EQUAL(log, "Enter\nExit\n");
- UNIT_ASSERT(!out->Fetch());
- }
-}
diff --git a/ydb/library/yql/public/purecalc/ut/test_user_data.cpp b/ydb/library/yql/public/purecalc/ut/test_user_data.cpp
deleted file mode 100644
index 3d0a0935ef6..00000000000
--- a/ydb/library/yql/public/purecalc/ut/test_user_data.cpp
+++ /dev/null
@@ -1,62 +0,0 @@
-#include <ydb/library/yql/public/purecalc/purecalc.h>
-#include <ydb/library/yql/public/purecalc/io_specs/protobuf/spec.h>
-#include <ydb/library/yql/public/purecalc/ut/protos/test_structs.pb.h>
-#include <ydb/library/yql/public/purecalc/ut/empty_stream.h>
-
-#include <library/cpp/testing/unittest/registar.h>
-
-Y_UNIT_TEST_SUITE(TestUserData) {
- Y_UNIT_TEST(TestUserData) {
- using namespace NYql::NPureCalc;
-
- auto options = TProgramFactoryOptions()
- .AddFile(NYql::NUserData::EDisposition::INLINE, "my_file.txt", "my content!");
-
- auto factory = MakeProgramFactory(options);
-
- auto program = factory->MakePullListProgram(
- TProtobufInputSpec<NPureCalcProto::TStringMessage>(),
- TProtobufOutputSpec<NPureCalcProto::TStringMessage>(),
- "SELECT UNWRAP(CAST(FileContent(\"my_file.txt\") AS Utf8)) AS X",
- ETranslationMode::SQL
- );
-
- auto stream = program->Apply(EmptyStream<NPureCalcProto::TStringMessage*>());
-
- NPureCalcProto::TStringMessage* message;
-
- UNIT_ASSERT(message = stream->Fetch());
- UNIT_ASSERT_EQUAL(message->GetX(), "my content!");
- UNIT_ASSERT(!stream->Fetch());
- }
-
- Y_UNIT_TEST(TestUserDataLibrary) {
- using namespace NYql::NPureCalc;
-
- try {
- auto options = TProgramFactoryOptions()
- .AddLibrary(NYql::NUserData::EDisposition::INLINE, "a.sql", "$x = 1; EXPORT $x;")
- .AddLibrary(NYql::NUserData::EDisposition::INLINE, "b.sql", "IMPORT a SYMBOLS $x; $y = CAST($x + 1 AS String); EXPORT $y;");
-
- auto factory = MakeProgramFactory(options);
-
- auto program = factory->MakePullListProgram(
- TProtobufInputSpec<NPureCalcProto::TStringMessage>(),
- TProtobufOutputSpec<NPureCalcProto::TStringMessage>(),
- "IMPORT b SYMBOLS $y; SELECT CAST($y AS Utf8) ?? '' AS X;",
- ETranslationMode::SQL
- );
-
- auto stream = program->Apply(EmptyStream<NPureCalcProto::TStringMessage*>());
-
- NPureCalcProto::TStringMessage* message;
-
- UNIT_ASSERT(message = stream->Fetch());
- UNIT_ASSERT_EQUAL(message->GetX(), "2");
- UNIT_ASSERT(!stream->Fetch());
- } catch (const TCompileError& e) {
- Cerr << e;
- throw e;
- }
- }
-}
diff --git a/ydb/library/yql/public/purecalc/ut/ya.make b/ydb/library/yql/public/purecalc/ut/ya.make
deleted file mode 100644
index 6f23dcd7f4a..00000000000
--- a/ydb/library/yql/public/purecalc/ut/ya.make
+++ /dev/null
@@ -1,28 +0,0 @@
-UNITTEST()
-
-SRCS(
- empty_stream.h
- fake_spec.cpp
- fake_spec.h
- test_schema.cpp
- test_sexpr.cpp
- test_sql.cpp
- test_pg.cpp
- test_udf.cpp
- test_user_data.cpp
- test_eval.cpp
- test_pool.cpp
- test_mixed_allocators.cpp
-)
-
-PEERDIR(
- ydb/library/yql/public/purecalc
- ydb/library/yql/public/purecalc/io_specs/protobuf
- ydb/library/yql/public/purecalc/ut/protos
-)
-
-SIZE(MEDIUM)
-
-YQL_LAST_ABI_VERSION()
-
-END()
diff --git a/ydb/library/yql/public/purecalc/ya.make b/ydb/library/yql/public/purecalc/ya.make
deleted file mode 100644
index bfde6c2e9d9..00000000000
--- a/ydb/library/yql/public/purecalc/ya.make
+++ /dev/null
@@ -1,28 +0,0 @@
-LIBRARY()
-
-SRCS(
- purecalc.cpp
-)
-
-PEERDIR(
- yql/essentials/public/udf/service/exception_policy
- ydb/library/yql/public/purecalc/common
-)
-
-YQL_LAST_ABI_VERSION()
-
-PROVIDES(YQL_PURECALC)
-
-END()
-
-RECURSE(
- common
- examples
- helpers
- io_specs
- no_llvm
-)
-
-RECURSE_FOR_TESTS(
- ut
-)
diff --git a/ydb/library/yql/public/ya.make b/ydb/library/yql/public/ya.make
index 93876a75d9c..456a1b25e34 100644
--- a/ydb/library/yql/public/ya.make
+++ b/ydb/library/yql/public/ya.make
@@ -1,5 +1,4 @@
RECURSE(
embedded
- purecalc
ydb_issue
)
diff --git a/ydb/library/yql/tools/dqrun/dqrun.cpp b/ydb/library/yql/tools/dqrun/dqrun.cpp
index c4b8ee5dfa2..866b5039025 100644
--- a/ydb/library/yql/tools/dqrun/dqrun.cpp
+++ b/ydb/library/yql/tools/dqrun/dqrun.cpp
@@ -23,6 +23,7 @@
#include <ydb/library/yql/providers/dq/provider/yql_dq_provider.h>
#include <ydb/library/yql/providers/dq/provider/exec/yql_dq_exectransformer.h>
#include <ydb/library/yql/dq/actors/input_transforms/dq_input_transform_lookup_factory.h>
+#include <ydb/library/yql/dq/opt/dq_opt_join_cbo_factory.h>
#include <yql/essentials/core/dq_integration/transform/yql_dq_task_transform.h>
#include <ydb/library/yql/providers/clickhouse/actors/yql_ch_source_factory.h>
#include <ydb/library/yql/providers/clickhouse/provider/yql_clickhouse_provider.h>
@@ -963,7 +964,7 @@ int RunMain(int argc, const char* argv[])
factories.push_back(GetYtFileFactory(ytFileServices));
clusters["plato"] = YtProviderName;
auto ytNativeGateway = CreateYtFileGateway(ytFileServices, &emulateOutputForMultirun);
- dataProvidersInit.push_back(GetYtNativeDataProviderInitializer(ytNativeGateway));
+ dataProvidersInit.push_back(GetYtNativeDataProviderInitializer(ytNativeGateway, NDq::MakeCBOOptimizerFactory()));
} else if (gatewaysConfig.HasYt()) {
TYtNativeServices ytServices;
ytServices.FunctionRegistry = funcRegistry.Get();
@@ -974,7 +975,7 @@ int RunMain(int argc, const char* argv[])
for (auto& cluster: gatewaysConfig.GetYt().GetClusterMapping()) {
clusters.emplace(to_lower(cluster.GetName()), TString{YtProviderName});
}
- dataProvidersInit.push_back(GetYtNativeDataProviderInitializer(ytNativeGateway));
+ dataProvidersInit.push_back(GetYtNativeDataProviderInitializer(ytNativeGateway, NDq::MakeCBOOptimizerFactory()));
}
ISecuredServiceAccountCredentialsFactory::TPtr credentialsFactory;
diff --git a/ydb/library/yql/tools/dqrun/ya.make b/ydb/library/yql/tools/dqrun/ya.make
index f3ac9632f6b..ccda8a5dff5 100644
--- a/ydb/library/yql/tools/dqrun/ya.make
+++ b/ydb/library/yql/tools/dqrun/ya.make
@@ -36,7 +36,7 @@ ENDIF()
yql/essentials/core/services/mounts
ydb/library/yql/dq/actors/input_transforms
ydb/library/yql/dq/comp_nodes
- ydb/library/yql/dq/actors/input_transforms
+ ydb/library/yql/dq/opt
yql/essentials/core/dq_integration/transform
ydb/library/yql/dq/transform
yql/essentials/minikql/comp_nodes/llvm14
diff --git a/ydb/library/yql/tools/mrrun/mrrun.cpp b/ydb/library/yql/tools/mrrun/mrrun.cpp
index 9ed8e230299..68d3f2adb0c 100644
--- a/ydb/library/yql/tools/mrrun/mrrun.cpp
+++ b/ydb/library/yql/tools/mrrun/mrrun.cpp
@@ -41,6 +41,7 @@
#include <ydb/library/yql/providers/pq/gateway/native/yql_pq_gateway.h>
#include <ydb/library/yql/providers/s3/actors/yql_s3_actors_factory_impl.h>
#include <ydb/library/yql/dq/comp_nodes/yql_common_dq_factory.h>
+#include <ydb/library/yql/dq/opt/dq_opt_join_cbo_factory.h>
#include <yql/essentials/minikql/invoke_builtins/mkql_builtins.h>
#include <yql/essentials/minikql/comp_nodes/mkql_factories.h>
#include <yql/essentials/core/yql_library_compiler.h>
@@ -657,7 +658,7 @@ int RunMain(int argc, const char* argv[])
auto ytNativeGateway = CreateYtNativeGateway(services);
gateways.emplace_back(ytNativeGateway);
FillClusterMapping(clusters, gatewaysConfig.GetYt(), TString{YtProviderName});
- dataProvidersInit.push_back(GetYtNativeDataProviderInitializer(ytNativeGateway));
+ dataProvidersInit.push_back(GetYtNativeDataProviderInitializer(ytNativeGateway, NDq::MakeCBOOptimizerFactory()));
}
if (gatewayTypes.contains(ClickHouseProviderName) && gatewaysConfig.HasClickHouse()) {
diff --git a/ydb/library/yql/tools/mrrun/ya.make b/ydb/library/yql/tools/mrrun/ya.make
index 9a03e689a29..aef16123e5c 100644
--- a/ydb/library/yql/tools/mrrun/ya.make
+++ b/ydb/library/yql/tools/mrrun/ya.make
@@ -35,6 +35,7 @@ PEERDIR(
yql/essentials/core/services/mounts
yql/essentials/core/url_lister
ydb/library/yql/dq/comp_nodes
+ ydb/library/yql/dq/opt
yql/essentials/core/dq_integration/transform
yql/essentials/minikql/comp_nodes/llvm14
yql/essentials/minikql/invoke_builtins/llvm14
diff --git a/ydb/library/yql/tools/yqlrun/http/ya.make b/ydb/library/yql/tools/yqlrun/http/ya.make
index a9f04477471..3bc08d0f6e2 100644
--- a/ydb/library/yql/tools/yqlrun/http/ya.make
+++ b/ydb/library/yql/tools/yqlrun/http/ya.make
@@ -23,6 +23,7 @@ PEERDIR(
library/cpp/yson/node
yql/essentials/core/facade
yql/essentials/core/type_ann
+ ydb/library/yql/dq/opt
ydb/library/yql/providers/dq/provider
yql/essentials/providers/result/provider
yql/essentials/parser/pg_wrapper
diff --git a/ydb/library/yql/tools/yqlrun/http/yql_server.cpp b/ydb/library/yql/tools/yqlrun/http/yql_server.cpp
index 3adcaa80712..ae058fa0a9c 100644
--- a/ydb/library/yql/tools/yqlrun/http/yql_server.cpp
+++ b/ydb/library/yql/tools/yqlrun/http/yql_server.cpp
@@ -5,6 +5,7 @@
#include <yql/essentials/providers/common/proto/gateways_config.pb.h>
#include <yql/essentials/providers/common/provider/yql_provider_names.h>
#include <yql/essentials/providers/common/comp_nodes/yql_factory.h>
+#include <ydb/library/yql/dq/opt/dq_opt_join_cbo_factory.h>
#include <ydb/library/yql/providers/dq/provider/yql_dq_provider.h>
#include <yql/essentials/providers/pg/provider/yql_pg_provider.h>
#include <ydb/library/yql/providers/yt/common/yql_names.h>
@@ -189,7 +190,7 @@ TProgramPtr MakeFileProgram(const TString& program, TYqlServer& yqlServer,
dataProvidersInit.push_back(GetDqDataProviderInitializer([](const TDqStatePtr&){
return new TNullTransformer;
}, {}, dqCompFactory, {}, yqlServer.FileStorage));
- dataProvidersInit.push_back(GetYtNativeDataProviderInitializer(ytNativeGateway));
+ dataProvidersInit.push_back(GetYtNativeDataProviderInitializer(ytNativeGateway, NDq::MakeCBOOptimizerFactory()));
dataProvidersInit.push_back(GetPgDataProviderInitializer());
ExtProviderSpecific(yqlServer.FunctionRegistry, dataProvidersInit, rtmrTableAttributes);
diff --git a/ydb/library/yql/tools/yqlrun/ya.make b/ydb/library/yql/tools/yqlrun/ya.make
index e3a64dc4ddd..b5e4ad29caf 100644
--- a/ydb/library/yql/tools/yqlrun/ya.make
+++ b/ydb/library/yql/tools/yqlrun/ya.make
@@ -36,6 +36,7 @@ PEERDIR(
yql/essentials/providers/common/proto
yql/essentials/providers/common/provider
yql/essentials/providers/common/udf_resolve
+ ydb/library/yql/dq/opt
ydb/library/yql/providers/dq/provider
ydb/library/yql/providers/yt/gateway/file
ydb/library/yql/providers/yt/codec/codegen
diff --git a/ydb/library/yql/tools/yqlrun/yqlrun.cpp b/ydb/library/yql/tools/yqlrun/yqlrun.cpp
index cc9101f4c15..b601ee91367 100644
--- a/ydb/library/yql/tools/yqlrun/yqlrun.cpp
+++ b/ydb/library/yql/tools/yqlrun/yqlrun.cpp
@@ -2,6 +2,8 @@
#include <ydb/library/yql/tools/yqlrun/http/yql_server.h>
+#include <ydb/library/yql/dq/opt/dq_opt_join_cbo_factory.h>
+
#include <ydb/library/yql/providers/yt/gateway/file/yql_yt_file.h>
#include <ydb/library/yql/providers/yt/gateway/file/yql_yt_file_services.h>
#include <ydb/library/yql/providers/yt/provider/yql_yt_provider.h>
@@ -700,7 +702,7 @@ int Main(int argc, const char *argv[])
if (gatewayTypes.contains(YtProviderName) || res.Has("opt-collision")) {
auto yqlNativeServices = NFile::TYtFileServices::Make(funcRegistry.Get(), tablesMapping, fileStorage, tmpDir, res.Has("keep-temp"), tablesDirMapping);
auto ytNativeGateway = CreateYtFileGateway(yqlNativeServices, &emulateOutputForMultirun);
- dataProvidersInit.push_back(GetYtNativeDataProviderInitializer(ytNativeGateway));
+ dataProvidersInit.push_back(GetYtNativeDataProviderInitializer(ytNativeGateway, NDq::MakeCBOOptimizerFactory()));
}
}
diff --git a/ydb/library/yql/yt/native/plugin.cpp b/ydb/library/yql/yt/native/plugin.cpp
index dae2eb7fea6..f231895aa3f 100644
--- a/ydb/library/yql/yt/native/plugin.cpp
+++ b/ydb/library/yql/yt/native/plugin.cpp
@@ -29,6 +29,7 @@
#include <yql/essentials/ast/yql_expr.h>
#include <ydb/library/yql/dq/comp_nodes/yql_common_dq_factory.h>
+#include <ydb/library/yql/dq/opt/dq_opt_join_cbo_factory.h>
#include <yql/essentials/core/facade/yql_facade.h>
#include <yql/essentials/core/file_storage/file_storage.h>
#include <yql/essentials/core/file_storage/proto/file_storage.pb.h>
@@ -353,7 +354,7 @@ public:
}
auto ytNativeGateway = CreateYtNativeGateway(ytServices);
- dataProvidersInit.push_back(GetYtNativeDataProviderInitializer(ytNativeGateway));
+ dataProvidersInit.push_back(GetYtNativeDataProviderInitializer(ytNativeGateway, NDq::MakeCBOOptimizerFactory()));
ProgramFactory_ = std::make_unique<NYql::TProgramFactory>(
false, FuncRegistry_.Get(), ExprContext_.NextUniqueId, dataProvidersInit, "embedded");
diff --git a/ydb/library/yql/yt/native/ya.make b/ydb/library/yql/yt/native/ya.make
index d3124a05e2e..c470cff46b5 100644
--- a/ydb/library/yql/yt/native/ya.make
+++ b/ydb/library/yql/yt/native/ya.make
@@ -43,6 +43,7 @@ PEERDIR(
yql/essentials/core
yql/essentials/core/url_preprocessing
ydb/library/yql/dq/comp_nodes
+ ydb/library/yql/dq/opt
ydb/library/yql/providers/dq/actors/yt
ydb/library/yql/providers/dq/global_worker_manager
ydb/library/yql/providers/dq/provider