aboutsummaryrefslogtreecommitdiffstats
path: root/yt/cpp/mapreduce
diff options
context:
space:
mode:
authormax42 <max42@yandex-team.com>2023-06-30 03:37:03 +0300
committermax42 <max42@yandex-team.com>2023-06-30 03:37:03 +0300
commitfac2bd72b4b31ec3238292caf8fb2a8aaa6d6c4a (patch)
treeb8cbc1deb00309c7f1a7ab6df520a76cf0b5c6d7 /yt/cpp/mapreduce
parent7bf166b1a7ed0af927f230022b245af618e998c1 (diff)
downloadydb-fac2bd72b4b31ec3238292caf8fb2a8aaa6d6c4a.tar.gz
YT-19324: move YT provider to ydb/library/yql
This commit is formed by the following script: https://paste.yandex-team.ru/6f92e4b8-efc5-4d34-948b-15ee2accd7e7/text. This commit has zero effect on all projects that depend on YQL. The summary of changes: - `yql/providers/yt -> ydb/library/yql/providers/yt `- the whole implementation of YT provider is moved into YDB code base for further export as a part of YT YQL plugin shared library; - `yql/providers/stat/{expr_nodes,uploader} -> ydb/library/yql/providers/stat/{expr_nodes,uploader}` - a small interface without implementation and the description of stat expr nodes; - `yql/core/extract_predicate/ut -> ydb/library/yql/core/extract_predicate/ut`; - `yql/core/{ut,ut_common} -> ydb/library/yql/core/{ut,ut_common}`; - `yql/core` is gone; - `yql/library/url_preprocessing -> ydb/library/yql/core/url_preprocessing`. **NB**: all new targets inside `ydb/` are under `IF (NOT CMAKE_EXPORT)` clause which disables them from open-source cmake generation and ya make build. They will be enabled in the subsequent commits.
Diffstat (limited to 'yt/cpp/mapreduce')
-rw-r--r--yt/cpp/mapreduce/client/abortable_registry.cpp125
-rw-r--r--yt/cpp/mapreduce/client/abortable_registry.h81
-rw-r--r--yt/cpp/mapreduce/client/batch_request_impl.cpp198
-rw-r--r--yt/cpp/mapreduce/client/batch_request_impl.h137
-rw-r--r--yt/cpp/mapreduce/client/client.cpp1361
-rw-r--r--yt/cpp/mapreduce/client/client.h506
-rw-r--r--yt/cpp/mapreduce/client/client_reader.cpp232
-rw-r--r--yt/cpp/mapreduce/client/client_reader.h65
-rw-r--r--yt/cpp/mapreduce/client/client_writer.cpp69
-rw-r--r--yt/cpp/mapreduce/client/client_writer.h42
-rw-r--r--yt/cpp/mapreduce/client/dummy_job_profiler.cpp26
-rw-r--r--yt/cpp/mapreduce/client/file_reader.cpp243
-rw-r--r--yt/cpp/mapreduce/client/file_reader.h105
-rw-r--r--yt/cpp/mapreduce/client/file_writer.cpp60
-rw-r--r--yt/cpp/mapreduce/client/file_writer.h38
-rw-r--r--yt/cpp/mapreduce/client/format_hints.cpp84
-rw-r--r--yt/cpp/mapreduce/client/format_hints.h27
-rw-r--r--yt/cpp/mapreduce/client/fwd.h16
-rw-r--r--yt/cpp/mapreduce/client/init.cpp280
-rw-r--r--yt/cpp/mapreduce/client/init.h22
-rw-r--r--yt/cpp/mapreduce/client/job_profiler.h27
-rw-r--r--yt/cpp/mapreduce/client/lock.cpp105
-rw-r--r--yt/cpp/mapreduce/client/lock.h31
-rw-r--r--yt/cpp/mapreduce/client/operation.cpp2981
-rw-r--r--yt/cpp/mapreduce/client/operation.h203
-rw-r--r--yt/cpp/mapreduce/client/operation_helpers.cpp91
-rw-r--r--yt/cpp/mapreduce/client/operation_helpers.h20
-rw-r--r--yt/cpp/mapreduce/client/operation_preparer.cpp881
-rw-r--r--yt/cpp/mapreduce/client/operation_preparer.h129
-rw-r--r--yt/cpp/mapreduce/client/operation_tracker.cpp34
-rw-r--r--yt/cpp/mapreduce/client/operation_tracker.h27
-rw-r--r--yt/cpp/mapreduce/client/prepare_operation.cpp286
-rw-r--r--yt/cpp/mapreduce/client/prepare_operation.h93
-rw-r--r--yt/cpp/mapreduce/client/py_helpers.cpp112
-rw-r--r--yt/cpp/mapreduce/client/py_helpers.h25
-rw-r--r--yt/cpp/mapreduce/client/retry_heavy_write_request.cpp87
-rw-r--r--yt/cpp/mapreduce/client/retry_heavy_write_request.h21
-rw-r--r--yt/cpp/mapreduce/client/retry_transaction.h71
-rw-r--r--yt/cpp/mapreduce/client/retryful_writer.cpp163
-rw-r--r--yt/cpp/mapreduce/client/retryful_writer.h130
-rw-r--r--yt/cpp/mapreduce/client/retryless_writer.cpp45
-rw-r--r--yt/cpp/mapreduce/client/retryless_writer.h73
-rw-r--r--yt/cpp/mapreduce/client/skiff.cpp396
-rw-r--r--yt/cpp/mapreduce/client/skiff.h72
-rw-r--r--yt/cpp/mapreduce/client/structured_table_formats.cpp572
-rw-r--r--yt/cpp/mapreduce/client/structured_table_formats.h146
-rw-r--r--yt/cpp/mapreduce/client/transaction.cpp195
-rw-r--r--yt/cpp/mapreduce/client/transaction.h95
-rw-r--r--yt/cpp/mapreduce/client/transaction_pinger.cpp321
-rw-r--r--yt/cpp/mapreduce/client/transaction_pinger.h39
-rw-r--r--yt/cpp/mapreduce/client/ya.make75
-rw-r--r--yt/cpp/mapreduce/client/yt_poller.cpp132
-rw-r--r--yt/cpp/mapreduce/client/yt_poller.h86
-rw-r--r--yt/cpp/mapreduce/common/debug_metrics.cpp62
-rw-r--r--yt/cpp/mapreduce/common/debug_metrics.h22
-rw-r--r--yt/cpp/mapreduce/common/fwd.h11
-rw-r--r--yt/cpp/mapreduce/common/helpers.cpp126
-rw-r--r--yt/cpp/mapreduce/common/helpers.h37
-rw-r--r--yt/cpp/mapreduce/common/node_builder.h4
-rw-r--r--yt/cpp/mapreduce/common/node_visitor.h4
-rw-r--r--yt/cpp/mapreduce/common/retry_lib.cpp267
-rw-r--r--yt/cpp/mapreduce/common/retry_lib.h100
-rw-r--r--yt/cpp/mapreduce/common/wait_proxy.cpp118
-rw-r--r--yt/cpp/mapreduce/common/wait_proxy.h53
-rw-r--r--yt/cpp/mapreduce/common/ya.make23
-rw-r--r--yt/cpp/mapreduce/http/abortable_http_response.cpp223
-rw-r--r--yt/cpp/mapreduce/http/abortable_http_response.h142
-rw-r--r--yt/cpp/mapreduce/http/context.cpp25
-rw-r--r--yt/cpp/mapreduce/http/context.h31
-rw-r--r--yt/cpp/mapreduce/http/core.h27
-rw-r--r--yt/cpp/mapreduce/http/fwd.h26
-rw-r--r--yt/cpp/mapreduce/http/helpers.cpp88
-rw-r--r--yt/cpp/mapreduce/http/helpers.h25
-rw-r--r--yt/cpp/mapreduce/http/host_manager.cpp140
-rw-r--r--yt/cpp/mapreduce/http/host_manager.h37
-rw-r--r--yt/cpp/mapreduce/http/http.cpp1014
-rw-r--r--yt/cpp/mapreduce/http/http.h256
-rw-r--r--yt/cpp/mapreduce/http/http_client.cpp603
-rw-r--r--yt/cpp/mapreduce/http/http_client.h76
-rw-r--r--yt/cpp/mapreduce/http/requests.cpp66
-rw-r--r--yt/cpp/mapreduce/http/requests.h29
-rw-r--r--yt/cpp/mapreduce/http/retry_request.cpp149
-rw-r--r--yt/cpp/mapreduce/http/retry_request.h52
-rw-r--r--yt/cpp/mapreduce/http/ya.make29
-rw-r--r--yt/cpp/mapreduce/interface/batch_request.cpp15
-rw-r--r--yt/cpp/mapreduce/interface/batch_request.h222
-rw-r--r--yt/cpp/mapreduce/interface/client.cpp19
-rw-r--r--yt/cpp/mapreduce/interface/client.h568
-rw-r--r--yt/cpp/mapreduce/interface/client_method_options.cpp34
-rw-r--r--yt/cpp/mapreduce/interface/client_method_options.h1452
-rw-r--r--yt/cpp/mapreduce/interface/common.cpp664
-rw-r--r--yt/cpp/mapreduce/interface/common.h1301
-rw-r--r--yt/cpp/mapreduce/interface/common_ut.cpp303
-rw-r--r--yt/cpp/mapreduce/interface/common_ut.h1
-rw-r--r--yt/cpp/mapreduce/interface/config.cpp321
-rw-r--r--yt/cpp/mapreduce/interface/config.h228
-rw-r--r--yt/cpp/mapreduce/interface/config_ut.cpp20
-rw-r--r--yt/cpp/mapreduce/interface/constants.h19
-rw-r--r--yt/cpp/mapreduce/interface/cypress.cpp24
-rw-r--r--yt/cpp/mapreduce/interface/cypress.h252
-rw-r--r--yt/cpp/mapreduce/interface/error_codes.h468
-rw-r--r--yt/cpp/mapreduce/interface/error_ut.cpp81
-rw-r--r--yt/cpp/mapreduce/interface/errors.cpp437
-rw-r--r--yt/cpp/mapreduce/interface/errors.h290
-rw-r--r--yt/cpp/mapreduce/interface/finish_or_die.h41
-rw-r--r--yt/cpp/mapreduce/interface/fluent.h678
-rw-r--r--yt/cpp/mapreduce/interface/format.cpp135
-rw-r--r--yt/cpp/mapreduce/interface/format.h122
-rw-r--r--yt/cpp/mapreduce/interface/format_ut.cpp235
-rw-r--r--yt/cpp/mapreduce/interface/fwd.h397
-rw-r--r--yt/cpp/mapreduce/interface/init.h71
-rw-r--r--yt/cpp/mapreduce/interface/io-inl.h1015
-rw-r--r--yt/cpp/mapreduce/interface/io.cpp47
-rw-r--r--yt/cpp/mapreduce/interface/io.h586
-rw-r--r--yt/cpp/mapreduce/interface/job_counters.cpp164
-rw-r--r--yt/cpp/mapreduce/interface/job_counters.h74
-rw-r--r--yt/cpp/mapreduce/interface/job_counters_ut.cpp103
-rw-r--r--yt/cpp/mapreduce/interface/job_statistics.cpp361
-rw-r--r--yt/cpp/mapreduce/interface/job_statistics.h268
-rw-r--r--yt/cpp/mapreduce/interface/job_statistics_ut.cpp257
-rw-r--r--yt/cpp/mapreduce/interface/logging/logger.cpp188
-rw-r--r--yt/cpp/mapreduce/interface/logging/logger.h43
-rw-r--r--yt/cpp/mapreduce/interface/logging/ya.make16
-rw-r--r--yt/cpp/mapreduce/interface/logging/yt_log.cpp126
-rw-r--r--yt/cpp/mapreduce/interface/logging/yt_log.h17
-rw-r--r--yt/cpp/mapreduce/interface/mpl.h73
-rw-r--r--yt/cpp/mapreduce/interface/node.h7
-rw-r--r--yt/cpp/mapreduce/interface/operation-inl.h928
-rw-r--r--yt/cpp/mapreduce/interface/operation.cpp663
-rw-r--r--yt/cpp/mapreduce/interface/operation.h3494
-rw-r--r--yt/cpp/mapreduce/interface/operation_ut.cpp269
-rw-r--r--yt/cpp/mapreduce/interface/proto3_ut.proto17
-rw-r--r--yt/cpp/mapreduce/interface/protobuf_file_options_ut.cpp271
-rw-r--r--yt/cpp/mapreduce/interface/protobuf_file_options_ut.proto142
-rw-r--r--yt/cpp/mapreduce/interface/protobuf_format.cpp1498
-rw-r--r--yt/cpp/mapreduce/interface/protobuf_format.h106
-rw-r--r--yt/cpp/mapreduce/interface/protobuf_table_schema_ut.cpp451
-rw-r--r--yt/cpp/mapreduce/interface/protobuf_table_schema_ut.proto402
-rw-r--r--yt/cpp/mapreduce/interface/public.h10
-rw-r--r--yt/cpp/mapreduce/interface/retry_policy.h47
-rw-r--r--yt/cpp/mapreduce/interface/serialize.cpp553
-rw-r--r--yt/cpp/mapreduce/interface/serialize.h90
-rw-r--r--yt/cpp/mapreduce/interface/serialize_ut.cpp49
-rw-r--r--yt/cpp/mapreduce/interface/skiff_row.cpp1
-rw-r--r--yt/cpp/mapreduce/interface/skiff_row.h127
-rw-r--r--yt/cpp/mapreduce/interface/tvm.cpp1
-rw-r--r--yt/cpp/mapreduce/interface/tvm.h35
-rw-r--r--yt/cpp/mapreduce/interface/ut/ya.make25
-rw-r--r--yt/cpp/mapreduce/interface/wait_proxy.h54
-rw-r--r--yt/cpp/mapreduce/interface/ya.make46
-rw-r--r--yt/cpp/mapreduce/io/counting_raw_reader.cpp38
-rw-r--r--yt/cpp/mapreduce/io/counting_raw_reader.h31
-rw-r--r--yt/cpp/mapreduce/io/helpers.h130
-rw-r--r--yt/cpp/mapreduce/io/job_reader.cpp46
-rw-r--r--yt/cpp/mapreduce/io/job_reader.h38
-rw-r--r--yt/cpp/mapreduce/io/job_writer.cpp68
-rw-r--r--yt/cpp/mapreduce/io/job_writer.h43
-rw-r--r--yt/cpp/mapreduce/io/lenval_table_reader.cpp198
-rw-r--r--yt/cpp/mapreduce/io/lenval_table_reader.h67
-rw-r--r--yt/cpp/mapreduce/io/node_table_reader.cpp375
-rw-r--r--yt/cpp/mapreduce/io/node_table_reader.h91
-rw-r--r--yt/cpp/mapreduce/io/node_table_writer.cpp72
-rw-r--r--yt/cpp/mapreduce/io/node_table_writer.h33
-rw-r--r--yt/cpp/mapreduce/io/proto_helpers.cpp101
-rw-r--r--yt/cpp/mapreduce/io/proto_helpers.h36
-rw-r--r--yt/cpp/mapreduce/io/proto_table_reader.cpp305
-rw-r--r--yt/cpp/mapreduce/io/proto_table_reader.h76
-rw-r--r--yt/cpp/mapreduce/io/proto_table_writer.cpp184
-rw-r--r--yt/cpp/mapreduce/io/proto_table_writer.h61
-rw-r--r--yt/cpp/mapreduce/io/skiff_row_table_reader.cpp232
-rw-r--r--yt/cpp/mapreduce/io/skiff_row_table_reader.h67
-rw-r--r--yt/cpp/mapreduce/io/skiff_table_reader.cpp293
-rw-r--r--yt/cpp/mapreduce/io/skiff_table_reader.h65
-rw-r--r--yt/cpp/mapreduce/io/stream_raw_reader.cpp59
-rw-r--r--yt/cpp/mapreduce/io/stream_table_reader.h65
-rw-r--r--yt/cpp/mapreduce/io/ya.make33
-rw-r--r--yt/cpp/mapreduce/io/yamr_table_reader.cpp145
-rw-r--r--yt/cpp/mapreduce/io/yamr_table_reader.h48
-rw-r--r--yt/cpp/mapreduce/io/yamr_table_writer.cpp53
-rw-r--r--yt/cpp/mapreduce/io/yamr_table_writer.h31
-rw-r--r--yt/cpp/mapreduce/library/table_schema/protobuf.cpp1
-rw-r--r--yt/cpp/mapreduce/library/table_schema/protobuf.h3
-rw-r--r--yt/cpp/mapreduce/library/table_schema/ya.make14
-rw-r--r--yt/cpp/mapreduce/raw_client/raw_batch_request.cpp687
-rw-r--r--yt/cpp/mapreduce/raw_client/raw_batch_request.h190
-rw-r--r--yt/cpp/mapreduce/raw_client/raw_requests.cpp1027
-rw-r--r--yt/cpp/mapreduce/raw_client/raw_requests.h397
-rw-r--r--yt/cpp/mapreduce/raw_client/rpc_parameters_serialization.cpp873
-rw-r--r--yt/cpp/mapreduce/raw_client/rpc_parameters_serialization.h231
-rw-r--r--yt/cpp/mapreduce/raw_client/ya.make19
-rw-r--r--yt/cpp/mapreduce/skiff/skiff_schema.h3
-rw-r--r--yt/cpp/mapreduce/skiff/unchecked_parser.h1
-rw-r--r--yt/cpp/mapreduce/skiff/wire_type.h1
-rw-r--r--yt/cpp/mapreduce/skiff/ya.make9
-rw-r--r--yt/cpp/mapreduce/tests/yt_unittest_lib/yt_unittest_lib.h194
195 files changed, 43102 insertions, 0 deletions
diff --git a/yt/cpp/mapreduce/client/abortable_registry.cpp b/yt/cpp/mapreduce/client/abortable_registry.cpp
new file mode 100644
index 0000000000..283d39e049
--- /dev/null
+++ b/yt/cpp/mapreduce/client/abortable_registry.cpp
@@ -0,0 +1,125 @@
+#include "abortable_registry.h"
+
+#include <yt/cpp/mapreduce/common/retry_lib.h>
+
+#include <yt/cpp/mapreduce/interface/common.h>
+#include <yt/cpp/mapreduce/interface/logging/yt_log.h>
+
+#include <util/generic/singleton.h>
+
+namespace NYT {
+namespace NDetail {
+
+using namespace NRawClient;
+
+////////////////////////////////////////////////////////////////////////////////
+
+TTransactionAbortable::TTransactionAbortable(const TClientContext& context, const TTransactionId& transactionId)
+ : Context_(context)
+ , TransactionId_(transactionId)
+{ }
+
+void TTransactionAbortable::Abort()
+{
+ AbortTransaction(nullptr, Context_, TransactionId_);
+}
+
+TString TTransactionAbortable::GetType() const
+{
+ return "transaction";
+}
+
+////////////////////////////////////////////////////////////////////////////////
+
+TOperationAbortable::TOperationAbortable(IClientRetryPolicyPtr clientRetryPolicy, TClientContext context, const TOperationId& operationId)
+ : ClientRetryPolicy_(std::move(clientRetryPolicy))
+ , Context_(std::move(context))
+ , OperationId_(operationId)
+{ }
+
+
+void TOperationAbortable::Abort()
+{
+ AbortOperation(ClientRetryPolicy_->CreatePolicyForGenericRequest(), Context_, OperationId_);
+}
+
+TString TOperationAbortable::GetType() const
+{
+ return "operation";
+}
+
+////////////////////////////////////////////////////////////////////////////////
+
+void TAbortableRegistry::AbortAllAndBlockForever()
+{
+ auto guard = Guard(Lock_);
+
+ for (const auto& entry : ActiveAbortables_) {
+ const auto& id = entry.first;
+ const auto& abortable = entry.second;
+ try {
+ abortable->Abort();
+ } catch (std::exception& ex) {
+ YT_LOG_ERROR("Exception while aborting %v %v: %v",
+ abortable->GetType(),
+ id,
+ ex.what());
+ }
+ }
+
+ Running_ = false;
+}
+
+void TAbortableRegistry::Add(const TGUID& id, IAbortablePtr abortable)
+{
+ auto guard = Guard(Lock_);
+
+ if (!Running_) {
+ Sleep(TDuration::Max());
+ }
+
+ ActiveAbortables_[id] = abortable;
+}
+
+void TAbortableRegistry::Remove(const TGUID& id)
+{
+ auto guard = Guard(Lock_);
+
+ if (!Running_) {
+ Sleep(TDuration::Max());
+ }
+
+ ActiveAbortables_.erase(id);
+}
+
+////////////////////////////////////////////////////////////////////////////////
+
+namespace {
+
+class TRegistryHolder
+{
+public:
+ TRegistryHolder()
+ : Registry_(::MakeIntrusive<TAbortableRegistry>())
+ { }
+
+ ::TIntrusivePtr<TAbortableRegistry> Get()
+ {
+ return Registry_;
+ }
+
+private:
+ ::TIntrusivePtr<TAbortableRegistry> Registry_;
+};
+
+} // namespace
+
+::TIntrusivePtr<TAbortableRegistry> TAbortableRegistry::Get()
+{
+ return Singleton<TRegistryHolder>()->Get();
+}
+
+////////////////////////////////////////////////////////////////////////////////
+
+} // namespace NDetail
+} // namespace NYT
diff --git a/yt/cpp/mapreduce/client/abortable_registry.h b/yt/cpp/mapreduce/client/abortable_registry.h
new file mode 100644
index 0000000000..119d685cad
--- /dev/null
+++ b/yt/cpp/mapreduce/client/abortable_registry.h
@@ -0,0 +1,81 @@
+#pragma once
+
+#include <yt/cpp/mapreduce/interface/common.h>
+
+#include <yt/cpp/mapreduce/http/context.h>
+
+#include <yt/cpp/mapreduce/raw_client/raw_requests.h>
+
+#include <util/str_stl.h>
+#include <util/system/mutex.h>
+#include <util/generic/hash.h>
+
+namespace NYT {
+namespace NDetail {
+
+////////////////////////////////////////////////////////////////////////////////
+
+class IAbortable
+ : public TThrRefBase
+{
+public:
+ virtual void Abort() = 0;
+ virtual TString GetType() const = 0;
+};
+
+using IAbortablePtr = ::TIntrusivePtr<IAbortable>;
+
+////////////////////////////////////////////////////////////////////////////////
+
+class TTransactionAbortable
+ : public IAbortable
+{
+public:
+ TTransactionAbortable(const TClientContext& context, const TTransactionId& transactionId);
+ void Abort() override;
+ TString GetType() const override;
+
+private:
+ TClientContext Context_;
+ TTransactionId TransactionId_;
+};
+
+////////////////////////////////////////////////////////////////////////////////
+
+class TOperationAbortable
+ : public IAbortable
+{
+public:
+ TOperationAbortable(IClientRetryPolicyPtr clientRetryPolicy, TClientContext context, const TOperationId& operationId);
+ void Abort() override;
+ TString GetType() const override;
+
+private:
+ const IClientRetryPolicyPtr ClientRetryPolicy_;
+ const TClientContext Context_;
+ const TOperationId OperationId_;
+};
+
+////////////////////////////////////////////////////////////////////////////////
+
+class TAbortableRegistry
+ : public TThrRefBase
+{
+public:
+ TAbortableRegistry() = default;
+ static ::TIntrusivePtr<TAbortableRegistry> Get();
+
+ void AbortAllAndBlockForever();
+ void Add(const TGUID& id, IAbortablePtr abortable);
+ void Remove(const TGUID& id);
+
+private:
+ THashMap<TGUID, IAbortablePtr> ActiveAbortables_;
+ TMutex Lock_;
+ bool Running_ = true;
+};
+
+////////////////////////////////////////////////////////////////////////////////
+
+} // namespace NDetail
+} // namespace NYT
diff --git a/yt/cpp/mapreduce/client/batch_request_impl.cpp b/yt/cpp/mapreduce/client/batch_request_impl.cpp
new file mode 100644
index 0000000000..6afa5665f1
--- /dev/null
+++ b/yt/cpp/mapreduce/client/batch_request_impl.cpp
@@ -0,0 +1,198 @@
+#include "batch_request_impl.h"
+
+#include "lock.h"
+
+#include <yt/cpp/mapreduce/common/helpers.h>
+#include <yt/cpp/mapreduce/common/retry_lib.h>
+
+#include <yt/cpp/mapreduce/http/retry_request.h>
+
+#include <yt/cpp/mapreduce/interface/config.h>
+
+#include <yt/cpp/mapreduce/interface/logging/yt_log.h>
+
+#include <library/cpp/yson/node/node.h>
+#include <library/cpp/yson/node/serialize.h>
+
+#include <yt/cpp/mapreduce/raw_client/raw_requests.h>
+#include <yt/cpp/mapreduce/raw_client/raw_batch_request.h>
+#include <yt/cpp/mapreduce/raw_client/rpc_parameters_serialization.h>
+
+#include <util/generic/guid.h>
+#include <util/string/builder.h>
+
+#include <exception>
+
+namespace NYT {
+namespace NDetail {
+
+using namespace NRawClient;
+
+using ::NThreading::TFuture;
+using ::NThreading::TPromise;
+using ::NThreading::NewPromise;
+
+////////////////////////////////////////////////////////////////////
+
+TBatchRequest::TBatchRequest(const TTransactionId& defaultTransaction, ::TIntrusivePtr<TClient> client)
+ : DefaultTransaction_(defaultTransaction)
+ , Impl_(MakeIntrusive<TRawBatchRequest>(client->GetContext().Config))
+ , Client_(client)
+{ }
+
+TBatchRequest::TBatchRequest(TRawBatchRequest* impl, ::TIntrusivePtr<TClient> client)
+ : Impl_(impl)
+ , Client_(std::move(client))
+{ }
+
+TBatchRequest::~TBatchRequest() = default;
+
+IBatchRequestBase& TBatchRequest::WithTransaction(const TTransactionId& transactionId)
+{
+ if (!TmpWithTransaction_) {
+ TmpWithTransaction_.Reset(new TBatchRequest(Impl_.Get(), Client_));
+ }
+ TmpWithTransaction_->DefaultTransaction_ = transactionId;
+ return *TmpWithTransaction_;
+}
+
+TFuture<TNode> TBatchRequest::Get(
+ const TYPath& path,
+ const TGetOptions& options)
+{
+ return Impl_->Get(DefaultTransaction_, path, options);
+}
+
+TFuture<void> TBatchRequest::Set(const TYPath& path, const TNode& node, const TSetOptions& options)
+{
+ return Impl_->Set(DefaultTransaction_, path, node, options);
+}
+
+TFuture<TNode::TListType> TBatchRequest::List(const TYPath& path, const TListOptions& options)
+{
+ return Impl_->List(DefaultTransaction_, path, options);
+}
+
+TFuture<bool> TBatchRequest::Exists(const TYPath& path, const TExistsOptions& options)
+{
+ return Impl_->Exists(DefaultTransaction_, path, options);
+}
+
+TFuture<ILockPtr> TBatchRequest::Lock(
+ const TYPath& path,
+ ELockMode mode,
+ const TLockOptions& options)
+{
+ auto convert = [waitable=options.Waitable_, client=Client_] (TFuture<TNodeId> nodeIdFuture) -> ILockPtr {
+ return ::MakeIntrusive<TLock>(nodeIdFuture.GetValue(), client, waitable);
+ };
+ return Impl_->Lock(DefaultTransaction_, path, mode, options).Apply(convert);
+}
+
+::NThreading::TFuture<void> TBatchRequest::Unlock(
+ const TYPath& path,
+ const TUnlockOptions& options = TUnlockOptions())
+{
+ return Impl_->Unlock(DefaultTransaction_, path, options);
+}
+
+TFuture<TLockId> TBatchRequest::Create(
+ const TYPath& path,
+ ENodeType type,
+ const TCreateOptions& options)
+{
+ return Impl_->Create(DefaultTransaction_, path, type, options);
+}
+
+TFuture<void> TBatchRequest::Remove(
+ const TYPath& path,
+ const TRemoveOptions& options)
+{
+ return Impl_->Remove(DefaultTransaction_, path, options);
+}
+
+TFuture<TNodeId> TBatchRequest::Move(
+ const TYPath& sourcePath,
+ const TYPath& destinationPath,
+ const TMoveOptions& options)
+{
+ return Impl_->Move(DefaultTransaction_, sourcePath, destinationPath, options);
+}
+
+TFuture<TNodeId> TBatchRequest::Copy(
+ const TYPath& sourcePath,
+ const TYPath& destinationPath,
+ const TCopyOptions& options)
+{
+ return Impl_->Copy(DefaultTransaction_, sourcePath, destinationPath, options);
+}
+
+TFuture<TNodeId> TBatchRequest::Link(
+ const TYPath& targetPath,
+ const TYPath& linkPath,
+ const TLinkOptions& options)
+{
+ return Impl_->Link(DefaultTransaction_, targetPath, linkPath, options);
+}
+
+TFuture<void> TBatchRequest::AbortOperation(const NYT::TOperationId& operationId)
+{
+ return Impl_->AbortOperation(operationId);
+}
+
+TFuture<void> TBatchRequest::CompleteOperation(const NYT::TOperationId& operationId)
+{
+ return Impl_->CompleteOperation(operationId);
+}
+
+TFuture<void> TBatchRequest::SuspendOperation(
+ const TOperationId& operationId,
+ const TSuspendOperationOptions& options)
+{
+ return Impl_->SuspendOperation(operationId, options);
+}
+
+TFuture<void> TBatchRequest::ResumeOperation(
+ const TOperationId& operationId,
+ const TResumeOperationOptions& options)
+{
+ return Impl_->ResumeOperation(operationId, options);
+}
+
+TFuture<void> TBatchRequest::UpdateOperationParameters(
+ const NYT::TOperationId& operationId,
+ const NYT::TUpdateOperationParametersOptions& options)
+{
+ return Impl_->UpdateOperationParameters(operationId, options);
+}
+
+TFuture<TRichYPath> TBatchRequest::CanonizeYPath(const TRichYPath& path)
+{
+ return Impl_->CanonizeYPath(path);
+}
+
+TFuture<TVector<TTableColumnarStatistics>> TBatchRequest::GetTableColumnarStatistics(
+ const TVector<TRichYPath>& paths,
+ const NYT::TGetTableColumnarStatisticsOptions& options)
+{
+ return Impl_->GetTableColumnarStatistics(DefaultTransaction_, paths, options);
+}
+
+TFuture<TCheckPermissionResponse> TBatchRequest::CheckPermission(
+ const TString& user,
+ EPermission permission,
+ const TYPath& path,
+ const TCheckPermissionOptions& options)
+{
+ return Impl_->CheckPermission(user, permission, path, options);
+}
+
+void TBatchRequest::ExecuteBatch(const TExecuteBatchOptions& options)
+{
+ NYT::NDetail::ExecuteBatch(Client_->GetRetryPolicy()->CreatePolicyForGenericRequest(), Client_->GetContext(), *Impl_, options);
+}
+
+////////////////////////////////////////////////////////////////////////////////
+
+} // namespace NDetail
+} // namespace NYT
diff --git a/yt/cpp/mapreduce/client/batch_request_impl.h b/yt/cpp/mapreduce/client/batch_request_impl.h
new file mode 100644
index 0000000000..0a176417b3
--- /dev/null
+++ b/yt/cpp/mapreduce/client/batch_request_impl.h
@@ -0,0 +1,137 @@
+#pragma once
+
+#include <yt/cpp/mapreduce/interface/batch_request.h>
+#include <yt/cpp/mapreduce/interface/fwd.h>
+#include <yt/cpp/mapreduce/interface/node.h>
+
+#include <yt/cpp/mapreduce/http/requests.h>
+
+#include <library/cpp/threading/future/future.h>
+
+#include <util/generic/ptr.h>
+#include <util/generic/deque.h>
+
+#include <exception>
+
+namespace NYT {
+namespace NDetail {
+
+////////////////////////////////////////////////////////////////////////////////
+
+struct TResponseInfo;
+class TClient;
+using TClientPtr = ::TIntrusivePtr<TClient>;
+
+namespace NRawClient {
+ class TRawBatchRequest;
+}
+
+////////////////////////////////////////////////////////////////////////////////
+
+class TBatchRequest
+ : public IBatchRequest
+{
+public:
+ TBatchRequest(const TTransactionId& defaultTransaction, ::TIntrusivePtr<TClient> client);
+
+ ~TBatchRequest();
+
+ virtual IBatchRequestBase& WithTransaction(const TTransactionId& transactionId) override;
+
+ virtual ::NThreading::TFuture<TLockId> Create(
+ const TYPath& path,
+ ENodeType type,
+ const TCreateOptions& options = TCreateOptions()) override;
+
+ virtual ::NThreading::TFuture<void> Remove(
+ const TYPath& path,
+ const TRemoveOptions& options = TRemoveOptions()) override;
+
+ virtual ::NThreading::TFuture<bool> Exists(
+ const TYPath& path,
+ const TExistsOptions& options = TExistsOptions()) override;
+
+ virtual ::NThreading::TFuture<TNode> Get(
+ const TYPath& path,
+ const TGetOptions& options = TGetOptions()) override;
+
+ virtual ::NThreading::TFuture<void> Set(
+ const TYPath& path,
+ const TNode& node,
+ const TSetOptions& options = TSetOptions()) override;
+
+ virtual ::NThreading::TFuture<TNode::TListType> List(
+ const TYPath& path,
+ const TListOptions& options = TListOptions()) override;
+
+ virtual ::NThreading::TFuture<TNodeId> Copy(
+ const TYPath& sourcePath,
+ const TYPath& destinationPath,
+ const TCopyOptions& options = TCopyOptions()) override;
+
+ virtual ::NThreading::TFuture<TNodeId> Move(
+ const TYPath& sourcePath,
+ const TYPath& destinationPath,
+ const TMoveOptions& options = TMoveOptions()) override;
+
+ virtual ::NThreading::TFuture<TNodeId> Link(
+ const TYPath& targetPath,
+ const TYPath& linkPath,
+ const TLinkOptions& options = TLinkOptions()) override;
+
+ virtual ::NThreading::TFuture<ILockPtr> Lock(
+ const TYPath& path,
+ ELockMode mode,
+ const TLockOptions& options) override;
+
+ virtual ::NThreading::TFuture<void> Unlock(
+ const TYPath& path,
+ const TUnlockOptions& options) override;
+
+ virtual ::NThreading::TFuture<void> AbortOperation(const TOperationId& operationId) override;
+
+ virtual ::NThreading::TFuture<void> CompleteOperation(const TOperationId& operationId) override;
+
+ ::NThreading::TFuture<void> SuspendOperation(
+ const TOperationId& operationId,
+ const TSuspendOperationOptions& options) override;
+
+ ::NThreading::TFuture<void> ResumeOperation(
+ const TOperationId& operationId,
+ const TResumeOperationOptions& options) override;
+
+ virtual ::NThreading::TFuture<void> UpdateOperationParameters(
+ const TOperationId& operationId,
+ const TUpdateOperationParametersOptions& options) override;
+
+ virtual ::NThreading::TFuture<TRichYPath> CanonizeYPath(const TRichYPath& path) override;
+
+ virtual ::NThreading::TFuture<TVector<TTableColumnarStatistics>> GetTableColumnarStatistics(
+ const TVector<TRichYPath>& paths,
+ const TGetTableColumnarStatisticsOptions& options) override;
+
+ ::NThreading::TFuture<TCheckPermissionResponse> CheckPermission(
+ const TString& user,
+ EPermission permission,
+ const TYPath& path,
+ const TCheckPermissionOptions& options) override;
+
+ virtual void ExecuteBatch(const TExecuteBatchOptions& executeBatch) override;
+
+private:
+ TBatchRequest(NDetail::NRawClient::TRawBatchRequest* impl, ::TIntrusivePtr<TClient> client);
+
+private:
+ TTransactionId DefaultTransaction_;
+ ::TIntrusivePtr<NDetail::NRawClient::TRawBatchRequest> Impl_;
+ THolder<TBatchRequest> TmpWithTransaction_;
+ ::TIntrusivePtr<TClient> Client_;
+
+private:
+ friend class NYT::NDetail::TClient;
+};
+
+////////////////////////////////////////////////////////////////////////////////
+
+} // namespace NDetail
+} // namespace NYT
diff --git a/yt/cpp/mapreduce/client/client.cpp b/yt/cpp/mapreduce/client/client.cpp
new file mode 100644
index 0000000000..ca979c5588
--- /dev/null
+++ b/yt/cpp/mapreduce/client/client.cpp
@@ -0,0 +1,1361 @@
+#include "client.h"
+
+#include "batch_request_impl.h"
+#include "client_reader.h"
+#include "client_writer.h"
+#include "file_reader.h"
+#include "file_writer.h"
+#include "format_hints.h"
+#include "lock.h"
+#include "operation.h"
+#include "retry_transaction.h"
+#include "retryful_writer.h"
+#include "transaction.h"
+#include "transaction_pinger.h"
+#include "yt_poller.h"
+
+#include <yt/cpp/mapreduce/common/helpers.h>
+#include <yt/cpp/mapreduce/common/retry_lib.h>
+
+#include <yt/cpp/mapreduce/http/helpers.h>
+#include <yt/cpp/mapreduce/http/http.h>
+#include <yt/cpp/mapreduce/http/http_client.h>
+#include <yt/cpp/mapreduce/http/requests.h>
+#include <yt/cpp/mapreduce/http/retry_request.h>
+
+#include <yt/cpp/mapreduce/interface/config.h>
+#include <yt/cpp/mapreduce/interface/client.h>
+#include <yt/cpp/mapreduce/interface/fluent.h>
+#include <yt/cpp/mapreduce/interface/logging/yt_log.h>
+#include <yt/cpp/mapreduce/interface/skiff_row.h>
+
+#include <yt/cpp/mapreduce/io/yamr_table_reader.h>
+#include <yt/cpp/mapreduce/io/yamr_table_writer.h>
+#include <yt/cpp/mapreduce/io/node_table_reader.h>
+#include <yt/cpp/mapreduce/io/node_table_writer.h>
+#include <yt/cpp/mapreduce/io/proto_table_reader.h>
+#include <yt/cpp/mapreduce/io/proto_table_writer.h>
+#include <yt/cpp/mapreduce/io/skiff_row_table_reader.h>
+#include <yt/cpp/mapreduce/io/proto_helpers.h>
+
+#include <yt/cpp/mapreduce/library/table_schema/protobuf.h>
+
+#include <yt/cpp/mapreduce/raw_client/raw_requests.h>
+#include <yt/cpp/mapreduce/raw_client/rpc_parameters_serialization.h>
+
+#include <library/cpp/json/json_reader.h>
+
+#include <util/generic/algorithm.h>
+#include <util/string/type.h>
+#include <util/system/env.h>
+
+#include <exception>
+
+using namespace NYT::NDetail::NRawClient;
+
+namespace NYT {
+
+////////////////////////////////////////////////////////////////////////////////
+
+namespace NDetail {
+
+////////////////////////////////////////////////////////////////////////////////
+
+TClientBase::TClientBase(
+ const TClientContext& context,
+ const TTransactionId& transactionId,
+ IClientRetryPolicyPtr retryPolicy)
+ : Context_(context)
+ , TransactionId_(transactionId)
+ , ClientRetryPolicy_(std::move(retryPolicy))
+{ }
+
+ITransactionPtr TClientBase::StartTransaction(
+ const TStartTransactionOptions& options)
+{
+ return MakeIntrusive<TTransaction>(GetParentClientImpl(), Context_, TransactionId_, options);
+}
+
+TNodeId TClientBase::Create(
+ const TYPath& path,
+ ENodeType type,
+ const TCreateOptions& options)
+{
+ return NRawClient::Create(ClientRetryPolicy_->CreatePolicyForGenericRequest(), Context_, TransactionId_, path, type, options);
+}
+
+void TClientBase::Remove(
+ const TYPath& path,
+ const TRemoveOptions& options)
+{
+ return NRawClient::Remove(ClientRetryPolicy_->CreatePolicyForGenericRequest(), Context_, TransactionId_, path, options);
+}
+
+bool TClientBase::Exists(
+ const TYPath& path,
+ const TExistsOptions& options)
+{
+ return NRawClient::Exists(ClientRetryPolicy_->CreatePolicyForGenericRequest(), Context_, TransactionId_, path, options);
+}
+
+TNode TClientBase::Get(
+ const TYPath& path,
+ const TGetOptions& options)
+{
+ return NRawClient::Get(ClientRetryPolicy_->CreatePolicyForGenericRequest(), Context_, TransactionId_, path, options);
+}
+
+void TClientBase::Set(
+ const TYPath& path,
+ const TNode& value,
+ const TSetOptions& options)
+{
+ NRawClient::Set(ClientRetryPolicy_->CreatePolicyForGenericRequest(), Context_, TransactionId_, path, value, options);
+}
+
+void TClientBase::MultisetAttributes(
+ const TYPath& path, const TNode::TMapType& value, const TMultisetAttributesOptions& options)
+{
+ NRawClient::MultisetAttributes(ClientRetryPolicy_->CreatePolicyForGenericRequest(), Context_, TransactionId_, path, value, options);
+}
+
+
+TNode::TListType TClientBase::List(
+ const TYPath& path,
+ const TListOptions& options)
+{
+ return NRawClient::List(ClientRetryPolicy_->CreatePolicyForGenericRequest(), Context_, TransactionId_, path, options);
+}
+
+TNodeId TClientBase::Copy(
+ const TYPath& sourcePath,
+ const TYPath& destinationPath,
+ const TCopyOptions& options)
+{
+ return NRawClient::Copy(ClientRetryPolicy_->CreatePolicyForGenericRequest(), Context_, TransactionId_, sourcePath, destinationPath, options);
+}
+
+TNodeId TClientBase::Move(
+ const TYPath& sourcePath,
+ const TYPath& destinationPath,
+ const TMoveOptions& options)
+{
+ return NRawClient::Move(ClientRetryPolicy_->CreatePolicyForGenericRequest(), Context_, TransactionId_, sourcePath, destinationPath, options);
+}
+
+TNodeId TClientBase::Link(
+ const TYPath& targetPath,
+ const TYPath& linkPath,
+ const TLinkOptions& options)
+{
+ return NRawClient::Link(ClientRetryPolicy_->CreatePolicyForGenericRequest(), Context_, TransactionId_, targetPath, linkPath, options);
+}
+
+void TClientBase::Concatenate(
+ const TVector<TRichYPath>& sourcePaths,
+ const TRichYPath& destinationPath,
+ const TConcatenateOptions& options)
+{
+ std::function<void(ITransactionPtr)> lambda = [&sourcePaths, &destinationPath, &options, this](ITransactionPtr transaction) {
+ if (!options.Append_ && !sourcePaths.empty() && !transaction->Exists(destinationPath.Path_)) {
+ auto typeNode = transaction->Get(CanonizeYPath(sourcePaths.front()).Path_ + "/@type");
+ auto type = FromString<ENodeType>(typeNode.AsString());
+ transaction->Create(destinationPath.Path_, type, TCreateOptions().IgnoreExisting(true));
+ }
+ NRawClient::Concatenate(this->Context_, transaction->GetId(), sourcePaths, destinationPath, options);
+ };
+ RetryTransactionWithPolicy(this, lambda, ClientRetryPolicy_->CreatePolicyForGenericRequest());
+}
+
+TRichYPath TClientBase::CanonizeYPath(const TRichYPath& path)
+{
+ return NRawClient::CanonizeYPath(ClientRetryPolicy_->CreatePolicyForGenericRequest(), Context_, path);
+}
+
+TVector<TTableColumnarStatistics> TClientBase::GetTableColumnarStatistics(
+ const TVector<TRichYPath>& paths,
+ const TGetTableColumnarStatisticsOptions& options)
+{
+ return NRawClient::GetTableColumnarStatistics(
+ ClientRetryPolicy_->CreatePolicyForGenericRequest(),
+ Context_,
+ TransactionId_,
+ paths,
+ options);
+}
+
+TMultiTablePartitions TClientBase::GetTablePartitions(
+ const TVector<TRichYPath>& paths,
+ const TGetTablePartitionsOptions& options)
+{
+ return NRawClient::GetTablePartitions(
+ ClientRetryPolicy_->CreatePolicyForGenericRequest(),
+ Context_,
+ TransactionId_,
+ paths,
+ options);
+}
+
+TMaybe<TYPath> TClientBase::GetFileFromCache(
+ const TString& md5Signature,
+ const TYPath& cachePath,
+ const TGetFileFromCacheOptions& options)
+{
+ return NRawClient::GetFileFromCache(ClientRetryPolicy_->CreatePolicyForGenericRequest(), Context_, TransactionId_, md5Signature, cachePath, options);
+}
+
+TYPath TClientBase::PutFileToCache(
+ const TYPath& filePath,
+ const TString& md5Signature,
+ const TYPath& cachePath,
+ const TPutFileToCacheOptions& options)
+{
+ return NRawClient::PutFileToCache(ClientRetryPolicy_->CreatePolicyForGenericRequest(), Context_, TransactionId_, filePath, md5Signature, cachePath, options);
+}
+
+IFileReaderPtr TClientBase::CreateBlobTableReader(
+ const TYPath& path,
+ const TKey& key,
+ const TBlobTableReaderOptions& options)
+{
+ return new TBlobTableReader(
+ path,
+ key,
+ ClientRetryPolicy_,
+ GetTransactionPinger(),
+ Context_,
+ TransactionId_,
+ options);
+}
+
+IFileReaderPtr TClientBase::CreateFileReader(
+ const TRichYPath& path,
+ const TFileReaderOptions& options)
+{
+ return new TFileReader(
+ CanonizeYPath(path),
+ ClientRetryPolicy_,
+ GetTransactionPinger(),
+ Context_,
+ TransactionId_,
+ options);
+}
+
+IFileWriterPtr TClientBase::CreateFileWriter(
+ const TRichYPath& path,
+ const TFileWriterOptions& options)
+{
+ auto realPath = CanonizeYPath(path);
+ if (!NRawClient::Exists(ClientRetryPolicy_->CreatePolicyForGenericRequest(), Context_, TransactionId_, realPath.Path_)) {
+ NRawClient::Create(ClientRetryPolicy_->CreatePolicyForGenericRequest(), Context_, TransactionId_, realPath.Path_, NT_FILE,
+ TCreateOptions().IgnoreExisting(true));
+ }
+ return new TFileWriter(realPath, ClientRetryPolicy_, GetTransactionPinger(), Context_, TransactionId_, options);
+}
+
+TTableWriterPtr<::google::protobuf::Message> TClientBase::CreateTableWriter(
+ const TRichYPath& path, const ::google::protobuf::Descriptor& descriptor, const TTableWriterOptions& options)
+{
+ const Message* prototype = google::protobuf::MessageFactory::generated_factory()->GetPrototype(&descriptor);
+ return new TTableWriter<::google::protobuf::Message>(CreateProtoWriter(path, options, prototype));
+}
+
+TRawTableReaderPtr TClientBase::CreateRawReader(
+ const TRichYPath& path,
+ const TFormat& format,
+ const TTableReaderOptions& options)
+{
+ return CreateClientReader(path, format, options).Get();
+}
+
+TRawTableWriterPtr TClientBase::CreateRawWriter(
+ const TRichYPath& path,
+ const TFormat& format,
+ const TTableWriterOptions& options)
+{
+ return ::MakeIntrusive<TRetryfulWriter>(
+ ClientRetryPolicy_,
+ GetTransactionPinger(),
+ Context_,
+ TransactionId_,
+ GetWriteTableCommand(Context_.Config->ApiVersion),
+ format,
+ CanonizeYPath(path),
+ options).Get();
+}
+
+IOperationPtr TClientBase::DoMap(
+ const TMapOperationSpec& spec,
+ ::TIntrusivePtr<IStructuredJob> mapper,
+ const TOperationOptions& options)
+{
+ auto operation = ::MakeIntrusive<TOperation>(GetParentClientImpl());
+ auto prepareOperation = [
+ this_ = ::TIntrusivePtr(this),
+ operation,
+ spec,
+ mapper,
+ options
+ ] () {
+ ExecuteMap(
+ operation,
+ ::MakeIntrusive<TOperationPreparer>(this_->GetParentClientImpl(), this_->TransactionId_),
+ spec,
+ mapper,
+ options);
+ };
+ return ProcessOperation(GetParentClientImpl(), std::move(prepareOperation), std::move(operation), options);
+}
+
+IOperationPtr TClientBase::RawMap(
+ const TRawMapOperationSpec& spec,
+ ::TIntrusivePtr<IRawJob> mapper,
+ const TOperationOptions& options)
+{
+ auto operation = ::MakeIntrusive<TOperation>(GetParentClientImpl());
+ auto prepareOperation = [
+ this_=::TIntrusivePtr(this),
+ operation,
+ spec,
+ mapper,
+ options
+ ] () {
+ ExecuteRawMap(
+ operation,
+ ::MakeIntrusive<TOperationPreparer>(this_->GetParentClientImpl(), this_->TransactionId_),
+ spec,
+ mapper,
+ options);
+ };
+ return ProcessOperation(GetParentClientImpl(), std::move(prepareOperation), std::move(operation), options);
+}
+
+IOperationPtr TClientBase::DoReduce(
+ const TReduceOperationSpec& spec,
+ ::TIntrusivePtr<IStructuredJob> reducer,
+ const TOperationOptions& options)
+{
+ auto operation = ::MakeIntrusive<TOperation>(GetParentClientImpl());
+ auto prepareOperation = [
+ this_=::TIntrusivePtr(this),
+ operation,
+ spec,
+ reducer,
+ options
+ ] () {
+ ExecuteReduce(
+ operation,
+ ::MakeIntrusive<TOperationPreparer>(this_->GetParentClientImpl(), this_->TransactionId_),
+ spec,
+ reducer,
+ options);
+ };
+ return ProcessOperation(GetParentClientImpl(), std::move(prepareOperation), std::move(operation), options);
+}
+
+IOperationPtr TClientBase::RawReduce(
+ const TRawReduceOperationSpec& spec,
+ ::TIntrusivePtr<IRawJob> reducer,
+ const TOperationOptions& options)
+{
+ auto operation = ::MakeIntrusive<TOperation>(GetParentClientImpl());
+ auto prepareOperation = [
+ this_=::TIntrusivePtr(this),
+ operation,
+ spec,
+ reducer,
+ options
+ ] () {
+ ExecuteRawReduce(
+ operation,
+ ::MakeIntrusive<TOperationPreparer>(this_->GetParentClientImpl(), this_->TransactionId_),
+ spec,
+ reducer,
+ options);
+ };
+ return ProcessOperation(GetParentClientImpl(), std::move(prepareOperation), std::move(operation), options);
+}
+
+IOperationPtr TClientBase::DoJoinReduce(
+ const TJoinReduceOperationSpec& spec,
+ ::TIntrusivePtr<IStructuredJob> reducer,
+ const TOperationOptions& options)
+{
+ auto operation = ::MakeIntrusive<TOperation>(GetParentClientImpl());
+ auto prepareOperation = [
+ this_=::TIntrusivePtr(this),
+ operation,
+ spec,
+ reducer,
+ options
+ ] () {
+ ExecuteJoinReduce(
+ operation,
+ ::MakeIntrusive<TOperationPreparer>(this_->GetParentClientImpl(), this_->TransactionId_),
+ spec,
+ reducer,
+ options);
+ };
+ return ProcessOperation(GetParentClientImpl(), std::move(prepareOperation), std::move(operation), options);
+}
+
+IOperationPtr TClientBase::RawJoinReduce(
+ const TRawJoinReduceOperationSpec& spec,
+ ::TIntrusivePtr<IRawJob> reducer,
+ const TOperationOptions& options)
+{
+ auto operation = ::MakeIntrusive<TOperation>(GetParentClientImpl());
+ auto prepareOperation = [
+ this_=::TIntrusivePtr(this),
+ operation,
+ spec,
+ reducer,
+ options
+ ] () {
+ ExecuteRawJoinReduce(
+ operation,
+ ::MakeIntrusive<TOperationPreparer>(this_->GetParentClientImpl(), this_->TransactionId_),
+ spec,
+ reducer,
+ options);
+ };
+ return ProcessOperation(GetParentClientImpl(), std::move(prepareOperation), std::move(operation), options);
+}
+
+IOperationPtr TClientBase::DoMapReduce(
+ const TMapReduceOperationSpec& spec,
+ ::TIntrusivePtr<IStructuredJob> mapper,
+ ::TIntrusivePtr<IStructuredJob> reduceCombiner,
+ ::TIntrusivePtr<IStructuredJob> reducer,
+ const TOperationOptions& options)
+{
+ auto operation = ::MakeIntrusive<TOperation>(GetParentClientImpl());
+ auto prepareOperation = [
+ this_=::TIntrusivePtr(this),
+ operation,
+ spec,
+ mapper,
+ reduceCombiner,
+ reducer,
+ options
+ ] () {
+ ExecuteMapReduce(
+ operation,
+ ::MakeIntrusive<TOperationPreparer>(this_->GetParentClientImpl(), this_->TransactionId_),
+ spec,
+ mapper,
+ reduceCombiner,
+ reducer,
+ options);
+ };
+ return ProcessOperation(GetParentClientImpl(), std::move(prepareOperation), std::move(operation), options);
+}
+
+IOperationPtr TClientBase::RawMapReduce(
+ const TRawMapReduceOperationSpec& spec,
+ ::TIntrusivePtr<IRawJob> mapper,
+ ::TIntrusivePtr<IRawJob> reduceCombiner,
+ ::TIntrusivePtr<IRawJob> reducer,
+ const TOperationOptions& options)
+{
+ auto operation = ::MakeIntrusive<TOperation>(GetParentClientImpl());
+ auto prepareOperation = [
+ this_=::TIntrusivePtr(this),
+ operation,
+ spec,
+ mapper,
+ reduceCombiner,
+ reducer,
+ options
+ ] () {
+ ExecuteRawMapReduce(
+ operation,
+ ::MakeIntrusive<TOperationPreparer>(this_->GetParentClientImpl(), this_->TransactionId_),
+ spec,
+ mapper,
+ reduceCombiner,
+ reducer,
+ options);
+ };
+ return ProcessOperation(GetParentClientImpl(), std::move(prepareOperation), std::move(operation), options);
+}
+
+IOperationPtr TClientBase::Sort(
+ const TSortOperationSpec& spec,
+ const TOperationOptions& options)
+{
+ auto operation = ::MakeIntrusive<TOperation>(GetParentClientImpl());
+ auto prepareOperation = [
+ this_ = ::TIntrusivePtr(this),
+ operation,
+ spec,
+ options
+ ] () {
+ ExecuteSort(
+ operation,
+ ::MakeIntrusive<TOperationPreparer>(this_->GetParentClientImpl(), this_->TransactionId_),
+ spec,
+ options);
+ };
+ return ProcessOperation(GetParentClientImpl(), std::move(prepareOperation), std::move(operation), options);
+}
+
+IOperationPtr TClientBase::Merge(
+ const TMergeOperationSpec& spec,
+ const TOperationOptions& options)
+{
+ auto operation = ::MakeIntrusive<TOperation>(GetParentClientImpl());
+ auto prepareOperation = [
+ this_ = ::TIntrusivePtr(this),
+ operation,
+ spec,
+ options
+ ] () {
+ ExecuteMerge(
+ operation,
+ ::MakeIntrusive<TOperationPreparer>(this_->GetParentClientImpl(), this_->TransactionId_),
+ spec,
+ options);
+ };
+ return ProcessOperation(GetParentClientImpl(), std::move(prepareOperation), std::move(operation), options);
+}
+
+IOperationPtr TClientBase::Erase(
+ const TEraseOperationSpec& spec,
+ const TOperationOptions& options)
+{
+ auto operation = ::MakeIntrusive<TOperation>(GetParentClientImpl());
+ auto prepareOperation = [
+ this_ = ::TIntrusivePtr(this),
+ operation,
+ spec,
+ options
+ ] () {
+ ExecuteErase(
+ operation,
+ ::MakeIntrusive<TOperationPreparer>(this_->GetParentClientImpl(), this_->TransactionId_),
+ spec,
+ options);
+ };
+ return ProcessOperation(GetParentClientImpl(), std::move(prepareOperation), std::move(operation), options);
+}
+
+IOperationPtr TClientBase::RemoteCopy(
+ const TRemoteCopyOperationSpec& spec,
+ const TOperationOptions& options)
+{
+ auto operation = ::MakeIntrusive<TOperation>(GetParentClientImpl());
+ auto prepareOperation = [
+ this_ = ::TIntrusivePtr(this),
+ operation,
+ spec,
+ options
+ ] () {
+ ExecuteRemoteCopy(
+ operation,
+ ::MakeIntrusive<TOperationPreparer>(this_->GetParentClientImpl(), this_->TransactionId_),
+ spec,
+ options);
+ };
+ return ProcessOperation(GetParentClientImpl(), std::move(prepareOperation), std::move(operation), options);
+}
+
+IOperationPtr TClientBase::RunVanilla(
+ const TVanillaOperationSpec& spec,
+ const TOperationOptions& options)
+{
+ auto operation = ::MakeIntrusive<TOperation>(GetParentClientImpl());
+ auto prepareOperation = [
+ this_ = ::TIntrusivePtr(this),
+ operation,
+ spec,
+ options
+ ] () {
+ ExecuteVanilla(
+ operation,
+ ::MakeIntrusive<TOperationPreparer>(this_->GetParentClientImpl(), this_->TransactionId_),
+ spec,
+ options);
+ };
+ return ProcessOperation(GetParentClientImpl(), std::move(prepareOperation), std::move(operation), options);
+}
+
+IOperationPtr TClientBase::AttachOperation(const TOperationId& operationId)
+{
+ auto operation = ::MakeIntrusive<TOperation>(operationId, GetParentClientImpl());
+ operation->GetBriefState(); // check that operation exists
+ return operation;
+}
+
+EOperationBriefState TClientBase::CheckOperation(const TOperationId& operationId)
+{
+ return NYT::NDetail::CheckOperation(ClientRetryPolicy_, Context_, operationId);
+}
+
+void TClientBase::AbortOperation(const TOperationId& operationId)
+{
+ NRawClient::AbortOperation(ClientRetryPolicy_->CreatePolicyForGenericRequest(), Context_, operationId);
+}
+
+void TClientBase::CompleteOperation(const TOperationId& operationId)
+{
+ NRawClient::CompleteOperation(ClientRetryPolicy_->CreatePolicyForGenericRequest(), Context_, operationId);
+}
+
+void TClientBase::WaitForOperation(const TOperationId& operationId)
+{
+ NYT::NDetail::WaitForOperation(ClientRetryPolicy_, Context_, operationId);
+}
+
+void TClientBase::AlterTable(
+ const TYPath& path,
+ const TAlterTableOptions& options)
+{
+ NRawClient::AlterTable(ClientRetryPolicy_->CreatePolicyForGenericRequest(), Context_, TransactionId_, path, options);
+}
+
+::TIntrusivePtr<TClientReader> TClientBase::CreateClientReader(
+ const TRichYPath& path,
+ const TFormat& format,
+ const TTableReaderOptions& options,
+ bool useFormatFromTableAttributes)
+{
+ return ::MakeIntrusive<TClientReader>(
+ CanonizeYPath(path),
+ ClientRetryPolicy_,
+ GetTransactionPinger(),
+ Context_,
+ TransactionId_,
+ format,
+ options,
+ useFormatFromTableAttributes);
+}
+
+THolder<TClientWriter> TClientBase::CreateClientWriter(
+ const TRichYPath& path,
+ const TFormat& format,
+ const TTableWriterOptions& options)
+{
+ auto realPath = CanonizeYPath(path);
+ if (!NRawClient::Exists(ClientRetryPolicy_->CreatePolicyForGenericRequest(), Context_, TransactionId_, realPath.Path_)) {
+ NRawClient::Create(ClientRetryPolicy_->CreatePolicyForGenericRequest(), Context_, TransactionId_, realPath.Path_, NT_TABLE,
+ TCreateOptions().IgnoreExisting(true));
+ }
+ return MakeHolder<TClientWriter>(
+ realPath,
+ ClientRetryPolicy_,
+ GetTransactionPinger(),
+ Context_,
+ TransactionId_,
+ format,
+ options
+ );
+}
+
+::TIntrusivePtr<INodeReaderImpl> TClientBase::CreateNodeReader(
+ const TRichYPath& path, const TTableReaderOptions& options)
+{
+ auto format = TFormat::YsonBinary();
+ ApplyFormatHints<TNode>(&format, options.FormatHints_);
+
+ // Skiff is disabled here because of large header problem (see https://st.yandex-team.ru/YT-6926).
+ // Revert this code to r3614168 when it is fixed.
+ return new TNodeTableReader(
+ CreateClientReader(path, format, options));
+}
+
+::TIntrusivePtr<IYaMRReaderImpl> TClientBase::CreateYaMRReader(
+ const TRichYPath& path, const TTableReaderOptions& options)
+{
+ return new TYaMRTableReader(
+ CreateClientReader(path, TFormat::YaMRLenval(), options, /* useFormatFromTableAttributes = */ true));
+}
+
+::TIntrusivePtr<IProtoReaderImpl> TClientBase::CreateProtoReader(
+ const TRichYPath& path,
+ const TTableReaderOptions& options,
+ const Message* prototype)
+{
+ TVector<const ::google::protobuf::Descriptor*> descriptors;
+ descriptors.push_back(prototype->GetDescriptor());
+
+ if (Context_.Config->UseClientProtobuf) {
+ return new TProtoTableReader(
+ CreateClientReader(path, TFormat::YsonBinary(), options),
+ std::move(descriptors));
+ } else {
+ auto format = TFormat::Protobuf({prototype->GetDescriptor()}, Context_.Config->ProtobufFormatWithDescriptors);
+ return new TLenvalProtoTableReader(
+ CreateClientReader(path, format, options),
+ std::move(descriptors));
+ }
+}
+
+::TIntrusivePtr<ISkiffRowReaderImpl> TClientBase::CreateSkiffRowReader(
+ const TRichYPath& path,
+ const TTableReaderOptions& options,
+ const ISkiffRowSkipperPtr& skipper,
+ const NSkiff::TSkiffSchemaPtr& schema)
+{
+ auto skiffOptions = TCreateSkiffSchemaOptions().HasRangeIndex(true);
+ auto resultSchema = NYT::NDetail::CreateSkiffSchema(TVector{schema}, skiffOptions);
+ return new TSkiffRowTableReader(
+ CreateClientReader(path, NYT::NDetail::CreateSkiffFormat(resultSchema), options),
+ resultSchema,
+ {skipper},
+ std::move(skiffOptions));
+}
+
+::TIntrusivePtr<INodeWriterImpl> TClientBase::CreateNodeWriter(
+ const TRichYPath& path, const TTableWriterOptions& options)
+{
+ auto format = TFormat::YsonBinary();
+ ApplyFormatHints<TNode>(&format, options.FormatHints_);
+
+ return new TNodeTableWriter(
+ CreateClientWriter(path, format, options));
+}
+
+::TIntrusivePtr<IYaMRWriterImpl> TClientBase::CreateYaMRWriter(
+ const TRichYPath& path, const TTableWriterOptions& options)
+{
+ auto format = TFormat::YaMRLenval();
+ ApplyFormatHints<TYaMRRow>(&format, options.FormatHints_);
+
+ return new TYaMRTableWriter(
+ CreateClientWriter(path, format, options));
+}
+
+::TIntrusivePtr<IProtoWriterImpl> TClientBase::CreateProtoWriter(
+ const TRichYPath& path,
+ const TTableWriterOptions& options,
+ const Message* prototype)
+{
+ TVector<const ::google::protobuf::Descriptor*> descriptors;
+ descriptors.push_back(prototype->GetDescriptor());
+
+ auto pathWithSchema = path;
+ if (options.InferSchema_.GetOrElse(Context_.Config->InferTableSchema) && !path.Schema_) {
+ pathWithSchema.Schema(CreateTableSchema(*prototype->GetDescriptor()));
+ }
+
+ if (Context_.Config->UseClientProtobuf) {
+ auto format = TFormat::YsonBinary();
+ ApplyFormatHints<TNode>(&format, options.FormatHints_);
+ return new TProtoTableWriter(
+ CreateClientWriter(pathWithSchema, format, options),
+ std::move(descriptors));
+ } else {
+ auto format = TFormat::Protobuf({prototype->GetDescriptor()}, Context_.Config->ProtobufFormatWithDescriptors);
+ ApplyFormatHints<::google::protobuf::Message>(&format, options.FormatHints_);
+ return new TLenvalProtoTableWriter(
+ CreateClientWriter(pathWithSchema, format, options),
+ std::move(descriptors));
+ }
+}
+
+TBatchRequestPtr TClientBase::CreateBatchRequest()
+{
+ return MakeIntrusive<TBatchRequest>(TransactionId_, GetParentClientImpl());
+}
+
+IClientPtr TClientBase::GetParentClient()
+{
+ return GetParentClientImpl();
+}
+
+const TClientContext& TClientBase::GetContext() const
+{
+ return Context_;
+}
+
+const IClientRetryPolicyPtr& TClientBase::GetRetryPolicy() const
+{
+ return ClientRetryPolicy_;
+}
+
+////////////////////////////////////////////////////////////////////////////////
+
+TTransaction::TTransaction(
+ TClientPtr parentClient,
+ const TClientContext& context,
+ const TTransactionId& parentTransactionId,
+ const TStartTransactionOptions& options)
+ : TClientBase(context, parentTransactionId, parentClient->GetRetryPolicy())
+ , TransactionPinger_(parentClient->GetTransactionPinger())
+ , PingableTx_(
+ MakeHolder<TPingableTransaction>(
+ parentClient->GetRetryPolicy(),
+ context,
+ parentTransactionId,
+ TransactionPinger_->GetChildTxPinger(),
+ options))
+ , ParentClient_(parentClient)
+{
+ TransactionId_ = PingableTx_->GetId();
+}
+
+TTransaction::TTransaction(
+ TClientPtr parentClient,
+ const TClientContext& context,
+ const TTransactionId& transactionId,
+ const TAttachTransactionOptions& options)
+ : TClientBase(context, transactionId, parentClient->GetRetryPolicy())
+ , TransactionPinger_(parentClient->GetTransactionPinger())
+ , PingableTx_(
+ new TPingableTransaction(
+ parentClient->GetRetryPolicy(),
+ context,
+ transactionId,
+ parentClient->GetTransactionPinger()->GetChildTxPinger(),
+ options))
+ , ParentClient_(parentClient)
+{ }
+
+const TTransactionId& TTransaction::GetId() const
+{
+ return TransactionId_;
+}
+
+ILockPtr TTransaction::Lock(
+ const TYPath& path,
+ ELockMode mode,
+ const TLockOptions& options)
+{
+ auto lockId = NRawClient::Lock(ClientRetryPolicy_->CreatePolicyForGenericRequest(), Context_, TransactionId_, path, mode, options);
+ return ::MakeIntrusive<TLock>(lockId, GetParentClientImpl(), options.Waitable_);
+}
+
+void TTransaction::Unlock(
+ const TYPath& path,
+ const TUnlockOptions& options)
+{
+ NRawClient::Unlock(ClientRetryPolicy_->CreatePolicyForGenericRequest(), Context_, TransactionId_, path, options);
+}
+
+void TTransaction::Commit()
+{
+ PingableTx_->Commit();
+}
+
+void TTransaction::Abort()
+{
+ PingableTx_->Abort();
+}
+
+void TTransaction::Ping()
+{
+ PingTx(ClientRetryPolicy_->CreatePolicyForGenericRequest(), Context_, TransactionId_);
+}
+
+void TTransaction::Detach()
+{
+ PingableTx_->Detach();
+}
+
+ITransactionPingerPtr TTransaction::GetTransactionPinger()
+{
+ return TransactionPinger_;
+}
+
+TClientPtr TTransaction::GetParentClientImpl()
+{
+ return ParentClient_;
+}
+
+////////////////////////////////////////////////////////////////////////////////
+
+TClient::TClient(
+ const TClientContext& context,
+ const TTransactionId& globalId,
+ IClientRetryPolicyPtr retryPolicy)
+ : TClientBase(context, globalId, retryPolicy)
+ , TransactionPinger_(nullptr)
+{ }
+
+TClient::~TClient() = default;
+
+ITransactionPtr TClient::AttachTransaction(
+ const TTransactionId& transactionId,
+ const TAttachTransactionOptions& options)
+{
+ CheckShutdown();
+
+ return MakeIntrusive<TTransaction>(this, Context_, transactionId, options);
+}
+
+void TClient::MountTable(
+ const TYPath& path,
+ const TMountTableOptions& options)
+{
+ CheckShutdown();
+
+ THttpHeader header("POST", "mount_table");
+ SetTabletParams(header, path, options);
+ if (options.CellId_) {
+ header.AddParameter("cell_id", GetGuidAsString(*options.CellId_));
+ }
+ header.AddParameter("freeze", options.Freeze_);
+ RetryRequestWithPolicy(ClientRetryPolicy_->CreatePolicyForGenericRequest(), Context_, header);
+}
+
+void TClient::UnmountTable(
+ const TYPath& path,
+ const TUnmountTableOptions& options)
+{
+ CheckShutdown();
+
+ THttpHeader header("POST", "unmount_table");
+ SetTabletParams(header, path, options);
+ header.AddParameter("force", options.Force_);
+ RetryRequestWithPolicy(ClientRetryPolicy_->CreatePolicyForGenericRequest(), Context_, header);
+}
+
+void TClient::RemountTable(
+ const TYPath& path,
+ const TRemountTableOptions& options)
+{
+ CheckShutdown();
+
+ THttpHeader header("POST", "remount_table");
+ SetTabletParams(header, path, options);
+ RetryRequestWithPolicy(ClientRetryPolicy_->CreatePolicyForGenericRequest(), Context_, header);
+}
+
+void TClient::FreezeTable(
+ const TYPath& path,
+ const TFreezeTableOptions& options)
+{
+ CheckShutdown();
+ NRawClient::FreezeTable(ClientRetryPolicy_->CreatePolicyForGenericRequest(), Context_, path, options);
+}
+
+void TClient::UnfreezeTable(
+ const TYPath& path,
+ const TUnfreezeTableOptions& options)
+{
+ CheckShutdown();
+ NRawClient::UnfreezeTable(ClientRetryPolicy_->CreatePolicyForGenericRequest(), Context_, path, options);
+}
+
+void TClient::ReshardTable(
+ const TYPath& path,
+ const TVector<TKey>& keys,
+ const TReshardTableOptions& options)
+{
+ CheckShutdown();
+
+ THttpHeader header("POST", "reshard_table");
+ SetTabletParams(header, path, options);
+ header.AddParameter("pivot_keys", BuildYsonNodeFluently().List(keys));
+ RetryRequestWithPolicy(ClientRetryPolicy_->CreatePolicyForGenericRequest(), Context_, header);
+}
+
+void TClient::ReshardTable(
+ const TYPath& path,
+ i64 tabletCount,
+ const TReshardTableOptions& options)
+{
+ CheckShutdown();
+
+ THttpHeader header("POST", "reshard_table");
+ SetTabletParams(header, path, options);
+ header.AddParameter("tablet_count", tabletCount);
+ RetryRequestWithPolicy(ClientRetryPolicy_->CreatePolicyForGenericRequest(), Context_, header);
+}
+
+void TClient::InsertRows(
+ const TYPath& path,
+ const TNode::TListType& rows,
+ const TInsertRowsOptions& options)
+{
+ CheckShutdown();
+
+ THttpHeader header("PUT", "insert_rows");
+ header.SetInputFormat(TFormat::YsonBinary());
+ // TODO: use corresponding raw request
+ header.MergeParameters(SerializeParametersForInsertRows(Context_.Config->Prefix, path, options));
+
+ auto body = NodeListToYsonString(rows);
+ TRequestConfig config;
+ config.IsHeavy = true;
+ RetryRequestWithPolicy(ClientRetryPolicy_->CreatePolicyForGenericRequest(), Context_, header, body, config);
+}
+
+void TClient::DeleteRows(
+ const TYPath& path,
+ const TNode::TListType& keys,
+ const TDeleteRowsOptions& options)
+{
+ CheckShutdown();
+ return NRawClient::DeleteRows(ClientRetryPolicy_->CreatePolicyForGenericRequest(), Context_, path, keys, options);
+}
+
+void TClient::TrimRows(
+ const TYPath& path,
+ i64 tabletIndex,
+ i64 rowCount,
+ const TTrimRowsOptions& options)
+{
+ CheckShutdown();
+
+ THttpHeader header("POST", "trim_rows");
+ header.AddParameter("trimmed_row_count", rowCount);
+ header.AddParameter("tablet_index", tabletIndex);
+ // TODO: use corresponding raw request
+ header.MergeParameters(NRawClient::SerializeParametersForTrimRows(Context_.Config->Prefix, path, options));
+
+ TRequestConfig config;
+ config.IsHeavy = true;
+ RetryRequestWithPolicy(ClientRetryPolicy_->CreatePolicyForGenericRequest(), Context_, header, {}, config);
+}
+
+TNode::TListType TClient::LookupRows(
+ const TYPath& path,
+ const TNode::TListType& keys,
+ const TLookupRowsOptions& options)
+{
+ CheckShutdown();
+
+ Y_UNUSED(options);
+ THttpHeader header("PUT", "lookup_rows");
+ header.AddPath(AddPathPrefix(path, Context_.Config->ApiVersion));
+ header.SetInputFormat(TFormat::YsonBinary());
+ header.SetOutputFormat(TFormat::YsonBinary());
+
+ header.MergeParameters(BuildYsonNodeFluently().BeginMap()
+ .DoIf(options.Timeout_.Defined(), [&] (TFluentMap fluent) {
+ fluent.Item("timeout").Value(static_cast<i64>(options.Timeout_->MilliSeconds()));
+ })
+ .Item("keep_missing_rows").Value(options.KeepMissingRows_)
+ .DoIf(options.Versioned_.Defined(), [&] (TFluentMap fluent) {
+ fluent.Item("versioned").Value(*options.Versioned_);
+ })
+ .DoIf(options.Columns_.Defined(), [&] (TFluentMap fluent) {
+ fluent.Item("column_names").Value(*options.Columns_);
+ })
+ .EndMap());
+
+ auto body = NodeListToYsonString(keys);
+ TRequestConfig config;
+ config.IsHeavy = true;
+ auto result = RetryRequestWithPolicy(ClientRetryPolicy_->CreatePolicyForGenericRequest(), Context_, header, body, config);
+ return NodeFromYsonString(result.Response, ::NYson::EYsonType::ListFragment).AsList();
+}
+
+TNode::TListType TClient::SelectRows(
+ const TString& query,
+ const TSelectRowsOptions& options)
+{
+ CheckShutdown();
+
+ THttpHeader header("GET", "select_rows");
+ header.SetInputFormat(TFormat::YsonBinary());
+ header.SetOutputFormat(TFormat::YsonBinary());
+
+ header.MergeParameters(BuildYsonNodeFluently().BeginMap()
+ .Item("query").Value(query)
+ .DoIf(options.Timeout_.Defined(), [&] (TFluentMap fluent) {
+ fluent.Item("timeout").Value(static_cast<i64>(options.Timeout_->MilliSeconds()));
+ })
+ .DoIf(options.InputRowLimit_.Defined(), [&] (TFluentMap fluent) {
+ fluent.Item("input_row_limit").Value(*options.InputRowLimit_);
+ })
+ .DoIf(options.OutputRowLimit_.Defined(), [&] (TFluentMap fluent) {
+ fluent.Item("output_row_limit").Value(*options.OutputRowLimit_);
+ })
+ .Item("range_expansion_limit").Value(options.RangeExpansionLimit_)
+ .Item("fail_on_incomplete_result").Value(options.FailOnIncompleteResult_)
+ .Item("verbose_logging").Value(options.VerboseLogging_)
+ .Item("enable_code_cache").Value(options.EnableCodeCache_)
+ .EndMap());
+
+ TRequestConfig config;
+ config.IsHeavy = true;
+ auto result = RetryRequestWithPolicy(ClientRetryPolicy_->CreatePolicyForGenericRequest(), Context_, header, {}, config);
+ return NodeFromYsonString(result.Response, ::NYson::EYsonType::ListFragment).AsList();
+}
+
+void TClient::AlterTableReplica(const TReplicaId& replicaId, const TAlterTableReplicaOptions& options)
+{
+ CheckShutdown();
+ NRawClient::AlterTableReplica(ClientRetryPolicy_->CreatePolicyForGenericRequest(), Context_, replicaId, options);
+}
+
+ui64 TClient::GenerateTimestamp()
+{
+ CheckShutdown();
+ THttpHeader header("GET", "generate_timestamp");
+ TRequestConfig config;
+ config.IsHeavy = true;
+ auto requestResult = RetryRequestWithPolicy(ClientRetryPolicy_->CreatePolicyForGenericRequest(), Context_, header, {}, config);
+ return NodeFromYsonString(requestResult.Response).AsUint64();
+}
+
+TAuthorizationInfo TClient::WhoAmI()
+{
+ CheckShutdown();
+
+ THttpHeader header("GET", "auth/whoami", /* isApi = */ false);
+ auto requestResult = RetryRequestWithPolicy(ClientRetryPolicy_->CreatePolicyForGenericRequest(), Context_, header);
+ TAuthorizationInfo result;
+
+ NJson::TJsonValue jsonValue;
+ bool ok = NJson::ReadJsonTree(requestResult.Response, &jsonValue, /* throwOnError = */ true);
+ Y_VERIFY(ok);
+ result.Login = jsonValue["login"].GetString();
+ result.Realm = jsonValue["realm"].GetString();
+ return result;
+}
+
+TOperationAttributes TClient::GetOperation(
+ const TOperationId& operationId,
+ const TGetOperationOptions& options)
+{
+ CheckShutdown();
+ return NRawClient::GetOperation(ClientRetryPolicy_->CreatePolicyForGenericRequest(), Context_, operationId, options);
+}
+
+TListOperationsResult TClient::ListOperations(
+ const TListOperationsOptions& options)
+{
+ CheckShutdown();
+ return NRawClient::ListOperations(ClientRetryPolicy_->CreatePolicyForGenericRequest(), Context_, options);
+}
+
+void TClient::UpdateOperationParameters(
+ const TOperationId& operationId,
+ const TUpdateOperationParametersOptions& options)
+{
+ CheckShutdown();
+ return NRawClient::UpdateOperationParameters(ClientRetryPolicy_->CreatePolicyForGenericRequest(), Context_, operationId, options);
+}
+
+TJobAttributes TClient::GetJob(
+ const TOperationId& operationId,
+ const TJobId& jobId,
+ const TGetJobOptions& options)
+{
+ CheckShutdown();
+ return NRawClient::GetJob(ClientRetryPolicy_->CreatePolicyForGenericRequest(), Context_, operationId, jobId, options);
+}
+
+TListJobsResult TClient::ListJobs(
+ const TOperationId& operationId,
+ const TListJobsOptions& options)
+{
+ CheckShutdown();
+ return NRawClient::ListJobs(ClientRetryPolicy_->CreatePolicyForGenericRequest(), Context_, operationId, options);
+}
+
+IFileReaderPtr TClient::GetJobInput(
+ const TJobId& jobId,
+ const TGetJobInputOptions& options)
+{
+ CheckShutdown();
+ return NRawClient::GetJobInput(Context_, jobId, options);
+}
+
+IFileReaderPtr TClient::GetJobFailContext(
+ const TOperationId& operationId,
+ const TJobId& jobId,
+ const TGetJobFailContextOptions& options)
+{
+ CheckShutdown();
+ return NRawClient::GetJobFailContext(Context_, operationId, jobId, options);
+}
+
+IFileReaderPtr TClient::GetJobStderr(
+ const TOperationId& operationId,
+ const TJobId& jobId,
+ const TGetJobStderrOptions& options)
+{
+ CheckShutdown();
+ return NRawClient::GetJobStderr(Context_, operationId, jobId, options);
+}
+
+TNode::TListType TClient::SkyShareTable(
+ const std::vector<TYPath>& tablePaths,
+ const TSkyShareTableOptions& options)
+{
+ CheckShutdown();
+ return NRawClient::SkyShareTable(
+ ClientRetryPolicy_->CreatePolicyForGenericRequest(),
+ Context_,
+ tablePaths,
+ options);
+}
+
+TCheckPermissionResponse TClient::CheckPermission(
+ const TString& user,
+ EPermission permission,
+ const TYPath& path,
+ const TCheckPermissionOptions& options)
+{
+ CheckShutdown();
+ return NRawClient::CheckPermission(ClientRetryPolicy_->CreatePolicyForGenericRequest(), Context_, user, permission, path, options);
+}
+
+TVector<TTabletInfo> TClient::GetTabletInfos(
+ const TYPath& path,
+ const TVector<int>& tabletIndexes,
+ const TGetTabletInfosOptions& options)
+{
+ CheckShutdown();
+ return NRawClient::GetTabletInfos(ClientRetryPolicy_->CreatePolicyForGenericRequest(), Context_, path, tabletIndexes, options);
+}
+
+
+void TClient::SuspendOperation(
+ const TOperationId& operationId,
+ const TSuspendOperationOptions& options)
+{
+ CheckShutdown();
+ NRawClient::SuspendOperation(ClientRetryPolicy_->CreatePolicyForGenericRequest(), Context_, operationId, options);
+}
+
+void TClient::ResumeOperation(
+ const TOperationId& operationId,
+ const TResumeOperationOptions& options)
+{
+ CheckShutdown();
+ NRawClient::ResumeOperation(ClientRetryPolicy_->CreatePolicyForGenericRequest(), Context_, operationId, options);
+}
+
+TYtPoller& TClient::GetYtPoller()
+{
+ auto g = Guard(YtPollerLock_);
+ if (!YtPoller_) {
+ CheckShutdown();
+ // We don't use current client and create new client because YtPoller_ might use
+ // this client during current client shutdown.
+ // That might lead to incrementing of current client refcount and double delete of current client object.
+ YtPoller_ = MakeHolder<TYtPoller>(Context_, ClientRetryPolicy_);
+ }
+ return *YtPoller_;
+}
+
+void TClient::Shutdown()
+{
+ auto g = Guard(YtPollerLock_);
+
+ if (!Shutdown_.exchange(true) && YtPoller_) {
+ YtPoller_->Stop();
+ }
+}
+
+ITransactionPingerPtr TClient::GetTransactionPinger()
+{
+ if (!TransactionPinger_) {
+ TransactionPinger_ = CreateTransactionPinger(Context_.Config);
+ }
+ return TransactionPinger_;
+}
+
+TClientPtr TClient::GetParentClientImpl()
+{
+ return this;
+}
+
+template <class TOptions>
+void TClient::SetTabletParams(
+ THttpHeader& header,
+ const TYPath& path,
+ const TOptions& options)
+{
+ header.AddPath(AddPathPrefix(path, Context_.Config->Prefix));
+ if (options.FirstTabletIndex_) {
+ header.AddParameter("first_tablet_index", *options.FirstTabletIndex_);
+ }
+ if (options.LastTabletIndex_) {
+ header.AddParameter("last_tablet_index", *options.LastTabletIndex_);
+ }
+}
+
+void TClient::CheckShutdown() const
+{
+ if (Shutdown_) {
+ ythrow TApiUsageError() << "Call client's methods after shutdown";
+ }
+}
+
+TClientPtr CreateClientImpl(
+ const TString& serverName,
+ const TCreateClientOptions& options)
+{
+ TClientContext context;
+ context.Config = options.Config_ ? options.Config_ : TConfig::Get();
+ context.TvmOnly = options.TvmOnly_;
+ context.UseTLS = options.UseTLS_;
+
+ context.ServerName = serverName;
+ if (serverName.find('.') == TString::npos &&
+ serverName.find(':') == TString::npos)
+ {
+ context.ServerName += ".yt.yandex.net";
+ }
+
+ if (serverName.find(':') == TString::npos) {
+ context.ServerName = CreateHostNameWithPort(context.ServerName, context);
+ }
+ if (options.TvmOnly_) {
+ context.ServerName = Format("tvm.%v", context.ServerName);
+ }
+
+ if (options.UseTLS_ || options.UseCoreHttpClient_) {
+ context.HttpClient = NHttpClient::CreateCoreHttpClient(options.UseTLS_, context.Config);
+ } else {
+ context.HttpClient = NHttpClient::CreateDefaultHttpClient();
+ }
+
+ context.Token = context.Config->Token;
+ if (options.Token_) {
+ context.Token = options.Token_;
+ } else if (options.TokenPath_) {
+ context.Token = TConfig::LoadTokenFromFile(options.TokenPath_);
+ } else if (options.ServiceTicketAuth_) {
+ context.ServiceTicketAuth = options.ServiceTicketAuth_;
+ }
+
+ context.ImpersonationUser = options.ImpersonationUser_;
+
+ if (context.Token) {
+ TConfig::ValidateToken(context.Token);
+ }
+
+ auto globalTxId = GetGuid(context.Config->GlobalTxId);
+
+ auto retryConfigProvider = options.RetryConfigProvider_;
+ if (!retryConfigProvider) {
+ retryConfigProvider = CreateDefaultRetryConfigProvider();
+ }
+ return new NDetail::TClient(context, globalTxId, CreateDefaultClientRetryPolicy(retryConfigProvider, context.Config));
+}
+
+////////////////////////////////////////////////////////////////////////////////
+
+} // namespace NDetail
+
+////////////////////////////////////////////////////////////////////////////////
+
+IClientPtr CreateClient(
+ const TString& serverName,
+ const TCreateClientOptions& options)
+{
+ return NDetail::CreateClientImpl(serverName, options);
+}
+
+IClientPtr CreateClientFromEnv(const TCreateClientOptions& options)
+{
+ auto serverName = GetEnv("YT_PROXY");
+ if (!serverName) {
+ ythrow yexception() << "YT_PROXY is not set";
+ }
+
+ return NDetail::CreateClientImpl(serverName, options);
+}
+
+
+////////////////////////////////////////////////////////////////////////////////
+
+} // namespace NYT
diff --git a/yt/cpp/mapreduce/client/client.h b/yt/cpp/mapreduce/client/client.h
new file mode 100644
index 0000000000..0f4df09d0b
--- /dev/null
+++ b/yt/cpp/mapreduce/client/client.h
@@ -0,0 +1,506 @@
+#pragma once
+
+#include "client_reader.h"
+#include "client_writer.h"
+#include "transaction_pinger.h"
+
+#include <yt/cpp/mapreduce/interface/client.h>
+
+#include <yt/cpp/mapreduce/http/context.h>
+#include <yt/cpp/mapreduce/http/requests.h>
+
+namespace NYT {
+namespace NDetail {
+
+////////////////////////////////////////////////////////////////////////////////
+
+class TYtPoller;
+
+class TClientBase;
+using TClientBasePtr = ::TIntrusivePtr<TClientBase>;
+
+class TClient;
+using TClientPtr = ::TIntrusivePtr<TClient>;
+
+////////////////////////////////////////////////////////////////////////////////
+
+class TClientBase
+ : virtual public IClientBase
+{
+public:
+ TClientBase(
+ const TClientContext& context,
+ const TTransactionId& transactionId,
+ IClientRetryPolicyPtr retryPolicy);
+
+ ITransactionPtr StartTransaction(
+ const TStartTransactionOptions& options) override;
+
+ // cypress
+
+ TNodeId Create(
+ const TYPath& path,
+ ENodeType type,
+ const TCreateOptions& options) override;
+
+ void Remove(
+ const TYPath& path,
+ const TRemoveOptions& options) override;
+
+ bool Exists(
+ const TYPath& path,
+ const TExistsOptions& options) override;
+
+ TNode Get(
+ const TYPath& path,
+ const TGetOptions& options) override;
+
+ void Set(
+ const TYPath& path,
+ const TNode& value,
+ const TSetOptions& options) override;
+
+ void MultisetAttributes(
+ const TYPath& path,
+ const TNode::TMapType& value,
+ const TMultisetAttributesOptions& options) override;
+
+ TNode::TListType List(
+ const TYPath& path,
+ const TListOptions& options) override;
+
+ TNodeId Copy(
+ const TYPath& sourcePath,
+ const TYPath& destinationPath,
+ const TCopyOptions& options) override;
+
+ TNodeId Move(
+ const TYPath& sourcePath,
+ const TYPath& destinationPath,
+ const TMoveOptions& options) override;
+
+ TNodeId Link(
+ const TYPath& targetPath,
+ const TYPath& linkPath,
+ const TLinkOptions& options) override;
+
+ void Concatenate(
+ const TVector<TRichYPath>& sourcePaths,
+ const TRichYPath& destinationPath,
+ const TConcatenateOptions& options) override;
+
+ TRichYPath CanonizeYPath(const TRichYPath& path) override;
+
+ TVector<TTableColumnarStatistics> GetTableColumnarStatistics(
+ const TVector<TRichYPath>& paths,
+ const TGetTableColumnarStatisticsOptions& options) override;
+
+ TMultiTablePartitions GetTablePartitions(
+ const TVector<TRichYPath>& paths,
+ const TGetTablePartitionsOptions& options) override;
+
+ TMaybe<TYPath> GetFileFromCache(
+ const TString& md5Signature,
+ const TYPath& cachePath,
+ const TGetFileFromCacheOptions& options = TGetFileFromCacheOptions()) override;
+
+ TYPath PutFileToCache(
+ const TYPath& filePath,
+ const TString& md5Signature,
+ const TYPath& cachePath,
+ const TPutFileToCacheOptions& options = TPutFileToCacheOptions()) override;
+
+ IFileReaderPtr CreateFileReader(
+ const TRichYPath& path,
+ const TFileReaderOptions& options) override;
+
+ IFileWriterPtr CreateFileWriter(
+ const TRichYPath& path,
+ const TFileWriterOptions& options) override;
+
+ TTableWriterPtr<::google::protobuf::Message> CreateTableWriter(
+ const TRichYPath& path,
+ const ::google::protobuf::Descriptor& descriptor,
+ const TTableWriterOptions& options) override;
+
+ TRawTableReaderPtr CreateRawReader(
+ const TRichYPath& path,
+ const TFormat& format,
+ const TTableReaderOptions& options) override;
+
+ TRawTableWriterPtr CreateRawWriter(
+ const TRichYPath& path,
+ const TFormat& format,
+ const TTableWriterOptions& options) override;
+
+ IFileReaderPtr CreateBlobTableReader(
+ const TYPath& path,
+ const TKey& key,
+ const TBlobTableReaderOptions& options) override;
+
+ // operations
+
+ IOperationPtr DoMap(
+ const TMapOperationSpec& spec,
+ ::TIntrusivePtr<IStructuredJob> mapper,
+ const TOperationOptions& options) override;
+
+ IOperationPtr RawMap(
+ const TRawMapOperationSpec& spec,
+ ::TIntrusivePtr<IRawJob> mapper,
+ const TOperationOptions& options) override;
+
+ IOperationPtr DoReduce(
+ const TReduceOperationSpec& spec,
+ ::TIntrusivePtr<IStructuredJob> reducer,
+ const TOperationOptions& options) override;
+
+ IOperationPtr RawReduce(
+ const TRawReduceOperationSpec& spec,
+ ::TIntrusivePtr<IRawJob> mapper,
+ const TOperationOptions& options) override;
+
+ IOperationPtr DoJoinReduce(
+ const TJoinReduceOperationSpec& spec,
+ ::TIntrusivePtr<IStructuredJob> reducer,
+ const TOperationOptions& options) override;
+
+ IOperationPtr RawJoinReduce(
+ const TRawJoinReduceOperationSpec& spec,
+ ::TIntrusivePtr<IRawJob> mapper,
+ const TOperationOptions& options) override;
+
+ IOperationPtr DoMapReduce(
+ const TMapReduceOperationSpec& spec,
+ ::TIntrusivePtr<IStructuredJob> mapper,
+ ::TIntrusivePtr<IStructuredJob> reduceCombiner,
+ ::TIntrusivePtr<IStructuredJob> reducer,
+ const TOperationOptions& options) override;
+
+ IOperationPtr RawMapReduce(
+ const TRawMapReduceOperationSpec& spec,
+ ::TIntrusivePtr<IRawJob> mapper,
+ ::TIntrusivePtr<IRawJob> reduceCombiner,
+ ::TIntrusivePtr<IRawJob> reducer,
+ const TOperationOptions& options) override;
+
+ IOperationPtr Sort(
+ const TSortOperationSpec& spec,
+ const TOperationOptions& options) override;
+
+ IOperationPtr Merge(
+ const TMergeOperationSpec& spec,
+ const TOperationOptions& options) override;
+
+ IOperationPtr Erase(
+ const TEraseOperationSpec& spec,
+ const TOperationOptions& options) override;
+
+ IOperationPtr RemoteCopy(
+ const TRemoteCopyOperationSpec& spec,
+ const TOperationOptions& options = TOperationOptions()) override;
+
+ IOperationPtr RunVanilla(
+ const TVanillaOperationSpec& spec,
+ const TOperationOptions& options = TOperationOptions()) override;
+
+ IOperationPtr AttachOperation(const TOperationId& operationId) override;
+
+ EOperationBriefState CheckOperation(const TOperationId& operationId) override;
+
+ void AbortOperation(const TOperationId& operationId) override;
+
+ void CompleteOperation(const TOperationId& operationId) override;
+
+ void WaitForOperation(const TOperationId& operationId) override;
+
+ void AlterTable(
+ const TYPath& path,
+ const TAlterTableOptions& options) override;
+
+ TBatchRequestPtr CreateBatchRequest() override;
+
+ IClientPtr GetParentClient() override;
+
+ const TClientContext& GetContext() const;
+
+ const IClientRetryPolicyPtr& GetRetryPolicy() const;
+
+ virtual ITransactionPingerPtr GetTransactionPinger() = 0;
+
+protected:
+ virtual TClientPtr GetParentClientImpl() = 0;
+
+protected:
+ const TClientContext Context_;
+ TTransactionId TransactionId_;
+ IClientRetryPolicyPtr ClientRetryPolicy_;
+
+private:
+ ::TIntrusivePtr<TClientReader> CreateClientReader(
+ const TRichYPath& path,
+ const TFormat& format,
+ const TTableReaderOptions& options,
+ bool useFormatFromTableAttributes = false);
+
+ THolder<TClientWriter> CreateClientWriter(
+ const TRichYPath& path,
+ const TFormat& format,
+ const TTableWriterOptions& options);
+
+ ::TIntrusivePtr<INodeReaderImpl> CreateNodeReader(
+ const TRichYPath& path, const TTableReaderOptions& options) override;
+
+ ::TIntrusivePtr<IYaMRReaderImpl> CreateYaMRReader(
+ const TRichYPath& path, const TTableReaderOptions& options) override;
+
+ ::TIntrusivePtr<IProtoReaderImpl> CreateProtoReader(
+ const TRichYPath& path,
+ const TTableReaderOptions& options,
+ const Message* prototype) override;
+
+ ::TIntrusivePtr<ISkiffRowReaderImpl> CreateSkiffRowReader(
+ const TRichYPath& path,
+ const TTableReaderOptions& options,
+ const ISkiffRowSkipperPtr& skipper,
+ const NSkiff::TSkiffSchemaPtr& schema) override;
+
+ ::TIntrusivePtr<INodeWriterImpl> CreateNodeWriter(
+ const TRichYPath& path, const TTableWriterOptions& options) override;
+
+ ::TIntrusivePtr<IYaMRWriterImpl> CreateYaMRWriter(
+ const TRichYPath& path, const TTableWriterOptions& options) override;
+
+ ::TIntrusivePtr<IProtoWriterImpl> CreateProtoWriter(
+ const TRichYPath& path,
+ const TTableWriterOptions& options,
+ const Message* prototype) override;
+};
+
+////////////////////////////////////////////////////////////////////////////////
+
+class TTransaction
+ : public ITransaction
+ , public TClientBase
+{
+public:
+ //
+ // Start a new transaction.
+ TTransaction(
+ TClientPtr parentClient,
+ const TClientContext& context,
+ const TTransactionId& parentTransactionId,
+ const TStartTransactionOptions& options);
+
+ //
+ // Attach an existing transaction.
+ TTransaction(
+ TClientPtr parentClient,
+ const TClientContext& context,
+ const TTransactionId& transactionId,
+ const TAttachTransactionOptions& options);
+
+ const TTransactionId& GetId() const override;
+
+ ILockPtr Lock(
+ const TYPath& path,
+ ELockMode mode,
+ const TLockOptions& options) override;
+
+ void Unlock(
+ const TYPath& path,
+ const TUnlockOptions& options) override;
+
+ void Commit() override;
+
+ void Abort() override;
+
+ void Ping() override;
+
+ void Detach() override;
+
+ ITransactionPingerPtr GetTransactionPinger() override;
+
+protected:
+ TClientPtr GetParentClientImpl() override;
+
+private:
+ ITransactionPingerPtr TransactionPinger_;
+ THolder<TPingableTransaction> PingableTx_;
+ TClientPtr ParentClient_;
+};
+
+////////////////////////////////////////////////////////////////////////////////
+
+class TClient
+ : public IClient
+ , public TClientBase
+{
+public:
+ TClient(
+ const TClientContext& context,
+ const TTransactionId& globalId,
+ IClientRetryPolicyPtr retryPolicy);
+
+ ~TClient();
+
+ ITransactionPtr AttachTransaction(
+ const TTransactionId& transactionId,
+ const TAttachTransactionOptions& options) override;
+
+ void MountTable(
+ const TYPath& path,
+ const TMountTableOptions& options) override;
+
+ void UnmountTable(
+ const TYPath& path,
+ const TUnmountTableOptions& options) override;
+
+ void RemountTable(
+ const TYPath& path,
+ const TRemountTableOptions& options) override;
+
+ void FreezeTable(
+ const TYPath& path,
+ const TFreezeTableOptions& options) override;
+
+ void UnfreezeTable(
+ const TYPath& path,
+ const TUnfreezeTableOptions& options) override;
+
+ void ReshardTable(
+ const TYPath& path,
+ const TVector<TKey>& keys,
+ const TReshardTableOptions& options) override;
+
+ void ReshardTable(
+ const TYPath& path,
+ i64 tabletCount,
+ const TReshardTableOptions& options) override;
+
+ void InsertRows(
+ const TYPath& path,
+ const TNode::TListType& rows,
+ const TInsertRowsOptions& options) override;
+
+ void DeleteRows(
+ const TYPath& path,
+ const TNode::TListType& keys,
+ const TDeleteRowsOptions& options) override;
+
+ void TrimRows(
+ const TYPath& path,
+ i64 tabletIndex,
+ i64 rowCount,
+ const TTrimRowsOptions& options) override;
+
+ TNode::TListType LookupRows(
+ const TYPath& path,
+ const TNode::TListType& keys,
+ const TLookupRowsOptions& options) override;
+
+ TNode::TListType SelectRows(
+ const TString& query,
+ const TSelectRowsOptions& options) override;
+
+ void AlterTableReplica(
+ const TReplicaId& replicaId,
+ const TAlterTableReplicaOptions& alterTableReplicaOptions) override;
+
+ ui64 GenerateTimestamp() override;
+
+ TAuthorizationInfo WhoAmI() override;
+
+ TOperationAttributes GetOperation(
+ const TOperationId& operationId,
+ const TGetOperationOptions& options) override;
+
+ TListOperationsResult ListOperations(
+ const TListOperationsOptions& options) override;
+
+ void UpdateOperationParameters(
+ const TOperationId& operationId,
+ const TUpdateOperationParametersOptions& options) override;
+
+ TJobAttributes GetJob(
+ const TOperationId& operationId,
+ const TJobId& jobId,
+ const TGetJobOptions& options) override;
+
+ TListJobsResult ListJobs(
+ const TOperationId& operationId,
+ const TListJobsOptions& options = TListJobsOptions()) override;
+
+ IFileReaderPtr GetJobInput(
+ const TJobId& jobId,
+ const TGetJobInputOptions& options = TGetJobInputOptions()) override;
+
+ IFileReaderPtr GetJobFailContext(
+ const TOperationId& operationId,
+ const TJobId& jobId,
+ const TGetJobFailContextOptions& options = TGetJobFailContextOptions()) override;
+
+ IFileReaderPtr GetJobStderr(
+ const TOperationId& operationId,
+ const TJobId& jobId,
+ const TGetJobStderrOptions& options = TGetJobStderrOptions()) override;
+
+ TNode::TListType SkyShareTable(
+ const std::vector<TYPath>& tablePaths,
+ const TSkyShareTableOptions& options = TSkyShareTableOptions()) override;
+
+ TCheckPermissionResponse CheckPermission(
+ const TString& user,
+ EPermission permission,
+ const TYPath& path,
+ const TCheckPermissionOptions& options) override;
+
+ TVector<TTabletInfo> GetTabletInfos(
+ const TYPath& path,
+ const TVector<int>& tabletIndexes,
+ const TGetTabletInfosOptions& options) override;
+
+ void SuspendOperation(
+ const TOperationId& operationId,
+ const TSuspendOperationOptions& options) override;
+
+ void ResumeOperation(
+ const TOperationId& operationId,
+ const TResumeOperationOptions& options) override;
+
+ void Shutdown() override;
+
+ ITransactionPingerPtr GetTransactionPinger() override;
+
+ // Helper methods
+ TYtPoller& GetYtPoller();
+
+protected:
+ TClientPtr GetParentClientImpl() override;
+
+private:
+ template <class TOptions>
+ void SetTabletParams(
+ THttpHeader& header,
+ const TYPath& path,
+ const TOptions& options);
+
+ void CheckShutdown() const;
+
+ ITransactionPingerPtr TransactionPinger_;
+
+ std::atomic<bool> Shutdown_ = false;
+ TMutex YtPollerLock_;
+ THolder<TYtPoller> YtPoller_;
+};
+
+////////////////////////////////////////////////////////////////////////////////
+
+TClientPtr CreateClientImpl(
+ const TString& serverName,
+ const TCreateClientOptions& options = TCreateClientOptions());
+
+////////////////////////////////////////////////////////////////////////////////
+
+} // namespace NDetail
+} // namespace NYT
diff --git a/yt/cpp/mapreduce/client/client_reader.cpp b/yt/cpp/mapreduce/client/client_reader.cpp
new file mode 100644
index 0000000000..80759b12dc
--- /dev/null
+++ b/yt/cpp/mapreduce/client/client_reader.cpp
@@ -0,0 +1,232 @@
+#include "client_reader.h"
+
+#include "structured_table_formats.h"
+#include "transaction.h"
+#include "transaction_pinger.h"
+
+#include <yt/cpp/mapreduce/common/helpers.h>
+#include <yt/cpp/mapreduce/common/retry_lib.h>
+#include <yt/cpp/mapreduce/common/wait_proxy.h>
+
+#include <yt/cpp/mapreduce/interface/config.h>
+#include <yt/cpp/mapreduce/interface/tvm.h>
+
+#include <yt/cpp/mapreduce/interface/logging/yt_log.h>
+
+#include <yt/cpp/mapreduce/io/helpers.h>
+#include <yt/cpp/mapreduce/io/yamr_table_reader.h>
+
+#include <yt/cpp/mapreduce/http/helpers.h>
+#include <yt/cpp/mapreduce/http/requests.h>
+#include <yt/cpp/mapreduce/http/retry_request.h>
+
+#include <yt/cpp/mapreduce/raw_client/raw_requests.h>
+
+#include <library/cpp/yson/node/serialize.h>
+
+#include <util/random/random.h>
+#include <util/stream/file.h>
+#include <util/stream/str.h>
+#include <util/string/builder.h>
+#include <util/string/cast.h>
+
+namespace NYT {
+
+using ::ToString;
+
+////////////////////////////////////////////////////////////////////////////////
+
+TClientReader::TClientReader(
+ const TRichYPath& path,
+ IClientRetryPolicyPtr clientRetryPolicy,
+ ITransactionPingerPtr transactionPinger,
+ const TClientContext& context,
+ const TTransactionId& transactionId,
+ const TFormat& format,
+ const TTableReaderOptions& options,
+ bool useFormatFromTableAttributes)
+ : Path_(path)
+ , ClientRetryPolicy_(std::move(clientRetryPolicy))
+ , Context_(context)
+ , ParentTransactionId_(transactionId)
+ , Format_(format)
+ , Options_(options)
+ , ReadTransaction_(nullptr)
+{
+ if (options.CreateTransaction_) {
+ Y_VERIFY(transactionPinger, "Internal error: transactionPinger is null");
+ ReadTransaction_ = MakeHolder<TPingableTransaction>(
+ ClientRetryPolicy_,
+ Context_,
+ transactionId,
+ transactionPinger->GetChildTxPinger(),
+ TStartTransactionOptions());
+ Path_.Path(Snapshot(
+ ClientRetryPolicy_,
+ Context_,
+ ReadTransaction_->GetId(),
+ path.Path_));
+ }
+
+ if (useFormatFromTableAttributes) {
+ auto transactionId2 = ReadTransaction_ ? ReadTransaction_->GetId() : ParentTransactionId_;
+ auto newFormat = GetTableFormat(ClientRetryPolicy_, Context_, transactionId2, Path_);
+ if (newFormat) {
+ Format_->Config = *newFormat;
+ }
+ }
+
+ TransformYPath();
+ CreateRequest();
+}
+
+bool TClientReader::Retry(
+ const TMaybe<ui32>& rangeIndex,
+ const TMaybe<ui64>& rowIndex)
+{
+ if (CurrentRequestRetryPolicy_) {
+ // TODO we should pass actual exception in Retry function
+ yexception genericError;
+ auto backoff = CurrentRequestRetryPolicy_->OnGenericError(genericError);
+ if (!backoff) {
+ return false;
+ }
+ }
+
+ try {
+ CreateRequest(rangeIndex, rowIndex);
+ return true;
+ } catch (const std::exception& ex) {
+ YT_LOG_ERROR("Client reader retry failed: %v",
+ ex.what());
+
+ return false;
+ }
+}
+
+void TClientReader::ResetRetries()
+{
+ CurrentRequestRetryPolicy_ = nullptr;
+}
+
+size_t TClientReader::DoRead(void* buf, size_t len)
+{
+ return Input_->Read(buf, len);
+}
+
+void TClientReader::TransformYPath()
+{
+ for (auto& range : Path_.MutableRangesView()) {
+ auto& exact = range.Exact_;
+ if (IsTrivial(exact)) {
+ continue;
+ }
+
+ if (exact.RowIndex_) {
+ range.LowerLimit(TReadLimit().RowIndex(*exact.RowIndex_));
+ range.UpperLimit(TReadLimit().RowIndex(*exact.RowIndex_ + 1));
+ exact.RowIndex_.Clear();
+
+ } else if (exact.Key_) {
+ range.LowerLimit(TReadLimit().Key(*exact.Key_));
+
+ auto lastPart = TNode::CreateEntity();
+ lastPart.Attributes() = TNode()("type", "max");
+ exact.Key_->Parts_.push_back(lastPart);
+
+ range.UpperLimit(TReadLimit().Key(*exact.Key_));
+ exact.Key_.Clear();
+ }
+ }
+}
+
+void TClientReader::CreateRequest(const TMaybe<ui32>& rangeIndex, const TMaybe<ui64>& rowIndex)
+{
+ if (!CurrentRequestRetryPolicy_) {
+ CurrentRequestRetryPolicy_ = ClientRetryPolicy_->CreatePolicyForGenericRequest();
+ }
+ while (true) {
+ CurrentRequestRetryPolicy_->NotifyNewAttempt();
+
+ THttpHeader header("GET", GetReadTableCommand(Context_.Config->ApiVersion));
+ if (Context_.ServiceTicketAuth) {
+ header.SetServiceTicket(Context_.ServiceTicketAuth->Ptr->IssueServiceTicket());
+ } else {
+ header.SetToken(Context_.Token);
+ }
+ auto transactionId = (ReadTransaction_ ? ReadTransaction_->GetId() : ParentTransactionId_);
+ header.AddTransactionId(transactionId);
+
+ const auto& controlAttributes = Options_.ControlAttributes_;
+ header.AddParameter("control_attributes", TNode()
+ ("enable_row_index", controlAttributes.EnableRowIndex_)
+ ("enable_range_index", controlAttributes.EnableRangeIndex_));
+ header.SetOutputFormat(Format_);
+
+ header.SetResponseCompression(ToString(Context_.Config->AcceptEncoding));
+
+ if (rowIndex.Defined()) {
+ auto& ranges = Path_.MutableRanges();
+ if (ranges.Empty()) {
+ ranges.ConstructInPlace(TVector{TReadRange()});
+ } else {
+ if (rangeIndex.GetOrElse(0) >= ranges->size()) {
+ ythrow yexception()
+ << "range index " << rangeIndex.GetOrElse(0)
+ << " is out of range, input range count is " << ranges->size();
+ }
+ ranges->erase(ranges->begin(), ranges->begin() + rangeIndex.GetOrElse(0));
+ }
+ ranges->begin()->LowerLimit(TReadLimit().RowIndex(*rowIndex));
+ }
+
+ header.MergeParameters(FormIORequestParameters(Path_, Options_));
+
+ auto requestId = CreateGuidAsString();
+
+ try {
+ const auto proxyName = GetProxyForHeavyRequest(Context_);
+ Response_ = Context_.HttpClient->Request(GetFullUrl(proxyName, Context_, header), requestId, header);
+
+ Input_ = Response_->GetResponseStream();
+
+ YT_LOG_DEBUG("RSP %v - table stream", requestId);
+
+ return;
+ } catch (const TErrorResponse& e) {
+ LogRequestError(
+ requestId,
+ header,
+ e.what(),
+ CurrentRequestRetryPolicy_->GetAttemptDescription());
+
+ if (!IsRetriable(e)) {
+ throw;
+ }
+ auto backoff = CurrentRequestRetryPolicy_->OnRetriableError(e);
+ if (!backoff) {
+ throw;
+ }
+ NDetail::TWaitProxy::Get()->Sleep(*backoff);
+ } catch (const std::exception& e) {
+ LogRequestError(
+ requestId,
+ header,
+ e.what(),
+ CurrentRequestRetryPolicy_->GetAttemptDescription());
+
+ Response_.reset();
+ Input_ = nullptr;
+
+ auto backoff = CurrentRequestRetryPolicy_->OnGenericError(e);
+ if (!backoff) {
+ throw;
+ }
+ NDetail::TWaitProxy::Get()->Sleep(*backoff);
+ }
+ }
+}
+
+////////////////////////////////////////////////////////////////////////////////
+
+} // namespace NYT
diff --git a/yt/cpp/mapreduce/client/client_reader.h b/yt/cpp/mapreduce/client/client_reader.h
new file mode 100644
index 0000000000..22f5a0ebb0
--- /dev/null
+++ b/yt/cpp/mapreduce/client/client_reader.h
@@ -0,0 +1,65 @@
+#pragma once
+
+#include <yt/cpp/mapreduce/common/fwd.h>
+
+#include <yt/cpp/mapreduce/interface/io.h>
+
+#include <yt/cpp/mapreduce/http/context.h>
+#include <yt/cpp/mapreduce/http/requests.h>
+#include <yt/cpp/mapreduce/http/http.h>
+#include <yt/cpp/mapreduce/http/http_client.h>
+
+namespace NYT {
+
+class TPingableTransaction;
+
+////////////////////////////////////////////////////////////////////////////////
+
+class TClientReader
+ : public TRawTableReader
+{
+public:
+ TClientReader(
+ const TRichYPath& path,
+ IClientRetryPolicyPtr clientRetryPolicy,
+ ITransactionPingerPtr transactionPinger,
+ const TClientContext& context,
+ const TTransactionId& transactionId,
+ const TFormat& format,
+ const TTableReaderOptions& options,
+ bool useFormatFromTableAttributes);
+
+ bool Retry(
+ const TMaybe<ui32>& rangeIndex,
+ const TMaybe<ui64>& rowIndex) override;
+
+ void ResetRetries() override;
+
+ bool HasRangeIndices() const override { return true; }
+
+protected:
+ size_t DoRead(void* buf, size_t len) override;
+
+private:
+ TRichYPath Path_;
+ const IClientRetryPolicyPtr ClientRetryPolicy_;
+ const TClientContext Context_;
+ TTransactionId ParentTransactionId_;
+ TMaybe<TFormat> Format_;
+ TTableReaderOptions Options_;
+
+ THolder<TPingableTransaction> ReadTransaction_;
+
+ NHttpClient::IHttpResponsePtr Response_;
+ IInputStream* Input_;
+
+ IRequestRetryPolicyPtr CurrentRequestRetryPolicy_;
+
+private:
+ void TransformYPath();
+ void CreateRequest(const TMaybe<ui32>& rangeIndex = Nothing(), const TMaybe<ui64>& rowIndex = Nothing());
+};
+
+////////////////////////////////////////////////////////////////////////////////
+
+} // namespace NYT
diff --git a/yt/cpp/mapreduce/client/client_writer.cpp b/yt/cpp/mapreduce/client/client_writer.cpp
new file mode 100644
index 0000000000..357abd32eb
--- /dev/null
+++ b/yt/cpp/mapreduce/client/client_writer.cpp
@@ -0,0 +1,69 @@
+#include "client_writer.h"
+
+#include "retryful_writer.h"
+#include "retryless_writer.h"
+
+#include <yt/cpp/mapreduce/interface/io.h>
+#include <yt/cpp/mapreduce/common/fwd.h>
+#include <yt/cpp/mapreduce/common/helpers.h>
+
+namespace NYT {
+
+////////////////////////////////////////////////////////////////////////////////
+
+TClientWriter::TClientWriter(
+ const TRichYPath& path,
+ IClientRetryPolicyPtr clientRetryPolicy,
+ ITransactionPingerPtr transactionPinger,
+ const TClientContext& context,
+ const TTransactionId& transactionId,
+ const TMaybe<TFormat>& format,
+ const TTableWriterOptions& options)
+ : BUFFER_SIZE(options.BufferSize_)
+{
+ if (options.SingleHttpRequest_) {
+ RawWriter_.Reset(new TRetrylessWriter(
+ context,
+ transactionId,
+ GetWriteTableCommand(context.Config->ApiVersion),
+ format,
+ path,
+ BUFFER_SIZE,
+ options));
+ } else {
+ RawWriter_.Reset(new TRetryfulWriter(
+ std::move(clientRetryPolicy),
+ std::move(transactionPinger),
+ context,
+ transactionId,
+ GetWriteTableCommand(context.Config->ApiVersion),
+ format,
+ path,
+ options));
+ }
+}
+
+size_t TClientWriter::GetStreamCount() const
+{
+ return 1;
+}
+
+IOutputStream* TClientWriter::GetStream(size_t tableIndex) const
+{
+ Y_UNUSED(tableIndex);
+ return RawWriter_.Get();
+}
+
+void TClientWriter::OnRowFinished(size_t)
+{
+ RawWriter_->NotifyRowEnd();
+}
+
+void TClientWriter::Abort()
+{
+ RawWriter_->Abort();
+}
+
+////////////////////////////////////////////////////////////////////////////////
+
+} // namespace NYT
diff --git a/yt/cpp/mapreduce/client/client_writer.h b/yt/cpp/mapreduce/client/client_writer.h
new file mode 100644
index 0000000000..010a88a8ff
--- /dev/null
+++ b/yt/cpp/mapreduce/client/client_writer.h
@@ -0,0 +1,42 @@
+#pragma once
+
+#include <yt/cpp/mapreduce/common/fwd.h>
+
+#include <yt/cpp/mapreduce/http/requests.h>
+#include <yt/cpp/mapreduce/interface/io.h>
+
+namespace NYT {
+
+struct TTableWriterOptions;
+class TRetryfulWriter;
+
+////////////////////////////////////////////////////////////////////////////////
+
+class TClientWriter
+ : public IProxyOutput
+{
+public:
+ TClientWriter(
+ const TRichYPath& path,
+ IClientRetryPolicyPtr clientRetryPolicy,
+ ITransactionPingerPtr transactionPinger,
+ const TClientContext& context,
+ const TTransactionId& transactionId,
+ const TMaybe<TFormat>& format,
+ const TTableWriterOptions& options);
+
+ size_t GetStreamCount() const override;
+ IOutputStream* GetStream(size_t tableIndex) const override;
+ void OnRowFinished(size_t tableIndex) override;
+ void Abort() override;
+
+private:
+ ::TIntrusivePtr<TRawTableWriter> RawWriter_;
+
+ const size_t BUFFER_SIZE = 64 << 20;
+};
+
+////////////////////////////////////////////////////////////////////////////////
+
+
+} // namespace NYT
diff --git a/yt/cpp/mapreduce/client/dummy_job_profiler.cpp b/yt/cpp/mapreduce/client/dummy_job_profiler.cpp
new file mode 100644
index 0000000000..5a2f1e8d46
--- /dev/null
+++ b/yt/cpp/mapreduce/client/dummy_job_profiler.cpp
@@ -0,0 +1,26 @@
+#include "job_profiler.h"
+
+namespace NYT {
+
+////////////////////////////////////////////////////////////////////////////////
+
+class TDummyJobProfiler
+ : public IJobProfiler
+{
+ void Start() override
+ { }
+
+ void Stop() override
+ { }
+};
+
+////////////////////////////////////////////////////////////////////////////////
+
+std::unique_ptr<IJobProfiler> CreateJobProfiler()
+{
+ return std::make_unique<TDummyJobProfiler>();
+}
+
+////////////////////////////////////////////////////////////////////////////////
+
+} // namespace NYT
diff --git a/yt/cpp/mapreduce/client/file_reader.cpp b/yt/cpp/mapreduce/client/file_reader.cpp
new file mode 100644
index 0000000000..fc21e0bc02
--- /dev/null
+++ b/yt/cpp/mapreduce/client/file_reader.cpp
@@ -0,0 +1,243 @@
+#include "file_reader.h"
+
+#include "transaction.h"
+#include "transaction_pinger.h"
+
+#include <yt/cpp/mapreduce/common/helpers.h>
+#include <yt/cpp/mapreduce/common/retry_lib.h>
+#include <yt/cpp/mapreduce/common/wait_proxy.h>
+
+#include <yt/cpp/mapreduce/interface/config.h>
+#include <yt/cpp/mapreduce/interface/tvm.h>
+
+#include <yt/cpp/mapreduce/interface/logging/yt_log.h>
+
+#include <yt/cpp/mapreduce/io/helpers.h>
+
+#include <yt/cpp/mapreduce/http/helpers.h>
+#include <yt/cpp/mapreduce/http/http.h>
+#include <yt/cpp/mapreduce/http/http_client.h>
+#include <yt/cpp/mapreduce/http/retry_request.h>
+
+#include <yt/cpp/mapreduce/raw_client/raw_requests.h>
+
+namespace NYT {
+namespace NDetail {
+
+using ::ToString;
+
+////////////////////////////////////////////////////////////////////////////////
+
+static TMaybe<ui64> GetEndOffset(const TFileReaderOptions& options) {
+ if (options.Length_) {
+ return options.Offset_.GetOrElse(0) + *options.Length_;
+ } else {
+ return Nothing();
+ }
+}
+
+////////////////////////////////////////////////////////////////////////////////
+
+TStreamReaderBase::TStreamReaderBase(
+ IClientRetryPolicyPtr clientRetryPolicy,
+ ITransactionPingerPtr transactionPinger,
+ const TClientContext& context,
+ const TTransactionId& transactionId)
+ : Context_(context)
+ , ClientRetryPolicy_(std::move(clientRetryPolicy))
+ , ReadTransaction_(MakeHolder<TPingableTransaction>(
+ ClientRetryPolicy_,
+ context,
+ transactionId,
+ transactionPinger->GetChildTxPinger(),
+ TStartTransactionOptions()))
+{ }
+
+TStreamReaderBase::~TStreamReaderBase() = default;
+
+TYPath TStreamReaderBase::Snapshot(const TYPath& path)
+{
+ return NYT::Snapshot(ClientRetryPolicy_, Context_, ReadTransaction_->GetId(), path);
+}
+
+TString TStreamReaderBase::GetActiveRequestId() const
+{
+ if (Response_) {
+ return Response_->GetRequestId();;
+ } else {
+ return "<no-active-request>";
+ }
+}
+
+size_t TStreamReaderBase::DoRead(void* buf, size_t len)
+{
+ const int retryCount = Context_.Config->ReadRetryCount;
+ for (int attempt = 1; attempt <= retryCount; ++attempt) {
+ try {
+ if (!Input_) {
+ Response_ = Request(Context_, ReadTransaction_->GetId(), CurrentOffset_);
+ Input_ = Response_->GetResponseStream();
+ }
+ if (len == 0) {
+ return 0;
+ }
+ const size_t read = Input_->Read(buf, len);
+ CurrentOffset_ += read;
+ return read;
+ } catch (TErrorResponse& e) {
+ YT_LOG_ERROR("RSP %v - failed: %v (attempt %v of %v)",
+ GetActiveRequestId(),
+ e.what(),
+ attempt,
+ retryCount);
+
+ if (!IsRetriable(e) || attempt == retryCount) {
+ throw;
+ }
+ NDetail::TWaitProxy::Get()->Sleep(GetBackoffDuration(e, Context_.Config));
+ } catch (std::exception& e) {
+ YT_LOG_ERROR("RSP %v - failed: %v (attempt %v of %v)",
+ GetActiveRequestId(),
+ e.what(),
+ attempt,
+ retryCount);
+
+ // Invalidate connection.
+ Response_.reset();
+
+ if (attempt == retryCount) {
+ throw;
+ }
+ NDetail::TWaitProxy::Get()->Sleep(GetBackoffDuration(e, Context_.Config));
+ }
+ Input_ = nullptr;
+ }
+ Y_UNREACHABLE(); // we should either return or throw from loop above
+}
+
+////////////////////////////////////////////////////////////////////////////////
+
+TFileReader::TFileReader(
+ const TRichYPath& path,
+ IClientRetryPolicyPtr clientRetryPolicy,
+ ITransactionPingerPtr transactionPinger,
+ const TClientContext& context,
+ const TTransactionId& transactionId,
+ const TFileReaderOptions& options)
+ : TStreamReaderBase(std::move(clientRetryPolicy), std::move(transactionPinger), context, transactionId)
+ , FileReaderOptions_(options)
+ , Path_(path)
+ , StartOffset_(FileReaderOptions_.Offset_.GetOrElse(0))
+ , EndOffset_(GetEndOffset(FileReaderOptions_))
+{
+ Path_.Path_ = TStreamReaderBase::Snapshot(Path_.Path_);
+}
+
+NHttpClient::IHttpResponsePtr TFileReader::Request(const TClientContext& context, const TTransactionId& transactionId, ui64 readBytes)
+{
+ const ui64 currentOffset = StartOffset_ + readBytes;
+ TString hostName = GetProxyForHeavyRequest(context);
+
+ THttpHeader header("GET", GetReadFileCommand(context.Config->ApiVersion));
+ if (context.ServiceTicketAuth) {
+ header.SetServiceTicket(context.ServiceTicketAuth->Ptr->IssueServiceTicket());
+ } else {
+ header.SetToken(context.Token);
+ }
+ header.AddTransactionId(transactionId);
+ header.SetOutputFormat(TMaybe<TFormat>()); // Binary format
+
+ if (EndOffset_) {
+ Y_VERIFY(*EndOffset_ >= currentOffset);
+ FileReaderOptions_.Length(*EndOffset_ - currentOffset);
+ }
+ FileReaderOptions_.Offset(currentOffset);
+ header.MergeParameters(FormIORequestParameters(Path_, FileReaderOptions_));
+
+ header.SetResponseCompression(ToString(context.Config->AcceptEncoding));
+
+ auto requestId = CreateGuidAsString();
+ NHttpClient::IHttpResponsePtr response;
+ try {
+ response = context.HttpClient->Request(GetFullUrl(hostName, context, header), requestId, header);
+ } catch (const std::exception& ex) {
+ LogRequestError(requestId, header, ex.what(), "");
+ throw;
+ }
+
+ YT_LOG_DEBUG("RSP %v - file stream",
+ requestId);
+
+ return response;
+}
+
+////////////////////////////////////////////////////////////////////////////////
+
+TBlobTableReader::TBlobTableReader(
+ const TYPath& path,
+ const TKey& key,
+ IClientRetryPolicyPtr retryPolicy,
+ ITransactionPingerPtr transactionPinger,
+ const TClientContext& context,
+ const TTransactionId& transactionId,
+ const TBlobTableReaderOptions& options)
+ : TStreamReaderBase(std::move(retryPolicy), std::move(transactionPinger), context, transactionId)
+ , Key_(key)
+ , Options_(options)
+{
+ Path_ = TStreamReaderBase::Snapshot(path);
+}
+
+NHttpClient::IHttpResponsePtr TBlobTableReader::Request(const TClientContext& context, const TTransactionId& transactionId, ui64 readBytes)
+{
+ TString hostName = GetProxyForHeavyRequest(context);
+
+ THttpHeader header("GET", "read_blob_table");
+ if (context.ServiceTicketAuth) {
+ header.SetServiceTicket(context.ServiceTicketAuth->Ptr->IssueServiceTicket());
+ } else {
+ header.SetToken(context.Token);
+ }
+ header.AddTransactionId(transactionId);
+ header.SetOutputFormat(TMaybe<TFormat>()); // Binary format
+
+ const ui64 currentOffset = Options_.Offset_ + readBytes;
+ const i64 startPartIndex = currentOffset / Options_.PartSize_;
+ const ui64 skipBytes = currentOffset - Options_.PartSize_ * startPartIndex;
+ auto lowerLimitKey = Key_;
+ lowerLimitKey.Parts_.push_back(startPartIndex);
+ auto upperLimitKey = Key_;
+ upperLimitKey.Parts_.push_back(std::numeric_limits<i64>::max());
+ TNode params = PathToParamNode(TRichYPath(Path_).AddRange(TReadRange()
+ .LowerLimit(TReadLimit().Key(lowerLimitKey))
+ .UpperLimit(TReadLimit().Key(upperLimitKey))));
+ params["start_part_index"] = TNode(startPartIndex);
+ params["offset"] = skipBytes;
+ if (Options_.PartIndexColumnName_) {
+ params["part_index_column_name"] = *Options_.PartIndexColumnName_;
+ }
+ if (Options_.DataColumnName_) {
+ params["data_column_name"] = *Options_.DataColumnName_;
+ }
+ params["part_size"] = Options_.PartSize_;
+ header.MergeParameters(params);
+ header.SetResponseCompression(ToString(context.Config->AcceptEncoding));
+
+ auto requestId = CreateGuidAsString();
+ NHttpClient::IHttpResponsePtr response;
+ try {
+ response = context.HttpClient->Request(GetFullUrl(hostName, context, header), requestId, header);
+ } catch (const std::exception& ex) {
+ LogRequestError(requestId, header, ex.what(), "");
+ throw;
+ }
+
+ YT_LOG_DEBUG("RSP %v - blob table stream",
+ requestId);
+ return response;
+}
+
+////////////////////////////////////////////////////////////////////////////////
+
+} // namespace NDetail
+} // namespace NYT
diff --git a/yt/cpp/mapreduce/client/file_reader.h b/yt/cpp/mapreduce/client/file_reader.h
new file mode 100644
index 0000000000..d850008a31
--- /dev/null
+++ b/yt/cpp/mapreduce/client/file_reader.h
@@ -0,0 +1,105 @@
+#pragma once
+
+#include <yt/cpp/mapreduce/common/fwd.h>
+
+#include <yt/cpp/mapreduce/interface/io.h>
+
+#include <yt/cpp/mapreduce/http/context.h>
+#include <yt/cpp/mapreduce/http/requests.h>
+
+class IInputStream;
+
+namespace NYT {
+
+class THttpRequest;
+class TPingableTransaction;
+
+namespace NDetail {
+////////////////////////////////////////////////////////////////////////////////
+
+class TStreamReaderBase
+ : public IFileReader
+{
+public:
+ TStreamReaderBase(
+ IClientRetryPolicyPtr clientRetryPolicy,
+ ITransactionPingerPtr transactionPinger,
+ const TClientContext& context,
+ const TTransactionId& transactionId);
+
+ ~TStreamReaderBase();
+
+protected:
+ TYPath Snapshot(const TYPath& path);
+
+protected:
+ const TClientContext Context_;
+
+private:
+ size_t DoRead(void* buf, size_t len) override;
+ virtual NHttpClient::IHttpResponsePtr Request(const TClientContext& context, const TTransactionId& transactionId, ui64 readBytes) = 0;
+ TString GetActiveRequestId() const;
+
+private:
+ const IClientRetryPolicyPtr ClientRetryPolicy_;
+ TFileReaderOptions FileReaderOptions_;
+
+ NHttpClient::IHttpResponsePtr Response_;
+ IInputStream* Input_ = nullptr;
+
+ THolder<TPingableTransaction> ReadTransaction_;
+
+ ui64 CurrentOffset_ = 0;
+};
+
+////////////////////////////////////////////////////////////////////////////////
+
+class TFileReader
+ : public TStreamReaderBase
+{
+public:
+ TFileReader(
+ const TRichYPath& path,
+ IClientRetryPolicyPtr clientRetryPolicy,
+ ITransactionPingerPtr transactionPinger,
+ const TClientContext& context,
+ const TTransactionId& transactionId,
+ const TFileReaderOptions& options = TFileReaderOptions());
+
+private:
+ NHttpClient::IHttpResponsePtr Request(const TClientContext& context, const TTransactionId& transactionId, ui64 readBytes) override;
+
+private:
+ TFileReaderOptions FileReaderOptions_;
+
+ TRichYPath Path_;
+ const ui64 StartOffset_;
+ const TMaybe<ui64> EndOffset_;
+};
+
+////////////////////////////////////////////////////////////////////////////////
+
+class TBlobTableReader
+ : public TStreamReaderBase
+{
+public:
+ TBlobTableReader(
+ const TYPath& path,
+ const TKey& key,
+ IClientRetryPolicyPtr clientRetryPolicy,
+ ITransactionPingerPtr transactionPinger,
+ const TClientContext& context,
+ const TTransactionId& transactionId,
+ const TBlobTableReaderOptions& options);
+
+private:
+ NHttpClient::IHttpResponsePtr Request(const TClientContext& context, const TTransactionId& transactionId, ui64 readBytes) override;
+
+private:
+ const TKey Key_;
+ const TBlobTableReaderOptions Options_;
+ TYPath Path_;
+};
+
+} // namespace NDetail
+} // namespace NYT
diff --git a/yt/cpp/mapreduce/client/file_writer.cpp b/yt/cpp/mapreduce/client/file_writer.cpp
new file mode 100644
index 0000000000..daf6461edd
--- /dev/null
+++ b/yt/cpp/mapreduce/client/file_writer.cpp
@@ -0,0 +1,60 @@
+#include "file_writer.h"
+
+#include <yt/cpp/mapreduce/io/helpers.h>
+#include <yt/cpp/mapreduce/interface/finish_or_die.h>
+
+#include <yt/cpp/mapreduce/common/helpers.h>
+
+namespace NYT {
+
+////////////////////////////////////////////////////////////////////////////////
+
+TFileWriter::TFileWriter(
+ const TRichYPath& path,
+ IClientRetryPolicyPtr clientRetryPolicy,
+ ITransactionPingerPtr transactionPinger,
+ const TClientContext& context,
+ const TTransactionId& transactionId,
+ const TFileWriterOptions& options)
+ : RetryfulWriter_(
+ std::move(clientRetryPolicy),
+ std::move(transactionPinger),
+ context,
+ transactionId,
+ GetWriteFileCommand(context.Config->ApiVersion),
+ TMaybe<TFormat>(),
+ path,
+ options)
+{ }
+
+TFileWriter::~TFileWriter()
+{
+ NDetail::FinishOrDie(this, "TFileWriter");
+}
+
+void TFileWriter::DoWrite(const void* buf, size_t len)
+{
+ // If user tunes RetryBlockSize / DesiredChunkSize he expects
+ // us to send data exactly by RetryBlockSize. So behaviour of the writer is predictable.
+ //
+ // We want to avoid situation when size of sent data slightly exceeded DesiredChunkSize
+ // and server produced one chunk of desired size and one small chunk.
+ while (len > 0) {
+ const auto retryBlockRemainingSize = RetryfulWriter_.GetRetryBlockRemainingSize();
+ Y_VERIFY(retryBlockRemainingSize > 0);
+ const auto firstWriteLen = Min(len, retryBlockRemainingSize);
+ RetryfulWriter_.Write(buf, firstWriteLen);
+ RetryfulWriter_.NotifyRowEnd();
+ len -= firstWriteLen;
+ buf = static_cast<const char*>(buf) + firstWriteLen;
+ }
+}
+
+void TFileWriter::DoFinish()
+{
+ RetryfulWriter_.Finish();
+}
+
+////////////////////////////////////////////////////////////////////////////////
+
+} // namespace NYT
diff --git a/yt/cpp/mapreduce/client/file_writer.h b/yt/cpp/mapreduce/client/file_writer.h
new file mode 100644
index 0000000000..f3b97b904e
--- /dev/null
+++ b/yt/cpp/mapreduce/client/file_writer.h
@@ -0,0 +1,38 @@
+#pragma once
+
+#include "retryful_writer.h"
+
+#include <yt/cpp/mapreduce/common/fwd.h>
+
+#include <yt/cpp/mapreduce/interface/io.h>
+
+namespace NYT {
+
+////////////////////////////////////////////////////////////////////////////////
+
+class TFileWriter
+ : public IFileWriter
+{
+public:
+ TFileWriter(
+ const TRichYPath& path,
+ IClientRetryPolicyPtr clientRetryPolicy,
+ ITransactionPingerPtr transactionPinger,
+ const TClientContext& context,
+ const TTransactionId& transactionId,
+ const TFileWriterOptions& options = TFileWriterOptions());
+
+ ~TFileWriter() override;
+
+protected:
+ void DoWrite(const void* buf, size_t len) override;
+ void DoFinish() override;
+
+private:
+ TRetryfulWriter RetryfulWriter_;
+ static const size_t BUFFER_SIZE = 64 << 20;
+};
+
+////////////////////////////////////////////////////////////////////////////////
+
+} // namespace NYT
diff --git a/yt/cpp/mapreduce/client/format_hints.cpp b/yt/cpp/mapreduce/client/format_hints.cpp
new file mode 100644
index 0000000000..1f6eb173ad
--- /dev/null
+++ b/yt/cpp/mapreduce/client/format_hints.cpp
@@ -0,0 +1,84 @@
+#include "format_hints.h"
+
+#include <yt/cpp/mapreduce/common/helpers.h>
+#include <yt/cpp/mapreduce/interface/config.h>
+#include <yt/cpp/mapreduce/interface/operation.h>
+
+#include <util/string/builder.h>
+
+namespace NYT::NDetail {
+
+using ::ToString;
+
+////////////////////////////////////////////////////////////////////////////////
+
+static void ApplyEnableTypeConversion(TFormat* format, const TFormatHints& formatHints)
+{
+ if (formatHints.EnableAllToStringConversion_) {
+ format->Config.Attributes()["enable_all_to_string_conversion"] = *formatHints.EnableAllToStringConversion_;
+ }
+ if (formatHints.EnableStringToAllConversion_) {
+ format->Config.Attributes()["enable_string_to_all_conversion"] = *formatHints.EnableStringToAllConversion_;
+ }
+ if (formatHints.EnableIntegralTypeConversion_) {
+ format->Config.Attributes()["enable_integral_type_conversion"] = *formatHints.EnableIntegralTypeConversion_;
+ }
+ if (formatHints.EnableIntegralToDoubleConversion_) {
+ format->Config.Attributes()["enable_integral_to_double_conversion"] = *formatHints.EnableIntegralToDoubleConversion_;
+ }
+ if (formatHints.EnableTypeConversion_) {
+ format->Config.Attributes()["enable_type_conversion"] = *formatHints.EnableTypeConversion_;
+ }
+}
+
+template <>
+void ApplyFormatHints<TNode>(TFormat* format, const TMaybe<TFormatHints>& formatHints)
+{
+ Y_VERIFY(format);
+ if (!formatHints) {
+ return;
+ }
+
+ ApplyEnableTypeConversion(format, *formatHints);
+
+ if (formatHints->SkipNullValuesForTNode_) {
+ Y_ENSURE_EX(
+ format->Config.AsString() == "yson",
+ TApiUsageError() << "SkipNullForTNode option must be used with yson format, actual format: " << format->Config.AsString());
+ format->Config.Attributes()["skip_null_values"] = formatHints->SkipNullValuesForTNode_;
+ }
+
+ if (formatHints->ComplexTypeMode_) {
+ Y_ENSURE_EX(
+ format->Config.AsString() == "yson",
+ TApiUsageError() << "ComplexTypeMode option must be used with yson format, actual format: "
+ << format->Config.AsString());
+ format->Config.Attributes()["complex_type_mode"] = ToString(*formatHints->ComplexTypeMode_);
+ }
+}
+
+template <>
+void ApplyFormatHints<TYaMRRow>(TFormat* format, const TMaybe<TFormatHints>& formatHints)
+{
+ Y_VERIFY(format);
+ if (!formatHints) {
+ return;
+ }
+
+ ythrow TApiUsageError() << "Yamr format currently has no supported format hints";
+}
+
+template <>
+void ApplyFormatHints<::google::protobuf::Message>(TFormat* format, const TMaybe<TFormatHints>& formatHints)
+{
+ Y_VERIFY(format);
+ if (!formatHints) {
+ return;
+ }
+
+ ythrow TApiUsageError() << "Protobuf format currently has no supported format hints";
+}
+
+////////////////////////////////////////////////////////////////////////////////
+
+} // namespace NYT::NDetail
diff --git a/yt/cpp/mapreduce/client/format_hints.h b/yt/cpp/mapreduce/client/format_hints.h
new file mode 100644
index 0000000000..f6576b1045
--- /dev/null
+++ b/yt/cpp/mapreduce/client/format_hints.h
@@ -0,0 +1,27 @@
+#pragma once
+
+#include <yt/cpp/mapreduce/interface/fwd.h>
+
+#include <util/generic/maybe.h>
+
+namespace NYT {
+namespace NDetail {
+
+////////////////////////////////////////////////////////////////////////////////
+
+template <typename TRow>
+void ApplyFormatHints(TFormat* format, const TMaybe<TFormatHints>& formatHints);
+
+template <>
+void ApplyFormatHints<TNode>(TFormat* format, const TMaybe<TFormatHints>& formatHints);
+
+template <>
+void ApplyFormatHints<TYaMRRow>(TFormat* format, const TMaybe<TFormatHints>& formatHints);
+
+template <>
+void ApplyFormatHints<::google::protobuf::Message>(TFormat* format, const TMaybe<TFormatHints>& formatHints);
+
+////////////////////////////////////////////////////////////////////////////////
+
+} // namespace NDetail
+} // namespace NYT
diff --git a/yt/cpp/mapreduce/client/fwd.h b/yt/cpp/mapreduce/client/fwd.h
new file mode 100644
index 0000000000..d4449d4ac1
--- /dev/null
+++ b/yt/cpp/mapreduce/client/fwd.h
@@ -0,0 +1,16 @@
+#pragma once
+
+#include <util/generic/ptr.h>
+
+namespace NYT {
+
+////////////////////////////////////////////////////////////////////////////////
+
+class TPingableTransaction;
+
+class TClient;
+using TClientPtr = ::TIntrusivePtr<TClient>;
+
+////////////////////////////////////////////////////////////////////////////////
+
+} // namespace NYT
diff --git a/yt/cpp/mapreduce/client/init.cpp b/yt/cpp/mapreduce/client/init.cpp
new file mode 100644
index 0000000000..c74598ba14
--- /dev/null
+++ b/yt/cpp/mapreduce/client/init.cpp
@@ -0,0 +1,280 @@
+#include "init.h"
+
+#include "abortable_registry.h"
+#include "job_profiler.h"
+
+#include <yt/cpp/mapreduce/http/requests.h>
+
+#include <yt/cpp/mapreduce/interface/config.h>
+#include <yt/cpp/mapreduce/interface/init.h>
+#include <yt/cpp/mapreduce/interface/operation.h>
+
+#include <yt/cpp/mapreduce/interface/logging/logger.h>
+#include <yt/cpp/mapreduce/interface/logging/yt_log.h>
+
+#include <yt/cpp/mapreduce/io/job_reader.h>
+
+#include <yt/cpp/mapreduce/common/helpers.h>
+#include <yt/cpp/mapreduce/common/wait_proxy.h>
+
+#include <library/cpp/sighandler/async_signals_handler.h>
+
+#include <util/folder/dirut.h>
+
+#include <util/generic/singleton.h>
+
+#include <util/string/builder.h>
+#include <util/string/cast.h>
+#include <util/string/type.h>
+
+#include <util/system/env.h>
+#include <util/system/thread.h>
+
+namespace NYT {
+
+////////////////////////////////////////////////////////////////////////////////
+
+namespace {
+
+void WriteVersionToLog()
+{
+ YT_LOG_INFO("Wrapper version: %v",
+ TProcessState::Get()->ClientVersion);
+}
+
+static TNode SecureVaultContents; // safe
+
+void InitializeSecureVault()
+{
+ SecureVaultContents = NodeFromYsonString(
+ GetEnv("YT_SECURE_VAULT", "{}"));
+}
+
+}
+
+////////////////////////////////////////////////////////////////////////////////
+
+const TNode& GetJobSecureVault()
+{
+ return SecureVaultContents;
+}
+
+////////////////////////////////////////////////////////////////////////////////
+
+class TAbnormalTerminator
+{
+public:
+ TAbnormalTerminator() = default;
+
+ static void SetErrorTerminationHandler()
+ {
+ if (Instance().OldHandler_ != nullptr) {
+ return;
+ }
+
+ Instance().OldHandler_ = std::set_terminate(&TerminateHandler);
+
+ SetAsyncSignalFunction(SIGINT, SignalHandler);
+ SetAsyncSignalFunction(SIGTERM, SignalHandler);
+ }
+
+private:
+ static TAbnormalTerminator& Instance()
+ {
+ return *Singleton<TAbnormalTerminator>();
+ }
+
+ static void* Invoke(void* opaque)
+ {
+ (*reinterpret_cast<std::function<void()>*>(opaque))();
+ return nullptr;
+ }
+
+ static void TerminateWithTimeout(
+ const TDuration& timeout,
+ const std::function<void(void)>& exitFunction,
+ const TString& logMessage)
+ {
+ std::function<void()> threadFun = [=] {
+ YT_LOG_INFO("%v",
+ logMessage);
+ NDetail::TAbortableRegistry::Get()->AbortAllAndBlockForever();
+ };
+ TThread thread(TThread::TParams(Invoke, &threadFun).SetName("aborter"));
+ thread.Start();
+ thread.Detach();
+
+ Sleep(timeout);
+ exitFunction();
+ }
+
+ static void SignalHandler(int signalNumber)
+ {
+ TerminateWithTimeout(
+ TDuration::Seconds(5),
+ std::bind(_exit, -signalNumber),
+ ::TStringBuilder() << "Signal " << signalNumber << " received, aborting transactions. Waiting 5 seconds...");
+ }
+
+ static void TerminateHandler()
+ {
+ TerminateWithTimeout(
+ TDuration::Seconds(5),
+ [&] {
+ if (Instance().OldHandler_) {
+ Instance().OldHandler_();
+ } else {
+ abort();
+ }
+ },
+ ::TStringBuilder() << "Terminate called, aborting transactions. Waiting 5 seconds...");
+ }
+
+private:
+ std::terminate_handler OldHandler_ = nullptr;
+};
+
+////////////////////////////////////////////////////////////////////////////////
+
+namespace NDetail {
+
+EInitStatus& GetInitStatus()
+{
+ static EInitStatus initStatus = EInitStatus::NotInitialized;
+ return initStatus;
+}
+
+static void ElevateInitStatus(const EInitStatus newStatus) {
+ NDetail::GetInitStatus() = Max(NDetail::GetInitStatus(), newStatus);
+}
+
+void CommonInitialize(int argc, const char** argv)
+{
+ auto logLevelStr = to_lower(TConfig::Get()->LogLevel);
+ ILogger::ELevel logLevel;
+
+ if (!TryFromString(logLevelStr, logLevel)) {
+ Cerr << "Invalid log level: " << TConfig::Get()->LogLevel << Endl;
+ exit(1);
+ }
+
+ SetLogger(CreateStdErrLogger(logLevel));
+
+ TProcessState::Get()->SetCommandLine(argc, argv);
+}
+
+void NonJobInitialize(const TInitializeOptions& options)
+{
+ if (FromString<bool>(GetEnv("YT_CLEANUP_ON_TERMINATION", "0")) || options.CleanupOnTermination_) {
+ TAbnormalTerminator::SetErrorTerminationHandler();
+ }
+ if (options.WaitProxy_) {
+ NDetail::TWaitProxy::Get()->SetProxy(options.WaitProxy_);
+ }
+ WriteVersionToLog();
+}
+
+void ExecJob(int argc, const char** argv, const TInitializeOptions& options)
+{
+ // Now we are definitely in job.
+ // We take this setting from environment variable to be consistent with client code.
+ TConfig::Get()->UseClientProtobuf = IsTrue(GetEnv("YT_USE_CLIENT_PROTOBUF", ""));
+
+ auto execJobImpl = [&options](TString jobName, i64 outputTableCount, bool hasState) {
+ auto jobProfiler = CreateJobProfiler();
+ jobProfiler->Start();
+
+ InitializeSecureVault();
+
+ NDetail::OutputTableCount = static_cast<i64>(outputTableCount);
+
+ THolder<IInputStream> jobStateStream;
+ if (hasState) {
+ jobStateStream = MakeHolder<TIFStream>("jobstate");
+ } else {
+ jobStateStream = MakeHolder<TBufferStream>(0);
+ }
+
+ int ret = 1;
+ try {
+ ret = TJobFactory::Get()->GetJobFunction(jobName.data())(outputTableCount, *jobStateStream);
+ } catch (const TSystemError& ex) {
+ if (ex.Status() == EPIPE) {
+ // 32 == EPIPE, write number here so it's easier to grep this exit code in source files
+ exit(32);
+ }
+ throw;
+ }
+
+ jobProfiler->Stop();
+
+ if (options.JobOnExitFunction_) {
+ (*options.JobOnExitFunction_)();
+ }
+ exit(ret);
+ };
+
+ auto jobArguments = NodeFromYsonString(GetEnv("YT_JOB_ARGUMENTS", "#"));
+ if (jobArguments.HasValue()) {
+ execJobImpl(
+ jobArguments["job_name"].AsString(),
+ jobArguments["output_table_count"].AsInt64(),
+ jobArguments["has_state"].AsBool());
+ Y_UNREACHABLE();
+ }
+
+ TString jobType = argc >= 2 ? argv[1] : TString();
+ if (argc != 5 || jobType != "--yt-map" && jobType != "--yt-reduce") {
+ // We are inside job but probably using old API
+ // (i.e. both NYT::Initialize and NMR::Initialize are called).
+ WriteVersionToLog();
+ return;
+ }
+
+ TString jobName(argv[2]);
+ i64 outputTableCount = FromString<i64>(argv[3]);
+ int hasState = FromString<int>(argv[4]);
+ execJobImpl(jobName, outputTableCount, hasState);
+ Y_UNREACHABLE();
+}
+
+} // namespace NDetail
+
+////////////////////////////////////////////////////////////////////////////////
+
+void JoblessInitialize(const TInitializeOptions& options)
+{
+ static const char* fakeArgv[] = {"unknown..."};
+ NDetail::CommonInitialize(1, fakeArgv);
+ NDetail::NonJobInitialize(options);
+ NDetail::ElevateInitStatus(NDetail::EInitStatus::JoblessInitialization);
+}
+
+void Initialize(int argc, const char* argv[], const TInitializeOptions& options)
+{
+ NDetail::CommonInitialize(argc, argv);
+
+ NDetail::ElevateInitStatus(NDetail::EInitStatus::FullInitialization);
+
+ const bool isInsideJob = !GetEnv("YT_JOB_ID").empty();
+ if (isInsideJob) {
+ NDetail::ExecJob(argc, argv, options);
+ } else {
+ NDetail::NonJobInitialize(options);
+ }
+}
+
+void Initialize(int argc, char* argv[], const TInitializeOptions& options)
+{
+ return Initialize(argc, const_cast<const char**>(argv), options);
+}
+
+void Initialize(const TInitializeOptions& options)
+{
+ static const char* fakeArgv[] = {"unknown..."};
+ Initialize(1, fakeArgv, options);
+}
+
+////////////////////////////////////////////////////////////////////////////////
+
+} // namespace NYT
diff --git a/yt/cpp/mapreduce/client/init.h b/yt/cpp/mapreduce/client/init.h
new file mode 100644
index 0000000000..af2fc80e55
--- /dev/null
+++ b/yt/cpp/mapreduce/client/init.h
@@ -0,0 +1,22 @@
+#pragma once
+
+#include <yt/cpp/mapreduce/interface/init.h>
+
+namespace NYT {
+namespace NDetail {
+
+////////////////////////////////////////////////////////////////////////////////
+
+enum class EInitStatus : int
+{
+ NotInitialized,
+ JoblessInitialization,
+ FullInitialization,
+};
+
+EInitStatus& GetInitStatus();
+
+////////////////////////////////////////////////////////////////////////////////
+
+} // namespace NDetail
+} // namespace NYT
diff --git a/yt/cpp/mapreduce/client/job_profiler.h b/yt/cpp/mapreduce/client/job_profiler.h
new file mode 100644
index 0000000000..6532871380
--- /dev/null
+++ b/yt/cpp/mapreduce/client/job_profiler.h
@@ -0,0 +1,27 @@
+#pragma once
+
+#include <memory>
+
+namespace NYT {
+
+////////////////////////////////////////////////////////////////////////////////
+
+struct IJobProfiler
+{
+ virtual ~IJobProfiler() = default;
+
+ //! Starts job profiling if corresponding options are set
+ //! in environment.
+ virtual void Start() = 0;
+
+ //! Stops profiling and sends profile to job proxy.
+ virtual void Stop() = 0;
+};
+
+////////////////////////////////////////////////////////////////////////////////
+
+std::unique_ptr<IJobProfiler> CreateJobProfiler();
+
+////////////////////////////////////////////////////////////////////////////////
+
+} // namespace NYT
diff --git a/yt/cpp/mapreduce/client/lock.cpp b/yt/cpp/mapreduce/client/lock.cpp
new file mode 100644
index 0000000000..88110f9266
--- /dev/null
+++ b/yt/cpp/mapreduce/client/lock.cpp
@@ -0,0 +1,105 @@
+#include "lock.h"
+
+#include "yt_poller.h"
+
+#include <yt/cpp/mapreduce/http/retry_request.h>
+
+#include <yt/cpp/mapreduce/common/retry_lib.h>
+
+#include <yt/cpp/mapreduce/raw_client/raw_batch_request.h>
+
+#include <util/string/builder.h>
+
+namespace NYT {
+namespace NDetail {
+
+using namespace NRawClient;
+
+////////////////////////////////////////////////////////////////////////////////
+
+class TLockPollerItem
+ : public IYtPollerItem
+{
+public:
+ TLockPollerItem(const TLockId& lockId, ::NThreading::TPromise<void> acquired)
+ : LockStateYPath_("#" + GetGuidAsString(lockId) + "/@state")
+ , Acquired_(acquired)
+ { }
+
+ void PrepareRequest(TRawBatchRequest* batchRequest) override
+ {
+ LockState_ = batchRequest->Get(TTransactionId(), LockStateYPath_, TGetOptions());
+ }
+
+ EStatus OnRequestExecuted() override
+ {
+ try {
+ const auto& state = LockState_.GetValue().AsString();
+ if (state == "acquired") {
+ Acquired_.SetValue();
+ return PollBreak;
+ }
+ } catch (const TErrorResponse& e) {
+ if (!IsRetriable(e)) {
+ Acquired_.SetException(std::current_exception());
+ return PollBreak;
+ }
+ } catch (const std::exception& e) {
+ if (!IsRetriable(e)) {
+ Acquired_.SetException(std::current_exception());
+ return PollBreak;
+ }
+ }
+ return PollContinue;
+ }
+
+ void OnItemDiscarded() override
+ {
+ Acquired_.SetException(std::make_exception_ptr(yexception() << "Operation cancelled"));
+ }
+
+private:
+ const TString LockStateYPath_;
+ ::NThreading::TPromise<void> Acquired_;
+
+ ::NThreading::TFuture<TNode> LockState_;
+};
+
+////////////////////////////////////////////////////////////////////////////////
+
+TLock::TLock(const TLockId& lockId, TClientPtr client, bool waitable)
+ : LockId_(lockId)
+ , Client_(std::move(client))
+{
+ if (!waitable) {
+ Acquired_ = ::NThreading::MakeFuture();
+ }
+}
+
+const TLockId& TLock::GetId() const
+{
+ return LockId_;
+}
+
+TNodeId TLock::GetLockedNodeId() const
+{
+ auto nodeIdNode = Client_->Get(
+ ::TStringBuilder() << '#' << GetGuidAsString(LockId_) << "/@node_id",
+ TGetOptions());
+ return GetGuid(nodeIdNode.AsString());
+}
+
+const ::NThreading::TFuture<void>& TLock::GetAcquiredFuture() const
+{
+ if (!Acquired_) {
+ auto promise = ::NThreading::NewPromise<void>();
+ Client_->GetYtPoller().Watch(::MakeIntrusive<TLockPollerItem>(LockId_, promise));
+ Acquired_ = promise.GetFuture();
+ }
+ return *Acquired_;
+}
+
+////////////////////////////////////////////////////////////////////////////////
+
+} // namespace NDetail
+} // namespace NYT
diff --git a/yt/cpp/mapreduce/client/lock.h b/yt/cpp/mapreduce/client/lock.h
new file mode 100644
index 0000000000..7e2c7a127d
--- /dev/null
+++ b/yt/cpp/mapreduce/client/lock.h
@@ -0,0 +1,31 @@
+#pragma once
+
+#include "client.h"
+
+#include <yt/cpp/mapreduce/interface/client.h>
+
+namespace NYT {
+namespace NDetail {
+
+////////////////////////////////////////////////////////////////////////////////
+
+class TLock
+ : public ILock
+{
+public:
+ TLock(const TLockId& lockId, TClientPtr client, bool waitable);
+
+ virtual const TLockId& GetId() const override;
+ virtual TNodeId GetLockedNodeId() const override;
+ virtual const ::NThreading::TFuture<void>& GetAcquiredFuture() const override;
+
+private:
+ const TLockId LockId_;
+ mutable TMaybe<::NThreading::TFuture<void>> Acquired_;
+ TClientPtr Client_;
+};
+
+////////////////////////////////////////////////////////////////////////////////
+
+} // namespace NDetail
+} // namespace NYT
diff --git a/yt/cpp/mapreduce/client/operation.cpp b/yt/cpp/mapreduce/client/operation.cpp
new file mode 100644
index 0000000000..fc1600c240
--- /dev/null
+++ b/yt/cpp/mapreduce/client/operation.cpp
@@ -0,0 +1,2981 @@
+#include "operation.h"
+
+#include "abortable_registry.h"
+#include "client.h"
+#include "operation_helpers.h"
+#include "operation_tracker.h"
+#include "transaction.h"
+#include "prepare_operation.h"
+#include "retry_heavy_write_request.h"
+#include "skiff.h"
+#include "structured_table_formats.h"
+#include "yt_poller.h"
+
+#include <yt/cpp/mapreduce/common/helpers.h>
+#include <yt/cpp/mapreduce/common/retry_lib.h>
+#include <yt/cpp/mapreduce/common/wait_proxy.h>
+
+#include <yt/cpp/mapreduce/interface/config.h>
+#include <yt/cpp/mapreduce/interface/errors.h>
+#include <yt/cpp/mapreduce/interface/fluent.h>
+#include <yt/cpp/mapreduce/interface/format.h>
+#include <yt/cpp/mapreduce/interface/job_statistics.h>
+#include <yt/cpp/mapreduce/interface/protobuf_format.h>
+
+#include <yt/cpp/mapreduce/interface/logging/yt_log.h>
+#include <yt/cpp/mapreduce/interface/logging/yt_log.h>
+
+#include <yt/cpp/mapreduce/http/requests.h>
+#include <yt/cpp/mapreduce/http/retry_request.h>
+
+#include <yt/cpp/mapreduce/io/job_reader.h>
+#include <yt/cpp/mapreduce/io/job_writer.h>
+#include <yt/cpp/mapreduce/io/yamr_table_reader.h>
+#include <yt/cpp/mapreduce/io/yamr_table_writer.h>
+#include <yt/cpp/mapreduce/io/node_table_reader.h>
+#include <yt/cpp/mapreduce/io/node_table_writer.h>
+#include <yt/cpp/mapreduce/io/proto_table_reader.h>
+#include <yt/cpp/mapreduce/io/proto_table_writer.h>
+#include <yt/cpp/mapreduce/io/proto_helpers.h>
+#include <yt/cpp/mapreduce/io/skiff_table_reader.h>
+
+#include <yt/cpp/mapreduce/raw_client/raw_batch_request.h>
+#include <yt/cpp/mapreduce/raw_client/raw_requests.h>
+
+#include <library/cpp/yson/node/serialize.h>
+
+#include <util/generic/hash_set.h>
+
+#include <util/string/builder.h>
+#include <util/string/cast.h>
+
+#include <util/system/thread.h>
+
+namespace NYT {
+namespace NDetail {
+
+using namespace NRawClient;
+
+using ::ToString;
+
+////////////////////////////////////////////////////////////////////////////////
+
+static const ui64 DefaultExrtaTmpfsSize = 1024LL * 1024LL;
+
+////////////////////////////////////////////////////////////////////////////////
+
+namespace {
+
+////////////////////////////////////////////////////////////////////////////////
+
+struct TMapReduceOperationIo
+{
+ TVector<TRichYPath> Inputs;
+ TVector<TRichYPath> MapOutputs;
+ TVector<TRichYPath> Outputs;
+
+ TMaybe<TFormat> MapperInputFormat;
+ TMaybe<TFormat> MapperOutputFormat;
+
+ TMaybe<TFormat> ReduceCombinerInputFormat;
+ TMaybe<TFormat> ReduceCombinerOutputFormat;
+
+ TFormat ReducerInputFormat = TFormat::YsonBinary();
+ TFormat ReducerOutputFormat = TFormat::YsonBinary();
+
+ TVector<TSmallJobFile> MapperJobFiles;
+ TVector<TSmallJobFile> ReduceCombinerJobFiles;
+ TVector<TSmallJobFile> ReducerJobFiles;
+};
+
+template <typename T>
+void VerifyHasElements(const TVector<T>& paths, TStringBuf name)
+{
+ if (paths.empty()) {
+ ythrow TApiUsageError() << "no " << name << " table is specified";
+ }
+}
+
+////////////////////////////////////////////////////////////////////////////////
+
+TVector<TSmallJobFile> CreateFormatConfig(
+ TMaybe<TSmallJobFile> inputConfig,
+ const TMaybe<TSmallJobFile>& outputConfig)
+{
+ TVector<TSmallJobFile> result;
+ if (inputConfig) {
+ result.push_back(std::move(*inputConfig));
+ }
+ if (outputConfig) {
+ result.push_back(std::move(*outputConfig));
+ }
+ return result;
+}
+
+template <typename T>
+ENodeReaderFormat NodeReaderFormatFromHintAndGlobalConfig(const TUserJobFormatHintsBase<T>& formatHints)
+{
+ auto result = TConfig::Get()->NodeReaderFormat;
+ if (formatHints.InputFormatHints_ && formatHints.InputFormatHints_->SkipNullValuesForTNode_) {
+ Y_ENSURE_EX(
+ result != ENodeReaderFormat::Skiff,
+ TApiUsageError() << "skiff format doesn't support SkipNullValuesForTNode format hint");
+ result = ENodeReaderFormat::Yson;
+ }
+ return result;
+}
+
+template <class TSpec>
+const TVector<TStructuredTablePath>& GetStructuredInputs(const TSpec& spec)
+{
+ if constexpr (std::is_same_v<TSpec, TVanillaTask>) {
+ static const TVector<TStructuredTablePath> empty;
+ return empty;
+ } else {
+ return spec.GetStructuredInputs();
+ }
+}
+
+template <class TSpec>
+const TVector<TStructuredTablePath>& GetStructuredOutputs(const TSpec& spec)
+{
+ return spec.GetStructuredOutputs();
+}
+
+template <class TSpec>
+const TMaybe<TFormatHints>& GetInputFormatHints(const TSpec& spec)
+{
+ if constexpr (std::is_same_v<TSpec, TVanillaTask>) {
+ static const TMaybe<TFormatHints> empty = Nothing();
+ return empty;
+ } else {
+ return spec.InputFormatHints_;
+ }
+}
+
+template <class TSpec>
+const TMaybe<TFormatHints>& GetOutputFormatHints(const TSpec& spec)
+{
+ return spec.OutputFormatHints_;
+}
+
+template <class TSpec>
+ENodeReaderFormat GetNodeReaderFormat(const TSpec& spec, bool allowSkiff)
+{
+ if constexpr (std::is_same<TSpec, TVanillaTask>::value) {
+ return ENodeReaderFormat::Yson;
+ } else {
+ return allowSkiff
+ ? NodeReaderFormatFromHintAndGlobalConfig(spec)
+ : ENodeReaderFormat::Yson;
+ }
+}
+
+static void SortColumnsToNames(const TSortColumns& sortColumns, THashSet<TString>* result)
+{
+ auto names = sortColumns.GetNames();
+ result->insert(names.begin(), names.end());
+}
+
+static THashSet<TString> SortColumnsToNames(const TSortColumns& sortColumns)
+{
+ THashSet<TString> columnNames;
+ SortColumnsToNames(sortColumns, &columnNames);
+ return columnNames;
+}
+
+THashSet<TString> GetColumnsUsedInOperation(const TJoinReduceOperationSpec& spec)
+{
+ return SortColumnsToNames(spec.JoinBy_);
+}
+
+THashSet<TString> GetColumnsUsedInOperation(const TReduceOperationSpec& spec) {
+ auto result = SortColumnsToNames(spec.SortBy_);
+ SortColumnsToNames(spec.ReduceBy_, &result);
+ if (spec.JoinBy_) {
+ SortColumnsToNames(*spec.JoinBy_, &result);
+ }
+ return result;
+}
+
+THashSet<TString> GetColumnsUsedInOperation(const TMapReduceOperationSpec& spec)
+{
+ auto result = SortColumnsToNames(spec.SortBy_);
+ SortColumnsToNames(spec.ReduceBy_, &result);
+ return result;
+}
+
+THashSet<TString> GetColumnsUsedInOperation(const TMapOperationSpec&)
+{
+ return THashSet<TString>();
+}
+
+THashSet<TString> GetColumnsUsedInOperation(const TVanillaTask&)
+{
+ return THashSet<TString>();
+}
+
+TStructuredJobTableList ApplyProtobufColumnFilters(
+ const TStructuredJobTableList& tableList,
+ const TOperationPreparer& preparer,
+ const THashSet<TString>& columnsUsedInOperations,
+ const TOperationOptions& options)
+{
+ bool hasInputQuery = options.Spec_.Defined() && options.Spec_->IsMap() && options.Spec_->HasKey("input_query");
+ if (hasInputQuery) {
+ return tableList;
+ }
+
+ auto isDynamic = BatchTransform(
+ CreateDefaultRequestRetryPolicy(preparer.GetContext().Config),
+ preparer.GetContext(),
+ tableList,
+ [&] (TRawBatchRequest& batch, const auto& table) {
+ return batch.Get(preparer.GetTransactionId(), table.RichYPath->Path_ + "/@dynamic", TGetOptions());
+ });
+
+ auto newTableList = tableList;
+ for (size_t tableIndex = 0; tableIndex < tableList.size(); ++tableIndex) {
+ if (isDynamic[tableIndex].AsBool()) {
+ continue;
+ }
+ auto& table = newTableList[tableIndex];
+ Y_VERIFY(table.RichYPath);
+ if (table.RichYPath->Columns_) {
+ continue;
+ }
+ if (!std::holds_alternative<TProtobufTableStructure>(table.Description)) {
+ continue;
+ }
+ const auto& descriptor = std::get<TProtobufTableStructure>(table.Description).Descriptor;
+ if (!descriptor) {
+ continue;
+ }
+ auto fromDescriptor = NDetail::InferColumnFilter(*descriptor);
+ if (!fromDescriptor) {
+ continue;
+ }
+ THashSet<TString> columns(fromDescriptor->begin(), fromDescriptor->end());
+ columns.insert(columnsUsedInOperations.begin(), columnsUsedInOperations.end());
+ table.RichYPath->Columns(TVector<TString>(columns.begin(), columns.end()));
+ }
+ return newTableList;
+}
+
+template <class TSpec>
+TSimpleOperationIo CreateSimpleOperationIo(
+ const IStructuredJob& structuredJob,
+ const TOperationPreparer& preparer,
+ const TSpec& spec,
+ const TOperationOptions& options,
+ bool allowSkiff)
+{
+ if (!std::holds_alternative<TVoidStructuredRowStream>(structuredJob.GetInputRowStreamDescription())) {
+ VerifyHasElements(GetStructuredInputs(spec), "input");
+ }
+
+ TUserJobFormatHints hints;
+ hints.InputFormatHints_ = GetInputFormatHints(spec);
+ hints.OutputFormatHints_ = GetOutputFormatHints(spec);
+ ENodeReaderFormat nodeReaderFormat = GetNodeReaderFormat(spec, allowSkiff);
+
+ return CreateSimpleOperationIoHelper(
+ structuredJob,
+ preparer,
+ options,
+ CanonizeStructuredTableList(preparer.GetContext(), GetStructuredInputs(spec)),
+ CanonizeStructuredTableList(preparer.GetContext(), GetStructuredOutputs(spec)),
+ hints,
+ nodeReaderFormat,
+ GetColumnsUsedInOperation(spec));
+}
+
+template <class T>
+TSimpleOperationIo CreateSimpleOperationIo(
+ const IJob& job,
+ const TOperationPreparer& preparer,
+ const TSimpleRawOperationIoSpec<T>& spec)
+{
+ auto getFormatOrDefault = [&] (const TMaybe<TFormat>& maybeFormat, const char* formatName) {
+ if (maybeFormat) {
+ return *maybeFormat;
+ } else if (spec.Format_) {
+ return *spec.Format_;
+ } else {
+ ythrow TApiUsageError() << "Neither " << formatName << "format nor default format is specified for raw operation";
+ }
+ };
+
+ auto inputs = CanonizeYPaths(/* retryPolicy */ nullptr, preparer.GetContext(), spec.GetInputs());
+ auto outputs = CanonizeYPaths(/* retryPolicy */ nullptr, preparer.GetContext(), spec.GetOutputs());
+
+ VerifyHasElements(inputs, "input");
+ VerifyHasElements(outputs, "output");
+
+ TUserJobFormatHints hints;
+
+ auto outputSchemas = PrepareOperation(
+ job,
+ TOperationPreparationContext(
+ inputs,
+ outputs,
+ preparer.GetContext(),
+ preparer.GetClientRetryPolicy(),
+ preparer.GetTransactionId()),
+ &inputs,
+ &outputs,
+ hints);
+
+ Y_VERIFY(outputs.size() == outputSchemas.size());
+ for (int i = 0; i < static_cast<int>(outputs.size()); ++i) {
+ if (!outputs[i].Schema_ && !outputSchemas[i].Columns().empty()) {
+ outputs[i].Schema_ = outputSchemas[i];
+ }
+ }
+
+ return TSimpleOperationIo {
+ inputs,
+ outputs,
+
+ getFormatOrDefault(spec.InputFormat_, "input"),
+ getFormatOrDefault(spec.OutputFormat_, "output"),
+
+ TVector<TSmallJobFile>{},
+ };
+}
+
+////////////////////////////////////////////////////////////////////
+
+TString GetJobStderrWithRetriesAndIgnoreErrors(
+ const IRequestRetryPolicyPtr& retryPolicy,
+ const TClientContext& context,
+ const TOperationId& operationId,
+ const TJobId& jobId,
+ const size_t stderrTailSize,
+ const TGetJobStderrOptions& options = TGetJobStderrOptions())
+{
+ TString jobStderr;
+ try {
+ jobStderr = GetJobStderrWithRetries(
+ retryPolicy,
+ context,
+ operationId,
+ jobId,
+ options);
+ } catch (const TErrorResponse& e) {
+ YT_LOG_ERROR("Cannot get job stderr (OperationId: %v, JobId: %v, Error: %v)",
+ operationId,
+ jobId,
+ e.what());
+ }
+ if (jobStderr.size() > stderrTailSize) {
+ jobStderr = jobStderr.substr(jobStderr.size() - stderrTailSize, stderrTailSize);
+ }
+ return jobStderr;
+}
+
+TVector<TFailedJobInfo> GetFailedJobInfo(
+ const IClientRetryPolicyPtr& clientRetryPolicy,
+ const TClientContext& context,
+ const TOperationId& operationId,
+ const TGetFailedJobInfoOptions& options)
+{
+ const auto listJobsResult = ListJobs(
+ clientRetryPolicy->CreatePolicyForGenericRequest(),
+ context,
+ operationId,
+ TListJobsOptions()
+ .State(EJobState::Failed)
+ .Limit(options.MaxJobCount_));
+
+ const auto stderrTailSize = options.StderrTailSize_;
+
+ TVector<TFailedJobInfo> result;
+ for (const auto& job : listJobsResult.Jobs) {
+ auto& info = result.emplace_back();
+ Y_ENSURE(job.Id);
+ info.JobId = *job.Id;
+ info.Error = job.Error.GetOrElse(TYtError(TString("unknown error")));
+ if (job.StderrSize.GetOrElse(0) != 0) {
+ // There are cases when due to bad luck we cannot read stderr even if
+ // list_jobs reports that stderr_size > 0.
+ //
+ // Such errors don't have special error code
+ // so we ignore all errors and try our luck on other jobs.
+ info.Stderr = GetJobStderrWithRetriesAndIgnoreErrors(
+ clientRetryPolicy->CreatePolicyForGenericRequest(),
+ context,
+ operationId,
+ *job.Id,
+ stderrTailSize);
+ }
+ }
+ return result;
+}
+
+struct TGetJobsStderrOptions
+{
+ using TSelf = TGetJobsStderrOptions;
+
+ // How many jobs to download. Which jobs will be chosen is undefined.
+ FLUENT_FIELD_DEFAULT(ui64, MaxJobCount, 10);
+
+ // How much of stderr should be downloaded.
+ FLUENT_FIELD_DEFAULT(ui64, StderrTailSize, 64 * 1024);
+};
+
+static TVector<TString> GetJobsStderr(
+ const IClientRetryPolicyPtr& clientRetryPolicy,
+ const TClientContext& context,
+ const TOperationId& operationId,
+ const TGetJobsStderrOptions& options = TGetJobsStderrOptions())
+{
+ const auto listJobsResult = ListJobs(
+ clientRetryPolicy->CreatePolicyForGenericRequest(),
+ context,
+ operationId,
+ TListJobsOptions().Limit(options.MaxJobCount_).WithStderr(true));
+ const auto stderrTailSize = options.StderrTailSize_;
+ TVector<TString> result;
+ for (const auto& job : listJobsResult.Jobs) {
+ result.push_back(
+ // There are cases when due to bad luck we cannot read stderr even if
+ // list_jobs reports that stderr_size > 0.
+ //
+ // Such errors don't have special error code
+ // so we ignore all errors and try our luck on other jobs.
+ GetJobStderrWithRetriesAndIgnoreErrors(
+ clientRetryPolicy->CreatePolicyForGenericRequest(),
+ context,
+ operationId,
+ *job.Id,
+ stderrTailSize)
+ );
+ }
+ return result;
+}
+
+int CountIntermediateTables(const TStructuredJobTableList& tables)
+{
+ int result = 0;
+ for (const auto& table : tables) {
+ if (table.RichYPath) {
+ break;
+ }
+ ++result;
+ }
+ return result;
+}
+
+////////////////////////////////////////////////////////////////////////////////
+
+} // namespace
+
+////////////////////////////////////////////////////////////////////////////////
+
+TSimpleOperationIo CreateSimpleOperationIoHelper(
+ const IStructuredJob& structuredJob,
+ const TOperationPreparer& preparer,
+ const TOperationOptions& options,
+ TStructuredJobTableList structuredInputs,
+ TStructuredJobTableList structuredOutputs,
+ TUserJobFormatHints hints,
+ ENodeReaderFormat nodeReaderFormat,
+ const THashSet<TString>& columnsUsedInOperations)
+{
+ auto intermediateInputTableCount = CountIntermediateTables(structuredInputs);
+ auto intermediateOutputTableCount = CountIntermediateTables(structuredOutputs);
+
+ auto jobSchemaInferenceResult = PrepareOperation(
+ structuredJob,
+ TOperationPreparationContext(
+ structuredInputs,
+ structuredOutputs,
+ preparer.GetContext(),
+ preparer.GetClientRetryPolicy(),
+ preparer.GetTransactionId()),
+ &structuredInputs,
+ &structuredOutputs,
+ hints);
+
+ TVector<TSmallJobFile> formatConfigList;
+ TFormatBuilder formatBuilder(preparer.GetClientRetryPolicy(), preparer.GetContext(), preparer.GetTransactionId(), options);
+
+ auto [inputFormat, inputFormatConfig] = formatBuilder.CreateFormat(
+ structuredJob,
+ EIODirection::Input,
+ structuredInputs,
+ hints.InputFormatHints_,
+ nodeReaderFormat,
+ /* allowFormatFromTableAttribute = */ true);
+
+ auto [outputFormat, outputFormatConfig] = formatBuilder.CreateFormat(
+ structuredJob,
+ EIODirection::Output,
+ structuredOutputs,
+ hints.OutputFormatHints_,
+ ENodeReaderFormat::Yson,
+ /* allowFormatFromTableAttribute = */ false);
+
+ const bool inferOutputSchema = options.InferOutputSchema_.GetOrElse(preparer.GetContext().Config->InferTableSchema);
+
+ auto outputPaths = GetPathList(
+ TStructuredJobTableList(structuredOutputs.begin() + intermediateOutputTableCount, structuredOutputs.end()),
+ TVector<TTableSchema>(jobSchemaInferenceResult.begin() + intermediateOutputTableCount, jobSchemaInferenceResult.end()),
+ inferOutputSchema);
+
+ auto inputPaths = GetPathList(
+ ApplyProtobufColumnFilters(
+ TStructuredJobTableList(structuredInputs.begin() + intermediateInputTableCount, structuredInputs.end()),
+ preparer,
+ columnsUsedInOperations,
+ options),
+ /*schemaInferenceResult*/ Nothing(),
+ /*inferSchema*/ false);
+
+ return TSimpleOperationIo {
+ inputPaths,
+ outputPaths,
+
+ inputFormat,
+ outputFormat,
+
+ CreateFormatConfig(inputFormatConfig, outputFormatConfig)
+ };
+}
+
+EOperationBriefState CheckOperation(
+ const IClientRetryPolicyPtr& clientRetryPolicy,
+ const TClientContext& context,
+ const TOperationId& operationId)
+{
+ auto attributes = GetOperation(
+ clientRetryPolicy->CreatePolicyForGenericRequest(),
+ context,
+ operationId,
+ TGetOperationOptions().AttributeFilter(TOperationAttributeFilter()
+ .Add(EOperationAttribute::State)
+ .Add(EOperationAttribute::Result)));
+ Y_VERIFY(attributes.BriefState,
+ "get_operation for operation %s has not returned \"state\" field",
+ GetGuidAsString(operationId).Data());
+ if (*attributes.BriefState == EOperationBriefState::Completed) {
+ return EOperationBriefState::Completed;
+ } else if (*attributes.BriefState == EOperationBriefState::Aborted || *attributes.BriefState == EOperationBriefState::Failed) {
+ YT_LOG_ERROR("Operation %v %v (%v)",
+ operationId,
+ ToString(*attributes.BriefState),
+ ToString(TOperationExecutionTimeTracker::Get()->Finish(operationId)));
+
+ auto failedJobInfoList = GetFailedJobInfo(
+ clientRetryPolicy,
+ context,
+ operationId,
+ TGetFailedJobInfoOptions());
+
+ Y_VERIFY(attributes.Result && attributes.Result->Error);
+ ythrow TOperationFailedError(
+ *attributes.BriefState == EOperationBriefState::Aborted
+ ? TOperationFailedError::Aborted
+ : TOperationFailedError::Failed,
+ operationId,
+ *attributes.Result->Error,
+ failedJobInfoList);
+ }
+ return EOperationBriefState::InProgress;
+}
+
+void WaitForOperation(
+ const IClientRetryPolicyPtr& clientRetryPolicy,
+ const TClientContext& context,
+ const TOperationId& operationId)
+{
+ const TDuration checkOperationStateInterval =
+ UseLocalModeOptimization(context, clientRetryPolicy)
+ ? Min(TDuration::MilliSeconds(100), context.Config->OperationTrackerPollPeriod)
+ : context.Config->OperationTrackerPollPeriod;
+
+ while (true) {
+ auto status = CheckOperation(clientRetryPolicy, context, operationId);
+ if (status == EOperationBriefState::Completed) {
+ YT_LOG_INFO("Operation %v completed (%v)",
+ operationId,
+ TOperationExecutionTimeTracker::Get()->Finish(operationId));
+ break;
+ }
+ TWaitProxy::Get()->Sleep(checkOperationStateInterval);
+ }
+}
+
+////////////////////////////////////////////////////////////////////////////////
+
+namespace {
+
+TNode BuildAutoMergeSpec(const TAutoMergeSpec& options)
+{
+ TNode result;
+ if (options.Mode_) {
+ result["mode"] = ToString(*options.Mode_);
+ }
+ if (options.MaxIntermediateChunkCount_) {
+ result["max_intermediate_chunk_count"] = *options.MaxIntermediateChunkCount_;
+ }
+ if (options.ChunkCountPerMergeJob_) {
+ result["chunk_count_per_merge_job"] = *options.ChunkCountPerMergeJob_;
+ }
+ if (options.ChunkSizeThreshold_) {
+ result["chunk_size_threshold"] = *options.ChunkSizeThreshold_;
+ }
+ return result;
+}
+
+TNode BuildJobProfilerSpec(const TJobProfilerSpec& profilerSpec)
+{
+ TNode result;
+ if (profilerSpec.ProfilingBinary_) {
+ result["binary"] = ToString(*profilerSpec.ProfilingBinary_);
+ }
+ if (profilerSpec.ProfilerType_) {
+ result["type"] = ToString(*profilerSpec.ProfilerType_);
+ }
+ if (profilerSpec.ProfilingProbability_) {
+ result["profiling_probability"] = *profilerSpec.ProfilingProbability_;
+ }
+ if (profilerSpec.SamplingFrequency_) {
+ result["sampling_frequency"] = *profilerSpec.SamplingFrequency_;
+ }
+
+ return result;
+}
+
+// Returns undefined node if resources doesn't contain any meaningful field
+TNode BuildSchedulerResourcesSpec(const TSchedulerResources& resources)
+{
+ TNode result;
+ if (resources.UserSlots().Defined()) {
+ result["user_slots"] = *resources.UserSlots();
+ }
+ if (resources.Cpu().Defined()) {
+ result["cpu"] = *resources.Cpu();
+ }
+ if (resources.Memory().Defined()) {
+ result["memory"] = *resources.Memory();
+ }
+ return result;
+}
+
+void BuildUserJobFluently(
+ const TJobPreparer& preparer,
+ const TMaybe<TFormat>& inputFormat,
+ const TMaybe<TFormat>& outputFormat,
+ TFluentMap fluent)
+{
+ const auto& userJobSpec = preparer.GetSpec();
+ TMaybe<i64> memoryLimit = userJobSpec.MemoryLimit_;
+ TMaybe<double> cpuLimit = userJobSpec.CpuLimit_;
+ TMaybe<ui16> portCount = userJobSpec.PortCount_;
+
+ // Use 1MB extra tmpfs size by default, it helps to detect job sandbox as tmp directory
+ // for standard python libraries. See YTADMINREQ-14505 for more details.
+ auto tmpfsSize = preparer.GetSpec().ExtraTmpfsSize_.GetOrElse(DefaultExrtaTmpfsSize);
+ if (preparer.ShouldMountSandbox()) {
+ tmpfsSize += preparer.GetTotalFileSize();
+ if (tmpfsSize == 0) {
+ // This can be a case for example when it is local mode and we don't upload binary.
+ // NOTE: YT doesn't like zero tmpfs size.
+ tmpfsSize = RoundUpFileSize(1);
+ }
+ memoryLimit = memoryLimit.GetOrElse(512ll << 20) + tmpfsSize;
+ }
+
+ fluent
+ .Item("file_paths").List(preparer.GetFiles())
+ .Item("command").Value(preparer.GetCommand())
+ .Item("class_name").Value(preparer.GetClassName())
+ .DoIf(!userJobSpec.Environment_.empty(), [&] (TFluentMap fluentMap) {
+ TNode environment;
+ for (const auto& item : userJobSpec.Environment_) {
+ environment[item.first] = item.second;
+ }
+ fluentMap.Item("environment").Value(environment);
+ })
+ .DoIf(userJobSpec.DiskSpaceLimit_.Defined(), [&] (TFluentMap fluentMap) {
+ fluentMap.Item("disk_space_limit").Value(*userJobSpec.DiskSpaceLimit_);
+ })
+ .DoIf(inputFormat.Defined(), [&] (TFluentMap fluentMap) {
+ fluentMap.Item("input_format").Value(inputFormat->Config);
+ })
+ .DoIf(outputFormat.Defined(), [&] (TFluentMap fluentMap) {
+ fluentMap.Item("output_format").Value(outputFormat->Config);
+ })
+ .DoIf(memoryLimit.Defined(), [&] (TFluentMap fluentMap) {
+ fluentMap.Item("memory_limit").Value(*memoryLimit);
+ })
+ .DoIf(userJobSpec.MemoryReserveFactor_.Defined(), [&] (TFluentMap fluentMap) {
+ fluentMap.Item("memory_reserve_factor").Value(*userJobSpec.MemoryReserveFactor_);
+ })
+ .DoIf(cpuLimit.Defined(), [&] (TFluentMap fluentMap) {
+ fluentMap.Item("cpu_limit").Value(*cpuLimit);
+ })
+ .DoIf(portCount.Defined(), [&] (TFluentMap fluentMap) {
+ fluentMap.Item("port_count").Value(*portCount);
+ })
+ .DoIf(userJobSpec.JobTimeLimit_.Defined(), [&] (TFluentMap fluentMap) {
+ fluentMap.Item("job_time_limit").Value(userJobSpec.JobTimeLimit_->MilliSeconds());
+ })
+ .DoIf(userJobSpec.NetworkProject_.Defined(), [&] (TFluentMap fluentMap) {
+ fluentMap.Item("network_project").Value(*userJobSpec.NetworkProject_);
+ })
+ .DoIf(preparer.ShouldMountSandbox(), [&] (TFluentMap fluentMap) {
+ fluentMap.Item("tmpfs_path").Value(".");
+ fluentMap.Item("tmpfs_size").Value(tmpfsSize);
+ fluentMap.Item("copy_files").Value(true);
+ })
+ .Item("profilers")
+ .BeginList()
+ .DoFor(userJobSpec.JobProfilers_, [&] (TFluentList list, const auto& jobProfiler) {
+ list.Item().Value(BuildJobProfilerSpec(jobProfiler));
+ })
+ .EndList();
+}
+
+template <typename T>
+void BuildCommonOperationPart(const TConfigPtr& config, const TOperationSpecBase<T>& baseSpec, const TOperationOptions& options, TFluentMap fluent)
+{
+ const TProcessState* properties = TProcessState::Get();
+ TString pool = config->Pool;
+
+ if (baseSpec.Pool_) {
+ pool = *baseSpec.Pool_;
+ }
+
+ fluent
+ .Item("started_by")
+ .BeginMap()
+ .Item("hostname").Value(properties->FqdnHostName)
+ .Item("pid").Value(properties->Pid)
+ .Item("user").Value(properties->UserName)
+ .Item("command").List(properties->CensoredCommandLine)
+ .Item("wrapper_version").Value(properties->ClientVersion)
+ .EndMap()
+ .DoIf(!pool.empty(), [&] (TFluentMap fluentMap) {
+ fluentMap.Item("pool").Value(pool);
+ })
+ .DoIf(baseSpec.Weight_.Defined(), [&] (TFluentMap fluentMap) {
+ fluentMap.Item("weight").Value(*baseSpec.Weight_);
+ })
+ .DoIf(baseSpec.TimeLimit_.Defined(), [&] (TFluentMap fluentMap) {
+ fluentMap.Item("time_limit").Value(baseSpec.TimeLimit_->MilliSeconds());
+ })
+ .DoIf(baseSpec.PoolTrees().Defined(), [&] (TFluentMap fluentMap) {
+ TNode poolTreesSpec = TNode::CreateList();
+ for (const auto& tree : *baseSpec.PoolTrees()) {
+ poolTreesSpec.Add(tree);
+ }
+ fluentMap.Item("pool_trees").Value(poolTreesSpec);
+ })
+ .DoIf(baseSpec.ResourceLimits().Defined(), [&] (TFluentMap fluentMap) {
+ auto resourceLimitsSpec = BuildSchedulerResourcesSpec(*baseSpec.ResourceLimits());
+ if (!resourceLimitsSpec.IsUndefined()) {
+ fluentMap.Item("resource_limits").Value(std::move(resourceLimitsSpec));
+ }
+ })
+ .DoIf(options.SecureVault_.Defined(), [&] (TFluentMap fluentMap) {
+ Y_ENSURE(options.SecureVault_->IsMap(),
+ "SecureVault must be a map node, got " << options.SecureVault_->GetType());
+ fluentMap.Item("secure_vault").Value(*options.SecureVault_);
+ })
+ .DoIf(baseSpec.Title_.Defined(), [&] (TFluentMap fluentMap) {
+ fluentMap.Item("title").Value(*baseSpec.Title_);
+ });
+}
+
+template <typename TSpec>
+void BuildCommonUserOperationPart(const TSpec& baseSpec, TNode* spec)
+{
+ if (baseSpec.MaxFailedJobCount_.Defined()) {
+ (*spec)["max_failed_job_count"] = *baseSpec.MaxFailedJobCount_;
+ }
+ if (baseSpec.FailOnJobRestart_.Defined()) {
+ (*spec)["fail_on_job_restart"] = *baseSpec.FailOnJobRestart_;
+ }
+ if (baseSpec.StderrTablePath_.Defined()) {
+ (*spec)["stderr_table_path"] = *baseSpec.StderrTablePath_;
+ }
+ if (baseSpec.CoreTablePath_.Defined()) {
+ (*spec)["core_table_path"] = *baseSpec.CoreTablePath_;
+ }
+ if (baseSpec.WaitingJobTimeout_.Defined()) {
+ (*spec)["waiting_job_timeout"] = baseSpec.WaitingJobTimeout_->MilliSeconds();
+ }
+}
+
+template <typename TSpec>
+void BuildJobCountOperationPart(const TSpec& spec, TNode* nodeSpec)
+{
+ if (spec.JobCount_.Defined()) {
+ (*nodeSpec)["job_count"] = *spec.JobCount_;
+ }
+ if (spec.DataSizePerJob_.Defined()) {
+ (*nodeSpec)["data_size_per_job"] = *spec.DataSizePerJob_;
+ }
+}
+
+template <typename TSpec>
+void BuildPartitionCountOperationPart(const TSpec& spec, TNode* nodeSpec)
+{
+ if (spec.PartitionCount_.Defined()) {
+ (*nodeSpec)["partition_count"] = *spec.PartitionCount_;
+ }
+ if (spec.PartitionDataSize_.Defined()) {
+ (*nodeSpec)["partition_data_size"] = *spec.PartitionDataSize_;
+ }
+}
+
+template <typename TSpec>
+void BuildDataSizePerSortJobPart(const TSpec& spec, TNode* nodeSpec)
+{
+ if (spec.DataSizePerSortJob_.Defined()) {
+ (*nodeSpec)["data_size_per_sort_job"] = *spec.DataSizePerSortJob_;
+ }
+}
+
+template <typename TSpec>
+void BuildPartitionJobCountOperationPart(const TSpec& spec, TNode* nodeSpec)
+{
+ if (spec.PartitionJobCount_.Defined()) {
+ (*nodeSpec)["partition_job_count"] = *spec.PartitionJobCount_;
+ }
+ if (spec.DataSizePerPartitionJob_.Defined()) {
+ (*nodeSpec)["data_size_per_partition_job"] = *spec.DataSizePerPartitionJob_;
+ }
+}
+
+template <typename TSpec>
+void BuildMapJobCountOperationPart(const TSpec& spec, TNode* nodeSpec)
+{
+ if (spec.MapJobCount_.Defined()) {
+ (*nodeSpec)["map_job_count"] = *spec.MapJobCount_;
+ }
+ if (spec.DataSizePerMapJob_.Defined()) {
+ (*nodeSpec)["data_size_per_map_job"] = *spec.DataSizePerMapJob_;
+ }
+}
+
+template <typename TSpec>
+void BuildIntermediateDataPart(const TSpec& spec, TNode* nodeSpec)
+{
+ if (spec.IntermediateDataAccount_.Defined()) {
+ (*nodeSpec)["intermediate_data_account"] = *spec.IntermediateDataAccount_;
+ }
+ if (spec.IntermediateDataReplicationFactor_.Defined()) {
+ (*nodeSpec)["intermediate_data_replication_factor"] = *spec.IntermediateDataReplicationFactor_;
+ }
+}
+
+////////////////////////////////////////////////////////////////////////////////
+
+TNode MergeSpec(TNode dst, TNode spec, const TOperationOptions& options)
+{
+ MergeNodes(dst["spec"], spec);
+ if (options.Spec_) {
+ MergeNodes(dst["spec"], *options.Spec_);
+ }
+ return dst;
+}
+
+template <typename TSpec>
+void CreateDebugOutputTables(const TSpec& spec, const TOperationPreparer& preparer)
+{
+ if (spec.StderrTablePath_.Defined()) {
+ NYT::NDetail::Create(
+ preparer.GetClientRetryPolicy()->CreatePolicyForGenericRequest(),
+ preparer.GetContext(),
+ TTransactionId(),
+ *spec.StderrTablePath_,
+ NT_TABLE,
+ TCreateOptions()
+ .IgnoreExisting(true)
+ .Recursive(true));
+ }
+ if (spec.CoreTablePath_.Defined()) {
+ NYT::NDetail::Create(
+ preparer.GetClientRetryPolicy()->CreatePolicyForGenericRequest(),
+ preparer.GetContext(),
+ TTransactionId(),
+ *spec.CoreTablePath_,
+ NT_TABLE,
+ TCreateOptions()
+ .IgnoreExisting(true)
+ .Recursive(true));
+ }
+}
+
+void CreateOutputTable(
+ const TOperationPreparer& preparer,
+ const TRichYPath& path)
+{
+ Y_ENSURE(path.Path_, "Output table is not set");
+ Create(
+ preparer.GetClientRetryPolicy()->CreatePolicyForGenericRequest(),
+ preparer.GetContext(), preparer.GetTransactionId(), path.Path_, NT_TABLE,
+ TCreateOptions()
+ .IgnoreExisting(true)
+ .Recursive(true));
+}
+
+void CreateOutputTables(
+ const TOperationPreparer& preparer,
+ const TVector<TRichYPath>& paths)
+{
+ for (auto& path : paths) {
+ CreateOutputTable(preparer, path);
+ }
+}
+
+void CheckInputTablesExist(
+ const TOperationPreparer& preparer,
+ const TVector<TRichYPath>& paths)
+{
+ Y_ENSURE(!paths.empty(), "Input tables are not set");
+ for (auto& path : paths) {
+ auto curTransactionId = path.TransactionId_.GetOrElse(preparer.GetTransactionId());
+ Y_ENSURE_EX(
+ Exists(
+ preparer.GetClientRetryPolicy()->CreatePolicyForGenericRequest(),
+ preparer.GetContext(),
+ curTransactionId,
+ path.Path_),
+ TApiUsageError() << "Input table '" << path.Path_ << "' doesn't exist");
+ }
+}
+
+void LogJob(const TOperationId& opId, const IJob* job, const char* type)
+{
+ if (job) {
+ YT_LOG_INFO("Operation %v; %v = %v",
+ opId,
+ type,
+ TJobFactory::Get()->GetJobName(job));
+ }
+}
+
+void LogYPaths(const TOperationId& opId, const TVector<TRichYPath>& paths, const char* type)
+{
+ for (size_t i = 0; i < paths.size(); ++i) {
+ YT_LOG_INFO("Operation %v; %v[%v] = %v",
+ opId,
+ type,
+ i,
+ paths[i].Path_);
+ }
+}
+
+void LogYPath(const TOperationId& opId, const TRichYPath& path, const char* type)
+{
+ YT_LOG_INFO("Operation %v; %v = %v",
+ opId,
+ type,
+ path.Path_);
+}
+
+TString AddModeToTitleIfDebug(const TString& title) {
+#ifndef NDEBUG
+ return title + " (debug build)";
+#else
+ return title;
+#endif
+}
+
+} // namespace
+
+////////////////////////////////////////////////////////////////////////////////
+
+template <typename T>
+void DoExecuteMap(
+ const TOperationPtr& operation,
+ const TOperationPreparerPtr& preparer,
+ const TSimpleOperationIo& operationIo,
+ TMapOperationSpecBase<T> spec,
+ const IJobPtr& mapper,
+ const TOperationOptions& options)
+{
+ if (options.CreateDebugOutputTables_) {
+ CreateDebugOutputTables(spec, *preparer);
+ }
+ if (options.CreateOutputTables_) {
+ CheckInputTablesExist(*preparer, operationIo.Inputs);
+ CreateOutputTables(*preparer, operationIo.Outputs);
+ }
+
+ TJobPreparer map(
+ *preparer,
+ spec.MapperSpec_,
+ *mapper,
+ operationIo.Outputs.size(),
+ operationIo.JobFiles,
+ options);
+
+ spec.Title_ = spec.Title_.GetOrElse(AddModeToTitleIfDebug(map.GetClassName()));
+
+ TNode specNode = BuildYsonNodeFluently()
+ .BeginMap().Item("spec").BeginMap()
+ .Item("mapper").DoMap([&] (TFluentMap fluent) {
+ BuildUserJobFluently(
+ map,
+ operationIo.InputFormat,
+ operationIo.OutputFormat,
+ fluent);
+ })
+ .DoIf(spec.AutoMerge_.Defined(), [&] (TFluentMap fluent) {
+ auto autoMergeSpec = BuildAutoMergeSpec(*spec.AutoMerge_);
+ if (!autoMergeSpec.IsUndefined()) {
+ fluent.Item("auto_merge").Value(std::move(autoMergeSpec));
+ }
+ })
+ .Item("input_table_paths").List(operationIo.Inputs)
+ .Item("output_table_paths").List(operationIo.Outputs)
+ .DoIf(spec.Ordered_.Defined(), [&] (TFluentMap fluent) {
+ fluent.Item("ordered").Value(spec.Ordered_.GetRef());
+ })
+ .Do(std::bind(BuildCommonOperationPart<T>, preparer->GetContext().Config, spec, options, std::placeholders::_1))
+ .EndMap().EndMap();
+
+ specNode["spec"]["job_io"]["control_attributes"]["enable_row_index"] = TNode(true);
+ specNode["spec"]["job_io"]["control_attributes"]["enable_range_index"] = TNode(true);
+ if (!preparer->GetContext().Config->TableWriter.Empty()) {
+ specNode["spec"]["job_io"]["table_writer"] = preparer->GetContext().Config->TableWriter;
+ }
+
+ BuildCommonUserOperationPart(spec, &specNode["spec"]);
+ BuildJobCountOperationPart(spec, &specNode["spec"]);
+
+ auto startOperation = [
+ operation=operation.Get(),
+ spec=MergeSpec(std::move(specNode), preparer->GetContext().Config->Spec, options),
+ preparer,
+ operationIo,
+ mapper
+ ] () {
+ auto operationId = preparer->StartOperation(operation, "map", spec);
+
+ LogJob(operationId, mapper.Get(), "mapper");
+ LogYPaths(operationId, operationIo.Inputs, "input");
+ LogYPaths(operationId, operationIo.Outputs, "output");
+
+ return operationId;
+ };
+ operation->SetDelayedStartFunction(std::move(startOperation));
+}
+
+void ExecuteMap(
+ const TOperationPtr& operation,
+ const TOperationPreparerPtr& preparer,
+ const TMapOperationSpec& spec,
+ const ::TIntrusivePtr<IStructuredJob>& mapper,
+ const TOperationOptions& options)
+{
+ YT_LOG_DEBUG("Starting map operation (PreparationId: %v)",
+ preparer->GetPreparationId());
+ auto operationIo = CreateSimpleOperationIo(*mapper, *preparer, spec, options, /* allowSkiff = */ true);
+ DoExecuteMap(
+ operation,
+ preparer,
+ operationIo,
+ spec,
+ mapper,
+ options);
+}
+
+void ExecuteRawMap(
+ const TOperationPtr& operation,
+ const TOperationPreparerPtr& preparer,
+ const TRawMapOperationSpec& spec,
+ const ::TIntrusivePtr<IRawJob>& mapper,
+ const TOperationOptions& options)
+{
+ YT_LOG_DEBUG("Starting raw map operation (PreparationId: %v)",
+ preparer->GetPreparationId());
+ auto operationIo = CreateSimpleOperationIo(*mapper, *preparer, spec);
+ DoExecuteMap(
+ operation,
+ preparer,
+ operationIo,
+ spec,
+ mapper,
+ options);
+}
+
+////////////////////////////////////////////////////////////////////////////////
+
+template <typename T>
+void DoExecuteReduce(
+ const TOperationPtr& operation,
+ const TOperationPreparerPtr& preparer,
+ const TSimpleOperationIo& operationIo,
+ TReduceOperationSpecBase<T> spec,
+ const IJobPtr& reducer,
+ const TOperationOptions& options)
+{
+ if (options.CreateDebugOutputTables_) {
+ CreateDebugOutputTables(spec, *preparer);
+ }
+ if (options.CreateOutputTables_) {
+ CheckInputTablesExist(*preparer, operationIo.Inputs);
+ CreateOutputTables(*preparer, operationIo.Outputs);
+ }
+
+ TJobPreparer reduce(
+ *preparer,
+ spec.ReducerSpec_,
+ *reducer,
+ operationIo.Outputs.size(),
+ operationIo.JobFiles,
+ options);
+
+ spec.Title_ = spec.Title_.GetOrElse(AddModeToTitleIfDebug(reduce.GetClassName()));
+
+ TNode specNode = BuildYsonNodeFluently()
+ .BeginMap().Item("spec").BeginMap()
+ .Item("reducer").DoMap([&] (TFluentMap fluent) {
+ BuildUserJobFluently(
+ reduce,
+ operationIo.InputFormat,
+ operationIo.OutputFormat,
+ fluent);
+ })
+ .Item("sort_by").Value(spec.SortBy_)
+ .Item("reduce_by").Value(spec.ReduceBy_)
+ .DoIf(spec.JoinBy_.Defined(), [&] (TFluentMap fluent) {
+ fluent.Item("join_by").Value(spec.JoinBy_.GetRef());
+ })
+ .DoIf(spec.EnableKeyGuarantee_.Defined(), [&] (TFluentMap fluent) {
+ fluent.Item("enable_key_guarantee").Value(spec.EnableKeyGuarantee_.GetRef());
+ })
+ .Item("input_table_paths").List(operationIo.Inputs)
+ .Item("output_table_paths").List(operationIo.Outputs)
+ .Item("job_io").BeginMap()
+ .Item("control_attributes").BeginMap()
+ .Item("enable_key_switch").Value(true)
+ .Item("enable_row_index").Value(true)
+ .Item("enable_range_index").Value(true)
+ .EndMap()
+ .DoIf(!preparer->GetContext().Config->TableWriter.Empty(), [&] (TFluentMap fluent) {
+ fluent.Item("table_writer").Value(preparer->GetContext().Config->TableWriter);
+ })
+ .EndMap()
+ .DoIf(spec.AutoMerge_.Defined(), [&] (TFluentMap fluent) {
+ fluent.Item("auto_merge").Value(BuildAutoMergeSpec(*spec.AutoMerge_));
+ })
+ .Do(std::bind(BuildCommonOperationPart<T>, preparer->GetContext().Config, spec, options, std::placeholders::_1))
+ .EndMap().EndMap();
+
+ BuildCommonUserOperationPart(spec, &specNode["spec"]);
+ BuildJobCountOperationPart(spec, &specNode["spec"]);
+
+ auto startOperation = [
+ operation=operation.Get(),
+ spec=MergeSpec(std::move(specNode), preparer->GetContext().Config->Spec, options),
+ preparer,
+ operationIo,
+ reducer
+ ] () {
+ auto operationId = preparer->StartOperation(operation, "reduce", spec);
+
+ LogJob(operationId, reducer.Get(), "reducer");
+ LogYPaths(operationId, operationIo.Inputs, "input");
+ LogYPaths(operationId, operationIo.Outputs, "output");
+
+ return operationId;
+ };
+
+ operation->SetDelayedStartFunction(std::move(startOperation));
+}
+
+void ExecuteReduce(
+ const TOperationPtr& operation,
+ const TOperationPreparerPtr& preparer,
+ const TReduceOperationSpec& spec,
+ const ::TIntrusivePtr<IStructuredJob>& reducer,
+ const TOperationOptions& options)
+{
+ YT_LOG_DEBUG("Starting reduce operation (PreparationId: %v)",
+ preparer->GetPreparationId());
+ auto operationIo = CreateSimpleOperationIo(*reducer, *preparer, spec, options, /* allowSkiff = */ false);
+ DoExecuteReduce(
+ operation,
+ preparer,
+ operationIo,
+ spec,
+ reducer,
+ options);
+}
+
+void ExecuteRawReduce(
+ const TOperationPtr& operation,
+ const TOperationPreparerPtr& preparer,
+ const TRawReduceOperationSpec& spec,
+ const ::TIntrusivePtr<IRawJob>& reducer,
+ const TOperationOptions& options)
+{
+ YT_LOG_DEBUG("Starting raw reduce operation (PreparationId: %v)",
+ preparer->GetPreparationId());
+ auto operationIo = CreateSimpleOperationIo(*reducer, *preparer, spec);
+ DoExecuteReduce(
+ operation,
+ preparer,
+ operationIo,
+ spec,
+ reducer,
+ options);
+}
+
+////////////////////////////////////////////////////////////////////////////////
+
+template <typename T>
+void DoExecuteJoinReduce(
+ const TOperationPtr& operation,
+ const TOperationPreparerPtr& preparer,
+ const TSimpleOperationIo& operationIo,
+ TJoinReduceOperationSpecBase<T> spec,
+ const IJobPtr& reducer,
+ const TOperationOptions& options)
+{
+ if (options.CreateDebugOutputTables_) {
+ CreateDebugOutputTables(spec, *preparer);
+ }
+ if (options.CreateOutputTables_) {
+ CheckInputTablesExist(*preparer, operationIo.Inputs);
+ CreateOutputTables(*preparer, operationIo.Outputs);
+ }
+
+ TJobPreparer reduce(
+ *preparer,
+ spec.ReducerSpec_,
+ *reducer,
+ operationIo.Outputs.size(),
+ operationIo.JobFiles,
+ options);
+
+ spec.Title_ = spec.Title_.GetOrElse(AddModeToTitleIfDebug(reduce.GetClassName()));
+
+ TNode specNode = BuildYsonNodeFluently()
+ .BeginMap().Item("spec").BeginMap()
+ .Item("reducer").DoMap([&] (TFluentMap fluent) {
+ BuildUserJobFluently(
+ reduce,
+ operationIo.InputFormat,
+ operationIo.OutputFormat,
+ fluent);
+ })
+ .Item("join_by").Value(spec.JoinBy_)
+ .Item("input_table_paths").List(operationIo.Inputs)
+ .Item("output_table_paths").List(operationIo.Outputs)
+ .Item("job_io").BeginMap()
+ .Item("control_attributes").BeginMap()
+ .Item("enable_key_switch").Value(true)
+ .Item("enable_row_index").Value(true)
+ .Item("enable_range_index").Value(true)
+ .EndMap()
+ .DoIf(!preparer->GetContext().Config->TableWriter.Empty(), [&] (TFluentMap fluent) {
+ fluent.Item("table_writer").Value(preparer->GetContext().Config->TableWriter);
+ })
+ .EndMap()
+ .Do(std::bind(BuildCommonOperationPart<T>, preparer->GetContext().Config, spec, options, std::placeholders::_1))
+ .EndMap().EndMap();
+
+ BuildCommonUserOperationPart(spec, &specNode["spec"]);
+ BuildJobCountOperationPart(spec, &specNode["spec"]);
+
+ auto startOperation = [
+ operation=operation.Get(),
+ spec=MergeSpec(std::move(specNode), preparer->GetContext().Config->Spec, options),
+ preparer,
+ reducer,
+ operationIo
+ ] () {
+ auto operationId = preparer->StartOperation(operation, "join_reduce", spec);
+
+ LogJob(operationId, reducer.Get(), "reducer");
+ LogYPaths(operationId, operationIo.Inputs, "input");
+ LogYPaths(operationId, operationIo.Outputs, "output");
+
+ return operationId;
+ };
+
+ operation->SetDelayedStartFunction(std::move(startOperation));
+}
+
+void ExecuteJoinReduce(
+ const TOperationPtr& operation,
+ const TOperationPreparerPtr& preparer,
+ const TJoinReduceOperationSpec& spec,
+ const ::TIntrusivePtr<IStructuredJob>& reducer,
+ const TOperationOptions& options)
+{
+ YT_LOG_DEBUG("Starting join reduce operation (PreparationId: %v)",
+ preparer->GetPreparationId());
+ auto operationIo = CreateSimpleOperationIo(*reducer, *preparer, spec, options, /* allowSkiff = */ false);
+ return DoExecuteJoinReduce(
+ operation,
+ preparer,
+ operationIo,
+ spec,
+ reducer,
+ options);
+}
+
+void ExecuteRawJoinReduce(
+ const TOperationPtr& operation,
+ const TOperationPreparerPtr& preparer,
+ const TRawJoinReduceOperationSpec& spec,
+ const ::TIntrusivePtr<IRawJob>& reducer,
+ const TOperationOptions& options)
+{
+ YT_LOG_DEBUG("Starting raw join reduce operation (PreparationId: %v)",
+ preparer->GetPreparationId());
+ auto operationIo = CreateSimpleOperationIo(*reducer, *preparer, spec);
+ return DoExecuteJoinReduce(
+ operation,
+ preparer,
+ operationIo,
+ spec,
+ reducer,
+ options);
+}
+
+////////////////////////////////////////////////////////////////////////////////
+
+template <typename T>
+void DoExecuteMapReduce(
+ const TOperationPtr& operation,
+ const TOperationPreparerPtr& preparer,
+ const TMapReduceOperationIo& operationIo,
+ TMapReduceOperationSpecBase<T> spec,
+ const IJobPtr& mapper,
+ const IJobPtr& reduceCombiner,
+ const IJobPtr& reducer,
+ const TOperationOptions& options)
+{
+ TVector<TRichYPath> allOutputs;
+ allOutputs.insert(allOutputs.end(), operationIo.MapOutputs.begin(), operationIo.MapOutputs.end());
+ allOutputs.insert(allOutputs.end(), operationIo.Outputs.begin(), operationIo.Outputs.end());
+
+ if (options.CreateDebugOutputTables_) {
+ CreateDebugOutputTables(spec, *preparer);
+ }
+ if (options.CreateOutputTables_) {
+ CheckInputTablesExist(*preparer, operationIo.Inputs);
+ CreateOutputTables(*preparer, allOutputs);
+ }
+
+ TSortColumns sortBy = spec.SortBy_;
+ TSortColumns reduceBy = spec.ReduceBy_;
+
+ if (sortBy.Parts_.empty()) {
+ sortBy = reduceBy;
+ }
+
+ const bool hasMapper = mapper != nullptr;
+ const bool hasCombiner = reduceCombiner != nullptr;
+
+ TVector<TRichYPath> files;
+
+ TJobPreparer reduce(
+ *preparer,
+ spec.ReducerSpec_,
+ *reducer,
+ operationIo.Outputs.size(),
+ operationIo.ReducerJobFiles,
+ options);
+
+ TString title;
+
+ TNode specNode = BuildYsonNodeFluently()
+ .BeginMap().Item("spec").BeginMap()
+ .DoIf(hasMapper, [&] (TFluentMap fluent) {
+ TJobPreparer map(
+ *preparer,
+ spec.MapperSpec_,
+ *mapper,
+ 1 + operationIo.MapOutputs.size(),
+ operationIo.MapperJobFiles,
+ options);
+ fluent.Item("mapper").DoMap([&] (TFluentMap fluent) {
+ BuildUserJobFluently(
+ std::cref(map),
+ *operationIo.MapperInputFormat,
+ *operationIo.MapperOutputFormat,
+ fluent);
+ });
+
+ title = "mapper:" + map.GetClassName() + " ";
+ })
+ .DoIf(hasCombiner, [&] (TFluentMap fluent) {
+ TJobPreparer combine(
+ *preparer,
+ spec.ReduceCombinerSpec_,
+ *reduceCombiner,
+ size_t(1),
+ operationIo.ReduceCombinerJobFiles,
+ options);
+ fluent.Item("reduce_combiner").DoMap([&] (TFluentMap fluent) {
+ BuildUserJobFluently(
+ combine,
+ *operationIo.ReduceCombinerInputFormat,
+ *operationIo.ReduceCombinerOutputFormat,
+ fluent);
+ });
+ title += "combiner:" + combine.GetClassName() + " ";
+ })
+ .Item("reducer").DoMap([&] (TFluentMap fluent) {
+ BuildUserJobFluently(
+ reduce,
+ operationIo.ReducerInputFormat,
+ operationIo.ReducerOutputFormat,
+ fluent);
+ })
+ .Item("sort_by").Value(sortBy)
+ .Item("reduce_by").Value(reduceBy)
+ .Item("input_table_paths").List(operationIo.Inputs)
+ .Item("output_table_paths").List(allOutputs)
+ .Item("mapper_output_table_count").Value(operationIo.MapOutputs.size())
+ .DoIf(spec.ForceReduceCombiners_.Defined(), [&] (TFluentMap fluent) {
+ fluent.Item("force_reduce_combiners").Value(*spec.ForceReduceCombiners_);
+ })
+ .Item("map_job_io").BeginMap()
+ .Item("control_attributes").BeginMap()
+ .Item("enable_row_index").Value(true)
+ .Item("enable_range_index").Value(true)
+ .EndMap()
+ .DoIf(!preparer->GetContext().Config->TableWriter.Empty(), [&] (TFluentMap fluent) {
+ fluent.Item("table_writer").Value(preparer->GetContext().Config->TableWriter);
+ })
+ .EndMap()
+ .Item("sort_job_io").BeginMap()
+ .Item("control_attributes").BeginMap()
+ .Item("enable_key_switch").Value(true)
+ .EndMap()
+ .DoIf(!preparer->GetContext().Config->TableWriter.Empty(), [&] (TFluentMap fluent) {
+ fluent.Item("table_writer").Value(preparer->GetContext().Config->TableWriter);
+ })
+ .EndMap()
+ .Item("reduce_job_io").BeginMap()
+ .Item("control_attributes").BeginMap()
+ .Item("enable_key_switch").Value(true)
+ .EndMap()
+ .DoIf(!preparer->GetContext().Config->TableWriter.Empty(), [&] (TFluentMap fluent) {
+ fluent.Item("table_writer").Value(preparer->GetContext().Config->TableWriter);
+ })
+ .EndMap()
+ .Do([&] (TFluentMap) {
+ spec.Title_ = spec.Title_.GetOrElse(AddModeToTitleIfDebug(title + "reducer:" + reduce.GetClassName()));
+ })
+ .Do(std::bind(BuildCommonOperationPart<T>, preparer->GetContext().Config, spec, options, std::placeholders::_1))
+ .EndMap().EndMap();
+
+ if (spec.Ordered_) {
+ specNode["spec"]["ordered"] = *spec.Ordered_;
+ }
+
+ BuildCommonUserOperationPart(spec, &specNode["spec"]);
+ BuildMapJobCountOperationPart(spec, &specNode["spec"]);
+ BuildPartitionCountOperationPart(spec, &specNode["spec"]);
+ BuildIntermediateDataPart(spec, &specNode["spec"]);
+ BuildDataSizePerSortJobPart(spec, &specNode["spec"]);
+
+ auto startOperation = [
+ operation=operation.Get(),
+ spec=MergeSpec(std::move(specNode), preparer->GetContext().Config->Spec, options),
+ preparer,
+ mapper,
+ reduceCombiner,
+ reducer,
+ inputs=operationIo.Inputs,
+ allOutputs
+ ] () {
+ auto operationId = preparer->StartOperation(operation, "map_reduce", spec);
+
+ LogJob(operationId, mapper.Get(), "mapper");
+ LogJob(operationId, reduceCombiner.Get(), "reduce_combiner");
+ LogJob(operationId, reducer.Get(), "reducer");
+ LogYPaths(operationId, inputs, "input");
+ LogYPaths(operationId, allOutputs, "output");
+
+ return operationId;
+ };
+
+ operation->SetDelayedStartFunction(std::move(startOperation));
+}
+
+void ExecuteMapReduce(
+ const TOperationPtr& operation,
+ const TOperationPreparerPtr& preparer,
+ const TMapReduceOperationSpec& spec_,
+ const ::TIntrusivePtr<IStructuredJob>& mapper,
+ const ::TIntrusivePtr<IStructuredJob>& reduceCombiner,
+ const ::TIntrusivePtr<IStructuredJob>& reducer,
+ const TOperationOptions& options)
+{
+ YT_LOG_DEBUG("Starting map-reduce operation (PreparationId: %v)",
+ preparer->GetPreparationId());
+ TMapReduceOperationSpec spec = spec_;
+
+ TMapReduceOperationIo operationIo;
+ auto structuredInputs = CanonizeStructuredTableList(preparer->GetContext(), spec.GetStructuredInputs());
+ auto structuredMapOutputs = CanonizeStructuredTableList(preparer->GetContext(), spec.GetStructuredMapOutputs());
+ auto structuredOutputs = CanonizeStructuredTableList(preparer->GetContext(), spec.GetStructuredOutputs());
+
+ const bool inferOutputSchema = options.InferOutputSchema_.GetOrElse(preparer->GetContext().Config->InferTableSchema);
+
+ TVector<TTableSchema> currentInferenceResult;
+
+ auto fixSpec = [&] (const TFormat& format) {
+ if (format.IsYamredDsv()) {
+ spec.SortBy_.Parts_.clear();
+ spec.ReduceBy_.Parts_.clear();
+
+ const TYamredDsvAttributes attributes = format.GetYamredDsvAttributes();
+ for (auto& column : attributes.KeyColumnNames) {
+ spec.SortBy_.Parts_.push_back(column);
+ spec.ReduceBy_.Parts_.push_back(column);
+ }
+ for (const auto& column : attributes.SubkeyColumnNames) {
+ spec.SortBy_.Parts_.push_back(column);
+ }
+ }
+ };
+
+ VerifyHasElements(structuredInputs, "inputs");
+
+ TFormatBuilder formatBuilder(
+ preparer->GetClientRetryPolicy(),
+ preparer->GetContext(),
+ preparer->GetTransactionId(),
+ options);
+
+ if (mapper) {
+ auto mapperOutputDescription =
+ spec.GetIntermediateMapOutputDescription()
+ .GetOrElse(TUnspecifiedTableStructure());
+ TStructuredJobTableList mapperOutput = {
+ TStructuredJobTable::Intermediate(mapperOutputDescription),
+ };
+
+ for (const auto& table : structuredMapOutputs) {
+ mapperOutput.push_back(TStructuredJobTable{table.Description, table.RichYPath});
+ }
+
+ auto hints = spec.MapperFormatHints_;
+
+ auto mapperInferenceResult = PrepareOperation<TStructuredJobTableList>(
+ *mapper,
+ TOperationPreparationContext(
+ structuredInputs,
+ mapperOutput,
+ preparer->GetContext(),
+ preparer->GetClientRetryPolicy(),
+ preparer->GetTransactionId()),
+ &structuredInputs,
+ /* outputs */ nullptr,
+ hints);
+
+ auto nodeReaderFormat = NodeReaderFormatFromHintAndGlobalConfig(spec.MapperFormatHints_);
+
+ auto [inputFormat, inputFormatConfig] = formatBuilder.CreateFormat(
+ *mapper,
+ EIODirection::Input,
+ structuredInputs,
+ hints.InputFormatHints_,
+ nodeReaderFormat,
+ /* allowFormatFromTableAttribute */ true);
+
+ auto [outputFormat, outputFormatConfig] = formatBuilder.CreateFormat(
+ *mapper,
+ EIODirection::Output,
+ mapperOutput,
+ hints.OutputFormatHints_,
+ ENodeReaderFormat::Yson,
+ /* allowFormatFromTableAttribute */ false);
+
+ operationIo.MapperJobFiles = CreateFormatConfig(inputFormatConfig, outputFormatConfig);
+ operationIo.MapperInputFormat = inputFormat;
+ operationIo.MapperOutputFormat = outputFormat;
+
+ Y_VERIFY(mapperInferenceResult.size() >= 1);
+ currentInferenceResult = TVector<TTableSchema>{mapperInferenceResult[0]};
+ // The first output as it corresponds to the intermediate data.
+ TVector<TTableSchema> additionalOutputsInferenceResult(mapperInferenceResult.begin() + 1, mapperInferenceResult.end());
+
+ operationIo.MapOutputs = GetPathList(
+ structuredMapOutputs,
+ additionalOutputsInferenceResult,
+ inferOutputSchema);
+ }
+
+ if (reduceCombiner) {
+ const bool isFirstStep = !mapper;
+ TStructuredJobTableList inputs;
+ if (isFirstStep) {
+ inputs = structuredInputs;
+ } else {
+ auto reduceCombinerIntermediateInput =
+ spec.GetIntermediateReduceCombinerInputDescription()
+ .GetOrElse(TUnspecifiedTableStructure());
+ inputs = {
+ TStructuredJobTable::Intermediate(reduceCombinerIntermediateInput),
+ };
+ }
+
+ auto reduceCombinerOutputDescription = spec.GetIntermediateReduceCombinerOutputDescription()
+ .GetOrElse(TUnspecifiedTableStructure());
+
+ TStructuredJobTableList outputs = {
+ TStructuredJobTable::Intermediate(reduceCombinerOutputDescription),
+ };
+
+ auto hints = spec.ReduceCombinerFormatHints_;
+
+ if (isFirstStep) {
+ currentInferenceResult = PrepareOperation<TStructuredJobTableList>(
+ *reduceCombiner,
+ TOperationPreparationContext(
+ inputs,
+ outputs,
+ preparer->GetContext(),
+ preparer->GetClientRetryPolicy(),
+ preparer->GetTransactionId()),
+ &inputs,
+ /* outputs */ nullptr,
+ hints);
+ } else {
+ currentInferenceResult = PrepareOperation<TStructuredJobTableList>(
+ *reduceCombiner,
+ TSpeculativeOperationPreparationContext(
+ currentInferenceResult,
+ inputs,
+ outputs),
+ /* inputs */ nullptr,
+ /* outputs */ nullptr,
+ hints);
+ }
+
+ auto [inputFormat, inputFormatConfig] = formatBuilder.CreateFormat(
+ *reduceCombiner,
+ EIODirection::Input,
+ inputs,
+ hints.InputFormatHints_,
+ ENodeReaderFormat::Yson,
+ /* allowFormatFromTableAttribute = */ isFirstStep);
+
+ auto [outputFormat, outputFormatConfig] = formatBuilder.CreateFormat(
+ *reduceCombiner,
+ EIODirection::Output,
+ outputs,
+ hints.OutputFormatHints_,
+ ENodeReaderFormat::Yson,
+ /* allowFormatFromTableAttribute = */ false);
+
+ operationIo.ReduceCombinerJobFiles = CreateFormatConfig(inputFormatConfig, outputFormatConfig);
+ operationIo.ReduceCombinerInputFormat = inputFormat;
+ operationIo.ReduceCombinerOutputFormat = outputFormat;
+
+ if (isFirstStep) {
+ fixSpec(*operationIo.ReduceCombinerInputFormat);
+ }
+ }
+
+ const bool isFirstStep = (!mapper && !reduceCombiner);
+ TStructuredJobTableList reducerInputs;
+ if (isFirstStep) {
+ reducerInputs = structuredInputs;
+ } else {
+ auto reducerInputDescription =
+ spec.GetIntermediateReducerInputDescription()
+ .GetOrElse(TUnspecifiedTableStructure());
+ reducerInputs = {
+ TStructuredJobTable::Intermediate(reducerInputDescription),
+ };
+ }
+
+ auto hints = spec.ReducerFormatHints_;
+
+ TVector<TTableSchema> reducerInferenceResult;
+ if (isFirstStep) {
+ reducerInferenceResult = PrepareOperation(
+ *reducer,
+ TOperationPreparationContext(
+ structuredInputs,
+ structuredOutputs,
+ preparer->GetContext(),
+ preparer->GetClientRetryPolicy(),
+ preparer->GetTransactionId()),
+ &structuredInputs,
+ &structuredOutputs,
+ hints);
+ } else {
+ reducerInferenceResult = PrepareOperation<TStructuredJobTableList>(
+ *reducer,
+ TSpeculativeOperationPreparationContext(
+ currentInferenceResult,
+ reducerInputs,
+ structuredOutputs),
+ /* inputs */ nullptr,
+ &structuredOutputs,
+ hints);
+ }
+
+ auto [inputFormat, inputFormatConfig] = formatBuilder.CreateFormat(
+ *reducer,
+ EIODirection::Input,
+ reducerInputs,
+ hints.InputFormatHints_,
+ ENodeReaderFormat::Yson,
+ /* allowFormatFromTableAttribute = */ isFirstStep);
+
+ auto [outputFormat, outputFormatConfig] = formatBuilder.CreateFormat(
+ *reducer,
+ EIODirection::Output,
+ ToStructuredJobTableList(spec.GetStructuredOutputs()),
+ hints.OutputFormatHints_,
+ ENodeReaderFormat::Yson,
+ /* allowFormatFromTableAttribute = */ false);
+ operationIo.ReducerJobFiles = CreateFormatConfig(inputFormatConfig, outputFormatConfig);
+ operationIo.ReducerInputFormat = inputFormat;
+ operationIo.ReducerOutputFormat = outputFormat;
+
+ if (isFirstStep) {
+ fixSpec(operationIo.ReducerInputFormat);
+ }
+
+ operationIo.Inputs = GetPathList(
+ ApplyProtobufColumnFilters(
+ structuredInputs,
+ *preparer,
+ GetColumnsUsedInOperation(spec),
+ options),
+ /* jobSchemaInferenceResult */ Nothing(),
+ /* inferSchema */ false);
+
+ operationIo.Outputs = GetPathList(
+ structuredOutputs,
+ reducerInferenceResult,
+ inferOutputSchema);
+
+ VerifyHasElements(operationIo.Outputs, "outputs");
+
+ return DoExecuteMapReduce(
+ operation,
+ preparer,
+ operationIo,
+ spec,
+ mapper,
+ reduceCombiner,
+ reducer,
+ options);
+}
+
+void ExecuteRawMapReduce(
+ const TOperationPtr& operation,
+ const TOperationPreparerPtr& preparer,
+ const TRawMapReduceOperationSpec& spec,
+ const ::TIntrusivePtr<IRawJob>& mapper,
+ const ::TIntrusivePtr<IRawJob>& reduceCombiner,
+ const ::TIntrusivePtr<IRawJob>& reducer,
+ const TOperationOptions& options)
+{
+ YT_LOG_DEBUG("Starting raw map-reduce operation (PreparationId: %v)",
+ preparer->GetPreparationId());
+ TMapReduceOperationIo operationIo;
+ operationIo.Inputs = CanonizeYPaths(/* retryPolicy */ nullptr, preparer->GetContext(), spec.GetInputs());
+ operationIo.MapOutputs = CanonizeYPaths(/* retryPolicy */ nullptr, preparer->GetContext(), spec.GetMapOutputs());
+ operationIo.Outputs = CanonizeYPaths(/* retryPolicy */ nullptr, preparer->GetContext(), spec.GetOutputs());
+
+ VerifyHasElements(operationIo.Inputs, "inputs");
+ VerifyHasElements(operationIo.Outputs, "outputs");
+
+ auto getFormatOrDefault = [&] (const TMaybe<TFormat>& maybeFormat, const TMaybe<TFormat> stageDefaultFormat, const char* formatName) {
+ if (maybeFormat) {
+ return *maybeFormat;
+ } else if (stageDefaultFormat) {
+ return *stageDefaultFormat;
+ } else {
+ ythrow TApiUsageError() << "Cannot derive " << formatName;
+ }
+ };
+
+ if (mapper) {
+ operationIo.MapperInputFormat = getFormatOrDefault(spec.MapperInputFormat_, spec.MapperFormat_, "mapper input format");
+ operationIo.MapperOutputFormat = getFormatOrDefault(spec.MapperOutputFormat_, spec.MapperFormat_, "mapper output format");
+ }
+
+ if (reduceCombiner) {
+ operationIo.ReduceCombinerInputFormat = getFormatOrDefault(spec.ReduceCombinerInputFormat_, spec.ReduceCombinerFormat_, "reduce combiner input format");
+ operationIo.ReduceCombinerOutputFormat = getFormatOrDefault(spec.ReduceCombinerOutputFormat_, spec.ReduceCombinerFormat_, "reduce combiner output format");
+ }
+
+ operationIo.ReducerInputFormat = getFormatOrDefault(spec.ReducerInputFormat_, spec.ReducerFormat_, "reducer input format");
+ operationIo.ReducerOutputFormat = getFormatOrDefault(spec.ReducerOutputFormat_, spec.ReducerFormat_, "reducer output format");
+
+ return DoExecuteMapReduce(
+ operation,
+ preparer,
+ operationIo,
+ spec,
+ mapper,
+ reduceCombiner,
+ reducer,
+ options);
+}
+
+void ExecuteSort(
+ const TOperationPtr& operation,
+ const TOperationPreparerPtr& preparer,
+ const TSortOperationSpec& spec,
+ const TOperationOptions& options)
+{
+ YT_LOG_DEBUG("Starting sort operation (PreparationId: %v)",
+ preparer->GetPreparationId());
+ auto inputs = CanonizeYPaths(/* retryPolicy */ nullptr, preparer->GetContext(), spec.Inputs_);
+ auto output = CanonizeYPath(nullptr, preparer->GetContext(), spec.Output_);
+
+ if (options.CreateOutputTables_) {
+ CheckInputTablesExist(*preparer, inputs);
+ CreateOutputTable(*preparer, output);
+ }
+
+ TNode specNode = BuildYsonNodeFluently()
+ .BeginMap().Item("spec").BeginMap()
+ .Item("input_table_paths").List(inputs)
+ .Item("output_table_path").Value(output)
+ .Item("sort_by").Value(spec.SortBy_)
+ .DoIf(spec.SchemaInferenceMode_.Defined(), [&] (TFluentMap fluent) {
+ fluent.Item("schema_inference_mode").Value(ToString(*spec.SchemaInferenceMode_));
+ })
+ .Do(std::bind(BuildCommonOperationPart<TSortOperationSpec>, preparer->GetContext().Config, spec, options, std::placeholders::_1))
+ .EndMap().EndMap();
+
+ BuildPartitionCountOperationPart(spec, &specNode["spec"]);
+ BuildPartitionJobCountOperationPart(spec, &specNode["spec"]);
+ BuildIntermediateDataPart(spec, &specNode["spec"]);
+
+ auto startOperation = [
+ operation=operation.Get(),
+ spec=MergeSpec(std::move(specNode), preparer->GetContext().Config->Spec, options),
+ preparer,
+ inputs,
+ output
+ ] () {
+ auto operationId = preparer->StartOperation(operation, "sort", spec);
+
+ LogYPaths(operationId, inputs, "input");
+ LogYPath(operationId, output, "output");
+
+ return operationId;
+ };
+
+ operation->SetDelayedStartFunction(std::move(startOperation));
+}
+
+void ExecuteMerge(
+ const TOperationPtr& operation,
+ const TOperationPreparerPtr& preparer,
+ const TMergeOperationSpec& spec,
+ const TOperationOptions& options)
+{
+ YT_LOG_DEBUG("Starting merge operation (PreparationId: %v)",
+ preparer->GetPreparationId());
+ auto inputs = CanonizeYPaths(/* retryPolicy */ nullptr, preparer->GetContext(), spec.Inputs_);
+ auto output = CanonizeYPath(nullptr, preparer->GetContext(), spec.Output_);
+
+ if (options.CreateOutputTables_) {
+ CheckInputTablesExist(*preparer, inputs);
+ CreateOutputTable(*preparer, output);
+ }
+
+ TNode specNode = BuildYsonNodeFluently()
+ .BeginMap().Item("spec").BeginMap()
+ .Item("input_table_paths").List(inputs)
+ .Item("output_table_path").Value(output)
+ .Item("mode").Value(ToString(spec.Mode_))
+ .Item("combine_chunks").Value(spec.CombineChunks_)
+ .Item("force_transform").Value(spec.ForceTransform_)
+ .Item("merge_by").Value(spec.MergeBy_)
+ .DoIf(spec.SchemaInferenceMode_.Defined(), [&] (TFluentMap fluent) {
+ fluent.Item("schema_inference_mode").Value(ToString(*spec.SchemaInferenceMode_));
+ })
+ .Do(std::bind(BuildCommonOperationPart<TMergeOperationSpec>, preparer->GetContext().Config, spec, options, std::placeholders::_1))
+ .EndMap().EndMap();
+
+ BuildJobCountOperationPart(spec, &specNode["spec"]);
+
+ auto startOperation = [
+ operation=operation.Get(),
+ spec=MergeSpec(std::move(specNode), preparer->GetContext().Config->Spec, options),
+ preparer,
+ inputs,
+ output
+ ] () {
+ auto operationId = preparer->StartOperation(operation, "merge", spec);
+
+ LogYPaths(operationId, inputs, "input");
+ LogYPath(operationId, output, "output");
+
+ return operationId;
+ };
+
+ operation->SetDelayedStartFunction(std::move(startOperation));
+}
+
+void ExecuteErase(
+ const TOperationPtr& operation,
+ const TOperationPreparerPtr& preparer,
+ const TEraseOperationSpec& spec,
+ const TOperationOptions& options)
+{
+ YT_LOG_DEBUG("Starting erase operation (PreparationId: %v)",
+ preparer->GetPreparationId());
+ auto tablePath = CanonizeYPath(nullptr, preparer->GetContext(), spec.TablePath_);
+
+ TNode specNode = BuildYsonNodeFluently()
+ .BeginMap().Item("spec").BeginMap()
+ .Item("table_path").Value(tablePath)
+ .Item("combine_chunks").Value(spec.CombineChunks_)
+ .DoIf(spec.SchemaInferenceMode_.Defined(), [&] (TFluentMap fluent) {
+ fluent.Item("schema_inference_mode").Value(ToString(*spec.SchemaInferenceMode_));
+ })
+ .Do(std::bind(BuildCommonOperationPart<TEraseOperationSpec>, preparer->GetContext().Config, spec, options, std::placeholders::_1))
+ .EndMap().EndMap();
+
+ auto startOperation = [
+ operation=operation.Get(),
+ spec=MergeSpec(std::move(specNode), preparer->GetContext().Config->Spec, options),
+ preparer,
+ tablePath
+ ] () {
+ auto operationId = preparer->StartOperation(operation, "erase", spec);
+
+ LogYPath(operationId, tablePath, "table_path");
+
+ return operationId;
+ };
+
+ operation->SetDelayedStartFunction(std::move(startOperation));
+}
+
+void ExecuteRemoteCopy(
+ const TOperationPtr& operation,
+ const TOperationPreparerPtr& preparer,
+ const TRemoteCopyOperationSpec& spec,
+ const TOperationOptions& options)
+{
+ YT_LOG_DEBUG("Starting remote copy operation (PreparationId: %v)",
+ preparer->GetPreparationId());
+ auto inputs = CanonizeYPaths(/* retryPolicy */ nullptr, preparer->GetContext(), spec.Inputs_);
+ auto output = CanonizeYPath(nullptr, preparer->GetContext(), spec.Output_);
+
+ if (options.CreateOutputTables_) {
+ CreateOutputTable(*preparer, output);
+ }
+
+ Y_ENSURE_EX(!spec.ClusterName_.empty(), TApiUsageError() << "ClusterName parameter is required");
+
+ TNode specNode = BuildYsonNodeFluently()
+ .BeginMap().Item("spec").BeginMap()
+ .Item("cluster_name").Value(spec.ClusterName_)
+ .Item("input_table_paths").List(inputs)
+ .Item("output_table_path").Value(output)
+ .DoIf(spec.NetworkName_.Defined(), [&] (TFluentMap fluent) {
+ fluent.Item("network_name").Value(*spec.NetworkName_);
+ })
+ .DoIf(spec.SchemaInferenceMode_.Defined(), [&] (TFluentMap fluent) {
+ fluent.Item("schema_inference_mode").Value(ToString(*spec.SchemaInferenceMode_));
+ })
+ .Item("copy_attributes").Value(spec.CopyAttributes_)
+ .DoIf(!spec.AttributeKeys_.empty(), [&] (TFluentMap fluent) {
+ Y_ENSURE_EX(spec.CopyAttributes_, TApiUsageError() <<
+ "Specifying nonempty AttributeKeys in RemoteCopy "
+ "doesn't make sense without CopyAttributes == true");
+ fluent.Item("attribute_keys").List(spec.AttributeKeys_);
+ })
+ .Do(std::bind(BuildCommonOperationPart<TRemoteCopyOperationSpec>, preparer->GetContext().Config, spec, options, std::placeholders::_1))
+ .EndMap().EndMap();
+
+ auto startOperation = [
+ operation=operation.Get(),
+ spec=MergeSpec(specNode, preparer->GetContext().Config->Spec, options),
+ preparer,
+ inputs,
+ output
+ ] () {
+ auto operationId = preparer->StartOperation(operation, "remote_copy", spec);
+
+ LogYPaths(operationId, inputs, "input");
+ LogYPath(operationId, output, "output");
+
+ return operationId;
+ };
+
+ operation->SetDelayedStartFunction(std::move(startOperation));
+}
+
+void ExecuteVanilla(
+ const TOperationPtr& operation,
+ const TOperationPreparerPtr& preparer,
+ const TVanillaOperationSpec& spec,
+ const TOperationOptions& options)
+{
+ YT_LOG_DEBUG("Starting vanilla operation (PreparationId: %v)",
+ preparer->GetPreparationId());
+
+ auto addTask = [&](TFluentMap fluent, const TVanillaTask& task) {
+ Y_VERIFY(task.Job_.Get());
+ if (std::holds_alternative<TVoidStructuredRowStream>(task.Job_->GetOutputRowStreamDescription())) {
+ Y_ENSURE_EX(task.Outputs_.empty(),
+ TApiUsageError() << "Vanilla task with void IVanillaJob doesn't expect output tables");
+ TJobPreparer jobPreparer(
+ *preparer,
+ task.Spec_,
+ *task.Job_,
+ /* outputTableCount */ 0,
+ /* smallFileList */ {},
+ options);
+ fluent
+ .Item(task.Name_).BeginMap()
+ .Item("job_count").Value(task.JobCount_)
+ .DoIf(task.NetworkProject_.Defined(), [&](TFluentMap fluent) {
+ fluent.Item("network_project").Value(*task.NetworkProject_);
+ })
+ .Do([&] (TFluentMap fluent) {
+ BuildUserJobFluently(
+ std::cref(jobPreparer),
+ /* inputFormat */ Nothing(),
+ /* outputFormat */ Nothing(),
+ fluent);
+ })
+ .EndMap();
+ } else {
+ auto operationIo = CreateSimpleOperationIo(
+ *task.Job_,
+ *preparer,
+ task,
+ options,
+ false);
+ Y_ENSURE_EX(operationIo.Outputs.size() > 0,
+ TApiUsageError() << "Vanilla task with IVanillaJob that has table writer expects output tables");
+ if (options.CreateOutputTables_) {
+ CreateOutputTables(*preparer, operationIo.Outputs);
+ }
+ TJobPreparer jobPreparer(
+ *preparer,
+ task.Spec_,
+ *task.Job_,
+ operationIo.Outputs.size(),
+ operationIo.JobFiles,
+ options);
+ fluent
+ .Item(task.Name_).BeginMap()
+ .Item("job_count").Value(task.JobCount_)
+ .DoIf(task.NetworkProject_.Defined(), [&](TFluentMap fluent) {
+ fluent.Item("network_project").Value(*task.NetworkProject_);
+ })
+ .Do([&] (TFluentMap fluent) {
+ BuildUserJobFluently(
+ std::cref(jobPreparer),
+ /* inputFormat */ Nothing(),
+ operationIo.OutputFormat,
+ fluent);
+ })
+ .Item("output_table_paths").List(operationIo.Outputs)
+ .Item("job_io").BeginMap()
+ .DoIf(!preparer->GetContext().Config->TableWriter.Empty(), [&](TFluentMap fluent) {
+ fluent.Item("table_writer").Value(preparer->GetContext().Config->TableWriter);
+ })
+ .Item("control_attributes").BeginMap()
+ .Item("enable_row_index").Value(TNode(true))
+ .Item("enable_range_index").Value(TNode(true))
+ .EndMap()
+ .EndMap()
+ .EndMap();
+ }
+ };
+
+ if (options.CreateDebugOutputTables_) {
+ CreateDebugOutputTables(spec, *preparer);
+ }
+
+ TNode specNode = BuildYsonNodeFluently()
+ .BeginMap().Item("spec").BeginMap()
+ .Item("tasks").DoMapFor(spec.Tasks_, addTask)
+ .Do(std::bind(BuildCommonOperationPart<TVanillaOperationSpec>, preparer->GetContext().Config, spec, options, std::placeholders::_1))
+ .EndMap().EndMap();
+
+ BuildCommonUserOperationPart(spec, &specNode["spec"]);
+
+ auto startOperation = [operation=operation.Get(), spec=MergeSpec(std::move(specNode), preparer->GetContext().Config->Spec, options), preparer] () {
+ auto operationId = preparer->StartOperation(operation, "vanilla", spec, /* useStartOperationRequest */ true);
+ return operationId;
+ };
+
+ operation->SetDelayedStartFunction(std::move(startOperation));
+}
+
+////////////////////////////////////////////////////////////////////////////////
+
+class TOperation::TOperationImpl
+ : public TThrRefBase
+{
+public:
+ TOperationImpl(
+ IClientRetryPolicyPtr clientRetryPolicy,
+ TClientContext context,
+ const TMaybe<TOperationId>& operationId = {})
+ : ClientRetryPolicy_(clientRetryPolicy)
+ , Context_(std::move(context))
+ , Id_(operationId)
+ , PreparedPromise_(::NThreading::NewPromise<void>())
+ , StartedPromise_(::NThreading::NewPromise<void>())
+ {
+ if (Id_) {
+ PreparedPromise_.SetValue();
+ StartedPromise_.SetValue();
+ } else {
+ PreparedPromise_.GetFuture().Subscribe([this_=::TIntrusivePtr(this)] (const ::NThreading::TFuture<void>& preparedResult) {
+ try {
+ preparedResult.GetValue();
+ } catch (...) {
+ this_->StartedPromise_.SetException(std::current_exception());
+ return;
+ }
+ });
+ }
+ }
+
+ const TOperationId& GetId() const;
+ TString GetWebInterfaceUrl() const;
+
+ void OnPrepared();
+ void SetDelayedStartFunction(std::function<TOperationId()> start);
+ void Start();
+ bool IsStarted() const;
+ void OnPreparationException(std::exception_ptr e);
+
+ TString GetStatus();
+ void OnStatusUpdated(const TString& newStatus);
+
+ ::NThreading::TFuture<void> GetPreparedFuture();
+ ::NThreading::TFuture<void> GetStartedFuture();
+ ::NThreading::TFuture<void> Watch(TClientPtr client);
+
+ EOperationBriefState GetBriefState();
+ TMaybe<TYtError> GetError();
+ TJobStatistics GetJobStatistics();
+ TMaybe<TOperationBriefProgress> GetBriefProgress();
+ void AbortOperation();
+ void CompleteOperation();
+ void SuspendOperation(const TSuspendOperationOptions& options);
+ void ResumeOperation(const TResumeOperationOptions& options);
+ TOperationAttributes GetAttributes(const TGetOperationOptions& options);
+ void UpdateParameters(const TUpdateOperationParametersOptions& options);
+ TJobAttributes GetJob(const TJobId& jobId, const TGetJobOptions& options);
+ TListJobsResult ListJobs(const TListJobsOptions& options);
+
+ void AsyncFinishOperation(TOperationAttributes operationAttributes);
+ void FinishWithException(std::exception_ptr exception);
+ void UpdateBriefProgress(TMaybe<TOperationBriefProgress> briefProgress);
+ void AnalyzeUnrecognizedSpec(TNode unrecognizedSpec);
+
+ const TClientContext& GetContext() const;
+
+private:
+ void OnStarted(const TOperationId& operationId);
+
+ void UpdateAttributesAndCall(bool needJobStatistics, std::function<void(const TOperationAttributes&)> func);
+
+ void SyncFinishOperationImpl(const TOperationAttributes&);
+ static void* SyncFinishOperationProc(void* );
+
+ void ValidateOperationStarted() const;
+
+private:
+ IClientRetryPolicyPtr ClientRetryPolicy_;
+ const TClientContext Context_;
+ TMaybe<TOperationId> Id_;
+ TMutex Lock_;
+
+ ::NThreading::TPromise<void> PreparedPromise_;
+ ::NThreading::TPromise<void> StartedPromise_;
+ TMaybe<::NThreading::TPromise<void>> CompletePromise_;
+
+ std::function<TOperationId()> DelayedStartFunction_;
+ TString Status_;
+ TOperationAttributes Attributes_;
+};
+
+////////////////////////////////////////////////////////////////////////////////
+
+class TOperationPollerItem
+ : public IYtPollerItem
+{
+public:
+ TOperationPollerItem(::TIntrusivePtr<TOperation::TOperationImpl> operationImpl)
+ : OperationImpl_(std::move(operationImpl))
+ { }
+
+ void PrepareRequest(TRawBatchRequest* batchRequest) override
+ {
+ auto filter = TOperationAttributeFilter()
+ .Add(EOperationAttribute::State)
+ .Add(EOperationAttribute::BriefProgress)
+ .Add(EOperationAttribute::Result);
+
+ if (!UnrecognizedSpecAnalyzed_) {
+ filter.Add(EOperationAttribute::UnrecognizedSpec);
+ }
+
+ OperationState_ = batchRequest->GetOperation(
+ OperationImpl_->GetId(),
+ TGetOperationOptions().AttributeFilter(filter));
+ }
+
+ EStatus OnRequestExecuted() override
+ {
+ try {
+ const auto& attributes = OperationState_.GetValue();
+ if (!UnrecognizedSpecAnalyzed_ && !attributes.UnrecognizedSpec.Empty()) {
+ OperationImpl_->AnalyzeUnrecognizedSpec(*attributes.UnrecognizedSpec);
+ UnrecognizedSpecAnalyzed_ = true;
+ }
+ Y_VERIFY(attributes.BriefState,
+ "get_operation for operation %s has not returned \"state\" field",
+ GetGuidAsString(OperationImpl_->GetId()).Data());
+ if (*attributes.BriefState != EOperationBriefState::InProgress) {
+ OperationImpl_->AsyncFinishOperation(attributes);
+ return PollBreak;
+ } else {
+ OperationImpl_->UpdateBriefProgress(attributes.BriefProgress);
+ }
+ } catch (const TErrorResponse& e) {
+ if (!IsRetriable(e)) {
+ OperationImpl_->FinishWithException(std::current_exception());
+ return PollBreak;
+ }
+ } catch (const std::exception& e) {
+ OperationImpl_->FinishWithException(std::current_exception());
+ return PollBreak;
+ }
+ return PollContinue;
+ }
+
+ void OnItemDiscarded() override {
+ OperationImpl_->FinishWithException(std::make_exception_ptr(yexception() << "Operation cancelled"));
+ }
+
+private:
+ ::TIntrusivePtr<TOperation::TOperationImpl> OperationImpl_;
+ ::NThreading::TFuture<TOperationAttributes> OperationState_;
+ bool UnrecognizedSpecAnalyzed_ = false;
+};
+
+////////////////////////////////////////////////////////////////////////////////
+
+const TOperationId& TOperation::TOperationImpl::GetId() const
+{
+ ValidateOperationStarted();
+ return *Id_;
+}
+
+TString TOperation::TOperationImpl::GetWebInterfaceUrl() const
+{
+ ValidateOperationStarted();
+ return GetOperationWebInterfaceUrl(Context_.ServerName, *Id_);
+}
+
+void TOperation::TOperationImpl::OnPrepared()
+{
+ Y_VERIFY(!PreparedPromise_.HasException() && !PreparedPromise_.HasValue());
+ PreparedPromise_.SetValue();
+}
+
+void TOperation::TOperationImpl::SetDelayedStartFunction(std::function<TOperationId()> start)
+{
+ DelayedStartFunction_ = std::move(start);
+}
+
+void TOperation::TOperationImpl::Start()
+{
+ {
+ auto guard = Guard(Lock_);
+ if (Id_) {
+ ythrow TApiUsageError() << "Start() should not be called on running operations";
+ }
+ }
+ GetPreparedFuture().GetValueSync();
+
+ std::function<TOperationId()> startStuff;
+ {
+ auto guard = Guard(Lock_);
+ startStuff.swap(DelayedStartFunction_);
+ }
+ if (!startStuff) {
+ ythrow TApiUsageError() << "Seems that Start() was called multiple times. If not, contact yt@";
+ }
+
+ TOperationId operationId;
+ try {
+ operationId = startStuff();
+ } catch (...) {
+ auto exception = std::current_exception();
+ StartedPromise_.SetException(exception);
+ std::rethrow_exception(exception);
+ }
+ OnStarted(operationId);
+}
+
+bool TOperation::TOperationImpl::IsStarted() const {
+ auto guard = Guard(Lock_);
+ return bool(Id_);
+}
+
+void TOperation::TOperationImpl::OnPreparationException(std::exception_ptr e)
+{
+ Y_VERIFY(!PreparedPromise_.HasValue() && !PreparedPromise_.HasException());
+ PreparedPromise_.SetException(e);
+}
+
+TString TOperation::TOperationImpl::GetStatus()
+{
+ {
+ auto guard = Guard(Lock_);
+ if (!Id_) {
+ return Status_;
+ }
+ }
+ TMaybe<TString> state;
+ UpdateAttributesAndCall(false, [&] (const TOperationAttributes& attributes) {
+ state = attributes.State;
+ });
+
+ return "On YT cluster: " + state.GetOrElse("undefined state");
+}
+
+void TOperation::TOperationImpl::OnStatusUpdated(const TString& newStatus)
+{
+ auto guard = Guard(Lock_);
+ Status_ = newStatus;
+}
+
+::NThreading::TFuture<void> TOperation::TOperationImpl::GetPreparedFuture()
+{
+ return PreparedPromise_.GetFuture();
+}
+
+::NThreading::TFuture<void> TOperation::TOperationImpl::GetStartedFuture()
+{
+ return StartedPromise_.GetFuture();
+}
+
+::NThreading::TFuture<void> TOperation::TOperationImpl::Watch(TClientPtr client)
+{
+ {
+ auto guard = Guard(Lock_);
+ if (CompletePromise_) {
+ return *CompletePromise_;
+ }
+ CompletePromise_ = ::NThreading::NewPromise<void>();
+ }
+ GetStartedFuture().Subscribe([
+ this_=::TIntrusivePtr(this),
+ client=std::move(client)
+ ] (const ::NThreading::TFuture<void>& startedResult) {
+ try {
+ startedResult.GetValue();
+ } catch (...) {
+ this_->CompletePromise_->SetException(std::current_exception());
+ return;
+ }
+ client->GetYtPoller().Watch(::MakeIntrusive<TOperationPollerItem>(this_));
+ auto operationId = this_->GetId();
+ auto registry = TAbortableRegistry::Get();
+ registry->Add(
+ operationId,
+ ::MakeIntrusive<TOperationAbortable>(this_->ClientRetryPolicy_, this_->Context_, operationId));
+ // We have to own an IntrusivePtr to registry to prevent use-after-free
+ auto removeOperation = [registry, operationId] (const ::NThreading::TFuture<void>&) {
+ registry->Remove(operationId);
+ };
+ this_->CompletePromise_->GetFuture().Subscribe(removeOperation);
+ });
+
+ return *CompletePromise_;
+}
+
+EOperationBriefState TOperation::TOperationImpl::GetBriefState()
+{
+ ValidateOperationStarted();
+ EOperationBriefState result = EOperationBriefState::InProgress;
+ UpdateAttributesAndCall(false, [&] (const TOperationAttributes& attributes) {
+ Y_VERIFY(attributes.BriefState,
+ "get_operation for operation %s has not returned \"state\" field",
+ GetGuidAsString(*Id_).Data());
+ result = *attributes.BriefState;
+ });
+ return result;
+}
+
+TMaybe<TYtError> TOperation::TOperationImpl::GetError()
+{
+ ValidateOperationStarted();
+ TMaybe<TYtError> result;
+ UpdateAttributesAndCall(false, [&] (const TOperationAttributes& attributes) {
+ Y_VERIFY(attributes.Result);
+ result = attributes.Result->Error;
+ });
+ return result;
+}
+
+TJobStatistics TOperation::TOperationImpl::GetJobStatistics()
+{
+ ValidateOperationStarted();
+ TJobStatistics result;
+ UpdateAttributesAndCall(true, [&] (const TOperationAttributes& attributes) {
+ if (attributes.Progress) {
+ result = attributes.Progress->JobStatistics;
+ }
+ });
+ return result;
+}
+
+TMaybe<TOperationBriefProgress> TOperation::TOperationImpl::GetBriefProgress()
+{
+ ValidateOperationStarted();
+ {
+ auto g = Guard(Lock_);
+ if (CompletePromise_.Defined()) {
+ // Poller do this job for us
+ return Attributes_.BriefProgress;
+ }
+ }
+ TMaybe<TOperationBriefProgress> result;
+ UpdateAttributesAndCall(false, [&] (const TOperationAttributes& attributes) {
+ result = attributes.BriefProgress;
+ });
+ return result;
+}
+
+void TOperation::TOperationImpl::UpdateBriefProgress(TMaybe<TOperationBriefProgress> briefProgress)
+{
+ auto g = Guard(Lock_);
+ Attributes_.BriefProgress = std::move(briefProgress);
+}
+
+void TOperation::TOperationImpl::AnalyzeUnrecognizedSpec(TNode unrecognizedSpec)
+{
+ static const TVector<TVector<TString>> knownUnrecognizedSpecFieldPaths = {
+ {"mapper", "class_name"},
+ {"reducer", "class_name"},
+ {"reduce_combiner", "class_name"},
+ };
+
+ auto removeByPath = [] (TNode& node, auto pathBegin, auto pathEnd, auto& removeByPath) {
+ if (pathBegin == pathEnd) {
+ return;
+ }
+ if (!node.IsMap()) {
+ return;
+ }
+ auto* child = node.AsMap().FindPtr(*pathBegin);
+ if (!child) {
+ return;
+ }
+ removeByPath(*child, std::next(pathBegin), pathEnd, removeByPath);
+ if (std::next(pathBegin) == pathEnd || (child->IsMap() && child->Empty())) {
+ node.AsMap().erase(*pathBegin);
+ }
+ };
+
+ Y_VERIFY(unrecognizedSpec.IsMap());
+ for (const auto& knownFieldPath : knownUnrecognizedSpecFieldPaths) {
+ Y_VERIFY(!knownFieldPath.empty());
+ removeByPath(unrecognizedSpec, knownFieldPath.cbegin(), knownFieldPath.cend(), removeByPath);
+ }
+
+ if (!unrecognizedSpec.Empty()) {
+ YT_LOG_INFO(
+ "WARNING! Unrecognized spec for operation %s is not empty "
+ "(fields added by the YT API library are excluded): %s",
+ GetGuidAsString(*Id_).Data(),
+ NodeToYsonString(unrecognizedSpec).Data());
+ }
+}
+
+void TOperation::TOperationImpl::OnStarted(const TOperationId& operationId)
+{
+ auto guard = Guard(Lock_);
+ Y_VERIFY(!Id_,
+ "OnStarted() called with operationId = %s for operation with id %s",
+ GetGuidAsString(operationId).Data(),
+ GetGuidAsString(*Id_).Data());
+ Id_ = operationId;
+
+ Y_VERIFY(!StartedPromise_.HasValue() && !StartedPromise_.HasException());
+ StartedPromise_.SetValue();
+}
+
+void TOperation::TOperationImpl::UpdateAttributesAndCall(bool needJobStatistics, std::function<void(const TOperationAttributes&)> func)
+{
+ {
+ auto g = Guard(Lock_);
+ if (Attributes_.BriefState
+ && *Attributes_.BriefState != EOperationBriefState::InProgress
+ && (!needJobStatistics || Attributes_.Progress))
+ {
+ func(Attributes_);
+ return;
+ }
+ }
+
+ TOperationAttributes attributes = NDetail::GetOperation(
+ ClientRetryPolicy_->CreatePolicyForGenericRequest(),
+ Context_,
+ *Id_,
+ TGetOperationOptions().AttributeFilter(TOperationAttributeFilter()
+ .Add(EOperationAttribute::Result)
+ .Add(EOperationAttribute::Progress)
+ .Add(EOperationAttribute::State)
+ .Add(EOperationAttribute::BriefProgress)));
+
+ func(attributes);
+
+ Y_ENSURE(attributes.BriefState);
+ if (*attributes.BriefState != EOperationBriefState::InProgress) {
+ auto g = Guard(Lock_);
+ Attributes_ = std::move(attributes);
+ }
+}
+
+void TOperation::TOperationImpl::FinishWithException(std::exception_ptr e)
+{
+ CompletePromise_->SetException(std::move(e));
+}
+
+void TOperation::TOperationImpl::AbortOperation()
+{
+ ValidateOperationStarted();
+ NYT::NDetail::AbortOperation(ClientRetryPolicy_->CreatePolicyForGenericRequest(), Context_, *Id_);
+}
+
+void TOperation::TOperationImpl::CompleteOperation()
+{
+ ValidateOperationStarted();
+ NYT::NDetail::CompleteOperation(ClientRetryPolicy_->CreatePolicyForGenericRequest(), Context_, *Id_);
+}
+
+void TOperation::TOperationImpl::SuspendOperation(const TSuspendOperationOptions& options)
+{
+ ValidateOperationStarted();
+ NYT::NDetail::SuspendOperation(ClientRetryPolicy_->CreatePolicyForGenericRequest(), Context_, *Id_, options);
+}
+
+void TOperation::TOperationImpl::ResumeOperation(const TResumeOperationOptions& options)
+{
+ ValidateOperationStarted();
+ NYT::NDetail::ResumeOperation(ClientRetryPolicy_->CreatePolicyForGenericRequest(), Context_, *Id_, options);
+}
+
+TOperationAttributes TOperation::TOperationImpl::GetAttributes(const TGetOperationOptions& options)
+{
+ ValidateOperationStarted();
+ return NYT::NDetail::GetOperation(ClientRetryPolicy_->CreatePolicyForGenericRequest(), Context_, *Id_, options);
+}
+
+void TOperation::TOperationImpl::UpdateParameters(const TUpdateOperationParametersOptions& options)
+{
+ ValidateOperationStarted();
+ return NYT::NDetail::UpdateOperationParameters(ClientRetryPolicy_->CreatePolicyForGenericRequest(), Context_, *Id_, options);
+}
+
+TJobAttributes TOperation::TOperationImpl::GetJob(const TJobId& jobId, const TGetJobOptions& options)
+{
+ ValidateOperationStarted();
+ return NYT::NDetail::GetJob(ClientRetryPolicy_->CreatePolicyForGenericRequest(), Context_, *Id_, jobId, options);
+}
+
+TListJobsResult TOperation::TOperationImpl::ListJobs(const TListJobsOptions& options)
+{
+ ValidateOperationStarted();
+ return NYT::NDetail::ListJobs(ClientRetryPolicy_->CreatePolicyForGenericRequest(), Context_, *Id_, options);
+}
+
+struct TAsyncFinishOperationsArgs
+{
+ ::TIntrusivePtr<TOperation::TOperationImpl> OperationImpl;
+ TOperationAttributes OperationAttributes;
+};
+
+void TOperation::TOperationImpl::AsyncFinishOperation(TOperationAttributes operationAttributes)
+{
+ auto args = new TAsyncFinishOperationsArgs;
+ args->OperationImpl = this;
+ args->OperationAttributes = std::move(operationAttributes);
+
+ TThread thread(TThread::TParams(&TOperation::TOperationImpl::SyncFinishOperationProc, args).SetName("finish operation"));
+ thread.Start();
+ thread.Detach();
+}
+
+void* TOperation::TOperationImpl::SyncFinishOperationProc(void* pArgs)
+{
+ THolder<TAsyncFinishOperationsArgs> args(static_cast<TAsyncFinishOperationsArgs*>(pArgs));
+ args->OperationImpl->SyncFinishOperationImpl(args->OperationAttributes);
+ return nullptr;
+}
+
+void TOperation::TOperationImpl::SyncFinishOperationImpl(const TOperationAttributes& attributes)
+{
+ {
+ auto guard = Guard(Lock_);
+ Y_VERIFY(Id_);
+ }
+ Y_VERIFY(attributes.BriefState,
+ "get_operation for operation %s has not returned \"state\" field",
+ GetGuidAsString(*Id_).Data());
+ Y_VERIFY(*attributes.BriefState != EOperationBriefState::InProgress);
+
+ {
+ try {
+ // `attributes' that came from poller don't have JobStatistics
+ // so we call `GetJobStatistics' in order to get it from server
+ // and cache inside object.
+ GetJobStatistics();
+ } catch (const TErrorResponse& ) {
+ // But if for any reason we failed to get attributes
+ // we complete operation using what we have.
+ auto g = Guard(Lock_);
+ Attributes_ = attributes;
+ }
+ }
+
+ if (*attributes.BriefState == EOperationBriefState::Completed) {
+ CompletePromise_->SetValue();
+ } else if (*attributes.BriefState == EOperationBriefState::Aborted || *attributes.BriefState == EOperationBriefState::Failed) {
+ Y_VERIFY(attributes.Result && attributes.Result->Error);
+ const auto& error = *attributes.Result->Error;
+ YT_LOG_ERROR("Operation %v is `%v' with error: %v",
+ *Id_,
+ ToString(*attributes.BriefState),
+ error.FullDescription());
+
+ TString additionalExceptionText;
+ TVector<TFailedJobInfo> failedJobStderrInfo;
+ if (*attributes.BriefState == EOperationBriefState::Failed) {
+ try {
+ failedJobStderrInfo = NYT::NDetail::GetFailedJobInfo(ClientRetryPolicy_, Context_, *Id_, TGetFailedJobInfoOptions());
+ } catch (const std::exception& e) {
+ additionalExceptionText = "Cannot get job stderrs: ";
+ additionalExceptionText += e.what();
+ }
+ }
+ CompletePromise_->SetException(
+ std::make_exception_ptr(
+ TOperationFailedError(
+ *attributes.BriefState == EOperationBriefState::Failed
+ ? TOperationFailedError::Failed
+ : TOperationFailedError::Aborted,
+ *Id_,
+ error,
+ failedJobStderrInfo) << additionalExceptionText));
+ }
+}
+
+void TOperation::TOperationImpl::ValidateOperationStarted() const
+{
+ auto guard = Guard(Lock_);
+ if (!Id_) {
+ ythrow TApiUsageError() << "Operation is not started";
+ }
+}
+
+const TClientContext& TOperation::TOperationImpl::GetContext() const
+{
+ return Context_;
+}
+
+////////////////////////////////////////////////////////////////////////////////
+
+TOperation::TOperation(TClientPtr client)
+ : Client_(std::move(client))
+ , Impl_(::MakeIntrusive<TOperationImpl>(Client_->GetRetryPolicy(), Client_->GetContext()))
+{
+}
+
+TOperation::TOperation(TOperationId id, TClientPtr client)
+ : Client_(std::move(client))
+ , Impl_(::MakeIntrusive<TOperationImpl>(Client_->GetRetryPolicy(), Client_->GetContext(), id))
+{
+}
+
+const TOperationId& TOperation::GetId() const
+{
+ return Impl_->GetId();
+}
+
+TString TOperation::GetWebInterfaceUrl() const
+{
+ return Impl_->GetWebInterfaceUrl();
+}
+
+void TOperation::OnPrepared()
+{
+ Impl_->OnPrepared();
+}
+
+void TOperation::SetDelayedStartFunction(std::function<TOperationId()> start)
+{
+ Impl_->SetDelayedStartFunction(std::move(start));
+}
+
+void TOperation::Start()
+{
+ Impl_->Start();
+}
+
+bool TOperation::IsStarted() const
+{
+ return Impl_->IsStarted();
+}
+
+void TOperation::OnPreparationException(std::exception_ptr e)
+{
+ Impl_->OnPreparationException(std::move(e));
+}
+
+TString TOperation::GetStatus() const
+{
+ return Impl_->GetStatus();
+}
+
+void TOperation::OnStatusUpdated(const TString& newStatus)
+{
+ Impl_->OnStatusUpdated(newStatus);
+}
+
+::NThreading::TFuture<void> TOperation::GetPreparedFuture()
+{
+ return Impl_->GetPreparedFuture();
+}
+
+::NThreading::TFuture<void> TOperation::GetStartedFuture()
+{
+ return Impl_->GetStartedFuture();
+}
+
+::NThreading::TFuture<void> TOperation::Watch()
+{
+ return Impl_->Watch(Client_);
+}
+
+TVector<TFailedJobInfo> TOperation::GetFailedJobInfo(const TGetFailedJobInfoOptions& options)
+{
+ return NYT::NDetail::GetFailedJobInfo(Client_->GetRetryPolicy(), Client_->GetContext(), GetId(), options);
+}
+
+EOperationBriefState TOperation::GetBriefState()
+{
+ return Impl_->GetBriefState();
+}
+
+TMaybe<TYtError> TOperation::GetError()
+{
+ return Impl_->GetError();
+}
+
+TJobStatistics TOperation::GetJobStatistics()
+{
+ return Impl_->GetJobStatistics();
+}
+
+TMaybe<TOperationBriefProgress> TOperation::GetBriefProgress()
+{
+ return Impl_->GetBriefProgress();
+}
+
+void TOperation::AbortOperation()
+{
+ Impl_->AbortOperation();
+}
+
+void TOperation::CompleteOperation()
+{
+ Impl_->CompleteOperation();
+}
+
+void TOperation::SuspendOperation(const TSuspendOperationOptions& options)
+{
+ Impl_->SuspendOperation(options);
+}
+
+void TOperation::ResumeOperation(const TResumeOperationOptions& options)
+{
+ Impl_->ResumeOperation(options);
+}
+
+TOperationAttributes TOperation::GetAttributes(const TGetOperationOptions& options)
+{
+ return Impl_->GetAttributes(options);
+}
+
+void TOperation::UpdateParameters(const TUpdateOperationParametersOptions& options)
+{
+ Impl_->UpdateParameters(options);
+}
+
+TJobAttributes TOperation::GetJob(const TJobId& jobId, const TGetJobOptions& options)
+{
+ return Impl_->GetJob(jobId, options);
+}
+
+TListJobsResult TOperation::ListJobs(const TListJobsOptions& options)
+{
+ return Impl_->ListJobs(options);
+}
+
+////////////////////////////////////////////////////////////////////////////////
+
+struct TAsyncPrepareAndStartOperationArgs
+{
+ std::function<void()> PrepareAndStart;
+};
+
+void* SyncPrepareAndStartOperation(void* pArgs)
+{
+ THolder<TAsyncPrepareAndStartOperationArgs> args(static_cast<TAsyncPrepareAndStartOperationArgs*>(pArgs));
+ args->PrepareAndStart();
+ return nullptr;
+}
+
+::TIntrusivePtr<TOperation> ProcessOperation(
+ NYT::NDetail::TClientPtr client,
+ std::function<void()> prepare,
+ ::TIntrusivePtr<TOperation> operation,
+ const TOperationOptions& options)
+{
+ auto prepareAndStart = [prepare = std::move(prepare), operation, mode = options.StartOperationMode_] () {
+ try {
+ prepare();
+ operation->OnPrepared();
+ } catch (...) {
+ operation->OnPreparationException(std::current_exception());
+ }
+ if (mode >= TOperationOptions::EStartOperationMode::AsyncStart) {
+ try {
+ operation->Start();
+ } catch (...) { }
+ }
+ };
+ if (options.StartOperationMode_ >= TOperationOptions::EStartOperationMode::SyncStart) {
+ prepareAndStart();
+ WaitIfRequired(operation, client, options);
+ } else {
+ auto args = new TAsyncPrepareAndStartOperationArgs;
+ args->PrepareAndStart = std::move(prepareAndStart);
+
+ TThread thread(TThread::TParams(SyncPrepareAndStartOperation, args).SetName("prepare and start operation"));
+ thread.Start();
+ thread.Detach();
+ }
+ return operation;
+}
+
+void WaitIfRequired(const TOperationPtr& operation, const TClientPtr& client, const TOperationOptions& options)
+{
+ auto retryPolicy = client->GetRetryPolicy();
+ auto context = client->GetContext();
+ if (options.StartOperationMode_ >= TOperationOptions::EStartOperationMode::SyncStart) {
+ operation->GetStartedFuture().GetValueSync();
+ }
+ if (options.StartOperationMode_ == TOperationOptions::EStartOperationMode::SyncWait) {
+ auto finishedFuture = operation->Watch();
+ TWaitProxy::Get()->WaitFuture(finishedFuture);
+ finishedFuture.GetValue();
+ if (context.Config->WriteStderrSuccessfulJobs) {
+ auto stderrs = GetJobsStderr(retryPolicy, context, operation->GetId());
+ for (const auto& jobStderr : stderrs) {
+ if (!jobStderr.empty()) {
+ Cerr << jobStderr << '\n';
+ }
+ }
+ }
+ }
+}
+
+////////////////////////////////////////////////////////////////////////////////
+
+void ResetUseClientProtobuf(const char* methodName)
+{
+ Cerr << "WARNING! OPTION `TConfig::UseClientProtobuf' IS RESET TO `true'; "
+ << "IT CAN DETERIORATE YOUR CODE PERFORMANCE!!! DON'T USE DEPRECATED METHOD `"
+ << "TOperationIOSpec::" << methodName << "' TO AVOID THIS RESET" << Endl;
+ // Give users some time to contemplate about usage of deprecated functions.
+ Cerr << "Sleeping for 5 seconds..." << Endl;
+ Sleep(TDuration::Seconds(5));
+ TConfig::Get()->UseClientProtobuf = true;
+}
+
+} // namespace NDetail
+
+////////////////////////////////////////////////////////////////////////////////
+
+::TIntrusivePtr<INodeReaderImpl> CreateJobNodeReader(TRawTableReaderPtr rawTableReader)
+{
+ if (auto schema = NDetail::GetJobInputSkiffSchema()) {
+ return new NDetail::TSkiffTableReader(rawTableReader, schema);
+ } else {
+ return new TNodeTableReader(rawTableReader);
+ }
+}
+
+::TIntrusivePtr<IYaMRReaderImpl> CreateJobYaMRReader(TRawTableReaderPtr rawTableReader)
+{
+ return new TYaMRTableReader(rawTableReader);
+}
+
+::TIntrusivePtr<IProtoReaderImpl> CreateJobProtoReader(TRawTableReaderPtr rawTableReader)
+{
+ if (TConfig::Get()->UseClientProtobuf) {
+ return new TProtoTableReader(
+ rawTableReader,
+ GetJobInputDescriptors());
+ } else {
+ return new TLenvalProtoTableReader(
+ rawTableReader,
+ GetJobInputDescriptors());
+ }
+}
+
+::TIntrusivePtr<INodeWriterImpl> CreateJobNodeWriter(THolder<IProxyOutput> rawJobWriter)
+{
+ return new TNodeTableWriter(std::move(rawJobWriter));
+}
+
+::TIntrusivePtr<IYaMRWriterImpl> CreateJobYaMRWriter(THolder<IProxyOutput> rawJobWriter)
+{
+ return new TYaMRTableWriter(std::move(rawJobWriter));
+}
+
+::TIntrusivePtr<IProtoWriterImpl> CreateJobProtoWriter(THolder<IProxyOutput> rawJobWriter)
+{
+ if (TConfig::Get()->UseClientProtobuf) {
+ return new TProtoTableWriter(
+ std::move(rawJobWriter),
+ GetJobOutputDescriptors());
+ } else {
+ return new TLenvalProtoTableWriter(
+ std::move(rawJobWriter),
+ GetJobOutputDescriptors());
+ }
+}
+
+////////////////////////////////////////////////////////////////////////////////
+
+} // namespace NYT
diff --git a/yt/cpp/mapreduce/client/operation.h b/yt/cpp/mapreduce/client/operation.h
new file mode 100644
index 0000000000..141161b0b7
--- /dev/null
+++ b/yt/cpp/mapreduce/client/operation.h
@@ -0,0 +1,203 @@
+#pragma once
+
+#include "fwd.h"
+#include "structured_table_formats.h"
+#include "operation_preparer.h"
+
+#include <yt/cpp/mapreduce/http/fwd.h>
+
+#include <yt/cpp/mapreduce/interface/client.h>
+#include <yt/cpp/mapreduce/interface/operation.h>
+#include <yt/cpp/mapreduce/interface/retry_policy.h>
+
+#include <util/generic/ptr.h>
+#include <util/generic/vector.h>
+
+namespace NYT::NDetail {
+
+////////////////////////////////////////////////////////////////////////////////
+
+class TOperation
+ : public IOperation
+{
+public:
+ class TOperationImpl;
+
+public:
+ explicit TOperation(TClientPtr client);
+ TOperation(TOperationId id, TClientPtr client);
+ virtual const TOperationId& GetId() const override;
+ virtual TString GetWebInterfaceUrl() const override;
+
+ void OnPrepared();
+ void SetDelayedStartFunction(std::function<TOperationId()> start);
+ virtual void Start() override;
+ void OnPreparationException(std::exception_ptr e);
+ virtual bool IsStarted() const override;
+
+ virtual TString GetStatus() const override;
+ void OnStatusUpdated(const TString& newStatus);
+
+ virtual ::NThreading::TFuture<void> GetPreparedFuture() override;
+ virtual ::NThreading::TFuture<void> GetStartedFuture() override;
+ virtual ::NThreading::TFuture<void> Watch() override;
+
+ virtual TVector<TFailedJobInfo> GetFailedJobInfo(const TGetFailedJobInfoOptions& options = TGetFailedJobInfoOptions()) override;
+ virtual EOperationBriefState GetBriefState() override;
+ virtual TMaybe<TYtError> GetError() override;
+ virtual TJobStatistics GetJobStatistics() override;
+ virtual TMaybe<TOperationBriefProgress> GetBriefProgress() override;
+ virtual void AbortOperation() override;
+ virtual void CompleteOperation() override;
+ virtual void SuspendOperation(const TSuspendOperationOptions& options) override;
+ virtual void ResumeOperation(const TResumeOperationOptions& options) override;
+ virtual TOperationAttributes GetAttributes(const TGetOperationOptions& options) override;
+ virtual void UpdateParameters(const TUpdateOperationParametersOptions& options) override;
+ virtual TJobAttributes GetJob(const TJobId& jobId, const TGetJobOptions& options) override;
+ virtual TListJobsResult ListJobs(const TListJobsOptions& options) override;
+
+private:
+ TClientPtr Client_;
+ ::TIntrusivePtr<TOperationImpl> Impl_;
+};
+
+using TOperationPtr = ::TIntrusivePtr<TOperation>;
+
+////////////////////////////////////////////////////////////////////////////////
+
+struct TSimpleOperationIo
+{
+ TVector<TRichYPath> Inputs;
+ TVector<TRichYPath> Outputs;
+
+ TFormat InputFormat;
+ TFormat OutputFormat;
+
+ TVector<TSmallJobFile> JobFiles;
+};
+
+TSimpleOperationIo CreateSimpleOperationIoHelper(
+ const IStructuredJob& structuredJob,
+ const TOperationPreparer& preparer,
+ const TOperationOptions& options,
+ TStructuredJobTableList structuredInputs,
+ TStructuredJobTableList structuredOutputs,
+ TUserJobFormatHints hints,
+ ENodeReaderFormat nodeReaderFormat,
+ const THashSet<TString>& columnsUsedInOperations);
+
+////////////////////////////////////////////////////////////////////////////////
+
+void ExecuteMap(
+ const TOperationPtr& operation,
+ const TOperationPreparerPtr& preparer,
+ const TMapOperationSpec& spec,
+ const ::TIntrusivePtr<IStructuredJob>& mapper,
+ const TOperationOptions& options);
+
+void ExecuteRawMap(
+ const TOperationPtr& operation,
+ const TOperationPreparerPtr& preparer,
+ const TRawMapOperationSpec& spec,
+ const ::TIntrusivePtr<IRawJob>& mapper,
+ const TOperationOptions& options);
+
+void ExecuteReduce(
+ const TOperationPtr& operation,
+ const TOperationPreparerPtr& preparer,
+ const TReduceOperationSpec& spec,
+ const ::TIntrusivePtr<IStructuredJob>& reducer,
+ const TOperationOptions& options);
+
+void ExecuteRawReduce(
+ const TOperationPtr& operation,
+ const TOperationPreparerPtr& preparer,
+ const TRawReduceOperationSpec& spec,
+ const ::TIntrusivePtr<IRawJob>& reducer,
+ const TOperationOptions& options);
+
+void ExecuteJoinReduce(
+ const TOperationPtr& operation,
+ const TOperationPreparerPtr& preparer,
+ const TJoinReduceOperationSpec& spec,
+ const ::TIntrusivePtr<IStructuredJob>& reducer,
+ const TOperationOptions& options);
+
+void ExecuteRawJoinReduce(
+ const TOperationPtr& operation,
+ const TOperationPreparerPtr& preparer,
+ const TRawJoinReduceOperationSpec& spec,
+ const ::TIntrusivePtr<IRawJob>& reducer,
+ const TOperationOptions& options);
+
+void ExecuteMapReduce(
+ const TOperationPtr& operation,
+ const TOperationPreparerPtr& preparer,
+ const TMapReduceOperationSpec& spec,
+ const ::TIntrusivePtr<IStructuredJob>& mapper,
+ const ::TIntrusivePtr<IStructuredJob>& reduceCombiner,
+ const ::TIntrusivePtr<IStructuredJob>& reducer,
+ const TOperationOptions& options);
+
+void ExecuteRawMapReduce(
+ const TOperationPtr& operation,
+ const TOperationPreparerPtr& preparer,
+ const TRawMapReduceOperationSpec& spec,
+ const ::TIntrusivePtr<IRawJob>& mapper,
+ const ::TIntrusivePtr<IRawJob>& reduceCombiner,
+ const ::TIntrusivePtr<IRawJob>& reducer,
+ const TOperationOptions& options);
+
+void ExecuteSort(
+ const TOperationPtr& operation,
+ const TOperationPreparerPtr& preparer,
+ const TSortOperationSpec& spec,
+ const TOperationOptions& options);
+
+void ExecuteMerge(
+ const TOperationPtr& operation,
+ const TOperationPreparerPtr& preparer,
+ const TMergeOperationSpec& spec,
+ const TOperationOptions& options);
+
+void ExecuteErase(
+ const TOperationPtr& operation,
+ const TOperationPreparerPtr& preparer,
+ const TEraseOperationSpec& spec,
+ const TOperationOptions& options);
+
+void ExecuteRemoteCopy(
+ const TOperationPtr& operation,
+ const TOperationPreparerPtr& preparer,
+ const TRemoteCopyOperationSpec& spec,
+ const TOperationOptions& options);
+
+void ExecuteVanilla(
+ const TOperationPtr& operation,
+ const TOperationPreparerPtr& preparer,
+ const TVanillaOperationSpec& spec,
+ const TOperationOptions& options);
+
+EOperationBriefState CheckOperation(
+ const IClientRetryPolicyPtr& clientRetryPolicy,
+ const TClientContext& context,
+ const TOperationId& operationId);
+
+void WaitForOperation(
+ const IClientRetryPolicyPtr& clientRetryPolicy,
+ const TClientContext& context,
+ const TOperationId& operationId);
+
+////////////////////////////////////////////////////////////////////////////////
+
+::TIntrusivePtr<TOperation> ProcessOperation(
+ NYT::NDetail::TClientPtr client,
+ std::function<void()> prepare,
+ ::TIntrusivePtr<TOperation> operation,
+ const TOperationOptions& options);
+
+void WaitIfRequired(const TOperationPtr& operation, const TClientPtr& client, const TOperationOptions& options);
+
+////////////////////////////////////////////////////////////////////////////////
+
+} // namespace NYT::NDetail
diff --git a/yt/cpp/mapreduce/client/operation_helpers.cpp b/yt/cpp/mapreduce/client/operation_helpers.cpp
new file mode 100644
index 0000000000..abb2185662
--- /dev/null
+++ b/yt/cpp/mapreduce/client/operation_helpers.cpp
@@ -0,0 +1,91 @@
+#include "operation_helpers.h"
+
+#include <yt/cpp/mapreduce/common/retry_lib.h>
+
+#include <yt/cpp/mapreduce/interface/config.h>
+
+#include <yt/cpp/mapreduce/interface/logging/yt_log.h>
+
+#include <yt/cpp/mapreduce/raw_client/raw_requests.h>
+
+#include <yt/cpp/mapreduce/http/context.h>
+#include <yt/cpp/mapreduce/http/requests.h>
+
+#include <util/string/builder.h>
+
+#include <util/system/mutex.h>
+#include <util/system/rwlock.h>
+
+namespace NYT::NDetail {
+
+////////////////////////////////////////////////////////////////////////////////
+
+ui64 RoundUpFileSize(ui64 size)
+{
+ constexpr ui64 roundUpTo = 4ull << 10;
+ return (size + roundUpTo - 1) & ~(roundUpTo - 1);
+}
+
+bool UseLocalModeOptimization(const TClientContext& context, const IClientRetryPolicyPtr& clientRetryPolicy)
+{
+ if (!context.Config->EnableLocalModeOptimization) {
+ return false;
+ }
+
+ static THashMap<TString, bool> localModeMap;
+ static TRWMutex mutex;
+
+ {
+ TReadGuard guard(mutex);
+ auto it = localModeMap.find(context.ServerName);
+ if (it != localModeMap.end()) {
+ return it->second;
+ }
+ }
+
+ bool isLocalMode = false;
+ TString localModeAttr("//sys/@local_mode_fqdn");
+ // We don't want to pollute logs with errors about failed request,
+ // so we check if path exists before getting it.
+ if (NRawClient::Exists(clientRetryPolicy->CreatePolicyForGenericRequest(),
+ context,
+ TTransactionId(),
+ localModeAttr,
+ TExistsOptions().ReadFrom(EMasterReadKind::Cache)))
+ {
+ auto fqdnNode = NRawClient::TryGet(
+ clientRetryPolicy->CreatePolicyForGenericRequest(),
+ context,
+ TTransactionId(),
+ localModeAttr,
+ TGetOptions().ReadFrom(EMasterReadKind::Cache));
+ if (!fqdnNode.IsUndefined()) {
+ auto fqdn = fqdnNode.AsString();
+ isLocalMode = (fqdn == TProcessState::Get()->FqdnHostName);
+ YT_LOG_DEBUG("Checking local mode; LocalModeFqdn: %v FqdnHostName: %v IsLocalMode: %v",
+ fqdn,
+ TProcessState::Get()->FqdnHostName,
+ isLocalMode ? "true" : "false");
+ }
+ }
+
+ {
+ TWriteGuard guard(mutex);
+ localModeMap[context.ServerName] = isLocalMode;
+ }
+
+ return isLocalMode;
+}
+
+TString GetOperationWebInterfaceUrl(TStringBuf serverName, TOperationId operationId)
+{
+ serverName.ChopSuffix(":80");
+ serverName.ChopSuffix(".yt.yandex-team.ru");
+ serverName.ChopSuffix(".yt.yandex.net");
+ return ::TStringBuilder() << "https://yt.yandex-team.ru/" << serverName <<
+ "/operations/" << GetGuidAsString(operationId);
+}
+
+////////////////////////////////////////////////////////////////////////////////
+
+} // namespace NYT::NDetail
diff --git a/yt/cpp/mapreduce/client/operation_helpers.h b/yt/cpp/mapreduce/client/operation_helpers.h
new file mode 100644
index 0000000000..7fd2ffb0de
--- /dev/null
+++ b/yt/cpp/mapreduce/client/operation_helpers.h
@@ -0,0 +1,20 @@
+#pragma once
+
+#include <yt/cpp/mapreduce/common/fwd.h>
+#include <yt/cpp/mapreduce/interface/fwd.h>
+
+#include <yt/cpp/mapreduce/http/fwd.h>
+
+namespace NYT::NDetail {
+
+////////////////////////////////////////////////////////////////////////////////
+
+ui64 RoundUpFileSize(ui64 size);
+
+bool UseLocalModeOptimization(const TClientContext& context, const IClientRetryPolicyPtr& clientRetryPolicy);
+
+TString GetOperationWebInterfaceUrl(TStringBuf serverName, TOperationId operationId);
+
+////////////////////////////////////////////////////////////////////////////////
+
+} // namespace NYT::NDetail
diff --git a/yt/cpp/mapreduce/client/operation_preparer.cpp b/yt/cpp/mapreduce/client/operation_preparer.cpp
new file mode 100644
index 0000000000..e06fac4061
--- /dev/null
+++ b/yt/cpp/mapreduce/client/operation_preparer.cpp
@@ -0,0 +1,881 @@
+#include "operation_preparer.h"
+
+#include "init.h"
+#include "file_writer.h"
+#include "operation.h"
+#include "operation_helpers.h"
+#include "operation_tracker.h"
+#include "transaction.h"
+#include "transaction_pinger.h"
+#include "yt_poller.h"
+
+#include <yt/cpp/mapreduce/common/helpers.h>
+#include <yt/cpp/mapreduce/common/retry_lib.h>
+#include <yt/cpp/mapreduce/common/wait_proxy.h>
+
+#include <yt/cpp/mapreduce/raw_client/raw_requests.h>
+#include <yt/cpp/mapreduce/raw_client/raw_batch_request.h>
+
+#include <yt/cpp/mapreduce/interface/error_codes.h>
+
+#include <yt/cpp/mapreduce/interface/logging/yt_log.h>
+
+#include <library/cpp/digest/md5/md5.h>
+
+#include <util/folder/path.h>
+
+#include <util/string/builder.h>
+
+#include <util/system/execpath.h>
+
+namespace NYT::NDetail {
+
+using namespace NRawClient;
+
+////////////////////////////////////////////////////////////////////////////////
+
+class TWaitOperationStartPollerItem
+ : public IYtPollerItem
+{
+public:
+ TWaitOperationStartPollerItem(TOperationId operationId, THolder<TPingableTransaction> transaction)
+ : OperationId_(operationId)
+ , Transaction_(std::move(transaction))
+ { }
+
+ void PrepareRequest(TRawBatchRequest* batchRequest) override
+ {
+ Future_ = batchRequest->GetOperation(
+ OperationId_,
+ TGetOperationOptions().AttributeFilter(
+ TOperationAttributeFilter().Add(EOperationAttribute::State)));
+ }
+
+ EStatus OnRequestExecuted() override
+ {
+ try {
+ auto attributes = Future_.GetValue();
+ Y_ENSURE(attributes.State.Defined());
+ bool operationHasLockedFiles =
+ *attributes.State != "starting" &&
+ *attributes.State != "pending" &&
+ *attributes.State != "orphaned" &&
+ *attributes.State != "waiting_for_agent" &&
+ *attributes.State != "initializing";
+ return operationHasLockedFiles ? EStatus::PollBreak : EStatus::PollContinue;
+ } catch (const TErrorResponse& e) {
+ YT_LOG_ERROR("get_operation request failed: %v (RequestId: %v)",
+ e.GetError().GetMessage(),
+ e.GetRequestId());
+ return IsRetriable(e) ? PollContinue : PollBreak;
+ } catch (const std::exception& e) {
+ YT_LOG_ERROR("%v", e.what());
+ return PollBreak;
+ }
+ }
+
+ void OnItemDiscarded() override {
+ }
+
+private:
+ TOperationId OperationId_;
+ THolder<TPingableTransaction> Transaction_;
+ ::NThreading::TFuture<TOperationAttributes> Future_;
+};
+
+////////////////////////////////////////////////////////////////////////////////
+
+class TOperationForwardingRequestRetryPolicy
+ : public IRequestRetryPolicy
+{
+public:
+ TOperationForwardingRequestRetryPolicy(const IRequestRetryPolicyPtr& underlying, const TOperationPtr& operation)
+ : Underlying_(underlying)
+ , Operation_(operation)
+ { }
+
+ void NotifyNewAttempt() override
+ {
+ Underlying_->NotifyNewAttempt();
+ }
+
+ TMaybe<TDuration> OnGenericError(const std::exception& e) override
+ {
+ UpdateOperationStatus(e.what());
+ return Underlying_->OnGenericError(e);
+ }
+
+ TMaybe<TDuration> OnRetriableError(const TErrorResponse& e) override
+ {
+ auto msg = e.GetError().ShortDescription();
+ UpdateOperationStatus(msg);
+ return Underlying_->OnRetriableError(e);
+ }
+
+ void OnIgnoredError(const TErrorResponse& e) override
+ {
+ Underlying_->OnIgnoredError(e);
+ }
+
+ TString GetAttemptDescription() const override
+ {
+ return Underlying_->GetAttemptDescription();
+ }
+
+private:
+ void UpdateOperationStatus(TStringBuf err)
+ {
+ Y_VERIFY(Operation_);
+ Operation_->OnStatusUpdated(
+ ::TStringBuilder() << "Retriable error during operation start: " << err);
+ }
+
+private:
+ IRequestRetryPolicyPtr Underlying_;
+ TOperationPtr Operation_;
+};
+
+////////////////////////////////////////////////////////////////////////////////
+
+TOperationPreparer::TOperationPreparer(TClientPtr client, TTransactionId transactionId)
+ : Client_(std::move(client))
+ , TransactionId_(transactionId)
+ , FileTransaction_(MakeHolder<TPingableTransaction>(
+ Client_->GetRetryPolicy(),
+ Client_->GetContext(),
+ TransactionId_,
+ Client_->GetTransactionPinger()->GetChildTxPinger(),
+ TStartTransactionOptions()))
+ , ClientRetryPolicy_(Client_->GetRetryPolicy())
+ , PreparationId_(CreateGuidAsString())
+{ }
+
+const TClientContext& TOperationPreparer::GetContext() const
+{
+ return Client_->GetContext();
+}
+
+TTransactionId TOperationPreparer::GetTransactionId() const
+{
+ return TransactionId_;
+}
+
+TClientPtr TOperationPreparer::GetClient() const
+{
+ return Client_;
+}
+
+const TString& TOperationPreparer::GetPreparationId() const
+{
+ return PreparationId_;
+}
+
+const IClientRetryPolicyPtr& TOperationPreparer::GetClientRetryPolicy() const
+{
+ return ClientRetryPolicy_;
+}
+
+TOperationId TOperationPreparer::StartOperation(
+ TOperation* operation,
+ const TString& operationType,
+ const TNode& spec,
+ bool useStartOperationRequest)
+{
+ CheckValidity();
+
+ THttpHeader header("POST", (useStartOperationRequest ? "start_op" : operationType));
+ if (useStartOperationRequest) {
+ header.AddParameter("operation_type", operationType);
+ }
+ header.AddTransactionId(TransactionId_);
+ header.AddMutationId();
+
+ auto ysonSpec = NodeToYsonString(spec);
+ auto responseInfo = RetryRequestWithPolicy(
+ ::MakeIntrusive<TOperationForwardingRequestRetryPolicy>(
+ ClientRetryPolicy_->CreatePolicyForStartOperationRequest(),
+ TOperationPtr(operation)),
+ GetContext(),
+ header,
+ ysonSpec);
+ TOperationId operationId = ParseGuidFromResponse(responseInfo.Response);
+ YT_LOG_DEBUG("Operation started (OperationId: %v; PreparationId: %v)",
+ operationId,
+ GetPreparationId());
+
+ YT_LOG_INFO("Operation %v started (%v): %v",
+ operationId,
+ operationType,
+ GetOperationWebInterfaceUrl(GetContext().ServerName, operationId));
+
+ TOperationExecutionTimeTracker::Get()->Start(operationId);
+
+ Client_->GetYtPoller().Watch(
+ new TWaitOperationStartPollerItem(operationId, std::move(FileTransaction_)));
+
+ return operationId;
+}
+
+void TOperationPreparer::LockFiles(TVector<TRichYPath>* paths)
+{
+ CheckValidity();
+
+ TVector<::NThreading::TFuture<TLockId>> lockIdFutures;
+ lockIdFutures.reserve(paths->size());
+ TRawBatchRequest lockRequest(GetContext().Config);
+ for (const auto& path : *paths) {
+ lockIdFutures.push_back(lockRequest.Lock(
+ FileTransaction_->GetId(),
+ path.Path_,
+ ELockMode::LM_SNAPSHOT,
+ TLockOptions().Waitable(true)));
+ }
+ ExecuteBatch(ClientRetryPolicy_->CreatePolicyForGenericRequest(), GetContext(), lockRequest);
+
+ TVector<::NThreading::TFuture<TNode>> nodeIdFutures;
+ nodeIdFutures.reserve(paths->size());
+ TRawBatchRequest getNodeIdRequest(GetContext().Config);
+ for (const auto& lockIdFuture : lockIdFutures) {
+ nodeIdFutures.push_back(getNodeIdRequest.Get(
+ FileTransaction_->GetId(),
+ ::TStringBuilder() << '#' << GetGuidAsString(lockIdFuture.GetValue()) << "/@node_id",
+ TGetOptions()));
+ }
+ ExecuteBatch(ClientRetryPolicy_->CreatePolicyForGenericRequest(), GetContext(), getNodeIdRequest);
+
+ for (size_t i = 0; i != paths->size(); ++i) {
+ auto& richPath = (*paths)[i];
+ richPath.OriginalPath(richPath.Path_);
+ richPath.Path("#" + nodeIdFutures[i].GetValue().AsString());
+ YT_LOG_DEBUG("Locked file %v, new path is %v",
+ *richPath.OriginalPath_,
+ richPath.Path_);
+ }
+}
+
+void TOperationPreparer::CheckValidity() const
+{
+ Y_ENSURE(
+ FileTransaction_,
+ "File transaction is already moved, are you trying to use preparer for more than one operation?");
+}
+
+////////////////////////////////////////////////////////////////////////////////
+
+class TRetryPolicyIgnoringLockConflicts
+ : public TAttemptLimitedRetryPolicy
+{
+public:
+ using TAttemptLimitedRetryPolicy::TAttemptLimitedRetryPolicy;
+ using TAttemptLimitedRetryPolicy::OnGenericError;
+
+ TMaybe<TDuration> OnRetriableError(const TErrorResponse& e) override
+ {
+ if (IsAttemptLimitExceeded()) {
+ return Nothing();
+ }
+ if (e.IsConcurrentTransactionLockConflict()) {
+ return GetBackoffDuration(Config_);
+ }
+ return TAttemptLimitedRetryPolicy::OnRetriableError(e);
+ }
+};
+
+////////////////////////////////////////////////////////////////////////////////
+
+class TFileToUpload
+ : public IItemToUpload
+{
+public:
+ TFileToUpload(TString fileName, TMaybe<TString> md5)
+ : FileName_(std::move(fileName))
+ , MD5_(std::move(md5))
+ { }
+
+ TString CalculateMD5() const override
+ {
+ if (MD5_) {
+ return *MD5_;
+ }
+ constexpr size_t md5Size = 32;
+ TString result;
+ result.ReserveAndResize(md5Size);
+ MD5::File(FileName_.data(), result.Detach());
+ MD5_ = result;
+ return result;
+ }
+
+ THolder<IInputStream> CreateInputStream() const override
+ {
+ return MakeHolder<TFileInput>(FileName_);
+ }
+
+ TString GetDescription() const override
+ {
+ return FileName_;
+ }
+
+ ui64 GetDataSize() const override
+ {
+ return GetFileLength(FileName_);
+ }
+
+private:
+ TString FileName_;
+ mutable TMaybe<TString> MD5_;
+};
+
+class TDataToUpload
+ : public IItemToUpload
+{
+public:
+ TDataToUpload(TString data, TString description)
+ : Data_(std::move(data))
+ , Description_(std::move(description))
+ { }
+
+ TString CalculateMD5() const override
+ {
+ constexpr size_t md5Size = 32;
+ TString result;
+ result.ReserveAndResize(md5Size);
+ MD5::Data(reinterpret_cast<const unsigned char*>(Data_.data()), Data_.size(), result.Detach());
+ return result;
+ }
+
+ THolder<IInputStream> CreateInputStream() const override
+ {
+ return MakeHolder<TMemoryInput>(Data_.data(), Data_.size());
+ }
+
+ TString GetDescription() const override
+ {
+ return Description_;
+ }
+
+ ui64 GetDataSize() const override
+ {
+ return Data_.size();
+ }
+
+private:
+ TString Data_;
+ TString Description_;
+};
+
+////////////////////////////////////////////////////////////////////////////////
+
+static const TString& GetPersistentExecPathMd5()
+{
+ static TString md5 = MD5::File(GetPersistentExecPath());
+ return md5;
+}
+
+static TMaybe<TSmallJobFile> GetJobState(const IJob& job)
+{
+ TString result;
+ {
+ TStringOutput output(result);
+ job.Save(output);
+ output.Finish();
+ }
+ if (result.empty()) {
+ return Nothing();
+ } else {
+ return TSmallJobFile{"jobstate", result};
+ }
+}
+
+////////////////////////////////////////////////////////////////////////////////
+
+TJobPreparer::TJobPreparer(
+ TOperationPreparer& operationPreparer,
+ const TUserJobSpec& spec,
+ const IJob& job,
+ size_t outputTableCount,
+ const TVector<TSmallJobFile>& smallFileList,
+ const TOperationOptions& options)
+ : OperationPreparer_(operationPreparer)
+ , Spec_(spec)
+ , Options_(options)
+{
+
+ CreateStorage();
+ auto cypressFileList = CanonizeYPaths(/* retryPolicy */ nullptr, OperationPreparer_.GetContext(), spec.Files_);
+
+ for (const auto& file : cypressFileList) {
+ UseFileInCypress(file);
+ }
+ for (const auto& localFile : spec.GetLocalFiles()) {
+ UploadLocalFile(std::get<0>(localFile), std::get<1>(localFile));
+ }
+ auto jobStateSmallFile = GetJobState(job);
+ if (jobStateSmallFile) {
+ UploadSmallFile(*jobStateSmallFile);
+ }
+ for (const auto& smallFile : smallFileList) {
+ UploadSmallFile(smallFile);
+ }
+
+ if (auto commandJob = dynamic_cast<const ICommandJob*>(&job)) {
+ ClassName_ = TJobFactory::Get()->GetJobName(&job);
+ Command_ = commandJob->GetCommand();
+ } else {
+ PrepareJobBinary(job, outputTableCount, jobStateSmallFile.Defined());
+ }
+
+ operationPreparer.LockFiles(&CachedFiles_);
+}
+
+TVector<TRichYPath> TJobPreparer::GetFiles() const
+{
+ TVector<TRichYPath> allFiles = CypressFiles_;
+ allFiles.insert(allFiles.end(), CachedFiles_.begin(), CachedFiles_.end());
+ return allFiles;
+}
+
+const TString& TJobPreparer::GetClassName() const
+{
+ return ClassName_;
+}
+
+const TString& TJobPreparer::GetCommand() const
+{
+ return Command_;
+}
+
+const TUserJobSpec& TJobPreparer::GetSpec() const
+{
+ return Spec_;
+}
+
+bool TJobPreparer::ShouldMountSandbox() const
+{
+ return OperationPreparer_.GetContext().Config->MountSandboxInTmpfs || Options_.MountSandboxInTmpfs_;
+}
+
+ui64 TJobPreparer::GetTotalFileSize() const
+{
+ return TotalFileSize_;
+}
+
+TString TJobPreparer::GetFileStorage() const
+{
+ return Options_.FileStorage_ ?
+ *Options_.FileStorage_ :
+ OperationPreparer_.GetContext().Config->RemoteTempFilesDirectory;
+}
+
+TYPath TJobPreparer::GetCachePath() const
+{
+ return AddPathPrefix(
+ ::TStringBuilder() << GetFileStorage() << "/new_cache",
+ OperationPreparer_.GetContext().Config->Prefix);
+}
+
+void TJobPreparer::CreateStorage() const
+{
+ Create(
+ OperationPreparer_.GetClientRetryPolicy()->CreatePolicyForGenericRequest(),
+ OperationPreparer_.GetContext(),
+ Options_.FileStorageTransactionId_,
+ GetCachePath(),
+ NT_MAP,
+ TCreateOptions()
+ .IgnoreExisting(true)
+ .Recursive(true));
+}
+
+int TJobPreparer::GetFileCacheReplicationFactor() const
+{
+ if (IsLocalMode()) {
+ return 1;
+ } else {
+ return OperationPreparer_.GetContext().Config->FileCacheReplicationFactor;
+ }
+}
+
+void TJobPreparer::CreateFileInCypress(const TString& path) const
+{
+ auto attributes = TNode()("replication_factor", GetFileCacheReplicationFactor());
+ if (Options_.FileExpirationTimeout_) {
+ attributes["expiration_timeout"] = Options_.FileExpirationTimeout_->MilliSeconds();
+ }
+
+ Create(
+ OperationPreparer_.GetClientRetryPolicy()->CreatePolicyForGenericRequest(),
+ OperationPreparer_.GetContext(),
+ Options_.FileStorageTransactionId_,
+ path,
+ NT_FILE,
+ TCreateOptions()
+ .IgnoreExisting(true)
+ .Recursive(true)
+ .Attributes(attributes)
+ );
+}
+
+TString TJobPreparer::PutFileToCypressCache(
+ const TString& path,
+ const TString& md5Signature,
+ TTransactionId transactionId) const
+{
+ constexpr ui32 LockConflictRetryCount = 30;
+ auto retryPolicy = MakeIntrusive<TRetryPolicyIgnoringLockConflicts>(
+ LockConflictRetryCount,
+ OperationPreparer_.GetContext().Config);
+
+ auto putFileToCacheOptions = TPutFileToCacheOptions();
+ if (Options_.FileExpirationTimeout_) {
+ putFileToCacheOptions.PreserveExpirationTimeout(true);
+ }
+
+ auto cachePath = PutFileToCache(
+ retryPolicy,
+ OperationPreparer_.GetContext(),
+ transactionId,
+ path,
+ md5Signature,
+ GetCachePath(),
+ putFileToCacheOptions);
+
+ Remove(
+ OperationPreparer_.GetClientRetryPolicy()->CreatePolicyForGenericRequest(),
+ OperationPreparer_.GetContext(),
+ transactionId,
+ path,
+ TRemoveOptions().Force(true));
+
+ return cachePath;
+}
+
+TMaybe<TString> TJobPreparer::GetItemFromCypressCache(const TString& md5Signature, const TString& fileName) const
+{
+ constexpr ui32 LockConflictRetryCount = 30;
+ auto retryPolicy = MakeIntrusive<TRetryPolicyIgnoringLockConflicts>(
+ LockConflictRetryCount,
+ OperationPreparer_.GetContext().Config);
+ auto maybePath = GetFileFromCache(
+ retryPolicy,
+ OperationPreparer_.GetContext(),
+ TTransactionId(),
+ md5Signature,
+ GetCachePath(),
+ TGetFileFromCacheOptions());
+ if (maybePath) {
+ YT_LOG_DEBUG("File is already in cache (FileName: %v)",
+ fileName,
+ *maybePath);
+ }
+ return maybePath;
+}
+
+TDuration TJobPreparer::GetWaitForUploadTimeout(const IItemToUpload& itemToUpload) const
+{
+ const TDuration extraTime = OperationPreparer_.GetContext().Config->WaitLockPollInterval +
+ TDuration::MilliSeconds(100);
+ const double dataSizeGb = static_cast<double>(itemToUpload.GetDataSize()) / 1_GB;
+ return extraTime + dataSizeGb * OperationPreparer_.GetContext().Config->CacheLockTimeoutPerGb;
+}
+
+TString TJobPreparer::UploadToRandomPath(const IItemToUpload& itemToUpload) const
+{
+ TString uniquePath = AddPathPrefix(
+ ::TStringBuilder() << GetFileStorage() << "/cpp_" << CreateGuidAsString(),
+ OperationPreparer_.GetContext().Config->Prefix);
+ YT_LOG_INFO("Uploading file to random cypress path (FileName: %v; CypressPath: %v; PreparationId: %v)",
+ itemToUpload.GetDescription(),
+ uniquePath,
+ OperationPreparer_.GetPreparationId());
+
+ CreateFileInCypress(uniquePath);
+
+ {
+ TFileWriter writer(
+ uniquePath,
+ OperationPreparer_.GetClientRetryPolicy(),
+ OperationPreparer_.GetClient()->GetTransactionPinger(),
+ OperationPreparer_.GetContext(),
+ Options_.FileStorageTransactionId_,
+ TFileWriterOptions().ComputeMD5(true));
+ itemToUpload.CreateInputStream()->ReadAll(writer);
+ writer.Finish();
+ }
+ return uniquePath;
+}
+
+TMaybe<TString> TJobPreparer::TryUploadWithDeduplication(const IItemToUpload& itemToUpload) const
+{
+ const auto md5Signature = itemToUpload.CalculateMD5();
+
+ auto fileName = ::TStringBuilder() << GetFileStorage() << "/cpp_md5_" << md5Signature;
+ if (OperationPreparer_.GetContext().Config->CacheUploadDeduplicationMode == EUploadDeduplicationMode::Host) {
+ fileName << "_" << MD5::Data(TProcessState::Get()->FqdnHostName);
+ }
+ TString cypressPath = AddPathPrefix(fileName, OperationPreparer_.GetContext().Config->Prefix);
+
+ CreateFileInCypress(cypressPath);
+
+ auto uploadTx = MakeIntrusive<TTransaction>(
+ OperationPreparer_.GetClient(),
+ OperationPreparer_.GetContext(),
+ TTransactionId(),
+ TStartTransactionOptions());
+
+ ILockPtr lock;
+ try {
+ lock = uploadTx->Lock(cypressPath, ELockMode::LM_EXCLUSIVE, TLockOptions().Waitable(true));
+ } catch (const TErrorResponse& e) {
+ if (e.IsResolveError()) {
+ // If the node doesn't exist, it must be removed by concurrent uploading process.
+ // Let's try to find it in the cache.
+ return GetItemFromCypressCache(md5Signature, itemToUpload.GetDescription());
+ }
+ throw;
+ }
+
+ auto waitTimeout = GetWaitForUploadTimeout(itemToUpload);
+ YT_LOG_DEBUG("Waiting for the lock on file (FileName: %v; CypressPath: %v; LockTimeout: %v)",
+ itemToUpload.GetDescription(),
+ cypressPath,
+ waitTimeout);
+
+ if (!TWaitProxy::Get()->WaitFuture(lock->GetAcquiredFuture(), waitTimeout)) {
+ YT_LOG_DEBUG("Waiting for the lock timed out. Fallback to random path uploading (FileName: %v; CypressPath: %v)",
+ itemToUpload.GetDescription(),
+ cypressPath);
+ return Nothing();
+ }
+
+ YT_LOG_DEBUG("Exclusive lock successfully acquired (FileName: %v; CypressPath: %v)",
+ itemToUpload.GetDescription(),
+ cypressPath);
+
+ // Ensure that this process is the first to take a lock.
+ if (auto cachedItemPath = GetItemFromCypressCache(md5Signature, itemToUpload.GetDescription())) {
+ return *cachedItemPath;
+ }
+
+ YT_LOG_INFO("Uploading file to cypress (FileName: %v; CypressPath: %v; PreparationId: %v)",
+ itemToUpload.GetDescription(),
+ cypressPath,
+ OperationPreparer_.GetPreparationId());
+
+ {
+ auto writer = uploadTx->CreateFileWriter(cypressPath, TFileWriterOptions().ComputeMD5(true));
+ YT_VERIFY(writer);
+ itemToUpload.CreateInputStream()->ReadAll(*writer);
+ writer->Finish();
+ }
+
+ auto path = PutFileToCypressCache(cypressPath, md5Signature, uploadTx->GetId());
+
+ uploadTx->Commit();
+ return path;
+}
+
+TString TJobPreparer::UploadToCacheUsingApi(const IItemToUpload& itemToUpload) const
+{
+ auto md5Signature = itemToUpload.CalculateMD5();
+ Y_VERIFY(md5Signature.size() == 32);
+
+ if (auto cachedItemPath = GetItemFromCypressCache(md5Signature, itemToUpload.GetDescription())) {
+ return *cachedItemPath;
+ }
+
+ YT_LOG_INFO("File not found in cache; uploading to cypress (FileName: %v; PreparationId: %v)",
+ itemToUpload.GetDescription(),
+ OperationPreparer_.GetPreparationId());
+
+ if (OperationPreparer_.GetContext().Config->CacheUploadDeduplicationMode != EUploadDeduplicationMode::Disabled) {
+ if (auto path = TryUploadWithDeduplication(itemToUpload)) {
+ return *path;
+ }
+ }
+
+ auto path = UploadToRandomPath(itemToUpload);
+ return PutFileToCypressCache(path, md5Signature, Options_.FileStorageTransactionId_);
+}
+
+TString TJobPreparer::UploadToCache(const IItemToUpload& itemToUpload) const
+{
+ YT_LOG_INFO("Uploading file (FileName: %v; PreparationId: %v)",
+ itemToUpload.GetDescription(),
+ OperationPreparer_.GetPreparationId());
+
+ TString result;
+ switch (Options_.FileCacheMode_) {
+ case TOperationOptions::EFileCacheMode::ApiCommandBased:
+ Y_ENSURE_EX(Options_.FileStorageTransactionId_.IsEmpty(), TApiUsageError() <<
+ "Default cache mode (API command-based) doesn't allow non-default 'FileStorageTransactionId_'");
+ result = UploadToCacheUsingApi(itemToUpload);
+ break;
+ case TOperationOptions::EFileCacheMode::CachelessRandomPathUpload:
+ result = UploadToRandomPath(itemToUpload);
+ break;
+ default:
+ Y_FAIL("Unknown file cache mode: %d", static_cast<int>(Options_.FileCacheMode_));
+ }
+
+ YT_LOG_INFO("Complete uploading file (FileName: %v; PreparationId: %v)",
+ itemToUpload.GetDescription(),
+ OperationPreparer_.GetPreparationId());
+
+ return result;
+}
+
+void TJobPreparer::UseFileInCypress(const TRichYPath& file)
+{
+ if (!Exists(
+ OperationPreparer_.GetClientRetryPolicy()->CreatePolicyForGenericRequest(),
+ OperationPreparer_.GetContext(),
+ file.TransactionId_.GetOrElse(OperationPreparer_.GetTransactionId()),
+ file.Path_))
+ {
+ ythrow yexception() << "File " << file.Path_ << " does not exist";
+ }
+
+ if (ShouldMountSandbox()) {
+ auto size = Get(
+ OperationPreparer_.GetClientRetryPolicy()->CreatePolicyForGenericRequest(),
+ OperationPreparer_.GetContext(),
+ file.TransactionId_.GetOrElse(OperationPreparer_.GetTransactionId()),
+ file.Path_ + "/@uncompressed_data_size")
+ .AsInt64();
+
+ TotalFileSize_ += RoundUpFileSize(static_cast<ui64>(size));
+ }
+ CypressFiles_.push_back(file);
+}
+
+void TJobPreparer::UploadLocalFile(
+ const TLocalFilePath& localPath,
+ const TAddLocalFileOptions& options,
+ bool isApiFile)
+{
+ TFsPath fsPath(localPath);
+ fsPath.CheckExists();
+
+ TFileStat stat;
+ fsPath.Stat(stat);
+
+ bool isExecutable = stat.Mode & (S_IXUSR | S_IXGRP | S_IXOTH);
+ auto cachePath = UploadToCache(TFileToUpload(localPath, options.MD5CheckSum_));
+
+ TRichYPath cypressPath;
+ if (isApiFile) {
+ cypressPath = OperationPreparer_.GetContext().Config->ApiFilePathOptions;
+ }
+ cypressPath.Path(cachePath).FileName(options.PathInJob_.GetOrElse(fsPath.Basename()));
+ if (isExecutable) {
+ cypressPath.Executable(true);
+ }
+ if (options.BypassArtifactCache_) {
+ cypressPath.BypassArtifactCache(*options.BypassArtifactCache_);
+ }
+
+ if (ShouldMountSandbox()) {
+ TotalFileSize_ += RoundUpFileSize(stat.Size);
+ }
+
+ CachedFiles_.push_back(cypressPath);
+}
+
+void TJobPreparer::UploadBinary(const TJobBinaryConfig& jobBinary)
+{
+ if (std::holds_alternative<TJobBinaryLocalPath>(jobBinary)) {
+ auto binaryLocalPath = std::get<TJobBinaryLocalPath>(jobBinary);
+ auto opts = TAddLocalFileOptions().PathInJob("cppbinary");
+ if (binaryLocalPath.MD5CheckSum) {
+ opts.MD5CheckSum(*binaryLocalPath.MD5CheckSum);
+ }
+ UploadLocalFile(binaryLocalPath.Path, opts, /* isApiFile */ true);
+ } else if (std::holds_alternative<TJobBinaryCypressPath>(jobBinary)) {
+ auto binaryCypressPath = std::get<TJobBinaryCypressPath>(jobBinary);
+ TRichYPath ytPath = OperationPreparer_.GetContext().Config->ApiFilePathOptions;
+ ytPath.Path(binaryCypressPath.Path);
+ if (binaryCypressPath.TransactionId) {
+ ytPath.TransactionId(*binaryCypressPath.TransactionId);
+ }
+ UseFileInCypress(ytPath.FileName("cppbinary").Executable(true));
+ } else {
+ Y_FAIL("%s", (::TStringBuilder() << "Unexpected jobBinary tag: " << jobBinary.index()).data());
+ }
+}
+
+void TJobPreparer::UploadSmallFile(const TSmallJobFile& smallFile)
+{
+ auto cachePath = UploadToCache(TDataToUpload(smallFile.Data, smallFile.FileName + " [generated-file]"));
+ auto path = OperationPreparer_.GetContext().Config->ApiFilePathOptions;
+ CachedFiles_.push_back(path.Path(cachePath).FileName(smallFile.FileName));
+ if (ShouldMountSandbox()) {
+ TotalFileSize_ += RoundUpFileSize(smallFile.Data.size());
+ }
+}
+
+bool TJobPreparer::IsLocalMode() const
+{
+ return UseLocalModeOptimization(OperationPreparer_.GetContext(), OperationPreparer_.GetClientRetryPolicy());
+}
+
+void TJobPreparer::PrepareJobBinary(const IJob& job, int outputTableCount, bool hasState)
+{
+ auto jobBinary = TJobBinaryConfig();
+ if (!std::holds_alternative<TJobBinaryDefault>(Spec_.GetJobBinary())) {
+ jobBinary = Spec_.GetJobBinary();
+ }
+ TString binaryPathInsideJob;
+ if (std::holds_alternative<TJobBinaryDefault>(jobBinary)) {
+ if (GetInitStatus() != EInitStatus::FullInitialization) {
+ ythrow yexception() << "NYT::Initialize() must be called prior to any operation";
+ }
+
+ const bool isLocalMode = IsLocalMode();
+ const TMaybe<TString> md5 = !isLocalMode ? MakeMaybe(GetPersistentExecPathMd5()) : Nothing();
+ jobBinary = TJobBinaryLocalPath{GetPersistentExecPath(), md5};
+
+ if (isLocalMode) {
+ binaryPathInsideJob = GetExecPath();
+ }
+ } else if (std::holds_alternative<TJobBinaryLocalPath>(jobBinary)) {
+ const bool isLocalMode = IsLocalMode();
+ if (isLocalMode) {
+ binaryPathInsideJob = TFsPath(std::get<TJobBinaryLocalPath>(jobBinary).Path).RealPath();
+ }
+ }
+ Y_ASSERT(!std::holds_alternative<TJobBinaryDefault>(jobBinary));
+
+ // binaryPathInsideJob is only set when LocalModeOptimization option is on, so upload is not needed
+ if (!binaryPathInsideJob) {
+ binaryPathInsideJob = "./cppbinary";
+ UploadBinary(jobBinary);
+ }
+
+ TString jobCommandPrefix = Options_.JobCommandPrefix_;
+ if (!Spec_.JobCommandPrefix_.empty()) {
+ jobCommandPrefix = Spec_.JobCommandPrefix_;
+ }
+
+ TString jobCommandSuffix = Options_.JobCommandSuffix_;
+ if (!Spec_.JobCommandSuffix_.empty()) {
+ jobCommandSuffix = Spec_.JobCommandSuffix_;
+ }
+
+ ClassName_ = TJobFactory::Get()->GetJobName(&job);
+
+ auto jobArguments = TNode::CreateMap();
+ jobArguments["job_name"] = ClassName_;
+ jobArguments["output_table_count"] = static_cast<i64>(outputTableCount);
+ jobArguments["has_state"] = hasState;
+ Spec_.AddEnvironment("YT_JOB_ARGUMENTS", NodeToYsonString(jobArguments));
+
+ Command_ = ::TStringBuilder() <<
+ jobCommandPrefix <<
+ (OperationPreparer_.GetContext().Config->UseClientProtobuf ? "YT_USE_CLIENT_PROTOBUF=1" : "YT_USE_CLIENT_PROTOBUF=0") << " " <<
+ binaryPathInsideJob <<
+ jobCommandSuffix;
+}
+
+////////////////////////////////////////////////////////////////////////////////
+
+} // namespace NYT::NDetail
diff --git a/yt/cpp/mapreduce/client/operation_preparer.h b/yt/cpp/mapreduce/client/operation_preparer.h
new file mode 100644
index 0000000000..7ced54e3b5
--- /dev/null
+++ b/yt/cpp/mapreduce/client/operation_preparer.h
@@ -0,0 +1,129 @@
+#pragma once
+
+#include "client.h"
+#include "structured_table_formats.h"
+
+#include <yt/cpp/mapreduce/interface/operation.h>
+
+namespace NYT::NDetail {
+
+////////////////////////////////////////////////////////////////////////////////
+
+class TOperation;
+
+class TOperationPreparer
+ : public TThrRefBase
+{
+public:
+ TOperationPreparer(TClientPtr client, TTransactionId transactionId);
+
+ const TClientContext& GetContext() const;
+ TTransactionId GetTransactionId() const;
+ ITransactionPingerPtr GetTransactionPinger() const;
+ TClientPtr GetClient() const;
+
+ const TString& GetPreparationId() const;
+
+ void LockFiles(TVector<TRichYPath>* paths);
+
+ TOperationId StartOperation(
+ TOperation* operation,
+ const TString& operationType,
+ const TNode& spec,
+ bool useStartOperationRequest = false);
+
+ const IClientRetryPolicyPtr& GetClientRetryPolicy() const;
+
+private:
+ TClientPtr Client_;
+ TTransactionId TransactionId_;
+ THolder<TPingableTransaction> FileTransaction_;
+ IClientRetryPolicyPtr ClientRetryPolicy_;
+ const TString PreparationId_;
+
+private:
+ void CheckValidity() const;
+};
+
+using TOperationPreparerPtr = ::TIntrusivePtr<TOperationPreparer>;
+
+////////////////////////////////////////////////////////////////////////////////
+
+struct IItemToUpload
+{
+ virtual ~IItemToUpload() = default;
+
+ virtual TString CalculateMD5() const = 0;
+ virtual THolder<IInputStream> CreateInputStream() const = 0;
+ virtual TString GetDescription() const = 0;
+ virtual ui64 GetDataSize() const = 0;
+};
+
+////////////////////////////////////////////////////////////////////////////////
+
+class TJobPreparer
+ : private TNonCopyable
+{
+public:
+ TJobPreparer(
+ TOperationPreparer& operationPreparer,
+ const TUserJobSpec& spec,
+ const IJob& job,
+ size_t outputTableCount,
+ const TVector<TSmallJobFile>& smallFileList,
+ const TOperationOptions& options);
+
+ TVector<TRichYPath> GetFiles() const;
+ const TString& GetClassName() const;
+ const TString& GetCommand() const;
+ const TUserJobSpec& GetSpec() const;
+ bool ShouldMountSandbox() const;
+ ui64 GetTotalFileSize() const;
+
+private:
+ TOperationPreparer& OperationPreparer_;
+ TUserJobSpec Spec_;
+ TOperationOptions Options_;
+
+ TVector<TRichYPath> CypressFiles_;
+ TVector<TRichYPath> CachedFiles_;
+
+ TString ClassName_;
+ TString Command_;
+ ui64 TotalFileSize_ = 0;
+
+private:
+ TString GetFileStorage() const;
+ TYPath GetCachePath() const;
+
+ bool IsLocalMode() const;
+ int GetFileCacheReplicationFactor() const;
+
+ void CreateStorage() const;
+
+ void CreateFileInCypress(const TString& path) const;
+ TString PutFileToCypressCache(const TString& path, const TString& md5Signature, TTransactionId transactionId) const;
+ TMaybe<TString> GetItemFromCypressCache(const TString& md5Signature, const TString& fileName) const;
+
+ TDuration GetWaitForUploadTimeout(const IItemToUpload& itemToUpload) const;
+ TString UploadToRandomPath(const IItemToUpload& itemToUpload) const;
+ TString UploadToCacheUsingApi(const IItemToUpload& itemToUpload) const;
+ TMaybe<TString> TryUploadWithDeduplication(const IItemToUpload& itemToUpload) const;
+ TString UploadToCache(const IItemToUpload& itemToUpload) const;
+
+ void UseFileInCypress(const TRichYPath& file);
+
+ void UploadLocalFile(
+ const TLocalFilePath& localPath,
+ const TAddLocalFileOptions& options,
+ bool isApiFile = false);
+
+ void UploadBinary(const TJobBinaryConfig& jobBinary);
+ void UploadSmallFile(const TSmallJobFile& smallFile);
+
+ void PrepareJobBinary(const IJob& job, int outputTableCount, bool hasState);
+};
+
+////////////////////////////////////////////////////////////////////////////////
+
+} // namespace NYT::NDetail
diff --git a/yt/cpp/mapreduce/client/operation_tracker.cpp b/yt/cpp/mapreduce/client/operation_tracker.cpp
new file mode 100644
index 0000000000..56623e9927
--- /dev/null
+++ b/yt/cpp/mapreduce/client/operation_tracker.cpp
@@ -0,0 +1,34 @@
+#include "operation_tracker.h"
+
+#include <yt/cpp/mapreduce/interface/config.h>
+
+namespace NYT {
+
+////////////////////////////////////////////////////////////////////////////////
+
+void TOperationExecutionTimeTracker::Start(const TOperationId& operationId) {
+ with_lock(Lock_) {
+ StartTimes_[operationId] = TInstant::Now();
+ }
+}
+
+TMaybe<TDuration> TOperationExecutionTimeTracker::Finish(const TOperationId& operationId) {
+ TDuration duration;
+ with_lock(Lock_) {
+ auto i = StartTimes_.find(operationId);
+ if (i == StartTimes_.end()) {
+ return Nothing();
+ }
+ duration = TInstant::Now() - i->second;
+ StartTimes_.erase(i);
+ }
+ return duration;
+}
+
+TOperationExecutionTimeTracker* TOperationExecutionTimeTracker::Get() {
+ return Singleton<TOperationExecutionTimeTracker>();
+}
+
+////////////////////////////////////////////////////////////////////////////////
+
+} // namespace NYT
diff --git a/yt/cpp/mapreduce/client/operation_tracker.h b/yt/cpp/mapreduce/client/operation_tracker.h
new file mode 100644
index 0000000000..9f1504ea91
--- /dev/null
+++ b/yt/cpp/mapreduce/client/operation_tracker.h
@@ -0,0 +1,27 @@
+#pragma once
+
+#include <yt/cpp/mapreduce/interface/operation.h>
+
+#include <util/datetime/base.h>
+#include <util/generic/hash.h>
+#include <util/generic/maybe.h>
+#include <util/system/mutex.h>
+
+namespace NYT {
+
+////////////////////////////////////////////////////////////////////////////////
+
+class TOperationExecutionTimeTracker {
+public:
+ void Start(const TOperationId& operationId);
+ TMaybe<TDuration> Finish(const TOperationId& operationId);
+ static TOperationExecutionTimeTracker* Get();
+
+private:
+ THashMap<TOperationId, TInstant> StartTimes_;
+ TMutex Lock_;
+};
+
+////////////////////////////////////////////////////////////////////////////////
+
+} // namespace NYT
diff --git a/yt/cpp/mapreduce/client/prepare_operation.cpp b/yt/cpp/mapreduce/client/prepare_operation.cpp
new file mode 100644
index 0000000000..7f772dc99a
--- /dev/null
+++ b/yt/cpp/mapreduce/client/prepare_operation.cpp
@@ -0,0 +1,286 @@
+#include "prepare_operation.h"
+
+#include <yt/cpp/mapreduce/common/retry_lib.h>
+
+#include <yt/cpp/mapreduce/interface/serialize.h>
+
+#include <yt/cpp/mapreduce/raw_client/raw_requests.h>
+#include <yt/cpp/mapreduce/raw_client/raw_batch_request.h>
+
+#include <library/cpp/iterator/functools.h>
+
+namespace NYT::NDetail {
+
+////////////////////////////////////////////////////////////////////////////////
+
+TOperationPreparationContext::TOperationPreparationContext(
+ const TStructuredJobTableList& structuredInputs,
+ const TStructuredJobTableList& structuredOutputs,
+ const TClientContext& context,
+ const IClientRetryPolicyPtr& retryPolicy,
+ TTransactionId transactionId)
+ : Context_(context)
+ , RetryPolicy_(retryPolicy)
+ , TransactionId_(transactionId)
+ , InputSchemas_(structuredInputs.size())
+ , InputSchemasLoaded_(structuredInputs.size(), false)
+{
+ Inputs_.reserve(structuredInputs.size());
+ for (const auto& input : structuredInputs) {
+ Inputs_.push_back(input.RichYPath);
+ }
+ Outputs_.reserve(structuredOutputs.size());
+ for (const auto& output : structuredOutputs) {
+ Outputs_.push_back(output.RichYPath);
+ }
+}
+
+TOperationPreparationContext::TOperationPreparationContext(
+ TVector<TRichYPath> inputs,
+ TVector<TRichYPath> outputs,
+ const TClientContext& context,
+ const IClientRetryPolicyPtr& retryPolicy,
+ TTransactionId transactionId)
+ : Context_(context)
+ , RetryPolicy_(retryPolicy)
+ , TransactionId_(transactionId)
+ , InputSchemas_(inputs.size())
+ , InputSchemasLoaded_(inputs.size(), false)
+{
+ Inputs_.reserve(inputs.size());
+ for (auto& input : inputs) {
+ Inputs_.push_back(std::move(input));
+ }
+ Outputs_.reserve(outputs.size());
+ for (const auto& output : outputs) {
+ Outputs_.push_back(std::move(output));
+ }
+}
+
+int TOperationPreparationContext::GetInputCount() const
+{
+ return static_cast<int>(Inputs_.size());
+}
+
+int TOperationPreparationContext::GetOutputCount() const
+{
+ return static_cast<int>(Outputs_.size());
+}
+
+const TVector<TTableSchema>& TOperationPreparationContext::GetInputSchemas() const
+{
+ TVector<::NThreading::TFuture<TNode>> schemaFutures;
+ NRawClient::TRawBatchRequest batch(Context_.Config);
+ for (int tableIndex = 0; tableIndex < static_cast<int>(InputSchemas_.size()); ++tableIndex) {
+ if (InputSchemasLoaded_[tableIndex]) {
+ schemaFutures.emplace_back();
+ continue;
+ }
+ Y_VERIFY(Inputs_[tableIndex]);
+ schemaFutures.push_back(batch.Get(TransactionId_, Inputs_[tableIndex]->Path_ + "/@schema", TGetOptions{}));
+ }
+
+ NRawClient::ExecuteBatch(
+ RetryPolicy_->CreatePolicyForGenericRequest(),
+ Context_,
+ batch);
+
+ for (int tableIndex = 0; tableIndex < static_cast<int>(InputSchemas_.size()); ++tableIndex) {
+ if (schemaFutures[tableIndex].Initialized()) {
+ Deserialize(InputSchemas_[tableIndex], schemaFutures[tableIndex].ExtractValueSync());
+ }
+ }
+
+ return InputSchemas_;
+}
+
+const TTableSchema& TOperationPreparationContext::GetInputSchema(int index) const
+{
+ auto& schema = InputSchemas_[index];
+ if (!InputSchemasLoaded_[index]) {
+ Y_VERIFY(Inputs_[index]);
+ auto schemaNode = NRawClient::Get(
+ RetryPolicy_->CreatePolicyForGenericRequest(),
+ Context_,
+ TransactionId_,
+ Inputs_[index]->Path_ + "/@schema");
+ Deserialize(schema, schemaNode);
+ }
+ return schema;
+}
+
+TMaybe<TYPath> TOperationPreparationContext::GetInputPath(int index) const
+{
+ Y_VERIFY(index < static_cast<int>(Inputs_.size()));
+ if (Inputs_[index]) {
+ return Inputs_[index]->Path_;
+ }
+ return Nothing();
+}
+
+TMaybe<TYPath> TOperationPreparationContext::GetOutputPath(int index) const
+{
+ Y_VERIFY(index < static_cast<int>(Outputs_.size()));
+ if (Outputs_[index]) {
+ return Outputs_[index]->Path_;
+ }
+ return Nothing();
+}
+
+////////////////////////////////////////////////////////////////////////////////
+
+TSpeculativeOperationPreparationContext::TSpeculativeOperationPreparationContext(
+ const TVector<TTableSchema>& previousResult,
+ TStructuredJobTableList inputs,
+ TStructuredJobTableList outputs)
+ : InputSchemas_(previousResult)
+ , Inputs_(std::move(inputs))
+ , Outputs_(std::move(outputs))
+{
+ Y_VERIFY(Inputs_.size() == previousResult.size());
+}
+
+int TSpeculativeOperationPreparationContext::GetInputCount() const
+{
+ return static_cast<int>(Inputs_.size());
+}
+
+int TSpeculativeOperationPreparationContext::GetOutputCount() const
+{
+ return static_cast<int>(Outputs_.size());
+}
+
+const TVector<TTableSchema>& TSpeculativeOperationPreparationContext::GetInputSchemas() const
+{
+ return InputSchemas_;
+}
+
+const TTableSchema& TSpeculativeOperationPreparationContext::GetInputSchema(int index) const
+{
+ Y_VERIFY(index < static_cast<int>(InputSchemas_.size()));
+ return InputSchemas_[index];
+}
+
+TMaybe<TYPath> TSpeculativeOperationPreparationContext::GetInputPath(int index) const
+{
+ Y_VERIFY(index < static_cast<int>(Inputs_.size()));
+ if (Inputs_[index].RichYPath) {
+ return Inputs_[index].RichYPath->Path_;
+ }
+ return Nothing();
+}
+
+TMaybe<TYPath> TSpeculativeOperationPreparationContext::GetOutputPath(int index) const
+{
+ Y_VERIFY(index < static_cast<int>(Outputs_.size()));
+ if (Outputs_[index].RichYPath) {
+ return Outputs_[index].RichYPath->Path_;
+ }
+ return Nothing();
+}
+
+////////////////////////////////////////////////////////////////////////////////
+
+static void FixInputTable(TRichYPath& table, int index, const TJobOperationPreparer& preparer)
+{
+ const auto& columnRenamings = preparer.GetInputColumnRenamings();
+ const auto& columnFilters = preparer.GetInputColumnFilters();
+
+ if (!columnRenamings[index].empty()) {
+ table.RenameColumns(columnRenamings[index]);
+ }
+ if (columnFilters[index]) {
+ table.Columns(*columnFilters[index]);
+ }
+}
+
+static void FixInputTable(TStructuredJobTable& table, int index, const TJobOperationPreparer& preparer)
+{
+ const auto& inputDescriptions = preparer.GetInputDescriptions();
+
+ if (inputDescriptions[index] && std::holds_alternative<TUnspecifiedTableStructure>(table.Description)) {
+ table.Description = *inputDescriptions[index];
+ }
+ if (table.RichYPath) {
+ FixInputTable(*table.RichYPath, index, preparer);
+ }
+}
+
+static void FixOutputTable(TRichYPath& /* table */, int /* index */, const TJobOperationPreparer& /* preparer */)
+{ }
+
+static void FixOutputTable(TStructuredJobTable& table, int index, const TJobOperationPreparer& preparer)
+{
+ const auto& outputDescriptions = preparer.GetOutputDescriptions();
+
+ if (outputDescriptions[index] && std::holds_alternative<TUnspecifiedTableStructure>(table.Description)) {
+ table.Description = *outputDescriptions[index];
+ }
+ if (table.RichYPath) {
+ FixOutputTable(*table.RichYPath, index, preparer);
+ }
+}
+
+template <typename TTables>
+TVector<TTableSchema> PrepareOperation(
+ const IJob& job,
+ const IOperationPreparationContext& context,
+ TTables* inputsPtr,
+ TTables* outputsPtr,
+ TUserJobFormatHints& hints)
+{
+ TJobOperationPreparer preparer(context);
+ job.PrepareOperation(context, preparer);
+ preparer.Finish();
+
+ if (inputsPtr) {
+ auto& inputs = *inputsPtr;
+ for (int i = 0; i < static_cast<int>(inputs.size()); ++i) {
+ FixInputTable(inputs[i], i, preparer);
+ }
+ }
+
+ if (outputsPtr) {
+ auto& outputs = *outputsPtr;
+ for (int i = 0; i < static_cast<int>(outputs.size()); ++i) {
+ FixOutputTable(outputs[i], i, preparer);
+ }
+ }
+
+ auto applyPatch = [](TMaybe<TFormatHints>& origin, const TMaybe<TFormatHints>& patch) {
+ if (origin) {
+ if (patch) {
+ origin->Merge(*patch);
+ }
+ } else {
+ origin = patch;
+ }
+ };
+
+ auto preparerHints = preparer.GetFormatHints();
+ applyPatch(preparerHints.InputFormatHints_, hints.InputFormatHints_);
+ applyPatch(preparerHints.OutputFormatHints_, hints.OutputFormatHints_);
+ hints = std::move(preparerHints);
+
+ return preparer.GetOutputSchemas();
+}
+
+template
+TVector<TTableSchema> PrepareOperation<TStructuredJobTableList>(
+ const IJob& job,
+ const IOperationPreparationContext& context,
+ TStructuredJobTableList* inputsPtr,
+ TStructuredJobTableList* outputsPtr,
+ TUserJobFormatHints& hints);
+
+template
+TVector<TTableSchema> PrepareOperation<TVector<TRichYPath>>(
+ const IJob& job,
+ const IOperationPreparationContext& context,
+ TVector<TRichYPath>* inputsPtr,
+ TVector<TRichYPath>* outputsPtr,
+ TUserJobFormatHints& hints);
+
+////////////////////////////////////////////////////////////////////////////////
+
+} // namespace NYT::NDetail
diff --git a/yt/cpp/mapreduce/client/prepare_operation.h b/yt/cpp/mapreduce/client/prepare_operation.h
new file mode 100644
index 0000000000..3b64aa2856
--- /dev/null
+++ b/yt/cpp/mapreduce/client/prepare_operation.h
@@ -0,0 +1,93 @@
+#pragma once
+
+#include "structured_table_formats.h"
+
+#include <yt/cpp/mapreduce/interface/operation.h>
+
+namespace NYT::NDetail {
+
+////////////////////////////////////////////////////////////////////////////////
+
+class TOperationPreparationContext
+ : public IOperationPreparationContext
+{
+public:
+ TOperationPreparationContext(
+ const TStructuredJobTableList& structuredInputs,
+ const TStructuredJobTableList& structuredOutputs,
+ const TClientContext& context,
+ const IClientRetryPolicyPtr& retryPolicy,
+ TTransactionId transactionId);
+
+ TOperationPreparationContext(
+ TVector<TRichYPath> inputs,
+ TVector<TRichYPath> outputs,
+ const TClientContext& context,
+ const IClientRetryPolicyPtr& retryPolicy,
+ TTransactionId transactionId);
+
+ int GetInputCount() const override;
+ int GetOutputCount() const override;
+
+ const TVector<TTableSchema>& GetInputSchemas() const override;
+ const TTableSchema& GetInputSchema(int index) const override;
+
+ TMaybe<TYPath> GetInputPath(int index) const override;
+ TMaybe<TYPath> GetOutputPath(int index) const override;
+
+private:
+ TVector<TMaybe<TRichYPath>> Inputs_;
+ TVector<TMaybe<TRichYPath>> Outputs_;
+ const TClientContext& Context_;
+ const IClientRetryPolicyPtr RetryPolicy_;
+ TTransactionId TransactionId_;
+
+ mutable TVector<TTableSchema> InputSchemas_;
+ mutable TVector<bool> InputSchemasLoaded_;
+};
+
+////////////////////////////////////////////////////////////////////////////////
+
+class TSpeculativeOperationPreparationContext
+ : public IOperationPreparationContext
+{
+public:
+ TSpeculativeOperationPreparationContext(
+ const TVector<TTableSchema>& previousResult,
+ TStructuredJobTableList inputs,
+ TStructuredJobTableList outputs);
+
+ int GetInputCount() const override;
+ int GetOutputCount() const override;
+
+ const TVector<TTableSchema>& GetInputSchemas() const override;
+ const TTableSchema& GetInputSchema(int index) const override;
+
+ TMaybe<TYPath> GetInputPath(int index) const override;
+ TMaybe<TYPath> GetOutputPath(int index) const override;
+
+private:
+ TVector<TTableSchema> InputSchemas_;
+ TStructuredJobTableList Inputs_;
+ TStructuredJobTableList Outputs_;
+};
+
+////////////////////////////////////////////////////////////////////////////////
+
+template <typename TTables>
+TVector<TTableSchema> PrepareOperation(
+ const IJob& job,
+ const IOperationPreparationContext& context,
+ TTables* inputsPtr,
+ TTables* outputsPtr,
+ TUserJobFormatHints& hints);
+
+////////////////////////////////////////////////////////////////////////////////
+
+TJobOperationPreparer GetOperationPreparer(
+ const IJob& job,
+ const IOperationPreparationContext& context);
+
+////////////////////////////////////////////////////////////////////////////////
+
+} // namespace NYT::NDetail
diff --git a/yt/cpp/mapreduce/client/py_helpers.cpp b/yt/cpp/mapreduce/client/py_helpers.cpp
new file mode 100644
index 0000000000..3072449866
--- /dev/null
+++ b/yt/cpp/mapreduce/client/py_helpers.cpp
@@ -0,0 +1,112 @@
+#include "py_helpers.h"
+
+#include "client.h"
+#include "operation.h"
+#include "transaction.h"
+
+#include <yt/cpp/mapreduce/interface/client.h>
+#include <yt/cpp/mapreduce/interface/fluent.h>
+
+#include <yt/cpp/mapreduce/common/retry_lib.h>
+#include <yt/cpp/mapreduce/common/helpers.h>
+
+#include <library/cpp/yson/node/node_io.h>
+
+#include <util/generic/hash_set.h>
+
+namespace NYT {
+
+using namespace NDetail;
+
+////////////////////////////////////////////////////////////////////////////////
+
+IStructuredJobPtr ConstructJob(const TString& jobName, const TString& state)
+{
+ auto node = TNode();
+ if (!state.empty()) {
+ node = NodeFromYsonString(state);
+ }
+ return TJobFactory::Get()->GetConstructingFunction(jobName.data())(node);
+}
+
+TString GetJobStateString(const IStructuredJob& job)
+{
+ TString result;
+ {
+ TStringOutput output(result);
+ job.Save(output);
+ output.Finish();
+ }
+ return result;
+}
+
+TStructuredJobTableList NodeToStructuredTablePaths(const TNode& node, const TOperationPreparer& preparer)
+{
+ int intermediateTableCount = 0;
+ TVector<TRichYPath> paths;
+ for (const auto& inputNode : node.AsList()) {
+ if (inputNode.IsNull()) {
+ ++intermediateTableCount;
+ } else {
+ paths.emplace_back(inputNode.AsString());
+ }
+ }
+ paths = NRawClient::CanonizeYPaths(/* retryPolicy */ nullptr, preparer.GetContext(), paths);
+ TStructuredJobTableList result(intermediateTableCount, TStructuredJobTable::Intermediate(TUnspecifiedTableStructure()));
+ for (const auto& path : paths) {
+ result.emplace_back(TStructuredJobTable{TUnspecifiedTableStructure(), path});
+ }
+ return result;
+}
+
+TString GetIOInfo(
+ const IStructuredJob& job,
+ const TCreateClientOptions& options,
+ const TString& cluster,
+ const TString& transactionId,
+ const TString& inputPaths,
+ const TString& outputPaths,
+ const TString& neededColumns)
+{
+ auto client = NDetail::CreateClientImpl(cluster, options);
+ TOperationPreparer preparer(client, GetGuid(transactionId));
+
+ auto structuredInputs = NodeToStructuredTablePaths(NodeFromYsonString(inputPaths), preparer);
+ auto structuredOutputs = NodeToStructuredTablePaths(NodeFromYsonString(outputPaths), preparer);
+
+ auto neededColumnsNode = NodeFromYsonString(neededColumns);
+ THashSet<TString> columnsUsedInOperations;
+ for (const auto& columnNode : neededColumnsNode.AsList()) {
+ columnsUsedInOperations.insert(columnNode.AsString());
+ }
+
+ auto operationIo = CreateSimpleOperationIoHelper(
+ job,
+ preparer,
+ TOperationOptions(),
+ std::move(structuredInputs),
+ std::move(structuredOutputs),
+ TUserJobFormatHints(),
+ ENodeReaderFormat::Yson,
+ columnsUsedInOperations);
+
+ return BuildYsonStringFluently().BeginMap()
+ .Item("input_format").Value(operationIo.InputFormat.Config)
+ .Item("output_format").Value(operationIo.OutputFormat.Config)
+ .Item("input_table_paths").List(operationIo.Inputs)
+ .Item("output_table_paths").List(operationIo.Outputs)
+ .Item("small_files").DoListFor(
+ operationIo.JobFiles.begin(),
+ operationIo.JobFiles.end(),
+ [] (TFluentList fluent, auto fileIt) {
+ fluent.Item().BeginMap()
+ .Item("file_name").Value(fileIt->FileName)
+ .Item("data").Value(fileIt->Data)
+ .EndMap();
+ })
+ .EndMap();
+}
+
+////////////////////////////////////////////////////////////////////////////////
+
+} // namespace NYT
diff --git a/yt/cpp/mapreduce/client/py_helpers.h b/yt/cpp/mapreduce/client/py_helpers.h
new file mode 100644
index 0000000000..85aa0a93f3
--- /dev/null
+++ b/yt/cpp/mapreduce/client/py_helpers.h
@@ -0,0 +1,25 @@
+#include <yt/cpp/mapreduce/interface/client_method_options.h>
+#include <yt/cpp/mapreduce/interface/operation.h>
+
+namespace NYT {
+
+////////////////////////////////////////////////////////////////////////////////
+
+using IStructuredJobPtr = TIntrusiveConstPtr<IStructuredJob>;
+
+IStructuredJobPtr ConstructJob(const TString& jobName, const TString& state);
+
+TString GetJobStateString(const IStructuredJob& job);
+
+TString GetIOInfo(
+ const IStructuredJob& job,
+ const TCreateClientOptions& options,
+ const TString& cluster,
+ const TString& transactionId,
+ const TString& inputPaths,
+ const TString& outputPaths,
+ const TString& neededColumns);
+
+////////////////////////////////////////////////////////////////////////////////
+
+} // namespace NYT
diff --git a/yt/cpp/mapreduce/client/retry_heavy_write_request.cpp b/yt/cpp/mapreduce/client/retry_heavy_write_request.cpp
new file mode 100644
index 0000000000..b4e4975d7f
--- /dev/null
+++ b/yt/cpp/mapreduce/client/retry_heavy_write_request.cpp
@@ -0,0 +1,87 @@
+#include "retry_heavy_write_request.h"
+
+#include "transaction.h"
+#include "transaction_pinger.h"
+
+#include <yt/cpp/mapreduce/common/retry_lib.h>
+#include <yt/cpp/mapreduce/common/wait_proxy.h>
+
+#include <yt/cpp/mapreduce/interface/config.h>
+#include <yt/cpp/mapreduce/interface/tvm.h>
+
+#include <yt/cpp/mapreduce/interface/logging/yt_log.h>
+
+#include <yt/cpp/mapreduce/http/helpers.h>
+#include <yt/cpp/mapreduce/http/http_client.h>
+#include <yt/cpp/mapreduce/http/requests.h>
+#include <yt/cpp/mapreduce/http/retry_request.h>
+
+namespace NYT {
+
+using ::ToString;
+
+////////////////////////////////////////////////////////////////////////////////
+
+void RetryHeavyWriteRequest(
+ const IClientRetryPolicyPtr& clientRetryPolicy,
+ const ITransactionPingerPtr& transactionPinger,
+ const TClientContext& context,
+ const TTransactionId& parentId,
+ THttpHeader& header,
+ std::function<THolder<IInputStream>()> streamMaker)
+{
+ int retryCount = context.Config->RetryCount;
+ if (context.ServiceTicketAuth) {
+ header.SetServiceTicket(context.ServiceTicketAuth->Ptr->IssueServiceTicket());
+ } else {
+ header.SetToken(context.Token);
+ }
+
+ for (int attempt = 0; attempt < retryCount; ++attempt) {
+ TPingableTransaction attemptTx(clientRetryPolicy, context, parentId, transactionPinger->GetChildTxPinger(), TStartTransactionOptions());
+
+ auto input = streamMaker();
+ TString requestId;
+
+ try {
+ auto hostName = GetProxyForHeavyRequest(context);
+ requestId = CreateGuidAsString();
+
+ header.AddTransactionId(attemptTx.GetId(), /* overwrite = */ true);
+ header.SetRequestCompression(ToString(context.Config->ContentEncoding));
+
+ auto request = context.HttpClient->StartRequest(GetFullUrl(hostName, context, header), requestId, header);
+ TransferData(input.Get(), request->GetStream());
+ request->Finish()->GetResponse();
+ } catch (TErrorResponse& e) {
+ YT_LOG_ERROR("RSP %v - attempt %v failed",
+ requestId,
+ attempt);
+
+ if (!IsRetriable(e) || attempt + 1 == retryCount) {
+ throw;
+ }
+ NDetail::TWaitProxy::Get()->Sleep(GetBackoffDuration(e, context.Config));
+ continue;
+
+ } catch (std::exception& e) {
+ YT_LOG_ERROR("RSP %v - %v - attempt %v failed",
+ requestId,
+ e.what(),
+ attempt);
+
+ if (attempt + 1 == retryCount) {
+ throw;
+ }
+ NDetail::TWaitProxy::Get()->Sleep(GetBackoffDuration(e, context.Config));
+ continue;
+ }
+
+ attemptTx.Commit();
+ return;
+ }
+}
+
+////////////////////////////////////////////////////////////////////////////////
+
+} // namespace NYT
diff --git a/yt/cpp/mapreduce/client/retry_heavy_write_request.h b/yt/cpp/mapreduce/client/retry_heavy_write_request.h
new file mode 100644
index 0000000000..647cad302c
--- /dev/null
+++ b/yt/cpp/mapreduce/client/retry_heavy_write_request.h
@@ -0,0 +1,21 @@
+#pragma once
+
+#include <yt/cpp/mapreduce/common/fwd.h>
+
+#include <yt/cpp/mapreduce/http/requests.h>
+
+namespace NYT {
+
+///////////////////////////////////////////////////////////////////////////////
+
+void RetryHeavyWriteRequest(
+ const IClientRetryPolicyPtr& clientRetryPolicy,
+ const ITransactionPingerPtr& transactionPinger,
+ const TClientContext& context,
+ const TTransactionId& parentId,
+ THttpHeader& header,
+ std::function<THolder<IInputStream>()> streamMaker);
+
+///////////////////////////////////////////////////////////////////////////////
+
+} // namespace NYT
diff --git a/yt/cpp/mapreduce/client/retry_transaction.h b/yt/cpp/mapreduce/client/retry_transaction.h
new file mode 100644
index 0000000000..5220c222b8
--- /dev/null
+++ b/yt/cpp/mapreduce/client/retry_transaction.h
@@ -0,0 +1,71 @@
+#pragma once
+
+#include <yt/cpp/mapreduce/http/retry_request.h>
+
+#include <yt/cpp/mapreduce/client/client.h>
+
+#include <yt/cpp/mapreduce/common/wait_proxy.h>
+#include <yt/cpp/mapreduce/common/retry_lib.h>
+
+#include <yt/cpp/mapreduce/interface/logging/yt_log.h>
+
+namespace NYT::NDetail {
+
+template <typename TResult>
+TResult RetryTransactionWithPolicy(
+ const TClientBasePtr& client,
+ std::function<TResult(ITransactionPtr)> func,
+ IRequestRetryPolicyPtr retryPolicy)
+{
+ if (!retryPolicy) {
+ retryPolicy = CreateDefaultRequestRetryPolicy(client->GetContext().Config);
+ }
+
+ while (true) {
+ try {
+ retryPolicy->NotifyNewAttempt();
+ auto transaction = client->StartTransaction(TStartTransactionOptions());
+ if constexpr (std::is_same<TResult, void>::value) {
+ func(transaction);
+ transaction->Commit();
+ return;
+ } else {
+ auto result = func(transaction);
+ transaction->Commit();
+ return result;
+ }
+ } catch (const TErrorResponse& e) {
+ YT_LOG_ERROR("Retry failed %v - %v",
+ e.GetError().GetMessage(),
+ retryPolicy->GetAttemptDescription());
+
+ if (!IsRetriable(e)) {
+ throw;
+ }
+
+ auto maybeRetryTimeout = retryPolicy->OnRetriableError(e);
+ if (maybeRetryTimeout) {
+ TWaitProxy::Get()->Sleep(*maybeRetryTimeout);
+ } else {
+ throw;
+ }
+ } catch (const std::exception& e) {
+ YT_LOG_ERROR("Retry failed %v - %v",
+ e.what(),
+ retryPolicy->GetAttemptDescription());
+
+ if (!IsRetriable(e)) {
+ throw;
+ }
+
+ auto maybeRetryTimeout = retryPolicy->OnGenericError(e);
+ if (maybeRetryTimeout) {
+ TWaitProxy::Get()->Sleep(*maybeRetryTimeout);
+ } else {
+ throw;
+ }
+ }
+ }
+}
+
+} // namespace NYT::NDetail
diff --git a/yt/cpp/mapreduce/client/retryful_writer.cpp b/yt/cpp/mapreduce/client/retryful_writer.cpp
new file mode 100644
index 0000000000..12b2939ffa
--- /dev/null
+++ b/yt/cpp/mapreduce/client/retryful_writer.cpp
@@ -0,0 +1,163 @@
+#include "retryful_writer.h"
+
+#include "retry_heavy_write_request.h"
+
+#include <yt/cpp/mapreduce/http/requests.h>
+
+#include <yt/cpp/mapreduce/interface/errors.h>
+#include <yt/cpp/mapreduce/interface/finish_or_die.h>
+
+#include <yt/cpp/mapreduce/interface/logging/yt_log.h>
+
+#include <util/generic/size_literals.h>
+
+namespace NYT {
+
+////////////////////////////////////////////////////////////////////////////////
+
+TRetryfulWriter::~TRetryfulWriter()
+{
+ NDetail::FinishOrDie(this, "TRetryfulWriter");
+}
+
+void TRetryfulWriter::CheckWriterState()
+{
+ switch (WriterState_) {
+ case Ok:
+ break;
+ case Completed:
+ ythrow TApiUsageError() << "Cannot use table writer that is finished";
+ case Error:
+ ythrow TApiUsageError() << "Cannot use table writer that finished with error";
+ }
+}
+
+void TRetryfulWriter::NotifyRowEnd()
+{
+ CheckWriterState();
+ if (Buffer_.Size() >= BufferSize_) {
+ FlushBuffer(false);
+ }
+}
+
+void TRetryfulWriter::DoWrite(const void* buf, size_t len)
+{
+ CheckWriterState();
+ while (Buffer_.Size() + len > Buffer_.Capacity()) {
+ Buffer_.Reserve(Buffer_.Capacity() * 2);
+ }
+ Buffer_.Append(static_cast<const char*>(buf), len);
+}
+
+void TRetryfulWriter::DoFinish()
+{
+ if (WriterState_ != Ok) {
+ return;
+ }
+ FlushBuffer(true);
+ if (Started_) {
+ FilledBuffers_.Stop();
+ Thread_.Join();
+ }
+ if (Exception_) {
+ WriterState_ = Error;
+ std::rethrow_exception(Exception_);
+ }
+ if (WriteTransaction_) {
+ WriteTransaction_->Commit();
+ }
+ WriterState_ = Completed;
+}
+
+void TRetryfulWriter::FlushBuffer(bool lastBlock)
+{
+ if (!Started_) {
+ if (lastBlock) {
+ try {
+ Send(Buffer_);
+ } catch (...) {
+ WriterState_ = Error;
+ throw;
+ }
+ return;
+ } else {
+ Started_ = true;
+ Thread_.Start();
+ }
+ }
+
+ auto emptyBuffer = EmptyBuffers_.Pop();
+ if (!emptyBuffer) {
+ WriterState_ = Error;
+ std::rethrow_exception(Exception_);
+ }
+ FilledBuffers_.Push(std::move(Buffer_));
+ Buffer_ = std::move(emptyBuffer.GetRef());
+}
+
+void TRetryfulWriter::Send(const TBuffer& buffer)
+{
+ THttpHeader header("PUT", Command_);
+ header.SetInputFormat(Format_);
+ header.MergeParameters(Parameters_);
+
+ auto streamMaker = [&buffer] () {
+ return MakeHolder<TBufferInput>(buffer);
+ };
+
+ auto transactionId = (WriteTransaction_ ? WriteTransaction_->GetId() : ParentTransactionId_);
+ RetryHeavyWriteRequest(ClientRetryPolicy_, TransactionPinger_, Context_, transactionId, header, streamMaker);
+
+ Parameters_ = SecondaryParameters_; // all blocks except the first one are appended
+}
+
+void TRetryfulWriter::SendThread()
+{
+ while (auto maybeBuffer = FilledBuffers_.Pop()) {
+ auto& buffer = maybeBuffer.GetRef();
+ try {
+ Send(buffer);
+ } catch (const std::exception&) {
+ Exception_ = std::current_exception();
+ EmptyBuffers_.Stop();
+ break;
+ }
+ buffer.Clear();
+ EmptyBuffers_.Push(std::move(buffer));
+ }
+}
+
+void* TRetryfulWriter::SendThread(void* opaque)
+{
+ static_cast<TRetryfulWriter*>(opaque)->SendThread();
+ return nullptr;
+}
+
+void TRetryfulWriter::Abort()
+{
+ if (Started_) {
+ FilledBuffers_.Stop();
+ Thread_.Join();
+ }
+ if (WriteTransaction_) {
+ WriteTransaction_->Abort();
+ }
+ WriterState_ = Completed;
+}
+
+size_t TRetryfulWriter::GetBufferSize(const TMaybe<TWriterOptions>& writerOptions)
+{
+ auto retryBlockSize = TMaybe<size_t>();
+ if (writerOptions) {
+ if (writerOptions->RetryBlockSize_) {
+ retryBlockSize = *writerOptions->RetryBlockSize_;
+ } else if (writerOptions->DesiredChunkSize_) {
+ retryBlockSize = *writerOptions->DesiredChunkSize_;
+ }
+ }
+ return retryBlockSize.GetOrElse(64_MB);
+}
+
+////////////////////////////////////////////////////////////////////////////////
+
+} // namespace NYT
diff --git a/yt/cpp/mapreduce/client/retryful_writer.h b/yt/cpp/mapreduce/client/retryful_writer.h
new file mode 100644
index 0000000000..38e351977d
--- /dev/null
+++ b/yt/cpp/mapreduce/client/retryful_writer.h
@@ -0,0 +1,130 @@
+#pragma once
+
+#include "transaction.h"
+#include "transaction_pinger.h"
+
+#include <yt/cpp/mapreduce/common/retry_lib.h>
+#include <yt/cpp/mapreduce/http/http.h>
+#include <yt/cpp/mapreduce/interface/common.h>
+#include <yt/cpp/mapreduce/interface/io.h>
+#include <yt/cpp/mapreduce/io/helpers.h>
+#include <yt/cpp/mapreduce/raw_client/raw_requests.h>
+
+#include <library/cpp/threading/blocking_queue/blocking_queue.h>
+
+#include <util/stream/output.h>
+#include <util/generic/buffer.h>
+#include <util/stream/buffer.h>
+#include <util/system/thread.h>
+#include <util/system/event.h>
+
+#include <atomic>
+
+namespace NYT {
+
+////////////////////////////////////////////////////////////////////////////////
+
+class TRetryfulWriter
+ : public TRawTableWriter
+{
+public:
+ template <class TWriterOptions>
+ TRetryfulWriter(
+ IClientRetryPolicyPtr clientRetryPolicy,
+ ITransactionPingerPtr transactionPinger,
+ const TClientContext& context,
+ const TTransactionId& parentId,
+ const TString& command,
+ const TMaybe<TFormat>& format,
+ const TRichYPath& path,
+ const TWriterOptions& options)
+ : ClientRetryPolicy_(std::move(clientRetryPolicy))
+ , TransactionPinger_(std::move(transactionPinger))
+ , Context_(context)
+ , Command_(command)
+ , Format_(format)
+ , BufferSize_(GetBufferSize(options.WriterOptions_))
+ , ParentTransactionId_(parentId)
+ , WriteTransaction_()
+ , FilledBuffers_(2)
+ , EmptyBuffers_(2)
+ , Buffer_(BufferSize_ * 2)
+ , Thread_(TThread::TParams{SendThread, this}.SetName("retryful_writer"))
+ {
+ Parameters_ = FormIORequestParameters(path, options);
+
+ auto secondaryPath = path;
+ secondaryPath.Append_ = true;
+ secondaryPath.Schema_.Clear();
+ secondaryPath.CompressionCodec_.Clear();
+ secondaryPath.ErasureCodec_.Clear();
+ secondaryPath.OptimizeFor_.Clear();
+ SecondaryParameters_ = FormIORequestParameters(secondaryPath, options);
+
+ if (options.CreateTransaction_) {
+ WriteTransaction_.ConstructInPlace(ClientRetryPolicy_, context, parentId, TransactionPinger_->GetChildTxPinger(), TStartTransactionOptions());
+ auto append = path.Append_.GetOrElse(false);
+ auto lockMode = (append ? LM_SHARED : LM_EXCLUSIVE);
+ NDetail::NRawClient::Lock(ClientRetryPolicy_->CreatePolicyForGenericRequest(), Context_, WriteTransaction_->GetId(), path.Path_, lockMode);
+ }
+
+ EmptyBuffers_.Push(TBuffer(BufferSize_ * 2));
+ }
+
+ ~TRetryfulWriter() override;
+ void NotifyRowEnd() override;
+ void Abort() override;
+
+ size_t GetRetryBlockRemainingSize() const
+ {
+ return (BufferSize_ > Buffer_.size()) ? (BufferSize_ - Buffer_.size()) : 0;
+ }
+
+protected:
+ void DoWrite(const void* buf, size_t len) override;
+ void DoFinish() override;
+
+private:
+ static size_t GetBufferSize(const TMaybe<TWriterOptions>& writerOptions);
+
+private:
+ const IClientRetryPolicyPtr ClientRetryPolicy_;
+ const ITransactionPingerPtr TransactionPinger_;
+ const TClientContext Context_;
+ TString Command_;
+ TMaybe<TFormat> Format_;
+ const size_t BufferSize_;
+
+ TNode Parameters_;
+ TNode SecondaryParameters_;
+
+ TTransactionId ParentTransactionId_;
+ TMaybe<TPingableTransaction> WriteTransaction_;
+
+ ::NThreading::TBlockingQueue<TBuffer> FilledBuffers_;
+ ::NThreading::TBlockingQueue<TBuffer> EmptyBuffers_;
+
+ TBuffer Buffer_;
+
+ TThread Thread_;
+ bool Started_ = false;
+ std::exception_ptr Exception_ = nullptr;
+
+ enum EWriterState {
+ Ok,
+ Completed,
+ Error,
+ } WriterState_ = Ok;
+
+private:
+ void FlushBuffer(bool lastBlock);
+ void Send(const TBuffer& buffer);
+ void CheckWriterState();
+
+ void SendThread();
+ static void* SendThread(void* opaque);
+};
+
+////////////////////////////////////////////////////////////////////////////////
+
+}
diff --git a/yt/cpp/mapreduce/client/retryless_writer.cpp b/yt/cpp/mapreduce/client/retryless_writer.cpp
new file mode 100644
index 0000000000..4c25c1a1dd
--- /dev/null
+++ b/yt/cpp/mapreduce/client/retryless_writer.cpp
@@ -0,0 +1,45 @@
+#include "retryless_writer.h"
+
+#include <yt/cpp/mapreduce/interface/logging/yt_log.h>
+
+namespace NYT {
+
+////////////////////////////////////////////////////////////////////////////////
+
+TRetrylessWriter::~TRetrylessWriter()
+{
+ NDetail::FinishOrDie(this, "TRetrylessWriter");
+}
+
+void TRetrylessWriter::DoFinish()
+{
+ if (!Running_) {
+ return;
+ }
+ Running_ = false;
+
+ BufferedOutput_->Finish();
+ Request_->Finish()->GetResponse();
+}
+
+void TRetrylessWriter::DoWrite(const void* buf, size_t len)
+{
+ try {
+ BufferedOutput_->Write(buf, len);
+ } catch (...) {
+ Running_ = false;
+ throw;
+ }
+}
+
+void TRetrylessWriter::NotifyRowEnd()
+{ }
+
+void TRetrylessWriter::Abort()
+{
+ Running_ = false;
+}
+
+////////////////////////////////////////////////////////////////////////////////
+
+} // namespace NYT
diff --git a/yt/cpp/mapreduce/client/retryless_writer.h b/yt/cpp/mapreduce/client/retryless_writer.h
new file mode 100644
index 0000000000..baf49a258f
--- /dev/null
+++ b/yt/cpp/mapreduce/client/retryless_writer.h
@@ -0,0 +1,73 @@
+#pragma once
+
+#include "transaction.h"
+
+#include <yt/cpp/mapreduce/http/helpers.h>
+#include <yt/cpp/mapreduce/http/http.h>
+#include <yt/cpp/mapreduce/http/http_client.h>
+
+#include <yt/cpp/mapreduce/interface/config.h>
+#include <yt/cpp/mapreduce/interface/common.h>
+#include <yt/cpp/mapreduce/interface/config.h>
+#include <yt/cpp/mapreduce/interface/io.h>
+#include <yt/cpp/mapreduce/interface/tvm.h>
+
+#include <yt/cpp/mapreduce/io/helpers.h>
+
+#include <yt/cpp/mapreduce/raw_client/raw_requests.h>
+
+#include <util/stream/buffered.h>
+
+namespace NYT {
+
+////////////////////////////////////////////////////////////////////////////////
+
+class TRetrylessWriter
+ : public TRawTableWriter
+{
+public:
+ template <class TWriterOptions>
+ TRetrylessWriter(
+ const TClientContext& context,
+ const TTransactionId& parentId,
+ const TString& command,
+ const TMaybe<TFormat>& format,
+ const TRichYPath& path,
+ size_t bufferSize,
+ const TWriterOptions& options)
+ {
+ THttpHeader header("PUT", command);
+ header.SetInputFormat(format);
+ header.MergeParameters(FormIORequestParameters(path, options));
+ header.AddTransactionId(parentId);
+ header.SetRequestCompression(ToString(context.Config->ContentEncoding));
+ if (context.ServiceTicketAuth) {
+ header.SetServiceTicket(context.ServiceTicketAuth->Ptr->IssueServiceTicket());
+ } else {
+ header.SetToken(context.Token);
+ }
+
+ TString requestId = CreateGuidAsString();
+
+ auto hostName = GetProxyForHeavyRequest(context);
+ Request_ = context.HttpClient->StartRequest(GetFullUrl(hostName, context, header), requestId, header);
+ BufferedOutput_.Reset(new TBufferedOutput(Request_->GetStream(), bufferSize));
+ }
+
+ ~TRetrylessWriter() override;
+ void NotifyRowEnd() override;
+ void Abort() override;
+
+protected:
+ void DoWrite(const void* buf, size_t len) override;
+ void DoFinish() override;
+
+private:
+ bool Running_ = true;
+ NHttpClient::IHttpRequestPtr Request_;
+ THolder<TBufferedOutput> BufferedOutput_;
+};
+
+////////////////////////////////////////////////////////////////////////////////
+
+} // namespace NYT
diff --git a/yt/cpp/mapreduce/client/skiff.cpp b/yt/cpp/mapreduce/client/skiff.cpp
new file mode 100644
index 0000000000..67a0f960ae
--- /dev/null
+++ b/yt/cpp/mapreduce/client/skiff.cpp
@@ -0,0 +1,396 @@
+#include "skiff.h"
+
+#include <yt/cpp/mapreduce/common/retry_lib.h>
+
+#include <yt/cpp/mapreduce/http/retry_request.h>
+#include <yt/cpp/mapreduce/http/requests.h>
+
+#include <yt/cpp/mapreduce/interface/config.h>
+#include <yt/cpp/mapreduce/interface/common.h>
+#include <yt/cpp/mapreduce/interface/serialize.h>
+
+#include <yt/cpp/mapreduce/interface/logging/yt_log.h>
+
+#include <library/cpp/yson/node/node_builder.h>
+#include <library/cpp/yson/node/node_io.h>
+
+#include <yt/cpp/mapreduce/raw_client/raw_batch_request.h>
+#include <yt/cpp/mapreduce/raw_client/raw_requests.h>
+
+#include <yt/cpp/mapreduce/skiff/skiff_schema.h>
+
+#include <library/cpp/yson/consumer.h>
+#include <library/cpp/yson/writer.h>
+
+#include <util/string/cast.h>
+#include <util/stream/str.h>
+#include <util/stream/file.h>
+#include <util/folder/path.h>
+
+namespace NYT {
+namespace NDetail {
+
+using namespace NRawClient;
+
+using ::ToString;
+
+////////////////////////////////////////////////////////////////////////////////
+
+static NSkiff::TSkiffSchemaPtr ReadSkiffSchema(const TString& fileName)
+{
+ if (!TFsPath(fileName).Exists()) {
+ return nullptr;
+ }
+ TIFStream input(fileName);
+ NSkiff::TSkiffSchemaPtr schema;
+ Deserialize(schema, NodeFromYsonStream(&input));
+ return schema;
+}
+
+NSkiff::TSkiffSchemaPtr GetJobInputSkiffSchema()
+{
+ return ReadSkiffSchema("skiff_input");
+}
+
+NSkiff::EWireType ValueTypeToSkiffType(EValueType valueType)
+{
+ using NSkiff::EWireType;
+ switch (valueType) {
+ case VT_INT64:
+ case VT_INT32:
+ case VT_INT16:
+ case VT_INT8:
+ return EWireType::Int64;
+
+ case VT_UINT64:
+ case VT_UINT32:
+ case VT_UINT16:
+ case VT_UINT8:
+ return EWireType::Uint64;
+
+ case VT_DOUBLE:
+ case VT_FLOAT:
+ return EWireType::Double;
+
+ case VT_BOOLEAN:
+ return EWireType::Boolean;
+
+ case VT_STRING:
+ case VT_UTF8:
+ case VT_JSON:
+ return EWireType::String32;
+
+ case VT_ANY:
+ return EWireType::Yson32;
+
+ case VT_NULL:
+ case VT_VOID:
+ return EWireType::Nothing;
+
+ case VT_DATE:
+ case VT_DATETIME:
+ case VT_TIMESTAMP:
+ return EWireType::Uint64;
+
+ case VT_INTERVAL:
+ return EWireType::Int64;
+ };
+ ythrow yexception() << "Cannot convert EValueType '" << valueType << "' to NSkiff::EWireType";
+}
+
+NSkiff::TSkiffSchemaPtr CreateSkiffSchema(
+ const TTableSchema& schema,
+ const TCreateSkiffSchemaOptions& options)
+{
+ using namespace NSkiff;
+
+ Y_ENSURE(schema.Strict(), "Cannot create Skiff schema for non-strict table schema");
+ TVector<TSkiffSchemaPtr> skiffColumns;
+ for (const auto& column: schema.Columns()) {
+ TSkiffSchemaPtr skiffColumn;
+ if (column.Type() == VT_ANY && *column.TypeV3() != *NTi::Optional(NTi::Yson())) {
+ // We ignore all complex types until YT-12717 is done.
+ return nullptr;
+ }
+ if (column.Required() || NTi::IsSingular(column.TypeV3()->GetTypeName())) {
+ skiffColumn = CreateSimpleTypeSchema(ValueTypeToSkiffType(column.Type()));
+ } else {
+ skiffColumn = CreateVariant8Schema({
+ CreateSimpleTypeSchema(EWireType::Nothing),
+ CreateSimpleTypeSchema(ValueTypeToSkiffType(column.Type()))});
+ }
+ if (options.RenameColumns_) {
+ auto maybeName = options.RenameColumns_->find(column.Name());
+ skiffColumn->SetName(maybeName == options.RenameColumns_->end() ? column.Name() : maybeName->second);
+ } else {
+ skiffColumn->SetName(column.Name());
+ }
+ skiffColumns.push_back(skiffColumn);
+ }
+
+ if (options.HasKeySwitch_) {
+ skiffColumns.push_back(
+ CreateSimpleTypeSchema(EWireType::Boolean)->SetName("$key_switch"));
+ }
+ if (options.HasRangeIndex_) {
+ skiffColumns.push_back(
+ CreateVariant8Schema({
+ CreateSimpleTypeSchema(EWireType::Nothing),
+ CreateSimpleTypeSchema(EWireType::Int64)})
+ ->SetName("$range_index"));
+ }
+
+ skiffColumns.push_back(
+ CreateVariant8Schema({
+ CreateSimpleTypeSchema(EWireType::Nothing),
+ CreateSimpleTypeSchema(EWireType::Int64)})
+ ->SetName("$row_index"));
+
+ return CreateTupleSchema(std::move(skiffColumns));
+}
+
+NSkiff::TSkiffSchemaPtr CreateSkiffSchema(
+ const TNode& schemaNode,
+ const TCreateSkiffSchemaOptions& options)
+{
+ TTableSchema schema;
+ Deserialize(schema, schemaNode);
+ return CreateSkiffSchema(schema, options);
+}
+
+void Serialize(const NSkiff::TSkiffSchemaPtr& schema, NYson::IYsonConsumer* consumer)
+{
+ consumer->OnBeginMap();
+ if (schema->GetName().size() > 0) {
+ consumer->OnKeyedItem("name");
+ consumer->OnStringScalar(schema->GetName());
+ }
+ consumer->OnKeyedItem("wire_type");
+ consumer->OnStringScalar(ToString(schema->GetWireType()));
+ if (schema->GetChildren().size() > 0) {
+ consumer->OnKeyedItem("children");
+ consumer->OnBeginList();
+ for (const auto& child : schema->GetChildren()) {
+ consumer->OnListItem();
+ Serialize(child, consumer);
+ }
+ consumer->OnEndList();
+ }
+ consumer->OnEndMap();
+}
+
+void Deserialize(NSkiff::TSkiffSchemaPtr& schema, const TNode& node)
+{
+ using namespace NSkiff;
+
+ static auto createSchema = [](EWireType wireType, TVector<TSkiffSchemaPtr>&& children) -> TSkiffSchemaPtr {
+ switch (wireType) {
+ case EWireType::Tuple:
+ return CreateTupleSchema(std::move(children));
+ case EWireType::Variant8:
+ return CreateVariant8Schema(std::move(children));
+ case EWireType::Variant16:
+ return CreateVariant16Schema(std::move(children));
+ case EWireType::RepeatedVariant8:
+ return CreateRepeatedVariant8Schema(std::move(children));
+ case EWireType::RepeatedVariant16:
+ return CreateRepeatedVariant16Schema(std::move(children));
+ default:
+ return CreateSimpleTypeSchema(wireType);
+ }
+ };
+
+ const auto& map = node.AsMap();
+ const auto* wireTypePtr = map.FindPtr("wire_type");
+ Y_ENSURE(wireTypePtr, "'wire_type' is a required key");
+ auto wireType = FromString<NSkiff::EWireType>(wireTypePtr->AsString());
+
+ const auto* childrenPtr = map.FindPtr("children");
+ Y_ENSURE(NSkiff::IsSimpleType(wireType) || childrenPtr,
+ "'children' key is required for complex node '" << wireType << "'");
+ TVector<TSkiffSchemaPtr> children;
+ if (childrenPtr) {
+ for (const auto& childNode : childrenPtr->AsList()) {
+ TSkiffSchemaPtr childSchema;
+ Deserialize(childSchema, childNode);
+ children.push_back(std::move(childSchema));
+ }
+ }
+
+ schema = createSchema(wireType, std::move(children));
+
+ const auto* namePtr = map.FindPtr("name");
+ if (namePtr) {
+ schema->SetName(namePtr->AsString());
+ }
+}
+
+TFormat CreateSkiffFormat(const NSkiff::TSkiffSchemaPtr& schema) {
+ Y_ENSURE(schema->GetWireType() == NSkiff::EWireType::Variant16,
+ "Bad wire type for schema; expected 'variant16', got " << schema->GetWireType());
+
+ THashMap<
+ NSkiff::TSkiffSchemaPtr,
+ size_t,
+ NSkiff::TSkiffSchemaPtrHasher,
+ NSkiff::TSkiffSchemaPtrEqual> schemasMap;
+ size_t tableIndex = 0;
+ auto config = TNode("skiff");
+ config.Attributes()["table_skiff_schemas"] = TNode::CreateList();
+
+ for (const auto& schemaChild : schema->GetChildren()) {
+ auto [iter, inserted] = schemasMap.emplace(schemaChild, tableIndex);
+ size_t currentIndex;
+ if (inserted) {
+ currentIndex = tableIndex;
+ ++tableIndex;
+ } else {
+ currentIndex = iter->second;
+ }
+ config.Attributes()["table_skiff_schemas"].Add("$" + ToString(currentIndex));
+ }
+
+ config.Attributes()["skiff_schema_registry"] = TNode::CreateMap();
+
+ for (const auto& [tableSchema, index] : schemasMap) {
+ TNode node;
+ TNodeBuilder nodeBuilder(&node);
+ Serialize(tableSchema, &nodeBuilder);
+ config.Attributes()["skiff_schema_registry"][ToString(index)] = std::move(node);
+ }
+
+ return TFormat(config);
+}
+
+NSkiff::TSkiffSchemaPtr CreateSkiffSchemaIfNecessary(
+ const TClientContext& context,
+ const IClientRetryPolicyPtr& clientRetryPolicy,
+ const TTransactionId& transactionId,
+ ENodeReaderFormat nodeReaderFormat,
+ const TVector<TRichYPath>& tablePaths,
+ const TCreateSkiffSchemaOptions& options)
+{
+ if (nodeReaderFormat == ENodeReaderFormat::Yson) {
+ return nullptr;
+ }
+
+ for (const auto& path : tablePaths) {
+ if (path.Columns_) {
+ switch (nodeReaderFormat) {
+ case ENodeReaderFormat::Skiff:
+ ythrow TApiUsageError() << "Cannot use Skiff format with column selectors";
+ case ENodeReaderFormat::Auto:
+ return nullptr;
+ default:
+ Y_FAIL("Unexpected node reader format: %d", static_cast<int>(nodeReaderFormat));
+ }
+ }
+ }
+
+ auto nodes = NRawClient::BatchTransform(
+ clientRetryPolicy->CreatePolicyForGenericRequest(),
+ context,
+ NRawClient::CanonizeYPaths(clientRetryPolicy->CreatePolicyForGenericRequest(), context, tablePaths),
+ [&] (TRawBatchRequest& batch, const TRichYPath& path) {
+ auto getOptions = TGetOptions()
+ .AttributeFilter(
+ TAttributeFilter()
+ .AddAttribute("schema")
+ .AddAttribute("dynamic")
+ .AddAttribute("type")
+ );
+ return batch.Get(transactionId, path.Path_, getOptions);
+ });
+
+ TVector<NSkiff::TSkiffSchemaPtr> schemas;
+ for (size_t tableIndex = 0; tableIndex < nodes.size(); ++tableIndex) {
+ const auto& tablePath = tablePaths[tableIndex].Path_;
+ const auto& attributes = nodes[tableIndex].GetAttributes();
+ Y_ENSURE_EX(attributes["type"] == TNode("table"),
+ TApiUsageError() << "Operation input path " << tablePath << " is not a table");
+ bool dynamic = attributes["dynamic"].AsBool();
+ bool strict = attributes["schema"].GetAttributes()["strict"].AsBool();
+ switch (nodeReaderFormat) {
+ case ENodeReaderFormat::Skiff:
+ Y_ENSURE_EX(strict,
+ TApiUsageError() << "Cannot use skiff format for table with non-strict schema '" << tablePath << "'");
+ Y_ENSURE_EX(!dynamic,
+ TApiUsageError() << "Cannot use skiff format for dynamic table '" << tablePath << "'");
+ break;
+ case ENodeReaderFormat::Auto:
+ if (dynamic || !strict) {
+ YT_LOG_DEBUG("Cannot use skiff format for table '%v' as it is dynamic or has non-strict schema",
+ tablePath);
+ return nullptr;
+ }
+ break;
+ default:
+ Y_FAIL("Unexpected node reader format: %d", static_cast<int>(nodeReaderFormat));
+ }
+
+ NSkiff::TSkiffSchemaPtr curSkiffSchema;
+ if (tablePaths[tableIndex].RenameColumns_) {
+ auto customOptions = options;
+ customOptions.RenameColumns(*tablePaths[tableIndex].RenameColumns_);
+ curSkiffSchema = CreateSkiffSchema(attributes["schema"], customOptions);
+ } else {
+ curSkiffSchema = CreateSkiffSchema(attributes["schema"], options);
+ }
+
+ if (!curSkiffSchema) {
+ return nullptr;
+ }
+ schemas.push_back(curSkiffSchema);
+ }
+ return NSkiff::CreateVariant16Schema(std::move(schemas));
+}
+
+////////////////////////////////////////////////////////////////////////////////
+
+NSkiff::TSkiffSchemaPtr CreateSkiffSchema(
+ const TVector<NSkiff::TSkiffSchemaPtr>& tableSchemas,
+ const TCreateSkiffSchemaOptions& options
+) {
+ constexpr auto KEY_SWITCH_COLUMN = "$key_switch";
+ constexpr auto ROW_INDEX_COLUMN = "$row_index";
+ constexpr auto RANGE_INDEX_COLUMN = "$range_index";
+
+ TVector<NSkiff::TSkiffSchemaPtr> schemas;
+ schemas.reserve(tableSchemas.size());
+
+ for (const auto& tableSchema : tableSchemas) {
+ Y_ENSURE(tableSchema->GetWireType() == NSkiff::EWireType::Tuple,
+ "Expected 'tuple' wire type for table schema, got '" << tableSchema->GetWireType() << "'");
+
+ const auto& children = tableSchema->GetChildren();
+ NSkiff::TSkiffSchemaList columns;
+
+ columns.reserve(children.size() + 3);
+ if (options.HasKeySwitch_) {
+ columns.push_back(
+ CreateSimpleTypeSchema(NSkiff::EWireType::Boolean)->SetName(KEY_SWITCH_COLUMN));
+ }
+ columns.push_back(
+ NSkiff::CreateVariant8Schema({
+ CreateSimpleTypeSchema(NSkiff::EWireType::Nothing),
+ CreateSimpleTypeSchema(NSkiff::EWireType::Int64)})
+ ->SetName(ROW_INDEX_COLUMN));
+ if (options.HasRangeIndex_) {
+ columns.push_back(
+ NSkiff::CreateVariant8Schema({
+ CreateSimpleTypeSchema(NSkiff::EWireType::Nothing),
+ CreateSimpleTypeSchema(NSkiff::EWireType::Int64)})
+ ->SetName(RANGE_INDEX_COLUMN));
+ }
+ columns.insert(columns.end(), children.begin(), children.end());
+
+ schemas.push_back(NSkiff::CreateTupleSchema(columns));
+ }
+
+ return NSkiff::CreateVariant16Schema(schemas);
+}
+
+////////////////////////////////////////////////////////////////////////////////
+
+} // namespace NDetail
+} // namespace NYT
diff --git a/yt/cpp/mapreduce/client/skiff.h b/yt/cpp/mapreduce/client/skiff.h
new file mode 100644
index 0000000000..82d80a4967
--- /dev/null
+++ b/yt/cpp/mapreduce/client/skiff.h
@@ -0,0 +1,72 @@
+#pragma once
+
+#include <yt/cpp/mapreduce/common/fwd.h>
+
+#include <yt/cpp/mapreduce/interface/fwd.h>
+#include <yt/cpp/mapreduce/interface/common.h>
+
+#include <yt/cpp/mapreduce/skiff/wire_type.h>
+#include <yt/cpp/mapreduce/skiff/skiff_schema.h>
+
+#include <util/generic/vector.h>
+
+namespace NYT::NYson {
+struct IYsonConsumer;
+} // namespace NYT::NYson
+
+namespace NYT {
+
+struct TClientContext;
+enum class ENodeReaderFormat : int;
+
+namespace NDetail {
+
+////////////////////////////////////////////////////////////////////////////////
+
+struct TCreateSkiffSchemaOptions
+{
+ using TSelf = TCreateSkiffSchemaOptions;
+
+ FLUENT_FIELD_DEFAULT(bool, HasKeySwitch, false);
+ FLUENT_FIELD_DEFAULT(bool, HasRangeIndex, false);
+
+ using TRenameColumnsDescriptor = THashMap<TString, TString>;
+ FLUENT_FIELD_OPTION(TRenameColumnsDescriptor, RenameColumns);
+};
+
+////////////////////////////////////////////////////////////////////////////////
+
+NSkiff::TSkiffSchemaPtr CreateSkiffSchema(
+ const TVector<NSkiff::TSkiffSchemaPtr>& tableSchemas,
+ const TCreateSkiffSchemaOptions& options);
+
+NSkiff::TSkiffSchemaPtr GetJobInputSkiffSchema();
+
+NSkiff::EWireType ValueTypeToSkiffType(EValueType valueType);
+
+NSkiff::TSkiffSchemaPtr CreateSkiffSchema(
+ const TTableSchema& schema,
+ const TCreateSkiffSchemaOptions& options = TCreateSkiffSchemaOptions());
+
+NSkiff::TSkiffSchemaPtr CreateSkiffSchema(
+ const TNode& schemaNode,
+ const TCreateSkiffSchemaOptions& options = TCreateSkiffSchemaOptions());
+
+void Serialize(const NSkiff::TSkiffSchemaPtr& schema, NYson::IYsonConsumer* consumer);
+
+void Deserialize(NSkiff::TSkiffSchemaPtr& schema, const TNode& node);
+
+TFormat CreateSkiffFormat(const NSkiff::TSkiffSchemaPtr& schema);
+
+NSkiff::TSkiffSchemaPtr CreateSkiffSchemaIfNecessary(
+ const TClientContext& context,
+ const IClientRetryPolicyPtr& clientRetryPolicy,
+ const TTransactionId& transactionId,
+ ENodeReaderFormat nodeReaderFormat,
+ const TVector<TRichYPath>& tablePaths,
+ const TCreateSkiffSchemaOptions& options = TCreateSkiffSchemaOptions());
+
+////////////////////////////////////////////////////////////////////////////////
+
+} // namespace NDetail
+} // namespace NYT
diff --git a/yt/cpp/mapreduce/client/structured_table_formats.cpp b/yt/cpp/mapreduce/client/structured_table_formats.cpp
new file mode 100644
index 0000000000..b6e82c6c15
--- /dev/null
+++ b/yt/cpp/mapreduce/client/structured_table_formats.cpp
@@ -0,0 +1,572 @@
+#include "structured_table_formats.h"
+
+#include "format_hints.h"
+#include "skiff.h"
+
+#include <yt/cpp/mapreduce/common/retry_lib.h>
+
+#include <yt/cpp/mapreduce/io/yamr_table_reader.h>
+
+#include <yt/cpp/mapreduce/library/table_schema/protobuf.h>
+
+#include <yt/cpp/mapreduce/interface/common.h>
+
+#include <yt/cpp/mapreduce/raw_client/raw_requests.h>
+
+#include <library/cpp/type_info/type_info.h>
+#include <library/cpp/yson/writer.h>
+
+#include <memory>
+
+namespace NYT {
+
+////////////////////////////////////////////////////////////////////////////////
+
+TMaybe<TNode> GetCommonTableFormat(
+ const TVector<TMaybe<TNode>>& formats)
+{
+ TMaybe<TNode> result;
+ bool start = true;
+ for (auto& format : formats) {
+ if (start) {
+ result = format;
+ start = false;
+ continue;
+ }
+
+ if (result.Defined() != format.Defined()) {
+ ythrow yexception() << "Different formats of input tables";
+ }
+
+ if (!result.Defined()) {
+ continue;
+ }
+
+ auto& resultAttrs = result.Get()->GetAttributes();
+ auto& formatAttrs = format.Get()->GetAttributes();
+
+ if (resultAttrs["key_column_names"] != formatAttrs["key_column_names"]) {
+ ythrow yexception() << "Different formats of input tables";
+ }
+
+ bool hasSubkeyColumns = resultAttrs.HasKey("subkey_column_names");
+ if (hasSubkeyColumns != formatAttrs.HasKey("subkey_column_names")) {
+ ythrow yexception() << "Different formats of input tables";
+ }
+
+ if (hasSubkeyColumns &&
+ resultAttrs["subkey_column_names"] != formatAttrs["subkey_column_names"])
+ {
+ ythrow yexception() << "Different formats of input tables";
+ }
+ }
+
+ return result;
+}
+
+TMaybe<TNode> GetTableFormat(
+ const IClientRetryPolicyPtr& retryPolicy,
+ const TClientContext& context,
+ const TTransactionId& transactionId,
+ const TRichYPath& path)
+{
+ auto formatPath = path.Path_ + "/@_format";
+ if (!NDetail::NRawClient::Exists(retryPolicy->CreatePolicyForGenericRequest(), context, transactionId, formatPath)) {
+ return TMaybe<TNode>();
+ }
+ TMaybe<TNode> format = NDetail::NRawClient::Get(retryPolicy->CreatePolicyForGenericRequest(), context, transactionId, formatPath);
+ if (format.Get()->AsString() != "yamred_dsv") {
+ return TMaybe<TNode>();
+ }
+ auto& formatAttrs = format.Get()->Attributes();
+ if (!formatAttrs.HasKey("key_column_names")) {
+ ythrow yexception() <<
+ "Table '" << path.Path_ << "': attribute 'key_column_names' is missing";
+ }
+ formatAttrs["has_subkey"] = "true";
+ formatAttrs["lenval"] = "true";
+ return format;
+}
+
+TMaybe<TNode> GetTableFormats(
+ const IClientRetryPolicyPtr& clientRetryPolicy,
+ const TClientContext& context,
+ const TTransactionId& transactionId,
+ const TVector<TRichYPath>& inputs)
+{
+ TVector<TMaybe<TNode>> formats;
+ for (auto& table : inputs) {
+ formats.push_back(GetTableFormat(clientRetryPolicy, context, transactionId, table));
+ }
+
+ return GetCommonTableFormat(formats);
+}
+
+////////////////////////////////////////////////////////////////////////////////
+
+namespace NDetail {
+
+////////////////////////////////////////////////////////////////////////////////
+
+NSkiff::TSkiffSchemaPtr TryCreateSkiffSchema(
+ const TClientContext& context,
+ const IClientRetryPolicyPtr& clientRetryPolicy,
+ const TTransactionId& transactionId,
+ const TVector<TRichYPath>& tables,
+ const TOperationOptions& options,
+ ENodeReaderFormat nodeReaderFormat)
+{
+ bool hasInputQuery = options.Spec_.Defined() && options.Spec_->IsMap() && options.Spec_->HasKey("input_query");
+ if (hasInputQuery) {
+ Y_ENSURE_EX(nodeReaderFormat != ENodeReaderFormat::Skiff,
+ TApiUsageError() << "Cannot use Skiff format for operations with 'input_query' in spec");
+ return nullptr;
+ }
+ return CreateSkiffSchemaIfNecessary(
+ context,
+ clientRetryPolicy,
+ transactionId,
+ nodeReaderFormat,
+ tables,
+ TCreateSkiffSchemaOptions()
+ .HasKeySwitch(true)
+ .HasRangeIndex(true));
+}
+
+TString CreateSkiffConfig(const NSkiff::TSkiffSchemaPtr& schema)
+{
+ TString result;
+ TStringOutput stream(result);
+ ::NYson::TYsonWriter writer(&stream);
+ Serialize(schema, &writer);
+ return result;
+}
+
+TString CreateProtoConfig(const TVector<const ::google::protobuf::Descriptor*>& descriptorList)
+{
+ TString result;
+ TStringOutput messageTypeList(result);
+ for (const auto& descriptor : descriptorList) {
+ messageTypeList << descriptor->full_name() << Endl;
+ }
+ return result;
+}
+
+////////////////////////////////////////////////////////////////////////////////
+
+struct TGetTableStructureDescriptionStringImpl {
+ template<typename T>
+ TString operator()(const T& description) {
+ if constexpr (std::is_same_v<T, TUnspecifiedTableStructure>) {
+ return "Unspecified";
+ } else if constexpr (std::is_same_v<T, TProtobufTableStructure>) {
+ TString res;
+ TStringStream out(res);
+ if (description.Descriptor) {
+ out << description.Descriptor->full_name();
+ } else {
+ out << "<unknown>";
+ }
+ out << " protobuf message";
+ return res;
+ } else {
+ static_assert(TDependentFalse<T>, "Unknown type");
+ }
+ }
+};
+
+TString GetTableStructureDescriptionString(const TTableStructure& tableStructure)
+{
+ return std::visit(TGetTableStructureDescriptionStringImpl(), tableStructure);
+}
+
+////////////////////////////////////////////////////////////////////////////////
+
+TString JobTablePathString(const TStructuredJobTable& jobTable)
+{
+ if (jobTable.RichYPath) {
+ return jobTable.RichYPath->Path_;
+ } else {
+ return "<intermediate-table>";
+ }
+}
+
+TStructuredJobTableList ToStructuredJobTableList(const TVector<TStructuredTablePath>& tableList)
+{
+ TStructuredJobTableList result;
+ for (const auto& table : tableList) {
+ result.push_back(TStructuredJobTable{table.Description, table.RichYPath});
+ }
+ return result;
+}
+
+TStructuredJobTableList CanonizeStructuredTableList(const TClientContext& context, const TVector<TStructuredTablePath>& tableList)
+{
+ TVector<TRichYPath> toCanonize;
+ toCanonize.reserve(tableList.size());
+ for (const auto& table : tableList) {
+ toCanonize.emplace_back(table.RichYPath);
+ }
+ const auto canonized = NRawClient::CanonizeYPaths(/* retryPolicy */ nullptr, context, toCanonize);
+ Y_VERIFY(canonized.size() == tableList.size());
+
+ TStructuredJobTableList result;
+ result.reserve(tableList.size());
+ for (size_t i = 0; i != canonized.size(); ++i) {
+ result.emplace_back(TStructuredJobTable{tableList[i].Description, canonized[i]});
+ }
+ return result;
+}
+
+TVector<TRichYPath> GetPathList(
+ const TStructuredJobTableList& tableList,
+ const TMaybe<TVector<TTableSchema>>& jobSchemaInferenceResult,
+ bool inferSchemaFromDescriptions)
+{
+ Y_VERIFY(!jobSchemaInferenceResult || tableList.size() == jobSchemaInferenceResult->size());
+
+ auto maybeInferSchema = [&] (const TStructuredJobTable& table, ui32 tableIndex) -> TMaybe<TTableSchema> {
+ if (jobSchemaInferenceResult && !jobSchemaInferenceResult->at(tableIndex).Empty()) {
+ return jobSchemaInferenceResult->at(tableIndex);
+ }
+ if (inferSchemaFromDescriptions) {
+ return GetTableSchema(table.Description);
+ }
+ return Nothing();
+ };
+
+ TVector<TRichYPath> result;
+ result.reserve(tableList.size());
+ for (size_t tableIndex = 0; tableIndex != tableList.size(); ++tableIndex) {
+ const auto& table = tableList[tableIndex];
+ Y_VERIFY(table.RichYPath, "Cannot get path for intermediate table");
+ auto richYPath = *table.RichYPath;
+ if (!richYPath.Schema_) {
+ if (auto schema = maybeInferSchema(table, tableIndex)) {
+ richYPath.Schema(std::move(*schema));
+ }
+ }
+
+ result.emplace_back(std::move(richYPath));
+ }
+ return result;
+}
+
+
+TStructuredRowStreamDescription GetJobStreamDescription(
+ const IStructuredJob& job,
+ EIODirection direction)
+{
+ switch (direction) {
+ case EIODirection::Input:
+ return job.GetInputRowStreamDescription();
+ case EIODirection::Output:
+ return job.GetOutputRowStreamDescription();
+ default:
+ Y_FAIL("unreachable");
+ }
+}
+
+TString GetSuffix(EIODirection direction)
+{
+ switch (direction) {
+ case EIODirection::Input:
+ return "_input";
+ case EIODirection::Output:
+ return "_output";
+ }
+ Y_FAIL("unreachable");
+}
+
+TString GetAddIOMethodName(EIODirection direction)
+{
+ switch (direction) {
+ case EIODirection::Input:
+ return "AddInput<>";
+ case EIODirection::Output:
+ return "AddOutput<>";
+ }
+ Y_FAIL("unreachable");
+}
+
+////////////////////////////////////////////////////////////////////////////////
+
+struct TFormatBuilder::TFormatSwitcher
+{
+ template <typename T>
+ auto operator() (const T& /*t*/) {
+ if constexpr (std::is_same_v<T, TTNodeStructuredRowStream>) {
+ return &TFormatBuilder::CreateNodeFormat;
+ } else if constexpr (std::is_same_v<T, TTYaMRRowStructuredRowStream>) {
+ return &TFormatBuilder::CreateYamrFormat;
+ } else if constexpr (std::is_same_v<T, TProtobufStructuredRowStream>) {
+ return &TFormatBuilder::CreateProtobufFormat;
+ } else if constexpr (std::is_same_v<T, TVoidStructuredRowStream>) {
+ return &TFormatBuilder::CreateVoidFormat;
+ } else {
+ static_assert(TDependentFalse<T>, "unknown stream description");
+ }
+ }
+};
+
+TFormatBuilder::TFormatBuilder(
+ IClientRetryPolicyPtr clientRetryPolicy,
+ TClientContext context,
+ TTransactionId transactionId,
+ TOperationOptions operationOptions)
+ : ClientRetryPolicy_(std::move(clientRetryPolicy))
+ , Context_(std::move(context))
+ , TransactionId_(transactionId)
+ , OperationOptions_(std::move(operationOptions))
+{ }
+
+std::pair <TFormat, TMaybe<TSmallJobFile>> TFormatBuilder::CreateFormat(
+ const IStructuredJob& job,
+ const EIODirection& direction,
+ const TStructuredJobTableList& structuredTableList,
+ const TMaybe <TFormatHints>& formatHints,
+ ENodeReaderFormat nodeReaderFormat,
+ bool allowFormatFromTableAttribute)
+{
+ auto jobStreamDescription = GetJobStreamDescription(job, direction);
+ auto method = std::visit(TFormatSwitcher(), jobStreamDescription);
+ return (this->*method)(
+ job,
+ direction,
+ structuredTableList,
+ formatHints,
+ nodeReaderFormat,
+ allowFormatFromTableAttribute);
+}
+
+std::pair<TFormat, TMaybe<TSmallJobFile>> TFormatBuilder::CreateVoidFormat(
+ const IStructuredJob& /*job*/,
+ const EIODirection& /*direction*/,
+ const TStructuredJobTableList& /*structuredTableList*/,
+ const TMaybe<TFormatHints>& /*formatHints*/,
+ ENodeReaderFormat /*nodeReaderFormat*/,
+ bool /*allowFormatFromTableAttribute*/)
+{
+ return {
+ TFormat(),
+ Nothing()
+ };
+}
+
+std::pair<TFormat, TMaybe<TSmallJobFile>> TFormatBuilder::CreateYamrFormat(
+ const IStructuredJob& job,
+ const EIODirection& direction,
+ const TStructuredJobTableList& structuredTableList,
+ const TMaybe<TFormatHints>& /*formatHints*/,
+ ENodeReaderFormat /*nodeReaderFormat*/,
+ bool allowFormatFromTableAttribute)
+{
+ for (const auto& table: structuredTableList) {
+ if (!std::holds_alternative<TUnspecifiedTableStructure>(table.Description)) {
+ ythrow TApiUsageError()
+ << "cannot use " << direction << " table '" << JobTablePathString(table)
+ << "' with job " << TJobFactory::Get()->GetJobName(&job) << "; "
+ << "table has unsupported structure description; check " << GetAddIOMethodName(direction) << " for this table";
+ }
+ }
+ TMaybe<TNode> formatFromTableAttributes;
+ if (allowFormatFromTableAttribute && OperationOptions_.UseTableFormats_) {
+ TVector<TRichYPath> tableList;
+ for (const auto& table: structuredTableList) {
+ Y_VERIFY(table.RichYPath, "Cannot use format from table for intermediate table");
+ tableList.push_back(*table.RichYPath);
+ }
+ formatFromTableAttributes = GetTableFormats(ClientRetryPolicy_, Context_, TransactionId_, tableList);
+ }
+ if (formatFromTableAttributes) {
+ return {
+ TFormat(*formatFromTableAttributes),
+ Nothing()
+ };
+ } else {
+ auto formatNode = TNode("yamr");
+ formatNode.Attributes() = TNode()
+ ("lenval", true)
+ ("has_subkey", true)
+ ("enable_table_index", true);
+ return {
+ TFormat(formatNode),
+ Nothing()
+ };
+ }
+}
+
+std::pair<TFormat, TMaybe<TSmallJobFile>> TFormatBuilder::CreateNodeFormat(
+ const IStructuredJob& job,
+ const EIODirection& direction,
+ const TStructuredJobTableList& structuredTableList,
+ const TMaybe<TFormatHints>& formatHints,
+ ENodeReaderFormat nodeReaderFormat,
+ bool /*allowFormatFromTableAttribute*/)
+{
+ for (const auto& table: structuredTableList) {
+ if (!std::holds_alternative<TUnspecifiedTableStructure>(table.Description)) {
+ ythrow TApiUsageError()
+ << "cannot use " << direction << " table '" << JobTablePathString(table)
+ << "' with job " << TJobFactory::Get()->GetJobName(&job) << "; "
+ << "table has unsupported structure description; check AddInput<> / AddOutput<> for this table";
+ }
+ }
+ NSkiff::TSkiffSchemaPtr skiffSchema = nullptr;
+ if (nodeReaderFormat != ENodeReaderFormat::Yson) {
+ TVector<TRichYPath> tableList;
+ for (const auto& table: structuredTableList) {
+ Y_VERIFY(table.RichYPath, "Cannot use skiff with temporary tables");
+ tableList.emplace_back(*table.RichYPath);
+ }
+ skiffSchema = TryCreateSkiffSchema(
+ Context_,
+ ClientRetryPolicy_,
+ TransactionId_,
+ tableList,
+ OperationOptions_,
+ nodeReaderFormat);
+ }
+ if (skiffSchema) {
+ auto format = CreateSkiffFormat(skiffSchema);
+ NYT::NDetail::ApplyFormatHints<TNode>(&format, formatHints);
+ return {
+ CreateSkiffFormat(skiffSchema),
+ TSmallJobFile{
+ TString("skiff") + GetSuffix(direction),
+ CreateSkiffConfig(skiffSchema)
+ }
+ };
+ } else {
+ auto format = TFormat::YsonBinary();
+ NYT::NDetail::ApplyFormatHints<TNode>(&format, formatHints);
+ return {
+ format,
+ Nothing()
+ };
+ }
+}
+
+[[noreturn]] static void ThrowUnsupportedStructureDescription(
+ const EIODirection& direction,
+ const TStructuredJobTable& table,
+ const IStructuredJob& job)
+{
+ ythrow TApiUsageError()
+ << "cannot use " << direction << " table '" << JobTablePathString(table)
+ << "' with job " << TJobFactory::Get()->GetJobName(&job) << "; "
+ << "table has unsupported structure description; check " << GetAddIOMethodName(direction) << " for this table";
+}
+
+[[noreturn]] static void ThrowTypeDeriveFail(
+ const EIODirection& direction,
+ const IStructuredJob& job,
+ const TString& type)
+{
+ ythrow TApiUsageError()
+ << "Cannot derive exact " << type << " type for intermediate " << direction << " table for job "
+ << TJobFactory::Get()->GetJobName(&job)
+ << "; use one of TMapReduceOperationSpec::Hint* methods to specifiy intermediate table structure";
+}
+
+[[noreturn]] static void ThrowUnexpectedDifferentDescriptors(
+ const EIODirection& direction,
+ const TStructuredJobTable& table,
+ const IStructuredJob& job,
+ const TMaybe<TStringBuf> jobDescriptorName,
+ const TMaybe<TStringBuf> descriptorName)
+{
+ ythrow TApiUsageError()
+ << "Job " << TJobFactory::Get()->GetJobName(&job) << " expects "
+ << jobDescriptorName << " as " << direction << ", but table " << JobTablePathString(table)
+ << " is tagged with " << descriptorName;
+}
+
+std::pair<TFormat, TMaybe<TSmallJobFile>> TFormatBuilder::CreateProtobufFormat(
+ const IStructuredJob& job,
+ const EIODirection& direction,
+ const TStructuredJobTableList& structuredTableList,
+ const TMaybe<TFormatHints>& /*formatHints*/,
+ ENodeReaderFormat /*nodeReaderFormat*/,
+ bool /*allowFormatFromTableAttribute*/)
+{
+ if (Context_.Config->UseClientProtobuf) {
+ return {
+ TFormat::YsonBinary(),
+ TSmallJobFile{
+ TString("proto") + GetSuffix(direction),
+ CreateProtoConfig({}),
+ },
+ };
+ }
+ const ::google::protobuf::Descriptor* const jobDescriptor =
+ std::get<TProtobufStructuredRowStream>(GetJobStreamDescription(job, direction)).Descriptor;
+ Y_ENSURE(!structuredTableList.empty(),
+ "empty " << direction << " tables for job " << TJobFactory::Get()->GetJobName(&job));
+
+ TVector<const ::google::protobuf::Descriptor*> descriptorList;
+ for (const auto& table : structuredTableList) {
+ const ::google::protobuf::Descriptor* descriptor = nullptr;
+ if (std::holds_alternative<TProtobufTableStructure>(table.Description)) {
+ descriptor = std::get<TProtobufTableStructure>(table.Description).Descriptor;
+ } else if (table.RichYPath) {
+ ThrowUnsupportedStructureDescription(direction, table, job);
+ }
+ if (!descriptor) {
+ // It must be intermediate table, because there is no proper way to add such table to spec
+ // (AddInput requires to specify proper message).
+ Y_VERIFY(!table.RichYPath, "Descriptors for all tables except intermediate must be known");
+ if (jobDescriptor) {
+ descriptor = jobDescriptor;
+ } else {
+ ThrowTypeDeriveFail(direction, job, "protobuf");
+ }
+ }
+ if (jobDescriptor && descriptor != jobDescriptor) {
+ ThrowUnexpectedDifferentDescriptors(
+ direction,
+ table,
+ job,
+ jobDescriptor->full_name(),
+ descriptor->full_name());
+ }
+ descriptorList.push_back(descriptor);
+ }
+ Y_VERIFY(!descriptorList.empty(), "Messages for proto format are unknown (empty ProtoDescriptors)");
+ return {
+ TFormat::Protobuf(descriptorList, Context_.Config->ProtobufFormatWithDescriptors),
+ TSmallJobFile{
+ TString("proto") + GetSuffix(direction),
+ CreateProtoConfig(descriptorList)
+ },
+ };
+}
+
+////////////////////////////////////////////////////////////////////////////////
+
+struct TGetTableSchemaImpl
+{
+ template <typename T>
+ TMaybe<TTableSchema> operator() (const T& description) {
+ if constexpr (std::is_same_v<T, TUnspecifiedTableStructure>) {
+ return Nothing();
+ } else if constexpr (std::is_same_v<T, TProtobufTableStructure>) {
+ if (!description.Descriptor) {
+ return Nothing();
+ }
+ return CreateTableSchema(*description.Descriptor);
+ } else {
+ static_assert(TDependentFalse<T>, "unknown type");
+ }
+ }
+};
+
+TMaybe<TTableSchema> GetTableSchema(const TTableStructure& tableStructure)
+{
+ return std::visit(TGetTableSchemaImpl(), tableStructure);
+}
+
+////////////////////////////////////////////////////////////////////////////////
+
+} // namespace NDetail
+} // namespace NYT
diff --git a/yt/cpp/mapreduce/client/structured_table_formats.h b/yt/cpp/mapreduce/client/structured_table_formats.h
new file mode 100644
index 0000000000..27d980c587
--- /dev/null
+++ b/yt/cpp/mapreduce/client/structured_table_formats.h
@@ -0,0 +1,146 @@
+#pragma once
+
+#include <yt/cpp/mapreduce/interface/fwd.h>
+
+#include <yt/cpp/mapreduce/interface/config.h>
+#include <yt/cpp/mapreduce/interface/operation.h>
+
+#include <yt/cpp/mapreduce/common/fwd.h>
+
+#include <yt/cpp/mapreduce/http/context.h>
+#include <yt/cpp/mapreduce/http/requests.h>
+
+#include <utility>
+
+namespace NYT {
+
+////////////////////////////////////////////////////////////////////////////////
+
+TMaybe<TNode> GetCommonTableFormat(
+ const TVector<TMaybe<TNode>>& formats);
+
+TMaybe<TNode> GetTableFormat(
+ const IClientRetryPolicyPtr& clientRetryPolicy,
+ const TClientContext& context,
+ const TTransactionId& transactionId,
+ const TRichYPath& path);
+
+TMaybe<TNode> GetTableFormats(
+ const IClientRetryPolicyPtr& clientRetryPolicy,
+ const TClientContext& context,
+ const TTransactionId& transactionId,
+ const TVector<TRichYPath>& paths);
+
+////////////////////////////////////////////////////////////////////////////////
+
+namespace NDetail {
+
+////////////////////////////////////////////////////////////////////////////////
+
+enum class EIODirection
+{
+ Input,
+ Output,
+};
+
+////////////////////////////////////////////////////////////////////////////////
+
+struct TSmallJobFile
+{
+ TString FileName;
+ TString Data;
+};
+
+////////////////////////////////////////////////////////////////////////////////
+
+// Table that is used while preparing operation formats. Can be real table or intermediate
+struct TStructuredJobTable
+{
+ TTableStructure Description;
+ // Might be null for intermediate tables in MapReduce operation
+ TMaybe<TRichYPath> RichYPath;
+
+ static TStructuredJobTable Intermediate(TTableStructure description)
+ {
+ return TStructuredJobTable{std::move(description), Nothing()};
+ }
+};
+using TStructuredJobTableList = TVector<TStructuredJobTable>;
+TString JobTablePathString(const TStructuredJobTable& jobTable);
+TStructuredJobTableList ToStructuredJobTableList(const TVector<TStructuredTablePath>& tableList);
+
+TStructuredJobTableList CanonizeStructuredTableList(const TClientContext& context, const TVector<TStructuredTablePath>& tableList);
+TVector<TRichYPath> GetPathList(
+ const TStructuredJobTableList& tableList,
+ const TMaybe<TVector<TTableSchema>>& schemaInferenceResult,
+ bool inferSchema);
+
+////////////////////////////////////////////////////////////////////////////////
+
+class TFormatBuilder
+{
+private:
+ struct TFormatSwitcher;
+
+public:
+ TFormatBuilder(
+ IClientRetryPolicyPtr clientRetryPolicy,
+ TClientContext context,
+ TTransactionId transactionId,
+ TOperationOptions operationOptions);
+
+ std::pair<TFormat, TMaybe<TSmallJobFile>> CreateFormat(
+ const IStructuredJob& job,
+ const EIODirection& direction,
+ const TStructuredJobTableList& structuredTableList,
+ const TMaybe<TFormatHints>& formatHints,
+ ENodeReaderFormat nodeReaderFormat,
+ bool allowFormatFromTableAttribute);
+
+ std::pair<TFormat, TMaybe<TSmallJobFile>> CreateVoidFormat(
+ const IStructuredJob& job,
+ const EIODirection& direction,
+ const TStructuredJobTableList& structuredTableList,
+ const TMaybe<TFormatHints>& formatHints,
+ ENodeReaderFormat nodeReaderFormat,
+ bool allowFormatFromTableAttribute);
+
+ std::pair<TFormat, TMaybe<TSmallJobFile>> CreateYamrFormat(
+ const IStructuredJob& job,
+ const EIODirection& direction,
+ const TStructuredJobTableList& structuredTableList,
+ const TMaybe<TFormatHints>& formatHints,
+ ENodeReaderFormat nodeReaderFormat,
+ bool allowFormatFromTableAttribute);
+
+ std::pair<TFormat, TMaybe<TSmallJobFile>> CreateNodeFormat(
+ const IStructuredJob& job,
+ const EIODirection& direction,
+ const TStructuredJobTableList& structuredTableList,
+ const TMaybe<TFormatHints>& formatHints,
+ ENodeReaderFormat nodeReaderFormat,
+ bool allowFormatFromTableAttribute);
+
+ std::pair<TFormat, TMaybe<TSmallJobFile>> CreateProtobufFormat(
+ const IStructuredJob& job,
+ const EIODirection& direction,
+ const TStructuredJobTableList& structuredTableList,
+ const TMaybe<TFormatHints>& formatHints,
+ ENodeReaderFormat nodeReaderFormat,
+ bool allowFormatFromTableAttribute);
+
+private:
+ const IClientRetryPolicyPtr ClientRetryPolicy_;
+ const TClientContext Context_;
+ const TTransactionId TransactionId_;
+ const TOperationOptions OperationOptions_;
+};
+
+////////////////////////////////////////////////////////////////////////////////
+
+TMaybe<TTableSchema> GetTableSchema(const TTableStructure& tableStructure);
+
+////////////////////////////////////////////////////////////////////////////////
+
+} // namespace NDetail
+} // namespace NYT
diff --git a/yt/cpp/mapreduce/client/transaction.cpp b/yt/cpp/mapreduce/client/transaction.cpp
new file mode 100644
index 0000000000..0aa1a7a1c3
--- /dev/null
+++ b/yt/cpp/mapreduce/client/transaction.cpp
@@ -0,0 +1,195 @@
+#include "transaction.h"
+
+#include "transaction_pinger.h"
+
+#include <yt/cpp/mapreduce/interface/config.h>
+#include <yt/cpp/mapreduce/interface/error_codes.h>
+
+#include <yt/cpp/mapreduce/common/wait_proxy.h>
+#include <yt/cpp/mapreduce/common/retry_lib.h>
+
+#include <yt/cpp/mapreduce/http/requests.h>
+#include <yt/cpp/mapreduce/http/retry_request.h>
+
+#include <yt/cpp/mapreduce/raw_client/raw_requests.h>
+
+#include <util/datetime/base.h>
+
+#include <util/generic/scope.h>
+
+#include <util/random/random.h>
+
+#include <util/string/builder.h>
+
+namespace NYT {
+
+////////////////////////////////////////////////////////////////////////////////
+
+TPingableTransaction::TPingableTransaction(
+ const IClientRetryPolicyPtr& retryPolicy,
+ const TClientContext& context,
+ const TTransactionId& parentId,
+ ITransactionPingerPtr transactionPinger,
+ const TStartTransactionOptions& options)
+ : ClientRetryPolicy_(retryPolicy)
+ , Context_(context)
+ , AbortableRegistry_(NDetail::TAbortableRegistry::Get())
+ , AbortOnTermination_(true)
+ , AutoPingable_(options.AutoPingable_)
+ , Pinger_(std::move(transactionPinger))
+{
+ auto transactionId = NDetail::NRawClient::StartTransaction(
+ ClientRetryPolicy_->CreatePolicyForGenericRequest(),
+ context,
+ parentId,
+ options);
+
+ auto actualTimeout = options.Timeout_.GetOrElse(Context_.Config->TxTimeout);
+ Init(context, transactionId, actualTimeout);
+}
+
+TPingableTransaction::TPingableTransaction(
+ const IClientRetryPolicyPtr& retryPolicy,
+ const TClientContext& context,
+ const TTransactionId& transactionId,
+ ITransactionPingerPtr transactionPinger,
+ const TAttachTransactionOptions& options)
+ : ClientRetryPolicy_(retryPolicy)
+ , Context_(context)
+ , AbortableRegistry_(NDetail::TAbortableRegistry::Get())
+ , AbortOnTermination_(options.AbortOnTermination_)
+ , AutoPingable_(options.AutoPingable_)
+ , Pinger_(std::move(transactionPinger))
+{
+ auto timeoutNode = NDetail::NRawClient::TryGet(
+ ClientRetryPolicy_->CreatePolicyForGenericRequest(),
+ context,
+ TTransactionId(),
+ "#" + GetGuidAsString(transactionId) + "/@timeout",
+ TGetOptions());
+ if (timeoutNode.IsUndefined()) {
+ throw yexception() << "Transaction " << GetGuidAsString(transactionId) << " does not exist";
+ }
+ auto timeout = TDuration::MilliSeconds(timeoutNode.AsInt64());
+ Init(context, transactionId, timeout);
+}
+
+void TPingableTransaction::Init(
+ const TClientContext& context,
+ const TTransactionId& transactionId,
+ TDuration timeout)
+{
+ TransactionId_ = transactionId;
+
+ if (AbortOnTermination_) {
+ AbortableRegistry_->Add(
+ TransactionId_,
+ ::MakeIntrusive<NDetail::TTransactionAbortable>(context, TransactionId_));
+ }
+
+ if (AutoPingable_) {
+ // Compute 'MaxPingInterval_' and 'MinPingInterval_' such that 'pingInterval == (max + min) / 2'.
+ auto pingInterval = Context_.Config->PingInterval;
+ auto safeTimeout = timeout - TDuration::Seconds(5);
+ MaxPingInterval_ = Max(pingInterval, Min(safeTimeout, pingInterval * 1.5));
+ MinPingInterval_ = pingInterval - (MaxPingInterval_ - pingInterval);
+
+ Pinger_->RegisterTransaction(*this);
+ }
+}
+
+TPingableTransaction::~TPingableTransaction()
+{
+ try {
+ Stop(AbortOnTermination_ ? EStopAction::Abort : EStopAction::Detach);
+ } catch (...) {
+ }
+}
+
+const TTransactionId TPingableTransaction::GetId() const
+{
+ return TransactionId_;
+}
+
+const std::pair<TDuration, TDuration> TPingableTransaction::GetPingInterval() const {
+ return {MinPingInterval_, MaxPingInterval_};
+}
+
+const TClientContext TPingableTransaction::GetContext() const {
+ return Context_;
+}
+
+void TPingableTransaction::Commit()
+{
+ Stop(EStopAction::Commit);
+}
+
+void TPingableTransaction::Abort()
+{
+ Stop(EStopAction::Abort);
+}
+
+void TPingableTransaction::Detach()
+{
+ Stop(EStopAction::Detach);
+}
+
+void TPingableTransaction::Stop(EStopAction action)
+{
+ if (Finalized_) {
+ return;
+ }
+
+ Y_DEFER {
+ Finalized_ = true;
+ if (AutoPingable_ && Pinger_->HasTransaction(*this)) {
+ Pinger_->RemoveTransaction(*this);
+ }
+ };
+
+ switch (action) {
+ case EStopAction::Commit:
+ NDetail::NRawClient::CommitTransaction(
+ ClientRetryPolicy_->CreatePolicyForGenericRequest(),
+ Context_,
+ TransactionId_);
+ break;
+ case EStopAction::Abort:
+ NDetail::NRawClient::AbortTransaction(
+ ClientRetryPolicy_->CreatePolicyForGenericRequest(),
+ Context_,
+ TransactionId_);
+ break;
+ case EStopAction::Detach:
+ // Do nothing.
+ break;
+ }
+
+ AbortableRegistry_->Remove(TransactionId_);
+}
+
+////////////////////////////////////////////////////////////////////////////////
+
+TYPath Snapshot(
+ const IClientRetryPolicyPtr& clientRetryPolicy,
+ const TClientContext& context,
+ const TTransactionId& transactionId,
+ const TYPath& path)
+{
+ auto lockId = NDetail::NRawClient::Lock(
+ clientRetryPolicy->CreatePolicyForGenericRequest(),
+ context,
+ transactionId,
+ path,
+ ELockMode::LM_SNAPSHOT);
+ auto lockedNodeId = NDetail::NRawClient::Get(
+ clientRetryPolicy->CreatePolicyForGenericRequest(),
+ context,
+ transactionId,
+ ::TStringBuilder() << '#' << GetGuidAsString(lockId) << "/@node_id");
+ return "#" + lockedNodeId.AsString();
+}
+
+////////////////////////////////////////////////////////////////////////////////
+
+} // namespace NYT
diff --git a/yt/cpp/mapreduce/client/transaction.h b/yt/cpp/mapreduce/client/transaction.h
new file mode 100644
index 0000000000..559fca619e
--- /dev/null
+++ b/yt/cpp/mapreduce/client/transaction.h
@@ -0,0 +1,95 @@
+#pragma once
+
+#include "abortable_registry.h"
+
+#include <yt/cpp/mapreduce/http/requests.h>
+#include <yt/cpp/mapreduce/http/retry_request.h>
+
+#include <util/datetime/base.h>
+#include <util/generic/maybe.h>
+#include <util/generic/ptr.h>
+#include <util/system/thread.h>
+
+#include <atomic>
+
+namespace NYT {
+
+////////////////////////////////////////////////////////////////////////////////
+
+class TPingableTransaction
+{
+public:
+ //
+ // Start a new transaction.
+ TPingableTransaction(
+ const IClientRetryPolicyPtr& retryPolicy,
+ const TClientContext& context,
+ const TTransactionId& parentId,
+ ITransactionPingerPtr transactionPinger,
+ const TStartTransactionOptions& options);
+
+ //
+ // Attach to an existing transaction.
+ TPingableTransaction(
+ const IClientRetryPolicyPtr& retryPolicy,
+ const TClientContext& context,
+ const TTransactionId& transactionId,
+ ITransactionPingerPtr transactionPinger,
+ const TAttachTransactionOptions& options);
+
+ ~TPingableTransaction();
+
+ const TTransactionId GetId() const;
+
+ const std::pair<TDuration, TDuration> GetPingInterval() const;
+ const TClientContext GetContext() const;
+
+ void Commit();
+ void Abort();
+ void Detach();
+
+
+private:
+ enum class EStopAction
+ {
+ Detach,
+ Abort,
+ Commit,
+ };
+
+private:
+ IClientRetryPolicyPtr ClientRetryPolicy_;
+ TClientContext Context_;
+ TTransactionId TransactionId_;
+ TDuration MinPingInterval_;
+ TDuration MaxPingInterval_;
+
+ // We have to own an IntrusivePtr to registry to prevent use-after-free.
+ ::TIntrusivePtr<NDetail::TAbortableRegistry> AbortableRegistry_;
+
+ bool AbortOnTermination_;
+
+ bool AutoPingable_;
+ bool Finalized_ = false;
+ ITransactionPingerPtr Pinger_;
+
+private:
+ void Init(
+ const TClientContext& context,
+ const TTransactionId& transactionId,
+ TDuration timeout);
+
+ void Stop(EStopAction action);
+};
+
+////////////////////////////////////////////////////////////////////////////////
+
+TYPath Snapshot(
+ const IClientRetryPolicyPtr& clientRetryPolicy,
+ const TClientContext& context,
+ const TTransactionId& transactionId,
+ const TYPath& path);
+
+////////////////////////////////////////////////////////////////////////////////
+
+} // namespace NYT
diff --git a/yt/cpp/mapreduce/client/transaction_pinger.cpp b/yt/cpp/mapreduce/client/transaction_pinger.cpp
new file mode 100644
index 0000000000..2b51e47f9f
--- /dev/null
+++ b/yt/cpp/mapreduce/client/transaction_pinger.cpp
@@ -0,0 +1,321 @@
+#include "transaction_pinger.h"
+
+#include "transaction.h"
+
+#include <yt/cpp/mapreduce/interface/config.h>
+#include <yt/cpp/mapreduce/interface/error_codes.h>
+#include <yt/cpp/mapreduce/interface/logging/yt_log.h>
+
+#include <yt/cpp/mapreduce/common/wait_proxy.h>
+#include <yt/cpp/mapreduce/common/retry_lib.h>
+
+#include <yt/cpp/mapreduce/http/requests.h>
+#include <yt/cpp/mapreduce/http/retry_request.h>
+
+#include <yt/cpp/mapreduce/raw_client/raw_requests.h>
+
+#if defined(__x86_64__) || defined(__arm64__)
+ #include <yt/yt/core/concurrency/periodic_executor.h>
+ #include <yt/yt/core/concurrency/poller.h>
+ #include <yt/yt/core/concurrency/scheduler_api.h>
+ #include <yt/yt/core/concurrency/thread_pool_poller.h>
+ #include <yt/yt/core/concurrency/thread_pool.h>
+
+ #include <yt/yt/core/http/client.h>
+ #include <yt/yt/core/http/http.h>
+#endif // defined(__x86_64__) || defined(__arm64__)
+
+#include <library/cpp/yson/node/node_io.h>
+
+#include <library/cpp/yt/threading/spin_lock.h>
+#include <library/cpp/yt/assert/assert.h>
+
+#include <util/datetime/base.h>
+#include <util/random/random.h>
+
+namespace NYT {
+
+////////////////////////////////////////////////////////////////////////////////
+
+#if defined(__x86_64__) || defined(__arm64__)
+
+namespace {
+
+////////////////////////////////////////////////////////////////////////////////
+
+void CheckError(const TString& requestId, NHttp::IResponsePtr response)
+{
+ TErrorResponse errorResponse(static_cast<int>(response->GetStatusCode()), requestId);
+
+ if (const auto* ytError = response->GetHeaders()->Find("X-YT-Error")) {
+ errorResponse.ParseFromJsonError(*ytError);
+ }
+ if (errorResponse.IsOk()) {
+ return;
+ }
+
+ YT_LOG_ERROR("RSP %v - HTTP %v - %v",
+ requestId,
+ response->GetStatusCode(),
+ errorResponse.AsStrBuf());
+
+ ythrow errorResponse;
+
+////////////////////////////////////////////////////////////////////////////////
+
+} // namespace
+
+void PingTx(NHttp::IClientPtr httpClient, const TPingableTransaction& tx)
+{
+ auto url = TString::Join("http://", tx.GetContext().ServerName, "/api/", tx.GetContext().Config->ApiVersion, "/ping_tx");
+ auto headers = New<NHttp::THeaders>();
+ auto requestId = CreateGuidAsString();
+
+ headers->Add("Host", url);
+ headers->Add("User-Agent", TProcessState::Get()->ClientVersion);
+
+ const auto& token = tx.GetContext().Token;
+ if (!token.empty()) {
+ headers->Add("Authorization", "OAuth " + token);
+ }
+
+ headers->Add("Transfer-Encoding", "chunked");
+ headers->Add("X-YT-Correlation-Id", requestId);
+ headers->Add("X-YT-Header-Format", "<format=text>yson");
+ headers->Add("Content-Encoding", "identity");
+ headers->Add("Accept-Encoding", "identity");
+
+ TNode node;
+ node["transaction_id"] = GetGuidAsString(tx.GetId());
+ auto strParams = NodeToYsonString(node);
+
+ YT_LOG_DEBUG("REQ %v - sending request (HostName: %v; Method POST %v; X-YT-Parameters (sent in body): %v)",
+ requestId,
+ tx.GetContext().ServerName,
+ url,
+ strParams
+ );
+
+ auto response = NConcurrency::WaitFor(httpClient->Post(url, TSharedRef::FromString(strParams), headers)).ValueOrThrow();
+ CheckError(requestId, response);
+
+ YT_LOG_DEBUG("RSP %v - received response %v bytes. (%v)",
+ requestId,
+ response->ReadAll().size(),
+ strParams);
+}
+
+} // namespace
+
+////////////////////////////////////////////////////////////////////////////////
+
+class TSharedTransactionPinger
+ : public ITransactionPinger
+{
+public:
+ TSharedTransactionPinger(NHttp::IClientPtr httpClient, int poolThreadCount)
+ : PingerPool_(NConcurrency::CreateThreadPool(
+ poolThreadCount, "tx_pinger_pool"))
+ , HttpClient_(std::move(httpClient))
+ { }
+
+ ~TSharedTransactionPinger() override
+ {
+ PingerPool_->Shutdown();
+ }
+
+ ITransactionPingerPtr GetChildTxPinger() override
+ {
+ return this;
+ }
+
+ void RegisterTransaction(const TPingableTransaction& pingableTx) override
+ {
+ auto [minPingInterval, maxPingInterval] = pingableTx.GetPingInterval();
+ auto pingInterval = (minPingInterval + maxPingInterval) / 2;
+ double jitter = (maxPingInterval - pingInterval) / pingInterval;
+
+ auto opts = NConcurrency::TPeriodicExecutorOptions{pingInterval, pingInterval, jitter};
+ auto periodic = std::make_shared<NConcurrency::TPeriodicExecutorPtr>(nullptr);
+ // Have to use weak_ptr in order to break reference cycle
+ // This weak_ptr holds pointer to periodic, which will contain this lambda
+ // Also we consider that lifetime of this lambda is no longer than lifetime of pingableTx
+ // because every pingableTx have to call RemoveTransaction before it is destroyed
+ auto pingRoutine = BIND([this, &pingableTx, periodic = std::weak_ptr{periodic}] {
+ auto strong_ptr = periodic.lock();
+ YT_VERIFY(strong_ptr);
+ DoPingTransaction(pingableTx, *strong_ptr);
+ });
+ *periodic = New<NConcurrency::TPeriodicExecutor>(PingerPool_->GetInvoker(), pingRoutine, opts);
+ (*periodic)->Start();
+
+ auto guard = Guard(SpinLock_);
+ YT_VERIFY(!Transactions_.contains(pingableTx.GetId()));
+ Transactions_[pingableTx.GetId()] = std::move(periodic);
+ }
+
+ bool HasTransaction(const TPingableTransaction& pingableTx) override
+ {
+ auto guard = Guard(SpinLock_);
+ return Transactions_.contains(pingableTx.GetId());
+ }
+
+
+ void RemoveTransaction(const TPingableTransaction& pingableTx) override
+ {
+ std::shared_ptr<NConcurrency::TPeriodicExecutorPtr> periodic;
+ {
+ auto guard = Guard(SpinLock_);
+
+ auto it = Transactions_.find(pingableTx.GetId());
+
+ YT_VERIFY(it != Transactions_.end());
+
+ periodic = std::move(it->second);
+ Transactions_.erase(it);
+ }
+ NConcurrency::WaitUntilSet((*periodic)->Stop());
+ }
+
+private:
+ void DoPingTransaction(const TPingableTransaction& pingableTx,
+ NConcurrency::TPeriodicExecutorPtr periodic)
+ {
+ try {
+ PingTx(HttpClient_, pingableTx);
+ } catch (const std::exception& e) {
+ if (auto* errorResponse = dynamic_cast<const TErrorResponse*>(&e)) {
+ if (errorResponse->GetError().ContainsErrorCode(NYT::NClusterErrorCodes::NTransactionClient::NoSuchTransaction)) {
+ YT_UNUSED_FUTURE(periodic->Stop());
+ } else if (errorResponse->GetError().ContainsErrorCode(NYT::NClusterErrorCodes::Timeout)) {
+ periodic->ScheduleOutOfBand();
+ }
+ }
+ }
+ }
+
+
+private:
+ YT_DECLARE_SPIN_LOCK(NThreading::TSpinLock, SpinLock_);
+ THashMap<TTransactionId, std::shared_ptr<NConcurrency::TPeriodicExecutorPtr>> Transactions_;
+
+ NConcurrency::IThreadPoolPtr PingerPool_;
+ NHttp::IClientPtr HttpClient_;
+};
+
+#endif // defined(__x86_64__) || defined(__arm64__)
+
+////////////////////////////////////////////////////////////////////////////////
+
+class TThreadPerTransactionPinger
+ : public ITransactionPinger
+{
+public:
+ ~TThreadPerTransactionPinger() override
+ {
+ if (Running_) {
+ RemoveTransaction(*PingableTx_);
+ }
+ }
+
+ ITransactionPingerPtr GetChildTxPinger() override
+ {
+ return MakeIntrusive<TThreadPerTransactionPinger>();
+ }
+
+ void RegisterTransaction(const TPingableTransaction& pingableTx) override
+ {
+ YT_VERIFY(!Running_);
+ YT_VERIFY(PingableTx_ == nullptr);
+
+ PingableTx_ = &pingableTx;
+ Running_ = true;
+
+ PingerThread_ = MakeHolder<TThread>(
+ TThread::TParams{Pinger, this}.SetName("pingable_tx"));
+ PingerThread_->Start();
+ }
+
+ bool HasTransaction(const TPingableTransaction& pingableTx) override
+ {
+ return PingableTx_ == &pingableTx && Running_;
+ }
+
+ void RemoveTransaction(const TPingableTransaction& pingableTx) override
+ {
+ YT_VERIFY(HasTransaction(pingableTx));
+
+ Running_ = false;
+ if (PingerThread_) {
+ PingerThread_->Join();
+ }
+ }
+
+private:
+ static void* Pinger(void* opaque)
+ {
+ static_cast<TThreadPerTransactionPinger*>(opaque)->Pinger();
+ return nullptr;
+ }
+
+ void Pinger()
+ {
+ auto [minPingInterval, maxPingInterval] = PingableTx_->GetPingInterval();
+ while (Running_) {
+ TDuration waitTime = minPingInterval + (maxPingInterval - minPingInterval) * RandomNumber<float>();
+ try {
+ auto noRetryPolicy = MakeIntrusive<TAttemptLimitedRetryPolicy>(1u, PingableTx_->GetContext().Config);
+ NDetail::NRawClient::PingTx(noRetryPolicy, PingableTx_->GetContext(), PingableTx_->GetId());
+ } catch (const std::exception& e) {
+ if (auto* errorResponse = dynamic_cast<const TErrorResponse*>(&e)) {
+ if (errorResponse->GetError().ContainsErrorCode(NYT::NClusterErrorCodes::NTransactionClient::NoSuchTransaction)) {
+ break;
+ } else if (errorResponse->GetError().ContainsErrorCode(NYT::NClusterErrorCodes::Timeout)) {
+ waitTime = TDuration::MilliSeconds(0);
+ }
+ }
+ // Else do nothing, going to retry this error.
+ }
+
+ TInstant t = Now();
+ while (Running_ && Now() - t < waitTime) {
+ NDetail::TWaitProxy::Get()->Sleep(TDuration::MilliSeconds(100));
+ }
+ }
+ }
+
+private:
+ const TPingableTransaction* PingableTx_ = nullptr;
+
+ std::atomic<bool> Running_ = false;
+ THolder<TThread> PingerThread_;
+};
+
+////////////////////////////////////////////////////////////////////////////////
+
+ITransactionPingerPtr CreateTransactionPinger(const TConfigPtr& config)
+{
+ if (config->UseAsyncTxPinger) {
+// TODO(aleexfi): Remove it after YT-17689
+#if defined(__x86_64__) || defined(__arm64__)
+ YT_LOG_DEBUG("Using async transaction pinger");
+ auto httpClientConfig = NYT::New<NHttp::TClientConfig>();
+ httpClientConfig->MaxIdleConnections = 16;
+ auto httpPoller = NConcurrency::CreateThreadPoolPoller(
+ config->AsyncHttpClientThreads,
+ "tx_http_client_poller");
+ auto httpClient = NHttp::CreateClient(std::move(httpClientConfig), std::move(httpPoller));
+
+ return MakeIntrusive<TSharedTransactionPinger>(
+ std::move(httpClient),
+ config->AsyncTxPingerPoolThreads);
+#else
+ YT_LOG_WARNING("Async transaction pinger is not supported on your platform. Fallback to TThreadPerTransactionPinger...");
+#endif // defined(__x86_64__) || defined(__arm64__)
+ }
+ return MakeIntrusive<TThreadPerTransactionPinger>();
+}
+
+////////////////////////////////////////////////////////////////////////////////
+
+} // namespace NYT
diff --git a/yt/cpp/mapreduce/client/transaction_pinger.h b/yt/cpp/mapreduce/client/transaction_pinger.h
new file mode 100644
index 0000000000..98e8b5cb2f
--- /dev/null
+++ b/yt/cpp/mapreduce/client/transaction_pinger.h
@@ -0,0 +1,39 @@
+#pragma once
+
+#include <yt/cpp/mapreduce/common/fwd.h>
+
+#include <yt/cpp/mapreduce/http/requests.h>
+
+#include <util/generic/ptr.h>
+#include <util/system/thread.h>
+
+namespace NYT {
+
+////////////////////////////////////////////////////////////////////////////////
+
+class TPingableTransaction;
+
+////////////////////////////////////////////////////////////////////////////////
+
+// Each registered transaction must be removed from pinger
+// (using RemoveTransaction) before it is destroyed
+class ITransactionPinger
+ : public TThrRefBase
+{
+public:
+ virtual ~ITransactionPinger() = default;
+
+ virtual ITransactionPingerPtr GetChildTxPinger() = 0;
+
+ virtual void RegisterTransaction(const TPingableTransaction& pingableTx) = 0;
+
+ virtual bool HasTransaction(const TPingableTransaction& pingableTx) = 0;
+
+ virtual void RemoveTransaction(const TPingableTransaction& pingableTx) = 0;
+};
+
+ITransactionPingerPtr CreateTransactionPinger(const TConfigPtr& config);
+
+////////////////////////////////////////////////////////////////////////////////
+
+} // namespace NYT
diff --git a/yt/cpp/mapreduce/client/ya.make b/yt/cpp/mapreduce/client/ya.make
new file mode 100644
index 0000000000..a1b3b4da69
--- /dev/null
+++ b/yt/cpp/mapreduce/client/ya.make
@@ -0,0 +1,75 @@
+LIBRARY()
+
+INCLUDE(${ARCADIA_ROOT}/yt/ya_cpp.make.inc)
+
+SRCS(
+ abortable_registry.cpp
+ batch_request_impl.cpp
+ client_reader.cpp
+ client_writer.cpp
+ client.cpp
+ file_reader.cpp
+ file_writer.cpp
+ format_hints.cpp
+ init.cpp
+ lock.cpp
+ operation_helpers.cpp
+ operation_preparer.cpp
+ operation_tracker.cpp
+ operation.cpp
+ prepare_operation.cpp
+ py_helpers.cpp
+ retry_heavy_write_request.cpp
+ retryful_writer.cpp
+ retryless_writer.cpp
+ skiff.cpp
+ structured_table_formats.cpp
+ transaction.cpp
+ transaction_pinger.cpp
+ yt_poller.cpp
+)
+
+PEERDIR(
+ library/cpp/digest/md5
+ library/cpp/sighandler
+ library/cpp/threading/blocking_queue
+ library/cpp/threading/future
+ library/cpp/type_info
+ library/cpp/yson
+ yt/cpp/mapreduce/common
+ yt/cpp/mapreduce/http
+ yt/cpp/mapreduce/interface
+ yt/cpp/mapreduce/io
+ yt/cpp/mapreduce/library/table_schema
+ yt/cpp/mapreduce/raw_client
+)
+
+IF (ARCH_X86_64 OR OS_DARWIN)
+ PEERDIR(
+ yt/yt/core
+ yt/yt/core/http
+ )
+ELSE()
+ # Suppress yamaker's WBadIncl error on exotic platforms
+ PEERDIR(
+ yt/yt_proto/yt/core
+ )
+ENDIF()
+
+IF (BUILD_TYPE == "PROFILE")
+ PEERDIR(
+ yt/yt/library/ytprof
+ )
+
+ SRCS(
+ job_profiler.cpp
+ )
+ELSE()
+ SRCS(
+ dummy_job_profiler.cpp
+ )
+ENDIF()
+
+GENERATE_ENUM_SERIALIZATION(structured_table_formats.h)
+
+END()
diff --git a/yt/cpp/mapreduce/client/yt_poller.cpp b/yt/cpp/mapreduce/client/yt_poller.cpp
new file mode 100644
index 0000000000..e0bea1690e
--- /dev/null
+++ b/yt/cpp/mapreduce/client/yt_poller.cpp
@@ -0,0 +1,132 @@
+#include "yt_poller.h"
+
+#include <yt/cpp/mapreduce/raw_client/raw_batch_request.h>
+#include <yt/cpp/mapreduce/raw_client/raw_requests.h>
+
+#include <yt/cpp/mapreduce/common/debug_metrics.h>
+#include <yt/cpp/mapreduce/common/retry_lib.h>
+#include <yt/cpp/mapreduce/common/wait_proxy.h>
+
+#include <yt/cpp/mapreduce/http/retry_request.h>
+
+#include <yt/cpp/mapreduce/interface/config.h>
+
+#include <yt/cpp/mapreduce/interface/logging/yt_log.h>
+
+namespace NYT {
+namespace NDetail {
+
+using namespace NRawClient;
+
+////////////////////////////////////////////////////////////////////////////////
+
+TYtPoller::TYtPoller(
+ TClientContext context,
+ const IClientRetryPolicyPtr& retryPolicy)
+ : Context_(std::move(context))
+ , ClientRetryPolicy_(retryPolicy)
+ , WaiterThread_(&TYtPoller::WatchLoopProc, this)
+{
+ WaiterThread_.Start();
+}
+
+TYtPoller::~TYtPoller()
+{
+ Stop();
+}
+
+void TYtPoller::Watch(IYtPollerItemPtr item)
+{
+ auto g = Guard(Lock_);
+ Pending_.emplace_back(std::move(item));
+ HasData_.Signal();
+}
+
+
+void TYtPoller::Stop()
+{
+ {
+ auto g = Guard(Lock_);
+ if (!IsRunning_) {
+ return;
+ }
+ IsRunning_ = false;
+ HasData_.Signal();
+ }
+ WaiterThread_.Join();
+}
+
+void TYtPoller::DiscardQueuedItems()
+{
+ for (auto& item : Pending_) {
+ item->OnItemDiscarded();
+ }
+ for (auto& item : InProgress_) {
+ item->OnItemDiscarded();
+ }
+}
+
+void TYtPoller::WatchLoop()
+{
+ TInstant nextRequest = TInstant::Zero();
+ while (true) {
+ {
+ auto g = Guard(Lock_);
+ if (IsRunning_ && Pending_.empty() && InProgress_.empty()) {
+ TWaitProxy::Get()->WaitCondVar(HasData_, Lock_);
+ }
+
+ if (!IsRunning_) {
+ DiscardQueuedItems();
+ return;
+ }
+
+ {
+ auto ug = Unguard(Lock_); // allow adding new items into Pending_
+ TWaitProxy::Get()->SleepUntil(nextRequest);
+ nextRequest = TInstant::Now() + Context_.Config->WaitLockPollInterval;
+ }
+ if (!Pending_.empty()) {
+ InProgress_.splice(InProgress_.end(), Pending_);
+ }
+ Y_VERIFY(!InProgress_.empty());
+ }
+
+ TRawBatchRequest rawBatchRequest(Context_.Config);
+
+ for (auto& item : InProgress_) {
+ item->PrepareRequest(&rawBatchRequest);
+ }
+
+ try {
+ ExecuteBatch(ClientRetryPolicy_->CreatePolicyForGenericRequest(), Context_, rawBatchRequest);
+ } catch (const std::exception& ex) {
+ YT_LOG_ERROR("Exception while executing batch request: %v", ex.what());
+ }
+
+ for (auto it = InProgress_.begin(); it != InProgress_.end();) {
+ auto& item = *it;
+
+ IYtPollerItem::EStatus status = item->OnRequestExecuted();
+
+ if (status == IYtPollerItem::PollBreak) {
+ it = InProgress_.erase(it);
+ } else {
+ ++it;
+ }
+ }
+
+ IncDebugMetric(TStringBuf("yt_poller_top_loop_repeat_count"));
+ }
+}
+
+void* TYtPoller::WatchLoopProc(void* data)
+{
+ static_cast<TYtPoller*>(data)->WatchLoop();
+ return nullptr;
+}
+
+////////////////////////////////////////////////////////////////////////////////
+
+} // namespace NDetail
+} // namespace NYT
diff --git a/yt/cpp/mapreduce/client/yt_poller.h b/yt/cpp/mapreduce/client/yt_poller.h
new file mode 100644
index 0000000000..4f4e9eb7ab
--- /dev/null
+++ b/yt/cpp/mapreduce/client/yt_poller.h
@@ -0,0 +1,86 @@
+#pragma once
+
+#include <yt/cpp/mapreduce/common/fwd.h>
+
+#include <yt/cpp/mapreduce/http/context.h>
+#include <yt/cpp/mapreduce/http/requests.h>
+
+#include <yt/cpp/mapreduce/interface/client.h>
+
+#include <util/generic/list.h>
+#include <util/system/mutex.h>
+#include <util/system/thread.h>
+#include <util/system/condvar.h>
+
+namespace NYT {
+namespace NDetail {
+
+namespace NRawClient {
+ class TRawBatchRequest;
+}
+
+////////////////////////////////////////////////////////////////////////////////
+
+class IYtPollerItem
+ : public TThrRefBase
+{
+public:
+ enum EStatus {
+ PollContinue,
+ PollBreak,
+ };
+
+public:
+ virtual ~IYtPollerItem() = default;
+
+ virtual void PrepareRequest(NRawClient::TRawBatchRequest* batchRequest) = 0;
+
+ // Should return PollContinue if poller should continue polling this item.
+ // Should return PollBreak if poller should stop polling this item.
+ virtual EStatus OnRequestExecuted() = 0;
+
+ virtual void OnItemDiscarded() = 0;
+
+};
+using IYtPollerItemPtr = ::TIntrusivePtr<IYtPollerItem>;
+
+////////////////////////////////////////////////////////////////////////////////
+
+class TYtPoller
+ : public TThrRefBase
+{
+public:
+ TYtPoller(TClientContext context, const IClientRetryPolicyPtr& retryPolicy);
+ ~TYtPoller();
+
+ void Watch(IYtPollerItemPtr item);
+
+ void Stop();
+
+private:
+ void DiscardQueuedItems();
+
+ void WatchLoop();
+ static void* WatchLoopProc(void*);
+
+private:
+ struct TItem;
+
+ const TClientContext Context_;
+ const IClientRetryPolicyPtr ClientRetryPolicy_;
+
+
+ TList<IYtPollerItemPtr> InProgress_;
+ TList<IYtPollerItemPtr> Pending_;
+
+ TThread WaiterThread_;
+ TMutex Lock_;
+ TCondVar HasData_;
+
+ bool IsRunning_ = true;
+};
+
+////////////////////////////////////////////////////////////////////////////////
+
+} // namespace NDetail
+} // namespace NYT
diff --git a/yt/cpp/mapreduce/common/debug_metrics.cpp b/yt/cpp/mapreduce/common/debug_metrics.cpp
new file mode 100644
index 0000000000..6235e55f7e
--- /dev/null
+++ b/yt/cpp/mapreduce/common/debug_metrics.cpp
@@ -0,0 +1,62 @@
+#include "debug_metrics.h"
+
+#include <util/generic/hash.h>
+#include <util/generic/singleton.h>
+
+#include <util/string/cast.h>
+#include <util/system/mutex.h>
+
+namespace NYT {
+namespace NDetail {
+
+////////////////////////////////////////////////////////////////////////////////
+
+class TDebugMetrics {
+public:
+ static TDebugMetrics& Get()
+ {
+ return *Singleton<TDebugMetrics>();
+ }
+
+ void Inc(TStringBuf name)
+ {
+ auto g = Guard(Lock_);
+ auto it = Metrics_.find(name);
+ if (it == Metrics_.end()) {
+ it = Metrics_.emplace(ToString(name), 0).first;
+ }
+ ++it->second;
+ }
+
+ ui64 Get(TStringBuf name) const
+ {
+ auto g = Guard(Lock_);
+ auto it = Metrics_.find(name);
+ if (it == Metrics_.end()) {
+ return 0;
+ } else {
+ return it->second;
+ }
+ }
+
+private:
+ TMutex Lock_;
+ THashMap<TString, ui64> Metrics_;
+};
+
+////////////////////////////////////////////////////////////////////////////////
+
+void IncDebugMetricImpl(TStringBuf name)
+{
+ TDebugMetrics::Get().Inc(name);
+}
+
+ui64 GetDebugMetric(TStringBuf name)
+{
+ return TDebugMetrics::Get().Get(name);
+}
+
+////////////////////////////////////////////////////////////////////////////////
+
+} // namespace NDetail
+} // namespace NYT
diff --git a/yt/cpp/mapreduce/common/debug_metrics.h b/yt/cpp/mapreduce/common/debug_metrics.h
new file mode 100644
index 0000000000..6ebbc89f72
--- /dev/null
+++ b/yt/cpp/mapreduce/common/debug_metrics.h
@@ -0,0 +1,22 @@
+#pragma once
+
+#include <yt/cpp/mapreduce/interface/config.h>
+
+#include <util/generic/strbuf.h>
+
+namespace NYT {
+namespace NDetail {
+
+void IncDebugMetricImpl(TStringBuf name);
+
+// Helper functions that allows to track various events inside YT library, useful for testing.
+inline void IncDebugMetric(TStringBuf name)
+{
+ if (TConfig::Get()->EnableDebugMetrics) {
+ IncDebugMetricImpl(name);
+ }
+}
+ui64 GetDebugMetric(TStringBuf name);
+
+} // namespace NDetail
+} // namespace NYT
diff --git a/yt/cpp/mapreduce/common/fwd.h b/yt/cpp/mapreduce/common/fwd.h
new file mode 100644
index 0000000000..a195e727be
--- /dev/null
+++ b/yt/cpp/mapreduce/common/fwd.h
@@ -0,0 +1,11 @@
+#pragma once
+
+#include <util/generic/fwd.h>
+
+namespace NYT {
+ class IRequestRetryPolicy;
+ using IRequestRetryPolicyPtr = ::TIntrusivePtr<IRequestRetryPolicy>;
+
+ class IClientRetryPolicy;
+ using IClientRetryPolicyPtr = ::TIntrusivePtr<IClientRetryPolicy>;
+}
diff --git a/yt/cpp/mapreduce/common/helpers.cpp b/yt/cpp/mapreduce/common/helpers.cpp
new file mode 100644
index 0000000000..95924d812c
--- /dev/null
+++ b/yt/cpp/mapreduce/common/helpers.cpp
@@ -0,0 +1,126 @@
+#include "helpers.h"
+
+#include <yt/cpp/mapreduce/interface/config.h>
+#include <yt/cpp/mapreduce/interface/serialize.h>
+#include <yt/cpp/mapreduce/interface/fluent.h>
+
+#include <library/cpp/yson/node/node_builder.h>
+#include <library/cpp/yson/node/node_visitor.h>
+
+#include <library/cpp/yson/parser.h>
+#include <library/cpp/yson/writer.h>
+
+#include <library/cpp/json/json_reader.h>
+#include <library/cpp/json/json_value.h>
+
+#include <util/stream/input.h>
+#include <util/stream/output.h>
+#include <util/stream/str.h>
+
+namespace NYT {
+
+////////////////////////////////////////////////////////////////////////////////
+
+TString NodeListToYsonString(const TNode::TListType& nodes)
+{
+ TStringStream stream;
+ ::NYson::TYsonWriter writer(&stream, NYson::EYsonFormat::Binary, ::NYson::EYsonType::ListFragment);
+ auto list = BuildYsonListFluently(&writer);
+ for (const auto& node : nodes) {
+ list.Item().Value(node);
+ }
+ return stream.Str();
+}
+
+TNode PathToNode(const TRichYPath& path)
+{
+ TNode result;
+ TNodeBuilder builder(&result);
+ Serialize(path, &builder);
+ return result;
+}
+
+TNode PathToParamNode(const TRichYPath& path)
+{
+ return TNode()("path", PathToNode(path));
+}
+
+TString AttributesToYsonString(const TNode& node)
+{
+ return BuildYsonStringFluently().BeginMap()
+ .Item("attributes").Value(node)
+ .EndMap();
+}
+
+TString AttributeFilterToYsonString(const TAttributeFilter& filter)
+{
+ return BuildYsonStringFluently().BeginMap()
+ .Item("attributes").Value(filter)
+ .EndMap();
+}
+
+TNode NodeFromTableSchema(const TTableSchema& schema)
+{
+ TNode result;
+ TNodeBuilder builder(&result);
+ Serialize(schema, &builder);
+ return result;
+}
+
+void MergeNodes(TNode& dst, const TNode& src)
+{
+ if (dst.IsMap() && src.IsMap()) {
+ auto& dstMap = dst.AsMap();
+ const auto& srcMap = src.AsMap();
+ for (const auto& srcItem : srcMap) {
+ const auto& key = srcItem.first;
+ auto dstItem = dstMap.find(key);
+ if (dstItem != dstMap.end()) {
+ MergeNodes(dstItem->second, srcItem.second);
+ } else {
+ dstMap[key] = srcItem.second;
+ }
+ }
+ } else {
+ if (dst.GetType() == src.GetType() && src.HasAttributes()) {
+ auto attributes = dst.GetAttributes();
+ MergeNodes(attributes, src.GetAttributes());
+ dst = src;
+ dst.Attributes() = attributes;
+ } else {
+ dst = src;
+ }
+ }
+}
+
+TYPath AddPathPrefix(const TYPath& path, const TString& pathPrefix)
+{
+ if (path.StartsWith("//") || path.StartsWith("#")) {
+ return path;
+ }
+ return pathPrefix + path;
+}
+
+TString GetWriteTableCommand(const TString& apiVersion)
+{
+ return apiVersion == "v2" ? "write" : "write_table";
+}
+
+TString GetReadTableCommand(const TString& apiVersion)
+{
+ return apiVersion == "v2" ? "read" : "read_table";
+}
+
+TString GetWriteFileCommand(const TString& apiVersion)
+{
+ return apiVersion == "v2" ? "upload" : "write_file";
+}
+
+TString GetReadFileCommand(const TString& apiVersion)
+{
+ return apiVersion == "v2" ? "download" : "read_file";
+}
+
+////////////////////////////////////////////////////////////////////////////////
+
+} // namespace NYT
diff --git a/yt/cpp/mapreduce/common/helpers.h b/yt/cpp/mapreduce/common/helpers.h
new file mode 100644
index 0000000000..2174ba820b
--- /dev/null
+++ b/yt/cpp/mapreduce/common/helpers.h
@@ -0,0 +1,37 @@
+#pragma once
+
+#include "fwd.h"
+
+#include <library/cpp/yson/node/node_io.h> // backward compatibility
+
+#include <yt/cpp/mapreduce/interface/node.h>
+#include <yt/cpp/mapreduce/interface/common.h>
+#include <library/cpp/yson/public.h>
+
+namespace NYT {
+
+////////////////////////////////////////////////////////////////////////////////
+
+TString NodeListToYsonString(const TNode::TListType& nodes);
+
+TNode PathToNode(const TRichYPath& path);
+TNode PathToParamNode(const TRichYPath& path);
+
+TString AttributesToYsonString(const TNode& attributes);
+
+TString AttributeFilterToYsonString(const TAttributeFilter& filter);
+
+TNode NodeFromTableSchema(const TTableSchema& schema);
+
+void MergeNodes(TNode& dst, const TNode& src);
+
+TYPath AddPathPrefix(const TYPath& path, const TString& pathPrefix);
+
+TString GetWriteTableCommand(const TString& apiVersion);
+TString GetReadTableCommand(const TString& apiVersion);
+TString GetWriteFileCommand(const TString& apiVersion);
+TString GetReadFileCommand(const TString& apiVersion);
+
+////////////////////////////////////////////////////////////////////////////////
+
+} // namespace NYT
diff --git a/yt/cpp/mapreduce/common/node_builder.h b/yt/cpp/mapreduce/common/node_builder.h
new file mode 100644
index 0000000000..c7f731cf09
--- /dev/null
+++ b/yt/cpp/mapreduce/common/node_builder.h
@@ -0,0 +1,4 @@
+#pragma once
+
+// Backward compatibility.
+#include <library/cpp/yson/node/node_builder.h>
diff --git a/yt/cpp/mapreduce/common/node_visitor.h b/yt/cpp/mapreduce/common/node_visitor.h
new file mode 100644
index 0000000000..a8bde52b5a
--- /dev/null
+++ b/yt/cpp/mapreduce/common/node_visitor.h
@@ -0,0 +1,4 @@
+#pragma once
+
+// Backward compatibility.
+#include <library/cpp/yson/node/node_visitor.h>
diff --git a/yt/cpp/mapreduce/common/retry_lib.cpp b/yt/cpp/mapreduce/common/retry_lib.cpp
new file mode 100644
index 0000000000..cf2c021eb4
--- /dev/null
+++ b/yt/cpp/mapreduce/common/retry_lib.cpp
@@ -0,0 +1,267 @@
+#include "retry_lib.h"
+
+#include <yt/cpp/mapreduce/interface/config.h>
+#include <yt/cpp/mapreduce/interface/errors.h>
+#include <yt/cpp/mapreduce/interface/error_codes.h>
+#include <yt/cpp/mapreduce/interface/retry_policy.h>
+
+#include <util/string/builder.h>
+#include <util/generic/set.h>
+
+namespace NYT {
+
+////////////////////////////////////////////////////////////////////////////////
+
+TAttemptLimitedRetryPolicy::TAttemptLimitedRetryPolicy(ui32 attemptLimit, const TConfigPtr& config)
+ : Config_(config)
+ , AttemptLimit_(attemptLimit)
+{ }
+
+void TAttemptLimitedRetryPolicy::NotifyNewAttempt()
+{
+ ++Attempt_;
+}
+
+TMaybe<TDuration> TAttemptLimitedRetryPolicy::OnGenericError(const std::exception& e)
+{
+ if (IsAttemptLimitExceeded()) {
+ return Nothing();
+ }
+ return GetBackoffDuration(e, Config_);
+}
+
+TMaybe<TDuration> TAttemptLimitedRetryPolicy::OnRetriableError(const TErrorResponse& e)
+{
+ if (IsAttemptLimitExceeded()) {
+ return Nothing();
+ }
+ return GetBackoffDuration(e, Config_);
+}
+
+void TAttemptLimitedRetryPolicy::OnIgnoredError(const TErrorResponse& /*e*/)
+{
+ --Attempt_;
+}
+
+TString TAttemptLimitedRetryPolicy::GetAttemptDescription() const
+{
+ return ::TStringBuilder() << "attempt " << Attempt_ << " of " << AttemptLimit_;
+}
+
+bool TAttemptLimitedRetryPolicy::IsAttemptLimitExceeded() const
+{
+ return Attempt_ >= AttemptLimit_;
+}
+////////////////////////////////////////////////////////////////////////////////
+
+class TTimeLimitedRetryPolicy
+ : public IRequestRetryPolicy
+{
+public:
+ TTimeLimitedRetryPolicy(IRequestRetryPolicyPtr retryPolicy, TDuration timeout)
+ : RetryPolicy_(retryPolicy)
+ , Deadline_(TInstant::Now() + timeout)
+ , Timeout_(timeout)
+ { }
+ void NotifyNewAttempt() override
+ {
+ if (TInstant::Now() >= Deadline_) {
+ ythrow TRequestRetriesTimeout() << "retry timeout exceeded (timeout: " << Timeout_ << ")";
+ }
+ RetryPolicy_->NotifyNewAttempt();
+ }
+
+ TMaybe<TDuration> OnGenericError(const std::exception& e) override
+ {
+ return RetryPolicy_->OnGenericError(e);
+ }
+
+ TMaybe<TDuration> OnRetriableError(const TErrorResponse& e) override
+ {
+ return RetryPolicy_->OnRetriableError(e);
+ }
+
+ void OnIgnoredError(const TErrorResponse& e) override
+ {
+ return RetryPolicy_->OnIgnoredError(e);
+ }
+
+ TString GetAttemptDescription() const override
+ {
+ return RetryPolicy_->GetAttemptDescription();
+ }
+
+private:
+ const IRequestRetryPolicyPtr RetryPolicy_;
+ const TInstant Deadline_;
+ const TDuration Timeout_;
+};
+
+////////////////////////////////////////////////////////////////////////////////
+
+class TDefaultClientRetryPolicy
+ : public IClientRetryPolicy
+{
+public:
+ explicit TDefaultClientRetryPolicy(IRetryConfigProviderPtr retryConfigProvider, const TConfigPtr& config)
+ : RetryConfigProvider_(std::move(retryConfigProvider))
+ , Config_(config)
+ { }
+
+ IRequestRetryPolicyPtr CreatePolicyForGenericRequest() override
+ {
+ return Wrap(CreateDefaultRequestRetryPolicy(Config_));
+ }
+
+ IRequestRetryPolicyPtr CreatePolicyForStartOperationRequest() override
+ {
+ return Wrap(MakeIntrusive<TAttemptLimitedRetryPolicy>(static_cast<ui32>(Config_->StartOperationRetryCount), Config_));
+ }
+
+ IRequestRetryPolicyPtr Wrap(IRequestRetryPolicyPtr basePolicy)
+ {
+ auto config = RetryConfigProvider_->CreateRetryConfig();
+ if (config.RetriesTimeLimit < TDuration::Max()) {
+ return ::MakeIntrusive<TTimeLimitedRetryPolicy>(std::move(basePolicy), config.RetriesTimeLimit);
+ }
+ return basePolicy;
+ }
+
+private:
+ IRetryConfigProviderPtr RetryConfigProvider_;
+ const TConfigPtr Config_;
+};
+
+class TDefaultRetryConfigProvider
+ : public IRetryConfigProvider
+{
+public:
+ TRetryConfig CreateRetryConfig() override
+ {
+ return {};
+ }
+};
+
+////////////////////////////////////////////////////////////////////////////////
+
+IRequestRetryPolicyPtr CreateDefaultRequestRetryPolicy(const TConfigPtr& config)
+{
+ return MakeIntrusive<TAttemptLimitedRetryPolicy>(static_cast<ui32>(config->RetryCount), config);
+}
+
+IClientRetryPolicyPtr CreateDefaultClientRetryPolicy(IRetryConfigProviderPtr retryConfigProvider, const TConfigPtr& config)
+{
+ return MakeIntrusive<TDefaultClientRetryPolicy>(std::move(retryConfigProvider), config);
+}
+IRetryConfigProviderPtr CreateDefaultRetryConfigProvider()
+{
+ return MakeIntrusive<TDefaultRetryConfigProvider>();
+}
+
+////////////////////////////////////////////////////////////////////////////////
+
+static bool IsChunkError(int code)
+{
+ return code / 100 == 7;
+}
+
+// Check whether:
+// 1) codes contain at least one chunk error AND
+// 2) codes don't contain non-retriable chunk errors.
+static bool IsRetriableChunkError(const TSet<int>& codes)
+{
+ using namespace NClusterErrorCodes;
+ auto isChunkError = false;
+ for (auto code : codes) {
+ switch (code) {
+ case NChunkClient::SessionAlreadyExists:
+ case NChunkClient::ChunkAlreadyExists:
+ case NChunkClient::WindowError:
+ case NChunkClient::BlockContentMismatch:
+ case NChunkClient::InvalidBlockChecksum:
+ case NChunkClient::BlockOutOfRange:
+ case NChunkClient::MissingExtension:
+ case NChunkClient::NoSuchBlock:
+ case NChunkClient::NoSuchChunk:
+ case NChunkClient::NoSuchChunkList:
+ case NChunkClient::NoSuchChunkTree:
+ case NChunkClient::NoSuchChunkView:
+ case NChunkClient::NoSuchMedium:
+ return false;
+ default:
+ isChunkError |= IsChunkError(code);
+ break;
+ }
+ }
+ return isChunkError;
+}
+
+static TMaybe<TDuration> TryGetBackoffDuration(const TErrorResponse& errorResponse, const TConfigPtr& config)
+{
+ int httpCode = errorResponse.GetHttpCode();
+ if (httpCode / 100 != 4 && !errorResponse.IsFromTrailers()) {
+ return config->RetryInterval;
+ }
+
+ auto allCodes = errorResponse.GetError().GetAllErrorCodes();
+ using namespace NClusterErrorCodes;
+ if (httpCode == 429
+ || allCodes.count(NSecurityClient::RequestQueueSizeLimitExceeded)
+ || allCodes.count(NRpc::RequestQueueSizeLimitExceeded))
+ {
+ // request rate limit exceeded
+ return config->RateLimitExceededRetryInterval;
+ }
+ if (errorResponse.IsConcurrentOperationsLimitReached()) {
+ // limit for the number of concurrent operations exceeded
+ return config->StartOperationRetryInterval;
+ }
+ if (IsRetriableChunkError(allCodes)) {
+ // chunk client errors
+ return config->ChunkErrorsRetryInterval;
+ }
+ for (auto code : TVector<int>{
+ NRpc::TransportError,
+ NRpc::Unavailable,
+ NApi::RetriableArchiveError,
+ Canceled,
+ }) {
+ if (allCodes.contains(code)) {
+ return config->RetryInterval;
+ }
+ }
+ return Nothing();
+}
+
+TDuration GetBackoffDuration(const TErrorResponse& errorResponse, const TConfigPtr& config)
+{
+ return TryGetBackoffDuration(errorResponse, config).GetOrElse(config->RetryInterval);
+}
+
+bool IsRetriable(const TErrorResponse& errorResponse)
+{
+ // Retriability of an error doesn't depend on config, so just use global one.
+ return TryGetBackoffDuration(errorResponse, TConfig::Get()).Defined();
+}
+
+bool IsRetriable(const std::exception& ex)
+{
+ if (dynamic_cast<const TRequestRetriesTimeout*>(&ex)) {
+ return false;
+ }
+ return true;
+}
+
+TDuration GetBackoffDuration(const std::exception& /*error*/, const TConfigPtr& config)
+{
+ return GetBackoffDuration(config);
+}
+
+TDuration GetBackoffDuration(const TConfigPtr& config)
+{
+ return config->RetryInterval;
+}
+
+////////////////////////////////////////////////////////////////////////////////
+
+} // namespace NYT
diff --git a/yt/cpp/mapreduce/common/retry_lib.h b/yt/cpp/mapreduce/common/retry_lib.h
new file mode 100644
index 0000000000..c6c061f614
--- /dev/null
+++ b/yt/cpp/mapreduce/common/retry_lib.h
@@ -0,0 +1,100 @@
+#pragma once
+
+#include "fwd.h"
+
+#include <yt/cpp/mapreduce/interface/fwd.h>
+
+#include <util/datetime/base.h>
+#include <util/generic/maybe.h>
+#include <util/generic/ptr.h>
+#include <util/generic/string.h>
+
+namespace NYT {
+
+////////////////////////////////////////////////////////////////////////////////
+
+// IRequestRetryPolicy class controls retries of single request.
+class IRequestRetryPolicy
+ : public virtual TThrRefBase
+{
+public:
+ // Helper function that returns text description of current attempt, e.g.
+ // "attempt 3 / 10"
+ // used in logs.
+ virtual TString GetAttemptDescription() const = 0;
+
+ // Library code calls this function before any request attempt.
+ virtual void NotifyNewAttempt() = 0;
+
+ // OnRetriableError is called whenever client gets YT error that can be retried (e.g. operation limit exceeded).
+ // OnGenericError is called whenever request failed due to generic error like network error.
+ //
+ // Both methods must return nothing if policy doesn't want to retry this error.
+ // Otherwise method should return backoff time.
+ virtual TMaybe<TDuration> OnRetriableError(const TErrorResponse& e) = 0;
+ virtual TMaybe<TDuration> OnGenericError(const std::exception& e) = 0;
+
+ // OnIgnoredError is called whenever client gets an error but is going to ignore it.
+ virtual void OnIgnoredError(const TErrorResponse& /*e*/) = 0;
+};
+using IRequestRetryPolicyPtr = ::TIntrusivePtr<IRequestRetryPolicy>;
+
+////////////////////////////////////////////////////////////////////////////////
+
+// IClientRetryPolicy controls creation of policies for individual requests.
+class IClientRetryPolicy
+ : public virtual TThrRefBase
+{
+public:
+ virtual IRequestRetryPolicyPtr CreatePolicyForGenericRequest() = 0;
+ virtual IRequestRetryPolicyPtr CreatePolicyForStartOperationRequest() = 0;
+};
+
+
+////////////////////////////////////////////////////////////////////////////////
+
+class TAttemptLimitedRetryPolicy
+ : public IRequestRetryPolicy
+{
+public:
+ explicit TAttemptLimitedRetryPolicy(ui32 attemptLimit, const TConfigPtr& config);
+
+ void NotifyNewAttempt() override;
+
+ TMaybe<TDuration> OnGenericError(const std::exception& e) override;
+ TMaybe<TDuration> OnRetriableError(const TErrorResponse& e) override;
+ void OnIgnoredError(const TErrorResponse& e) override;
+ TString GetAttemptDescription() const override;
+
+ bool IsAttemptLimitExceeded() const;
+
+protected:
+ const TConfigPtr Config_;
+
+private:
+ const ui32 AttemptLimit_;
+ ui32 Attempt_ = 0;
+};
+
+////////////////////////////////////////////////////////////////////////////////
+
+IRequestRetryPolicyPtr CreateDefaultRequestRetryPolicy(const TConfigPtr& config);
+IClientRetryPolicyPtr CreateDefaultClientRetryPolicy(IRetryConfigProviderPtr retryConfigProvider, const TConfigPtr& config);
+IRetryConfigProviderPtr CreateDefaultRetryConfigProvider();
+
+////////////////////////////////////////////////////////////////////////////////
+
+// Check if error returned by YT can be retried
+bool IsRetriable(const TErrorResponse& errorResponse);
+bool IsRetriable(const std::exception& ex);
+
+// Get backoff duration for errors returned by YT.
+TDuration GetBackoffDuration(const TErrorResponse& errorResponse, const TConfigPtr& config);
+
+// Get backoff duration for errors that are not TErrorResponse.
+TDuration GetBackoffDuration(const std::exception& error, const TConfigPtr& config);
+TDuration GetBackoffDuration(const TConfigPtr& config);
+
+////////////////////////////////////////////////////////////////////////////////
+
+} // namespace NYT
diff --git a/yt/cpp/mapreduce/common/wait_proxy.cpp b/yt/cpp/mapreduce/common/wait_proxy.cpp
new file mode 100644
index 0000000000..3db034a098
--- /dev/null
+++ b/yt/cpp/mapreduce/common/wait_proxy.cpp
@@ -0,0 +1,118 @@
+#include "wait_proxy.h"
+
+
+#include <library/cpp/threading/future/future.h>
+
+#include <util/system/event.h>
+#include <util/system/condvar.h>
+
+namespace NYT {
+namespace NDetail {
+
+////////////////////////////////////////////////////////////////////////////////
+
+bool TDefaultWaitProxy::WaitFuture(const NThreading::TFuture<void>& future, TDuration timeout)
+{
+ return future.Wait(timeout);
+}
+
+bool TDefaultWaitProxy::WaitEvent(TSystemEvent& event, TDuration timeout)
+{
+ return event.WaitT(timeout);
+}
+
+bool TDefaultWaitProxy::WaitCondVar(TCondVar &condVar, TMutex &mutex, TDuration timeout)
+{
+ return condVar.WaitT(mutex, timeout);
+}
+
+void TDefaultWaitProxy::Sleep(TDuration timeout)
+{
+ ::Sleep(timeout);
+}
+
+////////////////////////////////////////////////////////////////////////////////
+
+TWaitProxy::TWaitProxy()
+ : Proxy_(::MakeIntrusive<TDefaultWaitProxy>())
+{ }
+
+TWaitProxy* TWaitProxy::Get()
+{
+ return Singleton<TWaitProxy>();
+}
+
+void TWaitProxy::SetProxy(::TIntrusivePtr<IWaitProxy> proxy)
+{
+ Proxy_ = std::move(proxy);
+}
+
+bool TWaitProxy::WaitFuture(const NThreading::TFuture<void>& future)
+{
+ return Proxy_->WaitFuture(future, TDuration::Max());
+}
+
+bool TWaitProxy::WaitFuture(const NThreading::TFuture<void>& future, TInstant deadLine)
+{
+ return Proxy_->WaitFuture(future, deadLine - TInstant::Now());
+}
+
+bool TWaitProxy::WaitFuture(const NThreading::TFuture<void>& future, TDuration timeout)
+{
+ return Proxy_->WaitFuture(future, timeout);
+}
+
+bool TWaitProxy::WaitEventD(TSystemEvent& event, TInstant deadLine)
+{
+ return Proxy_->WaitEvent(event, deadLine - TInstant::Now());
+}
+
+bool TWaitProxy::WaitEventT(TSystemEvent& event, TDuration timeout)
+{
+ return Proxy_->WaitEvent(event, timeout);
+}
+
+void TWaitProxy::WaitEventI(TSystemEvent& event)
+{
+ Proxy_->WaitEvent(event, TDuration::Max());
+}
+
+bool TWaitProxy::WaitEvent(TSystemEvent& event)
+{
+ return Proxy_->WaitEvent(event, TDuration::Max());
+}
+
+bool TWaitProxy::WaitCondVarD(TCondVar& condVar, TMutex& m, TInstant deadLine)
+{
+ return Proxy_->WaitCondVar(condVar, m, deadLine - TInstant::Now());
+}
+
+bool TWaitProxy::WaitCondVarT(TCondVar& condVar, TMutex& m, TDuration timeOut)
+{
+ return Proxy_->WaitCondVar(condVar, m, timeOut);
+}
+
+void TWaitProxy::WaitCondVarI(TCondVar& condVar, TMutex& m)
+{
+ Proxy_->WaitCondVar(condVar, m, TDuration::Max());
+}
+
+void TWaitProxy::WaitCondVar(TCondVar& condVar, TMutex& m)
+{
+ Proxy_->WaitCondVar(condVar, m, TDuration::Max());
+}
+
+void TWaitProxy::Sleep(TDuration timeout)
+{
+ Proxy_->Sleep(timeout);
+}
+
+void TWaitProxy::SleepUntil(TInstant instant)
+{
+ Proxy_->Sleep(instant - TInstant::Now());
+}
+
+////////////////////////////////////////////////////////////////////////////////
+
+} // namespace NDetail
+} // namespace NYT
diff --git a/yt/cpp/mapreduce/common/wait_proxy.h b/yt/cpp/mapreduce/common/wait_proxy.h
new file mode 100644
index 0000000000..e7c944cf24
--- /dev/null
+++ b/yt/cpp/mapreduce/common/wait_proxy.h
@@ -0,0 +1,53 @@
+#pragma once
+
+#include <yt/cpp/mapreduce/interface/wait_proxy.h>
+
+namespace NYT {
+namespace NDetail {
+
+////////////////////////////////////////////////////////////////////////////////
+
+class TDefaultWaitProxy
+ : public IWaitProxy
+{
+public:
+ bool WaitFuture(const ::NThreading::TFuture<void>& future, TDuration timeout) override;
+ bool WaitEvent(TSystemEvent& event, TDuration timeout) override;
+ bool WaitCondVar(TCondVar& condVar, TMutex& mutex, TDuration timeout) override;
+ void Sleep(TDuration timeout) override;
+};
+
+class TWaitProxy {
+public:
+ TWaitProxy();
+
+ static TWaitProxy* Get();
+
+ // NB: Non thread-safe, should be called only in initialization code.
+ void SetProxy(::TIntrusivePtr<IWaitProxy> proxy);
+
+ bool WaitFuture(const ::NThreading::TFuture<void>& future);
+ bool WaitFuture(const ::NThreading::TFuture<void>& future, TInstant deadLine);
+ bool WaitFuture(const ::NThreading::TFuture<void>& future, TDuration timeout);
+
+ bool WaitEventD(TSystemEvent& event, TInstant deadLine);
+ bool WaitEventT(TSystemEvent& event, TDuration timeout);
+ void WaitEventI(TSystemEvent& event);
+ bool WaitEvent(TSystemEvent& event);
+
+ bool WaitCondVarD(TCondVar& condVar, TMutex& m, TInstant deadLine);
+ bool WaitCondVarT(TCondVar& condVar, TMutex& m, TDuration timeOut);
+ void WaitCondVarI(TCondVar& condVar, TMutex& m);
+ void WaitCondVar(TCondVar& condVar, TMutex& m);
+
+ void Sleep(TDuration timeout);
+ void SleepUntil(TInstant instant);
+
+private:
+ ::TIntrusivePtr<IWaitProxy> Proxy_;
+};
+
+////////////////////////////////////////////////////////////////////////////////
+
+} // namespace NDetail
+} // namespace NYT
diff --git a/yt/cpp/mapreduce/common/ya.make b/yt/cpp/mapreduce/common/ya.make
new file mode 100644
index 0000000000..004708cb44
--- /dev/null
+++ b/yt/cpp/mapreduce/common/ya.make
@@ -0,0 +1,23 @@
+LIBRARY()
+
+INCLUDE(${ARCADIA_ROOT}/yt/ya_cpp.make.inc)
+
+SRCS(
+ debug_metrics.cpp
+ helpers.cpp
+ retry_lib.cpp
+ wait_proxy.cpp
+)
+
+PEERDIR(
+ library/cpp/json
+ library/cpp/svnversion
+ library/cpp/threading/future
+ library/cpp/yson
+ library/cpp/yson/json
+ library/cpp/yson/node
+ yt/cpp/mapreduce/interface
+ yt/cpp/mapreduce/interface/logging
+)
+
+END()
diff --git a/yt/cpp/mapreduce/http/abortable_http_response.cpp b/yt/cpp/mapreduce/http/abortable_http_response.cpp
new file mode 100644
index 0000000000..9da9241d33
--- /dev/null
+++ b/yt/cpp/mapreduce/http/abortable_http_response.cpp
@@ -0,0 +1,223 @@
+#include "abortable_http_response.h"
+
+#include <util/system/mutex.h>
+#include <util/generic/singleton.h>
+#include <util/generic/hash_set.h>
+
+namespace NYT {
+
+////////////////////////////////////////////////////////////////////////////////
+
+class TAbortableHttpResponseRegistry {
+public:
+ TOutageId StartOutage(TString urlPattern, const TOutageOptions& options)
+ {
+ auto g = Guard(Lock_);
+ auto id = NextId_++;
+ IdToOutage.emplace(id, TOutageEntry{std::move(urlPattern), options.ResponseCount_, options.LengthLimit_});
+ return id;
+ }
+
+ void StopOutage(TOutageId id)
+ {
+ auto g = Guard(Lock_);
+ IdToOutage.erase(id);
+ }
+
+ void Add(IAbortableHttpResponse* response)
+ {
+ auto g = Guard(Lock_);
+ for (auto& [id, entry] : IdToOutage) {
+ if (entry.Counter > 0 && response->GetUrl().find(entry.Pattern) != TString::npos) {
+ response->SetLengthLimit(entry.LengthLimit);
+ entry.Counter -= 1;
+ }
+ }
+ ResponseList_.PushBack(response);
+ }
+
+ void Remove(IAbortableHttpResponse* response)
+ {
+ auto g = Guard(Lock_);
+ response->Unlink();
+ }
+
+ static TAbortableHttpResponseRegistry& Get()
+ {
+ return *Singleton<TAbortableHttpResponseRegistry>();
+ }
+
+ int AbortAll(const TString& urlPattern)
+ {
+ int result = 0;
+ for (auto& response : ResponseList_) {
+ if (!response.IsAborted() && response.GetUrl().find(urlPattern) != TString::npos) {
+ response.Abort();
+ ++result;
+ }
+ }
+ return result;
+ }
+
+private:
+ struct TOutageEntry
+ {
+ TString Pattern;
+ size_t Counter;
+ size_t LengthLimit;
+ };
+
+private:
+ TOutageId NextId_ = 0;
+ TIntrusiveList<IAbortableHttpResponse> ResponseList_;
+ THashMap<TOutageId, TOutageEntry> IdToOutage;
+ TMutex Lock_;
+};
+
+////////////////////////////////////////////////////////////////////////////////
+
+TAbortableHttpResponse::TOutage::TOutage(
+ TString urlPattern,
+ TAbortableHttpResponseRegistry& registry,
+ const TOutageOptions& options)
+ : UrlPattern_(std::move(urlPattern))
+ , Registry_(registry)
+ , Id_(registry.StartOutage(UrlPattern_, options))
+{ }
+
+TAbortableHttpResponse::TOutage::~TOutage()
+{
+ Stop();
+}
+
+void TAbortableHttpResponse::TOutage::Stop()
+{
+ if (!Stopped_) {
+ Registry_.StopOutage(Id_);
+ Stopped_ = true;
+ }
+}
+
+////////////////////////////////////////////////////////////////////////////////
+
+TAbortableHttpResponseBase::TAbortableHttpResponseBase(const TString& url)
+ : Url_(url)
+{
+ TAbortableHttpResponseRegistry::Get().Add(this);
+}
+
+TAbortableHttpResponseBase::~TAbortableHttpResponseBase()
+{
+ TAbortableHttpResponseRegistry::Get().Remove(this);
+}
+
+void TAbortableHttpResponseBase::Abort()
+{
+ Aborted_ = true;
+}
+
+void TAbortableHttpResponseBase::SetLengthLimit(size_t limit)
+{
+ LengthLimit_ = limit;
+ if (LengthLimit_ == 0) {
+ Abort();
+ }
+}
+
+const TString& TAbortableHttpResponseBase::GetUrl() const
+{
+ return Url_;
+}
+
+bool TAbortableHttpResponseBase::IsAborted() const
+{
+ return Aborted_;
+}
+
+////////////////////////////////////////////////////////////////////////////////
+
+TAbortableHttpResponse::TAbortableHttpResponse(
+ IInputStream* socketStream,
+ const TString& requestId,
+ const TString& hostName,
+ const TString& url)
+ : THttpResponse(socketStream, requestId, hostName)
+ , TAbortableHttpResponseBase(url)
+{
+}
+
+size_t TAbortableHttpResponse::DoRead(void* buf, size_t len)
+{
+ if (Aborted_) {
+ ythrow TAbortedForTestPurpose() << "response was aborted";
+ }
+ len = std::min(len, LengthLimit_);
+ auto read = THttpResponse::DoRead(buf, len);
+ LengthLimit_ -= read;
+ if (LengthLimit_ == 0) {
+ Abort();
+ }
+ return read;
+}
+
+size_t TAbortableHttpResponse::DoSkip(size_t len)
+{
+ if (Aborted_) {
+ ythrow TAbortedForTestPurpose() << "response was aborted";
+ }
+ return THttpResponse::DoSkip(len);
+}
+
+int TAbortableHttpResponse::AbortAll(const TString& urlPattern)
+{
+ return TAbortableHttpResponseRegistry::Get().AbortAll(urlPattern);
+}
+
+TAbortableHttpResponse::TOutage TAbortableHttpResponse::StartOutage(
+ const TString& urlPattern,
+ const TOutageOptions& options)
+{
+ return TOutage(urlPattern, TAbortableHttpResponseRegistry::Get(), options);
+}
+
+TAbortableHttpResponse::TOutage TAbortableHttpResponse::StartOutage(
+ const TString& urlPattern,
+ size_t responseCount)
+{
+ return StartOutage(urlPattern, TOutageOptions().ResponseCount(responseCount));
+}
+
+TAbortableCoreHttpResponse::TAbortableCoreHttpResponse(
+ std::unique_ptr<IInputStream> stream,
+ const TString& url)
+ : TAbortableHttpResponseBase(url)
+ , Stream_(std::move(stream))
+{
+}
+
+size_t TAbortableCoreHttpResponse::DoRead(void* buf, size_t len)
+{
+ if (Aborted_) {
+ ythrow TAbortedForTestPurpose() << "response was aborted";
+ }
+ len = std::min(len, LengthLimit_);
+ auto read = Stream_->Read(buf, len);
+ LengthLimit_ -= read;
+ if (LengthLimit_ == 0) {
+ Abort();
+ }
+
+ return read;
+}
+
+size_t TAbortableCoreHttpResponse::DoSkip(size_t len)
+{
+ if (Aborted_) {
+ ythrow TAbortedForTestPurpose() << "response was aborted";
+ }
+ return Stream_->Skip(len);
+}
+
+////////////////////////////////////////////////////////////////////////////////
+
+} // namespace NYT
diff --git a/yt/cpp/mapreduce/http/abortable_http_response.h b/yt/cpp/mapreduce/http/abortable_http_response.h
new file mode 100644
index 0000000000..d72bcfa0a6
--- /dev/null
+++ b/yt/cpp/mapreduce/http/abortable_http_response.h
@@ -0,0 +1,142 @@
+#pragma once
+
+#include "http.h"
+
+#include <util/generic/intrlist.h>
+
+namespace NYT {
+
+////////////////////////////////////////////////////////////////////////////////
+
+class TAbortableHttpResponseRegistry;
+
+using TOutageId = size_t;
+
+////////////////////////////////////////////////////////////////////////////////
+
+class TAbortedForTestPurpose
+ : public yexception
+{ };
+
+struct TOutageOptions
+{
+ using TSelf = TOutageOptions;
+
+ /// @brief Number of responses to abort.
+ FLUENT_FIELD_DEFAULT(size_t, ResponseCount, std::numeric_limits<size_t>::max());
+
+ /// @brief Number of bytes to read before abortion. If zero, abort immediately.
+ FLUENT_FIELD_DEFAULT(size_t, LengthLimit, 0);
+};
+
+////////////////////////////////////////////////////////////////////////////////
+
+class IAbortableHttpResponse
+ : public TIntrusiveListItem<IAbortableHttpResponse>
+{
+public:
+ virtual void Abort() = 0;
+ virtual const TString& GetUrl() const = 0;
+ virtual bool IsAborted() const = 0;
+ virtual void SetLengthLimit(size_t limit) = 0;
+
+ virtual ~IAbortableHttpResponse() = default;
+};
+
+class TAbortableHttpResponseBase
+ : public IAbortableHttpResponse
+{
+public:
+ TAbortableHttpResponseBase(const TString& url);
+ ~TAbortableHttpResponseBase();
+
+ void Abort() override;
+ const TString& GetUrl() const override;
+ bool IsAborted() const override;
+ void SetLengthLimit(size_t limit) override;
+
+protected:
+ TString Url_;
+ std::atomic<bool> Aborted_ = {false};
+ size_t LengthLimit_ = std::numeric_limits<size_t>::max();
+};
+
+////////////////////////////////////////////////////////////////////////////////
+
+/// @brief Stream wrapper for @ref NYT::NHttpClient::TCoreHttpResponse with possibility to emulate errors.
+class TAbortableCoreHttpResponse
+ : public IInputStream
+ , public TAbortableHttpResponseBase
+{
+public:
+ TAbortableCoreHttpResponse(
+ std::unique_ptr<IInputStream> stream,
+ const TString& url);
+
+private:
+ size_t DoRead(void* buf, size_t len) override;
+ size_t DoSkip(size_t len) override;
+
+private:
+ std::unique_ptr<IInputStream> Stream_;
+};
+
+////////////////////////////////////////////////////////////////////////////////
+
+/// @brief Class extends @ref NYT::THttpResponse with possibility to emulate errors.
+class TAbortableHttpResponse
+ : public THttpResponse
+ , public TAbortableHttpResponseBase
+{
+public:
+ class TOutage
+ {
+ public:
+ TOutage(TString urlPattern, TAbortableHttpResponseRegistry& registry, const TOutageOptions& options);
+ TOutage(TOutage&&) = default;
+ TOutage(const TOutage&) = delete;
+ ~TOutage();
+
+ void Stop();
+
+ private:
+ TString UrlPattern_;
+ TAbortableHttpResponseRegistry& Registry_;
+ TOutageId Id_;
+ bool Stopped_ = false;
+ };
+
+public:
+ TAbortableHttpResponse(
+ IInputStream* socketStream,
+ const TString& requestId,
+ const TString& hostName,
+ const TString& url);
+
+ /// @brief Abort any responses which match `urlPattern` (i.e. contain it in url).
+ ///
+ /// @return number of aborted responses.
+ static int AbortAll(const TString& urlPattern);
+
+ /// @brief Start outage. Future responses which match `urlPattern` (i.e. contain it in url) will fail.
+ ///
+ /// @return outage object controlling the lifetime of outage (outage stops when object is destroyed)
+ [[nodiscard]] static TOutage StartOutage(
+ const TString& urlPattern,
+ const TOutageOptions& options = TOutageOptions());
+
+ /// @brief Start outage. Future `responseCount` responses which match `urlPattern` (i.e. contain it in url) will fail.
+ ///
+ /// @return outage object controlling the lifetime of outage (outage stops when object is destroyed)
+ [[nodiscard]] static TOutage StartOutage(
+ const TString& urlPattern,
+ size_t responseCount);
+
+private:
+ size_t DoRead(void* buf, size_t len) override;
+ size_t DoSkip(size_t len) override;
+};
+
+////////////////////////////////////////////////////////////////////////////////
+
+} // namespace NYT
diff --git a/yt/cpp/mapreduce/http/context.cpp b/yt/cpp/mapreduce/http/context.cpp
new file mode 100644
index 0000000000..1c016263c5
--- /dev/null
+++ b/yt/cpp/mapreduce/http/context.cpp
@@ -0,0 +1,25 @@
+#include "context.h"
+
+namespace NYT {
+
+////////////////////////////////////////////////////////////////////////////////
+
+bool operator==(const TClientContext& lhs, const TClientContext& rhs)
+{
+ return lhs.ServerName == rhs.ServerName &&
+ lhs.Token == rhs.Token &&
+ lhs.ImpersonationUser == rhs.ImpersonationUser &&
+ lhs.ServiceTicketAuth == rhs.ServiceTicketAuth &&
+ lhs.HttpClient == rhs.HttpClient &&
+ lhs.UseTLS == rhs.UseTLS &&
+ lhs.TvmOnly == rhs.TvmOnly;
+}
+
+bool operator!=(const TClientContext& lhs, const TClientContext& rhs)
+{
+ return !(rhs == lhs);
+}
+
+////////////////////////////////////////////////////////////////////////////////
+
+} // namespace NYT
diff --git a/yt/cpp/mapreduce/http/context.h b/yt/cpp/mapreduce/http/context.h
new file mode 100644
index 0000000000..3926373e17
--- /dev/null
+++ b/yt/cpp/mapreduce/http/context.h
@@ -0,0 +1,31 @@
+#pragma once
+
+#include "fwd.h"
+
+#include <yt/cpp/mapreduce/interface/common.h>
+#include <yt/cpp/mapreduce/interface/config.h>
+#include <yt/cpp/mapreduce/interface/public.h>
+
+
+namespace NYT {
+
+///////////////////////////////////////////////////////////////////////////////
+
+struct TClientContext
+{
+ TString ServerName;
+ TString Token;
+ TMaybe<TString> ImpersonationUser;
+ NAuth::IServiceTicketAuthPtrWrapperPtr ServiceTicketAuth;
+ NHttpClient::IHttpClientPtr HttpClient;
+ bool TvmOnly = false;
+ bool UseTLS = false;
+ TConfigPtr Config = TConfig::Get();
+};
+
+bool operator==(const TClientContext& lhs, const TClientContext& rhs);
+bool operator!=(const TClientContext& lhs, const TClientContext& rhs);
+
+////////////////////////////////////////////////////////////////////////////////
+
+} // namespace NYT
diff --git a/yt/cpp/mapreduce/http/core.h b/yt/cpp/mapreduce/http/core.h
new file mode 100644
index 0000000000..37c74d7551
--- /dev/null
+++ b/yt/cpp/mapreduce/http/core.h
@@ -0,0 +1,27 @@
+#pragma once
+
+#include <yt/yt/core/http/public.h>
+
+#include <memory>
+
+namespace NYT::NHttp {
+
+////////////////////////////////////////////////////////////////////////////////
+
+/// @brief Wrapper for THeaderPtr which allows to hide NYT::IntrusivePtr from interfaces.
+struct THeadersPtrWrapper
+{
+ THeadersPtrWrapper(THeadersPtr ptr)
+ : Ptr(std::make_shared<THeadersPtr>(std::move(ptr)))
+ { }
+
+ THeadersPtr Get() {
+ return *Ptr;
+ }
+
+ std::shared_ptr<THeadersPtr> Ptr;
+};
+
+////////////////////////////////////////////////////////////////////////////////
+
+} // namespace NYT::NHttp
diff --git a/yt/cpp/mapreduce/http/fwd.h b/yt/cpp/mapreduce/http/fwd.h
new file mode 100644
index 0000000000..62891731f6
--- /dev/null
+++ b/yt/cpp/mapreduce/http/fwd.h
@@ -0,0 +1,26 @@
+#pragma once
+
+#include <memory>
+
+namespace NYT {
+
+////////////////////////////////////////////////////////////////////////////////
+
+struct TClientContext;
+class THttpHeader;
+
+namespace NHttpClient {
+
+class IHttpClient;
+class IHttpRequest;
+class IHttpResponse;
+
+using IHttpClientPtr = std::shared_ptr<IHttpClient>;
+using IHttpResponsePtr = std::unique_ptr<IHttpResponse>;
+using IHttpRequestPtr = std::unique_ptr<IHttpRequest>;
+
+} // namespace NHttpClient
+
+////////////////////////////////////////////////////////////////////////////////
+
+} // namespace NYT
diff --git a/yt/cpp/mapreduce/http/helpers.cpp b/yt/cpp/mapreduce/http/helpers.cpp
new file mode 100644
index 0000000000..233a565f20
--- /dev/null
+++ b/yt/cpp/mapreduce/http/helpers.cpp
@@ -0,0 +1,88 @@
+#include "helpers.h"
+
+#include "context.h"
+#include "requests.h"
+
+#include <yt/cpp/mapreduce/interface/logging/yt_log.h>
+
+#include <library/cpp/yson/node/node_io.h>
+
+namespace NYT {
+
+///////////////////////////////////////////////////////////////////////////////
+
+TString CreateHostNameWithPort(const TString& hostName, const TClientContext& context)
+{
+ static constexpr int HttpProxyPort = 80;
+ static constexpr int HttpsProxyPort = 443;
+
+ static constexpr int TvmOnlyHttpProxyPort = 9026;
+ static constexpr int TvmOnlyHttpsProxyPort = 9443;
+
+ if (hostName.find(':') == TString::npos) {
+ int port;
+ if (context.TvmOnly) {
+ port = context.UseTLS
+ ? TvmOnlyHttpsProxyPort
+ : TvmOnlyHttpProxyPort;
+ } else {
+ port = context.UseTLS
+ ? HttpsProxyPort
+ : HttpProxyPort;
+ }
+ return Format("%v:%v", hostName, port);
+ }
+ return hostName;
+}
+
+TString GetFullUrl(const TString& hostName, const TClientContext& context, THttpHeader& header)
+{
+ Y_UNUSED(context);
+ return Format("http://%v%v", hostName, header.GetUrl());
+}
+
+static TString GetParametersDebugString(const THttpHeader& header)
+{
+ const auto& parameters = header.GetParameters();
+ if (parameters.Empty()) {
+ return "<empty>";
+ } else {
+ return NodeToYsonString(parameters);
+ }
+}
+
+TString TruncateForLogs(const TString& text, size_t maxSize)
+{
+ Y_VERIFY(maxSize > 10);
+ if (text.empty()) {
+ static TString empty = "empty";
+ return empty;
+ } else if (text.size() > maxSize) {
+ TStringStream out;
+ out << text.substr(0, maxSize) + "... (" << text.size() << " bytes total)";
+ return out.Str();
+ } else {
+ return text;
+ }
+}
+
+TString GetLoggedAttributes(const THttpHeader& header, const TString& url, bool includeParameters, size_t sizeLimit)
+{
+ const auto parametersDebugString = GetParametersDebugString(header);
+ TStringStream out;
+ out << "Method: " << url << "; "
+ << "X-YT-Parameters (sent in " << (includeParameters ? "header" : "body") << "): " << TruncateForLogs(parametersDebugString, sizeLimit);
+ return out.Str();
+}
+
+void LogRequest(const THttpHeader& header, const TString& url, bool includeParameters, const TString& requestId, const TString& hostName)
+{
+ YT_LOG_DEBUG("REQ %v - sending request (HostName: %v; %v)",
+ requestId,
+ hostName,
+ GetLoggedAttributes(header, url, includeParameters, Max<size_t>()));
+}
+
+///////////////////////////////////////////////////////////////////////////////
+
+} // namespace NYT
diff --git a/yt/cpp/mapreduce/http/helpers.h b/yt/cpp/mapreduce/http/helpers.h
new file mode 100644
index 0000000000..0c510fa2e8
--- /dev/null
+++ b/yt/cpp/mapreduce/http/helpers.h
@@ -0,0 +1,25 @@
+#pragma once
+
+#include "fwd.h"
+
+#include "http.h"
+
+#include <util/generic/fwd.h>
+
+namespace NYT {
+
+///////////////////////////////////////////////////////////////////////////////
+
+TString CreateHostNameWithPort(const TString& name, const TClientContext& context);
+
+TString GetFullUrl(const TString& hostName, const TClientContext& context, THttpHeader& header);
+
+TString TruncateForLogs(const TString& text, size_t maxSize);
+
+TString GetLoggedAttributes(const THttpHeader& header, const TString& url, bool includeParameters, size_t sizeLimit);
+
+void LogRequest(const THttpHeader& header, const TString& url, bool includeParameters, const TString& requestId, const TString& hostName);
+
+///////////////////////////////////////////////////////////////////////////////
+
+} // namespace NYT
diff --git a/yt/cpp/mapreduce/http/host_manager.cpp b/yt/cpp/mapreduce/http/host_manager.cpp
new file mode 100644
index 0000000000..a239dde769
--- /dev/null
+++ b/yt/cpp/mapreduce/http/host_manager.cpp
@@ -0,0 +1,140 @@
+#include "host_manager.h"
+
+#include "context.h"
+#include "helpers.h"
+#include "http.h"
+#include "http_client.h"
+#include "requests.h"
+
+#include <yt/cpp/mapreduce/interface/logging/yt_log.h>
+
+#include <yt/cpp/mapreduce/interface/config.h>
+
+#include <library/cpp/json/json_reader.h>
+
+#include <util/generic/guid.h>
+#include <util/generic/vector.h>
+#include <util/generic/singleton.h>
+#include <util/generic/ymath.h>
+
+#include <util/random/random.h>
+
+#include <util/string/vector.h>
+
+namespace NYT::NPrivate {
+
+////////////////////////////////////////////////////////////////////////////////
+
+static TVector<TString> ParseJsonStringArray(const TString& response)
+{
+ NJson::TJsonValue value;
+ TStringInput input(response);
+ NJson::ReadJsonTree(&input, &value);
+
+ const NJson::TJsonValue::TArray& array = value.GetArray();
+ TVector<TString> result;
+ result.reserve(array.size());
+ for (size_t i = 0; i < array.size(); ++i) {
+ result.push_back(array[i].GetString());
+ }
+ return result;
+}
+
+////////////////////////////////////////////////////////////////////////////////
+
+class THostManager::TClusterHostList
+{
+public:
+ explicit TClusterHostList(TVector<TString> hosts)
+ : Hosts_(std::move(hosts))
+ , Timestamp_(TInstant::Now())
+ { }
+
+ explicit TClusterHostList(std::exception_ptr error)
+ : Error_(std::move(error))
+ , Timestamp_(TInstant::Now())
+ { }
+
+ TString ChooseHostOrThrow() const
+ {
+ if (Error_) {
+ std::rethrow_exception(Error_);
+ }
+
+ if (Hosts_.empty()) {
+ ythrow yexception() << "fetched list of proxies is empty";
+ }
+
+ return Hosts_[RandomNumber<size_t>(Hosts_.size())];
+ }
+
+ TDuration GetAge() const
+ {
+ return TInstant::Now() - Timestamp_;
+ }
+
+private:
+ TVector<TString> Hosts_;
+ std::exception_ptr Error_;
+ TInstant Timestamp_;
+};
+
+////////////////////////////////////////////////////////////////////////////////
+
+THostManager& THostManager::Get()
+{
+ return *Singleton<THostManager>();
+}
+
+void THostManager::Reset()
+{
+ auto guard = Guard(Lock_);
+ ClusterHosts_.clear();
+}
+
+TString THostManager::GetProxyForHeavyRequest(const TClientContext& context)
+{
+ auto cluster = context.ServerName;
+ {
+ auto guard = Guard(Lock_);
+ auto it = ClusterHosts_.find(cluster);
+ if (it != ClusterHosts_.end() && it->second.GetAge() < context.Config->HostListUpdateInterval) {
+ return it->second.ChooseHostOrThrow();
+ }
+ }
+
+ auto hostList = GetHosts(context);
+ auto result = hostList.ChooseHostOrThrow();
+ {
+ auto guard = Guard(Lock_);
+ ClusterHosts_.emplace(cluster, std::move(hostList));
+ }
+ return result;
+}
+
+THostManager::TClusterHostList THostManager::GetHosts(const TClientContext& context)
+{
+ TString hostsEndpoint = context.Config->Hosts;
+ while (hostsEndpoint.StartsWith("/")) {
+ hostsEndpoint = hostsEndpoint.substr(1);
+ }
+ THttpHeader header("GET", hostsEndpoint, false);
+
+ try {
+ auto hostName = context.ServerName;
+ auto requestId = CreateGuidAsString();
+ // TODO: we need to set socket timeout here
+ auto response = context.HttpClient->Request(GetFullUrl(hostName, context, header), requestId, header);
+ auto hosts = ParseJsonStringArray(response->GetResponse());
+ for (auto& host : hosts) {
+ host = CreateHostNameWithPort(host, context);
+ }
+ return TClusterHostList(std::move(hosts));
+ } catch (const std::exception& e) {
+ return TClusterHostList(std::current_exception());
+ }
+}
+
+////////////////////////////////////////////////////////////////////////////////
+
+} // namespace NYT::NPrivate
diff --git a/yt/cpp/mapreduce/http/host_manager.h b/yt/cpp/mapreduce/http/host_manager.h
new file mode 100644
index 0000000000..fdbb740566
--- /dev/null
+++ b/yt/cpp/mapreduce/http/host_manager.h
@@ -0,0 +1,37 @@
+#pragma once
+
+#include "fwd.h"
+
+#include <util/generic/string.h>
+#include <util/generic/hash.h>
+#include <util/system/spinlock.h>
+
+
+namespace NYT::NPrivate {
+
+////////////////////////////////////////////////////////////////////////////////
+
+class THostManager
+{
+public:
+ static THostManager& Get();
+
+ TString GetProxyForHeavyRequest(const TClientContext& context);
+
+ // For testing purposes only.
+ void Reset();
+
+private:
+ class TClusterHostList;
+
+private:
+ TAdaptiveLock Lock_;
+ THashMap<TString, TClusterHostList> ClusterHosts_;
+
+private:
+ static TClusterHostList GetHosts(const TClientContext& context);
+};
+
+////////////////////////////////////////////////////////////////////////////////
+
+} // namespace NYT::NPrivate
diff --git a/yt/cpp/mapreduce/http/http.cpp b/yt/cpp/mapreduce/http/http.cpp
new file mode 100644
index 0000000000..d44b2638a0
--- /dev/null
+++ b/yt/cpp/mapreduce/http/http.cpp
@@ -0,0 +1,1014 @@
+#include "http.h"
+
+#include "abortable_http_response.h"
+#include "core.h"
+#include "helpers.h"
+
+#include <yt/cpp/mapreduce/common/helpers.h>
+#include <yt/cpp/mapreduce/common/retry_lib.h>
+#include <yt/cpp/mapreduce/common/wait_proxy.h>
+
+#include <yt/cpp/mapreduce/interface/config.h>
+#include <yt/cpp/mapreduce/interface/errors.h>
+#include <yt/cpp/mapreduce/interface/logging/yt_log.h>
+
+#include <yt/yt/core/http/http.h>
+
+#include <library/cpp/json/json_writer.h>
+
+#include <library/cpp/string_utils/base64/base64.h>
+#include <library/cpp/string_utils/quote/quote.h>
+
+#include <util/generic/singleton.h>
+#include <util/generic/algorithm.h>
+
+#include <util/stream/mem.h>
+
+#include <util/string/builder.h>
+#include <util/string/cast.h>
+#include <util/string/escape.h>
+#include <util/string/printf.h>
+
+#include <util/system/byteorder.h>
+#include <util/system/getpid.h>
+
+#include <exception>
+
+
+namespace NYT {
+
+////////////////////////////////////////////////////////////////////////////////
+
+class THttpRequest::TRequestStream
+ : public IOutputStream
+{
+public:
+ TRequestStream(THttpRequest* httpRequest, const TSocket& s)
+ : HttpRequest_(httpRequest)
+ , SocketOutput_(s)
+ , HttpOutput_(static_cast<IOutputStream*>(&SocketOutput_))
+ {
+ HttpOutput_.EnableKeepAlive(true);
+ }
+
+private:
+ void DoWrite(const void* buf, size_t len) override
+ {
+ WrapWriteFunc([&] {
+ HttpOutput_.Write(buf, len);
+ });
+ }
+
+ void DoWriteV(const TPart* parts, size_t count) override
+ {
+ WrapWriteFunc([&] {
+ HttpOutput_.Write(parts, count);
+ });
+ }
+
+ void DoWriteC(char ch) override
+ {
+ WrapWriteFunc([&] {
+ HttpOutput_.Write(ch);
+ });
+ }
+
+ void DoFlush() override
+ {
+ WrapWriteFunc([&] {
+ HttpOutput_.Flush();
+ });
+ }
+
+ void DoFinish() override
+ {
+ WrapWriteFunc([&] {
+ HttpOutput_.Finish();
+ });
+ }
+
+ void WrapWriteFunc(std::function<void()> func)
+ {
+ CheckErrorState();
+ try {
+ func();
+ } catch (const std::exception&) {
+ HandleWriteException();
+ }
+ }
+
+ // In many cases http proxy stops reading request and resets connection
+ // if error has happend. This function tries to read error response
+ // in such cases.
+ void HandleWriteException() {
+ Y_VERIFY(WriteError_ == nullptr);
+ WriteError_ = std::current_exception();
+ Y_VERIFY(WriteError_ != nullptr);
+ try {
+ HttpRequest_->GetResponseStream();
+ } catch (const TErrorResponse &) {
+ throw;
+ } catch (...) {
+ }
+ std::rethrow_exception(WriteError_);
+ }
+
+ void CheckErrorState()
+ {
+ if (WriteError_) {
+ std::rethrow_exception(WriteError_);
+ }
+ }
+
+private:
+ THttpRequest* const HttpRequest_;
+ TSocketOutput SocketOutput_;
+ THttpOutput HttpOutput_;
+ std::exception_ptr WriteError_;
+};
+
+////////////////////////////////////////////////////////////////////////////////
+
+THttpHeader::THttpHeader(const TString& method, const TString& command, bool isApi)
+ : Method(method)
+ , Command(command)
+ , IsApi(isApi)
+{ }
+
+void THttpHeader::AddParameter(const TString& key, TNode value, bool overwrite)
+{
+ auto it = Parameters.find(key);
+ if (it == Parameters.end()) {
+ Parameters.emplace(key, std::move(value));
+ } else {
+ if (overwrite) {
+ it->second = std::move(value);
+ } else {
+ ythrow yexception() << "Duplicate key: " << key;
+ }
+ }
+}
+
+void THttpHeader::MergeParameters(const TNode& newParameters, bool overwrite)
+{
+ for (const auto& p : newParameters.AsMap()) {
+ AddParameter(p.first, p.second, overwrite);
+ }
+}
+
+void THttpHeader::RemoveParameter(const TString& key)
+{
+ Parameters.erase(key);
+}
+
+TNode THttpHeader::GetParameters() const
+{
+ return Parameters;
+}
+
+void THttpHeader::AddTransactionId(const TTransactionId& transactionId, bool overwrite)
+{
+ if (transactionId) {
+ AddParameter("transaction_id", GetGuidAsString(transactionId), overwrite);
+ } else {
+ RemoveParameter("transaction_id");
+ }
+}
+
+void THttpHeader::AddPath(const TString& path, bool overwrite)
+{
+ AddParameter("path", path, overwrite);
+}
+
+void THttpHeader::AddOperationId(const TOperationId& operationId, bool overwrite)
+{
+ AddParameter("operation_id", GetGuidAsString(operationId), overwrite);
+}
+
+void THttpHeader::AddMutationId()
+{
+ TGUID guid;
+
+ // Some users use `fork()' with yt wrapper
+ // (actually they use python + multiprocessing)
+ // and CreateGuid is not resistant to `fork()', so spice it a little bit.
+ //
+ // Check IGNIETFERRO-610
+ CreateGuid(&guid);
+ guid.dw[2] = GetPID() ^ MicroSeconds();
+
+ AddParameter("mutation_id", GetGuidAsString(guid), true);
+}
+
+bool THttpHeader::HasMutationId() const
+{
+ return Parameters.contains("mutation_id");
+}
+
+void THttpHeader::SetToken(const TString& token)
+{
+ Token = token;
+}
+
+void THttpHeader::SetImpersonationUser(const TString& impersonationUser)
+{
+ ImpersonationUser = impersonationUser;
+}
+
+void THttpHeader::SetServiceTicket(const TString& ticket)
+{
+ ServiceTicket = ticket;
+}
+
+void THttpHeader::SetInputFormat(const TMaybe<TFormat>& format)
+{
+ InputFormat = format;
+}
+
+void THttpHeader::SetOutputFormat(const TMaybe<TFormat>& format)
+{
+ OutputFormat = format;
+}
+
+TMaybe<TFormat> THttpHeader::GetOutputFormat() const
+{
+ return OutputFormat;
+}
+
+void THttpHeader::SetRequestCompression(const TString& compression)
+{
+ RequestCompression = compression;
+}
+
+void THttpHeader::SetResponseCompression(const TString& compression)
+{
+ ResponseCompression = compression;
+}
+
+TString THttpHeader::GetCommand() const
+{
+ return Command;
+}
+
+TString THttpHeader::GetUrl() const
+{
+ TStringStream url;
+
+ if (IsApi) {
+ url << "/api/" << TConfig::Get()->ApiVersion << "/" << Command;
+ } else {
+ url << "/" << Command;
+ }
+
+ return url.Str();
+}
+
+bool THttpHeader::ShouldAcceptFraming() const
+{
+ return TConfig::Get()->CommandsWithFraming.contains(Command);
+}
+
+TString THttpHeader::GetHeaderAsString(const TString& hostName, const TString& requestId, bool includeParameters) const
+{
+ TStringStream result;
+
+ result << Method << " " << GetUrl() << " HTTP/1.1\r\n";
+
+ GetHeader(hostName, requestId, includeParameters).Get()->WriteTo(&result);
+
+ if (ShouldAcceptFraming()) {
+ result << "X-YT-Accept-Framing: 1\r\n";
+ }
+
+ result << "\r\n";
+
+ return result.Str();
+}
+
+NHttp::THeadersPtrWrapper THttpHeader::GetHeader(const TString& hostName, const TString& requestId, bool includeParameters) const
+{
+ auto headers = New<NHttp::THeaders>();
+
+ headers->Add("Host", hostName);
+ headers->Add("User-Agent", TProcessState::Get()->ClientVersion);
+
+ if (!Token.empty()) {
+ headers->Add("Authorization", "OAuth " + Token);
+ }
+ if (!ServiceTicket.empty()) {
+ headers->Add("X-Ya-Service-Ticket", ServiceTicket);
+ }
+ if (!ImpersonationUser.empty()) {
+ headers->Add("X-Yt-User-Name", ImpersonationUser);
+ }
+
+ if (Method == "PUT" || Method == "POST") {
+ headers->Add("Transfer-Encoding", "chunked");
+ }
+
+ headers->Add("X-YT-Correlation-Id", requestId);
+ headers->Add("X-YT-Header-Format", "<format=text>yson");
+
+ headers->Add("Content-Encoding", RequestCompression);
+ headers->Add("Accept-Encoding", ResponseCompression);
+
+ auto printYTHeader = [&headers] (const char* headerName, const TString& value) {
+ static const size_t maxHttpHeaderSize = 64 << 10;
+ if (!value) {
+ return;
+ }
+ if (value.size() <= maxHttpHeaderSize) {
+ headers->Add(headerName, value);
+ return;
+ }
+
+ TString encoded;
+ Base64Encode(value, encoded);
+ auto ptr = encoded.data();
+ auto finish = encoded.data() + encoded.size();
+ size_t index = 0;
+ do {
+ auto end = Min(ptr + maxHttpHeaderSize, finish);
+ headers->Add(Format("%v%v", headerName, index++), TString(ptr, end));
+ ptr = end;
+ } while (ptr != finish);
+ };
+
+ if (InputFormat) {
+ printYTHeader("X-YT-Input-Format", NodeToYsonString(InputFormat->Config));
+ }
+ if (OutputFormat) {
+ printYTHeader("X-YT-Output-Format", NodeToYsonString(OutputFormat->Config));
+ }
+ if (includeParameters) {
+ printYTHeader("X-YT-Parameters", NodeToYsonString(Parameters));
+ }
+
+ return NHttp::THeadersPtrWrapper(std::move(headers));
+}
+
+const TString& THttpHeader::GetMethod() const
+{
+ return Method;
+}
+
+////////////////////////////////////////////////////////////////////////////////
+
+TAddressCache* TAddressCache::Get()
+{
+ return Singleton<TAddressCache>();
+}
+
+bool ContainsAddressOfRequiredVersion(const TAddressCache::TAddressPtr& address)
+{
+ if (!TConfig::Get()->ForceIpV4 && !TConfig::Get()->ForceIpV6) {
+ return true;
+ }
+
+ for (auto i = address->Begin(); i != address->End(); ++i) {
+ const auto& addressInfo = *i;
+ if (TConfig::Get()->ForceIpV4 && addressInfo.ai_family == AF_INET) {
+ return true;
+ }
+ if (TConfig::Get()->ForceIpV6 && addressInfo.ai_family == AF_INET6) {
+ return true;
+ }
+ }
+ return false;
+}
+
+TAddressCache::TAddressPtr TAddressCache::Resolve(const TString& hostName)
+{
+ auto address = FindAddress(hostName);
+ if (address) {
+ return address;
+ }
+
+ TString host(hostName);
+ ui16 port = 80;
+
+ auto colon = hostName.find(':');
+ if (colon != TString::npos) {
+ port = FromString<ui16>(hostName.substr(colon + 1));
+ host = hostName.substr(0, colon);
+ }
+
+ auto retryPolicy = CreateDefaultRequestRetryPolicy(TConfig::Get());
+ auto error = yexception() << "can not resolve address of required version for host " << hostName;
+ while (true) {
+ address = new TNetworkAddress(host, port);
+ if (ContainsAddressOfRequiredVersion(address)) {
+ break;
+ }
+ retryPolicy->NotifyNewAttempt();
+ YT_LOG_DEBUG("Failed to resolve address of required version for host %v, retrying: %v",
+ hostName,
+ retryPolicy->GetAttemptDescription());
+ if (auto backoffDuration = retryPolicy->OnGenericError(error)) {
+ NDetail::TWaitProxy::Get()->Sleep(*backoffDuration);
+ } else {
+ ythrow error;
+ }
+ }
+
+ AddAddress(hostName, address);
+ return address;
+}
+
+TAddressCache::TAddressPtr TAddressCache::FindAddress(const TString& hostName) const
+{
+ TCacheEntry entry;
+ {
+ TReadGuard guard(Lock_);
+ auto it = Cache_.find(hostName);
+ if (it == Cache_.end()) {
+ return nullptr;
+ }
+ entry = it->second;
+ }
+
+ if (TInstant::Now() > entry.ExpirationTime) {
+ YT_LOG_DEBUG("Address resolution cache entry for host %v is expired, will retry resolution",
+ hostName);
+ return nullptr;
+ }
+
+ if (!ContainsAddressOfRequiredVersion(entry.Address)) {
+ YT_LOG_DEBUG("Address of required version not found for host %v, will retry resolution",
+ hostName);
+ return nullptr;
+ }
+
+ return entry.Address;
+}
+
+void TAddressCache::AddAddress(TString hostName, TAddressPtr address)
+{
+ auto entry = TCacheEntry{
+ .Address = std::move(address),
+ .ExpirationTime = TInstant::Now() + TConfig::Get()->AddressCacheExpirationTimeout,
+ };
+
+ {
+ TWriteGuard guard(Lock_);
+ Cache_.emplace(std::move(hostName), std::move(entry));
+ }
+}
+
+////////////////////////////////////////////////////////////////////////////////
+
+TConnectionPool* TConnectionPool::Get()
+{
+ return Singleton<TConnectionPool>();
+}
+
+TConnectionPtr TConnectionPool::Connect(
+ const TString& hostName,
+ TDuration socketTimeout)
+{
+ Refresh();
+
+ if (socketTimeout == TDuration::Zero()) {
+ socketTimeout = TConfig::Get()->SocketTimeout;
+ }
+
+ {
+ auto guard = Guard(Lock_);
+ auto now = TInstant::Now();
+ auto range = Connections_.equal_range(hostName);
+ for (auto it = range.first; it != range.second; ++it) {
+ auto& connection = it->second;
+ if (connection->DeadLine < now) {
+ continue;
+ }
+ if (!AtomicCas(&connection->Busy, 1, 0)) {
+ continue;
+ }
+
+ connection->DeadLine = now + socketTimeout;
+ connection->Socket->SetSocketTimeout(socketTimeout.Seconds());
+ return connection;
+ }
+ }
+
+ TConnectionPtr connection(new TConnection);
+
+ auto networkAddress = TAddressCache::Get()->Resolve(hostName);
+ TSocketHolder socket(DoConnect(networkAddress));
+ SetNonBlock(socket, false);
+
+ connection->Socket.Reset(new TSocket(socket.Release()));
+
+ connection->DeadLine = TInstant::Now() + socketTimeout;
+ connection->Socket->SetSocketTimeout(socketTimeout.Seconds());
+
+ {
+ auto guard = Guard(Lock_);
+ static ui32 connectionId = 0;
+ connection->Id = ++connectionId;
+ Connections_.insert({hostName, connection});
+ }
+
+ YT_LOG_DEBUG("New connection to %v #%v opened",
+ hostName,
+ connection->Id);
+
+ return connection;
+}
+
+void TConnectionPool::Release(TConnectionPtr connection)
+{
+ auto socketTimeout = TConfig::Get()->SocketTimeout;
+ auto newDeadline = TInstant::Now() + socketTimeout;
+
+ {
+ auto guard = Guard(Lock_);
+ connection->DeadLine = newDeadline;
+ }
+
+ connection->Socket->SetSocketTimeout(socketTimeout.Seconds());
+ AtomicSet(connection->Busy, 0);
+
+ Refresh();
+}
+
+void TConnectionPool::Invalidate(
+ const TString& hostName,
+ TConnectionPtr connection)
+{
+ auto guard = Guard(Lock_);
+ auto range = Connections_.equal_range(hostName);
+ for (auto it = range.first; it != range.second; ++it) {
+ if (it->second == connection) {
+ YT_LOG_DEBUG("Closing connection #%v",
+ connection->Id);
+ Connections_.erase(it);
+ return;
+ }
+ }
+}
+
+void TConnectionPool::Refresh()
+{
+ auto guard = Guard(Lock_);
+
+ // simple, since we don't expect too many connections
+ using TItem = std::pair<TInstant, TConnectionMap::iterator>;
+ std::vector<TItem> sortedConnections;
+ for (auto it = Connections_.begin(); it != Connections_.end(); ++it) {
+ sortedConnections.emplace_back(it->second->DeadLine, it);
+ }
+
+ std::sort(
+ sortedConnections.begin(),
+ sortedConnections.end(),
+ [] (const TItem& a, const TItem& b) -> bool {
+ return a.first < b.first;
+ });
+
+ auto removeCount = static_cast<int>(Connections_.size()) - TConfig::Get()->ConnectionPoolSize;
+
+ const auto now = TInstant::Now();
+ for (const auto& item : sortedConnections) {
+ const auto& mapIterator = item.second;
+ auto connection = mapIterator->second;
+ if (AtomicGet(connection->Busy)) {
+ continue;
+ }
+
+ if (removeCount > 0) {
+ Connections_.erase(mapIterator);
+ YT_LOG_DEBUG("Closing connection #%v (too many opened connections)",
+ connection->Id);
+ --removeCount;
+ continue;
+ }
+
+ if (connection->DeadLine < now) {
+ Connections_.erase(mapIterator);
+ YT_LOG_DEBUG("Closing connection #%v (timeout)",
+ connection->Id);
+ }
+ }
+}
+
+SOCKET TConnectionPool::DoConnect(TAddressCache::TAddressPtr address)
+{
+ int lastError = 0;
+
+ for (auto i = address->Begin(); i != address->End(); ++i) {
+ struct addrinfo* info = &*i;
+
+ if (TConfig::Get()->ForceIpV4 && info->ai_family != AF_INET) {
+ continue;
+ }
+
+ if (TConfig::Get()->ForceIpV6 && info->ai_family != AF_INET6) {
+ continue;
+ }
+
+ TSocketHolder socket(
+ ::socket(info->ai_family, info->ai_socktype, info->ai_protocol));
+
+ if (socket.Closed()) {
+ lastError = LastSystemError();
+ continue;
+ }
+
+ SetNonBlock(socket, true);
+ if (TConfig::Get()->SocketPriority) {
+ SetSocketPriority(socket, *TConfig::Get()->SocketPriority);
+ }
+
+ if (connect(socket, info->ai_addr, info->ai_addrlen) == 0)
+ return socket.Release();
+
+ int err = LastSystemError();
+ if (err == EINPROGRESS || err == EAGAIN || err == EWOULDBLOCK) {
+ struct pollfd p = {
+ socket,
+ POLLOUT,
+ 0
+ };
+ const ssize_t n = PollD(&p, 1, TInstant::Now() + TConfig::Get()->ConnectTimeout);
+ if (n < 0) {
+ ythrow TSystemError(-(int)n) << "can not connect to " << info;
+ }
+ CheckedGetSockOpt(socket, SOL_SOCKET, SO_ERROR, err, "socket error");
+ if (!err)
+ return socket.Release();
+ }
+
+ lastError = err;
+ continue;
+ }
+
+ ythrow TSystemError(lastError) << "can not connect to " << *address;
+}
+
+////////////////////////////////////////////////////////////////////////////////
+
+static TMaybe<TString> GetProxyName(const THttpInput& input)
+{
+ if (auto proxyHeader = input.Headers().FindHeader("X-YT-Proxy")) {
+ return proxyHeader->Value();
+ }
+ return Nothing();
+}
+
+THttpResponse::THttpResponse(
+ IInputStream* socketStream,
+ const TString& requestId,
+ const TString& hostName)
+ : HttpInput_(socketStream)
+ , RequestId_(requestId)
+ , HostName_(GetProxyName(HttpInput_).GetOrElse(hostName))
+ , Unframe_(HttpInput_.Headers().HasHeader("X-YT-Framing"))
+{
+ HttpCode_ = ParseHttpRetCode(HttpInput_.FirstLine());
+ if (HttpCode_ == 200 || HttpCode_ == 202) {
+ return;
+ }
+
+ ErrorResponse_ = TErrorResponse(HttpCode_, RequestId_);
+
+ auto logAndSetError = [&] (const TString& rawError) {
+ YT_LOG_ERROR("RSP %v - HTTP %v - %v",
+ RequestId_,
+ HttpCode_,
+ rawError.data());
+ ErrorResponse_->SetRawError(rawError);
+ };
+
+ switch (HttpCode_) {
+ case 429:
+ logAndSetError("request rate limit exceeded");
+ break;
+
+ case 500:
+ logAndSetError(::TStringBuilder() << "internal error in proxy " << HostName_);
+ break;
+
+ default: {
+ TStringStream httpHeaders;
+ httpHeaders << "HTTP headers (";
+ for (const auto& header : HttpInput_.Headers()) {
+ httpHeaders << header.Name() << ": " << header.Value() << "; ";
+ }
+ httpHeaders << ")";
+
+ auto errorString = Sprintf("RSP %s - HTTP %d - %s",
+ RequestId_.data(),
+ HttpCode_,
+ httpHeaders.Str().data());
+
+ YT_LOG_ERROR("%v",
+ errorString.data());
+
+ if (auto parsedResponse = ParseError(HttpInput_.Headers())) {
+ ErrorResponse_ = parsedResponse.GetRef();
+ } else {
+ ErrorResponse_->SetRawError(
+ errorString + " - X-YT-Error is missing in headers");
+ }
+ break;
+ }
+ }
+}
+
+const THttpHeaders& THttpResponse::Headers() const
+{
+ return HttpInput_.Headers();
+}
+
+void THttpResponse::CheckErrorResponse() const
+{
+ if (ErrorResponse_) {
+ throw *ErrorResponse_;
+ }
+}
+
+bool THttpResponse::IsExhausted() const
+{
+ return IsExhausted_;
+}
+
+int THttpResponse::GetHttpCode() const
+{
+ return HttpCode_;
+}
+
+const TString& THttpResponse::GetHostName() const
+{
+ return HostName_;
+}
+
+bool THttpResponse::IsKeepAlive() const
+{
+ return HttpInput_.IsKeepAlive();
+}
+
+TMaybe<TErrorResponse> THttpResponse::ParseError(const THttpHeaders& headers)
+{
+ for (const auto& header : headers) {
+ if (header.Name() == "X-YT-Error") {
+ TErrorResponse errorResponse(HttpCode_, RequestId_);
+ errorResponse.ParseFromJsonError(header.Value());
+ if (errorResponse.IsOk()) {
+ return Nothing();
+ }
+ return errorResponse;
+ }
+ }
+ return Nothing();
+}
+
+size_t THttpResponse::DoRead(void* buf, size_t len)
+{
+ size_t read;
+ if (Unframe_) {
+ read = UnframeRead(buf, len);
+ } else {
+ read = HttpInput_.Read(buf, len);
+ }
+ if (read == 0 && len != 0) {
+ // THttpInput MUST return defined (but may be empty)
+ // trailers when it is exhausted.
+ Y_VERIFY(HttpInput_.Trailers().Defined(),
+ "trailers MUST be defined for exhausted stream");
+ CheckTrailers(HttpInput_.Trailers().GetRef());
+ IsExhausted_ = true;
+ }
+ return read;
+}
+
+size_t THttpResponse::DoSkip(size_t len)
+{
+ size_t skipped;
+ if (Unframe_) {
+ skipped = UnframeSkip(len);
+ } else {
+ skipped = HttpInput_.Skip(len);
+ }
+ if (skipped == 0 && len != 0) {
+ // THttpInput MUST return defined (but may be empty)
+ // trailers when it is exhausted.
+ Y_VERIFY(HttpInput_.Trailers().Defined(),
+ "trailers MUST be defined for exhausted stream");
+ CheckTrailers(HttpInput_.Trailers().GetRef());
+ IsExhausted_ = true;
+ }
+ return skipped;
+}
+
+void THttpResponse::CheckTrailers(const THttpHeaders& trailers)
+{
+ if (auto errorResponse = ParseError(trailers)) {
+ errorResponse->SetIsFromTrailers(true);
+ YT_LOG_ERROR("RSP %v - %v",
+ RequestId_,
+ errorResponse.GetRef().what());
+ ythrow errorResponse.GetRef();
+ }
+}
+
+static ui32 ReadDataFrameSize(THttpInput* stream)
+{
+ ui32 littleEndianSize;
+ auto read = stream->Load(&littleEndianSize, sizeof(littleEndianSize));
+ if (read < sizeof(littleEndianSize)) {
+ ythrow yexception() << "Bad data frame header: " <<
+ "expected " << sizeof(littleEndianSize) << " bytes, got " << read;
+ }
+ return LittleToHost(littleEndianSize);
+}
+
+bool THttpResponse::RefreshFrameIfNecessary()
+{
+ while (RemainingFrameSize_ == 0) {
+ ui8 frameTypeByte;
+ auto read = HttpInput_.Read(&frameTypeByte, sizeof(frameTypeByte));
+ if (read == 0) {
+ return false;
+ }
+ auto frameType = static_cast<EFrameType>(frameTypeByte);
+ switch (frameType) {
+ case EFrameType::KeepAlive:
+ break;
+ case EFrameType::Data:
+ RemainingFrameSize_ = ReadDataFrameSize(&HttpInput_);
+ break;
+ default:
+ ythrow yexception() << "Bad frame type " << static_cast<int>(frameTypeByte);
+ }
+ }
+ return true;
+}
+
+size_t THttpResponse::UnframeRead(void* buf, size_t len)
+{
+ if (!RefreshFrameIfNecessary()) {
+ return 0;
+ }
+ auto read = HttpInput_.Read(buf, Min(len, RemainingFrameSize_));
+ RemainingFrameSize_ -= read;
+ return read;
+}
+
+size_t THttpResponse::UnframeSkip(size_t len)
+{
+ if (!RefreshFrameIfNecessary()) {
+ return 0;
+ }
+ auto skipped = HttpInput_.Skip(Min(len, RemainingFrameSize_));
+ RemainingFrameSize_ -= skipped;
+ return skipped;
+}
+
+////////////////////////////////////////////////////////////////////////////////
+
+THttpRequest::THttpRequest()
+{
+ RequestId = CreateGuidAsString();
+}
+
+THttpRequest::THttpRequest(const TString& requestId)
+ : RequestId(requestId)
+{ }
+
+THttpRequest::~THttpRequest()
+{
+ if (!Connection) {
+ return;
+ }
+
+ if (Input && Input->IsKeepAlive() && Input->IsExhausted()) {
+ // We should return to the pool only connections where HTTP response was fully read.
+ // Otherwise next reader might read our remaining data and misinterpret them (YT-6510).
+ TConnectionPool::Get()->Release(Connection);
+ } else {
+ TConnectionPool::Get()->Invalidate(HostName, Connection);
+ }
+}
+
+TString THttpRequest::GetRequestId() const
+{
+ return RequestId;
+}
+
+void THttpRequest::Connect(TString hostName, TDuration socketTimeout)
+{
+ HostName = std::move(hostName);
+ YT_LOG_DEBUG("REQ %v - requesting connection to %v from connection pool",
+ RequestId,
+ HostName);
+
+ StartTime_ = TInstant::Now();
+ Connection = TConnectionPool::Get()->Connect(HostName, socketTimeout);
+
+ YT_LOG_DEBUG("REQ %v - connection #%v",
+ RequestId,
+ Connection->Id);
+}
+
+IOutputStream* THttpRequest::StartRequestImpl(const THttpHeader& header, bool includeParameters)
+{
+ auto strHeader = header.GetHeaderAsString(HostName, RequestId, includeParameters);
+ Url_ = header.GetUrl();
+
+ LogRequest(header, Url_, includeParameters, RequestId, HostName);
+
+ LoggedAttributes_ = GetLoggedAttributes(header, Url_, includeParameters, 128);
+
+ auto outputFormat = header.GetOutputFormat();
+ if (outputFormat && outputFormat->IsTextYson()) {
+ LogResponse = true;
+ }
+
+ RequestStream_ = MakeHolder<TRequestStream>(this, *Connection->Socket.Get());
+
+ RequestStream_->Write(strHeader.data(), strHeader.size());
+ return RequestStream_.Get();
+}
+
+IOutputStream* THttpRequest::StartRequest(const THttpHeader& header)
+{
+ return StartRequestImpl(header, true);
+}
+
+void THttpRequest::FinishRequest()
+{
+ RequestStream_->Flush();
+ RequestStream_->Finish();
+}
+
+void THttpRequest::SmallRequest(const THttpHeader& header, TMaybe<TStringBuf> body)
+{
+ if (!body && (header.GetMethod() == "PUT" || header.GetMethod() == "POST")) {
+ const auto& parameters = header.GetParameters();
+ auto parametersStr = NodeToYsonString(parameters);
+ auto* output = StartRequestImpl(header, false);
+ output->Write(parametersStr);
+ FinishRequest();
+ } else {
+ auto* output = StartRequest(header);
+ if (body) {
+ output->Write(*body);
+ }
+ FinishRequest();
+ }
+}
+
+THttpResponse* THttpRequest::GetResponseStream()
+{
+ if (!Input) {
+ SocketInput.Reset(new TSocketInput(*Connection->Socket.Get()));
+ if (TConfig::Get()->UseAbortableResponse) {
+ Y_VERIFY(!Url_.empty());
+ Input.Reset(new TAbortableHttpResponse(SocketInput.Get(), RequestId, HostName, Url_));
+ } else {
+ Input.Reset(new THttpResponse(SocketInput.Get(), RequestId, HostName));
+ }
+ Input->CheckErrorResponse();
+ }
+ return Input.Get();
+}
+
+TString THttpRequest::GetResponse()
+{
+ TString result = GetResponseStream()->ReadAll();
+
+ TStringStream loggedAttributes;
+ loggedAttributes
+ << "Time: " << TInstant::Now() - StartTime_ << "; "
+ << "HostName: " << GetResponseStream()->GetHostName() << "; "
+ << LoggedAttributes_;
+
+ if (LogResponse) {
+ constexpr auto sizeLimit = 1 << 7;
+ YT_LOG_DEBUG("RSP %v - received response (Response: '%v'; %v)",
+ RequestId,
+ TruncateForLogs(result, sizeLimit),
+ loggedAttributes.Str());
+ } else {
+ YT_LOG_DEBUG("RSP %v - received response of %v bytes (%v)",
+ RequestId,
+ result.size(),
+ loggedAttributes.Str());
+ }
+ return result;
+}
+
+int THttpRequest::GetHttpCode() {
+ return GetResponseStream()->GetHttpCode();
+}
+
+void THttpRequest::InvalidateConnection()
+{
+ TConnectionPool::Get()->Invalidate(HostName, Connection);
+ Connection.Reset();
+}
+
+////////////////////////////////////////////////////////////////////////////////
+
+} // namespace NYT
diff --git a/yt/cpp/mapreduce/http/http.h b/yt/cpp/mapreduce/http/http.h
new file mode 100644
index 0000000000..ee8783088d
--- /dev/null
+++ b/yt/cpp/mapreduce/http/http.h
@@ -0,0 +1,256 @@
+#pragma once
+
+#include "fwd.h"
+
+#include <yt/cpp/mapreduce/interface/common.h>
+#include <yt/cpp/mapreduce/interface/errors.h>
+#include <yt/cpp/mapreduce/interface/format.h>
+#include <yt/cpp/mapreduce/interface/io.h>
+#include <yt/cpp/mapreduce/interface/node.h>
+
+#include <library/cpp/deprecated/atomic/atomic.h>
+#include <library/cpp/http/io/stream.h>
+
+#include <util/generic/hash.h>
+#include <util/generic/hash_multi_map.h>
+#include <util/generic/strbuf.h>
+#include <util/generic/guid.h>
+#include <util/network/socket.h>
+#include <util/stream/input.h>
+#include <util/system/mutex.h>
+#include <util/system/rwlock.h>
+#include <util/generic/ptr.h>
+
+namespace NYT {
+
+class TNode;
+
+namespace NHttp {
+
+struct THeadersPtrWrapper;
+
+} // NHttp
+
+///////////////////////////////////////////////////////////////////////////////
+
+enum class EFrameType
+{
+ Data = 0x01,
+ KeepAlive = 0x02,
+};
+
+
+class THttpHeader
+{
+public:
+ THttpHeader(const TString& method, const TString& command, bool isApi = true);
+
+ void AddParameter(const TString& key, TNode value, bool overwrite = false);
+ void RemoveParameter(const TString& key);
+ void MergeParameters(const TNode& parameters, bool overwrite = false);
+ TNode GetParameters() const;
+
+ void AddTransactionId(const TTransactionId& transactionId, bool overwrite = false);
+ void AddPath(const TString& path, bool overwrite = false);
+ void AddOperationId(const TOperationId& operationId, bool overwrite = false);
+ void AddMutationId();
+ bool HasMutationId() const;
+
+ void SetToken(const TString& token);
+ void SetImpersonationUser(const TString& impersonationUser);
+
+ void SetServiceTicket(const TString& ticket);
+
+ void SetInputFormat(const TMaybe<TFormat>& format);
+
+ void SetOutputFormat(const TMaybe<TFormat>& format);
+ TMaybe<TFormat> GetOutputFormat() const;
+
+ void SetRequestCompression(const TString& compression);
+ void SetResponseCompression(const TString& compression);
+
+ TString GetCommand() const;
+ TString GetUrl() const;
+ TString GetHeaderAsString(const TString& hostName, const TString& requestId, bool includeParameters = true) const;
+ NHttp::THeadersPtrWrapper GetHeader(const TString& hostName, const TString& requestId, bool includeParameters) const;
+
+ const TString& GetMethod() const;
+
+private:
+ bool ShouldAcceptFraming() const;
+
+private:
+ const TString Method;
+ const TString Command;
+ const bool IsApi;
+
+ TNode::TMapType Parameters;
+ TString ImpersonationUser;
+ TString Token;
+ TString ServiceTicket;
+ TNode Attributes;
+
+private:
+ TMaybe<TFormat> InputFormat = TFormat::YsonText();
+ TMaybe<TFormat> OutputFormat = TFormat::YsonText();
+
+ TString RequestCompression = "identity";
+ TString ResponseCompression = "identity";
+};
+
+////////////////////////////////////////////////////////////////////////////////
+
+class TAddressCache
+{
+public:
+ using TAddressPtr = TAtomicSharedPtr<TNetworkAddress>;
+
+ static TAddressCache* Get();
+
+ TAddressPtr Resolve(const TString& hostName);
+
+private:
+ struct TCacheEntry {
+ TAddressPtr Address;
+ TInstant ExpirationTime;
+ };
+
+private:
+ TAddressPtr FindAddress(const TString& hostName) const;
+ void AddAddress(TString hostName, TAddressPtr address);
+
+private:
+ TRWMutex Lock_;
+ THashMap<TString, TCacheEntry> Cache_;
+};
+
+////////////////////////////////////////////////////////////////////////////////
+
+struct TConnection
+{
+ THolder<TSocket> Socket;
+ TAtomic Busy = 1;
+ TInstant DeadLine;
+ ui32 Id;
+};
+
+using TConnectionPtr = TAtomicSharedPtr<TConnection>;
+
+class TConnectionPool
+{
+public:
+ using TConnectionMap = THashMultiMap<TString, TConnectionPtr>;
+
+ static TConnectionPool* Get();
+
+ TConnectionPtr Connect(const TString& hostName, TDuration socketTimeout);
+ void Release(TConnectionPtr connection);
+ void Invalidate(const TString& hostName, TConnectionPtr connection);
+
+private:
+ void Refresh();
+ static SOCKET DoConnect(TAddressCache::TAddressPtr address);
+
+private:
+ TMutex Lock_;
+ TConnectionMap Connections_;
+};
+
+////////////////////////////////////////////////////////////////////////////////
+
+//
+// Input stream that handles YT-specific header/trailer errors
+// and throws TErrorResponse if it finds any.
+class THttpResponse
+ : public IInputStream
+{
+public:
+ // 'requestId' and 'hostName' are provided for debug reasons
+ // (they will appear in some error messages).
+ THttpResponse(
+ IInputStream* socketStream,
+ const TString& requestId,
+ const TString& hostName);
+
+ const THttpHeaders& Headers() const;
+
+ void CheckErrorResponse() const;
+ bool IsExhausted() const;
+ int GetHttpCode() const;
+ const TString& GetHostName() const;
+ bool IsKeepAlive() const;
+
+protected:
+ size_t DoRead(void* buf, size_t len) override;
+ size_t DoSkip(size_t len) override;
+
+private:
+ void CheckTrailers(const THttpHeaders& trailers);
+ TMaybe<TErrorResponse> ParseError(const THttpHeaders& headers);
+ size_t UnframeRead(void* buf, size_t len);
+ size_t UnframeSkip(size_t len);
+ bool RefreshFrameIfNecessary();
+
+private:
+ THttpInput HttpInput_;
+ const TString RequestId_;
+ const TString HostName_;
+ int HttpCode_ = 0;
+ TMaybe<TErrorResponse> ErrorResponse_;
+ bool IsExhausted_ = false;
+ const bool Unframe_;
+ size_t RemainingFrameSize_ = 0;
+};
+
+////////////////////////////////////////////////////////////////////////////////
+
+class THttpRequest
+{
+public:
+ THttpRequest();
+ THttpRequest(const TString& requestId);
+ ~THttpRequest();
+
+ TString GetRequestId() const;
+
+ void Connect(TString hostName, TDuration socketTimeout = TDuration::Zero());
+
+ IOutputStream* StartRequest(const THttpHeader& header);
+ void FinishRequest();
+
+ void SmallRequest(const THttpHeader& header, TMaybe<TStringBuf> body);
+
+ THttpResponse* GetResponseStream();
+
+ TString GetResponse();
+
+ void InvalidateConnection();
+
+ int GetHttpCode();
+
+private:
+ IOutputStream* StartRequestImpl(const THttpHeader& header, bool includeParameters);
+
+private:
+ class TRequestStream;
+
+private:
+ TString HostName;
+ TString RequestId;
+ TString Url_;
+ TInstant StartTime_;
+ TString LoggedAttributes_;
+
+ TConnectionPtr Connection;
+
+ THolder<TRequestStream> RequestStream_;
+
+ THolder<TSocketInput> SocketInput;
+ THolder<THttpResponse> Input;
+
+ bool LogResponse = false;
+};
+
+////////////////////////////////////////////////////////////////////////////////
+
+} // namespace NYT
diff --git a/yt/cpp/mapreduce/http/http_client.cpp b/yt/cpp/mapreduce/http/http_client.cpp
new file mode 100644
index 0000000000..a2af1182dc
--- /dev/null
+++ b/yt/cpp/mapreduce/http/http_client.cpp
@@ -0,0 +1,603 @@
+#include "http_client.h"
+
+#include "abortable_http_response.h"
+#include "core.h"
+#include "helpers.h"
+#include "http.h"
+
+#include <yt/cpp/mapreduce/interface/config.h>
+
+#include <yt/cpp/mapreduce/interface/logging/yt_log.h>
+
+#include <yt/yt/core/concurrency/thread_pool_poller.h>
+
+#include <yt/yt/core/http/client.h>
+#include <yt/yt/core/http/config.h>
+#include <yt/yt/core/http/http.h>
+
+#include <yt/yt/core/https/client.h>
+#include <yt/yt/core/https/config.h>
+
+#include <library/cpp/yson/node/node_io.h>
+
+namespace NYT::NHttpClient {
+
+namespace {
+
+TString CreateHost(TStringBuf host, TStringBuf port)
+{
+ if (!port.empty()) {
+ return Format("%v:%v", host, port);
+ }
+
+ return TString(host);
+}
+
+TMaybe<TErrorResponse> GetErrorResponse(const TString& hostName, const TString& requestId, const NHttp::IResponsePtr& response)
+{
+ auto httpCode = response->GetStatusCode();
+ if (httpCode == NHttp::EStatusCode::OK || httpCode == NHttp::EStatusCode::Accepted) {
+ return {};
+ }
+
+ TErrorResponse errorResponse(static_cast<int>(httpCode), requestId);
+
+ auto logAndSetError = [&] (const TString& rawError) {
+ YT_LOG_ERROR("RSP %v - HTTP %v - %v",
+ requestId,
+ httpCode,
+ rawError.data());
+ errorResponse.SetRawError(rawError);
+ };
+
+ switch (httpCode) {
+ case NHttp::EStatusCode::TooManyRequests:
+ logAndSetError("request rate limit exceeded");
+ break;
+
+ case NHttp::EStatusCode::InternalServerError:
+ logAndSetError("internal error in proxy " + hostName);
+ break;
+
+ default: {
+ TStringStream httpHeaders;
+ httpHeaders << "HTTP headers (";
+ for (const auto& [headerName, headerValue] : response->GetHeaders()->Dump()) {
+ httpHeaders << headerName << ": " << headerValue << "; ";
+ }
+ httpHeaders << ")";
+
+ auto errorString = Sprintf("RSP %s - HTTP %d - %s",
+ requestId.data(),
+ static_cast<int>(httpCode),
+ httpHeaders.Str().data());
+
+ YT_LOG_ERROR("%v",
+ errorString.data());
+
+ if (auto errorHeader = response->GetHeaders()->Find("X-YT-Error")) {
+ errorResponse.ParseFromJsonError(*errorHeader);
+ if (errorResponse.IsOk()) {
+ return Nothing();
+ }
+ return errorResponse;
+ }
+
+ errorResponse.SetRawError(
+ errorString + " - X-YT-Error is missing in headers");
+ break;
+ }
+ }
+
+ return errorResponse;
+}
+
+void CheckErrorResponse(const TString& hostName, const TString& requestId, const NHttp::IResponsePtr& response)
+{
+ auto errorResponse = GetErrorResponse(hostName, requestId, response);
+ if (errorResponse) {
+ throw *errorResponse;
+ }
+}
+
+} // namespace
+
+///////////////////////////////////////////////////////////////////////////////
+
+class TDefaultHttpResponse
+ : public IHttpResponse
+{
+public:
+ TDefaultHttpResponse(std::unique_ptr<THttpRequest> request)
+ : Request_(std::move(request))
+ { }
+
+ int GetStatusCode() override
+ {
+ return Request_->GetHttpCode();
+ }
+
+ IInputStream* GetResponseStream() override
+ {
+ return Request_->GetResponseStream();
+ }
+
+ TString GetResponse() override
+ {
+ return Request_->GetResponse();
+ }
+
+ TString GetRequestId() const override
+ {
+ return Request_->GetRequestId();
+ }
+
+private:
+ std::unique_ptr<THttpRequest> Request_;
+};
+
+class TDefaultHttpRequest
+ : public IHttpRequest
+{
+public:
+ TDefaultHttpRequest(std::unique_ptr<THttpRequest> request, IOutputStream* stream)
+ : Request_(std::move(request))
+ , Stream_(stream)
+ { }
+
+ IOutputStream* GetStream() override
+ {
+ return Stream_;
+ }
+
+ IHttpResponsePtr Finish() override
+ {
+ Request_->FinishRequest();
+ return std::make_unique<TDefaultHttpResponse>(std::move(Request_));
+ }
+
+private:
+ std::unique_ptr<THttpRequest> Request_;
+ IOutputStream* Stream_;
+};
+
+class TDefaultHttpClient
+ : public IHttpClient
+{
+public:
+ IHttpResponsePtr Request(const TString& url, const TString& requestId, const THttpConfig& config, const THttpHeader& header, TMaybe<TStringBuf> body) override
+ {
+ auto request = std::make_unique<THttpRequest>(requestId);
+
+ auto urlRef = NHttp::ParseUrl(url);
+
+ request->Connect(CreateHost(urlRef.Host, urlRef.PortStr), config.SocketTimeout);
+ request->SmallRequest(header, body);
+ return std::make_unique<TDefaultHttpResponse>(std::move(request));
+ }
+
+ IHttpRequestPtr StartRequest(const TString& url, const TString& requestId, const THttpConfig& config, const THttpHeader& header) override
+ {
+ auto request = std::make_unique<THttpRequest>(requestId);
+
+ auto urlRef = NHttp::ParseUrl(url);
+
+ request->Connect(CreateHost(urlRef.Host, urlRef.PortStr), config.SocketTimeout);
+ auto stream = request->StartRequest(header);
+ return std::make_unique<TDefaultHttpRequest>(std::move(request), stream);
+ }
+};
+
+///////////////////////////////////////////////////////////////////////////////
+
+struct TCoreRequestContext
+{
+ TString HostName;
+ TString Url;
+ TString RequestId;
+ bool LogResponse;
+ TInstant StartTime;
+ TString LoggedAttributes;
+};
+
+class TCoreHttpResponse
+ : public IHttpResponse
+{
+public:
+ TCoreHttpResponse(
+ TCoreRequestContext context,
+ NHttp::IResponsePtr response)
+ : Context_(std::move(context))
+ , Response_(std::move(response))
+ { }
+
+ int GetStatusCode() override
+ {
+ return static_cast<int>(Response_->GetStatusCode());
+ }
+
+ IInputStream* GetResponseStream() override
+ {
+ if (!Stream_) {
+ auto stream = std::make_unique<TWrappedStream>(
+ NConcurrency::CreateSyncAdapter(NConcurrency::CreateCopyingAdapter(Response_), NConcurrency::EWaitForStrategy::WaitFor),
+ Response_,
+ Context_.RequestId);
+ CheckErrorResponse(Context_.HostName, Context_.RequestId, Response_);
+
+ if (TConfig::Get()->UseAbortableResponse) {
+ Y_VERIFY(!Context_.Url.empty());
+ Stream_ = std::make_unique<TAbortableCoreHttpResponse>(std::move(stream), Context_.Url);
+ } else {
+ Stream_ = std::move(stream);
+ }
+ }
+
+ return Stream_.get();
+ }
+
+ TString GetResponse() override
+ {
+ auto result = GetResponseStream()->ReadAll();
+
+ TStringStream loggedAttributes;
+ loggedAttributes
+ << "Time: " << TInstant::Now() - Context_.StartTime << "; "
+ << "HostName: " << Context_.HostName << "; "
+ << Context_.LoggedAttributes;
+
+ if (Context_.LogResponse) {
+ constexpr auto sizeLimit = 1 << 7;
+ YT_LOG_DEBUG("RSP %v - received response (Response: '%v'; %v)",
+ Context_.RequestId,
+ TruncateForLogs(result, sizeLimit),
+ loggedAttributes.Str());
+ } else {
+ YT_LOG_DEBUG("RSP %v - received response of %v bytes (%v)",
+ Context_.RequestId,
+ result.size(),
+ loggedAttributes.Str());
+ }
+ return result;
+ }
+
+ TString GetRequestId() const override
+ {
+ return Context_.RequestId;
+ }
+
+private:
+ class TWrappedStream
+ : public IInputStream
+ {
+ public:
+ TWrappedStream(std::unique_ptr<IInputStream> underlying, NHttp::IResponsePtr response, TString requestId)
+ : Underlying_(std::move(underlying))
+ , Response_(std::move(response))
+ , RequestId_(std::move(requestId))
+ { }
+
+ protected:
+ size_t DoRead(void* buf, size_t len) override
+ {
+ size_t read = Underlying_->Read(buf, len);
+
+ if (read == 0 && len != 0) {
+ CheckTrailers(Response_->GetTrailers());
+ }
+ return read;
+ }
+
+ size_t DoSkip(size_t len) override
+ {
+ size_t skipped = Underlying_->Skip(len);
+ if (skipped == 0 && len != 0) {
+ CheckTrailers(Response_->GetTrailers());
+ }
+ return skipped;
+ }
+
+ private:
+ void CheckTrailers(const NHttp::THeadersPtr& trailers)
+ {
+ if (auto errorResponse = ParseError(trailers)) {
+ errorResponse->SetIsFromTrailers(true);
+ YT_LOG_ERROR("RSP %v - %v",
+ RequestId_,
+ errorResponse.GetRef().what());
+ ythrow errorResponse.GetRef();
+ }
+ }
+
+ TMaybe<TErrorResponse> ParseError(const NHttp::THeadersPtr& headers)
+ {
+ if (auto errorHeader = headers->Find("X-YT-Error")) {
+ TErrorResponse errorResponse(static_cast<int>(Response_->GetStatusCode()), RequestId_);
+ errorResponse.ParseFromJsonError(*errorHeader);
+ if (errorResponse.IsOk()) {
+ return Nothing();
+ }
+ return errorResponse;
+ }
+ return Nothing();
+ }
+
+ private:
+ std::unique_ptr<IInputStream> Underlying_;
+ NHttp::IResponsePtr Response_;
+ TString RequestId_;
+ };
+
+private:
+ TCoreRequestContext Context_;
+ NHttp::IResponsePtr Response_;
+ std::unique_ptr<IInputStream> Stream_;
+};
+
+class TCoreHttpRequest
+ : public IHttpRequest
+{
+public:
+ TCoreHttpRequest(TCoreRequestContext context, NHttp::IActiveRequestPtr activeRequest)
+ : Context_(std::move(context))
+ , ActiveRequest_(std::move(activeRequest))
+ , Stream_(NConcurrency::CreateBufferedSyncAdapter(ActiveRequest_->GetRequestStream()))
+ , WrappedStream_(this, Stream_.get())
+ { }
+
+ IOutputStream* GetStream() override
+ {
+ return &WrappedStream_;
+ }
+
+ IHttpResponsePtr Finish() override
+ {
+ WrappedStream_.Flush();
+ auto response = ActiveRequest_->Finish().Get().ValueOrThrow();
+ return std::make_unique<TCoreHttpResponse>(std::move(Context_), std::move(response));
+ }
+
+ IHttpResponsePtr FinishWithError()
+ {
+ auto response = ActiveRequest_->GetResponse();
+ return std::make_unique<TCoreHttpResponse>(std::move(Context_), std::move(response));
+ }
+
+private:
+ class TWrappedStream
+ : public IOutputStream
+ {
+ public:
+ TWrappedStream(TCoreHttpRequest* httpRequest, IOutputStream* underlying)
+ : HttpRequest_(httpRequest)
+ , Underlying_(underlying)
+ { }
+
+ private:
+ void DoWrite(const void* buf, size_t len) override
+ {
+ WrapWriteFunc([&] {
+ Underlying_->Write(buf, len);
+ });
+ }
+
+ void DoWriteV(const TPart* parts, size_t count) override
+ {
+ WrapWriteFunc([&] {
+ Underlying_->Write(parts, count);
+ });
+ }
+
+ void DoWriteC(char ch) override
+ {
+ WrapWriteFunc([&] {
+ Underlying_->Write(ch);
+ });
+ }
+
+ void DoFlush() override
+ {
+ WrapWriteFunc([&] {
+ Underlying_->Flush();
+ });
+ }
+
+ void DoFinish() override
+ {
+ WrapWriteFunc([&] {
+ Underlying_->Finish();
+ });
+ }
+
+ void WrapWriteFunc(std::function<void()> func)
+ {
+ CheckErrorState();
+ try {
+ func();
+ } catch (const std::exception&) {
+ HandleWriteException();
+ }
+ }
+
+ // In many cases http proxy stops reading request and resets connection
+ // if error has happend. This function tries to read error response
+ // in such cases.
+ void HandleWriteException() {
+ Y_VERIFY(WriteError_ == nullptr);
+ WriteError_ = std::current_exception();
+ Y_VERIFY(WriteError_ != nullptr);
+ try {
+ HttpRequest_->FinishWithError()->GetResponseStream();
+ } catch (const TErrorResponse &) {
+ throw;
+ } catch (...) {
+ }
+ std::rethrow_exception(WriteError_);
+ }
+
+ void CheckErrorState()
+ {
+ if (WriteError_) {
+ std::rethrow_exception(WriteError_);
+ }
+ }
+
+ private:
+ TCoreHttpRequest* const HttpRequest_;
+ IOutputStream* Underlying_;
+ std::exception_ptr WriteError_;
+ };
+
+private:
+ TCoreRequestContext Context_;
+ NHttp::IActiveRequestPtr ActiveRequest_;
+ std::unique_ptr<IOutputStream> Stream_;
+ TWrappedStream WrappedStream_;
+};
+
+class TCoreHttpClient
+ : public IHttpClient
+{
+public:
+ TCoreHttpClient(bool useTLS, const TConfigPtr& config)
+ : Poller_(NConcurrency::CreateThreadPoolPoller(1, "http_poller")) // TODO(nadya73): YT-18363: move threads count to config
+ {
+ if (useTLS) {
+ auto httpsConfig = NYT::New<NYT::NHttps::TClientConfig>();
+ httpsConfig->MaxIdleConnections = config->ConnectionPoolSize;
+ Client_ = NHttps::CreateClient(httpsConfig, Poller_);
+ } else {
+ auto httpConfig = NYT::New<NYT::NHttp::TClientConfig>();
+ httpConfig->MaxIdleConnections = config->ConnectionPoolSize;
+ Client_ = NHttp::CreateClient(httpConfig, Poller_);
+ }
+ }
+
+ IHttpResponsePtr Request(const TString& url, const TString& requestId, const THttpConfig& /*config*/, const THttpHeader& header, TMaybe<TStringBuf> body) override
+ {
+ TCoreRequestContext context = CreateContext(url, requestId, header);
+
+ // TODO(nadya73): YT-18363: pass socket timeouts from THttpConfig
+
+ NHttp::IResponsePtr response;
+
+ auto logRequest = [&](bool includeParameters) {
+ LogRequest(header, url, includeParameters, requestId, context.HostName);
+ context.LoggedAttributes = GetLoggedAttributes(header, url, includeParameters, 128);
+ };
+
+ if (!body && (header.GetMethod() == "PUT" || header.GetMethod() == "POST")) {
+ const auto& parameters = header.GetParameters();
+ auto parametersStr = NodeToYsonString(parameters);
+
+ bool includeParameters = false;
+ auto headers = header.GetHeader(context.HostName, requestId, includeParameters).Get();
+
+ logRequest(includeParameters);
+
+ auto activeRequest = StartRequestImpl(header.GetMethod(), url, headers);
+
+ activeRequest->GetRequestStream()->Write(TSharedRef::FromString(parametersStr)).Get().ThrowOnError();
+ response = activeRequest->Finish().Get().ValueOrThrow();
+ } else {
+ auto bodyRef = TSharedRef::FromString(TString(body ? *body : ""));
+ bool includeParameters = true;
+ auto headers = header.GetHeader(context.HostName, requestId, includeParameters).Get();
+
+ logRequest(includeParameters);
+
+ if (header.GetMethod() == "GET") {
+ response = RequestImpl(header.GetMethod(), url, headers, bodyRef);
+ } else {
+ auto activeRequest = StartRequestImpl(header.GetMethod(), url, headers);
+
+ auto request = std::make_unique<TCoreHttpRequest>(std::move(context), std::move(activeRequest));
+ if (body) {
+ request->GetStream()->Write(*body);
+ }
+ return request->Finish();
+ }
+ }
+
+ return std::make_unique<TCoreHttpResponse>(std::move(context), std::move(response));
+ }
+
+ IHttpRequestPtr StartRequest(const TString& url, const TString& requestId, const THttpConfig& /*config*/, const THttpHeader& header) override
+ {
+ TCoreRequestContext context = CreateContext(url, requestId, header);
+
+ LogRequest(header, url, true, requestId, context.HostName);
+ context.LoggedAttributes = GetLoggedAttributes(header, url, true, 128);
+
+ auto headers = header.GetHeader(context.HostName, requestId, true).Get();
+ auto activeRequest = StartRequestImpl(header.GetMethod(), url, headers);
+
+ return std::make_unique<TCoreHttpRequest>(std::move(context), std::move(activeRequest));
+ }
+
+private:
+ TCoreRequestContext CreateContext(const TString& url, const TString& requestId, const THttpHeader& header)
+ {
+ TCoreRequestContext context;
+ context.Url = url;
+ context.RequestId = requestId;
+
+ auto urlRef = NHttp::ParseUrl(url);
+ context.HostName = CreateHost(urlRef.Host, urlRef.PortStr);
+
+ context.LogResponse = false;
+ auto outputFormat = header.GetOutputFormat();
+ if (outputFormat && outputFormat->IsTextYson()) {
+ context.LogResponse = true;
+ }
+ context.StartTime = TInstant::Now();
+ return context;
+ }
+
+ NHttp::IResponsePtr RequestImpl(const TString& method, const TString& url, const NHttp::THeadersPtr& headers, const TSharedRef& body)
+ {
+ if (method == "GET") {
+ return Client_->Get(url, headers).Get().ValueOrThrow();
+ } else if (method == "POST") {
+ return Client_->Post(url, body, headers).Get().ValueOrThrow();
+ } else if (method == "PUT") {
+ return Client_->Put(url, body, headers).Get().ValueOrThrow();
+ } else {
+ YT_LOG_FATAL("Unsupported http method (Method: %v, Url: %v)",
+ method,
+ url);
+ }
+ }
+
+ NHttp::IActiveRequestPtr StartRequestImpl(const TString& method, const TString& url, const NHttp::THeadersPtr& headers)
+ {
+ if (method == "POST") {
+ return Client_->StartPost(url, headers).Get().ValueOrThrow();
+ } else if (method == "PUT") {
+ return Client_->StartPut(url, headers).Get().ValueOrThrow();
+ } else {
+ YT_LOG_FATAL("Unsupported http method (Method: %v, Url: %v)",
+ method,
+ url);
+ }
+ }
+
+ NConcurrency::IThreadPoolPollerPtr Poller_;
+ NHttp::IClientPtr Client_;
+};
+
+///////////////////////////////////////////////////////////////////////////////
+
+IHttpClientPtr CreateDefaultHttpClient()
+{
+ return std::make_shared<TDefaultHttpClient>();
+}
+
+IHttpClientPtr CreateCoreHttpClient(bool useTLS, const TConfigPtr& config)
+{
+ return std::make_shared<TCoreHttpClient>(useTLS, config);
+}
+
+///////////////////////////////////////////////////////////////////////////////
+
+} // namespace NYT::NHttpClient
diff --git a/yt/cpp/mapreduce/http/http_client.h b/yt/cpp/mapreduce/http/http_client.h
new file mode 100644
index 0000000000..859f0423cb
--- /dev/null
+++ b/yt/cpp/mapreduce/http/http_client.h
@@ -0,0 +1,76 @@
+#pragma once
+
+#include "fwd.h"
+
+#include <yt/cpp/mapreduce/interface/fwd.h>
+
+#include <util/datetime/base.h>
+
+#include <util/generic/maybe.h>
+#include <util/generic/string.h>
+
+#include <util/stream/fwd.h>
+
+#include <memory>
+
+namespace NYT::NHttpClient {
+
+///////////////////////////////////////////////////////////////////////////////
+
+struct THttpConfig
+{
+ TDuration SocketTimeout = TDuration::Zero();
+};
+
+///////////////////////////////////////////////////////////////////////////////
+
+class IHttpResponse
+{
+public:
+ virtual ~IHttpResponse() = default;
+
+ virtual int GetStatusCode() = 0;
+ virtual IInputStream* GetResponseStream() = 0;
+ virtual TString GetResponse() = 0;
+ virtual TString GetRequestId() const = 0;
+};
+
+class IHttpRequest
+{
+public:
+ virtual ~IHttpRequest() = default;
+
+ virtual IOutputStream* GetStream() = 0;
+ virtual IHttpResponsePtr Finish() = 0;
+};
+
+
+class IHttpClient
+{
+public:
+ virtual ~IHttpClient() = default;
+
+ virtual IHttpResponsePtr Request(const TString& url, const TString& requestId, const THttpConfig& config, const THttpHeader& header, TMaybe<TStringBuf> body = {}) = 0;
+
+ virtual IHttpResponsePtr Request(const TString& url, const TString& requestId, const THttpHeader& header, TMaybe<TStringBuf> body = {})
+ {
+ return Request(url, requestId, /*config*/ {}, header, body);
+ }
+
+ virtual IHttpRequestPtr StartRequest(const TString& url, const TString& requestId, const THttpConfig& config, const THttpHeader& header) = 0;
+
+ virtual IHttpRequestPtr StartRequest(const TString& url, const TString& requestId, const THttpHeader& header)
+ {
+ return StartRequest(url, requestId, /*config*/ {}, header);
+ }
+};
+
+///////////////////////////////////////////////////////////////////////////////
+
+IHttpClientPtr CreateDefaultHttpClient();
+
+IHttpClientPtr CreateCoreHttpClient(bool useTLS, const TConfigPtr& config);
+
+///////////////////////////////////////////////////////////////////////////////
+
+} // namespace NYT::NHttpClient
diff --git a/yt/cpp/mapreduce/http/requests.cpp b/yt/cpp/mapreduce/http/requests.cpp
new file mode 100644
index 0000000000..7cf0f673bb
--- /dev/null
+++ b/yt/cpp/mapreduce/http/requests.cpp
@@ -0,0 +1,66 @@
+#include "requests.h"
+
+#include "context.h"
+#include "host_manager.h"
+#include "retry_request.h"
+
+#include <yt/cpp/mapreduce/client/transaction.h>
+
+#include <yt/cpp/mapreduce/common/helpers.h>
+#include <yt/cpp/mapreduce/common/retry_lib.h>
+#include <yt/cpp/mapreduce/common/node_builder.h>
+#include <yt/cpp/mapreduce/common/wait_proxy.h>
+
+#include <yt/cpp/mapreduce/interface/config.h>
+#include <yt/cpp/mapreduce/interface/errors.h>
+#include <yt/cpp/mapreduce/interface/logging/yt_log.h>
+#include <yt/cpp/mapreduce/interface/serialize.h>
+
+#include <util/stream/file.h>
+#include <util/string/builder.h>
+#include <util/generic/buffer.h>
+
+
+namespace NYT {
+
+////////////////////////////////////////////////////////////////////////////////
+
+bool ParseBoolFromResponse(const TString& response)
+{
+ return GetBool(NodeFromYsonString(response));
+}
+
+TGUID ParseGuidFromResponse(const TString& response)
+{
+ auto node = NodeFromYsonString(response);
+ return GetGuid(node.AsString());
+}
+
+////////////////////////////////////////////////////////////////////////////////
+
+TString GetProxyForHeavyRequest(const TClientContext& context)
+{
+ if (!context.Config->UseHosts) {
+ return context.ServerName;
+ }
+
+ return NPrivate::THostManager::Get().GetProxyForHeavyRequest(context);
+}
+
+void LogRequestError(
+ const TString& requestId,
+ const THttpHeader& header,
+ const TString& message,
+ const TString& attemptDescription)
+{
+ YT_LOG_ERROR("RSP %v - %v - %v - %v - X-YT-Parameters: %v",
+ requestId,
+ header.GetUrl(),
+ message,
+ attemptDescription,
+ NodeToYsonString(header.GetParameters()));
+}
+
+////////////////////////////////////////////////////////////////////////////////
+
+} // namespace NYT
diff --git a/yt/cpp/mapreduce/http/requests.h b/yt/cpp/mapreduce/http/requests.h
new file mode 100644
index 0000000000..2c692475d1
--- /dev/null
+++ b/yt/cpp/mapreduce/http/requests.h
@@ -0,0 +1,29 @@
+#pragma once
+
+#include "fwd.h"
+#include "http.h"
+
+#include <util/generic/maybe.h>
+#include <util/str_stl.h>
+
+namespace NYT {
+
+///////////////////////////////////////////////////////////////////////////////
+
+bool ParseBoolFromResponse(const TString& response);
+
+TGUID ParseGuidFromResponse(const TString& response);
+
+////////////////////////////////////////////////////////////////////////////////
+
+TString GetProxyForHeavyRequest(const TClientContext& context);
+
+void LogRequestError(
+ const TString& requestId,
+ const THttpHeader& header,
+ const TString& message,
+ const TString& attemptDescription);
+
+////////////////////////////////////////////////////////////////////////////////
+
+} // namespace NYT
diff --git a/yt/cpp/mapreduce/http/retry_request.cpp b/yt/cpp/mapreduce/http/retry_request.cpp
new file mode 100644
index 0000000000..ba116edcf7
--- /dev/null
+++ b/yt/cpp/mapreduce/http/retry_request.cpp
@@ -0,0 +1,149 @@
+#include "retry_request.h"
+
+#include "context.h"
+#include "helpers.h"
+#include "http_client.h"
+#include "requests.h"
+
+#include <yt/cpp/mapreduce/common/wait_proxy.h>
+#include <yt/cpp/mapreduce/common/retry_lib.h>
+
+#include <yt/cpp/mapreduce/interface/config.h>
+#include <yt/cpp/mapreduce/interface/tvm.h>
+
+#include <yt/cpp/mapreduce/interface/logging/yt_log.h>
+
+#include <library/cpp/yson/node/node_io.h>
+
+namespace NYT {
+namespace NDetail {
+
+///////////////////////////////////////////////////////////////////////////////
+
+static TResponseInfo Request(
+ const TClientContext& context,
+ THttpHeader& header,
+ TMaybe<TStringBuf> body,
+ const TString& requestId,
+ const TRequestConfig& config)
+{
+ TString hostName;
+ if (config.IsHeavy) {
+ hostName = GetProxyForHeavyRequest(context);
+ } else {
+ hostName = context.ServerName;
+ }
+
+ auto url = GetFullUrl(hostName, context, header);
+
+ auto response = context.HttpClient->Request(url, requestId, config.HttpConfig, header, body);
+
+ TResponseInfo result;
+ result.RequestId = requestId;
+ result.Response = response->GetResponse();
+ result.HttpCode = response->GetStatusCode();
+ return result;
+}
+
+TResponseInfo RequestWithoutRetry(
+ const TClientContext& context,
+ THttpHeader& header,
+ TMaybe<TStringBuf> body,
+ const TRequestConfig& config)
+{
+ if (context.ServiceTicketAuth) {
+ header.SetServiceTicket(context.ServiceTicketAuth->Ptr->IssueServiceTicket());
+ } else {
+ header.SetToken(context.Token);
+ }
+
+ if (context.ImpersonationUser) {
+ header.SetImpersonationUser(*context.ImpersonationUser);
+ }
+
+ if (header.HasMutationId()) {
+ header.RemoveParameter("retry");
+ header.AddMutationId();
+ }
+ auto requestId = CreateGuidAsString();
+ return Request(context, header, body, requestId, config);
+}
+
+
+TResponseInfo RetryRequestWithPolicy(
+ IRequestRetryPolicyPtr retryPolicy,
+ const TClientContext& context,
+ THttpHeader& header,
+ TMaybe<TStringBuf> body,
+ const TRequestConfig& config)
+{
+ if (context.ServiceTicketAuth) {
+ header.SetServiceTicket(context.ServiceTicketAuth->Ptr->IssueServiceTicket());
+ } else {
+ header.SetToken(context.Token);
+ }
+
+ if (context.ImpersonationUser) {
+ header.SetImpersonationUser(*context.ImpersonationUser);
+ }
+
+ bool useMutationId = header.HasMutationId();
+ bool retryWithSameMutationId = false;
+
+ if (!retryPolicy) {
+ retryPolicy = CreateDefaultRequestRetryPolicy(context.Config);
+ }
+
+ while (true) {
+ auto requestId = CreateGuidAsString();
+ try {
+ retryPolicy->NotifyNewAttempt();
+
+ if (useMutationId) {
+ if (retryWithSameMutationId) {
+ header.AddParameter("retry", true, /* overwrite = */ true);
+ } else {
+ header.RemoveParameter("retry");
+ header.AddMutationId();
+ }
+ }
+
+ return Request(context, header, body, requestId, config);
+ } catch (const TErrorResponse& e) {
+ LogRequestError(requestId, header, e.GetError().GetMessage(), retryPolicy->GetAttemptDescription());
+ retryWithSameMutationId = e.IsTransportError();
+
+ if (!IsRetriable(e)) {
+ throw;
+ }
+
+ auto maybeRetryTimeout = retryPolicy->OnRetriableError(e);
+ if (maybeRetryTimeout) {
+ TWaitProxy::Get()->Sleep(*maybeRetryTimeout);
+ } else {
+ throw;
+ }
+ } catch (const std::exception& e) {
+ LogRequestError(requestId, header, e.what(), retryPolicy->GetAttemptDescription());
+ retryWithSameMutationId = true;
+
+ if (!IsRetriable(e)) {
+ throw;
+ }
+
+ auto maybeRetryTimeout = retryPolicy->OnGenericError(e);
+ if (maybeRetryTimeout) {
+ TWaitProxy::Get()->Sleep(*maybeRetryTimeout);
+ } else {
+ throw;
+ }
+ }
+ }
+
+ Y_FAIL("Retries must have either succeeded or thrown an exception");
+}
+
+///////////////////////////////////////////////////////////////////////////////
+
+} // namespace NDetail
+} // namespace NYT
diff --git a/yt/cpp/mapreduce/http/retry_request.h b/yt/cpp/mapreduce/http/retry_request.h
new file mode 100644
index 0000000000..2210e318f1
--- /dev/null
+++ b/yt/cpp/mapreduce/http/retry_request.h
@@ -0,0 +1,52 @@
+#pragma once
+
+#include "fwd.h"
+
+#include <yt/cpp/mapreduce/interface/fwd.h>
+#include <yt/cpp/mapreduce/common/fwd.h>
+
+#include <yt/cpp/mapreduce/http/http_client.h>
+
+#include <util/datetime/base.h>
+#include <util/generic/maybe.h>
+#include <util/generic/string.h>
+
+namespace NYT::NDetail {
+
+////////////////////////////////////////////////////////////////////
+
+struct TResponseInfo
+{
+ TString RequestId;
+ TString Response;
+ int HttpCode = 0;
+};
+
+////////////////////////////////////////////////////////////////////
+
+struct TRequestConfig
+{
+ NHttpClient::THttpConfig HttpConfig;
+ bool IsHeavy = false;
+};
+
+////////////////////////////////////////////////////////////////////
+
+// Retry request with given `header' and `body' using `retryPolicy'.
+// If `retryPolicy == nullptr' use default, currently `TAttemptLimitedRetryPolicy(TConfig::Get()->RetryCount)`.
+TResponseInfo RetryRequestWithPolicy(
+ IRequestRetryPolicyPtr retryPolicy,
+ const TClientContext& context,
+ THttpHeader& header,
+ TMaybe<TStringBuf> body = {},
+ const TRequestConfig& config = TRequestConfig());
+
+TResponseInfo RequestWithoutRetry(
+ const TClientContext& context,
+ THttpHeader& header,
+ TMaybe<TStringBuf> body = {},
+ const TRequestConfig& config = TRequestConfig());
+
+////////////////////////////////////////////////////////////////////
+
+} // namespace NYT::NDetail
diff --git a/yt/cpp/mapreduce/http/ya.make b/yt/cpp/mapreduce/http/ya.make
new file mode 100644
index 0000000000..ef81a4b64a
--- /dev/null
+++ b/yt/cpp/mapreduce/http/ya.make
@@ -0,0 +1,29 @@
+LIBRARY()
+
+INCLUDE(${ARCADIA_ROOT}/yt/ya_cpp.make.inc)
+
+SRCS(
+ abortable_http_response.cpp
+ context.cpp
+ helpers.cpp
+ host_manager.cpp
+ http.cpp
+ http_client.cpp
+ requests.cpp
+ retry_request.cpp
+)
+
+PEERDIR(
+ library/cpp/deprecated/atomic
+ library/cpp/http/io
+ library/cpp/string_utils/base64
+ library/cpp/string_utils/quote
+ library/cpp/threading/cron
+ yt/cpp/mapreduce/common
+ yt/cpp/mapreduce/interface
+ yt/cpp/mapreduce/interface/logging
+ yt/yt/core/http
+ yt/yt/core/https
+)
+
+END()
diff --git a/yt/cpp/mapreduce/interface/batch_request.cpp b/yt/cpp/mapreduce/interface/batch_request.cpp
new file mode 100644
index 0000000000..fefdacb61a
--- /dev/null
+++ b/yt/cpp/mapreduce/interface/batch_request.cpp
@@ -0,0 +1,15 @@
+#include "batch_request.h"
+#include "client.h"
+
+namespace NYT {
+
+////////////////////////////////////////////////////////////////////////////////
+
+IBatchRequestBase& IBatchRequest::WithTransaction(const ITransactionPtr& transaction)
+{
+ return WithTransaction(transaction->GetId());
+}
+
+////////////////////////////////////////////////////////////////////////////////
+
+} // namespace NYT
diff --git a/yt/cpp/mapreduce/interface/batch_request.h b/yt/cpp/mapreduce/interface/batch_request.h
new file mode 100644
index 0000000000..3ea28f76fd
--- /dev/null
+++ b/yt/cpp/mapreduce/interface/batch_request.h
@@ -0,0 +1,222 @@
+#pragma once
+
+#include "fwd.h"
+
+#include "client_method_options.h"
+
+#include <library/cpp/threading/future/future.h>
+#include <util/generic/ptr.h>
+
+namespace NYT {
+
+////////////////////////////////////////////////////////////////////
+
+/// Helper base of @ref NYT::IBatchRequest holding most of useful methods.
+class IBatchRequestBase
+ : public TThrRefBase
+{
+public:
+ virtual ~IBatchRequestBase() = default;
+
+ ///
+ /// @brief Create cypress node.
+ ///
+ /// @see NYT::ICypressClient::Create
+ virtual ::NThreading::TFuture<TNodeId> Create(
+ const TYPath& path,
+ ENodeType type,
+ const TCreateOptions& options = TCreateOptions()) = 0;
+
+ ///
+ /// @brief Remove cypress node.
+ ///
+ /// @see NYT::ICypressClient::Remove
+ virtual ::NThreading::TFuture<void> Remove(
+ const TYPath& path,
+ const TRemoveOptions& options = TRemoveOptions()) = 0;
+
+ ///
+ /// @brief Check wether cypress node exists.
+ ///
+ /// @see NYT::ICypressClient::Exists
+ virtual ::NThreading::TFuture<bool> Exists(
+ const TYPath& path,
+ const TExistsOptions& options = TExistsOptions()) = 0;
+
+ ///
+ /// @brief Get cypress node.
+ ///
+ /// @see NYT::ICypressClient::Get
+ virtual ::NThreading::TFuture<TNode> Get(
+ const TYPath& path,
+ const TGetOptions& options = TGetOptions()) = 0;
+
+ ///
+ /// @brief Set cypress node.
+ ///
+ /// @see NYT::ICypressClient::Set
+ virtual ::NThreading::TFuture<void> Set(
+ const TYPath& path,
+ const TNode& node,
+ const TSetOptions& options = TSetOptions()) = 0;
+
+ ///
+ /// @brief List cypress directory.
+ ///
+ /// @see NYT::ICypressClient::List
+ virtual ::NThreading::TFuture<TNode::TListType> List(
+ const TYPath& path,
+ const TListOptions& options = TListOptions()) = 0;
+
+ ///
+ /// @brief Copy cypress node.
+ ///
+ /// @see NYT::ICypressClient::Copy
+ virtual ::NThreading::TFuture<TNodeId> Copy(
+ const TYPath& sourcePath,
+ const TYPath& destinationPath,
+ const TCopyOptions& options = TCopyOptions()) = 0;
+
+ ///
+ /// @brief Move cypress node.
+ ///
+ /// @see NYT::ICypressClient::Move
+ virtual ::NThreading::TFuture<TNodeId> Move(
+ const TYPath& sourcePath,
+ const TYPath& destinationPath,
+ const TMoveOptions& options = TMoveOptions()) = 0;
+
+ ///
+ /// @brief Create symbolic link.
+ ///
+ /// @see NYT::ICypressClient::Link.
+ virtual ::NThreading::TFuture<TNodeId> Link(
+ const TYPath& targetPath,
+ const TYPath& linkPath,
+ const TLinkOptions& options = TLinkOptions()) = 0;
+
+ ///
+ /// @brief Lock cypress node.
+ ///
+ /// @see NYT::ICypressClient::Lock
+ virtual ::NThreading::TFuture<ILockPtr> Lock(
+ const TYPath& path,
+ ELockMode mode,
+ const TLockOptions& options = TLockOptions()) = 0;
+
+ ///
+ /// @brief Unlock cypress node.
+ ///
+ /// @see NYT::ICypressClient::Unlock
+ virtual ::NThreading::TFuture<void> Unlock(
+ const TYPath& path,
+ const TUnlockOptions& options = TUnlockOptions()) = 0;
+
+ ///
+ /// @brief Abort operation.
+ ///
+ /// @see NYT::IClient::AbortOperation
+ virtual ::NThreading::TFuture<void> AbortOperation(const TOperationId& operationId) = 0;
+
+ ///
+ /// @brief Force complete operation.
+ ///
+ /// @see NYT::IClient::CompleteOperation
+ virtual ::NThreading::TFuture<void> CompleteOperation(const TOperationId& operationId) = 0;
+
+ ///
+ /// @brief Suspend operation.
+ ///
+ /// @see NYT::IClient::SuspendOperation
+ virtual ::NThreading::TFuture<void> SuspendOperation(
+ const TOperationId& operationId,
+ const TSuspendOperationOptions& options = TSuspendOperationOptions()) = 0;
+
+ ///
+ /// @brief Resume operation.
+ ///
+ /// @see NYT::IClient::ResumeOperation
+ virtual ::NThreading::TFuture<void> ResumeOperation(
+ const TOperationId& operationId,
+ const TResumeOperationOptions& options = TResumeOperationOptions()) = 0;
+
+ ///
+ /// @brief Update parameters of running operation.
+ ///
+ /// @see NYT::IClient::UpdateOperationParameters
+ virtual ::NThreading::TFuture<void> UpdateOperationParameters(
+ const TOperationId& operationId,
+ const TUpdateOperationParametersOptions& options = TUpdateOperationParametersOptions()) = 0;
+
+ ///
+ /// @brief Canonize cypress path
+ ///
+ /// @see NYT::ICypressClient::CanonizeYPath
+ virtual ::NThreading::TFuture<TRichYPath> CanonizeYPath(const TRichYPath& path) = 0;
+
+ ///
+ /// @brief Get table columnar statistic
+ ///
+ /// @see NYT::ICypressClient::GetTableColumnarStatistics
+ virtual ::NThreading::TFuture<TVector<TTableColumnarStatistics>> GetTableColumnarStatistics(
+ const TVector<TRichYPath>& paths,
+ const TGetTableColumnarStatisticsOptions& options = {}) = 0;
+
+ ///
+ /// @brief Check permission for given path.
+ ///
+ /// @see NYT::IClient::CheckPermission
+ virtual ::NThreading::TFuture<TCheckPermissionResponse> CheckPermission(
+ const TString& user,
+ EPermission permission,
+ const TYPath& path,
+ const TCheckPermissionOptions& options = TCheckPermissionOptions()) = 0;
+};
+
+///
+/// @brief Batch request object.
+///
+/// Allows to send multiple lightweight requests at once significantly
+/// reducing time of their execution.
+///
+/// Methods of this class accept same arguments as @ref NYT::IClient methods but
+/// return TFuture that is set after execution of @ref NYT::IBatchRequest::ExecuteBatch
+///
+/// @see [Example of usage](https://a.yandex-team.ru/arc/trunk/arcadia/yt/cpp/mapreduce/examples/tutorial/batch_request/main.cpp)
+class IBatchRequest
+ : public IBatchRequestBase
+{
+public:
+ ///
+ /// @brief Temporary override current transaction.
+ ///
+ /// Using WithTransaction user can temporary override default transaction.
+ /// Example of usage:
+ /// TBatchRequest batchRequest;
+ /// auto noTxResult = batchRequest.Get("//some/path");
+ /// auto txResult = batchRequest.WithTransaction(tx).Get("//some/path");
+ virtual IBatchRequestBase& WithTransaction(const TTransactionId& transactionId) = 0;
+ IBatchRequestBase& WithTransaction(const ITransactionPtr& transaction);
+
+ ///
+ /// @brief Executes all subrequests of batch request.
+ ///
+ /// After execution of this method all TFuture objects returned by subrequests will
+ /// be filled with either result or error.
+ ///
+ /// @note It is undefined in which order these requests are executed.
+ ///
+ /// @note This method doesn't throw if subrequest emits error.
+ /// Instead corresponding future is set with exception.
+ /// So it is always important to check TFuture status.
+ ///
+ /// Single TBatchRequest instance may be executed only once
+ /// and cannot be modified (filled with additional requests) after execution.
+ /// Exception is thrown on attempt to modify executed batch request
+ /// or execute it again.
+ virtual void ExecuteBatch(const TExecuteBatchOptions& options = TExecuteBatchOptions()) = 0;
+};
+
+////////////////////////////////////////////////////////////////////
+
+} // namespace NYT
diff --git a/yt/cpp/mapreduce/interface/client.cpp b/yt/cpp/mapreduce/interface/client.cpp
new file mode 100644
index 0000000000..11d308b809
--- /dev/null
+++ b/yt/cpp/mapreduce/interface/client.cpp
@@ -0,0 +1,19 @@
+#include "client.h"
+
+namespace NYT {
+
+////////////////////////////////////////////////////////////////////////////////
+
+void ILock::Wait(TDuration timeout)
+{
+ return GetAcquiredFuture().GetValue(timeout);
+}
+
+void ITransaction::Detach()
+{
+ Y_FAIL("ITransaction::Detach() is not implemented");
+}
+
+////////////////////////////////////////////////////////////////////////////////
+
+} // namespace NYT
diff --git a/yt/cpp/mapreduce/interface/client.h b/yt/cpp/mapreduce/interface/client.h
new file mode 100644
index 0000000000..54f37c3ae0
--- /dev/null
+++ b/yt/cpp/mapreduce/interface/client.h
@@ -0,0 +1,568 @@
+#pragma once
+
+///
+/// @file yt/cpp/mapreduce/interface/client.h
+///
+/// Main header of the C++ YT Wrapper.
+
+///
+/// @mainpage C++ library for working with YT
+///
+/// This library provides possibilities to work with YT as a [MapReduce](https://en.wikipedia.org/wiki/MapReduce) system. It allows:
+/// - to read/write tables and files
+/// - to run operations
+/// - to work with transactions.
+///
+/// This library provides only basic functions for working with dynamic tables.
+/// To access full powers of YT dynamic tables one should use
+/// [yt/client](https://a.yandex-team.ru/arc/trunk/arcadia/yt/19_4/yt/client) library.
+///
+/// Entry points to this library:
+/// - @ref NYT::Initialize() initialization function for this library;
+/// - @ref NYT::IClient main interface to work with YT cluster;
+/// - @ref NYT::CreateClient() function that creates client for particular cluster;
+/// - @ref NYT::IOperationClient ancestor of @ref NYT::IClient containing the set of methods to run operations.
+///
+/// Tutorial on how to use this library can be found [here](https://yt.yandex-team.ru/docs/api/c++/examples).
+
+#include "fwd.h"
+
+#include "client_method_options.h"
+#include "constants.h"
+#include "batch_request.h"
+#include "cypress.h"
+#include "init.h"
+#include "io.h"
+#include "node.h"
+#include "operation.h"
+
+#include <library/cpp/threading/future/future.h>
+
+#include <util/datetime/base.h>
+#include <util/generic/maybe.h>
+#include <util/system/compiler.h>
+
+/// Main namespace of YT client
+namespace NYT {
+
+////////////////////////////////////////////////////////////////////////////////
+
+/// OAuth info (returned by @ref NYT::IClient::WhoAmI).
+struct TAuthorizationInfo
+{
+ /// User's login.
+ TString Login;
+
+ /// Realm.
+ TString Realm;
+};
+
+////////////////////////////////////////////////////////////////////////////////
+
+/// @brief Part of @ref NYT::TCheckPermissionResponse.
+///
+/// In case when 'Action == ESecurityAction::Deny' because of a 'deny' rule,
+/// the "denying" object name and id and "denied" subject name an id may be returned.
+struct TCheckPermissionResult
+{
+ /// Was the access granted or not.
+ ESecurityAction Action;
+
+ /// Id of the object whose ACL's "deny" rule forbids the access.
+ TMaybe<TGUID> ObjectId;
+
+ ///
+ /// @brief Name of the object whose ACL's "deny" rule forbids the access.
+ ///
+ /// Example is "node //tmp/x/y".
+ TMaybe<TString> ObjectName;
+
+ /// Id of the subject for whom the access was denied by a "deny" rule.
+ TMaybe<TGUID> SubjectId;
+
+ /// Name of the subject for whom the access was denied by a "deny" rule.
+ TMaybe<TString> SubjectName;
+};
+
+/// @brief Result of @ref NYT::IClient::CheckPermission command.
+///
+/// The base part of the response corresponds to the check result for the node itself.
+/// `Columns` vector contains check results for the columns (in the same order as in the request).
+struct TCheckPermissionResponse
+ : public TCheckPermissionResult
+{
+ /// @brief Results for the table columns access permissions.
+ ///
+ /// @see [Columnar ACL doc](https://yt.yandex-team.ru/docs/description/common/columnar_acl)
+ TVector<TCheckPermissionResult> Columns;
+};
+
+////////////////////////////////////////////////////////////////////////////////
+
+/// @brief Interface representing a lock obtained from @ref NYT::ITransaction::Lock.
+///
+/// @see [YT doc](https://yt.yandex-team.ru/docs/api/commands.html#start-tx)
+class ILock
+ : public TThrRefBase
+{
+public:
+ virtual ~ILock() = default;
+
+ /// Get cypress node id of lock itself.
+ virtual const TLockId& GetId() const = 0;
+
+ /// Get cypress node id of locked object.
+ virtual TNodeId GetLockedNodeId() const = 0;
+
+ ///
+ /// @brief Get future that will be set once lock is in "acquired" state.
+ ///
+ /// Note that future might contain exception if some error occurred
+ /// e.g. lock transaction was aborted.
+ virtual const ::NThreading::TFuture<void>& GetAcquiredFuture() const = 0;
+
+ ///
+ /// @brief Wait until lock is in "acquired" state.
+ ///
+ /// Throws exception if timeout exceeded or some error occurred
+ /// e.g. lock transaction was aborted.
+ void Wait(TDuration timeout = TDuration::Max());
+};
+
+////////////////////////////////////////////////////////////////////////////////
+
+/// @brief Base class for @ref NYT::IClient and @ref NYT::ITransaction.
+///
+/// This class contains transactional commands.
+class IClientBase
+ : public TThrRefBase
+ , public ICypressClient
+ , public IIOClient
+ , public IOperationClient
+{
+public:
+ ///
+ /// @brief Start a [transaction] (https://yt.yandex-team.ru/docs/description/storage/transactions.html#master_transactions).
+ ///
+ /// @see [YT doc](https://yt.yandex-team.ru/docs/api/commands.html#start-tx)
+ [[nodiscard]] virtual ITransactionPtr StartTransaction(
+ const TStartTransactionOptions& options = TStartTransactionOptions()) = 0;
+
+ ///
+ /// @brief Change properties of table.
+ ///
+ /// Allows to:
+ /// - switch table between dynamic/static mode
+ /// - or change table schema
+ /// @see [YT doc](https://yt.yandex-team.ru/docs/api/commands.html#alter-table)
+ virtual void AlterTable(
+ const TYPath& path,
+ const TAlterTableOptions& options = TAlterTableOptions()) = 0;
+
+ ///
+ /// @brief Create batch request object that allows to execute several light requests in parallel.
+ ///
+ /// @see [YT doc](https://yt.yandex-team.ru/docs/api/commands.html#execute-batch)
+ virtual TBatchRequestPtr CreateBatchRequest() = 0;
+
+ /// @brief Get root client outside of all transactions.
+ virtual IClientPtr GetParentClient() = 0;
+};
+
+////////////////////////////////////////////////////////////////////////////////
+
+
+/// @brief Interface representing a master transaction.
+///
+/// @see [YT doc](https://yt.yandex-team.ru/docs/description/storage/transactions.html#master_transactions)
+class ITransaction
+ : virtual public IClientBase
+{
+public:
+ /// Get id of transaction.
+ virtual const TTransactionId& GetId() const = 0;
+
+ ///
+ /// @brief Try to lock given path.
+ ///
+ /// Lock will be held until transaction is commited/aborted or @ref NYT::ITransaction::Unlock method is called.
+ /// Lock modes:
+ /// - `LM_EXCLUSIVE`: if exclusive lock is taken no other transaction can take exclusive or shared lock.
+ /// - `LM_SHARED`: if shared lock is taken other transactions can take shared lock but not exclusive.
+ /// - `LM_SNAPSHOT`: snapshot lock always succeeds, when snapshot lock is taken current transaction snapshots object.
+ /// It will not see changes that occurred to it in other transactions.
+ ///
+ /// Exclusive/shared lock can be waitable or not.
+ /// If nonwaitable lock cannot be taken exception is thrown.
+ /// If waitable lock cannot be taken it is created in pending state and client can wait until it actually taken.
+ /// Check @ref NYT::TLockOptions::Waitable and @ref NYT::ILock::GetAcquiredFuture for more details.
+ ///
+ /// @see [YT doc](https://yt.yandex-team.ru/docs/api/commands.html#lock)
+ virtual ILockPtr Lock(
+ const TYPath& path,
+ ELockMode mode,
+ const TLockOptions& options = TLockOptions()) = 0;
+
+ ///
+ /// @brief Remove all the locks (including pending ones) for this transaction from a Cypress node at `path`.
+ ///
+ /// If the locked version of the node differs from the original one,
+ /// an error will be thrown.
+ ///
+ /// Command is successful even if the node has no locks.
+ /// Only explicit (created by @ref NYT::ITransaction::Lock) locks are removed.
+ ///
+ /// @see [YT doc](https://yt.yandex-team.ru/docs/api/commands.html#unlock)
+ virtual void Unlock(
+ const TYPath& path,
+ const TUnlockOptions& options = TUnlockOptions()) = 0;
+
+ ///
+ /// @brief Commit transaction.
+ ///
+ /// All changes that are made by transactions become visible globally or to parent transaction.
+ ///
+ /// @see [YT doc](https://yt.yandex-team.ru/docs/api/commands.html#commit)
+ virtual void Commit() = 0;
+
+ ///
+ /// @brief Abort transaction.
+ ///
+ /// All changes made by current transaction are lost.
+ ///
+ /// @see [YT doc](https://yt.yandex-team.ru/docs/api/commands.html#abort)
+ virtual void Abort() = 0;
+
+ /// @brief Explicitly ping transaction.
+ ///
+ /// User usually does not need this method (as transactions are pinged automatically,
+ /// see @ref NYT::TStartTransactionOptions::AutoPingable).
+ /// @see [YT doc](https://yt.yandex-team.ru/docs/api/commands.html#ping)
+ virtual void Ping() = 0;
+
+ ///
+ /// @brief Detach transaction.
+ ///
+ /// Stop any activities connected with it: pinging, aborting on crashes etc.
+ /// Forget about the transaction totally.
+ virtual void Detach();
+};
+
+////////////////////////////////////////////////////////////////////////////////
+
+/// Interface containing non-transactional commands.
+class IClient
+ : virtual public IClientBase
+{
+public:
+ ///
+ /// @brief Attach to existing master transaction.
+ ///
+ /// Returned object WILL NOT:
+ /// - ping transaction automatically (unless @ref NYT::TAttachTransactionOptions::AutoPing is set)
+ /// - abort it on program termination (unless @ref NYT::TAttachTransactionOptions::AbortOnTermination is set).
+ /// Otherwise returned object is similar to the object returned by @ref NYT::IClientBase::StartTransaction.
+ /// and it can see all the changes made inside the transaction.
+ [[nodiscard]] virtual ITransactionPtr AttachTransaction(
+ const TTransactionId& transactionId,
+ const TAttachTransactionOptions& options = TAttachTransactionOptions()) = 0;
+
+ ///
+ /// @brief Mount dynamic table.
+ ///
+ /// @see [YT doc](https://yt.yandex-team.ru/docs/api/commands.html#mount-table)
+ virtual void MountTable(
+ const TYPath& path,
+ const TMountTableOptions& options = TMountTableOptions()) = 0;
+
+ ///
+ /// @brief Unmount dynamic table.
+ ///
+ /// @see [YT doc](https://yt.yandex-team.ru/docs/api/commands.html#unmount-table)
+ virtual void UnmountTable(
+ const TYPath& path,
+ const TUnmountTableOptions& options = TUnmountTableOptions()) = 0;
+
+ ///
+ /// @brief Remount dynamic table.
+ ///
+ /// @see [YT doc](https://yt.yandex-team.ru/docs/api/commands.html#remount-table)
+ virtual void RemountTable(
+ const TYPath& path,
+ const TRemountTableOptions& options = TRemountTableOptions()) = 0;
+
+ ///
+ /// @brief Switch dynamic table from `mounted' into `frozen' state.
+ ///
+ /// When table is in frozen state all its data is flushed to disk and writes are disabled.
+ ///
+ /// @note this function launches the process of switching, but doesn't wait until switching is accomplished.
+ /// Waiting has to be performed by user.
+ ///
+ /// @see [YT doc](https://yt.yandex-team.ru/docs/api/commands.html#freeze-table)
+ virtual void FreezeTable(
+ const TYPath& path,
+ const TFreezeTableOptions& options = TFreezeTableOptions()) = 0;
+
+ ///
+ /// @brief Switch dynamic table from `frozen` into `mounted` state.
+ ///
+ /// @note this function launches the process of switching, but doesn't wait until switching is accomplished.
+ /// Waiting has to be performed by user.
+ ///
+ /// @see [YT doc](https://yt.yandex-team.ru/docs/api/commands.html#unfreeze-table)
+ virtual void UnfreezeTable(
+ const TYPath& path,
+ const TUnfreezeTableOptions& options = TUnfreezeTableOptions()) = 0;
+
+ ///
+ /// @brief Reshard dynamic table (break it into tablets) by given pivot keys.
+ ///
+ /// @see [YT doc](https://yt.yandex-team.ru/docs/api/commands.html#reshard-table)
+ virtual void ReshardTable(
+ const TYPath& path,
+ const TVector<TKey>& pivotKeys,
+ const TReshardTableOptions& options = TReshardTableOptions()) = 0;
+
+ ///
+ /// @brief Reshard dynamic table, breaking it into given number of tablets.
+ ///
+ /// @see [YT doc](https://yt.yandex-team.ru/docs/api/commands.html#reshard-table)
+ virtual void ReshardTable(
+ const TYPath& path,
+ i64 tabletCount,
+ const TReshardTableOptions& options = TReshardTableOptions()) = 0;
+
+ ///
+ /// @brief Insert rows into dynamic table.
+ ///
+ /// @see [YT doc](https://yt.yandex-team.ru/docs/api/commands.html#insert-rows)
+ virtual void InsertRows(
+ const TYPath& path,
+ const TNode::TListType& rows,
+ const TInsertRowsOptions& options = TInsertRowsOptions()) = 0;
+
+ ///
+ /// @brief Delete rows from dynamic table.
+ ///
+ /// @see [YT doc](https://yt.yandex-team.ru/docs/api/commands.html#delete-rows)
+ virtual void DeleteRows(
+ const TYPath& path,
+ const TNode::TListType& keys,
+ const TDeleteRowsOptions& options = TDeleteRowsOptions()) = 0;
+
+ ///
+ /// @brief Trim rows from the beginning of ordered dynamic table.
+ ///
+ /// Asynchronously removes `rowCount` rows from the beginning of ordered dynamic table.
+ /// Numeration of remaining rows *does not change*, e.g. after `trim(10)` and `trim(20)`
+ /// you get in total `20` deleted rows.
+ ///
+ /// @param path Path to ordered dynamic table.
+ /// @param tabletIndex Which tablet to trim.
+ /// @param rowCount How many trimmed rows will be in the table after command.
+ /// @param options Optional parameters.
+ ///
+ /// @see [YT doc](https://yt.yandex-team.ru/docs/api/commands.html#trim-rows)
+ virtual void TrimRows(
+ const TYPath& path,
+ i64 tabletIndex,
+ i64 rowCount,
+ const TTrimRowsOptions& options = TTrimRowsOptions()) = 0;
+
+ ///
+ /// @brief Lookup rows with given keys from dynamic table.
+ ///
+ /// @see [YT doc](https://yt.yandex-team.ru/docs/api/commands.html#lookup-rows)
+ virtual TNode::TListType LookupRows(
+ const TYPath& path,
+ const TNode::TListType& keys,
+ const TLookupRowsOptions& options = TLookupRowsOptions()) = 0;
+
+ ///
+ /// @brief Select rows from dynamic table, using [SQL dialect](https://yt.yandex-team.ru/docs//description/dynamic_tables/dyn_query_language.html).
+ ///
+ /// @see [YT doc](https://yt.yandex-team.ru/docs/api/commands.html#select-rows)
+ virtual TNode::TListType SelectRows(
+ const TString& query,
+ const TSelectRowsOptions& options = TSelectRowsOptions()) = 0;
+
+ ///
+ /// @brief Change properties of table replica.
+ ///
+ /// Allows to enable/disable replica and/or change its mode.
+ ///
+ /// @see [YT doc](https://yt.yandex-team.ru/docs/api/commands.html#alter-table-replica)
+ virtual void AlterTableReplica(
+ const TReplicaId& replicaId,
+ const TAlterTableReplicaOptions& alterTableReplicaOptions) = 0;
+
+ ///
+ /// @brief Generate a monotonously increasing master timestamp.
+ ///
+ /// @see [YT doc](https://yt.yandex-team.ru/docs/api/commands.html#generate-timestamp)
+ virtual ui64 GenerateTimestamp() = 0;
+
+ /// Return YT username of current client.
+ virtual TAuthorizationInfo WhoAmI() = 0;
+
+ ///
+ /// @brief Get operation attributes.
+ ///
+ /// @see [YT doc](https://yt.yandex-team.ru/docs/api/commands.html#get-operation)
+ virtual TOperationAttributes GetOperation(
+ const TOperationId& operationId,
+ const TGetOperationOptions& options = TGetOperationOptions()) = 0;
+
+ ///
+ /// @brief List operations satisfying given filters.
+ ///
+ /// @see [YT doc](https://yt.yandex-team.ru/docs/api/commands.html#list-operations)
+ virtual TListOperationsResult ListOperations(
+ const TListOperationsOptions& options = TListOperationsOptions()) = 0;
+
+ ///
+ /// @brief Update operation runtime parameters.
+ ///
+ /// @see [YT doc](https://yt.yandex-team.ru/docs/api/commands.html#update-op-parameters)
+ virtual void UpdateOperationParameters(
+ const TOperationId& operationId,
+ const TUpdateOperationParametersOptions& options) = 0;
+
+ ///
+ /// @brief Get job attributes.
+ ///
+ /// @see [YT doc](https://yt.yandex-team.ru/docs/api/commands.html#get-job)
+ virtual TJobAttributes GetJob(
+ const TOperationId& operationId,
+ const TJobId& jobId,
+ const TGetJobOptions& options = TGetJobOptions()) = 0;
+
+ ///
+ /// List attributes of jobs satisfying given filters.
+ ///
+ /// @see [YT doc](https://yt.yandex-team.ru/docs/api/commands.html#list-jobs)
+ virtual TListJobsResult ListJobs(
+ const TOperationId& operationId,
+ const TListJobsOptions& options = TListJobsOptions()) = 0;
+
+ ///
+ /// @brief Get the input of a running or failed job.
+ ///
+ /// @ref NYT::TErrorResponse exception is thrown if job is missing.
+ ///
+ /// @see [YT doc](https://yt.yandex-team.ru/docs/api/commands.html#get-job-input)
+ virtual IFileReaderPtr GetJobInput(
+ const TJobId& jobId,
+ const TGetJobInputOptions& options = TGetJobInputOptions()) = 0;
+
+ ///
+ /// @brief Get fail context of a failed job.
+ ///
+ /// @ref NYT::TErrorResponse exception is thrown if it is missing.
+ ///
+ /// @see [YT doc](https://yt.yandex-team.ru/docs/api/commands.html#get-job-fail-context)
+ virtual IFileReaderPtr GetJobFailContext(
+ const TOperationId& operationId,
+ const TJobId& jobId,
+ const TGetJobFailContextOptions& options = TGetJobFailContextOptions()) = 0;
+
+ ///
+ /// @brief Get stderr of a running or failed job.
+ ///
+ /// @ref NYT::TErrorResponse exception is thrown if it is missing.
+ ///
+ /// @note YT doesn't store all job stderrs
+ ///
+ /// @note If job stderr exceeds few megabytes YT will store only head and tail of stderr.
+ ///
+ /// @see Description of `max_stderr_size` spec option [here](https://yt.yandex-team.ru/docs//description/mr/operations_options.html).
+ ///
+ /// @see [YT doc](https://yt.yandex-team.ru/docs/api/commands.html#get-job-stderr)
+ virtual IFileReaderPtr GetJobStderr(
+ const TOperationId& operationId,
+ const TJobId& jobId,
+ const TGetJobStderrOptions& options = TGetJobStderrOptions()) = 0;
+
+ ///
+ /// @brief Create one or several rbtorrents for files in a blob table.
+ ///
+ /// If specified, one torrent is created for each value of `KeyColumns` option.
+ /// Otherwise, a single torrent with all files of a table is created.
+ ///
+ /// @return list of nodes, each node has two fields
+ /// * `key`: list of key columns values. Empty if `KeyColumns` is not specified.
+ /// * `rbtorrent`: rbtorrent string (with `rbtorrent:` prefix)
+ ///
+ /// @see [More info.](https://docs.yandex-team.ru/docs/yt/description/storage/blobtables#sky_share)
+ virtual TNode::TListType SkyShareTable(
+ const std::vector<TYPath>& tablePaths,
+ const TSkyShareTableOptions& options) = 0;
+
+ ///
+ /// @brief Check if `user` has `permission` to access a Cypress node at `path`.
+ ///
+ /// For tables access to columns specified in `options.Columns_` can be checked
+ /// (@see [the doc](https://yt.yandex-team.ru/docs/description/common/columnar_acl)).
+ ///
+ /// If access is denied (the returned result has `.Action == ESecurityAction::Deny`)
+ /// because of a `deny` rule, the "denying" object name and id
+ /// and "denied" subject name an id may be returned.
+ ///
+ /// @see [YT doc](https://yt.yandex-team.ru/docs/api/commands.html#check_permission)
+ virtual TCheckPermissionResponse CheckPermission(
+ const TString& user,
+ EPermission permission,
+ const TYPath& path,
+ const TCheckPermissionOptions& options = TCheckPermissionOptions()) = 0;
+
+ /// @brief Get information about tablet
+ /// @see NYT::TTabletInfo
+ virtual TVector<TTabletInfo> GetTabletInfos(
+ const TYPath& path,
+ const TVector<int>& tabletIndexes,
+ const TGetTabletInfosOptions& options = TGetTabletInfosOptions()) = 0;
+
+ ///
+ /// @brief Suspend operation.
+ ///
+ /// Jobs will be aborted.
+ ///
+ /// @see [YT doc](https://yt.yandex-team.ru/docs/api/commands.html#suspend_op)
+ virtual void SuspendOperation(
+ const TOperationId& operationId,
+ const TSuspendOperationOptions& options = TSuspendOperationOptions()) = 0;
+
+ /// @brief Resume previously suspended operation.
+ ///
+ /// @see [YT doc](https://yt.yandex-team.ru/docs/api/commands.html#resume_op)
+ virtual void ResumeOperation(
+ const TOperationId& operationId,
+ const TResumeOperationOptions& options = TResumeOperationOptions()) = 0;
+
+ ///
+ /// @brief Synchronously terminates all client's background activities
+ ///
+ /// e.g. no callbacks will be executed after the function is completed
+ ///
+ /// @note It is safe to call Shutdown multiple times
+ ///
+ /// @note @ref NYT::TApiUsageError will be thrown if any client's method is called after shutdown
+ ///
+ virtual void Shutdown() = 0;
+};
+
+
+/// Create a client for particular MapReduce cluster.
+IClientPtr CreateClient(
+ const TString& serverName,
+ const TCreateClientOptions& options = TCreateClientOptions());
+
+
+/// Create a client for mapreduce cluster specified in `YT_PROXY` environment variable.
+IClientPtr CreateClientFromEnv(
+ const TCreateClientOptions& options = TCreateClientOptions());
+
+////////////////////////////////////////////////////////////////////////////////
+
+} // namespace NYT
diff --git a/yt/cpp/mapreduce/interface/client_method_options.cpp b/yt/cpp/mapreduce/interface/client_method_options.cpp
new file mode 100644
index 0000000000..66f72bfe5f
--- /dev/null
+++ b/yt/cpp/mapreduce/interface/client_method_options.cpp
@@ -0,0 +1,34 @@
+#include "client_method_options.h"
+
+#include "tvm.h"
+
+namespace NYT {
+
+template <typename T>
+static void MergeMaybe(TMaybe<T>& origin, const TMaybe<T>& patch)
+{
+ if (patch) {
+ origin = patch;
+ }
+}
+
+void TFormatHints::Merge(const TFormatHints& patch)
+{
+ if (patch.SkipNullValuesForTNode_) {
+ SkipNullValuesForTNode(true);
+ }
+ MergeMaybe(EnableStringToAllConversion_, patch.EnableStringToAllConversion_);
+ MergeMaybe(EnableAllToStringConversion_, patch.EnableAllToStringConversion_);
+ MergeMaybe(EnableIntegralTypeConversion_, patch.EnableIntegralTypeConversion_);
+ MergeMaybe(EnableIntegralToDoubleConversion_, patch.EnableIntegralToDoubleConversion_);
+ MergeMaybe(EnableTypeConversion_, patch.EnableTypeConversion_);
+ MergeMaybe(ComplexTypeMode_, patch.ComplexTypeMode_);
+}
+
+TCreateClientOptions& TCreateClientOptions::ServiceTicketAuth(const NAuth::IServiceTicketAuthPtrWrapper& wrapper)
+{
+ ServiceTicketAuth_ = std::make_shared<NAuth::IServiceTicketAuthPtrWrapper>(wrapper);
+ return *this;
+}
+
+} // namespace NYT
diff --git a/yt/cpp/mapreduce/interface/client_method_options.h b/yt/cpp/mapreduce/interface/client_method_options.h
new file mode 100644
index 0000000000..8074632353
--- /dev/null
+++ b/yt/cpp/mapreduce/interface/client_method_options.h
@@ -0,0 +1,1452 @@
+#pragma once
+
+///
+/// @file yt/cpp/mapreduce/interface/client_method_options.h
+///
+/// Header containing options for @ref NYT::IClient methods.
+
+#include "common.h"
+#include "config.h"
+#include "format.h"
+#include "public.h"
+#include "retry_policy.h"
+
+#include <util/datetime/base.h>
+
+namespace NYT {
+
+////////////////////////////////////////////////////////////////////////////////
+
+/// Type of the cypress node.
+enum ENodeType : int
+{
+ NT_STRING /* "string_node" */,
+ NT_INT64 /* "int64_node" */,
+ NT_UINT64 /* "uint64_node" */,
+ NT_DOUBLE /* "double_node" */,
+ NT_BOOLEAN /* "boolean_node" */,
+ NT_MAP /* "map_node" */,
+ NT_LIST /* "list_node" */,
+ NT_FILE /* "file" */,
+ NT_TABLE /* "table" */,
+ NT_DOCUMENT /* "document" */,
+ NT_REPLICATED_TABLE /* "replicated_table" */,
+ NT_TABLE_REPLICA /* "table_replica" */,
+ NT_USER /* "user" */,
+ NT_SCHEDULER_POOL /* "scheduler_pool" */,
+ NT_LINK /* "link" */,
+};
+
+///
+/// @brief Mode of composite type representation in yson.
+///
+/// @see https://yt.yandex-team.ru/docs/description/storage/data_types#yson
+enum class EComplexTypeMode : int
+{
+ Named /* "named" */,
+ Positional /* "positional" */,
+};
+
+///
+/// @brief Options for @ref NYT::ICypressClient::Create
+///
+/// @see https://yt.yandex-team.ru/docs/api/commands.html#create
+struct TCreateOptions
+{
+ /// @cond Doxygen_Suppress
+ using TSelf = TCreateOptions;
+ /// @endcond
+
+ /// Create missing parent directories if required.
+ FLUENT_FIELD_DEFAULT(bool, Recursive, false);
+
+ ///
+ /// @brief Do not raise error if node already exists.
+ ///
+ /// Node is not recreated.
+ /// Force and IgnoreExisting MUST NOT be used simultaneously.
+ FLUENT_FIELD_DEFAULT(bool, IgnoreExisting, false);
+
+ ///
+ /// @brief Recreate node if it exists.
+ ///
+ /// Force and IgnoreExisting MUST NOT be used simultaneously.
+ FLUENT_FIELD_DEFAULT(bool, Force, false);
+
+ /// @brief Set node attributes.
+ FLUENT_FIELD_OPTION(TNode, Attributes);
+};
+
+///
+/// @brief Options for @ref NYT::ICypressClient::Remove
+///
+/// @see https://yt.yandex-team.ru/docs/api/commands.html#remove
+struct TRemoveOptions
+{
+ /// @cond Doxygen_Suppress
+ using TSelf = TRemoveOptions;
+ /// @endcond
+
+ ///
+ /// @brief Remove whole tree when removing composite cypress node (e.g. `map_node`).
+ ///
+ /// Without this option removing nonempty composite node will fail.
+ FLUENT_FIELD_DEFAULT(bool, Recursive, false);
+
+ /// @brief Do not fail if removing node doesn't exist.
+ FLUENT_FIELD_DEFAULT(bool, Force, false);
+};
+
+/// Base class for options for operations that read from master.
+template <typename TDerived>
+struct TMasterReadOptions
+{
+ /// @cond Doxygen_Suppress
+ using TSelf = TDerived;
+ /// @endcond
+
+ /// @brief Where to read from.
+ FLUENT_FIELD_OPTION(EMasterReadKind, ReadFrom);
+};
+
+///
+/// @brief Options for @ref NYT::ICypressClient::Exists
+///
+/// @see https://yt.yandex-team.ru/docs/api/commands.html#exists
+struct TExistsOptions
+ : public TMasterReadOptions<TExistsOptions>
+{
+};
+
+///
+/// @brief Options for @ref NYT::ICypressClient::Get
+///
+/// @see https://yt.yandex-team.ru/docs/api/commands.html#get
+struct TGetOptions
+ : public TMasterReadOptions<TGetOptions>
+{
+ /// @brief Attributes that should be fetched with each node.
+ FLUENT_FIELD_OPTION(TAttributeFilter, AttributeFilter);
+
+ /// @brief Limit for the number of children node.
+ FLUENT_FIELD_OPTION(i64, MaxSize);
+};
+
+///
+/// @brief Options for @ref NYT::ICypressClient::Set
+///
+/// @see https://yt.yandex-team.ru/docs/api/commands.html#set
+struct TSetOptions
+{
+ /// @cond Doxygen_Suppress
+ using TSelf = TSetOptions;
+ /// @endcond
+
+ /// Create missing parent directories if required.
+ FLUENT_FIELD_DEFAULT(bool, Recursive, false);
+
+ /// Allow setting any nodes, not only attribute and document ones.
+ FLUENT_FIELD_OPTION(bool, Force);
+};
+
+///
+/// @brief Options for @ref NYT::ICypressClient::MultisetAttributes
+///
+/// @see https://yt.yandex-team.ru/docs/api/commands.html#multiset_attributes
+struct TMultisetAttributesOptions
+{ };
+
+///
+/// @brief Options for @ref NYT::ICypressClient::List
+///
+/// @see https://yt.yandex-team.ru/docs/api/commands.html#list
+struct TListOptions
+ : public TMasterReadOptions<TListOptions>
+{
+ /// @cond Doxygen_Suppress
+ using TSelf = TListOptions;
+ /// @endcond
+
+ /// Attributes that should be fetched for each node.
+ FLUENT_FIELD_OPTION(TAttributeFilter, AttributeFilter);
+
+ /// Limit for the number of children that will be fetched.
+ FLUENT_FIELD_OPTION(i64, MaxSize);
+};
+
+///
+/// @brief Options for @ref NYT::ICypressClient::Copy
+///
+/// @see https://yt.yandex-team.ru/docs/api/commands.html#copy
+struct TCopyOptions
+{
+ /// @cond Doxygen_Suppress
+ using TSelf = TCopyOptions;
+ /// @endcond
+
+ /// Create missing directories in destination path if required.
+ FLUENT_FIELD_DEFAULT(bool, Recursive, false);
+
+ /// Allows to use existing node as destination, it will be overwritten.
+ FLUENT_FIELD_DEFAULT(bool, Force, false);
+
+ /// Whether to preserves account of source node.
+ FLUENT_FIELD_DEFAULT(bool, PreserveAccount, false);
+
+ /// Whether to preserve `expiration_time` attribute of source node.
+ FLUENT_FIELD_OPTION(bool, PreserveExpirationTime);
+};
+
+///
+/// @brief Options for @ref NYT::ICypressClient::Move
+///
+/// @see https://yt.yandex-team.ru/docs/api/commands.html#move
+struct TMoveOptions
+{
+ /// @cond Doxygen_Suppress
+ using TSelf = TMoveOptions;
+ /// @endcond
+
+ /// Create missing directories in destination path if required.
+ FLUENT_FIELD_DEFAULT(bool, Recursive, false);
+
+ /// Allows to use existing node as destination, it will be overwritten.
+ FLUENT_FIELD_DEFAULT(bool, Force, false);
+
+ /// Whether to preserves account of source node.
+ FLUENT_FIELD_DEFAULT(bool, PreserveAccount, false);
+
+ /// Whether to preserve `expiration_time` attribute of source node.
+ FLUENT_FIELD_OPTION(bool, PreserveExpirationTime);
+};
+
+///
+/// @brief Options for @ref NYT::ICypressClient::Link
+///
+/// @see https://yt.yandex-team.ru/docs/api/commands.html#link
+struct TLinkOptions
+{
+ /// @cond Doxygen_Suppress
+ using TSelf = TLinkOptions;
+ /// @endcond
+
+ /// Create parent directories of destination if they don't exist.
+ FLUENT_FIELD_DEFAULT(bool, Recursive, false);
+
+ /// Do not raise error if link already exists.
+ FLUENT_FIELD_DEFAULT(bool, IgnoreExisting, false);
+
+ /// Force rewrite target node.
+ FLUENT_FIELD_DEFAULT(bool, Force, false);
+
+ /// Attributes of created link.
+ FLUENT_FIELD_OPTION(TNode, Attributes);
+};
+
+///
+/// @brief Options for @ref NYT::ICypressClient::Concatenate
+///
+/// @see https://yt.yandex-team.ru/docs/api/commands.html#concatenate
+struct TConcatenateOptions
+{
+ /// @cond Doxygen_Suppress
+ using TSelf = TConcatenateOptions;
+ /// @endcond
+
+ /// Whether we should append to destination or rewrite it.
+ FLUENT_FIELD_OPTION(bool, Append);
+};
+
+///
+/// @brief Options for @ref NYT::IIOClient::CreateBlobTableReader
+///
+/// @see https://yt.yandex-team.ru/docs/api/commands.html#read_blob_table
+struct TBlobTableReaderOptions
+{
+ /// @cond Doxygen_Suppress
+ using TSelf = TBlobTableReaderOptions;
+ /// @endcond
+
+ /// Name of the part index column. By default it is "part_index".
+ FLUENT_FIELD_OPTION(TString, PartIndexColumnName);
+
+ /// Name of the data column. By default it is "data".
+ FLUENT_FIELD_OPTION(TString, DataColumnName);
+
+ ///
+ /// @brief Size of each part.
+ ///
+ /// All blob parts except the last part of the blob must be of this size
+ /// otherwise blob table reader emits error.
+ FLUENT_FIELD_DEFAULT(ui64, PartSize, 4 * 1024 * 1024);
+
+ /// @brief Offset from which to start reading
+ FLUENT_FIELD_DEFAULT(i64, Offset, 0);
+};
+
+///
+/// @brief Resource limits for operation (or pool)
+///
+/// @see https://yt.yandex-team.ru/docs/description/mr/scheduler/scheduler_and_pools#resursy
+/// @see NYT::TUpdateOperationParametersOptions
+struct TResourceLimits
+{
+ /// @cond Doxygen_Suppress
+ using TSelf = TResourceLimits;
+ /// @endcond
+
+ /// Number of slots for user jobs.
+ FLUENT_FIELD_OPTION(i64, UserSlots);
+
+ /// Number of cpu cores.
+ FLUENT_FIELD_OPTION(double, Cpu);
+
+ /// Network usage. Doesn't have precise physical unit.
+ FLUENT_FIELD_OPTION(i64, Network);
+
+ /// Memory in bytes.
+ FLUENT_FIELD_OPTION(i64, Memory);
+};
+
+///
+/// @brief Scheduling options for single pool tree.
+///
+/// @see NYT::TUpdateOperationParametersOptions
+struct TSchedulingOptions
+{
+ /// @cond Doxygen_Suppress
+ using TSelf = TSchedulingOptions;
+ /// @endcond
+
+ ///
+ /// @brief Pool to switch operation to.
+ ///
+ /// @note Switching is currently disabled on the server (will induce an exception).
+ FLUENT_FIELD_OPTION(TString, Pool);
+
+ /// @brief Operation weight.
+ FLUENT_FIELD_OPTION(double, Weight);
+
+ /// @brief Operation resource limits.
+ FLUENT_FIELD_OPTION(TResourceLimits, ResourceLimits);
+};
+
+///
+/// @brief Collection of scheduling options for multiple pool trees.
+///
+/// @see NYT::TUpdateOperationParametersOptions
+struct TSchedulingOptionsPerPoolTree
+{
+ /// @cond Doxygen_Suppress
+ using TSelf = TSchedulingOptionsPerPoolTree;
+ /// @endcond
+
+ TSchedulingOptionsPerPoolTree(const THashMap<TString, TSchedulingOptions>& options = {})
+ : Options_(options)
+ { }
+
+ /// Add scheduling options for pool tree.
+ TSelf& Add(TStringBuf poolTreeName, const TSchedulingOptions& schedulingOptions)
+ {
+ Y_ENSURE(Options_.emplace(poolTreeName, schedulingOptions).second);
+ return *this;
+ }
+
+ THashMap<TString, TSchedulingOptions> Options_;
+};
+
+///
+/// @brief Options for @ref NYT::IOperation::SuspendOperation
+///
+/// @see https://yt.yandex-team.ru/docs/api/commands.html#suspend_op
+struct TSuspendOperationOptions
+{
+ /// @cond Doxygen_Suppress
+ using TSelf = TSuspendOperationOptions;
+ /// @endcond
+
+ ///
+ /// @brief Whether to abort already running jobs.
+ ///
+ /// By default running jobs are not aborted.
+ FLUENT_FIELD_OPTION(bool, AbortRunningJobs);
+};
+
+///
+/// @brief Options for @ref NYT::IOperation::ResumeOperation
+///
+/// @note They are empty for now but options might appear in the future.
+///
+/// @see https://yt.yandex-team.ru/docs/api/commands.html#resume_op
+struct TResumeOperationOptions
+{
+ /// @cond Doxygen_Suppress
+ using TSelf = TResumeOperationOptions;
+ /// @endcond
+};
+
+///
+/// @brief Options for @ref NYT::IOperation::UpdateParameters
+///
+/// @see https://yt.yandex-team.ru/docs/api/commands.html#update_op_parameters
+struct TUpdateOperationParametersOptions
+{
+ /// @cond Doxygen_Suppress
+ using TSelf = TUpdateOperationParametersOptions;
+ /// @endcond
+
+ /// New owners of the operation.
+ FLUENT_VECTOR_FIELD(TString, Owner);
+
+ /// Pool to switch operation to (for all pool trees it is running in).
+ FLUENT_FIELD_OPTION(TString, Pool);
+
+ /// New operation weight (for all pool trees it is running in).
+ FLUENT_FIELD_OPTION(double, Weight);
+
+ /// Scheduling options for each pool tree the operation is running in.
+ FLUENT_FIELD_OPTION(TSchedulingOptionsPerPoolTree, SchedulingOptionsPerPoolTree);
+};
+
+///
+/// @brief Base class for many options related to IO.
+///
+/// @ref NYT::TFileWriterOptions
+/// @ref NYT::TFileReaderOptions
+/// @ref NYT::TTableReaderOptions
+/// @ref NYT::TTableWriterOptions
+template <class TDerived>
+struct TIOOptions
+{
+ /// @cond Doxygen_Suppress
+ using TSelf = TDerived;
+ /// @endcond
+
+ ///
+ /// @brief Advanced options for reader/writer.
+ ///
+ /// Readers/writers have many options not of all of them are supported by library.
+ /// If you need such unsupported option, you might use `Config` option until
+ /// option is supported.
+ ///
+ /// Example:
+ ///
+ /// TTableWriterOptions().Config(TNode()("max_row_weight", 64 << 20)))
+ ///
+ /// @note We encourage you to ask yt@ to add native C++ support of required options
+ /// and use `Config` only as temporary solution while native support is not ready.
+ FLUENT_FIELD_OPTION(TNode, Config);
+
+ ///
+ /// @brief Whether to create internal client transaction for reading / writing table.
+ ///
+ /// This is advanced option.
+ ///
+ /// If `CreateTransaction` is set to `false` reader/writer doesn't create internal transaction
+ /// and doesn't lock table. This option is overriden (effectively `false`) for writers by
+ /// @ref NYT::TTableWriterOptions::SingleHttpRequest
+ ///
+ /// WARNING: if `CreateTransaction` is `false`, read/write might become non-atomic.
+ /// Change ONLY if you are sure what you are doing!
+ FLUENT_FIELD_DEFAULT(bool, CreateTransaction, true);
+};
+
+/// @brief Options for reading file from YT.
+struct TFileReaderOptions
+ : public TIOOptions<TFileReaderOptions>
+{
+ ///
+ /// @brief Offset to start reading from.
+ ///
+ /// By default reading is started from the beginning of the file.
+ FLUENT_FIELD_OPTION(i64, Offset);
+
+ ///
+ /// @brief Maximum length to read.
+ ///
+ /// By default file is read until the end.
+ FLUENT_FIELD_OPTION(i64, Length);
+};
+
+/// @brief Options that control how server side of YT stores data.
+struct TWriterOptions
+{
+ /// @cond Doxygen_Suppress
+ using TSelf = TWriterOptions;
+ /// @endcond
+
+ ///
+ /// @brief Whether to wait all replicas to be written.
+ ///
+ /// When set to true upload will be considered successful as soon as
+ /// @ref NYT::TWriterOptions::MinUploadReplicationFactor number of replicas are created.
+ FLUENT_FIELD_OPTION(bool, EnableEarlyFinish);
+
+ /// Number of replicas to be created.
+ FLUENT_FIELD_OPTION(ui64, UploadReplicationFactor);
+
+ ///
+ /// Min number of created replicas needed to consider upload successful.
+ ///
+ /// @see NYT::TWriterOptions::EnableEarlyFinish
+ FLUENT_FIELD_OPTION(ui64, MinUploadReplicationFactor);
+
+ ///
+ /// @brief Desired size of a chunk.
+ ///
+ /// @see @ref NYT::TWriterOptions::RetryBlockSize
+ FLUENT_FIELD_OPTION(ui64, DesiredChunkSize);
+
+ ///
+ /// @brief Size of data block accumulated in memory to provide retries.
+ ///
+ /// Data is accumulated in memory buffer so in case error occurs data could be resended.
+ ///
+ /// If `RetryBlockSize` is not set buffer size is set to `DesiredChunkSize`.
+ /// If niether `RetryBlockSize` nor `DesiredChunkSize` is set size of buffer is 64MB.
+ ///
+ /// @note Written chunks cannot be larger than size of this memory buffer.
+ ///
+ /// Since DesiredChunkSize is compared against data already compressed with compression codec
+ /// it makes sense to set `RetryBlockSize = DesiredChunkSize / ExpectedCompressionRatio`
+ ///
+ /// @see @ref NYT::TWriterOptions::DesiredChunkSize
+ /// @see @ref NYT::TTableWriterOptions::SingleHttpRequest
+ FLUENT_FIELD_OPTION(size_t, RetryBlockSize);
+};
+
+///
+/// @brief Options for writing file
+///
+/// @see NYT::IIOClient::CreateFileWriter
+struct TFileWriterOptions
+ : public TIOOptions<TFileWriterOptions>
+{
+ ///
+ /// @brief Whether to compute MD5 sum of written file.
+ ///
+ /// If ComputeMD5 is set to `true` and we are appending to an existing file
+ /// the `md5` attribute must be set (i.e. it was previously written only with `ComputeMD5 == true`).
+ FLUENT_FIELD_OPTION(bool, ComputeMD5);
+
+ ///
+ /// @brief Options to control how YT server side writes data.
+ ///
+ /// @see NYT::TWriterOptions
+ FLUENT_FIELD_OPTION(TWriterOptions, WriterOptions);
+};
+
+class TSkiffRowHints {
+public:
+ /// @cond Doxygen_Suppress
+ using TSelf = TSkiffRowHints;
+ /// @endcond
+
+ ///
+ /// @brief Library doesn't interpret it, only pass it to CreateSkiffParser<...>() and GetSkiffSchema<...>() functions.
+ ///
+ /// You can set something in it to pass necessary information to CreateSkiffParser<...>() and GetSkiffSchema<...>() functions.
+ FLUENT_FIELD_OPTION(TNode, Attributes);
+};
+
+/// Options that control how C++ objects represent table rows when reading or writing a table.
+class TFormatHints
+{
+public:
+ /// @cond Doxygen_Suppress
+ using TSelf = TFormatHints;
+ /// @endcond
+
+ ///
+ /// @brief Whether to skip null values.
+ ///
+ /// When set to true TNode doesn't contain null column values
+ /// (e.g. corresponding keys will be missing instead of containing null value).
+ ///
+ /// Only meaningful for TNode representation.
+ ///
+ /// Useful for sparse tables which have many columns in schema
+ /// but only few columns are set in any row.
+ FLUENT_FIELD_DEFAULT(bool, SkipNullValuesForTNode, false);
+
+ ///
+ /// @brief Whether to convert string to numeric and boolean types (e.g. "42u" -> 42u, "false" -> %false)
+ /// when writing to schemaful table.
+ FLUENT_FIELD_OPTION(bool, EnableStringToAllConversion);
+
+ ///
+ /// @brief Whether to convert numeric and boolean types to string (e.g., 3.14 -> "3.14", %true -> "true")
+ /// when writing to schemaful table.
+ FLUENT_FIELD_OPTION(bool, EnableAllToStringConversion);
+
+ ///
+ /// @brief Whether to convert uint64 <-> int64 when writing to schemaful table.
+ ///
+ /// On overflow the corresponding error with be raised.
+ ///
+ /// This options is enabled by default.
+ FLUENT_FIELD_OPTION(bool, EnableIntegralTypeConversion);
+
+ /// Whether to convert uint64 and int64 to double (e.g. 42 -> 42.0) when writing to schemaful table.
+ FLUENT_FIELD_OPTION(bool, EnableIntegralToDoubleConversion);
+
+ /// Shortcut for enabling all type conversions.
+ FLUENT_FIELD_OPTION(bool, EnableTypeConversion);
+
+ ///
+ /// @brief Controls how complex types are represented in TNode or yson-strings.
+ ///
+ /// @see https://yt.yandex-team.ru/docs/description/storage/data_types#yson
+ FLUENT_FIELD_OPTION(EComplexTypeMode, ComplexTypeMode);
+
+ ///
+ /// @brief Allow to use any meta-information for creating skiff schema and parser for reading ISkiffRow.
+ FLUENT_FIELD_OPTION(TSkiffRowHints, SkiffRowHints);
+
+ ///
+ /// @brief Apply the patch to the fields.
+ ///
+ /// Non-default and non-empty values replace the default and empty ones.
+ void Merge(const TFormatHints& patch);
+};
+
+/// Options that control which control attributes (like row_index) are added to rows during read.
+class TControlAttributes
+{
+public:
+ /// @cond Doxygen_Suppress
+ using TSelf = TControlAttributes;
+ /// @endcond
+
+ ///
+ /// @brief Whether to add "row_index" attribute to rows read.
+ FLUENT_FIELD_DEFAULT(bool, EnableRowIndex, true);
+
+ ///
+ /// @brief Whether to add "range_index" attribute to rows read.
+ FLUENT_FIELD_DEFAULT(bool, EnableRangeIndex, true);
+};
+
+/// Options for @ref NYT::IClient::CreateTableReader
+struct TTableReaderOptions
+ : public TIOOptions<TTableReaderOptions>
+{
+ /// @deprecated Size of internal client buffer.
+ FLUENT_FIELD_DEFAULT(size_t, SizeLimit, 4 << 20);
+
+ ///
+ /// @brief Allows to fine tune format that is used for reading tables.
+ ///
+ /// Has no effect when used with raw-reader.
+ FLUENT_FIELD_OPTION(TFormatHints, FormatHints);
+
+ ///
+ /// @brief Allows to tune which attributes are added to rows while reading tables.
+ ///
+ FLUENT_FIELD_DEFAULT(TControlAttributes, ControlAttributes, TControlAttributes());
+};
+
+/// Options for @ref NYT::IClient::CreateTableWriter
+struct TTableWriterOptions
+ : public TIOOptions<TTableWriterOptions>
+{
+ ///
+ /// @brief Enable or disable retryful writing.
+ ///
+ /// If set to true no retry is made but we also make less requests to master.
+ /// If set to false writer can make up to `TConfig::RetryCount` attempts to send each block of data.
+ ///
+ /// @note Writers' methods might throw strange exceptions that might look like network error
+ /// when `SingleHttpRequest == true` and YT node encounters an error
+ /// (due to limitations of HTTP protocol YT node have no chance to report error
+ /// before it reads the whole input so it just drops the connection).
+ FLUENT_FIELD_DEFAULT(bool, SingleHttpRequest, false);
+
+ ///
+ /// @brief Allows to change the size of locally buffered rows before flushing to yt.
+ ///
+ /// Used only with @ref NYT::TTableWriterOptions::SingleHttpRequest
+ FLUENT_FIELD_DEFAULT(size_t, BufferSize, 64 << 20);
+
+ ///
+ /// @brief Allows to fine tune format that is used for writing tables.
+ ///
+ /// Has no effect when used with raw-writer.
+ FLUENT_FIELD_OPTION(TFormatHints, FormatHints);
+
+ /// @brief Try to infer schema of inexistent table from the type of written rows.
+ ///
+ /// @note Default values for this option may differ depending on the row type.
+ /// For protobuf it's currently false by default.
+ FLUENT_FIELD_OPTION(bool, InferSchema);
+
+ ///
+ /// @brief Options to control how YT server side writes data.
+ ///
+ /// @see NYT::TWriterOptions
+ FLUENT_FIELD_OPTION(TWriterOptions, WriterOptions);
+};
+
+///
+/// @brief Options for @ref NYT::IClient::StartTransaction
+///
+/// @see https://yt.yandex-team.ru/docs/api/commands.html#start_tx
+struct TStartTransactionOptions
+{
+ /// @cond Doxygen_Suppress
+ using TSelf = TStartTransactionOptions;
+ /// @endcond
+
+ FLUENT_FIELD_DEFAULT(bool, PingAncestors, false);
+
+ ///
+ /// @brief How long transaction lives after last ping.
+ ///
+ /// If server doesn't receive any pings for transaction for this time
+ /// transaction will be aborted. By default timeout is 15 seconds.
+ FLUENT_FIELD_OPTION(TDuration, Timeout);
+
+ ///
+ /// @brief Moment in the future when transaction is aborted.
+ FLUENT_FIELD_OPTION(TInstant, Deadline);
+
+ ///
+ /// @brief Whether to ping created transaction automatically.
+ ///
+ /// When set to true library creates a thread that pings transaction.
+ /// When set to false library doesn't ping transaction and it's user responsibility to ping it.
+ FLUENT_FIELD_DEFAULT(bool, AutoPingable, true);
+
+ ///
+ /// @brief Set the title attribute of transaction.
+ ///
+ /// If title was not specified
+ /// neither using this option nor using @ref NYT::TStartTransactionOptions::Attributes option
+ /// library will generate default title for transaction.
+ /// Such default title includes machine name, pid, user name and some other useful info.
+ FLUENT_FIELD_OPTION(TString, Title);
+
+ ///
+ /// @brief Set custom transaction attributes
+ ///
+ /// @note @ref NYT::TStartTransactionOptions::Title option overrides `"title"` attribute.
+ FLUENT_FIELD_OPTION(TNode, Attributes);
+};
+
+///
+/// @brief Options for attaching transaction.
+///
+/// @see NYT::IClient::AttachTransaction
+struct TAttachTransactionOptions
+{
+ /// @cond Doxygen_Suppress
+ using TSelf = TAttachTransactionOptions;
+ /// @endcond
+
+ ///
+ /// @brief Ping transaction automatically.
+ ///
+ /// When set to |true| library creates a thread that pings transaction.
+ /// When set to |false| library doesn't ping transaction and
+ /// it's user responsibility to ping it.
+ FLUENT_FIELD_DEFAULT(bool, AutoPingable, false);
+
+ ///
+ /// @brief Abort transaction on program termination.
+ ///
+ /// Should the transaction be aborted on program termination
+ /// (either normal or by a signal or uncaught exception -- two latter
+ /// only if @ref TInitializeOptions::CleanupOnTermination is set).
+ FLUENT_FIELD_DEFAULT(bool, AbortOnTermination, false);
+};
+
+///
+/// @brief Type of the lock.
+///
+/// @see https://yt.yandex-team.ru/docs/description/storage/transactions#locking_mode
+/// @see NYT::ITransaction::Lock
+enum ELockMode : int
+{
+ /// Exclusive lock.
+ LM_EXCLUSIVE /* "exclusive" */,
+
+ /// Shared lock.
+ LM_SHARED /* "shared" */,
+
+ /// Snapshot lock.
+ LM_SNAPSHOT /* "snapshot" */,
+};
+
+///
+/// @brief Options for locking cypress node
+///
+/// @see https://yt.yandex-team.ru/docs/description/storage/transactions#locks
+/// @see NYT::ITransaction::Lock
+struct TLockOptions
+{
+ /// @cond Doxygen_Suppress
+ using TSelf = TLockOptions;
+ /// @endcond
+
+ ///
+ /// @brief Whether to wait already locked node to be unlocked.
+ ///
+ /// If `Waitable' is set to true Lock method will create
+ /// waitable lock, that will be taken once other transactions
+ /// that hold lock to that node are commited / aborted.
+ ///
+ /// @note Lock method DOES NOT wait until lock is actually acquired.
+ /// Waiting should be done using corresponding methods of ILock.
+ ///
+ /// @see https://yt.yandex-team.ru/docs/description/storage/transactions#locking_queue
+ FLUENT_FIELD_DEFAULT(bool, Waitable, false);
+
+ ///
+ /// @brief Also take attribute_key lock.
+ ///
+ /// @see https://yt.yandex-team.ru/docs/description/storage/transactions#locks_compatibility
+ FLUENT_FIELD_OPTION(TString, AttributeKey);
+
+ ///
+ /// @brief Also take child_key lock.
+ ///
+ /// @see https://yt.yandex-team.ru/docs/description/storage/transactions#locks_compatibility
+ FLUENT_FIELD_OPTION(TString, ChildKey);
+};
+
+///
+/// @brief Options for @ref NYT::ITransaction::Unlock
+///
+/// @note They are empty for now but options might appear in the future.
+///
+/// @see https://yt.yandex-team.ru/docs/description/storage/transactions#locks_compatibility
+struct TUnlockOptions
+{
+ /// @cond Doxygen_Suppress
+ using TSelf = TUnlockOptions;
+ /// @endcond
+};
+
+/// Base class for options that deal with tablets.
+template <class TDerived>
+struct TTabletOptions
+{
+ /// @cond Doxygen_Suppress
+ using TSelf = TDerived;
+ /// @endcond
+
+ /// Index of a first tablet to deal with.
+ FLUENT_FIELD_OPTION(i64, FirstTabletIndex);
+
+ /// Index of a last tablet to deal with.
+ FLUENT_FIELD_OPTION(i64, LastTabletIndex);
+};
+
+///
+/// @brief Options for @ref NYT::IClient::MountTable
+///
+/// @see https://yt.yandex-team.ru/docs/api/commands#mount_table
+struct TMountTableOptions
+ : public TTabletOptions<TMountTableOptions>
+{
+ /// @cond Doxygen_Suppress
+ using TSelf = TMountTableOptions;
+ /// @endcond
+
+ /// If specified table will be mounted to this cell.
+ FLUENT_FIELD_OPTION(TTabletCellId, CellId);
+
+ /// If set to true tablets will be mounted in freezed state.
+ FLUENT_FIELD_DEFAULT(bool, Freeze, false);
+};
+
+///
+/// @brief Options for @ref NYT::IClient::UnmountTable
+///
+/// @see https://yt.yandex-team.ru/docs/api/commands#unmount_table
+struct TUnmountTableOptions
+ : public TTabletOptions<TUnmountTableOptions>
+{
+ /// @cond Doxygen_Suppress
+ using TSelf = TUnmountTableOptions;
+ /// @endcond
+
+ /// Advanced option, don't use unless yt team told you so.
+ FLUENT_FIELD_DEFAULT(bool, Force, false);
+};
+
+///
+/// @brief Options for @ref NYT::IClient::RemountTable
+///
+/// @see https://yt.yandex-team.ru/docs/api/commands#remount_table
+struct TRemountTableOptions
+ : public TTabletOptions<TRemountTableOptions>
+{ };
+
+///
+/// @brief Options for @ref NYT::IClient::ReshardTable
+///
+/// @see https://yt.yandex-team.ru/docs/api/commands#reshard_table
+struct TReshardTableOptions
+ : public TTabletOptions<TReshardTableOptions>
+{ };
+
+///
+/// @brief Options for @ref NYT::IClient::FreezeTable
+///
+/// @see https://yt.yandex-team.ru/docs/api/commands#freeze_table
+struct TFreezeTableOptions
+ : public TTabletOptions<TFreezeTableOptions>
+{ };
+
+///
+/// @brief Options for @ref NYT::IClient::UnfreezeTable
+///
+/// @see https://yt.yandex-team.ru/docs/api/commands#unfreeze_table
+struct TUnfreezeTableOptions
+ : public TTabletOptions<TUnfreezeTableOptions>
+{ };
+
+///
+/// @brief Options for @ref NYT::IClient::AlterTable
+///
+/// @see https://yt.yandex-team.ru/docs/api/commands#alter_table
+struct TAlterTableOptions
+{
+ /// @cond Doxygen_Suppress
+ using TSelf = TAlterTableOptions;
+ /// @endcond
+
+ /// Change table schema.
+ FLUENT_FIELD_OPTION(TTableSchema, Schema);
+
+ /// Alter table between static and dynamic mode.
+ FLUENT_FIELD_OPTION(bool, Dynamic);
+
+ ///
+ /// @brief Changes id of upstream replica on metacluster.
+ ///
+ /// @see https://yt.yandex-team.ru/docs/description/dynamic_tables/replicated_dynamic_tables
+ FLUENT_FIELD_OPTION(TReplicaId, UpstreamReplicaId);
+};
+
+///
+/// @brief Options for @ref NYT::IClient::LookupRows
+///
+/// @see https://yt.yandex-team.ru/docs/api/commands#lookup_rows
+struct TLookupRowsOptions
+{
+ /// @cond Doxygen_Suppress
+ using TSelf = TLookupRowsOptions;
+ /// @endcond
+
+ /// Timeout for operation.
+ FLUENT_FIELD_OPTION(TDuration, Timeout);
+
+ /// Column names to return.
+ FLUENT_FIELD_OPTION(TColumnNames, Columns);
+
+ ///
+ /// @brief Whether to return rows that were not found in table.
+ ///
+ /// If set to true List returned by LookupRows method will have same
+ /// length as list of keys. If row is not found in table corresponding item in list
+ /// will have null value.
+ FLUENT_FIELD_DEFAULT(bool, KeepMissingRows, false);
+
+ /// If set to true returned values will have "timestamp" attribute.
+ FLUENT_FIELD_OPTION(bool, Versioned);
+};
+
+///
+/// @brief Options for @ref NYT::IClient::SelectRows
+///
+/// @see https://yt.yandex-team.ru/docs/api/commands#select_rows
+struct TSelectRowsOptions
+{
+ /// @cond Doxygen_Suppress
+ using TSelf = TSelectRowsOptions;
+ /// @endcond
+
+ /// Timeout for operation.
+ FLUENT_FIELD_OPTION(TDuration, Timeout);
+
+ ///
+ /// @brief Limitation for number of rows read by single node.
+ ///
+ /// @see https://yt.yandex-team.ru/docs/description/dynamic_tables/dyn_query_language#ogranicheniya-na-slozhnost-zaprosa-(opcii)
+ FLUENT_FIELD_OPTION(i64, InputRowLimit);
+
+ ///
+ /// @brief Limitation for number of output rows on single cluster node.
+ ///
+ /// @see https://yt.yandex-team.ru/docs/description/dynamic_tables/dyn_query_language#ogranicheniya-na-slozhnost-zaprosa-(opcii)
+ FLUENT_FIELD_OPTION(i64, OutputRowLimit);
+
+ ///
+ /// @brief Maximum row ranges derived from WHERE clause.
+ ///
+ /// @see https://yt.yandex-team.ru/docs/description/dynamic_tables/dyn_query_language#ogranicheniya-na-slozhnost-zaprosa-(opcii)
+ FLUENT_FIELD_DEFAULT(ui64, RangeExpansionLimit, 1000);
+
+ ///
+ /// @brief Whether to fail if InputRowLimit or OutputRowLimit is exceeded.
+ ///
+ /// @see https://yt.yandex-team.ru/docs/description/dynamic_tables/dyn_query_language#ogranicheniya-na-slozhnost-zaprosa-(opcii)
+ FLUENT_FIELD_DEFAULT(bool, FailOnIncompleteResult, true);
+
+ /// @brief Enable verbose logging on server side.
+ FLUENT_FIELD_DEFAULT(bool, VerboseLogging, false);
+
+ FLUENT_FIELD_DEFAULT(bool, EnableCodeCache, true);
+};
+
+/// Options for NYT::CreateClient;
+struct TCreateClientOptions
+{
+ /// @cond Doxygen_Suppress
+ using TSelf = TCreateClientOptions;
+ /// @endcond
+
+ /// @brief Impersonated user name.
+ ///
+ /// If authenticated user is allowed to impersonate other YT users (e.g. yql_agent), this field may be used to override user name.
+ FLUENT_FIELD_OPTION(TString, ImpersonationUser);
+
+ /// @brief User token.
+ ///
+ /// @see NYT::TCreateClientOptions::TokenPath
+ FLUENT_FIELD(TString, Token);
+
+ /// @brief Path to the file where user token is stored.
+ ///
+ /// Token is looked in these places in following order:
+ /// - @ref NYT::TCreateClientOptions::Token
+ /// - @ref NYT::TCreateClientOptions::TokenPath
+ /// - `TConfig::Get()->Token` option.
+ /// - `YT_TOKEN` environment variable
+ /// - `YT_SECURE_VAULT_YT_TOKEN` environment variable
+ /// - File specified in `YT_TOKEN_PATH` environment variable
+ /// - `$HOME/.yt/token` file.
+ FLUENT_FIELD(TString, TokenPath);
+
+ /// @brief TVM service ticket producer.
+ ///
+ /// We store a wrapper of NYT::TIntrusivePtr here (not a NYT::TIntrusivePtr),
+ /// because otherwise other projects will have build problems
+ /// because of visibility of two different `TIntrusivePtr`-s (::TInstrusivePtr and NYT::TInstrusivePtr).
+ ///
+ /// @see NYT::NAuth::TServiceTicketClientAuth
+ /// {@
+ NAuth::IServiceTicketAuthPtrWrapperPtr ServiceTicketAuth_ = nullptr;
+ TSelf& ServiceTicketAuth(const NAuth::IServiceTicketAuthPtrWrapper& wrapper);
+ /// @}
+
+ /// @brief Use tvm-only endpoints in cluster connection.
+ FLUENT_FIELD_DEFAULT(bool, TvmOnly, false);
+
+ /// @brief Use HTTPs (use HTTP client from yt/yt/core always).
+ ///
+ /// @see UseCoreHttpClient
+ FLUENT_FIELD_DEFAULT(bool, UseTLS, false);
+
+ /// @brief Use HTTP client from yt/yt/core.
+ FLUENT_FIELD_DEFAULT(bool, UseCoreHttpClient, false);
+
+ ///
+ /// @brief RetryConfig provider allows to fine tune request retries.
+ ///
+ /// E.g. set total timeout for all retries.
+ FLUENT_FIELD_DEFAULT(IRetryConfigProviderPtr, RetryConfigProvider, nullptr);
+
+ /// @brief Override global config for the client.
+ ///
+ /// The config contains implementation parameters such as connection timeouts,
+ /// access token, api version and more.
+ /// @see NYT::TConfig
+ FLUENT_FIELD_DEFAULT(TConfigPtr, Config, nullptr);
+};
+
+///
+/// @brief Options for @ref NYT::IBatchRequest::ExecuteBatch
+///
+/// @see https://yt.yandex-team.ru/docs/api/commands#execute_batch
+struct TExecuteBatchOptions
+{
+ /// @cond Doxygen_Suppress
+ using TSelf = TExecuteBatchOptions;
+ /// @endcond
+
+ ///
+ /// @brief How many requests will be executed in parallel on the cluster.
+ ///
+ /// This parameter could be used to avoid RequestLimitExceeded errors.
+ FLUENT_FIELD_OPTION(ui64, Concurrency);
+
+ ///
+ /// @brief Maximum size of batch sent in one request to server.
+ ///
+ /// Huge batches are executed using multiple requests.
+ /// BatchPartMaxSize is maximum size of single request that goes to server
+ /// If not specified it is set to `Concurrency * 5'
+ FLUENT_FIELD_OPTION(ui64, BatchPartMaxSize);
+};
+
+///
+/// @brief Durability mode.
+///
+/// @see NYT::TTabletTransactionOptions::TDurability
+/// @see https://yt.yandex-team.ru/docs/description/dynamic_tables/sorted_dynamic_tables#sohrannost
+enum class EDurability
+{
+ /// Sync mode (default).
+ Sync /* "sync" */,
+
+ /// Async mode (might reduce latency of write requests, but less reliable).
+ Async /* "async" */,
+};
+
+///
+/// @brief Atomicity mode.
+///
+/// @see NYT::TTabletTransactionOptions::TDurability
+/// @see https://yt.yandex-team.ru/docs/description/dynamic_tables/sorted_dynamic_tables#sohrannost
+enum class EAtomicity
+{
+ /// Transactions are non atomic (might reduce latency of write requests).
+ None /* "none" */,
+
+ /// Transactions are atomic (default).
+ Full /* "full" */,
+};
+
+///
+/// @brief Table replica mode.
+///
+/// @see https://yt.yandex-team.ru/docs/description/dynamic_tables/replicated_dynamic_tables#atributy
+enum class ETableReplicaMode
+{
+ Sync /* "sync" */,
+ Async /* "async" */,
+};
+
+/// Base class for options dealing with io to dynamic tables.
+template <typename TDerived>
+struct TTabletTransactionOptions
+{
+ /// @cond Doxygen_Suppress
+ using TSelf = TDerived;
+ /// @endcond
+
+ ///
+ /// @brief Atomicity mode of operation
+ ///
+ /// Setting to NYT::EAtomicity::None allows to improve latency of operations
+ /// at the cost of weakening contracts.
+ ///
+ /// @note Use with care.
+ ///
+ /// @see https://yt.yandex-team.ru/docs/description/dynamic_tables/sorted_dynamic_tables#oslablenie-garantij
+ FLUENT_FIELD_OPTION(EAtomicity, Atomicity);
+
+ ///
+ /// @brief Durability mode of operation
+ ///
+ /// Setting to NYT::EDurability::Async allows to improve latency of operations
+ /// at the cost of weakening contracts.
+ ///
+ /// @note Use with care.
+ ///
+ /// @see https://yt.yandex-team.ru/docs/description/dynamic_tables/sorted_dynamic_tables#oslablenie-garantij
+ FLUENT_FIELD_OPTION(EDurability, Durability);
+};
+
+///
+/// @brief Options for NYT::IClient::InsertRows
+///
+/// @see https://yt.yandex-team.ru/docs/api/commands.html#insert_rows
+struct TInsertRowsOptions
+ : public TTabletTransactionOptions<TInsertRowsOptions>
+{
+ ///
+ /// @brief Whether to overwrite missing columns with nulls.
+ ///
+ /// By default all columns missing in input data are set to Null and overwrite currently stored value.
+ /// If `Update' is set to true currently stored value will not be overwritten for columns that are missing in input data.
+ FLUENT_FIELD_OPTION(bool, Update);
+
+ ///
+ /// @brief Whether to overwrite or aggregate aggregated columns.
+ ///
+ /// Used with aggregating columns.
+ /// By default value in aggregating column will be overwritten.
+ /// If `Aggregate' is set to true row will be considered as delta and it will be aggregated with currently stored value.
+ FLUENT_FIELD_OPTION(bool, Aggregate);
+
+ ///
+ /// @brief Whether to fail when inserting to table without sync replica.
+ ///
+ /// Used for insert operation for tables without sync replica.
+ /// https://yt.yandex-team.ru/docs/description/dynamic_tables/replicated_dynamic_tables#write
+ /// Default value is 'false'. So insertion into table without sync replicas fails.
+ FLUENT_FIELD_OPTION(bool, RequireSyncReplica);
+};
+
+///
+/// @brief Options for NYT::IClient::DeleteRows
+///
+/// @see https://yt.yandex-team.ru/docs/api/commands.html#delete_rows
+struct TDeleteRowsOptions
+ : public TTabletTransactionOptions<TDeleteRowsOptions>
+{
+ ///
+ /// @brief Whether to fail when deleting from table without sync replica.
+ ///
+ // Used for delete operation for tables without sync replica.
+ // https://yt.yandex-team.ru/docs/description/dynamic_tables/replicated_dynamic_tables#write
+ // Default value is 'false'. So deletion into table without sync replicas fails.
+ FLUENT_FIELD_OPTION(bool, RequireSyncReplica);
+};
+
+///
+/// @brief Options for NYT::IClient::TrimRows
+///
+/// @see https://yt.yandex-team.ru/docs/api/commands.html#trim_rows
+struct TTrimRowsOptions
+ : public TTabletTransactionOptions<TTrimRowsOptions>
+{ };
+
+/// @brief Options for NYT::IClient::AlterTableReplica
+///
+/// @see https://yt.yandex-team.ru/docs/api/commands.html#alter_table_replica
+/// @see https://yt.yandex-team.ru/docs/description/dynamic_tables/replicated_dynamic_tables
+struct TAlterTableReplicaOptions
+{
+ /// @cond Doxygen_Suppress
+ using TSelf = TAlterTableReplicaOptions;
+ /// @endcond
+
+ ///
+ /// @brief Whether to enable or disable replica.
+ ///
+ /// Doesn't change state of replica if `Enabled' is not set.
+ FLUENT_FIELD_OPTION(bool, Enabled);
+
+ ///
+ /// @brief Change replica mode.
+ ///
+ /// Doesn't change replica mode if `Mode` is not set.
+ FLUENT_FIELD_OPTION(ETableReplicaMode, Mode);
+};
+
+///
+/// @brief Options for @ref NYT::IClient::GetFileFromCache
+///
+/// @note They are empty for now but options might appear in the future.
+///
+/// @see https://yt.yandex-team.ru/docs/api/commands.html#get_file_from_cache
+struct TGetFileFromCacheOptions
+{
+ /// @cond Doxygen_Suppress
+ using TSelf = TGetFileFromCacheOptions;
+ /// @endcond
+};
+
+///
+/// @brief Options for @ref NYT::IClient::GetTableColumnarStatistics
+///
+/// @note They are empty for now but options might appear in the future.
+///
+/// @see https://yt.yandex-team.ru/docs/api/commands.html#put_file_to_cache
+struct TPutFileToCacheOptions
+{
+ /// @cond Doxygen_Suppress
+ using TSelf = TPutFileToCacheOptions;
+ /// @endcond
+
+ /// Whether to preserve `expiration_timeout` attribute of source node.
+ FLUENT_FIELD_OPTION(bool, PreserveExpirationTimeout);
+};
+
+///
+/// Type of permission used in ACL.
+///
+/// @see https://yt.yandex-team.ru/docs/description/common/access_control
+enum class EPermission : int
+{
+ /// Applies to: all objects.
+ Read /* "read" */,
+
+ /// Applies to: all objects.
+ Write /* "write" */,
+
+ /// Applies to: accounts / pools.
+ Use /* "use" */,
+
+ /// Applies to: all objects.
+ Administer /* "administer" */,
+
+ /// Applies to: schemas.
+ Create /* "create" */,
+
+ /// Applies to: all objects.
+ Remove /* "remove" */,
+
+ /// Applies to: tables.
+ Mount /* "mount" */,
+
+ /// Applies to: operations.
+ Manage /* "manage" */,
+};
+
+/// Whether permission is granted or denied.
+enum class ESecurityAction : int
+{
+ /// Permission is granted.
+ Allow /* "allow" */,
+
+ /// Permission is denied.
+ Deny /* "deny" */,
+};
+
+///
+/// @brief Options for @ref NYT::IClient::CheckPermission
+///
+/// @see https://yt.yandex-team.ru/docs/api/commands.html#check_permission
+struct TCheckPermissionOptions
+{
+ /// @cond Doxygen_Suppress
+ using TSelf = TCheckPermissionOptions;
+ /// @endcond
+
+ /// Columns to check permission to (for tables only).
+ FLUENT_VECTOR_FIELD(TString, Column);
+};
+
+///
+/// @brief Columnar statistics fetching mode.
+///
+/// @ref NYT::TGetTableColumnarStatisticsOptions::FetcherMode
+enum class EColumnarStatisticsFetcherMode
+{
+ /// Slow mode for fetching precise columnar statistics.
+ FromNodes /* "from_nodes" */,
+
+ ///
+ /// @brief Fast mode for fetching lightweight columnar statistics.
+ ///
+ /// Relative precision is 1 / 256.
+ ///
+ /// @note Might be unavailable for old tables in that case some upper bound is returned.
+ FromMaster /* "from_master" */,
+
+ /// Use lightweight columnar statistics (FromMaster) if available otherwise switch to slow but precise mode (FromNodes).
+ Fallback /* "fallback" */,
+};
+
+///
+/// @brief Options for @ref NYT::IClient::GetTableColumnarStatistics
+///
+/// @see https://yt.yandex-team.ru/docs/api/commands.html#get_table_columnar_statistics
+struct TGetTableColumnarStatisticsOptions
+{
+ /// @cond Doxygen_Suppress
+ using TSelf = TGetTableColumnarStatisticsOptions;
+ /// @endcond
+
+ ///
+ /// @brief Mode of statistics fetching.
+ ///
+ /// @ref NYT::EColumnarStatisticsFetcherMode
+ FLUENT_FIELD_OPTION(EColumnarStatisticsFetcherMode, FetcherMode);
+};
+
+///
+/// @brief Table partitioning mode.
+///
+/// @ref NYT::TGetTablePartitionsOptions::PartitionMode
+enum class ETablePartitionMode
+{
+ ///
+ /// @brief Ignores the order of input tables and their chunk and sorting orders.
+ ///
+ Unordered /* "unordered" */,
+
+ ///
+ /// @brief The order of table ranges inside each partition obey the order of input tables and their chunk orders.
+ ///
+ Ordered /* "ordered" */,
+};
+
+///
+/// @brief Options for @ref NYT::IClient::GetTablePartitions
+///
+struct TGetTablePartitionsOptions
+{
+ /// @cond Doxygen_Suppress
+ using TSelf = TGetTablePartitionsOptions;
+ /// @endcond
+
+ ///
+ /// @brief Table partitioning mode.
+ ///
+ /// @ref NYT::ETablePartitionMode
+ FLUENT_FIELD(ETablePartitionMode, PartitionMode);
+
+ ///
+ /// @brief Approximate data weight of each output partition.
+ ///
+ FLUENT_FIELD(i64, DataWeightPerPartition);
+
+ ///
+ /// @brief Maximum output partition count.
+ ///
+ /// Consider the situation when the `MaxPartitionCount` is given
+ /// and the total data weight exceeds `MaxPartitionCount * DataWeightPerPartition`.
+ /// If `AdjustDataWeightPerPartition` is |true|
+ /// `GetTablePartitions` will yield partitions exceeding the `DataWeightPerPartition`.
+ /// If `AdjustDataWeightPerPartition` is |false|
+ /// the partitioning will be aborted as soon as the output partition count exceeds this limit.
+ FLUENT_FIELD_OPTION(int, MaxPartitionCount);
+
+ ///
+ /// @brief Allow the data weight per partition to exceed `DataWeightPerPartition` when `MaxPartitionCount` is set.
+ ///
+ /// |True| by default.
+ FLUENT_FIELD_DEFAULT(bool, AdjustDataWeightPerPartition, true);
+};
+
+///
+/// @brief Options for @ref NYT::IClient::GetTabletInfos
+///
+/// @note They are empty for now but options might appear in the future.
+///
+/// @see https://yt.yandex-team.ru/docs/api/commands.html#get_tablet_infos
+struct TGetTabletInfosOptions
+{
+ /// @cond Doxygen_Suppress
+ using TSelf = TGetTabletInfosOptions;
+ /// @endcond
+};
+
+/// Options for @ref NYT::IClient::SkyShareTable
+struct TSkyShareTableOptions
+{
+ /// @cond Doxygen_Suppress
+ using TSelf = TSkyShareTableOptions;
+ /// @endcond
+
+ ///
+ /// @brief Key columns that are used to group files in a table into torrents.
+ ///
+ /// One torrent is created for each value of `KeyColumns` columns.
+ /// If not specified, all files go into single torrent.
+ FLUENT_FIELD_OPTION(TColumnNames, KeyColumns);
+
+ /// @brief Allow skynet manager to return fastbone links to skynet. See YT-11437
+ FLUENT_FIELD_OPTION(bool, EnableFastbone);
+};
+
+////////////////////////////////////////////////////////////////////////////////
+
+} // namespace NYT
diff --git a/yt/cpp/mapreduce/interface/common.cpp b/yt/cpp/mapreduce/interface/common.cpp
new file mode 100644
index 0000000000..f6d60127ce
--- /dev/null
+++ b/yt/cpp/mapreduce/interface/common.cpp
@@ -0,0 +1,664 @@
+#include "common.h"
+
+#include "errors.h"
+#include "format.h"
+#include "serialize.h"
+#include "fluent.h"
+
+#include <yt/yt_proto/yt/formats/extension.pb.h>
+
+#include <library/cpp/yson/node/node_builder.h>
+#include <library/cpp/yson/node/node_io.h>
+#include <library/cpp/type_info/type.h>
+
+#include <util/generic/xrange.h>
+
+namespace NYT {
+
+using ::google::protobuf::FieldDescriptor;
+using ::google::protobuf::Descriptor;
+
+////////////////////////////////////////////////////////////////////////////////
+
+TSortColumn::TSortColumn(TStringBuf name, ESortOrder sortOrder)
+ : Name_(name)
+ , SortOrder_(sortOrder)
+{ }
+
+TSortColumn::TSortColumn(const TString& name, ESortOrder sortOrder)
+ : TSortColumn(static_cast<TStringBuf>(name), sortOrder)
+{ }
+
+TSortColumn::TSortColumn(const char* name, ESortOrder sortOrder)
+ : TSortColumn(static_cast<TStringBuf>(name), sortOrder)
+{ }
+
+const TSortColumn& TSortColumn::EnsureAscending() const
+{
+ Y_ENSURE(SortOrder() == ESortOrder::SO_ASCENDING);
+ return *this;
+}
+
+TNode TSortColumn::ToNode() const
+{
+ return BuildYsonNodeFluently().Value(*this);
+}
+
+////////////////////////////////////////////////////////////////////////////////
+// Below lie backward compatibility methods.
+////////////////////////////////////////////////////////////////////////////////
+
+TSortColumn& TSortColumn::operator = (TStringBuf name)
+{
+ EnsureAscending();
+ Name_ = name;
+ return *this;
+}
+
+TSortColumn& TSortColumn::operator = (const TString& name)
+{
+ return (*this = static_cast<TStringBuf>(name));
+}
+
+TSortColumn& TSortColumn::operator = (const char* name)
+{
+ return (*this = static_cast<TStringBuf>(name));
+}
+
+bool TSortColumn::operator == (TStringBuf rhsName) const
+{
+ EnsureAscending();
+ return Name_ == rhsName;
+}
+
+bool TSortColumn::operator != (TStringBuf rhsName) const
+{
+ return !(*this == rhsName);
+}
+
+bool TSortColumn::operator == (const TString& rhsName) const
+{
+ return *this == static_cast<TStringBuf>(rhsName);
+}
+
+bool TSortColumn::operator != (const TString& rhsName) const
+{
+ return !(*this == rhsName);
+}
+
+bool TSortColumn::operator == (const char* rhsName) const
+{
+ return *this == static_cast<TStringBuf>(rhsName);
+}
+
+bool TSortColumn::operator != (const char* rhsName) const
+{
+ return !(*this == rhsName);
+}
+
+TSortColumn::operator TStringBuf() const
+{
+ EnsureAscending();
+ return Name_;
+}
+
+TSortColumn::operator TString() const
+{
+ return TString(static_cast<TStringBuf>(*this));
+}
+
+TSortColumn::operator std::string() const
+{
+ EnsureAscending();
+ return static_cast<std::string>(Name_);
+}
+
+////////////////////////////////////////////////////////////////////////////////
+
+TSortColumns::TSortColumns()
+{ }
+
+TSortColumns::TSortColumns(const TVector<TString>& names)
+{
+ Parts_.assign(names.begin(), names.end());
+}
+
+TSortColumns::TSortColumns(const TColumnNames& names)
+ : TSortColumns(names.Parts_)
+{ }
+
+TSortColumns::operator TColumnNames() const
+{
+ return TColumnNames(EnsureAscending().GetNames());
+}
+
+const TSortColumns& TSortColumns::EnsureAscending() const
+{
+ for (const auto& sortColumn : Parts_) {
+ sortColumn.EnsureAscending();
+ }
+ return *this;
+}
+
+TVector<TString> TSortColumns::GetNames() const
+{
+ TVector<TString> names;
+ names.reserve(Parts_.size());
+ for (const auto& sortColumn : Parts_) {
+ names.push_back(sortColumn.Name());
+ }
+ return names;
+}
+
+////////////////////////////////////////////////////////////////////////////////
+
+static NTi::TTypePtr OldTypeToTypeV3(EValueType type)
+{
+ switch (type) {
+ case VT_INT64:
+ return NTi::Int64();
+ case VT_UINT64:
+ return NTi::Uint64();
+
+ case VT_DOUBLE:
+ return NTi::Double();
+
+ case VT_BOOLEAN:
+ return NTi::Bool();
+
+ case VT_STRING:
+ return NTi::String();
+
+ case VT_ANY:
+ return NTi::Yson();
+
+ case VT_INT8:
+ return NTi::Int8();
+ case VT_INT16:
+ return NTi::Int16();
+ case VT_INT32:
+ return NTi::Int32();
+
+ case VT_UINT8:
+ return NTi::Uint8();
+ case VT_UINT16:
+ return NTi::Uint16();
+ case VT_UINT32:
+ return NTi::Uint32();
+
+ case VT_UTF8:
+ return NTi::Utf8();
+
+ case VT_NULL:
+ return NTi::Null();
+
+ case VT_VOID:
+ return NTi::Void();
+
+ case VT_DATE:
+ return NTi::Date();
+ case VT_DATETIME:
+ return NTi::Datetime();
+ case VT_TIMESTAMP:
+ return NTi::Timestamp();
+ case VT_INTERVAL:
+ return NTi::Interval();
+
+ case VT_FLOAT:
+ return NTi::Float();
+ case VT_JSON:
+ return NTi::Json();
+ }
+}
+
+static std::pair<EValueType, bool> Simplify(const NTi::TTypePtr& type)
+{
+ using namespace NTi;
+ const auto typeName = type->GetTypeName();
+ switch (typeName) {
+ case ETypeName::Bool:
+ return {VT_BOOLEAN, true};
+
+ case ETypeName::Int8:
+ return {VT_INT8, true};
+ case ETypeName::Int16:
+ return {VT_INT16, true};
+ case ETypeName::Int32:
+ return {VT_INT32, true};
+ case ETypeName::Int64:
+ return {VT_INT64, true};
+
+ case ETypeName::Uint8:
+ return {VT_UINT8, true};
+ case ETypeName::Uint16:
+ return {VT_UINT16, true};
+ case ETypeName::Uint32:
+ return {VT_UINT32, true};
+ case ETypeName::Uint64:
+ return {VT_UINT64, true};
+
+ case ETypeName::Float:
+ return {VT_FLOAT, true};
+ case ETypeName::Double:
+ return {VT_DOUBLE, true};
+
+ case ETypeName::String:
+ return {VT_STRING, true};
+ case ETypeName::Utf8:
+ return {VT_UTF8, true};
+
+ case ETypeName::Date:
+ return {VT_DATE, true};
+ case ETypeName::Datetime:
+ return {VT_DATETIME, true};
+ case ETypeName::Timestamp:
+ return {VT_TIMESTAMP, true};
+ case ETypeName::Interval:
+ return {VT_INTERVAL, true};
+
+ case ETypeName::TzDate:
+ case ETypeName::TzDatetime:
+ case ETypeName::TzTimestamp:
+ break;
+
+ case ETypeName::Json:
+ return {VT_JSON, true};
+ case ETypeName::Decimal:
+ return {VT_STRING, true};
+ case ETypeName::Uuid:
+ break;
+ case ETypeName::Yson:
+ return {VT_ANY, true};
+
+ case ETypeName::Void:
+ return {VT_VOID, false};
+ case ETypeName::Null:
+ return {VT_NULL, false};
+
+ case ETypeName::Optional:
+ {
+ auto itemType = type->AsOptional()->GetItemType();
+ if (itemType->IsPrimitive()) {
+ auto simplified = Simplify(itemType->AsPrimitive());
+ if (simplified.second) {
+ simplified.second = false;
+ return simplified;
+ }
+ }
+ return {VT_ANY, false};
+ }
+ case ETypeName::List:
+ return {VT_ANY, true};
+ case ETypeName::Dict:
+ return {VT_ANY, true};
+ case ETypeName::Struct:
+ return {VT_ANY, true};
+ case ETypeName::Tuple:
+ return {VT_ANY, true};
+ case ETypeName::Variant:
+ return {VT_ANY, true};
+ case ETypeName::Tagged:
+ return Simplify(type->AsTagged()->GetItemType());
+ }
+ ythrow TApiUsageError() << "Unsupported type: " << typeName;
+}
+
+NTi::TTypePtr ToTypeV3(EValueType type, bool required)
+{
+ auto typeV3 = OldTypeToTypeV3(type);
+ if (!Simplify(typeV3).second) {
+ if (required) {
+ ythrow TApiUsageError() << "type: " << type << " cannot be required";
+ } else {
+ return typeV3;
+ }
+ }
+ if (required) {
+ return typeV3;
+ } else {
+ return NTi::Optional(typeV3);
+ }
+}
+
+TColumnSchema::TColumnSchema()
+ : TypeV3_(NTi::Optional(NTi::Int64()))
+{ }
+
+EValueType TColumnSchema::Type() const
+{
+ return Simplify(TypeV3_).first;
+}
+
+TColumnSchema& TColumnSchema::Type(EValueType type) &
+{
+ return Type(ToTypeV3(type, false));
+}
+
+TColumnSchema TColumnSchema::Type(EValueType type) &&
+{
+ return Type(ToTypeV3(type, false));
+}
+
+TColumnSchema& TColumnSchema::Type(const NTi::TTypePtr& type) &
+{
+ Y_VERIFY(type.Get(), "Cannot create column schema with nullptr type");
+ TypeV3_ = type;
+ return *this;
+}
+
+TColumnSchema TColumnSchema::Type(const NTi::TTypePtr& type) &&
+{
+ Y_VERIFY(type.Get(), "Cannot create column schema with nullptr type");
+ TypeV3_ = type;
+ return *this;
+}
+
+TColumnSchema& TColumnSchema::TypeV3(const NTi::TTypePtr& type) &
+{
+ return Type(type);
+}
+
+TColumnSchema TColumnSchema::TypeV3(const NTi::TTypePtr& type) &&
+{
+ return Type(type);
+}
+
+NTi::TTypePtr TColumnSchema::TypeV3() const
+{
+ return TypeV3_;
+}
+
+bool TColumnSchema::Required() const
+{
+ return Simplify(TypeV3_).second;
+}
+
+TColumnSchema& TColumnSchema::Type(EValueType type, bool required) &
+{
+ return Type(ToTypeV3(type, required));
+}
+
+TColumnSchema TColumnSchema::Type(EValueType type, bool required) &&
+{
+ return Type(ToTypeV3(type, required));
+}
+
+bool operator==(const TColumnSchema& lhs, const TColumnSchema& rhs)
+{
+ return
+ lhs.Name() == rhs.Name() &&
+ NTi::NEq::TStrictlyEqual()(lhs.TypeV3(), rhs.TypeV3()) &&
+ lhs.SortOrder() == rhs.SortOrder() &&
+ lhs.Lock() == rhs.Lock() &&
+ lhs.Expression() == rhs.Expression() &&
+ lhs.Aggregate() == rhs.Aggregate() &&
+ lhs.Group() == rhs.Group();
+}
+
+////////////////////////////////////////////////////////////////////////////////
+
+bool TTableSchema::Empty() const
+{
+ return Columns_.empty();
+}
+
+TTableSchema& TTableSchema::AddColumn(const TString& name, EValueType type) &
+{
+ Columns_.push_back(TColumnSchema().Name(name).Type(type));
+ return *this;
+}
+
+TTableSchema TTableSchema::AddColumn(const TString& name, EValueType type) &&
+{
+ return std::move(AddColumn(name, type));
+}
+
+TTableSchema& TTableSchema::AddColumn(const TString& name, EValueType type, ESortOrder sortOrder) &
+{
+ Columns_.push_back(TColumnSchema().Name(name).Type(type).SortOrder(sortOrder));
+ return *this;
+}
+
+TTableSchema TTableSchema::AddColumn(const TString& name, EValueType type, ESortOrder sortOrder) &&
+{
+ return std::move(AddColumn(name, type, sortOrder));
+}
+
+TTableSchema& TTableSchema::AddColumn(const TString& name, const NTi::TTypePtr& type) &
+{
+ Columns_.push_back(TColumnSchema().Name(name).Type(type));
+ return *this;
+}
+
+TTableSchema TTableSchema::AddColumn(const TString& name, const NTi::TTypePtr& type) &&
+{
+ return std::move(AddColumn(name, type));
+}
+
+TTableSchema& TTableSchema::AddColumn(const TString& name, const NTi::TTypePtr& type, ESortOrder sortOrder) &
+{
+ Columns_.push_back(TColumnSchema().Name(name).Type(type).SortOrder(sortOrder));
+ return *this;
+}
+
+TTableSchema TTableSchema::AddColumn(const TString& name, const NTi::TTypePtr& type, ESortOrder sortOrder) &&
+{
+ return std::move(AddColumn(name, type, sortOrder));
+}
+
+TTableSchema& TTableSchema::SortBy(const TSortColumns& sortColumns) &
+{
+ Y_ENSURE(sortColumns.Parts_.size() <= Columns_.size());
+
+ THashMap<TString, ui64> sortColumnIndex;
+ for (auto i: xrange(sortColumns.Parts_.size())) {
+ Y_ENSURE(sortColumnIndex.emplace(sortColumns.Parts_[i].Name(), i).second,
+ "Key column name '" << sortColumns.Parts_[i].Name() << "' repeats in columns list");
+ }
+
+ TVector<TColumnSchema> newColumnsSorted(sortColumns.Parts_.size());
+ TVector<TColumnSchema> newColumnsUnsorted;
+ for (auto& column : Columns_) {
+ auto it = sortColumnIndex.find(column.Name());
+ if (it == sortColumnIndex.end()) {
+ column.ResetSortOrder();
+ newColumnsUnsorted.push_back(std::move(column));
+ } else {
+ auto index = it->second;
+ const auto& sortColumn = sortColumns.Parts_[index];
+ column.SortOrder(sortColumn.SortOrder());
+ newColumnsSorted[index] = std::move(column);
+ sortColumnIndex.erase(it);
+ }
+ }
+
+ Y_ENSURE(sortColumnIndex.empty(), "Column name '" << sortColumnIndex.begin()->first
+ << "' not found in table schema");
+
+ newColumnsSorted.insert(newColumnsSorted.end(), newColumnsUnsorted.begin(), newColumnsUnsorted.end());
+ Columns_ = std::move(newColumnsSorted);
+
+ return *this;
+}
+
+TTableSchema TTableSchema::SortBy(const TSortColumns& sortColumns) &&
+{
+ return std::move(SortBy(sortColumns));
+}
+
+TVector<TColumnSchema>& TTableSchema::MutableColumns()
+{
+ return Columns_;
+}
+
+TNode TTableSchema::ToNode() const
+{
+ TNode result;
+ TNodeBuilder builder(&result);
+ Serialize(*this, &builder);
+ return result;
+}
+
+TTableSchema TTableSchema::FromNode(const TNode& node)
+{
+ TTableSchema schema;
+ Deserialize(schema, node);
+ return schema;
+}
+
+bool operator==(const TTableSchema& lhs, const TTableSchema& rhs)
+{
+ return
+ lhs.Columns() == rhs.Columns() &&
+ lhs.Strict() == rhs.Strict() &&
+ lhs.UniqueKeys() == rhs.UniqueKeys();
+}
+
+////////////////////////////////////////////////////////////////////////////////
+
+TKeyBound::TKeyBound(ERelation relation, TKey key)
+ : Relation_(relation)
+ , Key_(std::move(key))
+{ }
+
+////////////////////////////////////////////////////////////////////////////////
+
+TTableSchema CreateTableSchema(
+ const Descriptor& messageDescriptor,
+ const TSortColumns& sortColumns,
+ bool keepFieldsWithoutExtension)
+{
+ auto result = CreateTableSchema(messageDescriptor, keepFieldsWithoutExtension);
+ if (!sortColumns.Parts_.empty()) {
+ result.SortBy(sortColumns.Parts_);
+ }
+ return result;
+}
+
+TTableSchema CreateTableSchema(NTi::TTypePtr type)
+{
+ Y_VERIFY(type);
+ TTableSchema schema;
+ Deserialize(schema, NodeFromYsonString(NTi::NIo::AsYtSchema(type.Get())));
+ return schema;
+}
+
+////////////////////////////////////////////////////////////////////////////////
+
+bool IsTrivial(const TReadLimit& readLimit)
+{
+ return !readLimit.Key_ && !readLimit.RowIndex_ && !readLimit.Offset_ && !readLimit.TabletIndex_ && !readLimit.KeyBound_;
+}
+
+EValueType NodeTypeToValueType(TNode::EType nodeType)
+{
+ switch (nodeType) {
+ case TNode::EType::Int64: return VT_INT64;
+ case TNode::EType::Uint64: return VT_UINT64;
+ case TNode::EType::String: return VT_STRING;
+ case TNode::EType::Double: return VT_DOUBLE;
+ case TNode::EType::Bool: return VT_BOOLEAN;
+ default:
+ ythrow yexception() << "Cannot convert TNode type " << nodeType << " to EValueType";
+ }
+}
+
+////////////////////////////////////////////////////////////////////////////////
+
+const TVector<TReadRange>& GetRangesCompat(const TRichYPath& path)
+{
+ static const TVector<TReadRange> empty;
+
+ const auto& maybeRanges = path.GetRanges();
+ if (maybeRanges.Empty()) {
+ return empty;
+ } else if (maybeRanges->size() > 0) {
+ return *maybeRanges;
+ } else {
+ // If you see this exception, that means that caller of this function doesn't known what to do
+ // with RichYPath that has set range list, but the range list is empty.
+ //
+ // To avoid this exception caller must explicitly handle such case.
+ // NB. YT-17683
+ ythrow TApiUsageError() << "Unsupported RichYPath: explicitly empty range list";
+ }
+}
+
+////////////////////////////////////////////////////////////////////////////////
+
+namespace NDetail {
+
+////////////////////////////////////////////////////////////////////////////////
+
+TString ToString(EValueType type)
+{
+ switch (type) {
+ case VT_INT8:
+ return "int8";
+ case VT_INT16:
+ return "int16";
+ case VT_INT32:
+ return "int32";
+ case VT_INT64:
+ return "int64";
+
+ case VT_UINT8:
+ return "uint8";
+ case VT_UINT16:
+ return "uint16";
+ case VT_UINT32:
+ return "uint32";
+ case VT_UINT64:
+ return "uint64";
+
+ case VT_DOUBLE:
+ return "double";
+
+ case VT_BOOLEAN:
+ return "boolean";
+
+ case VT_STRING:
+ return "string";
+ case VT_UTF8:
+ return "utf8";
+
+ case VT_ANY:
+ return "any";
+
+ case VT_NULL:
+ return "null";
+ case VT_VOID:
+ return "void";
+
+ case VT_DATE:
+ return "date";
+ case VT_DATETIME:
+ return "datetime";
+ case VT_TIMESTAMP:
+ return "timestamp";
+ case VT_INTERVAL:
+ return "interval";
+
+ case VT_FLOAT:
+ return "float";
+
+ case VT_JSON:
+ return "json";
+ }
+ ythrow yexception() << "Invalid value type " << static_cast<int>(type);
+}
+
+////////////////////////////////////////////////////////////////////////////////
+
+} // namespace NDetail
+} // namespace NYT
+
+template <>
+void Out<NYT::TSortColumn>(IOutputStream& os, const NYT::TSortColumn& sortColumn)
+{
+ if (sortColumn.SortOrder() == NYT::ESortOrder::SO_ASCENDING) {
+ os << sortColumn.Name();
+ } else {
+ os << NYT::BuildYsonStringFluently(NYson::EYsonFormat::Text).Value(sortColumn);
+ }
+}
diff --git a/yt/cpp/mapreduce/interface/common.h b/yt/cpp/mapreduce/interface/common.h
new file mode 100644
index 0000000000..b1754ade70
--- /dev/null
+++ b/yt/cpp/mapreduce/interface/common.h
@@ -0,0 +1,1301 @@
+#pragma once
+
+///
+/// @file yt/cpp/mapreduce/interface/common.h
+///
+/// Header containing miscellaneous structs and classes used in library.
+
+#include "fwd.h"
+
+#include <library/cpp/type_info/type_info.h>
+#include <library/cpp/yson/node/node.h>
+
+#include <util/generic/guid.h>
+#include <util/generic/map.h>
+#include <util/generic/maybe.h>
+#include <util/generic/ptr.h>
+#include <util/system/type_name.h>
+#include <util/generic/vector.h>
+
+#include <google/protobuf/message.h>
+
+#include <initializer_list>
+#include <type_traits>
+
+namespace NYT {
+
+////////////////////////////////////////////////////////////////////////////////
+
+/// @cond Doxygen_Suppress
+#define FLUENT_FIELD(type, name) \
+ type name##_; \
+ TSelf& name(const type& value) \
+ { \
+ name##_ = value; \
+ return static_cast<TSelf&>(*this); \
+ } \
+ static_assert(true)
+
+#define FLUENT_FIELD_ENCAPSULATED(type, name) \
+private: \
+ type name##_; \
+public: \
+ TSelf& name(const type& value) & \
+ { \
+ name##_ = value; \
+ return static_cast<TSelf&>(*this); \
+ } \
+ TSelf name(const type& value) && \
+ { \
+ name##_ = value; \
+ return static_cast<TSelf&>(*this); \
+ } \
+ const type& name() const & \
+ { \
+ return name##_; \
+ } \
+ type name() && \
+ { \
+ return name##_; \
+ } \
+ static_assert(true)
+
+#define FLUENT_FIELD_OPTION(type, name) \
+ TMaybe<type> name##_; \
+ TSelf& name(const type& value) \
+ { \
+ name##_ = value; \
+ return static_cast<TSelf&>(*this); \
+ } \
+ static_assert(true)
+
+#define FLUENT_FIELD_OPTION_ENCAPSULATED(type, name) \
+private: \
+ TMaybe<type> name##_; \
+public: \
+ TSelf& name(const type& value) & \
+ { \
+ name##_ = value; \
+ return static_cast<TSelf&>(*this); \
+ } \
+ TSelf name(const type& value) && \
+ { \
+ name##_ = value; \
+ return static_cast<TSelf&>(*this); \
+ } \
+ TSelf& Reset##name() & \
+ { \
+ name##_ = Nothing(); \
+ return static_cast<TSelf&>(*this); \
+ } \
+ TSelf Reset##name() && \
+ { \
+ name##_ = Nothing(); \
+ return static_cast<TSelf&>(*this); \
+ } \
+ const TMaybe<type>& name() const& \
+ { \
+ return name##_; \
+ } \
+ TMaybe<type> name() && \
+ { \
+ return name##_; \
+ } \
+ static_assert(true)
+
+#define FLUENT_FIELD_DEFAULT(type, name, defaultValue) \
+ type name##_ = defaultValue; \
+ TSelf& name(const type& value) \
+ { \
+ name##_ = value; \
+ return static_cast<TSelf&>(*this); \
+ } \
+ static_assert(true)
+
+#define FLUENT_FIELD_DEFAULT_ENCAPSULATED(type, name, defaultValue) \
+private: \
+ type name##_ = defaultValue; \
+public: \
+ TSelf& name(const type& value) & \
+ { \
+ name##_ = value; \
+ return static_cast<TSelf&>(*this); \
+ } \
+ TSelf name(const type& value) && \
+ { \
+ name##_ = value; \
+ return static_cast<TSelf&>(*this); \
+ } \
+ const type& name() const & \
+ { \
+ return name##_; \
+ } \
+ type name() && \
+ { \
+ return name##_; \
+ } \
+ static_assert(true)
+
+#define FLUENT_VECTOR_FIELD(type, name) \
+ TVector<type> name##s_; \
+ TSelf& Add##name(const type& value) \
+ { \
+ name##s_.push_back(value); \
+ return static_cast<TSelf&>(*this);\
+ } \
+ TSelf& name##s(TVector<type> values) \
+ { \
+ name##s_ = std::move(values); \
+ return static_cast<TSelf&>(*this);\
+ } \
+ static_assert(true)
+
+#define FLUENT_OPTIONAL_VECTOR_FIELD_ENCAPSULATED(type, name) \
+private: \
+ TMaybe<TVector<type>> name##s_; \
+public: \
+ const TMaybe<TVector<type>>& name##s() const & { \
+ return name##s_; \
+ } \
+ TMaybe<TVector<type>>& name##s() & { \
+ return name##s_; \
+ } \
+ TMaybe<TVector<type>> name##s() && { \
+ return std::move(name##s_); \
+ } \
+ TSelf& Add##name(const type& value) & \
+ { \
+ if (name##s_.Empty()) { \
+ name##s_.ConstructInPlace(); \
+ } \
+ name##s_->push_back(value); \
+ return static_cast<TSelf&>(*this);\
+ } \
+ TSelf Add##name(const type& value) && \
+ { \
+ if (name##s_.Empty()) { \
+ name##s_.ConstructInPlace(); \
+ } \
+ name##s_->push_back(value); \
+ return static_cast<TSelf&&>(*this);\
+ } \
+ TSelf& name##s(TVector<type> values) & \
+ { \
+ name##s_ = std::move(values); \
+ return static_cast<TSelf&>(*this);\
+ } \
+ TSelf name##s(TVector<type> values) && \
+ { \
+ name##s_ = std::move(values); \
+ return static_cast<TSelf&&>(*this);\
+ } \
+ TSelf& name##s(TNothing) & \
+ { \
+ name##s_ = Nothing(); \
+ return static_cast<TSelf&>(*this);\
+ } \
+ TSelf name##s(TNothing) && \
+ { \
+ name##s_ = Nothing(); \
+ return static_cast<TSelf&&>(*this);\
+ } \
+ TSelf& Reset##name##s() & \
+ { \
+ name##s_ = Nothing(); \
+ return static_cast<TSelf&>(*this);\
+ } \
+ TSelf Reset##name##s() && \
+ { \
+ name##s_ = Nothing(); \
+ return static_cast<TSelf&&>(*this);\
+ } \
+ static_assert(true)
+
+#define FLUENT_VECTOR_FIELD_ENCAPSULATED(type, name) \
+private: \
+ TVector<type> name##s_; \
+public: \
+ TSelf& Add##name(const type& value) & \
+ { \
+ name##s_.push_back(value); \
+ return static_cast<TSelf&>(*this);\
+ } \
+ TSelf Add##name(const type& value) && \
+ { \
+ name##s_.push_back(value); \
+ return static_cast<TSelf&>(*this);\
+ } \
+ TSelf& name##s(TVector<type> value) & \
+ { \
+ name##s_ = std::move(value); \
+ return static_cast<TSelf&>(*this);\
+ } \
+ TSelf name##s(TVector<type> value) && \
+ { \
+ name##s_ = std::move(value); \
+ return static_cast<TSelf&>(*this);\
+ } \
+ const TVector<type>& name##s() const & \
+ { \
+ return name##s_; \
+ } \
+ TVector<type> name##s() && \
+ { \
+ return name##s_; \
+ } \
+ static_assert(true)
+
+#define FLUENT_MAP_FIELD(keytype, valuetype, name) \
+ TMap<keytype,valuetype> name##_; \
+ TSelf& Add##name(const keytype& key, const valuetype& value) \
+ { \
+ name##_.emplace(key, value); \
+ return static_cast<TSelf&>(*this);\
+ } \
+ static_assert(true)
+
+/// @endcond
+
+////////////////////////////////////////////////////////////////////////////////
+
+///
+/// @brief Convenience class that keeps sequence of items.
+///
+/// Designed to be used as function parameter.
+///
+/// Users of such function can then pass:
+/// - single item,
+/// - initializer list of items,
+/// - vector of items;
+/// as argument to this function.
+///
+/// Example:
+/// ```
+/// void Foo(const TOneOrMany<int>& arg);
+/// ...
+/// Foo(1); // ok
+/// Foo({1, 2, 3}); // ok
+/// ```
+template <class T, class TDerived>
+struct TOneOrMany
+{
+ /// @cond Doxygen_Suppress
+ using TSelf = std::conditional_t<std::is_void_v<TDerived>, TOneOrMany, TDerived>;
+ /// @endcond
+
+ /// Initialize with empty sequence.
+ TOneOrMany() = default;
+
+ // Initialize from initializer list.
+ template<class U>
+ TOneOrMany(std::initializer_list<U> il)
+ {
+ Parts_.assign(il.begin(), il.end());
+ }
+
+ /// Put arguments to sequence
+ template <class U, class... TArgs>
+ requires std::is_convertible_v<U, T>
+ TOneOrMany(U&& arg, TArgs&&... args)
+ {
+ Add(arg, std::forward<TArgs>(args)...);
+ }
+
+ /// Initialize from vector.
+ TOneOrMany(TVector<T> args)
+ : Parts_(std::move(args))
+ { }
+
+ /// @brief Order is defined the same way as in TVector
+ bool operator==(const TOneOrMany& rhs) const
+ {
+ // N.B. We would like to make this method to be `= default`,
+ // but this breaks MSVC compiler for the cases when T doesn't
+ // support comparison.
+ return Parts_ == rhs.Parts_;
+ }
+
+ ///
+ /// @{
+ ///
+ /// @brief Add all arguments to sequence
+ template <class U, class... TArgs>
+ requires std::is_convertible_v<U, T>
+ TSelf& Add(U&& part, TArgs&&... args) &
+ {
+ Parts_.push_back(std::forward<U>(part));
+ if constexpr (sizeof...(args) > 0) {
+ [[maybe_unused]] int dummy[sizeof...(args)] = {(Parts_.push_back(std::forward<TArgs>(args)), 0) ... };
+ }
+ return static_cast<TSelf&>(*this);
+ }
+
+ template <class U, class... TArgs>
+ requires std::is_convertible_v<U, T>
+ TSelf Add(U&& part, TArgs&&... args) &&
+ {
+ return std::move(Add(std::forward<U>(part), std::forward<TArgs>(args)...));
+ }
+ /// @}
+
+ /// Content of sequence.
+ TVector<T> Parts_;
+};
+
+////////////////////////////////////////////////////////////////////////////////
+
+///
+/// @brief Type of the value that can occur in YT table.
+///
+/// @ref NYT::TTableSchema
+/// https://yt.yandex-team.ru/docs/description/storage/data_types
+enum EValueType : int
+{
+ /// Int64, signed integer of 64 bits.
+ VT_INT64,
+
+ /// Uint64, unsigned integer of 64 bits.
+ VT_UINT64,
+
+ /// Double, floating point number of double precision (64 bits).
+ VT_DOUBLE,
+ /// Boolean, `true` or `false`.
+ VT_BOOLEAN,
+
+ /// String, arbitrary byte sequence.
+ VT_STRING,
+
+ /// Any, arbitrary yson document.
+ VT_ANY,
+
+ /// Int8, signed integer of 8 bits.
+ VT_INT8,
+ /// Int16, signed integer of 16 bits.
+ VT_INT16,
+ /// Int32, signed integer of 32 bits.
+ VT_INT32,
+
+ /// Uint8, unsigned integer of 8 bits.
+ VT_UINT8,
+ /// Uint16, unsigned integer of 16 bits.
+ VT_UINT16,
+ /// Uint32, unsigned integer of 32 bits.
+ VT_UINT32,
+
+ /// Utf8, byte sequence that is valid utf8.
+ VT_UTF8,
+
+ /// Null, absence of value (almost never used in schemas)
+ VT_NULL,
+ /// Void, absence of value (almost never used in schemas) the difference between null, and void is yql-specific.
+ VT_VOID,
+
+ /// Date, number of days since Unix epoch (unsigned)
+ VT_DATE,
+ /// Datetime, number of seconds since Unix epoch (unsigned)
+ VT_DATETIME,
+ /// Timestamp, number of milliseconds since Unix epoch (unsigned)
+ VT_TIMESTAMP,
+ /// Interval, difference between two timestamps (signed)
+ VT_INTERVAL,
+
+ /// Float, floating point number (32 bits)
+ VT_FLOAT,
+ /// Json, sequence of bytes that is valid json.
+ VT_JSON,
+};
+
+///
+/// @brief Sort order.
+///
+/// @ref NYT::TTableSchema
+enum ESortOrder : int
+{
+ /// Ascending sort order.
+ SO_ASCENDING /* "ascending" */,
+ /// Descending sort order.
+ SO_DESCENDING /* "descending" */,
+};
+
+///
+/// @brief Value of "optimize_for" attribute.
+///
+/// @ref NYT::TRichYPath
+enum EOptimizeForAttr : i8
+{
+ /// Optimize for scan
+ OF_SCAN_ATTR /* "scan" */,
+
+ /// Optimize for lookup
+ OF_LOOKUP_ATTR /* "lookup" */,
+};
+
+///
+/// @brief Value of "erasure_codec" attribute.
+///
+/// @ref NYT::TRichYPath
+enum EErasureCodecAttr : i8
+{
+ /// @cond Doxygen_Suppress
+ EC_NONE_ATTR /* "none" */,
+ EC_REED_SOLOMON_6_3_ATTR /* "reed_solomon_6_3" */,
+ EC_LRC_12_2_2_ATTR /* "lrc_12_2_2" */,
+ EC_ISA_LRC_12_2_2_ATTR /* "isa_lrc_12_2_2" */,
+ /// @endcond
+};
+
+///
+/// @brief Value of "schema_modification" attribute.
+///
+/// @ref NYT::TRichYPath
+enum ESchemaModificationAttr : i8
+{
+ SM_NONE_ATTR /* "none" */,
+ SM_UNVERSIONED_UPDATE /* "unversioned_update" */,
+};
+
+////////////////////////////////////////////////////////////////////////////////
+
+///
+/// @brief Table key column description.
+///
+/// The description includes column name and sort order.
+///
+/// @anchor TSortOrder_backward_compatibility
+/// @note
+/// Many functions that use `TSortOrder` as argument used to take `TString`
+/// (the only allowed sort order was "ascending" and user didn't have to specify it).
+/// @note
+/// This class is designed to provide backward compatibility for such code and therefore
+/// objects of this class can be constructed and assigned from TString-like objects only.
+///
+/// @see NYT::TSortOperationSpec
+class TSortColumn
+{
+public:
+ /// @cond Doxygen_Suppress
+ using TSelf = TSortColumn;
+ /// @endcond
+
+ /// Column name
+ FLUENT_FIELD_ENCAPSULATED(TString, Name);
+
+ /// Sort order
+ FLUENT_FIELD_DEFAULT_ENCAPSULATED(ESortOrder, SortOrder, ESortOrder::SO_ASCENDING);
+
+ ///
+ /// @{
+ ///
+ /// @brief Construct object from name and sort order
+ ///
+ /// Constructors are intentionally implicit so `TSortColumn` can be compatible with old code.
+ /// @ref TSortOrder_backward_compatibility
+ TSortColumn(TStringBuf name = {}, ESortOrder sortOrder = ESortOrder::SO_ASCENDING);
+ TSortColumn(const TString& name, ESortOrder sortOrder = ESortOrder::SO_ASCENDING);
+ TSortColumn(const char* name, ESortOrder sortOrder = ESortOrder::SO_ASCENDING);
+ /// @}
+
+ /// Check that sort order is ascending, throw exception otherwise.
+ const TSortColumn& EnsureAscending() const;
+
+ /// @brief Convert sort to yson representation as YT API expects it.
+ TNode ToNode() const;
+
+ /// @brief Comparison is default and checks both name and sort order.
+ bool operator == (const TSortColumn& rhs) const = default;
+
+ ///
+ /// @{
+ ///
+ /// @brief Assign object from column name, and set sort order to `ascending`.
+ ///
+ /// This is backward compatibility methods.
+ ///
+ /// @ref TSortOrder_backward_compatibility
+ TSortColumn& operator = (TStringBuf name);
+ TSortColumn& operator = (const TString& name);
+ TSortColumn& operator = (const char* name);
+ /// @}
+
+ bool operator == (const TStringBuf rhsName) const;
+ bool operator != (const TStringBuf rhsName) const;
+ bool operator == (const TString& rhsName) const;
+ bool operator != (const TString& rhsName) const;
+ bool operator == (const char* rhsName) const;
+ bool operator != (const char* rhsName) const;
+
+ // Intentionally implicit conversions.
+ operator TString() const;
+ operator TStringBuf() const;
+ operator std::string() const;
+
+ Y_SAVELOAD_DEFINE(Name_, SortOrder_);
+};
+
+///
+/// @brief List of @ref TSortColumn
+///
+/// Contains a bunch of helper methods such as constructing from single object.
+class TSortColumns
+ : public TOneOrMany<TSortColumn, TSortColumns>
+{
+public:
+ using TOneOrMany<TSortColumn, TSortColumns>::TOneOrMany;
+
+ /// Construct empty list.
+ TSortColumns();
+
+ ///
+ /// @{
+ ///
+ /// @brief Construct list of ascending sort order columns by their names.
+ ///
+ /// Required for backward compatibility.
+ ///
+ /// @ref TSortOrder_backward_compatibility
+ TSortColumns(const TVector<TString>& names);
+ TSortColumns(const TColumnNames& names);
+ /// @}
+
+
+ ///
+ /// @brief Implicit conversion to column list.
+ ///
+ /// If all columns has ascending sort order return list of their names.
+ /// Throw exception otherwise.
+ ///
+ /// Required for backward compatibility.
+ ///
+ /// @ref TSortOrder_backward_compatibility
+ operator TColumnNames() const;
+
+ /// Make sure that all columns are of ascending sort order.
+ const TSortColumns& EnsureAscending() const;
+
+ /// Get list of column names.
+ TVector<TString> GetNames() const;
+};
+
+////////////////////////////////////////////////////////////////////////////////
+
+/// Helper function to create new style type from old style one.
+NTi::TTypePtr ToTypeV3(EValueType type, bool required);
+
+///
+/// @brief Single column description
+///
+/// Each field describing column has setter and getter.
+///
+/// Example reading field:
+/// ```
+/// ... columnSchema.Name() ...
+/// ```
+///
+/// Example setting field:
+/// ```
+/// columnSchema.Name("my-column").Type(VT_INT64); // set name and type
+/// ```
+///
+/// @ref https://yt.yandex-team.ru/docs/description/storage/static_schema
+class TColumnSchema
+{
+public:
+ /// @cond Doxygen_Suppress
+ using TSelf = TColumnSchema;
+ /// @endcond
+
+ ///
+ /// @brief Construct empty column schemas
+ ///
+ /// @note
+ /// Such schema cannot be used in schema as it it doesn't have name.
+ TColumnSchema();
+
+ ///
+ /// @{
+ ///
+ /// @brief Copy and move constructors are default.
+ TColumnSchema(const TColumnSchema&) = default;
+ TColumnSchema& operator=(const TColumnSchema&) = default;
+ /// @}
+
+
+ FLUENT_FIELD_ENCAPSULATED(TString, Name);
+
+ ///
+ /// @brief Functions to work with type in old manner.
+ ///
+ /// @deprecated New code is recommended to work with types using @ref NTi::TTypePtr from type_info library.
+ TColumnSchema& Type(EValueType type) &;
+ TColumnSchema Type(EValueType type) &&;
+ EValueType Type() const;
+
+ /// @brief Set and get column type.
+ /// @{
+ TColumnSchema& Type(const NTi::TTypePtr& type) &;
+ TColumnSchema Type(const NTi::TTypePtr& type) &&;
+
+ TColumnSchema& TypeV3(const NTi::TTypePtr& type) &;
+ TColumnSchema TypeV3(const NTi::TTypePtr& type) &&;
+ NTi::TTypePtr TypeV3() const;
+ /// @}
+
+ ///
+ /// @brief Raw yson representation of column type
+ /// @deprecated Prefer to use `TypeV3` methods.
+ FLUENT_FIELD_OPTION_ENCAPSULATED(TNode, RawTypeV3);
+
+ /// Column sort order
+ FLUENT_FIELD_OPTION_ENCAPSULATED(ESortOrder, SortOrder);
+
+ ///
+ /// @brief Lock group name
+ ///
+ /// @ref https://yt.yandex-team.ru/docs/description/dynamic_tables/sorted_dynamic_tables#blokirovka-stroki
+ FLUENT_FIELD_OPTION_ENCAPSULATED(TString, Lock);
+
+ /// Expression defining column value
+ FLUENT_FIELD_OPTION_ENCAPSULATED(TString, Expression);
+
+ /// Aggregating function name
+ FLUENT_FIELD_OPTION_ENCAPSULATED(TString, Aggregate);
+
+ ///
+ /// @brief Storage group name
+ ///
+ /// @ref https://yt.yandex-team.ru/docs/description/storage/static_schema
+ FLUENT_FIELD_OPTION_ENCAPSULATED(TString, Group);
+
+ ///
+ /// @brief Column requiredness.
+ ///
+ /// Required columns doesn't accept NULL values.
+ /// Usually if column is required it means that it has Optional<...> type
+ bool Required() const;
+
+ ///
+ /// @{
+ ///
+ /// @brief Set type in old-style manner
+ TColumnSchema& Type(EValueType type, bool required) &;
+ TColumnSchema Type(EValueType type, bool required) &&;
+ /// @}
+
+private:
+ friend void Deserialize(TColumnSchema& columnSchema, const TNode& node);
+ NTi::TTypePtr TypeV3_;
+ bool Required_ = false;
+};
+
+/// Equality check checks all fields of column schema.
+bool operator==(const TColumnSchema& lhs, const TColumnSchema& rhs);
+
+///
+/// @brief Description of table schema
+///
+/// @see https://yt.yandex-team.ru/docs/description/storage/static_schema
+class TTableSchema
+{
+public:
+ /// @cond Doxygen_Suppress
+ using TSelf = TTableSchema;
+ /// @endcond
+
+ /// Column schema
+ FLUENT_VECTOR_FIELD_ENCAPSULATED(TColumnSchema, Column);
+
+ ///
+ /// @brief Strictness of the schema
+ ///
+ /// Strict schemas are not allowed to have columns not described in schema.
+ /// Nonstrict schemas are allowed to have such columns, all such missing columns are assumed to have
+ FLUENT_FIELD_DEFAULT_ENCAPSULATED(bool, Strict, true);
+
+ ///
+ /// @brief Whether keys are unique
+ ///
+ /// This flag can be set only for schemas that have sorted columns.
+ /// If flag is set table cannot have multiple rows with same key.
+ FLUENT_FIELD_DEFAULT_ENCAPSULATED(bool, UniqueKeys, false);
+
+ /// Get modifiable column list
+ TVector<TColumnSchema>& MutableColumns();
+
+ /// Check if schema has any described column
+ [[nodiscard]] bool Empty() const;
+
+ /// Add column
+ TTableSchema& AddColumn(const TString& name, const NTi::TTypePtr& type, ESortOrder sortOrder) &;
+ /// @copydoc NYT::TTableSchema::AddColumn(const TString&, const NTi::TTypePtr&, ESortOrder)&;
+ TTableSchema AddColumn(const TString& name, const NTi::TTypePtr& type, ESortOrder sortOrder) &&;
+
+ /// @copydoc NYT::TTableSchema::AddColumn(const TString&, const NTi::TTypePtr&, ESortOrder)&;
+ TTableSchema& AddColumn(const TString& name, const NTi::TTypePtr& type) &;
+ /// @copydoc NYT::TTableSchema::AddColumn(const TString&, const NTi::TTypePtr&, ESortOrder)&;
+ TTableSchema AddColumn(const TString& name, const NTi::TTypePtr& type) &&;
+
+ /// Add optional column of specified type
+ TTableSchema& AddColumn(const TString& name, EValueType type, ESortOrder sortOrder) &;
+ /// @copydoc NYT::TTableSchema::AddColumn(const TString&, EValueType, ESortOrder)&;
+ TTableSchema AddColumn(const TString& name, EValueType type, ESortOrder sortOrder) &&;
+
+ /// @copydoc NYT::TTableSchema::AddColumn(const TString&, EValueType, ESortOrder)&;
+ TTableSchema& AddColumn(const TString& name, EValueType type) &;
+ /// @copydoc NYT::TTableSchema::AddColumn(const TString&, EValueType, ESortOrder)&;
+ TTableSchema AddColumn(const TString& name, EValueType type) &&;
+
+ ///
+ /// @brief Make table schema sorted by specified columns
+ ///
+ /// Resets old key columns if any
+ TTableSchema& SortBy(const TSortColumns& columns) &;
+
+ /// @copydoc NYT::TTableSchema::SortBy(const TSortColumns&)&;
+ TTableSchema SortBy(const TSortColumns& columns) &&;
+
+ /// Get yson description of table schema
+ [[nodiscard]] TNode ToNode() const;
+
+ /// Parse schema from yson node
+ static NYT::TTableSchema FromNode(const TNode& node);
+
+ friend void Deserialize(TTableSchema& tableSchema, const TNode& node);
+};
+
+/// Check for equality of all columns and all schema attributes
+bool operator==(const TTableSchema& lhs, const TTableSchema& rhs);
+
+/// Create table schema by protobuf message descriptor
+TTableSchema CreateTableSchema(
+ const ::google::protobuf::Descriptor& messageDescriptor,
+ const TSortColumns& sortColumns = TSortColumns(),
+ bool keepFieldsWithoutExtension = true);
+
+/// Create table schema by protobuf message type
+template <class TProtoType, typename = std::enable_if_t<std::is_base_of_v<::google::protobuf::Message, TProtoType>>>
+inline TTableSchema CreateTableSchema(
+ const TSortColumns& sortColumns = TSortColumns(),
+ bool keepFieldsWithoutExtension = true)
+{
+ static_assert(
+ std::is_base_of_v<::google::protobuf::Message, TProtoType>,
+ "Template argument must be derived from ::google::protobuf::Message");
+
+ return CreateTableSchema(
+ *TProtoType::descriptor(),
+ sortColumns,
+ keepFieldsWithoutExtension);
+}
+
+///
+/// @brief Create strict table schema from `struct` type.
+///
+/// Names and types of columns are taken from struct member names and types.
+/// `Strict` flag is set to true, all other attribute of schema and columns
+/// are left with default values
+TTableSchema CreateTableSchema(NTi::TTypePtr type);
+
+////////////////////////////////////////////////////////////////////////////////
+
+///
+/// @brief Enumeration describing comparison operation used in key bound.
+///
+/// ERelation is a part of @ref NYT::TKeyBound that can be used as
+/// lower or upper key limit in @ref TReadLimit.
+///
+/// Relations `Less` and `LessOrEqual` are for upper limit and
+/// relations `Greater` and `GreaterOrEqual` are for lower limit.
+///
+/// It is a error to use relation in the limit of wrong kind.
+///
+/// @see https://yt.yandex-team.ru/docs/description/common/ypath#rich_ypath
+enum class ERelation
+{
+ ///
+ /// @brief Relation "less"
+ ///
+ /// Specifies range of keys that are before specified key.
+ /// Can only be used in upper limit.
+ Less /* "<" */,
+
+ ///
+ /// @brief Relation "less or equal"
+ ///
+ /// Specifies range of keys that are before or equal specified key.
+ /// Can only be used in upper limit.
+ LessOrEqual /* "<=" */,
+
+ ///
+ /// @brief Relation "greater"
+ ///
+ /// Specifies range of keys that are after specified key.
+ /// Can only be used in lower limit.
+ Greater /* ">" */,
+
+ ///
+ /// @brief Relation "greater or equal"
+ ///
+ /// Specifies range of keys that are after or equal than specified key.
+ /// Can only be used in lower limit.
+ GreaterOrEqual /* ">=" */,
+};
+
+///
+/// @brief Key with relation specifying interval of keys in lower or upper limit of @ref NYT::TReadRange
+///
+/// @see https://yt.yandex-team.ru/docs/description/common/ypath#rich_ypath
+struct TKeyBound
+{
+ /// @cond Doxygen_Suppress
+ using TSelf = TKeyBound;
+
+ explicit TKeyBound(ERelation relation = ERelation::Less, TKey key = TKey{});
+
+ FLUENT_FIELD_DEFAULT_ENCAPSULATED(ERelation, Relation, ERelation::Less);
+ FLUENT_FIELD_DEFAULT_ENCAPSULATED(TKey, Key, TKey{});
+ /// @endcond
+};
+
+///
+/// @brief Description of the read limit.
+///
+/// It is actually a variant and must store exactly one field.
+///
+/// @see https://yt.yandex-team.ru/docs/description/common/ypath#rich_ypath
+struct TReadLimit
+{
+ /// @cond Doxygen_Suppress
+ using TSelf = TReadLimit;
+ /// @endcond
+
+ ///
+ /// @brief KeyBound specifies table key and whether to include it
+ ///
+ /// It can be used in lower or upper limit when reading tables.
+ FLUENT_FIELD_OPTION(TKeyBound, KeyBound);
+
+ ///
+ /// @brief Table key
+ ///
+ /// It can be used in exact, lower or upper limit when reading tables.
+ FLUENT_FIELD_OPTION(TKey, Key);
+
+ ///
+ /// @brief Row index
+ ///
+ /// It can be used in exact, lower or upper limit when reading tables.
+ FLUENT_FIELD_OPTION(i64, RowIndex);
+
+ ///
+ /// @brief File offset
+ ///
+ /// It can be used in lower or upper limit when reading files.
+ FLUENT_FIELD_OPTION(i64, Offset);
+
+ ///
+ /// @brief Tablet index
+ ///
+ /// It can be used in lower or upper limit in dynamic table operations
+ FLUENT_FIELD_OPTION(i64, TabletIndex);
+};
+
+///
+/// @brief Range of a table or a file
+///
+/// @see https://yt.yandex-team.ru/docs/description/common/ypath#rich_ypath
+struct TReadRange
+{
+ using TSelf = TReadRange;
+
+ ///
+ /// @brief Lower limit of the range
+ ///
+ /// It is usually inclusive (except when @ref NYT::TKeyBound with relation @ref NYT::ERelation::Greater is used).
+ FLUENT_FIELD(TReadLimit, LowerLimit);
+
+ ///
+ /// @brief Lower limit of the range
+ ///
+ /// It is usually exclusive (except when @ref NYT::TKeyBound with relation @ref NYT::ERelation::LessOrEqual is used).
+ FLUENT_FIELD(TReadLimit, UpperLimit);
+
+ /// Exact key or row index.
+ FLUENT_FIELD(TReadLimit, Exact);
+
+ /// Create read range from row indexes.
+ static TReadRange FromRowIndices(i64 lowerLimit, i64 upperLimit)
+ {
+ return TReadRange()
+ .LowerLimit(TReadLimit().RowIndex(lowerLimit))
+ .UpperLimit(TReadLimit().RowIndex(upperLimit));
+ }
+
+ /// Create read range from keys.
+ static TReadRange FromKeys(const TKey& lowerKeyInclusive, const TKey& upperKeyExclusive)
+ {
+ return TReadRange()
+ .LowerLimit(TReadLimit().Key(lowerKeyInclusive))
+ .UpperLimit(TReadLimit().Key(upperKeyExclusive));
+ }
+};
+
+///
+/// @brief Path with additional attributes.
+///
+/// Allows to specify additional attributes for path used in some operations.
+///
+/// @see https://yt.yandex-team.ru/docs/description/common/ypath#rich_ypath
+struct TRichYPath
+{
+ /// @cond Doxygen_Suppress
+ using TSelf = TRichYPath;
+ /// @endcond
+
+ /// Path itself.
+ FLUENT_FIELD(TYPath, Path);
+
+ /// Specifies that path should be appended not overwritten
+ FLUENT_FIELD_OPTION(bool, Append);
+
+ /// @deprecated Deprecated attribute.
+ FLUENT_FIELD_OPTION(bool, PartiallySorted);
+
+ /// Specifies that path is expected to be sorted by these columns.
+ FLUENT_FIELD(TSortColumns, SortedBy);
+
+ /// Add range to read.
+ TRichYPath& AddRange(TReadRange range)
+ {
+ if (!Ranges_) {
+ Ranges_.ConstructInPlace();
+ }
+ Ranges_->push_back(std::move(range));
+ return *this;
+ }
+
+ TRichYPath& ResetRanges()
+ {
+ Ranges_.Clear();
+ return *this;
+ }
+
+ ///
+ /// @{
+ ///
+ /// Return ranges to read.
+ ///
+ /// NOTE: Nothing (in TMaybe) and empty TVector are different ranges.
+ /// Nothing represents universal range (reader reads all table rows).
+ /// Empty TVector represents empty range (reader returns empty set of rows).
+ const TMaybe<TVector<TReadRange>>& GetRanges() const
+ {
+ return Ranges_;
+ }
+
+ TMaybe<TVector<TReadRange>>& MutableRanges()
+ {
+ return Ranges_;
+ }
+
+ ///
+ /// @{
+ ///
+ /// Get range view, that is convenient way to iterate through all ranges.
+ TArrayRef<TReadRange> MutableRangesView()
+ {
+ if (Ranges_.Defined()) {
+ return TArrayRef(Ranges_->data(), Ranges_->size());
+ } else {
+ return {};
+ }
+ }
+
+ TArrayRef<const TReadRange> GetRangesView() const
+ {
+ if (Ranges_.Defined()) {
+ return TArrayRef(Ranges_->data(), Ranges_->size());
+ } else {
+ return {};
+ }
+ }
+ /// @}
+
+ /// @{
+ ///
+ /// Get range by index.
+ const TReadRange& GetRange(ssize_t i) const
+ {
+ return Ranges_.GetRef()[i];
+ }
+
+ TReadRange& MutableRange(ssize_t i)
+ {
+ return Ranges_.GetRef()[i];
+ }
+ /// @}
+
+ ///
+ /// @brief Specifies columns that should be read.
+ ///
+ /// If it's set to Nothing then all columns will be read.
+ /// If empty TColumnNames is specified then each read row will be empty.
+ FLUENT_FIELD_OPTION(TColumnNames, Columns);
+
+ FLUENT_FIELD_OPTION(bool, Teleport);
+ FLUENT_FIELD_OPTION(bool, Primary);
+ FLUENT_FIELD_OPTION(bool, Foreign);
+ FLUENT_FIELD_OPTION(i64, RowCountLimit);
+
+ FLUENT_FIELD_OPTION(TString, FileName);
+
+ /// Specifies original path to be shown in Web UI
+ FLUENT_FIELD_OPTION(TYPath, OriginalPath);
+
+ ///
+ /// @brief Specifies that this path points to executable file
+ ///
+ /// Used in operation specs.
+ FLUENT_FIELD_OPTION(bool, Executable);
+
+ ///
+ /// @brief Specify format to use when loading table.
+ ///
+ /// Used in operation specs.
+ FLUENT_FIELD_OPTION(TNode, Format);
+
+ /// @brief Specifies table schema that will be set on the path
+ FLUENT_FIELD_OPTION(TTableSchema, Schema);
+
+ /// Specifies compression codec that will be set on the path
+ FLUENT_FIELD_OPTION(TString, CompressionCodec);
+
+ /// Specifies erasure codec that will be set on the path
+ FLUENT_FIELD_OPTION(EErasureCodecAttr, ErasureCodec);
+
+ /// Specifies schema modification that will be set on the path
+ FLUENT_FIELD_OPTION(ESchemaModificationAttr, SchemaModification);
+
+ /// Specifies optimize_for attribute that will be set on the path
+ FLUENT_FIELD_OPTION(EOptimizeForAttr, OptimizeFor);
+
+ ///
+ /// @brief Do not put file used in operation into node cache
+ ///
+ /// If BypassArtifactCache == true, file will be loaded into the job's sandbox bypassing the cache on the YT node.
+ /// It helps jobs that use tmpfs to start faster,
+ /// because files will be loaded into tmpfs directly bypassing disk cache
+ FLUENT_FIELD_OPTION(bool, BypassArtifactCache);
+
+ ///
+ /// @brief Timestamp of dynamic table.
+ ///
+ /// NOTE: it is _not_ unix timestamp
+ /// (instead it's transaction timestamp, that is more complex structure).
+ FLUENT_FIELD_OPTION(i64, Timestamp);
+
+ ///
+ /// @brief Specify transaction that should be used to access this path.
+ ///
+ /// Allows to start cross-transactional operations.
+ FLUENT_FIELD_OPTION(TTransactionId, TransactionId);
+
+ using TRenameColumnsDescriptor = THashMap<TString, TString>;
+
+ /// Specifies columnar mapping which will be applied to columns before transfer to job.
+ FLUENT_FIELD_OPTION(TRenameColumnsDescriptor, RenameColumns);
+
+ /// Create empty path with no attributes
+ TRichYPath()
+ { }
+
+ ///
+ /// @{
+ ///
+ /// @brief Create path from string
+ TRichYPath(const char* path)
+ : Path_(path)
+ { }
+
+ TRichYPath(const TYPath& path)
+ : Path_(path)
+ { }
+ /// @}
+
+private:
+ TMaybe<TVector<TReadRange>> Ranges_;
+};
+
+///
+/// @ref Create copy of @ref NYT::TRichYPath with schema derived from proto message.
+///
+///
+template <typename TProtoType>
+TRichYPath WithSchema(const TRichYPath& path, const TSortColumns& sortBy = TSortColumns())
+{
+ static_assert(std::is_base_of_v<::google::protobuf::Message, TProtoType>, "TProtoType must be Protobuf message");
+
+ auto schemedPath = path;
+ if (!schemedPath.Schema_) {
+ schemedPath.Schema(CreateTableSchema<TProtoType>(sortBy));
+ }
+ return schemedPath;
+}
+
+///
+/// @brief Create copy of @ref NYT::TRichYPath with schema derived from TRowType if possible.
+///
+/// If TRowType is protobuf message schema is derived from it and set to returned path.
+/// Otherwise schema of original path is left unchanged (and probably unset).
+template <typename TRowType>
+TRichYPath MaybeWithSchema(const TRichYPath& path, const TSortColumns& sortBy = TSortColumns())
+{
+ if constexpr (std::is_base_of_v<::google::protobuf::Message, TRowType>) {
+ return WithSchema<TRowType>(path, sortBy);
+ } else {
+ return path;
+ }
+}
+
+///
+/// @brief Get the list of ranges related to path in compatibility mode.
+///
+/// - If path is missing ranges, empty list is returned.
+/// - If path has associated range list and the list is not empty, function returns this list.
+/// - If path has associated range list and this list is empty, exception is thrown.
+///
+/// Before YT-17683 RichYPath didn't support empty range list and empty range actualy meant universal range.
+/// This function emulates this old behavior.
+///
+/// @see https://st.yandex-team.ru/YT-17683
+const TVector<TReadRange>& GetRangesCompat(const TRichYPath& path);
+
+////////////////////////////////////////////////////////////////////////////////
+
+/// Statistics about table columns.
+struct TTableColumnarStatistics
+{
+ /// Total data weight for all chunks for each of requested columns.
+ THashMap<TString, i64> ColumnDataWeight;
+
+ /// Total weight of all old chunks that don't keep columnar statistics.
+ i64 LegacyChunksDataWeight = 0;
+
+ /// Timestamps total weight (only for dynamic tables).
+ TMaybe<i64> TimestampTotalWeight;
+};
+
+////////////////////////////////////////////////////////////////////////////////
+
+/// Description of a partition.
+struct TMultiTablePartition
+{
+ struct TStatistics
+ {
+ i64 ChunkCount = 0;
+ i64 DataWeight = 0;
+ i64 RowCount = 0;
+ };
+
+ /// Ranges of input tables for this partition.
+ TVector<TRichYPath> TableRanges;
+
+ /// Aggregate statistics of all the table ranges in the partition.
+ TStatistics AggregateStatistics;
+};
+
+/// Table partitions from GetTablePartitions command.
+struct TMultiTablePartitions
+{
+ /// Disjoint partitions into which the input tables were divided.
+ TVector<TMultiTablePartition> Partitions;
+};
+
+////////////////////////////////////////////////////////////////////////////////
+
+///
+/// @brief Contains information about tablet
+///
+/// @see NYT::IClient::GetTabletInfos
+struct TTabletInfo
+{
+ ///
+ /// @brief Indicates the total number of rows added to the tablet (including trimmed ones).
+ ///
+ /// Currently only provided for ordered tablets.
+ i64 TotalRowCount = 0;
+
+ ///
+ /// @brief Contains the number of front rows that are trimmed and are not guaranteed to be accessible.
+ ///
+ /// Only makes sense for ordered tablet.
+ i64 TrimmedRowCount = 0;
+
+ ///
+ /// @brief Tablet cell barrier timestamp, which lags behind the current timestamp
+ ///
+ /// It is guaranteed that all transactions with commit timestamp not exceeding the barrier are fully committed;
+ /// e.g. all their added rows are visible (and are included in @ref NYT::TTabletInfo::TotalRowCount).
+ /// Mostly makes sense for ordered tablets.
+ ui64 BarrierTimestamp;
+};
+
+////////////////////////////////////////////////////////////////////////////////
+
+/// List of attributes to retrieve in operations like @ref NYT::ICypressClient::Get
+struct TAttributeFilter
+{
+ /// @cond Doxygen_Suppress
+ using TSelf = TAttributeFilter;
+ /// @endcond
+
+ /// List of attributes.
+ FLUENT_VECTOR_FIELD(TString, Attribute);
+};
+
+////////////////////////////////////////////////////////////////////////////////
+
+///
+/// @brief Check if none of the fields of @ref NYT::TReadLimit is set.
+///
+/// @return true if any field of readLimit is set and false otherwise.
+bool IsTrivial(const TReadLimit& readLimit);
+
+/// Convert yson node type to table schema type
+EValueType NodeTypeToValueType(TNode::EType nodeType);
+
+////////////////////////////////////////////////////////////////////////////////
+
+///
+/// @brief Enumeration for specifying how reading from master is performed.
+///
+/// Used in operations like NYT::ICypressClient::Get
+enum class EMasterReadKind : int
+{
+ ///
+ /// @brief Reading from leader.
+ ///
+ /// Should almost never be used since it's expensive and for regular uses has no difference from
+ /// "follower" read.
+ Leader /* "leader" */,
+
+ /// @brief Reading from master follower (default).
+ Follower /* "follower" */,
+ Cache /* "cache" */,
+ MasterCache /* "master_cache" */,
+};
+
+////////////////////////////////////////////////////////////////////////////////
+
+/// @cond Doxygen_Suppress
+namespace NDetail {
+
+// MUST NOT BE USED BY CLIENTS
+// TODO: we should use default GENERATE_ENUM_SERIALIZATION
+TString ToString(EValueType type);
+
+} // namespace NDetail
+/// @endcond
+
+////////////////////////////////////////////////////////////////////////////////
+
+} // namespace NYT
diff --git a/yt/cpp/mapreduce/interface/common_ut.cpp b/yt/cpp/mapreduce/interface/common_ut.cpp
new file mode 100644
index 0000000000..3f19433816
--- /dev/null
+++ b/yt/cpp/mapreduce/interface/common_ut.cpp
@@ -0,0 +1,303 @@
+#include "common_ut.h"
+
+#include "fluent.h"
+
+#include <yt/cpp/mapreduce/interface/common.h>
+
+#include <yt/cpp/mapreduce/tests/yt_unittest_lib/yt_unittest_lib.h>
+
+#include <library/cpp/testing/unittest/registar.h>
+
+#include <library/cpp/yson/node/node_io.h>
+#include <library/cpp/yson/node/node_builder.h>
+
+#include <util/generic/xrange.h>
+
+using namespace NYT;
+
+template <class T>
+TString SaveToString(const T& obj)
+{
+ TString s;
+ TStringOutput out(s);
+ ::Save(&out, obj);
+ return s;
+}
+
+template <class T>
+T LoadFromString(TStringBuf s)
+{
+ TMemoryInput in(s);
+ T obj;
+ ::Load(&in, obj);
+ return obj;
+}
+
+template <class T>
+T SaveLoad(const T& obj)
+{
+ return LoadFromString<T>(SaveToString(obj));
+}
+
+Y_UNIT_TEST_SUITE(Common)
+{
+ Y_UNIT_TEST(SortColumnsLegacy)
+ {
+ TSortColumns keys1("a", "b");
+ UNIT_ASSERT((keys1.Parts_ == TSortColumns{"a", "b"}));
+
+ keys1.Add("c", "d");
+ UNIT_ASSERT((keys1.Parts_ == TSortColumns{"a", "b", "c", "d"}));
+
+ auto keys2 = TSortColumns(keys1).Add("e", "f");
+ UNIT_ASSERT((keys1.Parts_ == TSortColumns{"a", "b", "c", "d"}));
+ UNIT_ASSERT((keys2.Parts_ == TSortColumns{"a", "b", "c", "d", "e", "f"}));
+
+ auto keys3 = TSortColumns(keys1).Add("e").Add("f").Add("g");
+ UNIT_ASSERT((keys1.Parts_ == TSortColumns{"a", "b", "c", "d"}));
+ UNIT_ASSERT((keys3.Parts_ == TSortColumns{"a", "b", "c", "d", "e", "f", "g"}));
+ }
+
+ Y_UNIT_TEST(SortColumn)
+ {
+ auto ascending = TSortColumn("a");
+ UNIT_ASSERT_VALUES_EQUAL(ascending.Name(), "a");
+ UNIT_ASSERT_VALUES_EQUAL(ascending.SortOrder(), ESortOrder::SO_ASCENDING);
+ UNIT_ASSERT_VALUES_EQUAL(ascending, TSortColumn("a", ESortOrder::SO_ASCENDING));
+ UNIT_ASSERT_VALUES_UNEQUAL(ascending, TSortColumn("a", ESortOrder::SO_DESCENDING));
+
+ UNIT_ASSERT_NO_EXCEPTION(ascending.EnsureAscending());
+ UNIT_ASSERT_VALUES_EQUAL(static_cast<TString>(ascending), "a");
+ UNIT_ASSERT_VALUES_EQUAL(ascending, "a");
+
+ auto another = ascending;
+ UNIT_ASSERT_NO_EXCEPTION(another = "another");
+ UNIT_ASSERT_VALUES_EQUAL(another.Name(), "another");
+ UNIT_ASSERT_VALUES_EQUAL(another.SortOrder(), ESortOrder::SO_ASCENDING);
+ UNIT_ASSERT_VALUES_EQUAL(another, TSortColumn("another", ESortOrder::SO_ASCENDING));
+ UNIT_ASSERT_VALUES_UNEQUAL(another, TSortColumn("another", ESortOrder::SO_DESCENDING));
+
+ auto ascendingNode = BuildYsonNodeFluently().Value(ascending);
+ UNIT_ASSERT_VALUES_EQUAL(ascendingNode, TNode("a"));
+
+ UNIT_ASSERT_VALUES_EQUAL(SaveLoad(ascending), ascending);
+ UNIT_ASSERT_VALUES_UNEQUAL(SaveToString(ascending), SaveToString(TString("a")));
+
+ auto descending = TSortColumn("a", ESortOrder::SO_DESCENDING);
+ UNIT_ASSERT_VALUES_EQUAL(descending.Name(), "a");
+ UNIT_ASSERT_VALUES_EQUAL(descending.SortOrder(), ESortOrder::SO_DESCENDING);
+ UNIT_ASSERT_VALUES_EQUAL(descending, TSortColumn("a", ESortOrder::SO_DESCENDING));
+ UNIT_ASSERT_VALUES_UNEQUAL(descending, TSortColumn("a", ESortOrder::SO_ASCENDING));
+
+ UNIT_ASSERT_EXCEPTION(descending.EnsureAscending(), yexception);
+ UNIT_ASSERT_EXCEPTION(static_cast<TString>(descending), yexception);
+ UNIT_ASSERT_EXCEPTION(descending == "a", yexception);
+ UNIT_ASSERT_EXCEPTION(descending = "a", yexception);
+
+ auto descendingNode = BuildYsonNodeFluently().Value(descending);
+ UNIT_ASSERT_VALUES_EQUAL(descendingNode, TNode()("name", "a")("sort_order", "descending"));
+
+ UNIT_ASSERT_VALUES_EQUAL(SaveLoad(descending), descending);
+ UNIT_ASSERT_VALUES_UNEQUAL(SaveToString(descending), SaveToString("a"));
+
+ UNIT_ASSERT_VALUES_EQUAL(ToString(TSortColumn("blah")), "blah");
+ UNIT_ASSERT_VALUES_EQUAL(ToString(TSortColumn("blah", ESortOrder::SO_DESCENDING)), "{\"name\"=\"blah\";\"sort_order\"=\"descending\"}");
+ }
+
+ Y_UNIT_TEST(SortColumns)
+ {
+ TSortColumns ascending("a", "b");
+ UNIT_ASSERT(ascending.Parts_ == (TSortColumns{"a", "b"}));
+ UNIT_ASSERT_NO_EXCEPTION(ascending.EnsureAscending());
+ UNIT_ASSERT_VALUES_EQUAL(static_cast<TColumnNames>(ascending).Parts_, (TVector<TString>{"a", "b"}));
+ UNIT_ASSERT_VALUES_EQUAL(ascending.GetNames(), (TVector<TString>{"a", "b"}));
+
+ auto mixed = ascending;
+ mixed.Add(TSortColumn("c", ESortOrder::SO_DESCENDING), "d");
+ UNIT_ASSERT((mixed.Parts_ != TVector<TSortColumn>{"a", "b", "c", "d"}));
+ UNIT_ASSERT((mixed.Parts_ == TVector<TSortColumn>{"a", "b", TSortColumn("c", ESortOrder::SO_DESCENDING), "d"}));
+ UNIT_ASSERT_VALUES_EQUAL(mixed.GetNames(), (TVector<TString>{"a", "b", "c", "d"}));
+ UNIT_ASSERT_EXCEPTION(mixed.EnsureAscending(), yexception);
+ UNIT_ASSERT_EXCEPTION(static_cast<TColumnNames>(mixed), yexception);
+ }
+
+ Y_UNIT_TEST(KeyBound)
+ {
+ auto keyBound = TKeyBound(ERelation::Greater, TKey(7, "a", TNode()("x", "y")));
+ UNIT_ASSERT_VALUES_EQUAL(keyBound.Relation(), ERelation::Greater);
+ UNIT_ASSERT_EQUAL(keyBound.Key(), TKey(7, "a", TNode()("x", "y")));
+
+ auto keyBound1 = TKeyBound().Relation(ERelation::Greater).Key(TKey(7, "a", TNode()("x", "y")));
+ auto expectedNode = TNode()
+ .Add(">")
+ .Add(TNode().Add(7).Add("a").Add(TNode()("x", "y")));
+
+ UNIT_ASSERT_VALUES_EQUAL(expectedNode, BuildYsonNodeFluently().Value(keyBound));
+ UNIT_ASSERT_VALUES_EQUAL(expectedNode, BuildYsonNodeFluently().Value(keyBound1));
+
+ keyBound.Relation(ERelation::LessOrEqual);
+ keyBound.Key(TKey("A", 7));
+ UNIT_ASSERT_VALUES_EQUAL(keyBound.Relation(), ERelation::LessOrEqual);
+ UNIT_ASSERT_EQUAL(keyBound.Key(), TKey("A", 7));
+
+ UNIT_ASSERT_VALUES_EQUAL(
+ BuildYsonNodeFluently().Value(keyBound),
+ TNode()
+ .Add("<=")
+ .Add(TNode().Add("A").Add(7)));
+ }
+
+ Y_UNIT_TEST(TTableSchema)
+ {
+ TTableSchema schema;
+ schema
+ .AddColumn(TColumnSchema().Name("a").Type(EValueType::VT_STRING).SortOrder(SO_ASCENDING))
+ .AddColumn(TColumnSchema().Name("b").Type(EValueType::VT_UINT64))
+ .AddColumn(TColumnSchema().Name("c").Type(EValueType::VT_INT64));
+ auto checkSortBy = [](TTableSchema schema, const TVector<TString>& columns) {
+ auto initialSchema = schema;
+ schema.SortBy(columns);
+ for (auto i: xrange(columns.size())) {
+ UNIT_ASSERT_VALUES_EQUAL(schema.Columns()[i].Name(), columns[i]);
+ UNIT_ASSERT_VALUES_EQUAL(schema.Columns()[i].SortOrder(), ESortOrder::SO_ASCENDING);
+ }
+ for (auto i: xrange(columns.size(), (size_t)initialSchema.Columns().size())) {
+ UNIT_ASSERT_VALUES_EQUAL(schema.Columns()[i].SortOrder(), Nothing());
+ }
+ UNIT_ASSERT_VALUES_EQUAL(initialSchema.Columns().size(), schema.Columns().size());
+ return schema;
+ };
+ auto newSchema = checkSortBy(schema, {"b"});
+ UNIT_ASSERT_VALUES_EQUAL(newSchema.Columns()[1].Name(), TString("a"));
+ UNIT_ASSERT_VALUES_EQUAL(newSchema.Columns()[2].Name(), TString("c"));
+ checkSortBy(schema, {"b", "c"});
+ checkSortBy(schema, {"c", "a"});
+ UNIT_ASSERT_EXCEPTION(checkSortBy(schema, {"b", "b"}), yexception);
+ UNIT_ASSERT_EXCEPTION(checkSortBy(schema, {"a", "junk"}), yexception);
+ }
+
+ Y_UNIT_TEST(TColumnSchema_TypeV3)
+ {
+ {
+ auto column = TColumnSchema().Type(NTi::Interval());
+ UNIT_ASSERT_VALUES_EQUAL(column.Required(), true);
+ UNIT_ASSERT_VALUES_EQUAL(column.Type(), VT_INTERVAL);
+ }
+ {
+ auto column = TColumnSchema().Type(NTi::Optional(NTi::Date()));
+ UNIT_ASSERT_VALUES_EQUAL(column.Required(), false);
+ UNIT_ASSERT_VALUES_EQUAL(column.Type(), VT_DATE);
+ }
+ {
+ auto column = TColumnSchema().Type(NTi::Null());
+ UNIT_ASSERT_VALUES_EQUAL(column.Required(), false);
+ UNIT_ASSERT_VALUES_EQUAL(column.Type(), VT_NULL);
+ }
+ {
+ auto column = TColumnSchema().Type(NTi::Optional(NTi::Null()));
+ UNIT_ASSERT_VALUES_EQUAL(column.Required(), false);
+ UNIT_ASSERT_VALUES_EQUAL(column.Type(), VT_ANY);
+ }
+ }
+
+ Y_UNIT_TEST(ToTypeV3)
+ {
+ UNIT_ASSERT_VALUES_EQUAL(*ToTypeV3(VT_INT32, true), *NTi::Int32());
+ UNIT_ASSERT_VALUES_EQUAL(*ToTypeV3(VT_UTF8, false), *NTi::Optional(NTi::Utf8()));
+ }
+
+ Y_UNIT_TEST(DeserializeColumn)
+ {
+ auto deserialize = [] (TStringBuf yson) {
+ auto node = NodeFromYsonString(yson);
+ TColumnSchema column;
+ Deserialize(column, node);
+ return column;
+ };
+
+ auto column = deserialize("{name=foo; type=int64; required=%false}");
+ UNIT_ASSERT_VALUES_EQUAL(column.Name(), "foo");
+ UNIT_ASSERT_VALUES_EQUAL(*column.TypeV3(), *NTi::Optional(NTi::Int64()));
+
+ column = deserialize("{name=bar; type=utf8; required=%true; type_v3=utf8}");
+ UNIT_ASSERT_VALUES_EQUAL(column.Name(), "bar");
+ UNIT_ASSERT_VALUES_EQUAL(*column.TypeV3(), *NTi::Utf8());
+ }
+
+ Y_UNIT_TEST(ColumnSchemaEquality)
+ {
+ auto base = TColumnSchema()
+ .Name("col")
+ .TypeV3(NTi::Optional(NTi::List(NTi::String())))
+ .SortOrder(ESortOrder::SO_ASCENDING)
+ .Lock("lock")
+ .Expression("x + 12")
+ .Aggregate("sum")
+ .Group("group");
+
+ auto other = base;
+ ASSERT_SERIALIZABLES_EQUAL(other, base);
+ other.Name("other");
+ ASSERT_SERIALIZABLES_UNEQUAL(other, base);
+
+ other = base;
+ other.TypeV3(NTi::List(NTi::String()));
+ ASSERT_SERIALIZABLES_UNEQUAL(other, base);
+
+ other = base;
+ other.ResetSortOrder();
+ ASSERT_SERIALIZABLES_UNEQUAL(other, base);
+
+ other = base;
+ other.Lock("lock1");
+ ASSERT_SERIALIZABLES_UNEQUAL(other, base);
+
+ other = base;
+ other.Expression("x + 13");
+ ASSERT_SERIALIZABLES_UNEQUAL(other, base);
+
+ other = base;
+ other.ResetAggregate();
+ ASSERT_SERIALIZABLES_UNEQUAL(other, base);
+
+ other = base;
+ other.Group("group1");
+ ASSERT_SERIALIZABLES_UNEQUAL(other, base);
+ }
+
+ Y_UNIT_TEST(TableSchemaEquality)
+ {
+ auto col1 = TColumnSchema()
+ .Name("col1")
+ .TypeV3(NTi::Optional(NTi::List(NTi::String())))
+ .SortOrder(ESortOrder::SO_ASCENDING);
+
+ auto col2 = TColumnSchema()
+ .Name("col2")
+ .TypeV3(NTi::Uint32());
+
+ auto schema = TTableSchema()
+ .AddColumn(col1)
+ .AddColumn(col2)
+ .Strict(true)
+ .UniqueKeys(true);
+
+ auto other = schema;
+ ASSERT_SERIALIZABLES_EQUAL(other, schema);
+
+ other.Strict(false);
+ ASSERT_SERIALIZABLES_UNEQUAL(other, schema);
+
+ other = schema;
+ other.MutableColumns()[0].TypeV3(NTi::List(NTi::String()));
+ ASSERT_SERIALIZABLES_UNEQUAL(other, schema);
+
+ other = schema;
+ other.MutableColumns().push_back(col1);
+ ASSERT_SERIALIZABLES_UNEQUAL(other, schema);
+
+ other = schema;
+ other.UniqueKeys(false);
+ ASSERT_SERIALIZABLES_UNEQUAL(other, schema);
+ }
+}
diff --git a/yt/cpp/mapreduce/interface/common_ut.h b/yt/cpp/mapreduce/interface/common_ut.h
new file mode 100644
index 0000000000..6f70f09bee
--- /dev/null
+++ b/yt/cpp/mapreduce/interface/common_ut.h
@@ -0,0 +1 @@
+#pragma once
diff --git a/yt/cpp/mapreduce/interface/config.cpp b/yt/cpp/mapreduce/interface/config.cpp
new file mode 100644
index 0000000000..b474dc0844
--- /dev/null
+++ b/yt/cpp/mapreduce/interface/config.cpp
@@ -0,0 +1,321 @@
+#include "config.h"
+
+#include "operation.h"
+
+#include <yt/cpp/mapreduce/interface/logging/yt_log.h>
+
+#include <library/cpp/json/json_reader.h>
+#include <library/cpp/svnversion/svnversion.h>
+
+#include <library/cpp/yson/node/node_builder.h>
+#include <library/cpp/yson/node/node_io.h>
+
+#include <library/cpp/yson/json/yson2json_adapter.h>
+
+#include <util/string/strip.h>
+#include <util/folder/dirut.h>
+#include <util/folder/path.h>
+#include <util/stream/file.h>
+#include <util/generic/singleton.h>
+#include <util/string/builder.h>
+#include <util/string/cast.h>
+#include <util/string/type.h>
+#include <util/system/hostname.h>
+#include <util/system/user.h>
+#include <util/system/env.h>
+
+namespace NYT {
+
+////////////////////////////////////////////////////////////////////////////////
+
+bool TConfig::GetBool(const char* var, bool defaultValue)
+{
+ TString val = GetEnv(var, "");
+ if (val.empty()) {
+ return defaultValue;
+ }
+ return IsTrue(val);
+}
+
+int TConfig::GetInt(const char* var, int defaultValue)
+{
+ int result = 0;
+ TString val = GetEnv(var, "");
+ if (val.empty()) {
+ return defaultValue;
+ }
+ try {
+ result = FromString<int>(val);
+ } catch (const yexception& e) {
+ ythrow yexception() << "Cannot parse " << var << '=' << val << " as integer: " << e.what();
+ }
+ return result;
+}
+
+TDuration TConfig::GetDuration(const char* var, TDuration defaultValue)
+{
+ return TDuration::Seconds(GetInt(var, defaultValue.Seconds()));
+}
+
+EEncoding TConfig::GetEncoding(const char* var)
+{
+ const TString encodingName = GetEnv(var, "identity");
+ EEncoding encoding;
+ if (TryFromString(encodingName, encoding)) {
+ return encoding;
+ } else {
+ ythrow yexception() << var << ": encoding '" << encodingName << "' is not supported";
+ }
+}
+
+ EUploadDeduplicationMode TConfig::GetUploadingDeduplicationMode(
+ const char* var,
+ EUploadDeduplicationMode defaultValue)
+{
+ const TString deduplicationMode = GetEnv(var, TEnumTraits<EUploadDeduplicationMode>::ToString(defaultValue));
+ return TEnumTraits<EUploadDeduplicationMode>::FromString(deduplicationMode);
+}
+
+void TConfig::ValidateToken(const TString& token)
+{
+ for (size_t i = 0; i < token.size(); ++i) {
+ ui8 ch = token[i];
+ if (ch < 0x21 || ch > 0x7e) {
+ ythrow yexception() << "Incorrect token character '" << ch << "' at position " << i;
+ }
+ }
+}
+
+TString TConfig::LoadTokenFromFile(const TString& tokenPath)
+{
+ TFsPath path(tokenPath);
+ return path.IsFile() ? Strip(TIFStream(path).ReadAll()) : TString();
+}
+
+TNode TConfig::LoadJsonSpec(const TString& strSpec)
+{
+ TNode spec;
+ TStringInput input(strSpec);
+ TNodeBuilder builder(&spec);
+ TYson2JsonCallbacksAdapter callbacks(&builder);
+
+ Y_ENSURE(NJson::ReadJson(&input, &callbacks), "Cannot parse json spec: " << strSpec);
+ Y_ENSURE(spec.IsMap(), "Json spec is not a map");
+
+ return spec;
+}
+
+TRichYPath TConfig::LoadApiFilePathOptions(const TString& ysonMap)
+{
+ TNode attributes;
+ try {
+ attributes = NodeFromYsonString(ysonMap);
+ } catch (const yexception& exc) {
+ ythrow yexception() << "Failed to parse YT_API_FILE_PATH_OPTIONS (it must be yson map): " << exc;
+ }
+ TNode pathNode = "";
+ pathNode.Attributes() = attributes;
+ TRichYPath path;
+ Deserialize(path, pathNode);
+ return path;
+}
+
+void TConfig::LoadToken()
+{
+ if (auto envToken = GetEnv("YT_TOKEN")) {
+ Token = envToken;
+ } else if (auto envToken = GetEnv("YT_SECURE_VAULT_YT_TOKEN")) {
+ // If this code runs inside an vanilla peration in YT
+ // it should not use regular environment variable `YT_TOKEN`
+ // because it would be visible in UI.
+ // Token should be passed via `secure_vault` parameter in operation spec.
+ Token = envToken;
+ } else if (auto tokenPath = GetEnv("YT_TOKEN_PATH")) {
+ Token = LoadTokenFromFile(tokenPath);
+ } else {
+ Token = LoadTokenFromFile(GetHomeDir() + "/.yt/token");
+ }
+ ValidateToken(Token);
+}
+
+void TConfig::LoadSpec()
+{
+ TString strSpec = GetEnv("YT_SPEC", "{}");
+ Spec = LoadJsonSpec(strSpec);
+
+ strSpec = GetEnv("YT_TABLE_WRITER", "{}");
+ TableWriter = LoadJsonSpec(strSpec);
+}
+
+void TConfig::LoadTimings()
+{
+ ConnectTimeout = GetDuration("YT_CONNECT_TIMEOUT",
+ TDuration::Seconds(10));
+
+ SocketTimeout = GetDuration("YT_SOCKET_TIMEOUT",
+ GetDuration("YT_SEND_RECEIVE_TIMEOUT", // common
+ TDuration::Seconds(60)));
+
+ AddressCacheExpirationTimeout = TDuration::Minutes(15);
+
+ CacheLockTimeoutPerGb = TDuration::MilliSeconds(1000.0 * 1_GB * 8 / 20_MB); // 20 Mbps = 20 MBps / 8.
+
+ TxTimeout = GetDuration("YT_TX_TIMEOUT",
+ TDuration::Seconds(120));
+
+ PingTimeout = GetDuration("YT_PING_TIMEOUT",
+ TDuration::Seconds(5));
+
+ PingInterval = GetDuration("YT_PING_INTERVAL",
+ TDuration::Seconds(5));
+
+ WaitLockPollInterval = TDuration::Seconds(5);
+
+ RetryInterval = GetDuration("YT_RETRY_INTERVAL",
+ TDuration::Seconds(3));
+
+ ChunkErrorsRetryInterval = GetDuration("YT_CHUNK_ERRORS_RETRY_INTERVAL",
+ TDuration::Seconds(60));
+
+ RateLimitExceededRetryInterval = GetDuration("YT_RATE_LIMIT_EXCEEDED_RETRY_INTERVAL",
+ TDuration::Seconds(60));
+
+ StartOperationRetryInterval = GetDuration("YT_START_OPERATION_RETRY_INTERVAL",
+ TDuration::Seconds(60));
+
+ HostListUpdateInterval = TDuration::Seconds(60);
+}
+
+void TConfig::Reset()
+{
+ Hosts = GetEnv("YT_HOSTS", "hosts");
+ Pool = GetEnv("YT_POOL");
+ Prefix = GetEnv("YT_PREFIX");
+ ApiVersion = GetEnv("YT_VERSION", "v3");
+ LogLevel = GetEnv("YT_LOG_LEVEL", "error");
+
+ ContentEncoding = GetEncoding("YT_CONTENT_ENCODING");
+ AcceptEncoding = GetEncoding("YT_ACCEPT_ENCODING");
+
+ GlobalTxId = GetEnv("YT_TRANSACTION", "");
+
+ UseAsyncTxPinger = false;
+ AsyncHttpClientThreads = 1;
+ AsyncTxPingerPoolThreads = 1;
+
+ ForceIpV4 = GetBool("YT_FORCE_IPV4");
+ ForceIpV6 = GetBool("YT_FORCE_IPV6");
+ UseHosts = GetBool("YT_USE_HOSTS", true);
+
+ LoadToken();
+ LoadSpec();
+ LoadTimings();
+
+ CacheUploadDeduplicationMode = GetUploadingDeduplicationMode("YT_UPLOAD_DEDUPLICATION", EUploadDeduplicationMode::Host);
+
+ RetryCount = Max(GetInt("YT_RETRY_COUNT", 10), 1);
+ ReadRetryCount = Max(GetInt("YT_READ_RETRY_COUNT", 30), 1);
+ StartOperationRetryCount = Max(GetInt("YT_START_OPERATION_RETRY_COUNT", 30), 1);
+
+ RemoteTempFilesDirectory = GetEnv("YT_FILE_STORAGE",
+ "//tmp/yt_wrapper/file_storage");
+ RemoteTempTablesDirectory = GetEnv("YT_TEMP_TABLES_STORAGE",
+ "//tmp/yt_wrapper/table_storage");
+ RemoteTempTablesDirectory = GetEnv("YT_TEMP_DIR",
+ RemoteTempTablesDirectory);
+
+ InferTableSchema = false;
+
+ UseClientProtobuf = GetBool("YT_USE_CLIENT_PROTOBUF", false);
+ NodeReaderFormat = ENodeReaderFormat::Auto;
+ ProtobufFormatWithDescriptors = true;
+
+ MountSandboxInTmpfs = GetBool("YT_MOUNT_SANDBOX_IN_TMPFS");
+
+ ApiFilePathOptions = LoadApiFilePathOptions(GetEnv("YT_API_FILE_PATH_OPTIONS", "{}"));
+
+ ConnectionPoolSize = GetInt("YT_CONNECTION_POOL_SIZE", 16);
+
+ TraceHttpRequestsMode = FromString<ETraceHttpRequestsMode>(to_lower(GetEnv("YT_TRACE_HTTP_REQUESTS", "never")));
+
+ CommandsWithFraming = {
+ "read_table",
+ "get_table_columnar_statistics",
+ "get_job_input",
+ "concatenate",
+ "partition_tables",
+ };
+}
+
+TConfig::TConfig()
+{
+ Reset();
+}
+
+TConfigPtr TConfig::Get()
+{
+ struct TConfigHolder
+ {
+ TConfigHolder()
+ : Config(::MakeIntrusive<TConfig>())
+ { }
+
+ TConfigPtr Config;
+ };
+
+ return Singleton<TConfigHolder>()->Config;
+}
+
+////////////////////////////////////////////////////////////////////////////////
+
+TProcessState::TProcessState()
+{
+ try {
+ FqdnHostName = ::FQDNHostName();
+ } catch (const yexception& e) {
+ try {
+ FqdnHostName = ::HostName();
+ } catch (const yexception& e) {
+ ythrow yexception() << "Cannot get fqdn and host name: " << e.what();
+ }
+ }
+
+ try {
+ UserName = ::GetUsername();
+ } catch (const yexception& e) {
+ ythrow yexception() << "Cannot get user name: " << e.what();
+ }
+
+ Pid = static_cast<int>(getpid());
+
+ if (!ClientVersion) {
+ ClientVersion = ::TStringBuilder() << "YT C++ native " << GetProgramCommitId();
+ }
+}
+
+static TString CensorString(TString input)
+{
+ static const TString prefix = "AQAD-";
+ if (input.find(prefix) == TString::npos) {
+ return input;
+ } else {
+ return TString(input.size(), '*');
+ }
+}
+
+void TProcessState::SetCommandLine(int argc, const char* argv[])
+{
+ for (int i = 0; i < argc; ++i) {
+ CommandLine.push_back(argv[i]);
+ CensoredCommandLine.push_back(CensorString(CommandLine.back()));
+ }
+}
+
+TProcessState* TProcessState::Get()
+{
+ return Singleton<TProcessState>();
+}
+
+////////////////////////////////////////////////////////////////////////////////
+
+} // namespace NYT
diff --git a/yt/cpp/mapreduce/interface/config.h b/yt/cpp/mapreduce/interface/config.h
new file mode 100644
index 0000000000..c44ad25f1c
--- /dev/null
+++ b/yt/cpp/mapreduce/interface/config.h
@@ -0,0 +1,228 @@
+#pragma once
+
+#include "fwd.h"
+#include "common.h"
+#include "node.h"
+
+#include <library/cpp/yt/misc/enum.h>
+
+#include <util/generic/maybe.h>
+#include <util/generic/string.h>
+#include <util/generic/hash_set.h>
+
+#include <util/datetime/base.h>
+
+namespace NYT {
+
+enum EEncoding : int
+{
+ E_IDENTITY /* "identity" */,
+ E_GZIP /* "gzip" */,
+ E_BROTLI /* "br" */,
+ E_Z_LZ4 /* "z-lz4" */,
+};
+
+enum class ENodeReaderFormat : int
+{
+ Yson, // Always use YSON format,
+ Skiff, // Always use Skiff format, throw exception if it's not possible (non-strict schema, dynamic table etc.)
+ Auto, // Use Skiff format if it's possible, YSON otherwise
+};
+
+enum class ETraceHttpRequestsMode
+{
+ // Never dump http requests.
+ Never /* "never" */,
+ // Dump failed http requests.
+ Error /* "error" */,
+ // Dump all http requests.
+ Always /* "always" */,
+};
+
+DEFINE_ENUM(EUploadDeduplicationMode,
+ // For each file only one process' thread from all possible hosts can upload it to the file cache at the same time.
+ // The others will wait for the uploading to finish and use already cached file.
+ ((Global) (0))
+
+ // For each file and each particular host only one process' thread can upload it to the file cache at the same time.
+ // The others will wait for the uploading to finish and use already cached file.
+ ((Host) (1))
+
+ // All processes' threads will upload a file to the cache concurrently.
+ ((Disabled) (2))
+);
+
+////////////////////////////////////////////////////////////////////////////////
+
+struct TConfig
+ : public TThrRefBase
+{
+ TString Hosts;
+ TString Pool;
+ TString Token;
+ TString Prefix;
+ TString ApiVersion;
+ TString LogLevel;
+
+ // Compression for data that is sent to YT cluster.
+ EEncoding ContentEncoding;
+
+ // Compression for data that is read from YT cluster.
+ EEncoding AcceptEncoding;
+
+ TString GlobalTxId;
+
+ bool ForceIpV4;
+ bool ForceIpV6;
+ bool UseHosts;
+
+ TDuration HostListUpdateInterval;
+
+ TNode Spec;
+ TNode TableWriter;
+
+ TDuration ConnectTimeout;
+ TDuration SocketTimeout;
+ TDuration AddressCacheExpirationTimeout;
+ TDuration TxTimeout;
+ TDuration PingTimeout;
+ TDuration PingInterval;
+
+ bool UseAsyncTxPinger;
+ int AsyncHttpClientThreads;
+ int AsyncTxPingerPoolThreads;
+
+ // How often should we poll for lock state
+ TDuration WaitLockPollInterval;
+
+ TDuration RetryInterval;
+ TDuration ChunkErrorsRetryInterval;
+
+ TDuration RateLimitExceededRetryInterval;
+ TDuration StartOperationRetryInterval;
+
+ int RetryCount;
+ int ReadRetryCount;
+ int StartOperationRetryCount;
+
+ /// @brief Period for checking status of running operation.
+ TDuration OperationTrackerPollPeriod = TDuration::Seconds(5);
+
+ TString RemoteTempFilesDirectory;
+ TString RemoteTempTablesDirectory;
+
+ //
+ // Infer schemas for nonexstent tables from typed rows (e.g. protobuf)
+ // when writing from operation or client writer.
+ // This options can be overriden in TOperationOptions and TTableWriterOptions.
+ bool InferTableSchema;
+
+ bool UseClientProtobuf;
+ ENodeReaderFormat NodeReaderFormat;
+ bool ProtobufFormatWithDescriptors;
+
+ int ConnectionPoolSize;
+
+ /// Defines replication factor that is used for files that are uploaded to YT
+ /// to use them in operations.
+ int FileCacheReplicationFactor = 10;
+
+ /// @brief Used when waiting for other process which uploads the same file to the file cache.
+ ///
+ /// If CacheUploadDeduplicationMode is not Disabled, current process can wait for some other
+ /// process which is uploading the same file. This value is proportional to the timeout of waiting,
+ /// actual timeout computes as follows: fileSizeGb * CacheLockTimeoutPerGb.
+ /// Default timeout assumes that host has uploading speed equal to 20 Mb/s.
+ /// If timeout was reached, the file will be uploaded by current process without any other waits.
+ TDuration CacheLockTimeoutPerGb;
+
+ /// @brief Used to prevent concurrent uploading of the same file to the file cache.
+ /// NB: Each mode affects only users with the same mode enabled.
+ EUploadDeduplicationMode CacheUploadDeduplicationMode;
+
+ bool MountSandboxInTmpfs;
+
+ /// @brief Set upload options (e.g.) for files created by library.
+ ///
+ /// Path itself is always ignored but path options (e.g. `BypassArtifactCache`) are used when uploading system files:
+ /// cppbinary, job state, etc
+ TRichYPath ApiFilePathOptions;
+
+ // Testing options, should never be used in user programs.
+ bool UseAbortableResponse = false;
+ bool EnableDebugMetrics = false;
+
+ //
+ // There is optimization used with local YT that enables to skip binary upload and use real binary path.
+ // When EnableLocalModeOptimization is set to false this optimization is completely disabled.
+ bool EnableLocalModeOptimization = true;
+
+ //
+ // If you want see stderr even if you jobs not failed set this true.
+ bool WriteStderrSuccessfulJobs = false;
+
+ //
+ // This configuration is useful for debug.
+ // If set to ETraceHttpRequestsMode::Error library will dump all http error requests.
+ // If set to ETraceHttpRequestsMode::All library will dump all http requests.
+ // All tracing occurres as DEBUG level logging.
+ ETraceHttpRequestsMode TraceHttpRequestsMode = ETraceHttpRequestsMode::Never;
+
+ TString SkynetApiHost;
+
+ // Sets SO_PRIORITY option on the socket
+ TMaybe<int> SocketPriority;
+
+ // Framing settings
+ // (cf. https://yt.yandex-team.ru/docs/description/proxy/http_proxy_reference#framing).
+ THashSet<TString> CommandsWithFraming;
+
+ static bool GetBool(const char* var, bool defaultValue = false);
+ static int GetInt(const char* var, int defaultValue);
+ static TDuration GetDuration(const char* var, TDuration defaultValue);
+ static EEncoding GetEncoding(const char* var);
+ static EUploadDeduplicationMode GetUploadingDeduplicationMode(
+ const char* var,
+ EUploadDeduplicationMode defaultValue);
+
+ static void ValidateToken(const TString& token);
+ static TString LoadTokenFromFile(const TString& tokenPath);
+
+ static TNode LoadJsonSpec(const TString& strSpec);
+
+ static TRichYPath LoadApiFilePathOptions(const TString& ysonMap);
+
+ void LoadToken();
+ void LoadSpec();
+ void LoadTimings();
+
+ void Reset();
+
+ TConfig();
+
+ static TConfigPtr Get();
+};
+
+////////////////////////////////////////////////////////////////////////////////
+
+struct TProcessState
+{
+ TString FqdnHostName;
+ TString UserName;
+ TVector<TString> CommandLine;
+
+ // Command line with everything that looks like tokens censored.
+ TVector<TString> CensoredCommandLine;
+ int Pid;
+ TString ClientVersion;
+
+ TProcessState();
+
+ void SetCommandLine(int argc, const char* argv[]);
+
+ static TProcessState* Get();
+};
+
+////////////////////////////////////////////////////////////////////////////////
+
+} // namespace NYT
diff --git a/yt/cpp/mapreduce/interface/config_ut.cpp b/yt/cpp/mapreduce/interface/config_ut.cpp
new file mode 100644
index 0000000000..e49ba02108
--- /dev/null
+++ b/yt/cpp/mapreduce/interface/config_ut.cpp
@@ -0,0 +1,20 @@
+#include <library/cpp/testing/unittest/registar.h>
+
+#include <yt/cpp/mapreduce/interface/config.h>
+
+using namespace NYT;
+
+Y_UNIT_TEST_SUITE(ConfigSuite)
+{
+ Y_UNIT_TEST(TestReset) {
+ // very limited test, checks only one config field
+
+ auto origConfig = *TConfig::Get();
+ TConfig::Get()->Reset();
+ UNIT_ASSERT_VALUES_EQUAL(origConfig.Hosts, TConfig::Get()->Hosts);
+
+ TConfig::Get()->Hosts = "hosts/fb867";
+ TConfig::Get()->Reset();
+ UNIT_ASSERT_VALUES_EQUAL(origConfig.Hosts, TConfig::Get()->Hosts);
+ }
+}
diff --git a/yt/cpp/mapreduce/interface/constants.h b/yt/cpp/mapreduce/interface/constants.h
new file mode 100644
index 0000000000..4f70410814
--- /dev/null
+++ b/yt/cpp/mapreduce/interface/constants.h
@@ -0,0 +1,19 @@
+#pragma once
+
+
+#include <util/system/defaults.h>
+
+
+namespace NYT {
+
+////////////////////////////////////////////////////////////////////////////////
+
+
+// Maximum number of input tables for operation.
+// If greater number of input tables are provided behaviour is undefined
+// (it might work ok or it might fail or it might work very slowly).
+constexpr size_t MaxInputTableCount = 1000;
+
+////////////////////////////////////////////////////////////////////////////////
+
+} // namespace NYT
diff --git a/yt/cpp/mapreduce/interface/cypress.cpp b/yt/cpp/mapreduce/interface/cypress.cpp
new file mode 100644
index 0000000000..53686effd2
--- /dev/null
+++ b/yt/cpp/mapreduce/interface/cypress.cpp
@@ -0,0 +1,24 @@
+#include "cypress.h"
+
+#include "config.h"
+
+namespace NYT {
+
+////////////////////////////////////////////////////////////////////////////////
+
+void ICypressClient::Concatenate(
+ const TVector<TYPath>& sourcePaths,
+ const TYPath& destinationPath,
+ const TConcatenateOptions& options)
+{
+ TVector<TRichYPath> richSourcePaths;
+ richSourcePaths.reserve(sourcePaths.size());
+ for (const auto& path : sourcePaths) {
+ richSourcePaths.emplace_back(path);
+ }
+ Concatenate(richSourcePaths, destinationPath, options);
+}
+
+////////////////////////////////////////////////////////////////////////////////
+
+} // namespace NYT
diff --git a/yt/cpp/mapreduce/interface/cypress.h b/yt/cpp/mapreduce/interface/cypress.h
new file mode 100644
index 0000000000..e05316ebc6
--- /dev/null
+++ b/yt/cpp/mapreduce/interface/cypress.h
@@ -0,0 +1,252 @@
+#pragma once
+
+///
+/// @file yt/cpp/mapreduce/interface/cypress.h
+///
+/// Header containing interface to execute [Cypress](https://yt.yandex-team.ru/docs/description/common/cypress.html)-related commands.
+
+#include "fwd.h"
+
+#include "client_method_options.h"
+#include "common.h"
+#include "node.h"
+
+#include <util/generic/maybe.h>
+
+namespace NYT {
+
+////////////////////////////////////////////////////////////////////////////////
+
+/// Client interface to execute [Cypress](https://yt.yandex-team.ru/docs/description/common/cypress.html)-related commands.
+class ICypressClient
+{
+public:
+ virtual ~ICypressClient() = default;
+
+ ///
+ /// @brief Create Cypress node of given type.
+ ///
+ /// @param path Path in Cypress to the new object.
+ /// @param type New node type.
+ /// @param options Optional parameters.
+ ///
+ /// @return Id of the created node.
+ ///
+ /// @note All but the last components must exist unless @ref NYT::TCreateOptions::Recursive is `true`.
+ ///
+ /// @note The node itself must not exist unless @ref NYT::TCreateOptions::IgnoreExisting or @ref NYT::TCreateOptions::Force are `true`.
+ ///
+ /// @see [YT doc](https://yt.yandex-team.ru/docs/api/commands.html#create)
+ virtual TNodeId Create(
+ const TYPath& path,
+ ENodeType type,
+ const TCreateOptions& options = TCreateOptions()) = 0;
+
+ ///
+ /// @brief Create table with schema inferred from the template argument.
+ ///
+ /// @tparam TRowType type of C++ representation of the row to be stored in the table.
+ /// @param path Path in Cypress to the new table.
+ /// @param sortColumns List of columns to mark as sorted in schema.
+ /// @param options Optional parameters.
+ ///
+ /// @return Id of the created node.
+ ///
+ /// @note If "schema" is passed in `options.Attributes` it has priority over the deduced schema (the latter is ignored).
+ template <typename TRowType>
+ TNodeId CreateTable(
+ const TYPath& path,
+ const TSortColumns& sortColumns = TSortColumns(),
+ const TCreateOptions& options = TCreateOptions());
+
+ ///
+ /// @brief Remove Cypress node.
+ ///
+ /// @see [YT doc](https://yt.yandex-team.ru/docs/api/commands.html#remove)
+ virtual void Remove(
+ const TYPath& path,
+ const TRemoveOptions& options = TRemoveOptions()) = 0;
+
+ ///
+ /// @brief Check if Cypress node exists.
+ ///
+ /// @see [YT doc](https://yt.yandex-team.ru/docs/api/commands.html#exists)
+ virtual bool Exists(
+ const TYPath& path,
+ const TExistsOptions& options = TExistsOptions()) = 0;
+
+ ///
+ /// @brief Get Cypress node contents.
+ ///
+ /// @see [YT doc](https://yt.yandex-team.ru/docs/api/commands.html#get)
+ virtual TNode Get(
+ const TYPath& path,
+ const TGetOptions& options = TGetOptions()) = 0;
+
+ ///
+ /// @brief Set Cypress node contents.
+ ///
+ /// @see [YT doc](https://yt.yandex-team.ru/docs/api/commands.html#set)
+ virtual void Set(
+ const TYPath& path,
+ const TNode& value,
+ const TSetOptions& options = TSetOptions()) = 0;
+
+ ///
+ /// @brief Set multiple attributes for cypress path.
+ ///
+ /// @param path Path to root of the attributes to be set e.g. "//path/to/table/@";
+ /// it is important to make sure that path ends with "/@".
+ /// @param attributes Map with attributes
+ /// @param options Optional parameters.
+ ///
+ /// @see [YT doc](https://yt.yandex-team.ru/docs/api/commands.html#multiset_attributes)
+ virtual void MultisetAttributes(
+ const TYPath& path,
+ const TNode::TMapType& attributes,
+ const TMultisetAttributesOptions& options = TMultisetAttributesOptions()) = 0;
+
+ ///
+ /// @brief List Cypress map or attribute node keys.
+ ///
+ /// @param path Path in the tree to the node in question.
+ /// @param options Optional parameters.
+ ///
+ /// @return List of keys with attributes (if they were required in @ref NYT::TListOptions::AttributeFilter).
+ ///
+ /// @see [YT doc](https://yt.yandex-team.ru/docs/api/commands.html#list)
+ virtual TNode::TListType List(
+ const TYPath& path,
+ const TListOptions& options = TListOptions()) = 0;
+
+ ///
+ /// @brief Copy Cypress node.
+ ///
+ /// @see [YT doc](https://yt.yandex-team.ru/docs/api/commands.html#copy)
+ virtual TNodeId Copy(
+ const TYPath& sourcePath,
+ const TYPath& destinationPath,
+ const TCopyOptions& options = TCopyOptions()) = 0;
+
+ ///
+ /// @brief Move Cypress node (equivalent to copy-then-remove).
+ ///
+ /// @see [YT doc](https://yt.yandex-team.ru/docs/api/commands.html#move)
+ virtual TNodeId Move(
+ const TYPath& sourcePath,
+ const TYPath& destinationPath,
+ const TMoveOptions& options = TMoveOptions()) = 0;
+
+ ///
+ /// @brief Create link to Cypress node.
+ ///
+ /// @see [YT doc](https://yt.yandex-team.ru/docs/api/commands.html#link)
+ virtual TNodeId Link(
+ const TYPath& targetPath,
+ const TYPath& linkPath,
+ const TLinkOptions& options = TLinkOptions()) = 0;
+
+ ///
+ /// @brief Concatenate several tables into one.
+ ///
+ /// @see [YT doc](https://yt.yandex-team.ru/docs/api/commands.html#concatenate)
+ virtual void Concatenate(
+ const TVector<TRichYPath>& sourcePaths,
+ const TRichYPath& destinationPath,
+ const TConcatenateOptions& options = TConcatenateOptions()) = 0;
+
+ ///
+ /// @brief Concatenate several tables into one.
+ ///
+ /// @see [YT doc](https://yt.yandex-team.ru/docs/api/commands.html#concatenate)
+ virtual void Concatenate(
+ const TVector<TYPath>& sourcePaths,
+ const TYPath& destinationPath,
+ const TConcatenateOptions& options = TConcatenateOptions());
+
+ ///
+ /// @brief Canonize YPath, moving all the complex YPath features to attributes.
+ ///
+ /// @see [YT doc](https://yt.yandex-team.ru/docs/api/commands.html#parse-ypath)
+ virtual TRichYPath CanonizeYPath(const TRichYPath& path) = 0;
+
+ ///
+ /// @brief Get statistics for given sets of columns in given table ranges.
+ ///
+ /// @note Paths must contain column selectors.
+ ///
+ /// @see [YT doc](https://yt.yandex-team.ru/docs/api/commands.html#get-table-columnar-statistics)
+ virtual TVector<TTableColumnarStatistics> GetTableColumnarStatistics(
+ const TVector<TRichYPath>& paths,
+ const TGetTableColumnarStatisticsOptions& options = {}) = 0;
+
+ ///
+ /// @brief Divide input tables into disjoint partitions.
+ ///
+ /// Resulted partitions are vectors of rich YPaths.
+ /// Each partition can be given to a separate worker for further independent processing.
+ ///
+ virtual TMultiTablePartitions GetTablePartitions(
+ const TVector<TRichYPath>& paths,
+ const TGetTablePartitionsOptions& options) = 0;
+
+ ///
+ /// @brief Get file from file cache.
+ ///
+ /// @param md5Signature MD5 digest of the file.
+ /// @param cachePath Path to the file cache.
+ /// @param options Optional parameters.
+ ///
+ /// @see [YT doc](https://yt.yandex-team.ru/docs/api/commands.html#get-file-from-cache)
+ virtual TMaybe<TYPath> GetFileFromCache(
+ const TString& md5Signature,
+ const TYPath& cachePath,
+ const TGetFileFromCacheOptions& options = TGetFileFromCacheOptions()) = 0;
+
+ ///
+ /// @brief Put file to file cache.
+ ///
+ /// @param filePath Path in Cypress to the file to cache.
+ /// @param md5Signature Expected MD5 digest of the file.
+ /// @param cachePath Path to the file cache.
+ /// @param options Optional parameters.
+ ///
+ /// @note The file in `filePath` must have been written with @ref NYT::TFileWriterOptions::ComputeMD5 set to `true`.
+ ///
+ /// @see [YT doc](https://yt.yandex-team.ru/docs/api/commands.html#put-file-to-cache)
+ virtual TYPath PutFileToCache(
+ const TYPath& filePath,
+ const TString& md5Signature,
+ const TYPath& cachePath,
+ const TPutFileToCacheOptions& options = TPutFileToCacheOptions()) = 0;
+};
+
+////////////////////////////////////////////////////////////////////////////////
+
+template <typename TRowType>
+TNodeId ICypressClient::CreateTable(
+ const TYPath& path,
+ const TSortColumns& sortColumns,
+ const TCreateOptions& options)
+{
+ static_assert(
+ std::is_base_of_v<::google::protobuf::Message, TRowType>,
+ "TRowType must be inherited from google::protobuf::Message");
+
+ TCreateOptions actualOptions = options;
+ if (!actualOptions.Attributes_) {
+ actualOptions.Attributes_ = TNode::CreateMap();
+ }
+
+ if (!actualOptions.Attributes_->HasKey("schema")) {
+ actualOptions.Attributes_->AsMap().emplace(
+ "schema",
+ CreateTableSchema<TRowType>(sortColumns).ToNode());
+ }
+
+ return Create(path, ENodeType::NT_TABLE, actualOptions);
+}
+
+////////////////////////////////////////////////////////////////////////////////
+
+} // namespace NYT
diff --git a/yt/cpp/mapreduce/interface/error_codes.h b/yt/cpp/mapreduce/interface/error_codes.h
new file mode 100644
index 0000000000..d8d76e04fd
--- /dev/null
+++ b/yt/cpp/mapreduce/interface/error_codes.h
@@ -0,0 +1,468 @@
+#pragma once
+
+//
+// generated by generate-error-codes.py
+//
+
+namespace NYT {
+namespace NClusterErrorCodes {
+
+
+
+// from ./core/misc/public.h
+
+////////////////////////////////////////////////////////////////////////////////
+
+ constexpr int OK = 0;
+ constexpr int Generic = 1;
+ constexpr int Canceled = 2;
+ constexpr int Timeout = 3;
+
+////////////////////////////////////////////////////////////////////////////////
+
+
+
+
+// from ./core/rpc/public.h
+namespace NRpc {
+
+////////////////////////////////////////////////////////////////////////////////
+
+ constexpr int TransportError = 100;
+ constexpr int ProtocolError = 101;
+ constexpr int NoSuchService = 102;
+ constexpr int NoSuchMethod = 103;
+ constexpr int Unavailable = 105;
+ constexpr int PoisonPill = 106;
+ constexpr int RequestQueueSizeLimitExceeded = 108;
+ constexpr int AuthenticationError = 109;
+ constexpr int InvalidCsrfToken = 110;
+ constexpr int InvalidCredentials = 111;
+ constexpr int StreamingNotSupported = 112;
+
+////////////////////////////////////////////////////////////////////////////////
+
+} // namespace NRpc
+
+
+
+// from ./core/bus/public.h
+namespace NBus {
+
+////////////////////////////////////////////////////////////////////////////////
+
+ constexpr int TransportError = 100;
+
+////////////////////////////////////////////////////////////////////////////////
+
+} // namespace NBus
+
+
+
+// from ./client/scheduler/public.h
+namespace NScheduler {
+
+////////////////////////////////////////////////////////////////////////////////
+
+ constexpr int NoSuchOperation = 200;
+ constexpr int InvalidOperationState = 201;
+ constexpr int TooManyOperations = 202;
+ constexpr int NoSuchJob = 203;
+ constexpr int OperationFailedOnJobRestart = 210;
+ constexpr int OperationFailedWithInconsistentLocking = 211;
+ constexpr int OperationControllerCrashed = 212;
+ constexpr int TestingError = 213;
+
+////////////////////////////////////////////////////////////////////////////////
+
+} // namespace NScheduler
+
+
+
+// from ./client/table_client/public.h
+namespace NTableClient {
+
+////////////////////////////////////////////////////////////////////////////////
+
+ constexpr int SortOrderViolation = 301;
+ constexpr int InvalidDoubleValue = 302;
+ constexpr int IncomparableType = 303;
+ constexpr int UnhashableType = 304;
+ // E.g. name table with more than #MaxColumnId columns (may come from legacy chunks).
+ constexpr int CorruptedNameTable = 305;
+ constexpr int UniqueKeyViolation = 306;
+ constexpr int SchemaViolation = 307;
+ constexpr int RowWeightLimitExceeded = 308;
+ constexpr int InvalidColumnFilter = 309;
+ constexpr int InvalidColumnRenaming = 310;
+ constexpr int IncompatibleKeyColumns = 311;
+ constexpr int ReaderDeadlineExpired = 312;
+ constexpr int TimestampOutOfRange = 313;
+
+////////////////////////////////////////////////////////////////////////////////
+
+} // namespace NTableClient
+
+
+
+// from ./client/cypress_client/public.h
+namespace NCypressClient {
+
+////////////////////////////////////////////////////////////////////////////////
+
+ constexpr int SameTransactionLockConflict = 400;
+ constexpr int DescendantTransactionLockConflict = 401;
+ constexpr int ConcurrentTransactionLockConflict = 402;
+ constexpr int PendingLockConflict = 403;
+ constexpr int LockDestroyed = 404;
+
+////////////////////////////////////////////////////////////////////////////////
+
+} // namespace NCypressClient
+
+
+
+// from ./core/ytree/public.h
+namespace NYTree {
+
+////////////////////////////////////////////////////////////////////////////////
+
+ constexpr int ResolveError = 500;
+ constexpr int AlreadyExists = 501;
+ constexpr int MaxChildCountViolation = 502;
+ constexpr int MaxStringLengthViolation = 503;
+ constexpr int MaxAttributeSizeViolation = 504;
+ constexpr int MaxKeyLengthViolation = 505;
+
+////////////////////////////////////////////////////////////////////////////////
+
+} // namespace NYTree
+
+
+
+// from ./client/hydra/public.h
+namespace NHydra {
+
+////////////////////////////////////////////////////////////////////////////////
+
+ constexpr int NoSuchSnapshot = 600;
+ constexpr int NoSuchChangelog = 601;
+ constexpr int InvalidEpoch = 602;
+ constexpr int InvalidVersion = 603;
+ constexpr int OutOfOrderMutations = 609;
+ constexpr int InvalidSnapshotVersion = 610;
+
+////////////////////////////////////////////////////////////////////////////////
+
+} // namespace NHydra
+
+
+
+// from ./client/chunk_client/public.h
+namespace NChunkClient {
+
+////////////////////////////////////////////////////////////////////////////////
+
+ constexpr int AllTargetNodesFailed = 700;
+ constexpr int SendBlocksFailed = 701;
+ constexpr int NoSuchSession = 702;
+ constexpr int SessionAlreadyExists = 703;
+ constexpr int ChunkAlreadyExists = 704;
+ constexpr int WindowError = 705;
+ constexpr int BlockContentMismatch = 706;
+ constexpr int NoSuchBlock = 707;
+ constexpr int NoSuchChunk = 708;
+ constexpr int NoLocationAvailable = 710;
+ constexpr int IOError = 711;
+ constexpr int MasterCommunicationFailed = 712;
+ constexpr int NoSuchChunkTree = 713;
+ constexpr int MasterNotConnected = 714;
+ constexpr int ChunkUnavailable = 716;
+ constexpr int NoSuchChunkList = 717;
+ constexpr int WriteThrottlingActive = 718;
+ constexpr int NoSuchMedium = 719;
+ constexpr int OptimisticLockFailure = 720;
+ constexpr int InvalidBlockChecksum = 721;
+ constexpr int BlockOutOfRange = 722;
+ constexpr int ObjectNotReplicated = 723;
+ constexpr int MissingExtension = 724;
+ constexpr int BandwidthThrottlingFailed = 725;
+ constexpr int ReaderTimeout = 726;
+ constexpr int NoSuchChunkView = 727;
+
+////////////////////////////////////////////////////////////////////////////////
+
+} // namespace NChunkClient
+
+
+
+// from ./client/election/public.h
+namespace NElection {
+
+////////////////////////////////////////////////////////////////////////////////
+
+ constexpr int InvalidState = 800;
+ constexpr int InvalidLeader = 801;
+ constexpr int InvalidEpoch = 802;
+
+////////////////////////////////////////////////////////////////////////////////
+
+} // namespace NElection
+
+
+
+// from ./client/security_client/public.h
+namespace NSecurityClient {
+
+////////////////////////////////////////////////////////////////////////////////
+
+ constexpr int AuthenticationError = 900;
+ constexpr int AuthorizationError = 901;
+ constexpr int AccountLimitExceeded = 902;
+ constexpr int UserBanned = 903;
+ constexpr int RequestQueueSizeLimitExceeded = 904;
+ constexpr int NoSuchAccount = 905;
+ constexpr int SafeModeEnabled = 906;
+
+////////////////////////////////////////////////////////////////////////////////
+
+} // namespace NSecurityClient
+
+
+
+// from ./client/object_client/public.h
+namespace NObjectClient {
+
+////////////////////////////////////////////////////////////////////////////////
+
+ constexpr int PrerequisiteCheckFailed = 1000;
+
+////////////////////////////////////////////////////////////////////////////////
+
+} // namespace NObjectClient
+
+
+
+// from ./server/lib/exec_agent/public.h
+namespace NExecAgent {
+
+////////////////////////////////////////////////////////////////////////////////
+
+ constexpr int ConfigCreationFailed = 1100;
+ constexpr int AbortByScheduler = 1101;
+ constexpr int ResourceOverdraft = 1102;
+ constexpr int WaitingJobTimeout = 1103;
+ constexpr int SlotNotFound = 1104;
+ constexpr int JobEnvironmentDisabled = 1105;
+ constexpr int JobProxyConnectionFailed = 1106;
+ constexpr int ArtifactCopyingFailed = 1107;
+ constexpr int NodeDirectoryPreparationFailed = 1108;
+ constexpr int SlotLocationDisabled = 1109;
+ constexpr int QuotaSettingFailed = 1110;
+ constexpr int RootVolumePreparationFailed = 1111;
+ constexpr int NotEnoughDiskSpace = 1112;
+ constexpr int ArtifactDownloadFailed = 1113;
+ constexpr int JobProxyPreparationTimeout = 1114;
+ constexpr int JobPreparationTimeout = 1115;
+ constexpr int JobProxyFailed = 1120;
+
+////////////////////////////////////////////////////////////////////////////////
+
+} // namespace NExecAgent
+
+
+
+// from ./ytlib/job_proxy/public.h
+namespace NJobProxy {
+
+////////////////////////////////////////////////////////////////////////////////
+
+ constexpr int MemoryLimitExceeded = 1200;
+ constexpr int MemoryCheckFailed = 1201;
+ constexpr int JobTimeLimitExceeded = 1202;
+ constexpr int UnsupportedJobType = 1203;
+ constexpr int JobNotPrepared = 1204;
+ constexpr int UserJobFailed = 1205;
+
+////////////////////////////////////////////////////////////////////////////////
+
+} // namespace NJobProxy
+
+
+
+// from ./server/node/data_node/public.h
+namespace NDataNode {
+
+////////////////////////////////////////////////////////////////////////////////
+
+ constexpr int LocalChunkReaderFailed = 1300;
+ constexpr int LayerUnpackingFailed = 1301;
+
+////////////////////////////////////////////////////////////////////////////////
+
+} // namespace NDataNode
+
+
+
+// from ./core/net/public.h
+namespace NNet {
+
+////////////////////////////////////////////////////////////////////////////////
+
+ constexpr int Aborted = 1500;
+ constexpr int ResolveTimedOut = 1501;
+
+////////////////////////////////////////////////////////////////////////////////
+
+} // namespace NNet
+
+
+
+// from ./client/node_tracker_client/public.h
+namespace NNodeTrackerClient {
+
+////////////////////////////////////////////////////////////////////////////////
+
+ constexpr int NoSuchNode = 1600;
+ constexpr int InvalidState = 1601;
+ constexpr int NoSuchNetwork = 1602;
+ constexpr int NoSuchRack = 1603;
+ constexpr int NoSuchDataCenter = 1604;
+
+////////////////////////////////////////////////////////////////////////////////
+
+} // namespace NNodeTrackerClient
+
+
+
+// from ./client/tablet_client/public.h
+namespace NTabletClient {
+
+////////////////////////////////////////////////////////////////////////////////
+
+ constexpr int TransactionLockConflict = 1700;
+ constexpr int NoSuchTablet = 1701;
+ constexpr int TabletNotMounted = 1702;
+ constexpr int AllWritesDisabled = 1703;
+ constexpr int InvalidMountRevision = 1704;
+ constexpr int TableReplicaAlreadyExists = 1705;
+ constexpr int InvalidTabletState = 1706;
+
+////////////////////////////////////////////////////////////////////////////////
+
+} // namespace NTabletClient
+
+
+
+// from ./server/lib/shell/public.h
+namespace NShell {
+
+////////////////////////////////////////////////////////////////////////////////
+
+ constexpr int ShellExited = 1800;
+ constexpr int ShellManagerShutDown = 1801;
+
+////////////////////////////////////////////////////////////////////////////////
+
+} // namespace NShell
+
+
+
+// from ./client/api/public.h
+namespace NApi {
+
+////////////////////////////////////////////////////////////////////////////////
+
+ constexpr int TooManyConcurrentRequests = 1900;
+ constexpr int JobArchiveUnavailable = 1910;
+ constexpr int RetriableArchiveError = 1911;
+ constexpr int NoSuchOperation = 1915;
+
+////////////////////////////////////////////////////////////////////////////////
+
+} // namespace NApi
+
+
+
+// from ./server/controller_agent/chunk_pools/public.h
+namespace NChunkPools {
+
+////////////////////////////////////////////////////////////////////////////////
+
+ constexpr int DataSliceLimitExceeded = 2000;
+ constexpr int MaxDataWeightPerJobExceeded = 2001;
+ constexpr int MaxPrimaryDataWeightPerJobExceeded = 2002;
+
+////////////////////////////////////////////////////////////////////////////////
+
+} // namespace NChunkPools
+
+
+
+// from ./client/api/rpc_proxy/public.h
+namespace NApi {
+
+////////////////////////////////////////////////////////////////////////////////
+
+ constexpr int ProxyBanned = 2100;
+
+////////////////////////////////////////////////////////////////////////////////
+
+} // namespace NApi
+
+
+
+// from ./ytlib/controller_agent/public.h
+namespace NControllerAgent {
+
+////////////////////////////////////////////////////////////////////////////////
+
+ constexpr int AgentCallFailed = 4400;
+ constexpr int NoOnlineNodeToScheduleJob = 4410;
+ constexpr int MaterializationFailed = 4415;
+
+////////////////////////////////////////////////////////////////////////////////
+
+} // namespace NControllerAgent
+
+
+
+// from ./client/transaction_client/public.h
+namespace NTransactionClient {
+
+////////////////////////////////////////////////////////////////////////////////
+
+ constexpr int NoSuchTransaction = 11000;
+
+////////////////////////////////////////////////////////////////////////////////
+
+} // namespace NTransactionClient
+
+
+
+// from ./server/lib/containers/public.h
+namespace NContainers {
+
+////////////////////////////////////////////////////////////////////////////////
+
+ constexpr int FailedToStartContainer = 13000;
+
+////////////////////////////////////////////////////////////////////////////////
+
+} // namespace NContainers
+
+
+
+// from ./ytlib/job_prober_client/public.h
+namespace NJobProberClient {
+
+////////////////////////////////////////////////////////////////////////////////
+
+ constexpr int JobIsNotRunning = 17000;
+
+////////////////////////////////////////////////////////////////////////////////
+
+} // namespace NJobProberClient
+
+} // namespace NClusterErrorCodes
+} // namespace NYT
diff --git a/yt/cpp/mapreduce/interface/error_ut.cpp b/yt/cpp/mapreduce/interface/error_ut.cpp
new file mode 100644
index 0000000000..03f2751b23
--- /dev/null
+++ b/yt/cpp/mapreduce/interface/error_ut.cpp
@@ -0,0 +1,81 @@
+#include <library/cpp/testing/unittest/registar.h>
+
+#include <library/cpp/json/json_reader.h>
+
+#include <yt/cpp/mapreduce/interface/errors.h>
+#include <yt/cpp/mapreduce/common/helpers.h>
+
+using namespace NYT;
+
+template<>
+void Out<NYT::TNode>(IOutputStream& s, const NYT::TNode& node)
+{
+ s << "TNode:" << NodeToYsonString(node);
+}
+
+Y_UNIT_TEST_SUITE(ErrorSuite)
+{
+ Y_UNIT_TEST(TestParseJson)
+ {
+ // Scary real world error! Бу!
+ const char* jsonText =
+ R"""({)"""
+ R"""("code":500,)"""
+ R"""("message":"Error resolving path //home/user/link",)"""
+ R"""("attributes":{)"""
+ R"""("fid":18446484571700269066,)"""
+ R"""("method":"Create",)"""
+ R"""("tid":17558639495721339338,)"""
+ R"""("datetime":"2017-04-07T13:38:56.474819Z",)"""
+ R"""("pid":414529,)"""
+ R"""("host":"build01-01g.yt.yandex.net"},)"""
+ R"""("inner_errors":[{)"""
+ R"""("code":1,)"""
+ R"""("message":"Node //tt cannot have children",)"""
+ R"""("attributes":{)"""
+ R"""("fid":18446484571700269066,)"""
+ R"""("tid":17558639495721339338,)"""
+ R"""("datetime":"2017-04-07T13:38:56.474725Z",)"""
+ R"""("pid":414529,)"""
+ R"""("host":"build01-01g.yt.yandex.net"},)"""
+ R"""("inner_errors":[]}]})""";
+
+ NJson::TJsonValue jsonValue;
+ ReadJsonFastTree(jsonText, &jsonValue, /*throwOnError=*/ true);
+
+ TYtError error(jsonValue);
+ UNIT_ASSERT_VALUES_EQUAL(error.GetCode(), 500);
+ UNIT_ASSERT_VALUES_EQUAL(error.GetMessage(), R"""(Error resolving path //home/user/link)""");
+ UNIT_ASSERT_VALUES_EQUAL(error.InnerErrors().size(), 1);
+ UNIT_ASSERT_VALUES_EQUAL(error.InnerErrors()[0].GetCode(), 1);
+
+ UNIT_ASSERT_VALUES_EQUAL(error.HasAttributes(), true);
+ UNIT_ASSERT_VALUES_EQUAL(error.GetAttributes().at("method"), TNode("Create"));
+
+ UNIT_ASSERT_VALUES_EQUAL(error.GetAllErrorCodes(), TSet<int>({500, 1}));
+ }
+
+ Y_UNIT_TEST(TestGetYsonText) {
+ const char* jsonText =
+ R"""({)"""
+ R"""("code":500,)"""
+ R"""("message":"outer error",)"""
+ R"""("attributes":{)"""
+ R"""("method":"Create",)"""
+ R"""("pid":414529},)"""
+ R"""("inner_errors":[{)"""
+ R"""("code":1,)"""
+ R"""("message":"inner error",)"""
+ R"""("attributes":{},)"""
+ R"""("inner_errors":[])"""
+ R"""(}]})""";
+ TYtError error;
+ error.ParseFrom(jsonText);
+ TString ysonText = error.GetYsonText();
+ TYtError error2(NodeFromYsonString(ysonText));
+ UNIT_ASSERT_EQUAL(
+ ysonText,
+ R"""({"code"=500;"message"="outer error";"attributes"={"method"="Create";"pid"=414529};"inner_errors"=[{"code"=1;"message"="inner error"}]})""");
+ UNIT_ASSERT_EQUAL(error2.GetYsonText(), ysonText);
+ }
+}
diff --git a/yt/cpp/mapreduce/interface/errors.cpp b/yt/cpp/mapreduce/interface/errors.cpp
new file mode 100644
index 0000000000..49a7c7cfc1
--- /dev/null
+++ b/yt/cpp/mapreduce/interface/errors.cpp
@@ -0,0 +1,437 @@
+#include "errors.h"
+
+#include <library/cpp/yson/node/node_io.h>
+#include <library/cpp/yson/node/node_visitor.h>
+
+#include <yt/cpp/mapreduce/interface/error_codes.h>
+
+#include <library/cpp/json/json_reader.h>
+#include <library/cpp/yson/writer.h>
+
+#include <util/string/builder.h>
+#include <util/stream/str.h>
+#include <util/generic/set.h>
+
+namespace NYT {
+
+using namespace NJson;
+
+////////////////////////////////////////////////////////////////////
+
+static void WriteErrorDescription(const TYtError& error, IOutputStream* out)
+{
+ (*out) << '\'' << error.GetMessage() << '\'';
+ const auto& innerErrorList = error.InnerErrors();
+ if (!innerErrorList.empty()) {
+ (*out) << " { ";
+ bool first = true;
+ for (const auto& innerError : innerErrorList) {
+ if (first) {
+ first = false;
+ } else {
+ (*out) << " ; ";
+ }
+ WriteErrorDescription(innerError, out);
+ }
+ (*out) << " }";
+ }
+}
+
+static void SerializeError(const TYtError& error, NYson::IYsonConsumer* consumer)
+{
+ consumer->OnBeginMap();
+ {
+ consumer->OnKeyedItem("code");
+ consumer->OnInt64Scalar(error.GetCode());
+
+ consumer->OnKeyedItem("message");
+ consumer->OnStringScalar(error.GetMessage());
+
+ if (!error.GetAttributes().empty()) {
+ consumer->OnKeyedItem("attributes");
+ consumer->OnBeginMap();
+ {
+ for (const auto& item : error.GetAttributes()) {
+ consumer->OnKeyedItem(item.first);
+ TNodeVisitor(consumer).Visit(item.second);
+ }
+ }
+ consumer->OnEndMap();
+ }
+
+ if (!error.InnerErrors().empty()) {
+ consumer->OnKeyedItem("inner_errors");
+ {
+ consumer->OnBeginList();
+ for (const auto& innerError : error.InnerErrors()) {
+ SerializeError(innerError, consumer);
+ }
+ consumer->OnEndList();
+ }
+ }
+ }
+ consumer->OnEndMap();
+}
+
+static TString DumpJobInfoForException(const TOperationId& operationId, const TVector<TFailedJobInfo>& failedJobInfoList)
+{
+ ::TStringBuilder output;
+ // Exceptions have limit to contain 65508 bytes of text, so we also limit stderr text
+ constexpr size_t MAX_SIZE = 65508 / 2;
+
+ size_t written = 0;
+ for (const auto& failedJobInfo : failedJobInfoList) {
+ if (written >= MAX_SIZE) {
+ break;
+ }
+ TStringStream nextChunk;
+ nextChunk << '\n';
+ nextChunk << "OperationId: " << GetGuidAsString(operationId) << " JobId: " << GetGuidAsString(failedJobInfo.JobId) << '\n';
+ nextChunk << "Error: " << failedJobInfo.Error.FullDescription() << '\n';
+ if (!failedJobInfo.Stderr.empty()) {
+ nextChunk << "Stderr: " << Endl;
+ size_t tmpWritten = written + nextChunk.Str().size();
+ if (tmpWritten >= MAX_SIZE) {
+ break;
+ }
+
+ if (tmpWritten + failedJobInfo.Stderr.size() > MAX_SIZE) {
+ nextChunk << failedJobInfo.Stderr.substr(failedJobInfo.Stderr.size() - (MAX_SIZE - tmpWritten));
+ } else {
+ nextChunk << failedJobInfo.Stderr;
+ }
+ }
+ written += nextChunk.Str().size();
+ output << nextChunk.Str();
+ }
+ return output;
+}
+
+////////////////////////////////////////////////////////////////////
+
+TYtError::TYtError()
+ : Code_(0)
+{ }
+
+TYtError::TYtError(const TString& message)
+ : Code_(NYT::NClusterErrorCodes::Generic)
+ , Message_(message)
+{ }
+
+TYtError::TYtError(int code, const TString& message)
+ : Code_(code)
+ , Message_(message)
+{ }
+
+TYtError::TYtError(const TJsonValue& value)
+{
+ const TJsonValue::TMapType& map = value.GetMap();
+ TJsonValue::TMapType::const_iterator it = map.find("message");
+ if (it != map.end()) {
+ Message_ = it->second.GetString();
+ }
+
+ it = map.find("code");
+ if (it != map.end()) {
+ Code_ = static_cast<int>(it->second.GetInteger());
+ } else {
+ Code_ = NYT::NClusterErrorCodes::Generic;
+ }
+
+ it = map.find("inner_errors");
+ if (it != map.end()) {
+ const TJsonValue::TArray& innerErrors = it->second.GetArray();
+ for (const auto& innerError : innerErrors) {
+ InnerErrors_.push_back(TYtError(innerError));
+ }
+ }
+
+ it = map.find("attributes");
+ if (it != map.end()) {
+ auto attributes = NYT::NodeFromJsonValue(it->second);
+ if (attributes.IsMap()) {
+ Attributes_ = std::move(attributes.AsMap());
+ }
+ }
+}
+
+TYtError::TYtError(const TNode& node)
+{
+ const auto& map = node.AsMap();
+ auto it = map.find("message");
+ if (it != map.end()) {
+ Message_ = it->second.AsString();
+ }
+
+ it = map.find("code");
+ if (it != map.end()) {
+ Code_ = static_cast<int>(it->second.AsInt64());
+ } else {
+ Code_ = NYT::NClusterErrorCodes::Generic;
+ }
+
+ it = map.find("inner_errors");
+ if (it != map.end()) {
+ const auto& innerErrors = it->second.AsList();
+ for (const auto& innerError : innerErrors) {
+ InnerErrors_.push_back(TYtError(innerError));
+ }
+ }
+
+ it = map.find("attributes");
+ if (it != map.end()) {
+ auto& attributes = it->second;
+ if (attributes.IsMap()) {
+ Attributes_ = std::move(attributes.AsMap());
+ }
+ }
+}
+
+int TYtError::GetCode() const
+{
+ return Code_;
+}
+
+const TString& TYtError::GetMessage() const
+{
+ return Message_;
+}
+
+const TVector<TYtError>& TYtError::InnerErrors() const
+{
+ return InnerErrors_;
+}
+
+void TYtError::ParseFrom(const TString& jsonError)
+{
+ TJsonValue value;
+ TStringInput input(jsonError);
+ ReadJsonTree(&input, &value);
+ *this = TYtError(value);
+}
+
+TSet<int> TYtError::GetAllErrorCodes() const
+{
+ TDeque<const TYtError*> queue = {this};
+ TSet<int> result;
+ while (!queue.empty()) {
+ const auto* current = queue.front();
+ queue.pop_front();
+ result.insert(current->Code_);
+ for (const auto& error : current->InnerErrors_) {
+ queue.push_back(&error);
+ }
+ }
+ return result;
+}
+
+bool TYtError::ContainsErrorCode(int code) const
+{
+ if (Code_ == code) {
+ return true;
+ }
+ for (const auto& error : InnerErrors_) {
+ if (error.ContainsErrorCode(code)) {
+ return true;
+ }
+ }
+ return false;
+}
+
+
+bool TYtError::ContainsText(const TStringBuf& text) const
+{
+ if (Message_.Contains(text)) {
+ return true;
+ }
+ for (const auto& error : InnerErrors_) {
+ if (error.ContainsText(text)) {
+ return true;
+ }
+ }
+ return false;
+}
+
+bool TYtError::HasAttributes() const
+{
+ return !Attributes_.empty();
+}
+
+const TNode::TMapType& TYtError::GetAttributes() const
+{
+ return Attributes_;
+}
+
+TString TYtError::GetYsonText() const
+{
+ TStringStream out;
+ ::NYson::TYsonWriter writer(&out, NYson::EYsonFormat::Text);
+ SerializeError(*this, &writer);
+ return std::move(out.Str());
+}
+
+TString TYtError::ShortDescription() const
+{
+ TStringStream out;
+ WriteErrorDescription(*this, &out);
+ return std::move(out.Str());
+}
+
+TString TYtError::FullDescription() const
+{
+ TStringStream s;
+ WriteErrorDescription(*this, &s);
+ s << "; full error: " << GetYsonText();
+ return s.Str();
+}
+
+////////////////////////////////////////////////////////////////////////////////
+
+TErrorResponse::TErrorResponse(int httpCode, const TString& requestId)
+ : HttpCode_(httpCode)
+ , RequestId_(requestId)
+{ }
+
+bool TErrorResponse::IsOk() const
+{
+ return Error_.GetCode() == 0;
+}
+
+void TErrorResponse::SetRawError(const TString& message)
+{
+ Error_ = TYtError(message);
+ Setup();
+}
+
+void TErrorResponse::SetError(TYtError error)
+{
+ Error_ = std::move(error);
+ Setup();
+}
+
+void TErrorResponse::ParseFromJsonError(const TString& jsonError)
+{
+ Error_.ParseFrom(jsonError);
+ Setup();
+}
+
+void TErrorResponse::SetIsFromTrailers(bool isFromTrailers)
+{
+ IsFromTrailers_ = isFromTrailers;
+}
+
+int TErrorResponse::GetHttpCode() const
+{
+ return HttpCode_;
+}
+
+bool TErrorResponse::IsFromTrailers() const
+{
+ return IsFromTrailers_;
+}
+
+bool TErrorResponse::IsTransportError() const
+{
+ return HttpCode_ == 503;
+}
+
+TString TErrorResponse::GetRequestId() const
+{
+ return RequestId_;
+}
+
+const TYtError& TErrorResponse::GetError() const
+{
+ return Error_;
+}
+
+bool TErrorResponse::IsResolveError() const
+{
+ return Error_.ContainsErrorCode(NClusterErrorCodes::NYTree::ResolveError);
+}
+
+bool TErrorResponse::IsAccessDenied() const
+{
+ return Error_.ContainsErrorCode(NClusterErrorCodes::NSecurityClient::AuthorizationError);
+}
+
+bool TErrorResponse::IsConcurrentTransactionLockConflict() const
+{
+ return Error_.ContainsErrorCode(NClusterErrorCodes::NCypressClient::ConcurrentTransactionLockConflict);
+}
+
+bool TErrorResponse::IsRequestRateLimitExceeded() const
+{
+ return Error_.ContainsErrorCode(NClusterErrorCodes::NSecurityClient::RequestQueueSizeLimitExceeded);
+}
+
+bool TErrorResponse::IsRequestQueueSizeLimitExceeded() const
+{
+ return Error_.ContainsErrorCode(NClusterErrorCodes::NRpc::RequestQueueSizeLimitExceeded);
+}
+
+bool TErrorResponse::IsChunkUnavailable() const
+{
+ return Error_.ContainsErrorCode(NClusterErrorCodes::NChunkClient::ChunkUnavailable);
+}
+
+bool TErrorResponse::IsRequestTimedOut() const
+{
+ return Error_.ContainsErrorCode(NClusterErrorCodes::Timeout);
+}
+
+bool TErrorResponse::IsNoSuchTransaction() const
+{
+ return Error_.ContainsErrorCode(NClusterErrorCodes::NTransactionClient::NoSuchTransaction);
+}
+
+bool TErrorResponse::IsConcurrentOperationsLimitReached() const
+{
+ return Error_.ContainsErrorCode(NClusterErrorCodes::NScheduler::TooManyOperations);
+}
+
+void TErrorResponse::Setup()
+{
+ TStringStream s;
+ *this << Error_.FullDescription();
+}
+
+////////////////////////////////////////////////////////////////////
+
+TOperationFailedError::TOperationFailedError(
+ EState state,
+ TOperationId id,
+ TYtError ytError,
+ TVector<TFailedJobInfo> failedJobInfo)
+ : State_(state)
+ , OperationId_(id)
+ , Error_(std::move(ytError))
+ , FailedJobInfo_(std::move(failedJobInfo))
+{
+ *this << Error_.FullDescription();
+ if (!FailedJobInfo_.empty()) {
+ *this << DumpJobInfoForException(OperationId_, FailedJobInfo_);
+ }
+}
+
+TOperationFailedError::EState TOperationFailedError::GetState() const
+{
+ return State_;
+}
+
+TOperationId TOperationFailedError::GetOperationId() const
+{
+ return OperationId_;
+}
+
+const TYtError& TOperationFailedError::GetError() const
+{
+ return Error_;
+}
+
+const TVector<TFailedJobInfo>& TOperationFailedError::GetFailedJobInfo() const
+{
+ return FailedJobInfo_;
+}
+
+} // namespace NYT
diff --git a/yt/cpp/mapreduce/interface/errors.h b/yt/cpp/mapreduce/interface/errors.h
new file mode 100644
index 0000000000..afad58ed72
--- /dev/null
+++ b/yt/cpp/mapreduce/interface/errors.h
@@ -0,0 +1,290 @@
+#pragma once
+
+///
+/// @file yt/cpp/mapreduce/interface/errors.h
+///
+/// Errors and exceptions emitted by library.
+
+#include "fwd.h"
+#include "common.h"
+
+#include <library/cpp/yson/node/node.h>
+
+#include <util/generic/bt_exception.h>
+#include <util/generic/yexception.h>
+#include <util/generic/string.h>
+#include <util/generic/vector.h>
+
+namespace NJson {
+ class TJsonValue;
+} // namespace NJson
+
+namespace NYT {
+
+////////////////////////////////////////////////////////////////////////////////
+
+///
+/// @brief Error that is thrown when library detects invalid usage of API.
+///
+/// For example trying to start operations on empty table list.
+class TApiUsageError
+ : public TWithBackTrace<yexception>
+{ };
+
+///
+/// @brief Error that is thrown when request retries continues for too long.
+///
+/// @see NYT::TRetryConfig
+/// @see NYT::IRetryConfigProvider
+class TRequestRetriesTimeout
+ : public yexception
+{ };
+
+////////////////////////////////////////////////////////////////////////////////
+
+///
+/// @brief Error returned by YT cluster.
+///
+/// An object of this class describe error that happened on YT server.
+/// Internally each error is a tree. Each node of the tree contains:
+/// - integer error code;
+/// - text description of error;
+/// - attributes describing error context.
+///
+/// To get text description of an error one should use
+/// @ref NYT::TYtError::ShortDescription or @ref NYT::TYtError::FullDescription
+///
+/// To distinguish between error kinds @ref NYT::TYtError::ContainsErrorCode should be used.
+///
+/// @see NYT::TErrorResponse
+/// @see NYT::TOperationFailedError
+class TYtError
+{
+public:
+ /// Constructs error with NYT::NClusterErrorCodes::OK code and empty message.
+ TYtError();
+
+ /// Constructs error with NYT::NClusterErrorCodes::Generic code and given message.
+ explicit TYtError(const TString& message);
+
+ /// Constructs error with given code and given message.
+ TYtError(int code, const TString& message);
+
+ /// Construct error from json representation.
+ TYtError(const ::NJson::TJsonValue& value);
+
+ /// Construct error from TNode representation.
+ TYtError(const TNode& value);
+
+ ///
+ /// @brief Check if error or any of inner errors has given error code.
+ ///
+ /// Use this method to distinguish kind of error.
+ bool ContainsErrorCode(int code) const;
+
+ ///
+ /// @brief Get short description of error.
+ ///
+ /// Short description contain text description of error and all inner errors.
+ /// It is human readable but misses some important information (error codes, error attributes).
+ ///
+ /// Usually it's better to use @ref NYT::TYtError::FullDescription to log errors.
+ TString ShortDescription() const;
+
+ ///
+ /// @brief Get full description of error.
+ ///
+ /// Full description contains readable short description
+ /// followed by text yson representation of error that contains error codes and attributes.
+ TString FullDescription() const;
+
+ ///
+ /// @brief Get error code of the topmost error.
+ ///
+ /// @warning Do not use this method to distinguish between error kinds
+ /// @ref NYT::TYtError::ContainsErrorCode should be used instead.
+ int GetCode() const;
+
+ ///
+ /// @brief Get error text of the topmost error.
+ ///
+ /// @warning This method should not be used to log errors
+ /// since text description of inner errors is going to be lost.
+ /// @ref NYT::TYtError::FullDescription should be used instead.
+ const TString& GetMessage() const;
+
+ ///
+ /// @brief Check if error or any of inner errors contains given text chunk.
+ ///
+ /// @warning @ref NYT::TYtError::ContainsErrorCode must be used instead of
+ /// this method when possible. If there is no suitable error code it's
+ /// better to ask yt@ to add one. This method should only be used as workaround.
+ bool ContainsText(const TStringBuf& text) const;
+
+ /// @brief Get inner errors.
+ const TVector<TYtError>& InnerErrors() const;
+
+ /// Parse error from json string.
+ void ParseFrom(const TString& jsonError);
+
+ /// Collect error codes from entire error tree.
+ TSet<int> GetAllErrorCodes() const;
+
+ /// Check if error has any attributes.
+ bool HasAttributes() const;
+
+ /// Get error attributes.
+ const TNode::TMapType& GetAttributes() const;
+
+ /// Get text yson representation of error
+ TString GetYsonText() const;
+
+private:
+ int Code_;
+ TString Message_;
+ TVector<TYtError> InnerErrors_;
+ TNode::TMapType Attributes_;
+};
+
+////////////////////////////////////////////////////////////////////////////////
+
+///
+/// @brief Generic error response returned by server.
+///
+/// TErrorResponse can be thrown from almost any client method when server responds with error.
+///
+class TErrorResponse
+ : public yexception
+{
+public:
+ TErrorResponse(int httpCode, const TString& requestId);
+ TErrorResponse(int httpCode, TYtError error);
+
+ /// Get error object returned by server.
+ const TYtError& GetError() const;
+
+ /// Get if (correlation-id) of request that was responded with error.
+ TString GetRequestId() const;
+
+ /// Get HTTP code of response.
+ int GetHttpCode() const;
+
+ /// Is error parsed from response trailers.
+ bool IsFromTrailers() const;
+
+ /// Check if error was caused by transport problems inside YT cluster.
+ bool IsTransportError() const;
+
+ /// Check if error was caused by failure to resolve cypress path.
+ bool IsResolveError() const;
+
+ /// Check if error was caused by lack of permissions to execute request.
+ bool IsAccessDenied() const;
+
+ /// Check if error was caused by failure to lock object because of another transaction is holding lock.
+ bool IsConcurrentTransactionLockConflict() const;
+
+ /// Check if error was caused by request quota limit exceeding.
+ bool IsRequestRateLimitExceeded() const;
+
+ // YT can't serve request because it is overloaded.
+ bool IsRequestQueueSizeLimitExceeded() const;
+
+ /// Check if error was caused by failure to get chunk. Such errors are almost always temporary.
+ bool IsChunkUnavailable() const;
+
+ /// Check if error was caused by internal YT timeout.
+ bool IsRequestTimedOut() const;
+
+ /// Check if error was caused by trying to work with transaction that was finished or never existed.
+ bool IsNoSuchTransaction() const;
+
+ // User reached their limit of concurrently running operations.
+ bool IsConcurrentOperationsLimitReached() const;
+
+ /// @deprecated This method must not be used.
+ bool IsOk() const;
+
+ void SetRawError(const TString& message);
+ void SetError(TYtError error);
+ void ParseFromJsonError(const TString& jsonError);
+ void SetIsFromTrailers(bool isFromTrailers);
+
+private:
+ void Setup();
+
+private:
+ int HttpCode_;
+ TString RequestId_;
+ TYtError Error_;
+ bool IsFromTrailers_ = false;
+};
+
+////////////////////////////////////////////////////////////////////////////////
+
+/// Info about failed jobs.
+///
+/// @see NYT::TOperationFailedError
+struct TFailedJobInfo
+{
+ /// Id of a job.
+ TJobId JobId;
+
+ /// Error describing job failure.
+ TYtError Error;
+
+ /// Stderr of job.
+ ///
+ /// @note YT doesn't store all job stderrs, check @ref NYT::IOperationClient::GetJobStderr
+ /// for list of limitations.
+ ///
+ /// @see NYT::IOperationClient::GetJobStderr
+ TString Stderr;
+};
+
+////////////////////////////////////////////////////////////////////////////////
+
+///
+/// @brief Error that is thrown when operation watched by library fails.
+///
+/// This error is thrown from operation starting methods when they are started in sync mode (@ refNYT::TOperationOptions::Wait == true)
+/// or from future returned by NYT::IOperation::Watch.
+///
+/// @see NYT::IOperationClient
+class TOperationFailedError
+ : public yexception
+{
+public:
+ /// Final state of operation.
+ enum EState {
+ /// Operation was failed due to some error.
+ Failed,
+ /// Operation didn't experienced errors, but was aborted by user request or by YT.
+ Aborted,
+ };
+
+public:
+ TOperationFailedError(EState state, TOperationId id, TYtError ytError, TVector<TFailedJobInfo> failedJobInfo);
+
+ /// Get final state of operation.
+ EState GetState() const;
+
+ /// Get operation id.
+ TOperationId GetOperationId() const;
+
+ /// Return operation error.
+ const TYtError& GetError() const;
+
+ /// Return info about failed jobs (if any).
+ const TVector<TFailedJobInfo>& GetFailedJobInfo() const;
+
+private:
+ EState State_;
+ TOperationId OperationId_;
+ TYtError Error_;
+ TVector<TFailedJobInfo> FailedJobInfo_;
+};
+
+////////////////////////////////////////////////////////////////////////////////
+
+} // namespace NYT
diff --git a/yt/cpp/mapreduce/interface/finish_or_die.h b/yt/cpp/mapreduce/interface/finish_or_die.h
new file mode 100644
index 0000000000..9d7dcece02
--- /dev/null
+++ b/yt/cpp/mapreduce/interface/finish_or_die.h
@@ -0,0 +1,41 @@
+#pragma once
+
+#include <util/system/yassert.h>
+
+#include <exception>
+
+/// @cond Doxygen_Suppress
+namespace NYT::NDetail {
+
+////////////////////////////////////////////////////////////////////////////////
+
+template <typename T>
+void FinishOrDie(T* pThis, const char* className) noexcept
+{
+ auto fail = [&] (const char* what) {
+ Y_FAIL(
+ "\n\n"
+ "Destructor of %s caught exception during Finish: %s.\n"
+ "Some data is probably has not been written.\n"
+ "In order to handle such exceptions consider explicitly call Finish() method.\n",
+ className,
+ what);
+ };
+
+ try {
+ pThis->Finish();
+ } catch (const std::exception& ex) {
+ if (!std::uncaught_exceptions()) {
+ fail(ex.what());
+ }
+ } catch (...) {
+ if (!std::uncaught_exceptions()) {
+ fail("<unknown exception>");
+ }
+ }
+}
+
+////////////////////////////////////////////////////////////////////////////////
+
+} // namespace NYT::NDetail
+/// @endcond
diff --git a/yt/cpp/mapreduce/interface/fluent.h b/yt/cpp/mapreduce/interface/fluent.h
new file mode 100644
index 0000000000..8ca6e86336
--- /dev/null
+++ b/yt/cpp/mapreduce/interface/fluent.h
@@ -0,0 +1,678 @@
+#pragma once
+
+///
+/// @file yt/cpp/mapreduce/interface/fluent.h
+///
+/// Adapters for working with @ref NYson::IYsonConsumer in a structured way, with compile-time syntax checks.
+///
+/// The following documentation is copied verbatim from `yt/core/ytree/fluent.h`.
+///
+/// WHAT IS THIS
+///
+/// Fluent adapters encapsulate invocation of IYsonConsumer methods in a
+/// convenient structured manner. Key advantage of fluent-like code is that
+/// attempt of building syntactically incorrect YSON structure will result
+/// in a compile-time error.
+///
+/// Each fluent object is associated with a context that defines possible YSON
+/// tokens that may appear next. For example, TFluentMap is a fluent object
+/// that corresponds to a location within YSON map right before a key-value
+/// pair or the end of the map.
+///
+/// More precisely, each object that may be obtained by a sequence of fluent
+/// method calls has the full history of its enclosing YSON composite types in
+/// its single template argument hereinafter referred to as TParent. This allows
+/// us not to forget the original context after opening and closing the embedded
+/// composite structure.
+///
+/// It is possible to invoke a separate YSON building procedure by calling
+/// one of convenience Do* methods. There are two possibilities here: it is
+/// possible to delegate invocation context either as a fluent object (like
+/// TFluentMap, TFluentList, TFluentAttributes or TFluentAny) or as a raw
+/// IYsonConsumer*. The latter is discouraged since it is impossible to check
+/// if a given side-built YSON structure fits current fluent context.
+/// For example it is possible to call Do() method inside YSON map passing
+/// consumer to a procedure that will treat context like it is in a list.
+/// Passing typed fluent builder saves you from such a misbehaviour.
+///
+/// TFluentXxx corresponds to an internal class of TXxx
+/// without any history hidden in template argument. It allows you to
+/// write procedures of form:
+///
+/// void BuildSomeAttributesInYson(TFluentMap fluent) { ... }
+///
+/// without thinking about the exact way how this procedure is nested in other
+/// procedures.
+///
+/// An important notation: we will refer to a function whose first argument
+/// is TFluentXxx as TFuncXxx.
+///
+///
+/// BRIEF LIST OF AVAILABLE METHODS
+///
+/// Only the most popular methods are covered here. Refer to the code for the
+/// rest of them.
+///
+/// TAny:
+/// * Value(T value) -> TParent, serialize `value` using underlying consumer.
+/// T should be such that free function Serialize(NYson::IYsonConsumer*, const T&) is
+/// defined;
+/// * BeginMap() -> TFluentMap, open map;
+/// * BeginList() -> TFluentList, open list;
+/// * BeginAttributes() -> TFluentAttributes, open attributes;
+///
+/// * Do(TFuncAny func) -> TAny, delegate invocation to a separate procedure.
+/// * DoIf(bool condition, TFuncAny func) -> TAny, same as Do() but invoke
+/// `func` only if `condition` is true;
+/// * DoFor(TCollection collection, TFuncAny func) -> TAny, same as Do()
+/// but iterate over `collection` and pass each of its elements as a second
+/// argument to `func`. Instead of passing a collection you may it is possible
+/// to pass two iterators as an argument;
+///
+/// * DoMap(TFuncMap func) -> TAny, open a map, delegate invocation to a separate
+/// procedure and close map;
+/// * DoMapFor(TCollection collection, TFuncMap func) -> TAny, open a map, iterate
+/// over `collection` and pass each of its elements as a second argument to `func`
+/// and close map;
+/// * DoList(TFuncList func) -> TAny, same as DoMap();
+/// * DoListFor(TCollection collection, TFuncList func) -> TAny; same as DoMapFor().
+///
+///
+/// TFluentMap:
+/// * Item(TStringBuf key) -> TAny, open an element keyed with `key`;
+/// * EndMap() -> TParent, close map;
+/// * Do(TFuncMap func) -> TFluentMap, same as Do() for TAny;
+/// * DoIf(bool condition, TFuncMap func) -> TFluentMap, same as DoIf() for TAny;
+/// * DoFor(TCollection collection, TFuncMap func) -> TFluentMap, same as DoFor() for TAny.
+///
+///
+/// TFluentList:
+/// * Item() -> TAny, open an new list element;
+/// * EndList() -> TParent, close list;
+/// * Do(TFuncList func) -> TFluentList, same as Do() for TAny;
+/// * DoIf(bool condition, TFuncList func) -> TFluentList, same as DoIf() for TAny;
+/// * DoFor(TCollection collection, TListMap func) -> TFluentList, same as DoFor() for TAny.
+///
+///
+/// TFluentAttributes:
+/// * Item(TStringBuf key) -> TAny, open an element keyed with `key`.
+/// * EndAttributes() -> TParentWithoutAttributes, close attributes. Note that
+/// this method leads to a context that is forces not to have attributes,
+/// preventing us from putting attributes twice before an object.
+/// * Do(TFuncAttributes func) -> TFluentAttributes, same as Do() for TAny;
+/// * DoIf(bool condition, TFuncAttributes func) -> TFluentAttributes, same as DoIf()
+/// for TAny;
+/// * DoFor(TCollection collection, TListAttributes func) -> TFluentAttributes, same as DoFor()
+/// for TAny.
+///
+
+
+#include "common.h"
+#include "serialize.h"
+
+#include <library/cpp/yson/node/serialize.h>
+#include <library/cpp/yson/node/node_builder.h>
+
+#include <library/cpp/yson/consumer.h>
+#include <library/cpp/yson/writer.h>
+
+#include <util/generic/noncopyable.h>
+#include <util/generic/ptr.h>
+#include <util/stream/str.h>
+
+namespace NYT {
+
+////////////////////////////////////////////////////////////////////////////////
+
+template <class T>
+struct TFluentYsonUnwrapper
+{
+ using TUnwrapped = T;
+
+ static TUnwrapped Unwrap(T t)
+ {
+ return std::move(t);
+ }
+};
+
+////////////////////////////////////////////////////////////////////////////////
+
+struct TFluentYsonVoid
+{ };
+
+template <>
+struct TFluentYsonUnwrapper<TFluentYsonVoid>
+{
+ using TUnwrapped = void;
+
+ static TUnwrapped Unwrap(TFluentYsonVoid)
+ { }
+};
+
+////////////////////////////////////////////////////////////////////////////////
+
+/// This class is actually a namespace for specific fluent adapter classes.
+class TFluentYsonBuilder
+ : private TNonCopyable
+{
+private:
+ template <class T>
+ static void WriteValue(NYT::NYson::IYsonConsumer* consumer, const T& value)
+ {
+ Serialize(value, consumer);
+ }
+
+public:
+ class TFluentAny;
+ template <class TParent> class TAny;
+ template <class TParent> class TToAttributes;
+ template <class TParent> class TAttributes;
+ template <class TParent> class TListType;
+ template <class TParent> class TMapType;
+
+ /// Base class for all fluent adapters.
+ template <class TParent>
+ class TFluentBase
+ {
+ public:
+ /// Implicit conversion to yson consumer
+ operator NYT::NYson::IYsonConsumer* () const
+ {
+ return Consumer;
+ }
+
+ protected:
+ /// @cond Doxygen_Suppress
+ NYT::NYson::IYsonConsumer* Consumer;
+ TParent Parent;
+
+ TFluentBase(NYT::NYson::IYsonConsumer* consumer, TParent parent)
+ : Consumer(consumer)
+ , Parent(std::move(parent))
+ { }
+
+ using TUnwrappedParent = typename TFluentYsonUnwrapper<TParent>::TUnwrapped;
+
+ TUnwrappedParent GetUnwrappedParent()
+ {
+ return TFluentYsonUnwrapper<TParent>::Unwrap(std::move(Parent));
+ }
+ /// @endcond Doxygen_Suppress
+ };
+
+ /// Base class for fluent adapters for fragment of list, map or attributes.
+ template <template <class TParent> class TThis, class TParent>
+ class TFluentFragmentBase
+ : public TFluentBase<TParent>
+ {
+ public:
+ using TDeepThis = TThis<TParent>;
+ using TShallowThis = TThis<TFluentYsonVoid>;
+ using TUnwrappedParent = typename TFluentYsonUnwrapper<TParent>::TUnwrapped;
+
+ explicit TFluentFragmentBase(NYT::NYson::IYsonConsumer* consumer, TParent parent = TParent())
+ : TFluentBase<TParent>(consumer, std::move(parent))
+ { }
+
+ /// Delegate invocation to a separate procedure.
+ template <class TFunc>
+ TDeepThis& Do(const TFunc& func)
+ {
+ func(TShallowThis(this->Consumer));
+ return *static_cast<TDeepThis*>(this);
+ }
+
+ /// Conditionally delegate invocation to a separate procedure.
+ template <class TFunc>
+ TDeepThis& DoIf(bool condition, const TFunc& func)
+ {
+ if (condition) {
+ func(TShallowThis(this->Consumer));
+ }
+ return *static_cast<TDeepThis*>(this);
+ }
+
+ /// Calls `func(*this, element)` for each `element` in range `[begin, end)`.
+ template <class TFunc, class TIterator>
+ TDeepThis& DoFor(const TIterator& begin, const TIterator& end, const TFunc& func)
+ {
+ for (auto current = begin; current != end; ++current) {
+ func(TShallowThis(this->Consumer), current);
+ }
+ return *static_cast<TDeepThis*>(this);
+ }
+
+ /// Calls `func(*this, element)` for each `element` in `collection`.
+ template <class TFunc, class TCollection>
+ TDeepThis& DoFor(const TCollection& collection, const TFunc& func)
+ {
+ for (const auto& item : collection) {
+ func(TShallowThis(this->Consumer), item);
+ }
+ return *static_cast<TDeepThis*>(this);
+ }
+
+ };
+
+ /// Fluent adapter of a value without attributes.
+ template <class TParent>
+ class TAnyWithoutAttributes
+ : public TFluentBase<TParent>
+ {
+ public:
+ using TUnwrappedParent = typename TFluentYsonUnwrapper<TParent>::TUnwrapped;
+
+ TAnyWithoutAttributes(NYT::NYson::IYsonConsumer* consumer, TParent parent)
+ : TFluentBase<TParent>(consumer, std::move(parent))
+ { }
+
+ /// Pass `value` to underlying consumer.
+ template <class T>
+ TUnwrappedParent Value(const T& value)
+ {
+ WriteValue(this->Consumer, value);
+ return this->GetUnwrappedParent();
+ }
+
+ /// Call `OnEntity()` of underlying consumer.
+ TUnwrappedParent Entity()
+ {
+ this->Consumer->OnEntity();
+ return this->GetUnwrappedParent();
+ }
+
+ /// Serialize `collection` to underlying consumer as a list.
+ template <class TCollection>
+ TUnwrappedParent List(const TCollection& collection)
+ {
+ this->Consumer->OnBeginList();
+ for (const auto& item : collection) {
+ this->Consumer->OnListItem();
+ WriteValue(this->Consumer, item);
+ }
+ this->Consumer->OnEndList();
+ return this->GetUnwrappedParent();
+ }
+
+ /// Serialize maximum `maxSize` elements of `collection` to underlying consumer as a list.
+ template <class TCollection>
+ TUnwrappedParent ListLimited(const TCollection& collection, size_t maxSize)
+ {
+ this->Consumer->OnBeginAttributes();
+ this->Consumer->OnKeyedItem("count");
+ this->Consumer->OnInt64Scalar(collection.size());
+ this->Consumer->OnEndAttributes();
+ this->Consumer->OnBeginList();
+ size_t printedSize = 0;
+ for (const auto& item : collection) {
+ if (printedSize >= maxSize)
+ break;
+ this->Consumer->OnListItem();
+ WriteValue(this->Consumer, item);
+ ++printedSize;
+ }
+ this->Consumer->OnEndList();
+ return this->GetUnwrappedParent();
+ }
+
+ /// Open a list.
+ TListType<TParent> BeginList()
+ {
+ this->Consumer->OnBeginList();
+ return TListType<TParent>(this->Consumer, this->Parent);
+ }
+
+ /// Open a list, delegate invocation to `func`, then close the list.
+ template <class TFunc>
+ TUnwrappedParent DoList(const TFunc& func)
+ {
+ this->Consumer->OnBeginList();
+ func(TListType<TFluentYsonVoid>(this->Consumer));
+ this->Consumer->OnEndList();
+ return this->GetUnwrappedParent();
+ }
+
+ /// Open a list, call `func(*this, element)` for each `element` of range, then close the list.
+ template <class TFunc, class TIterator>
+ TUnwrappedParent DoListFor(const TIterator& begin, const TIterator& end, const TFunc& func)
+ {
+ this->Consumer->OnBeginList();
+ for (auto current = begin; current != end; ++current) {
+ func(TListType<TFluentYsonVoid>(this->Consumer), current);
+ }
+ this->Consumer->OnEndList();
+ return this->GetUnwrappedParent();
+ }
+
+ /// Open a list, call `func(*this, element)` for each `element` of `collection`, then close the list.
+ template <class TFunc, class TCollection>
+ TUnwrappedParent DoListFor(const TCollection& collection, const TFunc& func)
+ {
+ this->Consumer->OnBeginList();
+ for (const auto& item : collection) {
+ func(TListType<TFluentYsonVoid>(this->Consumer), item);
+ }
+ this->Consumer->OnEndList();
+ return this->GetUnwrappedParent();
+ }
+
+ /// Open a map.
+ TMapType<TParent> BeginMap()
+ {
+ this->Consumer->OnBeginMap();
+ return TMapType<TParent>(this->Consumer, this->Parent);
+ }
+
+ /// Open a map, delegate invocation to `func`, then close the map.
+ template <class TFunc>
+ TUnwrappedParent DoMap(const TFunc& func)
+ {
+ this->Consumer->OnBeginMap();
+ func(TMapType<TFluentYsonVoid>(this->Consumer));
+ this->Consumer->OnEndMap();
+ return this->GetUnwrappedParent();
+ }
+
+ /// Open a map, call `func(*this, element)` for each `element` of range, then close the map.
+ template <class TFunc, class TIterator>
+ TUnwrappedParent DoMapFor(const TIterator& begin, const TIterator& end, const TFunc& func)
+ {
+ this->Consumer->OnBeginMap();
+ for (auto current = begin; current != end; ++current) {
+ func(TMapType<TFluentYsonVoid>(this->Consumer), current);
+ }
+ this->Consumer->OnEndMap();
+ return this->GetUnwrappedParent();
+ }
+
+ /// Open a map, call `func(*this, element)` for each `element` of `collection`, then close the map.
+ template <class TFunc, class TCollection>
+ TUnwrappedParent DoMapFor(const TCollection& collection, const TFunc& func)
+ {
+ this->Consumer->OnBeginMap();
+ for (const auto& item : collection) {
+ func(TMapType<TFluentYsonVoid>(this->Consumer), item);
+ }
+ this->Consumer->OnEndMap();
+ return this->GetUnwrappedParent();
+ }
+ };
+
+ /// Fluent adapter of any value.
+ template <class TParent>
+ class TAny
+ : public TAnyWithoutAttributes<TParent>
+ {
+ public:
+ using TBase = TAnyWithoutAttributes<TParent>;
+
+ explicit TAny(NYT::NYson::IYsonConsumer* consumer, TParent parent)
+ : TBase(consumer, std::move(parent))
+ { }
+
+ /// Open attributes.
+ TAttributes<TBase> BeginAttributes()
+ {
+ this->Consumer->OnBeginAttributes();
+ return TAttributes<TBase>(
+ this->Consumer,
+ TBase(this->Consumer, this->Parent));
+ }
+ };
+
+ /// Fluent adapter of attributes fragment (the inside part of attributes).
+ template <class TParent = TFluentYsonVoid>
+ class TAttributes
+ : public TFluentFragmentBase<TAttributes, TParent>
+ {
+ public:
+ using TThis = TAttributes<TParent>;
+ using TUnwrappedParent = typename TFluentYsonUnwrapper<TParent>::TUnwrapped;
+
+ explicit TAttributes(NYT::NYson::IYsonConsumer* consumer, TParent parent = TParent())
+ : TFluentFragmentBase<TFluentYsonBuilder::TAttributes, TParent>(consumer, std::move(parent))
+ { }
+
+ /// Pass attribute key to underlying consumer.
+ TAny<TThis> Item(const TStringBuf& key)
+ {
+ this->Consumer->OnKeyedItem(key);
+ return TAny<TThis>(this->Consumer, *this);
+ }
+
+ /// Pass attribute key to underlying consumer.
+ template <size_t Size>
+ TAny<TThis> Item(const char (&key)[Size])
+ {
+ return Item(TStringBuf(key, Size - 1));
+ }
+
+ //TODO: from TNode
+
+ /// Close the attributes.
+ TUnwrappedParent EndAttributes()
+ {
+ this->Consumer->OnEndAttributes();
+ return this->GetUnwrappedParent();
+ }
+ };
+
+ /// Fluent adapter of list fragment (the inside part of a list).
+ template <class TParent = TFluentYsonVoid>
+ class TListType
+ : public TFluentFragmentBase<TListType, TParent>
+ {
+ public:
+ using TThis = TListType<TParent>;
+ using TUnwrappedParent = typename TFluentYsonUnwrapper<TParent>::TUnwrapped;
+
+ explicit TListType(NYT::NYson::IYsonConsumer* consumer, TParent parent = TParent())
+ : TFluentFragmentBase<TFluentYsonBuilder::TListType, TParent>(consumer, std::move(parent))
+ { }
+
+ /// Call `OnListItem()` of underlying consumer.
+ TAny<TThis> Item()
+ {
+ this->Consumer->OnListItem();
+ return TAny<TThis>(this->Consumer, *this);
+ }
+
+ // TODO: from TNode
+
+ /// Close the list.
+ TUnwrappedParent EndList()
+ {
+ this->Consumer->OnEndList();
+ return this->GetUnwrappedParent();
+ }
+ };
+
+ /// Fluent adapter of map fragment (the inside part of a map).
+ template <class TParent = TFluentYsonVoid>
+ class TMapType
+ : public TFluentFragmentBase<TMapType, TParent>
+ {
+ public:
+ using TThis = TMapType<TParent>;
+ using TUnwrappedParent = typename TFluentYsonUnwrapper<TParent>::TUnwrapped;
+
+ explicit TMapType(NYT::NYson::IYsonConsumer* consumer, TParent parent = TParent())
+ : TFluentFragmentBase<TFluentYsonBuilder::TMapType, TParent>(consumer, std::move(parent))
+ { }
+
+ /// Pass map key to underlying consumer.
+ template <size_t Size>
+ TAny<TThis> Item(const char (&key)[Size])
+ {
+ return Item(TStringBuf(key, Size - 1));
+ }
+
+ /// Pass map key to underlying consumer.
+ TAny<TThis> Item(const TStringBuf& key)
+ {
+ this->Consumer->OnKeyedItem(key);
+ return TAny<TThis>(this->Consumer, *this);
+ }
+
+ // TODO: from TNode
+
+ /// Close the map.
+ TUnwrappedParent EndMap()
+ {
+ this->Consumer->OnEndMap();
+ return this->GetUnwrappedParent();
+ }
+ };
+
+};
+
+////////////////////////////////////////////////////////////////////////////////
+
+/// Builder representing any value.
+using TFluentAny = TFluentYsonBuilder::TAny<TFluentYsonVoid>;
+
+/// Builder representing the inside of a list (list fragment).
+using TFluentList = TFluentYsonBuilder::TListType<TFluentYsonVoid>;
+
+/// Builder representing the inside of a map (map fragment).
+using TFluentMap = TFluentYsonBuilder::TMapType<TFluentYsonVoid>;
+
+/// Builder representing the inside of attributes.
+using TFluentAttributes = TFluentYsonBuilder::TAttributes<TFluentYsonVoid>;
+
+////////////////////////////////////////////////////////////////////////////////
+
+/// Create a fluent adapter to invoke methods of `consumer`.
+static inline TFluentAny BuildYsonFluently(NYT::NYson::IYsonConsumer* consumer)
+{
+ return TFluentAny(consumer, TFluentYsonVoid());
+}
+
+/// Create a fluent adapter to invoke methods of `consumer` describing the contents of a list.
+static inline TFluentList BuildYsonListFluently(NYT::NYson::IYsonConsumer* consumer)
+{
+ return TFluentList(consumer);
+}
+
+/// Create a fluent adapter to invoke methods of `consumer` describing the contents of a map.
+static inline TFluentMap BuildYsonMapFluently(NYT::NYson::IYsonConsumer* consumer)
+{
+ return TFluentMap(consumer);
+}
+
+////////////////////////////////////////////////////////////////////////////////
+
+class TFluentYsonWriterState
+ : public TThrRefBase
+{
+public:
+ using TValue = TString;
+
+ explicit TFluentYsonWriterState(::NYson::EYsonFormat format)
+ : Writer(&Output, format)
+ { }
+
+ TString GetValue()
+ {
+ return Output.Str();
+ }
+
+ NYT::NYson::IYsonConsumer* GetConsumer()
+ {
+ return &Writer;
+ }
+
+private:
+ TStringStream Output;
+ ::NYson::TYsonWriter Writer;
+};
+
+////////////////////////////////////////////////////////////////////////////////
+
+class TFluentYsonBuilderState
+ : public TThrRefBase
+{
+public:
+ using TValue = TNode;
+
+ explicit TFluentYsonBuilderState()
+ : Builder(&Node)
+ { }
+
+ TNode GetValue()
+ {
+ return std::move(Node);
+ }
+
+ NYT::NYson::IYsonConsumer* GetConsumer()
+ {
+ return &Builder;
+ }
+
+private:
+ TNode Node;
+ TNodeBuilder Builder;
+};
+
+////////////////////////////////////////////////////////////////////////////////
+
+template <class TState>
+class TFluentYsonHolder
+{
+public:
+ explicit TFluentYsonHolder(::TIntrusivePtr<TState> state)
+ : State(state)
+ { }
+
+ ::TIntrusivePtr<TState> GetState() const
+ {
+ return State;
+ }
+
+private:
+ ::TIntrusivePtr<TState> State;
+};
+
+////////////////////////////////////////////////////////////////////////////////
+
+template <class TState>
+struct TFluentYsonUnwrapper< TFluentYsonHolder<TState> >
+{
+ using TUnwrapped = typename TState::TValue;
+
+ static TUnwrapped Unwrap(const TFluentYsonHolder<TState>& holder)
+ {
+ return std::move(holder.GetState()->GetValue());
+ }
+};
+
+////////////////////////////////////////////////////////////////////////////////
+
+template <class TState>
+TFluentYsonBuilder::TAny<TFluentYsonHolder<TState>>
+BuildYsonFluentlyWithState(::TIntrusivePtr<TState> state)
+{
+ return TFluentYsonBuilder::TAny<TFluentYsonHolder<TState>>(
+ state->GetConsumer(),
+ TFluentYsonHolder<TState>(state));
+}
+
+/// Create a fluent adapter returning a `TString` with corresponding YSON when construction is finished.
+inline TFluentYsonBuilder::TAny<TFluentYsonHolder<TFluentYsonWriterState>>
+BuildYsonStringFluently(::NYson::EYsonFormat format = ::NYson::EYsonFormat::Text)
+{
+ ::TIntrusivePtr<TFluentYsonWriterState> state(new TFluentYsonWriterState(format));
+ return BuildYsonFluentlyWithState(state);
+}
+
+/// Create a fluent adapter returning a @ref NYT::TNode when construction is finished.
+inline TFluentYsonBuilder::TAny<TFluentYsonHolder<TFluentYsonBuilderState>>
+BuildYsonNodeFluently()
+{
+ ::TIntrusivePtr<TFluentYsonBuilderState> state(new TFluentYsonBuilderState);
+ return BuildYsonFluentlyWithState(state);
+}
+
+////////////////////////////////////////////////////////////////////////////////
+
+} // namespace NYT
diff --git a/yt/cpp/mapreduce/interface/format.cpp b/yt/cpp/mapreduce/interface/format.cpp
new file mode 100644
index 0000000000..f8318310a4
--- /dev/null
+++ b/yt/cpp/mapreduce/interface/format.cpp
@@ -0,0 +1,135 @@
+#include "format.h"
+#include "protobuf_format.h"
+
+#include "errors.h"
+
+#include <google/protobuf/descriptor.h>
+#include <google/protobuf/messagext.h>
+
+namespace NYT {
+
+TTableSchema CreateTableSchema(
+ const ::google::protobuf::Descriptor& messageDescriptor,
+ bool keepFieldsWithoutExtension)
+{
+ return NDetail::CreateTableSchemaImpl(messageDescriptor, keepFieldsWithoutExtension);
+}
+
+////////////////////////////////////////////////////////////////////////////////
+
+TFormat::TFormat(const TNode& config)
+ : Config(config)
+{ }
+
+
+TFormat TFormat::Protobuf(
+ const TVector<const ::google::protobuf::Descriptor*>& descriptors,
+ bool withDescriptors)
+{
+ if (withDescriptors) {
+ return TFormat(NDetail::MakeProtoFormatConfigWithDescriptors(descriptors));
+ } else {
+ return TFormat(NDetail::MakeProtoFormatConfigWithTables(descriptors));
+ }
+}
+
+TFormat TFormat::YsonText()
+{
+ TNode config("yson");
+ config.Attributes()("format", "text");
+ return TFormat(config);
+}
+
+TFormat TFormat::YsonBinary()
+{
+ TNode config("yson");
+ config.Attributes()("format", "binary");
+ return TFormat(config);
+}
+
+TFormat TFormat::YaMRLenval()
+{
+ TNode config("yamr");
+ config.Attributes()("lenval", true)("has_subkey", true);
+ return TFormat(config);
+}
+
+TFormat TFormat::Json()
+{
+ return TFormat(TNode("json"));
+}
+
+bool TFormat::IsTextYson() const
+{
+ if (!Config.IsString() || Config.AsString() != "yson") {
+ return false;
+ }
+ if (!Config.HasAttributes()) {
+ return false;
+ }
+ const auto& attributes = Config.GetAttributes();
+ if (!attributes.HasKey("format") || attributes["format"] != TNode("text")) {
+ return false;
+ }
+ return true;
+}
+
+bool TFormat::IsProtobuf() const
+{
+ return Config.IsString() && Config.AsString() == "protobuf";
+}
+
+bool TFormat::IsYamredDsv() const
+{
+ return Config.IsString() && Config.AsString() == "yamred_dsv";
+}
+
+static TString FormatName(const TFormat& format)
+{
+ if (!format.Config.IsString()) {
+ Y_VERIFY(format.Config.IsUndefined());
+ return "<undefined>";
+ }
+ return format.Config.AsString();
+}
+
+TYamredDsvAttributes TFormat::GetYamredDsvAttributes() const
+{
+ if (!IsYamredDsv()) {
+ ythrow TApiUsageError() << "Cannot get yamred_dsv attributes for " << FormatName(*this) << " format";
+ }
+ TYamredDsvAttributes attributes;
+
+ const auto& nodeAttributes = Config.GetAttributes();
+ {
+ const auto& keyColumns = nodeAttributes["key_column_names"];
+ if (!keyColumns.IsList()) {
+ ythrow yexception() << "Ill-formed format: key_column_names is of non-list type: " << keyColumns.GetType();
+ }
+ for (auto& column : keyColumns.AsList()) {
+ if (!column.IsString()) {
+ ythrow yexception() << "Ill-formed format: key_column_names: " << column.GetType();
+ }
+ attributes.KeyColumnNames.push_back(column.AsString());
+ }
+ }
+
+ if (nodeAttributes.HasKey("subkey_column_names")) {
+ const auto& subkeyColumns = nodeAttributes["subkey_column_names"];
+ if (!subkeyColumns.IsList()) {
+ ythrow yexception() << "Ill-formed format: subkey_column_names is not a list: " << subkeyColumns.GetType();
+ }
+ for (const auto& column : subkeyColumns.AsList()) {
+ if (!column.IsString()) {
+ ythrow yexception() << "Ill-formed format: non-string inside subkey_key_column_names: " << column.GetType();
+ }
+ attributes.SubkeyColumnNames.push_back(column.AsString());
+ }
+ }
+
+ return attributes;
+}
+
+////////////////////////////////////////////////////////////////////////////////
+
+} // namespace NYT
diff --git a/yt/cpp/mapreduce/interface/format.h b/yt/cpp/mapreduce/interface/format.h
new file mode 100644
index 0000000000..e297576464
--- /dev/null
+++ b/yt/cpp/mapreduce/interface/format.h
@@ -0,0 +1,122 @@
+#pragma once
+
+///
+/// @file yt/cpp/mapreduce/interface/format.h
+///
+/// Header containing class to work with raw [YT formats](https://yt.yandex-team.ru/docs/description/storage/formats.html).
+
+#include "node.h"
+
+#include <google/protobuf/descriptor.h>
+
+namespace NYT {
+
+////////////////////////////////////////////////////////////////////////////////
+
+/// @deprecated
+struct TYamredDsvAttributes
+{
+ /// Names of key columns.
+ TVector<TString> KeyColumnNames;
+
+ /// Names of subkey columns.
+ TVector<TString> SubkeyColumnNames;
+};
+
+////////////////////////////////////////////////////////////////////////////////
+
+/// @brief Class representing YT data format.
+///
+/// Normally the user does not need to use it.
+/// However, the class is handy for "raw" operations and table reading and writing,
+/// e.g. @ref NYT::IOperationClient::RawMap and other raw operations,
+/// @ref NYT::IIOClient::CreateRawReader and @ref NYT::IIOClient::CreateRawWriter.
+/// Anyway, the static factory methods should be preferred to the constructor.
+///
+/// @see [YT doc](https://yt.yandex-team.ru/docs/description/storage/formats.html).
+struct TFormat
+{
+public:
+ /// Format representation understandable by YT.
+ TNode Config;
+
+public:
+ /// @brief Construct format from given YT format representation.
+ ///
+ /// @note Prefer using static factory methods (e.g. @ref NYT::TFormat::YsonBinary, @ref NYT::TFormat::YsonText, @ref NYT::TFormat::Protobuf).
+ explicit TFormat(const TNode& config = TNode());
+
+ /// @brief Create text YSON format.
+ ///
+ /// @see [the doc](https://yt.yandex-team.ru/docs/description/storage/formats.html#YSON)
+ static TFormat YsonText();
+
+ /// @brief Create binary YSON format.
+ ///
+ /// @see [the doc](https://yt.yandex-team.ru/docs/description/storage/formats.html#YSON)
+ static TFormat YsonBinary();
+
+ /// @brief Create YaMR format.
+ ///
+ /// @deprecated
+ static TFormat YaMRLenval();
+
+ /// @brief Create protobuf format from protobuf message descriptors.
+ ///
+ /// @see [the doc](https://yt.yandex-team.ru/docs/api/c++/protobuf.html).
+ static TFormat Protobuf(
+ const TVector<const ::google::protobuf::Descriptor*>& descriptors,
+ bool withDescriptors = false);
+
+ /// @brief Create JSON format.
+ ///
+ /// @see [the doc](https://yt.yandex-team.ru/docs/description/storage/formats.html#JSON)
+ static TFormat Json();
+
+ /// @brief Create protobuf format for the message specified in template parameter.
+ ///
+ /// `T` must be inherited from `Message`.
+ ///
+ /// @see [the doc](https://yt.yandex-team.ru/docs/api/c++/protobuf.html).
+ template<typename T>
+ static inline TFormat Protobuf(bool withDescriptors = false);
+
+ /// @brief Is the format text YSON?
+ ///
+ /// @see [the doc](https://yt.yandex-team.ru/docs/description/storage/formats.html#YSON)
+ bool IsTextYson() const;
+
+ /// @brief Is the format protobuf?
+ ///
+ /// @see [the doc](https://yt.yandex-team.ru/docs/api/c++/protobuf.html)
+ bool IsProtobuf() const;
+
+ /// @brief Is the format YaMR?
+ ///
+ /// @deprecated
+ bool IsYamredDsv() const;
+
+ /// @brief For YAMR format returns its attributes in structured way.
+ ///
+ /// @deprecated
+ TYamredDsvAttributes GetYamredDsvAttributes() const;
+};
+
+////////////////////////////////////////////////////////////////////////////////
+
+template<typename T>
+TFormat TFormat::Protobuf(bool withDescriptors) {
+ return TFormat::Protobuf({T::descriptor()}, withDescriptors);
+}
+
+/// @brief Create table schema from protobuf message descriptor.
+///
+/// @param messageDescriptor Message descriptor
+/// @param keepFieldsWithoutExtension Add to schema fields without "column_name" or "key_column_name" extensions.
+TTableSchema CreateTableSchema(
+ const ::google::protobuf::Descriptor& messageDescriptor,
+ bool keepFieldsWithoutExtension);
+
+////////////////////////////////////////////////////////////////////////////////
+
+} // namespace NYT
diff --git a/yt/cpp/mapreduce/interface/format_ut.cpp b/yt/cpp/mapreduce/interface/format_ut.cpp
new file mode 100644
index 0000000000..069c29087d
--- /dev/null
+++ b/yt/cpp/mapreduce/interface/format_ut.cpp
@@ -0,0 +1,235 @@
+#include "common.h"
+#include "errors.h"
+#include "format.h"
+#include "common_ut.h"
+
+#include <yt/cpp/mapreduce/interface/proto3_ut.pb.h>
+#include <yt/cpp/mapreduce/interface/protobuf_table_schema_ut.pb.h>
+
+#include <library/cpp/testing/unittest/registar.h>
+
+using namespace NYT;
+
+static TNode GetColumns(const TFormat& format, int tableIndex = 0)
+{
+ return format.Config.GetAttributes()["tables"][tableIndex]["columns"];
+}
+
+Y_UNIT_TEST_SUITE(ProtobufFormat)
+{
+ Y_UNIT_TEST(TIntegral)
+ {
+ const auto format = TFormat::Protobuf<NUnitTesting::TIntegral>();
+ auto columns = GetColumns(format);
+
+ struct TColumn
+ {
+ TString Name;
+ TString ProtoType;
+ int FieldNumber;
+ };
+
+ auto expected = TVector<TColumn>{
+ {"DoubleField", "double", 1},
+ {"FloatField", "float", 2},
+ {"Int32Field", "int32", 3},
+ {"Int64Field", "int64", 4},
+ {"Uint32Field", "uint32", 5},
+ {"Uint64Field", "uint64", 6},
+ {"Sint32Field", "sint32", 7},
+ {"Sint64Field", "sint64", 8},
+ {"Fixed32Field", "fixed32", 9},
+ {"Fixed64Field", "fixed64", 10},
+ {"Sfixed32Field", "sfixed32", 11},
+ {"Sfixed64Field", "sfixed64", 12},
+ {"BoolField", "bool", 13},
+ {"EnumField", "enum_string", 14},
+ };
+
+ UNIT_ASSERT_VALUES_EQUAL(columns.Size(), expected.size());
+ for (int i = 0; i < static_cast<int>(columns.Size()); ++i) {
+ UNIT_ASSERT_VALUES_EQUAL(columns[i]["name"], expected[i].Name);
+ UNIT_ASSERT_VALUES_EQUAL(columns[i]["proto_type"], expected[i].ProtoType);
+ UNIT_ASSERT_VALUES_EQUAL(columns[i]["field_number"], expected[i].FieldNumber);
+ }
+ }
+
+ Y_UNIT_TEST(TRowFieldSerializationOption)
+ {
+ const auto format = TFormat::Protobuf<NUnitTesting::TRowFieldSerializationOption>();
+ auto columns = GetColumns(format);
+
+ UNIT_ASSERT_VALUES_EQUAL(columns[0]["name"], "UrlRow_1");
+ UNIT_ASSERT_VALUES_EQUAL(columns[0]["proto_type"], "structured_message");
+ UNIT_ASSERT_VALUES_EQUAL(columns[0]["field_number"], 1);
+ const auto& fields = columns[0]["fields"];
+ UNIT_ASSERT_VALUES_EQUAL(fields[0]["name"], "Host");
+ UNIT_ASSERT_VALUES_EQUAL(fields[0]["proto_type"], "string");
+ UNIT_ASSERT_VALUES_EQUAL(fields[0]["field_number"], 1);
+
+ UNIT_ASSERT_VALUES_EQUAL(fields[1]["name"], "Path");
+ UNIT_ASSERT_VALUES_EQUAL(fields[1]["proto_type"], "string");
+ UNIT_ASSERT_VALUES_EQUAL(fields[1]["field_number"], 2);
+
+ UNIT_ASSERT_VALUES_EQUAL(fields[2]["name"], "HttpCode");
+ UNIT_ASSERT_VALUES_EQUAL(fields[2]["proto_type"], "sint32");
+ UNIT_ASSERT_VALUES_EQUAL(fields[2]["field_number"], 3);
+
+ UNIT_ASSERT_VALUES_EQUAL(columns[1]["name"], "UrlRow_2");
+ UNIT_ASSERT_VALUES_EQUAL(columns[1]["proto_type"], "message");
+ UNIT_ASSERT_VALUES_EQUAL(columns[1]["field_number"], 2);
+ }
+
+ Y_UNIT_TEST(Packed)
+ {
+ const auto format = TFormat::Protobuf<NUnitTesting::TPacked>();
+ auto column = GetColumns(format)[0];
+
+ UNIT_ASSERT_VALUES_EQUAL(column["name"], "PackedListInt64");
+ UNIT_ASSERT_VALUES_EQUAL(column["proto_type"], "int64");
+ UNIT_ASSERT_VALUES_EQUAL(column["field_number"], 1);
+ UNIT_ASSERT_VALUES_EQUAL(column["packed"], true);
+ UNIT_ASSERT_VALUES_EQUAL(column["repeated"], true);
+ }
+
+ Y_UNIT_TEST(Cyclic)
+ {
+ UNIT_ASSERT_EXCEPTION(TFormat::Protobuf<NUnitTesting::TCyclic>(), TApiUsageError);
+ UNIT_ASSERT_EXCEPTION(TFormat::Protobuf<NUnitTesting::TCyclic::TA>(), TApiUsageError);
+ UNIT_ASSERT_EXCEPTION(TFormat::Protobuf<NUnitTesting::TCyclic::TB>(), TApiUsageError);
+ UNIT_ASSERT_EXCEPTION(TFormat::Protobuf<NUnitTesting::TCyclic::TC>(), TApiUsageError);
+ UNIT_ASSERT_EXCEPTION(TFormat::Protobuf<NUnitTesting::TCyclic::TD>(), TApiUsageError);
+
+ const auto format = TFormat::Protobuf<NUnitTesting::TCyclic::TE>();
+ auto column = GetColumns(format)[0];
+ UNIT_ASSERT_VALUES_EQUAL(column["name"], "d");
+ UNIT_ASSERT_VALUES_EQUAL(column["proto_type"], "message");
+ UNIT_ASSERT_VALUES_EQUAL(column["field_number"], 1);
+ }
+
+ Y_UNIT_TEST(Map)
+ {
+ const auto format = TFormat::Protobuf<NUnitTesting::TWithMap>();
+ auto columns = GetColumns(format);
+
+ UNIT_ASSERT_VALUES_EQUAL(columns.Size(), 5);
+ {
+ const auto& column = columns[0];
+ UNIT_ASSERT_VALUES_EQUAL(column["name"], "MapDefault");
+ UNIT_ASSERT_VALUES_EQUAL(column["proto_type"], "structured_message");
+ UNIT_ASSERT_VALUES_EQUAL(column["fields"].Size(), 2);
+ UNIT_ASSERT_VALUES_EQUAL(column["fields"][0]["proto_type"], "int64");
+ UNIT_ASSERT_VALUES_EQUAL(column["fields"][1]["proto_type"], "message");
+ }
+ {
+ const auto& column = columns[1];
+ UNIT_ASSERT_VALUES_EQUAL(column["name"], "MapListOfStructsLegacy");
+ UNIT_ASSERT_VALUES_EQUAL(column["proto_type"], "structured_message");
+ UNIT_ASSERT_VALUES_EQUAL(column["fields"].Size(), 2);
+ UNIT_ASSERT_VALUES_EQUAL(column["fields"][0]["proto_type"], "int64");
+ UNIT_ASSERT_VALUES_EQUAL(column["fields"][1]["proto_type"], "message");
+ }
+ {
+ const auto& column = columns[2];
+ UNIT_ASSERT_VALUES_EQUAL(column["name"], "MapListOfStructs");
+ UNIT_ASSERT_VALUES_EQUAL(column["proto_type"], "structured_message");
+ UNIT_ASSERT_VALUES_EQUAL(column["fields"].Size(), 2);
+ UNIT_ASSERT_VALUES_EQUAL(column["fields"][0]["proto_type"], "int64");
+ UNIT_ASSERT_VALUES_EQUAL(column["fields"][1]["proto_type"], "structured_message");
+ }
+ {
+ const auto& column = columns[3];
+ UNIT_ASSERT_VALUES_EQUAL(column["name"], "MapOptionalDict");
+ UNIT_ASSERT_VALUES_EQUAL(column["proto_type"], "structured_message");
+ UNIT_ASSERT_VALUES_EQUAL(column["fields"].Size(), 2);
+ UNIT_ASSERT_VALUES_EQUAL(column["fields"][0]["proto_type"], "int64");
+ UNIT_ASSERT_VALUES_EQUAL(column["fields"][1]["proto_type"], "structured_message");
+ }
+ {
+ const auto& column = columns[4];
+ UNIT_ASSERT_VALUES_EQUAL(column["name"], "MapDict");
+ UNIT_ASSERT_VALUES_EQUAL(column["proto_type"], "structured_message");
+ UNIT_ASSERT_VALUES_EQUAL(column["fields"].Size(), 2);
+ UNIT_ASSERT_VALUES_EQUAL(column["fields"][0]["proto_type"], "int64");
+ UNIT_ASSERT_VALUES_EQUAL(column["fields"][1]["proto_type"], "structured_message");
+ }
+ }
+
+ Y_UNIT_TEST(Oneof)
+ {
+ const auto format = TFormat::Protobuf<NUnitTesting::TWithOneof>();
+ auto columns = GetColumns(format);
+
+ UNIT_ASSERT_VALUES_EQUAL(columns.Size(), 4);
+ auto check = [] (const TNode& column, TStringBuf name, TStringBuf oneof2Name) {
+ UNIT_ASSERT_VALUES_EQUAL(column["name"], name);
+ UNIT_ASSERT_VALUES_EQUAL(column["proto_type"], "structured_message");
+ UNIT_ASSERT_VALUES_EQUAL(column["fields"].Size(), 5);
+ UNIT_ASSERT_VALUES_EQUAL(column["fields"][0]["name"], "field");
+
+ const auto& oneof2 = column["fields"][1];
+ UNIT_ASSERT_VALUES_EQUAL(oneof2["name"], oneof2Name);
+ UNIT_ASSERT_VALUES_EQUAL(oneof2["proto_type"], "oneof");
+ UNIT_ASSERT_VALUES_EQUAL(oneof2["fields"][0]["name"], "y2");
+ UNIT_ASSERT_VALUES_EQUAL(oneof2["fields"][1]["name"], "z2");
+ UNIT_ASSERT_VALUES_EQUAL(oneof2["fields"][1]["proto_type"], "structured_message");
+ const auto& embeddedOneof = oneof2["fields"][1]["fields"][0];
+ UNIT_ASSERT_VALUES_EQUAL(embeddedOneof["name"], "Oneof");
+ UNIT_ASSERT_VALUES_EQUAL(embeddedOneof["fields"][0]["name"], "x");
+ UNIT_ASSERT_VALUES_EQUAL(embeddedOneof["fields"][1]["name"], "y");
+ UNIT_ASSERT_VALUES_EQUAL(oneof2["fields"][2]["name"], "x2");
+
+ UNIT_ASSERT_VALUES_EQUAL(column["fields"][2]["name"], "x1");
+ UNIT_ASSERT_VALUES_EQUAL(column["fields"][3]["name"], "y1");
+ UNIT_ASSERT_VALUES_EQUAL(column["fields"][4]["name"], "z1");
+ };
+
+ check(columns[0], "DefaultSeparateFields", "variant_field_name");
+ check(columns[1], "NoDefault", "Oneof2");
+
+ {
+ const auto& column = columns[2];
+ UNIT_ASSERT_VALUES_EQUAL(column["name"], "SerializationProtobuf");
+ UNIT_ASSERT_VALUES_EQUAL(column["proto_type"], "structured_message");
+ UNIT_ASSERT_VALUES_EQUAL(column["fields"].Size(), 3);
+ UNIT_ASSERT_VALUES_EQUAL(column["fields"][0]["name"], "x1");
+ UNIT_ASSERT_VALUES_EQUAL(column["fields"][1]["name"], "y1");
+ UNIT_ASSERT_VALUES_EQUAL(column["fields"][2]["name"], "z1");
+ }
+ {
+ const auto& column = columns[3];
+ UNIT_ASSERT_VALUES_EQUAL(column["name"], "TopLevelOneof");
+ UNIT_ASSERT_VALUES_EQUAL(column["proto_type"], "oneof");
+ UNIT_ASSERT_VALUES_EQUAL(column["fields"].Size(), 1);
+ UNIT_ASSERT_VALUES_EQUAL(column["fields"][0]["name"], "MemberOfTopLevelOneof");
+ }
+ }
+}
+
+Y_UNIT_TEST_SUITE(Proto3)
+{
+ Y_UNIT_TEST(TWithOptional)
+ {
+ const auto format = TFormat::Protobuf<NTestingProto3::TWithOptional>();
+ auto columns = GetColumns(format);
+
+ UNIT_ASSERT_VALUES_EQUAL(columns[0]["name"], "x");
+ UNIT_ASSERT_VALUES_EQUAL(columns[0]["proto_type"], "int64");
+ UNIT_ASSERT_VALUES_EQUAL(columns[0]["field_number"], 1);
+ }
+
+ Y_UNIT_TEST(TWithOptionalMessage)
+ {
+ const auto format = TFormat::Protobuf<NTestingProto3::TWithOptionalMessage>();
+ auto columns = GetColumns(format);
+
+ UNIT_ASSERT_VALUES_EQUAL(columns[0]["name"], "x");
+ UNIT_ASSERT_VALUES_EQUAL(columns[0]["proto_type"], "structured_message");
+ UNIT_ASSERT_VALUES_EQUAL(columns[0]["field_number"], 1);
+
+ UNIT_ASSERT_VALUES_EQUAL(columns[0]["fields"].Size(), 1);
+ UNIT_ASSERT_VALUES_EQUAL(columns[0]["fields"][0]["name"], "x");
+ UNIT_ASSERT_VALUES_EQUAL(columns[0]["fields"][0]["proto_type"], "int64");
+ UNIT_ASSERT_VALUES_EQUAL(columns[0]["fields"][0]["field_number"], 1);
+ }
+}
diff --git a/yt/cpp/mapreduce/interface/fwd.h b/yt/cpp/mapreduce/interface/fwd.h
new file mode 100644
index 0000000000..0434c03d8b
--- /dev/null
+++ b/yt/cpp/mapreduce/interface/fwd.h
@@ -0,0 +1,397 @@
+#pragma once
+
+///
+/// @file yt/cpp/mapreduce/interface/fwd.h
+///
+/// Header containing mostly forward declarations of types.
+
+
+#include <util/generic/fwd.h>
+#include <util/system/types.h>
+
+#include <variant>
+
+/// @cond Doxygen_Suppress
+namespace google::protobuf {
+ class Message;
+}
+
+namespace NYT {
+
+ ////////////////////////////////////////////////////////////////////////////////
+ // batch_request.h
+ ////////////////////////////////////////////////////////////////////////////////
+
+ class IBatchRequest;
+ using TBatchRequestPtr = ::TIntrusivePtr<IBatchRequest>;
+
+ ////////////////////////////////////////////////////////////////////////////////
+ // client.h
+ ////////////////////////////////////////////////////////////////////////////////
+
+ enum ELockMode : int;
+
+ struct TStartTransactionOptions;
+
+ struct TLockOptions;
+
+ template <class TDerived>
+ struct TTabletOptions;
+
+ struct TMountTableOptions;
+
+ struct TUnmountTableOptions;
+
+ struct TRemountTableOptions;
+
+ struct TReshardTableOptions;
+
+ struct TAlterTableOptions;
+
+ struct TLookupRowsOptions;
+
+ struct TSelectRowsOptions;
+
+ struct TCreateClientOptions;
+
+ struct TAlterTableReplicaOptions;
+
+ struct TGetFileFromCacheOptions;
+
+ struct TPutFileToCacheOptions;
+
+ struct TCheckPermissionResult;
+ struct TCheckPermissionResponse;
+ struct TCheckPermissionOptions;
+
+ struct TTabletInfo;
+
+ class ILock;
+ using ILockPtr = ::TIntrusivePtr<ILock>;
+
+ class ITransaction;
+ using ITransactionPtr = ::TIntrusivePtr<ITransaction>;
+
+ class ITransactionPinger;
+ using ITransactionPingerPtr = ::TIntrusivePtr<ITransactionPinger>;
+
+ struct IOperation;
+ using IOperationPtr = ::TIntrusivePtr<IOperation>;
+
+ class IClientBase;
+
+ class IClient;
+
+ using IClientPtr = ::TIntrusivePtr<IClient>;
+ using IClientBasePtr = ::TIntrusivePtr<IClientBase>;
+
+ ////////////////////////////////////////////////////////////////////////////////
+ // config.h
+ ////////////////////////////////////////////////////////////////////////////////
+
+ struct TConfig;
+ using TConfigPtr = ::TIntrusivePtr<TConfig>;
+
+ ////////////////////////////////////////////////////////////////////////////////
+ // cypress.h
+ ////////////////////////////////////////////////////////////////////////////////
+
+ enum ENodeType : int;
+
+ struct TCreateOptions;
+
+ struct TRemoveOptions;
+
+ struct TGetOptions;
+
+ struct TSetOptions;
+
+ struct TMultisetAttributesOptions;
+
+ struct TListOptions;
+
+ struct TCopyOptions;
+
+ struct TMoveOptions;
+
+ struct TLinkOptions;
+
+ struct TConcatenateOptions;
+
+ struct TInsertRowsOptions;
+
+ struct TDeleteRowsOptions;
+
+ struct TTrimRowsOptions;
+
+ class ICypressClient;
+
+ ////////////////////////////////////////////////////////////////////////////////
+ // errors.h
+ ////////////////////////////////////////////////////////////////////////////////
+
+ class TApiUsageError;
+
+ class TYtError;
+
+ class TErrorResponse;
+
+ struct TFailedJobInfo;
+
+ class TOperationFailedError;
+
+ ////////////////////////////////////////////////////////////////////////////////
+ // node.h
+ ////////////////////////////////////////////////////////////////////////////////
+
+ class TNode;
+
+ ////////////////////////////////////////////////////////////////////////////////
+ // common.h
+ ////////////////////////////////////////////////////////////////////////////////
+
+ using TTransactionId = TGUID;
+ using TNodeId = TGUID;
+ using TLockId = TGUID;
+ using TOperationId = TGUID;
+ using TTabletCellId = TGUID;
+ using TReplicaId = TGUID;
+ using TJobId = TGUID;
+
+ using TYPath = TString;
+ using TLocalFilePath = TString;
+
+ template <class T, class TDerived = void>
+ struct TOneOrMany;
+
+ // key column values
+ using TKey = TOneOrMany<TNode>;
+
+ class TSortColumn;
+
+ // column names
+ using TColumnNames = TOneOrMany<TString>;
+
+ // key column descriptors.
+ class TSortColumns;
+
+ enum EValueType : int;
+
+ enum ESortOrder : int;
+
+ enum EOptimizeForAttr : i8;
+
+ enum EErasureCodecAttr : i8;
+
+ enum ESchemaModificationAttr : i8;
+
+ enum class EMasterReadKind : int;
+
+ class TColumnSchema;
+
+ class TTableSchema;
+
+ enum class ERelation;
+
+ struct TKeyBound;
+
+ struct TReadLimit;
+
+ struct TReadRange;
+
+ struct TRichYPath;
+
+ struct TAttributeFilter;
+
+ ////////////////////////////////////////////////////////////////////////////////
+ // io.h
+ ////////////////////////////////////////////////////////////////////////////////
+
+ enum class EFormatType : int;
+
+ struct TFormat;
+
+ class IFileReader;
+
+ using IFileReaderPtr = ::TIntrusivePtr<IFileReader>;
+
+ class IFileWriter;
+
+ using IFileWriterPtr = ::TIntrusivePtr<IFileWriter>;
+
+ class IBlobTableReader;
+ using IBlobTableReaderPtr = ::TIntrusivePtr<IBlobTableReader>;
+
+ class TRawTableReader;
+
+ using TRawTableReaderPtr = ::TIntrusivePtr<TRawTableReader>;
+
+ class TRawTableWriter;
+
+ using TRawTableWriterPtr = ::TIntrusivePtr<TRawTableWriter>;
+
+ template <class T, class = void>
+ class TTableReader;
+
+ template <class T, class = void>
+ class TTableRangesReader;
+
+ template <typename T>
+ using TTableRangesReaderPtr = ::TIntrusivePtr<TTableRangesReader<T>>;
+
+ template <class T>
+ using TTableReaderPtr = ::TIntrusivePtr<TTableReader<T>>;
+
+ template <class T, class = void>
+ class TTableWriter;
+
+ template <class T>
+ using TTableWriterPtr = ::TIntrusivePtr<TTableWriter<T>>;
+
+ struct TYaMRRow;
+
+ using ::google::protobuf::Message;
+
+ class ISkiffRowParser;
+
+ using ISkiffRowParserPtr = ::TIntrusivePtr<ISkiffRowParser>;
+
+ class ISkiffRowSkipper;
+
+ using ISkiffRowSkipperPtr = ::TIntrusivePtr<ISkiffRowSkipper>;
+
+ namespace NDetail {
+
+ class TYdlGenericRowType;
+
+ } // namespace NDetail
+
+ template<class... TYdlRowTypes>
+ class TYdlOneOf;
+
+ template<class... TProtoRowTypes>
+ class TProtoOneOf;
+
+ template<class... TSkiffRowTypes>
+ class TSkiffRowOneOf;
+
+ using TYaMRReader = TTableReader<TYaMRRow>;
+ using TYaMRWriter = TTableWriter<TYaMRRow>;
+ using TNodeReader = TTableReader<TNode>;
+ using TNodeWriter = TTableWriter<TNode>;
+ using TMessageReader = TTableReader<Message>;
+ using TMessageWriter = TTableWriter<Message>;
+ using TYdlTableWriter = TTableWriter<NDetail::TYdlGenericRowType>;
+
+ template <class TDerived>
+ struct TIOOptions;
+
+ struct TFileReaderOptions;
+
+ struct TFileWriterOptions;
+
+ struct TTableReaderOptions;
+
+ class TSkiffRowHints;
+
+ struct TTableWriterOptions;
+
+ ////////////////////////////////////////////////////////////////////////////////
+ // job_statistics.h
+ ////////////////////////////////////////////////////////////////////////////////
+
+ class TJobStatistics;
+
+ template <typename T>
+ class TJobStatisticsEntry;
+
+ ////////////////////////////////////////////////////////////////////////////////
+ // operation.h
+ ////////////////////////////////////////////////////////////////////////////////
+
+ class TFormatHints;
+
+ struct TUserJobSpec;
+
+ struct TMapOperationSpec;
+
+ struct TRawMapOperationSpec;
+
+ struct TReduceOperationSpec;
+
+ struct TMapReduceOperationSpec;
+
+ struct TJoinReduceOperationSpec;
+
+ struct TSortOperationSpec;
+
+ class IIOperationPreparationContext;
+
+ class IJob;
+ using IJobPtr = ::TIntrusivePtr<IJob>;
+
+ class IRawJob;
+ using IRawJobPtr = ::TIntrusivePtr<IRawJob>;
+
+ enum EMergeMode : int;
+
+ struct TMergeOperationSpec;
+
+ struct TEraseOperationSpec;
+
+ template <class TR, class TW>
+ class IMapper;
+
+ template <class TR, class TW>
+ class IReducer;
+
+ template <class TR, class TW>
+ class IAggregatorReducer;
+
+ struct TSuspendOperationOptions;
+
+ struct TResumeOperationOptions;
+
+ enum class EOperationBriefState : int;
+
+ struct TOperationAttributes;
+
+ struct TOperationOptions;
+
+ enum class EOperationAttribute : int;
+
+ struct TOperationAttributeFilter;
+
+ struct TGetOperationOptions;
+
+ struct TListOperationsOptions;
+
+ struct TGetJobOptions;
+
+ struct TListJobsOptions;
+
+ struct IOperationClient;
+
+ enum class EFinishedJobState : int;
+
+ enum class EJobType : int;
+ enum class EJobState : int;
+ enum class ETaskName : int;
+ class TTaskName;
+
+ struct TJobBinaryDefault;
+
+ struct TJobBinaryLocalPath;
+
+ struct TJobBinaryCypressPath;
+
+ using TJobBinaryConfig = std::variant<
+ TJobBinaryDefault,
+ TJobBinaryLocalPath,
+ TJobBinaryCypressPath>;
+
+ struct TRetryConfig;
+ class IRetryConfigProvider;
+ using IRetryConfigProviderPtr = ::TIntrusivePtr<IRetryConfigProvider>;
+}
+/// @endcond
diff --git a/yt/cpp/mapreduce/interface/init.h b/yt/cpp/mapreduce/interface/init.h
new file mode 100644
index 0000000000..302be268fc
--- /dev/null
+++ b/yt/cpp/mapreduce/interface/init.h
@@ -0,0 +1,71 @@
+#pragma once
+
+///
+/// @file yt/cpp/mapreduce/interface/init.h
+///
+/// Initialization functions of YT Wrapper.
+
+#include <yt/cpp/mapreduce/interface/wait_proxy.h>
+
+#include <util/generic/fwd.h>
+
+#include <functional>
+
+namespace NYT {
+
+////////////////////////////////////////////////////////////////////////////////
+
+/// Options for @ref NYT::Initialize() and @ref NYT::JoblessInitialize() functions
+struct TInitializeOptions
+{
+ using TSelf = TInitializeOptions;
+
+ ///
+ /// @brief Override waiting functions for YT Wrapper.
+ ///
+ /// This options allows to override functions used by this library to wait something.
+ FLUENT_FIELD_DEFAULT(::TIntrusivePtr<IWaitProxy>, WaitProxy, nullptr);
+
+ ///
+ /// @brief Enable/disable cleanup when program execution terminates abnormally.
+ ///
+ /// When set to true, library will abort all active transactions and running operations when program
+ /// terminates on error or signal.
+ FLUENT_FIELD_DEFAULT(bool, CleanupOnTermination, false);
+
+ ///
+ /// @brief Set callback to be called before exit() in job mode.
+ ///
+ /// Provided function will be called just before exit() when program is started in job mode.
+ /// This might be useful for shutting down libraries that are used inside operations.
+ ///
+ /// NOTE: Keep in mind that inside job execution environment differs from client execution environment.
+ /// So JobOnExitFunction should not depend on argc/argv environment variables etc.
+ FLUENT_FIELD_OPTION(std::function<void()>, JobOnExitFunction);
+};
+
+///
+/// @brief Performs basic initialization (logging, termination handlers, etc).
+///
+/// This function never switches to job mode.
+void JoblessInitialize(const TInitializeOptions& options = TInitializeOptions());
+
+///
+/// @brief Performs basic initialization and switches to a job mode if required.
+///
+/// This function performs basic initialization (it sets up logging reads the config, etc) and checks if binary is launched
+/// on YT machine inside a job. If latter is true this function launches proper job and after job is done it calls exit().
+///
+/// This function must be called if application starts any operation.
+/// This function must be called immediately after entering main() function before any argument parsing is done.
+void Initialize(int argc, const char **argv, const TInitializeOptions &options = TInitializeOptions());
+
+/// Similar to @ref NYT::Initialize(int, const char**, const TInitializeOptions&)
+void Initialize(int argc, char **argv, const TInitializeOptions &options = TInitializeOptions());
+
+/// Similar to @ref NYT::Initialize(int, const char**, const TInitializeOptions&)
+void Initialize(const TInitializeOptions &options = TInitializeOptions());
+
+////////////////////////////////////////////////////////////////////////////////
+
+} // namespace NYT
diff --git a/yt/cpp/mapreduce/interface/io-inl.h b/yt/cpp/mapreduce/interface/io-inl.h
new file mode 100644
index 0000000000..c35ebb7481
--- /dev/null
+++ b/yt/cpp/mapreduce/interface/io-inl.h
@@ -0,0 +1,1015 @@
+#pragma once
+
+#ifndef IO_INL_H_
+#error "Direct inclusion of this file is not allowed, use io.h"
+#endif
+#undef IO_INL_H_
+
+#include "finish_or_die.h"
+
+#include <util/generic/typetraits.h>
+#include <util/generic/yexception.h>
+#include <util/stream/length.h>
+
+#include <util/system/mutex.h>
+#include <util/system/spinlock.h>
+
+#include <library/cpp/yson/node/node_builder.h>
+
+#include <yt/cpp/mapreduce/interface/serialize.h>
+
+namespace NYT {
+
+////////////////////////////////////////////////////////////////////////////////
+
+namespace NDetail {
+
+template<class T>
+struct TIsProtoOneOf
+ : std::false_type
+{ };
+
+template <class ...TProtoRowTypes>
+struct TIsProtoOneOf<TProtoOneOf<TProtoRowTypes...>>
+ : std::true_type
+{ };
+
+template <class T>
+struct TIsSkiffRowOneOf
+ : std::false_type
+{ };
+
+template <class ...TSkiffRowTypes>
+struct TIsSkiffRowOneOf<TSkiffRowOneOf<TSkiffRowTypes...>>
+ : std::true_type
+{ };
+
+} // namespace NDetail
+
+////////////////////////////////////////////////////////////////////////////////
+
+template <class T, class = void>
+struct TRowTraits;
+
+template <>
+struct TRowTraits<TNode>
+{
+ using TRowType = TNode;
+ using IReaderImpl = INodeReaderImpl;
+ using IWriterImpl = INodeWriterImpl;
+};
+
+template <>
+struct TRowTraits<TYaMRRow>
+{
+ using TRowType = TYaMRRow;
+ using IReaderImpl = IYaMRReaderImpl;
+ using IWriterImpl = IYaMRWriterImpl;
+};
+
+template <>
+struct TRowTraits<Message>
+{
+ using TRowType = Message;
+ using IReaderImpl = IProtoReaderImpl;
+ using IWriterImpl = IProtoWriterImpl;
+};
+
+template <class T>
+struct TRowTraits<T, std::enable_if_t<TIsBaseOf<Message, T>::Value>>
+{
+ using TRowType = T;
+ using IReaderImpl = IProtoReaderImpl;
+ using IWriterImpl = IProtoWriterImpl;
+};
+
+template <class T>
+struct TRowTraits<T, std::enable_if_t<TIsSkiffRow<T>::value>>
+{
+ using TRowType = T;
+ using IReaderImpl = ISkiffRowReaderImpl;
+};
+
+template <class... TSkiffRowTypes>
+struct TRowTraits<TSkiffRowOneOf<TSkiffRowTypes...>>
+{
+ using TRowType = TSkiffRowOneOf<TSkiffRowTypes...>;
+ using IReaderImpl = ISkiffRowReaderImpl;
+};
+
+template <class... TProtoRowTypes>
+struct TRowTraits<TProtoOneOf<TProtoRowTypes...>>
+{
+ using TRowType = TProtoOneOf<TProtoRowTypes...>;
+ using IReaderImpl = IProtoReaderImpl;
+ using IWriterImpl = IProtoWriterImpl;
+};
+
+////////////////////////////////////////////////////////////////////////////////
+
+struct IReaderImplBase
+ : public TThrRefBase
+{
+ virtual bool IsValid() const = 0;
+ virtual void Next() = 0;
+ virtual ui32 GetTableIndex() const = 0;
+ virtual ui32 GetRangeIndex() const = 0;
+ virtual ui64 GetRowIndex() const = 0;
+ virtual void NextKey() = 0;
+
+ // Not pure virtual because of clients that has already implemented this interface.
+ virtual TMaybe<size_t> GetReadByteCount() const;
+ virtual i64 GetTabletIndex() const;
+ virtual bool IsEndOfStream() const;
+ virtual bool IsRawReaderExhausted() const;
+};
+
+struct INodeReaderImpl
+ : public IReaderImplBase
+{
+ virtual const TNode& GetRow() const = 0;
+ virtual void MoveRow(TNode* row) = 0;
+};
+
+struct IYaMRReaderImpl
+ : public IReaderImplBase
+{
+ virtual const TYaMRRow& GetRow() const = 0;
+ virtual void MoveRow(TYaMRRow* row)
+ {
+ *row = GetRow();
+ }
+};
+
+struct IProtoReaderImpl
+ : public IReaderImplBase
+{
+ virtual void ReadRow(Message* row) = 0;
+};
+
+struct ISkiffRowReaderImpl
+ : public IReaderImplBase
+{
+ virtual void ReadRow(const ISkiffRowParserPtr& parser) = 0;
+};
+
+////////////////////////////////////////////////////////////////////////////////
+
+namespace NDetail {
+
+////////////////////////////////////////////////////////////////////////////////
+
+// We don't include <yt/cpp/mapreduce/interface/logging/yt_log.h> in this file
+// to avoid macro name clashes (specifically YT_LOG_DEBUG)
+void LogTableReaderStatistics(ui64 rowCount, TMaybe<size_t> byteCount);
+
+template <class T>
+class TTableReaderBase
+ : public TThrRefBase
+{
+public:
+ using TRowType = typename TRowTraits<T>::TRowType;
+ using IReaderImpl = typename TRowTraits<T>::IReaderImpl;
+
+ explicit TTableReaderBase(::TIntrusivePtr<IReaderImpl> reader)
+ : Reader_(reader)
+ { }
+
+ ~TTableReaderBase() override
+ {
+ NDetail::LogTableReaderStatistics(ReadRowCount_, Reader_->GetReadByteCount());
+ }
+
+ bool IsValid() const
+ {
+ return Reader_->IsValid();
+ }
+
+ void Next()
+ {
+ Reader_->Next();
+ ++ReadRowCount_;
+ RowState_ = ERowState::None;
+ }
+
+ bool IsEndOfStream()
+ {
+ return Reader_->IsEndOfStream();
+ }
+
+ bool IsRawReaderExhausted()
+ {
+ return Reader_->IsRawReaderExhausted();
+ }
+
+ ui32 GetTableIndex() const
+ {
+ return Reader_->GetTableIndex();
+ }
+
+ ui32 GetRangeIndex() const
+ {
+ return Reader_->GetRangeIndex();
+ }
+
+ ui64 GetRowIndex() const
+ {
+ return Reader_->GetRowIndex();
+ }
+
+ i64 GetTabletIndex() const
+ {
+ return Reader_->GetTabletIndex();
+ }
+
+protected:
+ template <typename TCacher, typename TCacheGetter>
+ const auto& DoGetRowCached(TCacher cacher, TCacheGetter cacheGetter) const
+ {
+ switch (RowState_) {
+ case ERowState::None:
+ cacher();
+ RowState_ = ERowState::Cached;
+ break;
+ case ERowState::Cached:
+ break;
+ case ERowState::MovedOut:
+ ythrow yexception() << "Row is already moved";
+ }
+ return *cacheGetter();
+ }
+
+ template <typename U, typename TMover, typename TCacheMover>
+ void DoMoveRowCached(U* result, TMover mover, TCacheMover cacheMover)
+ {
+ Y_VERIFY(result);
+ switch (RowState_) {
+ case ERowState::None:
+ mover(result);
+ break;
+ case ERowState::Cached:
+ cacheMover(result);
+ break;
+ case ERowState::MovedOut:
+ ythrow yexception() << "Row is already moved";
+ }
+ RowState_ = ERowState::MovedOut;
+ }
+
+private:
+ enum class ERowState
+ {
+ None,
+ Cached,
+ MovedOut,
+ };
+
+protected:
+ ::TIntrusivePtr<IReaderImpl> Reader_;
+
+private:
+ ui64 ReadRowCount_ = 0;
+ mutable ERowState RowState_ = ERowState::None;
+};
+
+template <class T>
+class TSimpleTableReader
+ : public TTableReaderBase<T>
+{
+public:
+ using TBase = TTableReaderBase<T>;
+ using typename TBase::TRowType;
+
+ using TBase::TBase;
+
+ const TRowType& GetRow() const
+ {
+ // Caching is implemented in underlying reader.
+ return TBase::DoGetRowCached(
+ /* cacher */ [&] {},
+ /* cacheGetter */ [&] {
+ return &Reader_->GetRow();
+ });
+ }
+
+ void MoveRow(TRowType* result)
+ {
+ // Caching is implemented in underlying reader.
+ TBase::DoMoveRowCached(
+ result,
+ /* mover */ [&] (TRowType* result) {
+ Reader_->MoveRow(result);
+ },
+ /* cacheMover */ [&] (TRowType* result) {
+ Reader_->MoveRow(result);
+ });
+ }
+
+ TRowType MoveRow()
+ {
+ TRowType result;
+ MoveRow(&result);
+ return result;
+ }
+
+private:
+ using TBase::Reader_;
+};
+
+////////////////////////////////////////////////////////////////////////////////
+
+} // namespace NDetail
+
+template <>
+class TTableReader<TNode>
+ : public NDetail::TSimpleTableReader<TNode>
+{
+ using TSimpleTableReader<TNode>::TSimpleTableReader;
+};
+
+template <>
+class TTableReader<TYaMRRow>
+ : public NDetail::TSimpleTableReader<TYaMRRow>
+{
+ using TSimpleTableReader<TYaMRRow>::TSimpleTableReader;
+};
+
+template <>
+class TTableReader<Message>
+ : public NDetail::TTableReaderBase<Message>
+{
+public:
+ using TBase = NDetail::TTableReaderBase<Message>;
+
+ using TBase::TBase;
+
+ template <class U>
+ const U& GetRow() const
+ {
+ static_assert(TIsBaseOf<Message, U>::Value);
+
+ return TBase::DoGetRowCached(
+ /* cacher */ [&] {
+ CachedRow_.Reset(new U);
+ Reader_->ReadRow(CachedRow_.Get());
+ },
+ /* cacheGetter */ [&] {
+ auto result = dynamic_cast<const U*>(CachedRow_.Get());
+ Y_VERIFY(result);
+ return result;
+ });
+ }
+
+ template <class U>
+ void MoveRow(U* result)
+ {
+ static_assert(TIsBaseOf<Message, U>::Value);
+
+ TBase::DoMoveRowCached(
+ result,
+ /* mover */ [&] (U* result) {
+ Reader_->ReadRow(result);
+ },
+ /* cacheMover */ [&] (U* result) {
+ auto cast = dynamic_cast<U*>(CachedRow_.Get());
+ Y_VERIFY(cast);
+ result->Swap(cast);
+ });
+ }
+
+ template <class U>
+ U MoveRow()
+ {
+ static_assert(TIsBaseOf<Message, U>::Value);
+
+ U result;
+ MoveRow(&result);
+ return result;
+ }
+
+ ::TIntrusivePtr<IProtoReaderImpl> GetReaderImpl() const
+ {
+ return Reader_;
+ }
+
+private:
+ using TBase::Reader_;
+ mutable THolder<Message> CachedRow_;
+};
+
+template<class... TProtoRowTypes>
+class TTableReader<TProtoOneOf<TProtoRowTypes...>>
+ : public NDetail::TTableReaderBase<TProtoOneOf<TProtoRowTypes...>>
+{
+public:
+ using TBase = NDetail::TTableReaderBase<TProtoOneOf<TProtoRowTypes...>>;
+
+ using TBase::TBase;
+
+ template <class U>
+ const U& GetRow() const
+ {
+ AssertIsOneOf<U>();
+ return TBase::DoGetRowCached(
+ /* cacher */ [&] {
+ Reader_->ReadRow(&std::get<U>(CachedRows_));
+ CachedIndex_ = NDetail::TIndexInTuple<U, decltype(CachedRows_)>::Value;
+ },
+ /* cacheGetter */ [&] {
+ return &std::get<U>(CachedRows_);
+ });
+ }
+
+ template <class U>
+ void MoveRow(U* result)
+ {
+ AssertIsOneOf<U>();
+ return TBase::DoMoveRowCached(
+ result,
+ /* mover */ [&] (U* result) {
+ Reader_->ReadRow(result);
+ },
+ /* cacheMover */ [&] (U* result) {
+ Y_VERIFY((NDetail::TIndexInTuple<U, decltype(CachedRows_)>::Value) == CachedIndex_);
+ *result = std::move(std::get<U>(CachedRows_));
+ });
+ }
+
+ template <class U>
+ U MoveRow()
+ {
+ U result;
+ MoveRow(&result);
+ return result;
+ }
+
+ ::TIntrusivePtr<IProtoReaderImpl> GetReaderImpl() const
+ {
+ return Reader_;
+ }
+
+private:
+ using TBase::Reader_;
+ // std::variant could also be used here, but std::tuple leads to better performance
+ // because of deallocations that std::variant has to do
+ mutable std::tuple<TProtoRowTypes...> CachedRows_;
+ mutable int CachedIndex_;
+
+ template <class U>
+ static constexpr void AssertIsOneOf()
+ {
+ static_assert(
+ (std::is_same<U, TProtoRowTypes>::value || ...),
+ "Template parameter must be one of TProtoOneOf template parameter");
+ }
+};
+
+template <class T>
+class TTableReader<T, std::enable_if_t<TIsBaseOf<Message, T>::Value>>
+ : public TTableReader<TProtoOneOf<T>>
+{
+public:
+ using TRowType = T;
+ using TBase = TTableReader<TProtoOneOf<T>>;
+
+ using TBase::TBase;
+
+ const T& GetRow() const
+ {
+ return TBase::template GetRow<T>();
+ }
+
+ void MoveRow(T* result)
+ {
+ TBase::template MoveRow<T>(result);
+ }
+
+ T MoveRow()
+ {
+ return TBase::template MoveRow<T>();
+ }
+};
+
+template<class... TSkiffRowTypes>
+class TTableReader<TSkiffRowOneOf<TSkiffRowTypes...>>
+ : public NDetail::TTableReaderBase<TSkiffRowOneOf<TSkiffRowTypes...>>
+{
+public:
+ using TBase = NDetail::TTableReaderBase<TSkiffRowOneOf<TSkiffRowTypes...>>;
+
+ using TBase::TBase;
+
+ explicit TTableReader(::TIntrusivePtr<typename TBase::IReaderImpl> reader, const TMaybe<TSkiffRowHints>& hints)
+ : TBase(reader)
+ , Parsers_({(CreateSkiffParser<TSkiffRowTypes>(&std::get<TSkiffRowTypes>(CachedRows_), hints))...})
+ { }
+
+ template <class U>
+ const U& GetRow() const
+ {
+ AssertIsOneOf<U>();
+ return TBase::DoGetRowCached(
+ /* cacher */ [&] {
+ auto index = NDetail::TIndexInTuple<U, decltype(CachedRows_)>::Value;
+ Reader_->ReadRow(Parsers_[index]);
+ CachedIndex_ = index;
+ },
+ /* cacheGetter */ [&] {
+ return &std::get<U>(CachedRows_);
+ });
+ }
+
+ template <class U>
+ void MoveRow(U* result)
+ {
+ AssertIsOneOf<U>();
+ return TBase::DoMoveRowCached(
+ result,
+ /* mover */ [&] (U* result) {
+ auto index = NDetail::TIndexInTuple<U, decltype(CachedRows_)>::Value;
+ Reader_->ReadRow(Parsers_[index]);
+ *result = std::move(std::get<U>(CachedRows_));
+ },
+ /* cacheMover */ [&] (U* result) {
+ Y_VERIFY((NDetail::TIndexInTuple<U, decltype(CachedRows_)>::Value) == CachedIndex_);
+ *result = std::move(std::get<U>(CachedRows_));
+ });
+ }
+
+ template <class U>
+ U MoveRow()
+ {
+ U result;
+ MoveRow(&result);
+ return result;
+ }
+
+ ::TIntrusivePtr<ISkiffRowReaderImpl> GetReaderImpl() const
+ {
+ return Reader_;
+ }
+
+private:
+ using TBase::Reader_;
+ // std::variant could also be used here, but std::tuple leads to better performance
+ // because of deallocations that std::variant has to do
+ mutable std::tuple<TSkiffRowTypes...> CachedRows_;
+ mutable std::vector<ISkiffRowParserPtr> Parsers_;
+ mutable int CachedIndex_;
+
+ template <class U>
+ static constexpr void AssertIsOneOf()
+ {
+ static_assert(
+ (std::is_same<U, TSkiffRowTypes>::value || ...),
+ "Template parameter must be one of TSkiffRowOneOf template parameter");
+ }
+};
+
+template <class T>
+class TTableReader<T, std::enable_if_t<TIsSkiffRow<T>::value>>
+ : public TTableReader<TSkiffRowOneOf<T>>
+{
+public:
+ using TRowType = T;
+ using TBase = TTableReader<TSkiffRowOneOf<T>>;
+
+ using TBase::TBase;
+
+ const T& GetRow()
+ {
+ return TBase::template GetRow<T>();
+ }
+
+ void MoveRow(T* result)
+ {
+ TBase::template MoveRow<T>(result);
+ }
+
+ T MoveRow()
+ {
+ return TBase::template MoveRow<T>();
+ }
+};
+
+template <>
+inline TTableReaderPtr<TNode> IIOClient::CreateTableReader<TNode>(
+ const TRichYPath& path, const TTableReaderOptions& options)
+{
+ return new TTableReader<TNode>(CreateNodeReader(path, options));
+}
+
+template <>
+inline TTableReaderPtr<TYaMRRow> IIOClient::CreateTableReader<TYaMRRow>(
+ const TRichYPath& path, const TTableReaderOptions& options)
+{
+ return new TTableReader<TYaMRRow>(CreateYaMRReader(path, options));
+}
+
+template <class T, class = std::enable_if_t<TIsBaseOf<Message, T>::Value>>
+struct TReaderCreator
+{
+ static TTableReaderPtr<T> Create(::TIntrusivePtr<IProtoReaderImpl> reader)
+ {
+ return new TTableReader<T>(reader);
+ }
+};
+
+template <class T>
+inline TTableReaderPtr<T> IIOClient::CreateTableReader(
+ const TRichYPath& path, const TTableReaderOptions& options)
+{
+ if constexpr (TIsBaseOf<Message, T>::Value) {
+ TAutoPtr<T> prototype(new T);
+ return new TTableReader<T>(CreateProtoReader(path, options, prototype.Get()));
+ } else if constexpr (TIsSkiffRow<T>::value) {
+ const auto& hints = options.FormatHints_ ? options.FormatHints_->SkiffRowHints_ : Nothing();
+ auto schema = GetSkiffSchema<T>(hints);
+ auto skipper = CreateSkiffSkipper<T>(hints);
+ return new TTableReader<T>(CreateSkiffRowReader(path, options, skipper, schema), hints);
+ } else {
+ static_assert(TDependentFalse<T>, "Unsupported type for table reader");
+ }
+}
+
+////////////////////////////////////////////////////////////////////////////////
+
+template <typename T>
+TTableReaderPtr<T> CreateTableReader(
+ IInputStream* stream,
+ const TTableReaderOptions& options)
+{
+ return TReaderCreator<T>::Create(NDetail::CreateProtoReader(stream, options, T::descriptor()));
+}
+
+template <class... Ts>
+TTableReaderPtr<typename NDetail::TProtoOneOfUnique<Ts...>::TType> CreateProtoMultiTableReader(
+ IInputStream* stream,
+ const TTableReaderOptions& options)
+{
+ return new TTableReader<typename NDetail::TProtoOneOfUnique<Ts...>::TType>(
+ NDetail::CreateProtoReader(stream, options, {Ts::descriptor()...}));
+}
+
+template <class T>
+TTableReaderPtr<T> CreateProtoMultiTableReader(
+ IInputStream* stream,
+ int tableCount,
+ const TTableReaderOptions& options)
+{
+ static_assert(TIsBaseOf<::google::protobuf::Message, T>::Value);
+ TVector<const ::google::protobuf::Descriptor*> descriptors(tableCount, T::descriptor());
+ return new TTableReader<T>(NDetail::CreateProtoReader(stream, options, std::move(descriptors)));
+}
+
+////////////////////////////////////////////////////////////////////////////////
+
+template <class T>
+class TTableRangesReader<T>
+ : public TThrRefBase
+{
+public:
+ using TRowType = T;
+
+private:
+ using TReaderImpl = typename TRowTraits<TRowType>::IReaderImpl;
+
+public:
+ TTableRangesReader(::TIntrusivePtr<TReaderImpl> readerImpl)
+ : ReaderImpl_(readerImpl)
+ , Reader_(MakeIntrusive<TTableReader<TRowType>>(readerImpl))
+ , IsValid_(Reader_->IsValid())
+ { }
+
+ TTableReader<T>& GetRange()
+ {
+ return *Reader_;
+ }
+
+ bool IsValid() const
+ {
+ return IsValid_;
+ }
+
+ void Next()
+ {
+ ReaderImpl_->NextKey();
+ if ((IsValid_ = Reader_->IsValid())) {
+ Reader_->Next();
+ }
+ }
+
+private:
+ ::TIntrusivePtr<TReaderImpl> ReaderImpl_;
+ ::TIntrusivePtr<TTableReader<TRowType>> Reader_;
+ bool IsValid_;
+};
+
+////////////////////////////////////////////////////////////////////////////////
+
+template <typename T>
+struct IWriterImplBase
+ : public TThrRefBase
+{
+ virtual void AddRow(const T& row, size_t tableIndex) = 0;
+
+ virtual void AddRow(const T& row, size_t tableIndex, size_t /*rowWeight*/)
+ {
+ AddRow(row, tableIndex);
+ }
+
+ virtual void AddRow(T&& row, size_t tableIndex) = 0;
+
+ virtual void AddRow(T&& row, size_t tableIndex, size_t /*rowWeight*/)
+ {
+ AddRow(std::move(row), tableIndex);
+ }
+
+ virtual void AddRowBatch(const TVector<T>& rowBatch, size_t tableIndex, size_t rowBatchWeight = 0)
+ {
+ for (const auto& row : rowBatch) {
+ AddRow(row, tableIndex, rowBatchWeight / rowBatch.size());
+ }
+ }
+
+ virtual void AddRowBatch(TVector<T>&& rowBatch, size_t tableIndex, size_t rowBatchWeight = 0)
+ {
+ auto rowBatchSize = rowBatch.size();
+ for (auto&& row : std::move(rowBatch)) {
+ AddRow(std::move(row), tableIndex, rowBatchWeight / rowBatchSize);
+ }
+ }
+
+ virtual size_t GetTableCount() const = 0;
+ virtual void FinishTable(size_t tableIndex) = 0;
+ virtual void Abort()
+ { }
+};
+
+struct INodeWriterImpl
+ : public IWriterImplBase<TNode>
+{
+};
+
+struct IYaMRWriterImpl
+ : public IWriterImplBase<TYaMRRow>
+{
+};
+
+struct IProtoWriterImpl
+ : public IWriterImplBase<Message>
+{
+};
+
+////////////////////////////////////////////////////////////////////////////////
+
+template <class T>
+class TTableWriterBase
+ : public TThrRefBase
+{
+public:
+ using TRowType = T;
+ using IWriterImpl = typename TRowTraits<T>::IWriterImpl;
+
+ explicit TTableWriterBase(::TIntrusivePtr<IWriterImpl> writer)
+ : Writer_(writer)
+ , Locks_(MakeAtomicShared<TVector<TAdaptiveLock>>(writer->GetTableCount()))
+ { }
+
+ ~TTableWriterBase() override
+ {
+ if (Locks_.RefCount() == 1) {
+ NDetail::FinishOrDie(this, "TTableWriterBase");
+ }
+ }
+
+ void Abort()
+ {
+ Writer_->Abort();
+ }
+
+ void AddRow(const T& row, size_t tableIndex = 0, size_t rowWeight = 0)
+ {
+ DoAddRow<T>(row, tableIndex, rowWeight);
+ }
+
+ void AddRow(T&& row, size_t tableIndex = 0, size_t rowWeight = 0)
+ {
+ DoAddRow<T>(std::move(row), tableIndex, rowWeight);
+ }
+
+ void AddRowBatch(const TVector<T>& rowBatch, size_t tableIndex = 0, size_t rowBatchWeight = 0)
+ {
+ DoAddRowBatch<T>(rowBatch, tableIndex, rowBatchWeight);
+ }
+
+ void AddRowBatch(TVector<T>&& rowBatch, size_t tableIndex = 0, size_t rowBatchWeight = 0)
+ {
+ DoAddRowBatch<T>(std::move(rowBatch), tableIndex, rowBatchWeight);
+ }
+
+ void Finish()
+ {
+ for (size_t i = 0; i < Locks_->size(); ++i) {
+ auto guard = Guard((*Locks_)[i]);
+ Writer_->FinishTable(i);
+ }
+ }
+
+protected:
+ template <class U>
+ void DoAddRow(const U& row, size_t tableIndex = 0, size_t rowWeight = 0)
+ {
+ if (tableIndex >= Locks_->size()) {
+ ythrow TIOException() <<
+ "Table index " << tableIndex <<
+ " is out of range [0, " << Locks_->size() << ")";
+ }
+
+ auto guard = Guard((*Locks_)[tableIndex]);
+ Writer_->AddRow(row, tableIndex, rowWeight);
+ }
+
+ template <class U>
+ void DoAddRow(U&& row, size_t tableIndex = 0, size_t rowWeight = 0)
+ {
+ if (tableIndex >= Locks_->size()) {
+ ythrow TIOException() <<
+ "Table index " << tableIndex <<
+ " is out of range [0, " << Locks_->size() << ")";
+ }
+
+ auto guard = Guard((*Locks_)[tableIndex]);
+ Writer_->AddRow(std::move(row), tableIndex, rowWeight);
+ }
+
+ template <class U>
+ void DoAddRowBatch(const TVector<U>& rowBatch, size_t tableIndex = 0, size_t rowBatchWeight = 0)
+ {
+ if (tableIndex >= Locks_->size()) {
+ ythrow TIOException() <<
+ "Table index " << tableIndex <<
+ " is out of range [0, " << Locks_->size() << ")";
+ }
+
+ auto guard = Guard((*Locks_)[tableIndex]);
+ Writer_->AddRowBatch(rowBatch, tableIndex, rowBatchWeight);
+ }
+
+ template <class U>
+ void DoAddRowBatch(TVector<U>&& rowBatch, size_t tableIndex = 0, size_t rowBatchWeight = 0)
+ {
+ if (tableIndex >= Locks_->size()) {
+ ythrow TIOException() <<
+ "Table index " << tableIndex <<
+ " is out of range [0, " << Locks_->size() << ")";
+ }
+
+ auto guard = Guard((*Locks_)[tableIndex]);
+ Writer_->AddRowBatch(std::move(rowBatch), tableIndex, rowBatchWeight);
+ }
+
+ ::TIntrusivePtr<IWriterImpl> GetWriterImpl()
+ {
+ return Writer_;
+ }
+
+private:
+ ::TIntrusivePtr<IWriterImpl> Writer_;
+ TAtomicSharedPtr<TVector<TAdaptiveLock>> Locks_;
+};
+
+template <>
+class TTableWriter<TNode>
+ : public TTableWriterBase<TNode>
+{
+public:
+ using TBase = TTableWriterBase<TNode>;
+
+ explicit TTableWriter(::TIntrusivePtr<IWriterImpl> writer)
+ : TBase(writer)
+ { }
+};
+
+template <>
+class TTableWriter<TYaMRRow>
+ : public TTableWriterBase<TYaMRRow>
+{
+public:
+ using TBase = TTableWriterBase<TYaMRRow>;
+
+ explicit TTableWriter(::TIntrusivePtr<IWriterImpl> writer)
+ : TBase(writer)
+ { }
+};
+
+template <>
+class TTableWriter<Message>
+ : public TTableWriterBase<Message>
+{
+public:
+ using TBase = TTableWriterBase<Message>;
+
+ explicit TTableWriter(::TIntrusivePtr<IWriterImpl> writer)
+ : TBase(writer)
+ { }
+
+ template <class U, std::enable_if_t<std::is_base_of<Message, U>::value>* = nullptr>
+ void AddRow(const U& row, size_t tableIndex = 0, size_t rowWeight = 0)
+ {
+ TBase::AddRow(row, tableIndex, rowWeight);
+ }
+
+ template <class U, std::enable_if_t<std::is_base_of<Message, U>::value>* = nullptr>
+ void AddRowBatch(const TVector<U>& rowBatch, size_t tableIndex = 0, size_t rowBatchWeight = 0)
+ {
+ for (const auto& row : rowBatch) {
+ AddRow(row, tableIndex, rowBatchWeight / rowBatch.size());
+ }
+ }
+};
+
+template <class T>
+class TTableWriter<T, std::enable_if_t<TIsBaseOf<Message, T>::Value>>
+ : public TTableWriter<Message>
+{
+public:
+ using TRowType = T;
+ using TBase = TTableWriter<Message>;
+
+ explicit TTableWriter(::TIntrusivePtr<IWriterImpl> writer)
+ : TBase(writer)
+ { }
+
+ void AddRow(const T& row, size_t tableIndex = 0, size_t rowWeight = 0)
+ {
+ TBase::AddRow<T>(row, tableIndex, rowWeight);
+ }
+
+ void AddRowBatch(const TVector<T>& rowBatch, size_t tableIndex = 0, size_t rowBatchWeight = 0)
+ {
+ TBase::AddRowBatch<T>(rowBatch, tableIndex, rowBatchWeight);
+ }
+};
+
+template <>
+inline TTableWriterPtr<TNode> IIOClient::CreateTableWriter<TNode>(
+ const TRichYPath& path, const TTableWriterOptions& options)
+{
+ return new TTableWriter<TNode>(CreateNodeWriter(path, options));
+}
+
+template <>
+inline TTableWriterPtr<TYaMRRow> IIOClient::CreateTableWriter<TYaMRRow>(
+ const TRichYPath& path, const TTableWriterOptions& options)
+{
+ return new TTableWriter<TYaMRRow>(CreateYaMRWriter(path, options));
+}
+
+template <class T>
+inline TTableWriterPtr<T> IIOClient::CreateTableWriter(
+ const TRichYPath& path, const TTableWriterOptions& options)
+{
+ if constexpr (TIsBaseOf<Message, T>::Value) {
+ TAutoPtr<T> prototype(new T);
+ return new TTableWriter<T>(CreateProtoWriter(path, options, prototype.Get()));
+ } else {
+ static_assert(TDependentFalse<T>, "Unsupported type for table writer");
+ }
+}
+
+////////////////////////////////////////////////////////////////////////////////
+
+template <typename T>
+TTableReaderPtr<T> CreateConcreteProtobufReader(TTableReader<Message>* reader)
+{
+ static_assert(std::is_base_of_v<Message, T>, "T must be a protobuf type (either Message or its descendant)");
+ Y_ENSURE(reader, "reader must be non-null");
+ return ::MakeIntrusive<TTableReader<T>>(reader->GetReaderImpl());
+}
+
+template <typename T>
+TTableReaderPtr<T> CreateConcreteProtobufReader(const TTableReaderPtr<Message>& reader)
+{
+ Y_ENSURE(reader, "reader must be non-null");
+ return CreateConcreteProtobufReader<T>(reader.Get());
+}
+
+template <typename T>
+TTableReaderPtr<Message> CreateGenericProtobufReader(TTableReader<T>* reader)
+{
+ static_assert(std::is_base_of_v<Message, T>, "T must be a protobuf type (either Message or its descendant)");
+ Y_ENSURE(reader, "reader must be non-null");
+ return ::MakeIntrusive<TTableReader<Message>>(reader->GetReaderImpl());
+}
+
+template <typename T>
+TTableReaderPtr<Message> CreateGenericProtobufReader(const TTableReaderPtr<T>& reader)
+{
+ Y_ENSURE(reader, "reader must be non-null");
+ return CreateGenericProtobufReader(reader.Get());
+}
+
+////////////////////////////////////////////////////////////////////////////////
+
+} // namespace NYT
diff --git a/yt/cpp/mapreduce/interface/io.cpp b/yt/cpp/mapreduce/interface/io.cpp
new file mode 100644
index 0000000000..f97629721a
--- /dev/null
+++ b/yt/cpp/mapreduce/interface/io.cpp
@@ -0,0 +1,47 @@
+#include "io.h"
+
+#include <yt/cpp/mapreduce/interface/logging/yt_log.h>
+
+#include <util/string/cast.h>
+
+namespace NYT {
+
+////////////////////////////////////////////////////////////////////////////////
+
+TMaybe<size_t> IReaderImplBase::GetReadByteCount() const
+{
+ return Nothing();
+}
+
+i64 IReaderImplBase::GetTabletIndex() const
+{
+ Y_FAIL("Unimplemented");
+}
+
+bool IReaderImplBase::IsEndOfStream() const
+{
+ Y_FAIL("Unimplemented");
+}
+
+bool IReaderImplBase::IsRawReaderExhausted() const
+{
+ Y_FAIL("Unimplemented");
+}
+
+////////////////////////////////////////////////////////////////////////////////
+
+namespace NDetail {
+
+void LogTableReaderStatistics(ui64 rowCount, TMaybe<size_t> byteCount)
+{
+ TString byteCountStr = (byteCount ? ::ToString(*byteCount) : "<unknown>");
+ YT_LOG_DEBUG("Table reader has read %v rows, %v bytes",
+ rowCount,
+ byteCountStr);
+}
+
+} // namespace NDetail
+
+////////////////////////////////////////////////////////////////////////////////
+
+} // namespace NYT
diff --git a/yt/cpp/mapreduce/interface/io.h b/yt/cpp/mapreduce/interface/io.h
new file mode 100644
index 0000000000..e2b20a1802
--- /dev/null
+++ b/yt/cpp/mapreduce/interface/io.h
@@ -0,0 +1,586 @@
+#pragma once
+
+///
+/// @file yt/cpp/mapreduce/interface/io.h
+///
+/// Header containing client interface for reading and writing tables and files.
+
+
+#include "fwd.h"
+
+#include "client_method_options.h"
+#include "common.h"
+#include "format.h"
+#include "node.h"
+#include "mpl.h"
+#include "skiff_row.h"
+
+#include <google/protobuf/message.h>
+
+#include <util/stream/input.h>
+#include <util/stream/output.h>
+#include <util/generic/yexception.h>
+#include <util/generic/maybe.h>
+
+namespace NYT {
+
+////////////////////////////////////////////////////////////////////////////////
+
+///
+/// @brief "Marker" type to use for several protobuf types in @ref NYT::TTableReader.
+///
+/// @tparam Ts Possible types of rows to be read.
+template<class... TProtoRowTypes>
+class TProtoOneOf
+{
+public:
+ static_assert(
+ (TIsBaseOf<::google::protobuf::Message, TProtoRowTypes>::Value && ...),
+ "Template parameters can only be protobuf types");
+
+ TProtoOneOf() = delete;
+};
+
+///
+/// @brief "Marker" type to use for several skiff row types in @ref NYT::TTableReader.
+///
+/// @tparam Ts Possible types of rows to be read.
+template<class... TSkiffRowTypes>
+class TSkiffRowOneOf
+{
+public:
+ static_assert(
+ (TIsSkiffRow<TSkiffRowTypes>::value && ...),
+ "Template parameters can only be SkiffRow types");
+
+ TSkiffRowOneOf() = delete;
+};
+
+////////////////////////////////////////////////////////////////////////////////
+
+/// @cond Doxygen_Suppress
+namespace NDetail {
+
+////////////////////////////////////////////////////////////////////////////////
+
+template <class TTuple>
+struct TProtoOneOfFromTuple;
+
+template <class... Ts>
+struct TProtoOneOfFromTuple<std::tuple<Ts...>>
+{
+ using TType = TProtoOneOf<Ts...>;
+};
+
+template <class... Ts>
+struct TProtoOneOfUnique
+{
+ using TTuple = typename TUniqueTypes<std::tuple<>, std::tuple<Ts...>>::TType;
+ using TType = typename TProtoOneOfFromTuple<TTuple>::TType;
+};
+
+////////////////////////////////////////////////////////////////////////////////
+
+} // namespace NDetail
+/// @endcond
+
+////////////////////////////////////////////////////////////////////////////////
+
+struct INodeReaderImpl;
+struct IYaMRReaderImpl;
+struct IProtoReaderImpl;
+struct ISkiffRowReaderImpl;
+struct INodeWriterImpl;
+struct IYaMRWriterImpl;
+struct IProtoWriterImpl;
+
+////////////////////////////////////////////////////////////////////////////////
+
+/// Class of exceptions connected to reading or writing tables or files.
+class TIOException
+ : public yexception
+{ };
+
+///////////////////////////////////////////////////////////////////////////////
+
+/// Interface representing YT file reader.
+class IFileReader
+ : public TThrRefBase
+ , public IInputStream
+{ };
+
+/// Interface representing YT file writer.
+class IFileWriter
+ : public TThrRefBase
+ , public IOutputStream
+{ };
+
+////////////////////////////////////////////////////////////////////////////////
+
+/// Low-level interface to read YT table with retries.
+class TRawTableReader
+ : public TThrRefBase
+ , public IInputStream
+{
+public:
+ /// @brief Retry table read starting from the specified `rangeIndex` and `rowIndex`.
+ ///
+ /// @param rangeIndex Index of first range to read
+ /// @param rowIndex Index of first row to read; if `rowIndex == Nothing` entire request will be retried.
+ ///
+ /// @return `true` on successful request retry, `false` if no retry attempts are left (then `Retry()` shouldn't be called any more).
+ ///
+ /// `rowIndex` must be inside the range with index `rangeIndex` if the latter is specified.
+ ///
+ /// After successful retry the user should reset `rangeIndex` / `rowIndex` values and read new ones
+ /// from the stream.
+ virtual bool Retry(
+ const TMaybe<ui32>& rangeIndex,
+ const TMaybe<ui64>& rowIndex) = 0;
+
+ /// Resets retry attempt count to the initial value (then `Retry()` can be called again).
+ virtual void ResetRetries() = 0;
+
+ /// @brief May the input stream contain table ranges?
+ ///
+ /// In the case when it is `true` the `TRawTableReader` user is responsible
+ /// to track active range index in order to pass it to Retry().
+ virtual bool HasRangeIndices() const = 0;
+};
+
+/// @brief Low-level interface to write YT table.
+///
+/// Retries must be handled by implementation.
+class TRawTableWriter
+ : public TThrRefBase
+ , public IOutputStream
+{
+public:
+ /// @brief Call this method after complete row representation is written to the stream.
+ ///
+ /// When this method is called `TRowTableWriter` can check its buffer
+ /// and if it is full send data to YT.
+ /// @note `TRawTableWriter` never sends partial records to YT (due to retries).
+ virtual void NotifyRowEnd() = 0;
+
+ /// @brief Try to abort writing process as soon as possible (makes sense for multi-threaded writers).
+ ///
+ /// By default it does nothing, but implementations are welcome to override this method.
+ virtual void Abort()
+ { }
+};
+
+/// @brief Interface to deal with multiple raw output streams.
+class IProxyOutput
+{
+public:
+ virtual ~IProxyOutput()
+ { }
+
+ /// Get amount of managed streams.
+ virtual size_t GetStreamCount() const = 0;
+
+ /// Get stream corresponding to the specified table index.
+ virtual IOutputStream* GetStream(size_t tableIndex) const = 0;
+
+ /// This handler must be called right after the next row has been written.
+ virtual void OnRowFinished(size_t tableIndex) = 0;
+
+ /// @brief Try to abort writing process as soon as possible (makes sense for multi-threaded writers).
+ ///
+ /// By default it does nothing, but implementations are welcome to override this method.
+ virtual void Abort()
+ { }
+};
+
+////////////////////////////////////////////////////////////////////////////////
+
+/// @brief Class template to read typed rows from YT tables.
+///
+/// @tparam T Row type.
+///
+/// Correct usage of this class usually looks like
+/// ```
+/// for (const auto& cursor : *reader) {
+/// const auto& row = cursor.GetRow();
+/// ...
+/// }
+/// ```
+/// or, more verbosely,
+/// ```
+/// for (; reader->IsValid(); reader->Next()) {
+/// const auto& row = reader->GetRow();
+/// ...
+/// }
+/// ```
+///
+/// @note Actual (partial) specializations of this template may look a bit different,
+/// e.g. @ref NYT::TTableReader::GetRow, @ref NYT::TTableReader::MoveRow may be method templates.
+template <class T, class>
+class TTableReader
+ : public TThrRefBase
+{
+public:
+ /// Get current row.
+ const T& GetRow() const;
+
+ /// Extract current row; further calls to `GetRow` and `MoveRow` will fail.
+ T MoveRow();
+
+ /// Extract current row to `result`; further calls to `GetRow` and `MoveRow` will fail.
+ void MoveRow(T* result);
+
+ /// Check whether all the rows were read.
+ bool IsValid() const;
+
+ /// Move the cursor to the next row.
+ void Next();
+
+ /// Get table index of the current row.
+ ui32 GetTableIndex() const;
+
+ /// Get range index of the current row (zero if it is unknown or read request contains no ranges)
+ ui32 GetRangeIndex() const;
+
+ /// Get current row index (zero if it unknown).
+ ui64 GetRowIndex() const;
+
+ /// Get current tablet index (for ordered dynamic tables).
+ i64 GetTabletIndex() const;
+
+ /// Returns `true` if job consumed all the input and `false` otherwise.
+ bool IsEndOfStream() const;
+
+ /// Returns `true` if job raw input stream was closed and `false` otherwise.
+ bool IsRawReaderExhausted() const;
+};
+
+/// @brief Iterator for use in range-based-for.
+///
+/// @note Idiomatic usage:
+/// ```
+/// for (const auto& cursor : *reader) {
+/// const auto& row = cursor.GetRow();
+/// ...
+/// }
+/// ```
+template <class T>
+class TTableReaderIterator
+{
+public:
+ /// Construct iterator from table reader (can be `nullptr`).
+ explicit TTableReaderIterator<T>(TTableReader<T>* reader)
+ {
+ if (reader && reader->IsValid()) {
+ Reader_ = reader;
+ } else {
+ Reader_ = nullptr;
+ }
+ }
+
+ /// Equality operator.
+ bool operator==(const TTableReaderIterator& it) const
+ {
+ return Reader_ == it.Reader_;
+ }
+
+ /// Inequality operator.
+ bool operator!=(const TTableReaderIterator& it) const
+ {
+ return Reader_ != it.Reader_;
+ }
+
+ /// Dereference operator.
+ TTableReader<T>& operator*()
+ {
+ return *Reader_;
+ }
+
+ /// Const dereference operator.
+ const TTableReader<T>& operator*() const
+ {
+ return *Reader_;
+ }
+
+ /// Preincrement operator.
+ TTableReaderIterator& operator++()
+ {
+ Reader_->Next();
+ if (!Reader_->IsValid()) {
+ Reader_ = nullptr;
+ }
+ return *this;
+ }
+
+private:
+ TTableReader<T>* Reader_;
+};
+
+/// @brief Function to facilitate range-based-for for @ref NYT::TTableReader.
+///
+/// @see @ref NYT::TTableReaderIterator
+template <class T>
+TTableReaderIterator<T> begin(TTableReader<T>& reader)
+{
+ return TTableReaderIterator<T>(&reader);
+}
+
+/// @brief Function to facilitate range-based-for for @ref NYT::TTableReader.
+///
+/// @see @ref NYT::TTableReaderIterator
+template <class T>
+TTableReaderIterator<T> end(TTableReader<T>&)
+{
+ return TTableReaderIterator<T>(nullptr);
+}
+
+////////////////////////////////////////////////////////////////////////////////
+
+/// @brief Class to facilitate reading table rows sorted by key.
+///
+/// Each reader returned from @ref NYT::TTableRangesReader::GetRange represents
+/// a range of rows with the same key.
+///
+/// @note Idiomatic usage:
+/// ```
+/// for (; reader->IsValid(); reader->Next()) {
+/// auto& rangeReader = reader->GetRange();
+/// ...
+/// }
+/// ```
+template <class T, class>
+class TTableRangesReader
+ : public TThrRefBase
+{
+public:
+ /// Get reader for rows with the same key.
+ TTableReader<T>& GetRange();
+
+ /// Check whether all rows are read.
+ bool IsValid() const;
+
+ /// Move cursor to the next range.
+ void Next();
+};
+
+////////////////////////////////////////////////////////////////////////////////
+
+/// Class template to write typed rows to YT tables.
+template <class T, class>
+class TTableWriter
+ : public TThrRefBase
+{
+public:
+ /// @brief Submit a row for writing.
+ ///
+ /// The row may (and very probably will) *not* be written immediately.
+ void AddRow(const T& row);
+
+ /// Stop writing data as soon as possible (without flushing data, e.g. before aborting parent transaction).
+ void Finish();
+};
+
+////////////////////////////////////////////////////////////////////////////////
+
+/// @brief Type representing YaMR table row.
+///
+/// @deprecated
+struct TYaMRRow
+{
+ /// Key column.
+ TStringBuf Key;
+
+ /// Subkey column.
+ TStringBuf SubKey;
+
+ /// Value column.
+ TStringBuf Value;
+};
+
+////////////////////////////////////////////////////////////////////////////////
+
+/// Interface for creating table and file readers and writer.
+class IIOClient
+{
+public:
+ virtual ~IIOClient() = default;
+
+ /// Create a reader for file at `path`.
+ virtual IFileReaderPtr CreateFileReader(
+ const TRichYPath& path,
+ const TFileReaderOptions& options = TFileReaderOptions()) = 0;
+
+ /// Create a writer for file at `path`.
+ virtual IFileWriterPtr CreateFileWriter(
+ const TRichYPath& path,
+ const TFileWriterOptions& options = TFileWriterOptions()) = 0;
+
+ /// Create a typed reader for table at `path`.
+ template <class T>
+ TTableReaderPtr<T> CreateTableReader(
+ const TRichYPath& path,
+ const TTableReaderOptions& options = TTableReaderOptions());
+
+ /// Create a typed writer for table at `path`.
+ template <class T>
+ TTableWriterPtr<T> CreateTableWriter(
+ const TRichYPath& path,
+ const TTableWriterOptions& options = TTableWriterOptions());
+
+ /// Create a writer to write protobuf messages with specified descriptor.
+ virtual TTableWriterPtr<::google::protobuf::Message> CreateTableWriter(
+ const TRichYPath& path,
+ const ::google::protobuf::Descriptor& descriptor,
+ const TTableWriterOptions& options = TTableWriterOptions()) = 0;
+
+ /// Create a reader to read a table using specified format.
+ virtual TRawTableReaderPtr CreateRawReader(
+ const TRichYPath& path,
+ const TFormat& format,
+ const TTableReaderOptions& options = TTableReaderOptions()) = 0;
+
+ /// Create a reader to write a table using specified format.
+ virtual TRawTableWriterPtr CreateRawWriter(
+ const TRichYPath& path,
+ const TFormat& format,
+ const TTableWriterOptions& options = TTableWriterOptions()) = 0;
+
+ ///
+ /// @brief Create a reader for [blob table](https://docs.yandex-team.ru/docs/yt/description/storage/blobtables) at `path`.
+ ///
+ /// @param path Blob table path.
+ /// @param blobId Key identifying the blob.
+ /// @param options Optional parameters
+ ///
+ /// Blob table is a table that stores a number of blobs.
+ /// Blobs are sliced into parts of the same size (maybe except of last part).
+ /// Those parts are stored in the separate rows.
+ ///
+ /// Blob table have constraints on its schema.
+ /// - There must be columns that identify blob (blob id columns). That columns might be of any type.
+ /// - There must be a column of `int64` type that identify part inside the blob (this column is called `part index`).
+ /// - There must be a column of `string` type that stores actual data (this column is called `data column`).
+ virtual IFileReaderPtr CreateBlobTableReader(
+ const TYPath& path,
+ const TKey& blobId,
+ const TBlobTableReaderOptions& options = TBlobTableReaderOptions()) = 0;
+
+private:
+ virtual ::TIntrusivePtr<INodeReaderImpl> CreateNodeReader(
+ const TRichYPath& path, const TTableReaderOptions& options) = 0;
+
+ virtual ::TIntrusivePtr<IYaMRReaderImpl> CreateYaMRReader(
+ const TRichYPath& path, const TTableReaderOptions& options) = 0;
+
+ virtual ::TIntrusivePtr<IProtoReaderImpl> CreateProtoReader(
+ const TRichYPath& path,
+ const TTableReaderOptions& options,
+ const ::google::protobuf::Message* prototype) = 0;
+
+ virtual ::TIntrusivePtr<ISkiffRowReaderImpl> CreateSkiffRowReader(
+ const TRichYPath& path,
+ const TTableReaderOptions& options,
+ const ISkiffRowSkipperPtr& skipper,
+ const NSkiff::TSkiffSchemaPtr& schema) = 0;
+
+ virtual ::TIntrusivePtr<INodeWriterImpl> CreateNodeWriter(
+ const TRichYPath& path, const TTableWriterOptions& options) = 0;
+
+ virtual ::TIntrusivePtr<IYaMRWriterImpl> CreateYaMRWriter(
+ const TRichYPath& path, const TTableWriterOptions& options) = 0;
+
+ virtual ::TIntrusivePtr<IProtoWriterImpl> CreateProtoWriter(
+ const TRichYPath& path,
+ const TTableWriterOptions& options,
+ const ::google::protobuf::Message* prototype) = 0;
+};
+
+////////////////////////////////////////////////////////////////////////////////
+
+///
+/// @brief Create a protobuf table reader from a stream.
+///
+/// @tparam T Protobuf message type to read (must be inherited from `Message`).
+///
+/// @param stream Input stream in YT protobuf format.
+template <typename T>
+TTableReaderPtr<T> CreateTableReader(
+ IInputStream* stream,
+ const TTableReaderOptions& options = {});
+
+///
+/// @brief Create a protobuf multi table reader from a stream.
+///
+/// @tparam Ts Protobuf message types to read (must be inherited from `Message`).
+///
+/// @param stream Input stream in YT protobuf format.
+template <class... Ts>
+TTableReaderPtr<typename NDetail::TProtoOneOfUnique<Ts...>::TType> CreateProtoMultiTableReader(
+ IInputStream* stream,
+ const TTableReaderOptions& options = {});
+
+///
+/// @brief Create a homogenous protobuf multi table reader from a stream.
+///
+/// @tparam T Protobuf message type to read (must be inherited from `Message`).
+///
+/// @param stream Input stream in YT protobuf format.
+/// @param tableCount Number of tables in input stream.
+template <class T>
+TTableReaderPtr<T> CreateProtoMultiTableReader(
+ IInputStream* stream,
+ int tableCount,
+ const TTableReaderOptions& options = {});
+
+/// Create a @ref NYT::TNode table reader from a stream.
+template <>
+TTableReaderPtr<TNode> CreateTableReader<TNode>(
+ IInputStream* stream, const TTableReaderOptions& options);
+
+/// Create a @ref NYT::TYaMRRow table reader from a stream.
+template <>
+TTableReaderPtr<TYaMRRow> CreateTableReader<TYaMRRow>(
+ IInputStream* stream, const TTableReaderOptions& options);
+
+namespace NDetail {
+
+/// Create a protobuf table reader from a stream.
+::TIntrusivePtr<IProtoReaderImpl> CreateProtoReader(
+ IInputStream* stream,
+ const TTableReaderOptions& options,
+ const ::google::protobuf::Descriptor* descriptor);
+
+
+/// Create a protobuf table reader from a stream that can contain table switches.
+::TIntrusivePtr<IProtoReaderImpl> CreateProtoReader(
+ IInputStream* stream,
+ const TTableReaderOptions& options,
+ TVector<const ::google::protobuf::Descriptor*> descriptors);
+
+} // namespace NDetail
+
+////////////////////////////////////////////////////////////////////////////////
+
+/// Convert generic protobuf table reader to a concrete one (for certain type `T`).
+template <typename T>
+TTableReaderPtr<T> CreateConcreteProtobufReader(TTableReader<Message>* reader);
+
+/// Convert generic protobuf table reader to a concrete one (for certain type `T`).
+template <typename T>
+TTableReaderPtr<T> CreateConcreteProtobufReader(const TTableReaderPtr<Message>& reader);
+
+/// Convert a concrete (for certain type `T`) protobuf table reader to a generic one.
+template <typename T>
+TTableReaderPtr<Message> CreateGenericProtobufReader(TTableReader<T>* reader);
+
+/// Convert a concrete (for certain type `T`) protobuf table reader to a generic one.
+template <typename T>
+TTableReaderPtr<Message> CreateGenericProtobufReader(const TTableReaderPtr<T>& reader);
+
+////////////////////////////////////////////////////////////////////////////////
+
+} // namespace NYT
+
+#define IO_INL_H_
+#include "io-inl.h"
+#undef IO_INL_H_
diff --git a/yt/cpp/mapreduce/interface/job_counters.cpp b/yt/cpp/mapreduce/interface/job_counters.cpp
new file mode 100644
index 0000000000..6d4a2a6fcb
--- /dev/null
+++ b/yt/cpp/mapreduce/interface/job_counters.cpp
@@ -0,0 +1,164 @@
+#include "job_counters.h"
+
+namespace NYT {
+
+////////////////////////////////////////////////////////////////////
+
+namespace {
+ ui64 CountTotal(const TNode& data)
+ {
+ if (data.IsMap()) {
+ if (auto totalPtr = data.AsMap().FindPtr("total")) {
+ return data["total"].IntCast<ui64>();
+ } else {
+ ui64 total = 0;
+ for (const auto& keyVal: data.AsMap()) {
+ total += CountTotal(keyVal.second);
+ }
+ return total;
+ }
+ } else {
+ return data.IntCast<ui64>();
+ }
+ }
+
+ TNode GetNode(const TNode& data, const TStringBuf& key)
+ {
+ if (auto resPtr = data.AsMap().FindPtr(key)) {
+ return *resPtr;
+ }
+ return TNode();
+ }
+} // namespace
+
+////////////////////////////////////////////////////////////////////
+
+TJobCounter::TJobCounter(TNode data)
+ : Data_(std::move(data))
+{
+ if (Data_.HasValue()) {
+ Total_ = CountTotal(Data_);
+ }
+}
+
+TJobCounter::TJobCounter(ui64 total)
+ : Total_(total)
+{ }
+
+ui64 TJobCounter::GetTotal() const
+{
+ return Total_;
+}
+
+ui64 TJobCounter::GetValue(const TStringBuf key) const
+{
+ if (Data_.HasValue()) {
+ return CountTotal(Data_[key]);
+ }
+ return 0;
+}
+
+////////////////////////////////////////////////////////////////////
+
+TJobCounters::TJobCounters(const NYT::TNode& counters)
+ : Total_(0)
+{
+ if (!counters.IsMap()) {
+ ythrow yexception() << "TJobCounters must be initialized with Map type TNode";
+ }
+ auto abortedNode = GetNode(counters, "aborted");
+ if (abortedNode.HasValue()) {
+ Aborted_ = TJobCounter(GetNode(abortedNode, "total"));
+ AbortedScheduled_ = TJobCounter(GetNode(abortedNode, "scheduled"));
+ AbortedNonScheduled_ = TJobCounter(GetNode(abortedNode, "non_scheduled"));
+ }
+ auto completedNode = GetNode(counters, "completed");
+ if (completedNode.HasValue()) {
+ Completed_ = TJobCounter(GetNode(completedNode, "total"));
+ CompletedNonInterrupted_ = TJobCounter(GetNode(completedNode, "non-interrupted"));
+ CompletedInterrupted_ = TJobCounter(GetNode(completedNode, "interrupted"));
+ }
+ Lost_ = TJobCounter(GetNode(counters, "lost"));
+ Invalidated_ = TJobCounter(GetNode(counters, "invalidated"));
+ Failed_ = TJobCounter(GetNode(counters, "failed"));
+ Running_ = TJobCounter(GetNode(counters, "running"));
+ Suspended_ = TJobCounter(GetNode(counters, "suspended"));
+ Pending_ = TJobCounter(GetNode(counters, "pending"));
+ Blocked_ = TJobCounter(GetNode(counters, "blocked"));
+ Total_ = CountTotal(counters);
+}
+
+
+const TJobCounter& TJobCounters::GetAborted() const
+{
+ return Aborted_;
+}
+
+const TJobCounter& TJobCounters::GetAbortedScheduled() const
+{
+ return AbortedScheduled_;
+}
+
+const TJobCounter& TJobCounters::GetAbortedNonScheduled() const
+{
+ return AbortedNonScheduled_;
+}
+
+const TJobCounter& TJobCounters::GetCompleted() const
+{
+ return Completed_;
+}
+
+const TJobCounter& TJobCounters::GetCompletedNonInterrupted() const
+{
+ return CompletedNonInterrupted_;
+}
+
+const TJobCounter& TJobCounters::GetCompletedInterrupted() const
+{
+ return CompletedInterrupted_;
+}
+
+const TJobCounter& TJobCounters::GetLost() const
+{
+ return Lost_;
+}
+
+const TJobCounter& TJobCounters::GetInvalidated() const
+{
+ return Invalidated_;
+}
+
+const TJobCounter& TJobCounters::GetFailed() const
+{
+ return Failed_;
+}
+
+const TJobCounter& TJobCounters::GetRunning() const
+{
+ return Running_;
+}
+
+const TJobCounter& TJobCounters::GetSuspended() const
+{
+ return Suspended_;
+}
+
+const TJobCounter& TJobCounters::GetPending() const
+{
+ return Pending_;
+}
+
+const TJobCounter& TJobCounters::GetBlocked() const
+{
+ return Blocked_;
+}
+
+ui64 TJobCounters::GetTotal() const
+{
+ return Total_;
+}
+
+////////////////////////////////////////////////////////////////////
+
+} // namespace NYT
diff --git a/yt/cpp/mapreduce/interface/job_counters.h b/yt/cpp/mapreduce/interface/job_counters.h
new file mode 100644
index 0000000000..9257cc1ec1
--- /dev/null
+++ b/yt/cpp/mapreduce/interface/job_counters.h
@@ -0,0 +1,74 @@
+#pragma once
+
+#include "fwd.h"
+
+#include <yt/cpp/mapreduce/interface/node.h>
+
+namespace NYT {
+
+class TJobCounter
+{
+private:
+ TNode Data_;
+ ui64 Total_ = 0;
+
+public:
+ TJobCounter() = default;
+
+ TJobCounter(TNode data);
+ TJobCounter(ui64 total);
+
+ ui64 GetTotal() const;
+
+ ui64 GetValue(const TStringBuf key) const;
+};
+
+/// Class representing a collection of job counters.
+class TJobCounters
+{
+public:
+ ///
+ /// Construct empty counter.
+ TJobCounters() = default;
+
+ ///
+ /// Construct counter from counters node.
+ TJobCounters(const NYT::TNode& counters);
+
+ const TJobCounter& GetAborted() const;
+ const TJobCounter& GetAbortedScheduled() const;
+ const TJobCounter& GetAbortedNonScheduled() const;
+ const TJobCounter& GetCompleted() const;
+ const TJobCounter& GetCompletedNonInterrupted() const;
+ const TJobCounter& GetCompletedInterrupted() const;
+ const TJobCounter& GetLost() const;
+ const TJobCounter& GetInvalidated() const;
+ const TJobCounter& GetFailed() const;
+ const TJobCounter& GetRunning() const;
+ const TJobCounter& GetSuspended() const;
+ const TJobCounter& GetPending() const;
+ const TJobCounter& GetBlocked() const;
+
+ ui64 GetTotal() const;
+
+private:
+ ui64 Total_ = 0;
+
+ TJobCounter Aborted_;
+ TJobCounter AbortedScheduled_;
+ TJobCounter AbortedNonScheduled_;
+ TJobCounter Completed_;
+ TJobCounter CompletedNonInterrupted_;
+ TJobCounter CompletedInterrupted_;
+ TJobCounter Lost_;
+ TJobCounter Invalidated_;
+ TJobCounter Failed_;
+ TJobCounter Running_;
+ TJobCounter Suspended_;
+ TJobCounter Pending_;
+ TJobCounter Blocked_;
+};
+
+////////////////////////////////////////////////////////////////////
+
+} // namespace NYT
diff --git a/yt/cpp/mapreduce/interface/job_counters_ut.cpp b/yt/cpp/mapreduce/interface/job_counters_ut.cpp
new file mode 100644
index 0000000000..56d3932b8f
--- /dev/null
+++ b/yt/cpp/mapreduce/interface/job_counters_ut.cpp
@@ -0,0 +1,103 @@
+#include <yt/cpp/mapreduce/interface/job_counters.h>
+#include <yt/cpp/mapreduce/interface/operation.h>
+
+#include <library/cpp/yson/node/node_io.h>
+
+#include <library/cpp/testing/unittest/registar.h>
+
+using namespace NYT;
+
+Y_UNIT_TEST_SUITE(JobCounters)
+{
+ Y_UNIT_TEST(Full)
+ {
+ const TString input = R"""(
+ {
+ "completed" = {
+ "total" = 6;
+ "non-interrupted" = 1;
+ "interrupted" = {
+ "whatever_interrupted" = 2;
+ "whatever_else_interrupted" = 3;
+ };
+ };
+ "aborted" = {
+ "non_scheduled" = {
+ "whatever_non_scheduled" = 4;
+ "whatever_else_non_scheduled" = 5;
+ };
+ "scheduled" = {
+ "whatever_scheduled" = 6;
+ "whatever_else_scheduled" = 7;
+ };
+ "total" = 22;
+ };
+ "lost" = 8;
+ "invalidated" = 9;
+ "failed" = 10;
+ "running" = 11;
+ "suspended" = 12;
+ "pending" = 13;
+ "blocked" = 14;
+ "total" = 105;
+ })""";
+
+ TJobCounters counters(NodeFromYsonString(input));
+
+ UNIT_ASSERT_VALUES_EQUAL(counters.GetTotal(), 105);
+
+ UNIT_ASSERT_VALUES_EQUAL(counters.GetCompleted().GetTotal(), 6);
+ UNIT_ASSERT_VALUES_EQUAL(counters.GetCompletedNonInterrupted().GetTotal(), 1);
+ UNIT_ASSERT_VALUES_EQUAL(counters.GetCompletedInterrupted().GetTotal(), 5);
+ UNIT_ASSERT_VALUES_EQUAL(counters.GetAborted().GetTotal(), 22);
+ UNIT_ASSERT_VALUES_EQUAL(counters.GetAbortedNonScheduled().GetTotal(), 9);
+ UNIT_ASSERT_VALUES_EQUAL(counters.GetAbortedScheduled().GetTotal(), 13);
+ UNIT_ASSERT_VALUES_EQUAL(counters.GetLost().GetTotal(), 8);
+ UNIT_ASSERT_VALUES_EQUAL(counters.GetInvalidated().GetTotal(), 9);
+ UNIT_ASSERT_VALUES_EQUAL(counters.GetFailed().GetTotal(), 10);
+ UNIT_ASSERT_VALUES_EQUAL(counters.GetRunning().GetTotal(), 11);
+ UNIT_ASSERT_VALUES_EQUAL(counters.GetSuspended().GetTotal(), 12);
+ UNIT_ASSERT_VALUES_EQUAL(counters.GetPending().GetTotal(), 13);
+ UNIT_ASSERT_VALUES_EQUAL(counters.GetBlocked().GetTotal(), 14);
+
+ UNIT_ASSERT_VALUES_EQUAL(counters.GetCompletedInterrupted().GetValue("whatever_interrupted"), 2);
+ UNIT_ASSERT_VALUES_EQUAL(counters.GetCompletedInterrupted().GetValue("whatever_else_interrupted"), 3);
+ UNIT_ASSERT_VALUES_EQUAL(counters.GetAbortedNonScheduled().GetValue("whatever_non_scheduled"), 4);
+ UNIT_ASSERT_VALUES_EQUAL(counters.GetAbortedNonScheduled().GetValue("whatever_else_non_scheduled"), 5);
+ UNIT_ASSERT_VALUES_EQUAL(counters.GetAbortedScheduled().GetValue("whatever_scheduled"), 6);
+ UNIT_ASSERT_VALUES_EQUAL(counters.GetAbortedScheduled().GetValue("whatever_else_scheduled"), 7);
+
+ UNIT_ASSERT_EXCEPTION(counters.GetCompletedInterrupted().GetValue("Nothingness"), yexception);
+ }
+
+ Y_UNIT_TEST(Empty)
+ {
+ const TString input = "{}";
+
+ TJobCounters counters(NodeFromYsonString(input));
+
+ UNIT_ASSERT_VALUES_EQUAL(counters.GetTotal(), 0);
+
+ UNIT_ASSERT_VALUES_EQUAL(counters.GetCompleted().GetTotal(), 0);
+ UNIT_ASSERT_VALUES_EQUAL(counters.GetCompletedNonInterrupted().GetTotal(), 0);
+ UNIT_ASSERT_VALUES_EQUAL(counters.GetCompletedInterrupted().GetTotal(), 0);
+ UNIT_ASSERT_VALUES_EQUAL(counters.GetAborted().GetTotal(), 0);
+ UNIT_ASSERT_VALUES_EQUAL(counters.GetAbortedNonScheduled().GetTotal(), 0);
+ UNIT_ASSERT_VALUES_EQUAL(counters.GetAbortedScheduled().GetTotal(), 0);
+ UNIT_ASSERT_VALUES_EQUAL(counters.GetLost().GetTotal(), 0);
+ UNIT_ASSERT_VALUES_EQUAL(counters.GetInvalidated().GetTotal(), 0);
+ UNIT_ASSERT_VALUES_EQUAL(counters.GetFailed().GetTotal(), 0);
+ UNIT_ASSERT_VALUES_EQUAL(counters.GetRunning().GetTotal(), 0);
+ UNIT_ASSERT_VALUES_EQUAL(counters.GetSuspended().GetTotal(), 0);
+ UNIT_ASSERT_VALUES_EQUAL(counters.GetPending().GetTotal(), 0);
+ UNIT_ASSERT_VALUES_EQUAL(counters.GetBlocked().GetTotal(), 0);
+ }
+
+ Y_UNIT_TEST(Broken)
+ {
+ UNIT_ASSERT_EXCEPTION_CONTAINS(TJobCounters(TNode()), yexception, "TJobCounters");
+ UNIT_ASSERT_EXCEPTION_CONTAINS(TJobCounters(TNode(1)), yexception, "TJobCounters");
+ UNIT_ASSERT_EXCEPTION_CONTAINS(TJobCounters(TNode(1.0)), yexception, "TJobCounters");
+ UNIT_ASSERT_EXCEPTION_CONTAINS(TJobCounters(TNode("Whatever")), yexception, "TJobCounters");
+ }
+}
diff --git a/yt/cpp/mapreduce/interface/job_statistics.cpp b/yt/cpp/mapreduce/interface/job_statistics.cpp
new file mode 100644
index 0000000000..bd9791672d
--- /dev/null
+++ b/yt/cpp/mapreduce/interface/job_statistics.cpp
@@ -0,0 +1,361 @@
+#include "job_statistics.h"
+
+#include "operation.h"
+
+#include <library/cpp/yson/node/node.h>
+#include <library/cpp/yson/node/serialize.h>
+
+#include <library/cpp/yson/writer.h>
+
+#include <util/datetime/base.h>
+#include <util/generic/hash_set.h>
+#include <util/generic/ptr.h>
+#include <util/stream/file.h>
+#include <util/string/cast.h>
+#include <util/string/subst.h>
+#include <util/system/file.h>
+
+namespace NYT {
+
+////////////////////////////////////////////////////////////////////
+
+template <>
+i64 ConvertJobStatisticsEntry(i64 value)
+{
+ return value;
+}
+
+template <>
+TDuration ConvertJobStatisticsEntry(i64 value)
+{
+ return TDuration::MilliSeconds(value);
+}
+
+////////////////////////////////////////////////////////////////////
+
+static TTaskName JobTypeToTaskName(EJobType jobType)
+{
+ switch (jobType) {
+ case EJobType::PartitionMap:
+ return ETaskName::PartitionMap0;
+ case EJobType::Partition:
+ return ETaskName::Partition0;
+ default:
+ return ToString(jobType);
+ }
+}
+
+static TTaskName FixTaskName(TString taskName)
+{
+ if (taskName == "partition") {
+ return ETaskName::Partition0;
+ } else if (taskName == "partition_map") {
+ return ETaskName::PartitionMap0;
+ }
+ return taskName;
+}
+
+////////////////////////////////////////////////////////////////////
+
+class TJobStatistics::TData
+ : public TThrRefBase
+{
+public:
+ using TTaskName2Data = THashMap<TString, TJobStatistics::TDataEntry>;
+ using TState2TaskName2Data = THashMap<EJobState, TTaskName2Data>;
+ using TName2State2TaskName2Data = THashMap<TString, TState2TaskName2Data>;
+
+public:
+ TName2State2TaskName2Data Name2State2TaskName2Data;
+
+public:
+ TData() = default;
+
+ TData(const TNode& statisticsNode)
+ {
+ ParseNode(statisticsNode, TString(), &Name2State2TaskName2Data);
+ }
+
+ static void Aggregate(TJobStatistics::TDataEntry* result, const TJobStatistics::TDataEntry& other)
+ {
+ result->Max = Max(result->Max, other.Max);
+ result->Min = Min(result->Min, other.Min);
+ result->Sum += other.Sum;
+ result->Count += other.Count;
+ }
+
+ static void ParseNode(const TNode& node, TState2TaskName2Data* output)
+ {
+ auto getInt = [] (const TNode& theNode, TStringBuf key) {
+ const auto& nodeAsMap = theNode.AsMap();
+ auto it = nodeAsMap.find(key);
+ if (it == nodeAsMap.end()) {
+ ythrow yexception() << "Key '" << key << "' is not found";
+ }
+ const auto& valueNode = it->second;
+ if (!valueNode.IsInt64()) {
+ ythrow yexception() << "Key '" << key << "' is not of int64 type";
+ }
+ return valueNode.AsInt64();
+ };
+
+ for (const auto& [stateStr, taskName2DataNode] : node.AsMap()) {
+ EJobState state;
+ if (!TryFromString(stateStr, state)) {
+ continue;
+ }
+ for (const auto& [taskName, dataNode] : taskName2DataNode.AsMap()) {
+ auto fixedTaskName = FixTaskName(taskName);
+ auto& data = (*output)[state][fixedTaskName.Get()];
+ data.Max = getInt(dataNode, "max");
+ data.Min = getInt(dataNode, "min");
+ data.Sum = getInt(dataNode, "sum");
+ data.Count = getInt(dataNode, "count");
+ }
+ }
+ }
+
+ static void ParseNode(const TNode& node, const TString& curPath, TName2State2TaskName2Data* output)
+ {
+ Y_VERIFY(node.IsMap());
+
+ for (const auto& [key, value] : node.AsMap()) {
+ if (key == "$"sv) {
+ ParseNode(value, &(*output)[curPath]);
+ } else {
+ TString childPath = curPath;
+ if (!childPath.empty()) {
+ childPath.push_back('/');
+ }
+ if (key.find_first_of('/') != key.npos) {
+ TString keyCopy(key);
+ SubstGlobal(keyCopy, "/", "\\/");
+ childPath += keyCopy;
+ } else {
+ childPath += key;
+ }
+ ParseNode(value, childPath, output);
+ }
+ }
+ }
+};
+
+////////////////////////////////////////////////////////////////////
+
+struct TJobStatistics::TFilter
+ : public TThrRefBase
+{
+ TVector<TTaskName> TaskNameFilter;
+ TVector<EJobState> JobStateFilter = {EJobState::Completed};
+};
+
+////////////////////////////////////////////////////////////////////
+
+const TString TJobStatistics::CustomStatisticsNamePrefix_ = "custom/";
+
+TJobStatistics::TJobStatistics()
+ : Data_(::MakeIntrusive<TData>())
+ , Filter_(::MakeIntrusive<TFilter>())
+{ }
+
+
+TJobStatistics::TJobStatistics(const NYT::TNode& statisticsNode)
+ : Data_(::MakeIntrusive<TData>(statisticsNode))
+ , Filter_(::MakeIntrusive<TFilter>())
+{ }
+
+TJobStatistics::TJobStatistics(::TIntrusivePtr<TData> data, ::TIntrusivePtr<TFilter> filter)
+ : Data_(data)
+ , Filter_(::MakeIntrusive<TFilter>(*filter))
+{ }
+
+TJobStatistics::TJobStatistics(const TJobStatistics& jobStatistics) = default;
+TJobStatistics::TJobStatistics(TJobStatistics&&) = default;
+
+TJobStatistics& TJobStatistics::operator=(const TJobStatistics& jobStatistics) = default;
+TJobStatistics& TJobStatistics::operator=(TJobStatistics&& jobStatistics) = default;
+
+TJobStatistics::~TJobStatistics() = default;
+
+TJobStatistics TJobStatistics::TaskName(TVector<TTaskName> taskNames) const
+{
+ auto newFilter = ::MakeIntrusive<TFilter>(*Filter_);
+ newFilter->TaskNameFilter = std::move(taskNames);
+ return TJobStatistics(Data_, std::move(newFilter));
+}
+
+TJobStatistics TJobStatistics::JobState(TVector<EJobState> jobStates) const
+{
+ auto newFilter = ::MakeIntrusive<TFilter>(*Filter_);
+ newFilter->JobStateFilter = std::move(jobStates);
+ return TJobStatistics(Data_, std::move(newFilter));
+}
+
+TJobStatistics TJobStatistics::JobType(TVector<EJobType> jobTypes) const
+{
+ TVector<TTaskName> taskNames;
+ for (auto jobType : jobTypes) {
+ taskNames.push_back(JobTypeToTaskName(jobType));
+ }
+ return TaskName(std::move(taskNames));
+}
+
+bool TJobStatistics::HasStatistics(TStringBuf name) const
+{
+ return Data_->Name2State2TaskName2Data.contains(name);
+}
+
+TJobStatisticsEntry<i64> TJobStatistics::GetStatistics(TStringBuf name) const
+{
+ return GetStatisticsAs<i64>(name);
+}
+
+TVector<TString> TJobStatistics::GetStatisticsNames() const
+{
+ TVector<TString> result;
+ result.reserve(Data_->Name2State2TaskName2Data.size());
+ for (const auto& entry : Data_->Name2State2TaskName2Data) {
+ result.push_back(entry.first);
+ }
+ return result;
+}
+
+bool TJobStatistics::HasCustomStatistics(TStringBuf name) const
+{
+ return HasStatistics(CustomStatisticsNamePrefix_ + name);
+}
+
+TJobStatisticsEntry<i64> TJobStatistics::GetCustomStatistics(TStringBuf name) const
+{
+ return GetCustomStatisticsAs<i64>(name);
+}
+
+TVector<TString> TJobStatistics::GetCustomStatisticsNames() const
+{
+ TVector<TString> result;
+ for (const auto& entry : Data_->Name2State2TaskName2Data) {
+ if (entry.first.StartsWith(CustomStatisticsNamePrefix_)) {
+ result.push_back(entry.first.substr(CustomStatisticsNamePrefix_.size()));
+ }
+ }
+ return result;
+}
+
+TMaybe<TJobStatistics::TDataEntry> TJobStatistics::GetStatisticsImpl(TStringBuf name) const
+{
+ auto name2State2TaskName2DataIt = Data_->Name2State2TaskName2Data.find(name);
+ Y_ENSURE(
+ name2State2TaskName2DataIt != Data_->Name2State2TaskName2Data.end(),
+ "Statistics '" << name << "' are missing");
+ const auto& state2TaskName2Data = name2State2TaskName2DataIt->second;
+
+ TMaybe<TDataEntry> result;
+ auto aggregate = [&] (const TDataEntry& data) {
+ if (result) {
+ TData::Aggregate(&result.GetRef(), data);
+ } else {
+ result = data;
+ }
+ };
+
+ auto aggregateTaskName2Data = [&] (const TData::TTaskName2Data& taskName2Data) {
+ if (Filter_->TaskNameFilter.empty()) {
+ for (const auto& [taskName, data] : taskName2Data) {
+ aggregate(data);
+ }
+ } else {
+ for (const auto& taskName : Filter_->TaskNameFilter) {
+ auto it = taskName2Data.find(taskName.Get());
+ if (it == taskName2Data.end()) {
+ continue;
+ }
+ const auto& data = it->second;
+ aggregate(data);
+ }
+ }
+ };
+
+ if (Filter_->JobStateFilter.empty()) {
+ for (const auto& [state, taskName2Data] : state2TaskName2Data) {
+ aggregateTaskName2Data(taskName2Data);
+ }
+ } else {
+ for (auto state : Filter_->JobStateFilter) {
+ auto it = state2TaskName2Data.find(state);
+ if (it == state2TaskName2Data.end()) {
+ continue;
+ }
+ const auto& taskName2Data = it->second;
+ aggregateTaskName2Data(taskName2Data);
+ }
+ }
+
+ return result;
+}
+
+////////////////////////////////////////////////////////////////////
+
+namespace {
+
+constexpr int USER_STATISTICS_FILE_DESCRIPTOR = 5;
+constexpr char PATH_DELIMITER = '/';
+constexpr char ESCAPE = '\\';
+
+IOutputStream* GetStatisticsStream()
+{
+ static TFile file = Duplicate(USER_STATISTICS_FILE_DESCRIPTOR);
+ static TFileOutput stream(file);
+ return &stream;
+}
+
+template <typename T>
+void WriteCustomStatisticsAny(TStringBuf path, const T& value)
+{
+ ::NYson::TYsonWriter writer(GetStatisticsStream(), NYson::EYsonFormat::Binary, ::NYson::EYsonType::ListFragment);
+ int depth = 0;
+ size_t begin = 0;
+ size_t end = 0;
+ TVector<TString> items;
+ while (end <= path.size()) {
+ if (end + 1 < path.size() && path[end] == ESCAPE && path[end + 1] == PATH_DELIMITER) {
+ end += 2;
+ continue;
+ }
+ if (end == path.size() || path[end] == PATH_DELIMITER) {
+ writer.OnBeginMap();
+ items.emplace_back(path.data() + begin, end - begin);
+ SubstGlobal(items.back(), "\\/", "/");
+ writer.OnKeyedItem(TStringBuf(items.back()));
+ ++depth;
+ begin = end + 1;
+ }
+ ++end;
+ }
+ Serialize(value, &writer);
+ while (depth > 0) {
+ writer.OnEndMap();
+ --depth;
+ }
+}
+
+}
+
+////////////////////////////////////////////////////////////////////
+
+void WriteCustomStatistics(const TNode& statistics)
+{
+ ::NYson::TYsonWriter writer(GetStatisticsStream(), NYson::EYsonFormat::Binary, ::NYson::EYsonType::ListFragment);
+ Serialize(statistics, &writer);
+}
+
+void WriteCustomStatistics(TStringBuf path, i64 value)
+{
+ WriteCustomStatisticsAny(path, value);
+}
+
+void FlushCustomStatisticsStream() {
+ GetStatisticsStream()->Flush();
+}
+////////////////////////////////////////////////////////////////////
+
+} // namespace NYT
diff --git a/yt/cpp/mapreduce/interface/job_statistics.h b/yt/cpp/mapreduce/interface/job_statistics.h
new file mode 100644
index 0000000000..8af751604f
--- /dev/null
+++ b/yt/cpp/mapreduce/interface/job_statistics.h
@@ -0,0 +1,268 @@
+#pragma once
+
+///
+/// @file yt/cpp/mapreduce/interface/job_statistics.h
+///
+/// Header containing classes and utility functions to work with
+/// [job statistics](https://docs.yandex-team.ru/yt/problems/jobstatistics).
+
+#include "fwd.h"
+
+#include <library/cpp/yson/node/node.h>
+
+#include <util/system/defaults.h>
+#include <util/generic/maybe.h>
+#include <util/generic/ptr.h>
+
+namespace NYT {
+
+////////////////////////////////////////////////////////////////////
+
+///
+/// @brief Convert i64 representation of statistics to other type.
+///
+/// Library defines this template for types TDuration and i64.
+/// Users may define it for their types.
+///
+/// @see @ref NYT::TJobStatistics::GetStatisticsAs method.
+template <typename T>
+T ConvertJobStatisticsEntry(i64 value);
+
+////////////////////////////////////////////////////////////////////
+
+/// Class representing a collection of job statistics.
+class TJobStatistics
+{
+public:
+ ///
+ /// Construct empty statistics.
+ TJobStatistics();
+
+ ///
+ /// Construct statistics from statistics node.
+ TJobStatistics(const NYT::TNode& statistics);
+
+ TJobStatistics(const TJobStatistics& jobStatistics);
+ TJobStatistics(TJobStatistics&& jobStatistics);
+
+ TJobStatistics& operator=(const TJobStatistics& jobStatistics);
+ TJobStatistics& operator=(TJobStatistics&& jobStatistics);
+
+ ~TJobStatistics();
+
+ ///
+ /// @brief Filter statistics by task name.
+ ///
+ /// @param taskNames What task names to include (empty means all).
+ TJobStatistics TaskName(TVector<TTaskName> taskNames) const;
+
+ ///
+ /// @brief Filter statistics by job state.
+ ///
+ /// @param filter What job states to include (empty means all).
+ ///
+ /// @note Default statistics include only (successfully) completed jobs.
+ TJobStatistics JobState(TVector<EJobState> filter) const;
+
+ ///
+ /// @brief Filter statistics by job type.
+ ///
+ /// @param filter What job types to include (empty means all).
+ ///
+ /// @deprecated Use @ref TJobStatistics::TaskName instead.
+ ///
+ /// @see https://yt.yandex-team.ru/docs/description/mr/jobs#obshaya-shema
+ TJobStatistics JobType(TVector<EJobType> filter) const;
+
+ ///
+ /// @brief Check that given statistics exist.
+ ///
+ /// @param name Slash separated statistics name, e.g. "time/total" (like it appears in web interface).
+ bool HasStatistics(TStringBuf name) const;
+
+ ///
+ /// @brief Get statistics by name.
+ ///
+ /// @param name Slash separated statistics name, e.g. "time/total" (like it appears in web interface).
+ ///
+ /// @note If statistics is missing an exception is thrown. If because of filters
+ /// no fields remain the returned value is empty (all fields are `Nothing`).
+ ///
+ /// @note We don't use `TMaybe<TJobStatisticsEntry>` here;
+ /// instead, @ref NYT::TJobStatisticsEntry methods return `TMaybe<i64>`,
+ /// so user easier use `.GetOrElse`:
+ /// ```
+ /// jobStatistics.GetStatistics("some/statistics/name").Max().GetOrElse(0);
+ /// ```
+ TJobStatisticsEntry<i64> GetStatistics(TStringBuf name) const;
+
+ ///
+ /// @brief Get statistics by name.
+ ///
+ /// @param name Slash separated statistics name, e.g. "time/total" (like it appears in web interface).
+ ///
+ /// @note In order to use `GetStatisticsAs` method, @ref NYT::ConvertJobStatisticsEntry function must be defined
+ /// (the library defines it for `i64` and `TDuration`, user may define it for other types).
+ template <typename T>
+ TJobStatisticsEntry<T> GetStatisticsAs(TStringBuf name) const;
+
+ ///
+ /// Get (slash separated) names of statistics.
+ TVector<TString> GetStatisticsNames() const;
+
+ ///
+ /// @brief Check if given custom statistics exists.
+ ///
+ /// @param name Slash separated custom statistics name.
+ bool HasCustomStatistics(TStringBuf name) const;
+
+ ///
+ /// @brief Get custom statistics (those the user can write in job with @ref NYT::WriteCustomStatistics).
+ ///
+ /// @param name Slash separated custom statistics name.
+ TJobStatisticsEntry<i64> GetCustomStatistics(TStringBuf name) const;
+
+ ///
+ /// @brief Get custom statistics (those the user can write in job with @ref NYT::WriteCustomStatistics).
+ ///
+ /// @param name Slash separated custom statistics name.
+ template <typename T>
+ TJobStatisticsEntry<T> GetCustomStatisticsAs(TStringBuf name) const;
+
+ ///
+ /// Get names of all custom statistics.
+ TVector<TString> GetCustomStatisticsNames() const;
+
+private:
+ class TData;
+ struct TFilter;
+
+ struct TDataEntry {
+ i64 Max;
+ i64 Min;
+ i64 Sum;
+ i64 Count;
+ };
+
+ static const TString CustomStatisticsNamePrefix_;
+
+private:
+ TJobStatistics(::TIntrusivePtr<TData> data, ::TIntrusivePtr<TFilter> filter);
+
+ TMaybe<TDataEntry> GetStatisticsImpl(TStringBuf name) const;
+
+private:
+ ::TIntrusivePtr<TData> Data_;
+ ::TIntrusivePtr<TFilter> Filter_;
+
+private:
+ template<typename T>
+ friend class TJobStatisticsEntry;
+};
+
+////////////////////////////////////////////////////////////////////
+
+/// Class representing single statistic.
+template <typename T>
+class TJobStatisticsEntry
+{
+public:
+ TJobStatisticsEntry(TMaybe<TJobStatistics::TDataEntry> data)
+ : Data_(std::move(data))
+ { }
+
+ /// Sum of the statistic over all jobs.
+ TMaybe<T> Sum() const
+ {
+ if (Data_) {
+ return ConvertJobStatisticsEntry<T>(Data_->Sum);
+ }
+ return Nothing();
+ }
+
+ /// @brief Average of the statistic over all jobs.
+ ///
+ /// @note Only jobs that emitted statistics are taken into account.
+ TMaybe<T> Avg() const
+ {
+ if (Data_ && Data_->Count) {
+ return ConvertJobStatisticsEntry<T>(Data_->Sum / Data_->Count);
+ }
+ return Nothing();
+ }
+
+ /// @brief Number of jobs that emitted this statistic.
+ TMaybe<T> Count() const
+ {
+ if (Data_) {
+ return ConvertJobStatisticsEntry<T>(Data_->Count);
+ }
+ return Nothing();
+ }
+
+ /// @brief Maximum value of the statistic over all jobs.
+ TMaybe<T> Max() const
+ {
+ if (Data_) {
+ return ConvertJobStatisticsEntry<T>(Data_->Max);
+ }
+ return Nothing();
+ }
+
+ /// @brief Minimum value of the statistic over all jobs.
+ TMaybe<T> Min() const
+ {
+ if (Data_) {
+ return ConvertJobStatisticsEntry<T>(Data_->Min);
+ }
+ return Nothing();
+ }
+
+private:
+ TMaybe<TJobStatistics::TDataEntry> Data_;
+
+private:
+ friend class TJobStatistics;
+};
+
+////////////////////////////////////////////////////////////////////
+
+template <typename T>
+TJobStatisticsEntry<T> TJobStatistics::GetStatisticsAs(TStringBuf name) const
+{
+ return TJobStatisticsEntry<T>(GetStatisticsImpl(name));
+}
+
+template <typename T>
+TJobStatisticsEntry<T> TJobStatistics::GetCustomStatisticsAs(TStringBuf name) const
+{
+ return TJobStatisticsEntry<T>(GetStatisticsImpl(CustomStatisticsNamePrefix_ + name));
+}
+
+////////////////////////////////////////////////////////////////////
+
+///
+/// @brief Write [custom statistics](https://yt.yandex-team.ru/docs/description/mr/jobs#user_stats).
+///
+/// @param path Slash-separated path (length must not exceed 512 bytes).
+/// @param value Value of the statistic.
+///
+/// @note The function must be called in job.
+/// Total number of statistics (with different paths) must not exceed 128.
+void WriteCustomStatistics(TStringBuf path, i64 value);
+
+///
+/// @brief Write several [custom statistics](https://yt.yandex-team.ru/docs/description/mr/jobs#user_stats) at once.
+///
+/// @param statistics A tree of map nodes with leaves of type `i64`.
+///
+/// @note The call is equivalent to calling @ref NYT::WriteCustomStatistics(TStringBuf, i64) for every path in the given map.
+void WriteCustomStatistics(const TNode& statistics);
+
+///
+/// @brief Flush [custom statistics stream](https://yt.yandex-team.ru/docs/description/mr/jobs#user_stats)
+///
+void FlushCustomStatisticsStream();
+////////////////////////////////////////////////////////////////////
+
+} // namespace NYT
diff --git a/yt/cpp/mapreduce/interface/job_statistics_ut.cpp b/yt/cpp/mapreduce/interface/job_statistics_ut.cpp
new file mode 100644
index 0000000000..0cf53d771a
--- /dev/null
+++ b/yt/cpp/mapreduce/interface/job_statistics_ut.cpp
@@ -0,0 +1,257 @@
+#include <yt/cpp/mapreduce/interface/job_statistics.h>
+#include <yt/cpp/mapreduce/interface/operation.h>
+
+#include <library/cpp/yson/node/node_io.h>
+
+#include <library/cpp/testing/unittest/registar.h>
+
+using namespace NYT;
+
+Y_UNIT_TEST_SUITE(JobStatistics)
+{
+ Y_UNIT_TEST(Simple)
+ {
+ const TString input = R"""(
+ {
+ "data" = {
+ "output" = {
+ "0" = {
+ "uncompressed_data_size" = {
+ "$" = {
+ "completed" = {
+ "simple_sort" = {
+ "max" = 130;
+ "count" = 1;
+ "min" = 130;
+ "sum" = 130;
+ };
+ "map" = {
+ "max" = 42;
+ "count" = 1;
+ "min" = 42;
+ "sum" = 42;
+ };
+ };
+ "aborted" = {
+ "simple_sort" = {
+ "max" = 24;
+ "count" = 1;
+ "min" = 24;
+ "sum" = 24;
+ };
+ };
+ };
+ };
+ };
+ };
+ };
+ })""";
+
+ TJobStatistics stat(NodeFromYsonString(input));
+
+ UNIT_ASSERT(stat.HasStatistics("data/output/0/uncompressed_data_size"));
+ UNIT_ASSERT(!stat.HasStatistics("nonexistent-statistics"));
+ UNIT_ASSERT_EXCEPTION_CONTAINS(stat.GetStatistics("BLAH-BLAH"), yexception, "Statistics");
+
+ UNIT_ASSERT_VALUES_EQUAL(stat.GetStatisticsNames(), TVector<TString>{"data/output/0/uncompressed_data_size"});
+
+ UNIT_ASSERT_VALUES_EQUAL(stat.GetStatistics("data/output/0/uncompressed_data_size").Max(), 130);
+ UNIT_ASSERT_VALUES_EQUAL(stat.GetStatistics("data/output/0/uncompressed_data_size").Count(), 2);
+ UNIT_ASSERT_VALUES_EQUAL(stat.GetStatistics("data/output/0/uncompressed_data_size").Min(), 42);
+ UNIT_ASSERT_VALUES_EQUAL(stat.GetStatistics("data/output/0/uncompressed_data_size").Sum(), 172);
+ UNIT_ASSERT_VALUES_EQUAL(stat.GetStatistics("data/output/0/uncompressed_data_size").Avg(), 172 / 2);
+
+ UNIT_ASSERT_VALUES_EQUAL(stat.JobState({EJobState::Aborted}).GetStatistics("data/output/0/uncompressed_data_size").Sum(), 24);
+ UNIT_ASSERT_VALUES_EQUAL(stat.JobType({EJobType::Map}).JobState({EJobState::Aborted}).GetStatistics("data/output/0/uncompressed_data_size").Sum(), TMaybe<i64>());
+ }
+
+ Y_UNIT_TEST(TestOtherTypes)
+ {
+ const TString input = R"""(
+ {
+ "time" = {
+ "exec" = {
+ "$" = {
+ "completed" = {
+ "map" = {
+ "max" = 2482468;
+ "count" = 38;
+ "min" = 578976;
+ "sum" = 47987270;
+ };
+ };
+ };
+ };
+ };
+ })""";
+
+ TJobStatistics stat(NodeFromYsonString(input));
+
+ UNIT_ASSERT_VALUES_EQUAL(stat.GetStatisticsAs<TDuration>("time/exec").Max(), TDuration::MilliSeconds(2482468));
+ }
+
+ Y_UNIT_TEST(Custom)
+ {
+ const TString input = R"""(
+ {
+ "custom" = {
+ "some" = {
+ "path" = {
+ "$" = {
+ "completed" = {
+ "map" = {
+ "max" = -1;
+ "count" = 1;
+ "min" = -1;
+ "sum" = -1;
+ };
+ };
+ };
+ };
+ };
+ "another" = {
+ "path" = {
+ "$" = {
+ "completed" = {
+ "map" = {
+ "max" = 1001;
+ "count" = 2;
+ "min" = 1001;
+ "sum" = 2002;
+ };
+ };
+ };
+ };
+ };
+ };
+ })""";
+
+ TJobStatistics stat(NodeFromYsonString(input));
+
+ UNIT_ASSERT(stat.HasCustomStatistics("some/path"));
+ UNIT_ASSERT(!stat.HasCustomStatistics("nonexistent-statistics"));
+ UNIT_ASSERT_EXCEPTION_CONTAINS(stat.GetCustomStatistics("BLAH-BLAH"), yexception, "Statistics");
+
+ const auto names = stat.GetCustomStatisticsNames();
+ const THashSet<TString> expected = {"some/path", "another/path"};
+ UNIT_ASSERT_VALUES_EQUAL(THashSet<TString>(names.begin(), names.end()), expected);
+
+ UNIT_ASSERT_VALUES_EQUAL(stat.GetCustomStatistics("some/path").Max(), -1);
+ UNIT_ASSERT_VALUES_EQUAL(stat.GetCustomStatistics("another/path").Avg(), 1001);
+ }
+
+ Y_UNIT_TEST(TaskNames)
+ {
+ const TString input = R"""(
+ {
+ "data" = {
+ "output" = {
+ "0" = {
+ "uncompressed_data_size" = {
+ "$" = {
+ "completed" = {
+ "partition_map" = {
+ "max" = 130;
+ "count" = 1;
+ "min" = 130;
+ "sum" = 130;
+ };
+ "partition(0)" = {
+ "max" = 42;
+ "count" = 1;
+ "min" = 42;
+ "sum" = 42;
+ };
+ };
+ "aborted" = {
+ "simple_sort" = {
+ "max" = 24;
+ "count" = 1;
+ "min" = 24;
+ "sum" = 24;
+ };
+ };
+ };
+ };
+ };
+ };
+ };
+ })""";
+
+ TJobStatistics stat(NodeFromYsonString(input));
+
+ UNIT_ASSERT(stat.HasStatistics("data/output/0/uncompressed_data_size"));
+ UNIT_ASSERT(!stat.HasStatistics("nonexistent-statistics"));
+ UNIT_ASSERT_EXCEPTION_CONTAINS(stat.GetStatistics("BLAH-BLAH"), yexception, "Statistics");
+
+ UNIT_ASSERT_VALUES_EQUAL(stat.GetStatisticsNames(), TVector<TString>{"data/output/0/uncompressed_data_size"});
+
+ UNIT_ASSERT_VALUES_EQUAL(stat.GetStatistics("data/output/0/uncompressed_data_size").Max(), 130);
+ UNIT_ASSERT_VALUES_EQUAL(stat.GetStatistics("data/output/0/uncompressed_data_size").Count(), 2);
+ UNIT_ASSERT_VALUES_EQUAL(stat.GetStatistics("data/output/0/uncompressed_data_size").Min(), 42);
+ UNIT_ASSERT_VALUES_EQUAL(stat.GetStatistics("data/output/0/uncompressed_data_size").Sum(), 172);
+ UNIT_ASSERT_VALUES_EQUAL(stat.GetStatistics("data/output/0/uncompressed_data_size").Avg(), 172 / 2);
+
+ UNIT_ASSERT_VALUES_EQUAL(
+ stat
+ .JobState({EJobState::Aborted})
+ .GetStatistics("data/output/0/uncompressed_data_size")
+ .Sum(),
+ 24);
+ UNIT_ASSERT_VALUES_EQUAL(
+ stat
+ .JobType({EJobType::Partition})
+ .JobState({EJobState::Aborted})
+ .GetStatistics("data/output/0/uncompressed_data_size")
+ .Sum(),
+ TMaybe<i64>());
+ UNIT_ASSERT_VALUES_EQUAL(
+ stat
+ .TaskName({"partition(0)"})
+ .GetStatistics("data/output/0/uncompressed_data_size")
+ .Sum(),
+ 42);
+ UNIT_ASSERT_VALUES_EQUAL(
+ stat
+ .TaskName({"partition"})
+ .GetStatistics("data/output/0/uncompressed_data_size")
+ .Sum(),
+ TMaybe<i64>());
+ UNIT_ASSERT_VALUES_EQUAL(
+ stat
+ .TaskName({"partition_map(0)"})
+ .GetStatistics("data/output/0/uncompressed_data_size")
+ .Sum(),
+ 130);
+ UNIT_ASSERT_VALUES_EQUAL(
+ stat
+ .JobType({EJobType::Partition})
+ .GetStatistics("data/output/0/uncompressed_data_size")
+ .Sum(),
+ 42);
+ UNIT_ASSERT_VALUES_EQUAL(
+ stat
+ .JobType({EJobType::PartitionMap})
+ .GetStatistics("data/output/0/uncompressed_data_size")
+ .Sum(),
+ 130);
+ UNIT_ASSERT_VALUES_EQUAL(
+ stat
+ .TaskName({ETaskName::Partition0})
+ .GetStatistics("data/output/0/uncompressed_data_size")
+ .Sum(),
+ 42);
+ UNIT_ASSERT_VALUES_EQUAL(
+ stat
+ .TaskName({ETaskName::Partition1})
+ .GetStatistics("data/output/0/uncompressed_data_size")
+ .Sum(),
+ TMaybe<i64>());
+ UNIT_ASSERT_VALUES_EQUAL(
+ stat
+ .TaskName({ETaskName::PartitionMap0})
+ .GetStatistics("data/output/0/uncompressed_data_size")
+ .Sum(),
+ 130);
+ }
+}
diff --git a/yt/cpp/mapreduce/interface/logging/logger.cpp b/yt/cpp/mapreduce/interface/logging/logger.cpp
new file mode 100644
index 0000000000..bfa56b94f6
--- /dev/null
+++ b/yt/cpp/mapreduce/interface/logging/logger.cpp
@@ -0,0 +1,188 @@
+#include "logger.h"
+
+#include <util/datetime/base.h>
+
+#include <util/stream/file.h>
+#include <util/stream/format.h>
+#include <util/stream/printf.h>
+#include <util/stream/str.h>
+
+#include <util/system/mutex.h>
+#include <util/system/rwlock.h>
+#include <util/system/thread.h>
+
+namespace NYT {
+
+////////////////////////////////////////////////////////////////////////////////
+
+static TStringBuf StripFileName(TStringBuf path) {
+ TStringBuf l, r;
+ if (path.TryRSplit('/', l, r) || path.TryRSplit('\\', l, r)) {
+ return r;
+ } else {
+ return path;
+ }
+}
+
+static char GetLogLevelCode(ILogger::ELevel level) {
+ switch (level) {
+ case ILogger::FATAL: return 'F';
+ case ILogger::ERROR: return 'E';
+ case ILogger::INFO: return 'I';
+ case ILogger::DEBUG: return 'D';
+ }
+ Y_UNREACHABLE();
+}
+
+////////////////////////////////////////////////////////////////////////////////
+
+class TNullLogger
+ : public ILogger
+{
+public:
+ void Log(ELevel level, const TSourceLocation& sourceLocation, const char* format, va_list args) override
+ {
+ Y_UNUSED(level);
+ Y_UNUSED(sourceLocation);
+ Y_UNUSED(format);
+ Y_UNUSED(args);
+ }
+};
+
+////////////////////////////////////////////////////////////////////////////////
+
+class TLoggerBase
+ : public ILogger
+{
+public:
+ TLoggerBase(ELevel cutLevel)
+ : CutLevel_(cutLevel)
+ { }
+
+ virtual void OutputLine(const TString& line) = 0;
+
+ void Log(ELevel level, const TSourceLocation& sourceLocation, const char* format, va_list args) override
+ {
+ if (level > CutLevel_) {
+ return;
+ }
+
+ TStringStream stream;
+ stream << TInstant::Now().ToStringLocal()
+ << " " << GetLogLevelCode(level)
+ << " [" << Hex(TThread::CurrentThreadId(), HF_FULL) << "] ";
+ Printf(stream, format, args);
+ stream << " - " << StripFileName(sourceLocation.File) << ':' << sourceLocation.Line << Endl;
+
+ TGuard<TMutex> guard(Mutex_);
+ OutputLine(stream.Str());
+ }
+
+private:
+ ELevel CutLevel_;
+ TMutex Mutex_;
+};
+
+////////////////////////////////////////////////////////////////////////////////
+
+class TStdErrLogger
+ : public TLoggerBase
+{
+public:
+ TStdErrLogger(ELevel cutLevel)
+ : TLoggerBase(cutLevel)
+ { }
+
+ void OutputLine(const TString& line) override
+ {
+ Cerr << line;
+ }
+};
+
+ILoggerPtr CreateStdErrLogger(ILogger::ELevel cutLevel)
+{
+ return new TStdErrLogger(cutLevel);
+}
+
+////////////////////////////////////////////////////////////////////////////////
+
+class TFileLogger
+ : public TLoggerBase
+{
+public:
+ TFileLogger(ELevel cutLevel, const TString& path, bool append)
+ : TLoggerBase(cutLevel)
+ , Stream_(TFile(path, OpenAlways | WrOnly | Seq | (append ? ForAppend : EOpenMode())))
+ { }
+
+ void OutputLine(const TString& line) override
+ {
+ Stream_ << line;
+ }
+
+private:
+ TUnbufferedFileOutput Stream_;
+};
+
+ILoggerPtr CreateFileLogger(ILogger::ELevel cutLevel, const TString& path, bool append)
+{
+ return new TFileLogger(cutLevel, path, append);
+}
+////////////////////////////////////////////////////////////////////////////////
+
+class TBufferedFileLogger
+ : public TLoggerBase
+{
+public:
+ TBufferedFileLogger(ELevel cutLevel, const TString& path, bool append)
+ : TLoggerBase(cutLevel)
+ , Stream_(TFile(path, OpenAlways | WrOnly | Seq | (append ? ForAppend : EOpenMode())))
+ { }
+
+ void OutputLine(const TString& line) override
+ {
+ Stream_ << line;
+ }
+
+private:
+ TFileOutput Stream_;
+};
+
+ILoggerPtr CreateBufferedFileLogger(ILogger::ELevel cutLevel, const TString& path, bool append)
+{
+ return new TBufferedFileLogger(cutLevel, path, append);
+}
+
+////////////////////////////////////////////////////////////////////////////////
+
+static TRWMutex LoggerMutex;
+static ILoggerPtr Logger;
+
+struct TLoggerInitializer
+{
+ TLoggerInitializer()
+ {
+ Logger = new TNullLogger;
+ }
+} LoggerInitializer;
+
+void SetLogger(ILoggerPtr logger)
+{
+ auto guard = TWriteGuard(LoggerMutex);
+ if (logger) {
+ Logger = logger;
+ } else {
+ Logger = new TNullLogger;
+ }
+}
+
+ILoggerPtr GetLogger()
+{
+ auto guard = TReadGuard(LoggerMutex);
+ return Logger;
+}
+
+////////////////////////////////////////////////////////////////////////////////
+
+}
+
diff --git a/yt/cpp/mapreduce/interface/logging/logger.h b/yt/cpp/mapreduce/interface/logging/logger.h
new file mode 100644
index 0000000000..2b5aae87d1
--- /dev/null
+++ b/yt/cpp/mapreduce/interface/logging/logger.h
@@ -0,0 +1,43 @@
+#pragma once
+
+#include <util/generic/ptr.h>
+#include <util/generic/string.h>
+#include <util/system/compat.h>
+#include <util/system/src_location.h>
+
+namespace NYT {
+
+////////////////////////////////////////////////////////////////////////////////
+
+class ILogger
+ : public TThrRefBase
+{
+public:
+ enum ELevel
+ {
+ FATAL /* "fatal", "FATAL" */,
+ // We don't have such level as `warning', but we support it for compatibility with other APIs.
+ ERROR /* "error", "warning", "ERROR", "WARNING" */,
+ INFO /* "info", "INFO" */,
+ DEBUG /* "debug", "DEBUG" */
+ };
+
+ virtual void Log(ELevel level, const ::TSourceLocation& sourceLocation, const char* format, va_list args) = 0;
+};
+
+using ILoggerPtr = ::TIntrusivePtr<ILogger>;
+
+void SetLogger(ILoggerPtr logger);
+ILoggerPtr GetLogger();
+
+ILoggerPtr CreateStdErrLogger(ILogger::ELevel cutLevel);
+ILoggerPtr CreateFileLogger(ILogger::ELevel cutLevel, const TString& path, bool append = false);
+
+/**
+ * Create logger that writes to a file in a buffered manner.
+ * It should result in fewer system calls (useful if you expect a lot of log messages),
+ * but in case of a crash, you would lose some log messages that haven't been flushed yet.
+ */
+ILoggerPtr CreateBufferedFileLogger(ILogger::ELevel cutLevel, const TString& path, bool append = false);
+
+} // namespace NYT
diff --git a/yt/cpp/mapreduce/interface/logging/ya.make b/yt/cpp/mapreduce/interface/logging/ya.make
new file mode 100644
index 0000000000..8095bfe4ba
--- /dev/null
+++ b/yt/cpp/mapreduce/interface/logging/ya.make
@@ -0,0 +1,16 @@
+LIBRARY()
+
+INCLUDE(${ARCADIA_ROOT}/yt/ya_cpp.make.inc)
+
+SRCS(
+ logger.cpp
+ yt_log.cpp
+)
+
+PEERDIR(
+ library/cpp/yt/logging
+)
+
+GENERATE_ENUM_SERIALIZATION(logger.h)
+
+END()
diff --git a/yt/cpp/mapreduce/interface/logging/yt_log.cpp b/yt/cpp/mapreduce/interface/logging/yt_log.cpp
new file mode 100644
index 0000000000..9fa7b91580
--- /dev/null
+++ b/yt/cpp/mapreduce/interface/logging/yt_log.cpp
@@ -0,0 +1,126 @@
+#include "yt_log.h"
+
+#include "logger.h"
+
+#include <util/generic/guid.h>
+
+#include <util/system/mutex.h>
+
+namespace NYT {
+
+using namespace NLogging;
+
+////////////////////////////////////////////////////////////////////////////////
+
+namespace {
+
+class TLogManager
+ : public ILogManager
+{
+public:
+ static constexpr TStringBuf CategoryName = "Wrapper";
+
+public:
+ void RegisterStaticAnchor(
+ TLoggingAnchor* anchor,
+ ::TSourceLocation sourceLocation,
+ TStringBuf anchorMessage) override
+ {
+ if (anchor->Registered.exchange(true)) {
+ return;
+ }
+
+ anchor->Enabled.store(true);
+
+ auto guard = Guard(Mutex_);
+ anchor->SourceLocation = sourceLocation;
+ anchor->AnchorMessage = anchorMessage;
+ }
+
+ void UpdateAnchor(TLoggingAnchor* /*position*/) override
+ { }
+
+ void Enqueue(TLogEvent&& event) override
+ {
+ auto message = TString(event.MessageRef.ToStringBuf());
+ LogMessage(
+ ToImplLevel(event.Level),
+ ::TSourceLocation(event.SourceFile, event.SourceLine),
+ "%.*s",
+ event.MessageRef.size(),
+ event.MessageRef.begin());
+ }
+
+ const TLoggingCategory* GetCategory(TStringBuf categoryName) override
+ {
+ Y_VERIFY(categoryName == CategoryName);
+ return &Category_;
+ }
+
+ void UpdateCategory(TLoggingCategory* /*category*/) override
+ {
+ Y_FAIL();
+ }
+
+ bool GetAbortOnAlert() const override
+ {
+ return false;
+ }
+
+private:
+ static ILogger::ELevel ToImplLevel(ELogLevel level)
+ {
+ switch (level) {
+ case ELogLevel::Minimum:
+ case ELogLevel::Trace:
+ case ELogLevel::Debug:
+ return ILogger::ELevel::DEBUG;
+ case ELogLevel::Info:
+ return ILogger::ELevel::INFO;
+ case ELogLevel::Warning:
+ case ELogLevel::Error:
+ return ILogger::ELevel::ERROR;
+ case ELogLevel::Alert:
+ case ELogLevel::Fatal:
+ case ELogLevel::Maximum:
+ return ILogger::ELevel::FATAL;
+ }
+ }
+
+ static void LogMessage(ILogger::ELevel level, const ::TSourceLocation& sourceLocation, const char* format, ...)
+ {
+ va_list args;
+ va_start(args, format);
+ GetLogger()->Log(level, sourceLocation, format, args);
+ va_end(args);
+ }
+
+private:
+ ::TMutex Mutex_;
+ std::atomic<int> ActualVersion_{1};
+ const TLoggingCategory Category_{
+ .Name{CategoryName},
+ .MinPlainTextLevel{ELogLevel::Minimum},
+ .CurrentVersion{1},
+ .ActualVersion = &ActualVersion_,
+ };
+};
+
+TLogManager LogManager;
+
+} // namespace
+
+////////////////////////////////////////////////////////////////////////////////
+
+TLogger Logger(&LogManager, TLogManager::CategoryName);
+
+////////////////////////////////////////////////////////////////////////////////
+
+void FormatValue(TStringBuilderBase* builder, const TGUID& value, TStringBuf /*format*/)
+{
+ builder->AppendString(GetGuidAsString(value));
+}
+
+////////////////////////////////////////////////////////////////////////////////
+
+} // namespace NYT
diff --git a/yt/cpp/mapreduce/interface/logging/yt_log.h b/yt/cpp/mapreduce/interface/logging/yt_log.h
new file mode 100644
index 0000000000..4cf93a6ba1
--- /dev/null
+++ b/yt/cpp/mapreduce/interface/logging/yt_log.h
@@ -0,0 +1,17 @@
+#pragma once
+
+#include <library/cpp/yt/logging/logger.h>
+
+struct TGUID;
+
+namespace NYT {
+
+////////////////////////////////////////////////////////////////////////////////
+
+extern NLogging::TLogger Logger;
+
+void FormatValue(TStringBuilderBase* builder, const TGUID& value, TStringBuf format);
+
+////////////////////////////////////////////////////////////////////////////////
+
+} // namespace NYT
diff --git a/yt/cpp/mapreduce/interface/mpl.h b/yt/cpp/mapreduce/interface/mpl.h
new file mode 100644
index 0000000000..9865e28b6c
--- /dev/null
+++ b/yt/cpp/mapreduce/interface/mpl.h
@@ -0,0 +1,73 @@
+#pragma once
+
+#include "fwd.h"
+
+#include <tuple>
+#include <type_traits>
+
+namespace NYT {
+
+/// @cond Doxygen_Suppress
+
+////////////////////////////////////////////////////////////////////////////////
+
+template <class TBase, class TDerived>
+struct TIsBaseOf
+{
+ static constexpr bool Value = std::is_base_of_v<TBase, TDerived> && !std::is_same_v<TBase, TDerived>;
+};
+
+////////////////////////////////////////////////////////////////////////////////
+
+namespace NDetail {
+
+template <class T, class Tuple>
+struct TIndexInTuple;
+
+template <class T, class... Types>
+struct TIndexInTuple<T, std::tuple<T, Types...>>
+{
+ static constexpr int Value = 0;
+};
+
+template <class T>
+struct TIndexInTuple<T, std::tuple<>>
+{
+ static constexpr int Value = 0;
+};
+
+template <class T, class U, class... Types>
+struct TIndexInTuple<T, std::tuple<U, Types...>>
+{
+ static constexpr int Value = 1 + TIndexInTuple<T, std::tuple<Types...>>::Value;
+};
+
+template <class T, class TTuple>
+constexpr bool DoesTupleContainType = (TIndexInTuple<T, TTuple>::Value < std::tuple_size<TTuple>{});
+
+template <class TOut, class TIn = std::tuple<>>
+struct TUniqueTypes;
+
+template <class... TOut, class TInCar, class... TInCdr>
+struct TUniqueTypes<std::tuple<TOut...>, std::tuple<TInCar, TInCdr...>>
+{
+ using TType = std::conditional_t<
+ DoesTupleContainType<TInCar, std::tuple<TOut...>>,
+ typename TUniqueTypes<std::tuple<TOut...>, std::tuple<TInCdr...>>::TType,
+ typename TUniqueTypes<std::tuple<TOut..., TInCar>, std::tuple<TInCdr...>>::TType
+ >;
+};
+
+template <class TOut>
+struct TUniqueTypes<TOut, std::tuple<>>
+{
+ using TType = TOut;
+};
+
+} // namespace NDetail
+
+/// @endcond Doxygen_Suppress
+
+////////////////////////////////////////////////////////////////////////////////
+
+}
diff --git a/yt/cpp/mapreduce/interface/node.h b/yt/cpp/mapreduce/interface/node.h
new file mode 100644
index 0000000000..fece1b36de
--- /dev/null
+++ b/yt/cpp/mapreduce/interface/node.h
@@ -0,0 +1,7 @@
+#pragma once
+
+// Backward compatibility
+#include "fwd.h"
+#include <library/cpp/yson/node/node.h>
+
+
diff --git a/yt/cpp/mapreduce/interface/operation-inl.h b/yt/cpp/mapreduce/interface/operation-inl.h
new file mode 100644
index 0000000000..8d53cd446f
--- /dev/null
+++ b/yt/cpp/mapreduce/interface/operation-inl.h
@@ -0,0 +1,928 @@
+#pragma once
+
+#ifndef OPERATION_INL_H_
+#error "Direct inclusion of this file is not allowed, use operation.h"
+#include "operation.h"
+#endif
+#undef OPERATION_INL_H_
+
+#include "errors.h"
+
+#include <util/generic/bt_exception.h>
+#include <util/generic/singleton.h>
+#include <util/system/type_name.h>
+
+#include <util/stream/file.h>
+#include <util/stream/buffer.h>
+#include <util/string/subst.h>
+
+#include <typeindex>
+
+namespace NYT {
+
+namespace NDetail {
+
+////////////////////////////////////////////////////////////////////////////////
+
+template<class T>
+void Assign(TVector<T>& array, size_t idx, const T& value) {
+ array.resize(std::max(array.size(), idx + 1));
+ array[idx] = value;
+}
+
+////////////////////////////////////////////////////////////////////////////////
+
+template <typename TRow>
+TStructuredRowStreamDescription GetStructuredRowStreamDescription()
+{
+ if constexpr (std::is_same_v<TRow, NYT::TNode>) {
+ return TTNodeStructuredRowStream{};
+ } else if constexpr (std::is_same_v<TRow, NYT::TYaMRRow>) {
+ return TTYaMRRowStructuredRowStream{};
+ } else if constexpr (std::is_same_v<::google::protobuf::Message, TRow>) {
+ return TProtobufStructuredRowStream{nullptr};
+ } else if constexpr (TIsBaseOf<::google::protobuf::Message, TRow>::Value) {
+ return TProtobufStructuredRowStream{TRow::descriptor()};
+ } else if constexpr (TIsProtoOneOf<TRow>::value) {
+ return TProtobufStructuredRowStream{nullptr};
+ } else {
+ static_assert(TDependentFalse<TRow>, "Unknown row type");
+ }
+}
+
+////////////////////////////////////////////////////////////////////////////////
+
+} // namespace NDetail
+
+////////////////////////////////////////////////////////////////////////////////
+
+template <typename TRow>
+TStructuredTablePath Structured(TRichYPath richYPath)
+{
+ return TStructuredTablePath(std::move(richYPath), StructuredTableDescription<TRow>());
+}
+
+template <typename TRow>
+TTableStructure StructuredTableDescription()
+{
+ if constexpr (std::is_same_v<TRow, NYT::TNode>) {
+ return TUnspecifiedTableStructure{};
+ } else if constexpr (std::is_same_v<TRow, NYT::TYaMRRow>) {
+ return TUnspecifiedTableStructure{};
+ } else if constexpr (std::is_base_of_v<::google::protobuf::Message, TRow>) {
+ if constexpr (std::is_same_v<::google::protobuf::Message, TRow>) {
+ static_assert(TDependentFalse<TRow>, "Cannot use ::google::protobuf::Message as table descriptor");
+ } else {
+ return TProtobufTableStructure{TRow::descriptor()};
+ }
+ } else {
+ static_assert(TDependentFalse<TRow>, "Unknown row type");
+ }
+}
+
+////////////////////////////////////////////////////////////////////////////////
+
+template <typename TDerived>
+TDerived& TRawOperationIoTableSpec<TDerived>::AddInput(const TRichYPath& path)
+{
+ Inputs_.push_back(path);
+ return static_cast<TDerived&>(*this);
+}
+
+template <typename TDerived>
+TDerived& TRawOperationIoTableSpec<TDerived>::SetInput(size_t tableIndex, const TRichYPath& path)
+{
+ NDetail::Assign(Inputs_, tableIndex, path);
+}
+
+template <typename TDerived>
+TDerived& TRawOperationIoTableSpec<TDerived>::AddOutput(const TRichYPath& path)
+{
+ Outputs_.push_back(path);
+ return static_cast<TDerived&>(*this);
+}
+
+template <typename TDerived>
+TDerived& TRawOperationIoTableSpec<TDerived>::SetOutput(size_t tableIndex, const TRichYPath& path)
+{
+ NDetail::Assign(Outputs_, tableIndex, path);
+}
+
+template <typename TDerived>
+const TVector<TRichYPath>& TRawOperationIoTableSpec<TDerived>::GetInputs() const
+{
+ return Inputs_;
+}
+
+template <typename TDerived>
+const TVector<TRichYPath>& TRawOperationIoTableSpec<TDerived>::GetOutputs() const
+{
+ return Outputs_;
+}
+
+////////////////////////////////////////////////////////////////////////////////
+
+template <typename TDerived>
+TDerived& TRawMapReduceOperationIoSpec<TDerived>::AddMapOutput(const TRichYPath& path)
+{
+ MapOutputs_.push_back(path);
+ return static_cast<TDerived&>(*this);
+}
+
+template <typename TDerived>
+TDerived& TRawMapReduceOperationIoSpec<TDerived>::SetMapOutput(size_t tableIndex, const TRichYPath& path)
+{
+ NDetail::Assign(MapOutputs_, tableIndex, path);
+}
+
+template <typename TDerived>
+const TVector<TRichYPath>& TRawMapReduceOperationIoSpec<TDerived>::GetMapOutputs() const
+{
+ return MapOutputs_;
+}
+
+////////////////////////////////////////////////////////////////////////////////
+
+::TIntrusivePtr<INodeReaderImpl> CreateJobNodeReader(TRawTableReaderPtr rawTableReader);
+::TIntrusivePtr<IYaMRReaderImpl> CreateJobYaMRReader(TRawTableReaderPtr rawTableReader);
+::TIntrusivePtr<IProtoReaderImpl> CreateJobProtoReader(TRawTableReaderPtr rawTableReader);
+
+::TIntrusivePtr<INodeWriterImpl> CreateJobNodeWriter(THolder<IProxyOutput> rawTableWriter);
+::TIntrusivePtr<IYaMRWriterImpl> CreateJobYaMRWriter(THolder<IProxyOutput> rawTableWriter);
+::TIntrusivePtr<IProtoWriterImpl> CreateJobProtoWriter(THolder<IProxyOutput> rawTableWriter);
+
+////////////////////////////////////////////////////////////////////////////////
+
+template <class T>
+inline ::TIntrusivePtr<typename TRowTraits<T>::IReaderImpl> CreateJobReaderImpl(TRawTableReaderPtr rawTableReader);
+
+template <>
+inline ::TIntrusivePtr<INodeReaderImpl> CreateJobReaderImpl<TNode>(TRawTableReaderPtr rawTableReader)
+{
+ return CreateJobNodeReader(rawTableReader);
+}
+
+template <>
+inline ::TIntrusivePtr<IYaMRReaderImpl> CreateJobReaderImpl<TYaMRRow>(TRawTableReaderPtr rawTableReader)
+{
+ return CreateJobYaMRReader(rawTableReader);
+}
+
+template <>
+inline ::TIntrusivePtr<IProtoReaderImpl> CreateJobReaderImpl<Message>(TRawTableReaderPtr rawTableReader)
+{
+ return CreateJobProtoReader(rawTableReader);
+}
+
+template <class T>
+inline ::TIntrusivePtr<typename TRowTraits<T>::IReaderImpl> CreateJobReaderImpl(TRawTableReaderPtr rawTableReader)
+{
+ if constexpr (TIsBaseOf<Message, T>::Value || NDetail::TIsProtoOneOf<T>::value) {
+ return CreateJobProtoReader(rawTableReader);
+ } else {
+ static_assert(TDependentFalse<T>, "Unknown row type");
+ }
+}
+
+template <class T>
+inline TTableReaderPtr<T> CreateJobReader(TRawTableReaderPtr rawTableReader)
+{
+ return new TTableReader<T>(CreateJobReaderImpl<T>(rawTableReader));
+}
+
+////////////////////////////////////////////////////////////////////////////////
+
+template <class T>
+TTableWriterPtr<T> CreateJobWriter(THolder<IProxyOutput> rawJobWriter);
+
+template <>
+inline TTableWriterPtr<TNode> CreateJobWriter<TNode>(THolder<IProxyOutput> rawJobWriter)
+{
+ return new TTableWriter<TNode>(CreateJobNodeWriter(std::move(rawJobWriter)));
+}
+
+template <>
+inline TTableWriterPtr<TYaMRRow> CreateJobWriter<TYaMRRow>(THolder<IProxyOutput> rawJobWriter)
+{
+ return new TTableWriter<TYaMRRow>(CreateJobYaMRWriter(std::move(rawJobWriter)));
+}
+
+template <>
+inline TTableWriterPtr<Message> CreateJobWriter<Message>(THolder<IProxyOutput> rawJobWriter)
+{
+ return new TTableWriter<Message>(CreateJobProtoWriter(std::move(rawJobWriter)));
+}
+
+template <class T, class = void>
+struct TProtoWriterCreator;
+
+template <class T>
+struct TProtoWriterCreator<T, std::enable_if_t<TIsBaseOf<Message, T>::Value>>
+{
+ static TTableWriterPtr<T> Create(::TIntrusivePtr<IProtoWriterImpl> writer)
+ {
+ return new TTableWriter<T>(writer);
+ }
+};
+
+template <class T>
+inline TTableWriterPtr<T> CreateJobWriter(THolder<IProxyOutput> rawJobWriter)
+{
+ if constexpr (TIsBaseOf<Message, T>::Value) {
+ return TProtoWriterCreator<T>::Create(CreateJobProtoWriter(std::move(rawJobWriter)));
+ } else {
+ static_assert(TDependentFalse<T>, "Unknown row type");
+ }
+}
+
+////////////////////////////////////////////////////////////////////////////////
+
+template <class T>
+void TOperationInputSpecBase::AddInput(const TRichYPath& path)
+{
+ Inputs_.push_back(path);
+ StructuredInputs_.emplace_back(Structured<T>(path));
+}
+
+template <class T>
+void TOperationInputSpecBase::SetInput(size_t tableIndex, const TRichYPath& path)
+{
+ NDetail::Assign(Inputs_, tableIndex, path);
+ NDetail::Assign(StructuredInputs_, tableIndex, Structured<T>(path));
+}
+
+
+template <class T>
+void TOperationOutputSpecBase::AddOutput(const TRichYPath& path)
+{
+ Outputs_.push_back(path);
+ StructuredOutputs_.emplace_back(Structured<T>(path));
+}
+
+template <class T>
+void TOperationOutputSpecBase::SetOutput(size_t tableIndex, const TRichYPath& path)
+{
+ NDetail::Assign(Outputs_, tableIndex, path);
+ NDetail::Assign(StructuredOutputs_, tableIndex, Structured<T>(path));
+}
+
+template <class TDerived>
+template <class T>
+TDerived& TOperationIOSpec<TDerived>::AddInput(const TRichYPath& path)
+{
+ static_assert(!std::is_same<T, Message>::value, "input type can't be Message, it can only be its strict subtype (see st.yandex-team.ru/YT-7609)");
+ TOperationInputSpecBase::AddInput<T>(path);
+ return *static_cast<TDerived*>(this);
+}
+
+template <class TDerived>
+template <class T>
+TDerived& TOperationIOSpec<TDerived>::SetInput(size_t tableIndex, const TRichYPath& path)
+{
+ static_assert(!std::is_same<T, Message>::value, "input type can't be Message, it can only be its strict subtype (see st.yandex-team.ru/YT-7609)");
+ TOperationInputSpecBase::SetInput<T>(tableIndex, path);
+ return *static_cast<TDerived*>(this);
+}
+
+
+template <class TDerived>
+template <class T>
+TDerived& TOperationIOSpec<TDerived>::AddOutput(const TRichYPath& path)
+{
+ static_assert(!std::is_same<T, Message>::value, "output type can't be Message, it can only be its strict subtype (see st.yandex-team.ru/YT-7609)");
+ TOperationOutputSpecBase::AddOutput<T>(path);
+ return *static_cast<TDerived*>(this);
+}
+
+template <class TDerived>
+template <class T>
+TDerived& TOperationIOSpec<TDerived>::SetOutput(size_t tableIndex, const TRichYPath& path)
+{
+ static_assert(!std::is_same<T, Message>::value, "output type can't be Message, it can only be its strict subtype (see st.yandex-team.ru/YT-7609)");
+ TOperationOutputSpecBase::SetOutput<T>(tableIndex, path);
+ return *static_cast<TDerived*>(this);
+}
+
+template <class TDerived>
+TDerived& TOperationIOSpec<TDerived>::AddStructuredInput(TStructuredTablePath path)
+{
+ TOperationInputSpecBase::AddStructuredInput(std::move(path));
+ return *static_cast<TDerived*>(this);
+}
+
+template <class TDerived>
+TDerived& TOperationIOSpec<TDerived>::AddStructuredOutput(TStructuredTablePath path)
+{
+ TOperationOutputSpecBase::AddStructuredOutput(std::move(path));
+ return *static_cast<TDerived*>(this);
+}
+
+////////////////////////////////////////////////////////////////////////////////
+
+template <class T>
+TVanillaTask& TVanillaTask::AddOutput(const TRichYPath& path)
+{
+ static_assert(!std::is_same<T, Message>::value, "output type can't be Message, it can only be its strict subtype (see st.yandex-team.ru/YT-7609)");
+ TOperationOutputSpecBase::AddOutput<T>(path);
+ return *this;
+}
+
+template <class T>
+TVanillaTask& TVanillaTask::SetOutput(size_t tableIndex, const TRichYPath& path)
+{
+ static_assert(!std::is_same<T, Message>::value, "output type can't be Message, it can only be its strict subtype (see st.yandex-team.ru/YT-7609)");
+ TOperationOutputSpecBase::SetOutput<T>(tableIndex, path);
+ return *this;
+}
+
+////////////////////////////////////////////////////////////////////////////////
+
+namespace NDetail {
+
+void ResetUseClientProtobuf(const char* methodName);
+
+} // namespace NDetail
+
+template <class TDerived>
+TDerived& TOperationIOSpec<TDerived>::AddProtobufInput_VerySlow_Deprecated(const TRichYPath& path)
+{
+ NDetail::ResetUseClientProtobuf("AddProtobufInput_VerySlow_Deprecated");
+ Inputs_.push_back(path);
+ StructuredInputs_.emplace_back(TStructuredTablePath(path, TProtobufTableStructure{nullptr}));
+ return *static_cast<TDerived*>(this);
+}
+
+template <class TDerived>
+TDerived& TOperationIOSpec<TDerived>::AddProtobufOutput_VerySlow_Deprecated(const TRichYPath& path)
+{
+ NDetail::ResetUseClientProtobuf("AddProtobufOutput_VerySlow_Deprecated");
+ Outputs_.push_back(path);
+ StructuredOutputs_.emplace_back(TStructuredTablePath(path, TProtobufTableStructure{nullptr}));
+ return *static_cast<TDerived*>(this);
+}
+
+////////////////////////////////////////////////////////////////////////////////
+
+template <typename TRow>
+TJobOperationPreparer::TInputGroup& TJobOperationPreparer::TInputGroup::Description()
+{
+ for (auto i : Indices_) {
+ Preparer_.InputDescription<TRow>(i);
+ }
+ return *this;
+}
+
+template <typename TRow>
+TJobOperationPreparer::TOutputGroup& TJobOperationPreparer::TOutputGroup::Description(bool inferSchema)
+{
+ for (auto i : Indices_) {
+ Preparer_.OutputDescription<TRow>(i, inferSchema);
+ }
+ return *this;
+}
+
+////////////////////////////////////////////////////////////////////////////////
+
+template <typename TCont>
+TJobOperationPreparer::TInputGroup TJobOperationPreparer::BeginInputGroup(const TCont& indices)
+{
+ for (auto i : indices) {
+ ValidateInputTableIndex(i, TStringBuf("BeginInputGroup()"));
+ }
+ return TInputGroup(*this, TVector<int>(std::begin(indices), std::end(indices)));
+}
+
+template <typename TCont>
+TJobOperationPreparer::TOutputGroup TJobOperationPreparer::BeginOutputGroup(const TCont& indices)
+{
+ for (auto i : indices) {
+ ValidateOutputTableIndex(i, TStringBuf("BeginOutputGroup()"));
+ }
+ return TOutputGroup(*this, indices);
+}
+
+
+template <typename TRow>
+TJobOperationPreparer& TJobOperationPreparer::InputDescription(int tableIndex)
+{
+ ValidateMissingInputDescription(tableIndex);
+ InputTableDescriptions_[tableIndex] = StructuredTableDescription<TRow>();
+ return *this;
+}
+
+template <typename TRow>
+TJobOperationPreparer& TJobOperationPreparer::OutputDescription(int tableIndex, bool inferSchema)
+{
+ ValidateMissingOutputDescription(tableIndex);
+ OutputTableDescriptions_[tableIndex] = StructuredTableDescription<TRow>();
+ if (inferSchema && !OutputSchemas_[tableIndex]) {
+ OutputSchemas_[tableIndex] = CreateTableSchema<TRow>();
+ }
+ return *this;
+}
+
+////////////////////////////////////////////////////////////////////////////////
+
+template <class TDerived>
+template <class TRow>
+TDerived& TIntermediateTablesHintSpec<TDerived>::HintMapOutput()
+{
+ IntermediateMapOutputDescription_ = StructuredTableDescription<TRow>();
+ return *static_cast<TDerived*>(this);
+}
+
+template <class TDerived>
+template <class TRow>
+TDerived& TIntermediateTablesHintSpec<TDerived>::AddMapOutput(const TRichYPath& path)
+{
+ MapOutputs_.push_back(path);
+ StructuredMapOutputs_.emplace_back(Structured<TRow>(path));
+ return *static_cast<TDerived*>(this);
+}
+
+template <class TDerived>
+template <class TRow>
+TDerived& TIntermediateTablesHintSpec<TDerived>::HintReduceCombinerInput()
+{
+ IntermediateReduceCombinerInputDescription_ = StructuredTableDescription<TRow>();
+ return *static_cast<TDerived*>(this);
+}
+
+template <class TDerived>
+template <class TRow>
+TDerived& TIntermediateTablesHintSpec<TDerived>::HintReduceCombinerOutput()
+{
+ IntermediateReduceCombinerOutputDescription_ = StructuredTableDescription<TRow>();
+ return *static_cast<TDerived*>(this);
+}
+
+template <class TDerived>
+template <class TRow>
+TDerived& TIntermediateTablesHintSpec<TDerived>::HintReduceInput()
+{
+ IntermediateReducerInputDescription_ = StructuredTableDescription<TRow>();
+ return *static_cast<TDerived*>(this);
+}
+
+template <class TDerived>
+const TVector<TStructuredTablePath>& TIntermediateTablesHintSpec<TDerived>::GetStructuredMapOutputs() const
+{
+ return StructuredMapOutputs_;
+}
+
+template <class TDerived>
+const TMaybe<TTableStructure>& TIntermediateTablesHintSpec<TDerived>::GetIntermediateMapOutputDescription() const
+{
+ return IntermediateMapOutputDescription_;
+}
+
+template <class TDerived>
+const TMaybe<TTableStructure>& TIntermediateTablesHintSpec<TDerived>::GetIntermediateReduceCombinerInputDescription() const
+{
+ return IntermediateReduceCombinerInputDescription_;
+}
+
+template <class TDerived>
+const TMaybe<TTableStructure>& TIntermediateTablesHintSpec<TDerived>::GetIntermediateReduceCombinerOutputDescription() const
+{
+ return IntermediateReduceCombinerOutputDescription_;
+}
+
+template <class TDerived>
+const TMaybe<TTableStructure>& TIntermediateTablesHintSpec<TDerived>::GetIntermediateReducerInputDescription() const
+{
+ return IntermediateReducerInputDescription_;
+}
+
+////////////////////////////////////////////////////////////////////////////////
+
+struct TReducerContext
+{
+ bool Break = false;
+ static TReducerContext* Get() { return Singleton<TReducerContext>(); }
+};
+
+template <class TR, class TW>
+inline void IReducer<TR, TW>::Break()
+{
+ TReducerContext::Get()->Break = true;
+}
+
+template <typename TReader, typename TWriter>
+void FeedJobInput(
+ IMapper<TReader, TWriter>* mapper,
+ typename TRowTraits<typename TReader::TRowType>::IReaderImpl* readerImpl,
+ TWriter* writer)
+{
+ using TInputRow = typename TReader::TRowType;
+
+ auto reader = MakeIntrusive<TTableReader<TInputRow>>(readerImpl);
+ mapper->Do(reader.Get(), writer);
+}
+
+template <typename TReader, typename TWriter>
+void FeedJobInput(
+ IReducer<TReader, TWriter>* reducer,
+ typename TRowTraits<typename TReader::TRowType>::IReaderImpl* readerImpl,
+ TWriter* writer)
+{
+ using TInputRow = typename TReader::TRowType;
+
+ auto rangesReader = MakeIntrusive<TTableRangesReader<TInputRow>>(readerImpl);
+ for (; rangesReader->IsValid(); rangesReader->Next()) {
+ reducer->Do(&rangesReader->GetRange(), writer);
+ if (TReducerContext::Get()->Break) {
+ break;
+ }
+ }
+}
+
+template <typename TReader, typename TWriter>
+void FeedJobInput(
+ IAggregatorReducer<TReader, TWriter>* reducer,
+ typename TRowTraits<typename TReader::TRowType>::IReaderImpl* readerImpl,
+ TWriter* writer)
+{
+ using TInputRow = typename TReader::TRowType;
+
+ auto rangesReader = MakeIntrusive<TTableRangesReader<TInputRow>>(readerImpl);
+ reducer->Do(rangesReader.Get(), writer);
+}
+
+template <class TRawJob>
+int RunRawJob(size_t outputTableCount, IInputStream& jobStateStream)
+{
+ TRawJobContext context(outputTableCount);
+
+ TRawJob job;
+ job.Load(jobStateStream);
+ job.Do(context);
+ return 0;
+}
+
+template <>
+inline int RunRawJob<TCommandRawJob>(size_t /* outputTableCount */, IInputStream& /* jobStateStream */)
+{
+ Y_FAIL();
+}
+
+template <class TVanillaJob>
+int RunVanillaJob(size_t outputTableCount, IInputStream& jobStateStream)
+{
+ TVanillaJob job;
+ job.Load(jobStateStream);
+
+ if constexpr (std::is_base_of<IVanillaJob<>, TVanillaJob>::value) {
+ Y_VERIFY(outputTableCount == 0, "Void vanilla job expects zero 'outputTableCount'");
+ job.Do();
+ } else {
+ Y_VERIFY(outputTableCount, "Vanilla job with table writer expects nonzero 'outputTableCount'");
+ using TOutputRow = typename TVanillaJob::TWriter::TRowType;
+
+ THolder<IProxyOutput> rawJobWriter;
+ if (auto customWriter = job.CreateCustomRawJobWriter(outputTableCount)) {
+ rawJobWriter = std::move(customWriter);
+ } else {
+ rawJobWriter = CreateRawJobWriter(outputTableCount);
+ }
+ auto writer = CreateJobWriter<TOutputRow>(std::move(rawJobWriter));
+
+ job.Start(writer.Get());
+ job.Do(writer.Get());
+ job.Finish(writer.Get());
+
+ writer->Finish();
+ }
+ return 0;
+}
+
+template <>
+inline int RunVanillaJob<TCommandVanillaJob>(size_t /* outputTableCount */, IInputStream& /* jobStateStream */)
+{
+ Y_FAIL();
+}
+
+template <class TJob>
+ requires TIsBaseOf<IStructuredJob, TJob>::Value
+int RunJob(size_t outputTableCount, IInputStream& jobStateStream)
+{
+ using TInputRow = typename TJob::TReader::TRowType;
+ using TOutputRow = typename TJob::TWriter::TRowType;
+
+ auto job = MakeIntrusive<TJob>();
+ job->Load(jobStateStream);
+
+ TRawTableReaderPtr rawJobReader;
+ if (auto customReader = job->CreateCustomRawJobReader(/*fd*/ 0)) {
+ rawJobReader = customReader;
+ } else {
+ rawJobReader = CreateRawJobReader(/*fd*/ 0);
+ }
+ auto readerImpl = CreateJobReaderImpl<TInputRow>(rawJobReader);
+
+ // Many users don't expect to have jobs with empty input so we skip such jobs.
+ if (!readerImpl->IsValid()) {
+ return 0;
+ }
+
+ THolder<IProxyOutput> rawJobWriter;
+ if (auto customWriter = job->CreateCustomRawJobWriter(outputTableCount)) {
+ rawJobWriter = std::move(customWriter);
+ } else {
+ rawJobWriter = CreateRawJobWriter(outputTableCount);
+ }
+ auto writer = CreateJobWriter<TOutputRow>(std::move(rawJobWriter));
+
+ job->Start(writer.Get());
+ FeedJobInput(job.Get(), readerImpl.Get(), writer.Get());
+ job->Finish(writer.Get());
+
+ writer->Finish();
+
+ return 0;
+}
+
+//
+// We leave RunMapJob/RunReduceJob/RunAggregatorReducer for backward compatibility,
+// some user use them already. :(
+
+template <class TMapper>
+int RunMapJob(size_t outputTableCount, IInputStream& jobStateStream)
+{
+ return RunJob<TMapper>(outputTableCount, jobStateStream);
+}
+
+template <class TReducer>
+int RunReduceJob(size_t outputTableCount, IInputStream& jobStateStream)
+{
+ return RunJob<TReducer>(outputTableCount, jobStateStream);
+}
+
+template <class TReducer>
+int RunAggregatorReducer(size_t outputTableCount, IInputStream& jobStateStream)
+{
+ return RunJob<TReducer>(outputTableCount, jobStateStream);
+}
+
+////////////////////////////////////////////////////////////////////////////////
+
+template <typename T, typename = void>
+struct TIsConstructibleFromNode
+ : std::false_type
+{ };
+
+template <typename T>
+struct TIsConstructibleFromNode<T, std::void_t<decltype(T::FromNode(std::declval<TNode&>()))>>
+ : std::true_type
+{ };
+
+template <class TJob>
+::TIntrusivePtr<NYT::IStructuredJob> ConstructJobFromNode(const TNode& node)
+{
+ if constexpr (TIsConstructibleFromNode<TJob>::value) {
+ Y_ENSURE(node.GetType() != TNode::Undefined,
+ "job has FromNode method but constructor arguments were not provided");
+ return TJob::FromNode(node);
+ } else {
+ Y_ENSURE(node.GetType() == TNode::Undefined,
+ "constructor arguments provided but job does not contain FromNode method");
+ return MakeIntrusive<TJob>();
+ }
+}
+
+////////////////////////////////////////////////////////////////////////////////
+
+using TJobFunction = int (*)(size_t, IInputStream&);
+using TConstructJobFunction = ::TIntrusivePtr<NYT::IStructuredJob> (*)(const TNode&);
+
+class TJobFactory
+{
+public:
+ static TJobFactory* Get()
+ {
+ return Singleton<TJobFactory>();
+ }
+
+ template <class TJob>
+ void RegisterJob(const char* name)
+ {
+ RegisterJobImpl<TJob>(name, RunJob<TJob>);
+ JobConstructors[name] = ConstructJobFromNode<TJob>;
+ }
+
+ template <class TRawJob>
+ void RegisterRawJob(const char* name)
+ {
+ RegisterJobImpl<TRawJob>(name, RunRawJob<TRawJob>);
+ }
+
+ template <class TVanillaJob>
+ void RegisterVanillaJob(const char* name)
+ {
+ RegisterJobImpl<TVanillaJob>(name, RunVanillaJob<TVanillaJob>);
+ }
+
+ TString GetJobName(const IJob* job)
+ {
+ const auto typeIndex = std::type_index(typeid(*job));
+ CheckJobRegistered(typeIndex);
+ return JobNames[typeIndex];
+ }
+
+ TJobFunction GetJobFunction(const char* name)
+ {
+ CheckNameRegistered(name);
+ return JobFunctions[name];
+ }
+
+ TConstructJobFunction GetConstructingFunction(const char* name)
+ {
+ CheckNameRegistered(name);
+ return JobConstructors[name];
+ }
+
+private:
+ TMap<std::type_index, TString> JobNames;
+ THashMap<TString, TJobFunction> JobFunctions;
+ THashMap<TString, TConstructJobFunction> JobConstructors;
+
+ template <typename TJob, typename TRunner>
+ void RegisterJobImpl(const char* name, TRunner runner) {
+ const auto typeIndex = std::type_index(typeid(TJob));
+ CheckNotRegistered(typeIndex, name);
+ JobNames[typeIndex] = name;
+ JobFunctions[name] = runner;
+ }
+
+ void CheckNotRegistered(const std::type_index& typeIndex, const char* name)
+ {
+ Y_ENSURE(!JobNames.contains(typeIndex),
+ "type_info '" << typeIndex.name() << "'"
+ "is already registered under name '" << JobNames[typeIndex] << "'");
+ Y_ENSURE(!JobFunctions.contains(name),
+ "job with name '" << name << "' is already registered");
+ }
+
+ void CheckJobRegistered(const std::type_index& typeIndex)
+ {
+ Y_ENSURE(JobNames.contains(typeIndex),
+ "type_info '" << typeIndex.name() << "' is not registered, use REGISTER_* macros");
+ }
+
+ void CheckNameRegistered(const char* name)
+ {
+ Y_ENSURE(JobFunctions.contains(name),
+ "job with name '" << name << "' is not registered, use REGISTER_* macros");
+ }
+};
+
+////////////////////////////////////////////////////////////////////////////////
+
+template <class TMapper>
+struct TMapperRegistrator
+{
+ TMapperRegistrator(const char* name)
+ {
+ static_assert(TMapper::JobType == IJob::EType::Mapper,
+ "REGISTER_MAPPER is not compatible with this job class");
+
+ NYT::TJobFactory::Get()->RegisterJob<TMapper>(name);
+ }
+};
+
+template <class TReducer>
+struct TReducerRegistrator
+{
+ TReducerRegistrator(const char* name)
+ {
+ static_assert(TReducer::JobType == IJob::EType::Reducer ||
+ TReducer::JobType == IJob::EType::ReducerAggregator,
+ "REGISTER_REDUCER is not compatible with this job class");
+
+ NYT::TJobFactory::Get()->RegisterJob<TReducer>(name);
+ }
+};
+
+template <class TRawJob>
+struct TRawJobRegistrator
+{
+ TRawJobRegistrator(const char* name)
+ {
+ static_assert(TRawJob::JobType == IJob::EType::RawJob,
+ "REGISTER_RAW_JOB is not compatible with this job class");
+ NYT::TJobFactory::Get()->RegisterRawJob<TRawJob>(name);
+ }
+};
+
+template <class TVanillaJob>
+struct TVanillaJobRegistrator
+{
+ TVanillaJobRegistrator(const char* name)
+ {
+ static_assert(TVanillaJob::JobType == IJob::EType::VanillaJob,
+ "REGISTER_VANILLA_JOB is not compatible with this job class");
+ NYT::TJobFactory::Get()->RegisterVanillaJob<TVanillaJob>(name);
+ }
+};
+
+////////////////////////////////////////////////////////////////////////////////
+
+inline TString YtRegistryTypeName(const TString& name) {
+ TString res = name;
+#ifdef _win_
+ SubstGlobal(res, "class ", "");
+#endif
+ return res;
+}
+
+////////////////////////////////////////////////////////////////////////////////
+
+#define REGISTER_MAPPER(...) \
+static const NYT::TMapperRegistrator<__VA_ARGS__> \
+Y_GENERATE_UNIQUE_ID(TJobRegistrator)(NYT::YtRegistryTypeName(TypeName<__VA_ARGS__>()).data());
+
+#define REGISTER_NAMED_MAPPER(name, ...) \
+static const NYT::TMapperRegistrator<__VA_ARGS__> \
+Y_GENERATE_UNIQUE_ID(TJobRegistrator)(name);
+
+#define REGISTER_REDUCER(...) \
+static const NYT::TReducerRegistrator<__VA_ARGS__> \
+Y_GENERATE_UNIQUE_ID(TJobRegistrator)(NYT::YtRegistryTypeName(TypeName<__VA_ARGS__>()).data());
+
+#define REGISTER_NAMED_REDUCER(name, ...) \
+static const NYT::TReducerRegistrator<__VA_ARGS__> \
+Y_GENERATE_UNIQUE_ID(TJobRegistrator)(name);
+
+#define REGISTER_NAMED_RAW_JOB(name, ...) \
+static const NYT::TRawJobRegistrator<__VA_ARGS__> \
+Y_GENERATE_UNIQUE_ID(TJobRegistrator)(name);
+
+#define REGISTER_RAW_JOB(...) \
+REGISTER_NAMED_RAW_JOB((NYT::YtRegistryTypeName(TypeName<__VA_ARGS__>()).data()), __VA_ARGS__)
+
+#define REGISTER_NAMED_VANILLA_JOB(name, ...) \
+static NYT::TVanillaJobRegistrator<__VA_ARGS__> \
+Y_GENERATE_UNIQUE_ID(TJobRegistrator)(name);
+
+#define REGISTER_VANILLA_JOB(...) \
+REGISTER_NAMED_VANILLA_JOB((NYT::YtRegistryTypeName(TypeName<__VA_ARGS__>()).data()), __VA_ARGS__)
+
+////////////////////////////////////////////////////////////////////////////////
+
+template <typename TReader, typename TWriter>
+TStructuredRowStreamDescription IMapper<TReader, TWriter>::GetInputRowStreamDescription() const
+{
+ return NYT::NDetail::GetStructuredRowStreamDescription<typename TReader::TRowType>();
+}
+
+template <typename TReader, typename TWriter>
+TStructuredRowStreamDescription IMapper<TReader, TWriter>::GetOutputRowStreamDescription() const
+{
+ return NYT::NDetail::GetStructuredRowStreamDescription<typename TWriter::TRowType>();
+}
+
+////////////////////////////////////////////////////////////////////////////////
+
+template <typename TReader, typename TWriter>
+TStructuredRowStreamDescription IReducer<TReader, TWriter>::GetInputRowStreamDescription() const
+{
+ return NYT::NDetail::GetStructuredRowStreamDescription<typename TReader::TRowType>();
+}
+
+template <typename TReader, typename TWriter>
+TStructuredRowStreamDescription IReducer<TReader, TWriter>::GetOutputRowStreamDescription() const
+{
+ return NYT::NDetail::GetStructuredRowStreamDescription<typename TWriter::TRowType>();
+}
+
+////////////////////////////////////////////////////////////////////////////////
+
+template <typename TReader, typename TWriter>
+TStructuredRowStreamDescription IAggregatorReducer<TReader, TWriter>::GetInputRowStreamDescription() const
+{
+ return NYT::NDetail::GetStructuredRowStreamDescription<typename TReader::TRowType>();
+}
+
+template <typename TReader, typename TWriter>
+TStructuredRowStreamDescription IAggregatorReducer<TReader, TWriter>::GetOutputRowStreamDescription() const
+{
+ return NYT::NDetail::GetStructuredRowStreamDescription<typename TWriter::TRowType>();
+}
+
+////////////////////////////////////////////////////////////////////////////////
+
+template <typename TWriter>
+TStructuredRowStreamDescription IVanillaJob<TWriter>::GetInputRowStreamDescription() const
+{
+ return TVoidStructuredRowStream();
+}
+
+template <typename TWriter>
+TStructuredRowStreamDescription IVanillaJob<TWriter>::GetOutputRowStreamDescription() const
+{
+ return NYT::NDetail::GetStructuredRowStreamDescription<typename TWriter::TRowType>();
+}
+
+////////////////////////////////////////////////////////////////////////////////
+
+} // namespace NYT
diff --git a/yt/cpp/mapreduce/interface/operation.cpp b/yt/cpp/mapreduce/interface/operation.cpp
new file mode 100644
index 0000000000..706fc4caa4
--- /dev/null
+++ b/yt/cpp/mapreduce/interface/operation.cpp
@@ -0,0 +1,663 @@
+#include "operation.h"
+
+#include <util/generic/iterator_range.h>
+
+namespace NYT {
+
+////////////////////////////////////////////////////////////////////////////////
+
+namespace NDetail {
+ i64 OutputTableCount = -1;
+} // namespace NDetail
+
+////////////////////////////////////////////////////////////////////////////////
+
+TTaskName::TTaskName(TString taskName)
+ : TaskName_(std::move(taskName))
+{ }
+
+TTaskName::TTaskName(const char* taskName)
+ : TaskName_(taskName)
+{ }
+
+TTaskName::TTaskName(ETaskName taskName)
+ : TaskName_(ToString(taskName))
+{ }
+
+const TString& TTaskName::Get() const
+{
+ return TaskName_;
+}
+
+////////////////////////////////////////////////////////////////////////////////
+
+TCommandRawJob::TCommandRawJob(TStringBuf command)
+ : Command_(command)
+{ }
+
+const TString& TCommandRawJob::GetCommand() const
+{
+ return Command_;
+}
+
+void TCommandRawJob::Do(const TRawJobContext& /* jobContext */)
+{
+ Y_FAIL("TCommandRawJob::Do must not be called");
+}
+
+REGISTER_NAMED_RAW_JOB("NYT::TCommandRawJob", TCommandRawJob)
+
+////////////////////////////////////////////////////////////////////////////////
+
+TCommandVanillaJob::TCommandVanillaJob(TStringBuf command)
+ : Command_(command)
+{ }
+
+const TString& TCommandVanillaJob::GetCommand() const
+{
+ return Command_;
+}
+
+void TCommandVanillaJob::Do()
+{
+ Y_FAIL("TCommandVanillaJob::Do must not be called");
+}
+
+REGISTER_NAMED_VANILLA_JOB("NYT::TCommandVanillaJob", TCommandVanillaJob);
+
+////////////////////////////////////////////////////////////////////////////////
+
+bool operator==(const TUnspecifiedTableStructure&, const TUnspecifiedTableStructure&)
+{
+ return true;
+}
+
+bool operator==(const TProtobufTableStructure& lhs, const TProtobufTableStructure& rhs)
+{
+ return lhs.Descriptor == rhs.Descriptor;
+}
+
+////////////////////////////////////////////////////////////////////////////////
+
+const TVector<TStructuredTablePath>& TOperationInputSpecBase::GetStructuredInputs() const
+{
+ return StructuredInputs_;
+}
+
+const TVector<TStructuredTablePath>& TOperationOutputSpecBase::GetStructuredOutputs() const
+{
+ return StructuredOutputs_;
+}
+
+void TOperationInputSpecBase::AddStructuredInput(TStructuredTablePath path)
+{
+ Inputs_.push_back(path.RichYPath);
+ StructuredInputs_.push_back(std::move(path));
+}
+
+void TOperationOutputSpecBase::AddStructuredOutput(TStructuredTablePath path)
+{
+ Outputs_.push_back(path.RichYPath);
+ StructuredOutputs_.push_back(std::move(path));
+}
+
+////////////////////////////////////////////////////////////////////////////////
+
+TVanillaTask& TVanillaTask::AddStructuredOutput(TStructuredTablePath path)
+{
+ TOperationOutputSpecBase::AddStructuredOutput(std::move(path));
+ return *this;
+}
+
+////////////////////////////////////////////////////////////////////////////////
+
+TStructuredRowStreamDescription IVanillaJob<void>::GetInputRowStreamDescription() const
+{
+ return TVoidStructuredRowStream();
+}
+
+TStructuredRowStreamDescription IVanillaJob<void>::GetOutputRowStreamDescription() const
+{
+ return TVoidStructuredRowStream();
+}
+
+////////////////////////////////////////////////////////////////////////////////
+
+TRawJobContext::TRawJobContext(size_t outputTableCount)
+ : InputFile_(Duplicate(0))
+{
+ for (size_t i = 0; i != outputTableCount; ++i) {
+ OutputFileList_.emplace_back(Duplicate(3 * i + 1));
+ }
+}
+
+const TFile& TRawJobContext::GetInputFile() const
+{
+ return InputFile_;
+}
+
+const TVector<TFile>& TRawJobContext::GetOutputFileList() const
+{
+ return OutputFileList_;
+}
+
+////////////////////////////////////////////////////////////////////////////////
+
+TUserJobSpec& TUserJobSpec::AddLocalFile(
+ const TLocalFilePath& path,
+ const TAddLocalFileOptions& options)
+{
+ LocalFiles_.emplace_back(path, options);
+ return *this;
+}
+
+TUserJobSpec& TUserJobSpec::JobBinaryLocalPath(TString path, TMaybe<TString> md5)
+{
+ JobBinary_ = TJobBinaryLocalPath{path, md5};
+ return *this;
+}
+
+TUserJobSpec& TUserJobSpec::JobBinaryCypressPath(TString path, TMaybe<TTransactionId> transactionId)
+{
+ JobBinary_ = TJobBinaryCypressPath{path, transactionId};
+ return *this;
+}
+
+const TJobBinaryConfig& TUserJobSpec::GetJobBinary() const
+{
+ return JobBinary_;
+}
+
+TVector<std::tuple<TLocalFilePath, TAddLocalFileOptions>> TUserJobSpec::GetLocalFiles() const
+{
+ return LocalFiles_;
+}
+
+////////////////////////////////////////////////////////////////////////////////
+
+TJobOperationPreparer::TInputGroup::TInputGroup(TJobOperationPreparer& preparer, TVector<int> indices)
+ : Preparer_(preparer)
+ , Indices_(std::move(indices))
+{ }
+
+TJobOperationPreparer::TInputGroup& TJobOperationPreparer::TInputGroup::ColumnRenaming(const THashMap<TString, TString>& renaming)
+{
+ for (auto i : Indices_) {
+ Preparer_.InputColumnRenaming(i, renaming);
+ }
+ return *this;
+}
+
+TJobOperationPreparer::TInputGroup& TJobOperationPreparer::TInputGroup::ColumnFilter(const TVector<TString>& columns)
+{
+ for (auto i : Indices_) {
+ Preparer_.InputColumnFilter(i, columns);
+ }
+ return *this;
+}
+
+TJobOperationPreparer& TJobOperationPreparer::TInputGroup::EndInputGroup()
+{
+ return Preparer_;
+}
+
+TJobOperationPreparer::TOutputGroup::TOutputGroup(TJobOperationPreparer& preparer, TVector<int> indices)
+ : Preparer_(preparer)
+ , Indices_(std::move(indices))
+{ }
+
+TJobOperationPreparer::TOutputGroup& TJobOperationPreparer::TOutputGroup::Schema(const TTableSchema &schema)
+{
+ for (auto i : Indices_) {
+ Preparer_.OutputSchema(i, schema);
+ }
+ return *this;
+}
+
+TJobOperationPreparer::TOutputGroup& TJobOperationPreparer::TOutputGroup::NoSchema()
+{
+ for (auto i : Indices_) {
+ Preparer_.NoOutputSchema(i);
+ }
+ return *this;
+}
+
+TJobOperationPreparer& TJobOperationPreparer::TOutputGroup::EndOutputGroup()
+{
+ return Preparer_;
+}
+
+////////////////////////////////////////////////////////////////////////////////
+
+TJobOperationPreparer::TJobOperationPreparer(const IOperationPreparationContext& context)
+ : Context_(context)
+ , OutputSchemas_(context.GetOutputCount())
+ , InputColumnRenamings_(context.GetInputCount())
+ , InputColumnFilters_(context.GetInputCount())
+ , InputTableDescriptions_(context.GetInputCount())
+ , OutputTableDescriptions_(context.GetOutputCount())
+{ }
+
+TJobOperationPreparer::TInputGroup TJobOperationPreparer::BeginInputGroup(int begin, int end)
+{
+ Y_ENSURE_EX(begin <= end, TApiUsageError()
+ << "BeginInputGroup(): begin must not exceed end, got " << begin << ", " << end);
+ TVector<int> indices;
+ for (int i = begin; i < end; ++i) {
+ ValidateInputTableIndex(i, TStringBuf("BeginInputGroup()"));
+ indices.push_back(i);
+ }
+ return TInputGroup(*this, std::move(indices));
+}
+
+
+TJobOperationPreparer::TOutputGroup TJobOperationPreparer::BeginOutputGroup(int begin, int end)
+{
+ Y_ENSURE_EX(begin <= end, TApiUsageError()
+ << "BeginOutputGroup(): begin must not exceed end, got " << begin << ", " << end);
+ TVector<int> indices;
+ for (int i = begin; i < end; ++i) {
+ ValidateOutputTableIndex(i, TStringBuf("BeginOutputGroup()"));
+ indices.push_back(i);
+ }
+ return TOutputGroup(*this, std::move(indices));
+}
+
+TJobOperationPreparer& TJobOperationPreparer::NodeOutput(int tableIndex)
+{
+ ValidateMissingOutputDescription(tableIndex);
+ OutputTableDescriptions_[tableIndex] = StructuredTableDescription<TNode>();
+ return *this;
+}
+
+TJobOperationPreparer& TJobOperationPreparer::OutputSchema(int tableIndex, TTableSchema schema)
+{
+ ValidateMissingOutputSchema(tableIndex);
+ OutputSchemas_[tableIndex] = std::move(schema);
+ return *this;
+}
+
+TJobOperationPreparer& TJobOperationPreparer::NoOutputSchema(int tableIndex)
+{
+ ValidateMissingOutputSchema(tableIndex);
+ OutputSchemas_[tableIndex] = EmptyNonstrictSchema();
+ return *this;
+}
+
+TJobOperationPreparer& TJobOperationPreparer::InputColumnRenaming(
+ int tableIndex,
+ const THashMap<TString,TString>& renaming)
+{
+ ValidateInputTableIndex(tableIndex, TStringBuf("InputColumnRenaming()"));
+ InputColumnRenamings_[tableIndex] = renaming;
+ return *this;
+}
+
+TJobOperationPreparer& TJobOperationPreparer::InputColumnFilter(int tableIndex, const TVector<TString>& columns)
+{
+ ValidateInputTableIndex(tableIndex, TStringBuf("InputColumnFilter()"));
+ InputColumnFilters_[tableIndex] = columns;
+ return *this;
+}
+
+TJobOperationPreparer& TJobOperationPreparer::FormatHints(TUserJobFormatHints newFormatHints)
+{
+ FormatHints_ = newFormatHints;
+ return *this;
+}
+
+void TJobOperationPreparer::Finish()
+{
+ FinallyValidate();
+}
+
+TVector<TTableSchema> TJobOperationPreparer::GetOutputSchemas()
+{
+ TVector<TTableSchema> result;
+ result.reserve(OutputSchemas_.size());
+ for (auto& schema : OutputSchemas_) {
+ Y_VERIFY(schema.Defined());
+ result.push_back(std::move(*schema));
+ schema.Clear();
+ }
+ return result;
+}
+
+void TJobOperationPreparer::FinallyValidate() const
+{
+ TVector<int> illegallyMissingSchemaIndices;
+ for (int i = 0; i < static_cast<int>(OutputSchemas_.size()); ++i) {
+ if (!OutputSchemas_[i]) {
+ illegallyMissingSchemaIndices.push_back(i);
+ }
+ }
+ if (illegallyMissingSchemaIndices.empty()) {
+ return;
+ }
+ TApiUsageError error;
+ error << "Output table schemas are missing: ";
+ for (auto i : illegallyMissingSchemaIndices) {
+ error << "no. " << i;
+ if (auto path = Context_.GetInputPath(i)) {
+ error << "(" << *path << ")";
+ }
+ error << "; ";
+ }
+ ythrow std::move(error);
+}
+
+////////////////////////////////////////////////////////////////////////////////
+
+void TJobOperationPreparer::ValidateInputTableIndex(int tableIndex, TStringBuf message) const
+{
+ Y_ENSURE_EX(
+ 0 <= tableIndex && tableIndex < static_cast<int>(Context_.GetInputCount()),
+ TApiUsageError() <<
+ message << ": input table index " << tableIndex << " us out of range [0;" <<
+ OutputSchemas_.size() << ")");
+}
+
+void TJobOperationPreparer::ValidateOutputTableIndex(int tableIndex, TStringBuf message) const
+{
+ Y_ENSURE_EX(
+ 0 <= tableIndex && tableIndex < static_cast<int>(Context_.GetOutputCount()),
+ TApiUsageError() <<
+ message << ": output table index " << tableIndex << " us out of range [0;" <<
+ OutputSchemas_.size() << ")");
+}
+
+void TJobOperationPreparer::ValidateMissingOutputSchema(int tableIndex) const
+{
+ ValidateOutputTableIndex(tableIndex, "ValidateMissingOutputSchema()");
+ Y_ENSURE_EX(!OutputSchemas_[tableIndex],
+ TApiUsageError() <<
+ "Output table schema no. " << tableIndex << " " <<
+ "(" << Context_.GetOutputPath(tableIndex).GetOrElse("<unknown path>") << ") " <<
+ "is already set");
+}
+
+void TJobOperationPreparer::ValidateMissingInputDescription(int tableIndex) const
+{
+ ValidateInputTableIndex(tableIndex, "ValidateMissingInputDescription()");
+ Y_ENSURE_EX(!InputTableDescriptions_[tableIndex],
+ TApiUsageError() <<
+ "Description for input no. " << tableIndex << " " <<
+ "(" << Context_.GetOutputPath(tableIndex).GetOrElse("<unknown path>") << ") " <<
+ "is already set");
+}
+
+void TJobOperationPreparer::ValidateMissingOutputDescription(int tableIndex) const
+{
+ ValidateOutputTableIndex(tableIndex, "ValidateMissingOutputDescription()");
+ Y_ENSURE_EX(!OutputTableDescriptions_[tableIndex],
+ TApiUsageError() <<
+ "Description for output no. " << tableIndex << " " <<
+ "(" << Context_.GetOutputPath(tableIndex).GetOrElse("<unknown path>") << ") " <<
+ "is already set");
+}
+
+TTableSchema TJobOperationPreparer::EmptyNonstrictSchema() {
+ return TTableSchema().Strict(false);
+}
+
+////////////////////////////////////////////////////////////////////////////////
+
+const TVector<THashMap<TString, TString>>& TJobOperationPreparer::GetInputColumnRenamings() const
+{
+ return InputColumnRenamings_;
+}
+
+const TVector<TMaybe<TVector<TString>>>& TJobOperationPreparer::GetInputColumnFilters() const
+{
+ return InputColumnFilters_;
+}
+
+const TVector<TMaybe<TTableStructure>>& TJobOperationPreparer::GetInputDescriptions() const
+{
+ return InputTableDescriptions_;
+}
+
+const TVector<TMaybe<TTableStructure>>& TJobOperationPreparer::GetOutputDescriptions() const
+{
+ return OutputTableDescriptions_;
+}
+
+const TUserJobFormatHints& TJobOperationPreparer::GetFormatHints() const
+{
+ return FormatHints_;
+}
+
+TJobOperationPreparer& TJobOperationPreparer::InputFormatHints(TFormatHints hints)
+{
+ FormatHints_.InputFormatHints(hints);
+ return *this;
+}
+
+TJobOperationPreparer& TJobOperationPreparer::OutputFormatHints(TFormatHints hints)
+{
+ FormatHints_.OutputFormatHints(hints);
+ return *this;
+}
+
+////////////////////////////////////////////////////////////////////////////////
+
+void IJob::PrepareOperation(const IOperationPreparationContext& context, TJobOperationPreparer& resultBuilder) const
+{
+ for (int i = 0; i < context.GetOutputCount(); ++i) {
+ resultBuilder.NoOutputSchema(i);
+ }
+}
+
+////////////////////////////////////////////////////////////////////////////////
+
+IOperationPtr IOperationClient::Map(
+ const TMapOperationSpec& spec,
+ ::TIntrusivePtr<IMapperBase> mapper,
+ const TOperationOptions& options)
+{
+ Y_VERIFY(mapper.Get());
+
+ return DoMap(
+ spec,
+ std::move(mapper),
+ options);
+}
+
+IOperationPtr IOperationClient::Map(
+ ::TIntrusivePtr<IMapperBase> mapper,
+ const TOneOrMany<TStructuredTablePath>& input,
+ const TOneOrMany<TStructuredTablePath>& output,
+ const TMapOperationSpec& spec,
+ const TOperationOptions& options)
+{
+ Y_ENSURE_EX(spec.Inputs_.empty(),
+ TApiUsageError() << "TMapOperationSpec::Inputs MUST be empty");
+ Y_ENSURE_EX(spec.Outputs_.empty(),
+ TApiUsageError() << "TMapOperationSpec::Outputs MUST be empty");
+
+ auto mapSpec = spec;
+ for (const auto& inputPath : input.Parts_) {
+ mapSpec.AddStructuredInput(inputPath);
+ }
+ for (const auto& outputPath : output.Parts_) {
+ mapSpec.AddStructuredOutput(outputPath);
+ }
+ return Map(mapSpec, std::move(mapper), options);
+}
+
+IOperationPtr IOperationClient::Reduce(
+ const TReduceOperationSpec& spec,
+ ::TIntrusivePtr<IReducerBase> reducer,
+ const TOperationOptions& options)
+{
+ Y_VERIFY(reducer.Get());
+
+ return DoReduce(
+ spec,
+ std::move(reducer),
+ options);
+}
+
+IOperationPtr IOperationClient::Reduce(
+ ::TIntrusivePtr<IReducerBase> reducer,
+ const TOneOrMany<TStructuredTablePath>& input,
+ const TOneOrMany<TStructuredTablePath>& output,
+ const TSortColumns& reduceBy,
+ const TReduceOperationSpec& spec,
+ const TOperationOptions& options)
+{
+ Y_ENSURE_EX(spec.Inputs_.empty(),
+ TApiUsageError() << "TReduceOperationSpec::Inputs MUST be empty");
+ Y_ENSURE_EX(spec.Outputs_.empty(),
+ TApiUsageError() << "TReduceOperationSpec::Outputs MUST be empty");
+ Y_ENSURE_EX(spec.ReduceBy_.Parts_.empty(),
+ TApiUsageError() << "TReduceOperationSpec::ReduceBy MUST be empty");
+
+ auto reduceSpec = spec;
+ for (const auto& inputPath : input.Parts_) {
+ reduceSpec.AddStructuredInput(inputPath);
+ }
+ for (const auto& outputPath : output.Parts_) {
+ reduceSpec.AddStructuredOutput(outputPath);
+ }
+ reduceSpec.ReduceBy(reduceBy);
+ return Reduce(reduceSpec, std::move(reducer), options);
+}
+
+IOperationPtr IOperationClient::JoinReduce(
+ const TJoinReduceOperationSpec& spec,
+ ::TIntrusivePtr<IReducerBase> reducer,
+ const TOperationOptions& options)
+{
+ Y_VERIFY(reducer.Get());
+
+ return DoJoinReduce(
+ spec,
+ std::move(reducer),
+ options);
+}
+
+IOperationPtr IOperationClient::MapReduce(
+ const TMapReduceOperationSpec& spec,
+ ::TIntrusivePtr<IMapperBase> mapper,
+ ::TIntrusivePtr<IReducerBase> reducer,
+ const TOperationOptions& options)
+{
+ Y_VERIFY(reducer.Get());
+
+ return DoMapReduce(
+ spec,
+ std::move(mapper),
+ nullptr,
+ std::move(reducer),
+ options);
+}
+
+IOperationPtr IOperationClient::MapReduce(
+ const TMapReduceOperationSpec& spec,
+ ::TIntrusivePtr<IMapperBase> mapper,
+ ::TIntrusivePtr<IReducerBase> reduceCombiner,
+ ::TIntrusivePtr<IReducerBase> reducer,
+ const TOperationOptions& options)
+{
+ Y_VERIFY(reducer.Get());
+
+ return DoMapReduce(
+ spec,
+ std::move(mapper),
+ std::move(reduceCombiner),
+ std::move(reducer),
+ options);
+}
+
+IOperationPtr IOperationClient::MapReduce(
+ ::TIntrusivePtr<IMapperBase> mapper,
+ ::TIntrusivePtr<IReducerBase> reducer,
+ const TOneOrMany<TStructuredTablePath>& input,
+ const TOneOrMany<TStructuredTablePath>& output,
+ const TSortColumns& reduceBy,
+ TMapReduceOperationSpec spec,
+ const TOperationOptions& options)
+{
+ Y_ENSURE_EX(spec.Inputs_.empty(),
+ TApiUsageError() << "TMapReduceOperationSpec::Inputs MUST be empty");
+ Y_ENSURE_EX(spec.Outputs_.empty(),
+ TApiUsageError() << "TMapReduceOperationSpec::Outputs MUST be empty");
+ Y_ENSURE_EX(spec.ReduceBy_.Parts_.empty(),
+ TApiUsageError() << "TMapReduceOperationSpec::ReduceBy MUST be empty");
+
+ for (const auto& inputPath : input.Parts_) {
+ spec.AddStructuredInput(inputPath);
+ }
+ for (const auto& outputPath : output.Parts_) {
+ spec.AddStructuredOutput(outputPath);
+ }
+ spec.ReduceBy(reduceBy);
+ return MapReduce(spec, std::move(mapper), std::move(reducer), options);
+}
+
+IOperationPtr IOperationClient::MapReduce(
+ ::TIntrusivePtr<IMapperBase> mapper,
+ ::TIntrusivePtr<IReducerBase> reduceCombiner,
+ ::TIntrusivePtr<IReducerBase> reducer,
+ const TOneOrMany<TStructuredTablePath>& input,
+ const TOneOrMany<TStructuredTablePath>& output,
+ const TSortColumns& reduceBy,
+ TMapReduceOperationSpec spec,
+ const TOperationOptions& options)
+{
+ Y_ENSURE_EX(spec.Inputs_.empty(),
+ TApiUsageError() << "TMapReduceOperationSpec::Inputs MUST be empty");
+ Y_ENSURE_EX(spec.Outputs_.empty(),
+ TApiUsageError() << "TMapReduceOperationSpec::Outputs MUST be empty");
+ Y_ENSURE_EX(spec.ReduceBy_.Parts_.empty(),
+ TApiUsageError() << "TMapReduceOperationSpec::ReduceBy MUST be empty");
+
+ for (const auto& inputPath : input.Parts_) {
+ spec.AddStructuredInput(inputPath);
+ }
+ for (const auto& outputPath : output.Parts_) {
+ spec.AddStructuredOutput(outputPath);
+ }
+ spec.ReduceBy(reduceBy);
+ return MapReduce(spec, std::move(mapper), std::move(reduceCombiner), std::move(reducer), options);
+}
+
+IOperationPtr IOperationClient::Sort(
+ const TOneOrMany<TRichYPath>& input,
+ const TRichYPath& output,
+ const TSortColumns& sortBy,
+ const TSortOperationSpec& spec,
+ const TOperationOptions& options)
+{
+ Y_ENSURE_EX(spec.Inputs_.empty(),
+ TApiUsageError() << "TSortOperationSpec::Inputs MUST be empty");
+ Y_ENSURE_EX(spec.Output_.Path_.empty(),
+ TApiUsageError() << "TSortOperationSpec::Output MUST be empty");
+ Y_ENSURE_EX(spec.SortBy_.Parts_.empty(),
+ TApiUsageError() << "TSortOperationSpec::SortBy MUST be empty");
+
+ auto sortSpec = spec;
+ for (const auto& inputPath : input.Parts_) {
+ sortSpec.AddInput(inputPath);
+ }
+ sortSpec.Output(output);
+ sortSpec.SortBy(sortBy);
+ return Sort(sortSpec, options);
+}
+
+////////////////////////////////////////////////////////////////////////////////
+
+TRawTableReaderPtr IStructuredJob::CreateCustomRawJobReader(int) const
+{
+ return nullptr;
+}
+
+THolder<IProxyOutput> IStructuredJob::CreateCustomRawJobWriter(size_t) const
+{
+ return nullptr;
+}
+
+////////////////////////////////////////////////////////////////////////////////
+
+} // namespace NYT
diff --git a/yt/cpp/mapreduce/interface/operation.h b/yt/cpp/mapreduce/interface/operation.h
new file mode 100644
index 0000000000..171a7e4af7
--- /dev/null
+++ b/yt/cpp/mapreduce/interface/operation.h
@@ -0,0 +1,3494 @@
+#pragma once
+
+///
+/// @file yt/cpp/mapreduce/interface/operation.h
+///
+/// Header containing interface to run operations in YT
+/// and retrieve information about them.
+/// @see [the doc](https://yt.yandex-team.ru/docs/description/mr/map_reduce_overview.html).
+
+#include "client_method_options.h"
+#include "errors.h"
+#include "io.h"
+#include "job_statistics.h"
+#include "job_counters.h"
+
+#include <library/cpp/threading/future/future.h>
+#include <library/cpp/type_info/type_info.h>
+
+#include <util/datetime/base.h>
+#include <util/generic/variant.h>
+#include <util/generic/vector.h>
+#include <util/generic/maybe.h>
+#include <util/system/file.h>
+#include <util/system/types.h>
+
+namespace NYT {
+
+////////////////////////////////////////////////////////////////////////////////
+
+/// Tag class marking that the row type for table is not specified.
+struct TUnspecifiedTableStructure
+{ };
+
+/// Tag class marking that table rows have protobuf type.
+struct TProtobufTableStructure
+{
+ /// @brief Descriptor of the protobuf type of table rows.
+ ///
+ /// @note If table is tagged with @ref ::google::protobuf::Message instead of real proto class
+ /// this descriptor might be null.
+ const ::google::protobuf::Descriptor* Descriptor = nullptr;
+};
+
+
+/// Tag class to specify table row type.
+using TTableStructure = std::variant<
+ TUnspecifiedTableStructure,
+ TProtobufTableStructure
+>;
+
+bool operator==(const TUnspecifiedTableStructure&, const TUnspecifiedTableStructure&);
+bool operator==(const TProtobufTableStructure& lhs, const TProtobufTableStructure& rhs);
+
+/// Table path marked with @ref NYT::TTableStructure tag.
+struct TStructuredTablePath
+{
+ TStructuredTablePath(TRichYPath richYPath = TRichYPath(), TTableStructure description = TUnspecifiedTableStructure())
+ : RichYPath(std::move(richYPath))
+ , Description(std::move(description))
+ { }
+
+ TStructuredTablePath(TRichYPath richYPath, const ::google::protobuf::Descriptor* descriptor)
+ : RichYPath(std::move(richYPath))
+ , Description(TProtobufTableStructure({descriptor}))
+ { }
+
+ TStructuredTablePath(TYPath path)
+ : RichYPath(std::move(path))
+ , Description(TUnspecifiedTableStructure())
+ { }
+
+ TStructuredTablePath(const char* path)
+ : RichYPath(path)
+ , Description(TUnspecifiedTableStructure())
+ { }
+
+ TRichYPath RichYPath;
+ TTableStructure Description;
+};
+
+/// Create marked table path from row type.
+template <typename TRow>
+TStructuredTablePath Structured(TRichYPath richYPath);
+
+/// Create tag class from row type.
+template <typename TRow>
+TTableStructure StructuredTableDescription();
+
+///////////////////////////////////////////////////////////////////////////////
+
+/// Tag class marking that row stream is empty.
+struct TVoidStructuredRowStream
+{ };
+
+/// Tag class marking that row stream consists of `NYT::TNode`.
+struct TTNodeStructuredRowStream
+{ };
+
+/// Tag class marking that row stream consists of @ref NYT::TYaMRRow.
+struct TTYaMRRowStructuredRowStream
+{ };
+
+/// Tag class marking that row stream consists of protobuf rows of given type.
+struct TProtobufStructuredRowStream
+{
+ /// @brief Descriptor of the protobuf type of table rows.
+ ///
+ /// @note If `Descriptor` is nullptr, then row stream consists of multiple message types.
+ const ::google::protobuf::Descriptor* Descriptor = nullptr;
+};
+
+/// Tag class to specify type of rows in an operation row stream
+using TStructuredRowStreamDescription = std::variant<
+ TVoidStructuredRowStream,
+ TTNodeStructuredRowStream,
+ TTYaMRRowStructuredRowStream,
+ TProtobufStructuredRowStream
+>;
+
+///////////////////////////////////////////////////////////////////////////////
+
+/// Tag class marking that current binary should be used in operation.
+struct TJobBinaryDefault
+{ };
+
+/// Tag class marking that binary from specified local path should be used in operation.
+struct TJobBinaryLocalPath
+{
+ TString Path;
+ TMaybe<TString> MD5CheckSum;
+};
+
+/// Tag class marking that binary from specified Cypress path should be used in operation.
+struct TJobBinaryCypressPath
+{
+ TYPath Path;
+ TMaybe<TTransactionId> TransactionId;
+};
+
+////////////////////////////////////////////////////////////////////////////////
+
+
+/// @cond Doxygen_Suppress
+namespace NDetail {
+ extern i64 OutputTableCount;
+} // namespace NDetail
+/// @endcond
+
+////////////////////////////////////////////////////////////////////////////////
+
+///
+/// @brief Auto merge mode.
+///
+/// @see https://yt.yandex-team.ru/docs/description/mr/automerge
+enum class EAutoMergeMode
+{
+ /// Auto merge is disabled.
+ Disabled /* "disabled" */,
+
+ /// Mode that tries to achieve good chunk sizes and doesn't limit usage of chunk quota for intermediate chunks.
+ Relaxed /* "relaxed" */,
+
+ /// Mode that tries to optimize usage of chunk quota for intermediate chunks, operation might run slower.
+ Economy /* "economy" */,
+
+ ///
+ /// @brief Manual configuration of automerge parameters.
+ ///
+ /// @ref TAutoMergeSpec
+ Manual /* "manual" */,
+};
+
+///
+/// @brief Options for auto merge operation stage.
+///
+/// @see https://yt.yandex-team.ru/docs/description/mr/automerge
+class TAutoMergeSpec
+{
+public:
+ /// @cond Doxygen_Suppress
+ using TSelf = TAutoMergeSpec;
+ /// @endcond
+
+ /// Mode of the auto merge.
+ FLUENT_FIELD_OPTION(EAutoMergeMode, Mode);
+
+ /// @brief Upper limit for number of intermediate chunks.
+ ///
+ /// Works only for Manual mode.
+ FLUENT_FIELD_OPTION(i64, MaxIntermediateChunkCount);
+
+ /// @brief Number of chunks limit to merge in one job.
+ ///
+ /// Works only for Manual mode.
+ FLUENT_FIELD_OPTION(i64, ChunkCountPerMergeJob);
+
+ /// @brief Automerge will not merge chunks that are larger than `DesiredChunkSize * (ChunkSizeThreshold / 100.)`
+ ///
+ /// Works only for Manual mode.
+ FLUENT_FIELD_OPTION(i64, ChunkSizeThreshold);
+};
+
+/// Base for operations with auto merge options.
+template <class TDerived>
+class TWithAutoMergeSpec
+{
+public:
+ /// @cond Doxygen_Suppress
+ using TSelf = TDerived;
+ /// @endcond
+
+ /// @brief Options for auto merge operation stage.
+ ///
+ /// @see https://yt.yandex-team.ru/docs/description/mr/automerge
+ FLUENT_FIELD_OPTION(TAutoMergeSpec, AutoMerge);
+};
+
+///
+/// @brief Resources controlled by scheduler and used by running operations.
+///
+/// @see https://yt.yandex-team.ru/docs/description/mr/scheduler/scheduler_and_pools#resursy
+class TSchedulerResources
+{
+public:
+ /// @cond Doxygen_Suppress
+ using TSelf = TSchedulerResources;
+ /// @endcond
+
+ /// Each job consumes exactly one user slot.
+ FLUENT_FIELD_OPTION_ENCAPSULATED(i64, UserSlots);
+
+ /// Number of (virtual) cpu cores consumed by all jobs.
+ FLUENT_FIELD_OPTION_ENCAPSULATED(i64, Cpu);
+
+ /// Amount of memory in bytes.
+ FLUENT_FIELD_OPTION_ENCAPSULATED(i64, Memory);
+};
+
+/// Base for input format hints of a user job.
+template <class TDerived>
+class TUserJobInputFormatHintsBase
+{
+public:
+ /// @cond Doxygen_Suppress
+ using TSelf = TDerived;
+ /// @endcond
+
+ /// @brief Fine tune input format of the job.
+ FLUENT_FIELD_OPTION(TFormatHints, InputFormatHints);
+};
+
+/// Base for output format hints of a user job.
+template <class TDerived>
+class TUserJobOutputFormatHintsBase
+{
+public:
+ /// @cond Doxygen_Suppress
+ using TSelf = TDerived;
+ /// @endcond
+
+ /// @brief Fine tune output format of the job.
+ FLUENT_FIELD_OPTION(TFormatHints, OutputFormatHints);
+};
+
+/// Base for format hints of a user job.
+template <class TDerived>
+class TUserJobFormatHintsBase
+ : public TUserJobInputFormatHintsBase<TDerived>
+ , public TUserJobOutputFormatHintsBase<TDerived>
+{
+public:
+ /// @cond Doxygen_Suppress
+ using TSelf = TDerived;
+ /// @endcond
+};
+
+/// User job format hints.
+class TUserJobFormatHints
+ : public TUserJobFormatHintsBase<TUserJobFormatHints>
+{ };
+
+/// Spec of input and output tables of a raw operation.
+template <class TDerived>
+class TRawOperationIoTableSpec
+{
+public:
+ /// @cond Doxygen_Suppress
+ using TSelf = TDerived;
+ /// @endcond
+
+ /// Add input table path to input path list.
+ TDerived& AddInput(const TRichYPath& path);
+
+ /// Set input table path no. `tableIndex`.
+ TDerived& SetInput(size_t tableIndex, const TRichYPath& path);
+
+ /// Add output table path to output path list.
+ TDerived& AddOutput(const TRichYPath& path);
+
+ /// Set output table path no. `tableIndex`.
+ TDerived& SetOutput(size_t tableIndex, const TRichYPath& path);
+
+ /// Get all input table paths.
+ const TVector<TRichYPath>& GetInputs() const;
+
+ /// Get all output table paths.
+ const TVector<TRichYPath>& GetOutputs() const;
+
+private:
+ TVector<TRichYPath> Inputs_;
+ TVector<TRichYPath> Outputs_;
+};
+
+/// Base spec for IO in "simple" raw operations (Map, Reduce etc.).
+template <class TDerived>
+struct TSimpleRawOperationIoSpec
+ : public TRawOperationIoTableSpec<TDerived>
+{
+ /// @cond Doxygen_Suppress
+ using TSelf = TDerived;
+ /// @endcond
+
+ /// @brief Describes format for both input and output.
+ ///
+ /// @note `Format' is overriden by `InputFormat' and `OutputFormat'.
+ FLUENT_FIELD_OPTION(TFormat, Format);
+
+ /// Describes input format.
+ FLUENT_FIELD_OPTION(TFormat, InputFormat);
+
+ /// Describes output format.
+ FLUENT_FIELD_OPTION(TFormat, OutputFormat);
+};
+
+/// Spec for IO in MapReduce operation.
+template <class TDerived>
+class TRawMapReduceOperationIoSpec
+ : public TRawOperationIoTableSpec<TDerived>
+{
+public:
+ /// @cond Doxygen_Suppress
+ using TSelf = TDerived;
+ /// @endcond
+
+ /// @brief Describes format for both input and output of mapper.
+ ///
+ /// @note `MapperFormat' is overriden by `MapperInputFormat' and `MapperOutputFormat'.
+ FLUENT_FIELD_OPTION(TFormat, MapperFormat);
+
+ /// Describes mapper input format.
+ FLUENT_FIELD_OPTION(TFormat, MapperInputFormat);
+
+ /// Describes mapper output format.
+ FLUENT_FIELD_OPTION(TFormat, MapperOutputFormat);
+
+ /// @brief Describes format for both input and output of reduce combiner.
+ ///
+ /// @note `ReduceCombinerFormat' is overriden by `ReduceCombinerInputFormat' and `ReduceCombinerOutputFormat'.
+ FLUENT_FIELD_OPTION(TFormat, ReduceCombinerFormat);
+
+ /// Describes reduce combiner input format.
+ FLUENT_FIELD_OPTION(TFormat, ReduceCombinerInputFormat);
+
+ /// Describes reduce combiner output format.
+ FLUENT_FIELD_OPTION(TFormat, ReduceCombinerOutputFormat);
+
+ /// @brief Describes format for both input and output of reducer.
+ ///
+ /// @note `ReducerFormat' is overriden by `ReducerInputFormat' and `ReducerOutputFormat'.
+ FLUENT_FIELD_OPTION(TFormat, ReducerFormat);
+
+ /// Describes reducer input format.
+ FLUENT_FIELD_OPTION(TFormat, ReducerInputFormat);
+
+ /// Describes reducer output format.
+ FLUENT_FIELD_OPTION(TFormat, ReducerOutputFormat);
+
+ /// Add direct map output table path.
+ TDerived& AddMapOutput(const TRichYPath& path);
+
+ /// Set direct map output table path no. `tableIndex`.
+ TDerived& SetMapOutput(size_t tableIndex, const TRichYPath& path);
+
+ /// Get all direct map output table paths
+ const TVector<TRichYPath>& GetMapOutputs() const;
+
+private:
+ TVector<TRichYPath> MapOutputs_;
+};
+
+///
+/// @brief Base spec of operations with input tables.
+class TOperationInputSpecBase
+{
+public:
+ template <class T, class = void>
+ struct TFormatAdder;
+
+ ///
+ /// @brief Add input table path to input path list and specify type of rows.
+ template <class T>
+ void AddInput(const TRichYPath& path);
+
+ ///
+ /// @brief Add input table path as structured paths.
+ void AddStructuredInput(TStructuredTablePath path);
+
+ ///
+ /// @brief Set input table path and type.
+ template <class T>
+ void SetInput(size_t tableIndex, const TRichYPath& path);
+
+ ///
+ /// @brief All input paths.
+ TVector<TRichYPath> Inputs_;
+
+ ///
+ /// @brief Get all input structured paths.
+ const TVector<TStructuredTablePath>& GetStructuredInputs() const;
+
+private:
+ TVector<TStructuredTablePath> StructuredInputs_;
+ friend struct TOperationIOSpecBase;
+ template <class T>
+ friend struct TOperationIOSpec;
+};
+
+///
+/// @brief Base spec of operations with output tables.
+class TOperationOutputSpecBase
+{
+public:
+ template <class T, class = void>
+ struct TFormatAdder;
+
+ ///
+ /// @brief Add output table path to output path list and specify type of rows.
+ template <class T>
+ void AddOutput(const TRichYPath& path);
+
+ ///
+ /// @brief Add output table path as structured paths.
+ void AddStructuredOutput(TStructuredTablePath path);
+
+ ///
+ /// @brief Set output table path and type.
+ template <class T>
+ void SetOutput(size_t tableIndex, const TRichYPath& path);
+
+ ///
+ /// @brief All output paths.
+ TVector<TRichYPath> Outputs_;
+
+ ///
+ /// @brief Get all output structured paths.
+ const TVector<TStructuredTablePath>& GetStructuredOutputs() const;
+
+private:
+ TVector<TStructuredTablePath> StructuredOutputs_;
+ friend struct TOperationIOSpecBase;
+ template <class T>
+ friend struct TOperationIOSpec;
+};
+
+///
+/// @brief Base spec for operations with inputs and outputs.
+struct TOperationIOSpecBase
+ : public TOperationInputSpecBase
+ , public TOperationOutputSpecBase
+{ };
+
+///
+/// @brief Base spec for operations with inputs and outputs.
+template <class TDerived>
+struct TOperationIOSpec
+ : public TOperationIOSpecBase
+{
+ /// @cond Doxygen_Suppress
+ using TSelf = TDerived;
+ /// @endcond
+
+ template <class T>
+ TDerived& AddInput(const TRichYPath& path);
+
+ TDerived& AddStructuredInput(TStructuredTablePath path);
+
+ template <class T>
+ TDerived& SetInput(size_t tableIndex, const TRichYPath& path);
+
+ template <class T>
+ TDerived& AddOutput(const TRichYPath& path);
+
+ TDerived& AddStructuredOutput(TStructuredTablePath path);
+
+ template <class T>
+ TDerived& SetOutput(size_t tableIndex, const TRichYPath& path);
+
+
+ // DON'T USE THESE METHODS! They are left solely for backward compatibility.
+ // These methods are the only way to do equivalent of (Add/Set)(Input/Output)<Message>
+ // but please consider using (Add/Set)(Input/Output)<TConcreteMessage>
+ // (where TConcreteMessage is some descendant of Message)
+ // because they are faster and better (see https://st.yandex-team.ru/YT-6967)
+ TDerived& AddProtobufInput_VerySlow_Deprecated(const TRichYPath& path);
+ TDerived& AddProtobufOutput_VerySlow_Deprecated(const TRichYPath& path);
+};
+
+///
+/// @brief Base spec for all operations.
+///
+/// @see https://yt.yandex-team.ru/docs/description/mr/operations_options
+template <class TDerived>
+struct TOperationSpecBase
+{
+ /// @cond Doxygen_Suppress
+ using TSelf = TDerived;
+ /// @endcond
+
+ ///
+ /// @brief Limit on operation execution time.
+ ///
+ /// If operation doesn't finish in time it will be aborted.
+ FLUENT_FIELD_OPTION(TDuration, TimeLimit);
+
+ /// @brief Title to be shown in web interface.
+ FLUENT_FIELD_OPTION(TString, Title);
+
+ /// @brief Pool to be used for this operation.
+ FLUENT_FIELD_OPTION(TString, Pool);
+
+ /// @brief Weight of operation.
+ ///
+ /// Coefficient defining how much resources operation gets relative to its siblings in the same pool.
+ FLUENT_FIELD_OPTION(double, Weight);
+
+ /// @breif Pool tree list that operation will use.
+ FLUENT_OPTIONAL_VECTOR_FIELD_ENCAPSULATED(TString, PoolTree);
+
+ /// How much resources can be consumed by operation.
+ FLUENT_FIELD_OPTION_ENCAPSULATED(TSchedulerResources, ResourceLimits);
+};
+
+///
+/// @brief Base spec for all operations with user jobs.
+template <class TDerived>
+struct TUserOperationSpecBase
+ : TOperationSpecBase<TDerived>
+{
+ /// @cond Doxygen_Suppress
+ using TSelf = TDerived;
+ /// @endcond
+
+ /// How many jobs can fail before operation is failed.
+ FLUENT_FIELD_OPTION(ui64, MaxFailedJobCount);
+
+ /// On any unsuccessful job completion (i.e. abortion or failure) force the whole operation to fail.
+ FLUENT_FIELD_OPTION(bool, FailOnJobRestart);
+
+ ///
+ /// @brief Table to save whole stderr of operation.
+ ///
+ /// @see https://clubs.at.yandex-team.ru/yt/1045
+ FLUENT_FIELD_OPTION(TYPath, StderrTablePath);
+
+ ///
+ /// @brief Table to save coredumps of operation.
+ ///
+ /// @see https://clubs.at.yandex-team.ru/yt/1045
+ FLUENT_FIELD_OPTION(TYPath, CoreTablePath);
+
+ ///
+ /// @brief How long should the scheduler wait for the job to be started on a node.
+ ///
+ /// When you run huge jobs that require preemption of all the other jobs on
+ /// a node, the default timeout might be insufficient and your job may be
+ /// aborted with 'waiting_timeout' reason. This is especially problematic
+ /// when you are setting 'FailOnJobRestart' option.
+ ///
+ /// @note The value must be between 10 seconds and 10 minutes.
+ FLUENT_FIELD_OPTION(TDuration, WaitingJobTimeout);
+};
+
+///
+/// @brief Class to provide information on intermediate mapreduce stream protobuf types.
+///
+/// When using protobuf format it is important to know exact types of proto messages
+/// that are used in input/output.
+///
+/// Sometimes such messages cannot be derived from job class
+/// i.e. when job class uses `NYT::TTableReader<::google::protobuf::Message>`
+/// or `NYT::TTableWriter<::google::protobuf::Message>`.
+///
+/// When using such jobs user can provide exact message type using this class.
+///
+/// @note Only input/output that relate to intermediate tables can be hinted.
+/// Input to map and output of reduce is derived from `AddInput`/`AddOutput`.
+template <class TDerived>
+struct TIntermediateTablesHintSpec
+{
+ /// Specify intermediate map output type.
+ template <class T>
+ TDerived& HintMapOutput();
+
+ /// Specify reduce combiner input.
+ template <class T>
+ TDerived& HintReduceCombinerInput();
+
+ /// Specify reduce combiner output.
+ template <class T>
+ TDerived& HintReduceCombinerOutput();
+
+ /// Specify reducer input.
+ template <class T>
+ TDerived& HintReduceInput();
+
+ ///
+ /// @brief Add output of map stage.
+ ///
+ /// Mapper output table #0 is always intermediate table that is going to be reduced later.
+ /// Rows that mapper write to tables #1, #2, ... are saved in MapOutput tables.
+ template <class T>
+ TDerived& AddMapOutput(const TRichYPath& path);
+
+ TVector<TRichYPath> MapOutputs_;
+
+ const TVector<TStructuredTablePath>& GetStructuredMapOutputs() const;
+ const TMaybe<TTableStructure>& GetIntermediateMapOutputDescription() const;
+ const TMaybe<TTableStructure>& GetIntermediateReduceCombinerInputDescription() const;
+ const TMaybe<TTableStructure>& GetIntermediateReduceCombinerOutputDescription() const;
+ const TMaybe<TTableStructure>& GetIntermediateReducerInputDescription() const;
+
+private:
+ TVector<TStructuredTablePath> StructuredMapOutputs_;
+ TMaybe<TTableStructure> IntermediateMapOutputDescription_;
+ TMaybe<TTableStructure> IntermediateReduceCombinerInputDescription_;
+ TMaybe<TTableStructure> IntermediateReduceCombinerOutputDescription_;
+ TMaybe<TTableStructure> IntermediateReducerInputDescription_;
+};
+
+////////////////////////////////////////////////////////////////////////////////
+
+struct TAddLocalFileOptions
+{
+ /// @cond Doxygen_Suppress
+ using TSelf = TAddLocalFileOptions;
+ /// @endcond
+
+ ///
+ /// @brief Path by which job will see the uploaded file.
+ ///
+ /// Defaults to basename of the local path.
+ FLUENT_FIELD_OPTION(TString, PathInJob);
+
+ ///
+ /// @brief MD5 checksum of uploaded file.
+ ///
+ /// If not specified it is computed by this library.
+ /// If this argument is provided, the user can some cpu and disk IO.
+ FLUENT_FIELD_OPTION(TString, MD5CheckSum);
+
+ ///
+ /// @brief Do not put file into node cache
+ ///
+ /// @see NYT::TRichYPath::BypassArtifactCache
+ FLUENT_FIELD_OPTION(bool, BypassArtifactCache);
+};
+
+////////////////////////////////////////////////////////////////////////////////
+
+/// @brief Binary to run job profiler on.
+enum class EProfilingBinary
+{
+ /// Profile job proxy.
+ JobProxy /* "job_proxy" */,
+
+ /// Profile user job.
+ UserJob /* "user_job" */,
+};
+
+/// @brief Type of job profiler.
+enum class EProfilerType
+{
+ /// Profile CPU usage.
+ Cpu /* "cpu" */,
+
+ /// Profile memory usage.
+ Memory /* "memory" */,
+
+ /// Profiler peak memory usage.
+ PeakMemory /* "peak_memory" */,
+};
+
+/// @brief Specifies a job profiler.
+struct TJobProfilerSpec
+{
+ /// @cond Doxygen_Suppress
+ using TSelf = TJobProfilerSpec;
+ /// @endcond
+
+ /// @brief Binary to profile.
+ FLUENT_FIELD_OPTION(EProfilingBinary, ProfilingBinary);
+
+ /// @brief Type of the profiler.
+ FLUENT_FIELD_OPTION(EProfilerType, ProfilerType);
+
+ /// @brief Probabiliy of the job being selected for profiling.
+ FLUENT_FIELD_OPTION(double, ProfilingProbability);
+
+ /// @brief For sampling profilers, sets the number of samples per second.
+ FLUENT_FIELD_OPTION(int, SamplingFrequency);
+};
+
+////////////////////////////////////////////////////////////////////////////////
+
+///
+/// @brief Spec of user job.
+///
+/// @see https://yt.yandex-team.ru/docs/description/mr/operations_options#user_script_options
+struct TUserJobSpec
+{
+ /// @cond Doxygen_Suppress
+ using TSelf = TUserJobSpec;
+ /// @endcond
+
+ ///
+ /// @brief Specify a local file to upload to Cypress and prepare for use in job.
+ TSelf& AddLocalFile(const TLocalFilePath& path, const TAddLocalFileOptions& options = TAddLocalFileOptions());
+
+ ///
+ /// @brief Get the list of all added local files.
+ TVector<std::tuple<TLocalFilePath, TAddLocalFileOptions>> GetLocalFiles() const;
+
+ /// @brief Paths to files in Cypress to use in job.
+ FLUENT_VECTOR_FIELD(TRichYPath, File);
+
+ ///
+ /// @brief MemoryLimit specifies how much memory job process can use.
+ ///
+ /// @note
+ /// If job uses tmpfs (check @ref NYT::TOperationOptions::MountSandboxInTmpfs)
+ /// YT computes its memory usage as total of:
+ /// - memory usage of job process itself (including mapped files);
+ /// - total size of tmpfs used by this job.
+ ///
+ /// @note
+ /// When @ref NYT::TOperationOptions::MountSandboxInTmpfs is enabled library will compute
+ /// total size of all files used by this job and add this total size to MemoryLimit.
+ /// Thus you shouldn't include size of your files (e.g. binary file) into MemoryLimit.
+ ///
+ /// @note
+ /// Final memory memory_limit passed to YT is calculated as follows:
+ ///
+ /// @note
+ /// ```
+ /// memory_limit = MemoryLimit + <total-size-of-used-files> + ExtraTmpfsSize
+ /// ```
+ ///
+ /// @see NYT::TUserJobSpec::ExtraTmpfsSize
+ FLUENT_FIELD_OPTION(i64, MemoryLimit);
+
+ ///
+ /// @brief Size of data that is going to be written to tmpfs.
+ ///
+ /// This option should be used if job writes data to tmpfs.
+ ///
+ /// ExtraTmpfsSize should not include size of files specified with
+ /// @ref NYT::TUserJobSpec::AddLocalFile or @ref NYT::TUserJobSpec::AddFile
+ /// These files are copied to tmpfs automatically and their total size
+ /// is computed automatically.
+ ///
+ /// @see NYT::TOperationOptions::MountSandboxInTmpfs
+ /// @see NYT::TUserJobSpec::MemoryLimit
+ FLUENT_FIELD_OPTION(i64, ExtraTmpfsSize);
+
+ ///
+ /// @brief Maximum number of CPU cores for a single job to use.
+ FLUENT_FIELD_OPTION(double, CpuLimit);
+
+ ///
+ /// @brief Fraction of @ref NYT::TUserJobSpec::MemoryLimit that job gets at start.
+ ///
+ /// @see https://yt.yandex-team.ru/docs/description/mr/operations_options#memory_reserve_factor
+ FLUENT_FIELD_OPTION(double, MemoryReserveFactor);
+
+ ///
+ /// @brief Local path to executable to be used inside jobs.
+ ////
+ /// Provided executable must use C++ YT API library (this library)
+ /// and implement job class that is going to be used.
+ ///
+ /// This option might be useful if we want to start operation from nonlinux machines
+ /// (in that case we use `JobBinary` to provide path to the same program compiled for linux).
+ /// Other example of using this option is uploading executable to cypress in advance
+ /// and save the time required to upload current executable to cache.
+ /// `md5` argument can be used to save cpu time and disk IO when binary MD5 checksum is known.
+ /// When argument is not provided library will compute it itself.
+ TUserJobSpec& JobBinaryLocalPath(TString path, TMaybe<TString> md5 = Nothing());
+
+ ///
+ /// @brief Cypress path to executable to be used inside jobs.
+ TUserJobSpec& JobBinaryCypressPath(TString path, TMaybe<TTransactionId> transactionId = Nothing());
+
+ ///
+ /// @brief String that will be prepended to the command.
+ ///
+ /// This option overrides @ref NYT::TOperationOptions::JobCommandPrefix.
+ FLUENT_FIELD(TString, JobCommandPrefix);
+
+ ///
+ /// @brief String that will be appended to the command.
+ ///
+ /// This option overrides @ref NYT::TOperationOptions::JobCommandSuffix.
+ FLUENT_FIELD(TString, JobCommandSuffix);
+
+ ///
+ /// @brief Map of environment variables that will be set for jobs.
+ FLUENT_MAP_FIELD(TString, TString, Environment);
+
+ ///
+ /// @brief Limit for all files inside job sandbox (in bytes).
+ FLUENT_FIELD_OPTION(ui64, DiskSpaceLimit);
+
+ ///
+ /// @brief Number of ports reserved for the job (passed through environment in YT_PORT_0, YT_PORT_1, ...).
+ FLUENT_FIELD_OPTION(ui16, PortCount);
+
+ ///
+ /// @brief Network project used to isolate job network.
+ FLUENT_FIELD_OPTION(TString, NetworkProject);
+
+ ///
+ /// @brief Limit on job execution time.
+ ///
+ /// Jobs that exceed this limit will be considered failed.
+ FLUENT_FIELD_OPTION(TDuration, JobTimeLimit);
+
+ ///
+ /// @brief Get job binary config.
+ const TJobBinaryConfig& GetJobBinary() const;
+
+ ///
+ /// @brief List of profilers to run.
+ FLUENT_VECTOR_FIELD(TJobProfilerSpec, JobProfiler);
+
+private:
+ TVector<std::tuple<TLocalFilePath, TAddLocalFileOptions>> LocalFiles_;
+ TJobBinaryConfig JobBinary_;
+};
+
+////////////////////////////////////////////////////////////////////////////////
+
+///
+/// @brief Spec of Map operation.
+///
+/// @see https://yt.yandex-team.ru/docs/description/mr/map
+template <typename TDerived>
+struct TMapOperationSpecBase
+ : public TUserOperationSpecBase<TDerived>
+ , public TWithAutoMergeSpec<TDerived>
+{
+ /// @cond Doxygen_Suppress
+ using TSelf = TDerived;
+ /// @endcond
+
+ ///
+ /// @brief Spec of mapper job.
+ FLUENT_FIELD(TUserJobSpec, MapperSpec);
+
+ ///
+ /// @brief Whether to guarantee the order of rows passed to mapper matches the order in the table.
+ ///
+ /// When `Ordered' is false (by default), there is no guaranties about order of reading rows.
+ /// In this case mapper might work slightly faster because row delivered from fast node can be processed YT waits
+ /// response from slow nodes.
+ /// When `Ordered' is true, rows will come in order in which they are stored in input tables.
+ FLUENT_FIELD_OPTION(bool, Ordered);
+
+ ///
+ /// @brief Recommended number of jobs to run.
+ ///
+ /// `JobCount' has higher priority than @ref NYT::TMapOperationSpecBase::DataSizePerJob.
+ /// This option only provide a recommendation and may be ignored if conflicting with YT internal limits.
+ FLUENT_FIELD_OPTION(ui32, JobCount);
+
+ ///
+ /// @brief Recommended of data size for each job.
+ ///
+ /// `DataSizePerJob` has lower priority that @ref NYT::TMapOperationSpecBase::JobCount.
+ /// This option only provide a recommendation and may be ignored if conflicting with YT internal limits.
+ FLUENT_FIELD_OPTION(ui64, DataSizePerJob);
+};
+
+///
+/// @brief Spec of Map operation.
+///
+/// @see https://yt.yandex-team.ru/docs/description/mr/map
+struct TMapOperationSpec
+ : public TMapOperationSpecBase<TMapOperationSpec>
+ , public TOperationIOSpec<TMapOperationSpec>
+ , public TUserJobFormatHintsBase<TMapOperationSpec>
+{ };
+
+///
+/// @brief Spec of raw Map operation.
+///
+/// @see https://yt.yandex-team.ru/docs/description/mr/map
+struct TRawMapOperationSpec
+ : public TMapOperationSpecBase<TRawMapOperationSpec>
+ , public TSimpleRawOperationIoSpec<TRawMapOperationSpec>
+{ };
+
+////////////////////////////////////////////////////////////////////////////////
+
+///
+/// @brief Spec of Reduce operation.
+///
+/// @see https://yt.yandex-team.ru/docs/description/mr/reduce
+template <typename TDerived>
+struct TReduceOperationSpecBase
+ : public TUserOperationSpecBase<TDerived>
+ , public TWithAutoMergeSpec<TDerived>
+{
+ /// @cond Doxygen_Suppress
+ using TSelf = TDerived;
+ /// @endcond
+
+ ///
+ /// @brief Spec of reduce job.
+ FLUENT_FIELD(TUserJobSpec, ReducerSpec);
+
+ ///
+ /// @brief Columns to sort rows by (must include `ReduceBy` as prefix).
+ FLUENT_FIELD(TSortColumns, SortBy);
+
+ ///
+ /// @brief Columns to group rows by.
+ FLUENT_FIELD(TSortColumns, ReduceBy);
+
+ ///
+ /// @brief Columns to join foreign tables by (must be prefix of `ReduceBy`).
+ ///
+ /// @see https://yt.yandex-team.ru/docs/description/mr/reduce#foreign_tables
+ FLUENT_FIELD_OPTION(TSortColumns, JoinBy);
+
+ ///
+ /// @brief Guarantee to feed all rows with same `ReduceBy` columns to a single job (`true` by default).
+ FLUENT_FIELD_OPTION(bool, EnableKeyGuarantee);
+
+ ///
+ /// @brief Recommended number of jobs to run.
+ ///
+ /// `JobCount' has higher priority than @ref NYT::TReduceOperationSpecBase::DataSizePerJob.
+ /// This option only provide a recommendation and may be ignored if conflicting with YT internal limits.
+ FLUENT_FIELD_OPTION(ui32, JobCount);
+
+ ///
+ /// @brief Recommended of data size for each job.
+ ///
+ /// `DataSizePerJob` has lower priority that @ref NYT::TReduceOperationSpecBase::JobCount.
+ /// This option only provide a recommendation and may be ignored if conflicting with YT internal limits.
+ FLUENT_FIELD_OPTION(ui64, DataSizePerJob);
+};
+
+///
+/// @brief Spec of Reduce operation.
+///
+/// @see https://yt.yandex-team.ru/docs/description/mr/reduce
+struct TReduceOperationSpec
+ : public TReduceOperationSpecBase<TReduceOperationSpec>
+ , public TOperationIOSpec<TReduceOperationSpec>
+ , public TUserJobFormatHintsBase<TReduceOperationSpec>
+{ };
+
+///
+/// @brief Spec of raw Reduce operation.
+///
+/// @see https://yt.yandex-team.ru/docs/description/mr/reduce
+struct TRawReduceOperationSpec
+ : public TReduceOperationSpecBase<TRawReduceOperationSpec>
+ , public TSimpleRawOperationIoSpec<TRawReduceOperationSpec>
+{ };
+
+////////////////////////////////////////////////////////////////////////////////
+
+///
+/// @brief Spec of JoinReduce operation.
+///
+/// @deprecated Instead the user should run a reduce operation
+/// with @ref NYT::TReduceOperationSpec::EnableKeyGuarantee set to `false`.
+///
+/// @see https://yt.yandex-team.ru/docs/description/mr/reduce#foreign_tables
+template <typename TDerived>
+struct TJoinReduceOperationSpecBase
+ : public TUserOperationSpecBase<TDerived>
+{
+ /// @cond Doxygen_Suppress
+ using TSelf = TDerived;
+ /// @endcond
+
+ ///
+ /// @brief Spec of reduce job.
+ FLUENT_FIELD(TUserJobSpec, ReducerSpec);
+
+ ///
+ /// @brief Columns to join foreign tables by (must be prefix of `ReduceBy`).
+ ///
+ /// @see https://yt.yandex-team.ru/docs/description/mr/reduce#foreign_tables
+ FLUENT_FIELD(TSortColumns, JoinBy);
+
+ ///
+ /// @brief Recommended number of jobs to run.
+ ///
+ /// `JobCount' has higher priority than @ref NYT::TJoinReduceOperationSpecBase::DataSizePerJob.
+ /// This option only provide a recommendation and may be ignored if conflicting with YT internal limits.
+ FLUENT_FIELD_OPTION(ui32, JobCount);
+
+ ///
+ /// @brief Recommended of data size for each job.
+ ///
+ /// `DataSizePerJob` has lower priority that @ref NYT::TJoinReduceOperationSpecBase::JobCount.
+ /// This option only provide a recommendation and may be ignored if conflicting with YT internal limits.
+ FLUENT_FIELD_OPTION(ui64, DataSizePerJob);
+};
+
+///
+/// @brief Spec of JoinReduce operation.
+///
+/// @deprecated Instead the user should run a reduce operation
+/// with @ref NYT::TReduceOperationSpec::EnableKeyGuarantee set to `false`.
+///
+/// @see https://yt.yandex-team.ru/docs/description/mr/reduce#foreign_tables
+struct TJoinReduceOperationSpec
+ : public TJoinReduceOperationSpecBase<TJoinReduceOperationSpec>
+ , public TOperationIOSpec<TJoinReduceOperationSpec>
+ , public TUserJobFormatHintsBase<TJoinReduceOperationSpec>
+{ };
+
+///
+/// @brief Spec of raw JoinReduce operation.
+///
+/// @deprecated Instead the user should run a reduce operation
+/// with @ref NYT::TReduceOperationSpec::EnableKeyGuarantee set to `false`.
+///
+/// @see https://yt.yandex-team.ru/docs/description/mr/reduce#foreign_tables
+struct TRawJoinReduceOperationSpec
+ : public TJoinReduceOperationSpecBase<TRawJoinReduceOperationSpec>
+ , public TSimpleRawOperationIoSpec<TRawJoinReduceOperationSpec>
+{ };
+
+////////////////////////////////////////////////////////////////////////////////
+
+///
+/// @brief Spec of MapReduce operation.
+///
+/// @see https://yt.yandex-team.ru/docs/description/mr/mapreduce
+template <typename TDerived>
+struct TMapReduceOperationSpecBase
+ : public TUserOperationSpecBase<TDerived>
+{
+ /// @cond Doxygen_Suppress
+ using TSelf = TDerived;
+ /// @endcond
+
+ ///
+ /// @brief Spec of map job.
+ FLUENT_FIELD(TUserJobSpec, MapperSpec);
+
+ ///
+ /// @brief Spec of reduce job.
+ FLUENT_FIELD(TUserJobSpec, ReducerSpec);
+
+ ///
+ /// @brief Spec of reduce combiner.
+ FLUENT_FIELD(TUserJobSpec, ReduceCombinerSpec);
+
+ ///
+ /// @brief Columns to sort rows by (must include `ReduceBy` as prefix).
+ FLUENT_FIELD(TSortColumns, SortBy);
+
+ ///
+ /// @brief Columns to group rows by.
+ FLUENT_FIELD(TSortColumns, ReduceBy);
+
+ ///
+ /// @brief Recommended number of map jobs to run.
+ ///
+ /// `JobCount' has higher priority than @ref NYT::TMapReduceOperationSpecBase::DataSizePerMapJob.
+ /// This option only provide a recommendation and may be ignored if conflicting with YT internal limits.
+ FLUENT_FIELD_OPTION(ui32, MapJobCount);
+
+ ///
+ /// @brief Recommended of data size for each map job.
+ ///
+ /// `DataSizePerJob` has lower priority that @ref NYT::TMapReduceOperationSpecBase::MapJobCount.
+ /// This option only provide a recommendation and may be ignored if conflicting with YT internal limits.
+ FLUENT_FIELD_OPTION(ui64, DataSizePerMapJob);
+
+ ///
+ /// @brief Recommended number of intermediate data partitions.
+ FLUENT_FIELD_OPTION(ui64, PartitionCount);
+
+ ///
+ /// @brief Recommended size of intermediate data partitions.
+ FLUENT_FIELD_OPTION(ui64, PartitionDataSize);
+
+ ///
+ /// @brief Account to use for intermediate data.
+ FLUENT_FIELD_OPTION(TString, IntermediateDataAccount);
+
+ ///
+ /// @brief Replication factor for intermediate data (1 by default).
+ FLUENT_FIELD_OPTION(ui64, IntermediateDataReplicationFactor);
+
+ ///
+ /// @brief Recommended size of data to be passed to a single reduce combiner.
+ FLUENT_FIELD_OPTION(ui64, DataSizePerSortJob);
+
+ ///
+ /// @brief Whether to guarantee the order of rows passed to mapper matches the order in the table.
+ ///
+ /// @see @ref NYT::TMapOperationSpec::Ordered for more info.
+ FLUENT_FIELD_OPTION(bool, Ordered);
+
+ ///
+ /// @brief Guarantee to run reduce combiner before reducer.
+ FLUENT_FIELD_OPTION(bool, ForceReduceCombiners);
+};
+
+///
+/// @brief Spec of MapReduce operation.
+///
+/// @see https://yt.yandex-team.ru/docs/description/mr/mapreduce
+struct TMapReduceOperationSpec
+ : public TMapReduceOperationSpecBase<TMapReduceOperationSpec>
+ , public TOperationIOSpec<TMapReduceOperationSpec>
+ , public TIntermediateTablesHintSpec<TMapReduceOperationSpec>
+{
+ /// @cond Doxygen_Suppress
+ using TSelf = TMapReduceOperationSpec;
+ /// @endcond
+
+ ///
+ /// @brief Format hints for mapper.
+ FLUENT_FIELD_DEFAULT(TUserJobFormatHints, MapperFormatHints, TUserJobFormatHints());
+
+ ///
+ /// @brief Format hints for reducer.
+ FLUENT_FIELD_DEFAULT(TUserJobFormatHints, ReducerFormatHints, TUserJobFormatHints());
+
+ ///
+ /// @brief Format hints for reduce combiner.
+ FLUENT_FIELD_DEFAULT(TUserJobFormatHints, ReduceCombinerFormatHints, TUserJobFormatHints());
+};
+
+///
+/// @brief Spec of raw MapReduce operation.
+///
+/// @see https://yt.yandex-team.ru/docs/description/mr/mapreduce
+struct TRawMapReduceOperationSpec
+ : public TMapReduceOperationSpecBase<TRawMapReduceOperationSpec>
+ , public TRawMapReduceOperationIoSpec<TRawMapReduceOperationSpec>
+{ };
+
+////////////////////////////////////////////////////////////////////////////////
+
+///
+/// @brief Schema inference mode.
+///
+/// @see https://yt.yandex-team.ru/docs/description/storage/static_schema.html#schema_inference
+enum class ESchemaInferenceMode : int
+{
+ FromInput /* "from_input" */,
+ FromOutput /* "from_output" */,
+ Auto /* "auto" */,
+};
+
+///
+/// @brief Spec of Sort operation.
+///
+/// @see https://yt.yandex-team.ru/docs/description/mr/sort
+struct TSortOperationSpec
+ : TOperationSpecBase<TSortOperationSpec>
+{
+ /// @cond Doxygen_Suppress
+ using TSelf = TSortOperationSpec;
+ /// @endcond
+
+ ///
+ /// @brief Paths to input tables.
+ FLUENT_VECTOR_FIELD(TRichYPath, Input);
+
+ ///
+ /// @brief Path to output table.
+ FLUENT_FIELD(TRichYPath, Output);
+
+ ///
+ /// @brief Columns to sort table by.
+ FLUENT_FIELD(TSortColumns, SortBy);
+
+ ///
+ /// @brief Recommended number of intermediate data partitions.
+ FLUENT_FIELD_OPTION(ui64, PartitionCount);
+
+ ///
+ /// @brief Recommended size of intermediate data partitions.
+ FLUENT_FIELD_OPTION(ui64, PartitionDataSize);
+
+ ///
+ /// @brief Recommended number of partition jobs to run.
+ ///
+ /// `JobCount' has higher priority than @ref NYT::TSortOperationSpec::DataSizePerPartitionJob.
+ /// This option only provide a recommendation and may be ignored if conflicting with YT internal limits.
+ FLUENT_FIELD_OPTION(ui64, PartitionJobCount);
+
+ ///
+ /// @brief Recommended of data size for each partition job.
+ ///
+ /// `DataSizePerJob` has lower priority that @ref NYT::TSortOperationSpec::PartitionJobCount.
+ /// This option only provide a recommendation and may be ignored if conflicting with YT internal limits.
+ FLUENT_FIELD_OPTION(ui64, DataSizePerPartitionJob);
+
+ ///
+ /// @brief Inference mode for output table schema.
+ ///
+ /// @see https://yt.yandex-team.ru/docs/description/storage/static_schema.html#schema_inference
+ FLUENT_FIELD_OPTION(ESchemaInferenceMode, SchemaInferenceMode);
+
+ ///
+ /// @brief Account to use for intermediate data.
+ FLUENT_FIELD_OPTION(TString, IntermediateDataAccount);
+
+ ///
+ /// @brief Replication factor for intermediate data (1 by default).
+ FLUENT_FIELD_OPTION(ui64, IntermediateDataReplicationFactor);
+};
+
+
+///
+/// @brief Merge mode.
+enum EMergeMode : int
+{
+ MM_UNORDERED /* "unordered" */,
+ MM_ORDERED /* "ordered" */,
+ MM_SORTED /* "sorted" */,
+};
+
+///
+/// @brief Spec of Merge operation.
+///
+/// @see https://yt.yandex-team.ru/docs/description/mr/merge
+struct TMergeOperationSpec
+ : TOperationSpecBase<TMergeOperationSpec>
+{
+ /// @cond Doxygen_Suppress
+ using TSelf = TMergeOperationSpec;
+ /// @endcond
+
+ ///
+ /// @brief Paths to input tables.
+ FLUENT_VECTOR_FIELD(TRichYPath, Input);
+
+ ///
+ /// @brief Path to output table.
+ FLUENT_FIELD(TRichYPath, Output);
+
+ ///
+ /// @brief Columns by which to merge (for @ref NYT::EMergeMode::MM_SORTED).
+ FLUENT_FIELD(TSortColumns, MergeBy);
+
+ ///
+ /// @brief Merge mode.
+ FLUENT_FIELD_DEFAULT(EMergeMode, Mode, MM_UNORDERED);
+
+ ///
+ /// @brief Combine output chunks to larger ones.
+ FLUENT_FIELD_DEFAULT(bool, CombineChunks, false);
+
+ ///
+ /// @brief Guarantee that all input chunks will be read.
+ FLUENT_FIELD_DEFAULT(bool, ForceTransform, false);
+
+ ///
+ /// @brief Recommended number of jobs to run.
+ ///
+ /// `JobCount' has higher priority than @ref NYT::TMergeOperationSpec::DataSizePerJob.
+ /// This option only provide a recommendation and may be ignored if conflicting with YT internal limits.
+ FLUENT_FIELD_OPTION(ui32, JobCount);
+
+ ///
+ /// @brief Recommended of data size for each job.
+ ///
+ /// `DataSizePerJob` has lower priority that @ref NYT::TMergeOperationSpec::JobCount.
+ /// This option only provide a recommendation and may be ignored if conflicting with YT internal limits.
+ FLUENT_FIELD_OPTION(ui64, DataSizePerJob);
+
+ ///
+ /// @brief Inference mode for output table schema.
+ ///
+ /// @see https://yt.yandex-team.ru/docs/description/storage/static_schema.html#schema_inference
+ FLUENT_FIELD_OPTION(ESchemaInferenceMode, SchemaInferenceMode);
+};
+
+///
+/// @brief Spec of Erase operation.
+///
+/// @see https://yt.yandex-team.ru/docs/description/mr/erase
+struct TEraseOperationSpec
+ : TOperationSpecBase<TEraseOperationSpec>
+{
+ /// @cond Doxygen_Suppress
+ using TSelf = TEraseOperationSpec;
+ /// @endcond
+
+ ///
+ /// @brief Which table (or row range) to erase.
+ FLUENT_FIELD(TRichYPath, TablePath);
+
+ ///
+ /// Combine output chunks to larger ones.
+ FLUENT_FIELD_DEFAULT(bool, CombineChunks, false);
+
+ ///
+ /// @brief Inference mode for output table schema.
+ ///
+ /// @see https://yt.yandex-team.ru/docs/description/storage/static_schema.html#schema_inference
+ FLUENT_FIELD_OPTION(ESchemaInferenceMode, SchemaInferenceMode);
+};
+
+///
+/// @brief Spec of RemoteCopy operation.
+///
+/// @see https://yt.yandex-team.ru/docs/description/mr/remote_copy
+struct TRemoteCopyOperationSpec
+ : TOperationSpecBase<TRemoteCopyOperationSpec>
+{
+ /// @cond Doxygen_Suppress
+ using TSelf = TRemoteCopyOperationSpec;
+ /// @endcond
+
+ ///
+ /// @brief Source cluster name.
+ FLUENT_FIELD(TString, ClusterName);
+
+ ///
+ /// @brief Network to use for copy (all remote cluster nodes must have it configured).
+ FLUENT_FIELD_OPTION(TString, NetworkName);
+
+ ///
+ /// @brief Paths to input tables.
+ FLUENT_VECTOR_FIELD(TRichYPath, Input);
+
+ ///
+ /// @brief Path to output table.
+ FLUENT_FIELD(TRichYPath, Output);
+
+ ///
+ /// @brief Inference mode for output table schema.
+ ///
+ /// @see https://yt.yandex-team.ru/docs/description/storage/static_schema.html#schema_inference
+ FLUENT_FIELD_OPTION(ESchemaInferenceMode, SchemaInferenceMode);
+
+ ///
+ /// @brief Copy user attributes from input to output table (allowed only for single input table).
+ FLUENT_FIELD_DEFAULT(bool, CopyAttributes, false);
+
+ ///
+ /// @brief Names of user attributes to copy from input to output table.
+ ///
+ /// @note To make this option make sense set @ref NYT::TRemoteCopyOperationSpec::CopyAttributes to `true`.
+ FLUENT_VECTOR_FIELD(TString, AttributeKey);
+
+private:
+
+ ///
+ /// @brief Config for remote cluster connection.
+ FLUENT_FIELD_OPTION(TNode, ClusterConnection);
+};
+
+class IVanillaJobBase;
+
+///
+/// @brief Task of Vanilla operation.
+///
+/// @see https://yt.yandex-team.ru/docs/description/mr/vanilla
+struct TVanillaTask
+ : public TOperationOutputSpecBase
+ , public TUserJobOutputFormatHintsBase<TVanillaTask>
+{
+ /// @cond Doxygen_Suppress
+ using TSelf = TVanillaTask;
+ /// @endcond
+
+ ///
+ /// @brief Add output table path and specify the task output type (i.e. TMyProtoMessage).
+ template <class T>
+ TSelf& AddOutput(const TRichYPath& path);
+
+ ///
+ /// @brief Add output table path as structured path.
+ TSelf& AddStructuredOutput(TStructuredTablePath path);
+
+ ///
+ /// @brief Set output table path and specify the task output type (i.e. TMyProtoMessage).
+ template <class T>
+ TSelf& SetOutput(size_t tableIndex, const TRichYPath& path);
+
+ ///
+ /// @brief Task name.
+ FLUENT_FIELD(TString, Name);
+
+ ///
+ /// @brief Job to be executed in this task.
+ FLUENT_FIELD(::TIntrusivePtr<IVanillaJobBase>, Job);
+
+ ///
+ /// @brief User job spec.
+ FLUENT_FIELD(TUserJobSpec, Spec);
+
+ ///
+ /// @brief Number of jobs to run and wait for successful completion.
+ ///
+ /// @note If @ref NYT::TUserOperationSpecBase::FailOnJobRestart is `false`, a failed job will be restarted
+ /// and will not count in this amount.
+ FLUENT_FIELD(ui64, JobCount);
+
+ ///
+ /// @brief Network project name.
+ FLUENT_FIELD(TMaybe<TString>, NetworkProject);
+
+};
+
+///
+/// @brief Spec of Vanilla operation.
+///
+/// @see https://yt.yandex-team.ru/docs/description/mr/vanilla
+struct TVanillaOperationSpec
+ : TUserOperationSpecBase<TVanillaOperationSpec>
+{
+ /// @cond Doxygen_Suppress
+ using TSelf = TVanillaOperationSpec;
+ /// @endcond
+
+ ///
+ /// @brief Description of tasks to run in this operation.
+ FLUENT_VECTOR_FIELD(TVanillaTask, Task);
+};
+
+////////////////////////////////////////////////////////////////////////////////
+
+///
+/// @brief Options for @ref NYT::IOperationClient::Map and other operation start commands.
+struct TOperationOptions
+{
+ /// @cond Doxygen_Suppress
+ using TSelf = TOperationOptions;
+ /// @endcond
+
+ ///
+ /// @brief Additional field to put to operation spec.
+ FLUENT_FIELD_OPTION(TNode, Spec);
+
+ ///
+ /// @brief Start operation mode.
+ enum class EStartOperationMode : int
+ {
+ ///
+ /// @brief Prepare operation asynchronously. Call IOperation::Start() to start operation.
+ AsyncPrepare,
+
+ ///
+ /// @brief Prepare and start operation asynchronously. Don't wait for operation completion.
+ AsyncStart,
+
+ ///
+ /// @brief Prepare and start operation synchronously. Don't wait for operation completion.
+ SyncStart,
+
+ ///
+ /// @brief Prepare, start and wait for operation completion synchronously.
+ SyncWait,
+ };
+
+ ///
+ /// @brief Start operation mode.
+ FLUENT_FIELD_DEFAULT(EStartOperationMode, StartOperationMode, EStartOperationMode::SyncWait);
+
+ ///
+ /// @brief Wait for operation finish synchronously.
+ ///
+ /// @deprecated Use StartOperationMode() instead.
+ TSelf& Wait(bool value) {
+ StartOperationMode_ = value ? EStartOperationMode::SyncWait : EStartOperationMode::SyncStart;
+ return static_cast<TSelf&>(*this);
+ }
+
+ ///
+ ///
+ /// @brief Use format from table attribute (for YAMR-like format).
+ ///
+ /// @deprecated
+ FLUENT_FIELD_DEFAULT(bool, UseTableFormats, false);
+
+ ///
+ /// @brief Prefix for bash command running the jobs.
+ ///
+ /// Can be overridden for the specific job type in the @ref NYT::TUserJobSpec.
+ FLUENT_FIELD(TString, JobCommandPrefix);
+
+ ///
+ /// @brief Suffix for bash command running the jobs.
+ ///
+ /// Can be overridden for the specific job type in the @ref NYT::TUserJobSpec.
+ FLUENT_FIELD(TString, JobCommandSuffix);
+
+ ///
+ /// @brief Put all files required by the job into tmpfs.
+ ///
+ /// This option can be set globally using @ref NYT::TConfig::MountSandboxInTmpfs.
+ /// @see https://yt.yandex-team.ru/docs/problems/woodpeckers
+ FLUENT_FIELD_DEFAULT(bool, MountSandboxInTmpfs, false);
+
+ ///
+ /// @brief Path to directory to store temporary files.
+ FLUENT_FIELD_OPTION(TString, FileStorage);
+
+ ///
+ /// @brief Expiration timeout for uploaded files.
+ FLUENT_FIELD_OPTION(TDuration, FileExpirationTimeout);
+
+ ///
+ /// @brief Info to be passed securely to the job.
+ FLUENT_FIELD_OPTION(TNode, SecureVault);
+
+ ///
+ /// @brief File cache mode.
+ enum class EFileCacheMode : int
+ {
+ ///
+ /// @brief Use YT API commands "get_file_from_cache" and "put_file_to_cache".
+ ApiCommandBased,
+
+ ///
+ /// @brief Upload files to random paths inside @ref NYT::TOperationOptions::FileStorage without caching.
+ CachelessRandomPathUpload,
+ };
+
+ ///
+ /// @brief File cache mode.
+ FLUENT_FIELD_DEFAULT(EFileCacheMode, FileCacheMode, EFileCacheMode::ApiCommandBased);
+
+ ///
+ /// @brief Id of transaction within which all Cypress file storage entries will be checked/created.
+ ///
+ /// By default, the root transaction is used.
+ ///
+ /// @note Set a specific transaction only if you
+ /// 1. specify non-default file storage path in @ref NYT::TOperationOptions::FileStorage or in @ref NYT::TConfig::RemoteTempFilesDirectory.
+ /// 2. use `CachelessRandomPathUpload` caching mode (@ref NYT::TOperationOptions::FileCacheMode).
+ FLUENT_FIELD(TTransactionId, FileStorageTransactionId);
+
+ ///
+ /// @brief Ensure stderr and core tables exist before starting operation.
+ ///
+ /// If set to `false`, it is user's responsibility to ensure these tables exist.
+ FLUENT_FIELD_DEFAULT(bool, CreateDebugOutputTables, true);
+
+ ///
+ /// @brief Ensure output tables exist before starting operation.
+ ///
+ /// If set to `false`, it is user's responsibility to ensure output tables exist.
+ FLUENT_FIELD_DEFAULT(bool, CreateOutputTables, true);
+
+ ///
+ /// @brief Try to infer schema of inexistent table from the type of written rows.
+ ///
+ /// @note Default values for this option may differ depending on the row type.
+ /// For protobuf it's currently `false` by default.
+ FLUENT_FIELD_OPTION(bool, InferOutputSchema);
+};
+
+////////////////////////////////////////////////////////////////////////////////
+
+///
+/// @brief Get operation secure vault (specified in @ref NYT::TOperationOptions::SecureVault) inside a job.
+const TNode& GetJobSecureVault();
+
+////////////////////////////////////////////////////////////////////////////////
+
+///
+/// @brief Context passed to @ref NYT::IRawJob::Do.
+class TRawJobContext
+{
+public:
+ explicit TRawJobContext(size_t outputTableCount);
+
+ ///
+ /// @brief Get file corresponding to input stream.
+ const TFile& GetInputFile() const;
+
+ ///
+ /// @brief Get files corresponding to output streams.
+ const TVector<TFile>& GetOutputFileList() const;
+
+private:
+ TFile InputFile_;
+ TVector<TFile> OutputFileList_;
+};
+
+////////////////////////////////////////////////////////////////////////////////
+
+///
+/// @brief Interface for classes that can be Saved/Loaded (to be used with @ref Y_SAVELOAD_JOB).
+class ISerializableForJob
+{
+public:
+ virtual ~ISerializableForJob() = default;
+
+ ///
+ /// @brief Dump state to output stream to be restored in job.
+ virtual void Save(IOutputStream& stream) const = 0;
+
+ ///
+ /// @brief Load state from a stream.
+ virtual void Load(IInputStream& stream) = 0;
+};
+
+////////////////////////////////////////////////////////////////////////////////
+
+///
+/// @brief Provider of information about operation inputs/outputs during @ref NYT::IJob::PrepareOperation.
+class IOperationPreparationContext
+{
+public:
+ virtual ~IOperationPreparationContext() = default;
+
+ /// @brief Get the number of input tables.
+ virtual int GetInputCount() const = 0;
+
+ /// @brief Get the number of output tables.
+ virtual int GetOutputCount() const = 0;
+
+ /// @brief Get the schema of input table no. `index`.
+ virtual const TTableSchema& GetInputSchema(int index) const = 0;
+
+ /// @brief Get all the input table schemas.
+ virtual const TVector<TTableSchema>& GetInputSchemas() const = 0;
+
+ /// @brief Path to the input table if available (`Nothing()` for intermediate tables).
+ virtual TMaybe<TYPath> GetInputPath(int index) const = 0;
+
+ /// @brief Path to the output table if available (`Nothing()` for intermediate tables).
+ virtual TMaybe<TYPath> GetOutputPath(int index) const = 0;
+};
+
+///
+/// @brief Fluent builder class for @ref NYT::IJob::PrepareOperation.
+///
+/// @note Method calls are supposed to be chained.
+class TJobOperationPreparer
+{
+public:
+
+ ///
+ /// @brief Group of input tables that allows to specify properties on all of them at once.
+ ///
+ /// The instances are created with @ref NYT::TJobOperationPreparer::BeginInputGroup, not directly.
+ class TInputGroup
+ {
+ public:
+ TInputGroup(TJobOperationPreparer& preparer, TVector<int> indices);
+
+ /// @brief Specify the type of input rows.
+ template <typename TRow>
+ TInputGroup& Description();
+
+ /// @brief Specify renaming of input columns.
+ TInputGroup& ColumnRenaming(const THashMap<TString, TString>& renaming);
+
+ /// @brief Specify what input columns to send to job
+ ///
+ /// @note Filter is applied before renaming, so it must specify original column names.
+ TInputGroup& ColumnFilter(const TVector<TString>& columns);
+
+ /// @brief Finish describing the input group.
+ TJobOperationPreparer& EndInputGroup();
+
+ private:
+ TJobOperationPreparer& Preparer_;
+ TVector<int> Indices_;
+ };
+
+ ///
+ /// @brief Group of output tables that allows to specify properties on all of them at once.
+ ///
+ /// The instances are created with @ref NYT::TJobOperationPreparer::BeginOutputGroup, not directly.
+ class TOutputGroup
+ {
+ public:
+ TOutputGroup(TJobOperationPreparer& preparer, TVector<int> indices);
+
+ /// @brief Specify the type of output rows.
+ ///
+ /// @tparam TRow type of output rows from tables of this group.
+ /// @param inferSchema Infer schema from `TRow` and specify it for these output tables.
+ template <typename TRow>
+ TOutputGroup& Description(bool inferSchema = true);
+
+ /// @brief Specify schema for these tables.
+ TOutputGroup& Schema(const TTableSchema& schema);
+
+ /// @brief Specify that all the the tables in this group are unschematized.
+ ///
+ /// It is equivalent of `.Schema(TTableSchema().Strict(false)`.
+ TOutputGroup& NoSchema();
+
+ /// @brief Finish describing the output group.
+ TJobOperationPreparer& EndOutputGroup();
+
+ private:
+ TJobOperationPreparer& Preparer_;
+ TVector<int> Indices_;
+ };
+
+public:
+ explicit TJobOperationPreparer(const IOperationPreparationContext& context);
+
+ /// @brief Begin input group consisting of tables with indices `[begin, end)`.
+ ///
+ /// @param begin First index.
+ /// @param end Index after the last one.
+ TInputGroup BeginInputGroup(int begin, int end);
+
+ /// @brief Begin input group consisting of tables with indices from `indices`.
+ ///
+ /// @tparam TCont Container with integers. Must support `std::begin` and `std::end` functions.
+ /// @param indices Indices of tables to include in the group.
+ template <typename TCont>
+ TInputGroup BeginInputGroup(const TCont& indices);
+
+ /// @brief Begin output group consisting of tables with indices `[begin, end)`.
+ ///
+ /// @param begin First index.
+ /// @param end Index after the last one.
+ TOutputGroup BeginOutputGroup(int begin, int end);
+
+ /// @brief Begin input group consisting of tables with indices from `indices`.
+ ///
+ /// @tparam TCont Container with integers. Must support `std::begin` and `std::end` functions.
+ /// @param indices Indices of tables to include in the group.
+ template <typename TCont>
+ TOutputGroup BeginOutputGroup(const TCont& indices);
+
+ /// @brief Specify the schema for output table no `tableIndex`.
+ ///
+ /// @note All the output schemas must be specified either with this method, `NoOutputSchema` or `OutputDescription` with `inferSchema == true`
+ TJobOperationPreparer& OutputSchema(int tableIndex, TTableSchema schema);
+
+ /// @brief Mark the output table no. `tableIndex` as unschematized.
+ TJobOperationPreparer& NoOutputSchema(int tableIndex);
+
+ /// @brief Specify renaming of input columns for table no. `tableIndex`.
+ TJobOperationPreparer& InputColumnRenaming(int tableIndex, const THashMap<TString, TString>& renaming);
+
+ /// @brief Specify what input columns of table no. `tableIndex` to send to job
+ ///
+ /// @note Filter is applied before renaming, so it must specify original column names.
+ TJobOperationPreparer& InputColumnFilter(int tableIndex, const TVector<TString>& columns);
+
+ /// @brief Specify the type of input rows for table no. `tableIndex`.
+ ///
+ /// @tparam TRow type of input rows.
+ template <typename TRow>
+ TJobOperationPreparer& InputDescription(int tableIndex);
+
+ /// @brief Specify the type of output rows for table no. `tableIndex`.
+ ///
+ /// @tparam TRow type of output rows.
+ /// @param inferSchema Infer schema from `TRow` and specify it for the output tables.
+ template <typename TRow>
+ TJobOperationPreparer& OutputDescription(int tableIndex, bool inferSchema = true);
+
+ /// @brief Set type of output rows for table no. `tableIndex` to TNode
+ ///
+ /// @note Set schema via `OutputSchema` if needed
+ TJobOperationPreparer& NodeOutput(int tableIndex);
+
+ /// @brief Specify input format hints.
+ ///
+ /// These hints have lower priority than ones specified in spec.
+ TJobOperationPreparer& InputFormatHints(TFormatHints hints);
+
+ /// @brief Specify output format hints.
+ ///
+ /// These hints have lower priority than ones specified in spec.
+ TJobOperationPreparer& OutputFormatHints(TFormatHints hints);
+
+ /// @brief Specify format hints.
+ ///
+ /// These hints have lower priority than ones specified in spec.
+ TJobOperationPreparer& FormatHints(TUserJobFormatHints newFormatHints);
+
+ /// @name "Private" members
+ /// The following methods should not be used by clients in @ref NYT::IJob::PrepareOperation
+ ///@{
+
+ /// @brief Finish the building process.
+ void Finish();
+
+ /// @brief Get output table schemas as specified by the user.
+ TVector<TTableSchema> GetOutputSchemas();
+
+ /// @brief Get input column renamings as specified by the user.
+ const TVector<THashMap<TString, TString>>& GetInputColumnRenamings() const;
+
+ /// @brief Get input column filters as specified by the user.
+ const TVector<TMaybe<TVector<TString>>>& GetInputColumnFilters() const;
+
+ /// @brief Get input column descriptions as specified by the user.
+ const TVector<TMaybe<TTableStructure>>& GetInputDescriptions() const;
+
+ /// @brief Get output column descriptions as specified by the user.
+ const TVector<TMaybe<TTableStructure>>& GetOutputDescriptions() const;
+
+ /// @brief Get format hints as specified by the user.
+ const TUserJobFormatHints& GetFormatHints() const;
+
+ ///@}
+private:
+
+ /// @brief Validate that schema for output table no. `tableIndex` has not been set yet.
+ void ValidateMissingOutputSchema(int tableIndex) const;
+
+ /// @brief Validate that description for input table no. `tableIndex` has not been set yet.
+ void ValidateMissingInputDescription(int tableIndex) const;
+
+ /// @brief Validate that description for output table no. `tableIndex` has not been set yet.
+ void ValidateMissingOutputDescription(int tableIndex) const;
+
+ /// @brief Validate that `tableIndex` is in correct range for input table indices.
+ ///
+ /// @param message Message to add to the exception in case of violation.
+ void ValidateInputTableIndex(int tableIndex, TStringBuf message) const;
+
+ /// @brief Validate that `tableIndex` is in correct range for output table indices.
+ ///
+ /// @param message Message to add to the exception in case of violation.
+ void ValidateOutputTableIndex(int tableIndex, TStringBuf message) const;
+
+ /// @brief Validate that all the output schemas has been set.
+ void FinallyValidate() const;
+
+ static TTableSchema EmptyNonstrictSchema();
+
+private:
+ const IOperationPreparationContext& Context_;
+
+ TVector<TMaybe<TTableSchema>> OutputSchemas_;
+ TVector<THashMap<TString, TString>> InputColumnRenamings_;
+ TVector<TMaybe<TVector<TString>>> InputColumnFilters_;
+ TVector<TMaybe<TTableStructure>> InputTableDescriptions_;
+ TVector<TMaybe<TTableStructure>> OutputTableDescriptions_;
+ TUserJobFormatHints FormatHints_ = {};
+};
+
+////////////////////////////////////////////////////////////////////////////////
+
+///
+/// @brief Interface for all user jobs.
+class IJob
+ : public TThrRefBase
+{
+public:
+
+ ///
+ /// @brief Type of job.
+ enum EType
+ {
+ Mapper,
+ Reducer,
+ ReducerAggregator,
+ RawJob,
+ VanillaJob,
+ };
+
+ ///
+ /// @brief Save job state to stream to be restored on cluster nodes.
+ virtual void Save(IOutputStream& stream) const
+ {
+ Y_UNUSED(stream);
+ }
+
+ ///
+ /// @brief Restore job state from a stream.
+ virtual void Load(IInputStream& stream)
+ {
+ Y_UNUSED(stream);
+ }
+
+ ///
+ /// @brief Get operation secure vault (specified in @ref NYT::TOperationOptions::SecureVault) inside a job.
+ const TNode& SecureVault() const
+ {
+ return GetJobSecureVault();
+ }
+
+ ///
+ /// @brief Get number of output tables.
+ i64 GetOutputTableCount() const
+ {
+ Y_VERIFY(NDetail::OutputTableCount > 0);
+
+ return NDetail::OutputTableCount;
+ }
+
+ ///
+ /// @brief Method allowing user to control some properties of input and output tables and formats.
+ ///
+ /// User can override this method in their job class to:
+ /// - specify output table schemas.
+ /// The most natural way is usually through @ref NYT::TJobOperationPreparer::OutputDescription (especially for protobuf),
+ /// but you can use @ref NYT::TJobOperationPreparer::OutputSchema directly
+ /// - specify output row type (@ref NYT::TJobOperationPreparer::OutputDescription)
+ /// - specify input row type (@ref NYT::TJobOperationPreparer::InputDescription)
+ /// - specify input column filter and renaming (@ref NYT::TJobOperationPreparer::InputColumnFilter and @ref NYT::TJobOperationPreparer::InputColumnRenaming)
+ /// - specify format hints (@ref NYT::TJobOperationPreparer::InputFormatHints,
+ /// NYT::TJobOperationPreparer::OutputFormatHints and @ref NYT::TJobOperationPreparer::FormatHints)
+ /// - maybe something more, cf. the methods of @ref NYT::TJobOperationPreparer.
+ ///
+ /// If one has several similar tables, groups can be used.
+ /// Groups are delimited by @ref NYT::TJobOperationPreparer::BeginInputGroup /
+ /// @ref NYT::TJobOperationPreparer::TInputGroup::EndInputGroup and
+ /// @ref NYT::TJobOperationPreparer::BeginOutputGroup /
+ /// @ref NYT::TJobOperationPreparer::TOutputGroup::EndOutputGroup.
+ /// Example:
+ /// @code{.cpp}
+ /// preparer
+ /// .BeginInputGroup({1,2,4,8})
+ /// .ColumnRenaming({{"a", "b"}, {"c", "d"}})
+ /// .ColumnFilter({"a", "c"})
+ /// .EndInputGroup();
+ /// @endcode
+ ///
+ /// @note All the output table schemas must be set
+ /// (possibly as empty nonstrict using @ref NYT::TJobOperationPreparer::NoOutputSchema or
+ /// @ref NYT::TJobOperationPreparer::TOutputGroup::NoSchema).
+ /// By default all the output table schemas are marked as empty nonstrict.
+ virtual void PrepareOperation(const IOperationPreparationContext& context, TJobOperationPreparer& preparer) const;
+};
+
+///
+/// @brief Declare what fields of currently declared job class to save and restore on cluster node.
+#define Y_SAVELOAD_JOB(...) \
+ virtual void Save(IOutputStream& stream) const override { Save(&stream); } \
+ virtual void Load(IInputStream& stream) override { Load(&stream); } \
+ Y_PASS_VA_ARGS(Y_SAVELOAD_DEFINE(__VA_ARGS__))
+
+////////////////////////////////////////////////////////////////////////////////
+
+///
+/// @brief Interface for jobs with typed inputs and outputs.
+class IStructuredJob
+ : public IJob
+{
+public:
+ ///
+ /// @brief This methods are called when creating table reader and writer for the job.
+ ///
+ /// Override them if you want to implement custom input logic. (e.g. addtitional bufferization)
+ virtual TRawTableReaderPtr CreateCustomRawJobReader(int fd) const;
+ virtual THolder<IProxyOutput> CreateCustomRawJobWriter(size_t outputTableCount) const;
+
+ virtual TStructuredRowStreamDescription GetInputRowStreamDescription() const = 0;
+ virtual TStructuredRowStreamDescription GetOutputRowStreamDescription() const = 0;
+};
+
+////////////////////////////////////////////////////////////////////////////////
+
+///
+/// @brief Create default raw job reader.
+TRawTableReaderPtr CreateRawJobReader(int fd = 0);
+
+///
+/// @brief Create default raw job writer.
+THolder<IProxyOutput> CreateRawJobWriter(size_t outputTableCount);
+
+////////////////////////////////////////////////////////////////////////////////
+
+///
+/// @brief Base interface for structured (typed) map jobs.
+class IMapperBase
+ : public IStructuredJob
+{ };
+
+///
+/// @brief Base interface for structured (typed) map jobs with given reader and writer.
+template <class TR, class TW>
+class IMapper
+ : public IMapperBase
+{
+public:
+ using TReader = TR;
+ using TWriter = TW;
+
+public:
+ /// Type of job implemented by this class.
+ static constexpr EType JobType = EType::Mapper;
+
+ ///
+ /// @brief This method is called before feeding input rows to mapper (before `Do` method).
+ virtual void Start(TWriter* writer)
+ {
+ Y_UNUSED(writer);
+ }
+
+ ///
+ /// @brief This method is called exactly once for the whole job input.
+ ///
+ /// Read input rows from `reader` and write output ones to `writer`.
+ virtual void Do(TReader* reader, TWriter* writer) = 0;
+
+ ///
+ /// @brief This method is called after feeding input rows to mapper (after `Do` method).
+ virtual void Finish(TWriter* writer)
+ {
+ Y_UNUSED(writer);
+ }
+
+ virtual TStructuredRowStreamDescription GetInputRowStreamDescription() const override;
+ virtual TStructuredRowStreamDescription GetOutputRowStreamDescription() const override;
+};
+
+////////////////////////////////////////////////////////////////////////////////
+
+///
+/// @brief Base interface for structured (typed) reduce jobs.
+///
+/// It is common base for @ref NYT::IReducer and @ref NYT::IAggregatorReducer.
+class IReducerBase
+ : public IStructuredJob
+{ };
+
+///
+/// @brief Base interface for structured (typed) reduce jobs with given reader and writer.
+template <class TR, class TW>
+class IReducer
+ : public IReducerBase
+{
+public:
+ using TReader = TR;
+ using TWriter = TW;
+
+public:
+ /// Type of job implemented by this class.
+ static constexpr EType JobType = EType::Reducer;
+
+public:
+
+ ///
+ /// @brief This method is called before feeding input rows to reducer (before `Do` method).
+ virtual void Start(TWriter* writer)
+ {
+ Y_UNUSED(writer);
+ }
+
+ ///
+ /// @brief This method is called exactly once for each range with same value of `ReduceBy` (or `JoinBy`) keys.
+ virtual void Do(TReader* reader, TWriter* writer) = 0;
+
+ ///
+ /// @brief This method is called after feeding input rows to reducer (after `Do` method).
+ virtual void Finish(TWriter* writer)
+ {
+ Y_UNUSED(writer);
+ }
+
+ ///
+ /// @brief Refuse to process the remaining row ranges and finish the job (successfully).
+ void Break();
+
+ virtual TStructuredRowStreamDescription GetInputRowStreamDescription() const override;
+ virtual TStructuredRowStreamDescription GetOutputRowStreamDescription() const override;
+};
+
+////////////////////////////////////////////////////////////////////////////////
+
+///
+/// @brief Base interface of jobs used inside reduce operations.
+///
+/// Unlike @ref NYT::IReducer jobs their `Do' method is called only once
+/// and takes whole range of records split by key boundaries.
+///
+/// Template argument `TR` must be @ref NYT::TTableRangesReader.
+template <class TR, class TW>
+class IAggregatorReducer
+ : public IReducerBase
+{
+public:
+ using TReader = TR;
+ using TWriter = TW;
+
+public:
+ /// Type of job implemented by this class.
+ static constexpr EType JobType = EType::ReducerAggregator;
+
+public:
+ ///
+ /// @brief This method is called before feeding input rows to reducer (before `Do` method).
+ virtual void Start(TWriter* writer)
+ {
+ Y_UNUSED(writer);
+ }
+
+ ///
+ /// @brief This method is called exactly once for the whole job input.
+ virtual void Do(TReader* reader, TWriter* writer) = 0;
+
+ ///
+ /// @brief This method is called after feeding input rows to reducer (after `Do` method).
+ virtual void Finish(TWriter* writer)
+ {
+ Y_UNUSED(writer);
+ }
+
+ virtual TStructuredRowStreamDescription GetInputRowStreamDescription() const override;
+ virtual TStructuredRowStreamDescription GetOutputRowStreamDescription() const override;
+};
+
+////////////////////////////////////////////////////////////////////////////////
+
+///
+/// @brief Interface for raw jobs (i.e. reading and writing byte streams).
+class IRawJob
+ : public IJob
+{
+public:
+ /// Type of job implemented by this class.
+ static constexpr EType JobType = EType::RawJob;
+
+ ///
+ /// @brief This method is called exactly once for the whole job input.
+ virtual void Do(const TRawJobContext& jobContext) = 0;
+};
+
+///
+/// @brief Interface of jobs that run the given bash command.
+class ICommandJob
+ : public IJob
+{
+public:
+ ///
+ /// @brief Get bash command to run.
+ ///
+ /// @note This method is called on the client side.
+ virtual const TString& GetCommand() const = 0;
+};
+
+///
+/// @brief Raw job executing given bash command.
+///
+/// @note The binary will not be uploaded.
+class TCommandRawJob
+ : public IRawJob
+ , public ICommandJob
+{
+public:
+ ///
+ /// @brief Create job with specified command.
+ ///
+ /// @param command Bash command to run.
+ explicit TCommandRawJob(TStringBuf command = {});
+
+ const TString& GetCommand() const override;
+ void Do(const TRawJobContext& jobContext) override;
+
+private:
+ TString Command_;
+};
+
+////////////////////////////////////////////////////////////////////////////////
+
+///
+/// @brief Base interface for vanilla jobs.
+///
+/// @see https://yt.yandex-team.ru/docs/description/mr/vanilla
+class IVanillaJobBase
+ : public virtual IStructuredJob
+{
+public:
+ /// Type of job implemented by this class.
+ static constexpr EType JobType = EType::VanillaJob;
+};
+
+template <class TW = void>
+class IVanillaJob;
+
+///
+/// @brief Interface of vanilla job without outputs.
+template <>
+class IVanillaJob<void>
+ : public IVanillaJobBase
+{
+public:
+ ///
+ /// @brief This method is called exactly once for each vanilla job.
+ virtual void Do() = 0;
+
+ virtual TStructuredRowStreamDescription GetInputRowStreamDescription() const override;
+ virtual TStructuredRowStreamDescription GetOutputRowStreamDescription() const override;
+};
+
+///
+/// @brief Vanilla job executing given bash command.
+///
+/// @note The binary will not be uploaded.
+class TCommandVanillaJob
+ : public IVanillaJob<>
+ , public ICommandJob
+{
+public:
+ ///
+ /// @brief Create job with specified command.
+ ///
+ /// @param command Bash command to run.
+ explicit TCommandVanillaJob(TStringBuf command = {});
+
+ const TString& GetCommand() const override;
+ void Do() override;
+
+private:
+ TString Command_;
+};
+
+///
+/// @brief Interface for vanilla jobs with output tables.
+template <class TW>
+class IVanillaJob
+ : public IVanillaJobBase
+{
+public:
+ using TWriter = TW;
+
+public:
+ ///
+ /// @brief This method is called before `Do` method.
+ virtual void Start(TWriter* /* writer */)
+ { }
+
+ ///
+ /// @brief This method is called exactly once for each vanilla job.
+ ///
+ /// Write output rows to `writer`.
+ virtual void Do(TWriter* writer) = 0;
+
+ ///
+ /// @brief This method is called after `Do` method.
+ virtual void Finish(TWriter* /* writer */)
+ { }
+
+ virtual TStructuredRowStreamDescription GetInputRowStreamDescription() const override;
+ virtual TStructuredRowStreamDescription GetOutputRowStreamDescription() const override;
+};
+
+////////////////////////////////////////////////////////////////////////////////
+
+///
+/// @brief Attributes to request for an operation.
+enum class EOperationAttribute : int
+{
+ Id /* "id" */,
+ Type /* "type" */,
+ State /* "state" */,
+ AuthenticatedUser /* "authenticated_user" */,
+ StartTime /* "start_time" */,
+ FinishTime /* "finish_time" */,
+ BriefProgress /* "brief_progress" */,
+ BriefSpec /* "brief_spec" */,
+ Suspended /* "suspended" */,
+ Result /* "result" */,
+ Progress /* "progress" */,
+ Events /* "events" */,
+ Spec /* "spec" */,
+ FullSpec /* "full_spec" */,
+ UnrecognizedSpec /* "unrecognized_spec" */,
+};
+
+///
+/// @brief Class describing which attributes to request in @ref NYT::IClient::GetOperation or @ref NYT::IClient::ListOperations.
+struct TOperationAttributeFilter
+{
+ /// @cond Doxygen_Suppress
+ using TSelf = TOperationAttributeFilter;
+ /// @endcond
+
+ TVector<EOperationAttribute> Attributes_;
+
+ ///
+ /// @brief Add attribute to the filter. Calls are supposed to be chained.
+ TSelf& Add(EOperationAttribute attribute)
+ {
+ Attributes_.push_back(attribute);
+ return *this;
+ }
+};
+
+///
+/// @brief Options for @ref NYT::IClient::GetOperation call.
+struct TGetOperationOptions
+{
+ /// @cond Doxygen_Suppress
+ using TSelf = TGetOperationOptions;
+ /// @endcond
+
+ ///
+ /// @brief What attributes to request (if omitted, the default set of attributes will be requested).
+ FLUENT_FIELD_OPTION(TOperationAttributeFilter, AttributeFilter);
+};
+
+///
+/// @brief "Coarse-grained" state of an operation.
+enum class EOperationBriefState : int
+{
+ InProgress /* "in_progress" */,
+ Completed /* "completed" */,
+ Aborted /* "aborted" */,
+
+ /// Failed
+ Failed /* "failed" */,
+};
+
+///
+/// @brief Operation type.
+enum class EOperationType : int
+{
+ Map /* "map" */,
+ Merge /* "merge" */,
+ Erase /* "erase" */,
+ Sort /* "sort" */,
+ Reduce /* "reduce" */,
+ MapReduce /* "map_reduce" */,
+ RemoteCopy /* "remote_copy" */,
+ JoinReduce /* "join_reduce" */,
+ Vanilla /* "vanilla" */,
+};
+
+///
+/// @brief Operation progress.
+struct TOperationProgress
+{
+ ///
+ /// @brief Total job statistics.
+ TJobStatistics JobStatistics;
+
+ ///
+ /// @brief Job counter for various job states with hierarchy.
+ TJobCounters JobCounters;
+
+ ///
+ /// @brief Time when this progress was built on scheduler or CA.
+ TMaybe<TInstant> BuildTime;
+};
+
+///
+/// @brief Brief operation progress (numbers of jobs in these states).
+struct TOperationBriefProgress
+{
+ ui64 Aborted = 0;
+ ui64 Completed = 0;
+ ui64 Failed = 0;
+ ui64 Lost = 0;
+ ui64 Pending = 0;
+ ui64 Running = 0;
+ ui64 Total = 0;
+};
+
+///
+/// @brief Operation result.
+struct TOperationResult
+{
+ ///
+ /// @brief For a unsuccessfully finished operation: description of error.
+ TMaybe<TYtError> Error;
+};
+
+///
+/// @brief Operation event (change of state).
+struct TOperationEvent
+{
+ ///
+ /// @brief New state of operation.
+ TString State;
+
+ ///
+ /// @brief Time of state change.
+ TInstant Time;
+};
+
+///
+/// @brief Operation info.
+///
+/// A field may be `Nothing()` either if it was not requested (see @ref NYT::TGetOperationOptions::AttributeFilter)
+/// or it is not available (i.e. `FinishTime` for a running operation).
+/// @see https://yt.yandex-team.ru/docs/api/commands#get_operation
+struct TOperationAttributes
+{
+ ///
+ /// @brief Operation id.
+ TMaybe<TOperationId> Id;
+
+ ///
+ /// @brief Operation type.
+ TMaybe<EOperationType> Type;
+
+ ///
+ /// @brief Operation state.
+ TMaybe<TString> State;
+
+ ///
+ /// @brief "Coarse-grained" operation state.
+ TMaybe<EOperationBriefState> BriefState;
+
+ ///
+ /// @brief Name of user that started the operation.
+ TMaybe<TString> AuthenticatedUser;
+
+ ///
+ /// @brief Operation start time.
+ TMaybe<TInstant> StartTime;
+
+ ///
+ /// @brief Operation finish time (if the operation has finished).
+ TMaybe<TInstant> FinishTime;
+
+ ///
+ /// @brief Brief progress of the operation.
+ TMaybe<TOperationBriefProgress> BriefProgress;
+
+ ///
+ /// @brief Brief spec of operation (light-weight fields only).
+ TMaybe<TNode> BriefSpec;
+
+ ///
+ /// @brief Spec of the operation as provided by the user.
+ TMaybe<TNode> Spec;
+
+ ///
+ /// @brief Full spec of operation (all fields not specified by user are filled with default values).
+ TMaybe<TNode> FullSpec;
+
+ ///
+ /// @brief Fields not recognized by scheduler.
+ TMaybe<TNode> UnrecognizedSpec;
+
+ ///
+ /// @brief Is operation suspended.
+ TMaybe<bool> Suspended;
+
+ ///
+ /// @brief Operation result.
+ TMaybe<TOperationResult> Result;
+
+ ///
+ /// @brief Operation progress.
+ TMaybe<TOperationProgress> Progress;
+
+ ///
+ /// @brief List of operation events (changes of state).
+ TMaybe<TVector<TOperationEvent>> Events;
+
+ ///
+ /// @brief Map from alert name to its description.
+ TMaybe<THashMap<TString, TYtError>> Alerts;
+};
+
+///
+/// @brief Direction of cursor for paging, see @ref NYT::TListOperationsOptions::CursorDirection.
+enum class ECursorDirection
+{
+ Past /* "past" */,
+ Future /* "future" */,
+};
+
+///
+/// @brief Options of @ref NYT::IClient::ListOperations command.
+///
+/// @see https://yt.yandex-team.ru/docs/api/commands.html#list_operations
+struct TListOperationsOptions
+{
+ /// @cond Doxygen_Suppress
+ using TSelf = TListOperationsOptions;
+ /// @endcond
+
+ ///
+ /// @name Time range specification
+ ///
+ /// List operations with start time in half-closed interval
+ /// `[CursorTime, ToTime)` if `CursorDirection == Future` or
+ /// `[FromTime, CursorTime)` if `CursorDirection == Past`.
+ ///@{
+
+ ///
+ /// @brief Search for operations with start time >= `FromTime`.
+ FLUENT_FIELD_OPTION(TInstant, FromTime);
+
+ ///
+ /// @brief Search for operations with start time < `ToTime`.
+ FLUENT_FIELD_OPTION(TInstant, ToTime);
+
+ ///
+ /// @brief Additional restriction on operation start time (useful for pagination).
+ ///
+ /// Search for operations with start time >= `CursorTime` if `CursorDirection == Future`
+ /// and with start time < `CursorTime` if `CursorDirection == Past`
+ FLUENT_FIELD_OPTION(TInstant, CursorTime);
+
+ ///
+ /// @brief Direction of pagination (see @ref NYT::TListOperationsOptions::CursorTime).
+ FLUENT_FIELD_OPTION(ECursorDirection, CursorDirection);
+
+ ///@}
+
+ ///
+ /// @name Filters
+ /// Choose operations satisfying given filters.
+ ///@{
+
+ ///
+ /// @brief Search for `Filter` as a substring in operation text factors
+ /// (e.g. title or input/output table paths).
+ FLUENT_FIELD_OPTION(TString, Filter);
+
+ ///
+ /// @brief Choose operations whose pools include `Pool`.
+ FLUENT_FIELD_OPTION(TString, Pool);
+
+ ///
+ /// @brief Choose operations with given @ref NYT::TOperationAttributes::AuthenticatedUser.
+ FLUENT_FIELD_OPTION(TString, User);
+
+ ///
+ /// @brief Choose operations with given @ref NYT::TOperationAttributes::State.
+ FLUENT_FIELD_OPTION(TString, State);
+
+ ///
+ /// @brief Choose operations with given @ref NYT::TOperationAttributes::Type.
+ FLUENT_FIELD_OPTION(EOperationType, Type);
+
+ ///
+ /// @brief Choose operations having (or not having) any failed jobs.
+ FLUENT_FIELD_OPTION(bool, WithFailedJobs);
+
+ ///@}
+
+ ///
+ /// @brief Search for operations in the archive in addition to Cypress.
+ FLUENT_FIELD_OPTION(bool, IncludeArchive);
+
+ ///
+ /// @brief Include the counters for different filter parameters in the response.
+ ///
+ /// Include number of operations for each pool, user, state, type
+ /// and the number of operations having failed jobs.
+ FLUENT_FIELD_OPTION(bool, IncludeCounters);
+
+ ///
+ /// @brief Return no more than `Limit` operations (current default and maximum value is 1000).
+ FLUENT_FIELD_OPTION(i64, Limit);
+};
+
+///
+/// @brief Response for @ref NYT::IClient::ListOperations command.
+struct TListOperationsResult
+{
+ ///
+ /// @brief Found operations' attributes.
+ TVector<TOperationAttributes> Operations;
+
+ ///
+ /// @name Counters for different filter.
+ ///
+ /// If counters were requested (@ref NYT::TListOperationsOptions::IncludeCounters is `true`)
+ /// the maps contain the number of operations found for each pool, user, state and type.
+ /// NOTE:
+ /// 1) Counters ignore CursorTime and CursorDirection,
+ /// they always are collected in the whole [FromTime, ToTime) interval.
+ /// 2) Each next counter in the sequence [pool, user, state, type, with_failed_jobs]
+ /// takes into account all the previous filters (i.e. if you set User filter to "some-user"
+ /// type counts describe only operations with user "some-user").
+ /// @{
+
+ ///
+ /// @brief Number of operations for each pool.
+ TMaybe<THashMap<TString, i64>> PoolCounts;
+
+ ///
+ /// @brief Number of operations for each user (subject to previous filters).
+ TMaybe<THashMap<TString, i64>> UserCounts;
+
+ ///
+ /// @brief Number of operations for each state (subject to previous filters).
+ TMaybe<THashMap<TString, i64>> StateCounts;
+
+ ///
+ /// @brief Number of operations for each type (subject to previous filters).
+ TMaybe<THashMap<EOperationType, i64>> TypeCounts;
+
+ ///
+ /// @brief Number of operations having failed jobs (subject to all previous filters).
+ TMaybe<i64> WithFailedJobsCount;
+
+ /// @}
+
+ ///
+ /// @brief Whether some operations were not returned due to @ref NYT::TListOperationsOptions::Limit.
+ ///
+ /// `Incomplete == true` means that not all operations satisfying filters
+ /// were returned (limit exceeded) and you need to repeat the request with new @ref NYT::TListOperationsOptions::CursorTime
+ /// (e.g. `CursorTime == *Operations.back().StartTime`, but don't forget to
+ /// remove the duplicates).
+ bool Incomplete;
+};
+
+////////////////////////////////////////////////////////////////////////////////
+
+///
+/// @brief Data source for @ref NYT::IClient::ListJobs command.
+enum class EListJobsDataSource : int
+{
+ Runtime /* "runtime" */,
+ Archive /* "archive" */,
+ Auto /* "auto" */,
+ Manual /* "manual" */,
+};
+
+///
+/// @brief Job type.
+enum class EJobType : int
+{
+ SchedulerFirst /* "scheduler_first" */,
+ Map /* "map" */,
+ PartitionMap /* "partition_map" */,
+ SortedMerge /* "sorted_merge" */,
+ OrderedMerge /* "ordered_merge" */,
+ UnorderedMerge /* "unordered_merge" */,
+ Partition /* "partition" */,
+ SimpleSort /* "simple_sort" */,
+ FinalSort /* "final_sort" */,
+ SortedReduce /* "sorted_reduce" */,
+ PartitionReduce /* "partition_reduce" */,
+ ReduceCombiner /* "reduce_combiner" */,
+ RemoteCopy /* "remote_copy" */,
+ IntermediateSort /* "intermediate_sort" */,
+ OrderedMap /* "ordered_map" */,
+ JoinReduce /* "join_reduce" */,
+ Vanilla /* "vanilla" */,
+ SchedulerUnknown /* "scheduler_unknown" */,
+ SchedulerLast /* "scheduler_last" */,
+ ReplicatorFirst /* "replicator_first" */,
+ ReplicateChunk /* "replicate_chunk" */,
+ RemoveChunk /* "remove_chunk" */,
+ RepairChunk /* "repair_chunk" */,
+ SealChunk /* "seal_chunk" */,
+ ReplicatorLast /* "replicator_last" */,
+};
+
+///
+/// @brief Well-known task names.
+enum class ETaskName : int
+{
+ Map /* "map" */,
+ PartitionMap0 /* "partition_map(0)" */,
+ SortedMerge /* "sorted_merge" */,
+ OrderedMerge /* "ordered_merge" */,
+ UnorderedMerge /* "unordered_merge" */,
+ Partition0 /* "partition(0)" */,
+ Partition1 /* "partition(1)" */,
+ Partition2 /* "partition(2)" */,
+ SimpleSort /* "simple_sort" */,
+ FinalSort /* "final_sort" */,
+ SortedReduce /* "sorted_reduce" */,
+ PartitionReduce /* "partition_reduce" */,
+ ReduceCombiner /* "reduce_combiner" */,
+ RemoteCopy /* "remote_copy" */,
+ IntermediateSort /* "intermediate_sort" */,
+ OrderedMap /* "ordered_map" */,
+ JoinReduce /* "join_reduce" */,
+};
+
+///
+/// @brief Task name (can either well-known or just a string).
+class TTaskName
+{
+public:
+
+ // Constructors are implicit by design.
+
+ ///
+ /// @brief Construct a custom task name.
+ TTaskName(TString taskName);
+
+ ///
+ /// @brief Construct a custom task name.
+ TTaskName(const char* taskName);
+
+ ///
+ /// @brief Construct a well-known task name.
+ TTaskName(ETaskName taskName);
+
+ const TString& Get() const;
+
+private:
+ TString TaskName_;
+};
+
+///
+/// @brief Job state.
+enum class EJobState : int
+{
+ None /* "none" */,
+ Waiting /* "waiting" */,
+ Running /* "running" */,
+ Aborting /* "aborting" */,
+ Completed /* "completed" */,
+ Failed /* "failed" */,
+ Aborted /* "aborted" */,
+ Lost /* "lost" */,
+};
+
+///
+/// @brief Job sort field.
+///
+/// @see @ref NYT::TListJobsOptions.
+enum class EJobSortField : int
+{
+ Type /* "type" */,
+ State /* "state" */,
+ StartTime /* "start_time" */,
+ FinishTime /* "finish_time" */,
+ Address /* "address" */,
+ Duration /* "duration" */,
+ Progress /* "progress" */,
+ Id /* "id" */,
+};
+
+///
+/// @brief Job sort direction.
+///
+/// @see @ref NYT::TListJobsOptions.
+enum class EJobSortDirection : int
+{
+ Ascending /* "ascending" */,
+ Descending /* "descending" */,
+};
+
+///
+/// @brief Options for @ref NYT::IClient::ListJobs.
+///
+/// @see https://yt.yandex-team.ru/docs/api/commands.html#list_jobs
+struct TListJobsOptions
+{
+ /// @cond Doxygen_Suppress
+ using TSelf = TListJobsOptions;
+ /// @endcond
+
+ ///
+ /// @name Filters
+ /// Return only jobs with given value of parameter (type, state, address and existence of stderr).
+ /// If a field is `Nothing()`, return jobs with all possible values of the corresponding parameter.
+ /// @{
+
+ ///
+ /// @brief Job type.
+ FLUENT_FIELD_OPTION(EJobType, Type);
+
+ ///
+ /// @brief Job state.
+ FLUENT_FIELD_OPTION(EJobState, State);
+
+ ///
+ /// @brief Address of the cluster node where job was running.
+ FLUENT_FIELD_OPTION(TString, Address);
+
+ ///
+ /// @brief Return only jobs whose stderr has been saved.
+ FLUENT_FIELD_OPTION(bool, WithStderr);
+
+ ///
+ /// @brief Return only jobs whose spec has been saved.
+ FLUENT_FIELD_OPTION(bool, WithSpec);
+
+ ///
+ /// @brief Return only jobs whose fail context has been saved.
+ FLUENT_FIELD_OPTION(bool, WithFailContext);
+
+ /// @}
+
+ ///
+ /// @name Sort options
+ /// @{
+
+ ///
+ /// @brief Sort by this field.
+ FLUENT_FIELD_OPTION(EJobSortField, SortField);
+
+ ///
+ /// @brief Sort order.
+ FLUENT_FIELD_OPTION(ESortOrder, SortOrder);
+
+ /// @}
+
+ ///
+ /// @brief Data source.
+ ///
+ /// Where to search for jobs: in scheduler and Cypress ('Runtime'), in archive ('Archive'),
+ /// automatically basing on operation presence in Cypress ('Auto') or choose manually (`Manual').
+ FLUENT_FIELD_OPTION(EListJobsDataSource, DataSource);
+
+ /// @deprecated
+ FLUENT_FIELD_OPTION(bool, IncludeCypress);
+
+ /// @deprecated
+ FLUENT_FIELD_OPTION(bool, IncludeControllerAgent);
+
+ /// @deprecated
+ FLUENT_FIELD_OPTION(bool, IncludeArchive);
+
+ ///
+ /// @brief Maximum number of jobs to return.
+ FLUENT_FIELD_OPTION(i64, Limit);
+
+ ///
+ /// @brief Number of jobs (in specified sort order) to skip.
+ ///
+ /// Together with @ref NYT::TListJobsOptions::Limit may be used for pagination.
+ FLUENT_FIELD_OPTION(i64, Offset);
+};
+
+///
+/// @brief Description of a core dump that happened in the job.
+struct TCoreInfo
+{
+ i64 ProcessId;
+ TString ExecutableName;
+ TMaybe<ui64> Size;
+ TMaybe<TYtError> Error;
+};
+
+///
+/// @brief Job attributes.
+///
+/// A field may be `Nothing()` if it is not available (i.e. `FinishTime` for a running job).
+///
+/// @see https://yt.yandex-team.ru/docs/api/commands#get_job
+struct TJobAttributes
+{
+ ///
+ /// @brief Job id.
+ TMaybe<TJobId> Id;
+
+ ///
+ /// @brief Job type
+ TMaybe<EJobType> Type;
+
+ ///
+ /// @brief Job state.
+ TMaybe<EJobState> State;
+
+ ///
+ /// @brief Address of a cluster node where job was running.
+ TMaybe<TString> Address;
+
+ ///
+ /// @brief The name of the task that job corresponds to.
+ TMaybe<TString> TaskName;
+
+ ///
+ /// @brief Job start time.
+ TMaybe<TInstant> StartTime;
+
+ ///
+ /// @brief Job finish time (for a finished job).
+ TMaybe<TInstant> FinishTime;
+
+ ///
+ /// @brief Estimated ratio of job's completed work.
+ TMaybe<double> Progress;
+
+ ///
+ /// @brief Size of saved job stderr.
+ TMaybe<i64> StderrSize;
+
+ ///
+ /// @brief Error for a unsuccessfully finished job.
+ TMaybe<TYtError> Error;
+
+ ///
+ /// @brief Job brief statistics.
+ TMaybe<TNode> BriefStatistics;
+
+ ///
+ /// @brief Job input paths (with ranges).
+ TMaybe<TVector<TRichYPath>> InputPaths;
+
+ ///
+ /// @brief Infos for core dumps produced by job.
+ TMaybe<TVector<TCoreInfo>> CoreInfos;
+};
+
+///
+/// @brief Response for @ref NYT::IOperation::ListJobs.
+struct TListJobsResult
+{
+ ///
+ /// @brief Jobs.
+ TVector<TJobAttributes> Jobs;
+
+ ///
+ /// @deprecated
+ TMaybe<i64> CypressJobCount;
+
+ ///
+ /// @brief Number of jobs retrieved from controller agent.
+ TMaybe<i64> ControllerAgentJobCount;
+
+ ///
+ /// @brief Number of jobs retrieved from archive.
+ TMaybe<i64> ArchiveJobCount;
+};
+
+////////////////////////////////////////////////////////////////////
+
+///
+/// @brief Options for @ref NYT::IClient::GetJob.
+struct TGetJobOptions
+{
+ /// @cond Doxygen_Suppress
+ using TSelf = TGetJobOptions;
+ /// @endcond
+};
+
+///
+/// @brief Options for @ref NYT::IClient::GetJobInput.
+struct TGetJobInputOptions
+{
+ /// @cond Doxygen_Suppress
+ using TSelf = TGetJobInputOptions;
+ /// @endcond
+};
+
+///
+/// @brief Options for @ref NYT::IClient::GetJobFailContext.
+struct TGetJobFailContextOptions
+{
+ /// @cond Doxygen_Suppress
+ using TSelf = TGetJobFailContextOptions;
+ /// @endcond
+};
+
+///
+/// @brief Options for @ref NYT::IClient::GetJobStderr.
+struct TGetJobStderrOptions
+{
+ /// @cond Doxygen_Suppress
+ using TSelf = TGetJobStderrOptions;
+ /// @endcond
+};
+
+////////////////////////////////////////////////////////////////////
+
+///
+/// @brief Options for @ref NYT::IOperation::GetFailedJobInfo.
+struct TGetFailedJobInfoOptions
+{
+ /// @cond Doxygen_Suppress
+ using TSelf = TGetFailedJobInfoOptions;
+ /// @endcond
+
+ ///
+ /// @brief How many jobs to download. Which jobs will be chosen is undefined.
+ FLUENT_FIELD_DEFAULT(ui64, MaxJobCount, 10);
+
+ ///
+ /// @brief How much of stderr tail should be downloaded.
+ FLUENT_FIELD_DEFAULT(ui64, StderrTailSize, 64 * 1024);
+};
+
+////////////////////////////////////////////////////////////////////////////////
+
+///
+/// @brief Interface representing an operation.
+struct IOperation
+ : public TThrRefBase
+{
+ virtual ~IOperation() = default;
+
+ ///
+ /// @brief Get operation id.
+ virtual const TOperationId& GetId() const = 0;
+
+ ///
+ /// @brief Get URL of the operation in YT Web UI.
+ virtual TString GetWebInterfaceUrl() const = 0;
+
+ ///
+ /// @brief Get last error for not started operations. Get state on YT cluster for started operations.
+ ///
+ /// For not started operations last error is an error that's being retried during operation
+ /// preparation/start (e.g. lock files, start operation request).
+ virtual TString GetStatus() const = 0;
+
+ ///
+ /// @brief Get preparation future.
+ ///
+ /// @return future that is set when operation is prepared.
+ virtual ::NThreading::TFuture<void> GetPreparedFuture() = 0;
+
+ ///
+ /// @brief Start operation synchronously.
+ ///
+ /// @note: Do NOT call this method twice.
+ ///
+ /// If operation is not prepared yet, Start() will block waiting for preparation finish.
+ /// Be ready to catch exception if operation preparation or start failed.
+ virtual void Start() = 0;
+
+ ///
+ /// @brief Is the operation started
+ ///
+ /// Returns true if the operation is started on the cluster
+ virtual bool IsStarted() const = 0;
+
+ ///
+ /// @brief Get start future.
+ ///
+ /// @return future that is set when operation is started.
+ virtual ::NThreading::TFuture<void> GetStartedFuture() = 0;
+
+ ///
+ /// @brief Start watching operation.
+ ///
+ /// @return future that is set when operation is complete.
+ ///
+ /// @note: the user should check value of returned future to ensure that operation completed successfully e.g.
+ /// @code{.cpp}
+ /// auto operationComplete = operation->Watch();
+ /// operationComplete.Wait();
+ /// operationComplete.GetValue(); /// will throw if operation completed with errors
+ /// @endcode
+ ///
+ /// If operation is completed successfully the returned future contains void value.
+ /// If operation is completed with error future contains @ref NYT::TOperationFailedError.
+ /// In rare cases when error occurred while waiting (e.g. YT become unavailable) future might contain other exception.
+ virtual ::NThreading::TFuture<void> Watch() = 0;
+
+ ///
+ /// @brief Get information about failed jobs.
+ ///
+ /// Can be called for operation in any stage.
+ /// Though user should keep in mind that this method always fetches info from cypress
+ /// and doesn't work when operation is archived. Successfully completed operations can be archived
+ /// quite quickly (in about ~30 seconds).
+ virtual TVector<TFailedJobInfo> GetFailedJobInfo(const TGetFailedJobInfoOptions& options = TGetFailedJobInfoOptions()) = 0;
+
+ ///
+ /// Get operation brief state.
+ virtual EOperationBriefState GetBriefState() = 0;
+
+ ///
+ /// @brief Get error (if operation has failed).
+ ///
+ /// @return `Nothing()` if operation is in 'Completed' or 'InProgress' state (or reason for failed / aborted operation).
+ virtual TMaybe<TYtError> GetError() = 0;
+
+ ///
+ /// Get job statistics.
+ virtual TJobStatistics GetJobStatistics() = 0;
+
+ ///
+ /// Get operation progress.
+ ///
+ /// @return `Nothing()` if operation has no running jobs yet, e.g. when it is in "materializing" or "pending" state.
+ virtual TMaybe<TOperationBriefProgress> GetBriefProgress() = 0;
+
+ ///
+ /// @brief Abort operation.
+ ///
+ /// Operation will be finished immediately.
+ /// All results of completed/running jobs will be lost.
+ ///
+ /// @see https://yt.yandex-team.ru/docs/api/commands#abort_op
+ virtual void AbortOperation() = 0;
+
+ ///
+ /// @brief Complete operation.
+ ///
+ /// Operation will be finished immediately.
+ /// All results of completed jobs will appear in output tables.
+ /// All results of running (not completed) jobs will be lost.
+ ///
+ /// @see https://yt.yandex-team.ru/docs/api/commands#complete_op
+ virtual void CompleteOperation() = 0;
+
+ ///
+ /// @brief Suspend operation.
+ ///
+ /// Jobs will not be aborted by default, c.f. @ref NYT::TSuspendOperationOptions.
+ ///
+ /// @see https://yt.yandex-team.ru/docs/api/commands#suspend_op
+ virtual void SuspendOperation(
+ const TSuspendOperationOptions& options = TSuspendOperationOptions()) = 0;
+
+ ///
+ /// @brief Resume previously suspended operation.
+ ///
+ /// @see https://yt.yandex-team.ru/docs/api/commands#resume_op
+ virtual void ResumeOperation(
+ const TResumeOperationOptions& options = TResumeOperationOptions()) = 0;
+
+ ///
+ /// @brief Get operation attributes.
+ ///
+ /// @see https://yt.yandex-team.ru/docs/api/commands#get_operation
+ virtual TOperationAttributes GetAttributes(
+ const TGetOperationOptions& options = TGetOperationOptions()) = 0;
+
+ ///
+ /// @brief Update operation runtime parameters.
+ ///
+ /// @see https://yt.yandex-team.ru/docs/api/commands#update_op_parameters
+ virtual void UpdateParameters(
+ const TUpdateOperationParametersOptions& options = TUpdateOperationParametersOptions()) = 0;
+
+ ///
+ /// @brief Get job attributes.
+ ///
+ /// @see https://yt.yandex-team.ru/docs/api/commands#get_job
+ virtual TJobAttributes GetJob(
+ const TJobId& jobId,
+ const TGetJobOptions& options = TGetJobOptions()) = 0;
+
+ ///
+ /// List jobs satisfying given filters (see @ref NYT::TListJobsOptions).
+ ///
+ /// @see https://yt.yandex-team.ru/docs/api/commands#list_jobs
+ virtual TListJobsResult ListJobs(
+ const TListJobsOptions& options = TListJobsOptions()) = 0;
+};
+
+///
+/// @brief Interface of client capable of managing operations.
+struct IOperationClient
+{
+ ///
+ /// @brief Run Map operation.
+ ///
+ /// @param spec Operation spec.
+ /// @param mapper Instance of a job to run.
+ /// @param options Optional parameters.
+ ///
+ /// @see https://yt.yandex-team.ru/docs/description/mr/map
+ IOperationPtr Map(
+ const TMapOperationSpec& spec,
+ ::TIntrusivePtr<IMapperBase> mapper,
+ const TOperationOptions& options = TOperationOptions());
+
+ ///
+ /// @brief Run Map operation.
+ ///
+ /// @param mapper Instance of a job to run.
+ /// @param input Input table(s)
+ /// @param output Output table(s)
+ /// @param spec Operation spec.
+ /// @param options Optional parameters.
+ ///
+ /// @see https://yt.yandex-team.ru/docs/description/mr/map
+ IOperationPtr Map(
+ ::TIntrusivePtr<IMapperBase> mapper,
+ const TOneOrMany<TStructuredTablePath>& input,
+ const TOneOrMany<TStructuredTablePath>& output,
+ const TMapOperationSpec& spec = TMapOperationSpec(),
+ const TOperationOptions& options = TOperationOptions());
+
+ ///
+ /// @brief Run raw Map operation.
+ ///
+ /// @param spec Operation spec.
+ /// @param rawJob Instance of a raw mapper to run.
+ /// @param options Optional parameters.
+ ///
+ /// @see https://yt.yandex-team.ru/docs/description/mr/map
+ virtual IOperationPtr RawMap(
+ const TRawMapOperationSpec& spec,
+ ::TIntrusivePtr<IRawJob> rawJob,
+ const TOperationOptions& options = TOperationOptions()) = 0;
+
+ ///
+ /// @brief Run Reduce operation.
+ ///
+ /// @param spec Operation spec.
+ /// @param reducer Instance of a job to run.
+ /// @param options Optional parameters.
+ ///
+ /// @see https://yt.yandex-team.ru/docs/description/mr/reduce
+ IOperationPtr Reduce(
+ const TReduceOperationSpec& spec,
+ ::TIntrusivePtr<IReducerBase> reducer,
+ const TOperationOptions& options = TOperationOptions());
+
+ ///
+ /// @brief Run Reduce operation.
+ ///
+ /// @param reducer Instance of a job to run.
+ /// @param input Input table(s)
+ /// @param output Output table(s)
+ /// @param reduceBy Columns to group rows by.
+ /// @param spec Operation spec.
+ /// @param options Optional parameters.
+ ///
+ /// @see https://yt.yandex-team.ru/docs/description/mr/reduce
+ IOperationPtr Reduce(
+ ::TIntrusivePtr<IReducerBase> reducer,
+ const TOneOrMany<TStructuredTablePath>& input,
+ const TOneOrMany<TStructuredTablePath>& output,
+ const TSortColumns& reduceBy,
+ const TReduceOperationSpec& spec = TReduceOperationSpec(),
+ const TOperationOptions& options = TOperationOptions());
+
+ ///
+ /// @brief Run raw Reduce operation.
+ ///
+ /// @param spec Operation spec.
+ /// @param rawJob Instance of a raw reducer to run.
+ /// @param options Optional parameters.
+ ///
+ /// @see https://yt.yandex-team.ru/docs/description/mr/reduce
+ virtual IOperationPtr RawReduce(
+ const TRawReduceOperationSpec& spec,
+ ::TIntrusivePtr<IRawJob> rawJob,
+ const TOperationOptions& options = TOperationOptions()) = 0;
+
+ ///
+ /// @brief Run JoinReduce operation.
+ ///
+ /// @param spec Operation spec.
+ /// @param reducer Instance of a job to run.
+ /// @param options Optional parameters.
+ ///
+ /// @deprecated Use @ref NYT::IOperationClient::Reduce with @ref NYT::TReduceOperationSpec::EnableKeyGuarantee set to `false.
+ IOperationPtr JoinReduce(
+ const TJoinReduceOperationSpec& spec,
+ ::TIntrusivePtr<IReducerBase> reducer,
+ const TOperationOptions& options = TOperationOptions());
+
+ ///
+ /// @brief Run raw JoinReduce operation.
+ ///
+ /// @param spec Operation spec.
+ /// @param rawJob Instance of a raw reducer to run.
+ /// @param options Optional parameters.
+ ///
+ /// @deprecated Use @ref NYT::IOperationClient::RawReduce with @ref NYT::TReduceOperationSpec::EnableKeyGuarantee set to `false.
+ virtual IOperationPtr RawJoinReduce(
+ const TRawJoinReduceOperationSpec& spec,
+ ::TIntrusivePtr<IRawJob> rawJob,
+ const TOperationOptions& options = TOperationOptions()) = 0;
+
+ ///
+ /// @brief Run MapReduce operation.
+ ///
+ /// @param spec Operation spec.
+ /// @param mapper Instance of a map job to run (identity mapper if `nullptr`).
+ /// @param reducer Instance of a reduce job to run.
+ /// @param options Optional parameters.
+ ///
+ /// @see https://yt.yandex-team.ru/docs/description/mr/mapreduce
+ IOperationPtr MapReduce(
+ const TMapReduceOperationSpec& spec,
+ ::TIntrusivePtr<IMapperBase> mapper,
+ ::TIntrusivePtr<IReducerBase> reducer,
+ const TOperationOptions& options = TOperationOptions());
+
+ ///
+ /// @brief Run MapReduce operation.
+ ///
+ /// @param spec Operation spec.
+ /// @param mapper Instance of a map job to run (identity mapper if `nullptr`).
+ /// @param reducerCombiner Instance of a reduce combiner to run (identity reduce combiner if `nullptr`).
+ /// @param reducer Instance of a reduce job to run.
+ /// @param options Optional parameters.
+ ///
+ /// @see https://yt.yandex-team.ru/docs/description/mr/mapreduce
+ IOperationPtr MapReduce(
+ const TMapReduceOperationSpec& spec,
+ ::TIntrusivePtr<IMapperBase> mapper,
+ ::TIntrusivePtr<IReducerBase> reduceCombiner,
+ ::TIntrusivePtr<IReducerBase> reducer,
+ const TOperationOptions& options = TOperationOptions());
+
+ ///
+ /// @brief Run MapReduce operation.
+ ///
+ /// @param mapper Instance of mapper to run (identity mapper if `nullptr`).
+ /// @param reducer Instance of reducer to run.
+ /// @param input Input table(s)
+ /// @param output Output table(s)
+ /// @param reduceBy Columns to group rows by.
+ /// @param spec Operation spec.
+ /// @param options Optional parameters.
+ ///
+ /// @see https://yt.yandex-team.ru/docs/description/mr/mapreduce
+ IOperationPtr MapReduce(
+ ::TIntrusivePtr<IMapperBase> mapper,
+ ::TIntrusivePtr<IReducerBase> reducer,
+ const TOneOrMany<TStructuredTablePath>& input,
+ const TOneOrMany<TStructuredTablePath>& output,
+ const TSortColumns& reduceBy,
+ TMapReduceOperationSpec spec = TMapReduceOperationSpec(),
+ const TOperationOptions& options = TOperationOptions());
+
+ ///
+ /// @brief Run MapReduce operation.
+ ///
+ /// @param mapper Instance of mapper to run (identity mapper if `nullptr`).
+ /// @param reduceCombiner Instance of reduceCombiner to run (identity reduce combiner if `nullptr`).
+ /// @param reducer Instance of reducer to run.
+ /// @param input Input table(s)
+ /// @param output Output table(s)
+ /// @param reduceBy Columns to group rows by.
+ /// @param spec Operation spec.
+ /// @param options Optional parameters.
+ ///
+ /// @see https://yt.yandex-team.ru/docs/description/mr/mapreduce
+ IOperationPtr MapReduce(
+ ::TIntrusivePtr<IMapperBase> mapper,
+ ::TIntrusivePtr<IReducerBase> reduceCombiner,
+ ::TIntrusivePtr<IReducerBase> reducer,
+ const TOneOrMany<TStructuredTablePath>& input,
+ const TOneOrMany<TStructuredTablePath>& output,
+ const TSortColumns& reduceBy,
+ TMapReduceOperationSpec spec = TMapReduceOperationSpec(),
+ const TOperationOptions& options = TOperationOptions());
+
+ ///
+ /// @brief Run raw MapReduce operation.
+ ///
+ /// @param spec Operation spec.
+ /// @param mapper Instance of a raw mapper to run (identity mapper if `nullptr`).
+ /// @param mapper Instance of a raw reduce combiner to run (identity reduce combiner if `nullptr`).
+ /// @param mapper Instance of a raw reducer to run.
+ /// @param options Optional parameters.
+ ///
+ /// @see https://yt.yandex-team.ru/docs/description/mr/mapreduce
+ virtual IOperationPtr RawMapReduce(
+ const TRawMapReduceOperationSpec& spec,
+ ::TIntrusivePtr<IRawJob> mapper,
+ ::TIntrusivePtr<IRawJob> reduceCombiner,
+ ::TIntrusivePtr<IRawJob> reducer,
+ const TOperationOptions& options = TOperationOptions()) = 0;
+
+ ///
+ /// @brief Run Sort operation.
+ ///
+ /// @param spec Operation spec.
+ /// @param options Optional parameters.
+ ///
+ /// @see https://yt.yandex-team.ru/docs/description/mr/sort
+ virtual IOperationPtr Sort(
+ const TSortOperationSpec& spec,
+ const TOperationOptions& options = TOperationOptions()) = 0;
+
+ ///
+ /// @brief Run Sort operation.
+ ///
+ /// @param input Input table(s).
+ /// @param output Output table.
+ /// @param sortBy Columns to sort input rows by.
+ /// @param spec Operation spec.
+ /// @param options Optional parameters.
+ ///
+ /// @see https://yt.yandex-team.ru/docs/description/mr/sort
+ IOperationPtr Sort(
+ const TOneOrMany<TRichYPath>& input,
+ const TRichYPath& output,
+ const TSortColumns& sortBy,
+ const TSortOperationSpec& spec = TSortOperationSpec(),
+ const TOperationOptions& options = TOperationOptions());
+
+ ///
+ /// @brief Run Merge operation.
+ ///
+ /// @param spec Operation spec.
+ /// @param options Optional parameters.
+ ///
+ /// @see https://yt.yandex-team.ru/docs/description/mr/merge
+ virtual IOperationPtr Merge(
+ const TMergeOperationSpec& spec,
+ const TOperationOptions& options = TOperationOptions()) = 0;
+
+ ///
+ /// @brief Run Erase operation.
+ ///
+ /// @param spec Operation spec.
+ /// @param options Optional parameters.
+ ///
+ /// @see https://yt.yandex-team.ru/docs/description/mr/erase
+ virtual IOperationPtr Erase(
+ const TEraseOperationSpec& spec,
+ const TOperationOptions& options = TOperationOptions()) = 0;
+
+ ///
+ /// @brief Run RemoteCopy operation.
+ ///
+ /// @param spec Operation spec.
+ /// @param options Optional parameters.
+ ///
+ /// @see https://yt.yandex-team.ru/docs/description/mr/remote_copy
+ virtual IOperationPtr RemoteCopy(
+ const TRemoteCopyOperationSpec& spec,
+ const TOperationOptions& options = TOperationOptions()) = 0;
+
+ ///
+ /// @brief Run Vanilla operation.
+ ///
+ /// @param spec Operation spec.
+ /// @param options Optional parameters.
+ ///
+ /// @see https://yt.yandex-team.ru/docs/description/mr/vanilla
+ virtual IOperationPtr RunVanilla(
+ const TVanillaOperationSpec& spec,
+ const TOperationOptions& options = TOperationOptions()) = 0;
+
+ ///
+ /// @brief Abort operation.
+ ///
+ /// @see https://yt.yandex-team.ru/docs/api/commands#abort_op
+ virtual void AbortOperation(
+ const TOperationId& operationId) = 0;
+
+ ///
+ /// @brief Complete operation.
+ ///
+ /// @see https://yt.yandex-team.ru/docs/api/commands#complete_op
+ virtual void CompleteOperation(
+ const TOperationId& operationId) = 0;
+
+ ///
+ /// @brief Wait for operation to finish.
+ virtual void WaitForOperation(
+ const TOperationId& operationId) = 0;
+
+ ///
+ /// @brief Check and return operation status.
+ ///
+ /// @note this function will never return @ref NYT::EOperationBriefState::Failed or @ref NYT::EOperationBriefState::Aborted status,
+ /// it will throw @ref NYT::TOperationFailedError instead.
+ virtual EOperationBriefState CheckOperation(
+ const TOperationId& operationId) = 0;
+
+ ///
+ /// @brief Create an operation object given operation id.
+ ///
+ /// @throw @ref NYT::TErrorResponse if the operation doesn't exist.
+ virtual IOperationPtr AttachOperation(const TOperationId& operationId) = 0;
+
+private:
+ virtual IOperationPtr DoMap(
+ const TMapOperationSpec& spec,
+ ::TIntrusivePtr<IStructuredJob> mapper,
+ const TOperationOptions& options) = 0;
+
+ virtual IOperationPtr DoReduce(
+ const TReduceOperationSpec& spec,
+ ::TIntrusivePtr<IStructuredJob> reducer,
+ const TOperationOptions& options) = 0;
+
+ virtual IOperationPtr DoJoinReduce(
+ const TJoinReduceOperationSpec& spec,
+ ::TIntrusivePtr<IStructuredJob> reducer,
+ const TOperationOptions& options) = 0;
+
+ virtual IOperationPtr DoMapReduce(
+ const TMapReduceOperationSpec& spec,
+ ::TIntrusivePtr<IStructuredJob> mapper,
+ ::TIntrusivePtr<IStructuredJob> reduceCombiner,
+ ::TIntrusivePtr<IStructuredJob> reducer,
+ const TOperationOptions& options) = 0;
+};
+
+////////////////////////////////////////////////////////////////////////////////
+
+} // namespace NYT
+
+#define OPERATION_INL_H_
+#include "operation-inl.h"
+#undef OPERATION_INL_H_
diff --git a/yt/cpp/mapreduce/interface/operation_ut.cpp b/yt/cpp/mapreduce/interface/operation_ut.cpp
new file mode 100644
index 0000000000..0fa62e1568
--- /dev/null
+++ b/yt/cpp/mapreduce/interface/operation_ut.cpp
@@ -0,0 +1,269 @@
+#include <yt/cpp/mapreduce/interface/common_ut.h>
+#include <yt/cpp/mapreduce/interface/job_statistics.h>
+#include <yt/cpp/mapreduce/interface/operation.h>
+#include <yt/cpp/mapreduce/interface/protobuf_table_schema_ut.pb.h>
+
+#include <yt/cpp/mapreduce/tests/yt_unittest_lib/yt_unittest_lib.h>
+
+#include <library/cpp/yson/node/node_io.h>
+
+#include <library/cpp/testing/unittest/registar.h>
+
+using namespace NYT;
+using namespace NYT::NUnitTesting;
+
+class TDummyInferenceContext
+ : public IOperationPreparationContext
+{
+public:
+ TDummyInferenceContext(int inputCount, int outputCount)
+ : InputCount_(inputCount)
+ , OutputCount_(outputCount)
+ , InputSchemas_(inputCount)
+ { }
+
+ int GetInputCount() const override
+ {
+ return InputCount_;
+ }
+
+ int GetOutputCount() const override
+ {
+ return OutputCount_;
+ }
+
+ const TVector<TTableSchema>& GetInputSchemas() const override
+ {
+ return InputSchemas_;
+ }
+
+ const TTableSchema& GetInputSchema(int index) const override
+ {
+ return InputSchemas_[index];
+ }
+
+ TMaybe<TYPath> GetInputPath(int) const override
+ {
+ return Nothing();
+ }
+
+ TMaybe<TYPath> GetOutputPath(int) const override
+ {
+ return Nothing();
+ }
+
+private:
+ int InputCount_;
+ int OutputCount_;
+ TVector<TTableSchema> InputSchemas_;
+};
+
+Y_UNIT_TEST_SUITE(PrepareOperation)
+{
+
+ Y_UNIT_TEST(BasicSchemas)
+ {
+ auto firstSchema = TTableSchema()
+ .AddColumn(TColumnSchema().Name("some_column").Type(EValueType::VT_UINT64));
+ auto otherSchema = TTableSchema()
+ .AddColumn(TColumnSchema().Name("other_column").Type(EValueType::VT_BOOLEAN));
+ auto thirdSchema = TTableSchema()
+ .AddColumn(TColumnSchema().Name("third_column").Type(EValueType::VT_STRING));
+
+ TDummyInferenceContext context(3,7);
+ TJobOperationPreparer builder(context);
+
+ builder
+ .OutputSchema(1, firstSchema)
+ .BeginOutputGroup(TVector<int>{2, 5})
+ .Schema(otherSchema)
+ .EndOutputGroup()
+ .BeginOutputGroup(3, 5)
+ .Schema(thirdSchema)
+ .EndOutputGroup()
+ .BeginOutputGroup(TVector<int>{0, 6})
+ .Schema(thirdSchema)
+ .EndOutputGroup();
+
+ UNIT_ASSERT_EXCEPTION(builder.OutputSchema(1, otherSchema), TApiUsageError);
+ UNIT_ASSERT_EXCEPTION(builder.BeginOutputGroup(3, 5).Schema(otherSchema), TApiUsageError);
+ UNIT_ASSERT_EXCEPTION(builder.BeginOutputGroup(TVector<int>{3,6,7}).Schema(otherSchema), TApiUsageError);
+
+ builder.Finish();
+ auto result = builder.GetOutputSchemas();
+
+ ASSERT_SERIALIZABLES_EQUAL(result[0], thirdSchema);
+ ASSERT_SERIALIZABLES_EQUAL(result[1], firstSchema);
+ ASSERT_SERIALIZABLES_EQUAL(result[2], otherSchema);
+ ASSERT_SERIALIZABLES_EQUAL(result[3], thirdSchema);
+ ASSERT_SERIALIZABLES_EQUAL(result[4], thirdSchema);
+ ASSERT_SERIALIZABLES_EQUAL(result[5], otherSchema);
+ ASSERT_SERIALIZABLES_EQUAL(result[6], thirdSchema);
+ }
+
+ Y_UNIT_TEST(NoSchema)
+ {
+ auto schema = TTableSchema()
+ .AddColumn(TColumnSchema().Name("some_column").Type(EValueType::VT_UINT64));
+
+ TDummyInferenceContext context(3,4);
+ TJobOperationPreparer builder(context);
+
+ builder
+ .OutputSchema(1, schema)
+ .NoOutputSchema(0)
+ .BeginOutputGroup(2, 4)
+ .Schema(schema)
+ .EndOutputGroup();
+
+ UNIT_ASSERT_EXCEPTION(builder.OutputSchema(0, schema), TApiUsageError);
+
+ builder.Finish();
+ auto result = builder.GetOutputSchemas();
+
+ UNIT_ASSERT(result[0].Empty());
+
+ ASSERT_SERIALIZABLES_EQUAL(result[1], schema);
+ ASSERT_SERIALIZABLES_EQUAL(result[2], schema);
+ ASSERT_SERIALIZABLES_EQUAL(result[3], schema);
+ }
+
+ Y_UNIT_TEST(Descriptions)
+ {
+ auto urlRowSchema = TTableSchema()
+ .AddColumn(TColumnSchema().Name("Host").Type(NTi::Optional(NTi::String())))
+ .AddColumn(TColumnSchema().Name("Path").Type(NTi::Optional(NTi::String())))
+ .AddColumn(TColumnSchema().Name("HttpCode").Type(NTi::Optional(NTi::Int32())));
+
+ auto urlRowStruct = NTi::Struct({
+ {"Host", NTi::Optional(NTi::String())},
+ {"Path", NTi::Optional(NTi::String())},
+ {"HttpCode", NTi::Optional(NTi::Int32())},
+ });
+
+ auto rowFieldSerializationOptionSchema = TTableSchema()
+ .AddColumn(TColumnSchema().Name("UrlRow_1").Type(NTi::Optional(urlRowStruct)))
+ .AddColumn(TColumnSchema().Name("UrlRow_2").Type(NTi::Optional(NTi::String())));
+
+ auto rowSerializedRepeatedFieldsSchema = TTableSchema()
+ .AddColumn(TColumnSchema().Name("Ints").Type(NTi::List(NTi::Int64())))
+ .AddColumn(TColumnSchema().Name("UrlRows").Type(NTi::List(urlRowStruct)));
+
+ TDummyInferenceContext context(5,7);
+ TJobOperationPreparer builder(context);
+
+ builder
+ .InputDescription<TUrlRow>(0)
+ .BeginInputGroup(2, 3)
+ .Description<TUrlRow>()
+ .EndInputGroup()
+ .BeginInputGroup(TVector<int>{1, 4})
+ .Description<TRowSerializedRepeatedFields>()
+ .EndInputGroup()
+ .InputDescription<TUrlRow>(3);
+
+ UNIT_ASSERT_EXCEPTION(builder.InputDescription<TUrlRow>(0), TApiUsageError);
+
+ builder
+ .OutputDescription<TUrlRow>(0, false)
+ .OutputDescription<TRowFieldSerializationOption>(1)
+ .BeginOutputGroup(2, 4)
+ .Description<TUrlRow>()
+ .EndOutputGroup()
+ .BeginOutputGroup(TVector<int>{4,6})
+ .Description<TRowSerializedRepeatedFields>()
+ .EndOutputGroup()
+ .OutputDescription<TUrlRow>(5, false);
+
+ UNIT_ASSERT_EXCEPTION(builder.OutputDescription<TUrlRow>(0), TApiUsageError);
+ UNIT_ASSERT_NO_EXCEPTION(builder.OutputSchema(0, urlRowSchema));
+ UNIT_ASSERT_NO_EXCEPTION(builder.OutputSchema(5, urlRowSchema));
+ UNIT_ASSERT_EXCEPTION(builder.OutputSchema(1, urlRowSchema), TApiUsageError);
+
+ builder.Finish();
+ auto result = builder.GetOutputSchemas();
+
+ ASSERT_SERIALIZABLES_EQUAL(result[0], urlRowSchema);
+ ASSERT_SERIALIZABLES_EQUAL(result[1], rowFieldSerializationOptionSchema);
+ ASSERT_SERIALIZABLES_EQUAL(result[2], urlRowSchema);
+ ASSERT_SERIALIZABLES_EQUAL(result[3], urlRowSchema);
+ ASSERT_SERIALIZABLES_EQUAL(result[4], rowSerializedRepeatedFieldsSchema);
+ ASSERT_SERIALIZABLES_EQUAL(result[5], urlRowSchema);
+ ASSERT_SERIALIZABLES_EQUAL(result[6], rowSerializedRepeatedFieldsSchema);
+
+ auto expectedInputDescriptions = TVector<TMaybe<TTableStructure>>{
+ {TProtobufTableStructure{TUrlRow::descriptor()}},
+ {TProtobufTableStructure{TRowSerializedRepeatedFields::descriptor()}},
+ {TProtobufTableStructure{TUrlRow::descriptor()}},
+ {TProtobufTableStructure{TUrlRow::descriptor()}},
+ {TProtobufTableStructure{TRowSerializedRepeatedFields::descriptor()}},
+ };
+ UNIT_ASSERT_EQUAL(expectedInputDescriptions, builder.GetInputDescriptions());
+
+ auto expectedOutputDescriptions = TVector<TMaybe<TTableStructure>>{
+ {TProtobufTableStructure{TUrlRow::descriptor()}},
+ {TProtobufTableStructure{TRowFieldSerializationOption::descriptor()}},
+ {TProtobufTableStructure{TUrlRow::descriptor()}},
+ {TProtobufTableStructure{TUrlRow::descriptor()}},
+ {TProtobufTableStructure{TRowSerializedRepeatedFields::descriptor()}},
+ {TProtobufTableStructure{TUrlRow::descriptor()}},
+ {TProtobufTableStructure{TRowSerializedRepeatedFields::descriptor()}},
+ };
+ UNIT_ASSERT_EQUAL(expectedOutputDescriptions, builder.GetOutputDescriptions());
+ }
+
+ Y_UNIT_TEST(InputColumns)
+ {
+ TDummyInferenceContext context(5, 1);
+ TJobOperationPreparer builder(context);
+ builder
+ .InputColumnFilter(2, {"a", "b"})
+ .BeginInputGroup(0, 2)
+ .ColumnFilter({"b", "c"})
+ .ColumnRenaming({{"b", "B"}, {"c", "C"}})
+ .EndInputGroup()
+ .InputColumnRenaming(3, {{"a", "AAA"}})
+ .NoOutputSchema(0);
+ builder.Finish();
+
+ auto expectedRenamings = TVector<THashMap<TString, TString>>{
+ {{"b", "B"}, {"c", "C"}},
+ {{"b", "B"}, {"c", "C"}},
+ {},
+ {{"a", "AAA"}},
+ {},
+ };
+ UNIT_ASSERT_EQUAL(builder.GetInputColumnRenamings(), expectedRenamings);
+
+ auto expectedFilters = TVector<TMaybe<TVector<TString>>>{
+ {{"b", "c"}},
+ {{"b", "c"}},
+ {{"a", "b"}},
+ {},
+ {},
+ };
+ UNIT_ASSERT_EQUAL(builder.GetInputColumnFilters(), expectedFilters);
+ }
+
+ Y_UNIT_TEST(Bug_r7349102)
+ {
+ auto firstSchema = TTableSchema()
+ .AddColumn(TColumnSchema().Name("some_column").Type(EValueType::VT_UINT64));
+ auto otherSchema = TTableSchema()
+ .AddColumn(TColumnSchema().Name("other_column").Type(EValueType::VT_BOOLEAN));
+ auto thirdSchema = TTableSchema()
+ .AddColumn(TColumnSchema().Name("third_column").Type(EValueType::VT_STRING));
+
+ TDummyInferenceContext context(3,1);
+ TJobOperationPreparer builder(context);
+
+ builder
+ .InputDescription<TUrlRow>(0)
+ .InputDescription<TUrlRow>(1)
+ .InputDescription<TUrlRow>(2)
+ .OutputDescription<TUrlRow>(0);
+
+ builder.Finish();
+ }
+
+} // Y_UNIT_TEST_SUITE(SchemaInference)
diff --git a/yt/cpp/mapreduce/interface/proto3_ut.proto b/yt/cpp/mapreduce/interface/proto3_ut.proto
new file mode 100644
index 0000000000..b24c13085b
--- /dev/null
+++ b/yt/cpp/mapreduce/interface/proto3_ut.proto
@@ -0,0 +1,17 @@
+syntax = "proto3";
+
+import "yt/yt_proto/yt/formats/extension.proto";
+
+package NYT.NTestingProto3;
+
+option (NYT.file_default_field_flags) = SERIALIZATION_YT;
+
+message TWithOptional
+{
+ optional int64 x = 1;
+}
+
+message TWithOptionalMessage
+{
+ optional TWithOptional x = 1;
+}
diff --git a/yt/cpp/mapreduce/interface/protobuf_file_options_ut.cpp b/yt/cpp/mapreduce/interface/protobuf_file_options_ut.cpp
new file mode 100644
index 0000000000..5ffa9564d7
--- /dev/null
+++ b/yt/cpp/mapreduce/interface/protobuf_file_options_ut.cpp
@@ -0,0 +1,271 @@
+#include "errors.h"
+#include "format.h"
+#include "common_ut.h"
+
+#include <yt/cpp/mapreduce/interface/protobuf_file_options_ut.pb.h>
+
+#include <yt/cpp/mapreduce/tests/yt_unittest_lib/yt_unittest_lib.h>
+
+#include <library/cpp/testing/unittest/registar.h>
+
+using namespace NYT;
+
+Y_UNIT_TEST_SUITE(ProtobufFileOptions)
+{
+ NTi::TTypePtr GetUrlRowType(bool required)
+ {
+ static const NTi::TTypePtr structType = NTi::Struct({
+ {"Host", ToTypeV3(EValueType::VT_STRING, false)},
+ {"Path", ToTypeV3(EValueType::VT_STRING, false)},
+ {"HttpCode", ToTypeV3(EValueType::VT_INT32, false)}});
+ return required ? structType : NTi::TTypePtr(NTi::Optional(structType));
+ }
+
+ Y_UNIT_TEST(TRowFieldSerializationOption)
+ {
+ const auto schema = CreateTableSchema<NTestingFileOptions::TRowFieldSerializationOption>();
+
+ ASSERT_SERIALIZABLES_EQUAL(schema, TTableSchema()
+ .AddColumn(TColumnSchema().Name("UrlRow_1").Type(ToTypeV3(EValueType::VT_STRING, false)))
+ .AddColumn(TColumnSchema().Name("UrlRow_2").Type(GetUrlRowType(false))));
+ }
+
+ Y_UNIT_TEST(TRowMixedSerializationOptions)
+ {
+ const auto schema = CreateTableSchema<NTestingFileOptions::TRowMixedSerializationOptions>();
+
+ ASSERT_SERIALIZABLES_EQUAL(schema, TTableSchema()
+ .AddColumn(TColumnSchema().Name("UrlRow_1").Type(ToTypeV3(EValueType::VT_STRING, false)))
+ .AddColumn(TColumnSchema().Name("UrlRow_2").Type(GetUrlRowType(false))));
+ }
+
+ Y_UNIT_TEST(FieldSortOrder)
+ {
+ const auto schema = CreateTableSchema<NTestingFileOptions::TFieldSortOrder>();
+
+ auto asInProtoFile = NTi::Optional(NTi::Struct({
+ {"x", NTi::Optional(NTi::Int64())},
+ {"y", NTi::Optional(NTi::String())},
+ {"z", NTi::Optional(NTi::Bool())},
+ }));
+ auto byFieldNumber = NTi::Optional(NTi::Struct({
+ {"z", NTi::Optional(NTi::Bool())},
+ {"x", NTi::Optional(NTi::Int64())},
+ {"y", NTi::Optional(NTi::String())},
+ }));
+
+ ASSERT_SERIALIZABLES_EQUAL(schema, TTableSchema()
+ .AddColumn(TColumnSchema().Name("EmbeddedDefault").Type(asInProtoFile))
+ .AddColumn(TColumnSchema().Name("EmbeddedAsInProtoFile").Type(asInProtoFile))
+ .AddColumn(TColumnSchema().Name("EmbeddedByFieldNumber").Type(byFieldNumber)));
+ }
+
+ Y_UNIT_TEST(Map)
+ {
+ const auto schema = CreateTableSchema<NTestingFileOptions::TWithMap>();
+
+ auto createKeyValueStruct = [] (NTi::TTypePtr key, NTi::TTypePtr value) {
+ return NTi::List(NTi::Struct({
+ {"key", NTi::Optional(key)},
+ {"value", NTi::Optional(value)},
+ }));
+ };
+
+ auto embedded = NTi::Struct({
+ {"x", NTi::Optional(NTi::Int64())},
+ {"y", NTi::Optional(NTi::String())},
+ });
+
+ ASSERT_SERIALIZABLES_EQUAL(schema, TTableSchema()
+ .AddColumn(TColumnSchema()
+ .Name("MapDefault")
+ .Type(createKeyValueStruct(NTi::Int64(), embedded)))
+ .AddColumn(TColumnSchema()
+ .Name("MapDict")
+ .Type(NTi::Dict(NTi::Int64(), embedded))));
+ }
+
+ Y_UNIT_TEST(Oneof)
+ {
+ const auto schema = CreateTableSchema<NTestingFileOptions::TWithOneof>();
+
+ auto embedded = NTi::Struct({
+ {"x", NTi::Optional(NTi::Int64())},
+ {"y", NTi::Optional(NTi::String())},
+ });
+
+ auto defaultVariantType = NTi::Optional(NTi::Struct({
+ {"field", NTi::Optional(NTi::String())},
+ {"Oneof2", NTi::Optional(NTi::Variant(NTi::Struct({
+ {"y2", NTi::String()},
+ {"z2", embedded},
+ {"x2", NTi::Int64()},
+ })))},
+ {"x1", NTi::Optional(NTi::Int64())},
+ {"y1", NTi::Optional(NTi::String())},
+ {"z1", NTi::Optional(embedded)},
+ }));
+
+ auto noDefaultType = NTi::Optional(NTi::Struct({
+ {"field", NTi::Optional(NTi::String())},
+ {"y2", NTi::Optional(NTi::String())},
+ {"z2", NTi::Optional(embedded)},
+ {"x2", NTi::Optional(NTi::Int64())},
+ {"x1", NTi::Optional(NTi::Int64())},
+ {"y1", NTi::Optional(NTi::String())},
+ {"z1", NTi::Optional(embedded)},
+ }));
+
+ ASSERT_SERIALIZABLES_EQUAL(schema, TTableSchema()
+ .AddColumn(TColumnSchema()
+ .Name("DefaultVariant")
+ .Type(defaultVariantType)
+ )
+ .AddColumn(TColumnSchema()
+ .Name("NoDefault")
+ .Type(noDefaultType)
+ )
+ .AddColumn(TColumnSchema()
+ .Name("SerializationProtobuf")
+ .Type(NTi::Optional(NTi::Struct({
+ {"x1", NTi::Optional(NTi::Int64())},
+ {"y1", NTi::Optional(NTi::String())},
+ {"z1", NTi::Optional(NTi::String())},
+ })))
+ )
+ .AddColumn(TColumnSchema()
+ .Name("MemberOfTopLevelOneof")
+ .Type(NTi::Optional(NTi::Int64()))
+ )
+ );
+ }
+}
+
+static TNode GetColumns(const TFormat& format, int tableIndex = 0)
+{
+ return format.Config.GetAttributes()["tables"][tableIndex]["columns"];
+}
+
+Y_UNIT_TEST_SUITE(ProtobufFormatFileOptions)
+{
+ Y_UNIT_TEST(TRowFieldSerializationOption)
+ {
+ const auto format = TFormat::Protobuf<NTestingFileOptions::TRowFieldSerializationOption>();
+ auto columns = GetColumns(format);
+
+ UNIT_ASSERT_VALUES_EQUAL(columns[0]["name"], "UrlRow_1");
+ UNIT_ASSERT_VALUES_EQUAL(columns[0]["proto_type"], "message");
+ UNIT_ASSERT_VALUES_EQUAL(columns[0]["field_number"], 1);
+
+ UNIT_ASSERT_VALUES_EQUAL(columns[1]["name"], "UrlRow_2");
+ UNIT_ASSERT_VALUES_EQUAL(columns[1]["proto_type"], "structured_message");
+ UNIT_ASSERT_VALUES_EQUAL(columns[1]["field_number"], 2);
+ const auto& fields = columns[1]["fields"];
+ UNIT_ASSERT_VALUES_EQUAL(fields[0]["name"], "Host");
+ UNIT_ASSERT_VALUES_EQUAL(fields[0]["proto_type"], "string");
+ UNIT_ASSERT_VALUES_EQUAL(fields[0]["field_number"], 1);
+
+ UNIT_ASSERT_VALUES_EQUAL(fields[1]["name"], "Path");
+ UNIT_ASSERT_VALUES_EQUAL(fields[1]["proto_type"], "string");
+ UNIT_ASSERT_VALUES_EQUAL(fields[1]["field_number"], 2);
+
+ UNIT_ASSERT_VALUES_EQUAL(fields[2]["name"], "HttpCode");
+ UNIT_ASSERT_VALUES_EQUAL(fields[2]["proto_type"], "sint32");
+ UNIT_ASSERT_VALUES_EQUAL(fields[2]["field_number"], 3);
+ }
+
+ Y_UNIT_TEST(Map)
+ {
+ const auto format = TFormat::Protobuf<NTestingFileOptions::TWithMap>();
+ auto columns = GetColumns(format);
+
+ UNIT_ASSERT_VALUES_EQUAL(columns.Size(), 2);
+ {
+ const auto& column = columns[0];
+ UNIT_ASSERT_VALUES_EQUAL(column["name"], "MapDefault");
+ UNIT_ASSERT_VALUES_EQUAL(column["proto_type"], "structured_message");
+ UNIT_ASSERT_VALUES_EQUAL(column["fields"].Size(), 2);
+ UNIT_ASSERT_VALUES_EQUAL(column["fields"][0]["proto_type"], "int64");
+ UNIT_ASSERT_VALUES_EQUAL(column["fields"][1]["proto_type"], "structured_message");
+ }
+ {
+ const auto& column = columns[1];
+ UNIT_ASSERT_VALUES_EQUAL(column["name"], "MapDict");
+ UNIT_ASSERT_VALUES_EQUAL(column["proto_type"], "structured_message");
+ UNIT_ASSERT_VALUES_EQUAL(column["fields"].Size(), 2);
+ UNIT_ASSERT_VALUES_EQUAL(column["fields"][0]["proto_type"], "int64");
+ UNIT_ASSERT_VALUES_EQUAL(column["fields"][1]["proto_type"], "structured_message");
+ }
+ }
+
+ Y_UNIT_TEST(Oneof)
+ {
+ const auto format = TFormat::Protobuf<NTestingFileOptions::TWithOneof>();
+ auto columns = GetColumns(format);
+
+ UNIT_ASSERT_VALUES_EQUAL(columns.Size(), 4);
+
+ {
+ const auto& column = columns[0];
+ UNIT_ASSERT_VALUES_EQUAL(column["name"], "DefaultVariant");
+ UNIT_ASSERT_VALUES_EQUAL(column["proto_type"], "structured_message");
+ UNIT_ASSERT_VALUES_EQUAL(column["fields"].Size(), 5);
+ UNIT_ASSERT_VALUES_EQUAL(column["fields"][0]["name"], "field");
+
+ const auto& oneof2 = column["fields"][1];
+ UNIT_ASSERT_VALUES_EQUAL(oneof2["name"], "Oneof2");
+ UNIT_ASSERT_VALUES_EQUAL(oneof2["proto_type"], "oneof");
+ UNIT_ASSERT_VALUES_EQUAL(oneof2["fields"][0]["name"], "y2");
+ UNIT_ASSERT_VALUES_EQUAL(oneof2["fields"][1]["name"], "z2");
+ UNIT_ASSERT_VALUES_EQUAL(oneof2["fields"][1]["proto_type"], "structured_message");
+ const auto& embeddedFields = oneof2["fields"][1]["fields"];
+ UNIT_ASSERT_VALUES_EQUAL(embeddedFields[0]["name"], "x");
+ UNIT_ASSERT_VALUES_EQUAL(embeddedFields[1]["name"], "y");
+
+ UNIT_ASSERT_VALUES_EQUAL(oneof2["fields"][2]["name"], "x2");
+
+ UNIT_ASSERT_VALUES_EQUAL(column["fields"][2]["name"], "x1");
+ UNIT_ASSERT_VALUES_EQUAL(column["fields"][3]["name"], "y1");
+ UNIT_ASSERT_VALUES_EQUAL(column["fields"][4]["name"], "z1");
+ };
+
+ {
+ const auto& column = columns[1];
+ UNIT_ASSERT_VALUES_EQUAL(column["name"], "NoDefault");
+ UNIT_ASSERT_VALUES_EQUAL(column["proto_type"], "structured_message");
+ const auto& fields = column["fields"];
+ UNIT_ASSERT_VALUES_EQUAL(fields.Size(), 7);
+
+ UNIT_ASSERT_VALUES_EQUAL(fields[0]["name"], "field");
+
+ UNIT_ASSERT_VALUES_EQUAL(fields[1]["name"], "y2");
+
+ UNIT_ASSERT_VALUES_EQUAL(fields[2]["name"], "z2");
+ UNIT_ASSERT_VALUES_EQUAL(fields[2]["proto_type"], "structured_message");
+ const auto& embeddedFields = fields[2]["fields"];
+ UNIT_ASSERT_VALUES_EQUAL(embeddedFields[0]["name"], "x");
+ UNIT_ASSERT_VALUES_EQUAL(embeddedFields[1]["name"], "y");
+
+ UNIT_ASSERT_VALUES_EQUAL(fields[3]["name"], "x2");
+
+ UNIT_ASSERT_VALUES_EQUAL(fields[4]["name"], "x1");
+ UNIT_ASSERT_VALUES_EQUAL(fields[5]["name"], "y1");
+ UNIT_ASSERT_VALUES_EQUAL(fields[6]["name"], "z1");
+ };
+
+ {
+ const auto& column = columns[2];
+ UNIT_ASSERT_VALUES_EQUAL(column["name"], "SerializationProtobuf");
+ UNIT_ASSERT_VALUES_EQUAL(column["proto_type"], "structured_message");
+ UNIT_ASSERT_VALUES_EQUAL(column["fields"].Size(), 3);
+ UNIT_ASSERT_VALUES_EQUAL(column["fields"][0]["name"], "x1");
+ UNIT_ASSERT_VALUES_EQUAL(column["fields"][1]["name"], "y1");
+ UNIT_ASSERT_VALUES_EQUAL(column["fields"][2]["name"], "z1");
+ }
+ {
+ const auto& column = columns[3];
+ UNIT_ASSERT_VALUES_EQUAL(column["name"], "MemberOfTopLevelOneof");
+ UNIT_ASSERT_VALUES_EQUAL(column["proto_type"], "int64");
+ }
+ }
+}
diff --git a/yt/cpp/mapreduce/interface/protobuf_file_options_ut.proto b/yt/cpp/mapreduce/interface/protobuf_file_options_ut.proto
new file mode 100644
index 0000000000..4804b2f60c
--- /dev/null
+++ b/yt/cpp/mapreduce/interface/protobuf_file_options_ut.proto
@@ -0,0 +1,142 @@
+import "yt/yt_proto/yt/formats/extension.proto";
+
+package NYT.NTestingFileOptions;
+
+option (NYT.file_default_field_flags) = SERIALIZATION_YT;
+option (NYT.file_default_field_flags) = MAP_AS_LIST_OF_STRUCTS;
+option (NYT.file_default_message_flags) = DEPRECATED_SORT_FIELDS_AS_IN_PROTO_FILE;
+option (NYT.file_default_oneof_flags) = SEPARATE_FIELDS;
+
+message TUrlRow
+{
+ optional string Host = 1 [(NYT.column_name) = "Host"];
+ optional string Path = 2 [(NYT.column_name) = "Path"];
+ optional sint32 HttpCode = 3 [(NYT.column_name) = "HttpCode"];
+}
+
+message TRowFieldSerializationOption
+{
+ optional TUrlRow UrlRow_1 = 1 [(NYT.flags) = SERIALIZATION_PROTOBUF];
+ optional TUrlRow UrlRow_2 = 2;
+}
+
+message TRowMixedSerializationOptions
+{
+ option (NYT.default_field_flags) = SERIALIZATION_PROTOBUF;
+ optional TUrlRow UrlRow_1 = 1;
+ optional TUrlRow UrlRow_2 = 2 [(NYT.flags) = SERIALIZATION_YT];
+}
+
+message TRowSerializedRepeatedFields
+{
+ repeated int64 Ints = 1;
+ repeated TUrlRow UrlRows = 2;
+}
+
+message TFieldSortOrder
+{
+ message TEmbeddedDefault {
+ optional int64 x = 2;
+ optional string y = 12;
+ optional bool z = 1;
+ }
+ message TEmbeddedAsInProtoFile {
+ option (NYT.message_flags) = DEPRECATED_SORT_FIELDS_AS_IN_PROTO_FILE;
+ optional int64 x = 2;
+ optional string y = 12;
+ optional bool z = 1;
+ }
+ message TEmbeddedByFieldNumber {
+ option (NYT.message_flags) = SORT_FIELDS_BY_FIELD_NUMBER;
+ optional int64 x = 2;
+ optional string y = 12;
+ optional bool z = 1;
+ }
+ option (NYT.default_field_flags) = SERIALIZATION_YT;
+
+ optional TEmbeddedDefault EmbeddedDefault = 1;
+ optional TEmbeddedAsInProtoFile EmbeddedAsInProtoFile = 2;
+ optional TEmbeddedByFieldNumber EmbeddedByFieldNumber = 3;
+}
+
+message TWithMap
+{
+ message TEmbedded {
+ optional int64 x = 1;
+ optional string y = 2;
+ }
+
+ map<int64, TEmbedded> MapDefault = 1;
+ map<int64, TEmbedded> MapDict = 5 [(NYT.flags) = MAP_AS_DICT];
+}
+
+message TWithOneof
+{
+ message TEmbedded
+ {
+ oneof Oneof {
+ int64 x = 1;
+ string y = 2;
+ }
+ }
+
+ message TDefaultVariant
+ {
+ option (NYT.default_oneof_flags) = VARIANT;
+ optional string field = 1;
+
+ oneof Oneof2
+ {
+ string y2 = 4;
+ TEmbedded z2 = 6;
+ int64 x2 = 2;
+ }
+
+ oneof Oneof1
+ {
+ option (NYT.oneof_flags) = SEPARATE_FIELDS;
+ int64 x1 = 10;
+ string y1 = 3;
+ TEmbedded z1 = 5;
+ }
+ }
+
+ message TNoDefault
+ {
+ optional string field = 1;
+
+ oneof Oneof2
+ {
+ string y2 = 4;
+ TEmbedded z2 = 6;
+ int64 x2 = 2;
+ }
+
+ oneof Oneof1
+ {
+ int64 x1 = 10;
+ string y1 = 3;
+ TEmbedded z1 = 5;
+ }
+ }
+
+ message TSerializationProtobuf
+ {
+ option (NYT.default_field_flags) = SERIALIZATION_PROTOBUF;
+ oneof Oneof
+ {
+ int64 x1 = 2;
+ string y1 = 1;
+ TEmbedded z1 = 3;
+ }
+ }
+
+ optional TDefaultVariant DefaultVariant = 1;
+ optional TNoDefault NoDefault = 2;
+ optional TSerializationProtobuf SerializationProtobuf = 3;
+
+ oneof TopLevelOneof
+ {
+ int64 MemberOfTopLevelOneof = 4;
+ }
+}
diff --git a/yt/cpp/mapreduce/interface/protobuf_format.cpp b/yt/cpp/mapreduce/interface/protobuf_format.cpp
new file mode 100644
index 0000000000..3d57ed2797
--- /dev/null
+++ b/yt/cpp/mapreduce/interface/protobuf_format.cpp
@@ -0,0 +1,1498 @@
+#include "protobuf_format.h"
+
+#include "errors.h"
+
+#include <yt/yt_proto/yt/formats/extension.pb.h>
+
+#include <google/protobuf/text_format.h>
+
+#include <library/cpp/yson/node/node_io.h>
+
+#include <util/generic/hash_set.h>
+#include <util/generic/stack.h>
+#include <util/generic/overloaded.h>
+
+#include <util/stream/output.h>
+#include <util/stream/file.h>
+
+namespace NYT::NDetail {
+
+using ::google::protobuf::Descriptor;
+using ::google::protobuf::DescriptorProto;
+using ::google::protobuf::EnumDescriptor;
+using ::google::protobuf::EnumDescriptorProto;
+using ::google::protobuf::FieldDescriptor;
+using ::google::protobuf::FieldDescriptorProto;
+using ::google::protobuf::OneofDescriptor;
+using ::google::protobuf::Message;
+using ::google::protobuf::FileDescriptor;
+using ::google::protobuf::FileDescriptorProto;
+using ::google::protobuf::FileDescriptorSet;
+using ::google::protobuf::FieldOptions;
+using ::google::protobuf::FileOptions;
+using ::google::protobuf::OneofOptions;
+using ::google::protobuf::MessageOptions;
+
+using ::ToString;
+
+namespace {
+
+////////////////////////////////////////////////////////////////////////////////
+
+using TOneofOption = std::variant<
+ EProtobufOneofMode>;
+
+using TFieldOption = std::variant<
+ EProtobufType,
+ EProtobufSerializationMode,
+ EProtobufListMode,
+ EProtobufMapMode,
+ EProtobufEnumWritingMode>;
+
+using TMessageOption = std::variant<
+ EProtobufFieldSortOrder>;
+
+struct TOtherColumns
+{ };
+
+using TValueTypeOrOtherColumns = std::variant<EValueType, TOtherColumns>;
+
+////////////////////////////////////////////////////////////////////////////////
+
+TFieldOption FieldFlagToOption(EWrapperFieldFlag::Enum flag)
+{
+ using EFlag = EWrapperFieldFlag;
+ switch (flag) {
+ case EFlag::SERIALIZATION_PROTOBUF:
+ return EProtobufSerializationMode::Protobuf;
+ case EFlag::SERIALIZATION_YT:
+ return EProtobufSerializationMode::Yt;
+
+ case EFlag::ANY:
+ return EProtobufType::Any;
+ case EFlag::OTHER_COLUMNS:
+ return EProtobufType::OtherColumns;
+ case EFlag::ENUM_INT:
+ return EProtobufType::EnumInt;
+ case EFlag::ENUM_STRING:
+ return EProtobufType::EnumString;
+
+ case EFlag::OPTIONAL_LIST:
+ return EProtobufListMode::Optional;
+ case EFlag::REQUIRED_LIST:
+ return EProtobufListMode::Required;
+
+ case EFlag::MAP_AS_LIST_OF_STRUCTS_LEGACY:
+ return EProtobufMapMode::ListOfStructsLegacy;
+ case EFlag::MAP_AS_LIST_OF_STRUCTS:
+ return EProtobufMapMode::ListOfStructs;
+ case EFlag::MAP_AS_DICT:
+ return EProtobufMapMode::Dict;
+ case EFlag::MAP_AS_OPTIONAL_DICT:
+ return EProtobufMapMode::OptionalDict;
+ case EFlag::EMBEDDED:
+ return EProtobufSerializationMode::Embedded;
+
+ case EFlag::ENUM_SKIP_UNKNOWN_VALUES:
+ return EProtobufEnumWritingMode::SkipUnknownValues;
+ case EFlag::ENUM_CHECK_VALUES:
+ return EProtobufEnumWritingMode::CheckValues;
+ }
+ Y_FAIL();
+}
+
+TMessageOption MessageFlagToOption(EWrapperMessageFlag::Enum flag)
+{
+ using EFlag = EWrapperMessageFlag;
+ switch (flag) {
+ case EFlag::DEPRECATED_SORT_FIELDS_AS_IN_PROTO_FILE:
+ return EProtobufFieldSortOrder::AsInProtoFile;
+ case EFlag::SORT_FIELDS_BY_FIELD_NUMBER:
+ return EProtobufFieldSortOrder::ByFieldNumber;
+ }
+ Y_FAIL();
+}
+
+TOneofOption OneofFlagToOption(EWrapperOneofFlag::Enum flag)
+{
+ using EFlag = EWrapperOneofFlag;
+ switch (flag) {
+ case EFlag::SEPARATE_FIELDS:
+ return EProtobufOneofMode::SeparateFields;
+ case EFlag::VARIANT:
+ return EProtobufOneofMode::Variant;
+ }
+ Y_FAIL();
+}
+
+EWrapperFieldFlag::Enum OptionToFieldFlag(TFieldOption option)
+{
+ using EFlag = EWrapperFieldFlag;
+ struct TVisitor
+ {
+ EFlag::Enum operator() (EProtobufType type)
+ {
+ switch (type) {
+ case EProtobufType::Any:
+ return EFlag::ANY;
+ case EProtobufType::OtherColumns:
+ return EFlag::OTHER_COLUMNS;
+ case EProtobufType::EnumInt:
+ return EFlag::ENUM_INT;
+ case EProtobufType::EnumString:
+ return EFlag::ENUM_STRING;
+ }
+ Y_FAIL();
+ }
+ EFlag::Enum operator() (EProtobufSerializationMode serializationMode)
+ {
+ switch (serializationMode) {
+ case EProtobufSerializationMode::Yt:
+ return EFlag::SERIALIZATION_YT;
+ case EProtobufSerializationMode::Protobuf:
+ return EFlag::SERIALIZATION_PROTOBUF;
+ case EProtobufSerializationMode::Embedded:
+ return EFlag::EMBEDDED;
+ }
+ Y_FAIL();
+ }
+ EFlag::Enum operator() (EProtobufListMode listMode)
+ {
+ switch (listMode) {
+ case EProtobufListMode::Optional:
+ return EFlag::OPTIONAL_LIST;
+ case EProtobufListMode::Required:
+ return EFlag::REQUIRED_LIST;
+ }
+ Y_FAIL();
+ }
+ EFlag::Enum operator() (EProtobufMapMode mapMode)
+ {
+ switch (mapMode) {
+ case EProtobufMapMode::ListOfStructsLegacy:
+ return EFlag::MAP_AS_LIST_OF_STRUCTS_LEGACY;
+ case EProtobufMapMode::ListOfStructs:
+ return EFlag::MAP_AS_LIST_OF_STRUCTS;
+ case EProtobufMapMode::Dict:
+ return EFlag::MAP_AS_DICT;
+ case EProtobufMapMode::OptionalDict:
+ return EFlag::MAP_AS_OPTIONAL_DICT;
+ }
+ Y_FAIL();
+ }
+ EFlag::Enum operator() (EProtobufEnumWritingMode enumWritingMode)
+ {
+ switch (enumWritingMode) {
+ case EProtobufEnumWritingMode::SkipUnknownValues:
+ return EFlag::ENUM_SKIP_UNKNOWN_VALUES;
+ case EProtobufEnumWritingMode::CheckValues:
+ return EFlag::ENUM_CHECK_VALUES;
+ }
+ Y_FAIL();
+ }
+ };
+
+ return std::visit(TVisitor(), option);
+}
+
+EWrapperMessageFlag::Enum OptionToMessageFlag(TMessageOption option)
+{
+ using EFlag = EWrapperMessageFlag;
+ struct TVisitor
+ {
+ EFlag::Enum operator() (EProtobufFieldSortOrder sortOrder)
+ {
+ switch (sortOrder) {
+ case EProtobufFieldSortOrder::AsInProtoFile:
+ return EFlag::DEPRECATED_SORT_FIELDS_AS_IN_PROTO_FILE;
+ case EProtobufFieldSortOrder::ByFieldNumber:
+ return EFlag::SORT_FIELDS_BY_FIELD_NUMBER;
+ }
+ Y_FAIL();
+ }
+ };
+
+ return std::visit(TVisitor(), option);
+}
+
+EWrapperOneofFlag::Enum OptionToOneofFlag(TOneofOption option)
+{
+ using EFlag = EWrapperOneofFlag;
+ struct TVisitor
+ {
+ EFlag::Enum operator() (EProtobufOneofMode mode)
+ {
+ switch (mode) {
+ case EProtobufOneofMode::SeparateFields:
+ return EFlag::SEPARATE_FIELDS;
+ case EProtobufOneofMode::Variant:
+ return EFlag::VARIANT;
+ }
+ Y_FAIL();
+ }
+ };
+
+ return std::visit(TVisitor(), option);
+}
+
+
+template <typename T, typename TOptionToFlag>
+void SetOption(TMaybe<T>& option, T newOption, TOptionToFlag optionToFlag)
+{
+ if (option) {
+ if (*option == newOption) {
+ ythrow yexception() << "Duplicate protobuf flag " << optionToFlag(newOption);
+ } else {
+ ythrow yexception() << "Incompatible protobuf flags " <<
+ optionToFlag(*option) << " and " << optionToFlag(newOption);
+ }
+ }
+ option = newOption;
+}
+
+class TParseProtobufFieldOptionsVisitor
+{
+public:
+ void operator() (EProtobufType type)
+ {
+ SetOption(Type, type);
+ }
+
+ void operator() (EProtobufSerializationMode serializationMode)
+ {
+ SetOption(SerializationMode, serializationMode);
+ }
+
+ void operator() (EProtobufListMode listMode)
+ {
+ SetOption(ListMode, listMode);
+ }
+
+ void operator() (EProtobufMapMode mapMode)
+ {
+ SetOption(MapMode, mapMode);
+ }
+
+ void operator() (EProtobufEnumWritingMode enumWritingMode)
+ {
+ SetOption(EnumWritingMode, enumWritingMode);
+ }
+
+ template <typename T>
+ void SetOption(TMaybe<T>& option, T newOption)
+ {
+ NYT::NDetail::SetOption(option, newOption, OptionToFieldFlag);
+ }
+
+public:
+ TMaybe<EProtobufType> Type;
+ TMaybe<EProtobufSerializationMode> SerializationMode;
+ TMaybe<EProtobufListMode> ListMode;
+ TMaybe<EProtobufMapMode> MapMode;
+ TMaybe<EProtobufEnumWritingMode> EnumWritingMode;
+};
+
+class TParseProtobufMessageOptionsVisitor
+{
+public:
+ void operator() (EProtobufFieldSortOrder fieldSortOrder)
+ {
+ SetOption(FieldSortOrder, fieldSortOrder);
+ }
+
+ template <typename T>
+ void SetOption(TMaybe<T>& option, T newOption)
+ {
+ NYT::NDetail::SetOption(option, newOption, OptionToMessageFlag);
+ }
+
+public:
+ TMaybe<EProtobufFieldSortOrder> FieldSortOrder;
+};
+
+class TParseProtobufOneofOptionsVisitor
+{
+public:
+ void operator() (EProtobufOneofMode mode)
+ {
+ SetOption(Mode, mode);
+ }
+
+ template <typename T>
+ void SetOption(TMaybe<T>& option, T newOption)
+ {
+ NYT::NDetail::SetOption(option, newOption, OptionToOneofFlag);
+ }
+
+public:
+ TMaybe<EProtobufOneofMode> Mode;
+};
+
+void ParseProtobufFieldOptions(
+ const ::google::protobuf::RepeatedField<EWrapperFieldFlag::Enum>& flags,
+ TProtobufFieldOptions* fieldOptions)
+{
+ TParseProtobufFieldOptionsVisitor visitor;
+ for (auto flag : flags) {
+ std::visit(visitor, FieldFlagToOption(flag));
+ }
+ if (visitor.Type) {
+ fieldOptions->Type = *visitor.Type;
+ }
+ if (visitor.SerializationMode) {
+ fieldOptions->SerializationMode = *visitor.SerializationMode;
+ }
+ if (visitor.ListMode) {
+ fieldOptions->ListMode = *visitor.ListMode;
+ }
+ if (visitor.MapMode) {
+ fieldOptions->MapMode = *visitor.MapMode;
+ }
+}
+
+void ParseProtobufMessageOptions(
+ const ::google::protobuf::RepeatedField<EWrapperMessageFlag::Enum>& flags,
+ TProtobufMessageOptions* messageOptions)
+{
+ TParseProtobufMessageOptionsVisitor visitor;
+ for (auto flag : flags) {
+ std::visit(visitor, MessageFlagToOption(flag));
+ }
+ if (visitor.FieldSortOrder) {
+ messageOptions->FieldSortOrder = *visitor.FieldSortOrder;
+ }
+}
+
+void ParseProtobufOneofOptions(
+ const ::google::protobuf::RepeatedField<EWrapperOneofFlag::Enum>& flags,
+ TProtobufOneofOptions* messageOptions)
+{
+ TParseProtobufOneofOptionsVisitor visitor;
+ for (auto flag : flags) {
+ std::visit(visitor, OneofFlagToOption(flag));
+ }
+ if (visitor.Mode) {
+ messageOptions->Mode = *visitor.Mode;
+ }
+}
+
+TProtobufFieldOptions GetDefaultFieldOptions(
+ const Descriptor* descriptor,
+ TProtobufFieldOptions defaultFieldOptions = {})
+{
+ ParseProtobufFieldOptions(
+ descriptor->file()->options().GetRepeatedExtension(file_default_field_flags),
+ &defaultFieldOptions);
+ ParseProtobufFieldOptions(
+ descriptor->options().GetRepeatedExtension(default_field_flags),
+ &defaultFieldOptions);
+ return defaultFieldOptions;
+}
+
+TProtobufOneofOptions GetDefaultOneofOptions(const Descriptor* descriptor)
+{
+ TProtobufOneofOptions defaultOneofOptions;
+ ParseProtobufOneofOptions(
+ descriptor->file()->options().GetRepeatedExtension(file_default_oneof_flags),
+ &defaultOneofOptions);
+ ParseProtobufOneofOptions(
+ descriptor->options().GetRepeatedExtension(default_oneof_flags),
+ &defaultOneofOptions);
+ switch (defaultOneofOptions.Mode) {
+ case EProtobufOneofMode::Variant: {
+ auto defaultFieldOptions = GetDefaultFieldOptions(descriptor);
+ switch (defaultFieldOptions.SerializationMode) {
+ case EProtobufSerializationMode::Protobuf:
+ // For Protobuf serialization mode default is SeparateFields.
+ defaultOneofOptions.Mode = EProtobufOneofMode::SeparateFields;
+ return defaultOneofOptions;
+ case EProtobufSerializationMode::Yt:
+ case EProtobufSerializationMode::Embedded:
+ return defaultOneofOptions;
+ }
+ Y_FAIL();
+ }
+ case EProtobufOneofMode::SeparateFields:
+ return defaultOneofOptions;
+ }
+ Y_FAIL();
+}
+
+////////////////////////////////////////////////////////////////////////////////
+
+void ValidateProtobufType(const FieldDescriptor& fieldDescriptor, EProtobufType protobufType)
+{
+ const auto fieldType = fieldDescriptor.type();
+ auto ensureType = [&] (FieldDescriptor::Type expectedType) {
+ Y_ENSURE(fieldType == expectedType,
+ "Type of field " << fieldDescriptor.name() << "does not match specified field flag " <<
+ OptionToFieldFlag(protobufType) << ": "
+ "expected " << FieldDescriptor::TypeName(expectedType) << ", " <<
+ "got " << FieldDescriptor::TypeName(fieldType));
+ };
+ switch (protobufType) {
+ case EProtobufType::Any:
+ ensureType(FieldDescriptor::TYPE_BYTES);
+ return;
+ case EProtobufType::OtherColumns:
+ ensureType(FieldDescriptor::TYPE_BYTES);
+ return;
+ case EProtobufType::EnumInt:
+ ensureType(FieldDescriptor::TYPE_ENUM);
+ return;
+ case EProtobufType::EnumString:
+ ensureType(FieldDescriptor::TYPE_ENUM);
+ return;
+ }
+ Y_FAIL();
+}
+
+////////////////////////////////////////////////////////////////////////////////
+
+class TCycleChecker
+{
+private:
+ class TGuard
+ {
+ public:
+ TGuard(TCycleChecker* checker, const Descriptor* descriptor)
+ : Checker_(checker)
+ , Descriptor_(descriptor)
+ {
+ Checker_->ActiveVertices_.insert(Descriptor_);
+ Checker_->Stack_.push(Descriptor_);
+ }
+
+ ~TGuard()
+ {
+ Checker_->ActiveVertices_.erase(Descriptor_);
+ Checker_->Stack_.pop();
+ }
+
+ private:
+ TCycleChecker* Checker_;
+ const Descriptor* Descriptor_;
+ };
+
+public:
+ [[nodiscard]] TGuard Enter(const Descriptor* descriptor)
+ {
+ if (ActiveVertices_.contains(descriptor)) {
+ Y_VERIFY(!Stack_.empty());
+ ythrow TApiUsageError() << "Cyclic reference found for protobuf messages. " <<
+ "Consider removing " << EWrapperFieldFlag::SERIALIZATION_YT << " flag " <<
+ "somewhere on the cycle containing " <<
+ Stack_.top()->full_name() << " and " << descriptor->full_name();
+ }
+ return TGuard(this, descriptor);
+ }
+
+private:
+ THashSet<const Descriptor*> ActiveVertices_;
+ TStack<const Descriptor*> Stack_;
+};
+
+////////////////////////////////////////////////////////////////////////////////
+
+} // namespace
+
+////////////////////////////////////////////////////////////////////////////////
+
+TProtobufFieldOptions GetFieldOptions(
+ const FieldDescriptor* fieldDescriptor,
+ const TMaybe<TProtobufFieldOptions>& defaultFieldOptions)
+{
+ TProtobufFieldOptions options;
+ if (defaultFieldOptions) {
+ options = *defaultFieldOptions;
+ } else {
+ options = GetDefaultFieldOptions(fieldDescriptor->containing_type());
+ }
+ ParseProtobufFieldOptions(fieldDescriptor->options().GetRepeatedExtension(flags), &options);
+ return options;
+}
+
+TProtobufOneofOptions GetOneofOptions(
+ const OneofDescriptor* oneofDescriptor,
+ const TMaybe<TProtobufOneofOptions>& defaultOneofOptions)
+{
+ TProtobufOneofOptions options;
+ if (defaultOneofOptions) {
+ options = *defaultOneofOptions;
+ } else {
+ options = GetDefaultOneofOptions(oneofDescriptor->containing_type());
+ }
+ ParseProtobufOneofOptions(oneofDescriptor->options().GetRepeatedExtension(oneof_flags), &options);
+
+ if (oneofDescriptor->is_synthetic()) {
+ options.Mode = EProtobufOneofMode::SeparateFields;
+ }
+
+ auto variantFieldName = oneofDescriptor->options().GetExtension(variant_field_name);
+ switch (options.Mode) {
+ case EProtobufOneofMode::SeparateFields:
+ if (variantFieldName) {
+ ythrow TApiUsageError() << "\"variant_field_name\" requires (NYT.oneof_flags) = VARIANT";
+ }
+ break;
+ case EProtobufOneofMode::Variant:
+ if (variantFieldName) {
+ options.VariantFieldName = variantFieldName;
+ } else {
+ options.VariantFieldName = oneofDescriptor->name();
+ }
+ break;
+ }
+ return options;
+}
+
+
+TProtobufMessageOptions GetMessageOptions(const Descriptor* descriptor)
+{
+ TProtobufMessageOptions options;
+ ParseProtobufMessageOptions(
+ descriptor->file()->options().GetRepeatedExtension(file_default_message_flags),
+ &options);
+ ParseProtobufMessageOptions(
+ descriptor->options().GetRepeatedExtension(message_flags),
+ &options);
+ return options;
+}
+
+TNode MakeEnumerationConfig(const ::google::protobuf::EnumDescriptor* enumDescriptor)
+{
+ auto config = TNode::CreateMap();
+ for (int i = 0; i < enumDescriptor->value_count(); ++i) {
+ config[enumDescriptor->value(i)->name()] = enumDescriptor->value(i)->number();
+ }
+ return config;
+}
+
+TString DeduceProtobufType(
+ const FieldDescriptor* fieldDescriptor,
+ const TProtobufFieldOptions& options)
+{
+ if (options.Type) {
+ ValidateProtobufType(*fieldDescriptor, *options.Type);
+ return ToString(*options.Type);
+ }
+ switch (fieldDescriptor->type()) {
+ case FieldDescriptor::TYPE_ENUM:
+ return ToString(EProtobufType::EnumString);
+ case FieldDescriptor::TYPE_MESSAGE:
+ switch (options.SerializationMode) {
+ case EProtobufSerializationMode::Protobuf:
+ return "message";
+ case EProtobufSerializationMode::Yt:
+ return "structured_message";
+ case EProtobufSerializationMode::Embedded:
+ return "embedded_message";
+ }
+ Y_FAIL();
+ default:
+ return fieldDescriptor->type_name();
+ }
+ Y_FAIL();
+}
+
+TString GetColumnName(const ::google::protobuf::FieldDescriptor& field)
+{
+ const auto& options = field.options();
+ const auto columnName = options.GetExtension(column_name);
+ if (!columnName.empty()) {
+ return columnName;
+ }
+ const auto keyColumnName = options.GetExtension(key_column_name);
+ if (!keyColumnName.empty()) {
+ return keyColumnName;
+ }
+ return field.name();
+}
+
+TNode MakeProtoFormatMessageFieldsConfig(
+ const Descriptor* descriptor,
+ TNode* enumerations,
+ TCycleChecker& cycleChecker);
+
+TNode MakeProtoFormatMessageFieldsConfig(
+ const Descriptor* descriptor,
+ TNode* enumerations,
+ const TProtobufFieldOptions& defaultFieldOptions,
+ const TProtobufOneofOptions& defaultOneofOptions,
+ TCycleChecker& cycleChecker);
+
+TNode MakeMapFieldsConfig(
+ const FieldDescriptor* fieldDescriptor,
+ TNode* enumerations,
+ const TProtobufFieldOptions& fieldOptions,
+ TCycleChecker& cycleChecker)
+{
+ Y_VERIFY(fieldDescriptor->is_map());
+ auto message = fieldDescriptor->message_type();
+ switch (fieldOptions.MapMode) {
+ case EProtobufMapMode::ListOfStructsLegacy:
+ return MakeProtoFormatMessageFieldsConfig(
+ message,
+ enumerations,
+ cycleChecker);
+ case EProtobufMapMode::ListOfStructs:
+ case EProtobufMapMode::Dict:
+ case EProtobufMapMode::OptionalDict: {
+ TProtobufFieldOptions defaultFieldOptions;
+ defaultFieldOptions.SerializationMode = EProtobufSerializationMode::Yt;
+ return MakeProtoFormatMessageFieldsConfig(
+ message,
+ enumerations,
+ defaultFieldOptions,
+ TProtobufOneofOptions{},
+ cycleChecker);
+ }
+ }
+ Y_FAIL();
+}
+
+TNode MakeProtoFormatFieldConfig(
+ const FieldDescriptor* fieldDescriptor,
+ TNode* enumerations,
+ const TProtobufFieldOptions& defaultOptions,
+ TCycleChecker& cycleChecker)
+{
+ auto fieldConfig = TNode::CreateMap();
+ fieldConfig["field_number"] = fieldDescriptor->number();
+ fieldConfig["name"] = GetColumnName(*fieldDescriptor);
+
+ auto fieldOptions = GetFieldOptions(fieldDescriptor, defaultOptions);
+
+ Y_ENSURE(fieldOptions.SerializationMode != EProtobufSerializationMode::Embedded,
+ "EMBEDDED flag is currently supported only with "
+ "ProtobufFormatWithDescriptors config option set to true");
+
+ if (fieldDescriptor->is_repeated()) {
+ Y_ENSURE_EX(fieldOptions.SerializationMode == EProtobufSerializationMode::Yt,
+ TApiUsageError() << "Repeated field \"" << fieldDescriptor->full_name() << "\" " <<
+ "must have flag \"" << EWrapperFieldFlag::SERIALIZATION_YT << "\"");
+ }
+ fieldConfig["repeated"] = fieldDescriptor->is_repeated();
+ fieldConfig["packed"] = fieldDescriptor->is_packed();
+
+ fieldConfig["proto_type"] = DeduceProtobufType(fieldDescriptor, fieldOptions);
+
+ if (fieldDescriptor->type() == FieldDescriptor::TYPE_ENUM) {
+ auto* enumeration = fieldDescriptor->enum_type();
+ (*enumerations)[enumeration->full_name()] = MakeEnumerationConfig(enumeration);
+ fieldConfig["enumeration_name"] = enumeration->full_name();
+ }
+
+ if (fieldOptions.SerializationMode != EProtobufSerializationMode::Yt) {
+ return fieldConfig;
+ }
+
+ if (fieldDescriptor->is_map()) {
+ fieldConfig["fields"] = MakeMapFieldsConfig(fieldDescriptor, enumerations, fieldOptions, cycleChecker);
+ return fieldConfig;
+ }
+
+ if (fieldDescriptor->type() == FieldDescriptor::TYPE_MESSAGE) {
+ fieldConfig["fields"] = MakeProtoFormatMessageFieldsConfig(
+ fieldDescriptor->message_type(),
+ enumerations,
+ cycleChecker);
+ }
+
+ return fieldConfig;
+}
+
+void MakeProtoFormatOneofConfig(
+ const OneofDescriptor* oneofDescriptor,
+ TNode* enumerations,
+ const TProtobufFieldOptions& defaultFieldOptions,
+ const TProtobufOneofOptions& defaultOneofOptions,
+ TCycleChecker& cycleChecker,
+ TNode* fields)
+{
+ auto addFields = [&] (TNode* fields) {
+ for (int i = 0; i < oneofDescriptor->field_count(); ++i) {
+ fields->Add(MakeProtoFormatFieldConfig(
+ oneofDescriptor->field(i),
+ enumerations,
+ defaultFieldOptions,
+ cycleChecker));
+ }
+ };
+
+ auto oneofOptions = GetOneofOptions(oneofDescriptor, defaultOneofOptions);
+ switch (oneofOptions.Mode) {
+ case EProtobufOneofMode::SeparateFields:
+ addFields(fields);
+ return;
+ case EProtobufOneofMode::Variant: {
+ auto oneofFields = TNode::CreateList();
+ addFields(&oneofFields);
+ auto oneofField = TNode()
+ ("proto_type", "oneof")
+ ("name", oneofOptions.VariantFieldName)
+ ("fields", std::move(oneofFields));
+ fields->Add(std::move(oneofField));
+ return;
+ }
+ }
+ Y_FAIL();
+}
+
+TNode MakeProtoFormatMessageFieldsConfig(
+ const Descriptor* descriptor,
+ TNode* enumerations,
+ const TProtobufFieldOptions& defaultFieldOptions,
+ const TProtobufOneofOptions& defaultOneofOptions,
+ TCycleChecker& cycleChecker)
+{
+ auto fields = TNode::CreateList();
+ THashSet<const OneofDescriptor*> visitedOneofs;
+ auto guard = cycleChecker.Enter(descriptor);
+ for (int fieldIndex = 0; fieldIndex < descriptor->field_count(); ++fieldIndex) {
+ auto fieldDescriptor = descriptor->field(fieldIndex);
+ auto oneofDescriptor = fieldDescriptor->containing_oneof();
+ if (!oneofDescriptor) {
+ fields.Add(MakeProtoFormatFieldConfig(
+ fieldDescriptor,
+ enumerations,
+ defaultFieldOptions,
+ cycleChecker));
+ } else if (!visitedOneofs.contains(oneofDescriptor)) {
+ MakeProtoFormatOneofConfig(
+ oneofDescriptor,
+ enumerations,
+ defaultFieldOptions,
+ defaultOneofOptions,
+ cycleChecker,
+ &fields);
+ visitedOneofs.insert(oneofDescriptor);
+ }
+ }
+ return fields;
+}
+
+TNode MakeProtoFormatMessageFieldsConfig(
+ const Descriptor* descriptor,
+ TNode* enumerations,
+ TCycleChecker& cycleChecker)
+{
+ return MakeProtoFormatMessageFieldsConfig(
+ descriptor,
+ enumerations,
+ GetDefaultFieldOptions(descriptor),
+ GetDefaultOneofOptions(descriptor),
+ cycleChecker);
+}
+
+TNode MakeProtoFormatConfigWithTables(const TVector<const Descriptor*>& descriptors)
+{
+ TNode config("protobuf");
+ config.Attributes()
+ ("enumerations", TNode::CreateMap())
+ ("tables", TNode::CreateList());
+
+ auto& enumerations = config.Attributes()["enumerations"];
+
+ for (auto* descriptor : descriptors) {
+ TCycleChecker cycleChecker;
+ auto columns = MakeProtoFormatMessageFieldsConfig(descriptor, &enumerations, cycleChecker);
+ config.Attributes()["tables"].Add(
+ TNode()("columns", std::move(columns)));
+ }
+
+ return config;
+}
+
+////////////////////////////////////////////////////////////////////////////////
+
+class TFileDescriptorSetBuilder
+{
+public:
+ TFileDescriptorSetBuilder()
+ : ExtensionFile_(EWrapperFieldFlag::descriptor()->file())
+ { }
+
+ void AddDescriptor(const Descriptor* descriptor)
+ {
+ auto [it, inserted] = AllDescriptors_.insert(descriptor);
+ if (!inserted) {
+ return;
+ }
+
+ const auto* containingType = descriptor->containing_type();
+ while (containingType) {
+ AddDescriptor(containingType);
+ containingType = containingType->containing_type();
+ }
+ for (int i = 0; i < descriptor->field_count(); ++i) {
+ AddField(descriptor->field(i));
+ }
+ }
+
+ FileDescriptorSet Build()
+ {
+ THashSet<const FileDescriptor*> visitedFiles;
+ TVector<const FileDescriptor*> fileTopoOrder;
+ for (const auto* descriptor : AllDescriptors_) {
+ TraverseDependencies(descriptor->file(), visitedFiles, fileTopoOrder);
+ }
+
+ THashSet<TString> messageTypeNames;
+ THashSet<TString> enumTypeNames;
+ for (const auto* descriptor : AllDescriptors_) {
+ messageTypeNames.insert(descriptor->full_name());
+ }
+ for (const auto* enumDescriptor : EnumDescriptors_) {
+ enumTypeNames.insert(enumDescriptor->full_name());
+ }
+ FileDescriptorSet fileDescriptorSetProto;
+ for (const auto* file : fileTopoOrder) {
+ auto* fileProto = fileDescriptorSetProto.add_file();
+ file->CopyTo(fileProto);
+ Strip(fileProto, messageTypeNames, enumTypeNames);
+ }
+ return fileDescriptorSetProto;
+ }
+
+private:
+ void AddField(const FieldDescriptor* fieldDescriptor)
+ {
+ if (fieldDescriptor->message_type()) {
+ AddDescriptor(fieldDescriptor->message_type());
+ }
+ if (fieldDescriptor->enum_type()) {
+ AddEnumDescriptor(fieldDescriptor->enum_type());
+ }
+ }
+
+ void AddEnumDescriptor(const EnumDescriptor* enumDescriptor)
+ {
+ auto [it, inserted] = EnumDescriptors_.insert(enumDescriptor);
+ if (!inserted) {
+ return;
+ }
+ const auto* containingType = enumDescriptor->containing_type();
+ while (containingType) {
+ AddDescriptor(containingType);
+ containingType = containingType->containing_type();
+ }
+ }
+
+ void TraverseDependencies(
+ const FileDescriptor* current,
+ THashSet<const FileDescriptor*>& visited,
+ TVector<const FileDescriptor*>& topoOrder)
+ {
+ auto [it, inserted] = visited.insert(current);
+ if (!inserted) {
+ return;
+ }
+ for (int i = 0; i < current->dependency_count(); ++i) {
+ TraverseDependencies(current->dependency(i), visited, topoOrder);
+ }
+ topoOrder.push_back(current);
+ }
+
+ template <typename TOptions>
+ void StripUnknownOptions(TOptions* options)
+ {
+ std::vector<const FieldDescriptor*> fields;
+ auto reflection = options->GetReflection();
+ reflection->ListFields(*options, &fields);
+ for (auto field : fields) {
+ if (field->is_extension() && field->file() != ExtensionFile_) {
+ reflection->ClearField(options, field);
+ }
+ }
+ }
+
+ template <typename TRepeatedField, typename TPredicate>
+ void RemoveIf(TRepeatedField* repeatedField, TPredicate predicate)
+ {
+ repeatedField->erase(
+ std::remove_if(repeatedField->begin(), repeatedField->end(), predicate),
+ repeatedField->end());
+ }
+
+ void Strip(
+ const TString& containingTypePrefix,
+ DescriptorProto* messageProto,
+ const THashSet<TString>& messageTypeNames,
+ const THashSet<TString>& enumTypeNames)
+ {
+ const auto prefix = containingTypePrefix + messageProto->name() + '.';
+
+ RemoveIf(messageProto->mutable_nested_type(), [&] (const DescriptorProto& descriptorProto) {
+ return !messageTypeNames.contains(prefix + descriptorProto.name());
+ });
+ RemoveIf(messageProto->mutable_enum_type(), [&] (const EnumDescriptorProto& enumDescriptorProto) {
+ return !enumTypeNames.contains(prefix + enumDescriptorProto.name());
+ });
+
+ messageProto->clear_extension();
+ StripUnknownOptions(messageProto->mutable_options());
+ for (auto& fieldProto : *messageProto->mutable_field()) {
+ StripUnknownOptions(fieldProto.mutable_options());
+ }
+ for (auto& oneofProto : *messageProto->mutable_oneof_decl()) {
+ StripUnknownOptions(oneofProto.mutable_options());
+ }
+ for (auto& nestedTypeProto : *messageProto->mutable_nested_type()) {
+ Strip(prefix, &nestedTypeProto, messageTypeNames, enumTypeNames);
+ }
+ for (auto& enumProto : *messageProto->mutable_enum_type()) {
+ StripUnknownOptions(enumProto.mutable_options());
+ for (auto& enumValue : *enumProto.mutable_value()) {
+ StripUnknownOptions(enumValue.mutable_options());
+ }
+ }
+ }
+
+ void Strip(
+ FileDescriptorProto* fileProto,
+ const THashSet<TString>& messageTypeNames,
+ const THashSet<TString>& enumTypeNames)
+ {
+ const auto prefix = fileProto->package().Empty()
+ ? ""
+ : fileProto->package() + '.';
+
+ RemoveIf(fileProto->mutable_message_type(), [&] (const DescriptorProto& descriptorProto) {
+ return !messageTypeNames.contains(prefix + descriptorProto.name());
+ });
+ RemoveIf(fileProto->mutable_enum_type(), [&] (const EnumDescriptorProto& enumDescriptorProto) {
+ return !enumTypeNames.contains(prefix + enumDescriptorProto.name());
+ });
+
+ fileProto->clear_service();
+ fileProto->clear_extension();
+
+ StripUnknownOptions(fileProto->mutable_options());
+ for (auto& messageProto : *fileProto->mutable_message_type()) {
+ Strip(prefix, &messageProto, messageTypeNames, enumTypeNames);
+ }
+ for (auto& enumProto : *fileProto->mutable_enum_type()) {
+ StripUnknownOptions(enumProto.mutable_options());
+ for (auto& enumValue : *enumProto.mutable_value()) {
+ StripUnknownOptions(enumValue.mutable_options());
+ }
+ }
+ }
+
+private:
+ const FileDescriptor* const ExtensionFile_;
+ THashSet<const Descriptor*> AllDescriptors_;
+ THashSet<const EnumDescriptor*> EnumDescriptors_;
+};
+
+TNode MakeProtoFormatConfigWithDescriptors(const TVector<const Descriptor*>& descriptors)
+{
+ TFileDescriptorSetBuilder builder;
+ auto typeNames = TNode::CreateList();
+ for (const auto* descriptor : descriptors) {
+ builder.AddDescriptor(descriptor);
+ typeNames.Add(descriptor->full_name());
+ }
+
+ auto fileDescriptorSetText = builder.Build().ShortDebugString();
+ TNode config("protobuf");
+ config.Attributes()
+ ("file_descriptor_set_text", std::move(fileDescriptorSetText))
+ ("type_names", std::move(typeNames));
+ return config;
+}
+
+////////////////////////////////////////////////////////////////////////////////
+
+using TTypePtrOrOtherColumns = std::variant<NTi::TTypePtr, TOtherColumns>;
+
+struct TMember {
+ TString Name;
+ TTypePtrOrOtherColumns TypeOrOtherColumns;
+};
+
+////////////////////////////////////////////////////////////////////////////////
+
+TValueTypeOrOtherColumns GetScalarFieldType(
+ const FieldDescriptor& fieldDescriptor,
+ const TProtobufFieldOptions& options)
+{
+ if (options.Type) {
+ switch (*options.Type) {
+ case EProtobufType::EnumInt:
+ return EValueType::VT_INT64;
+ case EProtobufType::EnumString:
+ return EValueType::VT_STRING;
+ case EProtobufType::Any:
+ return EValueType::VT_ANY;
+ case EProtobufType::OtherColumns:
+ return TOtherColumns{};
+ }
+ Y_FAIL();
+ }
+
+ switch (fieldDescriptor.cpp_type()) {
+ case FieldDescriptor::CPPTYPE_INT32:
+ return EValueType::VT_INT32;
+ case FieldDescriptor::CPPTYPE_INT64:
+ return EValueType::VT_INT64;
+ case FieldDescriptor::CPPTYPE_UINT32:
+ return EValueType::VT_UINT32;
+ case FieldDescriptor::CPPTYPE_UINT64:
+ return EValueType::VT_UINT64;
+ case FieldDescriptor::CPPTYPE_FLOAT:
+ case FieldDescriptor::CPPTYPE_DOUBLE:
+ return EValueType::VT_DOUBLE;
+ case FieldDescriptor::CPPTYPE_BOOL:
+ return EValueType::VT_BOOLEAN;
+ case FieldDescriptor::CPPTYPE_STRING:
+ case FieldDescriptor::CPPTYPE_MESSAGE:
+ case FieldDescriptor::CPPTYPE_ENUM:
+ return EValueType::VT_STRING;
+ default:
+ ythrow yexception() <<
+ "Unexpected field type '" << fieldDescriptor.cpp_type_name() << "' " <<
+ "for field " << fieldDescriptor.name();
+ }
+}
+
+bool HasNameExtension(const FieldDescriptor& fieldDescriptor)
+{
+ const auto& options = fieldDescriptor.options();
+ return options.HasExtension(column_name) || options.HasExtension(key_column_name);
+}
+
+void SortFields(TVector<const FieldDescriptor*>& fieldDescriptors, EProtobufFieldSortOrder fieldSortOrder)
+{
+ switch (fieldSortOrder) {
+ case EProtobufFieldSortOrder::AsInProtoFile:
+ return;
+ case EProtobufFieldSortOrder::ByFieldNumber:
+ SortBy(fieldDescriptors, [] (const FieldDescriptor* fieldDescriptor) {
+ return fieldDescriptor->number();
+ });
+ return;
+ }
+ Y_FAIL();
+}
+
+NTi::TTypePtr CreateStruct(TStringBuf fieldName, TVector<TMember> members)
+{
+ TVector<NTi::TStructType::TOwnedMember> structMembers;
+ structMembers.reserve(members.size());
+ for (auto& member : members) {
+ std::visit(TOverloaded{
+ [&] (TOtherColumns) {
+ ythrow TApiUsageError() <<
+ "Could not deduce YT type for field " << member.Name << " of " <<
+ "embedded message field " << fieldName << " " <<
+ "(note that " << EWrapperFieldFlag::OTHER_COLUMNS << " fields " <<
+ "are not allowed inside embedded messages)";
+ },
+ [&] (NTi::TTypePtr& type) {
+ structMembers.emplace_back(std::move(member.Name), std::move(type));
+ },
+ }, member.TypeOrOtherColumns);
+ }
+ return NTi::Struct(std::move(structMembers));
+}
+
+TMaybe<TVector<TString>> InferColumnFilter(const ::google::protobuf::Descriptor& descriptor)
+{
+ auto isOtherColumns = [] (const ::google::protobuf::FieldDescriptor& field) {
+ return GetFieldOptions(&field).Type == EProtobufType::OtherColumns;
+ };
+
+ TVector<TString> result;
+ result.reserve(descriptor.field_count());
+ for (int i = 0; i < descriptor.field_count(); ++i) {
+ const auto& field = *descriptor.field(i);
+ if (isOtherColumns(field)) {
+ return {};
+ }
+ result.push_back(GetColumnName(field));
+ }
+ return result;
+}
+
+////////////////////////////////////////////////////////////////////////////////
+
+class TTableSchemaInferrer
+{
+public:
+ TTableSchemaInferrer(bool keepFieldsWithoutExtension)
+ : KeepFieldsWithoutExtension_(keepFieldsWithoutExtension)
+ { }
+
+ TTableSchema InferSchema(const Descriptor& messageDescriptor);
+
+private:
+ TTypePtrOrOtherColumns GetFieldType(
+ const FieldDescriptor& fieldDescriptor,
+ const TProtobufFieldOptions& defaultOptions);
+
+ void ProcessOneofField(
+ TStringBuf containingFieldName,
+ const OneofDescriptor& oneofDescriptor,
+ const TProtobufFieldOptions& defaultFieldOptions,
+ const TProtobufOneofOptions& defaultOneofOptions,
+ EProtobufFieldSortOrder fieldSortOrder,
+ TVector<TMember>* members);
+
+ TVector<TMember> GetMessageMembers(
+ TStringBuf containingFieldName,
+ const Descriptor& fieldDescriptor,
+ TProtobufFieldOptions defaultFieldOptions,
+ std::optional<EProtobufFieldSortOrder> overrideFieldSortOrder = std::nullopt);
+
+ NTi::TTypePtr GetMessageType(
+ const FieldDescriptor& fieldDescriptor,
+ TProtobufFieldOptions defaultFieldOptions);
+
+ NTi::TTypePtr GetMapType(
+ const FieldDescriptor& fieldDescriptor,
+ const TProtobufFieldOptions& fieldOptions);
+
+private:
+ void GetMessageMembersImpl(
+ TStringBuf containingFieldName,
+ const Descriptor& fieldDescriptor,
+ TProtobufFieldOptions defaultFieldOptions,
+ std::optional<EProtobufFieldSortOrder> overrideFieldSortOrder,
+ TVector<TMember>* members);
+
+private:
+ const bool KeepFieldsWithoutExtension_;
+ TCycleChecker CycleChecker_;
+};
+
+void TTableSchemaInferrer::ProcessOneofField(
+ TStringBuf containingFieldName,
+ const OneofDescriptor& oneofDescriptor,
+ const TProtobufFieldOptions& defaultFieldOptions,
+ const TProtobufOneofOptions& defaultOneofOptions,
+ EProtobufFieldSortOrder fieldSortOrder,
+ TVector<TMember>* members)
+{
+ auto oneofOptions = GetOneofOptions(&oneofDescriptor, defaultOneofOptions);
+
+ auto addFields = [&] (TVector<TMember>* members, bool removeOptionality) {
+ TVector<const FieldDescriptor*> fieldDescriptors;
+ for (int i = 0; i < oneofDescriptor.field_count(); ++i) {
+ fieldDescriptors.push_back(oneofDescriptor.field(i));
+ }
+ SortFields(fieldDescriptors, fieldSortOrder);
+ for (auto innerFieldDescriptor : fieldDescriptors) {
+ auto typeOrOtherColumns = GetFieldType(
+ *innerFieldDescriptor,
+ defaultFieldOptions);
+ if (auto* maybeType = std::get_if<NTi::TTypePtr>(&typeOrOtherColumns);
+ maybeType && removeOptionality && (*maybeType)->IsOptional())
+ {
+ typeOrOtherColumns = (*maybeType)->AsOptional()->GetItemType();
+ }
+ members->push_back(TMember{
+ GetColumnName(*innerFieldDescriptor),
+ std::move(typeOrOtherColumns),
+ });
+ }
+ };
+
+ switch (oneofOptions.Mode) {
+ case EProtobufOneofMode::SeparateFields:
+ addFields(members, /* removeOptionality */ false);
+ return;
+ case EProtobufOneofMode::Variant: {
+ TVector<TMember> variantMembers;
+ addFields(&variantMembers, /* removeOptionality */ true);
+ members->push_back(TMember{
+ oneofOptions.VariantFieldName,
+ NTi::Optional(
+ NTi::Variant(
+ CreateStruct(containingFieldName, std::move(variantMembers))
+ )
+ )
+ });
+ return;
+ }
+ }
+ Y_FAIL();
+}
+
+TVector<TMember> TTableSchemaInferrer::GetMessageMembers(
+ TStringBuf containingFieldName,
+ const Descriptor& messageDescriptor,
+ TProtobufFieldOptions defaultFieldOptions,
+ std::optional<EProtobufFieldSortOrder> overrideFieldSortOrder)
+{
+ TVector<TMember> members;
+ GetMessageMembersImpl(
+ containingFieldName,
+ messageDescriptor,
+ defaultFieldOptions,
+ overrideFieldSortOrder,
+ &members
+ );
+ return members;
+}
+
+void TTableSchemaInferrer::GetMessageMembersImpl(
+ TStringBuf containingFieldName,
+ const Descriptor& messageDescriptor,
+ TProtobufFieldOptions defaultFieldOptions,
+ std::optional<EProtobufFieldSortOrder> overrideFieldSortOrder,
+ TVector<TMember>* members)
+{
+ auto guard = CycleChecker_.Enter(&messageDescriptor);
+ defaultFieldOptions = GetDefaultFieldOptions(&messageDescriptor, defaultFieldOptions);
+ auto messageOptions = GetMessageOptions(&messageDescriptor);
+ auto defaultOneofOptions = GetDefaultOneofOptions(&messageDescriptor);
+
+ TVector<const FieldDescriptor*> fieldDescriptors;
+ fieldDescriptors.reserve(messageDescriptor.field_count());
+ for (int i = 0; i < messageDescriptor.field_count(); ++i) {
+ if (!KeepFieldsWithoutExtension_ && !HasNameExtension(*messageDescriptor.field(i))) {
+ continue;
+ }
+ fieldDescriptors.push_back(messageDescriptor.field(i));
+ }
+
+ auto fieldSortOrder = overrideFieldSortOrder.value_or(messageOptions.FieldSortOrder);
+ SortFields(fieldDescriptors, fieldSortOrder);
+
+ THashSet<const OneofDescriptor*> visitedOneofs;
+ for (const auto innerFieldDescriptor : fieldDescriptors) {
+ auto oneofDescriptor = innerFieldDescriptor->containing_oneof();
+ if (oneofDescriptor) {
+ if (visitedOneofs.contains(oneofDescriptor)) {
+ continue;
+ }
+ ProcessOneofField(
+ containingFieldName,
+ *oneofDescriptor,
+ defaultFieldOptions,
+ defaultOneofOptions,
+ messageOptions.FieldSortOrder,
+ members);
+ visitedOneofs.insert(oneofDescriptor);
+ continue;
+ }
+ auto fieldOptions = GetFieldOptions(innerFieldDescriptor, defaultFieldOptions);
+ if (fieldOptions.SerializationMode == EProtobufSerializationMode::Embedded) {
+ Y_ENSURE(innerFieldDescriptor->type() == FieldDescriptor::TYPE_MESSAGE,
+ "EMBEDDED column must have message type");
+ Y_ENSURE(innerFieldDescriptor->label() == FieldDescriptor::LABEL_REQUIRED,
+ "EMBEDDED column must be marked required");
+ GetMessageMembersImpl(
+ innerFieldDescriptor->full_name(),
+ *innerFieldDescriptor->message_type(),
+ defaultFieldOptions,
+ /*overrideFieldSortOrder*/ std::nullopt,
+ members);
+ } else {
+ auto typeOrOtherColumns = GetFieldType(
+ *innerFieldDescriptor,
+ defaultFieldOptions);
+ members->push_back(TMember{
+ GetColumnName(*innerFieldDescriptor),
+ std::move(typeOrOtherColumns),
+ });
+ }
+ }
+}
+
+NTi::TTypePtr TTableSchemaInferrer::GetMessageType(
+ const FieldDescriptor& fieldDescriptor,
+ TProtobufFieldOptions defaultFieldOptions)
+{
+ Y_VERIFY(fieldDescriptor.message_type());
+ const auto& messageDescriptor = *fieldDescriptor.message_type();
+ auto members = GetMessageMembers(
+ fieldDescriptor.full_name(),
+ messageDescriptor,
+ defaultFieldOptions);
+
+ return CreateStruct(fieldDescriptor.full_name(), std::move(members));
+}
+
+NTi::TTypePtr TTableSchemaInferrer::GetMapType(
+ const FieldDescriptor& fieldDescriptor,
+ const TProtobufFieldOptions& fieldOptions)
+{
+ Y_VERIFY(fieldDescriptor.is_map());
+ switch (fieldOptions.MapMode) {
+ case EProtobufMapMode::ListOfStructsLegacy:
+ case EProtobufMapMode::ListOfStructs: {
+ TProtobufFieldOptions embeddedOptions;
+ if (fieldOptions.MapMode == EProtobufMapMode::ListOfStructs) {
+ embeddedOptions.SerializationMode = EProtobufSerializationMode::Yt;
+ }
+ auto list = NTi::List(GetMessageType(fieldDescriptor, embeddedOptions));
+ switch (fieldOptions.ListMode) {
+ case EProtobufListMode::Required:
+ return list;
+ case EProtobufListMode::Optional:
+ return NTi::Optional(std::move(list));
+ }
+ Y_FAIL();
+ }
+ case EProtobufMapMode::Dict:
+ case EProtobufMapMode::OptionalDict: {
+ auto message = fieldDescriptor.message_type();
+ Y_VERIFY(message->field_count() == 2);
+ auto keyVariant = GetScalarFieldType(*message->field(0), TProtobufFieldOptions{});
+ Y_VERIFY(std::holds_alternative<EValueType>(keyVariant));
+ auto key = std::get<EValueType>(keyVariant);
+ TProtobufFieldOptions embeddedOptions;
+ embeddedOptions.SerializationMode = EProtobufSerializationMode::Yt;
+ auto valueVariant = GetFieldType(*message->field(1), embeddedOptions);
+ Y_VERIFY(std::holds_alternative<NTi::TTypePtr>(valueVariant));
+ auto value = std::get<NTi::TTypePtr>(valueVariant);
+ Y_VERIFY(value->IsOptional());
+ value = value->AsOptional()->GetItemType();
+ auto dict = NTi::Dict(ToTypeV3(key, true), value);
+ if (fieldOptions.MapMode == EProtobufMapMode::OptionalDict) {
+ return NTi::Optional(dict);
+ } else {
+ return dict;
+ }
+ }
+ }
+}
+
+TTypePtrOrOtherColumns TTableSchemaInferrer::GetFieldType(
+ const FieldDescriptor& fieldDescriptor,
+ const TProtobufFieldOptions& defaultOptions)
+{
+ auto fieldOptions = GetFieldOptions(&fieldDescriptor, defaultOptions);
+ if (fieldOptions.Type) {
+ ValidateProtobufType(fieldDescriptor, *fieldOptions.Type);
+ }
+
+ auto getScalarType = [&] {
+ auto valueTypeOrOtherColumns = GetScalarFieldType(fieldDescriptor, fieldOptions);
+ return std::visit(TOverloaded{
+ [] (TOtherColumns) -> TTypePtrOrOtherColumns {
+ return TOtherColumns{};
+ },
+ [] (EValueType valueType) -> TTypePtrOrOtherColumns {
+ return ToTypeV3(valueType, true);
+ }
+ }, valueTypeOrOtherColumns);
+ };
+
+ auto withFieldLabel = [&] (const TTypePtrOrOtherColumns& typeOrOtherColumns) -> TTypePtrOrOtherColumns {
+ switch (fieldDescriptor.label()) {
+ case FieldDescriptor::Label::LABEL_REPEATED: {
+ Y_ENSURE(fieldOptions.SerializationMode == EProtobufSerializationMode::Yt,
+ "Repeated fields are supported only for YT serialization mode, field \"" + fieldDescriptor.full_name() +
+ "\" has incorrect serialization mode");
+ auto* type = std::get_if<NTi::TTypePtr>(&typeOrOtherColumns);
+ Y_ENSURE(type, "OTHER_COLUMNS field can not be repeated");
+ switch (fieldOptions.ListMode) {
+ case EProtobufListMode::Required:
+ return NTi::TTypePtr(NTi::List(*type));
+ case EProtobufListMode::Optional:
+ return NTi::TTypePtr(NTi::Optional(NTi::List(*type)));
+ }
+ Y_FAIL();
+ }
+ case FieldDescriptor::Label::LABEL_OPTIONAL:
+ return std::visit(TOverloaded{
+ [] (TOtherColumns) -> TTypePtrOrOtherColumns {
+ return TOtherColumns{};
+ },
+ [] (NTi::TTypePtr type) -> TTypePtrOrOtherColumns {
+ return NTi::TTypePtr(NTi::Optional(std::move(type)));
+ }
+ }, typeOrOtherColumns);
+ case FieldDescriptor::LABEL_REQUIRED: {
+ auto* type = std::get_if<NTi::TTypePtr>(&typeOrOtherColumns);
+ Y_ENSURE(type, "OTHER_COLUMNS field can not be required");
+ return *type;
+ }
+ }
+ Y_FAIL();
+ };
+
+ switch (fieldOptions.SerializationMode) {
+ case EProtobufSerializationMode::Protobuf:
+ return withFieldLabel(getScalarType());
+ case EProtobufSerializationMode::Yt:
+ if (fieldDescriptor.type() == FieldDescriptor::TYPE_MESSAGE) {
+ if (fieldDescriptor.is_map()) {
+ return GetMapType(fieldDescriptor, fieldOptions);
+ } else {
+ return withFieldLabel(GetMessageType(fieldDescriptor, TProtobufFieldOptions{}));
+ }
+ } else {
+ return withFieldLabel(getScalarType());
+ }
+ case EProtobufSerializationMode::Embedded:
+ ythrow yexception() << "EMBEDDED field is not allowed for field "
+ << fieldDescriptor.full_name();
+ }
+ Y_FAIL();
+}
+
+TTableSchema TTableSchemaInferrer::InferSchema(const Descriptor& messageDescriptor)
+{
+ TTableSchema result;
+
+ auto defaultFieldOptions = GetDefaultFieldOptions(&messageDescriptor);
+ auto members = GetMessageMembers(
+ messageDescriptor.full_name(),
+ messageDescriptor,
+ defaultFieldOptions,
+ // Use special sort order for top level messages.
+ /*overrideFieldSortOrder*/ EProtobufFieldSortOrder::AsInProtoFile);
+
+ for (auto& member : members) {
+ std::visit(TOverloaded{
+ [&] (TOtherColumns) {
+ result.Strict(false);
+ },
+ [&] (NTi::TTypePtr& type) {
+ result.AddColumn(TColumnSchema()
+ .Name(std::move(member.Name))
+ .Type(std::move(type))
+ );
+ },
+ }, member.TypeOrOtherColumns);
+ }
+
+ return result;
+}
+
+TTableSchema CreateTableSchemaImpl(
+ const Descriptor& messageDescriptor,
+ bool keepFieldsWithoutExtension)
+{
+ TTableSchemaInferrer inferrer(keepFieldsWithoutExtension);
+ return inferrer.InferSchema(messageDescriptor);
+}
+
+////////////////////////////////////////////////////////////////////////////////
+
+} // namespace NYT::NDetail
+
+////////////////////////////////////////////////////////////////////////////////
+
+template <>
+void Out<NYT::EWrapperFieldFlag::Enum>(IOutputStream& stream, NYT::EWrapperFieldFlag::Enum value)
+{
+ stream << NYT::EWrapperFieldFlag_Enum_Name(value);
+}
+
+template <>
+void Out<NYT::EWrapperMessageFlag::Enum>(IOutputStream& stream, NYT::EWrapperMessageFlag::Enum value)
+{
+ stream << NYT::EWrapperMessageFlag_Enum_Name(value);
+}
+
+template <>
+void Out<NYT::EWrapperOneofFlag::Enum>(IOutputStream& stream, NYT::EWrapperOneofFlag::Enum value)
+{
+ stream << NYT::EWrapperOneofFlag_Enum_Name(value);
+}
diff --git a/yt/cpp/mapreduce/interface/protobuf_format.h b/yt/cpp/mapreduce/interface/protobuf_format.h
new file mode 100644
index 0000000000..aafbced386
--- /dev/null
+++ b/yt/cpp/mapreduce/interface/protobuf_format.h
@@ -0,0 +1,106 @@
+#pragma once
+
+#include "common.h"
+
+#include <yt/yt_proto/yt/formats/extension.pb.h>
+
+#include <util/generic/maybe.h>
+
+#include <google/protobuf/message.h>
+
+/// @cond Doxygen_Suppress
+namespace NYT::NDetail {
+
+////////////////////////////////////////////////////////////////////////////////
+
+enum class EProtobufType
+{
+ EnumInt /* "enum_int" */,
+ EnumString /* "enum_string" */,
+ Any /* "any" */,
+ OtherColumns /* "other_columns" */,
+};
+
+enum class EProtobufSerializationMode
+{
+ Protobuf,
+ Yt,
+ Embedded,
+};
+
+enum class EProtobufListMode
+{
+ Optional,
+ Required,
+};
+
+enum class EProtobufMapMode
+{
+ ListOfStructsLegacy,
+ ListOfStructs,
+ Dict,
+ OptionalDict,
+};
+
+enum class EProtobufFieldSortOrder
+{
+ AsInProtoFile,
+ ByFieldNumber,
+};
+
+enum class EProtobufOneofMode
+{
+ SeparateFields,
+ Variant,
+};
+
+enum class EProtobufEnumWritingMode
+{
+ SkipUnknownValues,
+ CheckValues,
+};
+
+struct TProtobufOneofOptions
+{
+ EProtobufOneofMode Mode = EProtobufOneofMode::Variant;
+ TString VariantFieldName;
+};
+
+struct TProtobufFieldOptions
+{
+ TMaybe<EProtobufType> Type;
+ EProtobufSerializationMode SerializationMode = EProtobufSerializationMode::Protobuf;
+ EProtobufListMode ListMode = EProtobufListMode::Required;
+ EProtobufMapMode MapMode = EProtobufMapMode::ListOfStructsLegacy;
+};
+
+struct TProtobufMessageOptions
+{
+ EProtobufFieldSortOrder FieldSortOrder = EProtobufFieldSortOrder::ByFieldNumber;
+};
+
+TString GetColumnName(const ::google::protobuf::FieldDescriptor& field);
+
+TProtobufFieldOptions GetFieldOptions(
+ const ::google::protobuf::FieldDescriptor* fieldDescriptor,
+ const TMaybe<TProtobufFieldOptions>& defaultFieldOptions = {});
+
+TProtobufOneofOptions GetOneofOptions(
+ const ::google::protobuf::OneofDescriptor* oneofDescriptor,
+ const TMaybe<TProtobufOneofOptions>& defaultOneofOptions = {});
+
+TProtobufMessageOptions GetMessageOptions(const ::google::protobuf::Descriptor* descriptor);
+
+TMaybe<TVector<TString>> InferColumnFilter(const ::google::protobuf::Descriptor& descriptor);
+
+TNode MakeProtoFormatConfigWithTables(const TVector<const ::google::protobuf::Descriptor*>& descriptors);
+TNode MakeProtoFormatConfigWithDescriptors(const TVector<const ::google::protobuf::Descriptor*>& descriptors);
+
+TTableSchema CreateTableSchemaImpl(
+ const ::google::protobuf::Descriptor& messageDescriptor,
+ bool keepFieldsWithoutExtension);
+
+////////////////////////////////////////////////////////////////////////////////
+
+} // namespace NYT::NDetail
+/// @endcond
diff --git a/yt/cpp/mapreduce/interface/protobuf_table_schema_ut.cpp b/yt/cpp/mapreduce/interface/protobuf_table_schema_ut.cpp
new file mode 100644
index 0000000000..19a3d5163f
--- /dev/null
+++ b/yt/cpp/mapreduce/interface/protobuf_table_schema_ut.cpp
@@ -0,0 +1,451 @@
+#include "common.h"
+#include "errors.h"
+#include "common_ut.h"
+#include "util/generic/fwd.h"
+
+#include <yt/cpp/mapreduce/interface/protobuf_table_schema_ut.pb.h>
+#include <yt/cpp/mapreduce/interface/proto3_ut.pb.h>
+
+#include <yt/cpp/mapreduce/tests/yt_unittest_lib/yt_unittest_lib.h>
+
+#include <library/cpp/testing/unittest/registar.h>
+
+#include <algorithm>
+
+using namespace NYT;
+
+bool IsFieldPresent(const TTableSchema& schema, TStringBuf name)
+{
+ for (const auto& field : schema.Columns()) {
+ if (field.Name() == name) {
+ return true;
+ }
+ }
+ return false;
+}
+
+Y_UNIT_TEST_SUITE(ProtoSchemaTest_Simple)
+{
+ Y_UNIT_TEST(TIntegral)
+ {
+ const auto schema = CreateTableSchema<NUnitTesting::TIntegral>();
+
+ ASSERT_SERIALIZABLES_EQUAL(schema, TTableSchema()
+ .AddColumn(TColumnSchema().Name("DoubleField").Type(ToTypeV3(EValueType::VT_DOUBLE, false)))
+ .AddColumn(TColumnSchema().Name("FloatField").Type(ToTypeV3(EValueType::VT_DOUBLE, false)))
+ .AddColumn(TColumnSchema().Name("Int32Field").Type(ToTypeV3(EValueType::VT_INT32, false)))
+ .AddColumn(TColumnSchema().Name("Int64Field").Type(ToTypeV3(EValueType::VT_INT64, false)))
+ .AddColumn(TColumnSchema().Name("Uint32Field").Type(ToTypeV3(EValueType::VT_UINT32, false)))
+ .AddColumn(TColumnSchema().Name("Uint64Field").Type(ToTypeV3(EValueType::VT_UINT64, false)))
+ .AddColumn(TColumnSchema().Name("Sint32Field").Type(ToTypeV3(EValueType::VT_INT32, false)))
+ .AddColumn(TColumnSchema().Name("Sint64Field").Type(ToTypeV3(EValueType::VT_INT64, false)))
+ .AddColumn(TColumnSchema().Name("Fixed32Field").Type(ToTypeV3(EValueType::VT_UINT32, false)))
+ .AddColumn(TColumnSchema().Name("Fixed64Field").Type(ToTypeV3(EValueType::VT_UINT64, false)))
+ .AddColumn(TColumnSchema().Name("Sfixed32Field").Type(ToTypeV3(EValueType::VT_INT32, false)))
+ .AddColumn(TColumnSchema().Name("Sfixed64Field").Type(ToTypeV3(EValueType::VT_INT64, false)))
+ .AddColumn(TColumnSchema().Name("BoolField").Type(ToTypeV3(EValueType::VT_BOOLEAN, false)))
+ .AddColumn(TColumnSchema().Name("EnumField").Type(ToTypeV3(EValueType::VT_STRING, false))));
+ }
+
+ Y_UNIT_TEST(TOneOf)
+ {
+ const auto schema = CreateTableSchema<NUnitTesting::TOneOf>();
+
+ ASSERT_SERIALIZABLES_EQUAL(schema, TTableSchema()
+ .AddColumn(TColumnSchema().Name("DoubleField").Type(ToTypeV3(EValueType::VT_DOUBLE, false)))
+ .AddColumn(TColumnSchema().Name("Int32Field").Type(ToTypeV3(EValueType::VT_INT32, false)))
+ .AddColumn(TColumnSchema().Name("BoolField").Type(ToTypeV3(EValueType::VT_BOOLEAN, false))));
+ }
+
+ Y_UNIT_TEST(TWithRequired)
+ {
+ const auto schema = CreateTableSchema<NUnitTesting::TWithRequired>();
+
+ ASSERT_SERIALIZABLES_EQUAL(schema, TTableSchema()
+ .AddColumn(TColumnSchema().Name("RequiredField").Type(ToTypeV3(EValueType::VT_STRING, true)))
+ .AddColumn(TColumnSchema().Name("NotRequiredField").Type(ToTypeV3(EValueType::VT_STRING, false))));
+ }
+
+ Y_UNIT_TEST(TAggregated)
+ {
+ const auto schema = CreateTableSchema<NUnitTesting::TAggregated>();
+
+ UNIT_ASSERT_VALUES_EQUAL(6, schema.Columns().size());
+ ASSERT_SERIALIZABLES_EQUAL(schema, TTableSchema()
+ .AddColumn(TColumnSchema().Name("StringField").Type(ToTypeV3(EValueType::VT_STRING, false)))
+ .AddColumn(TColumnSchema().Name("BytesField").Type(ToTypeV3(EValueType::VT_STRING, false)))
+ .AddColumn(TColumnSchema().Name("NestedField").Type(ToTypeV3(EValueType::VT_STRING, false)))
+ .AddColumn(TColumnSchema().Name("NestedRepeatedField").Type(ToTypeV3(EValueType::VT_STRING, false)))
+ .AddColumn(TColumnSchema().Name("NestedOneOfField").Type(ToTypeV3(EValueType::VT_STRING, false)))
+ .AddColumn(TColumnSchema().Name("NestedRecursiveField").Type(ToTypeV3(EValueType::VT_STRING, false))));
+ }
+
+ Y_UNIT_TEST(TAliased)
+ {
+ const auto schema = CreateTableSchema<NUnitTesting::TAliased>();
+
+ ASSERT_SERIALIZABLES_EQUAL(schema, TTableSchema()
+ .AddColumn(TColumnSchema().Name("key").Type(ToTypeV3(EValueType::VT_INT32, false)))
+ .AddColumn(TColumnSchema().Name("subkey").Type(ToTypeV3(EValueType::VT_DOUBLE, false)))
+ .AddColumn(TColumnSchema().Name("Data").Type(ToTypeV3(EValueType::VT_STRING, false))));
+ }
+
+ Y_UNIT_TEST(SortColumns)
+ {
+ const TSortColumns keys = {"key", "subkey"};
+
+ const auto schema = CreateTableSchema<NUnitTesting::TAliased>(keys);
+
+ ASSERT_SERIALIZABLES_EQUAL(schema, TTableSchema()
+ .AddColumn(TColumnSchema()
+ .Name("key")
+ .Type(ToTypeV3(EValueType::VT_INT32, false))
+ .SortOrder(ESortOrder::SO_ASCENDING))
+ .AddColumn(TColumnSchema()
+ .Name("subkey")
+ .Type(ToTypeV3(EValueType::VT_DOUBLE, false))
+ .SortOrder(ESortOrder::SO_ASCENDING))
+ .AddColumn(TColumnSchema().Name("Data").Type(ToTypeV3(EValueType::VT_STRING, false))));
+ }
+
+ Y_UNIT_TEST(SortColumnsReordered)
+ {
+ const TSortColumns keys = {"subkey"};
+
+ const auto schema = CreateTableSchema<NUnitTesting::TAliased>(keys);
+
+ ASSERT_SERIALIZABLES_EQUAL(schema, TTableSchema()
+ .AddColumn(TColumnSchema()
+ .Name("subkey")
+ .Type(ToTypeV3(EValueType::VT_DOUBLE, false))
+ .SortOrder(ESortOrder::SO_ASCENDING))
+ .AddColumn(TColumnSchema().Name("key").Type(ToTypeV3(EValueType::VT_INT32, false)))
+ .AddColumn(TColumnSchema().Name("Data").Type(ToTypeV3(EValueType::VT_STRING, false))));
+ }
+
+ Y_UNIT_TEST(SortColumnsInvalid)
+ {
+ UNIT_ASSERT_EXCEPTION(CreateTableSchema<NUnitTesting::TAliased>({"subkey", "subkey"}), yexception);
+ UNIT_ASSERT_EXCEPTION(CreateTableSchema<NUnitTesting::TAliased>({"key", "junk"}), yexception);
+ }
+
+ Y_UNIT_TEST(KeepFieldsWithoutExtensionTrue)
+ {
+ const auto schema = CreateTableSchema<NUnitTesting::TAliased>({}, true);
+ UNIT_ASSERT(IsFieldPresent(schema, "key"));
+ UNIT_ASSERT(IsFieldPresent(schema, "subkey"));
+ UNIT_ASSERT(IsFieldPresent(schema, "Data"));
+ UNIT_ASSERT(schema.Strict());
+ }
+
+ Y_UNIT_TEST(KeepFieldsWithoutExtensionFalse)
+ {
+ const auto schema = CreateTableSchema<NUnitTesting::TAliased>({}, false);
+ UNIT_ASSERT(IsFieldPresent(schema, "key"));
+ UNIT_ASSERT(IsFieldPresent(schema, "subkey"));
+ UNIT_ASSERT(!IsFieldPresent(schema, "Data"));
+ UNIT_ASSERT(schema.Strict());
+ }
+
+ Y_UNIT_TEST(ProtobufTypeOption)
+ {
+ const auto schema = CreateTableSchema<NUnitTesting::TWithTypeOptions>({});
+
+ ASSERT_SERIALIZABLES_EQUAL(schema, TTableSchema()
+ .Strict(false)
+ .AddColumn(TColumnSchema().Name("ColorIntField").Type(ToTypeV3(EValueType::VT_INT64, false)))
+ .AddColumn(TColumnSchema().Name("ColorStringField").Type(ToTypeV3(EValueType::VT_STRING, false)))
+ .AddColumn(TColumnSchema().Name("AnyField").Type(ToTypeV3(EValueType::VT_ANY, false)))
+ .AddColumn(TColumnSchema().Name("EmbeddedField").Type(
+ NTi::Optional(NTi::Struct({
+ {"ColorIntField", ToTypeV3(EValueType::VT_INT64, false)},
+ {"ColorStringField", ToTypeV3(EValueType::VT_STRING, false)},
+ {"AnyField", ToTypeV3(EValueType::VT_ANY, false)}}))))
+ .AddColumn(TColumnSchema().Name("RepeatedEnumIntField").Type(NTi::List(NTi::Int64()))));
+ }
+
+ Y_UNIT_TEST(ProtobufTypeOption_TypeMismatch)
+ {
+ UNIT_ASSERT_EXCEPTION(
+ CreateTableSchema<NUnitTesting::TWithTypeOptions_TypeMismatch_EnumInt>({}),
+ yexception);
+ UNIT_ASSERT_EXCEPTION(
+ CreateTableSchema<NUnitTesting::TWithTypeOptions_TypeMismatch_EnumString>({}),
+ yexception);
+ UNIT_ASSERT_EXCEPTION(
+ CreateTableSchema<NUnitTesting::TWithTypeOptions_TypeMismatch_Any>({}),
+ yexception);
+ UNIT_ASSERT_EXCEPTION(
+ CreateTableSchema<NUnitTesting::TWithTypeOptions_TypeMismatch_OtherColumns>({}),
+ yexception);
+ }
+}
+
+Y_UNIT_TEST_SUITE(ProtoSchemaTest_Complex)
+{
+ Y_UNIT_TEST(TRepeated)
+ {
+ UNIT_ASSERT_EXCEPTION(CreateTableSchema<NUnitTesting::TRepeated>(), yexception);
+
+ const auto schema = CreateTableSchema<NUnitTesting::TRepeatedYtMode>();
+ ASSERT_SERIALIZABLES_EQUAL(schema, TTableSchema()
+ .AddColumn(TColumnSchema().Name("Int32Field").Type(NTi::List(ToTypeV3(EValueType::VT_INT32, true)))));
+ }
+
+ Y_UNIT_TEST(TRepeatedOptionalList)
+ {
+ const auto schema = CreateTableSchema<NUnitTesting::TOptionalList>();
+ auto type = NTi::Optional(NTi::List(NTi::Int64()));
+ ASSERT_SERIALIZABLES_EQUAL(schema, TTableSchema()
+ .AddColumn(TColumnSchema().Name("OptionalListInt64").TypeV3(type)));
+ }
+
+ NTi::TTypePtr GetUrlRowType(bool required)
+ {
+ static const NTi::TTypePtr structType = NTi::Struct({
+ {"Host", ToTypeV3(EValueType::VT_STRING, false)},
+ {"Path", ToTypeV3(EValueType::VT_STRING, false)},
+ {"HttpCode", ToTypeV3(EValueType::VT_INT32, false)}});
+ return required ? structType : NTi::TTypePtr(NTi::Optional(structType));
+ }
+
+ Y_UNIT_TEST(TRowFieldSerializationOption)
+ {
+ const auto schema = CreateTableSchema<NUnitTesting::TRowFieldSerializationOption>();
+
+ ASSERT_SERIALIZABLES_EQUAL(schema, TTableSchema()
+ .AddColumn(TColumnSchema().Name("UrlRow_1").Type(GetUrlRowType(false)))
+ .AddColumn(TColumnSchema().Name("UrlRow_2").Type(ToTypeV3(EValueType::VT_STRING, false))));
+ }
+
+ Y_UNIT_TEST(TRowMessageSerializationOption)
+ {
+ const auto schema = CreateTableSchema<NUnitTesting::TRowMessageSerializationOption>();
+
+ ASSERT_SERIALIZABLES_EQUAL(schema, TTableSchema()
+ .AddColumn(TColumnSchema().Name("UrlRow_1").Type(GetUrlRowType(false)))
+ .AddColumn(TColumnSchema().Name("UrlRow_2").Type(GetUrlRowType(false))));
+ }
+
+ Y_UNIT_TEST(TRowMixedSerializationOptions)
+ {
+ const auto schema = CreateTableSchema<NUnitTesting::TRowMixedSerializationOptions>();
+
+ ASSERT_SERIALIZABLES_EQUAL(schema, TTableSchema()
+ .AddColumn(TColumnSchema().Name("UrlRow_1").Type(GetUrlRowType(false)))
+ .AddColumn(TColumnSchema().Name("UrlRow_2").Type(ToTypeV3(EValueType::VT_STRING, false))));
+ }
+
+ NTi::TTypePtr GetUrlRowType_ColumnNames(bool required)
+ {
+ static const NTi::TTypePtr type = NTi::Struct({
+ {"Host_ColumnName", ToTypeV3(EValueType::VT_STRING, false)},
+ {"Path_KeyColumnName", ToTypeV3(EValueType::VT_STRING, false)},
+ {"HttpCode", ToTypeV3(EValueType::VT_INT32, false)},
+ });
+ return required ? type : NTi::TTypePtr(NTi::Optional(type));
+ }
+
+ Y_UNIT_TEST(TRowMixedSerializationOptions_ColumnNames)
+ {
+ const auto schema = CreateTableSchema<NUnitTesting::TRowMixedSerializationOptions_ColumnNames>();
+
+ ASSERT_SERIALIZABLES_EQUAL(schema, TTableSchema()
+ .AddColumn(TColumnSchema().Name("UrlRow_1").Type(GetUrlRowType_ColumnNames(false)))
+ .AddColumn(TColumnSchema().Name("UrlRow_2").Type(ToTypeV3(EValueType::VT_STRING, false))));
+ }
+
+ Y_UNIT_TEST(NoOptionInheritance)
+ {
+ auto deepestEmbedded = NTi::Optional(NTi::Struct({{"x", ToTypeV3(EValueType::VT_INT64, false)}}));
+
+ const auto schema = CreateTableSchema<NUnitTesting::TNoOptionInheritance>();
+
+ ASSERT_SERIALIZABLES_EQUAL(schema, TTableSchema()
+ .AddColumn(TColumnSchema()
+ .Name("EmbeddedYt_YtOption")
+ .Type(NTi::Optional(NTi::Struct({{"embedded", deepestEmbedded}}))))
+ .AddColumn(TColumnSchema().Name("EmbeddedYt_ProtobufOption").Type(ToTypeV3(EValueType::VT_STRING, false)))
+ .AddColumn(TColumnSchema().Name("EmbeddedYt_NoOption").Type(ToTypeV3(EValueType::VT_STRING, false)))
+ .AddColumn(TColumnSchema()
+ .Name("EmbeddedProtobuf_YtOption")
+ .Type(NTi::Optional(NTi::Struct({{"embedded", ToTypeV3(EValueType::VT_STRING, false)}}))))
+ .AddColumn(TColumnSchema().Name("EmbeddedProtobuf_ProtobufOption").Type(ToTypeV3(EValueType::VT_STRING, false)))
+ .AddColumn(TColumnSchema().Name("EmbeddedProtobuf_NoOption").Type(ToTypeV3(EValueType::VT_STRING, false)))
+ .AddColumn(TColumnSchema()
+ .Name("Embedded_YtOption")
+ .Type(NTi::Optional(NTi::Struct({{"embedded", ToTypeV3(EValueType::VT_STRING, false)}}))))
+ .AddColumn(TColumnSchema().Name("Embedded_ProtobufOption").Type(ToTypeV3(EValueType::VT_STRING, false)))
+ .AddColumn(TColumnSchema().Name("Embedded_NoOption").Type(ToTypeV3(EValueType::VT_STRING, false))));
+ }
+
+ Y_UNIT_TEST(Cyclic)
+ {
+ UNIT_ASSERT_EXCEPTION(CreateTableSchema<NUnitTesting::TCyclic>(), TApiUsageError);
+ UNIT_ASSERT_EXCEPTION(CreateTableSchema<NUnitTesting::TCyclic::TA>(), TApiUsageError);
+ UNIT_ASSERT_EXCEPTION(CreateTableSchema<NUnitTesting::TCyclic::TB>(), TApiUsageError);
+ UNIT_ASSERT_EXCEPTION(CreateTableSchema<NUnitTesting::TCyclic::TC>(), TApiUsageError);
+ UNIT_ASSERT_EXCEPTION(CreateTableSchema<NUnitTesting::TCyclic::TD>(), TApiUsageError);
+
+ ASSERT_SERIALIZABLES_EQUAL(
+ TTableSchema().AddColumn(
+ TColumnSchema().Name("d").TypeV3(NTi::Optional(NTi::String()))),
+ CreateTableSchema<NUnitTesting::TCyclic::TE>());
+ }
+
+ Y_UNIT_TEST(FieldSortOrder)
+ {
+ const auto schema = CreateTableSchema<NUnitTesting::TFieldSortOrder>();
+
+ auto byFieldNumber = NTi::Optional(NTi::Struct({
+ {"z", NTi::Optional(NTi::Bool())},
+ {"x", NTi::Optional(NTi::Int64())},
+ {"y", NTi::Optional(NTi::String())},
+ }));
+
+ ASSERT_SERIALIZABLES_EQUAL(schema, TTableSchema()
+ .AddColumn(TColumnSchema().Name("EmbeddedDefault").Type(byFieldNumber))
+ .AddColumn(TColumnSchema()
+ .Name("EmbeddedAsInProtoFile")
+ .Type(NTi::Optional(NTi::Struct({
+ {"x", NTi::Optional(NTi::Int64())},
+ {"y", NTi::Optional(NTi::String())},
+ {"z", NTi::Optional(NTi::Bool())},
+ }))))
+ .AddColumn(TColumnSchema().Name("EmbeddedByFieldNumber").Type(byFieldNumber)));
+ }
+
+ Y_UNIT_TEST(Map)
+ {
+ const auto schema = CreateTableSchema<NUnitTesting::TWithMap>();
+
+ auto createKeyValueStruct = [] (NTi::TTypePtr key, NTi::TTypePtr value) {
+ return NTi::List(NTi::Struct({
+ {"key", NTi::Optional(key)},
+ {"value", NTi::Optional(value)},
+ }));
+ };
+
+ auto embedded = NTi::Struct({
+ {"x", NTi::Optional(NTi::Int64())},
+ {"y", NTi::Optional(NTi::String())},
+ });
+
+ ASSERT_SERIALIZABLES_EQUAL(schema, TTableSchema()
+ .AddColumn(TColumnSchema()
+ .Name("MapDefault")
+ .Type(createKeyValueStruct(NTi::Int64(), NTi::String())))
+ .AddColumn(TColumnSchema()
+ .Name("MapListOfStructsLegacy")
+ .Type(createKeyValueStruct(NTi::Int64(), NTi::String())))
+ .AddColumn(TColumnSchema()
+ .Name("MapListOfStructs")
+ .Type(createKeyValueStruct(NTi::Int64(), embedded)))
+ .AddColumn(TColumnSchema()
+ .Name("MapOptionalDict")
+ .Type(NTi::Optional(NTi::Dict(NTi::Int64(), embedded))))
+ .AddColumn(TColumnSchema()
+ .Name("MapDict")
+ .Type(NTi::Dict(NTi::Int64(), embedded))));
+ }
+
+ Y_UNIT_TEST(Oneof)
+ {
+ const auto schema = CreateTableSchema<NUnitTesting::TWithOneof>();
+
+ auto embedded = NTi::Struct({
+ {"Oneof", NTi::Optional(NTi::Variant(NTi::Struct({
+ {"x", NTi::Int64()},
+ {"y", NTi::String()},
+ })))},
+ });
+
+ auto createType = [&] (TString oneof2Name) {
+ return NTi::Optional(NTi::Struct({
+ {"field", NTi::Optional(NTi::String())},
+ {oneof2Name, NTi::Optional(NTi::Variant(NTi::Struct({
+ {"x2", NTi::Int64()},
+ {"y2", NTi::String()},
+ {"z2", embedded},
+ })))},
+ {"y1", NTi::Optional(NTi::String())},
+ {"z1", NTi::Optional(embedded)},
+ {"x1", NTi::Optional(NTi::Int64())},
+ }));
+ };
+
+ ASSERT_SERIALIZABLES_EQUAL(schema, TTableSchema()
+ .AddColumn(TColumnSchema()
+ .Name("DefaultSeparateFields")
+ .Type(createType("variant_field_name")))
+ .AddColumn(TColumnSchema()
+ .Name("NoDefault")
+ .Type(createType("Oneof2")))
+ .AddColumn(TColumnSchema()
+ .Name("SerializationProtobuf")
+ .Type(NTi::Optional(NTi::Struct({
+ {"y1", NTi::Optional(NTi::String())},
+ {"x1", NTi::Optional(NTi::Int64())},
+ {"z1", NTi::Optional(NTi::String())},
+ }))))
+ .AddColumn(TColumnSchema()
+ .Name("TopLevelOneof")
+ .Type(
+ NTi::Optional(
+ NTi::Variant(NTi::Struct({
+ {"MemberOfTopLevelOneof", NTi::Int64()}
+ }))
+ )
+ ))
+ );
+ }
+
+ Y_UNIT_TEST(Embedded)
+ {
+ const auto schema = CreateTableSchema<NUnitTesting::TEmbeddingMessage>();
+ ASSERT_SERIALIZABLES_EQUAL(schema, TTableSchema()
+ .Strict(false)
+ .AddColumn(TColumnSchema().Name("embedded2_num").Type(NTi::Optional(NTi::Uint64())))
+ .AddColumn(TColumnSchema().Name("embedded2_struct").Type(NTi::Optional(NTi::Struct({
+ {"float1", NTi::Optional(NTi::Double())},
+ {"string1", NTi::Optional(NTi::String())},
+ }))))
+ .AddColumn(TColumnSchema().Name("embedded2_repeated").Type(NTi::List(NTi::String())))
+ .AddColumn(TColumnSchema().Name("embedded_num").Type(NTi::Optional(NTi::Uint64())))
+ .AddColumn(TColumnSchema().Name("embedded_extra_field").Type(NTi::Optional(NTi::String())))
+ .AddColumn(TColumnSchema().Name("variant").Type(NTi::Optional(NTi::Variant(NTi::Struct({
+ {"str_variant", NTi::String()},
+ {"uint_variant", NTi::Uint64()},
+ })))))
+ .AddColumn(TColumnSchema().Name("num").Type(NTi::Optional(NTi::Uint64())))
+ .AddColumn(TColumnSchema().Name("extra_field").Type(NTi::Optional(NTi::String())))
+ );
+ }
+}
+
+Y_UNIT_TEST_SUITE(ProtoSchemaTest_Proto3)
+{
+ Y_UNIT_TEST(TWithOptional)
+ {
+ const auto schema = CreateTableSchema<NTestingProto3::TWithOptional>();
+ ASSERT_SERIALIZABLES_EQUAL(schema, TTableSchema()
+ .AddColumn(TColumnSchema()
+ .Name("x").Type(NTi::Optional(NTi::Int64()))
+ )
+ );
+ }
+
+ Y_UNIT_TEST(TWithOptionalMessage)
+ {
+ const auto schema = CreateTableSchema<NTestingProto3::TWithOptionalMessage>();
+ ASSERT_SERIALIZABLES_EQUAL(schema, TTableSchema()
+ .AddColumn(TColumnSchema()
+ .Name("x").Type(
+ NTi::Optional(
+ NTi::Struct({{"x", NTi::Optional(NTi::Int64())}})
+ )
+ )
+ )
+ );
+ }
+}
diff --git a/yt/cpp/mapreduce/interface/protobuf_table_schema_ut.proto b/yt/cpp/mapreduce/interface/protobuf_table_schema_ut.proto
new file mode 100644
index 0000000000..60bad6e650
--- /dev/null
+++ b/yt/cpp/mapreduce/interface/protobuf_table_schema_ut.proto
@@ -0,0 +1,402 @@
+import "yt/yt_proto/yt/formats/extension.proto";
+
+package NYT.NUnitTesting;
+
+message TIntegral
+{
+ optional double DoubleField = 1;
+ optional float FloatField = 2;
+ optional int32 Int32Field = 3;
+ optional int64 Int64Field = 4;
+ optional uint32 Uint32Field = 5;
+ optional uint64 Uint64Field = 6;
+ optional sint32 Sint32Field = 7;
+ optional sint64 Sint64Field = 8;
+ optional fixed32 Fixed32Field = 9;
+ optional fixed64 Fixed64Field = 10;
+ optional sfixed32 Sfixed32Field = 11;
+ optional sfixed64 Sfixed64Field = 12;
+ optional bool BoolField = 13;
+ enum TriBool
+ {
+ TRI_FALSE = 0;
+ TRI_TRUE = 1;
+ TRI_UNDEF = -1;
+ }
+ optional TriBool EnumField = 14;
+}
+
+message TRepeated
+{
+ repeated int32 Int32Field = 1;
+}
+
+message TRepeatedYtMode
+{
+ option (NYT.default_field_flags) = SERIALIZATION_YT;
+ repeated int32 Int32Field = 1;
+}
+
+message TWithTypeOptions
+{
+ enum Color
+ {
+ WHITE = 0;
+ BLUE = 1;
+ RED = -1;
+ }
+
+ message TEmbedded
+ {
+ option (NYT.default_field_flags) = SERIALIZATION_YT;
+
+ optional Color ColorIntField = 1 [(NYT.flags) = ENUM_INT];
+ optional Color ColorStringField = 2 [(NYT.flags) = ENUM_STRING];
+ optional bytes AnyField = 3 [(NYT.flags) = ANY];
+ }
+
+ optional Color ColorIntField = 1 [(NYT.flags) = ENUM_INT];
+ optional Color ColorStringField = 2 [(NYT.flags) = ENUM_STRING];
+ optional bytes AnyField = 3 [(NYT.flags) = ANY];
+ optional bytes OtherColumnsField = 4 [(NYT.flags) = OTHER_COLUMNS];
+ optional TEmbedded EmbeddedField = 5 [(NYT.flags) = SERIALIZATION_YT];
+ repeated Color RepeatedEnumIntField = 6 [(NYT.flags) = SERIALIZATION_YT, (NYT.flags) = ENUM_INT];
+}
+
+message TWithTypeOptions_TypeMismatch_EnumInt
+{
+ optional int64 EnumField = 1 [(NYT.flags) = ENUM_INT];
+}
+
+message TWithTypeOptions_TypeMismatch_EnumString
+{
+ optional string EnumField = 1 [(NYT.flags) = ENUM_STRING];
+}
+
+message TWithTypeOptions_TypeMismatch_Any
+{
+ optional string AnyField = 1 [(NYT.flags) = ANY];
+}
+
+message TWithTypeOptions_TypeMismatch_OtherColumns
+{
+ optional string OtherColumnsField = 1 [(NYT.flags) = OTHER_COLUMNS];
+}
+
+message TOneOf
+{
+ oneof Chooser
+ {
+ double DoubleField = 1;
+ int32 Int32Field = 2;
+ }
+ optional bool BoolField = 3;
+}
+
+message TWithRequired
+{
+ required string RequiredField = 1;
+ optional string NotRequiredField = 2;
+};
+
+message TAggregated
+{
+ optional string StringField = 1;
+ optional bytes BytesField = 2;
+ optional TIntegral NestedField = 3;
+ optional TRepeated NestedRepeatedField = 4;
+ optional TOneOf NestedOneOfField = 5;
+ optional TAggregated NestedRecursiveField = 6;
+}
+
+message TAliased
+{
+ optional int32 Key = 1 [(NYT.key_column_name) = "key"];
+ optional double Subkey = 2 [(NYT.key_column_name) = "subkey"];
+ optional TAggregated Data = 3;
+}
+
+////////////////////////////////////////////////////////////////////////////////
+
+message TUrlRow
+{
+ optional string Host = 1 [(NYT.column_name) = "Host"];
+ optional string Path = 2 [(NYT.column_name) = "Path"];
+ optional sint32 HttpCode = 3 [(NYT.column_name) = "HttpCode"];
+}
+
+message TRowFieldSerializationOption
+{
+ optional TUrlRow UrlRow_1 = 1 [(NYT.flags) = SERIALIZATION_YT];
+ optional TUrlRow UrlRow_2 = 2;
+}
+
+message TRowMessageSerializationOption
+{
+ option (NYT.default_field_flags) = SERIALIZATION_YT;
+ optional TUrlRow UrlRow_1 = 1;
+ optional TUrlRow UrlRow_2 = 2;
+}
+
+message TRowMixedSerializationOptions
+{
+ option (NYT.default_field_flags) = SERIALIZATION_YT;
+ optional TUrlRow UrlRow_1 = 1;
+ optional TUrlRow UrlRow_2 = 2 [(NYT.flags) = SERIALIZATION_PROTOBUF];
+}
+
+message TRowSerializedRepeatedFields
+{
+ option (NYT.default_field_flags) = SERIALIZATION_YT;
+ repeated int64 Ints = 1;
+ repeated TUrlRow UrlRows = 2;
+}
+
+message TUrlRowWithColumnNames
+{
+ optional string Host = 1 [(NYT.column_name) = "Host_ColumnName", (NYT.key_column_name) = "Host_KeyColumnName"];
+ optional string Path = 2 [(NYT.key_column_name) = "Path_KeyColumnName"];
+ optional sint32 HttpCode = 3;
+}
+
+message TRowMixedSerializationOptions_ColumnNames
+{
+ option (NYT.default_field_flags) = SERIALIZATION_YT;
+ optional TUrlRowWithColumnNames UrlRow_1 = 1;
+ optional TUrlRowWithColumnNames UrlRow_2 = 2 [(NYT.flags) = SERIALIZATION_PROTOBUF];
+}
+
+message TNoOptionInheritance
+{
+ message TDeepestEmbedded
+ {
+ optional int64 x = 1;
+ }
+
+ message TEmbedded
+ {
+ optional TDeepestEmbedded embedded = 1;
+ }
+
+ message TEmbeddedYt
+ {
+ option (NYT.default_field_flags) = SERIALIZATION_YT;
+
+ optional TDeepestEmbedded embedded = 1;
+ }
+
+ message TEmbeddedProtobuf
+ {
+ option (NYT.default_field_flags) = SERIALIZATION_PROTOBUF;
+
+ optional TDeepestEmbedded embedded = 1;
+ }
+
+ optional TEmbeddedYt EmbeddedYt_YtOption = 1 [(NYT.flags) = SERIALIZATION_YT];
+ optional TEmbeddedYt EmbeddedYt_ProtobufOption = 2 [(NYT.flags) = SERIALIZATION_PROTOBUF];
+ optional TEmbeddedYt EmbeddedYt_NoOption = 3;
+ optional TEmbeddedProtobuf EmbeddedProtobuf_YtOption = 4 [(NYT.flags) = SERIALIZATION_YT];
+ optional TEmbeddedProtobuf EmbeddedProtobuf_ProtobufOption = 5 [(NYT.flags) = SERIALIZATION_PROTOBUF];
+ optional TEmbeddedProtobuf EmbeddedProtobuf_NoOption = 6;
+ optional TEmbedded Embedded_YtOption = 7 [(NYT.flags) = SERIALIZATION_YT];
+ optional TEmbedded Embedded_ProtobufOption = 8 [(NYT.flags) = SERIALIZATION_PROTOBUF];
+ optional TEmbedded Embedded_NoOption = 9;
+}
+
+message TOptionalList
+{
+ repeated int64 OptionalListInt64 = 1 [(NYT.flags) = OPTIONAL_LIST, (NYT.flags) = SERIALIZATION_YT];
+}
+
+message TPacked
+{
+ repeated int64 PackedListInt64 = 1 [(NYT.flags) = SERIALIZATION_YT, packed=true];
+}
+
+message TCyclic
+{
+ option (NYT.default_field_flags) = SERIALIZATION_YT;
+
+ message TA
+ {
+ option (NYT.default_field_flags) = SERIALIZATION_YT;
+ repeated TB b = 1;
+ optional TC c = 2;
+ }
+
+ message TB
+ {
+ option (NYT.default_field_flags) = SERIALIZATION_YT;
+ optional TD d = 1;
+ }
+
+ message TC
+ {
+ option (NYT.default_field_flags) = SERIALIZATION_YT;
+ optional TD d = 1;
+ }
+
+ message TD
+ {
+ option (NYT.default_field_flags) = SERIALIZATION_YT;
+ optional TA a = 1;
+ }
+
+ message TE
+ {
+ optional TD d = 1 [(NYT.flags) = SERIALIZATION_PROTOBUF];
+ }
+
+ optional TA a = 1;
+}
+
+message TFieldSortOrder
+{
+ message TEmbeddedDefault {
+ optional int64 x = 2;
+ optional string y = 12;
+ optional bool z = 1;
+ }
+ message TEmbeddedAsInProtoFile {
+ option (NYT.message_flags) = DEPRECATED_SORT_FIELDS_AS_IN_PROTO_FILE;
+ optional int64 x = 2;
+ optional string y = 12;
+ optional bool z = 1;
+ }
+ message TEmbeddedByFieldNumber {
+ option (NYT.message_flags) = SORT_FIELDS_BY_FIELD_NUMBER;
+ optional int64 x = 2;
+ optional string y = 12;
+ optional bool z = 1;
+ }
+ option (NYT.default_field_flags) = SERIALIZATION_YT;
+
+ optional TEmbeddedDefault EmbeddedDefault = 1;
+ optional TEmbeddedAsInProtoFile EmbeddedAsInProtoFile = 2;
+ optional TEmbeddedByFieldNumber EmbeddedByFieldNumber = 3;
+}
+
+message TWithMap
+{
+ option (NYT.default_field_flags) = SERIALIZATION_YT;
+
+ message TEmbedded {
+ optional int64 x = 1;
+ optional string y = 2;
+ }
+
+ map<int64, TEmbedded> MapDefault = 1;
+ map<int64, TEmbedded> MapListOfStructsLegacy = 2 [(NYT.flags) = MAP_AS_LIST_OF_STRUCTS_LEGACY];
+ map<int64, TEmbedded> MapListOfStructs = 3 [(NYT.flags) = MAP_AS_LIST_OF_STRUCTS];
+ map<int64, TEmbedded> MapOptionalDict = 4 [(NYT.flags) = MAP_AS_OPTIONAL_DICT];
+ map<int64, TEmbedded> MapDict = 5 [(NYT.flags) = MAP_AS_DICT];
+}
+
+message TWithOneof
+{
+ option (NYT.default_field_flags) = SERIALIZATION_YT;
+
+ message TEmbedded
+ {
+ option (NYT.default_field_flags) = SERIALIZATION_YT;
+ oneof Oneof {
+ int64 x = 1;
+ string y = 2;
+ }
+ }
+
+ message TDefaultSeparateFields
+ {
+ option (NYT.default_oneof_flags) = SEPARATE_FIELDS;
+ option (NYT.default_field_flags) = SERIALIZATION_YT;
+
+ optional string field = 1;
+
+ oneof Oneof2
+ {
+ option (NYT.variant_field_name) = "variant_field_name";
+ option (NYT.oneof_flags) = VARIANT;
+ string y2 = 4;
+ TEmbedded z2 = 6;
+ int64 x2 = 2;
+ }
+
+ oneof Oneof1
+ {
+ int64 x1 = 10;
+ string y1 = 3;
+ TEmbedded z1 = 5;
+ }
+ }
+
+ message TNoDefault
+ {
+ option (NYT.default_field_flags) = SERIALIZATION_YT;
+
+ optional string field = 1;
+
+ oneof Oneof2
+ {
+ string y2 = 4;
+ TEmbedded z2 = 6;
+ int64 x2 = 2;
+ }
+
+ oneof Oneof1
+ {
+ option (NYT.oneof_flags) = SEPARATE_FIELDS;
+ int64 x1 = 10;
+ string y1 = 3;
+ TEmbedded z1 = 5;
+ }
+ }
+
+ message TSerializationProtobuf
+ {
+ oneof Oneof
+ {
+ int64 x1 = 2;
+ string y1 = 1;
+ TEmbedded z1 = 3;
+ }
+ }
+
+ optional TDefaultSeparateFields DefaultSeparateFields = 1;
+ optional TNoDefault NoDefault = 2;
+ optional TSerializationProtobuf SerializationProtobuf = 3;
+
+ oneof TopLevelOneof
+ {
+ int64 MemberOfTopLevelOneof = 4;
+ }
+}
+
+message TEmbeddedStruct {
+ optional float float1 = 1;
+ optional string string1 = 2;
+}
+
+message TEmbedded2Message {
+ option (NYT.default_field_flags) = SERIALIZATION_YT;
+ optional uint64 embedded2_num = 10;
+ optional TEmbeddedStruct embedded2_struct = 17;
+ repeated string embedded2_repeated = 42;
+}
+
+message TEmbedded1Message {
+ option (NYT.default_field_flags) = SERIALIZATION_YT;
+ required TEmbedded2Message t2 = 1 [(NYT.flags) = EMBEDDED];
+ oneof variant {
+ string str_variant = 101;
+ uint64 uint_variant = 102;
+ }
+ optional uint64 embedded_num = 10; // make intensional field_num collision!
+ optional string embedded_extra_field = 11;
+}
+
+message TEmbeddingMessage {
+ optional bytes other_columns_field = 15 [(NYT.flags) = OTHER_COLUMNS];
+ required TEmbedded1Message t1 = 2 [(NYT.flags) = EMBEDDED];
+ optional uint64 num = 12;
+ optional string extra_field = 13;
+}
diff --git a/yt/cpp/mapreduce/interface/public.h b/yt/cpp/mapreduce/interface/public.h
new file mode 100644
index 0000000000..bdeda78795
--- /dev/null
+++ b/yt/cpp/mapreduce/interface/public.h
@@ -0,0 +1,10 @@
+#pragma once
+
+#include <memory>
+
+namespace NYT::NAuth {
+
+struct IServiceTicketAuthPtrWrapper;
+using IServiceTicketAuthPtrWrapperPtr = std::shared_ptr<IServiceTicketAuthPtrWrapper>;
+
+} // namespace NYT::NAuth
diff --git a/yt/cpp/mapreduce/interface/retry_policy.h b/yt/cpp/mapreduce/interface/retry_policy.h
new file mode 100644
index 0000000000..c198839079
--- /dev/null
+++ b/yt/cpp/mapreduce/interface/retry_policy.h
@@ -0,0 +1,47 @@
+#pragma once
+
+#include <util/datetime/base.h>
+#include <util/generic/ptr.h>
+
+namespace NYT {
+
+////////////////////////////////////////////////////////////////////////////////
+
+/// A configuration that controls retries of a single request.
+struct TRetryConfig
+{
+ ///
+ /// @brief How long retries of a single YT request can go on.
+ ///
+ /// If this limit is reached while retry count is not yet exceeded @ref TRequestRetriesTimeout exception is thrown.
+ TDuration RetriesTimeLimit = TDuration::Max();
+};
+
+/// The library uses this class to understand how to retry individual requests.
+class IRetryConfigProvider
+ : public virtual TThrRefBase
+{
+public:
+ ///
+ /// @brief Gets retry policy for single request.
+ ///
+ /// CreateRetryConfig is called before ANY request.
+ /// Returned config controls retries of this request.
+ ///
+ /// Must be thread safe since it can be used from different threads
+ /// to perform internal library requests (e.g. pings).
+ ///
+ /// Some methods (e.g. IClient::Map) involve multiple requests to YT and therefore
+ /// this method will be called several times during execution of single method.
+ ///
+ /// If user needs to limit overall retries inside long operation they might create
+ /// retry policy that knows about overall deadline
+ /// @ref NYT::TRetryConfig::RetriesTimeLimit taking into account that overall deadline.
+ /// (E.g. when deadline reached it returns zero limit for retries).
+ virtual TRetryConfig CreateRetryConfig() = 0;
+};
+
+////////////////////////////////////////////////////////////////////////////////
+
+} // namespace NYT
+
diff --git a/yt/cpp/mapreduce/interface/serialize.cpp b/yt/cpp/mapreduce/interface/serialize.cpp
new file mode 100644
index 0000000000..ae05d9f50d
--- /dev/null
+++ b/yt/cpp/mapreduce/interface/serialize.cpp
@@ -0,0 +1,553 @@
+#include "serialize.h"
+
+#include "common.h"
+#include "fluent.h"
+
+#include <library/cpp/yson/parser.h>
+#include <library/cpp/yson/node/node_io.h>
+#include <library/cpp/yson/node/serialize.h>
+
+#include <library/cpp/type_info/type_io.h>
+
+#include <util/generic/string.h>
+
+namespace NYT {
+
+////////////////////////////////////////////////////////////////////////////////
+
+// const auto& nodeMap = node.AsMap();
+#define DESERIALIZE_ITEM(NAME, MEMBER) \
+ if (const auto* item = nodeMap.FindPtr(NAME)) { \
+ Deserialize(MEMBER, *item); \
+ }
+
+// const auto& attributesMap = node.GetAttributes().AsMap();
+#define DESERIALIZE_ATTR(NAME, MEMBER) \
+ if (const auto* attr = attributesMap.FindPtr(NAME)) { \
+ Deserialize(MEMBER, *attr); \
+ }
+
+////////////////////////////////////////////////////////////////////////////////
+
+void Serialize(const TSortColumn& sortColumn, NYson::IYsonConsumer* consumer)
+{
+ if (sortColumn.SortOrder() == ESortOrder::SO_ASCENDING) {
+ Serialize(sortColumn.Name(), consumer);
+ } else {
+ BuildYsonFluently(consumer).BeginMap()
+ .Item("name").Value(sortColumn.Name())
+ .Item("sort_order").Value(ToString(sortColumn.SortOrder()))
+ .EndMap();
+ }
+}
+
+void Deserialize(TSortColumn& sortColumn, const TNode& node)
+{
+ if (node.IsString()) {
+ sortColumn = TSortColumn(node.AsString());
+ } else if (node.IsMap()) {
+ const auto& name = node["name"].AsString();
+ const auto& sortOrderString = node["sort_order"].AsString();
+ sortColumn = TSortColumn(name, ::FromString<ESortOrder>(sortOrderString));
+ } else {
+ ythrow yexception() << "Expected sort column to be string or map, got " << node.GetType();
+ }
+}
+
+template <class T, class TDerived>
+void SerializeOneOrMany(const TOneOrMany<T, TDerived>& oneOrMany, NYson::IYsonConsumer* consumer)
+{
+ BuildYsonFluently(consumer).List(oneOrMany.Parts_);
+}
+
+template <class T, class TDerived>
+void DeserializeOneOrMany(TOneOrMany<T, TDerived>& oneOrMany, const TNode& node)
+{
+ Deserialize(oneOrMany.Parts_, node);
+}
+
+void Serialize(const TKey& key, NYson::IYsonConsumer* consumer)
+{
+ SerializeOneOrMany(key, consumer);
+}
+
+void Deserialize(TKey& key, const TNode& node)
+{
+ DeserializeOneOrMany(key, node);
+}
+
+void Serialize(const TSortColumns& sortColumns, NYson::IYsonConsumer* consumer)
+{
+ SerializeOneOrMany(sortColumns, consumer);
+}
+
+void Deserialize(TSortColumns& sortColumns, const TNode& node)
+{
+ DeserializeOneOrMany(sortColumns, node);
+}
+
+void Serialize(const TColumnNames& columnNames, NYson::IYsonConsumer* consumer)
+{
+ SerializeOneOrMany(columnNames, consumer);
+}
+
+void Deserialize(TColumnNames& columnNames, const TNode& node)
+{
+ DeserializeOneOrMany(columnNames, node);
+}
+
+////////////////////////////////////////////////////////////////////////////////
+
+void Deserialize(EValueType& valueType, const TNode& node)
+{
+ const auto& nodeStr = node.AsString();
+ static const THashMap<TString, EValueType> str2ValueType = {
+ {"int8", VT_INT8},
+ {"int16", VT_INT16},
+ {"int32", VT_INT32},
+ {"int64", VT_INT64},
+
+ {"uint8", VT_UINT8},
+ {"uint16", VT_UINT16},
+ {"uint32", VT_UINT32},
+ {"uint64", VT_UINT64},
+
+ {"boolean", VT_BOOLEAN},
+ {"double", VT_DOUBLE},
+
+ {"string", VT_STRING},
+ {"utf8", VT_UTF8},
+
+ {"any", VT_ANY},
+
+ {"null", VT_NULL},
+ {"void", VT_VOID},
+
+ {"date", VT_DATE},
+ {"datetime", VT_DATETIME},
+ {"timestamp", VT_TIMESTAMP},
+ {"interval", VT_INTERVAL},
+ {"float", VT_FLOAT},
+ {"json", VT_JSON},
+ };
+
+ auto it = str2ValueType.find(nodeStr);
+ if (it == str2ValueType.end()) {
+ ythrow yexception() << "Invalid value type '" << nodeStr << "'";
+ }
+
+ valueType = it->second;
+}
+
+void Deserialize(ESortOrder& sortOrder, const TNode& node)
+{
+ sortOrder = FromString<ESortOrder>(node.AsString());
+}
+
+void Deserialize(EOptimizeForAttr& optimizeFor, const TNode& node)
+{
+ optimizeFor = FromString<EOptimizeForAttr>(node.AsString());
+}
+
+void Deserialize(EErasureCodecAttr& erasureCodec, const TNode& node)
+{
+ erasureCodec = FromString<EErasureCodecAttr>(node.AsString());
+}
+
+void Deserialize(ESchemaModificationAttr& schemaModification, const TNode& node)
+{
+ schemaModification = FromString<ESchemaModificationAttr>(node.AsString());
+}
+
+void Serialize(const TColumnSchema& columnSchema, NYson::IYsonConsumer* consumer)
+{
+ BuildYsonFluently(consumer).BeginMap()
+ .Item("name").Value(columnSchema.Name())
+ .DoIf(!columnSchema.RawTypeV3().Defined(),
+ [&] (TFluentMap fluent) {
+ fluent.Item("type").Value(NDetail::ToString(columnSchema.Type()));
+ fluent.Item("required").Value(columnSchema.Required());
+ if (columnSchema.Type() == VT_ANY
+ && *columnSchema.TypeV3() != *NTi::Optional(NTi::Yson()))
+ {
+ // A lot of user canonize serialized schema.
+ // To be backward compatible we only set type_v3 for new types.
+ fluent.Item("type_v3").Value(columnSchema.TypeV3());
+ }
+ }
+ )
+ .DoIf(columnSchema.RawTypeV3().Defined(), [&] (TFluentMap fluent) {
+ const auto& rawTypeV3 = *columnSchema.RawTypeV3();
+ fluent.Item("type_v3").Value(rawTypeV3);
+
+ // We going set old fields `type` and `required` to be compatible
+ // with old clusters that doesn't support type_v3 yet.
+
+ // if type is simple return its name otherwise return empty optional
+ auto isRequired = [](TStringBuf simpleType) {
+ return simpleType != "null" && simpleType != "void";
+ };
+ auto getSimple = [] (const TNode& typeV3) -> TMaybe<TString> {
+ static const THashMap<TString,TString> typeV3ToOld = {
+ {"bool", "boolean"},
+ {"yson", "any"},
+ };
+ TMaybe<TString> result;
+ if (typeV3.IsString()) {
+ result = typeV3.AsString();
+ } else if (typeV3.IsMap() && typeV3.Size() == 1) {
+ Y_VERIFY(typeV3["type_name"].IsString(), "invalid type is passed");
+ result = typeV3["type_name"].AsString();
+ }
+ if (result) {
+ auto it = typeV3ToOld.find(*result);
+ if (it != typeV3ToOld.end()) {
+ result = it->second;
+ }
+ }
+ return result;
+ };
+ auto simplify = [&](const TNode& typeV3) -> TMaybe<std::pair<TString, bool>> {
+ auto simple = getSimple(typeV3);
+ if (simple) {
+ return std::make_pair(*simple, isRequired(*simple));
+ }
+ if (typeV3.IsMap() && typeV3["type_name"] == "optional") {
+ auto simpleItem = getSimple(typeV3["item"]);
+ if (simpleItem && isRequired(*simpleItem)) {
+ return std::make_pair(*simpleItem, false);
+ }
+ }
+ return {};
+ };
+
+ auto simplified = simplify(rawTypeV3);
+
+ if (simplified) {
+ const auto& [simpleType, required] = *simplified;
+ fluent
+ .Item("type").Value(simpleType)
+ .Item("required").Value(required);
+ return;
+ }
+ })
+ .DoIf(columnSchema.SortOrder().Defined(), [&] (TFluentMap fluent) {
+ fluent.Item("sort_order").Value(ToString(*columnSchema.SortOrder()));
+ })
+ .DoIf(columnSchema.Lock().Defined(), [&] (TFluentMap fluent) {
+ fluent.Item("lock").Value(*columnSchema.Lock());
+ })
+ .DoIf(columnSchema.Expression().Defined(), [&] (TFluentMap fluent) {
+ fluent.Item("expression").Value(*columnSchema.Expression());
+ })
+ .DoIf(columnSchema.Aggregate().Defined(), [&] (TFluentMap fluent) {
+ fluent.Item("aggregate").Value(*columnSchema.Aggregate());
+ })
+ .DoIf(columnSchema.Group().Defined(), [&] (TFluentMap fluent) {
+ fluent.Item("group").Value(*columnSchema.Group());
+ })
+ .EndMap();
+}
+
+void Deserialize(TColumnSchema& columnSchema, const TNode& node)
+{
+ const auto& nodeMap = node.AsMap();
+ DESERIALIZE_ITEM("name", columnSchema.Name_);
+ DESERIALIZE_ITEM("type_v3", columnSchema.RawTypeV3_);
+ DESERIALIZE_ITEM("sort_order", columnSchema.SortOrder_);
+ DESERIALIZE_ITEM("lock", columnSchema.Lock_);
+ DESERIALIZE_ITEM("expression", columnSchema.Expression_);
+ DESERIALIZE_ITEM("aggregate", columnSchema.Aggregate_);
+ DESERIALIZE_ITEM("group", columnSchema.Group_);
+
+ if (nodeMap.contains("type_v3")) {
+ NTi::TTypePtr type;
+ DESERIALIZE_ITEM("type_v3", type);
+ columnSchema.Type(type);
+ } else {
+ EValueType oldType = VT_INT64;
+ bool required = false;
+ DESERIALIZE_ITEM("type", oldType);
+ DESERIALIZE_ITEM("required", required);
+ columnSchema.Type(ToTypeV3(oldType, required));
+ }
+}
+
+void Serialize(const TTableSchema& tableSchema, NYson::IYsonConsumer* consumer)
+{
+ BuildYsonFluently(consumer).BeginAttributes()
+ .Item("strict").Value(tableSchema.Strict())
+ .Item("unique_keys").Value(tableSchema.UniqueKeys())
+ .EndAttributes()
+ .List(tableSchema.Columns());
+}
+
+void Deserialize(TTableSchema& tableSchema, const TNode& node)
+{
+ const auto& attributesMap = node.GetAttributes().AsMap();
+ DESERIALIZE_ATTR("strict", tableSchema.Strict_);
+ DESERIALIZE_ATTR("unique_keys", tableSchema.UniqueKeys_);
+ Deserialize(tableSchema.Columns_, node);
+}
+
+////////////////////////////////////////////////////////////////////////////////
+
+void Serialize(const TKeyBound& keyBound, NYson::IYsonConsumer* consumer)
+{
+ BuildYsonFluently(consumer).BeginList()
+ .Item().Value(ToString(keyBound.Relation()))
+ .Item().Value(keyBound.Key())
+ .EndList();
+}
+
+void Deserialize(TKeyBound& keyBound, const TNode& node)
+{
+ const auto& nodeList = node.AsList();
+ Y_ENSURE(nodeList.size() == 2);
+
+ const auto& relationNode = nodeList[0];
+ keyBound.Relation(::FromString<ERelation>(relationNode.AsString()));
+
+ const auto& keyNode = nodeList[1];
+ TKey key;
+ Deserialize(key, keyNode);
+ keyBound.Key(std::move(key));
+}
+
+////////////////////////////////////////////////////////////////////////////////
+
+void Serialize(const TReadLimit& readLimit, NYson::IYsonConsumer* consumer)
+{
+ BuildYsonFluently(consumer).BeginMap()
+ .DoIf(readLimit.KeyBound_.Defined(), [&] (TFluentMap fluent) {
+ fluent.Item("key_bound").Value(*readLimit.KeyBound_);
+ })
+ .DoIf(readLimit.Key_.Defined(), [&] (TFluentMap fluent) {
+ fluent.Item("key").Value(*readLimit.Key_);
+ })
+ .DoIf(readLimit.RowIndex_.Defined(), [&] (TFluentMap fluent) {
+ fluent.Item("row_index").Value(*readLimit.RowIndex_);
+ })
+ .DoIf(readLimit.Offset_.Defined(), [&] (TFluentMap fluent) {
+ fluent.Item("offset").Value(*readLimit.Offset_);
+ })
+ .DoIf(readLimit.TabletIndex_.Defined(), [&] (TFluentMap fluent) {
+ fluent.Item("tablet_index").Value(*readLimit.TabletIndex_);
+ })
+ .EndMap();
+}
+
+void Deserialize(TReadLimit& readLimit, const TNode& node)
+{
+ const auto& nodeMap = node.AsMap();
+ DESERIALIZE_ITEM("key_bound", readLimit.KeyBound_);
+ DESERIALIZE_ITEM("key", readLimit.Key_);
+ DESERIALIZE_ITEM("row_index", readLimit.RowIndex_);
+ DESERIALIZE_ITEM("offset", readLimit.Offset_);
+ DESERIALIZE_ITEM("tablet_index", readLimit.TabletIndex_);
+}
+
+void Serialize(const TReadRange& readRange, NYson::IYsonConsumer* consumer)
+{
+ BuildYsonFluently(consumer).BeginMap()
+ .DoIf(!IsTrivial(readRange.LowerLimit_), [&] (TFluentMap fluent) {
+ fluent.Item("lower_limit").Value(readRange.LowerLimit_);
+ })
+ .DoIf(!IsTrivial(readRange.UpperLimit_), [&] (TFluentMap fluent) {
+ fluent.Item("upper_limit").Value(readRange.UpperLimit_);
+ })
+ .DoIf(!IsTrivial(readRange.Exact_), [&] (TFluentMap fluent) {
+ fluent.Item("exact").Value(readRange.Exact_);
+ })
+ .EndMap();
+}
+
+void Deserialize(TReadRange& readRange, const TNode& node)
+{
+ const auto& nodeMap = node.AsMap();
+ DESERIALIZE_ITEM("lower_limit", readRange.LowerLimit_);
+ DESERIALIZE_ITEM("upper_limit", readRange.UpperLimit_);
+ DESERIALIZE_ITEM("exact", readRange.Exact_);
+}
+
+void Serialize(const THashMap<TString, TString>& renameColumns, NYson::IYsonConsumer* consumer)
+{
+ BuildYsonFluently(consumer)
+ .DoMapFor(renameColumns, [] (TFluentMap fluent, const auto& item) {
+ fluent.Item(item.first).Value(item.second);
+ });
+}
+
+void Serialize(const TRichYPath& path, NYson::IYsonConsumer* consumer)
+{
+ BuildYsonFluently(consumer).BeginAttributes()
+ .DoIf(path.GetRanges().Defined(), [&] (TFluentAttributes fluent) {
+ fluent.Item("ranges").List(*path.GetRanges());
+ })
+ .DoIf(path.Columns_.Defined(), [&] (TFluentAttributes fluent) {
+ fluent.Item("columns").Value(*path.Columns_);
+ })
+ .DoIf(path.Append_.Defined(), [&] (TFluentAttributes fluent) {
+ fluent.Item("append").Value(*path.Append_);
+ })
+ .DoIf(path.PartiallySorted_.Defined(), [&] (TFluentAttributes fluent) {
+ fluent.Item("partially_sorted").Value(*path.PartiallySorted_);
+ })
+ .DoIf(!path.SortedBy_.Parts_.empty(), [&] (TFluentAttributes fluent) {
+ fluent.Item("sorted_by").Value(path.SortedBy_);
+ })
+ .DoIf(path.Teleport_.Defined(), [&] (TFluentAttributes fluent) {
+ fluent.Item("teleport").Value(*path.Teleport_);
+ })
+ .DoIf(path.Primary_.Defined(), [&] (TFluentAttributes fluent) {
+ fluent.Item("primary").Value(*path.Primary_);
+ })
+ .DoIf(path.Foreign_.Defined(), [&] (TFluentAttributes fluent) {
+ fluent.Item("foreign").Value(*path.Foreign_);
+ })
+ .DoIf(path.RowCountLimit_.Defined(), [&] (TFluentAttributes fluent) {
+ fluent.Item("row_count_limit").Value(*path.RowCountLimit_);
+ })
+ .DoIf(path.FileName_.Defined(), [&] (TFluentAttributes fluent) {
+ fluent.Item("file_name").Value(*path.FileName_);
+ })
+ .DoIf(path.OriginalPath_.Defined(), [&] (TFluentAttributes fluent) {
+ fluent.Item("original_path").Value(*path.OriginalPath_);
+ })
+ .DoIf(path.Executable_.Defined(), [&] (TFluentAttributes fluent) {
+ fluent.Item("executable").Value(*path.Executable_);
+ })
+ .DoIf(path.Format_.Defined(), [&] (TFluentAttributes fluent) {
+ fluent.Item("format").Value(*path.Format_);
+ })
+ .DoIf(path.Schema_.Defined(), [&] (TFluentAttributes fluent) {
+ fluent.Item("schema").Value(*path.Schema_);
+ })
+ .DoIf(path.Timestamp_.Defined(), [&] (TFluentAttributes fluent) {
+ fluent.Item("timestamp").Value(*path.Timestamp_);
+ })
+ .DoIf(path.CompressionCodec_.Defined(), [&] (TFluentAttributes fluent) {
+ fluent.Item("compression_codec").Value(*path.CompressionCodec_);
+ })
+ .DoIf(path.ErasureCodec_.Defined(), [&] (TFluentAttributes fluent) {
+ fluent.Item("erasure_codec").Value(ToString(*path.ErasureCodec_));
+ })
+ .DoIf(path.SchemaModification_.Defined(), [&] (TFluentAttributes fluent) {
+ fluent.Item("schema_modification").Value(ToString(*path.SchemaModification_));
+ })
+ .DoIf(path.OptimizeFor_.Defined(), [&] (TFluentAttributes fluent) {
+ fluent.Item("optimize_for").Value(ToString(*path.OptimizeFor_));
+ })
+ .DoIf(path.TransactionId_.Defined(), [&] (TFluentAttributes fluent) {
+ fluent.Item("transaction_id").Value(GetGuidAsString(*path.TransactionId_));
+ })
+ .DoIf(path.RenameColumns_.Defined(), [&] (TFluentAttributes fluent) {
+ fluent.Item("rename_columns").Value(*path.RenameColumns_);
+ })
+ .DoIf(path.BypassArtifactCache_.Defined(), [&] (TFluentAttributes fluent) {
+ fluent.Item("bypass_artifact_cache").Value(*path.BypassArtifactCache_);
+ })
+ .EndAttributes()
+ .Value(path.Path_);
+}
+
+void Deserialize(TRichYPath& path, const TNode& node)
+{
+ path = {};
+
+ const auto& attributesMap = node.GetAttributes().AsMap();
+ DESERIALIZE_ATTR("ranges", path.MutableRanges());
+ DESERIALIZE_ATTR("columns", path.Columns_);
+ DESERIALIZE_ATTR("append", path.Append_);
+ DESERIALIZE_ATTR("partially_sorted", path.PartiallySorted_);
+ DESERIALIZE_ATTR("sorted_by", path.SortedBy_);
+ DESERIALIZE_ATTR("teleport", path.Teleport_);
+ DESERIALIZE_ATTR("primary", path.Primary_);
+ DESERIALIZE_ATTR("foreign", path.Foreign_);
+ DESERIALIZE_ATTR("row_count_limit", path.RowCountLimit_);
+ DESERIALIZE_ATTR("file_name", path.FileName_);
+ DESERIALIZE_ATTR("original_path", path.OriginalPath_);
+ DESERIALIZE_ATTR("executable", path.Executable_);
+ DESERIALIZE_ATTR("format", path.Format_);
+ DESERIALIZE_ATTR("schema", path.Schema_);
+ DESERIALIZE_ATTR("timestamp", path.Timestamp_);
+ DESERIALIZE_ATTR("compression_codec", path.CompressionCodec_);
+ DESERIALIZE_ATTR("erasure_codec", path.ErasureCodec_);
+ DESERIALIZE_ATTR("schema_modification", path.SchemaModification_);
+ DESERIALIZE_ATTR("optimize_for", path.OptimizeFor_);
+ DESERIALIZE_ATTR("transaction_id", path.TransactionId_);
+ DESERIALIZE_ATTR("rename_columns", path.RenameColumns_);
+ DESERIALIZE_ATTR("bypass_artifact_cache", path.BypassArtifactCache_);
+ Deserialize(path.Path_, node);
+}
+
+void Serialize(const TAttributeFilter& filter, NYson::IYsonConsumer* consumer)
+{
+ BuildYsonFluently(consumer).List(filter.Attributes_);
+}
+
+void Deserialize(TTableColumnarStatistics& statistics, const TNode& node)
+{
+ const auto& nodeMap = node.AsMap();
+ DESERIALIZE_ITEM("column_data_weights", statistics.ColumnDataWeight);
+ DESERIALIZE_ITEM("legacy_chunks_data_weight", statistics.LegacyChunksDataWeight);
+ DESERIALIZE_ITEM("timestamp_total_weight", statistics.TimestampTotalWeight);
+}
+
+void Deserialize(TMultiTablePartition::TStatistics& statistics, const TNode& node)
+{
+ const auto& nodeMap = node.AsMap();
+ DESERIALIZE_ITEM("chunk_count", statistics.ChunkCount);
+ DESERIALIZE_ITEM("data_weight", statistics.DataWeight);
+ DESERIALIZE_ITEM("row_count", statistics.RowCount);
+}
+
+void Deserialize(TMultiTablePartition& partition, const TNode& node)
+{
+ const auto& nodeMap = node.AsMap();
+ DESERIALIZE_ITEM("table_ranges", partition.TableRanges);
+ DESERIALIZE_ITEM("aggregate_statistics", partition.AggregateStatistics);
+}
+
+void Deserialize(TMultiTablePartitions& partitions, const TNode& node)
+{
+ const auto& nodeMap = node.AsMap();
+ DESERIALIZE_ITEM("partitions", partitions.Partitions);
+}
+
+void Serialize(const TGUID& value, NYson::IYsonConsumer* consumer)
+{
+ BuildYsonFluently(consumer).Value(GetGuidAsString(value));
+}
+
+void Deserialize(TGUID& value, const TNode& node)
+{
+ value = GetGuid(node.AsString());
+}
+
+void Deserialize(TTabletInfo& value, const TNode& node)
+{
+ auto nodeMap = node.AsMap();
+ DESERIALIZE_ITEM("total_row_count", value.TotalRowCount)
+ DESERIALIZE_ITEM("trimmed_row_count", value.TrimmedRowCount)
+ DESERIALIZE_ITEM("barrier_timestamp", value.BarrierTimestamp)
+}
+
+void Serialize(const NTi::TTypePtr& type, NYson::IYsonConsumer* consumer)
+{
+ auto yson = NTi::NIo::SerializeYson(type.Get());
+ ::NYson::ParseYsonStringBuffer(yson, consumer);
+}
+
+void Deserialize(NTi::TTypePtr& type, const TNode& node)
+{
+ auto yson = NodeToYsonString(node, NYson::EYsonFormat::Binary);
+ type = NTi::NIo::DeserializeYson(*NTi::HeapFactory(), yson);
+}
+
+#undef DESERIALIZE_ITEM
+#undef DESERIALIZE_ATTR
+
+////////////////////////////////////////////////////////////////////////////////
+
+} // namespace NYT
diff --git a/yt/cpp/mapreduce/interface/serialize.h b/yt/cpp/mapreduce/interface/serialize.h
new file mode 100644
index 0000000000..223dd446ba
--- /dev/null
+++ b/yt/cpp/mapreduce/interface/serialize.h
@@ -0,0 +1,90 @@
+#pragma once
+
+///
+/// @file yt/cpp/mapreduce/interface/serialize.h
+///
+/// Header containing declaration of functions for serializing to/from YSON.
+
+#include "common.h"
+
+#include <library/cpp/type_info/fwd.h>
+
+namespace NYT::NYson {
+struct IYsonConsumer;
+} // namespace NYT::NYson
+
+namespace NYT {
+
+////////////////////////////////////////////////////////////////////////////////
+
+template <class T>
+void Deserialize(TMaybe<T>& value, const TNode& node)
+{
+ value.ConstructInPlace();
+ Deserialize(value.GetRef(), node);
+}
+
+template <class T>
+void Deserialize(TVector<T>& value, const TNode& node)
+{
+ for (const auto& element : node.AsList()) {
+ value.emplace_back();
+ Deserialize(value.back(), element);
+ }
+}
+
+template <class T>
+void Deserialize(THashMap<TString, T>& value, const TNode& node)
+{
+ for (const auto& item : node.AsMap()) {
+ Deserialize(value[item.first], item.second);
+ }
+}
+
+////////////////////////////////////////////////////////////////////////////////
+
+void Serialize(const TKey& key, NYT::NYson::IYsonConsumer* consumer);
+void Deserialize(TKey& key, const TNode& node);
+
+void Serialize(const TSortColumns& sortColumns, NYT::NYson::IYsonConsumer* consumer);
+void Deserialize(TSortColumns& sortColumns, const TNode& node);
+
+void Serialize(const TColumnNames& columnNames, NYT::NYson::IYsonConsumer* consumer);
+void Deserialize(TColumnNames& columnNames, const TNode& node);
+
+void Serialize(const TSortColumn& sortColumn, NYT::NYson::IYsonConsumer* consumer);
+void Deserialize(TSortColumn& sortColumn, const TNode& node);
+
+void Serialize(const TKeyBound& keyBound, NYT::NYson::IYsonConsumer* consumer);
+void Deserialize(TKeyBound& keyBound, const TNode& node);
+
+void Serialize(const TReadLimit& readLimit, NYT::NYson::IYsonConsumer* consumer);
+void Deserialize(TReadLimit& readLimit, const TNode& node);
+
+void Serialize(const TReadRange& readRange, NYT::NYson::IYsonConsumer* consumer);
+
+void Serialize(const TRichYPath& path, NYT::NYson::IYsonConsumer* consumer);
+void Deserialize(TRichYPath& path, const TNode& node);
+
+void Serialize(const TAttributeFilter& filter, NYT::NYson::IYsonConsumer* consumer);
+
+void Serialize(const TColumnSchema& columnSchema, NYT::NYson::IYsonConsumer* consumer);
+void Serialize(const TTableSchema& tableSchema, NYT::NYson::IYsonConsumer* consumer);
+
+void Deserialize(EValueType& valueType, const TNode& node);
+void Deserialize(TTableSchema& tableSchema, const TNode& node);
+void Deserialize(TColumnSchema& columnSchema, const TNode& node);
+void Deserialize(TTableColumnarStatistics& statistics, const TNode& node);
+void Deserialize(TMultiTablePartition& partition, const TNode& node);
+void Deserialize(TMultiTablePartitions& partitions, const TNode& node);
+void Deserialize(TTabletInfo& tabletInfos, const TNode& node);
+
+void Serialize(const TGUID& path, NYT::NYson::IYsonConsumer* consumer);
+void Deserialize(TGUID& value, const TNode& node);
+
+void Serialize(const NTi::TTypePtr& type, NYT::NYson::IYsonConsumer* consumer);
+void Deserialize(NTi::TTypePtr& type, const TNode& node);
+
+////////////////////////////////////////////////////////////////////////////////
+
+} // namespace NYT
diff --git a/yt/cpp/mapreduce/interface/serialize_ut.cpp b/yt/cpp/mapreduce/interface/serialize_ut.cpp
new file mode 100644
index 0000000000..59d4501ee8
--- /dev/null
+++ b/yt/cpp/mapreduce/interface/serialize_ut.cpp
@@ -0,0 +1,49 @@
+#include <yt/cpp/mapreduce/interface/serialize.h>
+#include <yt/cpp/mapreduce/interface/common.h>
+
+#include <library/cpp/yson/node/node_builder.h>
+
+#include <library/cpp/testing/unittest/registar.h>
+
+#include <util/generic/serialized_enum.h>
+
+using namespace NYT;
+
+Y_UNIT_TEST_SUITE(Serialization)
+{
+ Y_UNIT_TEST(TableSchema)
+ {
+ auto schema = TTableSchema()
+ .AddColumn(TColumnSchema().Name("a").Type(EValueType::VT_STRING).SortOrder(SO_ASCENDING))
+ .AddColumn(TColumnSchema().Name("b").Type(EValueType::VT_UINT64))
+ .AddColumn(TColumnSchema().Name("c").Type(EValueType::VT_INT64, true));
+
+ auto schemaNode = schema.ToNode();
+ UNIT_ASSERT(schemaNode.IsList());
+ UNIT_ASSERT_VALUES_EQUAL(schemaNode.Size(), 3);
+
+
+ UNIT_ASSERT_VALUES_EQUAL(schemaNode[0]["name"], "a");
+ UNIT_ASSERT_VALUES_EQUAL(schemaNode[0]["type"], "string");
+ UNIT_ASSERT_VALUES_EQUAL(schemaNode[0]["required"], false);
+ UNIT_ASSERT_VALUES_EQUAL(schemaNode[0]["sort_order"], "ascending");
+
+ UNIT_ASSERT_VALUES_EQUAL(schemaNode[1]["name"], "b");
+ UNIT_ASSERT_VALUES_EQUAL(schemaNode[1]["type"], "uint64");
+ UNIT_ASSERT_VALUES_EQUAL(schemaNode[1]["required"], false);
+
+ UNIT_ASSERT_VALUES_EQUAL(schemaNode[2]["name"], "c");
+ UNIT_ASSERT_VALUES_EQUAL(schemaNode[2]["type"], "int64");
+ UNIT_ASSERT_VALUES_EQUAL(schemaNode[2]["required"], true);
+ }
+
+ Y_UNIT_TEST(ValueTypeSerialization)
+ {
+ for (const auto value : GetEnumAllValues<EValueType>()) {
+ TNode serialized = NYT::NDetail::ToString(value);
+ EValueType deserialized;
+ Deserialize(deserialized, serialized);
+ UNIT_ASSERT_VALUES_EQUAL(value, deserialized);
+ }
+ }
+}
diff --git a/yt/cpp/mapreduce/interface/skiff_row.cpp b/yt/cpp/mapreduce/interface/skiff_row.cpp
new file mode 100644
index 0000000000..7838bdaee9
--- /dev/null
+++ b/yt/cpp/mapreduce/interface/skiff_row.cpp
@@ -0,0 +1 @@
+#include "skiff_row.h"
diff --git a/yt/cpp/mapreduce/interface/skiff_row.h b/yt/cpp/mapreduce/interface/skiff_row.h
new file mode 100644
index 0000000000..5dd335cb65
--- /dev/null
+++ b/yt/cpp/mapreduce/interface/skiff_row.h
@@ -0,0 +1,127 @@
+#pragma once
+
+///
+/// @file yt/cpp/mapreduce/interface/skiff_row.h
+/// Header containing interfaces that you need to define for using TSkiffRowTableReader
+/// What you need to do for your struct type TMyType:
+/// 1. Write `true` specialization TIsSkiffRow<TMyType>;
+/// 2. Write specialization GetSkiffSchema<TMyType>();
+/// 3. Write your own parser derived from ISkiffRowParser and write specialization GetSkiffParser<TMyType>() which returns this parser.
+
+#include "fwd.h"
+
+#include <yt/cpp/mapreduce/skiff/skiff_schema.h>
+
+#include <yt/cpp/mapreduce/interface/format.h>
+
+#include <library/cpp/skiff/skiff.h>
+
+#include <util/generic/maybe.h>
+
+namespace NYT {
+
+////////////////////////////////////////////////////////////////////////////////
+
+//! Need to write `true_type` specialization for your row type `T`.
+/// And implement two functions: `GetSkiffSchema` and `CreateSkiffParser`.
+///
+/// Example:
+///
+/// template <>
+/// struct TIsSkiffRow<T>
+/// : std::true_type
+/// { };
+///
+template<class T>
+struct TIsSkiffRow
+ : std::false_type
+{ };
+
+////////////////////////////////////////////////////////////////////////////////
+
+//! Return skiff schema for row type `T`.
+/// Need to write its specialization.
+template <typename T>
+NSkiff::TSkiffSchemaPtr GetSkiffSchema(const TMaybe<TSkiffRowHints>& /*hints*/)
+{
+ static_assert(TDependentFalse<T>, "Unimplemented `GetSkiffSchema` method");
+}
+
+////////////////////////////////////////////////////////////////////////////////
+
+//! Allow to parse rows as user's structs from stream (TCheckedInDebugSkiffParser).
+/// Need to write derived class for your own row type.
+///
+/// Example:
+///
+/// class TMySkiffRowParser : public ISkiffRowParser
+/// {
+/// public:
+/// TMySkiffRowParser(TMySkiffRow* row)
+/// : Row_(row)
+/// {}
+///
+/// void Parse(NSkiff::TCheckedInDebugSkiffParser* parser)
+/// . {
+/// Row_->SomeInt64Field = parser->ParseInt64();
+/// }
+///
+/// private:
+/// TMySkiffRow* Row_;
+/// }
+///
+class ISkiffRowParser
+ : public TThrRefBase
+{
+public:
+ //! Read one row from parser
+ virtual void Parse(NSkiff::TCheckedInDebugSkiffParser* /*parser*/) = 0;
+};
+
+//! Creates a parser for row type `T`.
+template <typename T>
+ISkiffRowParserPtr CreateSkiffParser(T* /*row*/, const TMaybe<TSkiffRowHints>& /*hints*/)
+{
+ static_assert(TDependentFalse<T>, "Unimplemented `CreateSkiffParser` function");
+}
+
+////////////////////////////////////////////////////////////////////////////////
+
+//! Allow to skip row content without getting row.
+/// By default row will be parsed using your parser derived from ISkiffRowParser.
+/// If you want, you can write more optimal skipper, but it isn't required.
+class ISkiffRowSkipper
+ : public TThrRefBase
+{
+public:
+ virtual void SkipRow(NSkiff::TCheckedInDebugSkiffParser* /*parser*/) = 0;
+};
+
+//! Default ISkiffRowSkipper implementation.
+template <typename T>
+class TSkiffRowSkipper : public ISkiffRowSkipper {
+public:
+ explicit TSkiffRowSkipper(const TMaybe<TSkiffRowHints>& hints)
+ : Parser_(CreateSkiffParser<T>(&Row_, hints))
+ { }
+
+ void SkipRow(NSkiff::TCheckedInDebugSkiffParser* parser) {
+ Parser_->Parse(parser);
+ }
+
+private:
+ T Row_;
+ ISkiffRowParserPtr Parser_;
+};
+
+//! Creates a skipper for row type 'T'.
+/// You don't need to write its specialization.
+template <typename T>
+ISkiffRowSkipperPtr CreateSkiffSkipper(const TMaybe<TSkiffRowHints>& hints)
+{
+ return ::MakeIntrusive<TSkiffRowSkipper<T>>(hints);
+}
+
+////////////////////////////////////////////////////////////////////////////////
+
+} // namespace NYT
diff --git a/yt/cpp/mapreduce/interface/tvm.cpp b/yt/cpp/mapreduce/interface/tvm.cpp
new file mode 100644
index 0000000000..bfa3f0304e
--- /dev/null
+++ b/yt/cpp/mapreduce/interface/tvm.cpp
@@ -0,0 +1 @@
+#include "tvm.h"
diff --git a/yt/cpp/mapreduce/interface/tvm.h b/yt/cpp/mapreduce/interface/tvm.h
new file mode 100644
index 0000000000..d8d16d841b
--- /dev/null
+++ b/yt/cpp/mapreduce/interface/tvm.h
@@ -0,0 +1,35 @@
+#pragma once
+
+#include <yt/yt/library/tvm/tvm_base.h>
+
+#include <library/cpp/yt/memory/intrusive_ptr.h>
+
+namespace NYT::NAuth {
+
+////////////////////////////////////////////////////////////////////////////////
+
+/// This wrapper is required because NYT::NAuth::IServiceTicketAuthPtr is NYT::TIntrusivePtr,
+/// and, if we used this pointer in interfaces of `mapreduce/yt` client, a lot of users of this library
+/// could get unexpected build errors that `TIntrusivePtr` is ambigious
+/// (from `::` namespace and from `::NYT::` namespace).
+/// So we use this wrapper in our interfaces to avoid such problems for users.
+struct IServiceTicketAuthPtrWrapper
+{
+ //
+ /// Construct wrapper from NYT::TIntrusivePtr
+ ///
+ /// This constructor is implicit so users can transparently pass NYT::TIntrusivePtr to the functions of
+ /// mapreduce/yt client.
+ template <class T, class = typename std::enable_if_t<std::is_convertible_v<T*, IServiceTicketAuth*>>>
+ IServiceTicketAuthPtrWrapper(const TIntrusivePtr<T> ptr)
+ : Ptr(ptr)
+ {
+ }
+
+ /// Wrapped pointer
+ NYT::TIntrusivePtr<IServiceTicketAuth> Ptr;
+};
+
+////////////////////////////////////////////////////////////////////////////////
+
+} // namespace NYT::NAuth
diff --git a/yt/cpp/mapreduce/interface/ut/ya.make b/yt/cpp/mapreduce/interface/ut/ya.make
new file mode 100644
index 0000000000..0219e6430c
--- /dev/null
+++ b/yt/cpp/mapreduce/interface/ut/ya.make
@@ -0,0 +1,25 @@
+UNITTEST_FOR(yt/cpp/mapreduce/interface)
+
+SRCS(
+ common_ut.cpp
+ config_ut.cpp
+ error_ut.cpp
+ format_ut.cpp
+ job_counters_ut.cpp
+ job_statistics_ut.cpp
+ operation_ut.cpp
+ proto3_ut.proto
+ protobuf_table_schema_ut.cpp
+ protobuf_file_options_ut.cpp
+ protobuf_table_schema_ut.proto
+ protobuf_file_options_ut.proto
+ serialize_ut.cpp
+)
+
+PEERDIR(
+ contrib/libs/protobuf
+ library/cpp/testing/unittest
+ yt/yt_proto/yt/formats
+)
+
+END()
diff --git a/yt/cpp/mapreduce/interface/wait_proxy.h b/yt/cpp/mapreduce/interface/wait_proxy.h
new file mode 100644
index 0000000000..f7d8e0638e
--- /dev/null
+++ b/yt/cpp/mapreduce/interface/wait_proxy.h
@@ -0,0 +1,54 @@
+#pragma once
+
+///
+/// @file yt/cpp/mapreduce/interface/serialize.h
+///
+/// Header containing interface to enable customizable waiting.
+
+#include <yt/cpp/mapreduce/interface/common.h>
+
+#include <util/datetime/base.h>
+
+namespace NThreading {
+template <typename T>
+class TFuture;
+}
+
+class TSystemEvent;
+class TCondVar;
+class TMutex;
+
+namespace NYT {
+
+////////////////////////////////////////////////////////////////////////////////
+
+///
+/// @brief Interface to facilitate customizable waiting.
+///
+/// All the waiting functions in the library are obliged to use the methods of a wait proxy instead of direct function calls.
+class IWaitProxy
+ : public TThrRefBase
+{
+public:
+ virtual ~IWaitProxy() = default;
+
+ ///
+ /// @brief Wait for the future setting with timeout.
+ virtual bool WaitFuture(const ::NThreading::TFuture<void>& future, TDuration timeout) = 0;
+
+ ///
+ /// @brief Wait for a system event with timeout.
+ virtual bool WaitEvent(TSystemEvent& event, TDuration timeout) = 0;
+
+ ///
+ /// @brief Wait for the notification on the condition variable with timeout.
+ virtual bool WaitCondVar(TCondVar& condVar, TMutex& mutex, TDuration timeout) = 0;
+
+ ///
+ /// @brief Sleep in the current thread for (approximately) specified amount of time.
+ virtual void Sleep(TDuration timeout) = 0;
+};
+
+////////////////////////////////////////////////////////////////////////////////
+
+} // namespace NYT
diff --git a/yt/cpp/mapreduce/interface/ya.make b/yt/cpp/mapreduce/interface/ya.make
new file mode 100644
index 0000000000..0e94f14633
--- /dev/null
+++ b/yt/cpp/mapreduce/interface/ya.make
@@ -0,0 +1,46 @@
+LIBRARY()
+
+INCLUDE(${ARCADIA_ROOT}/yt/ya_cpp.make.inc)
+
+SRCS(
+ batch_request.cpp
+ client.cpp
+ client_method_options.cpp
+ common.cpp
+ config.cpp
+ cypress.cpp
+ errors.cpp
+ format.cpp
+ job_counters.cpp
+ job_statistics.cpp
+ io.cpp
+ operation.cpp
+ protobuf_format.cpp
+ serialize.cpp
+ skiff_row.cpp
+ tvm.cpp
+)
+
+PEERDIR(
+ contrib/libs/protobuf
+ library/cpp/type_info
+ library/cpp/threading/future
+ library/cpp/yson/node
+ yt/cpp/mapreduce/interface/logging
+ yt/yt_proto/yt/formats
+ yt/yt/library/tvm
+)
+
+GENERATE_ENUM_SERIALIZATION(client_method_options.h)
+GENERATE_ENUM_SERIALIZATION(client.h)
+GENERATE_ENUM_SERIALIZATION(common.h)
+GENERATE_ENUM_SERIALIZATION(config.h)
+GENERATE_ENUM_SERIALIZATION(cypress.h)
+GENERATE_ENUM_SERIALIZATION(job_counters.h)
+GENERATE_ENUM_SERIALIZATION(job_statistics.h)
+GENERATE_ENUM_SERIALIZATION(operation.h)
+GENERATE_ENUM_SERIALIZATION(protobuf_format.h)
+
+END()
+
+RECURSE_FOR_TESTS(ut)
diff --git a/yt/cpp/mapreduce/io/counting_raw_reader.cpp b/yt/cpp/mapreduce/io/counting_raw_reader.cpp
new file mode 100644
index 0000000000..6a918bdddb
--- /dev/null
+++ b/yt/cpp/mapreduce/io/counting_raw_reader.cpp
@@ -0,0 +1,38 @@
+#include "counting_raw_reader.h"
+
+namespace NYT {
+namespace NDetail {
+
+////////////////////////////////////////////////////////////////////////////////
+
+bool TCountingRawTableReader::Retry(const TMaybe<ui32>& rangeIndex, const TMaybe<ui64>& rowIndex)
+{
+ return Reader_->Retry(rangeIndex, rowIndex);
+}
+
+void TCountingRawTableReader::ResetRetries()
+{
+ Reader_->ResetRetries();
+}
+
+bool TCountingRawTableReader::HasRangeIndices() const
+{
+ return Reader_->HasRangeIndices();
+}
+
+size_t TCountingRawTableReader::GetReadByteCount() const
+{
+ return ReadByteCount_;
+}
+
+size_t TCountingRawTableReader::DoRead(void* buf, size_t len)
+{
+ auto readLen = Reader_->Read(buf, len);
+ ReadByteCount_ += readLen;
+ return readLen;
+}
+
+////////////////////////////////////////////////////////////////////////////////
+
+} // namespace NDetail
+} // namespace NYT
diff --git a/yt/cpp/mapreduce/io/counting_raw_reader.h b/yt/cpp/mapreduce/io/counting_raw_reader.h
new file mode 100644
index 0000000000..3b6705c5e4
--- /dev/null
+++ b/yt/cpp/mapreduce/io/counting_raw_reader.h
@@ -0,0 +1,31 @@
+#pragma once
+
+#include <yt/cpp/mapreduce/interface/io.h>
+
+namespace NYT {
+namespace NDetail {
+
+class TCountingRawTableReader
+ final : public TRawTableReader
+{
+public:
+ TCountingRawTableReader(::TIntrusivePtr<TRawTableReader> reader)
+ : Reader_(std::move(reader))
+ { }
+
+ bool Retry(const TMaybe<ui32>& rangeIndex, const TMaybe<ui64>& rowIndex) override;
+ void ResetRetries() override;
+ bool HasRangeIndices() const override;
+
+ size_t GetReadByteCount() const;
+
+protected:
+ size_t DoRead(void* buf, size_t len) override;
+
+private:
+ ::TIntrusivePtr<TRawTableReader> Reader_;
+ size_t ReadByteCount_ = 0;
+};
+
+} // namespace NDetail
+} // namespace NYT
diff --git a/yt/cpp/mapreduce/io/helpers.h b/yt/cpp/mapreduce/io/helpers.h
new file mode 100644
index 0000000000..5dbbf20906
--- /dev/null
+++ b/yt/cpp/mapreduce/io/helpers.h
@@ -0,0 +1,130 @@
+#pragma once
+
+#include <yt/cpp/mapreduce/interface/io.h>
+#include <yt/cpp/mapreduce/interface/config.h>
+#include <yt/cpp/mapreduce/common/helpers.h>
+
+namespace NYT {
+
+////////////////////////////////////////////////////////////////////////////////
+
+template <class TOptions>
+struct TIOOptionsTraits;
+
+template <>
+struct TIOOptionsTraits<TFileReaderOptions>
+{
+ static constexpr const char* const ConfigName = "file_reader";
+};
+template <>
+struct TIOOptionsTraits<TFileWriterOptions>
+{
+ static constexpr const char* const ConfigName = "file_writer";
+};
+template <>
+struct TIOOptionsTraits<TTableReaderOptions>
+{
+ static constexpr const char* const ConfigName = "table_reader";
+};
+template <>
+struct TIOOptionsTraits<TTableWriterOptions>
+{
+ static constexpr const char* const ConfigName = "table_writer";
+};
+
+template <class TOptions>
+TNode FormIORequestParameters(
+ const TRichYPath& path,
+ const TOptions& options)
+{
+ auto params = PathToParamNode(path);
+ if (options.Config_) {
+ params[TIOOptionsTraits<TOptions>::ConfigName] = *options.Config_;
+ }
+ return params;
+}
+
+template <>
+inline TNode FormIORequestParameters(
+ const TRichYPath& path,
+ const TFileReaderOptions& options)
+{
+ auto params = PathToParamNode(path);
+ if (options.Config_) {
+ params[TIOOptionsTraits<TTableReaderOptions>::ConfigName] = *options.Config_;
+ }
+ if (options.Offset_) {
+ params["offset"] = *options.Offset_;
+ }
+ if (options.Length_) {
+ params["length"] = *options.Length_;
+ }
+ return params;
+}
+
+static void AddWriterOptionsToNode(const TWriterOptions& options, TNode* node)
+{
+ if (options.EnableEarlyFinish_) {
+ (*node)["enable_early_finish"] = *options.EnableEarlyFinish_;
+ }
+ if (options.UploadReplicationFactor_) {
+ (*node)["upload_replication_factor"] = *options.UploadReplicationFactor_;
+ }
+ if (options.MinUploadReplicationFactor_) {
+ (*node)["min_upload_replication_factor"] = *options.MinUploadReplicationFactor_;
+ }
+ if (options.DesiredChunkSize_) {
+ (*node)["desired_chunk_size"] = *options.DesiredChunkSize_;
+ }
+}
+
+template <>
+inline TNode FormIORequestParameters(
+ const TRichYPath& path,
+ const TFileWriterOptions& options)
+{
+ auto params = PathToParamNode(path);
+ TNode fileWriter = TNode::CreateMap();
+ if (options.Config_) {
+ fileWriter = *options.Config_;
+ }
+ if (options.WriterOptions_) {
+ AddWriterOptionsToNode(*options.WriterOptions_, &fileWriter);
+ }
+ if (fileWriter.Empty()) {
+ AddWriterOptionsToNode(
+ TWriterOptions()
+ .EnableEarlyFinish(true)
+ .UploadReplicationFactor(3)
+ .MinUploadReplicationFactor(2),
+ &fileWriter);
+ }
+ params[TIOOptionsTraits<TFileWriterOptions>::ConfigName] = fileWriter;
+ if (options.ComputeMD5_) {
+ params["compute_md5"] = *options.ComputeMD5_;
+ }
+ return params;
+}
+
+template <>
+inline TNode FormIORequestParameters(
+ const TRichYPath& path,
+ const TTableWriterOptions& options)
+{
+ auto params = PathToParamNode(path);
+ auto tableWriter = TConfig::Get()->TableWriter;
+ if (options.Config_) {
+ MergeNodes(tableWriter, *options.Config_);
+ }
+ if (options.WriterOptions_) {
+ AddWriterOptionsToNode(*options.WriterOptions_, &tableWriter);
+ }
+ if (!tableWriter.Empty()) {
+ params[TIOOptionsTraits<TTableWriterOptions>::ConfigName] = std::move(tableWriter);
+ }
+ return params;
+}
+
+////////////////////////////////////////////////////////////////////////////////
+
+}
diff --git a/yt/cpp/mapreduce/io/job_reader.cpp b/yt/cpp/mapreduce/io/job_reader.cpp
new file mode 100644
index 0000000000..39056f00e2
--- /dev/null
+++ b/yt/cpp/mapreduce/io/job_reader.cpp
@@ -0,0 +1,46 @@
+#include "job_reader.h"
+
+#include <yt/cpp/mapreduce/interface/logging/yt_log.h>
+
+namespace NYT {
+
+////////////////////////////////////////////////////////////////////////////////
+
+TJobReader::TJobReader(int fd)
+ : TJobReader(Duplicate(fd))
+{ }
+
+TJobReader::TJobReader(const TFile& file)
+ : FdFile_(file)
+ , FdInput_(FdFile_)
+ , BufferedInput_(&FdInput_, BUFFER_SIZE)
+{ }
+
+bool TJobReader::Retry(const TMaybe<ui32>& /*rangeIndex*/, const TMaybe<ui64>& /*rowIndex*/)
+{
+ return false;
+}
+
+void TJobReader::ResetRetries()
+{ }
+
+bool TJobReader::HasRangeIndices() const
+{
+ return true;
+}
+
+size_t TJobReader::DoRead(void* buf, size_t len)
+{
+ return BufferedInput_.Read(buf, len);
+}
+
+////////////////////////////////////////////////////////////////////////////////
+
+TRawTableReaderPtr CreateRawJobReader(int fd)
+{
+ return ::MakeIntrusive<TJobReader>(fd);
+}
+
+////////////////////////////////////////////////////////////////////////////////
+
+} // namespace NYT
diff --git a/yt/cpp/mapreduce/io/job_reader.h b/yt/cpp/mapreduce/io/job_reader.h
new file mode 100644
index 0000000000..ce62ec180f
--- /dev/null
+++ b/yt/cpp/mapreduce/io/job_reader.h
@@ -0,0 +1,38 @@
+#pragma once
+
+#include <yt/cpp/mapreduce/interface/io.h>
+
+#include <util/stream/buffered.h>
+#include <util/stream/file.h>
+#include <util/system/file.h>
+
+namespace NYT {
+
+////////////////////////////////////////////////////////////////////////////////
+
+class TJobReader
+ : public TRawTableReader
+{
+public:
+ explicit TJobReader(int fd);
+ explicit TJobReader(const TFile& file);
+
+ virtual bool Retry( const TMaybe<ui32>& /*rangeIndex*/, const TMaybe<ui64>& /*rowIndex*/) override;
+ virtual void ResetRetries() override;
+ virtual bool HasRangeIndices() const override;
+
+protected:
+ size_t DoRead(void* buf, size_t len) override;
+
+private:
+ TFile FdFile_;
+ TUnbufferedFileInput FdInput_;
+ TBufferedInput BufferedInput_;
+
+ static const size_t BUFFER_SIZE = 64 << 10;
+};
+
+////////////////////////////////////////////////////////////////////////////////
+
+
+} // namespace NYT
diff --git a/yt/cpp/mapreduce/io/job_writer.cpp b/yt/cpp/mapreduce/io/job_writer.cpp
new file mode 100644
index 0000000000..d08bb0a665
--- /dev/null
+++ b/yt/cpp/mapreduce/io/job_writer.cpp
@@ -0,0 +1,68 @@
+#include "job_writer.h"
+
+#include <yt/cpp/mapreduce/interface/io.h>
+
+#include <util/system/file.h>
+
+namespace NYT {
+
+////////////////////////////////////////////////////////////////////////////////
+
+TJobWriter::TStream::TStream(int fd)
+ : TStream(Duplicate(fd))
+{ }
+
+TJobWriter::TStream::TStream(const TFile& file)
+ : FdFile(file)
+ , FdOutput(FdFile)
+ , BufferedOutput(&FdOutput, BUFFER_SIZE)
+{ }
+
+TJobWriter::TStream::~TStream()
+{
+}
+
+////////////////////////////////////////////////////////////////////////////////
+
+TJobWriter::TJobWriter(size_t outputTableCount)
+{
+ for (size_t i = 0; i < outputTableCount; ++i) {
+ Streams_.emplace_back(MakeHolder<TStream>(int(i * 3 + 1)));
+ }
+}
+
+TJobWriter::TJobWriter(const TVector<TFile>& fileList)
+{
+ for (const auto& f : fileList) {
+ Streams_.emplace_back(MakeHolder<TStream>(f));
+ }
+}
+
+size_t TJobWriter::GetStreamCount() const
+{
+ return Streams_.size();
+}
+
+IOutputStream* TJobWriter::GetStream(size_t tableIndex) const
+{
+ if (tableIndex >= Streams_.size()) {
+ ythrow TIOException() <<
+ "Table index " << tableIndex <<
+ " is out of range [0, " << Streams_.size() << ")";
+ }
+ return &Streams_[tableIndex]->BufferedOutput;
+}
+
+void TJobWriter::OnRowFinished(size_t)
+{ }
+
+////////////////////////////////////////////////////////////////////////////////
+
+THolder<IProxyOutput> CreateRawJobWriter(size_t outputTableCount)
+{
+ return ::MakeHolder<TJobWriter>(outputTableCount);
+}
+
+////////////////////////////////////////////////////////////////////////////////
+
+} // namespace NYT
diff --git a/yt/cpp/mapreduce/io/job_writer.h b/yt/cpp/mapreduce/io/job_writer.h
new file mode 100644
index 0000000000..9b24650640
--- /dev/null
+++ b/yt/cpp/mapreduce/io/job_writer.h
@@ -0,0 +1,43 @@
+#pragma once
+
+#include <yt/cpp/mapreduce/interface/io.h>
+
+#include <util/generic/vector.h>
+#include <util/generic/ptr.h>
+#include <util/stream/file.h>
+#include <util/stream/buffered.h>
+
+namespace NYT {
+
+////////////////////////////////////////////////////////////////////////////////
+
+class TJobWriter
+ : public IProxyOutput
+{
+public:
+ explicit TJobWriter(size_t outputTableCount);
+ explicit TJobWriter(const TVector<TFile>& fileList);
+
+ size_t GetStreamCount() const override;
+ IOutputStream* GetStream(size_t tableIndex) const override;
+ void OnRowFinished(size_t tableIndex) override;
+
+private:
+ struct TStream {
+ TFile FdFile;
+ TUnbufferedFileOutput FdOutput;
+ TBufferedOutput BufferedOutput;
+
+ explicit TStream(int fd);
+ explicit TStream(const TFile& file);
+ ~TStream();
+
+ static const size_t BUFFER_SIZE = 1 << 20;
+ };
+
+ TVector<THolder<TStream>> Streams_;
+};
+
+////////////////////////////////////////////////////////////////////////////////
+
+} // namespace NYT
diff --git a/yt/cpp/mapreduce/io/lenval_table_reader.cpp b/yt/cpp/mapreduce/io/lenval_table_reader.cpp
new file mode 100644
index 0000000000..98274c7996
--- /dev/null
+++ b/yt/cpp/mapreduce/io/lenval_table_reader.cpp
@@ -0,0 +1,198 @@
+#include "lenval_table_reader.h"
+
+#include <yt/cpp/mapreduce/common/helpers.h>
+
+#include <yt/cpp/mapreduce/interface/logging/yt_log.h>
+
+#include <util/string/printf.h>
+
+namespace NYT {
+
+////////////////////////////////////////////////////////////////////////////////
+
+const i32 CONTROL_ATTR_TABLE_INDEX = -1;
+const i32 CONTROL_ATTR_KEY_SWITCH = -2;
+const i32 CONTROL_ATTR_RANGE_INDEX = -3;
+const i32 CONTROL_ATTR_ROW_INDEX = -4;
+const i32 CONTROL_ATTR_END_OF_STREAM = -5;
+const i32 CONTROL_ATTR_TABLET_INDEX = -6;
+
+////////////////////////////////////////////////////////////////////////////////
+
+TLenvalTableReader::TLenvalTableReader(::TIntrusivePtr<TRawTableReader> input)
+ : Input_(std::move(input))
+{
+ TLenvalTableReader::Next();
+}
+
+TLenvalTableReader::~TLenvalTableReader()
+{ }
+
+void TLenvalTableReader::CheckValidity() const
+{
+ if (!IsValid()) {
+ ythrow yexception() << "Iterator is not valid";
+ }
+}
+
+bool TLenvalTableReader::IsValid() const
+{
+ return Valid_;
+}
+
+void TLenvalTableReader::Next()
+{
+ if (!RowTaken_) {
+ SkipRow();
+ }
+
+ CheckValidity();
+
+ if (RowIndex_) {
+ ++*RowIndex_;
+ }
+
+ while (true) {
+ try {
+ i32 value = 0;
+ if (!ReadInteger(&value, true)) {
+ return;
+ }
+
+ while (value < 0 && !IsEndOfStream_) {
+ switch (value) {
+ case CONTROL_ATTR_KEY_SWITCH:
+ if (!AtStart_) {
+ Valid_ = false;
+ return;
+ } else {
+ ReadInteger(&value);
+ }
+ break;
+
+ case CONTROL_ATTR_TABLE_INDEX: {
+ ui32 tmp = 0;
+ ReadInteger(&tmp);
+ TableIndex_ = tmp;
+ ReadInteger(&value);
+ break;
+ }
+ case CONTROL_ATTR_ROW_INDEX: {
+ ui64 tmp = 0;
+ ReadInteger(&tmp);
+ RowIndex_ = tmp;
+ ReadInteger(&value);
+ break;
+ }
+ case CONTROL_ATTR_RANGE_INDEX: {
+ ui32 tmp = 0;
+ ReadInteger(&tmp);
+ RangeIndex_ = tmp;
+ ReadInteger(&value);
+ break;
+ }
+ case CONTROL_ATTR_TABLET_INDEX: {
+ ui64 tmp = 0;
+ ReadInteger(&tmp);
+ TabletIndex_ = tmp;
+ ReadInteger(&value);
+ break;
+ }
+ case CONTROL_ATTR_END_OF_STREAM: {
+ IsEndOfStream_ = true;
+ break;
+ }
+ default:
+ ythrow yexception() <<
+ Sprintf("Invalid control integer %d in lenval stream", value);
+ }
+ }
+
+ Length_ = static_cast<ui32>(value);
+ RowTaken_ = false;
+ AtStart_ = false;
+ } catch (const std::exception& e) {
+ if (!PrepareRetry()) {
+ throw;
+ }
+ continue;
+ }
+ break;
+ }
+}
+
+bool TLenvalTableReader::Retry()
+{
+ if (PrepareRetry()) {
+ RowTaken_ = true;
+ Next();
+ return true;
+ }
+ return false;
+}
+
+void TLenvalTableReader::NextKey()
+{
+ while (Valid_) {
+ Next();
+ }
+
+ if (Finished_) {
+ return;
+ }
+
+ Valid_ = true;
+
+ if (RowIndex_) {
+ --*RowIndex_;
+ }
+
+ RowTaken_ = true;
+}
+
+ui32 TLenvalTableReader::GetTableIndex() const
+{
+ CheckValidity();
+ return TableIndex_;
+}
+
+ui32 TLenvalTableReader::GetRangeIndex() const
+{
+ CheckValidity();
+ return RangeIndex_.GetOrElse(0);
+}
+
+ui64 TLenvalTableReader::GetRowIndex() const
+{
+ CheckValidity();
+ return RowIndex_.GetOrElse(0UL);
+}
+
+TMaybe<size_t> TLenvalTableReader::GetReadByteCount() const
+{
+ return Input_.GetReadByteCount();
+}
+
+bool TLenvalTableReader::IsEndOfStream() const
+{
+ return IsEndOfStream_;
+}
+
+bool TLenvalTableReader::IsRawReaderExhausted() const
+{
+ return Finished_;
+}
+
+bool TLenvalTableReader::PrepareRetry()
+{
+ if (Input_.Retry(RangeIndex_, RowIndex_)) {
+ RowIndex_.Clear();
+ RangeIndex_.Clear();
+ return true;
+ }
+ return false;
+}
+
+////////////////////////////////////////////////////////////////////////////////
+
+} // namespace NYT
diff --git a/yt/cpp/mapreduce/io/lenval_table_reader.h b/yt/cpp/mapreduce/io/lenval_table_reader.h
new file mode 100644
index 0000000000..990fe0b756
--- /dev/null
+++ b/yt/cpp/mapreduce/io/lenval_table_reader.h
@@ -0,0 +1,67 @@
+#pragma once
+
+#include "counting_raw_reader.h"
+
+#include <yt/cpp/mapreduce/interface/io.h>
+
+namespace NYT {
+
+////////////////////////////////////////////////////////////////////////////////
+
+class TLenvalTableReader
+{
+public:
+ explicit TLenvalTableReader(::TIntrusivePtr<TRawTableReader> input);
+ virtual ~TLenvalTableReader();
+
+protected:
+ bool IsValid() const;
+ void Next();
+ ui32 GetTableIndex() const;
+ ui32 GetRangeIndex() const;
+ ui64 GetRowIndex() const;
+ void NextKey();
+ TMaybe<size_t> GetReadByteCount() const;
+ bool IsEndOfStream() const;
+ bool IsRawReaderExhausted() const;
+
+ void CheckValidity() const;
+
+ bool Retry();
+
+ template <class T>
+ bool ReadInteger(T* result, bool acceptEndOfStream = false)
+ {
+ size_t count = Input_.Load(result, sizeof(T));
+ if (acceptEndOfStream && count == 0) {
+ Finished_ = true;
+ Valid_ = false;
+ return false;
+ }
+ Y_ENSURE(count == sizeof(T), "Premature end of stream");
+ return true;
+ }
+
+ virtual void SkipRow() = 0;
+
+protected:
+ NDetail::TCountingRawTableReader Input_;
+
+ bool Valid_ = true;
+ bool Finished_ = false;
+ ui32 TableIndex_ = 0;
+ TMaybe<ui64> RowIndex_;
+ TMaybe<ui32> RangeIndex_;
+ TMaybe<ui64> TabletIndex_;
+ bool IsEndOfStream_ = false;
+ bool AtStart_ = true;
+ bool RowTaken_ = true;
+ ui32 Length_ = 0;
+
+private:
+ bool PrepareRetry();
+};
+
+////////////////////////////////////////////////////////////////////////////////
+
+} // namespace NYT
diff --git a/yt/cpp/mapreduce/io/node_table_reader.cpp b/yt/cpp/mapreduce/io/node_table_reader.cpp
new file mode 100644
index 0000000000..d39e1398a5
--- /dev/null
+++ b/yt/cpp/mapreduce/io/node_table_reader.cpp
@@ -0,0 +1,375 @@
+#include "node_table_reader.h"
+
+#include <yt/cpp/mapreduce/common/node_builder.h>
+#include <yt/cpp/mapreduce/common/wait_proxy.h>
+
+#include <yt/cpp/mapreduce/interface/logging/yt_log.h>
+
+#include <library/cpp/yson/parser.h>
+
+namespace NYT {
+
+////////////////////////////////////////////////////////////////////////////////
+
+class TRowBuilder
+ : public ::NYson::TYsonConsumerBase
+{
+public:
+ explicit TRowBuilder(TMaybe<TRowElement>* resultRow);
+
+ void OnStringScalar(TStringBuf value) override;
+ void OnInt64Scalar(i64 value) override;
+ void OnUint64Scalar(ui64 value) override;
+ void OnDoubleScalar(double value) override;
+ void OnBooleanScalar(bool value) override;
+ void OnBeginList() override;
+ void OnEntity() override;
+ void OnListItem() override;
+ void OnEndList() override;
+ void OnBeginMap() override;
+ void OnKeyedItem(TStringBuf key) override;
+ void OnEndMap() override;
+ void OnBeginAttributes() override;
+ void OnEndAttributes() override;
+
+ void Finalize();
+
+private:
+ THolder<TNodeBuilder> Builder_;
+ TRowElement Row_;
+ int Depth_ = 0;
+ bool Started_ = false;
+ TMaybe<TRowElement>* ResultRow_;
+
+ void SaveResultRow();
+};
+
+TRowBuilder::TRowBuilder(TMaybe<TRowElement>* resultRow)
+ : ResultRow_(resultRow)
+{ }
+
+void TRowBuilder::OnStringScalar(TStringBuf value)
+{
+ Row_.Size += sizeof(TNode) + sizeof(TString) + value.size();
+ Builder_->OnStringScalar(value);
+}
+
+void TRowBuilder::OnInt64Scalar(i64 value)
+{
+ Row_.Size += sizeof(TNode);
+ Builder_->OnInt64Scalar(value);
+}
+
+void TRowBuilder::OnUint64Scalar(ui64 value)
+{
+ Row_.Size += sizeof(TNode);
+ Builder_->OnUint64Scalar(value);
+}
+
+void TRowBuilder::OnDoubleScalar(double value)
+{
+ Row_.Size += sizeof(TNode);
+ Builder_->OnDoubleScalar(value);
+}
+
+void TRowBuilder::OnBooleanScalar(bool value)
+{
+ Row_.Size += sizeof(TNode);
+ Builder_->OnBooleanScalar(value);
+}
+
+void TRowBuilder::OnBeginList()
+{
+ ++Depth_;
+ Builder_->OnBeginList();
+}
+
+void TRowBuilder::OnEntity()
+{
+ Row_.Size += sizeof(TNode);
+ Builder_->OnEntity();
+}
+
+void TRowBuilder::OnListItem()
+{
+ if (Depth_ == 0) {
+ SaveResultRow();
+ } else {
+ Builder_->OnListItem();
+ }
+}
+
+void TRowBuilder::OnEndList()
+{
+ --Depth_;
+ Builder_->OnEndList();
+}
+
+void TRowBuilder::OnBeginMap()
+{
+ ++Depth_;
+ Builder_->OnBeginMap();
+}
+
+void TRowBuilder::OnKeyedItem(TStringBuf key)
+{
+ Row_.Size += sizeof(TString) + key.size();
+ Builder_->OnKeyedItem(key);
+}
+
+void TRowBuilder::OnEndMap()
+{
+ --Depth_;
+ Builder_->OnEndMap();
+}
+
+void TRowBuilder::OnBeginAttributes()
+{
+ ++Depth_;
+ Builder_->OnBeginAttributes();
+}
+
+void TRowBuilder::OnEndAttributes()
+{
+ --Depth_;
+ Builder_->OnEndAttributes();
+}
+
+void TRowBuilder::SaveResultRow()
+{
+ if (!Started_) {
+ Started_ = true;
+ } else {
+ *ResultRow_ = std::move(Row_);
+ }
+ Row_.Reset();
+ Builder_.Reset(new TNodeBuilder(&Row_.Node));
+}
+
+void TRowBuilder::Finalize()
+{
+ if (Started_) {
+ *ResultRow_ = std::move(Row_);
+ }
+}
+
+////////////////////////////////////////////////////////////////////////////////
+
+TNodeTableReader::TNodeTableReader(::TIntrusivePtr<TRawTableReader> input)
+ : Input_(std::move(input))
+{
+ PrepareParsing();
+ Next();
+}
+
+TNodeTableReader::~TNodeTableReader()
+{
+}
+
+void TNodeTableReader::ParseListFragmentItem() {
+ if (!Parser_->Parse()) {
+ Builder_->Finalize();
+ IsLast_ = true;
+ }
+}
+
+const TNode& TNodeTableReader::GetRow() const
+{
+ CheckValidity();
+ if (!Row_) {
+ ythrow yexception() << "Row is moved";
+ }
+ return Row_->Node;
+}
+
+void TNodeTableReader::MoveRow(TNode* result)
+{
+ CheckValidity();
+ if (!Row_) {
+ ythrow yexception() << "Row is moved";
+ }
+ *result = std::move(Row_->Node);
+ Row_.Clear();
+}
+
+bool TNodeTableReader::IsValid() const
+{
+ return Valid_;
+}
+
+void TNodeTableReader::Next()
+{
+ try {
+ NextImpl();
+ } catch (const std::exception& ex) {
+ YT_LOG_ERROR("TNodeTableReader::Next failed: %v", ex.what());
+ throw;
+ }
+}
+
+void TNodeTableReader::NextImpl()
+{
+ CheckValidity();
+
+ if (RowIndex_) {
+ ++*RowIndex_;
+ }
+
+ // At the begin of stream parser doesn't return a finished row.
+ ParseFirstListFragmentItem();
+
+ while (true) {
+ if (IsLast_) {
+ Finished_ = true;
+ Valid_ = false;
+ break;
+ }
+
+ try {
+ ParseListFragmentItem();
+ } catch (std::exception& ex) {
+ NeedParseFirst_ = true;
+ OnStreamError(std::current_exception(), ex.what());
+ ParseFirstListFragmentItem();
+ continue;
+ }
+
+ Row_ = std::move(*NextRow_);
+ if (!Row_) {
+ throw yexception() << "No row in NextRow_";
+ }
+
+ // We successfully parsed one more row from the stream,
+ // so reset retry count to their initial value.
+ Input_.ResetRetries();
+
+ if (!Row_->Node.IsNull()) {
+ AtStart_ = false;
+ break;
+ }
+
+ for (auto& entry : Row_->Node.GetAttributes().AsMap()) {
+ if (entry.first == "key_switch") {
+ if (!AtStart_) {
+ Valid_ = false;
+ }
+ } else if (entry.first == "table_index") {
+ TableIndex_ = static_cast<ui32>(entry.second.AsInt64());
+ } else if (entry.first == "row_index") {
+ RowIndex_ = static_cast<ui64>(entry.second.AsInt64());
+ } else if (entry.first == "range_index") {
+ RangeIndex_ = static_cast<ui32>(entry.second.AsInt64());
+ } else if (entry.first == "tablet_index") {
+ TabletIndex_ = entry.second.AsInt64();
+ } else if (entry.first == "end_of_stream") {
+ IsEndOfStream_ = true;
+ }
+ }
+
+ if (!Valid_) {
+ break;
+ }
+ }
+}
+
+void TNodeTableReader::ParseFirstListFragmentItem()
+{
+ while (NeedParseFirst_) {
+ try {
+ ParseListFragmentItem();
+ NeedParseFirst_ = false;
+ break;
+ } catch (std::exception& ex) {
+ OnStreamError(std::current_exception(), ex.what());
+ }
+ }
+}
+
+ui32 TNodeTableReader::GetTableIndex() const
+{
+ CheckValidity();
+ return TableIndex_;
+}
+
+ui32 TNodeTableReader::GetRangeIndex() const
+{
+ CheckValidity();
+ return RangeIndex_.GetOrElse(0);
+}
+
+ui64 TNodeTableReader::GetRowIndex() const
+{
+ CheckValidity();
+ return RowIndex_.GetOrElse(0UL);
+}
+
+i64 TNodeTableReader::GetTabletIndex() const
+{
+ CheckValidity();
+ return TabletIndex_.GetOrElse(0L);
+}
+
+void TNodeTableReader::NextKey()
+{
+ while (Valid_) {
+ Next();
+ }
+
+ if (Finished_) {
+ return;
+ }
+
+ Valid_ = true;
+
+ if (RowIndex_) {
+ --*RowIndex_;
+ }
+}
+
+TMaybe<size_t> TNodeTableReader::GetReadByteCount() const
+{
+ return Input_.GetReadByteCount();
+}
+
+bool TNodeTableReader::IsEndOfStream() const
+{
+ return IsEndOfStream_;
+}
+
+bool TNodeTableReader::IsRawReaderExhausted() const
+{
+ return Finished_;
+}
+
+////////////////////////////////////////////////////////////////////////////////
+
+void TNodeTableReader::PrepareParsing()
+{
+ NextRow_.Clear();
+ Builder_.Reset(new TRowBuilder(&NextRow_));
+ Parser_.Reset(new ::NYson::TYsonListParser(Builder_.Get(), &Input_));
+}
+
+void TNodeTableReader::OnStreamError(std::exception_ptr exception, TString error)
+{
+ YT_LOG_ERROR("Read error: %v", error);
+ Exception_ = exception;
+ if (Input_.Retry(RangeIndex_, RowIndex_)) {
+ RowIndex_.Clear();
+ RangeIndex_.Clear();
+ PrepareParsing();
+ } else {
+ std::rethrow_exception(Exception_);
+ }
+}
+
+void TNodeTableReader::CheckValidity() const
+{
+ if (!Valid_) {
+ ythrow yexception() << "Iterator is not valid";
+ }
+}
+
+////////////////////////////////////////////////////////////////////////////////
+
+} // namespace NYT
diff --git a/yt/cpp/mapreduce/io/node_table_reader.h b/yt/cpp/mapreduce/io/node_table_reader.h
new file mode 100644
index 0000000000..4fe839eeb6
--- /dev/null
+++ b/yt/cpp/mapreduce/io/node_table_reader.h
@@ -0,0 +1,91 @@
+#pragma once
+
+#include "counting_raw_reader.h"
+
+#include <yt/cpp/mapreduce/interface/io.h>
+
+#include <library/cpp/yson/public.h>
+
+#include <util/stream/input.h>
+#include <util/generic/buffer.h>
+#include <util/system/event.h>
+#include <util/system/thread.h>
+
+#include <atomic>
+
+namespace NYT {
+
+class TRawTableReader;
+class TRowBuilder;
+
+////////////////////////////////////////////////////////////////////////////////
+
+struct TRowElement
+{
+ TNode Node;
+ size_t Size = 0;
+
+ void Reset()
+ {
+ Node = TNode();
+ Size = 0;
+ }
+};
+
+////////////////////////////////////////////////////////////////////////////////
+
+class TNodeTableReader
+ : public INodeReaderImpl
+{
+public:
+ explicit TNodeTableReader(::TIntrusivePtr<TRawTableReader> input);
+ ~TNodeTableReader() override;
+
+ const TNode& GetRow() const override;
+ void MoveRow(TNode* result) override;
+
+ bool IsValid() const override;
+ void Next() override;
+ ui32 GetTableIndex() const override;
+ ui32 GetRangeIndex() const override;
+ ui64 GetRowIndex() const override;
+ i64 GetTabletIndex() const override;
+ void NextKey() override;
+ TMaybe<size_t> GetReadByteCount() const override;
+ bool IsEndOfStream() const override;
+ bool IsRawReaderExhausted() const override;
+
+private:
+ void NextImpl();
+ void OnStreamError(std::exception_ptr exception, TString error);
+ void CheckValidity() const;
+ void PrepareParsing();
+ void ParseListFragmentItem();
+ void ParseFirstListFragmentItem();
+
+private:
+ NDetail::TCountingRawTableReader Input_;
+
+ bool Valid_ = true;
+ bool Finished_ = false;
+ ui32 TableIndex_ = 0;
+ TMaybe<ui64> RowIndex_;
+ TMaybe<ui32> RangeIndex_;
+ TMaybe<i64> TabletIndex_;
+ bool IsEndOfStream_ = false;
+ bool AtStart_ = true;
+
+ TMaybe<TRowElement> Row_;
+ TMaybe<TRowElement> NextRow_;
+
+ THolder<TRowBuilder> Builder_;
+ THolder<::NYson::TYsonListParser> Parser_;
+
+ std::exception_ptr Exception_;
+ bool NeedParseFirst_ = true;
+ bool IsLast_ = false;
+};
+
+////////////////////////////////////////////////////////////////////////////////
+
+} // namespace NYT
diff --git a/yt/cpp/mapreduce/io/node_table_writer.cpp b/yt/cpp/mapreduce/io/node_table_writer.cpp
new file mode 100644
index 0000000000..dcb5a0f5b5
--- /dev/null
+++ b/yt/cpp/mapreduce/io/node_table_writer.cpp
@@ -0,0 +1,72 @@
+#include "node_table_writer.h"
+
+#include <yt/cpp/mapreduce/common/node_visitor.h>
+
+#include <yt/cpp/mapreduce/interface/io.h>
+
+#include <yt/cpp/mapreduce/interface/logging/yt_log.h>
+
+#include <library/cpp/yson/writer.h>
+
+namespace NYT {
+
+////////////////////////////////////////////////////////////////////////////////
+
+TNodeTableWriter::TNodeTableWriter(THolder<IProxyOutput> output, NYson::EYsonFormat format)
+ : Output_(std::move(output))
+{
+ for (size_t i = 0; i < Output_->GetStreamCount(); ++i) {
+ Writers_.push_back(
+ MakeHolder<::NYson::TYsonWriter>(Output_->GetStream(i), format, NYT::NYson::EYsonType::ListFragment));
+ }
+}
+
+TNodeTableWriter::~TNodeTableWriter()
+{ }
+
+size_t TNodeTableWriter::GetTableCount() const
+{
+ return Output_->GetStreamCount();
+}
+
+void TNodeTableWriter::FinishTable(size_t tableIndex) {
+ Output_->GetStream(tableIndex)->Finish();
+}
+
+void TNodeTableWriter::AddRow(const TNode& row, size_t tableIndex)
+{
+ if (row.HasAttributes()) {
+ ythrow TIOException() << "Row cannot have attributes";
+ }
+
+ static const TNode emptyMap = TNode::CreateMap();
+ const TNode* outRow = &emptyMap;
+ if (row.GetType() != TNode::Undefined) {
+ if (!row.IsMap()) {
+ ythrow TIOException() << "Row should be a map node";
+ } else {
+ outRow = &row;
+ }
+ }
+
+ auto* writer = Writers_[tableIndex].Get();
+ writer->OnListItem();
+
+ TNodeVisitor visitor(writer);
+ visitor.Visit(*outRow);
+
+ Output_->OnRowFinished(tableIndex);
+}
+
+void TNodeTableWriter::AddRow(TNode&& row, size_t tableIndex) {
+ AddRow(row, tableIndex);
+}
+
+void TNodeTableWriter::Abort()
+{
+ Output_->Abort();
+}
+
+////////////////////////////////////////////////////////////////////////////////
+
+} // namespace NYT
diff --git a/yt/cpp/mapreduce/io/node_table_writer.h b/yt/cpp/mapreduce/io/node_table_writer.h
new file mode 100644
index 0000000000..4bf8cb2fe7
--- /dev/null
+++ b/yt/cpp/mapreduce/io/node_table_writer.h
@@ -0,0 +1,33 @@
+#pragma once
+
+#include <yt/cpp/mapreduce/interface/io.h>
+#include <library/cpp/yson/public.h>
+
+namespace NYT {
+
+class IProxyOutput;
+
+////////////////////////////////////////////////////////////////////////////////
+
+class TNodeTableWriter
+ : public INodeWriterImpl
+{
+public:
+ explicit TNodeTableWriter(THolder<IProxyOutput> output, ::NYson::EYsonFormat format = ::NYson::EYsonFormat::Binary);
+ ~TNodeTableWriter() override;
+
+ void AddRow(const TNode& row, size_t tableIndex) override;
+ void AddRow(TNode&& row, size_t tableIndex) override;
+
+ size_t GetTableCount() const override;
+ void FinishTable(size_t) override;
+ void Abort() override;
+
+private:
+ THolder<IProxyOutput> Output_;
+ TVector<THolder<::NYson::TYsonWriter>> Writers_;
+};
+
+////////////////////////////////////////////////////////////////////////////////
+
+} // namespace NYT
diff --git a/yt/cpp/mapreduce/io/proto_helpers.cpp b/yt/cpp/mapreduce/io/proto_helpers.cpp
new file mode 100644
index 0000000000..2ffbfd8d89
--- /dev/null
+++ b/yt/cpp/mapreduce/io/proto_helpers.cpp
@@ -0,0 +1,101 @@
+#include "proto_helpers.h"
+
+#include <yt/cpp/mapreduce/interface/io.h>
+#include <yt/cpp/mapreduce/interface/fluent.h>
+
+#include <yt/yt_proto/yt/formats/extension.pb.h>
+
+#include <google/protobuf/descriptor.h>
+#include <google/protobuf/descriptor.pb.h>
+#include <google/protobuf/messagext.h>
+#include <google/protobuf/io/coded_stream.h>
+
+#include <util/stream/str.h>
+#include <util/stream/file.h>
+#include <util/folder/path.h>
+
+namespace NYT {
+
+using ::google::protobuf::Message;
+using ::google::protobuf::Descriptor;
+using ::google::protobuf::DescriptorPool;
+
+using ::google::protobuf::io::CodedInputStream;
+using ::google::protobuf::io::TCopyingInputStreamAdaptor;
+
+////////////////////////////////////////////////////////////////////////////////
+
+namespace {
+
+TVector<const Descriptor*> GetJobDescriptors(const TString& fileName)
+{
+ TVector<const Descriptor*> descriptors;
+ if (!TFsPath(fileName).Exists()) {
+ ythrow TIOException() <<
+ "Cannot load '" << fileName << "' file";
+ }
+
+ TIFStream input(fileName);
+ TString line;
+ while (input.ReadLine(line)) {
+ const auto* pool = DescriptorPool::generated_pool();
+ const auto* descriptor = pool->FindMessageTypeByName(line);
+ descriptors.push_back(descriptor);
+ }
+
+ return descriptors;
+}
+
+} // namespace
+
+////////////////////////////////////////////////////////////////////////////////
+
+TVector<const Descriptor*> GetJobInputDescriptors()
+{
+ return GetJobDescriptors("proto_input");
+}
+
+TVector<const Descriptor*> GetJobOutputDescriptors()
+{
+ return GetJobDescriptors("proto_output");
+}
+
+void ValidateProtoDescriptor(
+ const Message& row,
+ size_t tableIndex,
+ const TVector<const Descriptor*>& descriptors,
+ bool isRead)
+{
+ const char* direction = isRead ? "input" : "output";
+
+ if (tableIndex >= descriptors.size()) {
+ ythrow TIOException() <<
+ "Table index " << tableIndex <<
+ " is out of range [0, " << descriptors.size() <<
+ ") in " << direction;
+ }
+
+ if (row.GetDescriptor() != descriptors[tableIndex]) {
+ ythrow TIOException() <<
+ "Invalid row of type " << row.GetDescriptor()->full_name() <<
+ " at index " << tableIndex <<
+ ", row of type " << descriptors[tableIndex]->full_name() <<
+ " expected in " << direction;
+ }
+}
+
+void ParseFromArcadiaStream(IInputStream* stream, Message& row, ui32 length)
+{
+ TLengthLimitedInput input(stream, length);
+ TCopyingInputStreamAdaptor adaptor(&input);
+ CodedInputStream codedStream(&adaptor);
+ codedStream.SetTotalBytesLimit(length + 1);
+ bool parsedOk = row.ParseFromCodedStream(&codedStream);
+ Y_ENSURE(parsedOk, "Failed to parse protobuf message");
+
+ Y_ENSURE(input.Left() == 0);
+}
+
+////////////////////////////////////////////////////////////////////////////////
+
+} // namespace NYT
diff --git a/yt/cpp/mapreduce/io/proto_helpers.h b/yt/cpp/mapreduce/io/proto_helpers.h
new file mode 100644
index 0000000000..9d1ec0027c
--- /dev/null
+++ b/yt/cpp/mapreduce/io/proto_helpers.h
@@ -0,0 +1,36 @@
+#pragma once
+
+#include <yt/cpp/mapreduce/interface/node.h>
+
+namespace google {
+namespace protobuf {
+
+class Message;
+class Descriptor;
+
+}
+}
+
+class IInputStream;
+
+namespace NYT {
+
+////////////////////////////////////////////////////////////////////////////////
+
+TVector<const ::google::protobuf::Descriptor*> GetJobInputDescriptors();
+TVector<const ::google::protobuf::Descriptor*> GetJobOutputDescriptors();
+
+void ValidateProtoDescriptor(
+ const ::google::protobuf::Message& row,
+ size_t tableIndex,
+ const TVector<const ::google::protobuf::Descriptor*>& descriptors,
+ bool isRead);
+
+void ParseFromArcadiaStream(
+ IInputStream* stream,
+ ::google::protobuf::Message& row,
+ ui32 size);
+
+////////////////////////////////////////////////////////////////////////////////
+
+} // namespace NYT
diff --git a/yt/cpp/mapreduce/io/proto_table_reader.cpp b/yt/cpp/mapreduce/io/proto_table_reader.cpp
new file mode 100644
index 0000000000..28a4bc8719
--- /dev/null
+++ b/yt/cpp/mapreduce/io/proto_table_reader.cpp
@@ -0,0 +1,305 @@
+#include "proto_table_reader.h"
+
+#include "node_table_reader.h"
+
+#include "proto_helpers.h"
+
+#include <yt/yt_proto/yt/formats/extension.pb.h>
+
+#include <util/string/escape.h>
+#include <util/string/printf.h>
+
+namespace NYT {
+
+using ::google::protobuf::Descriptor;
+using ::google::protobuf::FieldDescriptor;
+using ::google::protobuf::EnumValueDescriptor;
+
+const TString& GetFieldColumnName(const FieldDescriptor* fieldDesc) {
+ const auto& columnName = fieldDesc->options().GetExtension(column_name);
+ if (!columnName.empty()) {
+ return columnName;
+ }
+ const auto& keyColumnName = fieldDesc->options().GetExtension(key_column_name);
+ if (!keyColumnName.empty()) {
+ return keyColumnName;
+ }
+ return fieldDesc->name();
+}
+
+void ReadMessageFromNode(const TNode& node, Message* row)
+{
+ auto* descriptor = row->GetDescriptor();
+ auto* reflection = row->GetReflection();
+
+ int count = descriptor->field_count();
+ for (int i = 0; i < count; ++i) {
+ auto* fieldDesc = descriptor->field(i);
+
+ const auto& columnName = GetFieldColumnName(fieldDesc);
+
+ const auto& nodeMap = node.AsMap();
+ auto it = nodeMap.find(columnName);
+ if (it == nodeMap.end()) {
+ continue; // no such column
+ }
+ auto actualType = it->second.GetType();
+ if (actualType == TNode::Null) {
+ continue; // null field
+ }
+
+ auto checkType = [&columnName] (TNode::EType expected, TNode::EType actual) {
+ if (expected != actual) {
+ ythrow TNode::TTypeError() << "expected node type " << expected
+ << ", actual " << actual << " for node " << columnName.data();
+ }
+ };
+
+ switch (fieldDesc->type()) {
+ case FieldDescriptor::TYPE_STRING:
+ case FieldDescriptor::TYPE_BYTES:
+ checkType(TNode::String, actualType);
+ reflection->SetString(row, fieldDesc, it->second.AsString());
+ break;
+ case FieldDescriptor::TYPE_INT64:
+ case FieldDescriptor::TYPE_SINT64:
+ case FieldDescriptor::TYPE_SFIXED64:
+ checkType(TNode::Int64, actualType);
+ reflection->SetInt64(row, fieldDesc, it->second.AsInt64());
+ break;
+ case FieldDescriptor::TYPE_INT32:
+ case FieldDescriptor::TYPE_SINT32:
+ case FieldDescriptor::TYPE_SFIXED32:
+ checkType(TNode::Int64, actualType);
+ reflection->SetInt32(row, fieldDesc, it->second.AsInt64());
+ break;
+ case FieldDescriptor::TYPE_UINT64:
+ case FieldDescriptor::TYPE_FIXED64:
+ checkType(TNode::Uint64, actualType);
+ reflection->SetUInt64(row, fieldDesc, it->second.AsUint64());
+ break;
+ case FieldDescriptor::TYPE_UINT32:
+ case FieldDescriptor::TYPE_FIXED32:
+ checkType(TNode::Uint64, actualType);
+ reflection->SetUInt32(row, fieldDesc, it->second.AsUint64());
+ break;
+ case FieldDescriptor::TYPE_DOUBLE:
+ checkType(TNode::Double, actualType);
+ reflection->SetDouble(row, fieldDesc, it->second.AsDouble());
+ break;
+ case FieldDescriptor::TYPE_FLOAT:
+ checkType(TNode::Double, actualType);
+ reflection->SetFloat(row, fieldDesc, it->second.AsDouble());
+ break;
+ case FieldDescriptor::TYPE_BOOL:
+ checkType(TNode::Bool, actualType);
+ reflection->SetBool(row, fieldDesc, it->second.AsBool());
+ break;
+ case FieldDescriptor::TYPE_ENUM: {
+ TNode::EType columnType = TNode::String;
+ for (const auto& flag : fieldDesc->options().GetRepeatedExtension(flags)) {
+ if (flag == EWrapperFieldFlag::ENUM_INT) {
+ columnType = TNode::Int64;
+ break;
+ }
+ }
+ checkType(columnType, actualType);
+
+ const EnumValueDescriptor* valueDesc = nullptr;
+ TString stringValue;
+ if (columnType == TNode::String) {
+ const auto& value = it->second.AsString();
+ valueDesc = fieldDesc->enum_type()->FindValueByName(value);
+ stringValue = value;
+ } else if (columnType == TNode::Int64) {
+ const auto& value = it->second.AsInt64();
+ valueDesc = fieldDesc->enum_type()->FindValueByNumber(value);
+ stringValue = ToString(value);
+ } else {
+ Y_FAIL();
+ }
+
+ if (valueDesc == nullptr) {
+ ythrow yexception() << "Failed to parse value '" << EscapeC(stringValue) << "' as " << fieldDesc->enum_type()->full_name();
+ }
+
+ reflection->SetEnum(row, fieldDesc, valueDesc);
+
+ break;
+ }
+ case FieldDescriptor::TYPE_MESSAGE: {
+ checkType(TNode::String, actualType);
+ Message* message = reflection->MutableMessage(row, fieldDesc);
+ if (!message->ParseFromArray(it->second.AsString().data(), it->second.AsString().size())) {
+ ythrow yexception() << "Failed to parse protobuf message";
+ }
+ break;
+ }
+ default:
+ ythrow yexception() << "Incorrect protobuf type";
+ }
+ }
+}
+
+////////////////////////////////////////////////////////////////////////////////
+
+TProtoTableReader::TProtoTableReader(
+ ::TIntrusivePtr<TRawTableReader> input,
+ TVector<const Descriptor*>&& descriptors)
+ : NodeReader_(new TNodeTableReader(std::move(input)))
+ , Descriptors_(std::move(descriptors))
+{ }
+
+TProtoTableReader::~TProtoTableReader()
+{ }
+
+void TProtoTableReader::ReadRow(Message* row)
+{
+ const auto& node = NodeReader_->GetRow();
+ ReadMessageFromNode(node, row);
+}
+
+bool TProtoTableReader::IsValid() const
+{
+ return NodeReader_->IsValid();
+}
+
+void TProtoTableReader::Next()
+{
+ NodeReader_->Next();
+}
+
+ui32 TProtoTableReader::GetTableIndex() const
+{
+ return NodeReader_->GetTableIndex();
+}
+
+ui32 TProtoTableReader::GetRangeIndex() const
+{
+ return NodeReader_->GetRangeIndex();
+}
+
+ui64 TProtoTableReader::GetRowIndex() const
+{
+ return NodeReader_->GetRowIndex();
+}
+
+void TProtoTableReader::NextKey()
+{
+ NodeReader_->NextKey();
+}
+
+TMaybe<size_t> TProtoTableReader::GetReadByteCount() const
+{
+ return NodeReader_->GetReadByteCount();
+}
+
+bool TProtoTableReader::IsEndOfStream() const
+{
+ return NodeReader_->IsEndOfStream();
+}
+
+bool TProtoTableReader::IsRawReaderExhausted() const
+{
+ return NodeReader_->IsRawReaderExhausted();
+}
+
+////////////////////////////////////////////////////////////////////////////////
+
+TLenvalProtoTableReader::TLenvalProtoTableReader(
+ ::TIntrusivePtr<TRawTableReader> input,
+ TVector<const Descriptor*>&& descriptors)
+ : TLenvalTableReader(std::move(input))
+ , Descriptors_(std::move(descriptors))
+{ }
+
+TLenvalProtoTableReader::~TLenvalProtoTableReader()
+{ }
+
+void TLenvalProtoTableReader::ReadRow(Message* row)
+{
+ ValidateProtoDescriptor(*row, GetTableIndex(), Descriptors_, true);
+
+ while (true) {
+ try {
+ ParseFromArcadiaStream(&Input_, *row, Length_);
+ RowTaken_ = true;
+
+ // We successfully parsed one more row from the stream,
+ // so reset retry count to their initial value.
+ Input_.ResetRetries();
+
+ break;
+ } catch (const std::exception& ) {
+ if (!TLenvalTableReader::Retry()) {
+ throw;
+ }
+ }
+ }
+}
+
+bool TLenvalProtoTableReader::IsValid() const
+{
+ return TLenvalTableReader::IsValid();
+}
+
+void TLenvalProtoTableReader::Next()
+{
+ TLenvalTableReader::Next();
+}
+
+ui32 TLenvalProtoTableReader::GetTableIndex() const
+{
+ return TLenvalTableReader::GetTableIndex();
+}
+
+ui32 TLenvalProtoTableReader::GetRangeIndex() const
+{
+ return TLenvalTableReader::GetRangeIndex();
+}
+
+ui64 TLenvalProtoTableReader::GetRowIndex() const
+{
+ return TLenvalTableReader::GetRowIndex();
+}
+
+void TLenvalProtoTableReader::NextKey()
+{
+ TLenvalTableReader::NextKey();
+}
+
+TMaybe<size_t> TLenvalProtoTableReader::GetReadByteCount() const
+{
+ return TLenvalTableReader::GetReadByteCount();
+}
+
+bool TLenvalProtoTableReader::IsEndOfStream() const
+{
+ return TLenvalTableReader::IsEndOfStream();
+}
+
+bool TLenvalProtoTableReader::IsRawReaderExhausted() const
+{
+ return TLenvalTableReader::IsRawReaderExhausted();
+}
+
+void TLenvalProtoTableReader::SkipRow()
+{
+ while (true) {
+ try {
+ size_t skipped = Input_.Skip(Length_);
+ if (skipped != Length_) {
+ ythrow yexception() << "Premature end of stream";
+ }
+ break;
+ } catch (const std::exception& ) {
+ if (!TLenvalTableReader::Retry()) {
+ throw;
+ }
+ }
+ }
+}
+
+////////////////////////////////////////////////////////////////////////////////
+
+} // namespace NYT
diff --git a/yt/cpp/mapreduce/io/proto_table_reader.h b/yt/cpp/mapreduce/io/proto_table_reader.h
new file mode 100644
index 0000000000..05a528b9c6
--- /dev/null
+++ b/yt/cpp/mapreduce/io/proto_table_reader.h
@@ -0,0 +1,76 @@
+#pragma once
+
+#include "lenval_table_reader.h"
+
+#include <yt/cpp/mapreduce/interface/io.h>
+
+namespace NYT {
+
+class TRawTableReader;
+class TNodeTableReader;
+
+////////////////////////////////////////////////////////////////////////////////
+
+class TProtoTableReader
+ : public IProtoReaderImpl
+{
+public:
+ explicit TProtoTableReader(
+ ::TIntrusivePtr<TRawTableReader> input,
+ TVector<const ::google::protobuf::Descriptor*>&& descriptors);
+ ~TProtoTableReader() override;
+
+ void ReadRow(Message* row) override;
+
+ bool IsValid() const override;
+ void Next() override;
+ ui32 GetTableIndex() const override;
+ ui32 GetRangeIndex() const override;
+ ui64 GetRowIndex() const override;
+ void NextKey() override;
+ TMaybe<size_t> GetReadByteCount() const override;
+ bool IsEndOfStream() const override;
+ bool IsRawReaderExhausted() const override;
+
+private:
+ THolder<TNodeTableReader> NodeReader_;
+ TVector<const ::google::protobuf::Descriptor*> Descriptors_;
+};
+
+////////////////////////////////////////////////////////////////////////////////
+
+class TLenvalProtoTableReader
+ : public IProtoReaderImpl
+ , public TLenvalTableReader
+{
+public:
+ explicit TLenvalProtoTableReader(
+ ::TIntrusivePtr<TRawTableReader> input,
+ TVector<const ::google::protobuf::Descriptor*>&& descriptors);
+ ~TLenvalProtoTableReader() override;
+
+ void ReadRow(Message* row) override;
+
+ bool IsValid() const override;
+ void Next() override;
+ ui32 GetTableIndex() const override;
+ ui32 GetRangeIndex() const override;
+ ui64 GetRowIndex() const override;
+ void NextKey() override;
+ TMaybe<size_t> GetReadByteCount() const override;
+ bool IsEndOfStream() const override;
+ bool IsRawReaderExhausted() const override;
+
+protected:
+ void SkipRow() override;
+
+private:
+ TVector<const ::google::protobuf::Descriptor*> Descriptors_;
+};
+
+// Sometime useful outside mapreduce/yt
+void ReadMessageFromNode(const TNode& node, Message* row);
+
+////////////////////////////////////////////////////////////////////////////////
+
+} // namespace NYT
diff --git a/yt/cpp/mapreduce/io/proto_table_writer.cpp b/yt/cpp/mapreduce/io/proto_table_writer.cpp
new file mode 100644
index 0000000000..1ce7811625
--- /dev/null
+++ b/yt/cpp/mapreduce/io/proto_table_writer.cpp
@@ -0,0 +1,184 @@
+#include "proto_table_writer.h"
+
+#include "node_table_writer.h"
+#include "proto_helpers.h"
+
+#include <yt/cpp/mapreduce/common/node_builder.h>
+
+#include <yt/cpp/mapreduce/interface/io.h>
+
+#include <yt/yt_proto/yt/formats/extension.pb.h>
+
+#include <google/protobuf/unknown_field_set.h>
+
+namespace NYT {
+
+using ::google::protobuf::Descriptor;
+using ::google::protobuf::FieldDescriptor;
+
+////////////////////////////////////////////////////////////////////////////////
+
+TNode MakeNodeFromMessage(const Message& row)
+{
+ TNode node;
+ TNodeBuilder builder(&node);
+ builder.OnBeginMap();
+
+ auto* descriptor = row.GetDescriptor();
+ auto* reflection = row.GetReflection();
+
+ int count = descriptor->field_count();
+ for (int i = 0; i < count; ++i) {
+ auto* fieldDesc = descriptor->field(i);
+ if (fieldDesc->is_repeated()) {
+ Y_ENSURE(reflection->FieldSize(row, fieldDesc) == 0, "Storing repeated protobuf fields is not supported yet");
+ continue;
+ } else if (!reflection->HasField(row, fieldDesc)) {
+ continue;
+ }
+
+ TString columnName = fieldDesc->options().GetExtension(column_name);
+ if (columnName.empty()) {
+ const auto& keyColumnName = fieldDesc->options().GetExtension(key_column_name);
+ columnName = keyColumnName.empty() ? fieldDesc->name() : keyColumnName;
+ }
+
+ builder.OnKeyedItem(columnName);
+
+ switch (fieldDesc->type()) {
+ case FieldDescriptor::TYPE_STRING:
+ case FieldDescriptor::TYPE_BYTES:
+ builder.OnStringScalar(reflection->GetString(row, fieldDesc));
+ break;
+ case FieldDescriptor::TYPE_INT64:
+ case FieldDescriptor::TYPE_SINT64:
+ case FieldDescriptor::TYPE_SFIXED64:
+ builder.OnInt64Scalar(reflection->GetInt64(row, fieldDesc));
+ break;
+ case FieldDescriptor::TYPE_INT32:
+ case FieldDescriptor::TYPE_SINT32:
+ case FieldDescriptor::TYPE_SFIXED32:
+ builder.OnInt64Scalar(reflection->GetInt32(row, fieldDesc));
+ break;
+ case FieldDescriptor::TYPE_UINT64:
+ case FieldDescriptor::TYPE_FIXED64:
+ builder.OnUint64Scalar(reflection->GetUInt64(row, fieldDesc));
+ break;
+ case FieldDescriptor::TYPE_UINT32:
+ case FieldDescriptor::TYPE_FIXED32:
+ builder.OnUint64Scalar(reflection->GetUInt32(row, fieldDesc));
+ break;
+ case FieldDescriptor::TYPE_DOUBLE:
+ builder.OnDoubleScalar(reflection->GetDouble(row, fieldDesc));
+ break;
+ case FieldDescriptor::TYPE_FLOAT:
+ builder.OnDoubleScalar(reflection->GetFloat(row, fieldDesc));
+ break;
+ case FieldDescriptor::TYPE_BOOL:
+ builder.OnBooleanScalar(reflection->GetBool(row, fieldDesc));
+ break;
+ case FieldDescriptor::TYPE_ENUM:
+ builder.OnStringScalar(reflection->GetEnum(row, fieldDesc)->name());
+ break;
+ case FieldDescriptor::TYPE_MESSAGE:
+ builder.OnStringScalar(reflection->GetMessage(row, fieldDesc).SerializeAsString());
+ break;
+ default:
+ ythrow yexception() << "Invalid field type for column: " << columnName;
+ break;
+ }
+ }
+
+ builder.OnEndMap();
+ return node;
+}
+
+////////////////////////////////////////////////////////////////////////////////
+
+TProtoTableWriter::TProtoTableWriter(
+ THolder<IProxyOutput> output,
+ TVector<const Descriptor*>&& descriptors)
+ : NodeWriter_(new TNodeTableWriter(std::move(output)))
+ , Descriptors_(std::move(descriptors))
+{ }
+
+TProtoTableWriter::~TProtoTableWriter()
+{ }
+
+size_t TProtoTableWriter::GetTableCount() const
+{
+ return NodeWriter_->GetTableCount();
+}
+
+void TProtoTableWriter::FinishTable(size_t tableIndex)
+{
+ NodeWriter_->FinishTable(tableIndex);
+}
+
+void TProtoTableWriter::AddRow(const Message& row, size_t tableIndex)
+{
+ NodeWriter_->AddRow(MakeNodeFromMessage(row), tableIndex);
+}
+
+void TProtoTableWriter::AddRow(Message&& row, size_t tableIndex)
+{
+ TProtoTableWriter::AddRow(row, tableIndex);
+}
+
+
+void TProtoTableWriter::Abort()
+{
+ NodeWriter_->Abort();
+}
+
+////////////////////////////////////////////////////////////////////////////////
+
+TLenvalProtoTableWriter::TLenvalProtoTableWriter(
+ THolder<IProxyOutput> output,
+ TVector<const Descriptor*>&& descriptors)
+ : Output_(std::move(output))
+ , Descriptors_(std::move(descriptors))
+{ }
+
+TLenvalProtoTableWriter::~TLenvalProtoTableWriter()
+{ }
+
+size_t TLenvalProtoTableWriter::GetTableCount() const
+{
+ return Output_->GetStreamCount();
+}
+
+void TLenvalProtoTableWriter::FinishTable(size_t tableIndex)
+{
+ Output_->GetStream(tableIndex)->Finish();
+}
+
+void TLenvalProtoTableWriter::AddRow(const Message& row, size_t tableIndex)
+{
+ ValidateProtoDescriptor(row, tableIndex, Descriptors_, false);
+
+ Y_VERIFY(row.GetReflection()->GetUnknownFields(row).empty(),
+ "Message has unknown fields. This probably means bug in client code.\n"
+ "Message: %s", row.DebugString().data());
+
+ auto* stream = Output_->GetStream(tableIndex);
+ i32 size = row.ByteSize();
+ stream->Write(&size, sizeof(size));
+ bool serializedOk = row.SerializeToArcadiaStream(stream);
+ Y_ENSURE(serializedOk, "Failed to serialize protobuf message");
+ Output_->OnRowFinished(tableIndex);
+}
+
+void TLenvalProtoTableWriter::AddRow(Message&& row, size_t tableIndex)
+{
+ TLenvalProtoTableWriter::AddRow(row, tableIndex);
+}
+
+void TLenvalProtoTableWriter::Abort()
+{
+ Output_->Abort();
+}
+
+////////////////////////////////////////////////////////////////////////////////
+
+} // namespace NYT
diff --git a/yt/cpp/mapreduce/io/proto_table_writer.h b/yt/cpp/mapreduce/io/proto_table_writer.h
new file mode 100644
index 0000000000..a6df69e6ae
--- /dev/null
+++ b/yt/cpp/mapreduce/io/proto_table_writer.h
@@ -0,0 +1,61 @@
+#pragma once
+
+#include <yt/cpp/mapreduce/interface/io.h>
+
+namespace NYT {
+
+class IProxyOutput;
+class TNodeTableWriter;
+
+////////////////////////////////////////////////////////////////////////////////
+
+class TProtoTableWriter
+ : public IProtoWriterImpl
+{
+public:
+ TProtoTableWriter(
+ THolder<IProxyOutput> output,
+ TVector<const ::google::protobuf::Descriptor*>&& descriptors);
+ ~TProtoTableWriter() override;
+
+ void AddRow(const Message& row, size_t tableIndex) override;
+ void AddRow(Message&& row, size_t tableIndex) override;
+
+ size_t GetTableCount() const override;
+ void FinishTable(size_t) override;
+ void Abort() override;
+
+private:
+ THolder<TNodeTableWriter> NodeWriter_;
+ TVector<const ::google::protobuf::Descriptor*> Descriptors_;
+};
+
+////////////////////////////////////////////////////////////////////////////////
+
+class TLenvalProtoTableWriter
+ : public IProtoWriterImpl
+{
+public:
+ TLenvalProtoTableWriter(
+ THolder<IProxyOutput> output,
+ TVector<const ::google::protobuf::Descriptor*>&& descriptors);
+ ~TLenvalProtoTableWriter() override;
+
+ void AddRow(const Message& row, size_t tableIndex) override;
+ void AddRow(Message&& row, size_t tableIndex) override;
+
+ size_t GetTableCount() const override;
+ void FinishTable(size_t) override;
+ void Abort() override;
+
+private:
+ THolder<IProxyOutput> Output_;
+ TVector<const ::google::protobuf::Descriptor*> Descriptors_;
+};
+
+// Sometime useful outside mapreduce/yt
+TNode MakeNodeFromMessage(const ::google::protobuf::Message& row);
+
+////////////////////////////////////////////////////////////////////////////////
+
+} // namespace NYT
diff --git a/yt/cpp/mapreduce/io/skiff_row_table_reader.cpp b/yt/cpp/mapreduce/io/skiff_row_table_reader.cpp
new file mode 100644
index 0000000000..8da3b2da31
--- /dev/null
+++ b/yt/cpp/mapreduce/io/skiff_row_table_reader.cpp
@@ -0,0 +1,232 @@
+#include "skiff_row_table_reader.h"
+
+#include <yt/cpp/mapreduce/interface/logging/yt_log.h>
+
+#include <yt/cpp/mapreduce/interface/skiff_row.h>
+
+#include <library/cpp/skiff/skiff.h>
+
+#include <library/cpp/yt/logging/logger.h>
+
+namespace NYT {
+
+////////////////////////////////////////////////////////////////////////////////
+
+TSkiffRowTableReader::TSkiffRowTableReader(
+ ::TIntrusivePtr<TRawTableReader> input,
+ const NSkiff::TSkiffSchemaPtr& schema,
+ TVector<ISkiffRowSkipperPtr>&& skippers,
+ NDetail::TCreateSkiffSchemaOptions&& options)
+ : Input_(std::move(input))
+ , BufferedInput_(&Input_)
+ , Parser_({schema, &BufferedInput_})
+ , Skippers_(std::move(skippers))
+ , Options_(std::move(options))
+{
+ Next();
+}
+
+TSkiffRowTableReader::~TSkiffRowTableReader()
+{ }
+
+bool TSkiffRowTableReader::Retry()
+{
+ if (PrepareRetry()) {
+ RowTaken_ = true;
+ Next();
+ return true;
+ }
+ return false;
+}
+
+bool TSkiffRowTableReader::PrepareRetry()
+{
+ if (Input_.Retry(RangeIndex_, RowIndex_)) {
+ RowIndex_.Clear();
+ RangeIndex_.Clear();
+ BufferedInput_ = TBufferedInput(&Input_);
+ Parser_.emplace(&BufferedInput_);
+ return true;
+ }
+ return false;
+}
+
+void TSkiffRowTableReader::ReadRow(const ISkiffRowParserPtr& parser)
+{
+ while (true) {
+ try {
+ parser->Parse(&Parser_.value());
+ RowTaken_ = true;
+
+ // We successfully parsed one more row from the stream,
+ // so reset retry count to their initial value.
+ Input_.ResetRetries();
+
+ break;
+ } catch (const std::exception& ex) {
+ YT_LOG_ERROR("Read error during parsing: %v", ex.what());
+
+ if (!Retry()) {
+ throw;
+ }
+ }
+ }
+}
+
+bool TSkiffRowTableReader::IsValid() const
+{
+ return Valid_;
+}
+
+void TSkiffRowTableReader::SkipRow()
+{
+ CheckValidity();
+ while (true) {
+ try {
+ Skippers_[TableIndex_]->SkipRow(&Parser_.value());
+
+ break;
+ } catch (const std::exception& ex) {
+ YT_LOG_ERROR("Read error during skipping row: %v", ex.what());
+
+ if (!Retry()) {
+ throw;
+ }
+ }
+ }
+}
+
+void TSkiffRowTableReader::CheckValidity() const {
+ if (!IsValid()) {
+ ythrow yexception() << "Iterator is not valid";
+ }
+}
+
+void TSkiffRowTableReader::Next()
+{
+ if (!RowTaken_) {
+ SkipRow();
+ }
+
+ CheckValidity();
+
+ if (Y_UNLIKELY(Finished_ || !Parser_->HasMoreData())) {
+ Finished_ = true;
+ Valid_ = false;
+ return;
+ }
+
+ if (AfterKeySwitch_) {
+ AfterKeySwitch_ = false;
+ return;
+ }
+
+ if (RowIndex_) {
+ ++*RowIndex_;
+ }
+
+ while (true) {
+ try {
+ auto tag = Parser_->ParseVariant16Tag();
+ if (tag == NSkiff::EndOfSequenceTag<ui16>()) {
+ IsEndOfStream_ = true;
+ break;
+ } else {
+ TableIndex_ = tag;
+ }
+
+ if (TableIndex_ >= Skippers_.size()) {
+ ythrow TIOException() <<
+ "Table index " << TableIndex_ <<
+ " is out of range [0, " << Skippers_.size() <<
+ ") in read";
+ }
+
+ if (Options_.HasKeySwitch_) {
+ auto keySwitch = Parser_->ParseBoolean();
+ if (keySwitch) {
+ AfterKeySwitch_ = true;
+ Valid_ = false;
+ }
+ }
+
+ auto tagRowIndex = Parser_->ParseVariant8Tag();
+ if (tagRowIndex == 1) {
+ RowIndex_ = Parser_->ParseInt64();
+ } else {
+ Y_ENSURE(tagRowIndex == 0, "Tag for row_index was expected to be 0 or 1, got " << tagRowIndex);
+ }
+
+ if (Options_.HasRangeIndex_) {
+ auto tagRangeIndex = Parser_->ParseVariant8Tag();
+ if (tagRangeIndex == 1) {
+ RangeIndex_ = Parser_->ParseInt64();
+ } else {
+ Y_ENSURE(tagRangeIndex == 0, "Tag for range_index was expected to be 0 or 1, got " << tagRangeIndex);
+ }
+ }
+
+ break;
+ } catch (const std::exception& ex) {
+ YT_LOG_ERROR("Read error: %v", ex.what());
+
+ if (!PrepareRetry()) {
+ throw;
+ }
+ }
+ }
+
+ RowTaken_ = false;
+}
+
+ui32 TSkiffRowTableReader::GetTableIndex() const
+{
+ CheckValidity();
+ return TableIndex_;
+}
+
+ui32 TSkiffRowTableReader::GetRangeIndex() const
+{
+ CheckValidity();
+ return RangeIndex_.GetOrElse(0);
+}
+
+ui64 TSkiffRowTableReader::GetRowIndex() const
+{
+ CheckValidity();
+ return RowIndex_.GetOrElse(0ULL);
+}
+
+void TSkiffRowTableReader::NextKey() {
+ while (Valid_) {
+ Next();
+ }
+
+ if (Finished_) {
+ return;
+ }
+
+ Valid_ = true;
+
+ if (RowIndex_) {
+ --*RowIndex_;
+ }
+
+ RowTaken_ = true;
+}
+
+TMaybe<size_t> TSkiffRowTableReader::GetReadByteCount() const {
+ return Input_.GetReadByteCount();
+}
+
+bool TSkiffRowTableReader::IsEndOfStream() const {
+ return IsEndOfStream_;
+}
+
+bool TSkiffRowTableReader::IsRawReaderExhausted() const {
+ return Finished_;
+}
+
+////////////////////////////////////////////////////////////////////////////////
+
+} // namespace NYT
diff --git a/yt/cpp/mapreduce/io/skiff_row_table_reader.h b/yt/cpp/mapreduce/io/skiff_row_table_reader.h
new file mode 100644
index 0000000000..368968266c
--- /dev/null
+++ b/yt/cpp/mapreduce/io/skiff_row_table_reader.h
@@ -0,0 +1,67 @@
+#pragma once
+
+#include "counting_raw_reader.h"
+
+#include <yt/cpp/mapreduce/client/skiff.h>
+
+#include <yt/cpp/mapreduce/interface/io.h>
+
+#include <yt/cpp/mapreduce/skiff/unchecked_parser.h>
+
+#include <util/stream/buffered.h>
+
+namespace NYT {
+
+////////////////////////////////////////////////////////////////////////////////
+
+class TSkiffRowTableReader
+ : public ISkiffRowReaderImpl
+{
+public:
+ explicit TSkiffRowTableReader(
+ ::TIntrusivePtr<TRawTableReader> input,
+ const NSkiff::TSkiffSchemaPtr& schema,
+ TVector<ISkiffRowSkipperPtr>&& skippers,
+ NDetail::TCreateSkiffSchemaOptions&& options);
+
+ ~TSkiffRowTableReader() override;
+
+ void ReadRow(const ISkiffRowParserPtr& parser) override;
+
+ bool IsValid() const override;
+ void Next() override;
+ ui32 GetTableIndex() const override;
+ ui32 GetRangeIndex() const override;
+ ui64 GetRowIndex() const override;
+ void NextKey() override;
+ TMaybe<size_t> GetReadByteCount() const override;
+ bool IsEndOfStream() const override;
+ bool IsRawReaderExhausted() const override;
+
+private:
+ bool Retry();
+ void SkipRow();
+ void CheckValidity() const;
+ bool PrepareRetry();
+
+private:
+ NDetail::TCountingRawTableReader Input_;
+ TBufferedInput BufferedInput_;
+ std::optional<NSkiff::TCheckedInDebugSkiffParser> Parser_;
+ TVector<ISkiffRowSkipperPtr> Skippers_;
+ NDetail::TCreateSkiffSchemaOptions Options_;
+
+ bool RowTaken_ = true;
+ bool Valid_ = true;
+ bool Finished_ = false;
+ bool AfterKeySwitch_ = false;
+ bool IsEndOfStream_ = false;
+
+ TMaybe<ui64> RowIndex_;
+ TMaybe<ui32> RangeIndex_;
+ ui32 TableIndex_ = 0;
+};
+
+////////////////////////////////////////////////////////////////////////////////
+
+} // namespace NYT
diff --git a/yt/cpp/mapreduce/io/skiff_table_reader.cpp b/yt/cpp/mapreduce/io/skiff_table_reader.cpp
new file mode 100644
index 0000000000..51c20609f0
--- /dev/null
+++ b/yt/cpp/mapreduce/io/skiff_table_reader.cpp
@@ -0,0 +1,293 @@
+#include "skiff_table_reader.h"
+
+#include <yt/cpp/mapreduce/interface/logging/yt_log.h>
+
+#include <library/cpp/yson/node/node_io.h>
+
+#include <yt/cpp/mapreduce/skiff/wire_type.h>
+#include <yt/cpp/mapreduce/skiff/skiff_schema.h>
+
+#include <util/string/cast.h>
+
+namespace NYT {
+namespace NDetail {
+namespace {
+
+////////////////////////////////////////////////////////////////////////////////
+
+enum EColumnType : i8
+{
+ Dense,
+ KeySwitch,
+ RangeIndex,
+ RowIndex
+};
+
+struct TSkiffColumnSchema
+{
+ EColumnType Type;
+ bool Required;
+ NSkiff::EWireType WireType;
+ TString Name;
+
+ TSkiffColumnSchema(EColumnType type, bool required, NSkiff::EWireType wireType, const TString& name)
+ : Type(type)
+ , Required(required)
+ , WireType(wireType)
+ , Name(name)
+ { }
+};
+
+////////////////////////////////////////////////////////////////////////////////
+
+} // namespace
+
+struct TSkiffTableReader::TSkiffTableSchema
+{
+ TVector<TSkiffColumnSchema> Columns;
+};
+
+TSkiffTableReader::TSkiffTableReader(
+ ::TIntrusivePtr<TRawTableReader> input,
+ const NSkiff::TSkiffSchemaPtr& schema)
+ : Input_(std::move(input))
+ , BufferedInput_(&Input_)
+ , Parser_(&BufferedInput_)
+ , Schemas_(CreateSkiffTableSchemas(schema))
+{
+ Next();
+}
+
+TSkiffTableReader::~TSkiffTableReader() = default;
+
+const TNode& TSkiffTableReader::GetRow() const
+{
+ EnsureValidity();
+ Y_ENSURE(!Row_.IsUndefined(), "Row is moved");
+ return Row_;
+}
+
+void TSkiffTableReader::MoveRow(TNode* result)
+{
+ EnsureValidity();
+ Y_ENSURE(!Row_.IsUndefined(), "Row is moved");
+ *result = std::move(Row_);
+ Row_ = TNode();
+}
+
+bool TSkiffTableReader::IsValid() const
+{
+ return Valid_;
+}
+
+void TSkiffTableReader::Next()
+{
+ EnsureValidity();
+ if (Y_UNLIKELY(Finished_ || !Parser_->HasMoreData())) {
+ Finished_ = true;
+ Valid_ = false;
+ return;
+ }
+
+ if (AfterKeySwitch_) {
+ AfterKeySwitch_ = false;
+ return;
+ }
+
+ while (true) {
+ try {
+ ReadRow();
+ break;
+ } catch (const std::exception& exception) {
+ YT_LOG_ERROR("Read error: %v", exception.what());
+ if (!Input_.Retry(RangeIndex_, RowIndex_)) {
+ throw;
+ }
+ BufferedInput_ = TBufferedInput(&Input_);
+ Parser_.emplace(NSkiff::TUncheckedSkiffParser(&BufferedInput_));
+ RangeIndex_.Clear();
+ RowIndex_.Clear();
+ }
+ }
+}
+
+ui32 TSkiffTableReader::GetTableIndex() const
+{
+ EnsureValidity();
+ return TableIndex_;
+}
+
+ui32 TSkiffTableReader::GetRangeIndex() const
+{
+ EnsureValidity();
+ return RangeIndex_.GetOrElse(0);
+}
+
+ui64 TSkiffTableReader::GetRowIndex() const
+{
+ EnsureValidity();
+ return RowIndex_.GetOrElse(0ULL);
+}
+
+void TSkiffTableReader::NextKey()
+{
+ while (Valid_) {
+ Next();
+ }
+
+ if (Finished_) {
+ return;
+ }
+
+ Valid_ = true;
+}
+
+TMaybe<size_t> TSkiffTableReader::GetReadByteCount() const
+{
+ return Input_.GetReadByteCount();
+}
+
+bool TSkiffTableReader::IsRawReaderExhausted() const
+{
+ return Finished_;
+}
+
+////////////////////////////////////////////////////////////////////////////////
+
+TVector<TSkiffTableReader::TSkiffTableSchema> TSkiffTableReader::CreateSkiffTableSchemas(
+ const NSkiff::TSkiffSchemaPtr& schema)
+{
+ using NSkiff::EWireType;
+
+ constexpr auto keySwitchColumnName = "$key_switch";
+ constexpr auto rangeIndexColumnName = "$range_index";
+ constexpr auto rowIndexColumnName = "$row_index";
+
+ static const THashMap<TString, TSkiffColumnSchema> specialColumns = {
+ {keySwitchColumnName, {EColumnType::KeySwitch, true, EWireType::Boolean, keySwitchColumnName}},
+ {rangeIndexColumnName, {EColumnType::RangeIndex, false, EWireType::Int64, rangeIndexColumnName}},
+ {rowIndexColumnName, {EColumnType::RowIndex, false, EWireType::Int64, rowIndexColumnName}},
+ };
+
+ Y_ENSURE(schema->GetWireType() == EWireType::Variant16,
+ "Expected 'variant16' wire type for schema, got '" << schema->GetWireType() << "'");
+ TVector<TSkiffTableSchema> result;
+ for (const auto& tableSchema : schema->GetChildren()) {
+ Y_ENSURE(tableSchema->GetWireType() == EWireType::Tuple,
+ "Expected 'tuple' wire type for table schema, got '" << tableSchema->GetWireType() << "'");
+ TVector<TSkiffColumnSchema> columns;
+ for (const auto& columnSchema : tableSchema->GetChildren()) {
+ if (columnSchema->GetName().StartsWith("$")) {
+ auto iter = specialColumns.find(columnSchema->GetName());
+ Y_ENSURE(iter != specialColumns.end(), "Unknown special column: " << columnSchema->GetName());
+ columns.push_back(iter->second);
+ } else {
+ auto wireType = columnSchema->GetWireType();
+ bool required = true;
+ if (wireType == EWireType::Variant8) {
+ const auto& children = columnSchema->GetChildren();
+ Y_ENSURE(
+ children.size() == 2 && children[0]->GetWireType() == EWireType::Nothing &&
+ NSkiff::IsSimpleType(children[1]->GetWireType()),
+ "Expected schema of form 'variant8<nothing, simple-type>', got "
+ << NSkiff::GetShortDebugString(columnSchema));
+ wireType = children[1]->GetWireType();
+ required = false;
+ }
+ Y_ENSURE(NSkiff::IsSimpleType(wireType),
+ "Expected column schema to be of simple type, got " << NSkiff::GetShortDebugString(columnSchema));
+ columns.emplace_back(
+ EColumnType::Dense,
+ required,
+ wireType,
+ columnSchema->GetName());
+ }
+ }
+ result.push_back({std::move(columns)});
+ }
+ return result;
+}
+
+void TSkiffTableReader::ReadRow()
+{
+ if (Row_.IsUndefined()) {
+ Row_ = TNode::CreateMap();
+ } else {
+ Row_.AsMap().clear();
+ }
+
+ if (RowIndex_) {
+ ++*RowIndex_;
+ }
+
+ TableIndex_ = Parser_->ParseVariant16Tag();
+ Y_ENSURE(TableIndex_ < Schemas_.size(), "Table index out of range: " << TableIndex_ << " >= " << Schemas_.size());
+ const auto& tableSchema = Schemas_[TableIndex_];
+
+ auto parse = [&](NSkiff::EWireType wireType) -> TNode {
+ switch (wireType) {
+ case NSkiff::EWireType::Int64:
+ return Parser_->ParseInt64();
+ case NSkiff::EWireType::Uint64:
+ return Parser_->ParseUint64();
+ case NSkiff::EWireType::Boolean:
+ return Parser_->ParseBoolean();
+ case NSkiff::EWireType::Double:
+ return Parser_->ParseDouble();
+ case NSkiff::EWireType::String32:
+ return Parser_->ParseString32();
+ case NSkiff::EWireType::Yson32:
+ return NodeFromYsonString(Parser_->ParseYson32());
+ case NSkiff::EWireType::Nothing:
+ return TNode::CreateEntity();
+ default:
+ Y_FAIL("Bad column wire type: '%s'", ::ToString(wireType).data());
+ }
+ };
+
+ for (const auto& columnSchema : tableSchema.Columns) {
+ if (!columnSchema.Required) {
+ auto tag = Parser_->ParseVariant8Tag();
+ if (tag == 0) {
+ if (columnSchema.Type == EColumnType::Dense) {
+ Row_[columnSchema.Name] = TNode::CreateEntity();
+ }
+ continue;
+ }
+ Y_ENSURE(tag == 1, "Tag for 'variant8<nothing," << columnSchema.WireType
+ << ">' expected to be 0 or 1, got " << tag);
+ }
+ auto value = parse(columnSchema.WireType);
+ switch (columnSchema.Type) {
+ case EColumnType::Dense:
+ Row_[columnSchema.Name] = std::move(value);
+ break;
+ case EColumnType::KeySwitch:
+ if (value.AsBool()) {
+ AfterKeySwitch_ = true;
+ Valid_ = false;
+ }
+ break;
+ case EColumnType::RangeIndex:
+ RangeIndex_ = value.AsInt64();
+ break;
+ case EColumnType::RowIndex:
+ RowIndex_ = value.AsInt64();
+ break;
+ default:
+ Y_FAIL("Bad column type: %d", static_cast<int>(columnSchema.Type));
+ }
+ }
+
+ // We successfully parsed one more row from the stream,
+ // so reset retry count to their initial value.
+ Input_.ResetRetries();
+}
+
+void TSkiffTableReader::EnsureValidity() const
+{
+ Y_ENSURE(Valid_, "Iterator is not valid");
+}
+
+} // namespace NDetail
+} // namespace NYT
diff --git a/yt/cpp/mapreduce/io/skiff_table_reader.h b/yt/cpp/mapreduce/io/skiff_table_reader.h
new file mode 100644
index 0000000000..95ece5f9c7
--- /dev/null
+++ b/yt/cpp/mapreduce/io/skiff_table_reader.h
@@ -0,0 +1,65 @@
+#pragma once
+
+#include "counting_raw_reader.h"
+
+#include <yt/cpp/mapreduce/interface/io.h>
+
+#include <yt/cpp/mapreduce/skiff/wire_type.h>
+#include <yt/cpp/mapreduce/skiff/unchecked_parser.h>
+
+#include <util/stream/buffered.h>
+
+namespace NYT {
+namespace NDetail {
+
+////////////////////////////////////////////////////////////////////////////////
+
+class TSkiffTableReader
+ : public INodeReaderImpl
+{
+public:
+ TSkiffTableReader(
+ ::TIntrusivePtr<TRawTableReader> input,
+ const std::shared_ptr<NSkiff::TSkiffSchema>& schema);
+ ~TSkiffTableReader() override;
+
+ virtual const TNode& GetRow() const override;
+ virtual void MoveRow(TNode* row) override;
+
+ bool IsValid() const override;
+ void Next() override;
+ ui32 GetTableIndex() const override;
+ ui32 GetRangeIndex() const override;
+ ui64 GetRowIndex() const override;
+ void NextKey() override;
+ TMaybe<size_t> GetReadByteCount() const override;
+ bool IsRawReaderExhausted() const override;
+
+private:
+ struct TSkiffTableSchema;
+
+private:
+ void EnsureValidity() const;
+ void ReadRow();
+ static TVector<TSkiffTableSchema> CreateSkiffTableSchemas(const std::shared_ptr<NSkiff::TSkiffSchema>& schema);
+
+private:
+ NDetail::TCountingRawTableReader Input_;
+ TBufferedInput BufferedInput_;
+ std::optional<NSkiff::TUncheckedSkiffParser> Parser_;
+ TVector<TSkiffTableSchema> Schemas_;
+
+ TNode Row_;
+
+ bool Valid_ = true;
+ bool AfterKeySwitch_ = false;
+ bool Finished_ = false;
+ TMaybe<ui64> RangeIndex_;
+ TMaybe<ui64> RowIndex_;
+ ui32 TableIndex_ = 0;
+};
+
+////////////////////////////////////////////////////////////////////////////////
+
+} // namespace NDetail
+} // namespace NYT
diff --git a/yt/cpp/mapreduce/io/stream_raw_reader.cpp b/yt/cpp/mapreduce/io/stream_raw_reader.cpp
new file mode 100644
index 0000000000..ec19b67d0b
--- /dev/null
+++ b/yt/cpp/mapreduce/io/stream_raw_reader.cpp
@@ -0,0 +1,59 @@
+#include "stream_table_reader.h"
+
+#include "node_table_reader.h"
+#include "proto_table_reader.h"
+#include "skiff_table_reader.h"
+#include "yamr_table_reader.h"
+
+#include <util/system/env.h>
+#include <util/string/type.h>
+
+namespace NYT {
+
+template <>
+TTableReaderPtr<TNode> CreateTableReader<TNode>(
+ IInputStream* stream, const TTableReaderOptions& /*options*/)
+{
+ auto impl = ::MakeIntrusive<TNodeTableReader>(
+ ::MakeIntrusive<NDetail::TInputStreamProxy>(stream));
+ return new TTableReader<TNode>(impl);
+}
+
+template <>
+TTableReaderPtr<TYaMRRow> CreateTableReader<TYaMRRow>(
+ IInputStream* stream, const TTableReaderOptions& /*options*/)
+{
+ auto impl = ::MakeIntrusive<TYaMRTableReader>(
+ ::MakeIntrusive<NDetail::TInputStreamProxy>(stream));
+ return new TTableReader<TYaMRRow>(impl);
+}
+
+
+namespace NDetail {
+
+////////////////////////////////////////////////////////////////////////////////
+
+::TIntrusivePtr<IProtoReaderImpl> CreateProtoReader(
+ IInputStream* stream,
+ const TTableReaderOptions& /* options */,
+ const ::google::protobuf::Descriptor* descriptor)
+{
+ return new TLenvalProtoTableReader(
+ ::MakeIntrusive<TInputStreamProxy>(stream),
+ {descriptor});
+}
+
+::TIntrusivePtr<IProtoReaderImpl> CreateProtoReader(
+ IInputStream* stream,
+ const TTableReaderOptions& /* options */,
+ TVector<const ::google::protobuf::Descriptor*> descriptors)
+{
+ return new TLenvalProtoTableReader(
+ ::MakeIntrusive<TInputStreamProxy>(stream),
+ std::move(descriptors));
+}
+
+////////////////////////////////////////////////////////////////////////////////
+
+} // namespace NDetail
+} // namespace NYT
diff --git a/yt/cpp/mapreduce/io/stream_table_reader.h b/yt/cpp/mapreduce/io/stream_table_reader.h
new file mode 100644
index 0000000000..d799c63cf4
--- /dev/null
+++ b/yt/cpp/mapreduce/io/stream_table_reader.h
@@ -0,0 +1,65 @@
+#pragma once
+
+#include <yt/cpp/mapreduce/interface/io.h>
+
+namespace NYT {
+namespace NDetail {
+
+////////////////////////////////////////////////////////////////////////////////
+
+class TInputStreamProxy
+ : public TRawTableReader
+{
+public:
+ TInputStreamProxy(IInputStream* stream)
+ : Stream_(stream)
+ { }
+
+ bool Retry(const TMaybe<ui32>& /* rangeIndex */, const TMaybe<ui64>& /* rowIndex */) override
+ {
+ return false;
+ }
+
+ void ResetRetries() override
+ { }
+
+ bool HasRangeIndices() const override
+ {
+ return false;
+ }
+
+protected:
+ size_t DoRead(void* buf, size_t len) override
+ {
+ return Stream_->Read(buf, len);
+ }
+
+private:
+ IInputStream* Stream_;
+};
+
+////////////////////////////////////////////////////////////////////////////////
+
+::TIntrusivePtr<IProtoReaderImpl> CreateProtoReader(
+ IInputStream* stream,
+ const TTableReaderOptions& /* options */,
+ const ::google::protobuf::Descriptor* descriptor);
+
+::TIntrusivePtr<IProtoReaderImpl> CreateProtoReader(
+ IInputStream* stream,
+ const TTableReaderOptions& /* options */,
+ TVector<const ::google::protobuf::Descriptor*> descriptors);
+
+////////////////////////////////////////////////////////////////////////////////
+
+} // namespace NDetail
+
+template <>
+TTableReaderPtr<TNode> CreateTableReader<TNode>(
+ IInputStream* stream, const TTableReaderOptions& options);
+
+template <>
+TTableReaderPtr<TYaMRRow> CreateTableReader<TYaMRRow>(
+ IInputStream* stream, const TTableReaderOptions& /*options*/);
+
+} // namespace NYT
diff --git a/yt/cpp/mapreduce/io/ya.make b/yt/cpp/mapreduce/io/ya.make
new file mode 100644
index 0000000000..d355e86850
--- /dev/null
+++ b/yt/cpp/mapreduce/io/ya.make
@@ -0,0 +1,33 @@
+LIBRARY()
+
+INCLUDE(${ARCADIA_ROOT}/yt/ya_cpp.make.inc)
+
+SRCS(
+ counting_raw_reader.cpp
+ job_reader.cpp
+ job_writer.cpp
+ lenval_table_reader.cpp
+ node_table_reader.cpp
+ node_table_writer.cpp
+ proto_helpers.cpp
+ proto_table_reader.cpp
+ proto_table_writer.cpp
+ skiff_row_table_reader.cpp
+ skiff_table_reader.cpp
+ stream_raw_reader.cpp
+ yamr_table_reader.cpp
+ yamr_table_writer.cpp
+)
+
+PEERDIR(
+ contrib/libs/protobuf
+ library/cpp/yson
+ yt/cpp/mapreduce/common
+ yt/cpp/mapreduce/interface
+ yt/cpp/mapreduce/interface/logging
+ yt/yt_proto/yt/formats
+ library/cpp/yson/node
+ yt/cpp/mapreduce/skiff
+)
+
+END()
diff --git a/yt/cpp/mapreduce/io/yamr_table_reader.cpp b/yt/cpp/mapreduce/io/yamr_table_reader.cpp
new file mode 100644
index 0000000000..6204738e10
--- /dev/null
+++ b/yt/cpp/mapreduce/io/yamr_table_reader.cpp
@@ -0,0 +1,145 @@
+#include "yamr_table_reader.h"
+
+#include <yt/cpp/mapreduce/common/helpers.h>
+#include <yt/cpp/mapreduce/common/retry_lib.h>
+#include <yt/cpp/mapreduce/raw_client/raw_requests.h>
+
+////////////////////////////////////////////////////////////////////
+
+static void CheckedSkip(IInputStream* input, size_t byteCount)
+{
+ size_t skipped = input->Skip(byteCount);
+ Y_ENSURE(skipped == byteCount, "Premature end of YaMR stream");
+}
+
+////////////////////////////////////////////////////////////////////
+
+namespace NYT {
+
+using namespace NYT::NDetail::NRawClient;
+
+////////////////////////////////////////////////////////////////////////////////
+
+TYaMRTableReader::TYaMRTableReader(::TIntrusivePtr<TRawTableReader> input)
+ : TLenvalTableReader(std::move(input))
+{ }
+
+TYaMRTableReader::~TYaMRTableReader()
+{ }
+
+const TYaMRRow& TYaMRTableReader::GetRow() const
+{
+ CheckValidity();
+ if (!RowTaken_) {
+ const_cast<TYaMRTableReader*>(this)->ReadRow();
+ }
+ return Row_;
+}
+
+bool TYaMRTableReader::IsValid() const
+{
+ return Valid_;
+}
+
+void TYaMRTableReader::Next()
+{
+ TLenvalTableReader::Next();
+}
+
+void TYaMRTableReader::NextKey()
+{
+ TLenvalTableReader::NextKey();
+}
+
+ui32 TYaMRTableReader::GetTableIndex() const
+{
+ return TLenvalTableReader::GetTableIndex();
+}
+
+ui32 TYaMRTableReader::GetRangeIndex() const
+{
+ return TLenvalTableReader::GetRangeIndex();
+}
+
+ui64 TYaMRTableReader::GetRowIndex() const
+{
+ return TLenvalTableReader::GetRowIndex();
+}
+
+TMaybe<size_t> TYaMRTableReader::GetReadByteCount() const
+{
+ return TLenvalTableReader::GetReadByteCount();
+}
+
+bool TYaMRTableReader::IsEndOfStream() const
+{
+ return TLenvalTableReader::IsEndOfStream();
+}
+
+bool TYaMRTableReader::IsRawReaderExhausted() const
+{
+ return TLenvalTableReader::IsRawReaderExhausted();
+}
+
+void TYaMRTableReader::ReadField(TString* result, i32 length)
+{
+ result->resize(length);
+ size_t count = Input_.Load(result->begin(), length);
+ Y_ENSURE(count == static_cast<size_t>(length), "Premature end of YaMR stream");
+}
+
+void TYaMRTableReader::ReadRow()
+{
+ while (true) {
+ try {
+ i32 value = static_cast<i32>(Length_);
+ ReadField(&Key_, value);
+ Row_.Key = Key_;
+
+ ReadInteger(&value);
+ ReadField(&SubKey_, value);
+ Row_.SubKey = SubKey_;
+
+ ReadInteger(&value);
+ ReadField(&Value_, value);
+ Row_.Value = Value_;
+
+ RowTaken_ = true;
+
+ // We successfully parsed one more row from the stream,
+ // so reset retry count to their initial value.
+ Input_.ResetRetries();
+
+ break;
+ } catch (const std::exception& ) {
+ if (!TLenvalTableReader::Retry()) {
+ throw;
+ }
+ }
+ }
+}
+
+void TYaMRTableReader::SkipRow()
+{
+ while (true) {
+ try {
+ i32 value = static_cast<i32>(Length_);
+ CheckedSkip(&Input_, value);
+
+ ReadInteger(&value);
+ CheckedSkip(&Input_, value);
+
+ ReadInteger(&value);
+ CheckedSkip(&Input_, value);
+ break;
+ } catch (const std::exception& ) {
+ if (!TLenvalTableReader::Retry()) {
+ throw;
+ }
+ }
+ }
+}
+
+////////////////////////////////////////////////////////////////////////////////
+
+} // namespace NYT
diff --git a/yt/cpp/mapreduce/io/yamr_table_reader.h b/yt/cpp/mapreduce/io/yamr_table_reader.h
new file mode 100644
index 0000000000..39fdecfa71
--- /dev/null
+++ b/yt/cpp/mapreduce/io/yamr_table_reader.h
@@ -0,0 +1,48 @@
+#pragma once
+
+#include "lenval_table_reader.h"
+
+#include <yt/cpp/mapreduce/interface/io.h>
+
+namespace NYT {
+
+class TRawTableReader;
+struct TClientContext;
+
+////////////////////////////////////////////////////////////////////////////////
+
+class TYaMRTableReader
+ : public IYaMRReaderImpl
+ , public TLenvalTableReader
+{
+public:
+ explicit TYaMRTableReader(::TIntrusivePtr<TRawTableReader> input);
+ ~TYaMRTableReader() override;
+
+ const TYaMRRow& GetRow() const override;
+
+ bool IsValid() const override;
+ void Next() override;
+ ui32 GetTableIndex() const override;
+ ui32 GetRangeIndex() const override;
+ ui64 GetRowIndex() const override;
+ void NextKey() override;
+ TMaybe<size_t> GetReadByteCount() const override;
+ bool IsEndOfStream() const override;
+ bool IsRawReaderExhausted() const override;
+
+private:
+ void ReadField(TString* result, i32 length);
+
+ void ReadRow();
+ void SkipRow() override;
+
+ TYaMRRow Row_;
+ TString Key_;
+ TString SubKey_;
+ TString Value_;
+};
+
+////////////////////////////////////////////////////////////////////////////////
+
+} // namespace NYT
diff --git a/yt/cpp/mapreduce/io/yamr_table_writer.cpp b/yt/cpp/mapreduce/io/yamr_table_writer.cpp
new file mode 100644
index 0000000000..cce7ceb0f0
--- /dev/null
+++ b/yt/cpp/mapreduce/io/yamr_table_writer.cpp
@@ -0,0 +1,53 @@
+#include "yamr_table_writer.h"
+
+#include <yt/cpp/mapreduce/interface/io.h>
+
+namespace NYT {
+
+////////////////////////////////////////////////////////////////////////////////
+
+TYaMRTableWriter::TYaMRTableWriter(THolder<IProxyOutput> output)
+ : Output_(std::move(output))
+{ }
+
+TYaMRTableWriter::~TYaMRTableWriter()
+{ }
+
+size_t TYaMRTableWriter::GetTableCount() const
+{
+ return Output_->GetStreamCount();
+}
+
+void TYaMRTableWriter::FinishTable(size_t tableIndex) {
+ Output_->GetStream(tableIndex)->Finish();
+}
+
+void TYaMRTableWriter::AddRow(const TYaMRRow& row, size_t tableIndex)
+{
+ auto* stream = Output_->GetStream(tableIndex);
+
+ auto writeField = [&stream] (const TStringBuf& field) {
+ i32 length = static_cast<i32>(field.length());
+ stream->Write(&length, sizeof(length));
+ stream->Write(field.data(), field.length());
+ };
+
+ writeField(row.Key);
+ writeField(row.SubKey);
+ writeField(row.Value);
+
+ Output_->OnRowFinished(tableIndex);
+}
+
+void TYaMRTableWriter::AddRow(TYaMRRow&& row, size_t tableIndex) {
+ TYaMRTableWriter::AddRow(row, tableIndex);
+}
+
+void TYaMRTableWriter::Abort()
+{
+ Output_->Abort();
+}
+
+////////////////////////////////////////////////////////////////////////////////
+
+} // namespace NYT
diff --git a/yt/cpp/mapreduce/io/yamr_table_writer.h b/yt/cpp/mapreduce/io/yamr_table_writer.h
new file mode 100644
index 0000000000..cf88eaf287
--- /dev/null
+++ b/yt/cpp/mapreduce/io/yamr_table_writer.h
@@ -0,0 +1,31 @@
+#pragma once
+
+#include <yt/cpp/mapreduce/interface/io.h>
+
+namespace NYT {
+
+class IProxyOutput;
+
+////////////////////////////////////////////////////////////////////////////////
+
+class TYaMRTableWriter
+ : public IYaMRWriterImpl
+{
+public:
+ explicit TYaMRTableWriter(THolder<IProxyOutput> output);
+ ~TYaMRTableWriter() override;
+
+ void AddRow(const TYaMRRow& row, size_t tableIndex) override;
+ void AddRow(TYaMRRow&& row, size_t tableIndex) override;
+
+ size_t GetTableCount() const override;
+ void FinishTable(size_t) override;
+ void Abort() override;
+
+private:
+ THolder<IProxyOutput> Output_;
+};
+
+////////////////////////////////////////////////////////////////////////////////
+
+} // namespace NYT
diff --git a/yt/cpp/mapreduce/library/table_schema/protobuf.cpp b/yt/cpp/mapreduce/library/table_schema/protobuf.cpp
new file mode 100644
index 0000000000..888da828e7
--- /dev/null
+++ b/yt/cpp/mapreduce/library/table_schema/protobuf.cpp
@@ -0,0 +1 @@
+#include "protobuf.h"
diff --git a/yt/cpp/mapreduce/library/table_schema/protobuf.h b/yt/cpp/mapreduce/library/table_schema/protobuf.h
new file mode 100644
index 0000000000..e29e096745
--- /dev/null
+++ b/yt/cpp/mapreduce/library/table_schema/protobuf.h
@@ -0,0 +1,3 @@
+#pragma once
+
+#include <yt/cpp/mapreduce/interface/common.h>
diff --git a/yt/cpp/mapreduce/library/table_schema/ya.make b/yt/cpp/mapreduce/library/table_schema/ya.make
new file mode 100644
index 0000000000..4aebad72dd
--- /dev/null
+++ b/yt/cpp/mapreduce/library/table_schema/ya.make
@@ -0,0 +1,14 @@
+LIBRARY()
+
+INCLUDE(${ARCADIA_ROOT}/yt/ya_cpp.make.inc)
+
+SRCS(
+ protobuf.h
+ protobuf.cpp
+)
+
+PEERDIR(
+ yt/cpp/mapreduce/interface
+)
+
+END()
diff --git a/yt/cpp/mapreduce/raw_client/raw_batch_request.cpp b/yt/cpp/mapreduce/raw_client/raw_batch_request.cpp
new file mode 100644
index 0000000000..be81f5a21a
--- /dev/null
+++ b/yt/cpp/mapreduce/raw_client/raw_batch_request.cpp
@@ -0,0 +1,687 @@
+#include "raw_batch_request.h"
+
+#include "raw_requests.h"
+#include "rpc_parameters_serialization.h"
+
+#include <yt/cpp/mapreduce/common/helpers.h>
+#include <yt/cpp/mapreduce/common/retry_lib.h>
+
+#include <yt/cpp/mapreduce/interface/logging/yt_log.h>
+
+#include <yt/cpp/mapreduce/interface/client.h>
+#include <yt/cpp/mapreduce/interface/errors.h>
+#include <yt/cpp/mapreduce/interface/serialize.h>
+
+#include <library/cpp/yson/node/node.h>
+
+#include <yt/cpp/mapreduce/http/context.h>
+#include <yt/cpp/mapreduce/http/retry_request.h>
+
+#include <util/generic/guid.h>
+#include <util/string/builder.h>
+
+#include <exception>
+
+namespace NYT::NDetail::NRawClient {
+
+using NThreading::TFuture;
+using NThreading::TPromise;
+using NThreading::NewPromise;
+
+////////////////////////////////////////////////////////////////////
+
+static TString RequestInfo(const TNode& request)
+{
+ return ::TStringBuilder()
+ << request["command"].AsString() << ' ' << NodeToYsonString(request["parameters"]);
+}
+
+static void EnsureNothing(const TMaybe<TNode>& node)
+{
+ Y_ENSURE(!node, "Internal error: expected to have no response, but got response of type " << node->GetType());
+}
+
+static void EnsureSomething(const TMaybe<TNode>& node)
+{
+ Y_ENSURE(node, "Internal error: expected to have response of any type, but got no response.");
+}
+
+static void EnsureType(const TNode& node, TNode::EType type)
+{
+ Y_ENSURE(node.GetType() == type, "Internal error: unexpected response type. "
+ << "Expected: " << type << ", actual: " << node.GetType());
+}
+
+static void EnsureType(const TMaybe<TNode>& node, TNode::EType type)
+{
+ Y_ENSURE(node, "Internal error: expected to have response of type " << type << ", but got no response.");
+ EnsureType(*node, type);
+}
+
+////////////////////////////////////////////////////////////////////
+
+template <typename TReturnType>
+class TResponseParserBase
+ : public TRawBatchRequest::IResponseItemParser
+{
+public:
+ using TFutureResult = TFuture<TReturnType>;
+
+public:
+ TResponseParserBase()
+ : Result(NewPromise<TReturnType>())
+ { }
+
+ void SetException(std::exception_ptr e) override
+ {
+ Result.SetException(std::move(e));
+ }
+
+ TFuture<TReturnType> GetFuture()
+ {
+ return Result.GetFuture();
+ }
+
+protected:
+ TPromise<TReturnType> Result;
+};
+
+////////////////////////////////////////////////////////////////////
+
+
+class TGetResponseParser
+ : public TResponseParserBase<TNode>
+{
+public:
+ void SetResponse(TMaybe<TNode> node) override
+ {
+ EnsureSomething(node);
+ Result.SetValue(std::move(*node));
+ }
+};
+
+////////////////////////////////////////////////////////////////////
+
+class TVoidResponseParser
+ : public TResponseParserBase<void>
+{
+public:
+ void SetResponse(TMaybe<TNode> node) override
+ {
+ EnsureNothing(node);
+ Result.SetValue();
+ }
+};
+
+////////////////////////////////////////////////////////////////////
+
+class TListResponseParser
+ : public TResponseParserBase<TNode::TListType>
+{
+public:
+ void SetResponse(TMaybe<TNode> node) override
+ {
+ EnsureType(node, TNode::List);
+ Result.SetValue(std::move(node->AsList()));
+ }
+};
+
+////////////////////////////////////////////////////////////////////
+
+class TExistsResponseParser
+ : public TResponseParserBase<bool>
+{
+public:
+ void SetResponse(TMaybe<TNode> node) override
+ {
+ EnsureType(node, TNode::Bool);
+ Result.SetValue(std::move(node->AsBool()));
+ }
+};
+
+////////////////////////////////////////////////////////////////////
+
+class TGuidResponseParser
+ : public TResponseParserBase<TGUID>
+{
+public:
+ void SetResponse(TMaybe<TNode> node) override
+ {
+ EnsureType(node, TNode::String);
+ Result.SetValue(GetGuid(node->AsString()));
+ }
+};
+
+////////////////////////////////////////////////////////////////////
+
+class TCanonizeYPathResponseParser
+ : public TResponseParserBase<TRichYPath>
+{
+public:
+ explicit TCanonizeYPathResponseParser(TString pathPrefix, const TRichYPath& original)
+ : OriginalNode_(PathToNode(original))
+ , PathPrefix_(std::move(pathPrefix))
+ { }
+
+ void SetResponse(TMaybe<TNode> node) override
+ {
+ EnsureType(node, TNode::String);
+
+ for (const auto& item : OriginalNode_.GetAttributes().AsMap()) {
+ node->Attributes()[item.first] = item.second;
+ }
+ TRichYPath result;
+ Deserialize(result, *node);
+ result.Path_ = AddPathPrefix(result.Path_, PathPrefix_);
+ Result.SetValue(result);
+ }
+
+private:
+ TNode OriginalNode_;
+ TString PathPrefix_;
+};
+
+////////////////////////////////////////////////////////////////////
+
+class TGetOperationResponseParser
+ : public TResponseParserBase<TOperationAttributes>
+{
+public:
+ void SetResponse(TMaybe<TNode> node) override
+ {
+ EnsureType(node, TNode::Map);
+ Result.SetValue(ParseOperationAttributes(*node));
+ }
+};
+
+////////////////////////////////////////////////////////////////////
+
+class TTableColumnarStatisticsParser
+ : public TResponseParserBase<TVector<TTableColumnarStatistics>>
+{
+public:
+ void SetResponse(TMaybe<TNode> node) override
+ {
+ EnsureType(node, TNode::Map);
+ TVector<TTableColumnarStatistics> statistics;
+ Deserialize(statistics, *node);
+ Result.SetValue(std::move(statistics));
+ }
+};
+
+////////////////////////////////////////////////////////////////////
+
+class TTablePartitionsParser
+ : public TResponseParserBase<TMultiTablePartitions>
+{
+public:
+ void SetResponse(TMaybe<TNode> node) override
+ {
+ EnsureType(node, TNode::Map);
+ TMultiTablePartitions partitions;
+ Deserialize(partitions, *node);
+ Result.SetValue(std::move(partitions));
+ }
+};
+
+////////////////////////////////////////////////////////////////////////////////
+
+class TGetFileFromCacheParser
+ : public TResponseParserBase<TMaybe<TYPath>>
+{
+public:
+ void SetResponse(TMaybe<TNode> node) override
+ {
+ EnsureType(node, TNode::String);
+ if (node->AsString().empty()) {
+ Result.SetValue(Nothing());
+ } else {
+ Result.SetValue(node->AsString());
+ }
+ }
+};
+
+////////////////////////////////////////////////////////////////////
+
+class TYPathParser
+ : public TResponseParserBase<TYPath>
+{
+public:
+ void SetResponse(TMaybe<TNode> node) override
+ {
+ EnsureType(node, TNode::String);
+ Result.SetValue(node->AsString());
+ }
+};
+
+////////////////////////////////////////////////////////////////////
+
+class TCheckPermissionParser
+ : public TResponseParserBase<TCheckPermissionResponse>
+{
+public:
+ void SetResponse(TMaybe<TNode> node) override
+ {
+ EnsureType(node, TNode::Map);
+ Result.SetValue(ParseCheckPermissionResponse(*node));
+ }
+};
+
+////////////////////////////////////////////////////////////////////
+
+TRawBatchRequest::TBatchItem::TBatchItem(TNode parameters, ::TIntrusivePtr<IResponseItemParser> responseParser)
+ : Parameters(std::move(parameters))
+ , ResponseParser(std::move(responseParser))
+ , NextTry()
+{ }
+
+TRawBatchRequest::TBatchItem::TBatchItem(const TBatchItem& batchItem, TInstant nextTry)
+ : Parameters(batchItem.Parameters)
+ , ResponseParser(batchItem.ResponseParser)
+ , NextTry(nextTry)
+{ }
+
+////////////////////////////////////////////////////////////////////
+
+TRawBatchRequest::TRawBatchRequest(const TConfigPtr& config)
+ : Config_(config)
+{ }
+
+TRawBatchRequest::~TRawBatchRequest() = default;
+
+bool TRawBatchRequest::IsExecuted() const
+{
+ return Executed_;
+}
+
+void TRawBatchRequest::MarkExecuted()
+{
+ Executed_ = true;
+}
+
+template <typename TResponseParser>
+typename TResponseParser::TFutureResult TRawBatchRequest::AddRequest(
+ const TString& command,
+ TNode parameters,
+ TMaybe<TNode> input)
+{
+ return AddRequest(command, parameters, input, MakeIntrusive<TResponseParser>());
+}
+
+template <typename TResponseParser>
+typename TResponseParser::TFutureResult TRawBatchRequest::AddRequest(
+ const TString& command,
+ TNode parameters,
+ TMaybe<TNode> input,
+ ::TIntrusivePtr<TResponseParser> parser)
+{
+ Y_ENSURE(!Executed_, "Cannot add request: batch request is already executed");
+ TNode request;
+ request["command"] = command;
+ request["parameters"] = std::move(parameters);
+ if (input) {
+ request["input"] = std::move(*input);
+ }
+ BatchItemList_.emplace_back(std::move(request), parser);
+ return parser->GetFuture();
+}
+
+void TRawBatchRequest::AddRequest(TBatchItem batchItem)
+{
+ Y_ENSURE(!Executed_, "Cannot add request: batch request is already executed");
+ BatchItemList_.push_back(batchItem);
+}
+
+TFuture<TNodeId> TRawBatchRequest::Create(
+ const TTransactionId& transaction,
+ const TYPath& path,
+ ENodeType type,
+ const TCreateOptions& options)
+{
+ return AddRequest<TGuidResponseParser>(
+ "create",
+ SerializeParamsForCreate(transaction, Config_->Prefix, path, type, options),
+ Nothing());
+}
+
+TFuture<void> TRawBatchRequest::Remove(
+ const TTransactionId& transaction,
+ const TYPath& path,
+ const TRemoveOptions& options)
+{
+ return AddRequest<TVoidResponseParser>(
+ "remove",
+ SerializeParamsForRemove(transaction, Config_->Prefix, path, options),
+ Nothing());
+}
+
+TFuture<bool> TRawBatchRequest::Exists(
+ const TTransactionId& transaction,
+ const TYPath& path,
+ const TExistsOptions& options)
+{
+ return AddRequest<TExistsResponseParser>(
+ "exists",
+ SerializeParamsForExists(transaction, Config_->Prefix, path, options),
+ Nothing());
+}
+
+TFuture<TNode> TRawBatchRequest::Get(
+ const TTransactionId& transaction,
+ const TYPath& path,
+ const TGetOptions& options)
+{
+ return AddRequest<TGetResponseParser>(
+ "get",
+ SerializeParamsForGet(transaction, Config_->Prefix, path, options),
+ Nothing());
+}
+
+TFuture<void> TRawBatchRequest::Set(
+ const TTransactionId& transaction,
+ const TYPath& path,
+ const TNode& node,
+ const TSetOptions& options)
+{
+ return AddRequest<TVoidResponseParser>(
+ "set",
+ SerializeParamsForSet(transaction, Config_->Prefix, path, options),
+ node);
+}
+
+TFuture<TNode::TListType> TRawBatchRequest::List(
+ const TTransactionId& transaction,
+ const TYPath& path,
+ const TListOptions& options)
+{
+ return AddRequest<TListResponseParser>(
+ "list",
+ SerializeParamsForList(transaction, Config_->Prefix, path, options),
+ Nothing());
+}
+
+TFuture<TNodeId> TRawBatchRequest::Copy(
+ const TTransactionId& transaction,
+ const TYPath& sourcePath,
+ const TYPath& destinationPath,
+ const TCopyOptions& options)
+{
+ return AddRequest<TGuidResponseParser>(
+ "copy",
+ SerializeParamsForCopy(transaction, Config_->Prefix, sourcePath, destinationPath, options),
+ Nothing());
+}
+
+TFuture<TNodeId> TRawBatchRequest::Move(
+ const TTransactionId& transaction,
+ const TYPath& sourcePath,
+ const TYPath& destinationPath,
+ const TMoveOptions& options)
+{
+ return AddRequest<TGuidResponseParser>(
+ "move",
+ SerializeParamsForMove(transaction, Config_->Prefix, sourcePath, destinationPath, options),
+ Nothing());
+}
+
+TFuture<TNodeId> TRawBatchRequest::Link(
+ const TTransactionId& transaction,
+ const TYPath& targetPath,
+ const TYPath& linkPath,
+ const TLinkOptions& options)
+{
+ return AddRequest<TGuidResponseParser>(
+ "link",
+ SerializeParamsForLink(transaction, Config_->Prefix, targetPath, linkPath, options),
+ Nothing());
+}
+
+TFuture<TLockId> TRawBatchRequest::Lock(
+ const TTransactionId& transaction,
+ const TYPath& path,
+ ELockMode mode,
+ const TLockOptions& options)
+{
+ return AddRequest<TGuidResponseParser>(
+ "lock",
+ SerializeParamsForLock(transaction, Config_->Prefix, path, mode, options),
+ Nothing());
+}
+
+TFuture<void> TRawBatchRequest::Unlock(
+ const TTransactionId& transaction,
+ const TYPath& path,
+ const TUnlockOptions& options)
+{
+ return AddRequest<TVoidResponseParser>(
+ "unlock",
+ SerializeParamsForUnlock(transaction, Config_->Prefix, path, options),
+ Nothing());
+}
+
+TFuture<TMaybe<TYPath>> TRawBatchRequest::GetFileFromCache(
+ const TTransactionId& transactionId,
+ const TString& md5Signature,
+ const TYPath& cachePath,
+ const TGetFileFromCacheOptions& options)
+{
+ return AddRequest<TGetFileFromCacheParser>(
+ "get_file_from_cache",
+ SerializeParamsForGetFileFromCache(transactionId, md5Signature, cachePath, options),
+ Nothing());
+}
+
+TFuture<TYPath> TRawBatchRequest::PutFileToCache(
+ const TTransactionId& transactionId,
+ const TYPath& filePath,
+ const TString& md5Signature,
+ const TYPath& cachePath,
+ const TPutFileToCacheOptions& options)
+{
+ return AddRequest<TYPathParser>(
+ "put_file_to_cache",
+ SerializeParamsForPutFileToCache(transactionId, Config_->Prefix, filePath, md5Signature, cachePath, options),
+ Nothing());
+}
+
+TFuture<TCheckPermissionResponse> TRawBatchRequest::CheckPermission(
+ const TString& user,
+ EPermission permission,
+ const TYPath& path,
+ const TCheckPermissionOptions& options)
+{
+ return AddRequest<TCheckPermissionParser>(
+ "check_permission",
+ SerializeParamsForCheckPermission(user, permission, Config_->Prefix, path, options),
+ Nothing());
+}
+
+TFuture<TOperationAttributes> TRawBatchRequest::GetOperation(
+ const TOperationId& operationId,
+ const TGetOperationOptions& options)
+{
+ return AddRequest<TGetOperationResponseParser>(
+ "get_operation",
+ SerializeParamsForGetOperation(operationId, options),
+ Nothing());
+}
+
+TFuture<void> TRawBatchRequest::AbortOperation(const TOperationId& operationId)
+{
+ return AddRequest<TVoidResponseParser>(
+ "abort_op",
+ SerializeParamsForAbortOperation(operationId),
+ Nothing());
+}
+
+TFuture<void> TRawBatchRequest::CompleteOperation(const TOperationId& operationId)
+{
+ return AddRequest<TVoidResponseParser>(
+ "complete_op",
+ SerializeParamsForCompleteOperation(operationId),
+ Nothing());
+}
+TFuture<void> TRawBatchRequest::SuspendOperation(
+ const TOperationId& operationId,
+ const TSuspendOperationOptions& options)
+{
+ return AddRequest<TVoidResponseParser>(
+ "suspend_operation",
+ SerializeParamsForSuspendOperation(operationId, options),
+ Nothing());
+}
+TFuture<void> TRawBatchRequest::ResumeOperation(
+ const TOperationId& operationId,
+ const TResumeOperationOptions& options)
+{
+ return AddRequest<TVoidResponseParser>(
+ "resume_operation",
+ SerializeParamsForResumeOperation(operationId, options),
+ Nothing());
+}
+
+TFuture<void> TRawBatchRequest::UpdateOperationParameters(
+ const TOperationId& operationId,
+ const TUpdateOperationParametersOptions& options)
+{
+ return AddRequest<TVoidResponseParser>(
+ "update_op_parameters",
+ SerializeParamsForUpdateOperationParameters(operationId, options),
+ Nothing());
+}
+
+TFuture<TRichYPath> TRawBatchRequest::CanonizeYPath(const TRichYPath& path)
+{
+ if (path.Path_.find_first_of("<>{}[]") != TString::npos) {
+ return AddRequest<TCanonizeYPathResponseParser>(
+ "parse_ypath",
+ SerializeParamsForParseYPath(path),
+ Nothing(),
+ MakeIntrusive<TCanonizeYPathResponseParser>(Config_->Prefix, path));
+ } else {
+ TRichYPath result = path;
+ result.Path_ = AddPathPrefix(result.Path_, Config_->Prefix);
+ return NThreading::MakeFuture(result);
+ }
+}
+
+TFuture<TVector<TTableColumnarStatistics>> TRawBatchRequest::GetTableColumnarStatistics(
+ const TTransactionId& transaction,
+ const TVector<TRichYPath>& paths,
+ const TGetTableColumnarStatisticsOptions& options)
+{
+ return AddRequest<TTableColumnarStatisticsParser>(
+ "get_table_columnar_statistics",
+ SerializeParamsForGetTableColumnarStatistics(transaction, paths, options),
+ Nothing());
+}
+
+TFuture<TMultiTablePartitions> TRawBatchRequest::GetTablePartitions(
+ const TTransactionId& transaction,
+ const TVector<TRichYPath>& paths,
+ const TGetTablePartitionsOptions& options)
+{
+ return AddRequest<TTablePartitionsParser>(
+ "partition_tables",
+ SerializeParamsForGetTablePartitions(transaction, paths, options),
+ Nothing());
+}
+
+void TRawBatchRequest::FillParameterList(size_t maxSize, TNode* result, TInstant* nextTry) const
+{
+ Y_VERIFY(result);
+ Y_VERIFY(nextTry);
+
+ *nextTry = TInstant();
+ maxSize = Min(maxSize, BatchItemList_.size());
+ *result = TNode::CreateList();
+ for (size_t i = 0; i < maxSize; ++i) {
+ YT_LOG_DEBUG("ExecuteBatch preparing: %v",
+ RequestInfo(BatchItemList_[i].Parameters));
+
+ result->Add(BatchItemList_[i].Parameters);
+ if (BatchItemList_[i].NextTry > *nextTry) {
+ *nextTry = BatchItemList_[i].NextTry;
+ }
+ }
+}
+
+void TRawBatchRequest::ParseResponse(
+ const TResponseInfo& requestResult,
+ const IRequestRetryPolicyPtr& retryPolicy,
+ TRawBatchRequest* retryBatch,
+ TInstant now)
+{
+ TNode node = NodeFromYsonString(requestResult.Response);
+ return ParseResponse(node, requestResult.RequestId, retryPolicy, retryBatch, now);
+}
+
+void TRawBatchRequest::ParseResponse(
+ TNode node,
+ const TString& requestId,
+ const IRequestRetryPolicyPtr& retryPolicy,
+ TRawBatchRequest* retryBatch,
+ TInstant now)
+{
+ Y_VERIFY(retryBatch);
+
+ EnsureType(node, TNode::List);
+ auto& responseList = node.AsList();
+ const auto size = responseList.size();
+ Y_ENSURE(size <= BatchItemList_.size(),
+ "Size of server response exceeds size of batch request;"
+ " size of batch: " << BatchItemList_.size() <<
+ " size of server response: " << size << '.');
+
+ for (size_t i = 0; i != size; ++i) {
+ try {
+ EnsureType(responseList[i], TNode::Map);
+ auto& responseNode = responseList[i].AsMap();
+ const auto outputIt = responseNode.find("output");
+ if (outputIt != responseNode.end()) {
+ BatchItemList_[i].ResponseParser->SetResponse(std::move(outputIt->second));
+ } else {
+ const auto errorIt = responseNode.find("error");
+ if (errorIt == responseNode.end()) {
+ BatchItemList_[i].ResponseParser->SetResponse(Nothing());
+ } else {
+ TErrorResponse error(400, requestId);
+ error.SetError(TYtError(errorIt->second));
+ if (auto curInterval = IsRetriable(error) ? retryPolicy->OnRetriableError(error) : Nothing()) {
+ YT_LOG_INFO(
+ "Batch subrequest (%s) failed, will retry, error: %s",
+ RequestInfo(BatchItemList_[i].Parameters),
+ error.what());
+ retryBatch->AddRequest(TBatchItem(BatchItemList_[i], now + *curInterval));
+ } else {
+ YT_LOG_ERROR(
+ "Batch subrequest (%s) failed, error: %s",
+ RequestInfo(BatchItemList_[i].Parameters),
+ error.what());
+ BatchItemList_[i].ResponseParser->SetException(std::make_exception_ptr(error));
+ }
+ }
+ }
+ } catch (const std::exception& e) {
+ // We don't expect other exceptions, so we don't catch (...)
+ BatchItemList_[i].ResponseParser->SetException(std::current_exception());
+ }
+ }
+ BatchItemList_.erase(BatchItemList_.begin(), BatchItemList_.begin() + size);
+}
+
+void TRawBatchRequest::SetErrorResult(std::exception_ptr e) const
+{
+ for (const auto& batchItem : BatchItemList_) {
+ batchItem.ResponseParser->SetException(e);
+ }
+}
+
+size_t TRawBatchRequest::BatchSize() const
+{
+ return BatchItemList_.size();
+}
+
+////////////////////////////////////////////////////////////////////////////////
+
+} // namespace NYT::NDetail::NRawClient
diff --git a/yt/cpp/mapreduce/raw_client/raw_batch_request.h b/yt/cpp/mapreduce/raw_client/raw_batch_request.h
new file mode 100644
index 0000000000..7ed5bebf5e
--- /dev/null
+++ b/yt/cpp/mapreduce/raw_client/raw_batch_request.h
@@ -0,0 +1,190 @@
+#pragma once
+
+#include <yt/cpp/mapreduce/common/fwd.h>
+
+#include <yt/cpp/mapreduce/interface/batch_request.h>
+#include <yt/cpp/mapreduce/interface/fwd.h>
+#include <yt/cpp/mapreduce/interface/node.h>
+#include <yt/cpp/mapreduce/interface/retry_policy.h>
+
+#include <yt/cpp/mapreduce/http/requests.h>
+
+#include <library/cpp/threading/future/future.h>
+
+#include <util/generic/ptr.h>
+#include <util/generic/deque.h>
+
+#include <exception>
+
+namespace NYT::NDetail {
+ struct TResponseInfo;
+}
+
+namespace NYT::NDetail::NRawClient {
+
+////////////////////////////////////////////////////////////////////////////////
+
+class TRawBatchRequest
+ : public TThrRefBase
+{
+public:
+ struct IResponseItemParser
+ : public TThrRefBase
+ {
+ ~IResponseItemParser() = default;
+
+ virtual void SetResponse(TMaybe<TNode> node) = 0;
+ virtual void SetException(std::exception_ptr e) = 0;
+ };
+
+public:
+ TRawBatchRequest(const TConfigPtr& config);
+ ~TRawBatchRequest();
+
+ bool IsExecuted() const;
+ void MarkExecuted();
+
+ void FillParameterList(size_t maxSize, TNode* result, TInstant* nextTry) const;
+
+ size_t BatchSize() const;
+
+ void ParseResponse(
+ const TResponseInfo& requestResult,
+ const IRequestRetryPolicyPtr& retryPolicy,
+ TRawBatchRequest* retryBatch,
+ TInstant now = TInstant::Now());
+ void ParseResponse(
+ TNode response,
+ const TString& requestId,
+ const IRequestRetryPolicyPtr& retryPolicy,
+ TRawBatchRequest* retryBatch,
+ TInstant now = TInstant::Now());
+ void SetErrorResult(std::exception_ptr e) const;
+
+ ::NThreading::TFuture<TNodeId> Create(
+ const TTransactionId& transaction,
+ const TYPath& path,
+ ENodeType type,
+ const TCreateOptions& options);
+ ::NThreading::TFuture<void> Remove(
+ const TTransactionId& transaction,
+ const TYPath& path,
+ const TRemoveOptions& options);
+ ::NThreading::TFuture<bool> Exists(
+ const TTransactionId& transaction,
+ const TYPath& path,
+ const TExistsOptions& options);
+ ::NThreading::TFuture<TNode> Get(
+ const TTransactionId& transaction,
+ const TYPath& path,
+ const TGetOptions& options);
+ ::NThreading::TFuture<void> Set(
+ const TTransactionId& transaction,
+ const TYPath& path,
+ const TNode& value,
+ const TSetOptions& options);
+ ::NThreading::TFuture<TNode::TListType> List(
+ const TTransactionId& transaction,
+ const TYPath& path,
+ const TListOptions& options);
+ ::NThreading::TFuture<TNodeId> Copy(
+ const TTransactionId& transaction,
+ const TYPath& sourcePath,
+ const TYPath& destinationPath,
+ const TCopyOptions& options);
+ ::NThreading::TFuture<TNodeId> Move(
+ const TTransactionId& transaction,
+ const TYPath& sourcePath,
+ const TYPath& destinationPath,
+ const TMoveOptions& options);
+ ::NThreading::TFuture<TNodeId> Link(
+ const TTransactionId& transaction,
+ const TYPath& targetPath,
+ const TYPath& linkPath,
+ const TLinkOptions& options);
+ ::NThreading::TFuture<TLockId> Lock(
+ const TTransactionId& transaction,
+ const TYPath& path,
+ ELockMode mode,
+ const TLockOptions& options);
+ ::NThreading::TFuture<void> Unlock(
+ const TTransactionId& transaction,
+ const TYPath& path,
+ const TUnlockOptions& options);
+ ::NThreading::TFuture<TMaybe<TYPath>> GetFileFromCache(
+ const TTransactionId& transactionId,
+ const TString& md5Signature,
+ const TYPath& cachePath,
+ const TGetFileFromCacheOptions& options);
+ ::NThreading::TFuture<TYPath> PutFileToCache(
+ const TTransactionId& transactionId,
+ const TYPath& filePath,
+ const TString& md5Signature,
+ const TYPath& cachePath,
+ const TPutFileToCacheOptions& options);
+ ::NThreading::TFuture<TCheckPermissionResponse> CheckPermission(
+ const TString& user,
+ EPermission permission,
+ const TYPath& path,
+ const TCheckPermissionOptions& options);
+ ::NThreading::TFuture<TOperationAttributes> GetOperation(
+ const TOperationId& operationId,
+ const TGetOperationOptions& options);
+ ::NThreading::TFuture<void> AbortOperation(const TOperationId& operationId);
+ ::NThreading::TFuture<void> CompleteOperation(const TOperationId& operationId);
+ ::NThreading::TFuture<void> SuspendOperation(
+ const TOperationId& operationId,
+ const TSuspendOperationOptions& options);
+ ::NThreading::TFuture<void> ResumeOperation(
+ const TOperationId& operationId,
+ const TResumeOperationOptions& options);
+ ::NThreading::TFuture<void> UpdateOperationParameters(
+ const TOperationId& operationId,
+ const TUpdateOperationParametersOptions& options);
+ ::NThreading::TFuture<TRichYPath> CanonizeYPath(const TRichYPath& path);
+ ::NThreading::TFuture<TVector<TTableColumnarStatistics>> GetTableColumnarStatistics(
+ const TTransactionId& transaction,
+ const TVector<TRichYPath>& paths,
+ const TGetTableColumnarStatisticsOptions& options);
+ ::NThreading::TFuture<TMultiTablePartitions> GetTablePartitions(
+ const TTransactionId& transaction,
+ const TVector<TRichYPath>& paths,
+ const TGetTablePartitionsOptions& options);
+
+private:
+ struct TBatchItem {
+ TNode Parameters;
+ ::TIntrusivePtr<IResponseItemParser> ResponseParser;
+ TInstant NextTry;
+
+ TBatchItem(TNode parameters, ::TIntrusivePtr<IResponseItemParser> responseParser);
+
+ TBatchItem(const TBatchItem& batchItem, TInstant nextTry);
+ };
+
+private:
+ template <typename TResponseParser>
+ typename TResponseParser::TFutureResult AddRequest(
+ const TString& command,
+ TNode parameters,
+ TMaybe<TNode> input);
+
+ template <typename TResponseParser>
+ typename TResponseParser::TFutureResult AddRequest(
+ const TString& command,
+ TNode parameters,
+ TMaybe<TNode> input,
+ ::TIntrusivePtr<TResponseParser> parser);
+
+ void AddRequest(TBatchItem batchItem);
+
+private:
+ TConfigPtr Config_;
+
+ TDeque<TBatchItem> BatchItemList_;
+ bool Executed_ = false;
+};
+
+////////////////////////////////////////////////////////////////////////////////
+
+} // namespace NYT::NDetail::NRawClient
diff --git a/yt/cpp/mapreduce/raw_client/raw_requests.cpp b/yt/cpp/mapreduce/raw_client/raw_requests.cpp
new file mode 100644
index 0000000000..26120759fd
--- /dev/null
+++ b/yt/cpp/mapreduce/raw_client/raw_requests.cpp
@@ -0,0 +1,1027 @@
+#include "raw_requests.h"
+
+#include "raw_batch_request.h"
+#include "rpc_parameters_serialization.h"
+
+#include <yt/cpp/mapreduce/common/helpers.h>
+#include <yt/cpp/mapreduce/common/retry_lib.h>
+#include <yt/cpp/mapreduce/common/wait_proxy.h>
+
+#include <yt/cpp/mapreduce/http/fwd.h>
+#include <yt/cpp/mapreduce/http/context.h>
+#include <yt/cpp/mapreduce/http/helpers.h>
+#include <yt/cpp/mapreduce/http/http_client.h>
+#include <yt/cpp/mapreduce/http/retry_request.h>
+
+#include <yt/cpp/mapreduce/interface/config.h>
+#include <yt/cpp/mapreduce/interface/client.h>
+#include <yt/cpp/mapreduce/interface/operation.h>
+#include <yt/cpp/mapreduce/interface/serialize.h>
+#include <yt/cpp/mapreduce/interface/tvm.h>
+
+#include <yt/cpp/mapreduce/interface/logging/yt_log.h>
+
+#include <library/cpp/yson/node/node_io.h>
+
+#include <util/generic/guid.h>
+#include <util/generic/scope.h>
+
+namespace NYT::NDetail::NRawClient {
+
+///////////////////////////////////////////////////////////////////////////////
+
+void ExecuteBatch(
+ IRequestRetryPolicyPtr retryPolicy,
+ const TClientContext& context,
+ TRawBatchRequest& batchRequest,
+ const TExecuteBatchOptions& options)
+{
+ if (batchRequest.IsExecuted()) {
+ ythrow yexception() << "Cannot execute batch request since it is already executed";
+ }
+ Y_DEFER {
+ batchRequest.MarkExecuted();
+ };
+
+ const auto concurrency = options.Concurrency_.GetOrElse(50);
+ const auto batchPartMaxSize = options.BatchPartMaxSize_.GetOrElse(concurrency * 5);
+
+ if (!retryPolicy) {
+ retryPolicy = CreateDefaultRequestRetryPolicy(context.Config);
+ }
+
+ while (batchRequest.BatchSize()) {
+ TRawBatchRequest retryBatch(context.Config);
+
+ while (batchRequest.BatchSize()) {
+ auto parameters = TNode::CreateMap();
+ TInstant nextTry;
+ batchRequest.FillParameterList(batchPartMaxSize, &parameters["requests"], &nextTry);
+ if (nextTry) {
+ SleepUntil(nextTry);
+ }
+ parameters["concurrency"] = concurrency;
+ auto body = NodeToYsonString(parameters);
+ THttpHeader header("POST", "execute_batch");
+ header.AddMutationId();
+ NDetail::TResponseInfo result;
+ try {
+ result = RetryRequestWithPolicy(retryPolicy, context, header, body);
+ } catch (const std::exception& e) {
+ batchRequest.SetErrorResult(std::current_exception());
+ retryBatch.SetErrorResult(std::current_exception());
+ throw;
+ }
+ batchRequest.ParseResponse(std::move(result), retryPolicy.Get(), &retryBatch);
+ }
+
+ batchRequest = std::move(retryBatch);
+ }
+}
+
+TNode Get(
+ const IRequestRetryPolicyPtr& retryPolicy,
+ const TClientContext& context,
+ const TTransactionId& transactionId,
+ const TYPath& path,
+ const TGetOptions& options)
+{
+ THttpHeader header("GET", "get");
+ header.MergeParameters(SerializeParamsForGet(transactionId, context.Config->Prefix, path, options));
+ return NodeFromYsonString(RetryRequestWithPolicy(retryPolicy, context, header).Response);
+}
+
+TNode TryGet(
+ const IRequestRetryPolicyPtr& retryPolicy,
+ const TClientContext& context,
+ const TTransactionId& transactionId,
+ const TYPath& path,
+ const TGetOptions& options)
+{
+ try {
+ return Get(retryPolicy, context, transactionId, path, options);
+ } catch (const TErrorResponse& error) {
+ if (!error.IsResolveError()) {
+ throw;
+ }
+ return TNode();
+ }
+}
+
+void Set(
+ const IRequestRetryPolicyPtr& retryPolicy,
+ const TClientContext& context,
+ const TTransactionId& transactionId,
+ const TYPath& path,
+ const TNode& value,
+ const TSetOptions& options)
+{
+ THttpHeader header("PUT", "set");
+ header.AddMutationId();
+ header.MergeParameters(SerializeParamsForSet(transactionId, context.Config->Prefix, path, options));
+ auto body = NodeToYsonString(value);
+ RetryRequestWithPolicy(retryPolicy, context, header, body);
+}
+
+void MultisetAttributes(
+ const IRequestRetryPolicyPtr& retryPolicy,
+ const TClientContext& context,
+ const TTransactionId& transactionId,
+ const TYPath& path,
+ const TNode::TMapType& value,
+ const TMultisetAttributesOptions& options)
+{
+ THttpHeader header("PUT", "api/v4/multiset_attributes", false);
+ header.AddMutationId();
+ header.MergeParameters(SerializeParamsForMultisetAttributes(transactionId, context.Config->Prefix, path, options));
+
+ auto body = NodeToYsonString(value);
+ RetryRequestWithPolicy(retryPolicy, context, header, body);
+}
+
+bool Exists(
+ const IRequestRetryPolicyPtr& retryPolicy,
+ const TClientContext& context,
+ const TTransactionId& transactionId,
+ const TYPath& path,
+ const TExistsOptions& options)
+{
+ THttpHeader header("GET", "exists");
+ header.MergeParameters(SerializeParamsForExists(transactionId, context.Config->Prefix, path, options));
+ return ParseBoolFromResponse(RetryRequestWithPolicy(retryPolicy, context, header).Response);
+}
+
+TNodeId Create(
+ const IRequestRetryPolicyPtr& retryPolicy,
+ const TClientContext& context,
+ const TTransactionId& transactionId,
+ const TYPath& path,
+ const ENodeType& type,
+ const TCreateOptions& options)
+{
+ THttpHeader header("POST", "create");
+ header.AddMutationId();
+ header.MergeParameters(SerializeParamsForCreate(transactionId, context.Config->Prefix, path, type, options));
+ return ParseGuidFromResponse(RetryRequestWithPolicy(retryPolicy, context, header).Response);
+}
+
+TNodeId Copy(
+ const IRequestRetryPolicyPtr& retryPolicy,
+ const TClientContext& context,
+ const TTransactionId& transactionId,
+ const TYPath& sourcePath,
+ const TYPath& destinationPath,
+ const TCopyOptions& options)
+{
+ THttpHeader header("POST", "copy");
+ header.AddMutationId();
+ header.MergeParameters(SerializeParamsForCopy(transactionId, context.Config->Prefix, sourcePath, destinationPath, options));
+ return ParseGuidFromResponse(RetryRequestWithPolicy(retryPolicy, context, header).Response);
+}
+
+TNodeId Move(
+ const IRequestRetryPolicyPtr& retryPolicy,
+ const TClientContext& context,
+ const TTransactionId& transactionId,
+ const TYPath& sourcePath,
+ const TYPath& destinationPath,
+ const TMoveOptions& options)
+{
+ THttpHeader header("POST", "move");
+ header.AddMutationId();
+ header.MergeParameters(NRawClient::SerializeParamsForMove(transactionId, context.Config->Prefix, sourcePath, destinationPath, options));
+ return ParseGuidFromResponse(RetryRequestWithPolicy(retryPolicy, context, header).Response);
+}
+
+void Remove(
+ const IRequestRetryPolicyPtr& retryPolicy,
+ const TClientContext& context,
+ const TTransactionId& transactionId,
+ const TYPath& path,
+ const TRemoveOptions& options)
+{
+ THttpHeader header("POST", "remove");
+ header.AddMutationId();
+ header.MergeParameters(SerializeParamsForRemove(transactionId, context.Config->Prefix, path, options));
+ RetryRequestWithPolicy(retryPolicy, context, header);
+}
+
+TNode::TListType List(
+ const IRequestRetryPolicyPtr& retryPolicy,
+ const TClientContext& context,
+ const TTransactionId& transactionId,
+ const TYPath& path,
+ const TListOptions& options)
+{
+ THttpHeader header("GET", "list");
+
+ TYPath updatedPath = AddPathPrefix(path, context.Config->Prefix);
+ // Translate "//" to "/"
+ // Translate "//some/constom/prefix/from/config/" to "//some/constom/prefix/from/config"
+ if (path.empty() && updatedPath.EndsWith('/')) {
+ updatedPath.pop_back();
+ }
+ header.MergeParameters(SerializeParamsForList(transactionId, context.Config->Prefix, updatedPath, options));
+ auto result = RetryRequestWithPolicy(retryPolicy, context, header);
+ return NodeFromYsonString(result.Response).AsList();
+}
+
+TNodeId Link(
+ const IRequestRetryPolicyPtr& retryPolicy,
+ const TClientContext& context,
+ const TTransactionId& transactionId,
+ const TYPath& targetPath,
+ const TYPath& linkPath,
+ const TLinkOptions& options)
+{
+ THttpHeader header("POST", "link");
+ header.AddMutationId();
+ header.MergeParameters(SerializeParamsForLink(transactionId, context.Config->Prefix, targetPath, linkPath, options));
+ return ParseGuidFromResponse(RetryRequestWithPolicy(retryPolicy, context, header).Response);
+}
+
+TLockId Lock(
+ const IRequestRetryPolicyPtr& retryPolicy,
+ const TClientContext& context,
+ const TTransactionId& transactionId,
+ const TYPath& path,
+ ELockMode mode,
+ const TLockOptions& options)
+{
+ THttpHeader header("POST", "lock");
+ header.AddMutationId();
+ header.MergeParameters(SerializeParamsForLock(transactionId, context.Config->Prefix, path, mode, options));
+ return ParseGuidFromResponse(RetryRequestWithPolicy(retryPolicy, context, header).Response);
+}
+
+void Unlock(
+ IRequestRetryPolicyPtr retryPolicy,
+ const TClientContext& context,
+ const TTransactionId& transactionId,
+ const TYPath& path,
+ const TUnlockOptions& options)
+{
+ THttpHeader header("POST", "unlock");
+ header.AddMutationId();
+ header.MergeParameters(SerializeParamsForUnlock(transactionId, context.Config->Prefix, path, options));
+ RetryRequestWithPolicy(retryPolicy, context, header);
+}
+
+void Concatenate(
+ const TClientContext& context,
+ const TTransactionId& transactionId,
+ const TVector<TRichYPath>& sourcePaths,
+ const TRichYPath& destinationPath,
+ const TConcatenateOptions& options)
+{
+ THttpHeader header("POST", "concatenate");
+ header.AddMutationId();
+ header.MergeParameters(SerializeParamsForConcatenate(transactionId, context.Config->Prefix, sourcePaths, destinationPath, options));
+ RequestWithoutRetry(context, header);
+}
+
+void PingTx(
+ const IRequestRetryPolicyPtr& retryPolicy,
+ const TClientContext& context,
+ const TTransactionId& transactionId)
+{
+ THttpHeader header("POST", "ping_tx");
+ header.MergeParameters(SerializeParamsForPingTx(transactionId));
+ TRequestConfig requestConfig;
+ requestConfig.HttpConfig = NHttpClient::THttpConfig{
+ .SocketTimeout = context.Config->PingTimeout
+ };
+ RetryRequestWithPolicy(retryPolicy, context, header, {}, requestConfig);
+}
+
+TOperationAttributes ParseOperationAttributes(const TNode& node)
+{
+ const auto& mapNode = node.AsMap();
+ TOperationAttributes result;
+
+ if (auto idNode = mapNode.FindPtr("id")) {
+ result.Id = GetGuid(idNode->AsString());
+ }
+
+ if (auto typeNode = mapNode.FindPtr("type")) {
+ result.Type = FromString<EOperationType>(typeNode->AsString());
+ } else if (auto operationTypeNode = mapNode.FindPtr("operation_type")) {
+ // COMPAT(levysotsky): "operation_type" is a deprecated synonim for "type".
+ // This branch should be removed when all clusters are updated.
+ result.Type = FromString<EOperationType>(operationTypeNode->AsString());
+ }
+
+ if (auto stateNode = mapNode.FindPtr("state")) {
+ result.State = stateNode->AsString();
+ // We don't use FromString here, because OS_IN_PROGRESS unites many states: "initializing", "running", etc.
+ if (*result.State == "completed") {
+ result.BriefState = EOperationBriefState::Completed;
+ } else if (*result.State == "aborted") {
+ result.BriefState = EOperationBriefState::Aborted;
+ } else if (*result.State == "failed") {
+ result.BriefState = EOperationBriefState::Failed;
+ } else {
+ result.BriefState = EOperationBriefState::InProgress;
+ }
+ }
+ if (auto authenticatedUserNode = mapNode.FindPtr("authenticated_user")) {
+ result.AuthenticatedUser = authenticatedUserNode->AsString();
+ }
+ if (auto startTimeNode = mapNode.FindPtr("start_time")) {
+ result.StartTime = TInstant::ParseIso8601(startTimeNode->AsString());
+ }
+ if (auto finishTimeNode = mapNode.FindPtr("finish_time")) {
+ result.FinishTime = TInstant::ParseIso8601(finishTimeNode->AsString());
+ }
+ auto briefProgressNode = mapNode.FindPtr("brief_progress");
+ if (briefProgressNode && briefProgressNode->HasKey("jobs")) {
+ result.BriefProgress.ConstructInPlace();
+ static auto load = [] (const TNode& item) {
+ // Backward compatibility with old YT versions
+ return item.IsInt64() ? item.AsInt64() : item["total"].AsInt64();
+ };
+ const auto& jobs = (*briefProgressNode)["jobs"];
+ result.BriefProgress->Aborted = load(jobs["aborted"]);
+ result.BriefProgress->Completed = load(jobs["completed"]);
+ result.BriefProgress->Running = jobs["running"].AsInt64();
+ result.BriefProgress->Total = jobs["total"].AsInt64();
+ result.BriefProgress->Failed = jobs["failed"].AsInt64();
+ result.BriefProgress->Lost = jobs["lost"].AsInt64();
+ result.BriefProgress->Pending = jobs["pending"].AsInt64();
+ }
+ if (auto briefSpecNode = mapNode.FindPtr("brief_spec")) {
+ result.BriefSpec = *briefSpecNode;
+ }
+ if (auto specNode = mapNode.FindPtr("spec")) {
+ result.Spec = *specNode;
+ }
+ if (auto fullSpecNode = mapNode.FindPtr("full_spec")) {
+ result.FullSpec = *fullSpecNode;
+ }
+ if (auto unrecognizedSpecNode = mapNode.FindPtr("unrecognized_spec")) {
+ result.UnrecognizedSpec = *unrecognizedSpecNode;
+ }
+ if (auto suspendedNode = mapNode.FindPtr("suspended")) {
+ result.Suspended = suspendedNode->AsBool();
+ }
+ if (auto resultNode = mapNode.FindPtr("result")) {
+ result.Result.ConstructInPlace();
+ auto error = TYtError((*resultNode)["error"]);
+ if (error.GetCode() != 0) {
+ result.Result->Error = std::move(error);
+ }
+ }
+ if (auto progressNode = mapNode.FindPtr("progress")) {
+ const auto& progressMap = progressNode->AsMap();
+ TMaybe<TInstant> buildTime;
+ if (auto buildTimeNode = progressMap.FindPtr("build_time")) {
+ buildTime = TInstant::ParseIso8601(buildTimeNode->AsString());
+ }
+ TJobStatistics jobStatistics;
+ if (auto jobStatisticsNode = progressMap.FindPtr("job_statistics")) {
+ jobStatistics = TJobStatistics(*jobStatisticsNode);
+ }
+ TJobCounters jobCounters;
+ if (auto jobCountersNode = progressMap.FindPtr("total_job_counter")) {
+ jobCounters = TJobCounters(*jobCountersNode);
+ }
+ result.Progress = TOperationProgress{
+ .JobStatistics = std::move(jobStatistics),
+ .JobCounters = std::move(jobCounters),
+ .BuildTime = buildTime,
+ };
+ }
+ if (auto eventsNode = mapNode.FindPtr("events")) {
+ result.Events.ConstructInPlace().reserve(eventsNode->Size());
+ for (const auto& eventNode : eventsNode->AsList()) {
+ result.Events->push_back(TOperationEvent{
+ eventNode["state"].AsString(),
+ TInstant::ParseIso8601(eventNode["time"].AsString()),
+ });
+ }
+ }
+ if (auto alertsNode = mapNode.FindPtr("alerts")) {
+ result.Alerts.ConstructInPlace();
+ for (const auto& [alertType, alertError] : alertsNode->AsMap()) {
+ result.Alerts->emplace(alertType, TYtError(alertError));
+ }
+ }
+
+ return result;
+}
+
+TOperationAttributes GetOperation(
+ const IRequestRetryPolicyPtr& retryPolicy,
+ const TClientContext& context,
+ const TOperationId& operationId,
+ const TGetOperationOptions& options)
+{
+ THttpHeader header("GET", "get_operation");
+ header.MergeParameters(SerializeParamsForGetOperation(operationId, options));
+ auto result = RetryRequestWithPolicy(retryPolicy, context, header);
+ return ParseOperationAttributes(NodeFromYsonString(result.Response));
+}
+
+void AbortOperation(
+ const IRequestRetryPolicyPtr& retryPolicy,
+ const TClientContext& context,
+ const TOperationId& operationId)
+{
+ THttpHeader header("POST", "abort_op");
+ header.AddMutationId();
+ header.MergeParameters(SerializeParamsForAbortOperation(operationId));
+ RetryRequestWithPolicy(retryPolicy, context, header);
+}
+
+void CompleteOperation(
+ const IRequestRetryPolicyPtr& retryPolicy,
+ const TClientContext& context,
+ const TOperationId& operationId)
+{
+ THttpHeader header("POST", "complete_op");
+ header.AddMutationId();
+ header.MergeParameters(SerializeParamsForCompleteOperation(operationId));
+ RetryRequestWithPolicy(retryPolicy, context, header);
+}
+
+void SuspendOperation(
+ const IRequestRetryPolicyPtr& retryPolicy,
+ const TClientContext& context,
+ const TOperationId& operationId,
+ const TSuspendOperationOptions& options)
+{
+ THttpHeader header("POST", "suspend_op");
+ header.AddMutationId();
+ header.MergeParameters(SerializeParamsForSuspendOperation(operationId, options));
+ RetryRequestWithPolicy(retryPolicy, context, header);
+}
+
+void ResumeOperation(
+ const IRequestRetryPolicyPtr& retryPolicy,
+ const TClientContext& context,
+ const TOperationId& operationId,
+ const TResumeOperationOptions& options)
+{
+ THttpHeader header("POST", "resume_op");
+ header.AddMutationId();
+ header.MergeParameters(SerializeParamsForResumeOperation(operationId, options));
+ RetryRequestWithPolicy(retryPolicy, context, header);
+}
+
+template <typename TKey>
+static THashMap<TKey, i64> GetCounts(const TNode& countsNode)
+{
+ THashMap<TKey, i64> counts;
+ for (const auto& entry : countsNode.AsMap()) {
+ counts.emplace(FromString<TKey>(entry.first), entry.second.AsInt64());
+ }
+ return counts;
+}
+
+TListOperationsResult ListOperations(
+ const IRequestRetryPolicyPtr& retryPolicy,
+ const TClientContext& context,
+ const TListOperationsOptions& options)
+{
+ THttpHeader header("GET", "list_operations");
+ header.MergeParameters(SerializeParamsForListOperations(options));
+ auto responseInfo = RetryRequestWithPolicy(retryPolicy, context, header);
+ auto resultNode = NodeFromYsonString(responseInfo.Response);
+
+ TListOperationsResult result;
+ for (const auto& operationNode : resultNode["operations"].AsList()) {
+ result.Operations.push_back(ParseOperationAttributes(operationNode));
+ }
+
+ if (resultNode.HasKey("pool_counts")) {
+ result.PoolCounts = GetCounts<TString>(resultNode["pool_counts"]);
+ }
+ if (resultNode.HasKey("user_counts")) {
+ result.UserCounts = GetCounts<TString>(resultNode["user_counts"]);
+ }
+ if (resultNode.HasKey("type_counts")) {
+ result.TypeCounts = GetCounts<EOperationType>(resultNode["type_counts"]);
+ }
+ if (resultNode.HasKey("state_counts")) {
+ result.StateCounts = GetCounts<TString>(resultNode["state_counts"]);
+ }
+ if (resultNode.HasKey("failed_jobs_count")) {
+ result.WithFailedJobsCount = resultNode["failed_jobs_count"].AsInt64();
+ }
+
+ result.Incomplete = resultNode["incomplete"].AsBool();
+
+ return result;
+}
+
+void UpdateOperationParameters(
+ const IRequestRetryPolicyPtr& retryPolicy,
+ const TClientContext& context,
+ const TOperationId& operationId,
+ const TUpdateOperationParametersOptions& options)
+{
+ THttpHeader header("POST", "update_op_parameters");
+ header.MergeParameters(SerializeParamsForUpdateOperationParameters(operationId, options));
+ RetryRequestWithPolicy(retryPolicy, context, header);
+}
+
+TJobAttributes ParseJobAttributes(const TNode& node)
+{
+ const auto& mapNode = node.AsMap();
+ TJobAttributes result;
+
+ // Currently "get_job" returns "job_id" field and "list_jobs" returns "id" field.
+ auto idNode = mapNode.FindPtr("id");
+ if (!idNode) {
+ idNode = mapNode.FindPtr("job_id");
+ }
+ if (idNode) {
+ result.Id = GetGuid(idNode->AsString());
+ }
+
+ if (auto typeNode = mapNode.FindPtr("type")) {
+ result.Type = FromString<EJobType>(typeNode->AsString());
+ }
+ if (auto stateNode = mapNode.FindPtr("state")) {
+ result.State = FromString<EJobState>(stateNode->AsString());
+ }
+ if (auto addressNode = mapNode.FindPtr("address")) {
+ result.Address = addressNode->AsString();
+ }
+ if (auto taskNameNode = mapNode.FindPtr("task_name")) {
+ result.TaskName = taskNameNode->AsString();
+ }
+ if (auto startTimeNode = mapNode.FindPtr("start_time")) {
+ result.StartTime = TInstant::ParseIso8601(startTimeNode->AsString());
+ }
+ if (auto finishTimeNode = mapNode.FindPtr("finish_time")) {
+ result.FinishTime = TInstant::ParseIso8601(finishTimeNode->AsString());
+ }
+ if (auto progressNode = mapNode.FindPtr("progress")) {
+ result.Progress = progressNode->AsDouble();
+ }
+ if (auto stderrSizeNode = mapNode.FindPtr("stderr_size")) {
+ result.StderrSize = stderrSizeNode->AsUint64();
+ }
+ if (auto errorNode = mapNode.FindPtr("error")) {
+ result.Error.ConstructInPlace(*errorNode);
+ }
+ if (auto briefStatisticsNode = mapNode.FindPtr("brief_statistics")) {
+ result.BriefStatistics = *briefStatisticsNode;
+ }
+ if (auto inputPathsNode = mapNode.FindPtr("input_paths")) {
+ const auto& inputPathNodesList = inputPathsNode->AsList();
+ result.InputPaths.ConstructInPlace();
+ result.InputPaths->reserve(inputPathNodesList.size());
+ for (const auto& inputPathNode : inputPathNodesList) {
+ TRichYPath path;
+ Deserialize(path, inputPathNode);
+ result.InputPaths->push_back(std::move(path));
+ }
+ }
+ if (auto coreInfosNode = mapNode.FindPtr("core_infos")) {
+ const auto& coreInfoNodesList = coreInfosNode->AsList();
+ result.CoreInfos.ConstructInPlace();
+ result.CoreInfos->reserve(coreInfoNodesList.size());
+ for (const auto& coreInfoNode : coreInfoNodesList) {
+ TCoreInfo coreInfo;
+ coreInfo.ProcessId = coreInfoNode["process_id"].AsInt64();
+ coreInfo.ExecutableName = coreInfoNode["executable_name"].AsString();
+ if (coreInfoNode.HasKey("size")) {
+ coreInfo.Size = coreInfoNode["size"].AsUint64();
+ }
+ if (coreInfoNode.HasKey("error")) {
+ coreInfo.Error.ConstructInPlace(coreInfoNode["error"]);
+ }
+ result.CoreInfos->push_back(std::move(coreInfo));
+ }
+ }
+ return result;
+}
+
+TJobAttributes GetJob(
+ const IRequestRetryPolicyPtr& retryPolicy,
+ const TClientContext& context,
+ const TOperationId& operationId,
+ const TJobId& jobId,
+ const TGetJobOptions& options)
+{
+ THttpHeader header("GET", "get_job");
+ header.MergeParameters(SerializeParamsForGetJob(operationId, jobId, options));
+ auto responseInfo = RetryRequestWithPolicy(retryPolicy, context, header);
+ auto resultNode = NodeFromYsonString(responseInfo.Response);
+ return ParseJobAttributes(resultNode);
+}
+
+TListJobsResult ListJobs(
+ const IRequestRetryPolicyPtr& retryPolicy,
+ const TClientContext& context,
+ const TOperationId& operationId,
+ const TListJobsOptions& options)
+{
+ THttpHeader header("GET", "list_jobs");
+ header.MergeParameters(SerializeParamsForListJobs(operationId, options));
+ auto responseInfo = RetryRequestWithPolicy(retryPolicy, context, header);
+ auto resultNode = NodeFromYsonString(responseInfo.Response);
+
+ TListJobsResult result;
+
+ const auto& jobNodesList = resultNode["jobs"].AsList();
+ result.Jobs.reserve(jobNodesList.size());
+ for (const auto& jobNode : jobNodesList) {
+ result.Jobs.push_back(ParseJobAttributes(jobNode));
+ }
+
+ if (resultNode.HasKey("cypress_job_count") && !resultNode["cypress_job_count"].IsNull()) {
+ result.CypressJobCount = resultNode["cypress_job_count"].AsInt64();
+ }
+ if (resultNode.HasKey("controller_agent_job_count") && !resultNode["controller_agent_job_count"].IsNull()) {
+ result.ControllerAgentJobCount = resultNode["scheduler_job_count"].AsInt64();
+ }
+ if (resultNode.HasKey("archive_job_count") && !resultNode["archive_job_count"].IsNull()) {
+ result.ArchiveJobCount = resultNode["archive_job_count"].AsInt64();
+ }
+
+ return result;
+}
+
+class TResponseReader
+ : public IFileReader
+{
+public:
+ TResponseReader(const TClientContext& context, THttpHeader header)
+ {
+ if (context.ServiceTicketAuth) {
+ header.SetServiceTicket(context.ServiceTicketAuth->Ptr->IssueServiceTicket());
+ } else {
+ header.SetToken(context.Token);
+ }
+
+ auto hostName = GetProxyForHeavyRequest(context);
+ auto requestId = CreateGuidAsString();
+
+ Response_ = context.HttpClient->Request(GetFullUrl(hostName, context, header), requestId, header);
+ ResponseStream_ = Response_->GetResponseStream();
+ }
+
+private:
+ size_t DoRead(void* buf, size_t len) override
+ {
+ return ResponseStream_->Read(buf, len);
+ }
+
+ size_t DoSkip(size_t len) override
+ {
+ return ResponseStream_->Skip(len);
+ }
+
+private:
+ THttpRequest Request_;
+ NHttpClient::IHttpResponsePtr Response_;
+ IInputStream* ResponseStream_;
+};
+
+IFileReaderPtr GetJobInput(
+ const TClientContext& context,
+ const TJobId& jobId,
+ const TGetJobInputOptions& /* options */)
+{
+ THttpHeader header("GET", "get_job_input");
+ header.AddParameter("job_id", GetGuidAsString(jobId));
+ return new TResponseReader(context, std::move(header));
+}
+
+IFileReaderPtr GetJobFailContext(
+ const TClientContext& context,
+ const TOperationId& operationId,
+ const TJobId& jobId,
+ const TGetJobFailContextOptions& /* options */)
+{
+ THttpHeader header("GET", "get_job_fail_context");
+ header.AddOperationId(operationId);
+ header.AddParameter("job_id", GetGuidAsString(jobId));
+ return new TResponseReader(context, std::move(header));
+}
+
+TString GetJobStderrWithRetries(
+ const IRequestRetryPolicyPtr& retryPolicy,
+ const TClientContext& context,
+ const TOperationId& operationId,
+ const TJobId& jobId,
+ const TGetJobStderrOptions& /* options */)
+{
+ THttpHeader header("GET", "get_job_stderr");
+ header.AddOperationId(operationId);
+ header.AddParameter("job_id", GetGuidAsString(jobId));
+ TRequestConfig config;
+ config.IsHeavy = true;
+ auto responseInfo = RetryRequestWithPolicy(retryPolicy, context, header, {}, config);
+ return responseInfo.Response;
+}
+
+IFileReaderPtr GetJobStderr(
+ const TClientContext& context,
+ const TOperationId& operationId,
+ const TJobId& jobId,
+ const TGetJobStderrOptions& /* options */)
+{
+ THttpHeader header("GET", "get_job_stderr");
+ header.AddOperationId(operationId);
+ header.AddParameter("job_id", GetGuidAsString(jobId));
+ return new TResponseReader(context, std::move(header));
+}
+
+TMaybe<TYPath> GetFileFromCache(
+ const IRequestRetryPolicyPtr& retryPolicy,
+ const TClientContext& context,
+ const TTransactionId& transactionId,
+ const TString& md5Signature,
+ const TYPath& cachePath,
+ const TGetFileFromCacheOptions& options)
+{
+ THttpHeader header("GET", "get_file_from_cache");
+ header.MergeParameters(SerializeParamsForGetFileFromCache(transactionId, md5Signature, cachePath, options));
+ auto responseInfo = RetryRequestWithPolicy(retryPolicy, context, header);
+ auto path = NodeFromYsonString(responseInfo.Response).AsString();
+ return path.empty() ? Nothing() : TMaybe<TYPath>(path);
+}
+
+TYPath PutFileToCache(
+ const IRequestRetryPolicyPtr& retryPolicy,
+ const TClientContext& context,
+ const TTransactionId& transactionId,
+ const TYPath& filePath,
+ const TString& md5Signature,
+ const TYPath& cachePath,
+ const TPutFileToCacheOptions& options)
+{
+ THttpHeader header("POST", "put_file_to_cache");
+ header.MergeParameters(SerializeParamsForPutFileToCache(transactionId, context.Config->Prefix, filePath, md5Signature, cachePath, options));
+ auto result = RetryRequestWithPolicy(retryPolicy, context, header);
+ return NodeFromYsonString(result.Response).AsString();
+}
+
+TNode::TListType SkyShareTable(
+ const IRequestRetryPolicyPtr& retryPolicy,
+ const TClientContext& context,
+ const std::vector<TYPath>& tablePaths,
+ const TSkyShareTableOptions& options)
+{
+ THttpHeader header("POST", "api/v1/share", /*IsApi*/ false);
+
+ auto proxyName = context.ServerName.substr(0, context.ServerName.find('.'));
+
+ auto host = context.Config->SkynetApiHost;
+ if (host == "") {
+ host = "skynet." + proxyName + ".yt.yandex.net";
+ }
+
+ header.MergeParameters(SerializeParamsForSkyShareTable(proxyName, context.Config->Prefix, tablePaths, options));
+ TClientContext skyApiHost({ .ServerName = host, .HttpClient = NHttpClient::CreateDefaultHttpClient() });
+ TResponseInfo response = {};
+
+ // As documented at https://wiki.yandex-team.ru/yt/userdoc/blob_tables/#shag3.sozdajomrazdachu
+ // first request returns HTTP status code 202 (Accepted). And we need retrying until we have 200 (OK).
+ while (response.HttpCode != 200) {
+ response = RetryRequestWithPolicy(retryPolicy, skyApiHost, header, "");
+ TWaitProxy::Get()->Sleep(TDuration::Seconds(5));
+ }
+
+ if (options.KeyColumns_) {
+ return NodeFromJsonString(response.Response)["torrents"].AsList();
+ } else {
+ TNode torrent;
+
+ torrent["key"] = TNode::CreateList();
+ torrent["rbtorrent"] = response.Response;
+
+ return TNode::TListType{ torrent };
+ }
+}
+
+TCheckPermissionResponse ParseCheckPermissionResponse(const TNode& node)
+{
+ auto parseSingleResult = [] (const TNode::TMapType& node) {
+ TCheckPermissionResult result;
+ result.Action = ::FromString<ESecurityAction>(node.at("action").AsString());
+ if (auto objectId = node.FindPtr("object_id")) {
+ result.ObjectId = GetGuid(objectId->AsString());
+ }
+ if (auto objectName = node.FindPtr("object_name")) {
+ result.ObjectName = objectName->AsString();
+ }
+ if (auto subjectId = node.FindPtr("subject_id")) {
+ result.SubjectId = GetGuid(subjectId->AsString());
+ }
+ if (auto subjectName = node.FindPtr("subject_name")) {
+ result.SubjectName = subjectName->AsString();
+ }
+ return result;
+ };
+
+ const auto& mapNode = node.AsMap();
+ TCheckPermissionResponse result;
+ static_cast<TCheckPermissionResult&>(result) = parseSingleResult(mapNode);
+ if (auto columns = mapNode.FindPtr("columns")) {
+ result.Columns.reserve(columns->AsList().size());
+ for (const auto& columnNode : columns->AsList()) {
+ result.Columns.push_back(parseSingleResult(columnNode.AsMap()));
+ }
+ }
+ return result;
+}
+
+TCheckPermissionResponse CheckPermission(
+ const IRequestRetryPolicyPtr& retryPolicy,
+ const TClientContext& context,
+ const TString& user,
+ EPermission permission,
+ const TYPath& path,
+ const TCheckPermissionOptions& options)
+{
+ THttpHeader header("GET", "check_permission");
+ header.MergeParameters(SerializeParamsForCheckPermission(user, permission, context.Config->Prefix, path, options));
+ auto response = RetryRequestWithPolicy(retryPolicy, context, header);
+ return ParseCheckPermissionResponse(NodeFromYsonString(response.Response));
+}
+
+TVector<TTabletInfo> GetTabletInfos(
+ const IRequestRetryPolicyPtr& retryPolicy,
+ const TClientContext& context,
+ const TYPath& path,
+ const TVector<int>& tabletIndexes,
+ const TGetTabletInfosOptions& options)
+{
+ THttpHeader header("POST", "api/v4/get_tablet_infos", false);
+ header.MergeParameters(SerializeParamsForGetTabletInfos(context.Config->Prefix, path, tabletIndexes, options));
+ auto response = RetryRequestWithPolicy(retryPolicy, context, header);
+ TVector<TTabletInfo> result;
+ Deserialize(result, *NodeFromYsonString(response.Response).AsMap().FindPtr("tablets"));
+ return result;
+}
+
+TVector<TTableColumnarStatistics> GetTableColumnarStatistics(
+ const IRequestRetryPolicyPtr& retryPolicy,
+ const TClientContext& context,
+ const TTransactionId& transactionId,
+ const TVector<TRichYPath>& paths,
+ const TGetTableColumnarStatisticsOptions& options)
+{
+ THttpHeader header("GET", "get_table_columnar_statistics");
+ header.MergeParameters(SerializeParamsForGetTableColumnarStatistics(transactionId, paths, options));
+ TRequestConfig config;
+ config.IsHeavy = true;
+ auto requestResult = RetryRequestWithPolicy(retryPolicy, context, header, {}, config);
+ auto response = NodeFromYsonString(requestResult.Response);
+ TVector<TTableColumnarStatistics> result;
+ Deserialize(result, response);
+ return result;
+}
+
+TMultiTablePartitions GetTablePartitions(
+ const IRequestRetryPolicyPtr& retryPolicy,
+ const TClientContext& context,
+ const TTransactionId& transactionId,
+ const TVector<TRichYPath>& paths,
+ const TGetTablePartitionsOptions& options)
+{
+ THttpHeader header("GET", "partition_tables");
+ header.MergeParameters(SerializeParamsForGetTablePartitions(transactionId, paths, options));
+ TRequestConfig config;
+ config.IsHeavy = true;
+ auto requestResult = RetryRequestWithPolicy(retryPolicy, context, header, {}, config);
+ auto response = NodeFromYsonString(requestResult.Response);
+ TMultiTablePartitions result;
+ Deserialize(result, response);
+ return result;
+}
+
+TRichYPath CanonizeYPath(
+ const IRequestRetryPolicyPtr& retryPolicy,
+ const TClientContext& context,
+ const TRichYPath& path)
+{
+ return CanonizeYPaths(retryPolicy, context, {path}).front();
+}
+
+TVector<TRichYPath> CanonizeYPaths(
+ const IRequestRetryPolicyPtr& retryPolicy,
+ const TClientContext& context,
+ const TVector<TRichYPath>& paths)
+{
+ TRawBatchRequest batch(context.Config);
+ TVector<NThreading::TFuture<TRichYPath>> futures;
+ futures.reserve(paths.size());
+ for (int i = 0; i < static_cast<int>(paths.size()); ++i) {
+ futures.push_back(batch.CanonizeYPath(paths[i]));
+ }
+ ExecuteBatch(retryPolicy, context, batch, TExecuteBatchOptions{});
+ TVector<TRichYPath> result;
+ result.reserve(futures.size());
+ for (auto& future : futures) {
+ result.push_back(future.ExtractValueSync());
+ }
+ return result;
+}
+
+void AlterTable(
+ const IRequestRetryPolicyPtr& retryPolicy,
+ const TClientContext& context,
+ const TTransactionId& transactionId,
+ const TYPath& path,
+ const TAlterTableOptions& options)
+{
+ THttpHeader header("POST", "alter_table");
+ header.AddMutationId();
+ header.MergeParameters(SerializeParamsForAlterTable(transactionId, context.Config->Prefix, path, options));
+ RetryRequestWithPolicy(retryPolicy, context, header);
+}
+
+void AlterTableReplica(
+ const IRequestRetryPolicyPtr& retryPolicy,
+ const TClientContext& context,
+ const TReplicaId& replicaId,
+ const TAlterTableReplicaOptions& options)
+{
+ THttpHeader header("POST", "alter_table_replica");
+ header.AddMutationId();
+ header.MergeParameters(NRawClient::SerializeParamsForAlterTableReplica(replicaId, options));
+ RetryRequestWithPolicy(retryPolicy, context, header);
+}
+
+void DeleteRows(
+ const IRequestRetryPolicyPtr& retryPolicy,
+ const TClientContext& context,
+ const TYPath& path,
+ const TNode::TListType& keys,
+ const TDeleteRowsOptions& options)
+{
+ THttpHeader header("PUT", "delete_rows");
+ header.SetInputFormat(TFormat::YsonBinary());
+ header.MergeParameters(NRawClient::SerializeParametersForDeleteRows(context.Config->Prefix, path, options));
+
+ auto body = NodeListToYsonString(keys);
+ TRequestConfig requestConfig;
+ requestConfig.IsHeavy = true;
+ RetryRequestWithPolicy(retryPolicy, context, header, body, requestConfig);
+}
+
+void FreezeTable(
+ const IRequestRetryPolicyPtr& retryPolicy,
+ const TClientContext& context,
+ const TYPath& path,
+ const TFreezeTableOptions& options)
+{
+ THttpHeader header("POST", "freeze_table");
+ header.MergeParameters(SerializeParamsForFreezeTable(context.Config->Prefix, path, options));
+ RetryRequestWithPolicy(retryPolicy, context, header);
+}
+
+void UnfreezeTable(
+ const IRequestRetryPolicyPtr& retryPolicy,
+ const TClientContext& context,
+ const TYPath& path,
+ const TUnfreezeTableOptions& options)
+{
+ THttpHeader header("POST", "unfreeze_table");
+ header.MergeParameters(SerializeParamsForUnfreezeTable(context.Config->Prefix, path, options));
+ RetryRequestWithPolicy(retryPolicy, context, header);
+}
+
+void AbortTransaction(
+ const IRequestRetryPolicyPtr& retryPolicy,
+ const TClientContext& context,
+ const TTransactionId& transactionId)
+{
+ THttpHeader header("POST", "abort_tx");
+ header.AddMutationId();
+ header.MergeParameters(NRawClient::SerializeParamsForAbortTransaction(transactionId));
+ RetryRequestWithPolicy(retryPolicy, context, header);
+}
+
+void CommitTransaction(
+ const IRequestRetryPolicyPtr& retryPolicy,
+ const TClientContext& context,
+ const TTransactionId& transactionId)
+{
+ THttpHeader header("POST", "commit_tx");
+ header.AddMutationId();
+ header.MergeParameters(NRawClient::SerializeParamsForCommitTransaction(transactionId));
+ RetryRequestWithPolicy(retryPolicy, context, header);
+}
+
+TTransactionId StartTransaction(
+ const IRequestRetryPolicyPtr& retryPolicy,
+ const TClientContext& context,
+ const TTransactionId& parentTransactionId,
+ const TStartTransactionOptions& options)
+{
+ THttpHeader header("POST", "start_tx");
+ header.AddMutationId();
+ header.MergeParameters(NRawClient::SerializeParamsForStartTransaction(parentTransactionId, context.Config->TxTimeout, options));
+ return ParseGuidFromResponse(RetryRequestWithPolicy(retryPolicy, context, header).Response);
+}
+
+////////////////////////////////////////////////////////////////////////////////
+
+} // namespace NYT::NDetail::NRawClient
diff --git a/yt/cpp/mapreduce/raw_client/raw_requests.h b/yt/cpp/mapreduce/raw_client/raw_requests.h
new file mode 100644
index 0000000000..05fcbade76
--- /dev/null
+++ b/yt/cpp/mapreduce/raw_client/raw_requests.h
@@ -0,0 +1,397 @@
+#pragma once
+
+#include "raw_batch_request.h"
+
+#include <yt/cpp/mapreduce/common/fwd.h>
+#include <yt/cpp/mapreduce/http/context.h>
+#include <yt/cpp/mapreduce/interface/client_method_options.h>
+#include <yt/cpp/mapreduce/interface/operation.h>
+
+namespace NYT {
+
+////////////////////////////////////////////////////////////////////////////////
+
+class IRequestRetryPolicy;
+struct TClientContext;
+struct TExecuteBatchOptions;
+
+////////////////////////////////////////////////////////////////////////////////
+
+namespace NDetail::NRawClient {
+
+////////////////////////////////////////////////////////////////////////////////
+
+TOperationAttributes ParseOperationAttributes(const TNode& node);
+
+TCheckPermissionResponse ParseCheckPermissionResponse(const TNode& node);
+
+////////////////////////////////////////////////////////////////////////////////
+
+//
+// marks `batchRequest' as executed
+void ExecuteBatch(
+ IRequestRetryPolicyPtr retryPolicy,
+ const TClientContext& context,
+ TRawBatchRequest& batchRequest,
+ const TExecuteBatchOptions& options = TExecuteBatchOptions());
+
+//
+// Cypress
+//
+
+TNode Get(
+ const IRequestRetryPolicyPtr& retryPolicy,
+ const TClientContext& context,
+ const TTransactionId& transactionId,
+ const TYPath& path,
+ const TGetOptions& options = TGetOptions());
+
+TNode TryGet(
+ const IRequestRetryPolicyPtr& retryPolicy,
+ const TClientContext& context,
+ const TTransactionId& transactionId,
+ const TYPath& path,
+ const TGetOptions& options);
+
+void Set(
+ const IRequestRetryPolicyPtr& retryPolicy,
+ const TClientContext& context,
+ const TTransactionId& transactionId,
+ const TYPath& path,
+ const TNode& value,
+ const TSetOptions& options = TSetOptions());
+
+void MultisetAttributes(
+ const IRequestRetryPolicyPtr& retryPolicy,
+ const TClientContext& context,
+ const TTransactionId& transactionId,
+ const TYPath& path,
+ const TNode::TMapType& value,
+ const TMultisetAttributesOptions& options = TMultisetAttributesOptions());
+
+bool Exists(
+ const IRequestRetryPolicyPtr& retryPolicy,
+ const TClientContext& context,
+ const TTransactionId& transactionId,
+ const TYPath& path,
+ const TExistsOptions& options = TExistsOptions());
+
+TNodeId Create(
+ const IRequestRetryPolicyPtr& retryPolicy,
+ const TClientContext& context,
+ const TTransactionId& transactionId,
+ const TYPath& path,
+ const ENodeType& type,
+ const TCreateOptions& options = TCreateOptions());
+
+TNodeId Copy(
+ const IRequestRetryPolicyPtr& retryPolicy,
+ const TClientContext& context,
+ const TTransactionId& transactionId,
+ const TYPath& sourcePath,
+ const TYPath& destinationPath,
+ const TCopyOptions& options = TCopyOptions());
+
+TNodeId Move(
+ const IRequestRetryPolicyPtr& retryPolicy,
+ const TClientContext& context,
+ const TTransactionId& transactionId,
+ const TYPath& sourcePath,
+ const TYPath& destinationPath,
+ const TMoveOptions& options = TMoveOptions());
+
+void Remove(
+ const IRequestRetryPolicyPtr& retryPolicy,
+ const TClientContext& context,
+ const TTransactionId& transactionId,
+ const TYPath& path,
+ const TRemoveOptions& options = TRemoveOptions());
+
+TNode::TListType List(
+ const IRequestRetryPolicyPtr& retryPolicy,
+ const TClientContext& context,
+ const TTransactionId& transactionId,
+ const TYPath& path,
+ const TListOptions& options = TListOptions());
+
+TNodeId Link(
+ const IRequestRetryPolicyPtr& retryPolicy,
+ const TClientContext& context,
+ const TTransactionId& transactionId,
+ const TYPath& targetPath,
+ const TYPath& linkPath,
+ const TLinkOptions& options = TLinkOptions());
+
+TLockId Lock(
+ const IRequestRetryPolicyPtr& retryPolicy,
+ const TClientContext& context,
+ const TTransactionId& transactionId,
+ const TYPath& path,
+ ELockMode mode,
+ const TLockOptions& options = TLockOptions());
+
+void Unlock(
+ IRequestRetryPolicyPtr retryPolicy,
+ const TClientContext& context,
+ const TTransactionId& transactionId,
+ const TYPath& path,
+ const TUnlockOptions& options = TUnlockOptions());
+
+void Concatenate(
+ const TClientContext& context,
+ const TTransactionId& transactionId,
+ const TVector<TRichYPath>& sourcePaths,
+ const TRichYPath& destinationPath,
+ const TConcatenateOptions& options = TConcatenateOptions());
+
+//
+// Transactions
+//
+
+void PingTx(
+ const IRequestRetryPolicyPtr& retryPolicy,
+ const TClientContext& context,
+ const TTransactionId& transactionId);
+
+//
+// Operations
+//
+
+TOperationAttributes GetOperation(
+ const IRequestRetryPolicyPtr& retryPolicy,
+ const TClientContext& context,
+ const TOperationId& operationId,
+ const TGetOperationOptions& options = TGetOperationOptions());
+
+void AbortOperation(
+ const IRequestRetryPolicyPtr& retryPolicy,
+ const TClientContext& context,
+ const TOperationId& operationId);
+
+void CompleteOperation(
+ const IRequestRetryPolicyPtr& retryPolicy,
+ const TClientContext& context,
+ const TOperationId& operationId);
+
+void SuspendOperation(
+ const IRequestRetryPolicyPtr& retryPolicy,
+ const TClientContext& context,
+ const TOperationId& operationId,
+ const TSuspendOperationOptions& options = TSuspendOperationOptions());
+
+void ResumeOperation(
+ const IRequestRetryPolicyPtr& retryPolicy,
+ const TClientContext& context,
+ const TOperationId& operationId,
+ const TResumeOperationOptions& options = TResumeOperationOptions());
+
+TListOperationsResult ListOperations(
+ const IRequestRetryPolicyPtr& retryPolicy,
+ const TClientContext& context,
+ const TListOperationsOptions& options = TListOperationsOptions());
+
+void UpdateOperationParameters(
+ const IRequestRetryPolicyPtr& retryPolicy,
+ const TClientContext& context,
+ const TOperationId& operationId,
+ const TUpdateOperationParametersOptions& options = TUpdateOperationParametersOptions());
+
+//
+// Jobs
+//
+
+TJobAttributes GetJob(
+ const IRequestRetryPolicyPtr& retryPolicy,
+ const TClientContext& context,
+ const TOperationId& operationId,
+ const TJobId& jobId,
+ const TGetJobOptions& options = TGetJobOptions());
+
+TListJobsResult ListJobs(
+ const IRequestRetryPolicyPtr& retryPolicy,
+ const TClientContext& context,
+ const TOperationId& operationId,
+ const TListJobsOptions& options = TListJobsOptions());
+
+::TIntrusivePtr<IFileReader> GetJobInput(
+ const TClientContext& context,
+ const TJobId& jobId,
+ const TGetJobInputOptions& options = TGetJobInputOptions());
+
+::TIntrusivePtr<IFileReader> GetJobFailContext(
+ const TClientContext& context,
+ const TOperationId& operationId,
+ const TJobId& jobId,
+ const TGetJobFailContextOptions& options = TGetJobFailContextOptions());
+
+TString GetJobStderrWithRetries(
+ const IRequestRetryPolicyPtr& retryPolicy,
+ const TClientContext& context,
+ const TOperationId& operationId,
+ const TJobId& jobId,
+ const TGetJobStderrOptions& /* options */ = TGetJobStderrOptions());
+
+::TIntrusivePtr<IFileReader> GetJobStderr(
+ const TClientContext& context,
+ const TOperationId& operationId,
+ const TJobId& jobId,
+ const TGetJobStderrOptions& options = TGetJobStderrOptions());
+
+//
+// File cache
+//
+
+TMaybe<TYPath> GetFileFromCache(
+ const IRequestRetryPolicyPtr& retryPolicy,
+ const TClientContext& context,
+ const TTransactionId& transactionId,
+ const TString& md5Signature,
+ const TYPath& cachePath,
+ const TGetFileFromCacheOptions& options = TGetFileFromCacheOptions());
+
+TYPath PutFileToCache(
+ const IRequestRetryPolicyPtr& retryPolicy,
+ const TClientContext& context,
+ const TTransactionId& transactionId,
+ const TYPath& filePath,
+ const TString& md5Signature,
+ const TYPath& cachePath,
+ const TPutFileToCacheOptions& options = TPutFileToCacheOptions());
+
+//
+// SkyShare
+//
+
+TNode::TListType SkyShareTable(
+ const IRequestRetryPolicyPtr& retryPolicy,
+ const TClientContext& context,
+ const std::vector<TYPath>& tablePaths,
+ const TSkyShareTableOptions& options);
+
+//
+// Misc
+//
+
+TCheckPermissionResponse CheckPermission(
+ const IRequestRetryPolicyPtr& retryPolicy,
+ const TClientContext& context,
+ const TString& user,
+ EPermission permission,
+ const TYPath& path,
+ const TCheckPermissionOptions& options = TCheckPermissionOptions());
+
+TVector<TTabletInfo> GetTabletInfos(
+ const IRequestRetryPolicyPtr& retryPolicy,
+ const TClientContext& context,
+ const TYPath& path,
+ const TVector<int>& tabletIndexes,
+ const TGetTabletInfosOptions& options);
+
+TVector<TTableColumnarStatistics> GetTableColumnarStatistics(
+ const IRequestRetryPolicyPtr& retryPolicy,
+ const TClientContext& context,
+ const TTransactionId& transactionId,
+ const TVector<TRichYPath>& paths,
+ const TGetTableColumnarStatisticsOptions& options);
+
+TMultiTablePartitions GetTablePartitions(
+ const IRequestRetryPolicyPtr& retryPolicy,
+ const TClientContext& context,
+ const TTransactionId& transactionId,
+ const TVector<TRichYPath>& paths,
+ const TGetTablePartitionsOptions& options);
+
+TRichYPath CanonizeYPath(
+ const IRequestRetryPolicyPtr& retryPolicy,
+ const TClientContext& context,
+ const TRichYPath& path);
+
+TVector<TRichYPath> CanonizeYPaths(
+ const IRequestRetryPolicyPtr& retryPolicy,
+ const TClientContext& context,
+ const TVector<TRichYPath>& paths);
+
+//
+// Tables
+//
+
+void AlterTable(
+ const IRequestRetryPolicyPtr& retryPolicy,
+ const TClientContext& context,
+ const TTransactionId& transactionId,
+ const TYPath& path,
+ const TAlterTableOptions& options);
+
+void AlterTableReplica(
+ const IRequestRetryPolicyPtr& retryPolicy,
+ const TClientContext& context,
+ const TReplicaId& replicaId,
+ const TAlterTableReplicaOptions& options);
+
+void DeleteRows(
+ const IRequestRetryPolicyPtr& retryPolicy,
+ const TClientContext& context,
+ const TYPath& path,
+ const TNode::TListType& keys,
+ const TDeleteRowsOptions& options);
+
+void FreezeTable(
+ const IRequestRetryPolicyPtr& retryPolicy,
+ const TClientContext& context,
+ const TYPath& path,
+ const TFreezeTableOptions& options);
+
+void UnfreezeTable(
+ const IRequestRetryPolicyPtr& retryPolicy,
+ const TClientContext& context,
+ const TYPath& path,
+ const TUnfreezeTableOptions& options);
+
+
+// Transactions
+void AbortTransaction(
+ const IRequestRetryPolicyPtr& retryPolicy,
+ const TClientContext& context,
+ const TTransactionId& transactionId);
+
+void CommitTransaction(
+ const IRequestRetryPolicyPtr& retryPolicy,
+ const TClientContext& context,
+ const TTransactionId& transactionId);
+
+TTransactionId StartTransaction(
+ const IRequestRetryPolicyPtr& retryPolicy,
+ const TClientContext& context,
+ const TTransactionId& parentId,
+ const TStartTransactionOptions& options);
+
+////////////////////////////////////////////////////////////////////////////////
+
+template<typename TSrc, typename TBatchAdder>
+auto BatchTransform(
+ const IRequestRetryPolicyPtr& retryPolicy,
+ const TClientContext& context,
+ const TSrc& src,
+ TBatchAdder batchAdder,
+ const TExecuteBatchOptions& executeBatchOptions = {})
+{
+ TRawBatchRequest batch(context.Config);
+ using TFuture = decltype(batchAdder(batch, *std::begin(src)));
+ TVector<TFuture> futures;
+ for (const auto& el : src) {
+ futures.push_back(batchAdder(batch, el));
+ }
+ ExecuteBatch(retryPolicy, context, batch, executeBatchOptions);
+ using TDst = decltype(futures[0].ExtractValueSync());
+ TVector<TDst> result;
+ result.reserve(std::size(src));
+ for (auto& future : futures) {
+ result.push_back(future.ExtractValueSync());
+ }
+ return result;
+}
+
+////////////////////////////////////////////////////////////////////////////////
+
+} // namespace NDetail::NRawClient
+} // namespace NYT
diff --git a/yt/cpp/mapreduce/raw_client/rpc_parameters_serialization.cpp b/yt/cpp/mapreduce/raw_client/rpc_parameters_serialization.cpp
new file mode 100644
index 0000000000..1936266d0d
--- /dev/null
+++ b/yt/cpp/mapreduce/raw_client/rpc_parameters_serialization.cpp
@@ -0,0 +1,873 @@
+#include "rpc_parameters_serialization.h"
+
+#include <yt/cpp/mapreduce/common/helpers.h>
+
+#include <yt/cpp/mapreduce/interface/config.h>
+#include <yt/cpp/mapreduce/interface/client_method_options.h>
+#include <yt/cpp/mapreduce/interface/operation.h>
+#include <yt/cpp/mapreduce/interface/serialize.h>
+
+#include <library/cpp/yson/node/node.h>
+#include <library/cpp/yson/node/node_io.h>
+#include <library/cpp/yson/node/node_builder.h>
+
+#include <util/generic/guid.h>
+#include <util/string/cast.h>
+
+namespace NYT::NDetail::NRawClient {
+
+using ::ToString;
+
+////////////////////////////////////////////////////////////////////
+
+static void SetTransactionIdParam(TNode* node, const TTransactionId& transactionId)
+{
+ if (transactionId != TTransactionId()) {
+ (*node)["transaction_id"] = GetGuidAsString(transactionId);
+ }
+}
+
+static void SetOperationIdParam(TNode* node, const TOperationId& operationId)
+{
+ (*node)["operation_id"] = GetGuidAsString(operationId);
+}
+
+static void SetPathParam(TNode* node, const TString& pathPrefix, const TYPath& path)
+{
+ (*node)["path"] = AddPathPrefix(path, pathPrefix);
+}
+
+static TNode SerializeAttributeFilter(const TAttributeFilter& attributeFilter)
+{
+ TNode result = TNode::CreateList();
+ for (const auto& attribute : attributeFilter.Attributes_) {
+ result.Add(attribute);
+ }
+ return result;
+}
+
+static TNode SerializeAttributeFilter(const TOperationAttributeFilter& attributeFilter)
+{
+ TNode result = TNode::CreateList();
+ for (const auto& attribute : attributeFilter.Attributes_) {
+ result.Add(ToString(attribute));
+ }
+ return result;
+}
+
+template <typename TOptions>
+static void SetFirstLastTabletIndex(TNode* node, const TOptions& options)
+{
+ if (options.FirstTabletIndex_) {
+ (*node)["first_tablet_index"] = *options.FirstTabletIndex_;
+ }
+ if (options.LastTabletIndex_) {
+ (*node)["last_tablet_index"] = *options.LastTabletIndex_;
+ }
+}
+
+static TString GetDefaultTransactionTitle()
+{
+ const auto processState = TProcessState::Get();
+ TStringStream res;
+
+ res << "User transaction. Created by: " << processState->UserName << " on " << processState->FqdnHostName
+ << " client: " << processState->ClientVersion << " pid: " << processState->Pid;
+ if (!processState->CommandLine.empty()) {
+ res << " program: " << processState->CommandLine[0];
+ } else {
+ res << " command line is unknown probably NYT::Initialize was never called";
+ }
+
+#ifndef NDEBUG
+ res << " build: debug";
+#endif
+
+ return res.Str();
+}
+
+template <typename T>
+void SerializeMasterReadOptions(TNode* node, const TMasterReadOptions<T>& options)
+{
+ if (options.ReadFrom_) {
+ (*node)["read_from"] = ToString(*options.ReadFrom_);
+ }
+}
+
+////////////////////////////////////////////////////////////////////
+
+TNode SerializeParamsForCreate(
+ const TTransactionId& transactionId,
+ const TString& pathPrefix,
+ const TYPath& path,
+ ENodeType type,
+ const TCreateOptions& options)
+{
+ TNode result;
+ SetTransactionIdParam(&result, transactionId);
+ SetPathParam(&result, pathPrefix, path);
+ result["recursive"] = options.Recursive_;
+ result["type"] = ToString(type);
+ result["ignore_existing"] = options.IgnoreExisting_;
+ result["force"] = options.Force_;
+ if (options.Attributes_) {
+ result["attributes"] = *options.Attributes_;
+ }
+ return result;
+}
+
+TNode SerializeParamsForRemove(
+ const TTransactionId& transactionId,
+ const TString& pathPrefix,
+ const TYPath& path,
+ const TRemoveOptions& options)
+{
+ TNode result;
+ SetTransactionIdParam(&result, transactionId);
+ SetPathParam(&result, pathPrefix, path);
+ result["recursive"] = options.Recursive_;
+ result["force"] = options.Force_;
+ return result;
+}
+
+TNode SerializeParamsForExists(
+ const TTransactionId& transactionId,
+ const TString& pathPrefix,
+ const TYPath& path,
+ const TExistsOptions& options)
+{
+ TNode result;
+ SetTransactionIdParam(&result, transactionId);
+ SetPathParam(&result, pathPrefix, path);
+ SerializeMasterReadOptions(&result, options);
+ return result;
+}
+
+TNode SerializeParamsForGet(
+ const TTransactionId& transactionId,
+ const TString& pathPrefix,
+ const TYPath& path,
+ const TGetOptions& options)
+{
+ TNode result;
+ SetTransactionIdParam(&result, transactionId);
+ SetPathParam(&result, pathPrefix, path);
+ SerializeMasterReadOptions(&result, options);
+ if (options.AttributeFilter_) {
+ result["attributes"] = SerializeAttributeFilter(*options.AttributeFilter_);
+ }
+ if (options.MaxSize_) {
+ result["max_size"] = *options.MaxSize_;
+ }
+ return result;
+}
+
+TNode SerializeParamsForSet(
+ const TTransactionId& transactionId,
+ const TString& pathPrefix,
+ const TYPath& path,
+ const TSetOptions& options)
+{
+ TNode result;
+ SetTransactionIdParam(&result, transactionId);
+ SetPathParam(&result, pathPrefix, path);
+ result["recursive"] = options.Recursive_;
+ if (options.Force_) {
+ result["force"] = *options.Force_;
+ }
+ return result;
+}
+
+TNode SerializeParamsForMultisetAttributes(
+ const TTransactionId& transactionId,
+ const TString& pathPrefix,
+ const TYPath& path,
+ [[maybe_unused]] const TMultisetAttributesOptions& options)
+{
+ TNode result;
+ SetTransactionIdParam(&result, transactionId);
+ SetPathParam(&result, pathPrefix, path);
+ return result;
+}
+
+TNode SerializeParamsForList(
+ const TTransactionId& transactionId,
+ const TString& pathPrefix,
+ const TYPath& path,
+ const TListOptions& options)
+{
+ TNode result;
+ SetTransactionIdParam(&result, transactionId);
+ SetPathParam(&result, pathPrefix, path);
+ SerializeMasterReadOptions(&result, options);
+ if (options.MaxSize_) {
+ result["max_size"] = *options.MaxSize_;
+ }
+ if (options.AttributeFilter_) {
+ result["attributes"] = SerializeAttributeFilter(*options.AttributeFilter_);
+ }
+ return result;
+}
+
+TNode SerializeParamsForCopy(
+ const TTransactionId& transactionId,
+ const TString& pathPrefix,
+ const TYPath& sourcePath,
+ const TYPath& destinationPath,
+ const TCopyOptions& options)
+{
+ TNode result;
+ SetTransactionIdParam(&result, transactionId);
+ result["source_path"] = AddPathPrefix(sourcePath, pathPrefix);
+ result["destination_path"] = AddPathPrefix(destinationPath, pathPrefix);
+ result["recursive"] = options.Recursive_;
+ result["force"] = options.Force_;
+ result["preserve_account"] = options.PreserveAccount_;
+ if (options.PreserveExpirationTime_) {
+ result["preserve_expiration_time"] = *options.PreserveExpirationTime_;
+ }
+ return result;
+}
+
+TNode SerializeParamsForMove(
+ const TTransactionId& transactionId,
+ const TString& pathPrefix,
+ const TYPath& sourcePath,
+ const TYPath& destinationPath,
+ const TMoveOptions& options)
+{
+ TNode result;
+ SetTransactionIdParam(&result, transactionId);
+ result["source_path"] = AddPathPrefix(sourcePath, pathPrefix);
+ result["destination_path"] = AddPathPrefix(destinationPath, pathPrefix);
+ result["recursive"] = options.Recursive_;
+ result["force"] = options.Force_;
+ result["preserve_account"] = options.PreserveAccount_;
+ if (options.PreserveExpirationTime_) {
+ result["preserve_expiration_time"] = *options.PreserveExpirationTime_;
+ }
+ return result;
+}
+
+TNode SerializeParamsForLink(
+ const TTransactionId& transactionId,
+ const TString& pathPrefix,
+ const TYPath& targetPath,
+ const TYPath& linkPath,
+ const TLinkOptions& options)
+{
+ TNode result;
+ SetTransactionIdParam(&result, transactionId);
+ result["target_path"] = AddPathPrefix(targetPath, pathPrefix);
+ result["link_path"] = AddPathPrefix(linkPath, pathPrefix);
+ result["recursive"] = options.Recursive_;
+ result["ignore_existing"] = options.IgnoreExisting_;
+ result["force"] = options.Force_;
+ if (options.Attributes_) {
+ result["attributes"] = *options.Attributes_;
+ }
+ return result;
+}
+
+TNode SerializeParamsForLock(
+ const TTransactionId& transactionId,
+ const TString& pathPrefix,
+ const TYPath& path,
+ ELockMode mode,
+ const TLockOptions& options)
+{
+ TNode result;
+ SetTransactionIdParam(&result, transactionId);
+ SetPathParam(&result, pathPrefix, path);
+ result["mode"] = ToString(mode);
+ result["waitable"] = options.Waitable_;
+ if (options.AttributeKey_) {
+ result["attribute_key"] = *options.AttributeKey_;
+ }
+ if (options.ChildKey_) {
+ result["child_key"] = *options.ChildKey_;
+ }
+ return result;
+}
+
+TNode SerializeParamsForUnlock(
+ const TTransactionId& transactionId,
+ const TString& pathPrefix,
+ const TYPath& path,
+ const TUnlockOptions& options)
+{
+ TNode result;
+ SetTransactionIdParam(&result, transactionId);
+ SetPathParam(&result, pathPrefix, path);
+ Y_UNUSED(options);
+ return result;
+}
+
+TNode SerializeParamsForConcatenate(
+ const TTransactionId& transactionId,
+ const TString& pathPrefix,
+ const TVector<TRichYPath>& sourcePaths,
+ const TRichYPath& destinationPath,
+ const TConcatenateOptions& options)
+{
+ TNode result;
+ SetTransactionIdParam(&result, transactionId);
+ {
+ auto actualDestination = destinationPath;
+ actualDestination.Path(AddPathPrefix(actualDestination.Path_, pathPrefix));
+ if (options.Append_) {
+ actualDestination.Append(*options.Append_);
+ }
+ result["destination_path"] = PathToNode(actualDestination);
+ }
+ auto& sourcePathsNode = result["source_paths"];
+ for (const auto& path : sourcePaths) {
+ auto actualSource = path;
+ actualSource.Path(AddPathPrefix(actualSource.Path_, pathPrefix));
+ sourcePathsNode.Add(PathToNode(actualSource));
+ }
+ return result;
+}
+
+TNode SerializeParamsForPingTx(
+ const TTransactionId& transactionId)
+{
+ TNode result;
+ SetTransactionIdParam(&result, transactionId);
+ return result;
+}
+
+TNode SerializeParamsForListOperations(
+ const TListOperationsOptions& options)
+{
+ TNode result = TNode::CreateMap();
+ if (options.FromTime_) {
+ result["from_time"] = ToString(*options.FromTime_);
+ }
+ if (options.ToTime_) {
+ result["to_time"] = ToString(*options.ToTime_);
+ }
+ if (options.CursorTime_) {
+ result["cursor_time"] = ToString(*options.CursorTime_);
+ }
+ if (options.CursorDirection_) {
+ result["cursor_direction"] = ToString(*options.CursorDirection_);
+ }
+ if (options.Pool_) {
+ result["pool"] = *options.Pool_;
+ }
+ if (options.Filter_) {
+ result["filter"] = *options.Filter_;
+ }
+ if (options.User_) {
+ result["user"] = *options.User_;
+ }
+ if (options.State_) {
+ result["state"] = *options.State_;
+ }
+ if (options.Type_) {
+ result["type"] = ToString(*options.Type_);
+ }
+ if (options.WithFailedJobs_) {
+ result["with_failed_jobs"] = *options.WithFailedJobs_;
+ }
+ if (options.IncludeCounters_) {
+ result["include_counters"] = *options.IncludeCounters_;
+ }
+ if (options.IncludeArchive_) {
+ result["include_archive"] = *options.IncludeArchive_;
+ }
+ if (options.Limit_) {
+ result["limit"] = *options.Limit_;
+ }
+ return result;
+}
+
+TNode SerializeParamsForGetOperation(
+ const TOperationId& operationId,
+ const TGetOperationOptions& options)
+{
+ TNode result;
+ SetOperationIdParam(&result, operationId);
+ if (options.AttributeFilter_) {
+ result["attributes"] = SerializeAttributeFilter(*options.AttributeFilter_);
+ }
+ return result;
+}
+
+TNode SerializeParamsForAbortOperation(const TOperationId& operationId)
+{
+ TNode result;
+ SetOperationIdParam(&result, operationId);
+ return result;
+}
+
+TNode SerializeParamsForCompleteOperation(const TOperationId& operationId)
+{
+ TNode result;
+ SetOperationIdParam(&result, operationId);
+ return result;
+}
+
+TNode SerializeParamsForSuspendOperation(
+ const TOperationId& operationId,
+ const TSuspendOperationOptions& options)
+{
+ TNode result;
+ SetOperationIdParam(&result, operationId);
+ if (options.AbortRunningJobs_) {
+ result["abort_running_jobs"] = *options.AbortRunningJobs_;
+ }
+ return result;
+}
+
+TNode SerializeParamsForResumeOperation(
+ const TOperationId& operationId,
+ const TResumeOperationOptions& options)
+{
+ TNode result;
+ SetOperationIdParam(&result, operationId);
+ Y_UNUSED(options);
+ return result;
+}
+
+TNode SerializeParamsForUpdateOperationParameters(
+ const TOperationId& operationId,
+ const TUpdateOperationParametersOptions& options)
+{
+ TNode result;
+ SetOperationIdParam(&result, operationId);
+ TNode& parameters = result["parameters"];
+ if (options.Pool_) {
+ parameters["pool"] = *options.Pool_;
+ }
+ if (options.Weight_) {
+ parameters["weight"] = *options.Weight_;
+ }
+ if (!options.Owners_.empty()) {
+ parameters["owners"] = TNode::CreateList();
+ for (const auto& owner : options.Owners_) {
+ parameters["owners"].Add(owner);
+ }
+ }
+ if (options.SchedulingOptionsPerPoolTree_) {
+ parameters["scheduling_options_per_pool_tree"] = TNode::CreateMap();
+ for (const auto& entry : options.SchedulingOptionsPerPoolTree_->Options_) {
+ auto schedulingOptionsNode = TNode::CreateMap();
+ const auto& schedulingOptions = entry.second;
+ if (schedulingOptions.Pool_) {
+ schedulingOptionsNode["pool"] = *schedulingOptions.Pool_;
+ }
+ if (schedulingOptions.Weight_) {
+ schedulingOptionsNode["weight"] = *schedulingOptions.Weight_;
+ }
+ if (schedulingOptions.ResourceLimits_) {
+ auto resourceLimitsNode = TNode::CreateMap();
+ const auto& resourceLimits = *schedulingOptions.ResourceLimits_;
+ if (resourceLimits.UserSlots_) {
+ resourceLimitsNode["user_slots"] = *resourceLimits.UserSlots_;
+ }
+ if (resourceLimits.Memory_) {
+ resourceLimitsNode["memory"] = *resourceLimits.Memory_;
+ }
+ if (resourceLimits.Cpu_) {
+ resourceLimitsNode["cpu"] = *resourceLimits.Cpu_;
+ }
+ if (resourceLimits.Network_) {
+ resourceLimitsNode["network"] = *resourceLimits.Network_;
+ }
+ schedulingOptionsNode["resource_limits"] = std::move(resourceLimitsNode);
+ }
+ parameters["scheduling_options_per_pool_tree"][entry.first] = std::move(schedulingOptionsNode);
+ }
+ }
+ return result;
+}
+
+TNode SerializeParamsForGetJob(
+ const TOperationId& operationId,
+ const TJobId& jobId,
+ const TGetJobOptions& /* options */)
+{
+ TNode result;
+ SetOperationIdParam(&result, operationId);
+ result["job_id"] = GetGuidAsString(jobId);
+ return result;
+}
+
+TNode SerializeParamsForListJobs(
+ const TOperationId& operationId,
+ const TListJobsOptions& options)
+{
+ TNode result;
+ SetOperationIdParam(&result, operationId);
+
+ if (options.Type_) {
+ result["type"] = ToString(*options.Type_);
+ }
+ if (options.State_) {
+ result["state"] = ToString(*options.State_);
+ }
+ if (options.Address_) {
+ result["address"] = *options.Address_;
+ }
+ if (options.WithStderr_) {
+ result["with_stderr"] = *options.WithStderr_;
+ }
+ if (options.WithSpec_) {
+ result["with_spec"] = *options.WithSpec_;
+ }
+ if (options.WithFailContext_) {
+ result["with_fail_context"] = *options.WithFailContext_;
+ }
+
+ if (options.SortField_) {
+ result["sort_field"] = ToString(*options.SortField_);
+ }
+ if (options.SortOrder_) {
+ result["sort_order"] = ToString(*options.SortOrder_);
+ }
+
+ if (options.Offset_) {
+ result["offset"] = *options.Offset_;
+ }
+ if (options.Limit_) {
+ result["limit"] = *options.Limit_;
+ }
+
+ if (options.IncludeCypress_) {
+ result["include_cypress"] = *options.IncludeCypress_;
+ }
+ if (options.IncludeArchive_) {
+ result["include_archive"] = *options.IncludeArchive_;
+ }
+ if (options.IncludeControllerAgent_) {
+ result["include_controller_agent"] = *options.IncludeControllerAgent_;
+ }
+ return result;
+}
+
+TNode SerializeParametersForInsertRows(
+ const TString& pathPrefix,
+ const TYPath& path,
+ const TInsertRowsOptions& options)
+{
+ TNode result;
+ SetPathParam(&result, pathPrefix, path);
+ if (options.Aggregate_) {
+ result["aggregate"] = *options.Aggregate_;
+ }
+ if (options.Update_) {
+ result["update"] = *options.Update_;
+ }
+ if (options.Atomicity_) {
+ result["atomicity"] = ToString(*options.Atomicity_);
+ }
+ if (options.Durability_) {
+ result["durability"] = ToString(*options.Durability_);
+ }
+ if (options.RequireSyncReplica_) {
+ result["require_sync_replica"] = *options.RequireSyncReplica_;
+ }
+ return result;
+}
+
+TNode SerializeParametersForDeleteRows(
+ const TString& pathPrefix,
+ const TYPath& path,
+ const TDeleteRowsOptions& options)
+{
+ TNode result;
+ SetPathParam(&result, pathPrefix, path);
+ if (options.Atomicity_) {
+ result["atomicity"] = ToString(*options.Atomicity_);
+ }
+ if (options.Durability_) {
+ result["durability"] = ToString(*options.Durability_);
+ }
+ if (options.RequireSyncReplica_) {
+ result["require_sync_replica"] = *options.RequireSyncReplica_;
+ }
+ return result;
+}
+
+TNode SerializeParametersForTrimRows(
+ const TString& pathPrefix,
+ const TYPath& path,
+ const TTrimRowsOptions& /* options*/)
+{
+ TNode result;
+ SetPathParam(&result, pathPrefix, path);
+ return result;
+}
+
+TNode SerializeParamsForParseYPath(const TRichYPath& path)
+{
+ TNode result;
+ result["path"] = PathToNode(path);
+ return result;
+}
+
+TNode SerializeParamsForEnableTableReplica(
+ const TReplicaId& replicaId)
+{
+ TNode result;
+ result["replica_id"] = GetGuidAsString(replicaId);
+ return result;
+}
+
+TNode SerializeParamsForDisableTableReplica(
+ const TReplicaId& replicaId)
+{
+ TNode result;
+ result["replica_id"] = GetGuidAsString(replicaId);
+ return result;
+}
+
+TNode SerializeParamsForAlterTableReplica(const TReplicaId& replicaId, const TAlterTableReplicaOptions& options)
+{
+ TNode result;
+ result["replica_id"] = GetGuidAsString(replicaId);
+ if (options.Enabled_) {
+ result["enabled"] = *options.Enabled_;
+ }
+ if (options.Mode_) {
+ result["mode"] = ToString(*options.Mode_);
+ }
+ return result;
+}
+
+TNode SerializeParamsForFreezeTable(
+ const TString& pathPrefix,
+ const TYPath& path,
+ const TFreezeTableOptions& options)
+{
+ TNode result;
+ SetPathParam(&result, pathPrefix, path);
+ SetFirstLastTabletIndex(&result, options);
+ return result;
+}
+
+TNode SerializeParamsForUnfreezeTable(
+ const TString& pathPrefix,
+ const TYPath& path,
+ const TUnfreezeTableOptions& options)
+{
+ TNode result;
+ SetPathParam(&result, pathPrefix, path);
+ SetFirstLastTabletIndex(&result, options);
+ return result;
+}
+
+TNode SerializeParamsForAlterTable(
+ const TTransactionId& transactionId,
+ const TString& pathPrefix,
+ const TYPath& path,
+ const TAlterTableOptions& options)
+{
+ TNode result;
+ SetTransactionIdParam(&result, transactionId);
+ SetPathParam(&result, pathPrefix, path);
+ if (options.Dynamic_) {
+ result["dynamic"] = *options.Dynamic_;
+ }
+ if (options.Schema_) {
+ TNode schema;
+ {
+ TNodeBuilder builder(&schema);
+ Serialize(*options.Schema_, &builder);
+ }
+ result["schema"] = schema;
+ }
+ if (options.UpstreamReplicaId_) {
+ result["upstream_replica_id"] = GetGuidAsString(*options.UpstreamReplicaId_);
+ }
+ return result;
+}
+
+TNode SerializeParamsForGetTableColumnarStatistics(
+ const TTransactionId& transactionId,
+ const TVector<TRichYPath>& paths,
+ const TGetTableColumnarStatisticsOptions& options)
+{
+ TNode result;
+ SetTransactionIdParam(&result, transactionId);
+ for (const auto& path : paths) {
+ result["paths"].Add(PathToNode(path));
+ }
+ if (options.FetcherMode_) {
+ result["fetcher_mode"] = ToString(*options.FetcherMode_);
+ }
+ return result;
+}
+
+TNode SerializeParamsForGetTablePartitions(
+ const TTransactionId& transactionId,
+ const TVector<TRichYPath>& paths,
+ const TGetTablePartitionsOptions& options)
+{
+ TNode result;
+ SetTransactionIdParam(&result, transactionId);
+ for (const auto& path : paths) {
+ result["paths"].Add(PathToNode(path));
+ }
+ result["partition_mode"] = ToString(options.PartitionMode_);
+ result["data_weight_per_partition"] = options.DataWeightPerPartition_;
+ if (options.MaxPartitionCount_) {
+ result["max_partition_count"] = *options.MaxPartitionCount_;
+ }
+ result["adjust_data_weight_per_partition"] = options.AdjustDataWeightPerPartition_;
+ return result;
+}
+
+TNode SerializeParamsForGetFileFromCache(
+ const TTransactionId& transactionId,
+ const TString& md5Signature,
+ const TYPath& cachePath,
+ const TGetFileFromCacheOptions&)
+{
+ TNode result;
+ SetTransactionIdParam(&result, transactionId);
+ result["md5"] = md5Signature;
+ result["cache_path"] = cachePath;
+ return result;
+}
+
+TNode SerializeParamsForPutFileToCache(
+ const TTransactionId& transactionId,
+ const TString& pathPrefix,
+ const TYPath& filePath,
+ const TString& md5Signature,
+ const TYPath& cachePath,
+ const TPutFileToCacheOptions& options)
+{
+ TNode result;
+ SetTransactionIdParam(&result, transactionId);
+ SetPathParam(&result, pathPrefix, filePath);
+ result["md5"] = md5Signature;
+ result["cache_path"] = cachePath;
+ if (options.PreserveExpirationTimeout_) {
+ result["preserve_expiration_timeout"] = *options.PreserveExpirationTimeout_;
+ }
+ return result;
+}
+
+TNode SerializeParamsForSkyShareTable(
+ const TString& serverName,
+ const TString& pathPrefix,
+ const std::vector<TYPath>& tablePaths,
+ const TSkyShareTableOptions& options)
+{
+ TNode result;
+
+ if (tablePaths.size() == 1) {
+ SetPathParam(&result, pathPrefix, tablePaths[0]);
+ } else {
+ auto pathList = TNode::CreateList();
+ for (const auto& p : tablePaths) {
+ pathList.Add(AddPathPrefix(p, pathPrefix));
+ }
+ result["paths"] = pathList;
+ }
+ result["cluster"] = serverName;
+
+ if (options.KeyColumns_) {
+ auto keyColumnsList = TNode::CreateList();
+ for (const auto& s : options.KeyColumns_->Parts_) {
+ if (s.empty()) {
+ continue;
+ }
+ keyColumnsList.Add(s);
+ }
+ result["key_columns"] = keyColumnsList;
+ }
+
+ if (options.EnableFastbone_) {
+ result["enable_fastbone"] = *options.EnableFastbone_;
+ }
+
+ return result;
+}
+
+TNode SerializeParamsForCheckPermission(
+ const TString& user,
+ EPermission permission,
+ const TString& pathPrefix,
+ const TYPath& path,
+ const TCheckPermissionOptions& options)
+{
+ TNode result;
+ SetPathParam(&result, pathPrefix, path);
+ result["path"] = path;
+ result["user"] = user;
+ result["permission"] = ToString(permission);
+ if (!options.Columns_.empty()) {
+ result["columns"] = TNode::CreateList();
+ result["columns"].AsList().assign(options.Columns_.begin(), options.Columns_.end());
+ }
+ return result;
+}
+
+TNode SerializeParamsForGetTabletInfos(
+ const TString& pathPrefix,
+ const TYPath& path,
+ const TVector<int>& tabletIndexes,
+ const TGetTabletInfosOptions& options)
+{
+ Y_UNUSED(options);
+ TNode result;
+ SetPathParam(&result, pathPrefix, path);
+ result["tablet_indexes"] = TNode::CreateList();
+ result["tablet_indexes"].AsList().assign(tabletIndexes.begin(), tabletIndexes.end());
+ return result;
+}
+
+TNode SerializeParamsForAbortTransaction(const TTransactionId& transactionId)
+{
+ TNode result;
+ SetTransactionIdParam(&result, transactionId);
+ return result;
+}
+
+TNode SerializeParamsForCommitTransaction(const TTransactionId& transactionId)
+{
+ TNode result;
+ SetTransactionIdParam(&result, transactionId);
+ return result;
+}
+
+TNode SerializeParamsForStartTransaction(
+ const TTransactionId& parentTransactionId,
+ TDuration txTimeout,
+ const TStartTransactionOptions& options)
+{
+ TNode result;
+
+ SetTransactionIdParam(&result, parentTransactionId);
+ result["timeout"] = static_cast<i64>((options.Timeout_.GetOrElse(txTimeout).MilliSeconds()));
+ if (options.Deadline_) {
+ result["deadline"] = ToString(options.Deadline_);
+ }
+
+ if (options.PingAncestors_) {
+ result["ping_ancestor_transactions"] = true;
+ }
+
+ if (options.Attributes_ && !options.Attributes_->IsMap()) {
+ ythrow TApiUsageError() << "Attributes must be a Map node";
+ }
+
+ auto attributes = options.Attributes_.GetOrElse(TNode::CreateMap());
+ if (options.Title_) {
+ attributes["title"] = *options.Title_;
+ } else if (!attributes.HasKey("title")) {
+ attributes["title"] = GetDefaultTransactionTitle();
+ }
+ result["attributes"] = attributes;
+
+ return result;
+}
+
+////////////////////////////////////////////////////////////////////////////////
+
+} // namespace NYT::NDetail::NRawClient
diff --git a/yt/cpp/mapreduce/raw_client/rpc_parameters_serialization.h b/yt/cpp/mapreduce/raw_client/rpc_parameters_serialization.h
new file mode 100644
index 0000000000..a60e3ea369
--- /dev/null
+++ b/yt/cpp/mapreduce/raw_client/rpc_parameters_serialization.h
@@ -0,0 +1,231 @@
+#pragma once
+
+#include <yt/cpp/mapreduce/interface/fwd.h>
+#include <yt/cpp/mapreduce/interface/client_method_options.h>
+
+namespace NYT::NDetail::NRawClient {
+
+////////////////////////////////////////////////////////////////////
+
+TNode SerializeParamsForCreate(
+ const TTransactionId& transactionId,
+ const TString& pathPrefix,
+ const TYPath& path,
+ ENodeType type,
+ const TCreateOptions& options);
+
+TNode SerializeParamsForRemove(
+ const TTransactionId& transactionId,
+ const TString& pathPrefix,
+ const TYPath& path,
+ const TRemoveOptions& options);
+
+TNode SerializeParamsForExists(
+ const TTransactionId& transactionId,
+ const TString& pathPrefix,
+ const TYPath& path,
+ const TExistsOptions& options);
+
+TNode SerializeParamsForGet(
+ const TTransactionId& transactionId,
+ const TString& pathPrefix,
+ const TYPath& path,
+ const TGetOptions& options);
+
+TNode SerializeParamsForSet(
+ const TTransactionId& transactionId,
+ const TString& pathPrefix,
+ const TYPath& path,
+ const TSetOptions& options);
+
+TNode SerializeParamsForMultisetAttributes(
+ const TTransactionId& transactionId,
+ const TString& pathPrefix,
+ const TYPath& path,
+ const TMultisetAttributesOptions& options);
+
+TNode SerializeParamsForList(
+ const TTransactionId& transactionId,
+ const TString& pathPrefix,
+ const TYPath& path,
+ const TListOptions& options);
+
+TNode SerializeParamsForCopy(
+ const TTransactionId& transactionId,
+ const TString& pathPrefix,
+ const TYPath& sourcePath,
+ const TYPath& destinationPath,
+ const TCopyOptions& options);
+
+TNode SerializeParamsForMove(
+ const TTransactionId& transactionId,
+ const TString& pathPrefix,
+ const TYPath& sourcePath,
+ const TYPath& destinationPath,
+ const TMoveOptions& options);
+
+TNode SerializeParamsForLink(
+ const TTransactionId& transactionId,
+ const TString& pathPrefix,
+ const TYPath& targetPath,
+ const TYPath& linkPath,
+ const TLinkOptions& options);
+
+TNode SerializeParamsForLock(
+ const TTransactionId& transactionId,
+ const TString& pathPrefix,
+ const TYPath& path,
+ ELockMode mode,
+ const TLockOptions& options);
+
+TNode SerializeParamsForUnlock(
+ const TTransactionId& transactionId,
+ const TString& pathPrefix,
+ const TYPath& path,
+ const TUnlockOptions& options);
+
+TNode SerializeParamsForConcatenate(
+ const TTransactionId& transactionId,
+ const TString& pathPrefix,
+ const TVector<TRichYPath>& sourcePaths,
+ const TRichYPath& destinationPath,
+ const TConcatenateOptions& options);
+
+TNode SerializeParamsForPingTx(
+ const TTransactionId& transactionId);
+
+TNode SerializeParamsForGetOperation(
+ const TOperationId& operationId,
+ const TGetOperationOptions& options);
+
+TNode SerializeParamsForAbortOperation(
+ const TOperationId& operationId);
+
+TNode SerializeParamsForCompleteOperation(
+ const TOperationId& operationId);
+
+TNode SerializeParamsForSuspendOperation(
+ const TOperationId& operationId,
+ const TSuspendOperationOptions& options);
+
+TNode SerializeParamsForResumeOperation(
+ const TOperationId& operationId,
+ const TResumeOperationOptions& options);
+
+TNode SerializeParamsForListOperations(
+ const TListOperationsOptions& options);
+
+TNode SerializeParamsForUpdateOperationParameters(
+ const TOperationId& operationId,
+ const TUpdateOperationParametersOptions& options);
+
+TNode SerializeParamsForGetJob(
+ const TOperationId& operationId,
+ const TJobId& jobId,
+ const TGetJobOptions& options);
+
+TNode SerializeParamsForListJobs(
+ const TOperationId& operationId,
+ const TListJobsOptions& options);
+
+TNode SerializeParametersForInsertRows(
+ const TString& pathPrefix,
+ const TYPath& path,
+ const TInsertRowsOptions& options);
+
+TNode SerializeParametersForDeleteRows(
+ const TString& pathPrefix,
+ const TYPath& path,
+ const TDeleteRowsOptions& options);
+
+TNode SerializeParametersForTrimRows(
+ const TString& pathPrefix,
+ const TYPath& path,
+ const TTrimRowsOptions& options);
+
+TNode SerializeParamsForParseYPath(
+ const TRichYPath& path);
+
+TNode SerializeParamsForEnableTableReplica(
+ const TReplicaId& replicaId);
+
+TNode SerializeParamsForDisableTableReplica(
+ const TReplicaId& replicaId);
+
+TNode SerializeParamsForAlterTableReplica(
+ const TReplicaId& replicaId,
+ const TAlterTableReplicaOptions& options);
+
+TNode SerializeParamsForFreezeTable(
+ const TString& pathPrefix,
+ const TYPath& path,
+ const TFreezeTableOptions& options);
+
+TNode SerializeParamsForUnfreezeTable(
+ const TString& pathPrefix,
+ const TYPath& path,
+ const TUnfreezeTableOptions& options);
+
+TNode SerializeParamsForAlterTable(
+ const TTransactionId& transactionId,
+ const TString& pathPrefix,
+ const TYPath& path,
+ const TAlterTableOptions& options);
+
+TNode SerializeParamsForGetTableColumnarStatistics(
+ const TTransactionId& transactionId,
+ const TVector<TRichYPath>& paths,
+ const TGetTableColumnarStatisticsOptions& options);
+
+TNode SerializeParamsForGetTablePartitions(
+ const TTransactionId& transactionId,
+ const TVector<TRichYPath>& paths,
+ const TGetTablePartitionsOptions& options);
+
+TNode SerializeParamsForGetFileFromCache(
+ const TTransactionId& transactionId,
+ const TString& md5Signature,
+ const TYPath& cachePath,
+ const TGetFileFromCacheOptions&);
+
+TNode SerializeParamsForPutFileToCache(
+ const TTransactionId& transactionId,
+ const TString& pathPrefix,
+ const TYPath& filePath,
+ const TString& md5Signature,
+ const TYPath& cachePath,
+ const TPutFileToCacheOptions& options);
+
+TNode SerializeParamsForSkyShareTable(
+ const TString& serverName,
+ const TString& pathPrefix,
+ const std::vector<TYPath>& tablePaths,
+ const TSkyShareTableOptions& options);
+
+TNode SerializeParamsForCheckPermission(
+ const TString& user,
+ EPermission permission,
+ const TString& pathPrefix,
+ const TYPath& path,
+ const TCheckPermissionOptions& options);
+
+TNode SerializeParamsForGetTabletInfos(
+ const TString& pathPrefix,
+ const TYPath& path,
+ const TVector<int>& tabletIndexes,
+ const TGetTabletInfosOptions& options);
+
+TNode SerializeParamsForAbortTransaction(
+ const TTransactionId& transactionId);
+
+TNode SerializeParamsForCommitTransaction(
+ const TTransactionId& transactionId);
+
+TNode SerializeParamsForStartTransaction(
+ const TTransactionId& parentTransactionId,
+ TDuration txTimeout,
+ const TStartTransactionOptions& options);
+
+////////////////////////////////////////////////////////////////////
+
+} // namespace NYT::NDetail::NRawClient
diff --git a/yt/cpp/mapreduce/raw_client/ya.make b/yt/cpp/mapreduce/raw_client/ya.make
new file mode 100644
index 0000000000..0d03aae80c
--- /dev/null
+++ b/yt/cpp/mapreduce/raw_client/ya.make
@@ -0,0 +1,19 @@
+LIBRARY()
+
+INCLUDE(${ARCADIA_ROOT}/yt/ya_cpp.make.inc)
+
+SRCS(
+ raw_batch_request.cpp
+ raw_requests.cpp
+ rpc_parameters_serialization.cpp
+)
+
+PEERDIR(
+ yt/cpp/mapreduce/common
+ yt/cpp/mapreduce/http
+ yt/cpp/mapreduce/interface
+ yt/cpp/mapreduce/interface/logging
+ library/cpp/yson/node
+)
+
+END()
diff --git a/yt/cpp/mapreduce/skiff/skiff_schema.h b/yt/cpp/mapreduce/skiff/skiff_schema.h
new file mode 100644
index 0000000000..e8c97de8e8
--- /dev/null
+++ b/yt/cpp/mapreduce/skiff/skiff_schema.h
@@ -0,0 +1,3 @@
+#pragma once
+
+#include <library/cpp/skiff/skiff_schema.h>
diff --git a/yt/cpp/mapreduce/skiff/unchecked_parser.h b/yt/cpp/mapreduce/skiff/unchecked_parser.h
new file mode 100644
index 0000000000..8fd9f90b0b
--- /dev/null
+++ b/yt/cpp/mapreduce/skiff/unchecked_parser.h
@@ -0,0 +1 @@
+#include <library/cpp/skiff/skiff.h>
diff --git a/yt/cpp/mapreduce/skiff/wire_type.h b/yt/cpp/mapreduce/skiff/wire_type.h
new file mode 100644
index 0000000000..96d19c06d3
--- /dev/null
+++ b/yt/cpp/mapreduce/skiff/wire_type.h
@@ -0,0 +1 @@
+#include <library/cpp/skiff/public.h>
diff --git a/yt/cpp/mapreduce/skiff/ya.make b/yt/cpp/mapreduce/skiff/ya.make
new file mode 100644
index 0000000000..95d91ecd47
--- /dev/null
+++ b/yt/cpp/mapreduce/skiff/ya.make
@@ -0,0 +1,9 @@
+LIBRARY()
+
+INCLUDE(${ARCADIA_ROOT}/yt/ya_cpp.make.inc)
+
+PEERDIR(
+ library/cpp/skiff
+)
+
+END()
diff --git a/yt/cpp/mapreduce/tests/yt_unittest_lib/yt_unittest_lib.h b/yt/cpp/mapreduce/tests/yt_unittest_lib/yt_unittest_lib.h
new file mode 100644
index 0000000000..37d9d501cd
--- /dev/null
+++ b/yt/cpp/mapreduce/tests/yt_unittest_lib/yt_unittest_lib.h
@@ -0,0 +1,194 @@
+#pragma once
+
+#include <yt/cpp/mapreduce/interface/logging/logger.h>
+#include <yt/cpp/mapreduce/interface/client.h>
+
+#include <yt/cpp/mapreduce/interface/config.h>
+
+#include <library/cpp/yson/node/node_io.h>
+
+#include <util/generic/bt_exception.h>
+
+#include <util/datetime/base.h>
+
+////////////////////////////////////////////////////////////////////////////////
+
+template<>
+void Out<NYT::TNode>(IOutputStream& s, const NYT::TNode& node);
+
+template<>
+void Out<TGUID>(IOutputStream& s, const TGUID& guid);
+
+////////////////////////////////////////////////////////////////////////////////
+
+namespace NYT {
+namespace NTesting {
+
+////////////////////////////////////////////////////////////////////////////////
+
+IClientPtr CreateTestClient(TString proxy = "", const TCreateClientOptions& options = {});
+
+// Create map node by unique path in Cypress and return that path.
+TYPath CreateTestDirectory(const IClientBasePtr& client);
+
+TString GenerateRandomData(size_t size, ui64 seed = 42);
+
+TVector<TNode> ReadTable(const IClientBasePtr& client, const TString& tablePath);
+
+////////////////////////////////////////////////////////////////////////////////
+
+// TODO: should be removed, usages should be replaced with TConfigSaverGuard
+class TZeroWaitLockPollIntervalGuard
+{
+public:
+ TZeroWaitLockPollIntervalGuard();
+
+ ~TZeroWaitLockPollIntervalGuard();
+
+private:
+ TDuration OldWaitLockPollInterval_;
+};
+
+////////////////////////////////////////////////////////////////////////////////
+
+class TConfigSaverGuard
+{
+public:
+ TConfigSaverGuard();
+ ~TConfigSaverGuard();
+
+private:
+ TConfig Config_;
+};
+
+////////////////////////////////////////////////////////////////////////////////
+
+class TDebugMetricDiff
+{
+public:
+ TDebugMetricDiff(TString name);
+ ui64 GetTotal() const;
+
+private:
+ TString Name_;
+ ui64 InitialValue_;
+};
+
+////////////////////////////////////////////////////////////////////////////////
+
+struct TOwningYaMRRow
+{
+ TString Key;
+ TString SubKey;
+ TString Value;
+
+ TOwningYaMRRow(const TYaMRRow& row = {});
+ TOwningYaMRRow(TString key, TString subKey, TString value);
+
+ operator TYaMRRow() const;
+};
+
+bool operator == (const TOwningYaMRRow& row1, const TOwningYaMRRow& row2);
+
+////////////////////////////////////////////////////////////////////////////////
+
+class TTestFixture
+{
+public:
+ explicit TTestFixture(const TCreateClientOptions& options = {});
+ ~TTestFixture();
+
+ // Return precreated client.
+ IClientPtr GetClient() const;
+
+ // Return newly created client. Useful for cases:
+ // - when we want to have multiple clients objects;
+ // - when we want to control to control destruction of client object;
+ IClientPtr CreateClient(const TCreateClientOptions& options = {}) const;
+
+ IClientPtr CreateClientForUser(const TString& user, TCreateClientOptions options = {});
+
+ TYPath GetWorkingDir() const;
+
+private:
+ TConfigSaverGuard ConfigGuard_;
+ IClientPtr Client_;
+ TYPath WorkingDir_;
+};
+
+////////////////////////////////////////////////////////////////////////////////
+
+class TTabletFixture
+ : public TTestFixture
+{
+public:
+ TTabletFixture();
+
+private:
+ void WaitForTabletCell();
+};
+
+////////////////////////////////////////////////////////////////////////////////
+
+// Compares only columns and only "name" and "type" fields of columns.
+bool AreSchemasEqual(const TTableSchema& lhs, const TTableSchema& rhs);
+
+class TWaitFailedException
+ : public TWithBackTrace<yexception>
+{ };
+
+void WaitForPredicate(const std::function<bool()>& predicate, TDuration timeout = TDuration::Seconds(60));
+
+////////////////////////////////////////////////////////////////////////////////
+
+// Redirects all the LOG_* calls with the corresponding level to `stream`.
+// Moreover, the LOG_* calls are delegated to `oldLogger`.
+class TStreamTeeLogger
+ : public ILogger
+{
+public:
+ TStreamTeeLogger(ELevel cutLevel, IOutputStream* stream, ILoggerPtr oldLogger);
+ void Log(ELevel level, const ::TSourceLocation& sourceLocation, const char* format, va_list args) override;
+
+private:
+ ILoggerPtr OldLogger_;
+ IOutputStream* Stream_;
+ ELevel Level_;
+};
+
+////////////////////////////////////////////////////////////////////////////////
+
+template <typename T>
+TString ToYson(const T& x)
+{
+ TNode result;
+ TNodeBuilder builder(&result);
+ Serialize(x, &builder);
+ return NodeToYsonString(result);
+}
+
+////////////////////////////////////////////////////////////////////////////////
+
+} // namespace NTesting
+} // namespace NYT
+
+////////////////////////////////////////////////////////////////////////////////
+
+template <>
+void Out<NYT::NTesting::TOwningYaMRRow>(IOutputStream& out, const NYT::NTesting::TOwningYaMRRow& row);
+
+////////////////////////////////////////////////////////////////////////////////
+
+// for UNITTEST()
+#define ASSERT_SERIALIZABLES_EQUAL(a, b) \
+ UNIT_ASSERT_EQUAL_C(a, b, NYT::NTesting::ToYson(a) << " != " << NYT::NTesting::ToYson(b))
+
+#define ASSERT_SERIALIZABLES_UNEQUAL(a, b) \
+ UNIT_ASSERT_UNEQUAL_C(a, b, NYT::NTesting::ToYson(a) << " == " << NYT::NTesting::ToYson(b))
+
+// for GTEST()
+#define ASSERT_SERIALIZABLES_EQ(a, b) \
+ ASSERT_EQ(a, b) << NYT::NTesting::ToYson(a) << " != " << NYT::NTesting::ToYson(b)
+
+#define ASSERT_SERIALIZABLES_NE(a, b) \
+ ASSERT_NE(a, b) << NYT::NTesting::ToYson(a) << " == " << NYT::NTesting::ToYson(b)