diff options
author | max42 <max42@yandex-team.com> | 2023-06-30 11:13:34 +0300 |
---|---|---|
committer | max42 <max42@yandex-team.com> | 2023-06-30 11:13:34 +0300 |
commit | 3e1899838408bbad47622007aa382bc8a2b01f87 (patch) | |
tree | 0f21c1e6add187ddb6c3ccc048a7d640ce03fb87 /yt/cpp/mapreduce | |
parent | 5463eb3f5e72a86f858a3d27c886470a724ede34 (diff) | |
download | ydb-3e1899838408bbad47622007aa382bc8a2b01f87.tar.gz |
Revert "YT-19324: move YT provider to ydb/library/yql"
This reverts commit ca272f12fdd0e8d5c3e957fc87939148f1caaf72, reversing
changes made to 49f8acfc8b0b5c0071b804423bcf53fda26c7c12.
Diffstat (limited to 'yt/cpp/mapreduce')
195 files changed, 0 insertions, 43102 deletions
diff --git a/yt/cpp/mapreduce/client/abortable_registry.cpp b/yt/cpp/mapreduce/client/abortable_registry.cpp deleted file mode 100644 index 283d39e049..0000000000 --- a/yt/cpp/mapreduce/client/abortable_registry.cpp +++ /dev/null @@ -1,125 +0,0 @@ -#include "abortable_registry.h" - -#include <yt/cpp/mapreduce/common/retry_lib.h> - -#include <yt/cpp/mapreduce/interface/common.h> -#include <yt/cpp/mapreduce/interface/logging/yt_log.h> - -#include <util/generic/singleton.h> - -namespace NYT { -namespace NDetail { - -using namespace NRawClient; - -//////////////////////////////////////////////////////////////////////////////// - -TTransactionAbortable::TTransactionAbortable(const TClientContext& context, const TTransactionId& transactionId) - : Context_(context) - , TransactionId_(transactionId) -{ } - -void TTransactionAbortable::Abort() -{ - AbortTransaction(nullptr, Context_, TransactionId_); -} - -TString TTransactionAbortable::GetType() const -{ - return "transaction"; -} - -//////////////////////////////////////////////////////////////////////////////// - -TOperationAbortable::TOperationAbortable(IClientRetryPolicyPtr clientRetryPolicy, TClientContext context, const TOperationId& operationId) - : ClientRetryPolicy_(std::move(clientRetryPolicy)) - , Context_(std::move(context)) - , OperationId_(operationId) -{ } - - -void TOperationAbortable::Abort() -{ - AbortOperation(ClientRetryPolicy_->CreatePolicyForGenericRequest(), Context_, OperationId_); -} - -TString TOperationAbortable::GetType() const -{ - return "operation"; -} - -//////////////////////////////////////////////////////////////////////////////// - -void TAbortableRegistry::AbortAllAndBlockForever() -{ - auto guard = Guard(Lock_); - - for (const auto& entry : ActiveAbortables_) { - const auto& id = entry.first; - const auto& abortable = entry.second; - try { - abortable->Abort(); - } catch (std::exception& ex) { - YT_LOG_ERROR("Exception while aborting %v %v: %v", - abortable->GetType(), - id, - 
ex.what()); - } - } - - Running_ = false; -} - -void TAbortableRegistry::Add(const TGUID& id, IAbortablePtr abortable) -{ - auto guard = Guard(Lock_); - - if (!Running_) { - Sleep(TDuration::Max()); - } - - ActiveAbortables_[id] = abortable; -} - -void TAbortableRegistry::Remove(const TGUID& id) -{ - auto guard = Guard(Lock_); - - if (!Running_) { - Sleep(TDuration::Max()); - } - - ActiveAbortables_.erase(id); -} - -//////////////////////////////////////////////////////////////////////////////// - -namespace { - -class TRegistryHolder -{ -public: - TRegistryHolder() - : Registry_(::MakeIntrusive<TAbortableRegistry>()) - { } - - ::TIntrusivePtr<TAbortableRegistry> Get() - { - return Registry_; - } - -private: - ::TIntrusivePtr<TAbortableRegistry> Registry_; -}; - -} // namespace - -::TIntrusivePtr<TAbortableRegistry> TAbortableRegistry::Get() -{ - return Singleton<TRegistryHolder>()->Get(); -} - -//////////////////////////////////////////////////////////////////////////////// - -} // namespace NDetail -} // namespace NYT diff --git a/yt/cpp/mapreduce/client/abortable_registry.h b/yt/cpp/mapreduce/client/abortable_registry.h deleted file mode 100644 index 119d685cad..0000000000 --- a/yt/cpp/mapreduce/client/abortable_registry.h +++ /dev/null @@ -1,81 +0,0 @@ -#pragma once - -#include <yt/cpp/mapreduce/interface/common.h> - -#include <yt/cpp/mapreduce/http/context.h> - -#include <yt/cpp/mapreduce/raw_client/raw_requests.h> - -#include <util/str_stl.h> -#include <util/system/mutex.h> -#include <util/generic/hash.h> - -namespace NYT { -namespace NDetail { - -//////////////////////////////////////////////////////////////////////////////// - -class IAbortable - : public TThrRefBase -{ -public: - virtual void Abort() = 0; - virtual TString GetType() const = 0; -}; - -using IAbortablePtr = ::TIntrusivePtr<IAbortable>; - -//////////////////////////////////////////////////////////////////////////////// - -class TTransactionAbortable - : public IAbortable -{ -public: - 
TTransactionAbortable(const TClientContext& context, const TTransactionId& transactionId); - void Abort() override; - TString GetType() const override; - -private: - TClientContext Context_; - TTransactionId TransactionId_; -}; - -//////////////////////////////////////////////////////////////////////////////// - -class TOperationAbortable - : public IAbortable -{ -public: - TOperationAbortable(IClientRetryPolicyPtr clientRetryPolicy, TClientContext context, const TOperationId& operationId); - void Abort() override; - TString GetType() const override; - -private: - const IClientRetryPolicyPtr ClientRetryPolicy_; - const TClientContext Context_; - const TOperationId OperationId_; -}; - -//////////////////////////////////////////////////////////////////////////////// - -class TAbortableRegistry - : public TThrRefBase -{ -public: - TAbortableRegistry() = default; - static ::TIntrusivePtr<TAbortableRegistry> Get(); - - void AbortAllAndBlockForever(); - void Add(const TGUID& id, IAbortablePtr abortable); - void Remove(const TGUID& id); - -private: - THashMap<TGUID, IAbortablePtr> ActiveAbortables_; - TMutex Lock_; - bool Running_ = true; -}; - -//////////////////////////////////////////////////////////////////////////////// - -} // namespace NDetail -} // namespace NYT diff --git a/yt/cpp/mapreduce/client/batch_request_impl.cpp b/yt/cpp/mapreduce/client/batch_request_impl.cpp deleted file mode 100644 index 6afa5665f1..0000000000 --- a/yt/cpp/mapreduce/client/batch_request_impl.cpp +++ /dev/null @@ -1,198 +0,0 @@ -#include "batch_request_impl.h" - -#include "lock.h" - -#include <yt/cpp/mapreduce/common/helpers.h> -#include <yt/cpp/mapreduce/common/retry_lib.h> - -#include <yt/cpp/mapreduce/http/retry_request.h> - -#include <yt/cpp/mapreduce/interface/config.h> - -#include <yt/cpp/mapreduce/interface/logging/yt_log.h> - -#include <library/cpp/yson/node/node.h> -#include <library/cpp/yson/node/serialize.h> - -#include <yt/cpp/mapreduce/raw_client/raw_requests.h> -#include 
<yt/cpp/mapreduce/raw_client/raw_batch_request.h> -#include <yt/cpp/mapreduce/raw_client/rpc_parameters_serialization.h> - -#include <util/generic/guid.h> -#include <util/string/builder.h> - -#include <exception> - -namespace NYT { -namespace NDetail { - -using namespace NRawClient; - -using ::NThreading::TFuture; -using ::NThreading::TPromise; -using ::NThreading::NewPromise; - -//////////////////////////////////////////////////////////////////// - -TBatchRequest::TBatchRequest(const TTransactionId& defaultTransaction, ::TIntrusivePtr<TClient> client) - : DefaultTransaction_(defaultTransaction) - , Impl_(MakeIntrusive<TRawBatchRequest>(client->GetContext().Config)) - , Client_(client) -{ } - -TBatchRequest::TBatchRequest(TRawBatchRequest* impl, ::TIntrusivePtr<TClient> client) - : Impl_(impl) - , Client_(std::move(client)) -{ } - -TBatchRequest::~TBatchRequest() = default; - -IBatchRequestBase& TBatchRequest::WithTransaction(const TTransactionId& transactionId) -{ - if (!TmpWithTransaction_) { - TmpWithTransaction_.Reset(new TBatchRequest(Impl_.Get(), Client_)); - } - TmpWithTransaction_->DefaultTransaction_ = transactionId; - return *TmpWithTransaction_; -} - -TFuture<TNode> TBatchRequest::Get( - const TYPath& path, - const TGetOptions& options) -{ - return Impl_->Get(DefaultTransaction_, path, options); -} - -TFuture<void> TBatchRequest::Set(const TYPath& path, const TNode& node, const TSetOptions& options) -{ - return Impl_->Set(DefaultTransaction_, path, node, options); -} - -TFuture<TNode::TListType> TBatchRequest::List(const TYPath& path, const TListOptions& options) -{ - return Impl_->List(DefaultTransaction_, path, options); -} - -TFuture<bool> TBatchRequest::Exists(const TYPath& path, const TExistsOptions& options) -{ - return Impl_->Exists(DefaultTransaction_, path, options); -} - -TFuture<ILockPtr> TBatchRequest::Lock( - const TYPath& path, - ELockMode mode, - const TLockOptions& options) -{ - auto convert = [waitable=options.Waitable_, client=Client_] 
(TFuture<TNodeId> nodeIdFuture) -> ILockPtr { - return ::MakeIntrusive<TLock>(nodeIdFuture.GetValue(), client, waitable); - }; - return Impl_->Lock(DefaultTransaction_, path, mode, options).Apply(convert); -} - -::NThreading::TFuture<void> TBatchRequest::Unlock( - const TYPath& path, - const TUnlockOptions& options = TUnlockOptions()) -{ - return Impl_->Unlock(DefaultTransaction_, path, options); -} - -TFuture<TLockId> TBatchRequest::Create( - const TYPath& path, - ENodeType type, - const TCreateOptions& options) -{ - return Impl_->Create(DefaultTransaction_, path, type, options); -} - -TFuture<void> TBatchRequest::Remove( - const TYPath& path, - const TRemoveOptions& options) -{ - return Impl_->Remove(DefaultTransaction_, path, options); -} - -TFuture<TNodeId> TBatchRequest::Move( - const TYPath& sourcePath, - const TYPath& destinationPath, - const TMoveOptions& options) -{ - return Impl_->Move(DefaultTransaction_, sourcePath, destinationPath, options); -} - -TFuture<TNodeId> TBatchRequest::Copy( - const TYPath& sourcePath, - const TYPath& destinationPath, - const TCopyOptions& options) -{ - return Impl_->Copy(DefaultTransaction_, sourcePath, destinationPath, options); -} - -TFuture<TNodeId> TBatchRequest::Link( - const TYPath& targetPath, - const TYPath& linkPath, - const TLinkOptions& options) -{ - return Impl_->Link(DefaultTransaction_, targetPath, linkPath, options); -} - -TFuture<void> TBatchRequest::AbortOperation(const NYT::TOperationId& operationId) -{ - return Impl_->AbortOperation(operationId); -} - -TFuture<void> TBatchRequest::CompleteOperation(const NYT::TOperationId& operationId) -{ - return Impl_->CompleteOperation(operationId); -} - -TFuture<void> TBatchRequest::SuspendOperation( - const TOperationId& operationId, - const TSuspendOperationOptions& options) -{ - return Impl_->SuspendOperation(operationId, options); -} - -TFuture<void> TBatchRequest::ResumeOperation( - const TOperationId& operationId, - const TResumeOperationOptions& options) -{ - 
return Impl_->ResumeOperation(operationId, options); -} - -TFuture<void> TBatchRequest::UpdateOperationParameters( - const NYT::TOperationId& operationId, - const NYT::TUpdateOperationParametersOptions& options) -{ - return Impl_->UpdateOperationParameters(operationId, options); -} - -TFuture<TRichYPath> TBatchRequest::CanonizeYPath(const TRichYPath& path) -{ - return Impl_->CanonizeYPath(path); -} - -TFuture<TVector<TTableColumnarStatistics>> TBatchRequest::GetTableColumnarStatistics( - const TVector<TRichYPath>& paths, - const NYT::TGetTableColumnarStatisticsOptions& options) -{ - return Impl_->GetTableColumnarStatistics(DefaultTransaction_, paths, options); -} - -TFuture<TCheckPermissionResponse> TBatchRequest::CheckPermission( - const TString& user, - EPermission permission, - const TYPath& path, - const TCheckPermissionOptions& options) -{ - return Impl_->CheckPermission(user, permission, path, options); -} - -void TBatchRequest::ExecuteBatch(const TExecuteBatchOptions& options) -{ - NYT::NDetail::ExecuteBatch(Client_->GetRetryPolicy()->CreatePolicyForGenericRequest(), Client_->GetContext(), *Impl_, options); -} - -//////////////////////////////////////////////////////////////////////////////// - -} // namespace NDetail -} // namespace NYT diff --git a/yt/cpp/mapreduce/client/batch_request_impl.h b/yt/cpp/mapreduce/client/batch_request_impl.h deleted file mode 100644 index 0a176417b3..0000000000 --- a/yt/cpp/mapreduce/client/batch_request_impl.h +++ /dev/null @@ -1,137 +0,0 @@ -#pragma once - -#include <yt/cpp/mapreduce/interface/batch_request.h> -#include <yt/cpp/mapreduce/interface/fwd.h> -#include <yt/cpp/mapreduce/interface/node.h> - -#include <yt/cpp/mapreduce/http/requests.h> - -#include <library/cpp/threading/future/future.h> - -#include <util/generic/ptr.h> -#include <util/generic/deque.h> - -#include <exception> - -namespace NYT { -namespace NDetail { - -//////////////////////////////////////////////////////////////////////////////// - -struct 
TResponseInfo; -class TClient; -using TClientPtr = ::TIntrusivePtr<TClient>; - -namespace NRawClient { - class TRawBatchRequest; -} - -//////////////////////////////////////////////////////////////////////////////// - -class TBatchRequest - : public IBatchRequest -{ -public: - TBatchRequest(const TTransactionId& defaultTransaction, ::TIntrusivePtr<TClient> client); - - ~TBatchRequest(); - - virtual IBatchRequestBase& WithTransaction(const TTransactionId& transactionId) override; - - virtual ::NThreading::TFuture<TLockId> Create( - const TYPath& path, - ENodeType type, - const TCreateOptions& options = TCreateOptions()) override; - - virtual ::NThreading::TFuture<void> Remove( - const TYPath& path, - const TRemoveOptions& options = TRemoveOptions()) override; - - virtual ::NThreading::TFuture<bool> Exists( - const TYPath& path, - const TExistsOptions& options = TExistsOptions()) override; - - virtual ::NThreading::TFuture<TNode> Get( - const TYPath& path, - const TGetOptions& options = TGetOptions()) override; - - virtual ::NThreading::TFuture<void> Set( - const TYPath& path, - const TNode& node, - const TSetOptions& options = TSetOptions()) override; - - virtual ::NThreading::TFuture<TNode::TListType> List( - const TYPath& path, - const TListOptions& options = TListOptions()) override; - - virtual ::NThreading::TFuture<TNodeId> Copy( - const TYPath& sourcePath, - const TYPath& destinationPath, - const TCopyOptions& options = TCopyOptions()) override; - - virtual ::NThreading::TFuture<TNodeId> Move( - const TYPath& sourcePath, - const TYPath& destinationPath, - const TMoveOptions& options = TMoveOptions()) override; - - virtual ::NThreading::TFuture<TNodeId> Link( - const TYPath& targetPath, - const TYPath& linkPath, - const TLinkOptions& options = TLinkOptions()) override; - - virtual ::NThreading::TFuture<ILockPtr> Lock( - const TYPath& path, - ELockMode mode, - const TLockOptions& options) override; - - virtual ::NThreading::TFuture<void> Unlock( - const TYPath& 
path, - const TUnlockOptions& options) override; - - virtual ::NThreading::TFuture<void> AbortOperation(const TOperationId& operationId) override; - - virtual ::NThreading::TFuture<void> CompleteOperation(const TOperationId& operationId) override; - - ::NThreading::TFuture<void> SuspendOperation( - const TOperationId& operationId, - const TSuspendOperationOptions& options) override; - - ::NThreading::TFuture<void> ResumeOperation( - const TOperationId& operationId, - const TResumeOperationOptions& options) override; - - virtual ::NThreading::TFuture<void> UpdateOperationParameters( - const TOperationId& operationId, - const TUpdateOperationParametersOptions& options) override; - - virtual ::NThreading::TFuture<TRichYPath> CanonizeYPath(const TRichYPath& path) override; - - virtual ::NThreading::TFuture<TVector<TTableColumnarStatistics>> GetTableColumnarStatistics( - const TVector<TRichYPath>& paths, - const TGetTableColumnarStatisticsOptions& options) override; - - ::NThreading::TFuture<TCheckPermissionResponse> CheckPermission( - const TString& user, - EPermission permission, - const TYPath& path, - const TCheckPermissionOptions& options) override; - - virtual void ExecuteBatch(const TExecuteBatchOptions& executeBatch) override; - -private: - TBatchRequest(NDetail::NRawClient::TRawBatchRequest* impl, ::TIntrusivePtr<TClient> client); - -private: - TTransactionId DefaultTransaction_; - ::TIntrusivePtr<NDetail::NRawClient::TRawBatchRequest> Impl_; - THolder<TBatchRequest> TmpWithTransaction_; - ::TIntrusivePtr<TClient> Client_; - -private: - friend class NYT::NDetail::TClient; -}; - -//////////////////////////////////////////////////////////////////////////////// - -} // namespace NDetail -} // namespace NYT diff --git a/yt/cpp/mapreduce/client/client.cpp b/yt/cpp/mapreduce/client/client.cpp deleted file mode 100644 index ca979c5588..0000000000 --- a/yt/cpp/mapreduce/client/client.cpp +++ /dev/null @@ -1,1361 +0,0 @@ -#include "client.h" - -#include 
"batch_request_impl.h" -#include "client_reader.h" -#include "client_writer.h" -#include "file_reader.h" -#include "file_writer.h" -#include "format_hints.h" -#include "lock.h" -#include "operation.h" -#include "retry_transaction.h" -#include "retryful_writer.h" -#include "transaction.h" -#include "transaction_pinger.h" -#include "yt_poller.h" - -#include <yt/cpp/mapreduce/common/helpers.h> -#include <yt/cpp/mapreduce/common/retry_lib.h> - -#include <yt/cpp/mapreduce/http/helpers.h> -#include <yt/cpp/mapreduce/http/http.h> -#include <yt/cpp/mapreduce/http/http_client.h> -#include <yt/cpp/mapreduce/http/requests.h> -#include <yt/cpp/mapreduce/http/retry_request.h> - -#include <yt/cpp/mapreduce/interface/config.h> -#include <yt/cpp/mapreduce/interface/client.h> -#include <yt/cpp/mapreduce/interface/fluent.h> -#include <yt/cpp/mapreduce/interface/logging/yt_log.h> -#include <yt/cpp/mapreduce/interface/skiff_row.h> - -#include <yt/cpp/mapreduce/io/yamr_table_reader.h> -#include <yt/cpp/mapreduce/io/yamr_table_writer.h> -#include <yt/cpp/mapreduce/io/node_table_reader.h> -#include <yt/cpp/mapreduce/io/node_table_writer.h> -#include <yt/cpp/mapreduce/io/proto_table_reader.h> -#include <yt/cpp/mapreduce/io/proto_table_writer.h> -#include <yt/cpp/mapreduce/io/skiff_row_table_reader.h> -#include <yt/cpp/mapreduce/io/proto_helpers.h> - -#include <yt/cpp/mapreduce/library/table_schema/protobuf.h> - -#include <yt/cpp/mapreduce/raw_client/raw_requests.h> -#include <yt/cpp/mapreduce/raw_client/rpc_parameters_serialization.h> - -#include <library/cpp/json/json_reader.h> - -#include <util/generic/algorithm.h> -#include <util/string/type.h> -#include <util/system/env.h> - -#include <exception> - -using namespace NYT::NDetail::NRawClient; - -namespace NYT { - -//////////////////////////////////////////////////////////////////////////////// - -namespace NDetail { - -//////////////////////////////////////////////////////////////////////////////// - -TClientBase::TClientBase( - const 
TClientContext& context, - const TTransactionId& transactionId, - IClientRetryPolicyPtr retryPolicy) - : Context_(context) - , TransactionId_(transactionId) - , ClientRetryPolicy_(std::move(retryPolicy)) -{ } - -ITransactionPtr TClientBase::StartTransaction( - const TStartTransactionOptions& options) -{ - return MakeIntrusive<TTransaction>(GetParentClientImpl(), Context_, TransactionId_, options); -} - -TNodeId TClientBase::Create( - const TYPath& path, - ENodeType type, - const TCreateOptions& options) -{ - return NRawClient::Create(ClientRetryPolicy_->CreatePolicyForGenericRequest(), Context_, TransactionId_, path, type, options); -} - -void TClientBase::Remove( - const TYPath& path, - const TRemoveOptions& options) -{ - return NRawClient::Remove(ClientRetryPolicy_->CreatePolicyForGenericRequest(), Context_, TransactionId_, path, options); -} - -bool TClientBase::Exists( - const TYPath& path, - const TExistsOptions& options) -{ - return NRawClient::Exists(ClientRetryPolicy_->CreatePolicyForGenericRequest(), Context_, TransactionId_, path, options); -} - -TNode TClientBase::Get( - const TYPath& path, - const TGetOptions& options) -{ - return NRawClient::Get(ClientRetryPolicy_->CreatePolicyForGenericRequest(), Context_, TransactionId_, path, options); -} - -void TClientBase::Set( - const TYPath& path, - const TNode& value, - const TSetOptions& options) -{ - NRawClient::Set(ClientRetryPolicy_->CreatePolicyForGenericRequest(), Context_, TransactionId_, path, value, options); -} - -void TClientBase::MultisetAttributes( - const TYPath& path, const TNode::TMapType& value, const TMultisetAttributesOptions& options) -{ - NRawClient::MultisetAttributes(ClientRetryPolicy_->CreatePolicyForGenericRequest(), Context_, TransactionId_, path, value, options); -} - - -TNode::TListType TClientBase::List( - const TYPath& path, - const TListOptions& options) -{ - return NRawClient::List(ClientRetryPolicy_->CreatePolicyForGenericRequest(), Context_, TransactionId_, path, options); -} 
- -TNodeId TClientBase::Copy( - const TYPath& sourcePath, - const TYPath& destinationPath, - const TCopyOptions& options) -{ - return NRawClient::Copy(ClientRetryPolicy_->CreatePolicyForGenericRequest(), Context_, TransactionId_, sourcePath, destinationPath, options); -} - -TNodeId TClientBase::Move( - const TYPath& sourcePath, - const TYPath& destinationPath, - const TMoveOptions& options) -{ - return NRawClient::Move(ClientRetryPolicy_->CreatePolicyForGenericRequest(), Context_, TransactionId_, sourcePath, destinationPath, options); -} - -TNodeId TClientBase::Link( - const TYPath& targetPath, - const TYPath& linkPath, - const TLinkOptions& options) -{ - return NRawClient::Link(ClientRetryPolicy_->CreatePolicyForGenericRequest(), Context_, TransactionId_, targetPath, linkPath, options); -} - -void TClientBase::Concatenate( - const TVector<TRichYPath>& sourcePaths, - const TRichYPath& destinationPath, - const TConcatenateOptions& options) -{ - std::function<void(ITransactionPtr)> lambda = [&sourcePaths, &destinationPath, &options, this](ITransactionPtr transaction) { - if (!options.Append_ && !sourcePaths.empty() && !transaction->Exists(destinationPath.Path_)) { - auto typeNode = transaction->Get(CanonizeYPath(sourcePaths.front()).Path_ + "/@type"); - auto type = FromString<ENodeType>(typeNode.AsString()); - transaction->Create(destinationPath.Path_, type, TCreateOptions().IgnoreExisting(true)); - } - NRawClient::Concatenate(this->Context_, transaction->GetId(), sourcePaths, destinationPath, options); - }; - RetryTransactionWithPolicy(this, lambda, ClientRetryPolicy_->CreatePolicyForGenericRequest()); -} - -TRichYPath TClientBase::CanonizeYPath(const TRichYPath& path) -{ - return NRawClient::CanonizeYPath(ClientRetryPolicy_->CreatePolicyForGenericRequest(), Context_, path); -} - -TVector<TTableColumnarStatistics> TClientBase::GetTableColumnarStatistics( - const TVector<TRichYPath>& paths, - const TGetTableColumnarStatisticsOptions& options) -{ - return 
NRawClient::GetTableColumnarStatistics( - ClientRetryPolicy_->CreatePolicyForGenericRequest(), - Context_, - TransactionId_, - paths, - options); -} - -TMultiTablePartitions TClientBase::GetTablePartitions( - const TVector<TRichYPath>& paths, - const TGetTablePartitionsOptions& options) -{ - return NRawClient::GetTablePartitions( - ClientRetryPolicy_->CreatePolicyForGenericRequest(), - Context_, - TransactionId_, - paths, - options); -} - -TMaybe<TYPath> TClientBase::GetFileFromCache( - const TString& md5Signature, - const TYPath& cachePath, - const TGetFileFromCacheOptions& options) -{ - return NRawClient::GetFileFromCache(ClientRetryPolicy_->CreatePolicyForGenericRequest(), Context_, TransactionId_, md5Signature, cachePath, options); -} - -TYPath TClientBase::PutFileToCache( - const TYPath& filePath, - const TString& md5Signature, - const TYPath& cachePath, - const TPutFileToCacheOptions& options) -{ - return NRawClient::PutFileToCache(ClientRetryPolicy_->CreatePolicyForGenericRequest(), Context_, TransactionId_, filePath, md5Signature, cachePath, options); -} - -IFileReaderPtr TClientBase::CreateBlobTableReader( - const TYPath& path, - const TKey& key, - const TBlobTableReaderOptions& options) -{ - return new TBlobTableReader( - path, - key, - ClientRetryPolicy_, - GetTransactionPinger(), - Context_, - TransactionId_, - options); -} - -IFileReaderPtr TClientBase::CreateFileReader( - const TRichYPath& path, - const TFileReaderOptions& options) -{ - return new TFileReader( - CanonizeYPath(path), - ClientRetryPolicy_, - GetTransactionPinger(), - Context_, - TransactionId_, - options); -} - -IFileWriterPtr TClientBase::CreateFileWriter( - const TRichYPath& path, - const TFileWriterOptions& options) -{ - auto realPath = CanonizeYPath(path); - if (!NRawClient::Exists(ClientRetryPolicy_->CreatePolicyForGenericRequest(), Context_, TransactionId_, realPath.Path_)) { - NRawClient::Create(ClientRetryPolicy_->CreatePolicyForGenericRequest(), Context_, TransactionId_, 
realPath.Path_, NT_FILE, - TCreateOptions().IgnoreExisting(true)); - } - return new TFileWriter(realPath, ClientRetryPolicy_, GetTransactionPinger(), Context_, TransactionId_, options); -} - -TTableWriterPtr<::google::protobuf::Message> TClientBase::CreateTableWriter( - const TRichYPath& path, const ::google::protobuf::Descriptor& descriptor, const TTableWriterOptions& options) -{ - const Message* prototype = google::protobuf::MessageFactory::generated_factory()->GetPrototype(&descriptor); - return new TTableWriter<::google::protobuf::Message>(CreateProtoWriter(path, options, prototype)); -} - -TRawTableReaderPtr TClientBase::CreateRawReader( - const TRichYPath& path, - const TFormat& format, - const TTableReaderOptions& options) -{ - return CreateClientReader(path, format, options).Get(); -} - -TRawTableWriterPtr TClientBase::CreateRawWriter( - const TRichYPath& path, - const TFormat& format, - const TTableWriterOptions& options) -{ - return ::MakeIntrusive<TRetryfulWriter>( - ClientRetryPolicy_, - GetTransactionPinger(), - Context_, - TransactionId_, - GetWriteTableCommand(Context_.Config->ApiVersion), - format, - CanonizeYPath(path), - options).Get(); -} - -IOperationPtr TClientBase::DoMap( - const TMapOperationSpec& spec, - ::TIntrusivePtr<IStructuredJob> mapper, - const TOperationOptions& options) -{ - auto operation = ::MakeIntrusive<TOperation>(GetParentClientImpl()); - auto prepareOperation = [ - this_ = ::TIntrusivePtr(this), - operation, - spec, - mapper, - options - ] () { - ExecuteMap( - operation, - ::MakeIntrusive<TOperationPreparer>(this_->GetParentClientImpl(), this_->TransactionId_), - spec, - mapper, - options); - }; - return ProcessOperation(GetParentClientImpl(), std::move(prepareOperation), std::move(operation), options); -} - -IOperationPtr TClientBase::RawMap( - const TRawMapOperationSpec& spec, - ::TIntrusivePtr<IRawJob> mapper, - const TOperationOptions& options) -{ - auto operation = ::MakeIntrusive<TOperation>(GetParentClientImpl()); - 
auto prepareOperation = [ - this_=::TIntrusivePtr(this), - operation, - spec, - mapper, - options - ] () { - ExecuteRawMap( - operation, - ::MakeIntrusive<TOperationPreparer>(this_->GetParentClientImpl(), this_->TransactionId_), - spec, - mapper, - options); - }; - return ProcessOperation(GetParentClientImpl(), std::move(prepareOperation), std::move(operation), options); -} - -IOperationPtr TClientBase::DoReduce( - const TReduceOperationSpec& spec, - ::TIntrusivePtr<IStructuredJob> reducer, - const TOperationOptions& options) -{ - auto operation = ::MakeIntrusive<TOperation>(GetParentClientImpl()); - auto prepareOperation = [ - this_=::TIntrusivePtr(this), - operation, - spec, - reducer, - options - ] () { - ExecuteReduce( - operation, - ::MakeIntrusive<TOperationPreparer>(this_->GetParentClientImpl(), this_->TransactionId_), - spec, - reducer, - options); - }; - return ProcessOperation(GetParentClientImpl(), std::move(prepareOperation), std::move(operation), options); -} - -IOperationPtr TClientBase::RawReduce( - const TRawReduceOperationSpec& spec, - ::TIntrusivePtr<IRawJob> reducer, - const TOperationOptions& options) -{ - auto operation = ::MakeIntrusive<TOperation>(GetParentClientImpl()); - auto prepareOperation = [ - this_=::TIntrusivePtr(this), - operation, - spec, - reducer, - options - ] () { - ExecuteRawReduce( - operation, - ::MakeIntrusive<TOperationPreparer>(this_->GetParentClientImpl(), this_->TransactionId_), - spec, - reducer, - options); - }; - return ProcessOperation(GetParentClientImpl(), std::move(prepareOperation), std::move(operation), options); -} - -IOperationPtr TClientBase::DoJoinReduce( - const TJoinReduceOperationSpec& spec, - ::TIntrusivePtr<IStructuredJob> reducer, - const TOperationOptions& options) -{ - auto operation = ::MakeIntrusive<TOperation>(GetParentClientImpl()); - auto prepareOperation = [ - this_=::TIntrusivePtr(this), - operation, - spec, - reducer, - options - ] () { - ExecuteJoinReduce( - operation, - 
::MakeIntrusive<TOperationPreparer>(this_->GetParentClientImpl(), this_->TransactionId_), - spec, - reducer, - options); - }; - return ProcessOperation(GetParentClientImpl(), std::move(prepareOperation), std::move(operation), options); -} - -IOperationPtr TClientBase::RawJoinReduce( - const TRawJoinReduceOperationSpec& spec, - ::TIntrusivePtr<IRawJob> reducer, - const TOperationOptions& options) -{ - auto operation = ::MakeIntrusive<TOperation>(GetParentClientImpl()); - auto prepareOperation = [ - this_=::TIntrusivePtr(this), - operation, - spec, - reducer, - options - ] () { - ExecuteRawJoinReduce( - operation, - ::MakeIntrusive<TOperationPreparer>(this_->GetParentClientImpl(), this_->TransactionId_), - spec, - reducer, - options); - }; - return ProcessOperation(GetParentClientImpl(), std::move(prepareOperation), std::move(operation), options); -} - -IOperationPtr TClientBase::DoMapReduce( - const TMapReduceOperationSpec& spec, - ::TIntrusivePtr<IStructuredJob> mapper, - ::TIntrusivePtr<IStructuredJob> reduceCombiner, - ::TIntrusivePtr<IStructuredJob> reducer, - const TOperationOptions& options) -{ - auto operation = ::MakeIntrusive<TOperation>(GetParentClientImpl()); - auto prepareOperation = [ - this_=::TIntrusivePtr(this), - operation, - spec, - mapper, - reduceCombiner, - reducer, - options - ] () { - ExecuteMapReduce( - operation, - ::MakeIntrusive<TOperationPreparer>(this_->GetParentClientImpl(), this_->TransactionId_), - spec, - mapper, - reduceCombiner, - reducer, - options); - }; - return ProcessOperation(GetParentClientImpl(), std::move(prepareOperation), std::move(operation), options); -} - -IOperationPtr TClientBase::RawMapReduce( - const TRawMapReduceOperationSpec& spec, - ::TIntrusivePtr<IRawJob> mapper, - ::TIntrusivePtr<IRawJob> reduceCombiner, - ::TIntrusivePtr<IRawJob> reducer, - const TOperationOptions& options) -{ - auto operation = ::MakeIntrusive<TOperation>(GetParentClientImpl()); - auto prepareOperation = [ - this_=::TIntrusivePtr(this), - 
operation, - spec, - mapper, - reduceCombiner, - reducer, - options - ] () { - ExecuteRawMapReduce( - operation, - ::MakeIntrusive<TOperationPreparer>(this_->GetParentClientImpl(), this_->TransactionId_), - spec, - mapper, - reduceCombiner, - reducer, - options); - }; - return ProcessOperation(GetParentClientImpl(), std::move(prepareOperation), std::move(operation), options); -} - -IOperationPtr TClientBase::Sort( - const TSortOperationSpec& spec, - const TOperationOptions& options) -{ - auto operation = ::MakeIntrusive<TOperation>(GetParentClientImpl()); - auto prepareOperation = [ - this_ = ::TIntrusivePtr(this), - operation, - spec, - options - ] () { - ExecuteSort( - operation, - ::MakeIntrusive<TOperationPreparer>(this_->GetParentClientImpl(), this_->TransactionId_), - spec, - options); - }; - return ProcessOperation(GetParentClientImpl(), std::move(prepareOperation), std::move(operation), options); -} - -IOperationPtr TClientBase::Merge( - const TMergeOperationSpec& spec, - const TOperationOptions& options) -{ - auto operation = ::MakeIntrusive<TOperation>(GetParentClientImpl()); - auto prepareOperation = [ - this_ = ::TIntrusivePtr(this), - operation, - spec, - options - ] () { - ExecuteMerge( - operation, - ::MakeIntrusive<TOperationPreparer>(this_->GetParentClientImpl(), this_->TransactionId_), - spec, - options); - }; - return ProcessOperation(GetParentClientImpl(), std::move(prepareOperation), std::move(operation), options); -} - -IOperationPtr TClientBase::Erase( - const TEraseOperationSpec& spec, - const TOperationOptions& options) -{ - auto operation = ::MakeIntrusive<TOperation>(GetParentClientImpl()); - auto prepareOperation = [ - this_ = ::TIntrusivePtr(this), - operation, - spec, - options - ] () { - ExecuteErase( - operation, - ::MakeIntrusive<TOperationPreparer>(this_->GetParentClientImpl(), this_->TransactionId_), - spec, - options); - }; - return ProcessOperation(GetParentClientImpl(), std::move(prepareOperation), std::move(operation), 
options); -} - -IOperationPtr TClientBase::RemoteCopy( - const TRemoteCopyOperationSpec& spec, - const TOperationOptions& options) -{ - auto operation = ::MakeIntrusive<TOperation>(GetParentClientImpl()); - auto prepareOperation = [ - this_ = ::TIntrusivePtr(this), - operation, - spec, - options - ] () { - ExecuteRemoteCopy( - operation, - ::MakeIntrusive<TOperationPreparer>(this_->GetParentClientImpl(), this_->TransactionId_), - spec, - options); - }; - return ProcessOperation(GetParentClientImpl(), std::move(prepareOperation), std::move(operation), options); -} - -IOperationPtr TClientBase::RunVanilla( - const TVanillaOperationSpec& spec, - const TOperationOptions& options) -{ - auto operation = ::MakeIntrusive<TOperation>(GetParentClientImpl()); - auto prepareOperation = [ - this_ = ::TIntrusivePtr(this), - operation, - spec, - options - ] () { - ExecuteVanilla( - operation, - ::MakeIntrusive<TOperationPreparer>(this_->GetParentClientImpl(), this_->TransactionId_), - spec, - options); - }; - return ProcessOperation(GetParentClientImpl(), std::move(prepareOperation), std::move(operation), options); -} - -IOperationPtr TClientBase::AttachOperation(const TOperationId& operationId) -{ - auto operation = ::MakeIntrusive<TOperation>(operationId, GetParentClientImpl()); - operation->GetBriefState(); // check that operation exists - return operation; -} - -EOperationBriefState TClientBase::CheckOperation(const TOperationId& operationId) -{ - return NYT::NDetail::CheckOperation(ClientRetryPolicy_, Context_, operationId); -} - -void TClientBase::AbortOperation(const TOperationId& operationId) -{ - NRawClient::AbortOperation(ClientRetryPolicy_->CreatePolicyForGenericRequest(), Context_, operationId); -} - -void TClientBase::CompleteOperation(const TOperationId& operationId) -{ - NRawClient::CompleteOperation(ClientRetryPolicy_->CreatePolicyForGenericRequest(), Context_, operationId); -} - -void TClientBase::WaitForOperation(const TOperationId& operationId) -{ - 
NYT::NDetail::WaitForOperation(ClientRetryPolicy_, Context_, operationId); -} - -void TClientBase::AlterTable( - const TYPath& path, - const TAlterTableOptions& options) -{ - NRawClient::AlterTable(ClientRetryPolicy_->CreatePolicyForGenericRequest(), Context_, TransactionId_, path, options); -} - -::TIntrusivePtr<TClientReader> TClientBase::CreateClientReader( - const TRichYPath& path, - const TFormat& format, - const TTableReaderOptions& options, - bool useFormatFromTableAttributes) -{ - return ::MakeIntrusive<TClientReader>( - CanonizeYPath(path), - ClientRetryPolicy_, - GetTransactionPinger(), - Context_, - TransactionId_, - format, - options, - useFormatFromTableAttributes); -} - -THolder<TClientWriter> TClientBase::CreateClientWriter( - const TRichYPath& path, - const TFormat& format, - const TTableWriterOptions& options) -{ - auto realPath = CanonizeYPath(path); - if (!NRawClient::Exists(ClientRetryPolicy_->CreatePolicyForGenericRequest(), Context_, TransactionId_, realPath.Path_)) { - NRawClient::Create(ClientRetryPolicy_->CreatePolicyForGenericRequest(), Context_, TransactionId_, realPath.Path_, NT_TABLE, - TCreateOptions().IgnoreExisting(true)); - } - return MakeHolder<TClientWriter>( - realPath, - ClientRetryPolicy_, - GetTransactionPinger(), - Context_, - TransactionId_, - format, - options - ); -} - -::TIntrusivePtr<INodeReaderImpl> TClientBase::CreateNodeReader( - const TRichYPath& path, const TTableReaderOptions& options) -{ - auto format = TFormat::YsonBinary(); - ApplyFormatHints<TNode>(&format, options.FormatHints_); - - // Skiff is disabled here because of large header problem (see https://st.yandex-team.ru/YT-6926). - // Revert this code to r3614168 when it is fixed. 
- return new TNodeTableReader( - CreateClientReader(path, format, options)); -} - -::TIntrusivePtr<IYaMRReaderImpl> TClientBase::CreateYaMRReader( - const TRichYPath& path, const TTableReaderOptions& options) -{ - return new TYaMRTableReader( - CreateClientReader(path, TFormat::YaMRLenval(), options, /* useFormatFromTableAttributes = */ true)); -} - -::TIntrusivePtr<IProtoReaderImpl> TClientBase::CreateProtoReader( - const TRichYPath& path, - const TTableReaderOptions& options, - const Message* prototype) -{ - TVector<const ::google::protobuf::Descriptor*> descriptors; - descriptors.push_back(prototype->GetDescriptor()); - - if (Context_.Config->UseClientProtobuf) { - return new TProtoTableReader( - CreateClientReader(path, TFormat::YsonBinary(), options), - std::move(descriptors)); - } else { - auto format = TFormat::Protobuf({prototype->GetDescriptor()}, Context_.Config->ProtobufFormatWithDescriptors); - return new TLenvalProtoTableReader( - CreateClientReader(path, format, options), - std::move(descriptors)); - } -} - -::TIntrusivePtr<ISkiffRowReaderImpl> TClientBase::CreateSkiffRowReader( - const TRichYPath& path, - const TTableReaderOptions& options, - const ISkiffRowSkipperPtr& skipper, - const NSkiff::TSkiffSchemaPtr& schema) -{ - auto skiffOptions = TCreateSkiffSchemaOptions().HasRangeIndex(true); - auto resultSchema = NYT::NDetail::CreateSkiffSchema(TVector{schema}, skiffOptions); - return new TSkiffRowTableReader( - CreateClientReader(path, NYT::NDetail::CreateSkiffFormat(resultSchema), options), - resultSchema, - {skipper}, - std::move(skiffOptions)); -} - -::TIntrusivePtr<INodeWriterImpl> TClientBase::CreateNodeWriter( - const TRichYPath& path, const TTableWriterOptions& options) -{ - auto format = TFormat::YsonBinary(); - ApplyFormatHints<TNode>(&format, options.FormatHints_); - - return new TNodeTableWriter( - CreateClientWriter(path, format, options)); -} - -::TIntrusivePtr<IYaMRWriterImpl> TClientBase::CreateYaMRWriter( - const TRichYPath& path, 
const TTableWriterOptions& options) -{ - auto format = TFormat::YaMRLenval(); - ApplyFormatHints<TYaMRRow>(&format, options.FormatHints_); - - return new TYaMRTableWriter( - CreateClientWriter(path, format, options)); -} - -::TIntrusivePtr<IProtoWriterImpl> TClientBase::CreateProtoWriter( - const TRichYPath& path, - const TTableWriterOptions& options, - const Message* prototype) -{ - TVector<const ::google::protobuf::Descriptor*> descriptors; - descriptors.push_back(prototype->GetDescriptor()); - - auto pathWithSchema = path; - if (options.InferSchema_.GetOrElse(Context_.Config->InferTableSchema) && !path.Schema_) { - pathWithSchema.Schema(CreateTableSchema(*prototype->GetDescriptor())); - } - - if (Context_.Config->UseClientProtobuf) { - auto format = TFormat::YsonBinary(); - ApplyFormatHints<TNode>(&format, options.FormatHints_); - return new TProtoTableWriter( - CreateClientWriter(pathWithSchema, format, options), - std::move(descriptors)); - } else { - auto format = TFormat::Protobuf({prototype->GetDescriptor()}, Context_.Config->ProtobufFormatWithDescriptors); - ApplyFormatHints<::google::protobuf::Message>(&format, options.FormatHints_); - return new TLenvalProtoTableWriter( - CreateClientWriter(pathWithSchema, format, options), - std::move(descriptors)); - } -} - -TBatchRequestPtr TClientBase::CreateBatchRequest() -{ - return MakeIntrusive<TBatchRequest>(TransactionId_, GetParentClientImpl()); -} - -IClientPtr TClientBase::GetParentClient() -{ - return GetParentClientImpl(); -} - -const TClientContext& TClientBase::GetContext() const -{ - return Context_; -} - -const IClientRetryPolicyPtr& TClientBase::GetRetryPolicy() const -{ - return ClientRetryPolicy_; -} - -//////////////////////////////////////////////////////////////////////////////// - -TTransaction::TTransaction( - TClientPtr parentClient, - const TClientContext& context, - const TTransactionId& parentTransactionId, - const TStartTransactionOptions& options) - : TClientBase(context, 
parentTransactionId, parentClient->GetRetryPolicy()) - , TransactionPinger_(parentClient->GetTransactionPinger()) - , PingableTx_( - MakeHolder<TPingableTransaction>( - parentClient->GetRetryPolicy(), - context, - parentTransactionId, - TransactionPinger_->GetChildTxPinger(), - options)) - , ParentClient_(parentClient) -{ - TransactionId_ = PingableTx_->GetId(); -} - -TTransaction::TTransaction( - TClientPtr parentClient, - const TClientContext& context, - const TTransactionId& transactionId, - const TAttachTransactionOptions& options) - : TClientBase(context, transactionId, parentClient->GetRetryPolicy()) - , TransactionPinger_(parentClient->GetTransactionPinger()) - , PingableTx_( - new TPingableTransaction( - parentClient->GetRetryPolicy(), - context, - transactionId, - parentClient->GetTransactionPinger()->GetChildTxPinger(), - options)) - , ParentClient_(parentClient) -{ } - -const TTransactionId& TTransaction::GetId() const -{ - return TransactionId_; -} - -ILockPtr TTransaction::Lock( - const TYPath& path, - ELockMode mode, - const TLockOptions& options) -{ - auto lockId = NRawClient::Lock(ClientRetryPolicy_->CreatePolicyForGenericRequest(), Context_, TransactionId_, path, mode, options); - return ::MakeIntrusive<TLock>(lockId, GetParentClientImpl(), options.Waitable_); -} - -void TTransaction::Unlock( - const TYPath& path, - const TUnlockOptions& options) -{ - NRawClient::Unlock(ClientRetryPolicy_->CreatePolicyForGenericRequest(), Context_, TransactionId_, path, options); -} - -void TTransaction::Commit() -{ - PingableTx_->Commit(); -} - -void TTransaction::Abort() -{ - PingableTx_->Abort(); -} - -void TTransaction::Ping() -{ - PingTx(ClientRetryPolicy_->CreatePolicyForGenericRequest(), Context_, TransactionId_); -} - -void TTransaction::Detach() -{ - PingableTx_->Detach(); -} - -ITransactionPingerPtr TTransaction::GetTransactionPinger() -{ - return TransactionPinger_; -} - -TClientPtr TTransaction::GetParentClientImpl() -{ - return ParentClient_; -} - 
-//////////////////////////////////////////////////////////////////////////////// - -TClient::TClient( - const TClientContext& context, - const TTransactionId& globalId, - IClientRetryPolicyPtr retryPolicy) - : TClientBase(context, globalId, retryPolicy) - , TransactionPinger_(nullptr) -{ } - -TClient::~TClient() = default; - -ITransactionPtr TClient::AttachTransaction( - const TTransactionId& transactionId, - const TAttachTransactionOptions& options) -{ - CheckShutdown(); - - return MakeIntrusive<TTransaction>(this, Context_, transactionId, options); -} - -void TClient::MountTable( - const TYPath& path, - const TMountTableOptions& options) -{ - CheckShutdown(); - - THttpHeader header("POST", "mount_table"); - SetTabletParams(header, path, options); - if (options.CellId_) { - header.AddParameter("cell_id", GetGuidAsString(*options.CellId_)); - } - header.AddParameter("freeze", options.Freeze_); - RetryRequestWithPolicy(ClientRetryPolicy_->CreatePolicyForGenericRequest(), Context_, header); -} - -void TClient::UnmountTable( - const TYPath& path, - const TUnmountTableOptions& options) -{ - CheckShutdown(); - - THttpHeader header("POST", "unmount_table"); - SetTabletParams(header, path, options); - header.AddParameter("force", options.Force_); - RetryRequestWithPolicy(ClientRetryPolicy_->CreatePolicyForGenericRequest(), Context_, header); -} - -void TClient::RemountTable( - const TYPath& path, - const TRemountTableOptions& options) -{ - CheckShutdown(); - - THttpHeader header("POST", "remount_table"); - SetTabletParams(header, path, options); - RetryRequestWithPolicy(ClientRetryPolicy_->CreatePolicyForGenericRequest(), Context_, header); -} - -void TClient::FreezeTable( - const TYPath& path, - const TFreezeTableOptions& options) -{ - CheckShutdown(); - NRawClient::FreezeTable(ClientRetryPolicy_->CreatePolicyForGenericRequest(), Context_, path, options); -} - -void TClient::UnfreezeTable( - const TYPath& path, - const TUnfreezeTableOptions& options) -{ - 
CheckShutdown(); - NRawClient::UnfreezeTable(ClientRetryPolicy_->CreatePolicyForGenericRequest(), Context_, path, options); -} - -void TClient::ReshardTable( - const TYPath& path, - const TVector<TKey>& keys, - const TReshardTableOptions& options) -{ - CheckShutdown(); - - THttpHeader header("POST", "reshard_table"); - SetTabletParams(header, path, options); - header.AddParameter("pivot_keys", BuildYsonNodeFluently().List(keys)); - RetryRequestWithPolicy(ClientRetryPolicy_->CreatePolicyForGenericRequest(), Context_, header); -} - -void TClient::ReshardTable( - const TYPath& path, - i64 tabletCount, - const TReshardTableOptions& options) -{ - CheckShutdown(); - - THttpHeader header("POST", "reshard_table"); - SetTabletParams(header, path, options); - header.AddParameter("tablet_count", tabletCount); - RetryRequestWithPolicy(ClientRetryPolicy_->CreatePolicyForGenericRequest(), Context_, header); -} - -void TClient::InsertRows( - const TYPath& path, - const TNode::TListType& rows, - const TInsertRowsOptions& options) -{ - CheckShutdown(); - - THttpHeader header("PUT", "insert_rows"); - header.SetInputFormat(TFormat::YsonBinary()); - // TODO: use corresponding raw request - header.MergeParameters(SerializeParametersForInsertRows(Context_.Config->Prefix, path, options)); - - auto body = NodeListToYsonString(rows); - TRequestConfig config; - config.IsHeavy = true; - RetryRequestWithPolicy(ClientRetryPolicy_->CreatePolicyForGenericRequest(), Context_, header, body, config); -} - -void TClient::DeleteRows( - const TYPath& path, - const TNode::TListType& keys, - const TDeleteRowsOptions& options) -{ - CheckShutdown(); - return NRawClient::DeleteRows(ClientRetryPolicy_->CreatePolicyForGenericRequest(), Context_, path, keys, options); -} - -void TClient::TrimRows( - const TYPath& path, - i64 tabletIndex, - i64 rowCount, - const TTrimRowsOptions& options) -{ - CheckShutdown(); - - THttpHeader header("POST", "trim_rows"); - header.AddParameter("trimmed_row_count", rowCount); - 
header.AddParameter("tablet_index", tabletIndex); - // TODO: use corresponding raw request - header.MergeParameters(NRawClient::SerializeParametersForTrimRows(Context_.Config->Prefix, path, options)); - - TRequestConfig config; - config.IsHeavy = true; - RetryRequestWithPolicy(ClientRetryPolicy_->CreatePolicyForGenericRequest(), Context_, header, {}, config); -} - -TNode::TListType TClient::LookupRows( - const TYPath& path, - const TNode::TListType& keys, - const TLookupRowsOptions& options) -{ - CheckShutdown(); - - Y_UNUSED(options); - THttpHeader header("PUT", "lookup_rows"); - header.AddPath(AddPathPrefix(path, Context_.Config->ApiVersion)); - header.SetInputFormat(TFormat::YsonBinary()); - header.SetOutputFormat(TFormat::YsonBinary()); - - header.MergeParameters(BuildYsonNodeFluently().BeginMap() - .DoIf(options.Timeout_.Defined(), [&] (TFluentMap fluent) { - fluent.Item("timeout").Value(static_cast<i64>(options.Timeout_->MilliSeconds())); - }) - .Item("keep_missing_rows").Value(options.KeepMissingRows_) - .DoIf(options.Versioned_.Defined(), [&] (TFluentMap fluent) { - fluent.Item("versioned").Value(*options.Versioned_); - }) - .DoIf(options.Columns_.Defined(), [&] (TFluentMap fluent) { - fluent.Item("column_names").Value(*options.Columns_); - }) - .EndMap()); - - auto body = NodeListToYsonString(keys); - TRequestConfig config; - config.IsHeavy = true; - auto result = RetryRequestWithPolicy(ClientRetryPolicy_->CreatePolicyForGenericRequest(), Context_, header, body, config); - return NodeFromYsonString(result.Response, ::NYson::EYsonType::ListFragment).AsList(); -} - -TNode::TListType TClient::SelectRows( - const TString& query, - const TSelectRowsOptions& options) -{ - CheckShutdown(); - - THttpHeader header("GET", "select_rows"); - header.SetInputFormat(TFormat::YsonBinary()); - header.SetOutputFormat(TFormat::YsonBinary()); - - header.MergeParameters(BuildYsonNodeFluently().BeginMap() - .Item("query").Value(query) - .DoIf(options.Timeout_.Defined(), [&] 
(TFluentMap fluent) { - fluent.Item("timeout").Value(static_cast<i64>(options.Timeout_->MilliSeconds())); - }) - .DoIf(options.InputRowLimit_.Defined(), [&] (TFluentMap fluent) { - fluent.Item("input_row_limit").Value(*options.InputRowLimit_); - }) - .DoIf(options.OutputRowLimit_.Defined(), [&] (TFluentMap fluent) { - fluent.Item("output_row_limit").Value(*options.OutputRowLimit_); - }) - .Item("range_expansion_limit").Value(options.RangeExpansionLimit_) - .Item("fail_on_incomplete_result").Value(options.FailOnIncompleteResult_) - .Item("verbose_logging").Value(options.VerboseLogging_) - .Item("enable_code_cache").Value(options.EnableCodeCache_) - .EndMap()); - - TRequestConfig config; - config.IsHeavy = true; - auto result = RetryRequestWithPolicy(ClientRetryPolicy_->CreatePolicyForGenericRequest(), Context_, header, {}, config); - return NodeFromYsonString(result.Response, ::NYson::EYsonType::ListFragment).AsList(); -} - -void TClient::AlterTableReplica(const TReplicaId& replicaId, const TAlterTableReplicaOptions& options) -{ - CheckShutdown(); - NRawClient::AlterTableReplica(ClientRetryPolicy_->CreatePolicyForGenericRequest(), Context_, replicaId, options); -} - -ui64 TClient::GenerateTimestamp() -{ - CheckShutdown(); - THttpHeader header("GET", "generate_timestamp"); - TRequestConfig config; - config.IsHeavy = true; - auto requestResult = RetryRequestWithPolicy(ClientRetryPolicy_->CreatePolicyForGenericRequest(), Context_, header, {}, config); - return NodeFromYsonString(requestResult.Response).AsUint64(); -} - -TAuthorizationInfo TClient::WhoAmI() -{ - CheckShutdown(); - - THttpHeader header("GET", "auth/whoami", /* isApi = */ false); - auto requestResult = RetryRequestWithPolicy(ClientRetryPolicy_->CreatePolicyForGenericRequest(), Context_, header); - TAuthorizationInfo result; - - NJson::TJsonValue jsonValue; - bool ok = NJson::ReadJsonTree(requestResult.Response, &jsonValue, /* throwOnError = */ true); - Y_VERIFY(ok); - result.Login = 
jsonValue["login"].GetString(); - result.Realm = jsonValue["realm"].GetString(); - return result; -} - -TOperationAttributes TClient::GetOperation( - const TOperationId& operationId, - const TGetOperationOptions& options) -{ - CheckShutdown(); - return NRawClient::GetOperation(ClientRetryPolicy_->CreatePolicyForGenericRequest(), Context_, operationId, options); -} - -TListOperationsResult TClient::ListOperations( - const TListOperationsOptions& options) -{ - CheckShutdown(); - return NRawClient::ListOperations(ClientRetryPolicy_->CreatePolicyForGenericRequest(), Context_, options); -} - -void TClient::UpdateOperationParameters( - const TOperationId& operationId, - const TUpdateOperationParametersOptions& options) -{ - CheckShutdown(); - return NRawClient::UpdateOperationParameters(ClientRetryPolicy_->CreatePolicyForGenericRequest(), Context_, operationId, options); -} - -TJobAttributes TClient::GetJob( - const TOperationId& operationId, - const TJobId& jobId, - const TGetJobOptions& options) -{ - CheckShutdown(); - return NRawClient::GetJob(ClientRetryPolicy_->CreatePolicyForGenericRequest(), Context_, operationId, jobId, options); -} - -TListJobsResult TClient::ListJobs( - const TOperationId& operationId, - const TListJobsOptions& options) -{ - CheckShutdown(); - return NRawClient::ListJobs(ClientRetryPolicy_->CreatePolicyForGenericRequest(), Context_, operationId, options); -} - -IFileReaderPtr TClient::GetJobInput( - const TJobId& jobId, - const TGetJobInputOptions& options) -{ - CheckShutdown(); - return NRawClient::GetJobInput(Context_, jobId, options); -} - -IFileReaderPtr TClient::GetJobFailContext( - const TOperationId& operationId, - const TJobId& jobId, - const TGetJobFailContextOptions& options) -{ - CheckShutdown(); - return NRawClient::GetJobFailContext(Context_, operationId, jobId, options); -} - -IFileReaderPtr TClient::GetJobStderr( - const TOperationId& operationId, - const TJobId& jobId, - const TGetJobStderrOptions& options) -{ - CheckShutdown(); 
- return NRawClient::GetJobStderr(Context_, operationId, jobId, options); -} - -TNode::TListType TClient::SkyShareTable( - const std::vector<TYPath>& tablePaths, - const TSkyShareTableOptions& options) -{ - CheckShutdown(); - return NRawClient::SkyShareTable( - ClientRetryPolicy_->CreatePolicyForGenericRequest(), - Context_, - tablePaths, - options); -} - -TCheckPermissionResponse TClient::CheckPermission( - const TString& user, - EPermission permission, - const TYPath& path, - const TCheckPermissionOptions& options) -{ - CheckShutdown(); - return NRawClient::CheckPermission(ClientRetryPolicy_->CreatePolicyForGenericRequest(), Context_, user, permission, path, options); -} - -TVector<TTabletInfo> TClient::GetTabletInfos( - const TYPath& path, - const TVector<int>& tabletIndexes, - const TGetTabletInfosOptions& options) -{ - CheckShutdown(); - return NRawClient::GetTabletInfos(ClientRetryPolicy_->CreatePolicyForGenericRequest(), Context_, path, tabletIndexes, options); -} - - -void TClient::SuspendOperation( - const TOperationId& operationId, - const TSuspendOperationOptions& options) -{ - CheckShutdown(); - NRawClient::SuspendOperation(ClientRetryPolicy_->CreatePolicyForGenericRequest(), Context_, operationId, options); -} - -void TClient::ResumeOperation( - const TOperationId& operationId, - const TResumeOperationOptions& options) -{ - CheckShutdown(); - NRawClient::ResumeOperation(ClientRetryPolicy_->CreatePolicyForGenericRequest(), Context_, operationId, options); -} - -TYtPoller& TClient::GetYtPoller() -{ - auto g = Guard(YtPollerLock_); - if (!YtPoller_) { - CheckShutdown(); - // We don't use current client and create new client because YtPoller_ might use - // this client during current client shutdown. - // That might lead to incrementing of current client refcount and double delete of current client object. 
- YtPoller_ = MakeHolder<TYtPoller>(Context_, ClientRetryPolicy_); - } - return *YtPoller_; -} - -void TClient::Shutdown() -{ - auto g = Guard(YtPollerLock_); - - if (!Shutdown_.exchange(true) && YtPoller_) { - YtPoller_->Stop(); - } -} - -ITransactionPingerPtr TClient::GetTransactionPinger() -{ - if (!TransactionPinger_) { - TransactionPinger_ = CreateTransactionPinger(Context_.Config); - } - return TransactionPinger_; -} - -TClientPtr TClient::GetParentClientImpl() -{ - return this; -} - -template <class TOptions> -void TClient::SetTabletParams( - THttpHeader& header, - const TYPath& path, - const TOptions& options) -{ - header.AddPath(AddPathPrefix(path, Context_.Config->Prefix)); - if (options.FirstTabletIndex_) { - header.AddParameter("first_tablet_index", *options.FirstTabletIndex_); - } - if (options.LastTabletIndex_) { - header.AddParameter("last_tablet_index", *options.LastTabletIndex_); - } -} - -void TClient::CheckShutdown() const -{ - if (Shutdown_) { - ythrow TApiUsageError() << "Call client's methods after shutdown"; - } -} - -TClientPtr CreateClientImpl( - const TString& serverName, - const TCreateClientOptions& options) -{ - TClientContext context; - context.Config = options.Config_ ? 
options.Config_ : TConfig::Get(); - context.TvmOnly = options.TvmOnly_; - context.UseTLS = options.UseTLS_; - - context.ServerName = serverName; - if (serverName.find('.') == TString::npos && - serverName.find(':') == TString::npos) - { - context.ServerName += ".yt.yandex.net"; - } - - if (serverName.find(':') == TString::npos) { - context.ServerName = CreateHostNameWithPort(context.ServerName, context); - } - if (options.TvmOnly_) { - context.ServerName = Format("tvm.%v", context.ServerName); - } - - if (options.UseTLS_ || options.UseCoreHttpClient_) { - context.HttpClient = NHttpClient::CreateCoreHttpClient(options.UseTLS_, context.Config); - } else { - context.HttpClient = NHttpClient::CreateDefaultHttpClient(); - } - - context.Token = context.Config->Token; - if (options.Token_) { - context.Token = options.Token_; - } else if (options.TokenPath_) { - context.Token = TConfig::LoadTokenFromFile(options.TokenPath_); - } else if (options.ServiceTicketAuth_) { - context.ServiceTicketAuth = options.ServiceTicketAuth_; - } - - context.ImpersonationUser = options.ImpersonationUser_; - - if (context.Token) { - TConfig::ValidateToken(context.Token); - } - - auto globalTxId = GetGuid(context.Config->GlobalTxId); - - auto retryConfigProvider = options.RetryConfigProvider_; - if (!retryConfigProvider) { - retryConfigProvider = CreateDefaultRetryConfigProvider(); - } - return new NDetail::TClient(context, globalTxId, CreateDefaultClientRetryPolicy(retryConfigProvider, context.Config)); -} - -//////////////////////////////////////////////////////////////////////////////// - -} // namespace NDetail - -//////////////////////////////////////////////////////////////////////////////// - -IClientPtr CreateClient( - const TString& serverName, - const TCreateClientOptions& options) -{ - return NDetail::CreateClientImpl(serverName, options); -} - -IClientPtr CreateClientFromEnv(const TCreateClientOptions& options) -{ - auto serverName = GetEnv("YT_PROXY"); - if (!serverName) { - 
ythrow yexception() << "YT_PROXY is not set"; - } - - return NDetail::CreateClientImpl(serverName, options); -} - - -//////////////////////////////////////////////////////////////////////////////// - -} // namespace NYT diff --git a/yt/cpp/mapreduce/client/client.h b/yt/cpp/mapreduce/client/client.h deleted file mode 100644 index 0f4df09d0b..0000000000 --- a/yt/cpp/mapreduce/client/client.h +++ /dev/null @@ -1,506 +0,0 @@ -#pragma once - -#include "client_reader.h" -#include "client_writer.h" -#include "transaction_pinger.h" - -#include <yt/cpp/mapreduce/interface/client.h> - -#include <yt/cpp/mapreduce/http/context.h> -#include <yt/cpp/mapreduce/http/requests.h> - -namespace NYT { -namespace NDetail { - -//////////////////////////////////////////////////////////////////////////////// - -class TYtPoller; - -class TClientBase; -using TClientBasePtr = ::TIntrusivePtr<TClientBase>; - -class TClient; -using TClientPtr = ::TIntrusivePtr<TClient>; - -//////////////////////////////////////////////////////////////////////////////// - -class TClientBase - : virtual public IClientBase -{ -public: - TClientBase( - const TClientContext& context, - const TTransactionId& transactionId, - IClientRetryPolicyPtr retryPolicy); - - ITransactionPtr StartTransaction( - const TStartTransactionOptions& options) override; - - // cypress - - TNodeId Create( - const TYPath& path, - ENodeType type, - const TCreateOptions& options) override; - - void Remove( - const TYPath& path, - const TRemoveOptions& options) override; - - bool Exists( - const TYPath& path, - const TExistsOptions& options) override; - - TNode Get( - const TYPath& path, - const TGetOptions& options) override; - - void Set( - const TYPath& path, - const TNode& value, - const TSetOptions& options) override; - - void MultisetAttributes( - const TYPath& path, - const TNode::TMapType& value, - const TMultisetAttributesOptions& options) override; - - TNode::TListType List( - const TYPath& path, - const TListOptions& options) 
override; - - TNodeId Copy( - const TYPath& sourcePath, - const TYPath& destinationPath, - const TCopyOptions& options) override; - - TNodeId Move( - const TYPath& sourcePath, - const TYPath& destinationPath, - const TMoveOptions& options) override; - - TNodeId Link( - const TYPath& targetPath, - const TYPath& linkPath, - const TLinkOptions& options) override; - - void Concatenate( - const TVector<TRichYPath>& sourcePaths, - const TRichYPath& destinationPath, - const TConcatenateOptions& options) override; - - TRichYPath CanonizeYPath(const TRichYPath& path) override; - - TVector<TTableColumnarStatistics> GetTableColumnarStatistics( - const TVector<TRichYPath>& paths, - const TGetTableColumnarStatisticsOptions& options) override; - - TMultiTablePartitions GetTablePartitions( - const TVector<TRichYPath>& paths, - const TGetTablePartitionsOptions& options) override; - - TMaybe<TYPath> GetFileFromCache( - const TString& md5Signature, - const TYPath& cachePath, - const TGetFileFromCacheOptions& options = TGetFileFromCacheOptions()) override; - - TYPath PutFileToCache( - const TYPath& filePath, - const TString& md5Signature, - const TYPath& cachePath, - const TPutFileToCacheOptions& options = TPutFileToCacheOptions()) override; - - IFileReaderPtr CreateFileReader( - const TRichYPath& path, - const TFileReaderOptions& options) override; - - IFileWriterPtr CreateFileWriter( - const TRichYPath& path, - const TFileWriterOptions& options) override; - - TTableWriterPtr<::google::protobuf::Message> CreateTableWriter( - const TRichYPath& path, - const ::google::protobuf::Descriptor& descriptor, - const TTableWriterOptions& options) override; - - TRawTableReaderPtr CreateRawReader( - const TRichYPath& path, - const TFormat& format, - const TTableReaderOptions& options) override; - - TRawTableWriterPtr CreateRawWriter( - const TRichYPath& path, - const TFormat& format, - const TTableWriterOptions& options) override; - - IFileReaderPtr CreateBlobTableReader( - const TYPath& path, 
- const TKey& key, - const TBlobTableReaderOptions& options) override; - - // operations - - IOperationPtr DoMap( - const TMapOperationSpec& spec, - ::TIntrusivePtr<IStructuredJob> mapper, - const TOperationOptions& options) override; - - IOperationPtr RawMap( - const TRawMapOperationSpec& spec, - ::TIntrusivePtr<IRawJob> mapper, - const TOperationOptions& options) override; - - IOperationPtr DoReduce( - const TReduceOperationSpec& spec, - ::TIntrusivePtr<IStructuredJob> reducer, - const TOperationOptions& options) override; - - IOperationPtr RawReduce( - const TRawReduceOperationSpec& spec, - ::TIntrusivePtr<IRawJob> mapper, - const TOperationOptions& options) override; - - IOperationPtr DoJoinReduce( - const TJoinReduceOperationSpec& spec, - ::TIntrusivePtr<IStructuredJob> reducer, - const TOperationOptions& options) override; - - IOperationPtr RawJoinReduce( - const TRawJoinReduceOperationSpec& spec, - ::TIntrusivePtr<IRawJob> mapper, - const TOperationOptions& options) override; - - IOperationPtr DoMapReduce( - const TMapReduceOperationSpec& spec, - ::TIntrusivePtr<IStructuredJob> mapper, - ::TIntrusivePtr<IStructuredJob> reduceCombiner, - ::TIntrusivePtr<IStructuredJob> reducer, - const TOperationOptions& options) override; - - IOperationPtr RawMapReduce( - const TRawMapReduceOperationSpec& spec, - ::TIntrusivePtr<IRawJob> mapper, - ::TIntrusivePtr<IRawJob> reduceCombiner, - ::TIntrusivePtr<IRawJob> reducer, - const TOperationOptions& options) override; - - IOperationPtr Sort( - const TSortOperationSpec& spec, - const TOperationOptions& options) override; - - IOperationPtr Merge( - const TMergeOperationSpec& spec, - const TOperationOptions& options) override; - - IOperationPtr Erase( - const TEraseOperationSpec& spec, - const TOperationOptions& options) override; - - IOperationPtr RemoteCopy( - const TRemoteCopyOperationSpec& spec, - const TOperationOptions& options = TOperationOptions()) override; - - IOperationPtr RunVanilla( - const TVanillaOperationSpec& 
spec, - const TOperationOptions& options = TOperationOptions()) override; - - IOperationPtr AttachOperation(const TOperationId& operationId) override; - - EOperationBriefState CheckOperation(const TOperationId& operationId) override; - - void AbortOperation(const TOperationId& operationId) override; - - void CompleteOperation(const TOperationId& operationId) override; - - void WaitForOperation(const TOperationId& operationId) override; - - void AlterTable( - const TYPath& path, - const TAlterTableOptions& options) override; - - TBatchRequestPtr CreateBatchRequest() override; - - IClientPtr GetParentClient() override; - - const TClientContext& GetContext() const; - - const IClientRetryPolicyPtr& GetRetryPolicy() const; - - virtual ITransactionPingerPtr GetTransactionPinger() = 0; - -protected: - virtual TClientPtr GetParentClientImpl() = 0; - -protected: - const TClientContext Context_; - TTransactionId TransactionId_; - IClientRetryPolicyPtr ClientRetryPolicy_; - -private: - ::TIntrusivePtr<TClientReader> CreateClientReader( - const TRichYPath& path, - const TFormat& format, - const TTableReaderOptions& options, - bool useFormatFromTableAttributes = false); - - THolder<TClientWriter> CreateClientWriter( - const TRichYPath& path, - const TFormat& format, - const TTableWriterOptions& options); - - ::TIntrusivePtr<INodeReaderImpl> CreateNodeReader( - const TRichYPath& path, const TTableReaderOptions& options) override; - - ::TIntrusivePtr<IYaMRReaderImpl> CreateYaMRReader( - const TRichYPath& path, const TTableReaderOptions& options) override; - - ::TIntrusivePtr<IProtoReaderImpl> CreateProtoReader( - const TRichYPath& path, - const TTableReaderOptions& options, - const Message* prototype) override; - - ::TIntrusivePtr<ISkiffRowReaderImpl> CreateSkiffRowReader( - const TRichYPath& path, - const TTableReaderOptions& options, - const ISkiffRowSkipperPtr& skipper, - const NSkiff::TSkiffSchemaPtr& schema) override; - - ::TIntrusivePtr<INodeWriterImpl> CreateNodeWriter( - 
const TRichYPath& path, const TTableWriterOptions& options) override; - - ::TIntrusivePtr<IYaMRWriterImpl> CreateYaMRWriter( - const TRichYPath& path, const TTableWriterOptions& options) override; - - ::TIntrusivePtr<IProtoWriterImpl> CreateProtoWriter( - const TRichYPath& path, - const TTableWriterOptions& options, - const Message* prototype) override; -}; - -//////////////////////////////////////////////////////////////////////////////// - -class TTransaction - : public ITransaction - , public TClientBase -{ -public: - // - // Start a new transaction. - TTransaction( - TClientPtr parentClient, - const TClientContext& context, - const TTransactionId& parentTransactionId, - const TStartTransactionOptions& options); - - // - // Attach an existing transaction. - TTransaction( - TClientPtr parentClient, - const TClientContext& context, - const TTransactionId& transactionId, - const TAttachTransactionOptions& options); - - const TTransactionId& GetId() const override; - - ILockPtr Lock( - const TYPath& path, - ELockMode mode, - const TLockOptions& options) override; - - void Unlock( - const TYPath& path, - const TUnlockOptions& options) override; - - void Commit() override; - - void Abort() override; - - void Ping() override; - - void Detach() override; - - ITransactionPingerPtr GetTransactionPinger() override; - -protected: - TClientPtr GetParentClientImpl() override; - -private: - ITransactionPingerPtr TransactionPinger_; - THolder<TPingableTransaction> PingableTx_; - TClientPtr ParentClient_; -}; - -//////////////////////////////////////////////////////////////////////////////// - -class TClient - : public IClient - , public TClientBase -{ -public: - TClient( - const TClientContext& context, - const TTransactionId& globalId, - IClientRetryPolicyPtr retryPolicy); - - ~TClient(); - - ITransactionPtr AttachTransaction( - const TTransactionId& transactionId, - const TAttachTransactionOptions& options) override; - - void MountTable( - const TYPath& path, - const 
TMountTableOptions& options) override; - - void UnmountTable( - const TYPath& path, - const TUnmountTableOptions& options) override; - - void RemountTable( - const TYPath& path, - const TRemountTableOptions& options) override; - - void FreezeTable( - const TYPath& path, - const TFreezeTableOptions& options) override; - - void UnfreezeTable( - const TYPath& path, - const TUnfreezeTableOptions& options) override; - - void ReshardTable( - const TYPath& path, - const TVector<TKey>& keys, - const TReshardTableOptions& options) override; - - void ReshardTable( - const TYPath& path, - i64 tabletCount, - const TReshardTableOptions& options) override; - - void InsertRows( - const TYPath& path, - const TNode::TListType& rows, - const TInsertRowsOptions& options) override; - - void DeleteRows( - const TYPath& path, - const TNode::TListType& keys, - const TDeleteRowsOptions& options) override; - - void TrimRows( - const TYPath& path, - i64 tabletIndex, - i64 rowCount, - const TTrimRowsOptions& options) override; - - TNode::TListType LookupRows( - const TYPath& path, - const TNode::TListType& keys, - const TLookupRowsOptions& options) override; - - TNode::TListType SelectRows( - const TString& query, - const TSelectRowsOptions& options) override; - - void AlterTableReplica( - const TReplicaId& replicaId, - const TAlterTableReplicaOptions& alterTableReplicaOptions) override; - - ui64 GenerateTimestamp() override; - - TAuthorizationInfo WhoAmI() override; - - TOperationAttributes GetOperation( - const TOperationId& operationId, - const TGetOperationOptions& options) override; - - TListOperationsResult ListOperations( - const TListOperationsOptions& options) override; - - void UpdateOperationParameters( - const TOperationId& operationId, - const TUpdateOperationParametersOptions& options) override; - - TJobAttributes GetJob( - const TOperationId& operationId, - const TJobId& jobId, - const TGetJobOptions& options) override; - - TListJobsResult ListJobs( - const TOperationId& 
operationId, - const TListJobsOptions& options = TListJobsOptions()) override; - - IFileReaderPtr GetJobInput( - const TJobId& jobId, - const TGetJobInputOptions& options = TGetJobInputOptions()) override; - - IFileReaderPtr GetJobFailContext( - const TOperationId& operationId, - const TJobId& jobId, - const TGetJobFailContextOptions& options = TGetJobFailContextOptions()) override; - - IFileReaderPtr GetJobStderr( - const TOperationId& operationId, - const TJobId& jobId, - const TGetJobStderrOptions& options = TGetJobStderrOptions()) override; - - TNode::TListType SkyShareTable( - const std::vector<TYPath>& tablePaths, - const TSkyShareTableOptions& options = TSkyShareTableOptions()) override; - - TCheckPermissionResponse CheckPermission( - const TString& user, - EPermission permission, - const TYPath& path, - const TCheckPermissionOptions& options) override; - - TVector<TTabletInfo> GetTabletInfos( - const TYPath& path, - const TVector<int>& tabletIndexes, - const TGetTabletInfosOptions& options) override; - - void SuspendOperation( - const TOperationId& operationId, - const TSuspendOperationOptions& options) override; - - void ResumeOperation( - const TOperationId& operationId, - const TResumeOperationOptions& options) override; - - void Shutdown() override; - - ITransactionPingerPtr GetTransactionPinger() override; - - // Helper methods - TYtPoller& GetYtPoller(); - -protected: - TClientPtr GetParentClientImpl() override; - -private: - template <class TOptions> - void SetTabletParams( - THttpHeader& header, - const TYPath& path, - const TOptions& options); - - void CheckShutdown() const; - - ITransactionPingerPtr TransactionPinger_; - - std::atomic<bool> Shutdown_ = false; - TMutex YtPollerLock_; - THolder<TYtPoller> YtPoller_; -}; - -//////////////////////////////////////////////////////////////////////////////// - -TClientPtr CreateClientImpl( - const TString& serverName, - const TCreateClientOptions& options = TCreateClientOptions()); - 
-//////////////////////////////////////////////////////////////////////////////// - -} // namespace NDetail -} // namespace NYT diff --git a/yt/cpp/mapreduce/client/client_reader.cpp b/yt/cpp/mapreduce/client/client_reader.cpp deleted file mode 100644 index 80759b12dc..0000000000 --- a/yt/cpp/mapreduce/client/client_reader.cpp +++ /dev/null @@ -1,232 +0,0 @@ -#include "client_reader.h" - -#include "structured_table_formats.h" -#include "transaction.h" -#include "transaction_pinger.h" - -#include <yt/cpp/mapreduce/common/helpers.h> -#include <yt/cpp/mapreduce/common/retry_lib.h> -#include <yt/cpp/mapreduce/common/wait_proxy.h> - -#include <yt/cpp/mapreduce/interface/config.h> -#include <yt/cpp/mapreduce/interface/tvm.h> - -#include <yt/cpp/mapreduce/interface/logging/yt_log.h> - -#include <yt/cpp/mapreduce/io/helpers.h> -#include <yt/cpp/mapreduce/io/yamr_table_reader.h> - -#include <yt/cpp/mapreduce/http/helpers.h> -#include <yt/cpp/mapreduce/http/requests.h> -#include <yt/cpp/mapreduce/http/retry_request.h> - -#include <yt/cpp/mapreduce/raw_client/raw_requests.h> - -#include <library/cpp/yson/node/serialize.h> - -#include <util/random/random.h> -#include <util/stream/file.h> -#include <util/stream/str.h> -#include <util/string/builder.h> -#include <util/string/cast.h> - -namespace NYT { - -using ::ToString; - -//////////////////////////////////////////////////////////////////////////////// - -TClientReader::TClientReader( - const TRichYPath& path, - IClientRetryPolicyPtr clientRetryPolicy, - ITransactionPingerPtr transactionPinger, - const TClientContext& context, - const TTransactionId& transactionId, - const TFormat& format, - const TTableReaderOptions& options, - bool useFormatFromTableAttributes) - : Path_(path) - , ClientRetryPolicy_(std::move(clientRetryPolicy)) - , Context_(context) - , ParentTransactionId_(transactionId) - , Format_(format) - , Options_(options) - , ReadTransaction_(nullptr) -{ - if (options.CreateTransaction_) { - 
Y_VERIFY(transactionPinger, "Internal error: transactionPinger is null"); - ReadTransaction_ = MakeHolder<TPingableTransaction>( - ClientRetryPolicy_, - Context_, - transactionId, - transactionPinger->GetChildTxPinger(), - TStartTransactionOptions()); - Path_.Path(Snapshot( - ClientRetryPolicy_, - Context_, - ReadTransaction_->GetId(), - path.Path_)); - } - - if (useFormatFromTableAttributes) { - auto transactionId2 = ReadTransaction_ ? ReadTransaction_->GetId() : ParentTransactionId_; - auto newFormat = GetTableFormat(ClientRetryPolicy_, Context_, transactionId2, Path_); - if (newFormat) { - Format_->Config = *newFormat; - } - } - - TransformYPath(); - CreateRequest(); -} - -bool TClientReader::Retry( - const TMaybe<ui32>& rangeIndex, - const TMaybe<ui64>& rowIndex) -{ - if (CurrentRequestRetryPolicy_) { - // TODO we should pass actual exception in Retry function - yexception genericError; - auto backoff = CurrentRequestRetryPolicy_->OnGenericError(genericError); - if (!backoff) { - return false; - } - } - - try { - CreateRequest(rangeIndex, rowIndex); - return true; - } catch (const std::exception& ex) { - YT_LOG_ERROR("Client reader retry failed: %v", - ex.what()); - - return false; - } -} - -void TClientReader::ResetRetries() -{ - CurrentRequestRetryPolicy_ = nullptr; -} - -size_t TClientReader::DoRead(void* buf, size_t len) -{ - return Input_->Read(buf, len); -} - -void TClientReader::TransformYPath() -{ - for (auto& range : Path_.MutableRangesView()) { - auto& exact = range.Exact_; - if (IsTrivial(exact)) { - continue; - } - - if (exact.RowIndex_) { - range.LowerLimit(TReadLimit().RowIndex(*exact.RowIndex_)); - range.UpperLimit(TReadLimit().RowIndex(*exact.RowIndex_ + 1)); - exact.RowIndex_.Clear(); - - } else if (exact.Key_) { - range.LowerLimit(TReadLimit().Key(*exact.Key_)); - - auto lastPart = TNode::CreateEntity(); - lastPart.Attributes() = TNode()("type", "max"); - exact.Key_->Parts_.push_back(lastPart); - - 
range.UpperLimit(TReadLimit().Key(*exact.Key_)); - exact.Key_.Clear(); - } - } -} - -void TClientReader::CreateRequest(const TMaybe<ui32>& rangeIndex, const TMaybe<ui64>& rowIndex) -{ - if (!CurrentRequestRetryPolicy_) { - CurrentRequestRetryPolicy_ = ClientRetryPolicy_->CreatePolicyForGenericRequest(); - } - while (true) { - CurrentRequestRetryPolicy_->NotifyNewAttempt(); - - THttpHeader header("GET", GetReadTableCommand(Context_.Config->ApiVersion)); - if (Context_.ServiceTicketAuth) { - header.SetServiceTicket(Context_.ServiceTicketAuth->Ptr->IssueServiceTicket()); - } else { - header.SetToken(Context_.Token); - } - auto transactionId = (ReadTransaction_ ? ReadTransaction_->GetId() : ParentTransactionId_); - header.AddTransactionId(transactionId); - - const auto& controlAttributes = Options_.ControlAttributes_; - header.AddParameter("control_attributes", TNode() - ("enable_row_index", controlAttributes.EnableRowIndex_) - ("enable_range_index", controlAttributes.EnableRangeIndex_)); - header.SetOutputFormat(Format_); - - header.SetResponseCompression(ToString(Context_.Config->AcceptEncoding)); - - if (rowIndex.Defined()) { - auto& ranges = Path_.MutableRanges(); - if (ranges.Empty()) { - ranges.ConstructInPlace(TVector{TReadRange()}); - } else { - if (rangeIndex.GetOrElse(0) >= ranges->size()) { - ythrow yexception() - << "range index " << rangeIndex.GetOrElse(0) - << " is out of range, input range count is " << ranges->size(); - } - ranges->erase(ranges->begin(), ranges->begin() + rangeIndex.GetOrElse(0)); - } - ranges->begin()->LowerLimit(TReadLimit().RowIndex(*rowIndex)); - } - - header.MergeParameters(FormIORequestParameters(Path_, Options_)); - - auto requestId = CreateGuidAsString(); - - try { - const auto proxyName = GetProxyForHeavyRequest(Context_); - Response_ = Context_.HttpClient->Request(GetFullUrl(proxyName, Context_, header), requestId, header); - - Input_ = Response_->GetResponseStream(); - - YT_LOG_DEBUG("RSP %v - table stream", requestId); - - 
return; - } catch (const TErrorResponse& e) { - LogRequestError( - requestId, - header, - e.what(), - CurrentRequestRetryPolicy_->GetAttemptDescription()); - - if (!IsRetriable(e)) { - throw; - } - auto backoff = CurrentRequestRetryPolicy_->OnRetriableError(e); - if (!backoff) { - throw; - } - NDetail::TWaitProxy::Get()->Sleep(*backoff); - } catch (const std::exception& e) { - LogRequestError( - requestId, - header, - e.what(), - CurrentRequestRetryPolicy_->GetAttemptDescription()); - - Response_.reset(); - Input_ = nullptr; - - auto backoff = CurrentRequestRetryPolicy_->OnGenericError(e); - if (!backoff) { - throw; - } - NDetail::TWaitProxy::Get()->Sleep(*backoff); - } - } -} - -//////////////////////////////////////////////////////////////////////////////// - -} // namespace NYT diff --git a/yt/cpp/mapreduce/client/client_reader.h b/yt/cpp/mapreduce/client/client_reader.h deleted file mode 100644 index 22f5a0ebb0..0000000000 --- a/yt/cpp/mapreduce/client/client_reader.h +++ /dev/null @@ -1,65 +0,0 @@ -#pragma once - -#include <yt/cpp/mapreduce/common/fwd.h> - -#include <yt/cpp/mapreduce/interface/io.h> - -#include <yt/cpp/mapreduce/http/context.h> -#include <yt/cpp/mapreduce/http/requests.h> -#include <yt/cpp/mapreduce/http/http.h> -#include <yt/cpp/mapreduce/http/http_client.h> - -namespace NYT { - -class TPingableTransaction; - -//////////////////////////////////////////////////////////////////////////////// - -class TClientReader - : public TRawTableReader -{ -public: - TClientReader( - const TRichYPath& path, - IClientRetryPolicyPtr clientRetryPolicy, - ITransactionPingerPtr transactionPinger, - const TClientContext& context, - const TTransactionId& transactionId, - const TFormat& format, - const TTableReaderOptions& options, - bool useFormatFromTableAttributes); - - bool Retry( - const TMaybe<ui32>& rangeIndex, - const TMaybe<ui64>& rowIndex) override; - - void ResetRetries() override; - - bool HasRangeIndices() const override { return true; } - -protected: 
- size_t DoRead(void* buf, size_t len) override; - -private: - TRichYPath Path_; - const IClientRetryPolicyPtr ClientRetryPolicy_; - const TClientContext Context_; - TTransactionId ParentTransactionId_; - TMaybe<TFormat> Format_; - TTableReaderOptions Options_; - - THolder<TPingableTransaction> ReadTransaction_; - - NHttpClient::IHttpResponsePtr Response_; - IInputStream* Input_; - - IRequestRetryPolicyPtr CurrentRequestRetryPolicy_; - -private: - void TransformYPath(); - void CreateRequest(const TMaybe<ui32>& rangeIndex = Nothing(), const TMaybe<ui64>& rowIndex = Nothing()); -}; - -//////////////////////////////////////////////////////////////////////////////// - -} // namespace NYT diff --git a/yt/cpp/mapreduce/client/client_writer.cpp b/yt/cpp/mapreduce/client/client_writer.cpp deleted file mode 100644 index 357abd32eb..0000000000 --- a/yt/cpp/mapreduce/client/client_writer.cpp +++ /dev/null @@ -1,69 +0,0 @@ -#include "client_writer.h" - -#include "retryful_writer.h" -#include "retryless_writer.h" - -#include <yt/cpp/mapreduce/interface/io.h> -#include <yt/cpp/mapreduce/common/fwd.h> -#include <yt/cpp/mapreduce/common/helpers.h> - -namespace NYT { - -//////////////////////////////////////////////////////////////////////////////// - -TClientWriter::TClientWriter( - const TRichYPath& path, - IClientRetryPolicyPtr clientRetryPolicy, - ITransactionPingerPtr transactionPinger, - const TClientContext& context, - const TTransactionId& transactionId, - const TMaybe<TFormat>& format, - const TTableWriterOptions& options) - : BUFFER_SIZE(options.BufferSize_) -{ - if (options.SingleHttpRequest_) { - RawWriter_.Reset(new TRetrylessWriter( - context, - transactionId, - GetWriteTableCommand(context.Config->ApiVersion), - format, - path, - BUFFER_SIZE, - options)); - } else { - RawWriter_.Reset(new TRetryfulWriter( - std::move(clientRetryPolicy), - std::move(transactionPinger), - context, - transactionId, - GetWriteTableCommand(context.Config->ApiVersion), - format, - path, - 
options)); - } -} - -size_t TClientWriter::GetStreamCount() const -{ - return 1; -} - -IOutputStream* TClientWriter::GetStream(size_t tableIndex) const -{ - Y_UNUSED(tableIndex); - return RawWriter_.Get(); -} - -void TClientWriter::OnRowFinished(size_t) -{ - RawWriter_->NotifyRowEnd(); -} - -void TClientWriter::Abort() -{ - RawWriter_->Abort(); -} - -//////////////////////////////////////////////////////////////////////////////// - -} // namespace NYT diff --git a/yt/cpp/mapreduce/client/client_writer.h b/yt/cpp/mapreduce/client/client_writer.h deleted file mode 100644 index 010a88a8ff..0000000000 --- a/yt/cpp/mapreduce/client/client_writer.h +++ /dev/null @@ -1,42 +0,0 @@ -#pragma once - -#include <yt/cpp/mapreduce/common/fwd.h> - -#include <yt/cpp/mapreduce/http/requests.h> -#include <yt/cpp/mapreduce/interface/io.h> - -namespace NYT { - -struct TTableWriterOptions; -class TRetryfulWriter; - -//////////////////////////////////////////////////////////////////////////////// - -class TClientWriter - : public IProxyOutput -{ -public: - TClientWriter( - const TRichYPath& path, - IClientRetryPolicyPtr clientRetryPolicy, - ITransactionPingerPtr transactionPinger, - const TClientContext& context, - const TTransactionId& transactionId, - const TMaybe<TFormat>& format, - const TTableWriterOptions& options); - - size_t GetStreamCount() const override; - IOutputStream* GetStream(size_t tableIndex) const override; - void OnRowFinished(size_t tableIndex) override; - void Abort() override; - -private: - ::TIntrusivePtr<TRawTableWriter> RawWriter_; - - const size_t BUFFER_SIZE = 64 << 20; -}; - -//////////////////////////////////////////////////////////////////////////////// - - -} // namespace NYT diff --git a/yt/cpp/mapreduce/client/dummy_job_profiler.cpp b/yt/cpp/mapreduce/client/dummy_job_profiler.cpp deleted file mode 100644 index 5a2f1e8d46..0000000000 --- a/yt/cpp/mapreduce/client/dummy_job_profiler.cpp +++ /dev/null @@ -1,26 +0,0 @@ -#include "job_profiler.h" - 
-namespace NYT { - -//////////////////////////////////////////////////////////////////////////////// - -class TDummyJobProfiler - : public IJobProfiler -{ - void Start() override - { } - - void Stop() override - { } -}; - -//////////////////////////////////////////////////////////////////////////////// - -std::unique_ptr<IJobProfiler> CreateJobProfiler() -{ - return std::make_unique<TDummyJobProfiler>(); -} - -//////////////////////////////////////////////////////////////////////////////// - -} // namespace NYT diff --git a/yt/cpp/mapreduce/client/file_reader.cpp b/yt/cpp/mapreduce/client/file_reader.cpp deleted file mode 100644 index fc21e0bc02..0000000000 --- a/yt/cpp/mapreduce/client/file_reader.cpp +++ /dev/null @@ -1,243 +0,0 @@ -#include "file_reader.h" - -#include "transaction.h" -#include "transaction_pinger.h" - -#include <yt/cpp/mapreduce/common/helpers.h> -#include <yt/cpp/mapreduce/common/retry_lib.h> -#include <yt/cpp/mapreduce/common/wait_proxy.h> - -#include <yt/cpp/mapreduce/interface/config.h> -#include <yt/cpp/mapreduce/interface/tvm.h> - -#include <yt/cpp/mapreduce/interface/logging/yt_log.h> - -#include <yt/cpp/mapreduce/io/helpers.h> - -#include <yt/cpp/mapreduce/http/helpers.h> -#include <yt/cpp/mapreduce/http/http.h> -#include <yt/cpp/mapreduce/http/http_client.h> -#include <yt/cpp/mapreduce/http/retry_request.h> - -#include <yt/cpp/mapreduce/raw_client/raw_requests.h> - -namespace NYT { -namespace NDetail { - -using ::ToString; - -//////////////////////////////////////////////////////////////////////////////// - -static TMaybe<ui64> GetEndOffset(const TFileReaderOptions& options) { - if (options.Length_) { - return options.Offset_.GetOrElse(0) + *options.Length_; - } else { - return Nothing(); - } -} - -//////////////////////////////////////////////////////////////////////////////// - -TStreamReaderBase::TStreamReaderBase( - IClientRetryPolicyPtr clientRetryPolicy, - ITransactionPingerPtr transactionPinger, - const TClientContext& context, - 
const TTransactionId& transactionId) - : Context_(context) - , ClientRetryPolicy_(std::move(clientRetryPolicy)) - , ReadTransaction_(MakeHolder<TPingableTransaction>( - ClientRetryPolicy_, - context, - transactionId, - transactionPinger->GetChildTxPinger(), - TStartTransactionOptions())) -{ } - -TStreamReaderBase::~TStreamReaderBase() = default; - -TYPath TStreamReaderBase::Snapshot(const TYPath& path) -{ - return NYT::Snapshot(ClientRetryPolicy_, Context_, ReadTransaction_->GetId(), path); -} - -TString TStreamReaderBase::GetActiveRequestId() const -{ - if (Response_) { - return Response_->GetRequestId();; - } else { - return "<no-active-request>"; - } -} - -size_t TStreamReaderBase::DoRead(void* buf, size_t len) -{ - const int retryCount = Context_.Config->ReadRetryCount; - for (int attempt = 1; attempt <= retryCount; ++attempt) { - try { - if (!Input_) { - Response_ = Request(Context_, ReadTransaction_->GetId(), CurrentOffset_); - Input_ = Response_->GetResponseStream(); - } - if (len == 0) { - return 0; - } - const size_t read = Input_->Read(buf, len); - CurrentOffset_ += read; - return read; - } catch (TErrorResponse& e) { - YT_LOG_ERROR("RSP %v - failed: %v (attempt %v of %v)", - GetActiveRequestId(), - e.what(), - attempt, - retryCount); - - if (!IsRetriable(e) || attempt == retryCount) { - throw; - } - NDetail::TWaitProxy::Get()->Sleep(GetBackoffDuration(e, Context_.Config)); - } catch (std::exception& e) { - YT_LOG_ERROR("RSP %v - failed: %v (attempt %v of %v)", - GetActiveRequestId(), - e.what(), - attempt, - retryCount); - - // Invalidate connection. 
- Response_.reset(); - - if (attempt == retryCount) { - throw; - } - NDetail::TWaitProxy::Get()->Sleep(GetBackoffDuration(e, Context_.Config)); - } - Input_ = nullptr; - } - Y_UNREACHABLE(); // we should either return or throw from loop above -} - -//////////////////////////////////////////////////////////////////////////////// - -TFileReader::TFileReader( - const TRichYPath& path, - IClientRetryPolicyPtr clientRetryPolicy, - ITransactionPingerPtr transactionPinger, - const TClientContext& context, - const TTransactionId& transactionId, - const TFileReaderOptions& options) - : TStreamReaderBase(std::move(clientRetryPolicy), std::move(transactionPinger), context, transactionId) - , FileReaderOptions_(options) - , Path_(path) - , StartOffset_(FileReaderOptions_.Offset_.GetOrElse(0)) - , EndOffset_(GetEndOffset(FileReaderOptions_)) -{ - Path_.Path_ = TStreamReaderBase::Snapshot(Path_.Path_); -} - -NHttpClient::IHttpResponsePtr TFileReader::Request(const TClientContext& context, const TTransactionId& transactionId, ui64 readBytes) -{ - const ui64 currentOffset = StartOffset_ + readBytes; - TString hostName = GetProxyForHeavyRequest(context); - - THttpHeader header("GET", GetReadFileCommand(context.Config->ApiVersion)); - if (context.ServiceTicketAuth) { - header.SetServiceTicket(context.ServiceTicketAuth->Ptr->IssueServiceTicket()); - } else { - header.SetToken(context.Token); - } - header.AddTransactionId(transactionId); - header.SetOutputFormat(TMaybe<TFormat>()); // Binary format - - if (EndOffset_) { - Y_VERIFY(*EndOffset_ >= currentOffset); - FileReaderOptions_.Length(*EndOffset_ - currentOffset); - } - FileReaderOptions_.Offset(currentOffset); - header.MergeParameters(FormIORequestParameters(Path_, FileReaderOptions_)); - - header.SetResponseCompression(ToString(context.Config->AcceptEncoding)); - - auto requestId = CreateGuidAsString(); - NHttpClient::IHttpResponsePtr response; - try { - response = context.HttpClient->Request(GetFullUrl(hostName, context, 
header), requestId, header); - } catch (const std::exception& ex) { - LogRequestError(requestId, header, ex.what(), ""); - throw; - } - - YT_LOG_DEBUG("RSP %v - file stream", - requestId); - - return response; -} - -//////////////////////////////////////////////////////////////////////////////// - -TBlobTableReader::TBlobTableReader( - const TYPath& path, - const TKey& key, - IClientRetryPolicyPtr retryPolicy, - ITransactionPingerPtr transactionPinger, - const TClientContext& context, - const TTransactionId& transactionId, - const TBlobTableReaderOptions& options) - : TStreamReaderBase(std::move(retryPolicy), std::move(transactionPinger), context, transactionId) - , Key_(key) - , Options_(options) -{ - Path_ = TStreamReaderBase::Snapshot(path); -} - -NHttpClient::IHttpResponsePtr TBlobTableReader::Request(const TClientContext& context, const TTransactionId& transactionId, ui64 readBytes) -{ - TString hostName = GetProxyForHeavyRequest(context); - - THttpHeader header("GET", "read_blob_table"); - if (context.ServiceTicketAuth) { - header.SetServiceTicket(context.ServiceTicketAuth->Ptr->IssueServiceTicket()); - } else { - header.SetToken(context.Token); - } - header.AddTransactionId(transactionId); - header.SetOutputFormat(TMaybe<TFormat>()); // Binary format - - const ui64 currentOffset = Options_.Offset_ + readBytes; - const i64 startPartIndex = currentOffset / Options_.PartSize_; - const ui64 skipBytes = currentOffset - Options_.PartSize_ * startPartIndex; - auto lowerLimitKey = Key_; - lowerLimitKey.Parts_.push_back(startPartIndex); - auto upperLimitKey = Key_; - upperLimitKey.Parts_.push_back(std::numeric_limits<i64>::max()); - TNode params = PathToParamNode(TRichYPath(Path_).AddRange(TReadRange() - .LowerLimit(TReadLimit().Key(lowerLimitKey)) - .UpperLimit(TReadLimit().Key(upperLimitKey)))); - params["start_part_index"] = TNode(startPartIndex); - params["offset"] = skipBytes; - if (Options_.PartIndexColumnName_) { - params["part_index_column_name"] = 
*Options_.PartIndexColumnName_; - } - if (Options_.DataColumnName_) { - params["data_column_name"] = *Options_.DataColumnName_; - } - params["part_size"] = Options_.PartSize_; - header.MergeParameters(params); - header.SetResponseCompression(ToString(context.Config->AcceptEncoding)); - - auto requestId = CreateGuidAsString(); - NHttpClient::IHttpResponsePtr response; - try { - response = context.HttpClient->Request(GetFullUrl(hostName, context, header), requestId, header); - } catch (const std::exception& ex) { - LogRequestError(requestId, header, ex.what(), ""); - throw; - } - - YT_LOG_DEBUG("RSP %v - blob table stream", - requestId); - return response; -} - -//////////////////////////////////////////////////////////////////////////////// - -} // namespace NDetail -} // namespace NYT diff --git a/yt/cpp/mapreduce/client/file_reader.h b/yt/cpp/mapreduce/client/file_reader.h deleted file mode 100644 index d850008a31..0000000000 --- a/yt/cpp/mapreduce/client/file_reader.h +++ /dev/null @@ -1,105 +0,0 @@ -#pragma once - -#include <yt/cpp/mapreduce/common/fwd.h> - -#include <yt/cpp/mapreduce/interface/io.h> - -#include <yt/cpp/mapreduce/http/context.h> -#include <yt/cpp/mapreduce/http/requests.h> - -class IInputStream; - -namespace NYT { - -class THttpRequest; -class TPingableTransaction; - -namespace NDetail { -//////////////////////////////////////////////////////////////////////////////// - -class TStreamReaderBase - : public IFileReader -{ -public: - TStreamReaderBase( - IClientRetryPolicyPtr clientRetryPolicy, - ITransactionPingerPtr transactionPinger, - const TClientContext& context, - const TTransactionId& transactionId); - - ~TStreamReaderBase(); - -protected: - TYPath Snapshot(const TYPath& path); - -protected: - const TClientContext Context_; - -private: - size_t DoRead(void* buf, size_t len) override; - virtual NHttpClient::IHttpResponsePtr Request(const TClientContext& context, const TTransactionId& transactionId, ui64 readBytes) = 0; - TString 
GetActiveRequestId() const; - -private: - const IClientRetryPolicyPtr ClientRetryPolicy_; - TFileReaderOptions FileReaderOptions_; - - NHttpClient::IHttpResponsePtr Response_; - IInputStream* Input_ = nullptr; - - THolder<TPingableTransaction> ReadTransaction_; - - ui64 CurrentOffset_ = 0; -}; - -//////////////////////////////////////////////////////////////////////////////// - -class TFileReader - : public TStreamReaderBase -{ -public: - TFileReader( - const TRichYPath& path, - IClientRetryPolicyPtr clientRetryPolicy, - ITransactionPingerPtr transactionPinger, - const TClientContext& context, - const TTransactionId& transactionId, - const TFileReaderOptions& options = TFileReaderOptions()); - -private: - NHttpClient::IHttpResponsePtr Request(const TClientContext& context, const TTransactionId& transactionId, ui64 readBytes) override; - -private: - TFileReaderOptions FileReaderOptions_; - - TRichYPath Path_; - const ui64 StartOffset_; - const TMaybe<ui64> EndOffset_; -}; - -//////////////////////////////////////////////////////////////////////////////// - -class TBlobTableReader - : public TStreamReaderBase -{ -public: - TBlobTableReader( - const TYPath& path, - const TKey& key, - IClientRetryPolicyPtr clientRetryPolicy, - ITransactionPingerPtr transactionPinger, - const TClientContext& context, - const TTransactionId& transactionId, - const TBlobTableReaderOptions& options); - -private: - NHttpClient::IHttpResponsePtr Request(const TClientContext& context, const TTransactionId& transactionId, ui64 readBytes) override; - -private: - const TKey Key_; - const TBlobTableReaderOptions Options_; - TYPath Path_; -}; - -} // namespace NDetail -} // namespace NYT diff --git a/yt/cpp/mapreduce/client/file_writer.cpp b/yt/cpp/mapreduce/client/file_writer.cpp deleted file mode 100644 index daf6461edd..0000000000 --- a/yt/cpp/mapreduce/client/file_writer.cpp +++ /dev/null @@ -1,60 +0,0 @@ -#include "file_writer.h" - -#include <yt/cpp/mapreduce/io/helpers.h> -#include 
<yt/cpp/mapreduce/interface/finish_or_die.h> - -#include <yt/cpp/mapreduce/common/helpers.h> - -namespace NYT { - -//////////////////////////////////////////////////////////////////////////////// - -TFileWriter::TFileWriter( - const TRichYPath& path, - IClientRetryPolicyPtr clientRetryPolicy, - ITransactionPingerPtr transactionPinger, - const TClientContext& context, - const TTransactionId& transactionId, - const TFileWriterOptions& options) - : RetryfulWriter_( - std::move(clientRetryPolicy), - std::move(transactionPinger), - context, - transactionId, - GetWriteFileCommand(context.Config->ApiVersion), - TMaybe<TFormat>(), - path, - options) -{ } - -TFileWriter::~TFileWriter() -{ - NDetail::FinishOrDie(this, "TFileWriter"); -} - -void TFileWriter::DoWrite(const void* buf, size_t len) -{ - // If user tunes RetryBlockSize / DesiredChunkSize he expects - // us to send data exactly by RetryBlockSize. So behaviour of the writer is predictable. - // - // We want to avoid situation when size of sent data slightly exceeded DesiredChunkSize - // and server produced one chunk of desired size and one small chunk. 
- while (len > 0) { - const auto retryBlockRemainingSize = RetryfulWriter_.GetRetryBlockRemainingSize(); - Y_VERIFY(retryBlockRemainingSize > 0); - const auto firstWriteLen = Min(len, retryBlockRemainingSize); - RetryfulWriter_.Write(buf, firstWriteLen); - RetryfulWriter_.NotifyRowEnd(); - len -= firstWriteLen; - buf = static_cast<const char*>(buf) + firstWriteLen; - } -} - -void TFileWriter::DoFinish() -{ - RetryfulWriter_.Finish(); -} - -//////////////////////////////////////////////////////////////////////////////// - -} // namespace NYT diff --git a/yt/cpp/mapreduce/client/file_writer.h b/yt/cpp/mapreduce/client/file_writer.h deleted file mode 100644 index f3b97b904e..0000000000 --- a/yt/cpp/mapreduce/client/file_writer.h +++ /dev/null @@ -1,38 +0,0 @@ -#pragma once - -#include "retryful_writer.h" - -#include <yt/cpp/mapreduce/common/fwd.h> - -#include <yt/cpp/mapreduce/interface/io.h> - -namespace NYT { - -//////////////////////////////////////////////////////////////////////////////// - -class TFileWriter - : public IFileWriter -{ -public: - TFileWriter( - const TRichYPath& path, - IClientRetryPolicyPtr clientRetryPolicy, - ITransactionPingerPtr transactionPinger, - const TClientContext& context, - const TTransactionId& transactionId, - const TFileWriterOptions& options = TFileWriterOptions()); - - ~TFileWriter() override; - -protected: - void DoWrite(const void* buf, size_t len) override; - void DoFinish() override; - -private: - TRetryfulWriter RetryfulWriter_; - static const size_t BUFFER_SIZE = 64 << 20; -}; - -//////////////////////////////////////////////////////////////////////////////// - -} // namespace NYT diff --git a/yt/cpp/mapreduce/client/format_hints.cpp b/yt/cpp/mapreduce/client/format_hints.cpp deleted file mode 100644 index 1f6eb173ad..0000000000 --- a/yt/cpp/mapreduce/client/format_hints.cpp +++ /dev/null @@ -1,84 +0,0 @@ -#include "format_hints.h" - -#include <yt/cpp/mapreduce/common/helpers.h> -#include 
<yt/cpp/mapreduce/interface/config.h> -#include <yt/cpp/mapreduce/interface/operation.h> - -#include <util/string/builder.h> - -namespace NYT::NDetail { - -using ::ToString; - -//////////////////////////////////////////////////////////////////////////////// - -static void ApplyEnableTypeConversion(TFormat* format, const TFormatHints& formatHints) -{ - if (formatHints.EnableAllToStringConversion_) { - format->Config.Attributes()["enable_all_to_string_conversion"] = *formatHints.EnableAllToStringConversion_; - } - if (formatHints.EnableStringToAllConversion_) { - format->Config.Attributes()["enable_string_to_all_conversion"] = *formatHints.EnableStringToAllConversion_; - } - if (formatHints.EnableIntegralTypeConversion_) { - format->Config.Attributes()["enable_integral_type_conversion"] = *formatHints.EnableIntegralTypeConversion_; - } - if (formatHints.EnableIntegralToDoubleConversion_) { - format->Config.Attributes()["enable_integral_to_double_conversion"] = *formatHints.EnableIntegralToDoubleConversion_; - } - if (formatHints.EnableTypeConversion_) { - format->Config.Attributes()["enable_type_conversion"] = *formatHints.EnableTypeConversion_; - } -} - -template <> -void ApplyFormatHints<TNode>(TFormat* format, const TMaybe<TFormatHints>& formatHints) -{ - Y_VERIFY(format); - if (!formatHints) { - return; - } - - ApplyEnableTypeConversion(format, *formatHints); - - if (formatHints->SkipNullValuesForTNode_) { - Y_ENSURE_EX( - format->Config.AsString() == "yson", - TApiUsageError() << "SkipNullForTNode option must be used with yson format, actual format: " << format->Config.AsString()); - format->Config.Attributes()["skip_null_values"] = formatHints->SkipNullValuesForTNode_; - } - - if (formatHints->ComplexTypeMode_) { - Y_ENSURE_EX( - format->Config.AsString() == "yson", - TApiUsageError() << "ComplexTypeMode option must be used with yson format, actual format: " - << format->Config.AsString()); - format->Config.Attributes()["complex_type_mode"] = 
ToString(*formatHints->ComplexTypeMode_); - } -} - -template <> -void ApplyFormatHints<TYaMRRow>(TFormat* format, const TMaybe<TFormatHints>& formatHints) -{ - Y_VERIFY(format); - if (!formatHints) { - return; - } - - ythrow TApiUsageError() << "Yamr format currently has no supported format hints"; -} - -template <> -void ApplyFormatHints<::google::protobuf::Message>(TFormat* format, const TMaybe<TFormatHints>& formatHints) -{ - Y_VERIFY(format); - if (!formatHints) { - return; - } - - ythrow TApiUsageError() << "Protobuf format currently has no supported format hints"; -} - -//////////////////////////////////////////////////////////////////////////////// - -} // namespace NYT::NDetail diff --git a/yt/cpp/mapreduce/client/format_hints.h b/yt/cpp/mapreduce/client/format_hints.h deleted file mode 100644 index f6576b1045..0000000000 --- a/yt/cpp/mapreduce/client/format_hints.h +++ /dev/null @@ -1,27 +0,0 @@ -#pragma once - -#include <yt/cpp/mapreduce/interface/fwd.h> - -#include <util/generic/maybe.h> - -namespace NYT { -namespace NDetail { - -//////////////////////////////////////////////////////////////////////////////// - -template <typename TRow> -void ApplyFormatHints(TFormat* format, const TMaybe<TFormatHints>& formatHints); - -template <> -void ApplyFormatHints<TNode>(TFormat* format, const TMaybe<TFormatHints>& formatHints); - -template <> -void ApplyFormatHints<TYaMRRow>(TFormat* format, const TMaybe<TFormatHints>& formatHints); - -template <> -void ApplyFormatHints<::google::protobuf::Message>(TFormat* format, const TMaybe<TFormatHints>& formatHints); - -//////////////////////////////////////////////////////////////////////////////// - -} // namespace NDetail -} // namespace NYT diff --git a/yt/cpp/mapreduce/client/fwd.h b/yt/cpp/mapreduce/client/fwd.h deleted file mode 100644 index d4449d4ac1..0000000000 --- a/yt/cpp/mapreduce/client/fwd.h +++ /dev/null @@ -1,16 +0,0 @@ -#pragma once - -#include <util/generic/ptr.h> - -namespace NYT { - 
-//////////////////////////////////////////////////////////////////////////////// - -class TPingableTransaction; - -class TClient; -using TClientPtr = ::TIntrusivePtr<TClient>; - -//////////////////////////////////////////////////////////////////////////////// - -} // namespace NYT diff --git a/yt/cpp/mapreduce/client/init.cpp b/yt/cpp/mapreduce/client/init.cpp deleted file mode 100644 index c74598ba14..0000000000 --- a/yt/cpp/mapreduce/client/init.cpp +++ /dev/null @@ -1,280 +0,0 @@ -#include "init.h" - -#include "abortable_registry.h" -#include "job_profiler.h" - -#include <yt/cpp/mapreduce/http/requests.h> - -#include <yt/cpp/mapreduce/interface/config.h> -#include <yt/cpp/mapreduce/interface/init.h> -#include <yt/cpp/mapreduce/interface/operation.h> - -#include <yt/cpp/mapreduce/interface/logging/logger.h> -#include <yt/cpp/mapreduce/interface/logging/yt_log.h> - -#include <yt/cpp/mapreduce/io/job_reader.h> - -#include <yt/cpp/mapreduce/common/helpers.h> -#include <yt/cpp/mapreduce/common/wait_proxy.h> - -#include <library/cpp/sighandler/async_signals_handler.h> - -#include <util/folder/dirut.h> - -#include <util/generic/singleton.h> - -#include <util/string/builder.h> -#include <util/string/cast.h> -#include <util/string/type.h> - -#include <util/system/env.h> -#include <util/system/thread.h> - -namespace NYT { - -//////////////////////////////////////////////////////////////////////////////// - -namespace { - -void WriteVersionToLog() -{ - YT_LOG_INFO("Wrapper version: %v", - TProcessState::Get()->ClientVersion); -} - -static TNode SecureVaultContents; // safe - -void InitializeSecureVault() -{ - SecureVaultContents = NodeFromYsonString( - GetEnv("YT_SECURE_VAULT", "{}")); -} - -} - -//////////////////////////////////////////////////////////////////////////////// - -const TNode& GetJobSecureVault() -{ - return SecureVaultContents; -} - -//////////////////////////////////////////////////////////////////////////////// - -class TAbnormalTerminator -{ -public: - 
TAbnormalTerminator() = default; - - static void SetErrorTerminationHandler() - { - if (Instance().OldHandler_ != nullptr) { - return; - } - - Instance().OldHandler_ = std::set_terminate(&TerminateHandler); - - SetAsyncSignalFunction(SIGINT, SignalHandler); - SetAsyncSignalFunction(SIGTERM, SignalHandler); - } - -private: - static TAbnormalTerminator& Instance() - { - return *Singleton<TAbnormalTerminator>(); - } - - static void* Invoke(void* opaque) - { - (*reinterpret_cast<std::function<void()>*>(opaque))(); - return nullptr; - } - - static void TerminateWithTimeout( - const TDuration& timeout, - const std::function<void(void)>& exitFunction, - const TString& logMessage) - { - std::function<void()> threadFun = [=] { - YT_LOG_INFO("%v", - logMessage); - NDetail::TAbortableRegistry::Get()->AbortAllAndBlockForever(); - }; - TThread thread(TThread::TParams(Invoke, &threadFun).SetName("aborter")); - thread.Start(); - thread.Detach(); - - Sleep(timeout); - exitFunction(); - } - - static void SignalHandler(int signalNumber) - { - TerminateWithTimeout( - TDuration::Seconds(5), - std::bind(_exit, -signalNumber), - ::TStringBuilder() << "Signal " << signalNumber << " received, aborting transactions. Waiting 5 seconds..."); - } - - static void TerminateHandler() - { - TerminateWithTimeout( - TDuration::Seconds(5), - [&] { - if (Instance().OldHandler_) { - Instance().OldHandler_(); - } else { - abort(); - } - }, - ::TStringBuilder() << "Terminate called, aborting transactions. 
Waiting 5 seconds..."); - } - -private: - std::terminate_handler OldHandler_ = nullptr; -}; - -//////////////////////////////////////////////////////////////////////////////// - -namespace NDetail { - -EInitStatus& GetInitStatus() -{ - static EInitStatus initStatus = EInitStatus::NotInitialized; - return initStatus; -} - -static void ElevateInitStatus(const EInitStatus newStatus) { - NDetail::GetInitStatus() = Max(NDetail::GetInitStatus(), newStatus); -} - -void CommonInitialize(int argc, const char** argv) -{ - auto logLevelStr = to_lower(TConfig::Get()->LogLevel); - ILogger::ELevel logLevel; - - if (!TryFromString(logLevelStr, logLevel)) { - Cerr << "Invalid log level: " << TConfig::Get()->LogLevel << Endl; - exit(1); - } - - SetLogger(CreateStdErrLogger(logLevel)); - - TProcessState::Get()->SetCommandLine(argc, argv); -} - -void NonJobInitialize(const TInitializeOptions& options) -{ - if (FromString<bool>(GetEnv("YT_CLEANUP_ON_TERMINATION", "0")) || options.CleanupOnTermination_) { - TAbnormalTerminator::SetErrorTerminationHandler(); - } - if (options.WaitProxy_) { - NDetail::TWaitProxy::Get()->SetProxy(options.WaitProxy_); - } - WriteVersionToLog(); -} - -void ExecJob(int argc, const char** argv, const TInitializeOptions& options) -{ - // Now we are definitely in job. - // We take this setting from environment variable to be consistent with client code. 
- TConfig::Get()->UseClientProtobuf = IsTrue(GetEnv("YT_USE_CLIENT_PROTOBUF", "")); - - auto execJobImpl = [&options](TString jobName, i64 outputTableCount, bool hasState) { - auto jobProfiler = CreateJobProfiler(); - jobProfiler->Start(); - - InitializeSecureVault(); - - NDetail::OutputTableCount = static_cast<i64>(outputTableCount); - - THolder<IInputStream> jobStateStream; - if (hasState) { - jobStateStream = MakeHolder<TIFStream>("jobstate"); - } else { - jobStateStream = MakeHolder<TBufferStream>(0); - } - - int ret = 1; - try { - ret = TJobFactory::Get()->GetJobFunction(jobName.data())(outputTableCount, *jobStateStream); - } catch (const TSystemError& ex) { - if (ex.Status() == EPIPE) { - // 32 == EPIPE, write number here so it's easier to grep this exit code in source files - exit(32); - } - throw; - } - - jobProfiler->Stop(); - - if (options.JobOnExitFunction_) { - (*options.JobOnExitFunction_)(); - } - exit(ret); - }; - - auto jobArguments = NodeFromYsonString(GetEnv("YT_JOB_ARGUMENTS", "#")); - if (jobArguments.HasValue()) { - execJobImpl( - jobArguments["job_name"].AsString(), - jobArguments["output_table_count"].AsInt64(), - jobArguments["has_state"].AsBool()); - Y_UNREACHABLE(); - } - - TString jobType = argc >= 2 ? argv[1] : TString(); - if (argc != 5 || jobType != "--yt-map" && jobType != "--yt-reduce") { - // We are inside job but probably using old API - // (i.e. both NYT::Initialize and NMR::Initialize are called). 
- WriteVersionToLog(); - return; - } - - TString jobName(argv[2]); - i64 outputTableCount = FromString<i64>(argv[3]); - int hasState = FromString<int>(argv[4]); - execJobImpl(jobName, outputTableCount, hasState); - Y_UNREACHABLE(); -} - -} // namespace NDetail - -//////////////////////////////////////////////////////////////////////////////// - -void JoblessInitialize(const TInitializeOptions& options) -{ - static const char* fakeArgv[] = {"unknown..."}; - NDetail::CommonInitialize(1, fakeArgv); - NDetail::NonJobInitialize(options); - NDetail::ElevateInitStatus(NDetail::EInitStatus::JoblessInitialization); -} - -void Initialize(int argc, const char* argv[], const TInitializeOptions& options) -{ - NDetail::CommonInitialize(argc, argv); - - NDetail::ElevateInitStatus(NDetail::EInitStatus::FullInitialization); - - const bool isInsideJob = !GetEnv("YT_JOB_ID").empty(); - if (isInsideJob) { - NDetail::ExecJob(argc, argv, options); - } else { - NDetail::NonJobInitialize(options); - } -} - -void Initialize(int argc, char* argv[], const TInitializeOptions& options) -{ - return Initialize(argc, const_cast<const char**>(argv), options); -} - -void Initialize(const TInitializeOptions& options) -{ - static const char* fakeArgv[] = {"unknown..."}; - Initialize(1, fakeArgv, options); -} - -//////////////////////////////////////////////////////////////////////////////// - -} // namespace NYT diff --git a/yt/cpp/mapreduce/client/init.h b/yt/cpp/mapreduce/client/init.h deleted file mode 100644 index af2fc80e55..0000000000 --- a/yt/cpp/mapreduce/client/init.h +++ /dev/null @@ -1,22 +0,0 @@ -#pragma once - -#include <yt/cpp/mapreduce/interface/init.h> - -namespace NYT { -namespace NDetail { - -//////////////////////////////////////////////////////////////////////////////// - -enum class EInitStatus : int -{ - NotInitialized, - JoblessInitialization, - FullInitialization, -}; - -EInitStatus& GetInitStatus(); - 
-//////////////////////////////////////////////////////////////////////////////// - -} // namespace NDetail -} // namespace NYT diff --git a/yt/cpp/mapreduce/client/job_profiler.h b/yt/cpp/mapreduce/client/job_profiler.h deleted file mode 100644 index 6532871380..0000000000 --- a/yt/cpp/mapreduce/client/job_profiler.h +++ /dev/null @@ -1,27 +0,0 @@ -#pragma once - -#include <memory> - -namespace NYT { - -//////////////////////////////////////////////////////////////////////////////// - -struct IJobProfiler -{ - virtual ~IJobProfiler() = default; - - //! Starts job profiling if corresponding options are set - //! in environment. - virtual void Start() = 0; - - //! Stops profiling and sends profile to job proxy. - virtual void Stop() = 0; -}; - -//////////////////////////////////////////////////////////////////////////////// - -std::unique_ptr<IJobProfiler> CreateJobProfiler(); - -//////////////////////////////////////////////////////////////////////////////// - -} // namespace NYT diff --git a/yt/cpp/mapreduce/client/lock.cpp b/yt/cpp/mapreduce/client/lock.cpp deleted file mode 100644 index 88110f9266..0000000000 --- a/yt/cpp/mapreduce/client/lock.cpp +++ /dev/null @@ -1,105 +0,0 @@ -#include "lock.h" - -#include "yt_poller.h" - -#include <yt/cpp/mapreduce/http/retry_request.h> - -#include <yt/cpp/mapreduce/common/retry_lib.h> - -#include <yt/cpp/mapreduce/raw_client/raw_batch_request.h> - -#include <util/string/builder.h> - -namespace NYT { -namespace NDetail { - -using namespace NRawClient; - -//////////////////////////////////////////////////////////////////////////////// - -class TLockPollerItem - : public IYtPollerItem -{ -public: - TLockPollerItem(const TLockId& lockId, ::NThreading::TPromise<void> acquired) - : LockStateYPath_("#" + GetGuidAsString(lockId) + "/@state") - , Acquired_(acquired) - { } - - void PrepareRequest(TRawBatchRequest* batchRequest) override - { - LockState_ = batchRequest->Get(TTransactionId(), LockStateYPath_, TGetOptions()); - } - - 
EStatus OnRequestExecuted() override - { - try { - const auto& state = LockState_.GetValue().AsString(); - if (state == "acquired") { - Acquired_.SetValue(); - return PollBreak; - } - } catch (const TErrorResponse& e) { - if (!IsRetriable(e)) { - Acquired_.SetException(std::current_exception()); - return PollBreak; - } - } catch (const std::exception& e) { - if (!IsRetriable(e)) { - Acquired_.SetException(std::current_exception()); - return PollBreak; - } - } - return PollContinue; - } - - void OnItemDiscarded() override - { - Acquired_.SetException(std::make_exception_ptr(yexception() << "Operation cancelled")); - } - -private: - const TString LockStateYPath_; - ::NThreading::TPromise<void> Acquired_; - - ::NThreading::TFuture<TNode> LockState_; -}; - -//////////////////////////////////////////////////////////////////////////////// - -TLock::TLock(const TLockId& lockId, TClientPtr client, bool waitable) - : LockId_(lockId) - , Client_(std::move(client)) -{ - if (!waitable) { - Acquired_ = ::NThreading::MakeFuture(); - } -} - -const TLockId& TLock::GetId() const -{ - return LockId_; -} - -TNodeId TLock::GetLockedNodeId() const -{ - auto nodeIdNode = Client_->Get( - ::TStringBuilder() << '#' << GetGuidAsString(LockId_) << "/@node_id", - TGetOptions()); - return GetGuid(nodeIdNode.AsString()); -} - -const ::NThreading::TFuture<void>& TLock::GetAcquiredFuture() const -{ - if (!Acquired_) { - auto promise = ::NThreading::NewPromise<void>(); - Client_->GetYtPoller().Watch(::MakeIntrusive<TLockPollerItem>(LockId_, promise)); - Acquired_ = promise.GetFuture(); - } - return *Acquired_; -} - -//////////////////////////////////////////////////////////////////////////////// - -} // namespace NDetail -} // namespace NYT diff --git a/yt/cpp/mapreduce/client/lock.h b/yt/cpp/mapreduce/client/lock.h deleted file mode 100644 index 7e2c7a127d..0000000000 --- a/yt/cpp/mapreduce/client/lock.h +++ /dev/null @@ -1,31 +0,0 @@ -#pragma once - -#include "client.h" - -#include 
<yt/cpp/mapreduce/interface/client.h> - -namespace NYT { -namespace NDetail { - -//////////////////////////////////////////////////////////////////////////////// - -class TLock - : public ILock -{ -public: - TLock(const TLockId& lockId, TClientPtr client, bool waitable); - - virtual const TLockId& GetId() const override; - virtual TNodeId GetLockedNodeId() const override; - virtual const ::NThreading::TFuture<void>& GetAcquiredFuture() const override; - -private: - const TLockId LockId_; - mutable TMaybe<::NThreading::TFuture<void>> Acquired_; - TClientPtr Client_; -}; - -//////////////////////////////////////////////////////////////////////////////// - -} // namespace NDetail -} // namespace NYT diff --git a/yt/cpp/mapreduce/client/operation.cpp b/yt/cpp/mapreduce/client/operation.cpp deleted file mode 100644 index fc1600c240..0000000000 --- a/yt/cpp/mapreduce/client/operation.cpp +++ /dev/null @@ -1,2981 +0,0 @@ -#include "operation.h" - -#include "abortable_registry.h" -#include "client.h" -#include "operation_helpers.h" -#include "operation_tracker.h" -#include "transaction.h" -#include "prepare_operation.h" -#include "retry_heavy_write_request.h" -#include "skiff.h" -#include "structured_table_formats.h" -#include "yt_poller.h" - -#include <yt/cpp/mapreduce/common/helpers.h> -#include <yt/cpp/mapreduce/common/retry_lib.h> -#include <yt/cpp/mapreduce/common/wait_proxy.h> - -#include <yt/cpp/mapreduce/interface/config.h> -#include <yt/cpp/mapreduce/interface/errors.h> -#include <yt/cpp/mapreduce/interface/fluent.h> -#include <yt/cpp/mapreduce/interface/format.h> -#include <yt/cpp/mapreduce/interface/job_statistics.h> -#include <yt/cpp/mapreduce/interface/protobuf_format.h> - -#include <yt/cpp/mapreduce/interface/logging/yt_log.h> -#include <yt/cpp/mapreduce/interface/logging/yt_log.h> - -#include <yt/cpp/mapreduce/http/requests.h> -#include <yt/cpp/mapreduce/http/retry_request.h> - -#include <yt/cpp/mapreduce/io/job_reader.h> -#include 
<yt/cpp/mapreduce/io/job_writer.h> -#include <yt/cpp/mapreduce/io/yamr_table_reader.h> -#include <yt/cpp/mapreduce/io/yamr_table_writer.h> -#include <yt/cpp/mapreduce/io/node_table_reader.h> -#include <yt/cpp/mapreduce/io/node_table_writer.h> -#include <yt/cpp/mapreduce/io/proto_table_reader.h> -#include <yt/cpp/mapreduce/io/proto_table_writer.h> -#include <yt/cpp/mapreduce/io/proto_helpers.h> -#include <yt/cpp/mapreduce/io/skiff_table_reader.h> - -#include <yt/cpp/mapreduce/raw_client/raw_batch_request.h> -#include <yt/cpp/mapreduce/raw_client/raw_requests.h> - -#include <library/cpp/yson/node/serialize.h> - -#include <util/generic/hash_set.h> - -#include <util/string/builder.h> -#include <util/string/cast.h> - -#include <util/system/thread.h> - -namespace NYT { -namespace NDetail { - -using namespace NRawClient; - -using ::ToString; - -//////////////////////////////////////////////////////////////////////////////// - -static const ui64 DefaultExrtaTmpfsSize = 1024LL * 1024LL; - -//////////////////////////////////////////////////////////////////////////////// - -namespace { - -//////////////////////////////////////////////////////////////////////////////// - -struct TMapReduceOperationIo -{ - TVector<TRichYPath> Inputs; - TVector<TRichYPath> MapOutputs; - TVector<TRichYPath> Outputs; - - TMaybe<TFormat> MapperInputFormat; - TMaybe<TFormat> MapperOutputFormat; - - TMaybe<TFormat> ReduceCombinerInputFormat; - TMaybe<TFormat> ReduceCombinerOutputFormat; - - TFormat ReducerInputFormat = TFormat::YsonBinary(); - TFormat ReducerOutputFormat = TFormat::YsonBinary(); - - TVector<TSmallJobFile> MapperJobFiles; - TVector<TSmallJobFile> ReduceCombinerJobFiles; - TVector<TSmallJobFile> ReducerJobFiles; -}; - -template <typename T> -void VerifyHasElements(const TVector<T>& paths, TStringBuf name) -{ - if (paths.empty()) { - ythrow TApiUsageError() << "no " << name << " table is specified"; - } -} - 
-//////////////////////////////////////////////////////////////////////////////// - -TVector<TSmallJobFile> CreateFormatConfig( - TMaybe<TSmallJobFile> inputConfig, - const TMaybe<TSmallJobFile>& outputConfig) -{ - TVector<TSmallJobFile> result; - if (inputConfig) { - result.push_back(std::move(*inputConfig)); - } - if (outputConfig) { - result.push_back(std::move(*outputConfig)); - } - return result; -} - -template <typename T> -ENodeReaderFormat NodeReaderFormatFromHintAndGlobalConfig(const TUserJobFormatHintsBase<T>& formatHints) -{ - auto result = TConfig::Get()->NodeReaderFormat; - if (formatHints.InputFormatHints_ && formatHints.InputFormatHints_->SkipNullValuesForTNode_) { - Y_ENSURE_EX( - result != ENodeReaderFormat::Skiff, - TApiUsageError() << "skiff format doesn't support SkipNullValuesForTNode format hint"); - result = ENodeReaderFormat::Yson; - } - return result; -} - -template <class TSpec> -const TVector<TStructuredTablePath>& GetStructuredInputs(const TSpec& spec) -{ - if constexpr (std::is_same_v<TSpec, TVanillaTask>) { - static const TVector<TStructuredTablePath> empty; - return empty; - } else { - return spec.GetStructuredInputs(); - } -} - -template <class TSpec> -const TVector<TStructuredTablePath>& GetStructuredOutputs(const TSpec& spec) -{ - return spec.GetStructuredOutputs(); -} - -template <class TSpec> -const TMaybe<TFormatHints>& GetInputFormatHints(const TSpec& spec) -{ - if constexpr (std::is_same_v<TSpec, TVanillaTask>) { - static const TMaybe<TFormatHints> empty = Nothing(); - return empty; - } else { - return spec.InputFormatHints_; - } -} - -template <class TSpec> -const TMaybe<TFormatHints>& GetOutputFormatHints(const TSpec& spec) -{ - return spec.OutputFormatHints_; -} - -template <class TSpec> -ENodeReaderFormat GetNodeReaderFormat(const TSpec& spec, bool allowSkiff) -{ - if constexpr (std::is_same<TSpec, TVanillaTask>::value) { - return ENodeReaderFormat::Yson; - } else { - return allowSkiff - ? 
NodeReaderFormatFromHintAndGlobalConfig(spec) - : ENodeReaderFormat::Yson; - } -} - -static void SortColumnsToNames(const TSortColumns& sortColumns, THashSet<TString>* result) -{ - auto names = sortColumns.GetNames(); - result->insert(names.begin(), names.end()); -} - -static THashSet<TString> SortColumnsToNames(const TSortColumns& sortColumns) -{ - THashSet<TString> columnNames; - SortColumnsToNames(sortColumns, &columnNames); - return columnNames; -} - -THashSet<TString> GetColumnsUsedInOperation(const TJoinReduceOperationSpec& spec) -{ - return SortColumnsToNames(spec.JoinBy_); -} - -THashSet<TString> GetColumnsUsedInOperation(const TReduceOperationSpec& spec) { - auto result = SortColumnsToNames(spec.SortBy_); - SortColumnsToNames(spec.ReduceBy_, &result); - if (spec.JoinBy_) { - SortColumnsToNames(*spec.JoinBy_, &result); - } - return result; -} - -THashSet<TString> GetColumnsUsedInOperation(const TMapReduceOperationSpec& spec) -{ - auto result = SortColumnsToNames(spec.SortBy_); - SortColumnsToNames(spec.ReduceBy_, &result); - return result; -} - -THashSet<TString> GetColumnsUsedInOperation(const TMapOperationSpec&) -{ - return THashSet<TString>(); -} - -THashSet<TString> GetColumnsUsedInOperation(const TVanillaTask&) -{ - return THashSet<TString>(); -} - -TStructuredJobTableList ApplyProtobufColumnFilters( - const TStructuredJobTableList& tableList, - const TOperationPreparer& preparer, - const THashSet<TString>& columnsUsedInOperations, - const TOperationOptions& options) -{ - bool hasInputQuery = options.Spec_.Defined() && options.Spec_->IsMap() && options.Spec_->HasKey("input_query"); - if (hasInputQuery) { - return tableList; - } - - auto isDynamic = BatchTransform( - CreateDefaultRequestRetryPolicy(preparer.GetContext().Config), - preparer.GetContext(), - tableList, - [&] (TRawBatchRequest& batch, const auto& table) { - return batch.Get(preparer.GetTransactionId(), table.RichYPath->Path_ + "/@dynamic", TGetOptions()); - }); - - auto newTableList = 
tableList; - for (size_t tableIndex = 0; tableIndex < tableList.size(); ++tableIndex) { - if (isDynamic[tableIndex].AsBool()) { - continue; - } - auto& table = newTableList[tableIndex]; - Y_VERIFY(table.RichYPath); - if (table.RichYPath->Columns_) { - continue; - } - if (!std::holds_alternative<TProtobufTableStructure>(table.Description)) { - continue; - } - const auto& descriptor = std::get<TProtobufTableStructure>(table.Description).Descriptor; - if (!descriptor) { - continue; - } - auto fromDescriptor = NDetail::InferColumnFilter(*descriptor); - if (!fromDescriptor) { - continue; - } - THashSet<TString> columns(fromDescriptor->begin(), fromDescriptor->end()); - columns.insert(columnsUsedInOperations.begin(), columnsUsedInOperations.end()); - table.RichYPath->Columns(TVector<TString>(columns.begin(), columns.end())); - } - return newTableList; -} - -template <class TSpec> -TSimpleOperationIo CreateSimpleOperationIo( - const IStructuredJob& structuredJob, - const TOperationPreparer& preparer, - const TSpec& spec, - const TOperationOptions& options, - bool allowSkiff) -{ - if (!std::holds_alternative<TVoidStructuredRowStream>(structuredJob.GetInputRowStreamDescription())) { - VerifyHasElements(GetStructuredInputs(spec), "input"); - } - - TUserJobFormatHints hints; - hints.InputFormatHints_ = GetInputFormatHints(spec); - hints.OutputFormatHints_ = GetOutputFormatHints(spec); - ENodeReaderFormat nodeReaderFormat = GetNodeReaderFormat(spec, allowSkiff); - - return CreateSimpleOperationIoHelper( - structuredJob, - preparer, - options, - CanonizeStructuredTableList(preparer.GetContext(), GetStructuredInputs(spec)), - CanonizeStructuredTableList(preparer.GetContext(), GetStructuredOutputs(spec)), - hints, - nodeReaderFormat, - GetColumnsUsedInOperation(spec)); -} - -template <class T> -TSimpleOperationIo CreateSimpleOperationIo( - const IJob& job, - const TOperationPreparer& preparer, - const TSimpleRawOperationIoSpec<T>& spec) -{ - auto getFormatOrDefault = [&] (const 
TMaybe<TFormat>& maybeFormat, const char* formatName) { - if (maybeFormat) { - return *maybeFormat; - } else if (spec.Format_) { - return *spec.Format_; - } else { - ythrow TApiUsageError() << "Neither " << formatName << "format nor default format is specified for raw operation"; - } - }; - - auto inputs = CanonizeYPaths(/* retryPolicy */ nullptr, preparer.GetContext(), spec.GetInputs()); - auto outputs = CanonizeYPaths(/* retryPolicy */ nullptr, preparer.GetContext(), spec.GetOutputs()); - - VerifyHasElements(inputs, "input"); - VerifyHasElements(outputs, "output"); - - TUserJobFormatHints hints; - - auto outputSchemas = PrepareOperation( - job, - TOperationPreparationContext( - inputs, - outputs, - preparer.GetContext(), - preparer.GetClientRetryPolicy(), - preparer.GetTransactionId()), - &inputs, - &outputs, - hints); - - Y_VERIFY(outputs.size() == outputSchemas.size()); - for (int i = 0; i < static_cast<int>(outputs.size()); ++i) { - if (!outputs[i].Schema_ && !outputSchemas[i].Columns().empty()) { - outputs[i].Schema_ = outputSchemas[i]; - } - } - - return TSimpleOperationIo { - inputs, - outputs, - - getFormatOrDefault(spec.InputFormat_, "input"), - getFormatOrDefault(spec.OutputFormat_, "output"), - - TVector<TSmallJobFile>{}, - }; -} - -//////////////////////////////////////////////////////////////////// - -TString GetJobStderrWithRetriesAndIgnoreErrors( - const IRequestRetryPolicyPtr& retryPolicy, - const TClientContext& context, - const TOperationId& operationId, - const TJobId& jobId, - const size_t stderrTailSize, - const TGetJobStderrOptions& options = TGetJobStderrOptions()) -{ - TString jobStderr; - try { - jobStderr = GetJobStderrWithRetries( - retryPolicy, - context, - operationId, - jobId, - options); - } catch (const TErrorResponse& e) { - YT_LOG_ERROR("Cannot get job stderr (OperationId: %v, JobId: %v, Error: %v)", - operationId, - jobId, - e.what()); - } - if (jobStderr.size() > stderrTailSize) { - jobStderr = jobStderr.substr(jobStderr.size() 
- stderrTailSize, stderrTailSize); - } - return jobStderr; -} - -TVector<TFailedJobInfo> GetFailedJobInfo( - const IClientRetryPolicyPtr& clientRetryPolicy, - const TClientContext& context, - const TOperationId& operationId, - const TGetFailedJobInfoOptions& options) -{ - const auto listJobsResult = ListJobs( - clientRetryPolicy->CreatePolicyForGenericRequest(), - context, - operationId, - TListJobsOptions() - .State(EJobState::Failed) - .Limit(options.MaxJobCount_)); - - const auto stderrTailSize = options.StderrTailSize_; - - TVector<TFailedJobInfo> result; - for (const auto& job : listJobsResult.Jobs) { - auto& info = result.emplace_back(); - Y_ENSURE(job.Id); - info.JobId = *job.Id; - info.Error = job.Error.GetOrElse(TYtError(TString("unknown error"))); - if (job.StderrSize.GetOrElse(0) != 0) { - // There are cases when due to bad luck we cannot read stderr even if - // list_jobs reports that stderr_size > 0. - // - // Such errors don't have special error code - // so we ignore all errors and try our luck on other jobs. - info.Stderr = GetJobStderrWithRetriesAndIgnoreErrors( - clientRetryPolicy->CreatePolicyForGenericRequest(), - context, - operationId, - *job.Id, - stderrTailSize); - } - } - return result; -} - -struct TGetJobsStderrOptions -{ - using TSelf = TGetJobsStderrOptions; - - // How many jobs to download. Which jobs will be chosen is undefined. - FLUENT_FIELD_DEFAULT(ui64, MaxJobCount, 10); - - // How much of stderr should be downloaded. 
- FLUENT_FIELD_DEFAULT(ui64, StderrTailSize, 64 * 1024); -}; - -static TVector<TString> GetJobsStderr( - const IClientRetryPolicyPtr& clientRetryPolicy, - const TClientContext& context, - const TOperationId& operationId, - const TGetJobsStderrOptions& options = TGetJobsStderrOptions()) -{ - const auto listJobsResult = ListJobs( - clientRetryPolicy->CreatePolicyForGenericRequest(), - context, - operationId, - TListJobsOptions().Limit(options.MaxJobCount_).WithStderr(true)); - const auto stderrTailSize = options.StderrTailSize_; - TVector<TString> result; - for (const auto& job : listJobsResult.Jobs) { - result.push_back( - // There are cases when due to bad luck we cannot read stderr even if - // list_jobs reports that stderr_size > 0. - // - // Such errors don't have special error code - // so we ignore all errors and try our luck on other jobs. - GetJobStderrWithRetriesAndIgnoreErrors( - clientRetryPolicy->CreatePolicyForGenericRequest(), - context, - operationId, - *job.Id, - stderrTailSize) - ); - } - return result; -} - -int CountIntermediateTables(const TStructuredJobTableList& tables) -{ - int result = 0; - for (const auto& table : tables) { - if (table.RichYPath) { - break; - } - ++result; - } - return result; -} - -//////////////////////////////////////////////////////////////////////////////// - -} // namespace - -//////////////////////////////////////////////////////////////////////////////// - -TSimpleOperationIo CreateSimpleOperationIoHelper( - const IStructuredJob& structuredJob, - const TOperationPreparer& preparer, - const TOperationOptions& options, - TStructuredJobTableList structuredInputs, - TStructuredJobTableList structuredOutputs, - TUserJobFormatHints hints, - ENodeReaderFormat nodeReaderFormat, - const THashSet<TString>& columnsUsedInOperations) -{ - auto intermediateInputTableCount = CountIntermediateTables(structuredInputs); - auto intermediateOutputTableCount = CountIntermediateTables(structuredOutputs); - - auto jobSchemaInferenceResult 
= PrepareOperation( - structuredJob, - TOperationPreparationContext( - structuredInputs, - structuredOutputs, - preparer.GetContext(), - preparer.GetClientRetryPolicy(), - preparer.GetTransactionId()), - &structuredInputs, - &structuredOutputs, - hints); - - TVector<TSmallJobFile> formatConfigList; - TFormatBuilder formatBuilder(preparer.GetClientRetryPolicy(), preparer.GetContext(), preparer.GetTransactionId(), options); - - auto [inputFormat, inputFormatConfig] = formatBuilder.CreateFormat( - structuredJob, - EIODirection::Input, - structuredInputs, - hints.InputFormatHints_, - nodeReaderFormat, - /* allowFormatFromTableAttribute = */ true); - - auto [outputFormat, outputFormatConfig] = formatBuilder.CreateFormat( - structuredJob, - EIODirection::Output, - structuredOutputs, - hints.OutputFormatHints_, - ENodeReaderFormat::Yson, - /* allowFormatFromTableAttribute = */ false); - - const bool inferOutputSchema = options.InferOutputSchema_.GetOrElse(preparer.GetContext().Config->InferTableSchema); - - auto outputPaths = GetPathList( - TStructuredJobTableList(structuredOutputs.begin() + intermediateOutputTableCount, structuredOutputs.end()), - TVector<TTableSchema>(jobSchemaInferenceResult.begin() + intermediateOutputTableCount, jobSchemaInferenceResult.end()), - inferOutputSchema); - - auto inputPaths = GetPathList( - ApplyProtobufColumnFilters( - TStructuredJobTableList(structuredInputs.begin() + intermediateInputTableCount, structuredInputs.end()), - preparer, - columnsUsedInOperations, - options), - /*schemaInferenceResult*/ Nothing(), - /*inferSchema*/ false); - - return TSimpleOperationIo { - inputPaths, - outputPaths, - - inputFormat, - outputFormat, - - CreateFormatConfig(inputFormatConfig, outputFormatConfig) - }; -} - -EOperationBriefState CheckOperation( - const IClientRetryPolicyPtr& clientRetryPolicy, - const TClientContext& context, - const TOperationId& operationId) -{ - auto attributes = GetOperation( - 
clientRetryPolicy->CreatePolicyForGenericRequest(), - context, - operationId, - TGetOperationOptions().AttributeFilter(TOperationAttributeFilter() - .Add(EOperationAttribute::State) - .Add(EOperationAttribute::Result))); - Y_VERIFY(attributes.BriefState, - "get_operation for operation %s has not returned \"state\" field", - GetGuidAsString(operationId).Data()); - if (*attributes.BriefState == EOperationBriefState::Completed) { - return EOperationBriefState::Completed; - } else if (*attributes.BriefState == EOperationBriefState::Aborted || *attributes.BriefState == EOperationBriefState::Failed) { - YT_LOG_ERROR("Operation %v %v (%v)", - operationId, - ToString(*attributes.BriefState), - ToString(TOperationExecutionTimeTracker::Get()->Finish(operationId))); - - auto failedJobInfoList = GetFailedJobInfo( - clientRetryPolicy, - context, - operationId, - TGetFailedJobInfoOptions()); - - Y_VERIFY(attributes.Result && attributes.Result->Error); - ythrow TOperationFailedError( - *attributes.BriefState == EOperationBriefState::Aborted - ? TOperationFailedError::Aborted - : TOperationFailedError::Failed, - operationId, - *attributes.Result->Error, - failedJobInfoList); - } - return EOperationBriefState::InProgress; -} - -void WaitForOperation( - const IClientRetryPolicyPtr& clientRetryPolicy, - const TClientContext& context, - const TOperationId& operationId) -{ - const TDuration checkOperationStateInterval = - UseLocalModeOptimization(context, clientRetryPolicy) - ? 
Min(TDuration::MilliSeconds(100), context.Config->OperationTrackerPollPeriod) - : context.Config->OperationTrackerPollPeriod; - - while (true) { - auto status = CheckOperation(clientRetryPolicy, context, operationId); - if (status == EOperationBriefState::Completed) { - YT_LOG_INFO("Operation %v completed (%v)", - operationId, - TOperationExecutionTimeTracker::Get()->Finish(operationId)); - break; - } - TWaitProxy::Get()->Sleep(checkOperationStateInterval); - } -} - -//////////////////////////////////////////////////////////////////////////////// - -namespace { - -TNode BuildAutoMergeSpec(const TAutoMergeSpec& options) -{ - TNode result; - if (options.Mode_) { - result["mode"] = ToString(*options.Mode_); - } - if (options.MaxIntermediateChunkCount_) { - result["max_intermediate_chunk_count"] = *options.MaxIntermediateChunkCount_; - } - if (options.ChunkCountPerMergeJob_) { - result["chunk_count_per_merge_job"] = *options.ChunkCountPerMergeJob_; - } - if (options.ChunkSizeThreshold_) { - result["chunk_size_threshold"] = *options.ChunkSizeThreshold_; - } - return result; -} - -TNode BuildJobProfilerSpec(const TJobProfilerSpec& profilerSpec) -{ - TNode result; - if (profilerSpec.ProfilingBinary_) { - result["binary"] = ToString(*profilerSpec.ProfilingBinary_); - } - if (profilerSpec.ProfilerType_) { - result["type"] = ToString(*profilerSpec.ProfilerType_); - } - if (profilerSpec.ProfilingProbability_) { - result["profiling_probability"] = *profilerSpec.ProfilingProbability_; - } - if (profilerSpec.SamplingFrequency_) { - result["sampling_frequency"] = *profilerSpec.SamplingFrequency_; - } - - return result; -} - -// Returns undefined node if resources doesn't contain any meaningful field -TNode BuildSchedulerResourcesSpec(const TSchedulerResources& resources) -{ - TNode result; - if (resources.UserSlots().Defined()) { - result["user_slots"] = *resources.UserSlots(); - } - if (resources.Cpu().Defined()) { - result["cpu"] = *resources.Cpu(); - } - if 
(resources.Memory().Defined()) { - result["memory"] = *resources.Memory(); - } - return result; -} - -void BuildUserJobFluently( - const TJobPreparer& preparer, - const TMaybe<TFormat>& inputFormat, - const TMaybe<TFormat>& outputFormat, - TFluentMap fluent) -{ - const auto& userJobSpec = preparer.GetSpec(); - TMaybe<i64> memoryLimit = userJobSpec.MemoryLimit_; - TMaybe<double> cpuLimit = userJobSpec.CpuLimit_; - TMaybe<ui16> portCount = userJobSpec.PortCount_; - - // Use 1MB extra tmpfs size by default, it helps to detect job sandbox as tmp directory - // for standard python libraries. See YTADMINREQ-14505 for more details. - auto tmpfsSize = preparer.GetSpec().ExtraTmpfsSize_.GetOrElse(DefaultExrtaTmpfsSize); - if (preparer.ShouldMountSandbox()) { - tmpfsSize += preparer.GetTotalFileSize(); - if (tmpfsSize == 0) { - // This can be a case for example when it is local mode and we don't upload binary. - // NOTE: YT doesn't like zero tmpfs size. - tmpfsSize = RoundUpFileSize(1); - } - memoryLimit = memoryLimit.GetOrElse(512ll << 20) + tmpfsSize; - } - - fluent - .Item("file_paths").List(preparer.GetFiles()) - .Item("command").Value(preparer.GetCommand()) - .Item("class_name").Value(preparer.GetClassName()) - .DoIf(!userJobSpec.Environment_.empty(), [&] (TFluentMap fluentMap) { - TNode environment; - for (const auto& item : userJobSpec.Environment_) { - environment[item.first] = item.second; - } - fluentMap.Item("environment").Value(environment); - }) - .DoIf(userJobSpec.DiskSpaceLimit_.Defined(), [&] (TFluentMap fluentMap) { - fluentMap.Item("disk_space_limit").Value(*userJobSpec.DiskSpaceLimit_); - }) - .DoIf(inputFormat.Defined(), [&] (TFluentMap fluentMap) { - fluentMap.Item("input_format").Value(inputFormat->Config); - }) - .DoIf(outputFormat.Defined(), [&] (TFluentMap fluentMap) { - fluentMap.Item("output_format").Value(outputFormat->Config); - }) - .DoIf(memoryLimit.Defined(), [&] (TFluentMap fluentMap) { - fluentMap.Item("memory_limit").Value(*memoryLimit); - 
}) - .DoIf(userJobSpec.MemoryReserveFactor_.Defined(), [&] (TFluentMap fluentMap) { - fluentMap.Item("memory_reserve_factor").Value(*userJobSpec.MemoryReserveFactor_); - }) - .DoIf(cpuLimit.Defined(), [&] (TFluentMap fluentMap) { - fluentMap.Item("cpu_limit").Value(*cpuLimit); - }) - .DoIf(portCount.Defined(), [&] (TFluentMap fluentMap) { - fluentMap.Item("port_count").Value(*portCount); - }) - .DoIf(userJobSpec.JobTimeLimit_.Defined(), [&] (TFluentMap fluentMap) { - fluentMap.Item("job_time_limit").Value(userJobSpec.JobTimeLimit_->MilliSeconds()); - }) - .DoIf(userJobSpec.NetworkProject_.Defined(), [&] (TFluentMap fluentMap) { - fluentMap.Item("network_project").Value(*userJobSpec.NetworkProject_); - }) - .DoIf(preparer.ShouldMountSandbox(), [&] (TFluentMap fluentMap) { - fluentMap.Item("tmpfs_path").Value("."); - fluentMap.Item("tmpfs_size").Value(tmpfsSize); - fluentMap.Item("copy_files").Value(true); - }) - .Item("profilers") - .BeginList() - .DoFor(userJobSpec.JobProfilers_, [&] (TFluentList list, const auto& jobProfiler) { - list.Item().Value(BuildJobProfilerSpec(jobProfiler)); - }) - .EndList(); -} - -template <typename T> -void BuildCommonOperationPart(const TConfigPtr& config, const TOperationSpecBase<T>& baseSpec, const TOperationOptions& options, TFluentMap fluent) -{ - const TProcessState* properties = TProcessState::Get(); - TString pool = config->Pool; - - if (baseSpec.Pool_) { - pool = *baseSpec.Pool_; - } - - fluent - .Item("started_by") - .BeginMap() - .Item("hostname").Value(properties->FqdnHostName) - .Item("pid").Value(properties->Pid) - .Item("user").Value(properties->UserName) - .Item("command").List(properties->CensoredCommandLine) - .Item("wrapper_version").Value(properties->ClientVersion) - .EndMap() - .DoIf(!pool.empty(), [&] (TFluentMap fluentMap) { - fluentMap.Item("pool").Value(pool); - }) - .DoIf(baseSpec.Weight_.Defined(), [&] (TFluentMap fluentMap) { - fluentMap.Item("weight").Value(*baseSpec.Weight_); - }) - 
.DoIf(baseSpec.TimeLimit_.Defined(), [&] (TFluentMap fluentMap) { - fluentMap.Item("time_limit").Value(baseSpec.TimeLimit_->MilliSeconds()); - }) - .DoIf(baseSpec.PoolTrees().Defined(), [&] (TFluentMap fluentMap) { - TNode poolTreesSpec = TNode::CreateList(); - for (const auto& tree : *baseSpec.PoolTrees()) { - poolTreesSpec.Add(tree); - } - fluentMap.Item("pool_trees").Value(poolTreesSpec); - }) - .DoIf(baseSpec.ResourceLimits().Defined(), [&] (TFluentMap fluentMap) { - auto resourceLimitsSpec = BuildSchedulerResourcesSpec(*baseSpec.ResourceLimits()); - if (!resourceLimitsSpec.IsUndefined()) { - fluentMap.Item("resource_limits").Value(std::move(resourceLimitsSpec)); - } - }) - .DoIf(options.SecureVault_.Defined(), [&] (TFluentMap fluentMap) { - Y_ENSURE(options.SecureVault_->IsMap(), - "SecureVault must be a map node, got " << options.SecureVault_->GetType()); - fluentMap.Item("secure_vault").Value(*options.SecureVault_); - }) - .DoIf(baseSpec.Title_.Defined(), [&] (TFluentMap fluentMap) { - fluentMap.Item("title").Value(*baseSpec.Title_); - }); -} - -template <typename TSpec> -void BuildCommonUserOperationPart(const TSpec& baseSpec, TNode* spec) -{ - if (baseSpec.MaxFailedJobCount_.Defined()) { - (*spec)["max_failed_job_count"] = *baseSpec.MaxFailedJobCount_; - } - if (baseSpec.FailOnJobRestart_.Defined()) { - (*spec)["fail_on_job_restart"] = *baseSpec.FailOnJobRestart_; - } - if (baseSpec.StderrTablePath_.Defined()) { - (*spec)["stderr_table_path"] = *baseSpec.StderrTablePath_; - } - if (baseSpec.CoreTablePath_.Defined()) { - (*spec)["core_table_path"] = *baseSpec.CoreTablePath_; - } - if (baseSpec.WaitingJobTimeout_.Defined()) { - (*spec)["waiting_job_timeout"] = baseSpec.WaitingJobTimeout_->MilliSeconds(); - } -} - -template <typename TSpec> -void BuildJobCountOperationPart(const TSpec& spec, TNode* nodeSpec) -{ - if (spec.JobCount_.Defined()) { - (*nodeSpec)["job_count"] = *spec.JobCount_; - } - if (spec.DataSizePerJob_.Defined()) { - 
(*nodeSpec)["data_size_per_job"] = *spec.DataSizePerJob_; - } -} - -template <typename TSpec> -void BuildPartitionCountOperationPart(const TSpec& spec, TNode* nodeSpec) -{ - if (spec.PartitionCount_.Defined()) { - (*nodeSpec)["partition_count"] = *spec.PartitionCount_; - } - if (spec.PartitionDataSize_.Defined()) { - (*nodeSpec)["partition_data_size"] = *spec.PartitionDataSize_; - } -} - -template <typename TSpec> -void BuildDataSizePerSortJobPart(const TSpec& spec, TNode* nodeSpec) -{ - if (spec.DataSizePerSortJob_.Defined()) { - (*nodeSpec)["data_size_per_sort_job"] = *spec.DataSizePerSortJob_; - } -} - -template <typename TSpec> -void BuildPartitionJobCountOperationPart(const TSpec& spec, TNode* nodeSpec) -{ - if (spec.PartitionJobCount_.Defined()) { - (*nodeSpec)["partition_job_count"] = *spec.PartitionJobCount_; - } - if (spec.DataSizePerPartitionJob_.Defined()) { - (*nodeSpec)["data_size_per_partition_job"] = *spec.DataSizePerPartitionJob_; - } -} - -template <typename TSpec> -void BuildMapJobCountOperationPart(const TSpec& spec, TNode* nodeSpec) -{ - if (spec.MapJobCount_.Defined()) { - (*nodeSpec)["map_job_count"] = *spec.MapJobCount_; - } - if (spec.DataSizePerMapJob_.Defined()) { - (*nodeSpec)["data_size_per_map_job"] = *spec.DataSizePerMapJob_; - } -} - -template <typename TSpec> -void BuildIntermediateDataPart(const TSpec& spec, TNode* nodeSpec) -{ - if (spec.IntermediateDataAccount_.Defined()) { - (*nodeSpec)["intermediate_data_account"] = *spec.IntermediateDataAccount_; - } - if (spec.IntermediateDataReplicationFactor_.Defined()) { - (*nodeSpec)["intermediate_data_replication_factor"] = *spec.IntermediateDataReplicationFactor_; - } -} - -//////////////////////////////////////////////////////////////////////////////// - -TNode MergeSpec(TNode dst, TNode spec, const TOperationOptions& options) -{ - MergeNodes(dst["spec"], spec); - if (options.Spec_) { - MergeNodes(dst["spec"], *options.Spec_); - } - return dst; -} - -template <typename TSpec> -void 
CreateDebugOutputTables(const TSpec& spec, const TOperationPreparer& preparer) -{ - if (spec.StderrTablePath_.Defined()) { - NYT::NDetail::Create( - preparer.GetClientRetryPolicy()->CreatePolicyForGenericRequest(), - preparer.GetContext(), - TTransactionId(), - *spec.StderrTablePath_, - NT_TABLE, - TCreateOptions() - .IgnoreExisting(true) - .Recursive(true)); - } - if (spec.CoreTablePath_.Defined()) { - NYT::NDetail::Create( - preparer.GetClientRetryPolicy()->CreatePolicyForGenericRequest(), - preparer.GetContext(), - TTransactionId(), - *spec.CoreTablePath_, - NT_TABLE, - TCreateOptions() - .IgnoreExisting(true) - .Recursive(true)); - } -} - -void CreateOutputTable( - const TOperationPreparer& preparer, - const TRichYPath& path) -{ - Y_ENSURE(path.Path_, "Output table is not set"); - Create( - preparer.GetClientRetryPolicy()->CreatePolicyForGenericRequest(), - preparer.GetContext(), preparer.GetTransactionId(), path.Path_, NT_TABLE, - TCreateOptions() - .IgnoreExisting(true) - .Recursive(true)); -} - -void CreateOutputTables( - const TOperationPreparer& preparer, - const TVector<TRichYPath>& paths) -{ - for (auto& path : paths) { - CreateOutputTable(preparer, path); - } -} - -void CheckInputTablesExist( - const TOperationPreparer& preparer, - const TVector<TRichYPath>& paths) -{ - Y_ENSURE(!paths.empty(), "Input tables are not set"); - for (auto& path : paths) { - auto curTransactionId = path.TransactionId_.GetOrElse(preparer.GetTransactionId()); - Y_ENSURE_EX( - Exists( - preparer.GetClientRetryPolicy()->CreatePolicyForGenericRequest(), - preparer.GetContext(), - curTransactionId, - path.Path_), - TApiUsageError() << "Input table '" << path.Path_ << "' doesn't exist"); - } -} - -void LogJob(const TOperationId& opId, const IJob* job, const char* type) -{ - if (job) { - YT_LOG_INFO("Operation %v; %v = %v", - opId, - type, - TJobFactory::Get()->GetJobName(job)); - } -} - -void LogYPaths(const TOperationId& opId, const TVector<TRichYPath>& paths, const char* type) -{ 
- for (size_t i = 0; i < paths.size(); ++i) { - YT_LOG_INFO("Operation %v; %v[%v] = %v", - opId, - type, - i, - paths[i].Path_); - } -} - -void LogYPath(const TOperationId& opId, const TRichYPath& path, const char* type) -{ - YT_LOG_INFO("Operation %v; %v = %v", - opId, - type, - path.Path_); -} - -TString AddModeToTitleIfDebug(const TString& title) { -#ifndef NDEBUG - return title + " (debug build)"; -#else - return title; -#endif -} - -} // namespace - -//////////////////////////////////////////////////////////////////////////////// - -template <typename T> -void DoExecuteMap( - const TOperationPtr& operation, - const TOperationPreparerPtr& preparer, - const TSimpleOperationIo& operationIo, - TMapOperationSpecBase<T> spec, - const IJobPtr& mapper, - const TOperationOptions& options) -{ - if (options.CreateDebugOutputTables_) { - CreateDebugOutputTables(spec, *preparer); - } - if (options.CreateOutputTables_) { - CheckInputTablesExist(*preparer, operationIo.Inputs); - CreateOutputTables(*preparer, operationIo.Outputs); - } - - TJobPreparer map( - *preparer, - spec.MapperSpec_, - *mapper, - operationIo.Outputs.size(), - operationIo.JobFiles, - options); - - spec.Title_ = spec.Title_.GetOrElse(AddModeToTitleIfDebug(map.GetClassName())); - - TNode specNode = BuildYsonNodeFluently() - .BeginMap().Item("spec").BeginMap() - .Item("mapper").DoMap([&] (TFluentMap fluent) { - BuildUserJobFluently( - map, - operationIo.InputFormat, - operationIo.OutputFormat, - fluent); - }) - .DoIf(spec.AutoMerge_.Defined(), [&] (TFluentMap fluent) { - auto autoMergeSpec = BuildAutoMergeSpec(*spec.AutoMerge_); - if (!autoMergeSpec.IsUndefined()) { - fluent.Item("auto_merge").Value(std::move(autoMergeSpec)); - } - }) - .Item("input_table_paths").List(operationIo.Inputs) - .Item("output_table_paths").List(operationIo.Outputs) - .DoIf(spec.Ordered_.Defined(), [&] (TFluentMap fluent) { - fluent.Item("ordered").Value(spec.Ordered_.GetRef()); - }) - .Do(std::bind(BuildCommonOperationPart<T>, 
preparer->GetContext().Config, spec, options, std::placeholders::_1)) - .EndMap().EndMap(); - - specNode["spec"]["job_io"]["control_attributes"]["enable_row_index"] = TNode(true); - specNode["spec"]["job_io"]["control_attributes"]["enable_range_index"] = TNode(true); - if (!preparer->GetContext().Config->TableWriter.Empty()) { - specNode["spec"]["job_io"]["table_writer"] = preparer->GetContext().Config->TableWriter; - } - - BuildCommonUserOperationPart(spec, &specNode["spec"]); - BuildJobCountOperationPart(spec, &specNode["spec"]); - - auto startOperation = [ - operation=operation.Get(), - spec=MergeSpec(std::move(specNode), preparer->GetContext().Config->Spec, options), - preparer, - operationIo, - mapper - ] () { - auto operationId = preparer->StartOperation(operation, "map", spec); - - LogJob(operationId, mapper.Get(), "mapper"); - LogYPaths(operationId, operationIo.Inputs, "input"); - LogYPaths(operationId, operationIo.Outputs, "output"); - - return operationId; - }; - operation->SetDelayedStartFunction(std::move(startOperation)); -} - -void ExecuteMap( - const TOperationPtr& operation, - const TOperationPreparerPtr& preparer, - const TMapOperationSpec& spec, - const ::TIntrusivePtr<IStructuredJob>& mapper, - const TOperationOptions& options) -{ - YT_LOG_DEBUG("Starting map operation (PreparationId: %v)", - preparer->GetPreparationId()); - auto operationIo = CreateSimpleOperationIo(*mapper, *preparer, spec, options, /* allowSkiff = */ true); - DoExecuteMap( - operation, - preparer, - operationIo, - spec, - mapper, - options); -} - -void ExecuteRawMap( - const TOperationPtr& operation, - const TOperationPreparerPtr& preparer, - const TRawMapOperationSpec& spec, - const ::TIntrusivePtr<IRawJob>& mapper, - const TOperationOptions& options) -{ - YT_LOG_DEBUG("Starting raw map operation (PreparationId: %v)", - preparer->GetPreparationId()); - auto operationIo = CreateSimpleOperationIo(*mapper, *preparer, spec); - DoExecuteMap( - operation, - preparer, - operationIo, 
- spec, - mapper, - options); -} - -//////////////////////////////////////////////////////////////////////////////// - -template <typename T> -void DoExecuteReduce( - const TOperationPtr& operation, - const TOperationPreparerPtr& preparer, - const TSimpleOperationIo& operationIo, - TReduceOperationSpecBase<T> spec, - const IJobPtr& reducer, - const TOperationOptions& options) -{ - if (options.CreateDebugOutputTables_) { - CreateDebugOutputTables(spec, *preparer); - } - if (options.CreateOutputTables_) { - CheckInputTablesExist(*preparer, operationIo.Inputs); - CreateOutputTables(*preparer, operationIo.Outputs); - } - - TJobPreparer reduce( - *preparer, - spec.ReducerSpec_, - *reducer, - operationIo.Outputs.size(), - operationIo.JobFiles, - options); - - spec.Title_ = spec.Title_.GetOrElse(AddModeToTitleIfDebug(reduce.GetClassName())); - - TNode specNode = BuildYsonNodeFluently() - .BeginMap().Item("spec").BeginMap() - .Item("reducer").DoMap([&] (TFluentMap fluent) { - BuildUserJobFluently( - reduce, - operationIo.InputFormat, - operationIo.OutputFormat, - fluent); - }) - .Item("sort_by").Value(spec.SortBy_) - .Item("reduce_by").Value(spec.ReduceBy_) - .DoIf(spec.JoinBy_.Defined(), [&] (TFluentMap fluent) { - fluent.Item("join_by").Value(spec.JoinBy_.GetRef()); - }) - .DoIf(spec.EnableKeyGuarantee_.Defined(), [&] (TFluentMap fluent) { - fluent.Item("enable_key_guarantee").Value(spec.EnableKeyGuarantee_.GetRef()); - }) - .Item("input_table_paths").List(operationIo.Inputs) - .Item("output_table_paths").List(operationIo.Outputs) - .Item("job_io").BeginMap() - .Item("control_attributes").BeginMap() - .Item("enable_key_switch").Value(true) - .Item("enable_row_index").Value(true) - .Item("enable_range_index").Value(true) - .EndMap() - .DoIf(!preparer->GetContext().Config->TableWriter.Empty(), [&] (TFluentMap fluent) { - fluent.Item("table_writer").Value(preparer->GetContext().Config->TableWriter); - }) - .EndMap() - .DoIf(spec.AutoMerge_.Defined(), [&] (TFluentMap fluent) 
{ - fluent.Item("auto_merge").Value(BuildAutoMergeSpec(*spec.AutoMerge_)); - }) - .Do(std::bind(BuildCommonOperationPart<T>, preparer->GetContext().Config, spec, options, std::placeholders::_1)) - .EndMap().EndMap(); - - BuildCommonUserOperationPart(spec, &specNode["spec"]); - BuildJobCountOperationPart(spec, &specNode["spec"]); - - auto startOperation = [ - operation=operation.Get(), - spec=MergeSpec(std::move(specNode), preparer->GetContext().Config->Spec, options), - preparer, - operationIo, - reducer - ] () { - auto operationId = preparer->StartOperation(operation, "reduce", spec); - - LogJob(operationId, reducer.Get(), "reducer"); - LogYPaths(operationId, operationIo.Inputs, "input"); - LogYPaths(operationId, operationIo.Outputs, "output"); - - return operationId; - }; - - operation->SetDelayedStartFunction(std::move(startOperation)); -} - -void ExecuteReduce( - const TOperationPtr& operation, - const TOperationPreparerPtr& preparer, - const TReduceOperationSpec& spec, - const ::TIntrusivePtr<IStructuredJob>& reducer, - const TOperationOptions& options) -{ - YT_LOG_DEBUG("Starting reduce operation (PreparationId: %v)", - preparer->GetPreparationId()); - auto operationIo = CreateSimpleOperationIo(*reducer, *preparer, spec, options, /* allowSkiff = */ false); - DoExecuteReduce( - operation, - preparer, - operationIo, - spec, - reducer, - options); -} - -void ExecuteRawReduce( - const TOperationPtr& operation, - const TOperationPreparerPtr& preparer, - const TRawReduceOperationSpec& spec, - const ::TIntrusivePtr<IRawJob>& reducer, - const TOperationOptions& options) -{ - YT_LOG_DEBUG("Starting raw reduce operation (PreparationId: %v)", - preparer->GetPreparationId()); - auto operationIo = CreateSimpleOperationIo(*reducer, *preparer, spec); - DoExecuteReduce( - operation, - preparer, - operationIo, - spec, - reducer, - options); -} - -//////////////////////////////////////////////////////////////////////////////// - -template <typename T> -void 
DoExecuteJoinReduce( - const TOperationPtr& operation, - const TOperationPreparerPtr& preparer, - const TSimpleOperationIo& operationIo, - TJoinReduceOperationSpecBase<T> spec, - const IJobPtr& reducer, - const TOperationOptions& options) -{ - if (options.CreateDebugOutputTables_) { - CreateDebugOutputTables(spec, *preparer); - } - if (options.CreateOutputTables_) { - CheckInputTablesExist(*preparer, operationIo.Inputs); - CreateOutputTables(*preparer, operationIo.Outputs); - } - - TJobPreparer reduce( - *preparer, - spec.ReducerSpec_, - *reducer, - operationIo.Outputs.size(), - operationIo.JobFiles, - options); - - spec.Title_ = spec.Title_.GetOrElse(AddModeToTitleIfDebug(reduce.GetClassName())); - - TNode specNode = BuildYsonNodeFluently() - .BeginMap().Item("spec").BeginMap() - .Item("reducer").DoMap([&] (TFluentMap fluent) { - BuildUserJobFluently( - reduce, - operationIo.InputFormat, - operationIo.OutputFormat, - fluent); - }) - .Item("join_by").Value(spec.JoinBy_) - .Item("input_table_paths").List(operationIo.Inputs) - .Item("output_table_paths").List(operationIo.Outputs) - .Item("job_io").BeginMap() - .Item("control_attributes").BeginMap() - .Item("enable_key_switch").Value(true) - .Item("enable_row_index").Value(true) - .Item("enable_range_index").Value(true) - .EndMap() - .DoIf(!preparer->GetContext().Config->TableWriter.Empty(), [&] (TFluentMap fluent) { - fluent.Item("table_writer").Value(preparer->GetContext().Config->TableWriter); - }) - .EndMap() - .Do(std::bind(BuildCommonOperationPart<T>, preparer->GetContext().Config, spec, options, std::placeholders::_1)) - .EndMap().EndMap(); - - BuildCommonUserOperationPart(spec, &specNode["spec"]); - BuildJobCountOperationPart(spec, &specNode["spec"]); - - auto startOperation = [ - operation=operation.Get(), - spec=MergeSpec(std::move(specNode), preparer->GetContext().Config->Spec, options), - preparer, - reducer, - operationIo - ] () { - auto operationId = preparer->StartOperation(operation, "join_reduce", 
spec); - - LogJob(operationId, reducer.Get(), "reducer"); - LogYPaths(operationId, operationIo.Inputs, "input"); - LogYPaths(operationId, operationIo.Outputs, "output"); - - return operationId; - }; - - operation->SetDelayedStartFunction(std::move(startOperation)); -} - -void ExecuteJoinReduce( - const TOperationPtr& operation, - const TOperationPreparerPtr& preparer, - const TJoinReduceOperationSpec& spec, - const ::TIntrusivePtr<IStructuredJob>& reducer, - const TOperationOptions& options) -{ - YT_LOG_DEBUG("Starting join reduce operation (PreparationId: %v)", - preparer->GetPreparationId()); - auto operationIo = CreateSimpleOperationIo(*reducer, *preparer, spec, options, /* allowSkiff = */ false); - return DoExecuteJoinReduce( - operation, - preparer, - operationIo, - spec, - reducer, - options); -} - -void ExecuteRawJoinReduce( - const TOperationPtr& operation, - const TOperationPreparerPtr& preparer, - const TRawJoinReduceOperationSpec& spec, - const ::TIntrusivePtr<IRawJob>& reducer, - const TOperationOptions& options) -{ - YT_LOG_DEBUG("Starting raw join reduce operation (PreparationId: %v)", - preparer->GetPreparationId()); - auto operationIo = CreateSimpleOperationIo(*reducer, *preparer, spec); - return DoExecuteJoinReduce( - operation, - preparer, - operationIo, - spec, - reducer, - options); -} - -//////////////////////////////////////////////////////////////////////////////// - -template <typename T> -void DoExecuteMapReduce( - const TOperationPtr& operation, - const TOperationPreparerPtr& preparer, - const TMapReduceOperationIo& operationIo, - TMapReduceOperationSpecBase<T> spec, - const IJobPtr& mapper, - const IJobPtr& reduceCombiner, - const IJobPtr& reducer, - const TOperationOptions& options) -{ - TVector<TRichYPath> allOutputs; - allOutputs.insert(allOutputs.end(), operationIo.MapOutputs.begin(), operationIo.MapOutputs.end()); - allOutputs.insert(allOutputs.end(), operationIo.Outputs.begin(), operationIo.Outputs.end()); - - if 
(options.CreateDebugOutputTables_) { - CreateDebugOutputTables(spec, *preparer); - } - if (options.CreateOutputTables_) { - CheckInputTablesExist(*preparer, operationIo.Inputs); - CreateOutputTables(*preparer, allOutputs); - } - - TSortColumns sortBy = spec.SortBy_; - TSortColumns reduceBy = spec.ReduceBy_; - - if (sortBy.Parts_.empty()) { - sortBy = reduceBy; - } - - const bool hasMapper = mapper != nullptr; - const bool hasCombiner = reduceCombiner != nullptr; - - TVector<TRichYPath> files; - - TJobPreparer reduce( - *preparer, - spec.ReducerSpec_, - *reducer, - operationIo.Outputs.size(), - operationIo.ReducerJobFiles, - options); - - TString title; - - TNode specNode = BuildYsonNodeFluently() - .BeginMap().Item("spec").BeginMap() - .DoIf(hasMapper, [&] (TFluentMap fluent) { - TJobPreparer map( - *preparer, - spec.MapperSpec_, - *mapper, - 1 + operationIo.MapOutputs.size(), - operationIo.MapperJobFiles, - options); - fluent.Item("mapper").DoMap([&] (TFluentMap fluent) { - BuildUserJobFluently( - std::cref(map), - *operationIo.MapperInputFormat, - *operationIo.MapperOutputFormat, - fluent); - }); - - title = "mapper:" + map.GetClassName() + " "; - }) - .DoIf(hasCombiner, [&] (TFluentMap fluent) { - TJobPreparer combine( - *preparer, - spec.ReduceCombinerSpec_, - *reduceCombiner, - size_t(1), - operationIo.ReduceCombinerJobFiles, - options); - fluent.Item("reduce_combiner").DoMap([&] (TFluentMap fluent) { - BuildUserJobFluently( - combine, - *operationIo.ReduceCombinerInputFormat, - *operationIo.ReduceCombinerOutputFormat, - fluent); - }); - title += "combiner:" + combine.GetClassName() + " "; - }) - .Item("reducer").DoMap([&] (TFluentMap fluent) { - BuildUserJobFluently( - reduce, - operationIo.ReducerInputFormat, - operationIo.ReducerOutputFormat, - fluent); - }) - .Item("sort_by").Value(sortBy) - .Item("reduce_by").Value(reduceBy) - .Item("input_table_paths").List(operationIo.Inputs) - .Item("output_table_paths").List(allOutputs) - 
.Item("mapper_output_table_count").Value(operationIo.MapOutputs.size()) - .DoIf(spec.ForceReduceCombiners_.Defined(), [&] (TFluentMap fluent) { - fluent.Item("force_reduce_combiners").Value(*spec.ForceReduceCombiners_); - }) - .Item("map_job_io").BeginMap() - .Item("control_attributes").BeginMap() - .Item("enable_row_index").Value(true) - .Item("enable_range_index").Value(true) - .EndMap() - .DoIf(!preparer->GetContext().Config->TableWriter.Empty(), [&] (TFluentMap fluent) { - fluent.Item("table_writer").Value(preparer->GetContext().Config->TableWriter); - }) - .EndMap() - .Item("sort_job_io").BeginMap() - .Item("control_attributes").BeginMap() - .Item("enable_key_switch").Value(true) - .EndMap() - .DoIf(!preparer->GetContext().Config->TableWriter.Empty(), [&] (TFluentMap fluent) { - fluent.Item("table_writer").Value(preparer->GetContext().Config->TableWriter); - }) - .EndMap() - .Item("reduce_job_io").BeginMap() - .Item("control_attributes").BeginMap() - .Item("enable_key_switch").Value(true) - .EndMap() - .DoIf(!preparer->GetContext().Config->TableWriter.Empty(), [&] (TFluentMap fluent) { - fluent.Item("table_writer").Value(preparer->GetContext().Config->TableWriter); - }) - .EndMap() - .Do([&] (TFluentMap) { - spec.Title_ = spec.Title_.GetOrElse(AddModeToTitleIfDebug(title + "reducer:" + reduce.GetClassName())); - }) - .Do(std::bind(BuildCommonOperationPart<T>, preparer->GetContext().Config, spec, options, std::placeholders::_1)) - .EndMap().EndMap(); - - if (spec.Ordered_) { - specNode["spec"]["ordered"] = *spec.Ordered_; - } - - BuildCommonUserOperationPart(spec, &specNode["spec"]); - BuildMapJobCountOperationPart(spec, &specNode["spec"]); - BuildPartitionCountOperationPart(spec, &specNode["spec"]); - BuildIntermediateDataPart(spec, &specNode["spec"]); - BuildDataSizePerSortJobPart(spec, &specNode["spec"]); - - auto startOperation = [ - operation=operation.Get(), - spec=MergeSpec(std::move(specNode), preparer->GetContext().Config->Spec, options), - preparer, - 
mapper, - reduceCombiner, - reducer, - inputs=operationIo.Inputs, - allOutputs - ] () { - auto operationId = preparer->StartOperation(operation, "map_reduce", spec); - - LogJob(operationId, mapper.Get(), "mapper"); - LogJob(operationId, reduceCombiner.Get(), "reduce_combiner"); - LogJob(operationId, reducer.Get(), "reducer"); - LogYPaths(operationId, inputs, "input"); - LogYPaths(operationId, allOutputs, "output"); - - return operationId; - }; - - operation->SetDelayedStartFunction(std::move(startOperation)); -} - -void ExecuteMapReduce( - const TOperationPtr& operation, - const TOperationPreparerPtr& preparer, - const TMapReduceOperationSpec& spec_, - const ::TIntrusivePtr<IStructuredJob>& mapper, - const ::TIntrusivePtr<IStructuredJob>& reduceCombiner, - const ::TIntrusivePtr<IStructuredJob>& reducer, - const TOperationOptions& options) -{ - YT_LOG_DEBUG("Starting map-reduce operation (PreparationId: %v)", - preparer->GetPreparationId()); - TMapReduceOperationSpec spec = spec_; - - TMapReduceOperationIo operationIo; - auto structuredInputs = CanonizeStructuredTableList(preparer->GetContext(), spec.GetStructuredInputs()); - auto structuredMapOutputs = CanonizeStructuredTableList(preparer->GetContext(), spec.GetStructuredMapOutputs()); - auto structuredOutputs = CanonizeStructuredTableList(preparer->GetContext(), spec.GetStructuredOutputs()); - - const bool inferOutputSchema = options.InferOutputSchema_.GetOrElse(preparer->GetContext().Config->InferTableSchema); - - TVector<TTableSchema> currentInferenceResult; - - auto fixSpec = [&] (const TFormat& format) { - if (format.IsYamredDsv()) { - spec.SortBy_.Parts_.clear(); - spec.ReduceBy_.Parts_.clear(); - - const TYamredDsvAttributes attributes = format.GetYamredDsvAttributes(); - for (auto& column : attributes.KeyColumnNames) { - spec.SortBy_.Parts_.push_back(column); - spec.ReduceBy_.Parts_.push_back(column); - } - for (const auto& column : attributes.SubkeyColumnNames) { - spec.SortBy_.Parts_.push_back(column); - 
} - } - }; - - VerifyHasElements(structuredInputs, "inputs"); - - TFormatBuilder formatBuilder( - preparer->GetClientRetryPolicy(), - preparer->GetContext(), - preparer->GetTransactionId(), - options); - - if (mapper) { - auto mapperOutputDescription = - spec.GetIntermediateMapOutputDescription() - .GetOrElse(TUnspecifiedTableStructure()); - TStructuredJobTableList mapperOutput = { - TStructuredJobTable::Intermediate(mapperOutputDescription), - }; - - for (const auto& table : structuredMapOutputs) { - mapperOutput.push_back(TStructuredJobTable{table.Description, table.RichYPath}); - } - - auto hints = spec.MapperFormatHints_; - - auto mapperInferenceResult = PrepareOperation<TStructuredJobTableList>( - *mapper, - TOperationPreparationContext( - structuredInputs, - mapperOutput, - preparer->GetContext(), - preparer->GetClientRetryPolicy(), - preparer->GetTransactionId()), - &structuredInputs, - /* outputs */ nullptr, - hints); - - auto nodeReaderFormat = NodeReaderFormatFromHintAndGlobalConfig(spec.MapperFormatHints_); - - auto [inputFormat, inputFormatConfig] = formatBuilder.CreateFormat( - *mapper, - EIODirection::Input, - structuredInputs, - hints.InputFormatHints_, - nodeReaderFormat, - /* allowFormatFromTableAttribute */ true); - - auto [outputFormat, outputFormatConfig] = formatBuilder.CreateFormat( - *mapper, - EIODirection::Output, - mapperOutput, - hints.OutputFormatHints_, - ENodeReaderFormat::Yson, - /* allowFormatFromTableAttribute */ false); - - operationIo.MapperJobFiles = CreateFormatConfig(inputFormatConfig, outputFormatConfig); - operationIo.MapperInputFormat = inputFormat; - operationIo.MapperOutputFormat = outputFormat; - - Y_VERIFY(mapperInferenceResult.size() >= 1); - currentInferenceResult = TVector<TTableSchema>{mapperInferenceResult[0]}; - // The first output as it corresponds to the intermediate data. 
- TVector<TTableSchema> additionalOutputsInferenceResult(mapperInferenceResult.begin() + 1, mapperInferenceResult.end()); - - operationIo.MapOutputs = GetPathList( - structuredMapOutputs, - additionalOutputsInferenceResult, - inferOutputSchema); - } - - if (reduceCombiner) { - const bool isFirstStep = !mapper; - TStructuredJobTableList inputs; - if (isFirstStep) { - inputs = structuredInputs; - } else { - auto reduceCombinerIntermediateInput = - spec.GetIntermediateReduceCombinerInputDescription() - .GetOrElse(TUnspecifiedTableStructure()); - inputs = { - TStructuredJobTable::Intermediate(reduceCombinerIntermediateInput), - }; - } - - auto reduceCombinerOutputDescription = spec.GetIntermediateReduceCombinerOutputDescription() - .GetOrElse(TUnspecifiedTableStructure()); - - TStructuredJobTableList outputs = { - TStructuredJobTable::Intermediate(reduceCombinerOutputDescription), - }; - - auto hints = spec.ReduceCombinerFormatHints_; - - if (isFirstStep) { - currentInferenceResult = PrepareOperation<TStructuredJobTableList>( - *reduceCombiner, - TOperationPreparationContext( - inputs, - outputs, - preparer->GetContext(), - preparer->GetClientRetryPolicy(), - preparer->GetTransactionId()), - &inputs, - /* outputs */ nullptr, - hints); - } else { - currentInferenceResult = PrepareOperation<TStructuredJobTableList>( - *reduceCombiner, - TSpeculativeOperationPreparationContext( - currentInferenceResult, - inputs, - outputs), - /* inputs */ nullptr, - /* outputs */ nullptr, - hints); - } - - auto [inputFormat, inputFormatConfig] = formatBuilder.CreateFormat( - *reduceCombiner, - EIODirection::Input, - inputs, - hints.InputFormatHints_, - ENodeReaderFormat::Yson, - /* allowFormatFromTableAttribute = */ isFirstStep); - - auto [outputFormat, outputFormatConfig] = formatBuilder.CreateFormat( - *reduceCombiner, - EIODirection::Output, - outputs, - hints.OutputFormatHints_, - ENodeReaderFormat::Yson, - /* allowFormatFromTableAttribute = */ false); - - 
operationIo.ReduceCombinerJobFiles = CreateFormatConfig(inputFormatConfig, outputFormatConfig); - operationIo.ReduceCombinerInputFormat = inputFormat; - operationIo.ReduceCombinerOutputFormat = outputFormat; - - if (isFirstStep) { - fixSpec(*operationIo.ReduceCombinerInputFormat); - } - } - - const bool isFirstStep = (!mapper && !reduceCombiner); - TStructuredJobTableList reducerInputs; - if (isFirstStep) { - reducerInputs = structuredInputs; - } else { - auto reducerInputDescription = - spec.GetIntermediateReducerInputDescription() - .GetOrElse(TUnspecifiedTableStructure()); - reducerInputs = { - TStructuredJobTable::Intermediate(reducerInputDescription), - }; - } - - auto hints = spec.ReducerFormatHints_; - - TVector<TTableSchema> reducerInferenceResult; - if (isFirstStep) { - reducerInferenceResult = PrepareOperation( - *reducer, - TOperationPreparationContext( - structuredInputs, - structuredOutputs, - preparer->GetContext(), - preparer->GetClientRetryPolicy(), - preparer->GetTransactionId()), - &structuredInputs, - &structuredOutputs, - hints); - } else { - reducerInferenceResult = PrepareOperation<TStructuredJobTableList>( - *reducer, - TSpeculativeOperationPreparationContext( - currentInferenceResult, - reducerInputs, - structuredOutputs), - /* inputs */ nullptr, - &structuredOutputs, - hints); - } - - auto [inputFormat, inputFormatConfig] = formatBuilder.CreateFormat( - *reducer, - EIODirection::Input, - reducerInputs, - hints.InputFormatHints_, - ENodeReaderFormat::Yson, - /* allowFormatFromTableAttribute = */ isFirstStep); - - auto [outputFormat, outputFormatConfig] = formatBuilder.CreateFormat( - *reducer, - EIODirection::Output, - ToStructuredJobTableList(spec.GetStructuredOutputs()), - hints.OutputFormatHints_, - ENodeReaderFormat::Yson, - /* allowFormatFromTableAttribute = */ false); - operationIo.ReducerJobFiles = CreateFormatConfig(inputFormatConfig, outputFormatConfig); - operationIo.ReducerInputFormat = inputFormat; - 
operationIo.ReducerOutputFormat = outputFormat; - - if (isFirstStep) { - fixSpec(operationIo.ReducerInputFormat); - } - - operationIo.Inputs = GetPathList( - ApplyProtobufColumnFilters( - structuredInputs, - *preparer, - GetColumnsUsedInOperation(spec), - options), - /* jobSchemaInferenceResult */ Nothing(), - /* inferSchema */ false); - - operationIo.Outputs = GetPathList( - structuredOutputs, - reducerInferenceResult, - inferOutputSchema); - - VerifyHasElements(operationIo.Outputs, "outputs"); - - return DoExecuteMapReduce( - operation, - preparer, - operationIo, - spec, - mapper, - reduceCombiner, - reducer, - options); -} - -void ExecuteRawMapReduce( - const TOperationPtr& operation, - const TOperationPreparerPtr& preparer, - const TRawMapReduceOperationSpec& spec, - const ::TIntrusivePtr<IRawJob>& mapper, - const ::TIntrusivePtr<IRawJob>& reduceCombiner, - const ::TIntrusivePtr<IRawJob>& reducer, - const TOperationOptions& options) -{ - YT_LOG_DEBUG("Starting raw map-reduce operation (PreparationId: %v)", - preparer->GetPreparationId()); - TMapReduceOperationIo operationIo; - operationIo.Inputs = CanonizeYPaths(/* retryPolicy */ nullptr, preparer->GetContext(), spec.GetInputs()); - operationIo.MapOutputs = CanonizeYPaths(/* retryPolicy */ nullptr, preparer->GetContext(), spec.GetMapOutputs()); - operationIo.Outputs = CanonizeYPaths(/* retryPolicy */ nullptr, preparer->GetContext(), spec.GetOutputs()); - - VerifyHasElements(operationIo.Inputs, "inputs"); - VerifyHasElements(operationIo.Outputs, "outputs"); - - auto getFormatOrDefault = [&] (const TMaybe<TFormat>& maybeFormat, const TMaybe<TFormat> stageDefaultFormat, const char* formatName) { - if (maybeFormat) { - return *maybeFormat; - } else if (stageDefaultFormat) { - return *stageDefaultFormat; - } else { - ythrow TApiUsageError() << "Cannot derive " << formatName; - } - }; - - if (mapper) { - operationIo.MapperInputFormat = getFormatOrDefault(spec.MapperInputFormat_, spec.MapperFormat_, "mapper input 
format"); - operationIo.MapperOutputFormat = getFormatOrDefault(spec.MapperOutputFormat_, spec.MapperFormat_, "mapper output format"); - } - - if (reduceCombiner) { - operationIo.ReduceCombinerInputFormat = getFormatOrDefault(spec.ReduceCombinerInputFormat_, spec.ReduceCombinerFormat_, "reduce combiner input format"); - operationIo.ReduceCombinerOutputFormat = getFormatOrDefault(spec.ReduceCombinerOutputFormat_, spec.ReduceCombinerFormat_, "reduce combiner output format"); - } - - operationIo.ReducerInputFormat = getFormatOrDefault(spec.ReducerInputFormat_, spec.ReducerFormat_, "reducer input format"); - operationIo.ReducerOutputFormat = getFormatOrDefault(spec.ReducerOutputFormat_, spec.ReducerFormat_, "reducer output format"); - - return DoExecuteMapReduce( - operation, - preparer, - operationIo, - spec, - mapper, - reduceCombiner, - reducer, - options); -} - -void ExecuteSort( - const TOperationPtr& operation, - const TOperationPreparerPtr& preparer, - const TSortOperationSpec& spec, - const TOperationOptions& options) -{ - YT_LOG_DEBUG("Starting sort operation (PreparationId: %v)", - preparer->GetPreparationId()); - auto inputs = CanonizeYPaths(/* retryPolicy */ nullptr, preparer->GetContext(), spec.Inputs_); - auto output = CanonizeYPath(nullptr, preparer->GetContext(), spec.Output_); - - if (options.CreateOutputTables_) { - CheckInputTablesExist(*preparer, inputs); - CreateOutputTable(*preparer, output); - } - - TNode specNode = BuildYsonNodeFluently() - .BeginMap().Item("spec").BeginMap() - .Item("input_table_paths").List(inputs) - .Item("output_table_path").Value(output) - .Item("sort_by").Value(spec.SortBy_) - .DoIf(spec.SchemaInferenceMode_.Defined(), [&] (TFluentMap fluent) { - fluent.Item("schema_inference_mode").Value(ToString(*spec.SchemaInferenceMode_)); - }) - .Do(std::bind(BuildCommonOperationPart<TSortOperationSpec>, preparer->GetContext().Config, spec, options, std::placeholders::_1)) - .EndMap().EndMap(); - - 
BuildPartitionCountOperationPart(spec, &specNode["spec"]); - BuildPartitionJobCountOperationPart(spec, &specNode["spec"]); - BuildIntermediateDataPart(spec, &specNode["spec"]); - - auto startOperation = [ - operation=operation.Get(), - spec=MergeSpec(std::move(specNode), preparer->GetContext().Config->Spec, options), - preparer, - inputs, - output - ] () { - auto operationId = preparer->StartOperation(operation, "sort", spec); - - LogYPaths(operationId, inputs, "input"); - LogYPath(operationId, output, "output"); - - return operationId; - }; - - operation->SetDelayedStartFunction(std::move(startOperation)); -} - -void ExecuteMerge( - const TOperationPtr& operation, - const TOperationPreparerPtr& preparer, - const TMergeOperationSpec& spec, - const TOperationOptions& options) -{ - YT_LOG_DEBUG("Starting merge operation (PreparationId: %v)", - preparer->GetPreparationId()); - auto inputs = CanonizeYPaths(/* retryPolicy */ nullptr, preparer->GetContext(), spec.Inputs_); - auto output = CanonizeYPath(nullptr, preparer->GetContext(), spec.Output_); - - if (options.CreateOutputTables_) { - CheckInputTablesExist(*preparer, inputs); - CreateOutputTable(*preparer, output); - } - - TNode specNode = BuildYsonNodeFluently() - .BeginMap().Item("spec").BeginMap() - .Item("input_table_paths").List(inputs) - .Item("output_table_path").Value(output) - .Item("mode").Value(ToString(spec.Mode_)) - .Item("combine_chunks").Value(spec.CombineChunks_) - .Item("force_transform").Value(spec.ForceTransform_) - .Item("merge_by").Value(spec.MergeBy_) - .DoIf(spec.SchemaInferenceMode_.Defined(), [&] (TFluentMap fluent) { - fluent.Item("schema_inference_mode").Value(ToString(*spec.SchemaInferenceMode_)); - }) - .Do(std::bind(BuildCommonOperationPart<TMergeOperationSpec>, preparer->GetContext().Config, spec, options, std::placeholders::_1)) - .EndMap().EndMap(); - - BuildJobCountOperationPart(spec, &specNode["spec"]); - - auto startOperation = [ - operation=operation.Get(), - 
spec=MergeSpec(std::move(specNode), preparer->GetContext().Config->Spec, options), - preparer, - inputs, - output - ] () { - auto operationId = preparer->StartOperation(operation, "merge", spec); - - LogYPaths(operationId, inputs, "input"); - LogYPath(operationId, output, "output"); - - return operationId; - }; - - operation->SetDelayedStartFunction(std::move(startOperation)); -} - -void ExecuteErase( - const TOperationPtr& operation, - const TOperationPreparerPtr& preparer, - const TEraseOperationSpec& spec, - const TOperationOptions& options) -{ - YT_LOG_DEBUG("Starting erase operation (PreparationId: %v)", - preparer->GetPreparationId()); - auto tablePath = CanonizeYPath(nullptr, preparer->GetContext(), spec.TablePath_); - - TNode specNode = BuildYsonNodeFluently() - .BeginMap().Item("spec").BeginMap() - .Item("table_path").Value(tablePath) - .Item("combine_chunks").Value(spec.CombineChunks_) - .DoIf(spec.SchemaInferenceMode_.Defined(), [&] (TFluentMap fluent) { - fluent.Item("schema_inference_mode").Value(ToString(*spec.SchemaInferenceMode_)); - }) - .Do(std::bind(BuildCommonOperationPart<TEraseOperationSpec>, preparer->GetContext().Config, spec, options, std::placeholders::_1)) - .EndMap().EndMap(); - - auto startOperation = [ - operation=operation.Get(), - spec=MergeSpec(std::move(specNode), preparer->GetContext().Config->Spec, options), - preparer, - tablePath - ] () { - auto operationId = preparer->StartOperation(operation, "erase", spec); - - LogYPath(operationId, tablePath, "table_path"); - - return operationId; - }; - - operation->SetDelayedStartFunction(std::move(startOperation)); -} - -void ExecuteRemoteCopy( - const TOperationPtr& operation, - const TOperationPreparerPtr& preparer, - const TRemoteCopyOperationSpec& spec, - const TOperationOptions& options) -{ - YT_LOG_DEBUG("Starting remote copy operation (PreparationId: %v)", - preparer->GetPreparationId()); - auto inputs = CanonizeYPaths(/* retryPolicy */ nullptr, preparer->GetContext(), 
spec.Inputs_); - auto output = CanonizeYPath(nullptr, preparer->GetContext(), spec.Output_); - - if (options.CreateOutputTables_) { - CreateOutputTable(*preparer, output); - } - - Y_ENSURE_EX(!spec.ClusterName_.empty(), TApiUsageError() << "ClusterName parameter is required"); - - TNode specNode = BuildYsonNodeFluently() - .BeginMap().Item("spec").BeginMap() - .Item("cluster_name").Value(spec.ClusterName_) - .Item("input_table_paths").List(inputs) - .Item("output_table_path").Value(output) - .DoIf(spec.NetworkName_.Defined(), [&] (TFluentMap fluent) { - fluent.Item("network_name").Value(*spec.NetworkName_); - }) - .DoIf(spec.SchemaInferenceMode_.Defined(), [&] (TFluentMap fluent) { - fluent.Item("schema_inference_mode").Value(ToString(*spec.SchemaInferenceMode_)); - }) - .Item("copy_attributes").Value(spec.CopyAttributes_) - .DoIf(!spec.AttributeKeys_.empty(), [&] (TFluentMap fluent) { - Y_ENSURE_EX(spec.CopyAttributes_, TApiUsageError() << - "Specifying nonempty AttributeKeys in RemoteCopy " - "doesn't make sense without CopyAttributes == true"); - fluent.Item("attribute_keys").List(spec.AttributeKeys_); - }) - .Do(std::bind(BuildCommonOperationPart<TRemoteCopyOperationSpec>, preparer->GetContext().Config, spec, options, std::placeholders::_1)) - .EndMap().EndMap(); - - auto startOperation = [ - operation=operation.Get(), - spec=MergeSpec(specNode, preparer->GetContext().Config->Spec, options), - preparer, - inputs, - output - ] () { - auto operationId = preparer->StartOperation(operation, "remote_copy", spec); - - LogYPaths(operationId, inputs, "input"); - LogYPath(operationId, output, "output"); - - return operationId; - }; - - operation->SetDelayedStartFunction(std::move(startOperation)); -} - -void ExecuteVanilla( - const TOperationPtr& operation, - const TOperationPreparerPtr& preparer, - const TVanillaOperationSpec& spec, - const TOperationOptions& options) -{ - YT_LOG_DEBUG("Starting vanilla operation (PreparationId: %v)", - preparer->GetPreparationId()); - 
- auto addTask = [&](TFluentMap fluent, const TVanillaTask& task) { - Y_VERIFY(task.Job_.Get()); - if (std::holds_alternative<TVoidStructuredRowStream>(task.Job_->GetOutputRowStreamDescription())) { - Y_ENSURE_EX(task.Outputs_.empty(), - TApiUsageError() << "Vanilla task with void IVanillaJob doesn't expect output tables"); - TJobPreparer jobPreparer( - *preparer, - task.Spec_, - *task.Job_, - /* outputTableCount */ 0, - /* smallFileList */ {}, - options); - fluent - .Item(task.Name_).BeginMap() - .Item("job_count").Value(task.JobCount_) - .DoIf(task.NetworkProject_.Defined(), [&](TFluentMap fluent) { - fluent.Item("network_project").Value(*task.NetworkProject_); - }) - .Do([&] (TFluentMap fluent) { - BuildUserJobFluently( - std::cref(jobPreparer), - /* inputFormat */ Nothing(), - /* outputFormat */ Nothing(), - fluent); - }) - .EndMap(); - } else { - auto operationIo = CreateSimpleOperationIo( - *task.Job_, - *preparer, - task, - options, - false); - Y_ENSURE_EX(operationIo.Outputs.size() > 0, - TApiUsageError() << "Vanilla task with IVanillaJob that has table writer expects output tables"); - if (options.CreateOutputTables_) { - CreateOutputTables(*preparer, operationIo.Outputs); - } - TJobPreparer jobPreparer( - *preparer, - task.Spec_, - *task.Job_, - operationIo.Outputs.size(), - operationIo.JobFiles, - options); - fluent - .Item(task.Name_).BeginMap() - .Item("job_count").Value(task.JobCount_) - .DoIf(task.NetworkProject_.Defined(), [&](TFluentMap fluent) { - fluent.Item("network_project").Value(*task.NetworkProject_); - }) - .Do([&] (TFluentMap fluent) { - BuildUserJobFluently( - std::cref(jobPreparer), - /* inputFormat */ Nothing(), - operationIo.OutputFormat, - fluent); - }) - .Item("output_table_paths").List(operationIo.Outputs) - .Item("job_io").BeginMap() - .DoIf(!preparer->GetContext().Config->TableWriter.Empty(), [&](TFluentMap fluent) { - fluent.Item("table_writer").Value(preparer->GetContext().Config->TableWriter); - }) - 
.Item("control_attributes").BeginMap() - .Item("enable_row_index").Value(TNode(true)) - .Item("enable_range_index").Value(TNode(true)) - .EndMap() - .EndMap() - .EndMap(); - } - }; - - if (options.CreateDebugOutputTables_) { - CreateDebugOutputTables(spec, *preparer); - } - - TNode specNode = BuildYsonNodeFluently() - .BeginMap().Item("spec").BeginMap() - .Item("tasks").DoMapFor(spec.Tasks_, addTask) - .Do(std::bind(BuildCommonOperationPart<TVanillaOperationSpec>, preparer->GetContext().Config, spec, options, std::placeholders::_1)) - .EndMap().EndMap(); - - BuildCommonUserOperationPart(spec, &specNode["spec"]); - - auto startOperation = [operation=operation.Get(), spec=MergeSpec(std::move(specNode), preparer->GetContext().Config->Spec, options), preparer] () { - auto operationId = preparer->StartOperation(operation, "vanilla", spec, /* useStartOperationRequest */ true); - return operationId; - }; - - operation->SetDelayedStartFunction(std::move(startOperation)); -} - -//////////////////////////////////////////////////////////////////////////////// - -class TOperation::TOperationImpl - : public TThrRefBase -{ -public: - TOperationImpl( - IClientRetryPolicyPtr clientRetryPolicy, - TClientContext context, - const TMaybe<TOperationId>& operationId = {}) - : ClientRetryPolicy_(clientRetryPolicy) - , Context_(std::move(context)) - , Id_(operationId) - , PreparedPromise_(::NThreading::NewPromise<void>()) - , StartedPromise_(::NThreading::NewPromise<void>()) - { - if (Id_) { - PreparedPromise_.SetValue(); - StartedPromise_.SetValue(); - } else { - PreparedPromise_.GetFuture().Subscribe([this_=::TIntrusivePtr(this)] (const ::NThreading::TFuture<void>& preparedResult) { - try { - preparedResult.GetValue(); - } catch (...) 
{ - this_->StartedPromise_.SetException(std::current_exception()); - return; - } - }); - } - } - - const TOperationId& GetId() const; - TString GetWebInterfaceUrl() const; - - void OnPrepared(); - void SetDelayedStartFunction(std::function<TOperationId()> start); - void Start(); - bool IsStarted() const; - void OnPreparationException(std::exception_ptr e); - - TString GetStatus(); - void OnStatusUpdated(const TString& newStatus); - - ::NThreading::TFuture<void> GetPreparedFuture(); - ::NThreading::TFuture<void> GetStartedFuture(); - ::NThreading::TFuture<void> Watch(TClientPtr client); - - EOperationBriefState GetBriefState(); - TMaybe<TYtError> GetError(); - TJobStatistics GetJobStatistics(); - TMaybe<TOperationBriefProgress> GetBriefProgress(); - void AbortOperation(); - void CompleteOperation(); - void SuspendOperation(const TSuspendOperationOptions& options); - void ResumeOperation(const TResumeOperationOptions& options); - TOperationAttributes GetAttributes(const TGetOperationOptions& options); - void UpdateParameters(const TUpdateOperationParametersOptions& options); - TJobAttributes GetJob(const TJobId& jobId, const TGetJobOptions& options); - TListJobsResult ListJobs(const TListJobsOptions& options); - - void AsyncFinishOperation(TOperationAttributes operationAttributes); - void FinishWithException(std::exception_ptr exception); - void UpdateBriefProgress(TMaybe<TOperationBriefProgress> briefProgress); - void AnalyzeUnrecognizedSpec(TNode unrecognizedSpec); - - const TClientContext& GetContext() const; - -private: - void OnStarted(const TOperationId& operationId); - - void UpdateAttributesAndCall(bool needJobStatistics, std::function<void(const TOperationAttributes&)> func); - - void SyncFinishOperationImpl(const TOperationAttributes&); - static void* SyncFinishOperationProc(void* ); - - void ValidateOperationStarted() const; - -private: - IClientRetryPolicyPtr ClientRetryPolicy_; - const TClientContext Context_; - TMaybe<TOperationId> Id_; - TMutex Lock_; 
- - ::NThreading::TPromise<void> PreparedPromise_; - ::NThreading::TPromise<void> StartedPromise_; - TMaybe<::NThreading::TPromise<void>> CompletePromise_; - - std::function<TOperationId()> DelayedStartFunction_; - TString Status_; - TOperationAttributes Attributes_; -}; - -//////////////////////////////////////////////////////////////////////////////// - -class TOperationPollerItem - : public IYtPollerItem -{ -public: - TOperationPollerItem(::TIntrusivePtr<TOperation::TOperationImpl> operationImpl) - : OperationImpl_(std::move(operationImpl)) - { } - - void PrepareRequest(TRawBatchRequest* batchRequest) override - { - auto filter = TOperationAttributeFilter() - .Add(EOperationAttribute::State) - .Add(EOperationAttribute::BriefProgress) - .Add(EOperationAttribute::Result); - - if (!UnrecognizedSpecAnalyzed_) { - filter.Add(EOperationAttribute::UnrecognizedSpec); - } - - OperationState_ = batchRequest->GetOperation( - OperationImpl_->GetId(), - TGetOperationOptions().AttributeFilter(filter)); - } - - EStatus OnRequestExecuted() override - { - try { - const auto& attributes = OperationState_.GetValue(); - if (!UnrecognizedSpecAnalyzed_ && !attributes.UnrecognizedSpec.Empty()) { - OperationImpl_->AnalyzeUnrecognizedSpec(*attributes.UnrecognizedSpec); - UnrecognizedSpecAnalyzed_ = true; - } - Y_VERIFY(attributes.BriefState, - "get_operation for operation %s has not returned \"state\" field", - GetGuidAsString(OperationImpl_->GetId()).Data()); - if (*attributes.BriefState != EOperationBriefState::InProgress) { - OperationImpl_->AsyncFinishOperation(attributes); - return PollBreak; - } else { - OperationImpl_->UpdateBriefProgress(attributes.BriefProgress); - } - } catch (const TErrorResponse& e) { - if (!IsRetriable(e)) { - OperationImpl_->FinishWithException(std::current_exception()); - return PollBreak; - } - } catch (const std::exception& e) { - OperationImpl_->FinishWithException(std::current_exception()); - return PollBreak; - } - return PollContinue; - } - - void 
OnItemDiscarded() override { - OperationImpl_->FinishWithException(std::make_exception_ptr(yexception() << "Operation cancelled")); - } - -private: - ::TIntrusivePtr<TOperation::TOperationImpl> OperationImpl_; - ::NThreading::TFuture<TOperationAttributes> OperationState_; - bool UnrecognizedSpecAnalyzed_ = false; -}; - -//////////////////////////////////////////////////////////////////////////////// - -const TOperationId& TOperation::TOperationImpl::GetId() const -{ - ValidateOperationStarted(); - return *Id_; -} - -TString TOperation::TOperationImpl::GetWebInterfaceUrl() const -{ - ValidateOperationStarted(); - return GetOperationWebInterfaceUrl(Context_.ServerName, *Id_); -} - -void TOperation::TOperationImpl::OnPrepared() -{ - Y_VERIFY(!PreparedPromise_.HasException() && !PreparedPromise_.HasValue()); - PreparedPromise_.SetValue(); -} - -void TOperation::TOperationImpl::SetDelayedStartFunction(std::function<TOperationId()> start) -{ - DelayedStartFunction_ = std::move(start); -} - -void TOperation::TOperationImpl::Start() -{ - { - auto guard = Guard(Lock_); - if (Id_) { - ythrow TApiUsageError() << "Start() should not be called on running operations"; - } - } - GetPreparedFuture().GetValueSync(); - - std::function<TOperationId()> startStuff; - { - auto guard = Guard(Lock_); - startStuff.swap(DelayedStartFunction_); - } - if (!startStuff) { - ythrow TApiUsageError() << "Seems that Start() was called multiple times. If not, contact yt@"; - } - - TOperationId operationId; - try { - operationId = startStuff(); - } catch (...) 
{ - auto exception = std::current_exception(); - StartedPromise_.SetException(exception); - std::rethrow_exception(exception); - } - OnStarted(operationId); -} - -bool TOperation::TOperationImpl::IsStarted() const { - auto guard = Guard(Lock_); - return bool(Id_); -} - -void TOperation::TOperationImpl::OnPreparationException(std::exception_ptr e) -{ - Y_VERIFY(!PreparedPromise_.HasValue() && !PreparedPromise_.HasException()); - PreparedPromise_.SetException(e); -} - -TString TOperation::TOperationImpl::GetStatus() -{ - { - auto guard = Guard(Lock_); - if (!Id_) { - return Status_; - } - } - TMaybe<TString> state; - UpdateAttributesAndCall(false, [&] (const TOperationAttributes& attributes) { - state = attributes.State; - }); - - return "On YT cluster: " + state.GetOrElse("undefined state"); -} - -void TOperation::TOperationImpl::OnStatusUpdated(const TString& newStatus) -{ - auto guard = Guard(Lock_); - Status_ = newStatus; -} - -::NThreading::TFuture<void> TOperation::TOperationImpl::GetPreparedFuture() -{ - return PreparedPromise_.GetFuture(); -} - -::NThreading::TFuture<void> TOperation::TOperationImpl::GetStartedFuture() -{ - return StartedPromise_.GetFuture(); -} - -::NThreading::TFuture<void> TOperation::TOperationImpl::Watch(TClientPtr client) -{ - { - auto guard = Guard(Lock_); - if (CompletePromise_) { - return *CompletePromise_; - } - CompletePromise_ = ::NThreading::NewPromise<void>(); - } - GetStartedFuture().Subscribe([ - this_=::TIntrusivePtr(this), - client=std::move(client) - ] (const ::NThreading::TFuture<void>& startedResult) { - try { - startedResult.GetValue(); - } catch (...) 
{ - this_->CompletePromise_->SetException(std::current_exception()); - return; - } - client->GetYtPoller().Watch(::MakeIntrusive<TOperationPollerItem>(this_)); - auto operationId = this_->GetId(); - auto registry = TAbortableRegistry::Get(); - registry->Add( - operationId, - ::MakeIntrusive<TOperationAbortable>(this_->ClientRetryPolicy_, this_->Context_, operationId)); - // We have to own an IntrusivePtr to registry to prevent use-after-free - auto removeOperation = [registry, operationId] (const ::NThreading::TFuture<void>&) { - registry->Remove(operationId); - }; - this_->CompletePromise_->GetFuture().Subscribe(removeOperation); - }); - - return *CompletePromise_; -} - -EOperationBriefState TOperation::TOperationImpl::GetBriefState() -{ - ValidateOperationStarted(); - EOperationBriefState result = EOperationBriefState::InProgress; - UpdateAttributesAndCall(false, [&] (const TOperationAttributes& attributes) { - Y_VERIFY(attributes.BriefState, - "get_operation for operation %s has not returned \"state\" field", - GetGuidAsString(*Id_).Data()); - result = *attributes.BriefState; - }); - return result; -} - -TMaybe<TYtError> TOperation::TOperationImpl::GetError() -{ - ValidateOperationStarted(); - TMaybe<TYtError> result; - UpdateAttributesAndCall(false, [&] (const TOperationAttributes& attributes) { - Y_VERIFY(attributes.Result); - result = attributes.Result->Error; - }); - return result; -} - -TJobStatistics TOperation::TOperationImpl::GetJobStatistics() -{ - ValidateOperationStarted(); - TJobStatistics result; - UpdateAttributesAndCall(true, [&] (const TOperationAttributes& attributes) { - if (attributes.Progress) { - result = attributes.Progress->JobStatistics; - } - }); - return result; -} - -TMaybe<TOperationBriefProgress> TOperation::TOperationImpl::GetBriefProgress() -{ - ValidateOperationStarted(); - { - auto g = Guard(Lock_); - if (CompletePromise_.Defined()) { - // Poller do this job for us - return Attributes_.BriefProgress; - } - } - 
TMaybe<TOperationBriefProgress> result; - UpdateAttributesAndCall(false, [&] (const TOperationAttributes& attributes) { - result = attributes.BriefProgress; - }); - return result; -} - -void TOperation::TOperationImpl::UpdateBriefProgress(TMaybe<TOperationBriefProgress> briefProgress) -{ - auto g = Guard(Lock_); - Attributes_.BriefProgress = std::move(briefProgress); -} - -void TOperation::TOperationImpl::AnalyzeUnrecognizedSpec(TNode unrecognizedSpec) -{ - static const TVector<TVector<TString>> knownUnrecognizedSpecFieldPaths = { - {"mapper", "class_name"}, - {"reducer", "class_name"}, - {"reduce_combiner", "class_name"}, - }; - - auto removeByPath = [] (TNode& node, auto pathBegin, auto pathEnd, auto& removeByPath) { - if (pathBegin == pathEnd) { - return; - } - if (!node.IsMap()) { - return; - } - auto* child = node.AsMap().FindPtr(*pathBegin); - if (!child) { - return; - } - removeByPath(*child, std::next(pathBegin), pathEnd, removeByPath); - if (std::next(pathBegin) == pathEnd || (child->IsMap() && child->Empty())) { - node.AsMap().erase(*pathBegin); - } - }; - - Y_VERIFY(unrecognizedSpec.IsMap()); - for (const auto& knownFieldPath : knownUnrecognizedSpecFieldPaths) { - Y_VERIFY(!knownFieldPath.empty()); - removeByPath(unrecognizedSpec, knownFieldPath.cbegin(), knownFieldPath.cend(), removeByPath); - } - - if (!unrecognizedSpec.Empty()) { - YT_LOG_INFO( - "WARNING! 
Unrecognized spec for operation %s is not empty " - "(fields added by the YT API library are excluded): %s", - GetGuidAsString(*Id_).Data(), - NodeToYsonString(unrecognizedSpec).Data()); - } -} - -void TOperation::TOperationImpl::OnStarted(const TOperationId& operationId) -{ - auto guard = Guard(Lock_); - Y_VERIFY(!Id_, - "OnStarted() called with operationId = %s for operation with id %s", - GetGuidAsString(operationId).Data(), - GetGuidAsString(*Id_).Data()); - Id_ = operationId; - - Y_VERIFY(!StartedPromise_.HasValue() && !StartedPromise_.HasException()); - StartedPromise_.SetValue(); -} - -void TOperation::TOperationImpl::UpdateAttributesAndCall(bool needJobStatistics, std::function<void(const TOperationAttributes&)> func) -{ - { - auto g = Guard(Lock_); - if (Attributes_.BriefState - && *Attributes_.BriefState != EOperationBriefState::InProgress - && (!needJobStatistics || Attributes_.Progress)) - { - func(Attributes_); - return; - } - } - - TOperationAttributes attributes = NDetail::GetOperation( - ClientRetryPolicy_->CreatePolicyForGenericRequest(), - Context_, - *Id_, - TGetOperationOptions().AttributeFilter(TOperationAttributeFilter() - .Add(EOperationAttribute::Result) - .Add(EOperationAttribute::Progress) - .Add(EOperationAttribute::State) - .Add(EOperationAttribute::BriefProgress))); - - func(attributes); - - Y_ENSURE(attributes.BriefState); - if (*attributes.BriefState != EOperationBriefState::InProgress) { - auto g = Guard(Lock_); - Attributes_ = std::move(attributes); - } -} - -void TOperation::TOperationImpl::FinishWithException(std::exception_ptr e) -{ - CompletePromise_->SetException(std::move(e)); -} - -void TOperation::TOperationImpl::AbortOperation() -{ - ValidateOperationStarted(); - NYT::NDetail::AbortOperation(ClientRetryPolicy_->CreatePolicyForGenericRequest(), Context_, *Id_); -} - -void TOperation::TOperationImpl::CompleteOperation() -{ - ValidateOperationStarted(); - 
NYT::NDetail::CompleteOperation(ClientRetryPolicy_->CreatePolicyForGenericRequest(), Context_, *Id_); -} - -void TOperation::TOperationImpl::SuspendOperation(const TSuspendOperationOptions& options) -{ - ValidateOperationStarted(); - NYT::NDetail::SuspendOperation(ClientRetryPolicy_->CreatePolicyForGenericRequest(), Context_, *Id_, options); -} - -void TOperation::TOperationImpl::ResumeOperation(const TResumeOperationOptions& options) -{ - ValidateOperationStarted(); - NYT::NDetail::ResumeOperation(ClientRetryPolicy_->CreatePolicyForGenericRequest(), Context_, *Id_, options); -} - -TOperationAttributes TOperation::TOperationImpl::GetAttributes(const TGetOperationOptions& options) -{ - ValidateOperationStarted(); - return NYT::NDetail::GetOperation(ClientRetryPolicy_->CreatePolicyForGenericRequest(), Context_, *Id_, options); -} - -void TOperation::TOperationImpl::UpdateParameters(const TUpdateOperationParametersOptions& options) -{ - ValidateOperationStarted(); - return NYT::NDetail::UpdateOperationParameters(ClientRetryPolicy_->CreatePolicyForGenericRequest(), Context_, *Id_, options); -} - -TJobAttributes TOperation::TOperationImpl::GetJob(const TJobId& jobId, const TGetJobOptions& options) -{ - ValidateOperationStarted(); - return NYT::NDetail::GetJob(ClientRetryPolicy_->CreatePolicyForGenericRequest(), Context_, *Id_, jobId, options); -} - -TListJobsResult TOperation::TOperationImpl::ListJobs(const TListJobsOptions& options) -{ - ValidateOperationStarted(); - return NYT::NDetail::ListJobs(ClientRetryPolicy_->CreatePolicyForGenericRequest(), Context_, *Id_, options); -} - -struct TAsyncFinishOperationsArgs -{ - ::TIntrusivePtr<TOperation::TOperationImpl> OperationImpl; - TOperationAttributes OperationAttributes; -}; - -void TOperation::TOperationImpl::AsyncFinishOperation(TOperationAttributes operationAttributes) -{ - auto args = new TAsyncFinishOperationsArgs; - args->OperationImpl = this; - args->OperationAttributes = std::move(operationAttributes); - - 
TThread thread(TThread::TParams(&TOperation::TOperationImpl::SyncFinishOperationProc, args).SetName("finish operation")); - thread.Start(); - thread.Detach(); -} - -void* TOperation::TOperationImpl::SyncFinishOperationProc(void* pArgs) -{ - THolder<TAsyncFinishOperationsArgs> args(static_cast<TAsyncFinishOperationsArgs*>(pArgs)); - args->OperationImpl->SyncFinishOperationImpl(args->OperationAttributes); - return nullptr; -} - -void TOperation::TOperationImpl::SyncFinishOperationImpl(const TOperationAttributes& attributes) -{ - { - auto guard = Guard(Lock_); - Y_VERIFY(Id_); - } - Y_VERIFY(attributes.BriefState, - "get_operation for operation %s has not returned \"state\" field", - GetGuidAsString(*Id_).Data()); - Y_VERIFY(*attributes.BriefState != EOperationBriefState::InProgress); - - { - try { - // `attributes' that came from poller don't have JobStatistics - // so we call `GetJobStatistics' in order to get it from server - // and cache inside object. - GetJobStatistics(); - } catch (const TErrorResponse& ) { - // But if for any reason we failed to get attributes - // we complete operation using what we have. 
- auto g = Guard(Lock_); - Attributes_ = attributes; - } - } - - if (*attributes.BriefState == EOperationBriefState::Completed) { - CompletePromise_->SetValue(); - } else if (*attributes.BriefState == EOperationBriefState::Aborted || *attributes.BriefState == EOperationBriefState::Failed) { - Y_VERIFY(attributes.Result && attributes.Result->Error); - const auto& error = *attributes.Result->Error; - YT_LOG_ERROR("Operation %v is `%v' with error: %v", - *Id_, - ToString(*attributes.BriefState), - error.FullDescription()); - - TString additionalExceptionText; - TVector<TFailedJobInfo> failedJobStderrInfo; - if (*attributes.BriefState == EOperationBriefState::Failed) { - try { - failedJobStderrInfo = NYT::NDetail::GetFailedJobInfo(ClientRetryPolicy_, Context_, *Id_, TGetFailedJobInfoOptions()); - } catch (const std::exception& e) { - additionalExceptionText = "Cannot get job stderrs: "; - additionalExceptionText += e.what(); - } - } - CompletePromise_->SetException( - std::make_exception_ptr( - TOperationFailedError( - *attributes.BriefState == EOperationBriefState::Failed - ? 
TOperationFailedError::Failed - : TOperationFailedError::Aborted, - *Id_, - error, - failedJobStderrInfo) << additionalExceptionText)); - } -} - -void TOperation::TOperationImpl::ValidateOperationStarted() const -{ - auto guard = Guard(Lock_); - if (!Id_) { - ythrow TApiUsageError() << "Operation is not started"; - } -} - -const TClientContext& TOperation::TOperationImpl::GetContext() const -{ - return Context_; -} - -//////////////////////////////////////////////////////////////////////////////// - -TOperation::TOperation(TClientPtr client) - : Client_(std::move(client)) - , Impl_(::MakeIntrusive<TOperationImpl>(Client_->GetRetryPolicy(), Client_->GetContext())) -{ -} - -TOperation::TOperation(TOperationId id, TClientPtr client) - : Client_(std::move(client)) - , Impl_(::MakeIntrusive<TOperationImpl>(Client_->GetRetryPolicy(), Client_->GetContext(), id)) -{ -} - -const TOperationId& TOperation::GetId() const -{ - return Impl_->GetId(); -} - -TString TOperation::GetWebInterfaceUrl() const -{ - return Impl_->GetWebInterfaceUrl(); -} - -void TOperation::OnPrepared() -{ - Impl_->OnPrepared(); -} - -void TOperation::SetDelayedStartFunction(std::function<TOperationId()> start) -{ - Impl_->SetDelayedStartFunction(std::move(start)); -} - -void TOperation::Start() -{ - Impl_->Start(); -} - -bool TOperation::IsStarted() const -{ - return Impl_->IsStarted(); -} - -void TOperation::OnPreparationException(std::exception_ptr e) -{ - Impl_->OnPreparationException(std::move(e)); -} - -TString TOperation::GetStatus() const -{ - return Impl_->GetStatus(); -} - -void TOperation::OnStatusUpdated(const TString& newStatus) -{ - Impl_->OnStatusUpdated(newStatus); -} - -::NThreading::TFuture<void> TOperation::GetPreparedFuture() -{ - return Impl_->GetPreparedFuture(); -} - -::NThreading::TFuture<void> TOperation::GetStartedFuture() -{ - return Impl_->GetStartedFuture(); -} - -::NThreading::TFuture<void> TOperation::Watch() -{ - return Impl_->Watch(Client_); -} - -TVector<TFailedJobInfo> 
TOperation::GetFailedJobInfo(const TGetFailedJobInfoOptions& options) -{ - return NYT::NDetail::GetFailedJobInfo(Client_->GetRetryPolicy(), Client_->GetContext(), GetId(), options); -} - -EOperationBriefState TOperation::GetBriefState() -{ - return Impl_->GetBriefState(); -} - -TMaybe<TYtError> TOperation::GetError() -{ - return Impl_->GetError(); -} - -TJobStatistics TOperation::GetJobStatistics() -{ - return Impl_->GetJobStatistics(); -} - -TMaybe<TOperationBriefProgress> TOperation::GetBriefProgress() -{ - return Impl_->GetBriefProgress(); -} - -void TOperation::AbortOperation() -{ - Impl_->AbortOperation(); -} - -void TOperation::CompleteOperation() -{ - Impl_->CompleteOperation(); -} - -void TOperation::SuspendOperation(const TSuspendOperationOptions& options) -{ - Impl_->SuspendOperation(options); -} - -void TOperation::ResumeOperation(const TResumeOperationOptions& options) -{ - Impl_->ResumeOperation(options); -} - -TOperationAttributes TOperation::GetAttributes(const TGetOperationOptions& options) -{ - return Impl_->GetAttributes(options); -} - -void TOperation::UpdateParameters(const TUpdateOperationParametersOptions& options) -{ - Impl_->UpdateParameters(options); -} - -TJobAttributes TOperation::GetJob(const TJobId& jobId, const TGetJobOptions& options) -{ - return Impl_->GetJob(jobId, options); -} - -TListJobsResult TOperation::ListJobs(const TListJobsOptions& options) -{ - return Impl_->ListJobs(options); -} - -//////////////////////////////////////////////////////////////////////////////// - -struct TAsyncPrepareAndStartOperationArgs -{ - std::function<void()> PrepareAndStart; -}; - -void* SyncPrepareAndStartOperation(void* pArgs) -{ - THolder<TAsyncPrepareAndStartOperationArgs> args(static_cast<TAsyncPrepareAndStartOperationArgs*>(pArgs)); - args->PrepareAndStart(); - return nullptr; -} - -::TIntrusivePtr<TOperation> ProcessOperation( - NYT::NDetail::TClientPtr client, - std::function<void()> prepare, - ::TIntrusivePtr<TOperation> operation, - const 
TOperationOptions& options) -{ - auto prepareAndStart = [prepare = std::move(prepare), operation, mode = options.StartOperationMode_] () { - try { - prepare(); - operation->OnPrepared(); - } catch (...) { - operation->OnPreparationException(std::current_exception()); - } - if (mode >= TOperationOptions::EStartOperationMode::AsyncStart) { - try { - operation->Start(); - } catch (...) { } - } - }; - if (options.StartOperationMode_ >= TOperationOptions::EStartOperationMode::SyncStart) { - prepareAndStart(); - WaitIfRequired(operation, client, options); - } else { - auto args = new TAsyncPrepareAndStartOperationArgs; - args->PrepareAndStart = std::move(prepareAndStart); - - TThread thread(TThread::TParams(SyncPrepareAndStartOperation, args).SetName("prepare and start operation")); - thread.Start(); - thread.Detach(); - } - return operation; -} - -void WaitIfRequired(const TOperationPtr& operation, const TClientPtr& client, const TOperationOptions& options) -{ - auto retryPolicy = client->GetRetryPolicy(); - auto context = client->GetContext(); - if (options.StartOperationMode_ >= TOperationOptions::EStartOperationMode::SyncStart) { - operation->GetStartedFuture().GetValueSync(); - } - if (options.StartOperationMode_ == TOperationOptions::EStartOperationMode::SyncWait) { - auto finishedFuture = operation->Watch(); - TWaitProxy::Get()->WaitFuture(finishedFuture); - finishedFuture.GetValue(); - if (context.Config->WriteStderrSuccessfulJobs) { - auto stderrs = GetJobsStderr(retryPolicy, context, operation->GetId()); - for (const auto& jobStderr : stderrs) { - if (!jobStderr.empty()) { - Cerr << jobStderr << '\n'; - } - } - } - } -} - -//////////////////////////////////////////////////////////////////////////////// - -void ResetUseClientProtobuf(const char* methodName) -{ - Cerr << "WARNING! OPTION `TConfig::UseClientProtobuf' IS RESET TO `true'; " - << "IT CAN DETERIORATE YOUR CODE PERFORMANCE!!! 
DON'T USE DEPRECATED METHOD `" - << "TOperationIOSpec::" << methodName << "' TO AVOID THIS RESET" << Endl; - // Give users some time to contemplate about usage of deprecated functions. - Cerr << "Sleeping for 5 seconds..." << Endl; - Sleep(TDuration::Seconds(5)); - TConfig::Get()->UseClientProtobuf = true; -} - -} // namespace NDetail - -//////////////////////////////////////////////////////////////////////////////// - -::TIntrusivePtr<INodeReaderImpl> CreateJobNodeReader(TRawTableReaderPtr rawTableReader) -{ - if (auto schema = NDetail::GetJobInputSkiffSchema()) { - return new NDetail::TSkiffTableReader(rawTableReader, schema); - } else { - return new TNodeTableReader(rawTableReader); - } -} - -::TIntrusivePtr<IYaMRReaderImpl> CreateJobYaMRReader(TRawTableReaderPtr rawTableReader) -{ - return new TYaMRTableReader(rawTableReader); -} - -::TIntrusivePtr<IProtoReaderImpl> CreateJobProtoReader(TRawTableReaderPtr rawTableReader) -{ - if (TConfig::Get()->UseClientProtobuf) { - return new TProtoTableReader( - rawTableReader, - GetJobInputDescriptors()); - } else { - return new TLenvalProtoTableReader( - rawTableReader, - GetJobInputDescriptors()); - } -} - -::TIntrusivePtr<INodeWriterImpl> CreateJobNodeWriter(THolder<IProxyOutput> rawJobWriter) -{ - return new TNodeTableWriter(std::move(rawJobWriter)); -} - -::TIntrusivePtr<IYaMRWriterImpl> CreateJobYaMRWriter(THolder<IProxyOutput> rawJobWriter) -{ - return new TYaMRTableWriter(std::move(rawJobWriter)); -} - -::TIntrusivePtr<IProtoWriterImpl> CreateJobProtoWriter(THolder<IProxyOutput> rawJobWriter) -{ - if (TConfig::Get()->UseClientProtobuf) { - return new TProtoTableWriter( - std::move(rawJobWriter), - GetJobOutputDescriptors()); - } else { - return new TLenvalProtoTableWriter( - std::move(rawJobWriter), - GetJobOutputDescriptors()); - } -} - -//////////////////////////////////////////////////////////////////////////////// - -} // namespace NYT diff --git a/yt/cpp/mapreduce/client/operation.h 
b/yt/cpp/mapreduce/client/operation.h deleted file mode 100644 index 141161b0b7..0000000000 --- a/yt/cpp/mapreduce/client/operation.h +++ /dev/null @@ -1,203 +0,0 @@ -#pragma once - -#include "fwd.h" -#include "structured_table_formats.h" -#include "operation_preparer.h" - -#include <yt/cpp/mapreduce/http/fwd.h> - -#include <yt/cpp/mapreduce/interface/client.h> -#include <yt/cpp/mapreduce/interface/operation.h> -#include <yt/cpp/mapreduce/interface/retry_policy.h> - -#include <util/generic/ptr.h> -#include <util/generic/vector.h> - -namespace NYT::NDetail { - -//////////////////////////////////////////////////////////////////////////////// - -class TOperation - : public IOperation -{ -public: - class TOperationImpl; - -public: - explicit TOperation(TClientPtr client); - TOperation(TOperationId id, TClientPtr client); - virtual const TOperationId& GetId() const override; - virtual TString GetWebInterfaceUrl() const override; - - void OnPrepared(); - void SetDelayedStartFunction(std::function<TOperationId()> start); - virtual void Start() override; - void OnPreparationException(std::exception_ptr e); - virtual bool IsStarted() const override; - - virtual TString GetStatus() const override; - void OnStatusUpdated(const TString& newStatus); - - virtual ::NThreading::TFuture<void> GetPreparedFuture() override; - virtual ::NThreading::TFuture<void> GetStartedFuture() override; - virtual ::NThreading::TFuture<void> Watch() override; - - virtual TVector<TFailedJobInfo> GetFailedJobInfo(const TGetFailedJobInfoOptions& options = TGetFailedJobInfoOptions()) override; - virtual EOperationBriefState GetBriefState() override; - virtual TMaybe<TYtError> GetError() override; - virtual TJobStatistics GetJobStatistics() override; - virtual TMaybe<TOperationBriefProgress> GetBriefProgress() override; - virtual void AbortOperation() override; - virtual void CompleteOperation() override; - virtual void SuspendOperation(const TSuspendOperationOptions& options) override; - virtual void 
ResumeOperation(const TResumeOperationOptions& options) override; - virtual TOperationAttributes GetAttributes(const TGetOperationOptions& options) override; - virtual void UpdateParameters(const TUpdateOperationParametersOptions& options) override; - virtual TJobAttributes GetJob(const TJobId& jobId, const TGetJobOptions& options) override; - virtual TListJobsResult ListJobs(const TListJobsOptions& options) override; - -private: - TClientPtr Client_; - ::TIntrusivePtr<TOperationImpl> Impl_; -}; - -using TOperationPtr = ::TIntrusivePtr<TOperation>; - -//////////////////////////////////////////////////////////////////////////////// - -struct TSimpleOperationIo -{ - TVector<TRichYPath> Inputs; - TVector<TRichYPath> Outputs; - - TFormat InputFormat; - TFormat OutputFormat; - - TVector<TSmallJobFile> JobFiles; -}; - -TSimpleOperationIo CreateSimpleOperationIoHelper( - const IStructuredJob& structuredJob, - const TOperationPreparer& preparer, - const TOperationOptions& options, - TStructuredJobTableList structuredInputs, - TStructuredJobTableList structuredOutputs, - TUserJobFormatHints hints, - ENodeReaderFormat nodeReaderFormat, - const THashSet<TString>& columnsUsedInOperations); - -//////////////////////////////////////////////////////////////////////////////// - -void ExecuteMap( - const TOperationPtr& operation, - const TOperationPreparerPtr& preparer, - const TMapOperationSpec& spec, - const ::TIntrusivePtr<IStructuredJob>& mapper, - const TOperationOptions& options); - -void ExecuteRawMap( - const TOperationPtr& operation, - const TOperationPreparerPtr& preparer, - const TRawMapOperationSpec& spec, - const ::TIntrusivePtr<IRawJob>& mapper, - const TOperationOptions& options); - -void ExecuteReduce( - const TOperationPtr& operation, - const TOperationPreparerPtr& preparer, - const TReduceOperationSpec& spec, - const ::TIntrusivePtr<IStructuredJob>& reducer, - const TOperationOptions& options); - -void ExecuteRawReduce( - const TOperationPtr& operation, - const 
TOperationPreparerPtr& preparer, - const TRawReduceOperationSpec& spec, - const ::TIntrusivePtr<IRawJob>& reducer, - const TOperationOptions& options); - -void ExecuteJoinReduce( - const TOperationPtr& operation, - const TOperationPreparerPtr& preparer, - const TJoinReduceOperationSpec& spec, - const ::TIntrusivePtr<IStructuredJob>& reducer, - const TOperationOptions& options); - -void ExecuteRawJoinReduce( - const TOperationPtr& operation, - const TOperationPreparerPtr& preparer, - const TRawJoinReduceOperationSpec& spec, - const ::TIntrusivePtr<IRawJob>& reducer, - const TOperationOptions& options); - -void ExecuteMapReduce( - const TOperationPtr& operation, - const TOperationPreparerPtr& preparer, - const TMapReduceOperationSpec& spec, - const ::TIntrusivePtr<IStructuredJob>& mapper, - const ::TIntrusivePtr<IStructuredJob>& reduceCombiner, - const ::TIntrusivePtr<IStructuredJob>& reducer, - const TOperationOptions& options); - -void ExecuteRawMapReduce( - const TOperationPtr& operation, - const TOperationPreparerPtr& preparer, - const TRawMapReduceOperationSpec& spec, - const ::TIntrusivePtr<IRawJob>& mapper, - const ::TIntrusivePtr<IRawJob>& reduceCombiner, - const ::TIntrusivePtr<IRawJob>& reducer, - const TOperationOptions& options); - -void ExecuteSort( - const TOperationPtr& operation, - const TOperationPreparerPtr& preparer, - const TSortOperationSpec& spec, - const TOperationOptions& options); - -void ExecuteMerge( - const TOperationPtr& operation, - const TOperationPreparerPtr& preparer, - const TMergeOperationSpec& spec, - const TOperationOptions& options); - -void ExecuteErase( - const TOperationPtr& operation, - const TOperationPreparerPtr& preparer, - const TEraseOperationSpec& spec, - const TOperationOptions& options); - -void ExecuteRemoteCopy( - const TOperationPtr& operation, - const TOperationPreparerPtr& preparer, - const TRemoteCopyOperationSpec& spec, - const TOperationOptions& options); - -void ExecuteVanilla( - const TOperationPtr& 
operation, - const TOperationPreparerPtr& preparer, - const TVanillaOperationSpec& spec, - const TOperationOptions& options); - -EOperationBriefState CheckOperation( - const IClientRetryPolicyPtr& clientRetryPolicy, - const TClientContext& context, - const TOperationId& operationId); - -void WaitForOperation( - const IClientRetryPolicyPtr& clientRetryPolicy, - const TClientContext& context, - const TOperationId& operationId); - -//////////////////////////////////////////////////////////////////////////////// - -::TIntrusivePtr<TOperation> ProcessOperation( - NYT::NDetail::TClientPtr client, - std::function<void()> prepare, - ::TIntrusivePtr<TOperation> operation, - const TOperationOptions& options); - -void WaitIfRequired(const TOperationPtr& operation, const TClientPtr& client, const TOperationOptions& options); - -//////////////////////////////////////////////////////////////////////////////// - -} // namespace NYT::NDetail diff --git a/yt/cpp/mapreduce/client/operation_helpers.cpp b/yt/cpp/mapreduce/client/operation_helpers.cpp deleted file mode 100644 index abb2185662..0000000000 --- a/yt/cpp/mapreduce/client/operation_helpers.cpp +++ /dev/null @@ -1,91 +0,0 @@ -#include "operation_helpers.h" - -#include <yt/cpp/mapreduce/common/retry_lib.h> - -#include <yt/cpp/mapreduce/interface/config.h> - -#include <yt/cpp/mapreduce/interface/logging/yt_log.h> - -#include <yt/cpp/mapreduce/raw_client/raw_requests.h> - -#include <yt/cpp/mapreduce/http/context.h> -#include <yt/cpp/mapreduce/http/requests.h> - -#include <util/string/builder.h> - -#include <util/system/mutex.h> -#include <util/system/rwlock.h> - -namespace NYT::NDetail { - -//////////////////////////////////////////////////////////////////////////////// - -ui64 RoundUpFileSize(ui64 size) -{ - constexpr ui64 roundUpTo = 4ull << 10; - return (size + roundUpTo - 1) & ~(roundUpTo - 1); -} - -bool UseLocalModeOptimization(const TClientContext& context, const IClientRetryPolicyPtr& clientRetryPolicy) -{ - if 
(!context.Config->EnableLocalModeOptimization) { - return false; - } - - static THashMap<TString, bool> localModeMap; - static TRWMutex mutex; - - { - TReadGuard guard(mutex); - auto it = localModeMap.find(context.ServerName); - if (it != localModeMap.end()) { - return it->second; - } - } - - bool isLocalMode = false; - TString localModeAttr("//sys/@local_mode_fqdn"); - // We don't want to pollute logs with errors about failed request, - // so we check if path exists before getting it. - if (NRawClient::Exists(clientRetryPolicy->CreatePolicyForGenericRequest(), - context, - TTransactionId(), - localModeAttr, - TExistsOptions().ReadFrom(EMasterReadKind::Cache))) - { - auto fqdnNode = NRawClient::TryGet( - clientRetryPolicy->CreatePolicyForGenericRequest(), - context, - TTransactionId(), - localModeAttr, - TGetOptions().ReadFrom(EMasterReadKind::Cache)); - if (!fqdnNode.IsUndefined()) { - auto fqdn = fqdnNode.AsString(); - isLocalMode = (fqdn == TProcessState::Get()->FqdnHostName); - YT_LOG_DEBUG("Checking local mode; LocalModeFqdn: %v FqdnHostName: %v IsLocalMode: %v", - fqdn, - TProcessState::Get()->FqdnHostName, - isLocalMode ? 
"true" : "false"); - } - } - - { - TWriteGuard guard(mutex); - localModeMap[context.ServerName] = isLocalMode; - } - - return isLocalMode; -} - -TString GetOperationWebInterfaceUrl(TStringBuf serverName, TOperationId operationId) -{ - serverName.ChopSuffix(":80"); - serverName.ChopSuffix(".yt.yandex-team.ru"); - serverName.ChopSuffix(".yt.yandex.net"); - return ::TStringBuilder() << "https://yt.yandex-team.ru/" << serverName << - "/operations/" << GetGuidAsString(operationId); -} - -//////////////////////////////////////////////////////////////////////////////// - -} // namespace NYT::NDetail diff --git a/yt/cpp/mapreduce/client/operation_helpers.h b/yt/cpp/mapreduce/client/operation_helpers.h deleted file mode 100644 index 7fd2ffb0de..0000000000 --- a/yt/cpp/mapreduce/client/operation_helpers.h +++ /dev/null @@ -1,20 +0,0 @@ -#pragma once - -#include <yt/cpp/mapreduce/common/fwd.h> -#include <yt/cpp/mapreduce/interface/fwd.h> - -#include <yt/cpp/mapreduce/http/fwd.h> - -namespace NYT::NDetail { - -//////////////////////////////////////////////////////////////////////////////// - -ui64 RoundUpFileSize(ui64 size); - -bool UseLocalModeOptimization(const TClientContext& context, const IClientRetryPolicyPtr& clientRetryPolicy); - -TString GetOperationWebInterfaceUrl(TStringBuf serverName, TOperationId operationId); - -//////////////////////////////////////////////////////////////////////////////// - -} // namespace NYT::NDetail diff --git a/yt/cpp/mapreduce/client/operation_preparer.cpp b/yt/cpp/mapreduce/client/operation_preparer.cpp deleted file mode 100644 index e06fac4061..0000000000 --- a/yt/cpp/mapreduce/client/operation_preparer.cpp +++ /dev/null @@ -1,881 +0,0 @@ -#include "operation_preparer.h" - -#include "init.h" -#include "file_writer.h" -#include "operation.h" -#include "operation_helpers.h" -#include "operation_tracker.h" -#include "transaction.h" -#include "transaction_pinger.h" -#include "yt_poller.h" - -#include <yt/cpp/mapreduce/common/helpers.h> 
-#include <yt/cpp/mapreduce/common/retry_lib.h> -#include <yt/cpp/mapreduce/common/wait_proxy.h> - -#include <yt/cpp/mapreduce/raw_client/raw_requests.h> -#include <yt/cpp/mapreduce/raw_client/raw_batch_request.h> - -#include <yt/cpp/mapreduce/interface/error_codes.h> - -#include <yt/cpp/mapreduce/interface/logging/yt_log.h> - -#include <library/cpp/digest/md5/md5.h> - -#include <util/folder/path.h> - -#include <util/string/builder.h> - -#include <util/system/execpath.h> - -namespace NYT::NDetail { - -using namespace NRawClient; - -//////////////////////////////////////////////////////////////////////////////// - -class TWaitOperationStartPollerItem - : public IYtPollerItem -{ -public: - TWaitOperationStartPollerItem(TOperationId operationId, THolder<TPingableTransaction> transaction) - : OperationId_(operationId) - , Transaction_(std::move(transaction)) - { } - - void PrepareRequest(TRawBatchRequest* batchRequest) override - { - Future_ = batchRequest->GetOperation( - OperationId_, - TGetOperationOptions().AttributeFilter( - TOperationAttributeFilter().Add(EOperationAttribute::State))); - } - - EStatus OnRequestExecuted() override - { - try { - auto attributes = Future_.GetValue(); - Y_ENSURE(attributes.State.Defined()); - bool operationHasLockedFiles = - *attributes.State != "starting" && - *attributes.State != "pending" && - *attributes.State != "orphaned" && - *attributes.State != "waiting_for_agent" && - *attributes.State != "initializing"; - return operationHasLockedFiles ? EStatus::PollBreak : EStatus::PollContinue; - } catch (const TErrorResponse& e) { - YT_LOG_ERROR("get_operation request failed: %v (RequestId: %v)", - e.GetError().GetMessage(), - e.GetRequestId()); - return IsRetriable(e) ? 
PollContinue : PollBreak; - } catch (const std::exception& e) { - YT_LOG_ERROR("%v", e.what()); - return PollBreak; - } - } - - void OnItemDiscarded() override { - } - -private: - TOperationId OperationId_; - THolder<TPingableTransaction> Transaction_; - ::NThreading::TFuture<TOperationAttributes> Future_; -}; - -//////////////////////////////////////////////////////////////////////////////// - -class TOperationForwardingRequestRetryPolicy - : public IRequestRetryPolicy -{ -public: - TOperationForwardingRequestRetryPolicy(const IRequestRetryPolicyPtr& underlying, const TOperationPtr& operation) - : Underlying_(underlying) - , Operation_(operation) - { } - - void NotifyNewAttempt() override - { - Underlying_->NotifyNewAttempt(); - } - - TMaybe<TDuration> OnGenericError(const std::exception& e) override - { - UpdateOperationStatus(e.what()); - return Underlying_->OnGenericError(e); - } - - TMaybe<TDuration> OnRetriableError(const TErrorResponse& e) override - { - auto msg = e.GetError().ShortDescription(); - UpdateOperationStatus(msg); - return Underlying_->OnRetriableError(e); - } - - void OnIgnoredError(const TErrorResponse& e) override - { - Underlying_->OnIgnoredError(e); - } - - TString GetAttemptDescription() const override - { - return Underlying_->GetAttemptDescription(); - } - -private: - void UpdateOperationStatus(TStringBuf err) - { - Y_VERIFY(Operation_); - Operation_->OnStatusUpdated( - ::TStringBuilder() << "Retriable error during operation start: " << err); - } - -private: - IRequestRetryPolicyPtr Underlying_; - TOperationPtr Operation_; -}; - -//////////////////////////////////////////////////////////////////////////////// - -TOperationPreparer::TOperationPreparer(TClientPtr client, TTransactionId transactionId) - : Client_(std::move(client)) - , TransactionId_(transactionId) - , FileTransaction_(MakeHolder<TPingableTransaction>( - Client_->GetRetryPolicy(), - Client_->GetContext(), - TransactionId_, - 
Client_->GetTransactionPinger()->GetChildTxPinger(), - TStartTransactionOptions())) - , ClientRetryPolicy_(Client_->GetRetryPolicy()) - , PreparationId_(CreateGuidAsString()) -{ } - -const TClientContext& TOperationPreparer::GetContext() const -{ - return Client_->GetContext(); -} - -TTransactionId TOperationPreparer::GetTransactionId() const -{ - return TransactionId_; -} - -TClientPtr TOperationPreparer::GetClient() const -{ - return Client_; -} - -const TString& TOperationPreparer::GetPreparationId() const -{ - return PreparationId_; -} - -const IClientRetryPolicyPtr& TOperationPreparer::GetClientRetryPolicy() const -{ - return ClientRetryPolicy_; -} - -TOperationId TOperationPreparer::StartOperation( - TOperation* operation, - const TString& operationType, - const TNode& spec, - bool useStartOperationRequest) -{ - CheckValidity(); - - THttpHeader header("POST", (useStartOperationRequest ? "start_op" : operationType)); - if (useStartOperationRequest) { - header.AddParameter("operation_type", operationType); - } - header.AddTransactionId(TransactionId_); - header.AddMutationId(); - - auto ysonSpec = NodeToYsonString(spec); - auto responseInfo = RetryRequestWithPolicy( - ::MakeIntrusive<TOperationForwardingRequestRetryPolicy>( - ClientRetryPolicy_->CreatePolicyForStartOperationRequest(), - TOperationPtr(operation)), - GetContext(), - header, - ysonSpec); - TOperationId operationId = ParseGuidFromResponse(responseInfo.Response); - YT_LOG_DEBUG("Operation started (OperationId: %v; PreparationId: %v)", - operationId, - GetPreparationId()); - - YT_LOG_INFO("Operation %v started (%v): %v", - operationId, - operationType, - GetOperationWebInterfaceUrl(GetContext().ServerName, operationId)); - - TOperationExecutionTimeTracker::Get()->Start(operationId); - - Client_->GetYtPoller().Watch( - new TWaitOperationStartPollerItem(operationId, std::move(FileTransaction_))); - - return operationId; -} - -void TOperationPreparer::LockFiles(TVector<TRichYPath>* paths) -{ - 
CheckValidity(); - - TVector<::NThreading::TFuture<TLockId>> lockIdFutures; - lockIdFutures.reserve(paths->size()); - TRawBatchRequest lockRequest(GetContext().Config); - for (const auto& path : *paths) { - lockIdFutures.push_back(lockRequest.Lock( - FileTransaction_->GetId(), - path.Path_, - ELockMode::LM_SNAPSHOT, - TLockOptions().Waitable(true))); - } - ExecuteBatch(ClientRetryPolicy_->CreatePolicyForGenericRequest(), GetContext(), lockRequest); - - TVector<::NThreading::TFuture<TNode>> nodeIdFutures; - nodeIdFutures.reserve(paths->size()); - TRawBatchRequest getNodeIdRequest(GetContext().Config); - for (const auto& lockIdFuture : lockIdFutures) { - nodeIdFutures.push_back(getNodeIdRequest.Get( - FileTransaction_->GetId(), - ::TStringBuilder() << '#' << GetGuidAsString(lockIdFuture.GetValue()) << "/@node_id", - TGetOptions())); - } - ExecuteBatch(ClientRetryPolicy_->CreatePolicyForGenericRequest(), GetContext(), getNodeIdRequest); - - for (size_t i = 0; i != paths->size(); ++i) { - auto& richPath = (*paths)[i]; - richPath.OriginalPath(richPath.Path_); - richPath.Path("#" + nodeIdFutures[i].GetValue().AsString()); - YT_LOG_DEBUG("Locked file %v, new path is %v", - *richPath.OriginalPath_, - richPath.Path_); - } -} - -void TOperationPreparer::CheckValidity() const -{ - Y_ENSURE( - FileTransaction_, - "File transaction is already moved, are you trying to use preparer for more than one operation?"); -} - -//////////////////////////////////////////////////////////////////////////////// - -class TRetryPolicyIgnoringLockConflicts - : public TAttemptLimitedRetryPolicy -{ -public: - using TAttemptLimitedRetryPolicy::TAttemptLimitedRetryPolicy; - using TAttemptLimitedRetryPolicy::OnGenericError; - - TMaybe<TDuration> OnRetriableError(const TErrorResponse& e) override - { - if (IsAttemptLimitExceeded()) { - return Nothing(); - } - if (e.IsConcurrentTransactionLockConflict()) { - return GetBackoffDuration(Config_); - } - return 
TAttemptLimitedRetryPolicy::OnRetriableError(e); - } -}; - -//////////////////////////////////////////////////////////////////////////////// - -class TFileToUpload - : public IItemToUpload -{ -public: - TFileToUpload(TString fileName, TMaybe<TString> md5) - : FileName_(std::move(fileName)) - , MD5_(std::move(md5)) - { } - - TString CalculateMD5() const override - { - if (MD5_) { - return *MD5_; - } - constexpr size_t md5Size = 32; - TString result; - result.ReserveAndResize(md5Size); - MD5::File(FileName_.data(), result.Detach()); - MD5_ = result; - return result; - } - - THolder<IInputStream> CreateInputStream() const override - { - return MakeHolder<TFileInput>(FileName_); - } - - TString GetDescription() const override - { - return FileName_; - } - - ui64 GetDataSize() const override - { - return GetFileLength(FileName_); - } - -private: - TString FileName_; - mutable TMaybe<TString> MD5_; -}; - -class TDataToUpload - : public IItemToUpload -{ -public: - TDataToUpload(TString data, TString description) - : Data_(std::move(data)) - , Description_(std::move(description)) - { } - - TString CalculateMD5() const override - { - constexpr size_t md5Size = 32; - TString result; - result.ReserveAndResize(md5Size); - MD5::Data(reinterpret_cast<const unsigned char*>(Data_.data()), Data_.size(), result.Detach()); - return result; - } - - THolder<IInputStream> CreateInputStream() const override - { - return MakeHolder<TMemoryInput>(Data_.data(), Data_.size()); - } - - TString GetDescription() const override - { - return Description_; - } - - ui64 GetDataSize() const override - { - return Data_.size(); - } - -private: - TString Data_; - TString Description_; -}; - -//////////////////////////////////////////////////////////////////////////////// - -static const TString& GetPersistentExecPathMd5() -{ - static TString md5 = MD5::File(GetPersistentExecPath()); - return md5; -} - -static TMaybe<TSmallJobFile> GetJobState(const IJob& job) -{ - TString result; - { - TStringOutput 
output(result); - job.Save(output); - output.Finish(); - } - if (result.empty()) { - return Nothing(); - } else { - return TSmallJobFile{"jobstate", result}; - } -} - -//////////////////////////////////////////////////////////////////////////////// - -TJobPreparer::TJobPreparer( - TOperationPreparer& operationPreparer, - const TUserJobSpec& spec, - const IJob& job, - size_t outputTableCount, - const TVector<TSmallJobFile>& smallFileList, - const TOperationOptions& options) - : OperationPreparer_(operationPreparer) - , Spec_(spec) - , Options_(options) -{ - - CreateStorage(); - auto cypressFileList = CanonizeYPaths(/* retryPolicy */ nullptr, OperationPreparer_.GetContext(), spec.Files_); - - for (const auto& file : cypressFileList) { - UseFileInCypress(file); - } - for (const auto& localFile : spec.GetLocalFiles()) { - UploadLocalFile(std::get<0>(localFile), std::get<1>(localFile)); - } - auto jobStateSmallFile = GetJobState(job); - if (jobStateSmallFile) { - UploadSmallFile(*jobStateSmallFile); - } - for (const auto& smallFile : smallFileList) { - UploadSmallFile(smallFile); - } - - if (auto commandJob = dynamic_cast<const ICommandJob*>(&job)) { - ClassName_ = TJobFactory::Get()->GetJobName(&job); - Command_ = commandJob->GetCommand(); - } else { - PrepareJobBinary(job, outputTableCount, jobStateSmallFile.Defined()); - } - - operationPreparer.LockFiles(&CachedFiles_); -} - -TVector<TRichYPath> TJobPreparer::GetFiles() const -{ - TVector<TRichYPath> allFiles = CypressFiles_; - allFiles.insert(allFiles.end(), CachedFiles_.begin(), CachedFiles_.end()); - return allFiles; -} - -const TString& TJobPreparer::GetClassName() const -{ - return ClassName_; -} - -const TString& TJobPreparer::GetCommand() const -{ - return Command_; -} - -const TUserJobSpec& TJobPreparer::GetSpec() const -{ - return Spec_; -} - -bool TJobPreparer::ShouldMountSandbox() const -{ - return OperationPreparer_.GetContext().Config->MountSandboxInTmpfs || Options_.MountSandboxInTmpfs_; -} - -ui64 
TJobPreparer::GetTotalFileSize() const -{ - return TotalFileSize_; -} - -TString TJobPreparer::GetFileStorage() const -{ - return Options_.FileStorage_ ? - *Options_.FileStorage_ : - OperationPreparer_.GetContext().Config->RemoteTempFilesDirectory; -} - -TYPath TJobPreparer::GetCachePath() const -{ - return AddPathPrefix( - ::TStringBuilder() << GetFileStorage() << "/new_cache", - OperationPreparer_.GetContext().Config->Prefix); -} - -void TJobPreparer::CreateStorage() const -{ - Create( - OperationPreparer_.GetClientRetryPolicy()->CreatePolicyForGenericRequest(), - OperationPreparer_.GetContext(), - Options_.FileStorageTransactionId_, - GetCachePath(), - NT_MAP, - TCreateOptions() - .IgnoreExisting(true) - .Recursive(true)); -} - -int TJobPreparer::GetFileCacheReplicationFactor() const -{ - if (IsLocalMode()) { - return 1; - } else { - return OperationPreparer_.GetContext().Config->FileCacheReplicationFactor; - } -} - -void TJobPreparer::CreateFileInCypress(const TString& path) const -{ - auto attributes = TNode()("replication_factor", GetFileCacheReplicationFactor()); - if (Options_.FileExpirationTimeout_) { - attributes["expiration_timeout"] = Options_.FileExpirationTimeout_->MilliSeconds(); - } - - Create( - OperationPreparer_.GetClientRetryPolicy()->CreatePolicyForGenericRequest(), - OperationPreparer_.GetContext(), - Options_.FileStorageTransactionId_, - path, - NT_FILE, - TCreateOptions() - .IgnoreExisting(true) - .Recursive(true) - .Attributes(attributes) - ); -} - -TString TJobPreparer::PutFileToCypressCache( - const TString& path, - const TString& md5Signature, - TTransactionId transactionId) const -{ - constexpr ui32 LockConflictRetryCount = 30; - auto retryPolicy = MakeIntrusive<TRetryPolicyIgnoringLockConflicts>( - LockConflictRetryCount, - OperationPreparer_.GetContext().Config); - - auto putFileToCacheOptions = TPutFileToCacheOptions(); - if (Options_.FileExpirationTimeout_) { - putFileToCacheOptions.PreserveExpirationTimeout(true); - } - - auto 
cachePath = PutFileToCache( - retryPolicy, - OperationPreparer_.GetContext(), - transactionId, - path, - md5Signature, - GetCachePath(), - putFileToCacheOptions); - - Remove( - OperationPreparer_.GetClientRetryPolicy()->CreatePolicyForGenericRequest(), - OperationPreparer_.GetContext(), - transactionId, - path, - TRemoveOptions().Force(true)); - - return cachePath; -} - -TMaybe<TString> TJobPreparer::GetItemFromCypressCache(const TString& md5Signature, const TString& fileName) const -{ - constexpr ui32 LockConflictRetryCount = 30; - auto retryPolicy = MakeIntrusive<TRetryPolicyIgnoringLockConflicts>( - LockConflictRetryCount, - OperationPreparer_.GetContext().Config); - auto maybePath = GetFileFromCache( - retryPolicy, - OperationPreparer_.GetContext(), - TTransactionId(), - md5Signature, - GetCachePath(), - TGetFileFromCacheOptions()); - if (maybePath) { - YT_LOG_DEBUG("File is already in cache (FileName: %v)", - fileName, - *maybePath); - } - return maybePath; -} - -TDuration TJobPreparer::GetWaitForUploadTimeout(const IItemToUpload& itemToUpload) const -{ - const TDuration extraTime = OperationPreparer_.GetContext().Config->WaitLockPollInterval + - TDuration::MilliSeconds(100); - const double dataSizeGb = static_cast<double>(itemToUpload.GetDataSize()) / 1_GB; - return extraTime + dataSizeGb * OperationPreparer_.GetContext().Config->CacheLockTimeoutPerGb; -} - -TString TJobPreparer::UploadToRandomPath(const IItemToUpload& itemToUpload) const -{ - TString uniquePath = AddPathPrefix( - ::TStringBuilder() << GetFileStorage() << "/cpp_" << CreateGuidAsString(), - OperationPreparer_.GetContext().Config->Prefix); - YT_LOG_INFO("Uploading file to random cypress path (FileName: %v; CypressPath: %v; PreparationId: %v)", - itemToUpload.GetDescription(), - uniquePath, - OperationPreparer_.GetPreparationId()); - - CreateFileInCypress(uniquePath); - - { - TFileWriter writer( - uniquePath, - OperationPreparer_.GetClientRetryPolicy(), - 
OperationPreparer_.GetClient()->GetTransactionPinger(), - OperationPreparer_.GetContext(), - Options_.FileStorageTransactionId_, - TFileWriterOptions().ComputeMD5(true)); - itemToUpload.CreateInputStream()->ReadAll(writer); - writer.Finish(); - } - return uniquePath; -} - -TMaybe<TString> TJobPreparer::TryUploadWithDeduplication(const IItemToUpload& itemToUpload) const -{ - const auto md5Signature = itemToUpload.CalculateMD5(); - - auto fileName = ::TStringBuilder() << GetFileStorage() << "/cpp_md5_" << md5Signature; - if (OperationPreparer_.GetContext().Config->CacheUploadDeduplicationMode == EUploadDeduplicationMode::Host) { - fileName << "_" << MD5::Data(TProcessState::Get()->FqdnHostName); - } - TString cypressPath = AddPathPrefix(fileName, OperationPreparer_.GetContext().Config->Prefix); - - CreateFileInCypress(cypressPath); - - auto uploadTx = MakeIntrusive<TTransaction>( - OperationPreparer_.GetClient(), - OperationPreparer_.GetContext(), - TTransactionId(), - TStartTransactionOptions()); - - ILockPtr lock; - try { - lock = uploadTx->Lock(cypressPath, ELockMode::LM_EXCLUSIVE, TLockOptions().Waitable(true)); - } catch (const TErrorResponse& e) { - if (e.IsResolveError()) { - // If the node doesn't exist, it must be removed by concurrent uploading process. - // Let's try to find it in the cache. - return GetItemFromCypressCache(md5Signature, itemToUpload.GetDescription()); - } - throw; - } - - auto waitTimeout = GetWaitForUploadTimeout(itemToUpload); - YT_LOG_DEBUG("Waiting for the lock on file (FileName: %v; CypressPath: %v; LockTimeout: %v)", - itemToUpload.GetDescription(), - cypressPath, - waitTimeout); - - if (!TWaitProxy::Get()->WaitFuture(lock->GetAcquiredFuture(), waitTimeout)) { - YT_LOG_DEBUG("Waiting for the lock timed out. 
Fallback to random path uploading (FileName: %v; CypressPath: %v)", - itemToUpload.GetDescription(), - cypressPath); - return Nothing(); - } - - YT_LOG_DEBUG("Exclusive lock successfully acquired (FileName: %v; CypressPath: %v)", - itemToUpload.GetDescription(), - cypressPath); - - // Ensure that this process is the first to take a lock. - if (auto cachedItemPath = GetItemFromCypressCache(md5Signature, itemToUpload.GetDescription())) { - return *cachedItemPath; - } - - YT_LOG_INFO("Uploading file to cypress (FileName: %v; CypressPath: %v; PreparationId: %v)", - itemToUpload.GetDescription(), - cypressPath, - OperationPreparer_.GetPreparationId()); - - { - auto writer = uploadTx->CreateFileWriter(cypressPath, TFileWriterOptions().ComputeMD5(true)); - YT_VERIFY(writer); - itemToUpload.CreateInputStream()->ReadAll(*writer); - writer->Finish(); - } - - auto path = PutFileToCypressCache(cypressPath, md5Signature, uploadTx->GetId()); - - uploadTx->Commit(); - return path; -} - -TString TJobPreparer::UploadToCacheUsingApi(const IItemToUpload& itemToUpload) const -{ - auto md5Signature = itemToUpload.CalculateMD5(); - Y_VERIFY(md5Signature.size() == 32); - - if (auto cachedItemPath = GetItemFromCypressCache(md5Signature, itemToUpload.GetDescription())) { - return *cachedItemPath; - } - - YT_LOG_INFO("File not found in cache; uploading to cypress (FileName: %v; PreparationId: %v)", - itemToUpload.GetDescription(), - OperationPreparer_.GetPreparationId()); - - if (OperationPreparer_.GetContext().Config->CacheUploadDeduplicationMode != EUploadDeduplicationMode::Disabled) { - if (auto path = TryUploadWithDeduplication(itemToUpload)) { - return *path; - } - } - - auto path = UploadToRandomPath(itemToUpload); - return PutFileToCypressCache(path, md5Signature, Options_.FileStorageTransactionId_); -} - -TString TJobPreparer::UploadToCache(const IItemToUpload& itemToUpload) const -{ - YT_LOG_INFO("Uploading file (FileName: %v; PreparationId: %v)", - itemToUpload.GetDescription(), - 
OperationPreparer_.GetPreparationId()); - - TString result; - switch (Options_.FileCacheMode_) { - case TOperationOptions::EFileCacheMode::ApiCommandBased: - Y_ENSURE_EX(Options_.FileStorageTransactionId_.IsEmpty(), TApiUsageError() << - "Default cache mode (API command-based) doesn't allow non-default 'FileStorageTransactionId_'"); - result = UploadToCacheUsingApi(itemToUpload); - break; - case TOperationOptions::EFileCacheMode::CachelessRandomPathUpload: - result = UploadToRandomPath(itemToUpload); - break; - default: - Y_FAIL("Unknown file cache mode: %d", static_cast<int>(Options_.FileCacheMode_)); - } - - YT_LOG_INFO("Complete uploading file (FileName: %v; PreparationId: %v)", - itemToUpload.GetDescription(), - OperationPreparer_.GetPreparationId()); - - return result; -} - -void TJobPreparer::UseFileInCypress(const TRichYPath& file) -{ - if (!Exists( - OperationPreparer_.GetClientRetryPolicy()->CreatePolicyForGenericRequest(), - OperationPreparer_.GetContext(), - file.TransactionId_.GetOrElse(OperationPreparer_.GetTransactionId()), - file.Path_)) - { - ythrow yexception() << "File " << file.Path_ << " does not exist"; - } - - if (ShouldMountSandbox()) { - auto size = Get( - OperationPreparer_.GetClientRetryPolicy()->CreatePolicyForGenericRequest(), - OperationPreparer_.GetContext(), - file.TransactionId_.GetOrElse(OperationPreparer_.GetTransactionId()), - file.Path_ + "/@uncompressed_data_size") - .AsInt64(); - - TotalFileSize_ += RoundUpFileSize(static_cast<ui64>(size)); - } - CypressFiles_.push_back(file); -} - -void TJobPreparer::UploadLocalFile( - const TLocalFilePath& localPath, - const TAddLocalFileOptions& options, - bool isApiFile) -{ - TFsPath fsPath(localPath); - fsPath.CheckExists(); - - TFileStat stat; - fsPath.Stat(stat); - - bool isExecutable = stat.Mode & (S_IXUSR | S_IXGRP | S_IXOTH); - auto cachePath = UploadToCache(TFileToUpload(localPath, options.MD5CheckSum_)); - - TRichYPath cypressPath; - if (isApiFile) { - cypressPath = 
OperationPreparer_.GetContext().Config->ApiFilePathOptions; - } - cypressPath.Path(cachePath).FileName(options.PathInJob_.GetOrElse(fsPath.Basename())); - if (isExecutable) { - cypressPath.Executable(true); - } - if (options.BypassArtifactCache_) { - cypressPath.BypassArtifactCache(*options.BypassArtifactCache_); - } - - if (ShouldMountSandbox()) { - TotalFileSize_ += RoundUpFileSize(stat.Size); - } - - CachedFiles_.push_back(cypressPath); -} - -void TJobPreparer::UploadBinary(const TJobBinaryConfig& jobBinary) -{ - if (std::holds_alternative<TJobBinaryLocalPath>(jobBinary)) { - auto binaryLocalPath = std::get<TJobBinaryLocalPath>(jobBinary); - auto opts = TAddLocalFileOptions().PathInJob("cppbinary"); - if (binaryLocalPath.MD5CheckSum) { - opts.MD5CheckSum(*binaryLocalPath.MD5CheckSum); - } - UploadLocalFile(binaryLocalPath.Path, opts, /* isApiFile */ true); - } else if (std::holds_alternative<TJobBinaryCypressPath>(jobBinary)) { - auto binaryCypressPath = std::get<TJobBinaryCypressPath>(jobBinary); - TRichYPath ytPath = OperationPreparer_.GetContext().Config->ApiFilePathOptions; - ytPath.Path(binaryCypressPath.Path); - if (binaryCypressPath.TransactionId) { - ytPath.TransactionId(*binaryCypressPath.TransactionId); - } - UseFileInCypress(ytPath.FileName("cppbinary").Executable(true)); - } else { - Y_FAIL("%s", (::TStringBuilder() << "Unexpected jobBinary tag: " << jobBinary.index()).data()); - } -} - -void TJobPreparer::UploadSmallFile(const TSmallJobFile& smallFile) -{ - auto cachePath = UploadToCache(TDataToUpload(smallFile.Data, smallFile.FileName + " [generated-file]")); - auto path = OperationPreparer_.GetContext().Config->ApiFilePathOptions; - CachedFiles_.push_back(path.Path(cachePath).FileName(smallFile.FileName)); - if (ShouldMountSandbox()) { - TotalFileSize_ += RoundUpFileSize(smallFile.Data.size()); - } -} - -bool TJobPreparer::IsLocalMode() const -{ - return UseLocalModeOptimization(OperationPreparer_.GetContext(), 
OperationPreparer_.GetClientRetryPolicy()); -} - -void TJobPreparer::PrepareJobBinary(const IJob& job, int outputTableCount, bool hasState) -{ - auto jobBinary = TJobBinaryConfig(); - if (!std::holds_alternative<TJobBinaryDefault>(Spec_.GetJobBinary())) { - jobBinary = Spec_.GetJobBinary(); - } - TString binaryPathInsideJob; - if (std::holds_alternative<TJobBinaryDefault>(jobBinary)) { - if (GetInitStatus() != EInitStatus::FullInitialization) { - ythrow yexception() << "NYT::Initialize() must be called prior to any operation"; - } - - const bool isLocalMode = IsLocalMode(); - const TMaybe<TString> md5 = !isLocalMode ? MakeMaybe(GetPersistentExecPathMd5()) : Nothing(); - jobBinary = TJobBinaryLocalPath{GetPersistentExecPath(), md5}; - - if (isLocalMode) { - binaryPathInsideJob = GetExecPath(); - } - } else if (std::holds_alternative<TJobBinaryLocalPath>(jobBinary)) { - const bool isLocalMode = IsLocalMode(); - if (isLocalMode) { - binaryPathInsideJob = TFsPath(std::get<TJobBinaryLocalPath>(jobBinary).Path).RealPath(); - } - } - Y_ASSERT(!std::holds_alternative<TJobBinaryDefault>(jobBinary)); - - // binaryPathInsideJob is only set when LocalModeOptimization option is on, so upload is not needed - if (!binaryPathInsideJob) { - binaryPathInsideJob = "./cppbinary"; - UploadBinary(jobBinary); - } - - TString jobCommandPrefix = Options_.JobCommandPrefix_; - if (!Spec_.JobCommandPrefix_.empty()) { - jobCommandPrefix = Spec_.JobCommandPrefix_; - } - - TString jobCommandSuffix = Options_.JobCommandSuffix_; - if (!Spec_.JobCommandSuffix_.empty()) { - jobCommandSuffix = Spec_.JobCommandSuffix_; - } - - ClassName_ = TJobFactory::Get()->GetJobName(&job); - - auto jobArguments = TNode::CreateMap(); - jobArguments["job_name"] = ClassName_; - jobArguments["output_table_count"] = static_cast<i64>(outputTableCount); - jobArguments["has_state"] = hasState; - Spec_.AddEnvironment("YT_JOB_ARGUMENTS", NodeToYsonString(jobArguments)); - - Command_ = ::TStringBuilder() << - 
jobCommandPrefix << - (OperationPreparer_.GetContext().Config->UseClientProtobuf ? "YT_USE_CLIENT_PROTOBUF=1" : "YT_USE_CLIENT_PROTOBUF=0") << " " << - binaryPathInsideJob << - jobCommandSuffix; -} - -//////////////////////////////////////////////////////////////////////////////// - -} // namespace NYT::NDetail diff --git a/yt/cpp/mapreduce/client/operation_preparer.h b/yt/cpp/mapreduce/client/operation_preparer.h deleted file mode 100644 index 7ced54e3b5..0000000000 --- a/yt/cpp/mapreduce/client/operation_preparer.h +++ /dev/null @@ -1,129 +0,0 @@ -#pragma once - -#include "client.h" -#include "structured_table_formats.h" - -#include <yt/cpp/mapreduce/interface/operation.h> - -namespace NYT::NDetail { - -//////////////////////////////////////////////////////////////////////////////// - -class TOperation; - -class TOperationPreparer - : public TThrRefBase -{ -public: - TOperationPreparer(TClientPtr client, TTransactionId transactionId); - - const TClientContext& GetContext() const; - TTransactionId GetTransactionId() const; - ITransactionPingerPtr GetTransactionPinger() const; - TClientPtr GetClient() const; - - const TString& GetPreparationId() const; - - void LockFiles(TVector<TRichYPath>* paths); - - TOperationId StartOperation( - TOperation* operation, - const TString& operationType, - const TNode& spec, - bool useStartOperationRequest = false); - - const IClientRetryPolicyPtr& GetClientRetryPolicy() const; - -private: - TClientPtr Client_; - TTransactionId TransactionId_; - THolder<TPingableTransaction> FileTransaction_; - IClientRetryPolicyPtr ClientRetryPolicy_; - const TString PreparationId_; - -private: - void CheckValidity() const; -}; - -using TOperationPreparerPtr = ::TIntrusivePtr<TOperationPreparer>; - -//////////////////////////////////////////////////////////////////////////////// - -struct IItemToUpload -{ - virtual ~IItemToUpload() = default; - - virtual TString CalculateMD5() const = 0; - virtual THolder<IInputStream> CreateInputStream() const = 
0; - virtual TString GetDescription() const = 0; - virtual ui64 GetDataSize() const = 0; -}; - -//////////////////////////////////////////////////////////////////////////////// - -class TJobPreparer - : private TNonCopyable -{ -public: - TJobPreparer( - TOperationPreparer& operationPreparer, - const TUserJobSpec& spec, - const IJob& job, - size_t outputTableCount, - const TVector<TSmallJobFile>& smallFileList, - const TOperationOptions& options); - - TVector<TRichYPath> GetFiles() const; - const TString& GetClassName() const; - const TString& GetCommand() const; - const TUserJobSpec& GetSpec() const; - bool ShouldMountSandbox() const; - ui64 GetTotalFileSize() const; - -private: - TOperationPreparer& OperationPreparer_; - TUserJobSpec Spec_; - TOperationOptions Options_; - - TVector<TRichYPath> CypressFiles_; - TVector<TRichYPath> CachedFiles_; - - TString ClassName_; - TString Command_; - ui64 TotalFileSize_ = 0; - -private: - TString GetFileStorage() const; - TYPath GetCachePath() const; - - bool IsLocalMode() const; - int GetFileCacheReplicationFactor() const; - - void CreateStorage() const; - - void CreateFileInCypress(const TString& path) const; - TString PutFileToCypressCache(const TString& path, const TString& md5Signature, TTransactionId transactionId) const; - TMaybe<TString> GetItemFromCypressCache(const TString& md5Signature, const TString& fileName) const; - - TDuration GetWaitForUploadTimeout(const IItemToUpload& itemToUpload) const; - TString UploadToRandomPath(const IItemToUpload& itemToUpload) const; - TString UploadToCacheUsingApi(const IItemToUpload& itemToUpload) const; - TMaybe<TString> TryUploadWithDeduplication(const IItemToUpload& itemToUpload) const; - TString UploadToCache(const IItemToUpload& itemToUpload) const; - - void UseFileInCypress(const TRichYPath& file); - - void UploadLocalFile( - const TLocalFilePath& localPath, - const TAddLocalFileOptions& options, - bool isApiFile = false); - - void UploadBinary(const TJobBinaryConfig& 
jobBinary); - void UploadSmallFile(const TSmallJobFile& smallFile); - - void PrepareJobBinary(const IJob& job, int outputTableCount, bool hasState); -}; - -//////////////////////////////////////////////////////////////////////////////// - -} // namespace NYT::NDetail diff --git a/yt/cpp/mapreduce/client/operation_tracker.cpp b/yt/cpp/mapreduce/client/operation_tracker.cpp deleted file mode 100644 index 56623e9927..0000000000 --- a/yt/cpp/mapreduce/client/operation_tracker.cpp +++ /dev/null @@ -1,34 +0,0 @@ -#include "operation_tracker.h" - -#include <yt/cpp/mapreduce/interface/config.h> - -namespace NYT { - -//////////////////////////////////////////////////////////////////////////////// - -void TOperationExecutionTimeTracker::Start(const TOperationId& operationId) { - with_lock(Lock_) { - StartTimes_[operationId] = TInstant::Now(); - } -} - -TMaybe<TDuration> TOperationExecutionTimeTracker::Finish(const TOperationId& operationId) { - TDuration duration; - with_lock(Lock_) { - auto i = StartTimes_.find(operationId); - if (i == StartTimes_.end()) { - return Nothing(); - } - duration = TInstant::Now() - i->second; - StartTimes_.erase(i); - } - return duration; -} - -TOperationExecutionTimeTracker* TOperationExecutionTimeTracker::Get() { - return Singleton<TOperationExecutionTimeTracker>(); -} - -//////////////////////////////////////////////////////////////////////////////// - -} // namespace NYT diff --git a/yt/cpp/mapreduce/client/operation_tracker.h b/yt/cpp/mapreduce/client/operation_tracker.h deleted file mode 100644 index 9f1504ea91..0000000000 --- a/yt/cpp/mapreduce/client/operation_tracker.h +++ /dev/null @@ -1,27 +0,0 @@ -#pragma once - -#include <yt/cpp/mapreduce/interface/operation.h> - -#include <util/datetime/base.h> -#include <util/generic/hash.h> -#include <util/generic/maybe.h> -#include <util/system/mutex.h> - -namespace NYT { - -//////////////////////////////////////////////////////////////////////////////// - -class TOperationExecutionTimeTracker { 
-public: - void Start(const TOperationId& operationId); - TMaybe<TDuration> Finish(const TOperationId& operationId); - static TOperationExecutionTimeTracker* Get(); - -private: - THashMap<TOperationId, TInstant> StartTimes_; - TMutex Lock_; -}; - -//////////////////////////////////////////////////////////////////////////////// - -} // namespace NYT diff --git a/yt/cpp/mapreduce/client/prepare_operation.cpp b/yt/cpp/mapreduce/client/prepare_operation.cpp deleted file mode 100644 index 7f772dc99a..0000000000 --- a/yt/cpp/mapreduce/client/prepare_operation.cpp +++ /dev/null @@ -1,286 +0,0 @@ -#include "prepare_operation.h" - -#include <yt/cpp/mapreduce/common/retry_lib.h> - -#include <yt/cpp/mapreduce/interface/serialize.h> - -#include <yt/cpp/mapreduce/raw_client/raw_requests.h> -#include <yt/cpp/mapreduce/raw_client/raw_batch_request.h> - -#include <library/cpp/iterator/functools.h> - -namespace NYT::NDetail { - -//////////////////////////////////////////////////////////////////////////////// - -TOperationPreparationContext::TOperationPreparationContext( - const TStructuredJobTableList& structuredInputs, - const TStructuredJobTableList& structuredOutputs, - const TClientContext& context, - const IClientRetryPolicyPtr& retryPolicy, - TTransactionId transactionId) - : Context_(context) - , RetryPolicy_(retryPolicy) - , TransactionId_(transactionId) - , InputSchemas_(structuredInputs.size()) - , InputSchemasLoaded_(structuredInputs.size(), false) -{ - Inputs_.reserve(structuredInputs.size()); - for (const auto& input : structuredInputs) { - Inputs_.push_back(input.RichYPath); - } - Outputs_.reserve(structuredOutputs.size()); - for (const auto& output : structuredOutputs) { - Outputs_.push_back(output.RichYPath); - } -} - -TOperationPreparationContext::TOperationPreparationContext( - TVector<TRichYPath> inputs, - TVector<TRichYPath> outputs, - const TClientContext& context, - const IClientRetryPolicyPtr& retryPolicy, - TTransactionId transactionId) - : Context_(context) 
- , RetryPolicy_(retryPolicy) - , TransactionId_(transactionId) - , InputSchemas_(inputs.size()) - , InputSchemasLoaded_(inputs.size(), false) -{ - Inputs_.reserve(inputs.size()); - for (auto& input : inputs) { - Inputs_.push_back(std::move(input)); - } - Outputs_.reserve(outputs.size()); - for (const auto& output : outputs) { - Outputs_.push_back(std::move(output)); - } -} - -int TOperationPreparationContext::GetInputCount() const -{ - return static_cast<int>(Inputs_.size()); -} - -int TOperationPreparationContext::GetOutputCount() const -{ - return static_cast<int>(Outputs_.size()); -} - -const TVector<TTableSchema>& TOperationPreparationContext::GetInputSchemas() const -{ - TVector<::NThreading::TFuture<TNode>> schemaFutures; - NRawClient::TRawBatchRequest batch(Context_.Config); - for (int tableIndex = 0; tableIndex < static_cast<int>(InputSchemas_.size()); ++tableIndex) { - if (InputSchemasLoaded_[tableIndex]) { - schemaFutures.emplace_back(); - continue; - } - Y_VERIFY(Inputs_[tableIndex]); - schemaFutures.push_back(batch.Get(TransactionId_, Inputs_[tableIndex]->Path_ + "/@schema", TGetOptions{})); - } - - NRawClient::ExecuteBatch( - RetryPolicy_->CreatePolicyForGenericRequest(), - Context_, - batch); - - for (int tableIndex = 0; tableIndex < static_cast<int>(InputSchemas_.size()); ++tableIndex) { - if (schemaFutures[tableIndex].Initialized()) { - Deserialize(InputSchemas_[tableIndex], schemaFutures[tableIndex].ExtractValueSync()); - } - } - - return InputSchemas_; -} - -const TTableSchema& TOperationPreparationContext::GetInputSchema(int index) const -{ - auto& schema = InputSchemas_[index]; - if (!InputSchemasLoaded_[index]) { - Y_VERIFY(Inputs_[index]); - auto schemaNode = NRawClient::Get( - RetryPolicy_->CreatePolicyForGenericRequest(), - Context_, - TransactionId_, - Inputs_[index]->Path_ + "/@schema"); - Deserialize(schema, schemaNode); - } - return schema; -} - -TMaybe<TYPath> TOperationPreparationContext::GetInputPath(int index) const -{ - 
Y_VERIFY(index < static_cast<int>(Inputs_.size())); - if (Inputs_[index]) { - return Inputs_[index]->Path_; - } - return Nothing(); -} - -TMaybe<TYPath> TOperationPreparationContext::GetOutputPath(int index) const -{ - Y_VERIFY(index < static_cast<int>(Outputs_.size())); - if (Outputs_[index]) { - return Outputs_[index]->Path_; - } - return Nothing(); -} - -//////////////////////////////////////////////////////////////////////////////// - -TSpeculativeOperationPreparationContext::TSpeculativeOperationPreparationContext( - const TVector<TTableSchema>& previousResult, - TStructuredJobTableList inputs, - TStructuredJobTableList outputs) - : InputSchemas_(previousResult) - , Inputs_(std::move(inputs)) - , Outputs_(std::move(outputs)) -{ - Y_VERIFY(Inputs_.size() == previousResult.size()); -} - -int TSpeculativeOperationPreparationContext::GetInputCount() const -{ - return static_cast<int>(Inputs_.size()); -} - -int TSpeculativeOperationPreparationContext::GetOutputCount() const -{ - return static_cast<int>(Outputs_.size()); -} - -const TVector<TTableSchema>& TSpeculativeOperationPreparationContext::GetInputSchemas() const -{ - return InputSchemas_; -} - -const TTableSchema& TSpeculativeOperationPreparationContext::GetInputSchema(int index) const -{ - Y_VERIFY(index < static_cast<int>(InputSchemas_.size())); - return InputSchemas_[index]; -} - -TMaybe<TYPath> TSpeculativeOperationPreparationContext::GetInputPath(int index) const -{ - Y_VERIFY(index < static_cast<int>(Inputs_.size())); - if (Inputs_[index].RichYPath) { - return Inputs_[index].RichYPath->Path_; - } - return Nothing(); -} - -TMaybe<TYPath> TSpeculativeOperationPreparationContext::GetOutputPath(int index) const -{ - Y_VERIFY(index < static_cast<int>(Outputs_.size())); - if (Outputs_[index].RichYPath) { - return Outputs_[index].RichYPath->Path_; - } - return Nothing(); -} - -//////////////////////////////////////////////////////////////////////////////// - -static void FixInputTable(TRichYPath& table, int 
index, const TJobOperationPreparer& preparer) -{ - const auto& columnRenamings = preparer.GetInputColumnRenamings(); - const auto& columnFilters = preparer.GetInputColumnFilters(); - - if (!columnRenamings[index].empty()) { - table.RenameColumns(columnRenamings[index]); - } - if (columnFilters[index]) { - table.Columns(*columnFilters[index]); - } -} - -static void FixInputTable(TStructuredJobTable& table, int index, const TJobOperationPreparer& preparer) -{ - const auto& inputDescriptions = preparer.GetInputDescriptions(); - - if (inputDescriptions[index] && std::holds_alternative<TUnspecifiedTableStructure>(table.Description)) { - table.Description = *inputDescriptions[index]; - } - if (table.RichYPath) { - FixInputTable(*table.RichYPath, index, preparer); - } -} - -static void FixOutputTable(TRichYPath& /* table */, int /* index */, const TJobOperationPreparer& /* preparer */) -{ } - -static void FixOutputTable(TStructuredJobTable& table, int index, const TJobOperationPreparer& preparer) -{ - const auto& outputDescriptions = preparer.GetOutputDescriptions(); - - if (outputDescriptions[index] && std::holds_alternative<TUnspecifiedTableStructure>(table.Description)) { - table.Description = *outputDescriptions[index]; - } - if (table.RichYPath) { - FixOutputTable(*table.RichYPath, index, preparer); - } -} - -template <typename TTables> -TVector<TTableSchema> PrepareOperation( - const IJob& job, - const IOperationPreparationContext& context, - TTables* inputsPtr, - TTables* outputsPtr, - TUserJobFormatHints& hints) -{ - TJobOperationPreparer preparer(context); - job.PrepareOperation(context, preparer); - preparer.Finish(); - - if (inputsPtr) { - auto& inputs = *inputsPtr; - for (int i = 0; i < static_cast<int>(inputs.size()); ++i) { - FixInputTable(inputs[i], i, preparer); - } - } - - if (outputsPtr) { - auto& outputs = *outputsPtr; - for (int i = 0; i < static_cast<int>(outputs.size()); ++i) { - FixOutputTable(outputs[i], i, preparer); - } - } - - auto applyPatch = 
[](TMaybe<TFormatHints>& origin, const TMaybe<TFormatHints>& patch) { - if (origin) { - if (patch) { - origin->Merge(*patch); - } - } else { - origin = patch; - } - }; - - auto preparerHints = preparer.GetFormatHints(); - applyPatch(preparerHints.InputFormatHints_, hints.InputFormatHints_); - applyPatch(preparerHints.OutputFormatHints_, hints.OutputFormatHints_); - hints = std::move(preparerHints); - - return preparer.GetOutputSchemas(); -} - -template -TVector<TTableSchema> PrepareOperation<TStructuredJobTableList>( - const IJob& job, - const IOperationPreparationContext& context, - TStructuredJobTableList* inputsPtr, - TStructuredJobTableList* outputsPtr, - TUserJobFormatHints& hints); - -template -TVector<TTableSchema> PrepareOperation<TVector<TRichYPath>>( - const IJob& job, - const IOperationPreparationContext& context, - TVector<TRichYPath>* inputsPtr, - TVector<TRichYPath>* outputsPtr, - TUserJobFormatHints& hints); - -//////////////////////////////////////////////////////////////////////////////// - -} // namespace NYT::NDetail diff --git a/yt/cpp/mapreduce/client/prepare_operation.h b/yt/cpp/mapreduce/client/prepare_operation.h deleted file mode 100644 index 3b64aa2856..0000000000 --- a/yt/cpp/mapreduce/client/prepare_operation.h +++ /dev/null @@ -1,93 +0,0 @@ -#pragma once - -#include "structured_table_formats.h" - -#include <yt/cpp/mapreduce/interface/operation.h> - -namespace NYT::NDetail { - -//////////////////////////////////////////////////////////////////////////////// - -class TOperationPreparationContext - : public IOperationPreparationContext -{ -public: - TOperationPreparationContext( - const TStructuredJobTableList& structuredInputs, - const TStructuredJobTableList& structuredOutputs, - const TClientContext& context, - const IClientRetryPolicyPtr& retryPolicy, - TTransactionId transactionId); - - TOperationPreparationContext( - TVector<TRichYPath> inputs, - TVector<TRichYPath> outputs, - const TClientContext& context, - const 
IClientRetryPolicyPtr& retryPolicy, - TTransactionId transactionId); - - int GetInputCount() const override; - int GetOutputCount() const override; - - const TVector<TTableSchema>& GetInputSchemas() const override; - const TTableSchema& GetInputSchema(int index) const override; - - TMaybe<TYPath> GetInputPath(int index) const override; - TMaybe<TYPath> GetOutputPath(int index) const override; - -private: - TVector<TMaybe<TRichYPath>> Inputs_; - TVector<TMaybe<TRichYPath>> Outputs_; - const TClientContext& Context_; - const IClientRetryPolicyPtr RetryPolicy_; - TTransactionId TransactionId_; - - mutable TVector<TTableSchema> InputSchemas_; - mutable TVector<bool> InputSchemasLoaded_; -}; - -//////////////////////////////////////////////////////////////////////////////// - -class TSpeculativeOperationPreparationContext - : public IOperationPreparationContext -{ -public: - TSpeculativeOperationPreparationContext( - const TVector<TTableSchema>& previousResult, - TStructuredJobTableList inputs, - TStructuredJobTableList outputs); - - int GetInputCount() const override; - int GetOutputCount() const override; - - const TVector<TTableSchema>& GetInputSchemas() const override; - const TTableSchema& GetInputSchema(int index) const override; - - TMaybe<TYPath> GetInputPath(int index) const override; - TMaybe<TYPath> GetOutputPath(int index) const override; - -private: - TVector<TTableSchema> InputSchemas_; - TStructuredJobTableList Inputs_; - TStructuredJobTableList Outputs_; -}; - -//////////////////////////////////////////////////////////////////////////////// - -template <typename TTables> -TVector<TTableSchema> PrepareOperation( - const IJob& job, - const IOperationPreparationContext& context, - TTables* inputsPtr, - TTables* outputsPtr, - TUserJobFormatHints& hints); - -//////////////////////////////////////////////////////////////////////////////// - -TJobOperationPreparer GetOperationPreparer( - const IJob& job, - const IOperationPreparationContext& context); - 
-//////////////////////////////////////////////////////////////////////////////// - -} // namespace NYT::NDetail diff --git a/yt/cpp/mapreduce/client/py_helpers.cpp b/yt/cpp/mapreduce/client/py_helpers.cpp deleted file mode 100644 index 3072449866..0000000000 --- a/yt/cpp/mapreduce/client/py_helpers.cpp +++ /dev/null @@ -1,112 +0,0 @@ -#include "py_helpers.h" - -#include "client.h" -#include "operation.h" -#include "transaction.h" - -#include <yt/cpp/mapreduce/interface/client.h> -#include <yt/cpp/mapreduce/interface/fluent.h> - -#include <yt/cpp/mapreduce/common/retry_lib.h> -#include <yt/cpp/mapreduce/common/helpers.h> - -#include <library/cpp/yson/node/node_io.h> - -#include <util/generic/hash_set.h> - -namespace NYT { - -using namespace NDetail; - -//////////////////////////////////////////////////////////////////////////////// - -IStructuredJobPtr ConstructJob(const TString& jobName, const TString& state) -{ - auto node = TNode(); - if (!state.empty()) { - node = NodeFromYsonString(state); - } - return TJobFactory::Get()->GetConstructingFunction(jobName.data())(node); -} - -TString GetJobStateString(const IStructuredJob& job) -{ - TString result; - { - TStringOutput output(result); - job.Save(output); - output.Finish(); - } - return result; -} - -TStructuredJobTableList NodeToStructuredTablePaths(const TNode& node, const TOperationPreparer& preparer) -{ - int intermediateTableCount = 0; - TVector<TRichYPath> paths; - for (const auto& inputNode : node.AsList()) { - if (inputNode.IsNull()) { - ++intermediateTableCount; - } else { - paths.emplace_back(inputNode.AsString()); - } - } - paths = NRawClient::CanonizeYPaths(/* retryPolicy */ nullptr, preparer.GetContext(), paths); - TStructuredJobTableList result(intermediateTableCount, TStructuredJobTable::Intermediate(TUnspecifiedTableStructure())); - for (const auto& path : paths) { - result.emplace_back(TStructuredJobTable{TUnspecifiedTableStructure(), path}); - } - return result; -} - -TString GetIOInfo( - const 
IStructuredJob& job, - const TCreateClientOptions& options, - const TString& cluster, - const TString& transactionId, - const TString& inputPaths, - const TString& outputPaths, - const TString& neededColumns) -{ - auto client = NDetail::CreateClientImpl(cluster, options); - TOperationPreparer preparer(client, GetGuid(transactionId)); - - auto structuredInputs = NodeToStructuredTablePaths(NodeFromYsonString(inputPaths), preparer); - auto structuredOutputs = NodeToStructuredTablePaths(NodeFromYsonString(outputPaths), preparer); - - auto neededColumnsNode = NodeFromYsonString(neededColumns); - THashSet<TString> columnsUsedInOperations; - for (const auto& columnNode : neededColumnsNode.AsList()) { - columnsUsedInOperations.insert(columnNode.AsString()); - } - - auto operationIo = CreateSimpleOperationIoHelper( - job, - preparer, - TOperationOptions(), - std::move(structuredInputs), - std::move(structuredOutputs), - TUserJobFormatHints(), - ENodeReaderFormat::Yson, - columnsUsedInOperations); - - return BuildYsonStringFluently().BeginMap() - .Item("input_format").Value(operationIo.InputFormat.Config) - .Item("output_format").Value(operationIo.OutputFormat.Config) - .Item("input_table_paths").List(operationIo.Inputs) - .Item("output_table_paths").List(operationIo.Outputs) - .Item("small_files").DoListFor( - operationIo.JobFiles.begin(), - operationIo.JobFiles.end(), - [] (TFluentList fluent, auto fileIt) { - fluent.Item().BeginMap() - .Item("file_name").Value(fileIt->FileName) - .Item("data").Value(fileIt->Data) - .EndMap(); - }) - .EndMap(); -} - -//////////////////////////////////////////////////////////////////////////////// - -} // namespace NYT diff --git a/yt/cpp/mapreduce/client/py_helpers.h b/yt/cpp/mapreduce/client/py_helpers.h deleted file mode 100644 index 85aa0a93f3..0000000000 --- a/yt/cpp/mapreduce/client/py_helpers.h +++ /dev/null @@ -1,25 +0,0 @@ -#include <yt/cpp/mapreduce/interface/client_method_options.h> -#include 
<yt/cpp/mapreduce/interface/operation.h> - -namespace NYT { - -//////////////////////////////////////////////////////////////////////////////// - -using IStructuredJobPtr = TIntrusiveConstPtr<IStructuredJob>; - -IStructuredJobPtr ConstructJob(const TString& jobName, const TString& state); - -TString GetJobStateString(const IStructuredJob& job); - -TString GetIOInfo( - const IStructuredJob& job, - const TCreateClientOptions& options, - const TString& cluster, - const TString& transactionId, - const TString& inputPaths, - const TString& outputPaths, - const TString& neededColumns); - -//////////////////////////////////////////////////////////////////////////////// - -} // namespace NYT diff --git a/yt/cpp/mapreduce/client/retry_heavy_write_request.cpp b/yt/cpp/mapreduce/client/retry_heavy_write_request.cpp deleted file mode 100644 index b4e4975d7f..0000000000 --- a/yt/cpp/mapreduce/client/retry_heavy_write_request.cpp +++ /dev/null @@ -1,87 +0,0 @@ -#include "retry_heavy_write_request.h" - -#include "transaction.h" -#include "transaction_pinger.h" - -#include <yt/cpp/mapreduce/common/retry_lib.h> -#include <yt/cpp/mapreduce/common/wait_proxy.h> - -#include <yt/cpp/mapreduce/interface/config.h> -#include <yt/cpp/mapreduce/interface/tvm.h> - -#include <yt/cpp/mapreduce/interface/logging/yt_log.h> - -#include <yt/cpp/mapreduce/http/helpers.h> -#include <yt/cpp/mapreduce/http/http_client.h> -#include <yt/cpp/mapreduce/http/requests.h> -#include <yt/cpp/mapreduce/http/retry_request.h> - -namespace NYT { - -using ::ToString; - -//////////////////////////////////////////////////////////////////////////////// - -void RetryHeavyWriteRequest( - const IClientRetryPolicyPtr& clientRetryPolicy, - const ITransactionPingerPtr& transactionPinger, - const TClientContext& context, - const TTransactionId& parentId, - THttpHeader& header, - std::function<THolder<IInputStream>()> streamMaker) -{ - int retryCount = context.Config->RetryCount; - if (context.ServiceTicketAuth) { - 
header.SetServiceTicket(context.ServiceTicketAuth->Ptr->IssueServiceTicket()); - } else { - header.SetToken(context.Token); - } - - for (int attempt = 0; attempt < retryCount; ++attempt) { - TPingableTransaction attemptTx(clientRetryPolicy, context, parentId, transactionPinger->GetChildTxPinger(), TStartTransactionOptions()); - - auto input = streamMaker(); - TString requestId; - - try { - auto hostName = GetProxyForHeavyRequest(context); - requestId = CreateGuidAsString(); - - header.AddTransactionId(attemptTx.GetId(), /* overwrite = */ true); - header.SetRequestCompression(ToString(context.Config->ContentEncoding)); - - auto request = context.HttpClient->StartRequest(GetFullUrl(hostName, context, header), requestId, header); - TransferData(input.Get(), request->GetStream()); - request->Finish()->GetResponse(); - } catch (TErrorResponse& e) { - YT_LOG_ERROR("RSP %v - attempt %v failed", - requestId, - attempt); - - if (!IsRetriable(e) || attempt + 1 == retryCount) { - throw; - } - NDetail::TWaitProxy::Get()->Sleep(GetBackoffDuration(e, context.Config)); - continue; - - } catch (std::exception& e) { - YT_LOG_ERROR("RSP %v - %v - attempt %v failed", - requestId, - e.what(), - attempt); - - if (attempt + 1 == retryCount) { - throw; - } - NDetail::TWaitProxy::Get()->Sleep(GetBackoffDuration(e, context.Config)); - continue; - } - - attemptTx.Commit(); - return; - } -} - -//////////////////////////////////////////////////////////////////////////////// - -} // namespace NYT diff --git a/yt/cpp/mapreduce/client/retry_heavy_write_request.h b/yt/cpp/mapreduce/client/retry_heavy_write_request.h deleted file mode 100644 index 647cad302c..0000000000 --- a/yt/cpp/mapreduce/client/retry_heavy_write_request.h +++ /dev/null @@ -1,21 +0,0 @@ -#pragma once - -#include <yt/cpp/mapreduce/common/fwd.h> - -#include <yt/cpp/mapreduce/http/requests.h> - -namespace NYT { - -/////////////////////////////////////////////////////////////////////////////// - -void RetryHeavyWriteRequest( - 
const IClientRetryPolicyPtr& clientRetryPolicy, - const ITransactionPingerPtr& transactionPinger, - const TClientContext& context, - const TTransactionId& parentId, - THttpHeader& header, - std::function<THolder<IInputStream>()> streamMaker); - -/////////////////////////////////////////////////////////////////////////////// - -} // namespace NYT diff --git a/yt/cpp/mapreduce/client/retry_transaction.h b/yt/cpp/mapreduce/client/retry_transaction.h deleted file mode 100644 index 5220c222b8..0000000000 --- a/yt/cpp/mapreduce/client/retry_transaction.h +++ /dev/null @@ -1,71 +0,0 @@ -#pragma once - -#include <yt/cpp/mapreduce/http/retry_request.h> - -#include <yt/cpp/mapreduce/client/client.h> - -#include <yt/cpp/mapreduce/common/wait_proxy.h> -#include <yt/cpp/mapreduce/common/retry_lib.h> - -#include <yt/cpp/mapreduce/interface/logging/yt_log.h> - -namespace NYT::NDetail { - -template <typename TResult> -TResult RetryTransactionWithPolicy( - const TClientBasePtr& client, - std::function<TResult(ITransactionPtr)> func, - IRequestRetryPolicyPtr retryPolicy) -{ - if (!retryPolicy) { - retryPolicy = CreateDefaultRequestRetryPolicy(client->GetContext().Config); - } - - while (true) { - try { - retryPolicy->NotifyNewAttempt(); - auto transaction = client->StartTransaction(TStartTransactionOptions()); - if constexpr (std::is_same<TResult, void>::value) { - func(transaction); - transaction->Commit(); - return; - } else { - auto result = func(transaction); - transaction->Commit(); - return result; - } - } catch (const TErrorResponse& e) { - YT_LOG_ERROR("Retry failed %v - %v", - e.GetError().GetMessage(), - retryPolicy->GetAttemptDescription()); - - if (!IsRetriable(e)) { - throw; - } - - auto maybeRetryTimeout = retryPolicy->OnRetriableError(e); - if (maybeRetryTimeout) { - TWaitProxy::Get()->Sleep(*maybeRetryTimeout); - } else { - throw; - } - } catch (const std::exception& e) { - YT_LOG_ERROR("Retry failed %v - %v", - e.what(), - retryPolicy->GetAttemptDescription()); - - 
if (!IsRetriable(e)) { - throw; - } - - auto maybeRetryTimeout = retryPolicy->OnGenericError(e); - if (maybeRetryTimeout) { - TWaitProxy::Get()->Sleep(*maybeRetryTimeout); - } else { - throw; - } - } - } -} - -} // namespace NYT::NDetail diff --git a/yt/cpp/mapreduce/client/retryful_writer.cpp b/yt/cpp/mapreduce/client/retryful_writer.cpp deleted file mode 100644 index 12b2939ffa..0000000000 --- a/yt/cpp/mapreduce/client/retryful_writer.cpp +++ /dev/null @@ -1,163 +0,0 @@ -#include "retryful_writer.h" - -#include "retry_heavy_write_request.h" - -#include <yt/cpp/mapreduce/http/requests.h> - -#include <yt/cpp/mapreduce/interface/errors.h> -#include <yt/cpp/mapreduce/interface/finish_or_die.h> - -#include <yt/cpp/mapreduce/interface/logging/yt_log.h> - -#include <util/generic/size_literals.h> - -namespace NYT { - -//////////////////////////////////////////////////////////////////////////////// - -TRetryfulWriter::~TRetryfulWriter() -{ - NDetail::FinishOrDie(this, "TRetryfulWriter"); -} - -void TRetryfulWriter::CheckWriterState() -{ - switch (WriterState_) { - case Ok: - break; - case Completed: - ythrow TApiUsageError() << "Cannot use table writer that is finished"; - case Error: - ythrow TApiUsageError() << "Cannot use table writer that finished with error"; - } -} - -void TRetryfulWriter::NotifyRowEnd() -{ - CheckWriterState(); - if (Buffer_.Size() >= BufferSize_) { - FlushBuffer(false); - } -} - -void TRetryfulWriter::DoWrite(const void* buf, size_t len) -{ - CheckWriterState(); - while (Buffer_.Size() + len > Buffer_.Capacity()) { - Buffer_.Reserve(Buffer_.Capacity() * 2); - } - Buffer_.Append(static_cast<const char*>(buf), len); -} - -void TRetryfulWriter::DoFinish() -{ - if (WriterState_ != Ok) { - return; - } - FlushBuffer(true); - if (Started_) { - FilledBuffers_.Stop(); - Thread_.Join(); - } - if (Exception_) { - WriterState_ = Error; - std::rethrow_exception(Exception_); - } - if (WriteTransaction_) { - WriteTransaction_->Commit(); - } - WriterState_ = 
Completed; -} - -void TRetryfulWriter::FlushBuffer(bool lastBlock) -{ - if (!Started_) { - if (lastBlock) { - try { - Send(Buffer_); - } catch (...) { - WriterState_ = Error; - throw; - } - return; - } else { - Started_ = true; - Thread_.Start(); - } - } - - auto emptyBuffer = EmptyBuffers_.Pop(); - if (!emptyBuffer) { - WriterState_ = Error; - std::rethrow_exception(Exception_); - } - FilledBuffers_.Push(std::move(Buffer_)); - Buffer_ = std::move(emptyBuffer.GetRef()); -} - -void TRetryfulWriter::Send(const TBuffer& buffer) -{ - THttpHeader header("PUT", Command_); - header.SetInputFormat(Format_); - header.MergeParameters(Parameters_); - - auto streamMaker = [&buffer] () { - return MakeHolder<TBufferInput>(buffer); - }; - - auto transactionId = (WriteTransaction_ ? WriteTransaction_->GetId() : ParentTransactionId_); - RetryHeavyWriteRequest(ClientRetryPolicy_, TransactionPinger_, Context_, transactionId, header, streamMaker); - - Parameters_ = SecondaryParameters_; // all blocks except the first one are appended -} - -void TRetryfulWriter::SendThread() -{ - while (auto maybeBuffer = FilledBuffers_.Pop()) { - auto& buffer = maybeBuffer.GetRef(); - try { - Send(buffer); - } catch (const std::exception&) { - Exception_ = std::current_exception(); - EmptyBuffers_.Stop(); - break; - } - buffer.Clear(); - EmptyBuffers_.Push(std::move(buffer)); - } -} - -void* TRetryfulWriter::SendThread(void* opaque) -{ - static_cast<TRetryfulWriter*>(opaque)->SendThread(); - return nullptr; -} - -void TRetryfulWriter::Abort() -{ - if (Started_) { - FilledBuffers_.Stop(); - Thread_.Join(); - } - if (WriteTransaction_) { - WriteTransaction_->Abort(); - } - WriterState_ = Completed; -} - -size_t TRetryfulWriter::GetBufferSize(const TMaybe<TWriterOptions>& writerOptions) -{ - auto retryBlockSize = TMaybe<size_t>(); - if (writerOptions) { - if (writerOptions->RetryBlockSize_) { - retryBlockSize = *writerOptions->RetryBlockSize_; - } else if (writerOptions->DesiredChunkSize_) { - 
retryBlockSize = *writerOptions->DesiredChunkSize_; - } - } - return retryBlockSize.GetOrElse(64_MB); -} - -//////////////////////////////////////////////////////////////////////////////// - -} // namespace NYT diff --git a/yt/cpp/mapreduce/client/retryful_writer.h b/yt/cpp/mapreduce/client/retryful_writer.h deleted file mode 100644 index 38e351977d..0000000000 --- a/yt/cpp/mapreduce/client/retryful_writer.h +++ /dev/null @@ -1,130 +0,0 @@ -#pragma once - -#include "transaction.h" -#include "transaction_pinger.h" - -#include <yt/cpp/mapreduce/common/retry_lib.h> -#include <yt/cpp/mapreduce/http/http.h> -#include <yt/cpp/mapreduce/interface/common.h> -#include <yt/cpp/mapreduce/interface/io.h> -#include <yt/cpp/mapreduce/io/helpers.h> -#include <yt/cpp/mapreduce/raw_client/raw_requests.h> - -#include <library/cpp/threading/blocking_queue/blocking_queue.h> - -#include <util/stream/output.h> -#include <util/generic/buffer.h> -#include <util/stream/buffer.h> -#include <util/system/thread.h> -#include <util/system/event.h> - -#include <atomic> - -namespace NYT { - -//////////////////////////////////////////////////////////////////////////////// - -class TRetryfulWriter - : public TRawTableWriter -{ -public: - template <class TWriterOptions> - TRetryfulWriter( - IClientRetryPolicyPtr clientRetryPolicy, - ITransactionPingerPtr transactionPinger, - const TClientContext& context, - const TTransactionId& parentId, - const TString& command, - const TMaybe<TFormat>& format, - const TRichYPath& path, - const TWriterOptions& options) - : ClientRetryPolicy_(std::move(clientRetryPolicy)) - , TransactionPinger_(std::move(transactionPinger)) - , Context_(context) - , Command_(command) - , Format_(format) - , BufferSize_(GetBufferSize(options.WriterOptions_)) - , ParentTransactionId_(parentId) - , WriteTransaction_() - , FilledBuffers_(2) - , EmptyBuffers_(2) - , Buffer_(BufferSize_ * 2) - , Thread_(TThread::TParams{SendThread, this}.SetName("retryful_writer")) - { - Parameters_ = 
FormIORequestParameters(path, options); - - auto secondaryPath = path; - secondaryPath.Append_ = true; - secondaryPath.Schema_.Clear(); - secondaryPath.CompressionCodec_.Clear(); - secondaryPath.ErasureCodec_.Clear(); - secondaryPath.OptimizeFor_.Clear(); - SecondaryParameters_ = FormIORequestParameters(secondaryPath, options); - - if (options.CreateTransaction_) { - WriteTransaction_.ConstructInPlace(ClientRetryPolicy_, context, parentId, TransactionPinger_->GetChildTxPinger(), TStartTransactionOptions()); - auto append = path.Append_.GetOrElse(false); - auto lockMode = (append ? LM_SHARED : LM_EXCLUSIVE); - NDetail::NRawClient::Lock(ClientRetryPolicy_->CreatePolicyForGenericRequest(), Context_, WriteTransaction_->GetId(), path.Path_, lockMode); - } - - EmptyBuffers_.Push(TBuffer(BufferSize_ * 2)); - } - - ~TRetryfulWriter() override; - void NotifyRowEnd() override; - void Abort() override; - - size_t GetRetryBlockRemainingSize() const - { - return (BufferSize_ > Buffer_.size()) ? (BufferSize_ - Buffer_.size()) : 0; - } - -protected: - void DoWrite(const void* buf, size_t len) override; - void DoFinish() override; - -private: - static size_t GetBufferSize(const TMaybe<TWriterOptions>& writerOptions); - -private: - const IClientRetryPolicyPtr ClientRetryPolicy_; - const ITransactionPingerPtr TransactionPinger_; - const TClientContext Context_; - TString Command_; - TMaybe<TFormat> Format_; - const size_t BufferSize_; - - TNode Parameters_; - TNode SecondaryParameters_; - - TTransactionId ParentTransactionId_; - TMaybe<TPingableTransaction> WriteTransaction_; - - ::NThreading::TBlockingQueue<TBuffer> FilledBuffers_; - ::NThreading::TBlockingQueue<TBuffer> EmptyBuffers_; - - TBuffer Buffer_; - - TThread Thread_; - bool Started_ = false; - std::exception_ptr Exception_ = nullptr; - - enum EWriterState { - Ok, - Completed, - Error, - } WriterState_ = Ok; - -private: - void FlushBuffer(bool lastBlock); - void Send(const TBuffer& buffer); - void CheckWriterState(); - - 
void SendThread(); - static void* SendThread(void* opaque); -}; - -//////////////////////////////////////////////////////////////////////////////// - -} diff --git a/yt/cpp/mapreduce/client/retryless_writer.cpp b/yt/cpp/mapreduce/client/retryless_writer.cpp deleted file mode 100644 index 4c25c1a1dd..0000000000 --- a/yt/cpp/mapreduce/client/retryless_writer.cpp +++ /dev/null @@ -1,45 +0,0 @@ -#include "retryless_writer.h" - -#include <yt/cpp/mapreduce/interface/logging/yt_log.h> - -namespace NYT { - -//////////////////////////////////////////////////////////////////////////////// - -TRetrylessWriter::~TRetrylessWriter() -{ - NDetail::FinishOrDie(this, "TRetrylessWriter"); -} - -void TRetrylessWriter::DoFinish() -{ - if (!Running_) { - return; - } - Running_ = false; - - BufferedOutput_->Finish(); - Request_->Finish()->GetResponse(); -} - -void TRetrylessWriter::DoWrite(const void* buf, size_t len) -{ - try { - BufferedOutput_->Write(buf, len); - } catch (...) { - Running_ = false; - throw; - } -} - -void TRetrylessWriter::NotifyRowEnd() -{ } - -void TRetrylessWriter::Abort() -{ - Running_ = false; -} - -//////////////////////////////////////////////////////////////////////////////// - -} // namespace NYT diff --git a/yt/cpp/mapreduce/client/retryless_writer.h b/yt/cpp/mapreduce/client/retryless_writer.h deleted file mode 100644 index baf49a258f..0000000000 --- a/yt/cpp/mapreduce/client/retryless_writer.h +++ /dev/null @@ -1,73 +0,0 @@ -#pragma once - -#include "transaction.h" - -#include <yt/cpp/mapreduce/http/helpers.h> -#include <yt/cpp/mapreduce/http/http.h> -#include <yt/cpp/mapreduce/http/http_client.h> - -#include <yt/cpp/mapreduce/interface/config.h> -#include <yt/cpp/mapreduce/interface/common.h> -#include <yt/cpp/mapreduce/interface/config.h> -#include <yt/cpp/mapreduce/interface/io.h> -#include <yt/cpp/mapreduce/interface/tvm.h> - -#include <yt/cpp/mapreduce/io/helpers.h> - -#include <yt/cpp/mapreduce/raw_client/raw_requests.h> - -#include 
<util/stream/buffered.h> - -namespace NYT { - -//////////////////////////////////////////////////////////////////////////////// - -class TRetrylessWriter - : public TRawTableWriter -{ -public: - template <class TWriterOptions> - TRetrylessWriter( - const TClientContext& context, - const TTransactionId& parentId, - const TString& command, - const TMaybe<TFormat>& format, - const TRichYPath& path, - size_t bufferSize, - const TWriterOptions& options) - { - THttpHeader header("PUT", command); - header.SetInputFormat(format); - header.MergeParameters(FormIORequestParameters(path, options)); - header.AddTransactionId(parentId); - header.SetRequestCompression(ToString(context.Config->ContentEncoding)); - if (context.ServiceTicketAuth) { - header.SetServiceTicket(context.ServiceTicketAuth->Ptr->IssueServiceTicket()); - } else { - header.SetToken(context.Token); - } - - TString requestId = CreateGuidAsString(); - - auto hostName = GetProxyForHeavyRequest(context); - Request_ = context.HttpClient->StartRequest(GetFullUrl(hostName, context, header), requestId, header); - BufferedOutput_.Reset(new TBufferedOutput(Request_->GetStream(), bufferSize)); - } - - ~TRetrylessWriter() override; - void NotifyRowEnd() override; - void Abort() override; - -protected: - void DoWrite(const void* buf, size_t len) override; - void DoFinish() override; - -private: - bool Running_ = true; - NHttpClient::IHttpRequestPtr Request_; - THolder<TBufferedOutput> BufferedOutput_; -}; - -//////////////////////////////////////////////////////////////////////////////// - -} // namespace NYT diff --git a/yt/cpp/mapreduce/client/skiff.cpp b/yt/cpp/mapreduce/client/skiff.cpp deleted file mode 100644 index 67a0f960ae..0000000000 --- a/yt/cpp/mapreduce/client/skiff.cpp +++ /dev/null @@ -1,396 +0,0 @@ -#include "skiff.h" - -#include <yt/cpp/mapreduce/common/retry_lib.h> - -#include <yt/cpp/mapreduce/http/retry_request.h> -#include <yt/cpp/mapreduce/http/requests.h> - -#include 
<yt/cpp/mapreduce/interface/config.h> -#include <yt/cpp/mapreduce/interface/common.h> -#include <yt/cpp/mapreduce/interface/serialize.h> - -#include <yt/cpp/mapreduce/interface/logging/yt_log.h> - -#include <library/cpp/yson/node/node_builder.h> -#include <library/cpp/yson/node/node_io.h> - -#include <yt/cpp/mapreduce/raw_client/raw_batch_request.h> -#include <yt/cpp/mapreduce/raw_client/raw_requests.h> - -#include <yt/cpp/mapreduce/skiff/skiff_schema.h> - -#include <library/cpp/yson/consumer.h> -#include <library/cpp/yson/writer.h> - -#include <util/string/cast.h> -#include <util/stream/str.h> -#include <util/stream/file.h> -#include <util/folder/path.h> - -namespace NYT { -namespace NDetail { - -using namespace NRawClient; - -using ::ToString; - -//////////////////////////////////////////////////////////////////////////////// - -static NSkiff::TSkiffSchemaPtr ReadSkiffSchema(const TString& fileName) -{ - if (!TFsPath(fileName).Exists()) { - return nullptr; - } - TIFStream input(fileName); - NSkiff::TSkiffSchemaPtr schema; - Deserialize(schema, NodeFromYsonStream(&input)); - return schema; -} - -NSkiff::TSkiffSchemaPtr GetJobInputSkiffSchema() -{ - return ReadSkiffSchema("skiff_input"); -} - -NSkiff::EWireType ValueTypeToSkiffType(EValueType valueType) -{ - using NSkiff::EWireType; - switch (valueType) { - case VT_INT64: - case VT_INT32: - case VT_INT16: - case VT_INT8: - return EWireType::Int64; - - case VT_UINT64: - case VT_UINT32: - case VT_UINT16: - case VT_UINT8: - return EWireType::Uint64; - - case VT_DOUBLE: - case VT_FLOAT: - return EWireType::Double; - - case VT_BOOLEAN: - return EWireType::Boolean; - - case VT_STRING: - case VT_UTF8: - case VT_JSON: - return EWireType::String32; - - case VT_ANY: - return EWireType::Yson32; - - case VT_NULL: - case VT_VOID: - return EWireType::Nothing; - - case VT_DATE: - case VT_DATETIME: - case VT_TIMESTAMP: - return EWireType::Uint64; - - case VT_INTERVAL: - return EWireType::Int64; - }; - ythrow yexception() << 
"Cannot convert EValueType '" << valueType << "' to NSkiff::EWireType"; -} - -NSkiff::TSkiffSchemaPtr CreateSkiffSchema( - const TTableSchema& schema, - const TCreateSkiffSchemaOptions& options) -{ - using namespace NSkiff; - - Y_ENSURE(schema.Strict(), "Cannot create Skiff schema for non-strict table schema"); - TVector<TSkiffSchemaPtr> skiffColumns; - for (const auto& column: schema.Columns()) { - TSkiffSchemaPtr skiffColumn; - if (column.Type() == VT_ANY && *column.TypeV3() != *NTi::Optional(NTi::Yson())) { - // We ignore all complex types until YT-12717 is done. - return nullptr; - } - if (column.Required() || NTi::IsSingular(column.TypeV3()->GetTypeName())) { - skiffColumn = CreateSimpleTypeSchema(ValueTypeToSkiffType(column.Type())); - } else { - skiffColumn = CreateVariant8Schema({ - CreateSimpleTypeSchema(EWireType::Nothing), - CreateSimpleTypeSchema(ValueTypeToSkiffType(column.Type()))}); - } - if (options.RenameColumns_) { - auto maybeName = options.RenameColumns_->find(column.Name()); - skiffColumn->SetName(maybeName == options.RenameColumns_->end() ? 
column.Name() : maybeName->second); - } else { - skiffColumn->SetName(column.Name()); - } - skiffColumns.push_back(skiffColumn); - } - - if (options.HasKeySwitch_) { - skiffColumns.push_back( - CreateSimpleTypeSchema(EWireType::Boolean)->SetName("$key_switch")); - } - if (options.HasRangeIndex_) { - skiffColumns.push_back( - CreateVariant8Schema({ - CreateSimpleTypeSchema(EWireType::Nothing), - CreateSimpleTypeSchema(EWireType::Int64)}) - ->SetName("$range_index")); - } - - skiffColumns.push_back( - CreateVariant8Schema({ - CreateSimpleTypeSchema(EWireType::Nothing), - CreateSimpleTypeSchema(EWireType::Int64)}) - ->SetName("$row_index")); - - return CreateTupleSchema(std::move(skiffColumns)); -} - -NSkiff::TSkiffSchemaPtr CreateSkiffSchema( - const TNode& schemaNode, - const TCreateSkiffSchemaOptions& options) -{ - TTableSchema schema; - Deserialize(schema, schemaNode); - return CreateSkiffSchema(schema, options); -} - -void Serialize(const NSkiff::TSkiffSchemaPtr& schema, NYson::IYsonConsumer* consumer) -{ - consumer->OnBeginMap(); - if (schema->GetName().size() > 0) { - consumer->OnKeyedItem("name"); - consumer->OnStringScalar(schema->GetName()); - } - consumer->OnKeyedItem("wire_type"); - consumer->OnStringScalar(ToString(schema->GetWireType())); - if (schema->GetChildren().size() > 0) { - consumer->OnKeyedItem("children"); - consumer->OnBeginList(); - for (const auto& child : schema->GetChildren()) { - consumer->OnListItem(); - Serialize(child, consumer); - } - consumer->OnEndList(); - } - consumer->OnEndMap(); -} - -void Deserialize(NSkiff::TSkiffSchemaPtr& schema, const TNode& node) -{ - using namespace NSkiff; - - static auto createSchema = [](EWireType wireType, TVector<TSkiffSchemaPtr>&& children) -> TSkiffSchemaPtr { - switch (wireType) { - case EWireType::Tuple: - return CreateTupleSchema(std::move(children)); - case EWireType::Variant8: - return CreateVariant8Schema(std::move(children)); - case EWireType::Variant16: - return 
CreateVariant16Schema(std::move(children)); - case EWireType::RepeatedVariant8: - return CreateRepeatedVariant8Schema(std::move(children)); - case EWireType::RepeatedVariant16: - return CreateRepeatedVariant16Schema(std::move(children)); - default: - return CreateSimpleTypeSchema(wireType); - } - }; - - const auto& map = node.AsMap(); - const auto* wireTypePtr = map.FindPtr("wire_type"); - Y_ENSURE(wireTypePtr, "'wire_type' is a required key"); - auto wireType = FromString<NSkiff::EWireType>(wireTypePtr->AsString()); - - const auto* childrenPtr = map.FindPtr("children"); - Y_ENSURE(NSkiff::IsSimpleType(wireType) || childrenPtr, - "'children' key is required for complex node '" << wireType << "'"); - TVector<TSkiffSchemaPtr> children; - if (childrenPtr) { - for (const auto& childNode : childrenPtr->AsList()) { - TSkiffSchemaPtr childSchema; - Deserialize(childSchema, childNode); - children.push_back(std::move(childSchema)); - } - } - - schema = createSchema(wireType, std::move(children)); - - const auto* namePtr = map.FindPtr("name"); - if (namePtr) { - schema->SetName(namePtr->AsString()); - } -} - -TFormat CreateSkiffFormat(const NSkiff::TSkiffSchemaPtr& schema) { - Y_ENSURE(schema->GetWireType() == NSkiff::EWireType::Variant16, - "Bad wire type for schema; expected 'variant16', got " << schema->GetWireType()); - - THashMap< - NSkiff::TSkiffSchemaPtr, - size_t, - NSkiff::TSkiffSchemaPtrHasher, - NSkiff::TSkiffSchemaPtrEqual> schemasMap; - size_t tableIndex = 0; - auto config = TNode("skiff"); - config.Attributes()["table_skiff_schemas"] = TNode::CreateList(); - - for (const auto& schemaChild : schema->GetChildren()) { - auto [iter, inserted] = schemasMap.emplace(schemaChild, tableIndex); - size_t currentIndex; - if (inserted) { - currentIndex = tableIndex; - ++tableIndex; - } else { - currentIndex = iter->second; - } - config.Attributes()["table_skiff_schemas"].Add("$" + ToString(currentIndex)); - } - - config.Attributes()["skiff_schema_registry"] = 
TNode::CreateMap(); - - for (const auto& [tableSchema, index] : schemasMap) { - TNode node; - TNodeBuilder nodeBuilder(&node); - Serialize(tableSchema, &nodeBuilder); - config.Attributes()["skiff_schema_registry"][ToString(index)] = std::move(node); - } - - return TFormat(config); -} - -NSkiff::TSkiffSchemaPtr CreateSkiffSchemaIfNecessary( - const TClientContext& context, - const IClientRetryPolicyPtr& clientRetryPolicy, - const TTransactionId& transactionId, - ENodeReaderFormat nodeReaderFormat, - const TVector<TRichYPath>& tablePaths, - const TCreateSkiffSchemaOptions& options) -{ - if (nodeReaderFormat == ENodeReaderFormat::Yson) { - return nullptr; - } - - for (const auto& path : tablePaths) { - if (path.Columns_) { - switch (nodeReaderFormat) { - case ENodeReaderFormat::Skiff: - ythrow TApiUsageError() << "Cannot use Skiff format with column selectors"; - case ENodeReaderFormat::Auto: - return nullptr; - default: - Y_FAIL("Unexpected node reader format: %d", static_cast<int>(nodeReaderFormat)); - } - } - } - - auto nodes = NRawClient::BatchTransform( - clientRetryPolicy->CreatePolicyForGenericRequest(), - context, - NRawClient::CanonizeYPaths(clientRetryPolicy->CreatePolicyForGenericRequest(), context, tablePaths), - [&] (TRawBatchRequest& batch, const TRichYPath& path) { - auto getOptions = TGetOptions() - .AttributeFilter( - TAttributeFilter() - .AddAttribute("schema") - .AddAttribute("dynamic") - .AddAttribute("type") - ); - return batch.Get(transactionId, path.Path_, getOptions); - }); - - TVector<NSkiff::TSkiffSchemaPtr> schemas; - for (size_t tableIndex = 0; tableIndex < nodes.size(); ++tableIndex) { - const auto& tablePath = tablePaths[tableIndex].Path_; - const auto& attributes = nodes[tableIndex].GetAttributes(); - Y_ENSURE_EX(attributes["type"] == TNode("table"), - TApiUsageError() << "Operation input path " << tablePath << " is not a table"); - bool dynamic = attributes["dynamic"].AsBool(); - bool strict = 
attributes["schema"].GetAttributes()["strict"].AsBool(); - switch (nodeReaderFormat) { - case ENodeReaderFormat::Skiff: - Y_ENSURE_EX(strict, - TApiUsageError() << "Cannot use skiff format for table with non-strict schema '" << tablePath << "'"); - Y_ENSURE_EX(!dynamic, - TApiUsageError() << "Cannot use skiff format for dynamic table '" << tablePath << "'"); - break; - case ENodeReaderFormat::Auto: - if (dynamic || !strict) { - YT_LOG_DEBUG("Cannot use skiff format for table '%v' as it is dynamic or has non-strict schema", - tablePath); - return nullptr; - } - break; - default: - Y_FAIL("Unexpected node reader format: %d", static_cast<int>(nodeReaderFormat)); - } - - NSkiff::TSkiffSchemaPtr curSkiffSchema; - if (tablePaths[tableIndex].RenameColumns_) { - auto customOptions = options; - customOptions.RenameColumns(*tablePaths[tableIndex].RenameColumns_); - curSkiffSchema = CreateSkiffSchema(attributes["schema"], customOptions); - } else { - curSkiffSchema = CreateSkiffSchema(attributes["schema"], options); - } - - if (!curSkiffSchema) { - return nullptr; - } - schemas.push_back(curSkiffSchema); - } - return NSkiff::CreateVariant16Schema(std::move(schemas)); -} - -//////////////////////////////////////////////////////////////////////////////// - -NSkiff::TSkiffSchemaPtr CreateSkiffSchema( - const TVector<NSkiff::TSkiffSchemaPtr>& tableSchemas, - const TCreateSkiffSchemaOptions& options -) { - constexpr auto KEY_SWITCH_COLUMN = "$key_switch"; - constexpr auto ROW_INDEX_COLUMN = "$row_index"; - constexpr auto RANGE_INDEX_COLUMN = "$range_index"; - - TVector<NSkiff::TSkiffSchemaPtr> schemas; - schemas.reserve(tableSchemas.size()); - - for (const auto& tableSchema : tableSchemas) { - Y_ENSURE(tableSchema->GetWireType() == NSkiff::EWireType::Tuple, - "Expected 'tuple' wire type for table schema, got '" << tableSchema->GetWireType() << "'"); - - const auto& children = tableSchema->GetChildren(); - NSkiff::TSkiffSchemaList columns; - - columns.reserve(children.size() + 3); 
- if (options.HasKeySwitch_) { - columns.push_back( - CreateSimpleTypeSchema(NSkiff::EWireType::Boolean)->SetName(KEY_SWITCH_COLUMN)); - } - columns.push_back( - NSkiff::CreateVariant8Schema({ - CreateSimpleTypeSchema(NSkiff::EWireType::Nothing), - CreateSimpleTypeSchema(NSkiff::EWireType::Int64)}) - ->SetName(ROW_INDEX_COLUMN)); - if (options.HasRangeIndex_) { - columns.push_back( - NSkiff::CreateVariant8Schema({ - CreateSimpleTypeSchema(NSkiff::EWireType::Nothing), - CreateSimpleTypeSchema(NSkiff::EWireType::Int64)}) - ->SetName(RANGE_INDEX_COLUMN)); - } - columns.insert(columns.end(), children.begin(), children.end()); - - schemas.push_back(NSkiff::CreateTupleSchema(columns)); - } - - return NSkiff::CreateVariant16Schema(schemas); -} - -//////////////////////////////////////////////////////////////////////////////// - -} // namespace NDetail -} // namespace NYT diff --git a/yt/cpp/mapreduce/client/skiff.h b/yt/cpp/mapreduce/client/skiff.h deleted file mode 100644 index 82d80a4967..0000000000 --- a/yt/cpp/mapreduce/client/skiff.h +++ /dev/null @@ -1,72 +0,0 @@ -#pragma once - -#include <yt/cpp/mapreduce/common/fwd.h> - -#include <yt/cpp/mapreduce/interface/fwd.h> -#include <yt/cpp/mapreduce/interface/common.h> - -#include <yt/cpp/mapreduce/skiff/wire_type.h> -#include <yt/cpp/mapreduce/skiff/skiff_schema.h> - -#include <util/generic/vector.h> - -namespace NYT::NYson { -struct IYsonConsumer; -} // namespace NYT::NYson - -namespace NYT { - -struct TClientContext; -enum class ENodeReaderFormat : int; - -namespace NDetail { - -//////////////////////////////////////////////////////////////////////////////// - -struct TCreateSkiffSchemaOptions -{ - using TSelf = TCreateSkiffSchemaOptions; - - FLUENT_FIELD_DEFAULT(bool, HasKeySwitch, false); - FLUENT_FIELD_DEFAULT(bool, HasRangeIndex, false); - - using TRenameColumnsDescriptor = THashMap<TString, TString>; - FLUENT_FIELD_OPTION(TRenameColumnsDescriptor, RenameColumns); -}; - 
-//////////////////////////////////////////////////////////////////////////////// - -NSkiff::TSkiffSchemaPtr CreateSkiffSchema( - const TVector<NSkiff::TSkiffSchemaPtr>& tableSchemas, - const TCreateSkiffSchemaOptions& options); - -NSkiff::TSkiffSchemaPtr GetJobInputSkiffSchema(); - -NSkiff::EWireType ValueTypeToSkiffType(EValueType valueType); - -NSkiff::TSkiffSchemaPtr CreateSkiffSchema( - const TTableSchema& schema, - const TCreateSkiffSchemaOptions& options = TCreateSkiffSchemaOptions()); - -NSkiff::TSkiffSchemaPtr CreateSkiffSchema( - const TNode& schemaNode, - const TCreateSkiffSchemaOptions& options = TCreateSkiffSchemaOptions()); - -void Serialize(const NSkiff::TSkiffSchemaPtr& schema, NYson::IYsonConsumer* consumer); - -void Deserialize(NSkiff::TSkiffSchemaPtr& schema, const TNode& node); - -TFormat CreateSkiffFormat(const NSkiff::TSkiffSchemaPtr& schema); - -NSkiff::TSkiffSchemaPtr CreateSkiffSchemaIfNecessary( - const TClientContext& context, - const IClientRetryPolicyPtr& clientRetryPolicy, - const TTransactionId& transactionId, - ENodeReaderFormat nodeReaderFormat, - const TVector<TRichYPath>& tablePaths, - const TCreateSkiffSchemaOptions& options = TCreateSkiffSchemaOptions()); - -//////////////////////////////////////////////////////////////////////////////// - -} // namespace NDetail -} // namespace NYT diff --git a/yt/cpp/mapreduce/client/structured_table_formats.cpp b/yt/cpp/mapreduce/client/structured_table_formats.cpp deleted file mode 100644 index b6e82c6c15..0000000000 --- a/yt/cpp/mapreduce/client/structured_table_formats.cpp +++ /dev/null @@ -1,572 +0,0 @@ -#include "structured_table_formats.h" - -#include "format_hints.h" -#include "skiff.h" - -#include <yt/cpp/mapreduce/common/retry_lib.h> - -#include <yt/cpp/mapreduce/io/yamr_table_reader.h> - -#include <yt/cpp/mapreduce/library/table_schema/protobuf.h> - -#include <yt/cpp/mapreduce/interface/common.h> - -#include <yt/cpp/mapreduce/raw_client/raw_requests.h> - -#include 
<library/cpp/type_info/type_info.h> -#include <library/cpp/yson/writer.h> - -#include <memory> - -namespace NYT { - -//////////////////////////////////////////////////////////////////////////////// - -TMaybe<TNode> GetCommonTableFormat( - const TVector<TMaybe<TNode>>& formats) -{ - TMaybe<TNode> result; - bool start = true; - for (auto& format : formats) { - if (start) { - result = format; - start = false; - continue; - } - - if (result.Defined() != format.Defined()) { - ythrow yexception() << "Different formats of input tables"; - } - - if (!result.Defined()) { - continue; - } - - auto& resultAttrs = result.Get()->GetAttributes(); - auto& formatAttrs = format.Get()->GetAttributes(); - - if (resultAttrs["key_column_names"] != formatAttrs["key_column_names"]) { - ythrow yexception() << "Different formats of input tables"; - } - - bool hasSubkeyColumns = resultAttrs.HasKey("subkey_column_names"); - if (hasSubkeyColumns != formatAttrs.HasKey("subkey_column_names")) { - ythrow yexception() << "Different formats of input tables"; - } - - if (hasSubkeyColumns && - resultAttrs["subkey_column_names"] != formatAttrs["subkey_column_names"]) - { - ythrow yexception() << "Different formats of input tables"; - } - } - - return result; -} - -TMaybe<TNode> GetTableFormat( - const IClientRetryPolicyPtr& retryPolicy, - const TClientContext& context, - const TTransactionId& transactionId, - const TRichYPath& path) -{ - auto formatPath = path.Path_ + "/@_format"; - if (!NDetail::NRawClient::Exists(retryPolicy->CreatePolicyForGenericRequest(), context, transactionId, formatPath)) { - return TMaybe<TNode>(); - } - TMaybe<TNode> format = NDetail::NRawClient::Get(retryPolicy->CreatePolicyForGenericRequest(), context, transactionId, formatPath); - if (format.Get()->AsString() != "yamred_dsv") { - return TMaybe<TNode>(); - } - auto& formatAttrs = format.Get()->Attributes(); - if (!formatAttrs.HasKey("key_column_names")) { - ythrow yexception() << - "Table '" << path.Path_ << "': attribute 
'key_column_names' is missing"; - } - formatAttrs["has_subkey"] = "true"; - formatAttrs["lenval"] = "true"; - return format; -} - -TMaybe<TNode> GetTableFormats( - const IClientRetryPolicyPtr& clientRetryPolicy, - const TClientContext& context, - const TTransactionId& transactionId, - const TVector<TRichYPath>& inputs) -{ - TVector<TMaybe<TNode>> formats; - for (auto& table : inputs) { - formats.push_back(GetTableFormat(clientRetryPolicy, context, transactionId, table)); - } - - return GetCommonTableFormat(formats); -} - -//////////////////////////////////////////////////////////////////////////////// - -namespace NDetail { - -//////////////////////////////////////////////////////////////////////////////// - -NSkiff::TSkiffSchemaPtr TryCreateSkiffSchema( - const TClientContext& context, - const IClientRetryPolicyPtr& clientRetryPolicy, - const TTransactionId& transactionId, - const TVector<TRichYPath>& tables, - const TOperationOptions& options, - ENodeReaderFormat nodeReaderFormat) -{ - bool hasInputQuery = options.Spec_.Defined() && options.Spec_->IsMap() && options.Spec_->HasKey("input_query"); - if (hasInputQuery) { - Y_ENSURE_EX(nodeReaderFormat != ENodeReaderFormat::Skiff, - TApiUsageError() << "Cannot use Skiff format for operations with 'input_query' in spec"); - return nullptr; - } - return CreateSkiffSchemaIfNecessary( - context, - clientRetryPolicy, - transactionId, - nodeReaderFormat, - tables, - TCreateSkiffSchemaOptions() - .HasKeySwitch(true) - .HasRangeIndex(true)); -} - -TString CreateSkiffConfig(const NSkiff::TSkiffSchemaPtr& schema) -{ - TString result; - TStringOutput stream(result); - ::NYson::TYsonWriter writer(&stream); - Serialize(schema, &writer); - return result; -} - -TString CreateProtoConfig(const TVector<const ::google::protobuf::Descriptor*>& descriptorList) -{ - TString result; - TStringOutput messageTypeList(result); - for (const auto& descriptor : descriptorList) { - messageTypeList << descriptor->full_name() << Endl; - } - return 
result; -} - -//////////////////////////////////////////////////////////////////////////////// - -struct TGetTableStructureDescriptionStringImpl { - template<typename T> - TString operator()(const T& description) { - if constexpr (std::is_same_v<T, TUnspecifiedTableStructure>) { - return "Unspecified"; - } else if constexpr (std::is_same_v<T, TProtobufTableStructure>) { - TString res; - TStringStream out(res); - if (description.Descriptor) { - out << description.Descriptor->full_name(); - } else { - out << "<unknown>"; - } - out << " protobuf message"; - return res; - } else { - static_assert(TDependentFalse<T>, "Unknown type"); - } - } -}; - -TString GetTableStructureDescriptionString(const TTableStructure& tableStructure) -{ - return std::visit(TGetTableStructureDescriptionStringImpl(), tableStructure); -} - -//////////////////////////////////////////////////////////////////////////////// - -TString JobTablePathString(const TStructuredJobTable& jobTable) -{ - if (jobTable.RichYPath) { - return jobTable.RichYPath->Path_; - } else { - return "<intermediate-table>"; - } -} - -TStructuredJobTableList ToStructuredJobTableList(const TVector<TStructuredTablePath>& tableList) -{ - TStructuredJobTableList result; - for (const auto& table : tableList) { - result.push_back(TStructuredJobTable{table.Description, table.RichYPath}); - } - return result; -} - -TStructuredJobTableList CanonizeStructuredTableList(const TClientContext& context, const TVector<TStructuredTablePath>& tableList) -{ - TVector<TRichYPath> toCanonize; - toCanonize.reserve(tableList.size()); - for (const auto& table : tableList) { - toCanonize.emplace_back(table.RichYPath); - } - const auto canonized = NRawClient::CanonizeYPaths(/* retryPolicy */ nullptr, context, toCanonize); - Y_VERIFY(canonized.size() == tableList.size()); - - TStructuredJobTableList result; - result.reserve(tableList.size()); - for (size_t i = 0; i != canonized.size(); ++i) { - 
result.emplace_back(TStructuredJobTable{tableList[i].Description, canonized[i]}); - } - return result; -} - -TVector<TRichYPath> GetPathList( - const TStructuredJobTableList& tableList, - const TMaybe<TVector<TTableSchema>>& jobSchemaInferenceResult, - bool inferSchemaFromDescriptions) -{ - Y_VERIFY(!jobSchemaInferenceResult || tableList.size() == jobSchemaInferenceResult->size()); - - auto maybeInferSchema = [&] (const TStructuredJobTable& table, ui32 tableIndex) -> TMaybe<TTableSchema> { - if (jobSchemaInferenceResult && !jobSchemaInferenceResult->at(tableIndex).Empty()) { - return jobSchemaInferenceResult->at(tableIndex); - } - if (inferSchemaFromDescriptions) { - return GetTableSchema(table.Description); - } - return Nothing(); - }; - - TVector<TRichYPath> result; - result.reserve(tableList.size()); - for (size_t tableIndex = 0; tableIndex != tableList.size(); ++tableIndex) { - const auto& table = tableList[tableIndex]; - Y_VERIFY(table.RichYPath, "Cannot get path for intermediate table"); - auto richYPath = *table.RichYPath; - if (!richYPath.Schema_) { - if (auto schema = maybeInferSchema(table, tableIndex)) { - richYPath.Schema(std::move(*schema)); - } - } - - result.emplace_back(std::move(richYPath)); - } - return result; -} - - -TStructuredRowStreamDescription GetJobStreamDescription( - const IStructuredJob& job, - EIODirection direction) -{ - switch (direction) { - case EIODirection::Input: - return job.GetInputRowStreamDescription(); - case EIODirection::Output: - return job.GetOutputRowStreamDescription(); - default: - Y_FAIL("unreachable"); - } -} - -TString GetSuffix(EIODirection direction) -{ - switch (direction) { - case EIODirection::Input: - return "_input"; - case EIODirection::Output: - return "_output"; - } - Y_FAIL("unreachable"); -} - -TString GetAddIOMethodName(EIODirection direction) -{ - switch (direction) { - case EIODirection::Input: - return "AddInput<>"; - case EIODirection::Output: - return "AddOutput<>"; - } - Y_FAIL("unreachable"); 
-} - -//////////////////////////////////////////////////////////////////////////////// - -struct TFormatBuilder::TFormatSwitcher -{ - template <typename T> - auto operator() (const T& /*t*/) { - if constexpr (std::is_same_v<T, TTNodeStructuredRowStream>) { - return &TFormatBuilder::CreateNodeFormat; - } else if constexpr (std::is_same_v<T, TTYaMRRowStructuredRowStream>) { - return &TFormatBuilder::CreateYamrFormat; - } else if constexpr (std::is_same_v<T, TProtobufStructuredRowStream>) { - return &TFormatBuilder::CreateProtobufFormat; - } else if constexpr (std::is_same_v<T, TVoidStructuredRowStream>) { - return &TFormatBuilder::CreateVoidFormat; - } else { - static_assert(TDependentFalse<T>, "unknown stream description"); - } - } -}; - -TFormatBuilder::TFormatBuilder( - IClientRetryPolicyPtr clientRetryPolicy, - TClientContext context, - TTransactionId transactionId, - TOperationOptions operationOptions) - : ClientRetryPolicy_(std::move(clientRetryPolicy)) - , Context_(std::move(context)) - , TransactionId_(transactionId) - , OperationOptions_(std::move(operationOptions)) -{ } - -std::pair <TFormat, TMaybe<TSmallJobFile>> TFormatBuilder::CreateFormat( - const IStructuredJob& job, - const EIODirection& direction, - const TStructuredJobTableList& structuredTableList, - const TMaybe <TFormatHints>& formatHints, - ENodeReaderFormat nodeReaderFormat, - bool allowFormatFromTableAttribute) -{ - auto jobStreamDescription = GetJobStreamDescription(job, direction); - auto method = std::visit(TFormatSwitcher(), jobStreamDescription); - return (this->*method)( - job, - direction, - structuredTableList, - formatHints, - nodeReaderFormat, - allowFormatFromTableAttribute); -} - -std::pair<TFormat, TMaybe<TSmallJobFile>> TFormatBuilder::CreateVoidFormat( - const IStructuredJob& /*job*/, - const EIODirection& /*direction*/, - const TStructuredJobTableList& /*structuredTableList*/, - const TMaybe<TFormatHints>& /*formatHints*/, - ENodeReaderFormat /*nodeReaderFormat*/, - bool 
/*allowFormatFromTableAttribute*/) -{ - return { - TFormat(), - Nothing() - }; -} - -std::pair<TFormat, TMaybe<TSmallJobFile>> TFormatBuilder::CreateYamrFormat( - const IStructuredJob& job, - const EIODirection& direction, - const TStructuredJobTableList& structuredTableList, - const TMaybe<TFormatHints>& /*formatHints*/, - ENodeReaderFormat /*nodeReaderFormat*/, - bool allowFormatFromTableAttribute) -{ - for (const auto& table: structuredTableList) { - if (!std::holds_alternative<TUnspecifiedTableStructure>(table.Description)) { - ythrow TApiUsageError() - << "cannot use " << direction << " table '" << JobTablePathString(table) - << "' with job " << TJobFactory::Get()->GetJobName(&job) << "; " - << "table has unsupported structure description; check " << GetAddIOMethodName(direction) << " for this table"; - } - } - TMaybe<TNode> formatFromTableAttributes; - if (allowFormatFromTableAttribute && OperationOptions_.UseTableFormats_) { - TVector<TRichYPath> tableList; - for (const auto& table: structuredTableList) { - Y_VERIFY(table.RichYPath, "Cannot use format from table for intermediate table"); - tableList.push_back(*table.RichYPath); - } - formatFromTableAttributes = GetTableFormats(ClientRetryPolicy_, Context_, TransactionId_, tableList); - } - if (formatFromTableAttributes) { - return { - TFormat(*formatFromTableAttributes), - Nothing() - }; - } else { - auto formatNode = TNode("yamr"); - formatNode.Attributes() = TNode() - ("lenval", true) - ("has_subkey", true) - ("enable_table_index", true); - return { - TFormat(formatNode), - Nothing() - }; - } -} - -std::pair<TFormat, TMaybe<TSmallJobFile>> TFormatBuilder::CreateNodeFormat( - const IStructuredJob& job, - const EIODirection& direction, - const TStructuredJobTableList& structuredTableList, - const TMaybe<TFormatHints>& formatHints, - ENodeReaderFormat nodeReaderFormat, - bool /*allowFormatFromTableAttribute*/) -{ - for (const auto& table: structuredTableList) { - if 
(!std::holds_alternative<TUnspecifiedTableStructure>(table.Description)) { - ythrow TApiUsageError() - << "cannot use " << direction << " table '" << JobTablePathString(table) - << "' with job " << TJobFactory::Get()->GetJobName(&job) << "; " - << "table has unsupported structure description; check AddInput<> / AddOutput<> for this table"; - } - } - NSkiff::TSkiffSchemaPtr skiffSchema = nullptr; - if (nodeReaderFormat != ENodeReaderFormat::Yson) { - TVector<TRichYPath> tableList; - for (const auto& table: structuredTableList) { - Y_VERIFY(table.RichYPath, "Cannot use skiff with temporary tables"); - tableList.emplace_back(*table.RichYPath); - } - skiffSchema = TryCreateSkiffSchema( - Context_, - ClientRetryPolicy_, - TransactionId_, - tableList, - OperationOptions_, - nodeReaderFormat); - } - if (skiffSchema) { - auto format = CreateSkiffFormat(skiffSchema); - NYT::NDetail::ApplyFormatHints<TNode>(&format, formatHints); - return { - CreateSkiffFormat(skiffSchema), - TSmallJobFile{ - TString("skiff") + GetSuffix(direction), - CreateSkiffConfig(skiffSchema) - } - }; - } else { - auto format = TFormat::YsonBinary(); - NYT::NDetail::ApplyFormatHints<TNode>(&format, formatHints); - return { - format, - Nothing() - }; - } -} - -[[noreturn]] static void ThrowUnsupportedStructureDescription( - const EIODirection& direction, - const TStructuredJobTable& table, - const IStructuredJob& job) -{ - ythrow TApiUsageError() - << "cannot use " << direction << " table '" << JobTablePathString(table) - << "' with job " << TJobFactory::Get()->GetJobName(&job) << "; " - << "table has unsupported structure description; check " << GetAddIOMethodName(direction) << " for this table"; -} - -[[noreturn]] static void ThrowTypeDeriveFail( - const EIODirection& direction, - const IStructuredJob& job, - const TString& type) -{ - ythrow TApiUsageError() - << "Cannot derive exact " << type << " type for intermediate " << direction << " table for job " - << TJobFactory::Get()->GetJobName(&job) - << 
"; use one of TMapReduceOperationSpec::Hint* methods to specifiy intermediate table structure"; -} - -[[noreturn]] static void ThrowUnexpectedDifferentDescriptors( - const EIODirection& direction, - const TStructuredJobTable& table, - const IStructuredJob& job, - const TMaybe<TStringBuf> jobDescriptorName, - const TMaybe<TStringBuf> descriptorName) -{ - ythrow TApiUsageError() - << "Job " << TJobFactory::Get()->GetJobName(&job) << " expects " - << jobDescriptorName << " as " << direction << ", but table " << JobTablePathString(table) - << " is tagged with " << descriptorName; -} - -std::pair<TFormat, TMaybe<TSmallJobFile>> TFormatBuilder::CreateProtobufFormat( - const IStructuredJob& job, - const EIODirection& direction, - const TStructuredJobTableList& structuredTableList, - const TMaybe<TFormatHints>& /*formatHints*/, - ENodeReaderFormat /*nodeReaderFormat*/, - bool /*allowFormatFromTableAttribute*/) -{ - if (Context_.Config->UseClientProtobuf) { - return { - TFormat::YsonBinary(), - TSmallJobFile{ - TString("proto") + GetSuffix(direction), - CreateProtoConfig({}), - }, - }; - } - const ::google::protobuf::Descriptor* const jobDescriptor = - std::get<TProtobufStructuredRowStream>(GetJobStreamDescription(job, direction)).Descriptor; - Y_ENSURE(!structuredTableList.empty(), - "empty " << direction << " tables for job " << TJobFactory::Get()->GetJobName(&job)); - - TVector<const ::google::protobuf::Descriptor*> descriptorList; - for (const auto& table : structuredTableList) { - const ::google::protobuf::Descriptor* descriptor = nullptr; - if (std::holds_alternative<TProtobufTableStructure>(table.Description)) { - descriptor = std::get<TProtobufTableStructure>(table.Description).Descriptor; - } else if (table.RichYPath) { - ThrowUnsupportedStructureDescription(direction, table, job); - } - if (!descriptor) { - // It must be intermediate table, because there is no proper way to add such table to spec - // (AddInput requires to specify proper message). 
- Y_VERIFY(!table.RichYPath, "Descriptors for all tables except intermediate must be known"); - if (jobDescriptor) { - descriptor = jobDescriptor; - } else { - ThrowTypeDeriveFail(direction, job, "protobuf"); - } - } - if (jobDescriptor && descriptor != jobDescriptor) { - ThrowUnexpectedDifferentDescriptors( - direction, - table, - job, - jobDescriptor->full_name(), - descriptor->full_name()); - } - descriptorList.push_back(descriptor); - } - Y_VERIFY(!descriptorList.empty(), "Messages for proto format are unknown (empty ProtoDescriptors)"); - return { - TFormat::Protobuf(descriptorList, Context_.Config->ProtobufFormatWithDescriptors), - TSmallJobFile{ - TString("proto") + GetSuffix(direction), - CreateProtoConfig(descriptorList) - }, - }; -} - -//////////////////////////////////////////////////////////////////////////////// - -struct TGetTableSchemaImpl -{ - template <typename T> - TMaybe<TTableSchema> operator() (const T& description) { - if constexpr (std::is_same_v<T, TUnspecifiedTableStructure>) { - return Nothing(); - } else if constexpr (std::is_same_v<T, TProtobufTableStructure>) { - if (!description.Descriptor) { - return Nothing(); - } - return CreateTableSchema(*description.Descriptor); - } else { - static_assert(TDependentFalse<T>, "unknown type"); - } - } -}; - -TMaybe<TTableSchema> GetTableSchema(const TTableStructure& tableStructure) -{ - return std::visit(TGetTableSchemaImpl(), tableStructure); -} - -//////////////////////////////////////////////////////////////////////////////// - -} // namespace NDetail -} // namespace NYT diff --git a/yt/cpp/mapreduce/client/structured_table_formats.h b/yt/cpp/mapreduce/client/structured_table_formats.h deleted file mode 100644 index 27d980c587..0000000000 --- a/yt/cpp/mapreduce/client/structured_table_formats.h +++ /dev/null @@ -1,146 +0,0 @@ -#pragma once - -#include <yt/cpp/mapreduce/interface/fwd.h> - -#include <yt/cpp/mapreduce/interface/config.h> -#include <yt/cpp/mapreduce/interface/operation.h> - 
-#include <yt/cpp/mapreduce/common/fwd.h> - -#include <yt/cpp/mapreduce/http/context.h> -#include <yt/cpp/mapreduce/http/requests.h> - -#include <utility> - -namespace NYT { - -//////////////////////////////////////////////////////////////////////////////// - -TMaybe<TNode> GetCommonTableFormat( - const TVector<TMaybe<TNode>>& formats); - -TMaybe<TNode> GetTableFormat( - const IClientRetryPolicyPtr& clientRetryPolicy, - const TClientContext& context, - const TTransactionId& transactionId, - const TRichYPath& path); - -TMaybe<TNode> GetTableFormats( - const IClientRetryPolicyPtr& clientRetryPolicy, - const TClientContext& context, - const TTransactionId& transactionId, - const TVector<TRichYPath>& paths); - -//////////////////////////////////////////////////////////////////////////////// - -namespace NDetail { - -//////////////////////////////////////////////////////////////////////////////// - -enum class EIODirection -{ - Input, - Output, -}; - -//////////////////////////////////////////////////////////////////////////////// - -struct TSmallJobFile -{ - TString FileName; - TString Data; -}; - -//////////////////////////////////////////////////////////////////////////////// - -// Table that is used while preparing operation formats. 
Can be real table or intermediate -struct TStructuredJobTable -{ - TTableStructure Description; - // Might be null for intermediate tables in MapReduce operation - TMaybe<TRichYPath> RichYPath; - - static TStructuredJobTable Intermediate(TTableStructure description) - { - return TStructuredJobTable{std::move(description), Nothing()}; - } -}; -using TStructuredJobTableList = TVector<TStructuredJobTable>; -TString JobTablePathString(const TStructuredJobTable& jobTable); -TStructuredJobTableList ToStructuredJobTableList(const TVector<TStructuredTablePath>& tableList); - -TStructuredJobTableList CanonizeStructuredTableList(const TClientContext& context, const TVector<TStructuredTablePath>& tableList); -TVector<TRichYPath> GetPathList( - const TStructuredJobTableList& tableList, - const TMaybe<TVector<TTableSchema>>& schemaInferenceResult, - bool inferSchema); - -//////////////////////////////////////////////////////////////////////////////// - -class TFormatBuilder -{ -private: - struct TFormatSwitcher; - -public: - TFormatBuilder( - IClientRetryPolicyPtr clientRetryPolicy, - TClientContext context, - TTransactionId transactionId, - TOperationOptions operationOptions); - - std::pair<TFormat, TMaybe<TSmallJobFile>> CreateFormat( - const IStructuredJob& job, - const EIODirection& direction, - const TStructuredJobTableList& structuredTableList, - const TMaybe<TFormatHints>& formatHints, - ENodeReaderFormat nodeReaderFormat, - bool allowFormatFromTableAttribute); - - std::pair<TFormat, TMaybe<TSmallJobFile>> CreateVoidFormat( - const IStructuredJob& job, - const EIODirection& direction, - const TStructuredJobTableList& structuredTableList, - const TMaybe<TFormatHints>& formatHints, - ENodeReaderFormat nodeReaderFormat, - bool allowFormatFromTableAttribute); - - std::pair<TFormat, TMaybe<TSmallJobFile>> CreateYamrFormat( - const IStructuredJob& job, - const EIODirection& direction, - const TStructuredJobTableList& structuredTableList, - const TMaybe<TFormatHints>& 
formatHints, - ENodeReaderFormat nodeReaderFormat, - bool allowFormatFromTableAttribute); - - std::pair<TFormat, TMaybe<TSmallJobFile>> CreateNodeFormat( - const IStructuredJob& job, - const EIODirection& direction, - const TStructuredJobTableList& structuredTableList, - const TMaybe<TFormatHints>& formatHints, - ENodeReaderFormat nodeReaderFormat, - bool allowFormatFromTableAttribute); - - std::pair<TFormat, TMaybe<TSmallJobFile>> CreateProtobufFormat( - const IStructuredJob& job, - const EIODirection& direction, - const TStructuredJobTableList& structuredTableList, - const TMaybe<TFormatHints>& formatHints, - ENodeReaderFormat nodeReaderFormat, - bool allowFormatFromTableAttribute); - -private: - const IClientRetryPolicyPtr ClientRetryPolicy_; - const TClientContext Context_; - const TTransactionId TransactionId_; - const TOperationOptions OperationOptions_; -}; - -//////////////////////////////////////////////////////////////////////////////// - -TMaybe<TTableSchema> GetTableSchema(const TTableStructure& tableStructure); - -//////////////////////////////////////////////////////////////////////////////// - -} // namespace NDetail -} // namespace NYT diff --git a/yt/cpp/mapreduce/client/transaction.cpp b/yt/cpp/mapreduce/client/transaction.cpp deleted file mode 100644 index 0aa1a7a1c3..0000000000 --- a/yt/cpp/mapreduce/client/transaction.cpp +++ /dev/null @@ -1,195 +0,0 @@ -#include "transaction.h" - -#include "transaction_pinger.h" - -#include <yt/cpp/mapreduce/interface/config.h> -#include <yt/cpp/mapreduce/interface/error_codes.h> - -#include <yt/cpp/mapreduce/common/wait_proxy.h> -#include <yt/cpp/mapreduce/common/retry_lib.h> - -#include <yt/cpp/mapreduce/http/requests.h> -#include <yt/cpp/mapreduce/http/retry_request.h> - -#include <yt/cpp/mapreduce/raw_client/raw_requests.h> - -#include <util/datetime/base.h> - -#include <util/generic/scope.h> - -#include <util/random/random.h> - -#include <util/string/builder.h> - -namespace NYT { - 
-//////////////////////////////////////////////////////////////////////////////// - -TPingableTransaction::TPingableTransaction( - const IClientRetryPolicyPtr& retryPolicy, - const TClientContext& context, - const TTransactionId& parentId, - ITransactionPingerPtr transactionPinger, - const TStartTransactionOptions& options) - : ClientRetryPolicy_(retryPolicy) - , Context_(context) - , AbortableRegistry_(NDetail::TAbortableRegistry::Get()) - , AbortOnTermination_(true) - , AutoPingable_(options.AutoPingable_) - , Pinger_(std::move(transactionPinger)) -{ - auto transactionId = NDetail::NRawClient::StartTransaction( - ClientRetryPolicy_->CreatePolicyForGenericRequest(), - context, - parentId, - options); - - auto actualTimeout = options.Timeout_.GetOrElse(Context_.Config->TxTimeout); - Init(context, transactionId, actualTimeout); -} - -TPingableTransaction::TPingableTransaction( - const IClientRetryPolicyPtr& retryPolicy, - const TClientContext& context, - const TTransactionId& transactionId, - ITransactionPingerPtr transactionPinger, - const TAttachTransactionOptions& options) - : ClientRetryPolicy_(retryPolicy) - , Context_(context) - , AbortableRegistry_(NDetail::TAbortableRegistry::Get()) - , AbortOnTermination_(options.AbortOnTermination_) - , AutoPingable_(options.AutoPingable_) - , Pinger_(std::move(transactionPinger)) -{ - auto timeoutNode = NDetail::NRawClient::TryGet( - ClientRetryPolicy_->CreatePolicyForGenericRequest(), - context, - TTransactionId(), - "#" + GetGuidAsString(transactionId) + "/@timeout", - TGetOptions()); - if (timeoutNode.IsUndefined()) { - throw yexception() << "Transaction " << GetGuidAsString(transactionId) << " does not exist"; - } - auto timeout = TDuration::MilliSeconds(timeoutNode.AsInt64()); - Init(context, transactionId, timeout); -} - -void TPingableTransaction::Init( - const TClientContext& context, - const TTransactionId& transactionId, - TDuration timeout) -{ - TransactionId_ = transactionId; - - if (AbortOnTermination_) { - 
AbortableRegistry_->Add( - TransactionId_, - ::MakeIntrusive<NDetail::TTransactionAbortable>(context, TransactionId_)); - } - - if (AutoPingable_) { - // Compute 'MaxPingInterval_' and 'MinPingInterval_' such that 'pingInterval == (max + min) / 2'. - auto pingInterval = Context_.Config->PingInterval; - auto safeTimeout = timeout - TDuration::Seconds(5); - MaxPingInterval_ = Max(pingInterval, Min(safeTimeout, pingInterval * 1.5)); - MinPingInterval_ = pingInterval - (MaxPingInterval_ - pingInterval); - - Pinger_->RegisterTransaction(*this); - } -} - -TPingableTransaction::~TPingableTransaction() -{ - try { - Stop(AbortOnTermination_ ? EStopAction::Abort : EStopAction::Detach); - } catch (...) { - } -} - -const TTransactionId TPingableTransaction::GetId() const -{ - return TransactionId_; -} - -const std::pair<TDuration, TDuration> TPingableTransaction::GetPingInterval() const { - return {MinPingInterval_, MaxPingInterval_}; -} - -const TClientContext TPingableTransaction::GetContext() const { - return Context_; -} - -void TPingableTransaction::Commit() -{ - Stop(EStopAction::Commit); -} - -void TPingableTransaction::Abort() -{ - Stop(EStopAction::Abort); -} - -void TPingableTransaction::Detach() -{ - Stop(EStopAction::Detach); -} - -void TPingableTransaction::Stop(EStopAction action) -{ - if (Finalized_) { - return; - } - - Y_DEFER { - Finalized_ = true; - if (AutoPingable_ && Pinger_->HasTransaction(*this)) { - Pinger_->RemoveTransaction(*this); - } - }; - - switch (action) { - case EStopAction::Commit: - NDetail::NRawClient::CommitTransaction( - ClientRetryPolicy_->CreatePolicyForGenericRequest(), - Context_, - TransactionId_); - break; - case EStopAction::Abort: - NDetail::NRawClient::AbortTransaction( - ClientRetryPolicy_->CreatePolicyForGenericRequest(), - Context_, - TransactionId_); - break; - case EStopAction::Detach: - // Do nothing. 
- break; - } - - AbortableRegistry_->Remove(TransactionId_); -} - -//////////////////////////////////////////////////////////////////////////////// - -TYPath Snapshot( - const IClientRetryPolicyPtr& clientRetryPolicy, - const TClientContext& context, - const TTransactionId& transactionId, - const TYPath& path) -{ - auto lockId = NDetail::NRawClient::Lock( - clientRetryPolicy->CreatePolicyForGenericRequest(), - context, - transactionId, - path, - ELockMode::LM_SNAPSHOT); - auto lockedNodeId = NDetail::NRawClient::Get( - clientRetryPolicy->CreatePolicyForGenericRequest(), - context, - transactionId, - ::TStringBuilder() << '#' << GetGuidAsString(lockId) << "/@node_id"); - return "#" + lockedNodeId.AsString(); -} - -//////////////////////////////////////////////////////////////////////////////// - -} // namespace NYT diff --git a/yt/cpp/mapreduce/client/transaction.h b/yt/cpp/mapreduce/client/transaction.h deleted file mode 100644 index 559fca619e..0000000000 --- a/yt/cpp/mapreduce/client/transaction.h +++ /dev/null @@ -1,95 +0,0 @@ -#pragma once - -#include "abortable_registry.h" - -#include <yt/cpp/mapreduce/http/requests.h> -#include <yt/cpp/mapreduce/http/retry_request.h> - -#include <util/datetime/base.h> -#include <util/generic/maybe.h> -#include <util/generic/ptr.h> -#include <util/system/thread.h> - -#include <atomic> - -namespace NYT { - -//////////////////////////////////////////////////////////////////////////////// - -class TPingableTransaction -{ -public: - // - // Start a new transaction. - TPingableTransaction( - const IClientRetryPolicyPtr& retryPolicy, - const TClientContext& context, - const TTransactionId& parentId, - ITransactionPingerPtr transactionPinger, - const TStartTransactionOptions& options); - - // - // Attach to an existing transaction. 
- TPingableTransaction( - const IClientRetryPolicyPtr& retryPolicy, - const TClientContext& context, - const TTransactionId& transactionId, - ITransactionPingerPtr transactionPinger, - const TAttachTransactionOptions& options); - - ~TPingableTransaction(); - - const TTransactionId GetId() const; - - const std::pair<TDuration, TDuration> GetPingInterval() const; - const TClientContext GetContext() const; - - void Commit(); - void Abort(); - void Detach(); - - -private: - enum class EStopAction - { - Detach, - Abort, - Commit, - }; - -private: - IClientRetryPolicyPtr ClientRetryPolicy_; - TClientContext Context_; - TTransactionId TransactionId_; - TDuration MinPingInterval_; - TDuration MaxPingInterval_; - - // We have to own an IntrusivePtr to registry to prevent use-after-free. - ::TIntrusivePtr<NDetail::TAbortableRegistry> AbortableRegistry_; - - bool AbortOnTermination_; - - bool AutoPingable_; - bool Finalized_ = false; - ITransactionPingerPtr Pinger_; - -private: - void Init( - const TClientContext& context, - const TTransactionId& transactionId, - TDuration timeout); - - void Stop(EStopAction action); -}; - -//////////////////////////////////////////////////////////////////////////////// - -TYPath Snapshot( - const IClientRetryPolicyPtr& clientRetryPolicy, - const TClientContext& context, - const TTransactionId& transactionId, - const TYPath& path); - -//////////////////////////////////////////////////////////////////////////////// - -} // namespace NYT diff --git a/yt/cpp/mapreduce/client/transaction_pinger.cpp b/yt/cpp/mapreduce/client/transaction_pinger.cpp deleted file mode 100644 index 2b51e47f9f..0000000000 --- a/yt/cpp/mapreduce/client/transaction_pinger.cpp +++ /dev/null @@ -1,321 +0,0 @@ -#include "transaction_pinger.h" - -#include "transaction.h" - -#include <yt/cpp/mapreduce/interface/config.h> -#include <yt/cpp/mapreduce/interface/error_codes.h> -#include <yt/cpp/mapreduce/interface/logging/yt_log.h> - -#include 
<yt/cpp/mapreduce/common/wait_proxy.h> -#include <yt/cpp/mapreduce/common/retry_lib.h> - -#include <yt/cpp/mapreduce/http/requests.h> -#include <yt/cpp/mapreduce/http/retry_request.h> - -#include <yt/cpp/mapreduce/raw_client/raw_requests.h> - -#if defined(__x86_64__) || defined(__arm64__) - #include <yt/yt/core/concurrency/periodic_executor.h> - #include <yt/yt/core/concurrency/poller.h> - #include <yt/yt/core/concurrency/scheduler_api.h> - #include <yt/yt/core/concurrency/thread_pool_poller.h> - #include <yt/yt/core/concurrency/thread_pool.h> - - #include <yt/yt/core/http/client.h> - #include <yt/yt/core/http/http.h> -#endif // defined(__x86_64__) || defined(__arm64__) - -#include <library/cpp/yson/node/node_io.h> - -#include <library/cpp/yt/threading/spin_lock.h> -#include <library/cpp/yt/assert/assert.h> - -#include <util/datetime/base.h> -#include <util/random/random.h> - -namespace NYT { - -//////////////////////////////////////////////////////////////////////////////// - -#if defined(__x86_64__) || defined(__arm64__) - -namespace { - -//////////////////////////////////////////////////////////////////////////////// - -void CheckError(const TString& requestId, NHttp::IResponsePtr response) -{ - TErrorResponse errorResponse(static_cast<int>(response->GetStatusCode()), requestId); - - if (const auto* ytError = response->GetHeaders()->Find("X-YT-Error")) { - errorResponse.ParseFromJsonError(*ytError); - } - if (errorResponse.IsOk()) { - return; - } - - YT_LOG_ERROR("RSP %v - HTTP %v - %v", - requestId, - response->GetStatusCode(), - errorResponse.AsStrBuf()); - - ythrow errorResponse; - -//////////////////////////////////////////////////////////////////////////////// - -} // namespace - -void PingTx(NHttp::IClientPtr httpClient, const TPingableTransaction& tx) -{ - auto url = TString::Join("http://", tx.GetContext().ServerName, "/api/", tx.GetContext().Config->ApiVersion, "/ping_tx"); - auto headers = New<NHttp::THeaders>(); - auto requestId = 
CreateGuidAsString(); - - headers->Add("Host", url); - headers->Add("User-Agent", TProcessState::Get()->ClientVersion); - - const auto& token = tx.GetContext().Token; - if (!token.empty()) { - headers->Add("Authorization", "OAuth " + token); - } - - headers->Add("Transfer-Encoding", "chunked"); - headers->Add("X-YT-Correlation-Id", requestId); - headers->Add("X-YT-Header-Format", "<format=text>yson"); - headers->Add("Content-Encoding", "identity"); - headers->Add("Accept-Encoding", "identity"); - - TNode node; - node["transaction_id"] = GetGuidAsString(tx.GetId()); - auto strParams = NodeToYsonString(node); - - YT_LOG_DEBUG("REQ %v - sending request (HostName: %v; Method POST %v; X-YT-Parameters (sent in body): %v)", - requestId, - tx.GetContext().ServerName, - url, - strParams - ); - - auto response = NConcurrency::WaitFor(httpClient->Post(url, TSharedRef::FromString(strParams), headers)).ValueOrThrow(); - CheckError(requestId, response); - - YT_LOG_DEBUG("RSP %v - received response %v bytes. 
(%v)", - requestId, - response->ReadAll().size(), - strParams); -} - -} // namespace - -//////////////////////////////////////////////////////////////////////////////// - -class TSharedTransactionPinger - : public ITransactionPinger -{ -public: - TSharedTransactionPinger(NHttp::IClientPtr httpClient, int poolThreadCount) - : PingerPool_(NConcurrency::CreateThreadPool( - poolThreadCount, "tx_pinger_pool")) - , HttpClient_(std::move(httpClient)) - { } - - ~TSharedTransactionPinger() override - { - PingerPool_->Shutdown(); - } - - ITransactionPingerPtr GetChildTxPinger() override - { - return this; - } - - void RegisterTransaction(const TPingableTransaction& pingableTx) override - { - auto [minPingInterval, maxPingInterval] = pingableTx.GetPingInterval(); - auto pingInterval = (minPingInterval + maxPingInterval) / 2; - double jitter = (maxPingInterval - pingInterval) / pingInterval; - - auto opts = NConcurrency::TPeriodicExecutorOptions{pingInterval, pingInterval, jitter}; - auto periodic = std::make_shared<NConcurrency::TPeriodicExecutorPtr>(nullptr); - // Have to use weak_ptr in order to break reference cycle - // This weak_ptr holds pointer to periodic, which will contain this lambda - // Also we consider that lifetime of this lambda is no longer than lifetime of pingableTx - // because every pingableTx have to call RemoveTransaction before it is destroyed - auto pingRoutine = BIND([this, &pingableTx, periodic = std::weak_ptr{periodic}] { - auto strong_ptr = periodic.lock(); - YT_VERIFY(strong_ptr); - DoPingTransaction(pingableTx, *strong_ptr); - }); - *periodic = New<NConcurrency::TPeriodicExecutor>(PingerPool_->GetInvoker(), pingRoutine, opts); - (*periodic)->Start(); - - auto guard = Guard(SpinLock_); - YT_VERIFY(!Transactions_.contains(pingableTx.GetId())); - Transactions_[pingableTx.GetId()] = std::move(periodic); - } - - bool HasTransaction(const TPingableTransaction& pingableTx) override - { - auto guard = Guard(SpinLock_); - return 
Transactions_.contains(pingableTx.GetId()); - } - - - void RemoveTransaction(const TPingableTransaction& pingableTx) override - { - std::shared_ptr<NConcurrency::TPeriodicExecutorPtr> periodic; - { - auto guard = Guard(SpinLock_); - - auto it = Transactions_.find(pingableTx.GetId()); - - YT_VERIFY(it != Transactions_.end()); - - periodic = std::move(it->second); - Transactions_.erase(it); - } - NConcurrency::WaitUntilSet((*periodic)->Stop()); - } - -private: - void DoPingTransaction(const TPingableTransaction& pingableTx, - NConcurrency::TPeriodicExecutorPtr periodic) - { - try { - PingTx(HttpClient_, pingableTx); - } catch (const std::exception& e) { - if (auto* errorResponse = dynamic_cast<const TErrorResponse*>(&e)) { - if (errorResponse->GetError().ContainsErrorCode(NYT::NClusterErrorCodes::NTransactionClient::NoSuchTransaction)) { - YT_UNUSED_FUTURE(periodic->Stop()); - } else if (errorResponse->GetError().ContainsErrorCode(NYT::NClusterErrorCodes::Timeout)) { - periodic->ScheduleOutOfBand(); - } - } - } - } - - -private: - YT_DECLARE_SPIN_LOCK(NThreading::TSpinLock, SpinLock_); - THashMap<TTransactionId, std::shared_ptr<NConcurrency::TPeriodicExecutorPtr>> Transactions_; - - NConcurrency::IThreadPoolPtr PingerPool_; - NHttp::IClientPtr HttpClient_; -}; - -#endif // defined(__x86_64__) || defined(__arm64__) - -//////////////////////////////////////////////////////////////////////////////// - -class TThreadPerTransactionPinger - : public ITransactionPinger -{ -public: - ~TThreadPerTransactionPinger() override - { - if (Running_) { - RemoveTransaction(*PingableTx_); - } - } - - ITransactionPingerPtr GetChildTxPinger() override - { - return MakeIntrusive<TThreadPerTransactionPinger>(); - } - - void RegisterTransaction(const TPingableTransaction& pingableTx) override - { - YT_VERIFY(!Running_); - YT_VERIFY(PingableTx_ == nullptr); - - PingableTx_ = &pingableTx; - Running_ = true; - - PingerThread_ = MakeHolder<TThread>( - TThread::TParams{Pinger, 
this}.SetName("pingable_tx")); - PingerThread_->Start(); - } - - bool HasTransaction(const TPingableTransaction& pingableTx) override - { - return PingableTx_ == &pingableTx && Running_; - } - - void RemoveTransaction(const TPingableTransaction& pingableTx) override - { - YT_VERIFY(HasTransaction(pingableTx)); - - Running_ = false; - if (PingerThread_) { - PingerThread_->Join(); - } - } - -private: - static void* Pinger(void* opaque) - { - static_cast<TThreadPerTransactionPinger*>(opaque)->Pinger(); - return nullptr; - } - - void Pinger() - { - auto [minPingInterval, maxPingInterval] = PingableTx_->GetPingInterval(); - while (Running_) { - TDuration waitTime = minPingInterval + (maxPingInterval - minPingInterval) * RandomNumber<float>(); - try { - auto noRetryPolicy = MakeIntrusive<TAttemptLimitedRetryPolicy>(1u, PingableTx_->GetContext().Config); - NDetail::NRawClient::PingTx(noRetryPolicy, PingableTx_->GetContext(), PingableTx_->GetId()); - } catch (const std::exception& e) { - if (auto* errorResponse = dynamic_cast<const TErrorResponse*>(&e)) { - if (errorResponse->GetError().ContainsErrorCode(NYT::NClusterErrorCodes::NTransactionClient::NoSuchTransaction)) { - break; - } else if (errorResponse->GetError().ContainsErrorCode(NYT::NClusterErrorCodes::Timeout)) { - waitTime = TDuration::MilliSeconds(0); - } - } - // Else do nothing, going to retry this error. 
- } - - TInstant t = Now(); - while (Running_ && Now() - t < waitTime) { - NDetail::TWaitProxy::Get()->Sleep(TDuration::MilliSeconds(100)); - } - } - } - -private: - const TPingableTransaction* PingableTx_ = nullptr; - - std::atomic<bool> Running_ = false; - THolder<TThread> PingerThread_; -}; - -//////////////////////////////////////////////////////////////////////////////// - -ITransactionPingerPtr CreateTransactionPinger(const TConfigPtr& config) -{ - if (config->UseAsyncTxPinger) { -// TODO(aleexfi): Remove it after YT-17689 -#if defined(__x86_64__) || defined(__arm64__) - YT_LOG_DEBUG("Using async transaction pinger"); - auto httpClientConfig = NYT::New<NHttp::TClientConfig>(); - httpClientConfig->MaxIdleConnections = 16; - auto httpPoller = NConcurrency::CreateThreadPoolPoller( - config->AsyncHttpClientThreads, - "tx_http_client_poller"); - auto httpClient = NHttp::CreateClient(std::move(httpClientConfig), std::move(httpPoller)); - - return MakeIntrusive<TSharedTransactionPinger>( - std::move(httpClient), - config->AsyncTxPingerPoolThreads); -#else - YT_LOG_WARNING("Async transaction pinger is not supported on your platform. 
Fallback to TThreadPerTransactionPinger..."); -#endif // defined(__x86_64__) || defined(__arm64__) - } - return MakeIntrusive<TThreadPerTransactionPinger>(); -} - -//////////////////////////////////////////////////////////////////////////////// - -} // namespace NYT diff --git a/yt/cpp/mapreduce/client/transaction_pinger.h b/yt/cpp/mapreduce/client/transaction_pinger.h deleted file mode 100644 index 98e8b5cb2f..0000000000 --- a/yt/cpp/mapreduce/client/transaction_pinger.h +++ /dev/null @@ -1,39 +0,0 @@ -#pragma once - -#include <yt/cpp/mapreduce/common/fwd.h> - -#include <yt/cpp/mapreduce/http/requests.h> - -#include <util/generic/ptr.h> -#include <util/system/thread.h> - -namespace NYT { - -//////////////////////////////////////////////////////////////////////////////// - -class TPingableTransaction; - -//////////////////////////////////////////////////////////////////////////////// - -// Each registered transaction must be removed from pinger -// (using RemoveTransaction) before it is destroyed -class ITransactionPinger - : public TThrRefBase -{ -public: - virtual ~ITransactionPinger() = default; - - virtual ITransactionPingerPtr GetChildTxPinger() = 0; - - virtual void RegisterTransaction(const TPingableTransaction& pingableTx) = 0; - - virtual bool HasTransaction(const TPingableTransaction& pingableTx) = 0; - - virtual void RemoveTransaction(const TPingableTransaction& pingableTx) = 0; -}; - -ITransactionPingerPtr CreateTransactionPinger(const TConfigPtr& config); - -//////////////////////////////////////////////////////////////////////////////// - -} // namespace NYT diff --git a/yt/cpp/mapreduce/client/ya.make b/yt/cpp/mapreduce/client/ya.make deleted file mode 100644 index a1b3b4da69..0000000000 --- a/yt/cpp/mapreduce/client/ya.make +++ /dev/null @@ -1,75 +0,0 @@ -LIBRARY() - -INCLUDE(${ARCADIA_ROOT}/yt/ya_cpp.make.inc) - -SRCS( - abortable_registry.cpp - batch_request_impl.cpp - client_reader.cpp - client_writer.cpp - client.cpp - file_reader.cpp - 
file_writer.cpp - format_hints.cpp - init.cpp - lock.cpp - operation_helpers.cpp - operation_preparer.cpp - operation_tracker.cpp - operation.cpp - prepare_operation.cpp - py_helpers.cpp - retry_heavy_write_request.cpp - retryful_writer.cpp - retryless_writer.cpp - skiff.cpp - structured_table_formats.cpp - transaction.cpp - transaction_pinger.cpp - yt_poller.cpp -) - -PEERDIR( - library/cpp/digest/md5 - library/cpp/sighandler - library/cpp/threading/blocking_queue - library/cpp/threading/future - library/cpp/type_info - library/cpp/yson - yt/cpp/mapreduce/common - yt/cpp/mapreduce/http - yt/cpp/mapreduce/interface - yt/cpp/mapreduce/io - yt/cpp/mapreduce/library/table_schema - yt/cpp/mapreduce/raw_client -) - -IF (ARCH_X86_64 OR OS_DARWIN) - PEERDIR( - yt/yt/core - yt/yt/core/http - ) -ELSE() - # Suppress yamaker's WBadIncl error on exotic platforms - PEERDIR( - yt/yt_proto/yt/core - ) -ENDIF() - -IF (BUILD_TYPE == "PROFILE") - PEERDIR( - yt/yt/library/ytprof - ) - - SRCS( - job_profiler.cpp - ) -ELSE() - SRCS( - dummy_job_profiler.cpp - ) -ENDIF() - -GENERATE_ENUM_SERIALIZATION(structured_table_formats.h) - -END() diff --git a/yt/cpp/mapreduce/client/yt_poller.cpp b/yt/cpp/mapreduce/client/yt_poller.cpp deleted file mode 100644 index e0bea1690e..0000000000 --- a/yt/cpp/mapreduce/client/yt_poller.cpp +++ /dev/null @@ -1,132 +0,0 @@ -#include "yt_poller.h" - -#include <yt/cpp/mapreduce/raw_client/raw_batch_request.h> -#include <yt/cpp/mapreduce/raw_client/raw_requests.h> - -#include <yt/cpp/mapreduce/common/debug_metrics.h> -#include <yt/cpp/mapreduce/common/retry_lib.h> -#include <yt/cpp/mapreduce/common/wait_proxy.h> - -#include <yt/cpp/mapreduce/http/retry_request.h> - -#include <yt/cpp/mapreduce/interface/config.h> - -#include <yt/cpp/mapreduce/interface/logging/yt_log.h> - -namespace NYT { -namespace NDetail { - -using namespace NRawClient; - -//////////////////////////////////////////////////////////////////////////////// - -TYtPoller::TYtPoller( - 
TClientContext context, - const IClientRetryPolicyPtr& retryPolicy) - : Context_(std::move(context)) - , ClientRetryPolicy_(retryPolicy) - , WaiterThread_(&TYtPoller::WatchLoopProc, this) -{ - WaiterThread_.Start(); -} - -TYtPoller::~TYtPoller() -{ - Stop(); -} - -void TYtPoller::Watch(IYtPollerItemPtr item) -{ - auto g = Guard(Lock_); - Pending_.emplace_back(std::move(item)); - HasData_.Signal(); -} - - -void TYtPoller::Stop() -{ - { - auto g = Guard(Lock_); - if (!IsRunning_) { - return; - } - IsRunning_ = false; - HasData_.Signal(); - } - WaiterThread_.Join(); -} - -void TYtPoller::DiscardQueuedItems() -{ - for (auto& item : Pending_) { - item->OnItemDiscarded(); - } - for (auto& item : InProgress_) { - item->OnItemDiscarded(); - } -} - -void TYtPoller::WatchLoop() -{ - TInstant nextRequest = TInstant::Zero(); - while (true) { - { - auto g = Guard(Lock_); - if (IsRunning_ && Pending_.empty() && InProgress_.empty()) { - TWaitProxy::Get()->WaitCondVar(HasData_, Lock_); - } - - if (!IsRunning_) { - DiscardQueuedItems(); - return; - } - - { - auto ug = Unguard(Lock_); // allow adding new items into Pending_ - TWaitProxy::Get()->SleepUntil(nextRequest); - nextRequest = TInstant::Now() + Context_.Config->WaitLockPollInterval; - } - if (!Pending_.empty()) { - InProgress_.splice(InProgress_.end(), Pending_); - } - Y_VERIFY(!InProgress_.empty()); - } - - TRawBatchRequest rawBatchRequest(Context_.Config); - - for (auto& item : InProgress_) { - item->PrepareRequest(&rawBatchRequest); - } - - try { - ExecuteBatch(ClientRetryPolicy_->CreatePolicyForGenericRequest(), Context_, rawBatchRequest); - } catch (const std::exception& ex) { - YT_LOG_ERROR("Exception while executing batch request: %v", ex.what()); - } - - for (auto it = InProgress_.begin(); it != InProgress_.end();) { - auto& item = *it; - - IYtPollerItem::EStatus status = item->OnRequestExecuted(); - - if (status == IYtPollerItem::PollBreak) { - it = InProgress_.erase(it); - } else { - ++it; - } - } - - 
IncDebugMetric(TStringBuf("yt_poller_top_loop_repeat_count")); - } -} - -void* TYtPoller::WatchLoopProc(void* data) -{ - static_cast<TYtPoller*>(data)->WatchLoop(); - return nullptr; -} - -//////////////////////////////////////////////////////////////////////////////// - -} // namespace NDetail -} // namespace NYT diff --git a/yt/cpp/mapreduce/client/yt_poller.h b/yt/cpp/mapreduce/client/yt_poller.h deleted file mode 100644 index 4f4e9eb7ab..0000000000 --- a/yt/cpp/mapreduce/client/yt_poller.h +++ /dev/null @@ -1,86 +0,0 @@ -#pragma once - -#include <yt/cpp/mapreduce/common/fwd.h> - -#include <yt/cpp/mapreduce/http/context.h> -#include <yt/cpp/mapreduce/http/requests.h> - -#include <yt/cpp/mapreduce/interface/client.h> - -#include <util/generic/list.h> -#include <util/system/mutex.h> -#include <util/system/thread.h> -#include <util/system/condvar.h> - -namespace NYT { -namespace NDetail { - -namespace NRawClient { - class TRawBatchRequest; -} - -//////////////////////////////////////////////////////////////////////////////// - -class IYtPollerItem - : public TThrRefBase -{ -public: - enum EStatus { - PollContinue, - PollBreak, - }; - -public: - virtual ~IYtPollerItem() = default; - - virtual void PrepareRequest(NRawClient::TRawBatchRequest* batchRequest) = 0; - - // Should return PollContinue if poller should continue polling this item. - // Should return PollBreak if poller should stop polling this item. 
- virtual EStatus OnRequestExecuted() = 0; - - virtual void OnItemDiscarded() = 0; - -}; -using IYtPollerItemPtr = ::TIntrusivePtr<IYtPollerItem>; - -//////////////////////////////////////////////////////////////////////////////// - -class TYtPoller - : public TThrRefBase -{ -public: - TYtPoller(TClientContext context, const IClientRetryPolicyPtr& retryPolicy); - ~TYtPoller(); - - void Watch(IYtPollerItemPtr item); - - void Stop(); - -private: - void DiscardQueuedItems(); - - void WatchLoop(); - static void* WatchLoopProc(void*); - -private: - struct TItem; - - const TClientContext Context_; - const IClientRetryPolicyPtr ClientRetryPolicy_; - - - TList<IYtPollerItemPtr> InProgress_; - TList<IYtPollerItemPtr> Pending_; - - TThread WaiterThread_; - TMutex Lock_; - TCondVar HasData_; - - bool IsRunning_ = true; -}; - -//////////////////////////////////////////////////////////////////////////////// - -} // namespace NDetail -} // namespace NYT diff --git a/yt/cpp/mapreduce/common/debug_metrics.cpp b/yt/cpp/mapreduce/common/debug_metrics.cpp deleted file mode 100644 index 6235e55f7e..0000000000 --- a/yt/cpp/mapreduce/common/debug_metrics.cpp +++ /dev/null @@ -1,62 +0,0 @@ -#include "debug_metrics.h" - -#include <util/generic/hash.h> -#include <util/generic/singleton.h> - -#include <util/string/cast.h> -#include <util/system/mutex.h> - -namespace NYT { -namespace NDetail { - -//////////////////////////////////////////////////////////////////////////////// - -class TDebugMetrics { -public: - static TDebugMetrics& Get() - { - return *Singleton<TDebugMetrics>(); - } - - void Inc(TStringBuf name) - { - auto g = Guard(Lock_); - auto it = Metrics_.find(name); - if (it == Metrics_.end()) { - it = Metrics_.emplace(ToString(name), 0).first; - } - ++it->second; - } - - ui64 Get(TStringBuf name) const - { - auto g = Guard(Lock_); - auto it = Metrics_.find(name); - if (it == Metrics_.end()) { - return 0; - } else { - return it->second; - } - } - -private: - TMutex Lock_; - 
THashMap<TString, ui64> Metrics_; -}; - -//////////////////////////////////////////////////////////////////////////////// - -void IncDebugMetricImpl(TStringBuf name) -{ - TDebugMetrics::Get().Inc(name); -} - -ui64 GetDebugMetric(TStringBuf name) -{ - return TDebugMetrics::Get().Get(name); -} - -//////////////////////////////////////////////////////////////////////////////// - -} // namespace NDetail -} // namespace NYT diff --git a/yt/cpp/mapreduce/common/debug_metrics.h b/yt/cpp/mapreduce/common/debug_metrics.h deleted file mode 100644 index 6ebbc89f72..0000000000 --- a/yt/cpp/mapreduce/common/debug_metrics.h +++ /dev/null @@ -1,22 +0,0 @@ -#pragma once - -#include <yt/cpp/mapreduce/interface/config.h> - -#include <util/generic/strbuf.h> - -namespace NYT { -namespace NDetail { - -void IncDebugMetricImpl(TStringBuf name); - -// Helper functions that allows to track various events inside YT library, useful for testing. -inline void IncDebugMetric(TStringBuf name) -{ - if (TConfig::Get()->EnableDebugMetrics) { - IncDebugMetricImpl(name); - } -} -ui64 GetDebugMetric(TStringBuf name); - -} // namespace NDetail -} // namespace NYT diff --git a/yt/cpp/mapreduce/common/fwd.h b/yt/cpp/mapreduce/common/fwd.h deleted file mode 100644 index a195e727be..0000000000 --- a/yt/cpp/mapreduce/common/fwd.h +++ /dev/null @@ -1,11 +0,0 @@ -#pragma once - -#include <util/generic/fwd.h> - -namespace NYT { - class IRequestRetryPolicy; - using IRequestRetryPolicyPtr = ::TIntrusivePtr<IRequestRetryPolicy>; - - class IClientRetryPolicy; - using IClientRetryPolicyPtr = ::TIntrusivePtr<IClientRetryPolicy>; -} diff --git a/yt/cpp/mapreduce/common/helpers.cpp b/yt/cpp/mapreduce/common/helpers.cpp deleted file mode 100644 index 95924d812c..0000000000 --- a/yt/cpp/mapreduce/common/helpers.cpp +++ /dev/null @@ -1,126 +0,0 @@ -#include "helpers.h" - -#include <yt/cpp/mapreduce/interface/config.h> -#include <yt/cpp/mapreduce/interface/serialize.h> -#include <yt/cpp/mapreduce/interface/fluent.h> - 
-#include <library/cpp/yson/node/node_builder.h> -#include <library/cpp/yson/node/node_visitor.h> - -#include <library/cpp/yson/parser.h> -#include <library/cpp/yson/writer.h> - -#include <library/cpp/json/json_reader.h> -#include <library/cpp/json/json_value.h> - -#include <util/stream/input.h> -#include <util/stream/output.h> -#include <util/stream/str.h> - -namespace NYT { - -//////////////////////////////////////////////////////////////////////////////// - -TString NodeListToYsonString(const TNode::TListType& nodes) -{ - TStringStream stream; - ::NYson::TYsonWriter writer(&stream, NYson::EYsonFormat::Binary, ::NYson::EYsonType::ListFragment); - auto list = BuildYsonListFluently(&writer); - for (const auto& node : nodes) { - list.Item().Value(node); - } - return stream.Str(); -} - -TNode PathToNode(const TRichYPath& path) -{ - TNode result; - TNodeBuilder builder(&result); - Serialize(path, &builder); - return result; -} - -TNode PathToParamNode(const TRichYPath& path) -{ - return TNode()("path", PathToNode(path)); -} - -TString AttributesToYsonString(const TNode& node) -{ - return BuildYsonStringFluently().BeginMap() - .Item("attributes").Value(node) - .EndMap(); -} - -TString AttributeFilterToYsonString(const TAttributeFilter& filter) -{ - return BuildYsonStringFluently().BeginMap() - .Item("attributes").Value(filter) - .EndMap(); -} - -TNode NodeFromTableSchema(const TTableSchema& schema) -{ - TNode result; - TNodeBuilder builder(&result); - Serialize(schema, &builder); - return result; -} - -void MergeNodes(TNode& dst, const TNode& src) -{ - if (dst.IsMap() && src.IsMap()) { - auto& dstMap = dst.AsMap(); - const auto& srcMap = src.AsMap(); - for (const auto& srcItem : srcMap) { - const auto& key = srcItem.first; - auto dstItem = dstMap.find(key); - if (dstItem != dstMap.end()) { - MergeNodes(dstItem->second, srcItem.second); - } else { - dstMap[key] = srcItem.second; - } - } - } else { - if (dst.GetType() == src.GetType() && src.HasAttributes()) { - auto 
attributes = dst.GetAttributes(); - MergeNodes(attributes, src.GetAttributes()); - dst = src; - dst.Attributes() = attributes; - } else { - dst = src; - } - } -} - -TYPath AddPathPrefix(const TYPath& path, const TString& pathPrefix) -{ - if (path.StartsWith("//") || path.StartsWith("#")) { - return path; - } - return pathPrefix + path; -} - -TString GetWriteTableCommand(const TString& apiVersion) -{ - return apiVersion == "v2" ? "write" : "write_table"; -} - -TString GetReadTableCommand(const TString& apiVersion) -{ - return apiVersion == "v2" ? "read" : "read_table"; -} - -TString GetWriteFileCommand(const TString& apiVersion) -{ - return apiVersion == "v2" ? "upload" : "write_file"; -} - -TString GetReadFileCommand(const TString& apiVersion) -{ - return apiVersion == "v2" ? "download" : "read_file"; -} - -//////////////////////////////////////////////////////////////////////////////// - -} // namespace NYT diff --git a/yt/cpp/mapreduce/common/helpers.h b/yt/cpp/mapreduce/common/helpers.h deleted file mode 100644 index 2174ba820b..0000000000 --- a/yt/cpp/mapreduce/common/helpers.h +++ /dev/null @@ -1,37 +0,0 @@ -#pragma once - -#include "fwd.h" - -#include <library/cpp/yson/node/node_io.h> // backward compatibility - -#include <yt/cpp/mapreduce/interface/node.h> -#include <yt/cpp/mapreduce/interface/common.h> -#include <library/cpp/yson/public.h> - -namespace NYT { - -//////////////////////////////////////////////////////////////////////////////// - -TString NodeListToYsonString(const TNode::TListType& nodes); - -TNode PathToNode(const TRichYPath& path); -TNode PathToParamNode(const TRichYPath& path); - -TString AttributesToYsonString(const TNode& attributes); - -TString AttributeFilterToYsonString(const TAttributeFilter& filter); - -TNode NodeFromTableSchema(const TTableSchema& schema); - -void MergeNodes(TNode& dst, const TNode& src); - -TYPath AddPathPrefix(const TYPath& path, const TString& pathPrefix); - -TString GetWriteTableCommand(const TString& 
apiVersion); -TString GetReadTableCommand(const TString& apiVersion); -TString GetWriteFileCommand(const TString& apiVersion); -TString GetReadFileCommand(const TString& apiVersion); - -//////////////////////////////////////////////////////////////////////////////// - -} // namespace NYT diff --git a/yt/cpp/mapreduce/common/node_builder.h b/yt/cpp/mapreduce/common/node_builder.h deleted file mode 100644 index c7f731cf09..0000000000 --- a/yt/cpp/mapreduce/common/node_builder.h +++ /dev/null @@ -1,4 +0,0 @@ -#pragma once - -// Backward compatibility. -#include <library/cpp/yson/node/node_builder.h> diff --git a/yt/cpp/mapreduce/common/node_visitor.h b/yt/cpp/mapreduce/common/node_visitor.h deleted file mode 100644 index a8bde52b5a..0000000000 --- a/yt/cpp/mapreduce/common/node_visitor.h +++ /dev/null @@ -1,4 +0,0 @@ -#pragma once - -// Backward compatibility. -#include <library/cpp/yson/node/node_visitor.h> diff --git a/yt/cpp/mapreduce/common/retry_lib.cpp b/yt/cpp/mapreduce/common/retry_lib.cpp deleted file mode 100644 index cf2c021eb4..0000000000 --- a/yt/cpp/mapreduce/common/retry_lib.cpp +++ /dev/null @@ -1,267 +0,0 @@ -#include "retry_lib.h" - -#include <yt/cpp/mapreduce/interface/config.h> -#include <yt/cpp/mapreduce/interface/errors.h> -#include <yt/cpp/mapreduce/interface/error_codes.h> -#include <yt/cpp/mapreduce/interface/retry_policy.h> - -#include <util/string/builder.h> -#include <util/generic/set.h> - -namespace NYT { - -//////////////////////////////////////////////////////////////////////////////// - -TAttemptLimitedRetryPolicy::TAttemptLimitedRetryPolicy(ui32 attemptLimit, const TConfigPtr& config) - : Config_(config) - , AttemptLimit_(attemptLimit) -{ } - -void TAttemptLimitedRetryPolicy::NotifyNewAttempt() -{ - ++Attempt_; -} - -TMaybe<TDuration> TAttemptLimitedRetryPolicy::OnGenericError(const std::exception& e) -{ - if (IsAttemptLimitExceeded()) { - return Nothing(); - } - return GetBackoffDuration(e, Config_); -} - -TMaybe<TDuration> 
TAttemptLimitedRetryPolicy::OnRetriableError(const TErrorResponse& e) -{ - if (IsAttemptLimitExceeded()) { - return Nothing(); - } - return GetBackoffDuration(e, Config_); -} - -void TAttemptLimitedRetryPolicy::OnIgnoredError(const TErrorResponse& /*e*/) -{ - --Attempt_; -} - -TString TAttemptLimitedRetryPolicy::GetAttemptDescription() const -{ - return ::TStringBuilder() << "attempt " << Attempt_ << " of " << AttemptLimit_; -} - -bool TAttemptLimitedRetryPolicy::IsAttemptLimitExceeded() const -{ - return Attempt_ >= AttemptLimit_; -} -//////////////////////////////////////////////////////////////////////////////// - -class TTimeLimitedRetryPolicy - : public IRequestRetryPolicy -{ -public: - TTimeLimitedRetryPolicy(IRequestRetryPolicyPtr retryPolicy, TDuration timeout) - : RetryPolicy_(retryPolicy) - , Deadline_(TInstant::Now() + timeout) - , Timeout_(timeout) - { } - void NotifyNewAttempt() override - { - if (TInstant::Now() >= Deadline_) { - ythrow TRequestRetriesTimeout() << "retry timeout exceeded (timeout: " << Timeout_ << ")"; - } - RetryPolicy_->NotifyNewAttempt(); - } - - TMaybe<TDuration> OnGenericError(const std::exception& e) override - { - return RetryPolicy_->OnGenericError(e); - } - - TMaybe<TDuration> OnRetriableError(const TErrorResponse& e) override - { - return RetryPolicy_->OnRetriableError(e); - } - - void OnIgnoredError(const TErrorResponse& e) override - { - return RetryPolicy_->OnIgnoredError(e); - } - - TString GetAttemptDescription() const override - { - return RetryPolicy_->GetAttemptDescription(); - } - -private: - const IRequestRetryPolicyPtr RetryPolicy_; - const TInstant Deadline_; - const TDuration Timeout_; -}; - -//////////////////////////////////////////////////////////////////////////////// - -class TDefaultClientRetryPolicy - : public IClientRetryPolicy -{ -public: - explicit TDefaultClientRetryPolicy(IRetryConfigProviderPtr retryConfigProvider, const TConfigPtr& config) - : RetryConfigProvider_(std::move(retryConfigProvider)) - 
, Config_(config) - { } - - IRequestRetryPolicyPtr CreatePolicyForGenericRequest() override - { - return Wrap(CreateDefaultRequestRetryPolicy(Config_)); - } - - IRequestRetryPolicyPtr CreatePolicyForStartOperationRequest() override - { - return Wrap(MakeIntrusive<TAttemptLimitedRetryPolicy>(static_cast<ui32>(Config_->StartOperationRetryCount), Config_)); - } - - IRequestRetryPolicyPtr Wrap(IRequestRetryPolicyPtr basePolicy) - { - auto config = RetryConfigProvider_->CreateRetryConfig(); - if (config.RetriesTimeLimit < TDuration::Max()) { - return ::MakeIntrusive<TTimeLimitedRetryPolicy>(std::move(basePolicy), config.RetriesTimeLimit); - } - return basePolicy; - } - -private: - IRetryConfigProviderPtr RetryConfigProvider_; - const TConfigPtr Config_; -}; - -class TDefaultRetryConfigProvider - : public IRetryConfigProvider -{ -public: - TRetryConfig CreateRetryConfig() override - { - return {}; - } -}; - -//////////////////////////////////////////////////////////////////////////////// - -IRequestRetryPolicyPtr CreateDefaultRequestRetryPolicy(const TConfigPtr& config) -{ - return MakeIntrusive<TAttemptLimitedRetryPolicy>(static_cast<ui32>(config->RetryCount), config); -} - -IClientRetryPolicyPtr CreateDefaultClientRetryPolicy(IRetryConfigProviderPtr retryConfigProvider, const TConfigPtr& config) -{ - return MakeIntrusive<TDefaultClientRetryPolicy>(std::move(retryConfigProvider), config); -} -IRetryConfigProviderPtr CreateDefaultRetryConfigProvider() -{ - return MakeIntrusive<TDefaultRetryConfigProvider>(); -} - -//////////////////////////////////////////////////////////////////////////////// - -static bool IsChunkError(int code) -{ - return code / 100 == 7; -} - -// Check whether: -// 1) codes contain at least one chunk error AND -// 2) codes don't contain non-retriable chunk errors. 
-static bool IsRetriableChunkError(const TSet<int>& codes) -{ - using namespace NClusterErrorCodes; - auto isChunkError = false; - for (auto code : codes) { - switch (code) { - case NChunkClient::SessionAlreadyExists: - case NChunkClient::ChunkAlreadyExists: - case NChunkClient::WindowError: - case NChunkClient::BlockContentMismatch: - case NChunkClient::InvalidBlockChecksum: - case NChunkClient::BlockOutOfRange: - case NChunkClient::MissingExtension: - case NChunkClient::NoSuchBlock: - case NChunkClient::NoSuchChunk: - case NChunkClient::NoSuchChunkList: - case NChunkClient::NoSuchChunkTree: - case NChunkClient::NoSuchChunkView: - case NChunkClient::NoSuchMedium: - return false; - default: - isChunkError |= IsChunkError(code); - break; - } - } - return isChunkError; -} - -static TMaybe<TDuration> TryGetBackoffDuration(const TErrorResponse& errorResponse, const TConfigPtr& config) -{ - int httpCode = errorResponse.GetHttpCode(); - if (httpCode / 100 != 4 && !errorResponse.IsFromTrailers()) { - return config->RetryInterval; - } - - auto allCodes = errorResponse.GetError().GetAllErrorCodes(); - using namespace NClusterErrorCodes; - if (httpCode == 429 - || allCodes.count(NSecurityClient::RequestQueueSizeLimitExceeded) - || allCodes.count(NRpc::RequestQueueSizeLimitExceeded)) - { - // request rate limit exceeded - return config->RateLimitExceededRetryInterval; - } - if (errorResponse.IsConcurrentOperationsLimitReached()) { - // limit for the number of concurrent operations exceeded - return config->StartOperationRetryInterval; - } - if (IsRetriableChunkError(allCodes)) { - // chunk client errors - return config->ChunkErrorsRetryInterval; - } - for (auto code : TVector<int>{ - NRpc::TransportError, - NRpc::Unavailable, - NApi::RetriableArchiveError, - Canceled, - }) { - if (allCodes.contains(code)) { - return config->RetryInterval; - } - } - return Nothing(); -} - -TDuration GetBackoffDuration(const TErrorResponse& errorResponse, const TConfigPtr& config) -{ - return 
TryGetBackoffDuration(errorResponse, config).GetOrElse(config->RetryInterval); -} - -bool IsRetriable(const TErrorResponse& errorResponse) -{ - // Retriability of an error doesn't depend on config, so just use global one. - return TryGetBackoffDuration(errorResponse, TConfig::Get()).Defined(); -} - -bool IsRetriable(const std::exception& ex) -{ - if (dynamic_cast<const TRequestRetriesTimeout*>(&ex)) { - return false; - } - return true; -} - -TDuration GetBackoffDuration(const std::exception& /*error*/, const TConfigPtr& config) -{ - return GetBackoffDuration(config); -} - -TDuration GetBackoffDuration(const TConfigPtr& config) -{ - return config->RetryInterval; -} - -//////////////////////////////////////////////////////////////////////////////// - -} // namespace NYT diff --git a/yt/cpp/mapreduce/common/retry_lib.h b/yt/cpp/mapreduce/common/retry_lib.h deleted file mode 100644 index c6c061f614..0000000000 --- a/yt/cpp/mapreduce/common/retry_lib.h +++ /dev/null @@ -1,100 +0,0 @@ -#pragma once - -#include "fwd.h" - -#include <yt/cpp/mapreduce/interface/fwd.h> - -#include <util/datetime/base.h> -#include <util/generic/maybe.h> -#include <util/generic/ptr.h> -#include <util/generic/string.h> - -namespace NYT { - -//////////////////////////////////////////////////////////////////////////////// - -// IRequestRetryPolicy class controls retries of single request. -class IRequestRetryPolicy - : public virtual TThrRefBase -{ -public: - // Helper function that returns text description of current attempt, e.g. - // "attempt 3 / 10" - // used in logs. - virtual TString GetAttemptDescription() const = 0; - - // Library code calls this function before any request attempt. - virtual void NotifyNewAttempt() = 0; - - // OnRetriableError is called whenever client gets YT error that can be retried (e.g. operation limit exceeded). - // OnGenericError is called whenever request failed due to generic error like network error. 
- // - // Both methods must return nothing if policy doesn't want to retry this error. - // Otherwise method should return backoff time. - virtual TMaybe<TDuration> OnRetriableError(const TErrorResponse& e) = 0; - virtual TMaybe<TDuration> OnGenericError(const std::exception& e) = 0; - - // OnIgnoredError is called whenever client gets an error but is going to ignore it. - virtual void OnIgnoredError(const TErrorResponse& /*e*/) = 0; -}; -using IRequestRetryPolicyPtr = ::TIntrusivePtr<IRequestRetryPolicy>; - -//////////////////////////////////////////////////////////////////////////////// - -// IClientRetryPolicy controls creation of policies for individual requests. -class IClientRetryPolicy - : public virtual TThrRefBase -{ -public: - virtual IRequestRetryPolicyPtr CreatePolicyForGenericRequest() = 0; - virtual IRequestRetryPolicyPtr CreatePolicyForStartOperationRequest() = 0; -}; - - -//////////////////////////////////////////////////////////////////////////////// - -class TAttemptLimitedRetryPolicy - : public IRequestRetryPolicy -{ -public: - explicit TAttemptLimitedRetryPolicy(ui32 attemptLimit, const TConfigPtr& config); - - void NotifyNewAttempt() override; - - TMaybe<TDuration> OnGenericError(const std::exception& e) override; - TMaybe<TDuration> OnRetriableError(const TErrorResponse& e) override; - void OnIgnoredError(const TErrorResponse& e) override; - TString GetAttemptDescription() const override; - - bool IsAttemptLimitExceeded() const; - -protected: - const TConfigPtr Config_; - -private: - const ui32 AttemptLimit_; - ui32 Attempt_ = 0; -}; - -//////////////////////////////////////////////////////////////////////////////// - -IRequestRetryPolicyPtr CreateDefaultRequestRetryPolicy(const TConfigPtr& config); -IClientRetryPolicyPtr CreateDefaultClientRetryPolicy(IRetryConfigProviderPtr retryConfigProvider, const TConfigPtr& config); -IRetryConfigProviderPtr CreateDefaultRetryConfigProvider(); - 
-//////////////////////////////////////////////////////////////////////////////// - -// Check if error returned by YT can be retried -bool IsRetriable(const TErrorResponse& errorResponse); -bool IsRetriable(const std::exception& ex); - -// Get backoff duration for errors returned by YT. -TDuration GetBackoffDuration(const TErrorResponse& errorResponse, const TConfigPtr& config); - -// Get backoff duration for errors that are not TErrorResponse. -TDuration GetBackoffDuration(const std::exception& error, const TConfigPtr& config); -TDuration GetBackoffDuration(const TConfigPtr& config); - -//////////////////////////////////////////////////////////////////////////////// - -} // namespace NYT diff --git a/yt/cpp/mapreduce/common/wait_proxy.cpp b/yt/cpp/mapreduce/common/wait_proxy.cpp deleted file mode 100644 index 3db034a098..0000000000 --- a/yt/cpp/mapreduce/common/wait_proxy.cpp +++ /dev/null @@ -1,118 +0,0 @@ -#include "wait_proxy.h" - - -#include <library/cpp/threading/future/future.h> - -#include <util/system/event.h> -#include <util/system/condvar.h> - -namespace NYT { -namespace NDetail { - -//////////////////////////////////////////////////////////////////////////////// - -bool TDefaultWaitProxy::WaitFuture(const NThreading::TFuture<void>& future, TDuration timeout) -{ - return future.Wait(timeout); -} - -bool TDefaultWaitProxy::WaitEvent(TSystemEvent& event, TDuration timeout) -{ - return event.WaitT(timeout); -} - -bool TDefaultWaitProxy::WaitCondVar(TCondVar &condVar, TMutex &mutex, TDuration timeout) -{ - return condVar.WaitT(mutex, timeout); -} - -void TDefaultWaitProxy::Sleep(TDuration timeout) -{ - ::Sleep(timeout); -} - -//////////////////////////////////////////////////////////////////////////////// - -TWaitProxy::TWaitProxy() - : Proxy_(::MakeIntrusive<TDefaultWaitProxy>()) -{ } - -TWaitProxy* TWaitProxy::Get() -{ - return Singleton<TWaitProxy>(); -} - -void TWaitProxy::SetProxy(::TIntrusivePtr<IWaitProxy> proxy) -{ - Proxy_ = std::move(proxy); -} - 
-bool TWaitProxy::WaitFuture(const NThreading::TFuture<void>& future) -{ - return Proxy_->WaitFuture(future, TDuration::Max()); -} - -bool TWaitProxy::WaitFuture(const NThreading::TFuture<void>& future, TInstant deadLine) -{ - return Proxy_->WaitFuture(future, deadLine - TInstant::Now()); -} - -bool TWaitProxy::WaitFuture(const NThreading::TFuture<void>& future, TDuration timeout) -{ - return Proxy_->WaitFuture(future, timeout); -} - -bool TWaitProxy::WaitEventD(TSystemEvent& event, TInstant deadLine) -{ - return Proxy_->WaitEvent(event, deadLine - TInstant::Now()); -} - -bool TWaitProxy::WaitEventT(TSystemEvent& event, TDuration timeout) -{ - return Proxy_->WaitEvent(event, timeout); -} - -void TWaitProxy::WaitEventI(TSystemEvent& event) -{ - Proxy_->WaitEvent(event, TDuration::Max()); -} - -bool TWaitProxy::WaitEvent(TSystemEvent& event) -{ - return Proxy_->WaitEvent(event, TDuration::Max()); -} - -bool TWaitProxy::WaitCondVarD(TCondVar& condVar, TMutex& m, TInstant deadLine) -{ - return Proxy_->WaitCondVar(condVar, m, deadLine - TInstant::Now()); -} - -bool TWaitProxy::WaitCondVarT(TCondVar& condVar, TMutex& m, TDuration timeOut) -{ - return Proxy_->WaitCondVar(condVar, m, timeOut); -} - -void TWaitProxy::WaitCondVarI(TCondVar& condVar, TMutex& m) -{ - Proxy_->WaitCondVar(condVar, m, TDuration::Max()); -} - -void TWaitProxy::WaitCondVar(TCondVar& condVar, TMutex& m) -{ - Proxy_->WaitCondVar(condVar, m, TDuration::Max()); -} - -void TWaitProxy::Sleep(TDuration timeout) -{ - Proxy_->Sleep(timeout); -} - -void TWaitProxy::SleepUntil(TInstant instant) -{ - Proxy_->Sleep(instant - TInstant::Now()); -} - -//////////////////////////////////////////////////////////////////////////////// - -} // namespace NDetail -} // namespace NYT diff --git a/yt/cpp/mapreduce/common/wait_proxy.h b/yt/cpp/mapreduce/common/wait_proxy.h deleted file mode 100644 index e7c944cf24..0000000000 --- a/yt/cpp/mapreduce/common/wait_proxy.h +++ /dev/null @@ -1,53 +0,0 @@ -#pragma once - -#include 
<yt/cpp/mapreduce/interface/wait_proxy.h> - -namespace NYT { -namespace NDetail { - -//////////////////////////////////////////////////////////////////////////////// - -class TDefaultWaitProxy - : public IWaitProxy -{ -public: - bool WaitFuture(const ::NThreading::TFuture<void>& future, TDuration timeout) override; - bool WaitEvent(TSystemEvent& event, TDuration timeout) override; - bool WaitCondVar(TCondVar& condVar, TMutex& mutex, TDuration timeout) override; - void Sleep(TDuration timeout) override; -}; - -class TWaitProxy { -public: - TWaitProxy(); - - static TWaitProxy* Get(); - - // NB: Non thread-safe, should be called only in initialization code. - void SetProxy(::TIntrusivePtr<IWaitProxy> proxy); - - bool WaitFuture(const ::NThreading::TFuture<void>& future); - bool WaitFuture(const ::NThreading::TFuture<void>& future, TInstant deadLine); - bool WaitFuture(const ::NThreading::TFuture<void>& future, TDuration timeout); - - bool WaitEventD(TSystemEvent& event, TInstant deadLine); - bool WaitEventT(TSystemEvent& event, TDuration timeout); - void WaitEventI(TSystemEvent& event); - bool WaitEvent(TSystemEvent& event); - - bool WaitCondVarD(TCondVar& condVar, TMutex& m, TInstant deadLine); - bool WaitCondVarT(TCondVar& condVar, TMutex& m, TDuration timeOut); - void WaitCondVarI(TCondVar& condVar, TMutex& m); - void WaitCondVar(TCondVar& condVar, TMutex& m); - - void Sleep(TDuration timeout); - void SleepUntil(TInstant instant); - -private: - ::TIntrusivePtr<IWaitProxy> Proxy_; -}; - -//////////////////////////////////////////////////////////////////////////////// - -} // namespace NDetail -} // namespace NYT diff --git a/yt/cpp/mapreduce/common/ya.make b/yt/cpp/mapreduce/common/ya.make deleted file mode 100644 index 004708cb44..0000000000 --- a/yt/cpp/mapreduce/common/ya.make +++ /dev/null @@ -1,23 +0,0 @@ -LIBRARY() - -INCLUDE(${ARCADIA_ROOT}/yt/ya_cpp.make.inc) - -SRCS( - debug_metrics.cpp - helpers.cpp - retry_lib.cpp - wait_proxy.cpp -) - -PEERDIR( - 
library/cpp/json - library/cpp/svnversion - library/cpp/threading/future - library/cpp/yson - library/cpp/yson/json - library/cpp/yson/node - yt/cpp/mapreduce/interface - yt/cpp/mapreduce/interface/logging -) - -END() diff --git a/yt/cpp/mapreduce/http/abortable_http_response.cpp b/yt/cpp/mapreduce/http/abortable_http_response.cpp deleted file mode 100644 index 9da9241d33..0000000000 --- a/yt/cpp/mapreduce/http/abortable_http_response.cpp +++ /dev/null @@ -1,223 +0,0 @@ -#include "abortable_http_response.h" - -#include <util/system/mutex.h> -#include <util/generic/singleton.h> -#include <util/generic/hash_set.h> - -namespace NYT { - -//////////////////////////////////////////////////////////////////////////////// - -class TAbortableHttpResponseRegistry { -public: - TOutageId StartOutage(TString urlPattern, const TOutageOptions& options) - { - auto g = Guard(Lock_); - auto id = NextId_++; - IdToOutage.emplace(id, TOutageEntry{std::move(urlPattern), options.ResponseCount_, options.LengthLimit_}); - return id; - } - - void StopOutage(TOutageId id) - { - auto g = Guard(Lock_); - IdToOutage.erase(id); - } - - void Add(IAbortableHttpResponse* response) - { - auto g = Guard(Lock_); - for (auto& [id, entry] : IdToOutage) { - if (entry.Counter > 0 && response->GetUrl().find(entry.Pattern) != TString::npos) { - response->SetLengthLimit(entry.LengthLimit); - entry.Counter -= 1; - } - } - ResponseList_.PushBack(response); - } - - void Remove(IAbortableHttpResponse* response) - { - auto g = Guard(Lock_); - response->Unlink(); - } - - static TAbortableHttpResponseRegistry& Get() - { - return *Singleton<TAbortableHttpResponseRegistry>(); - } - - int AbortAll(const TString& urlPattern) - { - int result = 0; - for (auto& response : ResponseList_) { - if (!response.IsAborted() && response.GetUrl().find(urlPattern) != TString::npos) { - response.Abort(); - ++result; - } - } - return result; - } - -private: - struct TOutageEntry - { - TString Pattern; - size_t Counter; - size_t 
LengthLimit; - }; - -private: - TOutageId NextId_ = 0; - TIntrusiveList<IAbortableHttpResponse> ResponseList_; - THashMap<TOutageId, TOutageEntry> IdToOutage; - TMutex Lock_; -}; - -//////////////////////////////////////////////////////////////////////////////// - -TAbortableHttpResponse::TOutage::TOutage( - TString urlPattern, - TAbortableHttpResponseRegistry& registry, - const TOutageOptions& options) - : UrlPattern_(std::move(urlPattern)) - , Registry_(registry) - , Id_(registry.StartOutage(UrlPattern_, options)) -{ } - -TAbortableHttpResponse::TOutage::~TOutage() -{ - Stop(); -} - -void TAbortableHttpResponse::TOutage::Stop() -{ - if (!Stopped_) { - Registry_.StopOutage(Id_); - Stopped_ = true; - } -} - -//////////////////////////////////////////////////////////////////////////////// - -TAbortableHttpResponseBase::TAbortableHttpResponseBase(const TString& url) - : Url_(url) -{ - TAbortableHttpResponseRegistry::Get().Add(this); -} - -TAbortableHttpResponseBase::~TAbortableHttpResponseBase() -{ - TAbortableHttpResponseRegistry::Get().Remove(this); -} - -void TAbortableHttpResponseBase::Abort() -{ - Aborted_ = true; -} - -void TAbortableHttpResponseBase::SetLengthLimit(size_t limit) -{ - LengthLimit_ = limit; - if (LengthLimit_ == 0) { - Abort(); - } -} - -const TString& TAbortableHttpResponseBase::GetUrl() const -{ - return Url_; -} - -bool TAbortableHttpResponseBase::IsAborted() const -{ - return Aborted_; -} - -//////////////////////////////////////////////////////////////////////////////// - -TAbortableHttpResponse::TAbortableHttpResponse( - IInputStream* socketStream, - const TString& requestId, - const TString& hostName, - const TString& url) - : THttpResponse(socketStream, requestId, hostName) - , TAbortableHttpResponseBase(url) -{ -} - -size_t TAbortableHttpResponse::DoRead(void* buf, size_t len) -{ - if (Aborted_) { - ythrow TAbortedForTestPurpose() << "response was aborted"; - } - len = std::min(len, LengthLimit_); - auto read = 
THttpResponse::DoRead(buf, len); - LengthLimit_ -= read; - if (LengthLimit_ == 0) { - Abort(); - } - return read; -} - -size_t TAbortableHttpResponse::DoSkip(size_t len) -{ - if (Aborted_) { - ythrow TAbortedForTestPurpose() << "response was aborted"; - } - return THttpResponse::DoSkip(len); -} - -int TAbortableHttpResponse::AbortAll(const TString& urlPattern) -{ - return TAbortableHttpResponseRegistry::Get().AbortAll(urlPattern); -} - -TAbortableHttpResponse::TOutage TAbortableHttpResponse::StartOutage( - const TString& urlPattern, - const TOutageOptions& options) -{ - return TOutage(urlPattern, TAbortableHttpResponseRegistry::Get(), options); -} - -TAbortableHttpResponse::TOutage TAbortableHttpResponse::StartOutage( - const TString& urlPattern, - size_t responseCount) -{ - return StartOutage(urlPattern, TOutageOptions().ResponseCount(responseCount)); -} - -TAbortableCoreHttpResponse::TAbortableCoreHttpResponse( - std::unique_ptr<IInputStream> stream, - const TString& url) - : TAbortableHttpResponseBase(url) - , Stream_(std::move(stream)) -{ -} - -size_t TAbortableCoreHttpResponse::DoRead(void* buf, size_t len) -{ - if (Aborted_) { - ythrow TAbortedForTestPurpose() << "response was aborted"; - } - len = std::min(len, LengthLimit_); - auto read = Stream_->Read(buf, len); - LengthLimit_ -= read; - if (LengthLimit_ == 0) { - Abort(); - } - - return read; -} - -size_t TAbortableCoreHttpResponse::DoSkip(size_t len) -{ - if (Aborted_) { - ythrow TAbortedForTestPurpose() << "response was aborted"; - } - return Stream_->Skip(len); -} - -//////////////////////////////////////////////////////////////////////////////// - -} // namespace NYT diff --git a/yt/cpp/mapreduce/http/abortable_http_response.h b/yt/cpp/mapreduce/http/abortable_http_response.h deleted file mode 100644 index d72bcfa0a6..0000000000 --- a/yt/cpp/mapreduce/http/abortable_http_response.h +++ /dev/null @@ -1,142 +0,0 @@ -#pragma once - -#include "http.h" - -#include <util/generic/intrlist.h> - -namespace NYT 
{ - -//////////////////////////////////////////////////////////////////////////////// - -class TAbortableHttpResponseRegistry; - -using TOutageId = size_t; - -//////////////////////////////////////////////////////////////////////////////// - -class TAbortedForTestPurpose - : public yexception -{ }; - -struct TOutageOptions -{ - using TSelf = TOutageOptions; - - /// @brief Number of responses to abort. - FLUENT_FIELD_DEFAULT(size_t, ResponseCount, std::numeric_limits<size_t>::max()); - - /// @brief Number of bytes to read before abortion. If zero, abort immediately. - FLUENT_FIELD_DEFAULT(size_t, LengthLimit, 0); -}; - -//////////////////////////////////////////////////////////////////////////////// - -class IAbortableHttpResponse - : public TIntrusiveListItem<IAbortableHttpResponse> -{ -public: - virtual void Abort() = 0; - virtual const TString& GetUrl() const = 0; - virtual bool IsAborted() const = 0; - virtual void SetLengthLimit(size_t limit) = 0; - - virtual ~IAbortableHttpResponse() = default; -}; - -class TAbortableHttpResponseBase - : public IAbortableHttpResponse -{ -public: - TAbortableHttpResponseBase(const TString& url); - ~TAbortableHttpResponseBase(); - - void Abort() override; - const TString& GetUrl() const override; - bool IsAborted() const override; - void SetLengthLimit(size_t limit) override; - -protected: - TString Url_; - std::atomic<bool> Aborted_ = {false}; - size_t LengthLimit_ = std::numeric_limits<size_t>::max(); -}; - -//////////////////////////////////////////////////////////////////////////////// - -/// @brief Stream wrapper for @ref NYT::NHttpClient::TCoreHttpResponse with possibility to emulate errors. 
-class TAbortableCoreHttpResponse - : public IInputStream - , public TAbortableHttpResponseBase -{ -public: - TAbortableCoreHttpResponse( - std::unique_ptr<IInputStream> stream, - const TString& url); - -private: - size_t DoRead(void* buf, size_t len) override; - size_t DoSkip(size_t len) override; - -private: - std::unique_ptr<IInputStream> Stream_; -}; - -//////////////////////////////////////////////////////////////////////////////// - -/// @brief Class extends @ref NYT::THttpResponse with possibility to emulate errors. -class TAbortableHttpResponse - : public THttpResponse - , public TAbortableHttpResponseBase -{ -public: - class TOutage - { - public: - TOutage(TString urlPattern, TAbortableHttpResponseRegistry& registry, const TOutageOptions& options); - TOutage(TOutage&&) = default; - TOutage(const TOutage&) = delete; - ~TOutage(); - - void Stop(); - - private: - TString UrlPattern_; - TAbortableHttpResponseRegistry& Registry_; - TOutageId Id_; - bool Stopped_ = false; - }; - -public: - TAbortableHttpResponse( - IInputStream* socketStream, - const TString& requestId, - const TString& hostName, - const TString& url); - - /// @brief Abort any responses which match `urlPattern` (i.e. contain it in url). - /// - /// @return number of aborted responses. - static int AbortAll(const TString& urlPattern); - - /// @brief Start outage. Future responses which match `urlPattern` (i.e. contain it in url) will fail. - /// - /// @return outage object controlling the lifetime of outage (outage stops when object is destroyed) - [[nodiscard]] static TOutage StartOutage( - const TString& urlPattern, - const TOutageOptions& options = TOutageOptions()); - - /// @brief Start outage. Future `responseCount` responses which match `urlPattern` (i.e. contain it in url) will fail. 
- /// - /// @return outage object controlling the lifetime of outage (outage stops when object is destroyed) - [[nodiscard]] static TOutage StartOutage( - const TString& urlPattern, - size_t responseCount); - -private: - size_t DoRead(void* buf, size_t len) override; - size_t DoSkip(size_t len) override; -}; - -//////////////////////////////////////////////////////////////////////////////// - -} // namespace NYT diff --git a/yt/cpp/mapreduce/http/context.cpp b/yt/cpp/mapreduce/http/context.cpp deleted file mode 100644 index 1c016263c5..0000000000 --- a/yt/cpp/mapreduce/http/context.cpp +++ /dev/null @@ -1,25 +0,0 @@ -#include "context.h" - -namespace NYT { - -//////////////////////////////////////////////////////////////////////////////// - -bool operator==(const TClientContext& lhs, const TClientContext& rhs) -{ - return lhs.ServerName == rhs.ServerName && - lhs.Token == rhs.Token && - lhs.ImpersonationUser == rhs.ImpersonationUser && - lhs.ServiceTicketAuth == rhs.ServiceTicketAuth && - lhs.HttpClient == rhs.HttpClient && - lhs.UseTLS == rhs.UseTLS && - lhs.TvmOnly == rhs.TvmOnly; -} - -bool operator!=(const TClientContext& lhs, const TClientContext& rhs) -{ - return !(rhs == lhs); -} - -//////////////////////////////////////////////////////////////////////////////// - -} // namespace NYT diff --git a/yt/cpp/mapreduce/http/context.h b/yt/cpp/mapreduce/http/context.h deleted file mode 100644 index 3926373e17..0000000000 --- a/yt/cpp/mapreduce/http/context.h +++ /dev/null @@ -1,31 +0,0 @@ -#pragma once - -#include "fwd.h" - -#include <yt/cpp/mapreduce/interface/common.h> -#include <yt/cpp/mapreduce/interface/config.h> -#include <yt/cpp/mapreduce/interface/public.h> - - -namespace NYT { - -/////////////////////////////////////////////////////////////////////////////// - -struct TClientContext -{ - TString ServerName; - TString Token; - TMaybe<TString> ImpersonationUser; - NAuth::IServiceTicketAuthPtrWrapperPtr ServiceTicketAuth; - NHttpClient::IHttpClientPtr 
HttpClient; - bool TvmOnly = false; - bool UseTLS = false; - TConfigPtr Config = TConfig::Get(); -}; - -bool operator==(const TClientContext& lhs, const TClientContext& rhs); -bool operator!=(const TClientContext& lhs, const TClientContext& rhs); - -//////////////////////////////////////////////////////////////////////////////// - -} // namespace NYT diff --git a/yt/cpp/mapreduce/http/core.h b/yt/cpp/mapreduce/http/core.h deleted file mode 100644 index 37c74d7551..0000000000 --- a/yt/cpp/mapreduce/http/core.h +++ /dev/null @@ -1,27 +0,0 @@ -#pragma once - -#include <yt/yt/core/http/public.h> - -#include <memory> - -namespace NYT::NHttp { - -//////////////////////////////////////////////////////////////////////////////// - -/// @brief Wrapper for THeaderPtr which allows to hide NYT::IntrusivePtr from interfaces. -struct THeadersPtrWrapper -{ - THeadersPtrWrapper(THeadersPtr ptr) - : Ptr(std::make_shared<THeadersPtr>(std::move(ptr))) - { } - - THeadersPtr Get() { - return *Ptr; - } - - std::shared_ptr<THeadersPtr> Ptr; -}; - -//////////////////////////////////////////////////////////////////////////////// - -} // namespace NYT::NHttp diff --git a/yt/cpp/mapreduce/http/fwd.h b/yt/cpp/mapreduce/http/fwd.h deleted file mode 100644 index 62891731f6..0000000000 --- a/yt/cpp/mapreduce/http/fwd.h +++ /dev/null @@ -1,26 +0,0 @@ -#pragma once - -#include <memory> - -namespace NYT { - -//////////////////////////////////////////////////////////////////////////////// - -struct TClientContext; -class THttpHeader; - -namespace NHttpClient { - -class IHttpClient; -class IHttpRequest; -class IHttpResponse; - -using IHttpClientPtr = std::shared_ptr<IHttpClient>; -using IHttpResponsePtr = std::unique_ptr<IHttpResponse>; -using IHttpRequestPtr = std::unique_ptr<IHttpRequest>; - -} // namespace NHttpClient - -//////////////////////////////////////////////////////////////////////////////// - -} // namespace NYT diff --git a/yt/cpp/mapreduce/http/helpers.cpp 
b/yt/cpp/mapreduce/http/helpers.cpp deleted file mode 100644 index 233a565f20..0000000000 --- a/yt/cpp/mapreduce/http/helpers.cpp +++ /dev/null @@ -1,88 +0,0 @@ -#include "helpers.h" - -#include "context.h" -#include "requests.h" - -#include <yt/cpp/mapreduce/interface/logging/yt_log.h> - -#include <library/cpp/yson/node/node_io.h> - -namespace NYT { - -/////////////////////////////////////////////////////////////////////////////// - -TString CreateHostNameWithPort(const TString& hostName, const TClientContext& context) -{ - static constexpr int HttpProxyPort = 80; - static constexpr int HttpsProxyPort = 443; - - static constexpr int TvmOnlyHttpProxyPort = 9026; - static constexpr int TvmOnlyHttpsProxyPort = 9443; - - if (hostName.find(':') == TString::npos) { - int port; - if (context.TvmOnly) { - port = context.UseTLS - ? TvmOnlyHttpsProxyPort - : TvmOnlyHttpProxyPort; - } else { - port = context.UseTLS - ? HttpsProxyPort - : HttpProxyPort; - } - return Format("%v:%v", hostName, port); - } - return hostName; -} - -TString GetFullUrl(const TString& hostName, const TClientContext& context, THttpHeader& header) -{ - Y_UNUSED(context); - return Format("http://%v%v", hostName, header.GetUrl()); -} - -static TString GetParametersDebugString(const THttpHeader& header) -{ - const auto& parameters = header.GetParameters(); - if (parameters.Empty()) { - return "<empty>"; - } else { - return NodeToYsonString(parameters); - } -} - -TString TruncateForLogs(const TString& text, size_t maxSize) -{ - Y_VERIFY(maxSize > 10); - if (text.empty()) { - static TString empty = "empty"; - return empty; - } else if (text.size() > maxSize) { - TStringStream out; - out << text.substr(0, maxSize) + "... 
(" << text.size() << " bytes total)"; - return out.Str(); - } else { - return text; - } -} - -TString GetLoggedAttributes(const THttpHeader& header, const TString& url, bool includeParameters, size_t sizeLimit) -{ - const auto parametersDebugString = GetParametersDebugString(header); - TStringStream out; - out << "Method: " << url << "; " - << "X-YT-Parameters (sent in " << (includeParameters ? "header" : "body") << "): " << TruncateForLogs(parametersDebugString, sizeLimit); - return out.Str(); -} - -void LogRequest(const THttpHeader& header, const TString& url, bool includeParameters, const TString& requestId, const TString& hostName) -{ - YT_LOG_DEBUG("REQ %v - sending request (HostName: %v; %v)", - requestId, - hostName, - GetLoggedAttributes(header, url, includeParameters, Max<size_t>())); -} - -/////////////////////////////////////////////////////////////////////////////// - -} // namespace NYT diff --git a/yt/cpp/mapreduce/http/helpers.h b/yt/cpp/mapreduce/http/helpers.h deleted file mode 100644 index 0c510fa2e8..0000000000 --- a/yt/cpp/mapreduce/http/helpers.h +++ /dev/null @@ -1,25 +0,0 @@ -#pragma once - -#include "fwd.h" - -#include "http.h" - -#include <util/generic/fwd.h> - -namespace NYT { - -/////////////////////////////////////////////////////////////////////////////// - -TString CreateHostNameWithPort(const TString& name, const TClientContext& context); - -TString GetFullUrl(const TString& hostName, const TClientContext& context, THttpHeader& header); - -TString TruncateForLogs(const TString& text, size_t maxSize); - -TString GetLoggedAttributes(const THttpHeader& header, const TString& url, bool includeParameters, size_t sizeLimit); - -void LogRequest(const THttpHeader& header, const TString& url, bool includeParameters, const TString& requestId, const TString& hostName); - -/////////////////////////////////////////////////////////////////////////////// - -} // namespace NYT diff --git a/yt/cpp/mapreduce/http/host_manager.cpp 
b/yt/cpp/mapreduce/http/host_manager.cpp deleted file mode 100644 index a239dde769..0000000000 --- a/yt/cpp/mapreduce/http/host_manager.cpp +++ /dev/null @@ -1,140 +0,0 @@ -#include "host_manager.h" - -#include "context.h" -#include "helpers.h" -#include "http.h" -#include "http_client.h" -#include "requests.h" - -#include <yt/cpp/mapreduce/interface/logging/yt_log.h> - -#include <yt/cpp/mapreduce/interface/config.h> - -#include <library/cpp/json/json_reader.h> - -#include <util/generic/guid.h> -#include <util/generic/vector.h> -#include <util/generic/singleton.h> -#include <util/generic/ymath.h> - -#include <util/random/random.h> - -#include <util/string/vector.h> - -namespace NYT::NPrivate { - -//////////////////////////////////////////////////////////////////////////////// - -static TVector<TString> ParseJsonStringArray(const TString& response) -{ - NJson::TJsonValue value; - TStringInput input(response); - NJson::ReadJsonTree(&input, &value); - - const NJson::TJsonValue::TArray& array = value.GetArray(); - TVector<TString> result; - result.reserve(array.size()); - for (size_t i = 0; i < array.size(); ++i) { - result.push_back(array[i].GetString()); - } - return result; -} - -//////////////////////////////////////////////////////////////////////////////// - -class THostManager::TClusterHostList -{ -public: - explicit TClusterHostList(TVector<TString> hosts) - : Hosts_(std::move(hosts)) - , Timestamp_(TInstant::Now()) - { } - - explicit TClusterHostList(std::exception_ptr error) - : Error_(std::move(error)) - , Timestamp_(TInstant::Now()) - { } - - TString ChooseHostOrThrow() const - { - if (Error_) { - std::rethrow_exception(Error_); - } - - if (Hosts_.empty()) { - ythrow yexception() << "fetched list of proxies is empty"; - } - - return Hosts_[RandomNumber<size_t>(Hosts_.size())]; - } - - TDuration GetAge() const - { - return TInstant::Now() - Timestamp_; - } - -private: - TVector<TString> Hosts_; - std::exception_ptr Error_; - TInstant Timestamp_; -}; - 
-//////////////////////////////////////////////////////////////////////////////// - -THostManager& THostManager::Get() -{ - return *Singleton<THostManager>(); -} - -void THostManager::Reset() -{ - auto guard = Guard(Lock_); - ClusterHosts_.clear(); -} - -TString THostManager::GetProxyForHeavyRequest(const TClientContext& context) -{ - auto cluster = context.ServerName; - { - auto guard = Guard(Lock_); - auto it = ClusterHosts_.find(cluster); - if (it != ClusterHosts_.end() && it->second.GetAge() < context.Config->HostListUpdateInterval) { - return it->second.ChooseHostOrThrow(); - } - } - - auto hostList = GetHosts(context); - auto result = hostList.ChooseHostOrThrow(); - { - auto guard = Guard(Lock_); - ClusterHosts_.emplace(cluster, std::move(hostList)); - } - return result; -} - -THostManager::TClusterHostList THostManager::GetHosts(const TClientContext& context) -{ - TString hostsEndpoint = context.Config->Hosts; - while (hostsEndpoint.StartsWith("/")) { - hostsEndpoint = hostsEndpoint.substr(1); - } - THttpHeader header("GET", hostsEndpoint, false); - - try { - auto hostName = context.ServerName; - auto requestId = CreateGuidAsString(); - // TODO: we need to set socket timeout here - auto response = context.HttpClient->Request(GetFullUrl(hostName, context, header), requestId, header); - auto hosts = ParseJsonStringArray(response->GetResponse()); - for (auto& host : hosts) { - host = CreateHostNameWithPort(host, context); - } - return TClusterHostList(std::move(hosts)); - } catch (const std::exception& e) { - return TClusterHostList(std::current_exception()); - } -} - -//////////////////////////////////////////////////////////////////////////////// - -} // namespace NYT::NPrivate diff --git a/yt/cpp/mapreduce/http/host_manager.h b/yt/cpp/mapreduce/http/host_manager.h deleted file mode 100644 index fdbb740566..0000000000 --- a/yt/cpp/mapreduce/http/host_manager.h +++ /dev/null @@ -1,37 +0,0 @@ -#pragma once - -#include "fwd.h" - -#include <util/generic/string.h> 
-#include <util/generic/hash.h> -#include <util/system/spinlock.h> - - -namespace NYT::NPrivate { - -//////////////////////////////////////////////////////////////////////////////// - -class THostManager -{ -public: - static THostManager& Get(); - - TString GetProxyForHeavyRequest(const TClientContext& context); - - // For testing purposes only. - void Reset(); - -private: - class TClusterHostList; - -private: - TAdaptiveLock Lock_; - THashMap<TString, TClusterHostList> ClusterHosts_; - -private: - static TClusterHostList GetHosts(const TClientContext& context); -}; - -//////////////////////////////////////////////////////////////////////////////// - -} // namespace NYT::NPrivate diff --git a/yt/cpp/mapreduce/http/http.cpp b/yt/cpp/mapreduce/http/http.cpp deleted file mode 100644 index d44b2638a0..0000000000 --- a/yt/cpp/mapreduce/http/http.cpp +++ /dev/null @@ -1,1014 +0,0 @@ -#include "http.h" - -#include "abortable_http_response.h" -#include "core.h" -#include "helpers.h" - -#include <yt/cpp/mapreduce/common/helpers.h> -#include <yt/cpp/mapreduce/common/retry_lib.h> -#include <yt/cpp/mapreduce/common/wait_proxy.h> - -#include <yt/cpp/mapreduce/interface/config.h> -#include <yt/cpp/mapreduce/interface/errors.h> -#include <yt/cpp/mapreduce/interface/logging/yt_log.h> - -#include <yt/yt/core/http/http.h> - -#include <library/cpp/json/json_writer.h> - -#include <library/cpp/string_utils/base64/base64.h> -#include <library/cpp/string_utils/quote/quote.h> - -#include <util/generic/singleton.h> -#include <util/generic/algorithm.h> - -#include <util/stream/mem.h> - -#include <util/string/builder.h> -#include <util/string/cast.h> -#include <util/string/escape.h> -#include <util/string/printf.h> - -#include <util/system/byteorder.h> -#include <util/system/getpid.h> - -#include <exception> - - -namespace NYT { - -//////////////////////////////////////////////////////////////////////////////// - -class THttpRequest::TRequestStream - : public IOutputStream -{ -public: - 
TRequestStream(THttpRequest* httpRequest, const TSocket& s) - : HttpRequest_(httpRequest) - , SocketOutput_(s) - , HttpOutput_(static_cast<IOutputStream*>(&SocketOutput_)) - { - HttpOutput_.EnableKeepAlive(true); - } - -private: - void DoWrite(const void* buf, size_t len) override - { - WrapWriteFunc([&] { - HttpOutput_.Write(buf, len); - }); - } - - void DoWriteV(const TPart* parts, size_t count) override - { - WrapWriteFunc([&] { - HttpOutput_.Write(parts, count); - }); - } - - void DoWriteC(char ch) override - { - WrapWriteFunc([&] { - HttpOutput_.Write(ch); - }); - } - - void DoFlush() override - { - WrapWriteFunc([&] { - HttpOutput_.Flush(); - }); - } - - void DoFinish() override - { - WrapWriteFunc([&] { - HttpOutput_.Finish(); - }); - } - - void WrapWriteFunc(std::function<void()> func) - { - CheckErrorState(); - try { - func(); - } catch (const std::exception&) { - HandleWriteException(); - } - } - - // In many cases http proxy stops reading request and resets connection - // if error has happend. This function tries to read error response - // in such cases. - void HandleWriteException() { - Y_VERIFY(WriteError_ == nullptr); - WriteError_ = std::current_exception(); - Y_VERIFY(WriteError_ != nullptr); - try { - HttpRequest_->GetResponseStream(); - } catch (const TErrorResponse &) { - throw; - } catch (...) 
{ - } - std::rethrow_exception(WriteError_); - } - - void CheckErrorState() - { - if (WriteError_) { - std::rethrow_exception(WriteError_); - } - } - -private: - THttpRequest* const HttpRequest_; - TSocketOutput SocketOutput_; - THttpOutput HttpOutput_; - std::exception_ptr WriteError_; -}; - -//////////////////////////////////////////////////////////////////////////////// - -THttpHeader::THttpHeader(const TString& method, const TString& command, bool isApi) - : Method(method) - , Command(command) - , IsApi(isApi) -{ } - -void THttpHeader::AddParameter(const TString& key, TNode value, bool overwrite) -{ - auto it = Parameters.find(key); - if (it == Parameters.end()) { - Parameters.emplace(key, std::move(value)); - } else { - if (overwrite) { - it->second = std::move(value); - } else { - ythrow yexception() << "Duplicate key: " << key; - } - } -} - -void THttpHeader::MergeParameters(const TNode& newParameters, bool overwrite) -{ - for (const auto& p : newParameters.AsMap()) { - AddParameter(p.first, p.second, overwrite); - } -} - -void THttpHeader::RemoveParameter(const TString& key) -{ - Parameters.erase(key); -} - -TNode THttpHeader::GetParameters() const -{ - return Parameters; -} - -void THttpHeader::AddTransactionId(const TTransactionId& transactionId, bool overwrite) -{ - if (transactionId) { - AddParameter("transaction_id", GetGuidAsString(transactionId), overwrite); - } else { - RemoveParameter("transaction_id"); - } -} - -void THttpHeader::AddPath(const TString& path, bool overwrite) -{ - AddParameter("path", path, overwrite); -} - -void THttpHeader::AddOperationId(const TOperationId& operationId, bool overwrite) -{ - AddParameter("operation_id", GetGuidAsString(operationId), overwrite); -} - -void THttpHeader::AddMutationId() -{ - TGUID guid; - - // Some users use `fork()' with yt wrapper - // (actually they use python + multiprocessing) - // and CreateGuid is not resistant to `fork()', so spice it a little bit. 
- // - // Check IGNIETFERRO-610 - CreateGuid(&guid); - guid.dw[2] = GetPID() ^ MicroSeconds(); - - AddParameter("mutation_id", GetGuidAsString(guid), true); -} - -bool THttpHeader::HasMutationId() const -{ - return Parameters.contains("mutation_id"); -} - -void THttpHeader::SetToken(const TString& token) -{ - Token = token; -} - -void THttpHeader::SetImpersonationUser(const TString& impersonationUser) -{ - ImpersonationUser = impersonationUser; -} - -void THttpHeader::SetServiceTicket(const TString& ticket) -{ - ServiceTicket = ticket; -} - -void THttpHeader::SetInputFormat(const TMaybe<TFormat>& format) -{ - InputFormat = format; -} - -void THttpHeader::SetOutputFormat(const TMaybe<TFormat>& format) -{ - OutputFormat = format; -} - -TMaybe<TFormat> THttpHeader::GetOutputFormat() const -{ - return OutputFormat; -} - -void THttpHeader::SetRequestCompression(const TString& compression) -{ - RequestCompression = compression; -} - -void THttpHeader::SetResponseCompression(const TString& compression) -{ - ResponseCompression = compression; -} - -TString THttpHeader::GetCommand() const -{ - return Command; -} - -TString THttpHeader::GetUrl() const -{ - TStringStream url; - - if (IsApi) { - url << "/api/" << TConfig::Get()->ApiVersion << "/" << Command; - } else { - url << "/" << Command; - } - - return url.Str(); -} - -bool THttpHeader::ShouldAcceptFraming() const -{ - return TConfig::Get()->CommandsWithFraming.contains(Command); -} - -TString THttpHeader::GetHeaderAsString(const TString& hostName, const TString& requestId, bool includeParameters) const -{ - TStringStream result; - - result << Method << " " << GetUrl() << " HTTP/1.1\r\n"; - - GetHeader(hostName, requestId, includeParameters).Get()->WriteTo(&result); - - if (ShouldAcceptFraming()) { - result << "X-YT-Accept-Framing: 1\r\n"; - } - - result << "\r\n"; - - return result.Str(); -} - -NHttp::THeadersPtrWrapper THttpHeader::GetHeader(const TString& hostName, const TString& requestId, bool includeParameters) 
const -{ - auto headers = New<NHttp::THeaders>(); - - headers->Add("Host", hostName); - headers->Add("User-Agent", TProcessState::Get()->ClientVersion); - - if (!Token.empty()) { - headers->Add("Authorization", "OAuth " + Token); - } - if (!ServiceTicket.empty()) { - headers->Add("X-Ya-Service-Ticket", ServiceTicket); - } - if (!ImpersonationUser.empty()) { - headers->Add("X-Yt-User-Name", ImpersonationUser); - } - - if (Method == "PUT" || Method == "POST") { - headers->Add("Transfer-Encoding", "chunked"); - } - - headers->Add("X-YT-Correlation-Id", requestId); - headers->Add("X-YT-Header-Format", "<format=text>yson"); - - headers->Add("Content-Encoding", RequestCompression); - headers->Add("Accept-Encoding", ResponseCompression); - - auto printYTHeader = [&headers] (const char* headerName, const TString& value) { - static const size_t maxHttpHeaderSize = 64 << 10; - if (!value) { - return; - } - if (value.size() <= maxHttpHeaderSize) { - headers->Add(headerName, value); - return; - } - - TString encoded; - Base64Encode(value, encoded); - auto ptr = encoded.data(); - auto finish = encoded.data() + encoded.size(); - size_t index = 0; - do { - auto end = Min(ptr + maxHttpHeaderSize, finish); - headers->Add(Format("%v%v", headerName, index++), TString(ptr, end)); - ptr = end; - } while (ptr != finish); - }; - - if (InputFormat) { - printYTHeader("X-YT-Input-Format", NodeToYsonString(InputFormat->Config)); - } - if (OutputFormat) { - printYTHeader("X-YT-Output-Format", NodeToYsonString(OutputFormat->Config)); - } - if (includeParameters) { - printYTHeader("X-YT-Parameters", NodeToYsonString(Parameters)); - } - - return NHttp::THeadersPtrWrapper(std::move(headers)); -} - -const TString& THttpHeader::GetMethod() const -{ - return Method; -} - -//////////////////////////////////////////////////////////////////////////////// - -TAddressCache* TAddressCache::Get() -{ - return Singleton<TAddressCache>(); -} - -bool ContainsAddressOfRequiredVersion(const 
TAddressCache::TAddressPtr& address) -{ - if (!TConfig::Get()->ForceIpV4 && !TConfig::Get()->ForceIpV6) { - return true; - } - - for (auto i = address->Begin(); i != address->End(); ++i) { - const auto& addressInfo = *i; - if (TConfig::Get()->ForceIpV4 && addressInfo.ai_family == AF_INET) { - return true; - } - if (TConfig::Get()->ForceIpV6 && addressInfo.ai_family == AF_INET6) { - return true; - } - } - return false; -} - -TAddressCache::TAddressPtr TAddressCache::Resolve(const TString& hostName) -{ - auto address = FindAddress(hostName); - if (address) { - return address; - } - - TString host(hostName); - ui16 port = 80; - - auto colon = hostName.find(':'); - if (colon != TString::npos) { - port = FromString<ui16>(hostName.substr(colon + 1)); - host = hostName.substr(0, colon); - } - - auto retryPolicy = CreateDefaultRequestRetryPolicy(TConfig::Get()); - auto error = yexception() << "can not resolve address of required version for host " << hostName; - while (true) { - address = new TNetworkAddress(host, port); - if (ContainsAddressOfRequiredVersion(address)) { - break; - } - retryPolicy->NotifyNewAttempt(); - YT_LOG_DEBUG("Failed to resolve address of required version for host %v, retrying: %v", - hostName, - retryPolicy->GetAttemptDescription()); - if (auto backoffDuration = retryPolicy->OnGenericError(error)) { - NDetail::TWaitProxy::Get()->Sleep(*backoffDuration); - } else { - ythrow error; - } - } - - AddAddress(hostName, address); - return address; -} - -TAddressCache::TAddressPtr TAddressCache::FindAddress(const TString& hostName) const -{ - TCacheEntry entry; - { - TReadGuard guard(Lock_); - auto it = Cache_.find(hostName); - if (it == Cache_.end()) { - return nullptr; - } - entry = it->second; - } - - if (TInstant::Now() > entry.ExpirationTime) { - YT_LOG_DEBUG("Address resolution cache entry for host %v is expired, will retry resolution", - hostName); - return nullptr; - } - - if (!ContainsAddressOfRequiredVersion(entry.Address)) { - 
YT_LOG_DEBUG("Address of required version not found for host %v, will retry resolution", - hostName); - return nullptr; - } - - return entry.Address; -} - -void TAddressCache::AddAddress(TString hostName, TAddressPtr address) -{ - auto entry = TCacheEntry{ - .Address = std::move(address), - .ExpirationTime = TInstant::Now() + TConfig::Get()->AddressCacheExpirationTimeout, - }; - - { - TWriteGuard guard(Lock_); - Cache_.emplace(std::move(hostName), std::move(entry)); - } -} - -//////////////////////////////////////////////////////////////////////////////// - -TConnectionPool* TConnectionPool::Get() -{ - return Singleton<TConnectionPool>(); -} - -TConnectionPtr TConnectionPool::Connect( - const TString& hostName, - TDuration socketTimeout) -{ - Refresh(); - - if (socketTimeout == TDuration::Zero()) { - socketTimeout = TConfig::Get()->SocketTimeout; - } - - { - auto guard = Guard(Lock_); - auto now = TInstant::Now(); - auto range = Connections_.equal_range(hostName); - for (auto it = range.first; it != range.second; ++it) { - auto& connection = it->second; - if (connection->DeadLine < now) { - continue; - } - if (!AtomicCas(&connection->Busy, 1, 0)) { - continue; - } - - connection->DeadLine = now + socketTimeout; - connection->Socket->SetSocketTimeout(socketTimeout.Seconds()); - return connection; - } - } - - TConnectionPtr connection(new TConnection); - - auto networkAddress = TAddressCache::Get()->Resolve(hostName); - TSocketHolder socket(DoConnect(networkAddress)); - SetNonBlock(socket, false); - - connection->Socket.Reset(new TSocket(socket.Release())); - - connection->DeadLine = TInstant::Now() + socketTimeout; - connection->Socket->SetSocketTimeout(socketTimeout.Seconds()); - - { - auto guard = Guard(Lock_); - static ui32 connectionId = 0; - connection->Id = ++connectionId; - Connections_.insert({hostName, connection}); - } - - YT_LOG_DEBUG("New connection to %v #%v opened", - hostName, - connection->Id); - - return connection; -} - -void 
TConnectionPool::Release(TConnectionPtr connection) -{ - auto socketTimeout = TConfig::Get()->SocketTimeout; - auto newDeadline = TInstant::Now() + socketTimeout; - - { - auto guard = Guard(Lock_); - connection->DeadLine = newDeadline; - } - - connection->Socket->SetSocketTimeout(socketTimeout.Seconds()); - AtomicSet(connection->Busy, 0); - - Refresh(); -} - -void TConnectionPool::Invalidate( - const TString& hostName, - TConnectionPtr connection) -{ - auto guard = Guard(Lock_); - auto range = Connections_.equal_range(hostName); - for (auto it = range.first; it != range.second; ++it) { - if (it->second == connection) { - YT_LOG_DEBUG("Closing connection #%v", - connection->Id); - Connections_.erase(it); - return; - } - } -} - -void TConnectionPool::Refresh() -{ - auto guard = Guard(Lock_); - - // simple, since we don't expect too many connections - using TItem = std::pair<TInstant, TConnectionMap::iterator>; - std::vector<TItem> sortedConnections; - for (auto it = Connections_.begin(); it != Connections_.end(); ++it) { - sortedConnections.emplace_back(it->second->DeadLine, it); - } - - std::sort( - sortedConnections.begin(), - sortedConnections.end(), - [] (const TItem& a, const TItem& b) -> bool { - return a.first < b.first; - }); - - auto removeCount = static_cast<int>(Connections_.size()) - TConfig::Get()->ConnectionPoolSize; - - const auto now = TInstant::Now(); - for (const auto& item : sortedConnections) { - const auto& mapIterator = item.second; - auto connection = mapIterator->second; - if (AtomicGet(connection->Busy)) { - continue; - } - - if (removeCount > 0) { - Connections_.erase(mapIterator); - YT_LOG_DEBUG("Closing connection #%v (too many opened connections)", - connection->Id); - --removeCount; - continue; - } - - if (connection->DeadLine < now) { - Connections_.erase(mapIterator); - YT_LOG_DEBUG("Closing connection #%v (timeout)", - connection->Id); - } - } -} - -SOCKET TConnectionPool::DoConnect(TAddressCache::TAddressPtr address) -{ - int 
lastError = 0; - - for (auto i = address->Begin(); i != address->End(); ++i) { - struct addrinfo* info = &*i; - - if (TConfig::Get()->ForceIpV4 && info->ai_family != AF_INET) { - continue; - } - - if (TConfig::Get()->ForceIpV6 && info->ai_family != AF_INET6) { - continue; - } - - TSocketHolder socket( - ::socket(info->ai_family, info->ai_socktype, info->ai_protocol)); - - if (socket.Closed()) { - lastError = LastSystemError(); - continue; - } - - SetNonBlock(socket, true); - if (TConfig::Get()->SocketPriority) { - SetSocketPriority(socket, *TConfig::Get()->SocketPriority); - } - - if (connect(socket, info->ai_addr, info->ai_addrlen) == 0) - return socket.Release(); - - int err = LastSystemError(); - if (err == EINPROGRESS || err == EAGAIN || err == EWOULDBLOCK) { - struct pollfd p = { - socket, - POLLOUT, - 0 - }; - const ssize_t n = PollD(&p, 1, TInstant::Now() + TConfig::Get()->ConnectTimeout); - if (n < 0) { - ythrow TSystemError(-(int)n) << "can not connect to " << info; - } - CheckedGetSockOpt(socket, SOL_SOCKET, SO_ERROR, err, "socket error"); - if (!err) - return socket.Release(); - } - - lastError = err; - continue; - } - - ythrow TSystemError(lastError) << "can not connect to " << *address; -} - -//////////////////////////////////////////////////////////////////////////////// - -static TMaybe<TString> GetProxyName(const THttpInput& input) -{ - if (auto proxyHeader = input.Headers().FindHeader("X-YT-Proxy")) { - return proxyHeader->Value(); - } - return Nothing(); -} - -THttpResponse::THttpResponse( - IInputStream* socketStream, - const TString& requestId, - const TString& hostName) - : HttpInput_(socketStream) - , RequestId_(requestId) - , HostName_(GetProxyName(HttpInput_).GetOrElse(hostName)) - , Unframe_(HttpInput_.Headers().HasHeader("X-YT-Framing")) -{ - HttpCode_ = ParseHttpRetCode(HttpInput_.FirstLine()); - if (HttpCode_ == 200 || HttpCode_ == 202) { - return; - } - - ErrorResponse_ = TErrorResponse(HttpCode_, RequestId_); - - auto logAndSetError = 
[&] (const TString& rawError) { - YT_LOG_ERROR("RSP %v - HTTP %v - %v", - RequestId_, - HttpCode_, - rawError.data()); - ErrorResponse_->SetRawError(rawError); - }; - - switch (HttpCode_) { - case 429: - logAndSetError("request rate limit exceeded"); - break; - - case 500: - logAndSetError(::TStringBuilder() << "internal error in proxy " << HostName_); - break; - - default: { - TStringStream httpHeaders; - httpHeaders << "HTTP headers ("; - for (const auto& header : HttpInput_.Headers()) { - httpHeaders << header.Name() << ": " << header.Value() << "; "; - } - httpHeaders << ")"; - - auto errorString = Sprintf("RSP %s - HTTP %d - %s", - RequestId_.data(), - HttpCode_, - httpHeaders.Str().data()); - - YT_LOG_ERROR("%v", - errorString.data()); - - if (auto parsedResponse = ParseError(HttpInput_.Headers())) { - ErrorResponse_ = parsedResponse.GetRef(); - } else { - ErrorResponse_->SetRawError( - errorString + " - X-YT-Error is missing in headers"); - } - break; - } - } -} - -const THttpHeaders& THttpResponse::Headers() const -{ - return HttpInput_.Headers(); -} - -void THttpResponse::CheckErrorResponse() const -{ - if (ErrorResponse_) { - throw *ErrorResponse_; - } -} - -bool THttpResponse::IsExhausted() const -{ - return IsExhausted_; -} - -int THttpResponse::GetHttpCode() const -{ - return HttpCode_; -} - -const TString& THttpResponse::GetHostName() const -{ - return HostName_; -} - -bool THttpResponse::IsKeepAlive() const -{ - return HttpInput_.IsKeepAlive(); -} - -TMaybe<TErrorResponse> THttpResponse::ParseError(const THttpHeaders& headers) -{ - for (const auto& header : headers) { - if (header.Name() == "X-YT-Error") { - TErrorResponse errorResponse(HttpCode_, RequestId_); - errorResponse.ParseFromJsonError(header.Value()); - if (errorResponse.IsOk()) { - return Nothing(); - } - return errorResponse; - } - } - return Nothing(); -} - -size_t THttpResponse::DoRead(void* buf, size_t len) -{ - size_t read; - if (Unframe_) { - read = UnframeRead(buf, len); - } else { 
- read = HttpInput_.Read(buf, len); - } - if (read == 0 && len != 0) { - // THttpInput MUST return defined (but may be empty) - // trailers when it is exhausted. - Y_VERIFY(HttpInput_.Trailers().Defined(), - "trailers MUST be defined for exhausted stream"); - CheckTrailers(HttpInput_.Trailers().GetRef()); - IsExhausted_ = true; - } - return read; -} - -size_t THttpResponse::DoSkip(size_t len) -{ - size_t skipped; - if (Unframe_) { - skipped = UnframeSkip(len); - } else { - skipped = HttpInput_.Skip(len); - } - if (skipped == 0 && len != 0) { - // THttpInput MUST return defined (but may be empty) - // trailers when it is exhausted. - Y_VERIFY(HttpInput_.Trailers().Defined(), - "trailers MUST be defined for exhausted stream"); - CheckTrailers(HttpInput_.Trailers().GetRef()); - IsExhausted_ = true; - } - return skipped; -} - -void THttpResponse::CheckTrailers(const THttpHeaders& trailers) -{ - if (auto errorResponse = ParseError(trailers)) { - errorResponse->SetIsFromTrailers(true); - YT_LOG_ERROR("RSP %v - %v", - RequestId_, - errorResponse.GetRef().what()); - ythrow errorResponse.GetRef(); - } -} - -static ui32 ReadDataFrameSize(THttpInput* stream) -{ - ui32 littleEndianSize; - auto read = stream->Load(&littleEndianSize, sizeof(littleEndianSize)); - if (read < sizeof(littleEndianSize)) { - ythrow yexception() << "Bad data frame header: " << - "expected " << sizeof(littleEndianSize) << " bytes, got " << read; - } - return LittleToHost(littleEndianSize); -} - -bool THttpResponse::RefreshFrameIfNecessary() -{ - while (RemainingFrameSize_ == 0) { - ui8 frameTypeByte; - auto read = HttpInput_.Read(&frameTypeByte, sizeof(frameTypeByte)); - if (read == 0) { - return false; - } - auto frameType = static_cast<EFrameType>(frameTypeByte); - switch (frameType) { - case EFrameType::KeepAlive: - break; - case EFrameType::Data: - RemainingFrameSize_ = ReadDataFrameSize(&HttpInput_); - break; - default: - ythrow yexception() << "Bad frame type " << static_cast<int>(frameTypeByte); 
- } - } - return true; -} - -size_t THttpResponse::UnframeRead(void* buf, size_t len) -{ - if (!RefreshFrameIfNecessary()) { - return 0; - } - auto read = HttpInput_.Read(buf, Min(len, RemainingFrameSize_)); - RemainingFrameSize_ -= read; - return read; -} - -size_t THttpResponse::UnframeSkip(size_t len) -{ - if (!RefreshFrameIfNecessary()) { - return 0; - } - auto skipped = HttpInput_.Skip(Min(len, RemainingFrameSize_)); - RemainingFrameSize_ -= skipped; - return skipped; -} - -//////////////////////////////////////////////////////////////////////////////// - -THttpRequest::THttpRequest() -{ - RequestId = CreateGuidAsString(); -} - -THttpRequest::THttpRequest(const TString& requestId) - : RequestId(requestId) -{ } - -THttpRequest::~THttpRequest() -{ - if (!Connection) { - return; - } - - if (Input && Input->IsKeepAlive() && Input->IsExhausted()) { - // We should return to the pool only connections where HTTP response was fully read. - // Otherwise next reader might read our remaining data and misinterpret them (YT-6510). 
- TConnectionPool::Get()->Release(Connection); - } else { - TConnectionPool::Get()->Invalidate(HostName, Connection); - } -} - -TString THttpRequest::GetRequestId() const -{ - return RequestId; -} - -void THttpRequest::Connect(TString hostName, TDuration socketTimeout) -{ - HostName = std::move(hostName); - YT_LOG_DEBUG("REQ %v - requesting connection to %v from connection pool", - RequestId, - HostName); - - StartTime_ = TInstant::Now(); - Connection = TConnectionPool::Get()->Connect(HostName, socketTimeout); - - YT_LOG_DEBUG("REQ %v - connection #%v", - RequestId, - Connection->Id); -} - -IOutputStream* THttpRequest::StartRequestImpl(const THttpHeader& header, bool includeParameters) -{ - auto strHeader = header.GetHeaderAsString(HostName, RequestId, includeParameters); - Url_ = header.GetUrl(); - - LogRequest(header, Url_, includeParameters, RequestId, HostName); - - LoggedAttributes_ = GetLoggedAttributes(header, Url_, includeParameters, 128); - - auto outputFormat = header.GetOutputFormat(); - if (outputFormat && outputFormat->IsTextYson()) { - LogResponse = true; - } - - RequestStream_ = MakeHolder<TRequestStream>(this, *Connection->Socket.Get()); - - RequestStream_->Write(strHeader.data(), strHeader.size()); - return RequestStream_.Get(); -} - -IOutputStream* THttpRequest::StartRequest(const THttpHeader& header) -{ - return StartRequestImpl(header, true); -} - -void THttpRequest::FinishRequest() -{ - RequestStream_->Flush(); - RequestStream_->Finish(); -} - -void THttpRequest::SmallRequest(const THttpHeader& header, TMaybe<TStringBuf> body) -{ - if (!body && (header.GetMethod() == "PUT" || header.GetMethod() == "POST")) { - const auto& parameters = header.GetParameters(); - auto parametersStr = NodeToYsonString(parameters); - auto* output = StartRequestImpl(header, false); - output->Write(parametersStr); - FinishRequest(); - } else { - auto* output = StartRequest(header); - if (body) { - output->Write(*body); - } - FinishRequest(); - } -} - -THttpResponse* 
THttpRequest::GetResponseStream() -{ - if (!Input) { - SocketInput.Reset(new TSocketInput(*Connection->Socket.Get())); - if (TConfig::Get()->UseAbortableResponse) { - Y_VERIFY(!Url_.empty()); - Input.Reset(new TAbortableHttpResponse(SocketInput.Get(), RequestId, HostName, Url_)); - } else { - Input.Reset(new THttpResponse(SocketInput.Get(), RequestId, HostName)); - } - Input->CheckErrorResponse(); - } - return Input.Get(); -} - -TString THttpRequest::GetResponse() -{ - TString result = GetResponseStream()->ReadAll(); - - TStringStream loggedAttributes; - loggedAttributes - << "Time: " << TInstant::Now() - StartTime_ << "; " - << "HostName: " << GetResponseStream()->GetHostName() << "; " - << LoggedAttributes_; - - if (LogResponse) { - constexpr auto sizeLimit = 1 << 7; - YT_LOG_DEBUG("RSP %v - received response (Response: '%v'; %v)", - RequestId, - TruncateForLogs(result, sizeLimit), - loggedAttributes.Str()); - } else { - YT_LOG_DEBUG("RSP %v - received response of %v bytes (%v)", - RequestId, - result.size(), - loggedAttributes.Str()); - } - return result; -} - -int THttpRequest::GetHttpCode() { - return GetResponseStream()->GetHttpCode(); -} - -void THttpRequest::InvalidateConnection() -{ - TConnectionPool::Get()->Invalidate(HostName, Connection); - Connection.Reset(); -} - -//////////////////////////////////////////////////////////////////////////////// - -} // namespace NYT diff --git a/yt/cpp/mapreduce/http/http.h b/yt/cpp/mapreduce/http/http.h deleted file mode 100644 index ee8783088d..0000000000 --- a/yt/cpp/mapreduce/http/http.h +++ /dev/null @@ -1,256 +0,0 @@ -#pragma once - -#include "fwd.h" - -#include <yt/cpp/mapreduce/interface/common.h> -#include <yt/cpp/mapreduce/interface/errors.h> -#include <yt/cpp/mapreduce/interface/format.h> -#include <yt/cpp/mapreduce/interface/io.h> -#include <yt/cpp/mapreduce/interface/node.h> - -#include <library/cpp/deprecated/atomic/atomic.h> -#include <library/cpp/http/io/stream.h> - -#include <util/generic/hash.h> 
-#include <util/generic/hash_multi_map.h> -#include <util/generic/strbuf.h> -#include <util/generic/guid.h> -#include <util/network/socket.h> -#include <util/stream/input.h> -#include <util/system/mutex.h> -#include <util/system/rwlock.h> -#include <util/generic/ptr.h> - -namespace NYT { - -class TNode; - -namespace NHttp { - -struct THeadersPtrWrapper; - -} // NHttp - -/////////////////////////////////////////////////////////////////////////////// - -enum class EFrameType -{ - Data = 0x01, - KeepAlive = 0x02, -}; - - -class THttpHeader -{ -public: - THttpHeader(const TString& method, const TString& command, bool isApi = true); - - void AddParameter(const TString& key, TNode value, bool overwrite = false); - void RemoveParameter(const TString& key); - void MergeParameters(const TNode& parameters, bool overwrite = false); - TNode GetParameters() const; - - void AddTransactionId(const TTransactionId& transactionId, bool overwrite = false); - void AddPath(const TString& path, bool overwrite = false); - void AddOperationId(const TOperationId& operationId, bool overwrite = false); - void AddMutationId(); - bool HasMutationId() const; - - void SetToken(const TString& token); - void SetImpersonationUser(const TString& impersonationUser); - - void SetServiceTicket(const TString& ticket); - - void SetInputFormat(const TMaybe<TFormat>& format); - - void SetOutputFormat(const TMaybe<TFormat>& format); - TMaybe<TFormat> GetOutputFormat() const; - - void SetRequestCompression(const TString& compression); - void SetResponseCompression(const TString& compression); - - TString GetCommand() const; - TString GetUrl() const; - TString GetHeaderAsString(const TString& hostName, const TString& requestId, bool includeParameters = true) const; - NHttp::THeadersPtrWrapper GetHeader(const TString& hostName, const TString& requestId, bool includeParameters) const; - - const TString& GetMethod() const; - -private: - bool ShouldAcceptFraming() const; - -private: - const TString Method; - 
const TString Command; - const bool IsApi; - - TNode::TMapType Parameters; - TString ImpersonationUser; - TString Token; - TString ServiceTicket; - TNode Attributes; - -private: - TMaybe<TFormat> InputFormat = TFormat::YsonText(); - TMaybe<TFormat> OutputFormat = TFormat::YsonText(); - - TString RequestCompression = "identity"; - TString ResponseCompression = "identity"; -}; - -//////////////////////////////////////////////////////////////////////////////// - -class TAddressCache -{ -public: - using TAddressPtr = TAtomicSharedPtr<TNetworkAddress>; - - static TAddressCache* Get(); - - TAddressPtr Resolve(const TString& hostName); - -private: - struct TCacheEntry { - TAddressPtr Address; - TInstant ExpirationTime; - }; - -private: - TAddressPtr FindAddress(const TString& hostName) const; - void AddAddress(TString hostName, TAddressPtr address); - -private: - TRWMutex Lock_; - THashMap<TString, TCacheEntry> Cache_; -}; - -//////////////////////////////////////////////////////////////////////////////// - -struct TConnection -{ - THolder<TSocket> Socket; - TAtomic Busy = 1; - TInstant DeadLine; - ui32 Id; -}; - -using TConnectionPtr = TAtomicSharedPtr<TConnection>; - -class TConnectionPool -{ -public: - using TConnectionMap = THashMultiMap<TString, TConnectionPtr>; - - static TConnectionPool* Get(); - - TConnectionPtr Connect(const TString& hostName, TDuration socketTimeout); - void Release(TConnectionPtr connection); - void Invalidate(const TString& hostName, TConnectionPtr connection); - -private: - void Refresh(); - static SOCKET DoConnect(TAddressCache::TAddressPtr address); - -private: - TMutex Lock_; - TConnectionMap Connections_; -}; - -//////////////////////////////////////////////////////////////////////////////// - -// -// Input stream that handles YT-specific header/trailer errors -// and throws TErrorResponse if it finds any. 
-class THttpResponse - : public IInputStream -{ -public: - // 'requestId' and 'hostName' are provided for debug reasons - // (they will appear in some error messages). - THttpResponse( - IInputStream* socketStream, - const TString& requestId, - const TString& hostName); - - const THttpHeaders& Headers() const; - - void CheckErrorResponse() const; - bool IsExhausted() const; - int GetHttpCode() const; - const TString& GetHostName() const; - bool IsKeepAlive() const; - -protected: - size_t DoRead(void* buf, size_t len) override; - size_t DoSkip(size_t len) override; - -private: - void CheckTrailers(const THttpHeaders& trailers); - TMaybe<TErrorResponse> ParseError(const THttpHeaders& headers); - size_t UnframeRead(void* buf, size_t len); - size_t UnframeSkip(size_t len); - bool RefreshFrameIfNecessary(); - -private: - THttpInput HttpInput_; - const TString RequestId_; - const TString HostName_; - int HttpCode_ = 0; - TMaybe<TErrorResponse> ErrorResponse_; - bool IsExhausted_ = false; - const bool Unframe_; - size_t RemainingFrameSize_ = 0; -}; - -//////////////////////////////////////////////////////////////////////////////// - -class THttpRequest -{ -public: - THttpRequest(); - THttpRequest(const TString& requestId); - ~THttpRequest(); - - TString GetRequestId() const; - - void Connect(TString hostName, TDuration socketTimeout = TDuration::Zero()); - - IOutputStream* StartRequest(const THttpHeader& header); - void FinishRequest(); - - void SmallRequest(const THttpHeader& header, TMaybe<TStringBuf> body); - - THttpResponse* GetResponseStream(); - - TString GetResponse(); - - void InvalidateConnection(); - - int GetHttpCode(); - -private: - IOutputStream* StartRequestImpl(const THttpHeader& header, bool includeParameters); - -private: - class TRequestStream; - -private: - TString HostName; - TString RequestId; - TString Url_; - TInstant StartTime_; - TString LoggedAttributes_; - - TConnectionPtr Connection; - - THolder<TRequestStream> RequestStream_; - - 
THolder<TSocketInput> SocketInput; - THolder<THttpResponse> Input; - - bool LogResponse = false; -}; - -//////////////////////////////////////////////////////////////////////////////// - -} // namespace NYT diff --git a/yt/cpp/mapreduce/http/http_client.cpp b/yt/cpp/mapreduce/http/http_client.cpp deleted file mode 100644 index a2af1182dc..0000000000 --- a/yt/cpp/mapreduce/http/http_client.cpp +++ /dev/null @@ -1,603 +0,0 @@ -#include "http_client.h" - -#include "abortable_http_response.h" -#include "core.h" -#include "helpers.h" -#include "http.h" - -#include <yt/cpp/mapreduce/interface/config.h> - -#include <yt/cpp/mapreduce/interface/logging/yt_log.h> - -#include <yt/yt/core/concurrency/thread_pool_poller.h> - -#include <yt/yt/core/http/client.h> -#include <yt/yt/core/http/config.h> -#include <yt/yt/core/http/http.h> - -#include <yt/yt/core/https/client.h> -#include <yt/yt/core/https/config.h> - -#include <library/cpp/yson/node/node_io.h> - -namespace NYT::NHttpClient { - -namespace { - -TString CreateHost(TStringBuf host, TStringBuf port) -{ - if (!port.empty()) { - return Format("%v:%v", host, port); - } - - return TString(host); -} - -TMaybe<TErrorResponse> GetErrorResponse(const TString& hostName, const TString& requestId, const NHttp::IResponsePtr& response) -{ - auto httpCode = response->GetStatusCode(); - if (httpCode == NHttp::EStatusCode::OK || httpCode == NHttp::EStatusCode::Accepted) { - return {}; - } - - TErrorResponse errorResponse(static_cast<int>(httpCode), requestId); - - auto logAndSetError = [&] (const TString& rawError) { - YT_LOG_ERROR("RSP %v - HTTP %v - %v", - requestId, - httpCode, - rawError.data()); - errorResponse.SetRawError(rawError); - }; - - switch (httpCode) { - case NHttp::EStatusCode::TooManyRequests: - logAndSetError("request rate limit exceeded"); - break; - - case NHttp::EStatusCode::InternalServerError: - logAndSetError("internal error in proxy " + hostName); - break; - - default: { - TStringStream httpHeaders; - httpHeaders 
<< "HTTP headers ("; - for (const auto& [headerName, headerValue] : response->GetHeaders()->Dump()) { - httpHeaders << headerName << ": " << headerValue << "; "; - } - httpHeaders << ")"; - - auto errorString = Sprintf("RSP %s - HTTP %d - %s", - requestId.data(), - static_cast<int>(httpCode), - httpHeaders.Str().data()); - - YT_LOG_ERROR("%v", - errorString.data()); - - if (auto errorHeader = response->GetHeaders()->Find("X-YT-Error")) { - errorResponse.ParseFromJsonError(*errorHeader); - if (errorResponse.IsOk()) { - return Nothing(); - } - return errorResponse; - } - - errorResponse.SetRawError( - errorString + " - X-YT-Error is missing in headers"); - break; - } - } - - return errorResponse; -} - -void CheckErrorResponse(const TString& hostName, const TString& requestId, const NHttp::IResponsePtr& response) -{ - auto errorResponse = GetErrorResponse(hostName, requestId, response); - if (errorResponse) { - throw *errorResponse; - } -} - -} // namespace - -/////////////////////////////////////////////////////////////////////////////// - -class TDefaultHttpResponse - : public IHttpResponse -{ -public: - TDefaultHttpResponse(std::unique_ptr<THttpRequest> request) - : Request_(std::move(request)) - { } - - int GetStatusCode() override - { - return Request_->GetHttpCode(); - } - - IInputStream* GetResponseStream() override - { - return Request_->GetResponseStream(); - } - - TString GetResponse() override - { - return Request_->GetResponse(); - } - - TString GetRequestId() const override - { - return Request_->GetRequestId(); - } - -private: - std::unique_ptr<THttpRequest> Request_; -}; - -class TDefaultHttpRequest - : public IHttpRequest -{ -public: - TDefaultHttpRequest(std::unique_ptr<THttpRequest> request, IOutputStream* stream) - : Request_(std::move(request)) - , Stream_(stream) - { } - - IOutputStream* GetStream() override - { - return Stream_; - } - - IHttpResponsePtr Finish() override - { - Request_->FinishRequest(); - return 
std::make_unique<TDefaultHttpResponse>(std::move(Request_)); - } - -private: - std::unique_ptr<THttpRequest> Request_; - IOutputStream* Stream_; -}; - -class TDefaultHttpClient - : public IHttpClient -{ -public: - IHttpResponsePtr Request(const TString& url, const TString& requestId, const THttpConfig& config, const THttpHeader& header, TMaybe<TStringBuf> body) override - { - auto request = std::make_unique<THttpRequest>(requestId); - - auto urlRef = NHttp::ParseUrl(url); - - request->Connect(CreateHost(urlRef.Host, urlRef.PortStr), config.SocketTimeout); - request->SmallRequest(header, body); - return std::make_unique<TDefaultHttpResponse>(std::move(request)); - } - - IHttpRequestPtr StartRequest(const TString& url, const TString& requestId, const THttpConfig& config, const THttpHeader& header) override - { - auto request = std::make_unique<THttpRequest>(requestId); - - auto urlRef = NHttp::ParseUrl(url); - - request->Connect(CreateHost(urlRef.Host, urlRef.PortStr), config.SocketTimeout); - auto stream = request->StartRequest(header); - return std::make_unique<TDefaultHttpRequest>(std::move(request), stream); - } -}; - -/////////////////////////////////////////////////////////////////////////////// - -struct TCoreRequestContext -{ - TString HostName; - TString Url; - TString RequestId; - bool LogResponse; - TInstant StartTime; - TString LoggedAttributes; -}; - -class TCoreHttpResponse - : public IHttpResponse -{ -public: - TCoreHttpResponse( - TCoreRequestContext context, - NHttp::IResponsePtr response) - : Context_(std::move(context)) - , Response_(std::move(response)) - { } - - int GetStatusCode() override - { - return static_cast<int>(Response_->GetStatusCode()); - } - - IInputStream* GetResponseStream() override - { - if (!Stream_) { - auto stream = std::make_unique<TWrappedStream>( - NConcurrency::CreateSyncAdapter(NConcurrency::CreateCopyingAdapter(Response_), NConcurrency::EWaitForStrategy::WaitFor), - Response_, - Context_.RequestId); - 
CheckErrorResponse(Context_.HostName, Context_.RequestId, Response_); - - if (TConfig::Get()->UseAbortableResponse) { - Y_VERIFY(!Context_.Url.empty()); - Stream_ = std::make_unique<TAbortableCoreHttpResponse>(std::move(stream), Context_.Url); - } else { - Stream_ = std::move(stream); - } - } - - return Stream_.get(); - } - - TString GetResponse() override - { - auto result = GetResponseStream()->ReadAll(); - - TStringStream loggedAttributes; - loggedAttributes - << "Time: " << TInstant::Now() - Context_.StartTime << "; " - << "HostName: " << Context_.HostName << "; " - << Context_.LoggedAttributes; - - if (Context_.LogResponse) { - constexpr auto sizeLimit = 1 << 7; - YT_LOG_DEBUG("RSP %v - received response (Response: '%v'; %v)", - Context_.RequestId, - TruncateForLogs(result, sizeLimit), - loggedAttributes.Str()); - } else { - YT_LOG_DEBUG("RSP %v - received response of %v bytes (%v)", - Context_.RequestId, - result.size(), - loggedAttributes.Str()); - } - return result; - } - - TString GetRequestId() const override - { - return Context_.RequestId; - } - -private: - class TWrappedStream - : public IInputStream - { - public: - TWrappedStream(std::unique_ptr<IInputStream> underlying, NHttp::IResponsePtr response, TString requestId) - : Underlying_(std::move(underlying)) - , Response_(std::move(response)) - , RequestId_(std::move(requestId)) - { } - - protected: - size_t DoRead(void* buf, size_t len) override - { - size_t read = Underlying_->Read(buf, len); - - if (read == 0 && len != 0) { - CheckTrailers(Response_->GetTrailers()); - } - return read; - } - - size_t DoSkip(size_t len) override - { - size_t skipped = Underlying_->Skip(len); - if (skipped == 0 && len != 0) { - CheckTrailers(Response_->GetTrailers()); - } - return skipped; - } - - private: - void CheckTrailers(const NHttp::THeadersPtr& trailers) - { - if (auto errorResponse = ParseError(trailers)) { - errorResponse->SetIsFromTrailers(true); - YT_LOG_ERROR("RSP %v - %v", - RequestId_, - 
errorResponse.GetRef().what()); - ythrow errorResponse.GetRef(); - } - } - - TMaybe<TErrorResponse> ParseError(const NHttp::THeadersPtr& headers) - { - if (auto errorHeader = headers->Find("X-YT-Error")) { - TErrorResponse errorResponse(static_cast<int>(Response_->GetStatusCode()), RequestId_); - errorResponse.ParseFromJsonError(*errorHeader); - if (errorResponse.IsOk()) { - return Nothing(); - } - return errorResponse; - } - return Nothing(); - } - - private: - std::unique_ptr<IInputStream> Underlying_; - NHttp::IResponsePtr Response_; - TString RequestId_; - }; - -private: - TCoreRequestContext Context_; - NHttp::IResponsePtr Response_; - std::unique_ptr<IInputStream> Stream_; -}; - -class TCoreHttpRequest - : public IHttpRequest -{ -public: - TCoreHttpRequest(TCoreRequestContext context, NHttp::IActiveRequestPtr activeRequest) - : Context_(std::move(context)) - , ActiveRequest_(std::move(activeRequest)) - , Stream_(NConcurrency::CreateBufferedSyncAdapter(ActiveRequest_->GetRequestStream())) - , WrappedStream_(this, Stream_.get()) - { } - - IOutputStream* GetStream() override - { - return &WrappedStream_; - } - - IHttpResponsePtr Finish() override - { - WrappedStream_.Flush(); - auto response = ActiveRequest_->Finish().Get().ValueOrThrow(); - return std::make_unique<TCoreHttpResponse>(std::move(Context_), std::move(response)); - } - - IHttpResponsePtr FinishWithError() - { - auto response = ActiveRequest_->GetResponse(); - return std::make_unique<TCoreHttpResponse>(std::move(Context_), std::move(response)); - } - -private: - class TWrappedStream - : public IOutputStream - { - public: - TWrappedStream(TCoreHttpRequest* httpRequest, IOutputStream* underlying) - : HttpRequest_(httpRequest) - , Underlying_(underlying) - { } - - private: - void DoWrite(const void* buf, size_t len) override - { - WrapWriteFunc([&] { - Underlying_->Write(buf, len); - }); - } - - void DoWriteV(const TPart* parts, size_t count) override - { - WrapWriteFunc([&] { - 
Underlying_->Write(parts, count); - }); - } - - void DoWriteC(char ch) override - { - WrapWriteFunc([&] { - Underlying_->Write(ch); - }); - } - - void DoFlush() override - { - WrapWriteFunc([&] { - Underlying_->Flush(); - }); - } - - void DoFinish() override - { - WrapWriteFunc([&] { - Underlying_->Finish(); - }); - } - - void WrapWriteFunc(std::function<void()> func) - { - CheckErrorState(); - try { - func(); - } catch (const std::exception&) { - HandleWriteException(); - } - } - - // In many cases http proxy stops reading request and resets connection - // if error has happend. This function tries to read error response - // in such cases. - void HandleWriteException() { - Y_VERIFY(WriteError_ == nullptr); - WriteError_ = std::current_exception(); - Y_VERIFY(WriteError_ != nullptr); - try { - HttpRequest_->FinishWithError()->GetResponseStream(); - } catch (const TErrorResponse &) { - throw; - } catch (...) { - } - std::rethrow_exception(WriteError_); - } - - void CheckErrorState() - { - if (WriteError_) { - std::rethrow_exception(WriteError_); - } - } - - private: - TCoreHttpRequest* const HttpRequest_; - IOutputStream* Underlying_; - std::exception_ptr WriteError_; - }; - -private: - TCoreRequestContext Context_; - NHttp::IActiveRequestPtr ActiveRequest_; - std::unique_ptr<IOutputStream> Stream_; - TWrappedStream WrappedStream_; -}; - -class TCoreHttpClient - : public IHttpClient -{ -public: - TCoreHttpClient(bool useTLS, const TConfigPtr& config) - : Poller_(NConcurrency::CreateThreadPoolPoller(1, "http_poller")) // TODO(nadya73): YT-18363: move threads count to config - { - if (useTLS) { - auto httpsConfig = NYT::New<NYT::NHttps::TClientConfig>(); - httpsConfig->MaxIdleConnections = config->ConnectionPoolSize; - Client_ = NHttps::CreateClient(httpsConfig, Poller_); - } else { - auto httpConfig = NYT::New<NYT::NHttp::TClientConfig>(); - httpConfig->MaxIdleConnections = config->ConnectionPoolSize; - Client_ = NHttp::CreateClient(httpConfig, Poller_); - } - } - - 
IHttpResponsePtr Request(const TString& url, const TString& requestId, const THttpConfig& /*config*/, const THttpHeader& header, TMaybe<TStringBuf> body) override - { - TCoreRequestContext context = CreateContext(url, requestId, header); - - // TODO(nadya73): YT-18363: pass socket timeouts from THttpConfig - - NHttp::IResponsePtr response; - - auto logRequest = [&](bool includeParameters) { - LogRequest(header, url, includeParameters, requestId, context.HostName); - context.LoggedAttributes = GetLoggedAttributes(header, url, includeParameters, 128); - }; - - if (!body && (header.GetMethod() == "PUT" || header.GetMethod() == "POST")) { - const auto& parameters = header.GetParameters(); - auto parametersStr = NodeToYsonString(parameters); - - bool includeParameters = false; - auto headers = header.GetHeader(context.HostName, requestId, includeParameters).Get(); - - logRequest(includeParameters); - - auto activeRequest = StartRequestImpl(header.GetMethod(), url, headers); - - activeRequest->GetRequestStream()->Write(TSharedRef::FromString(parametersStr)).Get().ThrowOnError(); - response = activeRequest->Finish().Get().ValueOrThrow(); - } else { - auto bodyRef = TSharedRef::FromString(TString(body ? 
*body : "")); - bool includeParameters = true; - auto headers = header.GetHeader(context.HostName, requestId, includeParameters).Get(); - - logRequest(includeParameters); - - if (header.GetMethod() == "GET") { - response = RequestImpl(header.GetMethod(), url, headers, bodyRef); - } else { - auto activeRequest = StartRequestImpl(header.GetMethod(), url, headers); - - auto request = std::make_unique<TCoreHttpRequest>(std::move(context), std::move(activeRequest)); - if (body) { - request->GetStream()->Write(*body); - } - return request->Finish(); - } - } - - return std::make_unique<TCoreHttpResponse>(std::move(context), std::move(response)); - } - - IHttpRequestPtr StartRequest(const TString& url, const TString& requestId, const THttpConfig& /*config*/, const THttpHeader& header) override - { - TCoreRequestContext context = CreateContext(url, requestId, header); - - LogRequest(header, url, true, requestId, context.HostName); - context.LoggedAttributes = GetLoggedAttributes(header, url, true, 128); - - auto headers = header.GetHeader(context.HostName, requestId, true).Get(); - auto activeRequest = StartRequestImpl(header.GetMethod(), url, headers); - - return std::make_unique<TCoreHttpRequest>(std::move(context), std::move(activeRequest)); - } - -private: - TCoreRequestContext CreateContext(const TString& url, const TString& requestId, const THttpHeader& header) - { - TCoreRequestContext context; - context.Url = url; - context.RequestId = requestId; - - auto urlRef = NHttp::ParseUrl(url); - context.HostName = CreateHost(urlRef.Host, urlRef.PortStr); - - context.LogResponse = false; - auto outputFormat = header.GetOutputFormat(); - if (outputFormat && outputFormat->IsTextYson()) { - context.LogResponse = true; - } - context.StartTime = TInstant::Now(); - return context; - } - - NHttp::IResponsePtr RequestImpl(const TString& method, const TString& url, const NHttp::THeadersPtr& headers, const TSharedRef& body) - { - if (method == "GET") { - return Client_->Get(url, 
headers).Get().ValueOrThrow(); - } else if (method == "POST") { - return Client_->Post(url, body, headers).Get().ValueOrThrow(); - } else if (method == "PUT") { - return Client_->Put(url, body, headers).Get().ValueOrThrow(); - } else { - YT_LOG_FATAL("Unsupported http method (Method: %v, Url: %v)", - method, - url); - } - } - - NHttp::IActiveRequestPtr StartRequestImpl(const TString& method, const TString& url, const NHttp::THeadersPtr& headers) - { - if (method == "POST") { - return Client_->StartPost(url, headers).Get().ValueOrThrow(); - } else if (method == "PUT") { - return Client_->StartPut(url, headers).Get().ValueOrThrow(); - } else { - YT_LOG_FATAL("Unsupported http method (Method: %v, Url: %v)", - method, - url); - } - } - - NConcurrency::IThreadPoolPollerPtr Poller_; - NHttp::IClientPtr Client_; -}; - -/////////////////////////////////////////////////////////////////////////////// - -IHttpClientPtr CreateDefaultHttpClient() -{ - return std::make_shared<TDefaultHttpClient>(); -} - -IHttpClientPtr CreateCoreHttpClient(bool useTLS, const TConfigPtr& config) -{ - return std::make_shared<TCoreHttpClient>(useTLS, config); -} - -/////////////////////////////////////////////////////////////////////////////// - -} // namespace NYT::NHttpClient diff --git a/yt/cpp/mapreduce/http/http_client.h b/yt/cpp/mapreduce/http/http_client.h deleted file mode 100644 index 859f0423cb..0000000000 --- a/yt/cpp/mapreduce/http/http_client.h +++ /dev/null @@ -1,76 +0,0 @@ -#pragma once - -#include "fwd.h" - -#include <yt/cpp/mapreduce/interface/fwd.h> - -#include <util/datetime/base.h> - -#include <util/generic/maybe.h> -#include <util/generic/string.h> - -#include <util/stream/fwd.h> - -#include <memory> - -namespace NYT::NHttpClient { - -/////////////////////////////////////////////////////////////////////////////// - -struct THttpConfig -{ - TDuration SocketTimeout = TDuration::Zero(); -}; - -/////////////////////////////////////////////////////////////////////////////// - -class 
IHttpResponse -{ -public: - virtual ~IHttpResponse() = default; - - virtual int GetStatusCode() = 0; - virtual IInputStream* GetResponseStream() = 0; - virtual TString GetResponse() = 0; - virtual TString GetRequestId() const = 0; -}; - -class IHttpRequest -{ -public: - virtual ~IHttpRequest() = default; - - virtual IOutputStream* GetStream() = 0; - virtual IHttpResponsePtr Finish() = 0; -}; - - -class IHttpClient -{ -public: - virtual ~IHttpClient() = default; - - virtual IHttpResponsePtr Request(const TString& url, const TString& requestId, const THttpConfig& config, const THttpHeader& header, TMaybe<TStringBuf> body = {}) = 0; - - virtual IHttpResponsePtr Request(const TString& url, const TString& requestId, const THttpHeader& header, TMaybe<TStringBuf> body = {}) - { - return Request(url, requestId, /*config*/ {}, header, body); - } - - virtual IHttpRequestPtr StartRequest(const TString& url, const TString& requestId, const THttpConfig& config, const THttpHeader& header) = 0; - - virtual IHttpRequestPtr StartRequest(const TString& url, const TString& requestId, const THttpHeader& header) - { - return StartRequest(url, requestId, /*config*/ {}, header); - } -}; - -/////////////////////////////////////////////////////////////////////////////// - -IHttpClientPtr CreateDefaultHttpClient(); - -IHttpClientPtr CreateCoreHttpClient(bool useTLS, const TConfigPtr& config); - -/////////////////////////////////////////////////////////////////////////////// - -} // namespace NYT::NHttpClient diff --git a/yt/cpp/mapreduce/http/requests.cpp b/yt/cpp/mapreduce/http/requests.cpp deleted file mode 100644 index 7cf0f673bb..0000000000 --- a/yt/cpp/mapreduce/http/requests.cpp +++ /dev/null @@ -1,66 +0,0 @@ -#include "requests.h" - -#include "context.h" -#include "host_manager.h" -#include "retry_request.h" - -#include <yt/cpp/mapreduce/client/transaction.h> - -#include <yt/cpp/mapreduce/common/helpers.h> -#include <yt/cpp/mapreduce/common/retry_lib.h> -#include 
<yt/cpp/mapreduce/common/node_builder.h> -#include <yt/cpp/mapreduce/common/wait_proxy.h> - -#include <yt/cpp/mapreduce/interface/config.h> -#include <yt/cpp/mapreduce/interface/errors.h> -#include <yt/cpp/mapreduce/interface/logging/yt_log.h> -#include <yt/cpp/mapreduce/interface/serialize.h> - -#include <util/stream/file.h> -#include <util/string/builder.h> -#include <util/generic/buffer.h> - - -namespace NYT { - -//////////////////////////////////////////////////////////////////////////////// - -bool ParseBoolFromResponse(const TString& response) -{ - return GetBool(NodeFromYsonString(response)); -} - -TGUID ParseGuidFromResponse(const TString& response) -{ - auto node = NodeFromYsonString(response); - return GetGuid(node.AsString()); -} - -//////////////////////////////////////////////////////////////////////////////// - -TString GetProxyForHeavyRequest(const TClientContext& context) -{ - if (!context.Config->UseHosts) { - return context.ServerName; - } - - return NPrivate::THostManager::Get().GetProxyForHeavyRequest(context); -} - -void LogRequestError( - const TString& requestId, - const THttpHeader& header, - const TString& message, - const TString& attemptDescription) -{ - YT_LOG_ERROR("RSP %v - %v - %v - %v - X-YT-Parameters: %v", - requestId, - header.GetUrl(), - message, - attemptDescription, - NodeToYsonString(header.GetParameters())); -} - -//////////////////////////////////////////////////////////////////////////////// - -} // namespace NYT diff --git a/yt/cpp/mapreduce/http/requests.h b/yt/cpp/mapreduce/http/requests.h deleted file mode 100644 index 2c692475d1..0000000000 --- a/yt/cpp/mapreduce/http/requests.h +++ /dev/null @@ -1,29 +0,0 @@ -#pragma once - -#include "fwd.h" -#include "http.h" - -#include <util/generic/maybe.h> -#include <util/str_stl.h> - -namespace NYT { - -/////////////////////////////////////////////////////////////////////////////// - -bool ParseBoolFromResponse(const TString& response); - -TGUID ParseGuidFromResponse(const 
TString& response); - -//////////////////////////////////////////////////////////////////////////////// - -TString GetProxyForHeavyRequest(const TClientContext& context); - -void LogRequestError( - const TString& requestId, - const THttpHeader& header, - const TString& message, - const TString& attemptDescription); - -//////////////////////////////////////////////////////////////////////////////// - -} // namespace NYT diff --git a/yt/cpp/mapreduce/http/retry_request.cpp b/yt/cpp/mapreduce/http/retry_request.cpp deleted file mode 100644 index ba116edcf7..0000000000 --- a/yt/cpp/mapreduce/http/retry_request.cpp +++ /dev/null @@ -1,149 +0,0 @@ -#include "retry_request.h" - -#include "context.h" -#include "helpers.h" -#include "http_client.h" -#include "requests.h" - -#include <yt/cpp/mapreduce/common/wait_proxy.h> -#include <yt/cpp/mapreduce/common/retry_lib.h> - -#include <yt/cpp/mapreduce/interface/config.h> -#include <yt/cpp/mapreduce/interface/tvm.h> - -#include <yt/cpp/mapreduce/interface/logging/yt_log.h> - -#include <library/cpp/yson/node/node_io.h> - -namespace NYT { -namespace NDetail { - -/////////////////////////////////////////////////////////////////////////////// - -static TResponseInfo Request( - const TClientContext& context, - THttpHeader& header, - TMaybe<TStringBuf> body, - const TString& requestId, - const TRequestConfig& config) -{ - TString hostName; - if (config.IsHeavy) { - hostName = GetProxyForHeavyRequest(context); - } else { - hostName = context.ServerName; - } - - auto url = GetFullUrl(hostName, context, header); - - auto response = context.HttpClient->Request(url, requestId, config.HttpConfig, header, body); - - TResponseInfo result; - result.RequestId = requestId; - result.Response = response->GetResponse(); - result.HttpCode = response->GetStatusCode(); - return result; -} - -TResponseInfo RequestWithoutRetry( - const TClientContext& context, - THttpHeader& header, - TMaybe<TStringBuf> body, - const TRequestConfig& config) -{ - if 
(context.ServiceTicketAuth) { - header.SetServiceTicket(context.ServiceTicketAuth->Ptr->IssueServiceTicket()); - } else { - header.SetToken(context.Token); - } - - if (context.ImpersonationUser) { - header.SetImpersonationUser(*context.ImpersonationUser); - } - - if (header.HasMutationId()) { - header.RemoveParameter("retry"); - header.AddMutationId(); - } - auto requestId = CreateGuidAsString(); - return Request(context, header, body, requestId, config); -} - - -TResponseInfo RetryRequestWithPolicy( - IRequestRetryPolicyPtr retryPolicy, - const TClientContext& context, - THttpHeader& header, - TMaybe<TStringBuf> body, - const TRequestConfig& config) -{ - if (context.ServiceTicketAuth) { - header.SetServiceTicket(context.ServiceTicketAuth->Ptr->IssueServiceTicket()); - } else { - header.SetToken(context.Token); - } - - if (context.ImpersonationUser) { - header.SetImpersonationUser(*context.ImpersonationUser); - } - - bool useMutationId = header.HasMutationId(); - bool retryWithSameMutationId = false; - - if (!retryPolicy) { - retryPolicy = CreateDefaultRequestRetryPolicy(context.Config); - } - - while (true) { - auto requestId = CreateGuidAsString(); - try { - retryPolicy->NotifyNewAttempt(); - - if (useMutationId) { - if (retryWithSameMutationId) { - header.AddParameter("retry", true, /* overwrite = */ true); - } else { - header.RemoveParameter("retry"); - header.AddMutationId(); - } - } - - return Request(context, header, body, requestId, config); - } catch (const TErrorResponse& e) { - LogRequestError(requestId, header, e.GetError().GetMessage(), retryPolicy->GetAttemptDescription()); - retryWithSameMutationId = e.IsTransportError(); - - if (!IsRetriable(e)) { - throw; - } - - auto maybeRetryTimeout = retryPolicy->OnRetriableError(e); - if (maybeRetryTimeout) { - TWaitProxy::Get()->Sleep(*maybeRetryTimeout); - } else { - throw; - } - } catch (const std::exception& e) { - LogRequestError(requestId, header, e.what(), retryPolicy->GetAttemptDescription()); - 
retryWithSameMutationId = true; - - if (!IsRetriable(e)) { - throw; - } - - auto maybeRetryTimeout = retryPolicy->OnGenericError(e); - if (maybeRetryTimeout) { - TWaitProxy::Get()->Sleep(*maybeRetryTimeout); - } else { - throw; - } - } - } - - Y_FAIL("Retries must have either succeeded or thrown an exception"); -} - -/////////////////////////////////////////////////////////////////////////////// - -} // namespace NDetail -} // namespace NYT diff --git a/yt/cpp/mapreduce/http/retry_request.h b/yt/cpp/mapreduce/http/retry_request.h deleted file mode 100644 index 2210e318f1..0000000000 --- a/yt/cpp/mapreduce/http/retry_request.h +++ /dev/null @@ -1,52 +0,0 @@ -#pragma once - -#include "fwd.h" - -#include <yt/cpp/mapreduce/interface/fwd.h> -#include <yt/cpp/mapreduce/common/fwd.h> - -#include <yt/cpp/mapreduce/http/http_client.h> - -#include <util/datetime/base.h> -#include <util/generic/maybe.h> -#include <util/generic/string.h> - -namespace NYT::NDetail { - -//////////////////////////////////////////////////////////////////// - -struct TResponseInfo -{ - TString RequestId; - TString Response; - int HttpCode = 0; -}; - -//////////////////////////////////////////////////////////////////// - -struct TRequestConfig -{ - NHttpClient::THttpConfig HttpConfig; - bool IsHeavy = false; -}; - -//////////////////////////////////////////////////////////////////// - -// Retry request with given `header' and `body' using `retryPolicy'. -// If `retryPolicy == nullptr' use default, currently `TAttemptLimitedRetryPolicy(TConfig::Get()->RetryCount)`. 
-TResponseInfo RetryRequestWithPolicy( - IRequestRetryPolicyPtr retryPolicy, - const TClientContext& context, - THttpHeader& header, - TMaybe<TStringBuf> body = {}, - const TRequestConfig& config = TRequestConfig()); - -TResponseInfo RequestWithoutRetry( - const TClientContext& context, - THttpHeader& header, - TMaybe<TStringBuf> body = {}, - const TRequestConfig& config = TRequestConfig()); - -//////////////////////////////////////////////////////////////////// - -} // namespace NYT::NDetail diff --git a/yt/cpp/mapreduce/http/ya.make b/yt/cpp/mapreduce/http/ya.make deleted file mode 100644 index ef81a4b64a..0000000000 --- a/yt/cpp/mapreduce/http/ya.make +++ /dev/null @@ -1,29 +0,0 @@ -LIBRARY() - -INCLUDE(${ARCADIA_ROOT}/yt/ya_cpp.make.inc) - -SRCS( - abortable_http_response.cpp - context.cpp - helpers.cpp - host_manager.cpp - http.cpp - http_client.cpp - requests.cpp - retry_request.cpp -) - -PEERDIR( - library/cpp/deprecated/atomic - library/cpp/http/io - library/cpp/string_utils/base64 - library/cpp/string_utils/quote - library/cpp/threading/cron - yt/cpp/mapreduce/common - yt/cpp/mapreduce/interface - yt/cpp/mapreduce/interface/logging - yt/yt/core/http - yt/yt/core/https -) - -END() diff --git a/yt/cpp/mapreduce/interface/batch_request.cpp b/yt/cpp/mapreduce/interface/batch_request.cpp deleted file mode 100644 index fefdacb61a..0000000000 --- a/yt/cpp/mapreduce/interface/batch_request.cpp +++ /dev/null @@ -1,15 +0,0 @@ -#include "batch_request.h" -#include "client.h" - -namespace NYT { - -//////////////////////////////////////////////////////////////////////////////// - -IBatchRequestBase& IBatchRequest::WithTransaction(const ITransactionPtr& transaction) -{ - return WithTransaction(transaction->GetId()); -} - -//////////////////////////////////////////////////////////////////////////////// - -} // namespace NYT diff --git a/yt/cpp/mapreduce/interface/batch_request.h b/yt/cpp/mapreduce/interface/batch_request.h deleted file mode 100644 index 
3ea28f76fd..0000000000 --- a/yt/cpp/mapreduce/interface/batch_request.h +++ /dev/null @@ -1,222 +0,0 @@ -#pragma once - -#include "fwd.h" - -#include "client_method_options.h" - -#include <library/cpp/threading/future/future.h> -#include <util/generic/ptr.h> - -namespace NYT { - -//////////////////////////////////////////////////////////////////// - -/// Helper base of @ref NYT::IBatchRequest holding most of useful methods. -class IBatchRequestBase - : public TThrRefBase -{ -public: - virtual ~IBatchRequestBase() = default; - - /// - /// @brief Create cypress node. - /// - /// @see NYT::ICypressClient::Create - virtual ::NThreading::TFuture<TNodeId> Create( - const TYPath& path, - ENodeType type, - const TCreateOptions& options = TCreateOptions()) = 0; - - /// - /// @brief Remove cypress node. - /// - /// @see NYT::ICypressClient::Remove - virtual ::NThreading::TFuture<void> Remove( - const TYPath& path, - const TRemoveOptions& options = TRemoveOptions()) = 0; - - /// - /// @brief Check wether cypress node exists. - /// - /// @see NYT::ICypressClient::Exists - virtual ::NThreading::TFuture<bool> Exists( - const TYPath& path, - const TExistsOptions& options = TExistsOptions()) = 0; - - /// - /// @brief Get cypress node. - /// - /// @see NYT::ICypressClient::Get - virtual ::NThreading::TFuture<TNode> Get( - const TYPath& path, - const TGetOptions& options = TGetOptions()) = 0; - - /// - /// @brief Set cypress node. - /// - /// @see NYT::ICypressClient::Set - virtual ::NThreading::TFuture<void> Set( - const TYPath& path, - const TNode& node, - const TSetOptions& options = TSetOptions()) = 0; - - /// - /// @brief List cypress directory. - /// - /// @see NYT::ICypressClient::List - virtual ::NThreading::TFuture<TNode::TListType> List( - const TYPath& path, - const TListOptions& options = TListOptions()) = 0; - - /// - /// @brief Copy cypress node. 
- /// - /// @see NYT::ICypressClient::Copy - virtual ::NThreading::TFuture<TNodeId> Copy( - const TYPath& sourcePath, - const TYPath& destinationPath, - const TCopyOptions& options = TCopyOptions()) = 0; - - /// - /// @brief Move cypress node. - /// - /// @see NYT::ICypressClient::Move - virtual ::NThreading::TFuture<TNodeId> Move( - const TYPath& sourcePath, - const TYPath& destinationPath, - const TMoveOptions& options = TMoveOptions()) = 0; - - /// - /// @brief Create symbolic link. - /// - /// @see NYT::ICypressClient::Link. - virtual ::NThreading::TFuture<TNodeId> Link( - const TYPath& targetPath, - const TYPath& linkPath, - const TLinkOptions& options = TLinkOptions()) = 0; - - /// - /// @brief Lock cypress node. - /// - /// @see NYT::ICypressClient::Lock - virtual ::NThreading::TFuture<ILockPtr> Lock( - const TYPath& path, - ELockMode mode, - const TLockOptions& options = TLockOptions()) = 0; - - /// - /// @brief Unlock cypress node. - /// - /// @see NYT::ICypressClient::Unlock - virtual ::NThreading::TFuture<void> Unlock( - const TYPath& path, - const TUnlockOptions& options = TUnlockOptions()) = 0; - - /// - /// @brief Abort operation. - /// - /// @see NYT::IClient::AbortOperation - virtual ::NThreading::TFuture<void> AbortOperation(const TOperationId& operationId) = 0; - - /// - /// @brief Force complete operation. - /// - /// @see NYT::IClient::CompleteOperation - virtual ::NThreading::TFuture<void> CompleteOperation(const TOperationId& operationId) = 0; - - /// - /// @brief Suspend operation. - /// - /// @see NYT::IClient::SuspendOperation - virtual ::NThreading::TFuture<void> SuspendOperation( - const TOperationId& operationId, - const TSuspendOperationOptions& options = TSuspendOperationOptions()) = 0; - - /// - /// @brief Resume operation. 
- /// - /// @see NYT::IClient::ResumeOperation - virtual ::NThreading::TFuture<void> ResumeOperation( - const TOperationId& operationId, - const TResumeOperationOptions& options = TResumeOperationOptions()) = 0; - - /// - /// @brief Update parameters of running operation. - /// - /// @see NYT::IClient::UpdateOperationParameters - virtual ::NThreading::TFuture<void> UpdateOperationParameters( - const TOperationId& operationId, - const TUpdateOperationParametersOptions& options = TUpdateOperationParametersOptions()) = 0; - - /// - /// @brief Canonize cypress path - /// - /// @see NYT::ICypressClient::CanonizeYPath - virtual ::NThreading::TFuture<TRichYPath> CanonizeYPath(const TRichYPath& path) = 0; - - /// - /// @brief Get table columnar statistic - /// - /// @see NYT::ICypressClient::GetTableColumnarStatistics - virtual ::NThreading::TFuture<TVector<TTableColumnarStatistics>> GetTableColumnarStatistics( - const TVector<TRichYPath>& paths, - const TGetTableColumnarStatisticsOptions& options = {}) = 0; - - /// - /// @brief Check permission for given path. - /// - /// @see NYT::IClient::CheckPermission - virtual ::NThreading::TFuture<TCheckPermissionResponse> CheckPermission( - const TString& user, - EPermission permission, - const TYPath& path, - const TCheckPermissionOptions& options = TCheckPermissionOptions()) = 0; -}; - -/// -/// @brief Batch request object. -/// -/// Allows to send multiple lightweight requests at once significantly -/// reducing time of their execution. -/// -/// Methods of this class accept same arguments as @ref NYT::IClient methods but -/// return TFuture that is set after execution of @ref NYT::IBatchRequest::ExecuteBatch -/// -/// @see [Example of usage](https://a.yandex-team.ru/arc/trunk/arcadia/yt/cpp/mapreduce/examples/tutorial/batch_request/main.cpp) -class IBatchRequest - : public IBatchRequestBase -{ -public: - /// - /// @brief Temporary override current transaction. 
- /// - /// Using WithTransaction user can temporary override default transaction. - /// Example of usage: - /// TBatchRequest batchRequest; - /// auto noTxResult = batchRequest.Get("//some/path"); - /// auto txResult = batchRequest.WithTransaction(tx).Get("//some/path"); - virtual IBatchRequestBase& WithTransaction(const TTransactionId& transactionId) = 0; - IBatchRequestBase& WithTransaction(const ITransactionPtr& transaction); - - /// - /// @brief Executes all subrequests of batch request. - /// - /// After execution of this method all TFuture objects returned by subrequests will - /// be filled with either result or error. - /// - /// @note It is undefined in which order these requests are executed. - /// - /// @note This method doesn't throw if subrequest emits error. - /// Instead corresponding future is set with exception. - /// So it is always important to check TFuture status. - /// - /// Single TBatchRequest instance may be executed only once - /// and cannot be modified (filled with additional requests) after execution. - /// Exception is thrown on attempt to modify executed batch request - /// or execute it again. 
- virtual void ExecuteBatch(const TExecuteBatchOptions& options = TExecuteBatchOptions()) = 0; -}; - -//////////////////////////////////////////////////////////////////// - -} // namespace NYT diff --git a/yt/cpp/mapreduce/interface/client.cpp b/yt/cpp/mapreduce/interface/client.cpp deleted file mode 100644 index 11d308b809..0000000000 --- a/yt/cpp/mapreduce/interface/client.cpp +++ /dev/null @@ -1,19 +0,0 @@ -#include "client.h" - -namespace NYT { - -//////////////////////////////////////////////////////////////////////////////// - -void ILock::Wait(TDuration timeout) -{ - return GetAcquiredFuture().GetValue(timeout); -} - -void ITransaction::Detach() -{ - Y_FAIL("ITransaction::Detach() is not implemented"); -} - -//////////////////////////////////////////////////////////////////////////////// - -} // namespace NYT diff --git a/yt/cpp/mapreduce/interface/client.h b/yt/cpp/mapreduce/interface/client.h deleted file mode 100644 index 54f37c3ae0..0000000000 --- a/yt/cpp/mapreduce/interface/client.h +++ /dev/null @@ -1,568 +0,0 @@ -#pragma once - -/// -/// @file yt/cpp/mapreduce/interface/client.h -/// -/// Main header of the C++ YT Wrapper. - -/// -/// @mainpage C++ library for working with YT -/// -/// This library provides possibilities to work with YT as a [MapReduce](https://en.wikipedia.org/wiki/MapReduce) system. It allows: -/// - to read/write tables and files -/// - to run operations -/// - to work with transactions. -/// -/// This library provides only basic functions for working with dynamic tables. -/// To access full powers of YT dynamic tables one should use -/// [yt/client](https://a.yandex-team.ru/arc/trunk/arcadia/yt/19_4/yt/client) library. 
-/// -/// Entry points to this library: -/// - @ref NYT::Initialize() initialization function for this library; -/// - @ref NYT::IClient main interface to work with YT cluster; -/// - @ref NYT::CreateClient() function that creates client for particular cluster; -/// - @ref NYT::IOperationClient ancestor of @ref NYT::IClient containing the set of methods to run operations. -/// -/// Tutorial on how to use this library can be found [here](https://yt.yandex-team.ru/docs/api/c++/examples). - -#include "fwd.h" - -#include "client_method_options.h" -#include "constants.h" -#include "batch_request.h" -#include "cypress.h" -#include "init.h" -#include "io.h" -#include "node.h" -#include "operation.h" - -#include <library/cpp/threading/future/future.h> - -#include <util/datetime/base.h> -#include <util/generic/maybe.h> -#include <util/system/compiler.h> - -/// Main namespace of YT client -namespace NYT { - -//////////////////////////////////////////////////////////////////////////////// - -/// OAuth info (returned by @ref NYT::IClient::WhoAmI). -struct TAuthorizationInfo -{ - /// User's login. - TString Login; - - /// Realm. - TString Realm; -}; - -//////////////////////////////////////////////////////////////////////////////// - -/// @brief Part of @ref NYT::TCheckPermissionResponse. -/// -/// In case when 'Action == ESecurityAction::Deny' because of a 'deny' rule, -/// the "denying" object name and id and "denied" subject name an id may be returned. -struct TCheckPermissionResult -{ - /// Was the access granted or not. - ESecurityAction Action; - - /// Id of the object whose ACL's "deny" rule forbids the access. - TMaybe<TGUID> ObjectId; - - /// - /// @brief Name of the object whose ACL's "deny" rule forbids the access. - /// - /// Example is "node //tmp/x/y". - TMaybe<TString> ObjectName; - - /// Id of the subject for whom the access was denied by a "deny" rule. - TMaybe<TGUID> SubjectId; - - /// Name of the subject for whom the access was denied by a "deny" rule. 
- TMaybe<TString> SubjectName; -}; - -/// @brief Result of @ref NYT::IClient::CheckPermission command. -/// -/// The base part of the response corresponds to the check result for the node itself. -/// `Columns` vector contains check results for the columns (in the same order as in the request). -struct TCheckPermissionResponse - : public TCheckPermissionResult -{ - /// @brief Results for the table columns access permissions. - /// - /// @see [Columnar ACL doc](https://yt.yandex-team.ru/docs/description/common/columnar_acl) - TVector<TCheckPermissionResult> Columns; -}; - -//////////////////////////////////////////////////////////////////////////////// - -/// @brief Interface representing a lock obtained from @ref NYT::ITransaction::Lock. -/// -/// @see [YT doc](https://yt.yandex-team.ru/docs/api/commands.html#start-tx) -class ILock - : public TThrRefBase -{ -public: - virtual ~ILock() = default; - - /// Get cypress node id of lock itself. - virtual const TLockId& GetId() const = 0; - - /// Get cypress node id of locked object. - virtual TNodeId GetLockedNodeId() const = 0; - - /// - /// @brief Get future that will be set once lock is in "acquired" state. - /// - /// Note that future might contain exception if some error occurred - /// e.g. lock transaction was aborted. - virtual const ::NThreading::TFuture<void>& GetAcquiredFuture() const = 0; - - /// - /// @brief Wait until lock is in "acquired" state. - /// - /// Throws exception if timeout exceeded or some error occurred - /// e.g. lock transaction was aborted. - void Wait(TDuration timeout = TDuration::Max()); -}; - -//////////////////////////////////////////////////////////////////////////////// - -/// @brief Base class for @ref NYT::IClient and @ref NYT::ITransaction. -/// -/// This class contains transactional commands. 
-class IClientBase - : public TThrRefBase - , public ICypressClient - , public IIOClient - , public IOperationClient -{ -public: - /// - /// @brief Start a [transaction] (https://yt.yandex-team.ru/docs/description/storage/transactions.html#master_transactions). - /// - /// @see [YT doc](https://yt.yandex-team.ru/docs/api/commands.html#start-tx) - [[nodiscard]] virtual ITransactionPtr StartTransaction( - const TStartTransactionOptions& options = TStartTransactionOptions()) = 0; - - /// - /// @brief Change properties of table. - /// - /// Allows to: - /// - switch table between dynamic/static mode - /// - or change table schema - /// @see [YT doc](https://yt.yandex-team.ru/docs/api/commands.html#alter-table) - virtual void AlterTable( - const TYPath& path, - const TAlterTableOptions& options = TAlterTableOptions()) = 0; - - /// - /// @brief Create batch request object that allows to execute several light requests in parallel. - /// - /// @see [YT doc](https://yt.yandex-team.ru/docs/api/commands.html#execute-batch) - virtual TBatchRequestPtr CreateBatchRequest() = 0; - - /// @brief Get root client outside of all transactions. - virtual IClientPtr GetParentClient() = 0; -}; - -//////////////////////////////////////////////////////////////////////////////// - - -/// @brief Interface representing a master transaction. -/// -/// @see [YT doc](https://yt.yandex-team.ru/docs/description/storage/transactions.html#master_transactions) -class ITransaction - : virtual public IClientBase -{ -public: - /// Get id of transaction. - virtual const TTransactionId& GetId() const = 0; - - /// - /// @brief Try to lock given path. - /// - /// Lock will be held until transaction is commited/aborted or @ref NYT::ITransaction::Unlock method is called. - /// Lock modes: - /// - `LM_EXCLUSIVE`: if exclusive lock is taken no other transaction can take exclusive or shared lock. - /// - `LM_SHARED`: if shared lock is taken other transactions can take shared lock but not exclusive. 
- /// - `LM_SNAPSHOT`: snapshot lock always succeeds, when snapshot lock is taken current transaction snapshots object. - /// It will not see changes that occurred to it in other transactions. - /// - /// Exclusive/shared lock can be waitable or not. - /// If nonwaitable lock cannot be taken exception is thrown. - /// If waitable lock cannot be taken it is created in pending state and client can wait until it actually taken. - /// Check @ref NYT::TLockOptions::Waitable and @ref NYT::ILock::GetAcquiredFuture for more details. - /// - /// @see [YT doc](https://yt.yandex-team.ru/docs/api/commands.html#lock) - virtual ILockPtr Lock( - const TYPath& path, - ELockMode mode, - const TLockOptions& options = TLockOptions()) = 0; - - /// - /// @brief Remove all the locks (including pending ones) for this transaction from a Cypress node at `path`. - /// - /// If the locked version of the node differs from the original one, - /// an error will be thrown. - /// - /// Command is successful even if the node has no locks. - /// Only explicit (created by @ref NYT::ITransaction::Lock) locks are removed. - /// - /// @see [YT doc](https://yt.yandex-team.ru/docs/api/commands.html#unlock) - virtual void Unlock( - const TYPath& path, - const TUnlockOptions& options = TUnlockOptions()) = 0; - - /// - /// @brief Commit transaction. - /// - /// All changes that are made by transactions become visible globally or to parent transaction. - /// - /// @see [YT doc](https://yt.yandex-team.ru/docs/api/commands.html#commit) - virtual void Commit() = 0; - - /// - /// @brief Abort transaction. - /// - /// All changes made by current transaction are lost. - /// - /// @see [YT doc](https://yt.yandex-team.ru/docs/api/commands.html#abort) - virtual void Abort() = 0; - - /// @brief Explicitly ping transaction. - /// - /// User usually does not need this method (as transactions are pinged automatically, - /// see @ref NYT::TStartTransactionOptions::AutoPingable). 
- /// @see [YT doc](https://yt.yandex-team.ru/docs/api/commands.html#ping) - virtual void Ping() = 0; - - /// - /// @brief Detach transaction. - /// - /// Stop any activities connected with it: pinging, aborting on crashes etc. - /// Forget about the transaction totally. - virtual void Detach(); -}; - -//////////////////////////////////////////////////////////////////////////////// - -/// Interface containing non-transactional commands. -class IClient - : virtual public IClientBase -{ -public: - /// - /// @brief Attach to existing master transaction. - /// - /// Returned object WILL NOT: - /// - ping transaction automatically (unless @ref NYT::TAttachTransactionOptions::AutoPing is set) - /// - abort it on program termination (unless @ref NYT::TAttachTransactionOptions::AbortOnTermination is set). - /// Otherwise returned object is similar to the object returned by @ref NYT::IClientBase::StartTransaction. - /// and it can see all the changes made inside the transaction. - [[nodiscard]] virtual ITransactionPtr AttachTransaction( - const TTransactionId& transactionId, - const TAttachTransactionOptions& options = TAttachTransactionOptions()) = 0; - - /// - /// @brief Mount dynamic table. - /// - /// @see [YT doc](https://yt.yandex-team.ru/docs/api/commands.html#mount-table) - virtual void MountTable( - const TYPath& path, - const TMountTableOptions& options = TMountTableOptions()) = 0; - - /// - /// @brief Unmount dynamic table. - /// - /// @see [YT doc](https://yt.yandex-team.ru/docs/api/commands.html#unmount-table) - virtual void UnmountTable( - const TYPath& path, - const TUnmountTableOptions& options = TUnmountTableOptions()) = 0; - - /// - /// @brief Remount dynamic table. - /// - /// @see [YT doc](https://yt.yandex-team.ru/docs/api/commands.html#remount-table) - virtual void RemountTable( - const TYPath& path, - const TRemountTableOptions& options = TRemountTableOptions()) = 0; - - /// - /// @brief Switch dynamic table from `mounted' into `frozen' state. 
- /// - /// When table is in frozen state all its data is flushed to disk and writes are disabled. - /// - /// @note this function launches the process of switching, but doesn't wait until switching is accomplished. - /// Waiting has to be performed by user. - /// - /// @see [YT doc](https://yt.yandex-team.ru/docs/api/commands.html#freeze-table) - virtual void FreezeTable( - const TYPath& path, - const TFreezeTableOptions& options = TFreezeTableOptions()) = 0; - - /// - /// @brief Switch dynamic table from `frozen` into `mounted` state. - /// - /// @note this function launches the process of switching, but doesn't wait until switching is accomplished. - /// Waiting has to be performed by user. - /// - /// @see [YT doc](https://yt.yandex-team.ru/docs/api/commands.html#unfreeze-table) - virtual void UnfreezeTable( - const TYPath& path, - const TUnfreezeTableOptions& options = TUnfreezeTableOptions()) = 0; - - /// - /// @brief Reshard dynamic table (break it into tablets) by given pivot keys. - /// - /// @see [YT doc](https://yt.yandex-team.ru/docs/api/commands.html#reshard-table) - virtual void ReshardTable( - const TYPath& path, - const TVector<TKey>& pivotKeys, - const TReshardTableOptions& options = TReshardTableOptions()) = 0; - - /// - /// @brief Reshard dynamic table, breaking it into given number of tablets. - /// - /// @see [YT doc](https://yt.yandex-team.ru/docs/api/commands.html#reshard-table) - virtual void ReshardTable( - const TYPath& path, - i64 tabletCount, - const TReshardTableOptions& options = TReshardTableOptions()) = 0; - - /// - /// @brief Insert rows into dynamic table. - /// - /// @see [YT doc](https://yt.yandex-team.ru/docs/api/commands.html#insert-rows) - virtual void InsertRows( - const TYPath& path, - const TNode::TListType& rows, - const TInsertRowsOptions& options = TInsertRowsOptions()) = 0; - - /// - /// @brief Delete rows from dynamic table. 
- /// - /// @see [YT doc](https://yt.yandex-team.ru/docs/api/commands.html#delete-rows) - virtual void DeleteRows( - const TYPath& path, - const TNode::TListType& keys, - const TDeleteRowsOptions& options = TDeleteRowsOptions()) = 0; - - /// - /// @brief Trim rows from the beginning of ordered dynamic table. - /// - /// Asynchronously removes `rowCount` rows from the beginning of ordered dynamic table. - /// Numeration of remaining rows *does not change*, e.g. after `trim(10)` and `trim(20)` - /// you get in total `20` deleted rows. - /// - /// @param path Path to ordered dynamic table. - /// @param tabletIndex Which tablet to trim. - /// @param rowCount How many trimmed rows will be in the table after command. - /// @param options Optional parameters. - /// - /// @see [YT doc](https://yt.yandex-team.ru/docs/api/commands.html#trim-rows) - virtual void TrimRows( - const TYPath& path, - i64 tabletIndex, - i64 rowCount, - const TTrimRowsOptions& options = TTrimRowsOptions()) = 0; - - /// - /// @brief Lookup rows with given keys from dynamic table. - /// - /// @see [YT doc](https://yt.yandex-team.ru/docs/api/commands.html#lookup-rows) - virtual TNode::TListType LookupRows( - const TYPath& path, - const TNode::TListType& keys, - const TLookupRowsOptions& options = TLookupRowsOptions()) = 0; - - /// - /// @brief Select rows from dynamic table, using [SQL dialect](https://yt.yandex-team.ru/docs//description/dynamic_tables/dyn_query_language.html). - /// - /// @see [YT doc](https://yt.yandex-team.ru/docs/api/commands.html#select-rows) - virtual TNode::TListType SelectRows( - const TString& query, - const TSelectRowsOptions& options = TSelectRowsOptions()) = 0; - - /// - /// @brief Change properties of table replica. - /// - /// Allows to enable/disable replica and/or change its mode. 
- /// - /// @see [YT doc](https://yt.yandex-team.ru/docs/api/commands.html#alter-table-replica) - virtual void AlterTableReplica( - const TReplicaId& replicaId, - const TAlterTableReplicaOptions& alterTableReplicaOptions) = 0; - - /// - /// @brief Generate a monotonously increasing master timestamp. - /// - /// @see [YT doc](https://yt.yandex-team.ru/docs/api/commands.html#generate-timestamp) - virtual ui64 GenerateTimestamp() = 0; - - /// Return YT username of current client. - virtual TAuthorizationInfo WhoAmI() = 0; - - /// - /// @brief Get operation attributes. - /// - /// @see [YT doc](https://yt.yandex-team.ru/docs/api/commands.html#get-operation) - virtual TOperationAttributes GetOperation( - const TOperationId& operationId, - const TGetOperationOptions& options = TGetOperationOptions()) = 0; - - /// - /// @brief List operations satisfying given filters. - /// - /// @see [YT doc](https://yt.yandex-team.ru/docs/api/commands.html#list-operations) - virtual TListOperationsResult ListOperations( - const TListOperationsOptions& options = TListOperationsOptions()) = 0; - - /// - /// @brief Update operation runtime parameters. - /// - /// @see [YT doc](https://yt.yandex-team.ru/docs/api/commands.html#update-op-parameters) - virtual void UpdateOperationParameters( - const TOperationId& operationId, - const TUpdateOperationParametersOptions& options) = 0; - - /// - /// @brief Get job attributes. - /// - /// @see [YT doc](https://yt.yandex-team.ru/docs/api/commands.html#get-job) - virtual TJobAttributes GetJob( - const TOperationId& operationId, - const TJobId& jobId, - const TGetJobOptions& options = TGetJobOptions()) = 0; - - /// - /// List attributes of jobs satisfying given filters. - /// - /// @see [YT doc](https://yt.yandex-team.ru/docs/api/commands.html#list-jobs) - virtual TListJobsResult ListJobs( - const TOperationId& operationId, - const TListJobsOptions& options = TListJobsOptions()) = 0; - - /// - /// @brief Get the input of a running or failed job. 
- /// - /// @ref NYT::TErrorResponse exception is thrown if job is missing. - /// - /// @see [YT doc](https://yt.yandex-team.ru/docs/api/commands.html#get-job-input) - virtual IFileReaderPtr GetJobInput( - const TJobId& jobId, - const TGetJobInputOptions& options = TGetJobInputOptions()) = 0; - - /// - /// @brief Get fail context of a failed job. - /// - /// @ref NYT::TErrorResponse exception is thrown if it is missing. - /// - /// @see [YT doc](https://yt.yandex-team.ru/docs/api/commands.html#get-job-fail-context) - virtual IFileReaderPtr GetJobFailContext( - const TOperationId& operationId, - const TJobId& jobId, - const TGetJobFailContextOptions& options = TGetJobFailContextOptions()) = 0; - - /// - /// @brief Get stderr of a running or failed job. - /// - /// @ref NYT::TErrorResponse exception is thrown if it is missing. - /// - /// @note YT doesn't store all job stderrs - /// - /// @note If job stderr exceeds few megabytes YT will store only head and tail of stderr. - /// - /// @see Description of `max_stderr_size` spec option [here](https://yt.yandex-team.ru/docs//description/mr/operations_options.html). - /// - /// @see [YT doc](https://yt.yandex-team.ru/docs/api/commands.html#get-job-stderr) - virtual IFileReaderPtr GetJobStderr( - const TOperationId& operationId, - const TJobId& jobId, - const TGetJobStderrOptions& options = TGetJobStderrOptions()) = 0; - - /// - /// @brief Create one or several rbtorrents for files in a blob table. - /// - /// If specified, one torrent is created for each value of `KeyColumns` option. - /// Otherwise, a single torrent with all files of a table is created. - /// - /// @return list of nodes, each node has two fields - /// * `key`: list of key columns values. Empty if `KeyColumns` is not specified. 
- /// * `rbtorrent`: rbtorrent string (with `rbtorrent:` prefix) - /// - /// @see [More info.](https://docs.yandex-team.ru/docs/yt/description/storage/blobtables#sky_share) - virtual TNode::TListType SkyShareTable( - const std::vector<TYPath>& tablePaths, - const TSkyShareTableOptions& options) = 0; - - /// - /// @brief Check if `user` has `permission` to access a Cypress node at `path`. - /// - /// For tables access to columns specified in `options.Columns_` can be checked - /// (@see [the doc](https://yt.yandex-team.ru/docs/description/common/columnar_acl)). - /// - /// If access is denied (the returned result has `.Action == ESecurityAction::Deny`) - /// because of a `deny` rule, the "denying" object name and id - /// and "denied" subject name an id may be returned. - /// - /// @see [YT doc](https://yt.yandex-team.ru/docs/api/commands.html#check_permission) - virtual TCheckPermissionResponse CheckPermission( - const TString& user, - EPermission permission, - const TYPath& path, - const TCheckPermissionOptions& options = TCheckPermissionOptions()) = 0; - - /// @brief Get information about tablet - /// @see NYT::TTabletInfo - virtual TVector<TTabletInfo> GetTabletInfos( - const TYPath& path, - const TVector<int>& tabletIndexes, - const TGetTabletInfosOptions& options = TGetTabletInfosOptions()) = 0; - - /// - /// @brief Suspend operation. - /// - /// Jobs will be aborted. - /// - /// @see [YT doc](https://yt.yandex-team.ru/docs/api/commands.html#suspend_op) - virtual void SuspendOperation( - const TOperationId& operationId, - const TSuspendOperationOptions& options = TSuspendOperationOptions()) = 0; - - /// @brief Resume previously suspended operation. 
- /// - /// @see [YT doc](https://yt.yandex-team.ru/docs/api/commands.html#resume_op) - virtual void ResumeOperation( - const TOperationId& operationId, - const TResumeOperationOptions& options = TResumeOperationOptions()) = 0; - - /// - /// @brief Synchronously terminates all client's background activities - /// - /// e.g. no callbacks will be executed after the function is completed - /// - /// @note It is safe to call Shutdown multiple times - /// - /// @note @ref NYT::TApiUsageError will be thrown if any client's method is called after shutdown - /// - virtual void Shutdown() = 0; -}; - - -/// Create a client for particular MapReduce cluster. -IClientPtr CreateClient( - const TString& serverName, - const TCreateClientOptions& options = TCreateClientOptions()); - - -/// Create a client for mapreduce cluster specified in `YT_PROXY` environment variable. -IClientPtr CreateClientFromEnv( - const TCreateClientOptions& options = TCreateClientOptions()); - -//////////////////////////////////////////////////////////////////////////////// - -} // namespace NYT diff --git a/yt/cpp/mapreduce/interface/client_method_options.cpp b/yt/cpp/mapreduce/interface/client_method_options.cpp deleted file mode 100644 index 66f72bfe5f..0000000000 --- a/yt/cpp/mapreduce/interface/client_method_options.cpp +++ /dev/null @@ -1,34 +0,0 @@ -#include "client_method_options.h" - -#include "tvm.h" - -namespace NYT { - -template <typename T> -static void MergeMaybe(TMaybe<T>& origin, const TMaybe<T>& patch) -{ - if (patch) { - origin = patch; - } -} - -void TFormatHints::Merge(const TFormatHints& patch) -{ - if (patch.SkipNullValuesForTNode_) { - SkipNullValuesForTNode(true); - } - MergeMaybe(EnableStringToAllConversion_, patch.EnableStringToAllConversion_); - MergeMaybe(EnableAllToStringConversion_, patch.EnableAllToStringConversion_); - MergeMaybe(EnableIntegralTypeConversion_, patch.EnableIntegralTypeConversion_); - MergeMaybe(EnableIntegralToDoubleConversion_, 
patch.EnableIntegralToDoubleConversion_); - MergeMaybe(EnableTypeConversion_, patch.EnableTypeConversion_); - MergeMaybe(ComplexTypeMode_, patch.ComplexTypeMode_); -} - -TCreateClientOptions& TCreateClientOptions::ServiceTicketAuth(const NAuth::IServiceTicketAuthPtrWrapper& wrapper) -{ - ServiceTicketAuth_ = std::make_shared<NAuth::IServiceTicketAuthPtrWrapper>(wrapper); - return *this; -} - -} // namespace NYT diff --git a/yt/cpp/mapreduce/interface/client_method_options.h b/yt/cpp/mapreduce/interface/client_method_options.h deleted file mode 100644 index 8074632353..0000000000 --- a/yt/cpp/mapreduce/interface/client_method_options.h +++ /dev/null @@ -1,1452 +0,0 @@ -#pragma once - -/// -/// @file yt/cpp/mapreduce/interface/client_method_options.h -/// -/// Header containing options for @ref NYT::IClient methods. - -#include "common.h" -#include "config.h" -#include "format.h" -#include "public.h" -#include "retry_policy.h" - -#include <util/datetime/base.h> - -namespace NYT { - -//////////////////////////////////////////////////////////////////////////////// - -/// Type of the cypress node. -enum ENodeType : int -{ - NT_STRING /* "string_node" */, - NT_INT64 /* "int64_node" */, - NT_UINT64 /* "uint64_node" */, - NT_DOUBLE /* "double_node" */, - NT_BOOLEAN /* "boolean_node" */, - NT_MAP /* "map_node" */, - NT_LIST /* "list_node" */, - NT_FILE /* "file" */, - NT_TABLE /* "table" */, - NT_DOCUMENT /* "document" */, - NT_REPLICATED_TABLE /* "replicated_table" */, - NT_TABLE_REPLICA /* "table_replica" */, - NT_USER /* "user" */, - NT_SCHEDULER_POOL /* "scheduler_pool" */, - NT_LINK /* "link" */, -}; - -/// -/// @brief Mode of composite type representation in yson. 
-/// -/// @see https://yt.yandex-team.ru/docs/description/storage/data_types#yson -enum class EComplexTypeMode : int -{ - Named /* "named" */, - Positional /* "positional" */, -}; - -/// -/// @brief Options for @ref NYT::ICypressClient::Create -/// -/// @see https://yt.yandex-team.ru/docs/api/commands.html#create -struct TCreateOptions -{ - /// @cond Doxygen_Suppress - using TSelf = TCreateOptions; - /// @endcond - - /// Create missing parent directories if required. - FLUENT_FIELD_DEFAULT(bool, Recursive, false); - - /// - /// @brief Do not raise error if node already exists. - /// - /// Node is not recreated. - /// Force and IgnoreExisting MUST NOT be used simultaneously. - FLUENT_FIELD_DEFAULT(bool, IgnoreExisting, false); - - /// - /// @brief Recreate node if it exists. - /// - /// Force and IgnoreExisting MUST NOT be used simultaneously. - FLUENT_FIELD_DEFAULT(bool, Force, false); - - /// @brief Set node attributes. - FLUENT_FIELD_OPTION(TNode, Attributes); -}; - -/// -/// @brief Options for @ref NYT::ICypressClient::Remove -/// -/// @see https://yt.yandex-team.ru/docs/api/commands.html#remove -struct TRemoveOptions -{ - /// @cond Doxygen_Suppress - using TSelf = TRemoveOptions; - /// @endcond - - /// - /// @brief Remove whole tree when removing composite cypress node (e.g. `map_node`). - /// - /// Without this option removing nonempty composite node will fail. - FLUENT_FIELD_DEFAULT(bool, Recursive, false); - - /// @brief Do not fail if removing node doesn't exist. - FLUENT_FIELD_DEFAULT(bool, Force, false); -}; - -/// Base class for options for operations that read from master. -template <typename TDerived> -struct TMasterReadOptions -{ - /// @cond Doxygen_Suppress - using TSelf = TDerived; - /// @endcond - - /// @brief Where to read from. 
- FLUENT_FIELD_OPTION(EMasterReadKind, ReadFrom); -}; - -/// -/// @brief Options for @ref NYT::ICypressClient::Exists -/// -/// @see https://yt.yandex-team.ru/docs/api/commands.html#exists -struct TExistsOptions - : public TMasterReadOptions<TExistsOptions> -{ -}; - -/// -/// @brief Options for @ref NYT::ICypressClient::Get -/// -/// @see https://yt.yandex-team.ru/docs/api/commands.html#get -struct TGetOptions - : public TMasterReadOptions<TGetOptions> -{ - /// @brief Attributes that should be fetched with each node. - FLUENT_FIELD_OPTION(TAttributeFilter, AttributeFilter); - - /// @brief Limit for the number of children node. - FLUENT_FIELD_OPTION(i64, MaxSize); -}; - -/// -/// @brief Options for @ref NYT::ICypressClient::Set -/// -/// @see https://yt.yandex-team.ru/docs/api/commands.html#set -struct TSetOptions -{ - /// @cond Doxygen_Suppress - using TSelf = TSetOptions; - /// @endcond - - /// Create missing parent directories if required. - FLUENT_FIELD_DEFAULT(bool, Recursive, false); - - /// Allow setting any nodes, not only attribute and document ones. - FLUENT_FIELD_OPTION(bool, Force); -}; - -/// -/// @brief Options for @ref NYT::ICypressClient::MultisetAttributes -/// -/// @see https://yt.yandex-team.ru/docs/api/commands.html#multiset_attributes -struct TMultisetAttributesOptions -{ }; - -/// -/// @brief Options for @ref NYT::ICypressClient::List -/// -/// @see https://yt.yandex-team.ru/docs/api/commands.html#list -struct TListOptions - : public TMasterReadOptions<TListOptions> -{ - /// @cond Doxygen_Suppress - using TSelf = TListOptions; - /// @endcond - - /// Attributes that should be fetched for each node. - FLUENT_FIELD_OPTION(TAttributeFilter, AttributeFilter); - - /// Limit for the number of children that will be fetched. 
- FLUENT_FIELD_OPTION(i64, MaxSize); -}; - -/// -/// @brief Options for @ref NYT::ICypressClient::Copy -/// -/// @see https://yt.yandex-team.ru/docs/api/commands.html#copy -struct TCopyOptions -{ - /// @cond Doxygen_Suppress - using TSelf = TCopyOptions; - /// @endcond - - /// Create missing directories in destination path if required. - FLUENT_FIELD_DEFAULT(bool, Recursive, false); - - /// Allows using an existing node as destination; it will be overwritten. - FLUENT_FIELD_DEFAULT(bool, Force, false); - - /// Whether to preserve the account of source node. - FLUENT_FIELD_DEFAULT(bool, PreserveAccount, false); - - /// Whether to preserve `expiration_time` attribute of source node. - FLUENT_FIELD_OPTION(bool, PreserveExpirationTime); -}; - -/// -/// @brief Options for @ref NYT::ICypressClient::Move -/// -/// @see https://yt.yandex-team.ru/docs/api/commands.html#move -struct TMoveOptions -{ - /// @cond Doxygen_Suppress - using TSelf = TMoveOptions; - /// @endcond - - /// Create missing directories in destination path if required. - FLUENT_FIELD_DEFAULT(bool, Recursive, false); - - /// Allows using an existing node as destination; it will be overwritten. - FLUENT_FIELD_DEFAULT(bool, Force, false); - - /// Whether to preserve the account of source node. - FLUENT_FIELD_DEFAULT(bool, PreserveAccount, false); - - /// Whether to preserve `expiration_time` attribute of source node. - FLUENT_FIELD_OPTION(bool, PreserveExpirationTime); -}; - -/// -/// @brief Options for @ref NYT::ICypressClient::Link -/// -/// @see https://yt.yandex-team.ru/docs/api/commands.html#link -struct TLinkOptions -{ - /// @cond Doxygen_Suppress - using TSelf = TLinkOptions; - /// @endcond - - /// Create parent directories of destination if they don't exist. - FLUENT_FIELD_DEFAULT(bool, Recursive, false); - - /// Do not raise error if link already exists. - FLUENT_FIELD_DEFAULT(bool, IgnoreExisting, false); - - /// Force rewrite target node. 
- FLUENT_FIELD_DEFAULT(bool, Force, false); - - /// Attributes of created link. - FLUENT_FIELD_OPTION(TNode, Attributes); -}; - -/// -/// @brief Options for @ref NYT::ICypressClient::Concatenate -/// -/// @see https://yt.yandex-team.ru/docs/api/commands.html#concatenate -struct TConcatenateOptions -{ - /// @cond Doxygen_Suppress - using TSelf = TConcatenateOptions; - /// @endcond - - /// Whether we should append to destination or rewrite it. - FLUENT_FIELD_OPTION(bool, Append); -}; - -/// -/// @brief Options for @ref NYT::IIOClient::CreateBlobTableReader -/// -/// @see https://yt.yandex-team.ru/docs/api/commands.html#read_blob_table -struct TBlobTableReaderOptions -{ - /// @cond Doxygen_Suppress - using TSelf = TBlobTableReaderOptions; - /// @endcond - - /// Name of the part index column. By default it is "part_index". - FLUENT_FIELD_OPTION(TString, PartIndexColumnName); - - /// Name of the data column. By default it is "data". - FLUENT_FIELD_OPTION(TString, DataColumnName); - - /// - /// @brief Size of each part. - /// - /// All blob parts except the last part of the blob must be of this size - /// otherwise blob table reader emits error. - FLUENT_FIELD_DEFAULT(ui64, PartSize, 4 * 1024 * 1024); - - /// @brief Offset from which to start reading - FLUENT_FIELD_DEFAULT(i64, Offset, 0); -}; - -/// -/// @brief Resource limits for operation (or pool) -/// -/// @see https://yt.yandex-team.ru/docs/description/mr/scheduler/scheduler_and_pools#resursy -/// @see NYT::TUpdateOperationParametersOptions -struct TResourceLimits -{ - /// @cond Doxygen_Suppress - using TSelf = TResourceLimits; - /// @endcond - - /// Number of slots for user jobs. - FLUENT_FIELD_OPTION(i64, UserSlots); - - /// Number of cpu cores. - FLUENT_FIELD_OPTION(double, Cpu); - - /// Network usage. Doesn't have precise physical unit. - FLUENT_FIELD_OPTION(i64, Network); - - /// Memory in bytes. - FLUENT_FIELD_OPTION(i64, Memory); -}; - -/// -/// @brief Scheduling options for single pool tree. 
-/// -/// @see NYT::TUpdateOperationParametersOptions -struct TSchedulingOptions -{ - /// @cond Doxygen_Suppress - using TSelf = TSchedulingOptions; - /// @endcond - - /// - /// @brief Pool to switch operation to. - /// - /// @note Switching is currently disabled on the server (will induce an exception). - FLUENT_FIELD_OPTION(TString, Pool); - - /// @brief Operation weight. - FLUENT_FIELD_OPTION(double, Weight); - - /// @brief Operation resource limits. - FLUENT_FIELD_OPTION(TResourceLimits, ResourceLimits); -}; - -/// -/// @brief Collection of scheduling options for multiple pool trees. -/// -/// @see NYT::TUpdateOperationParametersOptions -struct TSchedulingOptionsPerPoolTree -{ - /// @cond Doxygen_Suppress - using TSelf = TSchedulingOptionsPerPoolTree; - /// @endcond - - TSchedulingOptionsPerPoolTree(const THashMap<TString, TSchedulingOptions>& options = {}) - : Options_(options) - { } - - /// Add scheduling options for pool tree. - TSelf& Add(TStringBuf poolTreeName, const TSchedulingOptions& schedulingOptions) - { - Y_ENSURE(Options_.emplace(poolTreeName, schedulingOptions).second); - return *this; - } - - THashMap<TString, TSchedulingOptions> Options_; -}; - -/// -/// @brief Options for @ref NYT::IOperation::SuspendOperation -/// -/// @see https://yt.yandex-team.ru/docs/api/commands.html#suspend_op -struct TSuspendOperationOptions -{ - /// @cond Doxygen_Suppress - using TSelf = TSuspendOperationOptions; - /// @endcond - - /// - /// @brief Whether to abort already running jobs. - /// - /// By default running jobs are not aborted. - FLUENT_FIELD_OPTION(bool, AbortRunningJobs); -}; - -/// -/// @brief Options for @ref NYT::IOperation::ResumeOperation -/// -/// @note They are empty for now but options might appear in the future. 
-/// -/// @see https://yt.yandex-team.ru/docs/api/commands.html#resume_op -struct TResumeOperationOptions -{ - /// @cond Doxygen_Suppress - using TSelf = TResumeOperationOptions; - /// @endcond -}; - -/// -/// @brief Options for @ref NYT::IOperation::UpdateParameters -/// -/// @see https://yt.yandex-team.ru/docs/api/commands.html#update_op_parameters -struct TUpdateOperationParametersOptions -{ - /// @cond Doxygen_Suppress - using TSelf = TUpdateOperationParametersOptions; - /// @endcond - - /// New owners of the operation. - FLUENT_VECTOR_FIELD(TString, Owner); - - /// Pool to switch operation to (for all pool trees it is running in). - FLUENT_FIELD_OPTION(TString, Pool); - - /// New operation weight (for all pool trees it is running in). - FLUENT_FIELD_OPTION(double, Weight); - - /// Scheduling options for each pool tree the operation is running in. - FLUENT_FIELD_OPTION(TSchedulingOptionsPerPoolTree, SchedulingOptionsPerPoolTree); -}; - -/// -/// @brief Base class for many options related to IO. -/// -/// @ref NYT::TFileWriterOptions -/// @ref NYT::TFileReaderOptions -/// @ref NYT::TTableReaderOptions -/// @ref NYT::TTableWriterOptions -template <class TDerived> -struct TIOOptions -{ - /// @cond Doxygen_Suppress - using TSelf = TDerived; - /// @endcond - - /// - /// @brief Advanced options for reader/writer. - /// - /// Readers/writers have many options, not all of which are supported by the library. - /// If you need such unsupported option, you might use `Config` option until - /// option is supported. - /// - /// Example: - /// - /// TTableWriterOptions().Config(TNode()("max_row_weight", 64 << 20)) - /// - /// @note We encourage you to ask yt@ to add native C++ support of required options - /// and use `Config` only as temporary solution while native support is not ready. - FLUENT_FIELD_OPTION(TNode, Config); - - /// - /// @brief Whether to create internal client transaction for reading / writing table. - /// - /// This is an advanced option. 
- /// - /// If `CreateTransaction` is set to `false` reader/writer doesn't create internal transaction - /// and doesn't lock table. This option is overridden (effectively `false`) for writers by - /// @ref NYT::TTableWriterOptions::SingleHttpRequest - /// - /// WARNING: if `CreateTransaction` is `false`, read/write might become non-atomic. - /// Change ONLY if you are sure what you are doing! - FLUENT_FIELD_DEFAULT(bool, CreateTransaction, true); -}; - -/// @brief Options for reading file from YT. -struct TFileReaderOptions - : public TIOOptions<TFileReaderOptions> -{ - /// - /// @brief Offset to start reading from. - /// - /// By default reading is started from the beginning of the file. - FLUENT_FIELD_OPTION(i64, Offset); - - /// - /// @brief Maximum length to read. - /// - /// By default file is read until the end. - FLUENT_FIELD_OPTION(i64, Length); -}; - -/// @brief Options that control how server side of YT stores data. -struct TWriterOptions -{ - /// @cond Doxygen_Suppress - using TSelf = TWriterOptions; - /// @endcond - - /// - /// @brief Whether to finish the upload without waiting for all replicas to be written. - /// - /// When set to true upload will be considered successful as soon as - /// @ref NYT::TWriterOptions::MinUploadReplicationFactor number of replicas are created. - FLUENT_FIELD_OPTION(bool, EnableEarlyFinish); - - /// Number of replicas to be created. - FLUENT_FIELD_OPTION(ui64, UploadReplicationFactor); - - /// - /// Min number of created replicas needed to consider upload successful. - /// - /// @see NYT::TWriterOptions::EnableEarlyFinish - FLUENT_FIELD_OPTION(ui64, MinUploadReplicationFactor); - - /// - /// @brief Desired size of a chunk. - /// - /// @see @ref NYT::TWriterOptions::RetryBlockSize - FLUENT_FIELD_OPTION(ui64, DesiredChunkSize); - - /// - /// @brief Size of data block accumulated in memory to provide retries. - /// - /// Data is accumulated in memory buffer so in case error occurs data could be resent. 
- /// - /// If `RetryBlockSize` is not set buffer size is set to `DesiredChunkSize`. - /// If neither `RetryBlockSize` nor `DesiredChunkSize` is set size of buffer is 64MB. - /// - /// @note Written chunks cannot be larger than size of this memory buffer. - /// - /// Since DesiredChunkSize is compared against data already compressed with compression codec - /// it makes sense to set `RetryBlockSize = DesiredChunkSize / ExpectedCompressionRatio` - /// - /// @see @ref NYT::TWriterOptions::DesiredChunkSize - /// @see @ref NYT::TTableWriterOptions::SingleHttpRequest - FLUENT_FIELD_OPTION(size_t, RetryBlockSize); -}; - -/// -/// @brief Options for writing file -/// -/// @see NYT::IIOClient::CreateFileWriter -struct TFileWriterOptions - : public TIOOptions<TFileWriterOptions> -{ - /// - /// @brief Whether to compute MD5 sum of written file. - /// - /// If ComputeMD5 is set to `true` and we are appending to an existing file - /// the `md5` attribute must be set (i.e. it was previously written only with `ComputeMD5 == true`). - FLUENT_FIELD_OPTION(bool, ComputeMD5); - - /// - /// @brief Options to control how YT server side writes data. - /// - /// @see NYT::TWriterOptions - FLUENT_FIELD_OPTION(TWriterOptions, WriterOptions); -}; - -class TSkiffRowHints { -public: - /// @cond Doxygen_Suppress - using TSelf = TSkiffRowHints; - /// @endcond - - /// - /// @brief Library doesn't interpret it, only passes it to CreateSkiffParser<...>() and GetSkiffSchema<...>() functions. - /// - /// You can set something in it to pass necessary information to CreateSkiffParser<...>() and GetSkiffSchema<...>() functions. - FLUENT_FIELD_OPTION(TNode, Attributes); -}; - -/// Options that control how C++ objects represent table rows when reading or writing a table. -class TFormatHints -{ -public: - /// @cond Doxygen_Suppress - using TSelf = TFormatHints; - /// @endcond - - /// - /// @brief Whether to skip null values. - /// - /// When set to true TNode doesn't contain null column values - /// (e.g. 
corresponding keys will be missing instead of containing null value). - /// - /// Only meaningful for TNode representation. - /// - /// Useful for sparse tables which have many columns in schema - /// but only few columns are set in any row. - FLUENT_FIELD_DEFAULT(bool, SkipNullValuesForTNode, false); - - /// - /// @brief Whether to convert string to numeric and boolean types (e.g. "42u" -> 42u, "false" -> %false) - /// when writing to schemaful table. - FLUENT_FIELD_OPTION(bool, EnableStringToAllConversion); - - /// - /// @brief Whether to convert numeric and boolean types to string (e.g., 3.14 -> "3.14", %true -> "true") - /// when writing to schemaful table. - FLUENT_FIELD_OPTION(bool, EnableAllToStringConversion); - - /// - /// @brief Whether to convert uint64 <-> int64 when writing to schemaful table. - /// - /// On overflow the corresponding error will be raised. - /// - /// This option is enabled by default. - FLUENT_FIELD_OPTION(bool, EnableIntegralTypeConversion); - - /// Whether to convert uint64 and int64 to double (e.g. 42 -> 42.0) when writing to schemaful table. - FLUENT_FIELD_OPTION(bool, EnableIntegralToDoubleConversion); - - /// Shortcut for enabling all type conversions. - FLUENT_FIELD_OPTION(bool, EnableTypeConversion); - - /// - /// @brief Controls how complex types are represented in TNode or yson-strings. - /// - /// @see https://yt.yandex-team.ru/docs/description/storage/data_types#yson - FLUENT_FIELD_OPTION(EComplexTypeMode, ComplexTypeMode); - - /// - /// @brief Allow to use any meta-information for creating skiff schema and parser for reading ISkiffRow. - FLUENT_FIELD_OPTION(TSkiffRowHints, SkiffRowHints); - - /// - /// @brief Apply the patch to the fields. - /// - /// Non-default and non-empty values replace the default and empty ones. - void Merge(const TFormatHints& patch); -}; - -/// Options that control which control attributes (like row_index) are added to rows during read. 
-class TControlAttributes -{ -public: - /// @cond Doxygen_Suppress - using TSelf = TControlAttributes; - /// @endcond - - /// - /// @brief Whether to add "row_index" attribute to rows read. - FLUENT_FIELD_DEFAULT(bool, EnableRowIndex, true); - - /// - /// @brief Whether to add "range_index" attribute to rows read. - FLUENT_FIELD_DEFAULT(bool, EnableRangeIndex, true); -}; - -/// Options for @ref NYT::IClient::CreateTableReader -struct TTableReaderOptions - : public TIOOptions<TTableReaderOptions> -{ - /// @deprecated Size of internal client buffer. - FLUENT_FIELD_DEFAULT(size_t, SizeLimit, 4 << 20); - - /// - /// @brief Allows to fine tune format that is used for reading tables. - /// - /// Has no effect when used with raw-reader. - FLUENT_FIELD_OPTION(TFormatHints, FormatHints); - - /// - /// @brief Allows to tune which attributes are added to rows while reading tables. - /// - FLUENT_FIELD_DEFAULT(TControlAttributes, ControlAttributes, TControlAttributes()); -}; - -/// Options for @ref NYT::IClient::CreateTableWriter -struct TTableWriterOptions - : public TIOOptions<TTableWriterOptions> -{ - /// - /// @brief Enable or disable retryful writing. - /// - /// If set to true no retry is made but we also make less requests to master. - /// If set to false writer can make up to `TConfig::RetryCount` attempts to send each block of data. - /// - /// @note Writers' methods might throw strange exceptions that might look like network error - /// when `SingleHttpRequest == true` and YT node encounters an error - /// (due to limitations of HTTP protocol YT node have no chance to report error - /// before it reads the whole input so it just drops the connection). - FLUENT_FIELD_DEFAULT(bool, SingleHttpRequest, false); - - /// - /// @brief Allows to change the size of locally buffered rows before flushing to yt. 
- /// - /// Used only with @ref NYT::TTableWriterOptions::SingleHttpRequest - FLUENT_FIELD_DEFAULT(size_t, BufferSize, 64 << 20); - - /// - /// @brief Allows to fine tune format that is used for writing tables. - /// - /// Has no effect when used with raw-writer. - FLUENT_FIELD_OPTION(TFormatHints, FormatHints); - - /// @brief Try to infer schema of inexistent table from the type of written rows. - /// - /// @note Default values for this option may differ depending on the row type. - /// For protobuf it's currently false by default. - FLUENT_FIELD_OPTION(bool, InferSchema); - - /// - /// @brief Options to control how YT server side writes data. - /// - /// @see NYT::TWriterOptions - FLUENT_FIELD_OPTION(TWriterOptions, WriterOptions); -}; - -/// -/// @brief Options for @ref NYT::IClient::StartTransaction -/// -/// @see https://yt.yandex-team.ru/docs/api/commands.html#start_tx -struct TStartTransactionOptions -{ - /// @cond Doxygen_Suppress - using TSelf = TStartTransactionOptions; - /// @endcond - - FLUENT_FIELD_DEFAULT(bool, PingAncestors, false); - - /// - /// @brief How long transaction lives after last ping. - /// - /// If server doesn't receive any pings for transaction for this time - /// transaction will be aborted. By default timeout is 15 seconds. - FLUENT_FIELD_OPTION(TDuration, Timeout); - - /// - /// @brief Moment in the future when transaction is aborted. - FLUENT_FIELD_OPTION(TInstant, Deadline); - - /// - /// @brief Whether to ping created transaction automatically. - /// - /// When set to true library creates a thread that pings transaction. - /// When set to false library doesn't ping transaction and it's user responsibility to ping it. - FLUENT_FIELD_DEFAULT(bool, AutoPingable, true); - - /// - /// @brief Set the title attribute of transaction. - /// - /// If title was not specified - /// neither using this option nor using @ref NYT::TStartTransactionOptions::Attributes option - /// library will generate default title for transaction. 
- /// Such default title includes machine name, pid, user name and some other useful info. - FLUENT_FIELD_OPTION(TString, Title); - - /// - /// @brief Set custom transaction attributes - /// - /// @note @ref NYT::TStartTransactionOptions::Title option overrides `"title"` attribute. - FLUENT_FIELD_OPTION(TNode, Attributes); -}; - -/// -/// @brief Options for attaching transaction. -/// -/// @see NYT::IClient::AttachTransaction -struct TAttachTransactionOptions -{ - /// @cond Doxygen_Suppress - using TSelf = TAttachTransactionOptions; - /// @endcond - - /// - /// @brief Ping transaction automatically. - /// - /// When set to |true| library creates a thread that pings transaction. - /// When set to |false| library doesn't ping transaction and - /// it's user responsibility to ping it. - FLUENT_FIELD_DEFAULT(bool, AutoPingable, false); - - /// - /// @brief Abort transaction on program termination. - /// - /// Should the transaction be aborted on program termination - /// (either normal or by a signal or uncaught exception -- two latter - /// only if @ref TInitializeOptions::CleanupOnTermination is set). - FLUENT_FIELD_DEFAULT(bool, AbortOnTermination, false); -}; - -/// -/// @brief Type of the lock. -/// -/// @see https://yt.yandex-team.ru/docs/description/storage/transactions#locking_mode -/// @see NYT::ITransaction::Lock -enum ELockMode : int -{ - /// Exclusive lock. - LM_EXCLUSIVE /* "exclusive" */, - - /// Shared lock. - LM_SHARED /* "shared" */, - - /// Snapshot lock. - LM_SNAPSHOT /* "snapshot" */, -}; - -/// -/// @brief Options for locking cypress node -/// -/// @see https://yt.yandex-team.ru/docs/description/storage/transactions#locks -/// @see NYT::ITransaction::Lock -struct TLockOptions -{ - /// @cond Doxygen_Suppress - using TSelf = TLockOptions; - /// @endcond - - /// - /// @brief Whether to wait already locked node to be unlocked. 
- /// - /// If `Waitable` is set to true Lock method will create - /// waitable lock, that will be taken once other transactions - /// that hold lock to that node are committed / aborted. - /// - /// @note Lock method DOES NOT wait until lock is actually acquired. - /// Waiting should be done using corresponding methods of ILock. - /// - /// @see https://yt.yandex-team.ru/docs/description/storage/transactions#locking_queue - FLUENT_FIELD_DEFAULT(bool, Waitable, false); - - /// - /// @brief Also take attribute_key lock. - /// - /// @see https://yt.yandex-team.ru/docs/description/storage/transactions#locks_compatibility - FLUENT_FIELD_OPTION(TString, AttributeKey); - - /// - /// @brief Also take child_key lock. - /// - /// @see https://yt.yandex-team.ru/docs/description/storage/transactions#locks_compatibility - FLUENT_FIELD_OPTION(TString, ChildKey); -}; - -/// -/// @brief Options for @ref NYT::ITransaction::Unlock -/// -/// @note They are empty for now but options might appear in the future. -/// -/// @see https://yt.yandex-team.ru/docs/description/storage/transactions#locks_compatibility -struct TUnlockOptions -{ - /// @cond Doxygen_Suppress - using TSelf = TUnlockOptions; - /// @endcond -}; - -/// Base class for options that deal with tablets. -template <class TDerived> -struct TTabletOptions -{ - /// @cond Doxygen_Suppress - using TSelf = TDerived; - /// @endcond - - /// Index of the first tablet to deal with. - FLUENT_FIELD_OPTION(i64, FirstTabletIndex); - - /// Index of the last tablet to deal with. - FLUENT_FIELD_OPTION(i64, LastTabletIndex); -}; - -/// -/// @brief Options for @ref NYT::IClient::MountTable -/// -/// @see https://yt.yandex-team.ru/docs/api/commands#mount_table -struct TMountTableOptions - : public TTabletOptions<TMountTableOptions> -{ - /// @cond Doxygen_Suppress - using TSelf = TMountTableOptions; - /// @endcond - - /// If specified table will be mounted to this cell. 
- FLUENT_FIELD_OPTION(TTabletCellId, CellId); - - /// If set to true tablets will be mounted in freezed state. - FLUENT_FIELD_DEFAULT(bool, Freeze, false); -}; - -/// -/// @brief Options for @ref NYT::IClient::UnmountTable -/// -/// @see https://yt.yandex-team.ru/docs/api/commands#unmount_table -struct TUnmountTableOptions - : public TTabletOptions<TUnmountTableOptions> -{ - /// @cond Doxygen_Suppress - using TSelf = TUnmountTableOptions; - /// @endcond - - /// Advanced option, don't use unless yt team told you so. - FLUENT_FIELD_DEFAULT(bool, Force, false); -}; - -/// -/// @brief Options for @ref NYT::IClient::RemountTable -/// -/// @see https://yt.yandex-team.ru/docs/api/commands#remount_table -struct TRemountTableOptions - : public TTabletOptions<TRemountTableOptions> -{ }; - -/// -/// @brief Options for @ref NYT::IClient::ReshardTable -/// -/// @see https://yt.yandex-team.ru/docs/api/commands#reshard_table -struct TReshardTableOptions - : public TTabletOptions<TReshardTableOptions> -{ }; - -/// -/// @brief Options for @ref NYT::IClient::FreezeTable -/// -/// @see https://yt.yandex-team.ru/docs/api/commands#freeze_table -struct TFreezeTableOptions - : public TTabletOptions<TFreezeTableOptions> -{ }; - -/// -/// @brief Options for @ref NYT::IClient::UnfreezeTable -/// -/// @see https://yt.yandex-team.ru/docs/api/commands#unfreeze_table -struct TUnfreezeTableOptions - : public TTabletOptions<TUnfreezeTableOptions> -{ }; - -/// -/// @brief Options for @ref NYT::IClient::AlterTable -/// -/// @see https://yt.yandex-team.ru/docs/api/commands#alter_table -struct TAlterTableOptions -{ - /// @cond Doxygen_Suppress - using TSelf = TAlterTableOptions; - /// @endcond - - /// Change table schema. - FLUENT_FIELD_OPTION(TTableSchema, Schema); - - /// Alter table between static and dynamic mode. - FLUENT_FIELD_OPTION(bool, Dynamic); - - /// - /// @brief Changes id of upstream replica on metacluster. 
- /// - /// @see https://yt.yandex-team.ru/docs/description/dynamic_tables/replicated_dynamic_tables - FLUENT_FIELD_OPTION(TReplicaId, UpstreamReplicaId); -}; - -/// -/// @brief Options for @ref NYT::IClient::LookupRows -/// -/// @see https://yt.yandex-team.ru/docs/api/commands#lookup_rows -struct TLookupRowsOptions -{ - /// @cond Doxygen_Suppress - using TSelf = TLookupRowsOptions; - /// @endcond - - /// Timeout for operation. - FLUENT_FIELD_OPTION(TDuration, Timeout); - - /// Column names to return. - FLUENT_FIELD_OPTION(TColumnNames, Columns); - - /// - /// @brief Whether to return rows that were not found in table. - /// - /// If set to true List returned by LookupRows method will have same - /// length as list of keys. If row is not found in table corresponding item in list - /// will have null value. - FLUENT_FIELD_DEFAULT(bool, KeepMissingRows, false); - - /// If set to true returned values will have "timestamp" attribute. - FLUENT_FIELD_OPTION(bool, Versioned); -}; - -/// -/// @brief Options for @ref NYT::IClient::SelectRows -/// -/// @see https://yt.yandex-team.ru/docs/api/commands#select_rows -struct TSelectRowsOptions -{ - /// @cond Doxygen_Suppress - using TSelf = TSelectRowsOptions; - /// @endcond - - /// Timeout for operation. - FLUENT_FIELD_OPTION(TDuration, Timeout); - - /// - /// @brief Limitation for number of rows read by single node. - /// - /// @see https://yt.yandex-team.ru/docs/description/dynamic_tables/dyn_query_language#ogranicheniya-na-slozhnost-zaprosa-(opcii) - FLUENT_FIELD_OPTION(i64, InputRowLimit); - - /// - /// @brief Limitation for number of output rows on single cluster node. - /// - /// @see https://yt.yandex-team.ru/docs/description/dynamic_tables/dyn_query_language#ogranicheniya-na-slozhnost-zaprosa-(opcii) - FLUENT_FIELD_OPTION(i64, OutputRowLimit); - - /// - /// @brief Maximum row ranges derived from WHERE clause. 
- /// - /// @see https://yt.yandex-team.ru/docs/description/dynamic_tables/dyn_query_language#ogranicheniya-na-slozhnost-zaprosa-(opcii) - FLUENT_FIELD_DEFAULT(ui64, RangeExpansionLimit, 1000); - - /// - /// @brief Whether to fail if InputRowLimit or OutputRowLimit is exceeded. - /// - /// @see https://yt.yandex-team.ru/docs/description/dynamic_tables/dyn_query_language#ogranicheniya-na-slozhnost-zaprosa-(opcii) - FLUENT_FIELD_DEFAULT(bool, FailOnIncompleteResult, true); - - /// @brief Enable verbose logging on server side. - FLUENT_FIELD_DEFAULT(bool, VerboseLogging, false); - - FLUENT_FIELD_DEFAULT(bool, EnableCodeCache, true); -}; - -/// Options for NYT::CreateClient. -struct TCreateClientOptions -{ - /// @cond Doxygen_Suppress - using TSelf = TCreateClientOptions; - /// @endcond - - /// @brief Impersonated user name. - /// - /// If authenticated user is allowed to impersonate other YT users (e.g. yql_agent), this field may be used to override user name. - FLUENT_FIELD_OPTION(TString, ImpersonationUser); - - /// @brief User token. - /// - /// @see NYT::TCreateClientOptions::TokenPath - FLUENT_FIELD(TString, Token); - - /// @brief Path to the file where user token is stored. - /// - /// Token is looked up in these places in the following order: - /// - @ref NYT::TCreateClientOptions::Token - /// - @ref NYT::TCreateClientOptions::TokenPath - /// - `TConfig::Get()->Token` option. - /// - `YT_TOKEN` environment variable - /// - `YT_SECURE_VAULT_YT_TOKEN` environment variable - /// - File specified in `YT_TOKEN_PATH` environment variable - /// - `$HOME/.yt/token` file. - FLUENT_FIELD(TString, TokenPath); - - /// @brief TVM service ticket producer. - /// - /// We store a wrapper of NYT::TIntrusivePtr here (not a NYT::TIntrusivePtr), - /// because otherwise other projects will have build problems - /// because of visibility of two different `TIntrusivePtr`-s (::TIntrusivePtr and NYT::TIntrusivePtr). 
- /// - /// @see NYT::NAuth::TServiceTicketClientAuth - /// {@ - NAuth::IServiceTicketAuthPtrWrapperPtr ServiceTicketAuth_ = nullptr; - TSelf& ServiceTicketAuth(const NAuth::IServiceTicketAuthPtrWrapper& wrapper); - /// @} - - /// @brief Use tvm-only endpoints in cluster connection. - FLUENT_FIELD_DEFAULT(bool, TvmOnly, false); - - /// @brief Use HTTPs (use HTTP client from yt/yt/core always). - /// - /// @see UseCoreHttpClient - FLUENT_FIELD_DEFAULT(bool, UseTLS, false); - - /// @brief Use HTTP client from yt/yt/core. - FLUENT_FIELD_DEFAULT(bool, UseCoreHttpClient, false); - - /// - /// @brief RetryConfig provider allows to fine tune request retries. - /// - /// E.g. set total timeout for all retries. - FLUENT_FIELD_DEFAULT(IRetryConfigProviderPtr, RetryConfigProvider, nullptr); - - /// @brief Override global config for the client. - /// - /// The config contains implementation parameters such as connection timeouts, - /// access token, api version and more. - /// @see NYT::TConfig - FLUENT_FIELD_DEFAULT(TConfigPtr, Config, nullptr); -}; - -/// -/// @brief Options for @ref NYT::IBatchRequest::ExecuteBatch -/// -/// @see https://yt.yandex-team.ru/docs/api/commands#execute_batch -struct TExecuteBatchOptions -{ - /// @cond Doxygen_Suppress - using TSelf = TExecuteBatchOptions; - /// @endcond - - /// - /// @brief How many requests will be executed in parallel on the cluster. - /// - /// This parameter could be used to avoid RequestLimitExceeded errors. - FLUENT_FIELD_OPTION(ui64, Concurrency); - - /// - /// @brief Maximum size of batch sent in one request to server. - /// - /// Huge batches are executed using multiple requests. - /// BatchPartMaxSize is maximum size of single request that goes to server - /// If not specified it is set to `Concurrency * 5' - FLUENT_FIELD_OPTION(ui64, BatchPartMaxSize); -}; - -/// -/// @brief Durability mode. 
-/// -/// @see NYT::TTabletTransactionOptions::TDurability -/// @see https://yt.yandex-team.ru/docs/description/dynamic_tables/sorted_dynamic_tables#sohrannost -enum class EDurability -{ - /// Sync mode (default). - Sync /* "sync" */, - - /// Async mode (might reduce latency of write requests, but less reliable). - Async /* "async" */, -}; - -/// -/// @brief Atomicity mode. -/// -/// @see NYT::TTabletTransactionOptions::TDurability -/// @see https://yt.yandex-team.ru/docs/description/dynamic_tables/sorted_dynamic_tables#sohrannost -enum class EAtomicity -{ - /// Transactions are non atomic (might reduce latency of write requests). - None /* "none" */, - - /// Transactions are atomic (default). - Full /* "full" */, -}; - -/// -/// @brief Table replica mode. -/// -/// @see https://yt.yandex-team.ru/docs/description/dynamic_tables/replicated_dynamic_tables#atributy -enum class ETableReplicaMode -{ - Sync /* "sync" */, - Async /* "async" */, -}; - -/// Base class for options dealing with io to dynamic tables. -template <typename TDerived> -struct TTabletTransactionOptions -{ - /// @cond Doxygen_Suppress - using TSelf = TDerived; - /// @endcond - - /// - /// @brief Atomicity mode of operation - /// - /// Setting to NYT::EAtomicity::None allows to improve latency of operations - /// at the cost of weakening contracts. - /// - /// @note Use with care. - /// - /// @see https://yt.yandex-team.ru/docs/description/dynamic_tables/sorted_dynamic_tables#oslablenie-garantij - FLUENT_FIELD_OPTION(EAtomicity, Atomicity); - - /// - /// @brief Durability mode of operation - /// - /// Setting to NYT::EDurability::Async allows to improve latency of operations - /// at the cost of weakening contracts. - /// - /// @note Use with care. 
- /// - /// @see https://yt.yandex-team.ru/docs/description/dynamic_tables/sorted_dynamic_tables#oslablenie-garantij - FLUENT_FIELD_OPTION(EDurability, Durability); -}; - -/// -/// @brief Options for NYT::IClient::InsertRows -/// -/// @see https://yt.yandex-team.ru/docs/api/commands.html#insert_rows -struct TInsertRowsOptions - : public TTabletTransactionOptions<TInsertRowsOptions> -{ - /// - /// @brief Whether to overwrite missing columns with nulls. - /// - /// By default all columns missing in input data are set to Null and overwrite currently stored value. - /// If `Update' is set to true currently stored value will not be overwritten for columns that are missing in input data. - FLUENT_FIELD_OPTION(bool, Update); - - /// - /// @brief Whether to overwrite or aggregate aggregated columns. - /// - /// Used with aggregating columns. - /// By default value in aggregating column will be overwritten. - /// If `Aggregate' is set to true row will be considered as delta and it will be aggregated with currently stored value. - FLUENT_FIELD_OPTION(bool, Aggregate); - - /// - /// @brief Whether to fail when inserting to table without sync replica. - /// - /// Used for insert operation for tables without sync replica. - /// https://yt.yandex-team.ru/docs/description/dynamic_tables/replicated_dynamic_tables#write - /// Default value is 'false'. So insertion into table without sync replicas fails. - FLUENT_FIELD_OPTION(bool, RequireSyncReplica); -}; - -/// -/// @brief Options for NYT::IClient::DeleteRows -/// -/// @see https://yt.yandex-team.ru/docs/api/commands.html#delete_rows -struct TDeleteRowsOptions - : public TTabletTransactionOptions<TDeleteRowsOptions> -{ - /// - /// @brief Whether to fail when deleting from table without sync replica. - /// - // Used for delete operation for tables without sync replica. - // https://yt.yandex-team.ru/docs/description/dynamic_tables/replicated_dynamic_tables#write - // Default value is 'false'. 
So deletion into table without sync replicas fails. - FLUENT_FIELD_OPTION(bool, RequireSyncReplica); -}; - -/// -/// @brief Options for NYT::IClient::TrimRows -/// -/// @see https://yt.yandex-team.ru/docs/api/commands.html#trim_rows -struct TTrimRowsOptions - : public TTabletTransactionOptions<TTrimRowsOptions> -{ }; - -/// @brief Options for NYT::IClient::AlterTableReplica -/// -/// @see https://yt.yandex-team.ru/docs/api/commands.html#alter_table_replica -/// @see https://yt.yandex-team.ru/docs/description/dynamic_tables/replicated_dynamic_tables -struct TAlterTableReplicaOptions -{ - /// @cond Doxygen_Suppress - using TSelf = TAlterTableReplicaOptions; - /// @endcond - - /// - /// @brief Whether to enable or disable replica. - /// - /// Doesn't change state of replica if `Enabled' is not set. - FLUENT_FIELD_OPTION(bool, Enabled); - - /// - /// @brief Change replica mode. - /// - /// Doesn't change replica mode if `Mode` is not set. - FLUENT_FIELD_OPTION(ETableReplicaMode, Mode); -}; - -/// -/// @brief Options for @ref NYT::IClient::GetFileFromCache -/// -/// @note They are empty for now but options might appear in the future. -/// -/// @see https://yt.yandex-team.ru/docs/api/commands.html#get_file_from_cache -struct TGetFileFromCacheOptions -{ - /// @cond Doxygen_Suppress - using TSelf = TGetFileFromCacheOptions; - /// @endcond -}; - -/// -/// @brief Options for @ref NYT::IClient::GetTableColumnarStatistics -/// -/// @note They are empty for now but options might appear in the future. -/// -/// @see https://yt.yandex-team.ru/docs/api/commands.html#put_file_to_cache -struct TPutFileToCacheOptions -{ - /// @cond Doxygen_Suppress - using TSelf = TPutFileToCacheOptions; - /// @endcond - - /// Whether to preserve `expiration_timeout` attribute of source node. - FLUENT_FIELD_OPTION(bool, PreserveExpirationTimeout); -}; - -/// -/// Type of permission used in ACL. 
-/// -/// @see https://yt.yandex-team.ru/docs/description/common/access_control -enum class EPermission : int -{ - /// Applies to: all objects. - Read /* "read" */, - - /// Applies to: all objects. - Write /* "write" */, - - /// Applies to: accounts / pools. - Use /* "use" */, - - /// Applies to: all objects. - Administer /* "administer" */, - - /// Applies to: schemas. - Create /* "create" */, - - /// Applies to: all objects. - Remove /* "remove" */, - - /// Applies to: tables. - Mount /* "mount" */, - - /// Applies to: operations. - Manage /* "manage" */, -}; - -/// Whether permission is granted or denied. -enum class ESecurityAction : int -{ - /// Permission is granted. - Allow /* "allow" */, - - /// Permission is denied. - Deny /* "deny" */, -}; - -/// -/// @brief Options for @ref NYT::IClient::CheckPermission -/// -/// @see https://yt.yandex-team.ru/docs/api/commands.html#check_permission -struct TCheckPermissionOptions -{ - /// @cond Doxygen_Suppress - using TSelf = TCheckPermissionOptions; - /// @endcond - - /// Columns to check permission to (for tables only). - FLUENT_VECTOR_FIELD(TString, Column); -}; - -/// -/// @brief Columnar statistics fetching mode. -/// -/// @ref NYT::TGetTableColumnarStatisticsOptions::FetcherMode -enum class EColumnarStatisticsFetcherMode -{ - /// Slow mode for fetching precise columnar statistics. - FromNodes /* "from_nodes" */, - - /// - /// @brief Fast mode for fetching lightweight columnar statistics. - /// - /// Relative precision is 1 / 256. - /// - /// @note Might be unavailable for old tables in that case some upper bound is returned. - FromMaster /* "from_master" */, - - /// Use lightweight columnar statistics (FromMaster) if available otherwise switch to slow but precise mode (FromNodes). 
- Fallback /* "fallback" */, -}; - -/// -/// @brief Options for @ref NYT::IClient::GetTableColumnarStatistics -/// -/// @see https://yt.yandex-team.ru/docs/api/commands.html#get_table_columnar_statistics -struct TGetTableColumnarStatisticsOptions -{ - /// @cond Doxygen_Suppress - using TSelf = TGetTableColumnarStatisticsOptions; - /// @endcond - - /// - /// @brief Mode of statistics fetching. - /// - /// @ref NYT::EColumnarStatisticsFetcherMode - FLUENT_FIELD_OPTION(EColumnarStatisticsFetcherMode, FetcherMode); -}; - -/// -/// @brief Table partitioning mode. -/// -/// @ref NYT::TGetTablePartitionsOptions::PartitionMode -enum class ETablePartitionMode -{ - /// - /// @brief Ignores the order of input tables and their chunk and sorting orders. - /// - Unordered /* "unordered" */, - - /// - /// @brief The order of table ranges inside each partition obey the order of input tables and their chunk orders. - /// - Ordered /* "ordered" */, -}; - -/// -/// @brief Options for @ref NYT::IClient::GetTablePartitions -/// -struct TGetTablePartitionsOptions -{ - /// @cond Doxygen_Suppress - using TSelf = TGetTablePartitionsOptions; - /// @endcond - - /// - /// @brief Table partitioning mode. - /// - /// @ref NYT::ETablePartitionMode - FLUENT_FIELD(ETablePartitionMode, PartitionMode); - - /// - /// @brief Approximate data weight of each output partition. - /// - FLUENT_FIELD(i64, DataWeightPerPartition); - - /// - /// @brief Maximum output partition count. - /// - /// Consider the situation when the `MaxPartitionCount` is given - /// and the total data weight exceeds `MaxPartitionCount * DataWeightPerPartition`. - /// If `AdjustDataWeightPerPartition` is |true| - /// `GetTablePartitions` will yield partitions exceeding the `DataWeightPerPartition`. - /// If `AdjustDataWeightPerPartition` is |false| - /// the partitioning will be aborted as soon as the output partition count exceeds this limit. 
- FLUENT_FIELD_OPTION(int, MaxPartitionCount); - - /// - /// @brief Allow the data weight per partition to exceed `DataWeightPerPartition` when `MaxPartitionCount` is set. - /// - /// |True| by default. - FLUENT_FIELD_DEFAULT(bool, AdjustDataWeightPerPartition, true); -}; - -/// -/// @brief Options for @ref NYT::IClient::GetTabletInfos -/// -/// @note They are empty for now but options might appear in the future. -/// -/// @see https://yt.yandex-team.ru/docs/api/commands.html#get_tablet_infos -struct TGetTabletInfosOptions -{ - /// @cond Doxygen_Suppress - using TSelf = TGetTabletInfosOptions; - /// @endcond -}; - -/// Options for @ref NYT::IClient::SkyShareTable -struct TSkyShareTableOptions -{ - /// @cond Doxygen_Suppress - using TSelf = TSkyShareTableOptions; - /// @endcond - - /// - /// @brief Key columns that are used to group files in a table into torrents. - /// - /// One torrent is created for each value of `KeyColumns` columns. - /// If not specified, all files go into single torrent. - FLUENT_FIELD_OPTION(TColumnNames, KeyColumns); - - /// @brief Allow skynet manager to return fastbone links to skynet. 
See YT-11437 - FLUENT_FIELD_OPTION(bool, EnableFastbone); -}; - -//////////////////////////////////////////////////////////////////////////////// - -} // namespace NYT diff --git a/yt/cpp/mapreduce/interface/common.cpp b/yt/cpp/mapreduce/interface/common.cpp deleted file mode 100644 index f6d60127ce..0000000000 --- a/yt/cpp/mapreduce/interface/common.cpp +++ /dev/null @@ -1,664 +0,0 @@ -#include "common.h" - -#include "errors.h" -#include "format.h" -#include "serialize.h" -#include "fluent.h" - -#include <yt/yt_proto/yt/formats/extension.pb.h> - -#include <library/cpp/yson/node/node_builder.h> -#include <library/cpp/yson/node/node_io.h> -#include <library/cpp/type_info/type.h> - -#include <util/generic/xrange.h> - -namespace NYT { - -using ::google::protobuf::FieldDescriptor; -using ::google::protobuf::Descriptor; - -//////////////////////////////////////////////////////////////////////////////// - -TSortColumn::TSortColumn(TStringBuf name, ESortOrder sortOrder) - : Name_(name) - , SortOrder_(sortOrder) -{ } - -TSortColumn::TSortColumn(const TString& name, ESortOrder sortOrder) - : TSortColumn(static_cast<TStringBuf>(name), sortOrder) -{ } - -TSortColumn::TSortColumn(const char* name, ESortOrder sortOrder) - : TSortColumn(static_cast<TStringBuf>(name), sortOrder) -{ } - -const TSortColumn& TSortColumn::EnsureAscending() const -{ - Y_ENSURE(SortOrder() == ESortOrder::SO_ASCENDING); - return *this; -} - -TNode TSortColumn::ToNode() const -{ - return BuildYsonNodeFluently().Value(*this); -} - -//////////////////////////////////////////////////////////////////////////////// -// Below lie backward compatibility methods. 
-//////////////////////////////////////////////////////////////////////////////// - -TSortColumn& TSortColumn::operator = (TStringBuf name) -{ - EnsureAscending(); - Name_ = name; - return *this; -} - -TSortColumn& TSortColumn::operator = (const TString& name) -{ - return (*this = static_cast<TStringBuf>(name)); -} - -TSortColumn& TSortColumn::operator = (const char* name) -{ - return (*this = static_cast<TStringBuf>(name)); -} - -bool TSortColumn::operator == (TStringBuf rhsName) const -{ - EnsureAscending(); - return Name_ == rhsName; -} - -bool TSortColumn::operator != (TStringBuf rhsName) const -{ - return !(*this == rhsName); -} - -bool TSortColumn::operator == (const TString& rhsName) const -{ - return *this == static_cast<TStringBuf>(rhsName); -} - -bool TSortColumn::operator != (const TString& rhsName) const -{ - return !(*this == rhsName); -} - -bool TSortColumn::operator == (const char* rhsName) const -{ - return *this == static_cast<TStringBuf>(rhsName); -} - -bool TSortColumn::operator != (const char* rhsName) const -{ - return !(*this == rhsName); -} - -TSortColumn::operator TStringBuf() const -{ - EnsureAscending(); - return Name_; -} - -TSortColumn::operator TString() const -{ - return TString(static_cast<TStringBuf>(*this)); -} - -TSortColumn::operator std::string() const -{ - EnsureAscending(); - return static_cast<std::string>(Name_); -} - -//////////////////////////////////////////////////////////////////////////////// - -TSortColumns::TSortColumns() -{ } - -TSortColumns::TSortColumns(const TVector<TString>& names) -{ - Parts_.assign(names.begin(), names.end()); -} - -TSortColumns::TSortColumns(const TColumnNames& names) - : TSortColumns(names.Parts_) -{ } - -TSortColumns::operator TColumnNames() const -{ - return TColumnNames(EnsureAscending().GetNames()); -} - -const TSortColumns& TSortColumns::EnsureAscending() const -{ - for (const auto& sortColumn : Parts_) { - sortColumn.EnsureAscending(); - } - return *this; -} - -TVector<TString> 
TSortColumns::GetNames() const -{ - TVector<TString> names; - names.reserve(Parts_.size()); - for (const auto& sortColumn : Parts_) { - names.push_back(sortColumn.Name()); - } - return names; -} - -//////////////////////////////////////////////////////////////////////////////// - -static NTi::TTypePtr OldTypeToTypeV3(EValueType type) -{ - switch (type) { - case VT_INT64: - return NTi::Int64(); - case VT_UINT64: - return NTi::Uint64(); - - case VT_DOUBLE: - return NTi::Double(); - - case VT_BOOLEAN: - return NTi::Bool(); - - case VT_STRING: - return NTi::String(); - - case VT_ANY: - return NTi::Yson(); - - case VT_INT8: - return NTi::Int8(); - case VT_INT16: - return NTi::Int16(); - case VT_INT32: - return NTi::Int32(); - - case VT_UINT8: - return NTi::Uint8(); - case VT_UINT16: - return NTi::Uint16(); - case VT_UINT32: - return NTi::Uint32(); - - case VT_UTF8: - return NTi::Utf8(); - - case VT_NULL: - return NTi::Null(); - - case VT_VOID: - return NTi::Void(); - - case VT_DATE: - return NTi::Date(); - case VT_DATETIME: - return NTi::Datetime(); - case VT_TIMESTAMP: - return NTi::Timestamp(); - case VT_INTERVAL: - return NTi::Interval(); - - case VT_FLOAT: - return NTi::Float(); - case VT_JSON: - return NTi::Json(); - } -} - -static std::pair<EValueType, bool> Simplify(const NTi::TTypePtr& type) -{ - using namespace NTi; - const auto typeName = type->GetTypeName(); - switch (typeName) { - case ETypeName::Bool: - return {VT_BOOLEAN, true}; - - case ETypeName::Int8: - return {VT_INT8, true}; - case ETypeName::Int16: - return {VT_INT16, true}; - case ETypeName::Int32: - return {VT_INT32, true}; - case ETypeName::Int64: - return {VT_INT64, true}; - - case ETypeName::Uint8: - return {VT_UINT8, true}; - case ETypeName::Uint16: - return {VT_UINT16, true}; - case ETypeName::Uint32: - return {VT_UINT32, true}; - case ETypeName::Uint64: - return {VT_UINT64, true}; - - case ETypeName::Float: - return {VT_FLOAT, true}; - case ETypeName::Double: - return {VT_DOUBLE, true}; - - 
case ETypeName::String: - return {VT_STRING, true}; - case ETypeName::Utf8: - return {VT_UTF8, true}; - - case ETypeName::Date: - return {VT_DATE, true}; - case ETypeName::Datetime: - return {VT_DATETIME, true}; - case ETypeName::Timestamp: - return {VT_TIMESTAMP, true}; - case ETypeName::Interval: - return {VT_INTERVAL, true}; - - case ETypeName::TzDate: - case ETypeName::TzDatetime: - case ETypeName::TzTimestamp: - break; - - case ETypeName::Json: - return {VT_JSON, true}; - case ETypeName::Decimal: - return {VT_STRING, true}; - case ETypeName::Uuid: - break; - case ETypeName::Yson: - return {VT_ANY, true}; - - case ETypeName::Void: - return {VT_VOID, false}; - case ETypeName::Null: - return {VT_NULL, false}; - - case ETypeName::Optional: - { - auto itemType = type->AsOptional()->GetItemType(); - if (itemType->IsPrimitive()) { - auto simplified = Simplify(itemType->AsPrimitive()); - if (simplified.second) { - simplified.second = false; - return simplified; - } - } - return {VT_ANY, false}; - } - case ETypeName::List: - return {VT_ANY, true}; - case ETypeName::Dict: - return {VT_ANY, true}; - case ETypeName::Struct: - return {VT_ANY, true}; - case ETypeName::Tuple: - return {VT_ANY, true}; - case ETypeName::Variant: - return {VT_ANY, true}; - case ETypeName::Tagged: - return Simplify(type->AsTagged()->GetItemType()); - } - ythrow TApiUsageError() << "Unsupported type: " << typeName; -} - -NTi::TTypePtr ToTypeV3(EValueType type, bool required) -{ - auto typeV3 = OldTypeToTypeV3(type); - if (!Simplify(typeV3).second) { - if (required) { - ythrow TApiUsageError() << "type: " << type << " cannot be required"; - } else { - return typeV3; - } - } - if (required) { - return typeV3; - } else { - return NTi::Optional(typeV3); - } -} - -TColumnSchema::TColumnSchema() - : TypeV3_(NTi::Optional(NTi::Int64())) -{ } - -EValueType TColumnSchema::Type() const -{ - return Simplify(TypeV3_).first; -} - -TColumnSchema& TColumnSchema::Type(EValueType type) & -{ - return 
Type(ToTypeV3(type, false)); -} - -TColumnSchema TColumnSchema::Type(EValueType type) && -{ - return Type(ToTypeV3(type, false)); -} - -TColumnSchema& TColumnSchema::Type(const NTi::TTypePtr& type) & -{ - Y_VERIFY(type.Get(), "Cannot create column schema with nullptr type"); - TypeV3_ = type; - return *this; -} - -TColumnSchema TColumnSchema::Type(const NTi::TTypePtr& type) && -{ - Y_VERIFY(type.Get(), "Cannot create column schema with nullptr type"); - TypeV3_ = type; - return *this; -} - -TColumnSchema& TColumnSchema::TypeV3(const NTi::TTypePtr& type) & -{ - return Type(type); -} - -TColumnSchema TColumnSchema::TypeV3(const NTi::TTypePtr& type) && -{ - return Type(type); -} - -NTi::TTypePtr TColumnSchema::TypeV3() const -{ - return TypeV3_; -} - -bool TColumnSchema::Required() const -{ - return Simplify(TypeV3_).second; -} - -TColumnSchema& TColumnSchema::Type(EValueType type, bool required) & -{ - return Type(ToTypeV3(type, required)); -} - -TColumnSchema TColumnSchema::Type(EValueType type, bool required) && -{ - return Type(ToTypeV3(type, required)); -} - -bool operator==(const TColumnSchema& lhs, const TColumnSchema& rhs) -{ - return - lhs.Name() == rhs.Name() && - NTi::NEq::TStrictlyEqual()(lhs.TypeV3(), rhs.TypeV3()) && - lhs.SortOrder() == rhs.SortOrder() && - lhs.Lock() == rhs.Lock() && - lhs.Expression() == rhs.Expression() && - lhs.Aggregate() == rhs.Aggregate() && - lhs.Group() == rhs.Group(); -} - -//////////////////////////////////////////////////////////////////////////////// - -bool TTableSchema::Empty() const -{ - return Columns_.empty(); -} - -TTableSchema& TTableSchema::AddColumn(const TString& name, EValueType type) & -{ - Columns_.push_back(TColumnSchema().Name(name).Type(type)); - return *this; -} - -TTableSchema TTableSchema::AddColumn(const TString& name, EValueType type) && -{ - return std::move(AddColumn(name, type)); -} - -TTableSchema& TTableSchema::AddColumn(const TString& name, EValueType type, ESortOrder sortOrder) & -{ - 
Columns_.push_back(TColumnSchema().Name(name).Type(type).SortOrder(sortOrder)); - return *this; -} - -TTableSchema TTableSchema::AddColumn(const TString& name, EValueType type, ESortOrder sortOrder) && -{ - return std::move(AddColumn(name, type, sortOrder)); -} - -TTableSchema& TTableSchema::AddColumn(const TString& name, const NTi::TTypePtr& type) & -{ - Columns_.push_back(TColumnSchema().Name(name).Type(type)); - return *this; -} - -TTableSchema TTableSchema::AddColumn(const TString& name, const NTi::TTypePtr& type) && -{ - return std::move(AddColumn(name, type)); -} - -TTableSchema& TTableSchema::AddColumn(const TString& name, const NTi::TTypePtr& type, ESortOrder sortOrder) & -{ - Columns_.push_back(TColumnSchema().Name(name).Type(type).SortOrder(sortOrder)); - return *this; -} - -TTableSchema TTableSchema::AddColumn(const TString& name, const NTi::TTypePtr& type, ESortOrder sortOrder) && -{ - return std::move(AddColumn(name, type, sortOrder)); -} - -TTableSchema& TTableSchema::SortBy(const TSortColumns& sortColumns) & -{ - Y_ENSURE(sortColumns.Parts_.size() <= Columns_.size()); - - THashMap<TString, ui64> sortColumnIndex; - for (auto i: xrange(sortColumns.Parts_.size())) { - Y_ENSURE(sortColumnIndex.emplace(sortColumns.Parts_[i].Name(), i).second, - "Key column name '" << sortColumns.Parts_[i].Name() << "' repeats in columns list"); - } - - TVector<TColumnSchema> newColumnsSorted(sortColumns.Parts_.size()); - TVector<TColumnSchema> newColumnsUnsorted; - for (auto& column : Columns_) { - auto it = sortColumnIndex.find(column.Name()); - if (it == sortColumnIndex.end()) { - column.ResetSortOrder(); - newColumnsUnsorted.push_back(std::move(column)); - } else { - auto index = it->second; - const auto& sortColumn = sortColumns.Parts_[index]; - column.SortOrder(sortColumn.SortOrder()); - newColumnsSorted[index] = std::move(column); - sortColumnIndex.erase(it); - } - } - - Y_ENSURE(sortColumnIndex.empty(), "Column name '" << sortColumnIndex.begin()->first - << "' not 
found in table schema"); - - newColumnsSorted.insert(newColumnsSorted.end(), newColumnsUnsorted.begin(), newColumnsUnsorted.end()); - Columns_ = std::move(newColumnsSorted); - - return *this; -} - -TTableSchema TTableSchema::SortBy(const TSortColumns& sortColumns) && -{ - return std::move(SortBy(sortColumns)); -} - -TVector<TColumnSchema>& TTableSchema::MutableColumns() -{ - return Columns_; -} - -TNode TTableSchema::ToNode() const -{ - TNode result; - TNodeBuilder builder(&result); - Serialize(*this, &builder); - return result; -} - -TTableSchema TTableSchema::FromNode(const TNode& node) -{ - TTableSchema schema; - Deserialize(schema, node); - return schema; -} - -bool operator==(const TTableSchema& lhs, const TTableSchema& rhs) -{ - return - lhs.Columns() == rhs.Columns() && - lhs.Strict() == rhs.Strict() && - lhs.UniqueKeys() == rhs.UniqueKeys(); -} - -//////////////////////////////////////////////////////////////////////////////// - -TKeyBound::TKeyBound(ERelation relation, TKey key) - : Relation_(relation) - , Key_(std::move(key)) -{ } - -//////////////////////////////////////////////////////////////////////////////// - -TTableSchema CreateTableSchema( - const Descriptor& messageDescriptor, - const TSortColumns& sortColumns, - bool keepFieldsWithoutExtension) -{ - auto result = CreateTableSchema(messageDescriptor, keepFieldsWithoutExtension); - if (!sortColumns.Parts_.empty()) { - result.SortBy(sortColumns.Parts_); - } - return result; -} - -TTableSchema CreateTableSchema(NTi::TTypePtr type) -{ - Y_VERIFY(type); - TTableSchema schema; - Deserialize(schema, NodeFromYsonString(NTi::NIo::AsYtSchema(type.Get()))); - return schema; -} - -//////////////////////////////////////////////////////////////////////////////// - -bool IsTrivial(const TReadLimit& readLimit) -{ - return !readLimit.Key_ && !readLimit.RowIndex_ && !readLimit.Offset_ && !readLimit.TabletIndex_ && !readLimit.KeyBound_; -} - -EValueType NodeTypeToValueType(TNode::EType nodeType) -{ - switch 
(nodeType) { - case TNode::EType::Int64: return VT_INT64; - case TNode::EType::Uint64: return VT_UINT64; - case TNode::EType::String: return VT_STRING; - case TNode::EType::Double: return VT_DOUBLE; - case TNode::EType::Bool: return VT_BOOLEAN; - default: - ythrow yexception() << "Cannot convert TNode type " << nodeType << " to EValueType"; - } -} - -//////////////////////////////////////////////////////////////////////////////// - -const TVector<TReadRange>& GetRangesCompat(const TRichYPath& path) -{ - static const TVector<TReadRange> empty; - - const auto& maybeRanges = path.GetRanges(); - if (maybeRanges.Empty()) { - return empty; - } else if (maybeRanges->size() > 0) { - return *maybeRanges; - } else { - // If you see this exception, that means that caller of this function doesn't known what to do - // with RichYPath that has set range list, but the range list is empty. - // - // To avoid this exception caller must explicitly handle such case. - // NB. YT-17683 - ythrow TApiUsageError() << "Unsupported RichYPath: explicitly empty range list"; - } -} - -//////////////////////////////////////////////////////////////////////////////// - -namespace NDetail { - -//////////////////////////////////////////////////////////////////////////////// - -TString ToString(EValueType type) -{ - switch (type) { - case VT_INT8: - return "int8"; - case VT_INT16: - return "int16"; - case VT_INT32: - return "int32"; - case VT_INT64: - return "int64"; - - case VT_UINT8: - return "uint8"; - case VT_UINT16: - return "uint16"; - case VT_UINT32: - return "uint32"; - case VT_UINT64: - return "uint64"; - - case VT_DOUBLE: - return "double"; - - case VT_BOOLEAN: - return "boolean"; - - case VT_STRING: - return "string"; - case VT_UTF8: - return "utf8"; - - case VT_ANY: - return "any"; - - case VT_NULL: - return "null"; - case VT_VOID: - return "void"; - - case VT_DATE: - return "date"; - case VT_DATETIME: - return "datetime"; - case VT_TIMESTAMP: - return "timestamp"; - case VT_INTERVAL: - 
return "interval"; - - case VT_FLOAT: - return "float"; - - case VT_JSON: - return "json"; - } - ythrow yexception() << "Invalid value type " << static_cast<int>(type); -} - -//////////////////////////////////////////////////////////////////////////////// - -} // namespace NDetail -} // namespace NYT - -template <> -void Out<NYT::TSortColumn>(IOutputStream& os, const NYT::TSortColumn& sortColumn) -{ - if (sortColumn.SortOrder() == NYT::ESortOrder::SO_ASCENDING) { - os << sortColumn.Name(); - } else { - os << NYT::BuildYsonStringFluently(NYson::EYsonFormat::Text).Value(sortColumn); - } -} diff --git a/yt/cpp/mapreduce/interface/common.h b/yt/cpp/mapreduce/interface/common.h deleted file mode 100644 index b1754ade70..0000000000 --- a/yt/cpp/mapreduce/interface/common.h +++ /dev/null @@ -1,1301 +0,0 @@ -#pragma once - -/// -/// @file yt/cpp/mapreduce/interface/common.h -/// -/// Header containing miscellaneous structs and classes used in library. - -#include "fwd.h" - -#include <library/cpp/type_info/type_info.h> -#include <library/cpp/yson/node/node.h> - -#include <util/generic/guid.h> -#include <util/generic/map.h> -#include <util/generic/maybe.h> -#include <util/generic/ptr.h> -#include <util/system/type_name.h> -#include <util/generic/vector.h> - -#include <google/protobuf/message.h> - -#include <initializer_list> -#include <type_traits> - -namespace NYT { - -//////////////////////////////////////////////////////////////////////////////// - -/// @cond Doxygen_Suppress -#define FLUENT_FIELD(type, name) \ - type name##_; \ - TSelf& name(const type& value) \ - { \ - name##_ = value; \ - return static_cast<TSelf&>(*this); \ - } \ - static_assert(true) - -#define FLUENT_FIELD_ENCAPSULATED(type, name) \ -private: \ - type name##_; \ -public: \ - TSelf& name(const type& value) & \ - { \ - name##_ = value; \ - return static_cast<TSelf&>(*this); \ - } \ - TSelf name(const type& value) && \ - { \ - name##_ = value; \ - return static_cast<TSelf&>(*this); \ - } \ - const 
type& name() const & \ - { \ - return name##_; \ - } \ - type name() && \ - { \ - return name##_; \ - } \ - static_assert(true) - -#define FLUENT_FIELD_OPTION(type, name) \ - TMaybe<type> name##_; \ - TSelf& name(const type& value) \ - { \ - name##_ = value; \ - return static_cast<TSelf&>(*this); \ - } \ - static_assert(true) - -#define FLUENT_FIELD_OPTION_ENCAPSULATED(type, name) \ -private: \ - TMaybe<type> name##_; \ -public: \ - TSelf& name(const type& value) & \ - { \ - name##_ = value; \ - return static_cast<TSelf&>(*this); \ - } \ - TSelf name(const type& value) && \ - { \ - name##_ = value; \ - return static_cast<TSelf&>(*this); \ - } \ - TSelf& Reset##name() & \ - { \ - name##_ = Nothing(); \ - return static_cast<TSelf&>(*this); \ - } \ - TSelf Reset##name() && \ - { \ - name##_ = Nothing(); \ - return static_cast<TSelf&>(*this); \ - } \ - const TMaybe<type>& name() const& \ - { \ - return name##_; \ - } \ - TMaybe<type> name() && \ - { \ - return name##_; \ - } \ - static_assert(true) - -#define FLUENT_FIELD_DEFAULT(type, name, defaultValue) \ - type name##_ = defaultValue; \ - TSelf& name(const type& value) \ - { \ - name##_ = value; \ - return static_cast<TSelf&>(*this); \ - } \ - static_assert(true) - -#define FLUENT_FIELD_DEFAULT_ENCAPSULATED(type, name, defaultValue) \ -private: \ - type name##_ = defaultValue; \ -public: \ - TSelf& name(const type& value) & \ - { \ - name##_ = value; \ - return static_cast<TSelf&>(*this); \ - } \ - TSelf name(const type& value) && \ - { \ - name##_ = value; \ - return static_cast<TSelf&>(*this); \ - } \ - const type& name() const & \ - { \ - return name##_; \ - } \ - type name() && \ - { \ - return name##_; \ - } \ - static_assert(true) - -#define FLUENT_VECTOR_FIELD(type, name) \ - TVector<type> name##s_; \ - TSelf& Add##name(const type& value) \ - { \ - name##s_.push_back(value); \ - return static_cast<TSelf&>(*this);\ - } \ - TSelf& name##s(TVector<type> values) \ - { \ - name##s_ = std::move(values); \ - return 
static_cast<TSelf&>(*this);\ - } \ - static_assert(true) - -#define FLUENT_OPTIONAL_VECTOR_FIELD_ENCAPSULATED(type, name) \ -private: \ - TMaybe<TVector<type>> name##s_; \ -public: \ - const TMaybe<TVector<type>>& name##s() const & { \ - return name##s_; \ - } \ - TMaybe<TVector<type>>& name##s() & { \ - return name##s_; \ - } \ - TMaybe<TVector<type>> name##s() && { \ - return std::move(name##s_); \ - } \ - TSelf& Add##name(const type& value) & \ - { \ - if (name##s_.Empty()) { \ - name##s_.ConstructInPlace(); \ - } \ - name##s_->push_back(value); \ - return static_cast<TSelf&>(*this);\ - } \ - TSelf Add##name(const type& value) && \ - { \ - if (name##s_.Empty()) { \ - name##s_.ConstructInPlace(); \ - } \ - name##s_->push_back(value); \ - return static_cast<TSelf&&>(*this);\ - } \ - TSelf& name##s(TVector<type> values) & \ - { \ - name##s_ = std::move(values); \ - return static_cast<TSelf&>(*this);\ - } \ - TSelf name##s(TVector<type> values) && \ - { \ - name##s_ = std::move(values); \ - return static_cast<TSelf&&>(*this);\ - } \ - TSelf& name##s(TNothing) & \ - { \ - name##s_ = Nothing(); \ - return static_cast<TSelf&>(*this);\ - } \ - TSelf name##s(TNothing) && \ - { \ - name##s_ = Nothing(); \ - return static_cast<TSelf&&>(*this);\ - } \ - TSelf& Reset##name##s() & \ - { \ - name##s_ = Nothing(); \ - return static_cast<TSelf&>(*this);\ - } \ - TSelf Reset##name##s() && \ - { \ - name##s_ = Nothing(); \ - return static_cast<TSelf&&>(*this);\ - } \ - static_assert(true) - -#define FLUENT_VECTOR_FIELD_ENCAPSULATED(type, name) \ -private: \ - TVector<type> name##s_; \ -public: \ - TSelf& Add##name(const type& value) & \ - { \ - name##s_.push_back(value); \ - return static_cast<TSelf&>(*this);\ - } \ - TSelf Add##name(const type& value) && \ - { \ - name##s_.push_back(value); \ - return static_cast<TSelf&>(*this);\ - } \ - TSelf& name##s(TVector<type> value) & \ - { \ - name##s_ = std::move(value); \ - return static_cast<TSelf&>(*this);\ - } \ - TSelf 
name##s(TVector<type> value) && \ - { \ - name##s_ = std::move(value); \ - return static_cast<TSelf&>(*this);\ - } \ - const TVector<type>& name##s() const & \ - { \ - return name##s_; \ - } \ - TVector<type> name##s() && \ - { \ - return name##s_; \ - } \ - static_assert(true) - -#define FLUENT_MAP_FIELD(keytype, valuetype, name) \ - TMap<keytype,valuetype> name##_; \ - TSelf& Add##name(const keytype& key, const valuetype& value) \ - { \ - name##_.emplace(key, value); \ - return static_cast<TSelf&>(*this);\ - } \ - static_assert(true) - -/// @endcond - -//////////////////////////////////////////////////////////////////////////////// - -/// -/// @brief Convenience class that keeps sequence of items. -/// -/// Designed to be used as function parameter. -/// -/// Users of such function can then pass: -/// - single item, -/// - initializer list of items, -/// - vector of items; -/// as argument to this function. -/// -/// Example: -/// ``` -/// void Foo(const TOneOrMany<int>& arg); -/// ... -/// Foo(1); // ok -/// Foo({1, 2, 3}); // ok -/// ``` -template <class T, class TDerived> -struct TOneOrMany -{ - /// @cond Doxygen_Suppress - using TSelf = std::conditional_t<std::is_void_v<TDerived>, TOneOrMany, TDerived>; - /// @endcond - - /// Initialize with empty sequence. - TOneOrMany() = default; - - // Initialize from initializer list. - template<class U> - TOneOrMany(std::initializer_list<U> il) - { - Parts_.assign(il.begin(), il.end()); - } - - /// Put arguments to sequence - template <class U, class... TArgs> - requires std::is_convertible_v<U, T> - TOneOrMany(U&& arg, TArgs&&... args) - { - Add(arg, std::forward<TArgs>(args)...); - } - - /// Initialize from vector. - TOneOrMany(TVector<T> args) - : Parts_(std::move(args)) - { } - - /// @brief Order is defined the same way as in TVector - bool operator==(const TOneOrMany& rhs) const - { - // N.B. 
We would like to make this method to be `= default`, - // but this breaks MSVC compiler for the cases when T doesn't - // support comparison. - return Parts_ == rhs.Parts_; - } - - /// - /// @{ - /// - /// @brief Add all arguments to sequence - template <class U, class... TArgs> - requires std::is_convertible_v<U, T> - TSelf& Add(U&& part, TArgs&&... args) & - { - Parts_.push_back(std::forward<U>(part)); - if constexpr (sizeof...(args) > 0) { - [[maybe_unused]] int dummy[sizeof...(args)] = {(Parts_.push_back(std::forward<TArgs>(args)), 0) ... }; - } - return static_cast<TSelf&>(*this); - } - - template <class U, class... TArgs> - requires std::is_convertible_v<U, T> - TSelf Add(U&& part, TArgs&&... args) && - { - return std::move(Add(std::forward<U>(part), std::forward<TArgs>(args)...)); - } - /// @} - - /// Content of sequence. - TVector<T> Parts_; -}; - -//////////////////////////////////////////////////////////////////////////////// - -/// -/// @brief Type of the value that can occur in YT table. -/// -/// @ref NYT::TTableSchema -/// https://yt.yandex-team.ru/docs/description/storage/data_types -enum EValueType : int -{ - /// Int64, signed integer of 64 bits. - VT_INT64, - - /// Uint64, unsigned integer of 64 bits. - VT_UINT64, - - /// Double, floating point number of double precision (64 bits). - VT_DOUBLE, - /// Boolean, `true` or `false`. - VT_BOOLEAN, - - /// String, arbitrary byte sequence. - VT_STRING, - - /// Any, arbitrary yson document. - VT_ANY, - - /// Int8, signed integer of 8 bits. - VT_INT8, - /// Int16, signed integer of 16 bits. - VT_INT16, - /// Int32, signed integer of 32 bits. - VT_INT32, - - /// Uint8, unsigned integer of 8 bits. - VT_UINT8, - /// Uint16, unsigned integer of 16 bits. - VT_UINT16, - /// Uint32, unsigned integer of 32 bits. - VT_UINT32, - - /// Utf8, byte sequence that is valid utf8. 
- VT_UTF8, - - /// Null, absence of value (almost never used in schemas) - VT_NULL, - /// Void, absence of value (almost never used in schemas) the difference between null, and void is yql-specific. - VT_VOID, - - /// Date, number of days since Unix epoch (unsigned) - VT_DATE, - /// Datetime, number of seconds since Unix epoch (unsigned) - VT_DATETIME, - /// Timestamp, number of milliseconds since Unix epoch (unsigned) - VT_TIMESTAMP, - /// Interval, difference between two timestamps (signed) - VT_INTERVAL, - - /// Float, floating point number (32 bits) - VT_FLOAT, - /// Json, sequence of bytes that is valid json. - VT_JSON, -}; - -/// -/// @brief Sort order. -/// -/// @ref NYT::TTableSchema -enum ESortOrder : int -{ - /// Ascending sort order. - SO_ASCENDING /* "ascending" */, - /// Descending sort order. - SO_DESCENDING /* "descending" */, -}; - -/// -/// @brief Value of "optimize_for" attribute. -/// -/// @ref NYT::TRichYPath -enum EOptimizeForAttr : i8 -{ - /// Optimize for scan - OF_SCAN_ATTR /* "scan" */, - - /// Optimize for lookup - OF_LOOKUP_ATTR /* "lookup" */, -}; - -/// -/// @brief Value of "erasure_codec" attribute. -/// -/// @ref NYT::TRichYPath -enum EErasureCodecAttr : i8 -{ - /// @cond Doxygen_Suppress - EC_NONE_ATTR /* "none" */, - EC_REED_SOLOMON_6_3_ATTR /* "reed_solomon_6_3" */, - EC_LRC_12_2_2_ATTR /* "lrc_12_2_2" */, - EC_ISA_LRC_12_2_2_ATTR /* "isa_lrc_12_2_2" */, - /// @endcond -}; - -/// -/// @brief Value of "schema_modification" attribute. -/// -/// @ref NYT::TRichYPath -enum ESchemaModificationAttr : i8 -{ - SM_NONE_ATTR /* "none" */, - SM_UNVERSIONED_UPDATE /* "unversioned_update" */, -}; - -//////////////////////////////////////////////////////////////////////////////// - -/// -/// @brief Table key column description. -/// -/// The description includes column name and sort order. 
-/// -/// @anchor TSortOrder_backward_compatibility -/// @note -/// Many functions that use `TSortOrder` as argument used to take `TString` -/// (the only allowed sort order was "ascending" and user didn't have to specify it). -/// @note -/// This class is designed to provide backward compatibility for such code and therefore -/// objects of this class can be constructed and assigned from TString-like objects only. -/// -/// @see NYT::TSortOperationSpec -class TSortColumn -{ -public: - /// @cond Doxygen_Suppress - using TSelf = TSortColumn; - /// @endcond - - /// Column name - FLUENT_FIELD_ENCAPSULATED(TString, Name); - - /// Sort order - FLUENT_FIELD_DEFAULT_ENCAPSULATED(ESortOrder, SortOrder, ESortOrder::SO_ASCENDING); - - /// - /// @{ - /// - /// @brief Construct object from name and sort order - /// - /// Constructors are intentionally implicit so `TSortColumn` can be compatible with old code. - /// @ref TSortOrder_backward_compatibility - TSortColumn(TStringBuf name = {}, ESortOrder sortOrder = ESortOrder::SO_ASCENDING); - TSortColumn(const TString& name, ESortOrder sortOrder = ESortOrder::SO_ASCENDING); - TSortColumn(const char* name, ESortOrder sortOrder = ESortOrder::SO_ASCENDING); - /// @} - - /// Check that sort order is ascending, throw exception otherwise. - const TSortColumn& EnsureAscending() const; - - /// @brief Convert sort to yson representation as YT API expects it. - TNode ToNode() const; - - /// @brief Comparison is default and checks both name and sort order. - bool operator == (const TSortColumn& rhs) const = default; - - /// - /// @{ - /// - /// @brief Assign object from column name, and set sort order to `ascending`. - /// - /// This is backward compatibility methods. 
- /// - /// @ref TSortOrder_backward_compatibility - TSortColumn& operator = (TStringBuf name); - TSortColumn& operator = (const TString& name); - TSortColumn& operator = (const char* name); - /// @} - - bool operator == (const TStringBuf rhsName) const; - bool operator != (const TStringBuf rhsName) const; - bool operator == (const TString& rhsName) const; - bool operator != (const TString& rhsName) const; - bool operator == (const char* rhsName) const; - bool operator != (const char* rhsName) const; - - // Intentionally implicit conversions. - operator TString() const; - operator TStringBuf() const; - operator std::string() const; - - Y_SAVELOAD_DEFINE(Name_, SortOrder_); -}; - -/// -/// @brief List of @ref TSortColumn -/// -/// Contains a bunch of helper methods such as constructing from single object. -class TSortColumns - : public TOneOrMany<TSortColumn, TSortColumns> -{ -public: - using TOneOrMany<TSortColumn, TSortColumns>::TOneOrMany; - - /// Construct empty list. - TSortColumns(); - - /// - /// @{ - /// - /// @brief Construct list of ascending sort order columns by their names. - /// - /// Required for backward compatibility. - /// - /// @ref TSortOrder_backward_compatibility - TSortColumns(const TVector<TString>& names); - TSortColumns(const TColumnNames& names); - /// @} - - - /// - /// @brief Implicit conversion to column list. - /// - /// If all columns has ascending sort order return list of their names. - /// Throw exception otherwise. - /// - /// Required for backward compatibility. - /// - /// @ref TSortOrder_backward_compatibility - operator TColumnNames() const; - - /// Make sure that all columns are of ascending sort order. - const TSortColumns& EnsureAscending() const; - - /// Get list of column names. - TVector<TString> GetNames() const; -}; - -//////////////////////////////////////////////////////////////////////////////// - -/// Helper function to create new style type from old style one. 
-NTi::TTypePtr ToTypeV3(EValueType type, bool required); - -/// -/// @brief Single column description -/// -/// Each field describing column has setter and getter. -/// -/// Example reading field: -/// ``` -/// ... columnSchema.Name() ... -/// ``` -/// -/// Example setting field: -/// ``` -/// columnSchema.Name("my-column").Type(VT_INT64); // set name and type -/// ``` -/// -/// @ref https://yt.yandex-team.ru/docs/description/storage/static_schema -class TColumnSchema -{ -public: - /// @cond Doxygen_Suppress - using TSelf = TColumnSchema; - /// @endcond - - /// - /// @brief Construct empty column schemas - /// - /// @note - /// Such schema cannot be used in schema as it it doesn't have name. - TColumnSchema(); - - /// - /// @{ - /// - /// @brief Copy and move constructors are default. - TColumnSchema(const TColumnSchema&) = default; - TColumnSchema& operator=(const TColumnSchema&) = default; - /// @} - - - FLUENT_FIELD_ENCAPSULATED(TString, Name); - - /// - /// @brief Functions to work with type in old manner. - /// - /// @deprecated New code is recommended to work with types using @ref NTi::TTypePtr from type_info library. - TColumnSchema& Type(EValueType type) &; - TColumnSchema Type(EValueType type) &&; - EValueType Type() const; - - /// @brief Set and get column type. - /// @{ - TColumnSchema& Type(const NTi::TTypePtr& type) &; - TColumnSchema Type(const NTi::TTypePtr& type) &&; - - TColumnSchema& TypeV3(const NTi::TTypePtr& type) &; - TColumnSchema TypeV3(const NTi::TTypePtr& type) &&; - NTi::TTypePtr TypeV3() const; - /// @} - - /// - /// @brief Raw yson representation of column type - /// @deprecated Prefer to use `TypeV3` methods. 
- FLUENT_FIELD_OPTION_ENCAPSULATED(TNode, RawTypeV3); - - /// Column sort order - FLUENT_FIELD_OPTION_ENCAPSULATED(ESortOrder, SortOrder); - - /// - /// @brief Lock group name - /// - /// @ref https://yt.yandex-team.ru/docs/description/dynamic_tables/sorted_dynamic_tables#blokirovka-stroki - FLUENT_FIELD_OPTION_ENCAPSULATED(TString, Lock); - - /// Expression defining column value - FLUENT_FIELD_OPTION_ENCAPSULATED(TString, Expression); - - /// Aggregating function name - FLUENT_FIELD_OPTION_ENCAPSULATED(TString, Aggregate); - - /// - /// @brief Storage group name - /// - /// @ref https://yt.yandex-team.ru/docs/description/storage/static_schema - FLUENT_FIELD_OPTION_ENCAPSULATED(TString, Group); - - /// - /// @brief Column requiredness. - /// - /// Required columns doesn't accept NULL values. - /// Usually if column is required it means that it has Optional<...> type - bool Required() const; - - /// - /// @{ - /// - /// @brief Set type in old-style manner - TColumnSchema& Type(EValueType type, bool required) &; - TColumnSchema Type(EValueType type, bool required) &&; - /// @} - -private: - friend void Deserialize(TColumnSchema& columnSchema, const TNode& node); - NTi::TTypePtr TypeV3_; - bool Required_ = false; -}; - -/// Equality check checks all fields of column schema. -bool operator==(const TColumnSchema& lhs, const TColumnSchema& rhs); - -/// -/// @brief Description of table schema -/// -/// @see https://yt.yandex-team.ru/docs/description/storage/static_schema -class TTableSchema -{ -public: - /// @cond Doxygen_Suppress - using TSelf = TTableSchema; - /// @endcond - - /// Column schema - FLUENT_VECTOR_FIELD_ENCAPSULATED(TColumnSchema, Column); - - /// - /// @brief Strictness of the schema - /// - /// Strict schemas are not allowed to have columns not described in schema. 
- /// Nonstrict schemas are allowed to have such columns, all such missing columns are assumed to have - FLUENT_FIELD_DEFAULT_ENCAPSULATED(bool, Strict, true); - - /// - /// @brief Whether keys are unique - /// - /// This flag can be set only for schemas that have sorted columns. - /// If flag is set table cannot have multiple rows with same key. - FLUENT_FIELD_DEFAULT_ENCAPSULATED(bool, UniqueKeys, false); - - /// Get modifiable column list - TVector<TColumnSchema>& MutableColumns(); - - /// Check if schema has any described column - [[nodiscard]] bool Empty() const; - - /// Add column - TTableSchema& AddColumn(const TString& name, const NTi::TTypePtr& type, ESortOrder sortOrder) &; - /// @copydoc NYT::TTableSchema::AddColumn(const TString&, const NTi::TTypePtr&, ESortOrder)&; - TTableSchema AddColumn(const TString& name, const NTi::TTypePtr& type, ESortOrder sortOrder) &&; - - /// @copydoc NYT::TTableSchema::AddColumn(const TString&, const NTi::TTypePtr&, ESortOrder)&; - TTableSchema& AddColumn(const TString& name, const NTi::TTypePtr& type) &; - /// @copydoc NYT::TTableSchema::AddColumn(const TString&, const NTi::TTypePtr&, ESortOrder)&; - TTableSchema AddColumn(const TString& name, const NTi::TTypePtr& type) &&; - - /// Add optional column of specified type - TTableSchema& AddColumn(const TString& name, EValueType type, ESortOrder sortOrder) &; - /// @copydoc NYT::TTableSchema::AddColumn(const TString&, EValueType, ESortOrder)&; - TTableSchema AddColumn(const TString& name, EValueType type, ESortOrder sortOrder) &&; - - /// @copydoc NYT::TTableSchema::AddColumn(const TString&, EValueType, ESortOrder)&; - TTableSchema& AddColumn(const TString& name, EValueType type) &; - /// @copydoc NYT::TTableSchema::AddColumn(const TString&, EValueType, ESortOrder)&; - TTableSchema AddColumn(const TString& name, EValueType type) &&; - - /// - /// @brief Make table schema sorted by specified columns - /// - /// Resets old key columns if any - TTableSchema& SortBy(const 
TSortColumns& columns) &; - - /// @copydoc NYT::TTableSchema::SortBy(const TSortColumns&)&; - TTableSchema SortBy(const TSortColumns& columns) &&; - - /// Get yson description of table schema - [[nodiscard]] TNode ToNode() const; - - /// Parse schema from yson node - static NYT::TTableSchema FromNode(const TNode& node); - - friend void Deserialize(TTableSchema& tableSchema, const TNode& node); -}; - -/// Check for equality of all columns and all schema attributes -bool operator==(const TTableSchema& lhs, const TTableSchema& rhs); - -/// Create table schema by protobuf message descriptor -TTableSchema CreateTableSchema( - const ::google::protobuf::Descriptor& messageDescriptor, - const TSortColumns& sortColumns = TSortColumns(), - bool keepFieldsWithoutExtension = true); - -/// Create table schema by protobuf message type -template <class TProtoType, typename = std::enable_if_t<std::is_base_of_v<::google::protobuf::Message, TProtoType>>> -inline TTableSchema CreateTableSchema( - const TSortColumns& sortColumns = TSortColumns(), - bool keepFieldsWithoutExtension = true) -{ - static_assert( - std::is_base_of_v<::google::protobuf::Message, TProtoType>, - "Template argument must be derived from ::google::protobuf::Message"); - - return CreateTableSchema( - *TProtoType::descriptor(), - sortColumns, - keepFieldsWithoutExtension); -} - -/// -/// @brief Create strict table schema from `struct` type. -/// -/// Names and types of columns are taken from struct member names and types. -/// `Strict` flag is set to true, all other attribute of schema and columns -/// are left with default values -TTableSchema CreateTableSchema(NTi::TTypePtr type); - -//////////////////////////////////////////////////////////////////////////////// - -/// -/// @brief Enumeration describing comparison operation used in key bound. -/// -/// ERelation is a part of @ref NYT::TKeyBound that can be used as -/// lower or upper key limit in @ref TReadLimit. 
-/// -/// Relations `Less` and `LessOrEqual` are for upper limit and -/// relations `Greater` and `GreaterOrEqual` are for lower limit. -/// -/// It is a error to use relation in the limit of wrong kind. -/// -/// @see https://yt.yandex-team.ru/docs/description/common/ypath#rich_ypath -enum class ERelation -{ - /// - /// @brief Relation "less" - /// - /// Specifies range of keys that are before specified key. - /// Can only be used in upper limit. - Less /* "<" */, - - /// - /// @brief Relation "less or equal" - /// - /// Specifies range of keys that are before or equal specified key. - /// Can only be used in upper limit. - LessOrEqual /* "<=" */, - - /// - /// @brief Relation "greater" - /// - /// Specifies range of keys that are after specified key. - /// Can only be used in lower limit. - Greater /* ">" */, - - /// - /// @brief Relation "greater or equal" - /// - /// Specifies range of keys that are after or equal than specified key. - /// Can only be used in lower limit. - GreaterOrEqual /* ">=" */, -}; - -/// -/// @brief Key with relation specifying interval of keys in lower or upper limit of @ref NYT::TReadRange -/// -/// @see https://yt.yandex-team.ru/docs/description/common/ypath#rich_ypath -struct TKeyBound -{ - /// @cond Doxygen_Suppress - using TSelf = TKeyBound; - - explicit TKeyBound(ERelation relation = ERelation::Less, TKey key = TKey{}); - - FLUENT_FIELD_DEFAULT_ENCAPSULATED(ERelation, Relation, ERelation::Less); - FLUENT_FIELD_DEFAULT_ENCAPSULATED(TKey, Key, TKey{}); - /// @endcond -}; - -/// -/// @brief Description of the read limit. -/// -/// It is actually a variant and must store exactly one field. -/// -/// @see https://yt.yandex-team.ru/docs/description/common/ypath#rich_ypath -struct TReadLimit -{ - /// @cond Doxygen_Suppress - using TSelf = TReadLimit; - /// @endcond - - /// - /// @brief KeyBound specifies table key and whether to include it - /// - /// It can be used in lower or upper limit when reading tables. 
- FLUENT_FIELD_OPTION(TKeyBound, KeyBound); - - /// - /// @brief Table key - /// - /// It can be used in exact, lower or upper limit when reading tables. - FLUENT_FIELD_OPTION(TKey, Key); - - /// - /// @brief Row index - /// - /// It can be used in exact, lower or upper limit when reading tables. - FLUENT_FIELD_OPTION(i64, RowIndex); - - /// - /// @brief File offset - /// - /// It can be used in lower or upper limit when reading files. - FLUENT_FIELD_OPTION(i64, Offset); - - /// - /// @brief Tablet index - /// - /// It can be used in lower or upper limit in dynamic table operations - FLUENT_FIELD_OPTION(i64, TabletIndex); -}; - -/// -/// @brief Range of a table or a file -/// -/// @see https://yt.yandex-team.ru/docs/description/common/ypath#rich_ypath -struct TReadRange -{ - using TSelf = TReadRange; - - /// - /// @brief Lower limit of the range - /// - /// It is usually inclusive (except when @ref NYT::TKeyBound with relation @ref NYT::ERelation::Greater is used). - FLUENT_FIELD(TReadLimit, LowerLimit); - - /// - /// @brief Lower limit of the range - /// - /// It is usually exclusive (except when @ref NYT::TKeyBound with relation @ref NYT::ERelation::LessOrEqual is used). - FLUENT_FIELD(TReadLimit, UpperLimit); - - /// Exact key or row index. - FLUENT_FIELD(TReadLimit, Exact); - - /// Create read range from row indexes. - static TReadRange FromRowIndices(i64 lowerLimit, i64 upperLimit) - { - return TReadRange() - .LowerLimit(TReadLimit().RowIndex(lowerLimit)) - .UpperLimit(TReadLimit().RowIndex(upperLimit)); - } - - /// Create read range from keys. - static TReadRange FromKeys(const TKey& lowerKeyInclusive, const TKey& upperKeyExclusive) - { - return TReadRange() - .LowerLimit(TReadLimit().Key(lowerKeyInclusive)) - .UpperLimit(TReadLimit().Key(upperKeyExclusive)); - } -}; - -/// -/// @brief Path with additional attributes. -/// -/// Allows to specify additional attributes for path used in some operations. 
-/// -/// @see https://yt.yandex-team.ru/docs/description/common/ypath#rich_ypath -struct TRichYPath -{ - /// @cond Doxygen_Suppress - using TSelf = TRichYPath; - /// @endcond - - /// Path itself. - FLUENT_FIELD(TYPath, Path); - - /// Specifies that path should be appended not overwritten - FLUENT_FIELD_OPTION(bool, Append); - - /// @deprecated Deprecated attribute. - FLUENT_FIELD_OPTION(bool, PartiallySorted); - - /// Specifies that path is expected to be sorted by these columns. - FLUENT_FIELD(TSortColumns, SortedBy); - - /// Add range to read. - TRichYPath& AddRange(TReadRange range) - { - if (!Ranges_) { - Ranges_.ConstructInPlace(); - } - Ranges_->push_back(std::move(range)); - return *this; - } - - TRichYPath& ResetRanges() - { - Ranges_.Clear(); - return *this; - } - - /// - /// @{ - /// - /// Return ranges to read. - /// - /// NOTE: Nothing (in TMaybe) and empty TVector are different ranges. - /// Nothing represents universal range (reader reads all table rows). - /// Empty TVector represents empty range (reader returns empty set of rows). - const TMaybe<TVector<TReadRange>>& GetRanges() const - { - return Ranges_; - } - - TMaybe<TVector<TReadRange>>& MutableRanges() - { - return Ranges_; - } - - /// - /// @{ - /// - /// Get range view, that is convenient way to iterate through all ranges. - TArrayRef<TReadRange> MutableRangesView() - { - if (Ranges_.Defined()) { - return TArrayRef(Ranges_->data(), Ranges_->size()); - } else { - return {}; - } - } - - TArrayRef<const TReadRange> GetRangesView() const - { - if (Ranges_.Defined()) { - return TArrayRef(Ranges_->data(), Ranges_->size()); - } else { - return {}; - } - } - /// @} - - /// @{ - /// - /// Get range by index. - const TReadRange& GetRange(ssize_t i) const - { - return Ranges_.GetRef()[i]; - } - - TReadRange& MutableRange(ssize_t i) - { - return Ranges_.GetRef()[i]; - } - /// @} - - /// - /// @brief Specifies columns that should be read. - /// - /// If it's set to Nothing then all columns will be read. 
- /// If empty TColumnNames is specified then each read row will be empty. - FLUENT_FIELD_OPTION(TColumnNames, Columns); - - FLUENT_FIELD_OPTION(bool, Teleport); - FLUENT_FIELD_OPTION(bool, Primary); - FLUENT_FIELD_OPTION(bool, Foreign); - FLUENT_FIELD_OPTION(i64, RowCountLimit); - - FLUENT_FIELD_OPTION(TString, FileName); - - /// Specifies original path to be shown in Web UI - FLUENT_FIELD_OPTION(TYPath, OriginalPath); - - /// - /// @brief Specifies that this path points to executable file - /// - /// Used in operation specs. - FLUENT_FIELD_OPTION(bool, Executable); - - /// - /// @brief Specify format to use when loading table. - /// - /// Used in operation specs. - FLUENT_FIELD_OPTION(TNode, Format); - - /// @brief Specifies table schema that will be set on the path - FLUENT_FIELD_OPTION(TTableSchema, Schema); - - /// Specifies compression codec that will be set on the path - FLUENT_FIELD_OPTION(TString, CompressionCodec); - - /// Specifies erasure codec that will be set on the path - FLUENT_FIELD_OPTION(EErasureCodecAttr, ErasureCodec); - - /// Specifies schema modification that will be set on the path - FLUENT_FIELD_OPTION(ESchemaModificationAttr, SchemaModification); - - /// Specifies optimize_for attribute that will be set on the path - FLUENT_FIELD_OPTION(EOptimizeForAttr, OptimizeFor); - - /// - /// @brief Do not put file used in operation into node cache - /// - /// If BypassArtifactCache == true, file will be loaded into the job's sandbox bypassing the cache on the YT node. - /// It helps jobs that use tmpfs to start faster, - /// because files will be loaded into tmpfs directly bypassing disk cache - FLUENT_FIELD_OPTION(bool, BypassArtifactCache); - - /// - /// @brief Timestamp of dynamic table. - /// - /// NOTE: it is _not_ unix timestamp - /// (instead it's transaction timestamp, that is more complex structure). - FLUENT_FIELD_OPTION(i64, Timestamp); - - /// - /// @brief Specify transaction that should be used to access this path. 
- /// - /// Allows to start cross-transactional operations. - FLUENT_FIELD_OPTION(TTransactionId, TransactionId); - - using TRenameColumnsDescriptor = THashMap<TString, TString>; - - /// Specifies columnar mapping which will be applied to columns before transfer to job. - FLUENT_FIELD_OPTION(TRenameColumnsDescriptor, RenameColumns); - - /// Create empty path with no attributes - TRichYPath() - { } - - /// - /// @{ - /// - /// @brief Create path from string - TRichYPath(const char* path) - : Path_(path) - { } - - TRichYPath(const TYPath& path) - : Path_(path) - { } - /// @} - -private: - TMaybe<TVector<TReadRange>> Ranges_; -}; - -/// -/// @ref Create copy of @ref NYT::TRichYPath with schema derived from proto message. -/// -/// -template <typename TProtoType> -TRichYPath WithSchema(const TRichYPath& path, const TSortColumns& sortBy = TSortColumns()) -{ - static_assert(std::is_base_of_v<::google::protobuf::Message, TProtoType>, "TProtoType must be Protobuf message"); - - auto schemedPath = path; - if (!schemedPath.Schema_) { - schemedPath.Schema(CreateTableSchema<TProtoType>(sortBy)); - } - return schemedPath; -} - -/// -/// @brief Create copy of @ref NYT::TRichYPath with schema derived from TRowType if possible. -/// -/// If TRowType is protobuf message schema is derived from it and set to returned path. -/// Otherwise schema of original path is left unchanged (and probably unset). -template <typename TRowType> -TRichYPath MaybeWithSchema(const TRichYPath& path, const TSortColumns& sortBy = TSortColumns()) -{ - if constexpr (std::is_base_of_v<::google::protobuf::Message, TRowType>) { - return WithSchema<TRowType>(path, sortBy); - } else { - return path; - } -} - -/// -/// @brief Get the list of ranges related to path in compatibility mode. -/// -/// - If path is missing ranges, empty list is returned. -/// - If path has associated range list and the list is not empty, function returns this list. 
-/// - If path has associated range list and this list is empty, exception is thrown. -/// -/// Before YT-17683 RichYPath didn't support empty range list and empty range actualy meant universal range. -/// This function emulates this old behavior. -/// -/// @see https://st.yandex-team.ru/YT-17683 -const TVector<TReadRange>& GetRangesCompat(const TRichYPath& path); - -//////////////////////////////////////////////////////////////////////////////// - -/// Statistics about table columns. -struct TTableColumnarStatistics -{ - /// Total data weight for all chunks for each of requested columns. - THashMap<TString, i64> ColumnDataWeight; - - /// Total weight of all old chunks that don't keep columnar statistics. - i64 LegacyChunksDataWeight = 0; - - /// Timestamps total weight (only for dynamic tables). - TMaybe<i64> TimestampTotalWeight; -}; - -//////////////////////////////////////////////////////////////////////////////// - -/// Description of a partition. -struct TMultiTablePartition -{ - struct TStatistics - { - i64 ChunkCount = 0; - i64 DataWeight = 0; - i64 RowCount = 0; - }; - - /// Ranges of input tables for this partition. - TVector<TRichYPath> TableRanges; - - /// Aggregate statistics of all the table ranges in the partition. - TStatistics AggregateStatistics; -}; - -/// Table partitions from GetTablePartitions command. -struct TMultiTablePartitions -{ - /// Disjoint partitions into which the input tables were divided. - TVector<TMultiTablePartition> Partitions; -}; - -//////////////////////////////////////////////////////////////////////////////// - -/// -/// @brief Contains information about tablet -/// -/// @see NYT::IClient::GetTabletInfos -struct TTabletInfo -{ - /// - /// @brief Indicates the total number of rows added to the tablet (including trimmed ones). - /// - /// Currently only provided for ordered tablets. - i64 TotalRowCount = 0; - - /// - /// @brief Contains the number of front rows that are trimmed and are not guaranteed to be accessible. 
- /// - /// Only makes sense for ordered tablet. - i64 TrimmedRowCount = 0; - - /// - /// @brief Tablet cell barrier timestamp, which lags behind the current timestamp - /// - /// It is guaranteed that all transactions with commit timestamp not exceeding the barrier are fully committed; - /// e.g. all their added rows are visible (and are included in @ref NYT::TTabletInfo::TotalRowCount). - /// Mostly makes sense for ordered tablets. - ui64 BarrierTimestamp; -}; - -//////////////////////////////////////////////////////////////////////////////// - -/// List of attributes to retrieve in operations like @ref NYT::ICypressClient::Get -struct TAttributeFilter -{ - /// @cond Doxygen_Suppress - using TSelf = TAttributeFilter; - /// @endcond - - /// List of attributes. - FLUENT_VECTOR_FIELD(TString, Attribute); -}; - -//////////////////////////////////////////////////////////////////////////////// - -/// -/// @brief Check if none of the fields of @ref NYT::TReadLimit is set. -/// -/// @return true if any field of readLimit is set and false otherwise. -bool IsTrivial(const TReadLimit& readLimit); - -/// Convert yson node type to table schema type -EValueType NodeTypeToValueType(TNode::EType nodeType); - -//////////////////////////////////////////////////////////////////////////////// - -/// -/// @brief Enumeration for specifying how reading from master is performed. -/// -/// Used in operations like NYT::ICypressClient::Get -enum class EMasterReadKind : int -{ - /// - /// @brief Reading from leader. - /// - /// Should almost never be used since it's expensive and for regular uses has no difference from - /// "follower" read. - Leader /* "leader" */, - - /// @brief Reading from master follower (default). 
- Follower /* "follower" */, - Cache /* "cache" */, - MasterCache /* "master_cache" */, -}; - -//////////////////////////////////////////////////////////////////////////////// - -/// @cond Doxygen_Suppress -namespace NDetail { - -// MUST NOT BE USED BY CLIENTS -// TODO: we should use default GENERATE_ENUM_SERIALIZATION -TString ToString(EValueType type); - -} // namespace NDetail -/// @endcond - -//////////////////////////////////////////////////////////////////////////////// - -} // namespace NYT diff --git a/yt/cpp/mapreduce/interface/common_ut.cpp b/yt/cpp/mapreduce/interface/common_ut.cpp deleted file mode 100644 index 3f19433816..0000000000 --- a/yt/cpp/mapreduce/interface/common_ut.cpp +++ /dev/null @@ -1,303 +0,0 @@ -#include "common_ut.h" - -#include "fluent.h" - -#include <yt/cpp/mapreduce/interface/common.h> - -#include <yt/cpp/mapreduce/tests/yt_unittest_lib/yt_unittest_lib.h> - -#include <library/cpp/testing/unittest/registar.h> - -#include <library/cpp/yson/node/node_io.h> -#include <library/cpp/yson/node/node_builder.h> - -#include <util/generic/xrange.h> - -using namespace NYT; - -template <class T> -TString SaveToString(const T& obj) -{ - TString s; - TStringOutput out(s); - ::Save(&out, obj); - return s; -} - -template <class T> -T LoadFromString(TStringBuf s) -{ - TMemoryInput in(s); - T obj; - ::Load(&in, obj); - return obj; -} - -template <class T> -T SaveLoad(const T& obj) -{ - return LoadFromString<T>(SaveToString(obj)); -} - -Y_UNIT_TEST_SUITE(Common) -{ - Y_UNIT_TEST(SortColumnsLegacy) - { - TSortColumns keys1("a", "b"); - UNIT_ASSERT((keys1.Parts_ == TSortColumns{"a", "b"})); - - keys1.Add("c", "d"); - UNIT_ASSERT((keys1.Parts_ == TSortColumns{"a", "b", "c", "d"})); - - auto keys2 = TSortColumns(keys1).Add("e", "f"); - UNIT_ASSERT((keys1.Parts_ == TSortColumns{"a", "b", "c", "d"})); - UNIT_ASSERT((keys2.Parts_ == TSortColumns{"a", "b", "c", "d", "e", "f"})); - - auto keys3 = TSortColumns(keys1).Add("e").Add("f").Add("g"); - 
UNIT_ASSERT((keys1.Parts_ == TSortColumns{"a", "b", "c", "d"})); - UNIT_ASSERT((keys3.Parts_ == TSortColumns{"a", "b", "c", "d", "e", "f", "g"})); - } - - Y_UNIT_TEST(SortColumn) - { - auto ascending = TSortColumn("a"); - UNIT_ASSERT_VALUES_EQUAL(ascending.Name(), "a"); - UNIT_ASSERT_VALUES_EQUAL(ascending.SortOrder(), ESortOrder::SO_ASCENDING); - UNIT_ASSERT_VALUES_EQUAL(ascending, TSortColumn("a", ESortOrder::SO_ASCENDING)); - UNIT_ASSERT_VALUES_UNEQUAL(ascending, TSortColumn("a", ESortOrder::SO_DESCENDING)); - - UNIT_ASSERT_NO_EXCEPTION(ascending.EnsureAscending()); - UNIT_ASSERT_VALUES_EQUAL(static_cast<TString>(ascending), "a"); - UNIT_ASSERT_VALUES_EQUAL(ascending, "a"); - - auto another = ascending; - UNIT_ASSERT_NO_EXCEPTION(another = "another"); - UNIT_ASSERT_VALUES_EQUAL(another.Name(), "another"); - UNIT_ASSERT_VALUES_EQUAL(another.SortOrder(), ESortOrder::SO_ASCENDING); - UNIT_ASSERT_VALUES_EQUAL(another, TSortColumn("another", ESortOrder::SO_ASCENDING)); - UNIT_ASSERT_VALUES_UNEQUAL(another, TSortColumn("another", ESortOrder::SO_DESCENDING)); - - auto ascendingNode = BuildYsonNodeFluently().Value(ascending); - UNIT_ASSERT_VALUES_EQUAL(ascendingNode, TNode("a")); - - UNIT_ASSERT_VALUES_EQUAL(SaveLoad(ascending), ascending); - UNIT_ASSERT_VALUES_UNEQUAL(SaveToString(ascending), SaveToString(TString("a"))); - - auto descending = TSortColumn("a", ESortOrder::SO_DESCENDING); - UNIT_ASSERT_VALUES_EQUAL(descending.Name(), "a"); - UNIT_ASSERT_VALUES_EQUAL(descending.SortOrder(), ESortOrder::SO_DESCENDING); - UNIT_ASSERT_VALUES_EQUAL(descending, TSortColumn("a", ESortOrder::SO_DESCENDING)); - UNIT_ASSERT_VALUES_UNEQUAL(descending, TSortColumn("a", ESortOrder::SO_ASCENDING)); - - UNIT_ASSERT_EXCEPTION(descending.EnsureAscending(), yexception); - UNIT_ASSERT_EXCEPTION(static_cast<TString>(descending), yexception); - UNIT_ASSERT_EXCEPTION(descending == "a", yexception); - UNIT_ASSERT_EXCEPTION(descending = "a", yexception); - - auto descendingNode = 
BuildYsonNodeFluently().Value(descending); - UNIT_ASSERT_VALUES_EQUAL(descendingNode, TNode()("name", "a")("sort_order", "descending")); - - UNIT_ASSERT_VALUES_EQUAL(SaveLoad(descending), descending); - UNIT_ASSERT_VALUES_UNEQUAL(SaveToString(descending), SaveToString("a")); - - UNIT_ASSERT_VALUES_EQUAL(ToString(TSortColumn("blah")), "blah"); - UNIT_ASSERT_VALUES_EQUAL(ToString(TSortColumn("blah", ESortOrder::SO_DESCENDING)), "{\"name\"=\"blah\";\"sort_order\"=\"descending\"}"); - } - - Y_UNIT_TEST(SortColumns) - { - TSortColumns ascending("a", "b"); - UNIT_ASSERT(ascending.Parts_ == (TSortColumns{"a", "b"})); - UNIT_ASSERT_NO_EXCEPTION(ascending.EnsureAscending()); - UNIT_ASSERT_VALUES_EQUAL(static_cast<TColumnNames>(ascending).Parts_, (TVector<TString>{"a", "b"})); - UNIT_ASSERT_VALUES_EQUAL(ascending.GetNames(), (TVector<TString>{"a", "b"})); - - auto mixed = ascending; - mixed.Add(TSortColumn("c", ESortOrder::SO_DESCENDING), "d"); - UNIT_ASSERT((mixed.Parts_ != TVector<TSortColumn>{"a", "b", "c", "d"})); - UNIT_ASSERT((mixed.Parts_ == TVector<TSortColumn>{"a", "b", TSortColumn("c", ESortOrder::SO_DESCENDING), "d"})); - UNIT_ASSERT_VALUES_EQUAL(mixed.GetNames(), (TVector<TString>{"a", "b", "c", "d"})); - UNIT_ASSERT_EXCEPTION(mixed.EnsureAscending(), yexception); - UNIT_ASSERT_EXCEPTION(static_cast<TColumnNames>(mixed), yexception); - } - - Y_UNIT_TEST(KeyBound) - { - auto keyBound = TKeyBound(ERelation::Greater, TKey(7, "a", TNode()("x", "y"))); - UNIT_ASSERT_VALUES_EQUAL(keyBound.Relation(), ERelation::Greater); - UNIT_ASSERT_EQUAL(keyBound.Key(), TKey(7, "a", TNode()("x", "y"))); - - auto keyBound1 = TKeyBound().Relation(ERelation::Greater).Key(TKey(7, "a", TNode()("x", "y"))); - auto expectedNode = TNode() - .Add(">") - .Add(TNode().Add(7).Add("a").Add(TNode()("x", "y"))); - - UNIT_ASSERT_VALUES_EQUAL(expectedNode, BuildYsonNodeFluently().Value(keyBound)); - UNIT_ASSERT_VALUES_EQUAL(expectedNode, BuildYsonNodeFluently().Value(keyBound1)); - - 
keyBound.Relation(ERelation::LessOrEqual); - keyBound.Key(TKey("A", 7)); - UNIT_ASSERT_VALUES_EQUAL(keyBound.Relation(), ERelation::LessOrEqual); - UNIT_ASSERT_EQUAL(keyBound.Key(), TKey("A", 7)); - - UNIT_ASSERT_VALUES_EQUAL( - BuildYsonNodeFluently().Value(keyBound), - TNode() - .Add("<=") - .Add(TNode().Add("A").Add(7))); - } - - Y_UNIT_TEST(TTableSchema) - { - TTableSchema schema; - schema - .AddColumn(TColumnSchema().Name("a").Type(EValueType::VT_STRING).SortOrder(SO_ASCENDING)) - .AddColumn(TColumnSchema().Name("b").Type(EValueType::VT_UINT64)) - .AddColumn(TColumnSchema().Name("c").Type(EValueType::VT_INT64)); - auto checkSortBy = [](TTableSchema schema, const TVector<TString>& columns) { - auto initialSchema = schema; - schema.SortBy(columns); - for (auto i: xrange(columns.size())) { - UNIT_ASSERT_VALUES_EQUAL(schema.Columns()[i].Name(), columns[i]); - UNIT_ASSERT_VALUES_EQUAL(schema.Columns()[i].SortOrder(), ESortOrder::SO_ASCENDING); - } - for (auto i: xrange(columns.size(), (size_t)initialSchema.Columns().size())) { - UNIT_ASSERT_VALUES_EQUAL(schema.Columns()[i].SortOrder(), Nothing()); - } - UNIT_ASSERT_VALUES_EQUAL(initialSchema.Columns().size(), schema.Columns().size()); - return schema; - }; - auto newSchema = checkSortBy(schema, {"b"}); - UNIT_ASSERT_VALUES_EQUAL(newSchema.Columns()[1].Name(), TString("a")); - UNIT_ASSERT_VALUES_EQUAL(newSchema.Columns()[2].Name(), TString("c")); - checkSortBy(schema, {"b", "c"}); - checkSortBy(schema, {"c", "a"}); - UNIT_ASSERT_EXCEPTION(checkSortBy(schema, {"b", "b"}), yexception); - UNIT_ASSERT_EXCEPTION(checkSortBy(schema, {"a", "junk"}), yexception); - } - - Y_UNIT_TEST(TColumnSchema_TypeV3) - { - { - auto column = TColumnSchema().Type(NTi::Interval()); - UNIT_ASSERT_VALUES_EQUAL(column.Required(), true); - UNIT_ASSERT_VALUES_EQUAL(column.Type(), VT_INTERVAL); - } - { - auto column = TColumnSchema().Type(NTi::Optional(NTi::Date())); - UNIT_ASSERT_VALUES_EQUAL(column.Required(), false); - 
UNIT_ASSERT_VALUES_EQUAL(column.Type(), VT_DATE); - } - { - auto column = TColumnSchema().Type(NTi::Null()); - UNIT_ASSERT_VALUES_EQUAL(column.Required(), false); - UNIT_ASSERT_VALUES_EQUAL(column.Type(), VT_NULL); - } - { - auto column = TColumnSchema().Type(NTi::Optional(NTi::Null())); - UNIT_ASSERT_VALUES_EQUAL(column.Required(), false); - UNIT_ASSERT_VALUES_EQUAL(column.Type(), VT_ANY); - } - } - - Y_UNIT_TEST(ToTypeV3) - { - UNIT_ASSERT_VALUES_EQUAL(*ToTypeV3(VT_INT32, true), *NTi::Int32()); - UNIT_ASSERT_VALUES_EQUAL(*ToTypeV3(VT_UTF8, false), *NTi::Optional(NTi::Utf8())); - } - - Y_UNIT_TEST(DeserializeColumn) - { - auto deserialize = [] (TStringBuf yson) { - auto node = NodeFromYsonString(yson); - TColumnSchema column; - Deserialize(column, node); - return column; - }; - - auto column = deserialize("{name=foo; type=int64; required=%false}"); - UNIT_ASSERT_VALUES_EQUAL(column.Name(), "foo"); - UNIT_ASSERT_VALUES_EQUAL(*column.TypeV3(), *NTi::Optional(NTi::Int64())); - - column = deserialize("{name=bar; type=utf8; required=%true; type_v3=utf8}"); - UNIT_ASSERT_VALUES_EQUAL(column.Name(), "bar"); - UNIT_ASSERT_VALUES_EQUAL(*column.TypeV3(), *NTi::Utf8()); - } - - Y_UNIT_TEST(ColumnSchemaEquality) - { - auto base = TColumnSchema() - .Name("col") - .TypeV3(NTi::Optional(NTi::List(NTi::String()))) - .SortOrder(ESortOrder::SO_ASCENDING) - .Lock("lock") - .Expression("x + 12") - .Aggregate("sum") - .Group("group"); - - auto other = base; - ASSERT_SERIALIZABLES_EQUAL(other, base); - other.Name("other"); - ASSERT_SERIALIZABLES_UNEQUAL(other, base); - - other = base; - other.TypeV3(NTi::List(NTi::String())); - ASSERT_SERIALIZABLES_UNEQUAL(other, base); - - other = base; - other.ResetSortOrder(); - ASSERT_SERIALIZABLES_UNEQUAL(other, base); - - other = base; - other.Lock("lock1"); - ASSERT_SERIALIZABLES_UNEQUAL(other, base); - - other = base; - other.Expression("x + 13"); - ASSERT_SERIALIZABLES_UNEQUAL(other, base); - - other = base; - other.ResetAggregate(); - 
ASSERT_SERIALIZABLES_UNEQUAL(other, base); - - other = base; - other.Group("group1"); - ASSERT_SERIALIZABLES_UNEQUAL(other, base); - } - - Y_UNIT_TEST(TableSchemaEquality) - { - auto col1 = TColumnSchema() - .Name("col1") - .TypeV3(NTi::Optional(NTi::List(NTi::String()))) - .SortOrder(ESortOrder::SO_ASCENDING); - - auto col2 = TColumnSchema() - .Name("col2") - .TypeV3(NTi::Uint32()); - - auto schema = TTableSchema() - .AddColumn(col1) - .AddColumn(col2) - .Strict(true) - .UniqueKeys(true); - - auto other = schema; - ASSERT_SERIALIZABLES_EQUAL(other, schema); - - other.Strict(false); - ASSERT_SERIALIZABLES_UNEQUAL(other, schema); - - other = schema; - other.MutableColumns()[0].TypeV3(NTi::List(NTi::String())); - ASSERT_SERIALIZABLES_UNEQUAL(other, schema); - - other = schema; - other.MutableColumns().push_back(col1); - ASSERT_SERIALIZABLES_UNEQUAL(other, schema); - - other = schema; - other.UniqueKeys(false); - ASSERT_SERIALIZABLES_UNEQUAL(other, schema); - } -} diff --git a/yt/cpp/mapreduce/interface/common_ut.h b/yt/cpp/mapreduce/interface/common_ut.h deleted file mode 100644 index 6f70f09bee..0000000000 --- a/yt/cpp/mapreduce/interface/common_ut.h +++ /dev/null @@ -1 +0,0 @@ -#pragma once diff --git a/yt/cpp/mapreduce/interface/config.cpp b/yt/cpp/mapreduce/interface/config.cpp deleted file mode 100644 index b474dc0844..0000000000 --- a/yt/cpp/mapreduce/interface/config.cpp +++ /dev/null @@ -1,321 +0,0 @@ -#include "config.h" - -#include "operation.h" - -#include <yt/cpp/mapreduce/interface/logging/yt_log.h> - -#include <library/cpp/json/json_reader.h> -#include <library/cpp/svnversion/svnversion.h> - -#include <library/cpp/yson/node/node_builder.h> -#include <library/cpp/yson/node/node_io.h> - -#include <library/cpp/yson/json/yson2json_adapter.h> - -#include <util/string/strip.h> -#include <util/folder/dirut.h> -#include <util/folder/path.h> -#include <util/stream/file.h> -#include <util/generic/singleton.h> -#include <util/string/builder.h> -#include 
<util/string/cast.h> -#include <util/string/type.h> -#include <util/system/hostname.h> -#include <util/system/user.h> -#include <util/system/env.h> - -namespace NYT { - -//////////////////////////////////////////////////////////////////////////////// - -bool TConfig::GetBool(const char* var, bool defaultValue) -{ - TString val = GetEnv(var, ""); - if (val.empty()) { - return defaultValue; - } - return IsTrue(val); -} - -int TConfig::GetInt(const char* var, int defaultValue) -{ - int result = 0; - TString val = GetEnv(var, ""); - if (val.empty()) { - return defaultValue; - } - try { - result = FromString<int>(val); - } catch (const yexception& e) { - ythrow yexception() << "Cannot parse " << var << '=' << val << " as integer: " << e.what(); - } - return result; -} - -TDuration TConfig::GetDuration(const char* var, TDuration defaultValue) -{ - return TDuration::Seconds(GetInt(var, defaultValue.Seconds())); -} - -EEncoding TConfig::GetEncoding(const char* var) -{ - const TString encodingName = GetEnv(var, "identity"); - EEncoding encoding; - if (TryFromString(encodingName, encoding)) { - return encoding; - } else { - ythrow yexception() << var << ": encoding '" << encodingName << "' is not supported"; - } -} - - EUploadDeduplicationMode TConfig::GetUploadingDeduplicationMode( - const char* var, - EUploadDeduplicationMode defaultValue) -{ - const TString deduplicationMode = GetEnv(var, TEnumTraits<EUploadDeduplicationMode>::ToString(defaultValue)); - return TEnumTraits<EUploadDeduplicationMode>::FromString(deduplicationMode); -} - -void TConfig::ValidateToken(const TString& token) -{ - for (size_t i = 0; i < token.size(); ++i) { - ui8 ch = token[i]; - if (ch < 0x21 || ch > 0x7e) { - ythrow yexception() << "Incorrect token character '" << ch << "' at position " << i; - } - } -} - -TString TConfig::LoadTokenFromFile(const TString& tokenPath) -{ - TFsPath path(tokenPath); - return path.IsFile() ? 
Strip(TIFStream(path).ReadAll()) : TString(); -} - -TNode TConfig::LoadJsonSpec(const TString& strSpec) -{ - TNode spec; - TStringInput input(strSpec); - TNodeBuilder builder(&spec); - TYson2JsonCallbacksAdapter callbacks(&builder); - - Y_ENSURE(NJson::ReadJson(&input, &callbacks), "Cannot parse json spec: " << strSpec); - Y_ENSURE(spec.IsMap(), "Json spec is not a map"); - - return spec; -} - -TRichYPath TConfig::LoadApiFilePathOptions(const TString& ysonMap) -{ - TNode attributes; - try { - attributes = NodeFromYsonString(ysonMap); - } catch (const yexception& exc) { - ythrow yexception() << "Failed to parse YT_API_FILE_PATH_OPTIONS (it must be yson map): " << exc; - } - TNode pathNode = ""; - pathNode.Attributes() = attributes; - TRichYPath path; - Deserialize(path, pathNode); - return path; -} - -void TConfig::LoadToken() -{ - if (auto envToken = GetEnv("YT_TOKEN")) { - Token = envToken; - } else if (auto envToken = GetEnv("YT_SECURE_VAULT_YT_TOKEN")) { - // If this code runs inside an vanilla peration in YT - // it should not use regular environment variable `YT_TOKEN` - // because it would be visible in UI. - // Token should be passed via `secure_vault` parameter in operation spec. 
- Token = envToken; - } else if (auto tokenPath = GetEnv("YT_TOKEN_PATH")) { - Token = LoadTokenFromFile(tokenPath); - } else { - Token = LoadTokenFromFile(GetHomeDir() + "/.yt/token"); - } - ValidateToken(Token); -} - -void TConfig::LoadSpec() -{ - TString strSpec = GetEnv("YT_SPEC", "{}"); - Spec = LoadJsonSpec(strSpec); - - strSpec = GetEnv("YT_TABLE_WRITER", "{}"); - TableWriter = LoadJsonSpec(strSpec); -} - -void TConfig::LoadTimings() -{ - ConnectTimeout = GetDuration("YT_CONNECT_TIMEOUT", - TDuration::Seconds(10)); - - SocketTimeout = GetDuration("YT_SOCKET_TIMEOUT", - GetDuration("YT_SEND_RECEIVE_TIMEOUT", // common - TDuration::Seconds(60))); - - AddressCacheExpirationTimeout = TDuration::Minutes(15); - - CacheLockTimeoutPerGb = TDuration::MilliSeconds(1000.0 * 1_GB * 8 / 20_MB); // 20 Mbps = 20 MBps / 8. - - TxTimeout = GetDuration("YT_TX_TIMEOUT", - TDuration::Seconds(120)); - - PingTimeout = GetDuration("YT_PING_TIMEOUT", - TDuration::Seconds(5)); - - PingInterval = GetDuration("YT_PING_INTERVAL", - TDuration::Seconds(5)); - - WaitLockPollInterval = TDuration::Seconds(5); - - RetryInterval = GetDuration("YT_RETRY_INTERVAL", - TDuration::Seconds(3)); - - ChunkErrorsRetryInterval = GetDuration("YT_CHUNK_ERRORS_RETRY_INTERVAL", - TDuration::Seconds(60)); - - RateLimitExceededRetryInterval = GetDuration("YT_RATE_LIMIT_EXCEEDED_RETRY_INTERVAL", - TDuration::Seconds(60)); - - StartOperationRetryInterval = GetDuration("YT_START_OPERATION_RETRY_INTERVAL", - TDuration::Seconds(60)); - - HostListUpdateInterval = TDuration::Seconds(60); -} - -void TConfig::Reset() -{ - Hosts = GetEnv("YT_HOSTS", "hosts"); - Pool = GetEnv("YT_POOL"); - Prefix = GetEnv("YT_PREFIX"); - ApiVersion = GetEnv("YT_VERSION", "v3"); - LogLevel = GetEnv("YT_LOG_LEVEL", "error"); - - ContentEncoding = GetEncoding("YT_CONTENT_ENCODING"); - AcceptEncoding = GetEncoding("YT_ACCEPT_ENCODING"); - - GlobalTxId = GetEnv("YT_TRANSACTION", ""); - - UseAsyncTxPinger = false; - AsyncHttpClientThreads = 
1; - AsyncTxPingerPoolThreads = 1; - - ForceIpV4 = GetBool("YT_FORCE_IPV4"); - ForceIpV6 = GetBool("YT_FORCE_IPV6"); - UseHosts = GetBool("YT_USE_HOSTS", true); - - LoadToken(); - LoadSpec(); - LoadTimings(); - - CacheUploadDeduplicationMode = GetUploadingDeduplicationMode("YT_UPLOAD_DEDUPLICATION", EUploadDeduplicationMode::Host); - - RetryCount = Max(GetInt("YT_RETRY_COUNT", 10), 1); - ReadRetryCount = Max(GetInt("YT_READ_RETRY_COUNT", 30), 1); - StartOperationRetryCount = Max(GetInt("YT_START_OPERATION_RETRY_COUNT", 30), 1); - - RemoteTempFilesDirectory = GetEnv("YT_FILE_STORAGE", - "//tmp/yt_wrapper/file_storage"); - RemoteTempTablesDirectory = GetEnv("YT_TEMP_TABLES_STORAGE", - "//tmp/yt_wrapper/table_storage"); - RemoteTempTablesDirectory = GetEnv("YT_TEMP_DIR", - RemoteTempTablesDirectory); - - InferTableSchema = false; - - UseClientProtobuf = GetBool("YT_USE_CLIENT_PROTOBUF", false); - NodeReaderFormat = ENodeReaderFormat::Auto; - ProtobufFormatWithDescriptors = true; - - MountSandboxInTmpfs = GetBool("YT_MOUNT_SANDBOX_IN_TMPFS"); - - ApiFilePathOptions = LoadApiFilePathOptions(GetEnv("YT_API_FILE_PATH_OPTIONS", "{}")); - - ConnectionPoolSize = GetInt("YT_CONNECTION_POOL_SIZE", 16); - - TraceHttpRequestsMode = FromString<ETraceHttpRequestsMode>(to_lower(GetEnv("YT_TRACE_HTTP_REQUESTS", "never"))); - - CommandsWithFraming = { - "read_table", - "get_table_columnar_statistics", - "get_job_input", - "concatenate", - "partition_tables", - }; -} - -TConfig::TConfig() -{ - Reset(); -} - -TConfigPtr TConfig::Get() -{ - struct TConfigHolder - { - TConfigHolder() - : Config(::MakeIntrusive<TConfig>()) - { } - - TConfigPtr Config; - }; - - return Singleton<TConfigHolder>()->Config; -} - -//////////////////////////////////////////////////////////////////////////////// - -TProcessState::TProcessState() -{ - try { - FqdnHostName = ::FQDNHostName(); - } catch (const yexception& e) { - try { - FqdnHostName = ::HostName(); - } catch (const yexception& e) { - ythrow 
yexception() << "Cannot get fqdn and host name: " << e.what(); - } - } - - try { - UserName = ::GetUsername(); - } catch (const yexception& e) { - ythrow yexception() << "Cannot get user name: " << e.what(); - } - - Pid = static_cast<int>(getpid()); - - if (!ClientVersion) { - ClientVersion = ::TStringBuilder() << "YT C++ native " << GetProgramCommitId(); - } -} - -static TString CensorString(TString input) -{ - static const TString prefix = "AQAD-"; - if (input.find(prefix) == TString::npos) { - return input; - } else { - return TString(input.size(), '*'); - } -} - -void TProcessState::SetCommandLine(int argc, const char* argv[]) -{ - for (int i = 0; i < argc; ++i) { - CommandLine.push_back(argv[i]); - CensoredCommandLine.push_back(CensorString(CommandLine.back())); - } -} - -TProcessState* TProcessState::Get() -{ - return Singleton<TProcessState>(); -} - -//////////////////////////////////////////////////////////////////////////////// - -} // namespace NYT diff --git a/yt/cpp/mapreduce/interface/config.h b/yt/cpp/mapreduce/interface/config.h deleted file mode 100644 index c44ad25f1c..0000000000 --- a/yt/cpp/mapreduce/interface/config.h +++ /dev/null @@ -1,228 +0,0 @@ -#pragma once - -#include "fwd.h" -#include "common.h" -#include "node.h" - -#include <library/cpp/yt/misc/enum.h> - -#include <util/generic/maybe.h> -#include <util/generic/string.h> -#include <util/generic/hash_set.h> - -#include <util/datetime/base.h> - -namespace NYT { - -enum EEncoding : int -{ - E_IDENTITY /* "identity" */, - E_GZIP /* "gzip" */, - E_BROTLI /* "br" */, - E_Z_LZ4 /* "z-lz4" */, -}; - -enum class ENodeReaderFormat : int -{ - Yson, // Always use YSON format, - Skiff, // Always use Skiff format, throw exception if it's not possible (non-strict schema, dynamic table etc.) - Auto, // Use Skiff format if it's possible, YSON otherwise -}; - -enum class ETraceHttpRequestsMode -{ - // Never dump http requests. - Never /* "never" */, - // Dump failed http requests. 
- Error /* "error" */, - // Dump all http requests. - Always /* "always" */, -}; - -DEFINE_ENUM(EUploadDeduplicationMode, - // For each file only one process' thread from all possible hosts can upload it to the file cache at the same time. - // The others will wait for the uploading to finish and use already cached file. - ((Global) (0)) - - // For each file and each particular host only one process' thread can upload it to the file cache at the same time. - // The others will wait for the uploading to finish and use already cached file. - ((Host) (1)) - - // All processes' threads will upload a file to the cache concurrently. - ((Disabled) (2)) -); - -//////////////////////////////////////////////////////////////////////////////// - -struct TConfig - : public TThrRefBase -{ - TString Hosts; - TString Pool; - TString Token; - TString Prefix; - TString ApiVersion; - TString LogLevel; - - // Compression for data that is sent to YT cluster. - EEncoding ContentEncoding; - - // Compression for data that is read from YT cluster. - EEncoding AcceptEncoding; - - TString GlobalTxId; - - bool ForceIpV4; - bool ForceIpV6; - bool UseHosts; - - TDuration HostListUpdateInterval; - - TNode Spec; - TNode TableWriter; - - TDuration ConnectTimeout; - TDuration SocketTimeout; - TDuration AddressCacheExpirationTimeout; - TDuration TxTimeout; - TDuration PingTimeout; - TDuration PingInterval; - - bool UseAsyncTxPinger; - int AsyncHttpClientThreads; - int AsyncTxPingerPoolThreads; - - // How often should we poll for lock state - TDuration WaitLockPollInterval; - - TDuration RetryInterval; - TDuration ChunkErrorsRetryInterval; - - TDuration RateLimitExceededRetryInterval; - TDuration StartOperationRetryInterval; - - int RetryCount; - int ReadRetryCount; - int StartOperationRetryCount; - - /// @brief Period for checking status of running operation. 
- TDuration OperationTrackerPollPeriod = TDuration::Seconds(5); - - TString RemoteTempFilesDirectory; - TString RemoteTempTablesDirectory; - - // - // Infer schemas for nonexstent tables from typed rows (e.g. protobuf) - // when writing from operation or client writer. - // This options can be overriden in TOperationOptions and TTableWriterOptions. - bool InferTableSchema; - - bool UseClientProtobuf; - ENodeReaderFormat NodeReaderFormat; - bool ProtobufFormatWithDescriptors; - - int ConnectionPoolSize; - - /// Defines replication factor that is used for files that are uploaded to YT - /// to use them in operations. - int FileCacheReplicationFactor = 10; - - /// @brief Used when waiting for other process which uploads the same file to the file cache. - /// - /// If CacheUploadDeduplicationMode is not Disabled, current process can wait for some other - /// process which is uploading the same file. This value is proportional to the timeout of waiting, - /// actual timeout computes as follows: fileSizeGb * CacheLockTimeoutPerGb. - /// Default timeout assumes that host has uploading speed equal to 20 Mb/s. - /// If timeout was reached, the file will be uploaded by current process without any other waits. - TDuration CacheLockTimeoutPerGb; - - /// @brief Used to prevent concurrent uploading of the same file to the file cache. - /// NB: Each mode affects only users with the same mode enabled. - EUploadDeduplicationMode CacheUploadDeduplicationMode; - - bool MountSandboxInTmpfs; - - /// @brief Set upload options (e.g.) for files created by library. - /// - /// Path itself is always ignored but path options (e.g. `BypassArtifactCache`) are used when uploading system files: - /// cppbinary, job state, etc - TRichYPath ApiFilePathOptions; - - // Testing options, should never be used in user programs. 
- bool UseAbortableResponse = false; - bool EnableDebugMetrics = false; - - // - // There is optimization used with local YT that enables to skip binary upload and use real binary path. - // When EnableLocalModeOptimization is set to false this optimization is completely disabled. - bool EnableLocalModeOptimization = true; - - // - // If you want see stderr even if you jobs not failed set this true. - bool WriteStderrSuccessfulJobs = false; - - // - // This configuration is useful for debug. - // If set to ETraceHttpRequestsMode::Error library will dump all http error requests. - // If set to ETraceHttpRequestsMode::All library will dump all http requests. - // All tracing occurres as DEBUG level logging. - ETraceHttpRequestsMode TraceHttpRequestsMode = ETraceHttpRequestsMode::Never; - - TString SkynetApiHost; - - // Sets SO_PRIORITY option on the socket - TMaybe<int> SocketPriority; - - // Framing settings - // (cf. https://yt.yandex-team.ru/docs/description/proxy/http_proxy_reference#framing). 
- THashSet<TString> CommandsWithFraming; - - static bool GetBool(const char* var, bool defaultValue = false); - static int GetInt(const char* var, int defaultValue); - static TDuration GetDuration(const char* var, TDuration defaultValue); - static EEncoding GetEncoding(const char* var); - static EUploadDeduplicationMode GetUploadingDeduplicationMode( - const char* var, - EUploadDeduplicationMode defaultValue); - - static void ValidateToken(const TString& token); - static TString LoadTokenFromFile(const TString& tokenPath); - - static TNode LoadJsonSpec(const TString& strSpec); - - static TRichYPath LoadApiFilePathOptions(const TString& ysonMap); - - void LoadToken(); - void LoadSpec(); - void LoadTimings(); - - void Reset(); - - TConfig(); - - static TConfigPtr Get(); -}; - -//////////////////////////////////////////////////////////////////////////////// - -struct TProcessState -{ - TString FqdnHostName; - TString UserName; - TVector<TString> CommandLine; - - // Command line with everything that looks like tokens censored. 
- TVector<TString> CensoredCommandLine; - int Pid; - TString ClientVersion; - - TProcessState(); - - void SetCommandLine(int argc, const char* argv[]); - - static TProcessState* Get(); -}; - -//////////////////////////////////////////////////////////////////////////////// - -} // namespace NYT diff --git a/yt/cpp/mapreduce/interface/config_ut.cpp b/yt/cpp/mapreduce/interface/config_ut.cpp deleted file mode 100644 index e49ba02108..0000000000 --- a/yt/cpp/mapreduce/interface/config_ut.cpp +++ /dev/null @@ -1,20 +0,0 @@ -#include <library/cpp/testing/unittest/registar.h> - -#include <yt/cpp/mapreduce/interface/config.h> - -using namespace NYT; - -Y_UNIT_TEST_SUITE(ConfigSuite) -{ - Y_UNIT_TEST(TestReset) { - // very limited test, checks only one config field - - auto origConfig = *TConfig::Get(); - TConfig::Get()->Reset(); - UNIT_ASSERT_VALUES_EQUAL(origConfig.Hosts, TConfig::Get()->Hosts); - - TConfig::Get()->Hosts = "hosts/fb867"; - TConfig::Get()->Reset(); - UNIT_ASSERT_VALUES_EQUAL(origConfig.Hosts, TConfig::Get()->Hosts); - } -} diff --git a/yt/cpp/mapreduce/interface/constants.h b/yt/cpp/mapreduce/interface/constants.h deleted file mode 100644 index 4f70410814..0000000000 --- a/yt/cpp/mapreduce/interface/constants.h +++ /dev/null @@ -1,19 +0,0 @@ -#pragma once - - -#include <util/system/defaults.h> - - -namespace NYT { - -//////////////////////////////////////////////////////////////////////////////// - - -// Maximum number of input tables for operation. -// If greater number of input tables are provided behaviour is undefined -// (it might work ok or it might fail or it might work very slowly). 
-constexpr size_t MaxInputTableCount = 1000; - -//////////////////////////////////////////////////////////////////////////////// - -} // namespace NYT diff --git a/yt/cpp/mapreduce/interface/cypress.cpp b/yt/cpp/mapreduce/interface/cypress.cpp deleted file mode 100644 index 53686effd2..0000000000 --- a/yt/cpp/mapreduce/interface/cypress.cpp +++ /dev/null @@ -1,24 +0,0 @@ -#include "cypress.h" - -#include "config.h" - -namespace NYT { - -//////////////////////////////////////////////////////////////////////////////// - -void ICypressClient::Concatenate( - const TVector<TYPath>& sourcePaths, - const TYPath& destinationPath, - const TConcatenateOptions& options) -{ - TVector<TRichYPath> richSourcePaths; - richSourcePaths.reserve(sourcePaths.size()); - for (const auto& path : sourcePaths) { - richSourcePaths.emplace_back(path); - } - Concatenate(richSourcePaths, destinationPath, options); -} - -//////////////////////////////////////////////////////////////////////////////// - -} // namespace NYT diff --git a/yt/cpp/mapreduce/interface/cypress.h b/yt/cpp/mapreduce/interface/cypress.h deleted file mode 100644 index e05316ebc6..0000000000 --- a/yt/cpp/mapreduce/interface/cypress.h +++ /dev/null @@ -1,252 +0,0 @@ -#pragma once - -/// -/// @file yt/cpp/mapreduce/interface/cypress.h -/// -/// Header containing interface to execute [Cypress](https://yt.yandex-team.ru/docs/description/common/cypress.html)-related commands. - -#include "fwd.h" - -#include "client_method_options.h" -#include "common.h" -#include "node.h" - -#include <util/generic/maybe.h> - -namespace NYT { - -//////////////////////////////////////////////////////////////////////////////// - -/// Client interface to execute [Cypress](https://yt.yandex-team.ru/docs/description/common/cypress.html)-related commands. -class ICypressClient -{ -public: - virtual ~ICypressClient() = default; - - /// - /// @brief Create Cypress node of given type. - /// - /// @param path Path in Cypress to the new object. 
- /// @param type New node type. - /// @param options Optional parameters. - /// - /// @return Id of the created node. - /// - /// @note All but the last components must exist unless @ref NYT::TCreateOptions::Recursive is `true`. - /// - /// @note The node itself must not exist unless @ref NYT::TCreateOptions::IgnoreExisting or @ref NYT::TCreateOptions::Force are `true`. - /// - /// @see [YT doc](https://yt.yandex-team.ru/docs/api/commands.html#create) - virtual TNodeId Create( - const TYPath& path, - ENodeType type, - const TCreateOptions& options = TCreateOptions()) = 0; - - /// - /// @brief Create table with schema inferred from the template argument. - /// - /// @tparam TRowType type of C++ representation of the row to be stored in the table. - /// @param path Path in Cypress to the new table. - /// @param sortColumns List of columns to mark as sorted in schema. - /// @param options Optional parameters. - /// - /// @return Id of the created node. - /// - /// @note If "schema" is passed in `options.Attributes` it has priority over the deduced schema (the latter is ignored). - template <typename TRowType> - TNodeId CreateTable( - const TYPath& path, - const TSortColumns& sortColumns = TSortColumns(), - const TCreateOptions& options = TCreateOptions()); - - /// - /// @brief Remove Cypress node. - /// - /// @see [YT doc](https://yt.yandex-team.ru/docs/api/commands.html#remove) - virtual void Remove( - const TYPath& path, - const TRemoveOptions& options = TRemoveOptions()) = 0; - - /// - /// @brief Check if Cypress node exists. - /// - /// @see [YT doc](https://yt.yandex-team.ru/docs/api/commands.html#exists) - virtual bool Exists( - const TYPath& path, - const TExistsOptions& options = TExistsOptions()) = 0; - - /// - /// @brief Get Cypress node contents. 
- /// - /// @see [YT doc](https://yt.yandex-team.ru/docs/api/commands.html#get) - virtual TNode Get( - const TYPath& path, - const TGetOptions& options = TGetOptions()) = 0; - - /// - /// @brief Set Cypress node contents. - /// - /// @see [YT doc](https://yt.yandex-team.ru/docs/api/commands.html#set) - virtual void Set( - const TYPath& path, - const TNode& value, - const TSetOptions& options = TSetOptions()) = 0; - - /// - /// @brief Set multiple attributes for cypress path. - /// - /// @param path Path to root of the attributes to be set e.g. "//path/to/table/@"; - /// it is important to make sure that path ends with "/@". - /// @param attributes Map with attributes - /// @param options Optional parameters. - /// - /// @see [YT doc](https://yt.yandex-team.ru/docs/api/commands.html#multiset_attributes) - virtual void MultisetAttributes( - const TYPath& path, - const TNode::TMapType& attributes, - const TMultisetAttributesOptions& options = TMultisetAttributesOptions()) = 0; - - /// - /// @brief List Cypress map or attribute node keys. - /// - /// @param path Path in the tree to the node in question. - /// @param options Optional parameters. - /// - /// @return List of keys with attributes (if they were required in @ref NYT::TListOptions::AttributeFilter). - /// - /// @see [YT doc](https://yt.yandex-team.ru/docs/api/commands.html#list) - virtual TNode::TListType List( - const TYPath& path, - const TListOptions& options = TListOptions()) = 0; - - /// - /// @brief Copy Cypress node. - /// - /// @see [YT doc](https://yt.yandex-team.ru/docs/api/commands.html#copy) - virtual TNodeId Copy( - const TYPath& sourcePath, - const TYPath& destinationPath, - const TCopyOptions& options = TCopyOptions()) = 0; - - /// - /// @brief Move Cypress node (equivalent to copy-then-remove). 
- /// - /// @see [YT doc](https://yt.yandex-team.ru/docs/api/commands.html#move) - virtual TNodeId Move( - const TYPath& sourcePath, - const TYPath& destinationPath, - const TMoveOptions& options = TMoveOptions()) = 0; - - /// - /// @brief Create link to Cypress node. - /// - /// @see [YT doc](https://yt.yandex-team.ru/docs/api/commands.html#link) - virtual TNodeId Link( - const TYPath& targetPath, - const TYPath& linkPath, - const TLinkOptions& options = TLinkOptions()) = 0; - - /// - /// @brief Concatenate several tables into one. - /// - /// @see [YT doc](https://yt.yandex-team.ru/docs/api/commands.html#concatenate) - virtual void Concatenate( - const TVector<TRichYPath>& sourcePaths, - const TRichYPath& destinationPath, - const TConcatenateOptions& options = TConcatenateOptions()) = 0; - - /// - /// @brief Concatenate several tables into one. - /// - /// @see [YT doc](https://yt.yandex-team.ru/docs/api/commands.html#concatenate) - virtual void Concatenate( - const TVector<TYPath>& sourcePaths, - const TYPath& destinationPath, - const TConcatenateOptions& options = TConcatenateOptions()); - - /// - /// @brief Canonize YPath, moving all the complex YPath features to attributes. - /// - /// @see [YT doc](https://yt.yandex-team.ru/docs/api/commands.html#parse-ypath) - virtual TRichYPath CanonizeYPath(const TRichYPath& path) = 0; - - /// - /// @brief Get statistics for given sets of columns in given table ranges. - /// - /// @note Paths must contain column selectors. - /// - /// @see [YT doc](https://yt.yandex-team.ru/docs/api/commands.html#get-table-columnar-statistics) - virtual TVector<TTableColumnarStatistics> GetTableColumnarStatistics( - const TVector<TRichYPath>& paths, - const TGetTableColumnarStatisticsOptions& options = {}) = 0; - - /// - /// @brief Divide input tables into disjoint partitions. - /// - /// Resulted partitions are vectors of rich YPaths. - /// Each partition can be given to a separate worker for further independent processing. 
- /// - virtual TMultiTablePartitions GetTablePartitions( - const TVector<TRichYPath>& paths, - const TGetTablePartitionsOptions& options) = 0; - - /// - /// @brief Get file from file cache. - /// - /// @param md5Signature MD5 digest of the file. - /// @param cachePath Path to the file cache. - /// @param options Optional parameters. - /// - /// @see [YT doc](https://yt.yandex-team.ru/docs/api/commands.html#get-file-from-cache) - virtual TMaybe<TYPath> GetFileFromCache( - const TString& md5Signature, - const TYPath& cachePath, - const TGetFileFromCacheOptions& options = TGetFileFromCacheOptions()) = 0; - - /// - /// @brief Put file to file cache. - /// - /// @param filePath Path in Cypress to the file to cache. - /// @param md5Signature Expected MD5 digest of the file. - /// @param cachePath Path to the file cache. - /// @param options Optional parameters. - /// - /// @note The file in `filePath` must have been written with @ref NYT::TFileWriterOptions::ComputeMD5 set to `true`. - /// - /// @see [YT doc](https://yt.yandex-team.ru/docs/api/commands.html#put-file-to-cache) - virtual TYPath PutFileToCache( - const TYPath& filePath, - const TString& md5Signature, - const TYPath& cachePath, - const TPutFileToCacheOptions& options = TPutFileToCacheOptions()) = 0; -}; - -//////////////////////////////////////////////////////////////////////////////// - -template <typename TRowType> -TNodeId ICypressClient::CreateTable( - const TYPath& path, - const TSortColumns& sortColumns, - const TCreateOptions& options) -{ - static_assert( - std::is_base_of_v<::google::protobuf::Message, TRowType>, - "TRowType must be inherited from google::protobuf::Message"); - - TCreateOptions actualOptions = options; - if (!actualOptions.Attributes_) { - actualOptions.Attributes_ = TNode::CreateMap(); - } - - if (!actualOptions.Attributes_->HasKey("schema")) { - actualOptions.Attributes_->AsMap().emplace( - "schema", - CreateTableSchema<TRowType>(sortColumns).ToNode()); - } - - return Create(path, 
ENodeType::NT_TABLE, actualOptions); -} - -//////////////////////////////////////////////////////////////////////////////// - -} // namespace NYT diff --git a/yt/cpp/mapreduce/interface/error_codes.h b/yt/cpp/mapreduce/interface/error_codes.h deleted file mode 100644 index d8d76e04fd..0000000000 --- a/yt/cpp/mapreduce/interface/error_codes.h +++ /dev/null @@ -1,468 +0,0 @@ -#pragma once - -// -// generated by generate-error-codes.py -// - -namespace NYT { -namespace NClusterErrorCodes { - - - -// from ./core/misc/public.h - -//////////////////////////////////////////////////////////////////////////////// - - constexpr int OK = 0; - constexpr int Generic = 1; - constexpr int Canceled = 2; - constexpr int Timeout = 3; - -//////////////////////////////////////////////////////////////////////////////// - - - - -// from ./core/rpc/public.h -namespace NRpc { - -//////////////////////////////////////////////////////////////////////////////// - - constexpr int TransportError = 100; - constexpr int ProtocolError = 101; - constexpr int NoSuchService = 102; - constexpr int NoSuchMethod = 103; - constexpr int Unavailable = 105; - constexpr int PoisonPill = 106; - constexpr int RequestQueueSizeLimitExceeded = 108; - constexpr int AuthenticationError = 109; - constexpr int InvalidCsrfToken = 110; - constexpr int InvalidCredentials = 111; - constexpr int StreamingNotSupported = 112; - -//////////////////////////////////////////////////////////////////////////////// - -} // namespace NRpc - - - -// from ./core/bus/public.h -namespace NBus { - -//////////////////////////////////////////////////////////////////////////////// - - constexpr int TransportError = 100; - -//////////////////////////////////////////////////////////////////////////////// - -} // namespace NBus - - - -// from ./client/scheduler/public.h -namespace NScheduler { - -//////////////////////////////////////////////////////////////////////////////// - - constexpr int NoSuchOperation = 200; - constexpr int 
InvalidOperationState = 201; - constexpr int TooManyOperations = 202; - constexpr int NoSuchJob = 203; - constexpr int OperationFailedOnJobRestart = 210; - constexpr int OperationFailedWithInconsistentLocking = 211; - constexpr int OperationControllerCrashed = 212; - constexpr int TestingError = 213; - -//////////////////////////////////////////////////////////////////////////////// - -} // namespace NScheduler - - - -// from ./client/table_client/public.h -namespace NTableClient { - -//////////////////////////////////////////////////////////////////////////////// - - constexpr int SortOrderViolation = 301; - constexpr int InvalidDoubleValue = 302; - constexpr int IncomparableType = 303; - constexpr int UnhashableType = 304; - // E.g. name table with more than #MaxColumnId columns (may come from legacy chunks). - constexpr int CorruptedNameTable = 305; - constexpr int UniqueKeyViolation = 306; - constexpr int SchemaViolation = 307; - constexpr int RowWeightLimitExceeded = 308; - constexpr int InvalidColumnFilter = 309; - constexpr int InvalidColumnRenaming = 310; - constexpr int IncompatibleKeyColumns = 311; - constexpr int ReaderDeadlineExpired = 312; - constexpr int TimestampOutOfRange = 313; - -//////////////////////////////////////////////////////////////////////////////// - -} // namespace NTableClient - - - -// from ./client/cypress_client/public.h -namespace NCypressClient { - -//////////////////////////////////////////////////////////////////////////////// - - constexpr int SameTransactionLockConflict = 400; - constexpr int DescendantTransactionLockConflict = 401; - constexpr int ConcurrentTransactionLockConflict = 402; - constexpr int PendingLockConflict = 403; - constexpr int LockDestroyed = 404; - -//////////////////////////////////////////////////////////////////////////////// - -} // namespace NCypressClient - - - -// from ./core/ytree/public.h -namespace NYTree { - -//////////////////////////////////////////////////////////////////////////////// - - 
constexpr int ResolveError = 500; - constexpr int AlreadyExists = 501; - constexpr int MaxChildCountViolation = 502; - constexpr int MaxStringLengthViolation = 503; - constexpr int MaxAttributeSizeViolation = 504; - constexpr int MaxKeyLengthViolation = 505; - -//////////////////////////////////////////////////////////////////////////////// - -} // namespace NYTree - - - -// from ./client/hydra/public.h -namespace NHydra { - -//////////////////////////////////////////////////////////////////////////////// - - constexpr int NoSuchSnapshot = 600; - constexpr int NoSuchChangelog = 601; - constexpr int InvalidEpoch = 602; - constexpr int InvalidVersion = 603; - constexpr int OutOfOrderMutations = 609; - constexpr int InvalidSnapshotVersion = 610; - -//////////////////////////////////////////////////////////////////////////////// - -} // namespace NHydra - - - -// from ./client/chunk_client/public.h -namespace NChunkClient { - -//////////////////////////////////////////////////////////////////////////////// - - constexpr int AllTargetNodesFailed = 700; - constexpr int SendBlocksFailed = 701; - constexpr int NoSuchSession = 702; - constexpr int SessionAlreadyExists = 703; - constexpr int ChunkAlreadyExists = 704; - constexpr int WindowError = 705; - constexpr int BlockContentMismatch = 706; - constexpr int NoSuchBlock = 707; - constexpr int NoSuchChunk = 708; - constexpr int NoLocationAvailable = 710; - constexpr int IOError = 711; - constexpr int MasterCommunicationFailed = 712; - constexpr int NoSuchChunkTree = 713; - constexpr int MasterNotConnected = 714; - constexpr int ChunkUnavailable = 716; - constexpr int NoSuchChunkList = 717; - constexpr int WriteThrottlingActive = 718; - constexpr int NoSuchMedium = 719; - constexpr int OptimisticLockFailure = 720; - constexpr int InvalidBlockChecksum = 721; - constexpr int BlockOutOfRange = 722; - constexpr int ObjectNotReplicated = 723; - constexpr int MissingExtension = 724; - constexpr int BandwidthThrottlingFailed = 725; 
- constexpr int ReaderTimeout = 726; - constexpr int NoSuchChunkView = 727; - -//////////////////////////////////////////////////////////////////////////////// - -} // namespace NChunkClient - - - -// from ./client/election/public.h -namespace NElection { - -//////////////////////////////////////////////////////////////////////////////// - - constexpr int InvalidState = 800; - constexpr int InvalidLeader = 801; - constexpr int InvalidEpoch = 802; - -//////////////////////////////////////////////////////////////////////////////// - -} // namespace NElection - - - -// from ./client/security_client/public.h -namespace NSecurityClient { - -//////////////////////////////////////////////////////////////////////////////// - - constexpr int AuthenticationError = 900; - constexpr int AuthorizationError = 901; - constexpr int AccountLimitExceeded = 902; - constexpr int UserBanned = 903; - constexpr int RequestQueueSizeLimitExceeded = 904; - constexpr int NoSuchAccount = 905; - constexpr int SafeModeEnabled = 906; - -//////////////////////////////////////////////////////////////////////////////// - -} // namespace NSecurityClient - - - -// from ./client/object_client/public.h -namespace NObjectClient { - -//////////////////////////////////////////////////////////////////////////////// - - constexpr int PrerequisiteCheckFailed = 1000; - -//////////////////////////////////////////////////////////////////////////////// - -} // namespace NObjectClient - - - -// from ./server/lib/exec_agent/public.h -namespace NExecAgent { - -//////////////////////////////////////////////////////////////////////////////// - - constexpr int ConfigCreationFailed = 1100; - constexpr int AbortByScheduler = 1101; - constexpr int ResourceOverdraft = 1102; - constexpr int WaitingJobTimeout = 1103; - constexpr int SlotNotFound = 1104; - constexpr int JobEnvironmentDisabled = 1105; - constexpr int JobProxyConnectionFailed = 1106; - constexpr int ArtifactCopyingFailed = 1107; - constexpr int 
NodeDirectoryPreparationFailed = 1108; - constexpr int SlotLocationDisabled = 1109; - constexpr int QuotaSettingFailed = 1110; - constexpr int RootVolumePreparationFailed = 1111; - constexpr int NotEnoughDiskSpace = 1112; - constexpr int ArtifactDownloadFailed = 1113; - constexpr int JobProxyPreparationTimeout = 1114; - constexpr int JobPreparationTimeout = 1115; - constexpr int JobProxyFailed = 1120; - -//////////////////////////////////////////////////////////////////////////////// - -} // namespace NExecAgent - - - -// from ./ytlib/job_proxy/public.h -namespace NJobProxy { - -//////////////////////////////////////////////////////////////////////////////// - - constexpr int MemoryLimitExceeded = 1200; - constexpr int MemoryCheckFailed = 1201; - constexpr int JobTimeLimitExceeded = 1202; - constexpr int UnsupportedJobType = 1203; - constexpr int JobNotPrepared = 1204; - constexpr int UserJobFailed = 1205; - -//////////////////////////////////////////////////////////////////////////////// - -} // namespace NJobProxy - - - -// from ./server/node/data_node/public.h -namespace NDataNode { - -//////////////////////////////////////////////////////////////////////////////// - - constexpr int LocalChunkReaderFailed = 1300; - constexpr int LayerUnpackingFailed = 1301; - -//////////////////////////////////////////////////////////////////////////////// - -} // namespace NDataNode - - - -// from ./core/net/public.h -namespace NNet { - -//////////////////////////////////////////////////////////////////////////////// - - constexpr int Aborted = 1500; - constexpr int ResolveTimedOut = 1501; - -//////////////////////////////////////////////////////////////////////////////// - -} // namespace NNet - - - -// from ./client/node_tracker_client/public.h -namespace NNodeTrackerClient { - -//////////////////////////////////////////////////////////////////////////////// - - constexpr int NoSuchNode = 1600; - constexpr int InvalidState = 1601; - constexpr int NoSuchNetwork = 1602; - 
constexpr int NoSuchRack = 1603; - constexpr int NoSuchDataCenter = 1604; - -//////////////////////////////////////////////////////////////////////////////// - -} // namespace NNodeTrackerClient - - - -// from ./client/tablet_client/public.h -namespace NTabletClient { - -//////////////////////////////////////////////////////////////////////////////// - - constexpr int TransactionLockConflict = 1700; - constexpr int NoSuchTablet = 1701; - constexpr int TabletNotMounted = 1702; - constexpr int AllWritesDisabled = 1703; - constexpr int InvalidMountRevision = 1704; - constexpr int TableReplicaAlreadyExists = 1705; - constexpr int InvalidTabletState = 1706; - -//////////////////////////////////////////////////////////////////////////////// - -} // namespace NTabletClient - - - -// from ./server/lib/shell/public.h -namespace NShell { - -//////////////////////////////////////////////////////////////////////////////// - - constexpr int ShellExited = 1800; - constexpr int ShellManagerShutDown = 1801; - -//////////////////////////////////////////////////////////////////////////////// - -} // namespace NShell - - - -// from ./client/api/public.h -namespace NApi { - -//////////////////////////////////////////////////////////////////////////////// - - constexpr int TooManyConcurrentRequests = 1900; - constexpr int JobArchiveUnavailable = 1910; - constexpr int RetriableArchiveError = 1911; - constexpr int NoSuchOperation = 1915; - -//////////////////////////////////////////////////////////////////////////////// - -} // namespace NApi - - - -// from ./server/controller_agent/chunk_pools/public.h -namespace NChunkPools { - -//////////////////////////////////////////////////////////////////////////////// - - constexpr int DataSliceLimitExceeded = 2000; - constexpr int MaxDataWeightPerJobExceeded = 2001; - constexpr int MaxPrimaryDataWeightPerJobExceeded = 2002; - -//////////////////////////////////////////////////////////////////////////////// - -} // namespace NChunkPools - - - 
-// from ./client/api/rpc_proxy/public.h -namespace NApi { - -//////////////////////////////////////////////////////////////////////////////// - - constexpr int ProxyBanned = 2100; - -//////////////////////////////////////////////////////////////////////////////// - -} // namespace NApi - - - -// from ./ytlib/controller_agent/public.h -namespace NControllerAgent { - -//////////////////////////////////////////////////////////////////////////////// - - constexpr int AgentCallFailed = 4400; - constexpr int NoOnlineNodeToScheduleJob = 4410; - constexpr int MaterializationFailed = 4415; - -//////////////////////////////////////////////////////////////////////////////// - -} // namespace NControllerAgent - - - -// from ./client/transaction_client/public.h -namespace NTransactionClient { - -//////////////////////////////////////////////////////////////////////////////// - - constexpr int NoSuchTransaction = 11000; - -//////////////////////////////////////////////////////////////////////////////// - -} // namespace NTransactionClient - - - -// from ./server/lib/containers/public.h -namespace NContainers { - -//////////////////////////////////////////////////////////////////////////////// - - constexpr int FailedToStartContainer = 13000; - -//////////////////////////////////////////////////////////////////////////////// - -} // namespace NContainers - - - -// from ./ytlib/job_prober_client/public.h -namespace NJobProberClient { - -//////////////////////////////////////////////////////////////////////////////// - - constexpr int JobIsNotRunning = 17000; - -//////////////////////////////////////////////////////////////////////////////// - -} // namespace NJobProberClient - -} // namespace NClusterErrorCodes -} // namespace NYT diff --git a/yt/cpp/mapreduce/interface/error_ut.cpp b/yt/cpp/mapreduce/interface/error_ut.cpp deleted file mode 100644 index 03f2751b23..0000000000 --- a/yt/cpp/mapreduce/interface/error_ut.cpp +++ /dev/null @@ -1,81 +0,0 @@ -#include 
<library/cpp/testing/unittest/registar.h> - -#include <library/cpp/json/json_reader.h> - -#include <yt/cpp/mapreduce/interface/errors.h> -#include <yt/cpp/mapreduce/common/helpers.h> - -using namespace NYT; - -template<> -void Out<NYT::TNode>(IOutputStream& s, const NYT::TNode& node) -{ - s << "TNode:" << NodeToYsonString(node); -} - -Y_UNIT_TEST_SUITE(ErrorSuite) -{ - Y_UNIT_TEST(TestParseJson) - { - // Scary real world error! Бу! - const char* jsonText = - R"""({)""" - R"""("code":500,)""" - R"""("message":"Error resolving path //home/user/link",)""" - R"""("attributes":{)""" - R"""("fid":18446484571700269066,)""" - R"""("method":"Create",)""" - R"""("tid":17558639495721339338,)""" - R"""("datetime":"2017-04-07T13:38:56.474819Z",)""" - R"""("pid":414529,)""" - R"""("host":"build01-01g.yt.yandex.net"},)""" - R"""("inner_errors":[{)""" - R"""("code":1,)""" - R"""("message":"Node //tt cannot have children",)""" - R"""("attributes":{)""" - R"""("fid":18446484571700269066,)""" - R"""("tid":17558639495721339338,)""" - R"""("datetime":"2017-04-07T13:38:56.474725Z",)""" - R"""("pid":414529,)""" - R"""("host":"build01-01g.yt.yandex.net"},)""" - R"""("inner_errors":[]}]})"""; - - NJson::TJsonValue jsonValue; - ReadJsonFastTree(jsonText, &jsonValue, /*throwOnError=*/ true); - - TYtError error(jsonValue); - UNIT_ASSERT_VALUES_EQUAL(error.GetCode(), 500); - UNIT_ASSERT_VALUES_EQUAL(error.GetMessage(), R"""(Error resolving path //home/user/link)"""); - UNIT_ASSERT_VALUES_EQUAL(error.InnerErrors().size(), 1); - UNIT_ASSERT_VALUES_EQUAL(error.InnerErrors()[0].GetCode(), 1); - - UNIT_ASSERT_VALUES_EQUAL(error.HasAttributes(), true); - UNIT_ASSERT_VALUES_EQUAL(error.GetAttributes().at("method"), TNode("Create")); - - UNIT_ASSERT_VALUES_EQUAL(error.GetAllErrorCodes(), TSet<int>({500, 1})); - } - - Y_UNIT_TEST(TestGetYsonText) { - const char* jsonText = - R"""({)""" - R"""("code":500,)""" - R"""("message":"outer error",)""" - R"""("attributes":{)""" - R"""("method":"Create",)""" - 
R"""("pid":414529},)""" - R"""("inner_errors":[{)""" - R"""("code":1,)""" - R"""("message":"inner error",)""" - R"""("attributes":{},)""" - R"""("inner_errors":[])""" - R"""(}]})"""; - TYtError error; - error.ParseFrom(jsonText); - TString ysonText = error.GetYsonText(); - TYtError error2(NodeFromYsonString(ysonText)); - UNIT_ASSERT_EQUAL( - ysonText, - R"""({"code"=500;"message"="outer error";"attributes"={"method"="Create";"pid"=414529};"inner_errors"=[{"code"=1;"message"="inner error"}]})"""); - UNIT_ASSERT_EQUAL(error2.GetYsonText(), ysonText); - } -} diff --git a/yt/cpp/mapreduce/interface/errors.cpp b/yt/cpp/mapreduce/interface/errors.cpp deleted file mode 100644 index 49a7c7cfc1..0000000000 --- a/yt/cpp/mapreduce/interface/errors.cpp +++ /dev/null @@ -1,437 +0,0 @@ -#include "errors.h" - -#include <library/cpp/yson/node/node_io.h> -#include <library/cpp/yson/node/node_visitor.h> - -#include <yt/cpp/mapreduce/interface/error_codes.h> - -#include <library/cpp/json/json_reader.h> -#include <library/cpp/yson/writer.h> - -#include <util/string/builder.h> -#include <util/stream/str.h> -#include <util/generic/set.h> - -namespace NYT { - -using namespace NJson; - -//////////////////////////////////////////////////////////////////// - -static void WriteErrorDescription(const TYtError& error, IOutputStream* out) -{ - (*out) << '\'' << error.GetMessage() << '\''; - const auto& innerErrorList = error.InnerErrors(); - if (!innerErrorList.empty()) { - (*out) << " { "; - bool first = true; - for (const auto& innerError : innerErrorList) { - if (first) { - first = false; - } else { - (*out) << " ; "; - } - WriteErrorDescription(innerError, out); - } - (*out) << " }"; - } -} - -static void SerializeError(const TYtError& error, NYson::IYsonConsumer* consumer) -{ - consumer->OnBeginMap(); - { - consumer->OnKeyedItem("code"); - consumer->OnInt64Scalar(error.GetCode()); - - consumer->OnKeyedItem("message"); - consumer->OnStringScalar(error.GetMessage()); - - if 
(!error.GetAttributes().empty()) { - consumer->OnKeyedItem("attributes"); - consumer->OnBeginMap(); - { - for (const auto& item : error.GetAttributes()) { - consumer->OnKeyedItem(item.first); - TNodeVisitor(consumer).Visit(item.second); - } - } - consumer->OnEndMap(); - } - - if (!error.InnerErrors().empty()) { - consumer->OnKeyedItem("inner_errors"); - { - consumer->OnBeginList(); - for (const auto& innerError : error.InnerErrors()) { - SerializeError(innerError, consumer); - } - consumer->OnEndList(); - } - } - } - consumer->OnEndMap(); -} - -static TString DumpJobInfoForException(const TOperationId& operationId, const TVector<TFailedJobInfo>& failedJobInfoList) -{ - ::TStringBuilder output; - // Exceptions have limit to contain 65508 bytes of text, so we also limit stderr text - constexpr size_t MAX_SIZE = 65508 / 2; - - size_t written = 0; - for (const auto& failedJobInfo : failedJobInfoList) { - if (written >= MAX_SIZE) { - break; - } - TStringStream nextChunk; - nextChunk << '\n'; - nextChunk << "OperationId: " << GetGuidAsString(operationId) << " JobId: " << GetGuidAsString(failedJobInfo.JobId) << '\n'; - nextChunk << "Error: " << failedJobInfo.Error.FullDescription() << '\n'; - if (!failedJobInfo.Stderr.empty()) { - nextChunk << "Stderr: " << Endl; - size_t tmpWritten = written + nextChunk.Str().size(); - if (tmpWritten >= MAX_SIZE) { - break; - } - - if (tmpWritten + failedJobInfo.Stderr.size() > MAX_SIZE) { - nextChunk << failedJobInfo.Stderr.substr(failedJobInfo.Stderr.size() - (MAX_SIZE - tmpWritten)); - } else { - nextChunk << failedJobInfo.Stderr; - } - } - written += nextChunk.Str().size(); - output << nextChunk.Str(); - } - return output; -} - -//////////////////////////////////////////////////////////////////// - -TYtError::TYtError() - : Code_(0) -{ } - -TYtError::TYtError(const TString& message) - : Code_(NYT::NClusterErrorCodes::Generic) - , Message_(message) -{ } - -TYtError::TYtError(int code, const TString& message) - : Code_(code) - , 
Message_(message) -{ } - -TYtError::TYtError(const TJsonValue& value) -{ - const TJsonValue::TMapType& map = value.GetMap(); - TJsonValue::TMapType::const_iterator it = map.find("message"); - if (it != map.end()) { - Message_ = it->second.GetString(); - } - - it = map.find("code"); - if (it != map.end()) { - Code_ = static_cast<int>(it->second.GetInteger()); - } else { - Code_ = NYT::NClusterErrorCodes::Generic; - } - - it = map.find("inner_errors"); - if (it != map.end()) { - const TJsonValue::TArray& innerErrors = it->second.GetArray(); - for (const auto& innerError : innerErrors) { - InnerErrors_.push_back(TYtError(innerError)); - } - } - - it = map.find("attributes"); - if (it != map.end()) { - auto attributes = NYT::NodeFromJsonValue(it->second); - if (attributes.IsMap()) { - Attributes_ = std::move(attributes.AsMap()); - } - } -} - -TYtError::TYtError(const TNode& node) -{ - const auto& map = node.AsMap(); - auto it = map.find("message"); - if (it != map.end()) { - Message_ = it->second.AsString(); - } - - it = map.find("code"); - if (it != map.end()) { - Code_ = static_cast<int>(it->second.AsInt64()); - } else { - Code_ = NYT::NClusterErrorCodes::Generic; - } - - it = map.find("inner_errors"); - if (it != map.end()) { - const auto& innerErrors = it->second.AsList(); - for (const auto& innerError : innerErrors) { - InnerErrors_.push_back(TYtError(innerError)); - } - } - - it = map.find("attributes"); - if (it != map.end()) { - auto& attributes = it->second; - if (attributes.IsMap()) { - Attributes_ = std::move(attributes.AsMap()); - } - } -} - -int TYtError::GetCode() const -{ - return Code_; -} - -const TString& TYtError::GetMessage() const -{ - return Message_; -} - -const TVector<TYtError>& TYtError::InnerErrors() const -{ - return InnerErrors_; -} - -void TYtError::ParseFrom(const TString& jsonError) -{ - TJsonValue value; - TStringInput input(jsonError); - ReadJsonTree(&input, &value); - *this = TYtError(value); -} - -TSet<int> 
TYtError::GetAllErrorCodes() const -{ - TDeque<const TYtError*> queue = {this}; - TSet<int> result; - while (!queue.empty()) { - const auto* current = queue.front(); - queue.pop_front(); - result.insert(current->Code_); - for (const auto& error : current->InnerErrors_) { - queue.push_back(&error); - } - } - return result; -} - -bool TYtError::ContainsErrorCode(int code) const -{ - if (Code_ == code) { - return true; - } - for (const auto& error : InnerErrors_) { - if (error.ContainsErrorCode(code)) { - return true; - } - } - return false; -} - - -bool TYtError::ContainsText(const TStringBuf& text) const -{ - if (Message_.Contains(text)) { - return true; - } - for (const auto& error : InnerErrors_) { - if (error.ContainsText(text)) { - return true; - } - } - return false; -} - -bool TYtError::HasAttributes() const -{ - return !Attributes_.empty(); -} - -const TNode::TMapType& TYtError::GetAttributes() const -{ - return Attributes_; -} - -TString TYtError::GetYsonText() const -{ - TStringStream out; - ::NYson::TYsonWriter writer(&out, NYson::EYsonFormat::Text); - SerializeError(*this, &writer); - return std::move(out.Str()); -} - -TString TYtError::ShortDescription() const -{ - TStringStream out; - WriteErrorDescription(*this, &out); - return std::move(out.Str()); -} - -TString TYtError::FullDescription() const -{ - TStringStream s; - WriteErrorDescription(*this, &s); - s << "; full error: " << GetYsonText(); - return s.Str(); -} - -//////////////////////////////////////////////////////////////////////////////// - -TErrorResponse::TErrorResponse(int httpCode, const TString& requestId) - : HttpCode_(httpCode) - , RequestId_(requestId) -{ } - -bool TErrorResponse::IsOk() const -{ - return Error_.GetCode() == 0; -} - -void TErrorResponse::SetRawError(const TString& message) -{ - Error_ = TYtError(message); - Setup(); -} - -void TErrorResponse::SetError(TYtError error) -{ - Error_ = std::move(error); - Setup(); -} - -void TErrorResponse::ParseFromJsonError(const TString& 
jsonError) -{ - Error_.ParseFrom(jsonError); - Setup(); -} - -void TErrorResponse::SetIsFromTrailers(bool isFromTrailers) -{ - IsFromTrailers_ = isFromTrailers; -} - -int TErrorResponse::GetHttpCode() const -{ - return HttpCode_; -} - -bool TErrorResponse::IsFromTrailers() const -{ - return IsFromTrailers_; -} - -bool TErrorResponse::IsTransportError() const -{ - return HttpCode_ == 503; -} - -TString TErrorResponse::GetRequestId() const -{ - return RequestId_; -} - -const TYtError& TErrorResponse::GetError() const -{ - return Error_; -} - -bool TErrorResponse::IsResolveError() const -{ - return Error_.ContainsErrorCode(NClusterErrorCodes::NYTree::ResolveError); -} - -bool TErrorResponse::IsAccessDenied() const -{ - return Error_.ContainsErrorCode(NClusterErrorCodes::NSecurityClient::AuthorizationError); -} - -bool TErrorResponse::IsConcurrentTransactionLockConflict() const -{ - return Error_.ContainsErrorCode(NClusterErrorCodes::NCypressClient::ConcurrentTransactionLockConflict); -} - -bool TErrorResponse::IsRequestRateLimitExceeded() const -{ - return Error_.ContainsErrorCode(NClusterErrorCodes::NSecurityClient::RequestQueueSizeLimitExceeded); -} - -bool TErrorResponse::IsRequestQueueSizeLimitExceeded() const -{ - return Error_.ContainsErrorCode(NClusterErrorCodes::NRpc::RequestQueueSizeLimitExceeded); -} - -bool TErrorResponse::IsChunkUnavailable() const -{ - return Error_.ContainsErrorCode(NClusterErrorCodes::NChunkClient::ChunkUnavailable); -} - -bool TErrorResponse::IsRequestTimedOut() const -{ - return Error_.ContainsErrorCode(NClusterErrorCodes::Timeout); -} - -bool TErrorResponse::IsNoSuchTransaction() const -{ - return Error_.ContainsErrorCode(NClusterErrorCodes::NTransactionClient::NoSuchTransaction); -} - -bool TErrorResponse::IsConcurrentOperationsLimitReached() const -{ - return Error_.ContainsErrorCode(NClusterErrorCodes::NScheduler::TooManyOperations); -} - -void TErrorResponse::Setup() -{ - TStringStream s; - *this << Error_.FullDescription(); -} - 
-//////////////////////////////////////////////////////////////////// - -TOperationFailedError::TOperationFailedError( - EState state, - TOperationId id, - TYtError ytError, - TVector<TFailedJobInfo> failedJobInfo) - : State_(state) - , OperationId_(id) - , Error_(std::move(ytError)) - , FailedJobInfo_(std::move(failedJobInfo)) -{ - *this << Error_.FullDescription(); - if (!FailedJobInfo_.empty()) { - *this << DumpJobInfoForException(OperationId_, FailedJobInfo_); - } -} - -TOperationFailedError::EState TOperationFailedError::GetState() const -{ - return State_; -} - -TOperationId TOperationFailedError::GetOperationId() const -{ - return OperationId_; -} - -const TYtError& TOperationFailedError::GetError() const -{ - return Error_; -} - -const TVector<TFailedJobInfo>& TOperationFailedError::GetFailedJobInfo() const -{ - return FailedJobInfo_; -} - -} // namespace NYT diff --git a/yt/cpp/mapreduce/interface/errors.h b/yt/cpp/mapreduce/interface/errors.h deleted file mode 100644 index afad58ed72..0000000000 --- a/yt/cpp/mapreduce/interface/errors.h +++ /dev/null @@ -1,290 +0,0 @@ -#pragma once - -/// -/// @file yt/cpp/mapreduce/interface/errors.h -/// -/// Errors and exceptions emitted by library. - -#include "fwd.h" -#include "common.h" - -#include <library/cpp/yson/node/node.h> - -#include <util/generic/bt_exception.h> -#include <util/generic/yexception.h> -#include <util/generic/string.h> -#include <util/generic/vector.h> - -namespace NJson { - class TJsonValue; -} // namespace NJson - -namespace NYT { - -//////////////////////////////////////////////////////////////////////////////// - -/// -/// @brief Error that is thrown when library detects invalid usage of API. -/// -/// For example trying to start operations on empty table list. -class TApiUsageError - : public TWithBackTrace<yexception> -{ }; - -/// -/// @brief Error that is thrown when request retries continues for too long. 
-/// -/// @see NYT::TRetryConfig -/// @see NYT::IRetryConfigProvider -class TRequestRetriesTimeout - : public yexception -{ }; - -//////////////////////////////////////////////////////////////////////////////// - -/// -/// @brief Error returned by YT cluster. -/// -/// An object of this class describe error that happened on YT server. -/// Internally each error is a tree. Each node of the tree contains: -/// - integer error code; -/// - text description of error; -/// - attributes describing error context. -/// -/// To get text description of an error one should use -/// @ref NYT::TYtError::ShortDescription or @ref NYT::TYtError::FullDescription -/// -/// To distinguish between error kinds @ref NYT::TYtError::ContainsErrorCode should be used. -/// -/// @see NYT::TErrorResponse -/// @see NYT::TOperationFailedError -class TYtError -{ -public: - /// Constructs error with NYT::NClusterErrorCodes::OK code and empty message. - TYtError(); - - /// Constructs error with NYT::NClusterErrorCodes::Generic code and given message. - explicit TYtError(const TString& message); - - /// Constructs error with given code and given message. - TYtError(int code, const TString& message); - - /// Construct error from json representation. - TYtError(const ::NJson::TJsonValue& value); - - /// Construct error from TNode representation. - TYtError(const TNode& value); - - /// - /// @brief Check if error or any of inner errors has given error code. - /// - /// Use this method to distinguish kind of error. - bool ContainsErrorCode(int code) const; - - /// - /// @brief Get short description of error. - /// - /// Short description contain text description of error and all inner errors. - /// It is human readable but misses some important information (error codes, error attributes). - /// - /// Usually it's better to use @ref NYT::TYtError::FullDescription to log errors. - TString ShortDescription() const; - - /// - /// @brief Get full description of error. 
- /// - /// Full description contains readable short description - /// followed by text yson representation of error that contains error codes and attributes. - TString FullDescription() const; - - /// - /// @brief Get error code of the topmost error. - /// - /// @warning Do not use this method to distinguish between error kinds - /// @ref NYT::TYtError::ContainsErrorCode should be used instead. - int GetCode() const; - - /// - /// @brief Get error text of the topmost error. - /// - /// @warning This method should not be used to log errors - /// since text description of inner errors is going to be lost. - /// @ref NYT::TYtError::FullDescription should be used instead. - const TString& GetMessage() const; - - /// - /// @brief Check if error or any of inner errors contains given text chunk. - /// - /// @warning @ref NYT::TYtError::ContainsErrorCode must be used instead of - /// this method when possible. If there is no suitable error code it's - /// better to ask yt@ to add one. This method should only be used as workaround. - bool ContainsText(const TStringBuf& text) const; - - /// @brief Get inner errors. - const TVector<TYtError>& InnerErrors() const; - - /// Parse error from json string. - void ParseFrom(const TString& jsonError); - - /// Collect error codes from entire error tree. - TSet<int> GetAllErrorCodes() const; - - /// Check if error has any attributes. - bool HasAttributes() const; - - /// Get error attributes. - const TNode::TMapType& GetAttributes() const; - - /// Get text yson representation of error - TString GetYsonText() const; - -private: - int Code_; - TString Message_; - TVector<TYtError> InnerErrors_; - TNode::TMapType Attributes_; -}; - -//////////////////////////////////////////////////////////////////////////////// - -/// -/// @brief Generic error response returned by server. -/// -/// TErrorResponse can be thrown from almost any client method when server responds with error. 
-/// -class TErrorResponse - : public yexception -{ -public: - TErrorResponse(int httpCode, const TString& requestId); - TErrorResponse(int httpCode, TYtError error); - - /// Get error object returned by server. - const TYtError& GetError() const; - - /// Get if (correlation-id) of request that was responded with error. - TString GetRequestId() const; - - /// Get HTTP code of response. - int GetHttpCode() const; - - /// Is error parsed from response trailers. - bool IsFromTrailers() const; - - /// Check if error was caused by transport problems inside YT cluster. - bool IsTransportError() const; - - /// Check if error was caused by failure to resolve cypress path. - bool IsResolveError() const; - - /// Check if error was caused by lack of permissions to execute request. - bool IsAccessDenied() const; - - /// Check if error was caused by failure to lock object because of another transaction is holding lock. - bool IsConcurrentTransactionLockConflict() const; - - /// Check if error was caused by request quota limit exceeding. - bool IsRequestRateLimitExceeded() const; - - // YT can't serve request because it is overloaded. - bool IsRequestQueueSizeLimitExceeded() const; - - /// Check if error was caused by failure to get chunk. Such errors are almost always temporary. - bool IsChunkUnavailable() const; - - /// Check if error was caused by internal YT timeout. - bool IsRequestTimedOut() const; - - /// Check if error was caused by trying to work with transaction that was finished or never existed. - bool IsNoSuchTransaction() const; - - // User reached their limit of concurrently running operations. - bool IsConcurrentOperationsLimitReached() const; - - /// @deprecated This method must not be used. 
- bool IsOk() const; - - void SetRawError(const TString& message); - void SetError(TYtError error); - void ParseFromJsonError(const TString& jsonError); - void SetIsFromTrailers(bool isFromTrailers); - -private: - void Setup(); - -private: - int HttpCode_; - TString RequestId_; - TYtError Error_; - bool IsFromTrailers_ = false; -}; - -//////////////////////////////////////////////////////////////////////////////// - -/// Info about failed jobs. -/// -/// @see NYT::TOperationFailedError -struct TFailedJobInfo -{ - /// Id of a job. - TJobId JobId; - - /// Error describing job failure. - TYtError Error; - - /// Stderr of job. - /// - /// @note YT doesn't store all job stderrs, check @ref NYT::IOperationClient::GetJobStderr - /// for list of limitations. - /// - /// @see NYT::IOperationClient::GetJobStderr - TString Stderr; -}; - -//////////////////////////////////////////////////////////////////////////////// - -/// -/// @brief Error that is thrown when operation watched by library fails. -/// -/// This error is thrown from operation starting methods when they are started in sync mode (@ref NYT::TOperationOptions::Wait == true) -/// or from future returned by NYT::IOperation::Watch. -/// -/// @see NYT::IOperationClient -class TOperationFailedError - : public yexception -{ -public: - /// Final state of operation. - enum EState { - /// Operation failed due to some error. - Failed, - /// Operation didn't experience errors, but was aborted by user request or by YT. - Aborted, - }; - -public: - TOperationFailedError(EState state, TOperationId id, TYtError ytError, TVector<TFailedJobInfo> failedJobInfo); - - /// Get final state of operation. - EState GetState() const; - - /// Get operation id. - TOperationId GetOperationId() const; - - /// Return operation error. - const TYtError& GetError() const; - - /// Return info about failed jobs (if any). 
- const TVector<TFailedJobInfo>& GetFailedJobInfo() const; - -private: - EState State_; - TOperationId OperationId_; - TYtError Error_; - TVector<TFailedJobInfo> FailedJobInfo_; -}; - -//////////////////////////////////////////////////////////////////////////////// - -} // namespace NYT diff --git a/yt/cpp/mapreduce/interface/finish_or_die.h b/yt/cpp/mapreduce/interface/finish_or_die.h deleted file mode 100644 index 9d7dcece02..0000000000 --- a/yt/cpp/mapreduce/interface/finish_or_die.h +++ /dev/null @@ -1,41 +0,0 @@ -#pragma once - -#include <util/system/yassert.h> - -#include <exception> - -/// @cond Doxygen_Suppress -namespace NYT::NDetail { - -//////////////////////////////////////////////////////////////////////////////// - -template <typename T> -void FinishOrDie(T* pThis, const char* className) noexcept -{ - auto fail = [&] (const char* what) { - Y_FAIL( - "\n\n" - "Destructor of %s caught exception during Finish: %s.\n" - "Some data is probably has not been written.\n" - "In order to handle such exceptions consider explicitly call Finish() method.\n", - className, - what); - }; - - try { - pThis->Finish(); - } catch (const std::exception& ex) { - if (!std::uncaught_exceptions()) { - fail(ex.what()); - } - } catch (...) { - if (!std::uncaught_exceptions()) { - fail("<unknown exception>"); - } - } -} - -//////////////////////////////////////////////////////////////////////////////// - -} // namespace NYT::NDetail -/// @endcond diff --git a/yt/cpp/mapreduce/interface/fluent.h b/yt/cpp/mapreduce/interface/fluent.h deleted file mode 100644 index 8ca6e86336..0000000000 --- a/yt/cpp/mapreduce/interface/fluent.h +++ /dev/null @@ -1,678 +0,0 @@ -#pragma once - -/// -/// @file yt/cpp/mapreduce/interface/fluent.h -/// -/// Adapters for working with @ref NYson::IYsonConsumer in a structured way, with compile-time syntax checks. -/// -/// The following documentation is copied verbatim from `yt/core/ytree/fluent.h`. 
-/// -/// WHAT IS THIS -/// -/// Fluent adapters encapsulate invocation of IYsonConsumer methods in a -/// convenient structured manner. Key advantage of fluent-like code is that -/// attempt of building syntactically incorrect YSON structure will result -/// in a compile-time error. -/// -/// Each fluent object is associated with a context that defines possible YSON -/// tokens that may appear next. For example, TFluentMap is a fluent object -/// that corresponds to a location within YSON map right before a key-value -/// pair or the end of the map. -/// -/// More precisely, each object that may be obtained by a sequence of fluent -/// method calls has the full history of its enclosing YSON composite types in -/// its single template argument hereinafter referred to as TParent. This allows -/// us not to forget the original context after opening and closing the embedded -/// composite structure. -/// -/// It is possible to invoke a separate YSON building procedure by calling -/// one of convenience Do* methods. There are two possibilities here: it is -/// possible to delegate invocation context either as a fluent object (like -/// TFluentMap, TFluentList, TFluentAttributes or TFluentAny) or as a raw -/// IYsonConsumer*. The latter is discouraged since it is impossible to check -/// if a given side-built YSON structure fits current fluent context. -/// For example it is possible to call Do() method inside YSON map passing -/// consumer to a procedure that will treat context like it is in a list. -/// Passing typed fluent builder saves you from such a misbehaviour. -/// -/// TFluentXxx corresponds to an internal class of TXxx -/// without any history hidden in template argument. It allows you to -/// write procedures of form: -/// -/// void BuildSomeAttributesInYson(TFluentMap fluent) { ... } -/// -/// without thinking about the exact way how this procedure is nested in other -/// procedures. 
-/// -/// An important notation: we will refer to a function whose first argument -/// is TFluentXxx as TFuncXxx. -/// -/// -/// BRIEF LIST OF AVAILABLE METHODS -/// -/// Only the most popular methods are covered here. Refer to the code for the -/// rest of them. -/// -/// TAny: -/// * Value(T value) -> TParent, serialize `value` using underlying consumer. -/// T should be such that free function Serialize(NYson::IYsonConsumer*, const T&) is -/// defined; -/// * BeginMap() -> TFluentMap, open map; -/// * BeginList() -> TFluentList, open list; -/// * BeginAttributes() -> TFluentAttributes, open attributes; -/// -/// * Do(TFuncAny func) -> TAny, delegate invocation to a separate procedure. -/// * DoIf(bool condition, TFuncAny func) -> TAny, same as Do() but invoke -/// `func` only if `condition` is true; -/// * DoFor(TCollection collection, TFuncAny func) -> TAny, same as Do() -/// but iterate over `collection` and pass each of its elements as a second -/// argument to `func`. Instead of passing a collection it is possible -/// to pass two iterators as arguments; -/// -/// * DoMap(TFuncMap func) -> TAny, open a map, delegate invocation to a separate -/// procedure and close map; -/// * DoMapFor(TCollection collection, TFuncMap func) -> TAny, open a map, iterate -/// over `collection` and pass each of its elements as a second argument to `func` -/// and close map; -/// * DoList(TFuncList func) -> TAny, same as DoMap(); -/// * DoListFor(TCollection collection, TFuncList func) -> TAny, same as DoMapFor(). -/// -/// -/// TFluentMap: -/// * Item(TStringBuf key) -> TAny, open an element keyed with `key`; -/// * EndMap() -> TParent, close map; -/// * Do(TFuncMap func) -> TFluentMap, same as Do() for TAny; -/// * DoIf(bool condition, TFuncMap func) -> TFluentMap, same as DoIf() for TAny; -/// * DoFor(TCollection collection, TFuncMap func) -> TFluentMap, same as DoFor() for TAny. 
-/// -/// -/// TFluentList: -/// * Item() -> TAny, open a new list element; -/// * EndList() -> TParent, close list; -/// * Do(TFuncList func) -> TFluentList, same as Do() for TAny; -/// * DoIf(bool condition, TFuncList func) -> TFluentList, same as DoIf() for TAny; -/// * DoFor(TCollection collection, TFuncList func) -> TFluentList, same as DoFor() for TAny. -/// -/// -/// TFluentAttributes: -/// * Item(TStringBuf key) -> TAny, open an element keyed with `key`. -/// * EndAttributes() -> TParentWithoutAttributes, close attributes. Note that -/// this method leads to a context that is forced not to have attributes, -/// preventing us from putting attributes twice before an object. -/// * Do(TFuncAttributes func) -> TFluentAttributes, same as Do() for TAny; -/// * DoIf(bool condition, TFuncAttributes func) -> TFluentAttributes, same as DoIf() -/// for TAny; -/// * DoFor(TCollection collection, TFuncAttributes func) -> TFluentAttributes, same as DoFor() -/// for TAny. -/// - - -#include "common.h" -#include "serialize.h" - -#include <library/cpp/yson/node/serialize.h> -#include <library/cpp/yson/node/node_builder.h> - -#include <library/cpp/yson/consumer.h> -#include <library/cpp/yson/writer.h> - -#include <util/generic/noncopyable.h> -#include <util/generic/ptr.h> -#include <util/stream/str.h> - -namespace NYT { - -//////////////////////////////////////////////////////////////////////////////// - -template <class T> -struct TFluentYsonUnwrapper -{ - using TUnwrapped = T; - - static TUnwrapped Unwrap(T t) - { - return std::move(t); - } -}; - -//////////////////////////////////////////////////////////////////////////////// - -struct TFluentYsonVoid -{ }; - -template <> -struct TFluentYsonUnwrapper<TFluentYsonVoid> -{ - using TUnwrapped = void; - - static TUnwrapped Unwrap(TFluentYsonVoid) - { } -}; - -//////////////////////////////////////////////////////////////////////////////// - -/// This class is actually a namespace for specific fluent adapter classes. 
-class TFluentYsonBuilder - : private TNonCopyable -{ -private: - template <class T> - static void WriteValue(NYT::NYson::IYsonConsumer* consumer, const T& value) - { - Serialize(value, consumer); - } - -public: - class TFluentAny; - template <class TParent> class TAny; - template <class TParent> class TToAttributes; - template <class TParent> class TAttributes; - template <class TParent> class TListType; - template <class TParent> class TMapType; - - /// Base class for all fluent adapters. - template <class TParent> - class TFluentBase - { - public: - /// Implicit conversion to yson consumer - operator NYT::NYson::IYsonConsumer* () const - { - return Consumer; - } - - protected: - /// @cond Doxygen_Suppress - NYT::NYson::IYsonConsumer* Consumer; - TParent Parent; - - TFluentBase(NYT::NYson::IYsonConsumer* consumer, TParent parent) - : Consumer(consumer) - , Parent(std::move(parent)) - { } - - using TUnwrappedParent = typename TFluentYsonUnwrapper<TParent>::TUnwrapped; - - TUnwrappedParent GetUnwrappedParent() - { - return TFluentYsonUnwrapper<TParent>::Unwrap(std::move(Parent)); - } - /// @endcond Doxygen_Suppress - }; - - /// Base class for fluent adapters for fragment of list, map or attributes. - template <template <class TParent> class TThis, class TParent> - class TFluentFragmentBase - : public TFluentBase<TParent> - { - public: - using TDeepThis = TThis<TParent>; - using TShallowThis = TThis<TFluentYsonVoid>; - using TUnwrappedParent = typename TFluentYsonUnwrapper<TParent>::TUnwrapped; - - explicit TFluentFragmentBase(NYT::NYson::IYsonConsumer* consumer, TParent parent = TParent()) - : TFluentBase<TParent>(consumer, std::move(parent)) - { } - - /// Delegate invocation to a separate procedure. - template <class TFunc> - TDeepThis& Do(const TFunc& func) - { - func(TShallowThis(this->Consumer)); - return *static_cast<TDeepThis*>(this); - } - - /// Conditionally delegate invocation to a separate procedure. 
- template <class TFunc> - TDeepThis& DoIf(bool condition, const TFunc& func) - { - if (condition) { - func(TShallowThis(this->Consumer)); - } - return *static_cast<TDeepThis*>(this); - } - - /// Calls `func(*this, element)` for each `element` in range `[begin, end)`. - template <class TFunc, class TIterator> - TDeepThis& DoFor(const TIterator& begin, const TIterator& end, const TFunc& func) - { - for (auto current = begin; current != end; ++current) { - func(TShallowThis(this->Consumer), current); - } - return *static_cast<TDeepThis*>(this); - } - - /// Calls `func(*this, element)` for each `element` in `collection`. - template <class TFunc, class TCollection> - TDeepThis& DoFor(const TCollection& collection, const TFunc& func) - { - for (const auto& item : collection) { - func(TShallowThis(this->Consumer), item); - } - return *static_cast<TDeepThis*>(this); - } - - }; - - /// Fluent adapter of a value without attributes. - template <class TParent> - class TAnyWithoutAttributes - : public TFluentBase<TParent> - { - public: - using TUnwrappedParent = typename TFluentYsonUnwrapper<TParent>::TUnwrapped; - - TAnyWithoutAttributes(NYT::NYson::IYsonConsumer* consumer, TParent parent) - : TFluentBase<TParent>(consumer, std::move(parent)) - { } - - /// Pass `value` to underlying consumer. - template <class T> - TUnwrappedParent Value(const T& value) - { - WriteValue(this->Consumer, value); - return this->GetUnwrappedParent(); - } - - /// Call `OnEntity()` of underlying consumer. - TUnwrappedParent Entity() - { - this->Consumer->OnEntity(); - return this->GetUnwrappedParent(); - } - - /// Serialize `collection` to underlying consumer as a list. 
- template <class TCollection> - TUnwrappedParent List(const TCollection& collection) - { - this->Consumer->OnBeginList(); - for (const auto& item : collection) { - this->Consumer->OnListItem(); - WriteValue(this->Consumer, item); - } - this->Consumer->OnEndList(); - return this->GetUnwrappedParent(); - } - - /// Serialize maximum `maxSize` elements of `collection` to underlying consumer as a list. - template <class TCollection> - TUnwrappedParent ListLimited(const TCollection& collection, size_t maxSize) - { - this->Consumer->OnBeginAttributes(); - this->Consumer->OnKeyedItem("count"); - this->Consumer->OnInt64Scalar(collection.size()); - this->Consumer->OnEndAttributes(); - this->Consumer->OnBeginList(); - size_t printedSize = 0; - for (const auto& item : collection) { - if (printedSize >= maxSize) - break; - this->Consumer->OnListItem(); - WriteValue(this->Consumer, item); - ++printedSize; - } - this->Consumer->OnEndList(); - return this->GetUnwrappedParent(); - } - - /// Open a list. - TListType<TParent> BeginList() - { - this->Consumer->OnBeginList(); - return TListType<TParent>(this->Consumer, this->Parent); - } - - /// Open a list, delegate invocation to `func`, then close the list. - template <class TFunc> - TUnwrappedParent DoList(const TFunc& func) - { - this->Consumer->OnBeginList(); - func(TListType<TFluentYsonVoid>(this->Consumer)); - this->Consumer->OnEndList(); - return this->GetUnwrappedParent(); - } - - /// Open a list, call `func(*this, element)` for each `element` of range, then close the list. 
- template <class TFunc, class TIterator> - TUnwrappedParent DoListFor(const TIterator& begin, const TIterator& end, const TFunc& func) - { - this->Consumer->OnBeginList(); - for (auto current = begin; current != end; ++current) { - func(TListType<TFluentYsonVoid>(this->Consumer), current); - } - this->Consumer->OnEndList(); - return this->GetUnwrappedParent(); - } - - /// Open a list, call `func(*this, element)` for each `element` of `collection`, then close the list. - template <class TFunc, class TCollection> - TUnwrappedParent DoListFor(const TCollection& collection, const TFunc& func) - { - this->Consumer->OnBeginList(); - for (const auto& item : collection) { - func(TListType<TFluentYsonVoid>(this->Consumer), item); - } - this->Consumer->OnEndList(); - return this->GetUnwrappedParent(); - } - - /// Open a map. - TMapType<TParent> BeginMap() - { - this->Consumer->OnBeginMap(); - return TMapType<TParent>(this->Consumer, this->Parent); - } - - /// Open a map, delegate invocation to `func`, then close the map. - template <class TFunc> - TUnwrappedParent DoMap(const TFunc& func) - { - this->Consumer->OnBeginMap(); - func(TMapType<TFluentYsonVoid>(this->Consumer)); - this->Consumer->OnEndMap(); - return this->GetUnwrappedParent(); - } - - /// Open a map, call `func(*this, element)` for each `element` of range, then close the map. - template <class TFunc, class TIterator> - TUnwrappedParent DoMapFor(const TIterator& begin, const TIterator& end, const TFunc& func) - { - this->Consumer->OnBeginMap(); - for (auto current = begin; current != end; ++current) { - func(TMapType<TFluentYsonVoid>(this->Consumer), current); - } - this->Consumer->OnEndMap(); - return this->GetUnwrappedParent(); - } - - /// Open a map, call `func(*this, element)` for each `element` of `collection`, then close the map. 
- template <class TFunc, class TCollection> - TUnwrappedParent DoMapFor(const TCollection& collection, const TFunc& func) - { - this->Consumer->OnBeginMap(); - for (const auto& item : collection) { - func(TMapType<TFluentYsonVoid>(this->Consumer), item); - } - this->Consumer->OnEndMap(); - return this->GetUnwrappedParent(); - } - }; - - /// Fluent adapter of any value. - template <class TParent> - class TAny - : public TAnyWithoutAttributes<TParent> - { - public: - using TBase = TAnyWithoutAttributes<TParent>; - - explicit TAny(NYT::NYson::IYsonConsumer* consumer, TParent parent) - : TBase(consumer, std::move(parent)) - { } - - /// Open attributes. - TAttributes<TBase> BeginAttributes() - { - this->Consumer->OnBeginAttributes(); - return TAttributes<TBase>( - this->Consumer, - TBase(this->Consumer, this->Parent)); - } - }; - - /// Fluent adapter of attributes fragment (the inside part of attributes). - template <class TParent = TFluentYsonVoid> - class TAttributes - : public TFluentFragmentBase<TAttributes, TParent> - { - public: - using TThis = TAttributes<TParent>; - using TUnwrappedParent = typename TFluentYsonUnwrapper<TParent>::TUnwrapped; - - explicit TAttributes(NYT::NYson::IYsonConsumer* consumer, TParent parent = TParent()) - : TFluentFragmentBase<TFluentYsonBuilder::TAttributes, TParent>(consumer, std::move(parent)) - { } - - /// Pass attribute key to underlying consumer. - TAny<TThis> Item(const TStringBuf& key) - { - this->Consumer->OnKeyedItem(key); - return TAny<TThis>(this->Consumer, *this); - } - - /// Pass attribute key to underlying consumer. - template <size_t Size> - TAny<TThis> Item(const char (&key)[Size]) - { - return Item(TStringBuf(key, Size - 1)); - } - - //TODO: from TNode - - /// Close the attributes. - TUnwrappedParent EndAttributes() - { - this->Consumer->OnEndAttributes(); - return this->GetUnwrappedParent(); - } - }; - - /// Fluent adapter of list fragment (the inside part of a list). 
- template <class TParent = TFluentYsonVoid> - class TListType - : public TFluentFragmentBase<TListType, TParent> - { - public: - using TThis = TListType<TParent>; - using TUnwrappedParent = typename TFluentYsonUnwrapper<TParent>::TUnwrapped; - - explicit TListType(NYT::NYson::IYsonConsumer* consumer, TParent parent = TParent()) - : TFluentFragmentBase<TFluentYsonBuilder::TListType, TParent>(consumer, std::move(parent)) - { } - - /// Call `OnListItem()` of underlying consumer. - TAny<TThis> Item() - { - this->Consumer->OnListItem(); - return TAny<TThis>(this->Consumer, *this); - } - - // TODO: from TNode - - /// Close the list. - TUnwrappedParent EndList() - { - this->Consumer->OnEndList(); - return this->GetUnwrappedParent(); - } - }; - - /// Fluent adapter of map fragment (the inside part of a map). - template <class TParent = TFluentYsonVoid> - class TMapType - : public TFluentFragmentBase<TMapType, TParent> - { - public: - using TThis = TMapType<TParent>; - using TUnwrappedParent = typename TFluentYsonUnwrapper<TParent>::TUnwrapped; - - explicit TMapType(NYT::NYson::IYsonConsumer* consumer, TParent parent = TParent()) - : TFluentFragmentBase<TFluentYsonBuilder::TMapType, TParent>(consumer, std::move(parent)) - { } - - /// Pass map key to underlying consumer. - template <size_t Size> - TAny<TThis> Item(const char (&key)[Size]) - { - return Item(TStringBuf(key, Size - 1)); - } - - /// Pass map key to underlying consumer. - TAny<TThis> Item(const TStringBuf& key) - { - this->Consumer->OnKeyedItem(key); - return TAny<TThis>(this->Consumer, *this); - } - - // TODO: from TNode - - /// Close the map. - TUnwrappedParent EndMap() - { - this->Consumer->OnEndMap(); - return this->GetUnwrappedParent(); - } - }; - -}; - -//////////////////////////////////////////////////////////////////////////////// - -/// Builder representing any value. -using TFluentAny = TFluentYsonBuilder::TAny<TFluentYsonVoid>; - -/// Builder representing the inside of a list (list fragment). 
-using TFluentList = TFluentYsonBuilder::TListType<TFluentYsonVoid>; - -/// Builder representing the inside of a map (map fragment). -using TFluentMap = TFluentYsonBuilder::TMapType<TFluentYsonVoid>; - -/// Builder representing the inside of attributes. -using TFluentAttributes = TFluentYsonBuilder::TAttributes<TFluentYsonVoid>; - -//////////////////////////////////////////////////////////////////////////////// - -/// Create a fluent adapter to invoke methods of `consumer`. -static inline TFluentAny BuildYsonFluently(NYT::NYson::IYsonConsumer* consumer) -{ - return TFluentAny(consumer, TFluentYsonVoid()); -} - -/// Create a fluent adapter to invoke methods of `consumer` describing the contents of a list. -static inline TFluentList BuildYsonListFluently(NYT::NYson::IYsonConsumer* consumer) -{ - return TFluentList(consumer); -} - -/// Create a fluent adapter to invoke methods of `consumer` describing the contents of a map. -static inline TFluentMap BuildYsonMapFluently(NYT::NYson::IYsonConsumer* consumer) -{ - return TFluentMap(consumer); -} - -//////////////////////////////////////////////////////////////////////////////// - -class TFluentYsonWriterState - : public TThrRefBase -{ -public: - using TValue = TString; - - explicit TFluentYsonWriterState(::NYson::EYsonFormat format) - : Writer(&Output, format) - { } - - TString GetValue() - { - return Output.Str(); - } - - NYT::NYson::IYsonConsumer* GetConsumer() - { - return &Writer; - } - -private: - TStringStream Output; - ::NYson::TYsonWriter Writer; -}; - -//////////////////////////////////////////////////////////////////////////////// - -class TFluentYsonBuilderState - : public TThrRefBase -{ -public: - using TValue = TNode; - - explicit TFluentYsonBuilderState() - : Builder(&Node) - { } - - TNode GetValue() - { - return std::move(Node); - } - - NYT::NYson::IYsonConsumer* GetConsumer() - { - return &Builder; - } - -private: - TNode Node; - TNodeBuilder Builder; -}; - 
-//////////////////////////////////////////////////////////////////////////////// - -template <class TState> -class TFluentYsonHolder -{ -public: - explicit TFluentYsonHolder(::TIntrusivePtr<TState> state) - : State(state) - { } - - ::TIntrusivePtr<TState> GetState() const - { - return State; - } - -private: - ::TIntrusivePtr<TState> State; -}; - -//////////////////////////////////////////////////////////////////////////////// - -template <class TState> -struct TFluentYsonUnwrapper< TFluentYsonHolder<TState> > -{ - using TUnwrapped = typename TState::TValue; - - static TUnwrapped Unwrap(const TFluentYsonHolder<TState>& holder) - { - return std::move(holder.GetState()->GetValue()); - } -}; - -//////////////////////////////////////////////////////////////////////////////// - -template <class TState> -TFluentYsonBuilder::TAny<TFluentYsonHolder<TState>> -BuildYsonFluentlyWithState(::TIntrusivePtr<TState> state) -{ - return TFluentYsonBuilder::TAny<TFluentYsonHolder<TState>>( - state->GetConsumer(), - TFluentYsonHolder<TState>(state)); -} - -/// Create a fluent adapter returning a `TString` with corresponding YSON when construction is finished. -inline TFluentYsonBuilder::TAny<TFluentYsonHolder<TFluentYsonWriterState>> -BuildYsonStringFluently(::NYson::EYsonFormat format = ::NYson::EYsonFormat::Text) -{ - ::TIntrusivePtr<TFluentYsonWriterState> state(new TFluentYsonWriterState(format)); - return BuildYsonFluentlyWithState(state); -} - -/// Create a fluent adapter returning a @ref NYT::TNode when construction is finished. 
-inline TFluentYsonBuilder::TAny<TFluentYsonHolder<TFluentYsonBuilderState>> -BuildYsonNodeFluently() -{ - ::TIntrusivePtr<TFluentYsonBuilderState> state(new TFluentYsonBuilderState); - return BuildYsonFluentlyWithState(state); -} - -//////////////////////////////////////////////////////////////////////////////// - -} // namespace NYT diff --git a/yt/cpp/mapreduce/interface/format.cpp b/yt/cpp/mapreduce/interface/format.cpp deleted file mode 100644 index f8318310a4..0000000000 --- a/yt/cpp/mapreduce/interface/format.cpp +++ /dev/null @@ -1,135 +0,0 @@ -#include "format.h" -#include "protobuf_format.h" - -#include "errors.h" - -#include <google/protobuf/descriptor.h> -#include <google/protobuf/messagext.h> - -namespace NYT { - -TTableSchema CreateTableSchema( - const ::google::protobuf::Descriptor& messageDescriptor, - bool keepFieldsWithoutExtension) -{ - return NDetail::CreateTableSchemaImpl(messageDescriptor, keepFieldsWithoutExtension); -} - -//////////////////////////////////////////////////////////////////////////////// - -TFormat::TFormat(const TNode& config) - : Config(config) -{ } - - -TFormat TFormat::Protobuf( - const TVector<const ::google::protobuf::Descriptor*>& descriptors, - bool withDescriptors) -{ - if (withDescriptors) { - return TFormat(NDetail::MakeProtoFormatConfigWithDescriptors(descriptors)); - } else { - return TFormat(NDetail::MakeProtoFormatConfigWithTables(descriptors)); - } -} - -TFormat TFormat::YsonText() -{ - TNode config("yson"); - config.Attributes()("format", "text"); - return TFormat(config); -} - -TFormat TFormat::YsonBinary() -{ - TNode config("yson"); - config.Attributes()("format", "binary"); - return TFormat(config); -} - -TFormat TFormat::YaMRLenval() -{ - TNode config("yamr"); - config.Attributes()("lenval", true)("has_subkey", true); - return TFormat(config); -} - -TFormat TFormat::Json() -{ - return TFormat(TNode("json")); -} - -bool TFormat::IsTextYson() const -{ - if (!Config.IsString() || Config.AsString() != "yson") { 
- return false; - } - if (!Config.HasAttributes()) { - return false; - } - const auto& attributes = Config.GetAttributes(); - if (!attributes.HasKey("format") || attributes["format"] != TNode("text")) { - return false; - } - return true; -} - -bool TFormat::IsProtobuf() const -{ - return Config.IsString() && Config.AsString() == "protobuf"; -} - -bool TFormat::IsYamredDsv() const -{ - return Config.IsString() && Config.AsString() == "yamred_dsv"; -} - -static TString FormatName(const TFormat& format) -{ - if (!format.Config.IsString()) { - Y_VERIFY(format.Config.IsUndefined()); - return "<undefined>"; - } - return format.Config.AsString(); -} - -TYamredDsvAttributes TFormat::GetYamredDsvAttributes() const -{ - if (!IsYamredDsv()) { - ythrow TApiUsageError() << "Cannot get yamred_dsv attributes for " << FormatName(*this) << " format"; - } - TYamredDsvAttributes attributes; - - const auto& nodeAttributes = Config.GetAttributes(); - { - const auto& keyColumns = nodeAttributes["key_column_names"]; - if (!keyColumns.IsList()) { - ythrow yexception() << "Ill-formed format: key_column_names is of non-list type: " << keyColumns.GetType(); - } - for (auto& column : keyColumns.AsList()) { - if (!column.IsString()) { - ythrow yexception() << "Ill-formed format: key_column_names: " << column.GetType(); - } - attributes.KeyColumnNames.push_back(column.AsString()); - } - } - - if (nodeAttributes.HasKey("subkey_column_names")) { - const auto& subkeyColumns = nodeAttributes["subkey_column_names"]; - if (!subkeyColumns.IsList()) { - ythrow yexception() << "Ill-formed format: subkey_column_names is not a list: " << subkeyColumns.GetType(); - } - for (const auto& column : subkeyColumns.AsList()) { - if (!column.IsString()) { - ythrow yexception() << "Ill-formed format: non-string inside subkey_key_column_names: " << column.GetType(); - } - attributes.SubkeyColumnNames.push_back(column.AsString()); - } - } - - return attributes; -} - 
-//////////////////////////////////////////////////////////////////////////////// - -} // namespace NYT diff --git a/yt/cpp/mapreduce/interface/format.h b/yt/cpp/mapreduce/interface/format.h deleted file mode 100644 index e297576464..0000000000 --- a/yt/cpp/mapreduce/interface/format.h +++ /dev/null @@ -1,122 +0,0 @@ -#pragma once - -/// -/// @file yt/cpp/mapreduce/interface/format.h -/// -/// Header containing class to work with raw [YT formats](https://yt.yandex-team.ru/docs/description/storage/formats.html). - -#include "node.h" - -#include <google/protobuf/descriptor.h> - -namespace NYT { - -//////////////////////////////////////////////////////////////////////////////// - -/// @deprecated -struct TYamredDsvAttributes -{ - /// Names of key columns. - TVector<TString> KeyColumnNames; - - /// Names of subkey columns. - TVector<TString> SubkeyColumnNames; -}; - -//////////////////////////////////////////////////////////////////////////////// - -/// @brief Class representing YT data format. -/// -/// Normally the user does not need to use it. -/// However, the class is handy for "raw" operations and table reading and writing, -/// e.g. @ref NYT::IOperationClient::RawMap and other raw operations, -/// @ref NYT::IIOClient::CreateRawReader and @ref NYT::IIOClient::CreateRawWriter. -/// Anyway, the static factory methods should be preferred to the constructor. -/// -/// @see [YT doc](https://yt.yandex-team.ru/docs/description/storage/formats.html). -struct TFormat -{ -public: - /// Format representation understandable by YT. - TNode Config; - -public: - /// @brief Construct format from given YT format representation. - /// - /// @note Prefer using static factory methods (e.g. @ref NYT::TFormat::YsonBinary, @ref NYT::TFormat::YsonText, @ref NYT::TFormat::Protobuf). - explicit TFormat(const TNode& config = TNode()); - - /// @brief Create text YSON format. 
- /// - /// @see [the doc](https://yt.yandex-team.ru/docs/description/storage/formats.html#YSON) - static TFormat YsonText(); - - /// @brief Create binary YSON format. - /// - /// @see [the doc](https://yt.yandex-team.ru/docs/description/storage/formats.html#YSON) - static TFormat YsonBinary(); - - /// @brief Create YaMR format. - /// - /// @deprecated - static TFormat YaMRLenval(); - - /// @brief Create protobuf format from protobuf message descriptors. - /// - /// @see [the doc](https://yt.yandex-team.ru/docs/api/c++/protobuf.html). - static TFormat Protobuf( - const TVector<const ::google::protobuf::Descriptor*>& descriptors, - bool withDescriptors = false); - - /// @brief Create JSON format. - /// - /// @see [the doc](https://yt.yandex-team.ru/docs/description/storage/formats.html#JSON) - static TFormat Json(); - - /// @brief Create protobuf format for the message specified in template parameter. - /// - /// `T` must be inherited from `Message`. - /// - /// @see [the doc](https://yt.yandex-team.ru/docs/api/c++/protobuf.html). - template<typename T> - static inline TFormat Protobuf(bool withDescriptors = false); - - /// @brief Is the format text YSON? - /// - /// @see [the doc](https://yt.yandex-team.ru/docs/description/storage/formats.html#YSON) - bool IsTextYson() const; - - /// @brief Is the format protobuf? - /// - /// @see [the doc](https://yt.yandex-team.ru/docs/api/c++/protobuf.html) - bool IsProtobuf() const; - - /// @brief Is the format YaMR? - /// - /// @deprecated - bool IsYamredDsv() const; - - /// @brief For YAMR format returns its attributes in structured way. - /// - /// @deprecated - TYamredDsvAttributes GetYamredDsvAttributes() const; -}; - -//////////////////////////////////////////////////////////////////////////////// - -template<typename T> -TFormat TFormat::Protobuf(bool withDescriptors) { - return TFormat::Protobuf({T::descriptor()}, withDescriptors); -} - -/// @brief Create table schema from protobuf message descriptor. 
-/// -/// @param messageDescriptor Message descriptor -/// @param keepFieldsWithoutExtension Add to schema fields without "column_name" or "key_column_name" extensions. -TTableSchema CreateTableSchema( - const ::google::protobuf::Descriptor& messageDescriptor, - bool keepFieldsWithoutExtension); - -//////////////////////////////////////////////////////////////////////////////// - -} // namespace NYT diff --git a/yt/cpp/mapreduce/interface/format_ut.cpp b/yt/cpp/mapreduce/interface/format_ut.cpp deleted file mode 100644 index 069c29087d..0000000000 --- a/yt/cpp/mapreduce/interface/format_ut.cpp +++ /dev/null @@ -1,235 +0,0 @@ -#include "common.h" -#include "errors.h" -#include "format.h" -#include "common_ut.h" - -#include <yt/cpp/mapreduce/interface/proto3_ut.pb.h> -#include <yt/cpp/mapreduce/interface/protobuf_table_schema_ut.pb.h> - -#include <library/cpp/testing/unittest/registar.h> - -using namespace NYT; - -static TNode GetColumns(const TFormat& format, int tableIndex = 0) -{ - return format.Config.GetAttributes()["tables"][tableIndex]["columns"]; -} - -Y_UNIT_TEST_SUITE(ProtobufFormat) -{ - Y_UNIT_TEST(TIntegral) - { - const auto format = TFormat::Protobuf<NUnitTesting::TIntegral>(); - auto columns = GetColumns(format); - - struct TColumn - { - TString Name; - TString ProtoType; - int FieldNumber; - }; - - auto expected = TVector<TColumn>{ - {"DoubleField", "double", 1}, - {"FloatField", "float", 2}, - {"Int32Field", "int32", 3}, - {"Int64Field", "int64", 4}, - {"Uint32Field", "uint32", 5}, - {"Uint64Field", "uint64", 6}, - {"Sint32Field", "sint32", 7}, - {"Sint64Field", "sint64", 8}, - {"Fixed32Field", "fixed32", 9}, - {"Fixed64Field", "fixed64", 10}, - {"Sfixed32Field", "sfixed32", 11}, - {"Sfixed64Field", "sfixed64", 12}, - {"BoolField", "bool", 13}, - {"EnumField", "enum_string", 14}, - }; - - UNIT_ASSERT_VALUES_EQUAL(columns.Size(), expected.size()); - for (int i = 0; i < static_cast<int>(columns.Size()); ++i) { - 
UNIT_ASSERT_VALUES_EQUAL(columns[i]["name"], expected[i].Name); - UNIT_ASSERT_VALUES_EQUAL(columns[i]["proto_type"], expected[i].ProtoType); - UNIT_ASSERT_VALUES_EQUAL(columns[i]["field_number"], expected[i].FieldNumber); - } - } - - Y_UNIT_TEST(TRowFieldSerializationOption) - { - const auto format = TFormat::Protobuf<NUnitTesting::TRowFieldSerializationOption>(); - auto columns = GetColumns(format); - - UNIT_ASSERT_VALUES_EQUAL(columns[0]["name"], "UrlRow_1"); - UNIT_ASSERT_VALUES_EQUAL(columns[0]["proto_type"], "structured_message"); - UNIT_ASSERT_VALUES_EQUAL(columns[0]["field_number"], 1); - const auto& fields = columns[0]["fields"]; - UNIT_ASSERT_VALUES_EQUAL(fields[0]["name"], "Host"); - UNIT_ASSERT_VALUES_EQUAL(fields[0]["proto_type"], "string"); - UNIT_ASSERT_VALUES_EQUAL(fields[0]["field_number"], 1); - - UNIT_ASSERT_VALUES_EQUAL(fields[1]["name"], "Path"); - UNIT_ASSERT_VALUES_EQUAL(fields[1]["proto_type"], "string"); - UNIT_ASSERT_VALUES_EQUAL(fields[1]["field_number"], 2); - - UNIT_ASSERT_VALUES_EQUAL(fields[2]["name"], "HttpCode"); - UNIT_ASSERT_VALUES_EQUAL(fields[2]["proto_type"], "sint32"); - UNIT_ASSERT_VALUES_EQUAL(fields[2]["field_number"], 3); - - UNIT_ASSERT_VALUES_EQUAL(columns[1]["name"], "UrlRow_2"); - UNIT_ASSERT_VALUES_EQUAL(columns[1]["proto_type"], "message"); - UNIT_ASSERT_VALUES_EQUAL(columns[1]["field_number"], 2); - } - - Y_UNIT_TEST(Packed) - { - const auto format = TFormat::Protobuf<NUnitTesting::TPacked>(); - auto column = GetColumns(format)[0]; - - UNIT_ASSERT_VALUES_EQUAL(column["name"], "PackedListInt64"); - UNIT_ASSERT_VALUES_EQUAL(column["proto_type"], "int64"); - UNIT_ASSERT_VALUES_EQUAL(column["field_number"], 1); - UNIT_ASSERT_VALUES_EQUAL(column["packed"], true); - UNIT_ASSERT_VALUES_EQUAL(column["repeated"], true); - } - - Y_UNIT_TEST(Cyclic) - { - UNIT_ASSERT_EXCEPTION(TFormat::Protobuf<NUnitTesting::TCyclic>(), TApiUsageError); - UNIT_ASSERT_EXCEPTION(TFormat::Protobuf<NUnitTesting::TCyclic::TA>(), TApiUsageError); - 
UNIT_ASSERT_EXCEPTION(TFormat::Protobuf<NUnitTesting::TCyclic::TB>(), TApiUsageError); - UNIT_ASSERT_EXCEPTION(TFormat::Protobuf<NUnitTesting::TCyclic::TC>(), TApiUsageError); - UNIT_ASSERT_EXCEPTION(TFormat::Protobuf<NUnitTesting::TCyclic::TD>(), TApiUsageError); - - const auto format = TFormat::Protobuf<NUnitTesting::TCyclic::TE>(); - auto column = GetColumns(format)[0]; - UNIT_ASSERT_VALUES_EQUAL(column["name"], "d"); - UNIT_ASSERT_VALUES_EQUAL(column["proto_type"], "message"); - UNIT_ASSERT_VALUES_EQUAL(column["field_number"], 1); - } - - Y_UNIT_TEST(Map) - { - const auto format = TFormat::Protobuf<NUnitTesting::TWithMap>(); - auto columns = GetColumns(format); - - UNIT_ASSERT_VALUES_EQUAL(columns.Size(), 5); - { - const auto& column = columns[0]; - UNIT_ASSERT_VALUES_EQUAL(column["name"], "MapDefault"); - UNIT_ASSERT_VALUES_EQUAL(column["proto_type"], "structured_message"); - UNIT_ASSERT_VALUES_EQUAL(column["fields"].Size(), 2); - UNIT_ASSERT_VALUES_EQUAL(column["fields"][0]["proto_type"], "int64"); - UNIT_ASSERT_VALUES_EQUAL(column["fields"][1]["proto_type"], "message"); - } - { - const auto& column = columns[1]; - UNIT_ASSERT_VALUES_EQUAL(column["name"], "MapListOfStructsLegacy"); - UNIT_ASSERT_VALUES_EQUAL(column["proto_type"], "structured_message"); - UNIT_ASSERT_VALUES_EQUAL(column["fields"].Size(), 2); - UNIT_ASSERT_VALUES_EQUAL(column["fields"][0]["proto_type"], "int64"); - UNIT_ASSERT_VALUES_EQUAL(column["fields"][1]["proto_type"], "message"); - } - { - const auto& column = columns[2]; - UNIT_ASSERT_VALUES_EQUAL(column["name"], "MapListOfStructs"); - UNIT_ASSERT_VALUES_EQUAL(column["proto_type"], "structured_message"); - UNIT_ASSERT_VALUES_EQUAL(column["fields"].Size(), 2); - UNIT_ASSERT_VALUES_EQUAL(column["fields"][0]["proto_type"], "int64"); - UNIT_ASSERT_VALUES_EQUAL(column["fields"][1]["proto_type"], "structured_message"); - } - { - const auto& column = columns[3]; - UNIT_ASSERT_VALUES_EQUAL(column["name"], "MapOptionalDict"); - 
UNIT_ASSERT_VALUES_EQUAL(column["proto_type"], "structured_message"); - UNIT_ASSERT_VALUES_EQUAL(column["fields"].Size(), 2); - UNIT_ASSERT_VALUES_EQUAL(column["fields"][0]["proto_type"], "int64"); - UNIT_ASSERT_VALUES_EQUAL(column["fields"][1]["proto_type"], "structured_message"); - } - { - const auto& column = columns[4]; - UNIT_ASSERT_VALUES_EQUAL(column["name"], "MapDict"); - UNIT_ASSERT_VALUES_EQUAL(column["proto_type"], "structured_message"); - UNIT_ASSERT_VALUES_EQUAL(column["fields"].Size(), 2); - UNIT_ASSERT_VALUES_EQUAL(column["fields"][0]["proto_type"], "int64"); - UNIT_ASSERT_VALUES_EQUAL(column["fields"][1]["proto_type"], "structured_message"); - } - } - - Y_UNIT_TEST(Oneof) - { - const auto format = TFormat::Protobuf<NUnitTesting::TWithOneof>(); - auto columns = GetColumns(format); - - UNIT_ASSERT_VALUES_EQUAL(columns.Size(), 4); - auto check = [] (const TNode& column, TStringBuf name, TStringBuf oneof2Name) { - UNIT_ASSERT_VALUES_EQUAL(column["name"], name); - UNIT_ASSERT_VALUES_EQUAL(column["proto_type"], "structured_message"); - UNIT_ASSERT_VALUES_EQUAL(column["fields"].Size(), 5); - UNIT_ASSERT_VALUES_EQUAL(column["fields"][0]["name"], "field"); - - const auto& oneof2 = column["fields"][1]; - UNIT_ASSERT_VALUES_EQUAL(oneof2["name"], oneof2Name); - UNIT_ASSERT_VALUES_EQUAL(oneof2["proto_type"], "oneof"); - UNIT_ASSERT_VALUES_EQUAL(oneof2["fields"][0]["name"], "y2"); - UNIT_ASSERT_VALUES_EQUAL(oneof2["fields"][1]["name"], "z2"); - UNIT_ASSERT_VALUES_EQUAL(oneof2["fields"][1]["proto_type"], "structured_message"); - const auto& embeddedOneof = oneof2["fields"][1]["fields"][0]; - UNIT_ASSERT_VALUES_EQUAL(embeddedOneof["name"], "Oneof"); - UNIT_ASSERT_VALUES_EQUAL(embeddedOneof["fields"][0]["name"], "x"); - UNIT_ASSERT_VALUES_EQUAL(embeddedOneof["fields"][1]["name"], "y"); - UNIT_ASSERT_VALUES_EQUAL(oneof2["fields"][2]["name"], "x2"); - - UNIT_ASSERT_VALUES_EQUAL(column["fields"][2]["name"], "x1"); - UNIT_ASSERT_VALUES_EQUAL(column["fields"][3]["name"], 
"y1"); - UNIT_ASSERT_VALUES_EQUAL(column["fields"][4]["name"], "z1"); - }; - - check(columns[0], "DefaultSeparateFields", "variant_field_name"); - check(columns[1], "NoDefault", "Oneof2"); - - { - const auto& column = columns[2]; - UNIT_ASSERT_VALUES_EQUAL(column["name"], "SerializationProtobuf"); - UNIT_ASSERT_VALUES_EQUAL(column["proto_type"], "structured_message"); - UNIT_ASSERT_VALUES_EQUAL(column["fields"].Size(), 3); - UNIT_ASSERT_VALUES_EQUAL(column["fields"][0]["name"], "x1"); - UNIT_ASSERT_VALUES_EQUAL(column["fields"][1]["name"], "y1"); - UNIT_ASSERT_VALUES_EQUAL(column["fields"][2]["name"], "z1"); - } - { - const auto& column = columns[3]; - UNIT_ASSERT_VALUES_EQUAL(column["name"], "TopLevelOneof"); - UNIT_ASSERT_VALUES_EQUAL(column["proto_type"], "oneof"); - UNIT_ASSERT_VALUES_EQUAL(column["fields"].Size(), 1); - UNIT_ASSERT_VALUES_EQUAL(column["fields"][0]["name"], "MemberOfTopLevelOneof"); - } - } -} - -Y_UNIT_TEST_SUITE(Proto3) -{ - Y_UNIT_TEST(TWithOptional) - { - const auto format = TFormat::Protobuf<NTestingProto3::TWithOptional>(); - auto columns = GetColumns(format); - - UNIT_ASSERT_VALUES_EQUAL(columns[0]["name"], "x"); - UNIT_ASSERT_VALUES_EQUAL(columns[0]["proto_type"], "int64"); - UNIT_ASSERT_VALUES_EQUAL(columns[0]["field_number"], 1); - } - - Y_UNIT_TEST(TWithOptionalMessage) - { - const auto format = TFormat::Protobuf<NTestingProto3::TWithOptionalMessage>(); - auto columns = GetColumns(format); - - UNIT_ASSERT_VALUES_EQUAL(columns[0]["name"], "x"); - UNIT_ASSERT_VALUES_EQUAL(columns[0]["proto_type"], "structured_message"); - UNIT_ASSERT_VALUES_EQUAL(columns[0]["field_number"], 1); - - UNIT_ASSERT_VALUES_EQUAL(columns[0]["fields"].Size(), 1); - UNIT_ASSERT_VALUES_EQUAL(columns[0]["fields"][0]["name"], "x"); - UNIT_ASSERT_VALUES_EQUAL(columns[0]["fields"][0]["proto_type"], "int64"); - UNIT_ASSERT_VALUES_EQUAL(columns[0]["fields"][0]["field_number"], 1); - } -} diff --git a/yt/cpp/mapreduce/interface/fwd.h b/yt/cpp/mapreduce/interface/fwd.h 
deleted file mode 100644 index 0434c03d8b..0000000000 --- a/yt/cpp/mapreduce/interface/fwd.h +++ /dev/null @@ -1,397 +0,0 @@ -#pragma once - -/// -/// @file yt/cpp/mapreduce/interface/fwd.h -/// -/// Header containing mostly forward declarations of types. - - -#include <util/generic/fwd.h> -#include <util/system/types.h> - -#include <variant> - -/// @cond Doxygen_Suppress -namespace google::protobuf { - class Message; -} - -namespace NYT { - - //////////////////////////////////////////////////////////////////////////////// - // batch_request.h - //////////////////////////////////////////////////////////////////////////////// - - class IBatchRequest; - using TBatchRequestPtr = ::TIntrusivePtr<IBatchRequest>; - - //////////////////////////////////////////////////////////////////////////////// - // client.h - //////////////////////////////////////////////////////////////////////////////// - - enum ELockMode : int; - - struct TStartTransactionOptions; - - struct TLockOptions; - - template <class TDerived> - struct TTabletOptions; - - struct TMountTableOptions; - - struct TUnmountTableOptions; - - struct TRemountTableOptions; - - struct TReshardTableOptions; - - struct TAlterTableOptions; - - struct TLookupRowsOptions; - - struct TSelectRowsOptions; - - struct TCreateClientOptions; - - struct TAlterTableReplicaOptions; - - struct TGetFileFromCacheOptions; - - struct TPutFileToCacheOptions; - - struct TCheckPermissionResult; - struct TCheckPermissionResponse; - struct TCheckPermissionOptions; - - struct TTabletInfo; - - class ILock; - using ILockPtr = ::TIntrusivePtr<ILock>; - - class ITransaction; - using ITransactionPtr = ::TIntrusivePtr<ITransaction>; - - class ITransactionPinger; - using ITransactionPingerPtr = ::TIntrusivePtr<ITransactionPinger>; - - struct IOperation; - using IOperationPtr = ::TIntrusivePtr<IOperation>; - - class IClientBase; - - class IClient; - - using IClientPtr = ::TIntrusivePtr<IClient>; - using IClientBasePtr = ::TIntrusivePtr<IClientBase>; - 
- //////////////////////////////////////////////////////////////////////////////// - // config.h - //////////////////////////////////////////////////////////////////////////////// - - struct TConfig; - using TConfigPtr = ::TIntrusivePtr<TConfig>; - - //////////////////////////////////////////////////////////////////////////////// - // cypress.h - //////////////////////////////////////////////////////////////////////////////// - - enum ENodeType : int; - - struct TCreateOptions; - - struct TRemoveOptions; - - struct TGetOptions; - - struct TSetOptions; - - struct TMultisetAttributesOptions; - - struct TListOptions; - - struct TCopyOptions; - - struct TMoveOptions; - - struct TLinkOptions; - - struct TConcatenateOptions; - - struct TInsertRowsOptions; - - struct TDeleteRowsOptions; - - struct TTrimRowsOptions; - - class ICypressClient; - - //////////////////////////////////////////////////////////////////////////////// - // errors.h - //////////////////////////////////////////////////////////////////////////////// - - class TApiUsageError; - - class TYtError; - - class TErrorResponse; - - struct TFailedJobInfo; - - class TOperationFailedError; - - //////////////////////////////////////////////////////////////////////////////// - // node.h - //////////////////////////////////////////////////////////////////////////////// - - class TNode; - - //////////////////////////////////////////////////////////////////////////////// - // common.h - //////////////////////////////////////////////////////////////////////////////// - - using TTransactionId = TGUID; - using TNodeId = TGUID; - using TLockId = TGUID; - using TOperationId = TGUID; - using TTabletCellId = TGUID; - using TReplicaId = TGUID; - using TJobId = TGUID; - - using TYPath = TString; - using TLocalFilePath = TString; - - template <class T, class TDerived = void> - struct TOneOrMany; - - // key column values - using TKey = TOneOrMany<TNode>; - - class TSortColumn; - - // column names - using TColumnNames = 
TOneOrMany<TString>; - - // key column descriptors. - class TSortColumns; - - enum EValueType : int; - - enum ESortOrder : int; - - enum EOptimizeForAttr : i8; - - enum EErasureCodecAttr : i8; - - enum ESchemaModificationAttr : i8; - - enum class EMasterReadKind : int; - - class TColumnSchema; - - class TTableSchema; - - enum class ERelation; - - struct TKeyBound; - - struct TReadLimit; - - struct TReadRange; - - struct TRichYPath; - - struct TAttributeFilter; - - //////////////////////////////////////////////////////////////////////////////// - // io.h - //////////////////////////////////////////////////////////////////////////////// - - enum class EFormatType : int; - - struct TFormat; - - class IFileReader; - - using IFileReaderPtr = ::TIntrusivePtr<IFileReader>; - - class IFileWriter; - - using IFileWriterPtr = ::TIntrusivePtr<IFileWriter>; - - class IBlobTableReader; - using IBlobTableReaderPtr = ::TIntrusivePtr<IBlobTableReader>; - - class TRawTableReader; - - using TRawTableReaderPtr = ::TIntrusivePtr<TRawTableReader>; - - class TRawTableWriter; - - using TRawTableWriterPtr = ::TIntrusivePtr<TRawTableWriter>; - - template <class T, class = void> - class TTableReader; - - template <class T, class = void> - class TTableRangesReader; - - template <typename T> - using TTableRangesReaderPtr = ::TIntrusivePtr<TTableRangesReader<T>>; - - template <class T> - using TTableReaderPtr = ::TIntrusivePtr<TTableReader<T>>; - - template <class T, class = void> - class TTableWriter; - - template <class T> - using TTableWriterPtr = ::TIntrusivePtr<TTableWriter<T>>; - - struct TYaMRRow; - - using ::google::protobuf::Message; - - class ISkiffRowParser; - - using ISkiffRowParserPtr = ::TIntrusivePtr<ISkiffRowParser>; - - class ISkiffRowSkipper; - - using ISkiffRowSkipperPtr = ::TIntrusivePtr<ISkiffRowSkipper>; - - namespace NDetail { - - class TYdlGenericRowType; - - } // namespace NDetail - - template<class... TYdlRowTypes> - class TYdlOneOf; - - template<class... 
TProtoRowTypes> - class TProtoOneOf; - - template<class... TSkiffRowTypes> - class TSkiffRowOneOf; - - using TYaMRReader = TTableReader<TYaMRRow>; - using TYaMRWriter = TTableWriter<TYaMRRow>; - using TNodeReader = TTableReader<TNode>; - using TNodeWriter = TTableWriter<TNode>; - using TMessageReader = TTableReader<Message>; - using TMessageWriter = TTableWriter<Message>; - using TYdlTableWriter = TTableWriter<NDetail::TYdlGenericRowType>; - - template <class TDerived> - struct TIOOptions; - - struct TFileReaderOptions; - - struct TFileWriterOptions; - - struct TTableReaderOptions; - - class TSkiffRowHints; - - struct TTableWriterOptions; - - //////////////////////////////////////////////////////////////////////////////// - // job_statistics.h - //////////////////////////////////////////////////////////////////////////////// - - class TJobStatistics; - - template <typename T> - class TJobStatisticsEntry; - - //////////////////////////////////////////////////////////////////////////////// - // operation.h - //////////////////////////////////////////////////////////////////////////////// - - class TFormatHints; - - struct TUserJobSpec; - - struct TMapOperationSpec; - - struct TRawMapOperationSpec; - - struct TReduceOperationSpec; - - struct TMapReduceOperationSpec; - - struct TJoinReduceOperationSpec; - - struct TSortOperationSpec; - - class IIOperationPreparationContext; - - class IJob; - using IJobPtr = ::TIntrusivePtr<IJob>; - - class IRawJob; - using IRawJobPtr = ::TIntrusivePtr<IRawJob>; - - enum EMergeMode : int; - - struct TMergeOperationSpec; - - struct TEraseOperationSpec; - - template <class TR, class TW> - class IMapper; - - template <class TR, class TW> - class IReducer; - - template <class TR, class TW> - class IAggregatorReducer; - - struct TSuspendOperationOptions; - - struct TResumeOperationOptions; - - enum class EOperationBriefState : int; - - struct TOperationAttributes; - - struct TOperationOptions; - - enum class EOperationAttribute : int; - - 
struct TOperationAttributeFilter; - - struct TGetOperationOptions; - - struct TListOperationsOptions; - - struct TGetJobOptions; - - struct TListJobsOptions; - - struct IOperationClient; - - enum class EFinishedJobState : int; - - enum class EJobType : int; - enum class EJobState : int; - enum class ETaskName : int; - class TTaskName; - - struct TJobBinaryDefault; - - struct TJobBinaryLocalPath; - - struct TJobBinaryCypressPath; - - using TJobBinaryConfig = std::variant< - TJobBinaryDefault, - TJobBinaryLocalPath, - TJobBinaryCypressPath>; - - struct TRetryConfig; - class IRetryConfigProvider; - using IRetryConfigProviderPtr = ::TIntrusivePtr<IRetryConfigProvider>; -} -/// @endcond diff --git a/yt/cpp/mapreduce/interface/init.h b/yt/cpp/mapreduce/interface/init.h deleted file mode 100644 index 302be268fc..0000000000 --- a/yt/cpp/mapreduce/interface/init.h +++ /dev/null @@ -1,71 +0,0 @@ -#pragma once - -/// -/// @file yt/cpp/mapreduce/interface/init.h -/// -/// Initialization functions of YT Wrapper. - -#include <yt/cpp/mapreduce/interface/wait_proxy.h> - -#include <util/generic/fwd.h> - -#include <functional> - -namespace NYT { - -//////////////////////////////////////////////////////////////////////////////// - -/// Options for @ref NYT::Initialize() and @ref NYT::JoblessInitialize() functions -struct TInitializeOptions -{ - using TSelf = TInitializeOptions; - - /// - /// @brief Override waiting functions for YT Wrapper. - /// - /// This options allows to override functions used by this library to wait something. - FLUENT_FIELD_DEFAULT(::TIntrusivePtr<IWaitProxy>, WaitProxy, nullptr); - - /// - /// @brief Enable/disable cleanup when program execution terminates abnormally. - /// - /// When set to true, library will abort all active transactions and running operations when program - /// terminates on error or signal. - FLUENT_FIELD_DEFAULT(bool, CleanupOnTermination, false); - - /// - /// @brief Set callback to be called before exit() in job mode. 
- /// - /// Provided function will be called just before exit() when program is started in job mode. - /// This might be useful for shutting down libraries that are used inside operations. - /// - /// NOTE: Keep in mind that inside job execution environment differs from client execution environment. - /// So JobOnExitFunction should not depend on argc/argv environment variables etc. - FLUENT_FIELD_OPTION(std::function<void()>, JobOnExitFunction); -}; - -/// -/// @brief Performs basic initialization (logging, termination handlers, etc). -/// -/// This function never switches to job mode. -void JoblessInitialize(const TInitializeOptions& options = TInitializeOptions()); - -/// -/// @brief Performs basic initialization and switches to a job mode if required. -/// -/// This function performs basic initialization (it sets up logging reads the config, etc) and checks if binary is launched -/// on YT machine inside a job. If latter is true this function launches proper job and after job is done it calls exit(). -/// -/// This function must be called if application starts any operation. -/// This function must be called immediately after entering main() function before any argument parsing is done. 
-void Initialize(int argc, const char **argv, const TInitializeOptions &options = TInitializeOptions()); - -/// Similar to @ref NYT::Initialize(int, const char**, const TInitializeOptions&) -void Initialize(int argc, char **argv, const TInitializeOptions &options = TInitializeOptions()); - -/// Similar to @ref NYT::Initialize(int, const char**, const TInitializeOptions&) -void Initialize(const TInitializeOptions &options = TInitializeOptions()); - -//////////////////////////////////////////////////////////////////////////////// - -} // namespace NYT diff --git a/yt/cpp/mapreduce/interface/io-inl.h b/yt/cpp/mapreduce/interface/io-inl.h deleted file mode 100644 index c35ebb7481..0000000000 --- a/yt/cpp/mapreduce/interface/io-inl.h +++ /dev/null @@ -1,1015 +0,0 @@ -#pragma once - -#ifndef IO_INL_H_ -#error "Direct inclusion of this file is not allowed, use io.h" -#endif -#undef IO_INL_H_ - -#include "finish_or_die.h" - -#include <util/generic/typetraits.h> -#include <util/generic/yexception.h> -#include <util/stream/length.h> - -#include <util/system/mutex.h> -#include <util/system/spinlock.h> - -#include <library/cpp/yson/node/node_builder.h> - -#include <yt/cpp/mapreduce/interface/serialize.h> - -namespace NYT { - -//////////////////////////////////////////////////////////////////////////////// - -namespace NDetail { - -template<class T> -struct TIsProtoOneOf - : std::false_type -{ }; - -template <class ...TProtoRowTypes> -struct TIsProtoOneOf<TProtoOneOf<TProtoRowTypes...>> - : std::true_type -{ }; - -template <class T> -struct TIsSkiffRowOneOf - : std::false_type -{ }; - -template <class ...TSkiffRowTypes> -struct TIsSkiffRowOneOf<TSkiffRowOneOf<TSkiffRowTypes...>> - : std::true_type -{ }; - -} // namespace NDetail - -//////////////////////////////////////////////////////////////////////////////// - -template <class T, class = void> -struct TRowTraits; - -template <> -struct TRowTraits<TNode> -{ - using TRowType = TNode; - using IReaderImpl = INodeReaderImpl; - 
using IWriterImpl = INodeWriterImpl; -}; - -template <> -struct TRowTraits<TYaMRRow> -{ - using TRowType = TYaMRRow; - using IReaderImpl = IYaMRReaderImpl; - using IWriterImpl = IYaMRWriterImpl; -}; - -template <> -struct TRowTraits<Message> -{ - using TRowType = Message; - using IReaderImpl = IProtoReaderImpl; - using IWriterImpl = IProtoWriterImpl; -}; - -template <class T> -struct TRowTraits<T, std::enable_if_t<TIsBaseOf<Message, T>::Value>> -{ - using TRowType = T; - using IReaderImpl = IProtoReaderImpl; - using IWriterImpl = IProtoWriterImpl; -}; - -template <class T> -struct TRowTraits<T, std::enable_if_t<TIsSkiffRow<T>::value>> -{ - using TRowType = T; - using IReaderImpl = ISkiffRowReaderImpl; -}; - -template <class... TSkiffRowTypes> -struct TRowTraits<TSkiffRowOneOf<TSkiffRowTypes...>> -{ - using TRowType = TSkiffRowOneOf<TSkiffRowTypes...>; - using IReaderImpl = ISkiffRowReaderImpl; -}; - -template <class... TProtoRowTypes> -struct TRowTraits<TProtoOneOf<TProtoRowTypes...>> -{ - using TRowType = TProtoOneOf<TProtoRowTypes...>; - using IReaderImpl = IProtoReaderImpl; - using IWriterImpl = IProtoWriterImpl; -}; - -//////////////////////////////////////////////////////////////////////////////// - -struct IReaderImplBase - : public TThrRefBase -{ - virtual bool IsValid() const = 0; - virtual void Next() = 0; - virtual ui32 GetTableIndex() const = 0; - virtual ui32 GetRangeIndex() const = 0; - virtual ui64 GetRowIndex() const = 0; - virtual void NextKey() = 0; - - // Not pure virtual because of clients that has already implemented this interface. 
- virtual TMaybe<size_t> GetReadByteCount() const; - virtual i64 GetTabletIndex() const; - virtual bool IsEndOfStream() const; - virtual bool IsRawReaderExhausted() const; -}; - -struct INodeReaderImpl - : public IReaderImplBase -{ - virtual const TNode& GetRow() const = 0; - virtual void MoveRow(TNode* row) = 0; -}; - -struct IYaMRReaderImpl - : public IReaderImplBase -{ - virtual const TYaMRRow& GetRow() const = 0; - virtual void MoveRow(TYaMRRow* row) - { - *row = GetRow(); - } -}; - -struct IProtoReaderImpl - : public IReaderImplBase -{ - virtual void ReadRow(Message* row) = 0; -}; - -struct ISkiffRowReaderImpl - : public IReaderImplBase -{ - virtual void ReadRow(const ISkiffRowParserPtr& parser) = 0; -}; - -//////////////////////////////////////////////////////////////////////////////// - -namespace NDetail { - -//////////////////////////////////////////////////////////////////////////////// - -// We don't include <yt/cpp/mapreduce/interface/logging/yt_log.h> in this file -// to avoid macro name clashes (specifically YT_LOG_DEBUG) -void LogTableReaderStatistics(ui64 rowCount, TMaybe<size_t> byteCount); - -template <class T> -class TTableReaderBase - : public TThrRefBase -{ -public: - using TRowType = typename TRowTraits<T>::TRowType; - using IReaderImpl = typename TRowTraits<T>::IReaderImpl; - - explicit TTableReaderBase(::TIntrusivePtr<IReaderImpl> reader) - : Reader_(reader) - { } - - ~TTableReaderBase() override - { - NDetail::LogTableReaderStatistics(ReadRowCount_, Reader_->GetReadByteCount()); - } - - bool IsValid() const - { - return Reader_->IsValid(); - } - - void Next() - { - Reader_->Next(); - ++ReadRowCount_; - RowState_ = ERowState::None; - } - - bool IsEndOfStream() - { - return Reader_->IsEndOfStream(); - } - - bool IsRawReaderExhausted() - { - return Reader_->IsRawReaderExhausted(); - } - - ui32 GetTableIndex() const - { - return Reader_->GetTableIndex(); - } - - ui32 GetRangeIndex() const - { - return Reader_->GetRangeIndex(); - } - - ui64 
GetRowIndex() const - { - return Reader_->GetRowIndex(); - } - - i64 GetTabletIndex() const - { - return Reader_->GetTabletIndex(); - } - -protected: - template <typename TCacher, typename TCacheGetter> - const auto& DoGetRowCached(TCacher cacher, TCacheGetter cacheGetter) const - { - switch (RowState_) { - case ERowState::None: - cacher(); - RowState_ = ERowState::Cached; - break; - case ERowState::Cached: - break; - case ERowState::MovedOut: - ythrow yexception() << "Row is already moved"; - } - return *cacheGetter(); - } - - template <typename U, typename TMover, typename TCacheMover> - void DoMoveRowCached(U* result, TMover mover, TCacheMover cacheMover) - { - Y_VERIFY(result); - switch (RowState_) { - case ERowState::None: - mover(result); - break; - case ERowState::Cached: - cacheMover(result); - break; - case ERowState::MovedOut: - ythrow yexception() << "Row is already moved"; - } - RowState_ = ERowState::MovedOut; - } - -private: - enum class ERowState - { - None, - Cached, - MovedOut, - }; - -protected: - ::TIntrusivePtr<IReaderImpl> Reader_; - -private: - ui64 ReadRowCount_ = 0; - mutable ERowState RowState_ = ERowState::None; -}; - -template <class T> -class TSimpleTableReader - : public TTableReaderBase<T> -{ -public: - using TBase = TTableReaderBase<T>; - using typename TBase::TRowType; - - using TBase::TBase; - - const TRowType& GetRow() const - { - // Caching is implemented in underlying reader. - return TBase::DoGetRowCached( - /* cacher */ [&] {}, - /* cacheGetter */ [&] { - return &Reader_->GetRow(); - }); - } - - void MoveRow(TRowType* result) - { - // Caching is implemented in underlying reader. 
- TBase::DoMoveRowCached( - result, - /* mover */ [&] (TRowType* result) { - Reader_->MoveRow(result); - }, - /* cacheMover */ [&] (TRowType* result) { - Reader_->MoveRow(result); - }); - } - - TRowType MoveRow() - { - TRowType result; - MoveRow(&result); - return result; - } - -private: - using TBase::Reader_; -}; - -//////////////////////////////////////////////////////////////////////////////// - -} // namespace NDetail - -template <> -class TTableReader<TNode> - : public NDetail::TSimpleTableReader<TNode> -{ - using TSimpleTableReader<TNode>::TSimpleTableReader; -}; - -template <> -class TTableReader<TYaMRRow> - : public NDetail::TSimpleTableReader<TYaMRRow> -{ - using TSimpleTableReader<TYaMRRow>::TSimpleTableReader; -}; - -template <> -class TTableReader<Message> - : public NDetail::TTableReaderBase<Message> -{ -public: - using TBase = NDetail::TTableReaderBase<Message>; - - using TBase::TBase; - - template <class U> - const U& GetRow() const - { - static_assert(TIsBaseOf<Message, U>::Value); - - return TBase::DoGetRowCached( - /* cacher */ [&] { - CachedRow_.Reset(new U); - Reader_->ReadRow(CachedRow_.Get()); - }, - /* cacheGetter */ [&] { - auto result = dynamic_cast<const U*>(CachedRow_.Get()); - Y_VERIFY(result); - return result; - }); - } - - template <class U> - void MoveRow(U* result) - { - static_assert(TIsBaseOf<Message, U>::Value); - - TBase::DoMoveRowCached( - result, - /* mover */ [&] (U* result) { - Reader_->ReadRow(result); - }, - /* cacheMover */ [&] (U* result) { - auto cast = dynamic_cast<U*>(CachedRow_.Get()); - Y_VERIFY(cast); - result->Swap(cast); - }); - } - - template <class U> - U MoveRow() - { - static_assert(TIsBaseOf<Message, U>::Value); - - U result; - MoveRow(&result); - return result; - } - - ::TIntrusivePtr<IProtoReaderImpl> GetReaderImpl() const - { - return Reader_; - } - -private: - using TBase::Reader_; - mutable THolder<Message> CachedRow_; -}; - -template<class... 
TProtoRowTypes> -class TTableReader<TProtoOneOf<TProtoRowTypes...>> - : public NDetail::TTableReaderBase<TProtoOneOf<TProtoRowTypes...>> -{ -public: - using TBase = NDetail::TTableReaderBase<TProtoOneOf<TProtoRowTypes...>>; - - using TBase::TBase; - - template <class U> - const U& GetRow() const - { - AssertIsOneOf<U>(); - return TBase::DoGetRowCached( - /* cacher */ [&] { - Reader_->ReadRow(&std::get<U>(CachedRows_)); - CachedIndex_ = NDetail::TIndexInTuple<U, decltype(CachedRows_)>::Value; - }, - /* cacheGetter */ [&] { - return &std::get<U>(CachedRows_); - }); - } - - template <class U> - void MoveRow(U* result) - { - AssertIsOneOf<U>(); - return TBase::DoMoveRowCached( - result, - /* mover */ [&] (U* result) { - Reader_->ReadRow(result); - }, - /* cacheMover */ [&] (U* result) { - Y_VERIFY((NDetail::TIndexInTuple<U, decltype(CachedRows_)>::Value) == CachedIndex_); - *result = std::move(std::get<U>(CachedRows_)); - }); - } - - template <class U> - U MoveRow() - { - U result; - MoveRow(&result); - return result; - } - - ::TIntrusivePtr<IProtoReaderImpl> GetReaderImpl() const - { - return Reader_; - } - -private: - using TBase::Reader_; - // std::variant could also be used here, but std::tuple leads to better performance - // because of deallocations that std::variant has to do - mutable std::tuple<TProtoRowTypes...> CachedRows_; - mutable int CachedIndex_; - - template <class U> - static constexpr void AssertIsOneOf() - { - static_assert( - (std::is_same<U, TProtoRowTypes>::value || ...), - "Template parameter must be one of TProtoOneOf template parameter"); - } -}; - -template <class T> -class TTableReader<T, std::enable_if_t<TIsBaseOf<Message, T>::Value>> - : public TTableReader<TProtoOneOf<T>> -{ -public: - using TRowType = T; - using TBase = TTableReader<TProtoOneOf<T>>; - - using TBase::TBase; - - const T& GetRow() const - { - return TBase::template GetRow<T>(); - } - - void MoveRow(T* result) - { - TBase::template MoveRow<T>(result); - } - - T MoveRow() - { 
- return TBase::template MoveRow<T>(); - } -}; - -template<class... TSkiffRowTypes> -class TTableReader<TSkiffRowOneOf<TSkiffRowTypes...>> - : public NDetail::TTableReaderBase<TSkiffRowOneOf<TSkiffRowTypes...>> -{ -public: - using TBase = NDetail::TTableReaderBase<TSkiffRowOneOf<TSkiffRowTypes...>>; - - using TBase::TBase; - - explicit TTableReader(::TIntrusivePtr<typename TBase::IReaderImpl> reader, const TMaybe<TSkiffRowHints>& hints) - : TBase(reader) - , Parsers_({(CreateSkiffParser<TSkiffRowTypes>(&std::get<TSkiffRowTypes>(CachedRows_), hints))...}) - { } - - template <class U> - const U& GetRow() const - { - AssertIsOneOf<U>(); - return TBase::DoGetRowCached( - /* cacher */ [&] { - auto index = NDetail::TIndexInTuple<U, decltype(CachedRows_)>::Value; - Reader_->ReadRow(Parsers_[index]); - CachedIndex_ = index; - }, - /* cacheGetter */ [&] { - return &std::get<U>(CachedRows_); - }); - } - - template <class U> - void MoveRow(U* result) - { - AssertIsOneOf<U>(); - return TBase::DoMoveRowCached( - result, - /* mover */ [&] (U* result) { - auto index = NDetail::TIndexInTuple<U, decltype(CachedRows_)>::Value; - Reader_->ReadRow(Parsers_[index]); - *result = std::move(std::get<U>(CachedRows_)); - }, - /* cacheMover */ [&] (U* result) { - Y_VERIFY((NDetail::TIndexInTuple<U, decltype(CachedRows_)>::Value) == CachedIndex_); - *result = std::move(std::get<U>(CachedRows_)); - }); - } - - template <class U> - U MoveRow() - { - U result; - MoveRow(&result); - return result; - } - - ::TIntrusivePtr<ISkiffRowReaderImpl> GetReaderImpl() const - { - return Reader_; - } - -private: - using TBase::Reader_; - // std::variant could also be used here, but std::tuple leads to better performance - // because of deallocations that std::variant has to do - mutable std::tuple<TSkiffRowTypes...> CachedRows_; - mutable std::vector<ISkiffRowParserPtr> Parsers_; - mutable int CachedIndex_; - - template <class U> - static constexpr void AssertIsOneOf() - { - static_assert( - (std::is_same<U, 
TSkiffRowTypes>::value || ...), - "Template parameter must be one of TSkiffRowOneOf template parameter"); - } -}; - -template <class T> -class TTableReader<T, std::enable_if_t<TIsSkiffRow<T>::value>> - : public TTableReader<TSkiffRowOneOf<T>> -{ -public: - using TRowType = T; - using TBase = TTableReader<TSkiffRowOneOf<T>>; - - using TBase::TBase; - - const T& GetRow() - { - return TBase::template GetRow<T>(); - } - - void MoveRow(T* result) - { - TBase::template MoveRow<T>(result); - } - - T MoveRow() - { - return TBase::template MoveRow<T>(); - } -}; - -template <> -inline TTableReaderPtr<TNode> IIOClient::CreateTableReader<TNode>( - const TRichYPath& path, const TTableReaderOptions& options) -{ - return new TTableReader<TNode>(CreateNodeReader(path, options)); -} - -template <> -inline TTableReaderPtr<TYaMRRow> IIOClient::CreateTableReader<TYaMRRow>( - const TRichYPath& path, const TTableReaderOptions& options) -{ - return new TTableReader<TYaMRRow>(CreateYaMRReader(path, options)); -} - -template <class T, class = std::enable_if_t<TIsBaseOf<Message, T>::Value>> -struct TReaderCreator -{ - static TTableReaderPtr<T> Create(::TIntrusivePtr<IProtoReaderImpl> reader) - { - return new TTableReader<T>(reader); - } -}; - -template <class T> -inline TTableReaderPtr<T> IIOClient::CreateTableReader( - const TRichYPath& path, const TTableReaderOptions& options) -{ - if constexpr (TIsBaseOf<Message, T>::Value) { - TAutoPtr<T> prototype(new T); - return new TTableReader<T>(CreateProtoReader(path, options, prototype.Get())); - } else if constexpr (TIsSkiffRow<T>::value) { - const auto& hints = options.FormatHints_ ? 
options.FormatHints_->SkiffRowHints_ : Nothing(); - auto schema = GetSkiffSchema<T>(hints); - auto skipper = CreateSkiffSkipper<T>(hints); - return new TTableReader<T>(CreateSkiffRowReader(path, options, skipper, schema), hints); - } else { - static_assert(TDependentFalse<T>, "Unsupported type for table reader"); - } -} - -//////////////////////////////////////////////////////////////////////////////// - -template <typename T> -TTableReaderPtr<T> CreateTableReader( - IInputStream* stream, - const TTableReaderOptions& options) -{ - return TReaderCreator<T>::Create(NDetail::CreateProtoReader(stream, options, T::descriptor())); -} - -template <class... Ts> -TTableReaderPtr<typename NDetail::TProtoOneOfUnique<Ts...>::TType> CreateProtoMultiTableReader( - IInputStream* stream, - const TTableReaderOptions& options) -{ - return new TTableReader<typename NDetail::TProtoOneOfUnique<Ts...>::TType>( - NDetail::CreateProtoReader(stream, options, {Ts::descriptor()...})); -} - -template <class T> -TTableReaderPtr<T> CreateProtoMultiTableReader( - IInputStream* stream, - int tableCount, - const TTableReaderOptions& options) -{ - static_assert(TIsBaseOf<::google::protobuf::Message, T>::Value); - TVector<const ::google::protobuf::Descriptor*> descriptors(tableCount, T::descriptor()); - return new TTableReader<T>(NDetail::CreateProtoReader(stream, options, std::move(descriptors))); -} - -//////////////////////////////////////////////////////////////////////////////// - -template <class T> -class TTableRangesReader<T> - : public TThrRefBase -{ -public: - using TRowType = T; - -private: - using TReaderImpl = typename TRowTraits<TRowType>::IReaderImpl; - -public: - TTableRangesReader(::TIntrusivePtr<TReaderImpl> readerImpl) - : ReaderImpl_(readerImpl) - , Reader_(MakeIntrusive<TTableReader<TRowType>>(readerImpl)) - , IsValid_(Reader_->IsValid()) - { } - - TTableReader<T>& GetRange() - { - return *Reader_; - } - - bool IsValid() const - { - return IsValid_; - } - - void Next() - { - 
ReaderImpl_->NextKey(); - if ((IsValid_ = Reader_->IsValid())) { - Reader_->Next(); - } - } - -private: - ::TIntrusivePtr<TReaderImpl> ReaderImpl_; - ::TIntrusivePtr<TTableReader<TRowType>> Reader_; - bool IsValid_; -}; - -//////////////////////////////////////////////////////////////////////////////// - -template <typename T> -struct IWriterImplBase - : public TThrRefBase -{ - virtual void AddRow(const T& row, size_t tableIndex) = 0; - - virtual void AddRow(const T& row, size_t tableIndex, size_t /*rowWeight*/) - { - AddRow(row, tableIndex); - } - - virtual void AddRow(T&& row, size_t tableIndex) = 0; - - virtual void AddRow(T&& row, size_t tableIndex, size_t /*rowWeight*/) - { - AddRow(std::move(row), tableIndex); - } - - virtual void AddRowBatch(const TVector<T>& rowBatch, size_t tableIndex, size_t rowBatchWeight = 0) - { - for (const auto& row : rowBatch) { - AddRow(row, tableIndex, rowBatchWeight / rowBatch.size()); - } - } - - virtual void AddRowBatch(TVector<T>&& rowBatch, size_t tableIndex, size_t rowBatchWeight = 0) - { - auto rowBatchSize = rowBatch.size(); - for (auto&& row : std::move(rowBatch)) { - AddRow(std::move(row), tableIndex, rowBatchWeight / rowBatchSize); - } - } - - virtual size_t GetTableCount() const = 0; - virtual void FinishTable(size_t tableIndex) = 0; - virtual void Abort() - { } -}; - -struct INodeWriterImpl - : public IWriterImplBase<TNode> -{ -}; - -struct IYaMRWriterImpl - : public IWriterImplBase<TYaMRRow> -{ -}; - -struct IProtoWriterImpl - : public IWriterImplBase<Message> -{ -}; - -//////////////////////////////////////////////////////////////////////////////// - -template <class T> -class TTableWriterBase - : public TThrRefBase -{ -public: - using TRowType = T; - using IWriterImpl = typename TRowTraits<T>::IWriterImpl; - - explicit TTableWriterBase(::TIntrusivePtr<IWriterImpl> writer) - : Writer_(writer) - , Locks_(MakeAtomicShared<TVector<TAdaptiveLock>>(writer->GetTableCount())) - { } - - ~TTableWriterBase() override - { - if 
(Locks_.RefCount() == 1) { - NDetail::FinishOrDie(this, "TTableWriterBase"); - } - } - - void Abort() - { - Writer_->Abort(); - } - - void AddRow(const T& row, size_t tableIndex = 0, size_t rowWeight = 0) - { - DoAddRow<T>(row, tableIndex, rowWeight); - } - - void AddRow(T&& row, size_t tableIndex = 0, size_t rowWeight = 0) - { - DoAddRow<T>(std::move(row), tableIndex, rowWeight); - } - - void AddRowBatch(const TVector<T>& rowBatch, size_t tableIndex = 0, size_t rowBatchWeight = 0) - { - DoAddRowBatch<T>(rowBatch, tableIndex, rowBatchWeight); - } - - void AddRowBatch(TVector<T>&& rowBatch, size_t tableIndex = 0, size_t rowBatchWeight = 0) - { - DoAddRowBatch<T>(std::move(rowBatch), tableIndex, rowBatchWeight); - } - - void Finish() - { - for (size_t i = 0; i < Locks_->size(); ++i) { - auto guard = Guard((*Locks_)[i]); - Writer_->FinishTable(i); - } - } - -protected: - template <class U> - void DoAddRow(const U& row, size_t tableIndex = 0, size_t rowWeight = 0) - { - if (tableIndex >= Locks_->size()) { - ythrow TIOException() << - "Table index " << tableIndex << - " is out of range [0, " << Locks_->size() << ")"; - } - - auto guard = Guard((*Locks_)[tableIndex]); - Writer_->AddRow(row, tableIndex, rowWeight); - } - - template <class U> - void DoAddRow(U&& row, size_t tableIndex = 0, size_t rowWeight = 0) - { - if (tableIndex >= Locks_->size()) { - ythrow TIOException() << - "Table index " << tableIndex << - " is out of range [0, " << Locks_->size() << ")"; - } - - auto guard = Guard((*Locks_)[tableIndex]); - Writer_->AddRow(std::move(row), tableIndex, rowWeight); - } - - template <class U> - void DoAddRowBatch(const TVector<U>& rowBatch, size_t tableIndex = 0, size_t rowBatchWeight = 0) - { - if (tableIndex >= Locks_->size()) { - ythrow TIOException() << - "Table index " << tableIndex << - " is out of range [0, " << Locks_->size() << ")"; - } - - auto guard = Guard((*Locks_)[tableIndex]); - Writer_->AddRowBatch(rowBatch, tableIndex, rowBatchWeight); - } - - template 
<class U> - void DoAddRowBatch(TVector<U>&& rowBatch, size_t tableIndex = 0, size_t rowBatchWeight = 0) - { - if (tableIndex >= Locks_->size()) { - ythrow TIOException() << - "Table index " << tableIndex << - " is out of range [0, " << Locks_->size() << ")"; - } - - auto guard = Guard((*Locks_)[tableIndex]); - Writer_->AddRowBatch(std::move(rowBatch), tableIndex, rowBatchWeight); - } - - ::TIntrusivePtr<IWriterImpl> GetWriterImpl() - { - return Writer_; - } - -private: - ::TIntrusivePtr<IWriterImpl> Writer_; - TAtomicSharedPtr<TVector<TAdaptiveLock>> Locks_; -}; - -template <> -class TTableWriter<TNode> - : public TTableWriterBase<TNode> -{ -public: - using TBase = TTableWriterBase<TNode>; - - explicit TTableWriter(::TIntrusivePtr<IWriterImpl> writer) - : TBase(writer) - { } -}; - -template <> -class TTableWriter<TYaMRRow> - : public TTableWriterBase<TYaMRRow> -{ -public: - using TBase = TTableWriterBase<TYaMRRow>; - - explicit TTableWriter(::TIntrusivePtr<IWriterImpl> writer) - : TBase(writer) - { } -}; - -template <> -class TTableWriter<Message> - : public TTableWriterBase<Message> -{ -public: - using TBase = TTableWriterBase<Message>; - - explicit TTableWriter(::TIntrusivePtr<IWriterImpl> writer) - : TBase(writer) - { } - - template <class U, std::enable_if_t<std::is_base_of<Message, U>::value>* = nullptr> - void AddRow(const U& row, size_t tableIndex = 0, size_t rowWeight = 0) - { - TBase::AddRow(row, tableIndex, rowWeight); - } - - template <class U, std::enable_if_t<std::is_base_of<Message, U>::value>* = nullptr> - void AddRowBatch(const TVector<U>& rowBatch, size_t tableIndex = 0, size_t rowBatchWeight = 0) - { - for (const auto& row : rowBatch) { - AddRow(row, tableIndex, rowBatchWeight / rowBatch.size()); - } - } -}; - -template <class T> -class TTableWriter<T, std::enable_if_t<TIsBaseOf<Message, T>::Value>> - : public TTableWriter<Message> -{ -public: - using TRowType = T; - using TBase = TTableWriter<Message>; - - explicit 
TTableWriter(::TIntrusivePtr<IWriterImpl> writer) - : TBase(writer) - { } - - void AddRow(const T& row, size_t tableIndex = 0, size_t rowWeight = 0) - { - TBase::AddRow<T>(row, tableIndex, rowWeight); - } - - void AddRowBatch(const TVector<T>& rowBatch, size_t tableIndex = 0, size_t rowBatchWeight = 0) - { - TBase::AddRowBatch<T>(rowBatch, tableIndex, rowBatchWeight); - } -}; - -template <> -inline TTableWriterPtr<TNode> IIOClient::CreateTableWriter<TNode>( - const TRichYPath& path, const TTableWriterOptions& options) -{ - return new TTableWriter<TNode>(CreateNodeWriter(path, options)); -} - -template <> -inline TTableWriterPtr<TYaMRRow> IIOClient::CreateTableWriter<TYaMRRow>( - const TRichYPath& path, const TTableWriterOptions& options) -{ - return new TTableWriter<TYaMRRow>(CreateYaMRWriter(path, options)); -} - -template <class T> -inline TTableWriterPtr<T> IIOClient::CreateTableWriter( - const TRichYPath& path, const TTableWriterOptions& options) -{ - if constexpr (TIsBaseOf<Message, T>::Value) { - TAutoPtr<T> prototype(new T); - return new TTableWriter<T>(CreateProtoWriter(path, options, prototype.Get())); - } else { - static_assert(TDependentFalse<T>, "Unsupported type for table writer"); - } -} - -//////////////////////////////////////////////////////////////////////////////// - -template <typename T> -TTableReaderPtr<T> CreateConcreteProtobufReader(TTableReader<Message>* reader) -{ - static_assert(std::is_base_of_v<Message, T>, "T must be a protobuf type (either Message or its descendant)"); - Y_ENSURE(reader, "reader must be non-null"); - return ::MakeIntrusive<TTableReader<T>>(reader->GetReaderImpl()); -} - -template <typename T> -TTableReaderPtr<T> CreateConcreteProtobufReader(const TTableReaderPtr<Message>& reader) -{ - Y_ENSURE(reader, "reader must be non-null"); - return CreateConcreteProtobufReader<T>(reader.Get()); -} - -template <typename T> -TTableReaderPtr<Message> CreateGenericProtobufReader(TTableReader<T>* reader) -{ - 
static_assert(std::is_base_of_v<Message, T>, "T must be a protobuf type (either Message or its descendant)"); - Y_ENSURE(reader, "reader must be non-null"); - return ::MakeIntrusive<TTableReader<Message>>(reader->GetReaderImpl()); -} - -template <typename T> -TTableReaderPtr<Message> CreateGenericProtobufReader(const TTableReaderPtr<T>& reader) -{ - Y_ENSURE(reader, "reader must be non-null"); - return CreateGenericProtobufReader(reader.Get()); -} - -//////////////////////////////////////////////////////////////////////////////// - -} // namespace NYT diff --git a/yt/cpp/mapreduce/interface/io.cpp b/yt/cpp/mapreduce/interface/io.cpp deleted file mode 100644 index f97629721a..0000000000 --- a/yt/cpp/mapreduce/interface/io.cpp +++ /dev/null @@ -1,47 +0,0 @@ -#include "io.h" - -#include <yt/cpp/mapreduce/interface/logging/yt_log.h> - -#include <util/string/cast.h> - -namespace NYT { - -//////////////////////////////////////////////////////////////////////////////// - -TMaybe<size_t> IReaderImplBase::GetReadByteCount() const -{ - return Nothing(); -} - -i64 IReaderImplBase::GetTabletIndex() const -{ - Y_FAIL("Unimplemented"); -} - -bool IReaderImplBase::IsEndOfStream() const -{ - Y_FAIL("Unimplemented"); -} - -bool IReaderImplBase::IsRawReaderExhausted() const -{ - Y_FAIL("Unimplemented"); -} - -//////////////////////////////////////////////////////////////////////////////// - -namespace NDetail { - -void LogTableReaderStatistics(ui64 rowCount, TMaybe<size_t> byteCount) -{ - TString byteCountStr = (byteCount ? 
::ToString(*byteCount) : "<unknown>"); - YT_LOG_DEBUG("Table reader has read %v rows, %v bytes", - rowCount, - byteCountStr); -} - -} // namespace NDetail - -//////////////////////////////////////////////////////////////////////////////// - -} // namespace NYT diff --git a/yt/cpp/mapreduce/interface/io.h b/yt/cpp/mapreduce/interface/io.h deleted file mode 100644 index e2b20a1802..0000000000 --- a/yt/cpp/mapreduce/interface/io.h +++ /dev/null @@ -1,586 +0,0 @@ -#pragma once - -/// -/// @file yt/cpp/mapreduce/interface/io.h -/// -/// Header containing client interface for reading and writing tables and files. - - -#include "fwd.h" - -#include "client_method_options.h" -#include "common.h" -#include "format.h" -#include "node.h" -#include "mpl.h" -#include "skiff_row.h" - -#include <google/protobuf/message.h> - -#include <util/stream/input.h> -#include <util/stream/output.h> -#include <util/generic/yexception.h> -#include <util/generic/maybe.h> - -namespace NYT { - -//////////////////////////////////////////////////////////////////////////////// - -/// -/// @brief "Marker" type to use for several protobuf types in @ref NYT::TTableReader. -/// -/// @tparam Ts Possible types of rows to be read. -template<class... TProtoRowTypes> -class TProtoOneOf -{ -public: - static_assert( - (TIsBaseOf<::google::protobuf::Message, TProtoRowTypes>::Value && ...), - "Template parameters can only be protobuf types"); - - TProtoOneOf() = delete; -}; - -/// -/// @brief "Marker" type to use for several skiff row types in @ref NYT::TTableReader. -/// -/// @tparam Ts Possible types of rows to be read. -template<class... 
TSkiffRowTypes> -class TSkiffRowOneOf -{ -public: - static_assert( - (TIsSkiffRow<TSkiffRowTypes>::value && ...), - "Template parameters can only be SkiffRow types"); - - TSkiffRowOneOf() = delete; -}; - -//////////////////////////////////////////////////////////////////////////////// - -/// @cond Doxygen_Suppress -namespace NDetail { - -//////////////////////////////////////////////////////////////////////////////// - -template <class TTuple> -struct TProtoOneOfFromTuple; - -template <class... Ts> -struct TProtoOneOfFromTuple<std::tuple<Ts...>> -{ - using TType = TProtoOneOf<Ts...>; -}; - -template <class... Ts> -struct TProtoOneOfUnique -{ - using TTuple = typename TUniqueTypes<std::tuple<>, std::tuple<Ts...>>::TType; - using TType = typename TProtoOneOfFromTuple<TTuple>::TType; -}; - -//////////////////////////////////////////////////////////////////////////////// - -} // namespace NDetail -/// @endcond - -//////////////////////////////////////////////////////////////////////////////// - -struct INodeReaderImpl; -struct IYaMRReaderImpl; -struct IProtoReaderImpl; -struct ISkiffRowReaderImpl; -struct INodeWriterImpl; -struct IYaMRWriterImpl; -struct IProtoWriterImpl; - -//////////////////////////////////////////////////////////////////////////////// - -/// Class of exceptions connected to reading or writing tables or files. -class TIOException - : public yexception -{ }; - -/////////////////////////////////////////////////////////////////////////////// - -/// Interface representing YT file reader. -class IFileReader - : public TThrRefBase - , public IInputStream -{ }; - -/// Interface representing YT file writer. -class IFileWriter - : public TThrRefBase - , public IOutputStream -{ }; - -//////////////////////////////////////////////////////////////////////////////// - -/// Low-level interface to read YT table with retries. 
-class TRawTableReader - : public TThrRefBase - , public IInputStream -{ -public: - /// @brief Retry table read starting from the specified `rangeIndex` and `rowIndex`. - /// - /// @param rangeIndex Index of first range to read - /// @param rowIndex Index of first row to read; if `rowIndex == Nothing` entire request will be retried. - /// - /// @return `true` on successful request retry, `false` if no retry attempts are left (then `Retry()` shouldn't be called any more). - /// - /// `rowIndex` must be inside the range with index `rangeIndex` if the latter is specified. - /// - /// After successful retry the user should reset `rangeIndex` / `rowIndex` values and read new ones - /// from the stream. - virtual bool Retry( - const TMaybe<ui32>& rangeIndex, - const TMaybe<ui64>& rowIndex) = 0; - - /// Resets retry attempt count to the initial value (then `Retry()` can be called again). - virtual void ResetRetries() = 0; - - /// @brief May the input stream contain table ranges? - /// - /// In the case when it is `true` the `TRawTableReader` user is responsible - /// to track active range index in order to pass it to Retry(). - virtual bool HasRangeIndices() const = 0; -}; - -/// @brief Low-level interface to write YT table. -/// -/// Retries must be handled by implementation. -class TRawTableWriter - : public TThrRefBase - , public IOutputStream -{ -public: - /// @brief Call this method after complete row representation is written to the stream. - /// - /// When this method is called `TRowTableWriter` can check its buffer - /// and if it is full send data to YT. - /// @note `TRawTableWriter` never sends partial records to YT (due to retries). - virtual void NotifyRowEnd() = 0; - - /// @brief Try to abort writing process as soon as possible (makes sense for multi-threaded writers). - /// - /// By default it does nothing, but implementations are welcome to override this method. 
- virtual void Abort() - { } -}; - -/// @brief Interface to deal with multiple raw output streams. -class IProxyOutput -{ -public: - virtual ~IProxyOutput() - { } - - /// Get amount of managed streams. - virtual size_t GetStreamCount() const = 0; - - /// Get stream corresponding to the specified table index. - virtual IOutputStream* GetStream(size_t tableIndex) const = 0; - - /// This handler must be called right after the next row has been written. - virtual void OnRowFinished(size_t tableIndex) = 0; - - /// @brief Try to abort writing process as soon as possible (makes sense for multi-threaded writers). - /// - /// By default it does nothing, but implementations are welcome to override this method. - virtual void Abort() - { } -}; - -//////////////////////////////////////////////////////////////////////////////// - -/// @brief Class template to read typed rows from YT tables. -/// -/// @tparam T Row type. -/// -/// Correct usage of this class usually looks like -/// ``` -/// for (const auto& cursor : *reader) { -/// const auto& row = cursor.GetRow(); -/// ... -/// } -/// ``` -/// or, more verbosely, -/// ``` -/// for (; reader->IsValid(); reader->Next()) { -/// const auto& row = reader->GetRow(); -/// ... -/// } -/// ``` -/// -/// @note Actual (partial) specializations of this template may look a bit different, -/// e.g. @ref NYT::TTableReader::GetRow, @ref NYT::TTableReader::MoveRow may be method templates. -template <class T, class> -class TTableReader - : public TThrRefBase -{ -public: - /// Get current row. - const T& GetRow() const; - - /// Extract current row; further calls to `GetRow` and `MoveRow` will fail. - T MoveRow(); - - /// Extract current row to `result`; further calls to `GetRow` and `MoveRow` will fail. - void MoveRow(T* result); - - /// Check whether all the rows were read. - bool IsValid() const; - - /// Move the cursor to the next row. - void Next(); - - /// Get table index of the current row. 
- ui32 GetTableIndex() const; - - /// Get range index of the current row (zero if it is unknown or read request contains no ranges) - ui32 GetRangeIndex() const; - - /// Get current row index (zero if it unknown). - ui64 GetRowIndex() const; - - /// Get current tablet index (for ordered dynamic tables). - i64 GetTabletIndex() const; - - /// Returns `true` if job consumed all the input and `false` otherwise. - bool IsEndOfStream() const; - - /// Returns `true` if job raw input stream was closed and `false` otherwise. - bool IsRawReaderExhausted() const; -}; - -/// @brief Iterator for use in range-based-for. -/// -/// @note Idiomatic usage: -/// ``` -/// for (const auto& cursor : *reader) { -/// const auto& row = cursor.GetRow(); -/// ... -/// } -/// ``` -template <class T> -class TTableReaderIterator -{ -public: - /// Construct iterator from table reader (can be `nullptr`). - explicit TTableReaderIterator<T>(TTableReader<T>* reader) - { - if (reader && reader->IsValid()) { - Reader_ = reader; - } else { - Reader_ = nullptr; - } - } - - /// Equality operator. - bool operator==(const TTableReaderIterator& it) const - { - return Reader_ == it.Reader_; - } - - /// Inequality operator. - bool operator!=(const TTableReaderIterator& it) const - { - return Reader_ != it.Reader_; - } - - /// Dereference operator. - TTableReader<T>& operator*() - { - return *Reader_; - } - - /// Const dereference operator. - const TTableReader<T>& operator*() const - { - return *Reader_; - } - - /// Preincrement operator. - TTableReaderIterator& operator++() - { - Reader_->Next(); - if (!Reader_->IsValid()) { - Reader_ = nullptr; - } - return *this; - } - -private: - TTableReader<T>* Reader_; -}; - -/// @brief Function to facilitate range-based-for for @ref NYT::TTableReader. 
-/// -/// @see @ref NYT::TTableReaderIterator -template <class T> -TTableReaderIterator<T> begin(TTableReader<T>& reader) -{ - return TTableReaderIterator<T>(&reader); -} - -/// @brief Function to facilitate range-based-for for @ref NYT::TTableReader. -/// -/// @see @ref NYT::TTableReaderIterator -template <class T> -TTableReaderIterator<T> end(TTableReader<T>&) -{ - return TTableReaderIterator<T>(nullptr); -} - -//////////////////////////////////////////////////////////////////////////////// - -/// @brief Class to facilitate reading table rows sorted by key. -/// -/// Each reader returned from @ref NYT::TTableRangesReader::GetRange represents -/// a range of rows with the same key. -/// -/// @note Idiomatic usage: -/// ``` -/// for (; reader->IsValid(); reader->Next()) { -/// auto& rangeReader = reader->GetRange(); -/// ... -/// } -/// ``` -template <class T, class> -class TTableRangesReader - : public TThrRefBase -{ -public: - /// Get reader for rows with the same key. - TTableReader<T>& GetRange(); - - /// Check whether all rows are read. - bool IsValid() const; - - /// Move cursor to the next range. - void Next(); -}; - -//////////////////////////////////////////////////////////////////////////////// - -/// Class template to write typed rows to YT tables. -template <class T, class> -class TTableWriter - : public TThrRefBase -{ -public: - /// @brief Submit a row for writing. - /// - /// The row may (and very probably will) *not* be written immediately. - void AddRow(const T& row); - - /// Stop writing data as soon as possible (without flushing data, e.g. before aborting parent transaction). - void Finish(); -}; - -//////////////////////////////////////////////////////////////////////////////// - -/// @brief Type representing YaMR table row. -/// -/// @deprecated -struct TYaMRRow -{ - /// Key column. - TStringBuf Key; - - /// Subkey column. - TStringBuf SubKey; - - /// Value column. 
- TStringBuf Value; -}; - -//////////////////////////////////////////////////////////////////////////////// - -/// Interface for creating table and file readers and writer. -class IIOClient -{ -public: - virtual ~IIOClient() = default; - - /// Create a reader for file at `path`. - virtual IFileReaderPtr CreateFileReader( - const TRichYPath& path, - const TFileReaderOptions& options = TFileReaderOptions()) = 0; - - /// Create a writer for file at `path`. - virtual IFileWriterPtr CreateFileWriter( - const TRichYPath& path, - const TFileWriterOptions& options = TFileWriterOptions()) = 0; - - /// Create a typed reader for table at `path`. - template <class T> - TTableReaderPtr<T> CreateTableReader( - const TRichYPath& path, - const TTableReaderOptions& options = TTableReaderOptions()); - - /// Create a typed writer for table at `path`. - template <class T> - TTableWriterPtr<T> CreateTableWriter( - const TRichYPath& path, - const TTableWriterOptions& options = TTableWriterOptions()); - - /// Create a writer to write protobuf messages with specified descriptor. - virtual TTableWriterPtr<::google::protobuf::Message> CreateTableWriter( - const TRichYPath& path, - const ::google::protobuf::Descriptor& descriptor, - const TTableWriterOptions& options = TTableWriterOptions()) = 0; - - /// Create a reader to read a table using specified format. - virtual TRawTableReaderPtr CreateRawReader( - const TRichYPath& path, - const TFormat& format, - const TTableReaderOptions& options = TTableReaderOptions()) = 0; - - /// Create a reader to write a table using specified format. - virtual TRawTableWriterPtr CreateRawWriter( - const TRichYPath& path, - const TFormat& format, - const TTableWriterOptions& options = TTableWriterOptions()) = 0; - - /// - /// @brief Create a reader for [blob table](https://docs.yandex-team.ru/docs/yt/description/storage/blobtables) at `path`. - /// - /// @param path Blob table path. - /// @param blobId Key identifying the blob. 
- /// @param options Optional parameters - /// - /// Blob table is a table that stores a number of blobs. - /// Blobs are sliced into parts of the same size (maybe except of last part). - /// Those parts are stored in the separate rows. - /// - /// Blob table have constraints on its schema. - /// - There must be columns that identify blob (blob id columns). That columns might be of any type. - /// - There must be a column of `int64` type that identify part inside the blob (this column is called `part index`). - /// - There must be a column of `string` type that stores actual data (this column is called `data column`). - virtual IFileReaderPtr CreateBlobTableReader( - const TYPath& path, - const TKey& blobId, - const TBlobTableReaderOptions& options = TBlobTableReaderOptions()) = 0; - -private: - virtual ::TIntrusivePtr<INodeReaderImpl> CreateNodeReader( - const TRichYPath& path, const TTableReaderOptions& options) = 0; - - virtual ::TIntrusivePtr<IYaMRReaderImpl> CreateYaMRReader( - const TRichYPath& path, const TTableReaderOptions& options) = 0; - - virtual ::TIntrusivePtr<IProtoReaderImpl> CreateProtoReader( - const TRichYPath& path, - const TTableReaderOptions& options, - const ::google::protobuf::Message* prototype) = 0; - - virtual ::TIntrusivePtr<ISkiffRowReaderImpl> CreateSkiffRowReader( - const TRichYPath& path, - const TTableReaderOptions& options, - const ISkiffRowSkipperPtr& skipper, - const NSkiff::TSkiffSchemaPtr& schema) = 0; - - virtual ::TIntrusivePtr<INodeWriterImpl> CreateNodeWriter( - const TRichYPath& path, const TTableWriterOptions& options) = 0; - - virtual ::TIntrusivePtr<IYaMRWriterImpl> CreateYaMRWriter( - const TRichYPath& path, const TTableWriterOptions& options) = 0; - - virtual ::TIntrusivePtr<IProtoWriterImpl> CreateProtoWriter( - const TRichYPath& path, - const TTableWriterOptions& options, - const ::google::protobuf::Message* prototype) = 0; -}; - -//////////////////////////////////////////////////////////////////////////////// - 
-/// -/// @brief Create a protobuf table reader from a stream. -/// -/// @tparam T Protobuf message type to read (must be inherited from `Message`). -/// -/// @param stream Input stream in YT protobuf format. -template <typename T> -TTableReaderPtr<T> CreateTableReader( - IInputStream* stream, - const TTableReaderOptions& options = {}); - -/// -/// @brief Create a protobuf multi table reader from a stream. -/// -/// @tparam Ts Protobuf message types to read (must be inherited from `Message`). -/// -/// @param stream Input stream in YT protobuf format. -template <class... Ts> -TTableReaderPtr<typename NDetail::TProtoOneOfUnique<Ts...>::TType> CreateProtoMultiTableReader( - IInputStream* stream, - const TTableReaderOptions& options = {}); - -/// -/// @brief Create a homogenous protobuf multi table reader from a stream. -/// -/// @tparam T Protobuf message type to read (must be inherited from `Message`). -/// -/// @param stream Input stream in YT protobuf format. -/// @param tableCount Number of tables in input stream. -template <class T> -TTableReaderPtr<T> CreateProtoMultiTableReader( - IInputStream* stream, - int tableCount, - const TTableReaderOptions& options = {}); - -/// Create a @ref NYT::TNode table reader from a stream. -template <> -TTableReaderPtr<TNode> CreateTableReader<TNode>( - IInputStream* stream, const TTableReaderOptions& options); - -/// Create a @ref NYT::TYaMRRow table reader from a stream. -template <> -TTableReaderPtr<TYaMRRow> CreateTableReader<TYaMRRow>( - IInputStream* stream, const TTableReaderOptions& options); - -namespace NDetail { - -/// Create a protobuf table reader from a stream. -::TIntrusivePtr<IProtoReaderImpl> CreateProtoReader( - IInputStream* stream, - const TTableReaderOptions& options, - const ::google::protobuf::Descriptor* descriptor); - - -/// Create a protobuf table reader from a stream that can contain table switches. 
-::TIntrusivePtr<IProtoReaderImpl> CreateProtoReader( - IInputStream* stream, - const TTableReaderOptions& options, - TVector<const ::google::protobuf::Descriptor*> descriptors); - -} // namespace NDetail - -//////////////////////////////////////////////////////////////////////////////// - -/// Convert generic protobuf table reader to a concrete one (for certain type `T`). -template <typename T> -TTableReaderPtr<T> CreateConcreteProtobufReader(TTableReader<Message>* reader); - -/// Convert generic protobuf table reader to a concrete one (for certain type `T`). -template <typename T> -TTableReaderPtr<T> CreateConcreteProtobufReader(const TTableReaderPtr<Message>& reader); - -/// Convert a concrete (for certain type `T`) protobuf table reader to a generic one. -template <typename T> -TTableReaderPtr<Message> CreateGenericProtobufReader(TTableReader<T>* reader); - -/// Convert a concrete (for certain type `T`) protobuf table reader to a generic one. -template <typename T> -TTableReaderPtr<Message> CreateGenericProtobufReader(const TTableReaderPtr<T>& reader); - -//////////////////////////////////////////////////////////////////////////////// - -} // namespace NYT - -#define IO_INL_H_ -#include "io-inl.h" -#undef IO_INL_H_ diff --git a/yt/cpp/mapreduce/interface/job_counters.cpp b/yt/cpp/mapreduce/interface/job_counters.cpp deleted file mode 100644 index 6d4a2a6fcb..0000000000 --- a/yt/cpp/mapreduce/interface/job_counters.cpp +++ /dev/null @@ -1,164 +0,0 @@ -#include "job_counters.h" - -namespace NYT { - -//////////////////////////////////////////////////////////////////// - -namespace { - ui64 CountTotal(const TNode& data) - { - if (data.IsMap()) { - if (auto totalPtr = data.AsMap().FindPtr("total")) { - return data["total"].IntCast<ui64>(); - } else { - ui64 total = 0; - for (const auto& keyVal: data.AsMap()) { - total += CountTotal(keyVal.second); - } - return total; - } - } else { - return data.IntCast<ui64>(); - } - } - - TNode GetNode(const TNode& data, const 
TStringBuf& key) - { - if (auto resPtr = data.AsMap().FindPtr(key)) { - return *resPtr; - } - return TNode(); - } -} // namespace - -//////////////////////////////////////////////////////////////////// - -TJobCounter::TJobCounter(TNode data) - : Data_(std::move(data)) -{ - if (Data_.HasValue()) { - Total_ = CountTotal(Data_); - } -} - -TJobCounter::TJobCounter(ui64 total) - : Total_(total) -{ } - -ui64 TJobCounter::GetTotal() const -{ - return Total_; -} - -ui64 TJobCounter::GetValue(const TStringBuf key) const -{ - if (Data_.HasValue()) { - return CountTotal(Data_[key]); - } - return 0; -} - -//////////////////////////////////////////////////////////////////// - -TJobCounters::TJobCounters(const NYT::TNode& counters) - : Total_(0) -{ - if (!counters.IsMap()) { - ythrow yexception() << "TJobCounters must be initialized with Map type TNode"; - } - auto abortedNode = GetNode(counters, "aborted"); - if (abortedNode.HasValue()) { - Aborted_ = TJobCounter(GetNode(abortedNode, "total")); - AbortedScheduled_ = TJobCounter(GetNode(abortedNode, "scheduled")); - AbortedNonScheduled_ = TJobCounter(GetNode(abortedNode, "non_scheduled")); - } - auto completedNode = GetNode(counters, "completed"); - if (completedNode.HasValue()) { - Completed_ = TJobCounter(GetNode(completedNode, "total")); - CompletedNonInterrupted_ = TJobCounter(GetNode(completedNode, "non-interrupted")); - CompletedInterrupted_ = TJobCounter(GetNode(completedNode, "interrupted")); - } - Lost_ = TJobCounter(GetNode(counters, "lost")); - Invalidated_ = TJobCounter(GetNode(counters, "invalidated")); - Failed_ = TJobCounter(GetNode(counters, "failed")); - Running_ = TJobCounter(GetNode(counters, "running")); - Suspended_ = TJobCounter(GetNode(counters, "suspended")); - Pending_ = TJobCounter(GetNode(counters, "pending")); - Blocked_ = TJobCounter(GetNode(counters, "blocked")); - Total_ = CountTotal(counters); -} - - -const TJobCounter& TJobCounters::GetAborted() const -{ - return Aborted_; -} - -const 
TJobCounter& TJobCounters::GetAbortedScheduled() const -{ - return AbortedScheduled_; -} - -const TJobCounter& TJobCounters::GetAbortedNonScheduled() const -{ - return AbortedNonScheduled_; -} - -const TJobCounter& TJobCounters::GetCompleted() const -{ - return Completed_; -} - -const TJobCounter& TJobCounters::GetCompletedNonInterrupted() const -{ - return CompletedNonInterrupted_; -} - -const TJobCounter& TJobCounters::GetCompletedInterrupted() const -{ - return CompletedInterrupted_; -} - -const TJobCounter& TJobCounters::GetLost() const -{ - return Lost_; -} - -const TJobCounter& TJobCounters::GetInvalidated() const -{ - return Invalidated_; -} - -const TJobCounter& TJobCounters::GetFailed() const -{ - return Failed_; -} - -const TJobCounter& TJobCounters::GetRunning() const -{ - return Running_; -} - -const TJobCounter& TJobCounters::GetSuspended() const -{ - return Suspended_; -} - -const TJobCounter& TJobCounters::GetPending() const -{ - return Pending_; -} - -const TJobCounter& TJobCounters::GetBlocked() const -{ - return Blocked_; -} - -ui64 TJobCounters::GetTotal() const -{ - return Total_; -} - -//////////////////////////////////////////////////////////////////// - -} // namespace NYT diff --git a/yt/cpp/mapreduce/interface/job_counters.h b/yt/cpp/mapreduce/interface/job_counters.h deleted file mode 100644 index 9257cc1ec1..0000000000 --- a/yt/cpp/mapreduce/interface/job_counters.h +++ /dev/null @@ -1,74 +0,0 @@ -#pragma once - -#include "fwd.h" - -#include <yt/cpp/mapreduce/interface/node.h> - -namespace NYT { - -class TJobCounter -{ -private: - TNode Data_; - ui64 Total_ = 0; - -public: - TJobCounter() = default; - - TJobCounter(TNode data); - TJobCounter(ui64 total); - - ui64 GetTotal() const; - - ui64 GetValue(const TStringBuf key) const; -}; - -/// Class representing a collection of job counters. -class TJobCounters -{ -public: - /// - /// Construct empty counter. - TJobCounters() = default; - - /// - /// Construct counter from counters node. 
- TJobCounters(const NYT::TNode& counters); - - const TJobCounter& GetAborted() const; - const TJobCounter& GetAbortedScheduled() const; - const TJobCounter& GetAbortedNonScheduled() const; - const TJobCounter& GetCompleted() const; - const TJobCounter& GetCompletedNonInterrupted() const; - const TJobCounter& GetCompletedInterrupted() const; - const TJobCounter& GetLost() const; - const TJobCounter& GetInvalidated() const; - const TJobCounter& GetFailed() const; - const TJobCounter& GetRunning() const; - const TJobCounter& GetSuspended() const; - const TJobCounter& GetPending() const; - const TJobCounter& GetBlocked() const; - - ui64 GetTotal() const; - -private: - ui64 Total_ = 0; - - TJobCounter Aborted_; - TJobCounter AbortedScheduled_; - TJobCounter AbortedNonScheduled_; - TJobCounter Completed_; - TJobCounter CompletedNonInterrupted_; - TJobCounter CompletedInterrupted_; - TJobCounter Lost_; - TJobCounter Invalidated_; - TJobCounter Failed_; - TJobCounter Running_; - TJobCounter Suspended_; - TJobCounter Pending_; - TJobCounter Blocked_; -}; - -//////////////////////////////////////////////////////////////////// - -} // namespace NYT diff --git a/yt/cpp/mapreduce/interface/job_counters_ut.cpp b/yt/cpp/mapreduce/interface/job_counters_ut.cpp deleted file mode 100644 index 56d3932b8f..0000000000 --- a/yt/cpp/mapreduce/interface/job_counters_ut.cpp +++ /dev/null @@ -1,103 +0,0 @@ -#include <yt/cpp/mapreduce/interface/job_counters.h> -#include <yt/cpp/mapreduce/interface/operation.h> - -#include <library/cpp/yson/node/node_io.h> - -#include <library/cpp/testing/unittest/registar.h> - -using namespace NYT; - -Y_UNIT_TEST_SUITE(JobCounters) -{ - Y_UNIT_TEST(Full) - { - const TString input = R"""( - { - "completed" = { - "total" = 6; - "non-interrupted" = 1; - "interrupted" = { - "whatever_interrupted" = 2; - "whatever_else_interrupted" = 3; - }; - }; - "aborted" = { - "non_scheduled" = { - "whatever_non_scheduled" = 4; - "whatever_else_non_scheduled" = 5; - }; - 
"scheduled" = { - "whatever_scheduled" = 6; - "whatever_else_scheduled" = 7; - }; - "total" = 22; - }; - "lost" = 8; - "invalidated" = 9; - "failed" = 10; - "running" = 11; - "suspended" = 12; - "pending" = 13; - "blocked" = 14; - "total" = 105; - })"""; - - TJobCounters counters(NodeFromYsonString(input)); - - UNIT_ASSERT_VALUES_EQUAL(counters.GetTotal(), 105); - - UNIT_ASSERT_VALUES_EQUAL(counters.GetCompleted().GetTotal(), 6); - UNIT_ASSERT_VALUES_EQUAL(counters.GetCompletedNonInterrupted().GetTotal(), 1); - UNIT_ASSERT_VALUES_EQUAL(counters.GetCompletedInterrupted().GetTotal(), 5); - UNIT_ASSERT_VALUES_EQUAL(counters.GetAborted().GetTotal(), 22); - UNIT_ASSERT_VALUES_EQUAL(counters.GetAbortedNonScheduled().GetTotal(), 9); - UNIT_ASSERT_VALUES_EQUAL(counters.GetAbortedScheduled().GetTotal(), 13); - UNIT_ASSERT_VALUES_EQUAL(counters.GetLost().GetTotal(), 8); - UNIT_ASSERT_VALUES_EQUAL(counters.GetInvalidated().GetTotal(), 9); - UNIT_ASSERT_VALUES_EQUAL(counters.GetFailed().GetTotal(), 10); - UNIT_ASSERT_VALUES_EQUAL(counters.GetRunning().GetTotal(), 11); - UNIT_ASSERT_VALUES_EQUAL(counters.GetSuspended().GetTotal(), 12); - UNIT_ASSERT_VALUES_EQUAL(counters.GetPending().GetTotal(), 13); - UNIT_ASSERT_VALUES_EQUAL(counters.GetBlocked().GetTotal(), 14); - - UNIT_ASSERT_VALUES_EQUAL(counters.GetCompletedInterrupted().GetValue("whatever_interrupted"), 2); - UNIT_ASSERT_VALUES_EQUAL(counters.GetCompletedInterrupted().GetValue("whatever_else_interrupted"), 3); - UNIT_ASSERT_VALUES_EQUAL(counters.GetAbortedNonScheduled().GetValue("whatever_non_scheduled"), 4); - UNIT_ASSERT_VALUES_EQUAL(counters.GetAbortedNonScheduled().GetValue("whatever_else_non_scheduled"), 5); - UNIT_ASSERT_VALUES_EQUAL(counters.GetAbortedScheduled().GetValue("whatever_scheduled"), 6); - UNIT_ASSERT_VALUES_EQUAL(counters.GetAbortedScheduled().GetValue("whatever_else_scheduled"), 7); - - UNIT_ASSERT_EXCEPTION(counters.GetCompletedInterrupted().GetValue("Nothingness"), yexception); - } - - 
Y_UNIT_TEST(Empty) - { - const TString input = "{}"; - - TJobCounters counters(NodeFromYsonString(input)); - - UNIT_ASSERT_VALUES_EQUAL(counters.GetTotal(), 0); - - UNIT_ASSERT_VALUES_EQUAL(counters.GetCompleted().GetTotal(), 0); - UNIT_ASSERT_VALUES_EQUAL(counters.GetCompletedNonInterrupted().GetTotal(), 0); - UNIT_ASSERT_VALUES_EQUAL(counters.GetCompletedInterrupted().GetTotal(), 0); - UNIT_ASSERT_VALUES_EQUAL(counters.GetAborted().GetTotal(), 0); - UNIT_ASSERT_VALUES_EQUAL(counters.GetAbortedNonScheduled().GetTotal(), 0); - UNIT_ASSERT_VALUES_EQUAL(counters.GetAbortedScheduled().GetTotal(), 0); - UNIT_ASSERT_VALUES_EQUAL(counters.GetLost().GetTotal(), 0); - UNIT_ASSERT_VALUES_EQUAL(counters.GetInvalidated().GetTotal(), 0); - UNIT_ASSERT_VALUES_EQUAL(counters.GetFailed().GetTotal(), 0); - UNIT_ASSERT_VALUES_EQUAL(counters.GetRunning().GetTotal(), 0); - UNIT_ASSERT_VALUES_EQUAL(counters.GetSuspended().GetTotal(), 0); - UNIT_ASSERT_VALUES_EQUAL(counters.GetPending().GetTotal(), 0); - UNIT_ASSERT_VALUES_EQUAL(counters.GetBlocked().GetTotal(), 0); - } - - Y_UNIT_TEST(Broken) - { - UNIT_ASSERT_EXCEPTION_CONTAINS(TJobCounters(TNode()), yexception, "TJobCounters"); - UNIT_ASSERT_EXCEPTION_CONTAINS(TJobCounters(TNode(1)), yexception, "TJobCounters"); - UNIT_ASSERT_EXCEPTION_CONTAINS(TJobCounters(TNode(1.0)), yexception, "TJobCounters"); - UNIT_ASSERT_EXCEPTION_CONTAINS(TJobCounters(TNode("Whatever")), yexception, "TJobCounters"); - } -} diff --git a/yt/cpp/mapreduce/interface/job_statistics.cpp b/yt/cpp/mapreduce/interface/job_statistics.cpp deleted file mode 100644 index bd9791672d..0000000000 --- a/yt/cpp/mapreduce/interface/job_statistics.cpp +++ /dev/null @@ -1,361 +0,0 @@ -#include "job_statistics.h" - -#include "operation.h" - -#include <library/cpp/yson/node/node.h> -#include <library/cpp/yson/node/serialize.h> - -#include <library/cpp/yson/writer.h> - -#include <util/datetime/base.h> -#include <util/generic/hash_set.h> -#include <util/generic/ptr.h> -#include 
<util/stream/file.h> -#include <util/string/cast.h> -#include <util/string/subst.h> -#include <util/system/file.h> - -namespace NYT { - -//////////////////////////////////////////////////////////////////// - -template <> -i64 ConvertJobStatisticsEntry(i64 value) -{ - return value; -} - -template <> -TDuration ConvertJobStatisticsEntry(i64 value) -{ - return TDuration::MilliSeconds(value); -} - -//////////////////////////////////////////////////////////////////// - -static TTaskName JobTypeToTaskName(EJobType jobType) -{ - switch (jobType) { - case EJobType::PartitionMap: - return ETaskName::PartitionMap0; - case EJobType::Partition: - return ETaskName::Partition0; - default: - return ToString(jobType); - } -} - -static TTaskName FixTaskName(TString taskName) -{ - if (taskName == "partition") { - return ETaskName::Partition0; - } else if (taskName == "partition_map") { - return ETaskName::PartitionMap0; - } - return taskName; -} - -//////////////////////////////////////////////////////////////////// - -class TJobStatistics::TData - : public TThrRefBase -{ -public: - using TTaskName2Data = THashMap<TString, TJobStatistics::TDataEntry>; - using TState2TaskName2Data = THashMap<EJobState, TTaskName2Data>; - using TName2State2TaskName2Data = THashMap<TString, TState2TaskName2Data>; - -public: - TName2State2TaskName2Data Name2State2TaskName2Data; - -public: - TData() = default; - - TData(const TNode& statisticsNode) - { - ParseNode(statisticsNode, TString(), &Name2State2TaskName2Data); - } - - static void Aggregate(TJobStatistics::TDataEntry* result, const TJobStatistics::TDataEntry& other) - { - result->Max = Max(result->Max, other.Max); - result->Min = Min(result->Min, other.Min); - result->Sum += other.Sum; - result->Count += other.Count; - } - - static void ParseNode(const TNode& node, TState2TaskName2Data* output) - { - auto getInt = [] (const TNode& theNode, TStringBuf key) { - const auto& nodeAsMap = theNode.AsMap(); - auto it = nodeAsMap.find(key); - if (it == 
nodeAsMap.end()) { - ythrow yexception() << "Key '" << key << "' is not found"; - } - const auto& valueNode = it->second; - if (!valueNode.IsInt64()) { - ythrow yexception() << "Key '" << key << "' is not of int64 type"; - } - return valueNode.AsInt64(); - }; - - for (const auto& [stateStr, taskName2DataNode] : node.AsMap()) { - EJobState state; - if (!TryFromString(stateStr, state)) { - continue; - } - for (const auto& [taskName, dataNode] : taskName2DataNode.AsMap()) { - auto fixedTaskName = FixTaskName(taskName); - auto& data = (*output)[state][fixedTaskName.Get()]; - data.Max = getInt(dataNode, "max"); - data.Min = getInt(dataNode, "min"); - data.Sum = getInt(dataNode, "sum"); - data.Count = getInt(dataNode, "count"); - } - } - } - - static void ParseNode(const TNode& node, const TString& curPath, TName2State2TaskName2Data* output) - { - Y_VERIFY(node.IsMap()); - - for (const auto& [key, value] : node.AsMap()) { - if (key == "$"sv) { - ParseNode(value, &(*output)[curPath]); - } else { - TString childPath = curPath; - if (!childPath.empty()) { - childPath.push_back('/'); - } - if (key.find_first_of('/') != key.npos) { - TString keyCopy(key); - SubstGlobal(keyCopy, "/", "\\/"); - childPath += keyCopy; - } else { - childPath += key; - } - ParseNode(value, childPath, output); - } - } - } -}; - -//////////////////////////////////////////////////////////////////// - -struct TJobStatistics::TFilter - : public TThrRefBase -{ - TVector<TTaskName> TaskNameFilter; - TVector<EJobState> JobStateFilter = {EJobState::Completed}; -}; - -//////////////////////////////////////////////////////////////////// - -const TString TJobStatistics::CustomStatisticsNamePrefix_ = "custom/"; - -TJobStatistics::TJobStatistics() - : Data_(::MakeIntrusive<TData>()) - , Filter_(::MakeIntrusive<TFilter>()) -{ } - - -TJobStatistics::TJobStatistics(const NYT::TNode& statisticsNode) - : Data_(::MakeIntrusive<TData>(statisticsNode)) - , Filter_(::MakeIntrusive<TFilter>()) -{ } - 
-TJobStatistics::TJobStatistics(::TIntrusivePtr<TData> data, ::TIntrusivePtr<TFilter> filter) - : Data_(data) - , Filter_(::MakeIntrusive<TFilter>(*filter)) -{ } - -TJobStatistics::TJobStatistics(const TJobStatistics& jobStatistics) = default; -TJobStatistics::TJobStatistics(TJobStatistics&&) = default; - -TJobStatistics& TJobStatistics::operator=(const TJobStatistics& jobStatistics) = default; -TJobStatistics& TJobStatistics::operator=(TJobStatistics&& jobStatistics) = default; - -TJobStatistics::~TJobStatistics() = default; - -TJobStatistics TJobStatistics::TaskName(TVector<TTaskName> taskNames) const -{ - auto newFilter = ::MakeIntrusive<TFilter>(*Filter_); - newFilter->TaskNameFilter = std::move(taskNames); - return TJobStatistics(Data_, std::move(newFilter)); -} - -TJobStatistics TJobStatistics::JobState(TVector<EJobState> jobStates) const -{ - auto newFilter = ::MakeIntrusive<TFilter>(*Filter_); - newFilter->JobStateFilter = std::move(jobStates); - return TJobStatistics(Data_, std::move(newFilter)); -} - -TJobStatistics TJobStatistics::JobType(TVector<EJobType> jobTypes) const -{ - TVector<TTaskName> taskNames; - for (auto jobType : jobTypes) { - taskNames.push_back(JobTypeToTaskName(jobType)); - } - return TaskName(std::move(taskNames)); -} - -bool TJobStatistics::HasStatistics(TStringBuf name) const -{ - return Data_->Name2State2TaskName2Data.contains(name); -} - -TJobStatisticsEntry<i64> TJobStatistics::GetStatistics(TStringBuf name) const -{ - return GetStatisticsAs<i64>(name); -} - -TVector<TString> TJobStatistics::GetStatisticsNames() const -{ - TVector<TString> result; - result.reserve(Data_->Name2State2TaskName2Data.size()); - for (const auto& entry : Data_->Name2State2TaskName2Data) { - result.push_back(entry.first); - } - return result; -} - -bool TJobStatistics::HasCustomStatistics(TStringBuf name) const -{ - return HasStatistics(CustomStatisticsNamePrefix_ + name); -} - -TJobStatisticsEntry<i64> TJobStatistics::GetCustomStatistics(TStringBuf name) 
const -{ - return GetCustomStatisticsAs<i64>(name); -} - -TVector<TString> TJobStatistics::GetCustomStatisticsNames() const -{ - TVector<TString> result; - for (const auto& entry : Data_->Name2State2TaskName2Data) { - if (entry.first.StartsWith(CustomStatisticsNamePrefix_)) { - result.push_back(entry.first.substr(CustomStatisticsNamePrefix_.size())); - } - } - return result; -} - -TMaybe<TJobStatistics::TDataEntry> TJobStatistics::GetStatisticsImpl(TStringBuf name) const -{ - auto name2State2TaskName2DataIt = Data_->Name2State2TaskName2Data.find(name); - Y_ENSURE( - name2State2TaskName2DataIt != Data_->Name2State2TaskName2Data.end(), - "Statistics '" << name << "' are missing"); - const auto& state2TaskName2Data = name2State2TaskName2DataIt->second; - - TMaybe<TDataEntry> result; - auto aggregate = [&] (const TDataEntry& data) { - if (result) { - TData::Aggregate(&result.GetRef(), data); - } else { - result = data; - } - }; - - auto aggregateTaskName2Data = [&] (const TData::TTaskName2Data& taskName2Data) { - if (Filter_->TaskNameFilter.empty()) { - for (const auto& [taskName, data] : taskName2Data) { - aggregate(data); - } - } else { - for (const auto& taskName : Filter_->TaskNameFilter) { - auto it = taskName2Data.find(taskName.Get()); - if (it == taskName2Data.end()) { - continue; - } - const auto& data = it->second; - aggregate(data); - } - } - }; - - if (Filter_->JobStateFilter.empty()) { - for (const auto& [state, taskName2Data] : state2TaskName2Data) { - aggregateTaskName2Data(taskName2Data); - } - } else { - for (auto state : Filter_->JobStateFilter) { - auto it = state2TaskName2Data.find(state); - if (it == state2TaskName2Data.end()) { - continue; - } - const auto& taskName2Data = it->second; - aggregateTaskName2Data(taskName2Data); - } - } - - return result; -} - -//////////////////////////////////////////////////////////////////// - -namespace { - -constexpr int USER_STATISTICS_FILE_DESCRIPTOR = 5; -constexpr char PATH_DELIMITER = '/'; -constexpr char 
ESCAPE = '\\'; - -IOutputStream* GetStatisticsStream() -{ - static TFile file = Duplicate(USER_STATISTICS_FILE_DESCRIPTOR); - static TFileOutput stream(file); - return &stream; -} - -template <typename T> -void WriteCustomStatisticsAny(TStringBuf path, const T& value) -{ - ::NYson::TYsonWriter writer(GetStatisticsStream(), NYson::EYsonFormat::Binary, ::NYson::EYsonType::ListFragment); - int depth = 0; - size_t begin = 0; - size_t end = 0; - TVector<TString> items; - while (end <= path.size()) { - if (end + 1 < path.size() && path[end] == ESCAPE && path[end + 1] == PATH_DELIMITER) { - end += 2; - continue; - } - if (end == path.size() || path[end] == PATH_DELIMITER) { - writer.OnBeginMap(); - items.emplace_back(path.data() + begin, end - begin); - SubstGlobal(items.back(), "\\/", "/"); - writer.OnKeyedItem(TStringBuf(items.back())); - ++depth; - begin = end + 1; - } - ++end; - } - Serialize(value, &writer); - while (depth > 0) { - writer.OnEndMap(); - --depth; - } -} - -} - -//////////////////////////////////////////////////////////////////// - -void WriteCustomStatistics(const TNode& statistics) -{ - ::NYson::TYsonWriter writer(GetStatisticsStream(), NYson::EYsonFormat::Binary, ::NYson::EYsonType::ListFragment); - Serialize(statistics, &writer); -} - -void WriteCustomStatistics(TStringBuf path, i64 value) -{ - WriteCustomStatisticsAny(path, value); -} - -void FlushCustomStatisticsStream() { - GetStatisticsStream()->Flush(); -} -//////////////////////////////////////////////////////////////////// - -} // namespace NYT diff --git a/yt/cpp/mapreduce/interface/job_statistics.h b/yt/cpp/mapreduce/interface/job_statistics.h deleted file mode 100644 index 8af751604f..0000000000 --- a/yt/cpp/mapreduce/interface/job_statistics.h +++ /dev/null @@ -1,268 +0,0 @@ -#pragma once - -/// -/// @file yt/cpp/mapreduce/interface/job_statistics.h -/// -/// Header containing classes and utility functions to work with -/// [job 
statistics](https://docs.yandex-team.ru/yt/problems/jobstatistics). - -#include "fwd.h" - -#include <library/cpp/yson/node/node.h> - -#include <util/system/defaults.h> -#include <util/generic/maybe.h> -#include <util/generic/ptr.h> - -namespace NYT { - -//////////////////////////////////////////////////////////////////// - -/// -/// @brief Convert i64 representation of statistics to other type. -/// -/// Library defines this template for types TDuration and i64. -/// Users may define it for their types. -/// -/// @see @ref NYT::TJobStatistics::GetStatisticsAs method. -template <typename T> -T ConvertJobStatisticsEntry(i64 value); - -//////////////////////////////////////////////////////////////////// - -/// Class representing a collection of job statistics. -class TJobStatistics -{ -public: - /// - /// Construct empty statistics. - TJobStatistics(); - - /// - /// Construct statistics from statistics node. - TJobStatistics(const NYT::TNode& statistics); - - TJobStatistics(const TJobStatistics& jobStatistics); - TJobStatistics(TJobStatistics&& jobStatistics); - - TJobStatistics& operator=(const TJobStatistics& jobStatistics); - TJobStatistics& operator=(TJobStatistics&& jobStatistics); - - ~TJobStatistics(); - - /// - /// @brief Filter statistics by task name. - /// - /// @param taskNames What task names to include (empty means all). - TJobStatistics TaskName(TVector<TTaskName> taskNames) const; - - /// - /// @brief Filter statistics by job state. - /// - /// @param filter What job states to include (empty means all). - /// - /// @note Default statistics include only (successfully) completed jobs. - TJobStatistics JobState(TVector<EJobState> filter) const; - - /// - /// @brief Filter statistics by job type. - /// - /// @param filter What job types to include (empty means all). - /// - /// @deprecated Use @ref TJobStatistics::TaskName instead. 
- /// - /// @see https://yt.yandex-team.ru/docs/description/mr/jobs#obshaya-shema - TJobStatistics JobType(TVector<EJobType> filter) const; - - /// - /// @brief Check that given statistics exist. - /// - /// @param name Slash separated statistics name, e.g. "time/total" (like it appears in web interface). - bool HasStatistics(TStringBuf name) const; - - /// - /// @brief Get statistics by name. - /// - /// @param name Slash separated statistics name, e.g. "time/total" (like it appears in web interface). - /// - /// @note If statistics is missing an exception is thrown. If because of filters - /// no fields remain the returned value is empty (all fields are `Nothing`). - /// - /// @note We don't use `TMaybe<TJobStatisticsEntry>` here; - /// instead, @ref NYT::TJobStatisticsEntry methods return `TMaybe<i64>`, - /// so user easier use `.GetOrElse`: - /// ``` - /// jobStatistics.GetStatistics("some/statistics/name").Max().GetOrElse(0); - /// ``` - TJobStatisticsEntry<i64> GetStatistics(TStringBuf name) const; - - /// - /// @brief Get statistics by name. - /// - /// @param name Slash separated statistics name, e.g. "time/total" (like it appears in web interface). - /// - /// @note In order to use `GetStatisticsAs` method, @ref NYT::ConvertJobStatisticsEntry function must be defined - /// (the library defines it for `i64` and `TDuration`, user may define it for other types). - template <typename T> - TJobStatisticsEntry<T> GetStatisticsAs(TStringBuf name) const; - - /// - /// Get (slash separated) names of statistics. - TVector<TString> GetStatisticsNames() const; - - /// - /// @brief Check if given custom statistics exists. - /// - /// @param name Slash separated custom statistics name. - bool HasCustomStatistics(TStringBuf name) const; - - /// - /// @brief Get custom statistics (those the user can write in job with @ref NYT::WriteCustomStatistics). - /// - /// @param name Slash separated custom statistics name. 
- TJobStatisticsEntry<i64> GetCustomStatistics(TStringBuf name) const; - - /// - /// @brief Get custom statistics (those the user can write in job with @ref NYT::WriteCustomStatistics). - /// - /// @param name Slash separated custom statistics name. - template <typename T> - TJobStatisticsEntry<T> GetCustomStatisticsAs(TStringBuf name) const; - - /// - /// Get names of all custom statistics. - TVector<TString> GetCustomStatisticsNames() const; - -private: - class TData; - struct TFilter; - - struct TDataEntry { - i64 Max; - i64 Min; - i64 Sum; - i64 Count; - }; - - static const TString CustomStatisticsNamePrefix_; - -private: - TJobStatistics(::TIntrusivePtr<TData> data, ::TIntrusivePtr<TFilter> filter); - - TMaybe<TDataEntry> GetStatisticsImpl(TStringBuf name) const; - -private: - ::TIntrusivePtr<TData> Data_; - ::TIntrusivePtr<TFilter> Filter_; - -private: - template<typename T> - friend class TJobStatisticsEntry; -}; - -//////////////////////////////////////////////////////////////////// - -/// Class representing single statistic. -template <typename T> -class TJobStatisticsEntry -{ -public: - TJobStatisticsEntry(TMaybe<TJobStatistics::TDataEntry> data) - : Data_(std::move(data)) - { } - - /// Sum of the statistic over all jobs. - TMaybe<T> Sum() const - { - if (Data_) { - return ConvertJobStatisticsEntry<T>(Data_->Sum); - } - return Nothing(); - } - - /// @brief Average of the statistic over all jobs. - /// - /// @note Only jobs that emitted statistics are taken into account. - TMaybe<T> Avg() const - { - if (Data_ && Data_->Count) { - return ConvertJobStatisticsEntry<T>(Data_->Sum / Data_->Count); - } - return Nothing(); - } - - /// @brief Number of jobs that emitted this statistic. - TMaybe<T> Count() const - { - if (Data_) { - return ConvertJobStatisticsEntry<T>(Data_->Count); - } - return Nothing(); - } - - /// @brief Maximum value of the statistic over all jobs. 
- TMaybe<T> Max() const - { - if (Data_) { - return ConvertJobStatisticsEntry<T>(Data_->Max); - } - return Nothing(); - } - - /// @brief Minimum value of the statistic over all jobs. - TMaybe<T> Min() const - { - if (Data_) { - return ConvertJobStatisticsEntry<T>(Data_->Min); - } - return Nothing(); - } - -private: - TMaybe<TJobStatistics::TDataEntry> Data_; - -private: - friend class TJobStatistics; -}; - -//////////////////////////////////////////////////////////////////// - -template <typename T> -TJobStatisticsEntry<T> TJobStatistics::GetStatisticsAs(TStringBuf name) const -{ - return TJobStatisticsEntry<T>(GetStatisticsImpl(name)); -} - -template <typename T> -TJobStatisticsEntry<T> TJobStatistics::GetCustomStatisticsAs(TStringBuf name) const -{ - return TJobStatisticsEntry<T>(GetStatisticsImpl(CustomStatisticsNamePrefix_ + name)); -} - -//////////////////////////////////////////////////////////////////// - -/// -/// @brief Write [custom statistics](https://yt.yandex-team.ru/docs/description/mr/jobs#user_stats). -/// -/// @param path Slash-separated path (length must not exceed 512 bytes). -/// @param value Value of the statistic. -/// -/// @note The function must be called in job. -/// Total number of statistics (with different paths) must not exceed 128. -void WriteCustomStatistics(TStringBuf path, i64 value); - -/// -/// @brief Write several [custom statistics](https://yt.yandex-team.ru/docs/description/mr/jobs#user_stats) at once. -/// -/// @param statistics A tree of map nodes with leaves of type `i64`. -/// -/// @note The call is equivalent to calling @ref NYT::WriteCustomStatistics(TStringBuf, i64) for every path in the given map. 
-void WriteCustomStatistics(const TNode& statistics); - -/// -/// @brief Flush [custom statistics stream](https://yt.yandex-team.ru/docs/description/mr/jobs#user_stats) -/// -void FlushCustomStatisticsStream(); -//////////////////////////////////////////////////////////////////// - -} // namespace NYT diff --git a/yt/cpp/mapreduce/interface/job_statistics_ut.cpp b/yt/cpp/mapreduce/interface/job_statistics_ut.cpp deleted file mode 100644 index 0cf53d771a..0000000000 --- a/yt/cpp/mapreduce/interface/job_statistics_ut.cpp +++ /dev/null @@ -1,257 +0,0 @@ -#include <yt/cpp/mapreduce/interface/job_statistics.h> -#include <yt/cpp/mapreduce/interface/operation.h> - -#include <library/cpp/yson/node/node_io.h> - -#include <library/cpp/testing/unittest/registar.h> - -using namespace NYT; - -Y_UNIT_TEST_SUITE(JobStatistics) -{ - Y_UNIT_TEST(Simple) - { - const TString input = R"""( - { - "data" = { - "output" = { - "0" = { - "uncompressed_data_size" = { - "$" = { - "completed" = { - "simple_sort" = { - "max" = 130; - "count" = 1; - "min" = 130; - "sum" = 130; - }; - "map" = { - "max" = 42; - "count" = 1; - "min" = 42; - "sum" = 42; - }; - }; - "aborted" = { - "simple_sort" = { - "max" = 24; - "count" = 1; - "min" = 24; - "sum" = 24; - }; - }; - }; - }; - }; - }; - }; - })"""; - - TJobStatistics stat(NodeFromYsonString(input)); - - UNIT_ASSERT(stat.HasStatistics("data/output/0/uncompressed_data_size")); - UNIT_ASSERT(!stat.HasStatistics("nonexistent-statistics")); - UNIT_ASSERT_EXCEPTION_CONTAINS(stat.GetStatistics("BLAH-BLAH"), yexception, "Statistics"); - - UNIT_ASSERT_VALUES_EQUAL(stat.GetStatisticsNames(), TVector<TString>{"data/output/0/uncompressed_data_size"}); - - UNIT_ASSERT_VALUES_EQUAL(stat.GetStatistics("data/output/0/uncompressed_data_size").Max(), 130); - UNIT_ASSERT_VALUES_EQUAL(stat.GetStatistics("data/output/0/uncompressed_data_size").Count(), 2); - UNIT_ASSERT_VALUES_EQUAL(stat.GetStatistics("data/output/0/uncompressed_data_size").Min(), 42); - 
UNIT_ASSERT_VALUES_EQUAL(stat.GetStatistics("data/output/0/uncompressed_data_size").Sum(), 172); - UNIT_ASSERT_VALUES_EQUAL(stat.GetStatistics("data/output/0/uncompressed_data_size").Avg(), 172 / 2); - - UNIT_ASSERT_VALUES_EQUAL(stat.JobState({EJobState::Aborted}).GetStatistics("data/output/0/uncompressed_data_size").Sum(), 24); - UNIT_ASSERT_VALUES_EQUAL(stat.JobType({EJobType::Map}).JobState({EJobState::Aborted}).GetStatistics("data/output/0/uncompressed_data_size").Sum(), TMaybe<i64>()); - } - - Y_UNIT_TEST(TestOtherTypes) - { - const TString input = R"""( - { - "time" = { - "exec" = { - "$" = { - "completed" = { - "map" = { - "max" = 2482468; - "count" = 38; - "min" = 578976; - "sum" = 47987270; - }; - }; - }; - }; - }; - })"""; - - TJobStatistics stat(NodeFromYsonString(input)); - - UNIT_ASSERT_VALUES_EQUAL(stat.GetStatisticsAs<TDuration>("time/exec").Max(), TDuration::MilliSeconds(2482468)); - } - - Y_UNIT_TEST(Custom) - { - const TString input = R"""( - { - "custom" = { - "some" = { - "path" = { - "$" = { - "completed" = { - "map" = { - "max" = -1; - "count" = 1; - "min" = -1; - "sum" = -1; - }; - }; - }; - }; - }; - "another" = { - "path" = { - "$" = { - "completed" = { - "map" = { - "max" = 1001; - "count" = 2; - "min" = 1001; - "sum" = 2002; - }; - }; - }; - }; - }; - }; - })"""; - - TJobStatistics stat(NodeFromYsonString(input)); - - UNIT_ASSERT(stat.HasCustomStatistics("some/path")); - UNIT_ASSERT(!stat.HasCustomStatistics("nonexistent-statistics")); - UNIT_ASSERT_EXCEPTION_CONTAINS(stat.GetCustomStatistics("BLAH-BLAH"), yexception, "Statistics"); - - const auto names = stat.GetCustomStatisticsNames(); - const THashSet<TString> expected = {"some/path", "another/path"}; - UNIT_ASSERT_VALUES_EQUAL(THashSet<TString>(names.begin(), names.end()), expected); - - UNIT_ASSERT_VALUES_EQUAL(stat.GetCustomStatistics("some/path").Max(), -1); - UNIT_ASSERT_VALUES_EQUAL(stat.GetCustomStatistics("another/path").Avg(), 1001); - } - - Y_UNIT_TEST(TaskNames) - { - const 
TString input = R"""( - { - "data" = { - "output" = { - "0" = { - "uncompressed_data_size" = { - "$" = { - "completed" = { - "partition_map" = { - "max" = 130; - "count" = 1; - "min" = 130; - "sum" = 130; - }; - "partition(0)" = { - "max" = 42; - "count" = 1; - "min" = 42; - "sum" = 42; - }; - }; - "aborted" = { - "simple_sort" = { - "max" = 24; - "count" = 1; - "min" = 24; - "sum" = 24; - }; - }; - }; - }; - }; - }; - }; - })"""; - - TJobStatistics stat(NodeFromYsonString(input)); - - UNIT_ASSERT(stat.HasStatistics("data/output/0/uncompressed_data_size")); - UNIT_ASSERT(!stat.HasStatistics("nonexistent-statistics")); - UNIT_ASSERT_EXCEPTION_CONTAINS(stat.GetStatistics("BLAH-BLAH"), yexception, "Statistics"); - - UNIT_ASSERT_VALUES_EQUAL(stat.GetStatisticsNames(), TVector<TString>{"data/output/0/uncompressed_data_size"}); - - UNIT_ASSERT_VALUES_EQUAL(stat.GetStatistics("data/output/0/uncompressed_data_size").Max(), 130); - UNIT_ASSERT_VALUES_EQUAL(stat.GetStatistics("data/output/0/uncompressed_data_size").Count(), 2); - UNIT_ASSERT_VALUES_EQUAL(stat.GetStatistics("data/output/0/uncompressed_data_size").Min(), 42); - UNIT_ASSERT_VALUES_EQUAL(stat.GetStatistics("data/output/0/uncompressed_data_size").Sum(), 172); - UNIT_ASSERT_VALUES_EQUAL(stat.GetStatistics("data/output/0/uncompressed_data_size").Avg(), 172 / 2); - - UNIT_ASSERT_VALUES_EQUAL( - stat - .JobState({EJobState::Aborted}) - .GetStatistics("data/output/0/uncompressed_data_size") - .Sum(), - 24); - UNIT_ASSERT_VALUES_EQUAL( - stat - .JobType({EJobType::Partition}) - .JobState({EJobState::Aborted}) - .GetStatistics("data/output/0/uncompressed_data_size") - .Sum(), - TMaybe<i64>()); - UNIT_ASSERT_VALUES_EQUAL( - stat - .TaskName({"partition(0)"}) - .GetStatistics("data/output/0/uncompressed_data_size") - .Sum(), - 42); - UNIT_ASSERT_VALUES_EQUAL( - stat - .TaskName({"partition"}) - .GetStatistics("data/output/0/uncompressed_data_size") - .Sum(), - TMaybe<i64>()); - UNIT_ASSERT_VALUES_EQUAL( - stat - 
.TaskName({"partition_map(0)"}) - .GetStatistics("data/output/0/uncompressed_data_size") - .Sum(), - 130); - UNIT_ASSERT_VALUES_EQUAL( - stat - .JobType({EJobType::Partition}) - .GetStatistics("data/output/0/uncompressed_data_size") - .Sum(), - 42); - UNIT_ASSERT_VALUES_EQUAL( - stat - .JobType({EJobType::PartitionMap}) - .GetStatistics("data/output/0/uncompressed_data_size") - .Sum(), - 130); - UNIT_ASSERT_VALUES_EQUAL( - stat - .TaskName({ETaskName::Partition0}) - .GetStatistics("data/output/0/uncompressed_data_size") - .Sum(), - 42); - UNIT_ASSERT_VALUES_EQUAL( - stat - .TaskName({ETaskName::Partition1}) - .GetStatistics("data/output/0/uncompressed_data_size") - .Sum(), - TMaybe<i64>()); - UNIT_ASSERT_VALUES_EQUAL( - stat - .TaskName({ETaskName::PartitionMap0}) - .GetStatistics("data/output/0/uncompressed_data_size") - .Sum(), - 130); - } -} diff --git a/yt/cpp/mapreduce/interface/logging/logger.cpp b/yt/cpp/mapreduce/interface/logging/logger.cpp deleted file mode 100644 index bfa56b94f6..0000000000 --- a/yt/cpp/mapreduce/interface/logging/logger.cpp +++ /dev/null @@ -1,188 +0,0 @@ -#include "logger.h" - -#include <util/datetime/base.h> - -#include <util/stream/file.h> -#include <util/stream/format.h> -#include <util/stream/printf.h> -#include <util/stream/str.h> - -#include <util/system/mutex.h> -#include <util/system/rwlock.h> -#include <util/system/thread.h> - -namespace NYT { - -//////////////////////////////////////////////////////////////////////////////// - -static TStringBuf StripFileName(TStringBuf path) { - TStringBuf l, r; - if (path.TryRSplit('/', l, r) || path.TryRSplit('\\', l, r)) { - return r; - } else { - return path; - } -} - -static char GetLogLevelCode(ILogger::ELevel level) { - switch (level) { - case ILogger::FATAL: return 'F'; - case ILogger::ERROR: return 'E'; - case ILogger::INFO: return 'I'; - case ILogger::DEBUG: return 'D'; - } - Y_UNREACHABLE(); -} - -//////////////////////////////////////////////////////////////////////////////// - 
-class TNullLogger - : public ILogger -{ -public: - void Log(ELevel level, const TSourceLocation& sourceLocation, const char* format, va_list args) override - { - Y_UNUSED(level); - Y_UNUSED(sourceLocation); - Y_UNUSED(format); - Y_UNUSED(args); - } -}; - -//////////////////////////////////////////////////////////////////////////////// - -class TLoggerBase - : public ILogger -{ -public: - TLoggerBase(ELevel cutLevel) - : CutLevel_(cutLevel) - { } - - virtual void OutputLine(const TString& line) = 0; - - void Log(ELevel level, const TSourceLocation& sourceLocation, const char* format, va_list args) override - { - if (level > CutLevel_) { - return; - } - - TStringStream stream; - stream << TInstant::Now().ToStringLocal() - << " " << GetLogLevelCode(level) - << " [" << Hex(TThread::CurrentThreadId(), HF_FULL) << "] "; - Printf(stream, format, args); - stream << " - " << StripFileName(sourceLocation.File) << ':' << sourceLocation.Line << Endl; - - TGuard<TMutex> guard(Mutex_); - OutputLine(stream.Str()); - } - -private: - ELevel CutLevel_; - TMutex Mutex_; -}; - -//////////////////////////////////////////////////////////////////////////////// - -class TStdErrLogger - : public TLoggerBase -{ -public: - TStdErrLogger(ELevel cutLevel) - : TLoggerBase(cutLevel) - { } - - void OutputLine(const TString& line) override - { - Cerr << line; - } -}; - -ILoggerPtr CreateStdErrLogger(ILogger::ELevel cutLevel) -{ - return new TStdErrLogger(cutLevel); -} - -//////////////////////////////////////////////////////////////////////////////// - -class TFileLogger - : public TLoggerBase -{ -public: - TFileLogger(ELevel cutLevel, const TString& path, bool append) - : TLoggerBase(cutLevel) - , Stream_(TFile(path, OpenAlways | WrOnly | Seq | (append ? 
ForAppend : EOpenMode()))) - { } - - void OutputLine(const TString& line) override - { - Stream_ << line; - } - -private: - TUnbufferedFileOutput Stream_; -}; - -ILoggerPtr CreateFileLogger(ILogger::ELevel cutLevel, const TString& path, bool append) -{ - return new TFileLogger(cutLevel, path, append); -} -//////////////////////////////////////////////////////////////////////////////// - -class TBufferedFileLogger - : public TLoggerBase -{ -public: - TBufferedFileLogger(ELevel cutLevel, const TString& path, bool append) - : TLoggerBase(cutLevel) - , Stream_(TFile(path, OpenAlways | WrOnly | Seq | (append ? ForAppend : EOpenMode()))) - { } - - void OutputLine(const TString& line) override - { - Stream_ << line; - } - -private: - TFileOutput Stream_; -}; - -ILoggerPtr CreateBufferedFileLogger(ILogger::ELevel cutLevel, const TString& path, bool append) -{ - return new TBufferedFileLogger(cutLevel, path, append); -} - -//////////////////////////////////////////////////////////////////////////////// - -static TRWMutex LoggerMutex; -static ILoggerPtr Logger; - -struct TLoggerInitializer -{ - TLoggerInitializer() - { - Logger = new TNullLogger; - } -} LoggerInitializer; - -void SetLogger(ILoggerPtr logger) -{ - auto guard = TWriteGuard(LoggerMutex); - if (logger) { - Logger = logger; - } else { - Logger = new TNullLogger; - } -} - -ILoggerPtr GetLogger() -{ - auto guard = TReadGuard(LoggerMutex); - return Logger; -} - -//////////////////////////////////////////////////////////////////////////////// - -} - diff --git a/yt/cpp/mapreduce/interface/logging/logger.h b/yt/cpp/mapreduce/interface/logging/logger.h deleted file mode 100644 index 2b5aae87d1..0000000000 --- a/yt/cpp/mapreduce/interface/logging/logger.h +++ /dev/null @@ -1,43 +0,0 @@ -#pragma once - -#include <util/generic/ptr.h> -#include <util/generic/string.h> -#include <util/system/compat.h> -#include <util/system/src_location.h> - -namespace NYT { - 
-//////////////////////////////////////////////////////////////////////////////// - -class ILogger - : public TThrRefBase -{ -public: - enum ELevel - { - FATAL /* "fatal", "FATAL" */, - // We don't have such level as `warning', but we support it for compatibility with other APIs. - ERROR /* "error", "warning", "ERROR", "WARNING" */, - INFO /* "info", "INFO" */, - DEBUG /* "debug", "DEBUG" */ - }; - - virtual void Log(ELevel level, const ::TSourceLocation& sourceLocation, const char* format, va_list args) = 0; -}; - -using ILoggerPtr = ::TIntrusivePtr<ILogger>; - -void SetLogger(ILoggerPtr logger); -ILoggerPtr GetLogger(); - -ILoggerPtr CreateStdErrLogger(ILogger::ELevel cutLevel); -ILoggerPtr CreateFileLogger(ILogger::ELevel cutLevel, const TString& path, bool append = false); - -/** - * Create logger that writes to a file in a buffered manner. - * It should result in fewer system calls (useful if you expect a lot of log messages), - * but in case of a crash, you would lose some log messages that haven't been flushed yet. 
- */ -ILoggerPtr CreateBufferedFileLogger(ILogger::ELevel cutLevel, const TString& path, bool append = false); - -} // namespace NYT diff --git a/yt/cpp/mapreduce/interface/logging/ya.make b/yt/cpp/mapreduce/interface/logging/ya.make deleted file mode 100644 index 8095bfe4ba..0000000000 --- a/yt/cpp/mapreduce/interface/logging/ya.make +++ /dev/null @@ -1,16 +0,0 @@ -LIBRARY() - -INCLUDE(${ARCADIA_ROOT}/yt/ya_cpp.make.inc) - -SRCS( - logger.cpp - yt_log.cpp -) - -PEERDIR( - library/cpp/yt/logging -) - -GENERATE_ENUM_SERIALIZATION(logger.h) - -END() diff --git a/yt/cpp/mapreduce/interface/logging/yt_log.cpp b/yt/cpp/mapreduce/interface/logging/yt_log.cpp deleted file mode 100644 index 9fa7b91580..0000000000 --- a/yt/cpp/mapreduce/interface/logging/yt_log.cpp +++ /dev/null @@ -1,126 +0,0 @@ -#include "yt_log.h" - -#include "logger.h" - -#include <util/generic/guid.h> - -#include <util/system/mutex.h> - -namespace NYT { - -using namespace NLogging; - -//////////////////////////////////////////////////////////////////////////////// - -namespace { - -class TLogManager - : public ILogManager -{ -public: - static constexpr TStringBuf CategoryName = "Wrapper"; - -public: - void RegisterStaticAnchor( - TLoggingAnchor* anchor, - ::TSourceLocation sourceLocation, - TStringBuf anchorMessage) override - { - if (anchor->Registered.exchange(true)) { - return; - } - - anchor->Enabled.store(true); - - auto guard = Guard(Mutex_); - anchor->SourceLocation = sourceLocation; - anchor->AnchorMessage = anchorMessage; - } - - void UpdateAnchor(TLoggingAnchor* /*position*/) override - { } - - void Enqueue(TLogEvent&& event) override - { - auto message = TString(event.MessageRef.ToStringBuf()); - LogMessage( - ToImplLevel(event.Level), - ::TSourceLocation(event.SourceFile, event.SourceLine), - "%.*s", - event.MessageRef.size(), - event.MessageRef.begin()); - } - - const TLoggingCategory* GetCategory(TStringBuf categoryName) override - { - Y_VERIFY(categoryName == CategoryName); - return 
&Category_; - } - - void UpdateCategory(TLoggingCategory* /*category*/) override - { - Y_FAIL(); - } - - bool GetAbortOnAlert() const override - { - return false; - } - -private: - static ILogger::ELevel ToImplLevel(ELogLevel level) - { - switch (level) { - case ELogLevel::Minimum: - case ELogLevel::Trace: - case ELogLevel::Debug: - return ILogger::ELevel::DEBUG; - case ELogLevel::Info: - return ILogger::ELevel::INFO; - case ELogLevel::Warning: - case ELogLevel::Error: - return ILogger::ELevel::ERROR; - case ELogLevel::Alert: - case ELogLevel::Fatal: - case ELogLevel::Maximum: - return ILogger::ELevel::FATAL; - } - } - - static void LogMessage(ILogger::ELevel level, const ::TSourceLocation& sourceLocation, const char* format, ...) - { - va_list args; - va_start(args, format); - GetLogger()->Log(level, sourceLocation, format, args); - va_end(args); - } - -private: - ::TMutex Mutex_; - std::atomic<int> ActualVersion_{1}; - const TLoggingCategory Category_{ - .Name{CategoryName}, - .MinPlainTextLevel{ELogLevel::Minimum}, - .CurrentVersion{1}, - .ActualVersion = &ActualVersion_, - }; -}; - -TLogManager LogManager; - -} // namespace - -//////////////////////////////////////////////////////////////////////////////// - -TLogger Logger(&LogManager, TLogManager::CategoryName); - -//////////////////////////////////////////////////////////////////////////////// - -void FormatValue(TStringBuilderBase* builder, const TGUID& value, TStringBuf /*format*/) -{ - builder->AppendString(GetGuidAsString(value)); -} - -//////////////////////////////////////////////////////////////////////////////// - -} // namespace NYT diff --git a/yt/cpp/mapreduce/interface/logging/yt_log.h b/yt/cpp/mapreduce/interface/logging/yt_log.h deleted file mode 100644 index 4cf93a6ba1..0000000000 --- a/yt/cpp/mapreduce/interface/logging/yt_log.h +++ /dev/null @@ -1,17 +0,0 @@ -#pragma once - -#include <library/cpp/yt/logging/logger.h> - -struct TGUID; - -namespace NYT { - 
-//////////////////////////////////////////////////////////////////////////////// - -extern NLogging::TLogger Logger; - -void FormatValue(TStringBuilderBase* builder, const TGUID& value, TStringBuf format); - -//////////////////////////////////////////////////////////////////////////////// - -} // namespace NYT diff --git a/yt/cpp/mapreduce/interface/mpl.h b/yt/cpp/mapreduce/interface/mpl.h deleted file mode 100644 index 9865e28b6c..0000000000 --- a/yt/cpp/mapreduce/interface/mpl.h +++ /dev/null @@ -1,73 +0,0 @@ -#pragma once - -#include "fwd.h" - -#include <tuple> -#include <type_traits> - -namespace NYT { - -/// @cond Doxygen_Suppress - -//////////////////////////////////////////////////////////////////////////////// - -template <class TBase, class TDerived> -struct TIsBaseOf -{ - static constexpr bool Value = std::is_base_of_v<TBase, TDerived> && !std::is_same_v<TBase, TDerived>; -}; - -//////////////////////////////////////////////////////////////////////////////// - -namespace NDetail { - -template <class T, class Tuple> -struct TIndexInTuple; - -template <class T, class... Types> -struct TIndexInTuple<T, std::tuple<T, Types...>> -{ - static constexpr int Value = 0; -}; - -template <class T> -struct TIndexInTuple<T, std::tuple<>> -{ - static constexpr int Value = 0; -}; - -template <class T, class U, class... Types> -struct TIndexInTuple<T, std::tuple<U, Types...>> -{ - static constexpr int Value = 1 + TIndexInTuple<T, std::tuple<Types...>>::Value; -}; - -template <class T, class TTuple> -constexpr bool DoesTupleContainType = (TIndexInTuple<T, TTuple>::Value < std::tuple_size<TTuple>{}); - -template <class TOut, class TIn = std::tuple<>> -struct TUniqueTypes; - -template <class... TOut, class TInCar, class... 
TInCdr> -struct TUniqueTypes<std::tuple<TOut...>, std::tuple<TInCar, TInCdr...>> -{ - using TType = std::conditional_t< - DoesTupleContainType<TInCar, std::tuple<TOut...>>, - typename TUniqueTypes<std::tuple<TOut...>, std::tuple<TInCdr...>>::TType, - typename TUniqueTypes<std::tuple<TOut..., TInCar>, std::tuple<TInCdr...>>::TType - >; -}; - -template <class TOut> -struct TUniqueTypes<TOut, std::tuple<>> -{ - using TType = TOut; -}; - -} // namespace NDetail - -/// @endcond Doxygen_Suppress - -//////////////////////////////////////////////////////////////////////////////// - -} diff --git a/yt/cpp/mapreduce/interface/node.h b/yt/cpp/mapreduce/interface/node.h deleted file mode 100644 index fece1b36de..0000000000 --- a/yt/cpp/mapreduce/interface/node.h +++ /dev/null @@ -1,7 +0,0 @@ -#pragma once - -// Backward compatibility -#include "fwd.h" -#include <library/cpp/yson/node/node.h> - - diff --git a/yt/cpp/mapreduce/interface/operation-inl.h b/yt/cpp/mapreduce/interface/operation-inl.h deleted file mode 100644 index 8d53cd446f..0000000000 --- a/yt/cpp/mapreduce/interface/operation-inl.h +++ /dev/null @@ -1,928 +0,0 @@ -#pragma once - -#ifndef OPERATION_INL_H_ -#error "Direct inclusion of this file is not allowed, use operation.h" -#include "operation.h" -#endif -#undef OPERATION_INL_H_ - -#include "errors.h" - -#include <util/generic/bt_exception.h> -#include <util/generic/singleton.h> -#include <util/system/type_name.h> - -#include <util/stream/file.h> -#include <util/stream/buffer.h> -#include <util/string/subst.h> - -#include <typeindex> - -namespace NYT { - -namespace NDetail { - -//////////////////////////////////////////////////////////////////////////////// - -template<class T> -void Assign(TVector<T>& array, size_t idx, const T& value) { - array.resize(std::max(array.size(), idx + 1)); - array[idx] = value; -} - -//////////////////////////////////////////////////////////////////////////////// - -template <typename TRow> -TStructuredRowStreamDescription 
GetStructuredRowStreamDescription() -{ - if constexpr (std::is_same_v<TRow, NYT::TNode>) { - return TTNodeStructuredRowStream{}; - } else if constexpr (std::is_same_v<TRow, NYT::TYaMRRow>) { - return TTYaMRRowStructuredRowStream{}; - } else if constexpr (std::is_same_v<::google::protobuf::Message, TRow>) { - return TProtobufStructuredRowStream{nullptr}; - } else if constexpr (TIsBaseOf<::google::protobuf::Message, TRow>::Value) { - return TProtobufStructuredRowStream{TRow::descriptor()}; - } else if constexpr (TIsProtoOneOf<TRow>::value) { - return TProtobufStructuredRowStream{nullptr}; - } else { - static_assert(TDependentFalse<TRow>, "Unknown row type"); - } -} - -//////////////////////////////////////////////////////////////////////////////// - -} // namespace NDetail - -//////////////////////////////////////////////////////////////////////////////// - -template <typename TRow> -TStructuredTablePath Structured(TRichYPath richYPath) -{ - return TStructuredTablePath(std::move(richYPath), StructuredTableDescription<TRow>()); -} - -template <typename TRow> -TTableStructure StructuredTableDescription() -{ - if constexpr (std::is_same_v<TRow, NYT::TNode>) { - return TUnspecifiedTableStructure{}; - } else if constexpr (std::is_same_v<TRow, NYT::TYaMRRow>) { - return TUnspecifiedTableStructure{}; - } else if constexpr (std::is_base_of_v<::google::protobuf::Message, TRow>) { - if constexpr (std::is_same_v<::google::protobuf::Message, TRow>) { - static_assert(TDependentFalse<TRow>, "Cannot use ::google::protobuf::Message as table descriptor"); - } else { - return TProtobufTableStructure{TRow::descriptor()}; - } - } else { - static_assert(TDependentFalse<TRow>, "Unknown row type"); - } -} - -//////////////////////////////////////////////////////////////////////////////// - -template <typename TDerived> -TDerived& TRawOperationIoTableSpec<TDerived>::AddInput(const TRichYPath& path) -{ - Inputs_.push_back(path); - return static_cast<TDerived&>(*this); -} - -template 
<typename TDerived> -TDerived& TRawOperationIoTableSpec<TDerived>::SetInput(size_t tableIndex, const TRichYPath& path) -{ - NDetail::Assign(Inputs_, tableIndex, path); -} - -template <typename TDerived> -TDerived& TRawOperationIoTableSpec<TDerived>::AddOutput(const TRichYPath& path) -{ - Outputs_.push_back(path); - return static_cast<TDerived&>(*this); -} - -template <typename TDerived> -TDerived& TRawOperationIoTableSpec<TDerived>::SetOutput(size_t tableIndex, const TRichYPath& path) -{ - NDetail::Assign(Outputs_, tableIndex, path); -} - -template <typename TDerived> -const TVector<TRichYPath>& TRawOperationIoTableSpec<TDerived>::GetInputs() const -{ - return Inputs_; -} - -template <typename TDerived> -const TVector<TRichYPath>& TRawOperationIoTableSpec<TDerived>::GetOutputs() const -{ - return Outputs_; -} - -//////////////////////////////////////////////////////////////////////////////// - -template <typename TDerived> -TDerived& TRawMapReduceOperationIoSpec<TDerived>::AddMapOutput(const TRichYPath& path) -{ - MapOutputs_.push_back(path); - return static_cast<TDerived&>(*this); -} - -template <typename TDerived> -TDerived& TRawMapReduceOperationIoSpec<TDerived>::SetMapOutput(size_t tableIndex, const TRichYPath& path) -{ - NDetail::Assign(MapOutputs_, tableIndex, path); -} - -template <typename TDerived> -const TVector<TRichYPath>& TRawMapReduceOperationIoSpec<TDerived>::GetMapOutputs() const -{ - return MapOutputs_; -} - -//////////////////////////////////////////////////////////////////////////////// - -::TIntrusivePtr<INodeReaderImpl> CreateJobNodeReader(TRawTableReaderPtr rawTableReader); -::TIntrusivePtr<IYaMRReaderImpl> CreateJobYaMRReader(TRawTableReaderPtr rawTableReader); -::TIntrusivePtr<IProtoReaderImpl> CreateJobProtoReader(TRawTableReaderPtr rawTableReader); - -::TIntrusivePtr<INodeWriterImpl> CreateJobNodeWriter(THolder<IProxyOutput> rawTableWriter); -::TIntrusivePtr<IYaMRWriterImpl> CreateJobYaMRWriter(THolder<IProxyOutput> rawTableWriter); 
-::TIntrusivePtr<IProtoWriterImpl> CreateJobProtoWriter(THolder<IProxyOutput> rawTableWriter); - -//////////////////////////////////////////////////////////////////////////////// - -template <class T> -inline ::TIntrusivePtr<typename TRowTraits<T>::IReaderImpl> CreateJobReaderImpl(TRawTableReaderPtr rawTableReader); - -template <> -inline ::TIntrusivePtr<INodeReaderImpl> CreateJobReaderImpl<TNode>(TRawTableReaderPtr rawTableReader) -{ - return CreateJobNodeReader(rawTableReader); -} - -template <> -inline ::TIntrusivePtr<IYaMRReaderImpl> CreateJobReaderImpl<TYaMRRow>(TRawTableReaderPtr rawTableReader) -{ - return CreateJobYaMRReader(rawTableReader); -} - -template <> -inline ::TIntrusivePtr<IProtoReaderImpl> CreateJobReaderImpl<Message>(TRawTableReaderPtr rawTableReader) -{ - return CreateJobProtoReader(rawTableReader); -} - -template <class T> -inline ::TIntrusivePtr<typename TRowTraits<T>::IReaderImpl> CreateJobReaderImpl(TRawTableReaderPtr rawTableReader) -{ - if constexpr (TIsBaseOf<Message, T>::Value || NDetail::TIsProtoOneOf<T>::value) { - return CreateJobProtoReader(rawTableReader); - } else { - static_assert(TDependentFalse<T>, "Unknown row type"); - } -} - -template <class T> -inline TTableReaderPtr<T> CreateJobReader(TRawTableReaderPtr rawTableReader) -{ - return new TTableReader<T>(CreateJobReaderImpl<T>(rawTableReader)); -} - -//////////////////////////////////////////////////////////////////////////////// - -template <class T> -TTableWriterPtr<T> CreateJobWriter(THolder<IProxyOutput> rawJobWriter); - -template <> -inline TTableWriterPtr<TNode> CreateJobWriter<TNode>(THolder<IProxyOutput> rawJobWriter) -{ - return new TTableWriter<TNode>(CreateJobNodeWriter(std::move(rawJobWriter))); -} - -template <> -inline TTableWriterPtr<TYaMRRow> CreateJobWriter<TYaMRRow>(THolder<IProxyOutput> rawJobWriter) -{ - return new TTableWriter<TYaMRRow>(CreateJobYaMRWriter(std::move(rawJobWriter))); -} - -template <> -inline TTableWriterPtr<Message> 
CreateJobWriter<Message>(THolder<IProxyOutput> rawJobWriter) -{ - return new TTableWriter<Message>(CreateJobProtoWriter(std::move(rawJobWriter))); -} - -template <class T, class = void> -struct TProtoWriterCreator; - -template <class T> -struct TProtoWriterCreator<T, std::enable_if_t<TIsBaseOf<Message, T>::Value>> -{ - static TTableWriterPtr<T> Create(::TIntrusivePtr<IProtoWriterImpl> writer) - { - return new TTableWriter<T>(writer); - } -}; - -template <class T> -inline TTableWriterPtr<T> CreateJobWriter(THolder<IProxyOutput> rawJobWriter) -{ - if constexpr (TIsBaseOf<Message, T>::Value) { - return TProtoWriterCreator<T>::Create(CreateJobProtoWriter(std::move(rawJobWriter))); - } else { - static_assert(TDependentFalse<T>, "Unknown row type"); - } -} - -//////////////////////////////////////////////////////////////////////////////// - -template <class T> -void TOperationInputSpecBase::AddInput(const TRichYPath& path) -{ - Inputs_.push_back(path); - StructuredInputs_.emplace_back(Structured<T>(path)); -} - -template <class T> -void TOperationInputSpecBase::SetInput(size_t tableIndex, const TRichYPath& path) -{ - NDetail::Assign(Inputs_, tableIndex, path); - NDetail::Assign(StructuredInputs_, tableIndex, Structured<T>(path)); -} - - -template <class T> -void TOperationOutputSpecBase::AddOutput(const TRichYPath& path) -{ - Outputs_.push_back(path); - StructuredOutputs_.emplace_back(Structured<T>(path)); -} - -template <class T> -void TOperationOutputSpecBase::SetOutput(size_t tableIndex, const TRichYPath& path) -{ - NDetail::Assign(Outputs_, tableIndex, path); - NDetail::Assign(StructuredOutputs_, tableIndex, Structured<T>(path)); -} - -template <class TDerived> -template <class T> -TDerived& TOperationIOSpec<TDerived>::AddInput(const TRichYPath& path) -{ - static_assert(!std::is_same<T, Message>::value, "input type can't be Message, it can only be its strict subtype (see st.yandex-team.ru/YT-7609)"); - TOperationInputSpecBase::AddInput<T>(path); - return 
*static_cast<TDerived*>(this); -} - -template <class TDerived> -template <class T> -TDerived& TOperationIOSpec<TDerived>::SetInput(size_t tableIndex, const TRichYPath& path) -{ - static_assert(!std::is_same<T, Message>::value, "input type can't be Message, it can only be its strict subtype (see st.yandex-team.ru/YT-7609)"); - TOperationInputSpecBase::SetInput<T>(tableIndex, path); - return *static_cast<TDerived*>(this); -} - - -template <class TDerived> -template <class T> -TDerived& TOperationIOSpec<TDerived>::AddOutput(const TRichYPath& path) -{ - static_assert(!std::is_same<T, Message>::value, "output type can't be Message, it can only be its strict subtype (see st.yandex-team.ru/YT-7609)"); - TOperationOutputSpecBase::AddOutput<T>(path); - return *static_cast<TDerived*>(this); -} - -template <class TDerived> -template <class T> -TDerived& TOperationIOSpec<TDerived>::SetOutput(size_t tableIndex, const TRichYPath& path) -{ - static_assert(!std::is_same<T, Message>::value, "output type can't be Message, it can only be its strict subtype (see st.yandex-team.ru/YT-7609)"); - TOperationOutputSpecBase::SetOutput<T>(tableIndex, path); - return *static_cast<TDerived*>(this); -} - -template <class TDerived> -TDerived& TOperationIOSpec<TDerived>::AddStructuredInput(TStructuredTablePath path) -{ - TOperationInputSpecBase::AddStructuredInput(std::move(path)); - return *static_cast<TDerived*>(this); -} - -template <class TDerived> -TDerived& TOperationIOSpec<TDerived>::AddStructuredOutput(TStructuredTablePath path) -{ - TOperationOutputSpecBase::AddStructuredOutput(std::move(path)); - return *static_cast<TDerived*>(this); -} - -//////////////////////////////////////////////////////////////////////////////// - -template <class T> -TVanillaTask& TVanillaTask::AddOutput(const TRichYPath& path) -{ - static_assert(!std::is_same<T, Message>::value, "output type can't be Message, it can only be its strict subtype (see st.yandex-team.ru/YT-7609)"); - 
TOperationOutputSpecBase::AddOutput<T>(path); - return *this; -} - -template <class T> -TVanillaTask& TVanillaTask::SetOutput(size_t tableIndex, const TRichYPath& path) -{ - static_assert(!std::is_same<T, Message>::value, "output type can't be Message, it can only be its strict subtype (see st.yandex-team.ru/YT-7609)"); - TOperationOutputSpecBase::SetOutput<T>(tableIndex, path); - return *this; -} - -//////////////////////////////////////////////////////////////////////////////// - -namespace NDetail { - -void ResetUseClientProtobuf(const char* methodName); - -} // namespace NDetail - -template <class TDerived> -TDerived& TOperationIOSpec<TDerived>::AddProtobufInput_VerySlow_Deprecated(const TRichYPath& path) -{ - NDetail::ResetUseClientProtobuf("AddProtobufInput_VerySlow_Deprecated"); - Inputs_.push_back(path); - StructuredInputs_.emplace_back(TStructuredTablePath(path, TProtobufTableStructure{nullptr})); - return *static_cast<TDerived*>(this); -} - -template <class TDerived> -TDerived& TOperationIOSpec<TDerived>::AddProtobufOutput_VerySlow_Deprecated(const TRichYPath& path) -{ - NDetail::ResetUseClientProtobuf("AddProtobufOutput_VerySlow_Deprecated"); - Outputs_.push_back(path); - StructuredOutputs_.emplace_back(TStructuredTablePath(path, TProtobufTableStructure{nullptr})); - return *static_cast<TDerived*>(this); -} - -//////////////////////////////////////////////////////////////////////////////// - -template <typename TRow> -TJobOperationPreparer::TInputGroup& TJobOperationPreparer::TInputGroup::Description() -{ - for (auto i : Indices_) { - Preparer_.InputDescription<TRow>(i); - } - return *this; -} - -template <typename TRow> -TJobOperationPreparer::TOutputGroup& TJobOperationPreparer::TOutputGroup::Description(bool inferSchema) -{ - for (auto i : Indices_) { - Preparer_.OutputDescription<TRow>(i, inferSchema); - } - return *this; -} - -//////////////////////////////////////////////////////////////////////////////// - -template <typename TCont> 
-TJobOperationPreparer::TInputGroup TJobOperationPreparer::BeginInputGroup(const TCont& indices) -{ - for (auto i : indices) { - ValidateInputTableIndex(i, TStringBuf("BeginInputGroup()")); - } - return TInputGroup(*this, TVector<int>(std::begin(indices), std::end(indices))); -} - -template <typename TCont> -TJobOperationPreparer::TOutputGroup TJobOperationPreparer::BeginOutputGroup(const TCont& indices) -{ - for (auto i : indices) { - ValidateOutputTableIndex(i, TStringBuf("BeginOutputGroup()")); - } - return TOutputGroup(*this, indices); -} - - -template <typename TRow> -TJobOperationPreparer& TJobOperationPreparer::InputDescription(int tableIndex) -{ - ValidateMissingInputDescription(tableIndex); - InputTableDescriptions_[tableIndex] = StructuredTableDescription<TRow>(); - return *this; -} - -template <typename TRow> -TJobOperationPreparer& TJobOperationPreparer::OutputDescription(int tableIndex, bool inferSchema) -{ - ValidateMissingOutputDescription(tableIndex); - OutputTableDescriptions_[tableIndex] = StructuredTableDescription<TRow>(); - if (inferSchema && !OutputSchemas_[tableIndex]) { - OutputSchemas_[tableIndex] = CreateTableSchema<TRow>(); - } - return *this; -} - -//////////////////////////////////////////////////////////////////////////////// - -template <class TDerived> -template <class TRow> -TDerived& TIntermediateTablesHintSpec<TDerived>::HintMapOutput() -{ - IntermediateMapOutputDescription_ = StructuredTableDescription<TRow>(); - return *static_cast<TDerived*>(this); -} - -template <class TDerived> -template <class TRow> -TDerived& TIntermediateTablesHintSpec<TDerived>::AddMapOutput(const TRichYPath& path) -{ - MapOutputs_.push_back(path); - StructuredMapOutputs_.emplace_back(Structured<TRow>(path)); - return *static_cast<TDerived*>(this); -} - -template <class TDerived> -template <class TRow> -TDerived& TIntermediateTablesHintSpec<TDerived>::HintReduceCombinerInput() -{ - IntermediateReduceCombinerInputDescription_ = 
StructuredTableDescription<TRow>(); - return *static_cast<TDerived*>(this); -} - -template <class TDerived> -template <class TRow> -TDerived& TIntermediateTablesHintSpec<TDerived>::HintReduceCombinerOutput() -{ - IntermediateReduceCombinerOutputDescription_ = StructuredTableDescription<TRow>(); - return *static_cast<TDerived*>(this); -} - -template <class TDerived> -template <class TRow> -TDerived& TIntermediateTablesHintSpec<TDerived>::HintReduceInput() -{ - IntermediateReducerInputDescription_ = StructuredTableDescription<TRow>(); - return *static_cast<TDerived*>(this); -} - -template <class TDerived> -const TVector<TStructuredTablePath>& TIntermediateTablesHintSpec<TDerived>::GetStructuredMapOutputs() const -{ - return StructuredMapOutputs_; -} - -template <class TDerived> -const TMaybe<TTableStructure>& TIntermediateTablesHintSpec<TDerived>::GetIntermediateMapOutputDescription() const -{ - return IntermediateMapOutputDescription_; -} - -template <class TDerived> -const TMaybe<TTableStructure>& TIntermediateTablesHintSpec<TDerived>::GetIntermediateReduceCombinerInputDescription() const -{ - return IntermediateReduceCombinerInputDescription_; -} - -template <class TDerived> -const TMaybe<TTableStructure>& TIntermediateTablesHintSpec<TDerived>::GetIntermediateReduceCombinerOutputDescription() const -{ - return IntermediateReduceCombinerOutputDescription_; -} - -template <class TDerived> -const TMaybe<TTableStructure>& TIntermediateTablesHintSpec<TDerived>::GetIntermediateReducerInputDescription() const -{ - return IntermediateReducerInputDescription_; -} - -//////////////////////////////////////////////////////////////////////////////// - -struct TReducerContext -{ - bool Break = false; - static TReducerContext* Get() { return Singleton<TReducerContext>(); } -}; - -template <class TR, class TW> -inline void IReducer<TR, TW>::Break() -{ - TReducerContext::Get()->Break = true; -} - -template <typename TReader, typename TWriter> -void FeedJobInput( - IMapper<TReader, 
TWriter>* mapper, - typename TRowTraits<typename TReader::TRowType>::IReaderImpl* readerImpl, - TWriter* writer) -{ - using TInputRow = typename TReader::TRowType; - - auto reader = MakeIntrusive<TTableReader<TInputRow>>(readerImpl); - mapper->Do(reader.Get(), writer); -} - -template <typename TReader, typename TWriter> -void FeedJobInput( - IReducer<TReader, TWriter>* reducer, - typename TRowTraits<typename TReader::TRowType>::IReaderImpl* readerImpl, - TWriter* writer) -{ - using TInputRow = typename TReader::TRowType; - - auto rangesReader = MakeIntrusive<TTableRangesReader<TInputRow>>(readerImpl); - for (; rangesReader->IsValid(); rangesReader->Next()) { - reducer->Do(&rangesReader->GetRange(), writer); - if (TReducerContext::Get()->Break) { - break; - } - } -} - -template <typename TReader, typename TWriter> -void FeedJobInput( - IAggregatorReducer<TReader, TWriter>* reducer, - typename TRowTraits<typename TReader::TRowType>::IReaderImpl* readerImpl, - TWriter* writer) -{ - using TInputRow = typename TReader::TRowType; - - auto rangesReader = MakeIntrusive<TTableRangesReader<TInputRow>>(readerImpl); - reducer->Do(rangesReader.Get(), writer); -} - -template <class TRawJob> -int RunRawJob(size_t outputTableCount, IInputStream& jobStateStream) -{ - TRawJobContext context(outputTableCount); - - TRawJob job; - job.Load(jobStateStream); - job.Do(context); - return 0; -} - -template <> -inline int RunRawJob<TCommandRawJob>(size_t /* outputTableCount */, IInputStream& /* jobStateStream */) -{ - Y_FAIL(); -} - -template <class TVanillaJob> -int RunVanillaJob(size_t outputTableCount, IInputStream& jobStateStream) -{ - TVanillaJob job; - job.Load(jobStateStream); - - if constexpr (std::is_base_of<IVanillaJob<>, TVanillaJob>::value) { - Y_VERIFY(outputTableCount == 0, "Void vanilla job expects zero 'outputTableCount'"); - job.Do(); - } else { - Y_VERIFY(outputTableCount, "Vanilla job with table writer expects nonzero 'outputTableCount'"); - using TOutputRow = typename 
TVanillaJob::TWriter::TRowType; - - THolder<IProxyOutput> rawJobWriter; - if (auto customWriter = job.CreateCustomRawJobWriter(outputTableCount)) { - rawJobWriter = std::move(customWriter); - } else { - rawJobWriter = CreateRawJobWriter(outputTableCount); - } - auto writer = CreateJobWriter<TOutputRow>(std::move(rawJobWriter)); - - job.Start(writer.Get()); - job.Do(writer.Get()); - job.Finish(writer.Get()); - - writer->Finish(); - } - return 0; -} - -template <> -inline int RunVanillaJob<TCommandVanillaJob>(size_t /* outputTableCount */, IInputStream& /* jobStateStream */) -{ - Y_FAIL(); -} - -template <class TJob> - requires TIsBaseOf<IStructuredJob, TJob>::Value -int RunJob(size_t outputTableCount, IInputStream& jobStateStream) -{ - using TInputRow = typename TJob::TReader::TRowType; - using TOutputRow = typename TJob::TWriter::TRowType; - - auto job = MakeIntrusive<TJob>(); - job->Load(jobStateStream); - - TRawTableReaderPtr rawJobReader; - if (auto customReader = job->CreateCustomRawJobReader(/*fd*/ 0)) { - rawJobReader = customReader; - } else { - rawJobReader = CreateRawJobReader(/*fd*/ 0); - } - auto readerImpl = CreateJobReaderImpl<TInputRow>(rawJobReader); - - // Many users don't expect to have jobs with empty input so we skip such jobs. - if (!readerImpl->IsValid()) { - return 0; - } - - THolder<IProxyOutput> rawJobWriter; - if (auto customWriter = job->CreateCustomRawJobWriter(outputTableCount)) { - rawJobWriter = std::move(customWriter); - } else { - rawJobWriter = CreateRawJobWriter(outputTableCount); - } - auto writer = CreateJobWriter<TOutputRow>(std::move(rawJobWriter)); - - job->Start(writer.Get()); - FeedJobInput(job.Get(), readerImpl.Get(), writer.Get()); - job->Finish(writer.Get()); - - writer->Finish(); - - return 0; -} - -// -// We leave RunMapJob/RunReduceJob/RunAggregatorReducer for backward compatibility, -// some user use them already. 
:( - -template <class TMapper> -int RunMapJob(size_t outputTableCount, IInputStream& jobStateStream) -{ - return RunJob<TMapper>(outputTableCount, jobStateStream); -} - -template <class TReducer> -int RunReduceJob(size_t outputTableCount, IInputStream& jobStateStream) -{ - return RunJob<TReducer>(outputTableCount, jobStateStream); -} - -template <class TReducer> -int RunAggregatorReducer(size_t outputTableCount, IInputStream& jobStateStream) -{ - return RunJob<TReducer>(outputTableCount, jobStateStream); -} - -//////////////////////////////////////////////////////////////////////////////// - -template <typename T, typename = void> -struct TIsConstructibleFromNode - : std::false_type -{ }; - -template <typename T> -struct TIsConstructibleFromNode<T, std::void_t<decltype(T::FromNode(std::declval<TNode&>()))>> - : std::true_type -{ }; - -template <class TJob> -::TIntrusivePtr<NYT::IStructuredJob> ConstructJobFromNode(const TNode& node) -{ - if constexpr (TIsConstructibleFromNode<TJob>::value) { - Y_ENSURE(node.GetType() != TNode::Undefined, - "job has FromNode method but constructor arguments were not provided"); - return TJob::FromNode(node); - } else { - Y_ENSURE(node.GetType() == TNode::Undefined, - "constructor arguments provided but job does not contain FromNode method"); - return MakeIntrusive<TJob>(); - } -} - -//////////////////////////////////////////////////////////////////////////////// - -using TJobFunction = int (*)(size_t, IInputStream&); -using TConstructJobFunction = ::TIntrusivePtr<NYT::IStructuredJob> (*)(const TNode&); - -class TJobFactory -{ -public: - static TJobFactory* Get() - { - return Singleton<TJobFactory>(); - } - - template <class TJob> - void RegisterJob(const char* name) - { - RegisterJobImpl<TJob>(name, RunJob<TJob>); - JobConstructors[name] = ConstructJobFromNode<TJob>; - } - - template <class TRawJob> - void RegisterRawJob(const char* name) - { - RegisterJobImpl<TRawJob>(name, RunRawJob<TRawJob>); - } - - template <class TVanillaJob> 
- void RegisterVanillaJob(const char* name) - { - RegisterJobImpl<TVanillaJob>(name, RunVanillaJob<TVanillaJob>); - } - - TString GetJobName(const IJob* job) - { - const auto typeIndex = std::type_index(typeid(*job)); - CheckJobRegistered(typeIndex); - return JobNames[typeIndex]; - } - - TJobFunction GetJobFunction(const char* name) - { - CheckNameRegistered(name); - return JobFunctions[name]; - } - - TConstructJobFunction GetConstructingFunction(const char* name) - { - CheckNameRegistered(name); - return JobConstructors[name]; - } - -private: - TMap<std::type_index, TString> JobNames; - THashMap<TString, TJobFunction> JobFunctions; - THashMap<TString, TConstructJobFunction> JobConstructors; - - template <typename TJob, typename TRunner> - void RegisterJobImpl(const char* name, TRunner runner) { - const auto typeIndex = std::type_index(typeid(TJob)); - CheckNotRegistered(typeIndex, name); - JobNames[typeIndex] = name; - JobFunctions[name] = runner; - } - - void CheckNotRegistered(const std::type_index& typeIndex, const char* name) - { - Y_ENSURE(!JobNames.contains(typeIndex), - "type_info '" << typeIndex.name() << "'" - "is already registered under name '" << JobNames[typeIndex] << "'"); - Y_ENSURE(!JobFunctions.contains(name), - "job with name '" << name << "' is already registered"); - } - - void CheckJobRegistered(const std::type_index& typeIndex) - { - Y_ENSURE(JobNames.contains(typeIndex), - "type_info '" << typeIndex.name() << "' is not registered, use REGISTER_* macros"); - } - - void CheckNameRegistered(const char* name) - { - Y_ENSURE(JobFunctions.contains(name), - "job with name '" << name << "' is not registered, use REGISTER_* macros"); - } -}; - -//////////////////////////////////////////////////////////////////////////////// - -template <class TMapper> -struct TMapperRegistrator -{ - TMapperRegistrator(const char* name) - { - static_assert(TMapper::JobType == IJob::EType::Mapper, - "REGISTER_MAPPER is not compatible with this job class"); - - 
NYT::TJobFactory::Get()->RegisterJob<TMapper>(name); - } -}; - -template <class TReducer> -struct TReducerRegistrator -{ - TReducerRegistrator(const char* name) - { - static_assert(TReducer::JobType == IJob::EType::Reducer || - TReducer::JobType == IJob::EType::ReducerAggregator, - "REGISTER_REDUCER is not compatible with this job class"); - - NYT::TJobFactory::Get()->RegisterJob<TReducer>(name); - } -}; - -template <class TRawJob> -struct TRawJobRegistrator -{ - TRawJobRegistrator(const char* name) - { - static_assert(TRawJob::JobType == IJob::EType::RawJob, - "REGISTER_RAW_JOB is not compatible with this job class"); - NYT::TJobFactory::Get()->RegisterRawJob<TRawJob>(name); - } -}; - -template <class TVanillaJob> -struct TVanillaJobRegistrator -{ - TVanillaJobRegistrator(const char* name) - { - static_assert(TVanillaJob::JobType == IJob::EType::VanillaJob, - "REGISTER_VANILLA_JOB is not compatible with this job class"); - NYT::TJobFactory::Get()->RegisterVanillaJob<TVanillaJob>(name); - } -}; - -//////////////////////////////////////////////////////////////////////////////// - -inline TString YtRegistryTypeName(const TString& name) { - TString res = name; -#ifdef _win_ - SubstGlobal(res, "class ", ""); -#endif - return res; -} - -//////////////////////////////////////////////////////////////////////////////// - -#define REGISTER_MAPPER(...) \ -static const NYT::TMapperRegistrator<__VA_ARGS__> \ -Y_GENERATE_UNIQUE_ID(TJobRegistrator)(NYT::YtRegistryTypeName(TypeName<__VA_ARGS__>()).data()); - -#define REGISTER_NAMED_MAPPER(name, ...) \ -static const NYT::TMapperRegistrator<__VA_ARGS__> \ -Y_GENERATE_UNIQUE_ID(TJobRegistrator)(name); - -#define REGISTER_REDUCER(...) \ -static const NYT::TReducerRegistrator<__VA_ARGS__> \ -Y_GENERATE_UNIQUE_ID(TJobRegistrator)(NYT::YtRegistryTypeName(TypeName<__VA_ARGS__>()).data()); - -#define REGISTER_NAMED_REDUCER(name, ...) 
\ -static const NYT::TReducerRegistrator<__VA_ARGS__> \ -Y_GENERATE_UNIQUE_ID(TJobRegistrator)(name); - -#define REGISTER_NAMED_RAW_JOB(name, ...) \ -static const NYT::TRawJobRegistrator<__VA_ARGS__> \ -Y_GENERATE_UNIQUE_ID(TJobRegistrator)(name); - -#define REGISTER_RAW_JOB(...) \ -REGISTER_NAMED_RAW_JOB((NYT::YtRegistryTypeName(TypeName<__VA_ARGS__>()).data()), __VA_ARGS__) - -#define REGISTER_NAMED_VANILLA_JOB(name, ...) \ -static NYT::TVanillaJobRegistrator<__VA_ARGS__> \ -Y_GENERATE_UNIQUE_ID(TJobRegistrator)(name); - -#define REGISTER_VANILLA_JOB(...) \ -REGISTER_NAMED_VANILLA_JOB((NYT::YtRegistryTypeName(TypeName<__VA_ARGS__>()).data()), __VA_ARGS__) - -//////////////////////////////////////////////////////////////////////////////// - -template <typename TReader, typename TWriter> -TStructuredRowStreamDescription IMapper<TReader, TWriter>::GetInputRowStreamDescription() const -{ - return NYT::NDetail::GetStructuredRowStreamDescription<typename TReader::TRowType>(); -} - -template <typename TReader, typename TWriter> -TStructuredRowStreamDescription IMapper<TReader, TWriter>::GetOutputRowStreamDescription() const -{ - return NYT::NDetail::GetStructuredRowStreamDescription<typename TWriter::TRowType>(); -} - -//////////////////////////////////////////////////////////////////////////////// - -template <typename TReader, typename TWriter> -TStructuredRowStreamDescription IReducer<TReader, TWriter>::GetInputRowStreamDescription() const -{ - return NYT::NDetail::GetStructuredRowStreamDescription<typename TReader::TRowType>(); -} - -template <typename TReader, typename TWriter> -TStructuredRowStreamDescription IReducer<TReader, TWriter>::GetOutputRowStreamDescription() const -{ - return NYT::NDetail::GetStructuredRowStreamDescription<typename TWriter::TRowType>(); -} - -//////////////////////////////////////////////////////////////////////////////// - -template <typename TReader, typename TWriter> -TStructuredRowStreamDescription IAggregatorReducer<TReader, 
TWriter>::GetInputRowStreamDescription() const -{ - return NYT::NDetail::GetStructuredRowStreamDescription<typename TReader::TRowType>(); -} - -template <typename TReader, typename TWriter> -TStructuredRowStreamDescription IAggregatorReducer<TReader, TWriter>::GetOutputRowStreamDescription() const -{ - return NYT::NDetail::GetStructuredRowStreamDescription<typename TWriter::TRowType>(); -} - -//////////////////////////////////////////////////////////////////////////////// - -template <typename TWriter> -TStructuredRowStreamDescription IVanillaJob<TWriter>::GetInputRowStreamDescription() const -{ - return TVoidStructuredRowStream(); -} - -template <typename TWriter> -TStructuredRowStreamDescription IVanillaJob<TWriter>::GetOutputRowStreamDescription() const -{ - return NYT::NDetail::GetStructuredRowStreamDescription<typename TWriter::TRowType>(); -} - -//////////////////////////////////////////////////////////////////////////////// - -} // namespace NYT diff --git a/yt/cpp/mapreduce/interface/operation.cpp b/yt/cpp/mapreduce/interface/operation.cpp deleted file mode 100644 index 706fc4caa4..0000000000 --- a/yt/cpp/mapreduce/interface/operation.cpp +++ /dev/null @@ -1,663 +0,0 @@ -#include "operation.h" - -#include <util/generic/iterator_range.h> - -namespace NYT { - -//////////////////////////////////////////////////////////////////////////////// - -namespace NDetail { - i64 OutputTableCount = -1; -} // namespace NDetail - -//////////////////////////////////////////////////////////////////////////////// - -TTaskName::TTaskName(TString taskName) - : TaskName_(std::move(taskName)) -{ } - -TTaskName::TTaskName(const char* taskName) - : TaskName_(taskName) -{ } - -TTaskName::TTaskName(ETaskName taskName) - : TaskName_(ToString(taskName)) -{ } - -const TString& TTaskName::Get() const -{ - return TaskName_; -} - -//////////////////////////////////////////////////////////////////////////////// - -TCommandRawJob::TCommandRawJob(TStringBuf command) - : Command_(command) -{ } 
- -const TString& TCommandRawJob::GetCommand() const -{ - return Command_; -} - -void TCommandRawJob::Do(const TRawJobContext& /* jobContext */) -{ - Y_FAIL("TCommandRawJob::Do must not be called"); -} - -REGISTER_NAMED_RAW_JOB("NYT::TCommandRawJob", TCommandRawJob) - -//////////////////////////////////////////////////////////////////////////////// - -TCommandVanillaJob::TCommandVanillaJob(TStringBuf command) - : Command_(command) -{ } - -const TString& TCommandVanillaJob::GetCommand() const -{ - return Command_; -} - -void TCommandVanillaJob::Do() -{ - Y_FAIL("TCommandVanillaJob::Do must not be called"); -} - -REGISTER_NAMED_VANILLA_JOB("NYT::TCommandVanillaJob", TCommandVanillaJob); - -//////////////////////////////////////////////////////////////////////////////// - -bool operator==(const TUnspecifiedTableStructure&, const TUnspecifiedTableStructure&) -{ - return true; -} - -bool operator==(const TProtobufTableStructure& lhs, const TProtobufTableStructure& rhs) -{ - return lhs.Descriptor == rhs.Descriptor; -} - -//////////////////////////////////////////////////////////////////////////////// - -const TVector<TStructuredTablePath>& TOperationInputSpecBase::GetStructuredInputs() const -{ - return StructuredInputs_; -} - -const TVector<TStructuredTablePath>& TOperationOutputSpecBase::GetStructuredOutputs() const -{ - return StructuredOutputs_; -} - -void TOperationInputSpecBase::AddStructuredInput(TStructuredTablePath path) -{ - Inputs_.push_back(path.RichYPath); - StructuredInputs_.push_back(std::move(path)); -} - -void TOperationOutputSpecBase::AddStructuredOutput(TStructuredTablePath path) -{ - Outputs_.push_back(path.RichYPath); - StructuredOutputs_.push_back(std::move(path)); -} - -//////////////////////////////////////////////////////////////////////////////// - -TVanillaTask& TVanillaTask::AddStructuredOutput(TStructuredTablePath path) -{ - TOperationOutputSpecBase::AddStructuredOutput(std::move(path)); - return *this; -} - 
-//////////////////////////////////////////////////////////////////////////////// - -TStructuredRowStreamDescription IVanillaJob<void>::GetInputRowStreamDescription() const -{ - return TVoidStructuredRowStream(); -} - -TStructuredRowStreamDescription IVanillaJob<void>::GetOutputRowStreamDescription() const -{ - return TVoidStructuredRowStream(); -} - -//////////////////////////////////////////////////////////////////////////////// - -TRawJobContext::TRawJobContext(size_t outputTableCount) - : InputFile_(Duplicate(0)) -{ - for (size_t i = 0; i != outputTableCount; ++i) { - OutputFileList_.emplace_back(Duplicate(3 * i + 1)); - } -} - -const TFile& TRawJobContext::GetInputFile() const -{ - return InputFile_; -} - -const TVector<TFile>& TRawJobContext::GetOutputFileList() const -{ - return OutputFileList_; -} - -//////////////////////////////////////////////////////////////////////////////// - -TUserJobSpec& TUserJobSpec::AddLocalFile( - const TLocalFilePath& path, - const TAddLocalFileOptions& options) -{ - LocalFiles_.emplace_back(path, options); - return *this; -} - -TUserJobSpec& TUserJobSpec::JobBinaryLocalPath(TString path, TMaybe<TString> md5) -{ - JobBinary_ = TJobBinaryLocalPath{path, md5}; - return *this; -} - -TUserJobSpec& TUserJobSpec::JobBinaryCypressPath(TString path, TMaybe<TTransactionId> transactionId) -{ - JobBinary_ = TJobBinaryCypressPath{path, transactionId}; - return *this; -} - -const TJobBinaryConfig& TUserJobSpec::GetJobBinary() const -{ - return JobBinary_; -} - -TVector<std::tuple<TLocalFilePath, TAddLocalFileOptions>> TUserJobSpec::GetLocalFiles() const -{ - return LocalFiles_; -} - -//////////////////////////////////////////////////////////////////////////////// - -TJobOperationPreparer::TInputGroup::TInputGroup(TJobOperationPreparer& preparer, TVector<int> indices) - : Preparer_(preparer) - , Indices_(std::move(indices)) -{ } - -TJobOperationPreparer::TInputGroup& TJobOperationPreparer::TInputGroup::ColumnRenaming(const THashMap<TString, 
TString>& renaming) -{ - for (auto i : Indices_) { - Preparer_.InputColumnRenaming(i, renaming); - } - return *this; -} - -TJobOperationPreparer::TInputGroup& TJobOperationPreparer::TInputGroup::ColumnFilter(const TVector<TString>& columns) -{ - for (auto i : Indices_) { - Preparer_.InputColumnFilter(i, columns); - } - return *this; -} - -TJobOperationPreparer& TJobOperationPreparer::TInputGroup::EndInputGroup() -{ - return Preparer_; -} - -TJobOperationPreparer::TOutputGroup::TOutputGroup(TJobOperationPreparer& preparer, TVector<int> indices) - : Preparer_(preparer) - , Indices_(std::move(indices)) -{ } - -TJobOperationPreparer::TOutputGroup& TJobOperationPreparer::TOutputGroup::Schema(const TTableSchema &schema) -{ - for (auto i : Indices_) { - Preparer_.OutputSchema(i, schema); - } - return *this; -} - -TJobOperationPreparer::TOutputGroup& TJobOperationPreparer::TOutputGroup::NoSchema() -{ - for (auto i : Indices_) { - Preparer_.NoOutputSchema(i); - } - return *this; -} - -TJobOperationPreparer& TJobOperationPreparer::TOutputGroup::EndOutputGroup() -{ - return Preparer_; -} - -//////////////////////////////////////////////////////////////////////////////// - -TJobOperationPreparer::TJobOperationPreparer(const IOperationPreparationContext& context) - : Context_(context) - , OutputSchemas_(context.GetOutputCount()) - , InputColumnRenamings_(context.GetInputCount()) - , InputColumnFilters_(context.GetInputCount()) - , InputTableDescriptions_(context.GetInputCount()) - , OutputTableDescriptions_(context.GetOutputCount()) -{ } - -TJobOperationPreparer::TInputGroup TJobOperationPreparer::BeginInputGroup(int begin, int end) -{ - Y_ENSURE_EX(begin <= end, TApiUsageError() - << "BeginInputGroup(): begin must not exceed end, got " << begin << ", " << end); - TVector<int> indices; - for (int i = begin; i < end; ++i) { - ValidateInputTableIndex(i, TStringBuf("BeginInputGroup()")); - indices.push_back(i); - } - return TInputGroup(*this, std::move(indices)); -} - - 
-TJobOperationPreparer::TOutputGroup TJobOperationPreparer::BeginOutputGroup(int begin, int end) -{ - Y_ENSURE_EX(begin <= end, TApiUsageError() - << "BeginOutputGroup(): begin must not exceed end, got " << begin << ", " << end); - TVector<int> indices; - for (int i = begin; i < end; ++i) { - ValidateOutputTableIndex(i, TStringBuf("BeginOutputGroup()")); - indices.push_back(i); - } - return TOutputGroup(*this, std::move(indices)); -} - -TJobOperationPreparer& TJobOperationPreparer::NodeOutput(int tableIndex) -{ - ValidateMissingOutputDescription(tableIndex); - OutputTableDescriptions_[tableIndex] = StructuredTableDescription<TNode>(); - return *this; -} - -TJobOperationPreparer& TJobOperationPreparer::OutputSchema(int tableIndex, TTableSchema schema) -{ - ValidateMissingOutputSchema(tableIndex); - OutputSchemas_[tableIndex] = std::move(schema); - return *this; -} - -TJobOperationPreparer& TJobOperationPreparer::NoOutputSchema(int tableIndex) -{ - ValidateMissingOutputSchema(tableIndex); - OutputSchemas_[tableIndex] = EmptyNonstrictSchema(); - return *this; -} - -TJobOperationPreparer& TJobOperationPreparer::InputColumnRenaming( - int tableIndex, - const THashMap<TString,TString>& renaming) -{ - ValidateInputTableIndex(tableIndex, TStringBuf("InputColumnRenaming()")); - InputColumnRenamings_[tableIndex] = renaming; - return *this; -} - -TJobOperationPreparer& TJobOperationPreparer::InputColumnFilter(int tableIndex, const TVector<TString>& columns) -{ - ValidateInputTableIndex(tableIndex, TStringBuf("InputColumnFilter()")); - InputColumnFilters_[tableIndex] = columns; - return *this; -} - -TJobOperationPreparer& TJobOperationPreparer::FormatHints(TUserJobFormatHints newFormatHints) -{ - FormatHints_ = newFormatHints; - return *this; -} - -void TJobOperationPreparer::Finish() -{ - FinallyValidate(); -} - -TVector<TTableSchema> TJobOperationPreparer::GetOutputSchemas() -{ - TVector<TTableSchema> result; - result.reserve(OutputSchemas_.size()); - for (auto& schema : 
OutputSchemas_) { - Y_VERIFY(schema.Defined()); - result.push_back(std::move(*schema)); - schema.Clear(); - } - return result; -} - -void TJobOperationPreparer::FinallyValidate() const -{ - TVector<int> illegallyMissingSchemaIndices; - for (int i = 0; i < static_cast<int>(OutputSchemas_.size()); ++i) { - if (!OutputSchemas_[i]) { - illegallyMissingSchemaIndices.push_back(i); - } - } - if (illegallyMissingSchemaIndices.empty()) { - return; - } - TApiUsageError error; - error << "Output table schemas are missing: "; - for (auto i : illegallyMissingSchemaIndices) { - error << "no. " << i; - if (auto path = Context_.GetInputPath(i)) { - error << "(" << *path << ")"; - } - error << "; "; - } - ythrow std::move(error); -} - -//////////////////////////////////////////////////////////////////////////////// - -void TJobOperationPreparer::ValidateInputTableIndex(int tableIndex, TStringBuf message) const -{ - Y_ENSURE_EX( - 0 <= tableIndex && tableIndex < static_cast<int>(Context_.GetInputCount()), - TApiUsageError() << - message << ": input table index " << tableIndex << " us out of range [0;" << - OutputSchemas_.size() << ")"); -} - -void TJobOperationPreparer::ValidateOutputTableIndex(int tableIndex, TStringBuf message) const -{ - Y_ENSURE_EX( - 0 <= tableIndex && tableIndex < static_cast<int>(Context_.GetOutputCount()), - TApiUsageError() << - message << ": output table index " << tableIndex << " us out of range [0;" << - OutputSchemas_.size() << ")"); -} - -void TJobOperationPreparer::ValidateMissingOutputSchema(int tableIndex) const -{ - ValidateOutputTableIndex(tableIndex, "ValidateMissingOutputSchema()"); - Y_ENSURE_EX(!OutputSchemas_[tableIndex], - TApiUsageError() << - "Output table schema no. 
" << tableIndex << " " << - "(" << Context_.GetOutputPath(tableIndex).GetOrElse("<unknown path>") << ") " << - "is already set"); -} - -void TJobOperationPreparer::ValidateMissingInputDescription(int tableIndex) const -{ - ValidateInputTableIndex(tableIndex, "ValidateMissingInputDescription()"); - Y_ENSURE_EX(!InputTableDescriptions_[tableIndex], - TApiUsageError() << - "Description for input no. " << tableIndex << " " << - "(" << Context_.GetOutputPath(tableIndex).GetOrElse("<unknown path>") << ") " << - "is already set"); -} - -void TJobOperationPreparer::ValidateMissingOutputDescription(int tableIndex) const -{ - ValidateOutputTableIndex(tableIndex, "ValidateMissingOutputDescription()"); - Y_ENSURE_EX(!OutputTableDescriptions_[tableIndex], - TApiUsageError() << - "Description for output no. " << tableIndex << " " << - "(" << Context_.GetOutputPath(tableIndex).GetOrElse("<unknown path>") << ") " << - "is already set"); -} - -TTableSchema TJobOperationPreparer::EmptyNonstrictSchema() { - return TTableSchema().Strict(false); -} - -//////////////////////////////////////////////////////////////////////////////// - -const TVector<THashMap<TString, TString>>& TJobOperationPreparer::GetInputColumnRenamings() const -{ - return InputColumnRenamings_; -} - -const TVector<TMaybe<TVector<TString>>>& TJobOperationPreparer::GetInputColumnFilters() const -{ - return InputColumnFilters_; -} - -const TVector<TMaybe<TTableStructure>>& TJobOperationPreparer::GetInputDescriptions() const -{ - return InputTableDescriptions_; -} - -const TVector<TMaybe<TTableStructure>>& TJobOperationPreparer::GetOutputDescriptions() const -{ - return OutputTableDescriptions_; -} - -const TUserJobFormatHints& TJobOperationPreparer::GetFormatHints() const -{ - return FormatHints_; -} - -TJobOperationPreparer& TJobOperationPreparer::InputFormatHints(TFormatHints hints) -{ - FormatHints_.InputFormatHints(hints); - return *this; -} - -TJobOperationPreparer& 
TJobOperationPreparer::OutputFormatHints(TFormatHints hints) -{ - FormatHints_.OutputFormatHints(hints); - return *this; -} - -//////////////////////////////////////////////////////////////////////////////// - -void IJob::PrepareOperation(const IOperationPreparationContext& context, TJobOperationPreparer& resultBuilder) const -{ - for (int i = 0; i < context.GetOutputCount(); ++i) { - resultBuilder.NoOutputSchema(i); - } -} - -//////////////////////////////////////////////////////////////////////////////// - -IOperationPtr IOperationClient::Map( - const TMapOperationSpec& spec, - ::TIntrusivePtr<IMapperBase> mapper, - const TOperationOptions& options) -{ - Y_VERIFY(mapper.Get()); - - return DoMap( - spec, - std::move(mapper), - options); -} - -IOperationPtr IOperationClient::Map( - ::TIntrusivePtr<IMapperBase> mapper, - const TOneOrMany<TStructuredTablePath>& input, - const TOneOrMany<TStructuredTablePath>& output, - const TMapOperationSpec& spec, - const TOperationOptions& options) -{ - Y_ENSURE_EX(spec.Inputs_.empty(), - TApiUsageError() << "TMapOperationSpec::Inputs MUST be empty"); - Y_ENSURE_EX(spec.Outputs_.empty(), - TApiUsageError() << "TMapOperationSpec::Outputs MUST be empty"); - - auto mapSpec = spec; - for (const auto& inputPath : input.Parts_) { - mapSpec.AddStructuredInput(inputPath); - } - for (const auto& outputPath : output.Parts_) { - mapSpec.AddStructuredOutput(outputPath); - } - return Map(mapSpec, std::move(mapper), options); -} - -IOperationPtr IOperationClient::Reduce( - const TReduceOperationSpec& spec, - ::TIntrusivePtr<IReducerBase> reducer, - const TOperationOptions& options) -{ - Y_VERIFY(reducer.Get()); - - return DoReduce( - spec, - std::move(reducer), - options); -} - -IOperationPtr IOperationClient::Reduce( - ::TIntrusivePtr<IReducerBase> reducer, - const TOneOrMany<TStructuredTablePath>& input, - const TOneOrMany<TStructuredTablePath>& output, - const TSortColumns& reduceBy, - const TReduceOperationSpec& spec, - const 
TOperationOptions& options) -{ - Y_ENSURE_EX(spec.Inputs_.empty(), - TApiUsageError() << "TReduceOperationSpec::Inputs MUST be empty"); - Y_ENSURE_EX(spec.Outputs_.empty(), - TApiUsageError() << "TReduceOperationSpec::Outputs MUST be empty"); - Y_ENSURE_EX(spec.ReduceBy_.Parts_.empty(), - TApiUsageError() << "TReduceOperationSpec::ReduceBy MUST be empty"); - - auto reduceSpec = spec; - for (const auto& inputPath : input.Parts_) { - reduceSpec.AddStructuredInput(inputPath); - } - for (const auto& outputPath : output.Parts_) { - reduceSpec.AddStructuredOutput(outputPath); - } - reduceSpec.ReduceBy(reduceBy); - return Reduce(reduceSpec, std::move(reducer), options); -} - -IOperationPtr IOperationClient::JoinReduce( - const TJoinReduceOperationSpec& spec, - ::TIntrusivePtr<IReducerBase> reducer, - const TOperationOptions& options) -{ - Y_VERIFY(reducer.Get()); - - return DoJoinReduce( - spec, - std::move(reducer), - options); -} - -IOperationPtr IOperationClient::MapReduce( - const TMapReduceOperationSpec& spec, - ::TIntrusivePtr<IMapperBase> mapper, - ::TIntrusivePtr<IReducerBase> reducer, - const TOperationOptions& options) -{ - Y_VERIFY(reducer.Get()); - - return DoMapReduce( - spec, - std::move(mapper), - nullptr, - std::move(reducer), - options); -} - -IOperationPtr IOperationClient::MapReduce( - const TMapReduceOperationSpec& spec, - ::TIntrusivePtr<IMapperBase> mapper, - ::TIntrusivePtr<IReducerBase> reduceCombiner, - ::TIntrusivePtr<IReducerBase> reducer, - const TOperationOptions& options) -{ - Y_VERIFY(reducer.Get()); - - return DoMapReduce( - spec, - std::move(mapper), - std::move(reduceCombiner), - std::move(reducer), - options); -} - -IOperationPtr IOperationClient::MapReduce( - ::TIntrusivePtr<IMapperBase> mapper, - ::TIntrusivePtr<IReducerBase> reducer, - const TOneOrMany<TStructuredTablePath>& input, - const TOneOrMany<TStructuredTablePath>& output, - const TSortColumns& reduceBy, - TMapReduceOperationSpec spec, - const TOperationOptions& options) -{ - 
Y_ENSURE_EX(spec.Inputs_.empty(), - TApiUsageError() << "TMapReduceOperationSpec::Inputs MUST be empty"); - Y_ENSURE_EX(spec.Outputs_.empty(), - TApiUsageError() << "TMapReduceOperationSpec::Outputs MUST be empty"); - Y_ENSURE_EX(spec.ReduceBy_.Parts_.empty(), - TApiUsageError() << "TMapReduceOperationSpec::ReduceBy MUST be empty"); - - for (const auto& inputPath : input.Parts_) { - spec.AddStructuredInput(inputPath); - } - for (const auto& outputPath : output.Parts_) { - spec.AddStructuredOutput(outputPath); - } - spec.ReduceBy(reduceBy); - return MapReduce(spec, std::move(mapper), std::move(reducer), options); -} - -IOperationPtr IOperationClient::MapReduce( - ::TIntrusivePtr<IMapperBase> mapper, - ::TIntrusivePtr<IReducerBase> reduceCombiner, - ::TIntrusivePtr<IReducerBase> reducer, - const TOneOrMany<TStructuredTablePath>& input, - const TOneOrMany<TStructuredTablePath>& output, - const TSortColumns& reduceBy, - TMapReduceOperationSpec spec, - const TOperationOptions& options) -{ - Y_ENSURE_EX(spec.Inputs_.empty(), - TApiUsageError() << "TMapReduceOperationSpec::Inputs MUST be empty"); - Y_ENSURE_EX(spec.Outputs_.empty(), - TApiUsageError() << "TMapReduceOperationSpec::Outputs MUST be empty"); - Y_ENSURE_EX(spec.ReduceBy_.Parts_.empty(), - TApiUsageError() << "TMapReduceOperationSpec::ReduceBy MUST be empty"); - - for (const auto& inputPath : input.Parts_) { - spec.AddStructuredInput(inputPath); - } - for (const auto& outputPath : output.Parts_) { - spec.AddStructuredOutput(outputPath); - } - spec.ReduceBy(reduceBy); - return MapReduce(spec, std::move(mapper), std::move(reduceCombiner), std::move(reducer), options); -} - -IOperationPtr IOperationClient::Sort( - const TOneOrMany<TRichYPath>& input, - const TRichYPath& output, - const TSortColumns& sortBy, - const TSortOperationSpec& spec, - const TOperationOptions& options) -{ - Y_ENSURE_EX(spec.Inputs_.empty(), - TApiUsageError() << "TSortOperationSpec::Inputs MUST be empty"); - 
Y_ENSURE_EX(spec.Output_.Path_.empty(), - TApiUsageError() << "TSortOperationSpec::Output MUST be empty"); - Y_ENSURE_EX(spec.SortBy_.Parts_.empty(), - TApiUsageError() << "TSortOperationSpec::SortBy MUST be empty"); - - auto sortSpec = spec; - for (const auto& inputPath : input.Parts_) { - sortSpec.AddInput(inputPath); - } - sortSpec.Output(output); - sortSpec.SortBy(sortBy); - return Sort(sortSpec, options); -} - -//////////////////////////////////////////////////////////////////////////////// - -TRawTableReaderPtr IStructuredJob::CreateCustomRawJobReader(int) const -{ - return nullptr; -} - -THolder<IProxyOutput> IStructuredJob::CreateCustomRawJobWriter(size_t) const -{ - return nullptr; -} - -//////////////////////////////////////////////////////////////////////////////// - -} // namespace NYT diff --git a/yt/cpp/mapreduce/interface/operation.h b/yt/cpp/mapreduce/interface/operation.h deleted file mode 100644 index 171a7e4af7..0000000000 --- a/yt/cpp/mapreduce/interface/operation.h +++ /dev/null @@ -1,3494 +0,0 @@ -#pragma once - -/// -/// @file yt/cpp/mapreduce/interface/operation.h -/// -/// Header containing interface to run operations in YT -/// and retrieve information about them. -/// @see [the doc](https://yt.yandex-team.ru/docs/description/mr/map_reduce_overview.html). - -#include "client_method_options.h" -#include "errors.h" -#include "io.h" -#include "job_statistics.h" -#include "job_counters.h" - -#include <library/cpp/threading/future/future.h> -#include <library/cpp/type_info/type_info.h> - -#include <util/datetime/base.h> -#include <util/generic/variant.h> -#include <util/generic/vector.h> -#include <util/generic/maybe.h> -#include <util/system/file.h> -#include <util/system/types.h> - -namespace NYT { - -//////////////////////////////////////////////////////////////////////////////// - -/// Tag class marking that the row type for table is not specified. 
-struct TUnspecifiedTableStructure -{ }; - -/// Tag class marking that table rows have protobuf type. -struct TProtobufTableStructure -{ - /// @brief Descriptor of the protobuf type of table rows. - /// - /// @note If table is tagged with @ref ::google::protobuf::Message instead of real proto class - /// this descriptor might be null. - const ::google::protobuf::Descriptor* Descriptor = nullptr; -}; - - -/// Tag class to specify table row type. -using TTableStructure = std::variant< - TUnspecifiedTableStructure, - TProtobufTableStructure ->; - -bool operator==(const TUnspecifiedTableStructure&, const TUnspecifiedTableStructure&); -bool operator==(const TProtobufTableStructure& lhs, const TProtobufTableStructure& rhs); - -/// Table path marked with @ref NYT::TTableStructure tag. -struct TStructuredTablePath -{ - TStructuredTablePath(TRichYPath richYPath = TRichYPath(), TTableStructure description = TUnspecifiedTableStructure()) - : RichYPath(std::move(richYPath)) - , Description(std::move(description)) - { } - - TStructuredTablePath(TRichYPath richYPath, const ::google::protobuf::Descriptor* descriptor) - : RichYPath(std::move(richYPath)) - , Description(TProtobufTableStructure({descriptor})) - { } - - TStructuredTablePath(TYPath path) - : RichYPath(std::move(path)) - , Description(TUnspecifiedTableStructure()) - { } - - TStructuredTablePath(const char* path) - : RichYPath(path) - , Description(TUnspecifiedTableStructure()) - { } - - TRichYPath RichYPath; - TTableStructure Description; -}; - -/// Create marked table path from row type. -template <typename TRow> -TStructuredTablePath Structured(TRichYPath richYPath); - -/// Create tag class from row type. -template <typename TRow> -TTableStructure StructuredTableDescription(); - -/////////////////////////////////////////////////////////////////////////////// - -/// Tag class marking that row stream is empty. -struct TVoidStructuredRowStream -{ }; - -/// Tag class marking that row stream consists of `NYT::TNode`. 
-struct TTNodeStructuredRowStream -{ }; - -/// Tag class marking that row stream consists of @ref NYT::TYaMRRow. -struct TTYaMRRowStructuredRowStream -{ }; - -/// Tag class marking that row stream consists of protobuf rows of given type. -struct TProtobufStructuredRowStream -{ - /// @brief Descriptor of the protobuf type of table rows. - /// - /// @note If `Descriptor` is nullptr, then row stream consists of multiple message types. - const ::google::protobuf::Descriptor* Descriptor = nullptr; -}; - -/// Tag class to specify type of rows in an operation row stream -using TStructuredRowStreamDescription = std::variant< - TVoidStructuredRowStream, - TTNodeStructuredRowStream, - TTYaMRRowStructuredRowStream, - TProtobufStructuredRowStream ->; - -/////////////////////////////////////////////////////////////////////////////// - -/// Tag class marking that current binary should be used in operation. -struct TJobBinaryDefault -{ }; - -/// Tag class marking that binary from specified local path should be used in operation. -struct TJobBinaryLocalPath -{ - TString Path; - TMaybe<TString> MD5CheckSum; -}; - -/// Tag class marking that binary from specified Cypress path should be used in operation. -struct TJobBinaryCypressPath -{ - TYPath Path; - TMaybe<TTransactionId> TransactionId; -}; - -//////////////////////////////////////////////////////////////////////////////// - - -/// @cond Doxygen_Suppress -namespace NDetail { - extern i64 OutputTableCount; -} // namespace NDetail -/// @endcond - -//////////////////////////////////////////////////////////////////////////////// - -/// -/// @brief Auto merge mode. -/// -/// @see https://yt.yandex-team.ru/docs/description/mr/automerge -enum class EAutoMergeMode -{ - /// Auto merge is disabled. - Disabled /* "disabled" */, - - /// Mode that tries to achieve good chunk sizes and doesn't limit usage of chunk quota for intermediate chunks. 
- Relaxed /* "relaxed" */, - - /// Mode that tries to optimize usage of chunk quota for intermediate chunks, operation might run slower. - Economy /* "economy" */, - - /// - /// @brief Manual configuration of automerge parameters. - /// - /// @ref TAutoMergeSpec - Manual /* "manual" */, -}; - -/// -/// @brief Options for auto merge operation stage. -/// -/// @see https://yt.yandex-team.ru/docs/description/mr/automerge -class TAutoMergeSpec -{ -public: - /// @cond Doxygen_Suppress - using TSelf = TAutoMergeSpec; - /// @endcond - - /// Mode of the auto merge. - FLUENT_FIELD_OPTION(EAutoMergeMode, Mode); - - /// @brief Upper limit for number of intermediate chunks. - /// - /// Works only for Manual mode. - FLUENT_FIELD_OPTION(i64, MaxIntermediateChunkCount); - - /// @brief Number of chunks limit to merge in one job. - /// - /// Works only for Manual mode. - FLUENT_FIELD_OPTION(i64, ChunkCountPerMergeJob); - - /// @brief Automerge will not merge chunks that are larger than `DesiredChunkSize * (ChunkSizeThreshold / 100.)` - /// - /// Works only for Manual mode. - FLUENT_FIELD_OPTION(i64, ChunkSizeThreshold); -}; - -/// Base for operations with auto merge options. -template <class TDerived> -class TWithAutoMergeSpec -{ -public: - /// @cond Doxygen_Suppress - using TSelf = TDerived; - /// @endcond - - /// @brief Options for auto merge operation stage. - /// - /// @see https://yt.yandex-team.ru/docs/description/mr/automerge - FLUENT_FIELD_OPTION(TAutoMergeSpec, AutoMerge); -}; - -/// -/// @brief Resources controlled by scheduler and used by running operations. -/// -/// @see https://yt.yandex-team.ru/docs/description/mr/scheduler/scheduler_and_pools#resursy -class TSchedulerResources -{ -public: - /// @cond Doxygen_Suppress - using TSelf = TSchedulerResources; - /// @endcond - - /// Each job consumes exactly one user slot. - FLUENT_FIELD_OPTION_ENCAPSULATED(i64, UserSlots); - - /// Number of (virtual) cpu cores consumed by all jobs. 
- FLUENT_FIELD_OPTION_ENCAPSULATED(i64, Cpu); - - /// Amount of memory in bytes. - FLUENT_FIELD_OPTION_ENCAPSULATED(i64, Memory); -}; - -/// Base for input format hints of a user job. -template <class TDerived> -class TUserJobInputFormatHintsBase -{ -public: - /// @cond Doxygen_Suppress - using TSelf = TDerived; - /// @endcond - - /// @brief Fine tune input format of the job. - FLUENT_FIELD_OPTION(TFormatHints, InputFormatHints); -}; - -/// Base for output format hints of a user job. -template <class TDerived> -class TUserJobOutputFormatHintsBase -{ -public: - /// @cond Doxygen_Suppress - using TSelf = TDerived; - /// @endcond - - /// @brief Fine tune output format of the job. - FLUENT_FIELD_OPTION(TFormatHints, OutputFormatHints); -}; - -/// Base for format hints of a user job. -template <class TDerived> -class TUserJobFormatHintsBase - : public TUserJobInputFormatHintsBase<TDerived> - , public TUserJobOutputFormatHintsBase<TDerived> -{ -public: - /// @cond Doxygen_Suppress - using TSelf = TDerived; - /// @endcond -}; - -/// User job format hints. -class TUserJobFormatHints - : public TUserJobFormatHintsBase<TUserJobFormatHints> -{ }; - -/// Spec of input and output tables of a raw operation. -template <class TDerived> -class TRawOperationIoTableSpec -{ -public: - /// @cond Doxygen_Suppress - using TSelf = TDerived; - /// @endcond - - /// Add input table path to input path list. - TDerived& AddInput(const TRichYPath& path); - - /// Set input table path no. `tableIndex`. - TDerived& SetInput(size_t tableIndex, const TRichYPath& path); - - /// Add output table path to output path list. - TDerived& AddOutput(const TRichYPath& path); - - /// Set output table path no. `tableIndex`. - TDerived& SetOutput(size_t tableIndex, const TRichYPath& path); - - /// Get all input table paths. - const TVector<TRichYPath>& GetInputs() const; - - /// Get all output table paths. 
- const TVector<TRichYPath>& GetOutputs() const; - -private: - TVector<TRichYPath> Inputs_; - TVector<TRichYPath> Outputs_; -}; - -/// Base spec for IO in "simple" raw operations (Map, Reduce etc.). -template <class TDerived> -struct TSimpleRawOperationIoSpec - : public TRawOperationIoTableSpec<TDerived> -{ - /// @cond Doxygen_Suppress - using TSelf = TDerived; - /// @endcond - - /// @brief Describes format for both input and output. - /// - /// @note `Format' is overriden by `InputFormat' and `OutputFormat'. - FLUENT_FIELD_OPTION(TFormat, Format); - - /// Describes input format. - FLUENT_FIELD_OPTION(TFormat, InputFormat); - - /// Describes output format. - FLUENT_FIELD_OPTION(TFormat, OutputFormat); -}; - -/// Spec for IO in MapReduce operation. -template <class TDerived> -class TRawMapReduceOperationIoSpec - : public TRawOperationIoTableSpec<TDerived> -{ -public: - /// @cond Doxygen_Suppress - using TSelf = TDerived; - /// @endcond - - /// @brief Describes format for both input and output of mapper. - /// - /// @note `MapperFormat' is overriden by `MapperInputFormat' and `MapperOutputFormat'. - FLUENT_FIELD_OPTION(TFormat, MapperFormat); - - /// Describes mapper input format. - FLUENT_FIELD_OPTION(TFormat, MapperInputFormat); - - /// Describes mapper output format. - FLUENT_FIELD_OPTION(TFormat, MapperOutputFormat); - - /// @brief Describes format for both input and output of reduce combiner. - /// - /// @note `ReduceCombinerFormat' is overriden by `ReduceCombinerInputFormat' and `ReduceCombinerOutputFormat'. - FLUENT_FIELD_OPTION(TFormat, ReduceCombinerFormat); - - /// Describes reduce combiner input format. - FLUENT_FIELD_OPTION(TFormat, ReduceCombinerInputFormat); - - /// Describes reduce combiner output format. - FLUENT_FIELD_OPTION(TFormat, ReduceCombinerOutputFormat); - - /// @brief Describes format for both input and output of reducer. - /// - /// @note `ReducerFormat' is overriden by `ReducerInputFormat' and `ReducerOutputFormat'. 
- FLUENT_FIELD_OPTION(TFormat, ReducerFormat); - - /// Describes reducer input format. - FLUENT_FIELD_OPTION(TFormat, ReducerInputFormat); - - /// Describes reducer output format. - FLUENT_FIELD_OPTION(TFormat, ReducerOutputFormat); - - /// Add direct map output table path. - TDerived& AddMapOutput(const TRichYPath& path); - - /// Set direct map output table path no. `tableIndex`. - TDerived& SetMapOutput(size_t tableIndex, const TRichYPath& path); - - /// Get all direct map output table paths - const TVector<TRichYPath>& GetMapOutputs() const; - -private: - TVector<TRichYPath> MapOutputs_; -}; - -/// -/// @brief Base spec of operations with input tables. -class TOperationInputSpecBase -{ -public: - template <class T, class = void> - struct TFormatAdder; - - /// - /// @brief Add input table path to input path list and specify type of rows. - template <class T> - void AddInput(const TRichYPath& path); - - /// - /// @brief Add input table path as structured paths. - void AddStructuredInput(TStructuredTablePath path); - - /// - /// @brief Set input table path and type. - template <class T> - void SetInput(size_t tableIndex, const TRichYPath& path); - - /// - /// @brief All input paths. - TVector<TRichYPath> Inputs_; - - /// - /// @brief Get all input structured paths. - const TVector<TStructuredTablePath>& GetStructuredInputs() const; - -private: - TVector<TStructuredTablePath> StructuredInputs_; - friend struct TOperationIOSpecBase; - template <class T> - friend struct TOperationIOSpec; -}; - -/// -/// @brief Base spec of operations with output tables. -class TOperationOutputSpecBase -{ -public: - template <class T, class = void> - struct TFormatAdder; - - /// - /// @brief Add output table path to output path list and specify type of rows. - template <class T> - void AddOutput(const TRichYPath& path); - - /// - /// @brief Add output table path as structured paths. 
- void AddStructuredOutput(TStructuredTablePath path); - - /// - /// @brief Set output table path and type. - template <class T> - void SetOutput(size_t tableIndex, const TRichYPath& path); - - /// - /// @brief All output paths. - TVector<TRichYPath> Outputs_; - - /// - /// @brief Get all output structured paths. - const TVector<TStructuredTablePath>& GetStructuredOutputs() const; - -private: - TVector<TStructuredTablePath> StructuredOutputs_; - friend struct TOperationIOSpecBase; - template <class T> - friend struct TOperationIOSpec; -}; - -/// -/// @brief Base spec for operations with inputs and outputs. -struct TOperationIOSpecBase - : public TOperationInputSpecBase - , public TOperationOutputSpecBase -{ }; - -/// -/// @brief Base spec for operations with inputs and outputs. -template <class TDerived> -struct TOperationIOSpec - : public TOperationIOSpecBase -{ - /// @cond Doxygen_Suppress - using TSelf = TDerived; - /// @endcond - - template <class T> - TDerived& AddInput(const TRichYPath& path); - - TDerived& AddStructuredInput(TStructuredTablePath path); - - template <class T> - TDerived& SetInput(size_t tableIndex, const TRichYPath& path); - - template <class T> - TDerived& AddOutput(const TRichYPath& path); - - TDerived& AddStructuredOutput(TStructuredTablePath path); - - template <class T> - TDerived& SetOutput(size_t tableIndex, const TRichYPath& path); - - - // DON'T USE THESE METHODS! They are left solely for backward compatibility. - // These methods are the only way to do equivalent of (Add/Set)(Input/Output)<Message> - // but please consider using (Add/Set)(Input/Output)<TConcreteMessage> - // (where TConcreteMessage is some descendant of Message) - // because they are faster and better (see https://st.yandex-team.ru/YT-6967) - TDerived& AddProtobufInput_VerySlow_Deprecated(const TRichYPath& path); - TDerived& AddProtobufOutput_VerySlow_Deprecated(const TRichYPath& path); -}; - -/// -/// @brief Base spec for all operations. 
-/// -/// @see https://yt.yandex-team.ru/docs/description/mr/operations_options -template <class TDerived> -struct TOperationSpecBase -{ - /// @cond Doxygen_Suppress - using TSelf = TDerived; - /// @endcond - - /// - /// @brief Limit on operation execution time. - /// - /// If operation doesn't finish in time it will be aborted. - FLUENT_FIELD_OPTION(TDuration, TimeLimit); - - /// @brief Title to be shown in web interface. - FLUENT_FIELD_OPTION(TString, Title); - - /// @brief Pool to be used for this operation. - FLUENT_FIELD_OPTION(TString, Pool); - - /// @brief Weight of operation. - /// - /// Coefficient defining how much resources operation gets relative to its siblings in the same pool. - FLUENT_FIELD_OPTION(double, Weight); - - /// @breif Pool tree list that operation will use. - FLUENT_OPTIONAL_VECTOR_FIELD_ENCAPSULATED(TString, PoolTree); - - /// How much resources can be consumed by operation. - FLUENT_FIELD_OPTION_ENCAPSULATED(TSchedulerResources, ResourceLimits); -}; - -/// -/// @brief Base spec for all operations with user jobs. -template <class TDerived> -struct TUserOperationSpecBase - : TOperationSpecBase<TDerived> -{ - /// @cond Doxygen_Suppress - using TSelf = TDerived; - /// @endcond - - /// How many jobs can fail before operation is failed. - FLUENT_FIELD_OPTION(ui64, MaxFailedJobCount); - - /// On any unsuccessful job completion (i.e. abortion or failure) force the whole operation to fail. - FLUENT_FIELD_OPTION(bool, FailOnJobRestart); - - /// - /// @brief Table to save whole stderr of operation. - /// - /// @see https://clubs.at.yandex-team.ru/yt/1045 - FLUENT_FIELD_OPTION(TYPath, StderrTablePath); - - /// - /// @brief Table to save coredumps of operation. - /// - /// @see https://clubs.at.yandex-team.ru/yt/1045 - FLUENT_FIELD_OPTION(TYPath, CoreTablePath); - - /// - /// @brief How long should the scheduler wait for the job to be started on a node. 
- /// - /// When you run huge jobs that require preemption of all the other jobs on - /// a node, the default timeout might be insufficient and your job may be - /// aborted with 'waiting_timeout' reason. This is especially problematic - /// when you are setting 'FailOnJobRestart' option. - /// - /// @note The value must be between 10 seconds and 10 minutes. - FLUENT_FIELD_OPTION(TDuration, WaitingJobTimeout); -}; - -/// -/// @brief Class to provide information on intermediate mapreduce stream protobuf types. -/// -/// When using protobuf format it is important to know exact types of proto messages -/// that are used in input/output. -/// -/// Sometimes such messages cannot be derived from job class -/// i.e. when job class uses `NYT::TTableReader<::google::protobuf::Message>` -/// or `NYT::TTableWriter<::google::protobuf::Message>`. -/// -/// When using such jobs user can provide exact message type using this class. -/// -/// @note Only input/output that relate to intermediate tables can be hinted. -/// Input to map and output of reduce is derived from `AddInput`/`AddOutput`. -template <class TDerived> -struct TIntermediateTablesHintSpec -{ - /// Specify intermediate map output type. - template <class T> - TDerived& HintMapOutput(); - - /// Specify reduce combiner input. - template <class T> - TDerived& HintReduceCombinerInput(); - - /// Specify reduce combiner output. - template <class T> - TDerived& HintReduceCombinerOutput(); - - /// Specify reducer input. - template <class T> - TDerived& HintReduceInput(); - - /// - /// @brief Add output of map stage. - /// - /// Mapper output table #0 is always intermediate table that is going to be reduced later. - /// Rows that mapper write to tables #1, #2, ... are saved in MapOutput tables. 
- template <class T> - TDerived& AddMapOutput(const TRichYPath& path); - - TVector<TRichYPath> MapOutputs_; - - const TVector<TStructuredTablePath>& GetStructuredMapOutputs() const; - const TMaybe<TTableStructure>& GetIntermediateMapOutputDescription() const; - const TMaybe<TTableStructure>& GetIntermediateReduceCombinerInputDescription() const; - const TMaybe<TTableStructure>& GetIntermediateReduceCombinerOutputDescription() const; - const TMaybe<TTableStructure>& GetIntermediateReducerInputDescription() const; - -private: - TVector<TStructuredTablePath> StructuredMapOutputs_; - TMaybe<TTableStructure> IntermediateMapOutputDescription_; - TMaybe<TTableStructure> IntermediateReduceCombinerInputDescription_; - TMaybe<TTableStructure> IntermediateReduceCombinerOutputDescription_; - TMaybe<TTableStructure> IntermediateReducerInputDescription_; -}; - -//////////////////////////////////////////////////////////////////////////////// - -struct TAddLocalFileOptions -{ - /// @cond Doxygen_Suppress - using TSelf = TAddLocalFileOptions; - /// @endcond - - /// - /// @brief Path by which job will see the uploaded file. - /// - /// Defaults to basename of the local path. - FLUENT_FIELD_OPTION(TString, PathInJob); - - /// - /// @brief MD5 checksum of uploaded file. - /// - /// If not specified it is computed by this library. - /// If this argument is provided, the user can some cpu and disk IO. - FLUENT_FIELD_OPTION(TString, MD5CheckSum); - - /// - /// @brief Do not put file into node cache - /// - /// @see NYT::TRichYPath::BypassArtifactCache - FLUENT_FIELD_OPTION(bool, BypassArtifactCache); -}; - -//////////////////////////////////////////////////////////////////////////////// - -/// @brief Binary to run job profiler on. -enum class EProfilingBinary -{ - /// Profile job proxy. - JobProxy /* "job_proxy" */, - - /// Profile user job. - UserJob /* "user_job" */, -}; - -/// @brief Type of job profiler. -enum class EProfilerType -{ - /// Profile CPU usage. 
- Cpu /* "cpu" */, - - /// Profile memory usage. - Memory /* "memory" */, - - /// Profiler peak memory usage. - PeakMemory /* "peak_memory" */, -}; - -/// @brief Specifies a job profiler. -struct TJobProfilerSpec -{ - /// @cond Doxygen_Suppress - using TSelf = TJobProfilerSpec; - /// @endcond - - /// @brief Binary to profile. - FLUENT_FIELD_OPTION(EProfilingBinary, ProfilingBinary); - - /// @brief Type of the profiler. - FLUENT_FIELD_OPTION(EProfilerType, ProfilerType); - - /// @brief Probabiliy of the job being selected for profiling. - FLUENT_FIELD_OPTION(double, ProfilingProbability); - - /// @brief For sampling profilers, sets the number of samples per second. - FLUENT_FIELD_OPTION(int, SamplingFrequency); -}; - -//////////////////////////////////////////////////////////////////////////////// - -/// -/// @brief Spec of user job. -/// -/// @see https://yt.yandex-team.ru/docs/description/mr/operations_options#user_script_options -struct TUserJobSpec -{ - /// @cond Doxygen_Suppress - using TSelf = TUserJobSpec; - /// @endcond - - /// - /// @brief Specify a local file to upload to Cypress and prepare for use in job. - TSelf& AddLocalFile(const TLocalFilePath& path, const TAddLocalFileOptions& options = TAddLocalFileOptions()); - - /// - /// @brief Get the list of all added local files. - TVector<std::tuple<TLocalFilePath, TAddLocalFileOptions>> GetLocalFiles() const; - - /// @brief Paths to files in Cypress to use in job. - FLUENT_VECTOR_FIELD(TRichYPath, File); - - /// - /// @brief MemoryLimit specifies how much memory job process can use. - /// - /// @note - /// If job uses tmpfs (check @ref NYT::TOperationOptions::MountSandboxInTmpfs) - /// YT computes its memory usage as total of: - /// - memory usage of job process itself (including mapped files); - /// - total size of tmpfs used by this job. 
- /// - /// @note - /// When @ref NYT::TOperationOptions::MountSandboxInTmpfs is enabled library will compute - /// total size of all files used by this job and add this total size to MemoryLimit. - /// Thus you shouldn't include size of your files (e.g. binary file) into MemoryLimit. - /// - /// @note - /// Final memory memory_limit passed to YT is calculated as follows: - /// - /// @note - /// ``` - /// memory_limit = MemoryLimit + <total-size-of-used-files> + ExtraTmpfsSize - /// ``` - /// - /// @see NYT::TUserJobSpec::ExtraTmpfsSize - FLUENT_FIELD_OPTION(i64, MemoryLimit); - - /// - /// @brief Size of data that is going to be written to tmpfs. - /// - /// This option should be used if job writes data to tmpfs. - /// - /// ExtraTmpfsSize should not include size of files specified with - /// @ref NYT::TUserJobSpec::AddLocalFile or @ref NYT::TUserJobSpec::AddFile - /// These files are copied to tmpfs automatically and their total size - /// is computed automatically. - /// - /// @see NYT::TOperationOptions::MountSandboxInTmpfs - /// @see NYT::TUserJobSpec::MemoryLimit - FLUENT_FIELD_OPTION(i64, ExtraTmpfsSize); - - /// - /// @brief Maximum number of CPU cores for a single job to use. - FLUENT_FIELD_OPTION(double, CpuLimit); - - /// - /// @brief Fraction of @ref NYT::TUserJobSpec::MemoryLimit that job gets at start. - /// - /// @see https://yt.yandex-team.ru/docs/description/mr/operations_options#memory_reserve_factor - FLUENT_FIELD_OPTION(double, MemoryReserveFactor); - - /// - /// @brief Local path to executable to be used inside jobs. - //// - /// Provided executable must use C++ YT API library (this library) - /// and implement job class that is going to be used. - /// - /// This option might be useful if we want to start operation from nonlinux machines - /// (in that case we use `JobBinary` to provide path to the same program compiled for linux). 
- /// Other example of using this option is uploading executable to cypress in advance - /// and save the time required to upload current executable to cache. - /// `md5` argument can be used to save cpu time and disk IO when binary MD5 checksum is known. - /// When argument is not provided library will compute it itself. - TUserJobSpec& JobBinaryLocalPath(TString path, TMaybe<TString> md5 = Nothing()); - - /// - /// @brief Cypress path to executable to be used inside jobs. - TUserJobSpec& JobBinaryCypressPath(TString path, TMaybe<TTransactionId> transactionId = Nothing()); - - /// - /// @brief String that will be prepended to the command. - /// - /// This option overrides @ref NYT::TOperationOptions::JobCommandPrefix. - FLUENT_FIELD(TString, JobCommandPrefix); - - /// - /// @brief String that will be appended to the command. - /// - /// This option overrides @ref NYT::TOperationOptions::JobCommandSuffix. - FLUENT_FIELD(TString, JobCommandSuffix); - - /// - /// @brief Map of environment variables that will be set for jobs. - FLUENT_MAP_FIELD(TString, TString, Environment); - - /// - /// @brief Limit for all files inside job sandbox (in bytes). - FLUENT_FIELD_OPTION(ui64, DiskSpaceLimit); - - /// - /// @brief Number of ports reserved for the job (passed through environment in YT_PORT_0, YT_PORT_1, ...). - FLUENT_FIELD_OPTION(ui16, PortCount); - - /// - /// @brief Network project used to isolate job network. - FLUENT_FIELD_OPTION(TString, NetworkProject); - - /// - /// @brief Limit on job execution time. - /// - /// Jobs that exceed this limit will be considered failed. - FLUENT_FIELD_OPTION(TDuration, JobTimeLimit); - - /// - /// @brief Get job binary config. - const TJobBinaryConfig& GetJobBinary() const; - - /// - /// @brief List of profilers to run. 
- FLUENT_VECTOR_FIELD(TJobProfilerSpec, JobProfiler); - -private: - TVector<std::tuple<TLocalFilePath, TAddLocalFileOptions>> LocalFiles_; - TJobBinaryConfig JobBinary_; -}; - -//////////////////////////////////////////////////////////////////////////////// - -/// -/// @brief Spec of Map operation. -/// -/// @see https://yt.yandex-team.ru/docs/description/mr/map -template <typename TDerived> -struct TMapOperationSpecBase - : public TUserOperationSpecBase<TDerived> - , public TWithAutoMergeSpec<TDerived> -{ - /// @cond Doxygen_Suppress - using TSelf = TDerived; - /// @endcond - - /// - /// @brief Spec of mapper job. - FLUENT_FIELD(TUserJobSpec, MapperSpec); - - /// - /// @brief Whether to guarantee the order of rows passed to mapper matches the order in the table. - /// - /// When `Ordered' is false (by default), there is no guaranties about order of reading rows. - /// In this case mapper might work slightly faster because row delivered from fast node can be processed YT waits - /// response from slow nodes. - /// When `Ordered' is true, rows will come in order in which they are stored in input tables. - FLUENT_FIELD_OPTION(bool, Ordered); - - /// - /// @brief Recommended number of jobs to run. - /// - /// `JobCount' has higher priority than @ref NYT::TMapOperationSpecBase::DataSizePerJob. - /// This option only provide a recommendation and may be ignored if conflicting with YT internal limits. - FLUENT_FIELD_OPTION(ui32, JobCount); - - /// - /// @brief Recommended of data size for each job. - /// - /// `DataSizePerJob` has lower priority that @ref NYT::TMapOperationSpecBase::JobCount. - /// This option only provide a recommendation and may be ignored if conflicting with YT internal limits. - FLUENT_FIELD_OPTION(ui64, DataSizePerJob); -}; - -/// -/// @brief Spec of Map operation. 
-/// -/// @see https://yt.yandex-team.ru/docs/description/mr/map -struct TMapOperationSpec - : public TMapOperationSpecBase<TMapOperationSpec> - , public TOperationIOSpec<TMapOperationSpec> - , public TUserJobFormatHintsBase<TMapOperationSpec> -{ }; - -/// -/// @brief Spec of raw Map operation. -/// -/// @see https://yt.yandex-team.ru/docs/description/mr/map -struct TRawMapOperationSpec - : public TMapOperationSpecBase<TRawMapOperationSpec> - , public TSimpleRawOperationIoSpec<TRawMapOperationSpec> -{ }; - -//////////////////////////////////////////////////////////////////////////////// - -/// -/// @brief Spec of Reduce operation. -/// -/// @see https://yt.yandex-team.ru/docs/description/mr/reduce -template <typename TDerived> -struct TReduceOperationSpecBase - : public TUserOperationSpecBase<TDerived> - , public TWithAutoMergeSpec<TDerived> -{ - /// @cond Doxygen_Suppress - using TSelf = TDerived; - /// @endcond - - /// - /// @brief Spec of reduce job. - FLUENT_FIELD(TUserJobSpec, ReducerSpec); - - /// - /// @brief Columns to sort rows by (must include `ReduceBy` as prefix). - FLUENT_FIELD(TSortColumns, SortBy); - - /// - /// @brief Columns to group rows by. - FLUENT_FIELD(TSortColumns, ReduceBy); - - /// - /// @brief Columns to join foreign tables by (must be prefix of `ReduceBy`). - /// - /// @see https://yt.yandex-team.ru/docs/description/mr/reduce#foreign_tables - FLUENT_FIELD_OPTION(TSortColumns, JoinBy); - - /// - /// @brief Guarantee to feed all rows with same `ReduceBy` columns to a single job (`true` by default). - FLUENT_FIELD_OPTION(bool, EnableKeyGuarantee); - - /// - /// @brief Recommended number of jobs to run. - /// - /// `JobCount' has higher priority than @ref NYT::TReduceOperationSpecBase::DataSizePerJob. - /// This option only provide a recommendation and may be ignored if conflicting with YT internal limits. - FLUENT_FIELD_OPTION(ui32, JobCount); - - /// - /// @brief Recommended of data size for each job. 
- /// - /// `DataSizePerJob` has lower priority that @ref NYT::TReduceOperationSpecBase::JobCount. - /// This option only provide a recommendation and may be ignored if conflicting with YT internal limits. - FLUENT_FIELD_OPTION(ui64, DataSizePerJob); -}; - -/// -/// @brief Spec of Reduce operation. -/// -/// @see https://yt.yandex-team.ru/docs/description/mr/reduce -struct TReduceOperationSpec - : public TReduceOperationSpecBase<TReduceOperationSpec> - , public TOperationIOSpec<TReduceOperationSpec> - , public TUserJobFormatHintsBase<TReduceOperationSpec> -{ }; - -/// -/// @brief Spec of raw Reduce operation. -/// -/// @see https://yt.yandex-team.ru/docs/description/mr/reduce -struct TRawReduceOperationSpec - : public TReduceOperationSpecBase<TRawReduceOperationSpec> - , public TSimpleRawOperationIoSpec<TRawReduceOperationSpec> -{ }; - -//////////////////////////////////////////////////////////////////////////////// - -/// -/// @brief Spec of JoinReduce operation. -/// -/// @deprecated Instead the user should run a reduce operation -/// with @ref NYT::TReduceOperationSpec::EnableKeyGuarantee set to `false`. -/// -/// @see https://yt.yandex-team.ru/docs/description/mr/reduce#foreign_tables -template <typename TDerived> -struct TJoinReduceOperationSpecBase - : public TUserOperationSpecBase<TDerived> -{ - /// @cond Doxygen_Suppress - using TSelf = TDerived; - /// @endcond - - /// - /// @brief Spec of reduce job. - FLUENT_FIELD(TUserJobSpec, ReducerSpec); - - /// - /// @brief Columns to join foreign tables by (must be prefix of `ReduceBy`). - /// - /// @see https://yt.yandex-team.ru/docs/description/mr/reduce#foreign_tables - FLUENT_FIELD(TSortColumns, JoinBy); - - /// - /// @brief Recommended number of jobs to run. - /// - /// `JobCount' has higher priority than @ref NYT::TJoinReduceOperationSpecBase::DataSizePerJob. - /// This option only provide a recommendation and may be ignored if conflicting with YT internal limits. 
- FLUENT_FIELD_OPTION(ui32, JobCount); - - /// - /// @brief Recommended of data size for each job. - /// - /// `DataSizePerJob` has lower priority that @ref NYT::TJoinReduceOperationSpecBase::JobCount. - /// This option only provide a recommendation and may be ignored if conflicting with YT internal limits. - FLUENT_FIELD_OPTION(ui64, DataSizePerJob); -}; - -/// -/// @brief Spec of JoinReduce operation. -/// -/// @deprecated Instead the user should run a reduce operation -/// with @ref NYT::TReduceOperationSpec::EnableKeyGuarantee set to `false`. -/// -/// @see https://yt.yandex-team.ru/docs/description/mr/reduce#foreign_tables -struct TJoinReduceOperationSpec - : public TJoinReduceOperationSpecBase<TJoinReduceOperationSpec> - , public TOperationIOSpec<TJoinReduceOperationSpec> - , public TUserJobFormatHintsBase<TJoinReduceOperationSpec> -{ }; - -/// -/// @brief Spec of raw JoinReduce operation. -/// -/// @deprecated Instead the user should run a reduce operation -/// with @ref NYT::TReduceOperationSpec::EnableKeyGuarantee set to `false`. -/// -/// @see https://yt.yandex-team.ru/docs/description/mr/reduce#foreign_tables -struct TRawJoinReduceOperationSpec - : public TJoinReduceOperationSpecBase<TRawJoinReduceOperationSpec> - , public TSimpleRawOperationIoSpec<TRawJoinReduceOperationSpec> -{ }; - -//////////////////////////////////////////////////////////////////////////////// - -/// -/// @brief Spec of MapReduce operation. -/// -/// @see https://yt.yandex-team.ru/docs/description/mr/mapreduce -template <typename TDerived> -struct TMapReduceOperationSpecBase - : public TUserOperationSpecBase<TDerived> -{ - /// @cond Doxygen_Suppress - using TSelf = TDerived; - /// @endcond - - /// - /// @brief Spec of map job. - FLUENT_FIELD(TUserJobSpec, MapperSpec); - - /// - /// @brief Spec of reduce job. - FLUENT_FIELD(TUserJobSpec, ReducerSpec); - - /// - /// @brief Spec of reduce combiner. 
- FLUENT_FIELD(TUserJobSpec, ReduceCombinerSpec); - - /// - /// @brief Columns to sort rows by (must include `ReduceBy` as prefix). - FLUENT_FIELD(TSortColumns, SortBy); - - /// - /// @brief Columns to group rows by. - FLUENT_FIELD(TSortColumns, ReduceBy); - - /// - /// @brief Recommended number of map jobs to run. - /// - /// `JobCount' has higher priority than @ref NYT::TMapReduceOperationSpecBase::DataSizePerMapJob. - /// This option only provide a recommendation and may be ignored if conflicting with YT internal limits. - FLUENT_FIELD_OPTION(ui32, MapJobCount); - - /// - /// @brief Recommended of data size for each map job. - /// - /// `DataSizePerJob` has lower priority that @ref NYT::TMapReduceOperationSpecBase::MapJobCount. - /// This option only provide a recommendation and may be ignored if conflicting with YT internal limits. - FLUENT_FIELD_OPTION(ui64, DataSizePerMapJob); - - /// - /// @brief Recommended number of intermediate data partitions. - FLUENT_FIELD_OPTION(ui64, PartitionCount); - - /// - /// @brief Recommended size of intermediate data partitions. - FLUENT_FIELD_OPTION(ui64, PartitionDataSize); - - /// - /// @brief Account to use for intermediate data. - FLUENT_FIELD_OPTION(TString, IntermediateDataAccount); - - /// - /// @brief Replication factor for intermediate data (1 by default). - FLUENT_FIELD_OPTION(ui64, IntermediateDataReplicationFactor); - - /// - /// @brief Recommended size of data to be passed to a single reduce combiner. - FLUENT_FIELD_OPTION(ui64, DataSizePerSortJob); - - /// - /// @brief Whether to guarantee the order of rows passed to mapper matches the order in the table. - /// - /// @see @ref NYT::TMapOperationSpec::Ordered for more info. - FLUENT_FIELD_OPTION(bool, Ordered); - - /// - /// @brief Guarantee to run reduce combiner before reducer. - FLUENT_FIELD_OPTION(bool, ForceReduceCombiners); -}; - -/// -/// @brief Spec of MapReduce operation. 
-/// -/// @see https://yt.yandex-team.ru/docs/description/mr/mapreduce -struct TMapReduceOperationSpec - : public TMapReduceOperationSpecBase<TMapReduceOperationSpec> - , public TOperationIOSpec<TMapReduceOperationSpec> - , public TIntermediateTablesHintSpec<TMapReduceOperationSpec> -{ - /// @cond Doxygen_Suppress - using TSelf = TMapReduceOperationSpec; - /// @endcond - - /// - /// @brief Format hints for mapper. - FLUENT_FIELD_DEFAULT(TUserJobFormatHints, MapperFormatHints, TUserJobFormatHints()); - - /// - /// @brief Format hints for reducer. - FLUENT_FIELD_DEFAULT(TUserJobFormatHints, ReducerFormatHints, TUserJobFormatHints()); - - /// - /// @brief Format hints for reduce combiner. - FLUENT_FIELD_DEFAULT(TUserJobFormatHints, ReduceCombinerFormatHints, TUserJobFormatHints()); -}; - -/// -/// @brief Spec of raw MapReduce operation. -/// -/// @see https://yt.yandex-team.ru/docs/description/mr/mapreduce -struct TRawMapReduceOperationSpec - : public TMapReduceOperationSpecBase<TRawMapReduceOperationSpec> - , public TRawMapReduceOperationIoSpec<TRawMapReduceOperationSpec> -{ }; - -//////////////////////////////////////////////////////////////////////////////// - -/// -/// @brief Schema inference mode. -/// -/// @see https://yt.yandex-team.ru/docs/description/storage/static_schema.html#schema_inference -enum class ESchemaInferenceMode : int -{ - FromInput /* "from_input" */, - FromOutput /* "from_output" */, - Auto /* "auto" */, -}; - -/// -/// @brief Spec of Sort operation. -/// -/// @see https://yt.yandex-team.ru/docs/description/mr/sort -struct TSortOperationSpec - : TOperationSpecBase<TSortOperationSpec> -{ - /// @cond Doxygen_Suppress - using TSelf = TSortOperationSpec; - /// @endcond - - /// - /// @brief Paths to input tables. - FLUENT_VECTOR_FIELD(TRichYPath, Input); - - /// - /// @brief Path to output table. - FLUENT_FIELD(TRichYPath, Output); - - /// - /// @brief Columns to sort table by. 
- FLUENT_FIELD(TSortColumns, SortBy); - - /// - /// @brief Recommended number of intermediate data partitions. - FLUENT_FIELD_OPTION(ui64, PartitionCount); - - /// - /// @brief Recommended size of intermediate data partitions. - FLUENT_FIELD_OPTION(ui64, PartitionDataSize); - - /// - /// @brief Recommended number of partition jobs to run. - /// - /// `JobCount' has higher priority than @ref NYT::TSortOperationSpec::DataSizePerPartitionJob. - /// This option only provide a recommendation and may be ignored if conflicting with YT internal limits. - FLUENT_FIELD_OPTION(ui64, PartitionJobCount); - - /// - /// @brief Recommended of data size for each partition job. - /// - /// `DataSizePerJob` has lower priority that @ref NYT::TSortOperationSpec::PartitionJobCount. - /// This option only provide a recommendation and may be ignored if conflicting with YT internal limits. - FLUENT_FIELD_OPTION(ui64, DataSizePerPartitionJob); - - /// - /// @brief Inference mode for output table schema. - /// - /// @see https://yt.yandex-team.ru/docs/description/storage/static_schema.html#schema_inference - FLUENT_FIELD_OPTION(ESchemaInferenceMode, SchemaInferenceMode); - - /// - /// @brief Account to use for intermediate data. - FLUENT_FIELD_OPTION(TString, IntermediateDataAccount); - - /// - /// @brief Replication factor for intermediate data (1 by default). - FLUENT_FIELD_OPTION(ui64, IntermediateDataReplicationFactor); -}; - - -/// -/// @brief Merge mode. -enum EMergeMode : int -{ - MM_UNORDERED /* "unordered" */, - MM_ORDERED /* "ordered" */, - MM_SORTED /* "sorted" */, -}; - -/// -/// @brief Spec of Merge operation. -/// -/// @see https://yt.yandex-team.ru/docs/description/mr/merge -struct TMergeOperationSpec - : TOperationSpecBase<TMergeOperationSpec> -{ - /// @cond Doxygen_Suppress - using TSelf = TMergeOperationSpec; - /// @endcond - - /// - /// @brief Paths to input tables. - FLUENT_VECTOR_FIELD(TRichYPath, Input); - - /// - /// @brief Path to output table. 
- FLUENT_FIELD(TRichYPath, Output); - - /// - /// @brief Columns by which to merge (for @ref NYT::EMergeMode::MM_SORTED). - FLUENT_FIELD(TSortColumns, MergeBy); - - /// - /// @brief Merge mode. - FLUENT_FIELD_DEFAULT(EMergeMode, Mode, MM_UNORDERED); - - /// - /// @brief Combine output chunks to larger ones. - FLUENT_FIELD_DEFAULT(bool, CombineChunks, false); - - /// - /// @brief Guarantee that all input chunks will be read. - FLUENT_FIELD_DEFAULT(bool, ForceTransform, false); - - /// - /// @brief Recommended number of jobs to run. - /// - /// `JobCount' has higher priority than @ref NYT::TMergeOperationSpec::DataSizePerJob. - /// This option only provide a recommendation and may be ignored if conflicting with YT internal limits. - FLUENT_FIELD_OPTION(ui32, JobCount); - - /// - /// @brief Recommended of data size for each job. - /// - /// `DataSizePerJob` has lower priority that @ref NYT::TMergeOperationSpec::JobCount. - /// This option only provide a recommendation and may be ignored if conflicting with YT internal limits. - FLUENT_FIELD_OPTION(ui64, DataSizePerJob); - - /// - /// @brief Inference mode for output table schema. - /// - /// @see https://yt.yandex-team.ru/docs/description/storage/static_schema.html#schema_inference - FLUENT_FIELD_OPTION(ESchemaInferenceMode, SchemaInferenceMode); -}; - -/// -/// @brief Spec of Erase operation. -/// -/// @see https://yt.yandex-team.ru/docs/description/mr/erase -struct TEraseOperationSpec - : TOperationSpecBase<TEraseOperationSpec> -{ - /// @cond Doxygen_Suppress - using TSelf = TEraseOperationSpec; - /// @endcond - - /// - /// @brief Which table (or row range) to erase. - FLUENT_FIELD(TRichYPath, TablePath); - - /// - /// Combine output chunks to larger ones. - FLUENT_FIELD_DEFAULT(bool, CombineChunks, false); - - /// - /// @brief Inference mode for output table schema. 
- /// - /// @see https://yt.yandex-team.ru/docs/description/storage/static_schema.html#schema_inference - FLUENT_FIELD_OPTION(ESchemaInferenceMode, SchemaInferenceMode); -}; - -/// -/// @brief Spec of RemoteCopy operation. -/// -/// @see https://yt.yandex-team.ru/docs/description/mr/remote_copy -struct TRemoteCopyOperationSpec - : TOperationSpecBase<TRemoteCopyOperationSpec> -{ - /// @cond Doxygen_Suppress - using TSelf = TRemoteCopyOperationSpec; - /// @endcond - - /// - /// @brief Source cluster name. - FLUENT_FIELD(TString, ClusterName); - - /// - /// @brief Network to use for copy (all remote cluster nodes must have it configured). - FLUENT_FIELD_OPTION(TString, NetworkName); - - /// - /// @brief Paths to input tables. - FLUENT_VECTOR_FIELD(TRichYPath, Input); - - /// - /// @brief Path to output table. - FLUENT_FIELD(TRichYPath, Output); - - /// - /// @brief Inference mode for output table schema. - /// - /// @see https://yt.yandex-team.ru/docs/description/storage/static_schema.html#schema_inference - FLUENT_FIELD_OPTION(ESchemaInferenceMode, SchemaInferenceMode); - - /// - /// @brief Copy user attributes from input to output table (allowed only for single input table). - FLUENT_FIELD_DEFAULT(bool, CopyAttributes, false); - - /// - /// @brief Names of user attributes to copy from input to output table. - /// - /// @note To make this option make sense set @ref NYT::TRemoteCopyOperationSpec::CopyAttributes to `true`. - FLUENT_VECTOR_FIELD(TString, AttributeKey); - -private: - - /// - /// @brief Config for remote cluster connection. - FLUENT_FIELD_OPTION(TNode, ClusterConnection); -}; - -class IVanillaJobBase; - -/// -/// @brief Task of Vanilla operation. 
-/// -/// @see https://yt.yandex-team.ru/docs/description/mr/vanilla -struct TVanillaTask - : public TOperationOutputSpecBase - , public TUserJobOutputFormatHintsBase<TVanillaTask> -{ - /// @cond Doxygen_Suppress - using TSelf = TVanillaTask; - /// @endcond - - /// - /// @brief Add output table path and specify the task output type (i.e. TMyProtoMessage). - template <class T> - TSelf& AddOutput(const TRichYPath& path); - - /// - /// @brief Add output table path as structured path. - TSelf& AddStructuredOutput(TStructuredTablePath path); - - /// - /// @brief Set output table path and specify the task output type (i.e. TMyProtoMessage). - template <class T> - TSelf& SetOutput(size_t tableIndex, const TRichYPath& path); - - /// - /// @brief Task name. - FLUENT_FIELD(TString, Name); - - /// - /// @brief Job to be executed in this task. - FLUENT_FIELD(::TIntrusivePtr<IVanillaJobBase>, Job); - - /// - /// @brief User job spec. - FLUENT_FIELD(TUserJobSpec, Spec); - - /// - /// @brief Number of jobs to run and wait for successful completion. - /// - /// @note If @ref NYT::TUserOperationSpecBase::FailOnJobRestart is `false`, a failed job will be restarted - /// and will not count in this amount. - FLUENT_FIELD(ui64, JobCount); - - /// - /// @brief Network project name. - FLUENT_FIELD(TMaybe<TString>, NetworkProject); - -}; - -/// -/// @brief Spec of Vanilla operation. -/// -/// @see https://yt.yandex-team.ru/docs/description/mr/vanilla -struct TVanillaOperationSpec - : TUserOperationSpecBase<TVanillaOperationSpec> -{ - /// @cond Doxygen_Suppress - using TSelf = TVanillaOperationSpec; - /// @endcond - - /// - /// @brief Description of tasks to run in this operation. - FLUENT_VECTOR_FIELD(TVanillaTask, Task); -}; - -//////////////////////////////////////////////////////////////////////////////// - -/// -/// @brief Options for @ref NYT::IOperationClient::Map and other operation start commands. 
-struct TOperationOptions -{ - /// @cond Doxygen_Suppress - using TSelf = TOperationOptions; - /// @endcond - - /// - /// @brief Additional field to put to operation spec. - FLUENT_FIELD_OPTION(TNode, Spec); - - /// - /// @brief Start operation mode. - enum class EStartOperationMode : int - { - /// - /// @brief Prepare operation asynchronously. Call IOperation::Start() to start operation. - AsyncPrepare, - - /// - /// @brief Prepare and start operation asynchronously. Don't wait for operation completion. - AsyncStart, - - /// - /// @brief Prepare and start operation synchronously. Don't wait for operation completion. - SyncStart, - - /// - /// @brief Prepare, start and wait for operation completion synchronously. - SyncWait, - }; - - /// - /// @brief Start operation mode. - FLUENT_FIELD_DEFAULT(EStartOperationMode, StartOperationMode, EStartOperationMode::SyncWait); - - /// - /// @brief Wait for operation finish synchronously. - /// - /// @deprecated Use StartOperationMode() instead. - TSelf& Wait(bool value) { - StartOperationMode_ = value ? EStartOperationMode::SyncWait : EStartOperationMode::SyncStart; - return static_cast<TSelf&>(*this); - } - - /// - /// - /// @brief Use format from table attribute (for YAMR-like format). - /// - /// @deprecated - FLUENT_FIELD_DEFAULT(bool, UseTableFormats, false); - - /// - /// @brief Prefix for bash command running the jobs. - /// - /// Can be overridden for the specific job type in the @ref NYT::TUserJobSpec. - FLUENT_FIELD(TString, JobCommandPrefix); - - /// - /// @brief Suffix for bash command running the jobs. - /// - /// Can be overridden for the specific job type in the @ref NYT::TUserJobSpec. - FLUENT_FIELD(TString, JobCommandSuffix); - - /// - /// @brief Put all files required by the job into tmpfs. - /// - /// This option can be set globally using @ref NYT::TConfig::MountSandboxInTmpfs. 
- /// @see https://yt.yandex-team.ru/docs/problems/woodpeckers - FLUENT_FIELD_DEFAULT(bool, MountSandboxInTmpfs, false); - - /// - /// @brief Path to directory to store temporary files. - FLUENT_FIELD_OPTION(TString, FileStorage); - - /// - /// @brief Expiration timeout for uploaded files. - FLUENT_FIELD_OPTION(TDuration, FileExpirationTimeout); - - /// - /// @brief Info to be passed securely to the job. - FLUENT_FIELD_OPTION(TNode, SecureVault); - - /// - /// @brief File cache mode. - enum class EFileCacheMode : int - { - /// - /// @brief Use YT API commands "get_file_from_cache" and "put_file_to_cache". - ApiCommandBased, - - /// - /// @brief Upload files to random paths inside @ref NYT::TOperationOptions::FileStorage without caching. - CachelessRandomPathUpload, - }; - - /// - /// @brief File cache mode. - FLUENT_FIELD_DEFAULT(EFileCacheMode, FileCacheMode, EFileCacheMode::ApiCommandBased); - - /// - /// @brief Id of transaction within which all Cypress file storage entries will be checked/created. - /// - /// By default, the root transaction is used. - /// - /// @note Set a specific transaction only if you - /// 1. specify non-default file storage path in @ref NYT::TOperationOptions::FileStorage or in @ref NYT::TConfig::RemoteTempFilesDirectory. - /// 2. use `CachelessRandomPathUpload` caching mode (@ref NYT::TOperationOptions::FileCacheMode). - FLUENT_FIELD(TTransactionId, FileStorageTransactionId); - - /// - /// @brief Ensure stderr and core tables exist before starting operation. - /// - /// If set to `false`, it is user's responsibility to ensure these tables exist. - FLUENT_FIELD_DEFAULT(bool, CreateDebugOutputTables, true); - - /// - /// @brief Ensure output tables exist before starting operation. - /// - /// If set to `false`, it is user's responsibility to ensure output tables exist. - FLUENT_FIELD_DEFAULT(bool, CreateOutputTables, true); - - /// - /// @brief Try to infer schema of inexistent table from the type of written rows. 
- /// - /// @note Default values for this option may differ depending on the row type. - /// For protobuf it's currently `false` by default. - FLUENT_FIELD_OPTION(bool, InferOutputSchema); -}; - -//////////////////////////////////////////////////////////////////////////////// - -/// -/// @brief Get operation secure vault (specified in @ref NYT::TOperationOptions::SecureVault) inside a job. -const TNode& GetJobSecureVault(); - -//////////////////////////////////////////////////////////////////////////////// - -/// -/// @brief Context passed to @ref NYT::IRawJob::Do. -class TRawJobContext -{ -public: - explicit TRawJobContext(size_t outputTableCount); - - /// - /// @brief Get file corresponding to input stream. - const TFile& GetInputFile() const; - - /// - /// @brief Get files corresponding to output streams. - const TVector<TFile>& GetOutputFileList() const; - -private: - TFile InputFile_; - TVector<TFile> OutputFileList_; -}; - -//////////////////////////////////////////////////////////////////////////////// - -/// -/// @brief Interface for classes that can be Saved/Loaded (to be used with @ref Y_SAVELOAD_JOB). -class ISerializableForJob -{ -public: - virtual ~ISerializableForJob() = default; - - /// - /// @brief Dump state to output stream to be restored in job. - virtual void Save(IOutputStream& stream) const = 0; - - /// - /// @brief Load state from a stream. - virtual void Load(IInputStream& stream) = 0; -}; - -//////////////////////////////////////////////////////////////////////////////// - -/// -/// @brief Provider of information about operation inputs/outputs during @ref NYT::IJob::PrepareOperation. -class IOperationPreparationContext -{ -public: - virtual ~IOperationPreparationContext() = default; - - /// @brief Get the number of input tables. - virtual int GetInputCount() const = 0; - - /// @brief Get the number of output tables. - virtual int GetOutputCount() const = 0; - - /// @brief Get the schema of input table no. `index`. 
- virtual const TTableSchema& GetInputSchema(int index) const = 0; - - /// @brief Get all the input table schemas. - virtual const TVector<TTableSchema>& GetInputSchemas() const = 0; - - /// @brief Path to the input table if available (`Nothing()` for intermediate tables). - virtual TMaybe<TYPath> GetInputPath(int index) const = 0; - - /// @brief Path to the output table if available (`Nothing()` for intermediate tables). - virtual TMaybe<TYPath> GetOutputPath(int index) const = 0; -}; - -/// -/// @brief Fluent builder class for @ref NYT::IJob::PrepareOperation. -/// -/// @note Method calls are supposed to be chained. -class TJobOperationPreparer -{ -public: - - /// - /// @brief Group of input tables that allows to specify properties on all of them at once. - /// - /// The instances are created with @ref NYT::TJobOperationPreparer::BeginInputGroup, not directly. - class TInputGroup - { - public: - TInputGroup(TJobOperationPreparer& preparer, TVector<int> indices); - - /// @brief Specify the type of input rows. - template <typename TRow> - TInputGroup& Description(); - - /// @brief Specify renaming of input columns. - TInputGroup& ColumnRenaming(const THashMap<TString, TString>& renaming); - - /// @brief Specify what input columns to send to job - /// - /// @note Filter is applied before renaming, so it must specify original column names. - TInputGroup& ColumnFilter(const TVector<TString>& columns); - - /// @brief Finish describing the input group. - TJobOperationPreparer& EndInputGroup(); - - private: - TJobOperationPreparer& Preparer_; - TVector<int> Indices_; - }; - - /// - /// @brief Group of output tables that allows to specify properties on all of them at once. - /// - /// The instances are created with @ref NYT::TJobOperationPreparer::BeginOutputGroup, not directly. - class TOutputGroup - { - public: - TOutputGroup(TJobOperationPreparer& preparer, TVector<int> indices); - - /// @brief Specify the type of output rows. 
- /// - /// @tparam TRow type of output rows from tables of this group. - /// @param inferSchema Infer schema from `TRow` and specify it for these output tables. - template <typename TRow> - TOutputGroup& Description(bool inferSchema = true); - - /// @brief Specify schema for these tables. - TOutputGroup& Schema(const TTableSchema& schema); - - /// @brief Specify that all the the tables in this group are unschematized. - /// - /// It is equivalent of `.Schema(TTableSchema().Strict(false)`. - TOutputGroup& NoSchema(); - - /// @brief Finish describing the output group. - TJobOperationPreparer& EndOutputGroup(); - - private: - TJobOperationPreparer& Preparer_; - TVector<int> Indices_; - }; - -public: - explicit TJobOperationPreparer(const IOperationPreparationContext& context); - - /// @brief Begin input group consisting of tables with indices `[begin, end)`. - /// - /// @param begin First index. - /// @param end Index after the last one. - TInputGroup BeginInputGroup(int begin, int end); - - /// @brief Begin input group consisting of tables with indices from `indices`. - /// - /// @tparam TCont Container with integers. Must support `std::begin` and `std::end` functions. - /// @param indices Indices of tables to include in the group. - template <typename TCont> - TInputGroup BeginInputGroup(const TCont& indices); - - /// @brief Begin output group consisting of tables with indices `[begin, end)`. - /// - /// @param begin First index. - /// @param end Index after the last one. - TOutputGroup BeginOutputGroup(int begin, int end); - - /// @brief Begin input group consisting of tables with indices from `indices`. - /// - /// @tparam TCont Container with integers. Must support `std::begin` and `std::end` functions. - /// @param indices Indices of tables to include in the group. - template <typename TCont> - TOutputGroup BeginOutputGroup(const TCont& indices); - - /// @brief Specify the schema for output table no `tableIndex`. 
- /// - /// @note All the output schemas must be specified either with this method, `NoOutputSchema` or `OutputDescription` with `inferSchema == true` - TJobOperationPreparer& OutputSchema(int tableIndex, TTableSchema schema); - - /// @brief Mark the output table no. `tableIndex` as unschematized. - TJobOperationPreparer& NoOutputSchema(int tableIndex); - - /// @brief Specify renaming of input columns for table no. `tableIndex`. - TJobOperationPreparer& InputColumnRenaming(int tableIndex, const THashMap<TString, TString>& renaming); - - /// @brief Specify what input columns of table no. `tableIndex` to send to job - /// - /// @note Filter is applied before renaming, so it must specify original column names. - TJobOperationPreparer& InputColumnFilter(int tableIndex, const TVector<TString>& columns); - - /// @brief Specify the type of input rows for table no. `tableIndex`. - /// - /// @tparam TRow type of input rows. - template <typename TRow> - TJobOperationPreparer& InputDescription(int tableIndex); - - /// @brief Specify the type of output rows for table no. `tableIndex`. - /// - /// @tparam TRow type of output rows. - /// @param inferSchema Infer schema from `TRow` and specify it for the output tables. - template <typename TRow> - TJobOperationPreparer& OutputDescription(int tableIndex, bool inferSchema = true); - - /// @brief Set type of output rows for table no. `tableIndex` to TNode - /// - /// @note Set schema via `OutputSchema` if needed - TJobOperationPreparer& NodeOutput(int tableIndex); - - /// @brief Specify input format hints. - /// - /// These hints have lower priority than ones specified in spec. - TJobOperationPreparer& InputFormatHints(TFormatHints hints); - - /// @brief Specify output format hints. - /// - /// These hints have lower priority than ones specified in spec. - TJobOperationPreparer& OutputFormatHints(TFormatHints hints); - - /// @brief Specify format hints. - /// - /// These hints have lower priority than ones specified in spec. 
- TJobOperationPreparer& FormatHints(TUserJobFormatHints newFormatHints); - - /// @name "Private" members - /// The following methods should not be used by clients in @ref NYT::IJob::PrepareOperation - ///@{ - - /// @brief Finish the building process. - void Finish(); - - /// @brief Get output table schemas as specified by the user. - TVector<TTableSchema> GetOutputSchemas(); - - /// @brief Get input column renamings as specified by the user. - const TVector<THashMap<TString, TString>>& GetInputColumnRenamings() const; - - /// @brief Get input column filters as specified by the user. - const TVector<TMaybe<TVector<TString>>>& GetInputColumnFilters() const; - - /// @brief Get input column descriptions as specified by the user. - const TVector<TMaybe<TTableStructure>>& GetInputDescriptions() const; - - /// @brief Get output column descriptions as specified by the user. - const TVector<TMaybe<TTableStructure>>& GetOutputDescriptions() const; - - /// @brief Get format hints as specified by the user. - const TUserJobFormatHints& GetFormatHints() const; - - ///@} -private: - - /// @brief Validate that schema for output table no. `tableIndex` has not been set yet. - void ValidateMissingOutputSchema(int tableIndex) const; - - /// @brief Validate that description for input table no. `tableIndex` has not been set yet. - void ValidateMissingInputDescription(int tableIndex) const; - - /// @brief Validate that description for output table no. `tableIndex` has not been set yet. - void ValidateMissingOutputDescription(int tableIndex) const; - - /// @brief Validate that `tableIndex` is in correct range for input table indices. - /// - /// @param message Message to add to the exception in case of violation. - void ValidateInputTableIndex(int tableIndex, TStringBuf message) const; - - /// @brief Validate that `tableIndex` is in correct range for output table indices. - /// - /// @param message Message to add to the exception in case of violation. 
- void ValidateOutputTableIndex(int tableIndex, TStringBuf message) const; - - /// @brief Validate that all the output schemas has been set. - void FinallyValidate() const; - - static TTableSchema EmptyNonstrictSchema(); - -private: - const IOperationPreparationContext& Context_; - - TVector<TMaybe<TTableSchema>> OutputSchemas_; - TVector<THashMap<TString, TString>> InputColumnRenamings_; - TVector<TMaybe<TVector<TString>>> InputColumnFilters_; - TVector<TMaybe<TTableStructure>> InputTableDescriptions_; - TVector<TMaybe<TTableStructure>> OutputTableDescriptions_; - TUserJobFormatHints FormatHints_ = {}; -}; - -//////////////////////////////////////////////////////////////////////////////// - -/// -/// @brief Interface for all user jobs. -class IJob - : public TThrRefBase -{ -public: - - /// - /// @brief Type of job. - enum EType - { - Mapper, - Reducer, - ReducerAggregator, - RawJob, - VanillaJob, - }; - - /// - /// @brief Save job state to stream to be restored on cluster nodes. - virtual void Save(IOutputStream& stream) const - { - Y_UNUSED(stream); - } - - /// - /// @brief Restore job state from a stream. - virtual void Load(IInputStream& stream) - { - Y_UNUSED(stream); - } - - /// - /// @brief Get operation secure vault (specified in @ref NYT::TOperationOptions::SecureVault) inside a job. - const TNode& SecureVault() const - { - return GetJobSecureVault(); - } - - /// - /// @brief Get number of output tables. - i64 GetOutputTableCount() const - { - Y_VERIFY(NDetail::OutputTableCount > 0); - - return NDetail::OutputTableCount; - } - - /// - /// @brief Method allowing user to control some properties of input and output tables and formats. - /// - /// User can override this method in their job class to: - /// - specify output table schemas. 
- /// The most natural way is usually through @ref NYT::TJobOperationPreparer::OutputDescription (especially for protobuf), - /// but you can use @ref NYT::TJobOperationPreparer::OutputSchema directly - /// - specify output row type (@ref NYT::TJobOperationPreparer::OutputDescription) - /// - specify input row type (@ref NYT::TJobOperationPreparer::InputDescription) - /// - specify input column filter and renaming (@ref NYT::TJobOperationPreparer::InputColumnFilter and @ref NYT::TJobOperationPreparer::InputColumnRenaming) - /// - specify format hints (@ref NYT::TJobOperationPreparer::InputFormatHints, - /// NYT::TJobOperationPreparer::OutputFormatHints and @ref NYT::TJobOperationPreparer::FormatHints) - /// - maybe something more, cf. the methods of @ref NYT::TJobOperationPreparer. - /// - /// If one has several similar tables, groups can be used. - /// Groups are delimited by @ref NYT::TJobOperationPreparer::BeginInputGroup / - /// @ref NYT::TJobOperationPreparer::TInputGroup::EndInputGroup and - /// @ref NYT::TJobOperationPreparer::BeginOutputGroup / - /// @ref NYT::TJobOperationPreparer::TOutputGroup::EndOutputGroup. - /// Example: - /// @code{.cpp} - /// preparer - /// .BeginInputGroup({1,2,4,8}) - /// .ColumnRenaming({{"a", "b"}, {"c", "d"}}) - /// .ColumnFilter({"a", "c"}) - /// .EndInputGroup(); - /// @endcode - /// - /// @note All the output table schemas must be set - /// (possibly as empty nonstrict using @ref NYT::TJobOperationPreparer::NoOutputSchema or - /// @ref NYT::TJobOperationPreparer::TOutputGroup::NoSchema). - /// By default all the output table schemas are marked as empty nonstrict. - virtual void PrepareOperation(const IOperationPreparationContext& context, TJobOperationPreparer& preparer) const; -}; - -/// -/// @brief Declare what fields of currently declared job class to save and restore on cluster node. -#define Y_SAVELOAD_JOB(...) 
\ - virtual void Save(IOutputStream& stream) const override { Save(&stream); } \ - virtual void Load(IInputStream& stream) override { Load(&stream); } \ - Y_PASS_VA_ARGS(Y_SAVELOAD_DEFINE(__VA_ARGS__)) - -//////////////////////////////////////////////////////////////////////////////// - -/// -/// @brief Interface for jobs with typed inputs and outputs. -class IStructuredJob - : public IJob -{ -public: - /// - /// @brief This methods are called when creating table reader and writer for the job. - /// - /// Override them if you want to implement custom input logic. (e.g. addtitional bufferization) - virtual TRawTableReaderPtr CreateCustomRawJobReader(int fd) const; - virtual THolder<IProxyOutput> CreateCustomRawJobWriter(size_t outputTableCount) const; - - virtual TStructuredRowStreamDescription GetInputRowStreamDescription() const = 0; - virtual TStructuredRowStreamDescription GetOutputRowStreamDescription() const = 0; -}; - -//////////////////////////////////////////////////////////////////////////////// - -/// -/// @brief Create default raw job reader. -TRawTableReaderPtr CreateRawJobReader(int fd = 0); - -/// -/// @brief Create default raw job writer. -THolder<IProxyOutput> CreateRawJobWriter(size_t outputTableCount); - -//////////////////////////////////////////////////////////////////////////////// - -/// -/// @brief Base interface for structured (typed) map jobs. -class IMapperBase - : public IStructuredJob -{ }; - -/// -/// @brief Base interface for structured (typed) map jobs with given reader and writer. -template <class TR, class TW> -class IMapper - : public IMapperBase -{ -public: - using TReader = TR; - using TWriter = TW; - -public: - /// Type of job implemented by this class. - static constexpr EType JobType = EType::Mapper; - - /// - /// @brief This method is called before feeding input rows to mapper (before `Do` method). 
- virtual void Start(TWriter* writer) - { - Y_UNUSED(writer); - } - - /// - /// @brief This method is called exactly once for the whole job input. - /// - /// Read input rows from `reader` and write output ones to `writer`. - virtual void Do(TReader* reader, TWriter* writer) = 0; - - /// - /// @brief This method is called after feeding input rows to mapper (after `Do` method). - virtual void Finish(TWriter* writer) - { - Y_UNUSED(writer); - } - - virtual TStructuredRowStreamDescription GetInputRowStreamDescription() const override; - virtual TStructuredRowStreamDescription GetOutputRowStreamDescription() const override; -}; - -//////////////////////////////////////////////////////////////////////////////// - -/// -/// @brief Base interface for structured (typed) reduce jobs. -/// -/// It is common base for @ref NYT::IReducer and @ref NYT::IAggregatorReducer. -class IReducerBase - : public IStructuredJob -{ }; - -/// -/// @brief Base interface for structured (typed) reduce jobs with given reader and writer. -template <class TR, class TW> -class IReducer - : public IReducerBase -{ -public: - using TReader = TR; - using TWriter = TW; - -public: - /// Type of job implemented by this class. - static constexpr EType JobType = EType::Reducer; - -public: - - /// - /// @brief This method is called before feeding input rows to reducer (before `Do` method). - virtual void Start(TWriter* writer) - { - Y_UNUSED(writer); - } - - /// - /// @brief This method is called exactly once for each range with same value of `ReduceBy` (or `JoinBy`) keys. - virtual void Do(TReader* reader, TWriter* writer) = 0; - - /// - /// @brief This method is called after feeding input rows to reducer (after `Do` method). - virtual void Finish(TWriter* writer) - { - Y_UNUSED(writer); - } - - /// - /// @brief Refuse to process the remaining row ranges and finish the job (successfully). 
- void Break(); - - virtual TStructuredRowStreamDescription GetInputRowStreamDescription() const override; - virtual TStructuredRowStreamDescription GetOutputRowStreamDescription() const override; -}; - -//////////////////////////////////////////////////////////////////////////////// - -/// -/// @brief Base interface of jobs used inside reduce operations. -/// -/// Unlike @ref NYT::IReducer jobs their `Do' method is called only once -/// and takes whole range of records split by key boundaries. -/// -/// Template argument `TR` must be @ref NYT::TTableRangesReader. -template <class TR, class TW> -class IAggregatorReducer - : public IReducerBase -{ -public: - using TReader = TR; - using TWriter = TW; - -public: - /// Type of job implemented by this class. - static constexpr EType JobType = EType::ReducerAggregator; - -public: - /// - /// @brief This method is called before feeding input rows to reducer (before `Do` method). - virtual void Start(TWriter* writer) - { - Y_UNUSED(writer); - } - - /// - /// @brief This method is called exactly once for the whole job input. - virtual void Do(TReader* reader, TWriter* writer) = 0; - - /// - /// @brief This method is called after feeding input rows to reducer (after `Do` method). - virtual void Finish(TWriter* writer) - { - Y_UNUSED(writer); - } - - virtual TStructuredRowStreamDescription GetInputRowStreamDescription() const override; - virtual TStructuredRowStreamDescription GetOutputRowStreamDescription() const override; -}; - -//////////////////////////////////////////////////////////////////////////////// - -/// -/// @brief Interface for raw jobs (i.e. reading and writing byte streams). -class IRawJob - : public IJob -{ -public: - /// Type of job implemented by this class. - static constexpr EType JobType = EType::RawJob; - - /// - /// @brief This method is called exactly once for the whole job input. 
- virtual void Do(const TRawJobContext& jobContext) = 0; -}; - -/// -/// @brief Interface of jobs that run the given bash command. -class ICommandJob - : public IJob -{ -public: - /// - /// @brief Get bash command to run. - /// - /// @note This method is called on the client side. - virtual const TString& GetCommand() const = 0; -}; - -/// -/// @brief Raw job executing given bash command. -/// -/// @note The binary will not be uploaded. -class TCommandRawJob - : public IRawJob - , public ICommandJob -{ -public: - /// - /// @brief Create job with specified command. - /// - /// @param command Bash command to run. - explicit TCommandRawJob(TStringBuf command = {}); - - const TString& GetCommand() const override; - void Do(const TRawJobContext& jobContext) override; - -private: - TString Command_; -}; - -//////////////////////////////////////////////////////////////////////////////// - -/// -/// @brief Base interface for vanilla jobs. -/// -/// @see https://yt.yandex-team.ru/docs/description/mr/vanilla -class IVanillaJobBase - : public virtual IStructuredJob -{ -public: - /// Type of job implemented by this class. - static constexpr EType JobType = EType::VanillaJob; -}; - -template <class TW = void> -class IVanillaJob; - -/// -/// @brief Interface of vanilla job without outputs. -template <> -class IVanillaJob<void> - : public IVanillaJobBase -{ -public: - /// - /// @brief This method is called exactly once for each vanilla job. - virtual void Do() = 0; - - virtual TStructuredRowStreamDescription GetInputRowStreamDescription() const override; - virtual TStructuredRowStreamDescription GetOutputRowStreamDescription() const override; -}; - -/// -/// @brief Vanilla job executing given bash command. -/// -/// @note The binary will not be uploaded. -class TCommandVanillaJob - : public IVanillaJob<> - , public ICommandJob -{ -public: - /// - /// @brief Create job with specified command. - /// - /// @param command Bash command to run. 
- explicit TCommandVanillaJob(TStringBuf command = {}); - - const TString& GetCommand() const override; - void Do() override; - -private: - TString Command_; -}; - -/// -/// @brief Interface for vanilla jobs with output tables. -template <class TW> -class IVanillaJob - : public IVanillaJobBase -{ -public: - using TWriter = TW; - -public: - /// - /// @brief This method is called before `Do` method. - virtual void Start(TWriter* /* writer */) - { } - - /// - /// @brief This method is called exactly once for each vanilla job. - /// - /// Write output rows to `writer`. - virtual void Do(TWriter* writer) = 0; - - /// - /// @brief This method is called after `Do` method. - virtual void Finish(TWriter* /* writer */) - { } - - virtual TStructuredRowStreamDescription GetInputRowStreamDescription() const override; - virtual TStructuredRowStreamDescription GetOutputRowStreamDescription() const override; -}; - -//////////////////////////////////////////////////////////////////////////////// - -/// -/// @brief Attributes to request for an operation. -enum class EOperationAttribute : int -{ - Id /* "id" */, - Type /* "type" */, - State /* "state" */, - AuthenticatedUser /* "authenticated_user" */, - StartTime /* "start_time" */, - FinishTime /* "finish_time" */, - BriefProgress /* "brief_progress" */, - BriefSpec /* "brief_spec" */, - Suspended /* "suspended" */, - Result /* "result" */, - Progress /* "progress" */, - Events /* "events" */, - Spec /* "spec" */, - FullSpec /* "full_spec" */, - UnrecognizedSpec /* "unrecognized_spec" */, -}; - -/// -/// @brief Class describing which attributes to request in @ref NYT::IClient::GetOperation or @ref NYT::IClient::ListOperations. -struct TOperationAttributeFilter -{ - /// @cond Doxygen_Suppress - using TSelf = TOperationAttributeFilter; - /// @endcond - - TVector<EOperationAttribute> Attributes_; - - /// - /// @brief Add attribute to the filter. Calls are supposed to be chained. 
- TSelf& Add(EOperationAttribute attribute) - { - Attributes_.push_back(attribute); - return *this; - } -}; - -/// -/// @brief Options for @ref NYT::IClient::GetOperation call. -struct TGetOperationOptions -{ - /// @cond Doxygen_Suppress - using TSelf = TGetOperationOptions; - /// @endcond - - /// - /// @brief What attributes to request (if omitted, the default set of attributes will be requested). - FLUENT_FIELD_OPTION(TOperationAttributeFilter, AttributeFilter); -}; - -/// -/// @brief "Coarse-grained" state of an operation. -enum class EOperationBriefState : int -{ - InProgress /* "in_progress" */, - Completed /* "completed" */, - Aborted /* "aborted" */, - - /// Failed - Failed /* "failed" */, -}; - -/// -/// @brief Operation type. -enum class EOperationType : int -{ - Map /* "map" */, - Merge /* "merge" */, - Erase /* "erase" */, - Sort /* "sort" */, - Reduce /* "reduce" */, - MapReduce /* "map_reduce" */, - RemoteCopy /* "remote_copy" */, - JoinReduce /* "join_reduce" */, - Vanilla /* "vanilla" */, -}; - -/// -/// @brief Operation progress. -struct TOperationProgress -{ - /// - /// @brief Total job statistics. - TJobStatistics JobStatistics; - - /// - /// @brief Job counter for various job states with hierarchy. - TJobCounters JobCounters; - - /// - /// @brief Time when this progress was built on scheduler or CA. - TMaybe<TInstant> BuildTime; -}; - -/// -/// @brief Brief operation progress (numbers of jobs in these states). -struct TOperationBriefProgress -{ - ui64 Aborted = 0; - ui64 Completed = 0; - ui64 Failed = 0; - ui64 Lost = 0; - ui64 Pending = 0; - ui64 Running = 0; - ui64 Total = 0; -}; - -/// -/// @brief Operation result. -struct TOperationResult -{ - /// - /// @brief For a unsuccessfully finished operation: description of error. - TMaybe<TYtError> Error; -}; - -/// -/// @brief Operation event (change of state). -struct TOperationEvent -{ - /// - /// @brief New state of operation. - TString State; - - /// - /// @brief Time of state change. 
- TInstant Time; -}; - -/// -/// @brief Operation info. -/// -/// A field may be `Nothing()` either if it was not requested (see @ref NYT::TGetOperationOptions::AttributeFilter) -/// or it is not available (i.e. `FinishTime` for a running operation). -/// @see https://yt.yandex-team.ru/docs/api/commands#get_operation -struct TOperationAttributes -{ - /// - /// @brief Operation id. - TMaybe<TOperationId> Id; - - /// - /// @brief Operation type. - TMaybe<EOperationType> Type; - - /// - /// @brief Operation state. - TMaybe<TString> State; - - /// - /// @brief "Coarse-grained" operation state. - TMaybe<EOperationBriefState> BriefState; - - /// - /// @brief Name of user that started the operation. - TMaybe<TString> AuthenticatedUser; - - /// - /// @brief Operation start time. - TMaybe<TInstant> StartTime; - - /// - /// @brief Operation finish time (if the operation has finished). - TMaybe<TInstant> FinishTime; - - /// - /// @brief Brief progress of the operation. - TMaybe<TOperationBriefProgress> BriefProgress; - - /// - /// @brief Brief spec of operation (light-weight fields only). - TMaybe<TNode> BriefSpec; - - /// - /// @brief Spec of the operation as provided by the user. - TMaybe<TNode> Spec; - - /// - /// @brief Full spec of operation (all fields not specified by user are filled with default values). - TMaybe<TNode> FullSpec; - - /// - /// @brief Fields not recognized by scheduler. - TMaybe<TNode> UnrecognizedSpec; - - /// - /// @brief Is operation suspended. - TMaybe<bool> Suspended; - - /// - /// @brief Operation result. - TMaybe<TOperationResult> Result; - - /// - /// @brief Operation progress. - TMaybe<TOperationProgress> Progress; - - /// - /// @brief List of operation events (changes of state). - TMaybe<TVector<TOperationEvent>> Events; - - /// - /// @brief Map from alert name to its description. - TMaybe<THashMap<TString, TYtError>> Alerts; -}; - -/// -/// @brief Direction of cursor for paging, see @ref NYT::TListOperationsOptions::CursorDirection. 
-enum class ECursorDirection -{ - Past /* "past" */, - Future /* "future" */, -}; - -/// -/// @brief Options of @ref NYT::IClient::ListOperations command. -/// -/// @see https://yt.yandex-team.ru/docs/api/commands.html#list_operations -struct TListOperationsOptions -{ - /// @cond Doxygen_Suppress - using TSelf = TListOperationsOptions; - /// @endcond - - /// - /// @name Time range specification - /// - /// List operations with start time in half-closed interval - /// `[CursorTime, ToTime)` if `CursorDirection == Future` or - /// `[FromTime, CursorTime)` if `CursorDirection == Past`. - ///@{ - - /// - /// @brief Search for operations with start time >= `FromTime`. - FLUENT_FIELD_OPTION(TInstant, FromTime); - - /// - /// @brief Search for operations with start time < `ToTime`. - FLUENT_FIELD_OPTION(TInstant, ToTime); - - /// - /// @brief Additional restriction on operation start time (useful for pagination). - /// - /// Search for operations with start time >= `CursorTime` if `CursorDirection == Future` - /// and with start time < `CursorTime` if `CursorDirection == Past` - FLUENT_FIELD_OPTION(TInstant, CursorTime); - - /// - /// @brief Direction of pagination (see @ref NYT::TListOperationsOptions::CursorTime). - FLUENT_FIELD_OPTION(ECursorDirection, CursorDirection); - - ///@} - - /// - /// @name Filters - /// Choose operations satisfying given filters. - ///@{ - - /// - /// @brief Search for `Filter` as a substring in operation text factors - /// (e.g. title or input/output table paths). - FLUENT_FIELD_OPTION(TString, Filter); - - /// - /// @brief Choose operations whose pools include `Pool`. - FLUENT_FIELD_OPTION(TString, Pool); - - /// - /// @brief Choose operations with given @ref NYT::TOperationAttributes::AuthenticatedUser. - FLUENT_FIELD_OPTION(TString, User); - - /// - /// @brief Choose operations with given @ref NYT::TOperationAttributes::State. 
- FLUENT_FIELD_OPTION(TString, State); - - /// - /// @brief Choose operations with given @ref NYT::TOperationAttributes::Type. - FLUENT_FIELD_OPTION(EOperationType, Type); - - /// - /// @brief Choose operations having (or not having) any failed jobs. - FLUENT_FIELD_OPTION(bool, WithFailedJobs); - - ///@} - - /// - /// @brief Search for operations in the archive in addition to Cypress. - FLUENT_FIELD_OPTION(bool, IncludeArchive); - - /// - /// @brief Include the counters for different filter parameters in the response. - /// - /// Include number of operations for each pool, user, state, type - /// and the number of operations having failed jobs. - FLUENT_FIELD_OPTION(bool, IncludeCounters); - - /// - /// @brief Return no more than `Limit` operations (current default and maximum value is 1000). - FLUENT_FIELD_OPTION(i64, Limit); -}; - -/// -/// @brief Response for @ref NYT::IClient::ListOperations command. -struct TListOperationsResult -{ - /// - /// @brief Found operations' attributes. - TVector<TOperationAttributes> Operations; - - /// - /// @name Counters for different filter. - /// - /// If counters were requested (@ref NYT::TListOperationsOptions::IncludeCounters is `true`) - /// the maps contain the number of operations found for each pool, user, state and type. - /// NOTE: - /// 1) Counters ignore CursorTime and CursorDirection, - /// they always are collected in the whole [FromTime, ToTime) interval. - /// 2) Each next counter in the sequence [pool, user, state, type, with_failed_jobs] - /// takes into account all the previous filters (i.e. if you set User filter to "some-user" - /// type counts describe only operations with user "some-user"). - /// @{ - - /// - /// @brief Number of operations for each pool. - TMaybe<THashMap<TString, i64>> PoolCounts; - - /// - /// @brief Number of operations for each user (subject to previous filters). 
- TMaybe<THashMap<TString, i64>> UserCounts; - - /// - /// @brief Number of operations for each state (subject to previous filters). - TMaybe<THashMap<TString, i64>> StateCounts; - - /// - /// @brief Number of operations for each type (subject to previous filters). - TMaybe<THashMap<EOperationType, i64>> TypeCounts; - - /// - /// @brief Number of operations having failed jobs (subject to all previous filters). - TMaybe<i64> WithFailedJobsCount; - - /// @} - - /// - /// @brief Whether some operations were not returned due to @ref NYT::TListOperationsOptions::Limit. - /// - /// `Incomplete == true` means that not all operations satisfying filters - /// were returned (limit exceeded) and you need to repeat the request with new @ref NYT::TListOperationsOptions::CursorTime - /// (e.g. `CursorTime == *Operations.back().StartTime`, but don't forget to - /// remove the duplicates). - bool Incomplete; -}; - -//////////////////////////////////////////////////////////////////////////////// - -/// -/// @brief Data source for @ref NYT::IClient::ListJobs command. -enum class EListJobsDataSource : int -{ - Runtime /* "runtime" */, - Archive /* "archive" */, - Auto /* "auto" */, - Manual /* "manual" */, -}; - -/// -/// @brief Job type. 
-enum class EJobType : int -{ - SchedulerFirst /* "scheduler_first" */, - Map /* "map" */, - PartitionMap /* "partition_map" */, - SortedMerge /* "sorted_merge" */, - OrderedMerge /* "ordered_merge" */, - UnorderedMerge /* "unordered_merge" */, - Partition /* "partition" */, - SimpleSort /* "simple_sort" */, - FinalSort /* "final_sort" */, - SortedReduce /* "sorted_reduce" */, - PartitionReduce /* "partition_reduce" */, - ReduceCombiner /* "reduce_combiner" */, - RemoteCopy /* "remote_copy" */, - IntermediateSort /* "intermediate_sort" */, - OrderedMap /* "ordered_map" */, - JoinReduce /* "join_reduce" */, - Vanilla /* "vanilla" */, - SchedulerUnknown /* "scheduler_unknown" */, - SchedulerLast /* "scheduler_last" */, - ReplicatorFirst /* "replicator_first" */, - ReplicateChunk /* "replicate_chunk" */, - RemoveChunk /* "remove_chunk" */, - RepairChunk /* "repair_chunk" */, - SealChunk /* "seal_chunk" */, - ReplicatorLast /* "replicator_last" */, -}; - -/// -/// @brief Well-known task names. -enum class ETaskName : int -{ - Map /* "map" */, - PartitionMap0 /* "partition_map(0)" */, - SortedMerge /* "sorted_merge" */, - OrderedMerge /* "ordered_merge" */, - UnorderedMerge /* "unordered_merge" */, - Partition0 /* "partition(0)" */, - Partition1 /* "partition(1)" */, - Partition2 /* "partition(2)" */, - SimpleSort /* "simple_sort" */, - FinalSort /* "final_sort" */, - SortedReduce /* "sorted_reduce" */, - PartitionReduce /* "partition_reduce" */, - ReduceCombiner /* "reduce_combiner" */, - RemoteCopy /* "remote_copy" */, - IntermediateSort /* "intermediate_sort" */, - OrderedMap /* "ordered_map" */, - JoinReduce /* "join_reduce" */, -}; - -/// -/// @brief Task name (can either well-known or just a string). -class TTaskName -{ -public: - - // Constructors are implicit by design. - - /// - /// @brief Construct a custom task name. - TTaskName(TString taskName); - - /// - /// @brief Construct a custom task name. 
- TTaskName(const char* taskName); - - /// - /// @brief Construct a well-known task name. - TTaskName(ETaskName taskName); - - const TString& Get() const; - -private: - TString TaskName_; -}; - -/// -/// @brief Job state. -enum class EJobState : int -{ - None /* "none" */, - Waiting /* "waiting" */, - Running /* "running" */, - Aborting /* "aborting" */, - Completed /* "completed" */, - Failed /* "failed" */, - Aborted /* "aborted" */, - Lost /* "lost" */, -}; - -/// -/// @brief Job sort field. -/// -/// @see @ref NYT::TListJobsOptions. -enum class EJobSortField : int -{ - Type /* "type" */, - State /* "state" */, - StartTime /* "start_time" */, - FinishTime /* "finish_time" */, - Address /* "address" */, - Duration /* "duration" */, - Progress /* "progress" */, - Id /* "id" */, -}; - -/// -/// @brief Job sort direction. -/// -/// @see @ref NYT::TListJobsOptions. -enum class EJobSortDirection : int -{ - Ascending /* "ascending" */, - Descending /* "descending" */, -}; - -/// -/// @brief Options for @ref NYT::IClient::ListJobs. -/// -/// @see https://yt.yandex-team.ru/docs/api/commands.html#list_jobs -struct TListJobsOptions -{ - /// @cond Doxygen_Suppress - using TSelf = TListJobsOptions; - /// @endcond - - /// - /// @name Filters - /// Return only jobs with given value of parameter (type, state, address and existence of stderr). - /// If a field is `Nothing()`, return jobs with all possible values of the corresponding parameter. - /// @{ - - /// - /// @brief Job type. - FLUENT_FIELD_OPTION(EJobType, Type); - - /// - /// @brief Job state. - FLUENT_FIELD_OPTION(EJobState, State); - - /// - /// @brief Address of the cluster node where job was running. - FLUENT_FIELD_OPTION(TString, Address); - - /// - /// @brief Return only jobs whose stderr has been saved. - FLUENT_FIELD_OPTION(bool, WithStderr); - - /// - /// @brief Return only jobs whose spec has been saved. 
- FLUENT_FIELD_OPTION(bool, WithSpec); - - /// - /// @brief Return only jobs whose fail context has been saved. - FLUENT_FIELD_OPTION(bool, WithFailContext); - - /// @} - - /// - /// @name Sort options - /// @{ - - /// - /// @brief Sort by this field. - FLUENT_FIELD_OPTION(EJobSortField, SortField); - - /// - /// @brief Sort order. - FLUENT_FIELD_OPTION(ESortOrder, SortOrder); - - /// @} - - /// - /// @brief Data source. - /// - /// Where to search for jobs: in scheduler and Cypress ('Runtime'), in archive ('Archive'), - /// automatically basing on operation presence in Cypress ('Auto') or choose manually (`Manual'). - FLUENT_FIELD_OPTION(EListJobsDataSource, DataSource); - - /// @deprecated - FLUENT_FIELD_OPTION(bool, IncludeCypress); - - /// @deprecated - FLUENT_FIELD_OPTION(bool, IncludeControllerAgent); - - /// @deprecated - FLUENT_FIELD_OPTION(bool, IncludeArchive); - - /// - /// @brief Maximum number of jobs to return. - FLUENT_FIELD_OPTION(i64, Limit); - - /// - /// @brief Number of jobs (in specified sort order) to skip. - /// - /// Together with @ref NYT::TListJobsOptions::Limit may be used for pagination. - FLUENT_FIELD_OPTION(i64, Offset); -}; - -/// -/// @brief Description of a core dump that happened in the job. -struct TCoreInfo -{ - i64 ProcessId; - TString ExecutableName; - TMaybe<ui64> Size; - TMaybe<TYtError> Error; -}; - -/// -/// @brief Job attributes. -/// -/// A field may be `Nothing()` if it is not available (i.e. `FinishTime` for a running job). -/// -/// @see https://yt.yandex-team.ru/docs/api/commands#get_job -struct TJobAttributes -{ - /// - /// @brief Job id. - TMaybe<TJobId> Id; - - /// - /// @brief Job type - TMaybe<EJobType> Type; - - /// - /// @brief Job state. - TMaybe<EJobState> State; - - /// - /// @brief Address of a cluster node where job was running. - TMaybe<TString> Address; - - /// - /// @brief The name of the task that job corresponds to. - TMaybe<TString> TaskName; - - /// - /// @brief Job start time. 
- TMaybe<TInstant> StartTime; - - /// - /// @brief Job finish time (for a finished job). - TMaybe<TInstant> FinishTime; - - /// - /// @brief Estimated ratio of job's completed work. - TMaybe<double> Progress; - - /// - /// @brief Size of saved job stderr. - TMaybe<i64> StderrSize; - - /// - /// @brief Error for a unsuccessfully finished job. - TMaybe<TYtError> Error; - - /// - /// @brief Job brief statistics. - TMaybe<TNode> BriefStatistics; - - /// - /// @brief Job input paths (with ranges). - TMaybe<TVector<TRichYPath>> InputPaths; - - /// - /// @brief Infos for core dumps produced by job. - TMaybe<TVector<TCoreInfo>> CoreInfos; -}; - -/// -/// @brief Response for @ref NYT::IOperation::ListJobs. -struct TListJobsResult -{ - /// - /// @brief Jobs. - TVector<TJobAttributes> Jobs; - - /// - /// @deprecated - TMaybe<i64> CypressJobCount; - - /// - /// @brief Number of jobs retrieved from controller agent. - TMaybe<i64> ControllerAgentJobCount; - - /// - /// @brief Number of jobs retrieved from archive. - TMaybe<i64> ArchiveJobCount; -}; - -//////////////////////////////////////////////////////////////////// - -/// -/// @brief Options for @ref NYT::IClient::GetJob. -struct TGetJobOptions -{ - /// @cond Doxygen_Suppress - using TSelf = TGetJobOptions; - /// @endcond -}; - -/// -/// @brief Options for @ref NYT::IClient::GetJobInput. -struct TGetJobInputOptions -{ - /// @cond Doxygen_Suppress - using TSelf = TGetJobInputOptions; - /// @endcond -}; - -/// -/// @brief Options for @ref NYT::IClient::GetJobFailContext. -struct TGetJobFailContextOptions -{ - /// @cond Doxygen_Suppress - using TSelf = TGetJobFailContextOptions; - /// @endcond -}; - -/// -/// @brief Options for @ref NYT::IClient::GetJobStderr. -struct TGetJobStderrOptions -{ - /// @cond Doxygen_Suppress - using TSelf = TGetJobStderrOptions; - /// @endcond -}; - -//////////////////////////////////////////////////////////////////// - -/// -/// @brief Options for @ref NYT::IOperation::GetFailedJobInfo. 
-struct TGetFailedJobInfoOptions -{ - /// @cond Doxygen_Suppress - using TSelf = TGetFailedJobInfoOptions; - /// @endcond - - /// - /// @brief How many jobs to download. Which jobs will be chosen is undefined. - FLUENT_FIELD_DEFAULT(ui64, MaxJobCount, 10); - - /// - /// @brief How much of stderr tail should be downloaded. - FLUENT_FIELD_DEFAULT(ui64, StderrTailSize, 64 * 1024); -}; - -//////////////////////////////////////////////////////////////////////////////// - -/// -/// @brief Interface representing an operation. -struct IOperation - : public TThrRefBase -{ - virtual ~IOperation() = default; - - /// - /// @brief Get operation id. - virtual const TOperationId& GetId() const = 0; - - /// - /// @brief Get URL of the operation in YT Web UI. - virtual TString GetWebInterfaceUrl() const = 0; - - /// - /// @brief Get last error for not started operations. Get state on YT cluster for started operations. - /// - /// For not started operations last error is an error that's being retried during operation - /// preparation/start (e.g. lock files, start operation request). - virtual TString GetStatus() const = 0; - - /// - /// @brief Get preparation future. - /// - /// @return future that is set when operation is prepared. - virtual ::NThreading::TFuture<void> GetPreparedFuture() = 0; - - /// - /// @brief Start operation synchronously. - /// - /// @note: Do NOT call this method twice. - /// - /// If operation is not prepared yet, Start() will block waiting for preparation finish. - /// Be ready to catch exception if operation preparation or start failed. - virtual void Start() = 0; - - /// - /// @brief Is the operation started - /// - /// Returns true if the operation is started on the cluster - virtual bool IsStarted() const = 0; - - /// - /// @brief Get start future. - /// - /// @return future that is set when operation is started. - virtual ::NThreading::TFuture<void> GetStartedFuture() = 0; - - /// - /// @brief Start watching operation. 
- /// - /// @return future that is set when operation is complete. - /// - /// @note: the user should check value of returned future to ensure that operation completed successfully e.g. - /// @code{.cpp} - /// auto operationComplete = operation->Watch(); - /// operationComplete.Wait(); - /// operationComplete.GetValue(); /// will throw if operation completed with errors - /// @endcode - /// - /// If operation is completed successfully the returned future contains void value. - /// If operation is completed with error future contains @ref NYT::TOperationFailedError. - /// In rare cases when error occurred while waiting (e.g. YT become unavailable) future might contain other exception. - virtual ::NThreading::TFuture<void> Watch() = 0; - - /// - /// @brief Get information about failed jobs. - /// - /// Can be called for operation in any stage. - /// Though user should keep in mind that this method always fetches info from cypress - /// and doesn't work when operation is archived. Successfully completed operations can be archived - /// quite quickly (in about ~30 seconds). - virtual TVector<TFailedJobInfo> GetFailedJobInfo(const TGetFailedJobInfoOptions& options = TGetFailedJobInfoOptions()) = 0; - - /// - /// Get operation brief state. - virtual EOperationBriefState GetBriefState() = 0; - - /// - /// @brief Get error (if operation has failed). - /// - /// @return `Nothing()` if operation is in 'Completed' or 'InProgress' state (or reason for failed / aborted operation). - virtual TMaybe<TYtError> GetError() = 0; - - /// - /// Get job statistics. - virtual TJobStatistics GetJobStatistics() = 0; - - /// - /// Get operation progress. - /// - /// @return `Nothing()` if operation has no running jobs yet, e.g. when it is in "materializing" or "pending" state. - virtual TMaybe<TOperationBriefProgress> GetBriefProgress() = 0; - - /// - /// @brief Abort operation. - /// - /// Operation will be finished immediately. - /// All results of completed/running jobs will be lost. 
- /// - /// @see https://yt.yandex-team.ru/docs/api/commands#abort_op - virtual void AbortOperation() = 0; - - /// - /// @brief Complete operation. - /// - /// Operation will be finished immediately. - /// All results of completed jobs will appear in output tables. - /// All results of running (not completed) jobs will be lost. - /// - /// @see https://yt.yandex-team.ru/docs/api/commands#complete_op - virtual void CompleteOperation() = 0; - - /// - /// @brief Suspend operation. - /// - /// Jobs will not be aborted by default, c.f. @ref NYT::TSuspendOperationOptions. - /// - /// @see https://yt.yandex-team.ru/docs/api/commands#suspend_op - virtual void SuspendOperation( - const TSuspendOperationOptions& options = TSuspendOperationOptions()) = 0; - - /// - /// @brief Resume previously suspended operation. - /// - /// @see https://yt.yandex-team.ru/docs/api/commands#resume_op - virtual void ResumeOperation( - const TResumeOperationOptions& options = TResumeOperationOptions()) = 0; - - /// - /// @brief Get operation attributes. - /// - /// @see https://yt.yandex-team.ru/docs/api/commands#get_operation - virtual TOperationAttributes GetAttributes( - const TGetOperationOptions& options = TGetOperationOptions()) = 0; - - /// - /// @brief Update operation runtime parameters. - /// - /// @see https://yt.yandex-team.ru/docs/api/commands#update_op_parameters - virtual void UpdateParameters( - const TUpdateOperationParametersOptions& options = TUpdateOperationParametersOptions()) = 0; - - /// - /// @brief Get job attributes. - /// - /// @see https://yt.yandex-team.ru/docs/api/commands#get_job - virtual TJobAttributes GetJob( - const TJobId& jobId, - const TGetJobOptions& options = TGetJobOptions()) = 0; - - /// - /// List jobs satisfying given filters (see @ref NYT::TListJobsOptions). 
- /// - /// @see https://yt.yandex-team.ru/docs/api/commands#list_jobs - virtual TListJobsResult ListJobs( - const TListJobsOptions& options = TListJobsOptions()) = 0; -}; - -/// -/// @brief Interface of client capable of managing operations. -struct IOperationClient -{ - /// - /// @brief Run Map operation. - /// - /// @param spec Operation spec. - /// @param mapper Instance of a job to run. - /// @param options Optional parameters. - /// - /// @see https://yt.yandex-team.ru/docs/description/mr/map - IOperationPtr Map( - const TMapOperationSpec& spec, - ::TIntrusivePtr<IMapperBase> mapper, - const TOperationOptions& options = TOperationOptions()); - - /// - /// @brief Run Map operation. - /// - /// @param mapper Instance of a job to run. - /// @param input Input table(s) - /// @param output Output table(s) - /// @param spec Operation spec. - /// @param options Optional parameters. - /// - /// @see https://yt.yandex-team.ru/docs/description/mr/map - IOperationPtr Map( - ::TIntrusivePtr<IMapperBase> mapper, - const TOneOrMany<TStructuredTablePath>& input, - const TOneOrMany<TStructuredTablePath>& output, - const TMapOperationSpec& spec = TMapOperationSpec(), - const TOperationOptions& options = TOperationOptions()); - - /// - /// @brief Run raw Map operation. - /// - /// @param spec Operation spec. - /// @param rawJob Instance of a raw mapper to run. - /// @param options Optional parameters. - /// - /// @see https://yt.yandex-team.ru/docs/description/mr/map - virtual IOperationPtr RawMap( - const TRawMapOperationSpec& spec, - ::TIntrusivePtr<IRawJob> rawJob, - const TOperationOptions& options = TOperationOptions()) = 0; - - /// - /// @brief Run Reduce operation. - /// - /// @param spec Operation spec. - /// @param reducer Instance of a job to run. - /// @param options Optional parameters. 
- /// - /// @see https://yt.yandex-team.ru/docs/description/mr/reduce - IOperationPtr Reduce( - const TReduceOperationSpec& spec, - ::TIntrusivePtr<IReducerBase> reducer, - const TOperationOptions& options = TOperationOptions()); - - /// - /// @brief Run Reduce operation. - /// - /// @param reducer Instance of a job to run. - /// @param input Input table(s) - /// @param output Output table(s) - /// @param reduceBy Columns to group rows by. - /// @param spec Operation spec. - /// @param options Optional parameters. - /// - /// @see https://yt.yandex-team.ru/docs/description/mr/reduce - IOperationPtr Reduce( - ::TIntrusivePtr<IReducerBase> reducer, - const TOneOrMany<TStructuredTablePath>& input, - const TOneOrMany<TStructuredTablePath>& output, - const TSortColumns& reduceBy, - const TReduceOperationSpec& spec = TReduceOperationSpec(), - const TOperationOptions& options = TOperationOptions()); - - /// - /// @brief Run raw Reduce operation. - /// - /// @param spec Operation spec. - /// @param rawJob Instance of a raw reducer to run. - /// @param options Optional parameters. - /// - /// @see https://yt.yandex-team.ru/docs/description/mr/reduce - virtual IOperationPtr RawReduce( - const TRawReduceOperationSpec& spec, - ::TIntrusivePtr<IRawJob> rawJob, - const TOperationOptions& options = TOperationOptions()) = 0; - - /// - /// @brief Run JoinReduce operation. - /// - /// @param spec Operation spec. - /// @param reducer Instance of a job to run. - /// @param options Optional parameters. - /// - /// @deprecated Use @ref NYT::IOperationClient::Reduce with @ref NYT::TReduceOperationSpec::EnableKeyGuarantee set to `false. - IOperationPtr JoinReduce( - const TJoinReduceOperationSpec& spec, - ::TIntrusivePtr<IReducerBase> reducer, - const TOperationOptions& options = TOperationOptions()); - - /// - /// @brief Run raw JoinReduce operation. - /// - /// @param spec Operation spec. - /// @param rawJob Instance of a raw reducer to run. - /// @param options Optional parameters. 
- /// - /// @deprecated Use @ref NYT::IOperationClient::RawReduce with @ref NYT::TReduceOperationSpec::EnableKeyGuarantee set to `false. - virtual IOperationPtr RawJoinReduce( - const TRawJoinReduceOperationSpec& spec, - ::TIntrusivePtr<IRawJob> rawJob, - const TOperationOptions& options = TOperationOptions()) = 0; - - /// - /// @brief Run MapReduce operation. - /// - /// @param spec Operation spec. - /// @param mapper Instance of a map job to run (identity mapper if `nullptr`). - /// @param reducer Instance of a reduce job to run. - /// @param options Optional parameters. - /// - /// @see https://yt.yandex-team.ru/docs/description/mr/mapreduce - IOperationPtr MapReduce( - const TMapReduceOperationSpec& spec, - ::TIntrusivePtr<IMapperBase> mapper, - ::TIntrusivePtr<IReducerBase> reducer, - const TOperationOptions& options = TOperationOptions()); - - /// - /// @brief Run MapReduce operation. - /// - /// @param spec Operation spec. - /// @param mapper Instance of a map job to run (identity mapper if `nullptr`). - /// @param reducerCombiner Instance of a reduce combiner to run (identity reduce combiner if `nullptr`). - /// @param reducer Instance of a reduce job to run. - /// @param options Optional parameters. - /// - /// @see https://yt.yandex-team.ru/docs/description/mr/mapreduce - IOperationPtr MapReduce( - const TMapReduceOperationSpec& spec, - ::TIntrusivePtr<IMapperBase> mapper, - ::TIntrusivePtr<IReducerBase> reduceCombiner, - ::TIntrusivePtr<IReducerBase> reducer, - const TOperationOptions& options = TOperationOptions()); - - /// - /// @brief Run MapReduce operation. - /// - /// @param mapper Instance of mapper to run (identity mapper if `nullptr`). - /// @param reducer Instance of reducer to run. - /// @param input Input table(s) - /// @param output Output table(s) - /// @param reduceBy Columns to group rows by. - /// @param spec Operation spec. - /// @param options Optional parameters. 
- /// - /// @see https://yt.yandex-team.ru/docs/description/mr/mapreduce - IOperationPtr MapReduce( - ::TIntrusivePtr<IMapperBase> mapper, - ::TIntrusivePtr<IReducerBase> reducer, - const TOneOrMany<TStructuredTablePath>& input, - const TOneOrMany<TStructuredTablePath>& output, - const TSortColumns& reduceBy, - TMapReduceOperationSpec spec = TMapReduceOperationSpec(), - const TOperationOptions& options = TOperationOptions()); - - /// - /// @brief Run MapReduce operation. - /// - /// @param mapper Instance of mapper to run (identity mapper if `nullptr`). - /// @param reduceCombiner Instance of reduceCombiner to run (identity reduce combiner if `nullptr`). - /// @param reducer Instance of reducer to run. - /// @param input Input table(s) - /// @param output Output table(s) - /// @param reduceBy Columns to group rows by. - /// @param spec Operation spec. - /// @param options Optional parameters. - /// - /// @see https://yt.yandex-team.ru/docs/description/mr/mapreduce - IOperationPtr MapReduce( - ::TIntrusivePtr<IMapperBase> mapper, - ::TIntrusivePtr<IReducerBase> reduceCombiner, - ::TIntrusivePtr<IReducerBase> reducer, - const TOneOrMany<TStructuredTablePath>& input, - const TOneOrMany<TStructuredTablePath>& output, - const TSortColumns& reduceBy, - TMapReduceOperationSpec spec = TMapReduceOperationSpec(), - const TOperationOptions& options = TOperationOptions()); - - /// - /// @brief Run raw MapReduce operation. - /// - /// @param spec Operation spec. - /// @param mapper Instance of a raw mapper to run (identity mapper if `nullptr`). - /// @param mapper Instance of a raw reduce combiner to run (identity reduce combiner if `nullptr`). - /// @param mapper Instance of a raw reducer to run. - /// @param options Optional parameters. 
- /// - /// @see https://yt.yandex-team.ru/docs/description/mr/mapreduce - virtual IOperationPtr RawMapReduce( - const TRawMapReduceOperationSpec& spec, - ::TIntrusivePtr<IRawJob> mapper, - ::TIntrusivePtr<IRawJob> reduceCombiner, - ::TIntrusivePtr<IRawJob> reducer, - const TOperationOptions& options = TOperationOptions()) = 0; - - /// - /// @brief Run Sort operation. - /// - /// @param spec Operation spec. - /// @param options Optional parameters. - /// - /// @see https://yt.yandex-team.ru/docs/description/mr/sort - virtual IOperationPtr Sort( - const TSortOperationSpec& spec, - const TOperationOptions& options = TOperationOptions()) = 0; - - /// - /// @brief Run Sort operation. - /// - /// @param input Input table(s). - /// @param output Output table. - /// @param sortBy Columns to sort input rows by. - /// @param spec Operation spec. - /// @param options Optional parameters. - /// - /// @see https://yt.yandex-team.ru/docs/description/mr/sort - IOperationPtr Sort( - const TOneOrMany<TRichYPath>& input, - const TRichYPath& output, - const TSortColumns& sortBy, - const TSortOperationSpec& spec = TSortOperationSpec(), - const TOperationOptions& options = TOperationOptions()); - - /// - /// @brief Run Merge operation. - /// - /// @param spec Operation spec. - /// @param options Optional parameters. - /// - /// @see https://yt.yandex-team.ru/docs/description/mr/merge - virtual IOperationPtr Merge( - const TMergeOperationSpec& spec, - const TOperationOptions& options = TOperationOptions()) = 0; - - /// - /// @brief Run Erase operation. - /// - /// @param spec Operation spec. - /// @param options Optional parameters. - /// - /// @see https://yt.yandex-team.ru/docs/description/mr/erase - virtual IOperationPtr Erase( - const TEraseOperationSpec& spec, - const TOperationOptions& options = TOperationOptions()) = 0; - - /// - /// @brief Run RemoteCopy operation. - /// - /// @param spec Operation spec. - /// @param options Optional parameters. 
- /// - /// @see https://yt.yandex-team.ru/docs/description/mr/remote_copy - virtual IOperationPtr RemoteCopy( - const TRemoteCopyOperationSpec& spec, - const TOperationOptions& options = TOperationOptions()) = 0; - - /// - /// @brief Run Vanilla operation. - /// - /// @param spec Operation spec. - /// @param options Optional parameters. - /// - /// @see https://yt.yandex-team.ru/docs/description/mr/vanilla - virtual IOperationPtr RunVanilla( - const TVanillaOperationSpec& spec, - const TOperationOptions& options = TOperationOptions()) = 0; - - /// - /// @brief Abort operation. - /// - /// @see https://yt.yandex-team.ru/docs/api/commands#abort_op - virtual void AbortOperation( - const TOperationId& operationId) = 0; - - /// - /// @brief Complete operation. - /// - /// @see https://yt.yandex-team.ru/docs/api/commands#complete_op - virtual void CompleteOperation( - const TOperationId& operationId) = 0; - - /// - /// @brief Wait for operation to finish. - virtual void WaitForOperation( - const TOperationId& operationId) = 0; - - /// - /// @brief Check and return operation status. - /// - /// @note this function will never return @ref NYT::EOperationBriefState::Failed or @ref NYT::EOperationBriefState::Aborted status, - /// it will throw @ref NYT::TOperationFailedError instead. - virtual EOperationBriefState CheckOperation( - const TOperationId& operationId) = 0; - - /// - /// @brief Create an operation object given operation id. - /// - /// @throw @ref NYT::TErrorResponse if the operation doesn't exist. 
- virtual IOperationPtr AttachOperation(const TOperationId& operationId) = 0; - -private: - virtual IOperationPtr DoMap( - const TMapOperationSpec& spec, - ::TIntrusivePtr<IStructuredJob> mapper, - const TOperationOptions& options) = 0; - - virtual IOperationPtr DoReduce( - const TReduceOperationSpec& spec, - ::TIntrusivePtr<IStructuredJob> reducer, - const TOperationOptions& options) = 0; - - virtual IOperationPtr DoJoinReduce( - const TJoinReduceOperationSpec& spec, - ::TIntrusivePtr<IStructuredJob> reducer, - const TOperationOptions& options) = 0; - - virtual IOperationPtr DoMapReduce( - const TMapReduceOperationSpec& spec, - ::TIntrusivePtr<IStructuredJob> mapper, - ::TIntrusivePtr<IStructuredJob> reduceCombiner, - ::TIntrusivePtr<IStructuredJob> reducer, - const TOperationOptions& options) = 0; -}; - -//////////////////////////////////////////////////////////////////////////////// - -} // namespace NYT - -#define OPERATION_INL_H_ -#include "operation-inl.h" -#undef OPERATION_INL_H_ diff --git a/yt/cpp/mapreduce/interface/operation_ut.cpp b/yt/cpp/mapreduce/interface/operation_ut.cpp deleted file mode 100644 index 0fa62e1568..0000000000 --- a/yt/cpp/mapreduce/interface/operation_ut.cpp +++ /dev/null @@ -1,269 +0,0 @@ -#include <yt/cpp/mapreduce/interface/common_ut.h> -#include <yt/cpp/mapreduce/interface/job_statistics.h> -#include <yt/cpp/mapreduce/interface/operation.h> -#include <yt/cpp/mapreduce/interface/protobuf_table_schema_ut.pb.h> - -#include <yt/cpp/mapreduce/tests/yt_unittest_lib/yt_unittest_lib.h> - -#include <library/cpp/yson/node/node_io.h> - -#include <library/cpp/testing/unittest/registar.h> - -using namespace NYT; -using namespace NYT::NUnitTesting; - -class TDummyInferenceContext - : public IOperationPreparationContext -{ -public: - TDummyInferenceContext(int inputCount, int outputCount) - : InputCount_(inputCount) - , OutputCount_(outputCount) - , InputSchemas_(inputCount) - { } - - int GetInputCount() const override - { - return InputCount_; 
- } - - int GetOutputCount() const override - { - return OutputCount_; - } - - const TVector<TTableSchema>& GetInputSchemas() const override - { - return InputSchemas_; - } - - const TTableSchema& GetInputSchema(int index) const override - { - return InputSchemas_[index]; - } - - TMaybe<TYPath> GetInputPath(int) const override - { - return Nothing(); - } - - TMaybe<TYPath> GetOutputPath(int) const override - { - return Nothing(); - } - -private: - int InputCount_; - int OutputCount_; - TVector<TTableSchema> InputSchemas_; -}; - -Y_UNIT_TEST_SUITE(PrepareOperation) -{ - - Y_UNIT_TEST(BasicSchemas) - { - auto firstSchema = TTableSchema() - .AddColumn(TColumnSchema().Name("some_column").Type(EValueType::VT_UINT64)); - auto otherSchema = TTableSchema() - .AddColumn(TColumnSchema().Name("other_column").Type(EValueType::VT_BOOLEAN)); - auto thirdSchema = TTableSchema() - .AddColumn(TColumnSchema().Name("third_column").Type(EValueType::VT_STRING)); - - TDummyInferenceContext context(3,7); - TJobOperationPreparer builder(context); - - builder - .OutputSchema(1, firstSchema) - .BeginOutputGroup(TVector<int>{2, 5}) - .Schema(otherSchema) - .EndOutputGroup() - .BeginOutputGroup(3, 5) - .Schema(thirdSchema) - .EndOutputGroup() - .BeginOutputGroup(TVector<int>{0, 6}) - .Schema(thirdSchema) - .EndOutputGroup(); - - UNIT_ASSERT_EXCEPTION(builder.OutputSchema(1, otherSchema), TApiUsageError); - UNIT_ASSERT_EXCEPTION(builder.BeginOutputGroup(3, 5).Schema(otherSchema), TApiUsageError); - UNIT_ASSERT_EXCEPTION(builder.BeginOutputGroup(TVector<int>{3,6,7}).Schema(otherSchema), TApiUsageError); - - builder.Finish(); - auto result = builder.GetOutputSchemas(); - - ASSERT_SERIALIZABLES_EQUAL(result[0], thirdSchema); - ASSERT_SERIALIZABLES_EQUAL(result[1], firstSchema); - ASSERT_SERIALIZABLES_EQUAL(result[2], otherSchema); - ASSERT_SERIALIZABLES_EQUAL(result[3], thirdSchema); - ASSERT_SERIALIZABLES_EQUAL(result[4], thirdSchema); - ASSERT_SERIALIZABLES_EQUAL(result[5], otherSchema); - 
ASSERT_SERIALIZABLES_EQUAL(result[6], thirdSchema); - } - - Y_UNIT_TEST(NoSchema) - { - auto schema = TTableSchema() - .AddColumn(TColumnSchema().Name("some_column").Type(EValueType::VT_UINT64)); - - TDummyInferenceContext context(3,4); - TJobOperationPreparer builder(context); - - builder - .OutputSchema(1, schema) - .NoOutputSchema(0) - .BeginOutputGroup(2, 4) - .Schema(schema) - .EndOutputGroup(); - - UNIT_ASSERT_EXCEPTION(builder.OutputSchema(0, schema), TApiUsageError); - - builder.Finish(); - auto result = builder.GetOutputSchemas(); - - UNIT_ASSERT(result[0].Empty()); - - ASSERT_SERIALIZABLES_EQUAL(result[1], schema); - ASSERT_SERIALIZABLES_EQUAL(result[2], schema); - ASSERT_SERIALIZABLES_EQUAL(result[3], schema); - } - - Y_UNIT_TEST(Descriptions) - { - auto urlRowSchema = TTableSchema() - .AddColumn(TColumnSchema().Name("Host").Type(NTi::Optional(NTi::String()))) - .AddColumn(TColumnSchema().Name("Path").Type(NTi::Optional(NTi::String()))) - .AddColumn(TColumnSchema().Name("HttpCode").Type(NTi::Optional(NTi::Int32()))); - - auto urlRowStruct = NTi::Struct({ - {"Host", NTi::Optional(NTi::String())}, - {"Path", NTi::Optional(NTi::String())}, - {"HttpCode", NTi::Optional(NTi::Int32())}, - }); - - auto rowFieldSerializationOptionSchema = TTableSchema() - .AddColumn(TColumnSchema().Name("UrlRow_1").Type(NTi::Optional(urlRowStruct))) - .AddColumn(TColumnSchema().Name("UrlRow_2").Type(NTi::Optional(NTi::String()))); - - auto rowSerializedRepeatedFieldsSchema = TTableSchema() - .AddColumn(TColumnSchema().Name("Ints").Type(NTi::List(NTi::Int64()))) - .AddColumn(TColumnSchema().Name("UrlRows").Type(NTi::List(urlRowStruct))); - - TDummyInferenceContext context(5,7); - TJobOperationPreparer builder(context); - - builder - .InputDescription<TUrlRow>(0) - .BeginInputGroup(2, 3) - .Description<TUrlRow>() - .EndInputGroup() - .BeginInputGroup(TVector<int>{1, 4}) - .Description<TRowSerializedRepeatedFields>() - .EndInputGroup() - .InputDescription<TUrlRow>(3); - - 
UNIT_ASSERT_EXCEPTION(builder.InputDescription<TUrlRow>(0), TApiUsageError); - - builder - .OutputDescription<TUrlRow>(0, false) - .OutputDescription<TRowFieldSerializationOption>(1) - .BeginOutputGroup(2, 4) - .Description<TUrlRow>() - .EndOutputGroup() - .BeginOutputGroup(TVector<int>{4,6}) - .Description<TRowSerializedRepeatedFields>() - .EndOutputGroup() - .OutputDescription<TUrlRow>(5, false); - - UNIT_ASSERT_EXCEPTION(builder.OutputDescription<TUrlRow>(0), TApiUsageError); - UNIT_ASSERT_NO_EXCEPTION(builder.OutputSchema(0, urlRowSchema)); - UNIT_ASSERT_NO_EXCEPTION(builder.OutputSchema(5, urlRowSchema)); - UNIT_ASSERT_EXCEPTION(builder.OutputSchema(1, urlRowSchema), TApiUsageError); - - builder.Finish(); - auto result = builder.GetOutputSchemas(); - - ASSERT_SERIALIZABLES_EQUAL(result[0], urlRowSchema); - ASSERT_SERIALIZABLES_EQUAL(result[1], rowFieldSerializationOptionSchema); - ASSERT_SERIALIZABLES_EQUAL(result[2], urlRowSchema); - ASSERT_SERIALIZABLES_EQUAL(result[3], urlRowSchema); - ASSERT_SERIALIZABLES_EQUAL(result[4], rowSerializedRepeatedFieldsSchema); - ASSERT_SERIALIZABLES_EQUAL(result[5], urlRowSchema); - ASSERT_SERIALIZABLES_EQUAL(result[6], rowSerializedRepeatedFieldsSchema); - - auto expectedInputDescriptions = TVector<TMaybe<TTableStructure>>{ - {TProtobufTableStructure{TUrlRow::descriptor()}}, - {TProtobufTableStructure{TRowSerializedRepeatedFields::descriptor()}}, - {TProtobufTableStructure{TUrlRow::descriptor()}}, - {TProtobufTableStructure{TUrlRow::descriptor()}}, - {TProtobufTableStructure{TRowSerializedRepeatedFields::descriptor()}}, - }; - UNIT_ASSERT_EQUAL(expectedInputDescriptions, builder.GetInputDescriptions()); - - auto expectedOutputDescriptions = TVector<TMaybe<TTableStructure>>{ - {TProtobufTableStructure{TUrlRow::descriptor()}}, - {TProtobufTableStructure{TRowFieldSerializationOption::descriptor()}}, - {TProtobufTableStructure{TUrlRow::descriptor()}}, - {TProtobufTableStructure{TUrlRow::descriptor()}}, - 
{TProtobufTableStructure{TRowSerializedRepeatedFields::descriptor()}}, - {TProtobufTableStructure{TUrlRow::descriptor()}}, - {TProtobufTableStructure{TRowSerializedRepeatedFields::descriptor()}}, - }; - UNIT_ASSERT_EQUAL(expectedOutputDescriptions, builder.GetOutputDescriptions()); - } - - Y_UNIT_TEST(InputColumns) - { - TDummyInferenceContext context(5, 1); - TJobOperationPreparer builder(context); - builder - .InputColumnFilter(2, {"a", "b"}) - .BeginInputGroup(0, 2) - .ColumnFilter({"b", "c"}) - .ColumnRenaming({{"b", "B"}, {"c", "C"}}) - .EndInputGroup() - .InputColumnRenaming(3, {{"a", "AAA"}}) - .NoOutputSchema(0); - builder.Finish(); - - auto expectedRenamings = TVector<THashMap<TString, TString>>{ - {{"b", "B"}, {"c", "C"}}, - {{"b", "B"}, {"c", "C"}}, - {}, - {{"a", "AAA"}}, - {}, - }; - UNIT_ASSERT_EQUAL(builder.GetInputColumnRenamings(), expectedRenamings); - - auto expectedFilters = TVector<TMaybe<TVector<TString>>>{ - {{"b", "c"}}, - {{"b", "c"}}, - {{"a", "b"}}, - {}, - {}, - }; - UNIT_ASSERT_EQUAL(builder.GetInputColumnFilters(), expectedFilters); - } - - Y_UNIT_TEST(Bug_r7349102) - { - auto firstSchema = TTableSchema() - .AddColumn(TColumnSchema().Name("some_column").Type(EValueType::VT_UINT64)); - auto otherSchema = TTableSchema() - .AddColumn(TColumnSchema().Name("other_column").Type(EValueType::VT_BOOLEAN)); - auto thirdSchema = TTableSchema() - .AddColumn(TColumnSchema().Name("third_column").Type(EValueType::VT_STRING)); - - TDummyInferenceContext context(3,1); - TJobOperationPreparer builder(context); - - builder - .InputDescription<TUrlRow>(0) - .InputDescription<TUrlRow>(1) - .InputDescription<TUrlRow>(2) - .OutputDescription<TUrlRow>(0); - - builder.Finish(); - } - -} // Y_UNIT_TEST_SUITE(SchemaInference) diff --git a/yt/cpp/mapreduce/interface/proto3_ut.proto b/yt/cpp/mapreduce/interface/proto3_ut.proto deleted file mode 100644 index b24c13085b..0000000000 --- a/yt/cpp/mapreduce/interface/proto3_ut.proto +++ /dev/null @@ -1,17 +0,0 @@ 
-syntax = "proto3"; - -import "yt/yt_proto/yt/formats/extension.proto"; - -package NYT.NTestingProto3; - -option (NYT.file_default_field_flags) = SERIALIZATION_YT; - -message TWithOptional -{ - optional int64 x = 1; -} - -message TWithOptionalMessage -{ - optional TWithOptional x = 1; -} diff --git a/yt/cpp/mapreduce/interface/protobuf_file_options_ut.cpp b/yt/cpp/mapreduce/interface/protobuf_file_options_ut.cpp deleted file mode 100644 index 5ffa9564d7..0000000000 --- a/yt/cpp/mapreduce/interface/protobuf_file_options_ut.cpp +++ /dev/null @@ -1,271 +0,0 @@ -#include "errors.h" -#include "format.h" -#include "common_ut.h" - -#include <yt/cpp/mapreduce/interface/protobuf_file_options_ut.pb.h> - -#include <yt/cpp/mapreduce/tests/yt_unittest_lib/yt_unittest_lib.h> - -#include <library/cpp/testing/unittest/registar.h> - -using namespace NYT; - -Y_UNIT_TEST_SUITE(ProtobufFileOptions) -{ - NTi::TTypePtr GetUrlRowType(bool required) - { - static const NTi::TTypePtr structType = NTi::Struct({ - {"Host", ToTypeV3(EValueType::VT_STRING, false)}, - {"Path", ToTypeV3(EValueType::VT_STRING, false)}, - {"HttpCode", ToTypeV3(EValueType::VT_INT32, false)}}); - return required ? 
structType : NTi::TTypePtr(NTi::Optional(structType)); - } - - Y_UNIT_TEST(TRowFieldSerializationOption) - { - const auto schema = CreateTableSchema<NTestingFileOptions::TRowFieldSerializationOption>(); - - ASSERT_SERIALIZABLES_EQUAL(schema, TTableSchema() - .AddColumn(TColumnSchema().Name("UrlRow_1").Type(ToTypeV3(EValueType::VT_STRING, false))) - .AddColumn(TColumnSchema().Name("UrlRow_2").Type(GetUrlRowType(false)))); - } - - Y_UNIT_TEST(TRowMixedSerializationOptions) - { - const auto schema = CreateTableSchema<NTestingFileOptions::TRowMixedSerializationOptions>(); - - ASSERT_SERIALIZABLES_EQUAL(schema, TTableSchema() - .AddColumn(TColumnSchema().Name("UrlRow_1").Type(ToTypeV3(EValueType::VT_STRING, false))) - .AddColumn(TColumnSchema().Name("UrlRow_2").Type(GetUrlRowType(false)))); - } - - Y_UNIT_TEST(FieldSortOrder) - { - const auto schema = CreateTableSchema<NTestingFileOptions::TFieldSortOrder>(); - - auto asInProtoFile = NTi::Optional(NTi::Struct({ - {"x", NTi::Optional(NTi::Int64())}, - {"y", NTi::Optional(NTi::String())}, - {"z", NTi::Optional(NTi::Bool())}, - })); - auto byFieldNumber = NTi::Optional(NTi::Struct({ - {"z", NTi::Optional(NTi::Bool())}, - {"x", NTi::Optional(NTi::Int64())}, - {"y", NTi::Optional(NTi::String())}, - })); - - ASSERT_SERIALIZABLES_EQUAL(schema, TTableSchema() - .AddColumn(TColumnSchema().Name("EmbeddedDefault").Type(asInProtoFile)) - .AddColumn(TColumnSchema().Name("EmbeddedAsInProtoFile").Type(asInProtoFile)) - .AddColumn(TColumnSchema().Name("EmbeddedByFieldNumber").Type(byFieldNumber))); - } - - Y_UNIT_TEST(Map) - { - const auto schema = CreateTableSchema<NTestingFileOptions::TWithMap>(); - - auto createKeyValueStruct = [] (NTi::TTypePtr key, NTi::TTypePtr value) { - return NTi::List(NTi::Struct({ - {"key", NTi::Optional(key)}, - {"value", NTi::Optional(value)}, - })); - }; - - auto embedded = NTi::Struct({ - {"x", NTi::Optional(NTi::Int64())}, - {"y", NTi::Optional(NTi::String())}, - }); - - 
ASSERT_SERIALIZABLES_EQUAL(schema, TTableSchema() - .AddColumn(TColumnSchema() - .Name("MapDefault") - .Type(createKeyValueStruct(NTi::Int64(), embedded))) - .AddColumn(TColumnSchema() - .Name("MapDict") - .Type(NTi::Dict(NTi::Int64(), embedded)))); - } - - Y_UNIT_TEST(Oneof) - { - const auto schema = CreateTableSchema<NTestingFileOptions::TWithOneof>(); - - auto embedded = NTi::Struct({ - {"x", NTi::Optional(NTi::Int64())}, - {"y", NTi::Optional(NTi::String())}, - }); - - auto defaultVariantType = NTi::Optional(NTi::Struct({ - {"field", NTi::Optional(NTi::String())}, - {"Oneof2", NTi::Optional(NTi::Variant(NTi::Struct({ - {"y2", NTi::String()}, - {"z2", embedded}, - {"x2", NTi::Int64()}, - })))}, - {"x1", NTi::Optional(NTi::Int64())}, - {"y1", NTi::Optional(NTi::String())}, - {"z1", NTi::Optional(embedded)}, - })); - - auto noDefaultType = NTi::Optional(NTi::Struct({ - {"field", NTi::Optional(NTi::String())}, - {"y2", NTi::Optional(NTi::String())}, - {"z2", NTi::Optional(embedded)}, - {"x2", NTi::Optional(NTi::Int64())}, - {"x1", NTi::Optional(NTi::Int64())}, - {"y1", NTi::Optional(NTi::String())}, - {"z1", NTi::Optional(embedded)}, - })); - - ASSERT_SERIALIZABLES_EQUAL(schema, TTableSchema() - .AddColumn(TColumnSchema() - .Name("DefaultVariant") - .Type(defaultVariantType) - ) - .AddColumn(TColumnSchema() - .Name("NoDefault") - .Type(noDefaultType) - ) - .AddColumn(TColumnSchema() - .Name("SerializationProtobuf") - .Type(NTi::Optional(NTi::Struct({ - {"x1", NTi::Optional(NTi::Int64())}, - {"y1", NTi::Optional(NTi::String())}, - {"z1", NTi::Optional(NTi::String())}, - }))) - ) - .AddColumn(TColumnSchema() - .Name("MemberOfTopLevelOneof") - .Type(NTi::Optional(NTi::Int64())) - ) - ); - } -} - -static TNode GetColumns(const TFormat& format, int tableIndex = 0) -{ - return format.Config.GetAttributes()["tables"][tableIndex]["columns"]; -} - -Y_UNIT_TEST_SUITE(ProtobufFormatFileOptions) -{ - Y_UNIT_TEST(TRowFieldSerializationOption) - { - const auto format = 
TFormat::Protobuf<NTestingFileOptions::TRowFieldSerializationOption>(); - auto columns = GetColumns(format); - - UNIT_ASSERT_VALUES_EQUAL(columns[0]["name"], "UrlRow_1"); - UNIT_ASSERT_VALUES_EQUAL(columns[0]["proto_type"], "message"); - UNIT_ASSERT_VALUES_EQUAL(columns[0]["field_number"], 1); - - UNIT_ASSERT_VALUES_EQUAL(columns[1]["name"], "UrlRow_2"); - UNIT_ASSERT_VALUES_EQUAL(columns[1]["proto_type"], "structured_message"); - UNIT_ASSERT_VALUES_EQUAL(columns[1]["field_number"], 2); - const auto& fields = columns[1]["fields"]; - UNIT_ASSERT_VALUES_EQUAL(fields[0]["name"], "Host"); - UNIT_ASSERT_VALUES_EQUAL(fields[0]["proto_type"], "string"); - UNIT_ASSERT_VALUES_EQUAL(fields[0]["field_number"], 1); - - UNIT_ASSERT_VALUES_EQUAL(fields[1]["name"], "Path"); - UNIT_ASSERT_VALUES_EQUAL(fields[1]["proto_type"], "string"); - UNIT_ASSERT_VALUES_EQUAL(fields[1]["field_number"], 2); - - UNIT_ASSERT_VALUES_EQUAL(fields[2]["name"], "HttpCode"); - UNIT_ASSERT_VALUES_EQUAL(fields[2]["proto_type"], "sint32"); - UNIT_ASSERT_VALUES_EQUAL(fields[2]["field_number"], 3); - } - - Y_UNIT_TEST(Map) - { - const auto format = TFormat::Protobuf<NTestingFileOptions::TWithMap>(); - auto columns = GetColumns(format); - - UNIT_ASSERT_VALUES_EQUAL(columns.Size(), 2); - { - const auto& column = columns[0]; - UNIT_ASSERT_VALUES_EQUAL(column["name"], "MapDefault"); - UNIT_ASSERT_VALUES_EQUAL(column["proto_type"], "structured_message"); - UNIT_ASSERT_VALUES_EQUAL(column["fields"].Size(), 2); - UNIT_ASSERT_VALUES_EQUAL(column["fields"][0]["proto_type"], "int64"); - UNIT_ASSERT_VALUES_EQUAL(column["fields"][1]["proto_type"], "structured_message"); - } - { - const auto& column = columns[1]; - UNIT_ASSERT_VALUES_EQUAL(column["name"], "MapDict"); - UNIT_ASSERT_VALUES_EQUAL(column["proto_type"], "structured_message"); - UNIT_ASSERT_VALUES_EQUAL(column["fields"].Size(), 2); - UNIT_ASSERT_VALUES_EQUAL(column["fields"][0]["proto_type"], "int64"); - 
UNIT_ASSERT_VALUES_EQUAL(column["fields"][1]["proto_type"], "structured_message"); - } - } - - Y_UNIT_TEST(Oneof) - { - const auto format = TFormat::Protobuf<NTestingFileOptions::TWithOneof>(); - auto columns = GetColumns(format); - - UNIT_ASSERT_VALUES_EQUAL(columns.Size(), 4); - - { - const auto& column = columns[0]; - UNIT_ASSERT_VALUES_EQUAL(column["name"], "DefaultVariant"); - UNIT_ASSERT_VALUES_EQUAL(column["proto_type"], "structured_message"); - UNIT_ASSERT_VALUES_EQUAL(column["fields"].Size(), 5); - UNIT_ASSERT_VALUES_EQUAL(column["fields"][0]["name"], "field"); - - const auto& oneof2 = column["fields"][1]; - UNIT_ASSERT_VALUES_EQUAL(oneof2["name"], "Oneof2"); - UNIT_ASSERT_VALUES_EQUAL(oneof2["proto_type"], "oneof"); - UNIT_ASSERT_VALUES_EQUAL(oneof2["fields"][0]["name"], "y2"); - UNIT_ASSERT_VALUES_EQUAL(oneof2["fields"][1]["name"], "z2"); - UNIT_ASSERT_VALUES_EQUAL(oneof2["fields"][1]["proto_type"], "structured_message"); - const auto& embeddedFields = oneof2["fields"][1]["fields"]; - UNIT_ASSERT_VALUES_EQUAL(embeddedFields[0]["name"], "x"); - UNIT_ASSERT_VALUES_EQUAL(embeddedFields[1]["name"], "y"); - - UNIT_ASSERT_VALUES_EQUAL(oneof2["fields"][2]["name"], "x2"); - - UNIT_ASSERT_VALUES_EQUAL(column["fields"][2]["name"], "x1"); - UNIT_ASSERT_VALUES_EQUAL(column["fields"][3]["name"], "y1"); - UNIT_ASSERT_VALUES_EQUAL(column["fields"][4]["name"], "z1"); - }; - - { - const auto& column = columns[1]; - UNIT_ASSERT_VALUES_EQUAL(column["name"], "NoDefault"); - UNIT_ASSERT_VALUES_EQUAL(column["proto_type"], "structured_message"); - const auto& fields = column["fields"]; - UNIT_ASSERT_VALUES_EQUAL(fields.Size(), 7); - - UNIT_ASSERT_VALUES_EQUAL(fields[0]["name"], "field"); - - UNIT_ASSERT_VALUES_EQUAL(fields[1]["name"], "y2"); - - UNIT_ASSERT_VALUES_EQUAL(fields[2]["name"], "z2"); - UNIT_ASSERT_VALUES_EQUAL(fields[2]["proto_type"], "structured_message"); - const auto& embeddedFields = fields[2]["fields"]; - UNIT_ASSERT_VALUES_EQUAL(embeddedFields[0]["name"], 
"x"); - UNIT_ASSERT_VALUES_EQUAL(embeddedFields[1]["name"], "y"); - - UNIT_ASSERT_VALUES_EQUAL(fields[3]["name"], "x2"); - - UNIT_ASSERT_VALUES_EQUAL(fields[4]["name"], "x1"); - UNIT_ASSERT_VALUES_EQUAL(fields[5]["name"], "y1"); - UNIT_ASSERT_VALUES_EQUAL(fields[6]["name"], "z1"); - }; - - { - const auto& column = columns[2]; - UNIT_ASSERT_VALUES_EQUAL(column["name"], "SerializationProtobuf"); - UNIT_ASSERT_VALUES_EQUAL(column["proto_type"], "structured_message"); - UNIT_ASSERT_VALUES_EQUAL(column["fields"].Size(), 3); - UNIT_ASSERT_VALUES_EQUAL(column["fields"][0]["name"], "x1"); - UNIT_ASSERT_VALUES_EQUAL(column["fields"][1]["name"], "y1"); - UNIT_ASSERT_VALUES_EQUAL(column["fields"][2]["name"], "z1"); - } - { - const auto& column = columns[3]; - UNIT_ASSERT_VALUES_EQUAL(column["name"], "MemberOfTopLevelOneof"); - UNIT_ASSERT_VALUES_EQUAL(column["proto_type"], "int64"); - } - } -} diff --git a/yt/cpp/mapreduce/interface/protobuf_file_options_ut.proto b/yt/cpp/mapreduce/interface/protobuf_file_options_ut.proto deleted file mode 100644 index 4804b2f60c..0000000000 --- a/yt/cpp/mapreduce/interface/protobuf_file_options_ut.proto +++ /dev/null @@ -1,142 +0,0 @@ -import "yt/yt_proto/yt/formats/extension.proto"; - -package NYT.NTestingFileOptions; - -option (NYT.file_default_field_flags) = SERIALIZATION_YT; -option (NYT.file_default_field_flags) = MAP_AS_LIST_OF_STRUCTS; -option (NYT.file_default_message_flags) = DEPRECATED_SORT_FIELDS_AS_IN_PROTO_FILE; -option (NYT.file_default_oneof_flags) = SEPARATE_FIELDS; - -message TUrlRow -{ - optional string Host = 1 [(NYT.column_name) = "Host"]; - optional string Path = 2 [(NYT.column_name) = "Path"]; - optional sint32 HttpCode = 3 [(NYT.column_name) = "HttpCode"]; -} - -message TRowFieldSerializationOption -{ - optional TUrlRow UrlRow_1 = 1 [(NYT.flags) = SERIALIZATION_PROTOBUF]; - optional TUrlRow UrlRow_2 = 2; -} - -message TRowMixedSerializationOptions -{ - option (NYT.default_field_flags) = SERIALIZATION_PROTOBUF; - 
optional TUrlRow UrlRow_1 = 1; - optional TUrlRow UrlRow_2 = 2 [(NYT.flags) = SERIALIZATION_YT]; -} - -message TRowSerializedRepeatedFields -{ - repeated int64 Ints = 1; - repeated TUrlRow UrlRows = 2; -} - -message TFieldSortOrder -{ - message TEmbeddedDefault { - optional int64 x = 2; - optional string y = 12; - optional bool z = 1; - } - message TEmbeddedAsInProtoFile { - option (NYT.message_flags) = DEPRECATED_SORT_FIELDS_AS_IN_PROTO_FILE; - optional int64 x = 2; - optional string y = 12; - optional bool z = 1; - } - message TEmbeddedByFieldNumber { - option (NYT.message_flags) = SORT_FIELDS_BY_FIELD_NUMBER; - optional int64 x = 2; - optional string y = 12; - optional bool z = 1; - } - option (NYT.default_field_flags) = SERIALIZATION_YT; - - optional TEmbeddedDefault EmbeddedDefault = 1; - optional TEmbeddedAsInProtoFile EmbeddedAsInProtoFile = 2; - optional TEmbeddedByFieldNumber EmbeddedByFieldNumber = 3; -} - -message TWithMap -{ - message TEmbedded { - optional int64 x = 1; - optional string y = 2; - } - - map<int64, TEmbedded> MapDefault = 1; - map<int64, TEmbedded> MapDict = 5 [(NYT.flags) = MAP_AS_DICT]; -} - -message TWithOneof -{ - message TEmbedded - { - oneof Oneof { - int64 x = 1; - string y = 2; - } - } - - message TDefaultVariant - { - option (NYT.default_oneof_flags) = VARIANT; - optional string field = 1; - - oneof Oneof2 - { - string y2 = 4; - TEmbedded z2 = 6; - int64 x2 = 2; - } - - oneof Oneof1 - { - option (NYT.oneof_flags) = SEPARATE_FIELDS; - int64 x1 = 10; - string y1 = 3; - TEmbedded z1 = 5; - } - } - - message TNoDefault - { - optional string field = 1; - - oneof Oneof2 - { - string y2 = 4; - TEmbedded z2 = 6; - int64 x2 = 2; - } - - oneof Oneof1 - { - int64 x1 = 10; - string y1 = 3; - TEmbedded z1 = 5; - } - } - - message TSerializationProtobuf - { - option (NYT.default_field_flags) = SERIALIZATION_PROTOBUF; - oneof Oneof - { - int64 x1 = 2; - string y1 = 1; - TEmbedded z1 = 3; - } - } - - optional TDefaultVariant DefaultVariant = 1; 
- optional TNoDefault NoDefault = 2; - optional TSerializationProtobuf SerializationProtobuf = 3; - - oneof TopLevelOneof - { - int64 MemberOfTopLevelOneof = 4; - } -} diff --git a/yt/cpp/mapreduce/interface/protobuf_format.cpp b/yt/cpp/mapreduce/interface/protobuf_format.cpp deleted file mode 100644 index 3d57ed2797..0000000000 --- a/yt/cpp/mapreduce/interface/protobuf_format.cpp +++ /dev/null @@ -1,1498 +0,0 @@ -#include "protobuf_format.h" - -#include "errors.h" - -#include <yt/yt_proto/yt/formats/extension.pb.h> - -#include <google/protobuf/text_format.h> - -#include <library/cpp/yson/node/node_io.h> - -#include <util/generic/hash_set.h> -#include <util/generic/stack.h> -#include <util/generic/overloaded.h> - -#include <util/stream/output.h> -#include <util/stream/file.h> - -namespace NYT::NDetail { - -using ::google::protobuf::Descriptor; -using ::google::protobuf::DescriptorProto; -using ::google::protobuf::EnumDescriptor; -using ::google::protobuf::EnumDescriptorProto; -using ::google::protobuf::FieldDescriptor; -using ::google::protobuf::FieldDescriptorProto; -using ::google::protobuf::OneofDescriptor; -using ::google::protobuf::Message; -using ::google::protobuf::FileDescriptor; -using ::google::protobuf::FileDescriptorProto; -using ::google::protobuf::FileDescriptorSet; -using ::google::protobuf::FieldOptions; -using ::google::protobuf::FileOptions; -using ::google::protobuf::OneofOptions; -using ::google::protobuf::MessageOptions; - -using ::ToString; - -namespace { - -//////////////////////////////////////////////////////////////////////////////// - -using TOneofOption = std::variant< - EProtobufOneofMode>; - -using TFieldOption = std::variant< - EProtobufType, - EProtobufSerializationMode, - EProtobufListMode, - EProtobufMapMode, - EProtobufEnumWritingMode>; - -using TMessageOption = std::variant< - EProtobufFieldSortOrder>; - -struct TOtherColumns -{ }; - -using TValueTypeOrOtherColumns = std::variant<EValueType, TOtherColumns>; - 
-//////////////////////////////////////////////////////////////////////////////// - -TFieldOption FieldFlagToOption(EWrapperFieldFlag::Enum flag) -{ - using EFlag = EWrapperFieldFlag; - switch (flag) { - case EFlag::SERIALIZATION_PROTOBUF: - return EProtobufSerializationMode::Protobuf; - case EFlag::SERIALIZATION_YT: - return EProtobufSerializationMode::Yt; - - case EFlag::ANY: - return EProtobufType::Any; - case EFlag::OTHER_COLUMNS: - return EProtobufType::OtherColumns; - case EFlag::ENUM_INT: - return EProtobufType::EnumInt; - case EFlag::ENUM_STRING: - return EProtobufType::EnumString; - - case EFlag::OPTIONAL_LIST: - return EProtobufListMode::Optional; - case EFlag::REQUIRED_LIST: - return EProtobufListMode::Required; - - case EFlag::MAP_AS_LIST_OF_STRUCTS_LEGACY: - return EProtobufMapMode::ListOfStructsLegacy; - case EFlag::MAP_AS_LIST_OF_STRUCTS: - return EProtobufMapMode::ListOfStructs; - case EFlag::MAP_AS_DICT: - return EProtobufMapMode::Dict; - case EFlag::MAP_AS_OPTIONAL_DICT: - return EProtobufMapMode::OptionalDict; - case EFlag::EMBEDDED: - return EProtobufSerializationMode::Embedded; - - case EFlag::ENUM_SKIP_UNKNOWN_VALUES: - return EProtobufEnumWritingMode::SkipUnknownValues; - case EFlag::ENUM_CHECK_VALUES: - return EProtobufEnumWritingMode::CheckValues; - } - Y_FAIL(); -} - -TMessageOption MessageFlagToOption(EWrapperMessageFlag::Enum flag) -{ - using EFlag = EWrapperMessageFlag; - switch (flag) { - case EFlag::DEPRECATED_SORT_FIELDS_AS_IN_PROTO_FILE: - return EProtobufFieldSortOrder::AsInProtoFile; - case EFlag::SORT_FIELDS_BY_FIELD_NUMBER: - return EProtobufFieldSortOrder::ByFieldNumber; - } - Y_FAIL(); -} - -TOneofOption OneofFlagToOption(EWrapperOneofFlag::Enum flag) -{ - using EFlag = EWrapperOneofFlag; - switch (flag) { - case EFlag::SEPARATE_FIELDS: - return EProtobufOneofMode::SeparateFields; - case EFlag::VARIANT: - return EProtobufOneofMode::Variant; - } - Y_FAIL(); -} - -EWrapperFieldFlag::Enum OptionToFieldFlag(TFieldOption option) -{ 
- using EFlag = EWrapperFieldFlag; - struct TVisitor - { - EFlag::Enum operator() (EProtobufType type) - { - switch (type) { - case EProtobufType::Any: - return EFlag::ANY; - case EProtobufType::OtherColumns: - return EFlag::OTHER_COLUMNS; - case EProtobufType::EnumInt: - return EFlag::ENUM_INT; - case EProtobufType::EnumString: - return EFlag::ENUM_STRING; - } - Y_FAIL(); - } - EFlag::Enum operator() (EProtobufSerializationMode serializationMode) - { - switch (serializationMode) { - case EProtobufSerializationMode::Yt: - return EFlag::SERIALIZATION_YT; - case EProtobufSerializationMode::Protobuf: - return EFlag::SERIALIZATION_PROTOBUF; - case EProtobufSerializationMode::Embedded: - return EFlag::EMBEDDED; - } - Y_FAIL(); - } - EFlag::Enum operator() (EProtobufListMode listMode) - { - switch (listMode) { - case EProtobufListMode::Optional: - return EFlag::OPTIONAL_LIST; - case EProtobufListMode::Required: - return EFlag::REQUIRED_LIST; - } - Y_FAIL(); - } - EFlag::Enum operator() (EProtobufMapMode mapMode) - { - switch (mapMode) { - case EProtobufMapMode::ListOfStructsLegacy: - return EFlag::MAP_AS_LIST_OF_STRUCTS_LEGACY; - case EProtobufMapMode::ListOfStructs: - return EFlag::MAP_AS_LIST_OF_STRUCTS; - case EProtobufMapMode::Dict: - return EFlag::MAP_AS_DICT; - case EProtobufMapMode::OptionalDict: - return EFlag::MAP_AS_OPTIONAL_DICT; - } - Y_FAIL(); - } - EFlag::Enum operator() (EProtobufEnumWritingMode enumWritingMode) - { - switch (enumWritingMode) { - case EProtobufEnumWritingMode::SkipUnknownValues: - return EFlag::ENUM_SKIP_UNKNOWN_VALUES; - case EProtobufEnumWritingMode::CheckValues: - return EFlag::ENUM_CHECK_VALUES; - } - Y_FAIL(); - } - }; - - return std::visit(TVisitor(), option); -} - -EWrapperMessageFlag::Enum OptionToMessageFlag(TMessageOption option) -{ - using EFlag = EWrapperMessageFlag; - struct TVisitor - { - EFlag::Enum operator() (EProtobufFieldSortOrder sortOrder) - { - switch (sortOrder) { - case EProtobufFieldSortOrder::AsInProtoFile: - 
return EFlag::DEPRECATED_SORT_FIELDS_AS_IN_PROTO_FILE; - case EProtobufFieldSortOrder::ByFieldNumber: - return EFlag::SORT_FIELDS_BY_FIELD_NUMBER; - } - Y_FAIL(); - } - }; - - return std::visit(TVisitor(), option); -} - -EWrapperOneofFlag::Enum OptionToOneofFlag(TOneofOption option) -{ - using EFlag = EWrapperOneofFlag; - struct TVisitor - { - EFlag::Enum operator() (EProtobufOneofMode mode) - { - switch (mode) { - case EProtobufOneofMode::SeparateFields: - return EFlag::SEPARATE_FIELDS; - case EProtobufOneofMode::Variant: - return EFlag::VARIANT; - } - Y_FAIL(); - } - }; - - return std::visit(TVisitor(), option); -} - - -template <typename T, typename TOptionToFlag> -void SetOption(TMaybe<T>& option, T newOption, TOptionToFlag optionToFlag) -{ - if (option) { - if (*option == newOption) { - ythrow yexception() << "Duplicate protobuf flag " << optionToFlag(newOption); - } else { - ythrow yexception() << "Incompatible protobuf flags " << - optionToFlag(*option) << " and " << optionToFlag(newOption); - } - } - option = newOption; -} - -class TParseProtobufFieldOptionsVisitor -{ -public: - void operator() (EProtobufType type) - { - SetOption(Type, type); - } - - void operator() (EProtobufSerializationMode serializationMode) - { - SetOption(SerializationMode, serializationMode); - } - - void operator() (EProtobufListMode listMode) - { - SetOption(ListMode, listMode); - } - - void operator() (EProtobufMapMode mapMode) - { - SetOption(MapMode, mapMode); - } - - void operator() (EProtobufEnumWritingMode enumWritingMode) - { - SetOption(EnumWritingMode, enumWritingMode); - } - - template <typename T> - void SetOption(TMaybe<T>& option, T newOption) - { - NYT::NDetail::SetOption(option, newOption, OptionToFieldFlag); - } - -public: - TMaybe<EProtobufType> Type; - TMaybe<EProtobufSerializationMode> SerializationMode; - TMaybe<EProtobufListMode> ListMode; - TMaybe<EProtobufMapMode> MapMode; - TMaybe<EProtobufEnumWritingMode> EnumWritingMode; -}; - -class 
TParseProtobufMessageOptionsVisitor -{ -public: - void operator() (EProtobufFieldSortOrder fieldSortOrder) - { - SetOption(FieldSortOrder, fieldSortOrder); - } - - template <typename T> - void SetOption(TMaybe<T>& option, T newOption) - { - NYT::NDetail::SetOption(option, newOption, OptionToMessageFlag); - } - -public: - TMaybe<EProtobufFieldSortOrder> FieldSortOrder; -}; - -class TParseProtobufOneofOptionsVisitor -{ -public: - void operator() (EProtobufOneofMode mode) - { - SetOption(Mode, mode); - } - - template <typename T> - void SetOption(TMaybe<T>& option, T newOption) - { - NYT::NDetail::SetOption(option, newOption, OptionToOneofFlag); - } - -public: - TMaybe<EProtobufOneofMode> Mode; -}; - -void ParseProtobufFieldOptions( - const ::google::protobuf::RepeatedField<EWrapperFieldFlag::Enum>& flags, - TProtobufFieldOptions* fieldOptions) -{ - TParseProtobufFieldOptionsVisitor visitor; - for (auto flag : flags) { - std::visit(visitor, FieldFlagToOption(flag)); - } - if (visitor.Type) { - fieldOptions->Type = *visitor.Type; - } - if (visitor.SerializationMode) { - fieldOptions->SerializationMode = *visitor.SerializationMode; - } - if (visitor.ListMode) { - fieldOptions->ListMode = *visitor.ListMode; - } - if (visitor.MapMode) { - fieldOptions->MapMode = *visitor.MapMode; - } -} - -void ParseProtobufMessageOptions( - const ::google::protobuf::RepeatedField<EWrapperMessageFlag::Enum>& flags, - TProtobufMessageOptions* messageOptions) -{ - TParseProtobufMessageOptionsVisitor visitor; - for (auto flag : flags) { - std::visit(visitor, MessageFlagToOption(flag)); - } - if (visitor.FieldSortOrder) { - messageOptions->FieldSortOrder = *visitor.FieldSortOrder; - } -} - -void ParseProtobufOneofOptions( - const ::google::protobuf::RepeatedField<EWrapperOneofFlag::Enum>& flags, - TProtobufOneofOptions* messageOptions) -{ - TParseProtobufOneofOptionsVisitor visitor; - for (auto flag : flags) { - std::visit(visitor, OneofFlagToOption(flag)); - } - if (visitor.Mode) { - 
messageOptions->Mode = *visitor.Mode; - } -} - -TProtobufFieldOptions GetDefaultFieldOptions( - const Descriptor* descriptor, - TProtobufFieldOptions defaultFieldOptions = {}) -{ - ParseProtobufFieldOptions( - descriptor->file()->options().GetRepeatedExtension(file_default_field_flags), - &defaultFieldOptions); - ParseProtobufFieldOptions( - descriptor->options().GetRepeatedExtension(default_field_flags), - &defaultFieldOptions); - return defaultFieldOptions; -} - -TProtobufOneofOptions GetDefaultOneofOptions(const Descriptor* descriptor) -{ - TProtobufOneofOptions defaultOneofOptions; - ParseProtobufOneofOptions( - descriptor->file()->options().GetRepeatedExtension(file_default_oneof_flags), - &defaultOneofOptions); - ParseProtobufOneofOptions( - descriptor->options().GetRepeatedExtension(default_oneof_flags), - &defaultOneofOptions); - switch (defaultOneofOptions.Mode) { - case EProtobufOneofMode::Variant: { - auto defaultFieldOptions = GetDefaultFieldOptions(descriptor); - switch (defaultFieldOptions.SerializationMode) { - case EProtobufSerializationMode::Protobuf: - // For Protobuf serialization mode default is SeparateFields. 
- defaultOneofOptions.Mode = EProtobufOneofMode::SeparateFields; - return defaultOneofOptions; - case EProtobufSerializationMode::Yt: - case EProtobufSerializationMode::Embedded: - return defaultOneofOptions; - } - Y_FAIL(); - } - case EProtobufOneofMode::SeparateFields: - return defaultOneofOptions; - } - Y_FAIL(); -} - -//////////////////////////////////////////////////////////////////////////////// - -void ValidateProtobufType(const FieldDescriptor& fieldDescriptor, EProtobufType protobufType) -{ - const auto fieldType = fieldDescriptor.type(); - auto ensureType = [&] (FieldDescriptor::Type expectedType) { - Y_ENSURE(fieldType == expectedType, - "Type of field " << fieldDescriptor.name() << "does not match specified field flag " << - OptionToFieldFlag(protobufType) << ": " - "expected " << FieldDescriptor::TypeName(expectedType) << ", " << - "got " << FieldDescriptor::TypeName(fieldType)); - }; - switch (protobufType) { - case EProtobufType::Any: - ensureType(FieldDescriptor::TYPE_BYTES); - return; - case EProtobufType::OtherColumns: - ensureType(FieldDescriptor::TYPE_BYTES); - return; - case EProtobufType::EnumInt: - ensureType(FieldDescriptor::TYPE_ENUM); - return; - case EProtobufType::EnumString: - ensureType(FieldDescriptor::TYPE_ENUM); - return; - } - Y_FAIL(); -} - -//////////////////////////////////////////////////////////////////////////////// - -class TCycleChecker -{ -private: - class TGuard - { - public: - TGuard(TCycleChecker* checker, const Descriptor* descriptor) - : Checker_(checker) - , Descriptor_(descriptor) - { - Checker_->ActiveVertices_.insert(Descriptor_); - Checker_->Stack_.push(Descriptor_); - } - - ~TGuard() - { - Checker_->ActiveVertices_.erase(Descriptor_); - Checker_->Stack_.pop(); - } - - private: - TCycleChecker* Checker_; - const Descriptor* Descriptor_; - }; - -public: - [[nodiscard]] TGuard Enter(const Descriptor* descriptor) - { - if (ActiveVertices_.contains(descriptor)) { - Y_VERIFY(!Stack_.empty()); - ythrow TApiUsageError() 
<< "Cyclic reference found for protobuf messages. " << - "Consider removing " << EWrapperFieldFlag::SERIALIZATION_YT << " flag " << - "somewhere on the cycle containing " << - Stack_.top()->full_name() << " and " << descriptor->full_name(); - } - return TGuard(this, descriptor); - } - -private: - THashSet<const Descriptor*> ActiveVertices_; - TStack<const Descriptor*> Stack_; -}; - -//////////////////////////////////////////////////////////////////////////////// - -} // namespace - -//////////////////////////////////////////////////////////////////////////////// - -TProtobufFieldOptions GetFieldOptions( - const FieldDescriptor* fieldDescriptor, - const TMaybe<TProtobufFieldOptions>& defaultFieldOptions) -{ - TProtobufFieldOptions options; - if (defaultFieldOptions) { - options = *defaultFieldOptions; - } else { - options = GetDefaultFieldOptions(fieldDescriptor->containing_type()); - } - ParseProtobufFieldOptions(fieldDescriptor->options().GetRepeatedExtension(flags), &options); - return options; -} - -TProtobufOneofOptions GetOneofOptions( - const OneofDescriptor* oneofDescriptor, - const TMaybe<TProtobufOneofOptions>& defaultOneofOptions) -{ - TProtobufOneofOptions options; - if (defaultOneofOptions) { - options = *defaultOneofOptions; - } else { - options = GetDefaultOneofOptions(oneofDescriptor->containing_type()); - } - ParseProtobufOneofOptions(oneofDescriptor->options().GetRepeatedExtension(oneof_flags), &options); - - if (oneofDescriptor->is_synthetic()) { - options.Mode = EProtobufOneofMode::SeparateFields; - } - - auto variantFieldName = oneofDescriptor->options().GetExtension(variant_field_name); - switch (options.Mode) { - case EProtobufOneofMode::SeparateFields: - if (variantFieldName) { - ythrow TApiUsageError() << "\"variant_field_name\" requires (NYT.oneof_flags) = VARIANT"; - } - break; - case EProtobufOneofMode::Variant: - if (variantFieldName) { - options.VariantFieldName = variantFieldName; - } else { - options.VariantFieldName = 
oneofDescriptor->name(); - } - break; - } - return options; -} - - -TProtobufMessageOptions GetMessageOptions(const Descriptor* descriptor) -{ - TProtobufMessageOptions options; - ParseProtobufMessageOptions( - descriptor->file()->options().GetRepeatedExtension(file_default_message_flags), - &options); - ParseProtobufMessageOptions( - descriptor->options().GetRepeatedExtension(message_flags), - &options); - return options; -} - -TNode MakeEnumerationConfig(const ::google::protobuf::EnumDescriptor* enumDescriptor) -{ - auto config = TNode::CreateMap(); - for (int i = 0; i < enumDescriptor->value_count(); ++i) { - config[enumDescriptor->value(i)->name()] = enumDescriptor->value(i)->number(); - } - return config; -} - -TString DeduceProtobufType( - const FieldDescriptor* fieldDescriptor, - const TProtobufFieldOptions& options) -{ - if (options.Type) { - ValidateProtobufType(*fieldDescriptor, *options.Type); - return ToString(*options.Type); - } - switch (fieldDescriptor->type()) { - case FieldDescriptor::TYPE_ENUM: - return ToString(EProtobufType::EnumString); - case FieldDescriptor::TYPE_MESSAGE: - switch (options.SerializationMode) { - case EProtobufSerializationMode::Protobuf: - return "message"; - case EProtobufSerializationMode::Yt: - return "structured_message"; - case EProtobufSerializationMode::Embedded: - return "embedded_message"; - } - Y_FAIL(); - default: - return fieldDescriptor->type_name(); - } - Y_FAIL(); -} - -TString GetColumnName(const ::google::protobuf::FieldDescriptor& field) -{ - const auto& options = field.options(); - const auto columnName = options.GetExtension(column_name); - if (!columnName.empty()) { - return columnName; - } - const auto keyColumnName = options.GetExtension(key_column_name); - if (!keyColumnName.empty()) { - return keyColumnName; - } - return field.name(); -} - -TNode MakeProtoFormatMessageFieldsConfig( - const Descriptor* descriptor, - TNode* enumerations, - TCycleChecker& cycleChecker); - -TNode 
MakeProtoFormatMessageFieldsConfig( - const Descriptor* descriptor, - TNode* enumerations, - const TProtobufFieldOptions& defaultFieldOptions, - const TProtobufOneofOptions& defaultOneofOptions, - TCycleChecker& cycleChecker); - -TNode MakeMapFieldsConfig( - const FieldDescriptor* fieldDescriptor, - TNode* enumerations, - const TProtobufFieldOptions& fieldOptions, - TCycleChecker& cycleChecker) -{ - Y_VERIFY(fieldDescriptor->is_map()); - auto message = fieldDescriptor->message_type(); - switch (fieldOptions.MapMode) { - case EProtobufMapMode::ListOfStructsLegacy: - return MakeProtoFormatMessageFieldsConfig( - message, - enumerations, - cycleChecker); - case EProtobufMapMode::ListOfStructs: - case EProtobufMapMode::Dict: - case EProtobufMapMode::OptionalDict: { - TProtobufFieldOptions defaultFieldOptions; - defaultFieldOptions.SerializationMode = EProtobufSerializationMode::Yt; - return MakeProtoFormatMessageFieldsConfig( - message, - enumerations, - defaultFieldOptions, - TProtobufOneofOptions{}, - cycleChecker); - } - } - Y_FAIL(); -} - -TNode MakeProtoFormatFieldConfig( - const FieldDescriptor* fieldDescriptor, - TNode* enumerations, - const TProtobufFieldOptions& defaultOptions, - TCycleChecker& cycleChecker) -{ - auto fieldConfig = TNode::CreateMap(); - fieldConfig["field_number"] = fieldDescriptor->number(); - fieldConfig["name"] = GetColumnName(*fieldDescriptor); - - auto fieldOptions = GetFieldOptions(fieldDescriptor, defaultOptions); - - Y_ENSURE(fieldOptions.SerializationMode != EProtobufSerializationMode::Embedded, - "EMBEDDED flag is currently supported only with " - "ProtobufFormatWithDescriptors config option set to true"); - - if (fieldDescriptor->is_repeated()) { - Y_ENSURE_EX(fieldOptions.SerializationMode == EProtobufSerializationMode::Yt, - TApiUsageError() << "Repeated field \"" << fieldDescriptor->full_name() << "\" " << - "must have flag \"" << EWrapperFieldFlag::SERIALIZATION_YT << "\""); - } - fieldConfig["repeated"] = 
fieldDescriptor->is_repeated(); - fieldConfig["packed"] = fieldDescriptor->is_packed(); - - fieldConfig["proto_type"] = DeduceProtobufType(fieldDescriptor, fieldOptions); - - if (fieldDescriptor->type() == FieldDescriptor::TYPE_ENUM) { - auto* enumeration = fieldDescriptor->enum_type(); - (*enumerations)[enumeration->full_name()] = MakeEnumerationConfig(enumeration); - fieldConfig["enumeration_name"] = enumeration->full_name(); - } - - if (fieldOptions.SerializationMode != EProtobufSerializationMode::Yt) { - return fieldConfig; - } - - if (fieldDescriptor->is_map()) { - fieldConfig["fields"] = MakeMapFieldsConfig(fieldDescriptor, enumerations, fieldOptions, cycleChecker); - return fieldConfig; - } - - if (fieldDescriptor->type() == FieldDescriptor::TYPE_MESSAGE) { - fieldConfig["fields"] = MakeProtoFormatMessageFieldsConfig( - fieldDescriptor->message_type(), - enumerations, - cycleChecker); - } - - return fieldConfig; -} - -void MakeProtoFormatOneofConfig( - const OneofDescriptor* oneofDescriptor, - TNode* enumerations, - const TProtobufFieldOptions& defaultFieldOptions, - const TProtobufOneofOptions& defaultOneofOptions, - TCycleChecker& cycleChecker, - TNode* fields) -{ - auto addFields = [&] (TNode* fields) { - for (int i = 0; i < oneofDescriptor->field_count(); ++i) { - fields->Add(MakeProtoFormatFieldConfig( - oneofDescriptor->field(i), - enumerations, - defaultFieldOptions, - cycleChecker)); - } - }; - - auto oneofOptions = GetOneofOptions(oneofDescriptor, defaultOneofOptions); - switch (oneofOptions.Mode) { - case EProtobufOneofMode::SeparateFields: - addFields(fields); - return; - case EProtobufOneofMode::Variant: { - auto oneofFields = TNode::CreateList(); - addFields(&oneofFields); - auto oneofField = TNode() - ("proto_type", "oneof") - ("name", oneofOptions.VariantFieldName) - ("fields", std::move(oneofFields)); - fields->Add(std::move(oneofField)); - return; - } - } - Y_FAIL(); -} - -TNode MakeProtoFormatMessageFieldsConfig( - const Descriptor* 
descriptor, - TNode* enumerations, - const TProtobufFieldOptions& defaultFieldOptions, - const TProtobufOneofOptions& defaultOneofOptions, - TCycleChecker& cycleChecker) -{ - auto fields = TNode::CreateList(); - THashSet<const OneofDescriptor*> visitedOneofs; - auto guard = cycleChecker.Enter(descriptor); - for (int fieldIndex = 0; fieldIndex < descriptor->field_count(); ++fieldIndex) { - auto fieldDescriptor = descriptor->field(fieldIndex); - auto oneofDescriptor = fieldDescriptor->containing_oneof(); - if (!oneofDescriptor) { - fields.Add(MakeProtoFormatFieldConfig( - fieldDescriptor, - enumerations, - defaultFieldOptions, - cycleChecker)); - } else if (!visitedOneofs.contains(oneofDescriptor)) { - MakeProtoFormatOneofConfig( - oneofDescriptor, - enumerations, - defaultFieldOptions, - defaultOneofOptions, - cycleChecker, - &fields); - visitedOneofs.insert(oneofDescriptor); - } - } - return fields; -} - -TNode MakeProtoFormatMessageFieldsConfig( - const Descriptor* descriptor, - TNode* enumerations, - TCycleChecker& cycleChecker) -{ - return MakeProtoFormatMessageFieldsConfig( - descriptor, - enumerations, - GetDefaultFieldOptions(descriptor), - GetDefaultOneofOptions(descriptor), - cycleChecker); -} - -TNode MakeProtoFormatConfigWithTables(const TVector<const Descriptor*>& descriptors) -{ - TNode config("protobuf"); - config.Attributes() - ("enumerations", TNode::CreateMap()) - ("tables", TNode::CreateList()); - - auto& enumerations = config.Attributes()["enumerations"]; - - for (auto* descriptor : descriptors) { - TCycleChecker cycleChecker; - auto columns = MakeProtoFormatMessageFieldsConfig(descriptor, &enumerations, cycleChecker); - config.Attributes()["tables"].Add( - TNode()("columns", std::move(columns))); - } - - return config; -} - -//////////////////////////////////////////////////////////////////////////////// - -class TFileDescriptorSetBuilder -{ -public: - TFileDescriptorSetBuilder() - : ExtensionFile_(EWrapperFieldFlag::descriptor()->file()) - { } - 
- void AddDescriptor(const Descriptor* descriptor) - { - auto [it, inserted] = AllDescriptors_.insert(descriptor); - if (!inserted) { - return; - } - - const auto* containingType = descriptor->containing_type(); - while (containingType) { - AddDescriptor(containingType); - containingType = containingType->containing_type(); - } - for (int i = 0; i < descriptor->field_count(); ++i) { - AddField(descriptor->field(i)); - } - } - - FileDescriptorSet Build() - { - THashSet<const FileDescriptor*> visitedFiles; - TVector<const FileDescriptor*> fileTopoOrder; - for (const auto* descriptor : AllDescriptors_) { - TraverseDependencies(descriptor->file(), visitedFiles, fileTopoOrder); - } - - THashSet<TString> messageTypeNames; - THashSet<TString> enumTypeNames; - for (const auto* descriptor : AllDescriptors_) { - messageTypeNames.insert(descriptor->full_name()); - } - for (const auto* enumDescriptor : EnumDescriptors_) { - enumTypeNames.insert(enumDescriptor->full_name()); - } - FileDescriptorSet fileDescriptorSetProto; - for (const auto* file : fileTopoOrder) { - auto* fileProto = fileDescriptorSetProto.add_file(); - file->CopyTo(fileProto); - Strip(fileProto, messageTypeNames, enumTypeNames); - } - return fileDescriptorSetProto; - } - -private: - void AddField(const FieldDescriptor* fieldDescriptor) - { - if (fieldDescriptor->message_type()) { - AddDescriptor(fieldDescriptor->message_type()); - } - if (fieldDescriptor->enum_type()) { - AddEnumDescriptor(fieldDescriptor->enum_type()); - } - } - - void AddEnumDescriptor(const EnumDescriptor* enumDescriptor) - { - auto [it, inserted] = EnumDescriptors_.insert(enumDescriptor); - if (!inserted) { - return; - } - const auto* containingType = enumDescriptor->containing_type(); - while (containingType) { - AddDescriptor(containingType); - containingType = containingType->containing_type(); - } - } - - void TraverseDependencies( - const FileDescriptor* current, - THashSet<const FileDescriptor*>& visited, - TVector<const 
FileDescriptor*>& topoOrder) - { - auto [it, inserted] = visited.insert(current); - if (!inserted) { - return; - } - for (int i = 0; i < current->dependency_count(); ++i) { - TraverseDependencies(current->dependency(i), visited, topoOrder); - } - topoOrder.push_back(current); - } - - template <typename TOptions> - void StripUnknownOptions(TOptions* options) - { - std::vector<const FieldDescriptor*> fields; - auto reflection = options->GetReflection(); - reflection->ListFields(*options, &fields); - for (auto field : fields) { - if (field->is_extension() && field->file() != ExtensionFile_) { - reflection->ClearField(options, field); - } - } - } - - template <typename TRepeatedField, typename TPredicate> - void RemoveIf(TRepeatedField* repeatedField, TPredicate predicate) - { - repeatedField->erase( - std::remove_if(repeatedField->begin(), repeatedField->end(), predicate), - repeatedField->end()); - } - - void Strip( - const TString& containingTypePrefix, - DescriptorProto* messageProto, - const THashSet<TString>& messageTypeNames, - const THashSet<TString>& enumTypeNames) - { - const auto prefix = containingTypePrefix + messageProto->name() + '.'; - - RemoveIf(messageProto->mutable_nested_type(), [&] (const DescriptorProto& descriptorProto) { - return !messageTypeNames.contains(prefix + descriptorProto.name()); - }); - RemoveIf(messageProto->mutable_enum_type(), [&] (const EnumDescriptorProto& enumDescriptorProto) { - return !enumTypeNames.contains(prefix + enumDescriptorProto.name()); - }); - - messageProto->clear_extension(); - StripUnknownOptions(messageProto->mutable_options()); - for (auto& fieldProto : *messageProto->mutable_field()) { - StripUnknownOptions(fieldProto.mutable_options()); - } - for (auto& oneofProto : *messageProto->mutable_oneof_decl()) { - StripUnknownOptions(oneofProto.mutable_options()); - } - for (auto& nestedTypeProto : *messageProto->mutable_nested_type()) { - Strip(prefix, &nestedTypeProto, messageTypeNames, enumTypeNames); - } - for 
(auto& enumProto : *messageProto->mutable_enum_type()) { - StripUnknownOptions(enumProto.mutable_options()); - for (auto& enumValue : *enumProto.mutable_value()) { - StripUnknownOptions(enumValue.mutable_options()); - } - } - } - - void Strip( - FileDescriptorProto* fileProto, - const THashSet<TString>& messageTypeNames, - const THashSet<TString>& enumTypeNames) - { - const auto prefix = fileProto->package().Empty() - ? "" - : fileProto->package() + '.'; - - RemoveIf(fileProto->mutable_message_type(), [&] (const DescriptorProto& descriptorProto) { - return !messageTypeNames.contains(prefix + descriptorProto.name()); - }); - RemoveIf(fileProto->mutable_enum_type(), [&] (const EnumDescriptorProto& enumDescriptorProto) { - return !enumTypeNames.contains(prefix + enumDescriptorProto.name()); - }); - - fileProto->clear_service(); - fileProto->clear_extension(); - - StripUnknownOptions(fileProto->mutable_options()); - for (auto& messageProto : *fileProto->mutable_message_type()) { - Strip(prefix, &messageProto, messageTypeNames, enumTypeNames); - } - for (auto& enumProto : *fileProto->mutable_enum_type()) { - StripUnknownOptions(enumProto.mutable_options()); - for (auto& enumValue : *enumProto.mutable_value()) { - StripUnknownOptions(enumValue.mutable_options()); - } - } - } - -private: - const FileDescriptor* const ExtensionFile_; - THashSet<const Descriptor*> AllDescriptors_; - THashSet<const EnumDescriptor*> EnumDescriptors_; -}; - -TNode MakeProtoFormatConfigWithDescriptors(const TVector<const Descriptor*>& descriptors) -{ - TFileDescriptorSetBuilder builder; - auto typeNames = TNode::CreateList(); - for (const auto* descriptor : descriptors) { - builder.AddDescriptor(descriptor); - typeNames.Add(descriptor->full_name()); - } - - auto fileDescriptorSetText = builder.Build().ShortDebugString(); - TNode config("protobuf"); - config.Attributes() - ("file_descriptor_set_text", std::move(fileDescriptorSetText)) - ("type_names", std::move(typeNames)); - return config; -} - 
-//////////////////////////////////////////////////////////////////////////////// - -using TTypePtrOrOtherColumns = std::variant<NTi::TTypePtr, TOtherColumns>; - -struct TMember { - TString Name; - TTypePtrOrOtherColumns TypeOrOtherColumns; -}; - -//////////////////////////////////////////////////////////////////////////////// - -TValueTypeOrOtherColumns GetScalarFieldType( - const FieldDescriptor& fieldDescriptor, - const TProtobufFieldOptions& options) -{ - if (options.Type) { - switch (*options.Type) { - case EProtobufType::EnumInt: - return EValueType::VT_INT64; - case EProtobufType::EnumString: - return EValueType::VT_STRING; - case EProtobufType::Any: - return EValueType::VT_ANY; - case EProtobufType::OtherColumns: - return TOtherColumns{}; - } - Y_FAIL(); - } - - switch (fieldDescriptor.cpp_type()) { - case FieldDescriptor::CPPTYPE_INT32: - return EValueType::VT_INT32; - case FieldDescriptor::CPPTYPE_INT64: - return EValueType::VT_INT64; - case FieldDescriptor::CPPTYPE_UINT32: - return EValueType::VT_UINT32; - case FieldDescriptor::CPPTYPE_UINT64: - return EValueType::VT_UINT64; - case FieldDescriptor::CPPTYPE_FLOAT: - case FieldDescriptor::CPPTYPE_DOUBLE: - return EValueType::VT_DOUBLE; - case FieldDescriptor::CPPTYPE_BOOL: - return EValueType::VT_BOOLEAN; - case FieldDescriptor::CPPTYPE_STRING: - case FieldDescriptor::CPPTYPE_MESSAGE: - case FieldDescriptor::CPPTYPE_ENUM: - return EValueType::VT_STRING; - default: - ythrow yexception() << - "Unexpected field type '" << fieldDescriptor.cpp_type_name() << "' " << - "for field " << fieldDescriptor.name(); - } -} - -bool HasNameExtension(const FieldDescriptor& fieldDescriptor) -{ - const auto& options = fieldDescriptor.options(); - return options.HasExtension(column_name) || options.HasExtension(key_column_name); -} - -void SortFields(TVector<const FieldDescriptor*>& fieldDescriptors, EProtobufFieldSortOrder fieldSortOrder) -{ - switch (fieldSortOrder) { - case EProtobufFieldSortOrder::AsInProtoFile: - return; 
- case EProtobufFieldSortOrder::ByFieldNumber: - SortBy(fieldDescriptors, [] (const FieldDescriptor* fieldDescriptor) { - return fieldDescriptor->number(); - }); - return; - } - Y_FAIL(); -} - -NTi::TTypePtr CreateStruct(TStringBuf fieldName, TVector<TMember> members) -{ - TVector<NTi::TStructType::TOwnedMember> structMembers; - structMembers.reserve(members.size()); - for (auto& member : members) { - std::visit(TOverloaded{ - [&] (TOtherColumns) { - ythrow TApiUsageError() << - "Could not deduce YT type for field " << member.Name << " of " << - "embedded message field " << fieldName << " " << - "(note that " << EWrapperFieldFlag::OTHER_COLUMNS << " fields " << - "are not allowed inside embedded messages)"; - }, - [&] (NTi::TTypePtr& type) { - structMembers.emplace_back(std::move(member.Name), std::move(type)); - }, - }, member.TypeOrOtherColumns); - } - return NTi::Struct(std::move(structMembers)); -} - -TMaybe<TVector<TString>> InferColumnFilter(const ::google::protobuf::Descriptor& descriptor) -{ - auto isOtherColumns = [] (const ::google::protobuf::FieldDescriptor& field) { - return GetFieldOptions(&field).Type == EProtobufType::OtherColumns; - }; - - TVector<TString> result; - result.reserve(descriptor.field_count()); - for (int i = 0; i < descriptor.field_count(); ++i) { - const auto& field = *descriptor.field(i); - if (isOtherColumns(field)) { - return {}; - } - result.push_back(GetColumnName(field)); - } - return result; -} - -//////////////////////////////////////////////////////////////////////////////// - -class TTableSchemaInferrer -{ -public: - TTableSchemaInferrer(bool keepFieldsWithoutExtension) - : KeepFieldsWithoutExtension_(keepFieldsWithoutExtension) - { } - - TTableSchema InferSchema(const Descriptor& messageDescriptor); - -private: - TTypePtrOrOtherColumns GetFieldType( - const FieldDescriptor& fieldDescriptor, - const TProtobufFieldOptions& defaultOptions); - - void ProcessOneofField( - TStringBuf containingFieldName, - const OneofDescriptor& 
oneofDescriptor, - const TProtobufFieldOptions& defaultFieldOptions, - const TProtobufOneofOptions& defaultOneofOptions, - EProtobufFieldSortOrder fieldSortOrder, - TVector<TMember>* members); - - TVector<TMember> GetMessageMembers( - TStringBuf containingFieldName, - const Descriptor& fieldDescriptor, - TProtobufFieldOptions defaultFieldOptions, - std::optional<EProtobufFieldSortOrder> overrideFieldSortOrder = std::nullopt); - - NTi::TTypePtr GetMessageType( - const FieldDescriptor& fieldDescriptor, - TProtobufFieldOptions defaultFieldOptions); - - NTi::TTypePtr GetMapType( - const FieldDescriptor& fieldDescriptor, - const TProtobufFieldOptions& fieldOptions); - -private: - void GetMessageMembersImpl( - TStringBuf containingFieldName, - const Descriptor& fieldDescriptor, - TProtobufFieldOptions defaultFieldOptions, - std::optional<EProtobufFieldSortOrder> overrideFieldSortOrder, - TVector<TMember>* members); - -private: - const bool KeepFieldsWithoutExtension_; - TCycleChecker CycleChecker_; -}; - -void TTableSchemaInferrer::ProcessOneofField( - TStringBuf containingFieldName, - const OneofDescriptor& oneofDescriptor, - const TProtobufFieldOptions& defaultFieldOptions, - const TProtobufOneofOptions& defaultOneofOptions, - EProtobufFieldSortOrder fieldSortOrder, - TVector<TMember>* members) -{ - auto oneofOptions = GetOneofOptions(&oneofDescriptor, defaultOneofOptions); - - auto addFields = [&] (TVector<TMember>* members, bool removeOptionality) { - TVector<const FieldDescriptor*> fieldDescriptors; - for (int i = 0; i < oneofDescriptor.field_count(); ++i) { - fieldDescriptors.push_back(oneofDescriptor.field(i)); - } - SortFields(fieldDescriptors, fieldSortOrder); - for (auto innerFieldDescriptor : fieldDescriptors) { - auto typeOrOtherColumns = GetFieldType( - *innerFieldDescriptor, - defaultFieldOptions); - if (auto* maybeType = std::get_if<NTi::TTypePtr>(&typeOrOtherColumns); - maybeType && removeOptionality && (*maybeType)->IsOptional()) - { - typeOrOtherColumns 
= (*maybeType)->AsOptional()->GetItemType(); - } - members->push_back(TMember{ - GetColumnName(*innerFieldDescriptor), - std::move(typeOrOtherColumns), - }); - } - }; - - switch (oneofOptions.Mode) { - case EProtobufOneofMode::SeparateFields: - addFields(members, /* removeOptionality */ false); - return; - case EProtobufOneofMode::Variant: { - TVector<TMember> variantMembers; - addFields(&variantMembers, /* removeOptionality */ true); - members->push_back(TMember{ - oneofOptions.VariantFieldName, - NTi::Optional( - NTi::Variant( - CreateStruct(containingFieldName, std::move(variantMembers)) - ) - ) - }); - return; - } - } - Y_FAIL(); -} - -TVector<TMember> TTableSchemaInferrer::GetMessageMembers( - TStringBuf containingFieldName, - const Descriptor& messageDescriptor, - TProtobufFieldOptions defaultFieldOptions, - std::optional<EProtobufFieldSortOrder> overrideFieldSortOrder) -{ - TVector<TMember> members; - GetMessageMembersImpl( - containingFieldName, - messageDescriptor, - defaultFieldOptions, - overrideFieldSortOrder, - &members - ); - return members; -} - -void TTableSchemaInferrer::GetMessageMembersImpl( - TStringBuf containingFieldName, - const Descriptor& messageDescriptor, - TProtobufFieldOptions defaultFieldOptions, - std::optional<EProtobufFieldSortOrder> overrideFieldSortOrder, - TVector<TMember>* members) -{ - auto guard = CycleChecker_.Enter(&messageDescriptor); - defaultFieldOptions = GetDefaultFieldOptions(&messageDescriptor, defaultFieldOptions); - auto messageOptions = GetMessageOptions(&messageDescriptor); - auto defaultOneofOptions = GetDefaultOneofOptions(&messageDescriptor); - - TVector<const FieldDescriptor*> fieldDescriptors; - fieldDescriptors.reserve(messageDescriptor.field_count()); - for (int i = 0; i < messageDescriptor.field_count(); ++i) { - if (!KeepFieldsWithoutExtension_ && !HasNameExtension(*messageDescriptor.field(i))) { - continue; - } - fieldDescriptors.push_back(messageDescriptor.field(i)); - } - - auto fieldSortOrder = 
overrideFieldSortOrder.value_or(messageOptions.FieldSortOrder); - SortFields(fieldDescriptors, fieldSortOrder); - - THashSet<const OneofDescriptor*> visitedOneofs; - for (const auto innerFieldDescriptor : fieldDescriptors) { - auto oneofDescriptor = innerFieldDescriptor->containing_oneof(); - if (oneofDescriptor) { - if (visitedOneofs.contains(oneofDescriptor)) { - continue; - } - ProcessOneofField( - containingFieldName, - *oneofDescriptor, - defaultFieldOptions, - defaultOneofOptions, - messageOptions.FieldSortOrder, - members); - visitedOneofs.insert(oneofDescriptor); - continue; - } - auto fieldOptions = GetFieldOptions(innerFieldDescriptor, defaultFieldOptions); - if (fieldOptions.SerializationMode == EProtobufSerializationMode::Embedded) { - Y_ENSURE(innerFieldDescriptor->type() == FieldDescriptor::TYPE_MESSAGE, - "EMBEDDED column must have message type"); - Y_ENSURE(innerFieldDescriptor->label() == FieldDescriptor::LABEL_REQUIRED, - "EMBEDDED column must be marked required"); - GetMessageMembersImpl( - innerFieldDescriptor->full_name(), - *innerFieldDescriptor->message_type(), - defaultFieldOptions, - /*overrideFieldSortOrder*/ std::nullopt, - members); - } else { - auto typeOrOtherColumns = GetFieldType( - *innerFieldDescriptor, - defaultFieldOptions); - members->push_back(TMember{ - GetColumnName(*innerFieldDescriptor), - std::move(typeOrOtherColumns), - }); - } - } -} - -NTi::TTypePtr TTableSchemaInferrer::GetMessageType( - const FieldDescriptor& fieldDescriptor, - TProtobufFieldOptions defaultFieldOptions) -{ - Y_VERIFY(fieldDescriptor.message_type()); - const auto& messageDescriptor = *fieldDescriptor.message_type(); - auto members = GetMessageMembers( - fieldDescriptor.full_name(), - messageDescriptor, - defaultFieldOptions); - - return CreateStruct(fieldDescriptor.full_name(), std::move(members)); -} - -NTi::TTypePtr TTableSchemaInferrer::GetMapType( - const FieldDescriptor& fieldDescriptor, - const TProtobufFieldOptions& fieldOptions) -{ - 
Y_VERIFY(fieldDescriptor.is_map()); - switch (fieldOptions.MapMode) { - case EProtobufMapMode::ListOfStructsLegacy: - case EProtobufMapMode::ListOfStructs: { - TProtobufFieldOptions embeddedOptions; - if (fieldOptions.MapMode == EProtobufMapMode::ListOfStructs) { - embeddedOptions.SerializationMode = EProtobufSerializationMode::Yt; - } - auto list = NTi::List(GetMessageType(fieldDescriptor, embeddedOptions)); - switch (fieldOptions.ListMode) { - case EProtobufListMode::Required: - return list; - case EProtobufListMode::Optional: - return NTi::Optional(std::move(list)); - } - Y_FAIL(); - } - case EProtobufMapMode::Dict: - case EProtobufMapMode::OptionalDict: { - auto message = fieldDescriptor.message_type(); - Y_VERIFY(message->field_count() == 2); - auto keyVariant = GetScalarFieldType(*message->field(0), TProtobufFieldOptions{}); - Y_VERIFY(std::holds_alternative<EValueType>(keyVariant)); - auto key = std::get<EValueType>(keyVariant); - TProtobufFieldOptions embeddedOptions; - embeddedOptions.SerializationMode = EProtobufSerializationMode::Yt; - auto valueVariant = GetFieldType(*message->field(1), embeddedOptions); - Y_VERIFY(std::holds_alternative<NTi::TTypePtr>(valueVariant)); - auto value = std::get<NTi::TTypePtr>(valueVariant); - Y_VERIFY(value->IsOptional()); - value = value->AsOptional()->GetItemType(); - auto dict = NTi::Dict(ToTypeV3(key, true), value); - if (fieldOptions.MapMode == EProtobufMapMode::OptionalDict) { - return NTi::Optional(dict); - } else { - return dict; - } - } - } -} - -TTypePtrOrOtherColumns TTableSchemaInferrer::GetFieldType( - const FieldDescriptor& fieldDescriptor, - const TProtobufFieldOptions& defaultOptions) -{ - auto fieldOptions = GetFieldOptions(&fieldDescriptor, defaultOptions); - if (fieldOptions.Type) { - ValidateProtobufType(fieldDescriptor, *fieldOptions.Type); - } - - auto getScalarType = [&] { - auto valueTypeOrOtherColumns = GetScalarFieldType(fieldDescriptor, fieldOptions); - return std::visit(TOverloaded{ - [] 
(TOtherColumns) -> TTypePtrOrOtherColumns { - return TOtherColumns{}; - }, - [] (EValueType valueType) -> TTypePtrOrOtherColumns { - return ToTypeV3(valueType, true); - } - }, valueTypeOrOtherColumns); - }; - - auto withFieldLabel = [&] (const TTypePtrOrOtherColumns& typeOrOtherColumns) -> TTypePtrOrOtherColumns { - switch (fieldDescriptor.label()) { - case FieldDescriptor::Label::LABEL_REPEATED: { - Y_ENSURE(fieldOptions.SerializationMode == EProtobufSerializationMode::Yt, - "Repeated fields are supported only for YT serialization mode, field \"" + fieldDescriptor.full_name() + - "\" has incorrect serialization mode"); - auto* type = std::get_if<NTi::TTypePtr>(&typeOrOtherColumns); - Y_ENSURE(type, "OTHER_COLUMNS field can not be repeated"); - switch (fieldOptions.ListMode) { - case EProtobufListMode::Required: - return NTi::TTypePtr(NTi::List(*type)); - case EProtobufListMode::Optional: - return NTi::TTypePtr(NTi::Optional(NTi::List(*type))); - } - Y_FAIL(); - } - case FieldDescriptor::Label::LABEL_OPTIONAL: - return std::visit(TOverloaded{ - [] (TOtherColumns) -> TTypePtrOrOtherColumns { - return TOtherColumns{}; - }, - [] (NTi::TTypePtr type) -> TTypePtrOrOtherColumns { - return NTi::TTypePtr(NTi::Optional(std::move(type))); - } - }, typeOrOtherColumns); - case FieldDescriptor::LABEL_REQUIRED: { - auto* type = std::get_if<NTi::TTypePtr>(&typeOrOtherColumns); - Y_ENSURE(type, "OTHER_COLUMNS field can not be required"); - return *type; - } - } - Y_FAIL(); - }; - - switch (fieldOptions.SerializationMode) { - case EProtobufSerializationMode::Protobuf: - return withFieldLabel(getScalarType()); - case EProtobufSerializationMode::Yt: - if (fieldDescriptor.type() == FieldDescriptor::TYPE_MESSAGE) { - if (fieldDescriptor.is_map()) { - return GetMapType(fieldDescriptor, fieldOptions); - } else { - return withFieldLabel(GetMessageType(fieldDescriptor, TProtobufFieldOptions{})); - } - } else { - return withFieldLabel(getScalarType()); - } - case 
EProtobufSerializationMode::Embedded: - ythrow yexception() << "EMBEDDED field is not allowed for field " - << fieldDescriptor.full_name(); - } - Y_FAIL(); -} - -TTableSchema TTableSchemaInferrer::InferSchema(const Descriptor& messageDescriptor) -{ - TTableSchema result; - - auto defaultFieldOptions = GetDefaultFieldOptions(&messageDescriptor); - auto members = GetMessageMembers( - messageDescriptor.full_name(), - messageDescriptor, - defaultFieldOptions, - // Use special sort order for top level messages. - /*overrideFieldSortOrder*/ EProtobufFieldSortOrder::AsInProtoFile); - - for (auto& member : members) { - std::visit(TOverloaded{ - [&] (TOtherColumns) { - result.Strict(false); - }, - [&] (NTi::TTypePtr& type) { - result.AddColumn(TColumnSchema() - .Name(std::move(member.Name)) - .Type(std::move(type)) - ); - }, - }, member.TypeOrOtherColumns); - } - - return result; -} - -TTableSchema CreateTableSchemaImpl( - const Descriptor& messageDescriptor, - bool keepFieldsWithoutExtension) -{ - TTableSchemaInferrer inferrer(keepFieldsWithoutExtension); - return inferrer.InferSchema(messageDescriptor); -} - -//////////////////////////////////////////////////////////////////////////////// - -} // namespace NYT::NDetail - -//////////////////////////////////////////////////////////////////////////////// - -template <> -void Out<NYT::EWrapperFieldFlag::Enum>(IOutputStream& stream, NYT::EWrapperFieldFlag::Enum value) -{ - stream << NYT::EWrapperFieldFlag_Enum_Name(value); -} - -template <> -void Out<NYT::EWrapperMessageFlag::Enum>(IOutputStream& stream, NYT::EWrapperMessageFlag::Enum value) -{ - stream << NYT::EWrapperMessageFlag_Enum_Name(value); -} - -template <> -void Out<NYT::EWrapperOneofFlag::Enum>(IOutputStream& stream, NYT::EWrapperOneofFlag::Enum value) -{ - stream << NYT::EWrapperOneofFlag_Enum_Name(value); -} diff --git a/yt/cpp/mapreduce/interface/protobuf_format.h b/yt/cpp/mapreduce/interface/protobuf_format.h deleted file mode 100644 index aafbced386..0000000000 
--- a/yt/cpp/mapreduce/interface/protobuf_format.h +++ /dev/null @@ -1,106 +0,0 @@ -#pragma once - -#include "common.h" - -#include <yt/yt_proto/yt/formats/extension.pb.h> - -#include <util/generic/maybe.h> - -#include <google/protobuf/message.h> - -/// @cond Doxygen_Suppress -namespace NYT::NDetail { - -//////////////////////////////////////////////////////////////////////////////// - -enum class EProtobufType -{ - EnumInt /* "enum_int" */, - EnumString /* "enum_string" */, - Any /* "any" */, - OtherColumns /* "other_columns" */, -}; - -enum class EProtobufSerializationMode -{ - Protobuf, - Yt, - Embedded, -}; - -enum class EProtobufListMode -{ - Optional, - Required, -}; - -enum class EProtobufMapMode -{ - ListOfStructsLegacy, - ListOfStructs, - Dict, - OptionalDict, -}; - -enum class EProtobufFieldSortOrder -{ - AsInProtoFile, - ByFieldNumber, -}; - -enum class EProtobufOneofMode -{ - SeparateFields, - Variant, -}; - -enum class EProtobufEnumWritingMode -{ - SkipUnknownValues, - CheckValues, -}; - -struct TProtobufOneofOptions -{ - EProtobufOneofMode Mode = EProtobufOneofMode::Variant; - TString VariantFieldName; -}; - -struct TProtobufFieldOptions -{ - TMaybe<EProtobufType> Type; - EProtobufSerializationMode SerializationMode = EProtobufSerializationMode::Protobuf; - EProtobufListMode ListMode = EProtobufListMode::Required; - EProtobufMapMode MapMode = EProtobufMapMode::ListOfStructsLegacy; -}; - -struct TProtobufMessageOptions -{ - EProtobufFieldSortOrder FieldSortOrder = EProtobufFieldSortOrder::ByFieldNumber; -}; - -TString GetColumnName(const ::google::protobuf::FieldDescriptor& field); - -TProtobufFieldOptions GetFieldOptions( - const ::google::protobuf::FieldDescriptor* fieldDescriptor, - const TMaybe<TProtobufFieldOptions>& defaultFieldOptions = {}); - -TProtobufOneofOptions GetOneofOptions( - const ::google::protobuf::OneofDescriptor* oneofDescriptor, - const TMaybe<TProtobufOneofOptions>& defaultOneofOptions = {}); - -TProtobufMessageOptions 
GetMessageOptions(const ::google::protobuf::Descriptor* descriptor); - -TMaybe<TVector<TString>> InferColumnFilter(const ::google::protobuf::Descriptor& descriptor); - -TNode MakeProtoFormatConfigWithTables(const TVector<const ::google::protobuf::Descriptor*>& descriptors); -TNode MakeProtoFormatConfigWithDescriptors(const TVector<const ::google::protobuf::Descriptor*>& descriptors); - -TTableSchema CreateTableSchemaImpl( - const ::google::protobuf::Descriptor& messageDescriptor, - bool keepFieldsWithoutExtension); - -//////////////////////////////////////////////////////////////////////////////// - -} // namespace NYT::NDetail -/// @endcond diff --git a/yt/cpp/mapreduce/interface/protobuf_table_schema_ut.cpp b/yt/cpp/mapreduce/interface/protobuf_table_schema_ut.cpp deleted file mode 100644 index 19a3d5163f..0000000000 --- a/yt/cpp/mapreduce/interface/protobuf_table_schema_ut.cpp +++ /dev/null @@ -1,451 +0,0 @@ -#include "common.h" -#include "errors.h" -#include "common_ut.h" -#include "util/generic/fwd.h" - -#include <yt/cpp/mapreduce/interface/protobuf_table_schema_ut.pb.h> -#include <yt/cpp/mapreduce/interface/proto3_ut.pb.h> - -#include <yt/cpp/mapreduce/tests/yt_unittest_lib/yt_unittest_lib.h> - -#include <library/cpp/testing/unittest/registar.h> - -#include <algorithm> - -using namespace NYT; - -bool IsFieldPresent(const TTableSchema& schema, TStringBuf name) -{ - for (const auto& field : schema.Columns()) { - if (field.Name() == name) { - return true; - } - } - return false; -} - -Y_UNIT_TEST_SUITE(ProtoSchemaTest_Simple) -{ - Y_UNIT_TEST(TIntegral) - { - const auto schema = CreateTableSchema<NUnitTesting::TIntegral>(); - - ASSERT_SERIALIZABLES_EQUAL(schema, TTableSchema() - .AddColumn(TColumnSchema().Name("DoubleField").Type(ToTypeV3(EValueType::VT_DOUBLE, false))) - .AddColumn(TColumnSchema().Name("FloatField").Type(ToTypeV3(EValueType::VT_DOUBLE, false))) - .AddColumn(TColumnSchema().Name("Int32Field").Type(ToTypeV3(EValueType::VT_INT32, false))) - 
.AddColumn(TColumnSchema().Name("Int64Field").Type(ToTypeV3(EValueType::VT_INT64, false))) - .AddColumn(TColumnSchema().Name("Uint32Field").Type(ToTypeV3(EValueType::VT_UINT32, false))) - .AddColumn(TColumnSchema().Name("Uint64Field").Type(ToTypeV3(EValueType::VT_UINT64, false))) - .AddColumn(TColumnSchema().Name("Sint32Field").Type(ToTypeV3(EValueType::VT_INT32, false))) - .AddColumn(TColumnSchema().Name("Sint64Field").Type(ToTypeV3(EValueType::VT_INT64, false))) - .AddColumn(TColumnSchema().Name("Fixed32Field").Type(ToTypeV3(EValueType::VT_UINT32, false))) - .AddColumn(TColumnSchema().Name("Fixed64Field").Type(ToTypeV3(EValueType::VT_UINT64, false))) - .AddColumn(TColumnSchema().Name("Sfixed32Field").Type(ToTypeV3(EValueType::VT_INT32, false))) - .AddColumn(TColumnSchema().Name("Sfixed64Field").Type(ToTypeV3(EValueType::VT_INT64, false))) - .AddColumn(TColumnSchema().Name("BoolField").Type(ToTypeV3(EValueType::VT_BOOLEAN, false))) - .AddColumn(TColumnSchema().Name("EnumField").Type(ToTypeV3(EValueType::VT_STRING, false)))); - } - - Y_UNIT_TEST(TOneOf) - { - const auto schema = CreateTableSchema<NUnitTesting::TOneOf>(); - - ASSERT_SERIALIZABLES_EQUAL(schema, TTableSchema() - .AddColumn(TColumnSchema().Name("DoubleField").Type(ToTypeV3(EValueType::VT_DOUBLE, false))) - .AddColumn(TColumnSchema().Name("Int32Field").Type(ToTypeV3(EValueType::VT_INT32, false))) - .AddColumn(TColumnSchema().Name("BoolField").Type(ToTypeV3(EValueType::VT_BOOLEAN, false)))); - } - - Y_UNIT_TEST(TWithRequired) - { - const auto schema = CreateTableSchema<NUnitTesting::TWithRequired>(); - - ASSERT_SERIALIZABLES_EQUAL(schema, TTableSchema() - .AddColumn(TColumnSchema().Name("RequiredField").Type(ToTypeV3(EValueType::VT_STRING, true))) - .AddColumn(TColumnSchema().Name("NotRequiredField").Type(ToTypeV3(EValueType::VT_STRING, false)))); - } - - Y_UNIT_TEST(TAggregated) - { - const auto schema = CreateTableSchema<NUnitTesting::TAggregated>(); - - UNIT_ASSERT_VALUES_EQUAL(6, 
schema.Columns().size()); - ASSERT_SERIALIZABLES_EQUAL(schema, TTableSchema() - .AddColumn(TColumnSchema().Name("StringField").Type(ToTypeV3(EValueType::VT_STRING, false))) - .AddColumn(TColumnSchema().Name("BytesField").Type(ToTypeV3(EValueType::VT_STRING, false))) - .AddColumn(TColumnSchema().Name("NestedField").Type(ToTypeV3(EValueType::VT_STRING, false))) - .AddColumn(TColumnSchema().Name("NestedRepeatedField").Type(ToTypeV3(EValueType::VT_STRING, false))) - .AddColumn(TColumnSchema().Name("NestedOneOfField").Type(ToTypeV3(EValueType::VT_STRING, false))) - .AddColumn(TColumnSchema().Name("NestedRecursiveField").Type(ToTypeV3(EValueType::VT_STRING, false)))); - } - - Y_UNIT_TEST(TAliased) - { - const auto schema = CreateTableSchema<NUnitTesting::TAliased>(); - - ASSERT_SERIALIZABLES_EQUAL(schema, TTableSchema() - .AddColumn(TColumnSchema().Name("key").Type(ToTypeV3(EValueType::VT_INT32, false))) - .AddColumn(TColumnSchema().Name("subkey").Type(ToTypeV3(EValueType::VT_DOUBLE, false))) - .AddColumn(TColumnSchema().Name("Data").Type(ToTypeV3(EValueType::VT_STRING, false)))); - } - - Y_UNIT_TEST(SortColumns) - { - const TSortColumns keys = {"key", "subkey"}; - - const auto schema = CreateTableSchema<NUnitTesting::TAliased>(keys); - - ASSERT_SERIALIZABLES_EQUAL(schema, TTableSchema() - .AddColumn(TColumnSchema() - .Name("key") - .Type(ToTypeV3(EValueType::VT_INT32, false)) - .SortOrder(ESortOrder::SO_ASCENDING)) - .AddColumn(TColumnSchema() - .Name("subkey") - .Type(ToTypeV3(EValueType::VT_DOUBLE, false)) - .SortOrder(ESortOrder::SO_ASCENDING)) - .AddColumn(TColumnSchema().Name("Data").Type(ToTypeV3(EValueType::VT_STRING, false)))); - } - - Y_UNIT_TEST(SortColumnsReordered) - { - const TSortColumns keys = {"subkey"}; - - const auto schema = CreateTableSchema<NUnitTesting::TAliased>(keys); - - ASSERT_SERIALIZABLES_EQUAL(schema, TTableSchema() - .AddColumn(TColumnSchema() - .Name("subkey") - .Type(ToTypeV3(EValueType::VT_DOUBLE, false)) - 
.SortOrder(ESortOrder::SO_ASCENDING)) - .AddColumn(TColumnSchema().Name("key").Type(ToTypeV3(EValueType::VT_INT32, false))) - .AddColumn(TColumnSchema().Name("Data").Type(ToTypeV3(EValueType::VT_STRING, false)))); - } - - Y_UNIT_TEST(SortColumnsInvalid) - { - UNIT_ASSERT_EXCEPTION(CreateTableSchema<NUnitTesting::TAliased>({"subkey", "subkey"}), yexception); - UNIT_ASSERT_EXCEPTION(CreateTableSchema<NUnitTesting::TAliased>({"key", "junk"}), yexception); - } - - Y_UNIT_TEST(KeepFieldsWithoutExtensionTrue) - { - const auto schema = CreateTableSchema<NUnitTesting::TAliased>({}, true); - UNIT_ASSERT(IsFieldPresent(schema, "key")); - UNIT_ASSERT(IsFieldPresent(schema, "subkey")); - UNIT_ASSERT(IsFieldPresent(schema, "Data")); - UNIT_ASSERT(schema.Strict()); - } - - Y_UNIT_TEST(KeepFieldsWithoutExtensionFalse) - { - const auto schema = CreateTableSchema<NUnitTesting::TAliased>({}, false); - UNIT_ASSERT(IsFieldPresent(schema, "key")); - UNIT_ASSERT(IsFieldPresent(schema, "subkey")); - UNIT_ASSERT(!IsFieldPresent(schema, "Data")); - UNIT_ASSERT(schema.Strict()); - } - - Y_UNIT_TEST(ProtobufTypeOption) - { - const auto schema = CreateTableSchema<NUnitTesting::TWithTypeOptions>({}); - - ASSERT_SERIALIZABLES_EQUAL(schema, TTableSchema() - .Strict(false) - .AddColumn(TColumnSchema().Name("ColorIntField").Type(ToTypeV3(EValueType::VT_INT64, false))) - .AddColumn(TColumnSchema().Name("ColorStringField").Type(ToTypeV3(EValueType::VT_STRING, false))) - .AddColumn(TColumnSchema().Name("AnyField").Type(ToTypeV3(EValueType::VT_ANY, false))) - .AddColumn(TColumnSchema().Name("EmbeddedField").Type( - NTi::Optional(NTi::Struct({ - {"ColorIntField", ToTypeV3(EValueType::VT_INT64, false)}, - {"ColorStringField", ToTypeV3(EValueType::VT_STRING, false)}, - {"AnyField", ToTypeV3(EValueType::VT_ANY, false)}})))) - .AddColumn(TColumnSchema().Name("RepeatedEnumIntField").Type(NTi::List(NTi::Int64())))); - } - - Y_UNIT_TEST(ProtobufTypeOption_TypeMismatch) - { - UNIT_ASSERT_EXCEPTION( - 
CreateTableSchema<NUnitTesting::TWithTypeOptions_TypeMismatch_EnumInt>({}), - yexception); - UNIT_ASSERT_EXCEPTION( - CreateTableSchema<NUnitTesting::TWithTypeOptions_TypeMismatch_EnumString>({}), - yexception); - UNIT_ASSERT_EXCEPTION( - CreateTableSchema<NUnitTesting::TWithTypeOptions_TypeMismatch_Any>({}), - yexception); - UNIT_ASSERT_EXCEPTION( - CreateTableSchema<NUnitTesting::TWithTypeOptions_TypeMismatch_OtherColumns>({}), - yexception); - } -} - -Y_UNIT_TEST_SUITE(ProtoSchemaTest_Complex) -{ - Y_UNIT_TEST(TRepeated) - { - UNIT_ASSERT_EXCEPTION(CreateTableSchema<NUnitTesting::TRepeated>(), yexception); - - const auto schema = CreateTableSchema<NUnitTesting::TRepeatedYtMode>(); - ASSERT_SERIALIZABLES_EQUAL(schema, TTableSchema() - .AddColumn(TColumnSchema().Name("Int32Field").Type(NTi::List(ToTypeV3(EValueType::VT_INT32, true))))); - } - - Y_UNIT_TEST(TRepeatedOptionalList) - { - const auto schema = CreateTableSchema<NUnitTesting::TOptionalList>(); - auto type = NTi::Optional(NTi::List(NTi::Int64())); - ASSERT_SERIALIZABLES_EQUAL(schema, TTableSchema() - .AddColumn(TColumnSchema().Name("OptionalListInt64").TypeV3(type))); - } - - NTi::TTypePtr GetUrlRowType(bool required) - { - static const NTi::TTypePtr structType = NTi::Struct({ - {"Host", ToTypeV3(EValueType::VT_STRING, false)}, - {"Path", ToTypeV3(EValueType::VT_STRING, false)}, - {"HttpCode", ToTypeV3(EValueType::VT_INT32, false)}}); - return required ? 
structType : NTi::TTypePtr(NTi::Optional(structType)); - } - - Y_UNIT_TEST(TRowFieldSerializationOption) - { - const auto schema = CreateTableSchema<NUnitTesting::TRowFieldSerializationOption>(); - - ASSERT_SERIALIZABLES_EQUAL(schema, TTableSchema() - .AddColumn(TColumnSchema().Name("UrlRow_1").Type(GetUrlRowType(false))) - .AddColumn(TColumnSchema().Name("UrlRow_2").Type(ToTypeV3(EValueType::VT_STRING, false)))); - } - - Y_UNIT_TEST(TRowMessageSerializationOption) - { - const auto schema = CreateTableSchema<NUnitTesting::TRowMessageSerializationOption>(); - - ASSERT_SERIALIZABLES_EQUAL(schema, TTableSchema() - .AddColumn(TColumnSchema().Name("UrlRow_1").Type(GetUrlRowType(false))) - .AddColumn(TColumnSchema().Name("UrlRow_2").Type(GetUrlRowType(false)))); - } - - Y_UNIT_TEST(TRowMixedSerializationOptions) - { - const auto schema = CreateTableSchema<NUnitTesting::TRowMixedSerializationOptions>(); - - ASSERT_SERIALIZABLES_EQUAL(schema, TTableSchema() - .AddColumn(TColumnSchema().Name("UrlRow_1").Type(GetUrlRowType(false))) - .AddColumn(TColumnSchema().Name("UrlRow_2").Type(ToTypeV3(EValueType::VT_STRING, false)))); - } - - NTi::TTypePtr GetUrlRowType_ColumnNames(bool required) - { - static const NTi::TTypePtr type = NTi::Struct({ - {"Host_ColumnName", ToTypeV3(EValueType::VT_STRING, false)}, - {"Path_KeyColumnName", ToTypeV3(EValueType::VT_STRING, false)}, - {"HttpCode", ToTypeV3(EValueType::VT_INT32, false)}, - }); - return required ? 
type : NTi::TTypePtr(NTi::Optional(type)); - } - - Y_UNIT_TEST(TRowMixedSerializationOptions_ColumnNames) - { - const auto schema = CreateTableSchema<NUnitTesting::TRowMixedSerializationOptions_ColumnNames>(); - - ASSERT_SERIALIZABLES_EQUAL(schema, TTableSchema() - .AddColumn(TColumnSchema().Name("UrlRow_1").Type(GetUrlRowType_ColumnNames(false))) - .AddColumn(TColumnSchema().Name("UrlRow_2").Type(ToTypeV3(EValueType::VT_STRING, false)))); - } - - Y_UNIT_TEST(NoOptionInheritance) - { - auto deepestEmbedded = NTi::Optional(NTi::Struct({{"x", ToTypeV3(EValueType::VT_INT64, false)}})); - - const auto schema = CreateTableSchema<NUnitTesting::TNoOptionInheritance>(); - - ASSERT_SERIALIZABLES_EQUAL(schema, TTableSchema() - .AddColumn(TColumnSchema() - .Name("EmbeddedYt_YtOption") - .Type(NTi::Optional(NTi::Struct({{"embedded", deepestEmbedded}})))) - .AddColumn(TColumnSchema().Name("EmbeddedYt_ProtobufOption").Type(ToTypeV3(EValueType::VT_STRING, false))) - .AddColumn(TColumnSchema().Name("EmbeddedYt_NoOption").Type(ToTypeV3(EValueType::VT_STRING, false))) - .AddColumn(TColumnSchema() - .Name("EmbeddedProtobuf_YtOption") - .Type(NTi::Optional(NTi::Struct({{"embedded", ToTypeV3(EValueType::VT_STRING, false)}})))) - .AddColumn(TColumnSchema().Name("EmbeddedProtobuf_ProtobufOption").Type(ToTypeV3(EValueType::VT_STRING, false))) - .AddColumn(TColumnSchema().Name("EmbeddedProtobuf_NoOption").Type(ToTypeV3(EValueType::VT_STRING, false))) - .AddColumn(TColumnSchema() - .Name("Embedded_YtOption") - .Type(NTi::Optional(NTi::Struct({{"embedded", ToTypeV3(EValueType::VT_STRING, false)}})))) - .AddColumn(TColumnSchema().Name("Embedded_ProtobufOption").Type(ToTypeV3(EValueType::VT_STRING, false))) - .AddColumn(TColumnSchema().Name("Embedded_NoOption").Type(ToTypeV3(EValueType::VT_STRING, false)))); - } - - Y_UNIT_TEST(Cyclic) - { - UNIT_ASSERT_EXCEPTION(CreateTableSchema<NUnitTesting::TCyclic>(), TApiUsageError); - UNIT_ASSERT_EXCEPTION(CreateTableSchema<NUnitTesting::TCyclic::TA>(), 
TApiUsageError); - UNIT_ASSERT_EXCEPTION(CreateTableSchema<NUnitTesting::TCyclic::TB>(), TApiUsageError); - UNIT_ASSERT_EXCEPTION(CreateTableSchema<NUnitTesting::TCyclic::TC>(), TApiUsageError); - UNIT_ASSERT_EXCEPTION(CreateTableSchema<NUnitTesting::TCyclic::TD>(), TApiUsageError); - - ASSERT_SERIALIZABLES_EQUAL( - TTableSchema().AddColumn( - TColumnSchema().Name("d").TypeV3(NTi::Optional(NTi::String()))), - CreateTableSchema<NUnitTesting::TCyclic::TE>()); - } - - Y_UNIT_TEST(FieldSortOrder) - { - const auto schema = CreateTableSchema<NUnitTesting::TFieldSortOrder>(); - - auto byFieldNumber = NTi::Optional(NTi::Struct({ - {"z", NTi::Optional(NTi::Bool())}, - {"x", NTi::Optional(NTi::Int64())}, - {"y", NTi::Optional(NTi::String())}, - })); - - ASSERT_SERIALIZABLES_EQUAL(schema, TTableSchema() - .AddColumn(TColumnSchema().Name("EmbeddedDefault").Type(byFieldNumber)) - .AddColumn(TColumnSchema() - .Name("EmbeddedAsInProtoFile") - .Type(NTi::Optional(NTi::Struct({ - {"x", NTi::Optional(NTi::Int64())}, - {"y", NTi::Optional(NTi::String())}, - {"z", NTi::Optional(NTi::Bool())}, - })))) - .AddColumn(TColumnSchema().Name("EmbeddedByFieldNumber").Type(byFieldNumber))); - } - - Y_UNIT_TEST(Map) - { - const auto schema = CreateTableSchema<NUnitTesting::TWithMap>(); - - auto createKeyValueStruct = [] (NTi::TTypePtr key, NTi::TTypePtr value) { - return NTi::List(NTi::Struct({ - {"key", NTi::Optional(key)}, - {"value", NTi::Optional(value)}, - })); - }; - - auto embedded = NTi::Struct({ - {"x", NTi::Optional(NTi::Int64())}, - {"y", NTi::Optional(NTi::String())}, - }); - - ASSERT_SERIALIZABLES_EQUAL(schema, TTableSchema() - .AddColumn(TColumnSchema() - .Name("MapDefault") - .Type(createKeyValueStruct(NTi::Int64(), NTi::String()))) - .AddColumn(TColumnSchema() - .Name("MapListOfStructsLegacy") - .Type(createKeyValueStruct(NTi::Int64(), NTi::String()))) - .AddColumn(TColumnSchema() - .Name("MapListOfStructs") - .Type(createKeyValueStruct(NTi::Int64(), embedded))) - 
.AddColumn(TColumnSchema() - .Name("MapOptionalDict") - .Type(NTi::Optional(NTi::Dict(NTi::Int64(), embedded)))) - .AddColumn(TColumnSchema() - .Name("MapDict") - .Type(NTi::Dict(NTi::Int64(), embedded)))); - } - - Y_UNIT_TEST(Oneof) - { - const auto schema = CreateTableSchema<NUnitTesting::TWithOneof>(); - - auto embedded = NTi::Struct({ - {"Oneof", NTi::Optional(NTi::Variant(NTi::Struct({ - {"x", NTi::Int64()}, - {"y", NTi::String()}, - })))}, - }); - - auto createType = [&] (TString oneof2Name) { - return NTi::Optional(NTi::Struct({ - {"field", NTi::Optional(NTi::String())}, - {oneof2Name, NTi::Optional(NTi::Variant(NTi::Struct({ - {"x2", NTi::Int64()}, - {"y2", NTi::String()}, - {"z2", embedded}, - })))}, - {"y1", NTi::Optional(NTi::String())}, - {"z1", NTi::Optional(embedded)}, - {"x1", NTi::Optional(NTi::Int64())}, - })); - }; - - ASSERT_SERIALIZABLES_EQUAL(schema, TTableSchema() - .AddColumn(TColumnSchema() - .Name("DefaultSeparateFields") - .Type(createType("variant_field_name"))) - .AddColumn(TColumnSchema() - .Name("NoDefault") - .Type(createType("Oneof2"))) - .AddColumn(TColumnSchema() - .Name("SerializationProtobuf") - .Type(NTi::Optional(NTi::Struct({ - {"y1", NTi::Optional(NTi::String())}, - {"x1", NTi::Optional(NTi::Int64())}, - {"z1", NTi::Optional(NTi::String())}, - })))) - .AddColumn(TColumnSchema() - .Name("TopLevelOneof") - .Type( - NTi::Optional( - NTi::Variant(NTi::Struct({ - {"MemberOfTopLevelOneof", NTi::Int64()} - })) - ) - )) - ); - } - - Y_UNIT_TEST(Embedded) - { - const auto schema = CreateTableSchema<NUnitTesting::TEmbeddingMessage>(); - ASSERT_SERIALIZABLES_EQUAL(schema, TTableSchema() - .Strict(false) - .AddColumn(TColumnSchema().Name("embedded2_num").Type(NTi::Optional(NTi::Uint64()))) - .AddColumn(TColumnSchema().Name("embedded2_struct").Type(NTi::Optional(NTi::Struct({ - {"float1", NTi::Optional(NTi::Double())}, - {"string1", NTi::Optional(NTi::String())}, - })))) - 
.AddColumn(TColumnSchema().Name("embedded2_repeated").Type(NTi::List(NTi::String()))) - .AddColumn(TColumnSchema().Name("embedded_num").Type(NTi::Optional(NTi::Uint64()))) - .AddColumn(TColumnSchema().Name("embedded_extra_field").Type(NTi::Optional(NTi::String()))) - .AddColumn(TColumnSchema().Name("variant").Type(NTi::Optional(NTi::Variant(NTi::Struct({ - {"str_variant", NTi::String()}, - {"uint_variant", NTi::Uint64()}, - }))))) - .AddColumn(TColumnSchema().Name("num").Type(NTi::Optional(NTi::Uint64()))) - .AddColumn(TColumnSchema().Name("extra_field").Type(NTi::Optional(NTi::String()))) - ); - } -} - -Y_UNIT_TEST_SUITE(ProtoSchemaTest_Proto3) -{ - Y_UNIT_TEST(TWithOptional) - { - const auto schema = CreateTableSchema<NTestingProto3::TWithOptional>(); - ASSERT_SERIALIZABLES_EQUAL(schema, TTableSchema() - .AddColumn(TColumnSchema() - .Name("x").Type(NTi::Optional(NTi::Int64())) - ) - ); - } - - Y_UNIT_TEST(TWithOptionalMessage) - { - const auto schema = CreateTableSchema<NTestingProto3::TWithOptionalMessage>(); - ASSERT_SERIALIZABLES_EQUAL(schema, TTableSchema() - .AddColumn(TColumnSchema() - .Name("x").Type( - NTi::Optional( - NTi::Struct({{"x", NTi::Optional(NTi::Int64())}}) - ) - ) - ) - ); - } -} diff --git a/yt/cpp/mapreduce/interface/protobuf_table_schema_ut.proto b/yt/cpp/mapreduce/interface/protobuf_table_schema_ut.proto deleted file mode 100644 index 60bad6e650..0000000000 --- a/yt/cpp/mapreduce/interface/protobuf_table_schema_ut.proto +++ /dev/null @@ -1,402 +0,0 @@ -import "yt/yt_proto/yt/formats/extension.proto"; - -package NYT.NUnitTesting; - -message TIntegral -{ - optional double DoubleField = 1; - optional float FloatField = 2; - optional int32 Int32Field = 3; - optional int64 Int64Field = 4; - optional uint32 Uint32Field = 5; - optional uint64 Uint64Field = 6; - optional sint32 Sint32Field = 7; - optional sint64 Sint64Field = 8; - optional fixed32 Fixed32Field = 9; - optional fixed64 Fixed64Field = 10; - optional sfixed32 Sfixed32Field = 11; - 
optional sfixed64 Sfixed64Field = 12; - optional bool BoolField = 13; - enum TriBool - { - TRI_FALSE = 0; - TRI_TRUE = 1; - TRI_UNDEF = -1; - } - optional TriBool EnumField = 14; -} - -message TRepeated -{ - repeated int32 Int32Field = 1; -} - -message TRepeatedYtMode -{ - option (NYT.default_field_flags) = SERIALIZATION_YT; - repeated int32 Int32Field = 1; -} - -message TWithTypeOptions -{ - enum Color - { - WHITE = 0; - BLUE = 1; - RED = -1; - } - - message TEmbedded - { - option (NYT.default_field_flags) = SERIALIZATION_YT; - - optional Color ColorIntField = 1 [(NYT.flags) = ENUM_INT]; - optional Color ColorStringField = 2 [(NYT.flags) = ENUM_STRING]; - optional bytes AnyField = 3 [(NYT.flags) = ANY]; - } - - optional Color ColorIntField = 1 [(NYT.flags) = ENUM_INT]; - optional Color ColorStringField = 2 [(NYT.flags) = ENUM_STRING]; - optional bytes AnyField = 3 [(NYT.flags) = ANY]; - optional bytes OtherColumnsField = 4 [(NYT.flags) = OTHER_COLUMNS]; - optional TEmbedded EmbeddedField = 5 [(NYT.flags) = SERIALIZATION_YT]; - repeated Color RepeatedEnumIntField = 6 [(NYT.flags) = SERIALIZATION_YT, (NYT.flags) = ENUM_INT]; -} - -message TWithTypeOptions_TypeMismatch_EnumInt -{ - optional int64 EnumField = 1 [(NYT.flags) = ENUM_INT]; -} - -message TWithTypeOptions_TypeMismatch_EnumString -{ - optional string EnumField = 1 [(NYT.flags) = ENUM_STRING]; -} - -message TWithTypeOptions_TypeMismatch_Any -{ - optional string AnyField = 1 [(NYT.flags) = ANY]; -} - -message TWithTypeOptions_TypeMismatch_OtherColumns -{ - optional string OtherColumnsField = 1 [(NYT.flags) = OTHER_COLUMNS]; -} - -message TOneOf -{ - oneof Chooser - { - double DoubleField = 1; - int32 Int32Field = 2; - } - optional bool BoolField = 3; -} - -message TWithRequired -{ - required string RequiredField = 1; - optional string NotRequiredField = 2; -}; - -message TAggregated -{ - optional string StringField = 1; - optional bytes BytesField = 2; - optional TIntegral NestedField = 3; - optional 
TRepeated NestedRepeatedField = 4; - optional TOneOf NestedOneOfField = 5; - optional TAggregated NestedRecursiveField = 6; -} - -message TAliased -{ - optional int32 Key = 1 [(NYT.key_column_name) = "key"]; - optional double Subkey = 2 [(NYT.key_column_name) = "subkey"]; - optional TAggregated Data = 3; -} - -//////////////////////////////////////////////////////////////////////////////// - -message TUrlRow -{ - optional string Host = 1 [(NYT.column_name) = "Host"]; - optional string Path = 2 [(NYT.column_name) = "Path"]; - optional sint32 HttpCode = 3 [(NYT.column_name) = "HttpCode"]; -} - -message TRowFieldSerializationOption -{ - optional TUrlRow UrlRow_1 = 1 [(NYT.flags) = SERIALIZATION_YT]; - optional TUrlRow UrlRow_2 = 2; -} - -message TRowMessageSerializationOption -{ - option (NYT.default_field_flags) = SERIALIZATION_YT; - optional TUrlRow UrlRow_1 = 1; - optional TUrlRow UrlRow_2 = 2; -} - -message TRowMixedSerializationOptions -{ - option (NYT.default_field_flags) = SERIALIZATION_YT; - optional TUrlRow UrlRow_1 = 1; - optional TUrlRow UrlRow_2 = 2 [(NYT.flags) = SERIALIZATION_PROTOBUF]; -} - -message TRowSerializedRepeatedFields -{ - option (NYT.default_field_flags) = SERIALIZATION_YT; - repeated int64 Ints = 1; - repeated TUrlRow UrlRows = 2; -} - -message TUrlRowWithColumnNames -{ - optional string Host = 1 [(NYT.column_name) = "Host_ColumnName", (NYT.key_column_name) = "Host_KeyColumnName"]; - optional string Path = 2 [(NYT.key_column_name) = "Path_KeyColumnName"]; - optional sint32 HttpCode = 3; -} - -message TRowMixedSerializationOptions_ColumnNames -{ - option (NYT.default_field_flags) = SERIALIZATION_YT; - optional TUrlRowWithColumnNames UrlRow_1 = 1; - optional TUrlRowWithColumnNames UrlRow_2 = 2 [(NYT.flags) = SERIALIZATION_PROTOBUF]; -} - -message TNoOptionInheritance -{ - message TDeepestEmbedded - { - optional int64 x = 1; - } - - message TEmbedded - { - optional TDeepestEmbedded embedded = 1; - } - - message TEmbeddedYt - { - option 
(NYT.default_field_flags) = SERIALIZATION_YT; - - optional TDeepestEmbedded embedded = 1; - } - - message TEmbeddedProtobuf - { - option (NYT.default_field_flags) = SERIALIZATION_PROTOBUF; - - optional TDeepestEmbedded embedded = 1; - } - - optional TEmbeddedYt EmbeddedYt_YtOption = 1 [(NYT.flags) = SERIALIZATION_YT]; - optional TEmbeddedYt EmbeddedYt_ProtobufOption = 2 [(NYT.flags) = SERIALIZATION_PROTOBUF]; - optional TEmbeddedYt EmbeddedYt_NoOption = 3; - optional TEmbeddedProtobuf EmbeddedProtobuf_YtOption = 4 [(NYT.flags) = SERIALIZATION_YT]; - optional TEmbeddedProtobuf EmbeddedProtobuf_ProtobufOption = 5 [(NYT.flags) = SERIALIZATION_PROTOBUF]; - optional TEmbeddedProtobuf EmbeddedProtobuf_NoOption = 6; - optional TEmbedded Embedded_YtOption = 7 [(NYT.flags) = SERIALIZATION_YT]; - optional TEmbedded Embedded_ProtobufOption = 8 [(NYT.flags) = SERIALIZATION_PROTOBUF]; - optional TEmbedded Embedded_NoOption = 9; -} - -message TOptionalList -{ - repeated int64 OptionalListInt64 = 1 [(NYT.flags) = OPTIONAL_LIST, (NYT.flags) = SERIALIZATION_YT]; -} - -message TPacked -{ - repeated int64 PackedListInt64 = 1 [(NYT.flags) = SERIALIZATION_YT, packed=true]; -} - -message TCyclic -{ - option (NYT.default_field_flags) = SERIALIZATION_YT; - - message TA - { - option (NYT.default_field_flags) = SERIALIZATION_YT; - repeated TB b = 1; - optional TC c = 2; - } - - message TB - { - option (NYT.default_field_flags) = SERIALIZATION_YT; - optional TD d = 1; - } - - message TC - { - option (NYT.default_field_flags) = SERIALIZATION_YT; - optional TD d = 1; - } - - message TD - { - option (NYT.default_field_flags) = SERIALIZATION_YT; - optional TA a = 1; - } - - message TE - { - optional TD d = 1 [(NYT.flags) = SERIALIZATION_PROTOBUF]; - } - - optional TA a = 1; -} - -message TFieldSortOrder -{ - message TEmbeddedDefault { - optional int64 x = 2; - optional string y = 12; - optional bool z = 1; - } - message TEmbeddedAsInProtoFile { - option (NYT.message_flags) = 
DEPRECATED_SORT_FIELDS_AS_IN_PROTO_FILE; - optional int64 x = 2; - optional string y = 12; - optional bool z = 1; - } - message TEmbeddedByFieldNumber { - option (NYT.message_flags) = SORT_FIELDS_BY_FIELD_NUMBER; - optional int64 x = 2; - optional string y = 12; - optional bool z = 1; - } - option (NYT.default_field_flags) = SERIALIZATION_YT; - - optional TEmbeddedDefault EmbeddedDefault = 1; - optional TEmbeddedAsInProtoFile EmbeddedAsInProtoFile = 2; - optional TEmbeddedByFieldNumber EmbeddedByFieldNumber = 3; -} - -message TWithMap -{ - option (NYT.default_field_flags) = SERIALIZATION_YT; - - message TEmbedded { - optional int64 x = 1; - optional string y = 2; - } - - map<int64, TEmbedded> MapDefault = 1; - map<int64, TEmbedded> MapListOfStructsLegacy = 2 [(NYT.flags) = MAP_AS_LIST_OF_STRUCTS_LEGACY]; - map<int64, TEmbedded> MapListOfStructs = 3 [(NYT.flags) = MAP_AS_LIST_OF_STRUCTS]; - map<int64, TEmbedded> MapOptionalDict = 4 [(NYT.flags) = MAP_AS_OPTIONAL_DICT]; - map<int64, TEmbedded> MapDict = 5 [(NYT.flags) = MAP_AS_DICT]; -} - -message TWithOneof -{ - option (NYT.default_field_flags) = SERIALIZATION_YT; - - message TEmbedded - { - option (NYT.default_field_flags) = SERIALIZATION_YT; - oneof Oneof { - int64 x = 1; - string y = 2; - } - } - - message TDefaultSeparateFields - { - option (NYT.default_oneof_flags) = SEPARATE_FIELDS; - option (NYT.default_field_flags) = SERIALIZATION_YT; - - optional string field = 1; - - oneof Oneof2 - { - option (NYT.variant_field_name) = "variant_field_name"; - option (NYT.oneof_flags) = VARIANT; - string y2 = 4; - TEmbedded z2 = 6; - int64 x2 = 2; - } - - oneof Oneof1 - { - int64 x1 = 10; - string y1 = 3; - TEmbedded z1 = 5; - } - } - - message TNoDefault - { - option (NYT.default_field_flags) = SERIALIZATION_YT; - - optional string field = 1; - - oneof Oneof2 - { - string y2 = 4; - TEmbedded z2 = 6; - int64 x2 = 2; - } - - oneof Oneof1 - { - option (NYT.oneof_flags) = SEPARATE_FIELDS; - int64 x1 = 10; - string y1 = 3; - 
TEmbedded z1 = 5; - } - } - - message TSerializationProtobuf - { - oneof Oneof - { - int64 x1 = 2; - string y1 = 1; - TEmbedded z1 = 3; - } - } - - optional TDefaultSeparateFields DefaultSeparateFields = 1; - optional TNoDefault NoDefault = 2; - optional TSerializationProtobuf SerializationProtobuf = 3; - - oneof TopLevelOneof - { - int64 MemberOfTopLevelOneof = 4; - } -} - -message TEmbeddedStruct { - optional float float1 = 1; - optional string string1 = 2; -} - -message TEmbedded2Message { - option (NYT.default_field_flags) = SERIALIZATION_YT; - optional uint64 embedded2_num = 10; - optional TEmbeddedStruct embedded2_struct = 17; - repeated string embedded2_repeated = 42; -} - -message TEmbedded1Message { - option (NYT.default_field_flags) = SERIALIZATION_YT; - required TEmbedded2Message t2 = 1 [(NYT.flags) = EMBEDDED]; - oneof variant { - string str_variant = 101; - uint64 uint_variant = 102; - } - optional uint64 embedded_num = 10; // make intensional field_num collision! - optional string embedded_extra_field = 11; -} - -message TEmbeddingMessage { - optional bytes other_columns_field = 15 [(NYT.flags) = OTHER_COLUMNS]; - required TEmbedded1Message t1 = 2 [(NYT.flags) = EMBEDDED]; - optional uint64 num = 12; - optional string extra_field = 13; -} diff --git a/yt/cpp/mapreduce/interface/public.h b/yt/cpp/mapreduce/interface/public.h deleted file mode 100644 index bdeda78795..0000000000 --- a/yt/cpp/mapreduce/interface/public.h +++ /dev/null @@ -1,10 +0,0 @@ -#pragma once - -#include <memory> - -namespace NYT::NAuth { - -struct IServiceTicketAuthPtrWrapper; -using IServiceTicketAuthPtrWrapperPtr = std::shared_ptr<IServiceTicketAuthPtrWrapper>; - -} // namespace NYT::NAuth diff --git a/yt/cpp/mapreduce/interface/retry_policy.h b/yt/cpp/mapreduce/interface/retry_policy.h deleted file mode 100644 index c198839079..0000000000 --- a/yt/cpp/mapreduce/interface/retry_policy.h +++ /dev/null @@ -1,47 +0,0 @@ -#pragma once - -#include <util/datetime/base.h> -#include 
<util/generic/ptr.h> - -namespace NYT { - -//////////////////////////////////////////////////////////////////////////////// - -/// A configuration that controls retries of a single request. -struct TRetryConfig -{ - /// - /// @brief How long retries of a single YT request can go on. - /// - /// If this limit is reached while retry count is not yet exceeded @ref TRequestRetriesTimeout exception is thrown. - TDuration RetriesTimeLimit = TDuration::Max(); -}; - -/// The library uses this class to understand how to retry individual requests. -class IRetryConfigProvider - : public virtual TThrRefBase -{ -public: - /// - /// @brief Gets retry policy for single request. - /// - /// CreateRetryConfig is called before ANY request. - /// Returned config controls retries of this request. - /// - /// Must be thread safe since it can be used from different threads - /// to perform internal library requests (e.g. pings). - /// - /// Some methods (e.g. IClient::Map) involve multiple requests to YT and therefore - /// this method will be called several times during execution of single method. - /// - /// If user needs to limit overall retries inside long operation they might create - /// retry policy that knows about overall deadline - /// @ref NYT::TRetryConfig::RetriesTimeLimit taking into account that overall deadline. - /// (E.g. when deadline reached it returns zero limit for retries). 
- virtual TRetryConfig CreateRetryConfig() = 0; -}; - -//////////////////////////////////////////////////////////////////////////////// - -} // namespace NYT - diff --git a/yt/cpp/mapreduce/interface/serialize.cpp b/yt/cpp/mapreduce/interface/serialize.cpp deleted file mode 100644 index ae05d9f50d..0000000000 --- a/yt/cpp/mapreduce/interface/serialize.cpp +++ /dev/null @@ -1,553 +0,0 @@ -#include "serialize.h" - -#include "common.h" -#include "fluent.h" - -#include <library/cpp/yson/parser.h> -#include <library/cpp/yson/node/node_io.h> -#include <library/cpp/yson/node/serialize.h> - -#include <library/cpp/type_info/type_io.h> - -#include <util/generic/string.h> - -namespace NYT { - -//////////////////////////////////////////////////////////////////////////////// - -// const auto& nodeMap = node.AsMap(); -#define DESERIALIZE_ITEM(NAME, MEMBER) \ - if (const auto* item = nodeMap.FindPtr(NAME)) { \ - Deserialize(MEMBER, *item); \ - } - -// const auto& attributesMap = node.GetAttributes().AsMap(); -#define DESERIALIZE_ATTR(NAME, MEMBER) \ - if (const auto* attr = attributesMap.FindPtr(NAME)) { \ - Deserialize(MEMBER, *attr); \ - } - -//////////////////////////////////////////////////////////////////////////////// - -void Serialize(const TSortColumn& sortColumn, NYson::IYsonConsumer* consumer) -{ - if (sortColumn.SortOrder() == ESortOrder::SO_ASCENDING) { - Serialize(sortColumn.Name(), consumer); - } else { - BuildYsonFluently(consumer).BeginMap() - .Item("name").Value(sortColumn.Name()) - .Item("sort_order").Value(ToString(sortColumn.SortOrder())) - .EndMap(); - } -} - -void Deserialize(TSortColumn& sortColumn, const TNode& node) -{ - if (node.IsString()) { - sortColumn = TSortColumn(node.AsString()); - } else if (node.IsMap()) { - const auto& name = node["name"].AsString(); - const auto& sortOrderString = node["sort_order"].AsString(); - sortColumn = TSortColumn(name, ::FromString<ESortOrder>(sortOrderString)); - } else { - ythrow yexception() << "Expected sort column 
to be string or map, got " << node.GetType(); - } -} - -template <class T, class TDerived> -void SerializeOneOrMany(const TOneOrMany<T, TDerived>& oneOrMany, NYson::IYsonConsumer* consumer) -{ - BuildYsonFluently(consumer).List(oneOrMany.Parts_); -} - -template <class T, class TDerived> -void DeserializeOneOrMany(TOneOrMany<T, TDerived>& oneOrMany, const TNode& node) -{ - Deserialize(oneOrMany.Parts_, node); -} - -void Serialize(const TKey& key, NYson::IYsonConsumer* consumer) -{ - SerializeOneOrMany(key, consumer); -} - -void Deserialize(TKey& key, const TNode& node) -{ - DeserializeOneOrMany(key, node); -} - -void Serialize(const TSortColumns& sortColumns, NYson::IYsonConsumer* consumer) -{ - SerializeOneOrMany(sortColumns, consumer); -} - -void Deserialize(TSortColumns& sortColumns, const TNode& node) -{ - DeserializeOneOrMany(sortColumns, node); -} - -void Serialize(const TColumnNames& columnNames, NYson::IYsonConsumer* consumer) -{ - SerializeOneOrMany(columnNames, consumer); -} - -void Deserialize(TColumnNames& columnNames, const TNode& node) -{ - DeserializeOneOrMany(columnNames, node); -} - -//////////////////////////////////////////////////////////////////////////////// - -void Deserialize(EValueType& valueType, const TNode& node) -{ - const auto& nodeStr = node.AsString(); - static const THashMap<TString, EValueType> str2ValueType = { - {"int8", VT_INT8}, - {"int16", VT_INT16}, - {"int32", VT_INT32}, - {"int64", VT_INT64}, - - {"uint8", VT_UINT8}, - {"uint16", VT_UINT16}, - {"uint32", VT_UINT32}, - {"uint64", VT_UINT64}, - - {"boolean", VT_BOOLEAN}, - {"double", VT_DOUBLE}, - - {"string", VT_STRING}, - {"utf8", VT_UTF8}, - - {"any", VT_ANY}, - - {"null", VT_NULL}, - {"void", VT_VOID}, - - {"date", VT_DATE}, - {"datetime", VT_DATETIME}, - {"timestamp", VT_TIMESTAMP}, - {"interval", VT_INTERVAL}, - {"float", VT_FLOAT}, - {"json", VT_JSON}, - }; - - auto it = str2ValueType.find(nodeStr); - if (it == str2ValueType.end()) { - ythrow yexception() << "Invalid 
value type '" << nodeStr << "'"; - } - - valueType = it->second; -} - -void Deserialize(ESortOrder& sortOrder, const TNode& node) -{ - sortOrder = FromString<ESortOrder>(node.AsString()); -} - -void Deserialize(EOptimizeForAttr& optimizeFor, const TNode& node) -{ - optimizeFor = FromString<EOptimizeForAttr>(node.AsString()); -} - -void Deserialize(EErasureCodecAttr& erasureCodec, const TNode& node) -{ - erasureCodec = FromString<EErasureCodecAttr>(node.AsString()); -} - -void Deserialize(ESchemaModificationAttr& schemaModification, const TNode& node) -{ - schemaModification = FromString<ESchemaModificationAttr>(node.AsString()); -} - -void Serialize(const TColumnSchema& columnSchema, NYson::IYsonConsumer* consumer) -{ - BuildYsonFluently(consumer).BeginMap() - .Item("name").Value(columnSchema.Name()) - .DoIf(!columnSchema.RawTypeV3().Defined(), - [&] (TFluentMap fluent) { - fluent.Item("type").Value(NDetail::ToString(columnSchema.Type())); - fluent.Item("required").Value(columnSchema.Required()); - if (columnSchema.Type() == VT_ANY - && *columnSchema.TypeV3() != *NTi::Optional(NTi::Yson())) - { - // A lot of user canonize serialized schema. - // To be backward compatible we only set type_v3 for new types. - fluent.Item("type_v3").Value(columnSchema.TypeV3()); - } - } - ) - .DoIf(columnSchema.RawTypeV3().Defined(), [&] (TFluentMap fluent) { - const auto& rawTypeV3 = *columnSchema.RawTypeV3(); - fluent.Item("type_v3").Value(rawTypeV3); - - // We going set old fields `type` and `required` to be compatible - // with old clusters that doesn't support type_v3 yet. 
- - // if type is simple return its name otherwise return empty optional - auto isRequired = [](TStringBuf simpleType) { - return simpleType != "null" && simpleType != "void"; - }; - auto getSimple = [] (const TNode& typeV3) -> TMaybe<TString> { - static const THashMap<TString,TString> typeV3ToOld = { - {"bool", "boolean"}, - {"yson", "any"}, - }; - TMaybe<TString> result; - if (typeV3.IsString()) { - result = typeV3.AsString(); - } else if (typeV3.IsMap() && typeV3.Size() == 1) { - Y_VERIFY(typeV3["type_name"].IsString(), "invalid type is passed"); - result = typeV3["type_name"].AsString(); - } - if (result) { - auto it = typeV3ToOld.find(*result); - if (it != typeV3ToOld.end()) { - result = it->second; - } - } - return result; - }; - auto simplify = [&](const TNode& typeV3) -> TMaybe<std::pair<TString, bool>> { - auto simple = getSimple(typeV3); - if (simple) { - return std::make_pair(*simple, isRequired(*simple)); - } - if (typeV3.IsMap() && typeV3["type_name"] == "optional") { - auto simpleItem = getSimple(typeV3["item"]); - if (simpleItem && isRequired(*simpleItem)) { - return std::make_pair(*simpleItem, false); - } - } - return {}; - }; - - auto simplified = simplify(rawTypeV3); - - if (simplified) { - const auto& [simpleType, required] = *simplified; - fluent - .Item("type").Value(simpleType) - .Item("required").Value(required); - return; - } - }) - .DoIf(columnSchema.SortOrder().Defined(), [&] (TFluentMap fluent) { - fluent.Item("sort_order").Value(ToString(*columnSchema.SortOrder())); - }) - .DoIf(columnSchema.Lock().Defined(), [&] (TFluentMap fluent) { - fluent.Item("lock").Value(*columnSchema.Lock()); - }) - .DoIf(columnSchema.Expression().Defined(), [&] (TFluentMap fluent) { - fluent.Item("expression").Value(*columnSchema.Expression()); - }) - .DoIf(columnSchema.Aggregate().Defined(), [&] (TFluentMap fluent) { - fluent.Item("aggregate").Value(*columnSchema.Aggregate()); - }) - .DoIf(columnSchema.Group().Defined(), [&] (TFluentMap fluent) { - 
fluent.Item("group").Value(*columnSchema.Group()); - }) - .EndMap(); -} - -void Deserialize(TColumnSchema& columnSchema, const TNode& node) -{ - const auto& nodeMap = node.AsMap(); - DESERIALIZE_ITEM("name", columnSchema.Name_); - DESERIALIZE_ITEM("type_v3", columnSchema.RawTypeV3_); - DESERIALIZE_ITEM("sort_order", columnSchema.SortOrder_); - DESERIALIZE_ITEM("lock", columnSchema.Lock_); - DESERIALIZE_ITEM("expression", columnSchema.Expression_); - DESERIALIZE_ITEM("aggregate", columnSchema.Aggregate_); - DESERIALIZE_ITEM("group", columnSchema.Group_); - - if (nodeMap.contains("type_v3")) { - NTi::TTypePtr type; - DESERIALIZE_ITEM("type_v3", type); - columnSchema.Type(type); - } else { - EValueType oldType = VT_INT64; - bool required = false; - DESERIALIZE_ITEM("type", oldType); - DESERIALIZE_ITEM("required", required); - columnSchema.Type(ToTypeV3(oldType, required)); - } -} - -void Serialize(const TTableSchema& tableSchema, NYson::IYsonConsumer* consumer) -{ - BuildYsonFluently(consumer).BeginAttributes() - .Item("strict").Value(tableSchema.Strict()) - .Item("unique_keys").Value(tableSchema.UniqueKeys()) - .EndAttributes() - .List(tableSchema.Columns()); -} - -void Deserialize(TTableSchema& tableSchema, const TNode& node) -{ - const auto& attributesMap = node.GetAttributes().AsMap(); - DESERIALIZE_ATTR("strict", tableSchema.Strict_); - DESERIALIZE_ATTR("unique_keys", tableSchema.UniqueKeys_); - Deserialize(tableSchema.Columns_, node); -} - -//////////////////////////////////////////////////////////////////////////////// - -void Serialize(const TKeyBound& keyBound, NYson::IYsonConsumer* consumer) -{ - BuildYsonFluently(consumer).BeginList() - .Item().Value(ToString(keyBound.Relation())) - .Item().Value(keyBound.Key()) - .EndList(); -} - -void Deserialize(TKeyBound& keyBound, const TNode& node) -{ - const auto& nodeList = node.AsList(); - Y_ENSURE(nodeList.size() == 2); - - const auto& relationNode = nodeList[0]; - 
keyBound.Relation(::FromString<ERelation>(relationNode.AsString())); - - const auto& keyNode = nodeList[1]; - TKey key; - Deserialize(key, keyNode); - keyBound.Key(std::move(key)); -} - -//////////////////////////////////////////////////////////////////////////////// - -void Serialize(const TReadLimit& readLimit, NYson::IYsonConsumer* consumer) -{ - BuildYsonFluently(consumer).BeginMap() - .DoIf(readLimit.KeyBound_.Defined(), [&] (TFluentMap fluent) { - fluent.Item("key_bound").Value(*readLimit.KeyBound_); - }) - .DoIf(readLimit.Key_.Defined(), [&] (TFluentMap fluent) { - fluent.Item("key").Value(*readLimit.Key_); - }) - .DoIf(readLimit.RowIndex_.Defined(), [&] (TFluentMap fluent) { - fluent.Item("row_index").Value(*readLimit.RowIndex_); - }) - .DoIf(readLimit.Offset_.Defined(), [&] (TFluentMap fluent) { - fluent.Item("offset").Value(*readLimit.Offset_); - }) - .DoIf(readLimit.TabletIndex_.Defined(), [&] (TFluentMap fluent) { - fluent.Item("tablet_index").Value(*readLimit.TabletIndex_); - }) - .EndMap(); -} - -void Deserialize(TReadLimit& readLimit, const TNode& node) -{ - const auto& nodeMap = node.AsMap(); - DESERIALIZE_ITEM("key_bound", readLimit.KeyBound_); - DESERIALIZE_ITEM("key", readLimit.Key_); - DESERIALIZE_ITEM("row_index", readLimit.RowIndex_); - DESERIALIZE_ITEM("offset", readLimit.Offset_); - DESERIALIZE_ITEM("tablet_index", readLimit.TabletIndex_); -} - -void Serialize(const TReadRange& readRange, NYson::IYsonConsumer* consumer) -{ - BuildYsonFluently(consumer).BeginMap() - .DoIf(!IsTrivial(readRange.LowerLimit_), [&] (TFluentMap fluent) { - fluent.Item("lower_limit").Value(readRange.LowerLimit_); - }) - .DoIf(!IsTrivial(readRange.UpperLimit_), [&] (TFluentMap fluent) { - fluent.Item("upper_limit").Value(readRange.UpperLimit_); - }) - .DoIf(!IsTrivial(readRange.Exact_), [&] (TFluentMap fluent) { - fluent.Item("exact").Value(readRange.Exact_); - }) - .EndMap(); -} - -void Deserialize(TReadRange& readRange, const TNode& node) -{ - const auto& nodeMap = 
node.AsMap(); - DESERIALIZE_ITEM("lower_limit", readRange.LowerLimit_); - DESERIALIZE_ITEM("upper_limit", readRange.UpperLimit_); - DESERIALIZE_ITEM("exact", readRange.Exact_); -} - -void Serialize(const THashMap<TString, TString>& renameColumns, NYson::IYsonConsumer* consumer) -{ - BuildYsonFluently(consumer) - .DoMapFor(renameColumns, [] (TFluentMap fluent, const auto& item) { - fluent.Item(item.first).Value(item.second); - }); -} - -void Serialize(const TRichYPath& path, NYson::IYsonConsumer* consumer) -{ - BuildYsonFluently(consumer).BeginAttributes() - .DoIf(path.GetRanges().Defined(), [&] (TFluentAttributes fluent) { - fluent.Item("ranges").List(*path.GetRanges()); - }) - .DoIf(path.Columns_.Defined(), [&] (TFluentAttributes fluent) { - fluent.Item("columns").Value(*path.Columns_); - }) - .DoIf(path.Append_.Defined(), [&] (TFluentAttributes fluent) { - fluent.Item("append").Value(*path.Append_); - }) - .DoIf(path.PartiallySorted_.Defined(), [&] (TFluentAttributes fluent) { - fluent.Item("partially_sorted").Value(*path.PartiallySorted_); - }) - .DoIf(!path.SortedBy_.Parts_.empty(), [&] (TFluentAttributes fluent) { - fluent.Item("sorted_by").Value(path.SortedBy_); - }) - .DoIf(path.Teleport_.Defined(), [&] (TFluentAttributes fluent) { - fluent.Item("teleport").Value(*path.Teleport_); - }) - .DoIf(path.Primary_.Defined(), [&] (TFluentAttributes fluent) { - fluent.Item("primary").Value(*path.Primary_); - }) - .DoIf(path.Foreign_.Defined(), [&] (TFluentAttributes fluent) { - fluent.Item("foreign").Value(*path.Foreign_); - }) - .DoIf(path.RowCountLimit_.Defined(), [&] (TFluentAttributes fluent) { - fluent.Item("row_count_limit").Value(*path.RowCountLimit_); - }) - .DoIf(path.FileName_.Defined(), [&] (TFluentAttributes fluent) { - fluent.Item("file_name").Value(*path.FileName_); - }) - .DoIf(path.OriginalPath_.Defined(), [&] (TFluentAttributes fluent) { - fluent.Item("original_path").Value(*path.OriginalPath_); - }) - .DoIf(path.Executable_.Defined(), [&] 
(TFluentAttributes fluent) { - fluent.Item("executable").Value(*path.Executable_); - }) - .DoIf(path.Format_.Defined(), [&] (TFluentAttributes fluent) { - fluent.Item("format").Value(*path.Format_); - }) - .DoIf(path.Schema_.Defined(), [&] (TFluentAttributes fluent) { - fluent.Item("schema").Value(*path.Schema_); - }) - .DoIf(path.Timestamp_.Defined(), [&] (TFluentAttributes fluent) { - fluent.Item("timestamp").Value(*path.Timestamp_); - }) - .DoIf(path.CompressionCodec_.Defined(), [&] (TFluentAttributes fluent) { - fluent.Item("compression_codec").Value(*path.CompressionCodec_); - }) - .DoIf(path.ErasureCodec_.Defined(), [&] (TFluentAttributes fluent) { - fluent.Item("erasure_codec").Value(ToString(*path.ErasureCodec_)); - }) - .DoIf(path.SchemaModification_.Defined(), [&] (TFluentAttributes fluent) { - fluent.Item("schema_modification").Value(ToString(*path.SchemaModification_)); - }) - .DoIf(path.OptimizeFor_.Defined(), [&] (TFluentAttributes fluent) { - fluent.Item("optimize_for").Value(ToString(*path.OptimizeFor_)); - }) - .DoIf(path.TransactionId_.Defined(), [&] (TFluentAttributes fluent) { - fluent.Item("transaction_id").Value(GetGuidAsString(*path.TransactionId_)); - }) - .DoIf(path.RenameColumns_.Defined(), [&] (TFluentAttributes fluent) { - fluent.Item("rename_columns").Value(*path.RenameColumns_); - }) - .DoIf(path.BypassArtifactCache_.Defined(), [&] (TFluentAttributes fluent) { - fluent.Item("bypass_artifact_cache").Value(*path.BypassArtifactCache_); - }) - .EndAttributes() - .Value(path.Path_); -} - -void Deserialize(TRichYPath& path, const TNode& node) -{ - path = {}; - - const auto& attributesMap = node.GetAttributes().AsMap(); - DESERIALIZE_ATTR("ranges", path.MutableRanges()); - DESERIALIZE_ATTR("columns", path.Columns_); - DESERIALIZE_ATTR("append", path.Append_); - DESERIALIZE_ATTR("partially_sorted", path.PartiallySorted_); - DESERIALIZE_ATTR("sorted_by", path.SortedBy_); - DESERIALIZE_ATTR("teleport", path.Teleport_); - 
DESERIALIZE_ATTR("primary", path.Primary_); - DESERIALIZE_ATTR("foreign", path.Foreign_); - DESERIALIZE_ATTR("row_count_limit", path.RowCountLimit_); - DESERIALIZE_ATTR("file_name", path.FileName_); - DESERIALIZE_ATTR("original_path", path.OriginalPath_); - DESERIALIZE_ATTR("executable", path.Executable_); - DESERIALIZE_ATTR("format", path.Format_); - DESERIALIZE_ATTR("schema", path.Schema_); - DESERIALIZE_ATTR("timestamp", path.Timestamp_); - DESERIALIZE_ATTR("compression_codec", path.CompressionCodec_); - DESERIALIZE_ATTR("erasure_codec", path.ErasureCodec_); - DESERIALIZE_ATTR("schema_modification", path.SchemaModification_); - DESERIALIZE_ATTR("optimize_for", path.OptimizeFor_); - DESERIALIZE_ATTR("transaction_id", path.TransactionId_); - DESERIALIZE_ATTR("rename_columns", path.RenameColumns_); - DESERIALIZE_ATTR("bypass_artifact_cache", path.BypassArtifactCache_); - Deserialize(path.Path_, node); -} - -void Serialize(const TAttributeFilter& filter, NYson::IYsonConsumer* consumer) -{ - BuildYsonFluently(consumer).List(filter.Attributes_); -} - -void Deserialize(TTableColumnarStatistics& statistics, const TNode& node) -{ - const auto& nodeMap = node.AsMap(); - DESERIALIZE_ITEM("column_data_weights", statistics.ColumnDataWeight); - DESERIALIZE_ITEM("legacy_chunks_data_weight", statistics.LegacyChunksDataWeight); - DESERIALIZE_ITEM("timestamp_total_weight", statistics.TimestampTotalWeight); -} - -void Deserialize(TMultiTablePartition::TStatistics& statistics, const TNode& node) -{ - const auto& nodeMap = node.AsMap(); - DESERIALIZE_ITEM("chunk_count", statistics.ChunkCount); - DESERIALIZE_ITEM("data_weight", statistics.DataWeight); - DESERIALIZE_ITEM("row_count", statistics.RowCount); -} - -void Deserialize(TMultiTablePartition& partition, const TNode& node) -{ - const auto& nodeMap = node.AsMap(); - DESERIALIZE_ITEM("table_ranges", partition.TableRanges); - DESERIALIZE_ITEM("aggregate_statistics", partition.AggregateStatistics); -} - -void 
Deserialize(TMultiTablePartitions& partitions, const TNode& node) -{ - const auto& nodeMap = node.AsMap(); - DESERIALIZE_ITEM("partitions", partitions.Partitions); -} - -void Serialize(const TGUID& value, NYson::IYsonConsumer* consumer) -{ - BuildYsonFluently(consumer).Value(GetGuidAsString(value)); -} - -void Deserialize(TGUID& value, const TNode& node) -{ - value = GetGuid(node.AsString()); -} - -void Deserialize(TTabletInfo& value, const TNode& node) -{ - auto nodeMap = node.AsMap(); - DESERIALIZE_ITEM("total_row_count", value.TotalRowCount) - DESERIALIZE_ITEM("trimmed_row_count", value.TrimmedRowCount) - DESERIALIZE_ITEM("barrier_timestamp", value.BarrierTimestamp) -} - -void Serialize(const NTi::TTypePtr& type, NYson::IYsonConsumer* consumer) -{ - auto yson = NTi::NIo::SerializeYson(type.Get()); - ::NYson::ParseYsonStringBuffer(yson, consumer); -} - -void Deserialize(NTi::TTypePtr& type, const TNode& node) -{ - auto yson = NodeToYsonString(node, NYson::EYsonFormat::Binary); - type = NTi::NIo::DeserializeYson(*NTi::HeapFactory(), yson); -} - -#undef DESERIALIZE_ITEM -#undef DESERIALIZE_ATTR - -//////////////////////////////////////////////////////////////////////////////// - -} // namespace NYT diff --git a/yt/cpp/mapreduce/interface/serialize.h b/yt/cpp/mapreduce/interface/serialize.h deleted file mode 100644 index 223dd446ba..0000000000 --- a/yt/cpp/mapreduce/interface/serialize.h +++ /dev/null @@ -1,90 +0,0 @@ -#pragma once - -/// -/// @file yt/cpp/mapreduce/interface/serialize.h -/// -/// Header containing declaration of functions for serializing to/from YSON. 
- -#include "common.h" - -#include <library/cpp/type_info/fwd.h> - -namespace NYT::NYson { -struct IYsonConsumer; -} // namespace NYT::NYson - -namespace NYT { - -//////////////////////////////////////////////////////////////////////////////// - -template <class T> -void Deserialize(TMaybe<T>& value, const TNode& node) -{ - value.ConstructInPlace(); - Deserialize(value.GetRef(), node); -} - -template <class T> -void Deserialize(TVector<T>& value, const TNode& node) -{ - for (const auto& element : node.AsList()) { - value.emplace_back(); - Deserialize(value.back(), element); - } -} - -template <class T> -void Deserialize(THashMap<TString, T>& value, const TNode& node) -{ - for (const auto& item : node.AsMap()) { - Deserialize(value[item.first], item.second); - } -} - -//////////////////////////////////////////////////////////////////////////////// - -void Serialize(const TKey& key, NYT::NYson::IYsonConsumer* consumer); -void Deserialize(TKey& key, const TNode& node); - -void Serialize(const TSortColumns& sortColumns, NYT::NYson::IYsonConsumer* consumer); -void Deserialize(TSortColumns& sortColumns, const TNode& node); - -void Serialize(const TColumnNames& columnNames, NYT::NYson::IYsonConsumer* consumer); -void Deserialize(TColumnNames& columnNames, const TNode& node); - -void Serialize(const TSortColumn& sortColumn, NYT::NYson::IYsonConsumer* consumer); -void Deserialize(TSortColumn& sortColumn, const TNode& node); - -void Serialize(const TKeyBound& keyBound, NYT::NYson::IYsonConsumer* consumer); -void Deserialize(TKeyBound& keyBound, const TNode& node); - -void Serialize(const TReadLimit& readLimit, NYT::NYson::IYsonConsumer* consumer); -void Deserialize(TReadLimit& readLimit, const TNode& node); - -void Serialize(const TReadRange& readRange, NYT::NYson::IYsonConsumer* consumer); - -void Serialize(const TRichYPath& path, NYT::NYson::IYsonConsumer* consumer); -void Deserialize(TRichYPath& path, const TNode& node); - -void Serialize(const TAttributeFilter& filter, 
NYT::NYson::IYsonConsumer* consumer); - -void Serialize(const TColumnSchema& columnSchema, NYT::NYson::IYsonConsumer* consumer); -void Serialize(const TTableSchema& tableSchema, NYT::NYson::IYsonConsumer* consumer); - -void Deserialize(EValueType& valueType, const TNode& node); -void Deserialize(TTableSchema& tableSchema, const TNode& node); -void Deserialize(TColumnSchema& columnSchema, const TNode& node); -void Deserialize(TTableColumnarStatistics& statistics, const TNode& node); -void Deserialize(TMultiTablePartition& partition, const TNode& node); -void Deserialize(TMultiTablePartitions& partitions, const TNode& node); -void Deserialize(TTabletInfo& tabletInfos, const TNode& node); - -void Serialize(const TGUID& path, NYT::NYson::IYsonConsumer* consumer); -void Deserialize(TGUID& value, const TNode& node); - -void Serialize(const NTi::TTypePtr& type, NYT::NYson::IYsonConsumer* consumer); -void Deserialize(NTi::TTypePtr& type, const TNode& node); - -//////////////////////////////////////////////////////////////////////////////// - -} // namespace NYT diff --git a/yt/cpp/mapreduce/interface/serialize_ut.cpp b/yt/cpp/mapreduce/interface/serialize_ut.cpp deleted file mode 100644 index 59d4501ee8..0000000000 --- a/yt/cpp/mapreduce/interface/serialize_ut.cpp +++ /dev/null @@ -1,49 +0,0 @@ -#include <yt/cpp/mapreduce/interface/serialize.h> -#include <yt/cpp/mapreduce/interface/common.h> - -#include <library/cpp/yson/node/node_builder.h> - -#include <library/cpp/testing/unittest/registar.h> - -#include <util/generic/serialized_enum.h> - -using namespace NYT; - -Y_UNIT_TEST_SUITE(Serialization) -{ - Y_UNIT_TEST(TableSchema) - { - auto schema = TTableSchema() - .AddColumn(TColumnSchema().Name("a").Type(EValueType::VT_STRING).SortOrder(SO_ASCENDING)) - .AddColumn(TColumnSchema().Name("b").Type(EValueType::VT_UINT64)) - .AddColumn(TColumnSchema().Name("c").Type(EValueType::VT_INT64, true)); - - auto schemaNode = schema.ToNode(); - UNIT_ASSERT(schemaNode.IsList()); - 
UNIT_ASSERT_VALUES_EQUAL(schemaNode.Size(), 3); - - - UNIT_ASSERT_VALUES_EQUAL(schemaNode[0]["name"], "a"); - UNIT_ASSERT_VALUES_EQUAL(schemaNode[0]["type"], "string"); - UNIT_ASSERT_VALUES_EQUAL(schemaNode[0]["required"], false); - UNIT_ASSERT_VALUES_EQUAL(schemaNode[0]["sort_order"], "ascending"); - - UNIT_ASSERT_VALUES_EQUAL(schemaNode[1]["name"], "b"); - UNIT_ASSERT_VALUES_EQUAL(schemaNode[1]["type"], "uint64"); - UNIT_ASSERT_VALUES_EQUAL(schemaNode[1]["required"], false); - - UNIT_ASSERT_VALUES_EQUAL(schemaNode[2]["name"], "c"); - UNIT_ASSERT_VALUES_EQUAL(schemaNode[2]["type"], "int64"); - UNIT_ASSERT_VALUES_EQUAL(schemaNode[2]["required"], true); - } - - Y_UNIT_TEST(ValueTypeSerialization) - { - for (const auto value : GetEnumAllValues<EValueType>()) { - TNode serialized = NYT::NDetail::ToString(value); - EValueType deserialized; - Deserialize(deserialized, serialized); - UNIT_ASSERT_VALUES_EQUAL(value, deserialized); - } - } -} diff --git a/yt/cpp/mapreduce/interface/skiff_row.cpp b/yt/cpp/mapreduce/interface/skiff_row.cpp deleted file mode 100644 index 7838bdaee9..0000000000 --- a/yt/cpp/mapreduce/interface/skiff_row.cpp +++ /dev/null @@ -1 +0,0 @@ -#include "skiff_row.h" diff --git a/yt/cpp/mapreduce/interface/skiff_row.h b/yt/cpp/mapreduce/interface/skiff_row.h deleted file mode 100644 index 5dd335cb65..0000000000 --- a/yt/cpp/mapreduce/interface/skiff_row.h +++ /dev/null @@ -1,127 +0,0 @@ -#pragma once - -/// -/// @file yt/cpp/mapreduce/interface/skiff_row.h -/// Header containing interfaces that you need to define for using TSkiffRowTableReader -/// What you need to do for your struct type TMyType: -/// 1. Write `true` specialization TIsSkiffRow<TMyType>; -/// 2. Write specialization GetSkiffSchema<TMyType>(); -/// 3. Write your own parser derived from ISkiffRowParser and write specialization GetSkiffParser<TMyType>() which returns this parser. 
- -#include "fwd.h" - -#include <yt/cpp/mapreduce/skiff/skiff_schema.h> - -#include <yt/cpp/mapreduce/interface/format.h> - -#include <library/cpp/skiff/skiff.h> - -#include <util/generic/maybe.h> - -namespace NYT { - -//////////////////////////////////////////////////////////////////////////////// - -//! Need to write `true_type` specialization for your row type `T`. -/// And implement two functions: `GetSkiffSchema` and `CreateSkiffParser`. -/// -/// Example: -/// -/// template <> -/// struct TIsSkiffRow<T> -/// : std::true_type -/// { }; -/// -template<class T> -struct TIsSkiffRow - : std::false_type -{ }; - -//////////////////////////////////////////////////////////////////////////////// - -//! Return skiff schema for row type `T`. -/// Need to write its specialization. -template <typename T> -NSkiff::TSkiffSchemaPtr GetSkiffSchema(const TMaybe<TSkiffRowHints>& /*hints*/) -{ - static_assert(TDependentFalse<T>, "Unimplemented `GetSkiffSchema` method"); -} - -//////////////////////////////////////////////////////////////////////////////// - -//! Allow to parse rows as user's structs from stream (TCheckedInDebugSkiffParser). -/// Need to write derived class for your own row type. -/// -/// Example: -/// -/// class TMySkiffRowParser : public ISkiffRowParser -/// { -/// public: -/// TMySkiffRowParser(TMySkiffRow* row) -/// : Row_(row) -/// {} -/// -/// void Parse(NSkiff::TCheckedInDebugSkiffParser* parser) -/// . { -/// Row_->SomeInt64Field = parser->ParseInt64(); -/// } -/// -/// private: -/// TMySkiffRow* Row_; -/// } -/// -class ISkiffRowParser - : public TThrRefBase -{ -public: - //! Read one row from parser - virtual void Parse(NSkiff::TCheckedInDebugSkiffParser* /*parser*/) = 0; -}; - -//! Creates a parser for row type `T`. 
-template <typename T> -ISkiffRowParserPtr CreateSkiffParser(T* /*row*/, const TMaybe<TSkiffRowHints>& /*hints*/) -{ - static_assert(TDependentFalse<T>, "Unimplemented `CreateSkiffParser` function"); -} - -//////////////////////////////////////////////////////////////////////////////// - -//! Allow to skip row content without getting row. -/// By default row will be parsed using your parser derived from ISkiffRowParser. -/// If you want, you can write more optimal skipper, but it isn't required. -class ISkiffRowSkipper - : public TThrRefBase -{ -public: - virtual void SkipRow(NSkiff::TCheckedInDebugSkiffParser* /*parser*/) = 0; -}; - -//! Default ISkiffRowSkipper implementation. -template <typename T> -class TSkiffRowSkipper : public ISkiffRowSkipper { -public: - explicit TSkiffRowSkipper(const TMaybe<TSkiffRowHints>& hints) - : Parser_(CreateSkiffParser<T>(&Row_, hints)) - { } - - void SkipRow(NSkiff::TCheckedInDebugSkiffParser* parser) { - Parser_->Parse(parser); - } - -private: - T Row_; - ISkiffRowParserPtr Parser_; -}; - -//! Creates a skipper for row type 'T'. -/// You don't need to write its specialization. 
-template <typename T> -ISkiffRowSkipperPtr CreateSkiffSkipper(const TMaybe<TSkiffRowHints>& hints) -{ - return ::MakeIntrusive<TSkiffRowSkipper<T>>(hints); -} - -//////////////////////////////////////////////////////////////////////////////// - -} // namespace NYT diff --git a/yt/cpp/mapreduce/interface/tvm.cpp b/yt/cpp/mapreduce/interface/tvm.cpp deleted file mode 100644 index bfa3f0304e..0000000000 --- a/yt/cpp/mapreduce/interface/tvm.cpp +++ /dev/null @@ -1 +0,0 @@ -#include "tvm.h" diff --git a/yt/cpp/mapreduce/interface/tvm.h b/yt/cpp/mapreduce/interface/tvm.h deleted file mode 100644 index d8d16d841b..0000000000 --- a/yt/cpp/mapreduce/interface/tvm.h +++ /dev/null @@ -1,35 +0,0 @@ -#pragma once - -#include <yt/yt/library/tvm/tvm_base.h> - -#include <library/cpp/yt/memory/intrusive_ptr.h> - -namespace NYT::NAuth { - -//////////////////////////////////////////////////////////////////////////////// - -/// This wrapper is required because NYT::NAuth::IServiceTicketAuthPtr is NYT::TIntrusivePtr, -/// and, if we used this pointer in interfaces of `mapreduce/yt` client, a lot of users of this library -/// could get unexpected build errors that `TIntrusivePtr` is ambigious -/// (from `::` namespace and from `::NYT::` namespace). -/// So we use this wrapper in our interfaces to avoid such problems for users. -struct IServiceTicketAuthPtrWrapper -{ - // - /// Construct wrapper from NYT::TIntrusivePtr - /// - /// This constructor is implicit so users can transparently pass NYT::TIntrusivePtr to the functions of - /// mapreduce/yt client. 
- template <class T, class = typename std::enable_if_t<std::is_convertible_v<T*, IServiceTicketAuth*>>> - IServiceTicketAuthPtrWrapper(const TIntrusivePtr<T> ptr) - : Ptr(ptr) - { - } - - /// Wrapped pointer - NYT::TIntrusivePtr<IServiceTicketAuth> Ptr; -}; - -//////////////////////////////////////////////////////////////////////////////// - -} // namespace NYT::NAuth diff --git a/yt/cpp/mapreduce/interface/ut/ya.make b/yt/cpp/mapreduce/interface/ut/ya.make deleted file mode 100644 index 0219e6430c..0000000000 --- a/yt/cpp/mapreduce/interface/ut/ya.make +++ /dev/null @@ -1,25 +0,0 @@ -UNITTEST_FOR(yt/cpp/mapreduce/interface) - -SRCS( - common_ut.cpp - config_ut.cpp - error_ut.cpp - format_ut.cpp - job_counters_ut.cpp - job_statistics_ut.cpp - operation_ut.cpp - proto3_ut.proto - protobuf_table_schema_ut.cpp - protobuf_file_options_ut.cpp - protobuf_table_schema_ut.proto - protobuf_file_options_ut.proto - serialize_ut.cpp -) - -PEERDIR( - contrib/libs/protobuf - library/cpp/testing/unittest - yt/yt_proto/yt/formats -) - -END() diff --git a/yt/cpp/mapreduce/interface/wait_proxy.h b/yt/cpp/mapreduce/interface/wait_proxy.h deleted file mode 100644 index f7d8e0638e..0000000000 --- a/yt/cpp/mapreduce/interface/wait_proxy.h +++ /dev/null @@ -1,54 +0,0 @@ -#pragma once - -/// -/// @file yt/cpp/mapreduce/interface/serialize.h -/// -/// Header containing interface to enable customizable waiting. - -#include <yt/cpp/mapreduce/interface/common.h> - -#include <util/datetime/base.h> - -namespace NThreading { -template <typename T> -class TFuture; -} - -class TSystemEvent; -class TCondVar; -class TMutex; - -namespace NYT { - -//////////////////////////////////////////////////////////////////////////////// - -/// -/// @brief Interface to facilitate customizable waiting. -/// -/// All the waiting functions in the library are obliged to use the methods of a wait proxy instead of direct function calls. 
-class IWaitProxy - : public TThrRefBase -{ -public: - virtual ~IWaitProxy() = default; - - /// - /// @brief Wait for the future setting with timeout. - virtual bool WaitFuture(const ::NThreading::TFuture<void>& future, TDuration timeout) = 0; - - /// - /// @brief Wait for a system event with timeout. - virtual bool WaitEvent(TSystemEvent& event, TDuration timeout) = 0; - - /// - /// @brief Wait for the notification on the condition variable with timeout. - virtual bool WaitCondVar(TCondVar& condVar, TMutex& mutex, TDuration timeout) = 0; - - /// - /// @brief Sleep in the current thread for (approximately) specified amount of time. - virtual void Sleep(TDuration timeout) = 0; -}; - -//////////////////////////////////////////////////////////////////////////////// - -} // namespace NYT diff --git a/yt/cpp/mapreduce/interface/ya.make b/yt/cpp/mapreduce/interface/ya.make deleted file mode 100644 index 0e94f14633..0000000000 --- a/yt/cpp/mapreduce/interface/ya.make +++ /dev/null @@ -1,46 +0,0 @@ -LIBRARY() - -INCLUDE(${ARCADIA_ROOT}/yt/ya_cpp.make.inc) - -SRCS( - batch_request.cpp - client.cpp - client_method_options.cpp - common.cpp - config.cpp - cypress.cpp - errors.cpp - format.cpp - job_counters.cpp - job_statistics.cpp - io.cpp - operation.cpp - protobuf_format.cpp - serialize.cpp - skiff_row.cpp - tvm.cpp -) - -PEERDIR( - contrib/libs/protobuf - library/cpp/type_info - library/cpp/threading/future - library/cpp/yson/node - yt/cpp/mapreduce/interface/logging - yt/yt_proto/yt/formats - yt/yt/library/tvm -) - -GENERATE_ENUM_SERIALIZATION(client_method_options.h) -GENERATE_ENUM_SERIALIZATION(client.h) -GENERATE_ENUM_SERIALIZATION(common.h) -GENERATE_ENUM_SERIALIZATION(config.h) -GENERATE_ENUM_SERIALIZATION(cypress.h) -GENERATE_ENUM_SERIALIZATION(job_counters.h) -GENERATE_ENUM_SERIALIZATION(job_statistics.h) -GENERATE_ENUM_SERIALIZATION(operation.h) -GENERATE_ENUM_SERIALIZATION(protobuf_format.h) - -END() - -RECURSE_FOR_TESTS(ut) diff --git 
a/yt/cpp/mapreduce/io/counting_raw_reader.cpp b/yt/cpp/mapreduce/io/counting_raw_reader.cpp deleted file mode 100644 index 6a918bdddb..0000000000 --- a/yt/cpp/mapreduce/io/counting_raw_reader.cpp +++ /dev/null @@ -1,38 +0,0 @@ -#include "counting_raw_reader.h" - -namespace NYT { -namespace NDetail { - -//////////////////////////////////////////////////////////////////////////////// - -bool TCountingRawTableReader::Retry(const TMaybe<ui32>& rangeIndex, const TMaybe<ui64>& rowIndex) -{ - return Reader_->Retry(rangeIndex, rowIndex); -} - -void TCountingRawTableReader::ResetRetries() -{ - Reader_->ResetRetries(); -} - -bool TCountingRawTableReader::HasRangeIndices() const -{ - return Reader_->HasRangeIndices(); -} - -size_t TCountingRawTableReader::GetReadByteCount() const -{ - return ReadByteCount_; -} - -size_t TCountingRawTableReader::DoRead(void* buf, size_t len) -{ - auto readLen = Reader_->Read(buf, len); - ReadByteCount_ += readLen; - return readLen; -} - -//////////////////////////////////////////////////////////////////////////////// - -} // namespace NDetail -} // namespace NYT diff --git a/yt/cpp/mapreduce/io/counting_raw_reader.h b/yt/cpp/mapreduce/io/counting_raw_reader.h deleted file mode 100644 index 3b6705c5e4..0000000000 --- a/yt/cpp/mapreduce/io/counting_raw_reader.h +++ /dev/null @@ -1,31 +0,0 @@ -#pragma once - -#include <yt/cpp/mapreduce/interface/io.h> - -namespace NYT { -namespace NDetail { - -class TCountingRawTableReader - final : public TRawTableReader -{ -public: - TCountingRawTableReader(::TIntrusivePtr<TRawTableReader> reader) - : Reader_(std::move(reader)) - { } - - bool Retry(const TMaybe<ui32>& rangeIndex, const TMaybe<ui64>& rowIndex) override; - void ResetRetries() override; - bool HasRangeIndices() const override; - - size_t GetReadByteCount() const; - -protected: - size_t DoRead(void* buf, size_t len) override; - -private: - ::TIntrusivePtr<TRawTableReader> Reader_; - size_t ReadByteCount_ = 0; -}; - -} // namespace NDetail -} // 
namespace NYT diff --git a/yt/cpp/mapreduce/io/helpers.h b/yt/cpp/mapreduce/io/helpers.h deleted file mode 100644 index 5dbbf20906..0000000000 --- a/yt/cpp/mapreduce/io/helpers.h +++ /dev/null @@ -1,130 +0,0 @@ -#pragma once - -#include <yt/cpp/mapreduce/interface/io.h> -#include <yt/cpp/mapreduce/interface/config.h> -#include <yt/cpp/mapreduce/common/helpers.h> - -namespace NYT { - -//////////////////////////////////////////////////////////////////////////////// - -template <class TOptions> -struct TIOOptionsTraits; - -template <> -struct TIOOptionsTraits<TFileReaderOptions> -{ - static constexpr const char* const ConfigName = "file_reader"; -}; -template <> -struct TIOOptionsTraits<TFileWriterOptions> -{ - static constexpr const char* const ConfigName = "file_writer"; -}; -template <> -struct TIOOptionsTraits<TTableReaderOptions> -{ - static constexpr const char* const ConfigName = "table_reader"; -}; -template <> -struct TIOOptionsTraits<TTableWriterOptions> -{ - static constexpr const char* const ConfigName = "table_writer"; -}; - -template <class TOptions> -TNode FormIORequestParameters( - const TRichYPath& path, - const TOptions& options) -{ - auto params = PathToParamNode(path); - if (options.Config_) { - params[TIOOptionsTraits<TOptions>::ConfigName] = *options.Config_; - } - return params; -} - -template <> -inline TNode FormIORequestParameters( - const TRichYPath& path, - const TFileReaderOptions& options) -{ - auto params = PathToParamNode(path); - if (options.Config_) { - params[TIOOptionsTraits<TTableReaderOptions>::ConfigName] = *options.Config_; - } - if (options.Offset_) { - params["offset"] = *options.Offset_; - } - if (options.Length_) { - params["length"] = *options.Length_; - } - return params; -} - -static void AddWriterOptionsToNode(const TWriterOptions& options, TNode* node) -{ - if (options.EnableEarlyFinish_) { - (*node)["enable_early_finish"] = *options.EnableEarlyFinish_; - } - if (options.UploadReplicationFactor_) { - 
(*node)["upload_replication_factor"] = *options.UploadReplicationFactor_; - } - if (options.MinUploadReplicationFactor_) { - (*node)["min_upload_replication_factor"] = *options.MinUploadReplicationFactor_; - } - if (options.DesiredChunkSize_) { - (*node)["desired_chunk_size"] = *options.DesiredChunkSize_; - } -} - -template <> -inline TNode FormIORequestParameters( - const TRichYPath& path, - const TFileWriterOptions& options) -{ - auto params = PathToParamNode(path); - TNode fileWriter = TNode::CreateMap(); - if (options.Config_) { - fileWriter = *options.Config_; - } - if (options.WriterOptions_) { - AddWriterOptionsToNode(*options.WriterOptions_, &fileWriter); - } - if (fileWriter.Empty()) { - AddWriterOptionsToNode( - TWriterOptions() - .EnableEarlyFinish(true) - .UploadReplicationFactor(3) - .MinUploadReplicationFactor(2), - &fileWriter); - } - params[TIOOptionsTraits<TFileWriterOptions>::ConfigName] = fileWriter; - if (options.ComputeMD5_) { - params["compute_md5"] = *options.ComputeMD5_; - } - return params; -} - -template <> -inline TNode FormIORequestParameters( - const TRichYPath& path, - const TTableWriterOptions& options) -{ - auto params = PathToParamNode(path); - auto tableWriter = TConfig::Get()->TableWriter; - if (options.Config_) { - MergeNodes(tableWriter, *options.Config_); - } - if (options.WriterOptions_) { - AddWriterOptionsToNode(*options.WriterOptions_, &tableWriter); - } - if (!tableWriter.Empty()) { - params[TIOOptionsTraits<TTableWriterOptions>::ConfigName] = std::move(tableWriter); - } - return params; -} - -//////////////////////////////////////////////////////////////////////////////// - -} diff --git a/yt/cpp/mapreduce/io/job_reader.cpp b/yt/cpp/mapreduce/io/job_reader.cpp deleted file mode 100644 index 39056f00e2..0000000000 --- a/yt/cpp/mapreduce/io/job_reader.cpp +++ /dev/null @@ -1,46 +0,0 @@ -#include "job_reader.h" - -#include <yt/cpp/mapreduce/interface/logging/yt_log.h> - -namespace NYT { - 
-//////////////////////////////////////////////////////////////////////////////// - -TJobReader::TJobReader(int fd) - : TJobReader(Duplicate(fd)) -{ } - -TJobReader::TJobReader(const TFile& file) - : FdFile_(file) - , FdInput_(FdFile_) - , BufferedInput_(&FdInput_, BUFFER_SIZE) -{ } - -bool TJobReader::Retry(const TMaybe<ui32>& /*rangeIndex*/, const TMaybe<ui64>& /*rowIndex*/) -{ - return false; -} - -void TJobReader::ResetRetries() -{ } - -bool TJobReader::HasRangeIndices() const -{ - return true; -} - -size_t TJobReader::DoRead(void* buf, size_t len) -{ - return BufferedInput_.Read(buf, len); -} - -//////////////////////////////////////////////////////////////////////////////// - -TRawTableReaderPtr CreateRawJobReader(int fd) -{ - return ::MakeIntrusive<TJobReader>(fd); -} - -//////////////////////////////////////////////////////////////////////////////// - -} // namespace NYT diff --git a/yt/cpp/mapreduce/io/job_reader.h b/yt/cpp/mapreduce/io/job_reader.h deleted file mode 100644 index ce62ec180f..0000000000 --- a/yt/cpp/mapreduce/io/job_reader.h +++ /dev/null @@ -1,38 +0,0 @@ -#pragma once - -#include <yt/cpp/mapreduce/interface/io.h> - -#include <util/stream/buffered.h> -#include <util/stream/file.h> -#include <util/system/file.h> - -namespace NYT { - -//////////////////////////////////////////////////////////////////////////////// - -class TJobReader - : public TRawTableReader -{ -public: - explicit TJobReader(int fd); - explicit TJobReader(const TFile& file); - - virtual bool Retry( const TMaybe<ui32>& /*rangeIndex*/, const TMaybe<ui64>& /*rowIndex*/) override; - virtual void ResetRetries() override; - virtual bool HasRangeIndices() const override; - -protected: - size_t DoRead(void* buf, size_t len) override; - -private: - TFile FdFile_; - TUnbufferedFileInput FdInput_; - TBufferedInput BufferedInput_; - - static const size_t BUFFER_SIZE = 64 << 10; -}; - -//////////////////////////////////////////////////////////////////////////////// - - -} // namespace 
NYT diff --git a/yt/cpp/mapreduce/io/job_writer.cpp b/yt/cpp/mapreduce/io/job_writer.cpp deleted file mode 100644 index d08bb0a665..0000000000 --- a/yt/cpp/mapreduce/io/job_writer.cpp +++ /dev/null @@ -1,68 +0,0 @@ -#include "job_writer.h" - -#include <yt/cpp/mapreduce/interface/io.h> - -#include <util/system/file.h> - -namespace NYT { - -//////////////////////////////////////////////////////////////////////////////// - -TJobWriter::TStream::TStream(int fd) - : TStream(Duplicate(fd)) -{ } - -TJobWriter::TStream::TStream(const TFile& file) - : FdFile(file) - , FdOutput(FdFile) - , BufferedOutput(&FdOutput, BUFFER_SIZE) -{ } - -TJobWriter::TStream::~TStream() -{ -} - -//////////////////////////////////////////////////////////////////////////////// - -TJobWriter::TJobWriter(size_t outputTableCount) -{ - for (size_t i = 0; i < outputTableCount; ++i) { - Streams_.emplace_back(MakeHolder<TStream>(int(i * 3 + 1))); - } -} - -TJobWriter::TJobWriter(const TVector<TFile>& fileList) -{ - for (const auto& f : fileList) { - Streams_.emplace_back(MakeHolder<TStream>(f)); - } -} - -size_t TJobWriter::GetStreamCount() const -{ - return Streams_.size(); -} - -IOutputStream* TJobWriter::GetStream(size_t tableIndex) const -{ - if (tableIndex >= Streams_.size()) { - ythrow TIOException() << - "Table index " << tableIndex << - " is out of range [0, " << Streams_.size() << ")"; - } - return &Streams_[tableIndex]->BufferedOutput; -} - -void TJobWriter::OnRowFinished(size_t) -{ } - -//////////////////////////////////////////////////////////////////////////////// - -THolder<IProxyOutput> CreateRawJobWriter(size_t outputTableCount) -{ - return ::MakeHolder<TJobWriter>(outputTableCount); -} - -//////////////////////////////////////////////////////////////////////////////// - -} // namespace NYT diff --git a/yt/cpp/mapreduce/io/job_writer.h b/yt/cpp/mapreduce/io/job_writer.h deleted file mode 100644 index 9b24650640..0000000000 --- a/yt/cpp/mapreduce/io/job_writer.h +++ /dev/null @@ -1,43 
+0,0 @@ -#pragma once - -#include <yt/cpp/mapreduce/interface/io.h> - -#include <util/generic/vector.h> -#include <util/generic/ptr.h> -#include <util/stream/file.h> -#include <util/stream/buffered.h> - -namespace NYT { - -//////////////////////////////////////////////////////////////////////////////// - -class TJobWriter - : public IProxyOutput -{ -public: - explicit TJobWriter(size_t outputTableCount); - explicit TJobWriter(const TVector<TFile>& fileList); - - size_t GetStreamCount() const override; - IOutputStream* GetStream(size_t tableIndex) const override; - void OnRowFinished(size_t tableIndex) override; - -private: - struct TStream { - TFile FdFile; - TUnbufferedFileOutput FdOutput; - TBufferedOutput BufferedOutput; - - explicit TStream(int fd); - explicit TStream(const TFile& file); - ~TStream(); - - static const size_t BUFFER_SIZE = 1 << 20; - }; - - TVector<THolder<TStream>> Streams_; -}; - -//////////////////////////////////////////////////////////////////////////////// - -} // namespace NYT diff --git a/yt/cpp/mapreduce/io/lenval_table_reader.cpp b/yt/cpp/mapreduce/io/lenval_table_reader.cpp deleted file mode 100644 index 98274c7996..0000000000 --- a/yt/cpp/mapreduce/io/lenval_table_reader.cpp +++ /dev/null @@ -1,198 +0,0 @@ -#include "lenval_table_reader.h" - -#include <yt/cpp/mapreduce/common/helpers.h> - -#include <yt/cpp/mapreduce/interface/logging/yt_log.h> - -#include <util/string/printf.h> - -namespace NYT { - -//////////////////////////////////////////////////////////////////////////////// - -const i32 CONTROL_ATTR_TABLE_INDEX = -1; -const i32 CONTROL_ATTR_KEY_SWITCH = -2; -const i32 CONTROL_ATTR_RANGE_INDEX = -3; -const i32 CONTROL_ATTR_ROW_INDEX = -4; -const i32 CONTROL_ATTR_END_OF_STREAM = -5; -const i32 CONTROL_ATTR_TABLET_INDEX = -6; - -//////////////////////////////////////////////////////////////////////////////// - -TLenvalTableReader::TLenvalTableReader(::TIntrusivePtr<TRawTableReader> input) - : Input_(std::move(input)) -{ - 
TLenvalTableReader::Next(); -} - -TLenvalTableReader::~TLenvalTableReader() -{ } - -void TLenvalTableReader::CheckValidity() const -{ - if (!IsValid()) { - ythrow yexception() << "Iterator is not valid"; - } -} - -bool TLenvalTableReader::IsValid() const -{ - return Valid_; -} - -void TLenvalTableReader::Next() -{ - if (!RowTaken_) { - SkipRow(); - } - - CheckValidity(); - - if (RowIndex_) { - ++*RowIndex_; - } - - while (true) { - try { - i32 value = 0; - if (!ReadInteger(&value, true)) { - return; - } - - while (value < 0 && !IsEndOfStream_) { - switch (value) { - case CONTROL_ATTR_KEY_SWITCH: - if (!AtStart_) { - Valid_ = false; - return; - } else { - ReadInteger(&value); - } - break; - - case CONTROL_ATTR_TABLE_INDEX: { - ui32 tmp = 0; - ReadInteger(&tmp); - TableIndex_ = tmp; - ReadInteger(&value); - break; - } - case CONTROL_ATTR_ROW_INDEX: { - ui64 tmp = 0; - ReadInteger(&tmp); - RowIndex_ = tmp; - ReadInteger(&value); - break; - } - case CONTROL_ATTR_RANGE_INDEX: { - ui32 tmp = 0; - ReadInteger(&tmp); - RangeIndex_ = tmp; - ReadInteger(&value); - break; - } - case CONTROL_ATTR_TABLET_INDEX: { - ui64 tmp = 0; - ReadInteger(&tmp); - TabletIndex_ = tmp; - ReadInteger(&value); - break; - } - case CONTROL_ATTR_END_OF_STREAM: { - IsEndOfStream_ = true; - break; - } - default: - ythrow yexception() << - Sprintf("Invalid control integer %d in lenval stream", value); - } - } - - Length_ = static_cast<ui32>(value); - RowTaken_ = false; - AtStart_ = false; - } catch (const std::exception& e) { - if (!PrepareRetry()) { - throw; - } - continue; - } - break; - } -} - -bool TLenvalTableReader::Retry() -{ - if (PrepareRetry()) { - RowTaken_ = true; - Next(); - return true; - } - return false; -} - -void TLenvalTableReader::NextKey() -{ - while (Valid_) { - Next(); - } - - if (Finished_) { - return; - } - - Valid_ = true; - - if (RowIndex_) { - --*RowIndex_; - } - - RowTaken_ = true; -} - -ui32 TLenvalTableReader::GetTableIndex() const -{ - CheckValidity(); - return 
TableIndex_; -} - -ui32 TLenvalTableReader::GetRangeIndex() const -{ - CheckValidity(); - return RangeIndex_.GetOrElse(0); -} - -ui64 TLenvalTableReader::GetRowIndex() const -{ - CheckValidity(); - return RowIndex_.GetOrElse(0UL); -} - -TMaybe<size_t> TLenvalTableReader::GetReadByteCount() const -{ - return Input_.GetReadByteCount(); -} - -bool TLenvalTableReader::IsEndOfStream() const -{ - return IsEndOfStream_; -} - -bool TLenvalTableReader::IsRawReaderExhausted() const -{ - return Finished_; -} - -bool TLenvalTableReader::PrepareRetry() -{ - if (Input_.Retry(RangeIndex_, RowIndex_)) { - RowIndex_.Clear(); - RangeIndex_.Clear(); - return true; - } - return false; -} - -//////////////////////////////////////////////////////////////////////////////// - -} // namespace NYT diff --git a/yt/cpp/mapreduce/io/lenval_table_reader.h b/yt/cpp/mapreduce/io/lenval_table_reader.h deleted file mode 100644 index 990fe0b756..0000000000 --- a/yt/cpp/mapreduce/io/lenval_table_reader.h +++ /dev/null @@ -1,67 +0,0 @@ -#pragma once - -#include "counting_raw_reader.h" - -#include <yt/cpp/mapreduce/interface/io.h> - -namespace NYT { - -//////////////////////////////////////////////////////////////////////////////// - -class TLenvalTableReader -{ -public: - explicit TLenvalTableReader(::TIntrusivePtr<TRawTableReader> input); - virtual ~TLenvalTableReader(); - -protected: - bool IsValid() const; - void Next(); - ui32 GetTableIndex() const; - ui32 GetRangeIndex() const; - ui64 GetRowIndex() const; - void NextKey(); - TMaybe<size_t> GetReadByteCount() const; - bool IsEndOfStream() const; - bool IsRawReaderExhausted() const; - - void CheckValidity() const; - - bool Retry(); - - template <class T> - bool ReadInteger(T* result, bool acceptEndOfStream = false) - { - size_t count = Input_.Load(result, sizeof(T)); - if (acceptEndOfStream && count == 0) { - Finished_ = true; - Valid_ = false; - return false; - } - Y_ENSURE(count == sizeof(T), "Premature end of stream"); - return true; - } - - 
virtual void SkipRow() = 0; - -protected: - NDetail::TCountingRawTableReader Input_; - - bool Valid_ = true; - bool Finished_ = false; - ui32 TableIndex_ = 0; - TMaybe<ui64> RowIndex_; - TMaybe<ui32> RangeIndex_; - TMaybe<ui64> TabletIndex_; - bool IsEndOfStream_ = false; - bool AtStart_ = true; - bool RowTaken_ = true; - ui32 Length_ = 0; - -private: - bool PrepareRetry(); -}; - -//////////////////////////////////////////////////////////////////////////////// - -} // namespace NYT diff --git a/yt/cpp/mapreduce/io/node_table_reader.cpp b/yt/cpp/mapreduce/io/node_table_reader.cpp deleted file mode 100644 index d39e1398a5..0000000000 --- a/yt/cpp/mapreduce/io/node_table_reader.cpp +++ /dev/null @@ -1,375 +0,0 @@ -#include "node_table_reader.h" - -#include <yt/cpp/mapreduce/common/node_builder.h> -#include <yt/cpp/mapreduce/common/wait_proxy.h> - -#include <yt/cpp/mapreduce/interface/logging/yt_log.h> - -#include <library/cpp/yson/parser.h> - -namespace NYT { - -//////////////////////////////////////////////////////////////////////////////// - -class TRowBuilder - : public ::NYson::TYsonConsumerBase -{ -public: - explicit TRowBuilder(TMaybe<TRowElement>* resultRow); - - void OnStringScalar(TStringBuf value) override; - void OnInt64Scalar(i64 value) override; - void OnUint64Scalar(ui64 value) override; - void OnDoubleScalar(double value) override; - void OnBooleanScalar(bool value) override; - void OnBeginList() override; - void OnEntity() override; - void OnListItem() override; - void OnEndList() override; - void OnBeginMap() override; - void OnKeyedItem(TStringBuf key) override; - void OnEndMap() override; - void OnBeginAttributes() override; - void OnEndAttributes() override; - - void Finalize(); - -private: - THolder<TNodeBuilder> Builder_; - TRowElement Row_; - int Depth_ = 0; - bool Started_ = false; - TMaybe<TRowElement>* ResultRow_; - - void SaveResultRow(); -}; - -TRowBuilder::TRowBuilder(TMaybe<TRowElement>* resultRow) - : ResultRow_(resultRow) -{ } - -void 
TRowBuilder::OnStringScalar(TStringBuf value) -{ - Row_.Size += sizeof(TNode) + sizeof(TString) + value.size(); - Builder_->OnStringScalar(value); -} - -void TRowBuilder::OnInt64Scalar(i64 value) -{ - Row_.Size += sizeof(TNode); - Builder_->OnInt64Scalar(value); -} - -void TRowBuilder::OnUint64Scalar(ui64 value) -{ - Row_.Size += sizeof(TNode); - Builder_->OnUint64Scalar(value); -} - -void TRowBuilder::OnDoubleScalar(double value) -{ - Row_.Size += sizeof(TNode); - Builder_->OnDoubleScalar(value); -} - -void TRowBuilder::OnBooleanScalar(bool value) -{ - Row_.Size += sizeof(TNode); - Builder_->OnBooleanScalar(value); -} - -void TRowBuilder::OnBeginList() -{ - ++Depth_; - Builder_->OnBeginList(); -} - -void TRowBuilder::OnEntity() -{ - Row_.Size += sizeof(TNode); - Builder_->OnEntity(); -} - -void TRowBuilder::OnListItem() -{ - if (Depth_ == 0) { - SaveResultRow(); - } else { - Builder_->OnListItem(); - } -} - -void TRowBuilder::OnEndList() -{ - --Depth_; - Builder_->OnEndList(); -} - -void TRowBuilder::OnBeginMap() -{ - ++Depth_; - Builder_->OnBeginMap(); -} - -void TRowBuilder::OnKeyedItem(TStringBuf key) -{ - Row_.Size += sizeof(TString) + key.size(); - Builder_->OnKeyedItem(key); -} - -void TRowBuilder::OnEndMap() -{ - --Depth_; - Builder_->OnEndMap(); -} - -void TRowBuilder::OnBeginAttributes() -{ - ++Depth_; - Builder_->OnBeginAttributes(); -} - -void TRowBuilder::OnEndAttributes() -{ - --Depth_; - Builder_->OnEndAttributes(); -} - -void TRowBuilder::SaveResultRow() -{ - if (!Started_) { - Started_ = true; - } else { - *ResultRow_ = std::move(Row_); - } - Row_.Reset(); - Builder_.Reset(new TNodeBuilder(&Row_.Node)); -} - -void TRowBuilder::Finalize() -{ - if (Started_) { - *ResultRow_ = std::move(Row_); - } -} - -//////////////////////////////////////////////////////////////////////////////// - -TNodeTableReader::TNodeTableReader(::TIntrusivePtr<TRawTableReader> input) - : Input_(std::move(input)) -{ - PrepareParsing(); - Next(); -} - 
-TNodeTableReader::~TNodeTableReader() -{ -} - -void TNodeTableReader::ParseListFragmentItem() { - if (!Parser_->Parse()) { - Builder_->Finalize(); - IsLast_ = true; - } -} - -const TNode& TNodeTableReader::GetRow() const -{ - CheckValidity(); - if (!Row_) { - ythrow yexception() << "Row is moved"; - } - return Row_->Node; -} - -void TNodeTableReader::MoveRow(TNode* result) -{ - CheckValidity(); - if (!Row_) { - ythrow yexception() << "Row is moved"; - } - *result = std::move(Row_->Node); - Row_.Clear(); -} - -bool TNodeTableReader::IsValid() const -{ - return Valid_; -} - -void TNodeTableReader::Next() -{ - try { - NextImpl(); - } catch (const std::exception& ex) { - YT_LOG_ERROR("TNodeTableReader::Next failed: %v", ex.what()); - throw; - } -} - -void TNodeTableReader::NextImpl() -{ - CheckValidity(); - - if (RowIndex_) { - ++*RowIndex_; - } - - // At the begin of stream parser doesn't return a finished row. - ParseFirstListFragmentItem(); - - while (true) { - if (IsLast_) { - Finished_ = true; - Valid_ = false; - break; - } - - try { - ParseListFragmentItem(); - } catch (std::exception& ex) { - NeedParseFirst_ = true; - OnStreamError(std::current_exception(), ex.what()); - ParseFirstListFragmentItem(); - continue; - } - - Row_ = std::move(*NextRow_); - if (!Row_) { - throw yexception() << "No row in NextRow_"; - } - - // We successfully parsed one more row from the stream, - // so reset retry count to their initial value. 
- Input_.ResetRetries(); - - if (!Row_->Node.IsNull()) { - AtStart_ = false; - break; - } - - for (auto& entry : Row_->Node.GetAttributes().AsMap()) { - if (entry.first == "key_switch") { - if (!AtStart_) { - Valid_ = false; - } - } else if (entry.first == "table_index") { - TableIndex_ = static_cast<ui32>(entry.second.AsInt64()); - } else if (entry.first == "row_index") { - RowIndex_ = static_cast<ui64>(entry.second.AsInt64()); - } else if (entry.first == "range_index") { - RangeIndex_ = static_cast<ui32>(entry.second.AsInt64()); - } else if (entry.first == "tablet_index") { - TabletIndex_ = entry.second.AsInt64(); - } else if (entry.first == "end_of_stream") { - IsEndOfStream_ = true; - } - } - - if (!Valid_) { - break; - } - } -} - -void TNodeTableReader::ParseFirstListFragmentItem() -{ - while (NeedParseFirst_) { - try { - ParseListFragmentItem(); - NeedParseFirst_ = false; - break; - } catch (std::exception& ex) { - OnStreamError(std::current_exception(), ex.what()); - } - } -} - -ui32 TNodeTableReader::GetTableIndex() const -{ - CheckValidity(); - return TableIndex_; -} - -ui32 TNodeTableReader::GetRangeIndex() const -{ - CheckValidity(); - return RangeIndex_.GetOrElse(0); -} - -ui64 TNodeTableReader::GetRowIndex() const -{ - CheckValidity(); - return RowIndex_.GetOrElse(0UL); -} - -i64 TNodeTableReader::GetTabletIndex() const -{ - CheckValidity(); - return TabletIndex_.GetOrElse(0L); -} - -void TNodeTableReader::NextKey() -{ - while (Valid_) { - Next(); - } - - if (Finished_) { - return; - } - - Valid_ = true; - - if (RowIndex_) { - --*RowIndex_; - } -} - -TMaybe<size_t> TNodeTableReader::GetReadByteCount() const -{ - return Input_.GetReadByteCount(); -} - -bool TNodeTableReader::IsEndOfStream() const -{ - return IsEndOfStream_; -} - -bool TNodeTableReader::IsRawReaderExhausted() const -{ - return Finished_; -} - -//////////////////////////////////////////////////////////////////////////////// - -void TNodeTableReader::PrepareParsing() -{ - NextRow_.Clear(); 
- Builder_.Reset(new TRowBuilder(&NextRow_)); - Parser_.Reset(new ::NYson::TYsonListParser(Builder_.Get(), &Input_)); -} - -void TNodeTableReader::OnStreamError(std::exception_ptr exception, TString error) -{ - YT_LOG_ERROR("Read error: %v", error); - Exception_ = exception; - if (Input_.Retry(RangeIndex_, RowIndex_)) { - RowIndex_.Clear(); - RangeIndex_.Clear(); - PrepareParsing(); - } else { - std::rethrow_exception(Exception_); - } -} - -void TNodeTableReader::CheckValidity() const -{ - if (!Valid_) { - ythrow yexception() << "Iterator is not valid"; - } -} - -//////////////////////////////////////////////////////////////////////////////// - -} // namespace NYT diff --git a/yt/cpp/mapreduce/io/node_table_reader.h b/yt/cpp/mapreduce/io/node_table_reader.h deleted file mode 100644 index 4fe839eeb6..0000000000 --- a/yt/cpp/mapreduce/io/node_table_reader.h +++ /dev/null @@ -1,91 +0,0 @@ -#pragma once - -#include "counting_raw_reader.h" - -#include <yt/cpp/mapreduce/interface/io.h> - -#include <library/cpp/yson/public.h> - -#include <util/stream/input.h> -#include <util/generic/buffer.h> -#include <util/system/event.h> -#include <util/system/thread.h> - -#include <atomic> - -namespace NYT { - -class TRawTableReader; -class TRowBuilder; - -//////////////////////////////////////////////////////////////////////////////// - -struct TRowElement -{ - TNode Node; - size_t Size = 0; - - void Reset() - { - Node = TNode(); - Size = 0; - } -}; - -//////////////////////////////////////////////////////////////////////////////// - -class TNodeTableReader - : public INodeReaderImpl -{ -public: - explicit TNodeTableReader(::TIntrusivePtr<TRawTableReader> input); - ~TNodeTableReader() override; - - const TNode& GetRow() const override; - void MoveRow(TNode* result) override; - - bool IsValid() const override; - void Next() override; - ui32 GetTableIndex() const override; - ui32 GetRangeIndex() const override; - ui64 GetRowIndex() const override; - i64 GetTabletIndex() const override; 
- void NextKey() override; - TMaybe<size_t> GetReadByteCount() const override; - bool IsEndOfStream() const override; - bool IsRawReaderExhausted() const override; - -private: - void NextImpl(); - void OnStreamError(std::exception_ptr exception, TString error); - void CheckValidity() const; - void PrepareParsing(); - void ParseListFragmentItem(); - void ParseFirstListFragmentItem(); - -private: - NDetail::TCountingRawTableReader Input_; - - bool Valid_ = true; - bool Finished_ = false; - ui32 TableIndex_ = 0; - TMaybe<ui64> RowIndex_; - TMaybe<ui32> RangeIndex_; - TMaybe<i64> TabletIndex_; - bool IsEndOfStream_ = false; - bool AtStart_ = true; - - TMaybe<TRowElement> Row_; - TMaybe<TRowElement> NextRow_; - - THolder<TRowBuilder> Builder_; - THolder<::NYson::TYsonListParser> Parser_; - - std::exception_ptr Exception_; - bool NeedParseFirst_ = true; - bool IsLast_ = false; -}; - -//////////////////////////////////////////////////////////////////////////////// - -} // namespace NYT diff --git a/yt/cpp/mapreduce/io/node_table_writer.cpp b/yt/cpp/mapreduce/io/node_table_writer.cpp deleted file mode 100644 index dcb5a0f5b5..0000000000 --- a/yt/cpp/mapreduce/io/node_table_writer.cpp +++ /dev/null @@ -1,72 +0,0 @@ -#include "node_table_writer.h" - -#include <yt/cpp/mapreduce/common/node_visitor.h> - -#include <yt/cpp/mapreduce/interface/io.h> - -#include <yt/cpp/mapreduce/interface/logging/yt_log.h> - -#include <library/cpp/yson/writer.h> - -namespace NYT { - -//////////////////////////////////////////////////////////////////////////////// - -TNodeTableWriter::TNodeTableWriter(THolder<IProxyOutput> output, NYson::EYsonFormat format) - : Output_(std::move(output)) -{ - for (size_t i = 0; i < Output_->GetStreamCount(); ++i) { - Writers_.push_back( - MakeHolder<::NYson::TYsonWriter>(Output_->GetStream(i), format, NYT::NYson::EYsonType::ListFragment)); - } -} - -TNodeTableWriter::~TNodeTableWriter() -{ } - -size_t TNodeTableWriter::GetTableCount() const -{ - return 
Output_->GetStreamCount(); -} - -void TNodeTableWriter::FinishTable(size_t tableIndex) { - Output_->GetStream(tableIndex)->Finish(); -} - -void TNodeTableWriter::AddRow(const TNode& row, size_t tableIndex) -{ - if (row.HasAttributes()) { - ythrow TIOException() << "Row cannot have attributes"; - } - - static const TNode emptyMap = TNode::CreateMap(); - const TNode* outRow = &emptyMap; - if (row.GetType() != TNode::Undefined) { - if (!row.IsMap()) { - ythrow TIOException() << "Row should be a map node"; - } else { - outRow = &row; - } - } - - auto* writer = Writers_[tableIndex].Get(); - writer->OnListItem(); - - TNodeVisitor visitor(writer); - visitor.Visit(*outRow); - - Output_->OnRowFinished(tableIndex); -} - -void TNodeTableWriter::AddRow(TNode&& row, size_t tableIndex) { - AddRow(row, tableIndex); -} - -void TNodeTableWriter::Abort() -{ - Output_->Abort(); -} - -//////////////////////////////////////////////////////////////////////////////// - -} // namespace NYT diff --git a/yt/cpp/mapreduce/io/node_table_writer.h b/yt/cpp/mapreduce/io/node_table_writer.h deleted file mode 100644 index 4bf8cb2fe7..0000000000 --- a/yt/cpp/mapreduce/io/node_table_writer.h +++ /dev/null @@ -1,33 +0,0 @@ -#pragma once - -#include <yt/cpp/mapreduce/interface/io.h> -#include <library/cpp/yson/public.h> - -namespace NYT { - -class IProxyOutput; - -//////////////////////////////////////////////////////////////////////////////// - -class TNodeTableWriter - : public INodeWriterImpl -{ -public: - explicit TNodeTableWriter(THolder<IProxyOutput> output, ::NYson::EYsonFormat format = ::NYson::EYsonFormat::Binary); - ~TNodeTableWriter() override; - - void AddRow(const TNode& row, size_t tableIndex) override; - void AddRow(TNode&& row, size_t tableIndex) override; - - size_t GetTableCount() const override; - void FinishTable(size_t) override; - void Abort() override; - -private: - THolder<IProxyOutput> Output_; - TVector<THolder<::NYson::TYsonWriter>> Writers_; -}; - 
-//////////////////////////////////////////////////////////////////////////////// - -} // namespace NYT diff --git a/yt/cpp/mapreduce/io/proto_helpers.cpp b/yt/cpp/mapreduce/io/proto_helpers.cpp deleted file mode 100644 index 2ffbfd8d89..0000000000 --- a/yt/cpp/mapreduce/io/proto_helpers.cpp +++ /dev/null @@ -1,101 +0,0 @@ -#include "proto_helpers.h" - -#include <yt/cpp/mapreduce/interface/io.h> -#include <yt/cpp/mapreduce/interface/fluent.h> - -#include <yt/yt_proto/yt/formats/extension.pb.h> - -#include <google/protobuf/descriptor.h> -#include <google/protobuf/descriptor.pb.h> -#include <google/protobuf/messagext.h> -#include <google/protobuf/io/coded_stream.h> - -#include <util/stream/str.h> -#include <util/stream/file.h> -#include <util/folder/path.h> - -namespace NYT { - -using ::google::protobuf::Message; -using ::google::protobuf::Descriptor; -using ::google::protobuf::DescriptorPool; - -using ::google::protobuf::io::CodedInputStream; -using ::google::protobuf::io::TCopyingInputStreamAdaptor; - -//////////////////////////////////////////////////////////////////////////////// - -namespace { - -TVector<const Descriptor*> GetJobDescriptors(const TString& fileName) -{ - TVector<const Descriptor*> descriptors; - if (!TFsPath(fileName).Exists()) { - ythrow TIOException() << - "Cannot load '" << fileName << "' file"; - } - - TIFStream input(fileName); - TString line; - while (input.ReadLine(line)) { - const auto* pool = DescriptorPool::generated_pool(); - const auto* descriptor = pool->FindMessageTypeByName(line); - descriptors.push_back(descriptor); - } - - return descriptors; -} - -} // namespace - -//////////////////////////////////////////////////////////////////////////////// - -TVector<const Descriptor*> GetJobInputDescriptors() -{ - return GetJobDescriptors("proto_input"); -} - -TVector<const Descriptor*> GetJobOutputDescriptors() -{ - return GetJobDescriptors("proto_output"); -} - -void ValidateProtoDescriptor( - const Message& row, - size_t tableIndex, - 
const TVector<const Descriptor*>& descriptors, - bool isRead) -{ - const char* direction = isRead ? "input" : "output"; - - if (tableIndex >= descriptors.size()) { - ythrow TIOException() << - "Table index " << tableIndex << - " is out of range [0, " << descriptors.size() << - ") in " << direction; - } - - if (row.GetDescriptor() != descriptors[tableIndex]) { - ythrow TIOException() << - "Invalid row of type " << row.GetDescriptor()->full_name() << - " at index " << tableIndex << - ", row of type " << descriptors[tableIndex]->full_name() << - " expected in " << direction; - } -} - -void ParseFromArcadiaStream(IInputStream* stream, Message& row, ui32 length) -{ - TLengthLimitedInput input(stream, length); - TCopyingInputStreamAdaptor adaptor(&input); - CodedInputStream codedStream(&adaptor); - codedStream.SetTotalBytesLimit(length + 1); - bool parsedOk = row.ParseFromCodedStream(&codedStream); - Y_ENSURE(parsedOk, "Failed to parse protobuf message"); - - Y_ENSURE(input.Left() == 0); -} - -//////////////////////////////////////////////////////////////////////////////// - -} // namespace NYT diff --git a/yt/cpp/mapreduce/io/proto_helpers.h b/yt/cpp/mapreduce/io/proto_helpers.h deleted file mode 100644 index 9d1ec0027c..0000000000 --- a/yt/cpp/mapreduce/io/proto_helpers.h +++ /dev/null @@ -1,36 +0,0 @@ -#pragma once - -#include <yt/cpp/mapreduce/interface/node.h> - -namespace google { -namespace protobuf { - -class Message; -class Descriptor; - -} -} - -class IInputStream; - -namespace NYT { - -//////////////////////////////////////////////////////////////////////////////// - -TVector<const ::google::protobuf::Descriptor*> GetJobInputDescriptors(); -TVector<const ::google::protobuf::Descriptor*> GetJobOutputDescriptors(); - -void ValidateProtoDescriptor( - const ::google::protobuf::Message& row, - size_t tableIndex, - const TVector<const ::google::protobuf::Descriptor*>& descriptors, - bool isRead); - -void ParseFromArcadiaStream( - IInputStream* stream, - 
::google::protobuf::Message& row, - ui32 size); - -//////////////////////////////////////////////////////////////////////////////// - -} // namespace NYT diff --git a/yt/cpp/mapreduce/io/proto_table_reader.cpp b/yt/cpp/mapreduce/io/proto_table_reader.cpp deleted file mode 100644 index 28a4bc8719..0000000000 --- a/yt/cpp/mapreduce/io/proto_table_reader.cpp +++ /dev/null @@ -1,305 +0,0 @@ -#include "proto_table_reader.h" - -#include "node_table_reader.h" - -#include "proto_helpers.h" - -#include <yt/yt_proto/yt/formats/extension.pb.h> - -#include <util/string/escape.h> -#include <util/string/printf.h> - -namespace NYT { - -using ::google::protobuf::Descriptor; -using ::google::protobuf::FieldDescriptor; -using ::google::protobuf::EnumValueDescriptor; - -const TString& GetFieldColumnName(const FieldDescriptor* fieldDesc) { - const auto& columnName = fieldDesc->options().GetExtension(column_name); - if (!columnName.empty()) { - return columnName; - } - const auto& keyColumnName = fieldDesc->options().GetExtension(key_column_name); - if (!keyColumnName.empty()) { - return keyColumnName; - } - return fieldDesc->name(); -} - -void ReadMessageFromNode(const TNode& node, Message* row) -{ - auto* descriptor = row->GetDescriptor(); - auto* reflection = row->GetReflection(); - - int count = descriptor->field_count(); - for (int i = 0; i < count; ++i) { - auto* fieldDesc = descriptor->field(i); - - const auto& columnName = GetFieldColumnName(fieldDesc); - - const auto& nodeMap = node.AsMap(); - auto it = nodeMap.find(columnName); - if (it == nodeMap.end()) { - continue; // no such column - } - auto actualType = it->second.GetType(); - if (actualType == TNode::Null) { - continue; // null field - } - - auto checkType = [&columnName] (TNode::EType expected, TNode::EType actual) { - if (expected != actual) { - ythrow TNode::TTypeError() << "expected node type " << expected - << ", actual " << actual << " for node " << columnName.data(); - } - }; - - switch (fieldDesc->type()) { - 
case FieldDescriptor::TYPE_STRING: - case FieldDescriptor::TYPE_BYTES: - checkType(TNode::String, actualType); - reflection->SetString(row, fieldDesc, it->second.AsString()); - break; - case FieldDescriptor::TYPE_INT64: - case FieldDescriptor::TYPE_SINT64: - case FieldDescriptor::TYPE_SFIXED64: - checkType(TNode::Int64, actualType); - reflection->SetInt64(row, fieldDesc, it->second.AsInt64()); - break; - case FieldDescriptor::TYPE_INT32: - case FieldDescriptor::TYPE_SINT32: - case FieldDescriptor::TYPE_SFIXED32: - checkType(TNode::Int64, actualType); - reflection->SetInt32(row, fieldDesc, it->second.AsInt64()); - break; - case FieldDescriptor::TYPE_UINT64: - case FieldDescriptor::TYPE_FIXED64: - checkType(TNode::Uint64, actualType); - reflection->SetUInt64(row, fieldDesc, it->second.AsUint64()); - break; - case FieldDescriptor::TYPE_UINT32: - case FieldDescriptor::TYPE_FIXED32: - checkType(TNode::Uint64, actualType); - reflection->SetUInt32(row, fieldDesc, it->second.AsUint64()); - break; - case FieldDescriptor::TYPE_DOUBLE: - checkType(TNode::Double, actualType); - reflection->SetDouble(row, fieldDesc, it->second.AsDouble()); - break; - case FieldDescriptor::TYPE_FLOAT: - checkType(TNode::Double, actualType); - reflection->SetFloat(row, fieldDesc, it->second.AsDouble()); - break; - case FieldDescriptor::TYPE_BOOL: - checkType(TNode::Bool, actualType); - reflection->SetBool(row, fieldDesc, it->second.AsBool()); - break; - case FieldDescriptor::TYPE_ENUM: { - TNode::EType columnType = TNode::String; - for (const auto& flag : fieldDesc->options().GetRepeatedExtension(flags)) { - if (flag == EWrapperFieldFlag::ENUM_INT) { - columnType = TNode::Int64; - break; - } - } - checkType(columnType, actualType); - - const EnumValueDescriptor* valueDesc = nullptr; - TString stringValue; - if (columnType == TNode::String) { - const auto& value = it->second.AsString(); - valueDesc = fieldDesc->enum_type()->FindValueByName(value); - stringValue = value; - } else if (columnType == 
TNode::Int64) { - const auto& value = it->second.AsInt64(); - valueDesc = fieldDesc->enum_type()->FindValueByNumber(value); - stringValue = ToString(value); - } else { - Y_FAIL(); - } - - if (valueDesc == nullptr) { - ythrow yexception() << "Failed to parse value '" << EscapeC(stringValue) << "' as " << fieldDesc->enum_type()->full_name(); - } - - reflection->SetEnum(row, fieldDesc, valueDesc); - - break; - } - case FieldDescriptor::TYPE_MESSAGE: { - checkType(TNode::String, actualType); - Message* message = reflection->MutableMessage(row, fieldDesc); - if (!message->ParseFromArray(it->second.AsString().data(), it->second.AsString().size())) { - ythrow yexception() << "Failed to parse protobuf message"; - } - break; - } - default: - ythrow yexception() << "Incorrect protobuf type"; - } - } -} - -//////////////////////////////////////////////////////////////////////////////// - -TProtoTableReader::TProtoTableReader( - ::TIntrusivePtr<TRawTableReader> input, - TVector<const Descriptor*>&& descriptors) - : NodeReader_(new TNodeTableReader(std::move(input))) - , Descriptors_(std::move(descriptors)) -{ } - -TProtoTableReader::~TProtoTableReader() -{ } - -void TProtoTableReader::ReadRow(Message* row) -{ - const auto& node = NodeReader_->GetRow(); - ReadMessageFromNode(node, row); -} - -bool TProtoTableReader::IsValid() const -{ - return NodeReader_->IsValid(); -} - -void TProtoTableReader::Next() -{ - NodeReader_->Next(); -} - -ui32 TProtoTableReader::GetTableIndex() const -{ - return NodeReader_->GetTableIndex(); -} - -ui32 TProtoTableReader::GetRangeIndex() const -{ - return NodeReader_->GetRangeIndex(); -} - -ui64 TProtoTableReader::GetRowIndex() const -{ - return NodeReader_->GetRowIndex(); -} - -void TProtoTableReader::NextKey() -{ - NodeReader_->NextKey(); -} - -TMaybe<size_t> TProtoTableReader::GetReadByteCount() const -{ - return NodeReader_->GetReadByteCount(); -} - -bool TProtoTableReader::IsEndOfStream() const -{ - return NodeReader_->IsEndOfStream(); -} - 
-bool TProtoTableReader::IsRawReaderExhausted() const -{ - return NodeReader_->IsRawReaderExhausted(); -} - -//////////////////////////////////////////////////////////////////////////////// - -TLenvalProtoTableReader::TLenvalProtoTableReader( - ::TIntrusivePtr<TRawTableReader> input, - TVector<const Descriptor*>&& descriptors) - : TLenvalTableReader(std::move(input)) - , Descriptors_(std::move(descriptors)) -{ } - -TLenvalProtoTableReader::~TLenvalProtoTableReader() -{ } - -void TLenvalProtoTableReader::ReadRow(Message* row) -{ - ValidateProtoDescriptor(*row, GetTableIndex(), Descriptors_, true); - - while (true) { - try { - ParseFromArcadiaStream(&Input_, *row, Length_); - RowTaken_ = true; - - // We successfully parsed one more row from the stream, - // so reset retry count to their initial value. - Input_.ResetRetries(); - - break; - } catch (const std::exception& ) { - if (!TLenvalTableReader::Retry()) { - throw; - } - } - } -} - -bool TLenvalProtoTableReader::IsValid() const -{ - return TLenvalTableReader::IsValid(); -} - -void TLenvalProtoTableReader::Next() -{ - TLenvalTableReader::Next(); -} - -ui32 TLenvalProtoTableReader::GetTableIndex() const -{ - return TLenvalTableReader::GetTableIndex(); -} - -ui32 TLenvalProtoTableReader::GetRangeIndex() const -{ - return TLenvalTableReader::GetRangeIndex(); -} - -ui64 TLenvalProtoTableReader::GetRowIndex() const -{ - return TLenvalTableReader::GetRowIndex(); -} - -void TLenvalProtoTableReader::NextKey() -{ - TLenvalTableReader::NextKey(); -} - -TMaybe<size_t> TLenvalProtoTableReader::GetReadByteCount() const -{ - return TLenvalTableReader::GetReadByteCount(); -} - -bool TLenvalProtoTableReader::IsEndOfStream() const -{ - return TLenvalTableReader::IsEndOfStream(); -} - -bool TLenvalProtoTableReader::IsRawReaderExhausted() const -{ - return TLenvalTableReader::IsRawReaderExhausted(); -} - -void TLenvalProtoTableReader::SkipRow() -{ - while (true) { - try { - size_t skipped = Input_.Skip(Length_); - if (skipped != 
Length_) { - ythrow yexception() << "Premature end of stream"; - } - break; - } catch (const std::exception& ) { - if (!TLenvalTableReader::Retry()) { - throw; - } - } - } -} - -//////////////////////////////////////////////////////////////////////////////// - -} // namespace NYT diff --git a/yt/cpp/mapreduce/io/proto_table_reader.h b/yt/cpp/mapreduce/io/proto_table_reader.h deleted file mode 100644 index 05a528b9c6..0000000000 --- a/yt/cpp/mapreduce/io/proto_table_reader.h +++ /dev/null @@ -1,76 +0,0 @@ -#pragma once - -#include "lenval_table_reader.h" - -#include <yt/cpp/mapreduce/interface/io.h> - -namespace NYT { - -class TRawTableReader; -class TNodeTableReader; - -//////////////////////////////////////////////////////////////////////////////// - -class TProtoTableReader - : public IProtoReaderImpl -{ -public: - explicit TProtoTableReader( - ::TIntrusivePtr<TRawTableReader> input, - TVector<const ::google::protobuf::Descriptor*>&& descriptors); - ~TProtoTableReader() override; - - void ReadRow(Message* row) override; - - bool IsValid() const override; - void Next() override; - ui32 GetTableIndex() const override; - ui32 GetRangeIndex() const override; - ui64 GetRowIndex() const override; - void NextKey() override; - TMaybe<size_t> GetReadByteCount() const override; - bool IsEndOfStream() const override; - bool IsRawReaderExhausted() const override; - -private: - THolder<TNodeTableReader> NodeReader_; - TVector<const ::google::protobuf::Descriptor*> Descriptors_; -}; - -//////////////////////////////////////////////////////////////////////////////// - -class TLenvalProtoTableReader - : public IProtoReaderImpl - , public TLenvalTableReader -{ -public: - explicit TLenvalProtoTableReader( - ::TIntrusivePtr<TRawTableReader> input, - TVector<const ::google::protobuf::Descriptor*>&& descriptors); - ~TLenvalProtoTableReader() override; - - void ReadRow(Message* row) override; - - bool IsValid() const override; - void Next() override; - ui32 GetTableIndex() const 
override; - ui32 GetRangeIndex() const override; - ui64 GetRowIndex() const override; - void NextKey() override; - TMaybe<size_t> GetReadByteCount() const override; - bool IsEndOfStream() const override; - bool IsRawReaderExhausted() const override; - -protected: - void SkipRow() override; - -private: - TVector<const ::google::protobuf::Descriptor*> Descriptors_; -}; - -// Sometime useful outside mapreduce/yt -void ReadMessageFromNode(const TNode& node, Message* row); - -//////////////////////////////////////////////////////////////////////////////// - -} // namespace NYT diff --git a/yt/cpp/mapreduce/io/proto_table_writer.cpp b/yt/cpp/mapreduce/io/proto_table_writer.cpp deleted file mode 100644 index 1ce7811625..0000000000 --- a/yt/cpp/mapreduce/io/proto_table_writer.cpp +++ /dev/null @@ -1,184 +0,0 @@ -#include "proto_table_writer.h" - -#include "node_table_writer.h" -#include "proto_helpers.h" - -#include <yt/cpp/mapreduce/common/node_builder.h> - -#include <yt/cpp/mapreduce/interface/io.h> - -#include <yt/yt_proto/yt/formats/extension.pb.h> - -#include <google/protobuf/unknown_field_set.h> - -namespace NYT { - -using ::google::protobuf::Descriptor; -using ::google::protobuf::FieldDescriptor; - -//////////////////////////////////////////////////////////////////////////////// - -TNode MakeNodeFromMessage(const Message& row) -{ - TNode node; - TNodeBuilder builder(&node); - builder.OnBeginMap(); - - auto* descriptor = row.GetDescriptor(); - auto* reflection = row.GetReflection(); - - int count = descriptor->field_count(); - for (int i = 0; i < count; ++i) { - auto* fieldDesc = descriptor->field(i); - if (fieldDesc->is_repeated()) { - Y_ENSURE(reflection->FieldSize(row, fieldDesc) == 0, "Storing repeated protobuf fields is not supported yet"); - continue; - } else if (!reflection->HasField(row, fieldDesc)) { - continue; - } - - TString columnName = fieldDesc->options().GetExtension(column_name); - if (columnName.empty()) { - const auto& keyColumnName = 
fieldDesc->options().GetExtension(key_column_name); - columnName = keyColumnName.empty() ? fieldDesc->name() : keyColumnName; - } - - builder.OnKeyedItem(columnName); - - switch (fieldDesc->type()) { - case FieldDescriptor::TYPE_STRING: - case FieldDescriptor::TYPE_BYTES: - builder.OnStringScalar(reflection->GetString(row, fieldDesc)); - break; - case FieldDescriptor::TYPE_INT64: - case FieldDescriptor::TYPE_SINT64: - case FieldDescriptor::TYPE_SFIXED64: - builder.OnInt64Scalar(reflection->GetInt64(row, fieldDesc)); - break; - case FieldDescriptor::TYPE_INT32: - case FieldDescriptor::TYPE_SINT32: - case FieldDescriptor::TYPE_SFIXED32: - builder.OnInt64Scalar(reflection->GetInt32(row, fieldDesc)); - break; - case FieldDescriptor::TYPE_UINT64: - case FieldDescriptor::TYPE_FIXED64: - builder.OnUint64Scalar(reflection->GetUInt64(row, fieldDesc)); - break; - case FieldDescriptor::TYPE_UINT32: - case FieldDescriptor::TYPE_FIXED32: - builder.OnUint64Scalar(reflection->GetUInt32(row, fieldDesc)); - break; - case FieldDescriptor::TYPE_DOUBLE: - builder.OnDoubleScalar(reflection->GetDouble(row, fieldDesc)); - break; - case FieldDescriptor::TYPE_FLOAT: - builder.OnDoubleScalar(reflection->GetFloat(row, fieldDesc)); - break; - case FieldDescriptor::TYPE_BOOL: - builder.OnBooleanScalar(reflection->GetBool(row, fieldDesc)); - break; - case FieldDescriptor::TYPE_ENUM: - builder.OnStringScalar(reflection->GetEnum(row, fieldDesc)->name()); - break; - case FieldDescriptor::TYPE_MESSAGE: - builder.OnStringScalar(reflection->GetMessage(row, fieldDesc).SerializeAsString()); - break; - default: - ythrow yexception() << "Invalid field type for column: " << columnName; - break; - } - } - - builder.OnEndMap(); - return node; -} - -//////////////////////////////////////////////////////////////////////////////// - -TProtoTableWriter::TProtoTableWriter( - THolder<IProxyOutput> output, - TVector<const Descriptor*>&& descriptors) - : NodeWriter_(new TNodeTableWriter(std::move(output))) - , 
Descriptors_(std::move(descriptors)) -{ } - -TProtoTableWriter::~TProtoTableWriter() -{ } - -size_t TProtoTableWriter::GetTableCount() const -{ - return NodeWriter_->GetTableCount(); -} - -void TProtoTableWriter::FinishTable(size_t tableIndex) -{ - NodeWriter_->FinishTable(tableIndex); -} - -void TProtoTableWriter::AddRow(const Message& row, size_t tableIndex) -{ - NodeWriter_->AddRow(MakeNodeFromMessage(row), tableIndex); -} - -void TProtoTableWriter::AddRow(Message&& row, size_t tableIndex) -{ - TProtoTableWriter::AddRow(row, tableIndex); -} - - -void TProtoTableWriter::Abort() -{ - NodeWriter_->Abort(); -} - -//////////////////////////////////////////////////////////////////////////////// - -TLenvalProtoTableWriter::TLenvalProtoTableWriter( - THolder<IProxyOutput> output, - TVector<const Descriptor*>&& descriptors) - : Output_(std::move(output)) - , Descriptors_(std::move(descriptors)) -{ } - -TLenvalProtoTableWriter::~TLenvalProtoTableWriter() -{ } - -size_t TLenvalProtoTableWriter::GetTableCount() const -{ - return Output_->GetStreamCount(); -} - -void TLenvalProtoTableWriter::FinishTable(size_t tableIndex) -{ - Output_->GetStream(tableIndex)->Finish(); -} - -void TLenvalProtoTableWriter::AddRow(const Message& row, size_t tableIndex) -{ - ValidateProtoDescriptor(row, tableIndex, Descriptors_, false); - - Y_VERIFY(row.GetReflection()->GetUnknownFields(row).empty(), - "Message has unknown fields. 
This probably means bug in client code.\n" - "Message: %s", row.DebugString().data()); - - auto* stream = Output_->GetStream(tableIndex); - i32 size = row.ByteSize(); - stream->Write(&size, sizeof(size)); - bool serializedOk = row.SerializeToArcadiaStream(stream); - Y_ENSURE(serializedOk, "Failed to serialize protobuf message"); - Output_->OnRowFinished(tableIndex); -} - -void TLenvalProtoTableWriter::AddRow(Message&& row, size_t tableIndex) -{ - TLenvalProtoTableWriter::AddRow(row, tableIndex); -} - -void TLenvalProtoTableWriter::Abort() -{ - Output_->Abort(); -} - -//////////////////////////////////////////////////////////////////////////////// - -} // namespace NYT diff --git a/yt/cpp/mapreduce/io/proto_table_writer.h b/yt/cpp/mapreduce/io/proto_table_writer.h deleted file mode 100644 index a6df69e6ae..0000000000 --- a/yt/cpp/mapreduce/io/proto_table_writer.h +++ /dev/null @@ -1,61 +0,0 @@ -#pragma once - -#include <yt/cpp/mapreduce/interface/io.h> - -namespace NYT { - -class IProxyOutput; -class TNodeTableWriter; - -//////////////////////////////////////////////////////////////////////////////// - -class TProtoTableWriter - : public IProtoWriterImpl -{ -public: - TProtoTableWriter( - THolder<IProxyOutput> output, - TVector<const ::google::protobuf::Descriptor*>&& descriptors); - ~TProtoTableWriter() override; - - void AddRow(const Message& row, size_t tableIndex) override; - void AddRow(Message&& row, size_t tableIndex) override; - - size_t GetTableCount() const override; - void FinishTable(size_t) override; - void Abort() override; - -private: - THolder<TNodeTableWriter> NodeWriter_; - TVector<const ::google::protobuf::Descriptor*> Descriptors_; -}; - -//////////////////////////////////////////////////////////////////////////////// - -class TLenvalProtoTableWriter - : public IProtoWriterImpl -{ -public: - TLenvalProtoTableWriter( - THolder<IProxyOutput> output, - TVector<const ::google::protobuf::Descriptor*>&& descriptors); - ~TLenvalProtoTableWriter() 
override; - - void AddRow(const Message& row, size_t tableIndex) override; - void AddRow(Message&& row, size_t tableIndex) override; - - size_t GetTableCount() const override; - void FinishTable(size_t) override; - void Abort() override; - -private: - THolder<IProxyOutput> Output_; - TVector<const ::google::protobuf::Descriptor*> Descriptors_; -}; - -// Sometime useful outside mapreduce/yt -TNode MakeNodeFromMessage(const ::google::protobuf::Message& row); - -//////////////////////////////////////////////////////////////////////////////// - -} // namespace NYT diff --git a/yt/cpp/mapreduce/io/skiff_row_table_reader.cpp b/yt/cpp/mapreduce/io/skiff_row_table_reader.cpp deleted file mode 100644 index 8da3b2da31..0000000000 --- a/yt/cpp/mapreduce/io/skiff_row_table_reader.cpp +++ /dev/null @@ -1,232 +0,0 @@ -#include "skiff_row_table_reader.h" - -#include <yt/cpp/mapreduce/interface/logging/yt_log.h> - -#include <yt/cpp/mapreduce/interface/skiff_row.h> - -#include <library/cpp/skiff/skiff.h> - -#include <library/cpp/yt/logging/logger.h> - -namespace NYT { - -//////////////////////////////////////////////////////////////////////////////// - -TSkiffRowTableReader::TSkiffRowTableReader( - ::TIntrusivePtr<TRawTableReader> input, - const NSkiff::TSkiffSchemaPtr& schema, - TVector<ISkiffRowSkipperPtr>&& skippers, - NDetail::TCreateSkiffSchemaOptions&& options) - : Input_(std::move(input)) - , BufferedInput_(&Input_) - , Parser_({schema, &BufferedInput_}) - , Skippers_(std::move(skippers)) - , Options_(std::move(options)) -{ - Next(); -} - -TSkiffRowTableReader::~TSkiffRowTableReader() -{ } - -bool TSkiffRowTableReader::Retry() -{ - if (PrepareRetry()) { - RowTaken_ = true; - Next(); - return true; - } - return false; -} - -bool TSkiffRowTableReader::PrepareRetry() -{ - if (Input_.Retry(RangeIndex_, RowIndex_)) { - RowIndex_.Clear(); - RangeIndex_.Clear(); - BufferedInput_ = TBufferedInput(&Input_); - Parser_.emplace(&BufferedInput_); - return true; - } - return false; -} - 
-void TSkiffRowTableReader::ReadRow(const ISkiffRowParserPtr& parser) -{ - while (true) { - try { - parser->Parse(&Parser_.value()); - RowTaken_ = true; - - // We successfully parsed one more row from the stream, - // so reset retry count to their initial value. - Input_.ResetRetries(); - - break; - } catch (const std::exception& ex) { - YT_LOG_ERROR("Read error during parsing: %v", ex.what()); - - if (!Retry()) { - throw; - } - } - } -} - -bool TSkiffRowTableReader::IsValid() const -{ - return Valid_; -} - -void TSkiffRowTableReader::SkipRow() -{ - CheckValidity(); - while (true) { - try { - Skippers_[TableIndex_]->SkipRow(&Parser_.value()); - - break; - } catch (const std::exception& ex) { - YT_LOG_ERROR("Read error during skipping row: %v", ex.what()); - - if (!Retry()) { - throw; - } - } - } -} - -void TSkiffRowTableReader::CheckValidity() const { - if (!IsValid()) { - ythrow yexception() << "Iterator is not valid"; - } -} - -void TSkiffRowTableReader::Next() -{ - if (!RowTaken_) { - SkipRow(); - } - - CheckValidity(); - - if (Y_UNLIKELY(Finished_ || !Parser_->HasMoreData())) { - Finished_ = true; - Valid_ = false; - return; - } - - if (AfterKeySwitch_) { - AfterKeySwitch_ = false; - return; - } - - if (RowIndex_) { - ++*RowIndex_; - } - - while (true) { - try { - auto tag = Parser_->ParseVariant16Tag(); - if (tag == NSkiff::EndOfSequenceTag<ui16>()) { - IsEndOfStream_ = true; - break; - } else { - TableIndex_ = tag; - } - - if (TableIndex_ >= Skippers_.size()) { - ythrow TIOException() << - "Table index " << TableIndex_ << - " is out of range [0, " << Skippers_.size() << - ") in read"; - } - - if (Options_.HasKeySwitch_) { - auto keySwitch = Parser_->ParseBoolean(); - if (keySwitch) { - AfterKeySwitch_ = true; - Valid_ = false; - } - } - - auto tagRowIndex = Parser_->ParseVariant8Tag(); - if (tagRowIndex == 1) { - RowIndex_ = Parser_->ParseInt64(); - } else { - Y_ENSURE(tagRowIndex == 0, "Tag for row_index was expected to be 0 or 1, got " << tagRowIndex); - } 
- - if (Options_.HasRangeIndex_) { - auto tagRangeIndex = Parser_->ParseVariant8Tag(); - if (tagRangeIndex == 1) { - RangeIndex_ = Parser_->ParseInt64(); - } else { - Y_ENSURE(tagRangeIndex == 0, "Tag for range_index was expected to be 0 or 1, got " << tagRangeIndex); - } - } - - break; - } catch (const std::exception& ex) { - YT_LOG_ERROR("Read error: %v", ex.what()); - - if (!PrepareRetry()) { - throw; - } - } - } - - RowTaken_ = false; -} - -ui32 TSkiffRowTableReader::GetTableIndex() const -{ - CheckValidity(); - return TableIndex_; -} - -ui32 TSkiffRowTableReader::GetRangeIndex() const -{ - CheckValidity(); - return RangeIndex_.GetOrElse(0); -} - -ui64 TSkiffRowTableReader::GetRowIndex() const -{ - CheckValidity(); - return RowIndex_.GetOrElse(0ULL); -} - -void TSkiffRowTableReader::NextKey() { - while (Valid_) { - Next(); - } - - if (Finished_) { - return; - } - - Valid_ = true; - - if (RowIndex_) { - --*RowIndex_; - } - - RowTaken_ = true; -} - -TMaybe<size_t> TSkiffRowTableReader::GetReadByteCount() const { - return Input_.GetReadByteCount(); -} - -bool TSkiffRowTableReader::IsEndOfStream() const { - return IsEndOfStream_; -} - -bool TSkiffRowTableReader::IsRawReaderExhausted() const { - return Finished_; -} - -//////////////////////////////////////////////////////////////////////////////// - -} // namespace NYT diff --git a/yt/cpp/mapreduce/io/skiff_row_table_reader.h b/yt/cpp/mapreduce/io/skiff_row_table_reader.h deleted file mode 100644 index 368968266c..0000000000 --- a/yt/cpp/mapreduce/io/skiff_row_table_reader.h +++ /dev/null @@ -1,67 +0,0 @@ -#pragma once - -#include "counting_raw_reader.h" - -#include <yt/cpp/mapreduce/client/skiff.h> - -#include <yt/cpp/mapreduce/interface/io.h> - -#include <yt/cpp/mapreduce/skiff/unchecked_parser.h> - -#include <util/stream/buffered.h> - -namespace NYT { - -//////////////////////////////////////////////////////////////////////////////// - -class TSkiffRowTableReader - : public ISkiffRowReaderImpl -{ -public: - 
explicit TSkiffRowTableReader( - ::TIntrusivePtr<TRawTableReader> input, - const NSkiff::TSkiffSchemaPtr& schema, - TVector<ISkiffRowSkipperPtr>&& skippers, - NDetail::TCreateSkiffSchemaOptions&& options); - - ~TSkiffRowTableReader() override; - - void ReadRow(const ISkiffRowParserPtr& parser) override; - - bool IsValid() const override; - void Next() override; - ui32 GetTableIndex() const override; - ui32 GetRangeIndex() const override; - ui64 GetRowIndex() const override; - void NextKey() override; - TMaybe<size_t> GetReadByteCount() const override; - bool IsEndOfStream() const override; - bool IsRawReaderExhausted() const override; - -private: - bool Retry(); - void SkipRow(); - void CheckValidity() const; - bool PrepareRetry(); - -private: - NDetail::TCountingRawTableReader Input_; - TBufferedInput BufferedInput_; - std::optional<NSkiff::TCheckedInDebugSkiffParser> Parser_; - TVector<ISkiffRowSkipperPtr> Skippers_; - NDetail::TCreateSkiffSchemaOptions Options_; - - bool RowTaken_ = true; - bool Valid_ = true; - bool Finished_ = false; - bool AfterKeySwitch_ = false; - bool IsEndOfStream_ = false; - - TMaybe<ui64> RowIndex_; - TMaybe<ui32> RangeIndex_; - ui32 TableIndex_ = 0; -}; - -//////////////////////////////////////////////////////////////////////////////// - -} // namespace NYT diff --git a/yt/cpp/mapreduce/io/skiff_table_reader.cpp b/yt/cpp/mapreduce/io/skiff_table_reader.cpp deleted file mode 100644 index 51c20609f0..0000000000 --- a/yt/cpp/mapreduce/io/skiff_table_reader.cpp +++ /dev/null @@ -1,293 +0,0 @@ -#include "skiff_table_reader.h" - -#include <yt/cpp/mapreduce/interface/logging/yt_log.h> - -#include <library/cpp/yson/node/node_io.h> - -#include <yt/cpp/mapreduce/skiff/wire_type.h> -#include <yt/cpp/mapreduce/skiff/skiff_schema.h> - -#include <util/string/cast.h> - -namespace NYT { -namespace NDetail { -namespace { - -//////////////////////////////////////////////////////////////////////////////// - -enum EColumnType : i8 -{ - Dense, - KeySwitch, 
- RangeIndex, - RowIndex -}; - -struct TSkiffColumnSchema -{ - EColumnType Type; - bool Required; - NSkiff::EWireType WireType; - TString Name; - - TSkiffColumnSchema(EColumnType type, bool required, NSkiff::EWireType wireType, const TString& name) - : Type(type) - , Required(required) - , WireType(wireType) - , Name(name) - { } -}; - -//////////////////////////////////////////////////////////////////////////////// - -} // namespace - -struct TSkiffTableReader::TSkiffTableSchema -{ - TVector<TSkiffColumnSchema> Columns; -}; - -TSkiffTableReader::TSkiffTableReader( - ::TIntrusivePtr<TRawTableReader> input, - const NSkiff::TSkiffSchemaPtr& schema) - : Input_(std::move(input)) - , BufferedInput_(&Input_) - , Parser_(&BufferedInput_) - , Schemas_(CreateSkiffTableSchemas(schema)) -{ - Next(); -} - -TSkiffTableReader::~TSkiffTableReader() = default; - -const TNode& TSkiffTableReader::GetRow() const -{ - EnsureValidity(); - Y_ENSURE(!Row_.IsUndefined(), "Row is moved"); - return Row_; -} - -void TSkiffTableReader::MoveRow(TNode* result) -{ - EnsureValidity(); - Y_ENSURE(!Row_.IsUndefined(), "Row is moved"); - *result = std::move(Row_); - Row_ = TNode(); -} - -bool TSkiffTableReader::IsValid() const -{ - return Valid_; -} - -void TSkiffTableReader::Next() -{ - EnsureValidity(); - if (Y_UNLIKELY(Finished_ || !Parser_->HasMoreData())) { - Finished_ = true; - Valid_ = false; - return; - } - - if (AfterKeySwitch_) { - AfterKeySwitch_ = false; - return; - } - - while (true) { - try { - ReadRow(); - break; - } catch (const std::exception& exception) { - YT_LOG_ERROR("Read error: %v", exception.what()); - if (!Input_.Retry(RangeIndex_, RowIndex_)) { - throw; - } - BufferedInput_ = TBufferedInput(&Input_); - Parser_.emplace(NSkiff::TUncheckedSkiffParser(&BufferedInput_)); - RangeIndex_.Clear(); - RowIndex_.Clear(); - } - } -} - -ui32 TSkiffTableReader::GetTableIndex() const -{ - EnsureValidity(); - return TableIndex_; -} - -ui32 TSkiffTableReader::GetRangeIndex() const -{ - 
EnsureValidity(); - return RangeIndex_.GetOrElse(0); -} - -ui64 TSkiffTableReader::GetRowIndex() const -{ - EnsureValidity(); - return RowIndex_.GetOrElse(0ULL); -} - -void TSkiffTableReader::NextKey() -{ - while (Valid_) { - Next(); - } - - if (Finished_) { - return; - } - - Valid_ = true; -} - -TMaybe<size_t> TSkiffTableReader::GetReadByteCount() const -{ - return Input_.GetReadByteCount(); -} - -bool TSkiffTableReader::IsRawReaderExhausted() const -{ - return Finished_; -} - -//////////////////////////////////////////////////////////////////////////////// - -TVector<TSkiffTableReader::TSkiffTableSchema> TSkiffTableReader::CreateSkiffTableSchemas( - const NSkiff::TSkiffSchemaPtr& schema) -{ - using NSkiff::EWireType; - - constexpr auto keySwitchColumnName = "$key_switch"; - constexpr auto rangeIndexColumnName = "$range_index"; - constexpr auto rowIndexColumnName = "$row_index"; - - static const THashMap<TString, TSkiffColumnSchema> specialColumns = { - {keySwitchColumnName, {EColumnType::KeySwitch, true, EWireType::Boolean, keySwitchColumnName}}, - {rangeIndexColumnName, {EColumnType::RangeIndex, false, EWireType::Int64, rangeIndexColumnName}}, - {rowIndexColumnName, {EColumnType::RowIndex, false, EWireType::Int64, rowIndexColumnName}}, - }; - - Y_ENSURE(schema->GetWireType() == EWireType::Variant16, - "Expected 'variant16' wire type for schema, got '" << schema->GetWireType() << "'"); - TVector<TSkiffTableSchema> result; - for (const auto& tableSchema : schema->GetChildren()) { - Y_ENSURE(tableSchema->GetWireType() == EWireType::Tuple, - "Expected 'tuple' wire type for table schema, got '" << tableSchema->GetWireType() << "'"); - TVector<TSkiffColumnSchema> columns; - for (const auto& columnSchema : tableSchema->GetChildren()) { - if (columnSchema->GetName().StartsWith("$")) { - auto iter = specialColumns.find(columnSchema->GetName()); - Y_ENSURE(iter != specialColumns.end(), "Unknown special column: " << columnSchema->GetName()); - 
columns.push_back(iter->second); - } else { - auto wireType = columnSchema->GetWireType(); - bool required = true; - if (wireType == EWireType::Variant8) { - const auto& children = columnSchema->GetChildren(); - Y_ENSURE( - children.size() == 2 && children[0]->GetWireType() == EWireType::Nothing && - NSkiff::IsSimpleType(children[1]->GetWireType()), - "Expected schema of form 'variant8<nothing, simple-type>', got " - << NSkiff::GetShortDebugString(columnSchema)); - wireType = children[1]->GetWireType(); - required = false; - } - Y_ENSURE(NSkiff::IsSimpleType(wireType), - "Expected column schema to be of simple type, got " << NSkiff::GetShortDebugString(columnSchema)); - columns.emplace_back( - EColumnType::Dense, - required, - wireType, - columnSchema->GetName()); - } - } - result.push_back({std::move(columns)}); - } - return result; -} - -void TSkiffTableReader::ReadRow() -{ - if (Row_.IsUndefined()) { - Row_ = TNode::CreateMap(); - } else { - Row_.AsMap().clear(); - } - - if (RowIndex_) { - ++*RowIndex_; - } - - TableIndex_ = Parser_->ParseVariant16Tag(); - Y_ENSURE(TableIndex_ < Schemas_.size(), "Table index out of range: " << TableIndex_ << " >= " << Schemas_.size()); - const auto& tableSchema = Schemas_[TableIndex_]; - - auto parse = [&](NSkiff::EWireType wireType) -> TNode { - switch (wireType) { - case NSkiff::EWireType::Int64: - return Parser_->ParseInt64(); - case NSkiff::EWireType::Uint64: - return Parser_->ParseUint64(); - case NSkiff::EWireType::Boolean: - return Parser_->ParseBoolean(); - case NSkiff::EWireType::Double: - return Parser_->ParseDouble(); - case NSkiff::EWireType::String32: - return Parser_->ParseString32(); - case NSkiff::EWireType::Yson32: - return NodeFromYsonString(Parser_->ParseYson32()); - case NSkiff::EWireType::Nothing: - return TNode::CreateEntity(); - default: - Y_FAIL("Bad column wire type: '%s'", ::ToString(wireType).data()); - } - }; - - for (const auto& columnSchema : tableSchema.Columns) { - if (!columnSchema.Required) { - 
auto tag = Parser_->ParseVariant8Tag(); - if (tag == 0) { - if (columnSchema.Type == EColumnType::Dense) { - Row_[columnSchema.Name] = TNode::CreateEntity(); - } - continue; - } - Y_ENSURE(tag == 1, "Tag for 'variant8<nothing," << columnSchema.WireType - << ">' expected to be 0 or 1, got " << tag); - } - auto value = parse(columnSchema.WireType); - switch (columnSchema.Type) { - case EColumnType::Dense: - Row_[columnSchema.Name] = std::move(value); - break; - case EColumnType::KeySwitch: - if (value.AsBool()) { - AfterKeySwitch_ = true; - Valid_ = false; - } - break; - case EColumnType::RangeIndex: - RangeIndex_ = value.AsInt64(); - break; - case EColumnType::RowIndex: - RowIndex_ = value.AsInt64(); - break; - default: - Y_FAIL("Bad column type: %d", static_cast<int>(columnSchema.Type)); - } - } - - // We successfully parsed one more row from the stream, - // so reset retry count to their initial value. - Input_.ResetRetries(); -} - -void TSkiffTableReader::EnsureValidity() const -{ - Y_ENSURE(Valid_, "Iterator is not valid"); -} - -} // namespace NDetail -} // namespace NYT diff --git a/yt/cpp/mapreduce/io/skiff_table_reader.h b/yt/cpp/mapreduce/io/skiff_table_reader.h deleted file mode 100644 index 95ece5f9c7..0000000000 --- a/yt/cpp/mapreduce/io/skiff_table_reader.h +++ /dev/null @@ -1,65 +0,0 @@ -#pragma once - -#include "counting_raw_reader.h" - -#include <yt/cpp/mapreduce/interface/io.h> - -#include <yt/cpp/mapreduce/skiff/wire_type.h> -#include <yt/cpp/mapreduce/skiff/unchecked_parser.h> - -#include <util/stream/buffered.h> - -namespace NYT { -namespace NDetail { - -//////////////////////////////////////////////////////////////////////////////// - -class TSkiffTableReader - : public INodeReaderImpl -{ -public: - TSkiffTableReader( - ::TIntrusivePtr<TRawTableReader> input, - const std::shared_ptr<NSkiff::TSkiffSchema>& schema); - ~TSkiffTableReader() override; - - virtual const TNode& GetRow() const override; - virtual void MoveRow(TNode* row) override; - - 
bool IsValid() const override; - void Next() override; - ui32 GetTableIndex() const override; - ui32 GetRangeIndex() const override; - ui64 GetRowIndex() const override; - void NextKey() override; - TMaybe<size_t> GetReadByteCount() const override; - bool IsRawReaderExhausted() const override; - -private: - struct TSkiffTableSchema; - -private: - void EnsureValidity() const; - void ReadRow(); - static TVector<TSkiffTableSchema> CreateSkiffTableSchemas(const std::shared_ptr<NSkiff::TSkiffSchema>& schema); - -private: - NDetail::TCountingRawTableReader Input_; - TBufferedInput BufferedInput_; - std::optional<NSkiff::TUncheckedSkiffParser> Parser_; - TVector<TSkiffTableSchema> Schemas_; - - TNode Row_; - - bool Valid_ = true; - bool AfterKeySwitch_ = false; - bool Finished_ = false; - TMaybe<ui64> RangeIndex_; - TMaybe<ui64> RowIndex_; - ui32 TableIndex_ = 0; -}; - -//////////////////////////////////////////////////////////////////////////////// - -} // namespace NDetail -} // namespace NYT diff --git a/yt/cpp/mapreduce/io/stream_raw_reader.cpp b/yt/cpp/mapreduce/io/stream_raw_reader.cpp deleted file mode 100644 index ec19b67d0b..0000000000 --- a/yt/cpp/mapreduce/io/stream_raw_reader.cpp +++ /dev/null @@ -1,59 +0,0 @@ -#include "stream_table_reader.h" - -#include "node_table_reader.h" -#include "proto_table_reader.h" -#include "skiff_table_reader.h" -#include "yamr_table_reader.h" - -#include <util/system/env.h> -#include <util/string/type.h> - -namespace NYT { - -template <> -TTableReaderPtr<TNode> CreateTableReader<TNode>( - IInputStream* stream, const TTableReaderOptions& /*options*/) -{ - auto impl = ::MakeIntrusive<TNodeTableReader>( - ::MakeIntrusive<NDetail::TInputStreamProxy>(stream)); - return new TTableReader<TNode>(impl); -} - -template <> -TTableReaderPtr<TYaMRRow> CreateTableReader<TYaMRRow>( - IInputStream* stream, const TTableReaderOptions& /*options*/) -{ - auto impl = ::MakeIntrusive<TYaMRTableReader>( - 
::MakeIntrusive<NDetail::TInputStreamProxy>(stream)); - return new TTableReader<TYaMRRow>(impl); -} - - -namespace NDetail { - -//////////////////////////////////////////////////////////////////////////////// - -::TIntrusivePtr<IProtoReaderImpl> CreateProtoReader( - IInputStream* stream, - const TTableReaderOptions& /* options */, - const ::google::protobuf::Descriptor* descriptor) -{ - return new TLenvalProtoTableReader( - ::MakeIntrusive<TInputStreamProxy>(stream), - {descriptor}); -} - -::TIntrusivePtr<IProtoReaderImpl> CreateProtoReader( - IInputStream* stream, - const TTableReaderOptions& /* options */, - TVector<const ::google::protobuf::Descriptor*> descriptors) -{ - return new TLenvalProtoTableReader( - ::MakeIntrusive<TInputStreamProxy>(stream), - std::move(descriptors)); -} - -//////////////////////////////////////////////////////////////////////////////// - -} // namespace NDetail -} // namespace NYT diff --git a/yt/cpp/mapreduce/io/stream_table_reader.h b/yt/cpp/mapreduce/io/stream_table_reader.h deleted file mode 100644 index d799c63cf4..0000000000 --- a/yt/cpp/mapreduce/io/stream_table_reader.h +++ /dev/null @@ -1,65 +0,0 @@ -#pragma once - -#include <yt/cpp/mapreduce/interface/io.h> - -namespace NYT { -namespace NDetail { - -//////////////////////////////////////////////////////////////////////////////// - -class TInputStreamProxy - : public TRawTableReader -{ -public: - TInputStreamProxy(IInputStream* stream) - : Stream_(stream) - { } - - bool Retry(const TMaybe<ui32>& /* rangeIndex */, const TMaybe<ui64>& /* rowIndex */) override - { - return false; - } - - void ResetRetries() override - { } - - bool HasRangeIndices() const override - { - return false; - } - -protected: - size_t DoRead(void* buf, size_t len) override - { - return Stream_->Read(buf, len); - } - -private: - IInputStream* Stream_; -}; - -//////////////////////////////////////////////////////////////////////////////// - -::TIntrusivePtr<IProtoReaderImpl> CreateProtoReader( - 
IInputStream* stream, - const TTableReaderOptions& /* options */, - const ::google::protobuf::Descriptor* descriptor); - -::TIntrusivePtr<IProtoReaderImpl> CreateProtoReader( - IInputStream* stream, - const TTableReaderOptions& /* options */, - TVector<const ::google::protobuf::Descriptor*> descriptors); - -//////////////////////////////////////////////////////////////////////////////// - -} // namespace NDetail - -template <> -TTableReaderPtr<TNode> CreateTableReader<TNode>( - IInputStream* stream, const TTableReaderOptions& options); - -template <> -TTableReaderPtr<TYaMRRow> CreateTableReader<TYaMRRow>( - IInputStream* stream, const TTableReaderOptions& /*options*/); - -} // namespace NYT diff --git a/yt/cpp/mapreduce/io/ya.make b/yt/cpp/mapreduce/io/ya.make deleted file mode 100644 index d355e86850..0000000000 --- a/yt/cpp/mapreduce/io/ya.make +++ /dev/null @@ -1,33 +0,0 @@ -LIBRARY() - -INCLUDE(${ARCADIA_ROOT}/yt/ya_cpp.make.inc) - -SRCS( - counting_raw_reader.cpp - job_reader.cpp - job_writer.cpp - lenval_table_reader.cpp - node_table_reader.cpp - node_table_writer.cpp - proto_helpers.cpp - proto_table_reader.cpp - proto_table_writer.cpp - skiff_row_table_reader.cpp - skiff_table_reader.cpp - stream_raw_reader.cpp - yamr_table_reader.cpp - yamr_table_writer.cpp -) - -PEERDIR( - contrib/libs/protobuf - library/cpp/yson - yt/cpp/mapreduce/common - yt/cpp/mapreduce/interface - yt/cpp/mapreduce/interface/logging - yt/yt_proto/yt/formats - library/cpp/yson/node - yt/cpp/mapreduce/skiff -) - -END() diff --git a/yt/cpp/mapreduce/io/yamr_table_reader.cpp b/yt/cpp/mapreduce/io/yamr_table_reader.cpp deleted file mode 100644 index 6204738e10..0000000000 --- a/yt/cpp/mapreduce/io/yamr_table_reader.cpp +++ /dev/null @@ -1,145 +0,0 @@ -#include "yamr_table_reader.h" - -#include <yt/cpp/mapreduce/common/helpers.h> -#include <yt/cpp/mapreduce/common/retry_lib.h> -#include <yt/cpp/mapreduce/raw_client/raw_requests.h> - 
-//////////////////////////////////////////////////////////////////// - -static void CheckedSkip(IInputStream* input, size_t byteCount) -{ - size_t skipped = input->Skip(byteCount); - Y_ENSURE(skipped == byteCount, "Premature end of YaMR stream"); -} - -//////////////////////////////////////////////////////////////////// - -namespace NYT { - -using namespace NYT::NDetail::NRawClient; - -//////////////////////////////////////////////////////////////////////////////// - -TYaMRTableReader::TYaMRTableReader(::TIntrusivePtr<TRawTableReader> input) - : TLenvalTableReader(std::move(input)) -{ } - -TYaMRTableReader::~TYaMRTableReader() -{ } - -const TYaMRRow& TYaMRTableReader::GetRow() const -{ - CheckValidity(); - if (!RowTaken_) { - const_cast<TYaMRTableReader*>(this)->ReadRow(); - } - return Row_; -} - -bool TYaMRTableReader::IsValid() const -{ - return Valid_; -} - -void TYaMRTableReader::Next() -{ - TLenvalTableReader::Next(); -} - -void TYaMRTableReader::NextKey() -{ - TLenvalTableReader::NextKey(); -} - -ui32 TYaMRTableReader::GetTableIndex() const -{ - return TLenvalTableReader::GetTableIndex(); -} - -ui32 TYaMRTableReader::GetRangeIndex() const -{ - return TLenvalTableReader::GetRangeIndex(); -} - -ui64 TYaMRTableReader::GetRowIndex() const -{ - return TLenvalTableReader::GetRowIndex(); -} - -TMaybe<size_t> TYaMRTableReader::GetReadByteCount() const -{ - return TLenvalTableReader::GetReadByteCount(); -} - -bool TYaMRTableReader::IsEndOfStream() const -{ - return TLenvalTableReader::IsEndOfStream(); -} - -bool TYaMRTableReader::IsRawReaderExhausted() const -{ - return TLenvalTableReader::IsRawReaderExhausted(); -} - -void TYaMRTableReader::ReadField(TString* result, i32 length) -{ - result->resize(length); - size_t count = Input_.Load(result->begin(), length); - Y_ENSURE(count == static_cast<size_t>(length), "Premature end of YaMR stream"); -} - -void TYaMRTableReader::ReadRow() -{ - while (true) { - try { - i32 value = static_cast<i32>(Length_); - ReadField(&Key_, 
value); - Row_.Key = Key_; - - ReadInteger(&value); - ReadField(&SubKey_, value); - Row_.SubKey = SubKey_; - - ReadInteger(&value); - ReadField(&Value_, value); - Row_.Value = Value_; - - RowTaken_ = true; - - // We successfully parsed one more row from the stream, - // so reset retry count to their initial value. - Input_.ResetRetries(); - - break; - } catch (const std::exception& ) { - if (!TLenvalTableReader::Retry()) { - throw; - } - } - } -} - -void TYaMRTableReader::SkipRow() -{ - while (true) { - try { - i32 value = static_cast<i32>(Length_); - CheckedSkip(&Input_, value); - - ReadInteger(&value); - CheckedSkip(&Input_, value); - - ReadInteger(&value); - CheckedSkip(&Input_, value); - break; - } catch (const std::exception& ) { - if (!TLenvalTableReader::Retry()) { - throw; - } - } - } -} - -//////////////////////////////////////////////////////////////////////////////// - -} // namespace NYT diff --git a/yt/cpp/mapreduce/io/yamr_table_reader.h b/yt/cpp/mapreduce/io/yamr_table_reader.h deleted file mode 100644 index 39fdecfa71..0000000000 --- a/yt/cpp/mapreduce/io/yamr_table_reader.h +++ /dev/null @@ -1,48 +0,0 @@ -#pragma once - -#include "lenval_table_reader.h" - -#include <yt/cpp/mapreduce/interface/io.h> - -namespace NYT { - -class TRawTableReader; -struct TClientContext; - -//////////////////////////////////////////////////////////////////////////////// - -class TYaMRTableReader - : public IYaMRReaderImpl - , public TLenvalTableReader -{ -public: - explicit TYaMRTableReader(::TIntrusivePtr<TRawTableReader> input); - ~TYaMRTableReader() override; - - const TYaMRRow& GetRow() const override; - - bool IsValid() const override; - void Next() override; - ui32 GetTableIndex() const override; - ui32 GetRangeIndex() const override; - ui64 GetRowIndex() const override; - void NextKey() override; - TMaybe<size_t> GetReadByteCount() const override; - bool IsEndOfStream() const override; - bool IsRawReaderExhausted() const override; - -private: - void 
ReadField(TString* result, i32 length); - - void ReadRow(); - void SkipRow() override; - - TYaMRRow Row_; - TString Key_; - TString SubKey_; - TString Value_; -}; - -//////////////////////////////////////////////////////////////////////////////// - -} // namespace NYT diff --git a/yt/cpp/mapreduce/io/yamr_table_writer.cpp b/yt/cpp/mapreduce/io/yamr_table_writer.cpp deleted file mode 100644 index cce7ceb0f0..0000000000 --- a/yt/cpp/mapreduce/io/yamr_table_writer.cpp +++ /dev/null @@ -1,53 +0,0 @@ -#include "yamr_table_writer.h" - -#include <yt/cpp/mapreduce/interface/io.h> - -namespace NYT { - -//////////////////////////////////////////////////////////////////////////////// - -TYaMRTableWriter::TYaMRTableWriter(THolder<IProxyOutput> output) - : Output_(std::move(output)) -{ } - -TYaMRTableWriter::~TYaMRTableWriter() -{ } - -size_t TYaMRTableWriter::GetTableCount() const -{ - return Output_->GetStreamCount(); -} - -void TYaMRTableWriter::FinishTable(size_t tableIndex) { - Output_->GetStream(tableIndex)->Finish(); -} - -void TYaMRTableWriter::AddRow(const TYaMRRow& row, size_t tableIndex) -{ - auto* stream = Output_->GetStream(tableIndex); - - auto writeField = [&stream] (const TStringBuf& field) { - i32 length = static_cast<i32>(field.length()); - stream->Write(&length, sizeof(length)); - stream->Write(field.data(), field.length()); - }; - - writeField(row.Key); - writeField(row.SubKey); - writeField(row.Value); - - Output_->OnRowFinished(tableIndex); -} - -void TYaMRTableWriter::AddRow(TYaMRRow&& row, size_t tableIndex) { - TYaMRTableWriter::AddRow(row, tableIndex); -} - -void TYaMRTableWriter::Abort() -{ - Output_->Abort(); -} - -//////////////////////////////////////////////////////////////////////////////// - -} // namespace NYT diff --git a/yt/cpp/mapreduce/io/yamr_table_writer.h b/yt/cpp/mapreduce/io/yamr_table_writer.h deleted file mode 100644 index cf88eaf287..0000000000 --- a/yt/cpp/mapreduce/io/yamr_table_writer.h +++ /dev/null @@ -1,31 +0,0 @@ -#pragma 
once - -#include <yt/cpp/mapreduce/interface/io.h> - -namespace NYT { - -class IProxyOutput; - -//////////////////////////////////////////////////////////////////////////////// - -class TYaMRTableWriter - : public IYaMRWriterImpl -{ -public: - explicit TYaMRTableWriter(THolder<IProxyOutput> output); - ~TYaMRTableWriter() override; - - void AddRow(const TYaMRRow& row, size_t tableIndex) override; - void AddRow(TYaMRRow&& row, size_t tableIndex) override; - - size_t GetTableCount() const override; - void FinishTable(size_t) override; - void Abort() override; - -private: - THolder<IProxyOutput> Output_; -}; - -//////////////////////////////////////////////////////////////////////////////// - -} // namespace NYT diff --git a/yt/cpp/mapreduce/library/table_schema/protobuf.cpp b/yt/cpp/mapreduce/library/table_schema/protobuf.cpp deleted file mode 100644 index 888da828e7..0000000000 --- a/yt/cpp/mapreduce/library/table_schema/protobuf.cpp +++ /dev/null @@ -1 +0,0 @@ -#include "protobuf.h" diff --git a/yt/cpp/mapreduce/library/table_schema/protobuf.h b/yt/cpp/mapreduce/library/table_schema/protobuf.h deleted file mode 100644 index e29e096745..0000000000 --- a/yt/cpp/mapreduce/library/table_schema/protobuf.h +++ /dev/null @@ -1,3 +0,0 @@ -#pragma once - -#include <yt/cpp/mapreduce/interface/common.h> diff --git a/yt/cpp/mapreduce/library/table_schema/ya.make b/yt/cpp/mapreduce/library/table_schema/ya.make deleted file mode 100644 index 4aebad72dd..0000000000 --- a/yt/cpp/mapreduce/library/table_schema/ya.make +++ /dev/null @@ -1,14 +0,0 @@ -LIBRARY() - -INCLUDE(${ARCADIA_ROOT}/yt/ya_cpp.make.inc) - -SRCS( - protobuf.h - protobuf.cpp -) - -PEERDIR( - yt/cpp/mapreduce/interface -) - -END() diff --git a/yt/cpp/mapreduce/raw_client/raw_batch_request.cpp b/yt/cpp/mapreduce/raw_client/raw_batch_request.cpp deleted file mode 100644 index be81f5a21a..0000000000 --- a/yt/cpp/mapreduce/raw_client/raw_batch_request.cpp +++ /dev/null @@ -1,687 +0,0 @@ -#include "raw_batch_request.h" - 
-#include "raw_requests.h" -#include "rpc_parameters_serialization.h" - -#include <yt/cpp/mapreduce/common/helpers.h> -#include <yt/cpp/mapreduce/common/retry_lib.h> - -#include <yt/cpp/mapreduce/interface/logging/yt_log.h> - -#include <yt/cpp/mapreduce/interface/client.h> -#include <yt/cpp/mapreduce/interface/errors.h> -#include <yt/cpp/mapreduce/interface/serialize.h> - -#include <library/cpp/yson/node/node.h> - -#include <yt/cpp/mapreduce/http/context.h> -#include <yt/cpp/mapreduce/http/retry_request.h> - -#include <util/generic/guid.h> -#include <util/string/builder.h> - -#include <exception> - -namespace NYT::NDetail::NRawClient { - -using NThreading::TFuture; -using NThreading::TPromise; -using NThreading::NewPromise; - -//////////////////////////////////////////////////////////////////// - -static TString RequestInfo(const TNode& request) -{ - return ::TStringBuilder() - << request["command"].AsString() << ' ' << NodeToYsonString(request["parameters"]); -} - -static void EnsureNothing(const TMaybe<TNode>& node) -{ - Y_ENSURE(!node, "Internal error: expected to have no response, but got response of type " << node->GetType()); -} - -static void EnsureSomething(const TMaybe<TNode>& node) -{ - Y_ENSURE(node, "Internal error: expected to have response of any type, but got no response."); -} - -static void EnsureType(const TNode& node, TNode::EType type) -{ - Y_ENSURE(node.GetType() == type, "Internal error: unexpected response type. 
" - << "Expected: " << type << ", actual: " << node.GetType()); -} - -static void EnsureType(const TMaybe<TNode>& node, TNode::EType type) -{ - Y_ENSURE(node, "Internal error: expected to have response of type " << type << ", but got no response."); - EnsureType(*node, type); -} - -//////////////////////////////////////////////////////////////////// - -template <typename TReturnType> -class TResponseParserBase - : public TRawBatchRequest::IResponseItemParser -{ -public: - using TFutureResult = TFuture<TReturnType>; - -public: - TResponseParserBase() - : Result(NewPromise<TReturnType>()) - { } - - void SetException(std::exception_ptr e) override - { - Result.SetException(std::move(e)); - } - - TFuture<TReturnType> GetFuture() - { - return Result.GetFuture(); - } - -protected: - TPromise<TReturnType> Result; -}; - -//////////////////////////////////////////////////////////////////// - - -class TGetResponseParser - : public TResponseParserBase<TNode> -{ -public: - void SetResponse(TMaybe<TNode> node) override - { - EnsureSomething(node); - Result.SetValue(std::move(*node)); - } -}; - -//////////////////////////////////////////////////////////////////// - -class TVoidResponseParser - : public TResponseParserBase<void> -{ -public: - void SetResponse(TMaybe<TNode> node) override - { - EnsureNothing(node); - Result.SetValue(); - } -}; - -//////////////////////////////////////////////////////////////////// - -class TListResponseParser - : public TResponseParserBase<TNode::TListType> -{ -public: - void SetResponse(TMaybe<TNode> node) override - { - EnsureType(node, TNode::List); - Result.SetValue(std::move(node->AsList())); - } -}; - -//////////////////////////////////////////////////////////////////// - -class TExistsResponseParser - : public TResponseParserBase<bool> -{ -public: - void SetResponse(TMaybe<TNode> node) override - { - EnsureType(node, TNode::Bool); - Result.SetValue(std::move(node->AsBool())); - } -}; - 
-//////////////////////////////////////////////////////////////////// - -class TGuidResponseParser - : public TResponseParserBase<TGUID> -{ -public: - void SetResponse(TMaybe<TNode> node) override - { - EnsureType(node, TNode::String); - Result.SetValue(GetGuid(node->AsString())); - } -}; - -//////////////////////////////////////////////////////////////////// - -class TCanonizeYPathResponseParser - : public TResponseParserBase<TRichYPath> -{ -public: - explicit TCanonizeYPathResponseParser(TString pathPrefix, const TRichYPath& original) - : OriginalNode_(PathToNode(original)) - , PathPrefix_(std::move(pathPrefix)) - { } - - void SetResponse(TMaybe<TNode> node) override - { - EnsureType(node, TNode::String); - - for (const auto& item : OriginalNode_.GetAttributes().AsMap()) { - node->Attributes()[item.first] = item.second; - } - TRichYPath result; - Deserialize(result, *node); - result.Path_ = AddPathPrefix(result.Path_, PathPrefix_); - Result.SetValue(result); - } - -private: - TNode OriginalNode_; - TString PathPrefix_; -}; - -//////////////////////////////////////////////////////////////////// - -class TGetOperationResponseParser - : public TResponseParserBase<TOperationAttributes> -{ -public: - void SetResponse(TMaybe<TNode> node) override - { - EnsureType(node, TNode::Map); - Result.SetValue(ParseOperationAttributes(*node)); - } -}; - -//////////////////////////////////////////////////////////////////// - -class TTableColumnarStatisticsParser - : public TResponseParserBase<TVector<TTableColumnarStatistics>> -{ -public: - void SetResponse(TMaybe<TNode> node) override - { - EnsureType(node, TNode::Map); - TVector<TTableColumnarStatistics> statistics; - Deserialize(statistics, *node); - Result.SetValue(std::move(statistics)); - } -}; - -//////////////////////////////////////////////////////////////////// - -class TTablePartitionsParser - : public TResponseParserBase<TMultiTablePartitions> -{ -public: - void SetResponse(TMaybe<TNode> node) override - { - 
EnsureType(node, TNode::Map); - TMultiTablePartitions partitions; - Deserialize(partitions, *node); - Result.SetValue(std::move(partitions)); - } -}; - -//////////////////////////////////////////////////////////////////////////////// - -class TGetFileFromCacheParser - : public TResponseParserBase<TMaybe<TYPath>> -{ -public: - void SetResponse(TMaybe<TNode> node) override - { - EnsureType(node, TNode::String); - if (node->AsString().empty()) { - Result.SetValue(Nothing()); - } else { - Result.SetValue(node->AsString()); - } - } -}; - -//////////////////////////////////////////////////////////////////// - -class TYPathParser - : public TResponseParserBase<TYPath> -{ -public: - void SetResponse(TMaybe<TNode> node) override - { - EnsureType(node, TNode::String); - Result.SetValue(node->AsString()); - } -}; - -//////////////////////////////////////////////////////////////////// - -class TCheckPermissionParser - : public TResponseParserBase<TCheckPermissionResponse> -{ -public: - void SetResponse(TMaybe<TNode> node) override - { - EnsureType(node, TNode::Map); - Result.SetValue(ParseCheckPermissionResponse(*node)); - } -}; - -//////////////////////////////////////////////////////////////////// - -TRawBatchRequest::TBatchItem::TBatchItem(TNode parameters, ::TIntrusivePtr<IResponseItemParser> responseParser) - : Parameters(std::move(parameters)) - , ResponseParser(std::move(responseParser)) - , NextTry() -{ } - -TRawBatchRequest::TBatchItem::TBatchItem(const TBatchItem& batchItem, TInstant nextTry) - : Parameters(batchItem.Parameters) - , ResponseParser(batchItem.ResponseParser) - , NextTry(nextTry) -{ } - -//////////////////////////////////////////////////////////////////// - -TRawBatchRequest::TRawBatchRequest(const TConfigPtr& config) - : Config_(config) -{ } - -TRawBatchRequest::~TRawBatchRequest() = default; - -bool TRawBatchRequest::IsExecuted() const -{ - return Executed_; -} - -void TRawBatchRequest::MarkExecuted() -{ - Executed_ = true; -} - -template <typename 
TResponseParser> -typename TResponseParser::TFutureResult TRawBatchRequest::AddRequest( - const TString& command, - TNode parameters, - TMaybe<TNode> input) -{ - return AddRequest(command, parameters, input, MakeIntrusive<TResponseParser>()); -} - -template <typename TResponseParser> -typename TResponseParser::TFutureResult TRawBatchRequest::AddRequest( - const TString& command, - TNode parameters, - TMaybe<TNode> input, - ::TIntrusivePtr<TResponseParser> parser) -{ - Y_ENSURE(!Executed_, "Cannot add request: batch request is already executed"); - TNode request; - request["command"] = command; - request["parameters"] = std::move(parameters); - if (input) { - request["input"] = std::move(*input); - } - BatchItemList_.emplace_back(std::move(request), parser); - return parser->GetFuture(); -} - -void TRawBatchRequest::AddRequest(TBatchItem batchItem) -{ - Y_ENSURE(!Executed_, "Cannot add request: batch request is already executed"); - BatchItemList_.push_back(batchItem); -} - -TFuture<TNodeId> TRawBatchRequest::Create( - const TTransactionId& transaction, - const TYPath& path, - ENodeType type, - const TCreateOptions& options) -{ - return AddRequest<TGuidResponseParser>( - "create", - SerializeParamsForCreate(transaction, Config_->Prefix, path, type, options), - Nothing()); -} - -TFuture<void> TRawBatchRequest::Remove( - const TTransactionId& transaction, - const TYPath& path, - const TRemoveOptions& options) -{ - return AddRequest<TVoidResponseParser>( - "remove", - SerializeParamsForRemove(transaction, Config_->Prefix, path, options), - Nothing()); -} - -TFuture<bool> TRawBatchRequest::Exists( - const TTransactionId& transaction, - const TYPath& path, - const TExistsOptions& options) -{ - return AddRequest<TExistsResponseParser>( - "exists", - SerializeParamsForExists(transaction, Config_->Prefix, path, options), - Nothing()); -} - -TFuture<TNode> TRawBatchRequest::Get( - const TTransactionId& transaction, - const TYPath& path, - const TGetOptions& options) -{ - 
return AddRequest<TGetResponseParser>( - "get", - SerializeParamsForGet(transaction, Config_->Prefix, path, options), - Nothing()); -} - -TFuture<void> TRawBatchRequest::Set( - const TTransactionId& transaction, - const TYPath& path, - const TNode& node, - const TSetOptions& options) -{ - return AddRequest<TVoidResponseParser>( - "set", - SerializeParamsForSet(transaction, Config_->Prefix, path, options), - node); -} - -TFuture<TNode::TListType> TRawBatchRequest::List( - const TTransactionId& transaction, - const TYPath& path, - const TListOptions& options) -{ - return AddRequest<TListResponseParser>( - "list", - SerializeParamsForList(transaction, Config_->Prefix, path, options), - Nothing()); -} - -TFuture<TNodeId> TRawBatchRequest::Copy( - const TTransactionId& transaction, - const TYPath& sourcePath, - const TYPath& destinationPath, - const TCopyOptions& options) -{ - return AddRequest<TGuidResponseParser>( - "copy", - SerializeParamsForCopy(transaction, Config_->Prefix, sourcePath, destinationPath, options), - Nothing()); -} - -TFuture<TNodeId> TRawBatchRequest::Move( - const TTransactionId& transaction, - const TYPath& sourcePath, - const TYPath& destinationPath, - const TMoveOptions& options) -{ - return AddRequest<TGuidResponseParser>( - "move", - SerializeParamsForMove(transaction, Config_->Prefix, sourcePath, destinationPath, options), - Nothing()); -} - -TFuture<TNodeId> TRawBatchRequest::Link( - const TTransactionId& transaction, - const TYPath& targetPath, - const TYPath& linkPath, - const TLinkOptions& options) -{ - return AddRequest<TGuidResponseParser>( - "link", - SerializeParamsForLink(transaction, Config_->Prefix, targetPath, linkPath, options), - Nothing()); -} - -TFuture<TLockId> TRawBatchRequest::Lock( - const TTransactionId& transaction, - const TYPath& path, - ELockMode mode, - const TLockOptions& options) -{ - return AddRequest<TGuidResponseParser>( - "lock", - SerializeParamsForLock(transaction, Config_->Prefix, path, mode, options), - 
Nothing()); -} - -TFuture<void> TRawBatchRequest::Unlock( - const TTransactionId& transaction, - const TYPath& path, - const TUnlockOptions& options) -{ - return AddRequest<TVoidResponseParser>( - "unlock", - SerializeParamsForUnlock(transaction, Config_->Prefix, path, options), - Nothing()); -} - -TFuture<TMaybe<TYPath>> TRawBatchRequest::GetFileFromCache( - const TTransactionId& transactionId, - const TString& md5Signature, - const TYPath& cachePath, - const TGetFileFromCacheOptions& options) -{ - return AddRequest<TGetFileFromCacheParser>( - "get_file_from_cache", - SerializeParamsForGetFileFromCache(transactionId, md5Signature, cachePath, options), - Nothing()); -} - -TFuture<TYPath> TRawBatchRequest::PutFileToCache( - const TTransactionId& transactionId, - const TYPath& filePath, - const TString& md5Signature, - const TYPath& cachePath, - const TPutFileToCacheOptions& options) -{ - return AddRequest<TYPathParser>( - "put_file_to_cache", - SerializeParamsForPutFileToCache(transactionId, Config_->Prefix, filePath, md5Signature, cachePath, options), - Nothing()); -} - -TFuture<TCheckPermissionResponse> TRawBatchRequest::CheckPermission( - const TString& user, - EPermission permission, - const TYPath& path, - const TCheckPermissionOptions& options) -{ - return AddRequest<TCheckPermissionParser>( - "check_permission", - SerializeParamsForCheckPermission(user, permission, Config_->Prefix, path, options), - Nothing()); -} - -TFuture<TOperationAttributes> TRawBatchRequest::GetOperation( - const TOperationId& operationId, - const TGetOperationOptions& options) -{ - return AddRequest<TGetOperationResponseParser>( - "get_operation", - SerializeParamsForGetOperation(operationId, options), - Nothing()); -} - -TFuture<void> TRawBatchRequest::AbortOperation(const TOperationId& operationId) -{ - return AddRequest<TVoidResponseParser>( - "abort_op", - SerializeParamsForAbortOperation(operationId), - Nothing()); -} - -TFuture<void> TRawBatchRequest::CompleteOperation(const 
TOperationId& operationId) -{ - return AddRequest<TVoidResponseParser>( - "complete_op", - SerializeParamsForCompleteOperation(operationId), - Nothing()); -} -TFuture<void> TRawBatchRequest::SuspendOperation( - const TOperationId& operationId, - const TSuspendOperationOptions& options) -{ - return AddRequest<TVoidResponseParser>( - "suspend_operation", - SerializeParamsForSuspendOperation(operationId, options), - Nothing()); -} -TFuture<void> TRawBatchRequest::ResumeOperation( - const TOperationId& operationId, - const TResumeOperationOptions& options) -{ - return AddRequest<TVoidResponseParser>( - "resume_operation", - SerializeParamsForResumeOperation(operationId, options), - Nothing()); -} - -TFuture<void> TRawBatchRequest::UpdateOperationParameters( - const TOperationId& operationId, - const TUpdateOperationParametersOptions& options) -{ - return AddRequest<TVoidResponseParser>( - "update_op_parameters", - SerializeParamsForUpdateOperationParameters(operationId, options), - Nothing()); -} - -TFuture<TRichYPath> TRawBatchRequest::CanonizeYPath(const TRichYPath& path) -{ - if (path.Path_.find_first_of("<>{}[]") != TString::npos) { - return AddRequest<TCanonizeYPathResponseParser>( - "parse_ypath", - SerializeParamsForParseYPath(path), - Nothing(), - MakeIntrusive<TCanonizeYPathResponseParser>(Config_->Prefix, path)); - } else { - TRichYPath result = path; - result.Path_ = AddPathPrefix(result.Path_, Config_->Prefix); - return NThreading::MakeFuture(result); - } -} - -TFuture<TVector<TTableColumnarStatistics>> TRawBatchRequest::GetTableColumnarStatistics( - const TTransactionId& transaction, - const TVector<TRichYPath>& paths, - const TGetTableColumnarStatisticsOptions& options) -{ - return AddRequest<TTableColumnarStatisticsParser>( - "get_table_columnar_statistics", - SerializeParamsForGetTableColumnarStatistics(transaction, paths, options), - Nothing()); -} - -TFuture<TMultiTablePartitions> TRawBatchRequest::GetTablePartitions( - const TTransactionId& 
transaction, - const TVector<TRichYPath>& paths, - const TGetTablePartitionsOptions& options) -{ - return AddRequest<TTablePartitionsParser>( - "partition_tables", - SerializeParamsForGetTablePartitions(transaction, paths, options), - Nothing()); -} - -void TRawBatchRequest::FillParameterList(size_t maxSize, TNode* result, TInstant* nextTry) const -{ - Y_VERIFY(result); - Y_VERIFY(nextTry); - - *nextTry = TInstant(); - maxSize = Min(maxSize, BatchItemList_.size()); - *result = TNode::CreateList(); - for (size_t i = 0; i < maxSize; ++i) { - YT_LOG_DEBUG("ExecuteBatch preparing: %v", - RequestInfo(BatchItemList_[i].Parameters)); - - result->Add(BatchItemList_[i].Parameters); - if (BatchItemList_[i].NextTry > *nextTry) { - *nextTry = BatchItemList_[i].NextTry; - } - } -} - -void TRawBatchRequest::ParseResponse( - const TResponseInfo& requestResult, - const IRequestRetryPolicyPtr& retryPolicy, - TRawBatchRequest* retryBatch, - TInstant now) -{ - TNode node = NodeFromYsonString(requestResult.Response); - return ParseResponse(node, requestResult.RequestId, retryPolicy, retryBatch, now); -} - -void TRawBatchRequest::ParseResponse( - TNode node, - const TString& requestId, - const IRequestRetryPolicyPtr& retryPolicy, - TRawBatchRequest* retryBatch, - TInstant now) -{ - Y_VERIFY(retryBatch); - - EnsureType(node, TNode::List); - auto& responseList = node.AsList(); - const auto size = responseList.size(); - Y_ENSURE(size <= BatchItemList_.size(), - "Size of server response exceeds size of batch request;" - " size of batch: " << BatchItemList_.size() << - " size of server response: " << size << '.'); - - for (size_t i = 0; i != size; ++i) { - try { - EnsureType(responseList[i], TNode::Map); - auto& responseNode = responseList[i].AsMap(); - const auto outputIt = responseNode.find("output"); - if (outputIt != responseNode.end()) { - BatchItemList_[i].ResponseParser->SetResponse(std::move(outputIt->second)); - } else { - const auto errorIt = responseNode.find("error"); - if 
(errorIt == responseNode.end()) { - BatchItemList_[i].ResponseParser->SetResponse(Nothing()); - } else { - TErrorResponse error(400, requestId); - error.SetError(TYtError(errorIt->second)); - if (auto curInterval = IsRetriable(error) ? retryPolicy->OnRetriableError(error) : Nothing()) { - YT_LOG_INFO( - "Batch subrequest (%s) failed, will retry, error: %s", - RequestInfo(BatchItemList_[i].Parameters), - error.what()); - retryBatch->AddRequest(TBatchItem(BatchItemList_[i], now + *curInterval)); - } else { - YT_LOG_ERROR( - "Batch subrequest (%s) failed, error: %s", - RequestInfo(BatchItemList_[i].Parameters), - error.what()); - BatchItemList_[i].ResponseParser->SetException(std::make_exception_ptr(error)); - } - } - } - } catch (const std::exception& e) { - // We don't expect other exceptions, so we don't catch (...) - BatchItemList_[i].ResponseParser->SetException(std::current_exception()); - } - } - BatchItemList_.erase(BatchItemList_.begin(), BatchItemList_.begin() + size); -} - -void TRawBatchRequest::SetErrorResult(std::exception_ptr e) const -{ - for (const auto& batchItem : BatchItemList_) { - batchItem.ResponseParser->SetException(e); - } -} - -size_t TRawBatchRequest::BatchSize() const -{ - return BatchItemList_.size(); -} - -//////////////////////////////////////////////////////////////////////////////// - -} // namespace NYT::NDetail::NRawClient diff --git a/yt/cpp/mapreduce/raw_client/raw_batch_request.h b/yt/cpp/mapreduce/raw_client/raw_batch_request.h deleted file mode 100644 index 7ed5bebf5e..0000000000 --- a/yt/cpp/mapreduce/raw_client/raw_batch_request.h +++ /dev/null @@ -1,190 +0,0 @@ -#pragma once - -#include <yt/cpp/mapreduce/common/fwd.h> - -#include <yt/cpp/mapreduce/interface/batch_request.h> -#include <yt/cpp/mapreduce/interface/fwd.h> -#include <yt/cpp/mapreduce/interface/node.h> -#include <yt/cpp/mapreduce/interface/retry_policy.h> - -#include <yt/cpp/mapreduce/http/requests.h> - -#include <library/cpp/threading/future/future.h> - -#include 
<util/generic/ptr.h> -#include <util/generic/deque.h> - -#include <exception> - -namespace NYT::NDetail { - struct TResponseInfo; -} - -namespace NYT::NDetail::NRawClient { - -//////////////////////////////////////////////////////////////////////////////// - -class TRawBatchRequest - : public TThrRefBase -{ -public: - struct IResponseItemParser - : public TThrRefBase - { - ~IResponseItemParser() = default; - - virtual void SetResponse(TMaybe<TNode> node) = 0; - virtual void SetException(std::exception_ptr e) = 0; - }; - -public: - TRawBatchRequest(const TConfigPtr& config); - ~TRawBatchRequest(); - - bool IsExecuted() const; - void MarkExecuted(); - - void FillParameterList(size_t maxSize, TNode* result, TInstant* nextTry) const; - - size_t BatchSize() const; - - void ParseResponse( - const TResponseInfo& requestResult, - const IRequestRetryPolicyPtr& retryPolicy, - TRawBatchRequest* retryBatch, - TInstant now = TInstant::Now()); - void ParseResponse( - TNode response, - const TString& requestId, - const IRequestRetryPolicyPtr& retryPolicy, - TRawBatchRequest* retryBatch, - TInstant now = TInstant::Now()); - void SetErrorResult(std::exception_ptr e) const; - - ::NThreading::TFuture<TNodeId> Create( - const TTransactionId& transaction, - const TYPath& path, - ENodeType type, - const TCreateOptions& options); - ::NThreading::TFuture<void> Remove( - const TTransactionId& transaction, - const TYPath& path, - const TRemoveOptions& options); - ::NThreading::TFuture<bool> Exists( - const TTransactionId& transaction, - const TYPath& path, - const TExistsOptions& options); - ::NThreading::TFuture<TNode> Get( - const TTransactionId& transaction, - const TYPath& path, - const TGetOptions& options); - ::NThreading::TFuture<void> Set( - const TTransactionId& transaction, - const TYPath& path, - const TNode& value, - const TSetOptions& options); - ::NThreading::TFuture<TNode::TListType> List( - const TTransactionId& transaction, - const TYPath& path, - const TListOptions& 
options); - ::NThreading::TFuture<TNodeId> Copy( - const TTransactionId& transaction, - const TYPath& sourcePath, - const TYPath& destinationPath, - const TCopyOptions& options); - ::NThreading::TFuture<TNodeId> Move( - const TTransactionId& transaction, - const TYPath& sourcePath, - const TYPath& destinationPath, - const TMoveOptions& options); - ::NThreading::TFuture<TNodeId> Link( - const TTransactionId& transaction, - const TYPath& targetPath, - const TYPath& linkPath, - const TLinkOptions& options); - ::NThreading::TFuture<TLockId> Lock( - const TTransactionId& transaction, - const TYPath& path, - ELockMode mode, - const TLockOptions& options); - ::NThreading::TFuture<void> Unlock( - const TTransactionId& transaction, - const TYPath& path, - const TUnlockOptions& options); - ::NThreading::TFuture<TMaybe<TYPath>> GetFileFromCache( - const TTransactionId& transactionId, - const TString& md5Signature, - const TYPath& cachePath, - const TGetFileFromCacheOptions& options); - ::NThreading::TFuture<TYPath> PutFileToCache( - const TTransactionId& transactionId, - const TYPath& filePath, - const TString& md5Signature, - const TYPath& cachePath, - const TPutFileToCacheOptions& options); - ::NThreading::TFuture<TCheckPermissionResponse> CheckPermission( - const TString& user, - EPermission permission, - const TYPath& path, - const TCheckPermissionOptions& options); - ::NThreading::TFuture<TOperationAttributes> GetOperation( - const TOperationId& operationId, - const TGetOperationOptions& options); - ::NThreading::TFuture<void> AbortOperation(const TOperationId& operationId); - ::NThreading::TFuture<void> CompleteOperation(const TOperationId& operationId); - ::NThreading::TFuture<void> SuspendOperation( - const TOperationId& operationId, - const TSuspendOperationOptions& options); - ::NThreading::TFuture<void> ResumeOperation( - const TOperationId& operationId, - const TResumeOperationOptions& options); - ::NThreading::TFuture<void> UpdateOperationParameters( - const 
TOperationId& operationId, - const TUpdateOperationParametersOptions& options); - ::NThreading::TFuture<TRichYPath> CanonizeYPath(const TRichYPath& path); - ::NThreading::TFuture<TVector<TTableColumnarStatistics>> GetTableColumnarStatistics( - const TTransactionId& transaction, - const TVector<TRichYPath>& paths, - const TGetTableColumnarStatisticsOptions& options); - ::NThreading::TFuture<TMultiTablePartitions> GetTablePartitions( - const TTransactionId& transaction, - const TVector<TRichYPath>& paths, - const TGetTablePartitionsOptions& options); - -private: - struct TBatchItem { - TNode Parameters; - ::TIntrusivePtr<IResponseItemParser> ResponseParser; - TInstant NextTry; - - TBatchItem(TNode parameters, ::TIntrusivePtr<IResponseItemParser> responseParser); - - TBatchItem(const TBatchItem& batchItem, TInstant nextTry); - }; - -private: - template <typename TResponseParser> - typename TResponseParser::TFutureResult AddRequest( - const TString& command, - TNode parameters, - TMaybe<TNode> input); - - template <typename TResponseParser> - typename TResponseParser::TFutureResult AddRequest( - const TString& command, - TNode parameters, - TMaybe<TNode> input, - ::TIntrusivePtr<TResponseParser> parser); - - void AddRequest(TBatchItem batchItem); - -private: - TConfigPtr Config_; - - TDeque<TBatchItem> BatchItemList_; - bool Executed_ = false; -}; - -//////////////////////////////////////////////////////////////////////////////// - -} // namespace NYT::NDetail::NRawClient diff --git a/yt/cpp/mapreduce/raw_client/raw_requests.cpp b/yt/cpp/mapreduce/raw_client/raw_requests.cpp deleted file mode 100644 index 26120759fd..0000000000 --- a/yt/cpp/mapreduce/raw_client/raw_requests.cpp +++ /dev/null @@ -1,1027 +0,0 @@ -#include "raw_requests.h" - -#include "raw_batch_request.h" -#include "rpc_parameters_serialization.h" - -#include <yt/cpp/mapreduce/common/helpers.h> -#include <yt/cpp/mapreduce/common/retry_lib.h> -#include <yt/cpp/mapreduce/common/wait_proxy.h> - -#include 
<yt/cpp/mapreduce/http/fwd.h> -#include <yt/cpp/mapreduce/http/context.h> -#include <yt/cpp/mapreduce/http/helpers.h> -#include <yt/cpp/mapreduce/http/http_client.h> -#include <yt/cpp/mapreduce/http/retry_request.h> - -#include <yt/cpp/mapreduce/interface/config.h> -#include <yt/cpp/mapreduce/interface/client.h> -#include <yt/cpp/mapreduce/interface/operation.h> -#include <yt/cpp/mapreduce/interface/serialize.h> -#include <yt/cpp/mapreduce/interface/tvm.h> - -#include <yt/cpp/mapreduce/interface/logging/yt_log.h> - -#include <library/cpp/yson/node/node_io.h> - -#include <util/generic/guid.h> -#include <util/generic/scope.h> - -namespace NYT::NDetail::NRawClient { - -/////////////////////////////////////////////////////////////////////////////// - -void ExecuteBatch( - IRequestRetryPolicyPtr retryPolicy, - const TClientContext& context, - TRawBatchRequest& batchRequest, - const TExecuteBatchOptions& options) -{ - if (batchRequest.IsExecuted()) { - ythrow yexception() << "Cannot execute batch request since it is already executed"; - } - Y_DEFER { - batchRequest.MarkExecuted(); - }; - - const auto concurrency = options.Concurrency_.GetOrElse(50); - const auto batchPartMaxSize = options.BatchPartMaxSize_.GetOrElse(concurrency * 5); - - if (!retryPolicy) { - retryPolicy = CreateDefaultRequestRetryPolicy(context.Config); - } - - while (batchRequest.BatchSize()) { - TRawBatchRequest retryBatch(context.Config); - - while (batchRequest.BatchSize()) { - auto parameters = TNode::CreateMap(); - TInstant nextTry; - batchRequest.FillParameterList(batchPartMaxSize, ¶meters["requests"], &nextTry); - if (nextTry) { - SleepUntil(nextTry); - } - parameters["concurrency"] = concurrency; - auto body = NodeToYsonString(parameters); - THttpHeader header("POST", "execute_batch"); - header.AddMutationId(); - NDetail::TResponseInfo result; - try { - result = RetryRequestWithPolicy(retryPolicy, context, header, body); - } catch (const std::exception& e) { - 
batchRequest.SetErrorResult(std::current_exception()); - retryBatch.SetErrorResult(std::current_exception()); - throw; - } - batchRequest.ParseResponse(std::move(result), retryPolicy.Get(), &retryBatch); - } - - batchRequest = std::move(retryBatch); - } -} - -TNode Get( - const IRequestRetryPolicyPtr& retryPolicy, - const TClientContext& context, - const TTransactionId& transactionId, - const TYPath& path, - const TGetOptions& options) -{ - THttpHeader header("GET", "get"); - header.MergeParameters(SerializeParamsForGet(transactionId, context.Config->Prefix, path, options)); - return NodeFromYsonString(RetryRequestWithPolicy(retryPolicy, context, header).Response); -} - -TNode TryGet( - const IRequestRetryPolicyPtr& retryPolicy, - const TClientContext& context, - const TTransactionId& transactionId, - const TYPath& path, - const TGetOptions& options) -{ - try { - return Get(retryPolicy, context, transactionId, path, options); - } catch (const TErrorResponse& error) { - if (!error.IsResolveError()) { - throw; - } - return TNode(); - } -} - -void Set( - const IRequestRetryPolicyPtr& retryPolicy, - const TClientContext& context, - const TTransactionId& transactionId, - const TYPath& path, - const TNode& value, - const TSetOptions& options) -{ - THttpHeader header("PUT", "set"); - header.AddMutationId(); - header.MergeParameters(SerializeParamsForSet(transactionId, context.Config->Prefix, path, options)); - auto body = NodeToYsonString(value); - RetryRequestWithPolicy(retryPolicy, context, header, body); -} - -void MultisetAttributes( - const IRequestRetryPolicyPtr& retryPolicy, - const TClientContext& context, - const TTransactionId& transactionId, - const TYPath& path, - const TNode::TMapType& value, - const TMultisetAttributesOptions& options) -{ - THttpHeader header("PUT", "api/v4/multiset_attributes", false); - header.AddMutationId(); - header.MergeParameters(SerializeParamsForMultisetAttributes(transactionId, context.Config->Prefix, path, options)); - - auto body 
= NodeToYsonString(value); - RetryRequestWithPolicy(retryPolicy, context, header, body); -} - -bool Exists( - const IRequestRetryPolicyPtr& retryPolicy, - const TClientContext& context, - const TTransactionId& transactionId, - const TYPath& path, - const TExistsOptions& options) -{ - THttpHeader header("GET", "exists"); - header.MergeParameters(SerializeParamsForExists(transactionId, context.Config->Prefix, path, options)); - return ParseBoolFromResponse(RetryRequestWithPolicy(retryPolicy, context, header).Response); -} - -TNodeId Create( - const IRequestRetryPolicyPtr& retryPolicy, - const TClientContext& context, - const TTransactionId& transactionId, - const TYPath& path, - const ENodeType& type, - const TCreateOptions& options) -{ - THttpHeader header("POST", "create"); - header.AddMutationId(); - header.MergeParameters(SerializeParamsForCreate(transactionId, context.Config->Prefix, path, type, options)); - return ParseGuidFromResponse(RetryRequestWithPolicy(retryPolicy, context, header).Response); -} - -TNodeId Copy( - const IRequestRetryPolicyPtr& retryPolicy, - const TClientContext& context, - const TTransactionId& transactionId, - const TYPath& sourcePath, - const TYPath& destinationPath, - const TCopyOptions& options) -{ - THttpHeader header("POST", "copy"); - header.AddMutationId(); - header.MergeParameters(SerializeParamsForCopy(transactionId, context.Config->Prefix, sourcePath, destinationPath, options)); - return ParseGuidFromResponse(RetryRequestWithPolicy(retryPolicy, context, header).Response); -} - -TNodeId Move( - const IRequestRetryPolicyPtr& retryPolicy, - const TClientContext& context, - const TTransactionId& transactionId, - const TYPath& sourcePath, - const TYPath& destinationPath, - const TMoveOptions& options) -{ - THttpHeader header("POST", "move"); - header.AddMutationId(); - header.MergeParameters(NRawClient::SerializeParamsForMove(transactionId, context.Config->Prefix, sourcePath, destinationPath, options)); - return 
ParseGuidFromResponse(RetryRequestWithPolicy(retryPolicy, context, header).Response); -} - -void Remove( - const IRequestRetryPolicyPtr& retryPolicy, - const TClientContext& context, - const TTransactionId& transactionId, - const TYPath& path, - const TRemoveOptions& options) -{ - THttpHeader header("POST", "remove"); - header.AddMutationId(); - header.MergeParameters(SerializeParamsForRemove(transactionId, context.Config->Prefix, path, options)); - RetryRequestWithPolicy(retryPolicy, context, header); -} - -TNode::TListType List( - const IRequestRetryPolicyPtr& retryPolicy, - const TClientContext& context, - const TTransactionId& transactionId, - const TYPath& path, - const TListOptions& options) -{ - THttpHeader header("GET", "list"); - - TYPath updatedPath = AddPathPrefix(path, context.Config->Prefix); - // Translate "//" to "/" - // Translate "//some/constom/prefix/from/config/" to "//some/constom/prefix/from/config" - if (path.empty() && updatedPath.EndsWith('/')) { - updatedPath.pop_back(); - } - header.MergeParameters(SerializeParamsForList(transactionId, context.Config->Prefix, updatedPath, options)); - auto result = RetryRequestWithPolicy(retryPolicy, context, header); - return NodeFromYsonString(result.Response).AsList(); -} - -TNodeId Link( - const IRequestRetryPolicyPtr& retryPolicy, - const TClientContext& context, - const TTransactionId& transactionId, - const TYPath& targetPath, - const TYPath& linkPath, - const TLinkOptions& options) -{ - THttpHeader header("POST", "link"); - header.AddMutationId(); - header.MergeParameters(SerializeParamsForLink(transactionId, context.Config->Prefix, targetPath, linkPath, options)); - return ParseGuidFromResponse(RetryRequestWithPolicy(retryPolicy, context, header).Response); -} - -TLockId Lock( - const IRequestRetryPolicyPtr& retryPolicy, - const TClientContext& context, - const TTransactionId& transactionId, - const TYPath& path, - ELockMode mode, - const TLockOptions& options) -{ - THttpHeader header("POST", 
"lock"); - header.AddMutationId(); - header.MergeParameters(SerializeParamsForLock(transactionId, context.Config->Prefix, path, mode, options)); - return ParseGuidFromResponse(RetryRequestWithPolicy(retryPolicy, context, header).Response); -} - -void Unlock( - IRequestRetryPolicyPtr retryPolicy, - const TClientContext& context, - const TTransactionId& transactionId, - const TYPath& path, - const TUnlockOptions& options) -{ - THttpHeader header("POST", "unlock"); - header.AddMutationId(); - header.MergeParameters(SerializeParamsForUnlock(transactionId, context.Config->Prefix, path, options)); - RetryRequestWithPolicy(retryPolicy, context, header); -} - -void Concatenate( - const TClientContext& context, - const TTransactionId& transactionId, - const TVector<TRichYPath>& sourcePaths, - const TRichYPath& destinationPath, - const TConcatenateOptions& options) -{ - THttpHeader header("POST", "concatenate"); - header.AddMutationId(); - header.MergeParameters(SerializeParamsForConcatenate(transactionId, context.Config->Prefix, sourcePaths, destinationPath, options)); - RequestWithoutRetry(context, header); -} - -void PingTx( - const IRequestRetryPolicyPtr& retryPolicy, - const TClientContext& context, - const TTransactionId& transactionId) -{ - THttpHeader header("POST", "ping_tx"); - header.MergeParameters(SerializeParamsForPingTx(transactionId)); - TRequestConfig requestConfig; - requestConfig.HttpConfig = NHttpClient::THttpConfig{ - .SocketTimeout = context.Config->PingTimeout - }; - RetryRequestWithPolicy(retryPolicy, context, header, {}, requestConfig); -} - -TOperationAttributes ParseOperationAttributes(const TNode& node) -{ - const auto& mapNode = node.AsMap(); - TOperationAttributes result; - - if (auto idNode = mapNode.FindPtr("id")) { - result.Id = GetGuid(idNode->AsString()); - } - - if (auto typeNode = mapNode.FindPtr("type")) { - result.Type = FromString<EOperationType>(typeNode->AsString()); - } else if (auto operationTypeNode = 
mapNode.FindPtr("operation_type")) { - // COMPAT(levysotsky): "operation_type" is a deprecated synonim for "type". - // This branch should be removed when all clusters are updated. - result.Type = FromString<EOperationType>(operationTypeNode->AsString()); - } - - if (auto stateNode = mapNode.FindPtr("state")) { - result.State = stateNode->AsString(); - // We don't use FromString here, because OS_IN_PROGRESS unites many states: "initializing", "running", etc. - if (*result.State == "completed") { - result.BriefState = EOperationBriefState::Completed; - } else if (*result.State == "aborted") { - result.BriefState = EOperationBriefState::Aborted; - } else if (*result.State == "failed") { - result.BriefState = EOperationBriefState::Failed; - } else { - result.BriefState = EOperationBriefState::InProgress; - } - } - if (auto authenticatedUserNode = mapNode.FindPtr("authenticated_user")) { - result.AuthenticatedUser = authenticatedUserNode->AsString(); - } - if (auto startTimeNode = mapNode.FindPtr("start_time")) { - result.StartTime = TInstant::ParseIso8601(startTimeNode->AsString()); - } - if (auto finishTimeNode = mapNode.FindPtr("finish_time")) { - result.FinishTime = TInstant::ParseIso8601(finishTimeNode->AsString()); - } - auto briefProgressNode = mapNode.FindPtr("brief_progress"); - if (briefProgressNode && briefProgressNode->HasKey("jobs")) { - result.BriefProgress.ConstructInPlace(); - static auto load = [] (const TNode& item) { - // Backward compatibility with old YT versions - return item.IsInt64() ? 
item.AsInt64() : item["total"].AsInt64(); - }; - const auto& jobs = (*briefProgressNode)["jobs"]; - result.BriefProgress->Aborted = load(jobs["aborted"]); - result.BriefProgress->Completed = load(jobs["completed"]); - result.BriefProgress->Running = jobs["running"].AsInt64(); - result.BriefProgress->Total = jobs["total"].AsInt64(); - result.BriefProgress->Failed = jobs["failed"].AsInt64(); - result.BriefProgress->Lost = jobs["lost"].AsInt64(); - result.BriefProgress->Pending = jobs["pending"].AsInt64(); - } - if (auto briefSpecNode = mapNode.FindPtr("brief_spec")) { - result.BriefSpec = *briefSpecNode; - } - if (auto specNode = mapNode.FindPtr("spec")) { - result.Spec = *specNode; - } - if (auto fullSpecNode = mapNode.FindPtr("full_spec")) { - result.FullSpec = *fullSpecNode; - } - if (auto unrecognizedSpecNode = mapNode.FindPtr("unrecognized_spec")) { - result.UnrecognizedSpec = *unrecognizedSpecNode; - } - if (auto suspendedNode = mapNode.FindPtr("suspended")) { - result.Suspended = suspendedNode->AsBool(); - } - if (auto resultNode = mapNode.FindPtr("result")) { - result.Result.ConstructInPlace(); - auto error = TYtError((*resultNode)["error"]); - if (error.GetCode() != 0) { - result.Result->Error = std::move(error); - } - } - if (auto progressNode = mapNode.FindPtr("progress")) { - const auto& progressMap = progressNode->AsMap(); - TMaybe<TInstant> buildTime; - if (auto buildTimeNode = progressMap.FindPtr("build_time")) { - buildTime = TInstant::ParseIso8601(buildTimeNode->AsString()); - } - TJobStatistics jobStatistics; - if (auto jobStatisticsNode = progressMap.FindPtr("job_statistics")) { - jobStatistics = TJobStatistics(*jobStatisticsNode); - } - TJobCounters jobCounters; - if (auto jobCountersNode = progressMap.FindPtr("total_job_counter")) { - jobCounters = TJobCounters(*jobCountersNode); - } - result.Progress = TOperationProgress{ - .JobStatistics = std::move(jobStatistics), - .JobCounters = std::move(jobCounters), - .BuildTime = buildTime, - }; - } - if 
(auto eventsNode = mapNode.FindPtr("events")) { - result.Events.ConstructInPlace().reserve(eventsNode->Size()); - for (const auto& eventNode : eventsNode->AsList()) { - result.Events->push_back(TOperationEvent{ - eventNode["state"].AsString(), - TInstant::ParseIso8601(eventNode["time"].AsString()), - }); - } - } - if (auto alertsNode = mapNode.FindPtr("alerts")) { - result.Alerts.ConstructInPlace(); - for (const auto& [alertType, alertError] : alertsNode->AsMap()) { - result.Alerts->emplace(alertType, TYtError(alertError)); - } - } - - return result; -} - -TOperationAttributes GetOperation( - const IRequestRetryPolicyPtr& retryPolicy, - const TClientContext& context, - const TOperationId& operationId, - const TGetOperationOptions& options) -{ - THttpHeader header("GET", "get_operation"); - header.MergeParameters(SerializeParamsForGetOperation(operationId, options)); - auto result = RetryRequestWithPolicy(retryPolicy, context, header); - return ParseOperationAttributes(NodeFromYsonString(result.Response)); -} - -void AbortOperation( - const IRequestRetryPolicyPtr& retryPolicy, - const TClientContext& context, - const TOperationId& operationId) -{ - THttpHeader header("POST", "abort_op"); - header.AddMutationId(); - header.MergeParameters(SerializeParamsForAbortOperation(operationId)); - RetryRequestWithPolicy(retryPolicy, context, header); -} - -void CompleteOperation( - const IRequestRetryPolicyPtr& retryPolicy, - const TClientContext& context, - const TOperationId& operationId) -{ - THttpHeader header("POST", "complete_op"); - header.AddMutationId(); - header.MergeParameters(SerializeParamsForCompleteOperation(operationId)); - RetryRequestWithPolicy(retryPolicy, context, header); -} - -void SuspendOperation( - const IRequestRetryPolicyPtr& retryPolicy, - const TClientContext& context, - const TOperationId& operationId, - const TSuspendOperationOptions& options) -{ - THttpHeader header("POST", "suspend_op"); - header.AddMutationId(); - 
header.MergeParameters(SerializeParamsForSuspendOperation(operationId, options)); - RetryRequestWithPolicy(retryPolicy, context, header); -} - -void ResumeOperation( - const IRequestRetryPolicyPtr& retryPolicy, - const TClientContext& context, - const TOperationId& operationId, - const TResumeOperationOptions& options) -{ - THttpHeader header("POST", "resume_op"); - header.AddMutationId(); - header.MergeParameters(SerializeParamsForResumeOperation(operationId, options)); - RetryRequestWithPolicy(retryPolicy, context, header); -} - -template <typename TKey> -static THashMap<TKey, i64> GetCounts(const TNode& countsNode) -{ - THashMap<TKey, i64> counts; - for (const auto& entry : countsNode.AsMap()) { - counts.emplace(FromString<TKey>(entry.first), entry.second.AsInt64()); - } - return counts; -} - -TListOperationsResult ListOperations( - const IRequestRetryPolicyPtr& retryPolicy, - const TClientContext& context, - const TListOperationsOptions& options) -{ - THttpHeader header("GET", "list_operations"); - header.MergeParameters(SerializeParamsForListOperations(options)); - auto responseInfo = RetryRequestWithPolicy(retryPolicy, context, header); - auto resultNode = NodeFromYsonString(responseInfo.Response); - - TListOperationsResult result; - for (const auto& operationNode : resultNode["operations"].AsList()) { - result.Operations.push_back(ParseOperationAttributes(operationNode)); - } - - if (resultNode.HasKey("pool_counts")) { - result.PoolCounts = GetCounts<TString>(resultNode["pool_counts"]); - } - if (resultNode.HasKey("user_counts")) { - result.UserCounts = GetCounts<TString>(resultNode["user_counts"]); - } - if (resultNode.HasKey("type_counts")) { - result.TypeCounts = GetCounts<EOperationType>(resultNode["type_counts"]); - } - if (resultNode.HasKey("state_counts")) { - result.StateCounts = GetCounts<TString>(resultNode["state_counts"]); - } - if (resultNode.HasKey("failed_jobs_count")) { - result.WithFailedJobsCount = resultNode["failed_jobs_count"].AsInt64(); 
- } - - result.Incomplete = resultNode["incomplete"].AsBool(); - - return result; -} - -void UpdateOperationParameters( - const IRequestRetryPolicyPtr& retryPolicy, - const TClientContext& context, - const TOperationId& operationId, - const TUpdateOperationParametersOptions& options) -{ - THttpHeader header("POST", "update_op_parameters"); - header.MergeParameters(SerializeParamsForUpdateOperationParameters(operationId, options)); - RetryRequestWithPolicy(retryPolicy, context, header); -} - -TJobAttributes ParseJobAttributes(const TNode& node) -{ - const auto& mapNode = node.AsMap(); - TJobAttributes result; - - // Currently "get_job" returns "job_id" field and "list_jobs" returns "id" field. - auto idNode = mapNode.FindPtr("id"); - if (!idNode) { - idNode = mapNode.FindPtr("job_id"); - } - if (idNode) { - result.Id = GetGuid(idNode->AsString()); - } - - if (auto typeNode = mapNode.FindPtr("type")) { - result.Type = FromString<EJobType>(typeNode->AsString()); - } - if (auto stateNode = mapNode.FindPtr("state")) { - result.State = FromString<EJobState>(stateNode->AsString()); - } - if (auto addressNode = mapNode.FindPtr("address")) { - result.Address = addressNode->AsString(); - } - if (auto taskNameNode = mapNode.FindPtr("task_name")) { - result.TaskName = taskNameNode->AsString(); - } - if (auto startTimeNode = mapNode.FindPtr("start_time")) { - result.StartTime = TInstant::ParseIso8601(startTimeNode->AsString()); - } - if (auto finishTimeNode = mapNode.FindPtr("finish_time")) { - result.FinishTime = TInstant::ParseIso8601(finishTimeNode->AsString()); - } - if (auto progressNode = mapNode.FindPtr("progress")) { - result.Progress = progressNode->AsDouble(); - } - if (auto stderrSizeNode = mapNode.FindPtr("stderr_size")) { - result.StderrSize = stderrSizeNode->AsUint64(); - } - if (auto errorNode = mapNode.FindPtr("error")) { - result.Error.ConstructInPlace(*errorNode); - } - if (auto briefStatisticsNode = mapNode.FindPtr("brief_statistics")) { - 
result.BriefStatistics = *briefStatisticsNode; - } - if (auto inputPathsNode = mapNode.FindPtr("input_paths")) { - const auto& inputPathNodesList = inputPathsNode->AsList(); - result.InputPaths.ConstructInPlace(); - result.InputPaths->reserve(inputPathNodesList.size()); - for (const auto& inputPathNode : inputPathNodesList) { - TRichYPath path; - Deserialize(path, inputPathNode); - result.InputPaths->push_back(std::move(path)); - } - } - if (auto coreInfosNode = mapNode.FindPtr("core_infos")) { - const auto& coreInfoNodesList = coreInfosNode->AsList(); - result.CoreInfos.ConstructInPlace(); - result.CoreInfos->reserve(coreInfoNodesList.size()); - for (const auto& coreInfoNode : coreInfoNodesList) { - TCoreInfo coreInfo; - coreInfo.ProcessId = coreInfoNode["process_id"].AsInt64(); - coreInfo.ExecutableName = coreInfoNode["executable_name"].AsString(); - if (coreInfoNode.HasKey("size")) { - coreInfo.Size = coreInfoNode["size"].AsUint64(); - } - if (coreInfoNode.HasKey("error")) { - coreInfo.Error.ConstructInPlace(coreInfoNode["error"]); - } - result.CoreInfos->push_back(std::move(coreInfo)); - } - } - return result; -} - -TJobAttributes GetJob( - const IRequestRetryPolicyPtr& retryPolicy, - const TClientContext& context, - const TOperationId& operationId, - const TJobId& jobId, - const TGetJobOptions& options) -{ - THttpHeader header("GET", "get_job"); - header.MergeParameters(SerializeParamsForGetJob(operationId, jobId, options)); - auto responseInfo = RetryRequestWithPolicy(retryPolicy, context, header); - auto resultNode = NodeFromYsonString(responseInfo.Response); - return ParseJobAttributes(resultNode); -} - -TListJobsResult ListJobs( - const IRequestRetryPolicyPtr& retryPolicy, - const TClientContext& context, - const TOperationId& operationId, - const TListJobsOptions& options) -{ - THttpHeader header("GET", "list_jobs"); - header.MergeParameters(SerializeParamsForListJobs(operationId, options)); - auto responseInfo = RetryRequestWithPolicy(retryPolicy, 
context, header); - auto resultNode = NodeFromYsonString(responseInfo.Response); - - TListJobsResult result; - - const auto& jobNodesList = resultNode["jobs"].AsList(); - result.Jobs.reserve(jobNodesList.size()); - for (const auto& jobNode : jobNodesList) { - result.Jobs.push_back(ParseJobAttributes(jobNode)); - } - - if (resultNode.HasKey("cypress_job_count") && !resultNode["cypress_job_count"].IsNull()) { - result.CypressJobCount = resultNode["cypress_job_count"].AsInt64(); - } - if (resultNode.HasKey("controller_agent_job_count") && !resultNode["controller_agent_job_count"].IsNull()) { - result.ControllerAgentJobCount = resultNode["scheduler_job_count"].AsInt64(); - } - if (resultNode.HasKey("archive_job_count") && !resultNode["archive_job_count"].IsNull()) { - result.ArchiveJobCount = resultNode["archive_job_count"].AsInt64(); - } - - return result; -} - -class TResponseReader - : public IFileReader -{ -public: - TResponseReader(const TClientContext& context, THttpHeader header) - { - if (context.ServiceTicketAuth) { - header.SetServiceTicket(context.ServiceTicketAuth->Ptr->IssueServiceTicket()); - } else { - header.SetToken(context.Token); - } - - auto hostName = GetProxyForHeavyRequest(context); - auto requestId = CreateGuidAsString(); - - Response_ = context.HttpClient->Request(GetFullUrl(hostName, context, header), requestId, header); - ResponseStream_ = Response_->GetResponseStream(); - } - -private: - size_t DoRead(void* buf, size_t len) override - { - return ResponseStream_->Read(buf, len); - } - - size_t DoSkip(size_t len) override - { - return ResponseStream_->Skip(len); - } - -private: - THttpRequest Request_; - NHttpClient::IHttpResponsePtr Response_; - IInputStream* ResponseStream_; -}; - -IFileReaderPtr GetJobInput( - const TClientContext& context, - const TJobId& jobId, - const TGetJobInputOptions& /* options */) -{ - THttpHeader header("GET", "get_job_input"); - header.AddParameter("job_id", GetGuidAsString(jobId)); - return new 
TResponseReader(context, std::move(header)); -} - -IFileReaderPtr GetJobFailContext( - const TClientContext& context, - const TOperationId& operationId, - const TJobId& jobId, - const TGetJobFailContextOptions& /* options */) -{ - THttpHeader header("GET", "get_job_fail_context"); - header.AddOperationId(operationId); - header.AddParameter("job_id", GetGuidAsString(jobId)); - return new TResponseReader(context, std::move(header)); -} - -TString GetJobStderrWithRetries( - const IRequestRetryPolicyPtr& retryPolicy, - const TClientContext& context, - const TOperationId& operationId, - const TJobId& jobId, - const TGetJobStderrOptions& /* options */) -{ - THttpHeader header("GET", "get_job_stderr"); - header.AddOperationId(operationId); - header.AddParameter("job_id", GetGuidAsString(jobId)); - TRequestConfig config; - config.IsHeavy = true; - auto responseInfo = RetryRequestWithPolicy(retryPolicy, context, header, {}, config); - return responseInfo.Response; -} - -IFileReaderPtr GetJobStderr( - const TClientContext& context, - const TOperationId& operationId, - const TJobId& jobId, - const TGetJobStderrOptions& /* options */) -{ - THttpHeader header("GET", "get_job_stderr"); - header.AddOperationId(operationId); - header.AddParameter("job_id", GetGuidAsString(jobId)); - return new TResponseReader(context, std::move(header)); -} - -TMaybe<TYPath> GetFileFromCache( - const IRequestRetryPolicyPtr& retryPolicy, - const TClientContext& context, - const TTransactionId& transactionId, - const TString& md5Signature, - const TYPath& cachePath, - const TGetFileFromCacheOptions& options) -{ - THttpHeader header("GET", "get_file_from_cache"); - header.MergeParameters(SerializeParamsForGetFileFromCache(transactionId, md5Signature, cachePath, options)); - auto responseInfo = RetryRequestWithPolicy(retryPolicy, context, header); - auto path = NodeFromYsonString(responseInfo.Response).AsString(); - return path.empty() ? 
Nothing() : TMaybe<TYPath>(path); -} - -TYPath PutFileToCache( - const IRequestRetryPolicyPtr& retryPolicy, - const TClientContext& context, - const TTransactionId& transactionId, - const TYPath& filePath, - const TString& md5Signature, - const TYPath& cachePath, - const TPutFileToCacheOptions& options) -{ - THttpHeader header("POST", "put_file_to_cache"); - header.MergeParameters(SerializeParamsForPutFileToCache(transactionId, context.Config->Prefix, filePath, md5Signature, cachePath, options)); - auto result = RetryRequestWithPolicy(retryPolicy, context, header); - return NodeFromYsonString(result.Response).AsString(); -} - -TNode::TListType SkyShareTable( - const IRequestRetryPolicyPtr& retryPolicy, - const TClientContext& context, - const std::vector<TYPath>& tablePaths, - const TSkyShareTableOptions& options) -{ - THttpHeader header("POST", "api/v1/share", /*IsApi*/ false); - - auto proxyName = context.ServerName.substr(0, context.ServerName.find('.')); - - auto host = context.Config->SkynetApiHost; - if (host == "") { - host = "skynet." + proxyName + ".yt.yandex.net"; - } - - header.MergeParameters(SerializeParamsForSkyShareTable(proxyName, context.Config->Prefix, tablePaths, options)); - TClientContext skyApiHost({ .ServerName = host, .HttpClient = NHttpClient::CreateDefaultHttpClient() }); - TResponseInfo response = {}; - - // As documented at https://wiki.yandex-team.ru/yt/userdoc/blob_tables/#shag3.sozdajomrazdachu - // first request returns HTTP status code 202 (Accepted). And we need retrying until we have 200 (OK). 
- while (response.HttpCode != 200) { - response = RetryRequestWithPolicy(retryPolicy, skyApiHost, header, ""); - TWaitProxy::Get()->Sleep(TDuration::Seconds(5)); - } - - if (options.KeyColumns_) { - return NodeFromJsonString(response.Response)["torrents"].AsList(); - } else { - TNode torrent; - - torrent["key"] = TNode::CreateList(); - torrent["rbtorrent"] = response.Response; - - return TNode::TListType{ torrent }; - } -} - -TCheckPermissionResponse ParseCheckPermissionResponse(const TNode& node) -{ - auto parseSingleResult = [] (const TNode::TMapType& node) { - TCheckPermissionResult result; - result.Action = ::FromString<ESecurityAction>(node.at("action").AsString()); - if (auto objectId = node.FindPtr("object_id")) { - result.ObjectId = GetGuid(objectId->AsString()); - } - if (auto objectName = node.FindPtr("object_name")) { - result.ObjectName = objectName->AsString(); - } - if (auto subjectId = node.FindPtr("subject_id")) { - result.SubjectId = GetGuid(subjectId->AsString()); - } - if (auto subjectName = node.FindPtr("subject_name")) { - result.SubjectName = subjectName->AsString(); - } - return result; - }; - - const auto& mapNode = node.AsMap(); - TCheckPermissionResponse result; - static_cast<TCheckPermissionResult&>(result) = parseSingleResult(mapNode); - if (auto columns = mapNode.FindPtr("columns")) { - result.Columns.reserve(columns->AsList().size()); - for (const auto& columnNode : columns->AsList()) { - result.Columns.push_back(parseSingleResult(columnNode.AsMap())); - } - } - return result; -} - -TCheckPermissionResponse CheckPermission( - const IRequestRetryPolicyPtr& retryPolicy, - const TClientContext& context, - const TString& user, - EPermission permission, - const TYPath& path, - const TCheckPermissionOptions& options) -{ - THttpHeader header("GET", "check_permission"); - header.MergeParameters(SerializeParamsForCheckPermission(user, permission, context.Config->Prefix, path, options)); - auto response = RetryRequestWithPolicy(retryPolicy, 
context, header); - return ParseCheckPermissionResponse(NodeFromYsonString(response.Response)); -} - -TVector<TTabletInfo> GetTabletInfos( - const IRequestRetryPolicyPtr& retryPolicy, - const TClientContext& context, - const TYPath& path, - const TVector<int>& tabletIndexes, - const TGetTabletInfosOptions& options) -{ - THttpHeader header("POST", "api/v4/get_tablet_infos", false); - header.MergeParameters(SerializeParamsForGetTabletInfos(context.Config->Prefix, path, tabletIndexes, options)); - auto response = RetryRequestWithPolicy(retryPolicy, context, header); - TVector<TTabletInfo> result; - Deserialize(result, *NodeFromYsonString(response.Response).AsMap().FindPtr("tablets")); - return result; -} - -TVector<TTableColumnarStatistics> GetTableColumnarStatistics( - const IRequestRetryPolicyPtr& retryPolicy, - const TClientContext& context, - const TTransactionId& transactionId, - const TVector<TRichYPath>& paths, - const TGetTableColumnarStatisticsOptions& options) -{ - THttpHeader header("GET", "get_table_columnar_statistics"); - header.MergeParameters(SerializeParamsForGetTableColumnarStatistics(transactionId, paths, options)); - TRequestConfig config; - config.IsHeavy = true; - auto requestResult = RetryRequestWithPolicy(retryPolicy, context, header, {}, config); - auto response = NodeFromYsonString(requestResult.Response); - TVector<TTableColumnarStatistics> result; - Deserialize(result, response); - return result; -} - -TMultiTablePartitions GetTablePartitions( - const IRequestRetryPolicyPtr& retryPolicy, - const TClientContext& context, - const TTransactionId& transactionId, - const TVector<TRichYPath>& paths, - const TGetTablePartitionsOptions& options) -{ - THttpHeader header("GET", "partition_tables"); - header.MergeParameters(SerializeParamsForGetTablePartitions(transactionId, paths, options)); - TRequestConfig config; - config.IsHeavy = true; - auto requestResult = RetryRequestWithPolicy(retryPolicy, context, header, {}, config); - auto response = 
NodeFromYsonString(requestResult.Response); - TMultiTablePartitions result; - Deserialize(result, response); - return result; -} - -TRichYPath CanonizeYPath( - const IRequestRetryPolicyPtr& retryPolicy, - const TClientContext& context, - const TRichYPath& path) -{ - return CanonizeYPaths(retryPolicy, context, {path}).front(); -} - -TVector<TRichYPath> CanonizeYPaths( - const IRequestRetryPolicyPtr& retryPolicy, - const TClientContext& context, - const TVector<TRichYPath>& paths) -{ - TRawBatchRequest batch(context.Config); - TVector<NThreading::TFuture<TRichYPath>> futures; - futures.reserve(paths.size()); - for (int i = 0; i < static_cast<int>(paths.size()); ++i) { - futures.push_back(batch.CanonizeYPath(paths[i])); - } - ExecuteBatch(retryPolicy, context, batch, TExecuteBatchOptions{}); - TVector<TRichYPath> result; - result.reserve(futures.size()); - for (auto& future : futures) { - result.push_back(future.ExtractValueSync()); - } - return result; -} - -void AlterTable( - const IRequestRetryPolicyPtr& retryPolicy, - const TClientContext& context, - const TTransactionId& transactionId, - const TYPath& path, - const TAlterTableOptions& options) -{ - THttpHeader header("POST", "alter_table"); - header.AddMutationId(); - header.MergeParameters(SerializeParamsForAlterTable(transactionId, context.Config->Prefix, path, options)); - RetryRequestWithPolicy(retryPolicy, context, header); -} - -void AlterTableReplica( - const IRequestRetryPolicyPtr& retryPolicy, - const TClientContext& context, - const TReplicaId& replicaId, - const TAlterTableReplicaOptions& options) -{ - THttpHeader header("POST", "alter_table_replica"); - header.AddMutationId(); - header.MergeParameters(NRawClient::SerializeParamsForAlterTableReplica(replicaId, options)); - RetryRequestWithPolicy(retryPolicy, context, header); -} - -void DeleteRows( - const IRequestRetryPolicyPtr& retryPolicy, - const TClientContext& context, - const TYPath& path, - const TNode::TListType& keys, - const 
TDeleteRowsOptions& options) -{ - THttpHeader header("PUT", "delete_rows"); - header.SetInputFormat(TFormat::YsonBinary()); - header.MergeParameters(NRawClient::SerializeParametersForDeleteRows(context.Config->Prefix, path, options)); - - auto body = NodeListToYsonString(keys); - TRequestConfig requestConfig; - requestConfig.IsHeavy = true; - RetryRequestWithPolicy(retryPolicy, context, header, body, requestConfig); -} - -void FreezeTable( - const IRequestRetryPolicyPtr& retryPolicy, - const TClientContext& context, - const TYPath& path, - const TFreezeTableOptions& options) -{ - THttpHeader header("POST", "freeze_table"); - header.MergeParameters(SerializeParamsForFreezeTable(context.Config->Prefix, path, options)); - RetryRequestWithPolicy(retryPolicy, context, header); -} - -void UnfreezeTable( - const IRequestRetryPolicyPtr& retryPolicy, - const TClientContext& context, - const TYPath& path, - const TUnfreezeTableOptions& options) -{ - THttpHeader header("POST", "unfreeze_table"); - header.MergeParameters(SerializeParamsForUnfreezeTable(context.Config->Prefix, path, options)); - RetryRequestWithPolicy(retryPolicy, context, header); -} - -void AbortTransaction( - const IRequestRetryPolicyPtr& retryPolicy, - const TClientContext& context, - const TTransactionId& transactionId) -{ - THttpHeader header("POST", "abort_tx"); - header.AddMutationId(); - header.MergeParameters(NRawClient::SerializeParamsForAbortTransaction(transactionId)); - RetryRequestWithPolicy(retryPolicy, context, header); -} - -void CommitTransaction( - const IRequestRetryPolicyPtr& retryPolicy, - const TClientContext& context, - const TTransactionId& transactionId) -{ - THttpHeader header("POST", "commit_tx"); - header.AddMutationId(); - header.MergeParameters(NRawClient::SerializeParamsForCommitTransaction(transactionId)); - RetryRequestWithPolicy(retryPolicy, context, header); -} - -TTransactionId StartTransaction( - const IRequestRetryPolicyPtr& retryPolicy, - const TClientContext& context, - 
const TTransactionId& parentTransactionId, - const TStartTransactionOptions& options) -{ - THttpHeader header("POST", "start_tx"); - header.AddMutationId(); - header.MergeParameters(NRawClient::SerializeParamsForStartTransaction(parentTransactionId, context.Config->TxTimeout, options)); - return ParseGuidFromResponse(RetryRequestWithPolicy(retryPolicy, context, header).Response); -} - -//////////////////////////////////////////////////////////////////////////////// - -} // namespace NYT::NDetail::NRawClient diff --git a/yt/cpp/mapreduce/raw_client/raw_requests.h b/yt/cpp/mapreduce/raw_client/raw_requests.h deleted file mode 100644 index 05fcbade76..0000000000 --- a/yt/cpp/mapreduce/raw_client/raw_requests.h +++ /dev/null @@ -1,397 +0,0 @@ -#pragma once - -#include "raw_batch_request.h" - -#include <yt/cpp/mapreduce/common/fwd.h> -#include <yt/cpp/mapreduce/http/context.h> -#include <yt/cpp/mapreduce/interface/client_method_options.h> -#include <yt/cpp/mapreduce/interface/operation.h> - -namespace NYT { - -//////////////////////////////////////////////////////////////////////////////// - -class IRequestRetryPolicy; -struct TClientContext; -struct TExecuteBatchOptions; - -//////////////////////////////////////////////////////////////////////////////// - -namespace NDetail::NRawClient { - -//////////////////////////////////////////////////////////////////////////////// - -TOperationAttributes ParseOperationAttributes(const TNode& node); - -TCheckPermissionResponse ParseCheckPermissionResponse(const TNode& node); - -//////////////////////////////////////////////////////////////////////////////// - -// -// marks `batchRequest' as executed -void ExecuteBatch( - IRequestRetryPolicyPtr retryPolicy, - const TClientContext& context, - TRawBatchRequest& batchRequest, - const TExecuteBatchOptions& options = TExecuteBatchOptions()); - -// -// Cypress -// - -TNode Get( - const IRequestRetryPolicyPtr& retryPolicy, - const TClientContext& context, - const TTransactionId& 
transactionId, - const TYPath& path, - const TGetOptions& options = TGetOptions()); - -TNode TryGet( - const IRequestRetryPolicyPtr& retryPolicy, - const TClientContext& context, - const TTransactionId& transactionId, - const TYPath& path, - const TGetOptions& options); - -void Set( - const IRequestRetryPolicyPtr& retryPolicy, - const TClientContext& context, - const TTransactionId& transactionId, - const TYPath& path, - const TNode& value, - const TSetOptions& options = TSetOptions()); - -void MultisetAttributes( - const IRequestRetryPolicyPtr& retryPolicy, - const TClientContext& context, - const TTransactionId& transactionId, - const TYPath& path, - const TNode::TMapType& value, - const TMultisetAttributesOptions& options = TMultisetAttributesOptions()); - -bool Exists( - const IRequestRetryPolicyPtr& retryPolicy, - const TClientContext& context, - const TTransactionId& transactionId, - const TYPath& path, - const TExistsOptions& options = TExistsOptions()); - -TNodeId Create( - const IRequestRetryPolicyPtr& retryPolicy, - const TClientContext& context, - const TTransactionId& transactionId, - const TYPath& path, - const ENodeType& type, - const TCreateOptions& options = TCreateOptions()); - -TNodeId Copy( - const IRequestRetryPolicyPtr& retryPolicy, - const TClientContext& context, - const TTransactionId& transactionId, - const TYPath& sourcePath, - const TYPath& destinationPath, - const TCopyOptions& options = TCopyOptions()); - -TNodeId Move( - const IRequestRetryPolicyPtr& retryPolicy, - const TClientContext& context, - const TTransactionId& transactionId, - const TYPath& sourcePath, - const TYPath& destinationPath, - const TMoveOptions& options = TMoveOptions()); - -void Remove( - const IRequestRetryPolicyPtr& retryPolicy, - const TClientContext& context, - const TTransactionId& transactionId, - const TYPath& path, - const TRemoveOptions& options = TRemoveOptions()); - -TNode::TListType List( - const IRequestRetryPolicyPtr& retryPolicy, - const 
TClientContext& context, - const TTransactionId& transactionId, - const TYPath& path, - const TListOptions& options = TListOptions()); - -TNodeId Link( - const IRequestRetryPolicyPtr& retryPolicy, - const TClientContext& context, - const TTransactionId& transactionId, - const TYPath& targetPath, - const TYPath& linkPath, - const TLinkOptions& options = TLinkOptions()); - -TLockId Lock( - const IRequestRetryPolicyPtr& retryPolicy, - const TClientContext& context, - const TTransactionId& transactionId, - const TYPath& path, - ELockMode mode, - const TLockOptions& options = TLockOptions()); - -void Unlock( - IRequestRetryPolicyPtr retryPolicy, - const TClientContext& context, - const TTransactionId& transactionId, - const TYPath& path, - const TUnlockOptions& options = TUnlockOptions()); - -void Concatenate( - const TClientContext& context, - const TTransactionId& transactionId, - const TVector<TRichYPath>& sourcePaths, - const TRichYPath& destinationPath, - const TConcatenateOptions& options = TConcatenateOptions()); - -// -// Transactions -// - -void PingTx( - const IRequestRetryPolicyPtr& retryPolicy, - const TClientContext& context, - const TTransactionId& transactionId); - -// -// Operations -// - -TOperationAttributes GetOperation( - const IRequestRetryPolicyPtr& retryPolicy, - const TClientContext& context, - const TOperationId& operationId, - const TGetOperationOptions& options = TGetOperationOptions()); - -void AbortOperation( - const IRequestRetryPolicyPtr& retryPolicy, - const TClientContext& context, - const TOperationId& operationId); - -void CompleteOperation( - const IRequestRetryPolicyPtr& retryPolicy, - const TClientContext& context, - const TOperationId& operationId); - -void SuspendOperation( - const IRequestRetryPolicyPtr& retryPolicy, - const TClientContext& context, - const TOperationId& operationId, - const TSuspendOperationOptions& options = TSuspendOperationOptions()); - -void ResumeOperation( - const IRequestRetryPolicyPtr& retryPolicy, - 
const TClientContext& context, - const TOperationId& operationId, - const TResumeOperationOptions& options = TResumeOperationOptions()); - -TListOperationsResult ListOperations( - const IRequestRetryPolicyPtr& retryPolicy, - const TClientContext& context, - const TListOperationsOptions& options = TListOperationsOptions()); - -void UpdateOperationParameters( - const IRequestRetryPolicyPtr& retryPolicy, - const TClientContext& context, - const TOperationId& operationId, - const TUpdateOperationParametersOptions& options = TUpdateOperationParametersOptions()); - -// -// Jobs -// - -TJobAttributes GetJob( - const IRequestRetryPolicyPtr& retryPolicy, - const TClientContext& context, - const TOperationId& operationId, - const TJobId& jobId, - const TGetJobOptions& options = TGetJobOptions()); - -TListJobsResult ListJobs( - const IRequestRetryPolicyPtr& retryPolicy, - const TClientContext& context, - const TOperationId& operationId, - const TListJobsOptions& options = TListJobsOptions()); - -::TIntrusivePtr<IFileReader> GetJobInput( - const TClientContext& context, - const TJobId& jobId, - const TGetJobInputOptions& options = TGetJobInputOptions()); - -::TIntrusivePtr<IFileReader> GetJobFailContext( - const TClientContext& context, - const TOperationId& operationId, - const TJobId& jobId, - const TGetJobFailContextOptions& options = TGetJobFailContextOptions()); - -TString GetJobStderrWithRetries( - const IRequestRetryPolicyPtr& retryPolicy, - const TClientContext& context, - const TOperationId& operationId, - const TJobId& jobId, - const TGetJobStderrOptions& /* options */ = TGetJobStderrOptions()); - -::TIntrusivePtr<IFileReader> GetJobStderr( - const TClientContext& context, - const TOperationId& operationId, - const TJobId& jobId, - const TGetJobStderrOptions& options = TGetJobStderrOptions()); - -// -// File cache -// - -TMaybe<TYPath> GetFileFromCache( - const IRequestRetryPolicyPtr& retryPolicy, - const TClientContext& context, - const TTransactionId& 
transactionId, - const TString& md5Signature, - const TYPath& cachePath, - const TGetFileFromCacheOptions& options = TGetFileFromCacheOptions()); - -TYPath PutFileToCache( - const IRequestRetryPolicyPtr& retryPolicy, - const TClientContext& context, - const TTransactionId& transactionId, - const TYPath& filePath, - const TString& md5Signature, - const TYPath& cachePath, - const TPutFileToCacheOptions& options = TPutFileToCacheOptions()); - -// -// SkyShare -// - -TNode::TListType SkyShareTable( - const IRequestRetryPolicyPtr& retryPolicy, - const TClientContext& context, - const std::vector<TYPath>& tablePaths, - const TSkyShareTableOptions& options); - -// -// Misc -// - -TCheckPermissionResponse CheckPermission( - const IRequestRetryPolicyPtr& retryPolicy, - const TClientContext& context, - const TString& user, - EPermission permission, - const TYPath& path, - const TCheckPermissionOptions& options = TCheckPermissionOptions()); - -TVector<TTabletInfo> GetTabletInfos( - const IRequestRetryPolicyPtr& retryPolicy, - const TClientContext& context, - const TYPath& path, - const TVector<int>& tabletIndexes, - const TGetTabletInfosOptions& options); - -TVector<TTableColumnarStatistics> GetTableColumnarStatistics( - const IRequestRetryPolicyPtr& retryPolicy, - const TClientContext& context, - const TTransactionId& transactionId, - const TVector<TRichYPath>& paths, - const TGetTableColumnarStatisticsOptions& options); - -TMultiTablePartitions GetTablePartitions( - const IRequestRetryPolicyPtr& retryPolicy, - const TClientContext& context, - const TTransactionId& transactionId, - const TVector<TRichYPath>& paths, - const TGetTablePartitionsOptions& options); - -TRichYPath CanonizeYPath( - const IRequestRetryPolicyPtr& retryPolicy, - const TClientContext& context, - const TRichYPath& path); - -TVector<TRichYPath> CanonizeYPaths( - const IRequestRetryPolicyPtr& retryPolicy, - const TClientContext& context, - const TVector<TRichYPath>& paths); - -// -// Tables -// - -void 
AlterTable( - const IRequestRetryPolicyPtr& retryPolicy, - const TClientContext& context, - const TTransactionId& transactionId, - const TYPath& path, - const TAlterTableOptions& options); - -void AlterTableReplica( - const IRequestRetryPolicyPtr& retryPolicy, - const TClientContext& context, - const TReplicaId& replicaId, - const TAlterTableReplicaOptions& options); - -void DeleteRows( - const IRequestRetryPolicyPtr& retryPolicy, - const TClientContext& context, - const TYPath& path, - const TNode::TListType& keys, - const TDeleteRowsOptions& options); - -void FreezeTable( - const IRequestRetryPolicyPtr& retryPolicy, - const TClientContext& context, - const TYPath& path, - const TFreezeTableOptions& options); - -void UnfreezeTable( - const IRequestRetryPolicyPtr& retryPolicy, - const TClientContext& context, - const TYPath& path, - const TUnfreezeTableOptions& options); - - -// Transactions -void AbortTransaction( - const IRequestRetryPolicyPtr& retryPolicy, - const TClientContext& context, - const TTransactionId& transactionId); - -void CommitTransaction( - const IRequestRetryPolicyPtr& retryPolicy, - const TClientContext& context, - const TTransactionId& transactionId); - -TTransactionId StartTransaction( - const IRequestRetryPolicyPtr& retryPolicy, - const TClientContext& context, - const TTransactionId& parentId, - const TStartTransactionOptions& options); - -//////////////////////////////////////////////////////////////////////////////// - -template<typename TSrc, typename TBatchAdder> -auto BatchTransform( - const IRequestRetryPolicyPtr& retryPolicy, - const TClientContext& context, - const TSrc& src, - TBatchAdder batchAdder, - const TExecuteBatchOptions& executeBatchOptions = {}) -{ - TRawBatchRequest batch(context.Config); - using TFuture = decltype(batchAdder(batch, *std::begin(src))); - TVector<TFuture> futures; - for (const auto& el : src) { - futures.push_back(batchAdder(batch, el)); - } - ExecuteBatch(retryPolicy, context, batch, 
executeBatchOptions); - using TDst = decltype(futures[0].ExtractValueSync()); - TVector<TDst> result; - result.reserve(std::size(src)); - for (auto& future : futures) { - result.push_back(future.ExtractValueSync()); - } - return result; -} - -//////////////////////////////////////////////////////////////////////////////// - -} // namespace NDetail::NRawClient -} // namespace NYT diff --git a/yt/cpp/mapreduce/raw_client/rpc_parameters_serialization.cpp b/yt/cpp/mapreduce/raw_client/rpc_parameters_serialization.cpp deleted file mode 100644 index 1936266d0d..0000000000 --- a/yt/cpp/mapreduce/raw_client/rpc_parameters_serialization.cpp +++ /dev/null @@ -1,873 +0,0 @@ -#include "rpc_parameters_serialization.h" - -#include <yt/cpp/mapreduce/common/helpers.h> - -#include <yt/cpp/mapreduce/interface/config.h> -#include <yt/cpp/mapreduce/interface/client_method_options.h> -#include <yt/cpp/mapreduce/interface/operation.h> -#include <yt/cpp/mapreduce/interface/serialize.h> - -#include <library/cpp/yson/node/node.h> -#include <library/cpp/yson/node/node_io.h> -#include <library/cpp/yson/node/node_builder.h> - -#include <util/generic/guid.h> -#include <util/string/cast.h> - -namespace NYT::NDetail::NRawClient { - -using ::ToString; - -//////////////////////////////////////////////////////////////////// - -static void SetTransactionIdParam(TNode* node, const TTransactionId& transactionId) -{ - if (transactionId != TTransactionId()) { - (*node)["transaction_id"] = GetGuidAsString(transactionId); - } -} - -static void SetOperationIdParam(TNode* node, const TOperationId& operationId) -{ - (*node)["operation_id"] = GetGuidAsString(operationId); -} - -static void SetPathParam(TNode* node, const TString& pathPrefix, const TYPath& path) -{ - (*node)["path"] = AddPathPrefix(path, pathPrefix); -} - -static TNode SerializeAttributeFilter(const TAttributeFilter& attributeFilter) -{ - TNode result = TNode::CreateList(); - for (const auto& attribute : attributeFilter.Attributes_) { - 
result.Add(attribute); - } - return result; -} - -static TNode SerializeAttributeFilter(const TOperationAttributeFilter& attributeFilter) -{ - TNode result = TNode::CreateList(); - for (const auto& attribute : attributeFilter.Attributes_) { - result.Add(ToString(attribute)); - } - return result; -} - -template <typename TOptions> -static void SetFirstLastTabletIndex(TNode* node, const TOptions& options) -{ - if (options.FirstTabletIndex_) { - (*node)["first_tablet_index"] = *options.FirstTabletIndex_; - } - if (options.LastTabletIndex_) { - (*node)["last_tablet_index"] = *options.LastTabletIndex_; - } -} - -static TString GetDefaultTransactionTitle() -{ - const auto processState = TProcessState::Get(); - TStringStream res; - - res << "User transaction. Created by: " << processState->UserName << " on " << processState->FqdnHostName - << " client: " << processState->ClientVersion << " pid: " << processState->Pid; - if (!processState->CommandLine.empty()) { - res << " program: " << processState->CommandLine[0]; - } else { - res << " command line is unknown probably NYT::Initialize was never called"; - } - -#ifndef NDEBUG - res << " build: debug"; -#endif - - return res.Str(); -} - -template <typename T> -void SerializeMasterReadOptions(TNode* node, const TMasterReadOptions<T>& options) -{ - if (options.ReadFrom_) { - (*node)["read_from"] = ToString(*options.ReadFrom_); - } -} - -//////////////////////////////////////////////////////////////////// - -TNode SerializeParamsForCreate( - const TTransactionId& transactionId, - const TString& pathPrefix, - const TYPath& path, - ENodeType type, - const TCreateOptions& options) -{ - TNode result; - SetTransactionIdParam(&result, transactionId); - SetPathParam(&result, pathPrefix, path); - result["recursive"] = options.Recursive_; - result["type"] = ToString(type); - result["ignore_existing"] = options.IgnoreExisting_; - result["force"] = options.Force_; - if (options.Attributes_) { - result["attributes"] = 
*options.Attributes_; - } - return result; -} - -TNode SerializeParamsForRemove( - const TTransactionId& transactionId, - const TString& pathPrefix, - const TYPath& path, - const TRemoveOptions& options) -{ - TNode result; - SetTransactionIdParam(&result, transactionId); - SetPathParam(&result, pathPrefix, path); - result["recursive"] = options.Recursive_; - result["force"] = options.Force_; - return result; -} - -TNode SerializeParamsForExists( - const TTransactionId& transactionId, - const TString& pathPrefix, - const TYPath& path, - const TExistsOptions& options) -{ - TNode result; - SetTransactionIdParam(&result, transactionId); - SetPathParam(&result, pathPrefix, path); - SerializeMasterReadOptions(&result, options); - return result; -} - -TNode SerializeParamsForGet( - const TTransactionId& transactionId, - const TString& pathPrefix, - const TYPath& path, - const TGetOptions& options) -{ - TNode result; - SetTransactionIdParam(&result, transactionId); - SetPathParam(&result, pathPrefix, path); - SerializeMasterReadOptions(&result, options); - if (options.AttributeFilter_) { - result["attributes"] = SerializeAttributeFilter(*options.AttributeFilter_); - } - if (options.MaxSize_) { - result["max_size"] = *options.MaxSize_; - } - return result; -} - -TNode SerializeParamsForSet( - const TTransactionId& transactionId, - const TString& pathPrefix, - const TYPath& path, - const TSetOptions& options) -{ - TNode result; - SetTransactionIdParam(&result, transactionId); - SetPathParam(&result, pathPrefix, path); - result["recursive"] = options.Recursive_; - if (options.Force_) { - result["force"] = *options.Force_; - } - return result; -} - -TNode SerializeParamsForMultisetAttributes( - const TTransactionId& transactionId, - const TString& pathPrefix, - const TYPath& path, - [[maybe_unused]] const TMultisetAttributesOptions& options) -{ - TNode result; - SetTransactionIdParam(&result, transactionId); - SetPathParam(&result, pathPrefix, path); - return result; -} - 
-TNode SerializeParamsForList( - const TTransactionId& transactionId, - const TString& pathPrefix, - const TYPath& path, - const TListOptions& options) -{ - TNode result; - SetTransactionIdParam(&result, transactionId); - SetPathParam(&result, pathPrefix, path); - SerializeMasterReadOptions(&result, options); - if (options.MaxSize_) { - result["max_size"] = *options.MaxSize_; - } - if (options.AttributeFilter_) { - result["attributes"] = SerializeAttributeFilter(*options.AttributeFilter_); - } - return result; -} - -TNode SerializeParamsForCopy( - const TTransactionId& transactionId, - const TString& pathPrefix, - const TYPath& sourcePath, - const TYPath& destinationPath, - const TCopyOptions& options) -{ - TNode result; - SetTransactionIdParam(&result, transactionId); - result["source_path"] = AddPathPrefix(sourcePath, pathPrefix); - result["destination_path"] = AddPathPrefix(destinationPath, pathPrefix); - result["recursive"] = options.Recursive_; - result["force"] = options.Force_; - result["preserve_account"] = options.PreserveAccount_; - if (options.PreserveExpirationTime_) { - result["preserve_expiration_time"] = *options.PreserveExpirationTime_; - } - return result; -} - -TNode SerializeParamsForMove( - const TTransactionId& transactionId, - const TString& pathPrefix, - const TYPath& sourcePath, - const TYPath& destinationPath, - const TMoveOptions& options) -{ - TNode result; - SetTransactionIdParam(&result, transactionId); - result["source_path"] = AddPathPrefix(sourcePath, pathPrefix); - result["destination_path"] = AddPathPrefix(destinationPath, pathPrefix); - result["recursive"] = options.Recursive_; - result["force"] = options.Force_; - result["preserve_account"] = options.PreserveAccount_; - if (options.PreserveExpirationTime_) { - result["preserve_expiration_time"] = *options.PreserveExpirationTime_; - } - return result; -} - -TNode SerializeParamsForLink( - const TTransactionId& transactionId, - const TString& pathPrefix, - const TYPath& targetPath, 
- const TYPath& linkPath, - const TLinkOptions& options) -{ - TNode result; - SetTransactionIdParam(&result, transactionId); - result["target_path"] = AddPathPrefix(targetPath, pathPrefix); - result["link_path"] = AddPathPrefix(linkPath, pathPrefix); - result["recursive"] = options.Recursive_; - result["ignore_existing"] = options.IgnoreExisting_; - result["force"] = options.Force_; - if (options.Attributes_) { - result["attributes"] = *options.Attributes_; - } - return result; -} - -TNode SerializeParamsForLock( - const TTransactionId& transactionId, - const TString& pathPrefix, - const TYPath& path, - ELockMode mode, - const TLockOptions& options) -{ - TNode result; - SetTransactionIdParam(&result, transactionId); - SetPathParam(&result, pathPrefix, path); - result["mode"] = ToString(mode); - result["waitable"] = options.Waitable_; - if (options.AttributeKey_) { - result["attribute_key"] = *options.AttributeKey_; - } - if (options.ChildKey_) { - result["child_key"] = *options.ChildKey_; - } - return result; -} - -TNode SerializeParamsForUnlock( - const TTransactionId& transactionId, - const TString& pathPrefix, - const TYPath& path, - const TUnlockOptions& options) -{ - TNode result; - SetTransactionIdParam(&result, transactionId); - SetPathParam(&result, pathPrefix, path); - Y_UNUSED(options); - return result; -} - -TNode SerializeParamsForConcatenate( - const TTransactionId& transactionId, - const TString& pathPrefix, - const TVector<TRichYPath>& sourcePaths, - const TRichYPath& destinationPath, - const TConcatenateOptions& options) -{ - TNode result; - SetTransactionIdParam(&result, transactionId); - { - auto actualDestination = destinationPath; - actualDestination.Path(AddPathPrefix(actualDestination.Path_, pathPrefix)); - if (options.Append_) { - actualDestination.Append(*options.Append_); - } - result["destination_path"] = PathToNode(actualDestination); - } - auto& sourcePathsNode = result["source_paths"]; - for (const auto& path : sourcePaths) { - auto 
actualSource = path; - actualSource.Path(AddPathPrefix(actualSource.Path_, pathPrefix)); - sourcePathsNode.Add(PathToNode(actualSource)); - } - return result; -} - -TNode SerializeParamsForPingTx( - const TTransactionId& transactionId) -{ - TNode result; - SetTransactionIdParam(&result, transactionId); - return result; -} - -TNode SerializeParamsForListOperations( - const TListOperationsOptions& options) -{ - TNode result = TNode::CreateMap(); - if (options.FromTime_) { - result["from_time"] = ToString(*options.FromTime_); - } - if (options.ToTime_) { - result["to_time"] = ToString(*options.ToTime_); - } - if (options.CursorTime_) { - result["cursor_time"] = ToString(*options.CursorTime_); - } - if (options.CursorDirection_) { - result["cursor_direction"] = ToString(*options.CursorDirection_); - } - if (options.Pool_) { - result["pool"] = *options.Pool_; - } - if (options.Filter_) { - result["filter"] = *options.Filter_; - } - if (options.User_) { - result["user"] = *options.User_; - } - if (options.State_) { - result["state"] = *options.State_; - } - if (options.Type_) { - result["type"] = ToString(*options.Type_); - } - if (options.WithFailedJobs_) { - result["with_failed_jobs"] = *options.WithFailedJobs_; - } - if (options.IncludeCounters_) { - result["include_counters"] = *options.IncludeCounters_; - } - if (options.IncludeArchive_) { - result["include_archive"] = *options.IncludeArchive_; - } - if (options.Limit_) { - result["limit"] = *options.Limit_; - } - return result; -} - -TNode SerializeParamsForGetOperation( - const TOperationId& operationId, - const TGetOperationOptions& options) -{ - TNode result; - SetOperationIdParam(&result, operationId); - if (options.AttributeFilter_) { - result["attributes"] = SerializeAttributeFilter(*options.AttributeFilter_); - } - return result; -} - -TNode SerializeParamsForAbortOperation(const TOperationId& operationId) -{ - TNode result; - SetOperationIdParam(&result, operationId); - return result; -} - -TNode 
SerializeParamsForCompleteOperation(const TOperationId& operationId) -{ - TNode result; - SetOperationIdParam(&result, operationId); - return result; -} - -TNode SerializeParamsForSuspendOperation( - const TOperationId& operationId, - const TSuspendOperationOptions& options) -{ - TNode result; - SetOperationIdParam(&result, operationId); - if (options.AbortRunningJobs_) { - result["abort_running_jobs"] = *options.AbortRunningJobs_; - } - return result; -} - -TNode SerializeParamsForResumeOperation( - const TOperationId& operationId, - const TResumeOperationOptions& options) -{ - TNode result; - SetOperationIdParam(&result, operationId); - Y_UNUSED(options); - return result; -} - -TNode SerializeParamsForUpdateOperationParameters( - const TOperationId& operationId, - const TUpdateOperationParametersOptions& options) -{ - TNode result; - SetOperationIdParam(&result, operationId); - TNode& parameters = result["parameters"]; - if (options.Pool_) { - parameters["pool"] = *options.Pool_; - } - if (options.Weight_) { - parameters["weight"] = *options.Weight_; - } - if (!options.Owners_.empty()) { - parameters["owners"] = TNode::CreateList(); - for (const auto& owner : options.Owners_) { - parameters["owners"].Add(owner); - } - } - if (options.SchedulingOptionsPerPoolTree_) { - parameters["scheduling_options_per_pool_tree"] = TNode::CreateMap(); - for (const auto& entry : options.SchedulingOptionsPerPoolTree_->Options_) { - auto schedulingOptionsNode = TNode::CreateMap(); - const auto& schedulingOptions = entry.second; - if (schedulingOptions.Pool_) { - schedulingOptionsNode["pool"] = *schedulingOptions.Pool_; - } - if (schedulingOptions.Weight_) { - schedulingOptionsNode["weight"] = *schedulingOptions.Weight_; - } - if (schedulingOptions.ResourceLimits_) { - auto resourceLimitsNode = TNode::CreateMap(); - const auto& resourceLimits = *schedulingOptions.ResourceLimits_; - if (resourceLimits.UserSlots_) { - resourceLimitsNode["user_slots"] = *resourceLimits.UserSlots_; - } 
- if (resourceLimits.Memory_) { - resourceLimitsNode["memory"] = *resourceLimits.Memory_; - } - if (resourceLimits.Cpu_) { - resourceLimitsNode["cpu"] = *resourceLimits.Cpu_; - } - if (resourceLimits.Network_) { - resourceLimitsNode["network"] = *resourceLimits.Network_; - } - schedulingOptionsNode["resource_limits"] = std::move(resourceLimitsNode); - } - parameters["scheduling_options_per_pool_tree"][entry.first] = std::move(schedulingOptionsNode); - } - } - return result; -} - -TNode SerializeParamsForGetJob( - const TOperationId& operationId, - const TJobId& jobId, - const TGetJobOptions& /* options */) -{ - TNode result; - SetOperationIdParam(&result, operationId); - result["job_id"] = GetGuidAsString(jobId); - return result; -} - -TNode SerializeParamsForListJobs( - const TOperationId& operationId, - const TListJobsOptions& options) -{ - TNode result; - SetOperationIdParam(&result, operationId); - - if (options.Type_) { - result["type"] = ToString(*options.Type_); - } - if (options.State_) { - result["state"] = ToString(*options.State_); - } - if (options.Address_) { - result["address"] = *options.Address_; - } - if (options.WithStderr_) { - result["with_stderr"] = *options.WithStderr_; - } - if (options.WithSpec_) { - result["with_spec"] = *options.WithSpec_; - } - if (options.WithFailContext_) { - result["with_fail_context"] = *options.WithFailContext_; - } - - if (options.SortField_) { - result["sort_field"] = ToString(*options.SortField_); - } - if (options.SortOrder_) { - result["sort_order"] = ToString(*options.SortOrder_); - } - - if (options.Offset_) { - result["offset"] = *options.Offset_; - } - if (options.Limit_) { - result["limit"] = *options.Limit_; - } - - if (options.IncludeCypress_) { - result["include_cypress"] = *options.IncludeCypress_; - } - if (options.IncludeArchive_) { - result["include_archive"] = *options.IncludeArchive_; - } - if (options.IncludeControllerAgent_) { - result["include_controller_agent"] = 
*options.IncludeControllerAgent_; - } - return result; -} - -TNode SerializeParametersForInsertRows( - const TString& pathPrefix, - const TYPath& path, - const TInsertRowsOptions& options) -{ - TNode result; - SetPathParam(&result, pathPrefix, path); - if (options.Aggregate_) { - result["aggregate"] = *options.Aggregate_; - } - if (options.Update_) { - result["update"] = *options.Update_; - } - if (options.Atomicity_) { - result["atomicity"] = ToString(*options.Atomicity_); - } - if (options.Durability_) { - result["durability"] = ToString(*options.Durability_); - } - if (options.RequireSyncReplica_) { - result["require_sync_replica"] = *options.RequireSyncReplica_; - } - return result; -} - -TNode SerializeParametersForDeleteRows( - const TString& pathPrefix, - const TYPath& path, - const TDeleteRowsOptions& options) -{ - TNode result; - SetPathParam(&result, pathPrefix, path); - if (options.Atomicity_) { - result["atomicity"] = ToString(*options.Atomicity_); - } - if (options.Durability_) { - result["durability"] = ToString(*options.Durability_); - } - if (options.RequireSyncReplica_) { - result["require_sync_replica"] = *options.RequireSyncReplica_; - } - return result; -} - -TNode SerializeParametersForTrimRows( - const TString& pathPrefix, - const TYPath& path, - const TTrimRowsOptions& /* options*/) -{ - TNode result; - SetPathParam(&result, pathPrefix, path); - return result; -} - -TNode SerializeParamsForParseYPath(const TRichYPath& path) -{ - TNode result; - result["path"] = PathToNode(path); - return result; -} - -TNode SerializeParamsForEnableTableReplica( - const TReplicaId& replicaId) -{ - TNode result; - result["replica_id"] = GetGuidAsString(replicaId); - return result; -} - -TNode SerializeParamsForDisableTableReplica( - const TReplicaId& replicaId) -{ - TNode result; - result["replica_id"] = GetGuidAsString(replicaId); - return result; -} - -TNode SerializeParamsForAlterTableReplica(const TReplicaId& replicaId, const TAlterTableReplicaOptions& 
options) -{ - TNode result; - result["replica_id"] = GetGuidAsString(replicaId); - if (options.Enabled_) { - result["enabled"] = *options.Enabled_; - } - if (options.Mode_) { - result["mode"] = ToString(*options.Mode_); - } - return result; -} - -TNode SerializeParamsForFreezeTable( - const TString& pathPrefix, - const TYPath& path, - const TFreezeTableOptions& options) -{ - TNode result; - SetPathParam(&result, pathPrefix, path); - SetFirstLastTabletIndex(&result, options); - return result; -} - -TNode SerializeParamsForUnfreezeTable( - const TString& pathPrefix, - const TYPath& path, - const TUnfreezeTableOptions& options) -{ - TNode result; - SetPathParam(&result, pathPrefix, path); - SetFirstLastTabletIndex(&result, options); - return result; -} - -TNode SerializeParamsForAlterTable( - const TTransactionId& transactionId, - const TString& pathPrefix, - const TYPath& path, - const TAlterTableOptions& options) -{ - TNode result; - SetTransactionIdParam(&result, transactionId); - SetPathParam(&result, pathPrefix, path); - if (options.Dynamic_) { - result["dynamic"] = *options.Dynamic_; - } - if (options.Schema_) { - TNode schema; - { - TNodeBuilder builder(&schema); - Serialize(*options.Schema_, &builder); - } - result["schema"] = schema; - } - if (options.UpstreamReplicaId_) { - result["upstream_replica_id"] = GetGuidAsString(*options.UpstreamReplicaId_); - } - return result; -} - -TNode SerializeParamsForGetTableColumnarStatistics( - const TTransactionId& transactionId, - const TVector<TRichYPath>& paths, - const TGetTableColumnarStatisticsOptions& options) -{ - TNode result; - SetTransactionIdParam(&result, transactionId); - for (const auto& path : paths) { - result["paths"].Add(PathToNode(path)); - } - if (options.FetcherMode_) { - result["fetcher_mode"] = ToString(*options.FetcherMode_); - } - return result; -} - -TNode SerializeParamsForGetTablePartitions( - const TTransactionId& transactionId, - const TVector<TRichYPath>& paths, - const 
TGetTablePartitionsOptions& options) -{ - TNode result; - SetTransactionIdParam(&result, transactionId); - for (const auto& path : paths) { - result["paths"].Add(PathToNode(path)); - } - result["partition_mode"] = ToString(options.PartitionMode_); - result["data_weight_per_partition"] = options.DataWeightPerPartition_; - if (options.MaxPartitionCount_) { - result["max_partition_count"] = *options.MaxPartitionCount_; - } - result["adjust_data_weight_per_partition"] = options.AdjustDataWeightPerPartition_; - return result; -} - -TNode SerializeParamsForGetFileFromCache( - const TTransactionId& transactionId, - const TString& md5Signature, - const TYPath& cachePath, - const TGetFileFromCacheOptions&) -{ - TNode result; - SetTransactionIdParam(&result, transactionId); - result["md5"] = md5Signature; - result["cache_path"] = cachePath; - return result; -} - -TNode SerializeParamsForPutFileToCache( - const TTransactionId& transactionId, - const TString& pathPrefix, - const TYPath& filePath, - const TString& md5Signature, - const TYPath& cachePath, - const TPutFileToCacheOptions& options) -{ - TNode result; - SetTransactionIdParam(&result, transactionId); - SetPathParam(&result, pathPrefix, filePath); - result["md5"] = md5Signature; - result["cache_path"] = cachePath; - if (options.PreserveExpirationTimeout_) { - result["preserve_expiration_timeout"] = *options.PreserveExpirationTimeout_; - } - return result; -} - -TNode SerializeParamsForSkyShareTable( - const TString& serverName, - const TString& pathPrefix, - const std::vector<TYPath>& tablePaths, - const TSkyShareTableOptions& options) -{ - TNode result; - - if (tablePaths.size() == 1) { - SetPathParam(&result, pathPrefix, tablePaths[0]); - } else { - auto pathList = TNode::CreateList(); - for (const auto& p : tablePaths) { - pathList.Add(AddPathPrefix(p, pathPrefix)); - } - result["paths"] = pathList; - } - result["cluster"] = serverName; - - if (options.KeyColumns_) { - auto keyColumnsList = TNode::CreateList(); - 
for (const auto& s : options.KeyColumns_->Parts_) { - if (s.empty()) { - continue; - } - keyColumnsList.Add(s); - } - result["key_columns"] = keyColumnsList; - } - - if (options.EnableFastbone_) { - result["enable_fastbone"] = *options.EnableFastbone_; - } - - return result; -} - -TNode SerializeParamsForCheckPermission( - const TString& user, - EPermission permission, - const TString& pathPrefix, - const TYPath& path, - const TCheckPermissionOptions& options) -{ - TNode result; - SetPathParam(&result, pathPrefix, path); - result["path"] = path; - result["user"] = user; - result["permission"] = ToString(permission); - if (!options.Columns_.empty()) { - result["columns"] = TNode::CreateList(); - result["columns"].AsList().assign(options.Columns_.begin(), options.Columns_.end()); - } - return result; -} - -TNode SerializeParamsForGetTabletInfos( - const TString& pathPrefix, - const TYPath& path, - const TVector<int>& tabletIndexes, - const TGetTabletInfosOptions& options) -{ - Y_UNUSED(options); - TNode result; - SetPathParam(&result, pathPrefix, path); - result["tablet_indexes"] = TNode::CreateList(); - result["tablet_indexes"].AsList().assign(tabletIndexes.begin(), tabletIndexes.end()); - return result; -} - -TNode SerializeParamsForAbortTransaction(const TTransactionId& transactionId) -{ - TNode result; - SetTransactionIdParam(&result, transactionId); - return result; -} - -TNode SerializeParamsForCommitTransaction(const TTransactionId& transactionId) -{ - TNode result; - SetTransactionIdParam(&result, transactionId); - return result; -} - -TNode SerializeParamsForStartTransaction( - const TTransactionId& parentTransactionId, - TDuration txTimeout, - const TStartTransactionOptions& options) -{ - TNode result; - - SetTransactionIdParam(&result, parentTransactionId); - result["timeout"] = static_cast<i64>((options.Timeout_.GetOrElse(txTimeout).MilliSeconds())); - if (options.Deadline_) { - result["deadline"] = ToString(options.Deadline_); - } - - if 
(options.PingAncestors_) { - result["ping_ancestor_transactions"] = true; - } - - if (options.Attributes_ && !options.Attributes_->IsMap()) { - ythrow TApiUsageError() << "Attributes must be a Map node"; - } - - auto attributes = options.Attributes_.GetOrElse(TNode::CreateMap()); - if (options.Title_) { - attributes["title"] = *options.Title_; - } else if (!attributes.HasKey("title")) { - attributes["title"] = GetDefaultTransactionTitle(); - } - result["attributes"] = attributes; - - return result; -} - -//////////////////////////////////////////////////////////////////////////////// - -} // namespace NYT::NDetail::NRawClient diff --git a/yt/cpp/mapreduce/raw_client/rpc_parameters_serialization.h b/yt/cpp/mapreduce/raw_client/rpc_parameters_serialization.h deleted file mode 100644 index a60e3ea369..0000000000 --- a/yt/cpp/mapreduce/raw_client/rpc_parameters_serialization.h +++ /dev/null @@ -1,231 +0,0 @@ -#pragma once - -#include <yt/cpp/mapreduce/interface/fwd.h> -#include <yt/cpp/mapreduce/interface/client_method_options.h> - -namespace NYT::NDetail::NRawClient { - -//////////////////////////////////////////////////////////////////// - -TNode SerializeParamsForCreate( - const TTransactionId& transactionId, - const TString& pathPrefix, - const TYPath& path, - ENodeType type, - const TCreateOptions& options); - -TNode SerializeParamsForRemove( - const TTransactionId& transactionId, - const TString& pathPrefix, - const TYPath& path, - const TRemoveOptions& options); - -TNode SerializeParamsForExists( - const TTransactionId& transactionId, - const TString& pathPrefix, - const TYPath& path, - const TExistsOptions& options); - -TNode SerializeParamsForGet( - const TTransactionId& transactionId, - const TString& pathPrefix, - const TYPath& path, - const TGetOptions& options); - -TNode SerializeParamsForSet( - const TTransactionId& transactionId, - const TString& pathPrefix, - const TYPath& path, - const TSetOptions& options); - -TNode 
SerializeParamsForMultisetAttributes( - const TTransactionId& transactionId, - const TString& pathPrefix, - const TYPath& path, - const TMultisetAttributesOptions& options); - -TNode SerializeParamsForList( - const TTransactionId& transactionId, - const TString& pathPrefix, - const TYPath& path, - const TListOptions& options); - -TNode SerializeParamsForCopy( - const TTransactionId& transactionId, - const TString& pathPrefix, - const TYPath& sourcePath, - const TYPath& destinationPath, - const TCopyOptions& options); - -TNode SerializeParamsForMove( - const TTransactionId& transactionId, - const TString& pathPrefix, - const TYPath& sourcePath, - const TYPath& destinationPath, - const TMoveOptions& options); - -TNode SerializeParamsForLink( - const TTransactionId& transactionId, - const TString& pathPrefix, - const TYPath& targetPath, - const TYPath& linkPath, - const TLinkOptions& options); - -TNode SerializeParamsForLock( - const TTransactionId& transactionId, - const TString& pathPrefix, - const TYPath& path, - ELockMode mode, - const TLockOptions& options); - -TNode SerializeParamsForUnlock( - const TTransactionId& transactionId, - const TString& pathPrefix, - const TYPath& path, - const TUnlockOptions& options); - -TNode SerializeParamsForConcatenate( - const TTransactionId& transactionId, - const TString& pathPrefix, - const TVector<TRichYPath>& sourcePaths, - const TRichYPath& destinationPath, - const TConcatenateOptions& options); - -TNode SerializeParamsForPingTx( - const TTransactionId& transactionId); - -TNode SerializeParamsForGetOperation( - const TOperationId& operationId, - const TGetOperationOptions& options); - -TNode SerializeParamsForAbortOperation( - const TOperationId& operationId); - -TNode SerializeParamsForCompleteOperation( - const TOperationId& operationId); - -TNode SerializeParamsForSuspendOperation( - const TOperationId& operationId, - const TSuspendOperationOptions& options); - -TNode SerializeParamsForResumeOperation( - const 
TOperationId& operationId, - const TResumeOperationOptions& options); - -TNode SerializeParamsForListOperations( - const TListOperationsOptions& options); - -TNode SerializeParamsForUpdateOperationParameters( - const TOperationId& operationId, - const TUpdateOperationParametersOptions& options); - -TNode SerializeParamsForGetJob( - const TOperationId& operationId, - const TJobId& jobId, - const TGetJobOptions& options); - -TNode SerializeParamsForListJobs( - const TOperationId& operationId, - const TListJobsOptions& options); - -TNode SerializeParametersForInsertRows( - const TString& pathPrefix, - const TYPath& path, - const TInsertRowsOptions& options); - -TNode SerializeParametersForDeleteRows( - const TString& pathPrefix, - const TYPath& path, - const TDeleteRowsOptions& options); - -TNode SerializeParametersForTrimRows( - const TString& pathPrefix, - const TYPath& path, - const TTrimRowsOptions& options); - -TNode SerializeParamsForParseYPath( - const TRichYPath& path); - -TNode SerializeParamsForEnableTableReplica( - const TReplicaId& replicaId); - -TNode SerializeParamsForDisableTableReplica( - const TReplicaId& replicaId); - -TNode SerializeParamsForAlterTableReplica( - const TReplicaId& replicaId, - const TAlterTableReplicaOptions& options); - -TNode SerializeParamsForFreezeTable( - const TString& pathPrefix, - const TYPath& path, - const TFreezeTableOptions& options); - -TNode SerializeParamsForUnfreezeTable( - const TString& pathPrefix, - const TYPath& path, - const TUnfreezeTableOptions& options); - -TNode SerializeParamsForAlterTable( - const TTransactionId& transactionId, - const TString& pathPrefix, - const TYPath& path, - const TAlterTableOptions& options); - -TNode SerializeParamsForGetTableColumnarStatistics( - const TTransactionId& transactionId, - const TVector<TRichYPath>& paths, - const TGetTableColumnarStatisticsOptions& options); - -TNode SerializeParamsForGetTablePartitions( - const TTransactionId& transactionId, - const 
TVector<TRichYPath>& paths, - const TGetTablePartitionsOptions& options); - -TNode SerializeParamsForGetFileFromCache( - const TTransactionId& transactionId, - const TString& md5Signature, - const TYPath& cachePath, - const TGetFileFromCacheOptions&); - -TNode SerializeParamsForPutFileToCache( - const TTransactionId& transactionId, - const TString& pathPrefix, - const TYPath& filePath, - const TString& md5Signature, - const TYPath& cachePath, - const TPutFileToCacheOptions& options); - -TNode SerializeParamsForSkyShareTable( - const TString& serverName, - const TString& pathPrefix, - const std::vector<TYPath>& tablePaths, - const TSkyShareTableOptions& options); - -TNode SerializeParamsForCheckPermission( - const TString& user, - EPermission permission, - const TString& pathPrefix, - const TYPath& path, - const TCheckPermissionOptions& options); - -TNode SerializeParamsForGetTabletInfos( - const TString& pathPrefix, - const TYPath& path, - const TVector<int>& tabletIndexes, - const TGetTabletInfosOptions& options); - -TNode SerializeParamsForAbortTransaction( - const TTransactionId& transactionId); - -TNode SerializeParamsForCommitTransaction( - const TTransactionId& transactionId); - -TNode SerializeParamsForStartTransaction( - const TTransactionId& parentTransactionId, - TDuration txTimeout, - const TStartTransactionOptions& options); - -//////////////////////////////////////////////////////////////////// - -} // namespace NYT::NDetail::NRawClient diff --git a/yt/cpp/mapreduce/raw_client/ya.make b/yt/cpp/mapreduce/raw_client/ya.make deleted file mode 100644 index 0d03aae80c..0000000000 --- a/yt/cpp/mapreduce/raw_client/ya.make +++ /dev/null @@ -1,19 +0,0 @@ -LIBRARY() - -INCLUDE(${ARCADIA_ROOT}/yt/ya_cpp.make.inc) - -SRCS( - raw_batch_request.cpp - raw_requests.cpp - rpc_parameters_serialization.cpp -) - -PEERDIR( - yt/cpp/mapreduce/common - yt/cpp/mapreduce/http - yt/cpp/mapreduce/interface - yt/cpp/mapreduce/interface/logging - library/cpp/yson/node -) - -END() 
diff --git a/yt/cpp/mapreduce/skiff/skiff_schema.h b/yt/cpp/mapreduce/skiff/skiff_schema.h deleted file mode 100644 index e8c97de8e8..0000000000 --- a/yt/cpp/mapreduce/skiff/skiff_schema.h +++ /dev/null @@ -1,3 +0,0 @@ -#pragma once - -#include <library/cpp/skiff/skiff_schema.h> diff --git a/yt/cpp/mapreduce/skiff/unchecked_parser.h b/yt/cpp/mapreduce/skiff/unchecked_parser.h deleted file mode 100644 index 8fd9f90b0b..0000000000 --- a/yt/cpp/mapreduce/skiff/unchecked_parser.h +++ /dev/null @@ -1 +0,0 @@ -#include <library/cpp/skiff/skiff.h> diff --git a/yt/cpp/mapreduce/skiff/wire_type.h b/yt/cpp/mapreduce/skiff/wire_type.h deleted file mode 100644 index 96d19c06d3..0000000000 --- a/yt/cpp/mapreduce/skiff/wire_type.h +++ /dev/null @@ -1 +0,0 @@ -#include <library/cpp/skiff/public.h> diff --git a/yt/cpp/mapreduce/skiff/ya.make b/yt/cpp/mapreduce/skiff/ya.make deleted file mode 100644 index 95d91ecd47..0000000000 --- a/yt/cpp/mapreduce/skiff/ya.make +++ /dev/null @@ -1,9 +0,0 @@ -LIBRARY() - -INCLUDE(${ARCADIA_ROOT}/yt/ya_cpp.make.inc) - -PEERDIR( - library/cpp/skiff -) - -END() diff --git a/yt/cpp/mapreduce/tests/yt_unittest_lib/yt_unittest_lib.h b/yt/cpp/mapreduce/tests/yt_unittest_lib/yt_unittest_lib.h deleted file mode 100644 index 37d9d501cd..0000000000 --- a/yt/cpp/mapreduce/tests/yt_unittest_lib/yt_unittest_lib.h +++ /dev/null @@ -1,194 +0,0 @@ -#pragma once - -#include <yt/cpp/mapreduce/interface/logging/logger.h> -#include <yt/cpp/mapreduce/interface/client.h> - -#include <yt/cpp/mapreduce/interface/config.h> - -#include <library/cpp/yson/node/node_io.h> - -#include <util/generic/bt_exception.h> - -#include <util/datetime/base.h> - -//////////////////////////////////////////////////////////////////////////////// - -template<> -void Out<NYT::TNode>(IOutputStream& s, const NYT::TNode& node); - -template<> -void Out<TGUID>(IOutputStream& s, const TGUID& guid); - -//////////////////////////////////////////////////////////////////////////////// - -namespace NYT { 
-namespace NTesting { - -//////////////////////////////////////////////////////////////////////////////// - -IClientPtr CreateTestClient(TString proxy = "", const TCreateClientOptions& options = {}); - -// Create map node by unique path in Cypress and return that path. -TYPath CreateTestDirectory(const IClientBasePtr& client); - -TString GenerateRandomData(size_t size, ui64 seed = 42); - -TVector<TNode> ReadTable(const IClientBasePtr& client, const TString& tablePath); - -//////////////////////////////////////////////////////////////////////////////// - -// TODO: should be removed, usages should be replaced with TConfigSaverGuard -class TZeroWaitLockPollIntervalGuard -{ -public: - TZeroWaitLockPollIntervalGuard(); - - ~TZeroWaitLockPollIntervalGuard(); - -private: - TDuration OldWaitLockPollInterval_; -}; - -//////////////////////////////////////////////////////////////////////////////// - -class TConfigSaverGuard -{ -public: - TConfigSaverGuard(); - ~TConfigSaverGuard(); - -private: - TConfig Config_; -}; - -//////////////////////////////////////////////////////////////////////////////// - -class TDebugMetricDiff -{ -public: - TDebugMetricDiff(TString name); - ui64 GetTotal() const; - -private: - TString Name_; - ui64 InitialValue_; -}; - -//////////////////////////////////////////////////////////////////////////////// - -struct TOwningYaMRRow -{ - TString Key; - TString SubKey; - TString Value; - - TOwningYaMRRow(const TYaMRRow& row = {}); - TOwningYaMRRow(TString key, TString subKey, TString value); - - operator TYaMRRow() const; -}; - -bool operator == (const TOwningYaMRRow& row1, const TOwningYaMRRow& row2); - -//////////////////////////////////////////////////////////////////////////////// - -class TTestFixture -{ -public: - explicit TTestFixture(const TCreateClientOptions& options = {}); - ~TTestFixture(); - - // Return precreated client. - IClientPtr GetClient() const; - - // Return newly created client. 
Useful for cases: - // - when we want to have multiple clients objects; - // - when we want to control to control destruction of client object; - IClientPtr CreateClient(const TCreateClientOptions& options = {}) const; - - IClientPtr CreateClientForUser(const TString& user, TCreateClientOptions options = {}); - - TYPath GetWorkingDir() const; - -private: - TConfigSaverGuard ConfigGuard_; - IClientPtr Client_; - TYPath WorkingDir_; -}; - -//////////////////////////////////////////////////////////////////////////////// - -class TTabletFixture - : public TTestFixture -{ -public: - TTabletFixture(); - -private: - void WaitForTabletCell(); -}; - -//////////////////////////////////////////////////////////////////////////////// - -// Compares only columns and only "name" and "type" fields of columns. -bool AreSchemasEqual(const TTableSchema& lhs, const TTableSchema& rhs); - -class TWaitFailedException - : public TWithBackTrace<yexception> -{ }; - -void WaitForPredicate(const std::function<bool()>& predicate, TDuration timeout = TDuration::Seconds(60)); - -//////////////////////////////////////////////////////////////////////////////// - -// Redirects all the LOG_* calls with the corresponding level to `stream`. -// Moreover, the LOG_* calls are delegated to `oldLogger`. 
-class TStreamTeeLogger - : public ILogger -{ -public: - TStreamTeeLogger(ELevel cutLevel, IOutputStream* stream, ILoggerPtr oldLogger); - void Log(ELevel level, const ::TSourceLocation& sourceLocation, const char* format, va_list args) override; - -private: - ILoggerPtr OldLogger_; - IOutputStream* Stream_; - ELevel Level_; -}; - -//////////////////////////////////////////////////////////////////////////////// - -template <typename T> -TString ToYson(const T& x) -{ - TNode result; - TNodeBuilder builder(&result); - Serialize(x, &builder); - return NodeToYsonString(result); -} - -//////////////////////////////////////////////////////////////////////////////// - -} // namespace NTesting -} // namespace NYT - -//////////////////////////////////////////////////////////////////////////////// - -template <> -void Out<NYT::NTesting::TOwningYaMRRow>(IOutputStream& out, const NYT::NTesting::TOwningYaMRRow& row); - -//////////////////////////////////////////////////////////////////////////////// - -// for UNITTEST() -#define ASSERT_SERIALIZABLES_EQUAL(a, b) \ - UNIT_ASSERT_EQUAL_C(a, b, NYT::NTesting::ToYson(a) << " != " << NYT::NTesting::ToYson(b)) - -#define ASSERT_SERIALIZABLES_UNEQUAL(a, b) \ - UNIT_ASSERT_UNEQUAL_C(a, b, NYT::NTesting::ToYson(a) << " == " << NYT::NTesting::ToYson(b)) - -// for GTEST() -#define ASSERT_SERIALIZABLES_EQ(a, b) \ - ASSERT_EQ(a, b) << NYT::NTesting::ToYson(a) << " != " << NYT::NTesting::ToYson(b) - -#define ASSERT_SERIALIZABLES_NE(a, b) \ - ASSERT_NE(a, b) << NYT::NTesting::ToYson(a) << " == " << NYT::NTesting::ToYson(b) |