diff options
author | max42 <[email protected]> | 2023-06-30 11:13:34 +0300 |
---|---|---|
committer | max42 <[email protected]> | 2023-06-30 11:13:34 +0300 |
commit | 3e1899838408bbad47622007aa382bc8a2b01f87 (patch) | |
tree | 0f21c1e6add187ddb6c3ccc048a7d640ce03fb87 /yt/cpp/mapreduce/interface | |
parent | 5463eb3f5e72a86f858a3d27c886470a724ede34 (diff) |
Revert "YT-19324: move YT provider to ydb/library/yql"
This reverts commit ca272f12fdd0e8d5c3e957fc87939148f1caaf72, reversing
changes made to 49f8acfc8b0b5c0071b804423bcf53fda26c7c12.
Diffstat (limited to 'yt/cpp/mapreduce/interface')
66 files changed, 0 insertions, 21022 deletions
diff --git a/yt/cpp/mapreduce/interface/batch_request.cpp b/yt/cpp/mapreduce/interface/batch_request.cpp deleted file mode 100644 index fefdacb61a0..00000000000 --- a/yt/cpp/mapreduce/interface/batch_request.cpp +++ /dev/null @@ -1,15 +0,0 @@ -#include "batch_request.h" -#include "client.h" - -namespace NYT { - -//////////////////////////////////////////////////////////////////////////////// - -IBatchRequestBase& IBatchRequest::WithTransaction(const ITransactionPtr& transaction) -{ - return WithTransaction(transaction->GetId()); -} - -//////////////////////////////////////////////////////////////////////////////// - -} // namespace NYT diff --git a/yt/cpp/mapreduce/interface/batch_request.h b/yt/cpp/mapreduce/interface/batch_request.h deleted file mode 100644 index 3ea28f76fd5..00000000000 --- a/yt/cpp/mapreduce/interface/batch_request.h +++ /dev/null @@ -1,222 +0,0 @@ -#pragma once - -#include "fwd.h" - -#include "client_method_options.h" - -#include <library/cpp/threading/future/future.h> -#include <util/generic/ptr.h> - -namespace NYT { - -//////////////////////////////////////////////////////////////////// - -/// Helper base of @ref NYT::IBatchRequest holding most of useful methods. -class IBatchRequestBase - : public TThrRefBase -{ -public: - virtual ~IBatchRequestBase() = default; - - /// - /// @brief Create cypress node. - /// - /// @see NYT::ICypressClient::Create - virtual ::NThreading::TFuture<TNodeId> Create( - const TYPath& path, - ENodeType type, - const TCreateOptions& options = TCreateOptions()) = 0; - - /// - /// @brief Remove cypress node. - /// - /// @see NYT::ICypressClient::Remove - virtual ::NThreading::TFuture<void> Remove( - const TYPath& path, - const TRemoveOptions& options = TRemoveOptions()) = 0; - - /// - /// @brief Check wether cypress node exists. 
- /// - /// @see NYT::ICypressClient::Exists - virtual ::NThreading::TFuture<bool> Exists( - const TYPath& path, - const TExistsOptions& options = TExistsOptions()) = 0; - - /// - /// @brief Get cypress node. - /// - /// @see NYT::ICypressClient::Get - virtual ::NThreading::TFuture<TNode> Get( - const TYPath& path, - const TGetOptions& options = TGetOptions()) = 0; - - /// - /// @brief Set cypress node. - /// - /// @see NYT::ICypressClient::Set - virtual ::NThreading::TFuture<void> Set( - const TYPath& path, - const TNode& node, - const TSetOptions& options = TSetOptions()) = 0; - - /// - /// @brief List cypress directory. - /// - /// @see NYT::ICypressClient::List - virtual ::NThreading::TFuture<TNode::TListType> List( - const TYPath& path, - const TListOptions& options = TListOptions()) = 0; - - /// - /// @brief Copy cypress node. - /// - /// @see NYT::ICypressClient::Copy - virtual ::NThreading::TFuture<TNodeId> Copy( - const TYPath& sourcePath, - const TYPath& destinationPath, - const TCopyOptions& options = TCopyOptions()) = 0; - - /// - /// @brief Move cypress node. - /// - /// @see NYT::ICypressClient::Move - virtual ::NThreading::TFuture<TNodeId> Move( - const TYPath& sourcePath, - const TYPath& destinationPath, - const TMoveOptions& options = TMoveOptions()) = 0; - - /// - /// @brief Create symbolic link. - /// - /// @see NYT::ICypressClient::Link. - virtual ::NThreading::TFuture<TNodeId> Link( - const TYPath& targetPath, - const TYPath& linkPath, - const TLinkOptions& options = TLinkOptions()) = 0; - - /// - /// @brief Lock cypress node. - /// - /// @see NYT::ICypressClient::Lock - virtual ::NThreading::TFuture<ILockPtr> Lock( - const TYPath& path, - ELockMode mode, - const TLockOptions& options = TLockOptions()) = 0; - - /// - /// @brief Unlock cypress node. 
- /// - /// @see NYT::ICypressClient::Unlock - virtual ::NThreading::TFuture<void> Unlock( - const TYPath& path, - const TUnlockOptions& options = TUnlockOptions()) = 0; - - /// - /// @brief Abort operation. - /// - /// @see NYT::IClient::AbortOperation - virtual ::NThreading::TFuture<void> AbortOperation(const TOperationId& operationId) = 0; - - /// - /// @brief Force complete operation. - /// - /// @see NYT::IClient::CompleteOperation - virtual ::NThreading::TFuture<void> CompleteOperation(const TOperationId& operationId) = 0; - - /// - /// @brief Suspend operation. - /// - /// @see NYT::IClient::SuspendOperation - virtual ::NThreading::TFuture<void> SuspendOperation( - const TOperationId& operationId, - const TSuspendOperationOptions& options = TSuspendOperationOptions()) = 0; - - /// - /// @brief Resume operation. - /// - /// @see NYT::IClient::ResumeOperation - virtual ::NThreading::TFuture<void> ResumeOperation( - const TOperationId& operationId, - const TResumeOperationOptions& options = TResumeOperationOptions()) = 0; - - /// - /// @brief Update parameters of running operation. - /// - /// @see NYT::IClient::UpdateOperationParameters - virtual ::NThreading::TFuture<void> UpdateOperationParameters( - const TOperationId& operationId, - const TUpdateOperationParametersOptions& options = TUpdateOperationParametersOptions()) = 0; - - /// - /// @brief Canonize cypress path - /// - /// @see NYT::ICypressClient::CanonizeYPath - virtual ::NThreading::TFuture<TRichYPath> CanonizeYPath(const TRichYPath& path) = 0; - - /// - /// @brief Get table columnar statistic - /// - /// @see NYT::ICypressClient::GetTableColumnarStatistics - virtual ::NThreading::TFuture<TVector<TTableColumnarStatistics>> GetTableColumnarStatistics( - const TVector<TRichYPath>& paths, - const TGetTableColumnarStatisticsOptions& options = {}) = 0; - - /// - /// @brief Check permission for given path. 
- /// - /// @see NYT::IClient::CheckPermission - virtual ::NThreading::TFuture<TCheckPermissionResponse> CheckPermission( - const TString& user, - EPermission permission, - const TYPath& path, - const TCheckPermissionOptions& options = TCheckPermissionOptions()) = 0; -}; - -/// -/// @brief Batch request object. -/// -/// Allows to send multiple lightweight requests at once significantly -/// reducing time of their execution. -/// -/// Methods of this class accept same arguments as @ref NYT::IClient methods but -/// return TFuture that is set after execution of @ref NYT::IBatchRequest::ExecuteBatch -/// -/// @see [Example of usage](https://a.yandex-team.ru/arc/trunk/arcadia/yt/cpp/mapreduce/examples/tutorial/batch_request/main.cpp) -class IBatchRequest - : public IBatchRequestBase -{ -public: - /// - /// @brief Temporary override current transaction. - /// - /// Using WithTransaction user can temporary override default transaction. - /// Example of usage: - /// TBatchRequest batchRequest; - /// auto noTxResult = batchRequest.Get("//some/path"); - /// auto txResult = batchRequest.WithTransaction(tx).Get("//some/path"); - virtual IBatchRequestBase& WithTransaction(const TTransactionId& transactionId) = 0; - IBatchRequestBase& WithTransaction(const ITransactionPtr& transaction); - - /// - /// @brief Executes all subrequests of batch request. - /// - /// After execution of this method all TFuture objects returned by subrequests will - /// be filled with either result or error. - /// - /// @note It is undefined in which order these requests are executed. - /// - /// @note This method doesn't throw if subrequest emits error. - /// Instead corresponding future is set with exception. - /// So it is always important to check TFuture status. - /// - /// Single TBatchRequest instance may be executed only once - /// and cannot be modified (filled with additional requests) after execution. 
- /// Exception is thrown on attempt to modify executed batch request - /// or execute it again. - virtual void ExecuteBatch(const TExecuteBatchOptions& options = TExecuteBatchOptions()) = 0; -}; - -//////////////////////////////////////////////////////////////////// - -} // namespace NYT diff --git a/yt/cpp/mapreduce/interface/client.cpp b/yt/cpp/mapreduce/interface/client.cpp deleted file mode 100644 index 11d308b8098..00000000000 --- a/yt/cpp/mapreduce/interface/client.cpp +++ /dev/null @@ -1,19 +0,0 @@ -#include "client.h" - -namespace NYT { - -//////////////////////////////////////////////////////////////////////////////// - -void ILock::Wait(TDuration timeout) -{ - return GetAcquiredFuture().GetValue(timeout); -} - -void ITransaction::Detach() -{ - Y_FAIL("ITransaction::Detach() is not implemented"); -} - -//////////////////////////////////////////////////////////////////////////////// - -} // namespace NYT diff --git a/yt/cpp/mapreduce/interface/client.h b/yt/cpp/mapreduce/interface/client.h deleted file mode 100644 index 54f37c3ae09..00000000000 --- a/yt/cpp/mapreduce/interface/client.h +++ /dev/null @@ -1,568 +0,0 @@ -#pragma once - -/// -/// @file yt/cpp/mapreduce/interface/client.h -/// -/// Main header of the C++ YT Wrapper. - -/// -/// @mainpage C++ library for working with YT -/// -/// This library provides possibilities to work with YT as a [MapReduce](https://en.wikipedia.org/wiki/MapReduce) system. It allows: -/// - to read/write tables and files -/// - to run operations -/// - to work with transactions. -/// -/// This library provides only basic functions for working with dynamic tables. -/// To access full powers of YT dynamic tables one should use -/// [yt/client](https://a.yandex-team.ru/arc/trunk/arcadia/yt/19_4/yt/client) library. 
-/// -/// Entry points to this library: -/// - @ref NYT::Initialize() initialization function for this library; -/// - @ref NYT::IClient main interface to work with YT cluster; -/// - @ref NYT::CreateClient() function that creates client for particular cluster; -/// - @ref NYT::IOperationClient ancestor of @ref NYT::IClient containing the set of methods to run operations. -/// -/// Tutorial on how to use this library can be found [here](https://yt.yandex-team.ru/docs/api/c++/examples). - -#include "fwd.h" - -#include "client_method_options.h" -#include "constants.h" -#include "batch_request.h" -#include "cypress.h" -#include "init.h" -#include "io.h" -#include "node.h" -#include "operation.h" - -#include <library/cpp/threading/future/future.h> - -#include <util/datetime/base.h> -#include <util/generic/maybe.h> -#include <util/system/compiler.h> - -/// Main namespace of YT client -namespace NYT { - -//////////////////////////////////////////////////////////////////////////////// - -/// OAuth info (returned by @ref NYT::IClient::WhoAmI). -struct TAuthorizationInfo -{ - /// User's login. - TString Login; - - /// Realm. - TString Realm; -}; - -//////////////////////////////////////////////////////////////////////////////// - -/// @brief Part of @ref NYT::TCheckPermissionResponse. -/// -/// In case when 'Action == ESecurityAction::Deny' because of a 'deny' rule, -/// the "denying" object name and id and "denied" subject name an id may be returned. -struct TCheckPermissionResult -{ - /// Was the access granted or not. - ESecurityAction Action; - - /// Id of the object whose ACL's "deny" rule forbids the access. - TMaybe<TGUID> ObjectId; - - /// - /// @brief Name of the object whose ACL's "deny" rule forbids the access. - /// - /// Example is "node //tmp/x/y". - TMaybe<TString> ObjectName; - - /// Id of the subject for whom the access was denied by a "deny" rule. - TMaybe<TGUID> SubjectId; - - /// Name of the subject for whom the access was denied by a "deny" rule. 
- TMaybe<TString> SubjectName; -}; - -/// @brief Result of @ref NYT::IClient::CheckPermission command. -/// -/// The base part of the response corresponds to the check result for the node itself. -/// `Columns` vector contains check results for the columns (in the same order as in the request). -struct TCheckPermissionResponse - : public TCheckPermissionResult -{ - /// @brief Results for the table columns access permissions. - /// - /// @see [Columnar ACL doc](https://yt.yandex-team.ru/docs/description/common/columnar_acl) - TVector<TCheckPermissionResult> Columns; -}; - -//////////////////////////////////////////////////////////////////////////////// - -/// @brief Interface representing a lock obtained from @ref NYT::ITransaction::Lock. -/// -/// @see [YT doc](https://yt.yandex-team.ru/docs/api/commands.html#start-tx) -class ILock - : public TThrRefBase -{ -public: - virtual ~ILock() = default; - - /// Get cypress node id of lock itself. - virtual const TLockId& GetId() const = 0; - - /// Get cypress node id of locked object. - virtual TNodeId GetLockedNodeId() const = 0; - - /// - /// @brief Get future that will be set once lock is in "acquired" state. - /// - /// Note that future might contain exception if some error occurred - /// e.g. lock transaction was aborted. - virtual const ::NThreading::TFuture<void>& GetAcquiredFuture() const = 0; - - /// - /// @brief Wait until lock is in "acquired" state. - /// - /// Throws exception if timeout exceeded or some error occurred - /// e.g. lock transaction was aborted. - void Wait(TDuration timeout = TDuration::Max()); -}; - -//////////////////////////////////////////////////////////////////////////////// - -/// @brief Base class for @ref NYT::IClient and @ref NYT::ITransaction. -/// -/// This class contains transactional commands. 
-class IClientBase - : public TThrRefBase - , public ICypressClient - , public IIOClient - , public IOperationClient -{ -public: - /// - /// @brief Start a [transaction] (https://yt.yandex-team.ru/docs/description/storage/transactions.html#master_transactions). - /// - /// @see [YT doc](https://yt.yandex-team.ru/docs/api/commands.html#start-tx) - [[nodiscard]] virtual ITransactionPtr StartTransaction( - const TStartTransactionOptions& options = TStartTransactionOptions()) = 0; - - /// - /// @brief Change properties of table. - /// - /// Allows to: - /// - switch table between dynamic/static mode - /// - or change table schema - /// @see [YT doc](https://yt.yandex-team.ru/docs/api/commands.html#alter-table) - virtual void AlterTable( - const TYPath& path, - const TAlterTableOptions& options = TAlterTableOptions()) = 0; - - /// - /// @brief Create batch request object that allows to execute several light requests in parallel. - /// - /// @see [YT doc](https://yt.yandex-team.ru/docs/api/commands.html#execute-batch) - virtual TBatchRequestPtr CreateBatchRequest() = 0; - - /// @brief Get root client outside of all transactions. - virtual IClientPtr GetParentClient() = 0; -}; - -//////////////////////////////////////////////////////////////////////////////// - - -/// @brief Interface representing a master transaction. -/// -/// @see [YT doc](https://yt.yandex-team.ru/docs/description/storage/transactions.html#master_transactions) -class ITransaction - : virtual public IClientBase -{ -public: - /// Get id of transaction. - virtual const TTransactionId& GetId() const = 0; - - /// - /// @brief Try to lock given path. - /// - /// Lock will be held until transaction is commited/aborted or @ref NYT::ITransaction::Unlock method is called. - /// Lock modes: - /// - `LM_EXCLUSIVE`: if exclusive lock is taken no other transaction can take exclusive or shared lock. - /// - `LM_SHARED`: if shared lock is taken other transactions can take shared lock but not exclusive. 
- /// - `LM_SNAPSHOT`: snapshot lock always succeeds, when snapshot lock is taken current transaction snapshots object. - /// It will not see changes that occurred to it in other transactions. - /// - /// Exclusive/shared lock can be waitable or not. - /// If nonwaitable lock cannot be taken exception is thrown. - /// If waitable lock cannot be taken it is created in pending state and client can wait until it actually taken. - /// Check @ref NYT::TLockOptions::Waitable and @ref NYT::ILock::GetAcquiredFuture for more details. - /// - /// @see [YT doc](https://yt.yandex-team.ru/docs/api/commands.html#lock) - virtual ILockPtr Lock( - const TYPath& path, - ELockMode mode, - const TLockOptions& options = TLockOptions()) = 0; - - /// - /// @brief Remove all the locks (including pending ones) for this transaction from a Cypress node at `path`. - /// - /// If the locked version of the node differs from the original one, - /// an error will be thrown. - /// - /// Command is successful even if the node has no locks. - /// Only explicit (created by @ref NYT::ITransaction::Lock) locks are removed. - /// - /// @see [YT doc](https://yt.yandex-team.ru/docs/api/commands.html#unlock) - virtual void Unlock( - const TYPath& path, - const TUnlockOptions& options = TUnlockOptions()) = 0; - - /// - /// @brief Commit transaction. - /// - /// All changes that are made by transactions become visible globally or to parent transaction. - /// - /// @see [YT doc](https://yt.yandex-team.ru/docs/api/commands.html#commit) - virtual void Commit() = 0; - - /// - /// @brief Abort transaction. - /// - /// All changes made by current transaction are lost. - /// - /// @see [YT doc](https://yt.yandex-team.ru/docs/api/commands.html#abort) - virtual void Abort() = 0; - - /// @brief Explicitly ping transaction. - /// - /// User usually does not need this method (as transactions are pinged automatically, - /// see @ref NYT::TStartTransactionOptions::AutoPingable). 
- /// @see [YT doc](https://yt.yandex-team.ru/docs/api/commands.html#ping) - virtual void Ping() = 0; - - /// - /// @brief Detach transaction. - /// - /// Stop any activities connected with it: pinging, aborting on crashes etc. - /// Forget about the transaction totally. - virtual void Detach(); -}; - -//////////////////////////////////////////////////////////////////////////////// - -/// Interface containing non-transactional commands. -class IClient - : virtual public IClientBase -{ -public: - /// - /// @brief Attach to existing master transaction. - /// - /// Returned object WILL NOT: - /// - ping transaction automatically (unless @ref NYT::TAttachTransactionOptions::AutoPing is set) - /// - abort it on program termination (unless @ref NYT::TAttachTransactionOptions::AbortOnTermination is set). - /// Otherwise returned object is similar to the object returned by @ref NYT::IClientBase::StartTransaction. - /// and it can see all the changes made inside the transaction. - [[nodiscard]] virtual ITransactionPtr AttachTransaction( - const TTransactionId& transactionId, - const TAttachTransactionOptions& options = TAttachTransactionOptions()) = 0; - - /// - /// @brief Mount dynamic table. - /// - /// @see [YT doc](https://yt.yandex-team.ru/docs/api/commands.html#mount-table) - virtual void MountTable( - const TYPath& path, - const TMountTableOptions& options = TMountTableOptions()) = 0; - - /// - /// @brief Unmount dynamic table. - /// - /// @see [YT doc](https://yt.yandex-team.ru/docs/api/commands.html#unmount-table) - virtual void UnmountTable( - const TYPath& path, - const TUnmountTableOptions& options = TUnmountTableOptions()) = 0; - - /// - /// @brief Remount dynamic table. - /// - /// @see [YT doc](https://yt.yandex-team.ru/docs/api/commands.html#remount-table) - virtual void RemountTable( - const TYPath& path, - const TRemountTableOptions& options = TRemountTableOptions()) = 0; - - /// - /// @brief Switch dynamic table from `mounted' into `frozen' state. 
- /// - /// When table is in frozen state all its data is flushed to disk and writes are disabled. - /// - /// @note this function launches the process of switching, but doesn't wait until switching is accomplished. - /// Waiting has to be performed by user. - /// - /// @see [YT doc](https://yt.yandex-team.ru/docs/api/commands.html#freeze-table) - virtual void FreezeTable( - const TYPath& path, - const TFreezeTableOptions& options = TFreezeTableOptions()) = 0; - - /// - /// @brief Switch dynamic table from `frozen` into `mounted` state. - /// - /// @note this function launches the process of switching, but doesn't wait until switching is accomplished. - /// Waiting has to be performed by user. - /// - /// @see [YT doc](https://yt.yandex-team.ru/docs/api/commands.html#unfreeze-table) - virtual void UnfreezeTable( - const TYPath& path, - const TUnfreezeTableOptions& options = TUnfreezeTableOptions()) = 0; - - /// - /// @brief Reshard dynamic table (break it into tablets) by given pivot keys. - /// - /// @see [YT doc](https://yt.yandex-team.ru/docs/api/commands.html#reshard-table) - virtual void ReshardTable( - const TYPath& path, - const TVector<TKey>& pivotKeys, - const TReshardTableOptions& options = TReshardTableOptions()) = 0; - - /// - /// @brief Reshard dynamic table, breaking it into given number of tablets. - /// - /// @see [YT doc](https://yt.yandex-team.ru/docs/api/commands.html#reshard-table) - virtual void ReshardTable( - const TYPath& path, - i64 tabletCount, - const TReshardTableOptions& options = TReshardTableOptions()) = 0; - - /// - /// @brief Insert rows into dynamic table. - /// - /// @see [YT doc](https://yt.yandex-team.ru/docs/api/commands.html#insert-rows) - virtual void InsertRows( - const TYPath& path, - const TNode::TListType& rows, - const TInsertRowsOptions& options = TInsertRowsOptions()) = 0; - - /// - /// @brief Delete rows from dynamic table. 
- /// - /// @see [YT doc](https://yt.yandex-team.ru/docs/api/commands.html#delete-rows) - virtual void DeleteRows( - const TYPath& path, - const TNode::TListType& keys, - const TDeleteRowsOptions& options = TDeleteRowsOptions()) = 0; - - /// - /// @brief Trim rows from the beginning of ordered dynamic table. - /// - /// Asynchronously removes `rowCount` rows from the beginning of ordered dynamic table. - /// Numeration of remaining rows *does not change*, e.g. after `trim(10)` and `trim(20)` - /// you get in total `20` deleted rows. - /// - /// @param path Path to ordered dynamic table. - /// @param tabletIndex Which tablet to trim. - /// @param rowCount How many trimmed rows will be in the table after command. - /// @param options Optional parameters. - /// - /// @see [YT doc](https://yt.yandex-team.ru/docs/api/commands.html#trim-rows) - virtual void TrimRows( - const TYPath& path, - i64 tabletIndex, - i64 rowCount, - const TTrimRowsOptions& options = TTrimRowsOptions()) = 0; - - /// - /// @brief Lookup rows with given keys from dynamic table. - /// - /// @see [YT doc](https://yt.yandex-team.ru/docs/api/commands.html#lookup-rows) - virtual TNode::TListType LookupRows( - const TYPath& path, - const TNode::TListType& keys, - const TLookupRowsOptions& options = TLookupRowsOptions()) = 0; - - /// - /// @brief Select rows from dynamic table, using [SQL dialect](https://yt.yandex-team.ru/docs//description/dynamic_tables/dyn_query_language.html). - /// - /// @see [YT doc](https://yt.yandex-team.ru/docs/api/commands.html#select-rows) - virtual TNode::TListType SelectRows( - const TString& query, - const TSelectRowsOptions& options = TSelectRowsOptions()) = 0; - - /// - /// @brief Change properties of table replica. - /// - /// Allows to enable/disable replica and/or change its mode. 
- /// - /// @see [YT doc](https://yt.yandex-team.ru/docs/api/commands.html#alter-table-replica) - virtual void AlterTableReplica( - const TReplicaId& replicaId, - const TAlterTableReplicaOptions& alterTableReplicaOptions) = 0; - - /// - /// @brief Generate a monotonously increasing master timestamp. - /// - /// @see [YT doc](https://yt.yandex-team.ru/docs/api/commands.html#generate-timestamp) - virtual ui64 GenerateTimestamp() = 0; - - /// Return YT username of current client. - virtual TAuthorizationInfo WhoAmI() = 0; - - /// - /// @brief Get operation attributes. - /// - /// @see [YT doc](https://yt.yandex-team.ru/docs/api/commands.html#get-operation) - virtual TOperationAttributes GetOperation( - const TOperationId& operationId, - const TGetOperationOptions& options = TGetOperationOptions()) = 0; - - /// - /// @brief List operations satisfying given filters. - /// - /// @see [YT doc](https://yt.yandex-team.ru/docs/api/commands.html#list-operations) - virtual TListOperationsResult ListOperations( - const TListOperationsOptions& options = TListOperationsOptions()) = 0; - - /// - /// @brief Update operation runtime parameters. - /// - /// @see [YT doc](https://yt.yandex-team.ru/docs/api/commands.html#update-op-parameters) - virtual void UpdateOperationParameters( - const TOperationId& operationId, - const TUpdateOperationParametersOptions& options) = 0; - - /// - /// @brief Get job attributes. - /// - /// @see [YT doc](https://yt.yandex-team.ru/docs/api/commands.html#get-job) - virtual TJobAttributes GetJob( - const TOperationId& operationId, - const TJobId& jobId, - const TGetJobOptions& options = TGetJobOptions()) = 0; - - /// - /// List attributes of jobs satisfying given filters. - /// - /// @see [YT doc](https://yt.yandex-team.ru/docs/api/commands.html#list-jobs) - virtual TListJobsResult ListJobs( - const TOperationId& operationId, - const TListJobsOptions& options = TListJobsOptions()) = 0; - - /// - /// @brief Get the input of a running or failed job. 
- /// - /// @ref NYT::TErrorResponse exception is thrown if job is missing. - /// - /// @see [YT doc](https://yt.yandex-team.ru/docs/api/commands.html#get-job-input) - virtual IFileReaderPtr GetJobInput( - const TJobId& jobId, - const TGetJobInputOptions& options = TGetJobInputOptions()) = 0; - - /// - /// @brief Get fail context of a failed job. - /// - /// @ref NYT::TErrorResponse exception is thrown if it is missing. - /// - /// @see [YT doc](https://yt.yandex-team.ru/docs/api/commands.html#get-job-fail-context) - virtual IFileReaderPtr GetJobFailContext( - const TOperationId& operationId, - const TJobId& jobId, - const TGetJobFailContextOptions& options = TGetJobFailContextOptions()) = 0; - - /// - /// @brief Get stderr of a running or failed job. - /// - /// @ref NYT::TErrorResponse exception is thrown if it is missing. - /// - /// @note YT doesn't store all job stderrs - /// - /// @note If job stderr exceeds few megabytes YT will store only head and tail of stderr. - /// - /// @see Description of `max_stderr_size` spec option [here](https://yt.yandex-team.ru/docs//description/mr/operations_options.html). - /// - /// @see [YT doc](https://yt.yandex-team.ru/docs/api/commands.html#get-job-stderr) - virtual IFileReaderPtr GetJobStderr( - const TOperationId& operationId, - const TJobId& jobId, - const TGetJobStderrOptions& options = TGetJobStderrOptions()) = 0; - - /// - /// @brief Create one or several rbtorrents for files in a blob table. - /// - /// If specified, one torrent is created for each value of `KeyColumns` option. - /// Otherwise, a single torrent with all files of a table is created. - /// - /// @return list of nodes, each node has two fields - /// * `key`: list of key columns values. Empty if `KeyColumns` is not specified. 
- /// * `rbtorrent`: rbtorrent string (with `rbtorrent:` prefix) - /// - /// @see [More info.](https://docs.yandex-team.ru/docs/yt/description/storage/blobtables#sky_share) - virtual TNode::TListType SkyShareTable( - const std::vector<TYPath>& tablePaths, - const TSkyShareTableOptions& options) = 0; - - /// - /// @brief Check if `user` has `permission` to access a Cypress node at `path`. - /// - /// For tables access to columns specified in `options.Columns_` can be checked - /// (@see [the doc](https://yt.yandex-team.ru/docs/description/common/columnar_acl)). - /// - /// If access is denied (the returned result has `.Action == ESecurityAction::Deny`) - /// because of a `deny` rule, the "denying" object name and id - /// and "denied" subject name an id may be returned. - /// - /// @see [YT doc](https://yt.yandex-team.ru/docs/api/commands.html#check_permission) - virtual TCheckPermissionResponse CheckPermission( - const TString& user, - EPermission permission, - const TYPath& path, - const TCheckPermissionOptions& options = TCheckPermissionOptions()) = 0; - - /// @brief Get information about tablet - /// @see NYT::TTabletInfo - virtual TVector<TTabletInfo> GetTabletInfos( - const TYPath& path, - const TVector<int>& tabletIndexes, - const TGetTabletInfosOptions& options = TGetTabletInfosOptions()) = 0; - - /// - /// @brief Suspend operation. - /// - /// Jobs will be aborted. - /// - /// @see [YT doc](https://yt.yandex-team.ru/docs/api/commands.html#suspend_op) - virtual void SuspendOperation( - const TOperationId& operationId, - const TSuspendOperationOptions& options = TSuspendOperationOptions()) = 0; - - /// @brief Resume previously suspended operation. 
- /// - /// @see [YT doc](https://yt.yandex-team.ru/docs/api/commands.html#resume_op) - virtual void ResumeOperation( - const TOperationId& operationId, - const TResumeOperationOptions& options = TResumeOperationOptions()) = 0; - - /// - /// @brief Synchronously terminates all client's background activities - /// - /// e.g. no callbacks will be executed after the function is completed - /// - /// @note It is safe to call Shutdown multiple times - /// - /// @note @ref NYT::TApiUsageError will be thrown if any client's method is called after shutdown - /// - virtual void Shutdown() = 0; -}; - - -/// Create a client for particular MapReduce cluster. -IClientPtr CreateClient( - const TString& serverName, - const TCreateClientOptions& options = TCreateClientOptions()); - - -/// Create a client for mapreduce cluster specified in `YT_PROXY` environment variable. -IClientPtr CreateClientFromEnv( - const TCreateClientOptions& options = TCreateClientOptions()); - -//////////////////////////////////////////////////////////////////////////////// - -} // namespace NYT diff --git a/yt/cpp/mapreduce/interface/client_method_options.cpp b/yt/cpp/mapreduce/interface/client_method_options.cpp deleted file mode 100644 index 66f72bfe5fa..00000000000 --- a/yt/cpp/mapreduce/interface/client_method_options.cpp +++ /dev/null @@ -1,34 +0,0 @@ -#include "client_method_options.h" - -#include "tvm.h" - -namespace NYT { - -template <typename T> -static void MergeMaybe(TMaybe<T>& origin, const TMaybe<T>& patch) -{ - if (patch) { - origin = patch; - } -} - -void TFormatHints::Merge(const TFormatHints& patch) -{ - if (patch.SkipNullValuesForTNode_) { - SkipNullValuesForTNode(true); - } - MergeMaybe(EnableStringToAllConversion_, patch.EnableStringToAllConversion_); - MergeMaybe(EnableAllToStringConversion_, patch.EnableAllToStringConversion_); - MergeMaybe(EnableIntegralTypeConversion_, patch.EnableIntegralTypeConversion_); - MergeMaybe(EnableIntegralToDoubleConversion_, 
patch.EnableIntegralToDoubleConversion_); - MergeMaybe(EnableTypeConversion_, patch.EnableTypeConversion_); - MergeMaybe(ComplexTypeMode_, patch.ComplexTypeMode_); -} - -TCreateClientOptions& TCreateClientOptions::ServiceTicketAuth(const NAuth::IServiceTicketAuthPtrWrapper& wrapper) -{ - ServiceTicketAuth_ = std::make_shared<NAuth::IServiceTicketAuthPtrWrapper>(wrapper); - return *this; -} - -} // namespace NYT diff --git a/yt/cpp/mapreduce/interface/client_method_options.h b/yt/cpp/mapreduce/interface/client_method_options.h deleted file mode 100644 index 80746323535..00000000000 --- a/yt/cpp/mapreduce/interface/client_method_options.h +++ /dev/null @@ -1,1452 +0,0 @@ -#pragma once - -/// -/// @file yt/cpp/mapreduce/interface/client_method_options.h -/// -/// Header containing options for @ref NYT::IClient methods. - -#include "common.h" -#include "config.h" -#include "format.h" -#include "public.h" -#include "retry_policy.h" - -#include <util/datetime/base.h> - -namespace NYT { - -//////////////////////////////////////////////////////////////////////////////// - -/// Type of the cypress node. -enum ENodeType : int -{ - NT_STRING /* "string_node" */, - NT_INT64 /* "int64_node" */, - NT_UINT64 /* "uint64_node" */, - NT_DOUBLE /* "double_node" */, - NT_BOOLEAN /* "boolean_node" */, - NT_MAP /* "map_node" */, - NT_LIST /* "list_node" */, - NT_FILE /* "file" */, - NT_TABLE /* "table" */, - NT_DOCUMENT /* "document" */, - NT_REPLICATED_TABLE /* "replicated_table" */, - NT_TABLE_REPLICA /* "table_replica" */, - NT_USER /* "user" */, - NT_SCHEDULER_POOL /* "scheduler_pool" */, - NT_LINK /* "link" */, -}; - -/// -/// @brief Mode of composite type representation in yson. 
-/// -/// @see https://yt.yandex-team.ru/docs/description/storage/data_types#yson -enum class EComplexTypeMode : int -{ - Named /* "named" */, - Positional /* "positional" */, -}; - -/// -/// @brief Options for @ref NYT::ICypressClient::Create -/// -/// @see https://yt.yandex-team.ru/docs/api/commands.html#create -struct TCreateOptions -{ - /// @cond Doxygen_Suppress - using TSelf = TCreateOptions; - /// @endcond - - /// Create missing parent directories if required. - FLUENT_FIELD_DEFAULT(bool, Recursive, false); - - /// - /// @brief Do not raise error if node already exists. - /// - /// Node is not recreated. - /// Force and IgnoreExisting MUST NOT be used simultaneously. - FLUENT_FIELD_DEFAULT(bool, IgnoreExisting, false); - - /// - /// @brief Recreate node if it exists. - /// - /// Force and IgnoreExisting MUST NOT be used simultaneously. - FLUENT_FIELD_DEFAULT(bool, Force, false); - - /// @brief Set node attributes. - FLUENT_FIELD_OPTION(TNode, Attributes); -}; - -/// -/// @brief Options for @ref NYT::ICypressClient::Remove -/// -/// @see https://yt.yandex-team.ru/docs/api/commands.html#remove -struct TRemoveOptions -{ - /// @cond Doxygen_Suppress - using TSelf = TRemoveOptions; - /// @endcond - - /// - /// @brief Remove whole tree when removing composite cypress node (e.g. `map_node`). - /// - /// Without this option removing nonempty composite node will fail. - FLUENT_FIELD_DEFAULT(bool, Recursive, false); - - /// @brief Do not fail if removing node doesn't exist. - FLUENT_FIELD_DEFAULT(bool, Force, false); -}; - -/// Base class for options for operations that read from master. -template <typename TDerived> -struct TMasterReadOptions -{ - /// @cond Doxygen_Suppress - using TSelf = TDerived; - /// @endcond - - /// @brief Where to read from. 
- FLUENT_FIELD_OPTION(EMasterReadKind, ReadFrom); -}; - -/// -/// @brief Options for @ref NYT::ICypressClient::Exists -/// -/// @see https://yt.yandex-team.ru/docs/api/commands.html#exists -struct TExistsOptions - : public TMasterReadOptions<TExistsOptions> -{ -}; - -/// -/// @brief Options for @ref NYT::ICypressClient::Get -/// -/// @see https://yt.yandex-team.ru/docs/api/commands.html#get -struct TGetOptions - : public TMasterReadOptions<TGetOptions> -{ - /// @brief Attributes that should be fetched with each node. - FLUENT_FIELD_OPTION(TAttributeFilter, AttributeFilter); - - /// @brief Limit for the number of children node. - FLUENT_FIELD_OPTION(i64, MaxSize); -}; - -/// -/// @brief Options for @ref NYT::ICypressClient::Set -/// -/// @see https://yt.yandex-team.ru/docs/api/commands.html#set -struct TSetOptions -{ - /// @cond Doxygen_Suppress - using TSelf = TSetOptions; - /// @endcond - - /// Create missing parent directories if required. - FLUENT_FIELD_DEFAULT(bool, Recursive, false); - - /// Allow setting any nodes, not only attribute and document ones. - FLUENT_FIELD_OPTION(bool, Force); -}; - -/// -/// @brief Options for @ref NYT::ICypressClient::MultisetAttributes -/// -/// @see https://yt.yandex-team.ru/docs/api/commands.html#multiset_attributes -struct TMultisetAttributesOptions -{ }; - -/// -/// @brief Options for @ref NYT::ICypressClient::List -/// -/// @see https://yt.yandex-team.ru/docs/api/commands.html#list -struct TListOptions - : public TMasterReadOptions<TListOptions> -{ - /// @cond Doxygen_Suppress - using TSelf = TListOptions; - /// @endcond - - /// Attributes that should be fetched for each node. - FLUENT_FIELD_OPTION(TAttributeFilter, AttributeFilter); - - /// Limit for the number of children that will be fetched. 
- FLUENT_FIELD_OPTION(i64, MaxSize); -}; - -/// -/// @brief Options for @ref NYT::ICypressClient::Copy -/// -/// @see https://yt.yandex-team.ru/docs/api/commands.html#copy -struct TCopyOptions -{ - /// @cond Doxygen_Suppress - using TSelf = TCopyOptions; - /// @endcond - - /// Create missing directories in destination path if required. - FLUENT_FIELD_DEFAULT(bool, Recursive, false); - - /// Allows to use existing node as destination, it will be overwritten. - FLUENT_FIELD_DEFAULT(bool, Force, false); - - /// Whether to preserves account of source node. - FLUENT_FIELD_DEFAULT(bool, PreserveAccount, false); - - /// Whether to preserve `expiration_time` attribute of source node. - FLUENT_FIELD_OPTION(bool, PreserveExpirationTime); -}; - -/// -/// @brief Options for @ref NYT::ICypressClient::Move -/// -/// @see https://yt.yandex-team.ru/docs/api/commands.html#move -struct TMoveOptions -{ - /// @cond Doxygen_Suppress - using TSelf = TMoveOptions; - /// @endcond - - /// Create missing directories in destination path if required. - FLUENT_FIELD_DEFAULT(bool, Recursive, false); - - /// Allows to use existing node as destination, it will be overwritten. - FLUENT_FIELD_DEFAULT(bool, Force, false); - - /// Whether to preserves account of source node. - FLUENT_FIELD_DEFAULT(bool, PreserveAccount, false); - - /// Whether to preserve `expiration_time` attribute of source node. - FLUENT_FIELD_OPTION(bool, PreserveExpirationTime); -}; - -/// -/// @brief Options for @ref NYT::ICypressClient::Link -/// -/// @see https://yt.yandex-team.ru/docs/api/commands.html#link -struct TLinkOptions -{ - /// @cond Doxygen_Suppress - using TSelf = TLinkOptions; - /// @endcond - - /// Create parent directories of destination if they don't exist. - FLUENT_FIELD_DEFAULT(bool, Recursive, false); - - /// Do not raise error if link already exists. - FLUENT_FIELD_DEFAULT(bool, IgnoreExisting, false); - - /// Force rewrite target node. 
- FLUENT_FIELD_DEFAULT(bool, Force, false); - - /// Attributes of created link. - FLUENT_FIELD_OPTION(TNode, Attributes); -}; - -/// -/// @brief Options for @ref NYT::ICypressClient::Concatenate -/// -/// @see https://yt.yandex-team.ru/docs/api/commands.html#concatenate -struct TConcatenateOptions -{ - /// @cond Doxygen_Suppress - using TSelf = TConcatenateOptions; - /// @endcond - - /// Whether we should append to destination or rewrite it. - FLUENT_FIELD_OPTION(bool, Append); -}; - -/// -/// @brief Options for @ref NYT::IIOClient::CreateBlobTableReader -/// -/// @see https://yt.yandex-team.ru/docs/api/commands.html#read_blob_table -struct TBlobTableReaderOptions -{ - /// @cond Doxygen_Suppress - using TSelf = TBlobTableReaderOptions; - /// @endcond - - /// Name of the part index column. By default it is "part_index". - FLUENT_FIELD_OPTION(TString, PartIndexColumnName); - - /// Name of the data column. By default it is "data". - FLUENT_FIELD_OPTION(TString, DataColumnName); - - /// - /// @brief Size of each part. - /// - /// All blob parts except the last part of the blob must be of this size - /// otherwise blob table reader emits error. - FLUENT_FIELD_DEFAULT(ui64, PartSize, 4 * 1024 * 1024); - - /// @brief Offset from which to start reading - FLUENT_FIELD_DEFAULT(i64, Offset, 0); -}; - -/// -/// @brief Resource limits for operation (or pool) -/// -/// @see https://yt.yandex-team.ru/docs/description/mr/scheduler/scheduler_and_pools#resursy -/// @see NYT::TUpdateOperationParametersOptions -struct TResourceLimits -{ - /// @cond Doxygen_Suppress - using TSelf = TResourceLimits; - /// @endcond - - /// Number of slots for user jobs. - FLUENT_FIELD_OPTION(i64, UserSlots); - - /// Number of cpu cores. - FLUENT_FIELD_OPTION(double, Cpu); - - /// Network usage. Doesn't have precise physical unit. - FLUENT_FIELD_OPTION(i64, Network); - - /// Memory in bytes. - FLUENT_FIELD_OPTION(i64, Memory); -}; - -/// -/// @brief Scheduling options for single pool tree. 
-/// -/// @see NYT::TUpdateOperationParametersOptions -struct TSchedulingOptions -{ - /// @cond Doxygen_Suppress - using TSelf = TSchedulingOptions; - /// @endcond - - /// - /// @brief Pool to switch operation to. - /// - /// @note Switching is currently disabled on the server (will induce an exception). - FLUENT_FIELD_OPTION(TString, Pool); - - /// @brief Operation weight. - FLUENT_FIELD_OPTION(double, Weight); - - /// @brief Operation resource limits. - FLUENT_FIELD_OPTION(TResourceLimits, ResourceLimits); -}; - -/// -/// @brief Collection of scheduling options for multiple pool trees. -/// -/// @see NYT::TUpdateOperationParametersOptions -struct TSchedulingOptionsPerPoolTree -{ - /// @cond Doxygen_Suppress - using TSelf = TSchedulingOptionsPerPoolTree; - /// @endcond - - TSchedulingOptionsPerPoolTree(const THashMap<TString, TSchedulingOptions>& options = {}) - : Options_(options) - { } - - /// Add scheduling options for pool tree. - TSelf& Add(TStringBuf poolTreeName, const TSchedulingOptions& schedulingOptions) - { - Y_ENSURE(Options_.emplace(poolTreeName, schedulingOptions).second); - return *this; - } - - THashMap<TString, TSchedulingOptions> Options_; -}; - -/// -/// @brief Options for @ref NYT::IOperation::SuspendOperation -/// -/// @see https://yt.yandex-team.ru/docs/api/commands.html#suspend_op -struct TSuspendOperationOptions -{ - /// @cond Doxygen_Suppress - using TSelf = TSuspendOperationOptions; - /// @endcond - - /// - /// @brief Whether to abort already running jobs. - /// - /// By default running jobs are not aborted. - FLUENT_FIELD_OPTION(bool, AbortRunningJobs); -}; - -/// -/// @brief Options for @ref NYT::IOperation::ResumeOperation -/// -/// @note They are empty for now but options might appear in the future. 
-/// -/// @see https://yt.yandex-team.ru/docs/api/commands.html#resume_op -struct TResumeOperationOptions -{ - /// @cond Doxygen_Suppress - using TSelf = TResumeOperationOptions; - /// @endcond -}; - -/// -/// @brief Options for @ref NYT::IOperation::UpdateParameters -/// -/// @see https://yt.yandex-team.ru/docs/api/commands.html#update_op_parameters -struct TUpdateOperationParametersOptions -{ - /// @cond Doxygen_Suppress - using TSelf = TUpdateOperationParametersOptions; - /// @endcond - - /// New owners of the operation. - FLUENT_VECTOR_FIELD(TString, Owner); - - /// Pool to switch operation to (for all pool trees it is running in). - FLUENT_FIELD_OPTION(TString, Pool); - - /// New operation weight (for all pool trees it is running in). - FLUENT_FIELD_OPTION(double, Weight); - - /// Scheduling options for each pool tree the operation is running in. - FLUENT_FIELD_OPTION(TSchedulingOptionsPerPoolTree, SchedulingOptionsPerPoolTree); -}; - -/// -/// @brief Base class for many options related to IO. -/// -/// @ref NYT::TFileWriterOptions -/// @ref NYT::TFileReaderOptions -/// @ref NYT::TTableReaderOptions -/// @ref NYT::TTableWriterOptions -template <class TDerived> -struct TIOOptions -{ - /// @cond Doxygen_Suppress - using TSelf = TDerived; - /// @endcond - - /// - /// @brief Advanced options for reader/writer. - /// - /// Readers/writers have many options not of all of them are supported by library. - /// If you need such unsupported option, you might use `Config` option until - /// option is supported. - /// - /// Example: - /// - /// TTableWriterOptions().Config(TNode()("max_row_weight", 64 << 20))) - /// - /// @note We encourage you to ask yt@ to add native C++ support of required options - /// and use `Config` only as temporary solution while native support is not ready. - FLUENT_FIELD_OPTION(TNode, Config); - - /// - /// @brief Whether to create internal client transaction for reading / writing table. - /// - /// This is advanced option. 
- /// - /// If `CreateTransaction` is set to `false` reader/writer doesn't create internal transaction - /// and doesn't lock table. This option is overriden (effectively `false`) for writers by - /// @ref NYT::TTableWriterOptions::SingleHttpRequest - /// - /// WARNING: if `CreateTransaction` is `false`, read/write might become non-atomic. - /// Change ONLY if you are sure what you are doing! - FLUENT_FIELD_DEFAULT(bool, CreateTransaction, true); -}; - -/// @brief Options for reading file from YT. -struct TFileReaderOptions - : public TIOOptions<TFileReaderOptions> -{ - /// - /// @brief Offset to start reading from. - /// - /// By default reading is started from the beginning of the file. - FLUENT_FIELD_OPTION(i64, Offset); - - /// - /// @brief Maximum length to read. - /// - /// By default file is read until the end. - FLUENT_FIELD_OPTION(i64, Length); -}; - -/// @brief Options that control how server side of YT stores data. -struct TWriterOptions -{ - /// @cond Doxygen_Suppress - using TSelf = TWriterOptions; - /// @endcond - - /// - /// @brief Whether to wait all replicas to be written. - /// - /// When set to true upload will be considered successful as soon as - /// @ref NYT::TWriterOptions::MinUploadReplicationFactor number of replicas are created. - FLUENT_FIELD_OPTION(bool, EnableEarlyFinish); - - /// Number of replicas to be created. - FLUENT_FIELD_OPTION(ui64, UploadReplicationFactor); - - /// - /// Min number of created replicas needed to consider upload successful. - /// - /// @see NYT::TWriterOptions::EnableEarlyFinish - FLUENT_FIELD_OPTION(ui64, MinUploadReplicationFactor); - - /// - /// @brief Desired size of a chunk. - /// - /// @see @ref NYT::TWriterOptions::RetryBlockSize - FLUENT_FIELD_OPTION(ui64, DesiredChunkSize); - - /// - /// @brief Size of data block accumulated in memory to provide retries. - /// - /// Data is accumulated in memory buffer so in case error occurs data could be resended. 
- /// - /// If `RetryBlockSize` is not set buffer size is set to `DesiredChunkSize`. - /// If niether `RetryBlockSize` nor `DesiredChunkSize` is set size of buffer is 64MB. - /// - /// @note Written chunks cannot be larger than size of this memory buffer. - /// - /// Since DesiredChunkSize is compared against data already compressed with compression codec - /// it makes sense to set `RetryBlockSize = DesiredChunkSize / ExpectedCompressionRatio` - /// - /// @see @ref NYT::TWriterOptions::DesiredChunkSize - /// @see @ref NYT::TTableWriterOptions::SingleHttpRequest - FLUENT_FIELD_OPTION(size_t, RetryBlockSize); -}; - -/// -/// @brief Options for writing file -/// -/// @see NYT::IIOClient::CreateFileWriter -struct TFileWriterOptions - : public TIOOptions<TFileWriterOptions> -{ - /// - /// @brief Whether to compute MD5 sum of written file. - /// - /// If ComputeMD5 is set to `true` and we are appending to an existing file - /// the `md5` attribute must be set (i.e. it was previously written only with `ComputeMD5 == true`). - FLUENT_FIELD_OPTION(bool, ComputeMD5); - - /// - /// @brief Options to control how YT server side writes data. - /// - /// @see NYT::TWriterOptions - FLUENT_FIELD_OPTION(TWriterOptions, WriterOptions); -}; - -class TSkiffRowHints { -public: - /// @cond Doxygen_Suppress - using TSelf = TSkiffRowHints; - /// @endcond - - /// - /// @brief Library doesn't interpret it, only pass it to CreateSkiffParser<...>() and GetSkiffSchema<...>() functions. - /// - /// You can set something in it to pass necessary information to CreateSkiffParser<...>() and GetSkiffSchema<...>() functions. - FLUENT_FIELD_OPTION(TNode, Attributes); -}; - -/// Options that control how C++ objects represent table rows when reading or writing a table. -class TFormatHints -{ -public: - /// @cond Doxygen_Suppress - using TSelf = TFormatHints; - /// @endcond - - /// - /// @brief Whether to skip null values. - /// - /// When set to true TNode doesn't contain null column values - /// (e.g. 
corresponding keys will be missing instead of containing null value). - /// - /// Only meaningful for TNode representation. - /// - /// Useful for sparse tables which have many columns in schema - /// but only few columns are set in any row. - FLUENT_FIELD_DEFAULT(bool, SkipNullValuesForTNode, false); - - /// - /// @brief Whether to convert string to numeric and boolean types (e.g. "42u" -> 42u, "false" -> %false) - /// when writing to schemaful table. - FLUENT_FIELD_OPTION(bool, EnableStringToAllConversion); - - /// - /// @brief Whether to convert numeric and boolean types to string (e.g., 3.14 -> "3.14", %true -> "true") - /// when writing to schemaful table. - FLUENT_FIELD_OPTION(bool, EnableAllToStringConversion); - - /// - /// @brief Whether to convert uint64 <-> int64 when writing to schemaful table. - /// - /// On overflow the corresponding error with be raised. - /// - /// This options is enabled by default. - FLUENT_FIELD_OPTION(bool, EnableIntegralTypeConversion); - - /// Whether to convert uint64 and int64 to double (e.g. 42 -> 42.0) when writing to schemaful table. - FLUENT_FIELD_OPTION(bool, EnableIntegralToDoubleConversion); - - /// Shortcut for enabling all type conversions. - FLUENT_FIELD_OPTION(bool, EnableTypeConversion); - - /// - /// @brief Controls how complex types are represented in TNode or yson-strings. - /// - /// @see https://yt.yandex-team.ru/docs/description/storage/data_types#yson - FLUENT_FIELD_OPTION(EComplexTypeMode, ComplexTypeMode); - - /// - /// @brief Allow to use any meta-information for creating skiff schema and parser for reading ISkiffRow. - FLUENT_FIELD_OPTION(TSkiffRowHints, SkiffRowHints); - - /// - /// @brief Apply the patch to the fields. - /// - /// Non-default and non-empty values replace the default and empty ones. - void Merge(const TFormatHints& patch); -}; - -/// Options that control which control attributes (like row_index) are added to rows during read. 
-class TControlAttributes -{ -public: - /// @cond Doxygen_Suppress - using TSelf = TControlAttributes; - /// @endcond - - /// - /// @brief Whether to add "row_index" attribute to rows read. - FLUENT_FIELD_DEFAULT(bool, EnableRowIndex, true); - - /// - /// @brief Whether to add "range_index" attribute to rows read. - FLUENT_FIELD_DEFAULT(bool, EnableRangeIndex, true); -}; - -/// Options for @ref NYT::IClient::CreateTableReader -struct TTableReaderOptions - : public TIOOptions<TTableReaderOptions> -{ - /// @deprecated Size of internal client buffer. - FLUENT_FIELD_DEFAULT(size_t, SizeLimit, 4 << 20); - - /// - /// @brief Allows to fine tune format that is used for reading tables. - /// - /// Has no effect when used with raw-reader. - FLUENT_FIELD_OPTION(TFormatHints, FormatHints); - - /// - /// @brief Allows to tune which attributes are added to rows while reading tables. - /// - FLUENT_FIELD_DEFAULT(TControlAttributes, ControlAttributes, TControlAttributes()); -}; - -/// Options for @ref NYT::IClient::CreateTableWriter -struct TTableWriterOptions - : public TIOOptions<TTableWriterOptions> -{ - /// - /// @brief Enable or disable retryful writing. - /// - /// If set to true no retry is made but we also make less requests to master. - /// If set to false writer can make up to `TConfig::RetryCount` attempts to send each block of data. - /// - /// @note Writers' methods might throw strange exceptions that might look like network error - /// when `SingleHttpRequest == true` and YT node encounters an error - /// (due to limitations of HTTP protocol YT node have no chance to report error - /// before it reads the whole input so it just drops the connection). - FLUENT_FIELD_DEFAULT(bool, SingleHttpRequest, false); - - /// - /// @brief Allows to change the size of locally buffered rows before flushing to yt. 
- /// - /// Used only with @ref NYT::TTableWriterOptions::SingleHttpRequest - FLUENT_FIELD_DEFAULT(size_t, BufferSize, 64 << 20); - - /// - /// @brief Allows to fine tune format that is used for writing tables. - /// - /// Has no effect when used with raw-writer. - FLUENT_FIELD_OPTION(TFormatHints, FormatHints); - - /// @brief Try to infer schema of inexistent table from the type of written rows. - /// - /// @note Default values for this option may differ depending on the row type. - /// For protobuf it's currently false by default. - FLUENT_FIELD_OPTION(bool, InferSchema); - - /// - /// @brief Options to control how YT server side writes data. - /// - /// @see NYT::TWriterOptions - FLUENT_FIELD_OPTION(TWriterOptions, WriterOptions); -}; - -/// -/// @brief Options for @ref NYT::IClient::StartTransaction -/// -/// @see https://yt.yandex-team.ru/docs/api/commands.html#start_tx -struct TStartTransactionOptions -{ - /// @cond Doxygen_Suppress - using TSelf = TStartTransactionOptions; - /// @endcond - - FLUENT_FIELD_DEFAULT(bool, PingAncestors, false); - - /// - /// @brief How long transaction lives after last ping. - /// - /// If server doesn't receive any pings for transaction for this time - /// transaction will be aborted. By default timeout is 15 seconds. - FLUENT_FIELD_OPTION(TDuration, Timeout); - - /// - /// @brief Moment in the future when transaction is aborted. - FLUENT_FIELD_OPTION(TInstant, Deadline); - - /// - /// @brief Whether to ping created transaction automatically. - /// - /// When set to true library creates a thread that pings transaction. - /// When set to false library doesn't ping transaction and it's user responsibility to ping it. - FLUENT_FIELD_DEFAULT(bool, AutoPingable, true); - - /// - /// @brief Set the title attribute of transaction. - /// - /// If title was not specified - /// neither using this option nor using @ref NYT::TStartTransactionOptions::Attributes option - /// library will generate default title for transaction. 
- /// Such default title includes machine name, pid, user name and some other useful info. - FLUENT_FIELD_OPTION(TString, Title); - - /// - /// @brief Set custom transaction attributes - /// - /// @note @ref NYT::TStartTransactionOptions::Title option overrides `"title"` attribute. - FLUENT_FIELD_OPTION(TNode, Attributes); -}; - -/// -/// @brief Options for attaching transaction. -/// -/// @see NYT::IClient::AttachTransaction -struct TAttachTransactionOptions -{ - /// @cond Doxygen_Suppress - using TSelf = TAttachTransactionOptions; - /// @endcond - - /// - /// @brief Ping transaction automatically. - /// - /// When set to |true| library creates a thread that pings transaction. - /// When set to |false| library doesn't ping transaction and - /// it's user responsibility to ping it. - FLUENT_FIELD_DEFAULT(bool, AutoPingable, false); - - /// - /// @brief Abort transaction on program termination. - /// - /// Should the transaction be aborted on program termination - /// (either normal or by a signal or uncaught exception -- two latter - /// only if @ref TInitializeOptions::CleanupOnTermination is set). - FLUENT_FIELD_DEFAULT(bool, AbortOnTermination, false); -}; - -/// -/// @brief Type of the lock. -/// -/// @see https://yt.yandex-team.ru/docs/description/storage/transactions#locking_mode -/// @see NYT::ITransaction::Lock -enum ELockMode : int -{ - /// Exclusive lock. - LM_EXCLUSIVE /* "exclusive" */, - - /// Shared lock. - LM_SHARED /* "shared" */, - - /// Snapshot lock. - LM_SNAPSHOT /* "snapshot" */, -}; - -/// -/// @brief Options for locking cypress node -/// -/// @see https://yt.yandex-team.ru/docs/description/storage/transactions#locks -/// @see NYT::ITransaction::Lock -struct TLockOptions -{ - /// @cond Doxygen_Suppress - using TSelf = TLockOptions; - /// @endcond - - /// - /// @brief Whether to wait already locked node to be unlocked. 
- /// - /// If `Waitable' is set to true Lock method will create - /// waitable lock, that will be taken once other transactions - /// that hold lock to that node are commited / aborted. - /// - /// @note Lock method DOES NOT wait until lock is actually acquired. - /// Waiting should be done using corresponding methods of ILock. - /// - /// @see https://yt.yandex-team.ru/docs/description/storage/transactions#locking_queue - FLUENT_FIELD_DEFAULT(bool, Waitable, false); - - /// - /// @brief Also take attribute_key lock. - /// - /// @see https://yt.yandex-team.ru/docs/description/storage/transactions#locks_compatibility - FLUENT_FIELD_OPTION(TString, AttributeKey); - - /// - /// @brief Also take child_key lock. - /// - /// @see https://yt.yandex-team.ru/docs/description/storage/transactions#locks_compatibility - FLUENT_FIELD_OPTION(TString, ChildKey); -}; - -/// -/// @brief Options for @ref NYT::ITransaction::Unlock -/// -/// @note They are empty for now but options might appear in the future. -/// -/// @see https://yt.yandex-team.ru/docs/description/storage/transactions#locks_compatibility -struct TUnlockOptions -{ - /// @cond Doxygen_Suppress - using TSelf = TUnlockOptions; - /// @endcond -}; - -/// Base class for options that deal with tablets. -template <class TDerived> -struct TTabletOptions -{ - /// @cond Doxygen_Suppress - using TSelf = TDerived; - /// @endcond - - /// Index of a first tablet to deal with. - FLUENT_FIELD_OPTION(i64, FirstTabletIndex); - - /// Index of a last tablet to deal with. - FLUENT_FIELD_OPTION(i64, LastTabletIndex); -}; - -/// -/// @brief Options for @ref NYT::IClient::MountTable -/// -/// @see https://yt.yandex-team.ru/docs/api/commands#mount_table -struct TMountTableOptions - : public TTabletOptions<TMountTableOptions> -{ - /// @cond Doxygen_Suppress - using TSelf = TMountTableOptions; - /// @endcond - - /// If specified table will be mounted to this cell. 
- FLUENT_FIELD_OPTION(TTabletCellId, CellId); - - /// If set to true tablets will be mounted in freezed state. - FLUENT_FIELD_DEFAULT(bool, Freeze, false); -}; - -/// -/// @brief Options for @ref NYT::IClient::UnmountTable -/// -/// @see https://yt.yandex-team.ru/docs/api/commands#unmount_table -struct TUnmountTableOptions - : public TTabletOptions<TUnmountTableOptions> -{ - /// @cond Doxygen_Suppress - using TSelf = TUnmountTableOptions; - /// @endcond - - /// Advanced option, don't use unless yt team told you so. - FLUENT_FIELD_DEFAULT(bool, Force, false); -}; - -/// -/// @brief Options for @ref NYT::IClient::RemountTable -/// -/// @see https://yt.yandex-team.ru/docs/api/commands#remount_table -struct TRemountTableOptions - : public TTabletOptions<TRemountTableOptions> -{ }; - -/// -/// @brief Options for @ref NYT::IClient::ReshardTable -/// -/// @see https://yt.yandex-team.ru/docs/api/commands#reshard_table -struct TReshardTableOptions - : public TTabletOptions<TReshardTableOptions> -{ }; - -/// -/// @brief Options for @ref NYT::IClient::FreezeTable -/// -/// @see https://yt.yandex-team.ru/docs/api/commands#freeze_table -struct TFreezeTableOptions - : public TTabletOptions<TFreezeTableOptions> -{ }; - -/// -/// @brief Options for @ref NYT::IClient::UnfreezeTable -/// -/// @see https://yt.yandex-team.ru/docs/api/commands#unfreeze_table -struct TUnfreezeTableOptions - : public TTabletOptions<TUnfreezeTableOptions> -{ }; - -/// -/// @brief Options for @ref NYT::IClient::AlterTable -/// -/// @see https://yt.yandex-team.ru/docs/api/commands#alter_table -struct TAlterTableOptions -{ - /// @cond Doxygen_Suppress - using TSelf = TAlterTableOptions; - /// @endcond - - /// Change table schema. - FLUENT_FIELD_OPTION(TTableSchema, Schema); - - /// Alter table between static and dynamic mode. - FLUENT_FIELD_OPTION(bool, Dynamic); - - /// - /// @brief Changes id of upstream replica on metacluster. 
- /// - /// @see https://yt.yandex-team.ru/docs/description/dynamic_tables/replicated_dynamic_tables - FLUENT_FIELD_OPTION(TReplicaId, UpstreamReplicaId); -}; - -/// -/// @brief Options for @ref NYT::IClient::LookupRows -/// -/// @see https://yt.yandex-team.ru/docs/api/commands#lookup_rows -struct TLookupRowsOptions -{ - /// @cond Doxygen_Suppress - using TSelf = TLookupRowsOptions; - /// @endcond - - /// Timeout for operation. - FLUENT_FIELD_OPTION(TDuration, Timeout); - - /// Column names to return. - FLUENT_FIELD_OPTION(TColumnNames, Columns); - - /// - /// @brief Whether to return rows that were not found in table. - /// - /// If set to true List returned by LookupRows method will have same - /// length as list of keys. If row is not found in table corresponding item in list - /// will have null value. - FLUENT_FIELD_DEFAULT(bool, KeepMissingRows, false); - - /// If set to true returned values will have "timestamp" attribute. - FLUENT_FIELD_OPTION(bool, Versioned); -}; - -/// -/// @brief Options for @ref NYT::IClient::SelectRows -/// -/// @see https://yt.yandex-team.ru/docs/api/commands#select_rows -struct TSelectRowsOptions -{ - /// @cond Doxygen_Suppress - using TSelf = TSelectRowsOptions; - /// @endcond - - /// Timeout for operation. - FLUENT_FIELD_OPTION(TDuration, Timeout); - - /// - /// @brief Limitation for number of rows read by single node. - /// - /// @see https://yt.yandex-team.ru/docs/description/dynamic_tables/dyn_query_language#ogranicheniya-na-slozhnost-zaprosa-(opcii) - FLUENT_FIELD_OPTION(i64, InputRowLimit); - - /// - /// @brief Limitation for number of output rows on single cluster node. - /// - /// @see https://yt.yandex-team.ru/docs/description/dynamic_tables/dyn_query_language#ogranicheniya-na-slozhnost-zaprosa-(opcii) - FLUENT_FIELD_OPTION(i64, OutputRowLimit); - - /// - /// @brief Maximum row ranges derived from WHERE clause. 
- /// - /// @see https://yt.yandex-team.ru/docs/description/dynamic_tables/dyn_query_language#ogranicheniya-na-slozhnost-zaprosa-(opcii) - FLUENT_FIELD_DEFAULT(ui64, RangeExpansionLimit, 1000); - - /// - /// @brief Whether to fail if InputRowLimit or OutputRowLimit is exceeded. - /// - /// @see https://yt.yandex-team.ru/docs/description/dynamic_tables/dyn_query_language#ogranicheniya-na-slozhnost-zaprosa-(opcii) - FLUENT_FIELD_DEFAULT(bool, FailOnIncompleteResult, true); - - /// @brief Enable verbose logging on server side. - FLUENT_FIELD_DEFAULT(bool, VerboseLogging, false); - - FLUENT_FIELD_DEFAULT(bool, EnableCodeCache, true); -}; - -/// Options for NYT::CreateClient; -struct TCreateClientOptions -{ - /// @cond Doxygen_Suppress - using TSelf = TCreateClientOptions; - /// @endcond - - /// @brief Impersonated user name. - /// - /// If authenticated user is allowed to impersonate other YT users (e.g. yql_agent), this field may be used to override user name. - FLUENT_FIELD_OPTION(TString, ImpersonationUser); - - /// @brief User token. - /// - /// @see NYT::TCreateClientOptions::TokenPath - FLUENT_FIELD(TString, Token); - - /// @brief Path to the file where user token is stored. - /// - /// Token is looked in these places in following order: - /// - @ref NYT::TCreateClientOptions::Token - /// - @ref NYT::TCreateClientOptions::TokenPath - /// - `TConfig::Get()->Token` option. - /// - `YT_TOKEN` environment variable - /// - `YT_SECURE_VAULT_YT_TOKEN` environment variable - /// - File specified in `YT_TOKEN_PATH` environment variable - /// - `$HOME/.yt/token` file. - FLUENT_FIELD(TString, TokenPath); - - /// @brief TVM service ticket producer. - /// - /// We store a wrapper of NYT::TIntrusivePtr here (not a NYT::TIntrusivePtr), - /// because otherwise other projects will have build problems - /// because of visibility of two different `TIntrusivePtr`-s (::TInstrusivePtr and NYT::TInstrusivePtr). 
- /// - /// @see NYT::NAuth::TServiceTicketClientAuth - /// {@ - NAuth::IServiceTicketAuthPtrWrapperPtr ServiceTicketAuth_ = nullptr; - TSelf& ServiceTicketAuth(const NAuth::IServiceTicketAuthPtrWrapper& wrapper); - /// @} - - /// @brief Use tvm-only endpoints in cluster connection. - FLUENT_FIELD_DEFAULT(bool, TvmOnly, false); - - /// @brief Use HTTPs (use HTTP client from yt/yt/core always). - /// - /// @see UseCoreHttpClient - FLUENT_FIELD_DEFAULT(bool, UseTLS, false); - - /// @brief Use HTTP client from yt/yt/core. - FLUENT_FIELD_DEFAULT(bool, UseCoreHttpClient, false); - - /// - /// @brief RetryConfig provider allows to fine tune request retries. - /// - /// E.g. set total timeout for all retries. - FLUENT_FIELD_DEFAULT(IRetryConfigProviderPtr, RetryConfigProvider, nullptr); - - /// @brief Override global config for the client. - /// - /// The config contains implementation parameters such as connection timeouts, - /// access token, api version and more. - /// @see NYT::TConfig - FLUENT_FIELD_DEFAULT(TConfigPtr, Config, nullptr); -}; - -/// -/// @brief Options for @ref NYT::IBatchRequest::ExecuteBatch -/// -/// @see https://yt.yandex-team.ru/docs/api/commands#execute_batch -struct TExecuteBatchOptions -{ - /// @cond Doxygen_Suppress - using TSelf = TExecuteBatchOptions; - /// @endcond - - /// - /// @brief How many requests will be executed in parallel on the cluster. - /// - /// This parameter could be used to avoid RequestLimitExceeded errors. - FLUENT_FIELD_OPTION(ui64, Concurrency); - - /// - /// @brief Maximum size of batch sent in one request to server. - /// - /// Huge batches are executed using multiple requests. - /// BatchPartMaxSize is maximum size of single request that goes to server - /// If not specified it is set to `Concurrency * 5' - FLUENT_FIELD_OPTION(ui64, BatchPartMaxSize); -}; - -/// -/// @brief Durability mode. 
-/// -/// @see NYT::TTabletTransactionOptions::Durability -/// @see https://yt.yandex-team.ru/docs/description/dynamic_tables/sorted_dynamic_tables#sohrannost -enum class EDurability -{ - /// Sync mode (default). - Sync /* "sync" */, - - /// Async mode (might reduce latency of write requests, but less reliable). - Async /* "async" */, -}; - -/// -/// @brief Atomicity mode. -/// -/// @see NYT::TTabletTransactionOptions::Atomicity -/// @see https://yt.yandex-team.ru/docs/description/dynamic_tables/sorted_dynamic_tables#sohrannost -enum class EAtomicity -{ - /// Transactions are non-atomic (might reduce latency of write requests). - None /* "none" */, - - /// Transactions are atomic (default). - Full /* "full" */, -}; - -/// -/// @brief Table replica mode. -/// -/// @see https://yt.yandex-team.ru/docs/description/dynamic_tables/replicated_dynamic_tables#atributy -enum class ETableReplicaMode -{ - Sync /* "sync" */, - Async /* "async" */, -}; - -/// Base class for options dealing with I/O to dynamic tables. -template <typename TDerived> -struct TTabletTransactionOptions -{ - /// @cond Doxygen_Suppress - using TSelf = TDerived; - /// @endcond - - /// - /// @brief Atomicity mode of operation - /// - /// Setting to NYT::EAtomicity::None allows improving latency of operations - /// at the cost of weakening contracts. - /// - /// @note Use with care. - /// - /// @see https://yt.yandex-team.ru/docs/description/dynamic_tables/sorted_dynamic_tables#oslablenie-garantij - FLUENT_FIELD_OPTION(EAtomicity, Atomicity); - - /// - /// @brief Durability mode of operation - /// - /// Setting to NYT::EDurability::Async allows improving latency of operations - /// at the cost of weakening contracts. - /// - /// @note Use with care. 
- /// - /// @see https://yt.yandex-team.ru/docs/description/dynamic_tables/sorted_dynamic_tables#oslablenie-garantij - FLUENT_FIELD_OPTION(EDurability, Durability); -}; - -/// -/// @brief Options for NYT::IClient::InsertRows -/// -/// @see https://yt.yandex-team.ru/docs/api/commands.html#insert_rows -struct TInsertRowsOptions - : public TTabletTransactionOptions<TInsertRowsOptions> -{ - /// - /// @brief Whether to overwrite missing columns with nulls. - /// - /// By default all columns missing in input data are set to Null and overwrite currently stored value. - /// If `Update` is set to true currently stored value will not be overwritten for columns that are missing in input data. - FLUENT_FIELD_OPTION(bool, Update); - - /// - /// @brief Whether to overwrite or aggregate aggregated columns. - /// - /// Used with aggregating columns. - /// By default value in aggregating column will be overwritten. - /// If `Aggregate` is set to true row will be considered as delta and it will be aggregated with currently stored value. - FLUENT_FIELD_OPTION(bool, Aggregate); - - /// - /// @brief Whether to fail when inserting to table without sync replica. - /// - /// Used for insert operation for tables without sync replica. - /// https://yt.yandex-team.ru/docs/description/dynamic_tables/replicated_dynamic_tables#write - /// Default value is 'false'. So insertion into table without sync replicas fails. - /// NOTE(review): the stated default ('false') and the stated consequence (insertion fails) look inconsistent; confirm the effective default. - FLUENT_FIELD_OPTION(bool, RequireSyncReplica); -}; - -/// -/// @brief Options for NYT::IClient::DeleteRows -/// -/// @see https://yt.yandex-team.ru/docs/api/commands.html#delete_rows -struct TDeleteRowsOptions - : public TTabletTransactionOptions<TDeleteRowsOptions> -{ - /// - /// @brief Whether to fail when deleting from table without sync replica. - /// - /// Used for delete operation for tables without sync replica. - /// https://yt.yandex-team.ru/docs/description/dynamic_tables/replicated_dynamic_tables#write - /// Default value is 'false'. 
So deletion from table without sync replicas fails. - FLUENT_FIELD_OPTION(bool, RequireSyncReplica); -}; - -/// -/// @brief Options for NYT::IClient::TrimRows -/// -/// @see https://yt.yandex-team.ru/docs/api/commands.html#trim_rows -struct TTrimRowsOptions - : public TTabletTransactionOptions<TTrimRowsOptions> -{ }; - -/// @brief Options for NYT::IClient::AlterTableReplica -/// -/// @see https://yt.yandex-team.ru/docs/api/commands.html#alter_table_replica -/// @see https://yt.yandex-team.ru/docs/description/dynamic_tables/replicated_dynamic_tables -struct TAlterTableReplicaOptions -{ - /// @cond Doxygen_Suppress - using TSelf = TAlterTableReplicaOptions; - /// @endcond - - /// - /// @brief Whether to enable or disable replica. - /// - /// Doesn't change state of replica if `Enabled` is not set. - FLUENT_FIELD_OPTION(bool, Enabled); - - /// - /// @brief Change replica mode. - /// - /// Doesn't change replica mode if `Mode` is not set. - FLUENT_FIELD_OPTION(ETableReplicaMode, Mode); -}; - -/// -/// @brief Options for @ref NYT::IClient::GetFileFromCache -/// -/// @note They are empty for now but options might appear in the future. -/// -/// @see https://yt.yandex-team.ru/docs/api/commands.html#get_file_from_cache -struct TGetFileFromCacheOptions -{ - /// @cond Doxygen_Suppress - using TSelf = TGetFileFromCacheOptions; - /// @endcond -}; - -/// -/// @brief Options for @ref NYT::IClient::PutFileToCache -/// -/// @see https://yt.yandex-team.ru/docs/api/commands.html#put_file_to_cache -struct TPutFileToCacheOptions -{ - /// @cond Doxygen_Suppress - using TSelf = TPutFileToCacheOptions; - /// @endcond - - /// Whether to preserve `expiration_timeout` attribute of source node. - FLUENT_FIELD_OPTION(bool, PreserveExpirationTimeout); -}; - -/// -/// Type of permission used in ACL. 
-/// -/// @see https://yt.yandex-team.ru/docs/description/common/access_control -enum class EPermission : int -{ - /// Applies to: all objects. - Read /* "read" */, - - /// Applies to: all objects. - Write /* "write" */, - - /// Applies to: accounts / pools. - Use /* "use" */, - - /// Applies to: all objects. - Administer /* "administer" */, - - /// Applies to: schemas. - Create /* "create" */, - - /// Applies to: all objects. - Remove /* "remove" */, - - /// Applies to: tables. - Mount /* "mount" */, - - /// Applies to: operations. - Manage /* "manage" */, -}; - -/// Whether permission is granted or denied. -enum class ESecurityAction : int -{ - /// Permission is granted. - Allow /* "allow" */, - - /// Permission is denied. - Deny /* "deny" */, -}; - -/// -/// @brief Options for @ref NYT::IClient::CheckPermission -/// -/// @see https://yt.yandex-team.ru/docs/api/commands.html#check_permission -struct TCheckPermissionOptions -{ - /// @cond Doxygen_Suppress - using TSelf = TCheckPermissionOptions; - /// @endcond - - /// Columns to check permission to (for tables only). - FLUENT_VECTOR_FIELD(TString, Column); -}; - -/// -/// @brief Columnar statistics fetching mode. -/// -/// @ref NYT::TGetTableColumnarStatisticsOptions::FetcherMode -enum class EColumnarStatisticsFetcherMode -{ - /// Slow mode for fetching precise columnar statistics. - FromNodes /* "from_nodes" */, - - /// - /// @brief Fast mode for fetching lightweight columnar statistics. - /// - /// Relative precision is 1 / 256. - /// - /// @note Might be unavailable for old tables in that case some upper bound is returned. - FromMaster /* "from_master" */, - - /// Use lightweight columnar statistics (FromMaster) if available otherwise switch to slow but precise mode (FromNodes). 
- Fallback /* "fallback" */, -}; - -/// -/// @brief Options for @ref NYT::IClient::GetTableColumnarStatistics -/// -/// @see https://yt.yandex-team.ru/docs/api/commands.html#get_table_columnar_statistics -struct TGetTableColumnarStatisticsOptions -{ - /// @cond Doxygen_Suppress - using TSelf = TGetTableColumnarStatisticsOptions; - /// @endcond - - /// - /// @brief Mode of statistics fetching. - /// - /// @ref NYT::EColumnarStatisticsFetcherMode - FLUENT_FIELD_OPTION(EColumnarStatisticsFetcherMode, FetcherMode); -}; - -/// -/// @brief Table partitioning mode. -/// -/// @ref NYT::TGetTablePartitionsOptions::PartitionMode -enum class ETablePartitionMode -{ - /// - /// @brief Ignores the order of input tables and their chunk and sorting orders. - /// - Unordered /* "unordered" */, - - /// - /// @brief The order of table ranges inside each partition obey the order of input tables and their chunk orders. - /// - Ordered /* "ordered" */, -}; - -/// -/// @brief Options for @ref NYT::IClient::GetTablePartitions -/// -struct TGetTablePartitionsOptions -{ - /// @cond Doxygen_Suppress - using TSelf = TGetTablePartitionsOptions; - /// @endcond - - /// - /// @brief Table partitioning mode. - /// - /// @ref NYT::ETablePartitionMode - FLUENT_FIELD(ETablePartitionMode, PartitionMode); - - /// - /// @brief Approximate data weight of each output partition. - /// - FLUENT_FIELD(i64, DataWeightPerPartition); - - /// - /// @brief Maximum output partition count. - /// - /// Consider the situation when the `MaxPartitionCount` is given - /// and the total data weight exceeds `MaxPartitionCount * DataWeightPerPartition`. - /// If `AdjustDataWeightPerPartition` is |true| - /// `GetTablePartitions` will yield partitions exceeding the `DataWeightPerPartition`. - /// If `AdjustDataWeightPerPartition` is |false| - /// the partitioning will be aborted as soon as the output partition count exceeds this limit. 
- FLUENT_FIELD_OPTION(int, MaxPartitionCount); - - /// - /// @brief Allow the data weight per partition to exceed `DataWeightPerPartition` when `MaxPartitionCount` is set. - /// - /// |True| by default. - FLUENT_FIELD_DEFAULT(bool, AdjustDataWeightPerPartition, true); -}; - -/// -/// @brief Options for @ref NYT::IClient::GetTabletInfos -/// -/// @note They are empty for now but options might appear in the future. -/// -/// @see https://yt.yandex-team.ru/docs/api/commands.html#get_tablet_infos -struct TGetTabletInfosOptions -{ - /// @cond Doxygen_Suppress - using TSelf = TGetTabletInfosOptions; - /// @endcond -}; - -/// Options for @ref NYT::IClient::SkyShareTable -struct TSkyShareTableOptions -{ - /// @cond Doxygen_Suppress - using TSelf = TSkyShareTableOptions; - /// @endcond - - /// - /// @brief Key columns that are used to group files in a table into torrents. - /// - /// One torrent is created for each value of `KeyColumns` columns. - /// If not specified, all files go into single torrent. - FLUENT_FIELD_OPTION(TColumnNames, KeyColumns); - - /// @brief Allow skynet manager to return fastbone links to skynet. 
See YT-11437 - FLUENT_FIELD_OPTION(bool, EnableFastbone); -}; - -//////////////////////////////////////////////////////////////////////////////// - -} // namespace NYT diff --git a/yt/cpp/mapreduce/interface/common.cpp b/yt/cpp/mapreduce/interface/common.cpp deleted file mode 100644 index f6d60127cea..00000000000 --- a/yt/cpp/mapreduce/interface/common.cpp +++ /dev/null @@ -1,664 +0,0 @@ -#include "common.h" - -#include "errors.h" -#include "format.h" -#include "serialize.h" -#include "fluent.h" - -#include <yt/yt_proto/yt/formats/extension.pb.h> - -#include <library/cpp/yson/node/node_builder.h> -#include <library/cpp/yson/node/node_io.h> -#include <library/cpp/type_info/type.h> - -#include <util/generic/xrange.h> - -namespace NYT { - -using ::google::protobuf::FieldDescriptor; -using ::google::protobuf::Descriptor; - -//////////////////////////////////////////////////////////////////////////////// - -TSortColumn::TSortColumn(TStringBuf name, ESortOrder sortOrder) - : Name_(name) - , SortOrder_(sortOrder) -{ } - -TSortColumn::TSortColumn(const TString& name, ESortOrder sortOrder) - : TSortColumn(static_cast<TStringBuf>(name), sortOrder) -{ } - -TSortColumn::TSortColumn(const char* name, ESortOrder sortOrder) - : TSortColumn(static_cast<TStringBuf>(name), sortOrder) -{ } - -const TSortColumn& TSortColumn::EnsureAscending() const -{ - Y_ENSURE(SortOrder() == ESortOrder::SO_ASCENDING); - return *this; -} - -TNode TSortColumn::ToNode() const -{ - return BuildYsonNodeFluently().Value(*this); -} - -//////////////////////////////////////////////////////////////////////////////// -// Below lie backward compatibility methods. 
-//////////////////////////////////////////////////////////////////////////////// - -TSortColumn& TSortColumn::operator = (TStringBuf name) -{ - EnsureAscending(); - Name_ = name; - return *this; -} - -TSortColumn& TSortColumn::operator = (const TString& name) -{ - return (*this = static_cast<TStringBuf>(name)); -} - -TSortColumn& TSortColumn::operator = (const char* name) -{ - return (*this = static_cast<TStringBuf>(name)); -} - -bool TSortColumn::operator == (TStringBuf rhsName) const -{ - EnsureAscending(); - return Name_ == rhsName; -} - -bool TSortColumn::operator != (TStringBuf rhsName) const -{ - return !(*this == rhsName); -} - -bool TSortColumn::operator == (const TString& rhsName) const -{ - return *this == static_cast<TStringBuf>(rhsName); -} - -bool TSortColumn::operator != (const TString& rhsName) const -{ - return !(*this == rhsName); -} - -bool TSortColumn::operator == (const char* rhsName) const -{ - return *this == static_cast<TStringBuf>(rhsName); -} - -bool TSortColumn::operator != (const char* rhsName) const -{ - return !(*this == rhsName); -} - -TSortColumn::operator TStringBuf() const -{ - EnsureAscending(); - return Name_; -} - -TSortColumn::operator TString() const -{ - return TString(static_cast<TStringBuf>(*this)); -} - -TSortColumn::operator std::string() const -{ - EnsureAscending(); - return static_cast<std::string>(Name_); -} - -//////////////////////////////////////////////////////////////////////////////// - -TSortColumns::TSortColumns() -{ } - -TSortColumns::TSortColumns(const TVector<TString>& names) -{ - Parts_.assign(names.begin(), names.end()); -} - -TSortColumns::TSortColumns(const TColumnNames& names) - : TSortColumns(names.Parts_) -{ } - -TSortColumns::operator TColumnNames() const -{ - return TColumnNames(EnsureAscending().GetNames()); -} - -const TSortColumns& TSortColumns::EnsureAscending() const -{ - for (const auto& sortColumn : Parts_) { - sortColumn.EnsureAscending(); - } - return *this; -} - -TVector<TString> 
TSortColumns::GetNames() const -{ - TVector<TString> names; - names.reserve(Parts_.size()); - for (const auto& sortColumn : Parts_) { - names.push_back(sortColumn.Name()); - } - return names; -} - -//////////////////////////////////////////////////////////////////////////////// - -/// Maps a legacy EValueType enumerator to the matching NTi primitive type. -/// Throws yexception for a value that is not a declared enumerator. -static NTi::TTypePtr OldTypeToTypeV3(EValueType type) -{ - switch (type) { - case VT_INT64: - return NTi::Int64(); - case VT_UINT64: - return NTi::Uint64(); - - case VT_DOUBLE: - return NTi::Double(); - - case VT_BOOLEAN: - return NTi::Bool(); - - case VT_STRING: - return NTi::String(); - - case VT_ANY: - return NTi::Yson(); - - case VT_INT8: - return NTi::Int8(); - case VT_INT16: - return NTi::Int16(); - case VT_INT32: - return NTi::Int32(); - - case VT_UINT8: - return NTi::Uint8(); - case VT_UINT16: - return NTi::Uint16(); - case VT_UINT32: - return NTi::Uint32(); - - case VT_UTF8: - return NTi::Utf8(); - - case VT_NULL: - return NTi::Null(); - - case VT_VOID: - return NTi::Void(); - - case VT_DATE: - return NTi::Date(); - case VT_DATETIME: - return NTi::Datetime(); - case VT_TIMESTAMP: - return NTi::Timestamp(); - case VT_INTERVAL: - return NTi::Interval(); - - case VT_FLOAT: - return NTi::Float(); - case VT_JSON: - return NTi::Json(); - } - // Not reached for declared enumerators; without it an out-of-range value would fall off the end of a non-void function. - ythrow yexception() << "Invalid value type " << static_cast<int>(type); -} - -/// Maps a type_v3 type to the legacy EValueType; TColumnSchema::Type() reports the first element of the pair and TColumnSchema::Required() the second. -static std::pair<EValueType, bool> Simplify(const NTi::TTypePtr& type) -{ - using namespace NTi; - const auto typeName = type->GetTypeName(); - switch (typeName) { - case ETypeName::Bool: - return {VT_BOOLEAN, true}; - - case ETypeName::Int8: - return {VT_INT8, true}; - case ETypeName::Int16: - return {VT_INT16, true}; - case ETypeName::Int32: - return {VT_INT32, true}; - case ETypeName::Int64: - return {VT_INT64, true}; - - case ETypeName::Uint8: - return {VT_UINT8, true}; - case ETypeName::Uint16: - return {VT_UINT16, true}; - case ETypeName::Uint32: - return {VT_UINT32, true}; - case ETypeName::Uint64: - return {VT_UINT64, true}; - - case ETypeName::Float: - return {VT_FLOAT, true}; - case ETypeName::Double: - return {VT_DOUBLE, true}; - - 
case ETypeName::String: - return {VT_STRING, true}; - case ETypeName::Utf8: - return {VT_UTF8, true}; - - case ETypeName::Date: - return {VT_DATE, true}; - case ETypeName::Datetime: - return {VT_DATETIME, true}; - case ETypeName::Timestamp: - return {VT_TIMESTAMP, true}; - case ETypeName::Interval: - return {VT_INTERVAL, true}; - - case ETypeName::TzDate: - case ETypeName::TzDatetime: - case ETypeName::TzTimestamp: - break; - - case ETypeName::Json: - return {VT_JSON, true}; - case ETypeName::Decimal: - return {VT_STRING, true}; - case ETypeName::Uuid: - break; - case ETypeName::Yson: - return {VT_ANY, true}; - - case ETypeName::Void: - return {VT_VOID, false}; - case ETypeName::Null: - return {VT_NULL, false}; - - case ETypeName::Optional: - { - auto itemType = type->AsOptional()->GetItemType(); - if (itemType->IsPrimitive()) { - auto simplified = Simplify(itemType->AsPrimitive()); - if (simplified.second) { - simplified.second = false; - return simplified; - } - } - return {VT_ANY, false}; - } - case ETypeName::List: - return {VT_ANY, true}; - case ETypeName::Dict: - return {VT_ANY, true}; - case ETypeName::Struct: - return {VT_ANY, true}; - case ETypeName::Tuple: - return {VT_ANY, true}; - case ETypeName::Variant: - return {VT_ANY, true}; - case ETypeName::Tagged: - return Simplify(type->AsTagged()->GetItemType()); - } - ythrow TApiUsageError() << "Unsupported type: " << typeName; -} - -NTi::TTypePtr ToTypeV3(EValueType type, bool required) -{ - auto typeV3 = OldTypeToTypeV3(type); - if (!Simplify(typeV3).second) { - if (required) { - ythrow TApiUsageError() << "type: " << type << " cannot be required"; - } else { - return typeV3; - } - } - if (required) { - return typeV3; - } else { - return NTi::Optional(typeV3); - } -} - -TColumnSchema::TColumnSchema() - : TypeV3_(NTi::Optional(NTi::Int64())) -{ } - -EValueType TColumnSchema::Type() const -{ - return Simplify(TypeV3_).first; -} - -TColumnSchema& TColumnSchema::Type(EValueType type) & -{ - return 
Type(ToTypeV3(type, false)); -} - -/// Rvalue overload: delegates to the lvalue setter and moves the object out instead of copying it. -TColumnSchema TColumnSchema::Type(EValueType type) && -{ - return std::move(Type(ToTypeV3(type, false))); -} - -/// Sets the type_v3 type of the column; aborts via Y_VERIFY if `type` is null. -TColumnSchema& TColumnSchema::Type(const NTi::TTypePtr& type) & -{ - Y_VERIFY(type.Get(), "Cannot create column schema with nullptr type"); - TypeV3_ = type; - return *this; -} - -/// Rvalue overload of the setter above; `*this` is expiring, so it is moved into the result. -TColumnSchema TColumnSchema::Type(const NTi::TTypePtr& type) && -{ - Y_VERIFY(type.Get(), "Cannot create column schema with nullptr type"); - TypeV3_ = type; - return std::move(*this); -} - -TColumnSchema& TColumnSchema::TypeV3(const NTi::TTypePtr& type) & -{ - return Type(type); -} - -TColumnSchema TColumnSchema::TypeV3(const NTi::TTypePtr& type) && -{ - return std::move(Type(type)); -} - -NTi::TTypePtr TColumnSchema::TypeV3() const -{ - return TypeV3_; -} - -bool TColumnSchema::Required() const -{ - return Simplify(TypeV3_).second; -} - -TColumnSchema& TColumnSchema::Type(EValueType type, bool required) & -{ - return Type(ToTypeV3(type, required)); -} - -TColumnSchema TColumnSchema::Type(EValueType type, bool required) && -{ - return std::move(Type(ToTypeV3(type, required))); -} - -bool operator==(const TColumnSchema& lhs, const TColumnSchema& rhs) -{ - return - lhs.Name() == rhs.Name() && - NTi::NEq::TStrictlyEqual()(lhs.TypeV3(), rhs.TypeV3()) && - lhs.SortOrder() == rhs.SortOrder() && - lhs.Lock() == rhs.Lock() && - lhs.Expression() == rhs.Expression() && - lhs.Aggregate() == rhs.Aggregate() && - lhs.Group() == rhs.Group(); -} - -//////////////////////////////////////////////////////////////////////////////// - -bool TTableSchema::Empty() const -{ - return Columns_.empty(); -} - -TTableSchema& TTableSchema::AddColumn(const TString& name, EValueType type) & -{ - Columns_.push_back(TColumnSchema().Name(name).Type(type)); - return *this; -} - -TTableSchema TTableSchema::AddColumn(const TString& name, EValueType type) && -{ - return std::move(AddColumn(name, type)); -} - -TTableSchema& TTableSchema::AddColumn(const TString& name, EValueType type, ESortOrder sortOrder) & -{ - 
Columns_.push_back(TColumnSchema().Name(name).Type(type).SortOrder(sortOrder)); - return *this; -} - -TTableSchema TTableSchema::AddColumn(const TString& name, EValueType type, ESortOrder sortOrder) && -{ - return std::move(AddColumn(name, type, sortOrder)); -} - -TTableSchema& TTableSchema::AddColumn(const TString& name, const NTi::TTypePtr& type) & -{ - Columns_.push_back(TColumnSchema().Name(name).Type(type)); - return *this; -} - -TTableSchema TTableSchema::AddColumn(const TString& name, const NTi::TTypePtr& type) && -{ - return std::move(AddColumn(name, type)); -} - -TTableSchema& TTableSchema::AddColumn(const TString& name, const NTi::TTypePtr& type, ESortOrder sortOrder) & -{ - Columns_.push_back(TColumnSchema().Name(name).Type(type).SortOrder(sortOrder)); - return *this; -} - -TTableSchema TTableSchema::AddColumn(const TString& name, const NTi::TTypePtr& type, ESortOrder sortOrder) && -{ - return std::move(AddColumn(name, type, sortOrder)); -} - -TTableSchema& TTableSchema::SortBy(const TSortColumns& sortColumns) & -{ - Y_ENSURE(sortColumns.Parts_.size() <= Columns_.size()); - - THashMap<TString, ui64> sortColumnIndex; - for (auto i: xrange(sortColumns.Parts_.size())) { - Y_ENSURE(sortColumnIndex.emplace(sortColumns.Parts_[i].Name(), i).second, - "Key column name '" << sortColumns.Parts_[i].Name() << "' repeats in columns list"); - } - - TVector<TColumnSchema> newColumnsSorted(sortColumns.Parts_.size()); - TVector<TColumnSchema> newColumnsUnsorted; - for (auto& column : Columns_) { - auto it = sortColumnIndex.find(column.Name()); - if (it == sortColumnIndex.end()) { - column.ResetSortOrder(); - newColumnsUnsorted.push_back(std::move(column)); - } else { - auto index = it->second; - const auto& sortColumn = sortColumns.Parts_[index]; - column.SortOrder(sortColumn.SortOrder()); - newColumnsSorted[index] = std::move(column); - sortColumnIndex.erase(it); - } - } - - Y_ENSURE(sortColumnIndex.empty(), "Column name '" << sortColumnIndex.begin()->first - << "' not 
found in table schema"); - - newColumnsSorted.insert(newColumnsSorted.end(), newColumnsUnsorted.begin(), newColumnsUnsorted.end()); - Columns_ = std::move(newColumnsSorted); - - return *this; -} - -TTableSchema TTableSchema::SortBy(const TSortColumns& sortColumns) && -{ - return std::move(SortBy(sortColumns)); -} - -TVector<TColumnSchema>& TTableSchema::MutableColumns() -{ - return Columns_; -} - -TNode TTableSchema::ToNode() const -{ - TNode result; - TNodeBuilder builder(&result); - Serialize(*this, &builder); - return result; -} - -TTableSchema TTableSchema::FromNode(const TNode& node) -{ - TTableSchema schema; - Deserialize(schema, node); - return schema; -} - -bool operator==(const TTableSchema& lhs, const TTableSchema& rhs) -{ - return - lhs.Columns() == rhs.Columns() && - lhs.Strict() == rhs.Strict() && - lhs.UniqueKeys() == rhs.UniqueKeys(); -} - -//////////////////////////////////////////////////////////////////////////////// - -TKeyBound::TKeyBound(ERelation relation, TKey key) - : Relation_(relation) - , Key_(std::move(key)) -{ } - -//////////////////////////////////////////////////////////////////////////////// - -TTableSchema CreateTableSchema( - const Descriptor& messageDescriptor, - const TSortColumns& sortColumns, - bool keepFieldsWithoutExtension) -{ - auto result = CreateTableSchema(messageDescriptor, keepFieldsWithoutExtension); - if (!sortColumns.Parts_.empty()) { - result.SortBy(sortColumns.Parts_); - } - return result; -} - -TTableSchema CreateTableSchema(NTi::TTypePtr type) -{ - Y_VERIFY(type); - TTableSchema schema; - Deserialize(schema, NodeFromYsonString(NTi::NIo::AsYtSchema(type.Get()))); - return schema; -} - -//////////////////////////////////////////////////////////////////////////////// - -bool IsTrivial(const TReadLimit& readLimit) -{ - return !readLimit.Key_ && !readLimit.RowIndex_ && !readLimit.Offset_ && !readLimit.TabletIndex_ && !readLimit.KeyBound_; -} - -EValueType NodeTypeToValueType(TNode::EType nodeType) -{ - switch 
(nodeType) { - case TNode::EType::Int64: return VT_INT64; - case TNode::EType::Uint64: return VT_UINT64; - case TNode::EType::String: return VT_STRING; - case TNode::EType::Double: return VT_DOUBLE; - case TNode::EType::Bool: return VT_BOOLEAN; - default: - ythrow yexception() << "Cannot convert TNode type " << nodeType << " to EValueType"; - } -} - -//////////////////////////////////////////////////////////////////////////////// - -/// Returns the range list of `path`: a shared empty vector when no range list is set, the stored list when it is non-empty. -/// Throws TApiUsageError when the range list is set but empty. -const TVector<TReadRange>& GetRangesCompat(const TRichYPath& path) -{ - static const TVector<TReadRange> empty; - - const auto& maybeRanges = path.GetRanges(); - if (maybeRanges.Empty()) { - return empty; - } else if (maybeRanges->size() > 0) { - return *maybeRanges; - } else { - // If you see this exception, it means that the caller of this function doesn't know what to do - // with a RichYPath whose range list is set but empty. - // - // To avoid this exception the caller must explicitly handle such a case. - // NB. YT-17683 - ythrow TApiUsageError() << "Unsupported RichYPath: explicitly empty range list"; - } -} - -//////////////////////////////////////////////////////////////////////////////// - -namespace NDetail { - -//////////////////////////////////////////////////////////////////////////////// - -TString ToString(EValueType type) -{ - switch (type) { - case VT_INT8: - return "int8"; - case VT_INT16: - return "int16"; - case VT_INT32: - return "int32"; - case VT_INT64: - return "int64"; - - case VT_UINT8: - return "uint8"; - case VT_UINT16: - return "uint16"; - case VT_UINT32: - return "uint32"; - case VT_UINT64: - return "uint64"; - - case VT_DOUBLE: - return "double"; - - case VT_BOOLEAN: - return "boolean"; - - case VT_STRING: - return "string"; - case VT_UTF8: - return "utf8"; - - case VT_ANY: - return "any"; - - case VT_NULL: - return "null"; - case VT_VOID: - return "void"; - - case VT_DATE: - return "date"; - case VT_DATETIME: - return "datetime"; - case VT_TIMESTAMP: - return "timestamp"; - case VT_INTERVAL: - 
return "interval"; - - case VT_FLOAT: - return "float"; - - case VT_JSON: - return "json"; - } - ythrow yexception() << "Invalid value type " << static_cast<int>(type); -} - -//////////////////////////////////////////////////////////////////////////////// - -} // namespace NDetail -} // namespace NYT - -template <> -void Out<NYT::TSortColumn>(IOutputStream& os, const NYT::TSortColumn& sortColumn) -{ - if (sortColumn.SortOrder() == NYT::ESortOrder::SO_ASCENDING) { - os << sortColumn.Name(); - } else { - os << NYT::BuildYsonStringFluently(NYson::EYsonFormat::Text).Value(sortColumn); - } -} diff --git a/yt/cpp/mapreduce/interface/common.h b/yt/cpp/mapreduce/interface/common.h deleted file mode 100644 index b1754ade70d..00000000000 --- a/yt/cpp/mapreduce/interface/common.h +++ /dev/null @@ -1,1301 +0,0 @@ -#pragma once - -/// -/// @file yt/cpp/mapreduce/interface/common.h -/// -/// Header containing miscellaneous structs and classes used in library. - -#include "fwd.h" - -#include <library/cpp/type_info/type_info.h> -#include <library/cpp/yson/node/node.h> - -#include <util/generic/guid.h> -#include <util/generic/map.h> -#include <util/generic/maybe.h> -#include <util/generic/ptr.h> -#include <util/system/type_name.h> -#include <util/generic/vector.h> - -#include <google/protobuf/message.h> - -#include <initializer_list> -#include <type_traits> - -namespace NYT { - -//////////////////////////////////////////////////////////////////////////////// - -/// @cond Doxygen_Suppress -#define FLUENT_FIELD(type, name) \ - type name##_; \ - TSelf& name(const type& value) \ - { \ - name##_ = value; \ - return static_cast<TSelf&>(*this); \ - } \ - static_assert(true) - -#define FLUENT_FIELD_ENCAPSULATED(type, name) \ -private: \ - type name##_; \ -public: \ - TSelf& name(const type& value) & \ - { \ - name##_ = value; \ - return static_cast<TSelf&>(*this); \ - } \ - TSelf name(const type& value) && \ - { \ - name##_ = value; \ - return static_cast<TSelf&>(*this); \ - } \ - const 
type& name() const & \ - { \ - return name##_; \ - } \ - type name() && \ - { \ - return name##_; \ - } \ - static_assert(true) - -#define FLUENT_FIELD_OPTION(type, name) \ - TMaybe<type> name##_; \ - TSelf& name(const type& value) \ - { \ - name##_ = value; \ - return static_cast<TSelf&>(*this); \ - } \ - static_assert(true) - -#define FLUENT_FIELD_OPTION_ENCAPSULATED(type, name) \ -private: \ - TMaybe<type> name##_; \ -public: \ - TSelf& name(const type& value) & \ - { \ - name##_ = value; \ - return static_cast<TSelf&>(*this); \ - } \ - TSelf name(const type& value) && \ - { \ - name##_ = value; \ - return static_cast<TSelf&>(*this); \ - } \ - TSelf& Reset##name() & \ - { \ - name##_ = Nothing(); \ - return static_cast<TSelf&>(*this); \ - } \ - TSelf Reset##name() && \ - { \ - name##_ = Nothing(); \ - return static_cast<TSelf&>(*this); \ - } \ - const TMaybe<type>& name() const& \ - { \ - return name##_; \ - } \ - TMaybe<type> name() && \ - { \ - return name##_; \ - } \ - static_assert(true) - -#define FLUENT_FIELD_DEFAULT(type, name, defaultValue) \ - type name##_ = defaultValue; \ - TSelf& name(const type& value) \ - { \ - name##_ = value; \ - return static_cast<TSelf&>(*this); \ - } \ - static_assert(true) - -#define FLUENT_FIELD_DEFAULT_ENCAPSULATED(type, name, defaultValue) \ -private: \ - type name##_ = defaultValue; \ -public: \ - TSelf& name(const type& value) & \ - { \ - name##_ = value; \ - return static_cast<TSelf&>(*this); \ - } \ - TSelf name(const type& value) && \ - { \ - name##_ = value; \ - return static_cast<TSelf&>(*this); \ - } \ - const type& name() const & \ - { \ - return name##_; \ - } \ - type name() && \ - { \ - return name##_; \ - } \ - static_assert(true) - -#define FLUENT_VECTOR_FIELD(type, name) \ - TVector<type> name##s_; \ - TSelf& Add##name(const type& value) \ - { \ - name##s_.push_back(value); \ - return static_cast<TSelf&>(*this);\ - } \ - TSelf& name##s(TVector<type> values) \ - { \ - name##s_ = std::move(values); \ - return 
static_cast<TSelf&>(*this);\ - } \ - static_assert(true) - -#define FLUENT_OPTIONAL_VECTOR_FIELD_ENCAPSULATED(type, name) \ -private: \ - TMaybe<TVector<type>> name##s_; \ -public: \ - const TMaybe<TVector<type>>& name##s() const & { \ - return name##s_; \ - } \ - TMaybe<TVector<type>>& name##s() & { \ - return name##s_; \ - } \ - TMaybe<TVector<type>> name##s() && { \ - return std::move(name##s_); \ - } \ - TSelf& Add##name(const type& value) & \ - { \ - if (name##s_.Empty()) { \ - name##s_.ConstructInPlace(); \ - } \ - name##s_->push_back(value); \ - return static_cast<TSelf&>(*this);\ - } \ - TSelf Add##name(const type& value) && \ - { \ - if (name##s_.Empty()) { \ - name##s_.ConstructInPlace(); \ - } \ - name##s_->push_back(value); \ - return static_cast<TSelf&&>(*this);\ - } \ - TSelf& name##s(TVector<type> values) & \ - { \ - name##s_ = std::move(values); \ - return static_cast<TSelf&>(*this);\ - } \ - TSelf name##s(TVector<type> values) && \ - { \ - name##s_ = std::move(values); \ - return static_cast<TSelf&&>(*this);\ - } \ - TSelf& name##s(TNothing) & \ - { \ - name##s_ = Nothing(); \ - return static_cast<TSelf&>(*this);\ - } \ - TSelf name##s(TNothing) && \ - { \ - name##s_ = Nothing(); \ - return static_cast<TSelf&&>(*this);\ - } \ - TSelf& Reset##name##s() & \ - { \ - name##s_ = Nothing(); \ - return static_cast<TSelf&>(*this);\ - } \ - TSelf Reset##name##s() && \ - { \ - name##s_ = Nothing(); \ - return static_cast<TSelf&&>(*this);\ - } \ - static_assert(true) - -#define FLUENT_VECTOR_FIELD_ENCAPSULATED(type, name) \ -private: \ - TVector<type> name##s_; \ -public: \ - TSelf& Add##name(const type& value) & \ - { \ - name##s_.push_back(value); \ - return static_cast<TSelf&>(*this);\ - } \ - TSelf Add##name(const type& value) && \ - { \ - name##s_.push_back(value); \ - return static_cast<TSelf&>(*this);\ - } \ - TSelf& name##s(TVector<type> value) & \ - { \ - name##s_ = std::move(value); \ - return static_cast<TSelf&>(*this);\ - } \ - TSelf 
name##s(TVector<type> value) && \ - { \ - name##s_ = std::move(value); \ - return static_cast<TSelf&>(*this);\ - } \ - const TVector<type>& name##s() const & \ - { \ - return name##s_; \ - } \ - TVector<type> name##s() && \ - { \ - return name##s_; \ - } \ - static_assert(true) - -#define FLUENT_MAP_FIELD(keytype, valuetype, name) \ - TMap<keytype,valuetype> name##_; \ - TSelf& Add##name(const keytype& key, const valuetype& value) \ - { \ - name##_.emplace(key, value); \ - return static_cast<TSelf&>(*this);\ - } \ - static_assert(true) - -/// @endcond - -//////////////////////////////////////////////////////////////////////////////// - -/// -/// @brief Convenience class that keeps sequence of items. -/// -/// Designed to be used as function parameter. -/// -/// Users of such function can then pass: -/// - single item, -/// - initializer list of items, -/// - vector of items; -/// as argument to this function. -/// -/// Example: -/// ``` -/// void Foo(const TOneOrMany<int>& arg); -/// ... -/// Foo(1); // ok -/// Foo({1, 2, 3}); // ok -/// ``` -template <class T, class TDerived> -struct TOneOrMany -{ - /// @cond Doxygen_Suppress - using TSelf = std::conditional_t<std::is_void_v<TDerived>, TOneOrMany, TDerived>; - /// @endcond - - /// Initialize with empty sequence. - TOneOrMany() = default; - - /// Initialize from initializer list. - template<class U> - TOneOrMany(std::initializer_list<U> il) - { - Parts_.assign(il.begin(), il.end()); - } - - /// Put arguments to sequence - template <class U, class... TArgs> - requires std::is_convertible_v<U, T> - TOneOrMany(U&& arg, TArgs&&... args) - { - // Forward `arg` so that an rvalue first argument is moved into the sequence rather than copied. - Add(std::forward<U>(arg), std::forward<TArgs>(args)...); - } - - /// Initialize from vector. - TOneOrMany(TVector<T> args) - : Parts_(std::move(args)) - { } - - /// @brief Order is defined the same way as in TVector - bool operator==(const TOneOrMany& rhs) const - { - // N.B. 
We would like to make this method to be `= default`, - // but this breaks MSVC compiler for the cases when T doesn't - // support comparison. - return Parts_ == rhs.Parts_; - } - - /// - /// @{ - /// - /// @brief Add all arguments to sequence - template <class U, class... TArgs> - requires std::is_convertible_v<U, T> - TSelf& Add(U&& part, TArgs&&... args) & - { - Parts_.push_back(std::forward<U>(part)); - if constexpr (sizeof...(args) > 0) { - [[maybe_unused]] int dummy[sizeof...(args)] = {(Parts_.push_back(std::forward<TArgs>(args)), 0) ... }; - } - return static_cast<TSelf&>(*this); - } - - template <class U, class... TArgs> - requires std::is_convertible_v<U, T> - TSelf Add(U&& part, TArgs&&... args) && - { - return std::move(Add(std::forward<U>(part), std::forward<TArgs>(args)...)); - } - /// @} - - /// Content of sequence. - TVector<T> Parts_; -}; - -//////////////////////////////////////////////////////////////////////////////// - -/// -/// @brief Type of the value that can occur in YT table. -/// -/// @ref NYT::TTableSchema -/// https://yt.yandex-team.ru/docs/description/storage/data_types -enum EValueType : int -{ - /// Int64, signed integer of 64 bits. - VT_INT64, - - /// Uint64, unsigned integer of 64 bits. - VT_UINT64, - - /// Double, floating point number of double precision (64 bits). - VT_DOUBLE, - /// Boolean, `true` or `false`. - VT_BOOLEAN, - - /// String, arbitrary byte sequence. - VT_STRING, - - /// Any, arbitrary yson document. - VT_ANY, - - /// Int8, signed integer of 8 bits. - VT_INT8, - /// Int16, signed integer of 16 bits. - VT_INT16, - /// Int32, signed integer of 32 bits. - VT_INT32, - - /// Uint8, unsigned integer of 8 bits. - VT_UINT8, - /// Uint16, unsigned integer of 16 bits. - VT_UINT16, - /// Uint32, unsigned integer of 32 bits. - VT_UINT32, - - /// Utf8, byte sequence that is valid utf8. 
- VT_UTF8, - - /// Null, absence of value (almost never used in schemas) - VT_NULL, - /// Void, absence of value (almost never used in schemas) the difference between null, and void is yql-specific. - VT_VOID, - - /// Date, number of days since Unix epoch (unsigned) - VT_DATE, - /// Datetime, number of seconds since Unix epoch (unsigned) - VT_DATETIME, - /// Timestamp, number of milliseconds since Unix epoch (unsigned) - VT_TIMESTAMP, - /// Interval, difference between two timestamps (signed) - VT_INTERVAL, - - /// Float, floating point number (32 bits) - VT_FLOAT, - /// Json, sequence of bytes that is valid json. - VT_JSON, -}; - -/// -/// @brief Sort order. -/// -/// @ref NYT::TTableSchema -enum ESortOrder : int -{ - /// Ascending sort order. - SO_ASCENDING /* "ascending" */, - /// Descending sort order. - SO_DESCENDING /* "descending" */, -}; - -/// -/// @brief Value of "optimize_for" attribute. -/// -/// @ref NYT::TRichYPath -enum EOptimizeForAttr : i8 -{ - /// Optimize for scan - OF_SCAN_ATTR /* "scan" */, - - /// Optimize for lookup - OF_LOOKUP_ATTR /* "lookup" */, -}; - -/// -/// @brief Value of "erasure_codec" attribute. -/// -/// @ref NYT::TRichYPath -enum EErasureCodecAttr : i8 -{ - /// @cond Doxygen_Suppress - EC_NONE_ATTR /* "none" */, - EC_REED_SOLOMON_6_3_ATTR /* "reed_solomon_6_3" */, - EC_LRC_12_2_2_ATTR /* "lrc_12_2_2" */, - EC_ISA_LRC_12_2_2_ATTR /* "isa_lrc_12_2_2" */, - /// @endcond -}; - -/// -/// @brief Value of "schema_modification" attribute. -/// -/// @ref NYT::TRichYPath -enum ESchemaModificationAttr : i8 -{ - SM_NONE_ATTR /* "none" */, - SM_UNVERSIONED_UPDATE /* "unversioned_update" */, -}; - -//////////////////////////////////////////////////////////////////////////////// - -/// -/// @brief Table key column description. -/// -/// The description includes column name and sort order. 
-/// -/// @anchor TSortOrder_backward_compatibility -/// @note -/// Many functions that use `TSortOrder` as argument used to take `TString` -/// (the only allowed sort order was "ascending" and user didn't have to specify it). -/// @note -/// This class is designed to provide backward compatibility for such code and therefore -/// objects of this class can be constructed and assigned from TString-like objects only. -/// -/// @see NYT::TSortOperationSpec -class TSortColumn -{ -public: - /// @cond Doxygen_Suppress - using TSelf = TSortColumn; - /// @endcond - - /// Column name - FLUENT_FIELD_ENCAPSULATED(TString, Name); - - /// Sort order - FLUENT_FIELD_DEFAULT_ENCAPSULATED(ESortOrder, SortOrder, ESortOrder::SO_ASCENDING); - - /// - /// @{ - /// - /// @brief Construct object from name and sort order - /// - /// Constructors are intentionally implicit so `TSortColumn` can be compatible with old code. - /// @ref TSortOrder_backward_compatibility - TSortColumn(TStringBuf name = {}, ESortOrder sortOrder = ESortOrder::SO_ASCENDING); - TSortColumn(const TString& name, ESortOrder sortOrder = ESortOrder::SO_ASCENDING); - TSortColumn(const char* name, ESortOrder sortOrder = ESortOrder::SO_ASCENDING); - /// @} - - /// Check that sort order is ascending, throw exception otherwise. - const TSortColumn& EnsureAscending() const; - - /// @brief Convert sort to yson representation as YT API expects it. - TNode ToNode() const; - - /// @brief Comparison is default and checks both name and sort order. - bool operator == (const TSortColumn& rhs) const = default; - - /// - /// @{ - /// - /// @brief Assign object from column name, and set sort order to `ascending`. - /// - /// This is backward compatibility methods. 
- /// - /// @ref TSortOrder_backward_compatibility - TSortColumn& operator = (TStringBuf name); - TSortColumn& operator = (const TString& name); - TSortColumn& operator = (const char* name); - /// @} - - bool operator == (const TStringBuf rhsName) const; - bool operator != (const TStringBuf rhsName) const; - bool operator == (const TString& rhsName) const; - bool operator != (const TString& rhsName) const; - bool operator == (const char* rhsName) const; - bool operator != (const char* rhsName) const; - - // Intentionally implicit conversions. - operator TString() const; - operator TStringBuf() const; - operator std::string() const; - - Y_SAVELOAD_DEFINE(Name_, SortOrder_); -}; - -/// -/// @brief List of @ref TSortColumn -/// -/// Contains a bunch of helper methods such as constructing from single object. -class TSortColumns - : public TOneOrMany<TSortColumn, TSortColumns> -{ -public: - using TOneOrMany<TSortColumn, TSortColumns>::TOneOrMany; - - /// Construct empty list. - TSortColumns(); - - /// - /// @{ - /// - /// @brief Construct list of ascending sort order columns by their names. - /// - /// Required for backward compatibility. - /// - /// @ref TSortOrder_backward_compatibility - TSortColumns(const TVector<TString>& names); - TSortColumns(const TColumnNames& names); - /// @} - - - /// - /// @brief Implicit conversion to column list. - /// - /// If all columns has ascending sort order return list of their names. - /// Throw exception otherwise. - /// - /// Required for backward compatibility. - /// - /// @ref TSortOrder_backward_compatibility - operator TColumnNames() const; - - /// Make sure that all columns are of ascending sort order. - const TSortColumns& EnsureAscending() const; - - /// Get list of column names. - TVector<TString> GetNames() const; -}; - -//////////////////////////////////////////////////////////////////////////////// - -/// Helper function to create new style type from old style one. 
-NTi::TTypePtr ToTypeV3(EValueType type, bool required); - -/// -/// @brief Single column description -/// -/// Each field describing column has setter and getter. -/// -/// Example reading field: -/// ``` -/// ... columnSchema.Name() ... -/// ``` -/// -/// Example setting field: -/// ``` -/// columnSchema.Name("my-column").Type(VT_INT64); // set name and type -/// ``` -/// -/// @ref https://yt.yandex-team.ru/docs/description/storage/static_schema -class TColumnSchema -{ -public: - /// @cond Doxygen_Suppress - using TSelf = TColumnSchema; - /// @endcond - - /// - /// @brief Construct empty column schema - /// - /// @note - /// Such a column schema cannot be used in a table schema as it doesn't have a name. - TColumnSchema(); - - /// - /// @{ - /// - /// @brief Copy constructor and copy assignment operator are default. - TColumnSchema(const TColumnSchema&) = default; - TColumnSchema& operator=(const TColumnSchema&) = default; - /// @} - - - FLUENT_FIELD_ENCAPSULATED(TString, Name); - - /// - /// @brief Functions to work with type in old manner. - /// - /// @deprecated New code is recommended to work with types using @ref NTi::TTypePtr from type_info library. - TColumnSchema& Type(EValueType type) &; - TColumnSchema Type(EValueType type) &&; - EValueType Type() const; - - /// @brief Set and get column type. - /// @{ - TColumnSchema& Type(const NTi::TTypePtr& type) &; - TColumnSchema Type(const NTi::TTypePtr& type) &&; - - TColumnSchema& TypeV3(const NTi::TTypePtr& type) &; - TColumnSchema TypeV3(const NTi::TTypePtr& type) &&; - NTi::TTypePtr TypeV3() const; - /// @} - - /// - /// @brief Raw yson representation of column type - /// @deprecated Prefer to use `TypeV3` methods. 
- FLUENT_FIELD_OPTION_ENCAPSULATED(TNode, RawTypeV3); - - /// Column sort order - FLUENT_FIELD_OPTION_ENCAPSULATED(ESortOrder, SortOrder); - - /// - /// @brief Lock group name - /// - /// @ref https://yt.yandex-team.ru/docs/description/dynamic_tables/sorted_dynamic_tables#blokirovka-stroki - FLUENT_FIELD_OPTION_ENCAPSULATED(TString, Lock); - - /// Expression defining column value - FLUENT_FIELD_OPTION_ENCAPSULATED(TString, Expression); - - /// Aggregating function name - FLUENT_FIELD_OPTION_ENCAPSULATED(TString, Aggregate); - - /// - /// @brief Storage group name - /// - /// @ref https://yt.yandex-team.ru/docs/description/storage/static_schema - FLUENT_FIELD_OPTION_ENCAPSULATED(TString, Group); - - /// - /// @brief Column requiredness. - /// - /// Required columns don't accept NULL values. - /// A column whose type is Optional<...> is not required; a column with a non-optional type is required. - bool Required() const; - - /// - /// @{ - /// - /// @brief Set type in old-style manner - TColumnSchema& Type(EValueType type, bool required) &; - TColumnSchema Type(EValueType type, bool required) &&; - /// @} - -private: - friend void Deserialize(TColumnSchema& columnSchema, const TNode& node); - NTi::TTypePtr TypeV3_; - bool Required_ = false; -}; - -/// Equality check checks all fields of column schema. -bool operator==(const TColumnSchema& lhs, const TColumnSchema& rhs); - -/// -/// @brief Description of table schema -/// -/// @see https://yt.yandex-team.ru/docs/description/storage/static_schema -class TTableSchema -{ -public: - /// @cond Doxygen_Suppress - using TSelf = TTableSchema; - /// @endcond - - /// Column schema - FLUENT_VECTOR_FIELD_ENCAPSULATED(TColumnSchema, Column); - - /// - /// @brief Strictness of the schema - /// - /// Strict schemas are not allowed to have columns not described in schema. 
- /// Nonstrict schemas are allowed to have such columns, all such missing columns are assumed to have - FLUENT_FIELD_DEFAULT_ENCAPSULATED(bool, Strict, true); - - /// - /// @brief Whether keys are unique - /// - /// This flag can be set only for schemas that have sorted columns. - /// If flag is set table cannot have multiple rows with same key. - FLUENT_FIELD_DEFAULT_ENCAPSULATED(bool, UniqueKeys, false); - - /// Get modifiable column list - TVector<TColumnSchema>& MutableColumns(); - - /// Check if schema has any described column - [[nodiscard]] bool Empty() const; - - /// Add column - TTableSchema& AddColumn(const TString& name, const NTi::TTypePtr& type, ESortOrder sortOrder) &; - /// @copydoc NYT::TTableSchema::AddColumn(const TString&, const NTi::TTypePtr&, ESortOrder)&; - TTableSchema AddColumn(const TString& name, const NTi::TTypePtr& type, ESortOrder sortOrder) &&; - - /// @copydoc NYT::TTableSchema::AddColumn(const TString&, const NTi::TTypePtr&, ESortOrder)&; - TTableSchema& AddColumn(const TString& name, const NTi::TTypePtr& type) &; - /// @copydoc NYT::TTableSchema::AddColumn(const TString&, const NTi::TTypePtr&, ESortOrder)&; - TTableSchema AddColumn(const TString& name, const NTi::TTypePtr& type) &&; - - /// Add optional column of specified type - TTableSchema& AddColumn(const TString& name, EValueType type, ESortOrder sortOrder) &; - /// @copydoc NYT::TTableSchema::AddColumn(const TString&, EValueType, ESortOrder)&; - TTableSchema AddColumn(const TString& name, EValueType type, ESortOrder sortOrder) &&; - - /// @copydoc NYT::TTableSchema::AddColumn(const TString&, EValueType, ESortOrder)&; - TTableSchema& AddColumn(const TString& name, EValueType type) &; - /// @copydoc NYT::TTableSchema::AddColumn(const TString&, EValueType, ESortOrder)&; - TTableSchema AddColumn(const TString& name, EValueType type) &&; - - /// - /// @brief Make table schema sorted by specified columns - /// - /// Resets old key columns if any - TTableSchema& SortBy(const 
TSortColumns& columns) &; - - /// @copydoc NYT::TTableSchema::SortBy(const TSortColumns&)&; - TTableSchema SortBy(const TSortColumns& columns) &&; - - /// Get yson description of table schema - [[nodiscard]] TNode ToNode() const; - - /// Parse schema from yson node - static NYT::TTableSchema FromNode(const TNode& node); - - friend void Deserialize(TTableSchema& tableSchema, const TNode& node); -}; - -/// Check for equality of all columns and all schema attributes -bool operator==(const TTableSchema& lhs, const TTableSchema& rhs); - -/// Create table schema by protobuf message descriptor -TTableSchema CreateTableSchema( - const ::google::protobuf::Descriptor& messageDescriptor, - const TSortColumns& sortColumns = TSortColumns(), - bool keepFieldsWithoutExtension = true); - -/// Create table schema by protobuf message type -template <class TProtoType, typename = std::enable_if_t<std::is_base_of_v<::google::protobuf::Message, TProtoType>>> -inline TTableSchema CreateTableSchema( - const TSortColumns& sortColumns = TSortColumns(), - bool keepFieldsWithoutExtension = true) -{ - static_assert( - std::is_base_of_v<::google::protobuf::Message, TProtoType>, - "Template argument must be derived from ::google::protobuf::Message"); - - return CreateTableSchema( - *TProtoType::descriptor(), - sortColumns, - keepFieldsWithoutExtension); -} - -/// -/// @brief Create strict table schema from `struct` type. -/// -/// Names and types of columns are taken from struct member names and types. -/// `Strict` flag is set to true, all other attribute of schema and columns -/// are left with default values -TTableSchema CreateTableSchema(NTi::TTypePtr type); - -//////////////////////////////////////////////////////////////////////////////// - -/// -/// @brief Enumeration describing comparison operation used in key bound. -/// -/// ERelation is a part of @ref NYT::TKeyBound that can be used as -/// lower or upper key limit in @ref TReadLimit. 
-/// -/// Relations `Less` and `LessOrEqual` are for upper limit and -/// relations `Greater` and `GreaterOrEqual` are for lower limit. -/// -/// It is an error to use relation in the limit of wrong kind. -/// -/// @see https://yt.yandex-team.ru/docs/description/common/ypath#rich_ypath -enum class ERelation -{ - /// - /// @brief Relation "less" - /// - /// Specifies range of keys that are before specified key. - /// Can only be used in upper limit. - Less /* "<" */, - - /// - /// @brief Relation "less or equal" - /// - /// Specifies range of keys that are before or equal to the specified key. - /// Can only be used in upper limit. - LessOrEqual /* "<=" */, - - /// - /// @brief Relation "greater" - /// - /// Specifies range of keys that are after specified key. - /// Can only be used in lower limit. - Greater /* ">" */, - - /// - /// @brief Relation "greater or equal" - /// - /// Specifies range of keys that are after or equal to the specified key. - /// Can only be used in lower limit. - GreaterOrEqual /* ">=" */, -}; - -/// -/// @brief Key with relation specifying interval of keys in lower or upper limit of @ref NYT::TReadRange -/// -/// @see https://yt.yandex-team.ru/docs/description/common/ypath#rich_ypath -struct TKeyBound -{ - /// @cond Doxygen_Suppress - using TSelf = TKeyBound; - - explicit TKeyBound(ERelation relation = ERelation::Less, TKey key = TKey{}); - - FLUENT_FIELD_DEFAULT_ENCAPSULATED(ERelation, Relation, ERelation::Less); - FLUENT_FIELD_DEFAULT_ENCAPSULATED(TKey, Key, TKey{}); - /// @endcond -}; - -/// -/// @brief Description of the read limit. -/// -/// It is actually a variant and must store exactly one field. -/// -/// @see https://yt.yandex-team.ru/docs/description/common/ypath#rich_ypath -struct TReadLimit -{ - /// @cond Doxygen_Suppress - using TSelf = TReadLimit; - /// @endcond - - /// - /// @brief KeyBound specifies table key and whether to include it - /// - /// It can be used in lower or upper limit when reading tables. 
- FLUENT_FIELD_OPTION(TKeyBound, KeyBound); - - /// - /// @brief Table key - /// - /// It can be used in exact, lower or upper limit when reading tables. - FLUENT_FIELD_OPTION(TKey, Key); - - /// - /// @brief Row index - /// - /// It can be used in exact, lower or upper limit when reading tables. - FLUENT_FIELD_OPTION(i64, RowIndex); - - /// - /// @brief File offset - /// - /// It can be used in lower or upper limit when reading files. - FLUENT_FIELD_OPTION(i64, Offset); - - /// - /// @brief Tablet index - /// - /// It can be used in lower or upper limit in dynamic table operations - FLUENT_FIELD_OPTION(i64, TabletIndex); -}; - -/// -/// @brief Range of a table or a file -/// -/// @see https://yt.yandex-team.ru/docs/description/common/ypath#rich_ypath -struct TReadRange -{ - using TSelf = TReadRange; - - /// - /// @brief Lower limit of the range - /// - /// It is usually inclusive (except when @ref NYT::TKeyBound with relation @ref NYT::ERelation::Greater is used). - FLUENT_FIELD(TReadLimit, LowerLimit); - - /// - /// @brief Upper limit of the range - /// - /// It is usually exclusive (except when @ref NYT::TKeyBound with relation @ref NYT::ERelation::LessOrEqual is used). - FLUENT_FIELD(TReadLimit, UpperLimit); - - /// Exact key or row index. - FLUENT_FIELD(TReadLimit, Exact); - - /// Create read range from row indexes: lowerLimit becomes the lower limit, upperLimit the upper limit. - static TReadRange FromRowIndices(i64 lowerLimit, i64 upperLimit) - { - return TReadRange() - .LowerLimit(TReadLimit().RowIndex(lowerLimit)) - .UpperLimit(TReadLimit().RowIndex(upperLimit)); - } - - /// Create read range from keys. - static TReadRange FromKeys(const TKey& lowerKeyInclusive, const TKey& upperKeyExclusive) - { - return TReadRange() - .LowerLimit(TReadLimit().Key(lowerKeyInclusive)) - .UpperLimit(TReadLimit().Key(upperKeyExclusive)); - } -}; - -/// -/// @brief Path with additional attributes. -/// -/// Allows to specify additional attributes for path used in some operations. 
-/// -/// @see https://yt.yandex-team.ru/docs/description/common/ypath#rich_ypath -struct TRichYPath -{ - /// @cond Doxygen_Suppress - using TSelf = TRichYPath; - /// @endcond - - /// Path itself. - FLUENT_FIELD(TYPath, Path); - - /// Specifies that path should be appended not overwritten - FLUENT_FIELD_OPTION(bool, Append); - - /// @deprecated Deprecated attribute. - FLUENT_FIELD_OPTION(bool, PartiallySorted); - - /// Specifies that path is expected to be sorted by these columns. - FLUENT_FIELD(TSortColumns, SortedBy); - - /// Add range to read. - TRichYPath& AddRange(TReadRange range) - { - if (!Ranges_) { - Ranges_.ConstructInPlace(); - } - Ranges_->push_back(std::move(range)); - return *this; - } - - TRichYPath& ResetRanges() - { - Ranges_.Clear(); - return *this; - } - - /// - /// @{ - /// - /// Return ranges to read. - /// - /// NOTE: Nothing (in TMaybe) and empty TVector are different ranges. - /// Nothing represents universal range (reader reads all table rows). - /// Empty TVector represents empty range (reader returns empty set of rows). - const TMaybe<TVector<TReadRange>>& GetRanges() const - { - return Ranges_; - } - - TMaybe<TVector<TReadRange>>& MutableRanges() - { - return Ranges_; - } - - /// - /// @{ - /// - /// Get range view, that is convenient way to iterate through all ranges. - TArrayRef<TReadRange> MutableRangesView() - { - if (Ranges_.Defined()) { - return TArrayRef(Ranges_->data(), Ranges_->size()); - } else { - return {}; - } - } - - TArrayRef<const TReadRange> GetRangesView() const - { - if (Ranges_.Defined()) { - return TArrayRef(Ranges_->data(), Ranges_->size()); - } else { - return {}; - } - } - /// @} - - /// @{ - /// - /// Get range by index. - const TReadRange& GetRange(ssize_t i) const - { - return Ranges_.GetRef()[i]; - } - - TReadRange& MutableRange(ssize_t i) - { - return Ranges_.GetRef()[i]; - } - /// @} - - /// - /// @brief Specifies columns that should be read. - /// - /// If it's set to Nothing then all columns will be read. 
- /// If empty TColumnNames is specified then each read row will be empty. - FLUENT_FIELD_OPTION(TColumnNames, Columns); - - FLUENT_FIELD_OPTION(bool, Teleport); - FLUENT_FIELD_OPTION(bool, Primary); - FLUENT_FIELD_OPTION(bool, Foreign); - FLUENT_FIELD_OPTION(i64, RowCountLimit); - - FLUENT_FIELD_OPTION(TString, FileName); - - /// Specifies original path to be shown in Web UI - FLUENT_FIELD_OPTION(TYPath, OriginalPath); - - /// - /// @brief Specifies that this path points to executable file - /// - /// Used in operation specs. - FLUENT_FIELD_OPTION(bool, Executable); - - /// - /// @brief Specify format to use when loading table. - /// - /// Used in operation specs. - FLUENT_FIELD_OPTION(TNode, Format); - - /// @brief Specifies table schema that will be set on the path - FLUENT_FIELD_OPTION(TTableSchema, Schema); - - /// Specifies compression codec that will be set on the path - FLUENT_FIELD_OPTION(TString, CompressionCodec); - - /// Specifies erasure codec that will be set on the path - FLUENT_FIELD_OPTION(EErasureCodecAttr, ErasureCodec); - - /// Specifies schema modification that will be set on the path - FLUENT_FIELD_OPTION(ESchemaModificationAttr, SchemaModification); - - /// Specifies optimize_for attribute that will be set on the path - FLUENT_FIELD_OPTION(EOptimizeForAttr, OptimizeFor); - - /// - /// @brief Do not put file used in operation into node cache - /// - /// If BypassArtifactCache == true, file will be loaded into the job's sandbox bypassing the cache on the YT node. - /// It helps jobs that use tmpfs to start faster, - /// because files will be loaded into tmpfs directly bypassing disk cache - FLUENT_FIELD_OPTION(bool, BypassArtifactCache); - - /// - /// @brief Timestamp of dynamic table. - /// - /// NOTE: it is _not_ unix timestamp - /// (instead it's transaction timestamp, that is more complex structure). - FLUENT_FIELD_OPTION(i64, Timestamp); - - /// - /// @brief Specify transaction that should be used to access this path. 
- /// - /// Allows to start cross-transactional operations. - FLUENT_FIELD_OPTION(TTransactionId, TransactionId); - - using TRenameColumnsDescriptor = THashMap<TString, TString>; - - /// Specifies columnar mapping which will be applied to columns before transfer to job. - FLUENT_FIELD_OPTION(TRenameColumnsDescriptor, RenameColumns); - - /// Create empty path with no attributes - TRichYPath() - { } - - /// - /// @{ - /// - /// @brief Create path from string - TRichYPath(const char* path) - : Path_(path) - { } - - TRichYPath(const TYPath& path) - : Path_(path) - { } - /// @} - -private: - TMaybe<TVector<TReadRange>> Ranges_; -}; - -/// -/// @brief Create copy of @ref NYT::TRichYPath with schema derived from proto message. -/// -/// If the path already has a schema set, that schema is kept and nothing is derived. -template <typename TProtoType> -TRichYPath WithSchema(const TRichYPath& path, const TSortColumns& sortBy = TSortColumns()) -{ - static_assert(std::is_base_of_v<::google::protobuf::Message, TProtoType>, "TProtoType must be Protobuf message"); - - auto schemedPath = path; - if (!schemedPath.Schema_) { - schemedPath.Schema(CreateTableSchema<TProtoType>(sortBy)); - } - return schemedPath; -} - -/// -/// @brief Create copy of @ref NYT::TRichYPath with schema derived from TRowType if possible. -/// -/// If TRowType is protobuf message schema is derived from it and set to returned path. -/// Otherwise schema of original path is left unchanged (and probably unset). -template <typename TRowType> -TRichYPath MaybeWithSchema(const TRichYPath& path, const TSortColumns& sortBy = TSortColumns()) -{ - if constexpr (std::is_base_of_v<::google::protobuf::Message, TRowType>) { - return WithSchema<TRowType>(path, sortBy); - } else { - return path; - } -} - -/// -/// @brief Get the list of ranges related to path in compatibility mode. -/// -/// - If path is missing ranges, empty list is returned. -/// - If path has associated range list and the list is not empty, function returns this list. 
-/// - If path has associated range list and this list is empty, exception is thrown. -/// -/// Before YT-17683 RichYPath didn't support empty range list and empty range actually meant universal range. -/// This function emulates this old behavior. -/// -/// @see https://st.yandex-team.ru/YT-17683 -const TVector<TReadRange>& GetRangesCompat(const TRichYPath& path); - -//////////////////////////////////////////////////////////////////////////////// - -/// Statistics about table columns. -struct TTableColumnarStatistics -{ - /// Total data weight for all chunks for each of requested columns. - THashMap<TString, i64> ColumnDataWeight; - - /// Total weight of all old chunks that don't keep columnar statistics. - i64 LegacyChunksDataWeight = 0; - - /// Timestamps total weight (only for dynamic tables). - TMaybe<i64> TimestampTotalWeight; -}; - -//////////////////////////////////////////////////////////////////////////////// - -/// Description of a partition. -struct TMultiTablePartition -{ - struct TStatistics - { - i64 ChunkCount = 0; - i64 DataWeight = 0; - i64 RowCount = 0; - }; - - /// Ranges of input tables for this partition. - TVector<TRichYPath> TableRanges; - - /// Aggregate statistics of all the table ranges in the partition. - TStatistics AggregateStatistics; -}; - -/// Table partitions from GetTablePartitions command. -struct TMultiTablePartitions -{ - /// Disjoint partitions into which the input tables were divided. - TVector<TMultiTablePartition> Partitions; -}; - -//////////////////////////////////////////////////////////////////////////////// - -/// -/// @brief Contains information about tablet -/// -/// @see NYT::IClient::GetTabletInfos -struct TTabletInfo -{ - /// - /// @brief Indicates the total number of rows added to the tablet (including trimmed ones). - /// - /// Currently only provided for ordered tablets. - i64 TotalRowCount = 0; - - /// - /// @brief Contains the number of front rows that are trimmed and are not guaranteed to be accessible. 
- /// - /// Only makes sense for ordered tablet. - i64 TrimmedRowCount = 0; - - /// - /// @brief Tablet cell barrier timestamp, which lags behind the current timestamp - /// - /// It is guaranteed that all transactions with commit timestamp not exceeding the barrier are fully committed; - /// e.g. all their added rows are visible (and are included in @ref NYT::TTabletInfo::TotalRowCount). - /// Mostly makes sense for ordered tablets. - /// Zero-initialized like the other counters so a default-constructed object never holds an indeterminate value. - ui64 BarrierTimestamp = 0; -}; - -//////////////////////////////////////////////////////////////////////////////// - -/// List of attributes to retrieve in operations like @ref NYT::ICypressClient::Get -struct TAttributeFilter -{ - /// @cond Doxygen_Suppress - using TSelf = TAttributeFilter; - /// @endcond - - /// List of attributes. - FLUENT_VECTOR_FIELD(TString, Attribute); -}; - -//////////////////////////////////////////////////////////////////////////////// - -/// -/// @brief Check if none of the fields of @ref NYT::TReadLimit is set. -/// -/// @return true if no field of readLimit is set and false otherwise. -bool IsTrivial(const TReadLimit& readLimit); - -/// Convert yson node type to table schema type -EValueType NodeTypeToValueType(TNode::EType nodeType); - -//////////////////////////////////////////////////////////////////////////////// - -/// -/// @brief Enumeration for specifying how reading from master is performed. -/// -/// Used in operations like NYT::ICypressClient::Get -enum class EMasterReadKind : int -{ - /// - /// @brief Reading from leader. - /// - /// Should almost never be used since it's expensive and for regular uses has no difference from - /// "follower" read. - Leader /* "leader" */, - - /// @brief Reading from master follower (default). 
- Follower /* "follower" */, - Cache /* "cache" */, - MasterCache /* "master_cache" */, -}; - -//////////////////////////////////////////////////////////////////////////////// - -/// @cond Doxygen_Suppress -namespace NDetail { - -// MUST NOT BE USED BY CLIENTS -// TODO: we should use default GENERATE_ENUM_SERIALIZATION -TString ToString(EValueType type); - -} // namespace NDetail -/// @endcond - -//////////////////////////////////////////////////////////////////////////////// - -} // namespace NYT diff --git a/yt/cpp/mapreduce/interface/common_ut.cpp b/yt/cpp/mapreduce/interface/common_ut.cpp deleted file mode 100644 index 3f19433816e..00000000000 --- a/yt/cpp/mapreduce/interface/common_ut.cpp +++ /dev/null @@ -1,303 +0,0 @@ -#include "common_ut.h" - -#include "fluent.h" - -#include <yt/cpp/mapreduce/interface/common.h> - -#include <yt/cpp/mapreduce/tests/yt_unittest_lib/yt_unittest_lib.h> - -#include <library/cpp/testing/unittest/registar.h> - -#include <library/cpp/yson/node/node_io.h> -#include <library/cpp/yson/node/node_builder.h> - -#include <util/generic/xrange.h> - -using namespace NYT; - -template <class T> -TString SaveToString(const T& obj) -{ - TString s; - TStringOutput out(s); - ::Save(&out, obj); - return s; -} - -template <class T> -T LoadFromString(TStringBuf s) -{ - TMemoryInput in(s); - T obj; - ::Load(&in, obj); - return obj; -} - -template <class T> -T SaveLoad(const T& obj) -{ - return LoadFromString<T>(SaveToString(obj)); -} - -Y_UNIT_TEST_SUITE(Common) -{ - Y_UNIT_TEST(SortColumnsLegacy) - { - TSortColumns keys1("a", "b"); - UNIT_ASSERT((keys1.Parts_ == TSortColumns{"a", "b"})); - - keys1.Add("c", "d"); - UNIT_ASSERT((keys1.Parts_ == TSortColumns{"a", "b", "c", "d"})); - - auto keys2 = TSortColumns(keys1).Add("e", "f"); - UNIT_ASSERT((keys1.Parts_ == TSortColumns{"a", "b", "c", "d"})); - UNIT_ASSERT((keys2.Parts_ == TSortColumns{"a", "b", "c", "d", "e", "f"})); - - auto keys3 = TSortColumns(keys1).Add("e").Add("f").Add("g"); - 
UNIT_ASSERT((keys1.Parts_ == TSortColumns{"a", "b", "c", "d"})); - UNIT_ASSERT((keys3.Parts_ == TSortColumns{"a", "b", "c", "d", "e", "f", "g"})); - } - - Y_UNIT_TEST(SortColumn) - { - auto ascending = TSortColumn("a"); - UNIT_ASSERT_VALUES_EQUAL(ascending.Name(), "a"); - UNIT_ASSERT_VALUES_EQUAL(ascending.SortOrder(), ESortOrder::SO_ASCENDING); - UNIT_ASSERT_VALUES_EQUAL(ascending, TSortColumn("a", ESortOrder::SO_ASCENDING)); - UNIT_ASSERT_VALUES_UNEQUAL(ascending, TSortColumn("a", ESortOrder::SO_DESCENDING)); - - UNIT_ASSERT_NO_EXCEPTION(ascending.EnsureAscending()); - UNIT_ASSERT_VALUES_EQUAL(static_cast<TString>(ascending), "a"); - UNIT_ASSERT_VALUES_EQUAL(ascending, "a"); - - auto another = ascending; - UNIT_ASSERT_NO_EXCEPTION(another = "another"); - UNIT_ASSERT_VALUES_EQUAL(another.Name(), "another"); - UNIT_ASSERT_VALUES_EQUAL(another.SortOrder(), ESortOrder::SO_ASCENDING); - UNIT_ASSERT_VALUES_EQUAL(another, TSortColumn("another", ESortOrder::SO_ASCENDING)); - UNIT_ASSERT_VALUES_UNEQUAL(another, TSortColumn("another", ESortOrder::SO_DESCENDING)); - - auto ascendingNode = BuildYsonNodeFluently().Value(ascending); - UNIT_ASSERT_VALUES_EQUAL(ascendingNode, TNode("a")); - - UNIT_ASSERT_VALUES_EQUAL(SaveLoad(ascending), ascending); - UNIT_ASSERT_VALUES_UNEQUAL(SaveToString(ascending), SaveToString(TString("a"))); - - auto descending = TSortColumn("a", ESortOrder::SO_DESCENDING); - UNIT_ASSERT_VALUES_EQUAL(descending.Name(), "a"); - UNIT_ASSERT_VALUES_EQUAL(descending.SortOrder(), ESortOrder::SO_DESCENDING); - UNIT_ASSERT_VALUES_EQUAL(descending, TSortColumn("a", ESortOrder::SO_DESCENDING)); - UNIT_ASSERT_VALUES_UNEQUAL(descending, TSortColumn("a", ESortOrder::SO_ASCENDING)); - - UNIT_ASSERT_EXCEPTION(descending.EnsureAscending(), yexception); - UNIT_ASSERT_EXCEPTION(static_cast<TString>(descending), yexception); - UNIT_ASSERT_EXCEPTION(descending == "a", yexception); - UNIT_ASSERT_EXCEPTION(descending = "a", yexception); - - auto descendingNode = 
BuildYsonNodeFluently().Value(descending); - UNIT_ASSERT_VALUES_EQUAL(descendingNode, TNode()("name", "a")("sort_order", "descending")); - - UNIT_ASSERT_VALUES_EQUAL(SaveLoad(descending), descending); - UNIT_ASSERT_VALUES_UNEQUAL(SaveToString(descending), SaveToString("a")); - - UNIT_ASSERT_VALUES_EQUAL(ToString(TSortColumn("blah")), "blah"); - UNIT_ASSERT_VALUES_EQUAL(ToString(TSortColumn("blah", ESortOrder::SO_DESCENDING)), "{\"name\"=\"blah\";\"sort_order\"=\"descending\"}"); - } - - Y_UNIT_TEST(SortColumns) - { - TSortColumns ascending("a", "b"); - UNIT_ASSERT(ascending.Parts_ == (TSortColumns{"a", "b"})); - UNIT_ASSERT_NO_EXCEPTION(ascending.EnsureAscending()); - UNIT_ASSERT_VALUES_EQUAL(static_cast<TColumnNames>(ascending).Parts_, (TVector<TString>{"a", "b"})); - UNIT_ASSERT_VALUES_EQUAL(ascending.GetNames(), (TVector<TString>{"a", "b"})); - - auto mixed = ascending; - mixed.Add(TSortColumn("c", ESortOrder::SO_DESCENDING), "d"); - UNIT_ASSERT((mixed.Parts_ != TVector<TSortColumn>{"a", "b", "c", "d"})); - UNIT_ASSERT((mixed.Parts_ == TVector<TSortColumn>{"a", "b", TSortColumn("c", ESortOrder::SO_DESCENDING), "d"})); - UNIT_ASSERT_VALUES_EQUAL(mixed.GetNames(), (TVector<TString>{"a", "b", "c", "d"})); - UNIT_ASSERT_EXCEPTION(mixed.EnsureAscending(), yexception); - UNIT_ASSERT_EXCEPTION(static_cast<TColumnNames>(mixed), yexception); - } - - Y_UNIT_TEST(KeyBound) - { - auto keyBound = TKeyBound(ERelation::Greater, TKey(7, "a", TNode()("x", "y"))); - UNIT_ASSERT_VALUES_EQUAL(keyBound.Relation(), ERelation::Greater); - UNIT_ASSERT_EQUAL(keyBound.Key(), TKey(7, "a", TNode()("x", "y"))); - - auto keyBound1 = TKeyBound().Relation(ERelation::Greater).Key(TKey(7, "a", TNode()("x", "y"))); - auto expectedNode = TNode() - .Add(">") - .Add(TNode().Add(7).Add("a").Add(TNode()("x", "y"))); - - UNIT_ASSERT_VALUES_EQUAL(expectedNode, BuildYsonNodeFluently().Value(keyBound)); - UNIT_ASSERT_VALUES_EQUAL(expectedNode, BuildYsonNodeFluently().Value(keyBound1)); - - 
keyBound.Relation(ERelation::LessOrEqual); - keyBound.Key(TKey("A", 7)); - UNIT_ASSERT_VALUES_EQUAL(keyBound.Relation(), ERelation::LessOrEqual); - UNIT_ASSERT_EQUAL(keyBound.Key(), TKey("A", 7)); - - UNIT_ASSERT_VALUES_EQUAL( - BuildYsonNodeFluently().Value(keyBound), - TNode() - .Add("<=") - .Add(TNode().Add("A").Add(7))); - } - - Y_UNIT_TEST(TTableSchema) - { - TTableSchema schema; - schema - .AddColumn(TColumnSchema().Name("a").Type(EValueType::VT_STRING).SortOrder(SO_ASCENDING)) - .AddColumn(TColumnSchema().Name("b").Type(EValueType::VT_UINT64)) - .AddColumn(TColumnSchema().Name("c").Type(EValueType::VT_INT64)); - auto checkSortBy = [](TTableSchema schema, const TVector<TString>& columns) { - auto initialSchema = schema; - schema.SortBy(columns); - for (auto i: xrange(columns.size())) { - UNIT_ASSERT_VALUES_EQUAL(schema.Columns()[i].Name(), columns[i]); - UNIT_ASSERT_VALUES_EQUAL(schema.Columns()[i].SortOrder(), ESortOrder::SO_ASCENDING); - } - for (auto i: xrange(columns.size(), (size_t)initialSchema.Columns().size())) { - UNIT_ASSERT_VALUES_EQUAL(schema.Columns()[i].SortOrder(), Nothing()); - } - UNIT_ASSERT_VALUES_EQUAL(initialSchema.Columns().size(), schema.Columns().size()); - return schema; - }; - auto newSchema = checkSortBy(schema, {"b"}); - UNIT_ASSERT_VALUES_EQUAL(newSchema.Columns()[1].Name(), TString("a")); - UNIT_ASSERT_VALUES_EQUAL(newSchema.Columns()[2].Name(), TString("c")); - checkSortBy(schema, {"b", "c"}); - checkSortBy(schema, {"c", "a"}); - UNIT_ASSERT_EXCEPTION(checkSortBy(schema, {"b", "b"}), yexception); - UNIT_ASSERT_EXCEPTION(checkSortBy(schema, {"a", "junk"}), yexception); - } - - Y_UNIT_TEST(TColumnSchema_TypeV3) - { - { - auto column = TColumnSchema().Type(NTi::Interval()); - UNIT_ASSERT_VALUES_EQUAL(column.Required(), true); - UNIT_ASSERT_VALUES_EQUAL(column.Type(), VT_INTERVAL); - } - { - auto column = TColumnSchema().Type(NTi::Optional(NTi::Date())); - UNIT_ASSERT_VALUES_EQUAL(column.Required(), false); - 
UNIT_ASSERT_VALUES_EQUAL(column.Type(), VT_DATE); - } - { - auto column = TColumnSchema().Type(NTi::Null()); - UNIT_ASSERT_VALUES_EQUAL(column.Required(), false); - UNIT_ASSERT_VALUES_EQUAL(column.Type(), VT_NULL); - } - { - auto column = TColumnSchema().Type(NTi::Optional(NTi::Null())); - UNIT_ASSERT_VALUES_EQUAL(column.Required(), false); - UNIT_ASSERT_VALUES_EQUAL(column.Type(), VT_ANY); - } - } - - Y_UNIT_TEST(ToTypeV3) - { - UNIT_ASSERT_VALUES_EQUAL(*ToTypeV3(VT_INT32, true), *NTi::Int32()); - UNIT_ASSERT_VALUES_EQUAL(*ToTypeV3(VT_UTF8, false), *NTi::Optional(NTi::Utf8())); - } - - Y_UNIT_TEST(DeserializeColumn) - { - auto deserialize = [] (TStringBuf yson) { - auto node = NodeFromYsonString(yson); - TColumnSchema column; - Deserialize(column, node); - return column; - }; - - auto column = deserialize("{name=foo; type=int64; required=%false}"); - UNIT_ASSERT_VALUES_EQUAL(column.Name(), "foo"); - UNIT_ASSERT_VALUES_EQUAL(*column.TypeV3(), *NTi::Optional(NTi::Int64())); - - column = deserialize("{name=bar; type=utf8; required=%true; type_v3=utf8}"); - UNIT_ASSERT_VALUES_EQUAL(column.Name(), "bar"); - UNIT_ASSERT_VALUES_EQUAL(*column.TypeV3(), *NTi::Utf8()); - } - - Y_UNIT_TEST(ColumnSchemaEquality) - { - auto base = TColumnSchema() - .Name("col") - .TypeV3(NTi::Optional(NTi::List(NTi::String()))) - .SortOrder(ESortOrder::SO_ASCENDING) - .Lock("lock") - .Expression("x + 12") - .Aggregate("sum") - .Group("group"); - - auto other = base; - ASSERT_SERIALIZABLES_EQUAL(other, base); - other.Name("other"); - ASSERT_SERIALIZABLES_UNEQUAL(other, base); - - other = base; - other.TypeV3(NTi::List(NTi::String())); - ASSERT_SERIALIZABLES_UNEQUAL(other, base); - - other = base; - other.ResetSortOrder(); - ASSERT_SERIALIZABLES_UNEQUAL(other, base); - - other = base; - other.Lock("lock1"); - ASSERT_SERIALIZABLES_UNEQUAL(other, base); - - other = base; - other.Expression("x + 13"); - ASSERT_SERIALIZABLES_UNEQUAL(other, base); - - other = base; - other.ResetAggregate(); - 
ASSERT_SERIALIZABLES_UNEQUAL(other, base); - - other = base; - other.Group("group1"); - ASSERT_SERIALIZABLES_UNEQUAL(other, base); - } - - Y_UNIT_TEST(TableSchemaEquality) - { - auto col1 = TColumnSchema() - .Name("col1") - .TypeV3(NTi::Optional(NTi::List(NTi::String()))) - .SortOrder(ESortOrder::SO_ASCENDING); - - auto col2 = TColumnSchema() - .Name("col2") - .TypeV3(NTi::Uint32()); - - auto schema = TTableSchema() - .AddColumn(col1) - .AddColumn(col2) - .Strict(true) - .UniqueKeys(true); - - auto other = schema; - ASSERT_SERIALIZABLES_EQUAL(other, schema); - - other.Strict(false); - ASSERT_SERIALIZABLES_UNEQUAL(other, schema); - - other = schema; - other.MutableColumns()[0].TypeV3(NTi::List(NTi::String())); - ASSERT_SERIALIZABLES_UNEQUAL(other, schema); - - other = schema; - other.MutableColumns().push_back(col1); - ASSERT_SERIALIZABLES_UNEQUAL(other, schema); - - other = schema; - other.UniqueKeys(false); - ASSERT_SERIALIZABLES_UNEQUAL(other, schema); - } -} diff --git a/yt/cpp/mapreduce/interface/common_ut.h b/yt/cpp/mapreduce/interface/common_ut.h deleted file mode 100644 index 6f70f09beec..00000000000 --- a/yt/cpp/mapreduce/interface/common_ut.h +++ /dev/null @@ -1 +0,0 @@ -#pragma once diff --git a/yt/cpp/mapreduce/interface/config.cpp b/yt/cpp/mapreduce/interface/config.cpp deleted file mode 100644 index b474dc0844e..00000000000 --- a/yt/cpp/mapreduce/interface/config.cpp +++ /dev/null @@ -1,321 +0,0 @@ -#include "config.h" - -#include "operation.h" - -#include <yt/cpp/mapreduce/interface/logging/yt_log.h> - -#include <library/cpp/json/json_reader.h> -#include <library/cpp/svnversion/svnversion.h> - -#include <library/cpp/yson/node/node_builder.h> -#include <library/cpp/yson/node/node_io.h> - -#include <library/cpp/yson/json/yson2json_adapter.h> - -#include <util/string/strip.h> -#include <util/folder/dirut.h> -#include <util/folder/path.h> -#include <util/stream/file.h> -#include <util/generic/singleton.h> -#include <util/string/builder.h> -#include 
<util/string/cast.h> -#include <util/string/type.h> -#include <util/system/hostname.h> -#include <util/system/user.h> -#include <util/system/env.h> - -namespace NYT { - -//////////////////////////////////////////////////////////////////////////////// - -bool TConfig::GetBool(const char* var, bool defaultValue) -{ - TString val = GetEnv(var, ""); - if (val.empty()) { - return defaultValue; - } - return IsTrue(val); -} - -int TConfig::GetInt(const char* var, int defaultValue) -{ - int result = 0; - TString val = GetEnv(var, ""); - if (val.empty()) { - return defaultValue; - } - try { - result = FromString<int>(val); - } catch (const yexception& e) { - ythrow yexception() << "Cannot parse " << var << '=' << val << " as integer: " << e.what(); - } - return result; -} - -TDuration TConfig::GetDuration(const char* var, TDuration defaultValue) -{ - return TDuration::Seconds(GetInt(var, defaultValue.Seconds())); -} - -EEncoding TConfig::GetEncoding(const char* var) -{ - const TString encodingName = GetEnv(var, "identity"); - EEncoding encoding; - if (TryFromString(encodingName, encoding)) { - return encoding; - } else { - ythrow yexception() << var << ": encoding '" << encodingName << "' is not supported"; - } -} - - EUploadDeduplicationMode TConfig::GetUploadingDeduplicationMode( - const char* var, - EUploadDeduplicationMode defaultValue) -{ - const TString deduplicationMode = GetEnv(var, TEnumTraits<EUploadDeduplicationMode>::ToString(defaultValue)); - return TEnumTraits<EUploadDeduplicationMode>::FromString(deduplicationMode); -} - -void TConfig::ValidateToken(const TString& token) -{ - for (size_t i = 0; i < token.size(); ++i) { - ui8 ch = token[i]; - if (ch < 0x21 || ch > 0x7e) { - ythrow yexception() << "Incorrect token character '" << ch << "' at position " << i; - } - } -} - -TString TConfig::LoadTokenFromFile(const TString& tokenPath) -{ - TFsPath path(tokenPath); - return path.IsFile() ? 
Strip(TIFStream(path).ReadAll()) : TString(); -} - -TNode TConfig::LoadJsonSpec(const TString& strSpec) -{ - TNode spec; - TStringInput input(strSpec); - TNodeBuilder builder(&spec); - TYson2JsonCallbacksAdapter callbacks(&builder); - - Y_ENSURE(NJson::ReadJson(&input, &callbacks), "Cannot parse json spec: " << strSpec); - Y_ENSURE(spec.IsMap(), "Json spec is not a map"); - - return spec; -} - -TRichYPath TConfig::LoadApiFilePathOptions(const TString& ysonMap) -{ - TNode attributes; - try { - attributes = NodeFromYsonString(ysonMap); - } catch (const yexception& exc) { - ythrow yexception() << "Failed to parse YT_API_FILE_PATH_OPTIONS (it must be yson map): " << exc; - } - TNode pathNode = ""; - pathNode.Attributes() = attributes; - TRichYPath path; - Deserialize(path, pathNode); - return path; -} - -void TConfig::LoadToken() -{ - if (auto envToken = GetEnv("YT_TOKEN")) { - Token = envToken; - } else if (auto envToken = GetEnv("YT_SECURE_VAULT_YT_TOKEN")) { - // If this code runs inside an vanilla peration in YT - // it should not use regular environment variable `YT_TOKEN` - // because it would be visible in UI. - // Token should be passed via `secure_vault` parameter in operation spec. 
- Token = envToken; - } else if (auto tokenPath = GetEnv("YT_TOKEN_PATH")) { - Token = LoadTokenFromFile(tokenPath); - } else { - Token = LoadTokenFromFile(GetHomeDir() + "/.yt/token"); - } - ValidateToken(Token); -} - -void TConfig::LoadSpec() -{ - TString strSpec = GetEnv("YT_SPEC", "{}"); - Spec = LoadJsonSpec(strSpec); - - strSpec = GetEnv("YT_TABLE_WRITER", "{}"); - TableWriter = LoadJsonSpec(strSpec); -} - -void TConfig::LoadTimings() -{ - ConnectTimeout = GetDuration("YT_CONNECT_TIMEOUT", - TDuration::Seconds(10)); - - SocketTimeout = GetDuration("YT_SOCKET_TIMEOUT", - GetDuration("YT_SEND_RECEIVE_TIMEOUT", // common - TDuration::Seconds(60))); - - AddressCacheExpirationTimeout = TDuration::Minutes(15); - - CacheLockTimeoutPerGb = TDuration::MilliSeconds(1000.0 * 1_GB * 8 / 20_MB); // 20 Mbps = 20 MBps / 8. - - TxTimeout = GetDuration("YT_TX_TIMEOUT", - TDuration::Seconds(120)); - - PingTimeout = GetDuration("YT_PING_TIMEOUT", - TDuration::Seconds(5)); - - PingInterval = GetDuration("YT_PING_INTERVAL", - TDuration::Seconds(5)); - - WaitLockPollInterval = TDuration::Seconds(5); - - RetryInterval = GetDuration("YT_RETRY_INTERVAL", - TDuration::Seconds(3)); - - ChunkErrorsRetryInterval = GetDuration("YT_CHUNK_ERRORS_RETRY_INTERVAL", - TDuration::Seconds(60)); - - RateLimitExceededRetryInterval = GetDuration("YT_RATE_LIMIT_EXCEEDED_RETRY_INTERVAL", - TDuration::Seconds(60)); - - StartOperationRetryInterval = GetDuration("YT_START_OPERATION_RETRY_INTERVAL", - TDuration::Seconds(60)); - - HostListUpdateInterval = TDuration::Seconds(60); -} - -void TConfig::Reset() -{ - Hosts = GetEnv("YT_HOSTS", "hosts"); - Pool = GetEnv("YT_POOL"); - Prefix = GetEnv("YT_PREFIX"); - ApiVersion = GetEnv("YT_VERSION", "v3"); - LogLevel = GetEnv("YT_LOG_LEVEL", "error"); - - ContentEncoding = GetEncoding("YT_CONTENT_ENCODING"); - AcceptEncoding = GetEncoding("YT_ACCEPT_ENCODING"); - - GlobalTxId = GetEnv("YT_TRANSACTION", ""); - - UseAsyncTxPinger = false; - AsyncHttpClientThreads = 
1; - AsyncTxPingerPoolThreads = 1; - - ForceIpV4 = GetBool("YT_FORCE_IPV4"); - ForceIpV6 = GetBool("YT_FORCE_IPV6"); - UseHosts = GetBool("YT_USE_HOSTS", true); - - LoadToken(); - LoadSpec(); - LoadTimings(); - - CacheUploadDeduplicationMode = GetUploadingDeduplicationMode("YT_UPLOAD_DEDUPLICATION", EUploadDeduplicationMode::Host); - - RetryCount = Max(GetInt("YT_RETRY_COUNT", 10), 1); - ReadRetryCount = Max(GetInt("YT_READ_RETRY_COUNT", 30), 1); - StartOperationRetryCount = Max(GetInt("YT_START_OPERATION_RETRY_COUNT", 30), 1); - - RemoteTempFilesDirectory = GetEnv("YT_FILE_STORAGE", - "//tmp/yt_wrapper/file_storage"); - RemoteTempTablesDirectory = GetEnv("YT_TEMP_TABLES_STORAGE", - "//tmp/yt_wrapper/table_storage"); - RemoteTempTablesDirectory = GetEnv("YT_TEMP_DIR", - RemoteTempTablesDirectory); - - InferTableSchema = false; - - UseClientProtobuf = GetBool("YT_USE_CLIENT_PROTOBUF", false); - NodeReaderFormat = ENodeReaderFormat::Auto; - ProtobufFormatWithDescriptors = true; - - MountSandboxInTmpfs = GetBool("YT_MOUNT_SANDBOX_IN_TMPFS"); - - ApiFilePathOptions = LoadApiFilePathOptions(GetEnv("YT_API_FILE_PATH_OPTIONS", "{}")); - - ConnectionPoolSize = GetInt("YT_CONNECTION_POOL_SIZE", 16); - - TraceHttpRequestsMode = FromString<ETraceHttpRequestsMode>(to_lower(GetEnv("YT_TRACE_HTTP_REQUESTS", "never"))); - - CommandsWithFraming = { - "read_table", - "get_table_columnar_statistics", - "get_job_input", - "concatenate", - "partition_tables", - }; -} - -TConfig::TConfig() -{ - Reset(); -} - -TConfigPtr TConfig::Get() -{ - struct TConfigHolder - { - TConfigHolder() - : Config(::MakeIntrusive<TConfig>()) - { } - - TConfigPtr Config; - }; - - return Singleton<TConfigHolder>()->Config; -} - -//////////////////////////////////////////////////////////////////////////////// - -TProcessState::TProcessState() -{ - try { - FqdnHostName = ::FQDNHostName(); - } catch (const yexception& e) { - try { - FqdnHostName = ::HostName(); - } catch (const yexception& e) { - ythrow 
yexception() << "Cannot get fqdn and host name: " << e.what(); - } - } - - try { - UserName = ::GetUsername(); - } catch (const yexception& e) { - ythrow yexception() << "Cannot get user name: " << e.what(); - } - - Pid = static_cast<int>(getpid()); - - if (!ClientVersion) { - ClientVersion = ::TStringBuilder() << "YT C++ native " << GetProgramCommitId(); - } -} - -static TString CensorString(TString input) -{ - static const TString prefix = "AQAD-"; - if (input.find(prefix) == TString::npos) { - return input; - } else { - return TString(input.size(), '*'); - } -} - -void TProcessState::SetCommandLine(int argc, const char* argv[]) -{ - for (int i = 0; i < argc; ++i) { - CommandLine.push_back(argv[i]); - CensoredCommandLine.push_back(CensorString(CommandLine.back())); - } -} - -TProcessState* TProcessState::Get() -{ - return Singleton<TProcessState>(); -} - -//////////////////////////////////////////////////////////////////////////////// - -} // namespace NYT diff --git a/yt/cpp/mapreduce/interface/config.h b/yt/cpp/mapreduce/interface/config.h deleted file mode 100644 index c44ad25f1ca..00000000000 --- a/yt/cpp/mapreduce/interface/config.h +++ /dev/null @@ -1,228 +0,0 @@ -#pragma once - -#include "fwd.h" -#include "common.h" -#include "node.h" - -#include <library/cpp/yt/misc/enum.h> - -#include <util/generic/maybe.h> -#include <util/generic/string.h> -#include <util/generic/hash_set.h> - -#include <util/datetime/base.h> - -namespace NYT { - -enum EEncoding : int -{ - E_IDENTITY /* "identity" */, - E_GZIP /* "gzip" */, - E_BROTLI /* "br" */, - E_Z_LZ4 /* "z-lz4" */, -}; - -enum class ENodeReaderFormat : int -{ - Yson, // Always use YSON format, - Skiff, // Always use Skiff format, throw exception if it's not possible (non-strict schema, dynamic table etc.) - Auto, // Use Skiff format if it's possible, YSON otherwise -}; - -enum class ETraceHttpRequestsMode -{ - // Never dump http requests. - Never /* "never" */, - // Dump failed http requests. 
- Error /* "error" */, - // Dump all http requests. - Always /* "always" */, -}; - -DEFINE_ENUM(EUploadDeduplicationMode, - // For each file only one process' thread from all possible hosts can upload it to the file cache at the same time. - // The others will wait for the uploading to finish and use already cached file. - ((Global) (0)) - - // For each file and each particular host only one process' thread can upload it to the file cache at the same time. - // The others will wait for the uploading to finish and use already cached file. - ((Host) (1)) - - // All processes' threads will upload a file to the cache concurrently. - ((Disabled) (2)) -); - -//////////////////////////////////////////////////////////////////////////////// - -struct TConfig - : public TThrRefBase -{ - TString Hosts; - TString Pool; - TString Token; - TString Prefix; - TString ApiVersion; - TString LogLevel; - - // Compression for data that is sent to YT cluster. - EEncoding ContentEncoding; - - // Compression for data that is read from YT cluster. - EEncoding AcceptEncoding; - - TString GlobalTxId; - - bool ForceIpV4; - bool ForceIpV6; - bool UseHosts; - - TDuration HostListUpdateInterval; - - TNode Spec; - TNode TableWriter; - - TDuration ConnectTimeout; - TDuration SocketTimeout; - TDuration AddressCacheExpirationTimeout; - TDuration TxTimeout; - TDuration PingTimeout; - TDuration PingInterval; - - bool UseAsyncTxPinger; - int AsyncHttpClientThreads; - int AsyncTxPingerPoolThreads; - - // How often should we poll for lock state - TDuration WaitLockPollInterval; - - TDuration RetryInterval; - TDuration ChunkErrorsRetryInterval; - - TDuration RateLimitExceededRetryInterval; - TDuration StartOperationRetryInterval; - - int RetryCount; - int ReadRetryCount; - int StartOperationRetryCount; - - /// @brief Period for checking status of running operation. 
- TDuration OperationTrackerPollPeriod = TDuration::Seconds(5); - - TString RemoteTempFilesDirectory; - TString RemoteTempTablesDirectory; - - // - // Infer schemas for nonexstent tables from typed rows (e.g. protobuf) - // when writing from operation or client writer. - // This options can be overriden in TOperationOptions and TTableWriterOptions. - bool InferTableSchema; - - bool UseClientProtobuf; - ENodeReaderFormat NodeReaderFormat; - bool ProtobufFormatWithDescriptors; - - int ConnectionPoolSize; - - /// Defines replication factor that is used for files that are uploaded to YT - /// to use them in operations. - int FileCacheReplicationFactor = 10; - - /// @brief Used when waiting for other process which uploads the same file to the file cache. - /// - /// If CacheUploadDeduplicationMode is not Disabled, current process can wait for some other - /// process which is uploading the same file. This value is proportional to the timeout of waiting, - /// actual timeout computes as follows: fileSizeGb * CacheLockTimeoutPerGb. - /// Default timeout assumes that host has uploading speed equal to 20 Mb/s. - /// If timeout was reached, the file will be uploaded by current process without any other waits. - TDuration CacheLockTimeoutPerGb; - - /// @brief Used to prevent concurrent uploading of the same file to the file cache. - /// NB: Each mode affects only users with the same mode enabled. - EUploadDeduplicationMode CacheUploadDeduplicationMode; - - bool MountSandboxInTmpfs; - - /// @brief Set upload options (e.g.) for files created by library. - /// - /// Path itself is always ignored but path options (e.g. `BypassArtifactCache`) are used when uploading system files: - /// cppbinary, job state, etc - TRichYPath ApiFilePathOptions; - - // Testing options, should never be used in user programs. 
- bool UseAbortableResponse = false; - bool EnableDebugMetrics = false; - - // - // There is optimization used with local YT that enables to skip binary upload and use real binary path. - // When EnableLocalModeOptimization is set to false this optimization is completely disabled. - bool EnableLocalModeOptimization = true; - - // - // If you want see stderr even if you jobs not failed set this true. - bool WriteStderrSuccessfulJobs = false; - - // - // This configuration is useful for debug. - // If set to ETraceHttpRequestsMode::Error library will dump all http error requests. - // If set to ETraceHttpRequestsMode::All library will dump all http requests. - // All tracing occurres as DEBUG level logging. - ETraceHttpRequestsMode TraceHttpRequestsMode = ETraceHttpRequestsMode::Never; - - TString SkynetApiHost; - - // Sets SO_PRIORITY option on the socket - TMaybe<int> SocketPriority; - - // Framing settings - // (cf. https://yt.yandex-team.ru/docs/description/proxy/http_proxy_reference#framing). 
- THashSet<TString> CommandsWithFraming; - - static bool GetBool(const char* var, bool defaultValue = false); - static int GetInt(const char* var, int defaultValue); - static TDuration GetDuration(const char* var, TDuration defaultValue); - static EEncoding GetEncoding(const char* var); - static EUploadDeduplicationMode GetUploadingDeduplicationMode( - const char* var, - EUploadDeduplicationMode defaultValue); - - static void ValidateToken(const TString& token); - static TString LoadTokenFromFile(const TString& tokenPath); - - static TNode LoadJsonSpec(const TString& strSpec); - - static TRichYPath LoadApiFilePathOptions(const TString& ysonMap); - - void LoadToken(); - void LoadSpec(); - void LoadTimings(); - - void Reset(); - - TConfig(); - - static TConfigPtr Get(); -}; - -//////////////////////////////////////////////////////////////////////////////// - -struct TProcessState -{ - TString FqdnHostName; - TString UserName; - TVector<TString> CommandLine; - - // Command line with everything that looks like tokens censored. 
- TVector<TString> CensoredCommandLine; - int Pid; - TString ClientVersion; - - TProcessState(); - - void SetCommandLine(int argc, const char* argv[]); - - static TProcessState* Get(); -}; - -//////////////////////////////////////////////////////////////////////////////// - -} // namespace NYT diff --git a/yt/cpp/mapreduce/interface/config_ut.cpp b/yt/cpp/mapreduce/interface/config_ut.cpp deleted file mode 100644 index e49ba021080..00000000000 --- a/yt/cpp/mapreduce/interface/config_ut.cpp +++ /dev/null @@ -1,20 +0,0 @@ -#include <library/cpp/testing/unittest/registar.h> - -#include <yt/cpp/mapreduce/interface/config.h> - -using namespace NYT; - -Y_UNIT_TEST_SUITE(ConfigSuite) -{ - Y_UNIT_TEST(TestReset) { - // very limited test, checks only one config field - - auto origConfig = *TConfig::Get(); - TConfig::Get()->Reset(); - UNIT_ASSERT_VALUES_EQUAL(origConfig.Hosts, TConfig::Get()->Hosts); - - TConfig::Get()->Hosts = "hosts/fb867"; - TConfig::Get()->Reset(); - UNIT_ASSERT_VALUES_EQUAL(origConfig.Hosts, TConfig::Get()->Hosts); - } -} diff --git a/yt/cpp/mapreduce/interface/constants.h b/yt/cpp/mapreduce/interface/constants.h deleted file mode 100644 index 4f704108146..00000000000 --- a/yt/cpp/mapreduce/interface/constants.h +++ /dev/null @@ -1,19 +0,0 @@ -#pragma once - - -#include <util/system/defaults.h> - - -namespace NYT { - -//////////////////////////////////////////////////////////////////////////////// - - -// Maximum number of input tables for operation. -// If greater number of input tables are provided behaviour is undefined -// (it might work ok or it might fail or it might work very slowly). 
-constexpr size_t MaxInputTableCount = 1000; - -//////////////////////////////////////////////////////////////////////////////// - -} // namespace NYT diff --git a/yt/cpp/mapreduce/interface/cypress.cpp b/yt/cpp/mapreduce/interface/cypress.cpp deleted file mode 100644 index 53686effd27..00000000000 --- a/yt/cpp/mapreduce/interface/cypress.cpp +++ /dev/null @@ -1,24 +0,0 @@ -#include "cypress.h" - -#include "config.h" - -namespace NYT { - -//////////////////////////////////////////////////////////////////////////////// - -void ICypressClient::Concatenate( - const TVector<TYPath>& sourcePaths, - const TYPath& destinationPath, - const TConcatenateOptions& options) -{ - TVector<TRichYPath> richSourcePaths; - richSourcePaths.reserve(sourcePaths.size()); - for (const auto& path : sourcePaths) { - richSourcePaths.emplace_back(path); - } - Concatenate(richSourcePaths, destinationPath, options); -} - -//////////////////////////////////////////////////////////////////////////////// - -} // namespace NYT diff --git a/yt/cpp/mapreduce/interface/cypress.h b/yt/cpp/mapreduce/interface/cypress.h deleted file mode 100644 index e05316ebc6c..00000000000 --- a/yt/cpp/mapreduce/interface/cypress.h +++ /dev/null @@ -1,252 +0,0 @@ -#pragma once - -/// -/// @file yt/cpp/mapreduce/interface/cypress.h -/// -/// Header containing interface to execute [Cypress](https://yt.yandex-team.ru/docs/description/common/cypress.html)-related commands. - -#include "fwd.h" - -#include "client_method_options.h" -#include "common.h" -#include "node.h" - -#include <util/generic/maybe.h> - -namespace NYT { - -//////////////////////////////////////////////////////////////////////////////// - -/// Client interface to execute [Cypress](https://yt.yandex-team.ru/docs/description/common/cypress.html)-related commands. -class ICypressClient -{ -public: - virtual ~ICypressClient() = default; - - /// - /// @brief Create Cypress node of given type. - /// - /// @param path Path in Cypress to the new object. 
- /// @param type New node type. - /// @param options Optional parameters. - /// - /// @return Id of the created node. - /// - /// @note All but the last components must exist unless @ref NYT::TCreateOptions::Recursive is `true`. - /// - /// @note The node itself must not exist unless @ref NYT::TCreateOptions::IgnoreExisting or @ref NYT::TCreateOptions::Force are `true`. - /// - /// @see [YT doc](https://yt.yandex-team.ru/docs/api/commands.html#create) - virtual TNodeId Create( - const TYPath& path, - ENodeType type, - const TCreateOptions& options = TCreateOptions()) = 0; - - /// - /// @brief Create table with schema inferred from the template argument. - /// - /// @tparam TRowType type of C++ representation of the row to be stored in the table. - /// @param path Path in Cypress to the new table. - /// @param sortColumns List of columns to mark as sorted in schema. - /// @param options Optional parameters. - /// - /// @return Id of the created node. - /// - /// @note If "schema" is passed in `options.Attributes` it has priority over the deduced schema (the latter is ignored). - template <typename TRowType> - TNodeId CreateTable( - const TYPath& path, - const TSortColumns& sortColumns = TSortColumns(), - const TCreateOptions& options = TCreateOptions()); - - /// - /// @brief Remove Cypress node. - /// - /// @see [YT doc](https://yt.yandex-team.ru/docs/api/commands.html#remove) - virtual void Remove( - const TYPath& path, - const TRemoveOptions& options = TRemoveOptions()) = 0; - - /// - /// @brief Check if Cypress node exists. - /// - /// @see [YT doc](https://yt.yandex-team.ru/docs/api/commands.html#exists) - virtual bool Exists( - const TYPath& path, - const TExistsOptions& options = TExistsOptions()) = 0; - - /// - /// @brief Get Cypress node contents. 
- /// - /// @see [YT doc](https://yt.yandex-team.ru/docs/api/commands.html#get) - virtual TNode Get( - const TYPath& path, - const TGetOptions& options = TGetOptions()) = 0; - - /// - /// @brief Set Cypress node contents. - /// - /// @see [YT doc](https://yt.yandex-team.ru/docs/api/commands.html#set) - virtual void Set( - const TYPath& path, - const TNode& value, - const TSetOptions& options = TSetOptions()) = 0; - - /// - /// @brief Set multiple attributes for cypress path. - /// - /// @param path Path to root of the attributes to be set e.g. "//path/to/table/@"; - /// it is important to make sure that path ends with "/@". - /// @param attributes Map with attributes - /// @param options Optional parameters. - /// - /// @see [YT doc](https://yt.yandex-team.ru/docs/api/commands.html#multiset_attributes) - virtual void MultisetAttributes( - const TYPath& path, - const TNode::TMapType& attributes, - const TMultisetAttributesOptions& options = TMultisetAttributesOptions()) = 0; - - /// - /// @brief List Cypress map or attribute node keys. - /// - /// @param path Path in the tree to the node in question. - /// @param options Optional parameters. - /// - /// @return List of keys with attributes (if they were required in @ref NYT::TListOptions::AttributeFilter). - /// - /// @see [YT doc](https://yt.yandex-team.ru/docs/api/commands.html#list) - virtual TNode::TListType List( - const TYPath& path, - const TListOptions& options = TListOptions()) = 0; - - /// - /// @brief Copy Cypress node. - /// - /// @see [YT doc](https://yt.yandex-team.ru/docs/api/commands.html#copy) - virtual TNodeId Copy( - const TYPath& sourcePath, - const TYPath& destinationPath, - const TCopyOptions& options = TCopyOptions()) = 0; - - /// - /// @brief Move Cypress node (equivalent to copy-then-remove). 
- /// - /// @see [YT doc](https://yt.yandex-team.ru/docs/api/commands.html#move) - virtual TNodeId Move( - const TYPath& sourcePath, - const TYPath& destinationPath, - const TMoveOptions& options = TMoveOptions()) = 0; - - /// - /// @brief Create link to Cypress node. - /// - /// @see [YT doc](https://yt.yandex-team.ru/docs/api/commands.html#link) - virtual TNodeId Link( - const TYPath& targetPath, - const TYPath& linkPath, - const TLinkOptions& options = TLinkOptions()) = 0; - - /// - /// @brief Concatenate several tables into one. - /// - /// @see [YT doc](https://yt.yandex-team.ru/docs/api/commands.html#concatenate) - virtual void Concatenate( - const TVector<TRichYPath>& sourcePaths, - const TRichYPath& destinationPath, - const TConcatenateOptions& options = TConcatenateOptions()) = 0; - - /// - /// @brief Concatenate several tables into one. - /// - /// @see [YT doc](https://yt.yandex-team.ru/docs/api/commands.html#concatenate) - virtual void Concatenate( - const TVector<TYPath>& sourcePaths, - const TYPath& destinationPath, - const TConcatenateOptions& options = TConcatenateOptions()); - - /// - /// @brief Canonize YPath, moving all the complex YPath features to attributes. - /// - /// @see [YT doc](https://yt.yandex-team.ru/docs/api/commands.html#parse-ypath) - virtual TRichYPath CanonizeYPath(const TRichYPath& path) = 0; - - /// - /// @brief Get statistics for given sets of columns in given table ranges. - /// - /// @note Paths must contain column selectors. - /// - /// @see [YT doc](https://yt.yandex-team.ru/docs/api/commands.html#get-table-columnar-statistics) - virtual TVector<TTableColumnarStatistics> GetTableColumnarStatistics( - const TVector<TRichYPath>& paths, - const TGetTableColumnarStatisticsOptions& options = {}) = 0; - - /// - /// @brief Divide input tables into disjoint partitions. - /// - /// Resulted partitions are vectors of rich YPaths. - /// Each partition can be given to a separate worker for further independent processing. 
- /// - virtual TMultiTablePartitions GetTablePartitions( - const TVector<TRichYPath>& paths, - const TGetTablePartitionsOptions& options) = 0; - - /// - /// @brief Get file from file cache. - /// - /// @param md5Signature MD5 digest of the file. - /// @param cachePath Path to the file cache. - /// @param options Optional parameters. - /// - /// @see [YT doc](https://yt.yandex-team.ru/docs/api/commands.html#get-file-from-cache) - virtual TMaybe<TYPath> GetFileFromCache( - const TString& md5Signature, - const TYPath& cachePath, - const TGetFileFromCacheOptions& options = TGetFileFromCacheOptions()) = 0; - - /// - /// @brief Put file to file cache. - /// - /// @param filePath Path in Cypress to the file to cache. - /// @param md5Signature Expected MD5 digest of the file. - /// @param cachePath Path to the file cache. - /// @param options Optional parameters. - /// - /// @note The file in `filePath` must have been written with @ref NYT::TFileWriterOptions::ComputeMD5 set to `true`. - /// - /// @see [YT doc](https://yt.yandex-team.ru/docs/api/commands.html#put-file-to-cache) - virtual TYPath PutFileToCache( - const TYPath& filePath, - const TString& md5Signature, - const TYPath& cachePath, - const TPutFileToCacheOptions& options = TPutFileToCacheOptions()) = 0; -}; - -//////////////////////////////////////////////////////////////////////////////// - -template <typename TRowType> -TNodeId ICypressClient::CreateTable( - const TYPath& path, - const TSortColumns& sortColumns, - const TCreateOptions& options) -{ - static_assert( - std::is_base_of_v<::google::protobuf::Message, TRowType>, - "TRowType must be inherited from google::protobuf::Message"); - - TCreateOptions actualOptions = options; - if (!actualOptions.Attributes_) { - actualOptions.Attributes_ = TNode::CreateMap(); - } - - if (!actualOptions.Attributes_->HasKey("schema")) { - actualOptions.Attributes_->AsMap().emplace( - "schema", - CreateTableSchema<TRowType>(sortColumns).ToNode()); - } - - return Create(path, 
ENodeType::NT_TABLE, actualOptions); -} - -//////////////////////////////////////////////////////////////////////////////// - -} // namespace NYT diff --git a/yt/cpp/mapreduce/interface/error_codes.h b/yt/cpp/mapreduce/interface/error_codes.h deleted file mode 100644 index d8d76e04fd8..00000000000 --- a/yt/cpp/mapreduce/interface/error_codes.h +++ /dev/null @@ -1,468 +0,0 @@ -#pragma once - -// -// generated by generate-error-codes.py -// - -namespace NYT { -namespace NClusterErrorCodes { - - - -// from ./core/misc/public.h - -//////////////////////////////////////////////////////////////////////////////// - - constexpr int OK = 0; - constexpr int Generic = 1; - constexpr int Canceled = 2; - constexpr int Timeout = 3; - -//////////////////////////////////////////////////////////////////////////////// - - - - -// from ./core/rpc/public.h -namespace NRpc { - -//////////////////////////////////////////////////////////////////////////////// - - constexpr int TransportError = 100; - constexpr int ProtocolError = 101; - constexpr int NoSuchService = 102; - constexpr int NoSuchMethod = 103; - constexpr int Unavailable = 105; - constexpr int PoisonPill = 106; - constexpr int RequestQueueSizeLimitExceeded = 108; - constexpr int AuthenticationError = 109; - constexpr int InvalidCsrfToken = 110; - constexpr int InvalidCredentials = 111; - constexpr int StreamingNotSupported = 112; - -//////////////////////////////////////////////////////////////////////////////// - -} // namespace NRpc - - - -// from ./core/bus/public.h -namespace NBus { - -//////////////////////////////////////////////////////////////////////////////// - - constexpr int TransportError = 100; - -//////////////////////////////////////////////////////////////////////////////// - -} // namespace NBus - - - -// from ./client/scheduler/public.h -namespace NScheduler { - -//////////////////////////////////////////////////////////////////////////////// - - constexpr int NoSuchOperation = 200; - constexpr int 
InvalidOperationState = 201; - constexpr int TooManyOperations = 202; - constexpr int NoSuchJob = 203; - constexpr int OperationFailedOnJobRestart = 210; - constexpr int OperationFailedWithInconsistentLocking = 211; - constexpr int OperationControllerCrashed = 212; - constexpr int TestingError = 213; - -//////////////////////////////////////////////////////////////////////////////// - -} // namespace NScheduler - - - -// from ./client/table_client/public.h -namespace NTableClient { - -//////////////////////////////////////////////////////////////////////////////// - - constexpr int SortOrderViolation = 301; - constexpr int InvalidDoubleValue = 302; - constexpr int IncomparableType = 303; - constexpr int UnhashableType = 304; - // E.g. name table with more than #MaxColumnId columns (may come from legacy chunks). - constexpr int CorruptedNameTable = 305; - constexpr int UniqueKeyViolation = 306; - constexpr int SchemaViolation = 307; - constexpr int RowWeightLimitExceeded = 308; - constexpr int InvalidColumnFilter = 309; - constexpr int InvalidColumnRenaming = 310; - constexpr int IncompatibleKeyColumns = 311; - constexpr int ReaderDeadlineExpired = 312; - constexpr int TimestampOutOfRange = 313; - -//////////////////////////////////////////////////////////////////////////////// - -} // namespace NTableClient - - - -// from ./client/cypress_client/public.h -namespace NCypressClient { - -//////////////////////////////////////////////////////////////////////////////// - - constexpr int SameTransactionLockConflict = 400; - constexpr int DescendantTransactionLockConflict = 401; - constexpr int ConcurrentTransactionLockConflict = 402; - constexpr int PendingLockConflict = 403; - constexpr int LockDestroyed = 404; - -//////////////////////////////////////////////////////////////////////////////// - -} // namespace NCypressClient - - - -// from ./core/ytree/public.h -namespace NYTree { - -//////////////////////////////////////////////////////////////////////////////// - - 
constexpr int ResolveError = 500; - constexpr int AlreadyExists = 501; - constexpr int MaxChildCountViolation = 502; - constexpr int MaxStringLengthViolation = 503; - constexpr int MaxAttributeSizeViolation = 504; - constexpr int MaxKeyLengthViolation = 505; - -//////////////////////////////////////////////////////////////////////////////// - -} // namespace NYTree - - - -// from ./client/hydra/public.h -namespace NHydra { - -//////////////////////////////////////////////////////////////////////////////// - - constexpr int NoSuchSnapshot = 600; - constexpr int NoSuchChangelog = 601; - constexpr int InvalidEpoch = 602; - constexpr int InvalidVersion = 603; - constexpr int OutOfOrderMutations = 609; - constexpr int InvalidSnapshotVersion = 610; - -//////////////////////////////////////////////////////////////////////////////// - -} // namespace NHydra - - - -// from ./client/chunk_client/public.h -namespace NChunkClient { - -//////////////////////////////////////////////////////////////////////////////// - - constexpr int AllTargetNodesFailed = 700; - constexpr int SendBlocksFailed = 701; - constexpr int NoSuchSession = 702; - constexpr int SessionAlreadyExists = 703; - constexpr int ChunkAlreadyExists = 704; - constexpr int WindowError = 705; - constexpr int BlockContentMismatch = 706; - constexpr int NoSuchBlock = 707; - constexpr int NoSuchChunk = 708; - constexpr int NoLocationAvailable = 710; - constexpr int IOError = 711; - constexpr int MasterCommunicationFailed = 712; - constexpr int NoSuchChunkTree = 713; - constexpr int MasterNotConnected = 714; - constexpr int ChunkUnavailable = 716; - constexpr int NoSuchChunkList = 717; - constexpr int WriteThrottlingActive = 718; - constexpr int NoSuchMedium = 719; - constexpr int OptimisticLockFailure = 720; - constexpr int InvalidBlockChecksum = 721; - constexpr int BlockOutOfRange = 722; - constexpr int ObjectNotReplicated = 723; - constexpr int MissingExtension = 724; - constexpr int BandwidthThrottlingFailed = 725; 
- constexpr int ReaderTimeout = 726; - constexpr int NoSuchChunkView = 727; - -//////////////////////////////////////////////////////////////////////////////// - -} // namespace NChunkClient - - - -// from ./client/election/public.h -namespace NElection { - -//////////////////////////////////////////////////////////////////////////////// - - constexpr int InvalidState = 800; - constexpr int InvalidLeader = 801; - constexpr int InvalidEpoch = 802; - -//////////////////////////////////////////////////////////////////////////////// - -} // namespace NElection - - - -// from ./client/security_client/public.h -namespace NSecurityClient { - -//////////////////////////////////////////////////////////////////////////////// - - constexpr int AuthenticationError = 900; - constexpr int AuthorizationError = 901; - constexpr int AccountLimitExceeded = 902; - constexpr int UserBanned = 903; - constexpr int RequestQueueSizeLimitExceeded = 904; - constexpr int NoSuchAccount = 905; - constexpr int SafeModeEnabled = 906; - -//////////////////////////////////////////////////////////////////////////////// - -} // namespace NSecurityClient - - - -// from ./client/object_client/public.h -namespace NObjectClient { - -//////////////////////////////////////////////////////////////////////////////// - - constexpr int PrerequisiteCheckFailed = 1000; - -//////////////////////////////////////////////////////////////////////////////// - -} // namespace NObjectClient - - - -// from ./server/lib/exec_agent/public.h -namespace NExecAgent { - -//////////////////////////////////////////////////////////////////////////////// - - constexpr int ConfigCreationFailed = 1100; - constexpr int AbortByScheduler = 1101; - constexpr int ResourceOverdraft = 1102; - constexpr int WaitingJobTimeout = 1103; - constexpr int SlotNotFound = 1104; - constexpr int JobEnvironmentDisabled = 1105; - constexpr int JobProxyConnectionFailed = 1106; - constexpr int ArtifactCopyingFailed = 1107; - constexpr int 
NodeDirectoryPreparationFailed = 1108; - constexpr int SlotLocationDisabled = 1109; - constexpr int QuotaSettingFailed = 1110; - constexpr int RootVolumePreparationFailed = 1111; - constexpr int NotEnoughDiskSpace = 1112; - constexpr int ArtifactDownloadFailed = 1113; - constexpr int JobProxyPreparationTimeout = 1114; - constexpr int JobPreparationTimeout = 1115; - constexpr int JobProxyFailed = 1120; - -//////////////////////////////////////////////////////////////////////////////// - -} // namespace NExecAgent - - - -// from ./ytlib/job_proxy/public.h -namespace NJobProxy { - -//////////////////////////////////////////////////////////////////////////////// - - constexpr int MemoryLimitExceeded = 1200; - constexpr int MemoryCheckFailed = 1201; - constexpr int JobTimeLimitExceeded = 1202; - constexpr int UnsupportedJobType = 1203; - constexpr int JobNotPrepared = 1204; - constexpr int UserJobFailed = 1205; - -//////////////////////////////////////////////////////////////////////////////// - -} // namespace NJobProxy - - - -// from ./server/node/data_node/public.h -namespace NDataNode { - -//////////////////////////////////////////////////////////////////////////////// - - constexpr int LocalChunkReaderFailed = 1300; - constexpr int LayerUnpackingFailed = 1301; - -//////////////////////////////////////////////////////////////////////////////// - -} // namespace NDataNode - - - -// from ./core/net/public.h -namespace NNet { - -//////////////////////////////////////////////////////////////////////////////// - - constexpr int Aborted = 1500; - constexpr int ResolveTimedOut = 1501; - -//////////////////////////////////////////////////////////////////////////////// - -} // namespace NNet - - - -// from ./client/node_tracker_client/public.h -namespace NNodeTrackerClient { - -//////////////////////////////////////////////////////////////////////////////// - - constexpr int NoSuchNode = 1600; - constexpr int InvalidState = 1601; - constexpr int NoSuchNetwork = 1602; - 
constexpr int NoSuchRack = 1603; - constexpr int NoSuchDataCenter = 1604; - -//////////////////////////////////////////////////////////////////////////////// - -} // namespace NNodeTrackerClient - - - -// from ./client/tablet_client/public.h -namespace NTabletClient { - -//////////////////////////////////////////////////////////////////////////////// - - constexpr int TransactionLockConflict = 1700; - constexpr int NoSuchTablet = 1701; - constexpr int TabletNotMounted = 1702; - constexpr int AllWritesDisabled = 1703; - constexpr int InvalidMountRevision = 1704; - constexpr int TableReplicaAlreadyExists = 1705; - constexpr int InvalidTabletState = 1706; - -//////////////////////////////////////////////////////////////////////////////// - -} // namespace NTabletClient - - - -// from ./server/lib/shell/public.h -namespace NShell { - -//////////////////////////////////////////////////////////////////////////////// - - constexpr int ShellExited = 1800; - constexpr int ShellManagerShutDown = 1801; - -//////////////////////////////////////////////////////////////////////////////// - -} // namespace NShell - - - -// from ./client/api/public.h -namespace NApi { - -//////////////////////////////////////////////////////////////////////////////// - - constexpr int TooManyConcurrentRequests = 1900; - constexpr int JobArchiveUnavailable = 1910; - constexpr int RetriableArchiveError = 1911; - constexpr int NoSuchOperation = 1915; - -//////////////////////////////////////////////////////////////////////////////// - -} // namespace NApi - - - -// from ./server/controller_agent/chunk_pools/public.h -namespace NChunkPools { - -//////////////////////////////////////////////////////////////////////////////// - - constexpr int DataSliceLimitExceeded = 2000; - constexpr int MaxDataWeightPerJobExceeded = 2001; - constexpr int MaxPrimaryDataWeightPerJobExceeded = 2002; - -//////////////////////////////////////////////////////////////////////////////// - -} // namespace NChunkPools - - - 
-// from ./client/api/rpc_proxy/public.h -namespace NApi { - -//////////////////////////////////////////////////////////////////////////////// - - constexpr int ProxyBanned = 2100; - -//////////////////////////////////////////////////////////////////////////////// - -} // namespace NApi - - - -// from ./ytlib/controller_agent/public.h -namespace NControllerAgent { - -//////////////////////////////////////////////////////////////////////////////// - - constexpr int AgentCallFailed = 4400; - constexpr int NoOnlineNodeToScheduleJob = 4410; - constexpr int MaterializationFailed = 4415; - -//////////////////////////////////////////////////////////////////////////////// - -} // namespace NControllerAgent - - - -// from ./client/transaction_client/public.h -namespace NTransactionClient { - -//////////////////////////////////////////////////////////////////////////////// - - constexpr int NoSuchTransaction = 11000; - -//////////////////////////////////////////////////////////////////////////////// - -} // namespace NTransactionClient - - - -// from ./server/lib/containers/public.h -namespace NContainers { - -//////////////////////////////////////////////////////////////////////////////// - - constexpr int FailedToStartContainer = 13000; - -//////////////////////////////////////////////////////////////////////////////// - -} // namespace NContainers - - - -// from ./ytlib/job_prober_client/public.h -namespace NJobProberClient { - -//////////////////////////////////////////////////////////////////////////////// - - constexpr int JobIsNotRunning = 17000; - -//////////////////////////////////////////////////////////////////////////////// - -} // namespace NJobProberClient - -} // namespace NClusterErrorCodes -} // namespace NYT diff --git a/yt/cpp/mapreduce/interface/error_ut.cpp b/yt/cpp/mapreduce/interface/error_ut.cpp deleted file mode 100644 index 03f2751b239..00000000000 --- a/yt/cpp/mapreduce/interface/error_ut.cpp +++ /dev/null @@ -1,81 +0,0 @@ -#include 
<library/cpp/testing/unittest/registar.h> - -#include <library/cpp/json/json_reader.h> - -#include <yt/cpp/mapreduce/interface/errors.h> -#include <yt/cpp/mapreduce/common/helpers.h> - -using namespace NYT; - -template<> -void Out<NYT::TNode>(IOutputStream& s, const NYT::TNode& node) -{ - s << "TNode:" << NodeToYsonString(node); -} - -Y_UNIT_TEST_SUITE(ErrorSuite) -{ - Y_UNIT_TEST(TestParseJson) - { - // Scary real world error! Бу! - const char* jsonText = - R"""({)""" - R"""("code":500,)""" - R"""("message":"Error resolving path //home/user/link",)""" - R"""("attributes":{)""" - R"""("fid":18446484571700269066,)""" - R"""("method":"Create",)""" - R"""("tid":17558639495721339338,)""" - R"""("datetime":"2017-04-07T13:38:56.474819Z",)""" - R"""("pid":414529,)""" - R"""("host":"build01-01g.yt.yandex.net"},)""" - R"""("inner_errors":[{)""" - R"""("code":1,)""" - R"""("message":"Node //tt cannot have children",)""" - R"""("attributes":{)""" - R"""("fid":18446484571700269066,)""" - R"""("tid":17558639495721339338,)""" - R"""("datetime":"2017-04-07T13:38:56.474725Z",)""" - R"""("pid":414529,)""" - R"""("host":"build01-01g.yt.yandex.net"},)""" - R"""("inner_errors":[]}]})"""; - - NJson::TJsonValue jsonValue; - ReadJsonFastTree(jsonText, &jsonValue, /*throwOnError=*/ true); - - TYtError error(jsonValue); - UNIT_ASSERT_VALUES_EQUAL(error.GetCode(), 500); - UNIT_ASSERT_VALUES_EQUAL(error.GetMessage(), R"""(Error resolving path //home/user/link)"""); - UNIT_ASSERT_VALUES_EQUAL(error.InnerErrors().size(), 1); - UNIT_ASSERT_VALUES_EQUAL(error.InnerErrors()[0].GetCode(), 1); - - UNIT_ASSERT_VALUES_EQUAL(error.HasAttributes(), true); - UNIT_ASSERT_VALUES_EQUAL(error.GetAttributes().at("method"), TNode("Create")); - - UNIT_ASSERT_VALUES_EQUAL(error.GetAllErrorCodes(), TSet<int>({500, 1})); - } - - Y_UNIT_TEST(TestGetYsonText) { - const char* jsonText = - R"""({)""" - R"""("code":500,)""" - R"""("message":"outer error",)""" - R"""("attributes":{)""" - R"""("method":"Create",)""" - 
R"""("pid":414529},)""" - R"""("inner_errors":[{)""" - R"""("code":1,)""" - R"""("message":"inner error",)""" - R"""("attributes":{},)""" - R"""("inner_errors":[])""" - R"""(}]})"""; - TYtError error; - error.ParseFrom(jsonText); - TString ysonText = error.GetYsonText(); - TYtError error2(NodeFromYsonString(ysonText)); - UNIT_ASSERT_EQUAL( - ysonText, - R"""({"code"=500;"message"="outer error";"attributes"={"method"="Create";"pid"=414529};"inner_errors"=[{"code"=1;"message"="inner error"}]})"""); - UNIT_ASSERT_EQUAL(error2.GetYsonText(), ysonText); - } -} diff --git a/yt/cpp/mapreduce/interface/errors.cpp b/yt/cpp/mapreduce/interface/errors.cpp deleted file mode 100644 index 49a7c7cfc10..00000000000 --- a/yt/cpp/mapreduce/interface/errors.cpp +++ /dev/null @@ -1,437 +0,0 @@ -#include "errors.h" - -#include <library/cpp/yson/node/node_io.h> -#include <library/cpp/yson/node/node_visitor.h> - -#include <yt/cpp/mapreduce/interface/error_codes.h> - -#include <library/cpp/json/json_reader.h> -#include <library/cpp/yson/writer.h> - -#include <util/string/builder.h> -#include <util/stream/str.h> -#include <util/generic/set.h> - -namespace NYT { - -using namespace NJson; - -//////////////////////////////////////////////////////////////////// - -static void WriteErrorDescription(const TYtError& error, IOutputStream* out) -{ - (*out) << '\'' << error.GetMessage() << '\''; - const auto& innerErrorList = error.InnerErrors(); - if (!innerErrorList.empty()) { - (*out) << " { "; - bool first = true; - for (const auto& innerError : innerErrorList) { - if (first) { - first = false; - } else { - (*out) << " ; "; - } - WriteErrorDescription(innerError, out); - } - (*out) << " }"; - } -} - -static void SerializeError(const TYtError& error, NYson::IYsonConsumer* consumer) -{ - consumer->OnBeginMap(); - { - consumer->OnKeyedItem("code"); - consumer->OnInt64Scalar(error.GetCode()); - - consumer->OnKeyedItem("message"); - consumer->OnStringScalar(error.GetMessage()); - - if 
(!error.GetAttributes().empty()) { - consumer->OnKeyedItem("attributes"); - consumer->OnBeginMap(); - { - for (const auto& item : error.GetAttributes()) { - consumer->OnKeyedItem(item.first); - TNodeVisitor(consumer).Visit(item.second); - } - } - consumer->OnEndMap(); - } - - if (!error.InnerErrors().empty()) { - consumer->OnKeyedItem("inner_errors"); - { - consumer->OnBeginList(); - for (const auto& innerError : error.InnerErrors()) { - SerializeError(innerError, consumer); - } - consumer->OnEndList(); - } - } - } - consumer->OnEndMap(); -} - -static TString DumpJobInfoForException(const TOperationId& operationId, const TVector<TFailedJobInfo>& failedJobInfoList) -{ - ::TStringBuilder output; - // Exceptions have limit to contain 65508 bytes of text, so we also limit stderr text - constexpr size_t MAX_SIZE = 65508 / 2; - - size_t written = 0; - for (const auto& failedJobInfo : failedJobInfoList) { - if (written >= MAX_SIZE) { - break; - } - TStringStream nextChunk; - nextChunk << '\n'; - nextChunk << "OperationId: " << GetGuidAsString(operationId) << " JobId: " << GetGuidAsString(failedJobInfo.JobId) << '\n'; - nextChunk << "Error: " << failedJobInfo.Error.FullDescription() << '\n'; - if (!failedJobInfo.Stderr.empty()) { - nextChunk << "Stderr: " << Endl; - size_t tmpWritten = written + nextChunk.Str().size(); - if (tmpWritten >= MAX_SIZE) { - break; - } - - if (tmpWritten + failedJobInfo.Stderr.size() > MAX_SIZE) { - nextChunk << failedJobInfo.Stderr.substr(failedJobInfo.Stderr.size() - (MAX_SIZE - tmpWritten)); - } else { - nextChunk << failedJobInfo.Stderr; - } - } - written += nextChunk.Str().size(); - output << nextChunk.Str(); - } - return output; -} - -//////////////////////////////////////////////////////////////////// - -TYtError::TYtError() - : Code_(0) -{ } - -TYtError::TYtError(const TString& message) - : Code_(NYT::NClusterErrorCodes::Generic) - , Message_(message) -{ } - -TYtError::TYtError(int code, const TString& message) - : Code_(code) - , 
Message_(message) -{ } - -TYtError::TYtError(const TJsonValue& value) -{ - const TJsonValue::TMapType& map = value.GetMap(); - TJsonValue::TMapType::const_iterator it = map.find("message"); - if (it != map.end()) { - Message_ = it->second.GetString(); - } - - it = map.find("code"); - if (it != map.end()) { - Code_ = static_cast<int>(it->second.GetInteger()); - } else { - Code_ = NYT::NClusterErrorCodes::Generic; - } - - it = map.find("inner_errors"); - if (it != map.end()) { - const TJsonValue::TArray& innerErrors = it->second.GetArray(); - for (const auto& innerError : innerErrors) { - InnerErrors_.push_back(TYtError(innerError)); - } - } - - it = map.find("attributes"); - if (it != map.end()) { - auto attributes = NYT::NodeFromJsonValue(it->second); - if (attributes.IsMap()) { - Attributes_ = std::move(attributes.AsMap()); - } - } -} - -TYtError::TYtError(const TNode& node) -{ - const auto& map = node.AsMap(); - auto it = map.find("message"); - if (it != map.end()) { - Message_ = it->second.AsString(); - } - - it = map.find("code"); - if (it != map.end()) { - Code_ = static_cast<int>(it->second.AsInt64()); - } else { - Code_ = NYT::NClusterErrorCodes::Generic; - } - - it = map.find("inner_errors"); - if (it != map.end()) { - const auto& innerErrors = it->second.AsList(); - for (const auto& innerError : innerErrors) { - InnerErrors_.push_back(TYtError(innerError)); - } - } - - it = map.find("attributes"); - if (it != map.end()) { - auto& attributes = it->second; - if (attributes.IsMap()) { - Attributes_ = std::move(attributes.AsMap()); - } - } -} - -int TYtError::GetCode() const -{ - return Code_; -} - -const TString& TYtError::GetMessage() const -{ - return Message_; -} - -const TVector<TYtError>& TYtError::InnerErrors() const -{ - return InnerErrors_; -} - -void TYtError::ParseFrom(const TString& jsonError) -{ - TJsonValue value; - TStringInput input(jsonError); - ReadJsonTree(&input, &value); - *this = TYtError(value); -} - -TSet<int> 
TYtError::GetAllErrorCodes() const -{ - TDeque<const TYtError*> queue = {this}; - TSet<int> result; - while (!queue.empty()) { - const auto* current = queue.front(); - queue.pop_front(); - result.insert(current->Code_); - for (const auto& error : current->InnerErrors_) { - queue.push_back(&error); - } - } - return result; -} - -bool TYtError::ContainsErrorCode(int code) const -{ - if (Code_ == code) { - return true; - } - for (const auto& error : InnerErrors_) { - if (error.ContainsErrorCode(code)) { - return true; - } - } - return false; -} - - -bool TYtError::ContainsText(const TStringBuf& text) const -{ - if (Message_.Contains(text)) { - return true; - } - for (const auto& error : InnerErrors_) { - if (error.ContainsText(text)) { - return true; - } - } - return false; -} - -bool TYtError::HasAttributes() const -{ - return !Attributes_.empty(); -} - -const TNode::TMapType& TYtError::GetAttributes() const -{ - return Attributes_; -} - -TString TYtError::GetYsonText() const -{ - TStringStream out; - ::NYson::TYsonWriter writer(&out, NYson::EYsonFormat::Text); - SerializeError(*this, &writer); - return std::move(out.Str()); -} - -TString TYtError::ShortDescription() const -{ - TStringStream out; - WriteErrorDescription(*this, &out); - return std::move(out.Str()); -} - -TString TYtError::FullDescription() const -{ - TStringStream s; - WriteErrorDescription(*this, &s); - s << "; full error: " << GetYsonText(); - return s.Str(); -} - -//////////////////////////////////////////////////////////////////////////////// - -TErrorResponse::TErrorResponse(int httpCode, const TString& requestId) - : HttpCode_(httpCode) - , RequestId_(requestId) -{ } - -bool TErrorResponse::IsOk() const -{ - return Error_.GetCode() == 0; -} - -void TErrorResponse::SetRawError(const TString& message) -{ - Error_ = TYtError(message); - Setup(); -} - -void TErrorResponse::SetError(TYtError error) -{ - Error_ = std::move(error); - Setup(); -} - -void TErrorResponse::ParseFromJsonError(const TString& 
jsonError) -{ - Error_.ParseFrom(jsonError); - Setup(); -} - -void TErrorResponse::SetIsFromTrailers(bool isFromTrailers) -{ - IsFromTrailers_ = isFromTrailers; -} - -int TErrorResponse::GetHttpCode() const -{ - return HttpCode_; -} - -bool TErrorResponse::IsFromTrailers() const -{ - return IsFromTrailers_; -} - -bool TErrorResponse::IsTransportError() const -{ - return HttpCode_ == 503; -} - -TString TErrorResponse::GetRequestId() const -{ - return RequestId_; -} - -const TYtError& TErrorResponse::GetError() const -{ - return Error_; -} - -bool TErrorResponse::IsResolveError() const -{ - return Error_.ContainsErrorCode(NClusterErrorCodes::NYTree::ResolveError); -} - -bool TErrorResponse::IsAccessDenied() const -{ - return Error_.ContainsErrorCode(NClusterErrorCodes::NSecurityClient::AuthorizationError); -} - -bool TErrorResponse::IsConcurrentTransactionLockConflict() const -{ - return Error_.ContainsErrorCode(NClusterErrorCodes::NCypressClient::ConcurrentTransactionLockConflict); -} - -bool TErrorResponse::IsRequestRateLimitExceeded() const -{ - return Error_.ContainsErrorCode(NClusterErrorCodes::NSecurityClient::RequestQueueSizeLimitExceeded); -} - -bool TErrorResponse::IsRequestQueueSizeLimitExceeded() const -{ - return Error_.ContainsErrorCode(NClusterErrorCodes::NRpc::RequestQueueSizeLimitExceeded); -} - -bool TErrorResponse::IsChunkUnavailable() const -{ - return Error_.ContainsErrorCode(NClusterErrorCodes::NChunkClient::ChunkUnavailable); -} - -bool TErrorResponse::IsRequestTimedOut() const -{ - return Error_.ContainsErrorCode(NClusterErrorCodes::Timeout); -} - -bool TErrorResponse::IsNoSuchTransaction() const -{ - return Error_.ContainsErrorCode(NClusterErrorCodes::NTransactionClient::NoSuchTransaction); -} - -bool TErrorResponse::IsConcurrentOperationsLimitReached() const -{ - return Error_.ContainsErrorCode(NClusterErrorCodes::NScheduler::TooManyOperations); -} - -void TErrorResponse::Setup() -{ - TStringStream s; - *this << Error_.FullDescription(); -} - 
-//////////////////////////////////////////////////////////////////// - -TOperationFailedError::TOperationFailedError( - EState state, - TOperationId id, - TYtError ytError, - TVector<TFailedJobInfo> failedJobInfo) - : State_(state) - , OperationId_(id) - , Error_(std::move(ytError)) - , FailedJobInfo_(std::move(failedJobInfo)) -{ - *this << Error_.FullDescription(); - if (!FailedJobInfo_.empty()) { - *this << DumpJobInfoForException(OperationId_, FailedJobInfo_); - } -} - -TOperationFailedError::EState TOperationFailedError::GetState() const -{ - return State_; -} - -TOperationId TOperationFailedError::GetOperationId() const -{ - return OperationId_; -} - -const TYtError& TOperationFailedError::GetError() const -{ - return Error_; -} - -const TVector<TFailedJobInfo>& TOperationFailedError::GetFailedJobInfo() const -{ - return FailedJobInfo_; -} - -} // namespace NYT diff --git a/yt/cpp/mapreduce/interface/errors.h b/yt/cpp/mapreduce/interface/errors.h deleted file mode 100644 index afad58ed723..00000000000 --- a/yt/cpp/mapreduce/interface/errors.h +++ /dev/null @@ -1,290 +0,0 @@ -#pragma once - -/// -/// @file yt/cpp/mapreduce/interface/errors.h -/// -/// Errors and exceptions emitted by library. - -#include "fwd.h" -#include "common.h" - -#include <library/cpp/yson/node/node.h> - -#include <util/generic/bt_exception.h> -#include <util/generic/yexception.h> -#include <util/generic/string.h> -#include <util/generic/vector.h> - -namespace NJson { - class TJsonValue; -} // namespace NJson - -namespace NYT { - -//////////////////////////////////////////////////////////////////////////////// - -/// -/// @brief Error that is thrown when library detects invalid usage of API. -/// -/// For example trying to start operations on empty table list. -class TApiUsageError - : public TWithBackTrace<yexception> -{ }; - -/// -/// @brief Error that is thrown when request retries continues for too long. 
-/// -/// @see NYT::TRetryConfig -/// @see NYT::IRetryConfigProvider -class TRequestRetriesTimeout - : public yexception -{ }; - -//////////////////////////////////////////////////////////////////////////////// - -/// -/// @brief Error returned by YT cluster. -/// -/// An object of this class describe error that happened on YT server. -/// Internally each error is a tree. Each node of the tree contains: -/// - integer error code; -/// - text description of error; -/// - attributes describing error context. -/// -/// To get text description of an error one should use -/// @ref NYT::TYtError::ShortDescription or @ref NYT::TYtError::FullDescription -/// -/// To distinguish between error kinds @ref NYT::TYtError::ContainsErrorCode should be used. -/// -/// @see NYT::TErrorResponse -/// @see NYT::TOperationFailedError -class TYtError -{ -public: - /// Constructs error with NYT::NClusterErrorCodes::OK code and empty message. - TYtError(); - - /// Constructs error with NYT::NClusterErrorCodes::Generic code and given message. - explicit TYtError(const TString& message); - - /// Constructs error with given code and given message. - TYtError(int code, const TString& message); - - /// Construct error from json representation. - TYtError(const ::NJson::TJsonValue& value); - - /// Construct error from TNode representation. - TYtError(const TNode& value); - - /// - /// @brief Check if error or any of inner errors has given error code. - /// - /// Use this method to distinguish kind of error. - bool ContainsErrorCode(int code) const; - - /// - /// @brief Get short description of error. - /// - /// Short description contain text description of error and all inner errors. - /// It is human readable but misses some important information (error codes, error attributes). - /// - /// Usually it's better to use @ref NYT::TYtError::FullDescription to log errors. - TString ShortDescription() const; - - /// - /// @brief Get full description of error. 
- /// - /// Full description contains readable short description - /// followed by text yson representation of error that contains error codes and attributes. - TString FullDescription() const; - - /// - /// @brief Get error code of the topmost error. - /// - /// @warning Do not use this method to distinguish between error kinds - /// @ref NYT::TYtError::ContainsErrorCode should be used instead. - int GetCode() const; - - /// - /// @brief Get error text of the topmost error. - /// - /// @warning This method should not be used to log errors - /// since text description of inner errors is going to be lost. - /// @ref NYT::TYtError::FullDescription should be used instead. - const TString& GetMessage() const; - - /// - /// @brief Check if error or any of inner errors contains given text chunk. - /// - /// @warning @ref NYT::TYtError::ContainsErrorCode must be used instead of - /// this method when possible. If there is no suitable error code it's - /// better to ask yt@ to add one. This method should only be used as workaround. - bool ContainsText(const TStringBuf& text) const; - - /// @brief Get inner errors. - const TVector<TYtError>& InnerErrors() const; - - /// Parse error from json string. - void ParseFrom(const TString& jsonError); - - /// Collect error codes from entire error tree. - TSet<int> GetAllErrorCodes() const; - - /// Check if error has any attributes. - bool HasAttributes() const; - - /// Get error attributes. - const TNode::TMapType& GetAttributes() const; - - /// Get text yson representation of error - TString GetYsonText() const; - -private: - int Code_; - TString Message_; - TVector<TYtError> InnerErrors_; - TNode::TMapType Attributes_; -}; - -//////////////////////////////////////////////////////////////////////////////// - -/// -/// @brief Generic error response returned by server. -/// -/// TErrorResponse can be thrown from almost any client method when server responds with error. 
-/// -class TErrorResponse - : public yexception -{ -public: - TErrorResponse(int httpCode, const TString& requestId); - TErrorResponse(int httpCode, TYtError error); - - /// Get error object returned by server. - const TYtError& GetError() const; - - /// Get if (correlation-id) of request that was responded with error. - TString GetRequestId() const; - - /// Get HTTP code of response. - int GetHttpCode() const; - - /// Is error parsed from response trailers. - bool IsFromTrailers() const; - - /// Check if error was caused by transport problems inside YT cluster. - bool IsTransportError() const; - - /// Check if error was caused by failure to resolve cypress path. - bool IsResolveError() const; - - /// Check if error was caused by lack of permissions to execute request. - bool IsAccessDenied() const; - - /// Check if error was caused by failure to lock object because of another transaction is holding lock. - bool IsConcurrentTransactionLockConflict() const; - - /// Check if error was caused by request quota limit exceeding. - bool IsRequestRateLimitExceeded() const; - - // YT can't serve request because it is overloaded. - bool IsRequestQueueSizeLimitExceeded() const; - - /// Check if error was caused by failure to get chunk. Such errors are almost always temporary. - bool IsChunkUnavailable() const; - - /// Check if error was caused by internal YT timeout. - bool IsRequestTimedOut() const; - - /// Check if error was caused by trying to work with transaction that was finished or never existed. - bool IsNoSuchTransaction() const; - - // User reached their limit of concurrently running operations. - bool IsConcurrentOperationsLimitReached() const; - - /// @deprecated This method must not be used. 
- bool IsOk() const; - - void SetRawError(const TString& message); - void SetError(TYtError error); - void ParseFromJsonError(const TString& jsonError); - void SetIsFromTrailers(bool isFromTrailers); - -private: - void Setup(); - -private: - int HttpCode_; - TString RequestId_; - TYtError Error_; - bool IsFromTrailers_ = false; -}; - -//////////////////////////////////////////////////////////////////////////////// - -/// Info about failed jobs. -/// -/// @see NYT::TOperationFailedError -struct TFailedJobInfo -{ - /// Id of a job. - TJobId JobId; - - /// Error describing job failure. - TYtError Error; - - /// Stderr of job. - /// - /// @note YT doesn't store all job stderrs, check @ref NYT::IOperationClient::GetJobStderr - /// for list of limitations. - /// - /// @see NYT::IOperationClient::GetJobStderr - TString Stderr; -}; - -//////////////////////////////////////////////////////////////////////////////// - -/// -/// @brief Error that is thrown when operation watched by library fails. -/// -/// This error is thrown from operation starting methods when they are started in sync mode (@ refNYT::TOperationOptions::Wait == true) -/// or from future returned by NYT::IOperation::Watch. -/// -/// @see NYT::IOperationClient -class TOperationFailedError - : public yexception -{ -public: - /// Final state of operation. - enum EState { - /// Operation was failed due to some error. - Failed, - /// Operation didn't experienced errors, but was aborted by user request or by YT. - Aborted, - }; - -public: - TOperationFailedError(EState state, TOperationId id, TYtError ytError, TVector<TFailedJobInfo> failedJobInfo); - - /// Get final state of operation. - EState GetState() const; - - /// Get operation id. - TOperationId GetOperationId() const; - - /// Return operation error. - const TYtError& GetError() const; - - /// Return info about failed jobs (if any). 
- const TVector<TFailedJobInfo>& GetFailedJobInfo() const; - -private: - EState State_; - TOperationId OperationId_; - TYtError Error_; - TVector<TFailedJobInfo> FailedJobInfo_; -}; - -//////////////////////////////////////////////////////////////////////////////// - -} // namespace NYT diff --git a/yt/cpp/mapreduce/interface/finish_or_die.h b/yt/cpp/mapreduce/interface/finish_or_die.h deleted file mode 100644 index 9d7dcece025..00000000000 --- a/yt/cpp/mapreduce/interface/finish_or_die.h +++ /dev/null @@ -1,41 +0,0 @@ -#pragma once - -#include <util/system/yassert.h> - -#include <exception> - -/// @cond Doxygen_Suppress -namespace NYT::NDetail { - -//////////////////////////////////////////////////////////////////////////////// - -template <typename T> -void FinishOrDie(T* pThis, const char* className) noexcept -{ - auto fail = [&] (const char* what) { - Y_FAIL( - "\n\n" - "Destructor of %s caught exception during Finish: %s.\n" - "Some data is probably has not been written.\n" - "In order to handle such exceptions consider explicitly call Finish() method.\n", - className, - what); - }; - - try { - pThis->Finish(); - } catch (const std::exception& ex) { - if (!std::uncaught_exceptions()) { - fail(ex.what()); - } - } catch (...) { - if (!std::uncaught_exceptions()) { - fail("<unknown exception>"); - } - } -} - -//////////////////////////////////////////////////////////////////////////////// - -} // namespace NYT::NDetail -/// @endcond diff --git a/yt/cpp/mapreduce/interface/fluent.h b/yt/cpp/mapreduce/interface/fluent.h deleted file mode 100644 index 8ca6e86336b..00000000000 --- a/yt/cpp/mapreduce/interface/fluent.h +++ /dev/null @@ -1,678 +0,0 @@ -#pragma once - -/// -/// @file yt/cpp/mapreduce/interface/fluent.h -/// -/// Adapters for working with @ref NYson::IYsonConsumer in a structured way, with compile-time syntax checks. -/// -/// The following documentation is copied verbatim from `yt/core/ytree/fluent.h`. 
-/// -/// WHAT IS THIS -/// -/// Fluent adapters encapsulate invocation of IYsonConsumer methods in a -/// convenient structured manner. Key advantage of fluent-like code is that -/// attempt of building syntactically incorrect YSON structure will result -/// in a compile-time error. -/// -/// Each fluent object is associated with a context that defines possible YSON -/// tokens that may appear next. For example, TFluentMap is a fluent object -/// that corresponds to a location within YSON map right before a key-value -/// pair or the end of the map. -/// -/// More precisely, each object that may be obtained by a sequence of fluent -/// method calls has the full history of its enclosing YSON composite types in -/// its single template argument hereinafter referred to as TParent. This allows -/// us not to forget the original context after opening and closing the embedded -/// composite structure. -/// -/// It is possible to invoke a separate YSON building procedure by calling -/// one of convenience Do* methods. There are two possibilities here: it is -/// possible to delegate invocation context either as a fluent object (like -/// TFluentMap, TFluentList, TFluentAttributes or TFluentAny) or as a raw -/// IYsonConsumer*. The latter is discouraged since it is impossible to check -/// if a given side-built YSON structure fits current fluent context. -/// For example it is possible to call Do() method inside YSON map passing -/// consumer to a procedure that will treat context like it is in a list. -/// Passing typed fluent builder saves you from such a misbehaviour. -/// -/// TFluentXxx corresponds to an internal class of TXxx -/// without any history hidden in template argument. It allows you to -/// write procedures of form: -/// -/// void BuildSomeAttributesInYson(TFluentMap fluent) { ... } -/// -/// without thinking about the exact way how this procedure is nested in other -/// procedures. 
-/// -/// An important notation: we will refer to a function whose first argument -/// is TFluentXxx as TFuncXxx. -/// -/// -/// BRIEF LIST OF AVAILABLE METHODS -/// -/// Only the most popular methods are covered here. Refer to the code for the -/// rest of them. -/// -/// TAny: -/// * Value(T value) -> TParent, serialize `value` using underlying consumer. -/// T should be such that free function Serialize(NYson::IYsonConsumer*, const T&) is -/// defined; -/// * BeginMap() -> TFluentMap, open map; -/// * BeginList() -> TFluentList, open list; -/// * BeginAttributes() -> TFluentAttributes, open attributes; -/// -/// * Do(TFuncAny func) -> TAny, delegate invocation to a separate procedure. -/// * DoIf(bool condition, TFuncAny func) -> TAny, same as Do() but invoke -/// `func` only if `condition` is true; -/// * DoFor(TCollection collection, TFuncAny func) -> TAny, same as Do() -/// but iterate over `collection` and pass each of its elements as a second -/// argument to `func`. Instead of passing a collection you may it is possible -/// to pass two iterators as an argument; -/// -/// * DoMap(TFuncMap func) -> TAny, open a map, delegate invocation to a separate -/// procedure and close map; -/// * DoMapFor(TCollection collection, TFuncMap func) -> TAny, open a map, iterate -/// over `collection` and pass each of its elements as a second argument to `func` -/// and close map; -/// * DoList(TFuncList func) -> TAny, same as DoMap(); -/// * DoListFor(TCollection collection, TFuncList func) -> TAny; same as DoMapFor(). -/// -/// -/// TFluentMap: -/// * Item(TStringBuf key) -> TAny, open an element keyed with `key`; -/// * EndMap() -> TParent, close map; -/// * Do(TFuncMap func) -> TFluentMap, same as Do() for TAny; -/// * DoIf(bool condition, TFuncMap func) -> TFluentMap, same as DoIf() for TAny; -/// * DoFor(TCollection collection, TFuncMap func) -> TFluentMap, same as DoFor() for TAny. 
-/// -/// -/// TFluentList: -/// * Item() -> TAny, open an new list element; -/// * EndList() -> TParent, close list; -/// * Do(TFuncList func) -> TFluentList, same as Do() for TAny; -/// * DoIf(bool condition, TFuncList func) -> TFluentList, same as DoIf() for TAny; -/// * DoFor(TCollection collection, TListMap func) -> TFluentList, same as DoFor() for TAny. -/// -/// -/// TFluentAttributes: -/// * Item(TStringBuf key) -> TAny, open an element keyed with `key`. -/// * EndAttributes() -> TParentWithoutAttributes, close attributes. Note that -/// this method leads to a context that is forces not to have attributes, -/// preventing us from putting attributes twice before an object. -/// * Do(TFuncAttributes func) -> TFluentAttributes, same as Do() for TAny; -/// * DoIf(bool condition, TFuncAttributes func) -> TFluentAttributes, same as DoIf() -/// for TAny; -/// * DoFor(TCollection collection, TListAttributes func) -> TFluentAttributes, same as DoFor() -/// for TAny. -/// - - -#include "common.h" -#include "serialize.h" - -#include <library/cpp/yson/node/serialize.h> -#include <library/cpp/yson/node/node_builder.h> - -#include <library/cpp/yson/consumer.h> -#include <library/cpp/yson/writer.h> - -#include <util/generic/noncopyable.h> -#include <util/generic/ptr.h> -#include <util/stream/str.h> - -namespace NYT { - -//////////////////////////////////////////////////////////////////////////////// - -template <class T> -struct TFluentYsonUnwrapper -{ - using TUnwrapped = T; - - static TUnwrapped Unwrap(T t) - { - return std::move(t); - } -}; - -//////////////////////////////////////////////////////////////////////////////// - -struct TFluentYsonVoid -{ }; - -template <> -struct TFluentYsonUnwrapper<TFluentYsonVoid> -{ - using TUnwrapped = void; - - static TUnwrapped Unwrap(TFluentYsonVoid) - { } -}; - -//////////////////////////////////////////////////////////////////////////////// - -/// This class is actually a namespace for specific fluent adapter classes. 
-class TFluentYsonBuilder - : private TNonCopyable -{ -private: - template <class T> - static void WriteValue(NYT::NYson::IYsonConsumer* consumer, const T& value) - { - Serialize(value, consumer); - } - -public: - class TFluentAny; - template <class TParent> class TAny; - template <class TParent> class TToAttributes; - template <class TParent> class TAttributes; - template <class TParent> class TListType; - template <class TParent> class TMapType; - - /// Base class for all fluent adapters. - template <class TParent> - class TFluentBase - { - public: - /// Implicit conversion to yson consumer - operator NYT::NYson::IYsonConsumer* () const - { - return Consumer; - } - - protected: - /// @cond Doxygen_Suppress - NYT::NYson::IYsonConsumer* Consumer; - TParent Parent; - - TFluentBase(NYT::NYson::IYsonConsumer* consumer, TParent parent) - : Consumer(consumer) - , Parent(std::move(parent)) - { } - - using TUnwrappedParent = typename TFluentYsonUnwrapper<TParent>::TUnwrapped; - - TUnwrappedParent GetUnwrappedParent() - { - return TFluentYsonUnwrapper<TParent>::Unwrap(std::move(Parent)); - } - /// @endcond Doxygen_Suppress - }; - - /// Base class for fluent adapters for fragment of list, map or attributes. - template <template <class TParent> class TThis, class TParent> - class TFluentFragmentBase - : public TFluentBase<TParent> - { - public: - using TDeepThis = TThis<TParent>; - using TShallowThis = TThis<TFluentYsonVoid>; - using TUnwrappedParent = typename TFluentYsonUnwrapper<TParent>::TUnwrapped; - - explicit TFluentFragmentBase(NYT::NYson::IYsonConsumer* consumer, TParent parent = TParent()) - : TFluentBase<TParent>(consumer, std::move(parent)) - { } - - /// Delegate invocation to a separate procedure. - template <class TFunc> - TDeepThis& Do(const TFunc& func) - { - func(TShallowThis(this->Consumer)); - return *static_cast<TDeepThis*>(this); - } - - /// Conditionally delegate invocation to a separate procedure. 
- template <class TFunc> - TDeepThis& DoIf(bool condition, const TFunc& func) - { - if (condition) { - func(TShallowThis(this->Consumer)); - } - return *static_cast<TDeepThis*>(this); - } - - /// Calls `func(*this, element)` for each `element` in range `[begin, end)`. - template <class TFunc, class TIterator> - TDeepThis& DoFor(const TIterator& begin, const TIterator& end, const TFunc& func) - { - for (auto current = begin; current != end; ++current) { - func(TShallowThis(this->Consumer), current); - } - return *static_cast<TDeepThis*>(this); - } - - /// Calls `func(*this, element)` for each `element` in `collection`. - template <class TFunc, class TCollection> - TDeepThis& DoFor(const TCollection& collection, const TFunc& func) - { - for (const auto& item : collection) { - func(TShallowThis(this->Consumer), item); - } - return *static_cast<TDeepThis*>(this); - } - - }; - - /// Fluent adapter of a value without attributes. - template <class TParent> - class TAnyWithoutAttributes - : public TFluentBase<TParent> - { - public: - using TUnwrappedParent = typename TFluentYsonUnwrapper<TParent>::TUnwrapped; - - TAnyWithoutAttributes(NYT::NYson::IYsonConsumer* consumer, TParent parent) - : TFluentBase<TParent>(consumer, std::move(parent)) - { } - - /// Pass `value` to underlying consumer. - template <class T> - TUnwrappedParent Value(const T& value) - { - WriteValue(this->Consumer, value); - return this->GetUnwrappedParent(); - } - - /// Call `OnEntity()` of underlying consumer. - TUnwrappedParent Entity() - { - this->Consumer->OnEntity(); - return this->GetUnwrappedParent(); - } - - /// Serialize `collection` to underlying consumer as a list. 
- template <class TCollection> - TUnwrappedParent List(const TCollection& collection) - { - this->Consumer->OnBeginList(); - for (const auto& item : collection) { - this->Consumer->OnListItem(); - WriteValue(this->Consumer, item); - } - this->Consumer->OnEndList(); - return this->GetUnwrappedParent(); - } - - /// Serialize maximum `maxSize` elements of `collection` to underlying consumer as a list. - template <class TCollection> - TUnwrappedParent ListLimited(const TCollection& collection, size_t maxSize) - { - this->Consumer->OnBeginAttributes(); - this->Consumer->OnKeyedItem("count"); - this->Consumer->OnInt64Scalar(collection.size()); - this->Consumer->OnEndAttributes(); - this->Consumer->OnBeginList(); - size_t printedSize = 0; - for (const auto& item : collection) { - if (printedSize >= maxSize) - break; - this->Consumer->OnListItem(); - WriteValue(this->Consumer, item); - ++printedSize; - } - this->Consumer->OnEndList(); - return this->GetUnwrappedParent(); - } - - /// Open a list. - TListType<TParent> BeginList() - { - this->Consumer->OnBeginList(); - return TListType<TParent>(this->Consumer, this->Parent); - } - - /// Open a list, delegate invocation to `func`, then close the list. - template <class TFunc> - TUnwrappedParent DoList(const TFunc& func) - { - this->Consumer->OnBeginList(); - func(TListType<TFluentYsonVoid>(this->Consumer)); - this->Consumer->OnEndList(); - return this->GetUnwrappedParent(); - } - - /// Open a list, call `func(*this, element)` for each `element` of range, then close the list. 
- template <class TFunc, class TIterator> - TUnwrappedParent DoListFor(const TIterator& begin, const TIterator& end, const TFunc& func) - { - this->Consumer->OnBeginList(); - for (auto current = begin; current != end; ++current) { - func(TListType<TFluentYsonVoid>(this->Consumer), current); - } - this->Consumer->OnEndList(); - return this->GetUnwrappedParent(); - } - - /// Open a list, call `func(*this, element)` for each `element` of `collection`, then close the list. - template <class TFunc, class TCollection> - TUnwrappedParent DoListFor(const TCollection& collection, const TFunc& func) - { - this->Consumer->OnBeginList(); - for (const auto& item : collection) { - func(TListType<TFluentYsonVoid>(this->Consumer), item); - } - this->Consumer->OnEndList(); - return this->GetUnwrappedParent(); - } - - /// Open a map. - TMapType<TParent> BeginMap() - { - this->Consumer->OnBeginMap(); - return TMapType<TParent>(this->Consumer, this->Parent); - } - - /// Open a map, delegate invocation to `func`, then close the map. - template <class TFunc> - TUnwrappedParent DoMap(const TFunc& func) - { - this->Consumer->OnBeginMap(); - func(TMapType<TFluentYsonVoid>(this->Consumer)); - this->Consumer->OnEndMap(); - return this->GetUnwrappedParent(); - } - - /// Open a map, call `func(*this, element)` for each `element` of range, then close the map. - template <class TFunc, class TIterator> - TUnwrappedParent DoMapFor(const TIterator& begin, const TIterator& end, const TFunc& func) - { - this->Consumer->OnBeginMap(); - for (auto current = begin; current != end; ++current) { - func(TMapType<TFluentYsonVoid>(this->Consumer), current); - } - this->Consumer->OnEndMap(); - return this->GetUnwrappedParent(); - } - - /// Open a map, call `func(*this, element)` for each `element` of `collection`, then close the map. 
- template <class TFunc, class TCollection> - TUnwrappedParent DoMapFor(const TCollection& collection, const TFunc& func) - { - this->Consumer->OnBeginMap(); - for (const auto& item : collection) { - func(TMapType<TFluentYsonVoid>(this->Consumer), item); - } - this->Consumer->OnEndMap(); - return this->GetUnwrappedParent(); - } - }; - - /// Fluent adapter of any value. - template <class TParent> - class TAny - : public TAnyWithoutAttributes<TParent> - { - public: - using TBase = TAnyWithoutAttributes<TParent>; - - explicit TAny(NYT::NYson::IYsonConsumer* consumer, TParent parent) - : TBase(consumer, std::move(parent)) - { } - - /// Open attributes. - TAttributes<TBase> BeginAttributes() - { - this->Consumer->OnBeginAttributes(); - return TAttributes<TBase>( - this->Consumer, - TBase(this->Consumer, this->Parent)); - } - }; - - /// Fluent adapter of attributes fragment (the inside part of attributes). - template <class TParent = TFluentYsonVoid> - class TAttributes - : public TFluentFragmentBase<TAttributes, TParent> - { - public: - using TThis = TAttributes<TParent>; - using TUnwrappedParent = typename TFluentYsonUnwrapper<TParent>::TUnwrapped; - - explicit TAttributes(NYT::NYson::IYsonConsumer* consumer, TParent parent = TParent()) - : TFluentFragmentBase<TFluentYsonBuilder::TAttributes, TParent>(consumer, std::move(parent)) - { } - - /// Pass attribute key to underlying consumer. - TAny<TThis> Item(const TStringBuf& key) - { - this->Consumer->OnKeyedItem(key); - return TAny<TThis>(this->Consumer, *this); - } - - /// Pass attribute key to underlying consumer. - template <size_t Size> - TAny<TThis> Item(const char (&key)[Size]) - { - return Item(TStringBuf(key, Size - 1)); - } - - //TODO: from TNode - - /// Close the attributes. - TUnwrappedParent EndAttributes() - { - this->Consumer->OnEndAttributes(); - return this->GetUnwrappedParent(); - } - }; - - /// Fluent adapter of list fragment (the inside part of a list). 
- template <class TParent = TFluentYsonVoid> - class TListType - : public TFluentFragmentBase<TListType, TParent> - { - public: - using TThis = TListType<TParent>; - using TUnwrappedParent = typename TFluentYsonUnwrapper<TParent>::TUnwrapped; - - explicit TListType(NYT::NYson::IYsonConsumer* consumer, TParent parent = TParent()) - : TFluentFragmentBase<TFluentYsonBuilder::TListType, TParent>(consumer, std::move(parent)) - { } - - /// Call `OnListItem()` of underlying consumer. - TAny<TThis> Item() - { - this->Consumer->OnListItem(); - return TAny<TThis>(this->Consumer, *this); - } - - // TODO: from TNode - - /// Close the list. - TUnwrappedParent EndList() - { - this->Consumer->OnEndList(); - return this->GetUnwrappedParent(); - } - }; - - /// Fluent adapter of map fragment (the inside part of a map). - template <class TParent = TFluentYsonVoid> - class TMapType - : public TFluentFragmentBase<TMapType, TParent> - { - public: - using TThis = TMapType<TParent>; - using TUnwrappedParent = typename TFluentYsonUnwrapper<TParent>::TUnwrapped; - - explicit TMapType(NYT::NYson::IYsonConsumer* consumer, TParent parent = TParent()) - : TFluentFragmentBase<TFluentYsonBuilder::TMapType, TParent>(consumer, std::move(parent)) - { } - - /// Pass map key to underlying consumer. - template <size_t Size> - TAny<TThis> Item(const char (&key)[Size]) - { - return Item(TStringBuf(key, Size - 1)); - } - - /// Pass map key to underlying consumer. - TAny<TThis> Item(const TStringBuf& key) - { - this->Consumer->OnKeyedItem(key); - return TAny<TThis>(this->Consumer, *this); - } - - // TODO: from TNode - - /// Close the map. - TUnwrappedParent EndMap() - { - this->Consumer->OnEndMap(); - return this->GetUnwrappedParent(); - } - }; - -}; - -//////////////////////////////////////////////////////////////////////////////// - -/// Builder representing any value. -using TFluentAny = TFluentYsonBuilder::TAny<TFluentYsonVoid>; - -/// Builder representing the inside of a list (list fragment). 
-using TFluentList = TFluentYsonBuilder::TListType<TFluentYsonVoid>; - -/// Builder representing the inside of a map (map fragment). -using TFluentMap = TFluentYsonBuilder::TMapType<TFluentYsonVoid>; - -/// Builder representing the inside of attributes. -using TFluentAttributes = TFluentYsonBuilder::TAttributes<TFluentYsonVoid>; - -//////////////////////////////////////////////////////////////////////////////// - -/// Create a fluent adapter to invoke methods of `consumer`. -static inline TFluentAny BuildYsonFluently(NYT::NYson::IYsonConsumer* consumer) -{ - return TFluentAny(consumer, TFluentYsonVoid()); -} - -/// Create a fluent adapter to invoke methods of `consumer` describing the contents of a list. -static inline TFluentList BuildYsonListFluently(NYT::NYson::IYsonConsumer* consumer) -{ - return TFluentList(consumer); -} - -/// Create a fluent adapter to invoke methods of `consumer` describing the contents of a map. -static inline TFluentMap BuildYsonMapFluently(NYT::NYson::IYsonConsumer* consumer) -{ - return TFluentMap(consumer); -} - -//////////////////////////////////////////////////////////////////////////////// - -class TFluentYsonWriterState - : public TThrRefBase -{ -public: - using TValue = TString; - - explicit TFluentYsonWriterState(::NYson::EYsonFormat format) - : Writer(&Output, format) - { } - - TString GetValue() - { - return Output.Str(); - } - - NYT::NYson::IYsonConsumer* GetConsumer() - { - return &Writer; - } - -private: - TStringStream Output; - ::NYson::TYsonWriter Writer; -}; - -//////////////////////////////////////////////////////////////////////////////// - -class TFluentYsonBuilderState - : public TThrRefBase -{ -public: - using TValue = TNode; - - explicit TFluentYsonBuilderState() - : Builder(&Node) - { } - - TNode GetValue() - { - return std::move(Node); - } - - NYT::NYson::IYsonConsumer* GetConsumer() - { - return &Builder; - } - -private: - TNode Node; - TNodeBuilder Builder; -}; - 
-//////////////////////////////////////////////////////////////////////////////// - -template <class TState> -class TFluentYsonHolder -{ -public: - explicit TFluentYsonHolder(::TIntrusivePtr<TState> state) - : State(state) - { } - - ::TIntrusivePtr<TState> GetState() const - { - return State; - } - -private: - ::TIntrusivePtr<TState> State; -}; - -//////////////////////////////////////////////////////////////////////////////// - -template <class TState> -struct TFluentYsonUnwrapper< TFluentYsonHolder<TState> > -{ - using TUnwrapped = typename TState::TValue; - - static TUnwrapped Unwrap(const TFluentYsonHolder<TState>& holder) - { - return std::move(holder.GetState()->GetValue()); - } -}; - -//////////////////////////////////////////////////////////////////////////////// - -template <class TState> -TFluentYsonBuilder::TAny<TFluentYsonHolder<TState>> -BuildYsonFluentlyWithState(::TIntrusivePtr<TState> state) -{ - return TFluentYsonBuilder::TAny<TFluentYsonHolder<TState>>( - state->GetConsumer(), - TFluentYsonHolder<TState>(state)); -} - -/// Create a fluent adapter returning a `TString` with corresponding YSON when construction is finished. -inline TFluentYsonBuilder::TAny<TFluentYsonHolder<TFluentYsonWriterState>> -BuildYsonStringFluently(::NYson::EYsonFormat format = ::NYson::EYsonFormat::Text) -{ - ::TIntrusivePtr<TFluentYsonWriterState> state(new TFluentYsonWriterState(format)); - return BuildYsonFluentlyWithState(state); -} - -/// Create a fluent adapter returning a @ref NYT::TNode when construction is finished. 
-inline TFluentYsonBuilder::TAny<TFluentYsonHolder<TFluentYsonBuilderState>> -BuildYsonNodeFluently() -{ - ::TIntrusivePtr<TFluentYsonBuilderState> state(new TFluentYsonBuilderState); - return BuildYsonFluentlyWithState(state); -} - -//////////////////////////////////////////////////////////////////////////////// - -} // namespace NYT diff --git a/yt/cpp/mapreduce/interface/format.cpp b/yt/cpp/mapreduce/interface/format.cpp deleted file mode 100644 index f8318310a40..00000000000 --- a/yt/cpp/mapreduce/interface/format.cpp +++ /dev/null @@ -1,135 +0,0 @@ -#include "format.h" -#include "protobuf_format.h" - -#include "errors.h" - -#include <google/protobuf/descriptor.h> -#include <google/protobuf/messagext.h> - -namespace NYT { - -TTableSchema CreateTableSchema( - const ::google::protobuf::Descriptor& messageDescriptor, - bool keepFieldsWithoutExtension) -{ - return NDetail::CreateTableSchemaImpl(messageDescriptor, keepFieldsWithoutExtension); -} - -//////////////////////////////////////////////////////////////////////////////// - -TFormat::TFormat(const TNode& config) - : Config(config) -{ } - - -TFormat TFormat::Protobuf( - const TVector<const ::google::protobuf::Descriptor*>& descriptors, - bool withDescriptors) -{ - if (withDescriptors) { - return TFormat(NDetail::MakeProtoFormatConfigWithDescriptors(descriptors)); - } else { - return TFormat(NDetail::MakeProtoFormatConfigWithTables(descriptors)); - } -} - -TFormat TFormat::YsonText() -{ - TNode config("yson"); - config.Attributes()("format", "text"); - return TFormat(config); -} - -TFormat TFormat::YsonBinary() -{ - TNode config("yson"); - config.Attributes()("format", "binary"); - return TFormat(config); -} - -TFormat TFormat::YaMRLenval() -{ - TNode config("yamr"); - config.Attributes()("lenval", true)("has_subkey", true); - return TFormat(config); -} - -TFormat TFormat::Json() -{ - return TFormat(TNode("json")); -} - -bool TFormat::IsTextYson() const -{ - if (!Config.IsString() || Config.AsString() != "yson") 
{ - return false; - } - if (!Config.HasAttributes()) { - return false; - } - const auto& attributes = Config.GetAttributes(); - if (!attributes.HasKey("format") || attributes["format"] != TNode("text")) { - return false; - } - return true; -} - -bool TFormat::IsProtobuf() const -{ - return Config.IsString() && Config.AsString() == "protobuf"; -} - -bool TFormat::IsYamredDsv() const -{ - return Config.IsString() && Config.AsString() == "yamred_dsv"; -} - -static TString FormatName(const TFormat& format) -{ - if (!format.Config.IsString()) { - Y_VERIFY(format.Config.IsUndefined()); - return "<undefined>"; - } - return format.Config.AsString(); -} - -TYamredDsvAttributes TFormat::GetYamredDsvAttributes() const -{ - if (!IsYamredDsv()) { - ythrow TApiUsageError() << "Cannot get yamred_dsv attributes for " << FormatName(*this) << " format"; - } - TYamredDsvAttributes attributes; - - const auto& nodeAttributes = Config.GetAttributes(); - { - const auto& keyColumns = nodeAttributes["key_column_names"]; - if (!keyColumns.IsList()) { - ythrow yexception() << "Ill-formed format: key_column_names is of non-list type: " << keyColumns.GetType(); - } - for (auto& column : keyColumns.AsList()) { - if (!column.IsString()) { - ythrow yexception() << "Ill-formed format: key_column_names: " << column.GetType(); - } - attributes.KeyColumnNames.push_back(column.AsString()); - } - } - - if (nodeAttributes.HasKey("subkey_column_names")) { - const auto& subkeyColumns = nodeAttributes["subkey_column_names"]; - if (!subkeyColumns.IsList()) { - ythrow yexception() << "Ill-formed format: subkey_column_names is not a list: " << subkeyColumns.GetType(); - } - for (const auto& column : subkeyColumns.AsList()) { - if (!column.IsString()) { - ythrow yexception() << "Ill-formed format: non-string inside subkey_key_column_names: " << column.GetType(); - } - attributes.SubkeyColumnNames.push_back(column.AsString()); - } - } - - return attributes; -} - 
-//////////////////////////////////////////////////////////////////////////////// - -} // namespace NYT diff --git a/yt/cpp/mapreduce/interface/format.h b/yt/cpp/mapreduce/interface/format.h deleted file mode 100644 index e2975764642..00000000000 --- a/yt/cpp/mapreduce/interface/format.h +++ /dev/null @@ -1,122 +0,0 @@ -#pragma once - -/// -/// @file yt/cpp/mapreduce/interface/format.h -/// -/// Header containing class to work with raw [YT formats](https://yt.yandex-team.ru/docs/description/storage/formats.html). - -#include "node.h" - -#include <google/protobuf/descriptor.h> - -namespace NYT { - -//////////////////////////////////////////////////////////////////////////////// - -/// @deprecated -struct TYamredDsvAttributes -{ - /// Names of key columns. - TVector<TString> KeyColumnNames; - - /// Names of subkey columns. - TVector<TString> SubkeyColumnNames; -}; - -//////////////////////////////////////////////////////////////////////////////// - -/// @brief Class representing YT data format. -/// -/// Normally the user does not need to use it. -/// However, the class is handy for "raw" operations and table reading and writing, -/// e.g. @ref NYT::IOperationClient::RawMap and other raw operations, -/// @ref NYT::IIOClient::CreateRawReader and @ref NYT::IIOClient::CreateRawWriter. -/// Anyway, the static factory methods should be preferred to the constructor. -/// -/// @see [YT doc](https://yt.yandex-team.ru/docs/description/storage/formats.html). -struct TFormat -{ -public: - /// Format representation understandable by YT. - TNode Config; - -public: - /// @brief Construct format from given YT format representation. - /// - /// @note Prefer using static factory methods (e.g. @ref NYT::TFormat::YsonBinary, @ref NYT::TFormat::YsonText, @ref NYT::TFormat::Protobuf). - explicit TFormat(const TNode& config = TNode()); - - /// @brief Create text YSON format. 
- /// - /// @see [the doc](https://yt.yandex-team.ru/docs/description/storage/formats.html#YSON) - static TFormat YsonText(); - - /// @brief Create binary YSON format. - /// - /// @see [the doc](https://yt.yandex-team.ru/docs/description/storage/formats.html#YSON) - static TFormat YsonBinary(); - - /// @brief Create YaMR format. - /// - /// @deprecated - static TFormat YaMRLenval(); - - /// @brief Create protobuf format from protobuf message descriptors. - /// - /// @see [the doc](https://yt.yandex-team.ru/docs/api/c++/protobuf.html). - static TFormat Protobuf( - const TVector<const ::google::protobuf::Descriptor*>& descriptors, - bool withDescriptors = false); - - /// @brief Create JSON format. - /// - /// @see [the doc](https://yt.yandex-team.ru/docs/description/storage/formats.html#JSON) - static TFormat Json(); - - /// @brief Create protobuf format for the message specified in template parameter. - /// - /// `T` must be inherited from `Message`. - /// - /// @see [the doc](https://yt.yandex-team.ru/docs/api/c++/protobuf.html). - template<typename T> - static inline TFormat Protobuf(bool withDescriptors = false); - - /// @brief Is the format text YSON? - /// - /// @see [the doc](https://yt.yandex-team.ru/docs/description/storage/formats.html#YSON) - bool IsTextYson() const; - - /// @brief Is the format protobuf? - /// - /// @see [the doc](https://yt.yandex-team.ru/docs/api/c++/protobuf.html) - bool IsProtobuf() const; - - /// @brief Is the format YaMR? - /// - /// @deprecated - bool IsYamredDsv() const; - - /// @brief For YAMR format returns its attributes in structured way. - /// - /// @deprecated - TYamredDsvAttributes GetYamredDsvAttributes() const; -}; - -//////////////////////////////////////////////////////////////////////////////// - -template<typename T> -TFormat TFormat::Protobuf(bool withDescriptors) { - return TFormat::Protobuf({T::descriptor()}, withDescriptors); -} - -/// @brief Create table schema from protobuf message descriptor. 
-/// -/// @param messageDescriptor Message descriptor -/// @param keepFieldsWithoutExtension Add to schema fields without "column_name" or "key_column_name" extensions. -TTableSchema CreateTableSchema( - const ::google::protobuf::Descriptor& messageDescriptor, - bool keepFieldsWithoutExtension); - -//////////////////////////////////////////////////////////////////////////////// - -} // namespace NYT diff --git a/yt/cpp/mapreduce/interface/format_ut.cpp b/yt/cpp/mapreduce/interface/format_ut.cpp deleted file mode 100644 index 069c29087df..00000000000 --- a/yt/cpp/mapreduce/interface/format_ut.cpp +++ /dev/null @@ -1,235 +0,0 @@ -#include "common.h" -#include "errors.h" -#include "format.h" -#include "common_ut.h" - -#include <yt/cpp/mapreduce/interface/proto3_ut.pb.h> -#include <yt/cpp/mapreduce/interface/protobuf_table_schema_ut.pb.h> - -#include <library/cpp/testing/unittest/registar.h> - -using namespace NYT; - -static TNode GetColumns(const TFormat& format, int tableIndex = 0) -{ - return format.Config.GetAttributes()["tables"][tableIndex]["columns"]; -} - -Y_UNIT_TEST_SUITE(ProtobufFormat) -{ - Y_UNIT_TEST(TIntegral) - { - const auto format = TFormat::Protobuf<NUnitTesting::TIntegral>(); - auto columns = GetColumns(format); - - struct TColumn - { - TString Name; - TString ProtoType; - int FieldNumber; - }; - - auto expected = TVector<TColumn>{ - {"DoubleField", "double", 1}, - {"FloatField", "float", 2}, - {"Int32Field", "int32", 3}, - {"Int64Field", "int64", 4}, - {"Uint32Field", "uint32", 5}, - {"Uint64Field", "uint64", 6}, - {"Sint32Field", "sint32", 7}, - {"Sint64Field", "sint64", 8}, - {"Fixed32Field", "fixed32", 9}, - {"Fixed64Field", "fixed64", 10}, - {"Sfixed32Field", "sfixed32", 11}, - {"Sfixed64Field", "sfixed64", 12}, - {"BoolField", "bool", 13}, - {"EnumField", "enum_string", 14}, - }; - - UNIT_ASSERT_VALUES_EQUAL(columns.Size(), expected.size()); - for (int i = 0; i < static_cast<int>(columns.Size()); ++i) { - 
UNIT_ASSERT_VALUES_EQUAL(columns[i]["name"], expected[i].Name); - UNIT_ASSERT_VALUES_EQUAL(columns[i]["proto_type"], expected[i].ProtoType); - UNIT_ASSERT_VALUES_EQUAL(columns[i]["field_number"], expected[i].FieldNumber); - } - } - - Y_UNIT_TEST(TRowFieldSerializationOption) - { - const auto format = TFormat::Protobuf<NUnitTesting::TRowFieldSerializationOption>(); - auto columns = GetColumns(format); - - UNIT_ASSERT_VALUES_EQUAL(columns[0]["name"], "UrlRow_1"); - UNIT_ASSERT_VALUES_EQUAL(columns[0]["proto_type"], "structured_message"); - UNIT_ASSERT_VALUES_EQUAL(columns[0]["field_number"], 1); - const auto& fields = columns[0]["fields"]; - UNIT_ASSERT_VALUES_EQUAL(fields[0]["name"], "Host"); - UNIT_ASSERT_VALUES_EQUAL(fields[0]["proto_type"], "string"); - UNIT_ASSERT_VALUES_EQUAL(fields[0]["field_number"], 1); - - UNIT_ASSERT_VALUES_EQUAL(fields[1]["name"], "Path"); - UNIT_ASSERT_VALUES_EQUAL(fields[1]["proto_type"], "string"); - UNIT_ASSERT_VALUES_EQUAL(fields[1]["field_number"], 2); - - UNIT_ASSERT_VALUES_EQUAL(fields[2]["name"], "HttpCode"); - UNIT_ASSERT_VALUES_EQUAL(fields[2]["proto_type"], "sint32"); - UNIT_ASSERT_VALUES_EQUAL(fields[2]["field_number"], 3); - - UNIT_ASSERT_VALUES_EQUAL(columns[1]["name"], "UrlRow_2"); - UNIT_ASSERT_VALUES_EQUAL(columns[1]["proto_type"], "message"); - UNIT_ASSERT_VALUES_EQUAL(columns[1]["field_number"], 2); - } - - Y_UNIT_TEST(Packed) - { - const auto format = TFormat::Protobuf<NUnitTesting::TPacked>(); - auto column = GetColumns(format)[0]; - - UNIT_ASSERT_VALUES_EQUAL(column["name"], "PackedListInt64"); - UNIT_ASSERT_VALUES_EQUAL(column["proto_type"], "int64"); - UNIT_ASSERT_VALUES_EQUAL(column["field_number"], 1); - UNIT_ASSERT_VALUES_EQUAL(column["packed"], true); - UNIT_ASSERT_VALUES_EQUAL(column["repeated"], true); - } - - Y_UNIT_TEST(Cyclic) - { - UNIT_ASSERT_EXCEPTION(TFormat::Protobuf<NUnitTesting::TCyclic>(), TApiUsageError); - UNIT_ASSERT_EXCEPTION(TFormat::Protobuf<NUnitTesting::TCyclic::TA>(), TApiUsageError); - 
UNIT_ASSERT_EXCEPTION(TFormat::Protobuf<NUnitTesting::TCyclic::TB>(), TApiUsageError); - UNIT_ASSERT_EXCEPTION(TFormat::Protobuf<NUnitTesting::TCyclic::TC>(), TApiUsageError); - UNIT_ASSERT_EXCEPTION(TFormat::Protobuf<NUnitTesting::TCyclic::TD>(), TApiUsageError); - - const auto format = TFormat::Protobuf<NUnitTesting::TCyclic::TE>(); - auto column = GetColumns(format)[0]; - UNIT_ASSERT_VALUES_EQUAL(column["name"], "d"); - UNIT_ASSERT_VALUES_EQUAL(column["proto_type"], "message"); - UNIT_ASSERT_VALUES_EQUAL(column["field_number"], 1); - } - - Y_UNIT_TEST(Map) - { - const auto format = TFormat::Protobuf<NUnitTesting::TWithMap>(); - auto columns = GetColumns(format); - - UNIT_ASSERT_VALUES_EQUAL(columns.Size(), 5); - { - const auto& column = columns[0]; - UNIT_ASSERT_VALUES_EQUAL(column["name"], "MapDefault"); - UNIT_ASSERT_VALUES_EQUAL(column["proto_type"], "structured_message"); - UNIT_ASSERT_VALUES_EQUAL(column["fields"].Size(), 2); - UNIT_ASSERT_VALUES_EQUAL(column["fields"][0]["proto_type"], "int64"); - UNIT_ASSERT_VALUES_EQUAL(column["fields"][1]["proto_type"], "message"); - } - { - const auto& column = columns[1]; - UNIT_ASSERT_VALUES_EQUAL(column["name"], "MapListOfStructsLegacy"); - UNIT_ASSERT_VALUES_EQUAL(column["proto_type"], "structured_message"); - UNIT_ASSERT_VALUES_EQUAL(column["fields"].Size(), 2); - UNIT_ASSERT_VALUES_EQUAL(column["fields"][0]["proto_type"], "int64"); - UNIT_ASSERT_VALUES_EQUAL(column["fields"][1]["proto_type"], "message"); - } - { - const auto& column = columns[2]; - UNIT_ASSERT_VALUES_EQUAL(column["name"], "MapListOfStructs"); - UNIT_ASSERT_VALUES_EQUAL(column["proto_type"], "structured_message"); - UNIT_ASSERT_VALUES_EQUAL(column["fields"].Size(), 2); - UNIT_ASSERT_VALUES_EQUAL(column["fields"][0]["proto_type"], "int64"); - UNIT_ASSERT_VALUES_EQUAL(column["fields"][1]["proto_type"], "structured_message"); - } - { - const auto& column = columns[3]; - UNIT_ASSERT_VALUES_EQUAL(column["name"], "MapOptionalDict"); - 
UNIT_ASSERT_VALUES_EQUAL(column["proto_type"], "structured_message"); - UNIT_ASSERT_VALUES_EQUAL(column["fields"].Size(), 2); - UNIT_ASSERT_VALUES_EQUAL(column["fields"][0]["proto_type"], "int64"); - UNIT_ASSERT_VALUES_EQUAL(column["fields"][1]["proto_type"], "structured_message"); - } - { - const auto& column = columns[4]; - UNIT_ASSERT_VALUES_EQUAL(column["name"], "MapDict"); - UNIT_ASSERT_VALUES_EQUAL(column["proto_type"], "structured_message"); - UNIT_ASSERT_VALUES_EQUAL(column["fields"].Size(), 2); - UNIT_ASSERT_VALUES_EQUAL(column["fields"][0]["proto_type"], "int64"); - UNIT_ASSERT_VALUES_EQUAL(column["fields"][1]["proto_type"], "structured_message"); - } - } - - Y_UNIT_TEST(Oneof) - { - const auto format = TFormat::Protobuf<NUnitTesting::TWithOneof>(); - auto columns = GetColumns(format); - - UNIT_ASSERT_VALUES_EQUAL(columns.Size(), 4); - auto check = [] (const TNode& column, TStringBuf name, TStringBuf oneof2Name) { - UNIT_ASSERT_VALUES_EQUAL(column["name"], name); - UNIT_ASSERT_VALUES_EQUAL(column["proto_type"], "structured_message"); - UNIT_ASSERT_VALUES_EQUAL(column["fields"].Size(), 5); - UNIT_ASSERT_VALUES_EQUAL(column["fields"][0]["name"], "field"); - - const auto& oneof2 = column["fields"][1]; - UNIT_ASSERT_VALUES_EQUAL(oneof2["name"], oneof2Name); - UNIT_ASSERT_VALUES_EQUAL(oneof2["proto_type"], "oneof"); - UNIT_ASSERT_VALUES_EQUAL(oneof2["fields"][0]["name"], "y2"); - UNIT_ASSERT_VALUES_EQUAL(oneof2["fields"][1]["name"], "z2"); - UNIT_ASSERT_VALUES_EQUAL(oneof2["fields"][1]["proto_type"], "structured_message"); - const auto& embeddedOneof = oneof2["fields"][1]["fields"][0]; - UNIT_ASSERT_VALUES_EQUAL(embeddedOneof["name"], "Oneof"); - UNIT_ASSERT_VALUES_EQUAL(embeddedOneof["fields"][0]["name"], "x"); - UNIT_ASSERT_VALUES_EQUAL(embeddedOneof["fields"][1]["name"], "y"); - UNIT_ASSERT_VALUES_EQUAL(oneof2["fields"][2]["name"], "x2"); - - UNIT_ASSERT_VALUES_EQUAL(column["fields"][2]["name"], "x1"); - UNIT_ASSERT_VALUES_EQUAL(column["fields"][3]["name"], 
"y1"); - UNIT_ASSERT_VALUES_EQUAL(column["fields"][4]["name"], "z1"); - }; - - check(columns[0], "DefaultSeparateFields", "variant_field_name"); - check(columns[1], "NoDefault", "Oneof2"); - - { - const auto& column = columns[2]; - UNIT_ASSERT_VALUES_EQUAL(column["name"], "SerializationProtobuf"); - UNIT_ASSERT_VALUES_EQUAL(column["proto_type"], "structured_message"); - UNIT_ASSERT_VALUES_EQUAL(column["fields"].Size(), 3); - UNIT_ASSERT_VALUES_EQUAL(column["fields"][0]["name"], "x1"); - UNIT_ASSERT_VALUES_EQUAL(column["fields"][1]["name"], "y1"); - UNIT_ASSERT_VALUES_EQUAL(column["fields"][2]["name"], "z1"); - } - { - const auto& column = columns[3]; - UNIT_ASSERT_VALUES_EQUAL(column["name"], "TopLevelOneof"); - UNIT_ASSERT_VALUES_EQUAL(column["proto_type"], "oneof"); - UNIT_ASSERT_VALUES_EQUAL(column["fields"].Size(), 1); - UNIT_ASSERT_VALUES_EQUAL(column["fields"][0]["name"], "MemberOfTopLevelOneof"); - } - } -} - -Y_UNIT_TEST_SUITE(Proto3) -{ - Y_UNIT_TEST(TWithOptional) - { - const auto format = TFormat::Protobuf<NTestingProto3::TWithOptional>(); - auto columns = GetColumns(format); - - UNIT_ASSERT_VALUES_EQUAL(columns[0]["name"], "x"); - UNIT_ASSERT_VALUES_EQUAL(columns[0]["proto_type"], "int64"); - UNIT_ASSERT_VALUES_EQUAL(columns[0]["field_number"], 1); - } - - Y_UNIT_TEST(TWithOptionalMessage) - { - const auto format = TFormat::Protobuf<NTestingProto3::TWithOptionalMessage>(); - auto columns = GetColumns(format); - - UNIT_ASSERT_VALUES_EQUAL(columns[0]["name"], "x"); - UNIT_ASSERT_VALUES_EQUAL(columns[0]["proto_type"], "structured_message"); - UNIT_ASSERT_VALUES_EQUAL(columns[0]["field_number"], 1); - - UNIT_ASSERT_VALUES_EQUAL(columns[0]["fields"].Size(), 1); - UNIT_ASSERT_VALUES_EQUAL(columns[0]["fields"][0]["name"], "x"); - UNIT_ASSERT_VALUES_EQUAL(columns[0]["fields"][0]["proto_type"], "int64"); - UNIT_ASSERT_VALUES_EQUAL(columns[0]["fields"][0]["field_number"], 1); - } -} diff --git a/yt/cpp/mapreduce/interface/fwd.h b/yt/cpp/mapreduce/interface/fwd.h 
deleted file mode 100644 index 0434c03d8b9..00000000000 --- a/yt/cpp/mapreduce/interface/fwd.h +++ /dev/null @@ -1,397 +0,0 @@ -#pragma once - -/// -/// @file yt/cpp/mapreduce/interface/fwd.h -/// -/// Header containing mostly forward declarations of types. - - -#include <util/generic/fwd.h> -#include <util/system/types.h> - -#include <variant> - -/// @cond Doxygen_Suppress -namespace google::protobuf { - class Message; -} - -namespace NYT { - - //////////////////////////////////////////////////////////////////////////////// - // batch_request.h - //////////////////////////////////////////////////////////////////////////////// - - class IBatchRequest; - using TBatchRequestPtr = ::TIntrusivePtr<IBatchRequest>; - - //////////////////////////////////////////////////////////////////////////////// - // client.h - //////////////////////////////////////////////////////////////////////////////// - - enum ELockMode : int; - - struct TStartTransactionOptions; - - struct TLockOptions; - - template <class TDerived> - struct TTabletOptions; - - struct TMountTableOptions; - - struct TUnmountTableOptions; - - struct TRemountTableOptions; - - struct TReshardTableOptions; - - struct TAlterTableOptions; - - struct TLookupRowsOptions; - - struct TSelectRowsOptions; - - struct TCreateClientOptions; - - struct TAlterTableReplicaOptions; - - struct TGetFileFromCacheOptions; - - struct TPutFileToCacheOptions; - - struct TCheckPermissionResult; - struct TCheckPermissionResponse; - struct TCheckPermissionOptions; - - struct TTabletInfo; - - class ILock; - using ILockPtr = ::TIntrusivePtr<ILock>; - - class ITransaction; - using ITransactionPtr = ::TIntrusivePtr<ITransaction>; - - class ITransactionPinger; - using ITransactionPingerPtr = ::TIntrusivePtr<ITransactionPinger>; - - struct IOperation; - using IOperationPtr = ::TIntrusivePtr<IOperation>; - - class IClientBase; - - class IClient; - - using IClientPtr = ::TIntrusivePtr<IClient>; - using IClientBasePtr = ::TIntrusivePtr<IClientBase>; 
- - //////////////////////////////////////////////////////////////////////////////// - // config.h - //////////////////////////////////////////////////////////////////////////////// - - struct TConfig; - using TConfigPtr = ::TIntrusivePtr<TConfig>; - - //////////////////////////////////////////////////////////////////////////////// - // cypress.h - //////////////////////////////////////////////////////////////////////////////// - - enum ENodeType : int; - - struct TCreateOptions; - - struct TRemoveOptions; - - struct TGetOptions; - - struct TSetOptions; - - struct TMultisetAttributesOptions; - - struct TListOptions; - - struct TCopyOptions; - - struct TMoveOptions; - - struct TLinkOptions; - - struct TConcatenateOptions; - - struct TInsertRowsOptions; - - struct TDeleteRowsOptions; - - struct TTrimRowsOptions; - - class ICypressClient; - - //////////////////////////////////////////////////////////////////////////////// - // errors.h - //////////////////////////////////////////////////////////////////////////////// - - class TApiUsageError; - - class TYtError; - - class TErrorResponse; - - struct TFailedJobInfo; - - class TOperationFailedError; - - //////////////////////////////////////////////////////////////////////////////// - // node.h - //////////////////////////////////////////////////////////////////////////////// - - class TNode; - - //////////////////////////////////////////////////////////////////////////////// - // common.h - //////////////////////////////////////////////////////////////////////////////// - - using TTransactionId = TGUID; - using TNodeId = TGUID; - using TLockId = TGUID; - using TOperationId = TGUID; - using TTabletCellId = TGUID; - using TReplicaId = TGUID; - using TJobId = TGUID; - - using TYPath = TString; - using TLocalFilePath = TString; - - template <class T, class TDerived = void> - struct TOneOrMany; - - // key column values - using TKey = TOneOrMany<TNode>; - - class TSortColumn; - - // column names - using TColumnNames = 
TOneOrMany<TString>; - - // key column descriptors. - class TSortColumns; - - enum EValueType : int; - - enum ESortOrder : int; - - enum EOptimizeForAttr : i8; - - enum EErasureCodecAttr : i8; - - enum ESchemaModificationAttr : i8; - - enum class EMasterReadKind : int; - - class TColumnSchema; - - class TTableSchema; - - enum class ERelation; - - struct TKeyBound; - - struct TReadLimit; - - struct TReadRange; - - struct TRichYPath; - - struct TAttributeFilter; - - //////////////////////////////////////////////////////////////////////////////// - // io.h - //////////////////////////////////////////////////////////////////////////////// - - enum class EFormatType : int; - - struct TFormat; - - class IFileReader; - - using IFileReaderPtr = ::TIntrusivePtr<IFileReader>; - - class IFileWriter; - - using IFileWriterPtr = ::TIntrusivePtr<IFileWriter>; - - class IBlobTableReader; - using IBlobTableReaderPtr = ::TIntrusivePtr<IBlobTableReader>; - - class TRawTableReader; - - using TRawTableReaderPtr = ::TIntrusivePtr<TRawTableReader>; - - class TRawTableWriter; - - using TRawTableWriterPtr = ::TIntrusivePtr<TRawTableWriter>; - - template <class T, class = void> - class TTableReader; - - template <class T, class = void> - class TTableRangesReader; - - template <typename T> - using TTableRangesReaderPtr = ::TIntrusivePtr<TTableRangesReader<T>>; - - template <class T> - using TTableReaderPtr = ::TIntrusivePtr<TTableReader<T>>; - - template <class T, class = void> - class TTableWriter; - - template <class T> - using TTableWriterPtr = ::TIntrusivePtr<TTableWriter<T>>; - - struct TYaMRRow; - - using ::google::protobuf::Message; - - class ISkiffRowParser; - - using ISkiffRowParserPtr = ::TIntrusivePtr<ISkiffRowParser>; - - class ISkiffRowSkipper; - - using ISkiffRowSkipperPtr = ::TIntrusivePtr<ISkiffRowSkipper>; - - namespace NDetail { - - class TYdlGenericRowType; - - } // namespace NDetail - - template<class... TYdlRowTypes> - class TYdlOneOf; - - template<class... 
TProtoRowTypes> - class TProtoOneOf; - - template<class... TSkiffRowTypes> - class TSkiffRowOneOf; - - using TYaMRReader = TTableReader<TYaMRRow>; - using TYaMRWriter = TTableWriter<TYaMRRow>; - using TNodeReader = TTableReader<TNode>; - using TNodeWriter = TTableWriter<TNode>; - using TMessageReader = TTableReader<Message>; - using TMessageWriter = TTableWriter<Message>; - using TYdlTableWriter = TTableWriter<NDetail::TYdlGenericRowType>; - - template <class TDerived> - struct TIOOptions; - - struct TFileReaderOptions; - - struct TFileWriterOptions; - - struct TTableReaderOptions; - - class TSkiffRowHints; - - struct TTableWriterOptions; - - //////////////////////////////////////////////////////////////////////////////// - // job_statistics.h - //////////////////////////////////////////////////////////////////////////////// - - class TJobStatistics; - - template <typename T> - class TJobStatisticsEntry; - - //////////////////////////////////////////////////////////////////////////////// - // operation.h - //////////////////////////////////////////////////////////////////////////////// - - class TFormatHints; - - struct TUserJobSpec; - - struct TMapOperationSpec; - - struct TRawMapOperationSpec; - - struct TReduceOperationSpec; - - struct TMapReduceOperationSpec; - - struct TJoinReduceOperationSpec; - - struct TSortOperationSpec; - - class IIOperationPreparationContext; - - class IJob; - using IJobPtr = ::TIntrusivePtr<IJob>; - - class IRawJob; - using IRawJobPtr = ::TIntrusivePtr<IRawJob>; - - enum EMergeMode : int; - - struct TMergeOperationSpec; - - struct TEraseOperationSpec; - - template <class TR, class TW> - class IMapper; - - template <class TR, class TW> - class IReducer; - - template <class TR, class TW> - class IAggregatorReducer; - - struct TSuspendOperationOptions; - - struct TResumeOperationOptions; - - enum class EOperationBriefState : int; - - struct TOperationAttributes; - - struct TOperationOptions; - - enum class EOperationAttribute : int; - - 
struct TOperationAttributeFilter; - - struct TGetOperationOptions; - - struct TListOperationsOptions; - - struct TGetJobOptions; - - struct TListJobsOptions; - - struct IOperationClient; - - enum class EFinishedJobState : int; - - enum class EJobType : int; - enum class EJobState : int; - enum class ETaskName : int; - class TTaskName; - - struct TJobBinaryDefault; - - struct TJobBinaryLocalPath; - - struct TJobBinaryCypressPath; - - using TJobBinaryConfig = std::variant< - TJobBinaryDefault, - TJobBinaryLocalPath, - TJobBinaryCypressPath>; - - struct TRetryConfig; - class IRetryConfigProvider; - using IRetryConfigProviderPtr = ::TIntrusivePtr<IRetryConfigProvider>; -} -/// @endcond diff --git a/yt/cpp/mapreduce/interface/init.h b/yt/cpp/mapreduce/interface/init.h deleted file mode 100644 index 302be268fc4..00000000000 --- a/yt/cpp/mapreduce/interface/init.h +++ /dev/null @@ -1,71 +0,0 @@ -#pragma once - -/// -/// @file yt/cpp/mapreduce/interface/init.h -/// -/// Initialization functions of YT Wrapper. - -#include <yt/cpp/mapreduce/interface/wait_proxy.h> - -#include <util/generic/fwd.h> - -#include <functional> - -namespace NYT { - -//////////////////////////////////////////////////////////////////////////////// - -/// Options for @ref NYT::Initialize() and @ref NYT::JoblessInitialize() functions -struct TInitializeOptions -{ - using TSelf = TInitializeOptions; - - /// - /// @brief Override waiting functions for YT Wrapper. - /// - /// This options allows to override functions used by this library to wait something. - FLUENT_FIELD_DEFAULT(::TIntrusivePtr<IWaitProxy>, WaitProxy, nullptr); - - /// - /// @brief Enable/disable cleanup when program execution terminates abnormally. - /// - /// When set to true, library will abort all active transactions and running operations when program - /// terminates on error or signal. - FLUENT_FIELD_DEFAULT(bool, CleanupOnTermination, false); - - /// - /// @brief Set callback to be called before exit() in job mode. 
- /// - /// Provided function will be called just before exit() when program is started in job mode. - /// This might be useful for shutting down libraries that are used inside operations. - /// - /// NOTE: Keep in mind that inside job execution environment differs from client execution environment. - /// So JobOnExitFunction should not depend on argc/argv environment variables etc. - FLUENT_FIELD_OPTION(std::function<void()>, JobOnExitFunction); -}; - -/// -/// @brief Performs basic initialization (logging, termination handlers, etc). -/// -/// This function never switches to job mode. -void JoblessInitialize(const TInitializeOptions& options = TInitializeOptions()); - -/// -/// @brief Performs basic initialization and switches to a job mode if required. -/// -/// This function performs basic initialization (it sets up logging reads the config, etc) and checks if binary is launched -/// on YT machine inside a job. If latter is true this function launches proper job and after job is done it calls exit(). -/// -/// This function must be called if application starts any operation. -/// This function must be called immediately after entering main() function before any argument parsing is done. 
-void Initialize(int argc, const char **argv, const TInitializeOptions &options = TInitializeOptions()); - -/// Similar to @ref NYT::Initialize(int, const char**, const TInitializeOptions&) -void Initialize(int argc, char **argv, const TInitializeOptions &options = TInitializeOptions()); - -/// Similar to @ref NYT::Initialize(int, const char**, const TInitializeOptions&) -void Initialize(const TInitializeOptions &options = TInitializeOptions()); - -//////////////////////////////////////////////////////////////////////////////// - -} // namespace NYT diff --git a/yt/cpp/mapreduce/interface/io-inl.h b/yt/cpp/mapreduce/interface/io-inl.h deleted file mode 100644 index c35ebb74811..00000000000 --- a/yt/cpp/mapreduce/interface/io-inl.h +++ /dev/null @@ -1,1015 +0,0 @@ -#pragma once - -#ifndef IO_INL_H_ -#error "Direct inclusion of this file is not allowed, use io.h" -#endif -#undef IO_INL_H_ - -#include "finish_or_die.h" - -#include <util/generic/typetraits.h> -#include <util/generic/yexception.h> -#include <util/stream/length.h> - -#include <util/system/mutex.h> -#include <util/system/spinlock.h> - -#include <library/cpp/yson/node/node_builder.h> - -#include <yt/cpp/mapreduce/interface/serialize.h> - -namespace NYT { - -//////////////////////////////////////////////////////////////////////////////// - -namespace NDetail { - -template<class T> -struct TIsProtoOneOf - : std::false_type -{ }; - -template <class ...TProtoRowTypes> -struct TIsProtoOneOf<TProtoOneOf<TProtoRowTypes...>> - : std::true_type -{ }; - -template <class T> -struct TIsSkiffRowOneOf - : std::false_type -{ }; - -template <class ...TSkiffRowTypes> -struct TIsSkiffRowOneOf<TSkiffRowOneOf<TSkiffRowTypes...>> - : std::true_type -{ }; - -} // namespace NDetail - -//////////////////////////////////////////////////////////////////////////////// - -template <class T, class = void> -struct TRowTraits; - -template <> -struct TRowTraits<TNode> -{ - using TRowType = TNode; - using IReaderImpl = INodeReaderImpl; - 
using IWriterImpl = INodeWriterImpl; -}; - -template <> -struct TRowTraits<TYaMRRow> -{ - using TRowType = TYaMRRow; - using IReaderImpl = IYaMRReaderImpl; - using IWriterImpl = IYaMRWriterImpl; -}; - -template <> -struct TRowTraits<Message> -{ - using TRowType = Message; - using IReaderImpl = IProtoReaderImpl; - using IWriterImpl = IProtoWriterImpl; -}; - -template <class T> -struct TRowTraits<T, std::enable_if_t<TIsBaseOf<Message, T>::Value>> -{ - using TRowType = T; - using IReaderImpl = IProtoReaderImpl; - using IWriterImpl = IProtoWriterImpl; -}; - -template <class T> -struct TRowTraits<T, std::enable_if_t<TIsSkiffRow<T>::value>> -{ - using TRowType = T; - using IReaderImpl = ISkiffRowReaderImpl; -}; - -template <class... TSkiffRowTypes> -struct TRowTraits<TSkiffRowOneOf<TSkiffRowTypes...>> -{ - using TRowType = TSkiffRowOneOf<TSkiffRowTypes...>; - using IReaderImpl = ISkiffRowReaderImpl; -}; - -template <class... TProtoRowTypes> -struct TRowTraits<TProtoOneOf<TProtoRowTypes...>> -{ - using TRowType = TProtoOneOf<TProtoRowTypes...>; - using IReaderImpl = IProtoReaderImpl; - using IWriterImpl = IProtoWriterImpl; -}; - -//////////////////////////////////////////////////////////////////////////////// - -struct IReaderImplBase - : public TThrRefBase -{ - virtual bool IsValid() const = 0; - virtual void Next() = 0; - virtual ui32 GetTableIndex() const = 0; - virtual ui32 GetRangeIndex() const = 0; - virtual ui64 GetRowIndex() const = 0; - virtual void NextKey() = 0; - - // Not pure virtual because of clients that has already implemented this interface. 
- virtual TMaybe<size_t> GetReadByteCount() const; - virtual i64 GetTabletIndex() const; - virtual bool IsEndOfStream() const; - virtual bool IsRawReaderExhausted() const; -}; - -struct INodeReaderImpl - : public IReaderImplBase -{ - virtual const TNode& GetRow() const = 0; - virtual void MoveRow(TNode* row) = 0; -}; - -struct IYaMRReaderImpl - : public IReaderImplBase -{ - virtual const TYaMRRow& GetRow() const = 0; - virtual void MoveRow(TYaMRRow* row) - { - *row = GetRow(); - } -}; - -struct IProtoReaderImpl - : public IReaderImplBase -{ - virtual void ReadRow(Message* row) = 0; -}; - -struct ISkiffRowReaderImpl - : public IReaderImplBase -{ - virtual void ReadRow(const ISkiffRowParserPtr& parser) = 0; -}; - -//////////////////////////////////////////////////////////////////////////////// - -namespace NDetail { - -//////////////////////////////////////////////////////////////////////////////// - -// We don't include <yt/cpp/mapreduce/interface/logging/yt_log.h> in this file -// to avoid macro name clashes (specifically YT_LOG_DEBUG) -void LogTableReaderStatistics(ui64 rowCount, TMaybe<size_t> byteCount); - -template <class T> -class TTableReaderBase - : public TThrRefBase -{ -public: - using TRowType = typename TRowTraits<T>::TRowType; - using IReaderImpl = typename TRowTraits<T>::IReaderImpl; - - explicit TTableReaderBase(::TIntrusivePtr<IReaderImpl> reader) - : Reader_(reader) - { } - - ~TTableReaderBase() override - { - NDetail::LogTableReaderStatistics(ReadRowCount_, Reader_->GetReadByteCount()); - } - - bool IsValid() const - { - return Reader_->IsValid(); - } - - void Next() - { - Reader_->Next(); - ++ReadRowCount_; - RowState_ = ERowState::None; - } - - bool IsEndOfStream() - { - return Reader_->IsEndOfStream(); - } - - bool IsRawReaderExhausted() - { - return Reader_->IsRawReaderExhausted(); - } - - ui32 GetTableIndex() const - { - return Reader_->GetTableIndex(); - } - - ui32 GetRangeIndex() const - { - return Reader_->GetRangeIndex(); - } - - ui64 
GetRowIndex() const - { - return Reader_->GetRowIndex(); - } - - i64 GetTabletIndex() const - { - return Reader_->GetTabletIndex(); - } - -protected: - template <typename TCacher, typename TCacheGetter> - const auto& DoGetRowCached(TCacher cacher, TCacheGetter cacheGetter) const - { - switch (RowState_) { - case ERowState::None: - cacher(); - RowState_ = ERowState::Cached; - break; - case ERowState::Cached: - break; - case ERowState::MovedOut: - ythrow yexception() << "Row is already moved"; - } - return *cacheGetter(); - } - - template <typename U, typename TMover, typename TCacheMover> - void DoMoveRowCached(U* result, TMover mover, TCacheMover cacheMover) - { - Y_VERIFY(result); - switch (RowState_) { - case ERowState::None: - mover(result); - break; - case ERowState::Cached: - cacheMover(result); - break; - case ERowState::MovedOut: - ythrow yexception() << "Row is already moved"; - } - RowState_ = ERowState::MovedOut; - } - -private: - enum class ERowState - { - None, - Cached, - MovedOut, - }; - -protected: - ::TIntrusivePtr<IReaderImpl> Reader_; - -private: - ui64 ReadRowCount_ = 0; - mutable ERowState RowState_ = ERowState::None; -}; - -template <class T> -class TSimpleTableReader - : public TTableReaderBase<T> -{ -public: - using TBase = TTableReaderBase<T>; - using typename TBase::TRowType; - - using TBase::TBase; - - const TRowType& GetRow() const - { - // Caching is implemented in underlying reader. - return TBase::DoGetRowCached( - /* cacher */ [&] {}, - /* cacheGetter */ [&] { - return &Reader_->GetRow(); - }); - } - - void MoveRow(TRowType* result) - { - // Caching is implemented in underlying reader. 
- TBase::DoMoveRowCached( - result, - /* mover */ [&] (TRowType* result) { - Reader_->MoveRow(result); - }, - /* cacheMover */ [&] (TRowType* result) { - Reader_->MoveRow(result); - }); - } - - TRowType MoveRow() - { - TRowType result; - MoveRow(&result); - return result; - } - -private: - using TBase::Reader_; -}; - -//////////////////////////////////////////////////////////////////////////////// - -} // namespace NDetail - -template <> -class TTableReader<TNode> - : public NDetail::TSimpleTableReader<TNode> -{ - using TSimpleTableReader<TNode>::TSimpleTableReader; -}; - -template <> -class TTableReader<TYaMRRow> - : public NDetail::TSimpleTableReader<TYaMRRow> -{ - using TSimpleTableReader<TYaMRRow>::TSimpleTableReader; -}; - -template <> -class TTableReader<Message> - : public NDetail::TTableReaderBase<Message> -{ -public: - using TBase = NDetail::TTableReaderBase<Message>; - - using TBase::TBase; - - template <class U> - const U& GetRow() const - { - static_assert(TIsBaseOf<Message, U>::Value); - - return TBase::DoGetRowCached( - /* cacher */ [&] { - CachedRow_.Reset(new U); - Reader_->ReadRow(CachedRow_.Get()); - }, - /* cacheGetter */ [&] { - auto result = dynamic_cast<const U*>(CachedRow_.Get()); - Y_VERIFY(result); - return result; - }); - } - - template <class U> - void MoveRow(U* result) - { - static_assert(TIsBaseOf<Message, U>::Value); - - TBase::DoMoveRowCached( - result, - /* mover */ [&] (U* result) { - Reader_->ReadRow(result); - }, - /* cacheMover */ [&] (U* result) { - auto cast = dynamic_cast<U*>(CachedRow_.Get()); - Y_VERIFY(cast); - result->Swap(cast); - }); - } - - template <class U> - U MoveRow() - { - static_assert(TIsBaseOf<Message, U>::Value); - - U result; - MoveRow(&result); - return result; - } - - ::TIntrusivePtr<IProtoReaderImpl> GetReaderImpl() const - { - return Reader_; - } - -private: - using TBase::Reader_; - mutable THolder<Message> CachedRow_; -}; - -template<class... 
TProtoRowTypes> -class TTableReader<TProtoOneOf<TProtoRowTypes...>> - : public NDetail::TTableReaderBase<TProtoOneOf<TProtoRowTypes...>> -{ -public: - using TBase = NDetail::TTableReaderBase<TProtoOneOf<TProtoRowTypes...>>; - - using TBase::TBase; - - template <class U> - const U& GetRow() const - { - AssertIsOneOf<U>(); - return TBase::DoGetRowCached( - /* cacher */ [&] { - Reader_->ReadRow(&std::get<U>(CachedRows_)); - CachedIndex_ = NDetail::TIndexInTuple<U, decltype(CachedRows_)>::Value; - }, - /* cacheGetter */ [&] { - return &std::get<U>(CachedRows_); - }); - } - - template <class U> - void MoveRow(U* result) - { - AssertIsOneOf<U>(); - return TBase::DoMoveRowCached( - result, - /* mover */ [&] (U* result) { - Reader_->ReadRow(result); - }, - /* cacheMover */ [&] (U* result) { - Y_VERIFY((NDetail::TIndexInTuple<U, decltype(CachedRows_)>::Value) == CachedIndex_); - *result = std::move(std::get<U>(CachedRows_)); - }); - } - - template <class U> - U MoveRow() - { - U result; - MoveRow(&result); - return result; - } - - ::TIntrusivePtr<IProtoReaderImpl> GetReaderImpl() const - { - return Reader_; - } - -private: - using TBase::Reader_; - // std::variant could also be used here, but std::tuple leads to better performance - // because of deallocations that std::variant has to do - mutable std::tuple<TProtoRowTypes...> CachedRows_; - mutable int CachedIndex_; - - template <class U> - static constexpr void AssertIsOneOf() - { - static_assert( - (std::is_same<U, TProtoRowTypes>::value || ...), - "Template parameter must be one of TProtoOneOf template parameter"); - } -}; - -template <class T> -class TTableReader<T, std::enable_if_t<TIsBaseOf<Message, T>::Value>> - : public TTableReader<TProtoOneOf<T>> -{ -public: - using TRowType = T; - using TBase = TTableReader<TProtoOneOf<T>>; - - using TBase::TBase; - - const T& GetRow() const - { - return TBase::template GetRow<T>(); - } - - void MoveRow(T* result) - { - TBase::template MoveRow<T>(result); - } - - T MoveRow() - { 
- return TBase::template MoveRow<T>(); - } -}; - -template<class... TSkiffRowTypes> -class TTableReader<TSkiffRowOneOf<TSkiffRowTypes...>> - : public NDetail::TTableReaderBase<TSkiffRowOneOf<TSkiffRowTypes...>> -{ -public: - using TBase = NDetail::TTableReaderBase<TSkiffRowOneOf<TSkiffRowTypes...>>; - - using TBase::TBase; - - explicit TTableReader(::TIntrusivePtr<typename TBase::IReaderImpl> reader, const TMaybe<TSkiffRowHints>& hints) - : TBase(reader) - , Parsers_({(CreateSkiffParser<TSkiffRowTypes>(&std::get<TSkiffRowTypes>(CachedRows_), hints))...}) - { } - - template <class U> - const U& GetRow() const - { - AssertIsOneOf<U>(); - return TBase::DoGetRowCached( - /* cacher */ [&] { - auto index = NDetail::TIndexInTuple<U, decltype(CachedRows_)>::Value; - Reader_->ReadRow(Parsers_[index]); - CachedIndex_ = index; - }, - /* cacheGetter */ [&] { - return &std::get<U>(CachedRows_); - }); - } - - template <class U> - void MoveRow(U* result) - { - AssertIsOneOf<U>(); - return TBase::DoMoveRowCached( - result, - /* mover */ [&] (U* result) { - auto index = NDetail::TIndexInTuple<U, decltype(CachedRows_)>::Value; - Reader_->ReadRow(Parsers_[index]); - *result = std::move(std::get<U>(CachedRows_)); - }, - /* cacheMover */ [&] (U* result) { - Y_VERIFY((NDetail::TIndexInTuple<U, decltype(CachedRows_)>::Value) == CachedIndex_); - *result = std::move(std::get<U>(CachedRows_)); - }); - } - - template <class U> - U MoveRow() - { - U result; - MoveRow(&result); - return result; - } - - ::TIntrusivePtr<ISkiffRowReaderImpl> GetReaderImpl() const - { - return Reader_; - } - -private: - using TBase::Reader_; - // std::variant could also be used here, but std::tuple leads to better performance - // because of deallocations that std::variant has to do - mutable std::tuple<TSkiffRowTypes...> CachedRows_; - mutable std::vector<ISkiffRowParserPtr> Parsers_; - mutable int CachedIndex_; - - template <class U> - static constexpr void AssertIsOneOf() - { - static_assert( - (std::is_same<U, 
TSkiffRowTypes>::value || ...), - "Template parameter must be one of TSkiffRowOneOf template parameter"); - } -}; - -template <class T> -class TTableReader<T, std::enable_if_t<TIsSkiffRow<T>::value>> - : public TTableReader<TSkiffRowOneOf<T>> -{ -public: - using TRowType = T; - using TBase = TTableReader<TSkiffRowOneOf<T>>; - - using TBase::TBase; - - const T& GetRow() - { - return TBase::template GetRow<T>(); - } - - void MoveRow(T* result) - { - TBase::template MoveRow<T>(result); - } - - T MoveRow() - { - return TBase::template MoveRow<T>(); - } -}; - -template <> -inline TTableReaderPtr<TNode> IIOClient::CreateTableReader<TNode>( - const TRichYPath& path, const TTableReaderOptions& options) -{ - return new TTableReader<TNode>(CreateNodeReader(path, options)); -} - -template <> -inline TTableReaderPtr<TYaMRRow> IIOClient::CreateTableReader<TYaMRRow>( - const TRichYPath& path, const TTableReaderOptions& options) -{ - return new TTableReader<TYaMRRow>(CreateYaMRReader(path, options)); -} - -template <class T, class = std::enable_if_t<TIsBaseOf<Message, T>::Value>> -struct TReaderCreator -{ - static TTableReaderPtr<T> Create(::TIntrusivePtr<IProtoReaderImpl> reader) - { - return new TTableReader<T>(reader); - } -}; - -template <class T> -inline TTableReaderPtr<T> IIOClient::CreateTableReader( - const TRichYPath& path, const TTableReaderOptions& options) -{ - if constexpr (TIsBaseOf<Message, T>::Value) { - TAutoPtr<T> prototype(new T); - return new TTableReader<T>(CreateProtoReader(path, options, prototype.Get())); - } else if constexpr (TIsSkiffRow<T>::value) { - const auto& hints = options.FormatHints_ ? 
options.FormatHints_->SkiffRowHints_ : Nothing(); - auto schema = GetSkiffSchema<T>(hints); - auto skipper = CreateSkiffSkipper<T>(hints); - return new TTableReader<T>(CreateSkiffRowReader(path, options, skipper, schema), hints); - } else { - static_assert(TDependentFalse<T>, "Unsupported type for table reader"); - } -} - -//////////////////////////////////////////////////////////////////////////////// - -template <typename T> -TTableReaderPtr<T> CreateTableReader( - IInputStream* stream, - const TTableReaderOptions& options) -{ - return TReaderCreator<T>::Create(NDetail::CreateProtoReader(stream, options, T::descriptor())); -} - -template <class... Ts> -TTableReaderPtr<typename NDetail::TProtoOneOfUnique<Ts...>::TType> CreateProtoMultiTableReader( - IInputStream* stream, - const TTableReaderOptions& options) -{ - return new TTableReader<typename NDetail::TProtoOneOfUnique<Ts...>::TType>( - NDetail::CreateProtoReader(stream, options, {Ts::descriptor()...})); -} - -template <class T> -TTableReaderPtr<T> CreateProtoMultiTableReader( - IInputStream* stream, - int tableCount, - const TTableReaderOptions& options) -{ - static_assert(TIsBaseOf<::google::protobuf::Message, T>::Value); - TVector<const ::google::protobuf::Descriptor*> descriptors(tableCount, T::descriptor()); - return new TTableReader<T>(NDetail::CreateProtoReader(stream, options, std::move(descriptors))); -} - -//////////////////////////////////////////////////////////////////////////////// - -template <class T> -class TTableRangesReader<T> - : public TThrRefBase -{ -public: - using TRowType = T; - -private: - using TReaderImpl = typename TRowTraits<TRowType>::IReaderImpl; - -public: - TTableRangesReader(::TIntrusivePtr<TReaderImpl> readerImpl) - : ReaderImpl_(readerImpl) - , Reader_(MakeIntrusive<TTableReader<TRowType>>(readerImpl)) - , IsValid_(Reader_->IsValid()) - { } - - TTableReader<T>& GetRange() - { - return *Reader_; - } - - bool IsValid() const - { - return IsValid_; - } - - void Next() - { - 
ReaderImpl_->NextKey(); - if ((IsValid_ = Reader_->IsValid())) { - Reader_->Next(); - } - } - -private: - ::TIntrusivePtr<TReaderImpl> ReaderImpl_; - ::TIntrusivePtr<TTableReader<TRowType>> Reader_; - bool IsValid_; -}; - -//////////////////////////////////////////////////////////////////////////////// - -template <typename T> -struct IWriterImplBase - : public TThrRefBase -{ - virtual void AddRow(const T& row, size_t tableIndex) = 0; - - virtual void AddRow(const T& row, size_t tableIndex, size_t /*rowWeight*/) - { - AddRow(row, tableIndex); - } - - virtual void AddRow(T&& row, size_t tableIndex) = 0; - - virtual void AddRow(T&& row, size_t tableIndex, size_t /*rowWeight*/) - { - AddRow(std::move(row), tableIndex); - } - - virtual void AddRowBatch(const TVector<T>& rowBatch, size_t tableIndex, size_t rowBatchWeight = 0) - { - for (const auto& row : rowBatch) { - AddRow(row, tableIndex, rowBatchWeight / rowBatch.size()); - } - } - - virtual void AddRowBatch(TVector<T>&& rowBatch, size_t tableIndex, size_t rowBatchWeight = 0) - { - auto rowBatchSize = rowBatch.size(); - for (auto&& row : std::move(rowBatch)) { - AddRow(std::move(row), tableIndex, rowBatchWeight / rowBatchSize); - } - } - - virtual size_t GetTableCount() const = 0; - virtual void FinishTable(size_t tableIndex) = 0; - virtual void Abort() - { } -}; - -struct INodeWriterImpl - : public IWriterImplBase<TNode> -{ -}; - -struct IYaMRWriterImpl - : public IWriterImplBase<TYaMRRow> -{ -}; - -struct IProtoWriterImpl - : public IWriterImplBase<Message> -{ -}; - -//////////////////////////////////////////////////////////////////////////////// - -template <class T> -class TTableWriterBase - : public TThrRefBase -{ -public: - using TRowType = T; - using IWriterImpl = typename TRowTraits<T>::IWriterImpl; - - explicit TTableWriterBase(::TIntrusivePtr<IWriterImpl> writer) - : Writer_(writer) - , Locks_(MakeAtomicShared<TVector<TAdaptiveLock>>(writer->GetTableCount())) - { } - - ~TTableWriterBase() override - { - if 
(Locks_.RefCount() == 1) { - NDetail::FinishOrDie(this, "TTableWriterBase"); - } - } - - void Abort() - { - Writer_->Abort(); - } - - void AddRow(const T& row, size_t tableIndex = 0, size_t rowWeight = 0) - { - DoAddRow<T>(row, tableIndex, rowWeight); - } - - void AddRow(T&& row, size_t tableIndex = 0, size_t rowWeight = 0) - { - DoAddRow<T>(std::move(row), tableIndex, rowWeight); - } - - void AddRowBatch(const TVector<T>& rowBatch, size_t tableIndex = 0, size_t rowBatchWeight = 0) - { - DoAddRowBatch<T>(rowBatch, tableIndex, rowBatchWeight); - } - - void AddRowBatch(TVector<T>&& rowBatch, size_t tableIndex = 0, size_t rowBatchWeight = 0) - { - DoAddRowBatch<T>(std::move(rowBatch), tableIndex, rowBatchWeight); - } - - void Finish() - { - for (size_t i = 0; i < Locks_->size(); ++i) { - auto guard = Guard((*Locks_)[i]); - Writer_->FinishTable(i); - } - } - -protected: - template <class U> - void DoAddRow(const U& row, size_t tableIndex = 0, size_t rowWeight = 0) - { - if (tableIndex >= Locks_->size()) { - ythrow TIOException() << - "Table index " << tableIndex << - " is out of range [0, " << Locks_->size() << ")"; - } - - auto guard = Guard((*Locks_)[tableIndex]); - Writer_->AddRow(row, tableIndex, rowWeight); - } - - template <class U> - void DoAddRow(U&& row, size_t tableIndex = 0, size_t rowWeight = 0) - { - if (tableIndex >= Locks_->size()) { - ythrow TIOException() << - "Table index " << tableIndex << - " is out of range [0, " << Locks_->size() << ")"; - } - - auto guard = Guard((*Locks_)[tableIndex]); - Writer_->AddRow(std::move(row), tableIndex, rowWeight); - } - - template <class U> - void DoAddRowBatch(const TVector<U>& rowBatch, size_t tableIndex = 0, size_t rowBatchWeight = 0) - { - if (tableIndex >= Locks_->size()) { - ythrow TIOException() << - "Table index " << tableIndex << - " is out of range [0, " << Locks_->size() << ")"; - } - - auto guard = Guard((*Locks_)[tableIndex]); - Writer_->AddRowBatch(rowBatch, tableIndex, rowBatchWeight); - } - - template 
<class U> - void DoAddRowBatch(TVector<U>&& rowBatch, size_t tableIndex = 0, size_t rowBatchWeight = 0) - { - if (tableIndex >= Locks_->size()) { - ythrow TIOException() << - "Table index " << tableIndex << - " is out of range [0, " << Locks_->size() << ")"; - } - - auto guard = Guard((*Locks_)[tableIndex]); - Writer_->AddRowBatch(std::move(rowBatch), tableIndex, rowBatchWeight); - } - - ::TIntrusivePtr<IWriterImpl> GetWriterImpl() - { - return Writer_; - } - -private: - ::TIntrusivePtr<IWriterImpl> Writer_; - TAtomicSharedPtr<TVector<TAdaptiveLock>> Locks_; -}; - -template <> -class TTableWriter<TNode> - : public TTableWriterBase<TNode> -{ -public: - using TBase = TTableWriterBase<TNode>; - - explicit TTableWriter(::TIntrusivePtr<IWriterImpl> writer) - : TBase(writer) - { } -}; - -template <> -class TTableWriter<TYaMRRow> - : public TTableWriterBase<TYaMRRow> -{ -public: - using TBase = TTableWriterBase<TYaMRRow>; - - explicit TTableWriter(::TIntrusivePtr<IWriterImpl> writer) - : TBase(writer) - { } -}; - -template <> -class TTableWriter<Message> - : public TTableWriterBase<Message> -{ -public: - using TBase = TTableWriterBase<Message>; - - explicit TTableWriter(::TIntrusivePtr<IWriterImpl> writer) - : TBase(writer) - { } - - template <class U, std::enable_if_t<std::is_base_of<Message, U>::value>* = nullptr> - void AddRow(const U& row, size_t tableIndex = 0, size_t rowWeight = 0) - { - TBase::AddRow(row, tableIndex, rowWeight); - } - - template <class U, std::enable_if_t<std::is_base_of<Message, U>::value>* = nullptr> - void AddRowBatch(const TVector<U>& rowBatch, size_t tableIndex = 0, size_t rowBatchWeight = 0) - { - for (const auto& row : rowBatch) { - AddRow(row, tableIndex, rowBatchWeight / rowBatch.size()); - } - } -}; - -template <class T> -class TTableWriter<T, std::enable_if_t<TIsBaseOf<Message, T>::Value>> - : public TTableWriter<Message> -{ -public: - using TRowType = T; - using TBase = TTableWriter<Message>; - - explicit 
TTableWriter(::TIntrusivePtr<IWriterImpl> writer) - : TBase(writer) - { } - - void AddRow(const T& row, size_t tableIndex = 0, size_t rowWeight = 0) - { - TBase::AddRow<T>(row, tableIndex, rowWeight); - } - - void AddRowBatch(const TVector<T>& rowBatch, size_t tableIndex = 0, size_t rowBatchWeight = 0) - { - TBase::AddRowBatch<T>(rowBatch, tableIndex, rowBatchWeight); - } -}; - -template <> -inline TTableWriterPtr<TNode> IIOClient::CreateTableWriter<TNode>( - const TRichYPath& path, const TTableWriterOptions& options) -{ - return new TTableWriter<TNode>(CreateNodeWriter(path, options)); -} - -template <> -inline TTableWriterPtr<TYaMRRow> IIOClient::CreateTableWriter<TYaMRRow>( - const TRichYPath& path, const TTableWriterOptions& options) -{ - return new TTableWriter<TYaMRRow>(CreateYaMRWriter(path, options)); -} - -template <class T> -inline TTableWriterPtr<T> IIOClient::CreateTableWriter( - const TRichYPath& path, const TTableWriterOptions& options) -{ - if constexpr (TIsBaseOf<Message, T>::Value) { - TAutoPtr<T> prototype(new T); - return new TTableWriter<T>(CreateProtoWriter(path, options, prototype.Get())); - } else { - static_assert(TDependentFalse<T>, "Unsupported type for table writer"); - } -} - -//////////////////////////////////////////////////////////////////////////////// - -template <typename T> -TTableReaderPtr<T> CreateConcreteProtobufReader(TTableReader<Message>* reader) -{ - static_assert(std::is_base_of_v<Message, T>, "T must be a protobuf type (either Message or its descendant)"); - Y_ENSURE(reader, "reader must be non-null"); - return ::MakeIntrusive<TTableReader<T>>(reader->GetReaderImpl()); -} - -template <typename T> -TTableReaderPtr<T> CreateConcreteProtobufReader(const TTableReaderPtr<Message>& reader) -{ - Y_ENSURE(reader, "reader must be non-null"); - return CreateConcreteProtobufReader<T>(reader.Get()); -} - -template <typename T> -TTableReaderPtr<Message> CreateGenericProtobufReader(TTableReader<T>* reader) -{ - 
static_assert(std::is_base_of_v<Message, T>, "T must be a protobuf type (either Message or its descendant)"); - Y_ENSURE(reader, "reader must be non-null"); - return ::MakeIntrusive<TTableReader<Message>>(reader->GetReaderImpl()); -} - -template <typename T> -TTableReaderPtr<Message> CreateGenericProtobufReader(const TTableReaderPtr<T>& reader) -{ - Y_ENSURE(reader, "reader must be non-null"); - return CreateGenericProtobufReader(reader.Get()); -} - -//////////////////////////////////////////////////////////////////////////////// - -} // namespace NYT diff --git a/yt/cpp/mapreduce/interface/io.cpp b/yt/cpp/mapreduce/interface/io.cpp deleted file mode 100644 index f97629721a7..00000000000 --- a/yt/cpp/mapreduce/interface/io.cpp +++ /dev/null @@ -1,47 +0,0 @@ -#include "io.h" - -#include <yt/cpp/mapreduce/interface/logging/yt_log.h> - -#include <util/string/cast.h> - -namespace NYT { - -//////////////////////////////////////////////////////////////////////////////// - -TMaybe<size_t> IReaderImplBase::GetReadByteCount() const -{ - return Nothing(); -} - -i64 IReaderImplBase::GetTabletIndex() const -{ - Y_FAIL("Unimplemented"); -} - -bool IReaderImplBase::IsEndOfStream() const -{ - Y_FAIL("Unimplemented"); -} - -bool IReaderImplBase::IsRawReaderExhausted() const -{ - Y_FAIL("Unimplemented"); -} - -//////////////////////////////////////////////////////////////////////////////// - -namespace NDetail { - -void LogTableReaderStatistics(ui64 rowCount, TMaybe<size_t> byteCount) -{ - TString byteCountStr = (byteCount ? 
::ToString(*byteCount) : "<unknown>"); - YT_LOG_DEBUG("Table reader has read %v rows, %v bytes", - rowCount, - byteCountStr); -} - -} // namespace NDetail - -//////////////////////////////////////////////////////////////////////////////// - -} // namespace NYT diff --git a/yt/cpp/mapreduce/interface/io.h b/yt/cpp/mapreduce/interface/io.h deleted file mode 100644 index e2b20a18029..00000000000 --- a/yt/cpp/mapreduce/interface/io.h +++ /dev/null @@ -1,586 +0,0 @@ -#pragma once - -/// -/// @file yt/cpp/mapreduce/interface/io.h -/// -/// Header containing client interface for reading and writing tables and files. - - -#include "fwd.h" - -#include "client_method_options.h" -#include "common.h" -#include "format.h" -#include "node.h" -#include "mpl.h" -#include "skiff_row.h" - -#include <google/protobuf/message.h> - -#include <util/stream/input.h> -#include <util/stream/output.h> -#include <util/generic/yexception.h> -#include <util/generic/maybe.h> - -namespace NYT { - -//////////////////////////////////////////////////////////////////////////////// - -/// -/// @brief "Marker" type to use for several protobuf types in @ref NYT::TTableReader. -/// -/// @tparam Ts Possible types of rows to be read. -template<class... TProtoRowTypes> -class TProtoOneOf -{ -public: - static_assert( - (TIsBaseOf<::google::protobuf::Message, TProtoRowTypes>::Value && ...), - "Template parameters can only be protobuf types"); - - TProtoOneOf() = delete; -}; - -/// -/// @brief "Marker" type to use for several skiff row types in @ref NYT::TTableReader. -/// -/// @tparam Ts Possible types of rows to be read. -template<class... 
TSkiffRowTypes> -class TSkiffRowOneOf -{ -public: - static_assert( - (TIsSkiffRow<TSkiffRowTypes>::value && ...), - "Template parameters can only be SkiffRow types"); - - TSkiffRowOneOf() = delete; -}; - -//////////////////////////////////////////////////////////////////////////////// - -/// @cond Doxygen_Suppress -namespace NDetail { - -//////////////////////////////////////////////////////////////////////////////// - -template <class TTuple> -struct TProtoOneOfFromTuple; - -template <class... Ts> -struct TProtoOneOfFromTuple<std::tuple<Ts...>> -{ - using TType = TProtoOneOf<Ts...>; -}; - -template <class... Ts> -struct TProtoOneOfUnique -{ - using TTuple = typename TUniqueTypes<std::tuple<>, std::tuple<Ts...>>::TType; - using TType = typename TProtoOneOfFromTuple<TTuple>::TType; -}; - -//////////////////////////////////////////////////////////////////////////////// - -} // namespace NDetail -/// @endcond - -//////////////////////////////////////////////////////////////////////////////// - -struct INodeReaderImpl; -struct IYaMRReaderImpl; -struct IProtoReaderImpl; -struct ISkiffRowReaderImpl; -struct INodeWriterImpl; -struct IYaMRWriterImpl; -struct IProtoWriterImpl; - -//////////////////////////////////////////////////////////////////////////////// - -/// Class of exceptions connected to reading or writing tables or files. -class TIOException - : public yexception -{ }; - -/////////////////////////////////////////////////////////////////////////////// - -/// Interface representing YT file reader. -class IFileReader - : public TThrRefBase - , public IInputStream -{ }; - -/// Interface representing YT file writer. -class IFileWriter - : public TThrRefBase - , public IOutputStream -{ }; - -//////////////////////////////////////////////////////////////////////////////// - -/// Low-level interface to read YT table with retries. 
-class TRawTableReader - : public TThrRefBase - , public IInputStream -{ -public: - /// @brief Retry table read starting from the specified `rangeIndex` and `rowIndex`. - /// - /// @param rangeIndex Index of first range to read - /// @param rowIndex Index of first row to read; if `rowIndex == Nothing` entire request will be retried. - /// - /// @return `true` on successful request retry, `false` if no retry attempts are left (then `Retry()` shouldn't be called any more). - /// - /// `rowIndex` must be inside the range with index `rangeIndex` if the latter is specified. - /// - /// After successful retry the user should reset `rangeIndex` / `rowIndex` values and read new ones - /// from the stream. - virtual bool Retry( - const TMaybe<ui32>& rangeIndex, - const TMaybe<ui64>& rowIndex) = 0; - - /// Resets retry attempt count to the initial value (then `Retry()` can be called again). - virtual void ResetRetries() = 0; - - /// @brief May the input stream contain table ranges? - /// - /// In the case when it is `true` the `TRawTableReader` user is responsible - /// to track active range index in order to pass it to Retry(). - virtual bool HasRangeIndices() const = 0; -}; - -/// @brief Low-level interface to write YT table. -/// -/// Retries must be handled by implementation. -class TRawTableWriter - : public TThrRefBase - , public IOutputStream -{ -public: - /// @brief Call this method after complete row representation is written to the stream. - /// - /// When this method is called `TRowTableWriter` can check its buffer - /// and if it is full send data to YT. - /// @note `TRawTableWriter` never sends partial records to YT (due to retries). - virtual void NotifyRowEnd() = 0; - - /// @brief Try to abort writing process as soon as possible (makes sense for multi-threaded writers). - /// - /// By default it does nothing, but implementations are welcome to override this method. 
- virtual void Abort() - { } -}; - -/// @brief Interface to deal with multiple raw output streams. -class IProxyOutput -{ -public: - virtual ~IProxyOutput() - { } - - /// Get amount of managed streams. - virtual size_t GetStreamCount() const = 0; - - /// Get stream corresponding to the specified table index. - virtual IOutputStream* GetStream(size_t tableIndex) const = 0; - - /// This handler must be called right after the next row has been written. - virtual void OnRowFinished(size_t tableIndex) = 0; - - /// @brief Try to abort writing process as soon as possible (makes sense for multi-threaded writers). - /// - /// By default it does nothing, but implementations are welcome to override this method. - virtual void Abort() - { } -}; - -//////////////////////////////////////////////////////////////////////////////// - -/// @brief Class template to read typed rows from YT tables. -/// -/// @tparam T Row type. -/// -/// Correct usage of this class usually looks like -/// ``` -/// for (const auto& cursor : *reader) { -/// const auto& row = cursor.GetRow(); -/// ... -/// } -/// ``` -/// or, more verbosely, -/// ``` -/// for (; reader->IsValid(); reader->Next()) { -/// const auto& row = reader->GetRow(); -/// ... -/// } -/// ``` -/// -/// @note Actual (partial) specializations of this template may look a bit different, -/// e.g. @ref NYT::TTableReader::GetRow, @ref NYT::TTableReader::MoveRow may be method templates. -template <class T, class> -class TTableReader - : public TThrRefBase -{ -public: - /// Get current row. - const T& GetRow() const; - - /// Extract current row; further calls to `GetRow` and `MoveRow` will fail. - T MoveRow(); - - /// Extract current row to `result`; further calls to `GetRow` and `MoveRow` will fail. - void MoveRow(T* result); - - /// Check whether all the rows were read. - bool IsValid() const; - - /// Move the cursor to the next row. - void Next(); - - /// Get table index of the current row. 
- ui32 GetTableIndex() const; - - /// Get range index of the current row (zero if it is unknown or read request contains no ranges) - ui32 GetRangeIndex() const; - - /// Get current row index (zero if it unknown). - ui64 GetRowIndex() const; - - /// Get current tablet index (for ordered dynamic tables). - i64 GetTabletIndex() const; - - /// Returns `true` if job consumed all the input and `false` otherwise. - bool IsEndOfStream() const; - - /// Returns `true` if job raw input stream was closed and `false` otherwise. - bool IsRawReaderExhausted() const; -}; - -/// @brief Iterator for use in range-based-for. -/// -/// @note Idiomatic usage: -/// ``` -/// for (const auto& cursor : *reader) { -/// const auto& row = cursor.GetRow(); -/// ... -/// } -/// ``` -template <class T> -class TTableReaderIterator -{ -public: - /// Construct iterator from table reader (can be `nullptr`). - explicit TTableReaderIterator<T>(TTableReader<T>* reader) - { - if (reader && reader->IsValid()) { - Reader_ = reader; - } else { - Reader_ = nullptr; - } - } - - /// Equality operator. - bool operator==(const TTableReaderIterator& it) const - { - return Reader_ == it.Reader_; - } - - /// Inequality operator. - bool operator!=(const TTableReaderIterator& it) const - { - return Reader_ != it.Reader_; - } - - /// Dereference operator. - TTableReader<T>& operator*() - { - return *Reader_; - } - - /// Const dereference operator. - const TTableReader<T>& operator*() const - { - return *Reader_; - } - - /// Preincrement operator. - TTableReaderIterator& operator++() - { - Reader_->Next(); - if (!Reader_->IsValid()) { - Reader_ = nullptr; - } - return *this; - } - -private: - TTableReader<T>* Reader_; -}; - -/// @brief Function to facilitate range-based-for for @ref NYT::TTableReader. 
///
/// @see @ref NYT::TTableReaderIterator
template <class T>
TTableReaderIterator<T> begin(TTableReader<T>& reader)
{
    return TTableReaderIterator<T>(&reader);
}

/// @brief Function to facilitate range-based-for for @ref NYT::TTableReader.
///
/// Returns the end iterator (constructed from a null reader pointer).
///
/// @see @ref NYT::TTableReaderIterator
template <class T>
TTableReaderIterator<T> end(TTableReader<T>&)
{
    return TTableReaderIterator<T>(nullptr);
}

////////////////////////////////////////////////////////////////////////////////

/// @brief Class to facilitate reading table rows sorted by key.
///
/// Each reader returned from @ref NYT::TTableRangesReader::GetRange represents
/// a range of rows with the same key.
///
/// @note Idiomatic usage:
/// ```
/// for (; reader->IsValid(); reader->Next()) {
///     auto& rangeReader = reader->GetRange();
///     ...
/// }
/// ```
template <class T, class>
class TTableRangesReader
    : public TThrRefBase
{
public:
    /// Get reader for rows with the same key.
    TTableReader<T>& GetRange();

    /// Check whether there is a current range to read
    /// (it is the loop condition in the usage above, so it becomes `false` once all rows are read).
    bool IsValid() const;

    /// Move cursor to the next range.
    void Next();
};

////////////////////////////////////////////////////////////////////////////////

/// Class template to write typed rows to YT tables.
template <class T, class>
class TTableWriter
    : public TThrRefBase
{
public:
    /// @brief Submit a row for writing.
    ///
    /// The row may (and very probably will) *not* be written immediately.
    void AddRow(const T& row);

    /// Stop writing data as soon as possible (without flushing data, e.g. before aborting parent transaction).
    // NOTE(review): the wording above reads like a description of an abort rather than of a regular finish;
    // confirm the intended contract against the concrete writer implementations.
    void Finish();
};

////////////////////////////////////////////////////////////////////////////////

/// @brief Type representing YaMR table row.
///
/// All three columns are stored as `TStringBuf`.
///
/// @deprecated
struct TYaMRRow
{
    /// Key column.
    TStringBuf Key;

    /// Subkey column.
    TStringBuf SubKey;

    /// Value column.
    TStringBuf Value;
};

////////////////////////////////////////////////////////////////////////////////

/// Interface for creating table and file readers and writers.
class IIOClient
{
public:
    virtual ~IIOClient() = default;

    /// Create a reader for file at `path`.
    virtual IFileReaderPtr CreateFileReader(
        const TRichYPath& path,
        const TFileReaderOptions& options = TFileReaderOptions()) = 0;

    /// Create a writer for file at `path`.
    virtual IFileWriterPtr CreateFileWriter(
        const TRichYPath& path,
        const TFileWriterOptions& options = TFileWriterOptions()) = 0;

    /// Create a typed reader for table at `path`.
    template <class T>
    TTableReaderPtr<T> CreateTableReader(
        const TRichYPath& path,
        const TTableReaderOptions& options = TTableReaderOptions());

    /// Create a typed writer for table at `path`.
    template <class T>
    TTableWriterPtr<T> CreateTableWriter(
        const TRichYPath& path,
        const TTableWriterOptions& options = TTableWriterOptions());

    /// Create a writer to write protobuf messages with specified descriptor.
    virtual TTableWriterPtr<::google::protobuf::Message> CreateTableWriter(
        const TRichYPath& path,
        const ::google::protobuf::Descriptor& descriptor,
        const TTableWriterOptions& options = TTableWriterOptions()) = 0;

    /// Create a reader to read a table using specified format.
    virtual TRawTableReaderPtr CreateRawReader(
        const TRichYPath& path,
        const TFormat& format,
        const TTableReaderOptions& options = TTableReaderOptions()) = 0;

    /// Create a writer to write a table using specified format.
    virtual TRawTableWriterPtr CreateRawWriter(
        const TRichYPath& path,
        const TFormat& format,
        const TTableWriterOptions& options = TTableWriterOptions()) = 0;

    ///
    /// @brief Create a reader for [blob table](https://docs.yandex-team.ru/docs/yt/description/storage/blobtables) at `path`.
    ///
    /// @param path Blob table path.
    /// @param blobId Key identifying the blob.
    /// @param options Optional parameters.
    ///
    /// A blob table is a table that stores a number of blobs.
    /// Blobs are sliced into parts of the same size (except, possibly, the last part).
    /// Those parts are stored in separate rows.
    ///
    /// A blob table has constraints on its schema.
    /// - There must be columns that identify the blob (blob id columns). Those columns might be of any type.
    /// - There must be a column of `int64` type that identifies the part inside the blob (this column is called `part index`).
    /// - There must be a column of `string` type that stores actual data (this column is called `data column`).
    virtual IFileReaderPtr CreateBlobTableReader(
        const TYPath& path,
        const TKey& blobId,
        const TBlobTableReaderOptions& options = TBlobTableReaderOptions()) = 0;

private:
    // Format-specific factory hooks. The `CreateTableReader` / `CreateTableWriter` member templates
    // dispatch to these depending on the row type; implementations of this interface provide them.

    virtual ::TIntrusivePtr<INodeReaderImpl> CreateNodeReader(
        const TRichYPath& path, const TTableReaderOptions& options) = 0;

    virtual ::TIntrusivePtr<IYaMRReaderImpl> CreateYaMRReader(
        const TRichYPath& path, const TTableReaderOptions& options) = 0;

    virtual ::TIntrusivePtr<IProtoReaderImpl> CreateProtoReader(
        const TRichYPath& path,
        const TTableReaderOptions& options,
        const ::google::protobuf::Message* prototype) = 0;

    virtual ::TIntrusivePtr<ISkiffRowReaderImpl> CreateSkiffRowReader(
        const TRichYPath& path,
        const TTableReaderOptions& options,
        const ISkiffRowSkipperPtr& skipper,
        const NSkiff::TSkiffSchemaPtr& schema) = 0;

    virtual ::TIntrusivePtr<INodeWriterImpl> CreateNodeWriter(
        const TRichYPath& path, const TTableWriterOptions& options) = 0;

    virtual ::TIntrusivePtr<IYaMRWriterImpl> CreateYaMRWriter(
        const TRichYPath& path, const TTableWriterOptions& options) = 0;

    virtual ::TIntrusivePtr<IProtoWriterImpl> CreateProtoWriter(
        const TRichYPath& path,
        const TTableWriterOptions& options,
        const ::google::protobuf::Message* prototype) = 0;
};

////////////////////////////////////////////////////////////////////////////////

-/// -/// @brief Create a protobuf table reader from a stream. -/// -/// @tparam T Protobuf message type to read (must be inherited from `Message`). -/// -/// @param stream Input stream in YT protobuf format. -template <typename T> -TTableReaderPtr<T> CreateTableReader( - IInputStream* stream, - const TTableReaderOptions& options = {}); - -/// -/// @brief Create a protobuf multi table reader from a stream. -/// -/// @tparam Ts Protobuf message types to read (must be inherited from `Message`). -/// -/// @param stream Input stream in YT protobuf format. -template <class... Ts> -TTableReaderPtr<typename NDetail::TProtoOneOfUnique<Ts...>::TType> CreateProtoMultiTableReader( - IInputStream* stream, - const TTableReaderOptions& options = {}); - -/// -/// @brief Create a homogenous protobuf multi table reader from a stream. -/// -/// @tparam T Protobuf message type to read (must be inherited from `Message`). -/// -/// @param stream Input stream in YT protobuf format. -/// @param tableCount Number of tables in input stream. -template <class T> -TTableReaderPtr<T> CreateProtoMultiTableReader( - IInputStream* stream, - int tableCount, - const TTableReaderOptions& options = {}); - -/// Create a @ref NYT::TNode table reader from a stream. -template <> -TTableReaderPtr<TNode> CreateTableReader<TNode>( - IInputStream* stream, const TTableReaderOptions& options); - -/// Create a @ref NYT::TYaMRRow table reader from a stream. -template <> -TTableReaderPtr<TYaMRRow> CreateTableReader<TYaMRRow>( - IInputStream* stream, const TTableReaderOptions& options); - -namespace NDetail { - -/// Create a protobuf table reader from a stream. -::TIntrusivePtr<IProtoReaderImpl> CreateProtoReader( - IInputStream* stream, - const TTableReaderOptions& options, - const ::google::protobuf::Descriptor* descriptor); - - -/// Create a protobuf table reader from a stream that can contain table switches. 
-::TIntrusivePtr<IProtoReaderImpl> CreateProtoReader( - IInputStream* stream, - const TTableReaderOptions& options, - TVector<const ::google::protobuf::Descriptor*> descriptors); - -} // namespace NDetail - -//////////////////////////////////////////////////////////////////////////////// - -/// Convert generic protobuf table reader to a concrete one (for certain type `T`). -template <typename T> -TTableReaderPtr<T> CreateConcreteProtobufReader(TTableReader<Message>* reader); - -/// Convert generic protobuf table reader to a concrete one (for certain type `T`). -template <typename T> -TTableReaderPtr<T> CreateConcreteProtobufReader(const TTableReaderPtr<Message>& reader); - -/// Convert a concrete (for certain type `T`) protobuf table reader to a generic one. -template <typename T> -TTableReaderPtr<Message> CreateGenericProtobufReader(TTableReader<T>* reader); - -/// Convert a concrete (for certain type `T`) protobuf table reader to a generic one. -template <typename T> -TTableReaderPtr<Message> CreateGenericProtobufReader(const TTableReaderPtr<T>& reader); - -//////////////////////////////////////////////////////////////////////////////// - -} // namespace NYT - -#define IO_INL_H_ -#include "io-inl.h" -#undef IO_INL_H_ diff --git a/yt/cpp/mapreduce/interface/job_counters.cpp b/yt/cpp/mapreduce/interface/job_counters.cpp deleted file mode 100644 index 6d4a2a6fcb3..00000000000 --- a/yt/cpp/mapreduce/interface/job_counters.cpp +++ /dev/null @@ -1,164 +0,0 @@ -#include "job_counters.h" - -namespace NYT { - -//////////////////////////////////////////////////////////////////// - -namespace { - ui64 CountTotal(const TNode& data) - { - if (data.IsMap()) { - if (auto totalPtr = data.AsMap().FindPtr("total")) { - return data["total"].IntCast<ui64>(); - } else { - ui64 total = 0; - for (const auto& keyVal: data.AsMap()) { - total += CountTotal(keyVal.second); - } - return total; - } - } else { - return data.IntCast<ui64>(); - } - } - - TNode GetNode(const TNode& data, const 
TStringBuf& key) - { - if (auto resPtr = data.AsMap().FindPtr(key)) { - return *resPtr; - } - return TNode(); - } -} // namespace - -//////////////////////////////////////////////////////////////////// - -TJobCounter::TJobCounter(TNode data) - : Data_(std::move(data)) -{ - if (Data_.HasValue()) { - Total_ = CountTotal(Data_); - } -} - -TJobCounter::TJobCounter(ui64 total) - : Total_(total) -{ } - -ui64 TJobCounter::GetTotal() const -{ - return Total_; -} - -ui64 TJobCounter::GetValue(const TStringBuf key) const -{ - if (Data_.HasValue()) { - return CountTotal(Data_[key]); - } - return 0; -} - -//////////////////////////////////////////////////////////////////// - -TJobCounters::TJobCounters(const NYT::TNode& counters) - : Total_(0) -{ - if (!counters.IsMap()) { - ythrow yexception() << "TJobCounters must be initialized with Map type TNode"; - } - auto abortedNode = GetNode(counters, "aborted"); - if (abortedNode.HasValue()) { - Aborted_ = TJobCounter(GetNode(abortedNode, "total")); - AbortedScheduled_ = TJobCounter(GetNode(abortedNode, "scheduled")); - AbortedNonScheduled_ = TJobCounter(GetNode(abortedNode, "non_scheduled")); - } - auto completedNode = GetNode(counters, "completed"); - if (completedNode.HasValue()) { - Completed_ = TJobCounter(GetNode(completedNode, "total")); - CompletedNonInterrupted_ = TJobCounter(GetNode(completedNode, "non-interrupted")); - CompletedInterrupted_ = TJobCounter(GetNode(completedNode, "interrupted")); - } - Lost_ = TJobCounter(GetNode(counters, "lost")); - Invalidated_ = TJobCounter(GetNode(counters, "invalidated")); - Failed_ = TJobCounter(GetNode(counters, "failed")); - Running_ = TJobCounter(GetNode(counters, "running")); - Suspended_ = TJobCounter(GetNode(counters, "suspended")); - Pending_ = TJobCounter(GetNode(counters, "pending")); - Blocked_ = TJobCounter(GetNode(counters, "blocked")); - Total_ = CountTotal(counters); -} - - -const TJobCounter& TJobCounters::GetAborted() const -{ - return Aborted_; -} - -const 
TJobCounter& TJobCounters::GetAbortedScheduled() const -{ - return AbortedScheduled_; -} - -const TJobCounter& TJobCounters::GetAbortedNonScheduled() const -{ - return AbortedNonScheduled_; -} - -const TJobCounter& TJobCounters::GetCompleted() const -{ - return Completed_; -} - -const TJobCounter& TJobCounters::GetCompletedNonInterrupted() const -{ - return CompletedNonInterrupted_; -} - -const TJobCounter& TJobCounters::GetCompletedInterrupted() const -{ - return CompletedInterrupted_; -} - -const TJobCounter& TJobCounters::GetLost() const -{ - return Lost_; -} - -const TJobCounter& TJobCounters::GetInvalidated() const -{ - return Invalidated_; -} - -const TJobCounter& TJobCounters::GetFailed() const -{ - return Failed_; -} - -const TJobCounter& TJobCounters::GetRunning() const -{ - return Running_; -} - -const TJobCounter& TJobCounters::GetSuspended() const -{ - return Suspended_; -} - -const TJobCounter& TJobCounters::GetPending() const -{ - return Pending_; -} - -const TJobCounter& TJobCounters::GetBlocked() const -{ - return Blocked_; -} - -ui64 TJobCounters::GetTotal() const -{ - return Total_; -} - -//////////////////////////////////////////////////////////////////// - -} // namespace NYT diff --git a/yt/cpp/mapreduce/interface/job_counters.h b/yt/cpp/mapreduce/interface/job_counters.h deleted file mode 100644 index 9257cc1ec1b..00000000000 --- a/yt/cpp/mapreduce/interface/job_counters.h +++ /dev/null @@ -1,74 +0,0 @@ -#pragma once - -#include "fwd.h" - -#include <yt/cpp/mapreduce/interface/node.h> - -namespace NYT { - -class TJobCounter -{ -private: - TNode Data_; - ui64 Total_ = 0; - -public: - TJobCounter() = default; - - TJobCounter(TNode data); - TJobCounter(ui64 total); - - ui64 GetTotal() const; - - ui64 GetValue(const TStringBuf key) const; -}; - -/// Class representing a collection of job counters. -class TJobCounters -{ -public: - /// - /// Construct empty counter. - TJobCounters() = default; - - /// - /// Construct counter from counters node. 
- TJobCounters(const NYT::TNode& counters); - - const TJobCounter& GetAborted() const; - const TJobCounter& GetAbortedScheduled() const; - const TJobCounter& GetAbortedNonScheduled() const; - const TJobCounter& GetCompleted() const; - const TJobCounter& GetCompletedNonInterrupted() const; - const TJobCounter& GetCompletedInterrupted() const; - const TJobCounter& GetLost() const; - const TJobCounter& GetInvalidated() const; - const TJobCounter& GetFailed() const; - const TJobCounter& GetRunning() const; - const TJobCounter& GetSuspended() const; - const TJobCounter& GetPending() const; - const TJobCounter& GetBlocked() const; - - ui64 GetTotal() const; - -private: - ui64 Total_ = 0; - - TJobCounter Aborted_; - TJobCounter AbortedScheduled_; - TJobCounter AbortedNonScheduled_; - TJobCounter Completed_; - TJobCounter CompletedNonInterrupted_; - TJobCounter CompletedInterrupted_; - TJobCounter Lost_; - TJobCounter Invalidated_; - TJobCounter Failed_; - TJobCounter Running_; - TJobCounter Suspended_; - TJobCounter Pending_; - TJobCounter Blocked_; -}; - -//////////////////////////////////////////////////////////////////// - -} // namespace NYT diff --git a/yt/cpp/mapreduce/interface/job_counters_ut.cpp b/yt/cpp/mapreduce/interface/job_counters_ut.cpp deleted file mode 100644 index 56d3932b8fd..00000000000 --- a/yt/cpp/mapreduce/interface/job_counters_ut.cpp +++ /dev/null @@ -1,103 +0,0 @@ -#include <yt/cpp/mapreduce/interface/job_counters.h> -#include <yt/cpp/mapreduce/interface/operation.h> - -#include <library/cpp/yson/node/node_io.h> - -#include <library/cpp/testing/unittest/registar.h> - -using namespace NYT; - -Y_UNIT_TEST_SUITE(JobCounters) -{ - Y_UNIT_TEST(Full) - { - const TString input = R"""( - { - "completed" = { - "total" = 6; - "non-interrupted" = 1; - "interrupted" = { - "whatever_interrupted" = 2; - "whatever_else_interrupted" = 3; - }; - }; - "aborted" = { - "non_scheduled" = { - "whatever_non_scheduled" = 4; - "whatever_else_non_scheduled" = 5; - }; - 
"scheduled" = { - "whatever_scheduled" = 6; - "whatever_else_scheduled" = 7; - }; - "total" = 22; - }; - "lost" = 8; - "invalidated" = 9; - "failed" = 10; - "running" = 11; - "suspended" = 12; - "pending" = 13; - "blocked" = 14; - "total" = 105; - })"""; - - TJobCounters counters(NodeFromYsonString(input)); - - UNIT_ASSERT_VALUES_EQUAL(counters.GetTotal(), 105); - - UNIT_ASSERT_VALUES_EQUAL(counters.GetCompleted().GetTotal(), 6); - UNIT_ASSERT_VALUES_EQUAL(counters.GetCompletedNonInterrupted().GetTotal(), 1); - UNIT_ASSERT_VALUES_EQUAL(counters.GetCompletedInterrupted().GetTotal(), 5); - UNIT_ASSERT_VALUES_EQUAL(counters.GetAborted().GetTotal(), 22); - UNIT_ASSERT_VALUES_EQUAL(counters.GetAbortedNonScheduled().GetTotal(), 9); - UNIT_ASSERT_VALUES_EQUAL(counters.GetAbortedScheduled().GetTotal(), 13); - UNIT_ASSERT_VALUES_EQUAL(counters.GetLost().GetTotal(), 8); - UNIT_ASSERT_VALUES_EQUAL(counters.GetInvalidated().GetTotal(), 9); - UNIT_ASSERT_VALUES_EQUAL(counters.GetFailed().GetTotal(), 10); - UNIT_ASSERT_VALUES_EQUAL(counters.GetRunning().GetTotal(), 11); - UNIT_ASSERT_VALUES_EQUAL(counters.GetSuspended().GetTotal(), 12); - UNIT_ASSERT_VALUES_EQUAL(counters.GetPending().GetTotal(), 13); - UNIT_ASSERT_VALUES_EQUAL(counters.GetBlocked().GetTotal(), 14); - - UNIT_ASSERT_VALUES_EQUAL(counters.GetCompletedInterrupted().GetValue("whatever_interrupted"), 2); - UNIT_ASSERT_VALUES_EQUAL(counters.GetCompletedInterrupted().GetValue("whatever_else_interrupted"), 3); - UNIT_ASSERT_VALUES_EQUAL(counters.GetAbortedNonScheduled().GetValue("whatever_non_scheduled"), 4); - UNIT_ASSERT_VALUES_EQUAL(counters.GetAbortedNonScheduled().GetValue("whatever_else_non_scheduled"), 5); - UNIT_ASSERT_VALUES_EQUAL(counters.GetAbortedScheduled().GetValue("whatever_scheduled"), 6); - UNIT_ASSERT_VALUES_EQUAL(counters.GetAbortedScheduled().GetValue("whatever_else_scheduled"), 7); - - UNIT_ASSERT_EXCEPTION(counters.GetCompletedInterrupted().GetValue("Nothingness"), yexception); - } - - 
Y_UNIT_TEST(Empty) - { - const TString input = "{}"; - - TJobCounters counters(NodeFromYsonString(input)); - - UNIT_ASSERT_VALUES_EQUAL(counters.GetTotal(), 0); - - UNIT_ASSERT_VALUES_EQUAL(counters.GetCompleted().GetTotal(), 0); - UNIT_ASSERT_VALUES_EQUAL(counters.GetCompletedNonInterrupted().GetTotal(), 0); - UNIT_ASSERT_VALUES_EQUAL(counters.GetCompletedInterrupted().GetTotal(), 0); - UNIT_ASSERT_VALUES_EQUAL(counters.GetAborted().GetTotal(), 0); - UNIT_ASSERT_VALUES_EQUAL(counters.GetAbortedNonScheduled().GetTotal(), 0); - UNIT_ASSERT_VALUES_EQUAL(counters.GetAbortedScheduled().GetTotal(), 0); - UNIT_ASSERT_VALUES_EQUAL(counters.GetLost().GetTotal(), 0); - UNIT_ASSERT_VALUES_EQUAL(counters.GetInvalidated().GetTotal(), 0); - UNIT_ASSERT_VALUES_EQUAL(counters.GetFailed().GetTotal(), 0); - UNIT_ASSERT_VALUES_EQUAL(counters.GetRunning().GetTotal(), 0); - UNIT_ASSERT_VALUES_EQUAL(counters.GetSuspended().GetTotal(), 0); - UNIT_ASSERT_VALUES_EQUAL(counters.GetPending().GetTotal(), 0); - UNIT_ASSERT_VALUES_EQUAL(counters.GetBlocked().GetTotal(), 0); - } - - Y_UNIT_TEST(Broken) - { - UNIT_ASSERT_EXCEPTION_CONTAINS(TJobCounters(TNode()), yexception, "TJobCounters"); - UNIT_ASSERT_EXCEPTION_CONTAINS(TJobCounters(TNode(1)), yexception, "TJobCounters"); - UNIT_ASSERT_EXCEPTION_CONTAINS(TJobCounters(TNode(1.0)), yexception, "TJobCounters"); - UNIT_ASSERT_EXCEPTION_CONTAINS(TJobCounters(TNode("Whatever")), yexception, "TJobCounters"); - } -} diff --git a/yt/cpp/mapreduce/interface/job_statistics.cpp b/yt/cpp/mapreduce/interface/job_statistics.cpp deleted file mode 100644 index bd9791672d4..00000000000 --- a/yt/cpp/mapreduce/interface/job_statistics.cpp +++ /dev/null @@ -1,361 +0,0 @@ -#include "job_statistics.h" - -#include "operation.h" - -#include <library/cpp/yson/node/node.h> -#include <library/cpp/yson/node/serialize.h> - -#include <library/cpp/yson/writer.h> - -#include <util/datetime/base.h> -#include <util/generic/hash_set.h> -#include <util/generic/ptr.h> -#include 
<util/stream/file.h> -#include <util/string/cast.h> -#include <util/string/subst.h> -#include <util/system/file.h> - -namespace NYT { - -//////////////////////////////////////////////////////////////////// - -template <> -i64 ConvertJobStatisticsEntry(i64 value) -{ - return value; -} - -template <> -TDuration ConvertJobStatisticsEntry(i64 value) -{ - return TDuration::MilliSeconds(value); -} - -//////////////////////////////////////////////////////////////////// - -static TTaskName JobTypeToTaskName(EJobType jobType) -{ - switch (jobType) { - case EJobType::PartitionMap: - return ETaskName::PartitionMap0; - case EJobType::Partition: - return ETaskName::Partition0; - default: - return ToString(jobType); - } -} - -static TTaskName FixTaskName(TString taskName) -{ - if (taskName == "partition") { - return ETaskName::Partition0; - } else if (taskName == "partition_map") { - return ETaskName::PartitionMap0; - } - return taskName; -} - -//////////////////////////////////////////////////////////////////// - -class TJobStatistics::TData - : public TThrRefBase -{ -public: - using TTaskName2Data = THashMap<TString, TJobStatistics::TDataEntry>; - using TState2TaskName2Data = THashMap<EJobState, TTaskName2Data>; - using TName2State2TaskName2Data = THashMap<TString, TState2TaskName2Data>; - -public: - TName2State2TaskName2Data Name2State2TaskName2Data; - -public: - TData() = default; - - TData(const TNode& statisticsNode) - { - ParseNode(statisticsNode, TString(), &Name2State2TaskName2Data); - } - - static void Aggregate(TJobStatistics::TDataEntry* result, const TJobStatistics::TDataEntry& other) - { - result->Max = Max(result->Max, other.Max); - result->Min = Min(result->Min, other.Min); - result->Sum += other.Sum; - result->Count += other.Count; - } - - static void ParseNode(const TNode& node, TState2TaskName2Data* output) - { - auto getInt = [] (const TNode& theNode, TStringBuf key) { - const auto& nodeAsMap = theNode.AsMap(); - auto it = nodeAsMap.find(key); - if (it == 
nodeAsMap.end()) { - ythrow yexception() << "Key '" << key << "' is not found"; - } - const auto& valueNode = it->second; - if (!valueNode.IsInt64()) { - ythrow yexception() << "Key '" << key << "' is not of int64 type"; - } - return valueNode.AsInt64(); - }; - - for (const auto& [stateStr, taskName2DataNode] : node.AsMap()) { - EJobState state; - if (!TryFromString(stateStr, state)) { - continue; - } - for (const auto& [taskName, dataNode] : taskName2DataNode.AsMap()) { - auto fixedTaskName = FixTaskName(taskName); - auto& data = (*output)[state][fixedTaskName.Get()]; - data.Max = getInt(dataNode, "max"); - data.Min = getInt(dataNode, "min"); - data.Sum = getInt(dataNode, "sum"); - data.Count = getInt(dataNode, "count"); - } - } - } - - static void ParseNode(const TNode& node, const TString& curPath, TName2State2TaskName2Data* output) - { - Y_VERIFY(node.IsMap()); - - for (const auto& [key, value] : node.AsMap()) { - if (key == "$"sv) { - ParseNode(value, &(*output)[curPath]); - } else { - TString childPath = curPath; - if (!childPath.empty()) { - childPath.push_back('/'); - } - if (key.find_first_of('/') != key.npos) { - TString keyCopy(key); - SubstGlobal(keyCopy, "/", "\\/"); - childPath += keyCopy; - } else { - childPath += key; - } - ParseNode(value, childPath, output); - } - } - } -}; - -//////////////////////////////////////////////////////////////////// - -struct TJobStatistics::TFilter - : public TThrRefBase -{ - TVector<TTaskName> TaskNameFilter; - TVector<EJobState> JobStateFilter = {EJobState::Completed}; -}; - -//////////////////////////////////////////////////////////////////// - -const TString TJobStatistics::CustomStatisticsNamePrefix_ = "custom/"; - -TJobStatistics::TJobStatistics() - : Data_(::MakeIntrusive<TData>()) - , Filter_(::MakeIntrusive<TFilter>()) -{ } - - -TJobStatistics::TJobStatistics(const NYT::TNode& statisticsNode) - : Data_(::MakeIntrusive<TData>(statisticsNode)) - , Filter_(::MakeIntrusive<TFilter>()) -{ } - 
-TJobStatistics::TJobStatistics(::TIntrusivePtr<TData> data, ::TIntrusivePtr<TFilter> filter) - : Data_(data) - , Filter_(::MakeIntrusive<TFilter>(*filter)) -{ } - -TJobStatistics::TJobStatistics(const TJobStatistics& jobStatistics) = default; -TJobStatistics::TJobStatistics(TJobStatistics&&) = default; - -TJobStatistics& TJobStatistics::operator=(const TJobStatistics& jobStatistics) = default; -TJobStatistics& TJobStatistics::operator=(TJobStatistics&& jobStatistics) = default; - -TJobStatistics::~TJobStatistics() = default; - -TJobStatistics TJobStatistics::TaskName(TVector<TTaskName> taskNames) const -{ - auto newFilter = ::MakeIntrusive<TFilter>(*Filter_); - newFilter->TaskNameFilter = std::move(taskNames); - return TJobStatistics(Data_, std::move(newFilter)); -} - -TJobStatistics TJobStatistics::JobState(TVector<EJobState> jobStates) const -{ - auto newFilter = ::MakeIntrusive<TFilter>(*Filter_); - newFilter->JobStateFilter = std::move(jobStates); - return TJobStatistics(Data_, std::move(newFilter)); -} - -TJobStatistics TJobStatistics::JobType(TVector<EJobType> jobTypes) const -{ - TVector<TTaskName> taskNames; - for (auto jobType : jobTypes) { - taskNames.push_back(JobTypeToTaskName(jobType)); - } - return TaskName(std::move(taskNames)); -} - -bool TJobStatistics::HasStatistics(TStringBuf name) const -{ - return Data_->Name2State2TaskName2Data.contains(name); -} - -TJobStatisticsEntry<i64> TJobStatistics::GetStatistics(TStringBuf name) const -{ - return GetStatisticsAs<i64>(name); -} - -TVector<TString> TJobStatistics::GetStatisticsNames() const -{ - TVector<TString> result; - result.reserve(Data_->Name2State2TaskName2Data.size()); - for (const auto& entry : Data_->Name2State2TaskName2Data) { - result.push_back(entry.first); - } - return result; -} - -bool TJobStatistics::HasCustomStatistics(TStringBuf name) const -{ - return HasStatistics(CustomStatisticsNamePrefix_ + name); -} - -TJobStatisticsEntry<i64> TJobStatistics::GetCustomStatistics(TStringBuf name) 
const -{ - return GetCustomStatisticsAs<i64>(name); -} - -TVector<TString> TJobStatistics::GetCustomStatisticsNames() const -{ - TVector<TString> result; - for (const auto& entry : Data_->Name2State2TaskName2Data) { - if (entry.first.StartsWith(CustomStatisticsNamePrefix_)) { - result.push_back(entry.first.substr(CustomStatisticsNamePrefix_.size())); - } - } - return result; -} - -TMaybe<TJobStatistics::TDataEntry> TJobStatistics::GetStatisticsImpl(TStringBuf name) const -{ - auto name2State2TaskName2DataIt = Data_->Name2State2TaskName2Data.find(name); - Y_ENSURE( - name2State2TaskName2DataIt != Data_->Name2State2TaskName2Data.end(), - "Statistics '" << name << "' are missing"); - const auto& state2TaskName2Data = name2State2TaskName2DataIt->second; - - TMaybe<TDataEntry> result; - auto aggregate = [&] (const TDataEntry& data) { - if (result) { - TData::Aggregate(&result.GetRef(), data); - } else { - result = data; - } - }; - - auto aggregateTaskName2Data = [&] (const TData::TTaskName2Data& taskName2Data) { - if (Filter_->TaskNameFilter.empty()) { - for (const auto& [taskName, data] : taskName2Data) { - aggregate(data); - } - } else { - for (const auto& taskName : Filter_->TaskNameFilter) { - auto it = taskName2Data.find(taskName.Get()); - if (it == taskName2Data.end()) { - continue; - } - const auto& data = it->second; - aggregate(data); - } - } - }; - - if (Filter_->JobStateFilter.empty()) { - for (const auto& [state, taskName2Data] : state2TaskName2Data) { - aggregateTaskName2Data(taskName2Data); - } - } else { - for (auto state : Filter_->JobStateFilter) { - auto it = state2TaskName2Data.find(state); - if (it == state2TaskName2Data.end()) { - continue; - } - const auto& taskName2Data = it->second; - aggregateTaskName2Data(taskName2Data); - } - } - - return result; -} - -//////////////////////////////////////////////////////////////////// - -namespace { - -constexpr int USER_STATISTICS_FILE_DESCRIPTOR = 5; -constexpr char PATH_DELIMITER = '/'; -constexpr char 
ESCAPE = '\\'; - -IOutputStream* GetStatisticsStream() -{ - static TFile file = Duplicate(USER_STATISTICS_FILE_DESCRIPTOR); - static TFileOutput stream(file); - return &stream; -} - -template <typename T> -void WriteCustomStatisticsAny(TStringBuf path, const T& value) -{ - ::NYson::TYsonWriter writer(GetStatisticsStream(), NYson::EYsonFormat::Binary, ::NYson::EYsonType::ListFragment); - int depth = 0; - size_t begin = 0; - size_t end = 0; - TVector<TString> items; - while (end <= path.size()) { - if (end + 1 < path.size() && path[end] == ESCAPE && path[end + 1] == PATH_DELIMITER) { - end += 2; - continue; - } - if (end == path.size() || path[end] == PATH_DELIMITER) { - writer.OnBeginMap(); - items.emplace_back(path.data() + begin, end - begin); - SubstGlobal(items.back(), "\\/", "/"); - writer.OnKeyedItem(TStringBuf(items.back())); - ++depth; - begin = end + 1; - } - ++end; - } - Serialize(value, &writer); - while (depth > 0) { - writer.OnEndMap(); - --depth; - } -} - -} - -//////////////////////////////////////////////////////////////////// - -void WriteCustomStatistics(const TNode& statistics) -{ - ::NYson::TYsonWriter writer(GetStatisticsStream(), NYson::EYsonFormat::Binary, ::NYson::EYsonType::ListFragment); - Serialize(statistics, &writer); -} - -void WriteCustomStatistics(TStringBuf path, i64 value) -{ - WriteCustomStatisticsAny(path, value); -} - -void FlushCustomStatisticsStream() { - GetStatisticsStream()->Flush(); -} -//////////////////////////////////////////////////////////////////// - -} // namespace NYT diff --git a/yt/cpp/mapreduce/interface/job_statistics.h b/yt/cpp/mapreduce/interface/job_statistics.h deleted file mode 100644 index 8af751604fd..00000000000 --- a/yt/cpp/mapreduce/interface/job_statistics.h +++ /dev/null @@ -1,268 +0,0 @@ -#pragma once - -/// -/// @file yt/cpp/mapreduce/interface/job_statistics.h -/// -/// Header containing classes and utility functions to work with -/// [job 
statistics](https://docs.yandex-team.ru/yt/problems/jobstatistics). - -#include "fwd.h" - -#include <library/cpp/yson/node/node.h> - -#include <util/system/defaults.h> -#include <util/generic/maybe.h> -#include <util/generic/ptr.h> - -namespace NYT { - -//////////////////////////////////////////////////////////////////// - -/// -/// @brief Convert i64 representation of statistics to other type. -/// -/// Library defines this template for types TDuration and i64. -/// Users may define it for their types. -/// -/// @see @ref NYT::TJobStatistics::GetStatisticsAs method. -template <typename T> -T ConvertJobStatisticsEntry(i64 value); - -//////////////////////////////////////////////////////////////////// - -/// Class representing a collection of job statistics. -class TJobStatistics -{ -public: - /// - /// Construct empty statistics. - TJobStatistics(); - - /// - /// Construct statistics from statistics node. - TJobStatistics(const NYT::TNode& statistics); - - TJobStatistics(const TJobStatistics& jobStatistics); - TJobStatistics(TJobStatistics&& jobStatistics); - - TJobStatistics& operator=(const TJobStatistics& jobStatistics); - TJobStatistics& operator=(TJobStatistics&& jobStatistics); - - ~TJobStatistics(); - - /// - /// @brief Filter statistics by task name. - /// - /// @param taskNames What task names to include (empty means all). - TJobStatistics TaskName(TVector<TTaskName> taskNames) const; - - /// - /// @brief Filter statistics by job state. - /// - /// @param filter What job states to include (empty means all). - /// - /// @note Default statistics include only (successfully) completed jobs. - TJobStatistics JobState(TVector<EJobState> filter) const; - - /// - /// @brief Filter statistics by job type. - /// - /// @param filter What job types to include (empty means all). - /// - /// @deprecated Use @ref TJobStatistics::TaskName instead. 
- /// - /// @see https://yt.yandex-team.ru/docs/description/mr/jobs#obshaya-shema - TJobStatistics JobType(TVector<EJobType> filter) const; - - /// - /// @brief Check that given statistics exist. - /// - /// @param name Slash separated statistics name, e.g. "time/total" (like it appears in web interface). - bool HasStatistics(TStringBuf name) const; - - /// - /// @brief Get statistics by name. - /// - /// @param name Slash separated statistics name, e.g. "time/total" (like it appears in web interface). - /// - /// @note If statistics is missing an exception is thrown. If because of filters - /// no fields remain the returned value is empty (all fields are `Nothing`). - /// - /// @note We don't use `TMaybe<TJobStatisticsEntry>` here; - /// instead, @ref NYT::TJobStatisticsEntry methods return `TMaybe<i64>`, - /// so user easier use `.GetOrElse`: - /// ``` - /// jobStatistics.GetStatistics("some/statistics/name").Max().GetOrElse(0); - /// ``` - TJobStatisticsEntry<i64> GetStatistics(TStringBuf name) const; - - /// - /// @brief Get statistics by name. - /// - /// @param name Slash separated statistics name, e.g. "time/total" (like it appears in web interface). - /// - /// @note In order to use `GetStatisticsAs` method, @ref NYT::ConvertJobStatisticsEntry function must be defined - /// (the library defines it for `i64` and `TDuration`, user may define it for other types). - template <typename T> - TJobStatisticsEntry<T> GetStatisticsAs(TStringBuf name) const; - - /// - /// Get (slash separated) names of statistics. - TVector<TString> GetStatisticsNames() const; - - /// - /// @brief Check if given custom statistics exists. - /// - /// @param name Slash separated custom statistics name. - bool HasCustomStatistics(TStringBuf name) const; - - /// - /// @brief Get custom statistics (those the user can write in job with @ref NYT::WriteCustomStatistics). - /// - /// @param name Slash separated custom statistics name. 
- TJobStatisticsEntry<i64> GetCustomStatistics(TStringBuf name) const; - - /// - /// @brief Get custom statistics (those the user can write in job with @ref NYT::WriteCustomStatistics). - /// - /// @param name Slash separated custom statistics name. - template <typename T> - TJobStatisticsEntry<T> GetCustomStatisticsAs(TStringBuf name) const; - - /// - /// Get names of all custom statistics. - TVector<TString> GetCustomStatisticsNames() const; - -private: - class TData; - struct TFilter; - - struct TDataEntry { - i64 Max; - i64 Min; - i64 Sum; - i64 Count; - }; - - static const TString CustomStatisticsNamePrefix_; - -private: - TJobStatistics(::TIntrusivePtr<TData> data, ::TIntrusivePtr<TFilter> filter); - - TMaybe<TDataEntry> GetStatisticsImpl(TStringBuf name) const; - -private: - ::TIntrusivePtr<TData> Data_; - ::TIntrusivePtr<TFilter> Filter_; - -private: - template<typename T> - friend class TJobStatisticsEntry; -}; - -//////////////////////////////////////////////////////////////////// - -/// Class representing single statistic. -template <typename T> -class TJobStatisticsEntry -{ -public: - TJobStatisticsEntry(TMaybe<TJobStatistics::TDataEntry> data) - : Data_(std::move(data)) - { } - - /// Sum of the statistic over all jobs. - TMaybe<T> Sum() const - { - if (Data_) { - return ConvertJobStatisticsEntry<T>(Data_->Sum); - } - return Nothing(); - } - - /// @brief Average of the statistic over all jobs. - /// - /// @note Only jobs that emitted statistics are taken into account. - TMaybe<T> Avg() const - { - if (Data_ && Data_->Count) { - return ConvertJobStatisticsEntry<T>(Data_->Sum / Data_->Count); - } - return Nothing(); - } - - /// @brief Number of jobs that emitted this statistic. - TMaybe<T> Count() const - { - if (Data_) { - return ConvertJobStatisticsEntry<T>(Data_->Count); - } - return Nothing(); - } - - /// @brief Maximum value of the statistic over all jobs. 
- TMaybe<T> Max() const - { - if (Data_) { - return ConvertJobStatisticsEntry<T>(Data_->Max); - } - return Nothing(); - } - - /// @brief Minimum value of the statistic over all jobs. - TMaybe<T> Min() const - { - if (Data_) { - return ConvertJobStatisticsEntry<T>(Data_->Min); - } - return Nothing(); - } - -private: - TMaybe<TJobStatistics::TDataEntry> Data_; - -private: - friend class TJobStatistics; -}; - -//////////////////////////////////////////////////////////////////// - -template <typename T> -TJobStatisticsEntry<T> TJobStatistics::GetStatisticsAs(TStringBuf name) const -{ - return TJobStatisticsEntry<T>(GetStatisticsImpl(name)); -} - -template <typename T> -TJobStatisticsEntry<T> TJobStatistics::GetCustomStatisticsAs(TStringBuf name) const -{ - return TJobStatisticsEntry<T>(GetStatisticsImpl(CustomStatisticsNamePrefix_ + name)); -} - -//////////////////////////////////////////////////////////////////// - -/// -/// @brief Write [custom statistics](https://yt.yandex-team.ru/docs/description/mr/jobs#user_stats). -/// -/// @param path Slash-separated path (length must not exceed 512 bytes). -/// @param value Value of the statistic. -/// -/// @note The function must be called in job. -/// Total number of statistics (with different paths) must not exceed 128. -void WriteCustomStatistics(TStringBuf path, i64 value); - -/// -/// @brief Write several [custom statistics](https://yt.yandex-team.ru/docs/description/mr/jobs#user_stats) at once. -/// -/// @param statistics A tree of map nodes with leaves of type `i64`. -/// -/// @note The call is equivalent to calling @ref NYT::WriteCustomStatistics(TStringBuf, i64) for every path in the given map. 
-void WriteCustomStatistics(const TNode& statistics); - -/// -/// @brief Flush [custom statistics stream](https://yt.yandex-team.ru/docs/description/mr/jobs#user_stats) -/// -void FlushCustomStatisticsStream(); -//////////////////////////////////////////////////////////////////// - -} // namespace NYT diff --git a/yt/cpp/mapreduce/interface/job_statistics_ut.cpp b/yt/cpp/mapreduce/interface/job_statistics_ut.cpp deleted file mode 100644 index 0cf53d771ac..00000000000 --- a/yt/cpp/mapreduce/interface/job_statistics_ut.cpp +++ /dev/null @@ -1,257 +0,0 @@ -#include <yt/cpp/mapreduce/interface/job_statistics.h> -#include <yt/cpp/mapreduce/interface/operation.h> - -#include <library/cpp/yson/node/node_io.h> - -#include <library/cpp/testing/unittest/registar.h> - -using namespace NYT; - -Y_UNIT_TEST_SUITE(JobStatistics) -{ - Y_UNIT_TEST(Simple) - { - const TString input = R"""( - { - "data" = { - "output" = { - "0" = { - "uncompressed_data_size" = { - "$" = { - "completed" = { - "simple_sort" = { - "max" = 130; - "count" = 1; - "min" = 130; - "sum" = 130; - }; - "map" = { - "max" = 42; - "count" = 1; - "min" = 42; - "sum" = 42; - }; - }; - "aborted" = { - "simple_sort" = { - "max" = 24; - "count" = 1; - "min" = 24; - "sum" = 24; - }; - }; - }; - }; - }; - }; - }; - })"""; - - TJobStatistics stat(NodeFromYsonString(input)); - - UNIT_ASSERT(stat.HasStatistics("data/output/0/uncompressed_data_size")); - UNIT_ASSERT(!stat.HasStatistics("nonexistent-statistics")); - UNIT_ASSERT_EXCEPTION_CONTAINS(stat.GetStatistics("BLAH-BLAH"), yexception, "Statistics"); - - UNIT_ASSERT_VALUES_EQUAL(stat.GetStatisticsNames(), TVector<TString>{"data/output/0/uncompressed_data_size"}); - - UNIT_ASSERT_VALUES_EQUAL(stat.GetStatistics("data/output/0/uncompressed_data_size").Max(), 130); - UNIT_ASSERT_VALUES_EQUAL(stat.GetStatistics("data/output/0/uncompressed_data_size").Count(), 2); - UNIT_ASSERT_VALUES_EQUAL(stat.GetStatistics("data/output/0/uncompressed_data_size").Min(), 42); - 
UNIT_ASSERT_VALUES_EQUAL(stat.GetStatistics("data/output/0/uncompressed_data_size").Sum(), 172); - UNIT_ASSERT_VALUES_EQUAL(stat.GetStatistics("data/output/0/uncompressed_data_size").Avg(), 172 / 2); - - UNIT_ASSERT_VALUES_EQUAL(stat.JobState({EJobState::Aborted}).GetStatistics("data/output/0/uncompressed_data_size").Sum(), 24); - UNIT_ASSERT_VALUES_EQUAL(stat.JobType({EJobType::Map}).JobState({EJobState::Aborted}).GetStatistics("data/output/0/uncompressed_data_size").Sum(), TMaybe<i64>()); - } - - Y_UNIT_TEST(TestOtherTypes) - { - const TString input = R"""( - { - "time" = { - "exec" = { - "$" = { - "completed" = { - "map" = { - "max" = 2482468; - "count" = 38; - "min" = 578976; - "sum" = 47987270; - }; - }; - }; - }; - }; - })"""; - - TJobStatistics stat(NodeFromYsonString(input)); - - UNIT_ASSERT_VALUES_EQUAL(stat.GetStatisticsAs<TDuration>("time/exec").Max(), TDuration::MilliSeconds(2482468)); - } - - Y_UNIT_TEST(Custom) - { - const TString input = R"""( - { - "custom" = { - "some" = { - "path" = { - "$" = { - "completed" = { - "map" = { - "max" = -1; - "count" = 1; - "min" = -1; - "sum" = -1; - }; - }; - }; - }; - }; - "another" = { - "path" = { - "$" = { - "completed" = { - "map" = { - "max" = 1001; - "count" = 2; - "min" = 1001; - "sum" = 2002; - }; - }; - }; - }; - }; - }; - })"""; - - TJobStatistics stat(NodeFromYsonString(input)); - - UNIT_ASSERT(stat.HasCustomStatistics("some/path")); - UNIT_ASSERT(!stat.HasCustomStatistics("nonexistent-statistics")); - UNIT_ASSERT_EXCEPTION_CONTAINS(stat.GetCustomStatistics("BLAH-BLAH"), yexception, "Statistics"); - - const auto names = stat.GetCustomStatisticsNames(); - const THashSet<TString> expected = {"some/path", "another/path"}; - UNIT_ASSERT_VALUES_EQUAL(THashSet<TString>(names.begin(), names.end()), expected); - - UNIT_ASSERT_VALUES_EQUAL(stat.GetCustomStatistics("some/path").Max(), -1); - UNIT_ASSERT_VALUES_EQUAL(stat.GetCustomStatistics("another/path").Avg(), 1001); - } - - Y_UNIT_TEST(TaskNames) - { - const 
TString input = R"""( - { - "data" = { - "output" = { - "0" = { - "uncompressed_data_size" = { - "$" = { - "completed" = { - "partition_map" = { - "max" = 130; - "count" = 1; - "min" = 130; - "sum" = 130; - }; - "partition(0)" = { - "max" = 42; - "count" = 1; - "min" = 42; - "sum" = 42; - }; - }; - "aborted" = { - "simple_sort" = { - "max" = 24; - "count" = 1; - "min" = 24; - "sum" = 24; - }; - }; - }; - }; - }; - }; - }; - })"""; - - TJobStatistics stat(NodeFromYsonString(input)); - - UNIT_ASSERT(stat.HasStatistics("data/output/0/uncompressed_data_size")); - UNIT_ASSERT(!stat.HasStatistics("nonexistent-statistics")); - UNIT_ASSERT_EXCEPTION_CONTAINS(stat.GetStatistics("BLAH-BLAH"), yexception, "Statistics"); - - UNIT_ASSERT_VALUES_EQUAL(stat.GetStatisticsNames(), TVector<TString>{"data/output/0/uncompressed_data_size"}); - - UNIT_ASSERT_VALUES_EQUAL(stat.GetStatistics("data/output/0/uncompressed_data_size").Max(), 130); - UNIT_ASSERT_VALUES_EQUAL(stat.GetStatistics("data/output/0/uncompressed_data_size").Count(), 2); - UNIT_ASSERT_VALUES_EQUAL(stat.GetStatistics("data/output/0/uncompressed_data_size").Min(), 42); - UNIT_ASSERT_VALUES_EQUAL(stat.GetStatistics("data/output/0/uncompressed_data_size").Sum(), 172); - UNIT_ASSERT_VALUES_EQUAL(stat.GetStatistics("data/output/0/uncompressed_data_size").Avg(), 172 / 2); - - UNIT_ASSERT_VALUES_EQUAL( - stat - .JobState({EJobState::Aborted}) - .GetStatistics("data/output/0/uncompressed_data_size") - .Sum(), - 24); - UNIT_ASSERT_VALUES_EQUAL( - stat - .JobType({EJobType::Partition}) - .JobState({EJobState::Aborted}) - .GetStatistics("data/output/0/uncompressed_data_size") - .Sum(), - TMaybe<i64>()); - UNIT_ASSERT_VALUES_EQUAL( - stat - .TaskName({"partition(0)"}) - .GetStatistics("data/output/0/uncompressed_data_size") - .Sum(), - 42); - UNIT_ASSERT_VALUES_EQUAL( - stat - .TaskName({"partition"}) - .GetStatistics("data/output/0/uncompressed_data_size") - .Sum(), - TMaybe<i64>()); - UNIT_ASSERT_VALUES_EQUAL( - stat - 
.TaskName({"partition_map(0)"}) - .GetStatistics("data/output/0/uncompressed_data_size") - .Sum(), - 130); - UNIT_ASSERT_VALUES_EQUAL( - stat - .JobType({EJobType::Partition}) - .GetStatistics("data/output/0/uncompressed_data_size") - .Sum(), - 42); - UNIT_ASSERT_VALUES_EQUAL( - stat - .JobType({EJobType::PartitionMap}) - .GetStatistics("data/output/0/uncompressed_data_size") - .Sum(), - 130); - UNIT_ASSERT_VALUES_EQUAL( - stat - .TaskName({ETaskName::Partition0}) - .GetStatistics("data/output/0/uncompressed_data_size") - .Sum(), - 42); - UNIT_ASSERT_VALUES_EQUAL( - stat - .TaskName({ETaskName::Partition1}) - .GetStatistics("data/output/0/uncompressed_data_size") - .Sum(), - TMaybe<i64>()); - UNIT_ASSERT_VALUES_EQUAL( - stat - .TaskName({ETaskName::PartitionMap0}) - .GetStatistics("data/output/0/uncompressed_data_size") - .Sum(), - 130); - } -} diff --git a/yt/cpp/mapreduce/interface/logging/logger.cpp b/yt/cpp/mapreduce/interface/logging/logger.cpp deleted file mode 100644 index bfa56b94f6d..00000000000 --- a/yt/cpp/mapreduce/interface/logging/logger.cpp +++ /dev/null @@ -1,188 +0,0 @@ -#include "logger.h" - -#include <util/datetime/base.h> - -#include <util/stream/file.h> -#include <util/stream/format.h> -#include <util/stream/printf.h> -#include <util/stream/str.h> - -#include <util/system/mutex.h> -#include <util/system/rwlock.h> -#include <util/system/thread.h> - -namespace NYT { - -//////////////////////////////////////////////////////////////////////////////// - -static TStringBuf StripFileName(TStringBuf path) { - TStringBuf l, r; - if (path.TryRSplit('/', l, r) || path.TryRSplit('\\', l, r)) { - return r; - } else { - return path; - } -} - -static char GetLogLevelCode(ILogger::ELevel level) { - switch (level) { - case ILogger::FATAL: return 'F'; - case ILogger::ERROR: return 'E'; - case ILogger::INFO: return 'I'; - case ILogger::DEBUG: return 'D'; - } - Y_UNREACHABLE(); -} - -//////////////////////////////////////////////////////////////////////////////// 
- -class TNullLogger - : public ILogger -{ -public: - void Log(ELevel level, const TSourceLocation& sourceLocation, const char* format, va_list args) override - { - Y_UNUSED(level); - Y_UNUSED(sourceLocation); - Y_UNUSED(format); - Y_UNUSED(args); - } -}; - -//////////////////////////////////////////////////////////////////////////////// - -class TLoggerBase - : public ILogger -{ -public: - TLoggerBase(ELevel cutLevel) - : CutLevel_(cutLevel) - { } - - virtual void OutputLine(const TString& line) = 0; - - void Log(ELevel level, const TSourceLocation& sourceLocation, const char* format, va_list args) override - { - if (level > CutLevel_) { - return; - } - - TStringStream stream; - stream << TInstant::Now().ToStringLocal() - << " " << GetLogLevelCode(level) - << " [" << Hex(TThread::CurrentThreadId(), HF_FULL) << "] "; - Printf(stream, format, args); - stream << " - " << StripFileName(sourceLocation.File) << ':' << sourceLocation.Line << Endl; - - TGuard<TMutex> guard(Mutex_); - OutputLine(stream.Str()); - } - -private: - ELevel CutLevel_; - TMutex Mutex_; -}; - -//////////////////////////////////////////////////////////////////////////////// - -class TStdErrLogger - : public TLoggerBase -{ -public: - TStdErrLogger(ELevel cutLevel) - : TLoggerBase(cutLevel) - { } - - void OutputLine(const TString& line) override - { - Cerr << line; - } -}; - -ILoggerPtr CreateStdErrLogger(ILogger::ELevel cutLevel) -{ - return new TStdErrLogger(cutLevel); -} - -//////////////////////////////////////////////////////////////////////////////// - -class TFileLogger - : public TLoggerBase -{ -public: - TFileLogger(ELevel cutLevel, const TString& path, bool append) - : TLoggerBase(cutLevel) - , Stream_(TFile(path, OpenAlways | WrOnly | Seq | (append ? 
ForAppend : EOpenMode()))) - { } - - void OutputLine(const TString& line) override - { - Stream_ << line; - } - -private: - TUnbufferedFileOutput Stream_; -}; - -ILoggerPtr CreateFileLogger(ILogger::ELevel cutLevel, const TString& path, bool append) -{ - return new TFileLogger(cutLevel, path, append); -} -//////////////////////////////////////////////////////////////////////////////// - -class TBufferedFileLogger - : public TLoggerBase -{ -public: - TBufferedFileLogger(ELevel cutLevel, const TString& path, bool append) - : TLoggerBase(cutLevel) - , Stream_(TFile(path, OpenAlways | WrOnly | Seq | (append ? ForAppend : EOpenMode()))) - { } - - void OutputLine(const TString& line) override - { - Stream_ << line; - } - -private: - TFileOutput Stream_; -}; - -ILoggerPtr CreateBufferedFileLogger(ILogger::ELevel cutLevel, const TString& path, bool append) -{ - return new TBufferedFileLogger(cutLevel, path, append); -} - -//////////////////////////////////////////////////////////////////////////////// - -static TRWMutex LoggerMutex; -static ILoggerPtr Logger; - -struct TLoggerInitializer -{ - TLoggerInitializer() - { - Logger = new TNullLogger; - } -} LoggerInitializer; - -void SetLogger(ILoggerPtr logger) -{ - auto guard = TWriteGuard(LoggerMutex); - if (logger) { - Logger = logger; - } else { - Logger = new TNullLogger; - } -} - -ILoggerPtr GetLogger() -{ - auto guard = TReadGuard(LoggerMutex); - return Logger; -} - -//////////////////////////////////////////////////////////////////////////////// - -} - diff --git a/yt/cpp/mapreduce/interface/logging/logger.h b/yt/cpp/mapreduce/interface/logging/logger.h deleted file mode 100644 index 2b5aae87d14..00000000000 --- a/yt/cpp/mapreduce/interface/logging/logger.h +++ /dev/null @@ -1,43 +0,0 @@ -#pragma once - -#include <util/generic/ptr.h> -#include <util/generic/string.h> -#include <util/system/compat.h> -#include <util/system/src_location.h> - -namespace NYT { - 
-//////////////////////////////////////////////////////////////////////////////// - -class ILogger - : public TThrRefBase -{ -public: - enum ELevel - { - FATAL /* "fatal", "FATAL" */, - // We don't have such level as `warning', but we support it for compatibility with other APIs. - ERROR /* "error", "warning", "ERROR", "WARNING" */, - INFO /* "info", "INFO" */, - DEBUG /* "debug", "DEBUG" */ - }; - - virtual void Log(ELevel level, const ::TSourceLocation& sourceLocation, const char* format, va_list args) = 0; -}; - -using ILoggerPtr = ::TIntrusivePtr<ILogger>; - -void SetLogger(ILoggerPtr logger); -ILoggerPtr GetLogger(); - -ILoggerPtr CreateStdErrLogger(ILogger::ELevel cutLevel); -ILoggerPtr CreateFileLogger(ILogger::ELevel cutLevel, const TString& path, bool append = false); - -/** - * Create logger that writes to a file in a buffered manner. - * It should result in fewer system calls (useful if you expect a lot of log messages), - * but in case of a crash, you would lose some log messages that haven't been flushed yet. 
- */ -ILoggerPtr CreateBufferedFileLogger(ILogger::ELevel cutLevel, const TString& path, bool append = false); - -} // namespace NYT diff --git a/yt/cpp/mapreduce/interface/logging/ya.make b/yt/cpp/mapreduce/interface/logging/ya.make deleted file mode 100644 index 8095bfe4bae..00000000000 --- a/yt/cpp/mapreduce/interface/logging/ya.make +++ /dev/null @@ -1,16 +0,0 @@ -LIBRARY() - -INCLUDE(${ARCADIA_ROOT}/yt/ya_cpp.make.inc) - -SRCS( - logger.cpp - yt_log.cpp -) - -PEERDIR( - library/cpp/yt/logging -) - -GENERATE_ENUM_SERIALIZATION(logger.h) - -END() diff --git a/yt/cpp/mapreduce/interface/logging/yt_log.cpp b/yt/cpp/mapreduce/interface/logging/yt_log.cpp deleted file mode 100644 index 9fa7b915802..00000000000 --- a/yt/cpp/mapreduce/interface/logging/yt_log.cpp +++ /dev/null @@ -1,126 +0,0 @@ -#include "yt_log.h" - -#include "logger.h" - -#include <util/generic/guid.h> - -#include <util/system/mutex.h> - -namespace NYT { - -using namespace NLogging; - -//////////////////////////////////////////////////////////////////////////////// - -namespace { - -class TLogManager - : public ILogManager -{ -public: - static constexpr TStringBuf CategoryName = "Wrapper"; - -public: - void RegisterStaticAnchor( - TLoggingAnchor* anchor, - ::TSourceLocation sourceLocation, - TStringBuf anchorMessage) override - { - if (anchor->Registered.exchange(true)) { - return; - } - - anchor->Enabled.store(true); - - auto guard = Guard(Mutex_); - anchor->SourceLocation = sourceLocation; - anchor->AnchorMessage = anchorMessage; - } - - void UpdateAnchor(TLoggingAnchor* /*position*/) override - { } - - void Enqueue(TLogEvent&& event) override - { - auto message = TString(event.MessageRef.ToStringBuf()); - LogMessage( - ToImplLevel(event.Level), - ::TSourceLocation(event.SourceFile, event.SourceLine), - "%.*s", - event.MessageRef.size(), - event.MessageRef.begin()); - } - - const TLoggingCategory* GetCategory(TStringBuf categoryName) override - { - Y_VERIFY(categoryName == CategoryName); - return 
&Category_; - } - - void UpdateCategory(TLoggingCategory* /*category*/) override - { - Y_FAIL(); - } - - bool GetAbortOnAlert() const override - { - return false; - } - -private: - static ILogger::ELevel ToImplLevel(ELogLevel level) - { - switch (level) { - case ELogLevel::Minimum: - case ELogLevel::Trace: - case ELogLevel::Debug: - return ILogger::ELevel::DEBUG; - case ELogLevel::Info: - return ILogger::ELevel::INFO; - case ELogLevel::Warning: - case ELogLevel::Error: - return ILogger::ELevel::ERROR; - case ELogLevel::Alert: - case ELogLevel::Fatal: - case ELogLevel::Maximum: - return ILogger::ELevel::FATAL; - } - } - - static void LogMessage(ILogger::ELevel level, const ::TSourceLocation& sourceLocation, const char* format, ...) - { - va_list args; - va_start(args, format); - GetLogger()->Log(level, sourceLocation, format, args); - va_end(args); - } - -private: - ::TMutex Mutex_; - std::atomic<int> ActualVersion_{1}; - const TLoggingCategory Category_{ - .Name{CategoryName}, - .MinPlainTextLevel{ELogLevel::Minimum}, - .CurrentVersion{1}, - .ActualVersion = &ActualVersion_, - }; -}; - -TLogManager LogManager; - -} // namespace - -//////////////////////////////////////////////////////////////////////////////// - -TLogger Logger(&LogManager, TLogManager::CategoryName); - -//////////////////////////////////////////////////////////////////////////////// - -void FormatValue(TStringBuilderBase* builder, const TGUID& value, TStringBuf /*format*/) -{ - builder->AppendString(GetGuidAsString(value)); -} - -//////////////////////////////////////////////////////////////////////////////// - -} // namespace NYT diff --git a/yt/cpp/mapreduce/interface/logging/yt_log.h b/yt/cpp/mapreduce/interface/logging/yt_log.h deleted file mode 100644 index 4cf93a6ba1d..00000000000 --- a/yt/cpp/mapreduce/interface/logging/yt_log.h +++ /dev/null @@ -1,17 +0,0 @@ -#pragma once - -#include <library/cpp/yt/logging/logger.h> - -struct TGUID; - -namespace NYT { - 
-//////////////////////////////////////////////////////////////////////////////// - -extern NLogging::TLogger Logger; - -void FormatValue(TStringBuilderBase* builder, const TGUID& value, TStringBuf format); - -//////////////////////////////////////////////////////////////////////////////// - -} // namespace NYT diff --git a/yt/cpp/mapreduce/interface/mpl.h b/yt/cpp/mapreduce/interface/mpl.h deleted file mode 100644 index 9865e28b6c3..00000000000 --- a/yt/cpp/mapreduce/interface/mpl.h +++ /dev/null @@ -1,73 +0,0 @@ -#pragma once - -#include "fwd.h" - -#include <tuple> -#include <type_traits> - -namespace NYT { - -/// @cond Doxygen_Suppress - -//////////////////////////////////////////////////////////////////////////////// - -template <class TBase, class TDerived> -struct TIsBaseOf -{ - static constexpr bool Value = std::is_base_of_v<TBase, TDerived> && !std::is_same_v<TBase, TDerived>; -}; - -//////////////////////////////////////////////////////////////////////////////// - -namespace NDetail { - -template <class T, class Tuple> -struct TIndexInTuple; - -template <class T, class... Types> -struct TIndexInTuple<T, std::tuple<T, Types...>> -{ - static constexpr int Value = 0; -}; - -template <class T> -struct TIndexInTuple<T, std::tuple<>> -{ - static constexpr int Value = 0; -}; - -template <class T, class U, class... Types> -struct TIndexInTuple<T, std::tuple<U, Types...>> -{ - static constexpr int Value = 1 + TIndexInTuple<T, std::tuple<Types...>>::Value; -}; - -template <class T, class TTuple> -constexpr bool DoesTupleContainType = (TIndexInTuple<T, TTuple>::Value < std::tuple_size<TTuple>{}); - -template <class TOut, class TIn = std::tuple<>> -struct TUniqueTypes; - -template <class... TOut, class TInCar, class... 
TInCdr> -struct TUniqueTypes<std::tuple<TOut...>, std::tuple<TInCar, TInCdr...>> -{ - using TType = std::conditional_t< - DoesTupleContainType<TInCar, std::tuple<TOut...>>, - typename TUniqueTypes<std::tuple<TOut...>, std::tuple<TInCdr...>>::TType, - typename TUniqueTypes<std::tuple<TOut..., TInCar>, std::tuple<TInCdr...>>::TType - >; -}; - -template <class TOut> -struct TUniqueTypes<TOut, std::tuple<>> -{ - using TType = TOut; -}; - -} // namespace NDetail - -/// @endcond Doxygen_Suppress - -//////////////////////////////////////////////////////////////////////////////// - -} diff --git a/yt/cpp/mapreduce/interface/node.h b/yt/cpp/mapreduce/interface/node.h deleted file mode 100644 index fece1b36dea..00000000000 --- a/yt/cpp/mapreduce/interface/node.h +++ /dev/null @@ -1,7 +0,0 @@ -#pragma once - -// Backward compatibility -#include "fwd.h" -#include <library/cpp/yson/node/node.h> - - diff --git a/yt/cpp/mapreduce/interface/operation-inl.h b/yt/cpp/mapreduce/interface/operation-inl.h deleted file mode 100644 index 8d53cd446fe..00000000000 --- a/yt/cpp/mapreduce/interface/operation-inl.h +++ /dev/null @@ -1,928 +0,0 @@ -#pragma once - -#ifndef OPERATION_INL_H_ -#error "Direct inclusion of this file is not allowed, use operation.h" -#include "operation.h" -#endif -#undef OPERATION_INL_H_ - -#include "errors.h" - -#include <util/generic/bt_exception.h> -#include <util/generic/singleton.h> -#include <util/system/type_name.h> - -#include <util/stream/file.h> -#include <util/stream/buffer.h> -#include <util/string/subst.h> - -#include <typeindex> - -namespace NYT { - -namespace NDetail { - -//////////////////////////////////////////////////////////////////////////////// - -template<class T> -void Assign(TVector<T>& array, size_t idx, const T& value) { - array.resize(std::max(array.size(), idx + 1)); - array[idx] = value; -} - -//////////////////////////////////////////////////////////////////////////////// - -template <typename TRow> -TStructuredRowStreamDescription 
GetStructuredRowStreamDescription() -{ - if constexpr (std::is_same_v<TRow, NYT::TNode>) { - return TTNodeStructuredRowStream{}; - } else if constexpr (std::is_same_v<TRow, NYT::TYaMRRow>) { - return TTYaMRRowStructuredRowStream{}; - } else if constexpr (std::is_same_v<::google::protobuf::Message, TRow>) { - return TProtobufStructuredRowStream{nullptr}; - } else if constexpr (TIsBaseOf<::google::protobuf::Message, TRow>::Value) { - return TProtobufStructuredRowStream{TRow::descriptor()}; - } else if constexpr (TIsProtoOneOf<TRow>::value) { - return TProtobufStructuredRowStream{nullptr}; - } else { - static_assert(TDependentFalse<TRow>, "Unknown row type"); - } -} - -//////////////////////////////////////////////////////////////////////////////// - -} // namespace NDetail - -//////////////////////////////////////////////////////////////////////////////// - -template <typename TRow> -TStructuredTablePath Structured(TRichYPath richYPath) -{ - return TStructuredTablePath(std::move(richYPath), StructuredTableDescription<TRow>()); -} - -template <typename TRow> -TTableStructure StructuredTableDescription() -{ - if constexpr (std::is_same_v<TRow, NYT::TNode>) { - return TUnspecifiedTableStructure{}; - } else if constexpr (std::is_same_v<TRow, NYT::TYaMRRow>) { - return TUnspecifiedTableStructure{}; - } else if constexpr (std::is_base_of_v<::google::protobuf::Message, TRow>) { - if constexpr (std::is_same_v<::google::protobuf::Message, TRow>) { - static_assert(TDependentFalse<TRow>, "Cannot use ::google::protobuf::Message as table descriptor"); - } else { - return TProtobufTableStructure{TRow::descriptor()}; - } - } else { - static_assert(TDependentFalse<TRow>, "Unknown row type"); - } -} - -//////////////////////////////////////////////////////////////////////////////// - -template <typename TDerived> -TDerived& TRawOperationIoTableSpec<TDerived>::AddInput(const TRichYPath& path) -{ - Inputs_.push_back(path); - return static_cast<TDerived&>(*this); -} - -template 
<typename TDerived> -TDerived& TRawOperationIoTableSpec<TDerived>::SetInput(size_t tableIndex, const TRichYPath& path) -{ - NDetail::Assign(Inputs_, tableIndex, path); -} - -template <typename TDerived> -TDerived& TRawOperationIoTableSpec<TDerived>::AddOutput(const TRichYPath& path) -{ - Outputs_.push_back(path); - return static_cast<TDerived&>(*this); -} - -template <typename TDerived> -TDerived& TRawOperationIoTableSpec<TDerived>::SetOutput(size_t tableIndex, const TRichYPath& path) -{ - NDetail::Assign(Outputs_, tableIndex, path); -} - -template <typename TDerived> -const TVector<TRichYPath>& TRawOperationIoTableSpec<TDerived>::GetInputs() const -{ - return Inputs_; -} - -template <typename TDerived> -const TVector<TRichYPath>& TRawOperationIoTableSpec<TDerived>::GetOutputs() const -{ - return Outputs_; -} - -//////////////////////////////////////////////////////////////////////////////// - -template <typename TDerived> -TDerived& TRawMapReduceOperationIoSpec<TDerived>::AddMapOutput(const TRichYPath& path) -{ - MapOutputs_.push_back(path); - return static_cast<TDerived&>(*this); -} - -template <typename TDerived> -TDerived& TRawMapReduceOperationIoSpec<TDerived>::SetMapOutput(size_t tableIndex, const TRichYPath& path) -{ - NDetail::Assign(MapOutputs_, tableIndex, path); -} - -template <typename TDerived> -const TVector<TRichYPath>& TRawMapReduceOperationIoSpec<TDerived>::GetMapOutputs() const -{ - return MapOutputs_; -} - -//////////////////////////////////////////////////////////////////////////////// - -::TIntrusivePtr<INodeReaderImpl> CreateJobNodeReader(TRawTableReaderPtr rawTableReader); -::TIntrusivePtr<IYaMRReaderImpl> CreateJobYaMRReader(TRawTableReaderPtr rawTableReader); -::TIntrusivePtr<IProtoReaderImpl> CreateJobProtoReader(TRawTableReaderPtr rawTableReader); - -::TIntrusivePtr<INodeWriterImpl> CreateJobNodeWriter(THolder<IProxyOutput> rawTableWriter); -::TIntrusivePtr<IYaMRWriterImpl> CreateJobYaMRWriter(THolder<IProxyOutput> rawTableWriter); 
-::TIntrusivePtr<IProtoWriterImpl> CreateJobProtoWriter(THolder<IProxyOutput> rawTableWriter); - -//////////////////////////////////////////////////////////////////////////////// - -template <class T> -inline ::TIntrusivePtr<typename TRowTraits<T>::IReaderImpl> CreateJobReaderImpl(TRawTableReaderPtr rawTableReader); - -template <> -inline ::TIntrusivePtr<INodeReaderImpl> CreateJobReaderImpl<TNode>(TRawTableReaderPtr rawTableReader) -{ - return CreateJobNodeReader(rawTableReader); -} - -template <> -inline ::TIntrusivePtr<IYaMRReaderImpl> CreateJobReaderImpl<TYaMRRow>(TRawTableReaderPtr rawTableReader) -{ - return CreateJobYaMRReader(rawTableReader); -} - -template <> -inline ::TIntrusivePtr<IProtoReaderImpl> CreateJobReaderImpl<Message>(TRawTableReaderPtr rawTableReader) -{ - return CreateJobProtoReader(rawTableReader); -} - -template <class T> -inline ::TIntrusivePtr<typename TRowTraits<T>::IReaderImpl> CreateJobReaderImpl(TRawTableReaderPtr rawTableReader) -{ - if constexpr (TIsBaseOf<Message, T>::Value || NDetail::TIsProtoOneOf<T>::value) { - return CreateJobProtoReader(rawTableReader); - } else { - static_assert(TDependentFalse<T>, "Unknown row type"); - } -} - -template <class T> -inline TTableReaderPtr<T> CreateJobReader(TRawTableReaderPtr rawTableReader) -{ - return new TTableReader<T>(CreateJobReaderImpl<T>(rawTableReader)); -} - -//////////////////////////////////////////////////////////////////////////////// - -template <class T> -TTableWriterPtr<T> CreateJobWriter(THolder<IProxyOutput> rawJobWriter); - -template <> -inline TTableWriterPtr<TNode> CreateJobWriter<TNode>(THolder<IProxyOutput> rawJobWriter) -{ - return new TTableWriter<TNode>(CreateJobNodeWriter(std::move(rawJobWriter))); -} - -template <> -inline TTableWriterPtr<TYaMRRow> CreateJobWriter<TYaMRRow>(THolder<IProxyOutput> rawJobWriter) -{ - return new TTableWriter<TYaMRRow>(CreateJobYaMRWriter(std::move(rawJobWriter))); -} - -template <> -inline TTableWriterPtr<Message> 
CreateJobWriter<Message>(THolder<IProxyOutput> rawJobWriter) -{ - return new TTableWriter<Message>(CreateJobProtoWriter(std::move(rawJobWriter))); -} - -template <class T, class = void> -struct TProtoWriterCreator; - -template <class T> -struct TProtoWriterCreator<T, std::enable_if_t<TIsBaseOf<Message, T>::Value>> -{ - static TTableWriterPtr<T> Create(::TIntrusivePtr<IProtoWriterImpl> writer) - { - return new TTableWriter<T>(writer); - } -}; - -template <class T> -inline TTableWriterPtr<T> CreateJobWriter(THolder<IProxyOutput> rawJobWriter) -{ - if constexpr (TIsBaseOf<Message, T>::Value) { - return TProtoWriterCreator<T>::Create(CreateJobProtoWriter(std::move(rawJobWriter))); - } else { - static_assert(TDependentFalse<T>, "Unknown row type"); - } -} - -//////////////////////////////////////////////////////////////////////////////// - -template <class T> -void TOperationInputSpecBase::AddInput(const TRichYPath& path) -{ - Inputs_.push_back(path); - StructuredInputs_.emplace_back(Structured<T>(path)); -} - -template <class T> -void TOperationInputSpecBase::SetInput(size_t tableIndex, const TRichYPath& path) -{ - NDetail::Assign(Inputs_, tableIndex, path); - NDetail::Assign(StructuredInputs_, tableIndex, Structured<T>(path)); -} - - -template <class T> -void TOperationOutputSpecBase::AddOutput(const TRichYPath& path) -{ - Outputs_.push_back(path); - StructuredOutputs_.emplace_back(Structured<T>(path)); -} - -template <class T> -void TOperationOutputSpecBase::SetOutput(size_t tableIndex, const TRichYPath& path) -{ - NDetail::Assign(Outputs_, tableIndex, path); - NDetail::Assign(StructuredOutputs_, tableIndex, Structured<T>(path)); -} - -template <class TDerived> -template <class T> -TDerived& TOperationIOSpec<TDerived>::AddInput(const TRichYPath& path) -{ - static_assert(!std::is_same<T, Message>::value, "input type can't be Message, it can only be its strict subtype (see st.yandex-team.ru/YT-7609)"); - TOperationInputSpecBase::AddInput<T>(path); - return 
*static_cast<TDerived*>(this); -} - -template <class TDerived> -template <class T> -TDerived& TOperationIOSpec<TDerived>::SetInput(size_t tableIndex, const TRichYPath& path) -{ - static_assert(!std::is_same<T, Message>::value, "input type can't be Message, it can only be its strict subtype (see st.yandex-team.ru/YT-7609)"); - TOperationInputSpecBase::SetInput<T>(tableIndex, path); - return *static_cast<TDerived*>(this); -} - - -template <class TDerived> -template <class T> -TDerived& TOperationIOSpec<TDerived>::AddOutput(const TRichYPath& path) -{ - static_assert(!std::is_same<T, Message>::value, "output type can't be Message, it can only be its strict subtype (see st.yandex-team.ru/YT-7609)"); - TOperationOutputSpecBase::AddOutput<T>(path); - return *static_cast<TDerived*>(this); -} - -template <class TDerived> -template <class T> -TDerived& TOperationIOSpec<TDerived>::SetOutput(size_t tableIndex, const TRichYPath& path) -{ - static_assert(!std::is_same<T, Message>::value, "output type can't be Message, it can only be its strict subtype (see st.yandex-team.ru/YT-7609)"); - TOperationOutputSpecBase::SetOutput<T>(tableIndex, path); - return *static_cast<TDerived*>(this); -} - -template <class TDerived> -TDerived& TOperationIOSpec<TDerived>::AddStructuredInput(TStructuredTablePath path) -{ - TOperationInputSpecBase::AddStructuredInput(std::move(path)); - return *static_cast<TDerived*>(this); -} - -template <class TDerived> -TDerived& TOperationIOSpec<TDerived>::AddStructuredOutput(TStructuredTablePath path) -{ - TOperationOutputSpecBase::AddStructuredOutput(std::move(path)); - return *static_cast<TDerived*>(this); -} - -//////////////////////////////////////////////////////////////////////////////// - -template <class T> -TVanillaTask& TVanillaTask::AddOutput(const TRichYPath& path) -{ - static_assert(!std::is_same<T, Message>::value, "output type can't be Message, it can only be its strict subtype (see st.yandex-team.ru/YT-7609)"); - 
TOperationOutputSpecBase::AddOutput<T>(path); - return *this; -} - -template <class T> -TVanillaTask& TVanillaTask::SetOutput(size_t tableIndex, const TRichYPath& path) -{ - static_assert(!std::is_same<T, Message>::value, "output type can't be Message, it can only be its strict subtype (see st.yandex-team.ru/YT-7609)"); - TOperationOutputSpecBase::SetOutput<T>(tableIndex, path); - return *this; -} - -//////////////////////////////////////////////////////////////////////////////// - -namespace NDetail { - -void ResetUseClientProtobuf(const char* methodName); - -} // namespace NDetail - -template <class TDerived> -TDerived& TOperationIOSpec<TDerived>::AddProtobufInput_VerySlow_Deprecated(const TRichYPath& path) -{ - NDetail::ResetUseClientProtobuf("AddProtobufInput_VerySlow_Deprecated"); - Inputs_.push_back(path); - StructuredInputs_.emplace_back(TStructuredTablePath(path, TProtobufTableStructure{nullptr})); - return *static_cast<TDerived*>(this); -} - -template <class TDerived> -TDerived& TOperationIOSpec<TDerived>::AddProtobufOutput_VerySlow_Deprecated(const TRichYPath& path) -{ - NDetail::ResetUseClientProtobuf("AddProtobufOutput_VerySlow_Deprecated"); - Outputs_.push_back(path); - StructuredOutputs_.emplace_back(TStructuredTablePath(path, TProtobufTableStructure{nullptr})); - return *static_cast<TDerived*>(this); -} - -//////////////////////////////////////////////////////////////////////////////// - -template <typename TRow> -TJobOperationPreparer::TInputGroup& TJobOperationPreparer::TInputGroup::Description() -{ - for (auto i : Indices_) { - Preparer_.InputDescription<TRow>(i); - } - return *this; -} - -template <typename TRow> -TJobOperationPreparer::TOutputGroup& TJobOperationPreparer::TOutputGroup::Description(bool inferSchema) -{ - for (auto i : Indices_) { - Preparer_.OutputDescription<TRow>(i, inferSchema); - } - return *this; -} - -//////////////////////////////////////////////////////////////////////////////// - -template <typename TCont> 
-TJobOperationPreparer::TInputGroup TJobOperationPreparer::BeginInputGroup(const TCont& indices) -{ - for (auto i : indices) { - ValidateInputTableIndex(i, TStringBuf("BeginInputGroup()")); - } - return TInputGroup(*this, TVector<int>(std::begin(indices), std::end(indices))); -} - -template <typename TCont> -TJobOperationPreparer::TOutputGroup TJobOperationPreparer::BeginOutputGroup(const TCont& indices) -{ - for (auto i : indices) { - ValidateOutputTableIndex(i, TStringBuf("BeginOutputGroup()")); - } - return TOutputGroup(*this, indices); -} - - -template <typename TRow> -TJobOperationPreparer& TJobOperationPreparer::InputDescription(int tableIndex) -{ - ValidateMissingInputDescription(tableIndex); - InputTableDescriptions_[tableIndex] = StructuredTableDescription<TRow>(); - return *this; -} - -template <typename TRow> -TJobOperationPreparer& TJobOperationPreparer::OutputDescription(int tableIndex, bool inferSchema) -{ - ValidateMissingOutputDescription(tableIndex); - OutputTableDescriptions_[tableIndex] = StructuredTableDescription<TRow>(); - if (inferSchema && !OutputSchemas_[tableIndex]) { - OutputSchemas_[tableIndex] = CreateTableSchema<TRow>(); - } - return *this; -} - -//////////////////////////////////////////////////////////////////////////////// - -template <class TDerived> -template <class TRow> -TDerived& TIntermediateTablesHintSpec<TDerived>::HintMapOutput() -{ - IntermediateMapOutputDescription_ = StructuredTableDescription<TRow>(); - return *static_cast<TDerived*>(this); -} - -template <class TDerived> -template <class TRow> -TDerived& TIntermediateTablesHintSpec<TDerived>::AddMapOutput(const TRichYPath& path) -{ - MapOutputs_.push_back(path); - StructuredMapOutputs_.emplace_back(Structured<TRow>(path)); - return *static_cast<TDerived*>(this); -} - -template <class TDerived> -template <class TRow> -TDerived& TIntermediateTablesHintSpec<TDerived>::HintReduceCombinerInput() -{ - IntermediateReduceCombinerInputDescription_ = 
StructuredTableDescription<TRow>(); - return *static_cast<TDerived*>(this); -} - -template <class TDerived> -template <class TRow> -TDerived& TIntermediateTablesHintSpec<TDerived>::HintReduceCombinerOutput() -{ - IntermediateReduceCombinerOutputDescription_ = StructuredTableDescription<TRow>(); - return *static_cast<TDerived*>(this); -} - -template <class TDerived> -template <class TRow> -TDerived& TIntermediateTablesHintSpec<TDerived>::HintReduceInput() -{ - IntermediateReducerInputDescription_ = StructuredTableDescription<TRow>(); - return *static_cast<TDerived*>(this); -} - -template <class TDerived> -const TVector<TStructuredTablePath>& TIntermediateTablesHintSpec<TDerived>::GetStructuredMapOutputs() const -{ - return StructuredMapOutputs_; -} - -template <class TDerived> -const TMaybe<TTableStructure>& TIntermediateTablesHintSpec<TDerived>::GetIntermediateMapOutputDescription() const -{ - return IntermediateMapOutputDescription_; -} - -template <class TDerived> -const TMaybe<TTableStructure>& TIntermediateTablesHintSpec<TDerived>::GetIntermediateReduceCombinerInputDescription() const -{ - return IntermediateReduceCombinerInputDescription_; -} - -template <class TDerived> -const TMaybe<TTableStructure>& TIntermediateTablesHintSpec<TDerived>::GetIntermediateReduceCombinerOutputDescription() const -{ - return IntermediateReduceCombinerOutputDescription_; -} - -template <class TDerived> -const TMaybe<TTableStructure>& TIntermediateTablesHintSpec<TDerived>::GetIntermediateReducerInputDescription() const -{ - return IntermediateReducerInputDescription_; -} - -//////////////////////////////////////////////////////////////////////////////// - -struct TReducerContext -{ - bool Break = false; - static TReducerContext* Get() { return Singleton<TReducerContext>(); } -}; - -template <class TR, class TW> -inline void IReducer<TR, TW>::Break() -{ - TReducerContext::Get()->Break = true; -} - -template <typename TReader, typename TWriter> -void FeedJobInput( - IMapper<TReader, 
TWriter>* mapper, - typename TRowTraits<typename TReader::TRowType>::IReaderImpl* readerImpl, - TWriter* writer) -{ - using TInputRow = typename TReader::TRowType; - - auto reader = MakeIntrusive<TTableReader<TInputRow>>(readerImpl); - mapper->Do(reader.Get(), writer); -} - -template <typename TReader, typename TWriter> -void FeedJobInput( - IReducer<TReader, TWriter>* reducer, - typename TRowTraits<typename TReader::TRowType>::IReaderImpl* readerImpl, - TWriter* writer) -{ - using TInputRow = typename TReader::TRowType; - - auto rangesReader = MakeIntrusive<TTableRangesReader<TInputRow>>(readerImpl); - for (; rangesReader->IsValid(); rangesReader->Next()) { - reducer->Do(&rangesReader->GetRange(), writer); - if (TReducerContext::Get()->Break) { - break; - } - } -} - -template <typename TReader, typename TWriter> -void FeedJobInput( - IAggregatorReducer<TReader, TWriter>* reducer, - typename TRowTraits<typename TReader::TRowType>::IReaderImpl* readerImpl, - TWriter* writer) -{ - using TInputRow = typename TReader::TRowType; - - auto rangesReader = MakeIntrusive<TTableRangesReader<TInputRow>>(readerImpl); - reducer->Do(rangesReader.Get(), writer); -} - -template <class TRawJob> -int RunRawJob(size_t outputTableCount, IInputStream& jobStateStream) -{ - TRawJobContext context(outputTableCount); - - TRawJob job; - job.Load(jobStateStream); - job.Do(context); - return 0; -} - -template <> -inline int RunRawJob<TCommandRawJob>(size_t /* outputTableCount */, IInputStream& /* jobStateStream */) -{ - Y_FAIL(); -} - -template <class TVanillaJob> -int RunVanillaJob(size_t outputTableCount, IInputStream& jobStateStream) -{ - TVanillaJob job; - job.Load(jobStateStream); - - if constexpr (std::is_base_of<IVanillaJob<>, TVanillaJob>::value) { - Y_VERIFY(outputTableCount == 0, "Void vanilla job expects zero 'outputTableCount'"); - job.Do(); - } else { - Y_VERIFY(outputTableCount, "Vanilla job with table writer expects nonzero 'outputTableCount'"); - using TOutputRow = typename 
TVanillaJob::TWriter::TRowType; - - THolder<IProxyOutput> rawJobWriter; - if (auto customWriter = job.CreateCustomRawJobWriter(outputTableCount)) { - rawJobWriter = std::move(customWriter); - } else { - rawJobWriter = CreateRawJobWriter(outputTableCount); - } - auto writer = CreateJobWriter<TOutputRow>(std::move(rawJobWriter)); - - job.Start(writer.Get()); - job.Do(writer.Get()); - job.Finish(writer.Get()); - - writer->Finish(); - } - return 0; -} - -template <> -inline int RunVanillaJob<TCommandVanillaJob>(size_t /* outputTableCount */, IInputStream& /* jobStateStream */) -{ - Y_FAIL(); -} - -template <class TJob> - requires TIsBaseOf<IStructuredJob, TJob>::Value -int RunJob(size_t outputTableCount, IInputStream& jobStateStream) -{ - using TInputRow = typename TJob::TReader::TRowType; - using TOutputRow = typename TJob::TWriter::TRowType; - - auto job = MakeIntrusive<TJob>(); - job->Load(jobStateStream); - - TRawTableReaderPtr rawJobReader; - if (auto customReader = job->CreateCustomRawJobReader(/*fd*/ 0)) { - rawJobReader = customReader; - } else { - rawJobReader = CreateRawJobReader(/*fd*/ 0); - } - auto readerImpl = CreateJobReaderImpl<TInputRow>(rawJobReader); - - // Many users don't expect to have jobs with empty input so we skip such jobs. - if (!readerImpl->IsValid()) { - return 0; - } - - THolder<IProxyOutput> rawJobWriter; - if (auto customWriter = job->CreateCustomRawJobWriter(outputTableCount)) { - rawJobWriter = std::move(customWriter); - } else { - rawJobWriter = CreateRawJobWriter(outputTableCount); - } - auto writer = CreateJobWriter<TOutputRow>(std::move(rawJobWriter)); - - job->Start(writer.Get()); - FeedJobInput(job.Get(), readerImpl.Get(), writer.Get()); - job->Finish(writer.Get()); - - writer->Finish(); - - return 0; -} - -// -// We leave RunMapJob/RunReduceJob/RunAggregatorReducer for backward compatibility, -// some user use them already. 
:( - -template <class TMapper> -int RunMapJob(size_t outputTableCount, IInputStream& jobStateStream) -{ - return RunJob<TMapper>(outputTableCount, jobStateStream); -} - -template <class TReducer> -int RunReduceJob(size_t outputTableCount, IInputStream& jobStateStream) -{ - return RunJob<TReducer>(outputTableCount, jobStateStream); -} - -template <class TReducer> -int RunAggregatorReducer(size_t outputTableCount, IInputStream& jobStateStream) -{ - return RunJob<TReducer>(outputTableCount, jobStateStream); -} - -//////////////////////////////////////////////////////////////////////////////// - -template <typename T, typename = void> -struct TIsConstructibleFromNode - : std::false_type -{ }; - -template <typename T> -struct TIsConstructibleFromNode<T, std::void_t<decltype(T::FromNode(std::declval<TNode&>()))>> - : std::true_type -{ }; - -template <class TJob> -::TIntrusivePtr<NYT::IStructuredJob> ConstructJobFromNode(const TNode& node) -{ - if constexpr (TIsConstructibleFromNode<TJob>::value) { - Y_ENSURE(node.GetType() != TNode::Undefined, - "job has FromNode method but constructor arguments were not provided"); - return TJob::FromNode(node); - } else { - Y_ENSURE(node.GetType() == TNode::Undefined, - "constructor arguments provided but job does not contain FromNode method"); - return MakeIntrusive<TJob>(); - } -} - -//////////////////////////////////////////////////////////////////////////////// - -using TJobFunction = int (*)(size_t, IInputStream&); -using TConstructJobFunction = ::TIntrusivePtr<NYT::IStructuredJob> (*)(const TNode&); - -class TJobFactory -{ -public: - static TJobFactory* Get() - { - return Singleton<TJobFactory>(); - } - - template <class TJob> - void RegisterJob(const char* name) - { - RegisterJobImpl<TJob>(name, RunJob<TJob>); - JobConstructors[name] = ConstructJobFromNode<TJob>; - } - - template <class TRawJob> - void RegisterRawJob(const char* name) - { - RegisterJobImpl<TRawJob>(name, RunRawJob<TRawJob>); - } - - template <class TVanillaJob> 
- void RegisterVanillaJob(const char* name) - { - RegisterJobImpl<TVanillaJob>(name, RunVanillaJob<TVanillaJob>); - } - - TString GetJobName(const IJob* job) - { - const auto typeIndex = std::type_index(typeid(*job)); - CheckJobRegistered(typeIndex); - return JobNames[typeIndex]; - } - - TJobFunction GetJobFunction(const char* name) - { - CheckNameRegistered(name); - return JobFunctions[name]; - } - - TConstructJobFunction GetConstructingFunction(const char* name) - { - CheckNameRegistered(name); - return JobConstructors[name]; - } - -private: - TMap<std::type_index, TString> JobNames; - THashMap<TString, TJobFunction> JobFunctions; - THashMap<TString, TConstructJobFunction> JobConstructors; - - template <typename TJob, typename TRunner> - void RegisterJobImpl(const char* name, TRunner runner) { - const auto typeIndex = std::type_index(typeid(TJob)); - CheckNotRegistered(typeIndex, name); - JobNames[typeIndex] = name; - JobFunctions[name] = runner; - } - - void CheckNotRegistered(const std::type_index& typeIndex, const char* name) - { - Y_ENSURE(!JobNames.contains(typeIndex), - "type_info '" << typeIndex.name() << "'" - "is already registered under name '" << JobNames[typeIndex] << "'"); - Y_ENSURE(!JobFunctions.contains(name), - "job with name '" << name << "' is already registered"); - } - - void CheckJobRegistered(const std::type_index& typeIndex) - { - Y_ENSURE(JobNames.contains(typeIndex), - "type_info '" << typeIndex.name() << "' is not registered, use REGISTER_* macros"); - } - - void CheckNameRegistered(const char* name) - { - Y_ENSURE(JobFunctions.contains(name), - "job with name '" << name << "' is not registered, use REGISTER_* macros"); - } -}; - -//////////////////////////////////////////////////////////////////////////////// - -template <class TMapper> -struct TMapperRegistrator -{ - TMapperRegistrator(const char* name) - { - static_assert(TMapper::JobType == IJob::EType::Mapper, - "REGISTER_MAPPER is not compatible with this job class"); - - 
NYT::TJobFactory::Get()->RegisterJob<TMapper>(name); - } -}; - -template <class TReducer> -struct TReducerRegistrator -{ - TReducerRegistrator(const char* name) - { - static_assert(TReducer::JobType == IJob::EType::Reducer || - TReducer::JobType == IJob::EType::ReducerAggregator, - "REGISTER_REDUCER is not compatible with this job class"); - - NYT::TJobFactory::Get()->RegisterJob<TReducer>(name); - } -}; - -template <class TRawJob> -struct TRawJobRegistrator -{ - TRawJobRegistrator(const char* name) - { - static_assert(TRawJob::JobType == IJob::EType::RawJob, - "REGISTER_RAW_JOB is not compatible with this job class"); - NYT::TJobFactory::Get()->RegisterRawJob<TRawJob>(name); - } -}; - -template <class TVanillaJob> -struct TVanillaJobRegistrator -{ - TVanillaJobRegistrator(const char* name) - { - static_assert(TVanillaJob::JobType == IJob::EType::VanillaJob, - "REGISTER_VANILLA_JOB is not compatible with this job class"); - NYT::TJobFactory::Get()->RegisterVanillaJob<TVanillaJob>(name); - } -}; - -//////////////////////////////////////////////////////////////////////////////// - -inline TString YtRegistryTypeName(const TString& name) { - TString res = name; -#ifdef _win_ - SubstGlobal(res, "class ", ""); -#endif - return res; -} - -//////////////////////////////////////////////////////////////////////////////// - -#define REGISTER_MAPPER(...) \ -static const NYT::TMapperRegistrator<__VA_ARGS__> \ -Y_GENERATE_UNIQUE_ID(TJobRegistrator)(NYT::YtRegistryTypeName(TypeName<__VA_ARGS__>()).data()); - -#define REGISTER_NAMED_MAPPER(name, ...) \ -static const NYT::TMapperRegistrator<__VA_ARGS__> \ -Y_GENERATE_UNIQUE_ID(TJobRegistrator)(name); - -#define REGISTER_REDUCER(...) \ -static const NYT::TReducerRegistrator<__VA_ARGS__> \ -Y_GENERATE_UNIQUE_ID(TJobRegistrator)(NYT::YtRegistryTypeName(TypeName<__VA_ARGS__>()).data()); - -#define REGISTER_NAMED_REDUCER(name, ...) 
\ -static const NYT::TReducerRegistrator<__VA_ARGS__> \ -Y_GENERATE_UNIQUE_ID(TJobRegistrator)(name); - -#define REGISTER_NAMED_RAW_JOB(name, ...) \ -static const NYT::TRawJobRegistrator<__VA_ARGS__> \ -Y_GENERATE_UNIQUE_ID(TJobRegistrator)(name); - -#define REGISTER_RAW_JOB(...) \ -REGISTER_NAMED_RAW_JOB((NYT::YtRegistryTypeName(TypeName<__VA_ARGS__>()).data()), __VA_ARGS__) - -#define REGISTER_NAMED_VANILLA_JOB(name, ...) \ -static NYT::TVanillaJobRegistrator<__VA_ARGS__> \ -Y_GENERATE_UNIQUE_ID(TJobRegistrator)(name); - -#define REGISTER_VANILLA_JOB(...) \ -REGISTER_NAMED_VANILLA_JOB((NYT::YtRegistryTypeName(TypeName<__VA_ARGS__>()).data()), __VA_ARGS__) - -//////////////////////////////////////////////////////////////////////////////// - -template <typename TReader, typename TWriter> -TStructuredRowStreamDescription IMapper<TReader, TWriter>::GetInputRowStreamDescription() const -{ - return NYT::NDetail::GetStructuredRowStreamDescription<typename TReader::TRowType>(); -} - -template <typename TReader, typename TWriter> -TStructuredRowStreamDescription IMapper<TReader, TWriter>::GetOutputRowStreamDescription() const -{ - return NYT::NDetail::GetStructuredRowStreamDescription<typename TWriter::TRowType>(); -} - -//////////////////////////////////////////////////////////////////////////////// - -template <typename TReader, typename TWriter> -TStructuredRowStreamDescription IReducer<TReader, TWriter>::GetInputRowStreamDescription() const -{ - return NYT::NDetail::GetStructuredRowStreamDescription<typename TReader::TRowType>(); -} - -template <typename TReader, typename TWriter> -TStructuredRowStreamDescription IReducer<TReader, TWriter>::GetOutputRowStreamDescription() const -{ - return NYT::NDetail::GetStructuredRowStreamDescription<typename TWriter::TRowType>(); -} - -//////////////////////////////////////////////////////////////////////////////// - -template <typename TReader, typename TWriter> -TStructuredRowStreamDescription IAggregatorReducer<TReader, 
TWriter>::GetInputRowStreamDescription() const -{ - return NYT::NDetail::GetStructuredRowStreamDescription<typename TReader::TRowType>(); -} - -template <typename TReader, typename TWriter> -TStructuredRowStreamDescription IAggregatorReducer<TReader, TWriter>::GetOutputRowStreamDescription() const -{ - return NYT::NDetail::GetStructuredRowStreamDescription<typename TWriter::TRowType>(); -} - -//////////////////////////////////////////////////////////////////////////////// - -template <typename TWriter> -TStructuredRowStreamDescription IVanillaJob<TWriter>::GetInputRowStreamDescription() const -{ - return TVoidStructuredRowStream(); -} - -template <typename TWriter> -TStructuredRowStreamDescription IVanillaJob<TWriter>::GetOutputRowStreamDescription() const -{ - return NYT::NDetail::GetStructuredRowStreamDescription<typename TWriter::TRowType>(); -} - -//////////////////////////////////////////////////////////////////////////////// - -} // namespace NYT diff --git a/yt/cpp/mapreduce/interface/operation.cpp b/yt/cpp/mapreduce/interface/operation.cpp deleted file mode 100644 index 706fc4caa4c..00000000000 --- a/yt/cpp/mapreduce/interface/operation.cpp +++ /dev/null @@ -1,663 +0,0 @@ -#include "operation.h" - -#include <util/generic/iterator_range.h> - -namespace NYT { - -//////////////////////////////////////////////////////////////////////////////// - -namespace NDetail { - i64 OutputTableCount = -1; -} // namespace NDetail - -//////////////////////////////////////////////////////////////////////////////// - -TTaskName::TTaskName(TString taskName) - : TaskName_(std::move(taskName)) -{ } - -TTaskName::TTaskName(const char* taskName) - : TaskName_(taskName) -{ } - -TTaskName::TTaskName(ETaskName taskName) - : TaskName_(ToString(taskName)) -{ } - -const TString& TTaskName::Get() const -{ - return TaskName_; -} - -//////////////////////////////////////////////////////////////////////////////// - -TCommandRawJob::TCommandRawJob(TStringBuf command) - : Command_(command) -{ 
} - -const TString& TCommandRawJob::GetCommand() const -{ - return Command_; -} - -void TCommandRawJob::Do(const TRawJobContext& /* jobContext */) -{ - Y_FAIL("TCommandRawJob::Do must not be called"); -} - -REGISTER_NAMED_RAW_JOB("NYT::TCommandRawJob", TCommandRawJob) - -//////////////////////////////////////////////////////////////////////////////// - -TCommandVanillaJob::TCommandVanillaJob(TStringBuf command) - : Command_(command) -{ } - -const TString& TCommandVanillaJob::GetCommand() const -{ - return Command_; -} - -void TCommandVanillaJob::Do() -{ - Y_FAIL("TCommandVanillaJob::Do must not be called"); -} - -REGISTER_NAMED_VANILLA_JOB("NYT::TCommandVanillaJob", TCommandVanillaJob); - -//////////////////////////////////////////////////////////////////////////////// - -bool operator==(const TUnspecifiedTableStructure&, const TUnspecifiedTableStructure&) -{ - return true; -} - -bool operator==(const TProtobufTableStructure& lhs, const TProtobufTableStructure& rhs) -{ - return lhs.Descriptor == rhs.Descriptor; -} - -//////////////////////////////////////////////////////////////////////////////// - -const TVector<TStructuredTablePath>& TOperationInputSpecBase::GetStructuredInputs() const -{ - return StructuredInputs_; -} - -const TVector<TStructuredTablePath>& TOperationOutputSpecBase::GetStructuredOutputs() const -{ - return StructuredOutputs_; -} - -void TOperationInputSpecBase::AddStructuredInput(TStructuredTablePath path) -{ - Inputs_.push_back(path.RichYPath); - StructuredInputs_.push_back(std::move(path)); -} - -void TOperationOutputSpecBase::AddStructuredOutput(TStructuredTablePath path) -{ - Outputs_.push_back(path.RichYPath); - StructuredOutputs_.push_back(std::move(path)); -} - -//////////////////////////////////////////////////////////////////////////////// - -TVanillaTask& TVanillaTask::AddStructuredOutput(TStructuredTablePath path) -{ - TOperationOutputSpecBase::AddStructuredOutput(std::move(path)); - return *this; -} - 
-//////////////////////////////////////////////////////////////////////////////// - -TStructuredRowStreamDescription IVanillaJob<void>::GetInputRowStreamDescription() const -{ - return TVoidStructuredRowStream(); -} - -TStructuredRowStreamDescription IVanillaJob<void>::GetOutputRowStreamDescription() const -{ - return TVoidStructuredRowStream(); -} - -//////////////////////////////////////////////////////////////////////////////// - -TRawJobContext::TRawJobContext(size_t outputTableCount) - : InputFile_(Duplicate(0)) -{ - for (size_t i = 0; i != outputTableCount; ++i) { - OutputFileList_.emplace_back(Duplicate(3 * i + 1)); - } -} - -const TFile& TRawJobContext::GetInputFile() const -{ - return InputFile_; -} - -const TVector<TFile>& TRawJobContext::GetOutputFileList() const -{ - return OutputFileList_; -} - -//////////////////////////////////////////////////////////////////////////////// - -TUserJobSpec& TUserJobSpec::AddLocalFile( - const TLocalFilePath& path, - const TAddLocalFileOptions& options) -{ - LocalFiles_.emplace_back(path, options); - return *this; -} - -TUserJobSpec& TUserJobSpec::JobBinaryLocalPath(TString path, TMaybe<TString> md5) -{ - JobBinary_ = TJobBinaryLocalPath{path, md5}; - return *this; -} - -TUserJobSpec& TUserJobSpec::JobBinaryCypressPath(TString path, TMaybe<TTransactionId> transactionId) -{ - JobBinary_ = TJobBinaryCypressPath{path, transactionId}; - return *this; -} - -const TJobBinaryConfig& TUserJobSpec::GetJobBinary() const -{ - return JobBinary_; -} - -TVector<std::tuple<TLocalFilePath, TAddLocalFileOptions>> TUserJobSpec::GetLocalFiles() const -{ - return LocalFiles_; -} - -//////////////////////////////////////////////////////////////////////////////// - -TJobOperationPreparer::TInputGroup::TInputGroup(TJobOperationPreparer& preparer, TVector<int> indices) - : Preparer_(preparer) - , Indices_(std::move(indices)) -{ } - -TJobOperationPreparer::TInputGroup& TJobOperationPreparer::TInputGroup::ColumnRenaming(const THashMap<TString, 
TString>& renaming) -{ - for (auto i : Indices_) { - Preparer_.InputColumnRenaming(i, renaming); - } - return *this; -} - -TJobOperationPreparer::TInputGroup& TJobOperationPreparer::TInputGroup::ColumnFilter(const TVector<TString>& columns) -{ - for (auto i : Indices_) { - Preparer_.InputColumnFilter(i, columns); - } - return *this; -} - -TJobOperationPreparer& TJobOperationPreparer::TInputGroup::EndInputGroup() -{ - return Preparer_; -} - -TJobOperationPreparer::TOutputGroup::TOutputGroup(TJobOperationPreparer& preparer, TVector<int> indices) - : Preparer_(preparer) - , Indices_(std::move(indices)) -{ } - -TJobOperationPreparer::TOutputGroup& TJobOperationPreparer::TOutputGroup::Schema(const TTableSchema &schema) -{ - for (auto i : Indices_) { - Preparer_.OutputSchema(i, schema); - } - return *this; -} - -TJobOperationPreparer::TOutputGroup& TJobOperationPreparer::TOutputGroup::NoSchema() -{ - for (auto i : Indices_) { - Preparer_.NoOutputSchema(i); - } - return *this; -} - -TJobOperationPreparer& TJobOperationPreparer::TOutputGroup::EndOutputGroup() -{ - return Preparer_; -} - -//////////////////////////////////////////////////////////////////////////////// - -TJobOperationPreparer::TJobOperationPreparer(const IOperationPreparationContext& context) - : Context_(context) - , OutputSchemas_(context.GetOutputCount()) - , InputColumnRenamings_(context.GetInputCount()) - , InputColumnFilters_(context.GetInputCount()) - , InputTableDescriptions_(context.GetInputCount()) - , OutputTableDescriptions_(context.GetOutputCount()) -{ } - -TJobOperationPreparer::TInputGroup TJobOperationPreparer::BeginInputGroup(int begin, int end) -{ - Y_ENSURE_EX(begin <= end, TApiUsageError() - << "BeginInputGroup(): begin must not exceed end, got " << begin << ", " << end); - TVector<int> indices; - for (int i = begin; i < end; ++i) { - ValidateInputTableIndex(i, TStringBuf("BeginInputGroup()")); - indices.push_back(i); - } - return TInputGroup(*this, std::move(indices)); -} - - 
-TJobOperationPreparer::TOutputGroup TJobOperationPreparer::BeginOutputGroup(int begin, int end) -{ - Y_ENSURE_EX(begin <= end, TApiUsageError() - << "BeginOutputGroup(): begin must not exceed end, got " << begin << ", " << end); - TVector<int> indices; - for (int i = begin; i < end; ++i) { - ValidateOutputTableIndex(i, TStringBuf("BeginOutputGroup()")); - indices.push_back(i); - } - return TOutputGroup(*this, std::move(indices)); -} - -TJobOperationPreparer& TJobOperationPreparer::NodeOutput(int tableIndex) -{ - ValidateMissingOutputDescription(tableIndex); - OutputTableDescriptions_[tableIndex] = StructuredTableDescription<TNode>(); - return *this; -} - -TJobOperationPreparer& TJobOperationPreparer::OutputSchema(int tableIndex, TTableSchema schema) -{ - ValidateMissingOutputSchema(tableIndex); - OutputSchemas_[tableIndex] = std::move(schema); - return *this; -} - -TJobOperationPreparer& TJobOperationPreparer::NoOutputSchema(int tableIndex) -{ - ValidateMissingOutputSchema(tableIndex); - OutputSchemas_[tableIndex] = EmptyNonstrictSchema(); - return *this; -} - -TJobOperationPreparer& TJobOperationPreparer::InputColumnRenaming( - int tableIndex, - const THashMap<TString,TString>& renaming) -{ - ValidateInputTableIndex(tableIndex, TStringBuf("InputColumnRenaming()")); - InputColumnRenamings_[tableIndex] = renaming; - return *this; -} - -TJobOperationPreparer& TJobOperationPreparer::InputColumnFilter(int tableIndex, const TVector<TString>& columns) -{ - ValidateInputTableIndex(tableIndex, TStringBuf("InputColumnFilter()")); - InputColumnFilters_[tableIndex] = columns; - return *this; -} - -TJobOperationPreparer& TJobOperationPreparer::FormatHints(TUserJobFormatHints newFormatHints) -{ - FormatHints_ = newFormatHints; - return *this; -} - -void TJobOperationPreparer::Finish() -{ - FinallyValidate(); -} - -TVector<TTableSchema> TJobOperationPreparer::GetOutputSchemas() -{ - TVector<TTableSchema> result; - result.reserve(OutputSchemas_.size()); - for (auto& schema : 
OutputSchemas_) { - Y_VERIFY(schema.Defined()); - result.push_back(std::move(*schema)); - schema.Clear(); - } - return result; -} - -void TJobOperationPreparer::FinallyValidate() const -{ - TVector<int> illegallyMissingSchemaIndices; - for (int i = 0; i < static_cast<int>(OutputSchemas_.size()); ++i) { - if (!OutputSchemas_[i]) { - illegallyMissingSchemaIndices.push_back(i); - } - } - if (illegallyMissingSchemaIndices.empty()) { - return; - } - TApiUsageError error; - error << "Output table schemas are missing: "; - for (auto i : illegallyMissingSchemaIndices) { - error << "no. " << i; - if (auto path = Context_.GetInputPath(i)) { - error << "(" << *path << ")"; - } - error << "; "; - } - ythrow std::move(error); -} - -//////////////////////////////////////////////////////////////////////////////// - -void TJobOperationPreparer::ValidateInputTableIndex(int tableIndex, TStringBuf message) const -{ - Y_ENSURE_EX( - 0 <= tableIndex && tableIndex < static_cast<int>(Context_.GetInputCount()), - TApiUsageError() << - message << ": input table index " << tableIndex << " us out of range [0;" << - OutputSchemas_.size() << ")"); -} - -void TJobOperationPreparer::ValidateOutputTableIndex(int tableIndex, TStringBuf message) const -{ - Y_ENSURE_EX( - 0 <= tableIndex && tableIndex < static_cast<int>(Context_.GetOutputCount()), - TApiUsageError() << - message << ": output table index " << tableIndex << " us out of range [0;" << - OutputSchemas_.size() << ")"); -} - -void TJobOperationPreparer::ValidateMissingOutputSchema(int tableIndex) const -{ - ValidateOutputTableIndex(tableIndex, "ValidateMissingOutputSchema()"); - Y_ENSURE_EX(!OutputSchemas_[tableIndex], - TApiUsageError() << - "Output table schema no. 
" << tableIndex << " " << - "(" << Context_.GetOutputPath(tableIndex).GetOrElse("<unknown path>") << ") " << - "is already set"); -} - -void TJobOperationPreparer::ValidateMissingInputDescription(int tableIndex) const -{ - ValidateInputTableIndex(tableIndex, "ValidateMissingInputDescription()"); - Y_ENSURE_EX(!InputTableDescriptions_[tableIndex], - TApiUsageError() << - "Description for input no. " << tableIndex << " " << - "(" << Context_.GetOutputPath(tableIndex).GetOrElse("<unknown path>") << ") " << - "is already set"); -} - -void TJobOperationPreparer::ValidateMissingOutputDescription(int tableIndex) const -{ - ValidateOutputTableIndex(tableIndex, "ValidateMissingOutputDescription()"); - Y_ENSURE_EX(!OutputTableDescriptions_[tableIndex], - TApiUsageError() << - "Description for output no. " << tableIndex << " " << - "(" << Context_.GetOutputPath(tableIndex).GetOrElse("<unknown path>") << ") " << - "is already set"); -} - -TTableSchema TJobOperationPreparer::EmptyNonstrictSchema() { - return TTableSchema().Strict(false); -} - -//////////////////////////////////////////////////////////////////////////////// - -const TVector<THashMap<TString, TString>>& TJobOperationPreparer::GetInputColumnRenamings() const -{ - return InputColumnRenamings_; -} - -const TVector<TMaybe<TVector<TString>>>& TJobOperationPreparer::GetInputColumnFilters() const -{ - return InputColumnFilters_; -} - -const TVector<TMaybe<TTableStructure>>& TJobOperationPreparer::GetInputDescriptions() const -{ - return InputTableDescriptions_; -} - -const TVector<TMaybe<TTableStructure>>& TJobOperationPreparer::GetOutputDescriptions() const -{ - return OutputTableDescriptions_; -} - -const TUserJobFormatHints& TJobOperationPreparer::GetFormatHints() const -{ - return FormatHints_; -} - -TJobOperationPreparer& TJobOperationPreparer::InputFormatHints(TFormatHints hints) -{ - FormatHints_.InputFormatHints(hints); - return *this; -} - -TJobOperationPreparer& 
TJobOperationPreparer::OutputFormatHints(TFormatHints hints) -{ - FormatHints_.OutputFormatHints(hints); - return *this; -} - -//////////////////////////////////////////////////////////////////////////////// - -void IJob::PrepareOperation(const IOperationPreparationContext& context, TJobOperationPreparer& resultBuilder) const -{ - for (int i = 0; i < context.GetOutputCount(); ++i) { - resultBuilder.NoOutputSchema(i); - } -} - -//////////////////////////////////////////////////////////////////////////////// - -IOperationPtr IOperationClient::Map( - const TMapOperationSpec& spec, - ::TIntrusivePtr<IMapperBase> mapper, - const TOperationOptions& options) -{ - Y_VERIFY(mapper.Get()); - - return DoMap( - spec, - std::move(mapper), - options); -} - -IOperationPtr IOperationClient::Map( - ::TIntrusivePtr<IMapperBase> mapper, - const TOneOrMany<TStructuredTablePath>& input, - const TOneOrMany<TStructuredTablePath>& output, - const TMapOperationSpec& spec, - const TOperationOptions& options) -{ - Y_ENSURE_EX(spec.Inputs_.empty(), - TApiUsageError() << "TMapOperationSpec::Inputs MUST be empty"); - Y_ENSURE_EX(spec.Outputs_.empty(), - TApiUsageError() << "TMapOperationSpec::Outputs MUST be empty"); - - auto mapSpec = spec; - for (const auto& inputPath : input.Parts_) { - mapSpec.AddStructuredInput(inputPath); - } - for (const auto& outputPath : output.Parts_) { - mapSpec.AddStructuredOutput(outputPath); - } - return Map(mapSpec, std::move(mapper), options); -} - -IOperationPtr IOperationClient::Reduce( - const TReduceOperationSpec& spec, - ::TIntrusivePtr<IReducerBase> reducer, - const TOperationOptions& options) -{ - Y_VERIFY(reducer.Get()); - - return DoReduce( - spec, - std::move(reducer), - options); -} - -IOperationPtr IOperationClient::Reduce( - ::TIntrusivePtr<IReducerBase> reducer, - const TOneOrMany<TStructuredTablePath>& input, - const TOneOrMany<TStructuredTablePath>& output, - const TSortColumns& reduceBy, - const TReduceOperationSpec& spec, - const 
TOperationOptions& options) -{ - Y_ENSURE_EX(spec.Inputs_.empty(), - TApiUsageError() << "TReduceOperationSpec::Inputs MUST be empty"); - Y_ENSURE_EX(spec.Outputs_.empty(), - TApiUsageError() << "TReduceOperationSpec::Outputs MUST be empty"); - Y_ENSURE_EX(spec.ReduceBy_.Parts_.empty(), - TApiUsageError() << "TReduceOperationSpec::ReduceBy MUST be empty"); - - auto reduceSpec = spec; - for (const auto& inputPath : input.Parts_) { - reduceSpec.AddStructuredInput(inputPath); - } - for (const auto& outputPath : output.Parts_) { - reduceSpec.AddStructuredOutput(outputPath); - } - reduceSpec.ReduceBy(reduceBy); - return Reduce(reduceSpec, std::move(reducer), options); -} - -IOperationPtr IOperationClient::JoinReduce( - const TJoinReduceOperationSpec& spec, - ::TIntrusivePtr<IReducerBase> reducer, - const TOperationOptions& options) -{ - Y_VERIFY(reducer.Get()); - - return DoJoinReduce( - spec, - std::move(reducer), - options); -} - -IOperationPtr IOperationClient::MapReduce( - const TMapReduceOperationSpec& spec, - ::TIntrusivePtr<IMapperBase> mapper, - ::TIntrusivePtr<IReducerBase> reducer, - const TOperationOptions& options) -{ - Y_VERIFY(reducer.Get()); - - return DoMapReduce( - spec, - std::move(mapper), - nullptr, - std::move(reducer), - options); -} - -IOperationPtr IOperationClient::MapReduce( - const TMapReduceOperationSpec& spec, - ::TIntrusivePtr<IMapperBase> mapper, - ::TIntrusivePtr<IReducerBase> reduceCombiner, - ::TIntrusivePtr<IReducerBase> reducer, - const TOperationOptions& options) -{ - Y_VERIFY(reducer.Get()); - - return DoMapReduce( - spec, - std::move(mapper), - std::move(reduceCombiner), - std::move(reducer), - options); -} - -IOperationPtr IOperationClient::MapReduce( - ::TIntrusivePtr<IMapperBase> mapper, - ::TIntrusivePtr<IReducerBase> reducer, - const TOneOrMany<TStructuredTablePath>& input, - const TOneOrMany<TStructuredTablePath>& output, - const TSortColumns& reduceBy, - TMapReduceOperationSpec spec, - const TOperationOptions& options) -{ - 
Y_ENSURE_EX(spec.Inputs_.empty(), - TApiUsageError() << "TMapReduceOperationSpec::Inputs MUST be empty"); - Y_ENSURE_EX(spec.Outputs_.empty(), - TApiUsageError() << "TMapReduceOperationSpec::Outputs MUST be empty"); - Y_ENSURE_EX(spec.ReduceBy_.Parts_.empty(), - TApiUsageError() << "TMapReduceOperationSpec::ReduceBy MUST be empty"); - - for (const auto& inputPath : input.Parts_) { - spec.AddStructuredInput(inputPath); - } - for (const auto& outputPath : output.Parts_) { - spec.AddStructuredOutput(outputPath); - } - spec.ReduceBy(reduceBy); - return MapReduce(spec, std::move(mapper), std::move(reducer), options); -} - -IOperationPtr IOperationClient::MapReduce( - ::TIntrusivePtr<IMapperBase> mapper, - ::TIntrusivePtr<IReducerBase> reduceCombiner, - ::TIntrusivePtr<IReducerBase> reducer, - const TOneOrMany<TStructuredTablePath>& input, - const TOneOrMany<TStructuredTablePath>& output, - const TSortColumns& reduceBy, - TMapReduceOperationSpec spec, - const TOperationOptions& options) -{ - Y_ENSURE_EX(spec.Inputs_.empty(), - TApiUsageError() << "TMapReduceOperationSpec::Inputs MUST be empty"); - Y_ENSURE_EX(spec.Outputs_.empty(), - TApiUsageError() << "TMapReduceOperationSpec::Outputs MUST be empty"); - Y_ENSURE_EX(spec.ReduceBy_.Parts_.empty(), - TApiUsageError() << "TMapReduceOperationSpec::ReduceBy MUST be empty"); - - for (const auto& inputPath : input.Parts_) { - spec.AddStructuredInput(inputPath); - } - for (const auto& outputPath : output.Parts_) { - spec.AddStructuredOutput(outputPath); - } - spec.ReduceBy(reduceBy); - return MapReduce(spec, std::move(mapper), std::move(reduceCombiner), std::move(reducer), options); -} - -IOperationPtr IOperationClient::Sort( - const TOneOrMany<TRichYPath>& input, - const TRichYPath& output, - const TSortColumns& sortBy, - const TSortOperationSpec& spec, - const TOperationOptions& options) -{ - Y_ENSURE_EX(spec.Inputs_.empty(), - TApiUsageError() << "TSortOperationSpec::Inputs MUST be empty"); - 
Y_ENSURE_EX(spec.Output_.Path_.empty(), - TApiUsageError() << "TSortOperationSpec::Output MUST be empty"); - Y_ENSURE_EX(spec.SortBy_.Parts_.empty(), - TApiUsageError() << "TSortOperationSpec::SortBy MUST be empty"); - - auto sortSpec = spec; - for (const auto& inputPath : input.Parts_) { - sortSpec.AddInput(inputPath); - } - sortSpec.Output(output); - sortSpec.SortBy(sortBy); - return Sort(sortSpec, options); -} - -//////////////////////////////////////////////////////////////////////////////// - -TRawTableReaderPtr IStructuredJob::CreateCustomRawJobReader(int) const -{ - return nullptr; -} - -THolder<IProxyOutput> IStructuredJob::CreateCustomRawJobWriter(size_t) const -{ - return nullptr; -} - -//////////////////////////////////////////////////////////////////////////////// - -} // namespace NYT diff --git a/yt/cpp/mapreduce/interface/operation.h b/yt/cpp/mapreduce/interface/operation.h deleted file mode 100644 index 171a7e4af79..00000000000 --- a/yt/cpp/mapreduce/interface/operation.h +++ /dev/null @@ -1,3494 +0,0 @@ -#pragma once - -/// -/// @file yt/cpp/mapreduce/interface/operation.h -/// -/// Header containing interface to run operations in YT -/// and retrieve information about them. -/// @see [the doc](https://yt.yandex-team.ru/docs/description/mr/map_reduce_overview.html). - -#include "client_method_options.h" -#include "errors.h" -#include "io.h" -#include "job_statistics.h" -#include "job_counters.h" - -#include <library/cpp/threading/future/future.h> -#include <library/cpp/type_info/type_info.h> - -#include <util/datetime/base.h> -#include <util/generic/variant.h> -#include <util/generic/vector.h> -#include <util/generic/maybe.h> -#include <util/system/file.h> -#include <util/system/types.h> - -namespace NYT { - -//////////////////////////////////////////////////////////////////////////////// - -/// Tag class marking that the row type for table is not specified. 
-struct TUnspecifiedTableStructure -{ }; - -/// Tag class marking that table rows have protobuf type. -struct TProtobufTableStructure -{ - /// @brief Descriptor of the protobuf type of table rows. - /// - /// @note If table is tagged with @ref ::google::protobuf::Message instead of real proto class - /// this descriptor might be null. - const ::google::protobuf::Descriptor* Descriptor = nullptr; -}; - - -/// Tag class to specify table row type. -using TTableStructure = std::variant< - TUnspecifiedTableStructure, - TProtobufTableStructure ->; - -bool operator==(const TUnspecifiedTableStructure&, const TUnspecifiedTableStructure&); -bool operator==(const TProtobufTableStructure& lhs, const TProtobufTableStructure& rhs); - -/// Table path marked with @ref NYT::TTableStructure tag. -struct TStructuredTablePath -{ - TStructuredTablePath(TRichYPath richYPath = TRichYPath(), TTableStructure description = TUnspecifiedTableStructure()) - : RichYPath(std::move(richYPath)) - , Description(std::move(description)) - { } - - TStructuredTablePath(TRichYPath richYPath, const ::google::protobuf::Descriptor* descriptor) - : RichYPath(std::move(richYPath)) - , Description(TProtobufTableStructure({descriptor})) - { } - - TStructuredTablePath(TYPath path) - : RichYPath(std::move(path)) - , Description(TUnspecifiedTableStructure()) - { } - - TStructuredTablePath(const char* path) - : RichYPath(path) - , Description(TUnspecifiedTableStructure()) - { } - - TRichYPath RichYPath; - TTableStructure Description; -}; - -/// Create marked table path from row type. -template <typename TRow> -TStructuredTablePath Structured(TRichYPath richYPath); - -/// Create tag class from row type. -template <typename TRow> -TTableStructure StructuredTableDescription(); - -/////////////////////////////////////////////////////////////////////////////// - -/// Tag class marking that row stream is empty. -struct TVoidStructuredRowStream -{ }; - -/// Tag class marking that row stream consists of `NYT::TNode`. 
-struct TTNodeStructuredRowStream -{ }; - -/// Tag class marking that row stream consists of @ref NYT::TYaMRRow. -struct TTYaMRRowStructuredRowStream -{ }; - -/// Tag class marking that row stream consists of protobuf rows of given type. -struct TProtobufStructuredRowStream -{ - /// @brief Descriptor of the protobuf type of table rows. - /// - /// @note If `Descriptor` is nullptr, then row stream consists of multiple message types. - const ::google::protobuf::Descriptor* Descriptor = nullptr; -}; - -/// Tag class to specify type of rows in an operation row stream -using TStructuredRowStreamDescription = std::variant< - TVoidStructuredRowStream, - TTNodeStructuredRowStream, - TTYaMRRowStructuredRowStream, - TProtobufStructuredRowStream ->; - -/////////////////////////////////////////////////////////////////////////////// - -/// Tag class marking that current binary should be used in operation. -struct TJobBinaryDefault -{ }; - -/// Tag class marking that binary from specified local path should be used in operation. -struct TJobBinaryLocalPath -{ - TString Path; - TMaybe<TString> MD5CheckSum; -}; - -/// Tag class marking that binary from specified Cypress path should be used in operation. -struct TJobBinaryCypressPath -{ - TYPath Path; - TMaybe<TTransactionId> TransactionId; -}; - -//////////////////////////////////////////////////////////////////////////////// - - -/// @cond Doxygen_Suppress -namespace NDetail { - extern i64 OutputTableCount; -} // namespace NDetail -/// @endcond - -//////////////////////////////////////////////////////////////////////////////// - -/// -/// @brief Auto merge mode. -/// -/// @see https://yt.yandex-team.ru/docs/description/mr/automerge -enum class EAutoMergeMode -{ - /// Auto merge is disabled. - Disabled /* "disabled" */, - - /// Mode that tries to achieve good chunk sizes and doesn't limit usage of chunk quota for intermediate chunks. 
- Relaxed /* "relaxed" */, - - /// Mode that tries to optimize usage of chunk quota for intermediate chunks, operation might run slower. - Economy /* "economy" */, - - /// - /// @brief Manual configuration of automerge parameters. - /// - /// @ref TAutoMergeSpec - Manual /* "manual" */, -}; - -/// -/// @brief Options for auto merge operation stage. -/// -/// @see https://yt.yandex-team.ru/docs/description/mr/automerge -class TAutoMergeSpec -{ -public: - /// @cond Doxygen_Suppress - using TSelf = TAutoMergeSpec; - /// @endcond - - /// Mode of the auto merge. - FLUENT_FIELD_OPTION(EAutoMergeMode, Mode); - - /// @brief Upper limit for number of intermediate chunks. - /// - /// Works only for Manual mode. - FLUENT_FIELD_OPTION(i64, MaxIntermediateChunkCount); - - /// @brief Number of chunks limit to merge in one job. - /// - /// Works only for Manual mode. - FLUENT_FIELD_OPTION(i64, ChunkCountPerMergeJob); - - /// @brief Automerge will not merge chunks that are larger than `DesiredChunkSize * (ChunkSizeThreshold / 100.)` - /// - /// Works only for Manual mode. - FLUENT_FIELD_OPTION(i64, ChunkSizeThreshold); -}; - -/// Base for operations with auto merge options. -template <class TDerived> -class TWithAutoMergeSpec -{ -public: - /// @cond Doxygen_Suppress - using TSelf = TDerived; - /// @endcond - - /// @brief Options for auto merge operation stage. - /// - /// @see https://yt.yandex-team.ru/docs/description/mr/automerge - FLUENT_FIELD_OPTION(TAutoMergeSpec, AutoMerge); -}; - -/// -/// @brief Resources controlled by scheduler and used by running operations. -/// -/// @see https://yt.yandex-team.ru/docs/description/mr/scheduler/scheduler_and_pools#resursy -class TSchedulerResources -{ -public: - /// @cond Doxygen_Suppress - using TSelf = TSchedulerResources; - /// @endcond - - /// Each job consumes exactly one user slot. - FLUENT_FIELD_OPTION_ENCAPSULATED(i64, UserSlots); - - /// Number of (virtual) cpu cores consumed by all jobs. 
- FLUENT_FIELD_OPTION_ENCAPSULATED(i64, Cpu); - - /// Amount of memory in bytes. - FLUENT_FIELD_OPTION_ENCAPSULATED(i64, Memory); -}; - -/// Base for input format hints of a user job. -template <class TDerived> -class TUserJobInputFormatHintsBase -{ -public: - /// @cond Doxygen_Suppress - using TSelf = TDerived; - /// @endcond - - /// @brief Fine tune input format of the job. - FLUENT_FIELD_OPTION(TFormatHints, InputFormatHints); -}; - -/// Base for output format hints of a user job. -template <class TDerived> -class TUserJobOutputFormatHintsBase -{ -public: - /// @cond Doxygen_Suppress - using TSelf = TDerived; - /// @endcond - - /// @brief Fine tune output format of the job. - FLUENT_FIELD_OPTION(TFormatHints, OutputFormatHints); -}; - -/// Base for format hints of a user job. -template <class TDerived> -class TUserJobFormatHintsBase - : public TUserJobInputFormatHintsBase<TDerived> - , public TUserJobOutputFormatHintsBase<TDerived> -{ -public: - /// @cond Doxygen_Suppress - using TSelf = TDerived; - /// @endcond -}; - -/// User job format hints. -class TUserJobFormatHints - : public TUserJobFormatHintsBase<TUserJobFormatHints> -{ }; - -/// Spec of input and output tables of a raw operation. -template <class TDerived> -class TRawOperationIoTableSpec -{ -public: - /// @cond Doxygen_Suppress - using TSelf = TDerived; - /// @endcond - - /// Add input table path to input path list. - TDerived& AddInput(const TRichYPath& path); - - /// Set input table path no. `tableIndex`. - TDerived& SetInput(size_t tableIndex, const TRichYPath& path); - - /// Add output table path to output path list. - TDerived& AddOutput(const TRichYPath& path); - - /// Set output table path no. `tableIndex`. - TDerived& SetOutput(size_t tableIndex, const TRichYPath& path); - - /// Get all input table paths. - const TVector<TRichYPath>& GetInputs() const; - - /// Get all output table paths. 
- const TVector<TRichYPath>& GetOutputs() const; - -private: - TVector<TRichYPath> Inputs_; - TVector<TRichYPath> Outputs_; -}; - -/// Base spec for IO in "simple" raw operations (Map, Reduce etc.). -template <class TDerived> -struct TSimpleRawOperationIoSpec - : public TRawOperationIoTableSpec<TDerived> -{ - /// @cond Doxygen_Suppress - using TSelf = TDerived; - /// @endcond - - /// @brief Describes format for both input and output. - /// - /// @note `Format' is overriden by `InputFormat' and `OutputFormat'. - FLUENT_FIELD_OPTION(TFormat, Format); - - /// Describes input format. - FLUENT_FIELD_OPTION(TFormat, InputFormat); - - /// Describes output format. - FLUENT_FIELD_OPTION(TFormat, OutputFormat); -}; - -/// Spec for IO in MapReduce operation. -template <class TDerived> -class TRawMapReduceOperationIoSpec - : public TRawOperationIoTableSpec<TDerived> -{ -public: - /// @cond Doxygen_Suppress - using TSelf = TDerived; - /// @endcond - - /// @brief Describes format for both input and output of mapper. - /// - /// @note `MapperFormat' is overriden by `MapperInputFormat' and `MapperOutputFormat'. - FLUENT_FIELD_OPTION(TFormat, MapperFormat); - - /// Describes mapper input format. - FLUENT_FIELD_OPTION(TFormat, MapperInputFormat); - - /// Describes mapper output format. - FLUENT_FIELD_OPTION(TFormat, MapperOutputFormat); - - /// @brief Describes format for both input and output of reduce combiner. - /// - /// @note `ReduceCombinerFormat' is overriden by `ReduceCombinerInputFormat' and `ReduceCombinerOutputFormat'. - FLUENT_FIELD_OPTION(TFormat, ReduceCombinerFormat); - - /// Describes reduce combiner input format. - FLUENT_FIELD_OPTION(TFormat, ReduceCombinerInputFormat); - - /// Describes reduce combiner output format. - FLUENT_FIELD_OPTION(TFormat, ReduceCombinerOutputFormat); - - /// @brief Describes format for both input and output of reducer. - /// - /// @note `ReducerFormat' is overriden by `ReducerInputFormat' and `ReducerOutputFormat'. 
- FLUENT_FIELD_OPTION(TFormat, ReducerFormat); - - /// Describes reducer input format. - FLUENT_FIELD_OPTION(TFormat, ReducerInputFormat); - - /// Describes reducer output format. - FLUENT_FIELD_OPTION(TFormat, ReducerOutputFormat); - - /// Add direct map output table path. - TDerived& AddMapOutput(const TRichYPath& path); - - /// Set direct map output table path no. `tableIndex`. - TDerived& SetMapOutput(size_t tableIndex, const TRichYPath& path); - - /// Get all direct map output table paths - const TVector<TRichYPath>& GetMapOutputs() const; - -private: - TVector<TRichYPath> MapOutputs_; -}; - -/// -/// @brief Base spec of operations with input tables. -class TOperationInputSpecBase -{ -public: - template <class T, class = void> - struct TFormatAdder; - - /// - /// @brief Add input table path to input path list and specify type of rows. - template <class T> - void AddInput(const TRichYPath& path); - - /// - /// @brief Add input table path as structured paths. - void AddStructuredInput(TStructuredTablePath path); - - /// - /// @brief Set input table path and type. - template <class T> - void SetInput(size_t tableIndex, const TRichYPath& path); - - /// - /// @brief All input paths. - TVector<TRichYPath> Inputs_; - - /// - /// @brief Get all input structured paths. - const TVector<TStructuredTablePath>& GetStructuredInputs() const; - -private: - TVector<TStructuredTablePath> StructuredInputs_; - friend struct TOperationIOSpecBase; - template <class T> - friend struct TOperationIOSpec; -}; - -/// -/// @brief Base spec of operations with output tables. -class TOperationOutputSpecBase -{ -public: - template <class T, class = void> - struct TFormatAdder; - - /// - /// @brief Add output table path to output path list and specify type of rows. - template <class T> - void AddOutput(const TRichYPath& path); - - /// - /// @brief Add output table path as structured paths. 
- void AddStructuredOutput(TStructuredTablePath path); - - /// - /// @brief Set output table path and type. - template <class T> - void SetOutput(size_t tableIndex, const TRichYPath& path); - - /// - /// @brief All output paths. - TVector<TRichYPath> Outputs_; - - /// - /// @brief Get all output structured paths. - const TVector<TStructuredTablePath>& GetStructuredOutputs() const; - -private: - TVector<TStructuredTablePath> StructuredOutputs_; - friend struct TOperationIOSpecBase; - template <class T> - friend struct TOperationIOSpec; -}; - -/// -/// @brief Base spec for operations with inputs and outputs. -struct TOperationIOSpecBase - : public TOperationInputSpecBase - , public TOperationOutputSpecBase -{ }; - -/// -/// @brief Base spec for operations with inputs and outputs. -template <class TDerived> -struct TOperationIOSpec - : public TOperationIOSpecBase -{ - /// @cond Doxygen_Suppress - using TSelf = TDerived; - /// @endcond - - template <class T> - TDerived& AddInput(const TRichYPath& path); - - TDerived& AddStructuredInput(TStructuredTablePath path); - - template <class T> - TDerived& SetInput(size_t tableIndex, const TRichYPath& path); - - template <class T> - TDerived& AddOutput(const TRichYPath& path); - - TDerived& AddStructuredOutput(TStructuredTablePath path); - - template <class T> - TDerived& SetOutput(size_t tableIndex, const TRichYPath& path); - - - // DON'T USE THESE METHODS! They are left solely for backward compatibility. - // These methods are the only way to do equivalent of (Add/Set)(Input/Output)<Message> - // but please consider using (Add/Set)(Input/Output)<TConcreteMessage> - // (where TConcreteMessage is some descendant of Message) - // because they are faster and better (see https://st.yandex-team.ru/YT-6967) - TDerived& AddProtobufInput_VerySlow_Deprecated(const TRichYPath& path); - TDerived& AddProtobufOutput_VerySlow_Deprecated(const TRichYPath& path); -}; - -/// -/// @brief Base spec for all operations. 
-/// -/// @see https://yt.yandex-team.ru/docs/description/mr/operations_options -template <class TDerived> -struct TOperationSpecBase -{ - /// @cond Doxygen_Suppress - using TSelf = TDerived; - /// @endcond - - /// - /// @brief Limit on operation execution time. - /// - /// If operation doesn't finish in time it will be aborted. - FLUENT_FIELD_OPTION(TDuration, TimeLimit); - - /// @brief Title to be shown in web interface. - FLUENT_FIELD_OPTION(TString, Title); - - /// @brief Pool to be used for this operation. - FLUENT_FIELD_OPTION(TString, Pool); - - /// @brief Weight of operation. - /// - /// Coefficient defining how much resources operation gets relative to its siblings in the same pool. - FLUENT_FIELD_OPTION(double, Weight); - - /// @breif Pool tree list that operation will use. - FLUENT_OPTIONAL_VECTOR_FIELD_ENCAPSULATED(TString, PoolTree); - - /// How much resources can be consumed by operation. - FLUENT_FIELD_OPTION_ENCAPSULATED(TSchedulerResources, ResourceLimits); -}; - -/// -/// @brief Base spec for all operations with user jobs. -template <class TDerived> -struct TUserOperationSpecBase - : TOperationSpecBase<TDerived> -{ - /// @cond Doxygen_Suppress - using TSelf = TDerived; - /// @endcond - - /// How many jobs can fail before operation is failed. - FLUENT_FIELD_OPTION(ui64, MaxFailedJobCount); - - /// On any unsuccessful job completion (i.e. abortion or failure) force the whole operation to fail. - FLUENT_FIELD_OPTION(bool, FailOnJobRestart); - - /// - /// @brief Table to save whole stderr of operation. - /// - /// @see https://clubs.at.yandex-team.ru/yt/1045 - FLUENT_FIELD_OPTION(TYPath, StderrTablePath); - - /// - /// @brief Table to save coredumps of operation. - /// - /// @see https://clubs.at.yandex-team.ru/yt/1045 - FLUENT_FIELD_OPTION(TYPath, CoreTablePath); - - /// - /// @brief How long should the scheduler wait for the job to be started on a node. 
- /// - /// When you run huge jobs that require preemption of all the other jobs on - /// a node, the default timeout might be insufficient and your job may be - /// aborted with 'waiting_timeout' reason. This is especially problematic - /// when you are setting 'FailOnJobRestart' option. - /// - /// @note The value must be between 10 seconds and 10 minutes. - FLUENT_FIELD_OPTION(TDuration, WaitingJobTimeout); -}; - -/// -/// @brief Class to provide information on intermediate mapreduce stream protobuf types. -/// -/// When using protobuf format it is important to know exact types of proto messages -/// that are used in input/output. -/// -/// Sometimes such messages cannot be derived from job class -/// i.e. when job class uses `NYT::TTableReader<::google::protobuf::Message>` -/// or `NYT::TTableWriter<::google::protobuf::Message>`. -/// -/// When using such jobs user can provide exact message type using this class. -/// -/// @note Only input/output that relate to intermediate tables can be hinted. -/// Input to map and output of reduce is derived from `AddInput`/`AddOutput`. -template <class TDerived> -struct TIntermediateTablesHintSpec -{ - /// Specify intermediate map output type. - template <class T> - TDerived& HintMapOutput(); - - /// Specify reduce combiner input. - template <class T> - TDerived& HintReduceCombinerInput(); - - /// Specify reduce combiner output. - template <class T> - TDerived& HintReduceCombinerOutput(); - - /// Specify reducer input. - template <class T> - TDerived& HintReduceInput(); - - /// - /// @brief Add output of map stage. - /// - /// Mapper output table #0 is always intermediate table that is going to be reduced later. - /// Rows that mapper write to tables #1, #2, ... are saved in MapOutput tables. 
- template <class T> - TDerived& AddMapOutput(const TRichYPath& path); - - TVector<TRichYPath> MapOutputs_; - - const TVector<TStructuredTablePath>& GetStructuredMapOutputs() const; - const TMaybe<TTableStructure>& GetIntermediateMapOutputDescription() const; - const TMaybe<TTableStructure>& GetIntermediateReduceCombinerInputDescription() const; - const TMaybe<TTableStructure>& GetIntermediateReduceCombinerOutputDescription() const; - const TMaybe<TTableStructure>& GetIntermediateReducerInputDescription() const; - -private: - TVector<TStructuredTablePath> StructuredMapOutputs_; - TMaybe<TTableStructure> IntermediateMapOutputDescription_; - TMaybe<TTableStructure> IntermediateReduceCombinerInputDescription_; - TMaybe<TTableStructure> IntermediateReduceCombinerOutputDescription_; - TMaybe<TTableStructure> IntermediateReducerInputDescription_; -}; - -//////////////////////////////////////////////////////////////////////////////// - -struct TAddLocalFileOptions -{ - /// @cond Doxygen_Suppress - using TSelf = TAddLocalFileOptions; - /// @endcond - - /// - /// @brief Path by which job will see the uploaded file. - /// - /// Defaults to basename of the local path. - FLUENT_FIELD_OPTION(TString, PathInJob); - - /// - /// @brief MD5 checksum of uploaded file. - /// - /// If not specified it is computed by this library. - /// If this argument is provided, the user can some cpu and disk IO. - FLUENT_FIELD_OPTION(TString, MD5CheckSum); - - /// - /// @brief Do not put file into node cache - /// - /// @see NYT::TRichYPath::BypassArtifactCache - FLUENT_FIELD_OPTION(bool, BypassArtifactCache); -}; - -//////////////////////////////////////////////////////////////////////////////// - -/// @brief Binary to run job profiler on. -enum class EProfilingBinary -{ - /// Profile job proxy. - JobProxy /* "job_proxy" */, - - /// Profile user job. - UserJob /* "user_job" */, -}; - -/// @brief Type of job profiler. -enum class EProfilerType -{ - /// Profile CPU usage. 
- Cpu /* "cpu" */, - - /// Profile memory usage. - Memory /* "memory" */, - - /// Profiler peak memory usage. - PeakMemory /* "peak_memory" */, -}; - -/// @brief Specifies a job profiler. -struct TJobProfilerSpec -{ - /// @cond Doxygen_Suppress - using TSelf = TJobProfilerSpec; - /// @endcond - - /// @brief Binary to profile. - FLUENT_FIELD_OPTION(EProfilingBinary, ProfilingBinary); - - /// @brief Type of the profiler. - FLUENT_FIELD_OPTION(EProfilerType, ProfilerType); - - /// @brief Probabiliy of the job being selected for profiling. - FLUENT_FIELD_OPTION(double, ProfilingProbability); - - /// @brief For sampling profilers, sets the number of samples per second. - FLUENT_FIELD_OPTION(int, SamplingFrequency); -}; - -//////////////////////////////////////////////////////////////////////////////// - -/// -/// @brief Spec of user job. -/// -/// @see https://yt.yandex-team.ru/docs/description/mr/operations_options#user_script_options -struct TUserJobSpec -{ - /// @cond Doxygen_Suppress - using TSelf = TUserJobSpec; - /// @endcond - - /// - /// @brief Specify a local file to upload to Cypress and prepare for use in job. - TSelf& AddLocalFile(const TLocalFilePath& path, const TAddLocalFileOptions& options = TAddLocalFileOptions()); - - /// - /// @brief Get the list of all added local files. - TVector<std::tuple<TLocalFilePath, TAddLocalFileOptions>> GetLocalFiles() const; - - /// @brief Paths to files in Cypress to use in job. - FLUENT_VECTOR_FIELD(TRichYPath, File); - - /// - /// @brief MemoryLimit specifies how much memory job process can use. - /// - /// @note - /// If job uses tmpfs (check @ref NYT::TOperationOptions::MountSandboxInTmpfs) - /// YT computes its memory usage as total of: - /// - memory usage of job process itself (including mapped files); - /// - total size of tmpfs used by this job. 
- /// - /// @note - /// When @ref NYT::TOperationOptions::MountSandboxInTmpfs is enabled library will compute - /// total size of all files used by this job and add this total size to MemoryLimit. - /// Thus you shouldn't include size of your files (e.g. binary file) into MemoryLimit. - /// - /// @note - /// Final memory memory_limit passed to YT is calculated as follows: - /// - /// @note - /// ``` - /// memory_limit = MemoryLimit + <total-size-of-used-files> + ExtraTmpfsSize - /// ``` - /// - /// @see NYT::TUserJobSpec::ExtraTmpfsSize - FLUENT_FIELD_OPTION(i64, MemoryLimit); - - /// - /// @brief Size of data that is going to be written to tmpfs. - /// - /// This option should be used if job writes data to tmpfs. - /// - /// ExtraTmpfsSize should not include size of files specified with - /// @ref NYT::TUserJobSpec::AddLocalFile or @ref NYT::TUserJobSpec::AddFile - /// These files are copied to tmpfs automatically and their total size - /// is computed automatically. - /// - /// @see NYT::TOperationOptions::MountSandboxInTmpfs - /// @see NYT::TUserJobSpec::MemoryLimit - FLUENT_FIELD_OPTION(i64, ExtraTmpfsSize); - - /// - /// @brief Maximum number of CPU cores for a single job to use. - FLUENT_FIELD_OPTION(double, CpuLimit); - - /// - /// @brief Fraction of @ref NYT::TUserJobSpec::MemoryLimit that job gets at start. - /// - /// @see https://yt.yandex-team.ru/docs/description/mr/operations_options#memory_reserve_factor - FLUENT_FIELD_OPTION(double, MemoryReserveFactor); - - /// - /// @brief Local path to executable to be used inside jobs. - //// - /// Provided executable must use C++ YT API library (this library) - /// and implement job class that is going to be used. - /// - /// This option might be useful if we want to start operation from nonlinux machines - /// (in that case we use `JobBinary` to provide path to the same program compiled for linux). 
- /// Other example of using this option is uploading executable to cypress in advance - /// and save the time required to upload current executable to cache. - /// `md5` argument can be used to save cpu time and disk IO when binary MD5 checksum is known. - /// When argument is not provided library will compute it itself. - TUserJobSpec& JobBinaryLocalPath(TString path, TMaybe<TString> md5 = Nothing()); - - /// - /// @brief Cypress path to executable to be used inside jobs. - TUserJobSpec& JobBinaryCypressPath(TString path, TMaybe<TTransactionId> transactionId = Nothing()); - - /// - /// @brief String that will be prepended to the command. - /// - /// This option overrides @ref NYT::TOperationOptions::JobCommandPrefix. - FLUENT_FIELD(TString, JobCommandPrefix); - - /// - /// @brief String that will be appended to the command. - /// - /// This option overrides @ref NYT::TOperationOptions::JobCommandSuffix. - FLUENT_FIELD(TString, JobCommandSuffix); - - /// - /// @brief Map of environment variables that will be set for jobs. - FLUENT_MAP_FIELD(TString, TString, Environment); - - /// - /// @brief Limit for all files inside job sandbox (in bytes). - FLUENT_FIELD_OPTION(ui64, DiskSpaceLimit); - - /// - /// @brief Number of ports reserved for the job (passed through environment in YT_PORT_0, YT_PORT_1, ...). - FLUENT_FIELD_OPTION(ui16, PortCount); - - /// - /// @brief Network project used to isolate job network. - FLUENT_FIELD_OPTION(TString, NetworkProject); - - /// - /// @brief Limit on job execution time. - /// - /// Jobs that exceed this limit will be considered failed. - FLUENT_FIELD_OPTION(TDuration, JobTimeLimit); - - /// - /// @brief Get job binary config. - const TJobBinaryConfig& GetJobBinary() const; - - /// - /// @brief List of profilers to run. 
- FLUENT_VECTOR_FIELD(TJobProfilerSpec, JobProfiler); - -private: - TVector<std::tuple<TLocalFilePath, TAddLocalFileOptions>> LocalFiles_; - TJobBinaryConfig JobBinary_; -}; - -//////////////////////////////////////////////////////////////////////////////// - -/// -/// @brief Spec of Map operation. -/// -/// @see https://yt.yandex-team.ru/docs/description/mr/map -template <typename TDerived> -struct TMapOperationSpecBase - : public TUserOperationSpecBase<TDerived> - , public TWithAutoMergeSpec<TDerived> -{ - /// @cond Doxygen_Suppress - using TSelf = TDerived; - /// @endcond - - /// - /// @brief Spec of mapper job. - FLUENT_FIELD(TUserJobSpec, MapperSpec); - - /// - /// @brief Whether to guarantee the order of rows passed to mapper matches the order in the table. - /// - /// When `Ordered' is false (by default), there is no guaranties about order of reading rows. - /// In this case mapper might work slightly faster because row delivered from fast node can be processed YT waits - /// response from slow nodes. - /// When `Ordered' is true, rows will come in order in which they are stored in input tables. - FLUENT_FIELD_OPTION(bool, Ordered); - - /// - /// @brief Recommended number of jobs to run. - /// - /// `JobCount' has higher priority than @ref NYT::TMapOperationSpecBase::DataSizePerJob. - /// This option only provide a recommendation and may be ignored if conflicting with YT internal limits. - FLUENT_FIELD_OPTION(ui32, JobCount); - - /// - /// @brief Recommended of data size for each job. - /// - /// `DataSizePerJob` has lower priority that @ref NYT::TMapOperationSpecBase::JobCount. - /// This option only provide a recommendation and may be ignored if conflicting with YT internal limits. - FLUENT_FIELD_OPTION(ui64, DataSizePerJob); -}; - -/// -/// @brief Spec of Map operation. 
-/// -/// @see https://yt.yandex-team.ru/docs/description/mr/map -struct TMapOperationSpec - : public TMapOperationSpecBase<TMapOperationSpec> - , public TOperationIOSpec<TMapOperationSpec> - , public TUserJobFormatHintsBase<TMapOperationSpec> -{ }; - -/// -/// @brief Spec of raw Map operation. -/// -/// @see https://yt.yandex-team.ru/docs/description/mr/map -struct TRawMapOperationSpec - : public TMapOperationSpecBase<TRawMapOperationSpec> - , public TSimpleRawOperationIoSpec<TRawMapOperationSpec> -{ }; - -//////////////////////////////////////////////////////////////////////////////// - -/// -/// @brief Spec of Reduce operation. -/// -/// @see https://yt.yandex-team.ru/docs/description/mr/reduce -template <typename TDerived> -struct TReduceOperationSpecBase - : public TUserOperationSpecBase<TDerived> - , public TWithAutoMergeSpec<TDerived> -{ - /// @cond Doxygen_Suppress - using TSelf = TDerived; - /// @endcond - - /// - /// @brief Spec of reduce job. - FLUENT_FIELD(TUserJobSpec, ReducerSpec); - - /// - /// @brief Columns to sort rows by (must include `ReduceBy` as prefix). - FLUENT_FIELD(TSortColumns, SortBy); - - /// - /// @brief Columns to group rows by. - FLUENT_FIELD(TSortColumns, ReduceBy); - - /// - /// @brief Columns to join foreign tables by (must be prefix of `ReduceBy`). - /// - /// @see https://yt.yandex-team.ru/docs/description/mr/reduce#foreign_tables - FLUENT_FIELD_OPTION(TSortColumns, JoinBy); - - /// - /// @brief Guarantee to feed all rows with same `ReduceBy` columns to a single job (`true` by default). - FLUENT_FIELD_OPTION(bool, EnableKeyGuarantee); - - /// - /// @brief Recommended number of jobs to run. - /// - /// `JobCount' has higher priority than @ref NYT::TReduceOperationSpecBase::DataSizePerJob. - /// This option only provide a recommendation and may be ignored if conflicting with YT internal limits. - FLUENT_FIELD_OPTION(ui32, JobCount); - - /// - /// @brief Recommended of data size for each job. 
- /// - /// `DataSizePerJob` has lower priority that @ref NYT::TReduceOperationSpecBase::JobCount. - /// This option only provide a recommendation and may be ignored if conflicting with YT internal limits. - FLUENT_FIELD_OPTION(ui64, DataSizePerJob); -}; - -/// -/// @brief Spec of Reduce operation. -/// -/// @see https://yt.yandex-team.ru/docs/description/mr/reduce -struct TReduceOperationSpec - : public TReduceOperationSpecBase<TReduceOperationSpec> - , public TOperationIOSpec<TReduceOperationSpec> - , public TUserJobFormatHintsBase<TReduceOperationSpec> -{ }; - -/// -/// @brief Spec of raw Reduce operation. -/// -/// @see https://yt.yandex-team.ru/docs/description/mr/reduce -struct TRawReduceOperationSpec - : public TReduceOperationSpecBase<TRawReduceOperationSpec> - , public TSimpleRawOperationIoSpec<TRawReduceOperationSpec> -{ }; - -//////////////////////////////////////////////////////////////////////////////// - -/// -/// @brief Spec of JoinReduce operation. -/// -/// @deprecated Instead the user should run a reduce operation -/// with @ref NYT::TReduceOperationSpec::EnableKeyGuarantee set to `false`. -/// -/// @see https://yt.yandex-team.ru/docs/description/mr/reduce#foreign_tables -template <typename TDerived> -struct TJoinReduceOperationSpecBase - : public TUserOperationSpecBase<TDerived> -{ - /// @cond Doxygen_Suppress - using TSelf = TDerived; - /// @endcond - - /// - /// @brief Spec of reduce job. - FLUENT_FIELD(TUserJobSpec, ReducerSpec); - - /// - /// @brief Columns to join foreign tables by (must be prefix of `ReduceBy`). - /// - /// @see https://yt.yandex-team.ru/docs/description/mr/reduce#foreign_tables - FLUENT_FIELD(TSortColumns, JoinBy); - - /// - /// @brief Recommended number of jobs to run. - /// - /// `JobCount' has higher priority than @ref NYT::TJoinReduceOperationSpecBase::DataSizePerJob. - /// This option only provide a recommendation and may be ignored if conflicting with YT internal limits. 
- FLUENT_FIELD_OPTION(ui32, JobCount); - - /// - /// @brief Recommended of data size for each job. - /// - /// `DataSizePerJob` has lower priority that @ref NYT::TJoinReduceOperationSpecBase::JobCount. - /// This option only provide a recommendation and may be ignored if conflicting with YT internal limits. - FLUENT_FIELD_OPTION(ui64, DataSizePerJob); -}; - -/// -/// @brief Spec of JoinReduce operation. -/// -/// @deprecated Instead the user should run a reduce operation -/// with @ref NYT::TReduceOperationSpec::EnableKeyGuarantee set to `false`. -/// -/// @see https://yt.yandex-team.ru/docs/description/mr/reduce#foreign_tables -struct TJoinReduceOperationSpec - : public TJoinReduceOperationSpecBase<TJoinReduceOperationSpec> - , public TOperationIOSpec<TJoinReduceOperationSpec> - , public TUserJobFormatHintsBase<TJoinReduceOperationSpec> -{ }; - -/// -/// @brief Spec of raw JoinReduce operation. -/// -/// @deprecated Instead the user should run a reduce operation -/// with @ref NYT::TReduceOperationSpec::EnableKeyGuarantee set to `false`. -/// -/// @see https://yt.yandex-team.ru/docs/description/mr/reduce#foreign_tables -struct TRawJoinReduceOperationSpec - : public TJoinReduceOperationSpecBase<TRawJoinReduceOperationSpec> - , public TSimpleRawOperationIoSpec<TRawJoinReduceOperationSpec> -{ }; - -//////////////////////////////////////////////////////////////////////////////// - -/// -/// @brief Spec of MapReduce operation. -/// -/// @see https://yt.yandex-team.ru/docs/description/mr/mapreduce -template <typename TDerived> -struct TMapReduceOperationSpecBase - : public TUserOperationSpecBase<TDerived> -{ - /// @cond Doxygen_Suppress - using TSelf = TDerived; - /// @endcond - - /// - /// @brief Spec of map job. - FLUENT_FIELD(TUserJobSpec, MapperSpec); - - /// - /// @brief Spec of reduce job. - FLUENT_FIELD(TUserJobSpec, ReducerSpec); - - /// - /// @brief Spec of reduce combiner. 
- FLUENT_FIELD(TUserJobSpec, ReduceCombinerSpec); - - /// - /// @brief Columns to sort rows by (must include `ReduceBy` as prefix). - FLUENT_FIELD(TSortColumns, SortBy); - - /// - /// @brief Columns to group rows by. - FLUENT_FIELD(TSortColumns, ReduceBy); - - /// - /// @brief Recommended number of map jobs to run. - /// - /// `JobCount' has higher priority than @ref NYT::TMapReduceOperationSpecBase::DataSizePerMapJob. - /// This option only provide a recommendation and may be ignored if conflicting with YT internal limits. - FLUENT_FIELD_OPTION(ui32, MapJobCount); - - /// - /// @brief Recommended of data size for each map job. - /// - /// `DataSizePerJob` has lower priority that @ref NYT::TMapReduceOperationSpecBase::MapJobCount. - /// This option only provide a recommendation and may be ignored if conflicting with YT internal limits. - FLUENT_FIELD_OPTION(ui64, DataSizePerMapJob); - - /// - /// @brief Recommended number of intermediate data partitions. - FLUENT_FIELD_OPTION(ui64, PartitionCount); - - /// - /// @brief Recommended size of intermediate data partitions. - FLUENT_FIELD_OPTION(ui64, PartitionDataSize); - - /// - /// @brief Account to use for intermediate data. - FLUENT_FIELD_OPTION(TString, IntermediateDataAccount); - - /// - /// @brief Replication factor for intermediate data (1 by default). - FLUENT_FIELD_OPTION(ui64, IntermediateDataReplicationFactor); - - /// - /// @brief Recommended size of data to be passed to a single reduce combiner. - FLUENT_FIELD_OPTION(ui64, DataSizePerSortJob); - - /// - /// @brief Whether to guarantee the order of rows passed to mapper matches the order in the table. - /// - /// @see @ref NYT::TMapOperationSpec::Ordered for more info. - FLUENT_FIELD_OPTION(bool, Ordered); - - /// - /// @brief Guarantee to run reduce combiner before reducer. - FLUENT_FIELD_OPTION(bool, ForceReduceCombiners); -}; - -/// -/// @brief Spec of MapReduce operation. 
-/// -/// @see https://yt.yandex-team.ru/docs/description/mr/mapreduce -struct TMapReduceOperationSpec - : public TMapReduceOperationSpecBase<TMapReduceOperationSpec> - , public TOperationIOSpec<TMapReduceOperationSpec> - , public TIntermediateTablesHintSpec<TMapReduceOperationSpec> -{ - /// @cond Doxygen_Suppress - using TSelf = TMapReduceOperationSpec; - /// @endcond - - /// - /// @brief Format hints for mapper. - FLUENT_FIELD_DEFAULT(TUserJobFormatHints, MapperFormatHints, TUserJobFormatHints()); - - /// - /// @brief Format hints for reducer. - FLUENT_FIELD_DEFAULT(TUserJobFormatHints, ReducerFormatHints, TUserJobFormatHints()); - - /// - /// @brief Format hints for reduce combiner. - FLUENT_FIELD_DEFAULT(TUserJobFormatHints, ReduceCombinerFormatHints, TUserJobFormatHints()); -}; - -/// -/// @brief Spec of raw MapReduce operation. -/// -/// @see https://yt.yandex-team.ru/docs/description/mr/mapreduce -struct TRawMapReduceOperationSpec - : public TMapReduceOperationSpecBase<TRawMapReduceOperationSpec> - , public TRawMapReduceOperationIoSpec<TRawMapReduceOperationSpec> -{ }; - -//////////////////////////////////////////////////////////////////////////////// - -/// -/// @brief Schema inference mode. -/// -/// @see https://yt.yandex-team.ru/docs/description/storage/static_schema.html#schema_inference -enum class ESchemaInferenceMode : int -{ - FromInput /* "from_input" */, - FromOutput /* "from_output" */, - Auto /* "auto" */, -}; - -/// -/// @brief Spec of Sort operation. -/// -/// @see https://yt.yandex-team.ru/docs/description/mr/sort -struct TSortOperationSpec - : TOperationSpecBase<TSortOperationSpec> -{ - /// @cond Doxygen_Suppress - using TSelf = TSortOperationSpec; - /// @endcond - - /// - /// @brief Paths to input tables. - FLUENT_VECTOR_FIELD(TRichYPath, Input); - - /// - /// @brief Path to output table. - FLUENT_FIELD(TRichYPath, Output); - - /// - /// @brief Columns to sort table by. 
- FLUENT_FIELD(TSortColumns, SortBy); - - /// - /// @brief Recommended number of intermediate data partitions. - FLUENT_FIELD_OPTION(ui64, PartitionCount); - - /// - /// @brief Recommended size of intermediate data partitions. - FLUENT_FIELD_OPTION(ui64, PartitionDataSize); - - /// - /// @brief Recommended number of partition jobs to run. - /// - /// `JobCount' has higher priority than @ref NYT::TSortOperationSpec::DataSizePerPartitionJob. - /// This option only provide a recommendation and may be ignored if conflicting with YT internal limits. - FLUENT_FIELD_OPTION(ui64, PartitionJobCount); - - /// - /// @brief Recommended of data size for each partition job. - /// - /// `DataSizePerJob` has lower priority that @ref NYT::TSortOperationSpec::PartitionJobCount. - /// This option only provide a recommendation and may be ignored if conflicting with YT internal limits. - FLUENT_FIELD_OPTION(ui64, DataSizePerPartitionJob); - - /// - /// @brief Inference mode for output table schema. - /// - /// @see https://yt.yandex-team.ru/docs/description/storage/static_schema.html#schema_inference - FLUENT_FIELD_OPTION(ESchemaInferenceMode, SchemaInferenceMode); - - /// - /// @brief Account to use for intermediate data. - FLUENT_FIELD_OPTION(TString, IntermediateDataAccount); - - /// - /// @brief Replication factor for intermediate data (1 by default). - FLUENT_FIELD_OPTION(ui64, IntermediateDataReplicationFactor); -}; - - -/// -/// @brief Merge mode. -enum EMergeMode : int -{ - MM_UNORDERED /* "unordered" */, - MM_ORDERED /* "ordered" */, - MM_SORTED /* "sorted" */, -}; - -/// -/// @brief Spec of Merge operation. -/// -/// @see https://yt.yandex-team.ru/docs/description/mr/merge -struct TMergeOperationSpec - : TOperationSpecBase<TMergeOperationSpec> -{ - /// @cond Doxygen_Suppress - using TSelf = TMergeOperationSpec; - /// @endcond - - /// - /// @brief Paths to input tables. - FLUENT_VECTOR_FIELD(TRichYPath, Input); - - /// - /// @brief Path to output table. 
- FLUENT_FIELD(TRichYPath, Output); - - /// - /// @brief Columns by which to merge (for @ref NYT::EMergeMode::MM_SORTED). - FLUENT_FIELD(TSortColumns, MergeBy); - - /// - /// @brief Merge mode. - FLUENT_FIELD_DEFAULT(EMergeMode, Mode, MM_UNORDERED); - - /// - /// @brief Combine output chunks to larger ones. - FLUENT_FIELD_DEFAULT(bool, CombineChunks, false); - - /// - /// @brief Guarantee that all input chunks will be read. - FLUENT_FIELD_DEFAULT(bool, ForceTransform, false); - - /// - /// @brief Recommended number of jobs to run. - /// - /// `JobCount' has higher priority than @ref NYT::TMergeOperationSpec::DataSizePerJob. - /// This option only provide a recommendation and may be ignored if conflicting with YT internal limits. - FLUENT_FIELD_OPTION(ui32, JobCount); - - /// - /// @brief Recommended of data size for each job. - /// - /// `DataSizePerJob` has lower priority that @ref NYT::TMergeOperationSpec::JobCount. - /// This option only provide a recommendation and may be ignored if conflicting with YT internal limits. - FLUENT_FIELD_OPTION(ui64, DataSizePerJob); - - /// - /// @brief Inference mode for output table schema. - /// - /// @see https://yt.yandex-team.ru/docs/description/storage/static_schema.html#schema_inference - FLUENT_FIELD_OPTION(ESchemaInferenceMode, SchemaInferenceMode); -}; - -/// -/// @brief Spec of Erase operation. -/// -/// @see https://yt.yandex-team.ru/docs/description/mr/erase -struct TEraseOperationSpec - : TOperationSpecBase<TEraseOperationSpec> -{ - /// @cond Doxygen_Suppress - using TSelf = TEraseOperationSpec; - /// @endcond - - /// - /// @brief Which table (or row range) to erase. - FLUENT_FIELD(TRichYPath, TablePath); - - /// - /// Combine output chunks to larger ones. - FLUENT_FIELD_DEFAULT(bool, CombineChunks, false); - - /// - /// @brief Inference mode for output table schema. 
- /// - /// @see https://yt.yandex-team.ru/docs/description/storage/static_schema.html#schema_inference - FLUENT_FIELD_OPTION(ESchemaInferenceMode, SchemaInferenceMode); -}; - -/// -/// @brief Spec of RemoteCopy operation. -/// -/// @see https://yt.yandex-team.ru/docs/description/mr/remote_copy -struct TRemoteCopyOperationSpec - : TOperationSpecBase<TRemoteCopyOperationSpec> -{ - /// @cond Doxygen_Suppress - using TSelf = TRemoteCopyOperationSpec; - /// @endcond - - /// - /// @brief Source cluster name. - FLUENT_FIELD(TString, ClusterName); - - /// - /// @brief Network to use for copy (all remote cluster nodes must have it configured). - FLUENT_FIELD_OPTION(TString, NetworkName); - - /// - /// @brief Paths to input tables. - FLUENT_VECTOR_FIELD(TRichYPath, Input); - - /// - /// @brief Path to output table. - FLUENT_FIELD(TRichYPath, Output); - - /// - /// @brief Inference mode for output table schema. - /// - /// @see https://yt.yandex-team.ru/docs/description/storage/static_schema.html#schema_inference - FLUENT_FIELD_OPTION(ESchemaInferenceMode, SchemaInferenceMode); - - /// - /// @brief Copy user attributes from input to output table (allowed only for single input table). - FLUENT_FIELD_DEFAULT(bool, CopyAttributes, false); - - /// - /// @brief Names of user attributes to copy from input to output table. - /// - /// @note To make this option make sense set @ref NYT::TRemoteCopyOperationSpec::CopyAttributes to `true`. - FLUENT_VECTOR_FIELD(TString, AttributeKey); - -private: - - /// - /// @brief Config for remote cluster connection. - FLUENT_FIELD_OPTION(TNode, ClusterConnection); -}; - -class IVanillaJobBase; - -/// -/// @brief Task of Vanilla operation. 
-/// -/// @see https://yt.yandex-team.ru/docs/description/mr/vanilla -struct TVanillaTask - : public TOperationOutputSpecBase - , public TUserJobOutputFormatHintsBase<TVanillaTask> -{ - /// @cond Doxygen_Suppress - using TSelf = TVanillaTask; - /// @endcond - - /// - /// @brief Add output table path and specify the task output type (i.e. TMyProtoMessage). - template <class T> - TSelf& AddOutput(const TRichYPath& path); - - /// - /// @brief Add output table path as structured path. - TSelf& AddStructuredOutput(TStructuredTablePath path); - - /// - /// @brief Set output table path and specify the task output type (i.e. TMyProtoMessage). - template <class T> - TSelf& SetOutput(size_t tableIndex, const TRichYPath& path); - - /// - /// @brief Task name. - FLUENT_FIELD(TString, Name); - - /// - /// @brief Job to be executed in this task. - FLUENT_FIELD(::TIntrusivePtr<IVanillaJobBase>, Job); - - /// - /// @brief User job spec. - FLUENT_FIELD(TUserJobSpec, Spec); - - /// - /// @brief Number of jobs to run and wait for successful completion. - /// - /// @note If @ref NYT::TUserOperationSpecBase::FailOnJobRestart is `false`, a failed job will be restarted - /// and will not count in this amount. - FLUENT_FIELD(ui64, JobCount); - - /// - /// @brief Network project name. - FLUENT_FIELD(TMaybe<TString>, NetworkProject); - -}; - -/// -/// @brief Spec of Vanilla operation. -/// -/// @see https://yt.yandex-team.ru/docs/description/mr/vanilla -struct TVanillaOperationSpec - : TUserOperationSpecBase<TVanillaOperationSpec> -{ - /// @cond Doxygen_Suppress - using TSelf = TVanillaOperationSpec; - /// @endcond - - /// - /// @brief Description of tasks to run in this operation. - FLUENT_VECTOR_FIELD(TVanillaTask, Task); -}; - -//////////////////////////////////////////////////////////////////////////////// - -/// -/// @brief Options for @ref NYT::IOperationClient::Map and other operation start commands. 
-struct TOperationOptions -{ - /// @cond Doxygen_Suppress - using TSelf = TOperationOptions; - /// @endcond - - /// - /// @brief Additional field to put to operation spec. - FLUENT_FIELD_OPTION(TNode, Spec); - - /// - /// @brief Start operation mode. - enum class EStartOperationMode : int - { - /// - /// @brief Prepare operation asynchronously. Call IOperation::Start() to start operation. - AsyncPrepare, - - /// - /// @brief Prepare and start operation asynchronously. Don't wait for operation completion. - AsyncStart, - - /// - /// @brief Prepare and start operation synchronously. Don't wait for operation completion. - SyncStart, - - /// - /// @brief Prepare, start and wait for operation completion synchronously. - SyncWait, - }; - - /// - /// @brief Start operation mode. - FLUENT_FIELD_DEFAULT(EStartOperationMode, StartOperationMode, EStartOperationMode::SyncWait); - - /// - /// @brief Wait for operation finish synchronously. - /// - /// @deprecated Use StartOperationMode() instead. - TSelf& Wait(bool value) { - StartOperationMode_ = value ? EStartOperationMode::SyncWait : EStartOperationMode::SyncStart; - return static_cast<TSelf&>(*this); - } - - /// - /// - /// @brief Use format from table attribute (for YAMR-like format). - /// - /// @deprecated - FLUENT_FIELD_DEFAULT(bool, UseTableFormats, false); - - /// - /// @brief Prefix for bash command running the jobs. - /// - /// Can be overridden for the specific job type in the @ref NYT::TUserJobSpec. - FLUENT_FIELD(TString, JobCommandPrefix); - - /// - /// @brief Suffix for bash command running the jobs. - /// - /// Can be overridden for the specific job type in the @ref NYT::TUserJobSpec. - FLUENT_FIELD(TString, JobCommandSuffix); - - /// - /// @brief Put all files required by the job into tmpfs. - /// - /// This option can be set globally using @ref NYT::TConfig::MountSandboxInTmpfs. 
- /// @see https://yt.yandex-team.ru/docs/problems/woodpeckers - FLUENT_FIELD_DEFAULT(bool, MountSandboxInTmpfs, false); - - /// - /// @brief Path to directory to store temporary files. - FLUENT_FIELD_OPTION(TString, FileStorage); - - /// - /// @brief Expiration timeout for uploaded files. - FLUENT_FIELD_OPTION(TDuration, FileExpirationTimeout); - - /// - /// @brief Info to be passed securely to the job. - FLUENT_FIELD_OPTION(TNode, SecureVault); - - /// - /// @brief File cache mode. - enum class EFileCacheMode : int - { - /// - /// @brief Use YT API commands "get_file_from_cache" and "put_file_to_cache". - ApiCommandBased, - - /// - /// @brief Upload files to random paths inside @ref NYT::TOperationOptions::FileStorage without caching. - CachelessRandomPathUpload, - }; - - /// - /// @brief File cache mode. - FLUENT_FIELD_DEFAULT(EFileCacheMode, FileCacheMode, EFileCacheMode::ApiCommandBased); - - /// - /// @brief Id of transaction within which all Cypress file storage entries will be checked/created. - /// - /// By default, the root transaction is used. - /// - /// @note Set a specific transaction only if you - /// 1. specify non-default file storage path in @ref NYT::TOperationOptions::FileStorage or in @ref NYT::TConfig::RemoteTempFilesDirectory. - /// 2. use `CachelessRandomPathUpload` caching mode (@ref NYT::TOperationOptions::FileCacheMode). - FLUENT_FIELD(TTransactionId, FileStorageTransactionId); - - /// - /// @brief Ensure stderr and core tables exist before starting operation. - /// - /// If set to `false`, it is user's responsibility to ensure these tables exist. - FLUENT_FIELD_DEFAULT(bool, CreateDebugOutputTables, true); - - /// - /// @brief Ensure output tables exist before starting operation. - /// - /// If set to `false`, it is user's responsibility to ensure output tables exist. - FLUENT_FIELD_DEFAULT(bool, CreateOutputTables, true); - - /// - /// @brief Try to infer schema of inexistent table from the type of written rows. 
- /// - /// @note Default values for this option may differ depending on the row type. - /// For protobuf it's currently `false` by default. - FLUENT_FIELD_OPTION(bool, InferOutputSchema); -}; - -//////////////////////////////////////////////////////////////////////////////// - -/// -/// @brief Get operation secure vault (specified in @ref NYT::TOperationOptions::SecureVault) inside a job. -const TNode& GetJobSecureVault(); - -//////////////////////////////////////////////////////////////////////////////// - -/// -/// @brief Context passed to @ref NYT::IRawJob::Do. -class TRawJobContext -{ -public: - explicit TRawJobContext(size_t outputTableCount); - - /// - /// @brief Get file corresponding to input stream. - const TFile& GetInputFile() const; - - /// - /// @brief Get files corresponding to output streams. - const TVector<TFile>& GetOutputFileList() const; - -private: - TFile InputFile_; - TVector<TFile> OutputFileList_; -}; - -//////////////////////////////////////////////////////////////////////////////// - -/// -/// @brief Interface for classes that can be Saved/Loaded (to be used with @ref Y_SAVELOAD_JOB). -class ISerializableForJob -{ -public: - virtual ~ISerializableForJob() = default; - - /// - /// @brief Dump state to output stream to be restored in job. - virtual void Save(IOutputStream& stream) const = 0; - - /// - /// @brief Load state from a stream. - virtual void Load(IInputStream& stream) = 0; -}; - -//////////////////////////////////////////////////////////////////////////////// - -/// -/// @brief Provider of information about operation inputs/outputs during @ref NYT::IJob::PrepareOperation. -class IOperationPreparationContext -{ -public: - virtual ~IOperationPreparationContext() = default; - - /// @brief Get the number of input tables. - virtual int GetInputCount() const = 0; - - /// @brief Get the number of output tables. - virtual int GetOutputCount() const = 0; - - /// @brief Get the schema of input table no. `index`. 
- virtual const TTableSchema& GetInputSchema(int index) const = 0; - - /// @brief Get all the input table schemas. - virtual const TVector<TTableSchema>& GetInputSchemas() const = 0; - - /// @brief Path to the input table if available (`Nothing()` for intermediate tables). - virtual TMaybe<TYPath> GetInputPath(int index) const = 0; - - /// @brief Path to the output table if available (`Nothing()` for intermediate tables). - virtual TMaybe<TYPath> GetOutputPath(int index) const = 0; -}; - -/// -/// @brief Fluent builder class for @ref NYT::IJob::PrepareOperation. -/// -/// @note Method calls are supposed to be chained. -class TJobOperationPreparer -{ -public: - - /// - /// @brief Group of input tables that allows to specify properties on all of them at once. - /// - /// The instances are created with @ref NYT::TJobOperationPreparer::BeginInputGroup, not directly. - class TInputGroup - { - public: - TInputGroup(TJobOperationPreparer& preparer, TVector<int> indices); - - /// @brief Specify the type of input rows. - template <typename TRow> - TInputGroup& Description(); - - /// @brief Specify renaming of input columns. - TInputGroup& ColumnRenaming(const THashMap<TString, TString>& renaming); - - /// @brief Specify what input columns to send to job - /// - /// @note Filter is applied before renaming, so it must specify original column names. - TInputGroup& ColumnFilter(const TVector<TString>& columns); - - /// @brief Finish describing the input group. - TJobOperationPreparer& EndInputGroup(); - - private: - TJobOperationPreparer& Preparer_; - TVector<int> Indices_; - }; - - /// - /// @brief Group of output tables that allows to specify properties on all of them at once. - /// - /// The instances are created with @ref NYT::TJobOperationPreparer::BeginOutputGroup, not directly. - class TOutputGroup - { - public: - TOutputGroup(TJobOperationPreparer& preparer, TVector<int> indices); - - /// @brief Specify the type of output rows. 
- /// - /// @tparam TRow type of output rows from tables of this group. - /// @param inferSchema Infer schema from `TRow` and specify it for these output tables. - template <typename TRow> - TOutputGroup& Description(bool inferSchema = true); - - /// @brief Specify schema for these tables. - TOutputGroup& Schema(const TTableSchema& schema); - - /// @brief Specify that all the the tables in this group are unschematized. - /// - /// It is equivalent of `.Schema(TTableSchema().Strict(false)`. - TOutputGroup& NoSchema(); - - /// @brief Finish describing the output group. - TJobOperationPreparer& EndOutputGroup(); - - private: - TJobOperationPreparer& Preparer_; - TVector<int> Indices_; - }; - -public: - explicit TJobOperationPreparer(const IOperationPreparationContext& context); - - /// @brief Begin input group consisting of tables with indices `[begin, end)`. - /// - /// @param begin First index. - /// @param end Index after the last one. - TInputGroup BeginInputGroup(int begin, int end); - - /// @brief Begin input group consisting of tables with indices from `indices`. - /// - /// @tparam TCont Container with integers. Must support `std::begin` and `std::end` functions. - /// @param indices Indices of tables to include in the group. - template <typename TCont> - TInputGroup BeginInputGroup(const TCont& indices); - - /// @brief Begin output group consisting of tables with indices `[begin, end)`. - /// - /// @param begin First index. - /// @param end Index after the last one. - TOutputGroup BeginOutputGroup(int begin, int end); - - /// @brief Begin input group consisting of tables with indices from `indices`. - /// - /// @tparam TCont Container with integers. Must support `std::begin` and `std::end` functions. - /// @param indices Indices of tables to include in the group. - template <typename TCont> - TOutputGroup BeginOutputGroup(const TCont& indices); - - /// @brief Specify the schema for output table no `tableIndex`. 
- /// - /// @note All the output schemas must be specified either with this method, `NoOutputSchema` or `OutputDescription` with `inferSchema == true` - TJobOperationPreparer& OutputSchema(int tableIndex, TTableSchema schema); - - /// @brief Mark the output table no. `tableIndex` as unschematized. - TJobOperationPreparer& NoOutputSchema(int tableIndex); - - /// @brief Specify renaming of input columns for table no. `tableIndex`. - TJobOperationPreparer& InputColumnRenaming(int tableIndex, const THashMap<TString, TString>& renaming); - - /// @brief Specify what input columns of table no. `tableIndex` to send to job - /// - /// @note Filter is applied before renaming, so it must specify original column names. - TJobOperationPreparer& InputColumnFilter(int tableIndex, const TVector<TString>& columns); - - /// @brief Specify the type of input rows for table no. `tableIndex`. - /// - /// @tparam TRow type of input rows. - template <typename TRow> - TJobOperationPreparer& InputDescription(int tableIndex); - - /// @brief Specify the type of output rows for table no. `tableIndex`. - /// - /// @tparam TRow type of output rows. - /// @param inferSchema Infer schema from `TRow` and specify it for the output tables. - template <typename TRow> - TJobOperationPreparer& OutputDescription(int tableIndex, bool inferSchema = true); - - /// @brief Set type of output rows for table no. `tableIndex` to TNode - /// - /// @note Set schema via `OutputSchema` if needed - TJobOperationPreparer& NodeOutput(int tableIndex); - - /// @brief Specify input format hints. - /// - /// These hints have lower priority than ones specified in spec. - TJobOperationPreparer& InputFormatHints(TFormatHints hints); - - /// @brief Specify output format hints. - /// - /// These hints have lower priority than ones specified in spec. - TJobOperationPreparer& OutputFormatHints(TFormatHints hints); - - /// @brief Specify format hints. - /// - /// These hints have lower priority than ones specified in spec. 
- TJobOperationPreparer& FormatHints(TUserJobFormatHints newFormatHints); - - /// @name "Private" members - /// The following methods should not be used by clients in @ref NYT::IJob::PrepareOperation - ///@{ - - /// @brief Finish the building process. - void Finish(); - - /// @brief Get output table schemas as specified by the user. - TVector<TTableSchema> GetOutputSchemas(); - - /// @brief Get input column renamings as specified by the user. - const TVector<THashMap<TString, TString>>& GetInputColumnRenamings() const; - - /// @brief Get input column filters as specified by the user. - const TVector<TMaybe<TVector<TString>>>& GetInputColumnFilters() const; - - /// @brief Get input column descriptions as specified by the user. - const TVector<TMaybe<TTableStructure>>& GetInputDescriptions() const; - - /// @brief Get output column descriptions as specified by the user. - const TVector<TMaybe<TTableStructure>>& GetOutputDescriptions() const; - - /// @brief Get format hints as specified by the user. - const TUserJobFormatHints& GetFormatHints() const; - - ///@} -private: - - /// @brief Validate that schema for output table no. `tableIndex` has not been set yet. - void ValidateMissingOutputSchema(int tableIndex) const; - - /// @brief Validate that description for input table no. `tableIndex` has not been set yet. - void ValidateMissingInputDescription(int tableIndex) const; - - /// @brief Validate that description for output table no. `tableIndex` has not been set yet. - void ValidateMissingOutputDescription(int tableIndex) const; - - /// @brief Validate that `tableIndex` is in correct range for input table indices. - /// - /// @param message Message to add to the exception in case of violation. - void ValidateInputTableIndex(int tableIndex, TStringBuf message) const; - - /// @brief Validate that `tableIndex` is in correct range for output table indices. - /// - /// @param message Message to add to the exception in case of violation. 
- void ValidateOutputTableIndex(int tableIndex, TStringBuf message) const; - - /// @brief Validate that all the output schemas has been set. - void FinallyValidate() const; - - static TTableSchema EmptyNonstrictSchema(); - -private: - const IOperationPreparationContext& Context_; - - TVector<TMaybe<TTableSchema>> OutputSchemas_; - TVector<THashMap<TString, TString>> InputColumnRenamings_; - TVector<TMaybe<TVector<TString>>> InputColumnFilters_; - TVector<TMaybe<TTableStructure>> InputTableDescriptions_; - TVector<TMaybe<TTableStructure>> OutputTableDescriptions_; - TUserJobFormatHints FormatHints_ = {}; -}; - -//////////////////////////////////////////////////////////////////////////////// - -/// -/// @brief Interface for all user jobs. -class IJob - : public TThrRefBase -{ -public: - - /// - /// @brief Type of job. - enum EType - { - Mapper, - Reducer, - ReducerAggregator, - RawJob, - VanillaJob, - }; - - /// - /// @brief Save job state to stream to be restored on cluster nodes. - virtual void Save(IOutputStream& stream) const - { - Y_UNUSED(stream); - } - - /// - /// @brief Restore job state from a stream. - virtual void Load(IInputStream& stream) - { - Y_UNUSED(stream); - } - - /// - /// @brief Get operation secure vault (specified in @ref NYT::TOperationOptions::SecureVault) inside a job. - const TNode& SecureVault() const - { - return GetJobSecureVault(); - } - - /// - /// @brief Get number of output tables. - i64 GetOutputTableCount() const - { - Y_VERIFY(NDetail::OutputTableCount > 0); - - return NDetail::OutputTableCount; - } - - /// - /// @brief Method allowing user to control some properties of input and output tables and formats. - /// - /// User can override this method in their job class to: - /// - specify output table schemas. 
- /// The most natural way is usually through @ref NYT::TJobOperationPreparer::OutputDescription (especially for protobuf), - /// but you can use @ref NYT::TJobOperationPreparer::OutputSchema directly - /// - specify output row type (@ref NYT::TJobOperationPreparer::OutputDescription) - /// - specify input row type (@ref NYT::TJobOperationPreparer::InputDescription) - /// - specify input column filter and renaming (@ref NYT::TJobOperationPreparer::InputColumnFilter and @ref NYT::TJobOperationPreparer::InputColumnRenaming) - /// - specify format hints (@ref NYT::TJobOperationPreparer::InputFormatHints, - /// NYT::TJobOperationPreparer::OutputFormatHints and @ref NYT::TJobOperationPreparer::FormatHints) - /// - maybe something more, cf. the methods of @ref NYT::TJobOperationPreparer. - /// - /// If one has several similar tables, groups can be used. - /// Groups are delimited by @ref NYT::TJobOperationPreparer::BeginInputGroup / - /// @ref NYT::TJobOperationPreparer::TInputGroup::EndInputGroup and - /// @ref NYT::TJobOperationPreparer::BeginOutputGroup / - /// @ref NYT::TJobOperationPreparer::TOutputGroup::EndOutputGroup. - /// Example: - /// @code{.cpp} - /// preparer - /// .BeginInputGroup({1,2,4,8}) - /// .ColumnRenaming({{"a", "b"}, {"c", "d"}}) - /// .ColumnFilter({"a", "c"}) - /// .EndInputGroup(); - /// @endcode - /// - /// @note All the output table schemas must be set - /// (possibly as empty nonstrict using @ref NYT::TJobOperationPreparer::NoOutputSchema or - /// @ref NYT::TJobOperationPreparer::TOutputGroup::NoSchema). - /// By default all the output table schemas are marked as empty nonstrict. - virtual void PrepareOperation(const IOperationPreparationContext& context, TJobOperationPreparer& preparer) const; -}; - -/// -/// @brief Declare what fields of currently declared job class to save and restore on cluster node. -#define Y_SAVELOAD_JOB(...) 
\ - virtual void Save(IOutputStream& stream) const override { Save(&stream); } \ - virtual void Load(IInputStream& stream) override { Load(&stream); } \ - Y_PASS_VA_ARGS(Y_SAVELOAD_DEFINE(__VA_ARGS__)) - -//////////////////////////////////////////////////////////////////////////////// - -/// -/// @brief Interface for jobs with typed inputs and outputs. -class IStructuredJob - : public IJob -{ -public: - /// - /// @brief This methods are called when creating table reader and writer for the job. - /// - /// Override them if you want to implement custom input logic. (e.g. addtitional bufferization) - virtual TRawTableReaderPtr CreateCustomRawJobReader(int fd) const; - virtual THolder<IProxyOutput> CreateCustomRawJobWriter(size_t outputTableCount) const; - - virtual TStructuredRowStreamDescription GetInputRowStreamDescription() const = 0; - virtual TStructuredRowStreamDescription GetOutputRowStreamDescription() const = 0; -}; - -//////////////////////////////////////////////////////////////////////////////// - -/// -/// @brief Create default raw job reader. -TRawTableReaderPtr CreateRawJobReader(int fd = 0); - -/// -/// @brief Create default raw job writer. -THolder<IProxyOutput> CreateRawJobWriter(size_t outputTableCount); - -//////////////////////////////////////////////////////////////////////////////// - -/// -/// @brief Base interface for structured (typed) map jobs. -class IMapperBase - : public IStructuredJob -{ }; - -/// -/// @brief Base interface for structured (typed) map jobs with given reader and writer. -template <class TR, class TW> -class IMapper - : public IMapperBase -{ -public: - using TReader = TR; - using TWriter = TW; - -public: - /// Type of job implemented by this class. - static constexpr EType JobType = EType::Mapper; - - /// - /// @brief This method is called before feeding input rows to mapper (before `Do` method). 
- virtual void Start(TWriter* writer) - { - Y_UNUSED(writer); - } - - /// - /// @brief This method is called exactly once for the whole job input. - /// - /// Read input rows from `reader` and write output ones to `writer`. - virtual void Do(TReader* reader, TWriter* writer) = 0; - - /// - /// @brief This method is called after feeding input rows to mapper (after `Do` method). - virtual void Finish(TWriter* writer) - { - Y_UNUSED(writer); - } - - virtual TStructuredRowStreamDescription GetInputRowStreamDescription() const override; - virtual TStructuredRowStreamDescription GetOutputRowStreamDescription() const override; -}; - -//////////////////////////////////////////////////////////////////////////////// - -/// -/// @brief Base interface for structured (typed) reduce jobs. -/// -/// It is common base for @ref NYT::IReducer and @ref NYT::IAggregatorReducer. -class IReducerBase - : public IStructuredJob -{ }; - -/// -/// @brief Base interface for structured (typed) reduce jobs with given reader and writer. -template <class TR, class TW> -class IReducer - : public IReducerBase -{ -public: - using TReader = TR; - using TWriter = TW; - -public: - /// Type of job implemented by this class. - static constexpr EType JobType = EType::Reducer; - -public: - - /// - /// @brief This method is called before feeding input rows to reducer (before `Do` method). - virtual void Start(TWriter* writer) - { - Y_UNUSED(writer); - } - - /// - /// @brief This method is called exactly once for each range with same value of `ReduceBy` (or `JoinBy`) keys. - virtual void Do(TReader* reader, TWriter* writer) = 0; - - /// - /// @brief This method is called after feeding input rows to reducer (after `Do` method). - virtual void Finish(TWriter* writer) - { - Y_UNUSED(writer); - } - - /// - /// @brief Refuse to process the remaining row ranges and finish the job (successfully). 
- void Break(); - - virtual TStructuredRowStreamDescription GetInputRowStreamDescription() const override; - virtual TStructuredRowStreamDescription GetOutputRowStreamDescription() const override; -}; - -//////////////////////////////////////////////////////////////////////////////// - -/// -/// @brief Base interface of jobs used inside reduce operations. -/// -/// Unlike @ref NYT::IReducer jobs their `Do' method is called only once -/// and takes whole range of records split by key boundaries. -/// -/// Template argument `TR` must be @ref NYT::TTableRangesReader. -template <class TR, class TW> -class IAggregatorReducer - : public IReducerBase -{ -public: - using TReader = TR; - using TWriter = TW; - -public: - /// Type of job implemented by this class. - static constexpr EType JobType = EType::ReducerAggregator; - -public: - /// - /// @brief This method is called before feeding input rows to reducer (before `Do` method). - virtual void Start(TWriter* writer) - { - Y_UNUSED(writer); - } - - /// - /// @brief This method is called exactly once for the whole job input. - virtual void Do(TReader* reader, TWriter* writer) = 0; - - /// - /// @brief This method is called after feeding input rows to reducer (after `Do` method). - virtual void Finish(TWriter* writer) - { - Y_UNUSED(writer); - } - - virtual TStructuredRowStreamDescription GetInputRowStreamDescription() const override; - virtual TStructuredRowStreamDescription GetOutputRowStreamDescription() const override; -}; - -//////////////////////////////////////////////////////////////////////////////// - -/// -/// @brief Interface for raw jobs (i.e. reading and writing byte streams). -class IRawJob - : public IJob -{ -public: - /// Type of job implemented by this class. - static constexpr EType JobType = EType::RawJob; - - /// - /// @brief This method is called exactly once for the whole job input. 
- virtual void Do(const TRawJobContext& jobContext) = 0; -}; - -/// -/// @brief Interface of jobs that run the given bash command. -class ICommandJob - : public IJob -{ -public: - /// - /// @brief Get bash command to run. - /// - /// @note This method is called on the client side. - virtual const TString& GetCommand() const = 0; -}; - -/// -/// @brief Raw job executing given bash command. -/// -/// @note The binary will not be uploaded. -class TCommandRawJob - : public IRawJob - , public ICommandJob -{ -public: - /// - /// @brief Create job with specified command. - /// - /// @param command Bash command to run. - explicit TCommandRawJob(TStringBuf command = {}); - - const TString& GetCommand() const override; - void Do(const TRawJobContext& jobContext) override; - -private: - TString Command_; -}; - -//////////////////////////////////////////////////////////////////////////////// - -/// -/// @brief Base interface for vanilla jobs. -/// -/// @see https://yt.yandex-team.ru/docs/description/mr/vanilla -class IVanillaJobBase - : public virtual IStructuredJob -{ -public: - /// Type of job implemented by this class. - static constexpr EType JobType = EType::VanillaJob; -}; - -template <class TW = void> -class IVanillaJob; - -/// -/// @brief Interface of vanilla job without outputs. -template <> -class IVanillaJob<void> - : public IVanillaJobBase -{ -public: - /// - /// @brief This method is called exactly once for each vanilla job. - virtual void Do() = 0; - - virtual TStructuredRowStreamDescription GetInputRowStreamDescription() const override; - virtual TStructuredRowStreamDescription GetOutputRowStreamDescription() const override; -}; - -/// -/// @brief Vanilla job executing given bash command. -/// -/// @note The binary will not be uploaded. -class TCommandVanillaJob - : public IVanillaJob<> - , public ICommandJob -{ -public: - /// - /// @brief Create job with specified command. - /// - /// @param command Bash command to run. 
- explicit TCommandVanillaJob(TStringBuf command = {}); - - const TString& GetCommand() const override; - void Do() override; - -private: - TString Command_; -}; - -/// -/// @brief Interface for vanilla jobs with output tables. -template <class TW> -class IVanillaJob - : public IVanillaJobBase -{ -public: - using TWriter = TW; - -public: - /// - /// @brief This method is called before `Do` method. - virtual void Start(TWriter* /* writer */) - { } - - /// - /// @brief This method is called exactly once for each vanilla job. - /// - /// Write output rows to `writer`. - virtual void Do(TWriter* writer) = 0; - - /// - /// @brief This method is called after `Do` method. - virtual void Finish(TWriter* /* writer */) - { } - - virtual TStructuredRowStreamDescription GetInputRowStreamDescription() const override; - virtual TStructuredRowStreamDescription GetOutputRowStreamDescription() const override; -}; - -//////////////////////////////////////////////////////////////////////////////// - -/// -/// @brief Attributes to request for an operation. -enum class EOperationAttribute : int -{ - Id /* "id" */, - Type /* "type" */, - State /* "state" */, - AuthenticatedUser /* "authenticated_user" */, - StartTime /* "start_time" */, - FinishTime /* "finish_time" */, - BriefProgress /* "brief_progress" */, - BriefSpec /* "brief_spec" */, - Suspended /* "suspended" */, - Result /* "result" */, - Progress /* "progress" */, - Events /* "events" */, - Spec /* "spec" */, - FullSpec /* "full_spec" */, - UnrecognizedSpec /* "unrecognized_spec" */, -}; - -/// -/// @brief Class describing which attributes to request in @ref NYT::IClient::GetOperation or @ref NYT::IClient::ListOperations. -struct TOperationAttributeFilter -{ - /// @cond Doxygen_Suppress - using TSelf = TOperationAttributeFilter; - /// @endcond - - TVector<EOperationAttribute> Attributes_; - - /// - /// @brief Add attribute to the filter. Calls are supposed to be chained. 
- TSelf& Add(EOperationAttribute attribute) - { - Attributes_.push_back(attribute); - return *this; - } -}; - -/// -/// @brief Options for @ref NYT::IClient::GetOperation call. -struct TGetOperationOptions -{ - /// @cond Doxygen_Suppress - using TSelf = TGetOperationOptions; - /// @endcond - - /// - /// @brief What attributes to request (if omitted, the default set of attributes will be requested). - FLUENT_FIELD_OPTION(TOperationAttributeFilter, AttributeFilter); -}; - -/// -/// @brief "Coarse-grained" state of an operation. -enum class EOperationBriefState : int -{ - InProgress /* "in_progress" */, - Completed /* "completed" */, - Aborted /* "aborted" */, - - /// Failed - Failed /* "failed" */, -}; - -/// -/// @brief Operation type. -enum class EOperationType : int -{ - Map /* "map" */, - Merge /* "merge" */, - Erase /* "erase" */, - Sort /* "sort" */, - Reduce /* "reduce" */, - MapReduce /* "map_reduce" */, - RemoteCopy /* "remote_copy" */, - JoinReduce /* "join_reduce" */, - Vanilla /* "vanilla" */, -}; - -/// -/// @brief Operation progress. -struct TOperationProgress -{ - /// - /// @brief Total job statistics. - TJobStatistics JobStatistics; - - /// - /// @brief Job counter for various job states with hierarchy. - TJobCounters JobCounters; - - /// - /// @brief Time when this progress was built on scheduler or CA. - TMaybe<TInstant> BuildTime; -}; - -/// -/// @brief Brief operation progress (numbers of jobs in these states). -struct TOperationBriefProgress -{ - ui64 Aborted = 0; - ui64 Completed = 0; - ui64 Failed = 0; - ui64 Lost = 0; - ui64 Pending = 0; - ui64 Running = 0; - ui64 Total = 0; -}; - -/// -/// @brief Operation result. -struct TOperationResult -{ - /// - /// @brief For a unsuccessfully finished operation: description of error. - TMaybe<TYtError> Error; -}; - -/// -/// @brief Operation event (change of state). -struct TOperationEvent -{ - /// - /// @brief New state of operation. - TString State; - - /// - /// @brief Time of state change. 
- TInstant Time; -}; - -/// -/// @brief Operation info. -/// -/// A field may be `Nothing()` either if it was not requested (see @ref NYT::TGetOperationOptions::AttributeFilter) -/// or it is not available (i.e. `FinishTime` for a running operation). -/// @see https://yt.yandex-team.ru/docs/api/commands#get_operation -struct TOperationAttributes -{ - /// - /// @brief Operation id. - TMaybe<TOperationId> Id; - - /// - /// @brief Operation type. - TMaybe<EOperationType> Type; - - /// - /// @brief Operation state. - TMaybe<TString> State; - - /// - /// @brief "Coarse-grained" operation state. - TMaybe<EOperationBriefState> BriefState; - - /// - /// @brief Name of user that started the operation. - TMaybe<TString> AuthenticatedUser; - - /// - /// @brief Operation start time. - TMaybe<TInstant> StartTime; - - /// - /// @brief Operation finish time (if the operation has finished). - TMaybe<TInstant> FinishTime; - - /// - /// @brief Brief progress of the operation. - TMaybe<TOperationBriefProgress> BriefProgress; - - /// - /// @brief Brief spec of operation (light-weight fields only). - TMaybe<TNode> BriefSpec; - - /// - /// @brief Spec of the operation as provided by the user. - TMaybe<TNode> Spec; - - /// - /// @brief Full spec of operation (all fields not specified by user are filled with default values). - TMaybe<TNode> FullSpec; - - /// - /// @brief Fields not recognized by scheduler. - TMaybe<TNode> UnrecognizedSpec; - - /// - /// @brief Is operation suspended. - TMaybe<bool> Suspended; - - /// - /// @brief Operation result. - TMaybe<TOperationResult> Result; - - /// - /// @brief Operation progress. - TMaybe<TOperationProgress> Progress; - - /// - /// @brief List of operation events (changes of state). - TMaybe<TVector<TOperationEvent>> Events; - - /// - /// @brief Map from alert name to its description. - TMaybe<THashMap<TString, TYtError>> Alerts; -}; - -/// -/// @brief Direction of cursor for paging, see @ref NYT::TListOperationsOptions::CursorDirection. 
-enum class ECursorDirection -{ - Past /* "past" */, - Future /* "future" */, -}; - -/// -/// @brief Options of @ref NYT::IClient::ListOperations command. -/// -/// @see https://yt.yandex-team.ru/docs/api/commands.html#list_operations -struct TListOperationsOptions -{ - /// @cond Doxygen_Suppress - using TSelf = TListOperationsOptions; - /// @endcond - - /// - /// @name Time range specification - /// - /// List operations with start time in half-closed interval - /// `[CursorTime, ToTime)` if `CursorDirection == Future` or - /// `[FromTime, CursorTime)` if `CursorDirection == Past`. - ///@{ - - /// - /// @brief Search for operations with start time >= `FromTime`. - FLUENT_FIELD_OPTION(TInstant, FromTime); - - /// - /// @brief Search for operations with start time < `ToTime`. - FLUENT_FIELD_OPTION(TInstant, ToTime); - - /// - /// @brief Additional restriction on operation start time (useful for pagination). - /// - /// Search for operations with start time >= `CursorTime` if `CursorDirection == Future` - /// and with start time < `CursorTime` if `CursorDirection == Past` - FLUENT_FIELD_OPTION(TInstant, CursorTime); - - /// - /// @brief Direction of pagination (see @ref NYT::TListOperationsOptions::CursorTime). - FLUENT_FIELD_OPTION(ECursorDirection, CursorDirection); - - ///@} - - /// - /// @name Filters - /// Choose operations satisfying given filters. - ///@{ - - /// - /// @brief Search for `Filter` as a substring in operation text factors - /// (e.g. title or input/output table paths). - FLUENT_FIELD_OPTION(TString, Filter); - - /// - /// @brief Choose operations whose pools include `Pool`. - FLUENT_FIELD_OPTION(TString, Pool); - - /// - /// @brief Choose operations with given @ref NYT::TOperationAttributes::AuthenticatedUser. - FLUENT_FIELD_OPTION(TString, User); - - /// - /// @brief Choose operations with given @ref NYT::TOperationAttributes::State. 
- FLUENT_FIELD_OPTION(TString, State); - - /// - /// @brief Choose operations with given @ref NYT::TOperationAttributes::Type. - FLUENT_FIELD_OPTION(EOperationType, Type); - - /// - /// @brief Choose operations having (or not having) any failed jobs. - FLUENT_FIELD_OPTION(bool, WithFailedJobs); - - ///@} - - /// - /// @brief Search for operations in the archive in addition to Cypress. - FLUENT_FIELD_OPTION(bool, IncludeArchive); - - /// - /// @brief Include the counters for different filter parameters in the response. - /// - /// Include number of operations for each pool, user, state, type - /// and the number of operations having failed jobs. - FLUENT_FIELD_OPTION(bool, IncludeCounters); - - /// - /// @brief Return no more than `Limit` operations (current default and maximum value is 1000). - FLUENT_FIELD_OPTION(i64, Limit); -}; - -/// -/// @brief Response for @ref NYT::IClient::ListOperations command. -struct TListOperationsResult -{ - /// - /// @brief Found operations' attributes. - TVector<TOperationAttributes> Operations; - - /// - /// @name Counters for different filter. - /// - /// If counters were requested (@ref NYT::TListOperationsOptions::IncludeCounters is `true`) - /// the maps contain the number of operations found for each pool, user, state and type. - /// NOTE: - /// 1) Counters ignore CursorTime and CursorDirection, - /// they always are collected in the whole [FromTime, ToTime) interval. - /// 2) Each next counter in the sequence [pool, user, state, type, with_failed_jobs] - /// takes into account all the previous filters (i.e. if you set User filter to "some-user" - /// type counts describe only operations with user "some-user"). - /// @{ - - /// - /// @brief Number of operations for each pool. - TMaybe<THashMap<TString, i64>> PoolCounts; - - /// - /// @brief Number of operations for each user (subject to previous filters). 
- TMaybe<THashMap<TString, i64>> UserCounts; - - /// - /// @brief Number of operations for each state (subject to previous filters). - TMaybe<THashMap<TString, i64>> StateCounts; - - /// - /// @brief Number of operations for each type (subject to previous filters). - TMaybe<THashMap<EOperationType, i64>> TypeCounts; - - /// - /// @brief Number of operations having failed jobs (subject to all previous filters). - TMaybe<i64> WithFailedJobsCount; - - /// @} - - /// - /// @brief Whether some operations were not returned due to @ref NYT::TListOperationsOptions::Limit. - /// - /// `Incomplete == true` means that not all operations satisfying filters - /// were returned (limit exceeded) and you need to repeat the request with new @ref NYT::TListOperationsOptions::CursorTime - /// (e.g. `CursorTime == *Operations.back().StartTime`, but don't forget to - /// remove the duplicates). - bool Incomplete; -}; - -//////////////////////////////////////////////////////////////////////////////// - -/// -/// @brief Data source for @ref NYT::IClient::ListJobs command. -enum class EListJobsDataSource : int -{ - Runtime /* "runtime" */, - Archive /* "archive" */, - Auto /* "auto" */, - Manual /* "manual" */, -}; - -/// -/// @brief Job type. 
-enum class EJobType : int -{ - SchedulerFirst /* "scheduler_first" */, - Map /* "map" */, - PartitionMap /* "partition_map" */, - SortedMerge /* "sorted_merge" */, - OrderedMerge /* "ordered_merge" */, - UnorderedMerge /* "unordered_merge" */, - Partition /* "partition" */, - SimpleSort /* "simple_sort" */, - FinalSort /* "final_sort" */, - SortedReduce /* "sorted_reduce" */, - PartitionReduce /* "partition_reduce" */, - ReduceCombiner /* "reduce_combiner" */, - RemoteCopy /* "remote_copy" */, - IntermediateSort /* "intermediate_sort" */, - OrderedMap /* "ordered_map" */, - JoinReduce /* "join_reduce" */, - Vanilla /* "vanilla" */, - SchedulerUnknown /* "scheduler_unknown" */, - SchedulerLast /* "scheduler_last" */, - ReplicatorFirst /* "replicator_first" */, - ReplicateChunk /* "replicate_chunk" */, - RemoveChunk /* "remove_chunk" */, - RepairChunk /* "repair_chunk" */, - SealChunk /* "seal_chunk" */, - ReplicatorLast /* "replicator_last" */, -}; - -/// -/// @brief Well-known task names. -enum class ETaskName : int -{ - Map /* "map" */, - PartitionMap0 /* "partition_map(0)" */, - SortedMerge /* "sorted_merge" */, - OrderedMerge /* "ordered_merge" */, - UnorderedMerge /* "unordered_merge" */, - Partition0 /* "partition(0)" */, - Partition1 /* "partition(1)" */, - Partition2 /* "partition(2)" */, - SimpleSort /* "simple_sort" */, - FinalSort /* "final_sort" */, - SortedReduce /* "sorted_reduce" */, - PartitionReduce /* "partition_reduce" */, - ReduceCombiner /* "reduce_combiner" */, - RemoteCopy /* "remote_copy" */, - IntermediateSort /* "intermediate_sort" */, - OrderedMap /* "ordered_map" */, - JoinReduce /* "join_reduce" */, -}; - -/// -/// @brief Task name (can either well-known or just a string). -class TTaskName -{ -public: - - // Constructors are implicit by design. - - /// - /// @brief Construct a custom task name. - TTaskName(TString taskName); - - /// - /// @brief Construct a custom task name. 
- TTaskName(const char* taskName); - - /// - /// @brief Construct a well-known task name. - TTaskName(ETaskName taskName); - - const TString& Get() const; - -private: - TString TaskName_; -}; - -/// -/// @brief Job state. -enum class EJobState : int -{ - None /* "none" */, - Waiting /* "waiting" */, - Running /* "running" */, - Aborting /* "aborting" */, - Completed /* "completed" */, - Failed /* "failed" */, - Aborted /* "aborted" */, - Lost /* "lost" */, -}; - -/// -/// @brief Job sort field. -/// -/// @see @ref NYT::TListJobsOptions. -enum class EJobSortField : int -{ - Type /* "type" */, - State /* "state" */, - StartTime /* "start_time" */, - FinishTime /* "finish_time" */, - Address /* "address" */, - Duration /* "duration" */, - Progress /* "progress" */, - Id /* "id" */, -}; - -/// -/// @brief Job sort direction. -/// -/// @see @ref NYT::TListJobsOptions. -enum class EJobSortDirection : int -{ - Ascending /* "ascending" */, - Descending /* "descending" */, -}; - -/// -/// @brief Options for @ref NYT::IClient::ListJobs. -/// -/// @see https://yt.yandex-team.ru/docs/api/commands.html#list_jobs -struct TListJobsOptions -{ - /// @cond Doxygen_Suppress - using TSelf = TListJobsOptions; - /// @endcond - - /// - /// @name Filters - /// Return only jobs with given value of parameter (type, state, address and existence of stderr). - /// If a field is `Nothing()`, return jobs with all possible values of the corresponding parameter. - /// @{ - - /// - /// @brief Job type. - FLUENT_FIELD_OPTION(EJobType, Type); - - /// - /// @brief Job state. - FLUENT_FIELD_OPTION(EJobState, State); - - /// - /// @brief Address of the cluster node where job was running. - FLUENT_FIELD_OPTION(TString, Address); - - /// - /// @brief Return only jobs whose stderr has been saved. - FLUENT_FIELD_OPTION(bool, WithStderr); - - /// - /// @brief Return only jobs whose spec has been saved. 
- FLUENT_FIELD_OPTION(bool, WithSpec); - - /// - /// @brief Return only jobs whose fail context has been saved. - FLUENT_FIELD_OPTION(bool, WithFailContext); - - /// @} - - /// - /// @name Sort options - /// @{ - - /// - /// @brief Sort by this field. - FLUENT_FIELD_OPTION(EJobSortField, SortField); - - /// - /// @brief Sort order. - FLUENT_FIELD_OPTION(ESortOrder, SortOrder); - - /// @} - - /// - /// @brief Data source. - /// - /// Where to search for jobs: in scheduler and Cypress ('Runtime'), in archive ('Archive'), - /// automatically basing on operation presence in Cypress ('Auto') or choose manually (`Manual'). - FLUENT_FIELD_OPTION(EListJobsDataSource, DataSource); - - /// @deprecated - FLUENT_FIELD_OPTION(bool, IncludeCypress); - - /// @deprecated - FLUENT_FIELD_OPTION(bool, IncludeControllerAgent); - - /// @deprecated - FLUENT_FIELD_OPTION(bool, IncludeArchive); - - /// - /// @brief Maximum number of jobs to return. - FLUENT_FIELD_OPTION(i64, Limit); - - /// - /// @brief Number of jobs (in specified sort order) to skip. - /// - /// Together with @ref NYT::TListJobsOptions::Limit may be used for pagination. - FLUENT_FIELD_OPTION(i64, Offset); -}; - -/// -/// @brief Description of a core dump that happened in the job. -struct TCoreInfo -{ - i64 ProcessId; - TString ExecutableName; - TMaybe<ui64> Size; - TMaybe<TYtError> Error; -}; - -/// -/// @brief Job attributes. -/// -/// A field may be `Nothing()` if it is not available (i.e. `FinishTime` for a running job). -/// -/// @see https://yt.yandex-team.ru/docs/api/commands#get_job -struct TJobAttributes -{ - /// - /// @brief Job id. - TMaybe<TJobId> Id; - - /// - /// @brief Job type - TMaybe<EJobType> Type; - - /// - /// @brief Job state. - TMaybe<EJobState> State; - - /// - /// @brief Address of a cluster node where job was running. - TMaybe<TString> Address; - - /// - /// @brief The name of the task that job corresponds to. - TMaybe<TString> TaskName; - - /// - /// @brief Job start time. 
- TMaybe<TInstant> StartTime; - - /// - /// @brief Job finish time (for a finished job). - TMaybe<TInstant> FinishTime; - - /// - /// @brief Estimated ratio of job's completed work. - TMaybe<double> Progress; - - /// - /// @brief Size of saved job stderr. - TMaybe<i64> StderrSize; - - /// - /// @brief Error for a unsuccessfully finished job. - TMaybe<TYtError> Error; - - /// - /// @brief Job brief statistics. - TMaybe<TNode> BriefStatistics; - - /// - /// @brief Job input paths (with ranges). - TMaybe<TVector<TRichYPath>> InputPaths; - - /// - /// @brief Infos for core dumps produced by job. - TMaybe<TVector<TCoreInfo>> CoreInfos; -}; - -/// -/// @brief Response for @ref NYT::IOperation::ListJobs. -struct TListJobsResult -{ - /// - /// @brief Jobs. - TVector<TJobAttributes> Jobs; - - /// - /// @deprecated - TMaybe<i64> CypressJobCount; - - /// - /// @brief Number of jobs retrieved from controller agent. - TMaybe<i64> ControllerAgentJobCount; - - /// - /// @brief Number of jobs retrieved from archive. - TMaybe<i64> ArchiveJobCount; -}; - -//////////////////////////////////////////////////////////////////// - -/// -/// @brief Options for @ref NYT::IClient::GetJob. -struct TGetJobOptions -{ - /// @cond Doxygen_Suppress - using TSelf = TGetJobOptions; - /// @endcond -}; - -/// -/// @brief Options for @ref NYT::IClient::GetJobInput. -struct TGetJobInputOptions -{ - /// @cond Doxygen_Suppress - using TSelf = TGetJobInputOptions; - /// @endcond -}; - -/// -/// @brief Options for @ref NYT::IClient::GetJobFailContext. -struct TGetJobFailContextOptions -{ - /// @cond Doxygen_Suppress - using TSelf = TGetJobFailContextOptions; - /// @endcond -}; - -/// -/// @brief Options for @ref NYT::IClient::GetJobStderr. -struct TGetJobStderrOptions -{ - /// @cond Doxygen_Suppress - using TSelf = TGetJobStderrOptions; - /// @endcond -}; - -//////////////////////////////////////////////////////////////////// - -/// -/// @brief Options for @ref NYT::IOperation::GetFailedJobInfo. 
-struct TGetFailedJobInfoOptions -{ - /// @cond Doxygen_Suppress - using TSelf = TGetFailedJobInfoOptions; - /// @endcond - - /// - /// @brief How many jobs to download. Which jobs will be chosen is undefined. - FLUENT_FIELD_DEFAULT(ui64, MaxJobCount, 10); - - /// - /// @brief How much of stderr tail should be downloaded. - FLUENT_FIELD_DEFAULT(ui64, StderrTailSize, 64 * 1024); -}; - -//////////////////////////////////////////////////////////////////////////////// - -/// -/// @brief Interface representing an operation. -struct IOperation - : public TThrRefBase -{ - virtual ~IOperation() = default; - - /// - /// @brief Get operation id. - virtual const TOperationId& GetId() const = 0; - - /// - /// @brief Get URL of the operation in YT Web UI. - virtual TString GetWebInterfaceUrl() const = 0; - - /// - /// @brief Get last error for not started operations. Get state on YT cluster for started operations. - /// - /// For not started operations last error is an error that's being retried during operation - /// preparation/start (e.g. lock files, start operation request). - virtual TString GetStatus() const = 0; - - /// - /// @brief Get preparation future. - /// - /// @return future that is set when operation is prepared. - virtual ::NThreading::TFuture<void> GetPreparedFuture() = 0; - - /// - /// @brief Start operation synchronously. - /// - /// @note: Do NOT call this method twice. - /// - /// If operation is not prepared yet, Start() will block waiting for preparation finish. - /// Be ready to catch exception if operation preparation or start failed. - virtual void Start() = 0; - - /// - /// @brief Is the operation started - /// - /// Returns true if the operation is started on the cluster - virtual bool IsStarted() const = 0; - - /// - /// @brief Get start future. - /// - /// @return future that is set when operation is started. - virtual ::NThreading::TFuture<void> GetStartedFuture() = 0; - - /// - /// @brief Start watching operation. 
- /// - /// @return future that is set when operation is complete. - /// - /// @note: the user should check value of returned future to ensure that operation completed successfully e.g. - /// @code{.cpp} - /// auto operationComplete = operation->Watch(); - /// operationComplete.Wait(); - /// operationComplete.GetValue(); /// will throw if operation completed with errors - /// @endcode - /// - /// If operation is completed successfully the returned future contains void value. - /// If operation is completed with error future contains @ref NYT::TOperationFailedError. - /// In rare cases when error occurred while waiting (e.g. YT become unavailable) future might contain other exception. - virtual ::NThreading::TFuture<void> Watch() = 0; - - /// - /// @brief Get information about failed jobs. - /// - /// Can be called for operation in any stage. - /// Though user should keep in mind that this method always fetches info from cypress - /// and doesn't work when operation is archived. Successfully completed operations can be archived - /// quite quickly (in about ~30 seconds). - virtual TVector<TFailedJobInfo> GetFailedJobInfo(const TGetFailedJobInfoOptions& options = TGetFailedJobInfoOptions()) = 0; - - /// - /// Get operation brief state. - virtual EOperationBriefState GetBriefState() = 0; - - /// - /// @brief Get error (if operation has failed). - /// - /// @return `Nothing()` if operation is in 'Completed' or 'InProgress' state (or reason for failed / aborted operation). - virtual TMaybe<TYtError> GetError() = 0; - - /// - /// Get job statistics. - virtual TJobStatistics GetJobStatistics() = 0; - - /// - /// Get operation progress. - /// - /// @return `Nothing()` if operation has no running jobs yet, e.g. when it is in "materializing" or "pending" state. - virtual TMaybe<TOperationBriefProgress> GetBriefProgress() = 0; - - /// - /// @brief Abort operation. - /// - /// Operation will be finished immediately. - /// All results of completed/running jobs will be lost. 
- /// - /// @see https://yt.yandex-team.ru/docs/api/commands#abort_op - virtual void AbortOperation() = 0; - - /// - /// @brief Complete operation. - /// - /// Operation will be finished immediately. - /// All results of completed jobs will appear in output tables. - /// All results of running (not completed) jobs will be lost. - /// - /// @see https://yt.yandex-team.ru/docs/api/commands#complete_op - virtual void CompleteOperation() = 0; - - /// - /// @brief Suspend operation. - /// - /// Jobs will not be aborted by default, c.f. @ref NYT::TSuspendOperationOptions. - /// - /// @see https://yt.yandex-team.ru/docs/api/commands#suspend_op - virtual void SuspendOperation( - const TSuspendOperationOptions& options = TSuspendOperationOptions()) = 0; - - /// - /// @brief Resume previously suspended operation. - /// - /// @see https://yt.yandex-team.ru/docs/api/commands#resume_op - virtual void ResumeOperation( - const TResumeOperationOptions& options = TResumeOperationOptions()) = 0; - - /// - /// @brief Get operation attributes. - /// - /// @see https://yt.yandex-team.ru/docs/api/commands#get_operation - virtual TOperationAttributes GetAttributes( - const TGetOperationOptions& options = TGetOperationOptions()) = 0; - - /// - /// @brief Update operation runtime parameters. - /// - /// @see https://yt.yandex-team.ru/docs/api/commands#update_op_parameters - virtual void UpdateParameters( - const TUpdateOperationParametersOptions& options = TUpdateOperationParametersOptions()) = 0; - - /// - /// @brief Get job attributes. - /// - /// @see https://yt.yandex-team.ru/docs/api/commands#get_job - virtual TJobAttributes GetJob( - const TJobId& jobId, - const TGetJobOptions& options = TGetJobOptions()) = 0; - - /// - /// List jobs satisfying given filters (see @ref NYT::TListJobsOptions). 
- /// - /// @see https://yt.yandex-team.ru/docs/api/commands#list_jobs - virtual TListJobsResult ListJobs( - const TListJobsOptions& options = TListJobsOptions()) = 0; -}; - -/// -/// @brief Interface of client capable of managing operations. -struct IOperationClient -{ - /// - /// @brief Run Map operation. - /// - /// @param spec Operation spec. - /// @param mapper Instance of a job to run. - /// @param options Optional parameters. - /// - /// @see https://yt.yandex-team.ru/docs/description/mr/map - IOperationPtr Map( - const TMapOperationSpec& spec, - ::TIntrusivePtr<IMapperBase> mapper, - const TOperationOptions& options = TOperationOptions()); - - /// - /// @brief Run Map operation. - /// - /// @param mapper Instance of a job to run. - /// @param input Input table(s) - /// @param output Output table(s) - /// @param spec Operation spec. - /// @param options Optional parameters. - /// - /// @see https://yt.yandex-team.ru/docs/description/mr/map - IOperationPtr Map( - ::TIntrusivePtr<IMapperBase> mapper, - const TOneOrMany<TStructuredTablePath>& input, - const TOneOrMany<TStructuredTablePath>& output, - const TMapOperationSpec& spec = TMapOperationSpec(), - const TOperationOptions& options = TOperationOptions()); - - /// - /// @brief Run raw Map operation. - /// - /// @param spec Operation spec. - /// @param rawJob Instance of a raw mapper to run. - /// @param options Optional parameters. - /// - /// @see https://yt.yandex-team.ru/docs/description/mr/map - virtual IOperationPtr RawMap( - const TRawMapOperationSpec& spec, - ::TIntrusivePtr<IRawJob> rawJob, - const TOperationOptions& options = TOperationOptions()) = 0; - - /// - /// @brief Run Reduce operation. - /// - /// @param spec Operation spec. - /// @param reducer Instance of a job to run. - /// @param options Optional parameters. 
- /// - /// @see https://yt.yandex-team.ru/docs/description/mr/reduce - IOperationPtr Reduce( - const TReduceOperationSpec& spec, - ::TIntrusivePtr<IReducerBase> reducer, - const TOperationOptions& options = TOperationOptions()); - - /// - /// @brief Run Reduce operation. - /// - /// @param reducer Instance of a job to run. - /// @param input Input table(s) - /// @param output Output table(s) - /// @param reduceBy Columns to group rows by. - /// @param spec Operation spec. - /// @param options Optional parameters. - /// - /// @see https://yt.yandex-team.ru/docs/description/mr/reduce - IOperationPtr Reduce( - ::TIntrusivePtr<IReducerBase> reducer, - const TOneOrMany<TStructuredTablePath>& input, - const TOneOrMany<TStructuredTablePath>& output, - const TSortColumns& reduceBy, - const TReduceOperationSpec& spec = TReduceOperationSpec(), - const TOperationOptions& options = TOperationOptions()); - - /// - /// @brief Run raw Reduce operation. - /// - /// @param spec Operation spec. - /// @param rawJob Instance of a raw reducer to run. - /// @param options Optional parameters. - /// - /// @see https://yt.yandex-team.ru/docs/description/mr/reduce - virtual IOperationPtr RawReduce( - const TRawReduceOperationSpec& spec, - ::TIntrusivePtr<IRawJob> rawJob, - const TOperationOptions& options = TOperationOptions()) = 0; - - /// - /// @brief Run JoinReduce operation. - /// - /// @param spec Operation spec. - /// @param reducer Instance of a job to run. - /// @param options Optional parameters. - /// - /// @deprecated Use @ref NYT::IOperationClient::Reduce with @ref NYT::TReduceOperationSpec::EnableKeyGuarantee set to `false. - IOperationPtr JoinReduce( - const TJoinReduceOperationSpec& spec, - ::TIntrusivePtr<IReducerBase> reducer, - const TOperationOptions& options = TOperationOptions()); - - /// - /// @brief Run raw JoinReduce operation. - /// - /// @param spec Operation spec. - /// @param rawJob Instance of a raw reducer to run. - /// @param options Optional parameters. 
- /// - /// @deprecated Use @ref NYT::IOperationClient::RawReduce with @ref NYT::TReduceOperationSpec::EnableKeyGuarantee set to `false. - virtual IOperationPtr RawJoinReduce( - const TRawJoinReduceOperationSpec& spec, - ::TIntrusivePtr<IRawJob> rawJob, - const TOperationOptions& options = TOperationOptions()) = 0; - - /// - /// @brief Run MapReduce operation. - /// - /// @param spec Operation spec. - /// @param mapper Instance of a map job to run (identity mapper if `nullptr`). - /// @param reducer Instance of a reduce job to run. - /// @param options Optional parameters. - /// - /// @see https://yt.yandex-team.ru/docs/description/mr/mapreduce - IOperationPtr MapReduce( - const TMapReduceOperationSpec& spec, - ::TIntrusivePtr<IMapperBase> mapper, - ::TIntrusivePtr<IReducerBase> reducer, - const TOperationOptions& options = TOperationOptions()); - - /// - /// @brief Run MapReduce operation. - /// - /// @param spec Operation spec. - /// @param mapper Instance of a map job to run (identity mapper if `nullptr`). - /// @param reducerCombiner Instance of a reduce combiner to run (identity reduce combiner if `nullptr`). - /// @param reducer Instance of a reduce job to run. - /// @param options Optional parameters. - /// - /// @see https://yt.yandex-team.ru/docs/description/mr/mapreduce - IOperationPtr MapReduce( - const TMapReduceOperationSpec& spec, - ::TIntrusivePtr<IMapperBase> mapper, - ::TIntrusivePtr<IReducerBase> reduceCombiner, - ::TIntrusivePtr<IReducerBase> reducer, - const TOperationOptions& options = TOperationOptions()); - - /// - /// @brief Run MapReduce operation. - /// - /// @param mapper Instance of mapper to run (identity mapper if `nullptr`). - /// @param reducer Instance of reducer to run. - /// @param input Input table(s) - /// @param output Output table(s) - /// @param reduceBy Columns to group rows by. - /// @param spec Operation spec. - /// @param options Optional parameters. 
- /// - /// @see https://yt.yandex-team.ru/docs/description/mr/mapreduce - IOperationPtr MapReduce( - ::TIntrusivePtr<IMapperBase> mapper, - ::TIntrusivePtr<IReducerBase> reducer, - const TOneOrMany<TStructuredTablePath>& input, - const TOneOrMany<TStructuredTablePath>& output, - const TSortColumns& reduceBy, - TMapReduceOperationSpec spec = TMapReduceOperationSpec(), - const TOperationOptions& options = TOperationOptions()); - - /// - /// @brief Run MapReduce operation. - /// - /// @param mapper Instance of mapper to run (identity mapper if `nullptr`). - /// @param reduceCombiner Instance of reduceCombiner to run (identity reduce combiner if `nullptr`). - /// @param reducer Instance of reducer to run. - /// @param input Input table(s) - /// @param output Output table(s) - /// @param reduceBy Columns to group rows by. - /// @param spec Operation spec. - /// @param options Optional parameters. - /// - /// @see https://yt.yandex-team.ru/docs/description/mr/mapreduce - IOperationPtr MapReduce( - ::TIntrusivePtr<IMapperBase> mapper, - ::TIntrusivePtr<IReducerBase> reduceCombiner, - ::TIntrusivePtr<IReducerBase> reducer, - const TOneOrMany<TStructuredTablePath>& input, - const TOneOrMany<TStructuredTablePath>& output, - const TSortColumns& reduceBy, - TMapReduceOperationSpec spec = TMapReduceOperationSpec(), - const TOperationOptions& options = TOperationOptions()); - - /// - /// @brief Run raw MapReduce operation. - /// - /// @param spec Operation spec. - /// @param mapper Instance of a raw mapper to run (identity mapper if `nullptr`). - /// @param reduceCombiner Instance of a raw reduce combiner to run (identity reduce combiner if `nullptr`). - /// @param reducer Instance of a raw reducer to run. - /// @param options Optional parameters. 
- /// - /// @see https://yt.yandex-team.ru/docs/description/mr/mapreduce - virtual IOperationPtr RawMapReduce( - const TRawMapReduceOperationSpec& spec, - ::TIntrusivePtr<IRawJob> mapper, - ::TIntrusivePtr<IRawJob> reduceCombiner, - ::TIntrusivePtr<IRawJob> reducer, - const TOperationOptions& options = TOperationOptions()) = 0; - - /// - /// @brief Run Sort operation. - /// - /// @param spec Operation spec. - /// @param options Optional parameters. - /// - /// @see https://yt.yandex-team.ru/docs/description/mr/sort - virtual IOperationPtr Sort( - const TSortOperationSpec& spec, - const TOperationOptions& options = TOperationOptions()) = 0; - - /// - /// @brief Run Sort operation. - /// - /// @param input Input table(s). - /// @param output Output table. - /// @param sortBy Columns to sort input rows by. - /// @param spec Operation spec. - /// @param options Optional parameters. - /// - /// @see https://yt.yandex-team.ru/docs/description/mr/sort - IOperationPtr Sort( - const TOneOrMany<TRichYPath>& input, - const TRichYPath& output, - const TSortColumns& sortBy, - const TSortOperationSpec& spec = TSortOperationSpec(), - const TOperationOptions& options = TOperationOptions()); - - /// - /// @brief Run Merge operation. - /// - /// @param spec Operation spec. - /// @param options Optional parameters. - /// - /// @see https://yt.yandex-team.ru/docs/description/mr/merge - virtual IOperationPtr Merge( - const TMergeOperationSpec& spec, - const TOperationOptions& options = TOperationOptions()) = 0; - - /// - /// @brief Run Erase operation. - /// - /// @param spec Operation spec. - /// @param options Optional parameters. - /// - /// @see https://yt.yandex-team.ru/docs/description/mr/erase - virtual IOperationPtr Erase( - const TEraseOperationSpec& spec, - const TOperationOptions& options = TOperationOptions()) = 0; - - /// - /// @brief Run RemoteCopy operation. - /// - /// @param spec Operation spec. - /// @param options Optional parameters. 
- /// - /// @see https://yt.yandex-team.ru/docs/description/mr/remote_copy - virtual IOperationPtr RemoteCopy( - const TRemoteCopyOperationSpec& spec, - const TOperationOptions& options = TOperationOptions()) = 0; - - /// - /// @brief Run Vanilla operation. - /// - /// @param spec Operation spec. - /// @param options Optional parameters. - /// - /// @see https://yt.yandex-team.ru/docs/description/mr/vanilla - virtual IOperationPtr RunVanilla( - const TVanillaOperationSpec& spec, - const TOperationOptions& options = TOperationOptions()) = 0; - - /// - /// @brief Abort operation. - /// - /// @see https://yt.yandex-team.ru/docs/api/commands#abort_op - virtual void AbortOperation( - const TOperationId& operationId) = 0; - - /// - /// @brief Complete operation. - /// - /// @see https://yt.yandex-team.ru/docs/api/commands#complete_op - virtual void CompleteOperation( - const TOperationId& operationId) = 0; - - /// - /// @brief Wait for operation to finish. - virtual void WaitForOperation( - const TOperationId& operationId) = 0; - - /// - /// @brief Check and return operation status. - /// - /// @note this function will never return @ref NYT::EOperationBriefState::Failed or @ref NYT::EOperationBriefState::Aborted status, - /// it will throw @ref NYT::TOperationFailedError instead. - virtual EOperationBriefState CheckOperation( - const TOperationId& operationId) = 0; - - /// - /// @brief Create an operation object given operation id. - /// - /// @throw @ref NYT::TErrorResponse if the operation doesn't exist. 
- virtual IOperationPtr AttachOperation(const TOperationId& operationId) = 0; - -private: - virtual IOperationPtr DoMap( - const TMapOperationSpec& spec, - ::TIntrusivePtr<IStructuredJob> mapper, - const TOperationOptions& options) = 0; - - virtual IOperationPtr DoReduce( - const TReduceOperationSpec& spec, - ::TIntrusivePtr<IStructuredJob> reducer, - const TOperationOptions& options) = 0; - - virtual IOperationPtr DoJoinReduce( - const TJoinReduceOperationSpec& spec, - ::TIntrusivePtr<IStructuredJob> reducer, - const TOperationOptions& options) = 0; - - virtual IOperationPtr DoMapReduce( - const TMapReduceOperationSpec& spec, - ::TIntrusivePtr<IStructuredJob> mapper, - ::TIntrusivePtr<IStructuredJob> reduceCombiner, - ::TIntrusivePtr<IStructuredJob> reducer, - const TOperationOptions& options) = 0; -}; - -//////////////////////////////////////////////////////////////////////////////// - -} // namespace NYT - -#define OPERATION_INL_H_ -#include "operation-inl.h" -#undef OPERATION_INL_H_ diff --git a/yt/cpp/mapreduce/interface/operation_ut.cpp b/yt/cpp/mapreduce/interface/operation_ut.cpp deleted file mode 100644 index 0fa62e1568b..00000000000 --- a/yt/cpp/mapreduce/interface/operation_ut.cpp +++ /dev/null @@ -1,269 +0,0 @@ -#include <yt/cpp/mapreduce/interface/common_ut.h> -#include <yt/cpp/mapreduce/interface/job_statistics.h> -#include <yt/cpp/mapreduce/interface/operation.h> -#include <yt/cpp/mapreduce/interface/protobuf_table_schema_ut.pb.h> - -#include <yt/cpp/mapreduce/tests/yt_unittest_lib/yt_unittest_lib.h> - -#include <library/cpp/yson/node/node_io.h> - -#include <library/cpp/testing/unittest/registar.h> - -using namespace NYT; -using namespace NYT::NUnitTesting; - -class TDummyInferenceContext - : public IOperationPreparationContext -{ -public: - TDummyInferenceContext(int inputCount, int outputCount) - : InputCount_(inputCount) - , OutputCount_(outputCount) - , InputSchemas_(inputCount) - { } - - int GetInputCount() const override - { - return 
InputCount_; - } - - int GetOutputCount() const override - { - return OutputCount_; - } - - const TVector<TTableSchema>& GetInputSchemas() const override - { - return InputSchemas_; - } - - const TTableSchema& GetInputSchema(int index) const override - { - return InputSchemas_[index]; - } - - TMaybe<TYPath> GetInputPath(int) const override - { - return Nothing(); - } - - TMaybe<TYPath> GetOutputPath(int) const override - { - return Nothing(); - } - -private: - int InputCount_; - int OutputCount_; - TVector<TTableSchema> InputSchemas_; -}; - -Y_UNIT_TEST_SUITE(PrepareOperation) -{ - - Y_UNIT_TEST(BasicSchemas) - { - auto firstSchema = TTableSchema() - .AddColumn(TColumnSchema().Name("some_column").Type(EValueType::VT_UINT64)); - auto otherSchema = TTableSchema() - .AddColumn(TColumnSchema().Name("other_column").Type(EValueType::VT_BOOLEAN)); - auto thirdSchema = TTableSchema() - .AddColumn(TColumnSchema().Name("third_column").Type(EValueType::VT_STRING)); - - TDummyInferenceContext context(3,7); - TJobOperationPreparer builder(context); - - builder - .OutputSchema(1, firstSchema) - .BeginOutputGroup(TVector<int>{2, 5}) - .Schema(otherSchema) - .EndOutputGroup() - .BeginOutputGroup(3, 5) - .Schema(thirdSchema) - .EndOutputGroup() - .BeginOutputGroup(TVector<int>{0, 6}) - .Schema(thirdSchema) - .EndOutputGroup(); - - UNIT_ASSERT_EXCEPTION(builder.OutputSchema(1, otherSchema), TApiUsageError); - UNIT_ASSERT_EXCEPTION(builder.BeginOutputGroup(3, 5).Schema(otherSchema), TApiUsageError); - UNIT_ASSERT_EXCEPTION(builder.BeginOutputGroup(TVector<int>{3,6,7}).Schema(otherSchema), TApiUsageError); - - builder.Finish(); - auto result = builder.GetOutputSchemas(); - - ASSERT_SERIALIZABLES_EQUAL(result[0], thirdSchema); - ASSERT_SERIALIZABLES_EQUAL(result[1], firstSchema); - ASSERT_SERIALIZABLES_EQUAL(result[2], otherSchema); - ASSERT_SERIALIZABLES_EQUAL(result[3], thirdSchema); - ASSERT_SERIALIZABLES_EQUAL(result[4], thirdSchema); - ASSERT_SERIALIZABLES_EQUAL(result[5], 
otherSchema); - ASSERT_SERIALIZABLES_EQUAL(result[6], thirdSchema); - } - - Y_UNIT_TEST(NoSchema) - { - auto schema = TTableSchema() - .AddColumn(TColumnSchema().Name("some_column").Type(EValueType::VT_UINT64)); - - TDummyInferenceContext context(3,4); - TJobOperationPreparer builder(context); - - builder - .OutputSchema(1, schema) - .NoOutputSchema(0) - .BeginOutputGroup(2, 4) - .Schema(schema) - .EndOutputGroup(); - - UNIT_ASSERT_EXCEPTION(builder.OutputSchema(0, schema), TApiUsageError); - - builder.Finish(); - auto result = builder.GetOutputSchemas(); - - UNIT_ASSERT(result[0].Empty()); - - ASSERT_SERIALIZABLES_EQUAL(result[1], schema); - ASSERT_SERIALIZABLES_EQUAL(result[2], schema); - ASSERT_SERIALIZABLES_EQUAL(result[3], schema); - } - - Y_UNIT_TEST(Descriptions) - { - auto urlRowSchema = TTableSchema() - .AddColumn(TColumnSchema().Name("Host").Type(NTi::Optional(NTi::String()))) - .AddColumn(TColumnSchema().Name("Path").Type(NTi::Optional(NTi::String()))) - .AddColumn(TColumnSchema().Name("HttpCode").Type(NTi::Optional(NTi::Int32()))); - - auto urlRowStruct = NTi::Struct({ - {"Host", NTi::Optional(NTi::String())}, - {"Path", NTi::Optional(NTi::String())}, - {"HttpCode", NTi::Optional(NTi::Int32())}, - }); - - auto rowFieldSerializationOptionSchema = TTableSchema() - .AddColumn(TColumnSchema().Name("UrlRow_1").Type(NTi::Optional(urlRowStruct))) - .AddColumn(TColumnSchema().Name("UrlRow_2").Type(NTi::Optional(NTi::String()))); - - auto rowSerializedRepeatedFieldsSchema = TTableSchema() - .AddColumn(TColumnSchema().Name("Ints").Type(NTi::List(NTi::Int64()))) - .AddColumn(TColumnSchema().Name("UrlRows").Type(NTi::List(urlRowStruct))); - - TDummyInferenceContext context(5,7); - TJobOperationPreparer builder(context); - - builder - .InputDescription<TUrlRow>(0) - .BeginInputGroup(2, 3) - .Description<TUrlRow>() - .EndInputGroup() - .BeginInputGroup(TVector<int>{1, 4}) - .Description<TRowSerializedRepeatedFields>() - .EndInputGroup() - 
.InputDescription<TUrlRow>(3); - - UNIT_ASSERT_EXCEPTION(builder.InputDescription<TUrlRow>(0), TApiUsageError); - - builder - .OutputDescription<TUrlRow>(0, false) - .OutputDescription<TRowFieldSerializationOption>(1) - .BeginOutputGroup(2, 4) - .Description<TUrlRow>() - .EndOutputGroup() - .BeginOutputGroup(TVector<int>{4,6}) - .Description<TRowSerializedRepeatedFields>() - .EndOutputGroup() - .OutputDescription<TUrlRow>(5, false); - - UNIT_ASSERT_EXCEPTION(builder.OutputDescription<TUrlRow>(0), TApiUsageError); - UNIT_ASSERT_NO_EXCEPTION(builder.OutputSchema(0, urlRowSchema)); - UNIT_ASSERT_NO_EXCEPTION(builder.OutputSchema(5, urlRowSchema)); - UNIT_ASSERT_EXCEPTION(builder.OutputSchema(1, urlRowSchema), TApiUsageError); - - builder.Finish(); - auto result = builder.GetOutputSchemas(); - - ASSERT_SERIALIZABLES_EQUAL(result[0], urlRowSchema); - ASSERT_SERIALIZABLES_EQUAL(result[1], rowFieldSerializationOptionSchema); - ASSERT_SERIALIZABLES_EQUAL(result[2], urlRowSchema); - ASSERT_SERIALIZABLES_EQUAL(result[3], urlRowSchema); - ASSERT_SERIALIZABLES_EQUAL(result[4], rowSerializedRepeatedFieldsSchema); - ASSERT_SERIALIZABLES_EQUAL(result[5], urlRowSchema); - ASSERT_SERIALIZABLES_EQUAL(result[6], rowSerializedRepeatedFieldsSchema); - - auto expectedInputDescriptions = TVector<TMaybe<TTableStructure>>{ - {TProtobufTableStructure{TUrlRow::descriptor()}}, - {TProtobufTableStructure{TRowSerializedRepeatedFields::descriptor()}}, - {TProtobufTableStructure{TUrlRow::descriptor()}}, - {TProtobufTableStructure{TUrlRow::descriptor()}}, - {TProtobufTableStructure{TRowSerializedRepeatedFields::descriptor()}}, - }; - UNIT_ASSERT_EQUAL(expectedInputDescriptions, builder.GetInputDescriptions()); - - auto expectedOutputDescriptions = TVector<TMaybe<TTableStructure>>{ - {TProtobufTableStructure{TUrlRow::descriptor()}}, - {TProtobufTableStructure{TRowFieldSerializationOption::descriptor()}}, - {TProtobufTableStructure{TUrlRow::descriptor()}}, - 
{TProtobufTableStructure{TUrlRow::descriptor()}}, - {TProtobufTableStructure{TRowSerializedRepeatedFields::descriptor()}}, - {TProtobufTableStructure{TUrlRow::descriptor()}}, - {TProtobufTableStructure{TRowSerializedRepeatedFields::descriptor()}}, - }; - UNIT_ASSERT_EQUAL(expectedOutputDescriptions, builder.GetOutputDescriptions()); - } - - Y_UNIT_TEST(InputColumns) - { - TDummyInferenceContext context(5, 1); - TJobOperationPreparer builder(context); - builder - .InputColumnFilter(2, {"a", "b"}) - .BeginInputGroup(0, 2) - .ColumnFilter({"b", "c"}) - .ColumnRenaming({{"b", "B"}, {"c", "C"}}) - .EndInputGroup() - .InputColumnRenaming(3, {{"a", "AAA"}}) - .NoOutputSchema(0); - builder.Finish(); - - auto expectedRenamings = TVector<THashMap<TString, TString>>{ - {{"b", "B"}, {"c", "C"}}, - {{"b", "B"}, {"c", "C"}}, - {}, - {{"a", "AAA"}}, - {}, - }; - UNIT_ASSERT_EQUAL(builder.GetInputColumnRenamings(), expectedRenamings); - - auto expectedFilters = TVector<TMaybe<TVector<TString>>>{ - {{"b", "c"}}, - {{"b", "c"}}, - {{"a", "b"}}, - {}, - {}, - }; - UNIT_ASSERT_EQUAL(builder.GetInputColumnFilters(), expectedFilters); - } - - Y_UNIT_TEST(Bug_r7349102) - { - auto firstSchema = TTableSchema() - .AddColumn(TColumnSchema().Name("some_column").Type(EValueType::VT_UINT64)); - auto otherSchema = TTableSchema() - .AddColumn(TColumnSchema().Name("other_column").Type(EValueType::VT_BOOLEAN)); - auto thirdSchema = TTableSchema() - .AddColumn(TColumnSchema().Name("third_column").Type(EValueType::VT_STRING)); - - TDummyInferenceContext context(3,1); - TJobOperationPreparer builder(context); - - builder - .InputDescription<TUrlRow>(0) - .InputDescription<TUrlRow>(1) - .InputDescription<TUrlRow>(2) - .OutputDescription<TUrlRow>(0); - - builder.Finish(); - } - -} // Y_UNIT_TEST_SUITE(PrepareOperation) diff --git a/yt/cpp/mapreduce/interface/proto3_ut.proto b/yt/cpp/mapreduce/interface/proto3_ut.proto deleted file mode 100644 index b24c13085bd..00000000000 --- 
a/yt/cpp/mapreduce/interface/proto3_ut.proto +++ /dev/null @@ -1,17 +0,0 @@ -syntax = "proto3"; - -import "yt/yt_proto/yt/formats/extension.proto"; - -package NYT.NTestingProto3; - -option (NYT.file_default_field_flags) = SERIALIZATION_YT; - -message TWithOptional -{ - optional int64 x = 1; -} - -message TWithOptionalMessage -{ - optional TWithOptional x = 1; -} diff --git a/yt/cpp/mapreduce/interface/protobuf_file_options_ut.cpp b/yt/cpp/mapreduce/interface/protobuf_file_options_ut.cpp deleted file mode 100644 index 5ffa9564d7e..00000000000 --- a/yt/cpp/mapreduce/interface/protobuf_file_options_ut.cpp +++ /dev/null @@ -1,271 +0,0 @@ -#include "errors.h" -#include "format.h" -#include "common_ut.h" - -#include <yt/cpp/mapreduce/interface/protobuf_file_options_ut.pb.h> - -#include <yt/cpp/mapreduce/tests/yt_unittest_lib/yt_unittest_lib.h> - -#include <library/cpp/testing/unittest/registar.h> - -using namespace NYT; - -Y_UNIT_TEST_SUITE(ProtobufFileOptions) -{ - NTi::TTypePtr GetUrlRowType(bool required) - { - static const NTi::TTypePtr structType = NTi::Struct({ - {"Host", ToTypeV3(EValueType::VT_STRING, false)}, - {"Path", ToTypeV3(EValueType::VT_STRING, false)}, - {"HttpCode", ToTypeV3(EValueType::VT_INT32, false)}}); - return required ? 
structType : NTi::TTypePtr(NTi::Optional(structType)); - } - - Y_UNIT_TEST(TRowFieldSerializationOption) - { - const auto schema = CreateTableSchema<NTestingFileOptions::TRowFieldSerializationOption>(); - - ASSERT_SERIALIZABLES_EQUAL(schema, TTableSchema() - .AddColumn(TColumnSchema().Name("UrlRow_1").Type(ToTypeV3(EValueType::VT_STRING, false))) - .AddColumn(TColumnSchema().Name("UrlRow_2").Type(GetUrlRowType(false)))); - } - - Y_UNIT_TEST(TRowMixedSerializationOptions) - { - const auto schema = CreateTableSchema<NTestingFileOptions::TRowMixedSerializationOptions>(); - - ASSERT_SERIALIZABLES_EQUAL(schema, TTableSchema() - .AddColumn(TColumnSchema().Name("UrlRow_1").Type(ToTypeV3(EValueType::VT_STRING, false))) - .AddColumn(TColumnSchema().Name("UrlRow_2").Type(GetUrlRowType(false)))); - } - - Y_UNIT_TEST(FieldSortOrder) - { - const auto schema = CreateTableSchema<NTestingFileOptions::TFieldSortOrder>(); - - auto asInProtoFile = NTi::Optional(NTi::Struct({ - {"x", NTi::Optional(NTi::Int64())}, - {"y", NTi::Optional(NTi::String())}, - {"z", NTi::Optional(NTi::Bool())}, - })); - auto byFieldNumber = NTi::Optional(NTi::Struct({ - {"z", NTi::Optional(NTi::Bool())}, - {"x", NTi::Optional(NTi::Int64())}, - {"y", NTi::Optional(NTi::String())}, - })); - - ASSERT_SERIALIZABLES_EQUAL(schema, TTableSchema() - .AddColumn(TColumnSchema().Name("EmbeddedDefault").Type(asInProtoFile)) - .AddColumn(TColumnSchema().Name("EmbeddedAsInProtoFile").Type(asInProtoFile)) - .AddColumn(TColumnSchema().Name("EmbeddedByFieldNumber").Type(byFieldNumber))); - } - - Y_UNIT_TEST(Map) - { - const auto schema = CreateTableSchema<NTestingFileOptions::TWithMap>(); - - auto createKeyValueStruct = [] (NTi::TTypePtr key, NTi::TTypePtr value) { - return NTi::List(NTi::Struct({ - {"key", NTi::Optional(key)}, - {"value", NTi::Optional(value)}, - })); - }; - - auto embedded = NTi::Struct({ - {"x", NTi::Optional(NTi::Int64())}, - {"y", NTi::Optional(NTi::String())}, - }); - - 
ASSERT_SERIALIZABLES_EQUAL(schema, TTableSchema() - .AddColumn(TColumnSchema() - .Name("MapDefault") - .Type(createKeyValueStruct(NTi::Int64(), embedded))) - .AddColumn(TColumnSchema() - .Name("MapDict") - .Type(NTi::Dict(NTi::Int64(), embedded)))); - } - - Y_UNIT_TEST(Oneof) - { - const auto schema = CreateTableSchema<NTestingFileOptions::TWithOneof>(); - - auto embedded = NTi::Struct({ - {"x", NTi::Optional(NTi::Int64())}, - {"y", NTi::Optional(NTi::String())}, - }); - - auto defaultVariantType = NTi::Optional(NTi::Struct({ - {"field", NTi::Optional(NTi::String())}, - {"Oneof2", NTi::Optional(NTi::Variant(NTi::Struct({ - {"y2", NTi::String()}, - {"z2", embedded}, - {"x2", NTi::Int64()}, - })))}, - {"x1", NTi::Optional(NTi::Int64())}, - {"y1", NTi::Optional(NTi::String())}, - {"z1", NTi::Optional(embedded)}, - })); - - auto noDefaultType = NTi::Optional(NTi::Struct({ - {"field", NTi::Optional(NTi::String())}, - {"y2", NTi::Optional(NTi::String())}, - {"z2", NTi::Optional(embedded)}, - {"x2", NTi::Optional(NTi::Int64())}, - {"x1", NTi::Optional(NTi::Int64())}, - {"y1", NTi::Optional(NTi::String())}, - {"z1", NTi::Optional(embedded)}, - })); - - ASSERT_SERIALIZABLES_EQUAL(schema, TTableSchema() - .AddColumn(TColumnSchema() - .Name("DefaultVariant") - .Type(defaultVariantType) - ) - .AddColumn(TColumnSchema() - .Name("NoDefault") - .Type(noDefaultType) - ) - .AddColumn(TColumnSchema() - .Name("SerializationProtobuf") - .Type(NTi::Optional(NTi::Struct({ - {"x1", NTi::Optional(NTi::Int64())}, - {"y1", NTi::Optional(NTi::String())}, - {"z1", NTi::Optional(NTi::String())}, - }))) - ) - .AddColumn(TColumnSchema() - .Name("MemberOfTopLevelOneof") - .Type(NTi::Optional(NTi::Int64())) - ) - ); - } -} - -static TNode GetColumns(const TFormat& format, int tableIndex = 0) -{ - return format.Config.GetAttributes()["tables"][tableIndex]["columns"]; -} - -Y_UNIT_TEST_SUITE(ProtobufFormatFileOptions) -{ - Y_UNIT_TEST(TRowFieldSerializationOption) - { - const auto format = 
TFormat::Protobuf<NTestingFileOptions::TRowFieldSerializationOption>(); - auto columns = GetColumns(format); - - UNIT_ASSERT_VALUES_EQUAL(columns[0]["name"], "UrlRow_1"); - UNIT_ASSERT_VALUES_EQUAL(columns[0]["proto_type"], "message"); - UNIT_ASSERT_VALUES_EQUAL(columns[0]["field_number"], 1); - - UNIT_ASSERT_VALUES_EQUAL(columns[1]["name"], "UrlRow_2"); - UNIT_ASSERT_VALUES_EQUAL(columns[1]["proto_type"], "structured_message"); - UNIT_ASSERT_VALUES_EQUAL(columns[1]["field_number"], 2); - const auto& fields = columns[1]["fields"]; - UNIT_ASSERT_VALUES_EQUAL(fields[0]["name"], "Host"); - UNIT_ASSERT_VALUES_EQUAL(fields[0]["proto_type"], "string"); - UNIT_ASSERT_VALUES_EQUAL(fields[0]["field_number"], 1); - - UNIT_ASSERT_VALUES_EQUAL(fields[1]["name"], "Path"); - UNIT_ASSERT_VALUES_EQUAL(fields[1]["proto_type"], "string"); - UNIT_ASSERT_VALUES_EQUAL(fields[1]["field_number"], 2); - - UNIT_ASSERT_VALUES_EQUAL(fields[2]["name"], "HttpCode"); - UNIT_ASSERT_VALUES_EQUAL(fields[2]["proto_type"], "sint32"); - UNIT_ASSERT_VALUES_EQUAL(fields[2]["field_number"], 3); - } - - Y_UNIT_TEST(Map) - { - const auto format = TFormat::Protobuf<NTestingFileOptions::TWithMap>(); - auto columns = GetColumns(format); - - UNIT_ASSERT_VALUES_EQUAL(columns.Size(), 2); - { - const auto& column = columns[0]; - UNIT_ASSERT_VALUES_EQUAL(column["name"], "MapDefault"); - UNIT_ASSERT_VALUES_EQUAL(column["proto_type"], "structured_message"); - UNIT_ASSERT_VALUES_EQUAL(column["fields"].Size(), 2); - UNIT_ASSERT_VALUES_EQUAL(column["fields"][0]["proto_type"], "int64"); - UNIT_ASSERT_VALUES_EQUAL(column["fields"][1]["proto_type"], "structured_message"); - } - { - const auto& column = columns[1]; - UNIT_ASSERT_VALUES_EQUAL(column["name"], "MapDict"); - UNIT_ASSERT_VALUES_EQUAL(column["proto_type"], "structured_message"); - UNIT_ASSERT_VALUES_EQUAL(column["fields"].Size(), 2); - UNIT_ASSERT_VALUES_EQUAL(column["fields"][0]["proto_type"], "int64"); - 
UNIT_ASSERT_VALUES_EQUAL(column["fields"][1]["proto_type"], "structured_message"); - } - } - - Y_UNIT_TEST(Oneof) - { - const auto format = TFormat::Protobuf<NTestingFileOptions::TWithOneof>(); - auto columns = GetColumns(format); - - UNIT_ASSERT_VALUES_EQUAL(columns.Size(), 4); - - { - const auto& column = columns[0]; - UNIT_ASSERT_VALUES_EQUAL(column["name"], "DefaultVariant"); - UNIT_ASSERT_VALUES_EQUAL(column["proto_type"], "structured_message"); - UNIT_ASSERT_VALUES_EQUAL(column["fields"].Size(), 5); - UNIT_ASSERT_VALUES_EQUAL(column["fields"][0]["name"], "field"); - - const auto& oneof2 = column["fields"][1]; - UNIT_ASSERT_VALUES_EQUAL(oneof2["name"], "Oneof2"); - UNIT_ASSERT_VALUES_EQUAL(oneof2["proto_type"], "oneof"); - UNIT_ASSERT_VALUES_EQUAL(oneof2["fields"][0]["name"], "y2"); - UNIT_ASSERT_VALUES_EQUAL(oneof2["fields"][1]["name"], "z2"); - UNIT_ASSERT_VALUES_EQUAL(oneof2["fields"][1]["proto_type"], "structured_message"); - const auto& embeddedFields = oneof2["fields"][1]["fields"]; - UNIT_ASSERT_VALUES_EQUAL(embeddedFields[0]["name"], "x"); - UNIT_ASSERT_VALUES_EQUAL(embeddedFields[1]["name"], "y"); - - UNIT_ASSERT_VALUES_EQUAL(oneof2["fields"][2]["name"], "x2"); - - UNIT_ASSERT_VALUES_EQUAL(column["fields"][2]["name"], "x1"); - UNIT_ASSERT_VALUES_EQUAL(column["fields"][3]["name"], "y1"); - UNIT_ASSERT_VALUES_EQUAL(column["fields"][4]["name"], "z1"); - }; - - { - const auto& column = columns[1]; - UNIT_ASSERT_VALUES_EQUAL(column["name"], "NoDefault"); - UNIT_ASSERT_VALUES_EQUAL(column["proto_type"], "structured_message"); - const auto& fields = column["fields"]; - UNIT_ASSERT_VALUES_EQUAL(fields.Size(), 7); - - UNIT_ASSERT_VALUES_EQUAL(fields[0]["name"], "field"); - - UNIT_ASSERT_VALUES_EQUAL(fields[1]["name"], "y2"); - - UNIT_ASSERT_VALUES_EQUAL(fields[2]["name"], "z2"); - UNIT_ASSERT_VALUES_EQUAL(fields[2]["proto_type"], "structured_message"); - const auto& embeddedFields = fields[2]["fields"]; - UNIT_ASSERT_VALUES_EQUAL(embeddedFields[0]["name"], 
"x"); - UNIT_ASSERT_VALUES_EQUAL(embeddedFields[1]["name"], "y"); - - UNIT_ASSERT_VALUES_EQUAL(fields[3]["name"], "x2"); - - UNIT_ASSERT_VALUES_EQUAL(fields[4]["name"], "x1"); - UNIT_ASSERT_VALUES_EQUAL(fields[5]["name"], "y1"); - UNIT_ASSERT_VALUES_EQUAL(fields[6]["name"], "z1"); - }; - - { - const auto& column = columns[2]; - UNIT_ASSERT_VALUES_EQUAL(column["name"], "SerializationProtobuf"); - UNIT_ASSERT_VALUES_EQUAL(column["proto_type"], "structured_message"); - UNIT_ASSERT_VALUES_EQUAL(column["fields"].Size(), 3); - UNIT_ASSERT_VALUES_EQUAL(column["fields"][0]["name"], "x1"); - UNIT_ASSERT_VALUES_EQUAL(column["fields"][1]["name"], "y1"); - UNIT_ASSERT_VALUES_EQUAL(column["fields"][2]["name"], "z1"); - } - { - const auto& column = columns[3]; - UNIT_ASSERT_VALUES_EQUAL(column["name"], "MemberOfTopLevelOneof"); - UNIT_ASSERT_VALUES_EQUAL(column["proto_type"], "int64"); - } - } -} diff --git a/yt/cpp/mapreduce/interface/protobuf_file_options_ut.proto b/yt/cpp/mapreduce/interface/protobuf_file_options_ut.proto deleted file mode 100644 index 4804b2f60c1..00000000000 --- a/yt/cpp/mapreduce/interface/protobuf_file_options_ut.proto +++ /dev/null @@ -1,142 +0,0 @@ -import "yt/yt_proto/yt/formats/extension.proto"; - -package NYT.NTestingFileOptions; - -option (NYT.file_default_field_flags) = SERIALIZATION_YT; -option (NYT.file_default_field_flags) = MAP_AS_LIST_OF_STRUCTS; -option (NYT.file_default_message_flags) = DEPRECATED_SORT_FIELDS_AS_IN_PROTO_FILE; -option (NYT.file_default_oneof_flags) = SEPARATE_FIELDS; - -message TUrlRow -{ - optional string Host = 1 [(NYT.column_name) = "Host"]; - optional string Path = 2 [(NYT.column_name) = "Path"]; - optional sint32 HttpCode = 3 [(NYT.column_name) = "HttpCode"]; -} - -message TRowFieldSerializationOption -{ - optional TUrlRow UrlRow_1 = 1 [(NYT.flags) = SERIALIZATION_PROTOBUF]; - optional TUrlRow UrlRow_2 = 2; -} - -message TRowMixedSerializationOptions -{ - option (NYT.default_field_flags) = SERIALIZATION_PROTOBUF; - 
optional TUrlRow UrlRow_1 = 1; - optional TUrlRow UrlRow_2 = 2 [(NYT.flags) = SERIALIZATION_YT]; -} - -message TRowSerializedRepeatedFields -{ - repeated int64 Ints = 1; - repeated TUrlRow UrlRows = 2; -} - -message TFieldSortOrder -{ - message TEmbeddedDefault { - optional int64 x = 2; - optional string y = 12; - optional bool z = 1; - } - message TEmbeddedAsInProtoFile { - option (NYT.message_flags) = DEPRECATED_SORT_FIELDS_AS_IN_PROTO_FILE; - optional int64 x = 2; - optional string y = 12; - optional bool z = 1; - } - message TEmbeddedByFieldNumber { - option (NYT.message_flags) = SORT_FIELDS_BY_FIELD_NUMBER; - optional int64 x = 2; - optional string y = 12; - optional bool z = 1; - } - option (NYT.default_field_flags) = SERIALIZATION_YT; - - optional TEmbeddedDefault EmbeddedDefault = 1; - optional TEmbeddedAsInProtoFile EmbeddedAsInProtoFile = 2; - optional TEmbeddedByFieldNumber EmbeddedByFieldNumber = 3; -} - -message TWithMap -{ - message TEmbedded { - optional int64 x = 1; - optional string y = 2; - } - - map<int64, TEmbedded> MapDefault = 1; - map<int64, TEmbedded> MapDict = 5 [(NYT.flags) = MAP_AS_DICT]; -} - -message TWithOneof -{ - message TEmbedded - { - oneof Oneof { - int64 x = 1; - string y = 2; - } - } - - message TDefaultVariant - { - option (NYT.default_oneof_flags) = VARIANT; - optional string field = 1; - - oneof Oneof2 - { - string y2 = 4; - TEmbedded z2 = 6; - int64 x2 = 2; - } - - oneof Oneof1 - { - option (NYT.oneof_flags) = SEPARATE_FIELDS; - int64 x1 = 10; - string y1 = 3; - TEmbedded z1 = 5; - } - } - - message TNoDefault - { - optional string field = 1; - - oneof Oneof2 - { - string y2 = 4; - TEmbedded z2 = 6; - int64 x2 = 2; - } - - oneof Oneof1 - { - int64 x1 = 10; - string y1 = 3; - TEmbedded z1 = 5; - } - } - - message TSerializationProtobuf - { - option (NYT.default_field_flags) = SERIALIZATION_PROTOBUF; - oneof Oneof - { - int64 x1 = 2; - string y1 = 1; - TEmbedded z1 = 3; - } - } - - optional TDefaultVariant DefaultVariant = 1; 
- optional TNoDefault NoDefault = 2; - optional TSerializationProtobuf SerializationProtobuf = 3; - - oneof TopLevelOneof - { - int64 MemberOfTopLevelOneof = 4; - } -} diff --git a/yt/cpp/mapreduce/interface/protobuf_format.cpp b/yt/cpp/mapreduce/interface/protobuf_format.cpp deleted file mode 100644 index 3d57ed2797d..00000000000 --- a/yt/cpp/mapreduce/interface/protobuf_format.cpp +++ /dev/null @@ -1,1498 +0,0 @@ -#include "protobuf_format.h" - -#include "errors.h" - -#include <yt/yt_proto/yt/formats/extension.pb.h> - -#include <google/protobuf/text_format.h> - -#include <library/cpp/yson/node/node_io.h> - -#include <util/generic/hash_set.h> -#include <util/generic/stack.h> -#include <util/generic/overloaded.h> - -#include <util/stream/output.h> -#include <util/stream/file.h> - -namespace NYT::NDetail { - -using ::google::protobuf::Descriptor; -using ::google::protobuf::DescriptorProto; -using ::google::protobuf::EnumDescriptor; -using ::google::protobuf::EnumDescriptorProto; -using ::google::protobuf::FieldDescriptor; -using ::google::protobuf::FieldDescriptorProto; -using ::google::protobuf::OneofDescriptor; -using ::google::protobuf::Message; -using ::google::protobuf::FileDescriptor; -using ::google::protobuf::FileDescriptorProto; -using ::google::protobuf::FileDescriptorSet; -using ::google::protobuf::FieldOptions; -using ::google::protobuf::FileOptions; -using ::google::protobuf::OneofOptions; -using ::google::protobuf::MessageOptions; - -using ::ToString; - -namespace { - -//////////////////////////////////////////////////////////////////////////////// - -using TOneofOption = std::variant< - EProtobufOneofMode>; - -using TFieldOption = std::variant< - EProtobufType, - EProtobufSerializationMode, - EProtobufListMode, - EProtobufMapMode, - EProtobufEnumWritingMode>; - -using TMessageOption = std::variant< - EProtobufFieldSortOrder>; - -struct TOtherColumns -{ }; - -using TValueTypeOrOtherColumns = std::variant<EValueType, TOtherColumns>; - 
-//////////////////////////////////////////////////////////////////////////////// - -TFieldOption FieldFlagToOption(EWrapperFieldFlag::Enum flag) -{ - using EFlag = EWrapperFieldFlag; - switch (flag) { - case EFlag::SERIALIZATION_PROTOBUF: - return EProtobufSerializationMode::Protobuf; - case EFlag::SERIALIZATION_YT: - return EProtobufSerializationMode::Yt; - - case EFlag::ANY: - return EProtobufType::Any; - case EFlag::OTHER_COLUMNS: - return EProtobufType::OtherColumns; - case EFlag::ENUM_INT: - return EProtobufType::EnumInt; - case EFlag::ENUM_STRING: - return EProtobufType::EnumString; - - case EFlag::OPTIONAL_LIST: - return EProtobufListMode::Optional; - case EFlag::REQUIRED_LIST: - return EProtobufListMode::Required; - - case EFlag::MAP_AS_LIST_OF_STRUCTS_LEGACY: - return EProtobufMapMode::ListOfStructsLegacy; - case EFlag::MAP_AS_LIST_OF_STRUCTS: - return EProtobufMapMode::ListOfStructs; - case EFlag::MAP_AS_DICT: - return EProtobufMapMode::Dict; - case EFlag::MAP_AS_OPTIONAL_DICT: - return EProtobufMapMode::OptionalDict; - case EFlag::EMBEDDED: - return EProtobufSerializationMode::Embedded; - - case EFlag::ENUM_SKIP_UNKNOWN_VALUES: - return EProtobufEnumWritingMode::SkipUnknownValues; - case EFlag::ENUM_CHECK_VALUES: - return EProtobufEnumWritingMode::CheckValues; - } - Y_FAIL(); -} - -TMessageOption MessageFlagToOption(EWrapperMessageFlag::Enum flag) -{ - using EFlag = EWrapperMessageFlag; - switch (flag) { - case EFlag::DEPRECATED_SORT_FIELDS_AS_IN_PROTO_FILE: - return EProtobufFieldSortOrder::AsInProtoFile; - case EFlag::SORT_FIELDS_BY_FIELD_NUMBER: - return EProtobufFieldSortOrder::ByFieldNumber; - } - Y_FAIL(); -} - -TOneofOption OneofFlagToOption(EWrapperOneofFlag::Enum flag) -{ - using EFlag = EWrapperOneofFlag; - switch (flag) { - case EFlag::SEPARATE_FIELDS: - return EProtobufOneofMode::SeparateFields; - case EFlag::VARIANT: - return EProtobufOneofMode::Variant; - } - Y_FAIL(); -} - -EWrapperFieldFlag::Enum OptionToFieldFlag(TFieldOption option) -{ 
- using EFlag = EWrapperFieldFlag; - struct TVisitor - { - EFlag::Enum operator() (EProtobufType type) - { - switch (type) { - case EProtobufType::Any: - return EFlag::ANY; - case EProtobufType::OtherColumns: - return EFlag::OTHER_COLUMNS; - case EProtobufType::EnumInt: - return EFlag::ENUM_INT; - case EProtobufType::EnumString: - return EFlag::ENUM_STRING; - } - Y_FAIL(); - } - EFlag::Enum operator() (EProtobufSerializationMode serializationMode) - { - switch (serializationMode) { - case EProtobufSerializationMode::Yt: - return EFlag::SERIALIZATION_YT; - case EProtobufSerializationMode::Protobuf: - return EFlag::SERIALIZATION_PROTOBUF; - case EProtobufSerializationMode::Embedded: - return EFlag::EMBEDDED; - } - Y_FAIL(); - } - EFlag::Enum operator() (EProtobufListMode listMode) - { - switch (listMode) { - case EProtobufListMode::Optional: - return EFlag::OPTIONAL_LIST; - case EProtobufListMode::Required: - return EFlag::REQUIRED_LIST; - } - Y_FAIL(); - } - EFlag::Enum operator() (EProtobufMapMode mapMode) - { - switch (mapMode) { - case EProtobufMapMode::ListOfStructsLegacy: - return EFlag::MAP_AS_LIST_OF_STRUCTS_LEGACY; - case EProtobufMapMode::ListOfStructs: - return EFlag::MAP_AS_LIST_OF_STRUCTS; - case EProtobufMapMode::Dict: - return EFlag::MAP_AS_DICT; - case EProtobufMapMode::OptionalDict: - return EFlag::MAP_AS_OPTIONAL_DICT; - } - Y_FAIL(); - } - EFlag::Enum operator() (EProtobufEnumWritingMode enumWritingMode) - { - switch (enumWritingMode) { - case EProtobufEnumWritingMode::SkipUnknownValues: - return EFlag::ENUM_SKIP_UNKNOWN_VALUES; - case EProtobufEnumWritingMode::CheckValues: - return EFlag::ENUM_CHECK_VALUES; - } - Y_FAIL(); - } - }; - - return std::visit(TVisitor(), option); -} - -EWrapperMessageFlag::Enum OptionToMessageFlag(TMessageOption option) -{ - using EFlag = EWrapperMessageFlag; - struct TVisitor - { - EFlag::Enum operator() (EProtobufFieldSortOrder sortOrder) - { - switch (sortOrder) { - case EProtobufFieldSortOrder::AsInProtoFile: - 
return EFlag::DEPRECATED_SORT_FIELDS_AS_IN_PROTO_FILE; - case EProtobufFieldSortOrder::ByFieldNumber: - return EFlag::SORT_FIELDS_BY_FIELD_NUMBER; - } - Y_FAIL(); - } - }; - - return std::visit(TVisitor(), option); -} - -EWrapperOneofFlag::Enum OptionToOneofFlag(TOneofOption option) -{ - using EFlag = EWrapperOneofFlag; - struct TVisitor - { - EFlag::Enum operator() (EProtobufOneofMode mode) - { - switch (mode) { - case EProtobufOneofMode::SeparateFields: - return EFlag::SEPARATE_FIELDS; - case EProtobufOneofMode::Variant: - return EFlag::VARIANT; - } - Y_FAIL(); - } - }; - - return std::visit(TVisitor(), option); -} - - -template <typename T, typename TOptionToFlag> -void SetOption(TMaybe<T>& option, T newOption, TOptionToFlag optionToFlag) -{ - if (option) { - if (*option == newOption) { - ythrow yexception() << "Duplicate protobuf flag " << optionToFlag(newOption); - } else { - ythrow yexception() << "Incompatible protobuf flags " << - optionToFlag(*option) << " and " << optionToFlag(newOption); - } - } - option = newOption; -} - -class TParseProtobufFieldOptionsVisitor -{ -public: - void operator() (EProtobufType type) - { - SetOption(Type, type); - } - - void operator() (EProtobufSerializationMode serializationMode) - { - SetOption(SerializationMode, serializationMode); - } - - void operator() (EProtobufListMode listMode) - { - SetOption(ListMode, listMode); - } - - void operator() (EProtobufMapMode mapMode) - { - SetOption(MapMode, mapMode); - } - - void operator() (EProtobufEnumWritingMode enumWritingMode) - { - SetOption(EnumWritingMode, enumWritingMode); - } - - template <typename T> - void SetOption(TMaybe<T>& option, T newOption) - { - NYT::NDetail::SetOption(option, newOption, OptionToFieldFlag); - } - -public: - TMaybe<EProtobufType> Type; - TMaybe<EProtobufSerializationMode> SerializationMode; - TMaybe<EProtobufListMode> ListMode; - TMaybe<EProtobufMapMode> MapMode; - TMaybe<EProtobufEnumWritingMode> EnumWritingMode; -}; - -class 
TParseProtobufMessageOptionsVisitor -{ -public: - void operator() (EProtobufFieldSortOrder fieldSortOrder) - { - SetOption(FieldSortOrder, fieldSortOrder); - } - - template <typename T> - void SetOption(TMaybe<T>& option, T newOption) - { - NYT::NDetail::SetOption(option, newOption, OptionToMessageFlag); - } - -public: - TMaybe<EProtobufFieldSortOrder> FieldSortOrder; -}; - -class TParseProtobufOneofOptionsVisitor -{ -public: - void operator() (EProtobufOneofMode mode) - { - SetOption(Mode, mode); - } - - template <typename T> - void SetOption(TMaybe<T>& option, T newOption) - { - NYT::NDetail::SetOption(option, newOption, OptionToOneofFlag); - } - -public: - TMaybe<EProtobufOneofMode> Mode; -}; - -void ParseProtobufFieldOptions( - const ::google::protobuf::RepeatedField<EWrapperFieldFlag::Enum>& flags, - TProtobufFieldOptions* fieldOptions) -{ - TParseProtobufFieldOptionsVisitor visitor; - for (auto flag : flags) { - std::visit(visitor, FieldFlagToOption(flag)); - } - if (visitor.Type) { - fieldOptions->Type = *visitor.Type; - } - if (visitor.SerializationMode) { - fieldOptions->SerializationMode = *visitor.SerializationMode; - } - if (visitor.ListMode) { - fieldOptions->ListMode = *visitor.ListMode; - } - if (visitor.MapMode) { - fieldOptions->MapMode = *visitor.MapMode; - } -} - -void ParseProtobufMessageOptions( - const ::google::protobuf::RepeatedField<EWrapperMessageFlag::Enum>& flags, - TProtobufMessageOptions* messageOptions) -{ - TParseProtobufMessageOptionsVisitor visitor; - for (auto flag : flags) { - std::visit(visitor, MessageFlagToOption(flag)); - } - if (visitor.FieldSortOrder) { - messageOptions->FieldSortOrder = *visitor.FieldSortOrder; - } -} - -void ParseProtobufOneofOptions( - const ::google::protobuf::RepeatedField<EWrapperOneofFlag::Enum>& flags, - TProtobufOneofOptions* messageOptions) -{ - TParseProtobufOneofOptionsVisitor visitor; - for (auto flag : flags) { - std::visit(visitor, OneofFlagToOption(flag)); - } - if (visitor.Mode) { - 
messageOptions->Mode = *visitor.Mode; - } -} - -TProtobufFieldOptions GetDefaultFieldOptions( - const Descriptor* descriptor, - TProtobufFieldOptions defaultFieldOptions = {}) -{ - ParseProtobufFieldOptions( - descriptor->file()->options().GetRepeatedExtension(file_default_field_flags), - &defaultFieldOptions); - ParseProtobufFieldOptions( - descriptor->options().GetRepeatedExtension(default_field_flags), - &defaultFieldOptions); - return defaultFieldOptions; -} - -TProtobufOneofOptions GetDefaultOneofOptions(const Descriptor* descriptor) -{ - TProtobufOneofOptions defaultOneofOptions; - ParseProtobufOneofOptions( - descriptor->file()->options().GetRepeatedExtension(file_default_oneof_flags), - &defaultOneofOptions); - ParseProtobufOneofOptions( - descriptor->options().GetRepeatedExtension(default_oneof_flags), - &defaultOneofOptions); - switch (defaultOneofOptions.Mode) { - case EProtobufOneofMode::Variant: { - auto defaultFieldOptions = GetDefaultFieldOptions(descriptor); - switch (defaultFieldOptions.SerializationMode) { - case EProtobufSerializationMode::Protobuf: - // For Protobuf serialization mode default is SeparateFields. 
- defaultOneofOptions.Mode = EProtobufOneofMode::SeparateFields; - return defaultOneofOptions; - case EProtobufSerializationMode::Yt: - case EProtobufSerializationMode::Embedded: - return defaultOneofOptions; - } - Y_FAIL(); - } - case EProtobufOneofMode::SeparateFields: - return defaultOneofOptions; - } - Y_FAIL(); -} - -//////////////////////////////////////////////////////////////////////////////// - -void ValidateProtobufType(const FieldDescriptor& fieldDescriptor, EProtobufType protobufType) -{ - const auto fieldType = fieldDescriptor.type(); - auto ensureType = [&] (FieldDescriptor::Type expectedType) { - Y_ENSURE(fieldType == expectedType, - "Type of field " << fieldDescriptor.name() << "does not match specified field flag " << - OptionToFieldFlag(protobufType) << ": " - "expected " << FieldDescriptor::TypeName(expectedType) << ", " << - "got " << FieldDescriptor::TypeName(fieldType)); - }; - switch (protobufType) { - case EProtobufType::Any: - ensureType(FieldDescriptor::TYPE_BYTES); - return; - case EProtobufType::OtherColumns: - ensureType(FieldDescriptor::TYPE_BYTES); - return; - case EProtobufType::EnumInt: - ensureType(FieldDescriptor::TYPE_ENUM); - return; - case EProtobufType::EnumString: - ensureType(FieldDescriptor::TYPE_ENUM); - return; - } - Y_FAIL(); -} - -//////////////////////////////////////////////////////////////////////////////// - -class TCycleChecker -{ -private: - class TGuard - { - public: - TGuard(TCycleChecker* checker, const Descriptor* descriptor) - : Checker_(checker) - , Descriptor_(descriptor) - { - Checker_->ActiveVertices_.insert(Descriptor_); - Checker_->Stack_.push(Descriptor_); - } - - ~TGuard() - { - Checker_->ActiveVertices_.erase(Descriptor_); - Checker_->Stack_.pop(); - } - - private: - TCycleChecker* Checker_; - const Descriptor* Descriptor_; - }; - -public: - [[nodiscard]] TGuard Enter(const Descriptor* descriptor) - { - if (ActiveVertices_.contains(descriptor)) { - Y_VERIFY(!Stack_.empty()); - ythrow TApiUsageError() 
<< "Cyclic reference found for protobuf messages. " << - "Consider removing " << EWrapperFieldFlag::SERIALIZATION_YT << " flag " << - "somewhere on the cycle containing " << - Stack_.top()->full_name() << " and " << descriptor->full_name(); - } - return TGuard(this, descriptor); - } - -private: - THashSet<const Descriptor*> ActiveVertices_; - TStack<const Descriptor*> Stack_; -}; - -//////////////////////////////////////////////////////////////////////////////// - -} // namespace - -//////////////////////////////////////////////////////////////////////////////// - -TProtobufFieldOptions GetFieldOptions( - const FieldDescriptor* fieldDescriptor, - const TMaybe<TProtobufFieldOptions>& defaultFieldOptions) -{ - TProtobufFieldOptions options; - if (defaultFieldOptions) { - options = *defaultFieldOptions; - } else { - options = GetDefaultFieldOptions(fieldDescriptor->containing_type()); - } - ParseProtobufFieldOptions(fieldDescriptor->options().GetRepeatedExtension(flags), &options); - return options; -} - -TProtobufOneofOptions GetOneofOptions( - const OneofDescriptor* oneofDescriptor, - const TMaybe<TProtobufOneofOptions>& defaultOneofOptions) -{ - TProtobufOneofOptions options; - if (defaultOneofOptions) { - options = *defaultOneofOptions; - } else { - options = GetDefaultOneofOptions(oneofDescriptor->containing_type()); - } - ParseProtobufOneofOptions(oneofDescriptor->options().GetRepeatedExtension(oneof_flags), &options); - - if (oneofDescriptor->is_synthetic()) { - options.Mode = EProtobufOneofMode::SeparateFields; - } - - auto variantFieldName = oneofDescriptor->options().GetExtension(variant_field_name); - switch (options.Mode) { - case EProtobufOneofMode::SeparateFields: - if (variantFieldName) { - ythrow TApiUsageError() << "\"variant_field_name\" requires (NYT.oneof_flags) = VARIANT"; - } - break; - case EProtobufOneofMode::Variant: - if (variantFieldName) { - options.VariantFieldName = variantFieldName; - } else { - options.VariantFieldName = 
oneofDescriptor->name(); - } - break; - } - return options; -} - - -TProtobufMessageOptions GetMessageOptions(const Descriptor* descriptor) -{ - TProtobufMessageOptions options; - ParseProtobufMessageOptions( - descriptor->file()->options().GetRepeatedExtension(file_default_message_flags), - &options); - ParseProtobufMessageOptions( - descriptor->options().GetRepeatedExtension(message_flags), - &options); - return options; -} - -TNode MakeEnumerationConfig(const ::google::protobuf::EnumDescriptor* enumDescriptor) -{ - auto config = TNode::CreateMap(); - for (int i = 0; i < enumDescriptor->value_count(); ++i) { - config[enumDescriptor->value(i)->name()] = enumDescriptor->value(i)->number(); - } - return config; -} - -TString DeduceProtobufType( - const FieldDescriptor* fieldDescriptor, - const TProtobufFieldOptions& options) -{ - if (options.Type) { - ValidateProtobufType(*fieldDescriptor, *options.Type); - return ToString(*options.Type); - } - switch (fieldDescriptor->type()) { - case FieldDescriptor::TYPE_ENUM: - return ToString(EProtobufType::EnumString); - case FieldDescriptor::TYPE_MESSAGE: - switch (options.SerializationMode) { - case EProtobufSerializationMode::Protobuf: - return "message"; - case EProtobufSerializationMode::Yt: - return "structured_message"; - case EProtobufSerializationMode::Embedded: - return "embedded_message"; - } - Y_FAIL(); - default: - return fieldDescriptor->type_name(); - } - Y_FAIL(); -} - -TString GetColumnName(const ::google::protobuf::FieldDescriptor& field) -{ - const auto& options = field.options(); - const auto columnName = options.GetExtension(column_name); - if (!columnName.empty()) { - return columnName; - } - const auto keyColumnName = options.GetExtension(key_column_name); - if (!keyColumnName.empty()) { - return keyColumnName; - } - return field.name(); -} - -TNode MakeProtoFormatMessageFieldsConfig( - const Descriptor* descriptor, - TNode* enumerations, - TCycleChecker& cycleChecker); - -TNode 
MakeProtoFormatMessageFieldsConfig( - const Descriptor* descriptor, - TNode* enumerations, - const TProtobufFieldOptions& defaultFieldOptions, - const TProtobufOneofOptions& defaultOneofOptions, - TCycleChecker& cycleChecker); - -TNode MakeMapFieldsConfig( - const FieldDescriptor* fieldDescriptor, - TNode* enumerations, - const TProtobufFieldOptions& fieldOptions, - TCycleChecker& cycleChecker) -{ - Y_VERIFY(fieldDescriptor->is_map()); - auto message = fieldDescriptor->message_type(); - switch (fieldOptions.MapMode) { - case EProtobufMapMode::ListOfStructsLegacy: - return MakeProtoFormatMessageFieldsConfig( - message, - enumerations, - cycleChecker); - case EProtobufMapMode::ListOfStructs: - case EProtobufMapMode::Dict: - case EProtobufMapMode::OptionalDict: { - TProtobufFieldOptions defaultFieldOptions; - defaultFieldOptions.SerializationMode = EProtobufSerializationMode::Yt; - return MakeProtoFormatMessageFieldsConfig( - message, - enumerations, - defaultFieldOptions, - TProtobufOneofOptions{}, - cycleChecker); - } - } - Y_FAIL(); -} - -TNode MakeProtoFormatFieldConfig( - const FieldDescriptor* fieldDescriptor, - TNode* enumerations, - const TProtobufFieldOptions& defaultOptions, - TCycleChecker& cycleChecker) -{ - auto fieldConfig = TNode::CreateMap(); - fieldConfig["field_number"] = fieldDescriptor->number(); - fieldConfig["name"] = GetColumnName(*fieldDescriptor); - - auto fieldOptions = GetFieldOptions(fieldDescriptor, defaultOptions); - - Y_ENSURE(fieldOptions.SerializationMode != EProtobufSerializationMode::Embedded, - "EMBEDDED flag is currently supported only with " - "ProtobufFormatWithDescriptors config option set to true"); - - if (fieldDescriptor->is_repeated()) { - Y_ENSURE_EX(fieldOptions.SerializationMode == EProtobufSerializationMode::Yt, - TApiUsageError() << "Repeated field \"" << fieldDescriptor->full_name() << "\" " << - "must have flag \"" << EWrapperFieldFlag::SERIALIZATION_YT << "\""); - } - fieldConfig["repeated"] = 
fieldDescriptor->is_repeated(); - fieldConfig["packed"] = fieldDescriptor->is_packed(); - - fieldConfig["proto_type"] = DeduceProtobufType(fieldDescriptor, fieldOptions); - - if (fieldDescriptor->type() == FieldDescriptor::TYPE_ENUM) { - auto* enumeration = fieldDescriptor->enum_type(); - (*enumerations)[enumeration->full_name()] = MakeEnumerationConfig(enumeration); - fieldConfig["enumeration_name"] = enumeration->full_name(); - } - - if (fieldOptions.SerializationMode != EProtobufSerializationMode::Yt) { - return fieldConfig; - } - - if (fieldDescriptor->is_map()) { - fieldConfig["fields"] = MakeMapFieldsConfig(fieldDescriptor, enumerations, fieldOptions, cycleChecker); - return fieldConfig; - } - - if (fieldDescriptor->type() == FieldDescriptor::TYPE_MESSAGE) { - fieldConfig["fields"] = MakeProtoFormatMessageFieldsConfig( - fieldDescriptor->message_type(), - enumerations, - cycleChecker); - } - - return fieldConfig; -} - -void MakeProtoFormatOneofConfig( - const OneofDescriptor* oneofDescriptor, - TNode* enumerations, - const TProtobufFieldOptions& defaultFieldOptions, - const TProtobufOneofOptions& defaultOneofOptions, - TCycleChecker& cycleChecker, - TNode* fields) -{ - auto addFields = [&] (TNode* fields) { - for (int i = 0; i < oneofDescriptor->field_count(); ++i) { - fields->Add(MakeProtoFormatFieldConfig( - oneofDescriptor->field(i), - enumerations, - defaultFieldOptions, - cycleChecker)); - } - }; - - auto oneofOptions = GetOneofOptions(oneofDescriptor, defaultOneofOptions); - switch (oneofOptions.Mode) { - case EProtobufOneofMode::SeparateFields: - addFields(fields); - return; - case EProtobufOneofMode::Variant: { - auto oneofFields = TNode::CreateList(); - addFields(&oneofFields); - auto oneofField = TNode() - ("proto_type", "oneof") - ("name", oneofOptions.VariantFieldName) - ("fields", std::move(oneofFields)); - fields->Add(std::move(oneofField)); - return; - } - } - Y_FAIL(); -} - -TNode MakeProtoFormatMessageFieldsConfig( - const Descriptor* 
descriptor, - TNode* enumerations, - const TProtobufFieldOptions& defaultFieldOptions, - const TProtobufOneofOptions& defaultOneofOptions, - TCycleChecker& cycleChecker) -{ - auto fields = TNode::CreateList(); - THashSet<const OneofDescriptor*> visitedOneofs; - auto guard = cycleChecker.Enter(descriptor); - for (int fieldIndex = 0; fieldIndex < descriptor->field_count(); ++fieldIndex) { - auto fieldDescriptor = descriptor->field(fieldIndex); - auto oneofDescriptor = fieldDescriptor->containing_oneof(); - if (!oneofDescriptor) { - fields.Add(MakeProtoFormatFieldConfig( - fieldDescriptor, - enumerations, - defaultFieldOptions, - cycleChecker)); - } else if (!visitedOneofs.contains(oneofDescriptor)) { - MakeProtoFormatOneofConfig( - oneofDescriptor, - enumerations, - defaultFieldOptions, - defaultOneofOptions, - cycleChecker, - &fields); - visitedOneofs.insert(oneofDescriptor); - } - } - return fields; -} - -TNode MakeProtoFormatMessageFieldsConfig( - const Descriptor* descriptor, - TNode* enumerations, - TCycleChecker& cycleChecker) -{ - return MakeProtoFormatMessageFieldsConfig( - descriptor, - enumerations, - GetDefaultFieldOptions(descriptor), - GetDefaultOneofOptions(descriptor), - cycleChecker); -} - -TNode MakeProtoFormatConfigWithTables(const TVector<const Descriptor*>& descriptors) -{ - TNode config("protobuf"); - config.Attributes() - ("enumerations", TNode::CreateMap()) - ("tables", TNode::CreateList()); - - auto& enumerations = config.Attributes()["enumerations"]; - - for (auto* descriptor : descriptors) { - TCycleChecker cycleChecker; - auto columns = MakeProtoFormatMessageFieldsConfig(descriptor, &enumerations, cycleChecker); - config.Attributes()["tables"].Add( - TNode()("columns", std::move(columns))); - } - - return config; -} - -//////////////////////////////////////////////////////////////////////////////// - -class TFileDescriptorSetBuilder -{ -public: - TFileDescriptorSetBuilder() - : ExtensionFile_(EWrapperFieldFlag::descriptor()->file()) - { } - 
- void AddDescriptor(const Descriptor* descriptor) - { - auto [it, inserted] = AllDescriptors_.insert(descriptor); - if (!inserted) { - return; - } - - const auto* containingType = descriptor->containing_type(); - while (containingType) { - AddDescriptor(containingType); - containingType = containingType->containing_type(); - } - for (int i = 0; i < descriptor->field_count(); ++i) { - AddField(descriptor->field(i)); - } - } - - FileDescriptorSet Build() - { - THashSet<const FileDescriptor*> visitedFiles; - TVector<const FileDescriptor*> fileTopoOrder; - for (const auto* descriptor : AllDescriptors_) { - TraverseDependencies(descriptor->file(), visitedFiles, fileTopoOrder); - } - - THashSet<TString> messageTypeNames; - THashSet<TString> enumTypeNames; - for (const auto* descriptor : AllDescriptors_) { - messageTypeNames.insert(descriptor->full_name()); - } - for (const auto* enumDescriptor : EnumDescriptors_) { - enumTypeNames.insert(enumDescriptor->full_name()); - } - FileDescriptorSet fileDescriptorSetProto; - for (const auto* file : fileTopoOrder) { - auto* fileProto = fileDescriptorSetProto.add_file(); - file->CopyTo(fileProto); - Strip(fileProto, messageTypeNames, enumTypeNames); - } - return fileDescriptorSetProto; - } - -private: - void AddField(const FieldDescriptor* fieldDescriptor) - { - if (fieldDescriptor->message_type()) { - AddDescriptor(fieldDescriptor->message_type()); - } - if (fieldDescriptor->enum_type()) { - AddEnumDescriptor(fieldDescriptor->enum_type()); - } - } - - void AddEnumDescriptor(const EnumDescriptor* enumDescriptor) - { - auto [it, inserted] = EnumDescriptors_.insert(enumDescriptor); - if (!inserted) { - return; - } - const auto* containingType = enumDescriptor->containing_type(); - while (containingType) { - AddDescriptor(containingType); - containingType = containingType->containing_type(); - } - } - - void TraverseDependencies( - const FileDescriptor* current, - THashSet<const FileDescriptor*>& visited, - TVector<const 
FileDescriptor*>& topoOrder) - { - auto [it, inserted] = visited.insert(current); - if (!inserted) { - return; - } - for (int i = 0; i < current->dependency_count(); ++i) { - TraverseDependencies(current->dependency(i), visited, topoOrder); - } - topoOrder.push_back(current); - } - - template <typename TOptions> - void StripUnknownOptions(TOptions* options) - { - std::vector<const FieldDescriptor*> fields; - auto reflection = options->GetReflection(); - reflection->ListFields(*options, &fields); - for (auto field : fields) { - if (field->is_extension() && field->file() != ExtensionFile_) { - reflection->ClearField(options, field); - } - } - } - - template <typename TRepeatedField, typename TPredicate> - void RemoveIf(TRepeatedField* repeatedField, TPredicate predicate) - { - repeatedField->erase( - std::remove_if(repeatedField->begin(), repeatedField->end(), predicate), - repeatedField->end()); - } - - void Strip( - const TString& containingTypePrefix, - DescriptorProto* messageProto, - const THashSet<TString>& messageTypeNames, - const THashSet<TString>& enumTypeNames) - { - const auto prefix = containingTypePrefix + messageProto->name() + '.'; - - RemoveIf(messageProto->mutable_nested_type(), [&] (const DescriptorProto& descriptorProto) { - return !messageTypeNames.contains(prefix + descriptorProto.name()); - }); - RemoveIf(messageProto->mutable_enum_type(), [&] (const EnumDescriptorProto& enumDescriptorProto) { - return !enumTypeNames.contains(prefix + enumDescriptorProto.name()); - }); - - messageProto->clear_extension(); - StripUnknownOptions(messageProto->mutable_options()); - for (auto& fieldProto : *messageProto->mutable_field()) { - StripUnknownOptions(fieldProto.mutable_options()); - } - for (auto& oneofProto : *messageProto->mutable_oneof_decl()) { - StripUnknownOptions(oneofProto.mutable_options()); - } - for (auto& nestedTypeProto : *messageProto->mutable_nested_type()) { - Strip(prefix, &nestedTypeProto, messageTypeNames, enumTypeNames); - } - for 
(auto& enumProto : *messageProto->mutable_enum_type()) { - StripUnknownOptions(enumProto.mutable_options()); - for (auto& enumValue : *enumProto.mutable_value()) { - StripUnknownOptions(enumValue.mutable_options()); - } - } - } - - void Strip( - FileDescriptorProto* fileProto, - const THashSet<TString>& messageTypeNames, - const THashSet<TString>& enumTypeNames) - { - const auto prefix = fileProto->package().Empty() - ? "" - : fileProto->package() + '.'; - - RemoveIf(fileProto->mutable_message_type(), [&] (const DescriptorProto& descriptorProto) { - return !messageTypeNames.contains(prefix + descriptorProto.name()); - }); - RemoveIf(fileProto->mutable_enum_type(), [&] (const EnumDescriptorProto& enumDescriptorProto) { - return !enumTypeNames.contains(prefix + enumDescriptorProto.name()); - }); - - fileProto->clear_service(); - fileProto->clear_extension(); - - StripUnknownOptions(fileProto->mutable_options()); - for (auto& messageProto : *fileProto->mutable_message_type()) { - Strip(prefix, &messageProto, messageTypeNames, enumTypeNames); - } - for (auto& enumProto : *fileProto->mutable_enum_type()) { - StripUnknownOptions(enumProto.mutable_options()); - for (auto& enumValue : *enumProto.mutable_value()) { - StripUnknownOptions(enumValue.mutable_options()); - } - } - } - -private: - const FileDescriptor* const ExtensionFile_; - THashSet<const Descriptor*> AllDescriptors_; - THashSet<const EnumDescriptor*> EnumDescriptors_; -}; - -TNode MakeProtoFormatConfigWithDescriptors(const TVector<const Descriptor*>& descriptors) -{ - TFileDescriptorSetBuilder builder; - auto typeNames = TNode::CreateList(); - for (const auto* descriptor : descriptors) { - builder.AddDescriptor(descriptor); - typeNames.Add(descriptor->full_name()); - } - - auto fileDescriptorSetText = builder.Build().ShortDebugString(); - TNode config("protobuf"); - config.Attributes() - ("file_descriptor_set_text", std::move(fileDescriptorSetText)) - ("type_names", std::move(typeNames)); - return config; -} - 
-//////////////////////////////////////////////////////////////////////////////// - -using TTypePtrOrOtherColumns = std::variant<NTi::TTypePtr, TOtherColumns>; - -struct TMember { - TString Name; - TTypePtrOrOtherColumns TypeOrOtherColumns; -}; - -//////////////////////////////////////////////////////////////////////////////// - -TValueTypeOrOtherColumns GetScalarFieldType( - const FieldDescriptor& fieldDescriptor, - const TProtobufFieldOptions& options) -{ - if (options.Type) { - switch (*options.Type) { - case EProtobufType::EnumInt: - return EValueType::VT_INT64; - case EProtobufType::EnumString: - return EValueType::VT_STRING; - case EProtobufType::Any: - return EValueType::VT_ANY; - case EProtobufType::OtherColumns: - return TOtherColumns{}; - } - Y_FAIL(); - } - - switch (fieldDescriptor.cpp_type()) { - case FieldDescriptor::CPPTYPE_INT32: - return EValueType::VT_INT32; - case FieldDescriptor::CPPTYPE_INT64: - return EValueType::VT_INT64; - case FieldDescriptor::CPPTYPE_UINT32: - return EValueType::VT_UINT32; - case FieldDescriptor::CPPTYPE_UINT64: - return EValueType::VT_UINT64; - case FieldDescriptor::CPPTYPE_FLOAT: - case FieldDescriptor::CPPTYPE_DOUBLE: - return EValueType::VT_DOUBLE; - case FieldDescriptor::CPPTYPE_BOOL: - return EValueType::VT_BOOLEAN; - case FieldDescriptor::CPPTYPE_STRING: - case FieldDescriptor::CPPTYPE_MESSAGE: - case FieldDescriptor::CPPTYPE_ENUM: - return EValueType::VT_STRING; - default: - ythrow yexception() << - "Unexpected field type '" << fieldDescriptor.cpp_type_name() << "' " << - "for field " << fieldDescriptor.name(); - } -} - -bool HasNameExtension(const FieldDescriptor& fieldDescriptor) -{ - const auto& options = fieldDescriptor.options(); - return options.HasExtension(column_name) || options.HasExtension(key_column_name); -} - -void SortFields(TVector<const FieldDescriptor*>& fieldDescriptors, EProtobufFieldSortOrder fieldSortOrder) -{ - switch (fieldSortOrder) { - case EProtobufFieldSortOrder::AsInProtoFile: - return; 
- case EProtobufFieldSortOrder::ByFieldNumber: - SortBy(fieldDescriptors, [] (const FieldDescriptor* fieldDescriptor) { - return fieldDescriptor->number(); - }); - return; - } - Y_FAIL(); -} - -NTi::TTypePtr CreateStruct(TStringBuf fieldName, TVector<TMember> members) -{ - TVector<NTi::TStructType::TOwnedMember> structMembers; - structMembers.reserve(members.size()); - for (auto& member : members) { - std::visit(TOverloaded{ - [&] (TOtherColumns) { - ythrow TApiUsageError() << - "Could not deduce YT type for field " << member.Name << " of " << - "embedded message field " << fieldName << " " << - "(note that " << EWrapperFieldFlag::OTHER_COLUMNS << " fields " << - "are not allowed inside embedded messages)"; - }, - [&] (NTi::TTypePtr& type) { - structMembers.emplace_back(std::move(member.Name), std::move(type)); - }, - }, member.TypeOrOtherColumns); - } - return NTi::Struct(std::move(structMembers)); -} - -TMaybe<TVector<TString>> InferColumnFilter(const ::google::protobuf::Descriptor& descriptor) -{ - auto isOtherColumns = [] (const ::google::protobuf::FieldDescriptor& field) { - return GetFieldOptions(&field).Type == EProtobufType::OtherColumns; - }; - - TVector<TString> result; - result.reserve(descriptor.field_count()); - for (int i = 0; i < descriptor.field_count(); ++i) { - const auto& field = *descriptor.field(i); - if (isOtherColumns(field)) { - return {}; - } - result.push_back(GetColumnName(field)); - } - return result; -} - -//////////////////////////////////////////////////////////////////////////////// - -class TTableSchemaInferrer -{ -public: - TTableSchemaInferrer(bool keepFieldsWithoutExtension) - : KeepFieldsWithoutExtension_(keepFieldsWithoutExtension) - { } - - TTableSchema InferSchema(const Descriptor& messageDescriptor); - -private: - TTypePtrOrOtherColumns GetFieldType( - const FieldDescriptor& fieldDescriptor, - const TProtobufFieldOptions& defaultOptions); - - void ProcessOneofField( - TStringBuf containingFieldName, - const OneofDescriptor& 
oneofDescriptor, - const TProtobufFieldOptions& defaultFieldOptions, - const TProtobufOneofOptions& defaultOneofOptions, - EProtobufFieldSortOrder fieldSortOrder, - TVector<TMember>* members); - - TVector<TMember> GetMessageMembers( - TStringBuf containingFieldName, - const Descriptor& fieldDescriptor, - TProtobufFieldOptions defaultFieldOptions, - std::optional<EProtobufFieldSortOrder> overrideFieldSortOrder = std::nullopt); - - NTi::TTypePtr GetMessageType( - const FieldDescriptor& fieldDescriptor, - TProtobufFieldOptions defaultFieldOptions); - - NTi::TTypePtr GetMapType( - const FieldDescriptor& fieldDescriptor, - const TProtobufFieldOptions& fieldOptions); - -private: - void GetMessageMembersImpl( - TStringBuf containingFieldName, - const Descriptor& fieldDescriptor, - TProtobufFieldOptions defaultFieldOptions, - std::optional<EProtobufFieldSortOrder> overrideFieldSortOrder, - TVector<TMember>* members); - -private: - const bool KeepFieldsWithoutExtension_; - TCycleChecker CycleChecker_; -}; - -void TTableSchemaInferrer::ProcessOneofField( - TStringBuf containingFieldName, - const OneofDescriptor& oneofDescriptor, - const TProtobufFieldOptions& defaultFieldOptions, - const TProtobufOneofOptions& defaultOneofOptions, - EProtobufFieldSortOrder fieldSortOrder, - TVector<TMember>* members) -{ - auto oneofOptions = GetOneofOptions(&oneofDescriptor, defaultOneofOptions); - - auto addFields = [&] (TVector<TMember>* members, bool removeOptionality) { - TVector<const FieldDescriptor*> fieldDescriptors; - for (int i = 0; i < oneofDescriptor.field_count(); ++i) { - fieldDescriptors.push_back(oneofDescriptor.field(i)); - } - SortFields(fieldDescriptors, fieldSortOrder); - for (auto innerFieldDescriptor : fieldDescriptors) { - auto typeOrOtherColumns = GetFieldType( - *innerFieldDescriptor, - defaultFieldOptions); - if (auto* maybeType = std::get_if<NTi::TTypePtr>(&typeOrOtherColumns); - maybeType && removeOptionality && (*maybeType)->IsOptional()) - { - typeOrOtherColumns 
= (*maybeType)->AsOptional()->GetItemType(); - } - members->push_back(TMember{ - GetColumnName(*innerFieldDescriptor), - std::move(typeOrOtherColumns), - }); - } - }; - - switch (oneofOptions.Mode) { - case EProtobufOneofMode::SeparateFields: - addFields(members, /* removeOptionality */ false); - return; - case EProtobufOneofMode::Variant: { - TVector<TMember> variantMembers; - addFields(&variantMembers, /* removeOptionality */ true); - members->push_back(TMember{ - oneofOptions.VariantFieldName, - NTi::Optional( - NTi::Variant( - CreateStruct(containingFieldName, std::move(variantMembers)) - ) - ) - }); - return; - } - } - Y_FAIL(); -} - -TVector<TMember> TTableSchemaInferrer::GetMessageMembers( - TStringBuf containingFieldName, - const Descriptor& messageDescriptor, - TProtobufFieldOptions defaultFieldOptions, - std::optional<EProtobufFieldSortOrder> overrideFieldSortOrder) -{ - TVector<TMember> members; - GetMessageMembersImpl( - containingFieldName, - messageDescriptor, - defaultFieldOptions, - overrideFieldSortOrder, - &members - ); - return members; -} - -void TTableSchemaInferrer::GetMessageMembersImpl( - TStringBuf containingFieldName, - const Descriptor& messageDescriptor, - TProtobufFieldOptions defaultFieldOptions, - std::optional<EProtobufFieldSortOrder> overrideFieldSortOrder, - TVector<TMember>* members) -{ - auto guard = CycleChecker_.Enter(&messageDescriptor); - defaultFieldOptions = GetDefaultFieldOptions(&messageDescriptor, defaultFieldOptions); - auto messageOptions = GetMessageOptions(&messageDescriptor); - auto defaultOneofOptions = GetDefaultOneofOptions(&messageDescriptor); - - TVector<const FieldDescriptor*> fieldDescriptors; - fieldDescriptors.reserve(messageDescriptor.field_count()); - for (int i = 0; i < messageDescriptor.field_count(); ++i) { - if (!KeepFieldsWithoutExtension_ && !HasNameExtension(*messageDescriptor.field(i))) { - continue; - } - fieldDescriptors.push_back(messageDescriptor.field(i)); - } - - auto fieldSortOrder = 
overrideFieldSortOrder.value_or(messageOptions.FieldSortOrder); - SortFields(fieldDescriptors, fieldSortOrder); - - THashSet<const OneofDescriptor*> visitedOneofs; - for (const auto innerFieldDescriptor : fieldDescriptors) { - auto oneofDescriptor = innerFieldDescriptor->containing_oneof(); - if (oneofDescriptor) { - if (visitedOneofs.contains(oneofDescriptor)) { - continue; - } - ProcessOneofField( - containingFieldName, - *oneofDescriptor, - defaultFieldOptions, - defaultOneofOptions, - messageOptions.FieldSortOrder, - members); - visitedOneofs.insert(oneofDescriptor); - continue; - } - auto fieldOptions = GetFieldOptions(innerFieldDescriptor, defaultFieldOptions); - if (fieldOptions.SerializationMode == EProtobufSerializationMode::Embedded) { - Y_ENSURE(innerFieldDescriptor->type() == FieldDescriptor::TYPE_MESSAGE, - "EMBEDDED column must have message type"); - Y_ENSURE(innerFieldDescriptor->label() == FieldDescriptor::LABEL_REQUIRED, - "EMBEDDED column must be marked required"); - GetMessageMembersImpl( - innerFieldDescriptor->full_name(), - *innerFieldDescriptor->message_type(), - defaultFieldOptions, - /*overrideFieldSortOrder*/ std::nullopt, - members); - } else { - auto typeOrOtherColumns = GetFieldType( - *innerFieldDescriptor, - defaultFieldOptions); - members->push_back(TMember{ - GetColumnName(*innerFieldDescriptor), - std::move(typeOrOtherColumns), - }); - } - } -} - -NTi::TTypePtr TTableSchemaInferrer::GetMessageType( - const FieldDescriptor& fieldDescriptor, - TProtobufFieldOptions defaultFieldOptions) -{ - Y_VERIFY(fieldDescriptor.message_type()); - const auto& messageDescriptor = *fieldDescriptor.message_type(); - auto members = GetMessageMembers( - fieldDescriptor.full_name(), - messageDescriptor, - defaultFieldOptions); - - return CreateStruct(fieldDescriptor.full_name(), std::move(members)); -} - -NTi::TTypePtr TTableSchemaInferrer::GetMapType( - const FieldDescriptor& fieldDescriptor, - const TProtobufFieldOptions& fieldOptions) -{ - 
Y_VERIFY(fieldDescriptor.is_map()); - switch (fieldOptions.MapMode) { - case EProtobufMapMode::ListOfStructsLegacy: - case EProtobufMapMode::ListOfStructs: { - TProtobufFieldOptions embeddedOptions; - if (fieldOptions.MapMode == EProtobufMapMode::ListOfStructs) { - embeddedOptions.SerializationMode = EProtobufSerializationMode::Yt; - } - auto list = NTi::List(GetMessageType(fieldDescriptor, embeddedOptions)); - switch (fieldOptions.ListMode) { - case EProtobufListMode::Required: - return list; - case EProtobufListMode::Optional: - return NTi::Optional(std::move(list)); - } - Y_FAIL(); - } - case EProtobufMapMode::Dict: - case EProtobufMapMode::OptionalDict: { - auto message = fieldDescriptor.message_type(); - Y_VERIFY(message->field_count() == 2); - auto keyVariant = GetScalarFieldType(*message->field(0), TProtobufFieldOptions{}); - Y_VERIFY(std::holds_alternative<EValueType>(keyVariant)); - auto key = std::get<EValueType>(keyVariant); - TProtobufFieldOptions embeddedOptions; - embeddedOptions.SerializationMode = EProtobufSerializationMode::Yt; - auto valueVariant = GetFieldType(*message->field(1), embeddedOptions); - Y_VERIFY(std::holds_alternative<NTi::TTypePtr>(valueVariant)); - auto value = std::get<NTi::TTypePtr>(valueVariant); - Y_VERIFY(value->IsOptional()); - value = value->AsOptional()->GetItemType(); - auto dict = NTi::Dict(ToTypeV3(key, true), value); - if (fieldOptions.MapMode == EProtobufMapMode::OptionalDict) { - return NTi::Optional(dict); - } else { - return dict; - } - } - } -} - -TTypePtrOrOtherColumns TTableSchemaInferrer::GetFieldType( - const FieldDescriptor& fieldDescriptor, - const TProtobufFieldOptions& defaultOptions) -{ - auto fieldOptions = GetFieldOptions(&fieldDescriptor, defaultOptions); - if (fieldOptions.Type) { - ValidateProtobufType(fieldDescriptor, *fieldOptions.Type); - } - - auto getScalarType = [&] { - auto valueTypeOrOtherColumns = GetScalarFieldType(fieldDescriptor, fieldOptions); - return std::visit(TOverloaded{ - [] 
(TOtherColumns) -> TTypePtrOrOtherColumns { - return TOtherColumns{}; - }, - [] (EValueType valueType) -> TTypePtrOrOtherColumns { - return ToTypeV3(valueType, true); - } - }, valueTypeOrOtherColumns); - }; - - auto withFieldLabel = [&] (const TTypePtrOrOtherColumns& typeOrOtherColumns) -> TTypePtrOrOtherColumns { - switch (fieldDescriptor.label()) { - case FieldDescriptor::Label::LABEL_REPEATED: { - Y_ENSURE(fieldOptions.SerializationMode == EProtobufSerializationMode::Yt, - "Repeated fields are supported only for YT serialization mode, field \"" + fieldDescriptor.full_name() + - "\" has incorrect serialization mode"); - auto* type = std::get_if<NTi::TTypePtr>(&typeOrOtherColumns); - Y_ENSURE(type, "OTHER_COLUMNS field can not be repeated"); - switch (fieldOptions.ListMode) { - case EProtobufListMode::Required: - return NTi::TTypePtr(NTi::List(*type)); - case EProtobufListMode::Optional: - return NTi::TTypePtr(NTi::Optional(NTi::List(*type))); - } - Y_FAIL(); - } - case FieldDescriptor::Label::LABEL_OPTIONAL: - return std::visit(TOverloaded{ - [] (TOtherColumns) -> TTypePtrOrOtherColumns { - return TOtherColumns{}; - }, - [] (NTi::TTypePtr type) -> TTypePtrOrOtherColumns { - return NTi::TTypePtr(NTi::Optional(std::move(type))); - } - }, typeOrOtherColumns); - case FieldDescriptor::LABEL_REQUIRED: { - auto* type = std::get_if<NTi::TTypePtr>(&typeOrOtherColumns); - Y_ENSURE(type, "OTHER_COLUMNS field can not be required"); - return *type; - } - } - Y_FAIL(); - }; - - switch (fieldOptions.SerializationMode) { - case EProtobufSerializationMode::Protobuf: - return withFieldLabel(getScalarType()); - case EProtobufSerializationMode::Yt: - if (fieldDescriptor.type() == FieldDescriptor::TYPE_MESSAGE) { - if (fieldDescriptor.is_map()) { - return GetMapType(fieldDescriptor, fieldOptions); - } else { - return withFieldLabel(GetMessageType(fieldDescriptor, TProtobufFieldOptions{})); - } - } else { - return withFieldLabel(getScalarType()); - } - case 
EProtobufSerializationMode::Embedded: - ythrow yexception() << "EMBEDDED field is not allowed for field " - << fieldDescriptor.full_name(); - } - Y_FAIL(); -} - -TTableSchema TTableSchemaInferrer::InferSchema(const Descriptor& messageDescriptor) -{ - TTableSchema result; - - auto defaultFieldOptions = GetDefaultFieldOptions(&messageDescriptor); - auto members = GetMessageMembers( - messageDescriptor.full_name(), - messageDescriptor, - defaultFieldOptions, - // Use special sort order for top level messages. - /*overrideFieldSortOrder*/ EProtobufFieldSortOrder::AsInProtoFile); - - for (auto& member : members) { - std::visit(TOverloaded{ - [&] (TOtherColumns) { - result.Strict(false); - }, - [&] (NTi::TTypePtr& type) { - result.AddColumn(TColumnSchema() - .Name(std::move(member.Name)) - .Type(std::move(type)) - ); - }, - }, member.TypeOrOtherColumns); - } - - return result; -} - -TTableSchema CreateTableSchemaImpl( - const Descriptor& messageDescriptor, - bool keepFieldsWithoutExtension) -{ - TTableSchemaInferrer inferrer(keepFieldsWithoutExtension); - return inferrer.InferSchema(messageDescriptor); -} - -//////////////////////////////////////////////////////////////////////////////// - -} // namespace NYT::NDetail - -//////////////////////////////////////////////////////////////////////////////// - -template <> -void Out<NYT::EWrapperFieldFlag::Enum>(IOutputStream& stream, NYT::EWrapperFieldFlag::Enum value) -{ - stream << NYT::EWrapperFieldFlag_Enum_Name(value); -} - -template <> -void Out<NYT::EWrapperMessageFlag::Enum>(IOutputStream& stream, NYT::EWrapperMessageFlag::Enum value) -{ - stream << NYT::EWrapperMessageFlag_Enum_Name(value); -} - -template <> -void Out<NYT::EWrapperOneofFlag::Enum>(IOutputStream& stream, NYT::EWrapperOneofFlag::Enum value) -{ - stream << NYT::EWrapperOneofFlag_Enum_Name(value); -} diff --git a/yt/cpp/mapreduce/interface/protobuf_format.h b/yt/cpp/mapreduce/interface/protobuf_format.h deleted file mode 100644 index 
aafbced3869..00000000000 --- a/yt/cpp/mapreduce/interface/protobuf_format.h +++ /dev/null @@ -1,106 +0,0 @@ -#pragma once - -#include "common.h" - -#include <yt/yt_proto/yt/formats/extension.pb.h> - -#include <util/generic/maybe.h> - -#include <google/protobuf/message.h> - -/// @cond Doxygen_Suppress -namespace NYT::NDetail { - -//////////////////////////////////////////////////////////////////////////////// - -enum class EProtobufType -{ - EnumInt /* "enum_int" */, - EnumString /* "enum_string" */, - Any /* "any" */, - OtherColumns /* "other_columns" */, -}; - -enum class EProtobufSerializationMode -{ - Protobuf, - Yt, - Embedded, -}; - -enum class EProtobufListMode -{ - Optional, - Required, -}; - -enum class EProtobufMapMode -{ - ListOfStructsLegacy, - ListOfStructs, - Dict, - OptionalDict, -}; - -enum class EProtobufFieldSortOrder -{ - AsInProtoFile, - ByFieldNumber, -}; - -enum class EProtobufOneofMode -{ - SeparateFields, - Variant, -}; - -enum class EProtobufEnumWritingMode -{ - SkipUnknownValues, - CheckValues, -}; - -struct TProtobufOneofOptions -{ - EProtobufOneofMode Mode = EProtobufOneofMode::Variant; - TString VariantFieldName; -}; - -struct TProtobufFieldOptions -{ - TMaybe<EProtobufType> Type; - EProtobufSerializationMode SerializationMode = EProtobufSerializationMode::Protobuf; - EProtobufListMode ListMode = EProtobufListMode::Required; - EProtobufMapMode MapMode = EProtobufMapMode::ListOfStructsLegacy; -}; - -struct TProtobufMessageOptions -{ - EProtobufFieldSortOrder FieldSortOrder = EProtobufFieldSortOrder::ByFieldNumber; -}; - -TString GetColumnName(const ::google::protobuf::FieldDescriptor& field); - -TProtobufFieldOptions GetFieldOptions( - const ::google::protobuf::FieldDescriptor* fieldDescriptor, - const TMaybe<TProtobufFieldOptions>& defaultFieldOptions = {}); - -TProtobufOneofOptions GetOneofOptions( - const ::google::protobuf::OneofDescriptor* oneofDescriptor, - const TMaybe<TProtobufOneofOptions>& defaultOneofOptions = {}); - 
-TProtobufMessageOptions GetMessageOptions(const ::google::protobuf::Descriptor* descriptor); - -TMaybe<TVector<TString>> InferColumnFilter(const ::google::protobuf::Descriptor& descriptor); - -TNode MakeProtoFormatConfigWithTables(const TVector<const ::google::protobuf::Descriptor*>& descriptors); -TNode MakeProtoFormatConfigWithDescriptors(const TVector<const ::google::protobuf::Descriptor*>& descriptors); - -TTableSchema CreateTableSchemaImpl( - const ::google::protobuf::Descriptor& messageDescriptor, - bool keepFieldsWithoutExtension); - -//////////////////////////////////////////////////////////////////////////////// - -} // namespace NYT::NDetail -/// @endcond diff --git a/yt/cpp/mapreduce/interface/protobuf_table_schema_ut.cpp b/yt/cpp/mapreduce/interface/protobuf_table_schema_ut.cpp deleted file mode 100644 index 19a3d5163f7..00000000000 --- a/yt/cpp/mapreduce/interface/protobuf_table_schema_ut.cpp +++ /dev/null @@ -1,451 +0,0 @@ -#include "common.h" -#include "errors.h" -#include "common_ut.h" -#include "util/generic/fwd.h" - -#include <yt/cpp/mapreduce/interface/protobuf_table_schema_ut.pb.h> -#include <yt/cpp/mapreduce/interface/proto3_ut.pb.h> - -#include <yt/cpp/mapreduce/tests/yt_unittest_lib/yt_unittest_lib.h> - -#include <library/cpp/testing/unittest/registar.h> - -#include <algorithm> - -using namespace NYT; - -bool IsFieldPresent(const TTableSchema& schema, TStringBuf name) -{ - for (const auto& field : schema.Columns()) { - if (field.Name() == name) { - return true; - } - } - return false; -} - -Y_UNIT_TEST_SUITE(ProtoSchemaTest_Simple) -{ - Y_UNIT_TEST(TIntegral) - { - const auto schema = CreateTableSchema<NUnitTesting::TIntegral>(); - - ASSERT_SERIALIZABLES_EQUAL(schema, TTableSchema() - .AddColumn(TColumnSchema().Name("DoubleField").Type(ToTypeV3(EValueType::VT_DOUBLE, false))) - .AddColumn(TColumnSchema().Name("FloatField").Type(ToTypeV3(EValueType::VT_DOUBLE, false))) - 
.AddColumn(TColumnSchema().Name("Int32Field").Type(ToTypeV3(EValueType::VT_INT32, false))) - .AddColumn(TColumnSchema().Name("Int64Field").Type(ToTypeV3(EValueType::VT_INT64, false))) - .AddColumn(TColumnSchema().Name("Uint32Field").Type(ToTypeV3(EValueType::VT_UINT32, false))) - .AddColumn(TColumnSchema().Name("Uint64Field").Type(ToTypeV3(EValueType::VT_UINT64, false))) - .AddColumn(TColumnSchema().Name("Sint32Field").Type(ToTypeV3(EValueType::VT_INT32, false))) - .AddColumn(TColumnSchema().Name("Sint64Field").Type(ToTypeV3(EValueType::VT_INT64, false))) - .AddColumn(TColumnSchema().Name("Fixed32Field").Type(ToTypeV3(EValueType::VT_UINT32, false))) - .AddColumn(TColumnSchema().Name("Fixed64Field").Type(ToTypeV3(EValueType::VT_UINT64, false))) - .AddColumn(TColumnSchema().Name("Sfixed32Field").Type(ToTypeV3(EValueType::VT_INT32, false))) - .AddColumn(TColumnSchema().Name("Sfixed64Field").Type(ToTypeV3(EValueType::VT_INT64, false))) - .AddColumn(TColumnSchema().Name("BoolField").Type(ToTypeV3(EValueType::VT_BOOLEAN, false))) - .AddColumn(TColumnSchema().Name("EnumField").Type(ToTypeV3(EValueType::VT_STRING, false)))); - } - - Y_UNIT_TEST(TOneOf) - { - const auto schema = CreateTableSchema<NUnitTesting::TOneOf>(); - - ASSERT_SERIALIZABLES_EQUAL(schema, TTableSchema() - .AddColumn(TColumnSchema().Name("DoubleField").Type(ToTypeV3(EValueType::VT_DOUBLE, false))) - .AddColumn(TColumnSchema().Name("Int32Field").Type(ToTypeV3(EValueType::VT_INT32, false))) - .AddColumn(TColumnSchema().Name("BoolField").Type(ToTypeV3(EValueType::VT_BOOLEAN, false)))); - } - - Y_UNIT_TEST(TWithRequired) - { - const auto schema = CreateTableSchema<NUnitTesting::TWithRequired>(); - - ASSERT_SERIALIZABLES_EQUAL(schema, TTableSchema() - .AddColumn(TColumnSchema().Name("RequiredField").Type(ToTypeV3(EValueType::VT_STRING, true))) - .AddColumn(TColumnSchema().Name("NotRequiredField").Type(ToTypeV3(EValueType::VT_STRING, false)))); - } - - Y_UNIT_TEST(TAggregated) - { - const auto schema = 
CreateTableSchema<NUnitTesting::TAggregated>(); - - UNIT_ASSERT_VALUES_EQUAL(6, schema.Columns().size()); - ASSERT_SERIALIZABLES_EQUAL(schema, TTableSchema() - .AddColumn(TColumnSchema().Name("StringField").Type(ToTypeV3(EValueType::VT_STRING, false))) - .AddColumn(TColumnSchema().Name("BytesField").Type(ToTypeV3(EValueType::VT_STRING, false))) - .AddColumn(TColumnSchema().Name("NestedField").Type(ToTypeV3(EValueType::VT_STRING, false))) - .AddColumn(TColumnSchema().Name("NestedRepeatedField").Type(ToTypeV3(EValueType::VT_STRING, false))) - .AddColumn(TColumnSchema().Name("NestedOneOfField").Type(ToTypeV3(EValueType::VT_STRING, false))) - .AddColumn(TColumnSchema().Name("NestedRecursiveField").Type(ToTypeV3(EValueType::VT_STRING, false)))); - } - - Y_UNIT_TEST(TAliased) - { - const auto schema = CreateTableSchema<NUnitTesting::TAliased>(); - - ASSERT_SERIALIZABLES_EQUAL(schema, TTableSchema() - .AddColumn(TColumnSchema().Name("key").Type(ToTypeV3(EValueType::VT_INT32, false))) - .AddColumn(TColumnSchema().Name("subkey").Type(ToTypeV3(EValueType::VT_DOUBLE, false))) - .AddColumn(TColumnSchema().Name("Data").Type(ToTypeV3(EValueType::VT_STRING, false)))); - } - - Y_UNIT_TEST(SortColumns) - { - const TSortColumns keys = {"key", "subkey"}; - - const auto schema = CreateTableSchema<NUnitTesting::TAliased>(keys); - - ASSERT_SERIALIZABLES_EQUAL(schema, TTableSchema() - .AddColumn(TColumnSchema() - .Name("key") - .Type(ToTypeV3(EValueType::VT_INT32, false)) - .SortOrder(ESortOrder::SO_ASCENDING)) - .AddColumn(TColumnSchema() - .Name("subkey") - .Type(ToTypeV3(EValueType::VT_DOUBLE, false)) - .SortOrder(ESortOrder::SO_ASCENDING)) - .AddColumn(TColumnSchema().Name("Data").Type(ToTypeV3(EValueType::VT_STRING, false)))); - } - - Y_UNIT_TEST(SortColumnsReordered) - { - const TSortColumns keys = {"subkey"}; - - const auto schema = CreateTableSchema<NUnitTesting::TAliased>(keys); - - ASSERT_SERIALIZABLES_EQUAL(schema, TTableSchema() - .AddColumn(TColumnSchema() - .Name("subkey") 
- .Type(ToTypeV3(EValueType::VT_DOUBLE, false)) - .SortOrder(ESortOrder::SO_ASCENDING)) - .AddColumn(TColumnSchema().Name("key").Type(ToTypeV3(EValueType::VT_INT32, false))) - .AddColumn(TColumnSchema().Name("Data").Type(ToTypeV3(EValueType::VT_STRING, false)))); - } - - Y_UNIT_TEST(SortColumnsInvalid) - { - UNIT_ASSERT_EXCEPTION(CreateTableSchema<NUnitTesting::TAliased>({"subkey", "subkey"}), yexception); - UNIT_ASSERT_EXCEPTION(CreateTableSchema<NUnitTesting::TAliased>({"key", "junk"}), yexception); - } - - Y_UNIT_TEST(KeepFieldsWithoutExtensionTrue) - { - const auto schema = CreateTableSchema<NUnitTesting::TAliased>({}, true); - UNIT_ASSERT(IsFieldPresent(schema, "key")); - UNIT_ASSERT(IsFieldPresent(schema, "subkey")); - UNIT_ASSERT(IsFieldPresent(schema, "Data")); - UNIT_ASSERT(schema.Strict()); - } - - Y_UNIT_TEST(KeepFieldsWithoutExtensionFalse) - { - const auto schema = CreateTableSchema<NUnitTesting::TAliased>({}, false); - UNIT_ASSERT(IsFieldPresent(schema, "key")); - UNIT_ASSERT(IsFieldPresent(schema, "subkey")); - UNIT_ASSERT(!IsFieldPresent(schema, "Data")); - UNIT_ASSERT(schema.Strict()); - } - - Y_UNIT_TEST(ProtobufTypeOption) - { - const auto schema = CreateTableSchema<NUnitTesting::TWithTypeOptions>({}); - - ASSERT_SERIALIZABLES_EQUAL(schema, TTableSchema() - .Strict(false) - .AddColumn(TColumnSchema().Name("ColorIntField").Type(ToTypeV3(EValueType::VT_INT64, false))) - .AddColumn(TColumnSchema().Name("ColorStringField").Type(ToTypeV3(EValueType::VT_STRING, false))) - .AddColumn(TColumnSchema().Name("AnyField").Type(ToTypeV3(EValueType::VT_ANY, false))) - .AddColumn(TColumnSchema().Name("EmbeddedField").Type( - NTi::Optional(NTi::Struct({ - {"ColorIntField", ToTypeV3(EValueType::VT_INT64, false)}, - {"ColorStringField", ToTypeV3(EValueType::VT_STRING, false)}, - {"AnyField", ToTypeV3(EValueType::VT_ANY, false)}})))) - .AddColumn(TColumnSchema().Name("RepeatedEnumIntField").Type(NTi::List(NTi::Int64())))); - } - - 
Y_UNIT_TEST(ProtobufTypeOption_TypeMismatch) - { - UNIT_ASSERT_EXCEPTION( - CreateTableSchema<NUnitTesting::TWithTypeOptions_TypeMismatch_EnumInt>({}), - yexception); - UNIT_ASSERT_EXCEPTION( - CreateTableSchema<NUnitTesting::TWithTypeOptions_TypeMismatch_EnumString>({}), - yexception); - UNIT_ASSERT_EXCEPTION( - CreateTableSchema<NUnitTesting::TWithTypeOptions_TypeMismatch_Any>({}), - yexception); - UNIT_ASSERT_EXCEPTION( - CreateTableSchema<NUnitTesting::TWithTypeOptions_TypeMismatch_OtherColumns>({}), - yexception); - } -} - -Y_UNIT_TEST_SUITE(ProtoSchemaTest_Complex) -{ - Y_UNIT_TEST(TRepeated) - { - UNIT_ASSERT_EXCEPTION(CreateTableSchema<NUnitTesting::TRepeated>(), yexception); - - const auto schema = CreateTableSchema<NUnitTesting::TRepeatedYtMode>(); - ASSERT_SERIALIZABLES_EQUAL(schema, TTableSchema() - .AddColumn(TColumnSchema().Name("Int32Field").Type(NTi::List(ToTypeV3(EValueType::VT_INT32, true))))); - } - - Y_UNIT_TEST(TRepeatedOptionalList) - { - const auto schema = CreateTableSchema<NUnitTesting::TOptionalList>(); - auto type = NTi::Optional(NTi::List(NTi::Int64())); - ASSERT_SERIALIZABLES_EQUAL(schema, TTableSchema() - .AddColumn(TColumnSchema().Name("OptionalListInt64").TypeV3(type))); - } - - NTi::TTypePtr GetUrlRowType(bool required) - { - static const NTi::TTypePtr structType = NTi::Struct({ - {"Host", ToTypeV3(EValueType::VT_STRING, false)}, - {"Path", ToTypeV3(EValueType::VT_STRING, false)}, - {"HttpCode", ToTypeV3(EValueType::VT_INT32, false)}}); - return required ? 
structType : NTi::TTypePtr(NTi::Optional(structType)); - } - - Y_UNIT_TEST(TRowFieldSerializationOption) - { - const auto schema = CreateTableSchema<NUnitTesting::TRowFieldSerializationOption>(); - - ASSERT_SERIALIZABLES_EQUAL(schema, TTableSchema() - .AddColumn(TColumnSchema().Name("UrlRow_1").Type(GetUrlRowType(false))) - .AddColumn(TColumnSchema().Name("UrlRow_2").Type(ToTypeV3(EValueType::VT_STRING, false)))); - } - - Y_UNIT_TEST(TRowMessageSerializationOption) - { - const auto schema = CreateTableSchema<NUnitTesting::TRowMessageSerializationOption>(); - - ASSERT_SERIALIZABLES_EQUAL(schema, TTableSchema() - .AddColumn(TColumnSchema().Name("UrlRow_1").Type(GetUrlRowType(false))) - .AddColumn(TColumnSchema().Name("UrlRow_2").Type(GetUrlRowType(false)))); - } - - Y_UNIT_TEST(TRowMixedSerializationOptions) - { - const auto schema = CreateTableSchema<NUnitTesting::TRowMixedSerializationOptions>(); - - ASSERT_SERIALIZABLES_EQUAL(schema, TTableSchema() - .AddColumn(TColumnSchema().Name("UrlRow_1").Type(GetUrlRowType(false))) - .AddColumn(TColumnSchema().Name("UrlRow_2").Type(ToTypeV3(EValueType::VT_STRING, false)))); - } - - NTi::TTypePtr GetUrlRowType_ColumnNames(bool required) - { - static const NTi::TTypePtr type = NTi::Struct({ - {"Host_ColumnName", ToTypeV3(EValueType::VT_STRING, false)}, - {"Path_KeyColumnName", ToTypeV3(EValueType::VT_STRING, false)}, - {"HttpCode", ToTypeV3(EValueType::VT_INT32, false)}, - }); - return required ? 
type : NTi::TTypePtr(NTi::Optional(type)); - } - - Y_UNIT_TEST(TRowMixedSerializationOptions_ColumnNames) - { - const auto schema = CreateTableSchema<NUnitTesting::TRowMixedSerializationOptions_ColumnNames>(); - - ASSERT_SERIALIZABLES_EQUAL(schema, TTableSchema() - .AddColumn(TColumnSchema().Name("UrlRow_1").Type(GetUrlRowType_ColumnNames(false))) - .AddColumn(TColumnSchema().Name("UrlRow_2").Type(ToTypeV3(EValueType::VT_STRING, false)))); - } - - Y_UNIT_TEST(NoOptionInheritance) - { - auto deepestEmbedded = NTi::Optional(NTi::Struct({{"x", ToTypeV3(EValueType::VT_INT64, false)}})); - - const auto schema = CreateTableSchema<NUnitTesting::TNoOptionInheritance>(); - - ASSERT_SERIALIZABLES_EQUAL(schema, TTableSchema() - .AddColumn(TColumnSchema() - .Name("EmbeddedYt_YtOption") - .Type(NTi::Optional(NTi::Struct({{"embedded", deepestEmbedded}})))) - .AddColumn(TColumnSchema().Name("EmbeddedYt_ProtobufOption").Type(ToTypeV3(EValueType::VT_STRING, false))) - .AddColumn(TColumnSchema().Name("EmbeddedYt_NoOption").Type(ToTypeV3(EValueType::VT_STRING, false))) - .AddColumn(TColumnSchema() - .Name("EmbeddedProtobuf_YtOption") - .Type(NTi::Optional(NTi::Struct({{"embedded", ToTypeV3(EValueType::VT_STRING, false)}})))) - .AddColumn(TColumnSchema().Name("EmbeddedProtobuf_ProtobufOption").Type(ToTypeV3(EValueType::VT_STRING, false))) - .AddColumn(TColumnSchema().Name("EmbeddedProtobuf_NoOption").Type(ToTypeV3(EValueType::VT_STRING, false))) - .AddColumn(TColumnSchema() - .Name("Embedded_YtOption") - .Type(NTi::Optional(NTi::Struct({{"embedded", ToTypeV3(EValueType::VT_STRING, false)}})))) - .AddColumn(TColumnSchema().Name("Embedded_ProtobufOption").Type(ToTypeV3(EValueType::VT_STRING, false))) - .AddColumn(TColumnSchema().Name("Embedded_NoOption").Type(ToTypeV3(EValueType::VT_STRING, false)))); - } - - Y_UNIT_TEST(Cyclic) - { - UNIT_ASSERT_EXCEPTION(CreateTableSchema<NUnitTesting::TCyclic>(), TApiUsageError); - UNIT_ASSERT_EXCEPTION(CreateTableSchema<NUnitTesting::TCyclic::TA>(), 
TApiUsageError); - UNIT_ASSERT_EXCEPTION(CreateTableSchema<NUnitTesting::TCyclic::TB>(), TApiUsageError); - UNIT_ASSERT_EXCEPTION(CreateTableSchema<NUnitTesting::TCyclic::TC>(), TApiUsageError); - UNIT_ASSERT_EXCEPTION(CreateTableSchema<NUnitTesting::TCyclic::TD>(), TApiUsageError); - - ASSERT_SERIALIZABLES_EQUAL( - TTableSchema().AddColumn( - TColumnSchema().Name("d").TypeV3(NTi::Optional(NTi::String()))), - CreateTableSchema<NUnitTesting::TCyclic::TE>()); - } - - Y_UNIT_TEST(FieldSortOrder) - { - const auto schema = CreateTableSchema<NUnitTesting::TFieldSortOrder>(); - - auto byFieldNumber = NTi::Optional(NTi::Struct({ - {"z", NTi::Optional(NTi::Bool())}, - {"x", NTi::Optional(NTi::Int64())}, - {"y", NTi::Optional(NTi::String())}, - })); - - ASSERT_SERIALIZABLES_EQUAL(schema, TTableSchema() - .AddColumn(TColumnSchema().Name("EmbeddedDefault").Type(byFieldNumber)) - .AddColumn(TColumnSchema() - .Name("EmbeddedAsInProtoFile") - .Type(NTi::Optional(NTi::Struct({ - {"x", NTi::Optional(NTi::Int64())}, - {"y", NTi::Optional(NTi::String())}, - {"z", NTi::Optional(NTi::Bool())}, - })))) - .AddColumn(TColumnSchema().Name("EmbeddedByFieldNumber").Type(byFieldNumber))); - } - - Y_UNIT_TEST(Map) - { - const auto schema = CreateTableSchema<NUnitTesting::TWithMap>(); - - auto createKeyValueStruct = [] (NTi::TTypePtr key, NTi::TTypePtr value) { - return NTi::List(NTi::Struct({ - {"key", NTi::Optional(key)}, - {"value", NTi::Optional(value)}, - })); - }; - - auto embedded = NTi::Struct({ - {"x", NTi::Optional(NTi::Int64())}, - {"y", NTi::Optional(NTi::String())}, - }); - - ASSERT_SERIALIZABLES_EQUAL(schema, TTableSchema() - .AddColumn(TColumnSchema() - .Name("MapDefault") - .Type(createKeyValueStruct(NTi::Int64(), NTi::String()))) - .AddColumn(TColumnSchema() - .Name("MapListOfStructsLegacy") - .Type(createKeyValueStruct(NTi::Int64(), NTi::String()))) - .AddColumn(TColumnSchema() - .Name("MapListOfStructs") - .Type(createKeyValueStruct(NTi::Int64(), embedded))) - 
.AddColumn(TColumnSchema() - .Name("MapOptionalDict") - .Type(NTi::Optional(NTi::Dict(NTi::Int64(), embedded)))) - .AddColumn(TColumnSchema() - .Name("MapDict") - .Type(NTi::Dict(NTi::Int64(), embedded)))); - } - - Y_UNIT_TEST(Oneof) - { - const auto schema = CreateTableSchema<NUnitTesting::TWithOneof>(); - - auto embedded = NTi::Struct({ - {"Oneof", NTi::Optional(NTi::Variant(NTi::Struct({ - {"x", NTi::Int64()}, - {"y", NTi::String()}, - })))}, - }); - - auto createType = [&] (TString oneof2Name) { - return NTi::Optional(NTi::Struct({ - {"field", NTi::Optional(NTi::String())}, - {oneof2Name, NTi::Optional(NTi::Variant(NTi::Struct({ - {"x2", NTi::Int64()}, - {"y2", NTi::String()}, - {"z2", embedded}, - })))}, - {"y1", NTi::Optional(NTi::String())}, - {"z1", NTi::Optional(embedded)}, - {"x1", NTi::Optional(NTi::Int64())}, - })); - }; - - ASSERT_SERIALIZABLES_EQUAL(schema, TTableSchema() - .AddColumn(TColumnSchema() - .Name("DefaultSeparateFields") - .Type(createType("variant_field_name"))) - .AddColumn(TColumnSchema() - .Name("NoDefault") - .Type(createType("Oneof2"))) - .AddColumn(TColumnSchema() - .Name("SerializationProtobuf") - .Type(NTi::Optional(NTi::Struct({ - {"y1", NTi::Optional(NTi::String())}, - {"x1", NTi::Optional(NTi::Int64())}, - {"z1", NTi::Optional(NTi::String())}, - })))) - .AddColumn(TColumnSchema() - .Name("TopLevelOneof") - .Type( - NTi::Optional( - NTi::Variant(NTi::Struct({ - {"MemberOfTopLevelOneof", NTi::Int64()} - })) - ) - )) - ); - } - - Y_UNIT_TEST(Embedded) - { - const auto schema = CreateTableSchema<NUnitTesting::TEmbeddingMessage>(); - ASSERT_SERIALIZABLES_EQUAL(schema, TTableSchema() - .Strict(false) - .AddColumn(TColumnSchema().Name("embedded2_num").Type(NTi::Optional(NTi::Uint64()))) - .AddColumn(TColumnSchema().Name("embedded2_struct").Type(NTi::Optional(NTi::Struct({ - {"float1", NTi::Optional(NTi::Double())}, - {"string1", NTi::Optional(NTi::String())}, - })))) - 
.AddColumn(TColumnSchema().Name("embedded2_repeated").Type(NTi::List(NTi::String()))) - .AddColumn(TColumnSchema().Name("embedded_num").Type(NTi::Optional(NTi::Uint64()))) - .AddColumn(TColumnSchema().Name("embedded_extra_field").Type(NTi::Optional(NTi::String()))) - .AddColumn(TColumnSchema().Name("variant").Type(NTi::Optional(NTi::Variant(NTi::Struct({ - {"str_variant", NTi::String()}, - {"uint_variant", NTi::Uint64()}, - }))))) - .AddColumn(TColumnSchema().Name("num").Type(NTi::Optional(NTi::Uint64()))) - .AddColumn(TColumnSchema().Name("extra_field").Type(NTi::Optional(NTi::String()))) - ); - } -} - -Y_UNIT_TEST_SUITE(ProtoSchemaTest_Proto3) -{ - Y_UNIT_TEST(TWithOptional) - { - const auto schema = CreateTableSchema<NTestingProto3::TWithOptional>(); - ASSERT_SERIALIZABLES_EQUAL(schema, TTableSchema() - .AddColumn(TColumnSchema() - .Name("x").Type(NTi::Optional(NTi::Int64())) - ) - ); - } - - Y_UNIT_TEST(TWithOptionalMessage) - { - const auto schema = CreateTableSchema<NTestingProto3::TWithOptionalMessage>(); - ASSERT_SERIALIZABLES_EQUAL(schema, TTableSchema() - .AddColumn(TColumnSchema() - .Name("x").Type( - NTi::Optional( - NTi::Struct({{"x", NTi::Optional(NTi::Int64())}}) - ) - ) - ) - ); - } -} diff --git a/yt/cpp/mapreduce/interface/protobuf_table_schema_ut.proto b/yt/cpp/mapreduce/interface/protobuf_table_schema_ut.proto deleted file mode 100644 index 60bad6e650c..00000000000 --- a/yt/cpp/mapreduce/interface/protobuf_table_schema_ut.proto +++ /dev/null @@ -1,402 +0,0 @@ -import "yt/yt_proto/yt/formats/extension.proto"; - -package NYT.NUnitTesting; - -message TIntegral -{ - optional double DoubleField = 1; - optional float FloatField = 2; - optional int32 Int32Field = 3; - optional int64 Int64Field = 4; - optional uint32 Uint32Field = 5; - optional uint64 Uint64Field = 6; - optional sint32 Sint32Field = 7; - optional sint64 Sint64Field = 8; - optional fixed32 Fixed32Field = 9; - optional fixed64 Fixed64Field = 10; - optional sfixed32 Sfixed32Field = 11; - 
optional sfixed64 Sfixed64Field = 12; - optional bool BoolField = 13; - enum TriBool - { - TRI_FALSE = 0; - TRI_TRUE = 1; - TRI_UNDEF = -1; - } - optional TriBool EnumField = 14; -} - -message TRepeated -{ - repeated int32 Int32Field = 1; -} - -message TRepeatedYtMode -{ - option (NYT.default_field_flags) = SERIALIZATION_YT; - repeated int32 Int32Field = 1; -} - -message TWithTypeOptions -{ - enum Color - { - WHITE = 0; - BLUE = 1; - RED = -1; - } - - message TEmbedded - { - option (NYT.default_field_flags) = SERIALIZATION_YT; - - optional Color ColorIntField = 1 [(NYT.flags) = ENUM_INT]; - optional Color ColorStringField = 2 [(NYT.flags) = ENUM_STRING]; - optional bytes AnyField = 3 [(NYT.flags) = ANY]; - } - - optional Color ColorIntField = 1 [(NYT.flags) = ENUM_INT]; - optional Color ColorStringField = 2 [(NYT.flags) = ENUM_STRING]; - optional bytes AnyField = 3 [(NYT.flags) = ANY]; - optional bytes OtherColumnsField = 4 [(NYT.flags) = OTHER_COLUMNS]; - optional TEmbedded EmbeddedField = 5 [(NYT.flags) = SERIALIZATION_YT]; - repeated Color RepeatedEnumIntField = 6 [(NYT.flags) = SERIALIZATION_YT, (NYT.flags) = ENUM_INT]; -} - -message TWithTypeOptions_TypeMismatch_EnumInt -{ - optional int64 EnumField = 1 [(NYT.flags) = ENUM_INT]; -} - -message TWithTypeOptions_TypeMismatch_EnumString -{ - optional string EnumField = 1 [(NYT.flags) = ENUM_STRING]; -} - -message TWithTypeOptions_TypeMismatch_Any -{ - optional string AnyField = 1 [(NYT.flags) = ANY]; -} - -message TWithTypeOptions_TypeMismatch_OtherColumns -{ - optional string OtherColumnsField = 1 [(NYT.flags) = OTHER_COLUMNS]; -} - -message TOneOf -{ - oneof Chooser - { - double DoubleField = 1; - int32 Int32Field = 2; - } - optional bool BoolField = 3; -} - -message TWithRequired -{ - required string RequiredField = 1; - optional string NotRequiredField = 2; -}; - -message TAggregated -{ - optional string StringField = 1; - optional bytes BytesField = 2; - optional TIntegral NestedField = 3; - optional 
TRepeated NestedRepeatedField = 4; - optional TOneOf NestedOneOfField = 5; - optional TAggregated NestedRecursiveField = 6; -} - -message TAliased -{ - optional int32 Key = 1 [(NYT.key_column_name) = "key"]; - optional double Subkey = 2 [(NYT.key_column_name) = "subkey"]; - optional TAggregated Data = 3; -} - -//////////////////////////////////////////////////////////////////////////////// - -message TUrlRow -{ - optional string Host = 1 [(NYT.column_name) = "Host"]; - optional string Path = 2 [(NYT.column_name) = "Path"]; - optional sint32 HttpCode = 3 [(NYT.column_name) = "HttpCode"]; -} - -message TRowFieldSerializationOption -{ - optional TUrlRow UrlRow_1 = 1 [(NYT.flags) = SERIALIZATION_YT]; - optional TUrlRow UrlRow_2 = 2; -} - -message TRowMessageSerializationOption -{ - option (NYT.default_field_flags) = SERIALIZATION_YT; - optional TUrlRow UrlRow_1 = 1; - optional TUrlRow UrlRow_2 = 2; -} - -message TRowMixedSerializationOptions -{ - option (NYT.default_field_flags) = SERIALIZATION_YT; - optional TUrlRow UrlRow_1 = 1; - optional TUrlRow UrlRow_2 = 2 [(NYT.flags) = SERIALIZATION_PROTOBUF]; -} - -message TRowSerializedRepeatedFields -{ - option (NYT.default_field_flags) = SERIALIZATION_YT; - repeated int64 Ints = 1; - repeated TUrlRow UrlRows = 2; -} - -message TUrlRowWithColumnNames -{ - optional string Host = 1 [(NYT.column_name) = "Host_ColumnName", (NYT.key_column_name) = "Host_KeyColumnName"]; - optional string Path = 2 [(NYT.key_column_name) = "Path_KeyColumnName"]; - optional sint32 HttpCode = 3; -} - -message TRowMixedSerializationOptions_ColumnNames -{ - option (NYT.default_field_flags) = SERIALIZATION_YT; - optional TUrlRowWithColumnNames UrlRow_1 = 1; - optional TUrlRowWithColumnNames UrlRow_2 = 2 [(NYT.flags) = SERIALIZATION_PROTOBUF]; -} - -message TNoOptionInheritance -{ - message TDeepestEmbedded - { - optional int64 x = 1; - } - - message TEmbedded - { - optional TDeepestEmbedded embedded = 1; - } - - message TEmbeddedYt - { - option 
(NYT.default_field_flags) = SERIALIZATION_YT; - - optional TDeepestEmbedded embedded = 1; - } - - message TEmbeddedProtobuf - { - option (NYT.default_field_flags) = SERIALIZATION_PROTOBUF; - - optional TDeepestEmbedded embedded = 1; - } - - optional TEmbeddedYt EmbeddedYt_YtOption = 1 [(NYT.flags) = SERIALIZATION_YT]; - optional TEmbeddedYt EmbeddedYt_ProtobufOption = 2 [(NYT.flags) = SERIALIZATION_PROTOBUF]; - optional TEmbeddedYt EmbeddedYt_NoOption = 3; - optional TEmbeddedProtobuf EmbeddedProtobuf_YtOption = 4 [(NYT.flags) = SERIALIZATION_YT]; - optional TEmbeddedProtobuf EmbeddedProtobuf_ProtobufOption = 5 [(NYT.flags) = SERIALIZATION_PROTOBUF]; - optional TEmbeddedProtobuf EmbeddedProtobuf_NoOption = 6; - optional TEmbedded Embedded_YtOption = 7 [(NYT.flags) = SERIALIZATION_YT]; - optional TEmbedded Embedded_ProtobufOption = 8 [(NYT.flags) = SERIALIZATION_PROTOBUF]; - optional TEmbedded Embedded_NoOption = 9; -} - -message TOptionalList -{ - repeated int64 OptionalListInt64 = 1 [(NYT.flags) = OPTIONAL_LIST, (NYT.flags) = SERIALIZATION_YT]; -} - -message TPacked -{ - repeated int64 PackedListInt64 = 1 [(NYT.flags) = SERIALIZATION_YT, packed=true]; -} - -message TCyclic -{ - option (NYT.default_field_flags) = SERIALIZATION_YT; - - message TA - { - option (NYT.default_field_flags) = SERIALIZATION_YT; - repeated TB b = 1; - optional TC c = 2; - } - - message TB - { - option (NYT.default_field_flags) = SERIALIZATION_YT; - optional TD d = 1; - } - - message TC - { - option (NYT.default_field_flags) = SERIALIZATION_YT; - optional TD d = 1; - } - - message TD - { - option (NYT.default_field_flags) = SERIALIZATION_YT; - optional TA a = 1; - } - - message TE - { - optional TD d = 1 [(NYT.flags) = SERIALIZATION_PROTOBUF]; - } - - optional TA a = 1; -} - -message TFieldSortOrder -{ - message TEmbeddedDefault { - optional int64 x = 2; - optional string y = 12; - optional bool z = 1; - } - message TEmbeddedAsInProtoFile { - option (NYT.message_flags) = 
DEPRECATED_SORT_FIELDS_AS_IN_PROTO_FILE; - optional int64 x = 2; - optional string y = 12; - optional bool z = 1; - } - message TEmbeddedByFieldNumber { - option (NYT.message_flags) = SORT_FIELDS_BY_FIELD_NUMBER; - optional int64 x = 2; - optional string y = 12; - optional bool z = 1; - } - option (NYT.default_field_flags) = SERIALIZATION_YT; - - optional TEmbeddedDefault EmbeddedDefault = 1; - optional TEmbeddedAsInProtoFile EmbeddedAsInProtoFile = 2; - optional TEmbeddedByFieldNumber EmbeddedByFieldNumber = 3; -} - -message TWithMap -{ - option (NYT.default_field_flags) = SERIALIZATION_YT; - - message TEmbedded { - optional int64 x = 1; - optional string y = 2; - } - - map<int64, TEmbedded> MapDefault = 1; - map<int64, TEmbedded> MapListOfStructsLegacy = 2 [(NYT.flags) = MAP_AS_LIST_OF_STRUCTS_LEGACY]; - map<int64, TEmbedded> MapListOfStructs = 3 [(NYT.flags) = MAP_AS_LIST_OF_STRUCTS]; - map<int64, TEmbedded> MapOptionalDict = 4 [(NYT.flags) = MAP_AS_OPTIONAL_DICT]; - map<int64, TEmbedded> MapDict = 5 [(NYT.flags) = MAP_AS_DICT]; -} - -message TWithOneof -{ - option (NYT.default_field_flags) = SERIALIZATION_YT; - - message TEmbedded - { - option (NYT.default_field_flags) = SERIALIZATION_YT; - oneof Oneof { - int64 x = 1; - string y = 2; - } - } - - message TDefaultSeparateFields - { - option (NYT.default_oneof_flags) = SEPARATE_FIELDS; - option (NYT.default_field_flags) = SERIALIZATION_YT; - - optional string field = 1; - - oneof Oneof2 - { - option (NYT.variant_field_name) = "variant_field_name"; - option (NYT.oneof_flags) = VARIANT; - string y2 = 4; - TEmbedded z2 = 6; - int64 x2 = 2; - } - - oneof Oneof1 - { - int64 x1 = 10; - string y1 = 3; - TEmbedded z1 = 5; - } - } - - message TNoDefault - { - option (NYT.default_field_flags) = SERIALIZATION_YT; - - optional string field = 1; - - oneof Oneof2 - { - string y2 = 4; - TEmbedded z2 = 6; - int64 x2 = 2; - } - - oneof Oneof1 - { - option (NYT.oneof_flags) = SEPARATE_FIELDS; - int64 x1 = 10; - string y1 = 3; - 
TEmbedded z1 = 5; - } - } - - message TSerializationProtobuf - { - oneof Oneof - { - int64 x1 = 2; - string y1 = 1; - TEmbedded z1 = 3; - } - } - - optional TDefaultSeparateFields DefaultSeparateFields = 1; - optional TNoDefault NoDefault = 2; - optional TSerializationProtobuf SerializationProtobuf = 3; - - oneof TopLevelOneof - { - int64 MemberOfTopLevelOneof = 4; - } -} - -message TEmbeddedStruct { - optional float float1 = 1; - optional string string1 = 2; -} - -message TEmbedded2Message { - option (NYT.default_field_flags) = SERIALIZATION_YT; - optional uint64 embedded2_num = 10; - optional TEmbeddedStruct embedded2_struct = 17; - repeated string embedded2_repeated = 42; -} - -message TEmbedded1Message { - option (NYT.default_field_flags) = SERIALIZATION_YT; - required TEmbedded2Message t2 = 1 [(NYT.flags) = EMBEDDED]; - oneof variant { - string str_variant = 101; - uint64 uint_variant = 102; - } - optional uint64 embedded_num = 10; // make intensional field_num collision! - optional string embedded_extra_field = 11; -} - -message TEmbeddingMessage { - optional bytes other_columns_field = 15 [(NYT.flags) = OTHER_COLUMNS]; - required TEmbedded1Message t1 = 2 [(NYT.flags) = EMBEDDED]; - optional uint64 num = 12; - optional string extra_field = 13; -} diff --git a/yt/cpp/mapreduce/interface/public.h b/yt/cpp/mapreduce/interface/public.h deleted file mode 100644 index bdeda787950..00000000000 --- a/yt/cpp/mapreduce/interface/public.h +++ /dev/null @@ -1,10 +0,0 @@ -#pragma once - -#include <memory> - -namespace NYT::NAuth { - -struct IServiceTicketAuthPtrWrapper; -using IServiceTicketAuthPtrWrapperPtr = std::shared_ptr<IServiceTicketAuthPtrWrapper>; - -} // namespace NYT::NAuth diff --git a/yt/cpp/mapreduce/interface/retry_policy.h b/yt/cpp/mapreduce/interface/retry_policy.h deleted file mode 100644 index c1988390793..00000000000 --- a/yt/cpp/mapreduce/interface/retry_policy.h +++ /dev/null @@ -1,47 +0,0 @@ -#pragma once - -#include <util/datetime/base.h> -#include 
<util/generic/ptr.h> - -namespace NYT { - -//////////////////////////////////////////////////////////////////////////////// - -/// A configuration that controls retries of a single request. -struct TRetryConfig -{ - /// - /// @brief How long retries of a single YT request can go on. - /// - /// If this limit is reached while retry count is not yet exceeded @ref TRequestRetriesTimeout exception is thrown. - TDuration RetriesTimeLimit = TDuration::Max(); -}; - -/// The library uses this class to understand how to retry individual requests. -class IRetryConfigProvider - : public virtual TThrRefBase -{ -public: - /// - /// @brief Gets retry policy for single request. - /// - /// CreateRetryConfig is called before ANY request. - /// Returned config controls retries of this request. - /// - /// Must be thread safe since it can be used from different threads - /// to perform internal library requests (e.g. pings). - /// - /// Some methods (e.g. IClient::Map) involve multiple requests to YT and therefore - /// this method will be called several times during execution of single method. - /// - /// If user needs to limit overall retries inside long operation they might create - /// retry policy that knows about overall deadline - /// @ref NYT::TRetryConfig::RetriesTimeLimit taking into account that overall deadline. - /// (E.g. when deadline reached it returns zero limit for retries). 
- virtual TRetryConfig CreateRetryConfig() = 0; -}; - -//////////////////////////////////////////////////////////////////////////////// - -} // namespace NYT - diff --git a/yt/cpp/mapreduce/interface/serialize.cpp b/yt/cpp/mapreduce/interface/serialize.cpp deleted file mode 100644 index ae05d9f50d5..00000000000 --- a/yt/cpp/mapreduce/interface/serialize.cpp +++ /dev/null @@ -1,553 +0,0 @@ -#include "serialize.h" - -#include "common.h" -#include "fluent.h" - -#include <library/cpp/yson/parser.h> -#include <library/cpp/yson/node/node_io.h> -#include <library/cpp/yson/node/serialize.h> - -#include <library/cpp/type_info/type_io.h> - -#include <util/generic/string.h> - -namespace NYT { - -//////////////////////////////////////////////////////////////////////////////// - -// const auto& nodeMap = node.AsMap(); -#define DESERIALIZE_ITEM(NAME, MEMBER) \ - if (const auto* item = nodeMap.FindPtr(NAME)) { \ - Deserialize(MEMBER, *item); \ - } - -// const auto& attributesMap = node.GetAttributes().AsMap(); -#define DESERIALIZE_ATTR(NAME, MEMBER) \ - if (const auto* attr = attributesMap.FindPtr(NAME)) { \ - Deserialize(MEMBER, *attr); \ - } - -//////////////////////////////////////////////////////////////////////////////// - -void Serialize(const TSortColumn& sortColumn, NYson::IYsonConsumer* consumer) -{ - if (sortColumn.SortOrder() == ESortOrder::SO_ASCENDING) { - Serialize(sortColumn.Name(), consumer); - } else { - BuildYsonFluently(consumer).BeginMap() - .Item("name").Value(sortColumn.Name()) - .Item("sort_order").Value(ToString(sortColumn.SortOrder())) - .EndMap(); - } -} - -void Deserialize(TSortColumn& sortColumn, const TNode& node) -{ - if (node.IsString()) { - sortColumn = TSortColumn(node.AsString()); - } else if (node.IsMap()) { - const auto& name = node["name"].AsString(); - const auto& sortOrderString = node["sort_order"].AsString(); - sortColumn = TSortColumn(name, ::FromString<ESortOrder>(sortOrderString)); - } else { - ythrow yexception() << "Expected sort 
column to be string or map, got " << node.GetType(); - } -} - -template <class T, class TDerived> -void SerializeOneOrMany(const TOneOrMany<T, TDerived>& oneOrMany, NYson::IYsonConsumer* consumer) -{ - BuildYsonFluently(consumer).List(oneOrMany.Parts_); -} - -template <class T, class TDerived> -void DeserializeOneOrMany(TOneOrMany<T, TDerived>& oneOrMany, const TNode& node) -{ - Deserialize(oneOrMany.Parts_, node); -} - -void Serialize(const TKey& key, NYson::IYsonConsumer* consumer) -{ - SerializeOneOrMany(key, consumer); -} - -void Deserialize(TKey& key, const TNode& node) -{ - DeserializeOneOrMany(key, node); -} - -void Serialize(const TSortColumns& sortColumns, NYson::IYsonConsumer* consumer) -{ - SerializeOneOrMany(sortColumns, consumer); -} - -void Deserialize(TSortColumns& sortColumns, const TNode& node) -{ - DeserializeOneOrMany(sortColumns, node); -} - -void Serialize(const TColumnNames& columnNames, NYson::IYsonConsumer* consumer) -{ - SerializeOneOrMany(columnNames, consumer); -} - -void Deserialize(TColumnNames& columnNames, const TNode& node) -{ - DeserializeOneOrMany(columnNames, node); -} - -//////////////////////////////////////////////////////////////////////////////// - -void Deserialize(EValueType& valueType, const TNode& node) -{ - const auto& nodeStr = node.AsString(); - static const THashMap<TString, EValueType> str2ValueType = { - {"int8", VT_INT8}, - {"int16", VT_INT16}, - {"int32", VT_INT32}, - {"int64", VT_INT64}, - - {"uint8", VT_UINT8}, - {"uint16", VT_UINT16}, - {"uint32", VT_UINT32}, - {"uint64", VT_UINT64}, - - {"boolean", VT_BOOLEAN}, - {"double", VT_DOUBLE}, - - {"string", VT_STRING}, - {"utf8", VT_UTF8}, - - {"any", VT_ANY}, - - {"null", VT_NULL}, - {"void", VT_VOID}, - - {"date", VT_DATE}, - {"datetime", VT_DATETIME}, - {"timestamp", VT_TIMESTAMP}, - {"interval", VT_INTERVAL}, - {"float", VT_FLOAT}, - {"json", VT_JSON}, - }; - - auto it = str2ValueType.find(nodeStr); - if (it == str2ValueType.end()) { - ythrow yexception() << 
"Invalid value type '" << nodeStr << "'"; - } - - valueType = it->second; -} - -void Deserialize(ESortOrder& sortOrder, const TNode& node) -{ - sortOrder = FromString<ESortOrder>(node.AsString()); -} - -void Deserialize(EOptimizeForAttr& optimizeFor, const TNode& node) -{ - optimizeFor = FromString<EOptimizeForAttr>(node.AsString()); -} - -void Deserialize(EErasureCodecAttr& erasureCodec, const TNode& node) -{ - erasureCodec = FromString<EErasureCodecAttr>(node.AsString()); -} - -void Deserialize(ESchemaModificationAttr& schemaModification, const TNode& node) -{ - schemaModification = FromString<ESchemaModificationAttr>(node.AsString()); -} - -void Serialize(const TColumnSchema& columnSchema, NYson::IYsonConsumer* consumer) -{ - BuildYsonFluently(consumer).BeginMap() - .Item("name").Value(columnSchema.Name()) - .DoIf(!columnSchema.RawTypeV3().Defined(), - [&] (TFluentMap fluent) { - fluent.Item("type").Value(NDetail::ToString(columnSchema.Type())); - fluent.Item("required").Value(columnSchema.Required()); - if (columnSchema.Type() == VT_ANY - && *columnSchema.TypeV3() != *NTi::Optional(NTi::Yson())) - { - // A lot of user canonize serialized schema. - // To be backward compatible we only set type_v3 for new types. - fluent.Item("type_v3").Value(columnSchema.TypeV3()); - } - } - ) - .DoIf(columnSchema.RawTypeV3().Defined(), [&] (TFluentMap fluent) { - const auto& rawTypeV3 = *columnSchema.RawTypeV3(); - fluent.Item("type_v3").Value(rawTypeV3); - - // We going set old fields `type` and `required` to be compatible - // with old clusters that doesn't support type_v3 yet. 
- - // if type is simple return its name otherwise return empty optional - auto isRequired = [](TStringBuf simpleType) { - return simpleType != "null" && simpleType != "void"; - }; - auto getSimple = [] (const TNode& typeV3) -> TMaybe<TString> { - static const THashMap<TString,TString> typeV3ToOld = { - {"bool", "boolean"}, - {"yson", "any"}, - }; - TMaybe<TString> result; - if (typeV3.IsString()) { - result = typeV3.AsString(); - } else if (typeV3.IsMap() && typeV3.Size() == 1) { - Y_VERIFY(typeV3["type_name"].IsString(), "invalid type is passed"); - result = typeV3["type_name"].AsString(); - } - if (result) { - auto it = typeV3ToOld.find(*result); - if (it != typeV3ToOld.end()) { - result = it->second; - } - } - return result; - }; - auto simplify = [&](const TNode& typeV3) -> TMaybe<std::pair<TString, bool>> { - auto simple = getSimple(typeV3); - if (simple) { - return std::make_pair(*simple, isRequired(*simple)); - } - if (typeV3.IsMap() && typeV3["type_name"] == "optional") { - auto simpleItem = getSimple(typeV3["item"]); - if (simpleItem && isRequired(*simpleItem)) { - return std::make_pair(*simpleItem, false); - } - } - return {}; - }; - - auto simplified = simplify(rawTypeV3); - - if (simplified) { - const auto& [simpleType, required] = *simplified; - fluent - .Item("type").Value(simpleType) - .Item("required").Value(required); - return; - } - }) - .DoIf(columnSchema.SortOrder().Defined(), [&] (TFluentMap fluent) { - fluent.Item("sort_order").Value(ToString(*columnSchema.SortOrder())); - }) - .DoIf(columnSchema.Lock().Defined(), [&] (TFluentMap fluent) { - fluent.Item("lock").Value(*columnSchema.Lock()); - }) - .DoIf(columnSchema.Expression().Defined(), [&] (TFluentMap fluent) { - fluent.Item("expression").Value(*columnSchema.Expression()); - }) - .DoIf(columnSchema.Aggregate().Defined(), [&] (TFluentMap fluent) { - fluent.Item("aggregate").Value(*columnSchema.Aggregate()); - }) - .DoIf(columnSchema.Group().Defined(), [&] (TFluentMap fluent) { - 
fluent.Item("group").Value(*columnSchema.Group()); - }) - .EndMap(); -} - -void Deserialize(TColumnSchema& columnSchema, const TNode& node) -{ - const auto& nodeMap = node.AsMap(); - DESERIALIZE_ITEM("name", columnSchema.Name_); - DESERIALIZE_ITEM("type_v3", columnSchema.RawTypeV3_); - DESERIALIZE_ITEM("sort_order", columnSchema.SortOrder_); - DESERIALIZE_ITEM("lock", columnSchema.Lock_); - DESERIALIZE_ITEM("expression", columnSchema.Expression_); - DESERIALIZE_ITEM("aggregate", columnSchema.Aggregate_); - DESERIALIZE_ITEM("group", columnSchema.Group_); - - if (nodeMap.contains("type_v3")) { - NTi::TTypePtr type; - DESERIALIZE_ITEM("type_v3", type); - columnSchema.Type(type); - } else { - EValueType oldType = VT_INT64; - bool required = false; - DESERIALIZE_ITEM("type", oldType); - DESERIALIZE_ITEM("required", required); - columnSchema.Type(ToTypeV3(oldType, required)); - } -} - -void Serialize(const TTableSchema& tableSchema, NYson::IYsonConsumer* consumer) -{ - BuildYsonFluently(consumer).BeginAttributes() - .Item("strict").Value(tableSchema.Strict()) - .Item("unique_keys").Value(tableSchema.UniqueKeys()) - .EndAttributes() - .List(tableSchema.Columns()); -} - -void Deserialize(TTableSchema& tableSchema, const TNode& node) -{ - const auto& attributesMap = node.GetAttributes().AsMap(); - DESERIALIZE_ATTR("strict", tableSchema.Strict_); - DESERIALIZE_ATTR("unique_keys", tableSchema.UniqueKeys_); - Deserialize(tableSchema.Columns_, node); -} - -//////////////////////////////////////////////////////////////////////////////// - -void Serialize(const TKeyBound& keyBound, NYson::IYsonConsumer* consumer) -{ - BuildYsonFluently(consumer).BeginList() - .Item().Value(ToString(keyBound.Relation())) - .Item().Value(keyBound.Key()) - .EndList(); -} - -void Deserialize(TKeyBound& keyBound, const TNode& node) -{ - const auto& nodeList = node.AsList(); - Y_ENSURE(nodeList.size() == 2); - - const auto& relationNode = nodeList[0]; - 
keyBound.Relation(::FromString<ERelation>(relationNode.AsString())); - - const auto& keyNode = nodeList[1]; - TKey key; - Deserialize(key, keyNode); - keyBound.Key(std::move(key)); -} - -//////////////////////////////////////////////////////////////////////////////// - -void Serialize(const TReadLimit& readLimit, NYson::IYsonConsumer* consumer) -{ - BuildYsonFluently(consumer).BeginMap() - .DoIf(readLimit.KeyBound_.Defined(), [&] (TFluentMap fluent) { - fluent.Item("key_bound").Value(*readLimit.KeyBound_); - }) - .DoIf(readLimit.Key_.Defined(), [&] (TFluentMap fluent) { - fluent.Item("key").Value(*readLimit.Key_); - }) - .DoIf(readLimit.RowIndex_.Defined(), [&] (TFluentMap fluent) { - fluent.Item("row_index").Value(*readLimit.RowIndex_); - }) - .DoIf(readLimit.Offset_.Defined(), [&] (TFluentMap fluent) { - fluent.Item("offset").Value(*readLimit.Offset_); - }) - .DoIf(readLimit.TabletIndex_.Defined(), [&] (TFluentMap fluent) { - fluent.Item("tablet_index").Value(*readLimit.TabletIndex_); - }) - .EndMap(); -} - -void Deserialize(TReadLimit& readLimit, const TNode& node) -{ - const auto& nodeMap = node.AsMap(); - DESERIALIZE_ITEM("key_bound", readLimit.KeyBound_); - DESERIALIZE_ITEM("key", readLimit.Key_); - DESERIALIZE_ITEM("row_index", readLimit.RowIndex_); - DESERIALIZE_ITEM("offset", readLimit.Offset_); - DESERIALIZE_ITEM("tablet_index", readLimit.TabletIndex_); -} - -void Serialize(const TReadRange& readRange, NYson::IYsonConsumer* consumer) -{ - BuildYsonFluently(consumer).BeginMap() - .DoIf(!IsTrivial(readRange.LowerLimit_), [&] (TFluentMap fluent) { - fluent.Item("lower_limit").Value(readRange.LowerLimit_); - }) - .DoIf(!IsTrivial(readRange.UpperLimit_), [&] (TFluentMap fluent) { - fluent.Item("upper_limit").Value(readRange.UpperLimit_); - }) - .DoIf(!IsTrivial(readRange.Exact_), [&] (TFluentMap fluent) { - fluent.Item("exact").Value(readRange.Exact_); - }) - .EndMap(); -} - -void Deserialize(TReadRange& readRange, const TNode& node) -{ - const auto& nodeMap = 
node.AsMap(); - DESERIALIZE_ITEM("lower_limit", readRange.LowerLimit_); - DESERIALIZE_ITEM("upper_limit", readRange.UpperLimit_); - DESERIALIZE_ITEM("exact", readRange.Exact_); -} - -void Serialize(const THashMap<TString, TString>& renameColumns, NYson::IYsonConsumer* consumer) -{ - BuildYsonFluently(consumer) - .DoMapFor(renameColumns, [] (TFluentMap fluent, const auto& item) { - fluent.Item(item.first).Value(item.second); - }); -} - -void Serialize(const TRichYPath& path, NYson::IYsonConsumer* consumer) -{ - BuildYsonFluently(consumer).BeginAttributes() - .DoIf(path.GetRanges().Defined(), [&] (TFluentAttributes fluent) { - fluent.Item("ranges").List(*path.GetRanges()); - }) - .DoIf(path.Columns_.Defined(), [&] (TFluentAttributes fluent) { - fluent.Item("columns").Value(*path.Columns_); - }) - .DoIf(path.Append_.Defined(), [&] (TFluentAttributes fluent) { - fluent.Item("append").Value(*path.Append_); - }) - .DoIf(path.PartiallySorted_.Defined(), [&] (TFluentAttributes fluent) { - fluent.Item("partially_sorted").Value(*path.PartiallySorted_); - }) - .DoIf(!path.SortedBy_.Parts_.empty(), [&] (TFluentAttributes fluent) { - fluent.Item("sorted_by").Value(path.SortedBy_); - }) - .DoIf(path.Teleport_.Defined(), [&] (TFluentAttributes fluent) { - fluent.Item("teleport").Value(*path.Teleport_); - }) - .DoIf(path.Primary_.Defined(), [&] (TFluentAttributes fluent) { - fluent.Item("primary").Value(*path.Primary_); - }) - .DoIf(path.Foreign_.Defined(), [&] (TFluentAttributes fluent) { - fluent.Item("foreign").Value(*path.Foreign_); - }) - .DoIf(path.RowCountLimit_.Defined(), [&] (TFluentAttributes fluent) { - fluent.Item("row_count_limit").Value(*path.RowCountLimit_); - }) - .DoIf(path.FileName_.Defined(), [&] (TFluentAttributes fluent) { - fluent.Item("file_name").Value(*path.FileName_); - }) - .DoIf(path.OriginalPath_.Defined(), [&] (TFluentAttributes fluent) { - fluent.Item("original_path").Value(*path.OriginalPath_); - }) - .DoIf(path.Executable_.Defined(), [&] 
(TFluentAttributes fluent) { - fluent.Item("executable").Value(*path.Executable_); - }) - .DoIf(path.Format_.Defined(), [&] (TFluentAttributes fluent) { - fluent.Item("format").Value(*path.Format_); - }) - .DoIf(path.Schema_.Defined(), [&] (TFluentAttributes fluent) { - fluent.Item("schema").Value(*path.Schema_); - }) - .DoIf(path.Timestamp_.Defined(), [&] (TFluentAttributes fluent) { - fluent.Item("timestamp").Value(*path.Timestamp_); - }) - .DoIf(path.CompressionCodec_.Defined(), [&] (TFluentAttributes fluent) { - fluent.Item("compression_codec").Value(*path.CompressionCodec_); - }) - .DoIf(path.ErasureCodec_.Defined(), [&] (TFluentAttributes fluent) { - fluent.Item("erasure_codec").Value(ToString(*path.ErasureCodec_)); - }) - .DoIf(path.SchemaModification_.Defined(), [&] (TFluentAttributes fluent) { - fluent.Item("schema_modification").Value(ToString(*path.SchemaModification_)); - }) - .DoIf(path.OptimizeFor_.Defined(), [&] (TFluentAttributes fluent) { - fluent.Item("optimize_for").Value(ToString(*path.OptimizeFor_)); - }) - .DoIf(path.TransactionId_.Defined(), [&] (TFluentAttributes fluent) { - fluent.Item("transaction_id").Value(GetGuidAsString(*path.TransactionId_)); - }) - .DoIf(path.RenameColumns_.Defined(), [&] (TFluentAttributes fluent) { - fluent.Item("rename_columns").Value(*path.RenameColumns_); - }) - .DoIf(path.BypassArtifactCache_.Defined(), [&] (TFluentAttributes fluent) { - fluent.Item("bypass_artifact_cache").Value(*path.BypassArtifactCache_); - }) - .EndAttributes() - .Value(path.Path_); -} - -void Deserialize(TRichYPath& path, const TNode& node) -{ - path = {}; - - const auto& attributesMap = node.GetAttributes().AsMap(); - DESERIALIZE_ATTR("ranges", path.MutableRanges()); - DESERIALIZE_ATTR("columns", path.Columns_); - DESERIALIZE_ATTR("append", path.Append_); - DESERIALIZE_ATTR("partially_sorted", path.PartiallySorted_); - DESERIALIZE_ATTR("sorted_by", path.SortedBy_); - DESERIALIZE_ATTR("teleport", path.Teleport_); - 
DESERIALIZE_ATTR("primary", path.Primary_); - DESERIALIZE_ATTR("foreign", path.Foreign_); - DESERIALIZE_ATTR("row_count_limit", path.RowCountLimit_); - DESERIALIZE_ATTR("file_name", path.FileName_); - DESERIALIZE_ATTR("original_path", path.OriginalPath_); - DESERIALIZE_ATTR("executable", path.Executable_); - DESERIALIZE_ATTR("format", path.Format_); - DESERIALIZE_ATTR("schema", path.Schema_); - DESERIALIZE_ATTR("timestamp", path.Timestamp_); - DESERIALIZE_ATTR("compression_codec", path.CompressionCodec_); - DESERIALIZE_ATTR("erasure_codec", path.ErasureCodec_); - DESERIALIZE_ATTR("schema_modification", path.SchemaModification_); - DESERIALIZE_ATTR("optimize_for", path.OptimizeFor_); - DESERIALIZE_ATTR("transaction_id", path.TransactionId_); - DESERIALIZE_ATTR("rename_columns", path.RenameColumns_); - DESERIALIZE_ATTR("bypass_artifact_cache", path.BypassArtifactCache_); - Deserialize(path.Path_, node); -} - -void Serialize(const TAttributeFilter& filter, NYson::IYsonConsumer* consumer) -{ - BuildYsonFluently(consumer).List(filter.Attributes_); -} - -void Deserialize(TTableColumnarStatistics& statistics, const TNode& node) -{ - const auto& nodeMap = node.AsMap(); - DESERIALIZE_ITEM("column_data_weights", statistics.ColumnDataWeight); - DESERIALIZE_ITEM("legacy_chunks_data_weight", statistics.LegacyChunksDataWeight); - DESERIALIZE_ITEM("timestamp_total_weight", statistics.TimestampTotalWeight); -} - -void Deserialize(TMultiTablePartition::TStatistics& statistics, const TNode& node) -{ - const auto& nodeMap = node.AsMap(); - DESERIALIZE_ITEM("chunk_count", statistics.ChunkCount); - DESERIALIZE_ITEM("data_weight", statistics.DataWeight); - DESERIALIZE_ITEM("row_count", statistics.RowCount); -} - -void Deserialize(TMultiTablePartition& partition, const TNode& node) -{ - const auto& nodeMap = node.AsMap(); - DESERIALIZE_ITEM("table_ranges", partition.TableRanges); - DESERIALIZE_ITEM("aggregate_statistics", partition.AggregateStatistics); -} - -void 
Deserialize(TMultiTablePartitions& partitions, const TNode& node) -{ - const auto& nodeMap = node.AsMap(); - DESERIALIZE_ITEM("partitions", partitions.Partitions); -} - -void Serialize(const TGUID& value, NYson::IYsonConsumer* consumer) -{ - BuildYsonFluently(consumer).Value(GetGuidAsString(value)); -} - -void Deserialize(TGUID& value, const TNode& node) -{ - value = GetGuid(node.AsString()); -} - -void Deserialize(TTabletInfo& value, const TNode& node) -{ - auto nodeMap = node.AsMap(); - DESERIALIZE_ITEM("total_row_count", value.TotalRowCount) - DESERIALIZE_ITEM("trimmed_row_count", value.TrimmedRowCount) - DESERIALIZE_ITEM("barrier_timestamp", value.BarrierTimestamp) -} - -void Serialize(const NTi::TTypePtr& type, NYson::IYsonConsumer* consumer) -{ - auto yson = NTi::NIo::SerializeYson(type.Get()); - ::NYson::ParseYsonStringBuffer(yson, consumer); -} - -void Deserialize(NTi::TTypePtr& type, const TNode& node) -{ - auto yson = NodeToYsonString(node, NYson::EYsonFormat::Binary); - type = NTi::NIo::DeserializeYson(*NTi::HeapFactory(), yson); -} - -#undef DESERIALIZE_ITEM -#undef DESERIALIZE_ATTR - -//////////////////////////////////////////////////////////////////////////////// - -} // namespace NYT diff --git a/yt/cpp/mapreduce/interface/serialize.h b/yt/cpp/mapreduce/interface/serialize.h deleted file mode 100644 index 223dd446ba5..00000000000 --- a/yt/cpp/mapreduce/interface/serialize.h +++ /dev/null @@ -1,90 +0,0 @@ -#pragma once - -/// -/// @file yt/cpp/mapreduce/interface/serialize.h -/// -/// Header containing declaration of functions for serializing to/from YSON. 
- -#include "common.h" - -#include <library/cpp/type_info/fwd.h> - -namespace NYT::NYson { -struct IYsonConsumer; -} // namespace NYT::NYson - -namespace NYT { - -//////////////////////////////////////////////////////////////////////////////// - -template <class T> -void Deserialize(TMaybe<T>& value, const TNode& node) -{ - value.ConstructInPlace(); - Deserialize(value.GetRef(), node); -} - -template <class T> -void Deserialize(TVector<T>& value, const TNode& node) -{ - for (const auto& element : node.AsList()) { - value.emplace_back(); - Deserialize(value.back(), element); - } -} - -template <class T> -void Deserialize(THashMap<TString, T>& value, const TNode& node) -{ - for (const auto& item : node.AsMap()) { - Deserialize(value[item.first], item.second); - } -} - -//////////////////////////////////////////////////////////////////////////////// - -void Serialize(const TKey& key, NYT::NYson::IYsonConsumer* consumer); -void Deserialize(TKey& key, const TNode& node); - -void Serialize(const TSortColumns& sortColumns, NYT::NYson::IYsonConsumer* consumer); -void Deserialize(TSortColumns& sortColumns, const TNode& node); - -void Serialize(const TColumnNames& columnNames, NYT::NYson::IYsonConsumer* consumer); -void Deserialize(TColumnNames& columnNames, const TNode& node); - -void Serialize(const TSortColumn& sortColumn, NYT::NYson::IYsonConsumer* consumer); -void Deserialize(TSortColumn& sortColumn, const TNode& node); - -void Serialize(const TKeyBound& keyBound, NYT::NYson::IYsonConsumer* consumer); -void Deserialize(TKeyBound& keyBound, const TNode& node); - -void Serialize(const TReadLimit& readLimit, NYT::NYson::IYsonConsumer* consumer); -void Deserialize(TReadLimit& readLimit, const TNode& node); - -void Serialize(const TReadRange& readRange, NYT::NYson::IYsonConsumer* consumer); - -void Serialize(const TRichYPath& path, NYT::NYson::IYsonConsumer* consumer); -void Deserialize(TRichYPath& path, const TNode& node); - -void Serialize(const TAttributeFilter& filter, 
NYT::NYson::IYsonConsumer* consumer); - -void Serialize(const TColumnSchema& columnSchema, NYT::NYson::IYsonConsumer* consumer); -void Serialize(const TTableSchema& tableSchema, NYT::NYson::IYsonConsumer* consumer); - -void Deserialize(EValueType& valueType, const TNode& node); -void Deserialize(TTableSchema& tableSchema, const TNode& node); -void Deserialize(TColumnSchema& columnSchema, const TNode& node); -void Deserialize(TTableColumnarStatistics& statistics, const TNode& node); -void Deserialize(TMultiTablePartition& partition, const TNode& node); -void Deserialize(TMultiTablePartitions& partitions, const TNode& node); -void Deserialize(TTabletInfo& tabletInfos, const TNode& node); - -void Serialize(const TGUID& path, NYT::NYson::IYsonConsumer* consumer); -void Deserialize(TGUID& value, const TNode& node); - -void Serialize(const NTi::TTypePtr& type, NYT::NYson::IYsonConsumer* consumer); -void Deserialize(NTi::TTypePtr& type, const TNode& node); - -//////////////////////////////////////////////////////////////////////////////// - -} // namespace NYT diff --git a/yt/cpp/mapreduce/interface/serialize_ut.cpp b/yt/cpp/mapreduce/interface/serialize_ut.cpp deleted file mode 100644 index 59d4501ee8f..00000000000 --- a/yt/cpp/mapreduce/interface/serialize_ut.cpp +++ /dev/null @@ -1,49 +0,0 @@ -#include <yt/cpp/mapreduce/interface/serialize.h> -#include <yt/cpp/mapreduce/interface/common.h> - -#include <library/cpp/yson/node/node_builder.h> - -#include <library/cpp/testing/unittest/registar.h> - -#include <util/generic/serialized_enum.h> - -using namespace NYT; - -Y_UNIT_TEST_SUITE(Serialization) -{ - Y_UNIT_TEST(TableSchema) - { - auto schema = TTableSchema() - .AddColumn(TColumnSchema().Name("a").Type(EValueType::VT_STRING).SortOrder(SO_ASCENDING)) - .AddColumn(TColumnSchema().Name("b").Type(EValueType::VT_UINT64)) - .AddColumn(TColumnSchema().Name("c").Type(EValueType::VT_INT64, true)); - - auto schemaNode = schema.ToNode(); - UNIT_ASSERT(schemaNode.IsList()); - 
UNIT_ASSERT_VALUES_EQUAL(schemaNode.Size(), 3); - - - UNIT_ASSERT_VALUES_EQUAL(schemaNode[0]["name"], "a"); - UNIT_ASSERT_VALUES_EQUAL(schemaNode[0]["type"], "string"); - UNIT_ASSERT_VALUES_EQUAL(schemaNode[0]["required"], false); - UNIT_ASSERT_VALUES_EQUAL(schemaNode[0]["sort_order"], "ascending"); - - UNIT_ASSERT_VALUES_EQUAL(schemaNode[1]["name"], "b"); - UNIT_ASSERT_VALUES_EQUAL(schemaNode[1]["type"], "uint64"); - UNIT_ASSERT_VALUES_EQUAL(schemaNode[1]["required"], false); - - UNIT_ASSERT_VALUES_EQUAL(schemaNode[2]["name"], "c"); - UNIT_ASSERT_VALUES_EQUAL(schemaNode[2]["type"], "int64"); - UNIT_ASSERT_VALUES_EQUAL(schemaNode[2]["required"], true); - } - - Y_UNIT_TEST(ValueTypeSerialization) - { - for (const auto value : GetEnumAllValues<EValueType>()) { - TNode serialized = NYT::NDetail::ToString(value); - EValueType deserialized; - Deserialize(deserialized, serialized); - UNIT_ASSERT_VALUES_EQUAL(value, deserialized); - } - } -} diff --git a/yt/cpp/mapreduce/interface/skiff_row.cpp b/yt/cpp/mapreduce/interface/skiff_row.cpp deleted file mode 100644 index 7838bdaee94..00000000000 --- a/yt/cpp/mapreduce/interface/skiff_row.cpp +++ /dev/null @@ -1 +0,0 @@ -#include "skiff_row.h" diff --git a/yt/cpp/mapreduce/interface/skiff_row.h b/yt/cpp/mapreduce/interface/skiff_row.h deleted file mode 100644 index 5dd335cb653..00000000000 --- a/yt/cpp/mapreduce/interface/skiff_row.h +++ /dev/null @@ -1,127 +0,0 @@ -#pragma once - -/// -/// @file yt/cpp/mapreduce/interface/skiff_row.h -/// Header containing interfaces that you need to define for using TSkiffRowTableReader -/// What you need to do for your struct type TMyType: -/// 1. Write `true` specialization TIsSkiffRow<TMyType>; -/// 2. Write specialization GetSkiffSchema<TMyType>(); -/// 3. Write your own parser derived from ISkiffRowParser and write specialization GetSkiffParser<TMyType>() which returns this parser. 
- -#include "fwd.h" - -#include <yt/cpp/mapreduce/skiff/skiff_schema.h> - -#include <yt/cpp/mapreduce/interface/format.h> - -#include <library/cpp/skiff/skiff.h> - -#include <util/generic/maybe.h> - -namespace NYT { - -//////////////////////////////////////////////////////////////////////////////// - -//! Need to write `true_type` specialization for your row type `T`. -/// And implement two functions: `GetSkiffSchema` and `CreateSkiffParser`. -/// -/// Example: -/// -/// template <> -/// struct TIsSkiffRow<T> -/// : std::true_type -/// { }; -/// -template<class T> -struct TIsSkiffRow - : std::false_type -{ }; - -//////////////////////////////////////////////////////////////////////////////// - -//! Return skiff schema for row type `T`. -/// Need to write its specialization. -template <typename T> -NSkiff::TSkiffSchemaPtr GetSkiffSchema(const TMaybe<TSkiffRowHints>& /*hints*/) -{ - static_assert(TDependentFalse<T>, "Unimplemented `GetSkiffSchema` method"); -} - -//////////////////////////////////////////////////////////////////////////////// - -//! Allows parsing rows as user's structs from a stream (TCheckedInDebugSkiffParser). -/// Need to write derived class for your own row type. -/// -/// Example: -/// -/// class TMySkiffRowParser : public ISkiffRowParser -/// { -/// public: -/// TMySkiffRowParser(TMySkiffRow* row) -/// : Row_(row) -/// {} -/// -/// void Parse(NSkiff::TCheckedInDebugSkiffParser* parser) -/// { -/// Row_->SomeInt64Field = parser->ParseInt64(); -/// } -/// -/// private: -/// TMySkiffRow* Row_; -/// }; -/// -class ISkiffRowParser - : public TThrRefBase -{ -public: - //! Read one row from parser - virtual void Parse(NSkiff::TCheckedInDebugSkiffParser* /*parser*/) = 0; -}; - -//! Creates a parser for row type `T`.
-template <typename T> -ISkiffRowParserPtr CreateSkiffParser(T* /*row*/, const TMaybe<TSkiffRowHints>& /*hints*/) -{ - static_assert(TDependentFalse<T>, "Unimplemented `CreateSkiffParser` function"); -} - -//////////////////////////////////////////////////////////////////////////////// - -//! Allow to skip row content without getting row. -/// By default row will be parsed using your parser derived from ISkiffRowParser. -/// If you want, you can write more optimal skipper, but it isn't required. -class ISkiffRowSkipper - : public TThrRefBase -{ -public: - virtual void SkipRow(NSkiff::TCheckedInDebugSkiffParser* /*parser*/) = 0; -}; - -//! Default ISkiffRowSkipper implementation. -template <typename T> -class TSkiffRowSkipper : public ISkiffRowSkipper { -public: - explicit TSkiffRowSkipper(const TMaybe<TSkiffRowHints>& hints) - : Parser_(CreateSkiffParser<T>(&Row_, hints)) - { } - - void SkipRow(NSkiff::TCheckedInDebugSkiffParser* parser) { - Parser_->Parse(parser); - } - -private: - T Row_; - ISkiffRowParserPtr Parser_; -}; - -//! Creates a skipper for row type 'T'. -/// You don't need to write its specialization. 
-template <typename T> -ISkiffRowSkipperPtr CreateSkiffSkipper(const TMaybe<TSkiffRowHints>& hints) -{ - return ::MakeIntrusive<TSkiffRowSkipper<T>>(hints); -} - -//////////////////////////////////////////////////////////////////////////////// - -} // namespace NYT diff --git a/yt/cpp/mapreduce/interface/tvm.cpp b/yt/cpp/mapreduce/interface/tvm.cpp deleted file mode 100644 index bfa3f0304e9..00000000000 --- a/yt/cpp/mapreduce/interface/tvm.cpp +++ /dev/null @@ -1 +0,0 @@ -#include "tvm.h" diff --git a/yt/cpp/mapreduce/interface/tvm.h b/yt/cpp/mapreduce/interface/tvm.h deleted file mode 100644 index d8d16d841b2..00000000000 --- a/yt/cpp/mapreduce/interface/tvm.h +++ /dev/null @@ -1,35 +0,0 @@ -#pragma once - -#include <yt/yt/library/tvm/tvm_base.h> - -#include <library/cpp/yt/memory/intrusive_ptr.h> - -namespace NYT::NAuth { - -//////////////////////////////////////////////////////////////////////////////// - -/// This wrapper is required because NYT::NAuth::IServiceTicketAuthPtr is NYT::TIntrusivePtr, -/// and, if we used this pointer in interfaces of `mapreduce/yt` client, a lot of users of this library -/// could get unexpected build errors that `TIntrusivePtr` is ambiguous -/// (from `::` namespace and from `::NYT::` namespace). -/// So we use this wrapper in our interfaces to avoid such problems for users. -struct IServiceTicketAuthPtrWrapper -{ - /// - /// Construct wrapper from NYT::TIntrusivePtr - /// - /// This constructor is implicit so users can transparently pass NYT::TIntrusivePtr to the functions of - /// mapreduce/yt client.
- template <class T, class = typename std::enable_if_t<std::is_convertible_v<T*, IServiceTicketAuth*>>> - IServiceTicketAuthPtrWrapper(const TIntrusivePtr<T> ptr) - : Ptr(ptr) - { - } - - /// Wrapped pointer - NYT::TIntrusivePtr<IServiceTicketAuth> Ptr; -}; - -//////////////////////////////////////////////////////////////////////////////// - -} // namespace NYT::NAuth diff --git a/yt/cpp/mapreduce/interface/ut/ya.make b/yt/cpp/mapreduce/interface/ut/ya.make deleted file mode 100644 index 0219e6430ca..00000000000 --- a/yt/cpp/mapreduce/interface/ut/ya.make +++ /dev/null @@ -1,25 +0,0 @@ -UNITTEST_FOR(yt/cpp/mapreduce/interface) - -SRCS( - common_ut.cpp - config_ut.cpp - error_ut.cpp - format_ut.cpp - job_counters_ut.cpp - job_statistics_ut.cpp - operation_ut.cpp - proto3_ut.proto - protobuf_table_schema_ut.cpp - protobuf_file_options_ut.cpp - protobuf_table_schema_ut.proto - protobuf_file_options_ut.proto - serialize_ut.cpp -) - -PEERDIR( - contrib/libs/protobuf - library/cpp/testing/unittest - yt/yt_proto/yt/formats -) - -END() diff --git a/yt/cpp/mapreduce/interface/wait_proxy.h b/yt/cpp/mapreduce/interface/wait_proxy.h deleted file mode 100644 index f7d8e0638e9..00000000000 --- a/yt/cpp/mapreduce/interface/wait_proxy.h +++ /dev/null @@ -1,54 +0,0 @@ -#pragma once - -/// -/// @file yt/cpp/mapreduce/interface/wait_proxy.h -/// -/// Header containing interface to enable customizable waiting. - -#include <yt/cpp/mapreduce/interface/common.h> - -#include <util/datetime/base.h> - -namespace NThreading { -template <typename T> -class TFuture; -} - -class TSystemEvent; -class TCondVar; -class TMutex; - -namespace NYT { - -//////////////////////////////////////////////////////////////////////////////// - -/// -/// @brief Interface to facilitate customizable waiting. -/// -/// All the waiting functions in the library are obliged to use the methods of a wait proxy instead of direct function calls.
-class IWaitProxy - : public TThrRefBase -{ -public: - virtual ~IWaitProxy() = default; - - /// - /// @brief Wait for the future setting with timeout. - virtual bool WaitFuture(const ::NThreading::TFuture<void>& future, TDuration timeout) = 0; - - /// - /// @brief Wait for a system event with timeout. - virtual bool WaitEvent(TSystemEvent& event, TDuration timeout) = 0; - - /// - /// @brief Wait for the notification on the condition variable with timeout. - virtual bool WaitCondVar(TCondVar& condVar, TMutex& mutex, TDuration timeout) = 0; - - /// - /// @brief Sleep in the current thread for (approximately) specified amount of time. - virtual void Sleep(TDuration timeout) = 0; -}; - -//////////////////////////////////////////////////////////////////////////////// - -} // namespace NYT diff --git a/yt/cpp/mapreduce/interface/ya.make b/yt/cpp/mapreduce/interface/ya.make deleted file mode 100644 index 0e94f146339..00000000000 --- a/yt/cpp/mapreduce/interface/ya.make +++ /dev/null @@ -1,46 +0,0 @@ -LIBRARY() - -INCLUDE(${ARCADIA_ROOT}/yt/ya_cpp.make.inc) - -SRCS( - batch_request.cpp - client.cpp - client_method_options.cpp - common.cpp - config.cpp - cypress.cpp - errors.cpp - format.cpp - job_counters.cpp - job_statistics.cpp - io.cpp - operation.cpp - protobuf_format.cpp - serialize.cpp - skiff_row.cpp - tvm.cpp -) - -PEERDIR( - contrib/libs/protobuf - library/cpp/type_info - library/cpp/threading/future - library/cpp/yson/node - yt/cpp/mapreduce/interface/logging - yt/yt_proto/yt/formats - yt/yt/library/tvm -) - -GENERATE_ENUM_SERIALIZATION(client_method_options.h) -GENERATE_ENUM_SERIALIZATION(client.h) -GENERATE_ENUM_SERIALIZATION(common.h) -GENERATE_ENUM_SERIALIZATION(config.h) -GENERATE_ENUM_SERIALIZATION(cypress.h) -GENERATE_ENUM_SERIALIZATION(job_counters.h) -GENERATE_ENUM_SERIALIZATION(job_statistics.h) -GENERATE_ENUM_SERIALIZATION(operation.h) -GENERATE_ENUM_SERIALIZATION(protobuf_format.h) - -END() - -RECURSE_FOR_TESTS(ut) |