diff options
author | max42 <max42@yandex-team.com> | 2023-07-29 00:02:16 +0300 |
---|---|---|
committer | max42 <max42@yandex-team.com> | 2023-07-29 00:02:16 +0300 |
commit | 73b89de71748a21e102d27b9f3ed1bf658766cb5 (patch) | |
tree | 188bbd2d622fa91cdcbb1b6d6d77fbc84a0646f5 /yt/cpp/mapreduce/interface/config.h | |
parent | 528e321bcc2a2b67b53aeba58c3bd88305a141ee (diff) | |
download | ydb-73b89de71748a21e102d27b9f3ed1bf658766cb5.tar.gz |
YT-19210: expose YQL shared library for YT.
After this, a new target libyqlplugin.so appears in the open-source CMake build.
Diff in open-source YDB repo looks like the following: https://paste.yandex-team.ru/f302bdb4-7ef2-4362-91c7-6ca45f329264
Diffstat (limited to 'yt/cpp/mapreduce/interface/config.h')
-rw-r--r-- | yt/cpp/mapreduce/interface/config.h | 228 |
1 files changed, 228 insertions, 0 deletions
diff --git a/yt/cpp/mapreduce/interface/config.h b/yt/cpp/mapreduce/interface/config.h new file mode 100644 index 00000000000..c44ad25f1ca --- /dev/null +++ b/yt/cpp/mapreduce/interface/config.h @@ -0,0 +1,228 @@ +#pragma once + +#include "fwd.h" +#include "common.h" +#include "node.h" + +#include <library/cpp/yt/misc/enum.h> + +#include <util/generic/maybe.h> +#include <util/generic/string.h> +#include <util/generic/hash_set.h> + +#include <util/datetime/base.h> + +namespace NYT { + +enum EEncoding : int +{ + E_IDENTITY /* "identity" */, + E_GZIP /* "gzip" */, + E_BROTLI /* "br" */, + E_Z_LZ4 /* "z-lz4" */, +}; + +enum class ENodeReaderFormat : int +{ + Yson, // Always use YSON format, + Skiff, // Always use Skiff format, throw exception if it's not possible (non-strict schema, dynamic table etc.) + Auto, // Use Skiff format if it's possible, YSON otherwise +}; + +enum class ETraceHttpRequestsMode +{ + // Never dump http requests. + Never /* "never" */, + // Dump failed http requests. + Error /* "error" */, + // Dump all http requests. + Always /* "always" */, +}; + +DEFINE_ENUM(EUploadDeduplicationMode, + // For each file only one process' thread from all possible hosts can upload it to the file cache at the same time. + // The others will wait for the uploading to finish and use already cached file. + ((Global) (0)) + + // For each file and each particular host only one process' thread can upload it to the file cache at the same time. + // The others will wait for the uploading to finish and use already cached file. + ((Host) (1)) + + // All processes' threads will upload a file to the cache concurrently. + ((Disabled) (2)) +); + +//////////////////////////////////////////////////////////////////////////////// + +struct TConfig + : public TThrRefBase +{ + TString Hosts; + TString Pool; + TString Token; + TString Prefix; + TString ApiVersion; + TString LogLevel; + + // Compression for data that is sent to YT cluster. 
+ EEncoding ContentEncoding; + + // Compression for data that is read from YT cluster. + EEncoding AcceptEncoding; + + TString GlobalTxId; + + bool ForceIpV4; + bool ForceIpV6; + bool UseHosts; + + TDuration HostListUpdateInterval; + + TNode Spec; + TNode TableWriter; + + TDuration ConnectTimeout; + TDuration SocketTimeout; + TDuration AddressCacheExpirationTimeout; + TDuration TxTimeout; + TDuration PingTimeout; + TDuration PingInterval; + + bool UseAsyncTxPinger; + int AsyncHttpClientThreads; + int AsyncTxPingerPoolThreads; + + // How often should we poll for lock state + TDuration WaitLockPollInterval; + + TDuration RetryInterval; + TDuration ChunkErrorsRetryInterval; + + TDuration RateLimitExceededRetryInterval; + TDuration StartOperationRetryInterval; + + int RetryCount; + int ReadRetryCount; + int StartOperationRetryCount; + + /// @brief Period for checking status of running operation. + TDuration OperationTrackerPollPeriod = TDuration::Seconds(5); + + TString RemoteTempFilesDirectory; + TString RemoteTempTablesDirectory; + + // + // Infer schemas for nonexstent tables from typed rows (e.g. protobuf) + // when writing from operation or client writer. + // This options can be overriden in TOperationOptions and TTableWriterOptions. + bool InferTableSchema; + + bool UseClientProtobuf; + ENodeReaderFormat NodeReaderFormat; + bool ProtobufFormatWithDescriptors; + + int ConnectionPoolSize; + + /// Defines replication factor that is used for files that are uploaded to YT + /// to use them in operations. + int FileCacheReplicationFactor = 10; + + /// @brief Used when waiting for other process which uploads the same file to the file cache. + /// + /// If CacheUploadDeduplicationMode is not Disabled, current process can wait for some other + /// process which is uploading the same file. This value is proportional to the timeout of waiting, + /// actual timeout computes as follows: fileSizeGb * CacheLockTimeoutPerGb. 
+ /// Default timeout assumes that host has uploading speed equal to 20 Mb/s. + /// If timeout was reached, the file will be uploaded by current process without any other waits. + TDuration CacheLockTimeoutPerGb; + + /// @brief Used to prevent concurrent uploading of the same file to the file cache. + /// NB: Each mode affects only users with the same mode enabled. + EUploadDeduplicationMode CacheUploadDeduplicationMode; + + bool MountSandboxInTmpfs; + + /// @brief Set upload options (e.g.) for files created by library. + /// + /// Path itself is always ignored but path options (e.g. `BypassArtifactCache`) are used when uploading system files: + /// cppbinary, job state, etc + TRichYPath ApiFilePathOptions; + + // Testing options, should never be used in user programs. + bool UseAbortableResponse = false; + bool EnableDebugMetrics = false; + + // + // There is optimization used with local YT that enables to skip binary upload and use real binary path. + // When EnableLocalModeOptimization is set to false this optimization is completely disabled. + bool EnableLocalModeOptimization = true; + + // + // If you want see stderr even if you jobs not failed set this true. + bool WriteStderrSuccessfulJobs = false; + + // + // This configuration is useful for debug. + // If set to ETraceHttpRequestsMode::Error library will dump all http error requests. + // If set to ETraceHttpRequestsMode::All library will dump all http requests. + // All tracing occurres as DEBUG level logging. + ETraceHttpRequestsMode TraceHttpRequestsMode = ETraceHttpRequestsMode::Never; + + TString SkynetApiHost; + + // Sets SO_PRIORITY option on the socket + TMaybe<int> SocketPriority; + + // Framing settings + // (cf. https://yt.yandex-team.ru/docs/description/proxy/http_proxy_reference#framing). 
+ THashSet<TString> CommandsWithFraming; + + static bool GetBool(const char* var, bool defaultValue = false); + static int GetInt(const char* var, int defaultValue); + static TDuration GetDuration(const char* var, TDuration defaultValue); + static EEncoding GetEncoding(const char* var); + static EUploadDeduplicationMode GetUploadingDeduplicationMode( + const char* var, + EUploadDeduplicationMode defaultValue); + + static void ValidateToken(const TString& token); + static TString LoadTokenFromFile(const TString& tokenPath); + + static TNode LoadJsonSpec(const TString& strSpec); + + static TRichYPath LoadApiFilePathOptions(const TString& ysonMap); + + void LoadToken(); + void LoadSpec(); + void LoadTimings(); + + void Reset(); + + TConfig(); + + static TConfigPtr Get(); +}; + +//////////////////////////////////////////////////////////////////////////////// + +struct TProcessState +{ + TString FqdnHostName; + TString UserName; + TVector<TString> CommandLine; + + // Command line with everything that looks like tokens censored. + TVector<TString> CensoredCommandLine; + int Pid; + TString ClientVersion; + + TProcessState(); + + void SetCommandLine(int argc, const char* argv[]); + + static TProcessState* Get(); +}; + +//////////////////////////////////////////////////////////////////////////////// + +} // namespace NYT |