diff options
author | orlovorlov <orlovorlov@yandex-team.com> | 2024-03-18 13:15:49 +0300 |
---|---|---|
committer | orlovorlov <orlovorlov@yandex-team.com> | 2024-03-18 15:02:17 +0300 |
commit | 29127cb44e4ff8ecde3924e1af7bb4213fda2a9f (patch) | |
tree | 1fb2dad483a4877ed5110e9df388e0380f661f5b /yt | |
parent | d7c2d188d2464ce858465870a39220c4dfbb5509 (diff) | |
download | ydb-29127cb44e4ff8ecde3924e1af7bb4213fda2a9f.tar.gz |
YT-21141 Avoid content deduplication for files under 10MB
febae4e49cd0f600bf21616025f210e99235cfdc
Diffstat (limited to 'yt')
-rw-r--r-- | yt/cpp/mapreduce/client/operation_preparer.cpp | 11 | ||||
-rw-r--r-- | yt/cpp/mapreduce/client/operation_preparer.h | 2 | ||||
-rw-r--r-- | yt/cpp/mapreduce/interface/config.cpp | 1 | ||||
-rw-r--r-- | yt/cpp/mapreduce/interface/config.h | 3 |
4 files changed, 12 insertions, 5 deletions
diff --git a/yt/cpp/mapreduce/client/operation_preparer.cpp b/yt/cpp/mapreduce/client/operation_preparer.cpp index eb30eed000..ec822e607e 100644 --- a/yt/cpp/mapreduce/client/operation_preparer.cpp +++ b/yt/cpp/mapreduce/client/operation_preparer.cpp @@ -315,7 +315,7 @@ public: return FileName_; } - ui64 GetDataSize() const override + i64 GetDataSize() const override { return GetFileLength(FileName_); } @@ -353,9 +353,9 @@ public: return Description_; } - ui64 GetDataSize() const override + i64 GetDataSize() const override { - return Data_.size(); + return std::ssize(Data_); } private: @@ -694,7 +694,10 @@ TString TJobPreparer::UploadToCacheUsingApi(const IItemToUpload& itemToUpload) c itemToUpload.GetDescription(), OperationPreparer_.GetPreparationId()); - if (OperationPreparer_.GetContext().Config->CacheUploadDeduplicationMode != EUploadDeduplicationMode::Disabled) { + const auto& config = OperationPreparer_.GetContext().Config; + + if (config->CacheUploadDeduplicationMode != EUploadDeduplicationMode::Disabled && + itemToUpload.GetDataSize() > config->CacheUploadDeduplicationThreshold) { if (auto path = TryUploadWithDeduplication(itemToUpload)) { return *path; } diff --git a/yt/cpp/mapreduce/client/operation_preparer.h b/yt/cpp/mapreduce/client/operation_preparer.h index e1b9d59b1d..67eb28b31a 100644 --- a/yt/cpp/mapreduce/client/operation_preparer.h +++ b/yt/cpp/mapreduce/client/operation_preparer.h @@ -56,7 +56,7 @@ struct IItemToUpload virtual TString CalculateMD5() const = 0; virtual THolder<IInputStream> CreateInputStream() const = 0; virtual TString GetDescription() const = 0; - virtual ui64 GetDataSize() const = 0; + virtual i64 GetDataSize() const = 0; }; //////////////////////////////////////////////////////////////////////////////// diff --git a/yt/cpp/mapreduce/interface/config.cpp b/yt/cpp/mapreduce/interface/config.cpp index 407c213226..c012386cc3 100644 --- a/yt/cpp/mapreduce/interface/config.cpp +++ b/yt/cpp/mapreduce/interface/config.cpp @@ -212,6 +212,7 @@ void TConfig::Reset() LoadTimings(); CacheUploadDeduplicationMode = GetUploadingDeduplicationMode("YT_UPLOAD_DEDUPLICATION", EUploadDeduplicationMode::Host); + CacheUploadDeduplicationThreshold = 10_MB; RetryCount = Max(GetInt("YT_RETRY_COUNT", 10), 1); ReadRetryCount = Max(GetInt("YT_READ_RETRY_COUNT", 30), 1); diff --git a/yt/cpp/mapreduce/interface/config.h b/yt/cpp/mapreduce/interface/config.h index b6d34f8895..de5f5ab7fc 100644 --- a/yt/cpp/mapreduce/interface/config.h +++ b/yt/cpp/mapreduce/interface/config.h @@ -155,6 +155,9 @@ struct TConfig /// NB: Each mode affects only users with the same mode enabled. EUploadDeduplicationMode CacheUploadDeduplicationMode; + // @brief Minimum byte size for files to undergo deduplication at upload + i64 CacheUploadDeduplicationThreshold; + bool MountSandboxInTmpfs; /// @brief Set upload options (e.g.) for files created by library. |