diff options
author | robot-piglet <robot-piglet@yandex-team.com> | 2024-02-02 09:29:53 +0300 |
---|---|---|
committer | Alexander Smirnov <alex@ydb.tech> | 2024-02-09 19:17:12 +0300 |
commit | 0460c44a8d10b6286eeafba80720664bcd5e9ee1 (patch) | |
tree | 60dc85342ea90b10b17d257c55d840d193793cb1 /yt | |
parent | 5bb6b881894a01666f5143835c8fc02264ff008e (diff) | |
download | ydb-0460c44a8d10b6286eeafba80720664bcd5e9ee1.tar.gz |
Intermediate changes
Diffstat (limited to 'yt')
-rw-r--r-- | yt/yt/client/chunk_client/data_statistics.cpp | 13 | ||||
-rw-r--r-- | yt/yt/client/chunk_client/data_statistics.h | 3 | ||||
-rw-r--r-- | yt/yt/client/table_client/config.cpp | 10 | ||||
-rw-r--r-- | yt/yt/client/table_client/config.h | 9 | ||||
-rw-r--r-- | yt/yt/client/table_client/public.h | 7 | ||||
-rw-r--r-- | yt/yt/client/table_client/row_base.h | 6 | ||||
-rw-r--r-- | yt/yt_proto/yt/client/chunk_client/proto/chunk_meta.proto | 4 | ||||
-rw-r--r-- | yt/yt_proto/yt/client/table_chunk_format/proto/chunk_meta.proto | 1 |
8 files changed, 47 insertions, 6 deletions
diff --git a/yt/yt/client/chunk_client/data_statistics.cpp b/yt/yt/client/chunk_client/data_statistics.cpp index 1dbc2ad520..ce4ca1cd84 100644 --- a/yt/yt/client/chunk_client/data_statistics.cpp +++ b/yt/yt/client/chunk_client/data_statistics.cpp @@ -188,11 +188,19 @@ TCodecStatistics& TCodecStatistics::Append(const std::pair<ECodec, TDuration>& c return *this; } +TCodecStatistics& TCodecStatistics::AppendToValueDictionaryCompression(TDuration duration) +{ + ValueDictionaryCompressionDuration_ += duration; + TotalDuration_ += duration; + return *this; +} + TCodecStatistics& TCodecStatistics::operator+=(const TCodecStatistics& other) { for (const auto& pair : other.CodecToDuration_) { Append(pair); } + AppendToValueDictionaryCompression(other.ValueDictionaryCompressionDuration_); return *this; } @@ -204,6 +212,11 @@ TDuration TCodecStatistics::GetTotalDuration() const void FormatValue(TStringBuilderBase* builder, const TCodecStatistics& statistics, TStringBuf /* spec */) { FormatKeyValueRange(builder, statistics.CodecToDuration(), TDefaultFormatter()); + if (statistics.ValueDictionaryCompressionDuration()) { + builder->AppendString(", "); + builder->AppendFormat("ValueDictionaryCompressionDuration: %v", + statistics.ValueDictionaryCompressionDuration()); + } } TString ToString(const TCodecStatistics& statistics) diff --git a/yt/yt/client/chunk_client/data_statistics.h b/yt/yt/client/chunk_client/data_statistics.h index 880a510c59..0a8b918627 100644 --- a/yt/yt/client/chunk_client/data_statistics.h +++ b/yt/yt/client/chunk_client/data_statistics.h @@ -52,8 +52,11 @@ public: 1>; DEFINE_BYREF_RO_PROPERTY(TCodecToDuration, CodecToDuration); + DEFINE_BYREF_RO_PROPERTY(TDuration, ValueDictionaryCompressionDuration); + public: TCodecStatistics& Append(const TCodecDuration& codecTime); + TCodecStatistics& AppendToValueDictionaryCompression(TDuration duration); TCodecStatistics& operator+=(const TCodecStatistics& other); diff --git a/yt/yt/client/table_client/config.cpp 
b/yt/yt/client/table_client/config.cpp index a4de89b4f1..f11756c596 100644 --- a/yt/yt/client/table_client/config.cpp +++ b/yt/yt/client/table_client/config.cpp @@ -276,6 +276,16 @@ void TDictionaryCompressionConfig::Register(TRegistrar registrar) EDictionaryCompressionPolicy::FreshChunkFirst, }); + registrar.Parameter("policy_probation_samples_size", &TThis::PolicyProbationSamplesSize) + .GreaterThan(0) + .Default(12_MB); + registrar.Parameter("max_acceptable_compression_ratio", &TThis::MaxAcceptableCompressionRatio) + .Default(0.7) + .InRange(0, 1); + registrar.Parameter("max_decompression_blob_size", &TThis::MaxDecompressionBlobSize) + .GreaterThan(0) + .Default(64_MB); + registrar.Postprocessor([] (TThis* config) { if (config->DesiredSampleCount > config->MaxProcessedSampleCount) { THROW_ERROR_EXCEPTION("\"desired_sample_count\" cannot be greater than \"max_processed_sample_count\""); diff --git a/yt/yt/client/table_client/config.h b/yt/yt/client/table_client/config.h index 952dd2a94e..cca367f23b 100644 --- a/yt/yt/client/table_client/config.h +++ b/yt/yt/client/table_client/config.h @@ -249,6 +249,15 @@ public: //! Upon each chunk compression will independently decide which dictionary fits best. THashSet<EDictionaryCompressionPolicy> AppliedPolicies; + //! Upon each chunk compression will first accumulate samples of that weight + //! before deciding dictionary of which policy fits the best. + i64 PolicyProbationSamplesSize; + //! Upper limit on acceptable compression ratio. No chunk compression is performed if this limit is exceeded. + double MaxAcceptableCompressionRatio; + + //! Upper limit on content size of a batch that can be decompressed within a single iteration. 
+ i64 MaxDecompressionBlobSize; + REGISTER_YSON_STRUCT(TDictionaryCompressionConfig); static void Register(TRegistrar registrar); diff --git a/yt/yt/client/table_client/public.h b/yt/yt/client/table_client/public.h index eeb18fd506..592cbcacd3 100644 --- a/yt/yt/client/table_client/public.h +++ b/yt/yt/client/table_client/public.h @@ -130,9 +130,10 @@ constexpr int TypicalHunkColumnCount = 8; //////////////////////////////////////////////////////////////////////////////// DEFINE_ENUM_WITH_UNDERLYING_TYPE(EHunkValueTag, ui8, - ((Inline) (0)) - ((LocalRef) (1)) - ((GlobalRef)(2)) + ((Inline) (0)) + ((LocalRef) (1)) + ((GlobalRef) (2)) + ((CompressedInline) (3)) ); // Do not change these values since they are stored in the master snapshot. diff --git a/yt/yt/client/table_client/row_base.h b/yt/yt/client/table_client/row_base.h index b353edd1cf..126806aaad 100644 --- a/yt/yt/client/table_client/row_base.h +++ b/yt/yt/client/table_client/row_base.h @@ -33,9 +33,9 @@ static_assert( "Incorrect type order."); DEFINE_BIT_ENUM_WITH_UNDERLYING_TYPE(EValueFlags, ui8, - ((None) (0x0000)) - ((Aggregate) (0x0001)) - ((Hunk) (0x0002)) + ((None) (0x00)) + ((Aggregate) (0x01)) + ((Hunk) (0x02)) ); DEFINE_ENUM_WITH_UNDERLYING_TYPE(ESimpleLogicalValueType, ui32, diff --git a/yt/yt_proto/yt/client/chunk_client/proto/chunk_meta.proto b/yt/yt_proto/yt/client/chunk_client/proto/chunk_meta.proto index d7e724b4cc..874ec93151 100644 --- a/yt/yt_proto/yt/client/chunk_client/proto/chunk_meta.proto +++ b/yt/yt_proto/yt/client/chunk_client/proto/chunk_meta.proto @@ -1,5 +1,6 @@ package NYT.NChunkClient.NProto; +import "yt_proto/yt/core/misc/proto/guid.proto"; import "yt_proto/yt/core/misc/proto/protobuf_helpers.proto"; option go_package = "a.yandex-team.ru/yt/go/proto/client/chunk_client"; @@ -91,6 +92,9 @@ message TMiscExt // If present, chunk is a compression dictionary of respective policy. 
optional int32 dictionary_compression_policy = 24; // EDictionaryCompressionPolicy + // If present, chunk values are compressed with dictionary of respective id. + optional NYT.NProto.TGuid compression_dictionary_id = 25; + reserved 20; } diff --git a/yt/yt_proto/yt/client/table_chunk_format/proto/chunk_meta.proto b/yt/yt_proto/yt/client/table_chunk_format/proto/chunk_meta.proto index 875f93c819..6518287f9d 100644 --- a/yt/yt_proto/yt/client/table_chunk_format/proto/chunk_meta.proto +++ b/yt/yt_proto/yt/client/table_chunk_format/proto/chunk_meta.proto @@ -294,6 +294,7 @@ message THunkChunkRef optional int64 hunk_count = 2; optional int64 total_hunk_length = 3; optional int32 erasure_codec = 4; // NErasure::ECodec + optional NYT.NProto.TGuid compression_dictionary_id = 5; } message THunkChunkRefsExt |