aboutsummaryrefslogtreecommitdiffstats
path: root/yt
diff options
context:
space:
mode:
authorrobot-piglet <robot-piglet@yandex-team.com>2024-02-02 09:29:53 +0300
committerAlexander Smirnov <alex@ydb.tech>2024-02-09 19:17:12 +0300
commit0460c44a8d10b6286eeafba80720664bcd5e9ee1 (patch)
tree60dc85342ea90b10b17d257c55d840d193793cb1 /yt
parent5bb6b881894a01666f5143835c8fc02264ff008e (diff)
downloadydb-0460c44a8d10b6286eeafba80720664bcd5e9ee1.tar.gz
Intermediate changes
Diffstat (limited to 'yt')
-rw-r--r--yt/yt/client/chunk_client/data_statistics.cpp13
-rw-r--r--yt/yt/client/chunk_client/data_statistics.h3
-rw-r--r--yt/yt/client/table_client/config.cpp10
-rw-r--r--yt/yt/client/table_client/config.h9
-rw-r--r--yt/yt/client/table_client/public.h7
-rw-r--r--yt/yt/client/table_client/row_base.h6
-rw-r--r--yt/yt_proto/yt/client/chunk_client/proto/chunk_meta.proto4
-rw-r--r--yt/yt_proto/yt/client/table_chunk_format/proto/chunk_meta.proto1
8 files changed, 47 insertions, 6 deletions
diff --git a/yt/yt/client/chunk_client/data_statistics.cpp b/yt/yt/client/chunk_client/data_statistics.cpp
index 1dbc2ad520..ce4ca1cd84 100644
--- a/yt/yt/client/chunk_client/data_statistics.cpp
+++ b/yt/yt/client/chunk_client/data_statistics.cpp
@@ -188,11 +188,19 @@ TCodecStatistics& TCodecStatistics::Append(const std::pair<ECodec, TDuration>& c
return *this;
}
+TCodecStatistics& TCodecStatistics::AppendToValueDictionaryCompression(TDuration duration)
+{
+ ValueDictionaryCompressionDuration_ += duration;
+ TotalDuration_ += duration;
+ return *this;
+}
+
TCodecStatistics& TCodecStatistics::operator+=(const TCodecStatistics& other)
{
for (const auto& pair : other.CodecToDuration_) {
Append(pair);
}
+ AppendToValueDictionaryCompression(other.ValueDictionaryCompressionDuration_);
return *this;
}
@@ -204,6 +212,11 @@ TDuration TCodecStatistics::GetTotalDuration() const
void FormatValue(TStringBuilderBase* builder, const TCodecStatistics& statistics, TStringBuf /* spec */)
{
FormatKeyValueRange(builder, statistics.CodecToDuration(), TDefaultFormatter());
+ if (statistics.ValueDictionaryCompressionDuration()) {
+ builder->AppendString(", ");
+ builder->AppendFormat("ValueDictionaryCompressionDuration: %v",
+ statistics.ValueDictionaryCompressionDuration());
+ }
}
TString ToString(const TCodecStatistics& statistics)
diff --git a/yt/yt/client/chunk_client/data_statistics.h b/yt/yt/client/chunk_client/data_statistics.h
index 880a510c59..0a8b918627 100644
--- a/yt/yt/client/chunk_client/data_statistics.h
+++ b/yt/yt/client/chunk_client/data_statistics.h
@@ -52,8 +52,11 @@ public:
1>;
DEFINE_BYREF_RO_PROPERTY(TCodecToDuration, CodecToDuration);
+ DEFINE_BYREF_RO_PROPERTY(TDuration, ValueDictionaryCompressionDuration);
+
public:
TCodecStatistics& Append(const TCodecDuration& codecTime);
+ TCodecStatistics& AppendToValueDictionaryCompression(TDuration duration);
TCodecStatistics& operator+=(const TCodecStatistics& other);
diff --git a/yt/yt/client/table_client/config.cpp b/yt/yt/client/table_client/config.cpp
index a4de89b4f1..f11756c596 100644
--- a/yt/yt/client/table_client/config.cpp
+++ b/yt/yt/client/table_client/config.cpp
@@ -276,6 +276,16 @@ void TDictionaryCompressionConfig::Register(TRegistrar registrar)
EDictionaryCompressionPolicy::FreshChunkFirst,
});
+ registrar.Parameter("policy_probation_samples_size", &TThis::PolicyProbationSamplesSize)
+ .GreaterThan(0)
+ .Default(12_MB);
+ registrar.Parameter("max_acceptable_compression_ratio", &TThis::MaxAcceptableCompressionRatio)
+ .Default(0.7)
+ .InRange(0, 1);
+ registrar.Parameter("max_decompression_blob_size", &TThis::MaxDecompressionBlobSize)
+ .GreaterThan(0)
+ .Default(64_MB);
+
registrar.Postprocessor([] (TThis* config) {
if (config->DesiredSampleCount > config->MaxProcessedSampleCount) {
THROW_ERROR_EXCEPTION("\"desired_sample_count\" cannot be greater than \"max_processed_sample_count\"");
diff --git a/yt/yt/client/table_client/config.h b/yt/yt/client/table_client/config.h
index 952dd2a94e..cca367f23b 100644
--- a/yt/yt/client/table_client/config.h
+++ b/yt/yt/client/table_client/config.h
@@ -249,6 +249,15 @@ public:
//! Upon each chunk compression will independently decide which dictionary fits best.
THashSet<EDictionaryCompressionPolicy> AppliedPolicies;
+ //! Upon each chunk compression, samples of this total weight are accumulated first,
+ //! before deciding which policy's dictionary fits best.
+ i64 PolicyProbationSamplesSize;
+ //! Upper limit on acceptable compression ratio. No chunk compression is performed if this limit is exceeded.
+ double MaxAcceptableCompressionRatio;
+
+ //! Upper limit on content size of a batch that can be decompressed within a single iteration.
+ i64 MaxDecompressionBlobSize;
+
REGISTER_YSON_STRUCT(TDictionaryCompressionConfig);
static void Register(TRegistrar registrar);
diff --git a/yt/yt/client/table_client/public.h b/yt/yt/client/table_client/public.h
index eeb18fd506..592cbcacd3 100644
--- a/yt/yt/client/table_client/public.h
+++ b/yt/yt/client/table_client/public.h
@@ -130,9 +130,10 @@ constexpr int TypicalHunkColumnCount = 8;
////////////////////////////////////////////////////////////////////////////////
DEFINE_ENUM_WITH_UNDERLYING_TYPE(EHunkValueTag, ui8,
- ((Inline) (0))
- ((LocalRef) (1))
- ((GlobalRef)(2))
+ ((Inline) (0))
+ ((LocalRef) (1))
+ ((GlobalRef) (2))
+ ((CompressedInline) (3))
);
// Do not change these values since they are stored in the master snapshot.
diff --git a/yt/yt/client/table_client/row_base.h b/yt/yt/client/table_client/row_base.h
index b353edd1cf..126806aaad 100644
--- a/yt/yt/client/table_client/row_base.h
+++ b/yt/yt/client/table_client/row_base.h
@@ -33,9 +33,9 @@ static_assert(
"Incorrect type order.");
DEFINE_BIT_ENUM_WITH_UNDERLYING_TYPE(EValueFlags, ui8,
- ((None) (0x0000))
- ((Aggregate) (0x0001))
- ((Hunk) (0x0002))
+ ((None) (0x00))
+ ((Aggregate) (0x01))
+ ((Hunk) (0x02))
);
DEFINE_ENUM_WITH_UNDERLYING_TYPE(ESimpleLogicalValueType, ui32,
diff --git a/yt/yt_proto/yt/client/chunk_client/proto/chunk_meta.proto b/yt/yt_proto/yt/client/chunk_client/proto/chunk_meta.proto
index d7e724b4cc..874ec93151 100644
--- a/yt/yt_proto/yt/client/chunk_client/proto/chunk_meta.proto
+++ b/yt/yt_proto/yt/client/chunk_client/proto/chunk_meta.proto
@@ -1,5 +1,6 @@
package NYT.NChunkClient.NProto;
+import "yt_proto/yt/core/misc/proto/guid.proto";
import "yt_proto/yt/core/misc/proto/protobuf_helpers.proto";
option go_package = "a.yandex-team.ru/yt/go/proto/client/chunk_client";
@@ -91,6 +92,9 @@ message TMiscExt
// If present, chunk is a compression dictionary of respective policy.
optional int32 dictionary_compression_policy = 24; // EDictionaryCompressionPolicy
+ // If present, chunk values are compressed with dictionary of respective id.
+ optional NYT.NProto.TGuid compression_dictionary_id = 25;
+
reserved 20;
}
diff --git a/yt/yt_proto/yt/client/table_chunk_format/proto/chunk_meta.proto b/yt/yt_proto/yt/client/table_chunk_format/proto/chunk_meta.proto
index 875f93c819..6518287f9d 100644
--- a/yt/yt_proto/yt/client/table_chunk_format/proto/chunk_meta.proto
+++ b/yt/yt_proto/yt/client/table_chunk_format/proto/chunk_meta.proto
@@ -294,6 +294,7 @@ message THunkChunkRef
optional int64 hunk_count = 2;
optional int64 total_hunk_length = 3;
optional int32 erasure_codec = 4; // NErasure::ECodec
+ optional NYT.NProto.TGuid compression_dictionary_id = 5;
}
message THunkChunkRefsExt