diff options
author | sabdenovch <sabdenovch@yandex-team.com> | 2024-11-15 23:35:53 +0300 |
---|---|---|
committer | sabdenovch <sabdenovch@yandex-team.com> | 2024-11-15 23:50:19 +0300 |
commit | 04b5229959af1b72ccc722dc45c01ae1490fe5bf (patch) | |
tree | 9c38447439e8e08f3b336d2aca89b67c576f111a | |
parent | b7294c0951553e0e13beb4abe957b93b0498a54c (diff) | |
download | ydb-04b5229959af1b72ccc722dc45c01ae1490fe5bf.tar.gz |
YT-23439: Fix block size estimation in scan writer
commit_hash:6bc02d261d376710e0d08f9372def30d3acfa949
-rw-r--r-- | yt/yt/client/table_client/config.cpp | 2 | ||||
-rw-r--r-- | yt/yt/client/table_client/config.h | 1 | ||||
-rw-r--r-- | yt/yt/core/misc/bit_packed_unsigned_vector-inl.h | 2 | ||||
-rw-r--r-- | yt/yt/core/misc/bit_packed_unsigned_vector.cpp | 20 | ||||
-rw-r--r-- | yt/yt/core/misc/bit_packed_unsigned_vector.h | 2 | ||||
-rw-r--r-- | yt/yt/library/column_converters/string_column_converter.cpp | 8 |
6 files changed, 18 insertions, 17 deletions
diff --git a/yt/yt/client/table_client/config.cpp b/yt/yt/client/table_client/config.cpp index 49898440e2..6bfd4b410c 100644 --- a/yt/yt/client/table_client/config.cpp +++ b/yt/yt/client/table_client/config.cpp @@ -464,6 +464,8 @@ void TChunkWriterOptions::Register(TRegistrar registrar) .Default(false); registrar.Parameter("enable_column_meta_in_chunk_meta", &TThis::EnableColumnMetaInChunkMeta) .Default(true); + registrar.Parameter("consider_min_row_range_data_weight", &TThis::ConsiderMinRowRangeDataWeight) + .Default(true); registrar.Parameter("schema_modification", &TThis::SchemaModification) .Default(ETableSchemaModification::None); diff --git a/yt/yt/client/table_client/config.h b/yt/yt/client/table_client/config.h index a8accf974b..2f91bcd032 100644 --- a/yt/yt/client/table_client/config.h +++ b/yt/yt/client/table_client/config.h @@ -429,6 +429,7 @@ public: bool EnableRowCountInColumnarStatistics; bool EnableSegmentMetaInBlocks; bool EnableColumnMetaInChunkMeta; + bool ConsiderMinRowRangeDataWeight; NYTree::INodePtr CastAnyToCompositeNode; diff --git a/yt/yt/core/misc/bit_packed_unsigned_vector-inl.h b/yt/yt/core/misc/bit_packed_unsigned_vector-inl.h index d1abbbbd0a..3dbb3ce3e5 100644 --- a/yt/yt/core/misc/bit_packed_unsigned_vector-inl.h +++ b/yt/yt/core/misc/bit_packed_unsigned_vector-inl.h @@ -25,7 +25,7 @@ inline size_t CompressedUnsignedVectorSizeInWords(ui64 maxValue, size_t count) inline size_t CompressedUnsignedVectorSizeInBytes(ui64 maxValue, size_t count) { - static size_t wordSize = sizeof(ui64); + static constexpr size_t wordSize = sizeof(ui64); return CompressedUnsignedVectorSizeInWords(maxValue, count) * wordSize; } diff --git a/yt/yt/core/misc/bit_packed_unsigned_vector.cpp b/yt/yt/core/misc/bit_packed_unsigned_vector.cpp index 05ff7ed88f..bec799a9f1 100644 --- a/yt/yt/core/misc/bit_packed_unsigned_vector.cpp +++ b/yt/yt/core/misc/bit_packed_unsigned_vector.cpp @@ -8,24 +8,26 @@ namespace NYT { //////////////////////////////////////////////////////////////////////////////// -void PrepareDiffFromExpected(std::vector<ui32>* values, ui32* expected, ui32* maxDiff) +std::pair<ui32, ui32> PrepareDiffFromExpected(std::vector<ui32>* values) { + ui32 expected = 0; + ui32 maxDiff = 0; + if (values->empty()) { - *expected = 0; - *maxDiff = 0; - return; + return {expected, maxDiff}; } - *expected = DivRound<int>(values->back(), values->size()); + expected = DivRound<int>(values->back(), values->size()); - *maxDiff = 0; i64 expectedValue = 0; for (int i = 0; i < std::ssize(*values); ++i) { - expectedValue += *expected; - i32 diff = values->at(i) - expectedValue; + expectedValue += expected; + i32 diff = (*values)[i] - expectedValue; (*values)[i] = ZigZagEncode32(diff); - *maxDiff = std::max(*maxDiff, (*values)[i]); + maxDiff = std::max(maxDiff, (*values)[i]); } + + return {expected, maxDiff}; } //////////////////////////////////////////////////////////////////////////////// diff --git a/yt/yt/core/misc/bit_packed_unsigned_vector.h b/yt/yt/core/misc/bit_packed_unsigned_vector.h index 5a12aaba56..4f95561267 100644 --- a/yt/yt/core/misc/bit_packed_unsigned_vector.h +++ b/yt/yt/core/misc/bit_packed_unsigned_vector.h @@ -59,7 +59,7 @@ private: //////////////////////////////////////////////////////////////////////////////// -void PrepareDiffFromExpected(std::vector<ui32>* values, ui32* expected, ui32* maxDiff); +std::pair<ui32, ui32> PrepareDiffFromExpected(std::vector<ui32>* values); //////////////////////////////////////////////////////////////////////////////// diff --git a/yt/yt/library/column_converters/string_column_converter.cpp b/yt/yt/library/column_converters/string_column_converter.cpp index 60b4e28337..797f64f4ee 100644 --- a/yt/yt/library/column_converters/string_column_converter.cpp +++ b/yt/yt/library/column_converters/string_column_converter.cpp @@ -128,9 +128,7 @@ private: auto offsets = GetDirectDenseOffsets(); // Save offsets as diff from expected. - ui32 expectedLength; - ui32 maxDiff; - PrepareDiffFromExpected(&offsets, &expectedLength, &maxDiff); + auto [expectedLength, maxDiff] = PrepareDiffFromExpected(&offsets); auto directDataSize = DirectBuffer_->GetSize(); auto directData = DirectBuffer_->Finish(); @@ -213,9 +211,7 @@ private: auto idsRef = TSharedRef::MakeCopy<TConverterTag>(TRef(ids.data(), sizeof(ui32) * ids.size())); // 2. Dictionary offsets. - ui32 expectedLength; - ui32 maxDiff; - PrepareDiffFromExpected(&dictionaryOffsets, &expectedLength, &maxDiff); + auto [expectedLength, maxDiff] = PrepareDiffFromExpected(&dictionaryOffsets); auto dictionaryOffsetsRef = TSharedRef::MakeCopy<TConverterTag>(TRef(dictionaryOffsets.data(), sizeof(ui32) * dictionaryOffsets.size())); auto primaryColumn = std::make_shared<TBatchColumn>(); |