about summary refs log tree commit diff stats
diff options
context:
space:
mode:
author sabdenovch <sabdenovch@yandex-team.com> 2024-11-15 23:35:53 +0300
committer sabdenovch <sabdenovch@yandex-team.com> 2024-11-15 23:50:19 +0300
commit 04b5229959af1b72ccc722dc45c01ae1490fe5bf (patch)
tree 9c38447439e8e08f3b336d2aca89b67c576f111a
parent b7294c0951553e0e13beb4abe957b93b0498a54c (diff)
download ydb-04b5229959af1b72ccc722dc45c01ae1490fe5bf.tar.gz
YT-23439: Fix block size estimation in scan writer
commit_hash:6bc02d261d376710e0d08f9372def30d3acfa949
-rw-r--r-- yt/yt/client/table_client/config.cpp 2
-rw-r--r-- yt/yt/client/table_client/config.h 1
-rw-r--r-- yt/yt/core/misc/bit_packed_unsigned_vector-inl.h 2
-rw-r--r-- yt/yt/core/misc/bit_packed_unsigned_vector.cpp 20
-rw-r--r-- yt/yt/core/misc/bit_packed_unsigned_vector.h 2
-rw-r--r-- yt/yt/library/column_converters/string_column_converter.cpp 8
6 files changed, 18 insertions, 17 deletions
diff --git a/yt/yt/client/table_client/config.cpp b/yt/yt/client/table_client/config.cpp
index 49898440e2..6bfd4b410c 100644
--- a/yt/yt/client/table_client/config.cpp
+++ b/yt/yt/client/table_client/config.cpp
@@ -464,6 +464,8 @@ void TChunkWriterOptions::Register(TRegistrar registrar)
.Default(false);
registrar.Parameter("enable_column_meta_in_chunk_meta", &TThis::EnableColumnMetaInChunkMeta)
.Default(true);
+ registrar.Parameter("consider_min_row_range_data_weight", &TThis::ConsiderMinRowRangeDataWeight)
+ .Default(true);
registrar.Parameter("schema_modification", &TThis::SchemaModification)
.Default(ETableSchemaModification::None);
diff --git a/yt/yt/client/table_client/config.h b/yt/yt/client/table_client/config.h
index a8accf974b..2f91bcd032 100644
--- a/yt/yt/client/table_client/config.h
+++ b/yt/yt/client/table_client/config.h
@@ -429,6 +429,7 @@ public:
bool EnableRowCountInColumnarStatistics;
bool EnableSegmentMetaInBlocks;
bool EnableColumnMetaInChunkMeta;
+ bool ConsiderMinRowRangeDataWeight;
NYTree::INodePtr CastAnyToCompositeNode;
diff --git a/yt/yt/core/misc/bit_packed_unsigned_vector-inl.h b/yt/yt/core/misc/bit_packed_unsigned_vector-inl.h
index d1abbbbd0a..3dbb3ce3e5 100644
--- a/yt/yt/core/misc/bit_packed_unsigned_vector-inl.h
+++ b/yt/yt/core/misc/bit_packed_unsigned_vector-inl.h
@@ -25,7 +25,7 @@ inline size_t CompressedUnsignedVectorSizeInWords(ui64 maxValue, size_t count)
inline size_t CompressedUnsignedVectorSizeInBytes(ui64 maxValue, size_t count)
{
- static size_t wordSize = sizeof(ui64);
+ static constexpr size_t wordSize = sizeof(ui64);
return CompressedUnsignedVectorSizeInWords(maxValue, count) * wordSize;
}
diff --git a/yt/yt/core/misc/bit_packed_unsigned_vector.cpp b/yt/yt/core/misc/bit_packed_unsigned_vector.cpp
index 05ff7ed88f..bec799a9f1 100644
--- a/yt/yt/core/misc/bit_packed_unsigned_vector.cpp
+++ b/yt/yt/core/misc/bit_packed_unsigned_vector.cpp
@@ -8,24 +8,26 @@ namespace NYT {
////////////////////////////////////////////////////////////////////////////////
-void PrepareDiffFromExpected(std::vector<ui32>* values, ui32* expected, ui32* maxDiff)
+std::pair<ui32, ui32> PrepareDiffFromExpected(std::vector<ui32>* values)
{
+ ui32 expected = 0;
+ ui32 maxDiff = 0;
+
if (values->empty()) {
- *expected = 0;
- *maxDiff = 0;
- return;
+ return {expected, maxDiff};
}
- *expected = DivRound<int>(values->back(), values->size());
+ expected = DivRound<int>(values->back(), values->size());
- *maxDiff = 0;
i64 expectedValue = 0;
for (int i = 0; i < std::ssize(*values); ++i) {
- expectedValue += *expected;
- i32 diff = values->at(i) - expectedValue;
+ expectedValue += expected;
+ i32 diff = (*values)[i] - expectedValue;
(*values)[i] = ZigZagEncode32(diff);
- *maxDiff = std::max(*maxDiff, (*values)[i]);
+ maxDiff = std::max(maxDiff, (*values)[i]);
}
+
+ return {expected, maxDiff};
}
////////////////////////////////////////////////////////////////////////////////
diff --git a/yt/yt/core/misc/bit_packed_unsigned_vector.h b/yt/yt/core/misc/bit_packed_unsigned_vector.h
index 5a12aaba56..4f95561267 100644
--- a/yt/yt/core/misc/bit_packed_unsigned_vector.h
+++ b/yt/yt/core/misc/bit_packed_unsigned_vector.h
@@ -59,7 +59,7 @@ private:
////////////////////////////////////////////////////////////////////////////////
-void PrepareDiffFromExpected(std::vector<ui32>* values, ui32* expected, ui32* maxDiff);
+std::pair<ui32, ui32> PrepareDiffFromExpected(std::vector<ui32>* values);
////////////////////////////////////////////////////////////////////////////////
diff --git a/yt/yt/library/column_converters/string_column_converter.cpp b/yt/yt/library/column_converters/string_column_converter.cpp
index 60b4e28337..797f64f4ee 100644
--- a/yt/yt/library/column_converters/string_column_converter.cpp
+++ b/yt/yt/library/column_converters/string_column_converter.cpp
@@ -128,9 +128,7 @@ private:
auto offsets = GetDirectDenseOffsets();
// Save offsets as diff from expected.
- ui32 expectedLength;
- ui32 maxDiff;
- PrepareDiffFromExpected(&offsets, &expectedLength, &maxDiff);
+ auto [expectedLength, maxDiff] = PrepareDiffFromExpected(&offsets);
auto directDataSize = DirectBuffer_->GetSize();
auto directData = DirectBuffer_->Finish();
@@ -213,9 +211,7 @@ private:
auto idsRef = TSharedRef::MakeCopy<TConverterTag>(TRef(ids.data(), sizeof(ui32) * ids.size()));
// 2. Dictionary offsets.
- ui32 expectedLength;
- ui32 maxDiff;
- PrepareDiffFromExpected(&dictionaryOffsets, &expectedLength, &maxDiff);
+ auto [expectedLength, maxDiff] = PrepareDiffFromExpected(&dictionaryOffsets);
auto dictionaryOffsetsRef = TSharedRef::MakeCopy<TConverterTag>(TRef(dictionaryOffsets.data(), sizeof(ui32) * dictionaryOffsets.size()));
auto primaryColumn = std::make_shared<TBatchColumn>();