diff options
author | robot-piglet <robot-piglet@yandex-team.com> | 2023-09-25 17:03:24 +0300 |
---|---|---|
committer | robot-piglet <robot-piglet@yandex-team.com> | 2023-09-25 17:36:42 +0300 |
commit | dabbc4dc060aa8a91a8bf6eb8866de4123e410de (patch) | |
tree | 7af3f801a962ba972cbd4ff71731e38c4f24599c | |
parent | f35fa989ed187792c3224b72ce742f141c25fd1a (diff) | |
download | ydb-dabbc4dc060aa8a91a8bf6eb8866de4123e410de.tar.gz |
Intermediate changes
-rw-r--r-- | yt/yt/library/column_converters/string_column_converter.cpp | 28 |
1 files changed, 22 insertions, 6 deletions
diff --git a/yt/yt/library/column_converters/string_column_converter.cpp b/yt/yt/library/column_converters/string_column_converter.cpp index c8a4354c47d..59a77592eed 100644 --- a/yt/yt/library/column_converters/string_column_converter.cpp +++ b/yt/yt/library/column_converters/string_column_converter.cpp @@ -7,6 +7,8 @@ #include <yt/yt/core/misc/bit_packed_unsigned_vector.h> +#include <library/cpp/yt/memory/chunked_output_stream.h> + #include <library/cpp/yt/string/string_builder.h> namespace NYT::NColumnConverters { @@ -75,15 +77,18 @@ private: std::vector<TStringBuf> Values_; THashMap<TStringBuf, ui32> Dictionary_; - TStringBuilder DirectBuffer_; + std::unique_ptr<TChunkedOutputStream> DirectBuffer_; void Reset() { AllStringsSize_ = 0; RowCount_ = 0; DictionaryByteSize_ = 0; + DirectBuffer_ = std::make_unique<TChunkedOutputStream>( + GetRefCountedTypeCookie<TConverterTag>(), + 256_KB, + 1_MB); - DirectBuffer_.Reset(); Values_.clear(); Dictionary_.clear(); } @@ -122,10 +127,21 @@ private: ui32 maxDiff; PrepareDiffFromExpected(&offsets, &expectedLength, &maxDiff); - auto directData = DirectBuffer_.GetBuffer(); + auto directDataSize = DirectBuffer_->GetSize(); + auto directData = DirectBuffer_->Finish(); auto offsetsRef = TSharedRef::MakeCopy<TConverterTag>(TRef(offsets.data(), sizeof(ui32) * offsets.size())); - auto directDataPtr = TSharedRef::MakeCopy<TConverterTag>(TRef(directData.data(), directData.size())); + + auto directDataPtr = TSharedMutableRef::Allocate<TConverterTag>(directDataSize, {.InitializeStorage = false}); + ui32 directOffset = 0; + for (auto directDataChunk : directData) { + std::memcpy( + directDataPtr.Begin() + directOffset, + directDataChunk.Begin(), + directDataChunk.Size()); + directOffset += directDataChunk.Size(); + } + auto column = std::make_shared<TBatchColumn>(); FillColumnarStringValues( @@ -310,7 +326,7 @@ private: ? GetYsonSize(unversionedValue) : static_cast<i64>(unversionedValue.Length); - char* buffer = DirectBuffer_.Preallocate(valueCapacity); + char* buffer = DirectBuffer_->Preallocate(valueCapacity); if (!buffer) { // This means, that we reserved nothing, because all strings are either null or empty. // To distinguish between null and empty, we set preallocated pointer to special value. @@ -335,7 +351,7 @@ private: YT_VERIFY(value.size() <= valueCapacity); - DirectBuffer_.Advance(value.size()); + DirectBuffer_->Advance(value.size()); if (Dictionary_.emplace(value, Dictionary_.size() + 1).second) { DictionaryByteSize_ += value.size(); |