diff options
author | ivanmorozov333 <ivanmorozov@ydb.tech> | 2025-03-04 11:15:29 +0300 |
---|---|---|
committer | GitHub <noreply@github.com> | 2025-03-04 11:15:29 +0300 |
commit | 16444c953bcbe1b53cd0f08d8f707c8267cdf85e (patch) | |
tree | 54e7d0c6d139b730c01d69853202eea67502fc03 | |
parent | 3a5cfede787f6bf6cc63627bf8bd30d9c0cfd4e8 (diff) | |
download | ydb-16444c953bcbe1b53cd0f08d8f707c8267cdf85e.tar.gz |
optimize sub columns iterations (#15271)
3 files changed, 89 insertions, 70 deletions
diff --git a/ydb/core/formats/arrow/accessor/sub_columns/columns_storage.h b/ydb/core/formats/arrow/accessor/sub_columns/columns_storage.h index 4e25d24b14..d1095c7c99 100644 --- a/ydb/core/formats/arrow/accessor/sub_columns/columns_storage.h +++ b/ydb/core/formats/arrow/accessor/sub_columns/columns_storage.h @@ -49,7 +49,7 @@ public: private: ui32 KeyIndex; std::shared_ptr<IChunkedArray> GlobalChunkedArray; - std::shared_ptr<arrow::StringArray> CurrentArrayData; + const arrow::StringArray* CurrentArrayData; std::optional<IChunkedArray::TFullChunkedArrayAddress> FullArrayAddress; std::optional<IChunkedArray::TFullDataAddress> ChunkAddress; ui32 CurrentIndex = 0; @@ -63,7 +63,7 @@ public: const ui32 localIndex = FullArrayAddress->GetAddress().GetLocalIndex(CurrentIndex); ChunkAddress = FullArrayAddress->GetArray()->GetChunk(ChunkAddress, localIndex); AFL_VERIFY(ChunkAddress->GetArray()->type()->id() == arrow::utf8()->id()); - CurrentArrayData = std::static_pointer_cast<arrow::StringArray>(ChunkAddress->GetArray()); + CurrentArrayData = static_cast<const arrow::StringArray*>(ChunkAddress->GetArray().get()); if (FullArrayAddress->GetArray()->GetType() == IChunkedArray::EType::Array) { if (CurrentArrayData->IsNull(localIndex)) { Next(); diff --git a/ydb/core/formats/arrow/accessor/sub_columns/iterators.h b/ydb/core/formats/arrow/accessor/sub_columns/iterators.h index d80557e46c..bf642e0e33 100644 --- a/ydb/core/formats/arrow/accessor/sub_columns/iterators.h +++ b/ydb/core/formats/arrow/accessor/sub_columns/iterators.h @@ -9,105 +9,123 @@ private: std::variant<TColumnsData::TIterator, TOthersData::TIterator> Iterator; std::optional<ui32> RemappedKey; std::vector<ui32> RemapKeys; + ui32 RecordIndex = 0; + ui32 KeyIndex = 0; + bool IsValidFlag = false; + bool HasValueFlag = false; + std::string_view Value; + bool IsColumnKeyFlag = false; + + void InitFromIterator(const TColumnsData::TIterator& iterator) { + RecordIndex = iterator.GetCurrentRecordIndex(); + KeyIndex = RemappedKey.value_or(iterator.GetKeyIndex()); + IsValidFlag = true; + HasValueFlag = iterator.HasValue(); + Value = iterator.GetValue(); + } + + void InitFromIterator(const TOthersData::TIterator& iterator) { + RecordIndex = iterator.GetRecordIndex(); + KeyIndex = RemapKeys.size() ? RemapKeys[iterator.GetKeyIndex()] : iterator.GetKeyIndex(); + IsValidFlag = true; + HasValueFlag = iterator.HasValue(); + Value = iterator.GetValue(); + } + + bool Initialize() { + struct TVisitor { + private: + TGeneralIterator& Owner; + public: + TVisitor(TGeneralIterator& owner) + : Owner(owner) { + } + bool operator()(TOthersData::TIterator& iterator) { + Owner.IsColumnKeyFlag = false; + if (iterator.IsValid()) { + Owner.InitFromIterator(iterator); + } else { + Owner.IsValidFlag = false; + } + return Owner.IsValidFlag; + } + bool operator()(TColumnsData::TIterator& iterator) { + Owner.IsColumnKeyFlag = true; + if (iterator.IsValid()) { + Owner.InitFromIterator(iterator); + } else { + Owner.IsValidFlag = false; + } + return Owner.IsValidFlag; + } + }; + return std::visit(TVisitor(*this), Iterator); + } public: TGeneralIterator(TColumnsData::TIterator&& iterator, const std::optional<ui32> remappedKey = {}) : Iterator(iterator) , RemappedKey(remappedKey) { + Initialize(); } TGeneralIterator(TOthersData::TIterator&& iterator, const std::vector<ui32>& remapKeys = {}) : Iterator(iterator) , RemapKeys(remapKeys) { + Initialize(); } bool IsColumnKey() const { - struct TVisitor { - bool operator()(const TOthersData::TIterator& /*iterator*/) { - return false; - } - bool operator()(const TColumnsData::TIterator& /*iterator*/) { - return true; - } - }; - TVisitor visitor; - return std::visit(visitor, Iterator); + return IsColumnKeyFlag; } bool Next() { struct TVisitor { + private: + TGeneralIterator& Owner; + public: + TVisitor(TGeneralIterator& owner) + : Owner(owner) + { + + } bool operator()(TOthersData::TIterator& iterator) { - return iterator.Next(); + if (iterator.Next()) { + Owner.InitFromIterator(iterator); + } else { + Owner.IsValidFlag = false; + } + return Owner.IsValidFlag; } bool operator()(TColumnsData::TIterator& iterator) { - return iterator.Next(); + if (iterator.Next()) { + Owner.InitFromIterator(iterator); + } else { + Owner.IsValidFlag = false; + } + return Owner.IsValidFlag; } }; - return std::visit(TVisitor(), Iterator); + return std::visit(TVisitor(*this), Iterator); } bool IsValid() const { - struct TVisitor { - bool operator()(const TOthersData::TIterator& iterator) { - return iterator.IsValid(); - } - bool operator()(const TColumnsData::TIterator& iterator) { - return iterator.IsValid(); - } - }; - return std::visit(TVisitor(), Iterator); + return IsValidFlag; } ui32 GetRecordIndex() const { - struct TVisitor { - ui32 operator()(const TOthersData::TIterator& iterator) { - return iterator.GetRecordIndex(); - } - ui32 operator()(const TColumnsData::TIterator& iterator) { - return iterator.GetCurrentRecordIndex(); - } - }; - return std::visit(TVisitor(), Iterator); + AFL_VERIFY(IsValidFlag); + return RecordIndex; } ui32 GetKeyIndex() const { - struct TVisitor { - private: - const TGeneralIterator& Owner; - - public: - TVisitor(const TGeneralIterator& owner) - : Owner(owner) { - } - ui32 operator()(const TOthersData::TIterator& iterator) { - return Owner.RemapKeys.size() ? Owner.RemapKeys[iterator.GetKeyIndex()] : iterator.GetKeyIndex(); - } - ui32 operator()(const TColumnsData::TIterator& iterator) { - return Owner.RemappedKey.value_or(iterator.GetKeyIndex()); - } - }; - return std::visit(TVisitor(*this), Iterator); + AFL_VERIFY(IsValidFlag); + return KeyIndex; } std::string_view GetValue() const { - struct TVisitor { - std::string_view operator()(const TOthersData::TIterator& iterator) { - return iterator.GetValue(); - } - std::string_view operator()(const TColumnsData::TIterator& iterator) { - return iterator.GetValue(); - } - }; - return std::visit(TVisitor(), Iterator); + AFL_VERIFY(IsValidFlag); + return Value; } - bool HasValue() const { - struct TVisitor { - bool operator()(const TOthersData::TIterator& iterator) { - return iterator.HasValue(); - } - bool operator()(const TColumnsData::TIterator& iterator) { - return iterator.HasValue(); - } - }; - return std::visit(TVisitor(), Iterator); + AFL_VERIFY(IsValidFlag); + return HasValueFlag; } - bool operator<(const TGeneralIterator& item) const { - return std::tuple(item.GetRecordIndex(), item.GetKeyIndex()) < std::tuple(GetRecordIndex(), GetKeyIndex()); + return std::tie(item.RecordIndex, item.KeyIndex) < std::tie(RecordIndex, KeyIndex); } }; diff --git a/ydb/core/tx/columnshard/engines/reader/common_reader/iterator/sub_columns_fetching.h b/ydb/core/tx/columnshard/engines/reader/common_reader/iterator/sub_columns_fetching.h index 276751c9fa..083c2ddc3c 100644 --- a/ydb/core/tx/columnshard/engines/reader/common_reader/iterator/sub_columns_fetching.h +++ b/ydb/core/tx/columnshard/engines/reader/common_reader/iterator/sub_columns_fetching.h @@ -191,6 +191,7 @@ private: AFL_VERIFY(!!StorageId); TBlobsAction blobsAction(Source->GetContext()->GetCommonContext()->GetStoragesManager(), NBlobOperations::EConsumer::SCAN); auto reading = blobsAction.GetReading(*StorageId); + reading->SetIsBackgroundProcess(false); for (auto&& i : ColumnChunks) { if (!!i.GetHeaderRange()) { const TString readBlob = blobs.Extract(*StorageId, *i.GetHeaderRange()); |