about summary refs log tree commit diff stats
diff options
context:
space:
mode:
author ivanmorozov333 <ivanmorozov@ydb.tech> 2025-03-04 11:15:29 +0300
committer GitHub <noreply@github.com> 2025-03-04 11:15:29 +0300
commit 16444c953bcbe1b53cd0f08d8f707c8267cdf85e (patch)
tree 54e7d0c6d139b730c01d69853202eea67502fc03
parent 3a5cfede787f6bf6cc63627bf8bd30d9c0cfd4e8 (diff)
download ydb-16444c953bcbe1b53cd0f08d8f707c8267cdf85e.tar.gz
optimize sub columns iterations (#15271)
-rw-r--r-- ydb/core/formats/arrow/accessor/sub_columns/columns_storage.h 4
-rw-r--r-- ydb/core/formats/arrow/accessor/sub_columns/iterators.h 154
-rw-r--r-- ydb/core/tx/columnshard/engines/reader/common_reader/iterator/sub_columns_fetching.h 1
3 files changed, 89 insertions, 70 deletions
diff --git a/ydb/core/formats/arrow/accessor/sub_columns/columns_storage.h b/ydb/core/formats/arrow/accessor/sub_columns/columns_storage.h
index 4e25d24b14..d1095c7c99 100644
--- a/ydb/core/formats/arrow/accessor/sub_columns/columns_storage.h
+++ b/ydb/core/formats/arrow/accessor/sub_columns/columns_storage.h
@@ -49,7 +49,7 @@ public:
private:
ui32 KeyIndex;
std::shared_ptr<IChunkedArray> GlobalChunkedArray;
- std::shared_ptr<arrow::StringArray> CurrentArrayData;
+ const arrow::StringArray* CurrentArrayData;
std::optional<IChunkedArray::TFullChunkedArrayAddress> FullArrayAddress;
std::optional<IChunkedArray::TFullDataAddress> ChunkAddress;
ui32 CurrentIndex = 0;
@@ -63,7 +63,7 @@ public:
const ui32 localIndex = FullArrayAddress->GetAddress().GetLocalIndex(CurrentIndex);
ChunkAddress = FullArrayAddress->GetArray()->GetChunk(ChunkAddress, localIndex);
AFL_VERIFY(ChunkAddress->GetArray()->type()->id() == arrow::utf8()->id());
- CurrentArrayData = std::static_pointer_cast<arrow::StringArray>(ChunkAddress->GetArray());
+ CurrentArrayData = static_cast<const arrow::StringArray*>(ChunkAddress->GetArray().get());
if (FullArrayAddress->GetArray()->GetType() == IChunkedArray::EType::Array) {
if (CurrentArrayData->IsNull(localIndex)) {
Next();
diff --git a/ydb/core/formats/arrow/accessor/sub_columns/iterators.h b/ydb/core/formats/arrow/accessor/sub_columns/iterators.h
index d80557e46c..bf642e0e33 100644
--- a/ydb/core/formats/arrow/accessor/sub_columns/iterators.h
+++ b/ydb/core/formats/arrow/accessor/sub_columns/iterators.h
@@ -9,105 +9,123 @@ private:
std::variant<TColumnsData::TIterator, TOthersData::TIterator> Iterator;
std::optional<ui32> RemappedKey;
std::vector<ui32> RemapKeys;
+ ui32 RecordIndex = 0;
+ ui32 KeyIndex = 0;
+ bool IsValidFlag = false;
+ bool HasValueFlag = false;
+ std::string_view Value;
+ bool IsColumnKeyFlag = false;
+
+ void InitFromIterator(const TColumnsData::TIterator& iterator) {
+ RecordIndex = iterator.GetCurrentRecordIndex();
+ KeyIndex = RemappedKey.value_or(iterator.GetKeyIndex());
+ IsValidFlag = true;
+ HasValueFlag = iterator.HasValue();
+ Value = iterator.GetValue();
+ }
+
+ void InitFromIterator(const TOthersData::TIterator& iterator) {
+ RecordIndex = iterator.GetRecordIndex();
+ KeyIndex = RemapKeys.size() ? RemapKeys[iterator.GetKeyIndex()] : iterator.GetKeyIndex();
+ IsValidFlag = true;
+ HasValueFlag = iterator.HasValue();
+ Value = iterator.GetValue();
+ }
+
+ bool Initialize() {
+ struct TVisitor {
+ private:
+ TGeneralIterator& Owner;
+ public:
+ TVisitor(TGeneralIterator& owner)
+ : Owner(owner) {
+ }
+ bool operator()(TOthersData::TIterator& iterator) {
+ Owner.IsColumnKeyFlag = false;
+ if (iterator.IsValid()) {
+ Owner.InitFromIterator(iterator);
+ } else {
+ Owner.IsValidFlag = false;
+ }
+ return Owner.IsValidFlag;
+ }
+ bool operator()(TColumnsData::TIterator& iterator) {
+ Owner.IsColumnKeyFlag = true;
+ if (iterator.IsValid()) {
+ Owner.InitFromIterator(iterator);
+ } else {
+ Owner.IsValidFlag = false;
+ }
+ return Owner.IsValidFlag;
+ }
+ };
+ return std::visit(TVisitor(*this), Iterator);
+ }
public:
TGeneralIterator(TColumnsData::TIterator&& iterator, const std::optional<ui32> remappedKey = {})
: Iterator(iterator)
, RemappedKey(remappedKey) {
+ Initialize();
}
TGeneralIterator(TOthersData::TIterator&& iterator, const std::vector<ui32>& remapKeys = {})
: Iterator(iterator)
, RemapKeys(remapKeys) {
+ Initialize();
}
bool IsColumnKey() const {
- struct TVisitor {
- bool operator()(const TOthersData::TIterator& /*iterator*/) {
- return false;
- }
- bool operator()(const TColumnsData::TIterator& /*iterator*/) {
- return true;
- }
- };
- TVisitor visitor;
- return std::visit(visitor, Iterator);
+ return IsColumnKeyFlag;
}
bool Next() {
struct TVisitor {
+ private:
+ TGeneralIterator& Owner;
+ public:
+ TVisitor(TGeneralIterator& owner)
+ : Owner(owner)
+ {
+
+ }
bool operator()(TOthersData::TIterator& iterator) {
- return iterator.Next();
+ if (iterator.Next()) {
+ Owner.InitFromIterator(iterator);
+ } else {
+ Owner.IsValidFlag = false;
+ }
+ return Owner.IsValidFlag;
}
bool operator()(TColumnsData::TIterator& iterator) {
- return iterator.Next();
+ if (iterator.Next()) {
+ Owner.InitFromIterator(iterator);
+ } else {
+ Owner.IsValidFlag = false;
+ }
+ return Owner.IsValidFlag;
}
};
- return std::visit(TVisitor(), Iterator);
+ return std::visit(TVisitor(*this), Iterator);
}
bool IsValid() const {
- struct TVisitor {
- bool operator()(const TOthersData::TIterator& iterator) {
- return iterator.IsValid();
- }
- bool operator()(const TColumnsData::TIterator& iterator) {
- return iterator.IsValid();
- }
- };
- return std::visit(TVisitor(), Iterator);
+ return IsValidFlag;
}
ui32 GetRecordIndex() const {
- struct TVisitor {
- ui32 operator()(const TOthersData::TIterator& iterator) {
- return iterator.GetRecordIndex();
- }
- ui32 operator()(const TColumnsData::TIterator& iterator) {
- return iterator.GetCurrentRecordIndex();
- }
- };
- return std::visit(TVisitor(), Iterator);
+ AFL_VERIFY(IsValidFlag);
+ return RecordIndex;
}
ui32 GetKeyIndex() const {
- struct TVisitor {
- private:
- const TGeneralIterator& Owner;
-
- public:
- TVisitor(const TGeneralIterator& owner)
- : Owner(owner) {
- }
- ui32 operator()(const TOthersData::TIterator& iterator) {
- return Owner.RemapKeys.size() ? Owner.RemapKeys[iterator.GetKeyIndex()] : iterator.GetKeyIndex();
- }
- ui32 operator()(const TColumnsData::TIterator& iterator) {
- return Owner.RemappedKey.value_or(iterator.GetKeyIndex());
- }
- };
- return std::visit(TVisitor(*this), Iterator);
+ AFL_VERIFY(IsValidFlag);
+ return KeyIndex;
}
std::string_view GetValue() const {
- struct TVisitor {
- std::string_view operator()(const TOthersData::TIterator& iterator) {
- return iterator.GetValue();
- }
- std::string_view operator()(const TColumnsData::TIterator& iterator) {
- return iterator.GetValue();
- }
- };
- return std::visit(TVisitor(), Iterator);
+ AFL_VERIFY(IsValidFlag);
+ return Value;
}
-
bool HasValue() const {
- struct TVisitor {
- bool operator()(const TOthersData::TIterator& iterator) {
- return iterator.HasValue();
- }
- bool operator()(const TColumnsData::TIterator& iterator) {
- return iterator.HasValue();
- }
- };
- return std::visit(TVisitor(), Iterator);
+ AFL_VERIFY(IsValidFlag);
+ return HasValueFlag;
}
-
bool operator<(const TGeneralIterator& item) const {
- return std::tuple(item.GetRecordIndex(), item.GetKeyIndex()) < std::tuple(GetRecordIndex(), GetKeyIndex());
+ return std::tie(item.RecordIndex, item.KeyIndex) < std::tie(RecordIndex, KeyIndex);
}
};
diff --git a/ydb/core/tx/columnshard/engines/reader/common_reader/iterator/sub_columns_fetching.h b/ydb/core/tx/columnshard/engines/reader/common_reader/iterator/sub_columns_fetching.h
index 276751c9fa..083c2ddc3c 100644
--- a/ydb/core/tx/columnshard/engines/reader/common_reader/iterator/sub_columns_fetching.h
+++ b/ydb/core/tx/columnshard/engines/reader/common_reader/iterator/sub_columns_fetching.h
@@ -191,6 +191,7 @@ private:
AFL_VERIFY(!!StorageId);
TBlobsAction blobsAction(Source->GetContext()->GetCommonContext()->GetStoragesManager(), NBlobOperations::EConsumer::SCAN);
auto reading = blobsAction.GetReading(*StorageId);
+ reading->SetIsBackgroundProcess(false);
for (auto&& i : ColumnChunks) {
if (!!i.GetHeaderRange()) {
const TString readBlob = blobs.Extract(*StorageId, *i.GetHeaderRange());