diff options
author | ivanmorozov <ivanmorozov@yandex-team.com> | 2023-09-05 13:42:28 +0300 |
---|---|---|
committer | ivanmorozov <ivanmorozov@yandex-team.com> | 2023-09-05 14:00:10 +0300 |
commit | 9702a974d6921277cc238fc97dd6c577cdf05713 (patch) | |
tree | f65b0c93f5df734f31f606fb7d1fd077575b1ff6 | |
parent | 05b1f5c6ed07a105a251d3adf7233141e3ce8059 (diff) | |
download | ydb-9702a974d6921277cc238fc97dd6c577cdf05713.tar.gz |
KIKIMR-19213: cleaning
-rw-r--r-- | ydb/core/formats/arrow/arrow_filter.h | 2 | ||||
-rw-r--r-- | ydb/core/formats/arrow/arrow_helpers.cpp | 20 | ||||
-rw-r--r-- | ydb/core/formats/arrow/permutations.cpp | 2 | ||||
-rw-r--r-- | ydb/core/formats/arrow/permutations.h | 2 | ||||
-rw-r--r-- | ydb/core/formats/arrow/program.cpp | 9 | ||||
-rw-r--r-- | ydb/core/tx/columnshard/engines/filter.cpp | 5 | ||||
-rw-r--r-- | ydb/core/tx/columnshard/engines/predicate/container.h | 2 | ||||
-rw-r--r-- | ydb/core/tx/columnshard/engines/predicate/filter.cpp | 2 |
8 files changed, 23 insertions, 21 deletions
diff --git a/ydb/core/formats/arrow/arrow_filter.h b/ydb/core/formats/arrow/arrow_filter.h index 9e2edfb37b3..67423ec640d 100644 --- a/ydb/core/formats/arrow/arrow_filter.h +++ b/ydb/core/formats/arrow/arrow_filter.h @@ -154,8 +154,6 @@ public: const std::vector<bool>& BuildSimpleFilter(const ui32 expectedSize) const; - TColumnFilter() = default; - std::shared_ptr<arrow::BooleanArray> BuildArrowFilter(const ui32 expectedSize) const; bool IsTotalAllowFilter() const; diff --git a/ydb/core/formats/arrow/arrow_helpers.cpp b/ydb/core/formats/arrow/arrow_helpers.cpp index 0216b673945..e0a43ddf773 100644 --- a/ydb/core/formats/arrow/arrow_helpers.cpp +++ b/ydb/core/formats/arrow/arrow_helpers.cpp @@ -908,26 +908,26 @@ std::shared_ptr<arrow::RecordBatch> ReallocateBatch(std::shared_ptr<arrow::Recor return DeserializeBatch(SerializeBatch(original, arrow::ipc::IpcWriteOptions::Defaults()), original->schema()); } -std::shared_ptr<arrow::RecordBatch> MergeColumns(const std::vector<std::shared_ptr<arrow::RecordBatch>>& rb) { +std::shared_ptr<arrow::RecordBatch> MergeColumns(const std::vector<std::shared_ptr<arrow::RecordBatch>>& batches) { std::vector<std::shared_ptr<arrow::Array>> columns; std::vector<std::shared_ptr<arrow::Field>> fields; std::optional<ui32> recordsCount; std::set<std::string> columnNames; - for (auto&& i : rb) { - if (!i) { + for (auto&& batch : batches) { + if (!batch) { continue; } - for (auto&& c : i->columns()) { - columns.emplace_back(c); + for (auto&& column : batch->columns()) { + columns.emplace_back(column); if (!recordsCount) { - recordsCount = c->length(); + recordsCount = column->length(); } else { - Y_VERIFY(*recordsCount == c->length()); + Y_VERIFY(*recordsCount == column->length()); } } - for (auto&& f : i->schema()->fields()) { - Y_VERIFY(columnNames.emplace(f->name()).second); - fields.emplace_back(f); + for (auto&& field : batch->schema()->fields()) { + AFL_VERIFY(columnNames.emplace(field->name()).second)("field_name", field->name()); + fields.emplace_back(field); } } if (columns.empty()) { diff --git a/ydb/core/formats/arrow/permutations.cpp b/ydb/core/formats/arrow/permutations.cpp index e1856b71a97..f16f43b9c33 100644 --- a/ydb/core/formats/arrow/permutations.cpp +++ b/ydb/core/formats/arrow/permutations.cpp @@ -6,7 +6,7 @@ namespace NKikimr::NArrow { -std::shared_ptr<arrow::UInt64Array> MakePermutation(int size, bool reverse) { +std::shared_ptr<arrow::UInt64Array> MakePermutation(const int size, const bool reverse) { if (size < 1) { return {}; } diff --git a/ydb/core/formats/arrow/permutations.h b/ydb/core/formats/arrow/permutations.h index bf83d85c862..c811cfe6805 100644 --- a/ydb/core/formats/arrow/permutations.h +++ b/ydb/core/formats/arrow/permutations.h @@ -5,7 +5,7 @@ namespace NKikimr::NArrow { -std::shared_ptr<arrow::UInt64Array> MakePermutation(int size, bool reverse = false); +std::shared_ptr<arrow::UInt64Array> MakePermutation(const int size, const bool reverse = false); std::shared_ptr<arrow::UInt64Array> MakeFilterPermutation(const std::vector<ui64>& indexes); std::shared_ptr<arrow::UInt64Array> MakeSortPermutation(const std::shared_ptr<arrow::RecordBatch>& batch, const std::shared_ptr<arrow::Schema>& sortingKey); diff --git a/ydb/core/formats/arrow/program.cpp b/ydb/core/formats/arrow/program.cpp index 3d34cda429f..4c15596fb92 100644 --- a/ydb/core/formats/arrow/program.cpp +++ b/ydb/core/formats/arrow/program.cpp @@ -701,7 +701,7 @@ arrow::Status TProgramStep::ApplyFilters(TDatumBatch& batch) const { return arrow::Status::OK(); } - NArrow::TColumnFilter bits; + NArrow::TColumnFilter bits = NArrow::TColumnFilter::BuildAllowFilter(); auto status = MakeCombinedFilter(batch, bits); if (!status.ok()) { return status; @@ -833,7 +833,7 @@ std::set<std::string> TProgramStep::GetColumnsInUsage() const { NArrow::TColumnFilter TProgram::MakeEarlyFilter(const std::shared_ptr<arrow::RecordBatch>& srcBatch, arrow::compute::ExecContext* ctx) const { - NArrow::TColumnFilter result; + NArrow::TColumnFilter result = NArrow::TColumnFilter::BuildAllowFilter(); try { if (Steps.empty()) { return result; @@ -849,7 +849,7 @@ NArrow::TColumnFilter TProgram::MakeEarlyFilter(const std::shared_ptr<arrow::Rec if (!step->ApplyAssignes(*rb, ctx).ok()) { return result; } - NArrow::TColumnFilter filter; + NArrow::TColumnFilter filter = NArrow::TColumnFilter::BuildAllowFilter(); if (!step->MakeCombinedFilter(*rb, filter).ok()) { return result; } @@ -865,6 +865,9 @@ std::set<std::string> TProgram::GetEarlyFilterColumns() const { if (Steps.empty()) { return result; } + if (Steps[0]->Filters.empty()) { + return result; + } return Steps[0]->GetColumnsInUsage(); } diff --git a/ydb/core/tx/columnshard/engines/filter.cpp b/ydb/core/tx/columnshard/engines/filter.cpp index 3f375626694..3a309757bd8 100644 --- a/ydb/core/tx/columnshard/engines/filter.cpp +++ b/ydb/core/tx/columnshard/engines/filter.cpp @@ -1,9 +1,10 @@ #include "filter.h" #include "defs.h" -#include "indexed_read_data.h" +#include "reader/read_metadata.h" #include <ydb/core/formats/arrow/arrow_helpers.h> #include <ydb/core/formats/arrow/custom_registry.h> +#include <ydb/core/formats/arrow/program.h> namespace NKikimr::NOlap { @@ -38,7 +39,7 @@ NArrow::TColumnFilter MakeSnapshotFilter(const std::shared_ptr<arrow::RecordBatc Y_VERIFY(snapSchema->num_fields() == 2); auto steps = batch->GetColumnByName(snapSchema->fields()[0]->name()); auto ids = batch->GetColumnByName(snapSchema->fields()[1]->name()); - NArrow::TColumnFilter result; + NArrow::TColumnFilter result = NArrow::TColumnFilter::BuildAllowFilter(); TSnapshotGetter getter(steps, ids, snapshot); result.Reset(steps->length(), std::move(getter)); return result; diff --git a/ydb/core/tx/columnshard/engines/predicate/container.h b/ydb/core/tx/columnshard/engines/predicate/container.h index 22991579d86..7437872e343 100644 --- a/ydb/core/tx/columnshard/engines/predicate/container.h +++ b/ydb/core/tx/columnshard/engines/predicate/container.h @@ -65,7 +65,7 @@ public: NKikimr::NArrow::TColumnFilter BuildFilter(std::shared_ptr<arrow::RecordBatch> data) const { if (!Object) { - return NArrow::TColumnFilter(); + return NArrow::TColumnFilter::BuildAllowFilter(); } return NArrow::TColumnFilter::MakePredicateFilter(data, Object->Batch, CompareType); } diff --git a/ydb/core/tx/columnshard/engines/predicate/filter.cpp b/ydb/core/tx/columnshard/engines/predicate/filter.cpp index dbd1f61c0d3..3fbbf134524 100644 --- a/ydb/core/tx/columnshard/engines/predicate/filter.cpp +++ b/ydb/core/tx/columnshard/engines/predicate/filter.cpp @@ -5,7 +5,7 @@ namespace NKikimr::NOlap { NKikimr::NArrow::TColumnFilter TPKRangesFilter::BuildFilter(std::shared_ptr<arrow::RecordBatch> data) const { if (SortedRanges.empty()) { - return NArrow::TColumnFilter(); + return NArrow::TColumnFilter::BuildAllowFilter(); } NArrow::TColumnFilter result = SortedRanges.front().BuildFilter(data); for (ui32 i = 1; i < SortedRanges.size(); ++i) { |