aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorivanmorozov <ivanmorozov@yandex-team.com>2023-09-05 13:42:28 +0300
committerivanmorozov <ivanmorozov@yandex-team.com>2023-09-05 14:00:10 +0300
commit9702a974d6921277cc238fc97dd6c577cdf05713 (patch)
treef65b0c93f5df734f31f606fb7d1fd077575b1ff6
parent05b1f5c6ed07a105a251d3adf7233141e3ce8059 (diff)
downloadydb-9702a974d6921277cc238fc97dd6c577cdf05713.tar.gz
KIKIMR-19213: cleaning
-rw-r--r--ydb/core/formats/arrow/arrow_filter.h2
-rw-r--r--ydb/core/formats/arrow/arrow_helpers.cpp20
-rw-r--r--ydb/core/formats/arrow/permutations.cpp2
-rw-r--r--ydb/core/formats/arrow/permutations.h2
-rw-r--r--ydb/core/formats/arrow/program.cpp9
-rw-r--r--ydb/core/tx/columnshard/engines/filter.cpp5
-rw-r--r--ydb/core/tx/columnshard/engines/predicate/container.h2
-rw-r--r--ydb/core/tx/columnshard/engines/predicate/filter.cpp2
8 files changed, 23 insertions, 21 deletions
diff --git a/ydb/core/formats/arrow/arrow_filter.h b/ydb/core/formats/arrow/arrow_filter.h
index 9e2edfb37b3..67423ec640d 100644
--- a/ydb/core/formats/arrow/arrow_filter.h
+++ b/ydb/core/formats/arrow/arrow_filter.h
@@ -154,8 +154,6 @@ public:
const std::vector<bool>& BuildSimpleFilter(const ui32 expectedSize) const;
- TColumnFilter() = default;
-
std::shared_ptr<arrow::BooleanArray> BuildArrowFilter(const ui32 expectedSize) const;
bool IsTotalAllowFilter() const;
diff --git a/ydb/core/formats/arrow/arrow_helpers.cpp b/ydb/core/formats/arrow/arrow_helpers.cpp
index 0216b673945..e0a43ddf773 100644
--- a/ydb/core/formats/arrow/arrow_helpers.cpp
+++ b/ydb/core/formats/arrow/arrow_helpers.cpp
@@ -908,26 +908,26 @@ std::shared_ptr<arrow::RecordBatch> ReallocateBatch(std::shared_ptr<arrow::Recor
return DeserializeBatch(SerializeBatch(original, arrow::ipc::IpcWriteOptions::Defaults()), original->schema());
}
-std::shared_ptr<arrow::RecordBatch> MergeColumns(const std::vector<std::shared_ptr<arrow::RecordBatch>>& rb) {
+std::shared_ptr<arrow::RecordBatch> MergeColumns(const std::vector<std::shared_ptr<arrow::RecordBatch>>& batches) {
std::vector<std::shared_ptr<arrow::Array>> columns;
std::vector<std::shared_ptr<arrow::Field>> fields;
std::optional<ui32> recordsCount;
std::set<std::string> columnNames;
- for (auto&& i : rb) {
- if (!i) {
+ for (auto&& batch : batches) {
+ if (!batch) {
continue;
}
- for (auto&& c : i->columns()) {
- columns.emplace_back(c);
+ for (auto&& column : batch->columns()) {
+ columns.emplace_back(column);
if (!recordsCount) {
- recordsCount = c->length();
+ recordsCount = column->length();
} else {
- Y_VERIFY(*recordsCount == c->length());
+ Y_VERIFY(*recordsCount == column->length());
}
}
- for (auto&& f : i->schema()->fields()) {
- Y_VERIFY(columnNames.emplace(f->name()).second);
- fields.emplace_back(f);
+ for (auto&& field : batch->schema()->fields()) {
+ AFL_VERIFY(columnNames.emplace(field->name()).second)("field_name", field->name());
+ fields.emplace_back(field);
}
}
if (columns.empty()) {
diff --git a/ydb/core/formats/arrow/permutations.cpp b/ydb/core/formats/arrow/permutations.cpp
index e1856b71a97..f16f43b9c33 100644
--- a/ydb/core/formats/arrow/permutations.cpp
+++ b/ydb/core/formats/arrow/permutations.cpp
@@ -6,7 +6,7 @@
namespace NKikimr::NArrow {
-std::shared_ptr<arrow::UInt64Array> MakePermutation(int size, bool reverse) {
+std::shared_ptr<arrow::UInt64Array> MakePermutation(const int size, const bool reverse) {
if (size < 1) {
return {};
}
diff --git a/ydb/core/formats/arrow/permutations.h b/ydb/core/formats/arrow/permutations.h
index bf83d85c862..c811cfe6805 100644
--- a/ydb/core/formats/arrow/permutations.h
+++ b/ydb/core/formats/arrow/permutations.h
@@ -5,7 +5,7 @@
namespace NKikimr::NArrow {
-std::shared_ptr<arrow::UInt64Array> MakePermutation(int size, bool reverse = false);
+std::shared_ptr<arrow::UInt64Array> MakePermutation(const int size, const bool reverse = false);
std::shared_ptr<arrow::UInt64Array> MakeFilterPermutation(const std::vector<ui64>& indexes);
std::shared_ptr<arrow::UInt64Array> MakeSortPermutation(const std::shared_ptr<arrow::RecordBatch>& batch,
const std::shared_ptr<arrow::Schema>& sortingKey);
diff --git a/ydb/core/formats/arrow/program.cpp b/ydb/core/formats/arrow/program.cpp
index 3d34cda429f..4c15596fb92 100644
--- a/ydb/core/formats/arrow/program.cpp
+++ b/ydb/core/formats/arrow/program.cpp
@@ -701,7 +701,7 @@ arrow::Status TProgramStep::ApplyFilters(TDatumBatch& batch) const {
return arrow::Status::OK();
}
- NArrow::TColumnFilter bits;
+ NArrow::TColumnFilter bits = NArrow::TColumnFilter::BuildAllowFilter();
auto status = MakeCombinedFilter(batch, bits);
if (!status.ok()) {
return status;
@@ -833,7 +833,7 @@ std::set<std::string> TProgramStep::GetColumnsInUsage() const {
NArrow::TColumnFilter TProgram::MakeEarlyFilter(const std::shared_ptr<arrow::RecordBatch>& srcBatch,
arrow::compute::ExecContext* ctx) const
{
- NArrow::TColumnFilter result;
+ NArrow::TColumnFilter result = NArrow::TColumnFilter::BuildAllowFilter();
try {
if (Steps.empty()) {
return result;
@@ -849,7 +849,7 @@ NArrow::TColumnFilter TProgram::MakeEarlyFilter(const std::shared_ptr<arrow::Rec
if (!step->ApplyAssignes(*rb, ctx).ok()) {
return result;
}
- NArrow::TColumnFilter filter;
+ NArrow::TColumnFilter filter = NArrow::TColumnFilter::BuildAllowFilter();
if (!step->MakeCombinedFilter(*rb, filter).ok()) {
return result;
}
@@ -865,6 +865,9 @@ std::set<std::string> TProgram::GetEarlyFilterColumns() const {
if (Steps.empty()) {
return result;
}
+ if (Steps[0]->Filters.empty()) {
+ return result;
+ }
return Steps[0]->GetColumnsInUsage();
}
diff --git a/ydb/core/tx/columnshard/engines/filter.cpp b/ydb/core/tx/columnshard/engines/filter.cpp
index 3f375626694..3a309757bd8 100644
--- a/ydb/core/tx/columnshard/engines/filter.cpp
+++ b/ydb/core/tx/columnshard/engines/filter.cpp
@@ -1,9 +1,10 @@
#include "filter.h"
#include "defs.h"
-#include "indexed_read_data.h"
+#include "reader/read_metadata.h"
#include <ydb/core/formats/arrow/arrow_helpers.h>
#include <ydb/core/formats/arrow/custom_registry.h>
+#include <ydb/core/formats/arrow/program.h>
namespace NKikimr::NOlap {
@@ -38,7 +39,7 @@ NArrow::TColumnFilter MakeSnapshotFilter(const std::shared_ptr<arrow::RecordBatc
Y_VERIFY(snapSchema->num_fields() == 2);
auto steps = batch->GetColumnByName(snapSchema->fields()[0]->name());
auto ids = batch->GetColumnByName(snapSchema->fields()[1]->name());
- NArrow::TColumnFilter result;
+ NArrow::TColumnFilter result = NArrow::TColumnFilter::BuildAllowFilter();
TSnapshotGetter getter(steps, ids, snapshot);
result.Reset(steps->length(), std::move(getter));
return result;
diff --git a/ydb/core/tx/columnshard/engines/predicate/container.h b/ydb/core/tx/columnshard/engines/predicate/container.h
index 22991579d86..7437872e343 100644
--- a/ydb/core/tx/columnshard/engines/predicate/container.h
+++ b/ydb/core/tx/columnshard/engines/predicate/container.h
@@ -65,7 +65,7 @@ public:
NKikimr::NArrow::TColumnFilter BuildFilter(std::shared_ptr<arrow::RecordBatch> data) const {
if (!Object) {
- return NArrow::TColumnFilter();
+ return NArrow::TColumnFilter::BuildAllowFilter();
}
return NArrow::TColumnFilter::MakePredicateFilter(data, Object->Batch, CompareType);
}
diff --git a/ydb/core/tx/columnshard/engines/predicate/filter.cpp b/ydb/core/tx/columnshard/engines/predicate/filter.cpp
index dbd1f61c0d3..3fbbf134524 100644
--- a/ydb/core/tx/columnshard/engines/predicate/filter.cpp
+++ b/ydb/core/tx/columnshard/engines/predicate/filter.cpp
@@ -5,7 +5,7 @@ namespace NKikimr::NOlap {
NKikimr::NArrow::TColumnFilter TPKRangesFilter::BuildFilter(std::shared_ptr<arrow::RecordBatch> data) const {
if (SortedRanges.empty()) {
- return NArrow::TColumnFilter();
+ return NArrow::TColumnFilter::BuildAllowFilter();
}
NArrow::TColumnFilter result = SortedRanges.front().BuildFilter(data);
for (ui32 i = 1; i < SortedRanges.size(); ++i) {