about summary refs log tree commit diff stats
diff options
context:
space:
mode:
author ivanmorozov333 <111685085+ivanmorozov333@users.noreply.github.com> 2024-01-09 22:55:11 +0300
committer GitHub <noreply@github.com> 2024-01-09 22:55:11 +0300
commit 24e98245ccd5f64696c1932292cbc366453c6005 (patch)
tree 322071b7cade5fa299f402d70848701f3ed37e90
parent 3175dd350105c702ac17667232d4467c32734457 (diff)
download ydb-24e98245ccd5f64696c1932292cbc366453c6005.tar.gz
fix trivial batch modification detector (#896)
-rw-r--r-- ydb/core/formats/arrow/arrow_helpers.cpp 7
-rw-r--r-- ydb/core/formats/arrow/permutations.cpp 8
-rw-r--r-- ydb/core/formats/arrow/permutations.h 3
3 files changed, 11 insertions, 7 deletions
diff --git a/ydb/core/formats/arrow/arrow_helpers.cpp b/ydb/core/formats/arrow/arrow_helpers.cpp
index c66abcf7a5..eac6a6670e 100644
--- a/ydb/core/formats/arrow/arrow_helpers.cpp
+++ b/ydb/core/formats/arrow/arrow_helpers.cpp
@@ -363,7 +363,10 @@ std::vector<std::shared_ptr<arrow::RecordBatch>> SliceSortedBatches(const std::v
}
// Check if the permutation doesn't reorder anything
-bool IsNoOp(const arrow::UInt64Array& permutation) {
+bool IsTrivial(const arrow::UInt64Array& permutation, const ui64 originalLength) {
+ if ((ui64)permutation.length() != originalLength) {
+ return false;
+ }
for (i64 i = 0; i < permutation.length(); ++i) {
if (permutation.Value(i) != (ui64)i) {
return false;
@@ -376,7 +379,7 @@ std::shared_ptr<arrow::RecordBatch> Reorder(const std::shared_ptr<arrow::RecordB
const std::shared_ptr<arrow::UInt64Array>& permutation, const bool canRemove) {
Y_ABORT_UNLESS(permutation->length() == batch->num_rows() || canRemove);
- auto res = IsNoOp(*permutation) ? batch : arrow::compute::Take(batch, permutation);
+ auto res = IsTrivial(*permutation, batch->num_rows()) ? batch : arrow::compute::Take(batch, permutation);
Y_ABORT_UNLESS(res.ok());
return (*res).record_batch();
}
diff --git a/ydb/core/formats/arrow/permutations.cpp b/ydb/core/formats/arrow/permutations.cpp
index a2f13517f7..6f38f9fb00 100644
--- a/ydb/core/formats/arrow/permutations.cpp
+++ b/ydb/core/formats/arrow/permutations.cpp
@@ -46,9 +46,11 @@ std::shared_ptr<arrow::UInt64Array> MakePermutation(const int size, const bool r
return out;
}
-std::shared_ptr<arrow::UInt64Array> MakeSortPermutation(const std::shared_ptr<arrow::RecordBatch>& batch,
- const std::shared_ptr<arrow::Schema>& sortingKey, const bool andUnique) {
- auto keyBatch = ExtractColumns(batch, sortingKey);
+std::shared_ptr<arrow::UInt64Array> MakeSortPermutation(const std::shared_ptr<arrow::RecordBatch>& batch, const std::shared_ptr<arrow::Schema>& sortingKey, const bool andUnique) {
+ auto keyBatch = ExtractColumns(batch, sortingKey, false);
+ AFL_VERIFY(batch);
+ AFL_VERIFY(sortingKey);
+ AFL_VERIFY(!!keyBatch)("problem", "cannot_find_columns")("schema", batch->schema()->ToString())("columns", sortingKey->ToString());
auto keyColumns = std::make_shared<TArrayVec>(keyBatch->columns());
std::vector<TRawReplaceKey> points;
points.reserve(keyBatch->num_rows());
diff --git a/ydb/core/formats/arrow/permutations.h b/ydb/core/formats/arrow/permutations.h
index b451aea788..584db83508 100644
--- a/ydb/core/formats/arrow/permutations.h
+++ b/ydb/core/formats/arrow/permutations.h
@@ -140,8 +140,7 @@ public:
std::shared_ptr<arrow::UInt64Array> MakePermutation(const int size, const bool reverse = false);
std::shared_ptr<arrow::UInt64Array> MakeFilterPermutation(const std::vector<ui64>& indexes);
std::shared_ptr<arrow::UInt64Array> MakeFilterPermutation(const std::vector<ui32>& indexes);
-std::shared_ptr<arrow::UInt64Array> MakeSortPermutation(const std::shared_ptr<arrow::RecordBatch>& batch,
- const std::shared_ptr<arrow::Schema>& sortingKey, const bool andUnique);
+std::shared_ptr<arrow::UInt64Array> MakeSortPermutation(const std::shared_ptr<arrow::RecordBatch>& batch, const std::shared_ptr<arrow::Schema>& sortingKey, const bool andUnique);
std::shared_ptr<arrow::RecordBatch> ReverseRecords(const std::shared_ptr<arrow::RecordBatch>& batch);
std::shared_ptr<arrow::Array> CopyRecords(const std::shared_ptr<arrow::Array>& source, const std::vector<ui64>& indexes);