diff options
author | ivanmorozov333 <111685085+ivanmorozov333@users.noreply.github.com> | 2024-01-09 22:55:11 +0300 |
---|---|---|
committer | GitHub <noreply@github.com> | 2024-01-09 22:55:11 +0300 |
commit | 24e98245ccd5f64696c1932292cbc366453c6005 (patch) | |
tree | 322071b7cade5fa299f402d70848701f3ed37e90 | |
parent | 3175dd350105c702ac17667232d4467c32734457 (diff) | |
download | ydb-24e98245ccd5f64696c1932292cbc366453c6005.tar.gz |
fix trivial batch modification detector (#896)
-rw-r--r-- | ydb/core/formats/arrow/arrow_helpers.cpp | 7 | ||||
-rw-r--r-- | ydb/core/formats/arrow/permutations.cpp | 8 | ||||
-rw-r--r-- | ydb/core/formats/arrow/permutations.h | 3 |
3 files changed, 11 insertions, 7 deletions
diff --git a/ydb/core/formats/arrow/arrow_helpers.cpp b/ydb/core/formats/arrow/arrow_helpers.cpp index c66abcf7a5..eac6a6670e 100644 --- a/ydb/core/formats/arrow/arrow_helpers.cpp +++ b/ydb/core/formats/arrow/arrow_helpers.cpp @@ -363,7 +363,10 @@ std::vector<std::shared_ptr<arrow::RecordBatch>> SliceSortedBatches(const std::v } // Check if the permutation doesn't reorder anything -bool IsNoOp(const arrow::UInt64Array& permutation) { +bool IsTrivial(const arrow::UInt64Array& permutation, const ui64 originalLength) { + if ((ui64)permutation.length() != originalLength) { + return false; + } for (i64 i = 0; i < permutation.length(); ++i) { if (permutation.Value(i) != (ui64)i) { return false; @@ -376,7 +379,7 @@ std::shared_ptr<arrow::RecordBatch> Reorder(const std::shared_ptr<arrow::RecordB const std::shared_ptr<arrow::UInt64Array>& permutation, const bool canRemove) { Y_ABORT_UNLESS(permutation->length() == batch->num_rows() || canRemove); - auto res = IsNoOp(*permutation) ? batch : arrow::compute::Take(batch, permutation); + auto res = IsTrivial(*permutation, batch->num_rows()) ? batch : arrow::compute::Take(batch, permutation); Y_ABORT_UNLESS(res.ok()); return (*res).record_batch(); } diff --git a/ydb/core/formats/arrow/permutations.cpp b/ydb/core/formats/arrow/permutations.cpp index a2f13517f7..6f38f9fb00 100644 --- a/ydb/core/formats/arrow/permutations.cpp +++ b/ydb/core/formats/arrow/permutations.cpp @@ -46,9 +46,11 @@ std::shared_ptr<arrow::UInt64Array> MakePermutation(const int size, const bool r return out; } -std::shared_ptr<arrow::UInt64Array> MakeSortPermutation(const std::shared_ptr<arrow::RecordBatch>& batch, - const std::shared_ptr<arrow::Schema>& sortingKey, const bool andUnique) { - auto keyBatch = ExtractColumns(batch, sortingKey); +std::shared_ptr<arrow::UInt64Array> MakeSortPermutation(const std::shared_ptr<arrow::RecordBatch>& batch, const std::shared_ptr<arrow::Schema>& sortingKey, const bool andUnique) { + auto keyBatch = ExtractColumns(batch, sortingKey, false); + AFL_VERIFY(batch); + AFL_VERIFY(sortingKey); + AFL_VERIFY(!!keyBatch)("problem", "cannot_find_columns")("schema", batch->schema()->ToString())("columns", sortingKey->ToString()); auto keyColumns = std::make_shared<TArrayVec>(keyBatch->columns()); std::vector<TRawReplaceKey> points; points.reserve(keyBatch->num_rows()); diff --git a/ydb/core/formats/arrow/permutations.h b/ydb/core/formats/arrow/permutations.h index b451aea788..584db83508 100644 --- a/ydb/core/formats/arrow/permutations.h +++ b/ydb/core/formats/arrow/permutations.h @@ -140,8 +140,7 @@ public: std::shared_ptr<arrow::UInt64Array> MakePermutation(const int size, const bool reverse = false); std::shared_ptr<arrow::UInt64Array> MakeFilterPermutation(const std::vector<ui64>& indexes); std::shared_ptr<arrow::UInt64Array> MakeFilterPermutation(const std::vector<ui32>& indexes); -std::shared_ptr<arrow::UInt64Array> MakeSortPermutation(const std::shared_ptr<arrow::RecordBatch>& batch, - const std::shared_ptr<arrow::Schema>& sortingKey, const bool andUnique); +std::shared_ptr<arrow::UInt64Array> MakeSortPermutation(const std::shared_ptr<arrow::RecordBatch>& batch, const std::shared_ptr<arrow::Schema>& sortingKey, const bool andUnique); std::shared_ptr<arrow::RecordBatch> ReverseRecords(const std::shared_ptr<arrow::RecordBatch>& batch); std::shared_ptr<arrow::Array> CopyRecords(const std::shared_ptr<arrow::Array>& source, const std::vector<ui64>& indexes); |