// Licensed to the Apache Software Foundation (ASF) under one // or more contributor license agreements. See the NOTICE file // distributed with this work for additional information // regarding copyright ownership. The ASF licenses this file // to you under the Apache License, Version 2.0 (the // "License"); you may not use this file except in compliance // with the License. You may obtain a copy of the License at // // http://www.apache.org/licenses/LICENSE-2.0 // // Unless required by applicable law or agreed to in writing, // software distributed under the License is distributed on an // "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY // KIND, either express or implied. See the License for the // specific language governing permissions and limitations // under the License. #include "contrib/libs/apache/arrow_next/cpp/src/arrow/compute/row/compare_internal.h" #include #include #include #include "contrib/libs/apache/arrow_next/cpp/src/arrow/compute/util.h" #include "contrib/libs/apache/arrow_next/cpp/src/arrow/compute/util_internal.h" #include "contrib/libs/apache/arrow_next/cpp/src/arrow/util/bit_util.h" #include "contrib/libs/apache/arrow_next/cpp/src/arrow/util/ubsan.h" namespace arrow20 { namespace compute { template void KeyCompare::NullUpdateColumnToRow(uint32_t id_col, uint32_t num_rows_to_compare, const uint16_t* sel_left_maybe_null, const uint32_t* left_to_right_map, LightContext* ctx, const KeyColumnArray& col, const RowTableImpl& rows, bool are_cols_in_encoding_order, uint8_t* match_bytevector) { if (!rows.has_any_nulls(ctx) && !col.data(0)) { return; } uint32_t num_processed = 0; #if defined(ARROW_HAVE_RUNTIME_AVX2) if (ctx->has_avx2()) { num_processed = NullUpdateColumnToRow_avx2( use_selection, id_col, num_rows_to_compare, sel_left_maybe_null, left_to_right_map, ctx, col, rows, are_cols_in_encoding_order, match_bytevector); } #endif const uint32_t null_bit_id = ColIdInEncodingOrder(rows, id_col, are_cols_in_encoding_order); if (!col.data(0)) { // Remove rows from the result for which the column value is a null for (uint32_t i = num_processed; i < num_rows_to_compare; ++i) { uint32_t irow_left = use_selection ? sel_left_maybe_null[i] : i; uint32_t irow_right = left_to_right_map[irow_left]; match_bytevector[i] &= (rows.is_null(irow_right, null_bit_id) ? 0 : 0xff); } } else if (!rows.has_any_nulls(ctx)) { // Remove rows from the result for which the column value on left side is // null const uint8_t* non_nulls = col.data(0); ARROW_DCHECK(non_nulls); for (uint32_t i = num_processed; i < num_rows_to_compare; ++i) { uint32_t irow_left = use_selection ? sel_left_maybe_null[i] : i; match_bytevector[i] &= bit_util::GetBit(non_nulls, irow_left + col.bit_offset(0)) ? 0xff : 0; } } else { const uint8_t* non_nulls = col.data(0); ARROW_DCHECK(non_nulls); for (uint32_t i = num_processed; i < num_rows_to_compare; ++i) { uint32_t irow_left = use_selection ? sel_left_maybe_null[i] : i; uint32_t irow_right = left_to_right_map[irow_left]; int right_null = rows.is_null(irow_right, null_bit_id) ? 0xff : 0; int left_null = bit_util::GetBit(non_nulls, irow_left + col.bit_offset(0)) ? 0 : 0xff; match_bytevector[i] |= left_null & right_null; match_bytevector[i] &= ~(left_null ^ right_null); } } } template void KeyCompare::CompareBinaryColumnToRowHelper( uint32_t offset_within_row, uint32_t first_row_to_compare, uint32_t num_rows_to_compare, const uint16_t* sel_left_maybe_null, const uint32_t* left_to_right_map, LightContext* ctx, const KeyColumnArray& col, const RowTableImpl& rows, uint8_t* match_bytevector, COMPARE_FN compare_fn) { bool is_fixed_length = rows.metadata().is_fixed_length; if (is_fixed_length) { uint32_t fixed_length = rows.metadata().fixed_length; const uint8_t* rows_left = col.data(1); const uint8_t* rows_right = rows.fixed_length_rows(/*row_id=*/0); for (uint32_t i = first_row_to_compare; i < num_rows_to_compare; ++i) { uint32_t irow_left = use_selection ? sel_left_maybe_null[i] : i; // irow_right is used to index into row data so promote to the row offset type. RowTableImpl::offset_type irow_right = left_to_right_map[irow_left]; RowTableImpl::offset_type offset_right = irow_right * fixed_length + offset_within_row; match_bytevector[i] = compare_fn(rows_left, rows_right, irow_left, offset_right); } } else { const uint8_t* rows_left = col.data(1); const RowTableImpl::offset_type* offsets_right = rows.offsets(); const uint8_t* rows_right = rows.var_length_rows(); for (uint32_t i = first_row_to_compare; i < num_rows_to_compare; ++i) { uint32_t irow_left = use_selection ? sel_left_maybe_null[i] : i; uint32_t irow_right = left_to_right_map[irow_left]; RowTableImpl::offset_type offset_right = offsets_right[irow_right] + offset_within_row; match_bytevector[i] = compare_fn(rows_left, rows_right, irow_left, offset_right); } } } template void KeyCompare::CompareBinaryColumnToRow(uint32_t offset_within_row, uint32_t num_rows_to_compare, const uint16_t* sel_left_maybe_null, const uint32_t* left_to_right_map, LightContext* ctx, const KeyColumnArray& col, const RowTableImpl& rows, uint8_t* match_bytevector) { uint32_t num_processed = 0; #if defined(ARROW_HAVE_RUNTIME_AVX2) if (ctx->has_avx2()) { num_processed = CompareBinaryColumnToRow_avx2( use_selection, offset_within_row, num_rows_to_compare, sel_left_maybe_null, left_to_right_map, ctx, col, rows, match_bytevector); } #endif uint32_t col_width = col.metadata().fixed_length; if (col_width == 0) { int bit_offset = col.bit_offset(1); CompareBinaryColumnToRowHelper( offset_within_row, num_processed, num_rows_to_compare, sel_left_maybe_null, left_to_right_map, ctx, col, rows, match_bytevector, [bit_offset](const uint8_t* left_base, const uint8_t* right_base, uint32_t irow_left, RowTableImpl::offset_type offset_right) { uint8_t left = bit_util::GetBit(left_base, irow_left + bit_offset) ? 0xff : 0x00; uint8_t right = right_base[offset_right]; return left == right ? 0xff : 0; }); } else if (col_width == 1) { CompareBinaryColumnToRowHelper( offset_within_row, num_processed, num_rows_to_compare, sel_left_maybe_null, left_to_right_map, ctx, col, rows, match_bytevector, [](const uint8_t* left_base, const uint8_t* right_base, uint32_t irow_left, RowTableImpl::offset_type offset_right) { uint8_t left = left_base[irow_left]; uint8_t right = right_base[offset_right]; return left == right ? 0xff : 0; }); } else if (col_width == 2) { CompareBinaryColumnToRowHelper( offset_within_row, num_processed, num_rows_to_compare, sel_left_maybe_null, left_to_right_map, ctx, col, rows, match_bytevector, [](const uint8_t* left_base, const uint8_t* right_base, uint32_t irow_left, RowTableImpl::offset_type offset_right) { util::CheckAlignment(left_base); util::CheckAlignment(right_base + offset_right); uint16_t left = reinterpret_cast(left_base)[irow_left]; uint16_t right = *reinterpret_cast(right_base + offset_right); return left == right ? 0xff : 0; }); } else if (col_width == 4) { CompareBinaryColumnToRowHelper( offset_within_row, num_processed, num_rows_to_compare, sel_left_maybe_null, left_to_right_map, ctx, col, rows, match_bytevector, [](const uint8_t* left_base, const uint8_t* right_base, uint32_t irow_left, RowTableImpl::offset_type offset_right) { util::CheckAlignment(left_base); util::CheckAlignment(right_base + offset_right); uint32_t left = reinterpret_cast(left_base)[irow_left]; uint32_t right = *reinterpret_cast(right_base + offset_right); return left == right ? 0xff : 0; }); } else if (col_width == 8) { CompareBinaryColumnToRowHelper( offset_within_row, num_processed, num_rows_to_compare, sel_left_maybe_null, left_to_right_map, ctx, col, rows, match_bytevector, [](const uint8_t* left_base, const uint8_t* right_base, uint32_t irow_left, RowTableImpl::offset_type offset_right) { util::CheckAlignment(left_base); util::CheckAlignment(right_base + offset_right); uint64_t left = reinterpret_cast(left_base)[irow_left]; uint64_t right = *reinterpret_cast(right_base + offset_right); return left == right ? 0xff : 0; }); } else { CompareBinaryColumnToRowHelper( offset_within_row, num_processed, num_rows_to_compare, sel_left_maybe_null, left_to_right_map, ctx, col, rows, match_bytevector, [&col](const uint8_t* left_base, const uint8_t* right_base, uint32_t irow_left, RowTableImpl::offset_type offset_right) { uint32_t length = col.metadata().fixed_length; // Non-zero length guarantees no underflow int32_t num_loops_less_one = static_cast(bit_util::CeilDiv(length, 8)) - 1; int32_t num_tail_bytes = length - num_loops_less_one * 8; const uint64_t* key_left_ptr = reinterpret_cast(left_base + irow_left * length); util::CheckAlignment(right_base + offset_right); const uint64_t* key_right_ptr = reinterpret_cast(right_base + offset_right); uint64_t result_or = 0; int32_t i; // length cannot be zero for (i = 0; i < num_loops_less_one; ++i) { uint64_t key_left = util::SafeLoad(key_left_ptr + i); uint64_t key_right = key_right_ptr[i]; result_or |= key_left ^ key_right; } uint64_t key_left = 0; memcpy(&key_left, key_left_ptr + i, num_tail_bytes); uint64_t key_right = 0; memcpy(&key_right, key_right_ptr + i, num_tail_bytes); result_or |= key_left ^ key_right; return result_or == 0 ? 0xff : 0; }); } } // Overwrites the match_bytevector instead of updating it template void KeyCompare::CompareVarBinaryColumnToRowHelper( uint32_t id_varbinary_col, uint32_t first_row_to_compare, uint32_t num_rows_to_compare, const uint16_t* sel_left_maybe_null, const uint32_t* left_to_right_map, LightContext* ctx, const KeyColumnArray& col, const RowTableImpl& rows, uint8_t* match_bytevector) { const uint32_t* offsets_left = col.offsets(); const RowTableImpl::offset_type* offsets_right = rows.offsets(); const uint8_t* rows_left = col.data(2); const uint8_t* rows_right = rows.var_length_rows(); for (uint32_t i = first_row_to_compare; i < num_rows_to_compare; ++i) { uint32_t irow_left = use_selection ? sel_left_maybe_null[i] : i; uint32_t irow_right = left_to_right_map[irow_left]; uint32_t begin_left = offsets_left[irow_left]; uint32_t length_left = offsets_left[irow_left + 1] - begin_left; RowTableImpl::offset_type begin_right = offsets_right[irow_right]; uint32_t length_right; uint32_t offset_within_row; if (!is_first_varbinary_col) { rows.metadata().nth_varbinary_offset_and_length( rows_right + begin_right, id_varbinary_col, &offset_within_row, &length_right); } else { rows.metadata().first_varbinary_offset_and_length( rows_right + begin_right, &offset_within_row, &length_right); } begin_right += offset_within_row; uint32_t length = std::min(length_left, length_right); const uint64_t* key_left_ptr = reinterpret_cast(rows_left + begin_left); util::CheckAlignment(rows_right + begin_right); const uint64_t* key_right_ptr = reinterpret_cast(rows_right + begin_right); uint64_t result_or = 0; if (length > 0) { int32_t j; // length can be zero for (j = 0; j < static_cast(bit_util::CeilDiv(length, 8)) - 1; ++j) { uint64_t key_left = util::SafeLoad(key_left_ptr + j); uint64_t key_right = key_right_ptr[j]; result_or |= key_left ^ key_right; } int32_t tail_length = length - j * 8; uint64_t tail_mask = ~0ULL >> (64 - 8 * tail_length); uint64_t key_left = 0; // NOTE: UBSAN may falsely report "misaligned load" in `std::memcpy` on some // platforms when using 64-bit pointers. Cast to an 8-bit pointer to work around // this. const uint8_t* src_bytes = reinterpret_cast(key_left_ptr + j); std::memcpy(&key_left, src_bytes, tail_length); uint64_t key_right = key_right_ptr[j]; result_or |= tail_mask & (key_left ^ key_right); } int result = result_or == 0 ? 0xff : 0; result *= (length_left == length_right ? 1 : 0); match_bytevector[i] = result; } } // Overwrites the match_bytevector instead of updating it template void KeyCompare::CompareVarBinaryColumnToRow(uint32_t id_varbinary_col, uint32_t num_rows_to_compare, const uint16_t* sel_left_maybe_null, const uint32_t* left_to_right_map, LightContext* ctx, const KeyColumnArray& col, const RowTableImpl& rows, uint8_t* match_bytevector) { uint32_t num_processed = 0; #if defined(ARROW_HAVE_RUNTIME_AVX2) if (ctx->has_avx2()) { num_processed = CompareVarBinaryColumnToRow_avx2( use_selection, is_first_varbinary_col, id_varbinary_col, num_rows_to_compare, sel_left_maybe_null, left_to_right_map, ctx, col, rows, match_bytevector); } #endif CompareVarBinaryColumnToRowHelper( id_varbinary_col, num_processed, num_rows_to_compare, sel_left_maybe_null, left_to_right_map, ctx, col, rows, match_bytevector); } void KeyCompare::AndByteVectors(LightContext* ctx, uint32_t num_elements, uint8_t* bytevector_A, const uint8_t* bytevector_B) { uint32_t num_processed = 0; #if defined(ARROW_HAVE_RUNTIME_AVX2) if (ctx->has_avx2()) { num_processed = AndByteVectors_avx2(num_elements, bytevector_A, bytevector_B); } #endif for (uint32_t i = num_processed / 8; i < bit_util::CeilDiv(num_elements, 8); ++i) { uint64_t* a = reinterpret_cast(bytevector_A); const uint64_t* b = reinterpret_cast(bytevector_B); a[i] &= b[i]; } } void KeyCompare::CompareColumnsToRows( uint32_t num_rows_to_compare, const uint16_t* sel_left_maybe_null, const uint32_t* left_to_right_map, LightContext* ctx, uint32_t* out_num_rows, uint16_t* out_sel_left_maybe_same, const std::vector& cols, const RowTableImpl& rows, bool are_cols_in_encoding_order, uint8_t* out_match_bitvector_maybe_null) { if (num_rows_to_compare == 0) { if (out_match_bitvector_maybe_null) { DCHECK_EQ(out_num_rows, nullptr); DCHECK_EQ(out_sel_left_maybe_same, nullptr); bit_util::ClearBitmap(out_match_bitvector_maybe_null, 0, num_rows_to_compare); } else { *out_num_rows = 0; } return; } // Allocate temporary byte and bit vectors auto bytevector_A_holder = util::TempVectorHolder(ctx->stack, num_rows_to_compare); auto bytevector_B_holder = util::TempVectorHolder(ctx->stack, num_rows_to_compare); auto bitvector_holder = util::TempVectorHolder(ctx->stack, num_rows_to_compare); uint8_t* match_bytevector_A = bytevector_A_holder.mutable_data(); uint8_t* match_bytevector_B = bytevector_B_holder.mutable_data(); uint8_t* match_bitvector = bitvector_holder.mutable_data(); bool is_first_column = true; for (size_t icol = 0; icol < cols.size(); ++icol) { const KeyColumnArray& col = cols[icol]; if (col.metadata().is_null_type) { // If this null type col is the first column, the match_bytevector_A needs to be // initialized with 0xFF. Otherwise, the calculation can be skipped if (is_first_column) { std::memset(match_bytevector_A, 0xFF, num_rows_to_compare * sizeof(uint8_t)); } continue; } uint32_t offset_within_row = rows.metadata().encoded_field_offset(ColIdInEncodingOrder( rows, static_cast(icol), are_cols_in_encoding_order)); if (col.metadata().is_fixed_length) { if (sel_left_maybe_null) { CompareBinaryColumnToRow( offset_within_row, num_rows_to_compare, sel_left_maybe_null, left_to_right_map, ctx, col, rows, is_first_column ? match_bytevector_A : match_bytevector_B); NullUpdateColumnToRow( static_cast(icol), num_rows_to_compare, sel_left_maybe_null, left_to_right_map, ctx, col, rows, are_cols_in_encoding_order, is_first_column ? match_bytevector_A : match_bytevector_B); } else { // Version without using selection vector CompareBinaryColumnToRow( offset_within_row, num_rows_to_compare, sel_left_maybe_null, left_to_right_map, ctx, col, rows, is_first_column ? match_bytevector_A : match_bytevector_B); NullUpdateColumnToRow( static_cast(icol), num_rows_to_compare, sel_left_maybe_null, left_to_right_map, ctx, col, rows, are_cols_in_encoding_order, is_first_column ? match_bytevector_A : match_bytevector_B); } if (!is_first_column) { AndByteVectors(ctx, num_rows_to_compare, match_bytevector_A, match_bytevector_B); } is_first_column = false; } } uint32_t ivarbinary = 0; for (size_t icol = 0; icol < cols.size(); ++icol) { const KeyColumnArray& col = cols[icol]; if (!col.metadata().is_fixed_length) { // Process varbinary and nulls if (sel_left_maybe_null) { if (ivarbinary == 0) { CompareVarBinaryColumnToRow( ivarbinary, num_rows_to_compare, sel_left_maybe_null, left_to_right_map, ctx, col, rows, is_first_column ? match_bytevector_A : match_bytevector_B); } else { CompareVarBinaryColumnToRow(ivarbinary, num_rows_to_compare, sel_left_maybe_null, left_to_right_map, ctx, col, rows, match_bytevector_B); } NullUpdateColumnToRow( static_cast(icol), num_rows_to_compare, sel_left_maybe_null, left_to_right_map, ctx, col, rows, are_cols_in_encoding_order, is_first_column ? match_bytevector_A : match_bytevector_B); } else { if (ivarbinary == 0) { CompareVarBinaryColumnToRow( ivarbinary, num_rows_to_compare, sel_left_maybe_null, left_to_right_map, ctx, col, rows, is_first_column ? match_bytevector_A : match_bytevector_B); } else { CompareVarBinaryColumnToRow( ivarbinary, num_rows_to_compare, sel_left_maybe_null, left_to_right_map, ctx, col, rows, match_bytevector_B); } NullUpdateColumnToRow( static_cast(icol), num_rows_to_compare, sel_left_maybe_null, left_to_right_map, ctx, col, rows, are_cols_in_encoding_order, is_first_column ? match_bytevector_A : match_bytevector_B); } if (!is_first_column) { AndByteVectors(ctx, num_rows_to_compare, match_bytevector_A, match_bytevector_B); } is_first_column = false; ++ivarbinary; } } util::bit_util::bytes_to_bits(ctx->hardware_flags, num_rows_to_compare, match_bytevector_A, match_bitvector); if (out_match_bitvector_maybe_null) { DCHECK_EQ(out_num_rows, nullptr); DCHECK_EQ(out_sel_left_maybe_same, nullptr); memcpy(out_match_bitvector_maybe_null, match_bitvector, bit_util::BytesForBits(num_rows_to_compare)); } else { if (sel_left_maybe_null) { int out_num_rows_int; util::bit_util::bits_filter_indexes(0, ctx->hardware_flags, num_rows_to_compare, match_bitvector, sel_left_maybe_null, &out_num_rows_int, out_sel_left_maybe_same); *out_num_rows = out_num_rows_int; } else { int out_num_rows_int; util::bit_util::bits_to_indexes(0, ctx->hardware_flags, num_rows_to_compare, match_bitvector, &out_num_rows_int, out_sel_left_maybe_same); *out_num_rows = out_num_rows_int; } } } } // namespace compute } // namespace arrow20