summaryrefslogtreecommitdiffstats
path: root/contrib/libs/apache/arrow_next/cpp/src/arrow/compute/row/compare_internal.h
blob: 503cf0592198127ad2d380ef1bbf0d3f087b9f1c (plain) (blame)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
#pragma clang system_header
// Licensed to the Apache Software Foundation (ASF) under one
// or more contributor license agreements.  See the NOTICE file
// distributed with this work for additional information
// regarding copyright ownership.  The ASF licenses this file
// to you under the Apache License, Version 2.0 (the
// "License"); you may not use this file except in compliance
// with the License.  You may obtain a copy of the License at
//
//   http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing,
// software distributed under the License is distributed on an
// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
// KIND, either express or implied.  See the License for the
// specific language governing permissions and limitations
// under the License.

#pragma once

#include <cstdint>

#include "contrib/libs/apache/arrow_next/cpp/src/arrow/compute/light_array_internal.h"
#include "contrib/libs/apache/arrow_next/cpp/src/arrow/compute/row/encode_internal.h"
#include "contrib/libs/apache/arrow_next/cpp/src/arrow/compute/row/row_internal.h"
#include "contrib/libs/apache/arrow_next/cpp/src/arrow/compute/util.h"
#include "contrib/libs/apache/arrow_next/cpp/src/arrow/memory_pool.h"
#include "contrib/libs/apache/arrow_next/cpp/src/arrow/result.h"
#include "contrib/libs/apache/arrow_next/cpp/src/arrow/status.h"

namespace arrow20 {
namespace compute {

class ARROW_EXPORT KeyCompare {
 public:
  // Clarify the max temp stack usage for CompareColumnsToRows, which might be necessary
  // for the caller to be aware of (possibly at compile time) to reserve enough stack size
  // in advance. The CompareColumnsToRows implementation uses three uint8 temp vectors as
  // buffers for match vectors, all are of size num_rows. Plus extra kMiniBatchLength to
  // cope with stack padding and aligning.
  constexpr static int64_t CompareColumnsToRowsTempStackUsage(int64_t num_rows) {
    return (sizeof(uint8_t) + sizeof(uint8_t) + sizeof(uint8_t)) * num_rows +
           /*extra=*/util::MiniBatch::kMiniBatchLength;
  }

  /// \brief Compare a batch of rows in columnar format to the specified rows in row
  /// format.
  ///
  /// The comparison result is populated in either a 16-bit selection vector of rows that
  /// failed comparison, or a match bitvector with 1 for matched rows and 0 otherwise.
  ///
  /// @param num_rows_to_compare The number of rows to compare.
  /// @param sel_left_maybe_null Optional input selection vector on the left, the
  ///        comparison is only performed on the selected rows. Null if all rows in
  ///        `left_to_right_map` are to be compared.
  /// @param left_to_right_map The mapping from the left to the right rows. Left row `i`
  ///        in `cols` is compared to right row `left_to_right_map[i]` in `row`.
  /// @param ctx The light context needed for the comparison.
  /// @param out_num_rows The number of rows that failed comparison. Must be null if
  ///        `out_match_bitvector_maybe_null` is not null.
  /// @param out_sel_left_maybe_same The selection vector of rows that failed comparison.
  ///        Can be the same as `sel_left_maybe_null` for in-place update. Must be null if
  ///        `out_match_bitvector_maybe_null` is not null.
  /// @param cols The left rows in columnar format to compare.
  /// @param rows The right rows in row format to compare.
  /// @param are_cols_in_encoding_order Whether the columns are in encoding order.
  /// @param out_match_bitvector_maybe_null The optional output match bitvector, 1 for
  ///        matched rows and 0 otherwise. Won't be populated if `out_num_rows` and
  ///        `out_sel_left_maybe_same` are not null.
  static void CompareColumnsToRows(
      uint32_t num_rows_to_compare, const uint16_t* sel_left_maybe_null,
      const uint32_t* left_to_right_map, LightContext* ctx, uint32_t* out_num_rows,
      uint16_t* out_sel_left_maybe_same, const std::vector<KeyColumnArray>& cols,
      const RowTableImpl& rows, bool are_cols_in_encoding_order,
      uint8_t* out_match_bitvector_maybe_null = NULLPTR);

 private:
  static uint32_t ColIdInEncodingOrder(const RowTableImpl& rows, uint32_t id_col,
                                       bool are_cols_in_encoding_order) {
    return are_cols_in_encoding_order ? id_col
                                      : rows.metadata().pos_after_encoding(id_col);
  }

  template <bool use_selection>
  static void NullUpdateColumnToRow(uint32_t id_col, uint32_t num_rows_to_compare,
                                    const uint16_t* sel_left_maybe_null,
                                    const uint32_t* left_to_right_map, LightContext* ctx,
                                    const KeyColumnArray& col, const RowTableImpl& rows,
                                    bool are_cols_in_encoding_order,
                                    uint8_t* match_bytevector);

  template <bool use_selection, class COMPARE_FN>
  static void CompareBinaryColumnToRowHelper(
      uint32_t offset_within_row, uint32_t first_row_to_compare,
      uint32_t num_rows_to_compare, const uint16_t* sel_left_maybe_null,
      const uint32_t* left_to_right_map, LightContext* ctx, const KeyColumnArray& col,
      const RowTableImpl& rows, uint8_t* match_bytevector, COMPARE_FN compare_fn);

  template <bool use_selection>
  static void CompareBinaryColumnToRow(uint32_t offset_within_row,
                                       uint32_t num_rows_to_compare,
                                       const uint16_t* sel_left_maybe_null,
                                       const uint32_t* left_to_right_map,
                                       LightContext* ctx, const KeyColumnArray& col,
                                       const RowTableImpl& rows,
                                       uint8_t* match_bytevector);

  template <bool use_selection, bool is_first_varbinary_col>
  static void CompareVarBinaryColumnToRowHelper(
      uint32_t id_varlen_col, uint32_t first_row_to_compare, uint32_t num_rows_to_compare,
      const uint16_t* sel_left_maybe_null, const uint32_t* left_to_right_map,
      LightContext* ctx, const KeyColumnArray& col, const RowTableImpl& rows,
      uint8_t* match_bytevector);

  template <bool use_selection, bool is_first_varbinary_col>
  static void CompareVarBinaryColumnToRow(uint32_t id_varlen_col,
                                          uint32_t num_rows_to_compare,
                                          const uint16_t* sel_left_maybe_null,
                                          const uint32_t* left_to_right_map,
                                          LightContext* ctx, const KeyColumnArray& col,
                                          const RowTableImpl& rows,
                                          uint8_t* match_bytevector);

  static void AndByteVectors(LightContext* ctx, uint32_t num_elements,
                             uint8_t* bytevector_A, const uint8_t* bytevector_B);

#if defined(ARROW_HAVE_RUNTIME_AVX2)

  template <bool use_selection>
  static uint32_t NullUpdateColumnToRowImp_avx2(
      uint32_t id_col, uint32_t num_rows_to_compare, const uint16_t* sel_left_maybe_null,
      const uint32_t* left_to_right_map, LightContext* ctx, const KeyColumnArray& col,
      const RowTableImpl& rows, bool are_cols_in_encoding_order,
      uint8_t* match_bytevector);

  template <bool use_selection, class COMPARE8_FN>
  static uint32_t CompareBinaryColumnToRowHelper_avx2(
      uint32_t offset_within_row, uint32_t num_rows_to_compare,
      const uint16_t* sel_left_maybe_null, const uint32_t* left_to_right_map,
      LightContext* ctx, const KeyColumnArray& col, const RowTableImpl& rows,
      uint8_t* match_bytevector, COMPARE8_FN compare8_fn);

  template <bool use_selection>
  static uint32_t CompareBinaryColumnToRowImp_avx2(
      uint32_t offset_within_row, uint32_t num_rows_to_compare,
      const uint16_t* sel_left_maybe_null, const uint32_t* left_to_right_map,
      LightContext* ctx, const KeyColumnArray& col, const RowTableImpl& rows,
      uint8_t* match_bytevector);

  template <bool use_selection, bool is_first_varbinary_col>
  static void CompareVarBinaryColumnToRowImp_avx2(
      uint32_t id_varlen_col, uint32_t num_rows_to_compare,
      const uint16_t* sel_left_maybe_null, const uint32_t* left_to_right_map,
      LightContext* ctx, const KeyColumnArray& col, const RowTableImpl& rows,
      uint8_t* match_bytevector);

  static uint32_t AndByteVectors_avx2(uint32_t num_elements, uint8_t* bytevector_A,
                                      const uint8_t* bytevector_B);

  static uint32_t NullUpdateColumnToRow_avx2(
      bool use_selection, uint32_t id_col, uint32_t num_rows_to_compare,
      const uint16_t* sel_left_maybe_null, const uint32_t* left_to_right_map,
      LightContext* ctx, const KeyColumnArray& col, const RowTableImpl& rows,
      bool are_cols_in_encoding_order, uint8_t* match_bytevector);

  static uint32_t CompareBinaryColumnToRow_avx2(
      bool use_selection, uint32_t offset_within_row, uint32_t num_rows_to_compare,
      const uint16_t* sel_left_maybe_null, const uint32_t* left_to_right_map,
      LightContext* ctx, const KeyColumnArray& col, const RowTableImpl& rows,
      uint8_t* match_bytevector);

  static uint32_t CompareVarBinaryColumnToRow_avx2(
      bool use_selection, bool is_first_varbinary_col, uint32_t id_varlen_col,
      uint32_t num_rows_to_compare, const uint16_t* sel_left_maybe_null,
      const uint32_t* left_to_right_map, LightContext* ctx, const KeyColumnArray& col,
      const RowTableImpl& rows, uint8_t* match_bytevector);

#endif
};

}  // namespace compute
}  // namespace arrow20