1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
|
#pragma clang system_header
// Licensed to the Apache Software Foundation (ASF) under one
// or more contributor license agreements. See the NOTICE file
// distributed with this work for additional information
// regarding copyright ownership. The ASF licenses this file
// to you under the Apache License, Version 2.0 (the
// "License"); you may not use this file except in compliance
// with the License. You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing,
// software distributed under the License is distributed on an
// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
// KIND, either express or implied. See the License for the
// specific language governing permissions and limitations
// under the License.
#pragma once
#include <algorithm>
#include <cstdint>
#include <cstring>
#include <limits>
#include <tuple>
#include <type_traits>
#include <utility>
#include "contrib/libs/apache/arrow_next/cpp/src/arrow/array/util.h"
#include "contrib/libs/apache/arrow_next/cpp/src/arrow/buffer.h"
#include "contrib/libs/apache/arrow_next/cpp/src/arrow/compute/kernels/codegen_internal.h"
#include "contrib/libs/apache/arrow_next/cpp/src/arrow/compute/type_fwd.h"
#include "contrib/libs/apache/arrow_next/cpp/src/arrow/util/bit_run_reader.h"
#include "contrib/libs/apache/arrow_next/cpp/src/arrow/util/bitmap_ops.h"
#include "contrib/libs/apache/arrow_next/cpp/src/arrow/util/math_constants.h"
namespace arrow20 {
using internal::CountAndSetBits;
using internal::CountSetBits;
namespace compute {
class ScalarFunction;
namespace internal {
// Type-level selector used by to_unsigned():
//  - for integral T other than bool it names std::make_unsigned<T>;
//  - for everything else (bool, floating point, ...) it names
//    std::common_type<T>, whose `::type` is simply the decayed T.
// The nested `::type` is only looked up on the selected wrapper, so
// std::make_unsigned is never asked for the unsigned form of an unsupported type.
template <typename T>
using maybe_make_unsigned =
    typename std::conditional<!std::is_integral<T>::value || std::is_same<T, bool>::value,
                              std::common_type<T>, std::make_unsigned<T>>::type;

// Convert `signed_` with static_cast to the type chosen by maybe_make_unsigned:
// the unsigned counterpart for non-bool integers, the same type otherwise.
template <typename T, typename U = typename maybe_make_unsigned<T>::type>
constexpr U to_unsigned(T signed_) {
  return static_cast<U>(signed_);
}
// Compute the (min, max) pair over the non-null values of a numerical array.
// When no value is visited (e.g. an empty array), 'min' stays at
// std::numeric_limits<T>::max() and 'max' at std::numeric_limits<T>::lowest().
template <typename T>
ARROW_NOINLINE std::pair<T, T> GetMinMax(const ArraySpan& data) {
  // first = running minimum, second = running maximum
  std::pair<T, T> bounds(std::numeric_limits<T>::max(), std::numeric_limits<T>::lowest());
  const T* values = data.GetValues<T>(1);
  // Invoked once per run of consecutive set validity bits.
  auto update_bounds = [&](int64_t run_start, int64_t run_length) {
    const int64_t run_end = run_start + run_length;
    for (int64_t i = run_start; i < run_end; ++i) {
      bounds.first = std::min(bounds.first, values[i]);
      bounds.second = std::max(bounds.second, values[i]);
    }
  };
  arrow20::internal::VisitSetBitRunsVoid(data.buffers[0].data, data.offset, data.length,
                                         update_bounds);
  return bounds;
}
// Compute the (min, max) pair over the non-null values of all chunks of `arr`
// by folding the per-chunk results together. With no chunks, the initial
// sentinels (numeric max as 'min', numeric lowest as 'max') are returned.
template <typename T>
std::pair<T, T> GetMinMax(const ChunkedArray& arr) {
  std::pair<T, T> bounds(std::numeric_limits<T>::max(), std::numeric_limits<T>::lowest());
  for (const auto& chunk : arr.chunks()) {
    const std::pair<T, T> chunk_bounds = GetMinMax<T>(*chunk->data());
    bounds.first = std::min(bounds.first, chunk_bounds.first);
    bounds.second = std::max(bounds.second, chunk_bounds.second);
  }
  return bounds;
}
// Tally how often each non-null value occurs in `data`.
// The slot counts[v - min] is incremented for every non-null value v, so
// 'counts' must be zeroed by the caller and large enough for every value.
// Returns the number of non-null values.
template <typename T>
ARROW_NOINLINE int64_t CountValues(const ArraySpan& data, T min, uint64_t* counts) {
  const int64_t non_null_count = data.length - data.GetNullCount();
  if (non_null_count <= 0) {
    // Nothing to tally; leave 'counts' untouched.
    return non_null_count;
  }
  const T* values = data.GetValues<T>(1);
  // Invoked once per run of consecutive set validity bits.
  auto tally_run = [&](int64_t run_start, int64_t run_length) {
    const int64_t run_end = run_start + run_length;
    for (int64_t i = run_start; i < run_end; ++i) {
      ++counts[values[i] - min];
    }
  };
  arrow20::internal::VisitSetBitRunsVoid(data.buffers[0].data, data.offset, data.length,
                                         tally_run);
  return non_null_count;
}
// Tally value occurrences over every chunk of `values` into the shared 'counts'
// table (indexed by value - min) and return the total number of non-null values.
template <typename T>
int64_t CountValues(const ChunkedArray& values, T min, uint64_t* counts) {
  int64_t total = 0;
  for (const auto& chunk : values.chunks()) {
    const int64_t chunk_count = CountValues<T>(*chunk->data(), min, counts);
    total += chunk_count;
  }
  return total;
}
// Copy the non-null values of a numerical array into `out`, packed contiguously
// and in their original order; null slots are skipped.
// The caller must provide room in `out` for every non-null value.
// Returns the number of non-null values (data.length - data.GetNullCount()).
template <typename T>
ARROW_NOINLINE int64_t CopyNonNullValues(const ArraySpan& data, T* out) {
  const int64_t n = data.length - data.GetNullCount();
  if (n > 0) {
    int64_t index = 0;  // next free slot in `out`
    const T* values = data.GetValues<T>(1);
    arrow20::internal::VisitSetBitRunsVoid(
        data.buffers[0].data, data.offset, data.length, [&](int64_t pos, int64_t len) {
          // Each run of set validity bits is one contiguous span of valid values,
          // so it is copied in bulk. std::memcpy is declared in <cstring>; the byte
          // count is converted to std::size_t explicitly rather than implicitly.
          std::memcpy(out + index, values + pos, static_cast<std::size_t>(len) * sizeof(T));
          index += len;
        });
  }
  return n;
}
// Copy the non-null values of every chunk of `arr` into `out`, one chunk after
// another, and return the total number of values written.
template <typename T>
int64_t CopyNonNullValues(const ChunkedArray& arr, T* out) {
  int64_t copied = 0;
  for (const auto& chunk : arr.chunks()) {
    // Each chunk continues right after the values already written.
    T* dest = out + copied;
    copied += CopyNonNullValues(*chunk->data(), dest);
  }
  return copied;
}
// Build an ExecValue from a Datum. Declaration only; the definition is not
// part of this header.
// NOTE(review): presumably wraps the Datum's array/scalar payload in the form
// consumed by kernel exec functions -- confirm against the implementation.
ExecValue GetExecValue(const Datum& value);
// Return a count computed from `mask`. Declaration only; the definition is not
// part of this header.
// NOTE(review): presumably the number of slots in a boolean mask that are true
// (and valid) -- confirm how nulls are treated against the implementation.
int64_t GetTrueCount(const ArraySpan& mask);
// Select the Exec entry point of `KernelGenerator` for a floating-point type id.
// FLOAT maps to KernelGenerator<FloatType, FloatType, Op>::Exec and DOUBLE to
// KernelGenerator<DoubleType, DoubleType, Op>::Exec. Any other id trips
// DCHECK(false) and yields nullptr.
template <template <typename... Args> class KernelGenerator, typename Op>
ArrayKernelExec GenerateArithmeticFloatingPoint(detail::GetTypeId get_id) {
  if (get_id.id == Type::FLOAT) {
    return KernelGenerator<FloatType, FloatType, Op>::Exec;
  }
  if (get_id.id == Type::DOUBLE) {
    return KernelGenerator<DoubleType, DoubleType, Op>::Exec;
  }
  // Not a floating-point type id: programming error on the caller's side.
  DCHECK(false);
  return nullptr;
}
// A scalar kernel that ignores (assumed all-null) inputs and returns null.
// Declaration only; the definition is not part of this header.
// NOTE(review): presumably registers such a kernel on `func` -- confirm against
// the implementation.
void AddNullExec(ScalarFunction* func);
// Return a validity bitmap buffer for `in_array` whose bit 0 corresponds to the
// first element of the span:
//  - nullptr when the span has no validity bitmap;
//  - the existing buffer when the offset is 0;
//  - a slice of the existing buffer, starting at byte offset / 8, when the
//    offset is a multiple of 8;
//  - otherwise a copy of `length` bits made through CopyBitmap using `pool`.
inline Result<std::shared_ptr<Buffer>> GetOrCopyNullBitmapBuffer(
    const ArraySpan& in_array, MemoryPool* pool) {
  const uint8_t* bitmap = in_array.buffers[0].data;
  if (bitmap == nullptr) {
    return nullptr;
  }
  const int64_t bit_offset = in_array.offset;
  const bool byte_aligned = (bit_offset % 8 == 0);
  if (!byte_aligned) {
    // The offset falls inside a byte, so the bits must be shifted into a new
    // bitmap.
    return arrow20::internal::CopyBitmap(pool, bitmap, bit_offset, in_array.length);
  }
  if (bit_offset == 0) {
    return in_array.GetBuffer(0);
  }
  return SliceBuffer(in_array.GetBuffer(0), /*offset=*/bit_offset / 8);
}
} // namespace internal
} // namespace compute
} // namespace arrow20
|