1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
|
// Licensed to the Apache Software Foundation (ASF) under one
// or more contributor license agreements. See the NOTICE file
// distributed with this work for additional information
// regarding copyright ownership. The ASF licenses this file
// to you under the Apache License, Version 2.0 (the
// "License"); you may not use this file except in compliance
// with the License. You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing,
// software distributed under the License is distributed on an
// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
// KIND, either express or implied. See the License for the
// specific language governing permissions and limitations
// under the License.
#include "contrib/libs/apache/arrow_next/cpp/src/arrow/builder.h"
#include <memory>
#include <string>
#include <utility>
#include <vector>
#include "contrib/libs/apache/arrow_next/cpp/src/arrow/status.h"
#include "contrib/libs/apache/arrow_next/cpp/src/arrow/type.h"
#include "contrib/libs/apache/arrow_next/cpp/src/arrow/util/checked_cast.h"
#include "contrib/libs/apache/arrow_next/cpp/src/arrow/util/hashing.h"
#include "contrib/libs/apache/arrow_next/cpp/src/arrow/visit_type_inline.h"
namespace arrow20 {
class MemoryPool;
// ----------------------------------------------------------------------
// Helper functions
using arrow20::internal::checked_cast;
// Generic int builder that delegates to the builder for a specific
// type. Used to reduce the number of template instantiations in the
// exact_index_type case below, to reduce build time and memory usage.
//
// Every ArrayBuilder override simply forwards to the wrapped concrete
// builder. Append(int32_t) is not an override, so it dispatches by hand on
// the integer type id captured at construction time.
class ARROW_EXPORT TypeErasedIntBuilder : public ArrayBuilder {
 public:
  // Placeholder constructor; it leaves the wrapped builder unset and must not
  // be used (it trips a DCHECK).
  explicit TypeErasedIntBuilder(MemoryPool* pool = default_memory_pool(),
                                int64_t alignment = kDefaultBufferAlignment)
      : ArrayBuilder(pool, alignment) {
    // Not intended to be used, but adding this is easier than adding a bunch of enable_if
    // magic to builder_dict.h
    DCHECK(false);
  }

  // Creates the concrete integer builder matching `type`.
  //
  // \param type an integer data type (checked with DCHECK only)
  // \param pool memory pool handed to both this wrapper and the delegate
  // \param alignment buffer alignment handed to both this wrapper and the
  //        delegate, so that a caller-requested alignment is actually honoured
  explicit TypeErasedIntBuilder(const std::shared_ptr<DataType>& type,
                                MemoryPool* pool = default_memory_pool(),
                                int64_t alignment = kDefaultBufferAlignment)
      : ArrayBuilder(pool, alignment), type_id_(type->id()) {
    DCHECK(is_integer(type_id_));
    switch (type_id_) {
      case Type::UINT8:
        builder_ = std::make_unique<UInt8Builder>(pool, alignment);
        break;
      case Type::INT8:
        builder_ = std::make_unique<Int8Builder>(pool, alignment);
        break;
      case Type::UINT16:
        builder_ = std::make_unique<UInt16Builder>(pool, alignment);
        break;
      case Type::INT16:
        builder_ = std::make_unique<Int16Builder>(pool, alignment);
        break;
      case Type::UINT32:
        builder_ = std::make_unique<UInt32Builder>(pool, alignment);
        break;
      case Type::INT32:
        builder_ = std::make_unique<Int32Builder>(pool, alignment);
        break;
      case Type::UINT64:
        builder_ = std::make_unique<UInt64Builder>(pool, alignment);
        break;
      case Type::INT64:
        builder_ = std::make_unique<Int64Builder>(pool, alignment);
        break;
      default:
        DCHECK(false);
    }
  }

  void Reset() override { return builder_->Reset(); }

  // Appends `value` through the concrete builder selected by the stored type
  // id. Returns NotImplemented if the type id is not one of the eight integer
  // types handled in the constructor.
  Status Append(int32_t value) {
    switch (type_id_) {
      case Type::UINT8:
        return checked_cast<UInt8Builder*>(builder_.get())->Append(value);
      case Type::INT8:
        return checked_cast<Int8Builder*>(builder_.get())->Append(value);
      case Type::UINT16:
        return checked_cast<UInt16Builder*>(builder_.get())->Append(value);
      case Type::INT16:
        return checked_cast<Int16Builder*>(builder_.get())->Append(value);
      case Type::UINT32:
        return checked_cast<UInt32Builder*>(builder_.get())->Append(value);
      case Type::INT32:
        return checked_cast<Int32Builder*>(builder_.get())->Append(value);
      case Type::UINT64:
        return checked_cast<UInt64Builder*>(builder_.get())->Append(value);
      case Type::INT64:
        return checked_cast<Int64Builder*>(builder_.get())->Append(value);
      default:
        DCHECK(false);
    }
    return Status::NotImplemented("Internal implementation error");
  }

  // The remaining ArrayBuilder interface is forwarded verbatim to the delegate.
  Status AppendNull() override { return builder_->AppendNull(); }
  Status AppendNulls(int64_t length) override { return builder_->AppendNulls(length); }
  Status AppendEmptyValue() override { return builder_->AppendEmptyValue(); }
  Status AppendEmptyValues(int64_t length) override {
    return builder_->AppendEmptyValues(length);
  }
  Status AppendScalar(const Scalar& scalar, int64_t n_repeats) override {
    return builder_->AppendScalar(scalar, n_repeats);
  }
  Status AppendScalars(const ScalarVector& scalars) override {
    return builder_->AppendScalars(scalars);
  }
  Status AppendArraySlice(const ArraySpan& array, int64_t offset,
                          int64_t length) override {
    return builder_->AppendArraySlice(array, offset, length);
  }
  Status FinishInternal(std::shared_ptr<ArrayData>* out) override {
    return builder_->FinishInternal(out);
  }
  std::shared_ptr<DataType> type() const override { return builder_->type(); }

 private:
  // Concrete integer builder doing the actual work.
  std::unique_ptr<ArrayBuilder> builder_;
  // Type id of `builder_`; given a defined value so that the placeholder
  // constructor does not leave it indeterminate.
  Type::type type_id_ = Type::NA;
};
// Type visitor that instantiates the dictionary builder matching the
// dictionary's value type and stores it in `*out`.
//
// The data members are filled by aggregate initialization at the call sites,
// so their declaration order is part of the interface.
struct DictionaryBuilderCase {
  // Entry point: dispatches on the dynamic type of `value_type`.
  Status Make() { return VisitTypeInline(*value_type, this); }

  // Any value type exposing a `c_type` gets a builder.
  template <typename ValueType, typename Enable = typename ValueType::c_type>
  Status Visit(const ValueType&) {
    return CreateFor<ValueType>();
  }

  // Value types without a `c_type` that are nevertheless supported.
  Status Visit(const NullType&) { return CreateFor<NullType>(); }
  Status Visit(const BinaryType&) { return CreateFor<BinaryType>(); }
  Status Visit(const StringType&) { return CreateFor<StringType>(); }
  Status Visit(const LargeBinaryType&) { return CreateFor<LargeBinaryType>(); }
  Status Visit(const LargeStringType&) { return CreateFor<LargeStringType>(); }
  Status Visit(const BinaryViewType&) { return CreateFor<BinaryViewType>(); }
  Status Visit(const StringViewType&) { return CreateFor<StringViewType>(); }
  Status Visit(const FixedSizeBinaryType&) { return CreateFor<FixedSizeBinaryType>(); }
  Status Visit(const Decimal32Type&) { return CreateFor<Decimal32Type>(); }
  Status Visit(const Decimal64Type&) { return CreateFor<Decimal64Type>(); }
  Status Visit(const Decimal128Type&) { return CreateFor<Decimal128Type>(); }
  Status Visit(const Decimal256Type&) { return CreateFor<Decimal256Type>(); }

  // Explicitly rejected value types: half floats and everything else.
  Status Visit(const HalfFloatType& value_type) { return NotImplemented(value_type); }
  Status Visit(const DataType& value_type) { return NotImplemented(value_type); }

  // Builds the error returned for unsupported value types.
  Status NotImplemented(const DataType& value_type) {
    return Status::NotImplemented(
        "MakeBuilder: cannot construct builder for dictionaries with value type ",
        value_type);
  }

  // Picks one of three builder flavours for `ValueType`:
  //   1. a preset `dictionary` is given -> builder seeded with it;
  //   2. `exact_index_type` is false    -> builder whose starting index width
  //                                        is the byte width of `index_type`;
  //   3. `exact_index_type` is true     -> builder using exactly `index_type`,
  //                                        which must be an integer type.
  template <typename ValueType>
  Status CreateFor() {
    using DefaultBuilder = DictionaryBuilder<ValueType>;
    using ExactIndexBuilder =
        internal::DictionaryBuilderBase<TypeErasedIntBuilder, ValueType>;

    if (dictionary != nullptr) {
      out->reset(new DefaultBuilder(dictionary, pool));
      return Status::OK();
    }

    if (!exact_index_type) {
      auto start_int_size = index_type->byte_width();
      out->reset(new DefaultBuilder(start_int_size, value_type, pool));
      return Status::OK();
    }

    if (!is_integer(index_type->id())) {
      return Status::TypeError("MakeBuilder: invalid index type ", *index_type);
    }
    out->reset(new ExactIndexBuilder(index_type, value_type, pool));
    return Status::OK();
  }

  MemoryPool* pool;
  const std::shared_ptr<DataType>& index_type;
  const std::shared_ptr<DataType>& value_type;
  const std::shared_ptr<Array>& dictionary;
  bool exact_index_type;
  std::unique_ptr<ArrayBuilder>* out;
};
// Type visitor that constructs the ArrayBuilder for `type` and leaves it in
// `out`. Nested types recurse through ChildBuilder() / FieldBuilders(), which
// run a fresh visitor per child with the same pool and `exact_index_type`.
//
// The data members are filled by aggregate initialization at the call sites,
// so their declaration order is part of the interface.
struct MakeBuilderImpl {
  // Non-nested types: use the builder advertised by TypeTraits.
  template <typename T>
  enable_if_not_nested<T, Status> Visit(const T&) {
    out.reset(new typename TypeTraits<T>::BuilderType(type, pool));
    return Status::OK();
  }

  // Dictionary types are delegated to DictionaryBuilderCase, without a preset
  // dictionary.
  Status Visit(const DictionaryType& dict_type) {
    DictionaryBuilderCase visitor = {pool,
                                     dict_type.index_type(),
                                     dict_type.value_type(),
                                     /*dictionary=*/nullptr,
                                     exact_index_type,
                                     &out};
    return visitor.Make();
  }

  Status Visit(const ListType& list_type) {
    std::shared_ptr<DataType> value_type = list_type.value_type();
    ARROW_ASSIGN_OR_RAISE(auto value_builder, ChildBuilder(value_type));
    out.reset(new ListBuilder(pool, std::move(value_builder), type));
    return Status::OK();
  }

  Status Visit(const LargeListType& list_type) {
    std::shared_ptr<DataType> value_type = list_type.value_type();
    ARROW_ASSIGN_OR_RAISE(auto value_builder, ChildBuilder(value_type));
    out.reset(new LargeListBuilder(pool, std::move(value_builder), type));
    return Status::OK();
  }

  Status Visit(const ListViewType& list_view_type) {
    std::shared_ptr<DataType> value_type = list_view_type.value_type();
    ARROW_ASSIGN_OR_RAISE(auto value_builder, ChildBuilder(value_type));
    // `type` is a const reference, so it is passed as-is: moving from it
    // would only ever copy.
    out.reset(new ListViewBuilder(pool, std::move(value_builder), type));
    return Status::OK();
  }

  Status Visit(const LargeListViewType& large_list_view_type) {
    std::shared_ptr<DataType> value_type = large_list_view_type.value_type();
    ARROW_ASSIGN_OR_RAISE(auto value_builder, ChildBuilder(value_type));
    // See ListViewType above: no std::move on the const reference member.
    out.reset(new LargeListViewBuilder(pool, std::move(value_builder), type));
    return Status::OK();
  }

  Status Visit(const MapType& map_type) {
    ARROW_ASSIGN_OR_RAISE(auto key_builder, ChildBuilder(map_type.key_type()));
    ARROW_ASSIGN_OR_RAISE(auto item_builder, ChildBuilder(map_type.item_type()));
    out.reset(
        new MapBuilder(pool, std::move(key_builder), std::move(item_builder), type));
    return Status::OK();
  }

  Status Visit(const FixedSizeListType& list_type) {
    auto value_type = list_type.value_type();
    ARROW_ASSIGN_OR_RAISE(auto value_builder, ChildBuilder(value_type));
    out.reset(new FixedSizeListBuilder(pool, std::move(value_builder), type));
    return Status::OK();
  }

  Status Visit(const StructType&) {
    ARROW_ASSIGN_OR_RAISE(auto field_builders, FieldBuilders(*type, pool));
    out.reset(new StructBuilder(type, pool, std::move(field_builders)));
    return Status::OK();
  }

  Status Visit(const SparseUnionType&) {
    ARROW_ASSIGN_OR_RAISE(auto field_builders, FieldBuilders(*type, pool));
    out.reset(new SparseUnionBuilder(pool, std::move(field_builders), type));
    return Status::OK();
  }

  Status Visit(const DenseUnionType&) {
    ARROW_ASSIGN_OR_RAISE(auto field_builders, FieldBuilders(*type, pool));
    out.reset(new DenseUnionBuilder(pool, std::move(field_builders), type));
    return Status::OK();
  }

  Status Visit(const RunEndEncodedType& ree_type) {
    ARROW_ASSIGN_OR_RAISE(auto run_end_builder, ChildBuilder(ree_type.run_end_type()));
    ARROW_ASSIGN_OR_RAISE(auto value_builder, ChildBuilder(ree_type.value_type()));
    out.reset(new RunEndEncodedBuilder(pool, std::move(run_end_builder),
                                       std::move(value_builder), type));
    return Status::OK();
  }

  // Extension types and any type without a dedicated overload are rejected.
  Status Visit(const ExtensionType&) { return NotImplemented(); }
  Status Visit(const DataType&) { return NotImplemented(); }

  // Builds the error returned for unsupported types.
  Status NotImplemented() {
    return Status::NotImplemented("MakeBuilder: cannot construct builder for type ",
                                  type->ToString());
  }

  // Creates the builder for a single child type by running a nested visitor.
  Result<std::unique_ptr<ArrayBuilder>> ChildBuilder(
      const std::shared_ptr<DataType>& type) {
    MakeBuilderImpl impl{pool, type, exact_index_type, /*out=*/nullptr};
    RETURN_NOT_OK(VisitTypeInline(*type, &impl));
    return std::move(impl.out);
  }

  // Creates one builder per field of `type`, in field order, allocating from
  // the `pool` argument. Stops at the first field whose builder cannot be
  // created and returns that error.
  Result<std::vector<std::shared_ptr<ArrayBuilder>>> FieldBuilders(const DataType& type,
                                                                   MemoryPool* pool) {
    std::vector<std::shared_ptr<ArrayBuilder>> field_builders;
    field_builders.reserve(type.fields().size());
    for (const auto& field : type.fields()) {
      MakeBuilderImpl impl{pool, field->type(), exact_index_type, /*out=*/nullptr};
      RETURN_NOT_OK(VisitTypeInline(*field->type(), &impl));
      field_builders.emplace_back(std::move(impl.out));
    }
    return field_builders;
  }

  MemoryPool* pool;
  const std::shared_ptr<DataType>& type;
  bool exact_index_type;
  std::unique_ptr<ArrayBuilder> out;
};
// Constructs the builder for `type`, allocating from `pool`, and hands it to
// the caller through `*out`. The visitor runs with exact_index_type disabled.
// If the visitor reports an error, that error is returned and `*out` is not
// assigned.
Status MakeBuilder(MemoryPool* pool, const std::shared_ptr<DataType>& type,
                   std::unique_ptr<ArrayBuilder>* out) {
  constexpr bool kExactIndexType = false;
  MakeBuilderImpl impl{pool, type, kExactIndexType, /*out=*/nullptr};

  RETURN_NOT_OK(VisitTypeInline(*type, &impl));

  *out = std::move(impl.out);
  return Status::OK();
}
// Same as MakeBuilder(), except that the visitor runs with exact_index_type
// enabled. If the visitor reports an error, that error is returned and `*out`
// is not assigned.
Status MakeBuilderExactIndex(MemoryPool* pool, const std::shared_ptr<DataType>& type,
                             std::unique_ptr<ArrayBuilder>* out) {
  constexpr bool kExactIndexType = true;
  MakeBuilderImpl impl{pool, type, kExactIndexType, /*out=*/nullptr};

  RETURN_NOT_OK(VisitTypeInline(*type, &impl));

  *out = std::move(impl.out);
  return Status::OK();
}
// Constructs a dictionary builder for the dictionary type `type`, passing the
// given `dictionary` array to the builder, and stores it in `*out`.
//
// \param pool memory pool the builder allocates from
// \param type must be a dictionary type; anything else yields TypeError
// \param dictionary preset dictionary values; when null, a builder without a
//        preset dictionary is created instead
// \param out receives the new builder on success
// \return TypeError if `type` is not a dictionary type, NotImplemented if the
//         dictionary's value type is unsupported, OK otherwise
Status MakeDictionaryBuilder(MemoryPool* pool, const std::shared_ptr<DataType>& type,
                             const std::shared_ptr<Array>& dictionary,
                             std::unique_ptr<ArrayBuilder>* out) {
  // The downcast below is unchecked, so reject non-dictionary types up front
  // instead of invoking undefined behaviour.
  if (type->id() != Type::DICTIONARY) {
    return Status::TypeError("MakeDictionaryBuilder: expected a dictionary type, got ",
                             *type);
  }
  const auto& dict_type = static_cast<const DictionaryType&>(*type);
  DictionaryBuilderCase visitor = {
      pool,       dict_type.index_type(),       dict_type.value_type(),
      dictionary, /*exact_index_type=*/false, out};
  return visitor.Make();
}
} // namespace arrow20
|