summaryrefslogtreecommitdiffstats
path: root/contrib/libs/apache/arrow_next/cpp/src/arrow/array/array_base.h
blob: 73115bdea264ee29085beb2ea741b82f9198725f (plain) (blame)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
#pragma clang system_header
// Licensed to the Apache Software Foundation (ASF) under one
// or more contributor license agreements.  See the NOTICE file
// distributed with this work for additional information
// regarding copyright ownership.  The ASF licenses this file
// to you under the Apache License, Version 2.0 (the
// "License"); you may not use this file except in compliance
// with the License.  You may obtain a copy of the License at
//
//   http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing,
// software distributed under the License is distributed on an
// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
// KIND, either express or implied.  See the License for the
// specific language governing permissions and limitations
// under the License.

#pragma once

#include <cstdint>
#include <iosfwd>
#include <memory>
#include <string>
#include <vector>

#include "arrow/array/data.h"
#include "arrow/buffer.h"
#include "arrow/compare.h"
#include "arrow/result.h"
#include "arrow/status.h"
#include "arrow/type.h"
#include "arrow/util/bit_util.h"
#include "arrow/util/macros.h"
#include "arrow/util/visibility.h"
#include "arrow/visitor.h"

namespace arrow20 {

// ----------------------------------------------------------------------
// User array accessor types

/// \brief Array base type
/// Immutable data array with some logical type and some length.
///
/// Any memory is owned by the respective Buffer instance (or its parents).
///
/// The base class is only required to have a null bitmap buffer if the null
/// count is greater than 0
///
/// If known, the null count can be provided in the base Array constructor. If
/// the null count is not known, pass -1 to indicate that the null count is to
/// be computed on the first call to null_count()
class ARROW_EXPORT Array {
 public:
  virtual ~Array() = default;

  /// \brief Return true if value at index is null. Does not boundscheck
  bool IsNull(int64_t i) const { return !IsValid(i); }

  /// \brief Return true if value at index is valid (not null). Does not
  /// boundscheck
  bool IsValid(int64_t i) const {
    if (null_bitmap_data_ != NULLPTR) {
      return bit_util::GetBit(null_bitmap_data_, i + data_->offset);
    }
    // Dispatching with a few conditionals like this makes IsNull more
    // efficient for how it is used in practice. Making IsNull virtual
    // would add a vtable lookup to every call and prevent inlining +
    // a potential inner-branch removal.
    if (type_id() == Type::SPARSE_UNION) {
      return !internal::IsNullSparseUnion(*data_, i);
    }
    if (type_id() == Type::DENSE_UNION) {
      return !internal::IsNullDenseUnion(*data_, i);
    }
    if (type_id() == Type::RUN_END_ENCODED) {
      return !internal::IsNullRunEndEncoded(*data_, i);
    }
    return data_->null_count != data_->length;
  }

  /// \brief Return a Scalar containing the value of this array at i
  Result<std::shared_ptr<Scalar>> GetScalar(int64_t i) const;

  /// Size in the number of elements this array contains.
  int64_t length() const { return data_->length; }

  /// A relative position into another array's data, to enable zero-copy
  /// slicing. This value defaults to zero
  int64_t offset() const { return data_->offset; }

  /// The number of null entries in the array. If the null count was not known
  /// at time of construction (and set to a negative value), then the null
  /// count will be computed and cached on the first invocation of this
  /// function
  int64_t null_count() const;

  /// \brief Computes the logical null count for arrays of all types including
  /// those that do not have a validity bitmap like union and run-end encoded
  /// arrays
  ///
  /// If the array has a validity bitmap, this function behaves the same as
  /// null_count(). For types that have no validity bitmap, this function will
  /// recompute the null count every time it is called.
  ///
  /// \see GetNullCount
  int64_t ComputeLogicalNullCount() const;

  const std::shared_ptr<DataType>& type() const { return data_->type; }
  Type::type type_id() const { return data_->type->id(); }

  /// Buffer for the validity (null) bitmap, if any. Note that Union types
  /// never have a null bitmap.
  ///
  /// Note that for `null_count == 0` or for null type, this will be null.
  /// This buffer does not account for any slice offset
  const std::shared_ptr<Buffer>& null_bitmap() const { return data_->buffers[0]; }

  /// Raw pointer to the null bitmap.
  ///
  /// Note that for `null_count == 0` or for null type, this will be null.
  /// This buffer does not account for any slice offset
  const uint8_t* null_bitmap_data() const { return null_bitmap_data_; }

  /// Equality comparison with another array
  bool Equals(const Array& arr, const EqualOptions& = EqualOptions::Defaults()) const;
  bool Equals(const std::shared_ptr<Array>& arr,
              const EqualOptions& = EqualOptions::Defaults()) const;

  /// \brief Return the formatted unified diff of arrow20::Diff between this
  /// Array and another Array
  std::string Diff(const Array& other) const;

  /// Approximate equality comparison with another array
  ///
  /// epsilon is only used if this is FloatArray or DoubleArray
  bool ApproxEquals(const std::shared_ptr<Array>& arr,
                    const EqualOptions& = EqualOptions::Defaults()) const;
  bool ApproxEquals(const Array& arr,
                    const EqualOptions& = EqualOptions::Defaults()) const;

  /// Compare if the range of slots specified are equal for the given array and
  /// this array.  end_idx exclusive.  This methods does not bounds check.
  bool RangeEquals(int64_t start_idx, int64_t end_idx, int64_t other_start_idx,
                   const Array& other,
                   const EqualOptions& = EqualOptions::Defaults()) const;
  bool RangeEquals(int64_t start_idx, int64_t end_idx, int64_t other_start_idx,
                   const std::shared_ptr<Array>& other,
                   const EqualOptions& = EqualOptions::Defaults()) const;
  bool RangeEquals(const Array& other, int64_t start_idx, int64_t end_idx,
                   int64_t other_start_idx,
                   const EqualOptions& = EqualOptions::Defaults()) const;
  bool RangeEquals(const std::shared_ptr<Array>& other, int64_t start_idx,
                   int64_t end_idx, int64_t other_start_idx,
                   const EqualOptions& = EqualOptions::Defaults()) const;

  /// \brief Apply the ArrayVisitor::Visit() method specialized to the array type
  Status Accept(ArrayVisitor* visitor) const;

  /// Construct a zero-copy view of this array with the given type.
  ///
  /// This method checks if the types are layout-compatible.
  /// Nested types are traversed in depth-first order. Data buffers must have
  /// the same item sizes, even though the logical types may be different.
  /// An error is returned if the types are not layout-compatible.
  Result<std::shared_ptr<Array>> View(const std::shared_ptr<DataType>& type) const;

  /// \brief Construct a copy of the array with all buffers on destination
  /// Memory Manager
  ///
  /// This method recursively copies the array's buffers and those of its children
  /// onto the destination MemoryManager device and returns the new Array.
  Result<std::shared_ptr<Array>> CopyTo(const std::shared_ptr<MemoryManager>& to) const;

  /// \brief Construct a new array attempting to zero-copy view if possible.
  ///
  /// Like CopyTo this method recursively goes through all of the array's buffers
  /// and those of it's children and first attempts to create zero-copy
  /// views on the destination MemoryManager device. If it can't, it falls back
  /// to performing a copy. See Buffer::ViewOrCopy.
  Result<std::shared_ptr<Array>> ViewOrCopyTo(
      const std::shared_ptr<MemoryManager>& to) const;

  /// Construct a zero-copy slice of the array with the indicated offset and
  /// length
  ///
  /// \param[in] offset the position of the first element in the constructed
  /// slice
  /// \param[in] length the length of the slice. If there are not enough
  /// elements in the array, the length will be adjusted accordingly
  ///
  /// \return a new object wrapped in std::shared_ptr<Array>
  std::shared_ptr<Array> Slice(int64_t offset, int64_t length) const;

  /// Slice from offset until end of the array
  std::shared_ptr<Array> Slice(int64_t offset) const;

  /// Input-checking variant of Array::Slice
  Result<std::shared_ptr<Array>> SliceSafe(int64_t offset, int64_t length) const;
  /// Input-checking variant of Array::Slice
  Result<std::shared_ptr<Array>> SliceSafe(int64_t offset) const;

  const std::shared_ptr<ArrayData>& data() const { return data_; }

  int num_fields() const { return static_cast<int>(data_->child_data.size()); }

  /// \return PrettyPrint representation of array suitable for debugging
  std::string ToString() const;

  /// \brief Perform cheap validation checks to determine obvious inconsistencies
  /// within the array's internal data.
  ///
  /// This is O(k) where k is the number of descendents.
  ///
  /// \return Status
  Status Validate() const;

  /// \brief Perform extensive validation checks to determine inconsistencies
  /// within the array's internal data.
  ///
  /// This is potentially O(k*n) where k is the number of descendents and n
  /// is the array length.
  ///
  /// \return Status
  Status ValidateFull() const;

  /// \brief Return the device_type that this array's data is allocated on
  ///
  /// This just delegates to calling device_type on the underlying ArrayData
  /// object which backs this Array.
  ///
  /// \return DeviceAllocationType
  DeviceAllocationType device_type() const { return data_->device_type(); }

  /// \brief Return the statistics of this Array
  ///
  /// This just delegates to calling statistics on the underlying ArrayData
  /// object which backs this Array.
  ///
  /// \return const std::shared_ptr<ArrayStatistics>&
  const std::shared_ptr<ArrayStatistics>& statistics() const { return data_->statistics; }

 protected:
  Array() = default;
  ARROW_DEFAULT_MOVE_AND_ASSIGN(Array);

  std::shared_ptr<ArrayData> data_;
  const uint8_t* null_bitmap_data_ = NULLPTR;

  /// Protected method for constructors
  void SetData(const std::shared_ptr<ArrayData>& data) {
    if (data->buffers.size() > 0) {
      null_bitmap_data_ = data->GetValuesSafe<uint8_t>(0, /*offset=*/0);
    } else {
      null_bitmap_data_ = NULLPTR;
    }
    data_ = data;
  }

 private:
  ARROW_DISALLOW_COPY_AND_ASSIGN(Array);

  ARROW_FRIEND_EXPORT friend void PrintTo(const Array& x, std::ostream* os);
};

/// Stream the array's ToString() representation into `os` and return the
/// same stream so that insertions can be chained.
static inline std::ostream& operator<<(std::ostream& os, const Array& x) {
  return os << x.ToString();
}

/// Base class for non-nested arrays
class ARROW_EXPORT FlatArray : public Array {
 protected:
  using Array::Array;
};

/// Base class for arrays of fixed-size logical types
class ARROW_EXPORT PrimitiveArray : public FlatArray {
 public:
  /// Does not account for any slice offset
  const std::shared_ptr<Buffer>& values() const { return data_->buffers[1]; }

 protected:
  PrimitiveArray(const std::shared_ptr<DataType>& type, int64_t length,
                 const std::shared_ptr<Buffer>& data,
                 const std::shared_ptr<Buffer>& null_bitmap = NULLPTR,
                 int64_t null_count = kUnknownNullCount, int64_t offset = 0);

  PrimitiveArray() : raw_values_(NULLPTR) {}

  void SetData(const std::shared_ptr<ArrayData>& data) {
    this->Array::SetData(data);
    raw_values_ = data->GetValuesSafe<uint8_t>(1, /*offset=*/0);
  }

  explicit PrimitiveArray(const std::shared_ptr<ArrayData>& data) { SetData(data); }

  const uint8_t* raw_values_;
};

/// Degenerate null type Array
class ARROW_EXPORT NullArray : public FlatArray {
 public:
  using TypeClass = NullType;

  explicit NullArray(const std::shared_ptr<ArrayData>& data) { SetData(data); }
  explicit NullArray(int64_t length);

 private:
  void SetData(const std::shared_ptr<ArrayData>& data) {
    null_bitmap_data_ = NULLPTR;
    data->null_count = data->length;
    data_ = data;
  }
};

}  // namespace arrow20