#pragma clang system_header // Licensed to the Apache Software Foundation (ASF) under one // or more contributor license agreements. See the NOTICE file // distributed with this work for additional information // regarding copyright ownership. The ASF licenses this file // to you under the Apache License, Version 2.0 (the // "License"); you may not use this file except in compliance // with the License. You may obtain a copy of the License at // // http://www.apache.org/licenses/LICENSE-2.0 // // Unless required by applicable law or agreed to in writing, // software distributed under the License is distributed on an // "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY // KIND, either express or implied. See the License for the // specific language governing permissions and limitations // under the License. #pragma once #include #include #include #include "contrib/libs/apache/arrow_next/cpp/src/arrow/array/util.h" #include "contrib/libs/apache/arrow_next/cpp/src/arrow/buffer.h" #include "contrib/libs/apache/arrow_next/cpp/src/arrow/compute/kernels/codegen_internal.h" #include "contrib/libs/apache/arrow_next/cpp/src/arrow/compute/type_fwd.h" #include "contrib/libs/apache/arrow_next/cpp/src/arrow/util/bit_run_reader.h" #include "contrib/libs/apache/arrow_next/cpp/src/arrow/util/bitmap_ops.h" #include "contrib/libs/apache/arrow_next/cpp/src/arrow/util/math_constants.h" namespace arrow20 { using internal::CountAndSetBits; using internal::CountSetBits; namespace compute { class ScalarFunction; namespace internal { template using maybe_make_unsigned = typename std::conditional::value && !std::is_same::value, std::make_unsigned, std::common_type>::type; template ::type> constexpr Unsigned to_unsigned(T signed_) { return static_cast(signed_); } // Return (min, max) of a numerical array, ignore nulls. // For empty array, return the maximal number limit as 'min', and minimal limit as 'max'. template ARROW_NOINLINE std::pair GetMinMax(const ArraySpan& data) { T min = std::numeric_limits::max(); T max = std::numeric_limits::lowest(); const T* values = data.GetValues(1); arrow20::internal::VisitSetBitRunsVoid(data.buffers[0].data, data.offset, data.length, [&](int64_t pos, int64_t len) { for (int64_t i = 0; i < len; ++i) { min = std::min(min, values[pos + i]); max = std::max(max, values[pos + i]); } }); return std::make_pair(min, max); } template std::pair GetMinMax(const ChunkedArray& arr) { T min = std::numeric_limits::max(); T max = std::numeric_limits::lowest(); for (const auto& chunk : arr.chunks()) { T local_min, local_max; std::tie(local_min, local_max) = GetMinMax(*chunk->data()); min = std::min(min, local_min); max = std::max(max, local_max); } return std::make_pair(min, max); } // Count value occurrences of an array, ignore nulls. // 'counts' must be zeroed and with enough size. template ARROW_NOINLINE int64_t CountValues(const ArraySpan& data, T min, uint64_t* counts) { const int64_t n = data.length - data.GetNullCount(); if (n > 0) { const T* values = data.GetValues(1); arrow20::internal::VisitSetBitRunsVoid(data.buffers[0].data, data.offset, data.length, [&](int64_t pos, int64_t len) { for (int64_t i = 0; i < len; ++i) { ++counts[values[pos + i] - min]; } }); } return n; } template int64_t CountValues(const ChunkedArray& values, T min, uint64_t* counts) { int64_t n = 0; for (const auto& array : values.chunks()) { n += CountValues(*array->data(), min, counts); } return n; } // Copy numerical array values to a buffer, ignore nulls. template ARROW_NOINLINE int64_t CopyNonNullValues(const ArraySpan& data, T* out) { const int64_t n = data.length - data.GetNullCount(); if (n > 0) { int64_t index = 0; const T* values = data.GetValues(1); arrow20::internal::VisitSetBitRunsVoid( data.buffers[0].data, data.offset, data.length, [&](int64_t pos, int64_t len) { memcpy(out + index, values + pos, len * sizeof(T)); index += len; }); } return n; } template int64_t CopyNonNullValues(const ChunkedArray& arr, T* out) { int64_t n = 0; for (const auto& chunk : arr.chunks()) { n += CopyNonNullValues(*chunk->data(), out + n); } return n; } ExecValue GetExecValue(const Datum& value); int64_t GetTrueCount(const ArraySpan& mask); template