// Licensed to the Apache Software Foundation (ASF) under one // or more contributor license agreements. See the NOTICE file // distributed with this work for additional information // regarding copyright ownership. The ASF licenses this file // to you under the Apache License, Version 2.0 (the // "License"); you may not use this file except in compliance // with the License. You may obtain a copy of the License at // // http://www.apache.org/licenses/LICENSE-2.0 // // Unless required by applicable law or agreed to in writing, // software distributed under the License is distributed on an // "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY // KIND, either express or implied. See the License for the // specific language governing permissions and limitations // under the License. #include "contrib/libs/apache/arrow_next/cpp/src/arrow/compute/api_vector.h" #include #include #include #include #include #include "contrib/libs/apache/arrow_next/cpp/src/arrow/array/array_nested.h" #include "contrib/libs/apache/arrow_next/cpp/src/arrow/array/builder_primitive.h" #include "contrib/libs/apache/arrow_next/cpp/src/arrow/compute/exec.h" #include "contrib/libs/apache/arrow_next/cpp/src/arrow/compute/function.h" #include "contrib/libs/apache/arrow_next/cpp/src/arrow/compute/function_internal.h" #include "contrib/libs/apache/arrow_next/cpp/src/arrow/compute/kernels/vector_sort_internal.h" #include "contrib/libs/apache/arrow_next/cpp/src/arrow/compute/registry.h" #include "contrib/libs/apache/arrow_next/cpp/src/arrow/datum.h" #include "contrib/libs/apache/arrow_next/cpp/src/arrow/record_batch.h" #include "contrib/libs/apache/arrow_next/cpp/src/arrow/result.h" #include "contrib/libs/apache/arrow_next/cpp/src/arrow/util/checked_cast.h" #include "contrib/libs/apache/arrow_next/cpp/src/arrow/util/logging.h" #include "contrib/libs/apache/arrow_next/cpp/src/arrow/util/reflection_internal.h" namespace arrow20 { using internal::checked_cast; using internal::checked_pointer_cast; namespace internal { using compute::DictionaryEncodeOptions; using compute::FilterOptions; using compute::NullPlacement; using compute::RankOptions; using compute::RankQuantileOptions; template <> struct EnumTraits : BasicEnumTraits { static std::string name() { return "FilterOptions::NullSelectionBehavior"; } static std::string value_name(FilterOptions::NullSelectionBehavior value) { switch (value) { case FilterOptions::DROP: return "DROP"; case FilterOptions::EMIT_NULL: return "EMIT_NULL"; } return ""; } }; template <> struct EnumTraits : BasicEnumTraits { static std::string name() { return "DictionaryEncodeOptions::NullEncodingBehavior"; } static std::string value_name(DictionaryEncodeOptions::NullEncodingBehavior value) { switch (value) { case DictionaryEncodeOptions::ENCODE: return "ENCODE"; case DictionaryEncodeOptions::MASK: return "MASK"; } return ""; } }; template <> struct EnumTraits : BasicEnumTraits { static std::string name() { return "NullPlacement"; } static std::string value_name(NullPlacement value) { switch (value) { case NullPlacement::AtStart: return "AtStart"; case NullPlacement::AtEnd: return "AtEnd"; } return ""; } }; template <> struct EnumTraits : BasicEnumTraits { static std::string name() { return "Tiebreaker"; } static std::string value_name(RankOptions::Tiebreaker value) { switch (value) { case RankOptions::Min: return "Min"; case RankOptions::Max: return "Max"; case RankOptions::First: return "First"; case RankOptions::Dense: return "Dense"; } return ""; } }; } // namespace internal namespace compute { // ---------------------------------------------------------------------- // Function options namespace internal { namespace { using ::arrow20::internal::DataMember; static auto kFilterOptionsType = GetFunctionOptionsType( DataMember("null_selection_behavior", &FilterOptions::null_selection_behavior)); static auto kTakeOptionsType = GetFunctionOptionsType( DataMember("boundscheck", &TakeOptions::boundscheck)); static auto kDictionaryEncodeOptionsType = GetFunctionOptionsType(DataMember( "null_encoding_behavior", &DictionaryEncodeOptions::null_encoding_behavior)); static auto kRunEndEncodeOptionsType = GetFunctionOptionsType( DataMember("run_end_type", &RunEndEncodeOptions::run_end_type)); static auto kArraySortOptionsType = GetFunctionOptionsType( DataMember("order", &ArraySortOptions::order), DataMember("null_placement", &ArraySortOptions::null_placement)); static auto kSortOptionsType = GetFunctionOptionsType( DataMember("sort_keys", &SortOptions::sort_keys), DataMember("null_placement", &SortOptions::null_placement)); static auto kPartitionNthOptionsType = GetFunctionOptionsType( DataMember("pivot", &PartitionNthOptions::pivot), DataMember("null_placement", &PartitionNthOptions::null_placement)); static auto kWinsorizeOptionsType = GetFunctionOptionsType( DataMember("lower_limit", &WinsorizeOptions::lower_limit), DataMember("upper_limit", &WinsorizeOptions::upper_limit)); static auto kSelectKOptionsType = GetFunctionOptionsType( DataMember("k", &SelectKOptions::k), DataMember("sort_keys", &SelectKOptions::sort_keys)); static auto kCumulativeOptionsType = GetFunctionOptionsType( DataMember("start", &CumulativeOptions::start), DataMember("skip_nulls", &CumulativeOptions::skip_nulls)); static auto kRankOptionsType = GetFunctionOptionsType( DataMember("sort_keys", &RankOptions::sort_keys), DataMember("null_placement", &RankOptions::null_placement), DataMember("tiebreaker", &RankOptions::tiebreaker)); static auto kRankQuantileOptionsType = GetFunctionOptionsType( DataMember("sort_keys", &RankQuantileOptions::sort_keys), DataMember("null_placement", &RankQuantileOptions::null_placement)); static auto kPairwiseOptionsType = GetFunctionOptionsType( DataMember("periods", &PairwiseOptions::periods)); static auto kListFlattenOptionsType = GetFunctionOptionsType( DataMember("recursive", &ListFlattenOptions::recursive)); static auto kInversePermutationOptionsType = GetFunctionOptionsType( DataMember("max_index", &InversePermutationOptions::max_index), DataMember("output_type", &InversePermutationOptions::output_type)); static auto kScatterOptionsType = GetFunctionOptionsType( DataMember("max_index", &ScatterOptions::max_index)); } // namespace } // namespace internal FilterOptions::FilterOptions(NullSelectionBehavior null_selection) : FunctionOptions(internal::kFilterOptionsType), null_selection_behavior(null_selection) {} constexpr char FilterOptions::kTypeName[]; TakeOptions::TakeOptions(bool boundscheck) : FunctionOptions(internal::kTakeOptionsType), boundscheck(boundscheck) {} constexpr char TakeOptions::kTypeName[]; DictionaryEncodeOptions::DictionaryEncodeOptions(NullEncodingBehavior null_encoding) : FunctionOptions(internal::kDictionaryEncodeOptionsType), null_encoding_behavior(null_encoding) {} constexpr char DictionaryEncodeOptions::kTypeName[]; RunEndEncodeOptions::RunEndEncodeOptions(std::shared_ptr run_end_type) : FunctionOptions(internal::kRunEndEncodeOptionsType), run_end_type{std::move(run_end_type)} {} ArraySortOptions::ArraySortOptions(SortOrder order, NullPlacement null_placement) : FunctionOptions(internal::kArraySortOptionsType), order(order), null_placement(null_placement) {} constexpr char ArraySortOptions::kTypeName[]; SortOptions::SortOptions(std::vector sort_keys, NullPlacement null_placement) : FunctionOptions(internal::kSortOptionsType), sort_keys(std::move(sort_keys)), null_placement(null_placement) {} SortOptions::SortOptions(const Ordering& ordering) : FunctionOptions(internal::kSortOptionsType), sort_keys(ordering.sort_keys()), null_placement(ordering.null_placement()) {} constexpr char SortOptions::kTypeName[]; PartitionNthOptions::PartitionNthOptions(int64_t pivot, NullPlacement null_placement) : FunctionOptions(internal::kPartitionNthOptionsType), pivot(pivot), null_placement(null_placement) {} constexpr char PartitionNthOptions::kTypeName[]; WinsorizeOptions::WinsorizeOptions(double lower_limit, double upper_limit) : FunctionOptions(internal::kWinsorizeOptionsType), lower_limit(lower_limit), upper_limit(upper_limit) {} SelectKOptions::SelectKOptions(int64_t k, std::vector sort_keys) : FunctionOptions(internal::kSelectKOptionsType), k(k), sort_keys(std::move(sort_keys)) {} constexpr char SelectKOptions::kTypeName[]; CumulativeOptions::CumulativeOptions(bool skip_nulls) : FunctionOptions(internal::kCumulativeOptionsType), skip_nulls(skip_nulls) {} CumulativeOptions::CumulativeOptions(double start, bool skip_nulls) : CumulativeOptions(std::make_shared(start), skip_nulls) {} CumulativeOptions::CumulativeOptions(std::shared_ptr start, bool skip_nulls) : FunctionOptions(internal::kCumulativeOptionsType), start(std::move(start)), skip_nulls(skip_nulls) {} constexpr char CumulativeOptions::kTypeName[]; RankOptions::RankOptions(std::vector sort_keys, NullPlacement null_placement, RankOptions::Tiebreaker tiebreaker) : FunctionOptions(internal::kRankOptionsType), sort_keys(std::move(sort_keys)), null_placement(null_placement), tiebreaker(tiebreaker) {} constexpr char RankOptions::kTypeName[]; RankQuantileOptions::RankQuantileOptions(std::vector sort_keys, NullPlacement null_placement) : FunctionOptions(internal::kRankQuantileOptionsType), sort_keys(std::move(sort_keys)), null_placement(null_placement) {} constexpr char RankQuantileOptions::kTypeName[]; PairwiseOptions::PairwiseOptions(int64_t periods) : FunctionOptions(internal::kPairwiseOptionsType), periods(periods) {} constexpr char PairwiseOptions::kTypeName[]; ListFlattenOptions::ListFlattenOptions(bool recursive) : FunctionOptions(internal::kListFlattenOptionsType), recursive(recursive) {} constexpr char ListFlattenOptions::kTypeName[]; InversePermutationOptions::InversePermutationOptions( int64_t max_index, std::shared_ptr output_type) : FunctionOptions(internal::kInversePermutationOptionsType), max_index(max_index), output_type(std::move(output_type)) {} constexpr char InversePermutationOptions::kTypeName[]; ScatterOptions::ScatterOptions(int64_t max_index) : FunctionOptions(internal::kScatterOptionsType), max_index(max_index) {} constexpr char ScatterOptions::kTypeName[]; namespace internal { void RegisterVectorOptions(FunctionRegistry* registry) { DCHECK_OK(registry->AddFunctionOptionsType(kFilterOptionsType)); DCHECK_OK(registry->AddFunctionOptionsType(kTakeOptionsType)); DCHECK_OK(registry->AddFunctionOptionsType(kDictionaryEncodeOptionsType)); DCHECK_OK(registry->AddFunctionOptionsType(kRunEndEncodeOptionsType)); DCHECK_OK(registry->AddFunctionOptionsType(kArraySortOptionsType)); DCHECK_OK(registry->AddFunctionOptionsType(kSortOptionsType)); DCHECK_OK(registry->AddFunctionOptionsType(kPartitionNthOptionsType)); DCHECK_OK(registry->AddFunctionOptionsType(kSelectKOptionsType)); DCHECK_OK(registry->AddFunctionOptionsType(kCumulativeOptionsType)); DCHECK_OK(registry->AddFunctionOptionsType(kRankOptionsType)); DCHECK_OK(registry->AddFunctionOptionsType(kRankQuantileOptionsType)); DCHECK_OK(registry->AddFunctionOptionsType(kPairwiseOptionsType)); DCHECK_OK(registry->AddFunctionOptionsType(kListFlattenOptionsType)); DCHECK_OK(registry->AddFunctionOptionsType(kInversePermutationOptionsType)); DCHECK_OK(registry->AddFunctionOptionsType(kScatterOptionsType)); DCHECK_OK(registry->AddFunctionOptionsType(kWinsorizeOptionsType)); } } // namespace internal // ---------------------------------------------------------------------- // Direct exec interface to kernels Result> NthToIndices(const Array& values, const PartitionNthOptions& options, ExecContext* ctx) { ARROW_ASSIGN_OR_RAISE(Datum result, CallFunction("partition_nth_indices", {Datum(values)}, &options, ctx)); return result.make_array(); } Result> NthToIndices(const Array& values, int64_t n, ExecContext* ctx) { PartitionNthOptions options(/*pivot=*/n); ARROW_ASSIGN_OR_RAISE(Datum result, CallFunction("partition_nth_indices", {Datum(values)}, &options, ctx)); return result.make_array(); } Result> SelectKUnstable(const Datum& datum, const SelectKOptions& options, ExecContext* ctx) { ARROW_ASSIGN_OR_RAISE(Datum result, CallFunction("select_k_unstable", {datum}, &options, ctx)); return result.make_array(); } Result ReplaceWithMask(const Datum& values, const Datum& mask, const Datum& replacements, ExecContext* ctx) { return CallFunction("replace_with_mask", {values, mask, replacements}, ctx); } Result FillNullForward(const Datum& values, ExecContext* ctx) { return CallFunction("fill_null_forward", {values}, ctx); } Result FillNullBackward(const Datum& values, ExecContext* ctx) { return CallFunction("fill_null_backward", {values}, ctx); } Result> SortIndices(const Array& values, const ArraySortOptions& options, ExecContext* ctx) { ARROW_ASSIGN_OR_RAISE( Datum result, CallFunction("array_sort_indices", {Datum(values)}, &options, ctx)); return result.make_array(); } Result> SortIndices(const Array& values, SortOrder order, ExecContext* ctx) { ArraySortOptions options(order); ARROW_ASSIGN_OR_RAISE( Datum result, CallFunction("array_sort_indices", {Datum(values)}, &options, ctx)); return result.make_array(); } Result> SortIndices(const ChunkedArray& chunked_array, const ArraySortOptions& array_options, ExecContext* ctx) { SortOptions options({SortKey("", array_options.order)}, array_options.null_placement); ARROW_ASSIGN_OR_RAISE( Datum result, CallFunction("sort_indices", {Datum(chunked_array)}, &options, ctx)); return result.make_array(); } Result> SortIndices(const ChunkedArray& chunked_array, SortOrder order, ExecContext* ctx) { return SortIndices(chunked_array, ArraySortOptions(order), ctx); } Result> SortIndices(const Datum& datum, const SortOptions& options, ExecContext* ctx) { ARROW_ASSIGN_OR_RAISE(Datum result, CallFunction("sort_indices", {datum}, &options, ctx)); return result.make_array(); } Result> Unique(const Datum& value, ExecContext* ctx) { ARROW_ASSIGN_OR_RAISE(Datum result, CallFunction("unique", {value}, ctx)); return result.make_array(); } Result DictionaryEncode(const Datum& value, const DictionaryEncodeOptions& options, ExecContext* ctx) { return CallFunction("dictionary_encode", {value}, &options, ctx); } Result RunEndEncode(const Datum& value, const RunEndEncodeOptions& options, ExecContext* ctx) { return CallFunction("run_end_encode", {value}, &options, ctx); } Result RunEndDecode(const Datum& value, ExecContext* ctx) { return CallFunction("run_end_decode", {value}, ctx); } const char kValuesFieldName[] = "values"; const char kCountsFieldName[] = "counts"; const int32_t kValuesFieldIndex = 0; const int32_t kCountsFieldIndex = 1; Result> ValueCounts(const Datum& value, ExecContext* ctx) { ARROW_ASSIGN_OR_RAISE(Datum result, CallFunction("value_counts", {value}, ctx)); return checked_pointer_cast(result.make_array()); } Result> PairwiseDiff(const Array& array, const PairwiseOptions& options, bool check_overflow, ExecContext* ctx) { auto func_name = check_overflow ? "pairwise_diff_checked" : "pairwise_diff"; ARROW_ASSIGN_OR_RAISE(Datum result, CallFunction(func_name, {Datum(array)}, &options, ctx)); return result.make_array(); } // ---------------------------------------------------------------------- // Filter- and take-related selection functions Result Filter(const Datum& values, const Datum& filter, const FilterOptions& options, ExecContext* ctx) { // Invoke metafunction which deals with Datum kinds other than just Array, // ChunkedArray. return CallFunction("filter", {values, filter}, &options, ctx); } Result Take(const Datum& values, const Datum& indices, const TakeOptions& options, ExecContext* ctx) { // Invoke metafunction which deals with Datum kinds other than just Array, // ChunkedArray. return CallFunction("take", {values, indices}, &options, ctx); } Result> Take(const Array& values, const Array& indices, const TakeOptions& options, ExecContext* ctx) { ARROW_ASSIGN_OR_RAISE(Datum out, Take(Datum(values), Datum(indices), options, ctx)); return out.make_array(); } // ---------------------------------------------------------------------- // Dropnull functions Result DropNull(const Datum& values, ExecContext* ctx) { // Invoke metafunction which deals with Datum kinds other than just Array, // ChunkedArray. return CallFunction("drop_null", {values}, ctx); } Result> DropNull(const Array& values, ExecContext* ctx) { ARROW_ASSIGN_OR_RAISE(Datum out, DropNull(Datum(values), ctx)); return out.make_array(); } // ---------------------------------------------------------------------- // Cumulative functions Result CumulativeSum(const Datum& values, const CumulativeOptions& options, bool check_overflow, ExecContext* ctx) { auto func_name = check_overflow ? "cumulative_sum_checked" : "cumulative_sum"; return CallFunction(func_name, {Datum(values)}, &options, ctx); } Result CumulativeProd(const Datum& values, const CumulativeOptions& options, bool check_overflow, ExecContext* ctx) { auto func_name = check_overflow ? "cumulative_prod_checked" : "cumulative_prod"; return CallFunction(func_name, {Datum(values)}, &options, ctx); } Result CumulativeMax(const Datum& values, const CumulativeOptions& options, ExecContext* ctx) { return CallFunction("cumulative_max", {Datum(values)}, &options, ctx); } Result CumulativeMin(const Datum& values, const CumulativeOptions& options, ExecContext* ctx) { return CallFunction("cumulative_min", {Datum(values)}, &options, ctx); } Result CumulativeMean(const Datum& values, const CumulativeOptions& options, ExecContext* ctx) { return CallFunction("cumulative_mean", {Datum(values)}, &options, ctx); } // ---------------------------------------------------------------------- // Swizzle functions Result InversePermutation(const Datum& indices, const InversePermutationOptions& options, ExecContext* ctx) { return CallFunction("inverse_permutation", {indices}, &options, ctx); } Result Scatter(const Datum& values, const Datum& indices, const ScatterOptions& options, ExecContext* ctx) { return CallFunction("scatter", {values, indices}, &options, ctx); } } // namespace compute } // namespace arrow20