summary refs log tree commit diff stats
path: root/contrib/libs/apache/arrow_next/cpp/src/arrow/compute/function.h
diff options
context:
space:
mode:
author robot-piglet <[email protected]> 2025-08-14 11:26:15 +0300
committer robot-piglet <[email protected]> 2025-08-14 12:06:36 +0300
commit dc2bf727ea4698fa382f0f8623a8854c4900e212 (patch)
tree a621e92060fd7560066f33a323b4b8aca34f1e36 /contrib/libs/apache/arrow_next/cpp/src/arrow/compute/function.h
parent 322ee7d149464c6f18d6a330d937227cb022b9f3 (diff)
Intermediate changes
commit_hash:746e9b78ab4c78ba4f30511f1fa9330c0d56a406
Diffstat (limited to 'contrib/libs/apache/arrow_next/cpp/src/arrow/compute/function.h')
-rw-r--r-- contrib/libs/apache/arrow_next/cpp/src/arrow/compute/function.h 410
1 files changed, 410 insertions, 0 deletions
diff --git a/contrib/libs/apache/arrow_next/cpp/src/arrow/compute/function.h b/contrib/libs/apache/arrow_next/cpp/src/arrow/compute/function.h
new file mode 100644
index 00000000000..d3c31d7212b
--- /dev/null
+++ b/contrib/libs/apache/arrow_next/cpp/src/arrow/compute/function.h
@@ -0,0 +1,410 @@
+#pragma clang system_header
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements. See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership. The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License. You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied. See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+// NOTE: API is EXPERIMENTAL and will change without going through a
+// deprecation cycle.
+
+#pragma once
+
+#include <string>
+#include <utility>
+#include <vector>
+
+#include "arrow/compute/kernel.h"
+#include "arrow/compute/type_fwd.h"
+#include "arrow/datum.h"
+#include "arrow/result.h"
+#include "arrow/status.h"
+#include "arrow/util/compare.h"
+#include "arrow/util/macros.h"
+#include "arrow/util/visibility.h"
+
+namespace arrow20 {
+namespace compute {
+
+/// \addtogroup compute-functions
+/// @{
+
+/// \brief Describes how many arguments a function requires.
+///
+/// Naming conventions taken from https://en.wikipedia.org/wiki/Arity.
+struct ARROW_EXPORT Arity {
+  /// \brief Arity of a function taking no arguments
+  static Arity Nullary() { return Arity{0, false}; }
+
+  /// \brief Arity of a function taking 1 argument
+  static Arity Unary() { return Arity{1, false}; }
+
+  /// \brief Arity of a function taking 2 arguments
+  static Arity Binary() { return Arity{2, false}; }
+
+  /// \brief Arity of a function taking 3 arguments
+  static Arity Ternary() { return Arity{3, false}; }
+
+  /// \brief Arity of a function taking a variable number of arguments
+  ///
+  /// \param[in] min_args the fewest arguments that must be supplied when
+  /// invoking the function
+  static Arity VarArgs(int min_args = 0) { return Arity{min_args, true}; }
+
+  // NOTE: Cython requires the 0-argument form (default constructor), which is
+  // why both parameters carry default values.
+  explicit Arity(int num_args = 0, bool is_varargs = false)
+      : num_args{num_args}, is_varargs{is_varargs} {}
+
+  /// Count of required arguments; for varargs functions this is the minimum
+  /// count instead.
+  int num_args;
+
+  /// When true, num_args is the minimum number of required arguments.
+  bool is_varargs = false;
+};
+
+/// \brief Human-readable documentation attached to a compute function.
+struct ARROW_EXPORT FunctionDoc {
+  /// \brief A one-line summary of the function, using a verb.
+  ///
+  /// For example, "Add two numeric arrays or scalars".
+  std::string summary;
+
+  /// \brief A detailed description of the function, meant to follow the summary.
+  std::string description;
+
+  /// \brief Symbolic names (identifiers) for the function arguments.
+  ///
+  /// Some bindings may use this to generate nicer function signatures.
+  std::vector<std::string> arg_names;
+
+  // TODO add argument descriptions?
+
+  /// \brief Name of the options class, if any.
+  std::string options_class;
+
+  /// \brief Whether options are required for function execution
+  ///
+  /// If false, then either the function does not have an options class
+  /// or there is a usable default options value.
+  ///
+  /// Initialized in-class so that a default-initialized FunctionDoc
+  /// (`FunctionDoc doc;`) never holds an indeterminate value here; the
+  /// value matches the default of the full constructor below.
+  bool options_required = false;
+
+  /// \brief Construct an empty doc: empty strings, no argument names,
+  /// options not required.
+  FunctionDoc() = default;
+
+  /// \brief Construct a doc from its parts.
+  ///
+  /// All string/vector arguments are taken by value and moved into the
+  /// corresponding members.
+  ///
+  /// \param[in] summary one-line summary
+  /// \param[in] description detailed description
+  /// \param[in] arg_names symbolic names of the function arguments
+  /// \param[in] options_class name of the options class, empty if none
+  /// \param[in] options_required whether options must be supplied
+  FunctionDoc(std::string summary, std::string description,
+              std::vector<std::string> arg_names, std::string options_class = "",
+              bool options_required = false)
+      : summary(std::move(summary)),
+        description(std::move(description)),
+        arg_names(std::move(arg_names)),
+        options_class(std::move(options_class)),
+        options_required(options_required) {}
+
+  /// \brief Return a reference to a shared FunctionDoc instance.
+  ///
+  /// NOTE(review): defined out of line; presumably the returned doc has all
+  /// fields empty, per the name -- confirm against the implementation.
+  static const FunctionDoc& Empty();
+};
+
+/// \brief An executor of a function with a preconfigured kernel
+///
+/// This is a pure interface: both Init and Execute are pure virtual and must
+/// be supplied by a concrete executor.
+class ARROW_EXPORT FunctionExecutor {
+ public:
+ /// Virtual so that executors can be destroyed through a base-class pointer.
+ virtual ~FunctionExecutor() = default;
+ /// \brief Initialize or re-initialize the preconfigured kernel
+ ///
+ /// This method may be called zero or more times. Depending on how
+ /// the FunctionExecutor was obtained, it may already have been initialized.
+ ///
+ /// \param[in] options Function options to use; defaults to NULLPTR
+ /// \param[in] exec_ctx Execution context to use; defaults to NULLPTR
+ ///
+ /// NOTE(review): how a null `options` or `exec_ctx` is interpreted is decided
+ /// by the implementation and is not visible in this header -- confirm.
+ virtual Status Init(const FunctionOptions* options = NULLPTR,
+ ExecContext* exec_ctx = NULLPTR) = 0;
+ /// \brief Execute the preconfigured kernel with arguments that must fit it
+ ///
+ /// The method requires the arguments be castable to the preconfigured types.
+ ///
+ /// \param[in] args Arguments to execute the function on
+ /// \param[in] length Length of arguments batch or -1 to default it. If the
+ /// function has no parameters, this determines the batch length, defaulting
+ /// to 0. Otherwise, if the function is scalar, this must equal the argument
+ /// batch's inferred length or be -1 to default to it. This is ignored for
+ /// vector functions.
+ /// \return the resulting Datum, or an error carried by the Result
+ virtual Result<Datum> Execute(const std::vector<Datum>& args, int64_t length = -1) = 0;
+};
+
+/// \brief Base class for compute functions. Function implementations contain a
+/// collection of "kernels" which are implementations of the function for
+/// specific argument types. Selecting a viable kernel for executing a function
+/// is referred to as "dispatching".
+class ARROW_EXPORT Function {
+ public:
+ /// \brief The kind of function, which indicates in what contexts it is
+ /// valid for use.
+ enum Kind {
+ /// A function that performs scalar data operations on whole arrays of
+ /// data. Can generally process Array or Scalar values. The size of the
+ /// output will be the same as the size (or broadcasted size, in the case
+ /// of mixing Array and Scalar inputs) of the input.
+ SCALAR,
+
+ /// A function with array input and output whose behavior depends on the
+ /// values of the entire arrays passed, rather than the value of each scalar
+ /// value.
+ VECTOR,
+
+ /// A function that computes scalar summary statistics from array input.
+ SCALAR_AGGREGATE,
+
+ /// A function that computes grouped summary statistics from array input
+ /// and an array of group identifiers.
+ HASH_AGGREGATE,
+
+ /// A function that dispatches to other functions and does not contain its
+ /// own kernels.
+ META
+ };
+
+ /// Virtual so that functions can be destroyed through a base-class pointer.
+ virtual ~Function() = default;
+
+ /// \brief The name of the function. The registry enforces uniqueness of names.
+ const std::string& name() const { return name_; }
+
+ /// \brief The kind of function, which indicates in what contexts it is valid
+ /// for use.
+ Function::Kind kind() const { return kind_; }
+
+ /// \brief Contains the number of arguments the function requires, or if the
+ /// function accepts variable numbers of arguments.
+ const Arity& arity() const { return arity_; }
+
+ /// \brief Return the function documentation
+ const FunctionDoc& doc() const { return doc_; }
+
+ /// \brief Returns the number of registered kernels for this function.
+ virtual int num_kernels() const = 0;
+
+ /// \brief Return a kernel that can execute the function given the exact
+ /// argument types (without implicit type casts).
+ ///
+ /// NB: This function is overridden in CastFunction.
+ virtual Result<const Kernel*> DispatchExact(const std::vector<TypeHolder>& types) const;
+
+ /// \brief Return a best-match kernel that can execute the function given the argument
+ /// types, after implicit casts are applied.
+ ///
+ /// \param[in,out] values Argument types. An element may be modified to
+ /// indicate that the returned kernel only approximately matches the input
+ /// value descriptors; callers are responsible for casting inputs to the type
+ /// required by the kernel.
+ virtual Result<const Kernel*> DispatchBest(std::vector<TypeHolder>* values) const;
+
+ /// \brief Get a function executor with a best-matching kernel
+ ///
+ /// The returned executor will by default work with the default FunctionOptions
+ /// and KernelContext. If you want to change that, call `FunctionExecutor::Init`.
+ ///
+ /// \param[in] inputs Argument types, taken by value
+ virtual Result<std::shared_ptr<FunctionExecutor>> GetBestExecutor(
+ std::vector<TypeHolder> inputs) const;
+
+ /// \brief Execute the function eagerly with the passed input arguments with
+ /// kernel dispatch, batch iteration, and memory allocation details taken
+ /// care of.
+ ///
+ /// If the `options` pointer is null, then `default_options()` will be used.
+ ///
+ /// This function can be overridden in subclasses.
+ virtual Result<Datum> Execute(const std::vector<Datum>& args,
+ const FunctionOptions* options, ExecContext* ctx) const;
+
+ /// \brief Overload of Execute that takes the arguments as an ExecBatch.
+ ///
+ /// NOTE(review): defined out of line; presumably `options` and `ctx` are
+ /// treated as in the overload above -- confirm against the implementation.
+ virtual Result<Datum> Execute(const ExecBatch& batch, const FunctionOptions* options,
+ ExecContext* ctx) const;
+
+ /// \brief Returns the default options for this function.
+ ///
+ /// Whatever option semantics a Function has, implementations must guarantee
+ /// that default_options() is valid to pass to Execute as options.
+ const FunctionOptions* default_options() const { return default_options_; }
+
+ /// \brief Validate this function.
+ ///
+ /// NOTE(review): defined out of line; which checks are performed is not
+ /// visible in this header -- confirm against the implementation.
+ virtual Status Validate() const;
+
+ /// \brief Returns the pure property for this function.
+ ///
+ /// Impure functions are those that may return different results for the same
+ /// input arguments. For example, a function that returns a random number is
+ /// not pure. An expression containing only pure functions can be simplified by
+ /// pre-evaluating any sub-expressions that have constant arguments.
+ ///
+ /// The base implementation reports true; subclasses may override it.
+ virtual bool is_pure() const { return true; }
+
+ protected:
+ /// \brief Construct the base from the function's identifying metadata.
+ ///
+ /// `name` and `doc` are moved into the object and `arity` is copied.
+ /// `default_options` is stored as the raw pointer passed in (no copy of the
+ /// pointee is made here), and that same pointer is what default_options()
+ /// returns.
+ Function(std::string name, Function::Kind kind, const Arity& arity, FunctionDoc doc,
+ const FunctionOptions* default_options)
+ : name_(std::move(name)),
+ kind_(kind),
+ arity_(arity),
+ doc_(std::move(doc)),
+ default_options_(default_options) {}
+
+ /// \brief Check a number of arguments against this function's arity.
+ ///
+ /// NOTE(review): defined out of line; presumably returns a non-OK Status
+ /// when `num_args` is incompatible with arity() -- confirm.
+ Status CheckArity(size_t num_args) const;
+
+ // Function name, returned by name().
+ std::string name_;
+ // Function kind, returned by kind().
+ Function::Kind kind_;
+ // Required argument count, returned by arity().
+ Arity arity_;
+ // Documentation, returned by doc(); const, so fixed at construction.
+ const FunctionDoc doc_;
+ // Non-owning as far as this header shows: the pointer is only stored and returned.
+ const FunctionOptions* default_options_ = NULLPTR;
+};
+
+namespace detail {
+
+/// \brief Function base that stores its kernels by value in a vector.
+///
+/// \tparam KernelType the concrete kernel type held by the function
+template <typename KernelType>
+class FunctionImpl : public Function {
+ public:
+  /// \brief Return pointers to currently-available kernels for inspection
+  ///
+  /// The returned pointers address the elements of the internal kernel
+  /// vector, so they stay valid only while that vector is not reallocated
+  /// or destroyed.
+  std::vector<const KernelType*> kernels() const {
+    std::vector<const KernelType*> result;
+    // The final size is known up front: allocate once instead of regrowing
+    // the vector while appending.
+    result.reserve(kernels_.size());
+    for (const auto& kernel : kernels_) {
+      result.push_back(&kernel);
+    }
+    return result;
+  }
+
+  /// \brief Number of kernels currently stored, narrowed to int.
+  int num_kernels() const override { return static_cast<int>(kernels_.size()); }
+
+ protected:
+  /// \brief Forward all arguments unchanged to the Function base constructor.
+  FunctionImpl(std::string name, Function::Kind kind, const Arity& arity, FunctionDoc doc,
+               const FunctionOptions* default_options)
+      : Function(std::move(name), kind, arity, std::move(doc), default_options) {}
+
+  // Kernels owned by this function, stored by value.
+  std::vector<KernelType> kernels_;
+};
+
+/// \brief Look up a kernel in a function. If no Kernel is found, nullptr is returned.
+///
+/// \param[in] func the function whose kernels are searched
+/// \return the kernel found, or nullptr if none is found
+///
+/// The second (unnamed) parameter is a vector of TypeHolder;
+/// NOTE(review): presumably the argument types to match exactly, per the
+/// function name -- confirm against the implementation.
+ARROW_EXPORT
+const Kernel* DispatchExactImpl(const Function* func, const std::vector<TypeHolder>&);
+
+/// \brief Return an error Status for the case where no Kernel is found.
+///
+/// \param[in] func the function for which dispatch was attempted
+///
+/// The second (unnamed) parameter is a vector of TypeHolder;
+/// NOTE(review): presumably the argument types that failed to match, used to
+/// build the message -- confirm against the implementation.
+ARROW_EXPORT
+Status NoMatchingKernel(const Function* func, const std::vector<TypeHolder>&);
+
+} // namespace detail
+
+/// \brief A function that executes elementwise operations on arrays or
+/// scalars, and therefore whose results generally do not depend on the order
+/// of the values in the arguments. Accepts and returns arrays that are all of
+/// the same size. These functions roughly correspond to the functions used in
+/// SQL expressions.
+class ARROW_EXPORT ScalarFunction : public detail::FunctionImpl<ScalarKernel> {
+ public:
+ /// The kernel type stored by this function.
+ using KernelType = ScalarKernel;
+
+ /// \brief Construct a function of kind Function::SCALAR.
+ ///
+ /// \param[in] name function name, forwarded to the base class
+ /// \param[in] arity required argument count, forwarded to the base class
+ /// \param[in] doc documentation, forwarded to the base class
+ /// \param[in] default_options forwarded to the base class; defaults to NULLPTR
+ /// \param[in] is_pure value later reported by is_pure(); defaults to true
+ ScalarFunction(std::string name, const Arity& arity, FunctionDoc doc,
+ const FunctionOptions* default_options = NULLPTR, bool is_pure = true)
+ : detail::FunctionImpl<ScalarKernel>(std::move(name), Function::SCALAR, arity,
+ std::move(doc), default_options),
+ is_pure_(is_pure) {}
+
+ /// \brief Add a kernel with given input/output types, no required state
+ /// initialization, preallocation for fixed-width types, and default null
+ /// handling (intersect validity bitmaps of inputs).
+ Status AddKernel(std::vector<InputType> in_types, OutputType out_type,
+ ArrayKernelExec exec, KernelInit init = NULLPTR);
+
+ /// \brief Add a kernel (function implementation). Returns error if the
+ /// kernel's signature does not match the function's arity.
+ Status AddKernel(ScalarKernel kernel);
+
+ /// \brief Returns the pure property for this function.
+ ///
+ /// This is the `is_pure` value given at construction.
+ bool is_pure() const override { return is_pure_; }
+
+ private:
+ // Set once by the constructor; const, so it cannot change afterwards.
+ const bool is_pure_;
+};
+
+/// \brief A function that executes general array operations that may yield
+/// outputs of different sizes or have results that depend on the whole array
+/// contents. These functions roughly correspond to the functions found in
+/// non-SQL array languages like APL and its derivatives.
+class ARROW_EXPORT VectorFunction : public detail::FunctionImpl<VectorKernel> {
+ public:
+ /// The kernel type stored by this function.
+ using KernelType = VectorKernel;
+
+ /// \brief Construct a function of kind Function::VECTOR.
+ ///
+ /// \param[in] name function name, forwarded to the base class
+ /// \param[in] arity required argument count, forwarded to the base class
+ /// \param[in] doc documentation, forwarded to the base class
+ /// \param[in] default_options forwarded to the base class; defaults to NULLPTR
+ VectorFunction(std::string name, const Arity& arity, FunctionDoc doc,
+ const FunctionOptions* default_options = NULLPTR)
+ : detail::FunctionImpl<VectorKernel>(std::move(name), Function::VECTOR, arity,
+ std::move(doc), default_options) {}
+
+ /// \brief Add a simple kernel with given input/output types, no required
+ /// state initialization, no data preallocation, and no preallocation of the
+ /// validity bitmap.
+ Status AddKernel(std::vector<InputType> in_types, OutputType out_type,
+ ArrayKernelExec exec, KernelInit init = NULLPTR);
+
+ /// \brief Add a kernel (function implementation). Returns error if the
+ /// kernel's signature does not match the function's arity.
+ Status AddKernel(VectorKernel kernel);
+};
+
+/// \brief A function of kind Function::SCALAR_AGGREGATE, i.e. one that
+/// computes scalar summary statistics from array input. Its kernels are of
+/// type ScalarAggregateKernel.
+class ARROW_EXPORT ScalarAggregateFunction
+ : public detail::FunctionImpl<ScalarAggregateKernel> {
+ public:
+ /// The kernel type stored by this function.
+ using KernelType = ScalarAggregateKernel;
+
+ /// \brief Construct a function of kind Function::SCALAR_AGGREGATE.
+ ///
+ /// \param[in] name function name, forwarded to the base class
+ /// \param[in] arity required argument count, forwarded to the base class
+ /// \param[in] doc documentation, forwarded to the base class
+ /// \param[in] default_options forwarded to the base class; defaults to NULLPTR
+ ScalarAggregateFunction(std::string name, const Arity& arity, FunctionDoc doc,
+ const FunctionOptions* default_options = NULLPTR)
+ : detail::FunctionImpl<ScalarAggregateKernel>(std::move(name),
+ Function::SCALAR_AGGREGATE, arity,
+ std::move(doc), default_options) {}
+
+ /// \brief Add a kernel (function implementation). Returns error if the
+ /// kernel's signature does not match the function's arity.
+ Status AddKernel(ScalarAggregateKernel kernel);
+};
+
+/// \brief A function of kind Function::HASH_AGGREGATE, i.e. one that
+/// computes grouped summary statistics from array input and an array of
+/// group identifiers. Its kernels are of type HashAggregateKernel.
+class ARROW_EXPORT HashAggregateFunction
+ : public detail::FunctionImpl<HashAggregateKernel> {
+ public:
+ /// The kernel type stored by this function.
+ using KernelType = HashAggregateKernel;
+
+ /// \brief Construct a function of kind Function::HASH_AGGREGATE.
+ ///
+ /// \param[in] name function name, forwarded to the base class
+ /// \param[in] arity required argument count, forwarded to the base class
+ /// \param[in] doc documentation, forwarded to the base class
+ /// \param[in] default_options forwarded to the base class; defaults to NULLPTR
+ HashAggregateFunction(std::string name, const Arity& arity, FunctionDoc doc,
+ const FunctionOptions* default_options = NULLPTR)
+ : detail::FunctionImpl<HashAggregateKernel>(std::move(name),
+ Function::HASH_AGGREGATE, arity,
+ std::move(doc), default_options) {}
+
+ /// \brief Add a kernel (function implementation). Returns error if the
+ /// kernel's signature does not match the function's arity.
+ Status AddKernel(HashAggregateKernel kernel);
+};
+
+/// \brief A function that dispatches to other functions. Must implement
+/// MetaFunction::ExecuteImpl.
+///
+/// For Array, ChunkedArray, and Scalar Datum kinds, may rely on the execution
+/// of concrete Function types, but must handle other Datum kinds on its own.
+class ARROW_EXPORT MetaFunction : public Function {
+ public:
+ /// \brief Always 0: this class stores no kernels of its own.
+ int num_kernels() const override { return 0; }
+
+ /// \brief Execute the meta function on a vector of arguments.
+ ///
+ /// NOTE(review): defined out of line; presumably delegates to ExecuteImpl
+ /// -- confirm against the implementation.
+ Result<Datum> Execute(const std::vector<Datum>& args, const FunctionOptions* options,
+ ExecContext* ctx) const override;
+
+ /// \brief Execute the meta function on an ExecBatch.
+ ///
+ /// NOTE(review): defined out of line; presumably delegates to ExecuteImpl
+ /// -- confirm against the implementation.
+ Result<Datum> Execute(const ExecBatch& batch, const FunctionOptions* options,
+ ExecContext* ctx) const override;
+
+ protected:
+ /// \brief Implementation hook that every concrete meta function must
+ /// provide (pure virtual).
+ virtual Result<Datum> ExecuteImpl(const std::vector<Datum>& args,
+ const FunctionOptions* options,
+ ExecContext* ctx) const = 0;
+
+ /// \brief Construct a function of kind Function::META.
+ ///
+ /// \param[in] name function name, forwarded to the base class
+ /// \param[in] arity required argument count, forwarded to the base class
+ /// \param[in] doc documentation, forwarded to the base class
+ /// \param[in] default_options forwarded to the base class; defaults to NULLPTR
+ MetaFunction(std::string name, const Arity& arity, FunctionDoc doc,
+ const FunctionOptions* default_options = NULLPTR)
+ : Function(std::move(name), Function::META, arity, std::move(doc),
+ default_options) {}
+};
+
+/// @}
+
+} // namespace compute
+} // namespace arrow20