#pragma clang system_header // Licensed to the Apache Software Foundation (ASF) under one // or more contributor license agreements. See the NOTICE file // distributed with this work for additional information // regarding copyright ownership. The ASF licenses this file // to you under the Apache License, Version 2.0 (the // "License"); you may not use this file except in compliance // with the License. You may obtain a copy of the License at // // http://www.apache.org/licenses/LICENSE-2.0 // // Unless required by applicable law or agreed to in writing, // software distributed under the License is distributed on an // "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY // KIND, either express or implied. See the License for the // specific language governing permissions and limitations // under the License. #pragma once #include #include #include #include "contrib/libs/apache/arrow_next/cpp/src/arrow/array/builder_base.h" #include "contrib/libs/apache/arrow_next/cpp/src/arrow/array/builder_binary.h" #include "contrib/libs/apache/arrow_next/cpp/src/arrow/array/builder_nested.h" #include "contrib/libs/apache/arrow_next/cpp/src/arrow/compute/api_vector.h" #include "contrib/libs/apache/arrow_next/cpp/src/arrow/compute/function.h" #include "contrib/libs/apache/arrow_next/cpp/src/arrow/compute/type_fwd.h" #include "contrib/libs/apache/arrow_next/cpp/src/arrow/result.h" #include "contrib/libs/apache/arrow_next/cpp/src/arrow/scalar.h" #include "contrib/libs/apache/arrow_next/cpp/src/arrow/status.h" #include "contrib/libs/apache/arrow_next/cpp/src/arrow/util/checked_cast.h" #include "contrib/libs/apache/arrow_next/cpp/src/arrow/util/key_value_metadata.h" #include "contrib/libs/apache/arrow_next/cpp/src/arrow/util/reflection_internal.h" #include "contrib/libs/apache/arrow_next/cpp/src/arrow/util/string.h" #include "contrib/libs/apache/arrow_next/cpp/src/arrow/util/visibility.h" namespace arrow20 { struct Scalar; struct StructScalar; using ::arrow20::internal::checked_cast; namespace internal { template <> struct EnumTraits : BasicEnumTraits { static std::string name() { return "SortOrder"; } static std::string value_name(compute::SortOrder value) { switch (value) { case compute::SortOrder::Ascending: return "Ascending"; case compute::SortOrder::Descending: return "Descending"; } return ""; } }; } // namespace internal namespace compute { namespace internal { using arrow20::internal::EnumTraits; using arrow20::internal::has_enum_traits; template ::type> Result ValidateEnumValue(CType raw) { for (auto valid : EnumTraits::values()) { if (raw == static_cast(valid)) { return static_cast(raw); } } return Status::Invalid("Invalid value for ", EnumTraits::name(), ": ", raw); } class ARROW_EXPORT GenericOptionsType : public FunctionOptionsType { public: Result> Serialize(const FunctionOptions&) const override; Result> Deserialize( const Buffer& buffer) const override; virtual Status ToStructScalar(const FunctionOptions& options, std::vector* field_names, std::vector>* values) const = 0; virtual Result> FromStructScalar( const StructScalar& scalar) const = 0; }; ARROW_EXPORT Result> FunctionOptionsToStructScalar( const FunctionOptions&); ARROW_EXPORT Result> FunctionOptionsFromStructScalar( const StructScalar&); ARROW_EXPORT Result> DeserializeFunctionOptions(const Buffer& buffer); template static inline enable_if_t::value, std::string> GenericToString( const T& value) { std::stringstream ss; ss << value; return ss.str(); } template static inline enable_if_t::value, std::string> GenericToString( const std::optional& value) { return value.has_value() ? GenericToString(value.value()) : "nullopt"; } static inline std::string GenericToString(bool value) { return value ? "true" : "false"; } static inline std::string GenericToString(const std::string& value) { std::stringstream ss; ss << '"' << value << '"'; return ss.str(); } template static inline enable_if_t::value, std::string> GenericToString( const T value) { return EnumTraits::value_name(value); } template static inline std::string GenericToString(const std::shared_ptr& value) { std::stringstream ss; return value ? value->ToString() : ""; } static inline std::string GenericToString(const std::shared_ptr& value) { std::stringstream ss; if (value) { ss << value->type->ToString() << ":" << value->ToString(); } else { ss << ""; } return ss.str(); } static inline std::string GenericToString( const std::shared_ptr& value) { std::stringstream ss; ss << "KeyValueMetadata{"; if (value) { bool first = true; for (const auto& pair : value->sorted_pairs()) { if (!first) ss << ", "; first = false; ss << pair.first << ':' << pair.second; } } ss << '}'; return ss.str(); } static inline std::string GenericToString(const Datum& value) { switch (value.kind()) { case Datum::NONE: return ""; case Datum::SCALAR: return GenericToString(value.scalar()); case Datum::ARRAY: { std::stringstream ss; ss << value.type()->ToString() << ':' << value.make_array()->ToString(); return ss.str(); } default: return value.ToString(); } } template static inline std::string GenericToString(const std::vector& value) { std::stringstream ss; ss << "["; bool first = true; // Don't use range-for with auto& to avoid Clang -Wrange-loop-analysis for (auto it = value.begin(); it != value.end(); it++) { if (!first) ss << ", "; first = false; ss << GenericToString(*it); } ss << ']'; return ss.str(); } static inline std::string GenericToString(SortOrder value) { switch (value) { case SortOrder::Ascending: return "Ascending"; case SortOrder::Descending: return "Descending"; } return ""; } static inline std::string GenericToString(const std::vector& value) { std::stringstream ss; ss << '['; bool first = true; for (const auto& key : value) { if (!first) { ss << ", "; } first = false; ss << key.ToString(); } ss << ']'; return ss.str(); } template static inline bool GenericEquals(const T& left, const T& right) { return left == right; } template static inline bool GenericEquals(const std::shared_ptr& left, const std::shared_ptr& right) { if (left && right) { return left->Equals(*right); } return left == right; } static inline bool IsEmpty(const std::shared_ptr& meta) { return !meta || meta->size() == 0; } static inline bool GenericEquals(const std::shared_ptr& left, const std::shared_ptr& right) { // Special case since null metadata is considered equivalent to empty if (IsEmpty(left) || IsEmpty(right)) { return IsEmpty(left) && IsEmpty(right); } return left->Equals(*right); } template static inline bool GenericEquals(const std::vector& left, const std::vector& right) { if (left.size() != right.size()) return false; for (size_t i = 0; i < left.size(); i++) { if (!GenericEquals(left[i], right[i])) return false; } return true; } template static inline decltype(TypeTraits::ArrowType>::type_singleton()) GenericTypeSingleton() { return TypeTraits::ArrowType>::type_singleton(); } template static inline enable_if_same, std::shared_ptr> GenericTypeSingleton() { return map(binary(), binary()); } template static inline enable_if_t::value, std::shared_ptr> GenericTypeSingleton() { return TypeTraits::Type>::type_singleton(); } template static inline enable_if_same> GenericTypeSingleton() { std::vector> fields; fields.emplace_back(new Field("target", GenericTypeSingleton())); fields.emplace_back(new Field("order", GenericTypeSingleton())); return std::make_shared(std::move(fields)); } // N.B. ordering of overloads is relatively fragile template static inline Result()))> GenericToScalar( const T& value) { return MakeScalar(value); } // For Clang/libc++: when iterating through vector, we can't // pass it by reference so the overload above doesn't apply static inline Result> GenericToScalar(bool value) { return MakeScalar(value); } static inline Result> GenericToScalar(const FieldRef& ref) { return MakeScalar(ref.ToDotPath()); } template ::value>> static inline Result> GenericToScalar(const T value) { using CType = typename EnumTraits::CType; return GenericToScalar(static_cast(value)); } static inline Result> GenericToScalar(const SortKey& key) { ARROW_ASSIGN_OR_RAISE(auto target, GenericToScalar(key.target)); ARROW_ASSIGN_OR_RAISE(auto order, GenericToScalar(key.order)); return StructScalar::Make({target, order}, {"target", "order"}); } static inline Result> GenericToScalar( const std::shared_ptr& value) { auto ty = GenericTypeSingleton>(); std::unique_ptr builder; RETURN_NOT_OK(MakeBuilder(default_memory_pool(), ty, &builder)); auto* map_builder = checked_cast(builder.get()); auto* key_builder = checked_cast(map_builder->key_builder()); auto* item_builder = checked_cast(map_builder->item_builder()); RETURN_NOT_OK(map_builder->Append()); if (value) { RETURN_NOT_OK(key_builder->AppendValues(value->keys())); RETURN_NOT_OK(item_builder->AppendValues(value->values())); } std::shared_ptr arr; RETURN_NOT_OK(map_builder->Finish(&arr)); return arr->GetScalar(0); } template static inline Result> GenericToScalar( const std::vector& value) { std::shared_ptr type = GenericTypeSingleton(); std::vector> scalars; scalars.reserve(value.size()); // Don't use range-for with auto& to avoid Clang -Wrange-loop-analysis for (auto it = value.begin(); it != value.end(); it++) { ARROW_ASSIGN_OR_RAISE(auto scalar, GenericToScalar(*it)); scalars.push_back(std::move(scalar)); } std::unique_ptr builder; RETURN_NOT_OK( MakeBuilder(default_memory_pool(), type ? type : scalars[0]->type, &builder)); RETURN_NOT_OK(builder->AppendScalars(scalars)); std::shared_ptr out; RETURN_NOT_OK(builder->Finish(&out)); return std::make_shared(std::move(out)); } static inline Result> GenericToScalar( const std::shared_ptr& value) { if (!value) { return Status::Invalid("shared_ptr is nullptr"); } return MakeNullScalar(value); } static inline Result> GenericToScalar(const TypeHolder& value) { return GenericToScalar(value.GetSharedPtr()); } static inline Result> GenericToScalar( const std::shared_ptr& value) { return value; } static inline Result> GenericToScalar( const std::shared_ptr& value) { return std::make_shared(value); } static inline Result> GenericToScalar(const Datum& value) { // TODO(ARROW-9434): store in a union instead. switch (value.kind()) { case Datum::ARRAY: return GenericToScalar(value.make_array()); break; default: return Status::NotImplemented("Cannot serialize Datum kind ", value.kind()); } } static inline Result> GenericToScalar(std::nullopt_t) { return std::make_shared(); } template static inline auto GenericToScalar(const std::optional& value) -> Result { return value.has_value() ? MakeScalar(value.value()) : std::make_shared(); } template static inline enable_if_primitive_ctype::ArrowType, Result> GenericFromScalar(const std::shared_ptr& value) { using ArrowType = typename CTypeTraits::ArrowType; using ScalarType = typename TypeTraits::ScalarType; if (value->type->id() != ArrowType::type_id) { return Status::Invalid("Expected type ", ArrowType::type_id, " but got ", value->type->ToString()); } const auto& holder = checked_cast(*value); if (!holder.is_valid) return Status::Invalid("Got null scalar"); return holder.value; } template static inline enable_if_primitive_ctype::Type, Result> GenericFromScalar(const std::shared_ptr& value) { ARROW_ASSIGN_OR_RAISE(auto raw_val, GenericFromScalar::CType>(value)); return ValidateEnumValue(raw_val); } template using enable_if_same_result = enable_if_same>; template static inline enable_if_same_result GenericFromScalar( const std::shared_ptr& value) { if (!is_base_binary_like(value->type->id())) { return Status::Invalid("Expected binary-like type but got ", value->type->ToString()); } const auto& holder = checked_cast(*value); if (!holder.is_valid) return Status::Invalid("Got null scalar"); return holder.value->ToString(); } template static inline enable_if_same_result GenericFromScalar( const std::shared_ptr& value) { ARROW_ASSIGN_OR_RAISE(auto path, GenericFromScalar(value)); return FieldRef::FromDotPath(path); } template static inline enable_if_same_result GenericFromScalar( const std::shared_ptr& value) { if (value->type->id() != Type::STRUCT) { return Status::Invalid("Expected type STRUCT but got ", value->type->id()); } if (!value->is_valid) return Status::Invalid("Got null scalar"); const auto& holder = checked_cast(*value); ARROW_ASSIGN_OR_RAISE(auto target_holder, holder.field("target")); ARROW_ASSIGN_OR_RAISE(auto order_holder, holder.field("order")); ARROW_ASSIGN_OR_RAISE(auto target, GenericFromScalar(target_holder)); ARROW_ASSIGN_OR_RAISE(auto order, GenericFromScalar(order_holder)); return SortKey{std::move(target), order}; } template static inline enable_if_same_result> GenericFromScalar( const std::shared_ptr& value) { return value->type; } template static inline enable_if_same_result GenericFromScalar( const std::shared_ptr& value) { return value->type; } template static inline enable_if_same_result> GenericFromScalar( const std::shared_ptr& value) { return value; } template static inline enable_if_same_result> GenericFromScalar(const std::shared_ptr& value) { auto ty = GenericTypeSingleton>(); if (!value->type->Equals(ty)) { return Status::Invalid("Expected ", ty->ToString(), " but got ", value->type->ToString()); } const auto& holder = checked_cast(*value); std::vector keys; std::vector values; const auto& list = checked_cast(*holder.value); const auto& key_arr = checked_cast(*list.field(0)); const auto& value_arr = checked_cast(*list.field(1)); for (int64_t i = 0; i < list.length(); i++) { keys.push_back(key_arr.GetString(i)); values.push_back(value_arr.GetString(i)); } return key_value_metadata(std::move(keys), std::move(values)); } template static inline enable_if_same_result GenericFromScalar( const std::shared_ptr& value) { if (value->type->id() == Type::LIST) { const auto& holder = checked_cast(*value); return holder.value; } // TODO(ARROW-9434): handle other possible datum kinds by looking for a union return Status::Invalid("Cannot deserialize Datum from ", value->ToString()); } template constexpr inline bool is_optional_v = false; template constexpr inline bool is_optional_v> = true; template <> constexpr inline bool is_optional_v = true; template static inline std::enable_if_t, Result> GenericFromScalar( const std::shared_ptr& value) { using value_type = typename T::value_type; if (value->type->id() == Type::NA) { return std::nullopt; } return GenericFromScalar(value); } template static enable_if_same::ArrowType, ListType, Result> GenericFromScalar(const std::shared_ptr& value) { using ValueType = typename T::value_type; if (value->type->id() != Type::LIST) { return Status::Invalid("Expected type LIST but got ", value->type->ToString()); } const auto& holder = checked_cast(*value); if (!holder.is_valid) return Status::Invalid("Got null scalar"); std::vector result; for (int i = 0; i < holder.value->length(); i++) { ARROW_ASSIGN_OR_RAISE(auto scalar, holder.value->GetScalar(i)); ARROW_ASSIGN_OR_RAISE(auto v, GenericFromScalar(scalar)); result.push_back(std::move(v)); } return result; } template struct StringifyImpl { template StringifyImpl(const Options& obj, const Tuple& props) : obj_(obj), members_(props.size()) { props.ForEach(*this); } template void operator()(const Property& prop, size_t i) { std::stringstream ss; ss << prop.name() << '=' << GenericToString(prop.get(obj_)); members_[i] = ss.str(); } std::string Finish() { return "{" + arrow20::internal::JoinStrings(members_, ", ") + "}"; } const Options& obj_; std::vector members_; }; template struct CompareImpl { template CompareImpl(const Options& l, const Options& r, const Tuple& props) : left_(l), right_(r) { props.ForEach(*this); } template void operator()(const Property& prop, size_t) { equal_ &= GenericEquals(prop.get(left_), prop.get(right_)); } const Options& left_; const Options& right_; bool equal_ = true; }; template struct ToStructScalarImpl { template ToStructScalarImpl(const Options& obj, const Tuple& props, std::vector* field_names, std::vector>* values) : obj_(obj), field_names_(field_names), values_(values) { props.ForEach(*this); } template void operator()(const Property& prop, size_t) { if (!status_.ok()) return; auto result = GenericToScalar(prop.get(obj_)); if (!result.ok()) { status_ = result.status().WithMessage("Could not serialize field ", prop.name(), " of options type ", Options::kTypeName, ": ", result.status().message()); return; } field_names_->emplace_back(prop.name()); values_->push_back(result.MoveValueUnsafe()); } const Options& obj_; Status status_; std::vector* field_names_; std::vector>* values_; }; template struct FromStructScalarImpl { template FromStructScalarImpl(Options* obj, const StructScalar& scalar, const Tuple& props) : obj_(obj), scalar_(scalar) { props.ForEach(*this); } template void operator()(const Property& prop, size_t) { if (!status_.ok()) return; auto maybe_holder = scalar_.field(std::string(prop.name())); if (!maybe_holder.ok()) { status_ = maybe_holder.status().WithMessage( "Cannot deserialize field ", prop.name(), " of options type ", Options::kTypeName, ": ", maybe_holder.status().message()); return; } auto holder = maybe_holder.MoveValueUnsafe(); auto result = GenericFromScalar(holder); if (!result.ok()) { status_ = result.status().WithMessage("Cannot deserialize field ", prop.name(), " of options type ", Options::kTypeName, ": ", result.status().message()); return; } prop.set(obj_, result.MoveValueUnsafe()); } Options* obj_; Status status_; const StructScalar& scalar_; }; template struct CopyImpl { template CopyImpl(Options* obj, const Options& options, const Tuple& props) : obj_(obj), options_(options) { props.ForEach(*this); } template void operator()(const Property& prop, size_t) { prop.set(obj_, prop.get(options_)); } Options* obj_; const Options& options_; }; template const FunctionOptionsType* GetFunctionOptionsType(const Properties&... properties) { static const class OptionsType : public GenericOptionsType { public: explicit OptionsType(const arrow20::internal::PropertyTuple properties) : properties_(properties) {} const char* type_name() const override { return Options::kTypeName; } std::string Stringify(const FunctionOptions& options) const override { const auto& self = checked_cast(options); return StringifyImpl(self, properties_).Finish(); } bool Compare(const FunctionOptions& options, const FunctionOptions& other) const override { const auto& lhs = checked_cast(options); const auto& rhs = checked_cast(other); return CompareImpl(lhs, rhs, properties_).equal_; } Status ToStructScalar(const FunctionOptions& options, std::vector* field_names, std::vector>* values) const override { const auto& self = checked_cast(options); RETURN_NOT_OK( ToStructScalarImpl(self, properties_, field_names, values).status_); return Status::OK(); } Result> FromStructScalar( const StructScalar& scalar) const override { auto options = std::make_unique(); RETURN_NOT_OK( FromStructScalarImpl(options.get(), scalar, properties_).status_); // R build with openSUSE155 requires an explicit unique_ptr construction return std::unique_ptr(std::move(options)); } std::unique_ptr Copy(const FunctionOptions& options) const override { auto out = std::make_unique(); CopyImpl(out.get(), checked_cast(options), properties_); return out; } private: const arrow20::internal::PropertyTuple properties_; } instance(arrow20::internal::MakeProperties(properties...)); return &instance; } Status CheckAllArrayOrScalar(const std::vector& values); ARROW_EXPORT Result> GetFunctionArgumentTypes(const std::vector& args); } // namespace internal } // namespace compute } // namespace arrow20