diff options
author | chertus <azuikov@ydb.tech> | 2022-11-07 12:35:24 +0300 |
---|---|---|
committer | chertus <azuikov@ydb.tech> | 2022-11-07 12:35:24 +0300 |
commit | 4440f803dff303fe38644369105d264fe848946c (patch) | |
tree | 41b8e85c3799b53da47118b80ede7bacf478e181 | |
parent | 53f587219b1281a60091e6dba560ff3d7e0a1310 (diff) | |
download | ydb-4440f803dff303fe38644369105d264fe848946c.tar.gz |
extract our arrow kernels into library
36 files changed, 514 insertions, 192 deletions
diff --git a/ydb/core/formats/CMakeLists.txt b/ydb/core/formats/CMakeLists.txt index 4d75a9e95f2..625c3779f09 100644 --- a/ydb/core/formats/CMakeLists.txt +++ b/ydb/core/formats/CMakeLists.txt @@ -20,6 +20,7 @@ target_link_libraries(ydb-core-formats PUBLIC yutil libs-apache-arrow ydb-core-scheme + ydb-library-arrow_kernels ydb-library-binary_json ydb-library-dynumber ydb-library-arrow_clickhouse @@ -29,7 +30,6 @@ target_sources(ydb-core-formats PRIVATE ${CMAKE_SOURCE_DIR}/ydb/core/formats/arrow_helpers.cpp ${CMAKE_SOURCE_DIR}/ydb/core/formats/clickhouse_block.cpp ${CMAKE_SOURCE_DIR}/ydb/core/formats/custom_registry.cpp - ${CMAKE_SOURCE_DIR}/ydb/core/formats/func_cast.cpp ${CMAKE_SOURCE_DIR}/ydb/core/formats/merging_sorted_input_stream.cpp ${CMAKE_SOURCE_DIR}/ydb/core/formats/program.cpp ${CMAKE_SOURCE_DIR}/ydb/core/formats/ssa_program_optimizer.cpp diff --git a/ydb/core/formats/arrow_helpers.cpp b/ydb/core/formats/arrow_helpers.cpp index 0dc67b9e028..2137751d9cf 100644 --- a/ydb/core/formats/arrow_helpers.cpp +++ b/ydb/core/formats/arrow_helpers.cpp @@ -1241,24 +1241,6 @@ bool TArrowToYdbConverter::Process(const arrow::RecordBatch& batch, TString& err return true; } -std::shared_ptr<arrow::Array> NumVecToArray(const std::shared_ptr<arrow::DataType>& type, - const std::vector<double>& vec) { - std::shared_ptr<arrow::Array> out; - SwitchType(type->id(), [&](const auto& type) { - using TWrap = std::decay_t<decltype(type)>; - if constexpr (arrow::is_number_type<typename TWrap::T>::value) { - typename arrow::TypeTraits<typename TWrap::T>::BuilderType builder; - for (const auto val : vec) { - Y_VERIFY(builder.Append(static_cast<typename TWrap::T::c_type>(val)).ok()); - } - Y_VERIFY(builder.Finish(&out).ok()); - return true; - } - return false; - }); - return out; -} - std::shared_ptr<arrow::Array> BoolVecToArray(const std::vector<bool>& vec) { std::shared_ptr<arrow::Array> out; arrow::BooleanBuilder builder; diff --git a/ydb/core/formats/arrow_helpers.h b/ydb/core/formats/arrow_helpers.h index c9ba1b464a0..4ec2bae4cee 100644 --- a/ydb/core/formats/arrow_helpers.h +++ b/ydb/core/formats/arrow_helpers.h @@ -212,8 +212,6 @@ inline bool HasNulls(const std::shared_ptr<arrow::Array>& column) { } bool ArrayScalarsEqual(const std::shared_ptr<arrow::Array>& lhs, const std::shared_ptr<arrow::Array>& rhs); -std::shared_ptr<arrow::Array> NumVecToArray(const std::shared_ptr<arrow::DataType>& type, - const std::vector<double>& vec); std::shared_ptr<arrow::Array> BoolVecToArray(const std::vector<bool>& vec); } diff --git a/ydb/core/formats/custom_registry.cpp b/ydb/core/formats/custom_registry.cpp index 40be5634f2d..e211a3670f3 100644 --- a/ydb/core/formats/custom_registry.cpp +++ b/ydb/core/formats/custom_registry.cpp @@ -1,7 +1,7 @@ #include "custom_registry.h" -#include "functions.h" -#include "func_common.h" +#include <ydb/library/arrow_kernels/functions.h> +#include <ydb/library/arrow_kernels/func_common.h> #include "program.h" #include <util/system/yassert.h> @@ -19,6 +19,8 @@ namespace cp = ::arrow::compute; namespace NKikimr::NArrow { +using namespace NKernels; + static void RegisterMath(cp::FunctionRegistry* registry) { Y_VERIFY(registry->AddFunction(MakeMathUnary<TAcosh>(TAcosh::Name)).ok()); Y_VERIFY(registry->AddFunction(MakeMathUnary<TAtanh>(TAtanh::Name)).ok()); diff --git a/ydb/core/formats/program.h b/ydb/core/formats/program.h index 7a94f4065a0..1c383b8b23d 100644 --- a/ydb/core/formats/program.h +++ b/ydb/core/formats/program.h @@ -4,87 +4,12 @@ #include <contrib/libs/apache/arrow/cpp/src/arrow/api.h> #include <util/system/types.h> +#include <ydb/library/arrow_kernels/operations.h> #include <ydb/core/scheme_types/scheme_types_defs.h> namespace NKikimr::NArrow { -enum class EOperation { - Unspecified = 0, - Constant, - // - CastBoolean, - CastInt8, - CastInt16, - CastInt32, - CastInt64, - CastUInt8, - CastUInt16, - CastUInt32, - CastUInt64, - CastFloat, - CastDouble, - CastBinary, - CastFixedSizeBinary, - CastString, - CastTimestamp, - // - IsValid, - IsNull, - // - Equal, - NotEqual, - Less, - LessEqual, - Greater, - GreaterEqual, - // - Invert, - And, - Or, - Xor, - // - Add, - Subtract, - Multiply, - Divide, - Abs, - Negate, - Gcd, - Lcm, - Modulo, - ModuloOrZero, - AddNotNull, - SubtractNotNull, - MultiplyNotNull, - DivideNotNull, - // - BinaryLength, - MatchSubstring, - // math - Acosh, - Atanh, - Cbrt, - Cosh, - E, - Erf, - Erfc, - Exp, - Exp2, - Exp10, - Hypot, - Lgamma, - Pi, - Sinh, - Sqrt, - Tgamma, - // round - Floor, - Ceil, - Trunc, - Round, - RoundBankers, - RoundToExp2 -}; +using EOperation = NKikimr::NKernels::EOperation; enum class EAggregate { Unspecified = 0, diff --git a/ydb/core/formats/ut/CMakeLists.darwin.txt b/ydb/core/formats/ut/CMakeLists.darwin.txt index d5029e328fe..fbe229af5b8 100644 --- a/ydb/core/formats/ut/CMakeLists.darwin.txt +++ b/ydb/core/formats/ut/CMakeLists.darwin.txt @@ -23,6 +23,7 @@ target_link_libraries(ydb-core-formats-ut PUBLIC cpp-testing-unittest_main ydb-core-formats libs-apache-arrow + ydb-library-arrow_kernels ydb-core-base udf-service-exception_policy yql-sql-pg_dummy @@ -37,11 +38,7 @@ target_link_options(ydb-core-formats-ut PRIVATE ) target_sources(ydb-core-formats-ut PRIVATE ${CMAKE_SOURCE_DIR}/ydb/core/formats/ut_arrow.cpp - ${CMAKE_SOURCE_DIR}/ydb/core/formats/ut_arithmetic.cpp - ${CMAKE_SOURCE_DIR}/ydb/core/formats/ut_math.cpp - ${CMAKE_SOURCE_DIR}/ydb/core/formats/ut_round.cpp ${CMAKE_SOURCE_DIR}/ydb/core/formats/ut_program_step.cpp - ${CMAKE_SOURCE_DIR}/ydb/core/formats/custom_registry.cpp ) add_test( NAME diff --git a/ydb/core/formats/ut/CMakeLists.linux-aarch64.txt b/ydb/core/formats/ut/CMakeLists.linux-aarch64.txt index 42a6b5a4c9f..e7c38545ffd 100644 --- a/ydb/core/formats/ut/CMakeLists.linux-aarch64.txt +++ b/ydb/core/formats/ut/CMakeLists.linux-aarch64.txt @@ -23,6 +23,7 @@ target_link_libraries(ydb-core-formats-ut PUBLIC cpp-testing-unittest_main ydb-core-formats libs-apache-arrow + ydb-library-arrow_kernels ydb-core-base udf-service-exception_policy yql-sql-pg_dummy @@ -39,11 +40,7 @@ target_link_options(ydb-core-formats-ut PRIVATE ) target_sources(ydb-core-formats-ut PRIVATE ${CMAKE_SOURCE_DIR}/ydb/core/formats/ut_arrow.cpp - ${CMAKE_SOURCE_DIR}/ydb/core/formats/ut_arithmetic.cpp - ${CMAKE_SOURCE_DIR}/ydb/core/formats/ut_math.cpp - ${CMAKE_SOURCE_DIR}/ydb/core/formats/ut_round.cpp ${CMAKE_SOURCE_DIR}/ydb/core/formats/ut_program_step.cpp - ${CMAKE_SOURCE_DIR}/ydb/core/formats/custom_registry.cpp ) add_test( NAME diff --git a/ydb/core/formats/ut/CMakeLists.linux.txt b/ydb/core/formats/ut/CMakeLists.linux.txt index 41b2721bf22..d86039b749a 100644 --- a/ydb/core/formats/ut/CMakeLists.linux.txt +++ b/ydb/core/formats/ut/CMakeLists.linux.txt @@ -25,6 +25,7 @@ target_link_libraries(ydb-core-formats-ut PUBLIC cpp-testing-unittest_main ydb-core-formats libs-apache-arrow + ydb-library-arrow_kernels ydb-core-base udf-service-exception_policy yql-sql-pg_dummy @@ -41,11 +42,7 @@ target_link_options(ydb-core-formats-ut PRIVATE ) target_sources(ydb-core-formats-ut PRIVATE ${CMAKE_SOURCE_DIR}/ydb/core/formats/ut_arrow.cpp - ${CMAKE_SOURCE_DIR}/ydb/core/formats/ut_arithmetic.cpp - ${CMAKE_SOURCE_DIR}/ydb/core/formats/ut_math.cpp - ${CMAKE_SOURCE_DIR}/ydb/core/formats/ut_round.cpp ${CMAKE_SOURCE_DIR}/ydb/core/formats/ut_program_step.cpp - ${CMAKE_SOURCE_DIR}/ydb/core/formats/custom_registry.cpp ) add_test( NAME diff --git a/ydb/core/formats/ut_program_step.cpp b/ydb/core/formats/ut_program_step.cpp index a3a62ab9578..2bc31ecf473 100644 --- a/ydb/core/formats/ut_program_step.cpp +++ b/ydb/core/formats/ut_program_step.cpp @@ -6,12 +6,15 @@ #include <contrib/libs/apache/arrow/cpp/src/arrow/compute/exec.h> #include <contrib/libs/apache/arrow/cpp/src/arrow/type_fwd.h> #include <library/cpp/testing/unittest/registar.h> +#include <ydb/library/arrow_kernels/ut_common.h> #include "custom_registry.h" #include "program.h" #include "arrow_helpers.h" namespace NKikimr::NArrow { +using NKernels::NumVecToArray; + size_t FilterTest(std::vector<std::shared_ptr<arrow::Array>> args, EOperation frst, EOperation scnd) { auto schema = std::make_shared<arrow::Schema>(std::vector{ std::make_shared<arrow::Field>("x", args.at(0)->type()), diff --git a/ydb/core/tx/columnshard/engines/predicate.h b/ydb/core/tx/columnshard/engines/predicate.h index 6b50e633e85..ee209421415 100644 --- a/ydb/core/tx/columnshard/engines/predicate.h +++ b/ydb/core/tx/columnshard/engines/predicate.h @@ -48,7 +48,7 @@ struct TPredicate { } friend IOutputStream& operator << (IOutputStream& out, const TPredicate& pred) { - out << GetFunctionName(pred.Operation); + out << NArrow::GetFunctionName(pred.Operation); if (pred.Inclusive) { out << "(incl) "; } else { diff --git a/ydb/library/CMakeLists.txt b/ydb/library/CMakeLists.txt index e6671974c27..5147904f2a2 100644 --- a/ydb/library/CMakeLists.txt +++ b/ydb/library/CMakeLists.txt @@ -9,6 +9,7 @@ add_subdirectory(accessor) add_subdirectory(aclib) add_subdirectory(arrow_clickhouse) +add_subdirectory(arrow_kernels) add_subdirectory(backup) add_subdirectory(binary_json) add_subdirectory(dynumber) diff --git a/ydb/library/arrow_kernels/CMakeLists.txt b/ydb/library/arrow_kernels/CMakeLists.txt new file mode 100644 index 00000000000..ac3feb64927 --- /dev/null +++ b/ydb/library/arrow_kernels/CMakeLists.txt @@ -0,0 +1,20 @@ + +# This file was gererated by the build system used internally in the Yandex monorepo. +# Only simple modifications are allowed (adding source-files to targets, adding simple properties +# like target_include_directories). These modifications will be ported to original +# ya.make files by maintainers. Any complex modifications which can't be ported back to the +# original buildsystem will not be accepted. + + +add_subdirectory(ut) + +add_library(ydb-library-arrow_kernels) +target_link_libraries(ydb-library-arrow_kernels PUBLIC + contrib-libs-cxxsupp + yutil + libs-apache-arrow +) +target_sources(ydb-library-arrow_kernels PRIVATE + ${CMAKE_SOURCE_DIR}/ydb/library/arrow_kernels/func_cast.cpp + ${CMAKE_SOURCE_DIR}/ydb/library/arrow_kernels/ut_common.cpp +) diff --git a/ydb/core/formats/bit_cast.h b/ydb/library/arrow_kernels/bit_cast.h index 486645877d5..6875525e63e 100644 --- a/ydb/core/formats/bit_cast.h +++ b/ydb/library/arrow_kernels/bit_cast.h @@ -4,6 +4,7 @@ #include <algorithm> #include <type_traits> +namespace NKikimr::NKernels { /** \brief Returns value `from` converted to type `To` while retaining bit representation. * `To` and `From` must satisfy `CopyConstructible`. @@ -24,4 +25,6 @@ std::decay_t<To> safe_bit_cast(const From & from) { static_assert(sizeof(To) == sizeof(From), "bit cast on types of different width"); return bit_cast<To, From>(from); -}
\ No newline at end of file +} + +} diff --git a/ydb/core/formats/clickhouse_type_traits.h b/ydb/library/arrow_kernels/clickhouse_type_traits.h index b694d2b91fb..15cf4956855 100644 --- a/ydb/core/formats/clickhouse_type_traits.h +++ b/ydb/library/arrow_kernels/clickhouse_type_traits.h @@ -3,7 +3,7 @@ #include <contrib/libs/apache/arrow/cpp/src/arrow/api.h> #include <contrib/libs/apache/arrow/cpp/src/arrow/type.h> -namespace NKikimr::NArrow { +namespace NKikimr::NKernels { constexpr size_t NextSize(size_t size) { if (size < 8) { diff --git a/ydb/core/formats/execs.h b/ydb/library/arrow_kernels/execs.h index 253bae48318..95311448450 100644 --- a/ydb/core/formats/execs.h +++ b/ydb/library/arrow_kernels/execs.h @@ -13,17 +13,17 @@ #include <contrib/libs/apache/arrow/cpp/src/arrow/compute/function.h> #include <contrib/libs/apache/arrow/cpp/src/arrow/compute/cast.h> + #include <util/datetime/base.h> #include <util/system/yassert.h> #include <cstdint> #include <memory> #include <type_traits> #include <vector> -#include "switch_type.h" namespace cp = arrow::compute; -namespace NKikimr::NArrow { +namespace NKikimr::NKernels { template <template <typename... Args> class KernelGenerator, typename Op> cp::ArrayKernelExec ArithmeticBinaryExec(cp::internal::detail::GetTypeId getId) { diff --git a/ydb/core/formats/func_cast.cpp b/ydb/library/arrow_kernels/func_cast.cpp index 7d480bdac07..9481584fad4 100644 --- a/ydb/core/formats/func_cast.cpp +++ b/ydb/library/arrow_kernels/func_cast.cpp @@ -13,7 +13,7 @@ namespace cp = ::arrow::compute; -namespace NKikimr::NArrow { +namespace NKikimr::NKernels { namespace { @@ -118,7 +118,8 @@ YdbCastMetaFunction::YdbCastMetaFunction() return castFunc->Execute(args, options, ctx); } -} // NKikimr::NArrow +} + namespace arrow::compute::internal { @@ -129,17 +130,17 @@ struct CastFunctor<TimestampType, UInt32Type> { return ::arrow::Status::IndexError("Cast from uint32 to timestamp received empty batch."); } Y_VERIFY(batch[0].kind() == Datum::ARRAY, "Cast from uint32 to timestamp expected ARRAY as input."); - + const auto& out_type = checked_cast<const ::arrow::TimestampType&>(*out->type()); // get conversion MICROSECONDS -> unit auto conversion = ::arrow::util::GetTimestampConversion(::arrow::TimeUnit::MICRO, out_type.unit()); Y_VERIFY(conversion.first == ::arrow::util::MULTIPLY, "Cast from uint32 to timestamp failed because timestamp unit is greater than seconds."); - + auto input = batch[0].array(); auto output = out->mutable_array(); auto in_data = input->GetValues<uint32_t>(1); auto out_data = output->GetMutableValues<int64_t>(1); - + for (int64_t i = 0; i < input->length; i++) { out_data[i] = static_cast<int64_t>(in_data[i] * conversion.second); } @@ -147,4 +148,4 @@ struct CastFunctor<TimestampType, UInt32Type> { } }; -} // namespace arrow::compute::internal
\ No newline at end of file +} // namespace arrow::compute::internal diff --git a/ydb/core/formats/func_cast.h b/ydb/library/arrow_kernels/func_cast.h index 532f3164d7c..5e2a20150c1 100644 --- a/ydb/core/formats/func_cast.h +++ b/ydb/library/arrow_kernels/func_cast.h @@ -4,7 +4,7 @@ #include <type_traits> -namespace NKikimr::NArrow { +namespace NKikimr::NKernels { // Metafunction for dispatching to appropriate CastFunction. This corresponds // to the standard SQL CAST(expr AS target_type) @@ -19,4 +19,4 @@ class YdbCastMetaFunction : public ::arrow::compute::MetaFunction { ::arrow::compute::ExecContext* ctx) const override; }; -} // NKikimr::NArrow +} diff --git a/ydb/core/formats/func_common.h b/ydb/library/arrow_kernels/func_common.h index c697fcf03ed..62c74c06a5c 100644 --- a/ydb/core/formats/func_common.h +++ b/ydb/library/arrow_kernels/func_common.h @@ -10,14 +10,13 @@ #include <type_traits> -#include "switch_type.h" #include "execs.h" namespace cp = arrow::compute; using cp::internal::applicator::ScalarBinary; using cp::internal::applicator::ScalarUnary; -namespace NKikimr::NArrow { +namespace NKikimr::NKernels { template <typename T> using IsUnsignedInteger = diff --git a/ydb/core/formats/func_gcd.h b/ydb/library/arrow_kernels/func_gcd.h index 0f6fe4bc8dc..bc2ad222067 100644 --- a/ydb/core/formats/func_gcd.h +++ b/ydb/library/arrow_kernels/func_gcd.h @@ -9,7 +9,7 @@ #define INLINE __attribute__((always_inline)) #endif -namespace NKikimr::NArrow { +namespace NKikimr::NKernels { template<typename T> INLINE void FastIntSwap(T& lhs, T& rhs) { diff --git a/ydb/core/formats/func_lcm.h b/ydb/library/arrow_kernels/func_lcm.h index 1e31b0e516a..986b8193e26 100644 --- a/ydb/core/formats/func_lcm.h +++ b/ydb/library/arrow_kernels/func_lcm.h @@ -4,7 +4,7 @@ #include "func_gcd.h" #include "clickhouse_type_traits.h" -namespace NKikimr::NArrow { +namespace NKikimr::NKernels { struct TLeastCommonMultiple { diff --git a/ydb/core/formats/func_math.h b/ydb/library/arrow_kernels/func_math.h index 13474e0018e..85b38408e25 100644 --- a/ydb/core/formats/func_math.h +++ b/ydb/library/arrow_kernels/func_math.h @@ -2,7 +2,7 @@ #include "func_common.h" #include <cmath> -namespace NKikimr::NArrow { +namespace NKikimr::NKernels { struct TAcosh { diff --git a/ydb/core/formats/func_modulo.h b/ydb/library/arrow_kernels/func_modulo.h index 85bcc7b9a12..742bc083ce2 100644 --- a/ydb/core/formats/func_modulo.h +++ b/ydb/library/arrow_kernels/func_modulo.h @@ -2,7 +2,7 @@ #include "func_common.h" #include "clickhouse_type_traits.h" -namespace NKikimr::NArrow { +namespace NKikimr::NKernels { struct TModulo { diff --git a/ydb/core/formats/func_modulo_or_zero.h b/ydb/library/arrow_kernels/func_modulo_or_zero.h index a2a4a494a37..06e2d18901b 100644 --- a/ydb/core/formats/func_modulo_or_zero.h +++ b/ydb/library/arrow_kernels/func_modulo_or_zero.h @@ -2,7 +2,7 @@ #include "func_common.h" #include "clickhouse_type_traits.h" -namespace NKikimr::NArrow { +namespace NKikimr::NKernels { struct TModuloOrZero { diff --git a/ydb/core/formats/func_mul.h b/ydb/library/arrow_kernels/func_mul.h index d9be3198e9e..7cfb4d34676 100644 --- a/ydb/core/formats/func_mul.h +++ b/ydb/library/arrow_kernels/func_mul.h @@ -2,7 +2,7 @@ namespace cp = arrow::compute; -namespace NKikimr::NArrow { +namespace NKikimr::NKernels { template <typename T, typename TUnsigned = typename std::make_unsigned<T>::type> constexpr TUnsigned ToUnsigned(T sgnd) { diff --git a/ydb/core/formats/func_round.h b/ydb/library/arrow_kernels/func_round.h index 3b82591c45c..639d19af48a 100644 --- a/ydb/core/formats/func_round.h +++ b/ydb/library/arrow_kernels/func_round.h @@ -18,7 +18,7 @@ #define CLZLL __builtin_clzll #endif -namespace NKikimr::NArrow { +namespace NKikimr::NKernels { struct TRound { diff --git a/ydb/core/formats/functions.h b/ydb/library/arrow_kernels/functions.h index 2f4523a4fe2..2f4523a4fe2 100644 --- a/ydb/core/formats/functions.h +++ b/ydb/library/arrow_kernels/functions.h diff --git a/ydb/library/arrow_kernels/operations.h b/ydb/library/arrow_kernels/operations.h new file mode 100644 index 00000000000..02f04ce75f9 --- /dev/null +++ b/ydb/library/arrow_kernels/operations.h @@ -0,0 +1,81 @@ +namespace NKikimr::NKernels { + +enum class EOperation { + Unspecified = 0, + Constant, + // + CastBoolean, + CastInt8, + CastInt16, + CastInt32, + CastInt64, + CastUInt8, + CastUInt16, + CastUInt32, + CastUInt64, + CastFloat, + CastDouble, + CastBinary, + CastFixedSizeBinary, + CastString, + CastTimestamp, + // + IsValid, + IsNull, + // + Equal, + NotEqual, + Less, + LessEqual, + Greater, + GreaterEqual, + // + Invert, + And, + Or, + Xor, + // + Add, + Subtract, + Multiply, + Divide, + Abs, + Negate, + Gcd, + Lcm, + Modulo, + ModuloOrZero, + AddNotNull, + SubtractNotNull, + MultiplyNotNull, + DivideNotNull, + // + BinaryLength, + MatchSubstring, + // math + Acosh, + Atanh, + Cbrt, + Cosh, + E, + Erf, + Erfc, + Exp, + Exp2, + Exp10, + Hypot, + Lgamma, + Pi, + Sinh, + Sqrt, + Tgamma, + // round + Floor, + Ceil, + Trunc, + Round, + RoundBankers, + RoundToExp2 +}; + +} diff --git a/ydb/library/arrow_kernels/ut/CMakeLists.darwin.txt b/ydb/library/arrow_kernels/ut/CMakeLists.darwin.txt new file mode 100644 index 00000000000..e57be38692d --- /dev/null +++ b/ydb/library/arrow_kernels/ut/CMakeLists.darwin.txt @@ -0,0 +1,49 @@ + +# This file was gererated by the build system used internally in the Yandex monorepo. +# Only simple modifications are allowed (adding source-files to targets, adding simple properties +# like target_include_directories). These modifications will be ported to original +# ya.make files by maintainers. Any complex modifications which can't be ported back to the +# original buildsystem will not be accepted. + + + +add_executable(ydb-library-arrow_kernels-ut) +target_compile_options(ydb-library-arrow_kernels-ut PRIVATE + -Wno-unused-parameter +) +target_include_directories(ydb-library-arrow_kernels-ut PRIVATE + ${CMAKE_SOURCE_DIR}/ydb/library/arrow_kernels + ${CMAKE_SOURCE_DIR}/ydb/library/arrow_clickhouse +) +target_link_libraries(ydb-library-arrow_kernels-ut PUBLIC + contrib-libs-cxxsupp + yutil + library-cpp-cpuid_check + cpp-testing-unittest_main + ydb-library-arrow_kernels + libs-apache-arrow +) +target_link_options(ydb-library-arrow_kernels-ut PRIVATE + -Wl,-no_deduplicate + -Wl,-sdk_version,10.15 + -fPIC + -fPIC +) +target_sources(ydb-library-arrow_kernels-ut PRIVATE + ${CMAKE_SOURCE_DIR}/ydb/library/arrow_kernels/ut_common.cpp + ${CMAKE_SOURCE_DIR}/ydb/library/arrow_kernels/ut_arithmetic.cpp + ${CMAKE_SOURCE_DIR}/ydb/library/arrow_kernels/ut_math.cpp + ${CMAKE_SOURCE_DIR}/ydb/library/arrow_kernels/ut_round.cpp +) +add_test( + NAME + ydb-library-arrow_kernels-ut + COMMAND + ydb-library-arrow_kernels-ut + --print-before-suite + --print-before-test + --fork-tests + --print-times + --show-fails +) +vcs_info(ydb-library-arrow_kernels-ut) diff --git a/ydb/library/arrow_kernels/ut/CMakeLists.linux-aarch64.txt b/ydb/library/arrow_kernels/ut/CMakeLists.linux-aarch64.txt new file mode 100644 index 00000000000..65aef6876b7 --- /dev/null +++ b/ydb/library/arrow_kernels/ut/CMakeLists.linux-aarch64.txt @@ -0,0 +1,53 @@ + +# This file was gererated by the build system used internally in the Yandex monorepo. +# Only simple modifications are allowed (adding source-files to targets, adding simple properties +# like target_include_directories). These modifications will be ported to original +# ya.make files by maintainers. Any complex modifications which can't be ported back to the +# original buildsystem will not be accepted. + + + +add_executable(ydb-library-arrow_kernels-ut) +target_compile_options(ydb-library-arrow_kernels-ut PRIVATE + -Wno-unused-parameter +) +target_include_directories(ydb-library-arrow_kernels-ut PRIVATE + ${CMAKE_SOURCE_DIR}/ydb/library/arrow_kernels + ${CMAKE_SOURCE_DIR}/ydb/library/arrow_clickhouse +) +target_link_libraries(ydb-library-arrow_kernels-ut PUBLIC + contrib-libs-cxxsupp + yutil + library-cpp-lfalloc + cpp-testing-unittest_main + ydb-library-arrow_kernels + libs-apache-arrow +) +target_link_options(ydb-library-arrow_kernels-ut PRIVATE + -ldl + -lrt + -Wl,--no-as-needed + -fPIC + -fPIC + -lpthread + -lrt + -ldl +) +target_sources(ydb-library-arrow_kernels-ut PRIVATE + ${CMAKE_SOURCE_DIR}/ydb/library/arrow_kernels/ut_common.cpp + ${CMAKE_SOURCE_DIR}/ydb/library/arrow_kernels/ut_arithmetic.cpp + ${CMAKE_SOURCE_DIR}/ydb/library/arrow_kernels/ut_math.cpp + ${CMAKE_SOURCE_DIR}/ydb/library/arrow_kernels/ut_round.cpp +) +add_test( + NAME + ydb-library-arrow_kernels-ut + COMMAND + ydb-library-arrow_kernels-ut + --print-before-suite + --print-before-test + --fork-tests + --print-times + --show-fails +) +vcs_info(ydb-library-arrow_kernels-ut) diff --git a/ydb/library/arrow_kernels/ut/CMakeLists.linux.txt b/ydb/library/arrow_kernels/ut/CMakeLists.linux.txt new file mode 100644 index 00000000000..f3e17ee71dd --- /dev/null +++ b/ydb/library/arrow_kernels/ut/CMakeLists.linux.txt @@ -0,0 +1,55 @@ + +# This file was gererated by the build system used internally in the Yandex monorepo. +# Only simple modifications are allowed (adding source-files to targets, adding simple properties +# like target_include_directories). These modifications will be ported to original +# ya.make files by maintainers. Any complex modifications which can't be ported back to the +# original buildsystem will not be accepted. + + + +add_executable(ydb-library-arrow_kernels-ut) +target_compile_options(ydb-library-arrow_kernels-ut PRIVATE + -Wno-unused-parameter +) +target_include_directories(ydb-library-arrow_kernels-ut PRIVATE + ${CMAKE_SOURCE_DIR}/ydb/library/arrow_kernels + ${CMAKE_SOURCE_DIR}/ydb/library/arrow_clickhouse +) +target_link_libraries(ydb-library-arrow_kernels-ut PUBLIC + contrib-libs-cxxsupp + yutil + cpp-malloc-tcmalloc + libs-tcmalloc-no_percpu_cache + library-cpp-cpuid_check + cpp-testing-unittest_main + ydb-library-arrow_kernels + libs-apache-arrow +) +target_link_options(ydb-library-arrow_kernels-ut PRIVATE + -ldl + -lrt + -Wl,--no-as-needed + -fPIC + -fPIC + -lpthread + -lrt + -ldl +) +target_sources(ydb-library-arrow_kernels-ut PRIVATE + ${CMAKE_SOURCE_DIR}/ydb/library/arrow_kernels/ut_common.cpp + ${CMAKE_SOURCE_DIR}/ydb/library/arrow_kernels/ut_arithmetic.cpp + ${CMAKE_SOURCE_DIR}/ydb/library/arrow_kernels/ut_math.cpp + ${CMAKE_SOURCE_DIR}/ydb/library/arrow_kernels/ut_round.cpp +) +add_test( + NAME + ydb-library-arrow_kernels-ut + COMMAND + ydb-library-arrow_kernels-ut + --print-before-suite + --print-before-test + --fork-tests + --print-times + --show-fails +) +vcs_info(ydb-library-arrow_kernels-ut) diff --git a/ydb/library/arrow_kernels/ut/CMakeLists.txt b/ydb/library/arrow_kernels/ut/CMakeLists.txt new file mode 100644 index 00000000000..3e0811fb22e --- /dev/null +++ b/ydb/library/arrow_kernels/ut/CMakeLists.txt @@ -0,0 +1,15 @@ + +# This file was gererated by the build system used internally in the Yandex monorepo. +# Only simple modifications are allowed (adding source-files to targets, adding simple properties +# like target_include_directories). These modifications will be ported to original +# ya.make files by maintainers. Any complex modifications which can't be ported back to the +# original buildsystem will not be accepted. + + +if (CMAKE_SYSTEM_PROCESSOR STREQUAL "aarch64" AND UNIX AND NOT APPLE AND NOT ANDROID) + include(CMakeLists.linux-aarch64.txt) +elseif (APPLE) + include(CMakeLists.darwin.txt) +elseif (CMAKE_SYSTEM_PROCESSOR STREQUAL "x86_64" AND UNIX AND NOT APPLE AND NOT ANDROID) + include(CMakeLists.linux.txt) +endif() diff --git a/ydb/core/formats/ut_arithmetic.cpp b/ydb/library/arrow_kernels/ut_arithmetic.cpp index ffffad99cc8..8523dc0f748 100644 --- a/ydb/core/formats/ut_arithmetic.cpp +++ b/ydb/library/arrow_kernels/ut_arithmetic.cpp @@ -1,25 +1,12 @@ -#include <cmath> -#include <cstdint> -#include <iterator> -#include <library/cpp/testing/unittest/registar.h> -#include <ctime> -#include <vector> -#include <algorithm> - -#include <contrib/libs/apache/arrow/cpp/src/arrow/api.h> -#include <contrib/libs/apache/arrow/cpp/src/arrow/compute/api.h> - -#include "func_common.h" -#include "functions.h" -#include "custom_registry.h" -#include "arrow_helpers.h" - -namespace NKikimr::NArrow { +#include "ut_common.h" namespace cp = ::arrow::compute; using cp::internal::applicator::ScalarBinary; using cp::internal::applicator::ScalarUnary; + +namespace NKikimr::NKernels { + const std::vector<std::shared_ptr<arrow::DataType>> nonPrimitiveTypes = { arrow::list(arrow::utf8()), arrow::list(arrow::int64()), @@ -343,7 +330,6 @@ Y_UNIT_TEST_SUITE(ModuloTest) { UNIT_ASSERT(!res.ok()); } } - } Y_UNIT_TEST_SUITE(ModuloOrZeroTest) { @@ -366,7 +352,6 @@ Y_UNIT_TEST_SUITE(ModuloOrZeroTest) { UNIT_ASSERT(res->Equals(expected)); } } - } } diff --git a/ydb/library/arrow_kernels/ut_common.cpp b/ydb/library/arrow_kernels/ut_common.cpp new file mode 100644 index 00000000000..e2ff267b7e8 --- /dev/null +++ b/ydb/library/arrow_kernels/ut_common.cpp @@ -0,0 +1,168 @@ +#include "ut_common.h" +#include "operations.h" + +#include <util/system/yassert.h> + +namespace cp = ::arrow::compute; + + +namespace NKikimr::NKernels { + +template <typename TType> +struct TTypeWrapper +{ + using T = TType; +}; + +template <typename TFunc, bool EnableNull = false> +bool SwitchType(arrow::Type::type typeId, TFunc&& f) { + switch (typeId) { + case arrow::Type::NA: { + if constexpr (EnableNull) { + return f(TTypeWrapper<arrow::NullType>()); + } + break; + } + case arrow::Type::BOOL: + return f(TTypeWrapper<arrow::BooleanType>()); + case arrow::Type::UINT8: + return f(TTypeWrapper<arrow::UInt8Type>()); + case arrow::Type::INT8: + return f(TTypeWrapper<arrow::Int8Type>()); + case arrow::Type::UINT16: + return f(TTypeWrapper<arrow::UInt16Type>()); + case arrow::Type::INT16: + return f(TTypeWrapper<arrow::Int16Type>()); + case arrow::Type::UINT32: + return f(TTypeWrapper<arrow::UInt32Type>()); + case arrow::Type::INT32: + return f(TTypeWrapper<arrow::Int32Type>()); + case arrow::Type::UINT64: + return f(TTypeWrapper<arrow::UInt64Type>()); + case arrow::Type::INT64: + return f(TTypeWrapper<arrow::Int64Type>()); + case arrow::Type::HALF_FLOAT: + return f(TTypeWrapper<arrow::HalfFloatType>()); + case arrow::Type::FLOAT: + return f(TTypeWrapper<arrow::FloatType>()); + case arrow::Type::DOUBLE: + return f(TTypeWrapper<arrow::DoubleType>()); + case arrow::Type::STRING: + return f(TTypeWrapper<arrow::StringType>()); + case arrow::Type::BINARY: + return f(TTypeWrapper<arrow::BinaryType>()); + case arrow::Type::FIXED_SIZE_BINARY: + return f(TTypeWrapper<arrow::FixedSizeBinaryType>()); + case arrow::Type::DATE32: + return f(TTypeWrapper<arrow::Date32Type>()); + case arrow::Type::DATE64: + return f(TTypeWrapper<arrow::Date64Type>()); + case arrow::Type::TIMESTAMP: + return f(TTypeWrapper<arrow::TimestampType>()); + case arrow::Type::TIME32: + return f(TTypeWrapper<arrow::Time32Type>()); + case arrow::Type::TIME64: + return f(TTypeWrapper<arrow::Time64Type>()); + case arrow::Type::INTERVAL_MONTHS: + return f(TTypeWrapper<arrow::MonthIntervalType>()); + case arrow::Type::DECIMAL: + return f(TTypeWrapper<arrow::Decimal128Type>()); + case arrow::Type::DURATION: + return f(TTypeWrapper<arrow::DurationType>()); + case arrow::Type::LARGE_STRING: + return f(TTypeWrapper<arrow::LargeStringType>()); + case arrow::Type::LARGE_BINARY: + return f(TTypeWrapper<arrow::LargeBinaryType>()); + case arrow::Type::DECIMAL256: + case arrow::Type::DENSE_UNION: + case arrow::Type::DICTIONARY: + case arrow::Type::EXTENSION: + case arrow::Type::FIXED_SIZE_LIST: + case arrow::Type::INTERVAL_DAY_TIME: + case arrow::Type::LARGE_LIST: + case arrow::Type::LIST: + case arrow::Type::MAP: + case arrow::Type::MAX_ID: + case arrow::Type::SPARSE_UNION: + case arrow::Type::STRUCT: + break; + } + + return false; +} + +std::shared_ptr<arrow::Array> NumVecToArray(const std::shared_ptr<arrow::DataType>& type, + const std::vector<double>& vec) { + std::shared_ptr<arrow::Array> out; + SwitchType(type->id(), [&](const auto& type) { + using TWrap = std::decay_t<decltype(type)>; + if constexpr (arrow::is_number_type<typename TWrap::T>::value) { + typename arrow::TypeTraits<typename TWrap::T>::BuilderType builder; + for (const auto val : vec) { + Y_VERIFY(builder.Append(static_cast<typename TWrap::T::c_type>(val)).ok()); + } + Y_VERIFY(builder.Finish(&out).ok()); + return true; + } + return false; + }); + return out; +} + +static void RegisterMath(cp::FunctionRegistry* registry) { + Y_VERIFY(registry->AddFunction(MakeMathUnary<TAcosh>(TAcosh::Name)).ok()); + Y_VERIFY(registry->AddFunction(MakeMathUnary<TAtanh>(TAtanh::Name)).ok()); + Y_VERIFY(registry->AddFunction(MakeMathUnary<TCbrt>(TCbrt::Name)).ok()); + Y_VERIFY(registry->AddFunction(MakeMathUnary<TCosh>(TCosh::Name)).ok()); + Y_VERIFY(registry->AddFunction(MakeConstNullary<TE>(TE::Name)).ok()); + Y_VERIFY(registry->AddFunction(MakeMathUnary<TErf>(TErf::Name)).ok()); + Y_VERIFY(registry->AddFunction(MakeMathUnary<TErfc>(TErfc::Name)).ok()); + Y_VERIFY(registry->AddFunction(MakeMathUnary<TExp>(TExp::Name)).ok()); + Y_VERIFY(registry->AddFunction(MakeMathUnary<TExp2>(TExp2::Name)).ok()); + // Temporarily disabled because of compilation error on Windows. +#if 0 + Y_VERIFY(registry->AddFunction(MakeMathUnary<TExp10>(TExp10::Name)).ok()); +#endif + Y_VERIFY(registry->AddFunction(MakeMathBinary<THypot>(THypot::Name)).ok()); + Y_VERIFY(registry->AddFunction(MakeMathUnary<TLgamma>(TLgamma::Name)).ok()); + Y_VERIFY(registry->AddFunction(MakeConstNullary<TPi>(TPi::Name)).ok()); + Y_VERIFY(registry->AddFunction(MakeMathUnary<TSinh>(TSinh::Name)).ok()); + Y_VERIFY(registry->AddFunction(MakeMathUnary<TSqrt>(TSqrt::Name)).ok()); + Y_VERIFY(registry->AddFunction(MakeMathUnary<TTgamma>(TTgamma::Name)).ok()); +} + +static void RegisterRound(cp::FunctionRegistry* registry) { + Y_VERIFY(registry->AddFunction(MakeArithmeticUnary<TRound>(TRound::Name)).ok()); + Y_VERIFY(registry->AddFunction(MakeArithmeticUnary<TRoundBankers>(TRoundBankers::Name)).ok()); + Y_VERIFY(registry->AddFunction(MakeArithmeticUnary<TRoundToExp2>(TRoundToExp2::Name)).ok()); +} + +static void RegisterArithmetic(cp::FunctionRegistry* registry) { + Y_VERIFY(registry->AddFunction(MakeArithmeticIntBinary<TGreatestCommonDivisor>(TGreatestCommonDivisor::Name)).ok()); + Y_VERIFY(registry->AddFunction(MakeArithmeticIntBinary<TLeastCommonMultiple>(TLeastCommonMultiple::Name)).ok()); + Y_VERIFY(registry->AddFunction(MakeArithmeticBinary<TModulo>(TModulo::Name)).ok()); + Y_VERIFY(registry->AddFunction(MakeArithmeticBinary<TModuloOrZero>(TModuloOrZero::Name)).ok()); +} + +static std::unique_ptr<cp::FunctionRegistry> CreateCustomRegistry() { + auto registry = cp::FunctionRegistry::Make(); + RegisterMath(registry.get()); + RegisterRound(registry.get()); + RegisterArithmetic(registry.get()); + cp::internal::RegisterScalarCast(registry.get()); + return registry; +} + +// Creates singleton custom registry +cp::FunctionRegistry* GetCustomFunctionRegistry() { + static auto g_registry = CreateCustomRegistry(); + return g_registry.get(); +} + +// We want to have ExecContext per thread. All these context use one custom registry. +cp::ExecContext* GetCustomExecContext() { + static thread_local cp::ExecContext context(arrow::default_memory_pool(), nullptr, GetCustomFunctionRegistry()); + return &context; +} + +} diff --git a/ydb/library/arrow_kernels/ut_common.h b/ydb/library/arrow_kernels/ut_common.h new file mode 100644 index 00000000000..b1095c3def6 --- /dev/null +++ b/ydb/library/arrow_kernels/ut_common.h @@ -0,0 +1,24 @@ +#include <cmath> +#include <cstdint> +#include <iterator> +#include <ctime> +#include <vector> +#include <algorithm> + +#include <contrib/libs/apache/arrow/cpp/src/arrow/api.h> +#include <contrib/libs/apache/arrow/cpp/src/arrow/compute/api.h> +#include <contrib/libs/apache/arrow/cpp/src/arrow/compute/registry_internal.h> + +#include <library/cpp/testing/unittest/registar.h> + +#include "func_common.h" +#include "functions.h" + +namespace NKikimr::NKernels { + +std::shared_ptr<arrow::Array> NumVecToArray(const std::shared_ptr<arrow::DataType>& type, + const std::vector<double>& vec); + +arrow::compute::ExecContext* GetCustomExecContext(); + +} diff --git a/ydb/core/formats/ut_math.cpp b/ydb/library/arrow_kernels/ut_math.cpp index c9945c4e096..44507bbd19d 100644 --- a/ydb/core/formats/ut_math.cpp +++ b/ydb/library/arrow_kernels/ut_math.cpp @@ -1,26 +1,10 @@ -#include <cmath> -#include <cstdint> -#include <iterator> -#include <library/cpp/testing/unittest/registar.h> -#include <ctime> -#include <vector> -#include <algorithm> - -#include <contrib/libs/apache/arrow/cpp/src/arrow/api.h> -#include <contrib/libs/apache/arrow/cpp/src/arrow/compute/api.h> - - -#include "func_common.h" -#include "functions.h" -#include "custom_registry.h" -#include "arrow_helpers.h" - - -namespace NKikimr::NArrow { +#include "ut_common.h" namespace cp = ::arrow::compute; +namespace NKikimr::NKernels { + Y_UNIT_TEST_SUITE(MathTest) { Y_UNIT_TEST(E) { auto res = arrow::compute::CallFunction(TE::Name, {}, GetCustomExecContext()); @@ -66,7 +50,6 @@ Y_UNIT_TEST_SUITE(MathTest) { UNIT_ASSERT(res->Equals(expRes)); } } - } } diff --git a/ydb/core/formats/ut_round.cpp b/ydb/library/arrow_kernels/ut_round.cpp index 7b639b0888d..39cb983615f 100644 --- a/ydb/core/formats/ut_round.cpp +++ b/ydb/library/arrow_kernels/ut_round.cpp @@ -1,25 +1,10 @@ -#include <cmath> -#include <cstdint> -#include <iterator> -#include <library/cpp/testing/unittest/registar.h> -#include <ctime> -#include <vector> -#include <algorithm> - -#include <contrib/libs/apache/arrow/cpp/src/arrow/api.h> -#include <contrib/libs/apache/arrow/cpp/src/arrow/compute/api.h> - -#include "func_common.h" -#include "functions.h" -#include "custom_registry.h" -#include "arrow_helpers.h" - - -namespace NKikimr::NArrow { +#include "ut_common.h" namespace cp = ::arrow::compute; +namespace NKikimr::NKernels { + Y_UNIT_TEST_SUITE(RoundsTest) { Y_UNIT_TEST(RoundTest) { for (auto ty : cp::internal::FloatingPointTypes()) { @@ -47,7 +32,6 @@ Y_UNIT_TEST_SUITE(RoundsTest) { UNIT_ASSERT(res->Equals(expRes)); } } - } } |