diff options
author | atarasov5 <atarasov5@yandex-team.com> | 2025-04-01 10:18:39 +0300 |
---|---|---|
committer | atarasov5 <atarasov5@yandex-team.com> | 2025-04-01 10:35:40 +0300 |
commit | ff038a2ffe1e33ff9a12bb0ba45e97f3d7a52b03 (patch) | |
tree | 2d0d93c4b28f8a09547f85da1237c3a0ffddf9cc | |
parent | bf4197b54ff69b2ec6ad452fa090c64d303e60f6 (diff) | |
download | ydb-ff038a2ffe1e33ff9a12bb0ba45e97f3d7a52b03.tar.gz |
YQL-19645: Add more types for coalesce
commit_hash:063bee7c99ef14a1a51edffe1410bbc7f7b6303c
5 files changed, 134 insertions, 35 deletions
diff --git a/yql/essentials/minikql/comp_nodes/benchmark/block_coalesce/bench.cpp b/yql/essentials/minikql/comp_nodes/benchmark/block_coalesce/bench.cpp index aa1ea57703a..544811424a4 100644 --- a/yql/essentials/minikql/comp_nodes/benchmark/block_coalesce/bench.cpp +++ b/yql/essentials/minikql/comp_nodes/benchmark/block_coalesce/bench.cpp @@ -95,3 +95,6 @@ static void CustomArguments(benchmark::internal::Benchmark* b) { BENCHMARK(NKikimr::NMiniKQL::BenchmarkFixedSizeCoalesce<ui8>)->Unit(benchmark::kMillisecond)->Apply(CustomArguments); BENCHMARK(NKikimr::NMiniKQL::BenchmarkFixedSizeCoalesce<ui16>)->Unit(benchmark::kMillisecond)->Apply(CustomArguments); BENCHMARK(NKikimr::NMiniKQL::BenchmarkFixedSizeCoalesce<ui32>)->Unit(benchmark::kMillisecond)->Apply(CustomArguments); +BENCHMARK(NKikimr::NMiniKQL::BenchmarkFixedSizeCoalesce<ui64>)->Unit(benchmark::kMillisecond)->Apply(CustomArguments); +BENCHMARK(NKikimr::NMiniKQL::BenchmarkFixedSizeCoalesce<float>)->Unit(benchmark::kMillisecond)->Apply(CustomArguments); +BENCHMARK(NKikimr::NMiniKQL::BenchmarkFixedSizeCoalesce<double>)->Unit(benchmark::kMillisecond)->Apply(CustomArguments); diff --git a/yql/essentials/minikql/comp_nodes/mkql_block_coalesce.cpp b/yql/essentials/minikql/comp_nodes/mkql_block_coalesce.cpp index ae190e777e8..76ed0dce8a9 100644 --- a/yql/essentials/minikql/comp_nodes/mkql_block_coalesce.cpp +++ b/yql/essentials/minikql/comp_nodes/mkql_block_coalesce.cpp @@ -78,30 +78,39 @@ bool DispatchBlendingCoalesce(const arrow::Datum& left, const arrow::Datum& righ auto typeId = typeData.GetTypeId(); switch (NYql::NUdf::GetDataSlot(typeId)) { - case NYql::NUdf::EDataSlot::Int8: - DispatchCoalesceImpl<i8>(left, right, out, /*outIsOptional=*/rightIsOptional, pool); - return true; case NYql::NUdf::EDataSlot::Bool: + case NYql::NUdf::EDataSlot::Int8: case NYql::NUdf::EDataSlot::Uint8: DispatchCoalesceImpl<ui8>(left, right, out, /*outIsOptional=*/rightIsOptional, pool); return true; case NYql::NUdf::EDataSlot::Int16: - DispatchCoalesceImpl<i16>(left, right, out, /*outIsOptional=*/rightIsOptional, pool); - return true; case NYql::NUdf::EDataSlot::Uint16: + case NYql::NUdf::EDataSlot::Date: DispatchCoalesceImpl<ui16>(left, right, out, /*outIsOptional=*/rightIsOptional, pool); return true; case NYql::NUdf::EDataSlot::Int32: - DispatchCoalesceImpl<i32>(left, right, out, /*outIsOptional=*/rightIsOptional, pool); - return true; case NYql::NUdf::EDataSlot::Uint32: + case NYql::NUdf::EDataSlot::Date32: + case NYql::NUdf::EDataSlot::Datetime: DispatchCoalesceImpl<ui32>(left, right, out, /*outIsOptional=*/rightIsOptional, pool); return true; case NYql::NUdf::EDataSlot::Int64: case NYql::NUdf::EDataSlot::Uint64: + case NYql::NUdf::EDataSlot::Datetime64: + case NYql::NUdf::EDataSlot::Timestamp64: + case NYql::NUdf::EDataSlot::Interval64: + case NYql::NUdf::EDataSlot::Interval: + case NYql::NUdf::EDataSlot::Timestamp: + DispatchCoalesceImpl<ui64>(left, right, out, /*outIsOptional=*/rightIsOptional, pool); + return true; case NYql::NUdf::EDataSlot::Double: + static_assert(sizeof(NUdf::TDataType<double>::TLayout) == sizeof(NUdf::TDataType<ui64>::TLayout)); + DispatchCoalesceImpl<ui64>(left, right, out, /*outIsOptional=*/rightIsOptional, pool); + return true; case NYql::NUdf::EDataSlot::Float: - // TODO(YQL-19645): Support other numeric types. + static_assert(sizeof(NUdf::TDataType<float>::TLayout) == sizeof(NUdf::TDataType<ui32>::TLayout)); + DispatchCoalesceImpl<ui32>(left, right, out, /*outIsOptional=*/rightIsOptional, pool); + return true; default: // Fallback to general builder/reader pipeline. return false; diff --git a/yql/essentials/minikql/comp_nodes/ut/mkql_block_coalesce_ut.cpp b/yql/essentials/minikql/comp_nodes/ut/mkql_block_coalesce_ut.cpp index 1a6a044bcf7..4f00f15633c 100644 --- a/yql/essentials/minikql/comp_nodes/ut/mkql_block_coalesce_ut.cpp +++ b/yql/essentials/minikql/comp_nodes/ut/mkql_block_coalesce_ut.cpp @@ -21,24 +21,60 @@ namespace { #define UNIT_TEST_WITH_INTEGER(TestName) \ template <typename TTestType> \ void TestName##Execute(NUnitTest::TTestContext& ut_context Y_DECLARE_UNUSED); \ - Y_UNIT_TEST(TestName##i8) { \ + Y_UNIT_TEST(TestName##_i8) { \ TestName##Execute<i8>(ut_context); \ } \ - Y_UNIT_TEST(TestName##ui8) { \ + Y_UNIT_TEST(TestName##_ui8) { \ TestName##Execute<ui8>(ut_context); \ } \ - Y_UNIT_TEST(TestName##i16) { \ + Y_UNIT_TEST(TestName##_i16) { \ TestName##Execute<i16>(ut_context); \ } \ - Y_UNIT_TEST(TestName##ui16) { \ + Y_UNIT_TEST(TestName##_ui16) { \ TestName##Execute<ui16>(ut_context); \ } \ - Y_UNIT_TEST(TestName##i32) { \ + Y_UNIT_TEST(TestName##_i32) { \ TestName##Execute<i32>(ut_context); \ } \ - Y_UNIT_TEST(TestName##ui32) { \ + Y_UNIT_TEST(TestName##_ui32) { \ TestName##Execute<ui32>(ut_context); \ } \ + Y_UNIT_TEST(TestName##_i64) { \ + TestName##Execute<i64>(ut_context); \ + } \ + Y_UNIT_TEST(TestName##_ui64) { \ + TestName##Execute<ui64>(ut_context); \ + } \ + Y_UNIT_TEST(TestName##_float) { \ + TestName##Execute<float>(ut_context); \ + } \ + Y_UNIT_TEST(TestName##_double) { \ + TestName##Execute<double>(ut_context); \ + } \ + Y_UNIT_TEST(TestName##_TDate) { \ + TestName##Execute<NYql::NUdf::TDate>(ut_context); \ + } \ + Y_UNIT_TEST(TestName##_TDatetime) { \ + TestName##Execute<NYql::NUdf::TDatetime>(ut_context); \ + } \ + Y_UNIT_TEST(TestName##_TTimestamp) { \ + TestName##Execute<NYql::NUdf::TTimestamp>(ut_context); \ + } \ + Y_UNIT_TEST(TestName##_TInterval) { \ + TestName##Execute<NYql::NUdf::TInterval>(ut_context); \ + } \ + Y_UNIT_TEST(TestName##_TDate32) { \ + TestName##Execute<NYql::NUdf::TDate32>(ut_context); \ + } \ + Y_UNIT_TEST(TestName##_TDatetime64) { \ + TestName##Execute<NYql::NUdf::TDatetime64>(ut_context); \ + } \ + Y_UNIT_TEST(TestName##_TTimestamp64) { \ + TestName##Execute<NYql::NUdf::TTimestamp64>(ut_context); \ + } \ + Y_UNIT_TEST(TestName##_TInterval64) { \ + TestName##Execute<NYql::NUdf::TInterval64>(ut_context); \ + } \ \ template <typename TTestType> \ void TestName##Execute(NUnitTest::TTestContext& ut_context Y_DECLARE_UNUSED) @@ -76,8 +112,17 @@ enum class ERightOperandType { OPTIONAL_SCALAR }; +template <typename T> +using InputOptionalVector = + std::vector<TMaybe<typename NUdf::TDataType<T>::TLayout>>; + template <typename T, ERightOperandType rightType = ERightOperandType::ARRAY> -void TestBlockCoalesceForVector(std::vector<TMaybe<T>> left, std::vector<TMaybe<T>> right, std::vector<TMaybe<T>> expected, size_t leftOffset, size_t rightOffset) { +void TestBlockCoalesceForVector(InputOptionalVector<T> left, + InputOptionalVector<T> right, + InputOptionalVector<T> expected, + size_t leftOffset, + size_t rightOffset) { + using TLayout = typename NUdf::TDataType<T>::TLayout; TSetup<false> setup; NYql::TExprContext exprCtx; auto* type = setup.PgmBuilder->NewDataType(NUdf::TDataType<T>::Id); @@ -101,12 +146,12 @@ void TestBlockCoalesceForVector(std::vector<TMaybe<T>> left, std::vector<TMaybe< arrow::Datum rightOperand; if constexpr (rightType == ERightOperandType::SCALAR) { - rightOperand = MakeScalarDatum<T>(right[0].GetRef()); + rightOperand = MakeScalarDatum<TLayout>(right[0].GetRef()); } else if constexpr (rightType == ERightOperandType::OPTIONAL_SCALAR) { if (right[0]) { - rightOperand = MakeScalarDatum<T>(right[0].GetRef()); + rightOperand = MakeScalarDatum<TLayout>(right[0].GetRef()); } else { - rightOperand = MakeScalarDatum<T>(0); + rightOperand = MakeScalarDatum<TLayout>(0); rightOperand.scalar()->is_valid = false; } } else { @@ -133,7 +178,9 @@ void TestBlockCoalesceForVector(std::vector<TMaybe<T>> left, std::vector<TMaybe< } template <typename T, ERightOperandType rightType = ERightOperandType::ARRAY> -void TestBlockCoalesce(std::vector<TMaybe<T>> left, std::vector<TMaybe<T>> right, std::vector<TMaybe<T>> expected) { +void TestBlockCoalesce(InputOptionalVector<T> left, + InputOptionalVector<T> right, + InputOptionalVector<T> expected) { // First test different offsets. for (size_t leftOffset = 0; leftOffset < 10; leftOffset++) { for (size_t rightOffset = 0; rightOffset < 10; rightOffset++) { @@ -247,16 +294,16 @@ Y_UNIT_TEST(CoalesceGraphTest) { } UNIT_TEST_WITH_INTEGER(KernelRightIsNotNullArray) { - auto max = std::numeric_limits<TTestType>::max(); - auto min = std::numeric_limits<TTestType>::min(); + auto max = std::numeric_limits<typename NUdf::TDataType<TTestType>::TLayout>::max(); + auto min = std::numeric_limits<typename NUdf::TDataType<TTestType>::TLayout>::min(); TestBlockCoalesce<TTestType, ERightOperandType::ARRAY>({Nothing(), 2, 3, Nothing(), 5, 6, 7, max, 9, Nothing(), 11, 12, 13, Nothing(), Nothing(), Nothing(), min, Nothing(), 19, 20}, {101, 102, 103, 104, 105, 106, 107, 108, 109, 110, 111, 112, 113, 114, 115, 116, 117, 118, 119, 120}, {101, 2, 3, 104, 5, 6, 7, max, 9, 110, 11, 12, 13, 114, 115, 116, min, 118, 19, 20}); } UNIT_TEST_WITH_INTEGER(KernelRightIsScalar) { - auto max = std::numeric_limits<TTestType>::max(); - auto min = std::numeric_limits<TTestType>::min(); + auto max = std::numeric_limits<typename NUdf::TDataType<TTestType>::TLayout>::max(); + auto min = std::numeric_limits<typename NUdf::TDataType<TTestType>::TLayout>::min(); TestBlockCoalesce<TTestType, ERightOperandType::SCALAR>({Nothing(), 2, 3, Nothing(), 5, 6, 7, max, 9, Nothing(), 11, 12, 13, Nothing(), Nothing(), Nothing(), min, Nothing(), 19, 20}, {77}, @@ -264,8 +311,8 @@ UNIT_TEST_WITH_INTEGER(KernelRightIsScalar) { } UNIT_TEST_WITH_INTEGER(KernelRightIsOptionalArray) { - auto max = std::numeric_limits<TTestType>::max(); - auto min = std::numeric_limits<TTestType>::min(); + auto max = std::numeric_limits<typename NUdf::TDataType<TTestType>::TLayout>::max(); + auto min = std::numeric_limits<typename NUdf::TDataType<TTestType>::TLayout>::min(); TestBlockCoalesce<TTestType, ERightOperandType::OPTIONAL_ARRAY>({Nothing(), 2, 3, Nothing(), 5, 6, 7, max, 9, Nothing(), 11, 12, 13, Nothing(), Nothing(), Nothing(), min, Nothing(), 19, 20}, {Nothing(), 102, Nothing(), 104, Nothing(), 106, 107, 108, 109, 110, 111, 112, 113, 114, Nothing(), 116, 117, 118, Nothing(), 120}, @@ -273,8 +320,8 @@ UNIT_TEST_WITH_INTEGER(KernelRightIsOptionalArray) { } UNIT_TEST_WITH_INTEGER(KernelRightIsOptionalInvalidScalar) { - auto max = std::numeric_limits<TTestType>::max(); - auto min = std::numeric_limits<TTestType>::min(); + auto max = std::numeric_limits<typename NUdf::TDataType<TTestType>::TLayout>::max(); + auto min = std::numeric_limits<typename NUdf::TDataType<TTestType>::TLayout>::min(); TestBlockCoalesce<TTestType, ERightOperandType::OPTIONAL_SCALAR>({Nothing(), 2, 3, Nothing(), 5, 6, 7, max, 9, Nothing(), 11, 12, 13, Nothing(), Nothing(), Nothing(), min, Nothing(), 19, 20}, {Nothing()}, @@ -282,8 +329,8 @@ UNIT_TEST_WITH_INTEGER(KernelRightIsOptionalInvalidScalar) { } UNIT_TEST_WITH_INTEGER(KernelRightIsOptionalValidScalar) { - auto max = std::numeric_limits<TTestType>::max(); - auto min = std::numeric_limits<TTestType>::min(); + auto max = std::numeric_limits<typename NUdf::TDataType<TTestType>::TLayout>::max(); + auto min = std::numeric_limits<typename NUdf::TDataType<TTestType>::TLayout>::min(); TestBlockCoalesce<TTestType, ERightOperandType::OPTIONAL_SCALAR>({Nothing(), 2, 3, Nothing(), 5, 6, 7, max, 9, Nothing(), 11, 12, 13, Nothing(), Nothing(), Nothing(), min, Nothing(), 19, 20}, {77}, diff --git a/yql/essentials/public/udf/arrow/bit_util.h b/yql/essentials/public/udf/arrow/bit_util.h index 091a47bbcc6..d8911d4a881 100644 --- a/yql/essentials/public/udf/arrow/bit_util.h +++ b/yql/essentials/public/udf/arrow/bit_util.h @@ -143,6 +143,18 @@ Y_FORCE_INLINE ui32 ReplicateEachBitFourTimes(ui8 b) { return x; } +// Repeat 8 times every bit in an 8-bit value. +// Example: 0b01010101 -> 0b0000000011111111000000001111111100000000111111110000000011111111. +Y_FORCE_INLINE ui64 ReplicateEachBitEightTimes(ui8 x) { + ui64 expanded = x; + expanded = (expanded * 0x8040201008040201ULL); + expanded &= 0x8080808080808080ULL; + expanded >>= 7; + expanded *= 0xFF; + expanded = NYql::SwapBytes(expanded); + return expanded; +} + // BitToByteExpand - Expands the individual bits of an 8-bit input x into an array of 8 elements of type TType. // Each output element corresponds to one bit from the original value, expanded (via specialized routines) to fill the entire TType // Example: BitToByteExpand<ui8>(0b10101010) yields REVERSE({0xFF, 0x00, 0xFF, 0x00, 0xFF, 0x00, 0xFF, 0x00}). @@ -153,12 +165,7 @@ Y_FORCE_INLINE std::array<TType, 8> BitToByteExpand(ui8 x); template <> Y_FORCE_INLINE std::array<ui8, 8> BitToByteExpand(ui8 x) { std::array<ui8, 8> result; - ui64 expanded = x; - expanded = (expanded * 0x8040201008040201ULL); - expanded &= 0x8080808080808080ULL; - expanded >>= 7; - expanded *= 0xFF; - expanded = NYql::SwapBytes(expanded); + ui64 expanded = ReplicateEachBitEightTimes(x); memcpy(&result[0], &expanded, sizeof(expanded)); return result; } @@ -186,5 +193,18 @@ Y_FORCE_INLINE std::array<ui32, 8> BitToByteExpand(ui8 x) { return output; } + +template <> +Y_FORCE_INLINE std::array<ui64, 8> BitToByteExpand(ui8 x) { + std::array<ui8, 8> input = BitToByteExpand<ui8>(x); + std::array<ui64, 8> output{}; + + for (size_t i = 0; i < 8; ++i) { + output[i] = ReplicateEachBitEightTimes(input[i]); + } + + return output; } + +} // namespace NUdf } diff --git a/yql/essentials/public/udf/arrow/ut/bit_util_ut.cpp b/yql/essentials/public/udf/arrow/ut/bit_util_ut.cpp index 4af399c8deb..dd95030c1b3 100644 --- a/yql/essentials/public/udf/arrow/ut/bit_util_ut.cpp +++ b/yql/essentials/public/udf/arrow/ut/bit_util_ut.cpp @@ -80,6 +80,25 @@ Y_UNIT_TEST(ReplicateEachBitFourTimes) { UNIT_ASSERT_EQUAL(ReplicateEachBitFourTimes(0x80), 0xF0000000); } +Y_UNIT_TEST(ReplicateEachBitEightTimes) { + // Test case 1: All zeros + UNIT_ASSERT_EQUAL(ReplicateEachBitEightTimes(0x00), 0x00000000); + + // Test case 2: All ones + UNIT_ASSERT_EQUAL(ReplicateEachBitEightTimes(0xFF), 0xFFFFFFFFFFFFFFFF); + + // Test case 3: Alternating bits + UNIT_ASSERT_EQUAL(ReplicateEachBitEightTimes(0x55), 0x00FF00FF00FF00FF); + UNIT_ASSERT_EQUAL(ReplicateEachBitEightTimes(0xAA), 0xFF00FF00FF00FF00); + + // Test case 4: Random pattern + UNIT_ASSERT_EQUAL(ReplicateEachBitEightTimes(0x3C), 0x0000FFFFFFFF0000); + + // Test case 5: Single bit set + UNIT_ASSERT_EQUAL(ReplicateEachBitEightTimes(0x01), 0x00000000000000FF); + UNIT_ASSERT_EQUAL(ReplicateEachBitEightTimes(0x80), 0xFF00000000000000); +} + Y_UNIT_TEST(BitToByteExpand) { auto testBody = [](auto n) { using T = decltype(n); @@ -125,6 +144,7 @@ Y_UNIT_TEST(BitToByteExpand) { testBody(ui8()); testBody(ui16()); testBody(ui32()); + testBody(ui64()); } } // Y_UNIT_TEST_SUITE(BitExpanding) |