about | summary | refs | log | tree | commit | diff | stats
diff options
context:
space:
mode:
author atarasov5 <atarasov5@yandex-team.com> 2025-04-01 10:18:39 +0300
committer atarasov5 <atarasov5@yandex-team.com> 2025-04-01 10:35:40 +0300
commit ff038a2ffe1e33ff9a12bb0ba45e97f3d7a52b03 (patch)
tree 2d0d93c4b28f8a09547f85da1237c3a0ffddf9cc
parent bf4197b54ff69b2ec6ad452fa090c64d303e60f6 (diff)
download ydb-ff038a2ffe1e33ff9a12bb0ba45e97f3d7a52b03.tar.gz
YQL-19645: Add more types for coalesce
commit_hash:063bee7c99ef14a1a51edffe1410bbc7f7b6303c
-rw-r--r-- yql/essentials/minikql/comp_nodes/benchmark/block_coalesce/bench.cpp 3
-rw-r--r-- yql/essentials/minikql/comp_nodes/mkql_block_coalesce.cpp 25
-rw-r--r-- yql/essentials/minikql/comp_nodes/ut/mkql_block_coalesce_ut.cpp 89
-rw-r--r-- yql/essentials/public/udf/arrow/bit_util.h 32
-rw-r--r-- yql/essentials/public/udf/arrow/ut/bit_util_ut.cpp 20
5 files changed, 134 insertions, 35 deletions
diff --git a/yql/essentials/minikql/comp_nodes/benchmark/block_coalesce/bench.cpp b/yql/essentials/minikql/comp_nodes/benchmark/block_coalesce/bench.cpp
index aa1ea57703a..544811424a4 100644
--- a/yql/essentials/minikql/comp_nodes/benchmark/block_coalesce/bench.cpp
+++ b/yql/essentials/minikql/comp_nodes/benchmark/block_coalesce/bench.cpp
@@ -95,3 +95,6 @@ static void CustomArguments(benchmark::internal::Benchmark* b) {
BENCHMARK(NKikimr::NMiniKQL::BenchmarkFixedSizeCoalesce<ui8>)->Unit(benchmark::kMillisecond)->Apply(CustomArguments);
BENCHMARK(NKikimr::NMiniKQL::BenchmarkFixedSizeCoalesce<ui16>)->Unit(benchmark::kMillisecond)->Apply(CustomArguments);
BENCHMARK(NKikimr::NMiniKQL::BenchmarkFixedSizeCoalesce<ui32>)->Unit(benchmark::kMillisecond)->Apply(CustomArguments);
+BENCHMARK(NKikimr::NMiniKQL::BenchmarkFixedSizeCoalesce<ui64>)->Unit(benchmark::kMillisecond)->Apply(CustomArguments);
+BENCHMARK(NKikimr::NMiniKQL::BenchmarkFixedSizeCoalesce<float>)->Unit(benchmark::kMillisecond)->Apply(CustomArguments);
+BENCHMARK(NKikimr::NMiniKQL::BenchmarkFixedSizeCoalesce<double>)->Unit(benchmark::kMillisecond)->Apply(CustomArguments);
diff --git a/yql/essentials/minikql/comp_nodes/mkql_block_coalesce.cpp b/yql/essentials/minikql/comp_nodes/mkql_block_coalesce.cpp
index ae190e777e8..76ed0dce8a9 100644
--- a/yql/essentials/minikql/comp_nodes/mkql_block_coalesce.cpp
+++ b/yql/essentials/minikql/comp_nodes/mkql_block_coalesce.cpp
@@ -78,30 +78,39 @@ bool DispatchBlendingCoalesce(const arrow::Datum& left, const arrow::Datum& righ
auto typeId = typeData.GetTypeId();
switch (NYql::NUdf::GetDataSlot(typeId)) {
- case NYql::NUdf::EDataSlot::Int8:
- DispatchCoalesceImpl<i8>(left, right, out, /*outIsOptional=*/rightIsOptional, pool);
- return true;
case NYql::NUdf::EDataSlot::Bool:
+ case NYql::NUdf::EDataSlot::Int8:
case NYql::NUdf::EDataSlot::Uint8:
DispatchCoalesceImpl<ui8>(left, right, out, /*outIsOptional=*/rightIsOptional, pool);
return true;
case NYql::NUdf::EDataSlot::Int16:
- DispatchCoalesceImpl<i16>(left, right, out, /*outIsOptional=*/rightIsOptional, pool);
- return true;
case NYql::NUdf::EDataSlot::Uint16:
+ case NYql::NUdf::EDataSlot::Date:
DispatchCoalesceImpl<ui16>(left, right, out, /*outIsOptional=*/rightIsOptional, pool);
return true;
case NYql::NUdf::EDataSlot::Int32:
- DispatchCoalesceImpl<i32>(left, right, out, /*outIsOptional=*/rightIsOptional, pool);
- return true;
case NYql::NUdf::EDataSlot::Uint32:
+ case NYql::NUdf::EDataSlot::Date32:
+ case NYql::NUdf::EDataSlot::Datetime:
DispatchCoalesceImpl<ui32>(left, right, out, /*outIsOptional=*/rightIsOptional, pool);
return true;
case NYql::NUdf::EDataSlot::Int64:
case NYql::NUdf::EDataSlot::Uint64:
+ case NYql::NUdf::EDataSlot::Datetime64:
+ case NYql::NUdf::EDataSlot::Timestamp64:
+ case NYql::NUdf::EDataSlot::Interval64:
+ case NYql::NUdf::EDataSlot::Interval:
+ case NYql::NUdf::EDataSlot::Timestamp:
+ DispatchCoalesceImpl<ui64>(left, right, out, /*outIsOptional=*/rightIsOptional, pool);
+ return true;
case NYql::NUdf::EDataSlot::Double:
+ static_assert(sizeof(NUdf::TDataType<double>::TLayout) == sizeof(NUdf::TDataType<ui64>::TLayout));
+ DispatchCoalesceImpl<ui64>(left, right, out, /*outIsOptional=*/rightIsOptional, pool);
+ return true;
case NYql::NUdf::EDataSlot::Float:
- // TODO(YQL-19645): Support other numeric types.
+ static_assert(sizeof(NUdf::TDataType<float>::TLayout) == sizeof(NUdf::TDataType<ui32>::TLayout));
+ DispatchCoalesceImpl<ui32>(left, right, out, /*outIsOptional=*/rightIsOptional, pool);
+ return true;
default:
// Fallback to general builder/reader pipeline.
return false;
diff --git a/yql/essentials/minikql/comp_nodes/ut/mkql_block_coalesce_ut.cpp b/yql/essentials/minikql/comp_nodes/ut/mkql_block_coalesce_ut.cpp
index 1a6a044bcf7..4f00f15633c 100644
--- a/yql/essentials/minikql/comp_nodes/ut/mkql_block_coalesce_ut.cpp
+++ b/yql/essentials/minikql/comp_nodes/ut/mkql_block_coalesce_ut.cpp
@@ -21,24 +21,60 @@ namespace {
#define UNIT_TEST_WITH_INTEGER(TestName) \
template <typename TTestType> \
void TestName##Execute(NUnitTest::TTestContext& ut_context Y_DECLARE_UNUSED); \
- Y_UNIT_TEST(TestName##i8) { \
+ Y_UNIT_TEST(TestName##_i8) { \
TestName##Execute<i8>(ut_context); \
} \
- Y_UNIT_TEST(TestName##ui8) { \
+ Y_UNIT_TEST(TestName##_ui8) { \
TestName##Execute<ui8>(ut_context); \
} \
- Y_UNIT_TEST(TestName##i16) { \
+ Y_UNIT_TEST(TestName##_i16) { \
TestName##Execute<i16>(ut_context); \
} \
- Y_UNIT_TEST(TestName##ui16) { \
+ Y_UNIT_TEST(TestName##_ui16) { \
TestName##Execute<ui16>(ut_context); \
} \
- Y_UNIT_TEST(TestName##i32) { \
+ Y_UNIT_TEST(TestName##_i32) { \
TestName##Execute<i32>(ut_context); \
} \
- Y_UNIT_TEST(TestName##ui32) { \
+ Y_UNIT_TEST(TestName##_ui32) { \
TestName##Execute<ui32>(ut_context); \
} \
+ Y_UNIT_TEST(TestName##_i64) { \
+ TestName##Execute<i64>(ut_context); \
+ } \
+ Y_UNIT_TEST(TestName##_ui64) { \
+ TestName##Execute<ui64>(ut_context); \
+ } \
+ Y_UNIT_TEST(TestName##_float) { \
+ TestName##Execute<float>(ut_context); \
+ } \
+ Y_UNIT_TEST(TestName##_double) { \
+ TestName##Execute<double>(ut_context); \
+ } \
+ Y_UNIT_TEST(TestName##_TDate) { \
+ TestName##Execute<NYql::NUdf::TDate>(ut_context); \
+ } \
+ Y_UNIT_TEST(TestName##_TDatetime) { \
+ TestName##Execute<NYql::NUdf::TDatetime>(ut_context); \
+ } \
+ Y_UNIT_TEST(TestName##_TTimestamp) { \
+ TestName##Execute<NYql::NUdf::TTimestamp>(ut_context); \
+ } \
+ Y_UNIT_TEST(TestName##_TInterval) { \
+ TestName##Execute<NYql::NUdf::TInterval>(ut_context); \
+ } \
+ Y_UNIT_TEST(TestName##_TDate32) { \
+ TestName##Execute<NYql::NUdf::TDate32>(ut_context); \
+ } \
+ Y_UNIT_TEST(TestName##_TDatetime64) { \
+ TestName##Execute<NYql::NUdf::TDatetime64>(ut_context); \
+ } \
+ Y_UNIT_TEST(TestName##_TTimestamp64) { \
+ TestName##Execute<NYql::NUdf::TTimestamp64>(ut_context); \
+ } \
+ Y_UNIT_TEST(TestName##_TInterval64) { \
+ TestName##Execute<NYql::NUdf::TInterval64>(ut_context); \
+ } \
\
template <typename TTestType> \
void TestName##Execute(NUnitTest::TTestContext& ut_context Y_DECLARE_UNUSED)
@@ -76,8 +112,17 @@ enum class ERightOperandType {
OPTIONAL_SCALAR
};
+template <typename T>
+using InputOptionalVector =
+ std::vector<TMaybe<typename NUdf::TDataType<T>::TLayout>>; // test input: optional values stored as the TLayout type that NUdf::TDataType<T> declares for T
+
template <typename T, ERightOperandType rightType = ERightOperandType::ARRAY>
-void TestBlockCoalesceForVector(std::vector<TMaybe<T>> left, std::vector<TMaybe<T>> right, std::vector<TMaybe<T>> expected, size_t leftOffset, size_t rightOffset) {
+void TestBlockCoalesceForVector(InputOptionalVector<T> left,
+ InputOptionalVector<T> right,
+ InputOptionalVector<T> expected,
+ size_t leftOffset,
+ size_t rightOffset) {
+ using TLayout = typename NUdf::TDataType<T>::TLayout;
TSetup<false> setup;
NYql::TExprContext exprCtx;
auto* type = setup.PgmBuilder->NewDataType(NUdf::TDataType<T>::Id);
@@ -101,12 +146,12 @@ void TestBlockCoalesceForVector(std::vector<TMaybe<T>> left, std::vector<TMaybe<
arrow::Datum rightOperand;
if constexpr (rightType == ERightOperandType::SCALAR) {
- rightOperand = MakeScalarDatum<T>(right[0].GetRef());
+ rightOperand = MakeScalarDatum<TLayout>(right[0].GetRef());
} else if constexpr (rightType == ERightOperandType::OPTIONAL_SCALAR) {
if (right[0]) {
- rightOperand = MakeScalarDatum<T>(right[0].GetRef());
+ rightOperand = MakeScalarDatum<TLayout>(right[0].GetRef());
} else {
- rightOperand = MakeScalarDatum<T>(0);
+ rightOperand = MakeScalarDatum<TLayout>(0);
rightOperand.scalar()->is_valid = false;
}
} else {
@@ -133,7 +178,9 @@ void TestBlockCoalesceForVector(std::vector<TMaybe<T>> left, std::vector<TMaybe<
}
template <typename T, ERightOperandType rightType = ERightOperandType::ARRAY>
-void TestBlockCoalesce(std::vector<TMaybe<T>> left, std::vector<TMaybe<T>> right, std::vector<TMaybe<T>> expected) {
+void TestBlockCoalesce(InputOptionalVector<T> left,
+ InputOptionalVector<T> right,
+ InputOptionalVector<T> expected) {
// First test different offsets.
for (size_t leftOffset = 0; leftOffset < 10; leftOffset++) {
for (size_t rightOffset = 0; rightOffset < 10; rightOffset++) {
@@ -247,16 +294,16 @@ Y_UNIT_TEST(CoalesceGraphTest) {
}
UNIT_TEST_WITH_INTEGER(KernelRightIsNotNullArray) {
- auto max = std::numeric_limits<TTestType>::max();
- auto min = std::numeric_limits<TTestType>::min();
+ auto max = std::numeric_limits<typename NUdf::TDataType<TTestType>::TLayout>::max();
+ auto min = std::numeric_limits<typename NUdf::TDataType<TTestType>::TLayout>::min();
TestBlockCoalesce<TTestType, ERightOperandType::ARRAY>({Nothing(), 2, 3, Nothing(), 5, 6, 7, max, 9, Nothing(), 11, 12, 13, Nothing(), Nothing(), Nothing(), min, Nothing(), 19, 20},
{101, 102, 103, 104, 105, 106, 107, 108, 109, 110, 111, 112, 113, 114, 115, 116, 117, 118, 119, 120},
{101, 2, 3, 104, 5, 6, 7, max, 9, 110, 11, 12, 13, 114, 115, 116, min, 118, 19, 20});
}
UNIT_TEST_WITH_INTEGER(KernelRightIsScalar) {
- auto max = std::numeric_limits<TTestType>::max();
- auto min = std::numeric_limits<TTestType>::min();
+ auto max = std::numeric_limits<typename NUdf::TDataType<TTestType>::TLayout>::max();
+ auto min = std::numeric_limits<typename NUdf::TDataType<TTestType>::TLayout>::min();
TestBlockCoalesce<TTestType, ERightOperandType::SCALAR>({Nothing(), 2, 3, Nothing(), 5, 6, 7, max, 9, Nothing(), 11, 12, 13, Nothing(), Nothing(), Nothing(), min, Nothing(), 19, 20},
{77},
@@ -264,8 +311,8 @@ UNIT_TEST_WITH_INTEGER(KernelRightIsScalar) {
}
UNIT_TEST_WITH_INTEGER(KernelRightIsOptionalArray) {
- auto max = std::numeric_limits<TTestType>::max();
- auto min = std::numeric_limits<TTestType>::min();
+ auto max = std::numeric_limits<typename NUdf::TDataType<TTestType>::TLayout>::max();
+ auto min = std::numeric_limits<typename NUdf::TDataType<TTestType>::TLayout>::min();
TestBlockCoalesce<TTestType, ERightOperandType::OPTIONAL_ARRAY>({Nothing(), 2, 3, Nothing(), 5, 6, 7, max, 9, Nothing(), 11, 12, 13, Nothing(), Nothing(), Nothing(), min, Nothing(), 19, 20},
{Nothing(), 102, Nothing(), 104, Nothing(), 106, 107, 108, 109, 110, 111, 112, 113, 114, Nothing(), 116, 117, 118, Nothing(), 120},
@@ -273,8 +320,8 @@ UNIT_TEST_WITH_INTEGER(KernelRightIsOptionalArray) {
}
UNIT_TEST_WITH_INTEGER(KernelRightIsOptionalInvalidScalar) {
- auto max = std::numeric_limits<TTestType>::max();
- auto min = std::numeric_limits<TTestType>::min();
+ auto max = std::numeric_limits<typename NUdf::TDataType<TTestType>::TLayout>::max();
+ auto min = std::numeric_limits<typename NUdf::TDataType<TTestType>::TLayout>::min();
TestBlockCoalesce<TTestType, ERightOperandType::OPTIONAL_SCALAR>({Nothing(), 2, 3, Nothing(), 5, 6, 7, max, 9, Nothing(), 11, 12, 13, Nothing(), Nothing(), Nothing(), min, Nothing(), 19, 20},
{Nothing()},
@@ -282,8 +329,8 @@ UNIT_TEST_WITH_INTEGER(KernelRightIsOptionalInvalidScalar) {
}
UNIT_TEST_WITH_INTEGER(KernelRightIsOptionalValidScalar) {
- auto max = std::numeric_limits<TTestType>::max();
- auto min = std::numeric_limits<TTestType>::min();
+ auto max = std::numeric_limits<typename NUdf::TDataType<TTestType>::TLayout>::max();
+ auto min = std::numeric_limits<typename NUdf::TDataType<TTestType>::TLayout>::min();
TestBlockCoalesce<TTestType, ERightOperandType::OPTIONAL_SCALAR>({Nothing(), 2, 3, Nothing(), 5, 6, 7, max, 9, Nothing(), 11, 12, 13, Nothing(), Nothing(), Nothing(), min, Nothing(), 19, 20},
{77},
diff --git a/yql/essentials/public/udf/arrow/bit_util.h b/yql/essentials/public/udf/arrow/bit_util.h
index 091a47bbcc6..d8911d4a881 100644
--- a/yql/essentials/public/udf/arrow/bit_util.h
+++ b/yql/essentials/public/udf/arrow/bit_util.h
@@ -143,6 +143,18 @@ Y_FORCE_INLINE ui32 ReplicateEachBitFourTimes(ui8 b) {
return x;
}
+// Replicates every bit of an 8-bit value eight times: bit i of x becomes byte i (0x00 or 0xFF) of the 64-bit result.
+// Example: 0b01010101 -> 0b0000000011111111000000001111111100000000111111110000000011111111.
+Y_FORCE_INLINE ui64 ReplicateEachBitEightTimes(ui8 x) {
+ ui64 expanded = x;
+ expanded = (expanded * 0x8040201008040201ULL); // multiplier has bits 0, 9, 18, ..., 63 set: copies of x at 9-bit strides, so bit (7 - k) of x lands in the top bit of byte k
+ expanded &= 0x8080808080808080ULL; // keep only the top bit of each byte
+ expanded >>= 7; // move that bit to the bottom of its byte: every byte is now 0 or 1
+ expanded *= 0xFF; // widen each 0/1 byte to 0x00/0xFF (bytes cannot carry into each other)
+ expanded = NYql::SwapBytes(expanded); // reverse the byte order so that bit i of x ends up in byte i
+ return expanded;
+}
+
// BitToByteExpand - Expands the individual bits of an 8-bit input x into an array of 8 elements of type TType.
// Each output element corresponds to one bit from the original value, expanded (via specialized routines) to fill the entire TType
// Example: BitToByteExpand<ui8>(0b10101010) yields REVERSE({0xFF, 0x00, 0xFF, 0x00, 0xFF, 0x00, 0xFF, 0x00}).
@@ -153,12 +165,7 @@ Y_FORCE_INLINE std::array<TType, 8> BitToByteExpand(ui8 x);
template <>
Y_FORCE_INLINE std::array<ui8, 8> BitToByteExpand(ui8 x) {
std::array<ui8, 8> result;
- ui64 expanded = x;
- expanded = (expanded * 0x8040201008040201ULL);
- expanded &= 0x8080808080808080ULL;
- expanded >>= 7;
- expanded *= 0xFF;
- expanded = NYql::SwapBytes(expanded);
+ ui64 expanded = ReplicateEachBitEightTimes(x);
memcpy(&result[0], &expanded, sizeof(expanded));
return result;
}
@@ -186,5 +193,18 @@ Y_FORCE_INLINE std::array<ui32, 8> BitToByteExpand(ui8 x) {
return output;
}
+
+template <>
+Y_FORCE_INLINE std::array<ui64, 8> BitToByteExpand(ui8 x) { // ui64 specialization: one 64-bit element per bit of x
+ std::array<ui8, 8> input = BitToByteExpand<ui8>(x); // first stage: one byte per bit, each element is 0x00 or 0xFF
+ std::array<ui64, 8> output{};
+
+ for (size_t i = 0; i < 8; ++i) {
+ output[i] = ReplicateEachBitEightTimes(input[i]); // second stage: 0x00 stays 0, 0xFF becomes all 64 bits set
+ }
+
+ return output;
}
+
+} // namespace NUdf
}
diff --git a/yql/essentials/public/udf/arrow/ut/bit_util_ut.cpp b/yql/essentials/public/udf/arrow/ut/bit_util_ut.cpp
index 4af399c8deb..dd95030c1b3 100644
--- a/yql/essentials/public/udf/arrow/ut/bit_util_ut.cpp
+++ b/yql/essentials/public/udf/arrow/ut/bit_util_ut.cpp
@@ -80,6 +80,25 @@ Y_UNIT_TEST(ReplicateEachBitFourTimes) {
UNIT_ASSERT_EQUAL(ReplicateEachBitFourTimes(0x80), 0xF0000000);
}
+Y_UNIT_TEST(ReplicateEachBitEightTimes) {
+ // Test case 1: No bits set -> every output byte is 0x00
+ UNIT_ASSERT_EQUAL(ReplicateEachBitEightTimes(0x00), 0x00000000);
+
+ // Test case 2: All bits set -> every output byte is 0xFF
+ UNIT_ASSERT_EQUAL(ReplicateEachBitEightTimes(0xFF), 0xFFFFFFFFFFFFFFFF);
+
+ // Test case 3: Alternating bits (even bits set, then odd bits set)
+ UNIT_ASSERT_EQUAL(ReplicateEachBitEightTimes(0x55), 0x00FF00FF00FF00FF);
+ UNIT_ASSERT_EQUAL(ReplicateEachBitEightTimes(0xAA), 0xFF00FF00FF00FF00);
+
+ // Test case 4: Contiguous run of set bits (0x3C = bits 2..5 -> bytes 2..5 are 0xFF)
+ UNIT_ASSERT_EQUAL(ReplicateEachBitEightTimes(0x3C), 0x0000FFFFFFFF0000);
+
+ // Test case 5: Single bit set (lowest bit -> lowest byte, highest bit -> highest byte)
+ UNIT_ASSERT_EQUAL(ReplicateEachBitEightTimes(0x01), 0x00000000000000FF);
+ UNIT_ASSERT_EQUAL(ReplicateEachBitEightTimes(0x80), 0xFF00000000000000);
+}
+
Y_UNIT_TEST(BitToByteExpand) {
auto testBody = [](auto n) {
using T = decltype(n);
@@ -125,6 +144,7 @@ Y_UNIT_TEST(BitToByteExpand) {
testBody(ui8());
testBody(ui16());
testBody(ui32());
+ testBody(ui64());
}
} // Y_UNIT_TEST_SUITE(BitExpanding)