about | summary | refs | log | tree | commit | diff | stats
diff options
context:
space:
mode:
author vvvv <vvvv@ydb.tech> 2022-11-23 19:17:46 +0300
committer vvvv <vvvv@ydb.tech> 2022-11-23 19:17:46 +0300
commit a84c09d1d4783305b944430bffdbc32e8c7b4728 (patch)
tree f44d0c96876d858cd6fa04f2feb2c6fd80e1b0c5
parent 3ddfb0470a13e7007289af879595658dd6a7eb7f (diff)
download ydb-a84c09d1d4783305b944430bffdbc32e8c7b4728.tar.gz
pushdown of filter for count/sum/avg/min/max
-rw-r--r-- ydb/library/yql/minikql/comp_nodes/mkql_block_agg.cpp 21
-rw-r--r-- ydb/library/yql/minikql/comp_nodes/mkql_block_agg_count.cpp 29
-rw-r--r-- ydb/library/yql/minikql/comp_nodes/mkql_block_agg_minmax.cpp 156
-rw-r--r-- ydb/library/yql/minikql/comp_nodes/mkql_block_agg_sum.cpp 142
4 files changed, 269 insertions, 79 deletions
diff --git a/ydb/library/yql/minikql/comp_nodes/mkql_block_agg.cpp b/ydb/library/yql/minikql/comp_nodes/mkql_block_agg.cpp
index f6acca37d35..e730a28c5df 100644
--- a/ydb/library/yql/minikql/comp_nodes/mkql_block_agg.cpp
+++ b/ydb/library/yql/minikql/comp_nodes/mkql_block_agg.cpp
@@ -57,13 +57,22 @@ public:
std::optional<ui64> filtered;
if (FilterColumn_) {
- arrow::BooleanArray arr(TArrowBlock::From(s.Values_[*FilterColumn_]).GetDatum().array());
- ui64 popCount = (ui64)arr.true_count();
- if (popCount == 0) {
- continue;
+ auto filterDatum = TArrowBlock::From(s.Values_[*FilterColumn_]).GetDatum();
+ if (filterDatum.is_scalar()) {
+ if (!filterDatum.scalar_as<arrow::BooleanScalar>().value) {
+ continue;
+ }
+ } else {
+ arrow::BooleanArray arr(filterDatum.array());
+ ui64 popCount = (ui64)arr.true_count();
+ if (popCount == 0) {
+ continue;
+ }
+
+ if (popCount < batchLength) {
+ filtered = popCount;
+ }
}
-
- filtered = popCount;
}
s.HasValues_ = true;
diff --git a/ydb/library/yql/minikql/comp_nodes/mkql_block_agg_count.cpp b/ydb/library/yql/minikql/comp_nodes/mkql_block_agg_count.cpp
index c985b182206..5afa75b3df8 100644
--- a/ydb/library/yql/minikql/comp_nodes/mkql_block_agg_count.cpp
+++ b/ydb/library/yql/minikql/comp_nodes/mkql_block_agg_count.cpp
@@ -36,15 +36,38 @@ public:
}
// Adds the number of counted rows of one batch to State_.
//
// columns     - per-column block values; the argument is read from columns[ArgColumn_]
//               and, on the filtered path with mixed nulls, the filter from
//               columns[*FilterColumn_].
// batchLength - row count added when the argument is a valid scalar and no
//               `filtered` value is supplied.
// filtered    - when set, used as the row count in place of batchLength (scalar case)
//               or of the array length (array case without nulls).
//               presumably the number of rows that passed the filter; verify against caller.
void AddMany(const NUdf::TUnboxedValue* columns, ui64 batchLength, std::optional<ui64> filtered) final {
- Y_ENSURE(!filtered);
const auto& datum = TArrowBlock::From(columns[ArgColumn_]).GetDatum();
if (datum.is_scalar()) {
if (datum.scalar()->is_valid) {
- State_ += batchLength;
+ State_ += filtered ? *filtered : batchLength; // a valid scalar counts once per (filtered) row
}
} else {
const auto& array = datum.array();
- State_ += array->length - array->GetNullCount();
+ if (!filtered) {
+ State_ += array->length - array->GetNullCount(); // no filter: count every non-null element
+ } else if (array->GetNullCount() == array->length) {
+ // every element is null: nothing to count, whatever the filter says
+ return;
+ } else if (array->GetNullCount() == 0) {
+ // no nulls at all: the count is exactly the supplied `filtered` value
+ State_ += *filtered;
+ } else {
+ const auto& filterDatum = TArrowBlock::From(columns[*FilterColumn_]).GetDatum();
+ // mixed case: walk both bitmaps and count positions where both bits are set
+ const auto& filterArray = filterDatum.array(); // NOTE(review): assumes the filter is an array whenever `filtered` is set; confirm with caller
+ MKQL_ENSURE(filterArray->GetNullCount() == 0, "Expected non-nullable bool column");
+ auto nullBitmapPtr = array->GetValues<uint8_t>(0, 0); // buffer 0 of the argument, read from absolute offset 0
+ auto filterBitmap = filterArray->GetValues<uint8_t>(1, 0); // buffer 1 of the filter (its bit-packed values), absolute offset 0
+ auto state = State_; // accumulate in a local, written back after the loop
+ for (ui32 i = 0; i < array->length; ++i) { // NOTE(review): index is ui32; confirm array->length always fits
+ ui64 fullIndex = i + array->offset; // NOTE(review): the same index (argument array's offset) is applied to the filter bitmap; confirm both arrays share an offset
+ auto bit1 = ((nullBitmapPtr[fullIndex >> 3] >> (fullIndex & 0x07)) & 1); // bit fullIndex of the argument's bitmap
+ auto bit2 = ((filterBitmap[fullIndex >> 3] >> (fullIndex & 0x07)) & 1); // bit fullIndex of the filter's bitmap
+ state += bit1 & bit2; // adds 1 only when both bits are set
+ }
+
+ State_ = state;
+ }
}
}
diff --git a/ydb/library/yql/minikql/comp_nodes/mkql_block_agg_minmax.cpp b/ydb/library/yql/minikql/comp_nodes/mkql_block_agg_minmax.cpp
index 9cea6d1fbc7..a2f1d53f6ce 100644
--- a/ydb/library/yql/minikql/comp_nodes/mkql_block_agg_minmax.cpp
+++ b/ydb/library/yql/minikql/comp_nodes/mkql_block_agg_minmax.cpp
@@ -34,7 +34,7 @@ public:
}
void AddMany(const NUdf::TUnboxedValue* columns, ui64 batchLength, std::optional<ui64> filtered) final {
- Y_ENSURE(!filtered);
+ Y_UNUSED(batchLength);
const auto& datum = TArrowBlock::From(columns[ArgColumn_]).GetDatum();
if (datum.is_scalar()) {
if (datum.scalar()->is_valid) {
@@ -50,23 +50,56 @@ public:
return;
}
- IsValid_ = true;
- TIn value = Value_;
- if (array->GetNullCount() == 0) {
- for (int64_t i = 0; i < len; ++i) {
- value = UpdateMinMax<IsMin>(value, ptr[i]);
+ if (!filtered) {
+ IsValid_ = true;
+ TIn value = Value_;
+ if (array->GetNullCount() == 0) {
+ for (int64_t i = 0; i < len; ++i) {
+ value = UpdateMinMax<IsMin>(value, ptr[i]);
+ }
+ } else {
+ auto nullBitmapPtr = array->GetValues<uint8_t>(0, 0);
+ for (int64_t i = 0; i < len; ++i) {
+ ui64 fullIndex = i + array->offset;
+ // bit 1 -> mask 0xFF..FF, bit 0 -> mask 0x00..00
+ TIn mask = (((nullBitmapPtr[fullIndex >> 3] >> (fullIndex & 0x07)) & 1) ^ 1) - TIn(1);
+ value = UpdateMinMax<IsMin>(value, TIn((ptr[i] & mask) | (value & ~mask)));
+ }
}
+
+ Value_ = value;
} else {
- auto nullBitmapPtr = array->GetValues<uint8_t>(0, 0);
- for (int64_t i = 0; i < len; ++i) {
- ui64 fullIndex = i + array->offset;
- // bit 1 -> mask 0xFF..FF, bit 0 -> mask 0x00..00
- TIn mask = (((nullBitmapPtr[fullIndex >> 3] >> (fullIndex & 0x07)) & 1) ^ 1) - TIn(1);
- value = UpdateMinMax<IsMin>(value, TIn((ptr[i] & mask) | (value & ~mask)));
+ const auto& filterDatum = TArrowBlock::From(columns[*FilterColumn_]).GetDatum();
+ const auto& filterArray = filterDatum.array();
+ MKQL_ENSURE(filterArray->GetNullCount() == 0, "Expected non-nullable bool column");
+ auto filterBitmap = filterArray->template GetValues<uint8_t>(1, 0);
+
+ TIn value = Value_;
+ if (array->GetNullCount() == 0) {
+ IsValid_ = true;
+ for (int64_t i = 0; i < len; ++i) {
+ ui64 fullIndex = i + array->offset;
+ TIn filterMask = (((filterBitmap[fullIndex >> 3] >> (fullIndex & 0x07)) & 1) ^ 1) - TIn(1);
+ value = UpdateMinMax<IsMin>(value, TIn((ptr[i] & filterMask) | (value & ~filterMask)));
+ }
+ } else {
+ ui64 count = 0;
+ auto nullBitmapPtr = array->GetValues<uint8_t>(0, 0);
+ for (int64_t i = 0; i < len; ++i) {
+ ui64 fullIndex = i + array->offset;
+ // bit 1 -> mask 0xFF..FF, bit 0 -> mask 0x00..00
+ TIn mask = (((nullBitmapPtr[fullIndex >> 3] >> (fullIndex & 0x07)) & 1) ^ 1) - TIn(1);
+ TIn filterMask = (((filterBitmap[fullIndex >> 3] >> (fullIndex & 0x07)) & 1) ^ 1) - TIn(1);
+ mask &= filterMask;
+ value = UpdateMinMax<IsMin>(value, TIn((ptr[i] & mask) | (value & ~mask)));
+ count += mask & 1;
+ }
+
+ IsValid_ = IsValid_ || count > 0;
}
- }
- Value_ = value;
+ Value_ = value;
+ }
}
}
@@ -99,7 +132,6 @@ public:
}
void AddMany(const NUdf::TUnboxedValue* columns, ui64 batchLength, std::optional<ui64> filtered) final {
- Y_ENSURE(!filtered);
Y_UNUSED(batchLength);
const auto& datum = TArrowBlock::From(columns[ArgColumn_]).GetDatum();
MKQL_ENSURE(datum.is_array(), "Expected array");
@@ -108,12 +140,28 @@ public:
auto len = array->length;
MKQL_ENSURE(array->GetNullCount() == 0, "Expected no nulls");
MKQL_ENSURE(len > 0, "Expected at least one value");
- TIn value = Value_;
- for (int64_t i = 0; i < len; ++i) {
- value = UpdateMinMax<IsMin>(value, ptr[i]);
- }
+ if (!filtered) {
+ TIn value = Value_;
+ for (int64_t i = 0; i < len; ++i) {
+ value = UpdateMinMax<IsMin>(value, ptr[i]);
+ }
- Value_ = value;
+ Value_ = value;
+ } else {
+ const auto& filterDatum = TArrowBlock::From(columns[*FilterColumn_]).GetDatum();
+ const auto& filterArray = filterDatum.array();
+ MKQL_ENSURE(filterArray->GetNullCount() == 0, "Expected non-nullable bool column");
+ auto filterBitmap = filterArray->template GetValues<uint8_t>(1, 0);
+
+ TIn value = Value_;
+ for (int64_t i = 0; i < len; ++i) {
+ ui64 fullIndex = i + array->offset;
+ TIn filterMask = (((filterBitmap[fullIndex >> 3] >> (fullIndex & 0x07)) & 1) ^ 1) - TIn(1);
+ value = UpdateMinMax<IsMin>(value, TIn((ptr[i] & filterMask) | (value & ~filterMask)));
+ }
+
+ Value_ = value;
+ }
}
NUdf::TUnboxedValue Finish() final {
@@ -140,7 +188,6 @@ public:
}
void AddMany(const NUdf::TUnboxedValue* columns, ui64 batchLength, std::optional<ui64> filtered) final {
- Y_ENSURE(!filtered);
const auto& datum = TArrowBlock::From(columns[ArgColumn_]).GetDatum();
if (datum.is_scalar()) {
if (datum.scalar()->is_valid) {
@@ -156,26 +203,61 @@ public:
return;
}
- IsValid_ = true;
- ui8 value = Value_;
- if (array->GetNullCount() == 0) {
- for (int64_t i = 0; i < len; ++i) {
- ui64 fullIndex = i + array->offset;
- ui8 in = ((ptr[fullIndex >> 3] >> (fullIndex & 0x07)) & 1);
- value = UpdateMinMax<IsMin>(value, in);
+ if (!filtered) {
+ IsValid_ = true;
+ ui8 value = Value_;
+ if (array->GetNullCount() == 0) {
+ for (int64_t i = 0; i < len; ++i) {
+ ui64 fullIndex = i + array->offset;
+ ui8 in = ((ptr[fullIndex >> 3] >> (fullIndex & 0x07)) & 1);
+ value = UpdateMinMax<IsMin>(value, in);
+ }
+ } else {
+ auto nullBitmapPtr = array->GetValues<uint8_t>(0, 0);
+ for (int64_t i = 0; i < len; ++i) {
+ ui64 fullIndex = i + array->offset;
+ // bit 1 -> mask 0xFF..FF, bit 0 -> mask 0x00..00
+ ui8 in = ((ptr[fullIndex >> 3] >> (fullIndex & 0x07)) & 1);
+ ui8 mask = (((nullBitmapPtr[fullIndex >> 3] >> (fullIndex & 0x07)) & 1) ^ 1) - ui8(1);
+ value = UpdateMinMax<IsMin>(value, ui8((in & mask) | (value & ~mask)));
+ }
}
+
+ Value_ = value;
} else {
- auto nullBitmapPtr = array->GetValues<uint8_t>(0, 0);
- for (int64_t i = 0; i < len; ++i) {
- ui64 fullIndex = i + array->offset;
- // bit 1 -> mask 0xFF..FF, bit 0 -> mask 0x00..00
- ui8 in = ((ptr[fullIndex >> 3] >> (fullIndex & 0x07)) & 1);
- ui8 mask = (((nullBitmapPtr[fullIndex >> 3] >> (fullIndex & 0x07)) & 1) ^ 1) - ui8(1);
- value = UpdateMinMax<IsMin>(value, ui8((in & mask) | (value & ~mask)));
+ const auto& filterDatum = TArrowBlock::From(columns[*FilterColumn_]).GetDatum();
+ const auto& filterArray = filterDatum.array();
+ MKQL_ENSURE(filterArray->GetNullCount() == 0, "Expected non-nullable bool column");
+ auto filterBitmap = filterArray->template GetValues<uint8_t>(1, 0);
+
+ ui8 value = Value_;
+ if (array->GetNullCount() == 0) {
+ IsValid_ = true;
+ for (int64_t i = 0; i < len; ++i) {
+ ui64 fullIndex = i + array->offset;
+ ui8 in = ((ptr[fullIndex >> 3] >> (fullIndex & 0x07)) & 1);
+ ui8 filterMask = (((filterBitmap[fullIndex >> 3] >> (fullIndex & 0x07)) & 1) ^ 1) - ui8(1);
+ value = UpdateMinMax<IsMin>(value, ui8((in & filterMask) | (value & ~filterMask)));
+ }
+ } else {
+ ui64 count = 0;
+ auto nullBitmapPtr = array->GetValues<uint8_t>(0, 0);
+ for (int64_t i = 0; i < len; ++i) {
+ ui64 fullIndex = i + array->offset;
+ // bit 1 -> mask 0xFF..FF, bit 0 -> mask 0x00..00
+ ui8 in = ((ptr[fullIndex >> 3] >> (fullIndex & 0x07)) & 1);
+ ui8 mask = (((nullBitmapPtr[fullIndex >> 3] >> (fullIndex & 0x07)) & 1) ^ 1) - ui8(1);
+ ui8 filterMask = (((filterBitmap[fullIndex >> 3] >> (fullIndex & 0x07)) & 1) ^ 1) - ui8(1);
+ mask &= filterMask;
+ value = UpdateMinMax<IsMin>(value, ui8((in & mask) | (value & ~mask)));
+ count += mask & 1;
+ }
+
+ IsValid_ = IsValid_ || count > 0;
}
- }
- Value_ = value;
+ Value_ = value;
+ }
}
}
diff --git a/ydb/library/yql/minikql/comp_nodes/mkql_block_agg_sum.cpp b/ydb/library/yql/minikql/comp_nodes/mkql_block_agg_sum.cpp
index dbf0d5a5edb..a4033fc71b4 100644
--- a/ydb/library/yql/minikql/comp_nodes/mkql_block_agg_sum.cpp
+++ b/ydb/library/yql/minikql/comp_nodes/mkql_block_agg_sum.cpp
@@ -20,11 +20,10 @@ public:
}
void AddMany(const NUdf::TUnboxedValue* columns, ui64 batchLength, std::optional<ui64> filtered) final {
- Y_ENSURE(!filtered);
const auto& datum = TArrowBlock::From(columns[ArgColumn_]).GetDatum();
if (datum.is_scalar()) {
if (datum.scalar()->is_valid) {
- Sum_ += batchLength * datum.scalar_as<TInScalar>().value;
+ Sum_ += (filtered ? *filtered : batchLength) * datum.scalar_as<TInScalar>().value;
IsValid_ = true;
}
} else {
@@ -36,23 +35,56 @@ public:
return;
}
- IsValid_ = true;
- TSum sum = Sum_;
- if (array->GetNullCount() == 0) {
- for (int64_t i = 0; i < len; ++i) {
- sum += ptr[i];
+ if (!filtered) {
+ IsValid_ = true;
+ TSum sum = Sum_;
+ if (array->GetNullCount() == 0) {
+ for (int64_t i = 0; i < len; ++i) {
+ sum += ptr[i];
+ }
+ } else {
+ auto nullBitmapPtr = array->GetValues<uint8_t>(0, 0);
+ for (int64_t i = 0; i < len; ++i) {
+ ui64 fullIndex = i + array->offset;
+ // bit 1 -> mask 0xFF..FF, bit 0 -> mask 0x00..00
+ TIn mask = (((nullBitmapPtr[fullIndex >> 3] >> (fullIndex & 0x07)) & 1) ^ 1) - TIn(1);
+ sum += (ptr[i] & mask);
+ }
}
+
+ Sum_ = sum;
} else {
- auto nullBitmapPtr = array->GetValues<uint8_t>(0, 0);
- for (int64_t i = 0; i < len; ++i) {
- ui64 fullIndex = i + array->offset;
- // bit 1 -> mask 0xFF..FF, bit 0 -> mask 0x00..00
- TIn mask = (((nullBitmapPtr[fullIndex >> 3] >> (fullIndex & 0x07)) & 1) ^ 1) - TIn(1);
- sum += (ptr[i] & mask);
+ const auto& filterDatum = TArrowBlock::From(columns[*FilterColumn_]).GetDatum();
+ const auto& filterArray = filterDatum.array();
+ MKQL_ENSURE(filterArray->GetNullCount() == 0, "Expected non-nullable bool column");
+ auto filterBitmap = filterArray->template GetValues<uint8_t>(1, 0);
+ TSum sum = Sum_;
+ if (array->GetNullCount() == 0) {
+ IsValid_ = true;
+ for (int64_t i = 0; i < len; ++i) {
+ ui64 fullIndex = i + array->offset;
+ // bit 1 -> mask 0xFF..FF, bit 0 -> mask 0x00..00
+ TIn filterMask = (((filterBitmap[fullIndex >> 3] >> (fullIndex & 0x07)) & 1) ^ 1) - TIn(1);
+ sum += ptr[i] & filterMask;
+ }
+ } else {
+ ui64 count = 0;
+ auto nullBitmapPtr = array->template GetValues<uint8_t>(0, 0);
+ for (int64_t i = 0; i < len; ++i) {
+ ui64 fullIndex = i + array->offset;
+ // bit 1 -> mask 0xFF..FF, bit 0 -> mask 0x00..00
+ TIn mask = (((nullBitmapPtr[fullIndex >> 3] >> (fullIndex & 0x07)) & 1) ^ 1) - TIn(1);
+ TIn filterMask = (((filterBitmap[fullIndex >> 3] >> (fullIndex & 0x07)) & 1) ^ 1) - TIn(1);
+ mask &= filterMask;
+ sum += (ptr[i] & mask);
+ count += mask & 1;
+ }
+
+ IsValid_ = IsValid_ || count > 0;
}
- }
- Sum_ = sum;
+ Sum_ = sum;
+ }
}
}
@@ -80,7 +112,6 @@ public:
}
void AddMany(const NUdf::TUnboxedValue* columns, ui64 batchLength, std::optional<ui64> filtered) final {
- Y_ENSURE(!filtered);
Y_UNUSED(batchLength);
const auto& datum = TArrowBlock::From(columns[ArgColumn_]).GetDatum();
MKQL_ENSURE(datum.is_array(), "Expected array");
@@ -91,8 +122,21 @@ public:
MKQL_ENSURE(len > 0, "Expected at least one value");
TSum sum = Sum_;
- for (int64_t i = 0; i < len; ++i) {
- sum += ptr[i];
+ if (!filtered) {
+ for (int64_t i = 0; i < len; ++i) {
+ sum += ptr[i];
+ }
+ } else {
+ const auto& filterDatum = TArrowBlock::From(columns[*FilterColumn_]).GetDatum();
+ const auto& filterArray = filterDatum.array();
+ MKQL_ENSURE(filterArray->GetNullCount() == 0, "Expected non-nullable bool column");
+ auto filterBitmap = filterArray->template GetValues<uint8_t>(1, 0);
+ for (int64_t i = 0; i < len; ++i) {
+ ui64 fullIndex = i + array->offset;
+ // bit 1 -> mask 0xFF..FF, bit 0 -> mask 0x00..00
+ TIn filterMask = (((filterBitmap[fullIndex >> 3] >> (fullIndex & 0x07)) & 1) ^ 1) - TIn(1);
+ sum += ptr[i] & filterMask;
+ }
}
Sum_ = sum;
@@ -118,11 +162,10 @@ public:
}
void AddMany(const NUdf::TUnboxedValue* columns, ui64 batchLength, std::optional<ui64> filtered) final {
- Y_ENSURE(!filtered);
const auto& datum = TArrowBlock::From(columns[ArgColumn_]).GetDatum();
if (datum.is_scalar()) {
if (datum.scalar()->is_valid) {
- Sum_ += double(batchLength * datum.scalar_as<TInScalar>().value);
+ Sum_ += double((filtered ? *filtered : batchLength) * datum.scalar_as<TInScalar>().value);
Count_ += batchLength;
}
} else {
@@ -134,23 +177,56 @@ public:
return;
}
- Count_ += count;
- double sum = Sum_;
- if (array->GetNullCount() == 0) {
- for (int64_t i = 0; i < len; ++i) {
- sum += double(ptr[i]);
+ if (!filtered) {
+ Count_ += count;
+ double sum = Sum_;
+ if (array->GetNullCount() == 0) {
+ for (int64_t i = 0; i < len; ++i) {
+ sum += double(ptr[i]);
+ }
+ } else {
+ auto nullBitmapPtr = array->GetValues<uint8_t>(0, 0);
+ for (int64_t i = 0; i < len; ++i) {
+ ui64 fullIndex = i + array->offset;
+ // bit 1 -> mask 0xFF..FF, bit 0 -> mask 0x00..00
+ TIn mask = (((nullBitmapPtr[fullIndex >> 3] >> (fullIndex & 0x07)) & 1) ^ 1) - TIn(1);
+ sum += double(ptr[i] & mask);
+ }
}
+
+ Sum_ = sum;
} else {
- auto nullBitmapPtr = array->GetValues<uint8_t>(0, 0);
- for (int64_t i = 0; i < len; ++i) {
- ui64 fullIndex = i + array->offset;
- // bit 1 -> mask 0xFF..FF, bit 0 -> mask 0x00..00
- TIn mask = (((nullBitmapPtr[fullIndex >> 3] >> (fullIndex & 0x07)) & 1) ^ 1) - TIn(1);
- sum += double(ptr[i] & mask);
+ const auto& filterDatum = TArrowBlock::From(columns[*FilterColumn_]).GetDatum();
+ const auto& filterArray = filterDatum.array();
+ MKQL_ENSURE(filterArray->GetNullCount() == 0, "Expected non-nullable bool column");
+ auto filterBitmap = filterArray->template GetValues<uint8_t>(1, 0);
+
+ double sum = Sum_;
+ ui64 count = Count_;
+ if (array->GetNullCount() == 0) {
+ for (int64_t i = 0; i < len; ++i) {
+ ui64 fullIndex = i + array->offset;
+ // bit 1 -> mask 0xFF..FF, bit 0 -> mask 0x00..00
+ TIn filterMask = (((filterBitmap[fullIndex >> 3] >> (fullIndex & 0x07)) & 1) ^ 1) - TIn(1);
+ sum += double(ptr[i] & filterMask);
+ count += filterMask & 1;
+ }
+ } else {
+ auto nullBitmapPtr = array->GetValues<uint8_t>(0, 0);
+ for (int64_t i = 0; i < len; ++i) {
+ ui64 fullIndex = i + array->offset;
+ // bit 1 -> mask 0xFF..FF, bit 0 -> mask 0x00..00
+ TIn mask = (((nullBitmapPtr[fullIndex >> 3] >> (fullIndex & 0x07)) & 1) ^ 1) - TIn(1);
+ TIn filterMask = (((filterBitmap[fullIndex >> 3] >> (fullIndex & 0x07)) & 1) ^ 1) - TIn(1);
+ mask &= filterMask;
+ sum += double(ptr[i] & mask);
+ count += mask & 1;
+ }
}
- }
- Sum_ = sum;
+ Sum_ = sum;
+ Count_ = count;
+ }
}
}