aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
author jsjant <jsjant@gmail.com> 2025-04-23 17:59:33 +0300
committer GitHub <noreply@github.com> 2025-04-23 17:59:33 +0300
commit 0555fc053f1d41dd454dba019a88f786c802af15 (patch)
tree 811f6da78a82c36a6ef8fa2e8eef05c5fc0991d1
parent 5b143711f0ed0d12b1bbb59a0c8f33aa543da6f2 (diff)
download ydb-0555fc053f1d41dd454dba019a88f786c802af15.tar.gz
Introduce Intersect operation to Roaring UDF (#17611)
-rw-r--r-- ydb/library/yql/udfs/common/roaring/roaring.cpp | 77
-rw-r--r-- ydb/library/yql/udfs/common/roaring/test/canondata/test.test_intersect_/results.txt | 180
-rw-r--r-- ydb/library/yql/udfs/common/roaring/test/cases/intersect.sql | 9
3 files changed, 265 insertions, 1 deletions
diff --git a/ydb/library/yql/udfs/common/roaring/roaring.cpp b/ydb/library/yql/udfs/common/roaring/roaring.cpp
index ba1e559d3fa..a20b788a309 100644
--- a/ydb/library/yql/udfs/common/roaring/roaring.cpp
+++ b/ydb/library/yql/udfs/common/roaring/roaring.cpp
@@ -428,6 +428,65 @@ namespace {
}
};
+ class TRoaringIntersect: public TBoxedValue { // UDF callable "Intersect": passes two bitmap arguments to roaring_bitmap_intersect() and returns its result.
+ public:
+ TRoaringIntersect(TSourcePosition pos) // pos is kept only to prefix the error text produced in Run().
+ : Pos_(pos)
+ {
+ }
+
+ static TStringRef Name() { // Function name the module compares against the requested name when selecting this implementation.
+ return TStringRef::Of("Intersect");
+ }
+
+ private:
+ TUnboxedValue Run(const IValueBuilder* valueBuilder, // Reads args[0] and args[1] as bitmaps; the module registers the return type as bool.
+ const TUnboxedValuePod* args) const override {
+ Y_UNUSED(valueBuilder); // The value builder is not needed: the result is a plain TUnboxedValuePod.
+ try {
+ auto* left = GetBitmapFromArg(args[0]);
+ auto* right = GetBitmapFromArg(args[1]);
+
+ return TUnboxedValuePod(roaring_bitmap_intersect(left, right)); // Neither bitmap is freed here. NOTE(review): presumably both remain owned by their resource arguments - confirm.
+ } catch (const std::exception& e) {
+ UdfTerminate((TStringBuilder() << Pos_ << " " << e.what()).data()); // Message is "<position> <what()>". NOTE(review): no return follows, so UdfTerminate presumably never returns - confirm.
+ }
+ }
+
+ TSourcePosition Pos_; // Position given at construction; the module passes builder.GetSourcePosition().
+ };
+
+ class TRoaringIntersectWithBinary: public TBoxedValue { // UDF callable "IntersectWithBinary": like Intersect, but the second operand arrives as a serialized string.
+ public:
+ TRoaringIntersectWithBinary(TSourcePosition pos) // pos is kept only to prefix the error text produced in Run().
+ : Pos_(pos)
+ {
+ }
+
+ static TStringRef Name() { // Function name the module compares against the requested name when selecting this implementation.
+ return TStringRef::Of("IntersectWithBinary");
+ }
+
+ private:
+ TUnboxedValue Run(const IValueBuilder* valueBuilder, // Reads args[0] as a bitmap and args[1] as a string; the module registers the return type as bool.
+ const TUnboxedValuePod* args) const override {
+ Y_UNUSED(valueBuilder); // The value builder is not needed: the result is a plain TUnboxedValuePod.
+ try {
+ auto* left = GetBitmapFromArg(args[0]); // Not freed in this method. NOTE(review): presumably still owned by the resource argument - confirm.
+ auto* right = DeserializePortable(args[1].AsStringRef()); // Temporary bitmap built from the string argument; released below. NOTE(review): presumably expects CRoaring's portable format - confirm.
+
+ auto intersect = roaring_bitmap_intersect(left, right); // Result is captured first so the temporary can be freed before returning.
+ roaring_bitmap_free(right);
+ return TUnboxedValuePod(intersect);
+
+ } catch (const std::exception& e) {
+ UdfTerminate((TStringBuilder() << Pos_ << " " << e.what()).data()); // Message is "<position> <what()>". NOTE(review): no return follows, so UdfTerminate presumably never returns - confirm.
+ }
+ }
+
+ TSourcePosition Pos_; // Position given at construction; the module passes builder.GetSourcePosition().
+ };
+
class TRoaringModule: public IUdfModule {
public:
class TMemoryHookInitializer {
@@ -611,6 +670,24 @@ namespace {
if (!typesOnly) {
builder.Implementation(new TRoaringNaiveBulkAndWithBinary(builder.GetSourcePosition()));
}
+ } else if (TRoaringIntersect::Name() == name) {
+ builder.Returns<bool>()
+ .Args()
+ ->Add<TAutoMap<TResource<RoaringResourceName>>>()
+ .Add<TAutoMap<TResource<RoaringResourceName>>>();
+
+ if (!typesOnly) {
+ builder.Implementation(new TRoaringIntersect(builder.GetSourcePosition()));
+ }
+ } else if (TRoaringIntersectWithBinary::Name() == name) {
+ builder.Returns<bool>()
+ .Args()
+ ->Add<TAutoMap<TResource<RoaringResourceName>>>()
+ .Add<TAutoMap<char*>>();
+
+ if (!typesOnly) {
+ builder.Implementation(new TRoaringIntersectWithBinary(builder.GetSourcePosition()));
+ }
} else {
TStringBuilder sb;
sb << "Unknown function: " << name.Data();
diff --git a/ydb/library/yql/udfs/common/roaring/test/canondata/test.test_intersect_/results.txt b/ydb/library/yql/udfs/common/roaring/test/canondata/test.test_intersect_/results.txt
index 9c9ed2126d9..f14baff2331 100644
--- a/ydb/library/yql/udfs/common/roaring/test/canondata/test.test_intersect_/results.txt
+++ b/ydb/library/yql/udfs/common/roaring/test/canondata/test.test_intersect_/results.txt
@@ -476,5 +476,185 @@
]
}
]
+ };
+ {
+ "Write" = [
+ {
+ "Type" = [
+ "ListType";
+ [
+ "StructType";
+ [
+ [
+ "Intersect";
+ [
+ "OptionalType";
+ [
+ "DataType";
+ "Bool"
+ ]
+ ]
+ ]
+ ]
+ ]
+ ];
+ "Data" = [
+ [
+ [
+ %true
+ ]
+ ]
+ ]
+ }
+ ]
+ };
+ {
+ "Write" = [
+ {
+ "Type" = [
+ "ListType";
+ [
+ "StructType";
+ [
+ [
+ "IntersectNull0";
+ [
+ "OptionalType";
+ [
+ "DataType";
+ "Bool"
+ ]
+ ]
+ ]
+ ]
+ ]
+ ];
+ "Data" = [
+ [
+ #
+ ]
+ ]
+ }
+ ]
+ };
+ {
+ "Write" = [
+ {
+ "Type" = [
+ "ListType";
+ [
+ "StructType";
+ [
+ [
+ "IntersectNull1";
+ [
+ "OptionalType";
+ [
+ "DataType";
+ "Bool"
+ ]
+ ]
+ ]
+ ]
+ ]
+ ];
+ "Data" = [
+ [
+ #
+ ]
+ ]
+ }
+ ]
+ };
+ {
+ "Write" = [
+ {
+ "Type" = [
+ "ListType";
+ [
+ "StructType";
+ [
+ [
+ "IntersectWithBinary";
+ [
+ "OptionalType";
+ [
+ "DataType";
+ "Bool"
+ ]
+ ]
+ ]
+ ]
+ ]
+ ];
+ "Data" = [
+ [
+ [
+ %true
+ ]
+ ]
+ ]
+ }
+ ]
+ };
+ {
+ "Write" = [
+ {
+ "Type" = [
+ "ListType";
+ [
+ "StructType";
+ [
+ [
+ "IntersectWithBinaryNull";
+ [
+ "OptionalType";
+ [
+ "DataType";
+ "Bool"
+ ]
+ ]
+ ]
+ ]
+ ]
+ ];
+ "Data" = [
+ [
+ #
+ ]
+ ]
+ }
+ ]
+ };
+ {
+ "Write" = [
+ {
+ "Type" = [
+ "ListType";
+ [
+ "StructType";
+ [
+ [
+ "IntersectFalse";
+ [
+ "OptionalType";
+ [
+ "DataType";
+ "Bool"
+ ]
+ ]
+ ]
+ ]
+ ]
+ ];
+ "Data" = [
+ [
+ [
+ %false
+ ]
+ ]
+ ]
+ }
+ ]
}
] \ No newline at end of file
diff --git a/ydb/library/yql/udfs/common/roaring/test/cases/intersect.sql b/ydb/library/yql/udfs/common/roaring/test/cases/intersect.sql
index bdd9319a06f..ba9e53c1757 100644
--- a/ydb/library/yql/udfs/common/roaring/test/cases/intersect.sql
+++ b/ydb/library/yql/udfs/common/roaring/test/cases/intersect.sql
@@ -15,4 +15,11 @@ SELECT Roaring::Uint32List(Roaring::AndNotWithBinary(Roaring::Deserialize(right)
SELECT Roaring::Uint32List(Roaring::AndNotWithBinary(Roaring::Deserialize(right), NULL, true)) AS AndNotWithBinaryListEmptyInplace FROM Input;
SELECT Roaring::Uint32List(Roaring::NaiveBulkAnd(AsList(Roaring::Deserialize(right), Roaring::Deserialize(left)))) AS NaiveBulkAnd FROM Input;
-SELECT Roaring::Uint32List(Roaring::NaiveBulkAndWithBinary(AsList(right, left))) AS NaiveBulkAndWithBinary FROM Input; \ No newline at end of file
+SELECT Roaring::Uint32List(Roaring::NaiveBulkAndWithBinary(AsList(right, left))) AS NaiveBulkAndWithBinary FROM Input;
+
+SELECT Roaring::Intersect(Roaring::Deserialize(right), Roaring::Deserialize(left)) AS Intersect FROM Input;
+SELECT Roaring::Intersect(NULL, Roaring::Deserialize(left)) AS IntersectNull0 FROM Input;
+SELECT Roaring::Intersect(Roaring::Deserialize(right), NULL) AS IntersectNull1 FROM Input;
+SELECT Roaring::IntersectWithBinary(Roaring::Deserialize(right), left) AS IntersectWithBinary FROM Input;
+SELECT Roaring::IntersectWithBinary(Roaring::Deserialize(right), NULL) AS IntersectWithBinaryNull FROM Input;
+SELECT Roaring::Intersect(Roaring::Deserialize(right), Roaring::FromUint32List(AsList(100500))) AS IntersectFalse FROM Input;